// We won the etcd election: log the leadership key this node now holds.
info!(
"Lock acquired: {:?} {:?}",
std::str::from_utf8(leader_key.name()),
std::str::from_utf8(leader_key.key())
);
debug!("Lock acquired {:?}", leader_key);
// This node is now the leader: promote the local PostgreSQL instance.
promote(&path).await;
let mut was_leader = {
// Take the client out of the mutex in its own scope so the guard is dropped
// before the mutex is locked again below (same pattern as further down).
let pool_ = { pool.lock().await.take() };
if let Some(pool_) = pool_ {
let row = pool_.query_one("SELECT pg_is_in_recovery()", &[]).await?;
let is_in_recovery: bool = row.get(0);
*pool.lock().await = Some(pool_);
Some(!is_in_recovery)
} else {
None
}
};
debug!("was_leader = {:?}", was_leader);
} else if !is_leader && was_leader == Some(true) {
// This node lost leadership while it was the primary: rewind it onto the new leader.
rewind(&path, port, &db_name, &leader).await;
} else if !is_leader && was_leader != Some(false) {
// Leadership history is unknown: ask PostgreSQL directly whether this node is
// still running as a primary, and rewind it if so.
let pool_ = { pool.lock().await.take() };
let recov: bool = if let Some(pool_) = pool_ {
let row = pool_.query_one("SELECT pg_is_in_recovery()", &[]).await?;
*pool.lock().await = Some(pool_);
row.get(0)
} else {
false
};
debug!("recov = {:?}", recov);
if !recov {
rewind(&path, port, &db_name, leader).await;
}
Command::new("pg_rewind")
.args([
"-D",
&path,
"-R",
"--source-server",
&format!(
"port={} user=postgres dbname={} host={}",
port, db_name, leader
),
])
.output()
.await
.expect("failed to execute process");
Command::new("systemctl")
use std::io::Write;
let mut success = false;
while !success {
// Waiting for source to promote.
let out = Command::new("pg_rewind")
.args([
"-D",
&path,
"-R",
"--source-server",
&format!(
"port={} user=postgres dbname={} host={}",
port, db_name, leader
),
])
.output()
.await
.expect("failed to execute process");
std::io::stdout().write_all(&out.stdout).unwrap();
std::io::stderr().write_all(&out.stderr).unwrap();
success = out.status.success();
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
}
let out = Command::new("systemctl")
Failover between the leader and replica modes of PostgreSQL isn't automatic: it requires an extra protocol to decide which of the replicas becomes the new leader when the previous one fails.
This crate solves that problem in the particular configuration where an Etcd v3 server is already running, for example because another service on the same machine requires one.
Postrep uses a fixed replication strategy (PostgreSQL's streaming replication) and a fixed leader-election tool (Etcd v3) to achieve proper failover when the cluster leader fails.
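The election itself is delegated to Etcd. The snippets above already log `leader_key.name()` and `leader_key.key()` after winning a campaign, which matches the election API of the `etcd-client` crate; the following is a minimal, hypothetical sketch of that side of the protocol, assuming `etcd-client` and Tokio. The function names (`become_leader`, `keep_lease_alive`), the election name `"postrep"`, the endpoint and the advertised hostname are illustrative assumptions, not taken from the crate.

```rust
// Assumed dependencies: etcd-client = "0.12", tokio = { version = "1", features = ["full"] }
use etcd_client::{Client, Error};

/// Campaign for leadership under `election_name`, advertising `host` so that
/// replicas can later point `--source-server host=...` at the winner.
/// `campaign` only returns once this node holds the leadership key.
async fn become_leader(
    endpoints: &[&str],
    election_name: &str,
    host: &str,
) -> Result<(Client, i64), Error> {
    let mut client = Client::connect(endpoints, None).await?;

    // Leadership is tied to a lease: if this process dies and stops renewing
    // it, etcd deletes the leadership key and another node can win.
    let lease_id = client.lease_grant(10, None).await?.id();

    let resp = client.campaign(election_name, host, lease_id).await?;
    if let Some(leader_key) = resp.leader() {
        println!(
            "Lock acquired: {:?} {:?}",
            std::str::from_utf8(leader_key.name()),
            std::str::from_utf8(leader_key.key())
        );
    }
    Ok((client, lease_id))
}

/// Renew the lease at a fraction of its TTL for as long as we want to stay leader.
async fn keep_lease_alive(client: &mut Client, lease_id: i64) -> Result<(), Error> {
    let (mut keeper, _responses) = client.lease_keep_alive(lease_id).await?;
    loop {
        keeper.keep_alive().await?;
        tokio::time::sleep(std::time::Duration::from_secs(3)).await;
    }
}

#[tokio::main]
async fn main() -> Result<(), Error> {
    let (mut client, lease_id) =
        become_leader(&["localhost:2379"], "postrep", "db1.example.com").await?;
    // This is where the real service would call `promote(...)` or `rewind(...)`
    // as in the snippets above; here we only keep the leadership lease alive.
    keep_lease_alive(&mut client, lease_id).await
}
```

Tying leadership to a lease is what makes the failover automatic: a crashed leader stops renewing its lease, Etcd deletes the leadership key, and a replica's pending `campaign` call returns, at which point that replica can promote itself as in the code above.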