Restarting after various failures

pmeunier
Apr 17, 2022, 4:09 PM
UZXIRPR3NPWWFX7R2PB7TI2GPHH6XYQWCY5IOFZ2ALHGSWGYI37AC

Dependencies

  • [2] UEGA35KU Signalling the postgres server when the leader changes to a node other than this one
  • [3] ENEREHTW Restart and rewind the server during failover (requires policykit)
  • [4] 6Z6PYXTS Testing and debugging
  • [5] NPSWSVZN Init
  • [6] FIIWK33F Fixing lock issues

Change contents

  • replacement in src/main.rs at line 102
    [3.1073][3.1073:1074]()
    [3.1073]
    [3.1757]
    let process_lock = Arc::new(Mutex::new(()));
  • edit in src/main.rs at line 111
    [3.1935]
    [3.1935]
    process_lock.clone(),
  • edit in src/main.rs at line 118
    [3.965]
    [3.965]
    process_lock.clone(),
  • edit in src/main.rs at line 173
    [3.1049]
    [3.1049]
    host: String,
    path: String,
    mut leader: Receiver<String>,
    pool: Arc<Mutex<Option<tokio_postgres::Client>>>,
    process_lock: Arc<Mutex<()>>,
    ) {
    loop {
    match race_(
    host.clone(),
    path.clone(),
    leader.clone(),
    pool.clone(),
    process_lock.clone(),
    )
    .await
    {
    Ok(()) => {
    error!("race stopped")
    }
    Err(e) => {
    error!("race_ error {:?}", e);
    }
    }
    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
    }
    }
    async fn race_(
  • edit in src/main.rs at line 204
    [3.1173]
    [3.1173]
    process_lock: Arc<Mutex<()>>,
  • replacement in src/main.rs at line 236
    [3.1475][3.1475:1505]()
    promote(&path).await;
    [3.1475]
    [3.5486]
    // promote(&process_lock, &path).await;
  • edit in src/main.rs at line 261
    [3.2476]
    [3.6132]
    process_lock: Arc<Mutex<()>>,
  • replacement in src/main.rs at line 280
    [3.2939][3.2939:2972]()
    promote(&path).await
    [3.2939]
    [3.1766]
    promote(&process_lock, &path).await
  • replacement in src/main.rs at line 292
    [3.2226][2.0:72]()
    rewind(&path, port, &db_name, leader, &password).await;
    [3.2226]
    [2.72]
    rewind(&process_lock, &path, port, &db_name, leader, &password).await;
  • replacement in src/main.rs at line 294
    [2.93][2.93:130]()
    reload(&path).await;
    [2.93]
    [3.2287]
    reload(&process_lock, &path).await;
  • replacement in src/main.rs at line 297
    [2.148][2.148:181]()
    reload(&path).await;
    [2.148]
    [3.3173]
    reload(&process_lock, &path).await;
  • replacement in src/main.rs at line 305
    [3.3137][3.3137:3168]()
    async fn promote(path: &str) {
    [3.3137]
    [3.2302]
    async fn promote(process_lock: &Mutex<()>, path: &str) {
  • edit in src/main.rs at line 308
    [3.3201]
    [3.2328]
    let guard = process_lock.lock().await;
  • edit in src/main.rs at line 317
    [3.2500]
    [3.6959]
    std::mem::drop(guard);
  • replacement in src/main.rs at line 320
    [3.3348][2.182:212]()
    async fn reload(path: &str) {
    [3.3348]
    [2.212]
    async fn reload(process_lock: &Mutex<()>, path: &str) {
  • edit in src/main.rs at line 323
    [3.3451]
    [2.235]
    let guard = process_lock.lock().await;
  • edit in src/main.rs at line 332
    [3.2702]
    [2.311]
    std::mem::drop(guard);
  • replacement in src/main.rs at line 335
    [2.314][2.314:400]()
    async fn rewind(path: &str, port: u16, db_name: &str, leader: &str, password: &str) {
    [2.314]
    [2.400]
    async fn rewind(
    process_lock: &Mutex<()>,
    path: &str,
    port: u16,
    db_name: &str,
    leader: &str,
    password: &str,
    ) {
  • edit in src/main.rs at line 344
    [2.425]
    [2.425]
    let guard = process_lock.lock().await;
  • edit in src/main.rs at line 403
    [2.1664]
    [3.4112]
    std::mem::drop(guard);