Make the queue monitor more robust and improve debug output

[?]
Jun 15, 2015, 2:54 PM
GKZN4UV75GV7GEHQGBM2O6UHN6CVSHA7BMASCUQSDIDYVUKKZL7AC

Dependencies

  • [2] IWB3F4Z6 Fail builds with previously failed steps early
  • [3] ATJ54SPX Use PostgreSQL notifications for queue events
  • [4] 5AIYUMTB Basic remote building
  • [5] T2EIYJNG On SIGINT, shut down the builder threads
  • [6] ENXUSMSV Make concurrency more robust
  • [7] RYTQLATY Keep track of failed paths in the Hydra database
  • [8] C6HOMHZW Don't try to handle SIGINT
  • [9] 2IQRXLWE Support cancelling builds
  • [10] 62MQPRXC Pass null values to libpqxx properly
  • [11] 24BMQDZA Start of single-process hydra-queue-runner
  • [12] 22LDPAIP Check non-runnable steps for unsupported system type
  • [13] YZAI5GQU Implement a database connection pool
  • [14] NJJ7H64S Very basic multi-threaded queue runner
  • [15] FQQRJUO4 Mark builds as busy
  • [16] UQQ4IL55 Add an error type for "unsupported system type"

Change contents

  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 79
    [3.229][3.229:294]()
    printMsg(lvlError, format("destroying build %1%") % id);
    [3.229]
    [3.294]
    printMsg(lvlDebug, format("destroying build %1%") % id);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 113
    [3.676][3.676:745]()
    printMsg(lvlError, format("destroying step %1%") % drvPath);
    [3.676]
    [3.5921]
    printMsg(lvlDebug, format("destroying step %1%") % drvPath);
  • edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 226
    [3.1]
    [3.1]
    void queueMonitorLoop();
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 275
    [3.7535][3.38:109]()
    printMsg(lvlError, "clearing active builds / build steps...");
    [3.7535]
    [3.109]
    printMsg(lvlInfo, "clearing active builds / build steps...");
  • edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 387
    [3.226]
    [3.1433]
    {
    while (true) {
    try {
    queueMonitorLoop();
    } catch (std::exception & e) {
    printMsg(lvlError, format("queue monitor: %1%") % e.what());
    sleep(10); // probably a DB problem, so don't retry right away
    }
    }
    }
    void State::queueMonitorLoop()
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 435
    [3.980][3.980:1063]()
    printMsg(lvlError, "got notification: new builds added to the queue");
    [3.980]
    [3.1063]
    printMsg(lvlTalkative, "got notification: new builds added to the queue");
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 437
    [3.1100][3.1100:1170]()
    printMsg(lvlError, "got notification: builds restarted");
    [3.1100]
    [3.1170]
    printMsg(lvlTalkative, "got notification: builds restarted");
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 441
    [3.1266][3.1266:1336]()
    printMsg(lvlError, "got notification: builds cancelled");
    [3.1266]
    [3.213]
    printMsg(lvlTalkative, "got notification: builds cancelled");
  • edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 445
    [3.1755][3.1755:1803]()
    printMsg(lvlError, "queue monitor exits");
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 450
    [3.9880][3.1427:1515]()
    printMsg(lvlError, format("checking the queue for builds > %1%...") % lastBuildId);
    [3.9880]
    [3.1943]
    printMsg(lvlInfo, format("checking the queue for builds > %1%...") % lastBuildId);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 483
    [3.10352][3.2857:2952]()
    printMsg(lvlInfo, format("loading build %1% (%2%)") % build->id % build->fullJobName);
    [3.10352]
    [3.10473]
    printMsg(lvlTalkative, format("loading build %1% (%2%)") % build->id % build->fullJobName);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 508
    [3.9878][3.9878:9949]()
    printMsg(lvlInfo, format("cached build %1%") % build->id);
    [3.9878]
    [3.11213]
    printMsg(lvlInfo, format("marking build %1% as cached successful") % build->id);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 540
    [2.503][2.503:604]()
    printMsg(lvlError, format("failing build %1% due to previous failure") % build->id);
    [2.503]
    [2.604]
    printMsg(lvlError, format("marking build %1% as cached failure") % build->id);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 574
    [3.3558][3.249:361]()
    printMsg(lvlInfo, format("added build %1% (top-level step %2%, %3% new steps, %4% new runnable steps)")
    [3.3558]
    [3.361]
    printMsg(lvlChatty, format("added build %1% (top-level step %2%, %3% new steps, %4% new runnable steps)")
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 630
    [3.11654][3.11654:11731]()
    printMsg(lvlInfo, format("considering derivation ‘%1%’") % drvPath);
    [3.11654]
    [3.11731]
    printMsg(lvlDebug, format("considering derivation ‘%1%’") % drvPath);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 655
    [3.12166][3.12166:12240]()
    printMsg(lvlInfo, format("creating build step ‘%1%’") % drvPath);
    [3.12166]
    [3.12240]
    printMsg(lvlDebug, format("creating build step ‘%1%’") % drvPath);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 687
    [3.10174][3.5073:5155](),[3.12643][3.5073:5155]()
    printMsg(lvlInfo, format("destroying build step ‘%1%’") % step->drvPath);
    [3.10174]
    [3.5155]
    printMsg(lvlDebug, format("destroying build step ‘%1%’") % step->drvPath);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 772
    [3.14048][3.10175:10256]()
    printMsg(lvlInfo, format("step ‘%1%’ is now runnable") % step->drvPath);
    [3.14048]
    [3.10256]
    printMsg(lvlChatty, format("step ‘%1%’ is now runnable") % step->drvPath);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 791
    [3.39][3.10336:10387](),[3.10336][3.10336:10387]()
    printMsg(lvlError, "dispatcher woken up");
    [3.39]
    [3.2577]
    printMsg(lvlDebug, "dispatcher woken up");
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 795
    [3.6839][3.10388:10471]()
    printMsg(lvlError, format("%1% runnable builds") % runnable_->size());
    [3.6839]
    [3.10471]
    printMsg(lvlDebug, format("%1% runnable builds") % runnable_->size());
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 811
    [3.10957][3.10957:11064]()
    printMsg(lvlError, format("cannot execute step ‘%1%’ right now") % step->drvPath);
    [3.10957]
    [3.11064]
    printMsg(lvlDebug, format("cannot execute step ‘%1%’ right now") % step->drvPath);
  • edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 816
    [3.11138][3.11138:11305]()
    printMsg(lvlInfo, format("WOOHOO: starting step ‘%1%’ on machine ‘%2%’")
    % step->drvPath % reservation->machine->sshName);
  • edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 877
    [3.13097][3.13097:13139]()
    printMsg(lvlError, "builder exits");
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 912
    [3.15294][3.8006:8135]()
    printMsg(lvlInfo, format("performing build step ‘%1%’ (needed by %2% builds)") % step->drvPath % dependents.size());
    [3.15294]
    [3.8135]
    printMsg(lvlInfo, format("performing step ‘%1%’ on ‘%2%’ (needed by %3% builds)")
    % step->drvPath % machine->sshName % dependents.size());
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 947
    [3.1298][3.1298:1319]()
    abort();
    [3.1298]
    [3.1319]
    abort(); // FIXME
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 1055
    [3.17826][3.14605:14683]()
    printMsg(lvlError, format("marking build %1% as succeeded") % build->id);
    [3.17826]
    [3.14683]
    printMsg(lvlInfo, format("marking build %1% as succeeded") % build->id);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 1110
    [3.1131][3.93:131](),[3.131][3.812:887](),[3.812][3.812:887]()
    printMsg(lvlError, "exiting...");
    printMsg(lvlError, format("psql connections = %1%") % dbPool.count());
    [3.1131]
    [3.19072]
    //printMsg(lvlInfo, "exiting...");
    //printMsg(lvlInfo, format("psql connections = %1%") % dbPool.count());