Queue monitor: Bail out earlier if a step has failed previously

[?]
Apr 15, 2016, 12:28 PM
YTAYNN7VNYZNLGUSGY3EF33MGQWMJW76FKV657SBKASQFQC7EB3AC

Dependencies

  • [2] BRAESISH Warn if PostgreSQL appears stalled
  • [3] UNVMKJV5 Unify build and step status codes
  • [4] VQISTKOP hydra-queue-runner: Use substitutes
  • [5] TPNHTE5V Remove obsolete Builds columns and provide accurate "Running builds"
  • [6] YE6LD6MF hydra-queue-runner: Recycle finishedDrvs
  • [7] IK2UBDAU Revive jobset scheduling
  • [8] BCDHO4OU Set propagatedFrom for cached failed build steps
  • [9] 73YR46NJ hydra-queue-runner: Write directly to a binary cache
  • [10] 4CQWOODY Don't abort steps that have an unsupported system type
  • [11] MHVIT4JY Split hydra-queue-runner.cc more

Change contents

  • edit in src/hydra-queue-runner/queue-monitor.cc at line 66
    [4.21512]
    [4.12438]
    struct PreviousFailure : public std::exception {
    Step::ptr step;
    PreviousFailure(Step::ptr step) : step(step) { }
    };
  • replacement in src/hydra-queue-runner/queue-monitor.cc at line 148
    [4.24028][4.12620:12756]()
    Step::ptr step = createStep(destStore, conn, build, build->drvPath,
    build, 0, finishedDrvs, newSteps, newRunnable);
    [4.23973]
    [4.24135]
    Step::ptr step;
    /* Create steps for this derivation and its dependencies. */
    try {
    step = createStep(destStore, conn, build, build->drvPath,
    build, 0, finishedDrvs, newSteps, newRunnable);
    } catch (PreviousFailure & ex) {
    /* Some step previously failed, so mark the build as
    failed right away. */
    printMsg(lvlError, format("marking build %d as cached failure due to ‘%s’") % build->id % ex.step->drvPath);
    if (!build->finishedInDB) {
    auto mc = startDbUpdate();
    pqxx::work txn(conn);
    /* Find the previous build step record, first by
    derivation path, then by output path. */
    BuildID propagatedFrom = 0;
    auto res = txn.parameterized
    ("select max(build) from BuildSteps where drvPath = $1 and startTime != 0 and stopTime != 0 and status = 1")
    (ex.step->drvPath).exec();
    if (!res[0][0].is_null()) propagatedFrom = res[0][0].as<BuildID>();
    if (!propagatedFrom) {
    for (auto & output : ex.step->drv.outputs) {
    auto res = txn.parameterized
    ("select max(s.build) from BuildSteps s join BuildStepOutputs o on s.build = o.build where path = $1 and startTime != 0 and stopTime != 0 and status = 1")
    (output.second.path).exec();
    if (!res[0][0].is_null()) {
    propagatedFrom = res[0][0].as<BuildID>();
    break;
    }
    }
    }
    createBuildStep(txn, 0, build, ex.step, "", bsCachedFailure, "", propagatedFrom);
    txn.parameterized
    ("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $3, isCachedBuild = 1 where id = $1 and finished = 0")
    (build->id)
    ((int) (ex.step->drvPath == build->drvPath ? bsFailed : bsDepFailed))
    (time(0)).exec();
    txn.commit();
    build->finishedInDB = true;
    nrBuildsDone++;
    }
    return;
    }
  • edit in src/hydra-queue-runner/queue-monitor.cc at line 229
    [4.25214][4.25214:25215](),[4.25215][4.0:106](),[4.106][4.25366:25396](),[4.25366][4.25366:25396](),[4.25396][4.107:141](),[4.141][4.25539:25681](),[4.25539][4.25539:25681](),[4.26486][4.26486:26530](),[4.26530][2.776:823](),[2.823][4.26530:26572](),[4.26530][4.26530:26572](),[4.26572][4.0:182]()
    /* If any step has a previously failed output path, then fail
    the build right away. */
    bool badStep = false;
    for (auto & r : newSteps)
    if (checkCachedFailure(r, conn)) {
    printMsg(lvlError, format("marking build %1% as cached failure") % build->id);
    if (!build->finishedInDB) {
    auto mc = startDbUpdate();
    pqxx::work txn(conn);
    /* Find the previous build step record, first by
    derivation path, then by output path. */
    BuildID propagatedFrom = 0;
  • edit in src/hydra-queue-runner/queue-monitor.cc at line 230
    [4.183][4.183:1158](),[4.1158][3.778:874](),[4.212][4.26648:26686](),[3.874][4.26648:26686](),[4.1255][4.26648:26686](),[4.26648][4.26648:26686](),[4.26686][4.510:668](),[4.381][4.26855:26891](),[4.668][4.26855:26891](),[4.26855][4.26855:26891](),[4.26891][4.382:493](),[4.493][4.27036:27241](),[4.27036][4.27036:27241](),[4.27251][4.27251:27282]()
    auto res = txn.parameterized
    ("select max(build) from BuildSteps where drvPath = $1 and startTime != 0 and stopTime != 0 and status = 1")
    (r->drvPath).exec();
    if (!res[0][0].is_null()) propagatedFrom = res[0][0].as<BuildID>();
    if (!propagatedFrom) {
    for (auto & output : r->drv.outputs) {
    auto res = txn.parameterized
    ("select max(s.build) from BuildSteps s join BuildStepOutputs o on s.build = o.build where path = $1 and startTime != 0 and stopTime != 0 and status = 1")
    (output.second.path).exec();
    if (!res[0][0].is_null()) {
    propagatedFrom = res[0][0].as<BuildID>();
    break;
    }
    }
    }
    createBuildStep(txn, 0, build, r, "", bsCachedFailure, "", propagatedFrom);
    txn.parameterized
    ("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $3, isCachedBuild = 1 where id = $1 and finished = 0")
    (build->id)
    ((int) (step == r ? bsFailed : bsDepFailed))
    (time(0)).exec();
    txn.commit();
    build->finishedInDB = true;
    nrBuildsDone++;
    }
    badStep = true;
    break;
    }
    if (badStep) return;
  • edit in src/hydra-queue-runner/queue-monitor.cc at line 410
    [4.31651]
    [4.31848]
    /* If this derivation failed previously, give up. */
    if (checkCachedFailure(step, conn))
    throw PreviousFailure{step};
  • edit in src/hydra-queue-runner/queue-monitor.cc at line 466
    [4.32310][4.32310:32337]()
    newSteps.insert(step);
  • edit in src/hydra-queue-runner/queue-monitor.cc at line 486
    [4.32904]
    [4.32904]
    newSteps.insert(step);