Simplify retry handling

[?]
Jun 18, 2015, 12:51 PM
QJRDO2B4RGTXPSSK2SM6PQ6VEJNOLNMG2EFNMBGNRSEI74KKLVRAC

Dependencies

  • [2] PQFOMNTL hydra-queue-runner: More stats
  • [3] LXB6WEYK Create failed build steps for cached failures
  • [4] OCZ4LSGG Automatically retry aborted builds
  • [5] ENXUSMSV Make concurrency more robust
  • [6] RYTQLATY Keep track of failed paths in the Hydra database
  • [7] MSIHMO45 Tweak build steps
  • [8] 5AIYUMTB Basic remote building
  • [9] 24BMQDZA Start of single-process hydra-queue-runner

Change contents

  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 1103
    [4.1530][4.4383:4407]()
    bool retry = false;
    [4.1530]
    [4.4407]
    /* The step had a hopefully temporary failure (e.g. network
    issue). Retry a number of times. */
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 1106
    [4.4463][4.4463:4548]()
    auto step_(step->state.lock());
    retry = step_->tries + 1 < maxTries;
    [4.4463]
    [4.4548]
    bool retry;
    {
    auto step_(step->state.lock());
    retry = step_->tries + 1 < maxTries;
    }
    if (retry) {
    pqxx::work txn(*conn);
    finishBuildStep(txn, result.startTime, result.stopTime, build->id,
    stepNr, machine->sshName, bssAborted, result.errorMsg);
    txn.commit();
    return true;
    }
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 1126
    [4.8563][4.4556:4574](),[4.4574][4.8569:8648](),[4.8569][4.8569:8648]()
    if (!retry) {
    auto steps_(steps.lock());
    steps_->erase(step->drvPath);
    }
    [4.8563]
    [4.8648]
    auto steps_(steps.lock());
    steps_->erase(step->drvPath);
  • edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 1170
    [4.4907][4.1742:1743](),[4.1742][4.1742:1743](),[4.1743][3.0:26](),[3.26][4.1777:1778](),[4.4952][4.1777:1778](),[4.1777][4.1777:1778](),[4.1778][3.27:352](),[3.352][4.1883:1983](),[4.1883][4.1883:1983](),[4.1983][3.353:424](),[3.424][4.4953:5099](),[4.1983][4.4953:5099](),[4.386][4.2113:2131](),[4.5099][4.2113:2131](),[4.2113][4.2113:2131]()
    if (!retry) {
    /* Create failed build steps for every build that
    depends on this. For cached failures, only create a
    step for builds that don't have this step as
    top-level (otherwise the user won't be able to see
    what caused the build to fail). */
    for (auto build2 : dependents) {
    if (build == build2) continue;
    if (cachedFailure && build2->drvPath == step->drvPath) continue;
    createBuildStep(txn, 0, build2, step, machine->sshName,
    buildStepStatus, result.errorMsg, build->id);
    }
  • edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 1171
    [4.16984]
    [4.2270]
    /* Create failed build steps for every build that depends
    on this. For cached failures, only create a step for
    builds that don't have this step as top-level
    (otherwise the user won't be able to see what caused
    the build to fail). */
    for (auto build2 : dependents) {
    if (build == build2) continue;
    if (cachedFailure && build2->drvPath == step->drvPath) continue;
    createBuildStep(txn, 0, build2, step, machine->sshName,
    buildStepStatus, result.errorMsg, build->id);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 1188
    [4.17060][4.5298:6027](),[4.6027][2.373:409](),[2.409][4.6027:6045](),[4.6027][4.6027:6045]()
    if (!retry)
    for (auto build2 : dependents) {
    printMsg(lvlError, format("marking build %1% as failed") % build2->id);
    txn.parameterized
    ("update Builds set finished = 1, busy = 0, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5 where id = $1")
    (build2->id)
    ((int) (build2->drvPath != step->drvPath && buildStatus == bsFailed ? bsDepFailed : buildStatus))
    (result.startTime)
    (result.stopTime)
    (cachedFailure ? 1 : 0).exec();
    build2->finishedInDB = true; // FIXME: txn might fail
    nrBuildsDone++;
    }
    [4.17060]
    [4.2526]
    for (auto build2 : dependents) {
    printMsg(lvlError, format("marking build %1% as failed") % build2->id);
    txn.parameterized
    ("update Builds set finished = 1, busy = 0, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5 where id = $1")
    (build2->id)
    ((int) (build2->drvPath != step->drvPath && buildStatus == bsFailed ? bsDepFailed : buildStatus))
    (result.startTime)
    (result.stopTime)
    (cachedFailure ? 1 : 0).exec();
    build2->finishedInDB = true; // FIXME: txn might fail
    nrBuildsDone++;
    }
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 1215
    [4.9391][4.6046:6292]()
    if (!retry)
    for (auto build2 : dependents)
    if (build2->toplevel == step || result.status != RemoteResult::rrSuccess) {
    auto builds_(builds.lock());
    builds_->erase(build2->id);
    }
    [4.9391]
    [4.9569]
    for (auto build2 : dependents)
    if (build2->toplevel == step || result.status != RemoteResult::rrSuccess) {
    auto builds_(builds.lock());
    builds_->erase(build2->id);
    }
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 1224
    [4.9718][4.6293:6378]()
    if (!retry)
    destroyStep(step, result.status == RemoteResult::rrSuccess);
    [4.9718]
    [4.6378]
    destroyStep(step, result.status == RemoteResult::rrSuccess);
  • replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 1226
    [4.6379][4.6379:6397]()
    return retry;
    [4.6379]
    [4.17669]
    return false;