Queue monitor: Bail out earlier if a step has failed previously
[?]
Apr 15, 2016, 12:28 PM
YTAYNN7VNYZNLGUSGY3EF33MGQWMJW76FKV657SBKASQFQC7EB3AC
Dependencies
- [2] BRAESISH Warn if PostgreSQL appears stalled
- [3] UNVMKJV5 Unify build and step status codes
- [4] VQISTKOP hydra-queue-runner: Use substitutes
- [5] TPNHTE5V Remove obsolete Builds columns and provide accurate "Running builds"
- [6] YE6LD6MF hydra-queue-runner: Recycle finishedDrvs
- [7] IK2UBDAU Revive jobset scheduling
- [8] BCDHO4OU Set propagatedFrom for cached failed build steps
- [9] 73YR46NJ hydra-queue-runner: Write directly to a binary cache
- [10] 4CQWOODY Don't abort steps that have an unsupported system type
- [11] MHVIT4JY Split hydra-queue-runner.cc more
Change contents
- edit in src/hydra-queue-runner/queue-monitor.cc at line 66
struct PreviousFailure : public std::exception {Step::ptr step;PreviousFailure(Step::ptr step) : step(step) { }}; - replacement in src/hydra-queue-runner/queue-monitor.cc at line 148
Step::ptr step = createStep(destStore, conn, build, build->drvPath,build, 0, finishedDrvs, newSteps, newRunnable);Step::ptr step;/* Create steps for this derivation and its dependencies. */try {step = createStep(destStore, conn, build, build->drvPath,build, 0, finishedDrvs, newSteps, newRunnable);} catch (PreviousFailure & ex) {/* Some step previously failed, so mark the build asfailed right away. */printMsg(lvlError, format("marking build %d as cached failure due to ‘%s’") % build->id % ex.step->drvPath);if (!build->finishedInDB) {auto mc = startDbUpdate();pqxx::work txn(conn);/* Find the previous build step record, first byderivation path, then by output path. */BuildID propagatedFrom = 0;auto res = txn.parameterized("select max(build) from BuildSteps where drvPath = $1 and startTime != 0 and stopTime != 0 and status = 1")(ex.step->drvPath).exec();if (!res[0][0].is_null()) propagatedFrom = res[0][0].as<BuildID>();if (!propagatedFrom) {for (auto & output : ex.step->drv.outputs) {auto res = txn.parameterized("select max(s.build) from BuildSteps s join BuildStepOutputs o on s.build = o.build where path = $1 and startTime != 0 and stopTime != 0 and status = 1")(output.second.path).exec();if (!res[0][0].is_null()) {propagatedFrom = res[0][0].as<BuildID>();break;}}}createBuildStep(txn, 0, build, ex.step, "", bsCachedFailure, "", propagatedFrom);txn.parameterized("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $3, isCachedBuild = 1 where id = $1 and finished = 0")(build->id)((int) (ex.step->drvPath == build->drvPath ? 
bsFailed : bsDepFailed))(time(0)).exec();txn.commit();build->finishedInDB = true;nrBuildsDone++;}return;} - edit in src/hydra-queue-runner/queue-monitor.cc at line 229[4.25214]→[4.25214:25215](∅→∅),[4.25215]→[4.0:106](∅→∅),[4.106]→[4.25366:25396](∅→∅),[4.25366]→[4.25366:25396](∅→∅),[4.25396]→[4.107:141](∅→∅),[4.141]→[4.25539:25681](∅→∅),[4.25539]→[4.25539:25681](∅→∅),[4.26486]→[4.26486:26530](∅→∅),[4.26530]→[2.776:823](∅→∅),[2.823]→[4.26530:26572](∅→∅),[4.26530]→[4.26530:26572](∅→∅),[4.26572]→[4.0:182](∅→∅)
/* If any step has a previously failed output path, then failthe build right away. */bool badStep = false;for (auto & r : newSteps)if (checkCachedFailure(r, conn)) {printMsg(lvlError, format("marking build %1% as cached failure") % build->id);if (!build->finishedInDB) {auto mc = startDbUpdate();pqxx::work txn(conn);/* Find the previous build step record, first byderivation path, then by output path. */BuildID propagatedFrom = 0; - edit in src/hydra-queue-runner/queue-monitor.cc at line 230[4.183]→[4.183:1158](∅→∅),[4.1158]→[3.778:874](∅→∅),[4.212]→[4.26648:26686](∅→∅),[3.874]→[4.26648:26686](∅→∅),[4.1255]→[4.26648:26686](∅→∅),[4.26648]→[4.26648:26686](∅→∅),[4.26686]→[4.510:668](∅→∅),[4.381]→[4.26855:26891](∅→∅),[4.668]→[4.26855:26891](∅→∅),[4.26855]→[4.26855:26891](∅→∅),[4.26891]→[4.382:493](∅→∅),[4.493]→[4.27036:27241](∅→∅),[4.27036]→[4.27036:27241](∅→∅),[4.27251]→[4.27251:27282](∅→∅)
auto res = txn.parameterized("select max(build) from BuildSteps where drvPath = $1 and startTime != 0 and stopTime != 0 and status = 1")(r->drvPath).exec();if (!res[0][0].is_null()) propagatedFrom = res[0][0].as<BuildID>();if (!propagatedFrom) {for (auto & output : r->drv.outputs) {auto res = txn.parameterized("select max(s.build) from BuildSteps s join BuildStepOutputs o on s.build = o.build where path = $1 and startTime != 0 and stopTime != 0 and status = 1")(output.second.path).exec();if (!res[0][0].is_null()) {propagatedFrom = res[0][0].as<BuildID>();break;}}}createBuildStep(txn, 0, build, r, "", bsCachedFailure, "", propagatedFrom);txn.parameterized("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $3, isCachedBuild = 1 where id = $1 and finished = 0")(build->id)((int) (step == r ? bsFailed : bsDepFailed))(time(0)).exec();txn.commit();build->finishedInDB = true;nrBuildsDone++;}badStep = true;break;}if (badStep) return; - edit in src/hydra-queue-runner/queue-monitor.cc at line 410
/* If this derivation failed previously, give up. */if (checkCachedFailure(step, conn))throw PreviousFailure{step}; - edit in src/hydra-queue-runner/queue-monitor.cc at line 466
newSteps.insert(step); - edit in src/hydra-queue-runner/queue-monitor.cc at line 486
newSteps.insert(step);