If we don't see machine that supports a build step for 'max_unsupported_time' seconds, the step is aborted. The default is 0, which is appropriate for Hydra installations that don't provision missing machines dynamically.
(cherry picked from commit f5cdbfe21d930db43d3812c7d8e87746d6378ef9)
NJXD2ABJHKH7MBAPEOXQRKC3RK2OVNRO4ND44SOZJECZGCGZK7KQC
7KLHBDYAWCWHC4ID5UM3JW2IEEMFY6QA2MXMDTRC3O3ENCPN2OJAC
32KJOERMPFWZZZCIN6TGGVX72GMAJT6VCCIP7S65EHNF3KM42KWAC
WV4SSAIYM4SVBQ2VISDTAXQJCPKRGTLSVFH44CQFEMC4COWG5OKQC
MHVIT4JYWUYD4UCGB2AHLXWLX6B5SYE22BREERNGANT7RGGDUFOAC
OBOTGFG63VKMKWE7AGSTNPDCJ42FGFDIVY6XYLJLS275Y7OE3FSQC
BRAESISHTN4IIWUBVDMPDMY7QLMJDKX7GQ7K6NSJN66L5VPWSX3QC
BG6PEOB2M2Y56QPVMELU7VNNCGNMSQ2K6ATBUCPJLKPLTDWNJQ5AC
LVQXQIYA7QMLVYOANYEFHDBTFAOSE3D2IYAVOG2DXURTASRCUNYQC
B7ENVLRS2KLKEG66TY5G4EW6M274JZOBDUCRB7BFBXLTBG455STQC
TPNHTE5VJ36IPKMFENDERDBFBHLYFXOVNDLV2QSC4G5STPPMBLMAC
7LWB2J2ZFB4XJDN5MBL2WDKVBRYQD7Y6RQU7LUYKH7PZTXK2H7KQC
UVQJBDHNWHTGOAEL426F6RZPD67QABBRP73E6QB5ND5OV2S3267QC
NKQOEVVPWAT7UQ4JKOK7VWS6H3PKN52DDWT7SEWPK3JS743OGZNQC
4I2HF4L3JOC6KPYLI2YTEVTHBRRYO5XKXOZ6VQ2SJKSQKAPNQXCQC
IK2UBDAU6QKUXHJG3SXJKYGIIXRDKI6UVRTFC6ZVDXDCGNCMEWVAC
NWFDDRUGDCRLAG6WOQS4WJLEKU5KMJ4RPWMAELH7MJ5U6B3L3IIQC
24BMQDZAWDQ7VNIA7TIROXSOYLOJBNZ2E4264WHWNJAEN6ZB3UOAC
NAYQT2GTCJPBFRSK7CBFX655F2NGTBPICJSCYG2CSCQ5NRDHZG6QC
HJOEIMLRDVQ2KZI5HGL2HKGBM3AHP7YIKGKDAGFUNKRUXVRB24NAC
TTBLPQAJKPRC6W23QIFTPI2MM4VYR3D3CR6F7U53WU5GJDAYJW4AC
OG3Z3QGCG2FNDGF4VQYWOXLZCF7LGTZCUMGKUCBNAPAQ5FRFHQ2AC
EYR3EW6JVHNVLXMI57FUVPHQAHPETBML4H44OGJFHUT54KTTHIGQC
/* Register failure in the database for all Build objects that
directly or indirectly depend on this step. */
// FIXME: keep stats about aborted steps?
nrStepsDone++;
totalStepTime += stepStopTime - stepStartTime;
totalStepBuildTime += result.stopTime - result.startTime;
machine->state->nrStepsDone++;
machine->state->totalStepTime += stepStopTime - stepStartTime;
machine->state->totalStepBuildTime += result.stopTime - result.startTime;
/* If there are no builds left, delete all referring
steps from ‘steps’. As for the success case, we can
be certain no new referrers can be added. */
if (indirect.empty()) {
for (auto & s : steps) {
printMsg(lvlDebug, "finishing build step ‘%s’",
localStore->printStorePath(s->drvPath));
steps_->erase(s->drvPath);
}
}
}
if (indirect.empty() && stepFinished) break;
void State::failStep(
Connection & conn,
Step::ptr step,
BuildID buildId,
const RemoteResult & result,
Machine::ptr machine,
bool & stepFinished,
bool & quit)
{
/* Register failure in the database for all Build objects that
directly or indirectly depend on this step. */
/* Create failed build steps for every build that
depends on this, except when this step is cached
and is the top-level of that build (since then it's
redundant with the build's isCachedBuild field). */
for (auto & build2 : indirect) {
if ((result.stepStatus == bsCachedFailure && build2->drvPath == step->drvPath) ||
(result.stepStatus != bsCachedFailure && buildId == build2->id) ||
build2->finishedInDB)
continue;
createBuildStep(txn, 0, build2->id, step, machine->sshName,
result.stepStatus, result.errorMsg, buildId == build2->id ? 0 : buildId);
/* If there are no builds left, delete all referring
steps from ‘steps’. As for the success case, we can
be certain no new referrers can be added. */
if (indirect.empty()) {
for (auto & s : steps) {
printMsg(lvlDebug, "finishing build step ‘%s’",
localStore->printStorePath(s->drvPath));
steps_->erase(s->drvPath);
/* Mark all builds that depend on this derivation as failed. */
for (auto & build2 : indirect) {
if (build2->finishedInDB) continue;
printMsg(lvlError, format("marking build %1% as failed") % build2->id);
txn.parameterized
("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5, notificationPendingSince = $4 where id = $1 and finished = 0")
(build2->id)
((int) (build2->drvPath != step->drvPath && result.buildStatus() == bsFailed ? bsDepFailed : result.buildStatus()))
(result.startTime)
(result.stopTime)
(result.stepStatus == bsCachedFailure ? 1 : 0).exec();
nrBuildsDone++;
}
if (indirect.empty() && stepFinished) break;
/* Remember failed paths in the database so that they
won't be built again. */
if (result.stepStatus != bsCachedFailure && result.canCache)
for (auto & path : step->drv->outputPaths())
txn.parameterized("insert into FailedPaths values ($1)")(localStore->printStorePath(path)).exec();
/* Update the database. */
{
auto mc = startDbUpdate();
stepFinished = true;
/* Create failed build steps for every build that
depends on this, except when this step is cached
and is the top-level of that build (since then it's
redundant with the build's isCachedBuild field). */
for (auto & build : indirect) {
if ((result.stepStatus == bsCachedFailure && build->drvPath == step->drvPath) ||
((result.stepStatus != bsCachedFailure && result.stepStatus != bsUnsupported) && buildId == build->id) ||
build->finishedInDB)
continue;
createBuildStep(txn,
0, build->id, step, machine ? machine->sshName : "",
result.stepStatus, result.errorMsg, buildId == build->id ? 0 : buildId);
}
/* Remove the indirect dependencies from ‘builds’. This
will cause them to be destroyed. */
for (auto & b : indirect) {
auto builds_(builds.lock());
b->finishedInDB = true;
builds_->erase(b->id);
dependentIDs.push_back(b->id);
if (buildOne == b->id) quit = true;
/* Mark all builds that depend on this derivation as failed. */
for (auto & build : indirect) {
if (build->finishedInDB) continue;
printMsg(lvlError, format("marking build %1% as failed") % build->id);
txn.parameterized
("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5, notificationPendingSince = $4 where id = $1 and finished = 0")
(build->id)
((int) (build->drvPath != step->drvPath && result.buildStatus() == bsFailed ? bsDepFailed : result.buildStatus()))
(result.startTime)
(result.stopTime)
(result.stepStatus == bsCachedFailure ? 1 : 0).exec();
nrBuildsDone++;
/* Send notification about this build and its dependents. */
{
pqxx::work txn(*conn);
notifyBuildFinished(txn, buildId, dependentIDs);
/* Remember failed paths in the database so that they
won't be built again. */
if (result.stepStatus != bsCachedFailure && result.canCache)
for (auto & path : step->drv->outputPaths())
txn.parameterized("insert into FailedPaths values ($1)")(localStore->printStorePath(path)).exec();
// FIXME: keep stats about aborted steps?
nrStepsDone++;
totalStepTime += stepStopTime - stepStartTime;
totalStepBuildTime += result.stopTime - result.startTime;
machine->state->nrStepsDone++;
machine->state->totalStepTime += stepStopTime - stepStartTime;
machine->state->totalStepBuildTime += result.stopTime - result.startTime;
stepFinished = true;
if (quit) exit(0); // testing hack; FIXME: this won't run plugins
/* Remove the indirect dependencies from ‘builds’. This
will cause them to be destroyed. */
for (auto & b : indirect) {
auto builds_(builds.lock());
b->finishedInDB = true;
builds_->erase(b->id);
dependentIDs.push_back(b->id);
if (buildOne == b->id) quit = true;
}
}
for (auto & wstep : runnable2) {
auto step(wstep.lock());
if (!step) continue;
bool supported = false;
for (auto & machine : machines2) {
if (machine.second->supportsStep(step)) {
step->state.lock()->lastSupported = now;
supported = true;
break;
}
}
if (!supported
&& std::chrono::duration_cast<std::chrono::seconds>(now - step->state.lock()->lastSupported).count() >= maxUnsupportedTime)
{
printError("aborting unsupported build step '%s' (type '%s')",
localStore->printStorePath(step->drvPath),
step->systemType);
aborted.insert(step);
auto conn(dbPool.get());
std::set<Build::ptr> dependents;
std::set<Step::ptr> steps;
getDependents(step, dependents, steps);
/* Maybe the step got cancelled. */
if (dependents.empty()) continue;
/* Find the build that has this step as the top-level (if
any). */
Build::ptr build;
for (auto build2 : dependents) {
if (build2->drvPath == step->drvPath)
build = build2;
}
if (!build) build = *dependents.begin();
bool stepFinished = false;
bool quit = false;
failStep(
*conn, step, build->id,
RemoteResult {
.stepStatus = bsUnsupported,
.errorMsg = fmt("unsupported system type '%s'",
step->systemType),
.startTime = now2,
.stopTime = now2,
},
nullptr, stepFinished, quit);
if (quit) exit(1);
}
}
/* Clean up 'runnable'. */
{
auto runnable_(runnable.lock());
for (auto i = runnable_->begin(); i != runnable_->end(); ) {
if (aborted.count(i->lock()))
i = runnable_->erase(i);
else
++i;
}
}
}