Abort unsupported build steps
[?]
Mar 26, 2020, 2:00 PM
U55WNIDPCBAADLBSZXWUV6C3LVCILRWNDFJ7S3OJKL2WAWK2FLZAC
Dependencies
- [2] 7KLHBDYA Fix build
- [3] 32KJOERM Turn hydra-notify into a daemon
- [4] FJEK6FVU Fix build
- [5] YTJBED7G Fix build
- [6] IWB3F4Z6 Fail builds with previously failed steps early
- [7] T5BIOVJE Add support for tracking custom metrics
- [8] UYUVQWXQ Fix hydra-queue-runner --build-one
- [9] 6LIYTMFU Fix build failure on GCC 5.4
- [10] UVNTWTWG Prevent download of NARs we just uploaded
- [11] 46ADBTMQ Start steps in order of ascending build ID
- [12] NSBNNM77 Add hydra.conf option "nar_buffer_size" to configure memoryTokens limit
- [13] WV4SSAIY Build against nix-master
- [14] 24BMQDZA Start of single-process hydra-queue-runner
- [15] IK2UBDAU Revive jobset scheduling
- [16] BG6PEOB2 Make the output size limit configurable
- [17] VQISTKOP hydra-queue-runner: Use substitutes
- [18] UVQJBDHN Move log compression to a plugin
- [19] EPWEMRI2 Allow determinism checking for entire jobsets
- [20] N4IROACV Move buildRemote() into State
- [21] EBJP3MNA Build against nix-master
- [22] NTEDD7T4 Provide a plugin hook for when build steps finish
- [23] XZYFFHLQ hydra-queue-runner: Set a thread title for the builder threads
- [24] KBZHIGLG Record the machine used for a build step
- [25] OBOTGFG6 Prevent orphaned build steps
- [26] LVQXQIYA Kill active build steps when builds are cancelled
- [27] WE5Q2NVI Allow build to be bumped to the front of the queue via the web interface
- [28] SGFLU2VS USER -> LOGNAME for consistency
- [29] INVXGPNK Fix root creation when the root already exists but is owned by another user
- [30] WDGARQ76 Reuse build products / metrics stored in the database
- [31] UQQ4IL55 Add a error type for "unsupported system type"
- [32] NAYQT2GT hydra-queue-runner: Use cmdBuildDerivation
- [33] DKJFD6JN Process Nix API changes
- [34] MSIHMO45 Tweak build steps
- [35] BRAESISH Warn if PostgreSQL appears stalled
- [36] UNVMKJV5 Unify build and step status codes
- [37] QAPR72LX Add some debug code
- [38] OKQLN5AG Set proper charset on log files
- [39] B2L4T3X6 Sync with Nix
- [40] PQFOMNTL hydra-queue-runner: More stats
- [41] DYVETV7L hydra-queue-runner: Allow multiple concurrent daemon connections
- [42] YXYXJDMB hydra-queue-runner: Write GC roots for outputs paths
- [43] FITVNQ2S Keep track of the time we spend copying to/from build machines
- [44] HJOEIMLR Refactor
- [45] MJL3PDXI Fix duplicate step_finished notification
- [46] 4ZJIJCVF Slight cleanup
- [47] EYR3EW6J Keep stats for the Hydra auto scaler
- [48] FHVJYJFE Upload build logs to the binary cache
- [49] G7KWXSFM Distinguish build step states
- [50] IKJBYIGY Update to reflect BinaryCacheStore changes
- [51] B7ENVLRS hydra-queue-runner: Make build notification more reliable
- [52] RNJILKTW Upload log files to the right location
- [53] HH3LID6L Re-implement log size limits
- [54] 62MQPRXC Pass null values to libpqxx properly
- [55] 7LWB2J2Z Periodically clear orphaned build steps
- [56] O3NM62IZ Support multiple machines files
- [57] OG3Z3QGC Namespace cleanup
- [58] SODOV2CM Automatically reload $NIX_REMOTE_SYSTEMS when it changes
- [59] TPNHTE5V Remove obsolete Builds columns and provide accurate "Running builds"
- [60] NKQOEVVP Get rid of "will retry" messages after "maybe cancelling..."
- [61] LENA4O2V Periodically close RemoteStore connections
- [62] FJ5H26J6 Fix building against nix master
- [63] XIFDDTXZ Don't lock the BuildSteps table when inserting
- [64] MHVIT4JY Split hydra-queue-runner.cc more
- [65] WDQJRLNA Handle derivations with system type 'builtin'
- [66] DRC26KFB hydra-queue-runner: Fix a race keeping cancelled steps alive
- [*] 4I2HF4L3 Unindent
- [*] NWFDDRUG hydra-queue-runner: Limit concurrent database connections
- [*] TTBLPQAJ Keep track of wait time per system type
Change contents
- edit in src/hydra-queue-runner/builder.cc at line 21
setThreadName("bld~" + std::string(reservation->step->drvPath.to_string())); - edit in src/hydra-queue-runner/builder.cc at line 42
printMsg(lvlError, "uncaught exception building ‘%s’ on ‘%s’: %s",localStore->printStorePath(reservation->step->drvPath),reservation->machine->sshName,e.what()); - edit in src/hydra-queue-runner/builder.cc at line 65
printMsg(lvlInfo, "will retry ‘%s’ after %ss", localStore->printStorePath(step->drvPath), delta); - edit in src/hydra-queue-runner/builder.cc at line 100
std::optional<StorePath> buildDrvPath; - edit in src/hydra-queue-runner/builder.cc at line 121
printMsg(lvlInfo, "maybe cancelling build step ‘%s’", localStore->printStorePath(step->drvPath)); - edit in src/hydra-queue-runner/builder.cc at line 143
buildDrvPath = build->drvPath.clone(); - edit in src/hydra-queue-runner/builder.cc at line 148
localStore->printStorePath(step->drvPath), repeats + 1, machine->sshName, buildId, (dependents.size() - 1)); - edit in src/hydra-queue-runner/builder.cc at line 152
bool quit = buildId == buildOne && step->drvPath == *buildDrvPath; - resolve order conflict in src/hydra-queue-runner/builder.cc at line 152
- edit in src/hydra-queue-runner/builder.cc at line 171
store->upsertFile("log/" + std::string(step->drvPath.to_string()), readFile(result.logFile), "text/plain; charset=utf-8"); - edit in src/hydra-queue-runner/builder.cc at line 223
res = getBuildOutput(destStore, ref<FSAccessor>(result.accessor), *step->drv); - edit in src/hydra-queue-runner/builder.cc at line 260
printMsg(lvlError, "possibly transient failure building ‘%s’ on ‘%s’: %s",localStore->printStorePath(step->drvPath), machine->sshName, result.errorMsg); - edit in src/hydra-queue-runner/builder.cc at line 280
for (auto & path : step->drv->outputPaths()) - edit in src/hydra-queue-runner/builder.cc at line 313
printMsg(lvlDebug, "finishing build step ‘%s’",localStore->printStorePath(step->drvPath)); - replacement in src/hydra-queue-runner/builder.cc at line 379
} else {} elsefailStep(*conn, step, buildId, result, machine, stepFinished, quit); - replacement in src/hydra-queue-runner/builder.cc at line 382
/* Register failure in the database for all Build objects thatdirectly or indirectly depend on this step. */// FIXME: keep stats about aborted steps?nrStepsDone++;totalStepTime += stepStopTime - stepStartTime;totalStepBuildTime += result.stopTime - result.startTime;machine->state->nrStepsDone++;machine->state->totalStepTime += stepStopTime - stepStartTime;machine->state->totalStepBuildTime += result.stopTime - result.startTime; - replacement in src/hydra-queue-runner/builder.cc at line 390
std::vector<BuildID> dependentIDs;if (quit) exit(0); // testing hack; FIXME: this won't run plugins - replacement in src/hydra-queue-runner/builder.cc at line 392
while (true) {/* Get the builds and steps that depend on this step. */std::set<Build::ptr> indirect;{auto steps_(steps.lock());std::set<Step::ptr> steps;getDependents(step, indirect, steps);return sDone;} - edit in src/hydra-queue-runner/builder.cc at line 395[6.9781]→[6.9781:10074](∅→∅),[6.10074]→[6.5557:5702](∅→∅),[6.10074]→[6.5050:5195](∅→∅),[6.5195]→[6.10173:10246](∅→∅),[6.5702]→[6.10173:10246](∅→∅),[6.10173]→[6.10173:10246](∅→∅),[6.10273]→[6.10273:10305](∅→∅)
/* If there are no builds left, delete all referringsteps from ‘steps’. As for the success case, we canbe certain no new referrers can be added. */if (indirect.empty()) {for (auto & s : steps) {printMsg(lvlDebug, "finishing build step ‘%s’",localStore->printStorePath(s->drvPath));printMsg(lvlDebug, "finishing build step ‘%s’",localStore->printStorePath(s->drvPath));steps_->erase(s->drvPath);}}} - replacement in src/hydra-queue-runner/builder.cc at line 396
if (indirect.empty() && stepFinished) break;void State::failStep(Connection & conn,Step::ptr step,BuildID buildId,const RemoteResult & result,Machine::ptr machine,bool & stepFinished,bool & quit){/* Register failure in the database for all Build objects thatdirectly or indirectly depend on this step. */ - replacement in src/hydra-queue-runner/builder.cc at line 408
/* Update the database. */{auto mc = startDbUpdate();std::vector<BuildID> dependentIDs; - replacement in src/hydra-queue-runner/builder.cc at line 410
pqxx::work txn(*conn);while (true) {/* Get the builds and steps that depend on this step. */std::set<Build::ptr> indirect;{auto steps_(steps.lock());std::set<Step::ptr> steps;getDependents(step, indirect, steps); - replacement in src/hydra-queue-runner/builder.cc at line 418[6.11220]→[6.2802:3078](∅→∅),[6.3078]→[6.11545:11594](∅→∅),[6.11545]→[6.11545:11594](∅→∅),[6.11594]→[6.3079:3181](∅→∅),[6.3181]→[6.2138:2229](∅→∅),[6.2229]→[6.11736:11816](∅→∅),[6.3266]→[6.11736:11816](∅→∅),[6.11736]→[6.11736:11816](∅→∅),[6.11816]→[6.2230:2408](∅→∅)
/* Create failed build steps for every build thatdepends on this, except when this step is cachedand is the top-level of that build (since then it'sredundant with the build's isCachedBuild field). */for (auto & build2 : indirect) {if ((result.stepStatus == bsCachedFailure && build2->drvPath == step->drvPath) ||(result.stepStatus != bsCachedFailure && buildId == build2->id) ||build2->finishedInDB)continue;createBuildStep(txn, 0, build2->id, step, machine->sshName,result.stepStatus, result.errorMsg, buildId == build2->id ? 0 : buildId);/* If there are no builds left, delete all referringsteps from ‘steps’. As for the success case, we canbe certain no new referrers can be added. */if (indirect.empty()) {for (auto & s : steps) {printMsg(lvlDebug, "finishing build step ‘%s’",localStore->printStorePath(s->drvPath));steps_->erase(s->drvPath); - edit in src/hydra-queue-runner/builder.cc at line 427
}} - replacement in src/hydra-queue-runner/builder.cc at line 430[6.12212]→[6.12212:12527](∅→∅),[6.12527]→[6.0:190](∅→∅),[6.184]→[6.12696:12733](∅→∅),[6.190]→[6.12696:12733](∅→∅),[6.12696]→[6.12696:12733](∅→∅),[6.12733]→[6.3520:3660](∅→∅),[6.3660]→[6.12855:12940](∅→∅),[6.12855]→[6.12855:12940](∅→∅),[6.12940]→[6.3661:3740](∅→∅),[6.3740]→[6.12996:13050](∅→∅),[6.12996]→[6.12996:13050](∅→∅)
/* Mark all builds that depend on this derivation as failed. */for (auto & build2 : indirect) {if (build2->finishedInDB) continue;printMsg(lvlError, format("marking build %1% as failed") % build2->id);txn.parameterized("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5, notificationPendingSince = $4 where id = $1 and finished = 0")(build2->id)((int) (build2->drvPath != step->drvPath && result.buildStatus() == bsFailed ? bsDepFailed : result.buildStatus()))(result.startTime)(result.stopTime)(result.stepStatus == bsCachedFailure ? 1 : 0).exec();nrBuildsDone++;}if (indirect.empty() && stepFinished) break;/* Update the database. */{auto mc = startDbUpdate(); - replacement in src/hydra-queue-runner/builder.cc at line 436[6.13051]→[6.13051:13165](∅→∅),[6.13165]→[6.3741:3818](∅→∅),[6.3818]→[6.5703:5891](∅→∅),[6.3818]→[6.5196:5384](∅→∅)
/* Remember failed paths in the database so that theywon't be built again. */if (result.stepStatus != bsCachedFailure && result.canCache)for (auto & path : step->drv->outputPaths())txn.parameterized("insert into FailedPaths values ($1)")(localStore->printStorePath(path)).exec();for (auto & path : step->drv->outputPaths())txn.parameterized("insert into FailedPaths values ($1)")(localStore->printStorePath(path)).exec();pqxx::work txn(conn); - replacement in src/hydra-queue-runner/builder.cc at line 438
txn.commit();/* Create failed build steps for every build thatdepends on this, except when this step is cachedand is the top-level of that build (since then it'sredundant with the build's isCachedBuild field). */for (auto & build : indirect) {if ((result.stepStatus == bsCachedFailure && build->drvPath == step->drvPath) ||((result.stepStatus != bsCachedFailure && result.stepStatus != bsUnsupported) && buildId == build->id) ||build->finishedInDB)continue;createBuildStep(txn,0, build->id, step, machine ? machine->sshName : "",result.stepStatus, result.errorMsg, buildId == build->id ? 0 : buildId); - replacement in src/hydra-queue-runner/builder.cc at line 452
stepFinished = true;/* Mark all builds that depend on this derivation as failed. */for (auto & build : indirect) {if (build->finishedInDB) continue;printMsg(lvlError, format("marking build %1% as failed") % build->id);txn.parameterized("update Builds set finished = 1, buildStatus = $2, startTime = $3, stopTime = $4, isCachedBuild = $5, notificationPendingSince = $4 where id = $1 and finished = 0")(build->id)((int) (build->drvPath != step->drvPath && result.buildStatus() == bsFailed ? bsDepFailed : result.buildStatus()))(result.startTime)(result.stopTime)(result.stepStatus == bsCachedFailure ? 1 : 0).exec();nrBuildsDone++;} - replacement in src/hydra-queue-runner/builder.cc at line 466
/* Remove the indirect dependencies from ‘builds’. Thiswill cause them to be destroyed. */for (auto & b : indirect) {auto builds_(builds.lock());b->finishedInDB = true;builds_->erase(b->id);dependentIDs.push_back(b->id);if (buildOne == b->id) quit = true;}}/* Remember failed paths in the database so that theywon't be built again. */if (result.stepStatus != bsCachedFailure && result.canCache)for (auto & path : step->drv->outputPaths())txn.parameterized("insert into FailedPaths values ($1)")(localStore->printStorePath(path)).exec(); - edit in src/hydra-queue-runner/builder.cc at line 472
/* Send notification about this build and its dependents. */{pqxx::work txn(*conn);notifyBuildFinished(txn, buildId, dependentIDs); - edit in src/hydra-queue-runner/builder.cc at line 474
} - replacement in src/hydra-queue-runner/builder.cc at line 475
// FIXME: keep stats about aborted steps?nrStepsDone++;totalStepTime += stepStopTime - stepStartTime;totalStepBuildTime += result.stopTime - result.startTime;machine->state->nrStepsDone++;machine->state->totalStepTime += stepStopTime - stepStartTime;machine->state->totalStepBuildTime += result.stopTime - result.startTime;stepFinished = true; - replacement in src/hydra-queue-runner/builder.cc at line 477
if (quit) exit(0); // testing hack; FIXME: this won't run plugins/* Remove the indirect dependencies from ‘builds’. Thiswill cause them to be destroyed. */for (auto & b : indirect) {auto builds_(builds.lock());b->finishedInDB = true;builds_->erase(b->id);dependentIDs.push_back(b->id);if (buildOne == b->id) quit = true;}} - replacement in src/hydra-queue-runner/builder.cc at line 488
return sDone;/* Send notification about this build and its dependents. */{pqxx::work txn(conn);notifyBuildFinished(txn, buildId, dependentIDs);txn.commit();} - edit in src/hydra-queue-runner/builder.cc at line 498
void State::addRoot(const StorePath & storePath) - resolve order conflict in src/hydra-queue-runner/builder.cc at line 498
- edit in src/hydra-queue-runner/builder.cc at line 500
auto root = rootsDir + "/" + std::string(storePath.to_string()); - resolve order conflict in src/hydra-queue-runner/builder.cc at line 500
- edit in src/hydra-queue-runner/dispatcher.cc at line 13
printMsg(lvlChatty, "step ‘%s’ is now runnable", localStore->printStorePath(step->drvPath)); - edit in src/hydra-queue-runner/dispatcher.cc at line 251
mi.machine->sshName, localStore->printStorePath(step->drvPath), step->drv->platform); - edit in src/hydra-queue-runner/dispatcher.cc at line 302
abortUnsupported(); - edit in src/hydra-queue-runner/dispatcher.cc at line 317
void State::abortUnsupported(){/* Make a copy of 'runnable' and 'machines' so we don't block themvery long. */auto runnable2 = *runnable.lock();auto machines2 = *machines.lock();system_time now = std::chrono::system_clock::now();auto now2 = time(0); - edit in src/hydra-queue-runner/dispatcher.cc at line 329
std::unordered_set<Step::ptr> aborted; - edit in src/hydra-queue-runner/dispatcher.cc at line 331
for (auto & wstep : runnable2) {auto step(wstep.lock());if (!step) continue;bool supported = false;for (auto & machine : machines2) {if (machine.second->supportsStep(step)) {step->state.lock()->lastSupported = now;supported = true;break;}}if (!supported&& std::chrono::duration_cast<std::chrono::seconds>(now - step->state.lock()->lastSupported).count() >= maxUnsupportedTime){printError("aborting unsupported build step '%s' (type '%s')",localStore->printStorePath(step->drvPath),step->systemType);aborted.insert(step);auto conn(dbPool.get());std::set<Build::ptr> dependents;std::set<Step::ptr> steps;getDependents(step, dependents, steps);/* Maybe the step got cancelled. */if (dependents.empty()) continue;/* Find the build that has this step as the top-level (ifany). */Build::ptr build;for (auto build2 : dependents) {if (build2->drvPath == step->drvPath)build = build2;}if (!build) build = *dependents.begin();bool stepFinished = false;bool quit = false;failStep(*conn, step, build->id,RemoteResult {.stepStatus = bsUnsupported,.errorMsg = fmt("unsupported system type '%s'",step->systemType),.startTime = now2,.stopTime = now2,},nullptr, stepFinished, quit);if (quit) exit(1);}}/* Clean up 'runnable'. */{auto runnable_(runnable.lock());for (auto i = runnable_->begin(); i != runnable_->end(); ) {if (aborted.count(i->lock()))i = runnable_->erase(i);else++i;}}} - edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 41
auto value = getEnv(key); - edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 43
return *value; - edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 49
, maxUnsupportedTime(config->getIntOption("max_unsupported_time", 0)) - edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 165
getEnv("NIX_REMOTE_SYSTEMS").value_or(pathExists(defaultMachinesFile) ? defaultMachinesFile : ""), ":"); - resolve order conflict in src/hydra-queue-runner/hydra-queue-runner.cc at line 165
- edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 257
(localStore->printStorePath(step->drvPath)) - resolve order conflict in src/hydra-queue-runner/hydra-queue-runner.cc at line 257
- edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 260
(step->drv->platform) - resolve order conflict in src/hydra-queue-runner/hydra-queue-runner.cc at line 260
- edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 268
for (auto & output : step->drv->outputs) - edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 272
(buildId)(stepNr)(output.first)(localStore->printStorePath(output.second.path)).exec(); - resolve order conflict in src/hydra-queue-runner/hydra-queue-runner.cc at line 272
- edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 315
Build::ptr build, const StorePath & drvPath, const string & outputName, const StorePath & storePath) - resolve order conflict in src/hydra-queue-runner/hydra-queue-runner.cc at line 315
- edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 325
(localStore->printStorePath(drvPath)) - resolve order conflict in src/hydra-queue-runner/hydra-queue-runner.cc at line 325
- edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 334
(build->id)(stepNr)(outputName)(localStore->printStorePath(storePath)).exec(); - edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 459
for (auto & path : step->drv->outputPaths())if (!txn.parameterized("select 1 from FailedPaths where path = $1")(localStore->printStorePath(path)).exec().empty()) - resolve order conflict in src/hydra-queue-runner/hydra-queue-runner.cc at line 459
- edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 770
localStore = openStore(getEnv("NIX_REMOTE").value_or(""), localParams); - replacement in src/hydra-queue-runner/state.hh at line 71
BuildStatus buildStatus()BuildStatus buildStatus() const - edit in src/hydra-queue-runner/state.hh at line 126
nix::StorePath drvPath;std::map<std::string, nix::StorePath> outputs; - edit in src/hydra-queue-runner/state.hh at line 138
Build(nix::StorePath && drvPath) : drvPath(std::move(drvPath)){ } - edit in src/hydra-queue-runner/state.hh at line 156
nix::StorePath drvPath;std::unique_ptr<nix::Derivation> drv; - edit in src/hydra-queue-runner/state.hh at line 201[70.1366][6.3082]
/* The time that we last saw a machine that supports thisstep. */system_time lastSupported = std::chrono::system_clock::now(); - edit in src/hydra-queue-runner/state.hh at line 210
Step(nix::StorePath && drvPath) : drvPath(std::move(drvPath)){ } - resolve order conflict in src/hydra-queue-runner/state.hh at line 211
- edit in src/hydra-queue-runner/state.hh at line 265
if (!systemTypes.count(step->drv->platform == "builtin" ? nix::settings.thisSystem : step->drv->platform)) - edit in src/hydra-queue-runner/state.hh at line 310
/* Time in seconds before unsupported build steps are aborted. */const unsigned int maxUnsupportedTime = 0; - edit in src/hydra-queue-runner/state.hh at line 329
typedef std::map<nix::StorePath, Step::wptr> Steps; - edit in src/hydra-queue-runner/state.hh at line 470
Build::ptr build, const nix::StorePath & drvPath, const std::string & outputName, const nix::StorePath & storePath); - edit in src/hydra-queue-runner/state.hh at line 489
Connection & conn, Build::ptr build, const nix::StorePath & drvPath,Build::ptr referringBuild, Step::ptr referringStep, std::set<nix::StorePath> & finishedDrvs, - edit in src/hydra-queue-runner/state.hh at line 493
void failStep(Connection & conn,Step::ptr step,BuildID buildId,const RemoteResult & result,Machine::ptr machine,bool & stepFinished,bool & quit); - edit in src/hydra-queue-runner/state.hh at line 515
void abortUnsupported(); - edit in src/hydra-queue-runner/state.hh at line 551
void addRoot(const nix::StorePath & storePath); - resolve order conflict in src/hydra-queue-runner/state.hh at line 551