Temporarily disable machines on any exception, not just connection failures

[?]
Mar 22, 2016, 3:54 PM
BYVRA54QBKHLFOPIRBJKZZI7JYBYHSOK7MIA3TUZTALZQJGG3G7QC

Dependencies

  • [2] 4VYY2ADP Allow the machines file to specify host public keys
  • [3] DIEY5USN Keep better bytesReceived/bytesSent stats
  • [4] BG6PEOB2 Make the output size limit configurable
  • [5] HH3LID6L Re-implement log size limits
  • [6] FITVNQ2S Keep track of the time we spend copying to/from build machines
  • [7] TDSBTZKX Add log message
  • [8] 5AIYUMTB Basic remote building
  • [9] HHOMBU7G hydra-queue-runner: Implement timeouts
  • [10] YR2IM6Y5 Temporarily disable machines after a connection failure
  • [11] A2GL5FOZ Moar stats
  • [12] NAYQT2GT hydra-queue-runner: Use cmdBuildDerivation
  • [13] UVNTWTWG Prevent download of NARs we just uploaded
  • [14] 6EO3HVNA Merge remote-tracking branch 'origin/master' into binary-cache
  • [15] 73YR46NJ hydra-queue-runner: Write directly to a binary cache
  • [16] MB3TISH2 Rate-limit the number of threads copying closures at the same time
  • [17] GS4BE6TB Asynchronously compress build logs
  • [18] CNLNT3T4 Allow only 1 thread to send a closure to a given machine at the same time
  • [19] OCZ4LSGG Automatically retry aborted builds
  • [20] VZKB5CIE Workaround for RemoteStore not supporting cmdBuildDerivation yet
  • [21] AUMIJSEO Fix remote building on Nix 1.10
  • [22] 5LBMP7GA Fix remote building
  • [23] AF74AH2S Remove localhost hack
  • [24] N4IROACV Move buildRemote() into State
  • [25] DWFTK56E Keep track of how many threads are waiting
  • [26] 7LB6QBXY Keep track of the number of build steps that are being built
  • [27] RSISSEU6 Enable substitution on the build machines
  • [28] SL3WSRAC hydra-queue-runner: Limit memory usage
  • [29] LE4VZIY5 More stats

Change contents

  • replacement in src/hydra-queue-runner/build-remote.cc at line 133
    [2.795][5.3247:3264](),[5.3247][5.3247:3264](),[5.3264][2.796:847]()
    Child child;
    openConnection(machine, tmpDir, logFD, child);
    [2.795]
    [5.3315]
    try {
  • replacement in src/hydra-queue-runner/build-remote.cc at line 135
    [5.3316][5.3316:3335]()
    logFD.close();
    [5.3316]
    [5.3335]
    Child child;
    openConnection(machine, tmpDir, logFD, child);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 138
    [5.3336][5.3336:3392]()
    FdSource from(child.from);
    FdSink to(child.to);
    [5.3336]
    [3.23]
    logFD.close();
  • replacement in src/hydra-queue-runner/build-remote.cc at line 140
    [3.24][3.24:133]()
    Finally updateStats([&]() {
    bytesReceived += from.read;
    bytesSent += to.written;
    });
    [3.24]
    [5.3392]
    FdSource from(child.from);
    FdSink to(child.to);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 143
    [5.3393][5.3393:3414](),[5.3414][5.133:165](),[5.165][5.0:32]()
    /* Handshake. */
    bool sendDerivation = true;
    unsigned int remoteVersion;
    [5.3393]
    [5.32]
    Finally updateStats([&]() {
    bytesReceived += from.read;
    bytesSent += to.written;
    });
  • replacement in src/hydra-queue-runner/build-remote.cc at line 148
    [5.33][5.79:89](),[5.165][5.79:89](),[5.3414][5.79:89](),[5.89][5.34:72](),[5.72][5.172:192](),[5.221][5.172:192](),[5.172][5.172:192]()
    try {
    to << SERVE_MAGIC_1 << 0x202;
    to.flush();
    [5.33]
    [5.3505]
    /* Handshake. */
    bool sendDerivation = true;
    unsigned int remoteVersion;
  • replacement in src/hydra-queue-runner/build-remote.cc at line 152
    [5.3506][5.193:273](),[5.273][5.418:533](),[5.533][5.73:168](),[5.168][5.534:655](),[5.475][5.534:655](),[5.655][5.169:221](),[5.46][5.268:304](),[5.90][5.268:304](),[5.221][5.268:304](),[5.268][5.268:304]()
    unsigned int magic = readInt(from);
    if (magic != SERVE_MAGIC_2)
    throw Error(format("protocol mismatch with ‘nix-store --serve’ on ‘%1%’") % machine->sshName);
    remoteVersion = readInt(from);
    if (GET_PROTOCOL_MAJOR(remoteVersion) != 0x200)
    throw Error(format("unsupported ‘nix-store --serve’ protocol version on ‘%1%’") % machine->sshName);
    if (GET_PROTOCOL_MINOR(remoteVersion) >= 1)
    sendDerivation = false;
    [5.3506]
    [5.91]
    try {
    to << SERVE_MAGIC_1 << 0x202;
    to.flush();
    unsigned int magic = readInt(from);
    if (magic != SERVE_MAGIC_2)
    throw Error(format("protocol mismatch with ‘nix-store --serve’ on ‘%1%’") % machine->sshName);
    remoteVersion = readInt(from);
    if (GET_PROTOCOL_MAJOR(remoteVersion) != 0x200)
    throw Error(format("unsupported ‘nix-store --serve’ protocol version on ‘%1%’") % machine->sshName);
    if (GET_PROTOCOL_MINOR(remoteVersion) >= 1)
    sendDerivation = false;
  • replacement in src/hydra-queue-runner/build-remote.cc at line 165
    [5.92][5.587:647](),[5.304][5.587:647](),[5.655][5.587:647](),[5.587][5.587:647]()
    } catch (EndOfFile & e) {
    child.pid.wait(true);
    [5.92]
    [5.0]
    } catch (EndOfFile & e) {
    child.pid.wait(true);
    string s = chomp(readFile(result.logFile));
    throw Error(format("cannot connect to ‘%1%’: %2%") % machine->sshName % s);
    }
  • edit in src/hydra-queue-runner/build-remote.cc at line 172
    [5.11][5.11:323]()
    /* Disable this machine until a certain period of time has
    passed. This period increases on every consecutive
    failure. However, don't count failures that occurred
    soon after the last one (to take into account steps
    started in parallel). */
  • replacement in src/hydra-queue-runner/build-remote.cc at line 173
    [5.382][5.382:998]()
    auto now = std::chrono::system_clock::now();
    if (info->consecutiveFailures == 0 || info->lastFailure < now - std::chrono::seconds(30)) {
    info->consecutiveFailures = std::min(info->consecutiveFailures + 1, (unsigned int) 4);
    info->lastFailure = now;
    int delta = retryInterval * powf(retryBackoff, info->consecutiveFailures - 1) + (rand() % 30);
    printMsg(lvlInfo, format("will disable machine ‘%1%’ for %2%s") % machine->sshName % delta);
    info->disabledUntil = now + std::chrono::seconds(delta);
    }
    [5.382]
    [5.998]
    info->consecutiveFailures = 0;
  • replacement in src/hydra-queue-runner/build-remote.cc at line 176
    [5.1009][5.354:406](),[5.647][5.354:406](),[5.406][5.656:744](),[5.485][5.0:7](),[5.744][5.0:7](),[5.749][5.0:7](),[5.7][5.1010:1117](),[5.1117][5.305:376](),[5.7][5.305:376](),[5.376][5.0:73](),[5.73][5.446:650](),[5.446][5.446:650](),[5.650][5.74:115]()
    string s = chomp(readFile(result.logFile));
    throw Error(format("cannot connect to ‘%1%’: %2%") % machine->sshName % s);
    }
    {
    auto info(machine->state->connectInfo.lock());
    info->consecutiveFailures = 0;
    }
    /* Gather the inputs. If the remote side is Nix <= 1.9, we have to
    copy the entire closure of ‘drvPath’, as well as the required
    outputs of the input derivations. On Nix > 1.9, we only need to
    copy the immediate sources of the derivation and the required
    outputs of the input derivations. */
    PathSet inputs;
    BasicDerivation basicDrv(step->drv);
    [5.1009]
    [5.650]
    /* Gather the inputs. If the remote side is Nix <= 1.9, we have to
    copy the entire closure of ‘drvPath’, as well as the required
    outputs of the input derivations. On Nix > 1.9, we only need to
    copy the immediate sources of the derivation and the required
    outputs of the input derivations. */
    PathSet inputs;
    BasicDerivation basicDrv(step->drv);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 184
    [5.651][5.651:797]()
    if (sendDerivation)
    inputs.insert(step->drvPath);
    else
    for (auto & p : step->drv.inputSrcs)
    inputs.insert(p);
    [5.651]
    [5.797]
    if (sendDerivation)
    inputs.insert(step->drvPath);
    else
    for (auto & p : step->drv.inputSrcs)
    inputs.insert(p);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 190
    [5.798][5.782:829](),[5.782][5.782:829](),[5.829][5.108:252](),[5.108][5.108:252](),[5.252][5.799:893](),[5.893][5.116:171]()
    for (auto & input : step->drv.inputDrvs) {
    Derivation drv2 = readDerivation(input.first);
    for (auto & name : input.second) {
    auto i = drv2.outputs.find(name);
    if (i == drv2.outputs.end()) continue;
    inputs.insert(i->second.path);
    basicDrv.inputSrcs.insert(i->second.path);
    [5.798]
    [5.324]
    for (auto & input : step->drv.inputDrvs) {
    Derivation drv2 = readDerivation(input.first);
    for (auto & name : input.second) {
    auto i = drv2.outputs.find(name);
    if (i == drv2.outputs.end()) continue;
    inputs.insert(i->second.path);
    basicDrv.inputSrcs.insert(i->second.path);
    }
  • edit in src/hydra-queue-runner/build-remote.cc at line 199
    [5.334][5.749:755](),[5.749][5.749:755]()
    }
  • replacement in src/hydra-queue-runner/build-remote.cc at line 200
    [5.592][5.592:861]()
    /* Ensure that the inputs exist in the destination store. This is
    a no-op for regular stores, but for the binary cache store,
    this will copy the inputs to the binary cache from the local
    store. */
    destStore->buildPaths(basicDrv.inputSrcs);
    [5.592]
    [5.3876]
    /* Ensure that the inputs exist in the destination store. This is
    a no-op for regular stores, but for the binary cache store,
    this will copy the inputs to the binary cache from the local
    store. */
    destStore->buildPaths(basicDrv.inputSrcs);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 206
    [5.3877][5.3877:3911](),[5.3911][5.862:916](),[5.916][5.0:68](),[5.873][5.0:68](),[5.68][5.26:98](),[5.205][5.26:98](),[5.98][5.69:251]()
    /* Copy the input closure. */
    if (/* machine->sshName != "localhost" */ true) {
    auto mc1 = std::make_shared<MaintainCount>(nrStepsWaiting);
    std::lock_guard<std::mutex> sendLock(machine->state->sendLock);
    mc1.reset();
    MaintainCount mc2(nrStepsCopyingTo);
    printMsg(lvlDebug, format("sending closure of ‘%1%’ to ‘%2%’") % step->drvPath % machine->sshName);
    [5.3877]
    [5.0]
    /* Copy the input closure. */
    if (/* machine->sshName != "localhost" */ true) {
    auto mc1 = std::make_shared<MaintainCount>(nrStepsWaiting);
    std::lock_guard<std::mutex> sendLock(machine->state->sendLock);
    mc1.reset();
    MaintainCount mc2(nrStepsCopyingTo);
    printMsg(lvlDebug, format("sending closure of ‘%1%’ to ‘%2%’") % step->drvPath % machine->sshName);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 214
    [5.1][5.1:55]()
    auto now1 = std::chrono::steady_clock::now();
    [5.1]
    [5.55]
    auto now1 = std::chrono::steady_clock::now();
  • replacement in src/hydra-queue-runner/build-remote.cc at line 216
    [5.56][3.134:192]()
    copyClosureTo(destStore, from, to, inputs, true);
    [5.56]
    [5.57]
    copyClosureTo(destStore, from, to, inputs, true);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 218
    [5.58][5.58:112]()
    auto now2 = std::chrono::steady_clock::now();
    [5.58]
    [5.112]
    auto now2 = std::chrono::steady_clock::now();
  • replacement in src/hydra-queue-runner/build-remote.cc at line 220
    [5.113][5.113:216](),[5.157][5.277:283](),[5.216][5.277:283](),[5.678][5.277:283](),[5.980][5.277:283](),[5.277][5.277:283]()
    result.overhead += std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count();
    }
    [5.113]
    [5.4064]
    result.overhead += std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count();
    }
  • replacement in src/hydra-queue-runner/build-remote.cc at line 223
    [5.4065][5.486:511]()
    autoDelete.cancel();
    [5.4065]
    [5.511]
    autoDelete.cancel();
  • replacement in src/hydra-queue-runner/build-remote.cc at line 225
    [5.512][5.4065:4089](),[5.4065][5.4065:4089](),[5.4089][5.990:1092]()
    /* Do the build. */
    printMsg(lvlDebug, format("building ‘%1%’ on ‘%2%’") % step->drvPath % machine->sshName);
    [5.512]
    [5.894]
    /* Do the build. */
    printMsg(lvlDebug, format("building ‘%1%’ on ‘%2%’") % step->drvPath % machine->sshName);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 228
    [5.895][5.895:919](),[5.919][5.222:279](),[5.279][5.1009:1018](),[5.1009][5.1009:1018](),[5.1018][5.280:481](),[5.209][5.4352:4368](),[5.481][5.4352:4368](),[5.1150][5.4352:4368](),[5.4352][5.4352:4368]()
    if (sendDerivation)
    to << cmdBuildPaths << PathSet({step->drvPath});
    else
    to << cmdBuildDerivation << step->drvPath << basicDrv;
    to << maxSilentTime << buildTimeout;
    if (GET_PROTOCOL_MINOR(remoteVersion) >= 2)
    to << 64 * 1024 * 1024; // == maxLogSize
    to.flush();
    [5.895]
    [5.1151]
    if (sendDerivation)
    to << cmdBuildPaths << PathSet({step->drvPath});
    else
    to << cmdBuildDerivation << step->drvPath << basicDrv;
    to << maxSilentTime << buildTimeout;
    if (GET_PROTOCOL_MINOR(remoteVersion) >= 2)
    to << 64 * 1024 * 1024; // == maxLogSize
    to.flush();
  • replacement in src/hydra-queue-runner/build-remote.cc at line 237
    [5.1152][5.4368:4400](),[5.4368][5.4368:4400](),[5.4400][5.55:152](),[5.152][5.4429:4460](),[5.4429][5.4429:4460]()
    result.startTime = time(0);
    int res;
    {
    MaintainCount mc(nrStepsBuilding);
    res = readInt(from);
    }
    result.stopTime = time(0);
    [5.1152]
    [5.1153]
    result.startTime = time(0);
    int res;
    {
    MaintainCount mc(nrStepsBuilding);
    res = readInt(from);
    }
    result.stopTime = time(0);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 245
    [5.1154][5.1154:1303](),[5.1303][4.0:116]()
    if (sendDerivation) {
    if (res) {
    result.errorMsg = (format("%1% on ‘%2%’") % readString(from) % machine->sshName).str();
    if (res == 100) {
    result.stepStatus = bsFailed;
    result.canCache = true;
    [5.1154]
    [4.116]
    if (sendDerivation) {
    if (res) {
    result.errorMsg = (format("%1% on ‘%2%’") % readString(from) % machine->sshName).str();
    if (res == 100) {
    result.stepStatus = bsFailed;
    result.canCache = true;
    }
    else if (res == 101) {
    result.stepStatus = bsTimedOut;
    }
    else {
    result.stepStatus = bsAborted;
    result.canRetry = true;
    }
    return;
  • replacement in src/hydra-queue-runner/build-remote.cc at line 261
    [4.130][4.130:333]()
    else if (res == 101) {
    result.stepStatus = bsTimedOut;
    }
    else {
    result.stepStatus = bsAborted;
    result.canRetry = true;
    [4.130]
    [4.333]
    result.stepStatus = bsSuccess;
    } else {
    result.errorMsg = readString(from);
    switch ((BuildResult::Status) res) {
    case BuildResult::Built:
    result.stepStatus = bsSuccess;
    break;
    case BuildResult::Substituted:
    case BuildResult::AlreadyValid:
    result.stepStatus = bsSuccess;
    result.isCached = true;
    break;
    case BuildResult::PermanentFailure:
    result.stepStatus = bsFailed;
    result.canCache = true;
    result.errorMsg = "";
    break;
    case BuildResult::InputRejected:
    case BuildResult::OutputRejected:
    result.stepStatus = bsFailed;
    result.canCache = true;
    break;
    case BuildResult::TransientFailure:
    result.stepStatus = bsFailed;
    result.canRetry = true;
    result.errorMsg = "";
    break;
    case BuildResult::CachedFailure: // cached on the build machine
    result.stepStatus = bsCachedFailure;
    result.canCache = true;
    result.errorMsg = "";
    break;
    case BuildResult::TimedOut:
    result.stepStatus = bsTimedOut;
    result.errorMsg = "";
    break;
    case BuildResult::MiscFailure:
    result.stepStatus = bsAborted;
    result.canRetry = true;
    break;
    case BuildResult::LogLimitExceeded:
    result.stepStatus = bsLogLimitExceeded;
    break;
    default:
    result.stepStatus = bsAborted;
    break;
  • replacement in src/hydra-queue-runner/build-remote.cc at line 308
    [4.347][5.1509:1529](),[5.1509][5.1509:1529]()
    return;
    [4.347]
    [5.1529]
    if (result.stepStatus != bsSuccess) return;
  • replacement in src/hydra-queue-runner/build-remote.cc at line 310
    [5.1539][4.348:387](),[4.387][5.1583:1596](),[5.1583][5.1583:1596](),[5.1647][5.1647:1691](),[5.1691][4.388:2077]()
    result.stepStatus = bsSuccess;
    } else {
    result.errorMsg = readString(from);
    switch ((BuildResult::Status) res) {
    case BuildResult::Built:
    result.stepStatus = bsSuccess;
    break;
    case BuildResult::Substituted:
    case BuildResult::AlreadyValid:
    result.stepStatus = bsSuccess;
    result.isCached = true;
    break;
    case BuildResult::PermanentFailure:
    result.stepStatus = bsFailed;
    result.canCache = true;
    result.errorMsg = "";
    break;
    case BuildResult::InputRejected:
    case BuildResult::OutputRejected:
    result.stepStatus = bsFailed;
    result.canCache = true;
    break;
    case BuildResult::TransientFailure:
    result.stepStatus = bsFailed;
    result.canRetry = true;
    result.errorMsg = "";
    break;
    case BuildResult::CachedFailure: // cached on the build machine
    result.stepStatus = bsCachedFailure;
    result.canCache = true;
    result.errorMsg = "";
    break;
    case BuildResult::TimedOut:
    result.stepStatus = bsTimedOut;
    result.errorMsg = "";
    break;
    case BuildResult::MiscFailure:
    result.stepStatus = bsAborted;
    result.canRetry = true;
    break;
    case BuildResult::LogLimitExceeded:
    result.stepStatus = bsLogLimitExceeded;
    break;
    default:
    result.stepStatus = bsAborted;
    break;
    [5.1539]
    [4.2077]
    result.errorMsg = "";
    /* If the path was substituted or already valid, then we didn't
    get a build log. */
    if (result.isCached) {
    printMsg(lvlInfo, format("outputs of ‘%1%’ substituted or already valid on ‘%2%’") % step->drvPath % machine->sshName);
    unlink(result.logFile.c_str());
    result.logFile = "";
  • edit in src/hydra-queue-runner/build-remote.cc at line 320
    [4.2087][4.2087:2139](),[5.1730][5.4785:4791](),[4.2139][5.4785:4791](),[5.4785][5.4785:4791]()
    if (result.stepStatus != bsSuccess) return;
    }
  • replacement in src/hydra-queue-runner/build-remote.cc at line 321
    [4.2141][4.2141:2167]()
    result.errorMsg = "";
    [4.2141]
    [5.4791]
    /* Copy the output paths. */
    if (/* machine->sshName != "localhost" */ true) {
    MaintainCount mc(nrStepsCopyingFrom);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 325
    [5.4792][5.1731:1826](),[5.1826][4.2168:2195](),[4.2195][5.0:136](),[5.1925][5.0:136](),[5.136][5.1925:2000](),[5.1925][5.1925:2000]()
    /* If the path was substituted or already valid, then we didn't
    get a build log. */
    if (result.isCached) {
    printMsg(lvlInfo, format("outputs of ‘%1%’ substituted or already valid on ‘%2%’") % step->drvPath % machine->sshName);
    unlink(result.logFile.c_str());
    result.logFile = "";
    }
    [5.4792]
    [5.2000]
    auto now1 = std::chrono::steady_clock::now();
  • replacement in src/hydra-queue-runner/build-remote.cc at line 327
    [5.2001][5.4792:4825](),[5.4792][5.4792:4825](),[5.4825][5.981:1035](),[5.1035][5.0:46]()
    /* Copy the output paths. */
    if (/* machine->sshName != "localhost" */ true) {
    MaintainCount mc(nrStepsCopyingFrom);
    [5.2001]
    [5.46]
    PathSet outputs;
    for (auto & output : step->drv.outputs)
    outputs.insert(output.second.path);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 331
    [5.47][5.47:101]()
    auto now1 = std::chrono::steady_clock::now();
    [5.47]
    [5.101]
    /* Query the size of the output paths. */
    size_t totalNarSize = 0;
    to << cmdQueryPathInfos << outputs;
    to.flush();
    while (true) {
    if (readString(from) == "") break;
    readString(from); // deriver
    readStrings<PathSet>(from); // references
    readLongLong(from); // download size
    totalNarSize += readLongLong(from);
    }
  • replacement in src/hydra-queue-runner/build-remote.cc at line 343
    [5.102][5.816:841](),[5.1404][5.816:841](),[5.816][5.816:841](),[5.841][5.1405:1453](),[5.1453][5.883:931](),[5.883][5.883:931]()
    PathSet outputs;
    for (auto & output : step->drv.outputs)
    outputs.insert(output.second.path);
    [5.102]
    [5.103]
    if (totalNarSize > maxOutputSize) {
    result.stepStatus = bsNarSizeLimitExceeded;
    return;
    }
  • replacement in src/hydra-queue-runner/build-remote.cc at line 348
    [5.104][5.104:523]()
    /* Query the size of the output paths. */
    size_t totalNarSize = 0;
    to << cmdQueryPathInfos << outputs;
    to.flush();
    while (true) {
    if (readString(from) == "") break;
    readString(from); // deriver
    readStrings<PathSet>(from); // references
    readLongLong(from); // download size
    totalNarSize += readLongLong(from);
    }
    [5.104]
    [5.523]
    printMsg(lvlDebug, format("copying outputs of ‘%s’ from ‘%s’ (%d bytes)")
    % step->drvPath % machine->sshName % totalNarSize);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 351
    [5.524][4.2196:2326]()
    if (totalNarSize > maxOutputSize) {
    result.stepStatus = bsNarSizeLimitExceeded;
    return;
    }
    [5.524]
    [4.2326]
    /* Block until we have the required amount of memory
    available. FIXME: only need this for binary cache
    destination stores. */
    auto resStart = std::chrono::steady_clock::now();
    auto memoryReservation(memoryTokens.get(totalNarSize));
    auto resStop = std::chrono::steady_clock::now();
  • replacement in src/hydra-queue-runner/build-remote.cc at line 358
    [4.2327][5.524:678](),[5.524][5.524:678]()
    printMsg(lvlDebug, format("copying outputs of ‘%s’ from ‘%s’ (%d bytes)")
    % step->drvPath % machine->sshName % totalNarSize);
    [4.2327]
    [5.678]
    auto resMs = std::chrono::duration_cast<std::chrono::milliseconds>(resStop - resStart).count();
    if (resMs >= 1000)
    printMsg(lvlError, format("warning: had to wait %d ms for %d memory tokens for %s")
    % resMs % totalNarSize % step->drvPath);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 363
    [5.679][5.679:1014]()
    /* Block until we have the required amount of memory
    available. FIXME: only need this for binary cache
    destination stores. */
    auto resStart = std::chrono::steady_clock::now();
    auto memoryReservation(memoryTokens.get(totalNarSize));
    auto resStop = std::chrono::steady_clock::now();
    [5.679]
    [5.217]
    result.accessor = destStore->getFSAccessor();
  • replacement in src/hydra-queue-runner/build-remote.cc at line 365
    [5.218][5.1015:1299]()
    auto resMs = std::chrono::duration_cast<std::chrono::milliseconds>(resStop - resStart).count();
    if (resMs >= 1000)
    printMsg(lvlError, format("warning: had to wait %d ms for %d memory tokens for %s")
    % resMs % totalNarSize % step->drvPath);
    [5.218]
    [5.231]
    to << cmdExportPaths << 0 << outputs;
    to.flush();
    destStore->importPaths(false, from, result.accessor);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 369
    [5.232][5.1300:1354]()
    result.accessor = destStore->getFSAccessor();
    [5.232]
    [5.272]
    auto now2 = std::chrono::steady_clock::now();
  • replacement in src/hydra-queue-runner/build-remote.cc at line 371
    [5.273][3.193:321]()
    to << cmdExportPaths << 0 << outputs;
    to.flush();
    destStore->importPaths(false, from, result.accessor);
    [5.273]
    [5.274]
    result.overhead += std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count();
    }
  • replacement in src/hydra-queue-runner/build-remote.cc at line 374
    [5.275][5.275:329]()
    auto now2 = std::chrono::steady_clock::now();
    [5.275]
    [5.329]
    /* Shut down the connection. */
    child.to.close();
    child.pid.wait(true);
  • replacement in src/hydra-queue-runner/build-remote.cc at line 378
    [5.330][5.330:433]()
    result.overhead += std::chrono::duration_cast<std::chrono::milliseconds>(now2 - now1).count();
    [5.330]
    [5.387]
    } catch (Error & e) {
    /* Disable this machine until a certain period of time has
    passed. This period increases on every consecutive
    failure. However, don't count failures that occurred soon
    after the last one (to take into account steps started in
    parallel). */
    auto info(machine->state->connectInfo.lock());
    auto now = std::chrono::system_clock::now();
    if (info->consecutiveFailures == 0 || info->lastFailure < now - std::chrono::seconds(30)) {
    info->consecutiveFailures = std::min(info->consecutiveFailures + 1, (unsigned int) 4);
    info->lastFailure = now;
    int delta = retryInterval * powf(retryBackoff, info->consecutiveFailures - 1) + (rand() % 30);
    printMsg(lvlInfo, format("will disable machine ‘%1%’ for %2%s") % machine->sshName % delta);
    info->disabledUntil = now + std::chrono::seconds(delta);
    }
    throw;
  • edit in src/hydra-queue-runner/build-remote.cc at line 395
    [5.393][5.5074:5159](),[5.5074][5.5074:5159]()
    /* Shut down the connection. */
    child.to.close();
    child.pid.wait(true);