Step cancellation: Don't use pthread_cancel()
[?]
Nov 7, 2016, 6:34 PM
2DNPZFPNI2OM5FKYTC2KE5NKKKAP45AQ2VDDYLZZHCJ35X3EBJRQC
Dependencies
- [2] NKQOEVVP Get rid of "will retry" messages after "maybe cancelling..."
- [3] XCDTFZUY hydra-queue-runner: Fix build
- [4] KPKXKDNG hydra-queue-runner: Fix assertion failure
- [5] MBWLLEYE hydra-queue-runner: Fix message
- [6] TTBLPQAJ Keep track of wait time per system type
- [7] WE5Q2NVI Allow build to be bumped to the front of the queue via the web interface
- [8] 73YR46NJ hydra-queue-runner: Write directly to a binary cache
- [9] EHEQ4AY3 Fix retry of transient failures
- [10] N4IROACV Move buildRemote() into State
- [11] MHVIT4JY Split hydra-queue-runner.cc more
- [12] YR2IM6Y5 Temporarily disable machines after a connection failure
- [13] UNVMKJV5 Unify build and step status codes
- [14] BG6PEOB2 Make the output size limit configurable
- [15] DKJFD6JN Process Nix API changes
- [16] 5AIYUMTB Basic remote building
- [17] 6LIYTMFU Fix build failure on GCC 5.4
- [18] LVQXQIYA Kill active build steps when builds are cancelled
- [19] HJOEIMLR Refactor
- [*] DIEY5USN Keep better bytesReceived/bytesSent stats
Change contents
- replacement in src/hydra-queue-runner/build-remote.cc at line 120
RemoteResult & result)RemoteResult & result, std::shared_ptr<ActiveStep> activeStep) - edit in src/hydra-queue-runner/build-remote.cc at line 140[3.553][21.23]
{auto activeStepState(activeStep->state_.lock());if (activeStepState->cancelled) throw Error("step cancelled");activeStepState->pid = child.pid;}Finally clearPid([&]() {auto activeStepState(activeStep->state_.lock());activeStepState->pid = -1;/* FIXME: there is a slight race here with step cancellation in State::processQueueChange(), which could call kill() on this pid after we've done waitpid() on it. With pid wrap-around, there is a tiny possibility that we end up killing another process. Meh. */}); - edit in src/hydra-queue-runner/builder.cc at line 18
activeStep->threadId = pthread_self(); - edit in src/hydra-queue-runner/builder.cc at line 21
activeStep->threadId = -1; - replacement in src/hydra-queue-runner/builder.cc at line 28
res = doBuildStep(destStore, step, reservation->machine);res = doBuildStep(destStore, reservation, activeStep); - replacement in src/hydra-queue-runner/builder.cc at line 57[6.1479]→[2.151:231](∅→∅),[2.231]→[6.1552:1578](∅→∅),[6.694]→[6.1552:1578](∅→∅),[6.1329]→[6.1552:1578](∅→∅),[6.1552]→[6.1552:1578](∅→∅)
State::StepResult State::doBuildStep(nix::ref<Store> destStore, Step::ptr step,Machine::ptr machine)State::StepResult State::doBuildStep(nix::ref<Store> destStore,MachineReservation::ptr reservation,std::shared_ptr<ActiveStep> activeStep) - edit in src/hydra-queue-runner/builder.cc at line 61
auto & step(reservation->step);auto & machine(reservation->machine); - replacement in src/hydra-queue-runner/builder.cc at line 161
buildRemote(destStore, machine, step, maxSilentTime, buildTimeout, result);buildRemote(destStore, machine, step, maxSilentTime, buildTimeout, result, activeStep); - replacement in src/hydra-queue-runner/builder.cc at line 165[6.4504]→[6.2505:2548](∅→∅),[6.2548]→[6.4558:4597](∅→∅),[6.4558]→[6.4558:4597](∅→∅),[6.4597]→[6.0:36](∅→∅),[6.36]→[6.1231:1365](∅→∅)
result.stepStatus = bsAborted;result.errorMsg = e.msg();result.canRetry = true;} catch (__cxxabiv1::__forced_unwind & e) {/* The queue monitor thread cancelled this step. */try {if (activeStep->state_.lock()->cancelled) { - replacement in src/hydra-queue-runner/builder.cc at line 167
pqxx::work txn(*conn);finishBuildStep(txn, result.startTime, time(0), result.overhead, buildId,stepNr, machine->sshName, bsCancelled, "");txn.commit();stepFinished = true;} catch (...) {ignoreException();result.stepStatus = bsCancelled;result.canRetry = false;} else {result.stepStatus = bsAborted;result.errorMsg = e.msg();result.canRetry = true; - edit in src/hydra-queue-runner/builder.cc at line 174
throw; - edit in src/hydra-queue-runner/queue-monitor.cc at line 340
auto threadId = activeStep->threadId; // FIXME: use Sync or atomic?if (threadId == 0) continue; - replacement in src/hydra-queue-runner/queue-monitor.cc at line 345
printInfo("cancelling thread for build step ‘%s’", activeStep->step->drvPath);int err = pthread_cancel(threadId);if (err)printError("error cancelling thread for build step ‘%s’: %s",activeStep->step->drvPath, strerror(err));{auto activeStepState(activeStep->state_.lock());if (activeStepState->cancelled) continue;activeStepState->cancelled = true;if (activeStepState->pid != -1) {printInfo("killing builder process %d of build step ‘%s’",activeStepState->pid, activeStep->step->drvPath);if (kill(activeStepState->pid, SIGINT) == -1)printError("error killing build step ‘%s’: %s",activeStep->step->drvPath, strerror(errno));}} - replacement in src/hydra-queue-runner/state.hh at line 380
pthread_t threadId;struct State{pid_t pid = -1;bool cancelled = false;};nix::Sync<State> state_; - replacement in src/hydra-queue-runner/state.hh at line 486
Step::ptr step, Machine::ptr machine);MachineReservation::ptr reservation,std::shared_ptr<ActiveStep> activeStep); - replacement in src/hydra-queue-runner/state.hh at line 492
RemoteResult & result);RemoteResult & result, std::shared_ptr<ActiveStep> activeStep);