hydra-queue-runner: Improve dispatcher
[?]
Jun 17, 2015, 11:52 PM
NNOCZ4ROWC64ZKSAE2MPHZ3LGLI34C5TJJFW4MHXN6OELK6VMWOQC
Dependencies
- [2] OCZ4LSGG Automatically retry aborted builds
- [3] 5AIYUMTB Basic remote building
- [4] ENXUSMSV Make concurrency more robust
- [5] N5O7VEEO Immediately abort builds that require an unsupported system type
- [6] HLSHCK3C Support requiredSystemFeatures
- [7] NJJ7H64S Very basic multi-threaded queue runner
- [8] GKZN4UV7 Make the queue monitor more robust, and better debug output
- [9] 24BMQDZA Start of single-process hydra-queue-runner
- [10] T2EIYJNG On SIGINT, shut down the builder threads
- [11] WHULPA6S Handle failure with output
- [12] RQUAATWB Add status dump facility
Change contents
- edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 9
#include <algorithm> - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 163
Sync<unsigned int> currentJobs;Machine(){auto currentJobs_(currentJobs.lock());*currentJobs_ = 0;}std::atomic<unsigned int> currentJobs{0}; - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 185
auto currentJobs_(machine->currentJobs.lock());(*currentJobs_)++;machine->currentJobs++; - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 189
auto currentJobs_(machine->currentJobs.lock());if (*currentJobs_ > 0) (*currentJobs_)--;machine->currentJobs--; - edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 280
MachineReservation::ptr findMachine(Step::ptr step); - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 872[2.1915]→[3.2622:2632](∅→∅),[3.2622]→[3.2622:2632](∅→∅),[3.2632]→[3.6794:6839](∅→∅),[3.6839]→[3.1651:1734](∅→∅)
{auto runnable_(runnable.lock());printMsg(lvlDebug, format("%1% runnable builds") % runnable_->size());bool keepGoing; - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 874
/* FIXME: we're holding the runnable lock too long here. This could be more efficient. */do {/* Bail out when there are no slots left. */std::vector<Machine::ptr> machinesSorted;{auto machines_(machines.lock());machinesSorted.insert(machinesSorted.end(),machines_->begin(), machines_->end());} - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 883
system_time now = std::chrono::system_clock::now();/* Sort the machines by a combination of speed factor and available slots. Prioritise the available machines as follows: - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 887
for (auto i = runnable_->begin(); i != runnable_->end(); ) {auto step = i->lock();- First by load divided by speed factor, rounded to the nearest integer. This causes fast machines to be preferred over slow machines with similar loads. - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 891
/* Delete dead steps. */if (!step) {i = runnable_->erase(i);continue;}- Then by speed factor. - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 893
/* Skip previously failed steps that aren't ready to be retried. */- Finally by load. */sort(machinesSorted.begin(), machinesSorted.end(),[](const Machine::ptr & a, const Machine::ptr & b) -> bool - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 897
auto step_(step->state.lock());if (step_->tries > 0 && step_->after > now) {if (step_->after < sleepUntil)sleepUntil = step_->after;float ta = roundf(a->currentJobs / a->speedFactor);float tb = roundf(b->currentJobs / b->speedFactor);returnta != tb ? ta > tb :a->speedFactor != b->speedFactor ? a->speedFactor > b->speedFactor :a->maxJobs > b->maxJobs;});/* Find a machine with a free slot and find a step to runon it. Once we find such a pair, we restart the outerloop because the machine sorting will have changed. */keepGoing = false;system_time now = std::chrono::system_clock::now();for (auto & machine : machinesSorted) {// FIXME: can we lose a wakeup if a builder exits concurrently?if (machine->currentJobs >= machine->maxJobs) continue;auto runnable_(runnable.lock());printMsg(lvlDebug, format("%1% runnable builds") % runnable_->size());/* FIXME: we're holding the runnable lock too longhere. This could be more efficient. */for (auto i = runnable_->begin(); i != runnable_->end(); ) {auto step = i->lock();/* Delete dead steps. */if (!step) {i = runnable_->erase(i);continue;}/* Can this machine do this step? */if (!machine->supportsStep(step)) { - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 935
}/* Skip previously failed steps that aren't readyto be retried. */{auto step_(step->state.lock());if (step_->tries > 0 && step_->after > now) {if (step_->after < sleepUntil)sleepUntil = step_->after;++i;continue;}}/* Make a slot reservation and start a thread todo the build. */auto reservation = std::make_shared<MachineReservation>(machine);i = runnable_->erase(i);auto builderThread = std::thread(&State::builder, this, step, reservation);builderThread.detach(); // FIXME? - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 956[3.10867]→[3.10867:10957](∅→∅),[3.10957]→[3.1735:1842](∅→∅),[3.1842]→[3.11064:11119](∅→∅),[3.11064]→[3.11064:11119](∅→∅)
auto reservation = findMachine(step);if (!reservation) {printMsg(lvlDebug, format("cannot execute step ‘%1%’ right now") % step->drvPath);++i;continue;keepGoing = true;break; - edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 959
i = runnable_->erase(i); - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 960
auto builderThread = std::thread(&State::builder, this, step, reservation);builderThread.detach(); // FIXME?if (keepGoing) break; - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 962
}} while (keepGoing); - edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 984[3.11939]→[3.3040:3044](∅→∅),[3.3040]→[3.3040:3044](∅→∅),[3.3044]→[3.11940:12079](∅→∅),[3.12079]→[3.1283:1335](∅→∅),[3.1335]→[3.12182:12391](∅→∅),[3.12182]→[3.12182:12391](∅→∅),[3.12559]→[3.12559:12574](∅→∅)
}MachineReservation::ptr State::findMachine(Step::ptr step){auto machines_(machines.lock());for (auto & machine : *machines_) {if (!machine->supportsStep(step)) continue;{auto currentJobs_(machine->currentJobs.lock());if (*currentJobs_ >= machine->maxJobs) continue;}return std::make_shared<MachineReservation>(machine);}return 0; - edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 1300
auto currentJobs_(m->currentJobs.lock()); - replacement in src/hydra-queue-runner/hydra-queue-runner.cc at line 1301
% m->sshName % *currentJobs_ % m->maxJobs);% m->sshName % m->currentJobs % m->maxJobs);