Remove TokenServer in preparation of making NAR copying O(1) memory

[?]

Jul 27, 2020, 12:57 PM

ZMICO7M6COWUZLYQ5MV374BQJLWROSTEORLTOINSU4UGB7TJKJTAC

Dependencies

[2] EYIB6HA6 Supress debug message
[3] YSZQ3ORR Fix build
[4] 2DNPZFPN Step cancellation: Don't use pthread_cancel()
[5] 4YCF3KBG Concurrent hydra-evaluator
[6] MS676RZW Remove s3binarystore (moved to nix in d155d80)
[7] 73YR46NJ hydra-queue-runner: Write directly to a binary cache
[8] DIEY5USN Keep better bytesReceived/bytesSent stats
[9] MB3TISH2 Rate-limit the number of threads copying closures at the same time
[10] BRAESISH Warn if PostgreSQL appears stalled
[11] U55WNIDP Abort unsupported build steps
[12] 5CNQUQ77 Bump Nix
[13] NSBNNM77 Add hydra.conf option "nar_buffer_size" to configure memoryTokens limit
[14] BG6PEOB2 Make the output size limit configurable
[15] NAYQT2GT hydra-queue-runner: Use cmdBuildDerivation
[16] FJ5H26J6 Fix building against nix master
[17] NWFDDRUG hydra-queue-runner: Limit concurrent database connections
[18] MQNJPMCU Fix bad format string
[19] EBJP3MNA Build against nix-master
[20] V6H6BWMK Sync with Nix
[21] BUEWVH2M Remove signing parameter (nix#f435f82)
[22] PLOZBRTR Add command ‘hydra-queue-runner --status’ to show current status
[23] YXYXJDMB hydra-queue-runner: Write GC roots for outputs paths
[24] 3YSJ3LYK Remove finally.hh
[25] IKJBYIGY Update to reflect BinaryCacheStore changes
[26] 7LB6QBXY Keep track of the number of build steps that are being built
[27] GH4S4AWM Rename file
[28] UVNTWTWG Prevent download of NARs we just uploaded
[29] XLYHZUHT Cache .narinfo lookups
[30] EPWEMRI2 Allow determinism checking for entire jobsets
[31] H46LUUZY hydra-queue-runner: don't try to distribute builds on localhost
[32] YNO7CQ6P hydra-queue-runner: More accurate memory accounting
[33] NJXD2ABJ Abort unsupported build steps
[34] MHVIT4JY Split hydra-queue-runner.cc more
[35] 24BMQDZA Start of single-process hydra-queue-runner
[36] FITVNQ2S Keep track of the time we spend copying to/from build machines
[37] YZAI5GQU Implement a database connection pool
[38] FHVJYJFE Upload build logs to the binary cache
[39] N2NKSKHS Refactor local binary cache code into a subclass
[40] LVQXQIYA Kill active build steps when builds are cancelled
[41] GTUZLZRH Add an S3-backed binary cache store
[42] PW4TLMWS Sync with latest Nix
[43] HJOEIMLR Refactor
[44] 5AIYUMTB Basic remote building
[45] BYVRA54Q Temporarily disable machines on any exception, not just connection failures
[46] JPHDKOMJ hydra-queue-runner: Keep some notification statistics
[47] B2L4T3X6 Sync with Nix
[48] LSUX6IQR Update to latest nixUnstable
[49] SL3WSRAC hydra-queue-runner: Limit memory usage
[50] 7KLHBDYA Fix build
[51] G7KWXSFM Distinguish build step states
[52] N4IROACV Move buildRemote() into State
[53] SGFLU2VS USER -> LOGNAME for consistency
[54] IGLIPGT6 Make maxLogSize configurable
[55] ENXUSMSV Make concurrency more robust
[56] WV4SSAIY Build against nix-master
[57] EOO4EFWD Use a single BinaryCacheStore for all threads
[58] BTSZJ7W2 Quick hack to fix compilation
[59] 32KJOERM Turn hydra-notify into a daemon

Change contents

file deletion: token-server.hh (----------)

[4.187]→[4.3316:3355](∅→∅),[4.3355]→[4.1640:1640](∅→∅)

#pragma once

#include <atomic>

#include "sync.hh"
#include "types.hh"

namespace nix {

MakeError(NoTokens, Error);

/* This class hands out tokens. There are only ‘maxTokens’ tokens
available. Calling get(N) will return a Token object, representing
ownership of N tokens. If the requested number of tokens is
unavailable, get() will sleep until another thread returns a
token. */

class TokenServer
{
const size_t maxTokens;

Sync<size_t> inUse{0};
std::condition_variable wakeup;

public:
TokenServer(size_t maxTokens) : maxTokens(maxTokens) { }

class Token
{
friend TokenServer;

TokenServer * ts;

size_t tokens;

bool acquired = false;

Token(TokenServer * ts, size_t tokens, unsigned int timeout)
: ts(ts), tokens(tokens)
{
if (tokens >= ts->maxTokens)
throw NoTokens("requesting more tokens (%d) than exist (%d)", tokens, ts->maxTokens);
debug("acquiring %d tokens", tokens);
auto inUse(ts->inUse.lock());
while (*inUse + tokens > ts->maxTokens)
if (timeout) {
if (!inUse.wait_for(ts->wakeup, std::chrono::seconds(timeout),
[&]() { return *inUse + tokens <= ts->maxTokens; }))
return;
} else
inUse.wait(ts->wakeup);
*inUse += tokens;
acquired = true;
}

public:

Token(Token && t) : ts(t.ts), tokens(t.tokens), acquired(t.acquired)
{
t.ts = 0;
t.acquired = false;
}
Token(const Token & l) = delete;

~Token()
{
if (!ts || !acquired) return;
{
auto inUse(ts->inUse.lock());
assert(*inUse >= t);
*inUse -= t;
tokens -= t;
}
// FIXME: inefficient. Should wake up waiters that can
// proceed now.
ts->wakeup.notify_all();
}

};

Token get(size_t tokens = 1, unsigned int timeout = 0)
{
return Token(this, tokens, timeout);
}

size_t currentUse()
{
auto inUse_(inUse.lock());
return *inUse_;
}
};

}

size_t capacity()
{
return maxTokens;
}
give_back(tokens);
}

bool operator ()() { return acquired; }

void give_back(size_t t)
{
debug("returning %d tokens", t);
if (!t) return;
assert(acquired);
assert(t <= tokens);
replacement in src/hydra-queue-runner/Makefile.am at line 5

[4.131]→[4.0:59](∅→∅)

build-result.hh counter.hh token-server.hh state.hh db.hh

[4.131]

[3.299]

build-result.hh counter.hh state.hh db.hh
edit in src/hydra-queue-runner/build-remote.cc at line 428

[4.2468]→[4.523:524](∅→∅),[4.2975]→[4.523:524](∅→∅),[4.8075]→[4.523:524](∅→∅),[4.523]→[4.523:524](∅→∅),[4.524]→[4.8076:8141](∅→∅),[4.8141]→[4.0:264](∅→∅),[4.264]→[4.8244:8306](∅→∅),[4.8244]→[4.8244:8306](∅→∅),[4.8306]→[4.265:457](∅→∅),[4.457]→[4.8374:8435](∅→∅),[4.8374]→[4.8374:8435](∅→∅),[4.8435]→[4.2326:2327](∅→∅),[4.2326]→[4.2326:2327](∅→∅),[4.2327]→[4.8436:8575](∅→∅),[4.8575]→[4.2976:3154](∅→∅)

/* Block until we have the required amount of memory
available, which is twice the NAR size (namely the
uncompressed and worst-case compressed NAR), plus 150
MB for xz compression overhead. (The xz manpage claims
~94 MiB, but that's not was I'm seeing.) */
auto resStart = std::chrono::steady_clock::now();
size_t compressionCost = totalNarSize + 150 * 1024 * 1024;
result.tokens = std::make_unique<nix::TokenServer::Token>(memoryTokens.get(totalNarSize + compressionCost));
auto resStop = std::chrono::steady_clock::now();

auto resMs = std::chrono::duration_cast<std::chrono::milliseconds>(resStop - resStart).count();
if (resMs >= 1000)
printMsg(lvlError, "warning: had to wait %d ms for %d memory tokens for %s",
resMs, totalNarSize, localStore->printStorePath(step->drvPath));
edit in src/hydra-queue-runner/build-remote.cc at line 434

[4.232]→[4.458:668](∅→∅)

/* Release the tokens pertaining to NAR
compression. After this we only have the uncompressed
NAR in memory. */
result.tokens->give_back(compressionCost);
edit in src/hydra-queue-runner/builder.cc at line 207

[4.712]→[4.2415:2504](∅→∅),[4.1043]→[4.2415:2504](∅→∅),[4.1161]→[4.2415:2504](∅→∅),[4.1230]→[4.2415:2504](∅→∅),[4.1432]→[4.2415:2504](∅→∅)

} catch (NoTokens & e) {
result.stepStatus = bsNarSizeLimitExceeded;
edit in src/hydra-queue-runner/builder.cc at line 225

[4.699]→[4.699:726](∅→∅)

result.tokens = 0;
edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 52

[4.62]→[4.1421:1499](∅→∅),[4.1421]→[4.1421:1499](∅→∅)

, memoryTokens(config->getIntOption("nar_buffer_size", getMemSize() / 2))
edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 57

[4.1574]→[2.0:73](∅→∅),[2.73]→[4.1651:1652](∅→∅),[4.1651]→[4.1651:1652](∅→∅)

debug("using %d bytes for the NAR buffer", memoryTokens.capacity());
edit in src/hydra-queue-runner/hydra-queue-runner.cc at line 544

[4.626]→[4.1420:1487](∅→∅)

root.attr("memoryTokensInUse", memoryTokens.currentUse());
edit in src/hydra-queue-runner/state.hh at line 11

[4.1299]→[4.1493:1520](∅→∅)

#include "token-server.hh"
edit in src/hydra-queue-runner/state.hh at line 67

[4.1810]→[4.727:780](∅→∅)

std::unique_ptr<nix::TokenServer::Token> tokens;
edit in src/hydra-queue-runner/state.hh at line 411

[4.1569]→[4.1569:1906](∅→∅),[4.1906]→[4.1502:1503](∅→∅),[4.1502]→[4.1502:1503](∅→∅)

/* Token server to prevent threads from allocating too many big
strings concurrently while importing NARs from the build
machines. When a thread imports a NAR of size N, it will first
acquire N memory tokens, causing it to block until that many
tokens are available. */
nix::TokenServer memoryTokens;