In your hydra config, you can add an arbitrary number of <s3backup> sections, with the following options:

 * name (required): the S3 bucket to upload to.
 * jobs (required): a regular expression matched against the full job name (project:jobset:job); only builds of matching jobs are backed up to this bucket.
 * compression_type: how to compress the uploaded nars; one of xz, bzip2, or none. Defaults to bzip2.
 * prefix: a string prepended to every key the plugin creates in the bucket. Defaults to the empty string.
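For example, a section like the following (the bucket name, job regex, and prefix here are purely illustrative) would back up every build of a hypothetical nixpkgs:trunk jobset to the bucket my-hydra-cache, compressing the nars with xz:

    <s3backup>
    name = my-hydra-cache
    jobs = nixpkgs:trunk:.*
    compression_type = xz
    prefix = cache/
    </s3backup>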
After each build with an output (i.e. successful or failed-with-output builds), the build's output paths and their closure are uploaded to the bucket as .nar files, with corresponding .narinfos so the bucket can be used as a binary cache.
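Roughly, each generated .narinfo carries the usual binary-cache fields, filled in from the store path's metadata (the values below are placeholders, not real output):

    StorePath: /nix/store/<hash>-example
    URL: <hash>.nar
    Compression: bzip2
    FileHash: sha256:<hash of the uploaded nar file>
    FileSize: <size of the uploaded nar file in bytes>
    NarHash: <nar hash from the Nix database>
    NarSize: <nar size from the Nix database>
    References: <basenames of the referenced store paths>
    Deriver: <basename of the deriver, if known>
    System: <platform of the deriver, if known>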
This plugin requires that S3 credentials be available. It uses Net::Amazon::S3, which (in the nixpkgs version as of this commit) can retrieve S3 credentials from the AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables, or from EC2 instance metadata when using an IAM role.
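For example, when exercising the backup path by hand (the VM test below does essentially this), dummy credentials can be passed in through the environment of the process that runs the builds:

    AWS_ACCESS_KEY_ID=foo AWS_SECRET_ACCESS_KEY=bar hydra-build <build-id>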
This commit also adds a hydra-s3-backup-collect-garbage program, which uses hydra's gc roots directory to determine which paths are live and then deletes all files except nix-cache-info and any .nar or .narinfo files corresponding to live paths. hydra-s3-backup-collect-garbage respects the prefix configuration option, so it won't delete anything outside of the hierarchy you give it, and it has the same credential requirements as the plugin. A timer unit that runs the garbage collection periodically should probably be added to hydra-module.nix.
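A minimal sketch of such a unit, assuming hydra refers to the Hydra package used by the module and that the program ends up under its bin/ directory (the unit names and schedule are only suggestions):

    # Hypothetical addition to hydra-module.nix: run the S3 backup GC once a day.
    # The service needs the same S3 credentials and hydra environment as the plugin.
    systemd.services.hydra-s3-backup-collect-garbage = {
      description = "Hydra S3 backup garbage collection";
      serviceConfig.User = "hydra";
      serviceConfig.ExecStart = "${hydra}/bin/hydra-s3-backup-collect-garbage";
    };
    systemd.timers.hydra-s3-backup-collect-garbage = {
      wantedBy = [ "timers.target" ];
      timerConfig.OnCalendar = "daily";
    };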
Note that two of the added tests fail due to a bug in the interaction between Net::Amazon::S3 and fakes3. Those behaviors work against real S3, though, so I'm committing this even with the broken tests.
Signed-off-by: Shea Levy <shea@shealevy.com>
tests.s3backup = genAttrs' (system:
  with import <nixos/lib/testing.nix> { inherit system; };
  let hydra = builtins.getAttr system build; in # build."${system}"
  simpleTest {
    machine =
      { config, pkgs, ... }:
      { services.postgresql.enable = true;
        services.postgresql.package = pkgs.postgresql92;
        environment.systemPackages = [ hydra pkgs.rubyLibs.fakes3 ];
        virtualisation.memorySize = 2047;
        boot.kernelPackages = pkgs.linuxPackages_3_10;
        virtualisation.writableStore = true;
        networking.extraHosts = ''
          127.0.0.1 hydra.s3.amazonaws.com
        '';
      };

    testScript =
      ''
        $machine->waitForJob("postgresql");

        # Initialise the database and the state.
        $machine->mustSucceed
            ( "createdb -O root hydra"
            , "psql hydra -f ${hydra}/libexec/hydra/sql/hydra-postgresql.sql"
            , "mkdir /var/lib/hydra"
            , "mkdir /tmp/jobs"
            , "cp ${./tests/s3-backup-test.pl} /tmp/s3-backup-test.pl"
            , "cp ${./tests/api-test.nix} /tmp/jobs/default.nix"
            );

        # start fakes3
        $machine->mustSucceed("fakes3 --root /tmp/s3 --port 80 &>/dev/null &");
        $machine->waitForOpenPort("80");

        $machine->mustSucceed("cd /tmp && LOGNAME=root AWS_ACCESS_KEY_ID=foo AWS_SECRET_ACCESS_KEY=bar HYDRA_DBI='dbi:Pg:dbname=hydra;user=root;' HYDRA_CONFIG=${./tests/s3-backup-test.config} perl -I ${hydra}/libexec/hydra/lib -I ${hydra.perlDeps}/lib/perl5/site_perl ./s3-backup-test.pl >&2");
      '';
  });
package Hydra::Plugin::S3Backup;

use strict;
use parent 'Hydra::Plugin';
use File::Temp;
use File::Basename;
use Fcntl;
use IO::File;
use Net::Amazon::S3;
use Net::Amazon::S3::Client;
use Digest::SHA;
use Nix::Config;
use Nix::Store;
use Hydra::Model::DB;
use Hydra::Helper::CatalystUtils;

my $client = Net::Amazon::S3::Client->new( s3 => Net::Amazon::S3->new( retry => 1 ) );

my %compressors = (
    xz => "| $Nix::Config::xz",
    bzip2 => "| $Nix::Config::bzip2",
    none => ""
);

my $lockfile = Hydra::Model::DB::getHydraPath . "/.hydra-s3backup.lock";

sub buildFinished {
    my ($self, $build, $dependents) = @_;

    return unless $build->buildstatus == 0 or $build->buildstatus == 6;

    my $jobName = showJobName $build;
    my $job = $build->job;

    my $cfg = $self->{config}->{s3backup};
    my @config = defined $cfg ? ref $cfg eq "ARRAY" ? @$cfg : ($cfg) : ();

    my @matching_configs = ();
    foreach my $bucket_config (@config) {
        push @matching_configs, $bucket_config if $jobName =~ /^$bucket_config->{jobs}$/;
    }

    return unless @matching_configs;

    # !!! Maybe should do per-bucket locking?
    my $lockhandle = IO::File->new;
    open($lockhandle, "+>", $lockfile) or die "Opening $lockfile: $!";
    flock($lockhandle, Fcntl::LOCK_SH) or die "Read-locking $lockfile: $!";

    my @needed_paths = ();
    foreach my $output ($build->buildoutputs) {
        push @needed_paths, $output->path;
    }

    my %narinfos = ();
    my %compression_types = ();
    foreach my $bucket_config (@matching_configs) {
        my $compression_type =
            exists $bucket_config->{compression_type} ? $bucket_config->{compression_type} : "bzip2";
        die "Unsupported compression type $compression_type" unless exists $compressors{$compression_type};
        if (exists $compression_types{$compression_type}) {
            push @{$compression_types{$compression_type}}, $bucket_config;
        } else {
            $compression_types{$compression_type} = [ $bucket_config ];
            $narinfos{$compression_type} = [];
        }
    }

    my $build_id = $build->id;
    my $tempdir = File::Temp->newdir("s3-backup-nars-$build_id" . "XXXXX");

    my %seen = ();
    # Upload nars and build narinfos
    while (@needed_paths) {
        my $path = shift @needed_paths;
        next if exists $seen{$path};
        $seen{$path} = undef;
        my $hash = substr basename($path), 0, 32;

        my ($deriver, $narHash, $time, $narSize, $refs) = queryPathInfo($path, 0);
        my $system;
        if (defined $deriver and isValidPath($deriver)) {
            $system = derivationFromPath($deriver)->{platform};
        }
        foreach my $reference (@{$refs}) {
            push @needed_paths, $reference;
        }

        while (my ($compression_type, $configs) = each %compression_types) {
            my @incomplete_buckets = ();
            # Don't do any work if all the buckets have this path
            foreach my $bucket_config (@{$configs}) {
                my $bucket = $client->bucket( name => $bucket_config->{name} );
                my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
                push @incomplete_buckets, $bucket_config
                    unless $bucket->object( key => $prefix . "$hash.narinfo" )->exists;
            }
            next unless @incomplete_buckets;

            my $compressor = $compressors{$compression_type};
            system("$Nix::Config::binDir/nix-store --export $path $compressor > $tempdir/nar") == 0 or die;
            my $digest = Digest::SHA->new(256);
            $digest->addfile("$tempdir/nar");
            my $file_hash = $digest->hexdigest;
            my @stats = stat "$tempdir/nar" or die "Couldn't stat $tempdir/nar";
            my $file_size = $stats[7];

            my $narinfo = "";
            $narinfo .= "StorePath: $path\n";
            $narinfo .= "URL: $hash.nar\n";
            $narinfo .= "Compression: $compression_type\n";
            $narinfo .= "FileHash: sha256:$file_hash\n";
            $narinfo .= "FileSize: $file_size\n";
            $narinfo .= "NarHash: $narHash\n";
            $narinfo .= "NarSize: $narSize\n";
            $narinfo .= "References: " . join(" ", map { basename $_ } @{$refs}) . "\n";
            if (defined $deriver) {
                $narinfo .= "Deriver: " . basename $deriver . "\n";
                if (defined $system) {
                    $narinfo .= "System: $system\n";
                }
            }
            push @{$narinfos{$compression_type}}, { hash => $hash, info => $narinfo };

            foreach my $bucket_config (@incomplete_buckets) {
                my $bucket = $client->bucket( name => $bucket_config->{name} );
                my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
                my $nar_object = $bucket->object(
                    key => $prefix . "$hash.nar",
                    content_type => "application/x-nix-archive"
                );
                $nar_object->put_filename("$tempdir/nar");
            }
        }
    }

    # Upload narinfos
    while (my ($compression_type, $infos) = each %narinfos) {
        foreach my $bucket_config (@{$compression_types{$compression_type}}) {
            foreach my $info (@{$infos}) {
                my $bucket = $client->bucket( name => $bucket_config->{name} );
                my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
                my $narinfo_object = $bucket->object(
                    key => $prefix . $info->{hash} . ".narinfo",
                    content_type => "text/x-nix-narinfo"
                );
                $narinfo_object->put($info->{info}) unless $narinfo_object->exists;
            }
        }
    }
}

1;
#! /var/run/current-system/sw/bin/perl -w

use strict;
use File::Basename;
use Fcntl;
use IO::File;
use Net::Amazon::S3;
use Net::Amazon::S3::Client;
use Nix::Config;
use Nix::Store;
use Hydra::Model::DB;
use Hydra::Helper::Nix;

my $cfg = getHydraConfig()->{s3backup};
my @config = defined $cfg ? ref $cfg eq "ARRAY" ? @$cfg : ($cfg) : ();

exit 0 unless @config;

my $lockfile = Hydra::Model::DB::getHydraPath . "/.hydra-s3backup.lock";
my $lockhandle = IO::File->new;
open($lockhandle, ">", $lockfile) or die "Opening $lockfile: $!";
flock($lockhandle, Fcntl::LOCK_EX) or die "Write-locking $lockfile: $!";

my $client = Net::Amazon::S3::Client->new( s3 => Net::Amazon::S3->new( retry => 1 ) );
my $db = Hydra::Model::DB->new();

my $gcRootsDir = getGCRootsDir;
opendir DIR, $gcRootsDir or die;
my @roots = readdir DIR;
closedir DIR;

my @actual_roots = ();
foreach my $link (@roots) {
    next if $link eq "." || $link eq "..";
    push @actual_roots, $Nix::Config::storeDir . "/$link";
}

# Don't delete a nix-cache-info file, if present
my %closure = ( "nix-cache-info" => undef );
foreach my $path (computeFSClosure(0, 0, @actual_roots)) {
    my $hash = substr basename($path), 0, 32;
    $closure{"$hash.narinfo"} = undef;
    $closure{"$hash.nar"} = undef;
}

foreach my $bucket_config (@config) {
    my $bucket = $client->bucket( name => $bucket_config->{name} );
    my $prefix = exists $bucket_config->{prefix} ? $bucket_config->{prefix} : "";
    my $cache_stream = $bucket->list({ prefix => $prefix });
    until ($cache_stream->is_done) {
        foreach my $object ($cache_stream->items) {
            $object->delete unless exists $closure{basename($object->key)};
        }
    }
}

1;
<s3backup>
jobs = tests:basic:job
name = hydra
</s3backup>
use strict;
use File::Basename;
use Hydra::Model::DB;
use Hydra::Helper::Nix;
use Nix::Store;
use Cwd;

my $db = Hydra::Model::DB->new;

use Test::Simple tests => 6;

$db->resultset('Users')->create({ username => "root", emailaddress => 'root@invalid.org', password => '' });

$db->resultset('Projects')->create({name => "tests", displayname => "", owner => "root"});
my $project = $db->resultset('Projects')->update_or_create({name => "tests", displayname => "", owner => "root"});
my $jobset = $project->jobsets->create({name => "basic", nixexprinput => "jobs", nixexprpath => "default.nix", emailoverride => ""});

my $jobsetinput;

$jobsetinput = $jobset->jobsetinputs->create({name => "jobs", type => "path"});
$jobsetinput->jobsetinputalts->create({altnr => 0, value => getcwd . "/jobs"});
system("hydra-evaluator " . $jobset->project->name . " " . $jobset->name);

my $successful_hash;
foreach my $build ($jobset->builds->search({finished => 0})) {
    system("hydra-build " . $build->id);
    my @outputs = $build->buildoutputs->all;
    my $hash = substr basename($outputs[0]->path), 0, 32;
    if ($build->job->name eq "job") {
        ok(-e "/tmp/s3/hydra/$hash.nar", "The nar of a successful matched build is uploaded");
        ok(-e "/tmp/s3/hydra/$hash.narinfo", "The narinfo of a successful matched build is uploaded");
        $successful_hash = $hash;
    }
}

system("hydra-s3-backup-collect-garbage");
ok(-e "/tmp/s3/hydra/$successful_hash.nar", "The nar of a build that's a root is not removed by gc");
ok(-e "/tmp/s3/hydra/$successful_hash.narinfo", "The narinfo of a build that's a root is not removed by gc");

my $gcRootsDir = getGCRootsDir;
opendir DIR, $gcRootsDir or die;
while(readdir DIR) {
    next if $_ eq "." or $_ eq "..";
    unlink "$gcRootsDir/$_";
}
closedir DIR;

system("hydra-s3-backup-collect-garbage");
ok(not -e "/tmp/s3/hydra/$successful_hash.nar", "The nar of a build that's not a root is removed by gc");
ok(not -e "/tmp/s3/hydra/$successful_hash.narinfo", "The narinfo of a build that's not a root is removed by gc");