The hydra-update-gc-roots script is taking around 95 minutes on our Hydra instance (though a lot of that is I/O wait). This patch significantly reduces the number of database queries. In particular, the N most recent successful builds for each job in a jobset are now determined in a single query. Also, it removes the calls to readlink().
KJQWSRCCQEKF64L4NYYZ7VKAF2YEPYXXTRW6BI464P6Q6KU34TQAC C5W3WZ3YKLMDB7P6COIHDKSKSYDGAJBSIFIHFTEKRGSEJX7J7BNAC HPEG2RHVNHOPB5T4ZRXANIRBMVOVY3B5GFETJRYOTDJFVAYH2TQAC WZ3AEJ67LOG5L335AAC2BDLIJPIU4VSCGBMATBHDZC26ECRS5A6AC 2T42QGZDK23C5V4ZHO4R7EBJESIQ62GKWDBWVX7OBEE3YVBUNUFAC 4X6NS66QDWR4S4ZZFKN2QOJQII5ZYIMTDYRHK25N6NJSROVLHJ6QC KNAVU6BHK77BDJBQKRQ2G2MVULWIVNVU4NYIA76OBVVILFOU7FAQC HLYVEJX74DMSDR7VTGSLZCLUKQDZFDIK4FFDV4S27UPBN5JH4KDQC IN272KZWHENW2TCR3LWQ6OZAEESJL5S7AEL3GYLJTWHJUDE6HADAC E3LZ5H7Q2X64BIPQNQ65ISSS5NK44QEHWGADN7TQLY3ZCMFWIZFAC AEFNBIIWLCTYPU6PDYG6D7OYGVP3E7DAGMYPIXVKCVVHJAM63FNAC 6KJXJB7NZ5XJ3WGUWKFNT6LMBNYJN3IPHZ7B7H654D5B6LPOL5QAC ONAQU4BRGYO6MGPGIQIK5KF4JE4TBJMFFBU5XBKFTYQ5RPW7FOYQC XBU2ODSPGKXUPOV5CFKOBOJLCIU5BMMZ5YVWFR7CP2G5QQZ5GAJAC NEWDDAOFCDLYBXQCZNQ2GDH7HPAHVN3YRDL52ZYEMVA4YH6LBDXAC MOX7XJ2E3XISXA7V7T4W6GEAGECGWBZ4PYSLTYBVVR4VAKOI33CQC A63IHCMXH3F4V56HDXJLJVVHKXRSJCJMT2PWXXI2IW3J734J6SGQC O25D52TAMOPAK45N4II5XMWOBMPQJNHLW22M37COVY43EKNQBWJAC N22GPKYTOLZLBGTGDATQDVZ4R5APZEAOIA7L32X4UXBH4XNI7MWAC D5QIOJGPKQJIYBUCSC3MFJ3TXLPNZ2XMI37GXMFRVRFWWR2VMTFAC print STDERR " keeping build ", $build->id, " (",$build->system, "; ",
print STDERR " keeping ", ($build->finished ? "" : "scheduled "), "build ", $build->id, " (",$build->get_column('project'), ":", $build->get_column('jobset'), ":", $build->get_column('job'), "; ",$build->system, "; ",
print STDERR "warning: output ", $build->outpath, " has disappeared\n";
print STDERR " warning: output ", $build->outpath, " has disappeared\n" if $build->finished;}if (!$build->finished) {if (isValidPath($build->drvpath)) {addRoot $build->drvpath;} else {print STDERR " warning: derivation ", $build->drvpath, " has disappeared\n";}
# Go over all projects.
# Keep every build in every release of every project.
# The literal "exists" condition selects builds that have at least one
# row in releasemembers pointing at them.
print STDERR "*** looking for release members\n";
keepBuild $_ foreach $db->resultset('Builds')->search_literal(
    "exists (select 1 from releasemembers where build = me.id)",
    { order_by => ["project", "jobset", "job", "id"] });

# Keep all builds that have been marked as "keep".
# Only the columns listed in @columns are fetched.
print STDERR "*** looking for kept builds\n";
my @buildsToKeep = $db->resultset('Builds')->search(
    { finished => 1, keep => 1 },
    { order_by => ["project", "jobset", "job", "id"], columns => [ @columns ] });
keepBuild $_ foreach @buildsToKeep;
# Go over all jobs in this jobset.foreach my $job ($jobset->jobs->all) {print STDERR "*** looking for builds to keep in job ",$project->name, ":", $job->jobset->name, ":", $job->name, "\n";
print STDERR "*** looking for the $keepnr most recent successful builds of each job in jobset ",$project->name, ":", $jobset->name, "\n";
# Keep the N most recent successful builds for each job# and platform.# !!! Take time into account? E.g. don't delete builds# that are younger than N days.my @systems = $job->builds->search({ }, { select => ["system"], distinct => 1 })->all;foreach my $system (@systems) {my @recentBuilds = $job->builds->search({ finished => 1, buildStatus => 0 # == success, system => $system->system},{ order_by => 'me.id DESC', rows => $keepnr});keepBuild $_ foreach @recentBuilds;}}
# Keep the $keepnr most recent successful builds of every (job, system)
# pair in this jobset, using one query for the whole jobset.
#
# The inner "x" subquery lists each distinct (job, system) among the
# jobset's current builds (isCurrent = 1) together with "nth": the id of
# the $keepnr-th most recent successful build for that pair (offset
# $keepnr - 1), or 0 when fewer such builds exist.  The outer join then
# selects every finished, successful build whose id is >= nth.
my $recentSuccessfulIdsSql = join('',
    "select b2.id from Builds b2 join ",
    " (select distinct job, system, coalesce( ",
    " (select id from builds where project = b.project and jobset = b.jobset and job = b.job and system = b.system and finished = 1 and buildStatus = 0 order by id desc offset ? limit 1)",
    " , 0) nth from builds b where project = ? and jobset = ? and isCurrent = 1) x ",
    " on b2.project = ? and b2.jobset = ? and b2.job = x.job and b2.system = x.system and (id >= x.nth) where finished = 1 and buildStatus = 0",
);

# Bind values, in placeholder order: offset, then project/jobset for the
# inner subquery, then project/jobset again for the join condition.
my @recentSuccessfulBinds = (
    [ '', $keepnr - 1 ],
    [ '', $project->name ],
    [ '', $jobset->name ],
    [ '', $project->name ],
    [ '', $jobset->name ],
);

keepBuild $_ foreach $jobset->builds->search(
    { 'me.id' => { 'in' => \[ $recentSuccessfulIdsSql, @recentSuccessfulBinds ] } },
    { order_by => ["job", "system", "id"], columns => [ @columns ] },
);
}# Keep every build in every release in this project.print STDERR "*** keeping releases in project ", $project->name, "\n"if scalar $project->releases > 0;foreach my $release ($project->releases->all) {print STDERR "keeping release ", $release->name, "\n";keepBuild $_->build foreach $release->releasemembers;
# Keep all builds that have been marked as "keep".
# Only finished builds with keep = 1 are selected.
print STDERR "*** looking for kept builds\n";
my @buildsToKeep = $db->resultset('Builds')->search({finished => 1, keep => 1});
keepBuild $_ foreach @buildsToKeep;
# Register roots for every build that has not finished yet.
# A build whose derivation is no longer a valid path only gets a warning;
# otherwise its derivation is rooted, and its output too if it exists on disk.
foreach my $build ($db->resultset('Builds')->search({finished => 0})) {
    unless (isValidPath($build->drvpath)) {
        print STDERR "warning: derivation ", $build->drvpath, " has disappeared\n";
        next;
    }

    print STDERR "keeping scheduled build ", $build->id, " (",
        strftime("%Y-%m-%d %H:%M:%S", localtime($build->timestamp)), ")\n";

    registerRoot($build->drvpath);
    registerRoot($build->outpath) if -e $build->outpath;
}
# Hand every unfinished build to keepBuild, fetching only the columns
# listed in @columns.
foreach ($db->resultset('Builds')->search(
             { finished => 0 },
             { columns => [ @columns ] })) {
    keepBuild $_;
}
# Report on STDERR how many roots were kept and how many were deleted.
# NOTE(review): $rootsKept and $rootsDeleted are presumably counters
# maintained elsewhere in the script -- confirm.
print STDERR "kept $rootsKept roots, deleted $rootsDeleted roots\n";
-- Indexes added for hydra-update-gc-roots.
-- Each statement sits on its own line so that the trailing "--" comment of
-- the first cannot swallow the second.
create index IndexBuildsOnKeep on Builds(keep); -- used by hydra-update-gc-roots
create index IndexMostRecentSuccessfulBuilds on Builds(project, jobset, job, system, finished, buildStatus, id desc); -- used by hydra-update-gc-roots