Vacuum every 10 iterations, update 10k at a time.
5M6YEYUJNC7GT2PIIDSR6LCPOM2TAD6JE6YZQG5PHCMNVCXC36EQC #! /usr/bin/env perluse strict;use utf8;use Hydra::Model::DB;STDOUT->autoflush();STDERR->autoflush(1);binmode STDERR, ":encoding(utf8)";my $db = Hydra::Model::DB->new();my $vacuum = $db->storage->dbh->prepare("VACUUM;");my $dryRun = defined $ENV{'HYDRA_DRY_RUN'};my $batchSize = 10000;my $iterationsPerVacuum = 500;sub backfillJobsJobsetId {my ($skipLocked) = @_;my $logPrefix;if ($skipLocked) {$logPrefix = "(pass 1/2)";} else {$logPrefix = "(pass 2/2)";}print STDERR "$logPrefix Backfilling Jobs records where jobset_id is NULL...\n";my $totalToGoSth = $db->storage->dbh->prepare(<<QUERY);SELECT COUNT(*) FROM jobs WHERE jobset_id IS NULLQUERY$totalToGoSth->execute();my ($totalToGo) = $totalToGoSth->fetchrow_array;my $skipLockedStmt = $skipLocked ? "FOR UPDATE SKIP LOCKED" : "";my $update10kJobs = $db->storage->dbh->prepare(<<QUERY);UPDATE jobsSET jobset_id = (SELECT jobsets.idFROM jobsetsWHERE jobsets.name = jobs.jobsetAND jobsets.project = jobs.project)WHERE (jobs.project, jobs.jobset, jobs.name) in (SELECT jobsprime.project, jobsprime.jobset, jobsprime.nameFROM jobs jobsprimeWHERE jobsprime.jobset_id IS NULL$skipLockedStmtLIMIT ?);QUERYprint STDERR "$logPrefix Total Jobs records without a jobset_id: $totalToGo\n";my $iteration = 0;my $affected;do {$iteration++;$affected = $update10kJobs->execute($batchSize);print STDERR "$logPrefix (batch #$iteration; $totalToGo remaining) Jobs.jobset_id: affected $affected rows...\n";$totalToGo -= $affected;if ($iteration % $iterationsPerVacuum == 0) {print STDERR "$logPrefix (batch #$iteration) Vacuuming...\n";$vacuum->execute();}} while ($affected > 0);if ($skipLocked) {backfillJobsJobsetId(0);}}sub backfillBuildsJobsetId {my ($skipLocked) = @_;my $logPrefix;if ($skipLocked) {$logPrefix = "(pass 1/2)";print STDERR "$logPrefix Backfilling unlocked Builds records where jobset_id is NULL...\n";} else {$logPrefix = "(pass 2/2)";print STDERR "$logPrefix Backfilling all Builds records where jobset_id is NULL...\n";}my $skipLockedStmt = $skipLocked ? "FOR UPDATE SKIP LOCKED" : "";my $update10kBuilds = $db->storage->dbh->prepare(<<"QUERY");WITH updateprogress AS (UPDATE buildsSET jobset_id = (SELECT jobsets.idFROM jobsetsWHERE jobsets.name = builds.jobsetAND jobsets.project = builds.project)WHERE builds.id in (SELECT buildprime.idFROM builds buildprimeWHERE buildprime.jobset_id IS NULLAND buildprime.id >= ?ORDER BY buildprime.id$skipLockedStmtLIMIT ?)RETURNING id)SELECTcount(*) AS affected,max(updateprogress.id) AS highest_idFROM updateprogress;QUERYmy $lowestNullIdSth = $db->storage->dbh->prepare(<<QUERY);SELECT id FROM builds WHERE jobset_id IS NULL ORDER BY id LIMIT 1QUERY$lowestNullIdSth->execute();my ($highestId) = $lowestNullIdSth->fetchrow_array;my $totalToGoSth = $db->storage->dbh->prepare(<<QUERY);SELECT COUNT(*) FROM builds WHERE jobset_id IS NULL AND id >= ?QUERY$totalToGoSth->execute($highestId);my ($totalToGo) = $totalToGoSth->fetchrow_array;print STDERR "$logPrefix Total Builds records without a jobset_id: $totalToGo, starting at $highestId\n";my $iteration = 0;my $affected;do {my $previousHighId = $highestId;$iteration++;$update10kBuilds->execute($highestId, $batchSize);($affected, $highestId) = $update10kBuilds->fetchrow_array;print STDERR "$logPrefix (batch #$iteration; $totalToGo remaining) Builds.jobset_id: affected $affected rows; max ID: $previousHighId -> $highestId\n";$totalToGo -= $affected;if ($iteration % $iterationsPerVacuum == 0) {print STDERR "$logPrefix (batch #$iteration) Vacuuming...\n";$vacuum->execute();}} while ($affected > 0);if ($skipLocked) {backfillBuildsJobsetId(0);}}die "syntax: $0\n" unless @ARGV == 0;print STDERR "Beginning with a VACUUM\n";$vacuum->execute();backfillJobsJobsetId(1);backfillBuildsJobsetId(1);print STDERR "Ending with a VACUUM\n";$vacuum->execute();