from __future__ import print_function
desc = '''Generate the difference of two YAML files into a new YAML file (works on
pair of directories too). A new attribute 'Added' is set to True or False
depending whether the entry is added or removed from the first input to the
next.
The tools requires PyYAML.'''
import yaml
try:
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader
import optrecord
import argparse
from collections import defaultdict
if __name__ == '__main__':
parser = argparse.ArgumentParser(description=desc)
parser.add_argument(
'yaml_dir_or_file_1',
help='An optimization record file or a directory searched for optimization '
'record files that are used as the old version for the comparison')
parser.add_argument(
'yaml_dir_or_file_2',
help='An optimization record file or a directory searched for optimization '
'record files that are used as the new version for the comparison')
parser.add_argument(
'--jobs',
'-j',
default=None,
type=int,
help='Max job count (defaults to %(default)s, the current CPU count)')
parser.add_argument(
'--max-size',
'-m',
default=100000,
type=int,
help='Maximum number of remarks stored in an output file')
parser.add_argument(
'--no-progress-indicator',
'-n',
action='store_true',
default=False,
help='Do not display any indicator of how many YAML files were read.')
parser.add_argument('--output', '-o', default='diff{}.opt.yaml')
args = parser.parse_args()
files1 = optrecord.find_opt_files(args.yaml_dir_or_file_1)
files2 = optrecord.find_opt_files(args.yaml_dir_or_file_2)
print_progress = not args.no_progress_indicator
all_remarks1, _, _ = optrecord.gather_results(files1, args.jobs, print_progress)
all_remarks2, _, _ = optrecord.gather_results(files2, args.jobs, print_progress)
added = set(all_remarks2.values()) - set(all_remarks1.values())
removed = set(all_remarks1.values()) - set(all_remarks2.values())
for r in added:
r.Added = True
for r in removed:
r.Added = False
result = list(added | removed)
for r in result:
r.recover_yaml_structure()
for i in range(0, len(result), args.max_size):
with open(args.output.format(i / args.max_size), 'w') as stream:
yaml.dump_all(result[i:i + args.max_size], stream)