nuudlman/llvm: clang/test/AST/gen_ast_dump_json

#!/usr/bin/env python3

from __future__ import print_function
from collections import OrderedDict
from shutil import copyfile
import argparse
import json
import os
import re
import subprocess
import sys
import tempfile


def normalize(dict_var):
    """Replace machine-specific string values of an AST dump, in place.

    Walks ``dict_var`` recursively (nested ``OrderedDict`` values and
    ``OrderedDict`` elements of list values) and rewrites string values so
    that they no longer depend on the machine that produced the dump:

    * a string other than ``"0x0"`` that starts with a hexadecimal literal
      (``0x`` followed by hex digits) becomes ``0x{{.*}}``;
    * a string that is the path of an existing file becomes ``{{.*}}``;
    * otherwise the string is split on single spaces and every token of the
      form ``<existing file>[:a[:b]]`` has its file part replaced by
      ``{{.*}}`` while the trailing ``:a:b`` fields are kept.

    Non-string scalars and strings stored directly inside lists are left
    untouched.

    Args:
        dict_var: the mapping to rewrite; it is modified in place.
    """
    for k, v in dict_var.items():
        if isinstance(v, OrderedDict):
            normalize(v)
        elif isinstance(v, list):
            for e in v:
                if isinstance(e, OrderedDict):
                    normalize(e)
        elif isinstance(v, str):
            # re.match only anchors at the start, so any value that begins
            # with a hex literal is replaced as a whole.
            if v != "0x0" and re.match(r"0x[0-9A-Fa-f]+", v):
                dict_var[k] = '0x{{.*}}'
            elif os.path.isfile(v):
                dict_var[k] = '{{.*}}'
            else:
                out_splits = []
                for split in v.split(' '):
                    # At most two trailing fields (e.g. line and column) are
                    # peeled off the right-hand side of the token.
                    inner_splits = split.rsplit(':', 2)
                    if os.path.isfile(inner_splits[0]):
                        # Keep however many trailing fields are present; a
                        # token may carry two, one or none of them.
                        out_splits.append(
                            ':'.join(['{{.*}}'] + inner_splits[1:]))
                    else:
                        out_splits.append(split)

                dict_var[k] = ' '.join(out_splits)


def filter_json(dict_var, filters, out):
    """Collect the nodes of ``dict_var`` that match one of ``filters``.

    A node matches when any of its string values is contained in
    ``filters``; the node itself is then appended to ``out`` and the rest of
    that node is not examined. Values visited before the matching string
    are still searched recursively: nested ``OrderedDict`` values and
    ``OrderedDict`` elements of list values.

    Args:
        dict_var: the node to search.
        filters: container the string values are tested against with ``in``.
        out: list that receives the matching nodes, in visiting order.
    """
    for value in dict_var.values():
        if type(value) is str:
            if value in filters:
                out.append(dict_var)
                return
            continue

        if isinstance(value, OrderedDict):
            candidates = (value,)
        elif isinstance(value, list):
            candidates = value
        else:
            continue

        for child in candidates:
            if isinstance(child, OrderedDict):
                filter_json(child, filters, out)


def default_clang_path():
    """Return the path of a ``clang`` file located next to this script.

    Returns:
        The joined path when a regular file named ``clang`` exists in the
        directory of this script, otherwise ``None``.
    """
    script_dir = os.path.dirname(__file__)
    candidate = os.path.join(script_dir, "clang")
    return candidate if os.path.isfile(candidate) else None


def main():
    """Parse the command line and process every requested source file.

    Exits with an error message when no usable clang binary is available,
    and, after all files have been attempted, when ``process_file`` reported
    a failure for at least one of them.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--clang", help="The clang binary (could be a relative or absolute path)",
                        action="store", default=default_clang_path())
    parser.add_argument("--source", help="the source file(s). Without --update, the command used to generate the JSON "
                                         "will be of the format <clang> -cc1 -ast-dump=json <opts> <source>",
                        action="store", nargs=argparse.ONE_OR_MORE, required=True)
    parser.add_argument("--filters", help="comma separated list of AST filters. Ex: --filters=TypedefDecl,BuiltinType",
                        action="store", default='')
    update_or_generate_group = parser.add_mutually_exclusive_group()
    update_or_generate_group.add_argument("--update", help="Update the file in-place", action="store_true")
    update_or_generate_group.add_argument("--opts", help="other options",
                                          action="store", default='', type=str)
    parser.add_argument("--update-manual", help="When using --update, also update files that do not have the "
                                                "autogenerated disclaimer", action="store_true")
    # --source is required with at least one value, so argparse has already
    # rejected a command line without source files at this point.
    args = parser.parse_args()

    # The default is None when no clang file sits next to this script;
    # os.path.abspath(None) would raise a TypeError instead of a usable message.
    if not args.clang:
        sys.exit("No clang binary found next to this script; specify one with --clang.")

    clang_binary = os.path.abspath(args.clang)
    if not os.path.isfile(clang_binary):
        sys.exit("clang binary specified not present.")

    failed_sources = []
    for src in args.source:
        status = process_file(src, clang_binary, cmdline_filters=args.filters,
                              cmdline_opts=args.opts, do_update=args.update,
                              force_update=args.update_manual)
        # process_file returns -1 on failure and 0 or None otherwise; keep
        # going so that one bad file does not block the remaining ones.
        if status:
            failed_sources.append(src)

    if failed_sources:
        sys.exit("Failed to process: " + ", ".join(failed_sources))


def _decode_json_documents(json_str):
    """Return the consecutive top-level JSON documents found in ``json_str``.

    Documents may be separated by any amount of whitespace. Decoding stops
    silently at the first position that does not hold a valid JSON document;
    the documents decoded up to that point are returned.
    """
    decoder = json.JSONDecoder(object_hook=OrderedDict)
    documents = []
    pos = 0
    end = len(json_str)
    while True:
        # raw_decode does not accept leading whitespace, so skip it here.
        while pos < end and json_str[pos].isspace():
            pos += 1
        if pos >= end:
            break
        try:
            document, pos = decoder.raw_decode(json_str, pos)
        except ValueError:
            # json.JSONDecodeError is a ValueError: nothing decodable is left.
            break
        documents.append(document)
    return documents


def _format_check_block(out_ast):
    """Return the CHECK lines for one AST node as a single string."""
    # The first two lines of the indented dump are not emitted as checks.
    dump_lines = json.dumps(out_ast, indent=1, ensure_ascii=False).splitlines()[2:]
    parts = ['\n\n', "// CHECK-NOT: {{^}}Dumping\n"]
    for index, dump_line in enumerate(dump_lines):
        directive = "CHECK" if index == 0 else "CHECK-NEXT"
        parts.append('// %s: %s\n' % (directive, dump_line.rstrip()))
    return ''.join(parts)


def process_file(source_file, clang_binary, cmdline_filters, cmdline_opts,
                 do_update, force_update):
    """Run clang on ``source_file`` and write the JSON AST dump as CHECK lines.

    The content of ``source_file`` up to the autogenerated note is copied,
    followed by the note and one group of CHECK lines per selected AST node.
    With ``do_update`` the result replaces ``source_file``; otherwise it is
    written next to it with ``-json`` inserted before the file extension.

    Args:
        source_file: path of the test source to process.
        clang_binary: path of the clang executable to run.
        cmdline_filters: comma separated values used to select AST nodes;
            when empty in update mode, the filters recorded below the note in
            ``source_file`` are reused.
        cmdline_opts: extra clang options, split on whitespace; only used
            when ``do_update`` is false.
        do_update: take the clang options from the ``RUN:`` line that must be
            the first line of ``source_file`` and rewrite the file in place.
        force_update: with ``do_update``, also process files that do not
            contain the autogenerated note.

    Returns:
        0 on success, -1 when the clang command failed, and None when the
        file was skipped because it is not autogenerated.

    Exits the program (``sys.exit``) when, in update mode, the first line
    lacks ``RUN: %clang_cc1``, ``-ast-dump=json`` or ``%s``.
    """
    note_firstline = "// NOTE: CHECK lines have been autogenerated by " \
                     "gen_ast_dump_json_test.py"
    filters_line_prefix = "// using --filters="
    note = note_firstline

    cmd = [clang_binary, "-cc1"]
    if do_update:
        # When updating the first line of the test must be a RUN: line
        with open(source_file, "r") as srcf:
            first_line = srcf.readline()
            found_autogenerated_line = False
            filters_line = None
            for line in srcf:
                if found_autogenerated_line:
                    # Only the line directly below the note may hold filters.
                    if line.startswith(filters_line_prefix):
                        filters_line = line[len(filters_line_prefix):].rstrip()
                    break
                if line.startswith(note_firstline):
                    found_autogenerated_line = True
        if not found_autogenerated_line and not force_update:
            print("Not updating", source_file, "since it is not autogenerated.",
                  file=sys.stderr)
            return
        if not cmdline_filters and filters_line:
            cmdline_filters = filters_line
            print("Inferred filters as '" + cmdline_filters + "'")

        if "RUN: %clang_cc1 " not in first_line:
            sys.exit("When using --update the first line of the input file must contain RUN: %clang_cc1")
        clang_start = first_line.find("%clang_cc1") + len("%clang_cc1")
        file_check_idx = first_line.rfind("| FileCheck")
        # rfind signals "not found" with -1, which is truthy; compare
        # explicitly so the tail of the RUN line is not cut off.
        if file_check_idx != -1:
            dump_cmd = first_line[clang_start:file_check_idx]
        else:
            dump_cmd = first_line[clang_start:].rstrip()
        print("Inferred run arguments as '", dump_cmd, "'", sep="")
        options = dump_cmd.split()
        if "-ast-dump=json" not in options:
            sys.exit("ERROR: RUN: line does not contain -ast-dump=json")
        if "%s" not in options:
            sys.exit("ERROR: RUN: line does not contain %s")
        # The real source path is appended to the command below.
        options.remove("%s")
    else:
        options = cmdline_opts.split()
        options.append("-ast-dump=json")
    cmd.extend(options)
    using_ast_dump_filter = any('ast-dump-filter' in arg for arg in cmd)
    cmd.append(source_file)
    print("Will run", cmd)
    filters = set()
    if cmdline_filters:
        note += "\n" + filters_line_prefix + cmdline_filters
        filters = set(cmdline_filters.split(','))
    print("Will use the following filters:", filters)

    try:
        json_str = subprocess.check_output(cmd).decode()
    except Exception as ex:
        print("The clang command failed with %s" % ex)
        return -1

    out_asts = []
    if using_ast_dump_filter:
        # If we're using a filter, then we might have multiple JSON objects
        # in the output, so each document is decoded separately.
        for j in _decode_json_documents(json_str):
            normalize(j)
            out_asts.append(j)
    else:
        j = json.loads(json_str, object_pairs_hook=OrderedDict)
        normalize(j)

        if filters:
            filter_json(j, filters, out_asts)
        else:
            out_asts.append(j)

    # The result is assembled in a temporary file because in update mode the
    # destination is the very file that is being read.
    tmp = tempfile.NamedTemporaryFile("w", delete=False)
    try:
        with tmp:
            with open(source_file, "r") as srcf:
                for line in srcf:
                    # copy up to the note:
                    if line.rstrip() == note_firstline:
                        break
                    tmp.write(line)
            tmp.write(note + "\n")
            for out_ast in out_asts:
                tmp.write(_format_check_block(out_ast))
        # The temporary file is closed at this point and can be copied.
        if do_update:
            print("Updating json appended source file to %s." % source_file)
            copyfile(tmp.name, source_file)
        else:
            # splitext only looks at the last path component, so names
            # without an extension get a plain "-json" suffix.
            root, ext = os.path.splitext(source_file)
            dest_path = root + "-json" + ext
            print("Writing json appended source file to %s." % dest_path)
            copyfile(tmp.name, dest_path)
    finally:
        # Remove the temporary file even when writing or copying failed.
        os.remove(tmp.name)
    return 0


# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()