""" This module is responsible for generating the 'index.html' cover report.

It parses the individual analyzer reports found in the output directory and
assembles them into a single summary report. """

import re
import os
import os.path
import sys
import shutil
import plistlib
import glob
import json
import logging
import datetime

from libscanbuild import duplicate_check
from libscanbuild.clang import get_version

__all__ = ['document']


def document(args):
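    """ Generates the cover report and returns the number of bugs/crashes. """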
html_reports_available = args.output_format in {'html', 'plist-html', 'sarif-html'}
sarif_reports_available = args.output_format in {'sarif', 'sarif-html'}
logging.debug('count crashes and bugs')
crash_count = sum(1 for _ in read_crashes(args.output))
bug_counter = create_counters()
for bug in read_bugs(args.output, html_reports_available):
bug_counter(bug)
result = crash_count + bug_counter.total
if html_reports_available and result:
use_cdb = os.path.exists(args.cdb)
logging.debug('generate index.html file')
prefix = commonprefix_from(args.cdb) if use_cdb else os.getcwd()
fragments = []
try:
if bug_counter.total:
fragments.append(bug_summary(args.output, bug_counter))
fragments.append(bug_report(args.output, prefix))
if crash_count:
fragments.append(crash_report(args.output, prefix))
assemble_cover(args, prefix, fragments)
copy_resource_files(args.output)
if use_cdb:
shutil.copy(args.cdb, args.output)
finally:
for fragment in fragments:
os.remove(fragment)
if sarif_reports_available:
logging.debug('merging sarif files')
merge_sarif_files(args.output)
return result


def assemble_cover(args, prefix, fragments):
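    """ Put together the report fragments into the final 'index.html'. """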
import getpass
import socket
if args.html_title is None:
args.html_title = os.path.basename(prefix) + ' - analyzer results'
with open(os.path.join(args.output, 'index.html'), 'w') as handle:
indent = 0
handle.write(reindent("""
|<!DOCTYPE html>
|<html>
| <head>
| <title>{html_title}</title>
| <link type="text/css" rel="stylesheet" href="scanview.css"/>
| <script type='text/javascript' src="sorttable.js"></script>
| <script type='text/javascript' src='selectable.js'></script>
| </head>""", indent).format(html_title=args.html_title))
handle.write(comment('SUMMARYENDHEAD'))
handle.write(reindent("""
| <body>
| <h1>{html_title}</h1>
| <table>
| <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
| <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
| <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
| <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
| <tr><th>Date:</th><td>{date}</td></tr>
| </table>""", indent).format(html_title=args.html_title,
user_name=getpass.getuser(),
host_name=socket.gethostname(),
current_dir=prefix,
cmd_args=' '.join(sys.argv),
clang_version=get_version(args.clang),
date=datetime.datetime.today(
).strftime('%c')))
for fragment in fragments:
with open(fragment, 'r') as input_handle:
shutil.copyfileobj(input_handle, handle)
handle.write(reindent("""
| </body>
|</html>""", indent))


def bug_summary(output_dir, bug_counter):
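    """ Creates an HTML fragment with a summary table of the found bugs. """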
name = os.path.join(output_dir, 'summary.html.fragment')
with open(name, 'w') as handle:
indent = 4
handle.write(reindent("""
|<h2>Bug Summary</h2>
|<table>
| <thead>
| <tr>
| <td>Bug Type</td>
| <td>Quantity</td>
| <td class="sorttable_nosort">Display?</td>
| </tr>
| </thead>
| <tbody>""", indent))
handle.write(reindent("""
| <tr style="font-weight:bold">
| <td class="SUMM_DESC">All Bugs</td>
| <td class="Q">{0}</td>
| <td>
| <center>
| <input checked type="checkbox" id="AllBugsCheck"
| onClick="CopyCheckedStateToCheckButtons(this);"/>
| </center>
| </td>
| </tr>""", indent).format(bug_counter.total))
for category, types in bug_counter.categories.items():
handle.write(reindent("""
| <tr>
| <th>{0}</th><th colspan=2></th>
| </tr>""", indent).format(category))
for bug_type in types.values():
handle.write(reindent("""
| <tr>
| <td class="SUMM_DESC">{bug_type}</td>
| <td class="Q">{bug_count}</td>
| <td>
| <center>
| <input checked type="checkbox"
| onClick="ToggleDisplay(this,'{bug_type_class}');"/>
| </center>
| </td>
| </tr>""", indent).format(**bug_type))
handle.write(reindent("""
| </tbody>
|</table>""", indent))
handle.write(comment('SUMMARYBUGEND'))
return name


def bug_report(output_dir, prefix):
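    """ Creates an HTML fragment from the analyzer bug reports. """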
pretty = prettify_bug(prefix, output_dir)
bugs = (pretty(bug) for bug in read_bugs(output_dir, True))
name = os.path.join(output_dir, 'bugs.html.fragment')
with open(name, 'w') as handle:
indent = 4
handle.write(reindent("""
|<h2>Reports</h2>
|<table class="sortable" style="table-layout:automatic">
| <thead>
| <tr>
| <td>Bug Group</td>
| <td class="sorttable_sorted">
| Bug Type
        |        <span id="sorttable_sortfwdind">&nbsp;&#x25BE;</span>
| </td>
| <td>File</td>
| <td>Function/Method</td>
| <td class="Q">Line</td>
| <td class="Q">Path Length</td>
| <td class="sorttable_nosort"></td>
| </tr>
| </thead>
| <tbody>""", indent))
handle.write(comment('REPORTBUGCOL'))
for current in bugs:
handle.write(reindent("""
| <tr class="{bug_type_class}">
| <td class="DESC">{bug_category}</td>
| <td class="DESC">{bug_type}</td>
| <td>{bug_file}</td>
| <td class="DESC">{bug_function}</td>
| <td class="Q">{bug_line}</td>
| <td class="Q">{bug_path_length}</td>
| <td><a href="{report_file}#EndPath">View Report</a></td>
| </tr>""", indent).format(**current))
handle.write(comment('REPORTBUG', {'id': current['report_file']}))
handle.write(reindent("""
| </tbody>
|</table>""", indent))
handle.write(comment('REPORTBUGEND'))
return name


def crash_report(output_dir, prefix):
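    """ Creates an HTML fragment from the analyzer failures (crashes). """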
pretty = prettify_crash(prefix, output_dir)
crashes = (pretty(crash) for crash in read_crashes(output_dir))
name = os.path.join(output_dir, 'crashes.html.fragment')
with open(name, 'w') as handle:
indent = 4
handle.write(reindent("""
|<h2>Analyzer Failures</h2>
|<p>The analyzer had problems processing the following files:</p>
|<table>
| <thead>
| <tr>
| <td>Problem</td>
| <td>Source File</td>
| <td>Preprocessed File</td>
| <td>STDERR Output</td>
| </tr>
| </thead>
| <tbody>""", indent))
for current in crashes:
handle.write(reindent("""
| <tr>
| <td>{problem}</td>
| <td>{source}</td>
| <td><a href="{file}">preprocessor output</a></td>
| <td><a href="{stderr}">analyzer std err</a></td>
| </tr>""", indent).format(**current))
handle.write(comment('REPORTPROBLEM', current))
handle.write(reindent("""
| </tbody>
|</table>""", indent))
handle.write(comment('REPORTCRASHES'))
return name


def read_crashes(output_dir):
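    """ Generates crash entries from the 'failures' directory of the output. """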
return (parse_crash(filename)
for filename in glob.iglob(os.path.join(output_dir, 'failures',
'*.info.txt')))


def read_bugs(output_dir, html):
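    """ Generates a unique sequence of bugs from the given output directory.

    Duplicates can occur when the same module is compiled multiple times
    with different compiler options; such bugs are reported only once. """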
def empty(file_name):
return os.stat(file_name).st_size == 0
duplicate = duplicate_check(
lambda bug: '{bug_line}.{bug_path_length}:{bug_file}'.format(**bug))
parser = parse_bug_html if html else parse_bug_plist
pattern = os.path.join(output_dir, '*.html' if html else '*.plist')
bug_files = (file for file in glob.iglob(pattern) if not empty(file))
for bug_file in bug_files:
for bug in parser(bug_file):
if not duplicate(bug):
yield bug


def merge_sarif_files(output_dir, sort_files=False):
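    """ Reads and merges all .sarif files found in the given output directory.

    Each file is treated as a separate run; embedded 'sarif:/runs/<n>' links
    in messages are re-indexed to match their position in the merged file. """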
def empty(file_name):
return os.stat(file_name).st_size == 0
def update_sarif_object(sarif_object, runs_count_offset):
if not isinstance(sarif_object, dict):
return sarif_object
if 'message' in sarif_object:
sarif_object['message'] = match_and_update_run(sarif_object['message'], runs_count_offset)
for key in sarif_object:
if isinstance(sarif_object[key], list):
arr = [update_sarif_object(entry, runs_count_offset) for entry in sarif_object[key]]
sarif_object[key] = arr
elif isinstance(sarif_object[key], dict):
sarif_object[key] = update_sarif_object(sarif_object[key], runs_count_offset)
else:
pass
return sarif_object
def match_and_update_run(message, runs_count_offset):
if 'text' not in message:
return message
pattern = re.compile(r'sarif:/runs/(\d+)')
text = message['text']
matches = re.finditer(pattern, text)
matches_list = list(matches)
for idx in range(len(matches_list) - 1, -1, -1):
match = matches_list[idx]
new_run_count = str(runs_count_offset + int(match.group(1)))
text = text[0:match.start(1)] + new_run_count + text[match.end(1):]
message['text'] = text
return message
sarif_files = (file for file in glob.iglob(os.path.join(output_dir, '*.sarif')) if not empty(file))
if sort_files:
sarif_files = list(sarif_files)
sarif_files.sort()
runs_count = 0
merged = {}
for sarif_file in sarif_files:
with open(sarif_file) as fp:
sarif = json.load(fp)
if 'runs' not in sarif:
continue
if not merged:
merged = sarif
else:
for run in sarif['runs']:
new_run = update_sarif_object(run, runs_count)
merged['runs'].append(new_run)
runs_count += len(sarif['runs'])
with open(os.path.join(output_dir, 'results-merged.sarif'), 'w') as out:
json.dump(merged, out, indent=4, sort_keys=True)


def parse_bug_plist(filename):
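    """ Returns the generator of bugs from a single .plist file. """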
with open(filename, 'rb') as fp:
content = plistlib.load(fp)
files = content.get('files')
for bug in content.get('diagnostics', []):
if len(files) <= int(bug['location']['file']):
logging.warning('Parsing bug from "%s" failed', filename)
continue
yield {
'result': filename,
'bug_type': bug['type'],
'bug_category': bug['category'],
'bug_line': int(bug['location']['line']),
'bug_path_length': int(bug['location']['col']),
'bug_file': files[int(bug['location']['file'])]
}


def parse_bug_html(filename):
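    """ Parses out the bug information from the HTML output of the analyzer. """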
patterns = [re.compile(r'<!-- BUGTYPE (?P<bug_type>.*) -->$'),
re.compile(r'<!-- BUGFILE (?P<bug_file>.*) -->$'),
re.compile(r'<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$'),
re.compile(r'<!-- BUGLINE (?P<bug_line>.*) -->$'),
re.compile(r'<!-- BUGCATEGORY (?P<bug_category>.*) -->$'),
re.compile(r'<!-- BUGDESC (?P<bug_description>.*) -->$'),
re.compile(r'<!-- FUNCTIONNAME (?P<bug_function>.*) -->$')]
endsign = re.compile(r'<!-- BUGMETAEND -->')
bug = {
'report_file': filename,
        'bug_function': 'n/a',  # default when the FUNCTIONNAME meta comment is missing
        'bug_category': 'Other',
'bug_line': 0,
'bug_path_length': 1
}
with open(filename, encoding='utf-8') as handler:
for line in handler.readlines():
if endsign.match(line):
break
for regex in patterns:
match = regex.match(line.strip())
if match:
bug.update(match.groupdict())
break
encode_value(bug, 'bug_line', int)
encode_value(bug, 'bug_path_length', int)
yield bug


def parse_crash(filename):
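    """ Parses out the crash information from a single 'failures' report file. """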
match = re.match(r'(.*)\.info\.txt', filename)
name = match.group(1) if match else None
with open(filename, mode='rb') as handler:
lines = [line.decode().rstrip() for line in handler.readlines()]
return {
'source': lines[0],
'problem': lines[1],
'file': name,
'info': name + '.info.txt',
'stderr': name + '.stderr.txt'
}


def category_type_name(bug):
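    """ Creates a bug attribute from the category and type of the bug.

    The result is used as a CSS class selector in the final report. """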
def smash(key):
return bug.get(key, '').lower().replace(' ', '_').replace("'", '')
return escape('bt_' + smash('bug_category') + '_' + smash('bug_type'))


def create_counters():
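    """ Creates a stateful counter function for bug statistics.

    The returned callable counts each bug it receives: 'total' holds the
    number of bugs seen, while 'categories' is a two level classification
    (bug category, then bug type) with per-type counts. """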
def predicate(bug):
bug_category = bug['bug_category']
bug_type = bug['bug_type']
current_category = predicate.categories.get(bug_category, dict())
current_type = current_category.get(bug_type, {
'bug_type': bug_type,
'bug_type_class': category_type_name(bug),
'bug_count': 0
})
current_type.update({'bug_count': current_type['bug_count'] + 1})
current_category.update({bug_type: current_type})
predicate.categories.update({bug_category: current_category})
predicate.total += 1
predicate.total = 0
predicate.categories = dict()
return predicate


def prettify_bug(prefix, output_dir):
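    """ Creates a function that prepares a bug entry for rendering:
    escapes values and makes file paths relative. """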
def predicate(bug):
bug['bug_type_class'] = category_type_name(bug)
encode_value(bug, 'bug_file', lambda x: escape(chop(prefix, x)))
encode_value(bug, 'bug_category', escape)
encode_value(bug, 'bug_type', escape)
encode_value(bug, 'report_file', lambda x: escape(chop(output_dir, x)))
return bug
return predicate


def prettify_crash(prefix, output_dir):
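    """ Creates a function that prepares a crash entry for rendering:
    escapes values and makes file paths relative. """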
def predicate(crash):
encode_value(crash, 'source', lambda x: escape(chop(prefix, x)))
encode_value(crash, 'problem', escape)
encode_value(crash, 'file', lambda x: escape(chop(output_dir, x)))
encode_value(crash, 'info', lambda x: escape(chop(output_dir, x)))
encode_value(crash, 'stderr', lambda x: escape(chop(output_dir, x)))
return crash
return predicate


def copy_resource_files(output_dir):
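    """ Copies the resource files (css, javascript) next to the report. """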
this_dir = os.path.dirname(os.path.realpath(__file__))
for resource in os.listdir(os.path.join(this_dir, 'resources')):
shutil.copy(os.path.join(this_dir, 'resources', resource), output_dir)


def encode_value(container, key, encode):
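    """ Runs 'encode' on 'container[key]' and stores the result back (if the key is present). """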
if key in container:
value = encode(container[key])
container.update({key: value})


def chop(prefix, filename):
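    """ Makes 'filename' relative to 'prefix' (returns it unchanged when the prefix is empty). """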
return filename if not len(prefix) else os.path.relpath(filename, prefix)


def escape(text):
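    """ Escapes the HTML special characters with their entities. """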
escape_table = {
        '&': '&amp;',
        '"': '&quot;',
        "'": '&apos;',
        '>': '&gt;',
        '<': '&lt;'
}
return ''.join(escape_table.get(c, c) for c in text)


def reindent(text, indent):
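    """ Reindents the '|' prefixed template lines by the given indentation. """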
result = ''
for line in text.splitlines():
if len(line.strip()):
result += ' ' * indent + line.split('|')[1] + os.linesep
return result


def comment(name, opts=dict()):
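    """ Formats a name and optional attributes as an HTML comment line. """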
attributes = ''
for key, value in opts.items():
attributes += ' {0}="{1}"'.format(key, value)
return '<!-- {0}{1} -->{2}'.format(name, attributes, os.linesep)


def commonprefix_from(filename):
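    """ Creates a file prefix from the entries of a compilation database. """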
with open(filename, 'r') as handle:
return commonprefix(item['file'] for item in json.load(handle))


def commonprefix(files):
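    """ Returns the longest common directory prefix of the given files.

    (Unlike os.path.commonprefix, the result is always a directory.) """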
result = None
for current in files:
if result is not None:
result = os.path.commonprefix([result, current])
else:
result = current
if result is None:
return ''
elif not os.path.isdir(result):
return os.path.dirname(result)
else:
return os.path.abspath(result)