from __future__ import print_function
import argparse
import os
import sys
from json import loads
from math import ceil
from collections import OrderedDict
from subprocess import Popen, PIPE
# Sentinel stored in place of a statistic whose value in the llvm-dwarfdump
# JSON output is 'overflowed'; anything computed from such a statistic is
# reported as this value instead of a number.
TAINT_VALUE = "tainted"
def init_plot(plt):
    """Apply the title, axis captions and tick styling shared by all plots.

    `plt` is the plotting module to configure (the callers in this file
    pass `matplotlib.pyplot`).
    """
    plt.title('Debug Location Statistics', fontweight='bold')

    # Both axis captions are set the same way, so drive them from a table.
    axis_captions = (
        (plt.xlabel, 'location buckets'),
        (plt.ylabel, 'number of variables in the location buckets'),
    )
    for set_caption, caption in axis_captions:
        set_caption(caption)

    x_tick_style = {'rotation': 45, 'fontsize': 'x-small'}
    plt.xticks(**x_tick_style)
    plt.yticks()
def finish_plot(plt):
    """Add the legend and grid, then save the figure as 'locstats.png'.

    `plt` is the plotting module holding the current figure (the callers in
    this file pass `matplotlib.pyplot`). A confirmation line is printed once
    the save call returns.
    """
    plt.legend()

    # Only horizontal major grid lines, drawn thin and grey.
    grid_style = dict(color='grey', which='major', axis='y',
                      linestyle='-', linewidth=0.3)
    plt.grid(**grid_style)

    plt.savefig('locstats.png')
    print('The plot was saved within "locstats.png".')
class LocationStats:
    """Debug location statistics of one file, with text and plot output.

    Each numeric attribute may hold TAINT_VALUE instead of a number; every
    percentage derived from a tainted attribute is reported as TAINT_VALUE.

    Attributes:
      file_name: name of the processed file (used in plot labels).
      variables_total: total number of source variables, or None.
      variables_total_locstats: number of variables processed by the
        location statistics.
      variables_with_loc: number of source variables with a location,
        or None.
      scope_bytes_covered: scope bytes covered by a location.
      scope_bytes: total scope bytes.
      variables_coverage_map: ordered mapping from coverage bucket label to
        the number of variables that fall into that bucket.
    """

    def __init__(self, file_name, variables_total, variables_total_locstats,
                 variables_with_loc, variables_scope_bytes_covered,
                 variables_scope_bytes, variables_coverage_map):
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    @staticmethod
    def _percentage(part, whole):
        """Return `part` as an integer percentage of `whole`.

        Returns TAINT_VALUE when either operand is tainted, and 0 when
        `whole` is 0 (nothing to cover) instead of dividing by zero.
        """
        if part == TAINT_VALUE or whole == TAINT_VALUE:
            return TAINT_VALUE
        if whole == 0:
            return 0
        # The rounding is kept as it always was: ceil() applies to the
        # numerator only and int() truncates the quotient.
        return int(ceil(part * 100.0) / whole)

    def get_pc_coverage(self):
        """Return the percentage of scope bytes covered by a location.

        The result is TAINT_VALUE if either byte count is tainted and 0 if
        there are no scope bytes at all.
        """
        return self._percentage(self.scope_bytes_covered, self.scope_bytes)

    def pretty_print(self):
        """Print the statistics as a text table.

        Returns -1 (after printing a notice) when there are no scope bytes,
        otherwise 0.
        """
        if self.scope_bytes == 0:
            print('No scope bytes found.')
            return -1

        pc_ranges_covered = self.get_pc_coverage()

        # Share of the processed variables that falls into each bucket.
        variables_coverage_per_map = {}
        for cov_bucket in coverage_buckets():
            variables_coverage_per_map[cov_bucket] = self._percentage(
                self.variables_coverage_map[cov_bucket],
                self.variables_total_locstats)

        print(' =================================================')
        print(' Debug Location Statistics ')
        print(' =================================================')
        print(' cov% samples percentage(~) ')
        print(' -------------------------------------------------')
        for cov_bucket in coverage_buckets():
            samples = self.variables_coverage_map[cov_bucket]
            percentage = variables_coverage_per_map[cov_bucket]
            if samples == TAINT_VALUE or \
               self.variables_total_locstats == TAINT_VALUE:
                # At least one column holds the taint string, which the
                # integer ('d') presentation type would reject.
                print(' {0:10} {1:8} {2:3}%'.
                      format(cov_bucket, samples, percentage))
            else:
                print(' {0:10} {1:8d} {2:3d}%'.
                      format(cov_bucket, samples, percentage))
        print(' =================================================')
        print(' -the number of debug variables processed: '
              + str(self.variables_total_locstats))
        print(' -PC ranges covered: ' + str(pc_ranges_covered) + '%')

        # The totals are only collected when all variables are processed,
        # so this section is skipped for the --only-* modes.
        if self.variables_total and self.variables_with_loc:
            total_availability = self._percentage(self.variables_with_loc,
                                                  self.variables_total)
            print(' -------------------------------------------------')
            print(' -total availability: ' + str(total_availability) + '%')
        print(' =================================================')

        return 0

    def draw_plot(self):
        """Draw a bar chart of the coverage buckets and save it to disk."""
        from matplotlib import pyplot as plt

        buckets = range(len(self.variables_coverage_map))
        plt.figure(figsize=(12, 8))
        init_plot(plt)
        # Hand matplotlib real lists rather than dict views.
        plt.bar(buckets, list(self.variables_coverage_map.values()),
                align='center',
                tick_label=list(self.variables_coverage_map.keys()),
                label='variables of {}'.format(self.file_name))

        # Show the overall PC range coverage in a text box on the chart.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)

    def draw_location_diff(self, locstats_to_compare):
        """Draw this file's buckets next to those of `locstats_to_compare`.

        `locstats_to_compare` is another LocationStats; its bars are drawn
        in red beside the bars of this object, and the chart is saved to
        disk.
        """
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        buckets = range(len(self.variables_coverage_map))
        buckets_to_compare = range(len(locstats_to_compare.variables_coverage_map))

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # Opposite-signed widths with align='edge' put the two bars of a
        # bucket on either side of its tick.
        ax.bar(buckets, list(self.variables_coverage_map.values()),
               align='edge', width=0.4,
               label='variables of {}'.format(self.file_name))
        ax.bar(buckets_to_compare,
               list(locstats_to_compare.variables_coverage_map.values()),
               color='r', align='edge', width=-0.4,
               label='variables of {}'.format(locstats_to_compare.file_name))
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        # One text box per file with its overall PC range coverage.
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.88,
                 '{} PC ranges covered: {}%'.
                 format(self.file_name, pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)
        plt.text(0.02, 0.83,
                 '{} PC ranges covered: {}%'.
                 format(locstats_to_compare.file_name,
                        pc_ranges_covered_to_compare),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)
def coverage_buckets():
    """Yield the coverage bucket labels in ascending order.

    The sequence is '0%', '(0%,10%)', the nine ten-point wide buckets from
    '[10%,20%)' up to '[90%,100%)', and finally '100%'.
    """
    labels = ['0%', '(0%,10%)']
    labels.extend('[{0}%,{1}%)'.format(lower, lower + 10)
                  for lower in range(10, 100, 10))
    labels.append('100%')
    for label in labels:
        yield label
def parse_locstats(opts, binary):
    """Collect the location statistics of `binary` into a LocationStats.

    Runs the llvm-dwarfdump found next to this script with --statistics on
    `binary` and reads the fields selected by `opts` from its JSON output:

      * opts.only_variables: statistics of local variables only;
      * opts.only_formal_parameters: statistics of formal parameters only;
      * otherwise: statistics of all variables, plus the source variable
        totals;
      * opts.ignore_debug_entry_values: use the bucket counts that exclude
        entry values and subtract the bytes covered by DW_OP_entry_value
        from the covered scope bytes.

    A field whose value is 'overflowed' is replaced by TAINT_VALUE and a
    warning is printed. The process exits with status 1 if llvm-dwarfdump
    cannot be started or its output is not valid JSON.
    """
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__),
                                      "llvm-dwarfdump")
    llvm_dwarfdump_stats_opt = "--statistics"

    try:
        subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
                        stdin=PIPE, stdout=PIPE, stderr=PIPE,
                        universal_newlines=True)
    except OSError as err:
        # E.g. llvm-dwarfdump is missing or not executable.
        print('error: Unable to run llvm-dwarfdump: ' + str(err))
        sys.exit(1)
    cmd_stdout, _ = subproc.communicate()

    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        # Only a decoding failure is handled here, so that interrupts and
        # exit requests are not swallowed.
        print('error: No valid llvm-dwarfdump statistics found.')
        sys.exit(1)

    def init_field(name):
        """Return the statistic `name`, or TAINT_VALUE if it overflowed."""
        if json_parsed[name] == 'overflowed':
            print('warning: "' + name + '" field overflowed.')
            return TAINT_VALUE
        return json_parsed[name]

    # The three modes read the same set of fields; only the entity name
    # embedded in the field names differs.
    variables_total = None
    variables_with_loc = None
    if opts.only_variables:
        count_kind, sum_kind = 'local vars', 'local_vars'
    elif opts.only_formal_parameters:
        count_kind, sum_kind = 'params', 'params'
    else:
        count_kind, sum_kind = 'variables', 'variables'
        variables_total = init_field('#source variables')
        variables_with_loc = init_field('#source variables with location')

    variables_total_locstats = init_field(
        '#{} processed by location statistics'.format(count_kind))
    variables_scope_bytes_covered = init_field(
        'sum_all_{}(#bytes in parent scope covered '
        'by DW_AT_location)'.format(sum_kind))
    variables_scope_bytes = init_field(
        'sum_all_{}(#bytes in parent scope)'.format(sum_kind))

    if not opts.ignore_debug_entry_values:
        bucket_field = '#{} with {} of parent scope covered by DW_AT_location'
    else:
        variables_scope_bytes_entry_values = init_field(
            'sum_all_{}(#bytes in parent scope covered '
            'by DW_OP_entry_value)'.format(sum_kind))
        # The subtraction is only possible when both operands are numbers.
        if variables_scope_bytes_covered != TAINT_VALUE and \
           variables_scope_bytes_entry_values != TAINT_VALUE:
            variables_scope_bytes_covered = variables_scope_bytes_covered \
                - variables_scope_bytes_entry_values
        bucket_field = '#{} - entry values with {} of parent scope ' \
                       'covered by DW_AT_location'

    variables_coverage_map = OrderedDict()
    for cov_bucket in coverage_buckets():
        variables_coverage_map[cov_bucket] = \
            init_field(bucket_field.format(count_kind, cov_bucket))

    return LocationStats(binary, variables_total, variables_total_locstats,
                         variables_with_loc, variables_scope_bytes_covered,
                         variables_scope_bytes, variables_coverage_map)
def parse_program_args(parser):
    """Register the tool's command line options on `parser` and parse them.

    `parser` is an argparse.ArgumentParser; the return value is the result
    of its parse_args() call. All switches are boolean flags that default
    to False; `file_names` collects one or more positional file arguments.
    """
    # (option string, help text) of every boolean switch, in help order.
    boolean_switches = (
        ('--only-variables',
         'calculate the location statistics only for local variables'),
        ('--only-formal-parameters',
         'calculate the location statistics only for formal parameters'),
        ('--ignore-debug-entry-values',
         'ignore the location statistics on locations with '
         'entry values'),
        ('--draw-plot',
         'show histogram of location buckets generated (requires '
         'matplotlib)'),
        ('--compare',
         'compare the debug location coverage on two files provided, '
         'and draw a plot showing the difference (requires '
         'matplotlib)'),
    )
    for option_string, help_text in boolean_switches:
        parser.add_argument(option_string, action='store_true',
                            default=False, help=help_text)

    parser.add_argument('file_names', nargs='+', type=str,
                        help='file to process')

    return parser.parse_args()
def verify_program_inputs(opts):
    """Check that the parsed options form a valid invocation.

    Prints an error message and returns False when no argument was given,
    both --only-* options are set, the number of files does not match the
    mode (two with --compare, one otherwise), or a plot is requested while
    matplotlib cannot be imported. Returns True otherwise.
    """
    # Pick the first rule that is violated, if any.
    error = None
    if len(sys.argv) < 2:
        error = 'error: Too few arguments.'
    elif opts.only_variables and opts.only_formal_parameters:
        error = 'error: Please use just one --only* option.'
    elif opts.compare:
        if len(opts.file_names) != 2:
            error = 'error: Please specify two files to process.'
    elif len(opts.file_names) != 1:
        error = 'error: Please specify only one file to process.'

    if error is not None:
        print(error)
        return False

    # Both plotting modes need matplotlib; probe for it up front.
    needs_matplotlib = opts.draw_plot or opts.compare
    if needs_matplotlib:
        try:
            import matplotlib
        except ImportError:
            print('error: matplotlib not found.')
            return False

    return True
def Main():
    """Entry point: parse the arguments, collect the statistics, report.

    Invalid arguments print the usage text and exit with status 1. With
    --compare the statistics of both files are drawn side by side; with
    --draw-plot the single file is plotted; otherwise a text table is
    printed, and the process exits with status 0 if the table could not be
    produced.
    """
    arg_parser = argparse.ArgumentParser()
    options = parse_program_args(arg_parser)

    if not verify_program_inputs(options):
        arg_parser.print_help()
        sys.exit(1)

    primary_stats = parse_locstats(options, options.file_names[0])

    if options.compare:
        other_stats = parse_locstats(options, options.file_names[1])
        primary_stats.draw_location_diff(other_stats)
        return

    if options.draw_plot:
        primary_stats.draw_plot()
    elif primary_stats.pretty_print() == -1:
        sys.exit(0)
# Run the tool only when this file is executed as a script.
if __name__ == '__main__':
    Main()
    # Main() returned without exiting early, so report success.
    sys.exit(0)