from __future__ import print_function
import sys
import re
import os
import subprocess
import multiprocessing
import argparse
def dumpbin_get_symbols(lib):
    """Yield the names of defined external symbols in *lib* using dumpbin.

    Runs ``dumpbin /symbols`` on *lib* and yields the symbol name from every
    output line that mentions ``SECT`` and ``External`` followed by
    ``| <name>``.

    Raises OSError (from subprocess.Popen) on first iteration if dumpbin
    cannot be started.
    """
    # Raw string: the pattern uses regex escapes that are not valid Python
    # string escapes.
    pattern = re.compile(r"^.+SECT.+External\s+\|\s+(\S+).*$")
    process = subprocess.Popen(['dumpbin','/symbols',lib], bufsize=1,
                               stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                               universal_newlines=True)
    # Close stdin straight away so the tool cannot block waiting for input.
    process.stdin.close()
    try:
        for line in process.stdout:
            match = pattern.match(line)
            if match:
                yield match.group(1)
    finally:
        # Also runs when the consumer abandons the generator early, so the
        # pipe is closed and the child process is always reaped.
        process.stdout.close()
        process.wait()
def nm_get_symbols(lib):
    """Yield the names of defined external symbols in *lib* using nm.

    Runs ``nm -P`` on *lib* (with the extra flags ``-Xany -C -p`` when
    running on AIX) and yields the first field of every output line whose
    type letter is one of ``B D G R S T V W``.

    Raises OSError (from subprocess.Popen) on first iteration if nm cannot
    be started.
    """
    if sys.platform.startswith('aix'):
        command = ['nm','-P','-Xany','-C','-p',lib]
    else:
        command = ['nm','-P',lib]
    # Expected layout: name, type letter, value and an optional fourth field.
    # The last whitespace run may be just the trailing newline, which is why
    # the final field is allowed to be empty.  Raw string: the pattern uses
    # regex escapes that are not valid Python string escapes.
    pattern = re.compile(r"^(\S+)\s+[BDGRSTVW]\s+\S+\s+\S*$")
    process = subprocess.Popen(command, bufsize=1,
                               stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                               universal_newlines=True)
    # Close stdin straight away so the tool cannot block waiting for input.
    process.stdin.close()
    try:
        for line in process.stdout:
            match = pattern.match(line)
            if match:
                yield match.group(1)
    finally:
        # Also runs when the consumer abandons the generator early, so the
        # pipe is closed and the child process is always reaped.
        process.stdout.close()
        process.wait()
def readobj_get_symbols(lib):
    """Yield the names of defined external symbols in *lib* using llvm-readobj.

    Runs ``llvm-readobj --symbols`` on *lib*.  The most recent ``Name:`` and
    ``Section:`` values are remembered, and when a ``StorageClass:`` line is
    seen the remembered name is yielded if the storage class is ``External``
    and the section is neither ``IMAGE_SYM_ABSOLUTE`` nor
    ``IMAGE_SYM_UNDEFINED``.

    Raises OSError (from subprocess.Popen) on first iteration if
    llvm-readobj cannot be started.
    """
    # Raw strings: the patterns use regex escapes that are not valid Python
    # string escapes.
    name_re = re.compile(r'Name: (\S+)')
    section_re = re.compile(r'Section: (\S+)')
    storageclass_re = re.compile(r'StorageClass: (\S+)')
    process = subprocess.Popen(['llvm-readobj','--symbols',lib], bufsize=1,
                               stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                               universal_newlines=True)
    # Close stdin straight away so the tool cannot block waiting for input.
    process.stdin.close()
    # Initialised up front so that a StorageClass line appearing before any
    # Name/Section line is ignored instead of raising UnboundLocalError.
    name = None
    section = None
    try:
        for line in process.stdout:
            match = name_re.search(line)
            if match:
                name = match.group(1)
            match = section_re.search(line)
            if match:
                section = match.group(1)
            match = storageclass_re.search(line)
            if not match:
                continue
            if name is None or section is None:
                continue
            storageclass = match.group(1)
            if section != 'IMAGE_SYM_ABSOLUTE' and \
               section != 'IMAGE_SYM_UNDEFINED' and \
               storageclass == 'External':
                yield name
    finally:
        # Also runs when the consumer abandons the generator early, so the
        # pipe is closed and the child process is always reaped.
        process.stdout.close()
        process.wait()
def dumpbin_is_32bit_windows(lib):
    """Return True if ``dumpbin /headers`` reports *lib* as an x86 binary.

    Only the first output line of the form ``... machine (<arch>)`` is
    examined; reading stops there and the pipe is closed, so the rest of the
    tool's output is never consumed.  Returns False when no such line is
    found.

    Raises OSError (from subprocess.Popen) if dumpbin cannot be started.
    """
    # Raw string: the pattern uses regex escapes that are not valid Python
    # string escapes.
    pattern = re.compile(r'.+machine \((\S+)\)')
    process = subprocess.Popen(['dumpbin','/headers',lib], bufsize=1,
                               stdout=subprocess.PIPE, stdin=subprocess.PIPE,
                               universal_newlines=True)
    # Close stdin straight away so the tool cannot block waiting for input.
    process.stdin.close()
    retval = False
    try:
        for line in process.stdout:
            match = pattern.match(line)
            if match:
                retval = (match.group(1) == 'x86')
                break
    finally:
        # Close the pipe and reap the child even if reading fails part way.
        process.stdout.close()
        process.wait()
    return retval
def objdump_is_32bit_windows(lib):
    """Return True if ``objdump -f`` reports *lib* as ``pe-i386``.

    The decision is made on the first output line of the form
    ``... file format <format>``; False is returned when there is no such
    line.

    Raises OSError if objdump cannot be started and
    subprocess.CalledProcessError if it exits with a non-zero status.
    """
    output = subprocess.check_output(['objdump','-f',lib],
                                     universal_newlines=True)
    # Raw string: the pattern uses a regex escape that is not a valid Python
    # string escape.
    pattern = re.compile(r'.+file format (\S+)')
    for line in output.splitlines():
        match = pattern.match(line)
        if match:
            return (match.group(1) == 'pe-i386')
    return False
def readobj_is_32bit_windows(lib):
    """Return True if ``llvm-readobj --file-header`` reports ``COFF-i386``.

    The decision is made on the first output line that starts with
    ``Format: <format>``; False is returned when there is no such line.

    Raises OSError if llvm-readobj cannot be started and
    subprocess.CalledProcessError if it exits with a non-zero status.
    """
    output = subprocess.check_output(['llvm-readobj','--file-header',lib],
                                     universal_newlines=True)
    # Raw string: the pattern uses a regex escape that is not a valid Python
    # string escape.
    pattern = re.compile(r'Format: (\S+)')
    for line in output.splitlines():
        match = pattern.match(line)
        if match:
            return (match.group(1) == 'COFF-i386')
    return False
def aix_is_32bit_windows(lib):
    """Report that *lib* is not a 32-bit Windows binary.

    This is the detector selected when running on AIX; it does not inspect
    *lib* and always answers False.
    """
    return False
def should_keep_microsoft_symbol(symbol, calling_convention_decoration):
    """Decide whether a Microsoft-mangled *symbol* should be exported.

    Returns the name to export (which may differ from *symbol* when calling
    convention decoration is stripped) or None when the symbol is discarded.

    symbol -- symbol name as reported by the symbol-listing tool.
    calling_convention_decoration -- when true, a leading ``_`` or ``@`` and
        any trailing ``@...`` part are removed from unmangled names.
    """
    # Names without '?' are not C++-mangled: keep them, minus any decoration.
    if '?' not in symbol:
        if calling_convention_decoration:
            match = re.match('[_@]([^@]+)', symbol)
            if match:
                return match.group(1)
        return symbol
    # Symbols matching ??$getAs@...@Type@clang@@ are always kept; this check
    # must come before the generic ??$ rejection just below.
    if re.match(r'\?\?\$getAs@.+@Type@clang@@', symbol):
        return symbol
    # Every other symbol starting with ??$ is discarded.
    if symbol.startswith('??$'):
        return None
    # Symbols starting with ??_G or ??_E are discarded.
    if symbol.startswith(('??_G', '??_E')):
        return None
    # Symbols starting with ??0?$ or ??1?$ are discarded.
    if symbol.startswith(('??0?$', '??1?$')):
        return None
    # Anything containing ?A@ or ?A0x<hex>@ is discarded.
    # NOTE(review): presumably the anonymous-namespace marker -- confirm.
    if re.search(r'\?A(0x\w+)?@', symbol):
        return None
    # Keep llvm:: / clang:: symbols whose text after "llvm@@" / "clang@@"
    # has the shape: capital letter, optional capitals/digits/underscores, a
    # letter in A-J or Q, then something ending in X, @ or Z.
    # NOTE(review): presumably a heuristic for function-type manglings --
    # confirm against the MSVC mangling rules.
    if re.search('(llvm|clang)@@[A-Z][A-Z0-9_]*[A-JQ].+(X|.+@|.*Z)$', symbol):
        return symbol
    return None
def should_keep_itanium_symbol(symbol, calling_convention_decoration):
    """Decide whether an Itanium-mangled *symbol* should be exported.

    Returns the name to export (with one leading underscore removed when
    *calling_convention_decoration* is true) or None when the symbol is
    discarded.

    symbol -- symbol name as reported by the symbol-listing tool.
    calling_convention_decoration -- when true, a single leading ``_`` is
        stripped from the symbol before it is examined.
    """
    if calling_convention_decoration and symbol.startswith('_'):
        symbol = symbol[1:]
    # Names that start with neither '_' nor '.' are kept as they are.
    if not symbol.startswith(('_', '.')):
        return symbol
    # Only manglings of the form _Z[TV|TT|TI|TS]N... are candidates; anything
    # else is discarded.
    match = re.match('_Z(T[VTIS])?(N.+)', symbol)
    if not match:
        return None
    # Too-complex names are discarded, but when the parser simply gives up
    # (no names returned) the symbol is kept.
    try:
        names, _ = parse_itanium_nested_name(match.group(2))
    except TooComplexName:
        return None
    if not names:
        return symbol
    # Discard C1-C3/D1-D3 members whose enclosing component is a template.
    # The length check stops names[-2] raising IndexError when the nested
    # name has only one component.
    if (len(names) >= 2 and re.match('[CD][123]', names[-1][0])
            and names[-2][1]):
        return None
    # clang::Type::getAs symbols are always kept, even though they would be
    # rejected by the template check that follows.
    if symbol.startswith('_ZNK5clang4Type5getAs'):
        return symbol
    # Discard names whose innermost component is a template.
    if names[-1][1]:
        return None
    # Keep names whose outermost component is llvm or clang.
    if names[0][0] == '4llvm' or names[0][0] == '5clang':
        return symbol
    return None
class TooComplexName(Exception):
    """Signals a mangled name that the parser deliberately refuses to handle."""
def parse_itanium_name(arg):
    """Parse one name component from the start of the mangled string *arg*.

    Returns a ``(name, rest)`` pair, where *rest* is the unparsed remainder
    of *arg*.  Three forms are tried in order:

    * a length-prefixed name: decimal digits followed by that many
      characters (the digits are kept as part of the returned name);
    * one of ``C1 C2 C3 D1 D2 D3``;
    * a run of characters up to, but not including, the first ``E``.

    Returns ``(None, arg)`` when none of these forms matches.
    """
    # Length-prefixed name.  Raw string: the pattern uses a regex escape
    # that is not a valid Python string escape.
    match = re.match(r'(\d+)(.+)', arg)
    if match:
        digits, tail = match.groups()
        n = int(digits)
        return digits + tail[:n], tail[n:]
    # C1-C3 / D1-D3.
    match = re.match('([CD][123])(.+)', arg)
    if match:
        return match.group(1), match.group(2)
    # Anything else that does not begin with 'E'.
    # NOTE(review): presumably operator names; this looks deliberately loose.
    match = re.match('([^E]+)(.+)', arg)
    if match:
        return match.group(1), match.group(2)
    return None, arg
def skip_itanium_template(arg):
    """Skip the template argument list at the start of *arg*.

    *arg* must start with ``I``.  Returns the text following the ``E`` that
    closes the argument list, or None if the input runs out before that
    ``E`` is found.

    Raises TooComplexName when an argument starts with ``L`` or ``X``, and
    AssertionError when *arg* does not start with ``I``.
    """
    assert arg.startswith('I'), arg
    tmp = arg[1:]
    while tmp:
        # Length-prefixed name: skip the digits and that many characters.
        # Raw string: the pattern uses a regex escape that is not a valid
        # Python string escape.
        match = re.match(r'(\d+)(.+)', tmp)
        if match:
            n = int(match.group(1))
            tmp = match.group(2)[n:]
            continue
        # S<uppercase letters/digits>_ : skipped as a unit.
        # NOTE(review): presumably a substitution reference -- confirm.
        match = re.match('S[A-Z0-9]*_(.+)', tmp)
        if match:
            tmp = match.group(1)
        # Nested template argument list.
        elif tmp.startswith('I'):
            tmp = skip_itanium_template(tmp)
        # Nested name: parse it and throw the names away.
        elif tmp.startswith('N'):
            _, tmp = parse_itanium_nested_name(tmp)
        # Arguments starting with L or X are treated as too complex.
        elif tmp.startswith('L') or tmp.startswith('X'):
            raise TooComplexName
        # End of this argument list.
        elif tmp.startswith('E'):
            return tmp[1:]
        # Any other character is skipped one at a time.
        else:
            tmp = tmp[1:]
    return None
def parse_itanium_nested_name(arg):
    """Parse the nested name at the start of the mangled string *arg*.

    *arg* must start with ``N``.  Returns ``(names, rest)`` where *names* is
    a list of ``(name, is_template)`` pairs in the order they appear and
    *rest* is the text after the closing ``E``.  Returns ``(None, None)``
    when a component cannot be parsed or the closing ``E`` is never reached.

    Raises AssertionError when *arg* does not start with ``N``.
    """
    assert arg.startswith('N'), arg
    components = []

    # Step over the leading N and, when present, an S<...>_ prefix.
    prefixed = re.match('NS[A-Z0-9]*_(.+)', arg)
    remaining = prefixed.group(1) if prefixed else arg[1:]

    # Step over any leading run of the characters r, V, K, R and O.
    qualified = re.match('[rVKRO]*(.+)', remaining)
    if qualified:
        remaining = qualified.group(1)

    while remaining:
        # E terminates the nested name.
        if remaining.startswith('E'):
            return components, remaining[1:]
        part, remaining = parse_itanium_name(remaining)
        if not part:
            return None, None
        # A following I starts a template argument list, which is skipped
        # and recorded only as a flag on this component.
        templated = remaining.startswith('I')
        if templated:
            remaining = skip_itanium_template(remaining)
        components.append((part, templated))

    # Ran out of input without seeing the terminating E.
    return None, None
def extract_symbols(arg):
    """Count the kept symbols of one library.

    *arg* is a 4-tuple ``(get_symbols, should_keep_symbol,
    calling_convention_decoration, lib)``; everything is packed into a single
    argument because this function is used as the callable of a pool map.

    Returns a dict mapping each kept symbol name (as returned by
    ``should_keep_symbol``) to the number of times it was produced for
    *lib*.
    """
    get_symbols, should_keep_symbol, calling_convention_decoration, lib = arg
    counts = {}
    for raw_name in get_symbols(lib):
        kept = should_keep_symbol(raw_name, calling_convention_decoration)
        if not kept:
            continue
        counts[kept] = counts.get(kept, 0) + 1
    return counts
if __name__ == '__main__':
    tool_exes = ['dumpbin','nm','objdump','llvm-readobj']
    parser = argparse.ArgumentParser(
        description='Extract symbols to export from libraries')
    parser.add_argument('--mangling', choices=['itanium','microsoft'],
                        required=True, help='expected symbol mangling scheme')
    parser.add_argument('--tools', choices=tool_exes, nargs='*',
                        help='tools to use to extract symbols and determine the'
                        ' target')
    parser.add_argument('libs', metavar='lib', type=str, nargs='+',
                        help='libraries to extract symbols from')
    parser.add_argument('-o', metavar='file', type=str, help='output to file')
    args = parser.parse_args()

    # Each tool maps to (symbol lister, 32-bit-windows detector); None marks
    # a capability that the tool does not provide.
    tools = { 'dumpbin' : (dumpbin_get_symbols, dumpbin_is_32bit_windows),
              'nm' : (nm_get_symbols, None),
              'objdump' : (None, objdump_is_32bit_windows),
              'llvm-readobj' : (readobj_get_symbols, readobj_is_32bit_windows) }
    get_symbols = None
    # On AIX the target detector is fixed up front; elsewhere it is taken
    # from the first available tool that provides one.
    is_32bit_windows = aix_is_32bit_windows if sys.platform.startswith('aix') else None

    # Probe the candidate tools in order by trying to run each one.
    if args.tools:
        tool_exes = args.tools
    for exe in tool_exes:
        try:
            # The streams are closed immediately: no output is wanted and
            # the tool must not wait for anything on stdin.
            p = subprocess.Popen([exe], stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 stdin=subprocess.PIPE,
                                 universal_newlines=True)
            p.stdout.close()
            p.stderr.close()
            p.stdin.close()
            p.wait()
        except OSError:
            # The tool could not be run; try the next one.
            continue
        # Keep going until both capabilities have been found.
        if not get_symbols:
            get_symbols = tools[exe][0]
        if not is_32bit_windows:
            is_32bit_windows = tools[exe][1]
        if get_symbols and is_32bit_windows:
            break
    # sys.exit is used rather than the exit() helper, which is only
    # installed by the site module.
    if not get_symbols:
        print("Couldn't find a program to read symbols with", file=sys.stderr)
        sys.exit(1)
    if not is_32bit_windows:
        print("Couldn't find a program to determining the target", file=sys.stderr)
        sys.exit(1)

    # The keep/discard rules depend on the mangling scheme.
    if args.mangling == 'microsoft':
        should_keep_symbol = should_keep_microsoft_symbol
    else:
        should_keep_symbol = should_keep_itanium_symbol

    # Resolve each argument to a file name.  An argument without a known
    # suffix is tried with each suffix appended, with and without a 'lib'
    # prefix; if no such file exists the argument is rejected.
    suffixes = ('.lib','.a','.obj','.o')
    libs = list()
    for lib in args.libs:
        if not lib.endswith(suffixes):
            for s in suffixes:
                if os.path.exists(lib+s):
                    lib = lib+s
                    break
                if os.path.exists('lib'+lib+s):
                    lib = 'lib'+lib+s
                    break
        if not lib.endswith(suffixes):
            print("Don't know what to do with argument "+lib, file=sys.stderr)
            sys.exit(1)
        libs.append(lib)

    # Calling convention decoration is decided from the first library only.
    calling_convention_decoration = is_32bit_windows(libs[0])

    # Extract the symbols of all libraries in parallel.
    pool = multiprocessing.Pool()
    try:
        # The mapped function takes a single argument, so the arguments
        # shared by every call are repeated in each tuple.
        vals = [(get_symbols, should_keep_symbol, calling_convention_decoration, x) for x in libs]
        # NOTE(review): map_async + get(timeout) instead of map is
        # presumably what lets KeyboardInterrupt reach this process -- confirm.
        result = pool.map_async(extract_symbols, vals)
        pool.close()
        libs_symbols = result.get(3600)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        sys.exit(1)

    # Merge the per-library counts into one dict.
    symbols = dict()
    for this_lib_symbols in libs_symbols:
        for k, v in this_lib_symbols.items():
            symbols[k] = v + symbols.get(k, 0)

    # Group symbols that are members of a template class under a common key
    # and count how many symbols fall under each key.  Symbols with no such
    # key are mapped to "", whose count stays at 0.
    template_function_count = {"": 0}
    template_function_mapping = dict()
    for k in symbols:
        name = None
        if args.mangling == 'microsoft':
            # Key is the text between the leading '?' and the '@' that
            # follows a "<name>@?$<name>" prefix.  Raw string: the pattern
            # uses regex escapes that are not valid Python string escapes.
            match = re.search(r"^\?(\??\w+\@\?\$\w+)\@", k)
            if match:
                name = match.group(1)
        else:
            match = re.match('_Z(T[VTIS])?(N.+)', k)
            if match:
                try:
                    names, _ = parse_itanium_nested_name(match.group(2))
                    # Key on the joined names when the second-to-last
                    # component is a template.  The length check stops
                    # names[-2] raising IndexError for a one-component name.
                    if names and len(names) >= 2 and names[-2][1]:
                        name = ''.join([x for x,_ in names])
                except TooComplexName:
                    # Leave too-complex names ungrouped.
                    pass
        if name:
            template_function_count[name] = template_function_count.get(name, 0) + 1
            template_function_mapping[k] = name
        else:
            template_function_mapping[k] = ""

    # Print the symbols that appear exactly once across all inputs and
    # whose template group (if any) has fewer than 100 members.
    outfile = open(args.o,'w') if args.o else sys.stdout
    try:
        for k, v in symbols.items():
            template_count = template_function_count[template_function_mapping[k]]
            if v == 1 and template_count < 100:
                print(k, file=outfile)
    finally:
        # Close the output file if one was opened here; sys.stdout is left
        # alone.
        if outfile is not sys.stdout:
            outfile.close()