Compiler projects using llvm
#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#

from ctypes import CFUNCTYPE
from ctypes import POINTER
from ctypes import addressof
from ctypes import c_byte
from ctypes import c_char_p
from ctypes import c_int
from ctypes import c_size_t
from ctypes import c_ubyte
from ctypes import c_uint64
from ctypes import c_void_p
from ctypes import cast

from .common import LLVMObject
from .common import c_object_p
from .common import get_library

__all__ = [
    'Disassembler',
]

lib = get_library()
callbacks = {}

# Constants for set_options
Option_UseMarkup = 1



_initialized = False
_targets = ['AArch64', 'ARM', 'Hexagon', 'MSP430', 'Mips', 'NVPTX', 'PowerPC', 'R600', 'Sparc', 'SystemZ', 'X86', 'XCore']
def _ensure_initialized():
    global _initialized
    if not _initialized:
        # Here one would want to call the functions
        # LLVMInitializeAll{TargetInfo,TargetMC,Disassembler}s, but
        # unfortunately they are only defined as static inline
        # functions in the header files of llvm-c, so they don't exist
        # as symbols in the shared library.
        # So until that is fixed use this hack to initialize them all
        for tgt in _targets:
            for initializer in ("TargetInfo", "TargetMC", "Disassembler"):
                try:
                    f = getattr(lib, "LLVMInitialize" + tgt + initializer)
                except AttributeError:
                    continue
                f()
        _initialized = True


class Disassembler(LLVMObject):
    """Represents a disassembler instance.

    Disassembler instances are tied to specific "triple," which must be defined
    at creation time.

    Disassembler instances can disassemble instructions from multiple sources.
    """
    def __init__(self, triple):
        """Create a new disassembler instance.

        The triple argument is the triple to create the disassembler for. This
        is something like 'i386-apple-darwin9'.
        """

        _ensure_initialized()

        ptr = lib.LLVMCreateDisasm(c_char_p(triple), c_void_p(None), c_int(0),
                callbacks['op_info'](0), callbacks['symbol_lookup'](0))
        if not ptr:
            raise Exception('Could not obtain disassembler for triple: %s' %
                            triple)

        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)

    def get_instruction(self, source, pc=0):
        """Obtain the next instruction from an input source.

        The input source should be a str or bytearray or something that
        represents a sequence of bytes.

        This function will start reading bytes from the beginning of the
        source.

        The pc argument specifies the address that the first byte is at.

        This returns a 2-tuple of:

          long number of bytes read. 0 if no instruction was read.
          str representation of instruction. This will be the assembly that
            represents the instruction.
        """
        buf = cast(c_char_p(source), POINTER(c_ubyte))
        out_str = cast((c_byte * 255)(), c_char_p)

        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(source)),
                                           c_uint64(pc), out_str, 255)

        return (result, out_str.value)

    def get_instructions(self, source, pc=0):
        """Obtain multiple instructions from an input source.

        This is like get_instruction() except it is a generator for all
        instructions within the source. It starts at the beginning of the
        source and reads instructions until no more can be read.

        This generator returns 3-tuple of:

          long address of instruction.
          long size of instruction, in bytes.
          str representation of instruction.
        """
        source_bytes = c_char_p(source)
        out_str = cast((c_byte * 255)(), c_char_p)

        # This could probably be written cleaner. But, it does work.
        buf = cast(source_bytes, POINTER(c_ubyte * len(source))).contents
        offset = 0
        address = pc
        end_address = pc + len(source)
        while address < end_address:
            b = cast(addressof(buf) + offset, POINTER(c_ubyte))
            result = lib.LLVMDisasmInstruction(self, b,
                    c_uint64(len(source) - offset), c_uint64(address),
                    out_str, 255)

            if result == 0:
                break

            yield (address, result, out_str.value)

            address += result
            offset += result

    def set_options(self, options):
        if not lib.LLVMSetDisasmOptions(self, options):
            raise Exception('Unable to set all disassembler options in %i' % options)


def register_library(library):
    library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
        callbacks['op_info'], callbacks['symbol_lookup']]
    library.LLVMCreateDisasm.restype = c_object_p

    library.LLVMDisasmDispose.argtypes = [Disassembler]

    library.LLVMDisasmInstruction.argtypes = [Disassembler, POINTER(c_ubyte),
            c_uint64, c_uint64, c_char_p, c_size_t]
    library.LLVMDisasmInstruction.restype = c_size_t

    library.LLVMSetDisasmOptions.argtypes = [Disassembler, c_uint64]
    library.LLVMSetDisasmOptions.restype = c_int


callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
                                 c_int, c_void_p)
callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64,
                                       POINTER(c_uint64), c_uint64,
                                       POINTER(c_char_p))

register_library(lib)