Source code for tritondse.loaders.cle_loader

# built-in imports
from typing import Generator, Optional, Tuple
from pathlib import Path
import logging

# third-party imports
import cle

# local imports
from tritondse.loaders import Loader, LoadableSegment
from tritondse.types import Addr, Architecture, PathLike, Platform, Perm, Endian
from tritondse.routines import SUPPORTED_ROUTINES
import tritondse.logging

logger = tritondse.logging.get("loader")

_arch_mapper = {
    "ARMEL":   Architecture.ARM32,
    "AARCH64": Architecture.AARCH64,
    "AMD64":   Architecture.X86_64,
    "X86":   Architecture.X86,
}

_plfm_mapper = {
    "UNIX - Linux": Platform.LINUX,
    "UNIX - System V": Platform.LINUX,
    "windows": Platform.WINDOWS,
    "macos": Platform.MACOS
}



[docs]
class CleLoader(Loader):
    EXTERN_SYM_BASE = 0x0f001000
    EXTERN_SYM_SIZE = 0x1000

    BASE_STACK = 0xf0000000
    END_STACK = 0x70000000  # This is inclusive

    def __init__(self, path: PathLike, ld_path: Optional[PathLike] = None):
        super(CleLoader, self).__init__(path)
        self.path: Path = Path(path)  #: Binary file path
        if not self.path.is_file():
            raise FileNotFoundError(f"file {path} not found (or not a file)")

        self._disable_vex_loggers()  # disable logs of pyvex

        self.ld_path = ld_path if ld_path is not None else ()
        self.ld = cle.Loader(str(path), ld_path=self.ld_path)

    def _disable_vex_loggers(self):
        for name, logger in logging.root.manager.loggerDict.items():
            if "pyvex" in name:
                logger.propagate = False

    @property
    def name(self) -> str:
        """ Name of the loader"""
        return f"CleLoader({self.path})"

    @property
    def architecture(self) -> Architecture:
        """
        Architecture enum representing program architecture.

        :rtype: Architecture
        """
        return _arch_mapper[self.ld.main_object.arch.name]

    @property
    def endianness(self) -> Endian:
        # FIXME: Depending on architecture returning good endianness
        return Endian.LITTLE

    @property
    def entry_point(self) -> Addr:
        """
        Program entrypoint address as defined in the binary headers

        :rtype: :py:obj:`tritondse.types.Addr`
        """
        return self.ld.main_object.entry


[docs]
    def memory_segments(self) -> Generator[LoadableSegment, None, None]:
        """
        :return: Generator of tuples addrs and content
        """
        for obj in self.ld.all_objects:
            logger.debug(obj)
            for seg in obj.segments:
                segdata = self.ld.memory.load(seg.vaddr, seg.memsize)
                assert len(segdata) == seg.memsize
                perms = (Perm.R if seg.is_readable else 0) | (Perm.W if seg.is_writable else 0) | (Perm.X if seg.is_executable else 0)
                if seg.__class__.__name__ != "ExternSegment":
                    # The format string in CLE is broken if the filesize is 0. This is a workaround.
                    logger.debug(f"Loading segment {seg} - perms:{perms}")
                yield LoadableSegment(seg.vaddr, perms, content=segdata, name=f"seg-{obj.binary_basename}")
        # Also return a specific map to put external symbols
        yield LoadableSegment(self.EXTERN_SYM_BASE, self.EXTERN_SYM_SIZE, Perm.R | Perm.W, name="[extern]")
        yield LoadableSegment(self.END_STACK, self.BASE_STACK-self.END_STACK+1, Perm.R | Perm.W, name="[stack]")

        # FIXME. Temporary solution to prevent crashes on access to the TLB e.g fs:28
        yield LoadableSegment(0, 0x2000, Perm.R | Perm.W, name="[fs]")


    # FIXME. Temporary solution to prevent crashes on access to the TLB e.g fs:28
    @property
    def cpustate(self):
        # NOTE: in Triton, the segment selector is used as the segment base and not as a selector into GDT.
        # i.e. directly store the segment base into fs
        return {"fs": 0x1000}

    @property
    def platform(self) -> Optional[Platform]:
        """
        Platform of the binary.

        :return: Platform
        """
        return _plfm_mapper[self.ld.main_object.os]


[docs]
    def imported_functions_relocations(self) -> Generator[Tuple[str, Addr], None, None]:
        """
        Iterate over all imported functions by the program. This function
        is a generator of tuples associating the function and its relocation
        address in the binary.

        :return: Generator of tuples function name and relocation address
        """
        # TODO I think there's a problem here. We only deal with imports from the main binary
        # For example if a library calls a libc function, we probably need to patch the library's GOT
        for obj in self.ld.all_objects:
            for fun in obj.imports:
                if fun in SUPPORTED_ROUTINES:
                    reloc = obj.imports[fun]
                    got_entry_addr = reloc.relative_addr + obj.mapped_base
                    yield fun, got_entry_addr

        # Handle indirect functions.
        # Currently, we only support indirect functions if there exists a stub for them in `routines.py`
        # Otherwise the program will crash because CLE doesn't perform the relocation for indirect functions.

        # We could perform the relocation ourselves by writing to the got slot, but we need a way to figure out
        # the correct fptr to use.
        # In other words we should execute `resolver_fun` or parse it in some way to get the correct function ptr
        # to write to got_slot (write with self.ld.memory.pack_word(got_slot, func_ptr))
        for obj in self.ld.all_objects:
            for (resolver_func, got_rva) in obj.irelatives:
                got_slot = got_rva + obj.mapped_base
                sym = self.ld.find_symbol(resolver_func)
                if sym is None:
                    continue
                fun = sym.name
                if fun in SUPPORTED_ROUTINES:
                    yield fun, got_slot



[docs]
    def imported_variable_symbols_relocations(self) -> Generator[Tuple[str, Addr], None, None]:
        """
        Iterate over all imported variable symbols. Yield for each of them the name and
        the relocation address in the binary.

        :return: Generator of tuples with symbol name, relocation address
        """
        # TODO I think there's a problem here. We only deal with imports from the main binary
        for s in self.ld.main_object.symbols:
            if s.resolved and s._type == cle.SymbolType.TYPE_OBJECT:
                logger.debug(f"CleLoader: hooking symbol {s.name} @ {s.relative_addr:#x} {s.resolved} {s.resolvedby} {s._type}")
                s_addr = s.relative_addr + self.ld.main_object.mapped_base
                yield s.name, s_addr



[docs]
    def find_function_addr(self, name: str) -> Optional[Addr]:
        """
        Search for the function name in functions of the binary.

        :param name: Function name
        :type name: str
        :return: Address of function if found
        :rtype: Addr
        """
        res = [x for x in self.ld.find_all_symbols(name) if x.is_function]
        return res[0].rebased_addr if res else None  # if multiple elements return the first