Source code for tritondse.loaders.cle_loader

# built-in imports
from typing import Generator, Optional, Tuple
from pathlib import Path
import logging

# third-party imports
import cle

# local imports
from tritondse.loaders import Loader, LoadableSegment
from tritondse.types import Addr, Architecture, PathLike, Platform, Perm, Endian
from tritondse.routines import SUPPORTED_ROUTINES
import tritondse.logging

# Logger for this module, obtained from tritondse.logging under the "loader" name.
logger = tritondse.logging.get("loader")

# Translation table from the architecture name exposed by CLE
# (``main_object.arch.name``) to the tritondse ``Architecture`` value.
# Architectures missing from this table are not supported by the loader.
_arch_mapper = {
    "ARMEL": Architecture.ARM32,
    "AARCH64": Architecture.AARCH64,
    "AMD64": Architecture.X86_64,
    "X86": Architecture.X86,
}

# Translation table from the OS string exposed by CLE (``main_object.os``)
# to the tritondse ``Platform`` value.
_plfm_mapper = {
    "UNIX - Linux": Platform.LINUX,
    "UNIX - System V": Platform.LINUX,
    "windows": Platform.WINDOWS,
    "macos": Platform.MACOS,
}


class CleLoader(Loader):
    """
    Loader implementation relying on the ``cle`` library to load a binary
    and expose its memory layout, entry point and import relocations.
    """

    # Address and size of the synthetic area reserved for external symbols.
    EXTERN_SYM_BASE = 0x0f001000
    EXTERN_SYM_SIZE = 0x1000

    # Bounds of the synthetic stack segment.
    BASE_STACK = 0xf0000000
    END_STACK = 0x70000000  # This is inclusive

    def __init__(self, path: PathLike, ld_path: Optional[PathLike] = None):
        """
        :param path: Path of the binary to load
        :type path: PathLike
        :param ld_path: Value forwarded to ``cle.Loader`` as ``ld_path``
                        (an empty tuple is used when not provided)
        :type ld_path: Optional[PathLike]
        :raise FileNotFoundError: if ``path`` does not designate an existing file
        """
        super().__init__(path)
        self.path: Path = Path(path)  #: Binary file path
        if not self.path.is_file():
            raise FileNotFoundError(f"file {path} not found (or not a file)")

        self._disable_vex_loggers()  # disable logs of pyvex

        self.ld_path = ld_path if ld_path is not None else ()
        self.ld = cle.Loader(str(path), ld_path=self.ld_path)

    def _disable_vex_loggers(self):
        """
        Disable propagation on every registered logger whose name
        contains ``pyvex``.
        """
        # The loop variable is deliberately not called ``logger`` so that it
        # does not shadow the module-level logger.
        for name, vex_logger in logging.root.manager.loggerDict.items():
            if "pyvex" in name:
                vex_logger.propagate = False

    @property
    def name(self) -> str:
        """ Name of the loader"""
        return f"CleLoader({self.path})"

    @property
    def architecture(self) -> Architecture:
        """
        Architecture enum representing program architecture.

        :rtype: Architecture
        :raise KeyError: if the architecture reported by CLE is not in ``_arch_mapper``
        """
        return _arch_mapper[self.ld.main_object.arch.name]

    @property
    def endianness(self) -> Endian:
        """
        Endianness of the program. Currently always little-endian.

        :rtype: Endian
        """
        # FIXME: Depending on architecture returning good endianness
        return Endian.LITTLE

    @property
    def entry_point(self) -> Addr:
        """
        Program entrypoint address as defined in the binary headers

        :rtype: :py:obj:`tritondse.types.Addr`
        """
        return self.ld.main_object.entry

    def memory_segments(self) -> Generator[LoadableSegment, None, None]:
        """
        Iterate over the segments to map in memory: the segments of every
        object loaded by CLE, followed by the synthetic ``[extern]``,
        ``[stack]`` and ``[fs]`` areas.

        :return: Generator of segments to load
        """
        for obj in self.ld.all_objects:
            logger.debug(obj)
            for seg in obj.segments:
                segdata = self.ld.memory.load(seg.vaddr, seg.memsize)
                assert len(segdata) == seg.memsize
                perms = (
                    (Perm.R if seg.is_readable else 0)
                    | (Perm.W if seg.is_writable else 0)
                    | (Perm.X if seg.is_executable else 0)
                )
                if seg.__class__.__name__ != "ExternSegment":
                    # The format string in CLE is broken if the filesize is 0. This is a workaround.
                    logger.debug(f"Loading segment {seg} - perms:{perms}")
                # The size is given explicitly so that ``perms`` lands in the
                # permissions slot, following the positional order
                # (address, size, perms) used by the synthetic segments below.
                yield LoadableSegment(seg.vaddr, seg.memsize, perms, content=segdata,
                                      name=f"seg-{obj.binary_basename}")

        # Also return a specific map to put external symbols
        yield LoadableSegment(self.EXTERN_SYM_BASE, self.EXTERN_SYM_SIZE, Perm.R | Perm.W, name="[extern]")
        # END_STACK is inclusive, hence the +1 on the size
        yield LoadableSegment(self.END_STACK, self.BASE_STACK - self.END_STACK + 1, Perm.R | Perm.W, name="[stack]")

        # FIXME. Temporary solution to prevent crashes on fs-relative accesses, e.g fs:28
        # (presumably the thread-local storage area)
        yield LoadableSegment(0, 0x2000, Perm.R | Perm.W, name="[fs]")

    # FIXME. Temporary solution to prevent crashes on fs-relative accesses, e.g fs:28
    @property
    def cpustate(self) -> dict:
        """
        Initial register values to apply, as a mapping from register name
        to value. Only ``fs`` is set.

        :return: Mapping of register names to their initial value
        """
        # NOTE: in Triton, the segment selector is used as the segment base and not as a selector into GDT.
        # i.e. directly store the segment base into fs
        return {"fs": 0x1000}

    @property
    def platform(self) -> Optional[Platform]:
        """
        Platform of the binary.

        :return: Platform, or None when the OS reported by CLE is not a known one
        """
        # .get() honours the Optional return type instead of raising KeyError
        return _plfm_mapper.get(self.ld.main_object.os)

    def imported_functions_relocations(self) -> Generator[Tuple[str, Addr], None, None]:
        """
        Iterate over all imported functions by the program. This function
        is a generator of tuples associating the function and its relocation
        address in the binary. Only functions present in ``SUPPORTED_ROUTINES``
        are yielded.

        :return: Generator of tuples function name and relocation address
        """
        # NOTE: imports of every loaded object are visited (not only the ones
        # of the main binary), so a relocation is yielded per importing object.
        for obj in self.ld.all_objects:
            for fun, reloc in obj.imports.items():
                if fun in SUPPORTED_ROUTINES:
                    yield fun, reloc.relative_addr + obj.mapped_base

        # Handle indirect functions.
        # Currently, we only support indirect functions if there exists a stub for them in `routines.py`
        # Otherwise the program will crash because CLE doesn't perform the relocation for indirect functions.
        # We could perform the relocation ourselves by writing to the got slot, but we need a way to figure out
        # the correct fptr to use.
        # In other words we should execute `resolver_fun` or parse it in some way to get the correct function ptr
        # to write to got_slot (write with self.ld.memory.pack_word(got_slot, func_ptr))
        for obj in self.ld.all_objects:
            for resolver_func, got_rva in obj.irelatives:
                sym = self.ld.find_symbol(resolver_func)
                if sym is None:
                    continue
                if sym.name in SUPPORTED_ROUTINES:
                    yield sym.name, got_rva + obj.mapped_base

    def imported_variable_symbols_relocations(self) -> Generator[Tuple[str, Addr], None, None]:
        """
        Iterate over all imported variable symbols. Yield for each of them the name and
        the relocation address in the binary.

        :return: Generator of tuples with symbol name, relocation address
        """
        # TODO I think there's a problem here. We only deal with imports from the main binary
        main_object = self.ld.main_object
        for s in main_object.symbols:
            if s.resolved and s._type == cle.SymbolType.TYPE_OBJECT:
                logger.debug(f"CleLoader: hooking symbol {s.name} @ {s.relative_addr:#x} {s.resolved} {s.resolvedby} {s._type}")
                yield s.name, s.relative_addr + main_object.mapped_base

    def find_function_addr(self, name: str) -> Optional[Addr]:
        """
        Search for the function name in functions of the binary.

        :param name: Function name
        :type name: str
        :return: Address of function if found, None otherwise
        :rtype: Addr
        """
        # If multiple symbols match, the first function symbol wins
        return next(
            (sym.rebased_addr for sym in self.ld.find_all_symbols(name) if sym.is_function),
            None,
        )