# Source code for tritondse.seed

# built-in imports
import hashlib
import base64
import json
from enum import Enum
from pathlib import Path
from typing import List, Dict, Union
from dataclasses import dataclass, field
import enum_tools.documentation

# local imports
from tritondse.types import PathLike


@enum_tools.documentation.document_enum
class SeedStatus(Enum):
    """
    Seed status enum.
    Enables giving a status to a seed during its execution.
    At the end of a :py:obj:`SymbolicExecutor` run one of these
    statuses must have been set on the seed.
    """
    # The trailing "# doc:" comments are kept verbatim: they are the per-member
    # documentation attached to this enum by the ``document_enum`` decorator.
    NEW = 0      # doc: The input seed is new (has not been executed yet)
    OK_DONE = 1  # doc: The input seed has been executed and terminated correctly
    CRASH = 2    # doc: The input seed crashed in some ways
    HANG = 3     # doc: The input seed made the program to hang
    FAIL = 4     # doc: The input seed made the symbolic executor to raise an exception
@enum_tools.documentation.document_enum
class SeedFormat(Enum):
    """
    Seed format enum.
    Raw seeds are just bytes, e.g. ``Seed(b"AAAAA\\x00BBBBB")``.
    Composite seeds can describe how to inject the input more precisely.
    """
    # The trailing "# doc:" comments are kept verbatim: they are the per-member
    # documentation attached to this enum by the ``document_enum`` decorator.
    RAW = 0        # doc: plain bytes input seed
    COMPOSITE = 1  # doc: complex input object
@dataclass(frozen=True)
class CompositeData:
    """
    Structured seed content made of argv values, named files and
    user-defined variables.

    The object serializes to JSON (see :py:meth:`__bytes__`) in which every
    ``bytes`` value is base64-encoded, and can be rebuilt from the decoded
    JSON dictionary with :py:meth:`from_dict`.
    """
    argv: List[bytes] = field(default_factory=list)
    "list of argv values"
    files: Dict[str, bytes] = field(default_factory=dict)
    "dictionary of files and the associated content (stdin is one of them)"
    variables: Dict[str, bytes] = field(default_factory=dict)
    "user defined variables, that the user must take care to inject at the right location"

    @staticmethod
    def _encode(value):
        """Base64-encode a ``bytes`` value to text; return any other value unchanged."""
        return base64.b64encode(value).decode() if isinstance(value, bytes) else value

    @staticmethod
    def _decode(value):
        """Base64-decode a ``str`` value to bytes; return any other value unchanged."""
        return base64.b64decode(value) if isinstance(value, str) else value

    def _to_json(self, sort_keys: bool = False) -> str:
        """
        Serialize the data into a JSON string (indented by 2 spaces).

        :param sort_keys: emit dictionary keys in sorted order instead of
                          insertion order (used to compute a stable hash)
        :return: JSON document with base64-encoded byte values
        """
        data = {
            'argv': [base64.b64encode(v).decode() for v in self.argv],
            'files': {k: self._encode(v) for k, v in self.files.items()},
            'variables': {k: self._encode(v) for k, v in self.variables.items()},
        }
        return json.dumps(data, indent=2, sort_keys=sort_keys)

    def __bytes__(self) -> bytes:
        """
        Serialize data into a json string.
        Dictionary keys are emitted in insertion order.

        :return: JSON serialized data
        """
        return self._to_json().encode()

    @staticmethod
    def from_dict(json_data: dict) -> 'CompositeData':
        """
        Convert dict data into a :py:obj:`CompositeData` object.
        String values are base64-decoded, other values are kept as they are.
        A missing ``argv``, ``files`` or ``variables`` entry is treated as
        empty, mirroring the defaults of the dataclass fields.

        :param json_data: json data
        :return: new object instance
        """
        decode = CompositeData._decode
        argv = [base64.b64decode(v) for v in json_data.get('argv', [])]
        files = {k: decode(v) for k, v in json_data.get('files', {}).items()}
        variables = {k: decode(v) for k, v in json_data.get('variables', {}).items()}
        return CompositeData(argv=argv, files=files, variables=variables)

    def __hash__(self):
        """
        Hash based on the serialized content.

        The fields are lists and dictionaries, hence not hashable themselves.
        Keys are sorted for hashing so that two instances comparing equal
        (dictionary equality ignores insertion order) also hash identically.

        :rtype: int
        """
        return hash(self._to_json(sort_keys=True))
class Seed(object):
    """
    Seed input.
    Holds the bytes buffer of the content, a status after execution,
    but also some metadata about the code portions it is meant to cover.
    """

    def __init__(self, content: Union[bytes, CompositeData] = bytes(), status=SeedStatus.NEW):
        """
        :param content: content of the input. By default is b"" *(and is thus considered
                        as a bootstrap seed)*
        :type content: Union[bytes, CompositeData]
        :param status: status of the seed if already known
        :type status: SeedStatus
        """
        self.content = content
        self.coverage_objectives = set()  # set of coverage items that the seed is meant to cover
        self.meta_fname = []  # extra name parts appended to the generated filename
        self.target = None  # CovItem informational field indicating the item the seed was generated for
        self._status = status
        self._type = SeedFormat.COMPOSITE if isinstance(content, CompositeData) else SeedFormat.RAW

    def is_composite(self) -> bool:
        """Returns whether the seed is a composite seed or not."""
        return self._type == SeedFormat.COMPOSITE

    def is_raw(self) -> bool:
        """Returns whether the seed is a raw seed or not."""
        return self._type == SeedFormat.RAW

    def is_bootstrap_seed(self) -> bool:
        """
        A bootstrap seed is an empty seed (b""). It will receive a
        specific processing in the engine as its size will be automatically
        adapted to the size read (in stdin for instance)

        :return: true if the seed is a bootstrap seed
        """
        return self.content == b""

    def is_fresh(self) -> bool:
        """
        A fresh seed has never been executed. It is recognizable
        as it does not contain any coverage objectives.

        :return: True if the seed has never been executed
        """
        return not self.coverage_objectives

    @property
    def status(self) -> SeedStatus:
        """
        Status of the seed.

        :rtype: SeedStatus
        """
        return self._status

    @status.setter
    def status(self, value: SeedStatus) -> None:
        """ Sets the status of the seed """
        self._status = value

    @property
    def format(self) -> SeedFormat:
        """
        Format of the seed.

        :rtype: SeedFormat
        """
        return self._type

    def is_status_set(self) -> bool:
        """ Checks whether a status has already been assigned to the seed. """
        return self.status != SeedStatus.NEW

    def __len__(self) -> int:
        """
        Size of the content of the seed (length of its bytes representation).

        :rtype: int
        """
        return len(bytes(self.content))

    def __eq__(self, other) -> bool:
        """
        Equality check based on content.

        :returns: true if content of both seeds are equal, ``NotImplemented``
                  when *other* is not a seed (so the comparison yields False
                  instead of raising)
        """
        if not isinstance(other, Seed):
            return NotImplemented
        return self.content == other.content

    def __bytes__(self) -> bytes:
        """
        Return a representation of the seed's content in bytes.

        :rtype: bytes
        """
        return bytes(self.content)

    def __hash__(self):
        """
        Seed hash function overridden to base itself on content.
        That enables storing seeds in dictionaries directly based
        on their content to discriminate them.

        :rtype: int
        """
        return hash(self.content)

    @property
    def hash(self) -> str:
        """
        MD5 hash of the seed content (bytes representation), as hexadecimal.

        :rtype: str
        """
        m = hashlib.md5(bytes(self))
        return m.hexdigest()

    @property
    def size(self) -> int:
        """
        Size of the seed content in bytes

        :rtype: int
        """
        return len(bytes(self))

    @property
    def filename(self):
        """
        Standardized filename based on hash and size, followed by the
        ``meta_fname`` items joined with underscores.
        That does not mean the file exists or anything.

        :returns: formatted intended filename of the seed
        :rtype: str
        """
        return f"{self.hash}_{self.size:04x}_{'_'.join(self.meta_fname)}.tritondse.cov"

    @staticmethod
    def from_bytes(raw_seed: bytes, status: SeedStatus = SeedStatus.NEW) -> 'Seed':
        """
        Parse a seed from its byte representation. If the bytes are a JSON
        object holding both ``files`` and ``argv`` keys it is loaded as a
        :py:obj:`CompositeData`; in every other case (not JSON, JSON that is
        not such an object, or an object that cannot be converted) the bytes
        are kept as a raw seed.

        :param raw_seed: bytes: raw bytes of the seed
        :param status: status of the seed if any, otherwise :py:obj:`SeedStatus.NEW`
        :type status: SeedStatus
        :returns: fresh seed instance
        :rtype: Seed
        """
        try:
            data = json.loads(raw_seed)
            # A file containing only digits (for instance) is valid JSON but
            # not a dict: such content is still a raw seed.
            if isinstance(data, dict) and 'files' in data and 'argv' in data:
                return Seed(CompositeData.from_dict(data), status)
        except (ValueError, KeyError, TypeError, AttributeError):
            # ValueError covers json.JSONDecodeError, UnicodeDecodeError and
            # invalid base64 (binascii.Error). The other ones are raised when
            # the JSON merely looks like a composite seed but has unexpected
            # value types or missing entries. Arbitrary input bytes must never
            # make the parsing fail: fall through and keep them raw.
            pass
        return Seed(raw_seed, status)

    @staticmethod
    def from_file(path: PathLike, status: SeedStatus = SeedStatus.NEW) -> 'Seed':
        """
        Read a seed from a file. The status can optionally be given
        as it cannot be determined from the file.

        :param path: seed path
        :type path: :py:obj:`tritondse.types.PathLike`
        :param status: status of the seed if any, otherwise :py:obj:`SeedStatus.NEW`
        :type status: SeedStatus
        :returns: fresh seed instance
        :rtype: Seed
        """
        raw = Path(path).read_bytes()
        seed = Seed.from_bytes(raw, status)

        # Parse filename to extract back metadata if any
        suffix = ".tritondse.cov"
        name = Path(path).name
        if name.endswith(suffix):
            # Strip only the trailing extension (not every occurrence of it)
            metas = name[:-len(suffix)].split("_")
            # NOTE(review): the ``filename`` property emits
            # "<hash>_<size>_<meta items...>", so meta items start at index 2,
            # while only the fields from index 3 onward are restored here.
            # Kept unchanged; confirm the intended file name layout.
            if len(metas) >= 4:
                seed.meta_fname = metas[3:]
        return seed

    # Utility functions for composite seeds

    def is_file_defined(self, name: str) -> bool:
        """
        Check whether a file is defined in the seed. Always False
        for a seed that is not composite.

        :param name: name of the file to look for
        :return: True if the composite seed holds a file with that name
        """
        return self.is_composite() and name in self.content.files

    def get_file_input(self, name: str) -> bytes:
        """
        Return the bytes associated to a given file within
        a composite seed.

        :raise KeyError: if the name cannot be found in the seed
                         (a raw seed does not define any file).
        :param name: name of the file to retrieve
        :return: bytes of the file content
        """
        if not self.is_composite():
            # Raw content has no ``files`` mapping: report it the documented way
            raise KeyError(name)
        return self.content.files[name]

    # Defined last on purpose: once this method exists, the name ``bytes``
    # resolves to it within the class body, which would make the ``bytes``
    # annotations of any method defined afterwards point to this function
    # rather than to the builtin type.
    def bytes(self) -> bytes:
        """
        Return a representation of the seed's content in bytes
        (same as ``bytes(seed)``).

        :rtype: bytes
        """
        return bytes(self)