Skip to content

block

Methods to use and deal with blocks in a binary.

Block

Bases: MutableMapping

Basic Block class

A basic block is a sequence of instructions without any (basic) incoming flows disrupting it (except for call returns).

While blocks may be serialized in the exported file, a new instance of this class is created for each block in the program (so they all have a unique address).

Parameters:

Name Type Description Default
block_idx Index

Index in the protobuf file of the block

required
start_address AddressT

Starting address of the block

required
function Function

Parent function of the block.

required

Attributes:

Name Type Description
proto

Protobuf object

parent Function

A reference to the parent Function

program Program

A reference to the parent Program

start int

Start address

type BlockType

Block type

address_to_index dict[AddressT, Index]

A mapping of addresses to instruction indexes

end int

End address

comments dict[AddressT, str]

Mapping of addresses to the comments attached to the block

Source code in bindings/python/quokka/block.py
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
class Block(MutableMapping):
    """Basic Block class

    A basic block is a sequence of instructions without any (basic) incoming flows
    disrupting it (except for call returns).

    While blocks may be serialized in the exported file, a new instance of this class is
    created for each block in the program (so they all have a unique address).

    The block behaves as a mutable mapping from instruction addresses to
    instruction objects.

    Arguments:
        block_idx: Index in the protobuf file of the block
        start_address: Starting address of the block
        function: Parent function of the block.

    Attributes:
        proto: Protobuf object
        parent: A reference to the parent Function
        program: A reference to the parent Program
        start: Start address
        type: Block type
        size: Size of the block (taken from the protobuf object)
        file_offset: Offset of the block in the file (taken from the protobuf object)
        is_thumb: Thumb flag (taken from the protobuf object)
        address_to_index: A mapping of addresses to instruction indexes
        end: End address
        comments: Mapping of addresses to the comments attached to the block
    """

    def __init__(
        self,
        block_idx: Index,
        start_address: AddressT,
        function: quokka.Function,
    ):
        """Build the block and its address -> instruction mapping.

        In FULL mode the instructions are created from the protobuf instruction
        indexes; in LIGHT mode they are decoded with the capstone backend.

        Raises:
            AssertionError: If the program exporter mode is neither FULL nor LIGHT.
        """
        self._proto_index: Index = block_idx
        self.parent: quokka.Function = function

        self.proto = function.proto.blocks[block_idx]

        self.start: int = start_address
        self.type: BlockType = BlockType.from_proto(self.proto.block_type)
        self.size: int = self.proto.size
        self.file_offset = self.proto.file_offset

        self.is_thumb = self.proto.is_thumb

        # NOTE(review): this mapping is created empty and is not filled anywhere
        # in this class -- confirm whether it is populated elsewhere.
        self.address_to_index: dict[AddressT, Index] = {}
        self._raw_dict: dict[AddressT, quokka.Instruction] = {}

        if self.program.mode == ExporterMode.FULL:
            # Instructions are laid out back to back: each address is the
            # previous address plus the size of the previous instruction.
            current_address: AddressT = self.start
            for inst_idx, inst_pb_idx in enumerate(self.proto.instructions_index):
                ins = quokka.Instruction(inst_pb_idx, inst_idx, current_address, self)
                self._raw_dict[current_address] = ins
                current_address += ins.size

        elif self.program.mode == ExporterMode.LIGHT:
            insts = quokka.backends.capstone.capstone_decode_block(self)
            # A mismatch is only reported; the decoded instructions are still used.
            if len(insts) != self.proto.n_instr:
                logger.warning(
                    f"Decoded {len(insts)} instructions for block at 0x{self.start:x} but expected {self.proto.n_instr}."
                )
            for i, inst in enumerate(insts):
                # -1 is passed where FULL mode passes the protobuf instruction index.
                ins = quokka.Instruction(-1, i, inst.address, self, backend_inst=inst)
                self._raw_dict[ins.address] = ins
        else:
            # Raised explicitly instead of `assert False` so the check is not
            # stripped when Python runs with -O.
            raise AssertionError("Unknown exporter mode")

        self.comments: dict[AddressT, str] = {}

    @property
    def address(self) -> AddressT:
        """Direct accessor of the block address"""
        return self.start

    @property
    def program(self) -> quokka.Program:
        """Return the parent program"""
        return self.parent.program

    def __setitem__(self, k: AddressT, ins: Instruction) -> None:
        """Update the instructions mapping"""
        self._raw_dict.__setitem__(k, ins)

    def __delitem__(self, v: AddressT) -> None:
        """Remove an instruction from the mapping"""
        self._raw_dict.__delitem__(v)

    def add_comment(self, addr: AddressT, value: str) -> None:
        """Set the comment at `addr`.

        An existing comment at the same address is overwritten.

        Arguments:
            addr: Comment address
            value: Comment value
        """
        self.comments[addr] = value

    def __getitem__(self, address: AddressT) -> quokka.Instruction:
        """Retrieve an instruction at `address`."""
        return self._raw_dict.__getitem__(address)

    def __len__(self) -> int:
        """Number of instructions in the block"""
        return len(self._raw_dict)

    def __iter__(self) -> Iterator[AddressT]:
        """Return an iterator over the instruction addresses (the mapping keys)"""
        return iter(self._raw_dict)

    @property
    def end(self) -> int:
        """End address of the block, i.e. the start address plus the block size.

        For architectures with fixed-length instructions (e.g. ARM), the block
        size is the number of instructions multiplied by the size of one
        instruction.
        """
        return self.start + self.size

    @cached_property
    def constants(self) -> list[int]:
        """Constants used by the block"""
        constants: list[int] = []
        for instruction in self.values():
            constants.extend(instruction.constants)

        return constants

    @cached_property
    def strings(self) -> list[str]:
        """Strings used by the block"""
        strings: list[str] = []
        for instruction in self.values():
            strings.extend(instruction.strings)

        return strings

    @property
    def instructions(self) -> Iterator[Instruction]:
        """Accessor of the block instructions"""
        return iter(self.values())

    def __repr__(self) -> str:
        """Block Representation"""
        return (
            f"<Block at 0x{self.start:x} ({self.type}) with {len(self)} instructions>"
        )

    def __hash__(self) -> int:
        """Hash of the block.

        The proto index is guaranteed to be unique so we can use it as a hash and
        forget about un-hashable types.

        TODO(dm):
            Check this
        """
        return self._proto_index

    def successors(self) -> Iterator[AddressT]:
        """(Addresses of the) Successors of the current block."""
        return self.parent.graph.successors(self.start)

    def predecessors(self) -> Iterator[AddressT]:
        """(Addresses of) Predecessors of the current block"""
        return self.parent.graph.predecessors(self.start)

    @property
    def last_instruction(self) -> quokka.Instruction:
        """Direct accessor of the last instruction in the block

        Raises:
            IndexError: If the block holds no instruction.
        """
        # A deque bounded to one element keeps only the final item yielded.
        deque = collections.deque(self.instructions, maxlen=1)
        return deque.pop()

    @cached_property
    def bytes(self) -> bytes:
        """Retrieve the block bytes

        All bytes for the block are read at once from the file. Because this is a
        `cached_property`, the result is stored on the instance after the first
        access.

        Returns:
            The block bytes, or an empty bytes object (with a warning logged) when
            the block has no file offset.
        """
        if self.file_offset is None:
            logger.warning("Trying to get the bytes for a block not in file.")
            return b""

        # Read the whole block at once
        block_bytes = self.program.executable.read_bytes(
            offset=self.file_offset,
            size=self.size,
        )

        return block_bytes

    @cached_property
    def pcode_insts(self) -> list[pypcode.PcodeOp]:
        """Generate PCode instructions for the block

        This method will call the backend Pypcode and generate the instructions for
        the whole block, updating all the instructions inside the block as well.

        However, all PCode instructions will from then on be attached to the block
        itself rather than to the individual instructions, so the list may differ
        after some optimizations (e.g.
        len(self.pcode_insts) != sum(len(inst.pcode_insts) for inst in block.values()) )

        Returns:
            A list of PCode instructions

        """
        # Imported on first use rather than at module import time.
        from quokka.backends.pypcode import pypcode_decode_block

        return pypcode_decode_block(self)

address property

Direct accessor of the block address

bytes cached property

Retrieve the block bytes

All bytes for the block are read at once from the file, and the result is cached after the first access.

constants cached property

Constants used by the block

end property

End address of the block, i.e. the start address plus the block size.

For architectures with fixed-length instructions (e.g. ARM), the block size is the number of instructions multiplied by the size of one instruction.

instructions property

Accessor of the block instructions

last_instruction property

Direct accessor of the last instruction in the block

pcode_insts cached property

Generate PCode instructions for the block

This method will call the backend Pypcode and generate the instructions for the whole block, updating all the instructions inside the block as well.

However, all PCode instructions will from then on be attached to the block itself rather than to the individual instructions, so the list may differ after some optimizations (e.g. len(self.pcode_insts) != sum(len(inst.pcode_insts) for inst in block.values()) )

Returns:

Type Description
list[PcodeOp]

A list of PCode instructions

program property

Return the parent program

strings cached property

Strings used by the block

__delitem__(v)

Remove an instruction from the mapping

Source code in bindings/python/quokka/block.py
121
122
123
def __delitem__(self, v: AddressT) -> None:
    """Drop the instruction stored at address `v` from the block mapping."""
    del self._raw_dict[v]

__getitem__(address)

Retrieve an instruction at address.

Source code in bindings/python/quokka/block.py
134
135
136
def __getitem__(self, address: AddressT) -> quokka.Instruction:
    """Look up the instruction stored at `address` in the block mapping."""
    return self._raw_dict[address]

__hash__()

Hash of the block.

The proto index is guaranteed to be unique so we can use it as a hash and forget about un-hashable types.

TODO(dm): Check this

Source code in bindings/python/quokka/block.py
184
185
186
187
188
189
190
191
192
193
def __hash__(self) -> int:
    """Use the protobuf index of the block as its hash value.

    The original author notes that the proto index is guaranteed to be unique,
    which lets it serve as a hash without worrying about un-hashable types.

    TODO(dm):
        Check this
    """
    proto_index = self._proto_index
    return proto_index

__init__(block_idx, start_address, function)

Constructor

Source code in bindings/python/quokka/block.py
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
def __init__(
    self,
    block_idx: Index,
    start_address: AddressT,
    function: quokka.Function,
):
    """Build the block and its address -> instruction mapping.

    In FULL mode the instructions are created from the protobuf instruction
    indexes; in LIGHT mode they are decoded with the capstone backend.

    Arguments:
        block_idx: Index in the protobuf file of the block
        start_address: Starting address of the block
        function: Parent function of the block.

    Raises:
        AssertionError: If the program exporter mode is neither FULL nor LIGHT.
    """
    self._proto_index: Index = block_idx
    self.parent: quokka.Function = function

    self.proto = function.proto.blocks[block_idx]

    self.start: int = start_address
    self.type: BlockType = BlockType.from_proto(self.proto.block_type)
    self.size: int = self.proto.size
    self.file_offset = self.proto.file_offset

    self.is_thumb = self.proto.is_thumb

    # NOTE(review): this mapping is created empty and is not filled in this
    # constructor -- confirm whether it is populated elsewhere.
    self.address_to_index: dict[AddressT, Index] = {}
    self._raw_dict: dict[AddressT, quokka.Instruction] = {}

    if self.program.mode == ExporterMode.FULL:
        # Instructions are laid out back to back: each address is the
        # previous address plus the size of the previous instruction.
        current_address: AddressT = self.start
        for inst_idx, inst_pb_idx in enumerate(self.proto.instructions_index):
            ins = quokka.Instruction(inst_pb_idx, inst_idx, current_address, self)
            self._raw_dict[current_address] = ins
            current_address += ins.size

    elif self.program.mode == ExporterMode.LIGHT:
        insts = quokka.backends.capstone.capstone_decode_block(self)
        # A mismatch is only reported; the decoded instructions are still used.
        if len(insts) != self.proto.n_instr:
            logger.warning(
                f"Decoded {len(insts)} instructions for block at 0x{self.start:x} but expected {self.proto.n_instr}."
            )
        for i, inst in enumerate(insts):
            # -1 is passed where FULL mode passes the protobuf instruction index.
            ins = quokka.Instruction(-1, i, inst.address, self, backend_inst=inst)
            self._raw_dict[ins.address] = ins
    else:
        # Raised explicitly instead of `assert False` so the check is not
        # stripped when Python runs with -O.
        raise AssertionError("Unknown exporter mode")

    self.comments: dict[AddressT, str] = {}

__iter__()

Return an iterator over the instruction list

Source code in bindings/python/quokka/block.py
142
143
144
def __iter__(self) -> Iterator:
    """Iterate over the keys of the instruction mapping (the addresses)."""
    instruction_map = self._raw_dict
    return iter(instruction_map)

__len__()

Number of instructions in the block

Source code in bindings/python/quokka/block.py
138
139
140
def __len__(self) -> int:
    """Count the instructions currently stored in the block mapping."""
    instruction_map = self._raw_dict
    return len(instruction_map)

__repr__()

Block Representation

Source code in bindings/python/quokka/block.py
178
179
180
181
182
def __repr__(self) -> str:
    """Describe the block by its start address (hex), type and instruction count."""
    description = f"<Block at 0x{self.start:x} ({self.type}) with {len(self)} instructions>"
    return description

__setitem__(k, ins)

Update the instructions mapping

Source code in bindings/python/quokka/block.py
117
118
119
def __setitem__(self, k: AddressT, ins: Instruction) -> None:
    """Store instruction `ins` under address `k`, replacing any previous entry."""
    self._raw_dict[k] = ins

add_comment(addr, value)

Set the comment at addr.

Parameters:

Name Type Description Default
addr AddressT

Comment address

required
value str

Comment value

required
Source code in bindings/python/quokka/block.py
125
126
127
128
129
130
131
132
def add_comment(self, addr: AddressT, value: str) -> None:
    """Attach a comment to the address `addr`.

    A comment already stored at that address is overwritten.

    Arguments:
        addr: Comment address
        value: Comment value
    """
    self.comments.update({addr: value})

predecessors()

(Addresses of) Predecessors of the current block

Source code in bindings/python/quokka/block.py
199
200
201
def predecessors(self) -> Iterator[AddressT]:
    """Ask the parent function graph for the predecessors of this block.

    The block is identified in the graph by its start address.
    """
    function_graph = self.parent.graph
    return function_graph.predecessors(self.start)

successors()

(Addresses of the) Successors of the current block.

Source code in bindings/python/quokka/block.py
195
196
197
def successors(self) -> Iterator[AddressT]:
    """Ask the parent function graph for the successors of this block.

    The block is identified in the graph by its start address.
    """
    function_graph = self.parent.graph
    return function_graph.successors(self.start)