Skip to content

block

Methods to use and deal with blocks in a binary.

Block

Bases: MutableMapping

Basic Block class

A basic block is a sequence of instructions without any (basic) incoming flows disrupting it (except for returns from calls).

While blocks may be serialized in the exported file, a new instance of this class is created for each block in the program (so they all have a unique address).

Parameters:

Name Type Description Default
block_idx Index

Index in the protobuf file of the block

required
start_address AddressT

Starting address of the block

required
chunk Chunk

Parent chunk (e.g. function) of the block.

required

Attributes:

Name Type Description
proto_index Index

Index inside the protobuf

parent Chunk

A reference to the parent Chunk

program Program

A reference to the parent Program

start int

Start address

fake bool

Is it a fake block (e.g. belongs to a fake chunk)

type BlockType

Block type

address_to_index Dict[AddressT, Index]

A mapping of addresses to instruction indexes

end int

End address

comments Dict[AddressT, str]

Mapping of addresses to the comments attached to the block

references Dict[str, List[int]]

References mapping attached to the block (TODO(dm): remove me?)

Source code in quokka/block.py
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
class Block(MutableMapping):
    """Basic Block class

    A basic block is a sequence of instructions without any (basic) incoming flows
    disrupting it (except for returns from calls).

    While blocks may be serialized in the exported file, a new instance of this class is
    created for each block in the program (so they all have a unique address).

    The mapping interface is keyed by instruction address: iterating a block yields
    addresses, and indexing it builds the `quokka.Instruction` at that address.

    Arguments:
        block_idx: Index in the protobuf file of the block
        start_address: Starting address of the block
        chunk: Parent chunk (e.g. function) of the block.

    Attributes:
        proto_index: Index inside the protobuf
        parent: A reference to the parent Chunk
        program: A reference to the parent Program
        start: Start address
        fake: Is it a fake block (e.g. belongs to a fake chunk)
        type: Block type
        address_to_index: A mapping of addresses to instruction indexes
        end: End address (start address plus the size of every instruction)
        comments: Mapping of addresses to the comment attached at that address
        references: References mapping attached to the block (TODO(dm): remove me?)
    """

    def __init__(
        self,
        block_idx: Index,
        start_address: AddressT,
        chunk: quokka.Chunk,
    ):
        """Constructor

        Arguments:
            block_idx: Index of the block inside the parent chunk's `blocks` list
            start_address: Address of the first instruction of the block
            chunk: Parent chunk; its `program` gives access to the protobuf
        """
        self.proto_index: Index = block_idx
        self.parent: quokka.Chunk = chunk
        self.program: quokka.Program = chunk.program

        block: "quokka.pb.Quokka.FunctionChunk.Block"
        block = self.program.proto.function_chunks[chunk.proto_index].blocks[block_idx]

        self.start: int = start_address
        self.fake: bool = block.is_fake
        self.type: BlockType = BlockType.from_proto(block.block_type)

        # address -> position of the instruction inside this block
        self.address_to_index: Dict[AddressT, Index] = {}
        # address -> index of the instruction in the program-wide protobuf table
        self._raw_dict: Dict[AddressT, Index] = {}

        # Instructions are laid out back to back: each address is the previous
        # address plus the size of the previous instruction.
        current_address: AddressT = self.start
        for instruction_index, instruction_proto_index in enumerate(
            block.instructions_index
        ):
            self.address_to_index[current_address] = instruction_index
            self._raw_dict[current_address] = instruction_proto_index
            current_address += self.program.proto.instructions[
                instruction_proto_index
            ].size

        self.end: int = current_address

        self.comments: Dict[AddressT, str] = {}
        self.references: Dict[str, List[int]] = {"src": [], "dst": []}

    def __setitem__(self, k: AddressT, v: Index) -> None:
        """Update the instructions mapping

        NOTE(review): only the address -> protobuf index mapping is updated;
        `address_to_index` is left untouched, so `__getitem__` raises `KeyError` for
        an address that was not registered by the constructor.

        Arguments:
            k: Instruction address
            v: Index of the instruction in the protobuf
        """
        self._raw_dict.__setitem__(k, v)

    def __delitem__(self, v: AddressT) -> None:
        """Remove an instruction from the mapping

        NOTE(review): the matching `address_to_index` entry is not removed.

        Arguments:
            v: Address of the instruction to remove

        Raises:
            KeyError: if no instruction is registered at `v`
        """
        self._raw_dict.__delitem__(v)

    def add_comment(self, addr: AddressT, value: str) -> None:
        """Set the comment at `addr`.

        An existing comment at the same address is replaced.

        Arguments:
            addr: Comment address
            value: Comment value
        """
        self.comments[addr] = value

    @cached_property
    def strings(self) -> List[str]:
        """Compute the list of strings used in this block.

        Strings are de-duplicated and returned in the order in which they are first
        encountered, so the result is stable from one run to the next.
        """
        # A dict is used as an insertion-ordered set: unlike `set`, its iteration
        # order does not depend on string hash randomization.
        unique_strings: Dict[str, None] = {}

        for reference in self.program.references.resolve_block_references(
            self.parent.proto_index,
            self.proto_index,
            ReferenceType.DATA,
            towards=True,
        ):
            reference_source = reference.source
            if (
                isinstance(reference_source, quokka.data.Data)
                and reference_source.type == DataType.ASCII
            ):
                unique_strings[reference_source.value] = None

        return list(unique_strings)

    def __getitem__(self, address: AddressT) -> quokka.Instruction:
        """Retrieve an instruction at `address`.

        A new `quokka.Instruction` object is built at every call.

        Raises:
            KeyError: if no instruction is registered at `address`
        """
        item = self._raw_dict.__getitem__(address)
        return quokka.Instruction(
            proto_index=item,
            inst_index=self.address_to_index[address],
            address=address,
            block=self,
        )

    def __len__(self) -> int:
        """Number of instructions in the block"""
        return len(self._raw_dict)

    def __iter__(self) -> Iterator:
        """Return an iterator over the addresses of the block instructions"""
        return iter(self._raw_dict)

    @property
    def data_references(self):
        """Return the data referenced by the instructions of this block.

        The list is rebuilt at every access.
        """
        data_references: List[quokka.Data] = []
        for instruction in self.values():
            data_references.extend(instruction.data_references)

        return data_references

    @property
    def size(self) -> int:
        """Size of the block.

        This number is the number of instructions * the size of an instruction for
        architectures with fixed length instructions (e.g. ARM).
        """
        return self.end - self.start

    @cached_property
    def constants(self) -> List[int]:
        """Constants used by the block (concatenated in instruction order)"""
        constants: List[int] = []
        for instruction in self.values():
            constants.extend(instruction.constants)

        return constants

    @property
    def instructions(self):
        """Accessor of the block instructions"""
        return self.values()

    def __repr__(self) -> str:
        """Block Representation"""
        return (
            f"<Block at 0x{self.start:x} ({self.type}) with {len(self)} instructions>"
        )

    def __hash__(self) -> int:
        """Hash of the block.

        The proto index is used as the hash so we can forget about un-hashable types.
        It is the index of the block inside its parent chunk, so blocks belonging to
        different chunks may share the same hash value.

        TODO(dm):
            Check this
        """
        return self.proto_index

    def successors(self) -> Iterator[AddressT]:
        """(Addresses of the) Successors of the current block."""
        return self.parent.graph.successors(self.start)

    def predecessors(self) -> Iterator[AddressT]:
        """(Addresses of) Predecessors of the current block"""
        return self.parent.graph.predecessors(self.start)

    @property
    def last_instruction(self) -> quokka.Instruction:
        """Direct accessor of the last instruction in the block

        Raises:
            IndexError: if the block does not contain any instruction
        """
        # Only the last address is looked up, so a single Instruction is built
        # instead of one per instruction of the block.
        try:
            last_address = next(reversed(self._raw_dict))
        except StopIteration:
            raise IndexError("The block does not contain any instruction") from None

        return self[last_address]

    @cached_property
    def bytes(self) -> bytes:
        """Retrieve the block bytes

        All bytes for the block are read at once in the file and the result is
        cached after the first access. An empty bytes object is returned (and a
        warning logged) when the block start address is not backed by the file.
        """
        try:
            file_offset: int = self.program.addresser.file(self.start)
        except quokka.NotInFileError:
            logger.warning("Trying to get the bytes for a block not in file.")
            return b""

        # Read all block at once
        block_bytes = self.program.executable.read_bytes(
            offset=file_offset,
            size=self.size,
        )

        return block_bytes

    @cached_property
    def pcode_insts(self) -> List[pypcode.PcodeOp]:
        """Generate PCode instructions for the block

        This method will call the backend Pypcode and generate the instructions for
        the whole block, updating all the instructions inside the block as well.

        However, the PCode instructions are from then on attached to the block itself
        and not to the individual instructions, so the list may differ after some
        optimizations (e.g.
        len(self.pcode_insts) != sum(len(inst.pcode_insts) for inst in block.values()) )

        Returns:
            A list of PCode instructions

        """
        # Imported lazily, at first use, rather than at module import time.
        from quokka.backend.pypcode import pypcode_decode_block

        return pypcode_decode_block(self)

bytes: bytes cached property

Retrieve the block bytes

All bytes for the block are read from the file at once, and the result is cached after the first access.

constants: List[int] cached property

Constants used by the block

data_references property

Return (and compute if needed) the data referenced by this block.

instructions property

Accessor of the block instructions

last_instruction: quokka.Instruction property

Direct accessor of the last instruction in the block

pcode_insts: List[pypcode.PcodeOp] cached property

Generate PCode instructions for the block

This method will call the backend Pypcode and generate the instructions for the whole block, updating all the instructions inside the block as well.

However, the PCode instructions are from then on attached to the block itself and not to the individual instructions, so the list may differ after some optimizations (e.g. len(self.pcode_insts) != sum(len(inst.pcode_insts) for inst in block.values()) )

Returns:

Type Description
List[PcodeOp]

A list of PCode instructions

size: int property

Size of the block.

This number is the number of instructions * the size of an instruction for architectures with fixed length instructions (e.g. ARM).

strings: List[str] cached property

Compute the list of strings used in this block.

__delitem__(v)

Remove an instruction from the mapping

Source code in quokka/block.py
110
111
112
def __delitem__(self, v: AddressT) -> None:
    """Drop the instruction registered at address `v` from the mapping"""
    del self._raw_dict[v]

__getitem__(address)

Retrieve an instruction at address.

Source code in quokka/block.py
144
145
146
147
148
149
150
151
152
def __getitem__(self, address: AddressT) -> quokka.Instruction:
    """Build the instruction located at `address`."""
    # A KeyError is raised here when no instruction is registered at `address`.
    proto_index = self._raw_dict[address]
    instruction_kwargs = dict(
        proto_index=proto_index,
        inst_index=self.address_to_index[address],
        address=address,
        block=self,
    )
    return quokka.Instruction(**instruction_kwargs)

__hash__()

Hash of the block.

The proto index is guaranteed to be unique so we can use it as a hash and forget about un-hashable types.

TODO(dm): Check this

Source code in quokka/block.py
200
201
202
203
204
205
206
207
208
209
def __hash__(self) -> int:
    """Hash of the block.

    The block is hashed through its proto index, which avoids having to hash any
    of its (un-hashable) attributes.

    TODO(dm):
        Check this
    """
    block_hash = self.proto_index
    return block_hash

__init__(block_idx, start_address, chunk)

Constructor

Source code in quokka/block.py
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def __init__(
    self,
    block_idx: Index,
    start_address: AddressT,
    chunk: quokka.Chunk,
):
    """Constructor"""
    program: quokka.Program = chunk.program

    self.proto_index: Index = block_idx
    self.parent: quokka.Chunk = chunk
    self.program: quokka.Program = program

    # Protobuf message describing this block inside its parent chunk
    proto_block: "quokka.pb.Quokka.FunctionChunk.Block"
    proto_block = program.proto.function_chunks[chunk.proto_index].blocks[block_idx]

    self.start: int = start_address
    self.fake: bool = proto_block.is_fake
    self.type: BlockType = BlockType.from_proto(proto_block.block_type)

    positions: Dict[AddressT, Index] = {}
    proto_indexes: Dict[AddressT, Index] = {}
    self.address_to_index = positions
    self._raw_dict = proto_indexes

    # Walk the instructions in order; each one starts where the previous ends.
    cursor: AddressT = start_address
    for position, proto_idx in enumerate(proto_block.instructions_index):
        positions[cursor] = position
        proto_indexes[cursor] = proto_idx
        cursor += program.proto.instructions[proto_idx].size

    self.end: int = cursor

    self.comments: Dict[AddressT, str] = {}
    self.references: Dict[str, List[int]] = {"src": [], "dst": []}

__iter__()

Return an iterator over the instruction list

Source code in quokka/block.py
158
159
160
def __iter__(self) -> Iterator:
    """Iterate over the addresses of the instructions of the block"""
    addresses = self._raw_dict
    return iter(addresses)

__len__()

Number of instructions in the block

Source code in quokka/block.py
154
155
156
def __len__(self) -> int:
    """Count the instructions registered in the block"""
    instruction_count = len(self._raw_dict)
    return instruction_count

__repr__()

Block Representation

Source code in quokka/block.py
194
195
196
197
198
def __repr__(self) -> str:
    """Human readable summary: start address, type and instruction count"""
    text = f"<Block at 0x{self.start:x} ({self.type}) with {len(self)} instructions>"
    return text

__setitem__(k, v)

Update the instructions mapping

Source code in quokka/block.py
106
107
108
def __setitem__(self, k: AddressT, v: Index) -> None:
    """Register protobuf index `v` for the instruction at address `k`"""
    self._raw_dict[k] = v

add_comment(addr, value)

Set the comment at addr.

Parameters:

Name Type Description Default
addr AddressT

Comment address

required
value str

Comment value

required
Source code in quokka/block.py
114
115
116
117
118
119
120
121
def add_comment(self, addr: AddressT, value: str) -> None:
    """Attach a comment to the address `addr`.

    A comment already stored at that address is overwritten.

    Arguments:
        addr: Comment address
        value: Comment value
    """
    block_comments = self.comments
    block_comments[addr] = value

predecessors()

(Addresses of) Predecessors of the current block

Source code in quokka/block.py
215
216
217
def predecessors(self) -> Iterator[AddressT]:
    """(Addresses of) Blocks preceding the current block in the parent graph"""
    graph = self.parent.graph
    return graph.predecessors(self.start)

successors()

(Addresses of the) Successors of the current block.

Source code in quokka/block.py
211
212
213
def successors(self) -> Iterator[AddressT]:
    """(Addresses of the) Blocks following the current block in the parent graph."""
    graph = self.parent.graph
    return graph.successors(self.start)