Skip to content

instruction

Methods to deal with instructions and operands within a binary

Instruction

Instruction class

An instruction is the binary bread-and-butter. This class abstracts some elements and offers integration with various backends if needed.

Parameters:

Name Type Description Default
proto_index Index

Protobuf index of the instruction

required
inst_index int

Instruction index in the parent block

required
address AddressT

Instruction address

required
block Block

Parent block reference

required

Attributes:

Name Type Description
program Program

Reference to the program

parent Block

Parent block

proto_index Index

Protobuf index of the instruction

inst_tuple

A tuple composed of the (chunk_index, block_index, inst_index). This uniquely identifies an instruction within the program.

thumb

is the instruction thumb?

index int

Instruction index in the parent block

Source code in quokka/instruction.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
class Instruction:
    """Instruction class

    An instruction is the binary bread-and-butter.
    This class abstracts some elements and offers integration with various backends
    if needed.

    Arguments:
        proto_index: Protobuf index of the instruction
        inst_index: Instruction index in the parent block
        address: Instruction address
        block: Parent block reference

    Attributes:
        program: Reference to the program
        parent: Parent block
        proto_index: Protobuf index of the instruction
        inst_tuple: A tuple composed of the (chunk_index, block_index, inst_index). This
            uniquely identifies an instruction within the program.
        size: Instruction size, copied from the protobuf instruction
        thumb: is the instruction thumb?
        index: Instruction index in the parent block
        address: Instruction address
    """

    def __init__(
        self,
        proto_index: Index,
        inst_index: int,
        address: AddressT,
        block: quokka.Block,
    ):
        self.program: quokka.Program = block.program
        self.parent: quokka.Block = block
        self.proto_index: Index = proto_index

        self.inst_tuple = (block.parent.proto_index, block.proto_index, inst_index)

        instruction = self.program.proto.instructions[proto_index]

        self.size = instruction.size
        self.thumb = instruction.is_thumb

        self.index: int = inst_index

        # TODO(dm) Sometimes, IDA merge two instruction in one
        #  (e.g. 0x1ab16 of d53a), deal with that
        self.address: AddressT = address

    @cached_property
    def mnemonic(self) -> str:
        """Return the mnemonic for the instruction.

        First, try to use capstone because it's prettier.
        Otherwise, fall back to the IDA mnemonic, which is better than nothing.

        Returns:
            A string representation of the mnemonic
        """
        if self.cs_inst is not None:
            return self.cs_inst.mnemonic

        instruction = self.program.proto.instructions[self.proto_index]
        return self.program.proto.mnemonics[instruction.mnemonic_index]

    @cached_property
    def cs_inst(self) -> Optional[capstone.CsInsn]:
        """Load an instruction from Capstone backend

        NOTE(review): this is a `cached_property`, so whatever the backend returns
        (presumably including `None` on a failed decoding) is stored after the
        first access. Only a raised exception leaves the value uncached and makes
        the next access retry the decoding -- confirm against the backend.

        Returns:
            A Capstone instruction, or None if the backend returned none

        """
        return quokka.backends.capstone_decode_instruction(self)

    @cached_property
    def pcode_insts(self) -> Sequence[pypcode.PcodeOp]:
        """Retrieve the PcodeOps associated to the instruction

        Returns:
            A sequence of PCode instructions
        """
        # Imported lazily, only when P-code is actually requested
        from quokka.backends.pypcode import pypcode_decode_instruction

        return pypcode_decode_instruction(self)

    @cached_property
    def string(self) -> Optional[str]:
        """String used by the instruction (if any)

        Returns:
            The value of the first ASCII data reference, None if there is none
        """
        for data in self.data_references:
            if isinstance(data, quokka.data.Data) and data.type == DataType.ASCII:
                return data.value

        return None

    @property
    def references(self) -> Dict[ReferenceType, List[ReferenceTarget]]:
        """Returns all the references towards the instruction

        The mapping is rebuilt on every access (plain property, not cached).

        Returns:
            A mapping from reference type to the list of reference sources
        """

        ref = defaultdict(list)
        for reference in self.program.references.resolve_inst_instance(
            self.inst_tuple, towards=True
        ):
            ref[reference.type].append(reference.source)
        return ref

    @property
    def data_references(self) -> List[ReferenceTarget]:
        """Returns all data references to this instruction"""
        return self.references[ReferenceType.DATA]

    @property
    def struct_references(self) -> List[ReferenceTarget]:
        """Returns all struct references to this instruction"""
        return self.references[ReferenceType.STRUC]

    @property
    def enum_references(self) -> List[ReferenceTarget]:
        """Returns all enum references to this instruction"""
        return self.references[ReferenceType.ENUM]

    @property
    def call_references(self) -> List[ReferenceTarget]:
        """Returns all call references to this instruction"""
        return self.references[ReferenceType.CALL]

    @property
    def operands(self) -> List[Operand]:
        """Retrieve the instruction operands and initialize them with Capstone

        Only the operands exported by IDA are returned. Each one is enriched with
        the Capstone operand found at the same position, when there is one.

        Returns:
            The list of operands (entries with a -1 index are skipped)
        """
        operands: List[Operand] = []

        inst = self.program.proto.instructions[self.proto_index]

        # The Capstone decoding may be missing (None): in that case the operands
        # are built from the IDA data alone instead of failing.
        cs_inst = self.cs_inst
        cs_operands = ()
        if cs_inst is not None:
            try:
                cs_operands = cs_inst.operands
            except (AttributeError, quokka.exc.InstructionError):
                cs_operands = ()

        for idx, operand_index in enumerate(inst.operand_index):
            # Capstone may expose fewer operands than IDA
            details = cs_operands[idx] if idx < len(cs_operands) else None

            # TODO(dm): Allow partial operands with only half of the data
            if operand_index != -1:
                operands.append(
                    Operand(
                        self.program.proto.operands[operand_index],
                        capstone_operand=details,
                        program=self.program,
                    )
                )

        return operands

    @cached_property
    def call_target(self) -> Union[quokka.Chunk, bool]:
        """Find the call target of an instruction if any exists

        Returns:
            The target chunk, or False when no call reference resolves to a chunk.
            If several candidates exist, one of them is returned and a warning is
            logged.
        """
        call_target = False

        candidates = set()
        for reference in self.program.references.resolve_inst_instance(
            self.inst_tuple, ReferenceType.CALL, towards=False
        ):
            # FIX: in Quokka a bug existed where the call target could be data
            if isinstance(reference.destination, tuple):
                candidates.add(reference.destination[0])  # A chunk
            elif isinstance(reference.destination, quokka.Chunk):
                candidates.add(reference.destination)

        try:
            call_target = candidates.pop()
        except KeyError:
            # No candidate: keep the False sentinel
            pass

        if candidates:
            logger.warning(
                f"We found multiple candidate targets for 0x{self.address:x}"
            )

        return call_target

    @property
    def has_call(self) -> bool:
        """Check if the instruction has a call target"""
        return self.call_target is not False

    @cached_property
    def constants(self) -> List[int]:
        """Fast accessor for instructions constant not using Capstone.

        Returns:
            The values of the operands typed as constant (type 5), or an empty
            list if the instruction has any data reference.
        """
        constants = []
        inst = self.program.proto.instructions[self.proto_index]

        # Resolving the references is costly and does not depend on the operand:
        # do it at most once, and only if a constant operand is found.
        has_data_refs: Optional[bool] = None

        for op_index in inst.operand_index:
            if op_index == -1:
                # Same "no operand" marker as skipped in `operands`; indexing
                # with it would silently read the last operand of the table.
                continue

            operand: quokka.pb.Quokka.Operand = self.program.proto.operands[op_index]
            if operand.type != 5:
                continue

            # FIX: This bug is due to IDA mislabelling operands for some
            #   operations like ADRP on ARM where the operand points to a
            #   memory area (2) but the type is CONSTANT (5).
            #   The behavior is inconsistent with LEA on Intel arch where
            #   the operand is properly labelled (either 2 or 5).
            if has_data_refs is None:
                has_data_refs = bool(self.data_references)

            if not has_data_refs:
                constants.append(operand.value)

        return constants

    def __str__(self) -> str:
        """String representation of the instruction

        In FULL export mode, use the exported operand strings.
        Otherwise, try capstone because it's prettier, and finally fall back to
        the mnemonic, which is better than nothing.

        Returns:
            A string representation of the instruction
        """

        # First, try with the operand strings (case MODE FULL)
        inst = self.program.proto.instructions[self.proto_index]
        if self.program.mode == ExporterMode.FULL:
            operands = ", ".join(
                self.program.proto.operand_table[x] for x in inst.operand_strings
            )
            return f"<Inst {self.mnemonic} {operands}>"

        # Second tentative, use capstone
        if self.cs_inst is not None:
            return f"<{self.cs_inst.mnemonic} {self.cs_inst.op_str}>"

        # Finally, just use the mnemonic
        return f"<Inst {self.mnemonic}>"

    @cached_property
    def bytes(self) -> bytes:
        """Read the program binary to find the bytes associated to the instruction.

        The result is cached after the first access.

        Returns:
            Bytes associated to the instruction, or an empty bytes object when the
            address is not backed by the file
        """
        try:
            file_offset = self.program.addresser.file(self.address)
        except quokka.exc.NotInFileError:
            return b""

        return self.program.executable.read_bytes(
            offset=file_offset,
            size=self.size,
        )

bytes: bytes cached property

Read the program binary to find the bytes associated to the instruction.

The result is cached after the first access (this attribute is a cached property).

Returns:

Type Description
bytes

Bytes associated to the instruction

call_references: List[ReferenceTarget] property

Returns all call references to this instruction

call_target: quokka.Chunk cached property

Find the call target of an instruction if any exists

constants: List[int] cached property

Fast accessor for instructions constant not using Capstone.

cs_inst: Optional[capstone.CsInsn] cached property

Load an instruction from Capstone backend

If the decoding fails, the result won't be cached, and it will be attempted again.

Returns:

Type Description
Optional[CsInsn]

A Capstone instruction

data_references: List[ReferenceTarget] property

Returns all data references to this instruction

enum_references: List[ReferenceTarget] property

Returns all enum references to this instruction

has_call: bool property

Check if the instruction has a call target

mnemonic: str cached property

Return the mnemonic for the instruction.

First, try to use capstone because it's prettier. Otherwise, fall back to the IDA mnemonic, which is better than nothing.

Returns:

Type Description
str

A string representation of the mnemonic

operands: List[Operand] property

Retrieve the instruction operands and initialize them with Capstone

pcode_insts: Sequence[pypcode.PcodeOp] cached property

Retrieve the PcodeOps associated to the instruction

Returns:

Type Description
Sequence[PcodeOp]

A sequence of PCode instructions

references: Dict[ReferenceType, List[ReferenceTarget]] property

Returns all the references towards the instruction

string: Optional[str] cached property

String used by the instruction (if any)

struct_references: List[ReferenceTarget] property

Returns all struct references to this instruction

__str__()

String representation of the instruction

In FULL export mode, the exported operand strings are used. Otherwise, capstone is tried because it's prettier, with a final fallback to the mnemonic, which is better than nothing.

Returns:

Type Description
str

A string representation of the mnemonic

Source code in quokka/instruction.py
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
def __str__(self) -> str:
    """Build a printable representation of the instruction

    Three strategies are tried in order: the exported operand strings (only when
    the program mode is FULL), then the capstone decoding, and finally the bare
    mnemonic.

    Returns:
        The instruction rendered as a string
    """
    proto = self.program.proto
    inst = proto.instructions[self.proto_index]

    # Strategy 1: operand strings, available in FULL mode
    if self.program.mode == ExporterMode.FULL:
        operand_strings = [proto.operand_table[x] for x in inst.operand_strings]
        operands = ", ".join(operand_strings)
        return f"<Inst {self.mnemonic} {operands}>"

    # Strategy 3 (mnemonic only) applies when capstone has nothing to offer
    if self.cs_inst is None:
        return f"<Inst {self.mnemonic}>"

    # Strategy 2: capstone rendering
    return f"<{self.cs_inst.mnemonic} {self.cs_inst.op_str}>"

Operand

Operand object

An operand is an "argument" for an instruction. This class represents them but is rather lackluster at the moment.

Parameters:

Name Type Description Default
proto_operand 'quokka.pb.Quokka.Operand'

Protobuf data

required
capstone_operand

Capstone data (if any)

None
program Union[None, Program]

Program reference

None

Attributes:

Name Type Description
program Program

Program reference

type int

Operand type

flags int

Operand flags

address Optional[int]

Operand address

value_type

IDA value type

reg_id

IDA register ID (if applicable)

details

Capstone details

Source code in quokka/instruction.py
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
class Operand:
    """Operand object

    An operand is an "argument" for an instruction.
    This class represents them but is rather lackluster at the moment.

    Arguments:
        proto_operand: Protobuf data
        capstone_operand: Capstone data (if any)
        program: Program reference

    Attributes:
        program: Program reference (None when not provided)
        type: Operand type
        flags: Operand flags
        address: Operand address (None when the protobuf address is 0)
        value_type: IDA value type
        reg_id: IDA register ID (if applicable)
        details: Capstone details
    """

    # Operand rewrite to integrate capstone information as well

    def __init__(
        self,
        proto_operand: "quokka.pb.Quokka.Operand",
        capstone_operand=None,
        program: Union[None, quokka.Program] = None,
    ):
        """Constructor"""
        self.program: Optional[quokka.Program] = program

        self.type: int = proto_operand.type
        self.flags: int = proto_operand.flags  # TODO(dm)

        # An address of 0 in the protobuf is exposed as None
        self.address: Optional[int] = (
            proto_operand.address if proto_operand.address != 0 else None
        )

        self.value_type = proto_operand.value_type
        self.reg_id = proto_operand.register_id

        # Raw value, only exposed through `value` for constant operands
        self._value = proto_operand.value

        self.details = capstone_operand

    @property
    def value(self) -> Any:
        """Returns the operand value
        Warning: this is only implemented for constant operands (in IDA).

        Returns:
            The operand value

        Raises:
            NotImplementedError: if the operand is not a constant

        """
        if self.is_constant():
            return self._value

        raise NotImplementedError(
            f"Operand value is only implemented for constant operands "
            f"(operand type: {self.type})"
        )

    def is_constant(self) -> bool:
        """Check if the operand is a constant"""
        return self.type == 5  # Type: IDA constant

value: Any property

Returns the operand value. Warning: this is only implemented for constant operands (in IDA).

Returns:

Type Description
Any

The operand value

__init__(proto_operand, capstone_operand=None, program=None)

Constructor

Source code in quokka/instruction.py
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def __init__(
    self,
    proto_operand: "quokka.pb.Quokka.Operand",
    capstone_operand=None,
    program: Union[None, quokka.Program] = None,
):
    """Populate the operand from its protobuf record and optional capstone data"""
    # References handed over by the caller
    self.program: quokka.Program = program
    self.details = capstone_operand

    # Fields copied as-is from the protobuf operand
    self.type: int = proto_operand.type
    self.flags: int = proto_operand.flags  # TODO(dm)
    self.value_type = proto_operand.value_type
    self.reg_id = proto_operand.register_id
    self._value = proto_operand.value

    # An address of 0 is stored as None
    self.address: Optional[int] = None
    raw_address = proto_operand.address
    if raw_address != 0:
        self.address = raw_address

is_constant()

Check if the operand is a constant

Source code in quokka/instruction.py
107
108
109
def is_constant(self) -> bool:
    """Tell whether this operand has the constant operand type (5)"""
    operand_type = self.type
    return operand_type == 5