Skip to content

function

Functions management

Function

Bases: dict

Function object

This class represents a binary function within the Program.

Parameters:

Name Type Description Default
proto_index Index

Protobuf index of the function

required
func 'Pb.Function'

Protobuf data

required
program Program

Program reference

required

Attributes:

Name Type Description
start int

Start address

name str

Function name

mangled_name str

Function mangled name (it might be equal to the function name)

program Program

Program reference

type 'FunctionType'

Function type

func 'FunctionType'

Protobuf data

Source code in bindings/python/quokka/function.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
class Function(dict):
    """Function object

    This class represents a binary function within the Program.

    Arguments:
        proto_index: Protobuf index of the function
        func: Protobuf data
        program: Program reference

    Attributes:
        start: Start address
        name: Function name
        mangled_name: Function mangled name (it might be equal to the function name)
        program: Program reference
        type: Function type
        func: Protobuf data
    """

    def __init__(self, proto_index: Index, func: "Pb.Function", program: quokka.Program):
        """Constructor"""
        super(dict, self).__init__()
        self.start: int = program.virtual_address(func.segment_index, func.segment_offset)
        self.proto = func
        self.proto_index = proto_index
        self.mangled_name: str = func.mangled_name or func.name
        self.decompiled_code: str = func.decompiled_code or ""

        self.program: quokka.Program = program

        self.type: "FunctionType" = FunctionType.from_proto(func.function_type)
        if self.type == FunctionType.NORMAL:
            segment = self.program.get_segment(self.start)
            if segment and segment.type == SegmentType.EXTERN:
                self.type = FunctionType.EXTERN

        # Fill the dict with block addresses and their corresponding index
        self._block_data: dict[AddressT, Tuple[Index, int]] = {}
        for block_index, block in enumerate(func.blocks):  # iterate over the block protobuf objects
            block_address: int = program.virtual_address(block.segment_index, block.segment_offset)
            self._block_data[block_address] = (block_index, block.size)
        self._index_to_address = {idx: addr for addr, (idx, _) in self._block_data.items()}

        self._data_references: list[quokka.Data] = []

        # Continuous chunks of code in the function
        block_ranges = sorted((addr, addr+size) for addr, (idx, size) in self._block_data.items())
        self._chunks: list[tuple[AddressT, AddressT]] = self.coalesce_block_ranges(block_ranges)

        # TODO: Retrieving calling convention

    @property
    def has_body(self) -> bool:
        """Check if the function has a body (e.g. at least one block)"""
        return len(self._block_data) > 0

    def __getitem__(self, address: AddressT) -> Block:
        """Lazy loader for blocks within the function"""
        if address not in self._block_data:
            raise IndexError(f"Unable to find the block at 0x{address:x} in function {self.name}")
        else:
            if address in self:  # already loaded
                return super().__getitem__(address)
            else:
                block_index, block_size = self._block_data[address]
                block = Block(block_index, address, self)
                super().__setitem__(address, block)
                return block

    def values(self):
        """Return the blocks of the function"""
        for address in self._block_data.keys():
            yield self[address]

    def keys(self):
        """Return the block addresses of the function"""
        return self._block_data.keys()

    def items(self):
        """Return the block addresses and blocks of the function"""
        for address in self._block_data.keys():
            yield address, self[address]

    @cached_property
    def size(self) -> int:
        """Return the function size"""
        return self.end - self.start

    @property
    def comments(self) -> list[str]:
        """Return the function comments"""
        return self.proto.comments

    def add_comment(self, comment: str) -> None:
        """Add a comment to the function"""
        self.proto.comments.append(comment)
        self.proto.edits.comments.append(len(self.proto.comments) - 1)

    def add_edge(self, source: Block, destination: Block, type: EdgeType) -> None:
        """Add an edge to the function CFG"""
        assert source in self.blocks and destination in self.blocks, "Both source and destination blocks must belong to the function"
        edge = self.proto.edges.add()
        edge.source = source._proto_index
        edge.destination = destination._proto_index
        edge.edge_type = type.to_proto()
        edge.user_defined = True
        self.proto.edits.edges.append(len(self.proto.edges) - 1)

    @cached_property
    def strings(self) -> list[str]:
        """Return the list of strings referenced by the function"""
        strings = []
        for block in self.values():
            strings.extend(block.strings)
        return strings

    @property
    def name(self) -> str:
        """Return the function name"""
        return self.proto.name

    @name.setter
    def name(self, value: str) -> None:
        """Set the function name"""
        self.proto.name = value
        self.proto.edits.name_set = True

    @property
    def prototype(self) -> str:
        """Return the function prototype if any"""
        return self.proto.prototype

    @prototype.setter
    def prototype(self, value: str) -> None:
        """Set the function prototype"""
        self.proto.prototype = value
        self.proto.edits.prototype_set = True

    @cached_property
    def constants(self) -> list[int]:
        """Lists constants used in the function"""
        constants: list[int] = []
        for block in self.values():
            constants.extend(block.constants)

        return constants

    @cached_property
    def graph(self) -> "networkx.DiGraph":
        """Return the CFG of the function as DiGraph object"""
        graph = networkx.DiGraph()
        graph.add_nodes_from(n for n in self.keys())

        for edge in self.proto.edges:
            if (edge.source not in self._index_to_address
                or edge.destination not in self._index_to_address):
                continue
            graph.add_edge(
                self._index_to_address[edge.source],
                self._index_to_address[edge.destination],
                condition=RefType.from_proto(edge.edge_type),
            )

        return graph

    def in_function(self, address: AddressT) -> bool:
        """Check if an address belongs to the function."""
        if len(self._chunks) == 0:
            return False

        if address < min(self._chunks)[0] or address > max(self._chunks)[1]:
            return False

        for start, end in self._chunks:
            if start <= address < end:
                return True

        return False

    def get_instruction(self, address: AddressT) -> quokka.Instruction:
        """Get the instruction at `address`"""
        if not self.in_function(address):
            raise IndexError(f"Unable to find the instruction at 0x{address:x}")

        for block in self.values():  # TODO: Improve complexity
            if block.start <= address < block.end:
                return block[address]

        raise IndexError(f"Unable to find the instruction at 0x{address:x}")

    @cached_property
    def end(self) -> int:
        """Get the last address of the function"""
        try:
            max_block = max(self.keys())
            return self[max_block].end
        except ValueError:
            return self.start + 1

    @cached_property
    def callees(self) -> list['Function']:
        """Return the list of calls made by this function.
        The semantic of a "call" is to jump or call to the **starting** of a function.
        Beware that this might lead to different results than the program call graph.

        Note: The list is not deduplicated so a target may occur multiple time.
        """
        calls = set()
        # Iterate all basic blocks references to find calls to other functions
        # Note do not use basic block object thus does not requires loading them.
        for block in self.proto.blocks:
            for inst_xref in block.instructions_xref_from:
                xref = self.program.proto.references[inst_xref.xref_index]
                typ = RefType.from_proto(xref.reference_type)
                if typ.is_code:  # Keep both jumps and calls
                    if xref.destination.address in self.program:  # Pointing on a function head
                        calls.add(self.program[xref.destination.address])
                # In all other else cases we are not on a call edge
        return list(calls)

    @cached_property
    def callers(self) -> list['Function']:
        """Retrieve the function callers (the ones calling this function)"""
        callers = []
        if self.has_body:
            block_idx, _ = self._block_data[self.start]
            block = self.proto.blocks[block_idx]
            for inst_xref in (x for x in block.instructions_xref_to if x.instr_bb_idx == 0):
                xref = self.program.proto.references[inst_xref.xref_index]
                typ = RefType.from_proto(xref.reference_type)
                if typ.is_code:
                    if f := self.program.find_function_by_address(xref.source.address):
                        callers.append(f)
        return callers

    @property
    def instructions(self):
        """Yields the function instruction"""
        return (inst for block in self.values() for inst in block.instructions)

    @cached_property
    def out_degree(self) -> int:
        """Function out degree"""
        return len(set(self.callees))

    @cached_property
    def in_degree(self) -> int:
        """Function in degree"""
        return len(set(self.callers))

    @property
    def blocks(self) -> dict[AddressT, Block]:
        """Returns a dictionary which is used to reference all basic blocks
        by their address.
        """
        return {addr: self[addr] for addr in self.keys()}

    def __hash__(self) -> int:  # type: ignore
        """Hash value"""
        return self.start

    def __str__(self) -> str:
        """Function representation"""
        return f"<Function {self.name} at 0x{self.start:x}>"

    def __repr__(self) -> str:
        """Function representation"""
        return self.__str__()

    @staticmethod
    def coalesce_block_ranges(block_ranges: list[tuple[int, int]]) -> list[tuple[AddressT, AddressT]]:
        """Merge adjacent or overlapping block ranges into a reduced list.

        Arguments:
            block_ranges: Sorted list of (start, end) tuples.

        Returns:
            A list of merged (start, end) tuples.
        """
        if not block_ranges:
            return []

        merged = [block_ranges[0]]
        for start, end in block_ranges[1:]:
            prev_start, prev_end = merged[-1]
            if start == prev_end:  # Adjacent
                merged[-1] = (prev_start, end)  # keep current end
            else:
                merged.append((start, end))

        return merged

blocks property

Returns a dictionary which is used to reference all basic blocks by their address.

callees cached property

Return the list of calls made by this function. The semantic of a "call" is to jump or call to the starting of a function. Beware that this might lead to different results than the program call graph.

Note: The list is not deduplicated so a target may occur multiple time.

callers cached property

Retrieve the function callers (the ones calling this function)

comments property

Return the function comments

constants cached property

Lists constants used in the function

end cached property

Get the last address of the function

graph cached property

Return the CFG of the function as DiGraph object

has_body property

Check if the function has a body (e.g. at least one block)

in_degree cached property

Function in degree

instructions property

Yields the function instruction

name property writable

Return the function name

out_degree cached property

Function out degree

prototype property writable

Return the function prototype if any

size cached property

Return the function size

strings cached property

Return the list of strings referenced by the function

__getitem__(address)

Lazy loader for blocks within the function

Source code in bindings/python/quokka/function.py
156
157
158
159
160
161
162
163
164
165
166
167
def __getitem__(self, address: AddressT) -> Block:
    """Lazy loader for blocks within the function"""
    if address not in self._block_data:
        raise IndexError(f"Unable to find the block at 0x{address:x} in function {self.name}")
    else:
        if address in self:  # already loaded
            return super().__getitem__(address)
        else:
            block_index, block_size = self._block_data[address]
            block = Block(block_index, address, self)
            super().__setitem__(address, block)
            return block

__hash__()

Hash value

Source code in bindings/python/quokka/function.py
357
358
359
def __hash__(self) -> int:  # type: ignore
    """Hash value"""
    return self.start

__init__(proto_index, func, program)

Constructor

Source code in bindings/python/quokka/function.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def __init__(self, proto_index: Index, func: "Pb.Function", program: quokka.Program):
    """Constructor"""
    super(dict, self).__init__()
    self.start: int = program.virtual_address(func.segment_index, func.segment_offset)
    self.proto = func
    self.proto_index = proto_index
    self.mangled_name: str = func.mangled_name or func.name
    self.decompiled_code: str = func.decompiled_code or ""

    self.program: quokka.Program = program

    self.type: "FunctionType" = FunctionType.from_proto(func.function_type)
    if self.type == FunctionType.NORMAL:
        segment = self.program.get_segment(self.start)
        if segment and segment.type == SegmentType.EXTERN:
            self.type = FunctionType.EXTERN

    # Fill the dict with block addresses and their corresponding index
    self._block_data: dict[AddressT, Tuple[Index, int]] = {}
    for block_index, block in enumerate(func.blocks):  # iterate over the block protobuf objects
        block_address: int = program.virtual_address(block.segment_index, block.segment_offset)
        self._block_data[block_address] = (block_index, block.size)
    self._index_to_address = {idx: addr for addr, (idx, _) in self._block_data.items()}

    self._data_references: list[quokka.Data] = []

    # Continuous chunks of code in the function
    block_ranges = sorted((addr, addr+size) for addr, (idx, size) in self._block_data.items())
    self._chunks: list[tuple[AddressT, AddressT]] = self.coalesce_block_ranges(block_ranges)

__repr__()

Function representation

Source code in bindings/python/quokka/function.py
365
366
367
def __repr__(self) -> str:
    """Function representation"""
    return self.__str__()

__str__()

Function representation

Source code in bindings/python/quokka/function.py
361
362
363
def __str__(self) -> str:
    """Function representation"""
    return f"<Function {self.name} at 0x{self.start:x}>"

add_comment(comment)

Add a comment to the function

Source code in bindings/python/quokka/function.py
193
194
195
196
def add_comment(self, comment: str) -> None:
    """Add a comment to the function"""
    self.proto.comments.append(comment)
    self.proto.edits.comments.append(len(self.proto.comments) - 1)

add_edge(source, destination, type)

Add an edge to the function CFG

Source code in bindings/python/quokka/function.py
198
199
200
201
202
203
204
205
206
def add_edge(self, source: Block, destination: Block, type: EdgeType) -> None:
    """Add an edge to the function CFG"""
    assert source in self.blocks and destination in self.blocks, "Both source and destination blocks must belong to the function"
    edge = self.proto.edges.add()
    edge.source = source._proto_index
    edge.destination = destination._proto_index
    edge.edge_type = type.to_proto()
    edge.user_defined = True
    self.proto.edits.edges.append(len(self.proto.edges) - 1)

coalesce_block_ranges(block_ranges) staticmethod

Merge adjacent or overlapping block ranges into a reduced list.

Parameters:

Name Type Description Default
block_ranges list[tuple[int, int]]

Sorted list of (start, end) tuples.

required

Returns:

Type Description
list[tuple[AddressT, AddressT]]

A list of merged (start, end) tuples.

Source code in bindings/python/quokka/function.py
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
@staticmethod
def coalesce_block_ranges(block_ranges: list[tuple[int, int]]) -> list[tuple[AddressT, AddressT]]:
    """Merge adjacent or overlapping block ranges into a reduced list.

    Arguments:
        block_ranges: Sorted list of (start, end) tuples.

    Returns:
        A list of merged (start, end) tuples.
    """
    if not block_ranges:
        return []

    merged = [block_ranges[0]]
    for start, end in block_ranges[1:]:
        prev_start, prev_end = merged[-1]
        if start == prev_end:  # Adjacent
            merged[-1] = (prev_start, end)  # keep current end
        else:
            merged.append((start, end))

    return merged

get_instruction(address)

Get the instruction at address

Source code in bindings/python/quokka/function.py
279
280
281
282
283
284
285
286
287
288
def get_instruction(self, address: AddressT) -> quokka.Instruction:
    """Get the instruction at `address`"""
    if not self.in_function(address):
        raise IndexError(f"Unable to find the instruction at 0x{address:x}")

    for block in self.values():  # TODO: Improve complexity
        if block.start <= address < block.end:
            return block[address]

    raise IndexError(f"Unable to find the instruction at 0x{address:x}")

in_function(address)

Check if an address belongs to the function.

Source code in bindings/python/quokka/function.py
265
266
267
268
269
270
271
272
273
274
275
276
277
def in_function(self, address: AddressT) -> bool:
    """Check if an address belongs to the function."""
    if len(self._chunks) == 0:
        return False

    if address < min(self._chunks)[0] or address > max(self._chunks)[1]:
        return False

    for start, end in self._chunks:
        if start <= address < end:
            return True

    return False

items()

Return the block addresses and blocks of the function

Source code in bindings/python/quokka/function.py
178
179
180
181
def items(self):
    """Return the block addresses and blocks of the function"""
    for address in self._block_data.keys():
        yield address, self[address]

keys()

Return the block addresses of the function

Source code in bindings/python/quokka/function.py
174
175
176
def keys(self):
    """Return the block addresses of the function"""
    return self._block_data.keys()

values()

Return the blocks of the function

Source code in bindings/python/quokka/function.py
169
170
171
172
def values(self):
    """Return the blocks of the function"""
    for address in self._block_data.keys():
        yield self[address]

dereference_thunk(item, caller=False)

Dereference a thunk

This method is used to resolve a thunk calls / callers. As thunk function only have 1 relation : FUNC (call x) -> THUNK X -> X , it disrupts the call graph and heuristics based on graph degrees.

Parameters:

Name Type Description Default
item Function

A function

required
caller bool

True if we want to find the callers (e.g. the functions that call item) False if we want to find the callee (e.g. function that are called by item)

False

Raises:

Type Description
ThunkMissingError

When no thunk has been found

FunctionMissingError

When no function has been found

Source code in bindings/python/quokka/function.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def dereference_thunk(item: Function, caller: bool = False) -> Function:
    """Dereference a thunk

    This method is used to resolve a thunk calls / callers. As thunk function only have
    1 relation : FUNC (call x) -> THUNK X -> X , it disrupts the call graph and
    heuristics based on graph degrees.

    Arguments:
        item: A function
        caller: True if we want to find the callers (e.g. the functions that call item)
                False if we want to find the callee (e.g. function that are called by
                item)

    Raises:
        ThunkMissingError: When no thunk has been found
        FunctionMissingError: When no function has been found
    """
    function = item

    # Do not try to (de)reference if we do not meet the prerequisites
    if caller is False and function.type != FunctionType.THUNK:
        # Only dereference THUNK function
        return function

    if caller is True and function.in_degree != 1:
        # Only try to reference function with in_degree == 1
        return function

    reference = function.callees if caller is False else function.callers

    try:
        candidate = reference[0]
    except IndexError as exc:
        raise FunctionMissingError("Missing func referenced by thunk") from exc

    if candidate.type == FunctionType.THUNK and caller is not True:
        # Recursive call for multi layered THUNK
        return dereference_thunk(candidate, caller)

    if caller and candidate.type != FunctionType.THUNK:
        return function

    return candidate

resolve_effective_degrees(item)

Compute a Function {in, out} degrees by resolving thunks

Source code in bindings/python/quokka/function.py
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def resolve_effective_degrees(item: Function) -> Tuple[int, int]:
    """Compute a Function {in, out} degrees by resolving thunks"""
    in_degree = item.in_degree
    try:
        in_func = dereference_thunk(item, True)
        in_degree = in_func.in_degree
    except FunctionMissingError:
        pass

    try:
        out_func: Function = dereference_thunk(item, False)
    except FunctionMissingError:
        out_func = item

    return in_degree, out_func.out_degree