Skip to content

function

Functions and chunk management

Chunk

Bases: MutableMapping, Iterable

Chunk object

A chunk is an IDA specific item that is used for code reuse across functions.

Parameters:

Name Type Description Default
chunk_idx Index

Index of the chunk in the protobuf

required
program Program

Backref to the program

required
accepted_addresses List[AddressT]

A list of address for blocks heads. Used only for fake chunks.

None

Attributes:

Name Type Description
program Program

Program reference

proto_index Index

Index inside the protobuf

start AddressT

Start address

fake bool

Is the chunk fake?

index_to_address Dict[int, int]

Mapping from index to block starting address

chunk_type FunctionType

Chunk type

chunk

Proto information

Source code in quokka/function.py
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
class Chunk(MutableMapping, Iterable):
    """Chunk object

    A chunk is an IDA specific item that is used for code reuse across functions.

    Arguments:
        chunk_idx: Index of the chunk in the protobuf
        program: Backref to the program
        accepted_addresses: A list of address for blocks heads. Used only
            for fake chunks.

    Attributes:
        program: Program reference
        proto_index: Index inside the protobuf
        start: Start address
        fake: Is the chunk fake?
        index_to_address: Mapping from index to block starting address
        chunk_type: Chunk type
        chunk: Proto information
    """

    def __init__(
        self,
        chunk_idx: Index,
        program: quokka.Program,
        accepted_addresses: List[AddressT] = None,
    ):
        """Constructor"""
        self.program: quokka.Program = program

        self.proto_index: Index = chunk_idx
        chunk = self.program.proto.function_chunks[chunk_idx]

        self.start: AddressT = self.program.addresser.absolute(chunk.offset_start)

        self.fake: bool = chunk.is_fake
        self._raw_dict: Dict[AddressT, Index] = {}

        self._graph: Optional["networkx.DiGraph"] = None

        self.index_to_address: Dict[int, int] = {}
        self.chunk = chunk

        self.chunk_type: FunctionType = FunctionType.NORMAL

        for block_index, block in enumerate(self.chunk.blocks):
            block_address: int = self.start + block.offset_start

            if (
                accepted_addresses is not None
                and block_address not in accepted_addresses
            ):
                continue

            self.index_to_address[block_index] = block_address
            self._raw_dict[block_address] = block_index

        if self.index_to_address:
            # We only update the start when we have a fake chunk (because it may have
            # been split out)
            if chunk.is_fake:
                self.start = min(self.index_to_address.values())

            assert self.start == min(
                self.index_to_address.values()
            ), "Wrong start of Chunk"

    def __len__(self) -> int:
        """Number of blocks in the chunk"""
        return len(self._raw_dict)

    def __iter__(self) -> Iterator:
        """Iterator over the blocks"""
        return self._raw_dict.__iter__()

    def __setitem__(self, k: AddressT, v: Index) -> None:
        """Set block"""
        self._raw_dict.__setitem__(k, v)

    def __delitem__(self, k: int) -> None:
        """Remove a block"""
        self._raw_dict.__delitem__(k)

    def __getitem__(self, address: AddressT) -> quokka.Block:
        """Lazy loader for blocks"""
        index: Index = self._raw_dict.__getitem__(address)
        return quokka.block.Block(index, address, self)

    def __str__(self) -> str:
        """Chunk representation"""
        return f"<Chunk at 0x{self.start:x} with {len(self)} block(s)>"

    @property
    def block_ranges(self) -> List[Tuple[AddressT, AddressT]]:
        """Returns the sorted list of block ranges.

        A block range is a tuple (block.start, block.end).
        """
        block_ranges = []
        for block in self.values():
            block_ranges.append((block.start, block.end))

        block_ranges = sorted(block_ranges)
        return block_ranges

    @cached_property
    def graph(self) -> "networkx.DiGraph":
        """Return the CFG of the chunk as DiGraph object"""
        graph = networkx.DiGraph()
        graph.add_nodes_from(n for n in self._raw_dict)

        for edge in self.program.proto.function_chunks[self.proto_index].edges:
            if (
                edge.source.block_id not in self.index_to_address
                or edge.destination.block_id not in self.index_to_address
            ):
                continue
            graph.add_edge(
                self.index_to_address[edge.source.block_id],
                self.index_to_address[edge.destination.block_id],
                condition=EdgeType.from_proto(edge.edge_type),
            )

        return graph

    @cached_property
    def strings(self) -> List[str]:
        """Return the strings used in the chunk"""

        strings = set()
        for block in self.values():
            strings.update(block.strings)

        return list(strings)

    @cached_property
    def constants(self) -> List[int]:
        """Return the constants used in the chunk"""
        constants = []
        for block in self.values():
            constants.extend(block.constants)

        return constants

    @property
    def data_references(self) -> List[quokka.Data]:
        """Returns the data reference in the chunk"""
        data_references: List[quokka.Data] = []
        for instruction in self.values():
            data_references.extend(instruction.data_references)

        return data_references

    @cached_property
    def end(self) -> AddressT:
        """Compute the end address of a chunk"""
        try:
            max_block = max(self.keys())
            return self[max_block].end
        except ValueError:
            return self.start + 1

    @cached_property
    def size(self) -> int:
        """Return the size of a chunk"""
        return self.end - self.start

    @property
    def calls(self) -> List[quokka.Chunk]:
        """Return the list of calls made by this chunk.

        Note: The list is not deduplicated so a target may occur multiple time.
        """

        calls = []
        for inst_instance in self.program.references.resolve_calls(self, towards=False):
            if isinstance(inst_instance, tuple):
                calls.append(inst_instance[0])
            else:
                calls.append(inst_instance)

        return calls

    @property
    def callers(self) -> List[Chunk]:
        """Return the list of callers of this chunk."""

        callers = []
        for inst_instance in self.program.references.resolve_calls(self, towards=True):
            if isinstance(inst_instance, tuple):
                callers.append(inst_instance[0])
            else:
                callers.append(inst_instance)

        return callers

    @cached_property
    def out_degree(self) -> int:
        """Compute the chunk out degree

        Get the out degree of a chunk (e.g. the number of distinct chunks called by
        this one).

        Returns:
            Number of distinct out edges
        """
        return len(set(self.calls))

    @cached_property
    def in_degree(self) -> int:
        """Compute the chunk in degree

        Get the in-degree of a chunk. This is the number of distinct incoming edges.

        returns:
            Chunk in-degree
        """
        return len(set(self.callers))

    @property
    def instructions(self) -> Generator:
        """Iterator over instructions in the chunk"""
        return (inst for block in self.values() for inst in block.instructions)

    def in_chunk(self, address: AddressT) -> bool:
        """Check if an address belongs to the chunk."""
        if len(self.block_ranges) == 0:
            return False

        if address < min(self.block_ranges)[0] or address > max(self.block_ranges)[1]:
            return False

        for start, end in self.block_ranges:
            if start <= address < end:
                return True

        return False

    def get_instruction(self, address: AddressT) -> quokka.Instruction:
        """Get the instruction at `address`"""
        if not self.in_chunk(address):
            raise IndexError(f"Unable to find the instruction at 0x{address:x}")

        for block in self.values():
            if block.start <= address < block.end:
                return block[address]

        raise IndexError(f"Unable to find the instruction at 0x{address:x}")

    def get_block(self, address: AddressT) -> quokka.Block:
        """Get the block at `address`"""
        return self.__getitem__(address)

    def __hash__(self) -> int:
        """Override hash method to return an unique index"""
        return self.start

    @cached_property
    def name(self) -> str:
        """Chunk name.

        The chunk name is the one of its parent if it exists or is empty otherwise.

        Returns:
            Chunk name
        """
        try:
            return self.program.get_first_function_by_chunk(self).name
        except quokka.exc.FunctionMissingError:
            return ""

block_ranges: List[Tuple[AddressT, AddressT]] property

Returns the sorted list of block ranges.

A block range is a tuple (block.start, block.end).

callers: List[Chunk] property

Return the list of callers of this chunk.

calls: List[quokka.Chunk] property

Return the list of calls made by this chunk.

Note: The list is not deduplicated so a target may occur multiple time.

constants: List[int] cached property

Return the constants used in the chunk

data_references: List[quokka.Data] property

Returns the data reference in the chunk

end: AddressT cached property

Compute the end address of a chunk

graph: 'networkx.DiGraph' cached property

Return the CFG of the chunk as DiGraph object

in_degree: int cached property

Compute the chunk in degree

Get the in-degree of a chunk. This is the number of distinct incoming edges.

Returns:

Type Description
int

Chunk in-degree

instructions: Generator property

Iterator over instructions in the chunk

name: str cached property

Chunk name.

The chunk name is the one of its parent if it exists or is empty otherwise.

Returns:

Type Description
str

Chunk name

out_degree: int cached property

Compute the chunk out degree

Get the out degree of a chunk (e.g. the number of distinct chunks called by this one).

Returns:

Type Description
int

Number of distinct out edges

size: int cached property

Return the size of a chunk

strings: List[str] cached property

Return the strings used in the chunk

__delitem__(k)

Remove a block

Source code in quokka/function.py
201
202
203
def __delitem__(self, k: int) -> None:
    """Remove a block"""
    self._raw_dict.__delitem__(k)

__getitem__(address)

Lazy loader for blocks

Source code in quokka/function.py
205
206
207
208
def __getitem__(self, address: AddressT) -> quokka.Block:
    """Lazy loader for blocks"""
    index: Index = self._raw_dict.__getitem__(address)
    return quokka.block.Block(index, address, self)

__hash__()

Override hash method to return an unique index

Source code in quokka/function.py
375
376
377
def __hash__(self) -> int:
    """Override hash method to return an unique index"""
    return self.start

__init__(chunk_idx, program, accepted_addresses=None)

Constructor

Source code in quokka/function.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def __init__(
    self,
    chunk_idx: Index,
    program: quokka.Program,
    accepted_addresses: List[AddressT] = None,
):
    """Constructor"""
    self.program: quokka.Program = program

    self.proto_index: Index = chunk_idx
    chunk = self.program.proto.function_chunks[chunk_idx]

    self.start: AddressT = self.program.addresser.absolute(chunk.offset_start)

    self.fake: bool = chunk.is_fake
    self._raw_dict: Dict[AddressT, Index] = {}

    self._graph: Optional["networkx.DiGraph"] = None

    self.index_to_address: Dict[int, int] = {}
    self.chunk = chunk

    self.chunk_type: FunctionType = FunctionType.NORMAL

    for block_index, block in enumerate(self.chunk.blocks):
        block_address: int = self.start + block.offset_start

        if (
            accepted_addresses is not None
            and block_address not in accepted_addresses
        ):
            continue

        self.index_to_address[block_index] = block_address
        self._raw_dict[block_address] = block_index

    if self.index_to_address:
        # We only update the start when we have a fake chunk (because it may have
        # been split out)
        if chunk.is_fake:
            self.start = min(self.index_to_address.values())

        assert self.start == min(
            self.index_to_address.values()
        ), "Wrong start of Chunk"

__iter__()

Iterator over the blocks

Source code in quokka/function.py
193
194
195
def __iter__(self) -> Iterator:
    """Iterator over the blocks"""
    return self._raw_dict.__iter__()

__len__()

Number of blocks in the chunk

Source code in quokka/function.py
189
190
191
def __len__(self) -> int:
    """Number of blocks in the chunk"""
    return len(self._raw_dict)

__setitem__(k, v)

Set block

Source code in quokka/function.py
197
198
199
def __setitem__(self, k: AddressT, v: Index) -> None:
    """Set block"""
    self._raw_dict.__setitem__(k, v)

__str__()

Chunk representation

Source code in quokka/function.py
210
211
212
def __str__(self) -> str:
    """Chunk representation"""
    return f"<Chunk at 0x{self.start:x} with {len(self)} block(s)>"

get_block(address)

Get the block at address

Source code in quokka/function.py
371
372
373
def get_block(self, address: AddressT) -> quokka.Block:
    """Get the block at `address`"""
    return self.__getitem__(address)

get_instruction(address)

Get the instruction at address

Source code in quokka/function.py
360
361
362
363
364
365
366
367
368
369
def get_instruction(self, address: AddressT) -> quokka.Instruction:
    """Get the instruction at `address`"""
    if not self.in_chunk(address):
        raise IndexError(f"Unable to find the instruction at 0x{address:x}")

    for block in self.values():
        if block.start <= address < block.end:
            return block[address]

    raise IndexError(f"Unable to find the instruction at 0x{address:x}")

in_chunk(address)

Check if an address belongs to the chunk.

Source code in quokka/function.py
346
347
348
349
350
351
352
353
354
355
356
357
358
def in_chunk(self, address: AddressT) -> bool:
    """Check if an address belongs to the chunk."""
    if len(self.block_ranges) == 0:
        return False

    if address < min(self.block_ranges)[0] or address > max(self.block_ranges)[1]:
        return False

    for start, end in self.block_ranges:
        if start <= address < end:
            return True

    return False

Function

Bases: dict

Function object

This class represents a binary function within the Program.

Parameters:

Name Type Description Default
func 'quokka.pb.Quokka.Function'

Protobuf data

required
program Program

Program reference

required

Attributes:

Name Type Description
start int

Start address

name str

Function name

program Program

Program reference

type 'FunctionType'

Function type

index_to_address Dict[int, int]

Mapping of Chunks to Protobuf indexes

func

Protobuf data

Source code in quokka/function.py
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
class Function(dict):
    """Function object

    This class represents a binary function within the Program.

    Arguments:
        func: Protobuf data
        program: Program reference

    Attributes:
        start: Start address
        name: Function name
        program: Program reference
        type: Function type
        index_to_address: Mapping of Chunks to Protobuf indexes
        func: Protobuf data
    """

    def __init__(self, func: "quokka.pb.Quokka.Function", program: quokka.Program):
        """Constructor"""
        super(dict, self).__init__()
        self.start: int = program.addresser.absolute(func.offset)
        self.name: str = func.name

        self.program: quokka.Program = program

        self.type: "FunctionType" = FunctionType.from_proto(func.function_type)
        if self.type == FunctionType.NORMAL:
            segment = self.program.get_segment(self.start)
            if segment and segment.type == SegmentType.EXTERN:
                self.type = FunctionType.EXTERN

        self.index_to_address: Dict[int, int] = {}
        for chunk_index in func.function_chunks_index:
            chunk = self.program.get_chunk(chunk_index)

            if not isinstance(chunk, quokka.function.Chunk):
                logger.error("Found a super chunk in a function which is not possible")
                continue

            if chunk.chunk_type not in {FunctionType.NORMAL, self.type}:
                logger.error(
                    "All the chunks of the function are supposed to have the same "
                    "type. It is not the case here."
                )

            chunk.chunk_type = self.type
            self[chunk.start] = chunk

            self.index_to_address[chunk_index] = chunk.start

        self.func = func

        self._data_references: List[quokka.Data] = None

    def get_block(self, address: AddressT) -> quokka.Block:
        """Get the block at `address`"""
        for chunk in self.values():
            try:
                return chunk[address]
            except KeyError:
                pass

        raise KeyError(f"Unable to find the block at address 0x{address:x}")

    def get_instruction(self, address: AddressT) -> quokka.Instruction:
        """Get the instruction at `address`"""
        for chunk in self.values():
            if chunk.in_chunk(address):
                return chunk.get_instruction(address)

        raise IndexError(f"Unable to find the instruction at 0x{address:x}")

    @cached_property
    def strings(self) -> List[str]:
        """Return the list of strings used in the function"""
        strings = set()
        for chunk in self.values():
            strings.update(chunk.strings)

        return list(strings)

    @property
    def data_references(self):
        """Lists data references used in the function"""
        data_references: List[quokka.Data] = []
        for instruction in self.values():
            data_references.extend(instruction.data_references)

        return data_references

    @cached_property
    def constants(self) -> List[int]:
        """Lists constants used in the function"""
        constants: List[int] = []
        for chunk in self.values():
            constants.extend(chunk.constants)

        return constants

    @cached_property
    def graph(self) -> "networkx.DiGraph":
        """Compute the Control Flow Graph for the function"""
        graph = networkx.DiGraph()
        for chunk in self.values():
            graph = networkx.algorithms.operators.compose(graph, chunk.graph)

        for edge in self.func.chunk_edges:
            source_chunk = self.program.get_chunk(
                edge.source.chunk_id, edge.source.block_id
            )
            dest_chunk = self.program.get_chunk(
                edge.destination.chunk_id, edge.destination.block_id
            )

            graph.add_edge(
                source_chunk.index_to_address[edge.source.block_id],
                dest_chunk.index_to_address[edge.destination.block_id],
                condition=EdgeType.from_proto(edge.edge_type),
            )

        return graph

    @cached_property
    def end(self) -> int:
        """Get the last address of the function"""
        max_chunk = max(self.keys())
        return self[max_chunk].end

    @property
    def calls(self) -> List[Chunk]:
        """Retrieve the function calls (the ones called by the function)"""
        targets = []
        for chunk in self.values():
            targets.extend(chunk.calls)

        return targets

    @property
    def callers(self) -> List[Chunk]:
        """Retrieve the function callers (the ones calling this function)"""
        sources = []
        for chunk in self.values():
            sources.extend(chunk.callers)

        return sources

    @property
    def instructions(self):
        """Yields the function instruction"""
        return itertools.chain.from_iterable(
            chunk.instructions for chunk in self.values()
        )

    def in_func(self, address: AddressT) -> bool:
        """Check if the `address` belongs to this function."""
        for chunk in self.values():
            if chunk.in_chunk(address):
                return True

        return False

    @cached_property
    def out_degree(self) -> int:
        """Function out degree"""
        return len(set(self.calls))

    @cached_property
    def in_degree(self) -> int:
        """Function in degree"""
        return self[self.start].in_degree

    @property
    def blocks(self) -> dict[AddressT, quokka.Block]:
        """Returns a dictionary which is used to reference all basic blocks
        by their address.
        Calling this function will also load the CFG.
        """
        return {addr: self.get_block(addr) for addr in self.graph.nodes}

    def __hash__(self) -> int:  # type: ignore
        """Hash value"""
        return self.start

    def __str__(self) -> str:
        """Function representation"""
        return f"<Function {self.name} at 0x{self.start:x}>"

    def __repr__(self) -> str:
        """Function representation"""
        return self.__str__()

blocks: dict[AddressT, quokka.Block] property

Returns a dictionary which is used to reference all basic blocks by their address. Calling this function will also load the CFG.

callers: List[Chunk] property

Retrieve the function callers (the ones calling this function)

calls: List[Chunk] property

Retrieve the function calls (the ones called by the function)

constants: List[int] cached property

Lists constants used in the function

data_references property

Lists data references used in the function

end: int cached property

Get the last address of the function

graph: 'networkx.DiGraph' cached property

Compute the Control Flow Graph for the function

in_degree: int cached property

Function in degree

instructions property

Yields the function instruction

out_degree: int cached property

Function out degree

strings: List[str] cached property

Return the list of strings used in the function

__hash__()

Hash value

Source code in quokka/function.py
719
720
721
def __hash__(self) -> int:  # type: ignore
    """Hash value"""
    return self.start

__init__(func, program)

Constructor

Source code in quokka/function.py
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
def __init__(self, func: "quokka.pb.Quokka.Function", program: quokka.Program):
    """Constructor"""
    super(dict, self).__init__()
    self.start: int = program.addresser.absolute(func.offset)
    self.name: str = func.name

    self.program: quokka.Program = program

    self.type: "FunctionType" = FunctionType.from_proto(func.function_type)
    if self.type == FunctionType.NORMAL:
        segment = self.program.get_segment(self.start)
        if segment and segment.type == SegmentType.EXTERN:
            self.type = FunctionType.EXTERN

    self.index_to_address: Dict[int, int] = {}
    for chunk_index in func.function_chunks_index:
        chunk = self.program.get_chunk(chunk_index)

        if not isinstance(chunk, quokka.function.Chunk):
            logger.error("Found a super chunk in a function which is not possible")
            continue

        if chunk.chunk_type not in {FunctionType.NORMAL, self.type}:
            logger.error(
                "All the chunks of the function are supposed to have the same "
                "type. It is not the case here."
            )

        chunk.chunk_type = self.type
        self[chunk.start] = chunk

        self.index_to_address[chunk_index] = chunk.start

    self.func = func

    self._data_references: List[quokka.Data] = None

__repr__()

Function representation

Source code in quokka/function.py
727
728
729
def __repr__(self) -> str:
    """Function representation"""
    return self.__str__()

__str__()

Function representation

Source code in quokka/function.py
723
724
725
def __str__(self) -> str:
    """Function representation"""
    return f"<Function {self.name} at 0x{self.start:x}>"

get_block(address)

Get the block at address

Source code in quokka/function.py
594
595
596
597
598
599
600
601
602
def get_block(self, address: AddressT) -> quokka.Block:
    """Get the block at `address`"""
    for chunk in self.values():
        try:
            return chunk[address]
        except KeyError:
            pass

    raise KeyError(f"Unable to find the block at address 0x{address:x}")

get_instruction(address)

Get the instruction at address

Source code in quokka/function.py
604
605
606
607
608
609
610
def get_instruction(self, address: AddressT) -> quokka.Instruction:
    """Get the instruction at `address`"""
    for chunk in self.values():
        if chunk.in_chunk(address):
            return chunk.get_instruction(address)

    raise IndexError(f"Unable to find the instruction at 0x{address:x}")

in_func(address)

Check if the address belongs to this function.

Source code in quokka/function.py
693
694
695
696
697
698
699
def in_func(self, address: AddressT) -> bool:
    """Check if the `address` belongs to this function."""
    for chunk in self.values():
        if chunk.in_chunk(address):
            return True

    return False

SuperChunk

Bases: MutableMapping

SuperChunk: fake functions

A SuperChunk is an abstract construction that has no other meaning that serve as a candidate (or fake) function.

Indeed, super chunks are created when a chunk (or a fake chunk) have multiple non-connected components.

A superchunk keeps a mapping of chunk index to chunk instance and implements most of a function interface.

Parameters:

Name Type Description Default
initial_chunk Chunk

The chunk to split

required
components Generator

The various non-connected components

required

Attributes:

Name Type Description
proto_idx Index

Initial chunk proto index

addresses Dict[AddressT, int]

All addresses belonging to the chunk with the instruction index

chunks Dict[Index, Chunk]

Mapping of chunks within the SuperChunk

starts Dict[AddressT, Chunk]

Mapping of chunk starts to chunks

Source code in quokka/function.py
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
class SuperChunk(MutableMapping):
    """SuperChunk: fake functions

    A SuperChunk is an abstract construction that has no other meaning that serve as
    a candidate (or fake) function.

    Indeed, super chunks are created when a chunk (or a fake chunk) have multiple
    non-connected components.

    A superchunk keeps a mapping of chunk index to chunk instance and implements most
    of a function interface.

    Arguments:
        initial_chunk: The chunk to split
        components: The various non-connected components

    Attributes:
        proto_idx: Initial chunk proto index
        addresses: All addresses belonging to the chunk with the instruction index
        chunks: Mapping of chunks within the SuperChunk
        starts: Mapping of chunk starts to chunks
    """

    def __init__(self, initial_chunk: Chunk, components: Generator):
        """Init method

        Arguments:
            initial_chunk: Original chunk to split
            components: A generator of sets for each component of the graph
        """
        self.proto_idx: Index = initial_chunk.proto_index
        self.addresses: Dict[AddressT, int] = {}

        self.chunks: Dict[Index, Chunk] = {}
        self.starts: Dict[AddressT, Chunk] = {}

        for index, component in enumerate(components):
            self.addresses.update({block_addr: index for block_addr in component})
            chunk = Chunk(initial_chunk.proto_index, initial_chunk.program, component)
            self.chunks[index] = chunk
            self.starts[min(component)] = chunk

        # We need to keep this mapping sorted to improve efficiency
        self.addresses = {k: self.addresses[k] for k in sorted(self.addresses)}

    def __setitem__(self, k: Index, v: Chunk) -> None:
        """Set a chunk"""
        self.chunks.__setitem__(k, v)

    def __delitem__(self, v: Index) -> None:
        """Delete a chunk"""
        self.chunks.__delitem__(v)

    def __getitem__(self, k: Index) -> Chunk:
        """Get a chunk"""
        return self.chunks.__getitem__(k)

    def __len__(self) -> int:
        """Number of chunk"""
        return self.chunks.__len__()

    def __iter__(self) -> Iterator[Index]:
        """Iterator over chunk"""
        return self.chunks.__iter__()

    def __str__(self) -> str:
        """SuperChunk representation"""
        return f"<SuperChunk with {len(self.starts)} chunks(s)>"

    def get_chunk(self, address: AddressT) -> Chunk:
        """Return a chunk by an address.

        To resolve an address and retrieve the appropriate chunk, we enumerate the
        chunks and look within the blocks.

        Arguments:
            address: Address to query

        Raises:
            IndexError: When no chunk is found

        """
        if address in self.addresses:
            return self.chunks[self.addresses[address]]

        if address < min(self.addresses):
            raise IndexError("Address is before the chunk")

        # TODO(dm) CHECK OR FIXME
        chunk_index = self.addresses[min(self.addresses)]
        for blocks_head, chunk_index in self.addresses.items():
            if blocks_head > address:
                break

        candidate_chunk = self.chunks[chunk_index]
        if candidate_chunk.in_chunk(address):
            return candidate_chunk

        raise IndexError("Address does not belong in this SuperChunk")

    def get_chunk_by_index(self, chunk_index: Index, block_index: Index) -> Chunk:
        """Return a chunk by its index

        This must be reimplemented because the chunk_index is unique for the proto but
        there are multiple chunks existing with the same index because of a SuperChunk.

        Arguments:
            chunk_index: Chunk index
            block_index: Block index

        Raises:
            ChunkMissingError: if the chunk is not found

        """
        if chunk_index != self.proto_idx:
            raise quokka.exc.ChunkMissingError("Wrong chunk index")

        for chunk in self.chunks.values():
            if chunk.index_to_address.get(block_index, None) is not None:
                return chunk

        raise quokka.exc.ChunkMissingError(
            "Unable to find the correct chunk for this block"
        )

    def in_chunk(self, address: AddressT) -> bool:
        """Check if address belongs to this SuperChunk"""
        if address < min(self.starts):
            return False

        for chunk in self.chunks.values():
            if chunk.in_chunk(address):
                return True

        return False

    def get_instruction(self, address: AddressT) -> quokka.Instruction:
        """Get the instruction at `address`"""
        for chunk in self.chunks.values():
            if chunk.in_chunk(address):
                return chunk.get_instruction(address)

        raise IndexError(f"Unable to find the instruction at 0x{address:x}")

__delitem__(v)

Delete a chunk

Source code in quokka/function.py
443
444
445
def __delitem__(self, v: Index) -> None:
    """Delete a chunk"""
    self.chunks.__delitem__(v)

__getitem__(k)

Get a chunk

Source code in quokka/function.py
447
448
449
def __getitem__(self, k: Index) -> Chunk:
    """Get a chunk"""
    return self.chunks.__getitem__(k)

__init__(initial_chunk, components)

Init method

Parameters:

Name Type Description Default
initial_chunk Chunk

Original chunk to split

required
components Generator

A generator of sets for each component of the graph

required
Source code in quokka/function.py
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
def __init__(self, initial_chunk: Chunk, components: Generator):
    """Init method

    Arguments:
        initial_chunk: Original chunk to split
        components: A generator of sets for each component of the graph
    """
    self.proto_idx: Index = initial_chunk.proto_index
    self.addresses: Dict[AddressT, int] = {}

    self.chunks: Dict[Index, Chunk] = {}
    self.starts: Dict[AddressT, Chunk] = {}

    for index, component in enumerate(components):
        self.addresses.update({block_addr: index for block_addr in component})
        chunk = Chunk(initial_chunk.proto_index, initial_chunk.program, component)
        self.chunks[index] = chunk
        self.starts[min(component)] = chunk

    # We need to keep this mapping sorted to improve efficiency
    self.addresses = {k: self.addresses[k] for k in sorted(self.addresses)}

__iter__()

Iterator over chunk

Source code in quokka/function.py
455
456
457
def __iter__(self) -> Iterator[Index]:
    """Iterator over chunk"""
    return self.chunks.__iter__()

__len__()

Number of chunk

Source code in quokka/function.py
451
452
453
def __len__(self) -> int:
    """Number of chunk"""
    return self.chunks.__len__()

__setitem__(k, v)

Set a chunk

Source code in quokka/function.py
439
440
441
def __setitem__(self, k: Index, v: Chunk) -> None:
    """Set a chunk"""
    self.chunks.__setitem__(k, v)

__str__()

SuperChunk representation

Source code in quokka/function.py
459
460
461
def __str__(self) -> str:
    """SuperChunk representation"""
    return f"<SuperChunk with {len(self.starts)} chunks(s)>"

get_chunk(address)

Return a chunk by an address.

To resolve an address and retrieve the appropriate chunk, we enumerate the chunks and look within the blocks.

Parameters:

Name Type Description Default
address AddressT

Address to query

required

Raises:

Type Description
IndexError

When no chunk is found

Source code in quokka/function.py
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
def get_chunk(self, address: AddressT) -> Chunk:
    """Return a chunk by an address.

    To resolve an address and retrieve the appropriate chunk, we enumerate the
    chunks and look within the blocks.

    Arguments:
        address: Address to query

    Raises:
        IndexError: When no chunk is found

    """
    if address in self.addresses:
        return self.chunks[self.addresses[address]]

    if address < min(self.addresses):
        raise IndexError("Address is before the chunk")

    # TODO(dm) CHECK OR FIXME
    chunk_index = self.addresses[min(self.addresses)]
    for blocks_head, chunk_index in self.addresses.items():
        if blocks_head > address:
            break

    candidate_chunk = self.chunks[chunk_index]
    if candidate_chunk.in_chunk(address):
        return candidate_chunk

    raise IndexError("Address does not belong in this SuperChunk")

get_chunk_by_index(chunk_index, block_index)

Return a chunk by its index

This must be reimplemented because the chunk_index is unique for the proto but there are multiple chunks existing with the same index because of a SuperChunk.

Parameters:

Name Type Description Default
chunk_index Index

Chunk index

required
block_index Index

Block index

required

Raises:

Type Description
ChunkMissingError

if the chunk is not found

Source code in quokka/function.py
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
def get_chunk_by_index(self, chunk_index: Index, block_index: Index) -> Chunk:
    """Return a chunk by its index

    This must be reimplemented because the chunk_index is unique for the proto but
    there are multiple chunks existing with the same index because of a SuperChunk.

    Arguments:
        chunk_index: Chunk index
        block_index: Block index

    Raises:
        ChunkMissingError: if the chunk is not found

    """
    if chunk_index != self.proto_idx:
        raise quokka.exc.ChunkMissingError("Wrong chunk index")

    for chunk in self.chunks.values():
        if chunk.index_to_address.get(block_index, None) is not None:
            return chunk

    raise quokka.exc.ChunkMissingError(
        "Unable to find the correct chunk for this block"
    )

get_instruction(address)

Get the instruction at address

Source code in quokka/function.py
530
531
532
533
534
535
536
def get_instruction(self, address: AddressT) -> quokka.Instruction:
    """Get the instruction at `address`"""
    for chunk in self.chunks.values():
        if chunk.in_chunk(address):
            return chunk.get_instruction(address)

    raise IndexError(f"Unable to find the instruction at 0x{address:x}")

in_chunk(address)

Check if address belongs to this SuperChunk

Source code in quokka/function.py
519
520
521
522
523
524
525
526
527
528
def in_chunk(self, address: AddressT) -> bool:
    """Check if address belongs to this SuperChunk"""
    if address < min(self.starts):
        return False

    for chunk in self.chunks.values():
        if chunk.in_chunk(address):
            return True

    return False

dereference_thunk(item, caller=False)

Dereference a thunk

This method is used to resolve a thunk calls / callers. As thunk function only have 1 relation : FUNC (call x) -> THUNK X -> X , it disrupts the call graph and heuristics based on graph degrees.

Parameters:

Name Type Description Default
item Union[Function, Chunk]

Either a function or a chunk

required
caller bool

True if we want to find the callers (e.g. the functions that call item) False if we want to find the callee (e.g. function that are called by item)

False

Raises:

Type Description
ThunkMissingError

When no thunk has been found

FunctionMissingError

When no function has been found

Source code in quokka/function.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def dereference_thunk(item: Union[Function, Chunk], caller: bool = False) -> Function:
    """Dereference a thunk

    This method is used to resolve a thunk calls / callers. As thunk function only have
    1 relation : FUNC (call x) -> THUNK X -> X , it disrupts the call graph and
    heuristics based on graph degrees.

    Arguments:
        item: Either a function or a chunk
        caller: True if we want to find the callers (e.g. the functions that call item)
                False if we want to find the callee (e.g. function that are called by
                item)

    Raises:
        ThunkMissingError: When no thunk has been found
        FunctionMissingError: When no function has been found
    """
    if isinstance(item, quokka.function.Chunk):
        function = item.program.get_first_function_by_chunk(item)
    else:
        function = item

    # Do not try to (de)reference if we do not meet the prerequisites
    if caller is False and function.type != FunctionType.THUNK:
        # Only dereference THUNK function
        return function

    if caller is True and function.in_degree != 1:
        # Only try to reference function with in_degree == 1
        return function

    target = "calls" if caller is False else "callers"
    reference = getattr(function, target)

    try:
        candidate = function.program.get_first_function_by_chunk(reference[0])
    except (IndexError, quokka.exc.FunctionMissingError) as exc:
        if caller is True and reference[0].in_degree == 0:
            raise quokka.exc.ThunkMissingError("Error while finding thunk") from exc

        # This will appear when the referenced target is a chunk coming from a
        # fake chunk for instance
        # logger.debug("Unable to find the (de)reference of the thunk function")
        raise quokka.exc.FunctionMissingError(
            "Missing func referenced by thunk"
        ) from exc

    if candidate.type == FunctionType.THUNK and caller is not True:
        # Recursive call for multi layered THUNK
        return dereference_thunk(candidate, caller)

    if caller and candidate.type != FunctionType.THUNK:
        return function

    return candidate

get_degrees(item)

Compute the {in, out} degrees of an item (Function/Chunk)

Source code in quokka/function.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def get_degrees(item: Union[Chunk, Function]) -> Tuple[int, int]:
    """Compute the {in, out} degrees of an item (Function/Chunk)"""
    in_degree = item.in_degree
    try:
        in_func = quokka.function.dereference_thunk(item, True)
        in_degree = in_func.in_degree
    except quokka.exc.ThunkMissingError:
        in_degree = 0
    except quokka.exc.FunctionMissingError:
        pass

    try:
        out_func: Union[Function, Chunk] = quokka.function.dereference_thunk(
            item, False
        )
    except quokka.exc.FunctionMissingError:
        out_func = item

    return in_degree, out_func.out_degree