Skip to content

data

Data management.

Data is any piece of information in the program that isn't code.

Data

Base class for data.

All data have at least a type and a value. They are referenced inside the program by and to other data and code.

Parameters:

Name Type Description Default
proto_index Index

Index in the protobuf

required
data 'Pb.Data'

Protobuf value of the data.

required
program Program

Program backref

required

Attributes:

Name Type Description
proto_index

Index in the protobuf

address AddressT

Data address

type TypeT

Data type

program Program

Reference to the Program

is_initialized bool

Is the data initialized?

size int

Data size (depends on the type usually)

name str

Data name (if any)

Source code in bindings/python/quokka/data.py
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
class Data:
    """Base class for data.

    All data have at least a type and a value.
    They are referenced inside the program by and to other data and code.

    Equality is identity-based: two instances compare equal when they wrap
    the very same protobuf message object, and they hash accordingly.

    Parameters:
        proto_index: Index in the protobuf
        data: Protobuf value of the data.
        program: Program backref

    Attributes:
        proto_index: Index in the protobuf
        proto: Protobuf message found at ``proto_index`` in the program data
        address: Data address
        type: Data type
        program: Reference to the Program
        file_offset: File offset recorded in the protobuf for this data
        is_initialized: Is the data initialized?
        size: Data size (depends on the type usually)
        name: Data name (if any)
    """

    def __init__(
        self, proto_index: Index, data: "Pb.Data", program: quokka.Program
    ):
        """Constructor"""
        # The index is documented as a public attribute, so keep it around.
        self.proto_index: Index = proto_index
        self.proto: "Pb.Data" = program.proto.data[proto_index]
        self.address: AddressT = program.virtual_address(
            data.segment_index, data.segment_offset
        )
        self.program: quokka.Program = program
        self.file_offset: int = data.file_offset
        self.is_initialized: bool = not data.not_initialized
        self.size: int = self.proto.size

        # Retrieve xrefs (for the data). Each entry is stored as a
        # (RefType, protobuf reference) pair so the properties below can
        # filter on the reference kind.
        refs_from = [self.program.proto.references[x] for x in self.proto.xref_from]
        self._xrefs_from = [(RefType(ref.reference_type), ref) for ref in refs_from]

        refs_to = [self.program.proto.references[x] for x in self.proto.xref_to]
        self._xrefs_to = [(RefType(ref.reference_type), ref) for ref in refs_to]

    def __str__(self) -> str:
        """Data representation"""
        return f"<Data {self.name} at {self.address:#x}>"

    def __eq__(self, other: object) -> bool:
        """Check equality between two Data instances.

        Two instances are equal when they wrap the same protobuf message
        object. Objects without a ``proto`` attribute are not comparable and
        yield ``NotImplemented`` instead of raising ``AttributeError``.
        """
        try:
            other_proto = other.proto
        except AttributeError:
            return NotImplemented
        return self.proto is other_proto

    def __hash__(self) -> int:
        """Hash consistent with `__eq__` (identity of the protobuf message)."""
        return id(self.proto)

    @property
    def name(self) -> str:
        """Data name"""
        return self.proto.name

    @name.setter
    def name(self, value: str) -> None:
        """Set the data name and mark it as edited in the protobuf"""
        self.proto.edits.name_set = True
        self.proto.name = value

    @property
    def comments(self) -> list[str]:
        """Return the data comments"""
        return self.proto.comments

    @property
    def value(self) -> TypeValue | None:
        """Data value.

        The value is read in the program binary file.

        Returns:
            None when the data is uninitialized or has no positive file
            offset; otherwise the raw bytes (non-positive type size with a
            known data size) or the value decoded according to the type.
        """

        if not self.is_initialized:
            return None  # Uninitialized memory has no value
        if self.proto.file_offset <= 0:
            return None  # Not mapped in the file

        if self.type.size <= 0 and self.size:
            # Variable size data with a known size (e.g., string)
            return self.program.read_bytes(self.file_offset, self.size)
        # Try reading the value as a type
        return self.program.executable.read_type_value(self.file_offset, self.type)

    def is_variable_size(self) -> bool:
        """Is the data of variable size?"""
        return self.size == -1

    @property
    def type(self) -> TypeT:
        """Data type. Assume one exists for each data"""
        return self.program.get_type(self.proto.type_index)

    @type.setter
    def type(self, typ: TypeT | str) -> None:
        """Set the data type and mark it as edited in the protobuf.

        The final type will only be applied when quokka file regenerated.

        Raises:
            TypeError: If `typ` is neither a string nor a type object.
        """
        if isinstance(typ, str):
            self.proto.edits.type_str = typ
        elif isinstance(typ, TypeT):
            self.proto.edits.type_str = typ.c_str
        else:
            # An explicit exception survives `python -O`, unlike an assert.
            raise TypeError(f"Invalid type: {typ!r}")

    @property
    def data_refs_to(self) -> list['Data | Function | AddressT']:
        """Returns all data reference to this data"""
        # If querying refs_to get the source address
        return [
            _get_item(self.program, xref.source.address)
            for t, xref in self._xrefs_to
            if t.is_data
        ]

    @property
    def data_read_refs_to(self) -> list['Data | Function | AddressT']:
        """Returns all data read reference to this data"""
        return [
            _get_item(self.program, xref.source.address)
            for t, xref in self._xrefs_to
            if t in (RefType.DATA_READ, RefType.DATA_INDIR)
        ]

    @property
    def data_write_refs_to(self) -> list['Data | Function | AddressT']:
        """Returns all data write reference to this data"""
        return [
            _get_item(self.program, xref.source.address)
            for t, xref in self._xrefs_to
            if t == RefType.DATA_WRITE
        ]

    @property
    def data_refs_from(self) -> list['Data | Function | AddressT']:
        """Returns all data reference from this data"""
        # If querying refs_from get the destination address
        return [
            _get_item(self.program, xref.destination.address)
            for t, xref in self._xrefs_from
            if t.is_data
        ]

    @property
    def data_read_refs_from(self) -> list['Data | Function | AddressT']:
        """Returns all data read reference from this data"""
        # FIXME: Right now consider DATA_INDIR reference as read references (do we want to distinguish R/W ?)
        return [
            _get_item(self.program, xref.destination.address)
            for t, xref in self._xrefs_from
            if t in (RefType.DATA_READ, RefType.DATA_INDIR)
        ]

    @property
    def data_write_refs_from(self) -> list['Data | Function | AddressT']:
        """Returns all data write reference from this data"""
        return [
            _get_item(self.program, xref.destination.address)
            for t, xref in self._xrefs_from
            if t == RefType.DATA_WRITE
        ]

    @property
    def prev(self) -> Data | None:
        """Return the data at the highest address below this one, or None."""
        return self.program.data.prev(self.address)

    @property
    def next(self) -> Data | None:
        """Return the data at the lowest address above this one, or None."""
        return self.program.data.next(self.address)

    @property
    def code_refs_to(self) -> list[AddressT]:
        """Returns all code reference to this data"""
        # If querying refs_to get the source address
        return [xref.source.address for t, xref in self._xrefs_to if t.is_code]

    @property
    def type_refs_from(self) -> list[TypeReference]:
        """Returns all type reference from this data"""
        # Get protobuf type ids
        type_ids = [
            xref.destination.data_type_identifier
            for t, xref in self._xrefs_from
            if t.is_data and xref.destination.HasField("data_type_identifier")
        ]  # Note: do not use SYMBOL enum
        # Resolve type ids to actual types
        return [
            self.program.get_type_reference(t.type_index, t.member_index)
            for t in type_ids
        ]

code_refs_to property

Returns all code reference to this data

comments property

Return the data comments

data_read_refs_from property

Returns all data read reference from this data

data_read_refs_to property

Returns all data read reference to this data

data_refs_from property

Returns all data reference from this data

data_refs_to property

Returns all data reference to this data

data_write_refs_from property

Returns all data write reference from this data

data_write_refs_to property

Returns all data write reference to this data

name property writable

Data name

next property

Return the data at the lowest address above this one, or None.

prev property

Return the data at the highest address below this one, or None.

type property writable

Data type. Assume one exists for each data

type_refs_from property

Returns all type reference from this data

value property

Data value.

The value is read in the program binary file.

__eq__(other)

Check equality between two Data instances

Source code in bindings/python/quokka/data.py
 99
100
101
def __eq__(self, other: object) -> bool:
    """Check equality between two Data instances.

    Two instances are equal when they wrap the same protobuf message object.
    Objects without a ``proto`` attribute are not comparable and yield
    ``NotImplemented`` instead of raising ``AttributeError``.
    """
    try:
        other_proto = other.proto
    except AttributeError:
        return NotImplemented
    return self.proto is other_proto

__init__(proto_index, data, program)

Constructor

Source code in bindings/python/quokka/data.py
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def __init__(
    self, proto_index: Index, data: "Pb.Data", program: quokka.Program
):
    """Constructor

    Arguments:
        proto_index: Index in the protobuf
        data: Protobuf value of the data.
        program: Program backref
    """
    # The index is documented as a public attribute, so keep it around.
    self.proto_index: Index = proto_index
    self.proto: "Pb.Data" = program.proto.data[proto_index]
    self.address: AddressT = program.virtual_address(
        data.segment_index, data.segment_offset
    )
    self.program: quokka.Program = program
    self.file_offset: int = data.file_offset
    self.is_initialized: bool = not data.not_initialized
    self.size: int = self.proto.size

    # Retrieve xrefs (for the data). Each entry is stored as a
    # (RefType, protobuf reference) pair.
    refs_from = [self.program.proto.references[x] for x in self.proto.xref_from]
    self._xrefs_from = [(RefType(ref.reference_type), ref) for ref in refs_from]

    refs_to = [self.program.proto.references[x] for x in self.proto.xref_to]
    self._xrefs_to = [(RefType(ref.reference_type), ref) for ref in refs_to]

__str__()

Data representation

Source code in bindings/python/quokka/data.py
95
96
97
def __str__(self) -> str:
    """Render the data as ``<Data NAME at ADDRESS>`` with a hex address."""
    label, location = self.name, self.address
    return f"<Data {label} at {location:#x}>"

is_variable_size()

Is the data of variable size?

Source code in bindings/python/quokka/data.py
136
137
138
def is_variable_size(self) -> bool:
    """Tell whether the data size is the ``-1`` marker."""
    variable_size_marker = -1
    return self.size == variable_size_marker

DataHolder

Bases: Mapping

Data bucket

All the data of the program are referenced in this bucket, which allows each of them to be stored only once.

Attributes:

Name Type Description
proto

The protobuf data themselves

program Program

A reference to the Program

Parameters:

Name Type Description Default
proto

The protobuf data

required
program Program

The program

required
Source code in bindings/python/quokka/data.py
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
class DataHolder(Mapping):
    """Data bucket

    All the data of the program are referenced in this bucket, which allows
    each of them to be stored only once.

    Lookups are keyed by virtual address. Iterating the bucket yields `Data`
    objects rather than addresses, so the `keys()`/`items()`/`values()` views
    inherited from `Mapping` do not behave like those of a regular mapping.

    Attributes:
        proto: The protobuf data themselves
        program: A reference to the Program

    Arguments:
        proto: The protobuf data
        program: The program
    """

    def __init__(self, proto, program: quokka.Program):
        """Init method

        Arguments:
            proto: List of data in the protobuf
            program: Backref to the program
        """
        self.proto = proto.data
        self.program: quokka.Program = program
        # Virtual address -> index of the entry in the protobuf data list.
        self._addr_to_idx: dict[AddressT, Index] = {
            program.virtual_address(data.segment_index, data.segment_offset): index
            for index, data in enumerate(proto.data)
        }
        # Sorted view of the known addresses, used by prev()/next().
        self._sorted_addrs: list[AddressT] = sorted(self._addr_to_idx)

    def __contains__(self, key: object) -> bool:
        """Tell whether a data exists at the address `key`"""
        return key in self._addr_to_idx

    def __setitem__(self, key: Index, value: Data) -> None:
        """Set a data"""
        raise ValueError("Should not be accessed")

    def __delitem__(self, value: Index) -> None:
        """Remove a data from the bucket"""
        raise ValueError("Should not be accessed")

    def __getitem__(self, address: AddressT) -> Data:
        """Get a data from the bucket.

        Arguments:
            address: Data address
        Returns:
            A Data
        Raises:
            ValueError: If no data is registered at `address`
        """
        key = self._addr_to_idx.get(address)
        if key is None:
            raise ValueError(f"No data at address 0x{address:x}")
        # Right now we create a new Data object each time, but we could cache them if needed
        return Data(key, self.proto[key], self.program)

    def get(self, key, default=None):
        """Get the data at address `key`, or `default` when there is none.

        `Mapping.get` only swallows `KeyError`, while `__getitem__` raises
        `ValueError` for an unknown address; without this override `get`
        would raise instead of returning the default.

        Arguments:
            key: Data address
            default: Value returned when no data exists at `key`
        Returns:
            A Data, or `default`
        """
        if key not in self._addr_to_idx:
            return default
        return self[key]

    def __len__(self) -> int:
        """Number of data in the program"""
        return len(self._addr_to_idx)

    def prev(self, address: AddressT) -> Data | None:
        """Return the data at the highest address below `address`, or None.

        Arguments:
            address: Reference address
        Returns:
            The previous Data by virtual address, or None
        """
        # bisect_left: an entry located exactly at `address` is not "below" it.
        i = bisect.bisect_left(self._sorted_addrs, address)
        if i > 0:
            return self[self._sorted_addrs[i - 1]]
        return None

    def next(self, address: AddressT) -> Data | None:
        """Return the data at the lowest address above `address`, or None.

        Arguments:
            address: Reference address
        Returns:
            The next Data by virtual address, or None
        """
        # bisect_right: an entry located exactly at `address` is skipped.
        i = bisect.bisect_right(self._sorted_addrs, address)
        if i < len(self._sorted_addrs):
            return self[self._sorted_addrs[i]]
        return None

    def __iter__(self):
        """Iterate over the data, yielding one `Data` per known address.

        Entries come in the insertion order of the address map (the order of
        the protobuf data list), not sorted by address.
        """
        for addr in self._addr_to_idx:
            yield self[addr]

__delitem__(value)

Remove a data from the bucket

Source code in bindings/python/quokka/data.py
255
256
257
def __delitem__(self, value: Index) -> None:
    """Reject deletion: entries cannot be removed through ``del``."""
    message = "Should not be accessed"
    raise ValueError(message)

__getitem__(address)

Get a data from the bucket.

Parameters:

Name Type Description Default
address AddressT

Data address

required

Returns: A Data

Source code in bindings/python/quokka/data.py
259
260
261
262
263
264
265
266
267
268
269
270
271
def __getitem__(self, address: AddressT) -> Data:
    """Look up the data located at a given address.

    Arguments:
        address: Data address
    Returns:
        A freshly built Data for the entry registered at `address`
    Raises:
        ValueError: If no entry is registered at `address`
    """
    index = self._addr_to_idx.get(address)
    if index is not None:
        # No caching for now: every lookup builds a new Data object.
        return Data(index, self.proto[index], self.program)
    raise ValueError(f"No data at address 0x{address:x}")

__init__(proto, program)

Init method

Parameters:

Name Type Description Default
proto

List of data in the protobuf

required
program Program

Backref to the program

required
Source code in bindings/python/quokka/data.py
233
234
235
236
237
238
239
240
241
242
243
244
245
246
def __init__(self, proto, program: quokka.Program):
    """Build the address lookup tables from the protobuf data.

    Arguments:
        proto: Protobuf object whose `data` field lists the data entries
        program: Backref to the program
    """
    self.proto = proto.data
    self.program: quokka.Program = program

    # Map each entry's virtual address to its position in the protobuf list.
    address_index: dict[AddressT, Index] = {}
    for position, entry in enumerate(proto.data):
        virtual = program.virtual_address(entry.segment_index, entry.segment_offset)
        address_index[virtual] = position

    self._addr_to_idx: dict[AddressT, Index] = address_index
    # Iterating a dict yields its keys, so this sorts the known addresses.
    self._sorted_addrs: list[AddressT] = sorted(address_index)

__iter__()

Iterate over the data, yielding a Data object for each known address.

Source code in bindings/python/quokka/data.py
303
304
305
306
def __iter__(self):
    """Iterate over the data, yielding one item per known address.

    Each address of the internal address map is resolved through
    ``self[addr]``, in the iteration order of that map.
    """
    for addr in self._addr_to_idx:
        yield self[addr]

__len__()

Number of data in the program

Source code in bindings/python/quokka/data.py
273
274
275
def __len__(self) -> int:
    """Return how many addresses are registered in the bucket."""
    address_index = self._addr_to_idx
    return len(address_index)

__setitem__(key, value)

Set a data

Source code in bindings/python/quokka/data.py
251
252
253
def __setitem__(self, key: Index, value: Data) -> None:
    """Reject item assignment: entries cannot be set through indexing."""
    message = "Should not be accessed"
    raise ValueError(message)

next(address)

Return the data at the lowest address above address, or None.

Parameters:

Name Type Description Default
address AddressT

Reference address

required

Returns: The next Data by virtual address, or None

Source code in bindings/python/quokka/data.py
290
291
292
293
294
295
296
297
298
299
300
301
def next(self, address: AddressT) -> Data | None:
    """Find the closest data located strictly above `address`.

    Arguments:
        address: Reference address
    Returns:
        The Data at the lowest known address greater than `address`, or
        None when there is no such address
    """
    addresses = self._sorted_addrs
    # bisect_right skips an entry located exactly at `address`.
    position = bisect.bisect_right(addresses, address)
    if position >= len(addresses):
        return None
    return self[addresses[position]]

prev(address)

Return the data at the highest address below address, or None.

Parameters:

Name Type Description Default
address AddressT

Reference address

required

Returns: The previous Data by virtual address, or None

Source code in bindings/python/quokka/data.py
277
278
279
280
281
282
283
284
285
286
287
288
def prev(self, address: AddressT) -> Data | None:
    """Find the closest data located strictly below `address`.

    Arguments:
        address: Reference address
    Returns:
        The Data at the highest known address lower than `address`, or
        None when there is no such address
    """
    addresses = self._sorted_addrs
    # bisect_left stops before an entry located exactly at `address`.
    position = bisect.bisect_left(addresses, address)
    if position <= 0:
        return None
    return self[addresses[position - 1]]