Skip to content

data

Data management.

A data item is a piece of information that isn't code.

Data

Base class for data.

All data have at least a type and a value. Inside the program, data can reference, and be referenced by, other data and code.

Parameters:

Name Type Description Default
proto_index Index

Index in the protobuf

required
data 'Pb.Data'

Protobuf value of the data.

required
program Program

Program backref

required

Attributes:

Name Type Description
proto_index

Index in the protobuf

address AddressT

Data address

type TypeT

Data type

program Program

Reference to the Program

is_initialized bool

Is the data initialized?

size int

Data size (depends on the type usually)

name str

Data name (if any)

Source code in bindings/python/quokka/data.py
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
class Data:
    """Base class for data.

    All data have at least a type and a value.
    Inside the program, data can reference, and be referenced by, other data
    and code.

    Parameters:
        proto_index: Index in the protobuf
        data: Protobuf value of the data.
        program: Program backref

    Attributes:
        proto_index: Index in the protobuf
        proto: Protobuf message found at ``program.proto.data[proto_index]``
        address: Data address
        type: Data type
        program: Reference to the Program
        file_offset: File offset copied from the protobuf value
        is_initialized: Is the data initialized?
        size: Data size (depends on the type usually)
        name: Data name (if any)
    """

    def __init__(
        self, proto_index: "Index", data: "Pb.Data", program: "quokka.Program"
    ):
        """Constructor"""
        # Keep the index around: it is part of the documented attributes.
        self.proto_index: "Index" = proto_index
        self.proto: "Pb.Data" = program.proto.data[proto_index]
        self.address: "AddressT" = program.virtual_address(
            data.segment_index, data.segment_offset
        )
        self.program: "quokka.Program" = program
        self.file_offset: int = data.file_offset
        self.is_initialized: bool = not data.not_initialized
        self.size: int = self.proto.size

        # Retrieve xrefs (for the data), each one paired with its RefType
        references = program.proto.references
        self._xrefs_from = self._typed_refs(references, self.proto.xref_from)
        self._xrefs_to = self._typed_refs(references, self.proto.xref_to)

    @staticmethod
    def _typed_refs(references, indices) -> list:
        """Resolve reference indices into ``(RefType, reference)`` pairs."""
        return [
            (RefType(ref.reference_type), ref)
            for ref in (references[index] for index in indices)
        ]

    def __str__(self) -> str:
        """Data representation"""
        return f"<Data {self.name} at {self.address:#x}>"

    def __eq__(self, other: object) -> bool:
        """Check equality between two Data instances.

        Two instances are equal when they wrap the very same protobuf object.
        Operands that are not Data yield ``NotImplemented`` so that Python
        falls back to its default comparison instead of raising.
        """
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.proto is other.proto

    def __hash__(self) -> int:
        """Hash consistent with ``__eq__`` (identity of the wrapped protobuf)."""
        return id(self.proto)

    @property
    def name(self) -> str:
        """Data name"""
        return self.proto.name

    @name.setter
    def name(self, value: str) -> None:
        """Set the data name and mark it as edited in the protobuf"""
        self.proto.edits.name_set = True
        self.proto.name = value

    @property
    def comments(self) -> list[str]:
        """Return the data comments"""
        return self.proto.comments

    @property
    def value(self) -> "TypeValue | None":
        """Data value.

        The value is read in the program binary file.

        Returns:
            None when the data is not initialized or has no positive file
            offset, otherwise the value read from the file.
        """

        if not self.is_initialized:
            return None  # Uninitialized memory has no value
        if self.proto.file_offset <= 0:
            return None  # Not mapped in the file

        if self.type.size <= 0 and self.size:  # Variable size data with a known size (e.g., string)
            return self.program.read_bytes(self.file_offset, self.size)
        else:  # Try reading the value as a type
            return self.program.executable.read_type_value(self.file_offset, self.type)

    def is_variable_size(self) -> bool:
        """Is the data of variable size?"""
        return self.size == -1

    @property
    def type(self) -> "TypeT":
        """Data type. Assume one exists for each data"""
        return self.program.get_type(self.proto.type_index)

    @type.setter
    def type(self, typ: "TypeT | str") -> None:
        """Set the data type and mark it as edited in the protobuf.

        The final type will only be applied when the quokka file is
        regenerated.

        Raises:
            TypeError: if ``typ`` is neither a string nor a type object.
        """
        if isinstance(typ, str):
            self.proto.edits.type_str = typ
        elif isinstance(typ, TypeT):
            self.proto.edits.type_str = typ.c_str
        else:
            # An explicit raise survives ``python -O``, unlike an assert.
            raise TypeError(f"Invalid type: {typ!r}")

    @property
    def data_refs_to(self) -> list['Data | Function | AddressT']:
        """Returns all data references to this data"""
        # If querying refs_to get the source address
        return [
            _get_item(self.program, xref.source.address)
            for t, xref in self._xrefs_to
            if t.is_data
        ]

    @property
    def data_read_refs_to(self) -> list['Data | Function | AddressT']:
        """Returns all data read references to this data"""
        # DATA_INDIR references are counted as read references
        return [
            _get_item(self.program, xref.source.address)
            for t, xref in self._xrefs_to
            if t in [RefType.DATA_READ, RefType.DATA_INDIR]
        ]

    @property
    def data_write_refs_to(self) -> list['Data | Function | AddressT']:
        """Returns all data write references to this data"""
        return [
            _get_item(self.program, xref.source.address)
            for t, xref in self._xrefs_to
            if t == RefType.DATA_WRITE
        ]

    @property
    def data_refs_from(self) -> list['Data | Function | AddressT']:
        """Returns all data references from this data"""
        # If querying refs_from get the destination address
        return [
            _get_item(self.program, xref.destination.address)
            for t, xref in self._xrefs_from
            if t.is_data
        ]

    @property
    def data_read_refs_from(self) -> list['Data | Function | AddressT']:
        """Returns all data read references from this data"""
        # FIXME: Right now consider DATA_INDIR reference as read references (do we want to distinguish R/W ?)
        return [
            _get_item(self.program, xref.destination.address)
            for t, xref in self._xrefs_from
            if t in [RefType.DATA_READ, RefType.DATA_INDIR]
        ]

    @property
    def data_write_refs_from(self) -> list['Data | Function | AddressT']:
        """Returns all data write references from this data"""
        return [
            _get_item(self.program, xref.destination.address)
            for t, xref in self._xrefs_from
            if t == RefType.DATA_WRITE
        ]

    @property
    def code_refs_to(self) -> "list[AddressT]":
        """Returns all code references to this data"""
        # If querying refs_to get the source address
        return [xref.source.address for t, xref in self._xrefs_to if t.is_code]

    @property
    def type_refs_from(self) -> "list[TypeReference]":
        """Returns all type references from this data"""
        # Get protobuf type ids
        type_ids = [
            xref.destination.data_type_identifier
            for t, xref in self._xrefs_from
            if t.is_data and xref.destination.HasField("data_type_identifier")
        ]  # Note: do not use SYMBOL enum
        # Resolve type ids to actual types
        return [self.program.get_type_reference(t.type_index, t.member_index) for t in type_ids]

code_refs_to property

Returns all code reference to this data

comments property

Return the data comments

data_read_refs_from property

Returns all data read reference from this data

data_read_refs_to property

Returns all data read reference to this data

data_refs_from property

Returns all data reference from this data

data_refs_to property

Returns all data reference to this data

data_write_refs_from property

Returns all data write reference from this data

data_write_refs_to property

Returns all data write reference to this data

name property writable

Data name

type property writable

Data type. Assume one exists for each data

type_refs_from property

Returns all type reference from this data

value property

Data value.

The value is read in the program binary file.

__eq__(other)

Check equality between two Data instances

Source code in bindings/python/quokka/data.py
 98
 99
100
def __eq__(self, other: object) -> bool:
    """Check equality between two Data instances.

    Two instances are equal when they wrap the very same protobuf object.
    Operands of another kind yield ``NotImplemented`` so that Python falls
    back to its default comparison instead of raising AttributeError.
    """
    if not isinstance(other, type(self)):
        return NotImplemented
    return self.proto is other.proto

__init__(proto_index, data, program)

Constructor

Source code in bindings/python/quokka/data.py
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def __init__(
    self, proto_index: "Index", data: "Pb.Data", program: "quokka.Program"
):
    """Constructor

    Arguments:
        proto_index: Index in the protobuf
        data: Protobuf value of the data
        program: Program backref
    """
    # Keep the index around: it is part of the documented attributes.
    self.proto_index: "Index" = proto_index
    self.proto: "Pb.Data" = program.proto.data[proto_index]
    self.address: "AddressT" = program.virtual_address(
        data.segment_index, data.segment_offset
    )
    self.program: "quokka.Program" = program
    self.file_offset: int = data.file_offset
    self.is_initialized: bool = not data.not_initialized
    self.size: int = self.proto.size

    # Retrieve xrefs (for the data), each one paired with its RefType
    references = program.proto.references
    self._xrefs_from = [
        (RefType(ref.reference_type), ref)
        for ref in (references[x] for x in self.proto.xref_from)
    ]
    self._xrefs_to = [
        (RefType(ref.reference_type), ref)
        for ref in (references[x] for x in self.proto.xref_to)
    ]

__str__()

Data representation

Source code in bindings/python/quokka/data.py
94
95
96
def __str__(self) -> str:
    """Render the data as its name followed by its address in hexadecimal."""
    return "<Data {} at {:#x}>".format(self.name, self.address)

is_variable_size()

Is the data of variable size?

Source code in bindings/python/quokka/data.py
135
136
137
def is_variable_size(self) -> bool:
    """Tell whether the size is the ``-1`` marker used for variable-size data."""
    variable_size_marker = -1
    return self.size == variable_size_marker

DataHolder

Bases: Mapping

Data bucket

All the data of the program are referenced in this bucket, which allows them to be stored only once.

Attributes:

Name Type Description
proto

The protobuf data themselves

program Program

A reference to the Program

Parameters:

Name Type Description Default
proto

The protobuf data

required
program Program

The program

required
Source code in bindings/python/quokka/data.py
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
class DataHolder(Mapping):
    """Data bucket

    All the data of the program are referenced in this bucket, which allows
    them to be stored only once.

    The bucket is read-only: setting or deleting an entry raises ValueError.
    Entries are looked up by address; iterating yields the Data objects
    themselves rather than their addresses.

    Attributes:
        proto: The protobuf data themselves
        program: A reference to the Program

    Arguments:
        proto: The protobuf data
        program: The program
    """

    def __init__(self, proto, program: "quokka.Program"):
        """Init method

        Arguments:
            proto: Protobuf object whose ``data`` field lists the data
            program: Backref to the program
        """
        self.proto = proto.data
        self.program: "quokka.Program" = program
        # Address -> protobuf index; if two entries map to the same address
        # the later index wins.
        self._addr_to_idx: dict[AddressT, Index] = {
            program.virtual_address(data.segment_index, data.segment_offset): index
            for index, data in enumerate(proto.data)
        }

    def __setitem__(self, key: "Index", value: "Data") -> None:
        """Set a data (unsupported, always raises ValueError)"""
        raise ValueError("Should not be accessed")

    def __delitem__(self, value: "Index") -> None:
        """Remove a data from the bucket (unsupported, always raises ValueError)"""
        raise ValueError("Should not be accessed")

    def __contains__(self, address: object) -> bool:
        """Tell whether a data exists at the given address.

        The mixin provided by Mapping only treats KeyError as "missing",
        whereas ``__getitem__`` raises ValueError; without this override a
        membership test on an unknown address would raise instead of
        returning False.
        """
        return address in self._addr_to_idx

    def __getitem__(self, address: "AddressT") -> "Data":
        """Get a data from the bucket.

        Arguments:
            address: Data address
        Returns:
            A Data
        Raises:
            ValueError: if no data is known at this address
        """
        key = self._addr_to_idx.get(address)
        if key is None:
            raise ValueError(f"No data at address 0x{address:x}")
        # Right now we create a new Data object each time, but we could cache them if needed
        return Data(key, self.proto[key], self.program)

    def __len__(self) -> int:
        """Number of data in the program (one per distinct address)"""
        return len(self._addr_to_idx)

    def __iter__(self):
        """Iterate over the data of the program.

        Yields:
            A Data for every known address, in insertion order of the
            address index.
        """
        for address in self._addr_to_idx:
            yield self[address]

__delitem__(value)

Remove a data from the bucket

Source code in bindings/python/quokka/data.py
240
241
242
def __delitem__(self, value: Index) -> None:
    """Deleting a data is not supported; this always raises ValueError."""
    message = "Should not be accessed"
    raise ValueError(message)

__getitem__(address)

Get a data from the bucket.

Parameters:

Name Type Description Default
address AddressT

Data address

required

Returns: A Data

Source code in bindings/python/quokka/data.py
244
245
246
247
248
249
250
251
252
253
254
255
256
def __getitem__(self, address: AddressT) -> Data:
    """Look up the data located at an address.

    Arguments:
        address: Data address
    Returns:
        A Data
    Raises:
        ValueError: if the address is not present in the address index
    """
    index = self._addr_to_idx.get(address)
    if index is not None:
        # A fresh Data object is built on every lookup; nothing is cached.
        return Data(index, self.proto[index], self.program)
    raise ValueError(f"No data at address 0x{address:x}")

__init__(proto, program)

Init method

Parameters:

Name Type Description Default
proto

List of data in the protobuf

required
program Program

Backref to the program

required
Source code in bindings/python/quokka/data.py
222
223
224
225
226
227
228
229
230
231
232
233
234
def __init__(self, proto, program: quokka.Program):
    """Build the bucket and its address index.

    Arguments:
        proto: Protobuf object whose ``data`` field lists the data
        program: Backref to the program
    """
    self.proto = proto.data
    self.program: quokka.Program = program

    # Map every virtual address to the position of its entry in the protobuf.
    address_index: dict[AddressT, Index] = {}
    for position, entry in enumerate(proto.data):
        address = program.virtual_address(entry.segment_index, entry.segment_offset)
        address_index[address] = position
    self._addr_to_idx = address_index

__iter__()

Iterate over the data of the program.

Source code in bindings/python/quokka/data.py
262
263
264
265
def __iter__(self):
    """Iterate over the data of the program.

    Yields:
        The item obtained by indexing the bucket with each known address,
        in the order of the address index.
    """
    # Only the addresses are needed; the stored index is resolved by self[...]
    for address in self._addr_to_idx:
        yield self[address]

__len__()

Number of data in the program

Source code in bindings/python/quokka/data.py
258
259
260
def __len__(self) -> int:
    """Count the addresses registered in the address index."""
    known_addresses = self._addr_to_idx
    return len(known_addresses)

__setitem__(key, value)

Set a data

Source code in bindings/python/quokka/data.py
236
237
238
def __setitem__(self, key: Index, value: Data) -> None:
    """Storing a data is not supported; this always raises ValueError."""
    message = "Should not be accessed"
    raise ValueError(message)