Skip to content

Functions features

Original code

# From https://github.com/Cisco-Talos/binary_function_similarity/blob/main/IDA_scripts/IDA_acfg_features/core/ff_features.py#L76
def get_function_features(fva, bbs_dict, len_edges):
    """
    Build the dictionary of function-level features.

    Args:
        fva: function virtual address
        bbs_dict: a dictionary with all the features, one entry per BB
        len_edges: number of edges

    Return:
        a dictionary with function-level features
    """
    # Function-level key -> basic-block-level key that is summed via f_sum.
    summed_counters = (
        ('n_func_calls', 'n_call_instrs'),
        ('n_logic_instrs', 'n_logic_instrs'),
        ('n_redirections', 'n_redirect_instrs'),
        ('n_transfer_instrs', 'n_transfer_instrs'),
    )

    f_dict = {}
    for function_key, block_key in summed_counters:
        f_dict[function_key] = f_sum(bbs_dict, block_key)

    # Values that are looked up per function rather than summed per block.
    f_dict['size_local_variables'] = get_size_local_vars(fva)
    f_dict['n_bb'] = len(bbs_dict)
    f_dict['n_edges'] = len_edges
    f_dict['n_incoming_calls'] = get_func_incoming_calls(fva)
    f_dict['n_instructions'] = f_sum(bbs_dict, 'n_instructions')
    return f_dict

Quokka code

import quokka

# Placeholder alias used by the annotations below; the concrete type is elided (`...`) in this snippet.
FeaturesDict = ...

def sum_block_features(bb_features: FeaturesDict, feature: str) -> int:
    """Add up one feature over all basic blocks of a function.

    Args:
        bb_features: mapping whose values are per-basic-block feature
            dictionaries.
        feature: key to read from every per-block dictionary; it must
            start with ``"n_"``.

    Returns:
        The sum of ``feature`` over every value of ``bb_features``
        (``0`` when the mapping is empty).
    """
    assert feature.startswith("n_"), "Only numeric values can be summed"

    total = 0
    for block_features in bb_features.values():
        total = total + block_features[feature]
    return total


def get_func_features(func: quokka.Function) -> FeaturesDict:
    """Compute the function-level features of a Quokka function.

    Every key obtained by iterating ``func.graph`` is resolved with
    ``func.get_block`` and passed to ``get_bb_features``; the per-block
    results are then aggregated into function-level values.

    Args:
        func: the Quokka function to describe.

    Returns:
        A dictionary holding the aggregated features, plus the per-block
        feature dictionaries under the ``"basic_blocks"`` key.
    """
    bb_features = {
        block_start: get_bb_features(func.get_block(block_start))
        for block_start in func.graph
    }

    # Function-level key -> per-block key summed over every basic block.
    summed_counters = (
        ('n_func_calls', 'n_call_instrs'),
        ('n_logic_instrs', 'n_logic_instrs'),
        ('n_redirections', 'n_redirect_instrs'),
        ('n_transfer_instrs', 'n_transfer_instrs'),
    )
    features = {
        function_key: sum_block_features(bb_features, block_key)
        for function_key, block_key in summed_counters
    }

    features['size_local_variables'] = ...  # Not possible with Quokka
    features['n_bb'] = len(bb_features)
    features['n_edges'] = len(func.graph.edges)
    features['n_incoming_calls'] = len(func.callers)
    # Count by iterating: one unit per item yielded by func.instructions.
    features['n_instructions'] = sum(1 for _ in func.instructions)
    features["basic_blocks"] = bb_features
    return features

Et voilà!