Skip to content

builder

builder

Author: dm (achallande@quarkslab.com)

Create source maps for AOSP.

Prerequisites: - a recent git version (>2.20) - an AOSP mirror - time

clean_disk(branch_workdir)

Remove a branch directory on the disk to save some space

Parameters:

Name Type Description Default
branch_workdir Path

Path to the directory to remove

required
Source code in bgraph/builder/builder.py
def clean_disk(branch_workdir: Path) -> None:
    """Delete a branch work directory to reclaim disk space.

    Nothing is done when the path is not an existing directory. Errors raised
    while deleting the tree are ignored.

    :param branch_workdir: Path to the directory to remove
    """
    if not branch_workdir.is_dir():
        return

    shutil.rmtree(branch_workdir, ignore_errors=True)

combine_files_path(branch_dir)

Load the "files.pickle" stored with results of git commands.

Parameters:

Name Type Description Default
branch_dir Path

Directory to find the AOSP partial tree

required

Returns:

Type Description
Dict[pathlib.Path, List[str]]

A mapping of path and the list of files inside the project

Source code in bgraph/builder/builder.py
def combine_files_path(branch_dir: Path) -> Dict[Path, List[str]]:
    """Load every "files.pickle" stored with results of git commands.

    The directory is searched recursively. Each listing found is keyed by the
    directory that contains it. Listings that cannot be unpickled (corrupted,
    empty or truncated files) are skipped.

    :param branch_dir: Directory to find the AOSP partial tree
    :return: A mapping of path and the list of files inside the project
    """
    files: Dict[Path, List[str]] = {}
    for file_path in branch_dir.rglob("files.pickle"):
        try:
            # NOTE: pickle is only safe on files produced by this tool itself.
            with open(file_path, "rb") as pickle_fd:
                local_files = pickle.load(pickle_fd)
        except (pickle.PickleError, EOFError):
            # EOFError is what pickle raises on an empty/truncated file; it is
            # not a PickleError subclass, so it must be listed explicitly.
            continue

        files[file_path.parent] = local_files

    return files

compose_all(mirror, branch_pattern='android-*', work_dir=None, force=False)

Iterates through all the branches in AOSP and create the source maps.

This method: - lists all the existing branches and filters those matching the pattern - does a partial checkout of each of them - parses the Soong files and stores them

Parameters:

Name Type Description Default
mirror Union[str, pathlib.Path]

Path/Link to a mirror directory or an URL.

required
branch_pattern str

Optional. Pattern to filter branches

'android-*'
work_dir Optional[pathlib.Path]

Optional. Work directory

None
force bool

Optional. Overrides results.

False

Returns:

Type Description
Path

The path to the work directory

Source code in bgraph/builder/builder.py
def compose_all(
    mirror: Union[str, Path],
    branch_pattern: str = "android-*",
    work_dir: Optional[Path] = None,
    force: bool = False,
) -> Path:
    """Build the source maps for every AOSP branch matching a pattern.

    Steps:
        - list the branches of the mirror and keep those matching the pattern
        - run ``compose_manifest_branch`` on each of them, one after the other

    :param mirror: Path/Link to a mirror directory or an URL.
    :param branch_pattern: Optional. Pattern to filter branches
    :param work_dir: Optional. Work directory; a temporary one is created
        when it is not given
    :param force: Optional. Overrides results.
    :return: The path to the work directory
    """
    # Branch discovery comes first so that nothing is created on disk when
    # listing the branches fails.
    selected = fnmatch.filter(get_all_branches(mirror), branch_pattern)

    output_dir = (
        work_dir if work_dir is not None else Path(tempfile.mkdtemp(prefix="bgraph_"))
    )

    logger.info("Found %d branches", len(selected))
    for name in selected:
        compose_manifest_branch(name, mirror, output_dir, force)

    logger.info("Finished")

    return output_dir

compose_manifest_branch(branch_name, mirror, work_dir=None, force=False)

Create the soong parser for a manifest branch.

As the process is slow, multiprocessing.Pool is used to speed up the checkout. The bottleneck is the parsing of blueprint files. However, since variable definitions must be analyzed, we cannot just randomly parallelize this step and it must be done carefully (read: it's not done yet).

The SoongParser is used to parse the whole tree blueprints files and stored using pickle. Another step is to convert this object as a (networkx) graph.

Parameters:

Name Type Description Default
branch_name str

Name of the branch to checkout

required
mirror Union[str, pathlib.Path]

Path/Link towards the mirror or the manifest URL

required
work_dir Optional[pathlib.Path]

Optional. Working directory - if not set, a temporary folder is used

None
force bool

Optional. Overwrite existing branch.

False

Returns:

Type Description
Optional[pathlib.Path]

The path to the work dir

Source code in bgraph/builder/builder.py
@bgraph.utils.no_except
def compose_manifest_branch(
    branch_name: str,
    mirror: Union[str, Path],
    work_dir: Optional[Path] = None,
    force: bool = False,
) -> Optional[Path]:
    """Create the soong parser for a manifest branch.

    As the process is slow, multiprocessing.Pool is used to speed up the checkout.
    The bottleneck is the parsing of blueprint files. However, since variable
    definitions must be analyzed, we cannot just randomly parallelize this step and
    it must be done carefully (read: it's not done yet).

    The SoongParser is used to parse the whole tree of blueprint files and is stored
    using pickle as ``<work_dir>/<branch_name>.pickle``. Another step is to convert
    this object to a (networkx) graph.

    A branch without any ``Android.bp`` file is recorded with an empty
    ``<branch_name>.empty`` marker so that it is skipped on later runs. The branch
    checkout directory is removed once the branch has been processed.

    :param branch_name: Name of the branch to checkout
    :param mirror: Path/Link towards the mirror or the manifest URL
    :param work_dir: Optional. Working directory - if not set, a temporary folder is used
    :param force: Optional. Overwrite existing branch.
    :return: The path to the work dir, or None when the manifest branch could not
        be created
    """
    logger.info("Start composing for %s", branch_name)

    if work_dir is None:
        work_dir = Path(tempfile.mkdtemp(prefix="bgraph_"))

    # Guard: do not redo a branch
    pickle_file = work_dir / f"{branch_name}.pickle"
    empty_marker = work_dir / f"{branch_name}.empty"
    if pickle_file.is_file() and force is False:
        logger.info("Branch already found; skip.")
        return work_dir
    elif empty_marker.is_file():
        logger.info("Branch empty; skip.")
        return work_dir

    # Create a branch by using repo
    try:
        branch_dir = create_manifest_branch(work_dir, mirror, branch_name)
    except bgraph.exc.BGraphBuilderException:
        return None

    manifest_file = branch_dir / ".repo" / "manifests" / "default.xml"

    logger.info("List projects")
    project_checkout_branch = functools.partial(
        project_checkout, branch_name, branch_dir, mirror
    )

    # Load the manifest
    manifest = bgraph.parsers.Manifest.from_file(manifest_file)

    # Core: multiprocessing (one project checkout per task)
    checkout_timeout = 24 * 60 * 60  # seconds: wait at most one day
    with multiprocessing.Pool() as pool:
        res = pool.map_async(project_checkout_branch, manifest.get_projects().items())
        res.get(checkout_timeout)

    logger.info("Finished to compose with %s", branch_name)

    # Guard: Search build files (stop at the first one found)
    if next(branch_dir.rglob("Android.bp"), None) is None:
        logger.info("Found 0 Android.bp file, aborting")

        # Create an empty file to prevent from doing it if we restart
        empty_marker.touch()

        clean_disk(branch_dir)
        return work_dir

    soong_parser = bgraph.parsers.SoongParser()

    logger.info("Starting parsing AOSP build files")
    soong_parser.parse_aosp(branch_dir, project_map=manifest.get_projects())
    soong_parser.file_listing = combine_files_path(branch_dir)

    # Save the result
    try:
        with open(pickle_file, "wb") as pickle_fd:
            pickle.dump(soong_parser, pickle_fd)
    except pickle.PickleError:
        logger.error("Failed to pickle")
        # A partial file would be mistaken for a finished branch by the guard
        # above on the next run, so do not leave it behind.
        if pickle_file.is_file():
            pickle_file.unlink()
        clean_disk(branch_dir)
        return work_dir

    # Clean the disk
    logger.info("Clean branch")
    clean_disk(branch_dir)

    return work_dir

create_manifest_branch(root_dir, mirror, branch_name)

Create a manifest branch in the root_dir with the manifest branch as name.

Parameters:

Name Type Description Default
root_dir Path

Where to download the branch

required
mirror Union[str, pathlib.Path]

Path/Link to the mirror directory or manifest URL

required
branch_name str

Name of the branch to checkout

required

Returns:

Type Description
Path

A path towards the branch work directory

Source code in bgraph/builder/builder.py
def create_manifest_branch(
    root_dir: Path, mirror: Union[str, Path], branch_name: str
) -> Path:
    """Create a manifest branch in the root_dir with the manifest branch as name.

    The branch directory is ``root_dir / branch_name``; ``repo init`` is run inside
    it with a partial, depth-1 clone of the manifest.

    :param root_dir: Where to download the branch
    :param mirror: Path/Link to the mirror directory or manifest URL
    :param branch_name: Name of the branch to checkout
    :raises BGraphBuilderException: if the repo command is not found or if
        ``repo init`` fails
    :return: A path towards the branch work directory
    """

    branch_dir = root_dir / branch_name
    branch_dir.mkdir(parents=True, exist_ok=True)

    # Init mirror
    if isinstance(mirror, str):
        # If it is a remote url
        manifest = f"{mirror}/platform/manifest"
    else:
        # Local mirror: the manifest is a git directory inside the mirror
        manifest = str(pathlib.Path(mirror) / "platform" / "manifest.git")

    try:
        repo = sh.Command("repo").bake("--color=never")
    except sh.CommandNotFound as err:
        logger.error("Did not find repo command. Is it in PATH?")
        raise bgraph.exc.BGraphBuilderException("Repo not found.") from err

    try:
        repo.init(
            "-u",
            manifest,
            "-b",
            branch_name,
            "--partial-clone",
            "--clone-filter=blob:none",
            "--depth=1",
            _cwd=branch_dir,
        )
    except sh.ErrorReturnCode as err:
        logger.error(
            "Unable to init the repository for branch %s. Verify that either the mirror "
            "is correct or the branch exists on the target.",
            branch_name,
        )
        raise bgraph.exc.BGraphBuilderException("Repo init failed.") from err

    return branch_dir

get_all_branches(manifest, pattern='android-*')

Parses the list of all branches available in the mirror directory

This method works for both a local manifest directory and a remote URL.

Returns:

Type Description
List[str]

A list of manifest branches

Source code in bgraph/builder/builder.py
def get_all_branches(
    manifest: Union[Path, str], pattern: str = "android-*"
) -> List[str]:
    """Parses the list of all branches available in the mirror directory

    This method works for both a local manifest directory and a remote url. The
    branches are the tags reported by ``git ls-remote --tag`` for the manifest
    repository.

    :param manifest: A link or a path to the manifest
    :param pattern: A pattern to match tags in the remote directory.
    :raises BGraphManifestException: if the git command fails
    :return: A list of manifest branches
    """

    if isinstance(manifest, str):
        manifest = f"{manifest}/platform/manifest"
    elif isinstance(manifest, pathlib.Path):
        manifest = manifest / "platform" / "manifest.git"

    try:
        tags = sh.git("ls-remote", "--tag", f"{manifest!s}", pattern)
    except sh.ErrorReturnCode as err:
        raise bgraph.exc.BGraphManifestException(
            "Unable to retrieve the branches."
        ) from err

    tag_prefix = "refs/tags/"
    branches: List[str] = []
    for line_encoded in tags.stdout.splitlines():
        # Each useful line is "<hash>\t<ref>"; anything else is ignored
        try:
            _, tag = line_encoded.decode().split("\t")
        except ValueError:
            continue

        # Refs containing "^" (e.g. "<tag>^{}") are not kept as branch names
        if tag.startswith(tag_prefix) and "^" not in tag:
            branches.append(tag[len(tag_prefix) :])

    return branches

partial_checkout(branch_name, project_path, git_dir)

Performs a partial checkout using git.

A partial checkout allows checking out only the interesting files rather than the whole repository. This is a bit tricky and needs a recent git version (>2.22).

Parameters:

Name Type Description Default
branch_name str

Name of the branch

required
project_path Path

Path where to do the checkout

required
git_dir Union[pathlib.Path, str]

Url/Path to the git directory

required

Returns:

Type Description
bool

Boolean for success

Source code in bgraph/builder/builder.py
def partial_checkout(
    branch_name: str, project_path: Path, git_dir: Union[Path, str]
) -> bool:
    """Performs a partial checkout using git.

    A partial checkout allows checking out only interesting files (here the
    ``*.bp`` files) and not the whole repository.
    This is a bit tricky and needs a recent git version (>2.22)

    The full list of files of the project is stored in ``files.pickle`` inside the
    project directory, and the local ``.git`` folder is removed afterwards.

    :param branch_name: Name of the branch
    :param project_path: Path where to do the checkout
    :param git_dir: Url/Path to the git directory
    :return: Boolean for success
    """

    # Guard to not redo the operation if the checkout has already been done
    # NOTE(review): a directory left behind by a failed run, or created as the
    # parent of a nested project, also satisfies this guard - confirm intended.
    if project_path.is_dir():
        return True

    project_path.mkdir(parents=True, exist_ok=True)

    # Prepare the git command
    git = sh.git.bake(_cwd=project_path)

    # Init the directory only if .git folder is not present because git init fails on already inited git directories
    if not (project_path / ".git").is_dir():
        git.init()
        git.remote("add", "origin", f"{git_dir!s}")

    # Partial fetch : without objects
    try:
        git.fetch(
            "--filter=blob:none",
            "--recurse-submodules=yes",
            "--no-tags",
            "--depth=1",
            "origin",
            "tag",
            branch_name,
        )
    except sh.ErrorReturnCode:
        logger.error("Unable to do the fetch part of the operation.")
        return False

    # Some versions of git will fails if the .git/info/sparse-checkout is already there
    try:
        git("sparse-checkout", "init")
    except sh.ErrorReturnCode:
        pass

    # Sparse checkout magic
    try:
        git("sparse-checkout", "set", "**/*.bp")
        git("sparse-checkout", "reapply")
        git.checkout("--quiet", f"refs/tags/{branch_name}")
    except sh.ErrorReturnCode:
        logger.error("Unable to perform sparse-checkout magic.")
        return False

    # List all the files of the project (without downloading them)
    try:
        result = git("ls-tree", "-r", "--name-only", f"{branch_name}")
        # One path per line: split on line boundaries only, so that paths
        # containing spaces are kept in one piece.
        files = result.stdout.decode().splitlines()
    except sh.ErrorReturnCode:
        files = []

    # We will need the list of files afterwards so store it
    try:
        with open(project_path / "files.pickle", "wb") as pickle_fd:
            pickle.dump(files, pickle_fd)
    except pickle.PickleError:
        logger.error("Unable to dump the list of files in the pickle-file.")
        return False

    # Save local space: delete git folder
    local_dir = project_path / ".git"
    if local_dir.is_dir():
        shutil.rmtree(local_dir)

    return True

project_checkout(branch_name, branch_dir, mirror, paths)

Perform a project checkout.

The project name is where a project is found in the mirror (e.g. MIRROR/platform/external/sqlite) The relative project path is the final path of the project in AOSP (e.g. ROOT/external/sqlite)

Abort fast if no git directory is found in the mirror

Parameters:

Name Type Description Default
branch_name str

Name of the branch to checkout

required
branch_dir Path

Branch working directory

required
mirror Union[str, pathlib.Path]

Path/Link to a mirror

required
paths Tuple[pathlib.Path, pathlib.Path]

Project Name and project relative path

required

Returns:

Type Description
None
Source code in bgraph/builder/builder.py
@bgraph.utils.no_except
def project_checkout(
    branch_name: str,
    branch_dir: Path,
    mirror: Union[str, Path],
    paths: Tuple[Path, Path],
) -> None:
    """Perform a project checkout.

    The project name is where a project is found in the mirror (e.g. MIRROR/platform/external/sqlite)
    The relative project path is the final path of the project in AOSP (e.g. ROOT/external/sqlite)

    Abort fast if no git directory is found in a local mirror. For a remote
    mirror (given as a string) no such check is possible and the checkout is
    always attempted.

    :param branch_name: Name of the branch to checkout
    :param branch_dir: Branch working directory
    :param mirror: Path/Link to a mirror
    :param paths: Project Name and project relative path
    :return:
    """
    project_name, relative_project_path = paths

    # Mirror git dir
    git_dir: Union[Path, str]
    if isinstance(mirror, str):
        git_dir = f"{mirror}/{project_name}"
    else:
        # Keep a Path (not a str) so that its existence can be checked below
        git_dir = Path(mirror) / f"{project_name}.git"

    # AOSP project dir
    project_path = branch_dir / relative_project_path

    if isinstance(git_dir, Path) and not git_dir.is_dir():
        logger.error("Project not found (%s)", git_dir)
        return

    partial_checkout(branch_name, project_path, git_dir)

graph

Author: dm

Collection of dirty scripts used to generate a source graph for AOSP based on the parsing of the Android.bp (blueprints) files.

This must be used with the SoongParser developed in AOSP_BUILD project (aka acb)

dependencies_keys: List[str]

In soong files, keys that indicates a source dependency

build_source_map(sp)

From a SoongParser object, converts all the targets into a graph representation where the links between two nodes are : - a dependency link if the origin is induced in the destination - a source link if the origin is a source file and the destination a target

The graphs are saved as networkx objects with pickle.

Returns:

Type Description
DiGraph

An UDG as a DiGraph

Source code in bgraph/builder/graph.py
def build_source_map(sp: bgraph.parsers.soong_parser.SoongParser) -> BGraph:
    """
    From a SoongParser object, converts all the targets into a graph representation where
    the links between two nodes are :
        - a dependency link if the origin is induced in the destination
        - a source link if the origin is a source file and the destination a target

    The graph is only built and returned here; storing it is left to the caller.

    :param sp: The soong parser
    :return: An UDG as a DiGraph
    """
    graph: BGraph = nx.DiGraph()

    # One node per target, carrying the sections of that target as "data"
    for target in sp.sections:
        graph.add_node(target, data=sp.get_section(target))

    file_listing = sp.file_listing

    # Cache shared by every convert_section call (filled by compute_file_list)
    section_files: Dict[Path, List[str]] = {}

    # Snapshot of the nodes: convert_section adds edges (and thus possibly new
    # nodes) to the graph while we iterate.
    nodes = list(graph.nodes)
    total = len(nodes)
    for idx, section_name in enumerate(nodes):
        if idx % 500 == 0:
            logger.debug("Converting section %d / %d", idx, total)

        for section in graph.nodes[section_name]["data"]:

            project_path: Path = section.get(
                bgraph.parsers.soong_parser.SoongParser.SECTION_PROJECT_PATH
            )
            project_files: List[str] = file_listing.get(project_path, [])
            if not project_files:
                logger.info("Cannot find files for project %s", section_name)

            convert_section(graph, section_files, section_name, section, project_files)

    return graph

compute_file_list(section_files, soong_file, project_path, files)

Create the file list that matches the soong_file for the project.

WARNING: This function is not pure, it will modify in-place the section_files mapping, allowing for an easy caching.

Parameters:

Name Type Description Default
section_files Dict[pathlib.Path, List[str]]

A mapping storing the files mapping

required
soong_file Path

Path for a soong file

required
project_path Path

Path for a project (should be a parent of the soong file)

required
files List[str]

List of files in the project (found via Git)

required

Returns:

Type Description
List[str]

A list of files descendent of the soong file in the project

Source code in bgraph/builder/graph.py
def compute_file_list(
    section_files: Dict[Path, List[str]],
    soong_file: Path,
    project_path: Path,
    files: List[str],
) -> List[str]:
    """Return the project files located under ``soong_file``.

    WARNING: This function is *not* pure: the result is memoized inside the
    ``section_files`` mapping (keyed by ``soong_file``), and a cached entry is
    returned as-is whatever ``project_path`` and ``files`` are.

    :param section_files: A mapping storing the files mapping (used as a cache)
    :param soong_file: Path for a soong file
    :param project_path: Path for a project (should be a parent of the soong file)
    :param files: List of files in the project (found via Git)
    :return: A list of files descendent of the soong file in the project, as
        posix paths relative to ``soong_file``
    """
    # Cache hit: nothing to compute
    try:
        return section_files[soong_file]
    except KeyError:
        pass

    # Register the entry first so the cache is filled as the scan progresses
    collected: List[str] = []
    section_files[soong_file] = collected

    soong_prefix = str(soong_file)
    for name in files:
        candidate = project_path / name

        # Cheap textual pre-filter before the real path comparison
        if not str(candidate).startswith(soong_prefix):
            continue

        try:
            relative = candidate.relative_to(soong_file)
        except ValueError:
            # Same textual prefix but not actually below soong_file
            continue

        collected.append(relative.as_posix())

    return collected

convert(pickle_dir, result_dir)

Iterates through the source_maps directory and converts every soong_parser object to a NetworkX DiGraph

Parameters:

Name Type Description Default
pickle_dir Path

Path towards the file where the pickle files are stored.

required
result_dir Path

Path where the BGraph are stored

required
Source code in bgraph/builder/graph.py
def convert(pickle_dir: Path, result_dir: Path) -> None:
    """Convert every pickled soong_parser of a directory to a NetworkX DiGraph.

    Pickle files that already have a matching ``.bgraph`` file in the result
    directory are left out. The remaining ones are converted in parallel and
    the outcome is reported through the logger.

    :param pickle_dir: Path towards the directory where the pickle files are stored.
    :param result_dir: Path where the BGraph are stored
    """

    pending: List[Path] = []
    for candidate in pickle_dir.glob("*.pickle"):
        target = result_dir / candidate.with_suffix(".bgraph").name
        if not target.is_file():
            pending.append(candidate)

    worker = functools.partial(convert_single, result_dir)

    with multiprocessing.Pool() as pool:
        outcomes = pool.map(worker, pending)

    succeeded = 0
    for branch_name, status in outcomes:
        if status is False:
            logger.info("Fail to convert %s", branch_name)
            continue
        succeeded += 1

    logger.info("Converted %d/%d branches", succeeded, len(outcomes))

convert_section(graph, section_files, section_name, section, project_files)

Convert a section from the SoongParser into a node in the graph and sets its dependencies

Warning: This function modifies the graph in place.

Note: Some refactoring should be done on the file path detection (drop fnmatch).

TODO(dm): Integrate other type of dependencies such as exclusion

Parameters:

Name Type Description Default
graph DiGraph

The UDG

required
section_files Dict[pathlib.Path, List[str]]

A mapping for section files allowing an easy cache

required
section_name str

Name of the section to convert

required
section Section

Section data in itself

required
project_files List[str]

Files found in the source tree

required
Source code in bgraph/builder/graph.py
def convert_section(
    graph: BGraph,
    section_files: Dict[Path, List[str]],
    section_name: str,
    section: Section,
    project_files: List[str],
) -> None:
    """Add to the graph the edges described by one SoongParser section.

    Every value found under a dependency key yields a "dep" edge from that value
    to the section. Every value found under a source key is treated as a file
    pattern: each project file matching it yields a "src" edge from the file to
    the section.

    Warning: This function modifies the graph in place.

    Note: Some refactoring should be done on the file path detection (drop fnmatch).

    TODO(dm):
        Integrate other type of dependencies such as exclusion

    :param graph: The UDG
    :param section_files: A mapping for section files allowing an easy cache
    :param section_name: Name of the section to convert
    :param section: Section data in itself
    :param project_files: Files found in the source tree
    """
    parser_cls = bgraph.parsers.soong_parser.SoongParser

    # Both locations are mandatory; give up on the section when one is missing
    try:
        project_path: Path = section[parser_cls.SECTION_PROJECT_PATH]
    except KeyError:
        logger.error("Missing section_project_path in %s", section_name)
        return

    try:
        soong_file_path: Path = section[parser_cls.SOONG_FILE].parent
    except (KeyError, AttributeError):
        logger.error("Missing soong_file in %s", section_name)
        return

    for key, value in bgraph.utils.recurse(section):  # type: ignore
        # Plain dependencies: the value is directly the origin of the edge
        if key in dependencies_keys:
            for dependency in value:
                graph.add_edge(dependency, section_name, type="dep")
            continue

        if key not in srcs_keys:
            continue

        # Source keys: each value is a pattern to resolve against the files
        for pattern in value:
            # For dependency key representing directories, add a *
            if "dirs" in key:
                pattern = f"{pattern}*"

            # fnmatch is not a proper path tool, so a leading "./" (or a lone
            # leading ".") has to be dropped by hand.
            # TODO(dm): Use removeprefix in Python3.9
            if pattern.startswith("./"):
                pattern = pattern[2:]
            elif pattern.startswith("."):
                pattern = pattern[1:]

            # Resolve * in dependencies files : the pattern must be
            # modified to accommodate the Python fnmatch module
            # FIX: https://android.googlesource.com/platform/build/soong/+/refs/heads/master#file-lists
            candidates = compute_file_list(
                section_files, soong_file_path, project_path, project_files
            )
            for matched_file in fnmatch.filter(candidates, pattern.replace("**/", "*")):
                graph.add_edge(
                    str(soong_file_path / matched_file),
                    section_name,
                    type="src",
                )

convert_single(result_dir, pickle_file)

Convert a pickle file representing a soong parser to a graph and store it in result dir.

Parameters:

Name Type Description Default
result_dir Path

Where to store the result

required
pickle_file Path

Which file to convert

required

Returns:

Type Description
Tuple[str, bool]

A tuple (branch_name, boolean for success) for later statistics.

Source code in bgraph/builder/graph.py
def convert_single(result_dir: Path, pickle_file: Path) -> Tuple[str, bool]:
    """Convert a pickle file representing a soong parser to a graph and store it in
    result dir.

    The graph is written to ``result_dir`` under the name of the pickle file with
    a ``.bgraph`` suffix. The branch name is the stem of the pickle file.

    :param result_dir: Where to store the result
    :param pickle_file: Which file to convert
    :return: A tuple (branch_name, boolean for success) for later statistics.
    """
    branch_name: str = pickle_file.stem
    bgraph_file = result_dir / (pickle_file.with_suffix(".bgraph").name)

    try:
        # NOTE: pickle is only safe on files produced by this tool itself.
        with open(pickle_file, "rb") as pickle_fd:
            soong_parser = pickle.load(pickle_fd)
    except (pickle.PickleError, EOFError):
        # EOFError: empty or truncated pickle (not a PickleError subclass)
        return branch_name, False

    graph = build_source_map(soong_parser)

    try:
        with open(bgraph_file, "wb") as file:
            pickle.dump(graph, file)
    except pickle.PickleError:
        # Do not leave a partial result: an existing .bgraph file is what
        # marks a branch as already converted.
        if bgraph_file.is_file():
            bgraph_file.unlink()
        return branch_name, False

    return branch_name, True