builder
builder
Author: dm (achallande@quarkslab.com)
Create source maps for AOSP.
Prerequisites: - a recent git version (>2.20) - an AOSP mirror - time
clean_disk(branch_workdir)
Remove a branch directory on the disk to save some space
Parameters:
Name | Type | Description | Default |
---|---|---|---|
branch_workdir |
Path |
Path to the directory to remove |
required |
Source code in bgraph/builder/builder.py
def clean_disk(branch_workdir: Path) -> None:
    """Delete a branch working directory to reclaim disk space.

    Nothing is done when the path is not an existing directory. Errors raised
    while deleting the tree are ignored.

    :param branch_workdir: Path to the directory to remove
    """
    if not branch_workdir.is_dir():
        return

    shutil.rmtree(branch_workdir, ignore_errors=True)
combine_files_path(branch_dir)
Load the "files.pickle" stored with results of git commands.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
branch_dir |
Path |
Directory to find the AOSP partial tree |
required |
Returns:
Type | Description |
---|---|
Dict[pathlib.Path, List[str]] |
A mapping of path and the list of files inside the project |
Source code in bgraph/builder/builder.py
def combine_files_path(branch_dir: Path) -> Dict[Path, List[str]]:
    """Load every "files.pickle" stored with the results of the git commands.

    The directory tree is searched recursively. Files that cannot be unpickled
    (corrupted, empty or truncated) are skipped.

    :param branch_dir: Directory to find the AOSP partial tree
    :return: A mapping of path (the directory holding the pickle file) and the
        list of files inside the project
    """
    files: Dict[Path, List[str]] = {}

    for file_path in branch_dir.rglob("files.pickle"):
        try:
            # NOTE(security): pickle must only be used on trusted data; these
            # files are expected to come from a previous run of this tool.
            with open(file_path, "rb") as pickle_fd:
                local_files = pickle.load(pickle_fd)
        except (pickle.PickleError, EOFError):
            # EOFError is what pickle raises for an empty or truncated file.
            continue

        files[file_path.parent] = local_files

    return files
compose_all(mirror, branch_pattern='android-*', work_dir=None, force=False)
Iterates through all the branches in AOSP and create the source maps.
This method: - lists all the existing branches and filters those matching the pattern - does a partial checkout of each of them - parses the Soong files and stores them
Parameters:
Name | Type | Description | Default |
---|---|---|---|
mirror |
Union[str, pathlib.Path] |
Path/Link to a mirror directory or an URL. |
required |
branch_pattern |
str |
Optional. Pattern to filter branches |
'android-*' |
work_dir |
Optional[pathlib.Path] |
Optional. Work directory |
None |
force |
bool |
Optional. Overrides results. |
False |
Returns:
Type | Description |
---|---|
Path |
The path to the work directory |
Source code in bgraph/builder/builder.py
def compose_all(
    mirror: Union[str, Path],
    branch_pattern: str = "android-*",
    work_dir: Optional[Path] = None,
    force: bool = False,
) -> Path:
    """Iterates through all the branches in AOSP and create the source maps.

    This method:
        - lists all the existing branches and filters those matching the pattern
        - composes each of them with compose_manifest_branch

    :param mirror: Path/Link to a mirror directory or an URL.
    :param branch_pattern: Optional. Pattern to filter branches
    :param work_dir: Optional. Work directory - if not set, a temporary folder is
        created
    :param force: Optional. Overrides results.
    :return: The path to the work directory
    """
    # List branches. The pattern is forwarded to the remote listing as well:
    # otherwise get_all_branches falls back on its own default pattern and any
    # branch_pattern outside of it could never match anything.
    all_branches = get_all_branches(mirror, branch_pattern)
    branches = fnmatch.filter(all_branches, branch_pattern)

    if work_dir is None:
        work_dir = Path(tempfile.mkdtemp(prefix="bgraph_"))

    logger.info("Found %d branches", len(branches))

    for branch_name in branches:
        compose_manifest_branch(branch_name, mirror, work_dir, force)

    logger.info("Finished")

    return work_dir
compose_manifest_branch(branch_name, mirror, work_dir=None, force=False)
Create the soong parser for a manifest branch.
As the process is slow, multiprocessing.Pool is used to speed up the checkout. The bottleneck is the parsing of blueprint files. However, since variable definitions must be analyzed, we cannot just randomly parallelize this step and it must be done carefully (read: it's not done yet.).
The SoongParser is used to parse the whole tree blueprints files and stored using pickle. Another step is to convert this object as a (networkx) graph.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
branch_name |
str |
Name of the branch to checkout |
required |
mirror |
Union[str, pathlib.Path] |
Path/Link towards the mirror or the manifest URL |
required |
work_dir |
Optional[pathlib.Path] |
Optional. Working directory - if not set, a temporary folder is used |
None |
force |
bool |
Optional. Overwrite existing branch. |
False |
Returns:
Type | Description |
---|---|
Optional[pathlib.Path] |
The path to the work dir |
Source code in bgraph/builder/builder.py
@bgraph.utils.no_except
def compose_manifest_branch(
    branch_name: str,
    mirror: Union[str, Path],
    work_dir: Optional[Path] = None,
    force: bool = False,
) -> Optional[Path]:
    """Create the soong parser for a manifest branch.

    As the process is slow, multiprocessing.Pool is used to speed the checkout.
    The bottleneck is the parsing of blueprints files. However, since variables
    definition must be analyzed, we cannot just randomly parallelize this step and
    it must be done carefully (read: it's not done yet.).

    The SoongParser is used to parse the whole tree blueprints files and stored using
    pickle. Another step is to convert this object as a (networkx) graph.

    The result is stored in `<work_dir>/<branch_name>.pickle`. When no Android.bp
    file is found, an empty `<work_dir>/<branch_name>.empty` marker is created
    instead so the branch is skipped on later runs.

    :param branch_name: Name of the branch to checkout
    :param mirror: Path/Link towards the mirror or the manifest URL
    :param work_dir: Optional. Working directory - if not set, a temporary folder is used
    :param force: Optional. Overwrite existing branch.
    :return: The path to the work dir, or None if the manifest branch could not
        be created
    """
    # NOTE(review): the no_except decorator presumably swallows exceptions raised
    # here (e.g. the pool timeout) - confirm against bgraph.utils.
    logger.info("Start composing for %s", branch_name)

    if work_dir is None:
        work_dir = Path(tempfile.mkdtemp(prefix="bgraph_"))

    # Guard: do not redo a branch
    pickle_file = work_dir / f"{branch_name}.pickle"
    empty_marker = work_dir / f"{branch_name}.empty"
    if pickle_file.is_file() and force is False:
        logger.info("Branch already found; skip.")
        return work_dir
    elif empty_marker.is_file():
        logger.info("Branch empty; skip.")
        return work_dir

    # Create a branch by using repo
    try:
        branch_dir = create_manifest_branch(work_dir, mirror, branch_name)
    except bgraph.exc.BGraphBuilderException:
        return None

    manifest_file = branch_dir / ".repo" / "manifests" / "default.xml"

    logger.info("List projects")
    project_checkout_branch = functools.partial(
        project_checkout, branch_name, branch_dir, mirror
    )

    # Load the manifest
    manifest = bgraph.parsers.Manifest.from_file(manifest_file)

    # Core: multiprocessing
    with multiprocessing.Pool() as pool:
        res = pool.map_async(project_checkout_branch, manifest.get_projects().items())
        # Wait at most 24 hours for all the checkouts
        res.get(24 * 60 * 60)

    logger.info("Finished to compose with %s", branch_name)

    # Guard: Search build files (stop at the first one found)
    if next(branch_dir.rglob("Android.bp"), None) is None:
        logger.info("Found 0 Android.bp file, aborting")

        # Create an empty file to prevent from doing it if we restart
        empty_marker.touch()

        clean_disk(branch_dir)
        return work_dir

    soong_parser = bgraph.parsers.SoongParser()

    logger.info("Starting parsing AOSP build files")
    soong_parser.parse_aosp(branch_dir, project_map=manifest.get_projects())
    soong_parser.file_listing = combine_files_path(branch_dir)

    # Save the result
    try:
        with open(pickle_file, "wb") as pickle_fd:
            pickle.dump(soong_parser, pickle_fd)
    except pickle.PickleError:
        logger.error("Failed to pickle")
        # A truncated pickle file would make the guard above skip this branch
        # on the next run: remove it.
        if pickle_file.exists():
            pickle_file.unlink()
        clean_disk(branch_dir)
        return work_dir

    # Clean the disk
    logger.info("Clean branch")
    clean_disk(branch_dir)

    return work_dir
create_manifest_branch(root_dir, mirror, branch_name)
Create a manifest branch in the root_dir with the manifest branch as name.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
root_dir |
Path |
Where to download the branch |
required |
mirror |
Union[str, pathlib.Path] |
Path/Link to the mirror directory or manifest URL |
required |
branch_name |
str |
Name of the branch to checkout |
required |
Returns:
Type | Description |
---|---|
Path |
A path towards the branch work directory |
Source code in bgraph/builder/builder.py
def create_manifest_branch(
    root_dir: Path, mirror: Union[str, Path], branch_name: str
) -> Path:
    """Create a manifest branch in the root_dir with the manifest branch as name.

    The directory `root_dir/branch_name` is created if needed and `repo init` is
    run inside it.

    :param root_dir: Where to download the branch
    :param mirror: Path/Link to the mirror directory or manifest URL
    :param branch_name: Name of the branch to checkout
    :raises BGraphBuilderException: if the repo command is not found or if
        `repo init` fails
    :return: A path towards the branch work directory
    """
    branch_dir = root_dir / branch_name
    branch_dir.mkdir(parents=True, exist_ok=True)

    # Init mirror
    if isinstance(mirror, str):
        # If it is a remote url
        manifest = f"{mirror}/platform/manifest"
    else:
        # Local mirror: anything else is handled as a filesystem path, so that
        # `manifest` is always bound
        manifest = str(Path(mirror) / "platform" / "manifest.git")

    try:
        repo = sh.Command("repo").bake("--color=never")
    except sh.CommandNotFound as err:
        logger.error("Did not find repo command. Is it in PATH?")
        raise bgraph.exc.BGraphBuilderException("Repo not found.") from err

    try:
        repo.init(
            "-u",
            manifest,
            "-b",
            branch_name,
            "--partial-clone",
            "--clone-filter=blob:none",
            "--depth=1",
            _cwd=branch_dir,
        )
    except sh.ErrorReturnCode as err:
        logger.error(
            "Unable to init the repository for branch %s. Verify that either the mirror "
            "is correct or the branch exists on the target.",
            branch_name,
        )
        raise bgraph.exc.BGraphBuilderException("Repo init failed.") from err

    return branch_dir
get_all_branches(manifest, pattern='android-*')
Parses the list of all branches available in the mirror directory
This method works for both a local manifest directory and a remote url.
Returns:
Type | Description |
---|---|
List[str] |
A list of manifest branches |
Source code in bgraph/builder/builder.py
def get_all_branches(
    manifest: Union[Path, str], pattern: str = "android-*"
) -> List[str]:
    """Parses the list of all branches available in the mirror directory

    This method works for both a local manifest directory and a remote url.

    :param manifest: A link or a path to the manifest
    :param pattern: A pattern to match tags in the remote directory.
    :raises BGraphManifestException: if the git command fails
    :return: A list of manifest branches
    """
    if isinstance(manifest, str):
        manifest_location = f"{manifest}/platform/manifest"
    else:
        # Anything that is not a string is handled as a local path
        manifest_location = str(Path(manifest) / "platform" / "manifest.git")

    try:
        tags = sh.git("ls-remote", "--tag", manifest_location, pattern)
    except sh.ErrorReturnCode as err:
        raise bgraph.exc.BGraphManifestException(
            "Unable to retrieve the branches."
        ) from err

    tag_prefix = "refs/tags/"
    branches: List[str] = []
    for line_encoded in tags.stdout.splitlines():
        # Each useful line looks like "<sha>\t<ref>"; skip anything else
        try:
            _, tag = line_encoded.decode().split("\t")
        except ValueError:
            continue

        # Refs containing "^" (e.g. peeled tags "<tag>^{}") are ignored
        if tag.startswith(tag_prefix) and "^" not in tag:
            branches.append(tag[len(tag_prefix) :])

    return branches
partial_checkout(branch_name, project_path, git_dir)
Performs a partial checkout using git.
A partial checkout allows to checkout only interesting files and not the whole repository. This is a bit tricky and needs a recent git version (>2.22)
Parameters:
Name | Type | Description | Default |
---|---|---|---|
branch_name |
str |
Name of the branch |
required |
project_path |
Path |
Path where to do the checkout |
required |
git_dir |
Union[pathlib.Path, str] |
Url/Path to the git directory |
required |
Returns:
Type | Description |
---|---|
bool |
Boolean for success |
Source code in bgraph/builder/builder.py
def partial_checkout(
    branch_name: str, project_path: Path, git_dir: Union[Path, str]
) -> bool:
    """Performs a partial checkout using git.

    A partial checkout allows to checkout only interesting files and not the whole
    repository. This is a bit tricky and needs a recent git version (>2.22)

    Only the blueprint files (`**/*.bp`) are checked out. The full list of files
    of the project is stored in `project_path/files.pickle` and the `.git`
    folder is deleted afterwards.

    :param branch_name: Name of the branch
    :param project_path: Path where to do the checkout
    :param git_dir: Url/Path to the git directory
    :return: Boolean for success
    """
    # Guard to not redo the operation if the checkout has already been done
    if project_path.is_dir():
        return True

    project_path.mkdir(parents=True, exist_ok=True)

    # Prepare the git command
    git = sh.git.bake(_cwd=project_path)

    # Init the directory only if .git folder is not present because git init fails
    # on already inited git directories
    if not (project_path / ".git").is_dir():
        git.init()

    git.remote("add", "origin", f"{git_dir!s}")

    # Partial fetch : without objects
    try:
        git.fetch(
            "--filter=blob:none",
            "--recurse-submodules=yes",
            "--no-tags",
            "--depth=1",
            "origin",
            "tag",
            branch_name,
        )
    except sh.ErrorReturnCode:
        logger.error("Unable to do the fetch part of the operation.")
        return False

    # Some versions of git will fails if the .git/info/sparse-checkout is already there
    try:
        git("sparse-checkout", "init")
    except sh.ErrorReturnCode:
        pass

    # Sparse checkout magic
    try:
        git("sparse-checkout", "set", "**/*.bp")
        git("sparse-checkout", "reapply")
        git.checkout("--quiet", f"refs/tags/{branch_name}")
    except sh.ErrorReturnCode:
        logger.error("Unable to perform sparse-checkout magic.")
        return False

    # List all the files of the project (without downloading them)
    try:
        result = git("ls-tree", "-r", "--name-only", f"{branch_name}")
        # One path per line: split on newlines only, so that file names
        # containing spaces are kept in one piece.
        files = [name for name in result.stdout.decode().split("\n") if name]
    except sh.ErrorReturnCode:
        files = []

    # We will need the list of files afterwards so store it
    try:
        with open(project_path / "files.pickle", "wb") as pickle_fd:
            pickle.dump(files, pickle_fd)
    except pickle.PickleError:
        logger.error("Unable to dump the list of files in the pickle-file.")
        return False

    # Save local space: delete git folder
    local_dir = project_path / ".git"
    if local_dir.is_dir():
        shutil.rmtree(local_dir)

    return True
project_checkout(branch_name, branch_dir, mirror, paths)
Perform a project checkout.
The project name is where a project is found in the mirror (e.g. MIRROR/platform/external/sqlite) The relative project path is the final path of the project in AOSP (e.g. ROOT/external/sqlite)
Abort fast if no git directory is found in the mirror
Parameters:
Name | Type | Description | Default |
---|---|---|---|
branch_name |
str |
Name of the branch to checkout |
required |
branch_dir |
Path |
Branch working directory |
required |
mirror |
Union[str, pathlib.Path] |
Path/Link to a mirror |
required |
paths |
Tuple[pathlib.Path, pathlib.Path] |
Project Name and project relative path |
required |
Returns:
Type | Description |
---|---|
None |
Source code in bgraph/builder/builder.py
@bgraph.utils.no_except
def project_checkout(
    branch_name: str,
    branch_dir: Path,
    mirror: Union[str, Path],
    paths: Tuple[Path, Path],
) -> None:
    """Perform a project checkout.

    The project name is where a project is found in the mirror
    (e.g. MIRROR/platform/external/sqlite)
    The relative project path is the final path of the project in AOSP
    (e.g. ROOT/external/sqlite)

    Abort fast if no git directory is found in the (local) mirror

    :param branch_name: Name of the branch to checkout
    :param branch_dir: Branch working directory
    :param mirror: Path/Link to a mirror
    :param paths: Project Name and project relative path
    :return:
    """
    project_name, relative_project_path = paths

    # Mirror git dir
    git_dir: Union[str, Path]
    if isinstance(mirror, str):
        git_dir = f"{mirror}/{project_name}"
    else:
        # Keep a Path object for a local mirror: converting it to a string made
        # the existence check below unreachable.
        git_dir = Path(mirror) / f"{project_name}.git"

    # AOSP project dir
    project_path = branch_dir / relative_project_path

    if isinstance(git_dir, Path) and not git_dir.is_dir():
        logger.error("Project not found (%s)", git_dir)
        return

    partial_checkout(branch_name, project_path, git_dir)
graph
Author: dm
Collection of dirty scripts used to generate a source graph for AOSP based on the parsing of the Android.bp (blueprints) files.
This must be used with the SoongParser developed in AOSP_BUILD project (aka acb)
dependencies_keys: List[str]
In soong files, keys that indicates a source dependency
build_source_map(sp)
From a SoongParser object, converts all the targets into a graph representation where the links between two nodes are : - a dependency link if the origin is induced in the destination - a source link if the origin is a source file and the destination a target
The graphs are saved as networkx objects with pickle.
Returns:
Type | Description |
---|---|
DiGraph |
An UDG as a DiGraph |
Source code in bgraph/builder/graph.py
def build_source_map(sp: bgraph.parsers.soong_parser.SoongParser) -> BGraph:
    """
    From a SoongParser object, converts all the targets into a graph representation where
    the links between two nodes are :
        - a dependency link if the origin is induced in the destination
        - a source link if the origin is a source file and the destination a target

    Every target of the parser first becomes a node holding its sections in the
    `data` attribute; the edges are then added by convert_section.

    :param sp: The soong parser
    :return: An UDG as a DiGraph
    """
    graph: BGraph = nx.DiGraph()

    for target in sp.sections:
        graph.add_node(target, data=sp.get_section(target))

    file_listing = sp.file_listing

    # Cache shared by all the convert_section calls
    section_files: Dict[Path, List[str]] = {}

    # Snapshot of the nodes: convert_section adds new nodes (files,
    # dependencies) to the graph while we iterate
    nodes = list(graph.nodes)
    for idx, section_name in enumerate(nodes):
        if idx % 500 == 0:
            logger.debug("Converting section %d / %d", idx, len(nodes))

        for section in graph.nodes[section_name]["data"]:
            project_path: Path = section.get(
                bgraph.parsers.soong_parser.SoongParser.SECTION_PROJECT_PATH
            )
            project_files: List[str] = file_listing.get(project_path, [])
            if not project_files:
                logger.info("Cannot find files for project %s", section_name)

            convert_section(graph, section_files, section_name, section, project_files)

    return graph
compute_file_list(section_files, soong_file, project_path, files)
Create the file list that matches the soong_file for the project.
WARNING: This function is not pure, it will modify in-place the section_files mapping, allowing for an easy caching.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
section_files |
Dict[pathlib.Path, List[str]] |
A mapping storing the files mapping |
required |
soong_file |
Path |
Path for a soong file |
required |
project_path |
Path |
Path for a project (should be a parent of the soong file) |
required |
files |
List[str] |
List of files in the project (found via Git) |
required |
Returns:
Type | Description |
---|---|
List[str] |
A list of files descendent of the soong file in the project |
Source code in bgraph/builder/graph.py
def compute_file_list(
    section_files: Dict[Path, List[str]],
    soong_file: Path,
    project_path: Path,
    files: List[str],
) -> List[str]:
    """Return the project files located below soong_file, relative to it.

    WARNING: This function has a side effect: the computed list is stored in
    section_files (keyed by soong_file) and returned as is on later calls with
    the same soong_file.

    :param section_files: A mapping storing the files mapping (the cache)
    :param soong_file: Path for a soong file
    :param project_path: Path for a project (should be a parent of the soong file)
    :param files: List of files in the project (found via Git)
    :return: A list of files descendent of the soong file in the project
    """
    # Cache hit: nothing to compute
    if soong_file in section_files:
        return section_files[soong_file]

    descendants: List[str] = []
    section_files[soong_file] = descendants

    soong_prefix = str(soong_file)
    for file_name in files:
        candidate = project_path / file_name

        # Cheap textual pre-filter before the real path comparison
        if not str(candidate).startswith(soong_prefix):
            continue

        try:
            relative = candidate.relative_to(soong_file)
        except ValueError:
            # Same textual prefix but not actually below soong_file
            continue

        descendants.append(relative.as_posix())

    return descendants
convert(pickle_dir, result_dir)
Iterates through the source_maps directory and converts every soong_parser object to a NetworkX DiGraph
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pickle_dir |
Path |
Path towards the file where the pickle files are stored. |
required |
result_dir |
Path |
Path where the BGraph are stored |
required |
Source code in bgraph/builder/graph.py
def convert(pickle_dir: Path, result_dir: Path) -> None:
    """Convert every pickled soong parser of a directory into a BGraph.

    Pickle files which already have a matching `.bgraph` file in result_dir are
    skipped. The conversions are dispatched on a multiprocessing pool.

    :param pickle_dir: Path towards the file where the pickle files are stored.
    :param result_dir: Path where the BGraph are stored
    """
    # Select the pickle files without a result yet
    pending: List[Path] = []
    for candidate in pickle_dir.glob("*.pickle"):
        expected_result = result_dir / candidate.with_suffix(".bgraph").name
        if not expected_result.is_file():
            pending.append(candidate)

    worker = functools.partial(convert_single, result_dir)
    with multiprocessing.Pool() as pool:
        outcomes = pool.map(worker, pending)

    # Report the failures, then the global statistics
    succeeded = 0
    for branch_name, outcome in outcomes:
        if outcome is False:
            logger.info("Fail to convert %s", branch_name)
            continue
        succeeded += 1

    logger.info("Converted %d/%d branches", succeeded, len(outcomes))
convert_section(graph, section_files, section_name, section, project_files)
Convert a section from the SoongParser into a node in the graph and sets its dependencies
Warning: This function modifies the graph in place.
Note: Some refactoring should be done on the file path detection (drop fnmatch).
TODO(dm): Integrate other type of dependencies such as exclusion
Parameters:
Name | Type | Description | Default |
---|---|---|---|
graph |
DiGraph |
The UDG |
required |
section_files |
Dict[pathlib.Path, List[str]] |
A mapping for section files allowing an easy cache |
required |
section_name |
str |
Name of the section to convert |
required |
section |
Section |
Section data itself |
required |
project_files |
List[str] |
Files found in the source tree |
required |
Source code in bgraph/builder/graph.py
def convert_section(
    graph: BGraph,
    section_files: Dict[Path, List[str]],
    section_name: str,
    section: Section,
    project_files: List[str],
) -> None:
    """Add to the graph the edges described by a section of the SoongParser.

    For every key of the section found in dependencies_keys, an edge of type
    "dep" is added from each listed dependency to section_name. For every key
    found in srcs_keys, the listed patterns are matched (with fnmatch) against
    the files located below the soong file and an edge of type "src" is added
    from each matching file to section_name.

    Warning: This function modifies the graph in place.

    Note: Some refactoring should be done on the file path detection (drop fnmatch).

    TODO(dm):
        Integrate other type of dependencies such as exclusion

    :param graph: The UDG
    :param section_files: A mapping for section files allowing an easy cache
    :param section_name: Name of the section to convert
    :param section: Section data itself
    :param project_files: Files found in the source tree
    """
    parser_cls = bgraph.parsers.soong_parser.SoongParser

    # Project Path
    try:
        project_path: Path = section[parser_cls.SECTION_PROJECT_PATH]
    except KeyError:
        logger.error("Missing section_project_path in %s", section_name)
        return

    # Directory of the local Soong file
    try:
        soong_file_path: Path = section[parser_cls.SOONG_FILE].parent
    except (KeyError, AttributeError):
        logger.error("Missing soong_file in %s", section_name)
        return

    for key, value in bgraph.utils.recurse(section):  # type: ignore
        # Dependency keys take precedence over source keys; other keys are
        # not converted into edges
        edge_type: Literal["dep", "src"]
        if key in dependencies_keys:
            edge_type = "dep"
        elif key in srcs_keys:
            edge_type = "src"
        else:
            continue

        for dep in value:
            if edge_type != "src":
                graph.add_edge(dep, section_name, type=edge_type)
                continue

            # For dependency key representing directories, add a *
            if "dirs" in key:
                dep = f"{dep}*"

            # Since we are using fnmatch and not a proper tool, we also
            # must take care of those prefix and remove them...
            # TODO(dm): Use removeprefix in Python3.9
            if dep.startswith("./"):
                dep = dep[len("./") :]
            elif dep.startswith("."):
                dep = dep[len(".") :]

            # Resolve * in dependencies files : the pattern must be
            # modified to accommodate Python fnmatch module
            # FIX: https://android.googlesource.com/platform/build/soong/+/refs/heads/master#file-lists
            candidates = compute_file_list(
                section_files, soong_file_path, project_path, project_files
            )
            pattern = dep.replace("**/", "*")
            for dependency_file in fnmatch.filter(candidates, pattern):
                graph.add_edge(
                    str(soong_file_path / dependency_file),
                    section_name,
                    type=edge_type,
                )
convert_single(result_dir, pickle_file)
Convert a pickle file representing a soong parser to a graph and store it in result dir.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
result_dir |
Path |
Where to store the result |
required |
pickle_file |
Path |
Which file to convert |
required |
Returns:
Type | Description |
---|---|
Tuple[str, bool] |
A tuple (branch_name, boolean for success) for later statistics. |
Source code in bgraph/builder/graph.py
def convert_single(result_dir: Path, pickle_file: Path) -> Tuple[str, bool]:
    """Convert a pickle file representing a soong parser to a graph and store it in
    result dir.

    The graph is written to `result_dir/<pickle file name>.bgraph`. The branch
    name is the name of the pickle file without its suffix.

    :param result_dir: Where to store the result
    :param pickle_file: Which file to convert
    :return: A tuple (branch_name, boolean for success) for later statistics.
    """
    branch_name: str = pickle_file.stem
    bgraph_file = result_dir / (pickle_file.with_suffix(".bgraph").name)

    try:
        # NOTE(security): pickle must only be used on trusted data; this file is
        # expected to have been produced by the builder step of this tool.
        with open(pickle_file, "rb") as pickle_fd:
            soong_parser = pickle.load(pickle_fd)
    except (pickle.PickleError, EOFError):
        # EOFError is what pickle raises for an empty or truncated file
        return branch_name, False

    graph = build_source_map(soong_parser)

    try:
        with open(bgraph_file, "wb") as file:
            pickle.dump(graph, file)
    except pickle.PickleError:
        # Do not leave a partial result behind: an existing .bgraph file marks
        # the branch as already converted.
        if bgraph_file.exists():
            bgraph_file.unlink()
        return branch_name, False

    return branch_name, True