aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Aiden Grossman <aidengrossman@google.com>, 2024-06-27 14:28:02 -0700
committer: GitHub <noreply@github.com>, 2024-06-27 14:28:02 -0700
commit: 4134b33c6a362cb462b335177d6d9e8235f04309 (patch)
tree: e1e16294212d78641859852e673aa5c49f88a0b8
parent: 6b55ec1198ce9356340372fd8233b907d8d4cea2 (diff)
download: llvm-4134b33c6a362cb462b335177d6d9e8235f04309.zip
llvm-4134b33c6a362cb462b335177d6d9e8235f04309.tar.gz
llvm-4134b33c6a362cb462b335177d6d9e8235f04309.tar.bz2
[MLGO] Add ability to extract IR from bazel using aquery (#96964)
This patch adds in support for extracting IR from binaries built with bazel through querying the linker command line using bazel aquery.
-rw-r--r-- llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py | 9
-rw-r--r-- llvm/utils/mlgo-utils/mlgo/corpus/extract_ir_lib.py | 23
-rw-r--r-- llvm/utils/mlgo-utils/tests/corpus/extract_ir_test.py | 37
3 files changed, 67 insertions, 2 deletions
diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
index 9441543..a7d52da 100644
--- a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
+++ b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir.py
@@ -45,8 +45,8 @@ def parse_args_and_run():
parser.add_argument(
"--input_type",
type=str,
- help="Input file type - JSON, LLD params, or directory.",
- choices=["json", "params", "directory"],
+ help="Input file type - JSON, LLD params, directory, or bazel aquery.",
+ choices=["json", "params", "directory", "bazel_aquery"],
default="json",
nargs="?",
)
@@ -149,6 +149,11 @@ def main(args):
"structured compilation database, use that instead"
)
objs = extract_ir_lib.load_from_directory(args.input, args.output_dir)
+ elif args.input_type == "bazel_aquery":
+ with open(args.input, encoding="utf-8") as aquery_json_handle:
+ objs = extract_ir_lib.load_bazel_aquery(
+ json.load(aquery_json_handle), args.obj_base_dir, args.output_dir
+ )
else:
logging.error("Unknown input type: %s", args.input_type)
diff --git a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir_lib.py b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir_lib.py
index 8e9779c..f434e59 100644
--- a/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir_lib.py
+++ b/llvm/utils/mlgo-utils/mlgo/corpus/extract_ir_lib.py
@@ -316,6 +316,29 @@ def load_for_lld_thinlto(
return [make_spec(path) for path in paths]
+def load_bazel_aquery(aquery_json, obj_base_dir: str, output_dir: str):
+ """Creates an object file array by looking at the JSON output of bazel aquery.
+
+ Args:
+ aquery_json: The JSON-formatted output of the bazel aquery command for
+ the target of interest. The bazel aquery JSON should be a JSON
+ serialized version of the analysis.ActionGraphContainer proto.
+ https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto
+ obj_base_dir: The base build directory that all object files will be
+ written out relative to.
+ output_dir: The output directory where extracted .bc and .cmd files should
+ be placed.
+ """
+ linker_params = []
+
+ for action_info in aquery_json["actions"]:
+ if action_info["mnemonic"] != "CppLink":
+ continue
+ linker_params = action_info["arguments"]
+
+ return load_from_lld_params(linker_params, obj_base_dir, output_dir)
+
+
def run_extraction(
objs: List[TrainingIRExtractor],
num_workers: int,
diff --git a/llvm/utils/mlgo-utils/tests/corpus/extract_ir_test.py b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_test.py
index 653c572..f056446 100644
--- a/llvm/utils/mlgo-utils/tests/corpus/extract_ir_test.py
+++ b/llvm/utils/mlgo-utils/tests/corpus/extract_ir_test.py
@@ -304,6 +304,43 @@ def test_lld_thinlto_extraction(outer, outdir):
# CHECK-LLD-THINLTO-EXTRACTION-PY: 3
+## Test that we can load a bazel aquery JSON as expected.
+
+# RUN: %python %s test_load_bazel_aquery | FileCheck %s --check-prefix CHECK-TEST-LOAD-BAZEL-AQUERY
+
+
+def test_load_bazel_aquery():
+ obj = extract_ir_lib.load_bazel_aquery(
+ {
+ "actions": [
+ {"mnemonic": "not-link", "arguments": []},
+ {
+ "mnemonic": "CppLink",
+ "arguments": ["clang", "-o", "output_binary", "test1.o", "test2.o"],
+ },
+ ]
+ },
+ "/some/path",
+ "/tmp/out",
+ )
+ print(obj[0].input_obj())
+ # CHECK-TEST-LOAD-BAZEL-AQUERY: /some/path/test1.o
+ print(obj[0].relative_output_path())
+ # CHECK-TEST-LOAD-BAZEL-AQUERY: test1.o
+ print(obj[0].cmd_file())
+ # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test1.o.cmd
+ print(obj[0].bc_file())
+ # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test1.o.bc
+ print(obj[1].input_obj())
+ # CHECK-TEST-LOAD-BAZEL-AQUERY: /some/path/test2.o
+ print(obj[1].relative_output_path())
+ # CHECK-TEST-LOAD-BAZEL-AQUERY: test2.o
+ print(obj[1].cmd_file())
+ # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test2.o.cmd
+ print(obj[1].bc_file())
+ # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test2.o.bc
+
+
## Test that filtering works correctly
# RUN: %python %s test_filtering | FileCheck %s --check-prefix CHECK-TEST-FILTERING