about | summary | refs | log | tree | commit | diff
path: root/llvm/utils
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/utils')
-rw-r--r-- llvm/utils/TableGen/InstrInfoEmitter.cpp 51
-rw-r--r-- llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn 1
-rw-r--r-- llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn 1
-rw-r--r-- llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn 1
-rw-r--r-- llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn 1
-rw-r--r-- llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn 1
-rw-r--r-- llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn 2
-rw-r--r-- llvm/utils/lit/lit/Test.py 3
-rw-r--r-- llvm/utils/lit/lit/TestRunner.py 2
-rw-r--r-- llvm/utils/lit/lit/cl_arguments.py 10
-rwxr-xr-x llvm/utils/lit/lit/main.py 2
-rw-r--r-- llvm/utils/lit/tests/Inputs/xfail-cl/true-xfail-conditionally.txt 2
-rw-r--r-- llvm/utils/lit/tests/xfail-cl.py 28
-rw-r--r-- llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py 304
14 files changed, 384 insertions, 25 deletions
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index fa38d01..6f72b51 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -250,29 +250,38 @@ void InstrInfoEmitter::emitOperandNameMappings(
// Map of operand names to their ID.
MapVector<StringRef, unsigned> OperandNameToID;
- /// The keys of this map is a map which have OpName ID values as their keys
- /// and instruction operand indices as their values. The values of this map
- /// are lists of instruction names. This map helps to unique entries among
+ /// A key in this map is a vector mapping OpName ID values to instruction
+ /// operand indices or -1 (but without any trailing -1 values which will be
+ /// added later). The corresponding value in this map is the index of that row
+ /// in the emitted OperandMap table. This map helps to unique entries among
/// instructions that have identical OpName -> Operand index mapping.
- std::map<std::map<unsigned, unsigned>, std::vector<StringRef>> OperandMap;
+ MapVector<SmallVector<int>, unsigned> OperandMap;
// Max operand index seen.
unsigned MaxOperandNo = 0;
// Fixed/Predefined instructions do not have UseNamedOperandTable enabled, so
- // we can just skip them.
+ // add a dummy map entry for them.
+ OperandMap.try_emplace({}, 0);
+ unsigned FirstTargetVal = TargetInstructions.front()->EnumVal;
+ SmallVector<unsigned> InstructionIndex(FirstTargetVal, 0);
for (const CodeGenInstruction *Inst : TargetInstructions) {
- if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable"))
+ if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable")) {
+ InstructionIndex.push_back(0);
continue;
- std::map<unsigned, unsigned> OpList;
+ }
+ SmallVector<int> OpList;
for (const auto &Info : Inst->Operands) {
unsigned ID =
OperandNameToID.try_emplace(Info.Name, OperandNameToID.size())
.first->second;
+ OpList.resize(std::max((unsigned)OpList.size(), ID + 1), -1);
OpList[ID] = Info.MIOperandNo;
MaxOperandNo = std::max(MaxOperandNo, Info.MIOperandNo);
}
- OperandMap[OpList].push_back(Inst->TheDef->getName());
+ auto [It, Inserted] =
+ OperandMap.try_emplace(std::move(OpList), OperandMap.size());
+ InstructionIndex.push_back(It->second);
}
const size_t NumOperandNames = OperandNameToID.size();
@@ -302,28 +311,22 @@ void InstrInfoEmitter::emitOperandNameMappings(
StringRef Type = MaxOperandNo <= INT8_MAX ? "int8_t" : "int16_t";
OS << " static constexpr " << Type << " OperandMap[][" << NumOperandNames
<< "] = {\n";
- for (const auto &Entry : OperandMap) {
- const std::map<unsigned, unsigned> &OpList = Entry.first;
-
+ for (const auto &[OpList, _] : OperandMap) {
// Emit a row of the OperandMap table.
OS << " {";
- for (unsigned ID = 0; ID < NumOperandNames; ++ID) {
- auto Iter = OpList.find(ID);
- OS << (Iter != OpList.end() ? (int)Iter->second : -1) << ", ";
- }
+ for (unsigned ID = 0; ID < NumOperandNames; ++ID)
+ OS << (ID < OpList.size() ? OpList[ID] : -1) << ", ";
OS << "},\n";
}
OS << " };\n";
- OS << " switch(Opcode) {\n";
- for (const auto &[TableIndex, Entry] : enumerate(OperandMap)) {
- for (StringRef Name : Entry.second)
- OS << " case " << Namespace << "::" << Name << ":\n";
- OS << " return OperandMap[" << TableIndex
- << "][static_cast<unsigned>(Name)];\n";
- }
- OS << " default: return -1;\n";
- OS << " }\n";
+ Type = OperandMap.size() <= UINT8_MAX + 1 ? "uint8_t" : "uint16_t";
+ OS << " static constexpr " << Type << " InstructionIndex[] = {";
+ for (auto [TableIndex, Entry] : enumerate(InstructionIndex))
+ OS << (TableIndex % 16 == 0 ? "\n " : " ") << Entry << ',';
+ OS << "\n };\n";
+
+ OS << " return OperandMap[InstructionIndex[Opcode]][(unsigned)Name];\n";
} else {
// There are no operands, so no need to emit anything
OS << " return -1;\n";
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
index 218e36e..e3182b0 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
@@ -46,6 +46,7 @@ static_library("bugprone") {
"IncorrectRoundingsCheck.cpp",
"InfiniteLoopCheck.cpp",
"IntegerDivisionCheck.cpp",
+ "InvalidEnumDefaultInitializationCheck.cpp",
"LambdaFunctionNameCheck.cpp",
"MacroParenthesesCheck.cpp",
"MacroRepeatedSideEffectsCheck.cpp",
diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn
index 566195e..4e63aa8 100644
--- a/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn
@@ -69,6 +69,7 @@ static_library("DWARF") {
"SymbolFileDWARF.cpp",
"SymbolFileDWARFDebugMap.cpp",
"SymbolFileDWARFDwo.cpp",
+ "SymbolFileWasm.cpp",
"UniqueDWARFASTType.cpp",
]
}
diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
index 3d08c3f..d394923 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
@@ -8,6 +8,7 @@ static_library("Analysis") {
"//llvm/include/llvm/Config:config",
"//llvm/lib/BinaryFormat",
"//llvm/lib/IR",
+ "//llvm/lib/Frontend/HLSL",
"//llvm/lib/ProfileData",
"//llvm/lib/Support",
"//llvm/lib/TargetParser",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn
index 4c1c613..fce564e 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn
@@ -6,6 +6,7 @@ static_library("HLSL") {
]
sources = [
"CBuffer.cpp",
+ "HLSLBinding.cpp",
"HLSLResource.cpp",
"HLSLRootSignature.cpp",
"RootSignatureMetadata.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn
index 1c839b1..33d4246 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn
@@ -8,6 +8,7 @@ static_library("Offloading") {
]
sources = [
"OffloadWrapper.cpp",
+ "PropertySet.cpp",
"Utility.cpp",
]
}
diff --git a/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn
index c29277c..12f7d65 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn
@@ -13,6 +13,7 @@ unittest("LLVMFrontendTests") {
"//llvm/lib/Testing/Support",
]
sources = [
+ "HLSLBindingTest.cpp",
"HLSLRootSignatureDumpTest.cpp",
"HLSLRootSignatureRangesTest.cpp",
"OpenACCTest.cpp",
@@ -23,5 +24,6 @@ unittest("LLVMFrontendTests") {
"OpenMPDirectiveNameTest.cpp",
"OpenMPIRBuilderTest.cpp",
"OpenMPParsingTest.cpp",
+ "PropertySetRegistryTest.cpp",
]
}
diff --git a/llvm/utils/lit/lit/Test.py b/llvm/utils/lit/lit/Test.py
index 1bd5ba8..7290977 100644
--- a/llvm/utils/lit/lit/Test.py
+++ b/llvm/utils/lit/lit/Test.py
@@ -247,6 +247,9 @@ class Test:
# and will be honored when the test result is supplied.
self.xfails = []
+ # Exclude this test if it's xfail.
+ self.exclude_xfail = False
+
# If true, ignore all items in self.xfails.
self.xfail_not = False
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index 73db67a..e7cd707 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -2175,6 +2175,8 @@ def parseIntegratedTestScript(test, additional_parsers=[], require_script=True):
assert parsed["DEFINE:"] == script
assert parsed["REDEFINE:"] == script
test.xfails += parsed["XFAIL:"] or []
+ if test.exclude_xfail and test.isExpectedToFail():
+ return lit.Test.Result(Test.EXCLUDED, "excluding XFAIL tests")
test.requires += parsed["REQUIRES:"] or []
test.unsupported += parsed["UNSUPPORTED:"] or []
if parsed["ALLOW_RETRIES:"]:
diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py
index 3292554..e889515 100644
--- a/llvm/utils/lit/lit/cl_arguments.py
+++ b/llvm/utils/lit/lit/cl_arguments.py
@@ -304,6 +304,16 @@ def parse_args():
default=os.environ.get("LIT_XFAIL_NOT", ""),
)
selection_group.add_argument(
+ "--exclude-xfail",
+ help="exclude XFAIL tests (unless they are in the --xfail-not list). "
+ "Note: This option is implemented in "
+ "lit.TestRunner.parseIntegratedTestScript and so will have no effect on "
+ "test formats that do not call that and do not implement the option "
+ "separately.",
+ default=False,
+ action="store_true",
+ )
+ selection_group.add_argument(
"--num-shards",
dest="numShards",
metavar="M",
diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py
index 0939838..9650a0e 100755
--- a/llvm/utils/lit/lit/main.py
+++ b/llvm/utils/lit/lit/main.py
@@ -240,6 +240,8 @@ def mark_xfail(selected_tests, opts):
t.xfails += "*"
if test_file in opts.xfail_not or test_full_name in opts.xfail_not:
t.xfail_not = True
+ if opts.exclude_xfail:
+ t.exclude_xfail = True
def mark_excluded(discovered_tests, selected_tests):
diff --git a/llvm/utils/lit/tests/Inputs/xfail-cl/true-xfail-conditionally.txt b/llvm/utils/lit/tests/Inputs/xfail-cl/true-xfail-conditionally.txt
new file mode 100644
index 0000000..6fdecd6
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/xfail-cl/true-xfail-conditionally.txt
@@ -0,0 +1,2 @@
+# XFAIL: this-does-not-exist
+# RUN: true
\ No newline at end of file
diff --git a/llvm/utils/lit/tests/xfail-cl.py b/llvm/utils/lit/tests/xfail-cl.py
index ef1bb04..f1e0e33 100644
--- a/llvm/utils/lit/tests/xfail-cl.py
+++ b/llvm/utils/lit/tests/xfail-cl.py
@@ -5,6 +5,18 @@
# RUN: %{inputs}/xfail-cl \
# RUN: | FileCheck --check-prefix=CHECK-FILTER %s
+# RUN: %{lit} --xfail 'false.txt;false2.txt;top-level-suite :: b :: test.txt' \
+# RUN: --exclude-xfail \
+# RUN: %{inputs}/xfail-cl \
+# RUN: | FileCheck --check-prefixes=CHECK-EXCLUDED,CHECK-EXCLUDED-NOOVERRIDE %s
+
+# RUN: %{lit} --xfail 'false.txt;false2.txt;top-level-suite :: b :: test.txt' \
+# RUN: --xfail-not 'true-xfail.txt' \
+# RUN: --exclude-xfail \
+# RUN: %{inputs}/xfail-cl \
+# RUN: | FileCheck --check-prefixes=CHECK-EXCLUDED,CHECK-EXCLUDED-OVERRIDE %s
+
+
# RUN: env LIT_XFAIL='false.txt;false2.txt;top-level-suite :: b :: test.txt' \
# RUN: LIT_XFAIL_NOT='true-xfail.txt;top-level-suite :: a :: test-xfail.txt' \
# RUN: %{lit} %{inputs}/xfail-cl \
@@ -23,7 +35,7 @@
# END.
-# CHECK-FILTER: Testing: 10 tests, {{[0-9]*}} workers
+# CHECK-FILTER: Testing: 11 tests, {{[0-9]*}} workers
# CHECK-FILTER-DAG: {{^}}PASS: top-level-suite :: a :: test.txt
# CHECK-FILTER-DAG: {{^}}XFAIL: top-level-suite :: b :: test.txt
# CHECK-FILTER-DAG: {{^}}XFAIL: top-level-suite :: a :: false.txt
@@ -37,3 +49,17 @@
# CHECK-OVERRIDE: Testing: 1 tests, {{[0-9]*}} workers
# CHECK-OVERRIDE: {{^}}PASS: top-level-suite :: true-xfail.txt
+
+# CHECK-EXCLUDED: Testing: 11 tests, {{[0-9]*}} workers
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: a :: false.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: a :: test-xfail.txt
+# CHECK-EXCLUDED-DAG: {{^}}PASS: top-level-suite :: a :: test.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: b :: false.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: b :: test-xfail.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: b :: test.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: false.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: false2.txt
+# CHECK-EXCLUDED-DAG: {{^}}PASS: top-level-suite :: true-xfail-conditionally.txt
+# CHECK-EXCLUDED-NOOVERRIDE-DAG: {{^}}EXCLUDED: top-level-suite :: true-xfail.txt
+# CHECK-EXCLUDED-OVERRIDE-DAG: {{^}}PASS: top-level-suite :: true-xfail.txt
+# CHECK-EXCLUDED-DAG: {{^}}PASS: top-level-suite :: true.txt
diff --git a/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py
new file mode 100644
index 0000000..80ac4c6
--- /dev/null
+++ b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py
@@ -0,0 +1,304 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""IR2Vec Triplet Generator
+
+Generates IR2Vec triplets by applying random optimization levels to LLVM IR files
+and extracting triplets using llvm-ir2vec. Automatically generates preprocessed
+files: entity2id.txt, relation2id.txt, and train2id.txt.
+
+Usage:
+ python generateTriplets.py <llvm_build_dir> <num_optimizations> <ll_file_list> <output_dir>
+"""
+
+import argparse
+import logging
+import os
+import random
+import subprocess
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import List, Set, Tuple
+
+# Configuration
+# Optimization level names; each is passed to opt with a leading dash (-O0, ...).
+OPT_LEVELS = ["O0", "O1", "O2", "O3", "Os", "Oz"]
+# Default thread-pool size used when -j/--max-workers is not given.
+DEFAULT_MAX_WORKERS = 100
+
+# Module-level logger; its level and format are set up in main().
+logger = logging.getLogger(__name__)
+
+
+# TODO: Change this to a dataclass with slots
+# when Python 3.10+ is the minimum version
+# https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass
+class TripletResult:
+    """Result from processing a single LLVM IR file.
+
+    Attributes:
+        triplets: De-duplicated triplet lines collected for the file.
+        max_relation: Highest MAX_RELATION value seen for the file.
+    """
+
+    # Restrict instances to exactly these two attributes (no per-instance dict).
+    __slots__ = ["triplets", "max_relation"]
+
+    def __init__(self, triplets: Set[str], max_relation: int):
+        self.triplets = triplets
+        self.max_relation = max_relation
+
+
+class IR2VecTripletGenerator:
+ """Main class for generating IR2Vec triplets"""
+
+    def __init__(
+        self,
+        llvm_build_dir: Path,
+        num_optimizations: int,
+        output_dir: Path,
+        max_workers: int = DEFAULT_MAX_WORKERS,
+    ):
+        """Store the configuration, validate it, and create the output directory.
+
+        Args:
+            llvm_build_dir: LLVM build tree; opt and llvm-ir2vec are looked up
+                in its bin/ subdirectory.
+            num_optimizations: Number of optimization levels run per input file.
+            output_dir: Directory that receives the generated files.
+            max_workers: Thread-pool size used by generate_triplets.
+
+        Raises:
+            FileNotFoundError: The build directory or a required tool is missing.
+            ValueError: num_optimizations is outside the supported range.
+        """
+        # Tool paths
+        bin_dir = os.path.join(llvm_build_dir, "bin")
+        self.opt_binary = os.path.join(bin_dir, "opt")
+        self.ir2vec_binary = os.path.join(bin_dir, "llvm-ir2vec")
+
+        self.llvm_build_dir = llvm_build_dir
+        self.num_optimizations = num_optimizations
+        self.output_dir = output_dir
+        self.max_workers = max_workers
+
+        self._validate_setup()
+
+        # The output directory is only created once validation has passed
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+    def _validate_setup(self):
+        """Check the build directory, both tools, and the optimization count.
+
+        Raises:
+            FileNotFoundError: llvm_build_dir does not exist, or opt /
+                llvm-ir2vec is not a regular file passing the os.X_OK check.
+            ValueError: num_optimizations is not within 1..len(OPT_LEVELS).
+        """
+        if not self.llvm_build_dir.exists():
+            raise FileNotFoundError(
+                f"LLVM build directory not found: {self.llvm_build_dir}"
+            )
+
+        # opt is checked before llvm-ir2vec, so its error is reported first
+        required_tools = (
+            ("opt", self.opt_binary),
+            ("llvm-ir2vec", self.ir2vec_binary),
+        )
+        for tool_name, tool_path in required_tools:
+            usable = os.path.isfile(tool_path) and os.access(tool_path, os.X_OK)
+            if not usable:
+                raise FileNotFoundError(
+                    f"{tool_name} binary not found or not executable: {tool_path}"
+                )
+
+        level_count = len(OPT_LEVELS)
+        if not (1 <= self.num_optimizations <= level_count):
+            raise ValueError(f"Number of optimizations must be between 1-{level_count}")
+
+    def _select_optimization_levels(self) -> List[str]:
+        """Pick num_optimizations distinct entries of OPT_LEVELS at random."""
+        how_many = self.num_optimizations
+        # random.sample draws without replacement, so no level repeats
+        return random.sample(OPT_LEVELS, how_many)
+
+    def _process_single_file(self, input_file: Path) -> TripletResult:
+        """Run the pipeline on one file at several optimization levels.
+
+        Returns:
+            A TripletResult holding the union of the triplets from every level
+            that produced output, and the largest max-relation among those
+            levels (1 when no level produced output).
+        """
+        merged = set()
+        highest_relation = 1
+
+        for level in self._select_optimization_levels():
+            level_triplets, level_max = self._run_pipeline(input_file, level)
+            # Levels that produced nothing contribute neither triplets nor a max
+            if not level_triplets:
+                continue
+            merged.update(level_triplets)
+            highest_relation = max(highest_relation, level_max)
+            logger.debug(
+                f"Generated {len(level_triplets)} triplets for {input_file} with {level}"
+            )
+
+        return TripletResult(merged, highest_relation)
+
+    def _run_pipeline(self, input_file: Path, opt_level: str) -> Tuple[Set[str], int]:
+        """Run opt | llvm-ir2vec pipeline using subprocess pipes.
+
+        Args:
+            input_file: LLVM IR file handed to opt.
+            opt_level: Optimization level name without the leading dash (e.g. "O2").
+
+        Returns:
+            The parsed (triplets, max_relation) pair, or (set(), 1) when either
+            tool cannot be started or exits with a non-zero status.
+        """
+        try:
+            # Run opt first. Its diagnostics are never read, so send them to
+            # DEVNULL: an undrained stderr pipe would block opt once the pipe
+            # buffer fills and stall the whole pipeline.
+            with subprocess.Popen(
+                [self.opt_binary, f"-{opt_level}", str(input_file), "-o", "-"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.DEVNULL,
+                text=True,
+            ) as opt_proc:
+                # Run llvm-ir2vec with opt's output as input. The enclosing
+                # context manager closes opt's pipe and reaps opt even when
+                # llvm-ir2vec fails to start.
+                with subprocess.Popen(
+                    [self.ir2vec_binary, "triplets", "-", "-o", "-"],
+                    stdin=opt_proc.stdout,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+                    text=True,
+                ) as ir2vec_proc:
+                    # Drop the parent's copy of the read end so that opt sees a
+                    # broken pipe if llvm-ir2vec exits early.
+                    opt_proc.stdout.close()
+                    stdout, _ = ir2vec_proc.communicate()
+
+            # Both processes have been waited on by their context managers
+            if opt_proc.returncode != 0 or ir2vec_proc.returncode != 0:
+                return set(), 1
+
+            return self._parse_triplet_output(stdout)
+        except (subprocess.SubprocessError, OSError):
+            return set(), 1
+
+    def _parse_triplet_output(self, output: str) -> Tuple[Set[str], int]:
+        """Split tool output into unique triplet lines plus the max relation.
+
+        An optional first line of the form MAX_RELATION=<int> supplies the max
+        relation and is not counted as a triplet; without it the max relation
+        is 1. Blank or whitespace-only output yields (set(), 1).
+        """
+        stripped = output.strip()
+        if not stripped:
+            return set(), 1
+
+        header, *body = stripped.split("\n")
+        if not header.startswith("MAX_RELATION="):
+            # No metadata line: the first line is an ordinary triplet
+            return {header, *body}, 1
+
+        # Building a set drops duplicate triplets
+        return set(body), int(header.split("=")[1])
+
+    def generate_triplets(self, file_list: Path) -> None:
+        """Process every file named in file_list and write the output files.
+
+        Files are processed on a thread pool of max_workers threads. A file
+        whose processing raises SubprocessError, OSError or ValueError is
+        logged and left out; the remaining results are still written.
+        """
+        input_files = self._read_file_list(file_list)
+        logger.info(
+            f"Processing {len(input_files)} files with {self.num_optimizations} "
+            f"optimization levels using {self.max_workers} workers"
+        )
+
+        merged_triplets = set()
+        overall_max_relation = 1
+
+        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
+            # Remember which file each future belongs to for error reporting
+            pending = {}
+            for ir_file in input_files:
+                pending[pool.submit(self._process_single_file, ir_file)] = ir_file
+
+            for finished in as_completed(pending):
+                try:
+                    outcome = finished.result()
+                except (subprocess.SubprocessError, OSError, ValueError) as e:
+                    logger.error(f"Error processing {pending[finished]}: {e}")
+                    continue
+                merged_triplets.update(outcome.triplets)
+                overall_max_relation = max(overall_max_relation, outcome.max_relation)
+
+        self._generate_output_files(merged_triplets, overall_max_relation)
+        logger.info("Processing completed successfully")
+
+    def _read_file_list(self, file_list: Path) -> List[Path]:
+        """Return the existing paths listed one per line in file_list.
+
+        Blank lines are ignored and surrounding whitespace is stripped. Paths
+        that do not exist are logged with their line number and skipped.
+
+        Raises:
+            ValueError: No listed path exists.
+        """
+        found = []
+        with open(file_list, "r") as listing:
+            for line_num, raw_line in enumerate(listing, 1):
+                entry = raw_line.strip()
+                if not entry:
+                    continue
+                file_path = Path(entry)
+                if not file_path.exists():
+                    logger.warning(f"File not found (line {line_num}): {file_path}")
+                    continue
+                found.append(file_path)
+
+        if found:
+            return found
+        raise ValueError("No valid input files found")
+
+    def _generate_output_files(self, all_triplets: Set[str], max_relation: int) -> None:
+        """Write train2id.txt, entity2id.txt and relation2id.txt to output_dir."""
+        logger.info(f"Generating output files with {len(all_triplets)} unique triplets")
+
+        def in_output_dir(name):
+            return os.path.join(self.output_dir, name)
+
+        # train2id.txt: the triplet count on the first line, then one triplet per line
+        with open(in_output_dir("train2id.txt"), "w") as train_file:
+            train_file.write(f"{len(all_triplets)}\n")
+            for triplet in all_triplets:
+                train_file.write(f"{triplet}\n")
+
+        self._generate_entity2id(in_output_dir("entity2id.txt"))
+        self._generate_relation2id(in_output_dir("relation2id.txt"), max_relation)
+
+    def _generate_entity2id(self, output_file: Path) -> None:
+        """Have llvm-ir2vec write its entity list to output_file.
+
+        Raises:
+            subprocess.CalledProcessError: llvm-ir2vec exited with a non-zero status.
+        """
+        command = [str(self.ir2vec_binary), "entities", "-o", str(output_file)]
+        # The tool's stdout/stderr are captured and not forwarded
+        subprocess.run(command, check=True, capture_output=True)
+
+    def _generate_relation2id(self, output_file: Path, max_relation: int) -> None:
+        """Write relation2id.txt: a count line, then one name/id pair per relation.
+
+        Relation 0 is Type and relation 1 is Next; ids from 2 up to
+        max_relation are named Arg0, Arg1, and so on.
+        """
+        # At least Type and Next relations
+        relation_count = max(max_relation, 1) + 1
+
+        rows = [f"{relation_count}\n", "Type\t0\n", "Next\t1\n"]
+        for relation_id in range(2, relation_count):
+            rows.append(f"Arg{relation_id-2}\t{relation_id}\n")
+
+        with open(output_file, "w") as f:
+            f.writelines(rows)
+
+
+def main():
+    """Parse the command line, configure logging, and run the generator."""
+    cli = argparse.ArgumentParser(
+        description="Generate IR2Vec triplets from LLVM IR files",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    # Positional arguments
+    cli.add_argument("llvm_build_dir", type=Path, help="Path to LLVM build directory")
+    cli.add_argument(
+        "num_optimizations",
+        type=int,
+        help="Number of optimization levels to apply (1-6)",
+    )
+    cli.add_argument(
+        "ll_file_list",
+        type=Path,
+        help="File containing list of LLVM IR files to process",
+    )
+    cli.add_argument(
+        "output_dir", type=Path, help="Output directory for generated files"
+    )
+
+    # Optional arguments
+    cli.add_argument(
+        "-j",
+        "--max-workers",
+        type=int,
+        default=DEFAULT_MAX_WORKERS,
+        help=f"Maximum number of parallel workers (default: {DEFAULT_MAX_WORKERS})",
+    )
+    cli.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable debug logging"
+    )
+    cli.add_argument(
+        "-q", "--quiet", action="store_true", help="Suppress all output except errors"
+    )
+
+    opts = cli.parse_args()
+
+    # Configure logging; --quiet takes precedence over --verbose
+    if opts.quiet:
+        log_level = logging.ERROR
+    elif opts.verbose:
+        log_level = logging.DEBUG
+    else:
+        log_level = logging.INFO
+    logging.basicConfig(
+        level=log_level,
+        format="[%(asctime)s] %(levelname)s: %(message)s",
+        datefmt="%H:%M:%S",
+    )
+
+    generator = IR2VecTripletGenerator(
+        opts.llvm_build_dir,
+        opts.num_optimizations,
+        opts.output_dir,
+        opts.max_workers,
+    )
+    generator.generate_triplets(opts.ll_file_list)
+
+
+if __name__ == "__main__":
+ main()