diff options
Diffstat (limited to 'llvm/utils')
-rw-r--r-- | llvm/utils/TableGen/InstrInfoEmitter.cpp | 51 | ||||
-rw-r--r-- | llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn | 1 | ||||
-rw-r--r-- | llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn | 1 | ||||
-rw-r--r-- | llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn | 1 | ||||
-rw-r--r-- | llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn | 1 | ||||
-rw-r--r-- | llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn | 1 | ||||
-rw-r--r-- | llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn | 2 | ||||
-rw-r--r-- | llvm/utils/lit/lit/Test.py | 3 | ||||
-rw-r--r-- | llvm/utils/lit/lit/TestRunner.py | 2 | ||||
-rw-r--r-- | llvm/utils/lit/lit/cl_arguments.py | 10 | ||||
-rwxr-xr-x | llvm/utils/lit/lit/main.py | 2 | ||||
-rw-r--r-- | llvm/utils/lit/tests/Inputs/xfail-cl/true-xfail-conditionally.txt | 2 | ||||
-rw-r--r-- | llvm/utils/lit/tests/xfail-cl.py | 28 | ||||
-rw-r--r-- | llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py | 304 |
14 files changed, 384 insertions, 25 deletions
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index fa38d01..6f72b51 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -250,29 +250,38 @@ void InstrInfoEmitter::emitOperandNameMappings( // Map of operand names to their ID. MapVector<StringRef, unsigned> OperandNameToID; - /// The keys of this map is a map which have OpName ID values as their keys - /// and instruction operand indices as their values. The values of this map - /// are lists of instruction names. This map helps to unique entries among + /// A key in this map is a vector mapping OpName ID values to instruction + /// operand indices or -1 (but without any trailing -1 values which will be + /// added later). The corresponding value in this map is the index of that row + /// in the emitted OperandMap table. This map helps to unique entries among /// instructions that have identical OpName -> Operand index mapping. - std::map<std::map<unsigned, unsigned>, std::vector<StringRef>> OperandMap; + MapVector<SmallVector<int>, unsigned> OperandMap; // Max operand index seen. unsigned MaxOperandNo = 0; // Fixed/Predefined instructions do not have UseNamedOperandTable enabled, so - // we can just skip them. + // add a dummy map entry for them. + OperandMap.try_emplace({}, 0); + unsigned FirstTargetVal = TargetInstructions.front()->EnumVal; + SmallVector<unsigned> InstructionIndex(FirstTargetVal, 0); for (const CodeGenInstruction *Inst : TargetInstructions) { - if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable")) + if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable")) { + InstructionIndex.push_back(0); continue; - std::map<unsigned, unsigned> OpList; + } + SmallVector<int> OpList; for (const auto &Info : Inst->Operands) { unsigned ID = OperandNameToID.try_emplace(Info.Name, OperandNameToID.size()) .first->second; + OpList.resize(std::max((unsigned)OpList.size(), ID + 1), -1); OpList[ID] = Info.MIOperandNo; MaxOperandNo = std::max(MaxOperandNo, Info.MIOperandNo); } - OperandMap[OpList].push_back(Inst->TheDef->getName()); + auto [It, Inserted] = + OperandMap.try_emplace(std::move(OpList), OperandMap.size()); + InstructionIndex.push_back(It->second); } const size_t NumOperandNames = OperandNameToID.size(); @@ -302,28 +311,22 @@ void InstrInfoEmitter::emitOperandNameMappings( StringRef Type = MaxOperandNo <= INT8_MAX ? "int8_t" : "int16_t"; OS << " static constexpr " << Type << " OperandMap[][" << NumOperandNames << "] = {\n"; - for (const auto &Entry : OperandMap) { - const std::map<unsigned, unsigned> &OpList = Entry.first; - + for (const auto &[OpList, _] : OperandMap) { // Emit a row of the OperandMap table. OS << " {"; - for (unsigned ID = 0; ID < NumOperandNames; ++ID) { - auto Iter = OpList.find(ID); - OS << (Iter != OpList.end() ? (int)Iter->second : -1) << ", "; - } + for (unsigned ID = 0; ID < NumOperandNames; ++ID) + OS << (ID < OpList.size() ? OpList[ID] : -1) << ", "; OS << "},\n"; } OS << " };\n"; - OS << " switch(Opcode) {\n"; - for (const auto &[TableIndex, Entry] : enumerate(OperandMap)) { - for (StringRef Name : Entry.second) - OS << " case " << Namespace << "::" << Name << ":\n"; - OS << " return OperandMap[" << TableIndex - << "][static_cast<unsigned>(Name)];\n"; - } - OS << " default: return -1;\n"; - OS << " }\n"; + Type = OperandMap.size() <= UINT8_MAX + 1 ? "uint8_t" : "uint16_t"; + OS << " static constexpr " << Type << " InstructionIndex[] = {"; + for (auto [TableIndex, Entry] : enumerate(InstructionIndex)) + OS << (TableIndex % 16 == 0 ? "\n " : " ") << Entry << ','; + OS << "\n };\n"; + + OS << " return OperandMap[InstructionIndex[Opcode]][(unsigned)Name];\n"; } else { // There are no operands, so no need to emit anything OS << " return -1;\n"; diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn index 218e36e..e3182b0 100644 --- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn @@ -46,6 +46,7 @@ static_library("bugprone") { "IncorrectRoundingsCheck.cpp", "InfiniteLoopCheck.cpp", "IntegerDivisionCheck.cpp", + "InvalidEnumDefaultInitializationCheck.cpp", "LambdaFunctionNameCheck.cpp", "MacroParenthesesCheck.cpp", "MacroRepeatedSideEffectsCheck.cpp", diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn index 566195e..4e63aa8 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn @@ -69,6 +69,7 @@ static_library("DWARF") { "SymbolFileDWARF.cpp", "SymbolFileDWARFDebugMap.cpp", "SymbolFileDWARFDwo.cpp", + "SymbolFileWasm.cpp", "UniqueDWARFASTType.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 3d08c3f..d394923 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -8,6 +8,7 @@ static_library("Analysis") { "//llvm/include/llvm/Config:config", "//llvm/lib/BinaryFormat", "//llvm/lib/IR", + "//llvm/lib/Frontend/HLSL", "//llvm/lib/ProfileData", "//llvm/lib/Support", "//llvm/lib/TargetParser", diff --git a/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn index 4c1c613..fce564e 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn @@ -6,6 +6,7 @@ static_library("HLSL") { ] sources = [ "CBuffer.cpp", + "HLSLBinding.cpp", "HLSLResource.cpp", "HLSLRootSignature.cpp", "RootSignatureMetadata.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn index 1c839b1..33d4246 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn @@ -8,6 +8,7 @@ static_library("Offloading") { ] sources = [ "OffloadWrapper.cpp", + "PropertySet.cpp", "Utility.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn index c29277c..12f7d65 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn @@ -13,6 +13,7 @@ unittest("LLVMFrontendTests") { "//llvm/lib/Testing/Support", ] sources = [ + "HLSLBindingTest.cpp", "HLSLRootSignatureDumpTest.cpp", "HLSLRootSignatureRangesTest.cpp", "OpenACCTest.cpp", @@ -23,5 +24,6 @@ unittest("LLVMFrontendTests") { "OpenMPDirectiveNameTest.cpp", "OpenMPIRBuilderTest.cpp", "OpenMPParsingTest.cpp", + "PropertySetRegistryTest.cpp", ] } diff --git a/llvm/utils/lit/lit/Test.py b/llvm/utils/lit/lit/Test.py index 1bd5ba8..7290977 100644 --- a/llvm/utils/lit/lit/Test.py +++ b/llvm/utils/lit/lit/Test.py @@ -247,6 +247,9 @@ class Test: # and will be honored when the test result is supplied. self.xfails = [] + # Exclude this test if it's xfail. + self.exclude_xfail = False + # If true, ignore all items in self.xfails. self.xfail_not = False diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index 73db67a..e7cd707 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -2175,6 +2175,8 @@ def parseIntegratedTestScript(test, additional_parsers=[], require_script=True): assert parsed["DEFINE:"] == script assert parsed["REDEFINE:"] == script test.xfails += parsed["XFAIL:"] or [] + if test.exclude_xfail and test.isExpectedToFail(): + return lit.Test.Result(Test.EXCLUDED, "excluding XFAIL tests") test.requires += parsed["REQUIRES:"] or [] test.unsupported += parsed["UNSUPPORTED:"] or [] if parsed["ALLOW_RETRIES:"]: diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py index 3292554..e889515 100644 --- a/llvm/utils/lit/lit/cl_arguments.py +++ b/llvm/utils/lit/lit/cl_arguments.py @@ -304,6 +304,16 @@ def parse_args(): default=os.environ.get("LIT_XFAIL_NOT", ""), ) selection_group.add_argument( + "--exclude-xfail", + help="exclude XFAIL tests (unless they are in the --xfail-not list). " + "Note: This option is implemented in " + "lit.TestRunner.parseIntegratedTestScript and so will have no effect on " + "test formats that do not call that and do not implement the option " + "separately.", + default=False, + action="store_true", + ) + selection_group.add_argument( "--num-shards", dest="numShards", metavar="M", diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py index 0939838..9650a0e 100755 --- a/llvm/utils/lit/lit/main.py +++ b/llvm/utils/lit/lit/main.py @@ -240,6 +240,8 @@ def mark_xfail(selected_tests, opts): t.xfails += "*" if test_file in opts.xfail_not or test_full_name in opts.xfail_not: t.xfail_not = True + if opts.exclude_xfail: + t.exclude_xfail = True def mark_excluded(discovered_tests, selected_tests): diff --git a/llvm/utils/lit/tests/Inputs/xfail-cl/true-xfail-conditionally.txt b/llvm/utils/lit/tests/Inputs/xfail-cl/true-xfail-conditionally.txt new file mode 100644 index 0000000..6fdecd6 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/xfail-cl/true-xfail-conditionally.txt @@ -0,0 +1,2 @@ +# XFAIL: this-does-not-exist +# RUN: true
\ No newline at end of file diff --git a/llvm/utils/lit/tests/xfail-cl.py b/llvm/utils/lit/tests/xfail-cl.py index ef1bb04..f1e0e33 100644 --- a/llvm/utils/lit/tests/xfail-cl.py +++ b/llvm/utils/lit/tests/xfail-cl.py @@ -5,6 +5,18 @@ # RUN: %{inputs}/xfail-cl \ # RUN: | FileCheck --check-prefix=CHECK-FILTER %s +# RUN: %{lit} --xfail 'false.txt;false2.txt;top-level-suite :: b :: test.txt' \ +# RUN: --exclude-xfail \ +# RUN: %{inputs}/xfail-cl \ +# RUN: | FileCheck --check-prefixes=CHECK-EXCLUDED,CHECK-EXCLUDED-NOOVERRIDE %s + +# RUN: %{lit} --xfail 'false.txt;false2.txt;top-level-suite :: b :: test.txt' \ +# RUN: --xfail-not 'true-xfail.txt' \ +# RUN: --exclude-xfail \ +# RUN: %{inputs}/xfail-cl \ +# RUN: | FileCheck --check-prefixes=CHECK-EXCLUDED,CHECK-EXCLUDED-OVERRIDE %s + + # RUN: env LIT_XFAIL='false.txt;false2.txt;top-level-suite :: b :: test.txt' \ # RUN: LIT_XFAIL_NOT='true-xfail.txt;top-level-suite :: a :: test-xfail.txt' \ # RUN: %{lit} %{inputs}/xfail-cl \ @@ -23,7 +35,7 @@ # END. -# CHECK-FILTER: Testing: 10 tests, {{[0-9]*}} workers +# CHECK-FILTER: Testing: 11 tests, {{[0-9]*}} workers # CHECK-FILTER-DAG: {{^}}PASS: top-level-suite :: a :: test.txt # CHECK-FILTER-DAG: {{^}}XFAIL: top-level-suite :: b :: test.txt # CHECK-FILTER-DAG: {{^}}XFAIL: top-level-suite :: a :: false.txt @@ -37,3 +49,17 @@ # CHECK-OVERRIDE: Testing: 1 tests, {{[0-9]*}} workers # CHECK-OVERRIDE: {{^}}PASS: top-level-suite :: true-xfail.txt + +# CHECK-EXCLUDED: Testing: 11 tests, {{[0-9]*}} workers +# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: a :: false.txt +# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: a :: test-xfail.txt +# CHECK-EXCLUDED-DAG: {{^}}PASS: top-level-suite :: a :: test.txt +# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: b :: false.txt +# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: b :: test-xfail.txt +# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: b :: test.txt +# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: false.txt +# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: false2.txt +# CHECK-EXCLUDED-DAG: {{^}}PASS: top-level-suite :: true-xfail-conditionally.txt +# CHECK-EXCLUDED-NOOVERRIDE-DAG: {{^}}EXCLUDED: top-level-suite :: true-xfail.txt +# CHECK-EXCLUDED-OVERRIDE-DAG: {{^}}PASS: top-level-suite :: true-xfail.txt +# CHECK-EXCLUDED-DAG: {{^}}PASS: top-level-suite :: true.txt diff --git a/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py new file mode 100644 index 0000000..80ac4c6 --- /dev/null +++ b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py @@ -0,0 +1,304 @@ +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +"""IR2Vec Triplet Generator + +Generates IR2Vec triplets by applying random optimization levels to LLVM IR files +and extracting triplets using llvm-ir2vec. Automatically generates preprocessed +files: entity2id.txt, relation2id.txt, and train2id.txt. + +Usage: + python generateTriplets.py <llvm_build_dir> <num_optimizations> <ll_file_list> <output_dir> +""" + +import argparse +import logging +import os +import random +import subprocess +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +from typing import List, Set, Tuple + +# Configuration +OPT_LEVELS = ["O0", "O1", "O2", "O3", "Os", "Oz"] +DEFAULT_MAX_WORKERS = 100 + +logger = logging.getLogger(__name__) + + +# TODO: Change this to a dataclass with slots +# when Python 3.10+ is the minimum version +# https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass +class TripletResult: + """Result from processing a single LLVM IR file""" + + __slots__ = ["triplets", "max_relation"] + + def __init__(self, triplets: Set[str], max_relation: int): + self.triplets = triplets + self.max_relation = max_relation + + +class IR2VecTripletGenerator: + """Main class for generating IR2Vec triplets""" + + def __init__( + self, + llvm_build_dir: Path, + num_optimizations: int, + output_dir: Path, + max_workers: int = DEFAULT_MAX_WORKERS, + ): + self.llvm_build_dir = llvm_build_dir + self.num_optimizations = num_optimizations + self.output_dir = output_dir + self.max_workers = max_workers + + # Tool paths + self.opt_binary = os.path.join(llvm_build_dir, "bin", "opt") + self.ir2vec_binary = os.path.join(llvm_build_dir, "bin", "llvm-ir2vec") + + self._validate_setup() + + # Create output directory if it doesn't exist + self.output_dir.mkdir(parents=True, exist_ok=True) + + def _validate_setup(self): + """Validate that all required tools and paths exist""" + if not self.llvm_build_dir.exists(): + raise FileNotFoundError( + f"LLVM build directory not found: {self.llvm_build_dir}" + ) + + if not os.path.isfile(self.opt_binary) or not os.access( + self.opt_binary, os.X_OK + ): + raise FileNotFoundError( + f"opt binary not found or not executable: {self.opt_binary}" + ) + + if not os.path.isfile(self.ir2vec_binary) or not os.access( + self.ir2vec_binary, os.X_OK + ): + raise FileNotFoundError( + f"llvm-ir2vec binary not found or not executable: {self.ir2vec_binary}" + ) + + if not (1 <= self.num_optimizations <= len(OPT_LEVELS)): + raise ValueError( + f"Number of optimizations must be between 1-{len(OPT_LEVELS)}" + ) + + def _select_optimization_levels(self) -> List[str]: + """Select unique random optimization levels""" + return random.sample(OPT_LEVELS, self.num_optimizations) + + def _process_single_file(self, input_file: Path) -> TripletResult: + """Process a single LLVM IR file with multiple optimization levels""" + all_triplets = set() + max_relation = 1 + opt_levels = self._select_optimization_levels() + + for opt_level in opt_levels: + triplets, file_max_relation = self._run_pipeline(input_file, opt_level) + if triplets: + all_triplets.update(triplets) + max_relation = max(max_relation, file_max_relation) + logger.debug( + f"Generated {len(triplets)} triplets for {input_file} with {opt_level}" + ) + + return TripletResult(all_triplets, max_relation) + + def _run_pipeline(self, input_file: Path, opt_level: str) -> Tuple[Set[str], int]: + """Run opt | llvm-ir2vec pipeline using subprocess pipes.""" + try: + # Run opt first + opt_proc = subprocess.Popen( + [self.opt_binary, f"-{opt_level}", str(input_file), "-o", "-"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + # Run llvm-ir2vec with opt's output as input + ir2vec_proc = subprocess.Popen( + [self.ir2vec_binary, "triplets", "-", "-o", "-"], + stdin=opt_proc.stdout, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + opt_proc.stdout.close() + stdout, _ = ir2vec_proc.communicate() + opt_proc.wait() + + # Check if either process failed + if opt_proc.returncode != 0 or ir2vec_proc.returncode != 0: + return set(), 1 + + return self._parse_triplet_output(stdout) + except (subprocess.SubprocessError, OSError): + return set(), 1 + + def _parse_triplet_output(self, output: str) -> Tuple[Set[str], int]: + """Parse triplet output and extract max relation""" + if not output.strip(): + return set(), 1 + + lines = output.strip().split("\n") + max_relation = 1 + + # Extract max relation from metadata line + if lines and lines[0].startswith("MAX_RELATION="): + max_relation = int(lines[0].split("=")[1]) + lines = lines[1:] + + # Remove duplicate triplets by converting to a set + return set(lines), max_relation + + def generate_triplets(self, file_list: Path) -> None: + """Main method to generate triplets from a list of LLVM IR files""" + input_files = self._read_file_list(file_list) + logger.info( + f"Processing {len(input_files)} files with {self.num_optimizations} " + f"optimization levels using {self.max_workers} workers" + ) + + all_triplets = set() + global_max_relation = 1 + + with ThreadPoolExecutor(max_workers=self.max_workers) as executor: + future_to_file = { + executor.submit(self._process_single_file, file): file + for file in input_files + } + + for future in as_completed(future_to_file): + try: + result = future.result() + all_triplets.update(result.triplets) + global_max_relation = max(global_max_relation, result.max_relation) + except (subprocess.SubprocessError, OSError, ValueError) as e: + file_path = future_to_file[future] + logger.error(f"Error processing {file_path}: {e}") + + self._generate_output_files(all_triplets, global_max_relation) + logger.info("Processing completed successfully") + + def _read_file_list(self, file_list: Path) -> List[Path]: + """Read and validate the list of input files""" + input_files = [] + with open(file_list, "r") as f: + for line_num, line in enumerate(f, 1): + if line := line.strip(): + file_path = Path(line) + if file_path.exists(): + input_files.append(file_path) + else: + logger.warning(f"File not found (line {line_num}): {file_path}") + + if not input_files: + raise ValueError("No valid input files found") + return input_files + + def _generate_output_files(self, all_triplets: Set[str], max_relation: int) -> None: + """Generate the final output files""" + logger.info(f"Generating output files with {len(all_triplets)} unique triplets") + + # Write all output files -- train2id.txt, entity2id.txt, relation2id.txt + train2id_file = os.path.join(self.output_dir, "train2id.txt") + entity2id_file = os.path.join(self.output_dir, "entity2id.txt") + relation2id_file = os.path.join(self.output_dir, "relation2id.txt") + + with open(train2id_file, "w") as f: + f.write(f"{len(all_triplets)}\n") + f.writelines(f"{triplet}\n" for triplet in all_triplets) + + self._generate_entity2id(entity2id_file) + self._generate_relation2id(relation2id_file, max_relation) + + def _generate_entity2id(self, output_file: Path) -> None: + """Generate entity2id.txt using llvm-ir2vec""" + subprocess.run( + [str(self.ir2vec_binary), "entities", "-o", str(output_file)], + check=True, + capture_output=True, + ) + + def _generate_relation2id(self, output_file: Path, max_relation: int) -> None: + """Generate relation2id.txt from max relation""" + max_relation = max(max_relation, 1) # At least Type and Next relations + num_relations = max_relation + 1 + + with open(output_file, "w") as f: + f.write(f"{num_relations}\n") + f.write("Type\t0\n") + f.write("Next\t1\n") + f.writelines(f"Arg{i-2}\t{i}\n" for i in range(2, num_relations)) + + +def main(): + """Main entry point""" + parser = argparse.ArgumentParser( + description="Generate IR2Vec triplets from LLVM IR files", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + parser.add_argument( + "llvm_build_dir", type=Path, help="Path to LLVM build directory" + ) + parser.add_argument( + "num_optimizations", + type=int, + help="Number of optimization levels to apply (1-6)", + ) + parser.add_argument( + "ll_file_list", + type=Path, + help="File containing list of LLVM IR files to process", + ) + parser.add_argument( + "output_dir", type=Path, help="Output directory for generated files" + ) + parser.add_argument( + "-j", + "--max-workers", + type=int, + default=DEFAULT_MAX_WORKERS, + help=f"Maximum number of parallel workers (default: {DEFAULT_MAX_WORKERS})", + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="Enable debug logging" + ) + parser.add_argument( + "-q", "--quiet", action="store_true", help="Suppress all output except errors" + ) + + args = parser.parse_args() + + # Configure logging + level = ( + logging.ERROR + if args.quiet + else (logging.DEBUG if args.verbose else logging.INFO) + ) + logging.basicConfig( + level=level, + format="[%(asctime)s] %(levelname)s: %(message)s", + datefmt="%H:%M:%S", + ) + + generator = IR2VecTripletGenerator( + args.llvm_build_dir, + args.num_optimizations, + args.output_dir, + args.max_workers, + ) + generator.generate_triplets(args.ll_file_list) + + +if __name__ == "__main__": + main() |