14 files changed, 384 insertions, 25 deletions
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index fa38d01..6f72b51 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -250,29 +250,38 @@ void InstrInfoEmitter::emitOperandNameMappings(
   // Map of operand names to their ID.
   MapVector<StringRef, unsigned> OperandNameToID;
 
-  /// The keys of this map is a map which have OpName ID values as their keys
-  /// and instruction operand indices as their values. The values of this map
-  /// are lists of instruction names. This map helps to unique entries among
+  /// A key in this map is a vector mapping OpName ID values to instruction
+  /// operand indices or -1 (but without any trailing -1 values which will be
+  /// added later). The corresponding value in this map is the index of that row
+  /// in the emitted OperandMap table. This map helps to unique entries among
   /// instructions that have identical OpName -> Operand index mapping.
-  std::map<std::map<unsigned, unsigned>, std::vector<StringRef>> OperandMap;
+  MapVector<SmallVector<int>, unsigned> OperandMap;
 
   // Max operand index seen.
   unsigned MaxOperandNo = 0;
 
   // Fixed/Predefined instructions do not have UseNamedOperandTable enabled, so
-  // we can just skip them.
+  // add a dummy map entry for them.
+  OperandMap.try_emplace({}, 0);
+  unsigned FirstTargetVal = TargetInstructions.front()->EnumVal;
+  SmallVector<unsigned> InstructionIndex(FirstTargetVal, 0);
   for (const CodeGenInstruction *Inst : TargetInstructions) {
-    if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable"))
+    if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable")) {
+      InstructionIndex.push_back(0);
       continue;
-    std::map<unsigned, unsigned> OpList;
+    }
+    SmallVector<int> OpList;
     for (const auto &Info : Inst->Operands) {
       unsigned ID =
           OperandNameToID.try_emplace(Info.Name, OperandNameToID.size())
               .first->second;
+      OpList.resize(std::max((unsigned)OpList.size(), ID + 1), -1);
       OpList[ID] = Info.MIOperandNo;
       MaxOperandNo = std::max(MaxOperandNo, Info.MIOperandNo);
     }
-    OperandMap[OpList].push_back(Inst->TheDef->getName());
+    auto [It, Inserted] =
+        OperandMap.try_emplace(std::move(OpList), OperandMap.size());
+    InstructionIndex.push_back(It->second);
   }
 
   const size_t NumOperandNames = OperandNameToID.size();
@@ -302,28 +311,22 @@ void InstrInfoEmitter::emitOperandNameMappings(
     StringRef Type = MaxOperandNo <= INT8_MAX ? "int8_t" : "int16_t";
     OS << "  static constexpr " << Type << " OperandMap[][" << NumOperandNames
        << "] = {\n";
-    for (const auto &Entry : OperandMap) {
-      const std::map<unsigned, unsigned> &OpList = Entry.first;
-
+    for (const auto &[OpList, _] : OperandMap) {
       // Emit a row of the OperandMap table.
       OS << "    {";
-      for (unsigned ID = 0; ID < NumOperandNames; ++ID) {
-        auto Iter = OpList.find(ID);
-        OS << (Iter != OpList.end() ? (int)Iter->second : -1) << ", ";
-      }
+      for (unsigned ID = 0; ID < NumOperandNames; ++ID)
+        OS << (ID < OpList.size() ? OpList[ID] : -1) << ", ";
       OS << "},\n";
     }
     OS << "  };\n";
 
-    OS << "  switch(Opcode) {\n";
-    for (const auto &[TableIndex, Entry] : enumerate(OperandMap)) {
-      for (StringRef Name : Entry.second)
-        OS << "  case " << Namespace << "::" << Name << ":\n";
-      OS << "    return OperandMap[" << TableIndex
-         << "][static_cast<unsigned>(Name)];\n";
-    }
-    OS << "  default: return -1;\n";
-    OS << "  }\n";
+    Type = OperandMap.size() <= UINT8_MAX + 1 ? "uint8_t" : "uint16_t";
+    OS << "  static constexpr " << Type << " InstructionIndex[] = {";
+    for (auto [TableIndex, Entry] : enumerate(InstructionIndex))
+      OS << (TableIndex % 16 == 0 ? "\n    " : " ") << Entry << ',';
+    OS << "\n  };\n";
+
+    OS << "  return OperandMap[InstructionIndex[Opcode]][(unsigned)Name];\n";
   } else {
     // There are no operands, so no need to emit anything
     OS << "  return -1;\n";
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
index 218e36e..e3182b0 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
@@ -46,6 +46,7 @@ static_library("bugprone") {
     "IncorrectRoundingsCheck.cpp",
     "InfiniteLoopCheck.cpp",
     "IntegerDivisionCheck.cpp",
+    "InvalidEnumDefaultInitializationCheck.cpp",
     "LambdaFunctionNameCheck.cpp",
     "MacroParenthesesCheck.cpp",
     "MacroRepeatedSideEffectsCheck.cpp",
diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn
index 566195e..4e63aa8 100644
--- a/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Plugins/SymbolFile/DWARF/BUILD.gn
@@ -69,6 +69,7 @@ static_library("DWARF") {
     "SymbolFileDWARF.cpp",
     "SymbolFileDWARFDebugMap.cpp",
     "SymbolFileDWARFDwo.cpp",
+    "SymbolFileWasm.cpp",
     "UniqueDWARFASTType.cpp",
   ]
 }
diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
index 3d08c3f..d394923 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn
@@ -8,6 +8,7 @@ static_library("Analysis") {
     "//llvm/include/llvm/Config:config",
     "//llvm/lib/BinaryFormat",
     "//llvm/lib/IR",
+    "//llvm/lib/Frontend/HLSL",
     "//llvm/lib/ProfileData",
     "//llvm/lib/Support",
     "//llvm/lib/TargetParser",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn
index 4c1c613..fce564e 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Frontend/HLSL/BUILD.gn
@@ -6,6 +6,7 @@ static_library("HLSL") {
   ]
   sources = [
     "CBuffer.cpp",
+    "HLSLBinding.cpp",
     "HLSLResource.cpp",
     "HLSLRootSignature.cpp",
     "RootSignatureMetadata.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn
index 1c839b1..33d4246 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Frontend/Offloading/BUILD.gn
@@ -8,6 +8,7 @@ static_library("Offloading") {
   ]
   sources = [
     "OffloadWrapper.cpp",
+    "PropertySet.cpp",
     "Utility.cpp",
   ]
 }
diff --git a/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn
index c29277c..12f7d65 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Frontend/BUILD.gn
@@ -13,6 +13,7 @@ unittest("LLVMFrontendTests") {
     "//llvm/lib/Testing/Support",
   ]
   sources = [
+    "HLSLBindingTest.cpp",
     "HLSLRootSignatureDumpTest.cpp",
     "HLSLRootSignatureRangesTest.cpp",
     "OpenACCTest.cpp",
@@ -23,5 +24,6 @@ unittest("LLVMFrontendTests") {
     "OpenMPDirectiveNameTest.cpp",
     "OpenMPIRBuilderTest.cpp",
     "OpenMPParsingTest.cpp",
+    "PropertySetRegistryTest.cpp",
   ]
 }
diff --git a/llvm/utils/lit/lit/Test.py b/llvm/utils/lit/lit/Test.py
index 1bd5ba8..7290977 100644
--- a/llvm/utils/lit/lit/Test.py
+++ b/llvm/utils/lit/lit/Test.py
@@ -247,6 +247,9 @@ class Test:
         # and will be honored when the test result is supplied.
         self.xfails = []
 
+        # If true, report this test as EXCLUDED when it is expected to fail.
+        self.exclude_xfail = False
+
         # If true, ignore all items in self.xfails.
         self.xfail_not = False
 
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index 73db67a..e7cd707 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -2175,6 +2175,8 @@ def parseIntegratedTestScript(test, additional_parsers=[], require_script=True):
     assert parsed["DEFINE:"] == script
     assert parsed["REDEFINE:"] == script
     test.xfails += parsed["XFAIL:"] or []
+    if test.exclude_xfail and test.isExpectedToFail():
+        return lit.Test.Result(Test.EXCLUDED, "excluding XFAIL tests")
     test.requires += parsed["REQUIRES:"] or []
     test.unsupported += parsed["UNSUPPORTED:"] or []
     if parsed["ALLOW_RETRIES:"]:
diff --git a/llvm/utils/lit/lit/cl_arguments.py b/llvm/utils/lit/lit/cl_arguments.py
index 3292554..e889515 100644
--- a/llvm/utils/lit/lit/cl_arguments.py
+++ b/llvm/utils/lit/lit/cl_arguments.py
@@ -304,6 +304,16 @@ def parse_args():
         default=os.environ.get("LIT_XFAIL_NOT", ""),
     )
     selection_group.add_argument(
+        "--exclude-xfail",
+        help="exclude XFAIL tests (unless they are in the --xfail-not list). "
+        "Note: This option is implemented in "
+        "lit.TestRunner.parseIntegratedTestScript and so will have no effect on "
+        "test formats that do not call that and do not implement the option "
+        "separately.",
+        default=False,
+        action="store_true",
+    )
+    selection_group.add_argument(
         "--num-shards",
         dest="numShards",
         metavar="M",
diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py
index 0939838..9650a0e 100755
--- a/llvm/utils/lit/lit/main.py
+++ b/llvm/utils/lit/lit/main.py
@@ -240,6 +240,8 @@ def mark_xfail(selected_tests, opts):
             t.xfails += "*"
         if test_file in opts.xfail_not or test_full_name in opts.xfail_not:
             t.xfail_not = True
+        if opts.exclude_xfail:
+            t.exclude_xfail = True
 
 
 def mark_excluded(discovered_tests, selected_tests):
diff --git a/llvm/utils/lit/tests/Inputs/xfail-cl/true-xfail-conditionally.txt b/llvm/utils/lit/tests/Inputs/xfail-cl/true-xfail-conditionally.txt
new file mode 100644
index 0000000..6fdecd6
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/xfail-cl/true-xfail-conditionally.txt
@@ -0,0 +1,2 @@
+# XFAIL: this-does-not-exist
+# RUN: true
+\ No newline at end of file
diff --git a/llvm/utils/lit/tests/xfail-cl.py b/llvm/utils/lit/tests/xfail-cl.py
index ef1bb04..f1e0e33 100644
--- a/llvm/utils/lit/tests/xfail-cl.py
+++ b/llvm/utils/lit/tests/xfail-cl.py
@@ -5,6 +5,18 @@
 # RUN:   %{inputs}/xfail-cl \
 # RUN: | FileCheck --check-prefix=CHECK-FILTER %s
 
+# RUN: %{lit} --xfail 'false.txt;false2.txt;top-level-suite :: b :: test.txt' \
+# RUN:   --exclude-xfail \
+# RUN:   %{inputs}/xfail-cl \
+# RUN: | FileCheck --check-prefixes=CHECK-EXCLUDED,CHECK-EXCLUDED-NOOVERRIDE %s
+
+# RUN: %{lit} --xfail 'false.txt;false2.txt;top-level-suite :: b :: test.txt' \
+# RUN:   --xfail-not 'true-xfail.txt' \
+# RUN:   --exclude-xfail \
+# RUN:   %{inputs}/xfail-cl \
+# RUN: | FileCheck --check-prefixes=CHECK-EXCLUDED,CHECK-EXCLUDED-OVERRIDE %s
+
+
 # RUN: env LIT_XFAIL='false.txt;false2.txt;top-level-suite :: b :: test.txt' \
 # RUN:   LIT_XFAIL_NOT='true-xfail.txt;top-level-suite :: a :: test-xfail.txt' \
 # RUN: %{lit} %{inputs}/xfail-cl \
@@ -23,7 +35,7 @@
 
 # END.
 
-# CHECK-FILTER: Testing: 10 tests, {{[0-9]*}} workers
+# CHECK-FILTER: Testing: 11 tests, {{[0-9]*}} workers
 # CHECK-FILTER-DAG: {{^}}PASS: top-level-suite :: a :: test.txt
 # CHECK-FILTER-DAG: {{^}}XFAIL: top-level-suite :: b :: test.txt
 # CHECK-FILTER-DAG: {{^}}XFAIL: top-level-suite :: a :: false.txt
@@ -37,3 +49,17 @@
 
 # CHECK-OVERRIDE: Testing: 1 tests, {{[0-9]*}} workers
 # CHECK-OVERRIDE: {{^}}PASS: top-level-suite :: true-xfail.txt
+
+# CHECK-EXCLUDED: Testing: 11 tests, {{[0-9]*}} workers
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: a :: false.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: a :: test-xfail.txt
+# CHECK-EXCLUDED-DAG: {{^}}PASS: top-level-suite :: a :: test.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: b :: false.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: b :: test-xfail.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: b :: test.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: false.txt
+# CHECK-EXCLUDED-DAG: {{^}}EXCLUDED: top-level-suite :: false2.txt
+# CHECK-EXCLUDED-DAG: {{^}}PASS: top-level-suite :: true-xfail-conditionally.txt
+# CHECK-EXCLUDED-NOOVERRIDE-DAG: {{^}}EXCLUDED: top-level-suite :: true-xfail.txt
+# CHECK-EXCLUDED-OVERRIDE-DAG: {{^}}PASS: top-level-suite :: true-xfail.txt
+# CHECK-EXCLUDED-DAG: {{^}}PASS: top-level-suite :: true.txt
diff --git a/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py
new file mode 100644
index 0000000..80ac4c6
--- /dev/null
+++ b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py
@@ -0,0 +1,304 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""IR2Vec Triplet Generator
+
+Generates IR2Vec triplets by applying random optimization levels to LLVM IR files
+and extracting triplets using llvm-ir2vec. Automatically generates preprocessed
+files: entity2id.txt, relation2id.txt, and train2id.txt.
+
+Usage:
+    python generateTriplets.py <llvm_build_dir> <num_optimizations> <ll_file_list> <output_dir>
+"""
+
+import argparse
+import logging
+import os
+import random
+import subprocess
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import List, Set, Tuple
+
+# Configuration
+OPT_LEVELS = ["O0", "O1", "O2", "O3", "Os", "Oz"]
+DEFAULT_MAX_WORKERS = 100
+
+logger = logging.getLogger(__name__)
+
+
+# TODO: Change this to a dataclass with slots
+# when Python 3.10+ is the minimum version
+# https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass
+class TripletResult:
+    """Result from processing a single LLVM IR file"""
+
+    __slots__ = ["triplets", "max_relation"]
+
+    def __init__(self, triplets: Set[str], max_relation: int):
+        self.triplets = triplets
+        self.max_relation = max_relation
+
+
+class IR2VecTripletGenerator:
+    """Main class for generating IR2Vec triplets"""
+
+    def __init__(
+        self,
+        llvm_build_dir: Path,
+        num_optimizations: int,
+        output_dir: Path,
+        max_workers: int = DEFAULT_MAX_WORKERS,
+    ):
+        self.llvm_build_dir = llvm_build_dir
+        self.num_optimizations = num_optimizations
+        self.output_dir = output_dir
+        self.max_workers = max_workers
+
+        # Tool paths
+        self.opt_binary = os.path.join(llvm_build_dir, "bin", "opt")
+        self.ir2vec_binary = os.path.join(llvm_build_dir, "bin", "llvm-ir2vec")
+
+        self._validate_setup()
+
+        # Create output directory if it doesn't exist
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+    def _validate_setup(self):
+        """Validate that all required tools and paths exist"""
+        if not self.llvm_build_dir.exists():
+            raise FileNotFoundError(
+                f"LLVM build directory not found: {self.llvm_build_dir}"
+            )
+
+        if not os.path.isfile(self.opt_binary) or not os.access(
+            self.opt_binary, os.X_OK
+        ):
+            raise FileNotFoundError(
+                f"opt binary not found or not executable: {self.opt_binary}"
+            )
+
+        if not os.path.isfile(self.ir2vec_binary) or not os.access(
+            self.ir2vec_binary, os.X_OK
+        ):
+            raise FileNotFoundError(
+                f"llvm-ir2vec binary not found or not executable: {self.ir2vec_binary}"
+            )
+
+        if not (1 <= self.num_optimizations <= len(OPT_LEVELS)):
+            raise ValueError(
+                f"Number of optimizations must be between 1-{len(OPT_LEVELS)}"
+            )
+
+    def _select_optimization_levels(self) -> List[str]:
+        """Select unique random optimization levels"""
+        return random.sample(OPT_LEVELS, self.num_optimizations)
+
+    def _process_single_file(self, input_file: Path) -> TripletResult:
+        """Process a single LLVM IR file with multiple optimization levels"""
+        all_triplets = set()
+        max_relation = 1
+        opt_levels = self._select_optimization_levels()
+
+        for opt_level in opt_levels:
+            triplets, file_max_relation = self._run_pipeline(input_file, opt_level)
+            if triplets:
+                all_triplets.update(triplets)
+                max_relation = max(max_relation, file_max_relation)
+                logger.debug(
+                    f"Generated {len(triplets)} triplets for {input_file} with {opt_level}"
+                )
+
+        return TripletResult(all_triplets, max_relation)
+
+    def _run_pipeline(self, input_file: Path, opt_level: str) -> Tuple[Set[str], int]:
+        """Pipe opt into llvm-ir2vec; return (triplets, max_relation) or (set(), 1)."""
+        try:
+            # Run opt first; discard its stderr so an unread pipe can never block opt
+            opt_proc = subprocess.Popen(
+                [self.opt_binary, f"-{opt_level}", str(input_file), "-o", "-"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.DEVNULL,
+                text=True,
+            )
+
+            # Run llvm-ir2vec with opt's output as input
+            ir2vec_proc = subprocess.Popen(
+                [self.ir2vec_binary, "triplets", "-", "-o", "-"],
+                stdin=opt_proc.stdout,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+            )
+            # Drop our handle so opt gets SIGPIPE if llvm-ir2vec exits early
+            opt_proc.stdout.close()
+            stdout, _ = ir2vec_proc.communicate()
+            opt_proc.wait()
+
+            # A failure of either stage yields the empty result for this opt level
+            if opt_proc.returncode != 0 or ir2vec_proc.returncode != 0:
+                return set(), 1
+
+            return self._parse_triplet_output(stdout)
+        except (subprocess.SubprocessError, OSError):
+            return set(), 1
+
+    def _parse_triplet_output(self, output: str) -> Tuple[Set[str], int]:
+        """Parse triplet output and extract max relation"""
+        if not output.strip():
+            return set(), 1
+
+        lines = output.strip().split("\n")
+        max_relation = 1
+
+        # Extract max relation from metadata line
+        if lines and lines[0].startswith("MAX_RELATION="):
+            max_relation = int(lines[0].split("=")[1])
+            lines = lines[1:]
+
+        # Remove duplicate triplets by converting to a set
+        return set(lines), max_relation
+
+    def generate_triplets(self, file_list: Path) -> None:
+        """Main method to generate triplets from a list of LLVM IR files"""
+        input_files = self._read_file_list(file_list)
+        logger.info(
+            f"Processing {len(input_files)} files with {self.num_optimizations} "
+            f"optimization levels using {self.max_workers} workers"
+        )
+
+        all_triplets = set()
+        global_max_relation = 1
+
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            future_to_file = {
+                executor.submit(self._process_single_file, file): file
+                for file in input_files
+            }
+
+            for future in as_completed(future_to_file):
+                try:
+                    result = future.result()
+                    all_triplets.update(result.triplets)
+                    global_max_relation = max(global_max_relation, result.max_relation)
+                except (subprocess.SubprocessError, OSError, ValueError) as e:
+                    file_path = future_to_file[future]
+                    logger.error(f"Error processing {file_path}: {e}")
+
+        self._generate_output_files(all_triplets, global_max_relation)
+        logger.info("Processing completed successfully")
+
+    def _read_file_list(self, file_list: Path) -> List[Path]:
+        """Read and validate the list of input files"""
+        input_files = []
+        with open(file_list, "r") as f:
+            for line_num, line in enumerate(f, 1):
+                if line := line.strip():
+                    file_path = Path(line)
+                    if file_path.exists():
+                        input_files.append(file_path)
+                    else:
+                        logger.warning(f"File not found (line {line_num}): {file_path}")
+
+        if not input_files:
+            raise ValueError("No valid input files found")
+        return input_files
+
+    def _generate_output_files(self, all_triplets: Set[str], max_relation: int) -> None:
+        """Generate the final output files"""
+        logger.info(f"Generating output files with {len(all_triplets)} unique triplets")
+
+        # Write all output files -- train2id.txt, entity2id.txt, relation2id.txt
+        train2id_file = os.path.join(self.output_dir, "train2id.txt")
+        entity2id_file = os.path.join(self.output_dir, "entity2id.txt")
+        relation2id_file = os.path.join(self.output_dir, "relation2id.txt")
+
+        with open(train2id_file, "w") as f:
+            f.write(f"{len(all_triplets)}\n")
+            f.writelines(f"{triplet}\n" for triplet in all_triplets)
+
+        self._generate_entity2id(entity2id_file)
+        self._generate_relation2id(relation2id_file, max_relation)
+
+    def _generate_entity2id(self, output_file: Path) -> None:
+        """Generate entity2id.txt using llvm-ir2vec"""
+        subprocess.run(
+            [str(self.ir2vec_binary), "entities", "-o", str(output_file)],
+            check=True,
+            capture_output=True,
+        )
+
+    def _generate_relation2id(self, output_file: Path, max_relation: int) -> None:
+        """Generate relation2id.txt from max relation"""
+        max_relation = max(max_relation, 1)  # At least Type and Next relations
+        num_relations = max_relation + 1
+
+        with open(output_file, "w") as f:
+            f.write(f"{num_relations}\n")
+            f.write("Type\t0\n")
+            f.write("Next\t1\n")
+            f.writelines(f"Arg{i-2}\t{i}\n" for i in range(2, num_relations))
+
+
+def main():
+    """Parse CLI arguments, configure logging, and run the triplet generator"""
+    parser = argparse.ArgumentParser(
+        description="Generate IR2Vec triplets from LLVM IR files",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    parser.add_argument(
+        "llvm_build_dir", type=Path, help="Path to LLVM build directory"
+    )
+    parser.add_argument(
+        "num_optimizations",
+        type=int,
+        help=f"Number of optimization levels to apply (1-{len(OPT_LEVELS)})",
+    )
+    parser.add_argument(
+        "ll_file_list",
+        type=Path,
+        help="File containing list of LLVM IR files to process",
+    )
+    parser.add_argument(
+        "output_dir", type=Path, help="Output directory for generated files"
+    )
+    parser.add_argument(
+        "-j",
+        "--max-workers",
+        type=int,
+        default=DEFAULT_MAX_WORKERS,
+        help=f"Maximum number of parallel workers (default: {DEFAULT_MAX_WORKERS})",
+    )
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="Enable debug logging"
+    )
+    parser.add_argument(
+        "-q", "--quiet", action="store_true", help="Suppress all output except errors"
+    )
+
+    args = parser.parse_args()
+
+    # Configure logging; --quiet takes precedence over --verbose
+    level = (
+        logging.ERROR
+        if args.quiet
+        else (logging.DEBUG if args.verbose else logging.INFO)
+    )
+    logging.basicConfig(
+        level=level,
+        format="[%(asctime)s] %(levelname)s: %(message)s",
+        datefmt="%H:%M:%S",
+    )
+
+    generator = IR2VecTripletGenerator(
+        args.llvm_build_dir,
+        args.num_optimizations,
+        args.output_dir,
+        args.max_workers,
+    )
+    generator.generate_triplets(args.ll_file_list)
+
+
+if __name__ == "__main__":
+    main()