25 files changed, 224 insertions, 72 deletions
diff --git a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
index be7537c..cd86646 100644
--- a/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
+++ b/llvm/utils/TableGen/Basic/CodeGenIntrinsics.cpp
@@ -278,15 +278,21 @@ CodeGenIntrinsic::CodeGenIntrinsic(const Record *R,
   TargetPrefix = R->getValueAsString("TargetPrefix");
   Name = R->getValueAsString("LLVMName").str();
 
+  std::string DefaultName = "llvm." + EnumName.str();
+  llvm::replace(DefaultName, '_', '.');
+
   if (Name == "") {
     // If an explicit name isn't specified, derive one from the DefName.
-    Name = "llvm." + EnumName.str();
-    llvm::replace(Name, '_', '.');
+    Name = std::move(DefaultName);
   } else {
     // Verify it starts with "llvm.".
     if (!StringRef(Name).starts_with("llvm."))
       PrintFatalError(DefLoc, "Intrinsic '" + DefName +
                                   "'s name does not start with 'llvm.'!");
+
+    if (Name == DefaultName)
+      PrintNote(DefLoc, "Explicitly specified name matches default name, "
+                        "consider dropping it");
   }
 
   // If TargetPrefix is specified, make sure that Name starts with
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index d1b14fb..0b90f91 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -285,7 +285,7 @@ emitGetNamedOperandIdx(raw_ostream &OS,
 
 static void
 emitGetOperandIdxName(raw_ostream &OS,
-                      MapVector<StringRef, unsigned> OperandNameToID,
+                      const MapVector<StringRef, unsigned> &OperandNameToID,
                       const MapVector<SmallVector<int>, unsigned> &OperandMap,
                       unsigned MaxNumOperands, unsigned NumOperandNames) {
   OS << "LLVM_READONLY OpName getOperandIdxName(uint16_t Opcode, int16_t Idx) "
diff --git a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
index 9981d10..4da907c 100644
--- a/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/AST/BUILD.gn
@@ -121,6 +121,7 @@ static_library("AST") {
     "ExternalASTMerger.cpp",
     "ExternalASTSource.cpp",
     "FormatString.cpp",
+    "InferAlloc.cpp",
     "InheritViz.cpp",
     "ItaniumCXXABI.cpp",
     "ItaniumMangle.cpp",
diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index 29c6178..3c523ae 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -104,6 +104,9 @@ copy("Headers") {
     "__clang_hip_runtime_wrapper.h",
     "__clang_hip_stdlib.h",
     "__clang_spirv_builtins.h",
+    "__float_float.h",
+    "__float_header_macro.h",
+    "__float_infinity_nan.h",
     "__stdarg___gnuc_va_list.h",
     "__stdarg___va_copy.h",
     "__stdarg_header_macro.h",
diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index e747006..278c29c 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -911,7 +911,6 @@ if (current_toolchain == default_toolchain) {
       "__cxx03/cmath",
       "__cxx03/codecvt",
       "__cxx03/complex",
-      "__cxx03/complex.h",
       "__cxx03/condition_variable",
       "__cxx03/csetjmp",
       "__cxx03/csignal",
@@ -924,25 +923,20 @@ if (current_toolchain == default_toolchain) {
       "__cxx03/cstring",
       "__cxx03/ctgmath",
       "__cxx03/ctime",
-      "__cxx03/ctype.h",
       "__cxx03/cuchar",
       "__cxx03/cwchar",
       "__cxx03/cwctype",
       "__cxx03/deque",
-      "__cxx03/errno.h",
       "__cxx03/exception",
       "__cxx03/experimental/__config",
       "__cxx03/experimental/utility",
       "__cxx03/ext/__hash",
       "__cxx03/ext/hash_map",
       "__cxx03/ext/hash_set",
-      "__cxx03/fenv.h",
-      "__cxx03/float.h",
       "__cxx03/forward_list",
       "__cxx03/fstream",
       "__cxx03/functional",
       "__cxx03/future",
-      "__cxx03/inttypes.h",
       "__cxx03/iomanip",
       "__cxx03/ios",
       "__cxx03/iosfwd",
@@ -969,11 +963,8 @@ if (current_toolchain == default_toolchain) {
       "__cxx03/sstream",
       "__cxx03/stack",
       "__cxx03/stdatomic.h",
-      "__cxx03/stdbool.h",
-      "__cxx03/stddef.h",
       "__cxx03/stdexcept",
       "__cxx03/stdint.h",
-      "__cxx03/stdio.h",
       "__cxx03/stdlib.h",
       "__cxx03/streambuf",
       "__cxx03/string",
@@ -981,7 +972,6 @@ if (current_toolchain == default_toolchain) {
       "__cxx03/string_view",
       "__cxx03/strstream",
       "__cxx03/system_error",
-      "__cxx03/tgmath.h",
       "__cxx03/thread",
       "__cxx03/type_traits",
       "__cxx03/typeindex",
@@ -994,7 +984,6 @@ if (current_toolchain == default_toolchain) {
       "__cxx03/vector",
       "__cxx03/version",
       "__cxx03/wchar.h",
-      "__cxx03/wctype.h",
       "__debug_utils/randomize_range.h",
       "__debug_utils/sanitizers.h",
       "__debug_utils/strict_weak_ordering_check.h",
diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn
index 5efc153..51911d7 100644
--- a/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Plugins/ExpressionParser/Clang/BUILD.gn
@@ -47,7 +47,6 @@ static_library("Clang") {
     "ClangASTImporter.cpp",
     "ClangASTMetadata.cpp",
     "ClangASTSource.cpp",
-    "ClangDeclVendor.cpp",
     "ClangExpressionDeclMap.cpp",
     "ClangExpressionHelper.cpp",
     "ClangExpressionParser.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
index 022cd87..1ca9292 100644
--- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
@@ -301,7 +301,6 @@ write_cmake_config("llvm-config") {
     "LLVM_BUILD_SHARED_LIBS=",
     "LLVM_ENABLE_LLVM_C_EXPORT_ANNOTATIONS=",
     "LLVM_ENABLE_TELEMETRY=",
-    "LLVM_ENABLE_ONDISK_CAS=",
     "LLVM_DEFAULT_TARGET_TRIPLE=$llvm_target_triple",
     "LLVM_ENABLE_DEBUGLOC_TRACKING_COVERAGE=",
     "LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN=",
@@ -367,6 +366,12 @@ write_cmake_config("llvm-config") {
     values += [ "LLVM_ENABLE_DIA_SDK=" ]
   }
 
+  if (llvm_enable_ondisk_cas) {
+    values += [ "LLVM_ENABLE_ONDISK_CAS=1" ]
+  } else {
+    values += [ "LLVM_ENABLE_ONDISK_CAS=" ]
+  }
+
   if (llvm_enable_threads) {
     values += [ "LLVM_ENABLE_THREADS=1" ]
   } else {
diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/config.gni b/llvm/utils/gn/secondary/llvm/include/llvm/Config/config.gni
index 8c2ab8a..715b03e 100644
--- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/config.gni
+++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/config.gni
@@ -1,4 +1,7 @@
 declare_args() {
   # Iterate unordered llvm containers in reverse.
   llvm_enable_reverse_iteration = false
+
+  # Build with on-disk CAS support (sets LLVM_ENABLE_ONDISK_CAS).
+  llvm_enable_ondisk_cas = false
 }
diff --git a/llvm/utils/gn/secondary/llvm/lib/AsmParser/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/AsmParser/BUILD.gn
index 9ff794f..2ad867d 100644
--- a/llvm/utils/gn/secondary/llvm/lib/AsmParser/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/AsmParser/BUILD.gn
@@ -6,6 +6,7 @@ static_library("AsmParser") {
     "//llvm/lib/Support",
   ]
   sources = [
+    "AsmParserContext.cpp",
     "LLLexer.cpp",
     "LLParser.cpp",
     "Parser.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/CAS/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CAS/BUILD.gn
index b4edd8d..5590b27 100644
--- a/llvm/utils/gn/secondary/llvm/lib/CAS/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/CAS/BUILD.gn
@@ -10,6 +10,8 @@ static_library("CAS") {
     "ObjectStore.cpp",
     "OnDiskCommon.cpp",
     "OnDiskDataAllocator.cpp",
+    "OnDiskGraphDB.cpp",
+    "OnDiskKeyValueDB.cpp",
     "OnDiskTrieRawHashMap.cpp",
   ]
 }
diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
index 39dacf7..4446702 100644
--- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/BUILD.gn
@@ -122,6 +122,7 @@ static_library("CodeGen") {
     "MLRegAllocPriorityAdvisor.cpp",
     "MachineBasicBlock.cpp",
     "MachineBlockFrequencyInfo.cpp",
+    "MachineBlockHashInfo.cpp",
     "MachineBlockPlacement.cpp",
     "MachineBranchProbabilityInfo.cpp",
     "MachineCFGPrinter.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
index c4ce990..937e81b 100644
--- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/Orc/TargetProcess/BUILD.gn
@@ -12,6 +12,8 @@ static_library("TargetProcess") {
     "JITLoaderGDB.cpp",
     "JITLoaderPerf.cpp",
     "JITLoaderVTune.cpp",
+    "LibraryResolver.cpp",
+    "LibraryScanner.cpp",
     "OrcRTBootstrap.cpp",
     "RegisterEHFrames.cpp",
     "SimpleExecutorDylibManager.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
index 38ba466..df9ddf9 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
@@ -45,6 +45,7 @@ static_library("Support") {
     "ARMAttributeParser.cpp",
     "ARMBuildAttributes.cpp",
     "ARMWinEH.cpp",
+    "AllocToken.cpp",
     "Allocator.cpp",
     "AutoConvert.cpp",
     "BalancedPartitioning.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
index c89e335..e47ca1e 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn
@@ -137,6 +137,7 @@ static_library("LLVMAMDGPUCodeGen") {
     "AMDGPUAsmPrinter.cpp",
     "AMDGPUAtomicOptimizer.cpp",
     "AMDGPUAttributor.cpp",
+    "AMDGPUBarrierLatency.cpp",
     "AMDGPUCallLowering.cpp",
     "AMDGPUCodeGenPrepare.cpp",
     "AMDGPUCombinerHelper.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn
index c055001..065d33d 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn
@@ -79,6 +79,7 @@ static_library("LLVMHexagonCodeGen") {
     "HexagonOptAddrMode.cpp",
     "HexagonOptimizeSZextends.cpp",
     "HexagonPeephole.cpp",
+    "HexagonQFPOptimizer.cpp",
     "HexagonRDFOpt.cpp",
     "HexagonRegisterInfo.cpp",
     "HexagonSelectionDAGInfo.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/tools/llvm-ir2vec/BUILD.gn b/llvm/utils/gn/secondary/llvm/tools/llvm-ir2vec/BUILD.gn
index 07a7951..4d75201 100644
--- a/llvm/utils/gn/secondary/llvm/tools/llvm-ir2vec/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/tools/llvm-ir2vec/BUILD.gn
@@ -1,9 +1,16 @@
 executable("llvm-ir2vec") {
   deps = [
     "//llvm/lib/Analysis",
+    "//llvm/lib/CodeGen",
+    "//llvm/lib/CodeGen/MIRParser",
     "//llvm/lib/IR",
     "//llvm/lib/IRReader",
     "//llvm/lib/Support",
+    "//llvm/lib/Target:AllTargetsAsmParsers",
+    "//llvm/lib/Target:AllTargetsCodeGens",
+    "//llvm/lib/Target:AllTargetsDescs",
+    "//llvm/lib/Target:AllTargetsInfos",
+    "//llvm/lib/TargetParser",
   ]
   sources = [ "llvm-ir2vec.cpp" ]
 }
diff --git a/llvm/utils/gn/secondary/llvm/unittests/CAS/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/CAS/BUILD.gn
index 52a64be..2d9eb68 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/CAS/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/CAS/BUILD.gn
@@ -1,3 +1,4 @@
+import("//llvm/include/llvm/Config/config.gni")
 import("//third-party/unittest/unittest.gni")
 
 unittest("CASTests") {
@@ -10,8 +11,15 @@ unittest("CASTests") {
     "ActionCacheTest.cpp",
     "CASTestConfig.cpp",
     "ObjectStoreTest.cpp",
-    "OnDiskDataAllocatorTest.cpp",
-    "OnDiskTrieRawHashMapTest.cpp",
-    "ProgramTest.cpp",
   ]
+
+  if (llvm_enable_ondisk_cas) {
+    sources += [
+      "OnDiskDataAllocatorTest.cpp",
+      "OnDiskGraphDBTest.cpp",
+      "OnDiskKeyValueDBTest.cpp",
+      "OnDiskTrieRawHashMapTest.cpp",
+      "ProgramTest.cpp",
+    ]
+  }
 }
diff --git a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
index 376f689..111e4c9 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/Orc/BUILD.gn
@@ -24,6 +24,7 @@ unittest("OrcJITTests") {
     "JITLinkRedirectionManagerTest.cpp",
     "JITTargetMachineBuilderTest.cpp",
     "LazyCallThroughAndReexportsTest.cpp",
+    "LibraryResolverTest.cpp",
     "LookupAndRecordAddrsTest.cpp",
     "MachOPlatformTest.cpp",
     "MapperJITLinkMemoryManagerTest.cpp",
@@ -42,6 +43,7 @@ unittest("OrcJITTests") {
     "SymbolStringPoolTest.cpp",
     "TaskDispatchTest.cpp",
     "ThreadSafeModuleTest.cpp",
+    "WaitingOnGraphTest.cpp",
     "WrapperFunctionUtilsTest.cpp",
   ]
 
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index a7e2705..f883145 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -92,12 +92,12 @@ class ShellEnvironment(object):
     we maintain a dir stack for pushd/popd.
     """
 
-    def __init__(self, cwd, env, umask=-1, ulimit={}):
+    def __init__(self, cwd, env, umask=-1, ulimit=None):
         self.cwd = cwd
         self.env = dict(env)
         self.umask = umask
         self.dirStack = []
-        self.ulimit = ulimit
+        self.ulimit = ulimit if ulimit else {}
 
     def change_dir(self, newdir):
         if os.path.isabs(newdir):
diff --git a/llvm/utils/lit/lit/run.py b/llvm/utils/lit/lit/run.py
index 62070e8..55de914 100644
--- a/llvm/utils/lit/lit/run.py
+++ b/llvm/utils/lit/lit/run.py
@@ -137,6 +137,10 @@ class Run(object):
                     "Raised process limit from %d to %d" % (soft_limit, desired_limit)
                 )
         except Exception as ex:
-            # Warn, unless this is Windows or z/OS, in which case this is expected.
-            if os.name != "nt" and platform.system() != "OS/390":
+            # Warn, unless this is Windows, z/OS, or Cygwin, in which case this is expected.
+            if (
+                os.name != "nt"
+                and platform.system() != "OS/390"
+                and platform.sys.platform != "cygwin"
+            ):
                 self.lit_config.warning("Failed to raise process limit: %s" % ex)
diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_reset.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_reset.txt
new file mode 100644
index 0000000..011d6db
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_reset.txt
@@ -0,0 +1,3 @@
+# RUN: %{python} %S/print_limits.py
+# Fail the test so that we can assert on the output.
+# RUN: not echo return
diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py
index e843277..09cd475 100644
--- a/llvm/utils/lit/tests/shtest-ulimit.py
+++ b/llvm/utils/lit/tests/shtest-ulimit.py
@@ -3,11 +3,15 @@
 # ulimit does not work on non-POSIX platforms.
 # Solaris for some reason does not respect ulimit -n, so mark it unsupported
 # as well.
-# UNSUPPORTED: system-windows, system-solaris
+# UNSUPPORTED: system-windows, system-cygwin, system-solaris
 
-# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit | FileCheck %s
+# RUN: %{python} %S/Inputs/shtest-ulimit/print_limits.py | grep RLIMIT_NOFILE \
+# RUN:   | sed -n -e 's/.*=//p' | tr -d '\n' > %t.nofile_limit
 
-# CHECK: -- Testing: 2 tests{{.*}}
+# RUN: not %{lit} -a -v %{inputs}/shtest-ulimit --order=lexical \
+# RUN:   | FileCheck -DBASE_NOFILE_LIMIT=%{readfile:%t.nofile_limit} %s
+
+# CHECK: -- Testing: 3 tests{{.*}}
 
 # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit-bad-arg.txt ({{[^)]*}})
 # CHECK: ulimit -n
@@ -16,3 +20,6 @@
 # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}})
 # CHECK: ulimit -n 50
 # CHECK: RLIMIT_NOFILE=50
+
+# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_reset.txt ({{[^)]*}})
+# CHECK: RLIMIT_NOFILE=[[BASE_NOFILE_LIMIT]]
diff --git a/llvm/utils/lldbDataFormatters.py b/llvm/utils/lldbDataFormatters.py
index 5e553ca..a3e4ae1 100644
--- a/llvm/utils/lldbDataFormatters.py
+++ b/llvm/utils/lldbDataFormatters.py
@@ -197,6 +197,11 @@ def StringRefSummaryProvider(valobj, internal_dict):
         return '""'
 
     data = data_pointer.deref
+    # An uninitialized StringRef can report a length that exceeds available memory,
+    # which may cause bad_alloc exceptions. Cap the length at target.max-string-summary-length.
+    limit_obj = valobj.target.debugger.GetSetting("target.max-string-summary-length")
+    if limit_obj:
+        length = min(length, limit_obj.GetUnsignedIntegerValue())
     # Get a char[N] type, from the underlying char type.
     array_type = data.type.GetArrayType(length)
     # Cast the char* string data to a char[N] array.
diff --git a/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py
index 80ac4c6..dba9e2c 100644
--- a/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py
+++ b/llvm/utils/mlgo-utils/IR2Vec/generateTriplets.py
@@ -1,14 +1,19 @@
 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-"""IR2Vec Triplet Generator
+"""IR2Vec/MIR2Vec Triplet Generator
 
-Generates IR2Vec triplets by applying random optimization levels to LLVM IR files
-and extracting triplets using llvm-ir2vec. Automatically generates preprocessed
-files: entity2id.txt, relation2id.txt, and train2id.txt.
+Generates IR2Vec or MIR2Vec triplets by applying random optimization levels to
+LLVM IR files (or processing MIR files) and extracting triplets using llvm-ir2vec.
+Automatically generates preprocessed files (entity2id.txt, relation2id.txt, and
+train2id.txt) necessary for training IR2Vec or MIR2Vec vocabularies.
 
 Usage:
-    python generateTriplets.py <llvm_build_dir> <num_optimizations> <ll_file_list> <output_dir>
+    For LLVM IR:
+        python generateTriplets.py <llvm_build_dir> <num_optimizations> <ll_file_list> <output_dir>
+
+    For Machine IR:
+        python generateTriplets.py --mode=mir <llvm_build_dir> <mir_file_list> <output_dir>
 """
 
 import argparse
@@ -41,7 +46,7 @@ class TripletResult:
 
 
 class IR2VecTripletGenerator:
-    """Main class for generating IR2Vec triplets"""
+    """Main class for generating IR2Vec or MIR2Vec triplets"""
 
     def __init__(
         self,
@@ -49,11 +54,13 @@ class IR2VecTripletGenerator:
         num_optimizations: int,
         output_dir: Path,
         max_workers: int = DEFAULT_MAX_WORKERS,
+        mode: str = "llvm",
     ):
         self.llvm_build_dir = llvm_build_dir
         self.num_optimizations = num_optimizations
         self.output_dir = output_dir
         self.max_workers = max_workers
+        self.mode = mode  # "llvm" or "mir"
 
         # Tool paths
         self.opt_binary = os.path.join(llvm_build_dir, "bin", "opt")
@@ -85,7 +92,11 @@ class IR2VecTripletGenerator:
                 f"llvm-ir2vec binary not found or not executable: {self.ir2vec_binary}"
             )
 
-        if not (1 <= self.num_optimizations <= len(OPT_LEVELS)):
+        if self.mode not in ["llvm", "mir"]:
+            raise ValueError(f"Mode must be 'llvm' or 'mir', got: {self.mode}")
+
+        # For LLVM IR mode, validate optimization count
+        if self.mode == "llvm" and not (1 <= self.num_optimizations <= len(OPT_LEVELS)):
             raise ValueError(
                 f"Number of optimizations must be between 1-{len(OPT_LEVELS)}"
             )
@@ -95,19 +106,28 @@ class IR2VecTripletGenerator:
         return random.sample(OPT_LEVELS, self.num_optimizations)
 
     def _process_single_file(self, input_file: Path) -> TripletResult:
-        """Process a single LLVM IR file with multiple optimization levels"""
+        """Process a single LLVM IR or MIR file"""
         all_triplets = set()
         max_relation = 1
-        opt_levels = self._select_optimization_levels()
 
-        for opt_level in opt_levels:
-            triplets, file_max_relation = self._run_pipeline(input_file, opt_level)
+        if self.mode == "mir":
+            # For MIR files, process directly without optimization
+            triplets, file_max_relation = self._run_mir_pipeline(input_file)
             if triplets:
                 all_triplets.update(triplets)
                 max_relation = max(max_relation, file_max_relation)
-                logger.debug(
-                    f"Generated {len(triplets)} triplets for {input_file} with {opt_level}"
-                )
+                logger.debug(f"Generated {len(triplets)} triplets for {input_file}")
+        else:
+            # For LLVM IR files, apply multiple optimization levels
+            opt_levels = self._select_optimization_levels()
+            for opt_level in opt_levels:
+                triplets, file_max_relation = self._run_pipeline(input_file, opt_level)
+                if triplets:
+                    all_triplets.update(triplets)
+                    max_relation = max(max_relation, file_max_relation)
+                    logger.debug(
+                        f"Generated {len(triplets)} triplets for {input_file} with {opt_level}"
+                    )
 
         return TripletResult(all_triplets, max_relation)
 
@@ -124,7 +144,7 @@ class IR2VecTripletGenerator:
 
             # Run llvm-ir2vec with opt's output as input
             ir2vec_proc = subprocess.Popen(
-                [self.ir2vec_binary, "triplets", "-", "-o", "-"],
+                [self.ir2vec_binary, "triplets", "--mode=llvm", "-", "-o", "-"],
                 stdin=opt_proc.stdout,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
@@ -143,6 +163,32 @@ class IR2VecTripletGenerator:
         except (subprocess.SubprocessError, OSError):
             return set(), 1
 
+    def _run_mir_pipeline(self, input_file: Path) -> Tuple[Set[str], int]:
+        """Run llvm-ir2vec pipeline for MIR files."""
+        try:
+            # Run llvm-ir2vec directly on MIR file
+            result = subprocess.run(
+                [
+                    self.ir2vec_binary,
+                    "triplets",
+                    "--mode=mir",
+                    str(input_file),
+                    "-o",
+                    "-",
+                ],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                text=True,
+                check=False,
+            )
+
+            if result.returncode != 0:
+                return set(), 1
+
+            return self._parse_triplet_output(result.stdout)
+        except (subprocess.SubprocessError, OSError):
+            return set(), 1
+
     def _parse_triplet_output(self, output: str) -> Tuple[Set[str], int]:
         """Parse triplet output and extract max relation"""
         if not output.strip():
@@ -160,12 +206,21 @@ class IR2VecTripletGenerator:
         return set(lines), max_relation
 
     def generate_triplets(self, file_list: Path) -> None:
-        """Main method to generate triplets from a list of LLVM IR files"""
+        """Main method to generate triplets from a list of LLVM IR or MIR files"""
+        # Store file_list_path for later use in entity generation
+        self.file_list_path = file_list
+
         input_files = self._read_file_list(file_list)
-        logger.info(
-            f"Processing {len(input_files)} files with {self.num_optimizations} "
-            f"optimization levels using {self.max_workers} workers"
-        )
+
+        if self.mode == "mir":
+            logger.info(
+                f"Processing {len(input_files)} MIR files using {self.max_workers} workers"
+            )
+        else:
+            logger.info(
+                f"Processing {len(input_files)} files with {self.num_optimizations} "
+                f"optimization levels using {self.max_workers} workers"
+            )
 
         all_triplets = set()
         global_max_relation = 1
@@ -222,28 +277,60 @@ class IR2VecTripletGenerator:
 
     def _generate_entity2id(self, output_file: Path) -> None:
         """Generate entity2id.txt using llvm-ir2vec"""
-        subprocess.run(
-            [str(self.ir2vec_binary), "entities", "-o", str(output_file)],
-            check=True,
-            capture_output=True,
-        )
+        if self.mode == "mir":
+            # For MIR mode, we need to provide a sample MIR file to determine target
+            # Use the first file from the processed list
+            input_files = self._read_file_list(self.file_list_path)
+            if not input_files:
+                raise ValueError("No input files available for entity generation")
+
+            subprocess.run(
+                [
+                    str(self.ir2vec_binary),
+                    "entities",
+                    "--mode=mir",
+                    str(input_files[0]),
+                    "-o",
+                    str(output_file),
+                ],
+                check=True,
+                capture_output=True,
+            )
+        else:
+            subprocess.run(
+                [
+                    str(self.ir2vec_binary),
+                    "entities",
+                    "--mode=llvm",
+                    "-o",
+                    str(output_file),
+                ],
+                check=True,
+                capture_output=True,
+            )
 
     def _generate_relation2id(self, output_file: Path, max_relation: int) -> None:
         """Generate relation2id.txt from max relation"""
-        max_relation = max(max_relation, 1)  # At least Type and Next relations
+        max_relation = max(max_relation, 1)  # At least Next relation
         num_relations = max_relation + 1
 
         with open(output_file, "w") as f:
             f.write(f"{num_relations}\n")
-            f.write("Type\t0\n")
-            f.write("Next\t1\n")
-            f.writelines(f"Arg{i-2}\t{i}\n" for i in range(2, num_relations))
+            if self.mode == "llvm":
+                # LLVM IR has Type relation at 0
+                f.write("Type\t0\n")
+                f.write("Next\t1\n")
+                f.writelines(f"Arg{i-2}\t{i}\n" for i in range(2, num_relations))
+            else:
+                # MIR doesn't have Type relation, starts with Next at 0
+                f.write("Next\t0\n")
+                f.writelines(f"Arg{i-1}\t{i}\n" for i in range(1, num_relations))
 
 
 def main():
     """Main entry point"""
     parser = argparse.ArgumentParser(
-        description="Generate IR2Vec triplets from LLVM IR files",
+        description="Generate IR2Vec or MIR2Vec triplets from LLVM IR or Machine IR files",
         formatter_class=argparse.RawDescriptionHelpFormatter,
     )
 
@@ -253,17 +340,26 @@ def main():
     parser.add_argument(
         "num_optimizations",
         type=int,
-        help="Number of optimization levels to apply (1-6)",
+        nargs="?",
+        default=1,
+        help="Number of optimization levels to apply (1-6) for LLVM IR mode",
     )
     parser.add_argument(
-        "ll_file_list",
+        "input_file_list",
         type=Path,
-        help="File containing list of LLVM IR files to process",
+        help="File containing list of LLVM IR or MIR files to process",
     )
     parser.add_argument(
         "output_dir", type=Path, help="Output directory for generated files"
     )
     parser.add_argument(
+        "--mode",
+        type=str,
+        choices=["llvm", "mir"],
+        default="llvm",
+        help="Operation mode: 'llvm' for LLVM IR (default) or 'mir' for Machine IR",
+    )
+    parser.add_argument(
         "-j",
         "--max-workers",
         type=int,
@@ -296,8 +392,9 @@ def main():
         args.num_optimizations,
         args.output_dir,
         args.max_workers,
+        args.mode,
     )
-    generator.generate_triplets(args.ll_file_list)
+    generator.generate_triplets(args.input_file_list)
 
 
 if __name__ == "__main__":
diff --git a/llvm/utils/update_mc_test_checks.py b/llvm/utils/update_mc_test_checks.py
index ab7fe19..67fff56 100755
--- a/llvm/utils/update_mc_test_checks.py
+++ b/llvm/utils/update_mc_test_checks.py
@@ -290,11 +290,9 @@ def update_test(ti: common.TestInfo):
 
         # prefix is selected and generated with most shared output lines
         # each run_id can only be used once
-        gen_prefix = ""
         used_runid = set()
 
-        # line number diff between generated prefix and testline
-        line_offset = 1
+        selected_prefixes = set()
         for prefix, tup in p_dict_sorted.items():
             o, run_ids = tup
 
@@ -308,18 +306,24 @@ def update_test(ti: common.TestInfo):
                 else:
                     used_runid.add(i)
             if not skip:
-                used_prefixes.add(prefix)
+                selected_prefixes.add(prefix)
 
-                if hasErr(o):
-                    newline = getErrCheckLine(prefix, o, mc_mode, line_offset)
-                else:
-                    newline = getStdCheckLine(prefix, o, mc_mode)
+        # Generate check lines in alphabetical order.
+        check_lines = []
+        for prefix in sorted(selected_prefixes):
+            o, run_ids = p_dict[prefix]
+            used_prefixes.add(prefix)
+
+            if hasErr(o):
+                line_offset = len(check_lines) + 1
+                check = getErrCheckLine(prefix, o, mc_mode, line_offset)
+            else:
+                check = getStdCheckLine(prefix, o, mc_mode)
 
-                if newline:
-                    gen_prefix += newline
-                    line_offset += 1
+            if check:
+                check_lines.append(check.strip())
 
-        generated_prefixes[input_line] = gen_prefix.rstrip("\n")
+        generated_prefixes[input_line] = "\n".join(check_lines)
 
     # write output
     for input_info in ti.iterlines(output_lines):