5 files changed, 147 insertions, 81 deletions
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index 93b4a50..b3d7185 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -733,8 +733,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
         reportError("target does not support generation of this file type");
     }
 
-    const_cast<TargetLoweringObjectFile *>(Target->getObjFileLowering())
-        ->Initialize(MMIWP->getMMI().getContext(), *Target);
+    Target->getObjFileLowering()->Initialize(MMIWP->getMMI().getContext(),
+                                             *Target);
     if (MIR) {
       assert(MMIWP && "Forgot to create MMIWP?");
       if (MIR->parseMachineFunctions(*M, MMIWP->getMMI()))
diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
index e1e5fad..f6ed94b 100644
--- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
+++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
@@ -9,13 +9,20 @@
 /// \file
 /// This file implements the IR2Vec embedding generation tool.
 ///
-/// This tool provides two main functionalities:
+/// This tool provides three main modes:
 ///
 /// 1. Triplet Generation Mode (--mode=triplets):
-///    Generates triplets (opcode, type, operands) for vocabulary training.
-///    Usage: llvm-ir2vec --mode=triplets input.bc -o triplets.txt
+///    Generates numeric triplets (head, tail, relation) for vocabulary
+///    training. Output format: MAX_RELATION=N header followed by
+///    head\ttail\trelation lines. Relations: 0=Type, 1=Next, 2+=Arg0,Arg1,...
+///    Usage: llvm-ir2vec --mode=triplets input.bc -o train2id.txt
 ///
-/// 2. Embedding Generation Mode (--mode=embeddings):
+/// 2. Entities Generation Mode (--mode=entities):
+///    Generates entity mappings for vocabulary training.
+///    Output format: <total_entities> header followed by entity\tid lines.
+///    Usage: llvm-ir2vec --mode=entities input.bc -o entity2id.txt
+///
+/// 3. Embedding Generation Mode (--mode=embeddings):
 ///    Generates IR2Vec embeddings using a trained vocabulary.
 ///    Usage: llvm-ir2vec --mode=embeddings --ir2vec-vocab-path=vocab.json
 ///    --level=func input.bc -o embeddings.txt Levels: --level=inst
@@ -60,16 +67,19 @@ static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
 
 enum ToolMode {
   TripletMode,  // Generate triplets for vocabulary training
+  EntityMode,   // Generate entity mappings for vocabulary training
   EmbeddingMode // Generate embeddings using trained vocabulary
 };
 
-static cl::opt<ToolMode>
-    Mode("mode", cl::desc("Tool operation mode:"),
-         cl::values(clEnumValN(TripletMode, "triplets",
-                               "Generate triplets for vocabulary training"),
-                    clEnumValN(EmbeddingMode, "embeddings",
-                               "Generate embeddings using trained vocabulary")),
-         cl::init(EmbeddingMode), cl::cat(IR2VecToolCategory));
+static cl::opt<ToolMode> Mode(
+    "mode", cl::desc("Tool operation mode:"),
+    cl::values(clEnumValN(TripletMode, "triplets",
+                          "Generate triplets for vocabulary training"),
+               clEnumValN(EntityMode, "entities",
+                          "Generate entity mappings for vocabulary training"),
+               clEnumValN(EmbeddingMode, "embeddings",
+                          "Generate embeddings using trained vocabulary")),
+    cl::init(EmbeddingMode), cl::cat(IR2VecToolCategory));
 
 static cl::opt<std::string>
     FunctionName("function", cl::desc("Process specific function only"),
@@ -94,6 +104,13 @@ static cl::opt<EmbeddingLevel>
 
 namespace {
 
+/// Relation IDs emitted as the third field of each generated triplet.
+enum RelationType {
+  TypeRelation = 0, ///< Instruction to type relationship
+  NextRelation = 1, ///< Sequential instruction relationship
+  ArgRelation = 2   ///< Operand N relationship, encoded as ArgRelation + N
+};
+
 /// Helper class for collecting IR triplets and generating embeddings
 class IR2VecTool {
 private:
@@ -111,29 +128,101 @@ public:
     // option
     MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
     MAM.registerPass([&] { return IR2VecVocabAnalysis(); });
+    // The analysis reports an error itself if the vocab is missing or invalid
     Vocab = &MAM.getResult<IR2VecVocabAnalysis>(M);
     return Vocab->isValid();
   }
 
-  /// Generate triplets for the entire module
+  /// Generate triplets for every function in the module.
+  /// Output format: MAX_RELATION=N header line followed by the triplet lines.
   void generateTriplets(raw_ostream &OS) const {
-    for (const Function &F : M)
-      generateTriplets(F, OS);
+    unsigned MaxRelation = NextRelation; // Largest relation ID seen so far
+    std::string Relationships;
+    raw_string_ostream RelOS(Relationships);
+
+    for (const Function &F : M) {
+      unsigned FuncMaxRelation = generateTriplets(F, RelOS);
+      MaxRelation = std::max(MaxRelation, FuncMaxRelation);
+    }
+
+    RelOS.flush();
+
+    // Header first (it needs the final maximum), then the buffered triplets
+    OS << "MAX_RELATION=" << MaxRelation << '\n';
+    OS << Relationships;
   }
 
   /// Generate triplets for a single function
-  void generateTriplets(const Function &F, raw_ostream &OS) const {
+  /// Returns the maximum relation ID used (0 if F is only a declaration)
+  unsigned generateTriplets(const Function &F, raw_ostream &OS) const {
     if (F.isDeclaration())
-      return;
+      return 0;
+
+    unsigned MaxRelation = NextRelation;
+    unsigned PrevOpcode = 0;
+    bool HasPrevOpcode = false;
+
+    for (const BasicBlock &BB : F) {
+      for (const auto &I : BB.instructionsWithoutDebug()) {
+        unsigned Opcode = Vocabulary::getNumericID(I.getOpcode());
+        unsigned TypeID = Vocabulary::getNumericID(I.getType()->getTypeID());
+
+        // Add "Next" relationship with previous instruction
+        if (HasPrevOpcode) {
+          OS << PrevOpcode << '\t' << Opcode << '\t' << NextRelation << '\n';
+          LLVM_DEBUG(dbgs()
+                     << Vocabulary::getVocabKeyForOpcode(PrevOpcode + 1) << '\t'
+                     << Vocabulary::getVocabKeyForOpcode(Opcode + 1) << '\t'
+                     << "Next\n");
+        }
 
-    std::string LocalOutput;
-    raw_string_ostream LocalOS(LocalOutput);
+        // Add "Type" relationship
+        OS << Opcode << '\t' << TypeID << '\t' << TypeRelation << '\n';
+        LLVM_DEBUG(
+            dbgs() << Vocabulary::getVocabKeyForOpcode(Opcode + 1) << '\t'
+                   << Vocabulary::getVocabKeyForTypeID(I.getType()->getTypeID())
+                   << '\t' << "Type\n");
+
+        // Add "Arg" relationships
+        unsigned ArgIndex = 0;
+        for (const Use &U : I.operands()) {
+          unsigned OperandID = Vocabulary::getNumericID(U.get());
+          unsigned RelationID = ArgRelation + ArgIndex;
+          OS << Opcode << '\t' << OperandID << '\t' << RelationID << '\n';
+
+          LLVM_DEBUG({
+            StringRef OperandStr = Vocabulary::getVocabKeyForOperandKind(
+                Vocabulary::getOperandKind(U.get()));
+            dbgs() << Vocabulary::getVocabKeyForOpcode(Opcode + 1) << '\t'
+                   << OperandStr << '\t' << "Arg" << ArgIndex << '\n';
+          });
+
+          ++ArgIndex;
+        }
+        // Only update MaxRelation if there were operands; the last operand
+        // used relation ID ArgRelation + ArgIndex - 1.
+        if (ArgIndex > 0)
+          MaxRelation = std::max(MaxRelation, ArgRelation + ArgIndex - 1);
+        PrevOpcode = Opcode;
+        HasPrevOpcode = true;
+      }
+    }
 
-    for (const BasicBlock &BB : F)
-      traverseBasicBlock(BB, LocalOS);
+    return MaxRelation;
+  }
 
-    LocalOS.flush();
-    OS << LocalOutput;
+  /// Write the entity count, then one "<string key>\t<ID>" line per entity.
+  static void generateEntityMappings(raw_ostream &OS) {
+    // FIXME: Currently, the generated entity mappings are not one-to-one:
+    // multiple TypeIDs map to the same string key (e.g. Half and BFloat map
+    // to FloatTy). This would hinder learning good seed embeddings.
+    // We should fix this in the future by ensuring unique string keys either by
+    // post-processing here without changing the mapping in ir2vec::Vocabulary,
+    // or by changing the Vocabulary generation logic to ensure unique keys.
+    auto EntityLen = Vocabulary::expectedSize();
+    OS << EntityLen << "\n";
+    for (unsigned EntityID = 0; EntityID < EntityLen; ++EntityID)
+      OS << Vocabulary::getStringKey(EntityID) << '\t' << EntityID << '\n';
   }
 
   /// Generate embeddings for the entire module
@@ -197,31 +286,6 @@ public:
     }
     }
   }
-
-private:
-  /// Process a single basic block for triplet generation
-  void traverseBasicBlock(const BasicBlock &BB, raw_string_ostream &OS) const {
-    // Consider only non-debug and non-pseudo instructions
-    for (const auto &I : BB.instructionsWithoutDebug()) {
-      StringRef OpcStr = Vocabulary::getVocabKeyForOpcode(I.getOpcode());
-      StringRef TypeStr =
-          Vocabulary::getVocabKeyForTypeID(I.getType()->getTypeID());
-
-      OS << '\n' << OpcStr << ' ' << TypeStr << ' ';
-
-      LLVM_DEBUG({
-        I.print(dbgs());
-        dbgs() << "\n";
-        I.getType()->print(dbgs());
-        dbgs() << " Type\n";
-      });
-
-      for (const Use &U : I.operands())
-        OS << Vocabulary::getVocabKeyForOperandKind(
-                  Vocabulary::getOperandKind(U.get()))
-           << ' ';
-    }
-  }
 };
 
 Error processModule(Module &M, raw_ostream &OS) {
@@ -230,11 +294,9 @@ Error processModule(Module &M, raw_ostream &OS) {
   if (Mode == EmbeddingMode) {
     // Initialize vocabulary for embedding generation
     // Note: Requires --ir2vec-vocab-path option to be set
-    if (!Tool.initializeVocabulary())
-      return createStringError(
-          errc::invalid_argument,
-          "Failed to initialize IR2Vec vocabulary. "
-          "Make sure to specify --ir2vec-vocab-path for embedding mode.");
+    auto VocabStatus = Tool.initializeVocabulary();
+    assert(VocabStatus && "Failed to initialize IR2Vec vocabulary");
+    (void)VocabStatus;
 
     if (!FunctionName.empty()) {
       // Process single function
@@ -249,18 +311,7 @@ Error processModule(Module &M, raw_ostream &OS) {
       Tool.generateEmbeddings(OS);
     }
   } else {
-    // Triplet generation mode - no vocabulary needed
-    if (!FunctionName.empty())
-      // Process single function
-      if (const Function *F = M.getFunction(FunctionName))
-        Tool.generateTriplets(*F, OS);
-      else
-        return createStringError(errc::invalid_argument,
-                                 "Function '%s' not found",
-                                 FunctionName.c_str());
-    else
-      // Process all functions
-      Tool.generateTriplets(OS);
+    Tool.generateTriplets(OS);
   }
   return Error::success();
 }
@@ -284,8 +335,25 @@ int main(int argc, char **argv) {
       "information.\n");
 
   // Validate command line options
-  if (Mode == TripletMode && Level.getNumOccurrences() > 0)
-    errs() << "Warning: --level option is ignored in triplet mode\n";
+  if (Mode != EmbeddingMode) {
+    if (Level.getNumOccurrences() > 0)
+      errs() << "Warning: --level option is ignored\n";
+    if (FunctionName.getNumOccurrences() > 0)
+      errs() << "Warning: --function option is ignored\n";
+  }
+
+  std::error_code EC;
+  raw_fd_ostream OS(OutputFilename, EC);
+  if (EC) {
+    errs() << "Error opening output file: " << EC.message() << "\n";
+    return 1;
+  }
+
+  if (Mode == EntityMode) {
+    // Just dump entity mappings without processing any IR
+    IR2VecTool::generateEntityMappings(OS);
+    return 0;
+  }
 
   // Parse the input LLVM IR file or stdin
   SMDiagnostic Err;
@@ -296,13 +364,6 @@ int main(int argc, char **argv) {
     return 1;
   }
 
-  std::error_code EC;
-  raw_fd_ostream OS(OutputFilename, EC);
-  if (EC) {
-    errs() << "Error opening output file: " << EC.message() << "\n";
-    return 1;
-  }
-
   if (Error Err = processModule(*M, OS)) {
     handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EIB) {
       errs() << "Error: " << EIB.message() << "\n";
diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp
index 7362154..f623342 100644
--- a/llvm/tools/llvm-rc/llvm-rc.cpp
+++ b/llvm/tools/llvm-rc/llvm-rc.cpp
@@ -201,7 +201,7 @@ std::string getMingwTriple() {
   Triple T(sys::getDefaultTargetTriple());
   if (!isUsableArch(T.getArch()))
     T.setArch(getDefaultFallbackArch());
-  if (T.isWindowsGNUEnvironment())
+  if (T.isOSCygMing())
     return T.str();
   // Write out the literal form of the vendor/env here, instead of
   // constructing them with enum values (which end up with them in
diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp
index dce8e60..96e0a634 100644
--- a/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -412,10 +412,19 @@ const EnumEntry<COFF::DLLCharacteristics> PEDLLCharacteristics[] = {
   LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE),
 };
 
+// clang-format off
 static const EnumEntry<COFF::ExtendedDLLCharacteristics>
     PEExtendedDLLCharacteristics[] = {
-        LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT),
+        LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT                                ),
+        LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT_STRICT_MODE                    ),
+        LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_SET_CONTEXT_IP_VALIDATION_RELAXED_MODE),
+        LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_DYNAMIC_APIS_ALLOW_IN_PROC_ONLY       ),
+        LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_RESERVED_1                            ),
+        LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_RESERVED_2                            ),
+        LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_FORWARD_CFI_COMPAT                        ),
+        LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_HOTPATCH_COMPATIBLE                       ),
 };
+// clang-format on
 
 static const EnumEntry<COFF::SectionCharacteristics>
 ImageSectionCharacteristics[] = {
diff --git a/llvm/tools/spirv-tools/CMakeLists.txt b/llvm/tools/spirv-tools/CMakeLists.txt
index c2c0f3e..5db7aec 100644
--- a/llvm/tools/spirv-tools/CMakeLists.txt
+++ b/llvm/tools/spirv-tools/CMakeLists.txt
@@ -5,10 +5,6 @@ if (NOT LLVM_INCLUDE_SPIRV_TOOLS_TESTS)
   return()
 endif ()
 
-if (NOT "SPIRV" IN_LIST LLVM_TARGETS_TO_BUILD)
-  message(FATAL_ERROR "Building SPIRV-Tools tests is unsupported without the SPIR-V target")
-endif ()
-
 # SPIRV_DIS, SPIRV_VAL, SPIRV_AS and SPIRV_LINK variables can be used to provide paths to existing
 # spirv-dis, spirv-val, spirv-as, and spirv-link binaries, respectively. Otherwise, build them from
 # SPIRV-Tools source.