diff options
Diffstat (limited to 'llvm/tools')
-rw-r--r-- | llvm/tools/llc/llc.cpp | 4 | ||||
-rw-r--r-- | llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp | 207 | ||||
-rw-r--r-- | llvm/tools/llvm-rc/llvm-rc.cpp | 2 | ||||
-rw-r--r-- | llvm/tools/llvm-readobj/COFFDumper.cpp | 11 | ||||
-rw-r--r-- | llvm/tools/spirv-tools/CMakeLists.txt | 4 |
5 files changed, 147 insertions, 81 deletions
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 93b4a50..b3d7185 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -733,8 +733,8 @@ static int compileModule(char **argv, LLVMContext &Context) { reportError("target does not support generation of this file type"); } - const_cast<TargetLoweringObjectFile *>(Target->getObjFileLowering()) - ->Initialize(MMIWP->getMMI().getContext(), *Target); + Target->getObjFileLowering()->Initialize(MMIWP->getMMI().getContext(), + *Target); if (MIR) { assert(MMIWP && "Forgot to create MMIWP?"); if (MIR->parseMachineFunctions(*M, MMIWP->getMMI())) diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp index e1e5fad..f6ed94b 100644 --- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp +++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp @@ -9,13 +9,20 @@ /// \file /// This file implements the IR2Vec embedding generation tool. /// -/// This tool provides two main functionalities: +/// This tool provides three main modes: /// /// 1. Triplet Generation Mode (--mode=triplets): -/// Generates triplets (opcode, type, operands) for vocabulary training. -/// Usage: llvm-ir2vec --mode=triplets input.bc -o triplets.txt +/// Generates numeric triplets (head, tail, relation) for vocabulary +/// training. Output format: MAX_RELATION=N header followed by +/// head\ttail\trelation lines. Relations: 0=Type, 1=Next, 2+=Arg0,Arg1,... +/// Usage: llvm-ir2vec --mode=triplets input.bc -o train2id.txt /// -/// 2. Embedding Generation Mode (--mode=embeddings): +/// 2. Entities Generation Mode (--mode=entities): +/// Generates entity mappings for vocabulary training. +/// Output format: <total_entities> header followed by entity\tid lines. +/// Usage: llvm-ir2vec --mode=entities input.bc -o entity2id.txt +/// +/// 3. Embedding Generation Mode (--mode=embeddings): /// Generates IR2Vec embeddings using a trained vocabulary. /// Usage: llvm-ir2vec --mode=embeddings --ir2vec-vocab-path=vocab.json /// --level=func input.bc -o embeddings.txt Levels: --level=inst @@ -60,16 +67,19 @@ static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"), enum ToolMode { TripletMode, // Generate triplets for vocabulary training + EntityMode, // Generate entity mappings for vocabulary training EmbeddingMode // Generate embeddings using trained vocabulary }; -static cl::opt<ToolMode> - Mode("mode", cl::desc("Tool operation mode:"), - cl::values(clEnumValN(TripletMode, "triplets", - "Generate triplets for vocabulary training"), - clEnumValN(EmbeddingMode, "embeddings", - "Generate embeddings using trained vocabulary")), - cl::init(EmbeddingMode), cl::cat(IR2VecToolCategory)); +static cl::opt<ToolMode> Mode( + "mode", cl::desc("Tool operation mode:"), + cl::values(clEnumValN(TripletMode, "triplets", + "Generate triplets for vocabulary training"), + clEnumValN(EntityMode, "entities", + "Generate entity mappings for vocabulary training"), + clEnumValN(EmbeddingMode, "embeddings", + "Generate embeddings using trained vocabulary")), + cl::init(EmbeddingMode), cl::cat(IR2VecToolCategory)); static cl::opt<std::string> FunctionName("function", cl::desc("Process specific function only"), @@ -94,6 +104,13 @@ static cl::opt<EmbeddingLevel> namespace { +/// Relation types for triplet generation +enum RelationType { + TypeRelation = 0, ///< Instruction to type relationship + NextRelation = 1, ///< Sequential instruction relationship + ArgRelation = 2 ///< Instruction to operand relationship (ArgRelation + N) +}; + /// Helper class for collecting IR triplets and generating embeddings class IR2VecTool { private: @@ -111,29 +128,101 @@ public: // option MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); MAM.registerPass([&] { return IR2VecVocabAnalysis(); }); + // This will throw an error if vocab is not found or invalid Vocab = &MAM.getResult<IR2VecVocabAnalysis>(M); return Vocab->isValid(); } - /// Generate triplets for the entire module + /// Generate triplets for the module + /// Output format: MAX_RELATION=N header followed by relationships void generateTriplets(raw_ostream &OS) const { - for (const Function &F : M) - generateTriplets(F, OS); + unsigned MaxRelation = NextRelation; // Track maximum relation ID + std::string Relationships; + raw_string_ostream RelOS(Relationships); + + for (const Function &F : M) { + unsigned FuncMaxRelation = generateTriplets(F, RelOS); + MaxRelation = std::max(MaxRelation, FuncMaxRelation); + } + + RelOS.flush(); + + // Write metadata header followed by relationships + OS << "MAX_RELATION=" << MaxRelation << '\n'; + OS << Relationships; } /// Generate triplets for a single function - void generateTriplets(const Function &F, raw_ostream &OS) const { + /// Returns the maximum relation ID used in this function + unsigned generateTriplets(const Function &F, raw_ostream &OS) const { if (F.isDeclaration()) - return; + return 0; + + unsigned MaxRelation = 1; + unsigned PrevOpcode = 0; + bool HasPrevOpcode = false; + + for (const BasicBlock &BB : F) { + for (const auto &I : BB.instructionsWithoutDebug()) { + unsigned Opcode = Vocabulary::getNumericID(I.getOpcode()); + unsigned TypeID = Vocabulary::getNumericID(I.getType()->getTypeID()); + + // Add "Next" relationship with previous instruction + if (HasPrevOpcode) { + OS << PrevOpcode << '\t' << Opcode << '\t' << NextRelation << '\n'; + LLVM_DEBUG(dbgs() + << Vocabulary::getVocabKeyForOpcode(PrevOpcode + 1) << '\t' + << Vocabulary::getVocabKeyForOpcode(Opcode + 1) << '\t' + << "Next\n"); + } - std::string LocalOutput; - raw_string_ostream LocalOS(LocalOutput); + // Add "Type" relationship + OS << Opcode << '\t' << TypeID << '\t' << TypeRelation << '\n'; + LLVM_DEBUG( + dbgs() << Vocabulary::getVocabKeyForOpcode(Opcode + 1) << '\t' + << Vocabulary::getVocabKeyForTypeID(I.getType()->getTypeID()) + << '\t' << "Type\n"); + + // Add "Arg" relationships + unsigned ArgIndex = 0; + for (const Use &U : I.operands()) { + unsigned OperandID = Vocabulary::getNumericID(U.get()); + unsigned RelationID = ArgRelation + ArgIndex; + OS << Opcode << '\t' << OperandID << '\t' << RelationID << '\n'; + + LLVM_DEBUG({ + StringRef OperandStr = Vocabulary::getVocabKeyForOperandKind( + Vocabulary::getOperandKind(U.get())); + dbgs() << Vocabulary::getVocabKeyForOpcode(Opcode + 1) << '\t' + << OperandStr << '\t' << "Arg" << ArgIndex << '\n'; + }); + + ++ArgIndex; + } + // Only update MaxRelation if there were operands + if (ArgIndex > 0) { + MaxRelation = std::max(MaxRelation, ArgRelation + ArgIndex - 1); + } + PrevOpcode = Opcode; + HasPrevOpcode = true; + } + } - for (const BasicBlock &BB : F) - traverseBasicBlock(BB, LocalOS); + return MaxRelation; + } - LocalOS.flush(); - OS << LocalOutput; + /// Dump entity ID to string mappings + static void generateEntityMappings(raw_ostream &OS) { + // FIXME: Currently, the generated entity mappings are not one-to-one; + // Multiple TypeIDs map to same string key (Like Half, BFloat, etc. map to + // FloatTy). This would hinder learning good seed embeddings. + // We should fix this in the future by ensuring unique string keys either by + // post-processing here without changing the mapping in ir2vec::Vocabulary, + // or by changing the Vocabulary generation logic to ensure unique keys. + auto EntityLen = Vocabulary::expectedSize(); + OS << EntityLen << "\n"; + for (unsigned EntityID = 0; EntityID < EntityLen; ++EntityID) + OS << Vocabulary::getStringKey(EntityID) << '\t' << EntityID << '\n'; } /// Generate embeddings for the entire module @@ -197,31 +286,6 @@ public: } } } - -private: - /// Process a single basic block for triplet generation - void traverseBasicBlock(const BasicBlock &BB, raw_string_ostream &OS) const { - // Consider only non-debug and non-pseudo instructions - for (const auto &I : BB.instructionsWithoutDebug()) { - StringRef OpcStr = Vocabulary::getVocabKeyForOpcode(I.getOpcode()); - StringRef TypeStr = - Vocabulary::getVocabKeyForTypeID(I.getType()->getTypeID()); - - OS << '\n' << OpcStr << ' ' << TypeStr << ' '; - - LLVM_DEBUG({ - I.print(dbgs()); - dbgs() << "\n"; - I.getType()->print(dbgs()); - dbgs() << " Type\n"; - }); - - for (const Use &U : I.operands()) - OS << Vocabulary::getVocabKeyForOperandKind( - Vocabulary::getOperandKind(U.get())) - << ' '; - } - } }; Error processModule(Module &M, raw_ostream &OS) { @@ -230,11 +294,9 @@ Error processModule(Module &M, raw_ostream &OS) { if (Mode == EmbeddingMode) { // Initialize vocabulary for embedding generation // Note: Requires --ir2vec-vocab-path option to be set - if (!Tool.initializeVocabulary()) - return createStringError( - errc::invalid_argument, - "Failed to initialize IR2Vec vocabulary. " - "Make sure to specify --ir2vec-vocab-path for embedding mode."); + auto VocabStatus = Tool.initializeVocabulary(); + assert(VocabStatus && "Failed to initialize IR2Vec vocabulary"); + (void)VocabStatus; if (!FunctionName.empty()) { // Process single function @@ -249,18 +311,7 @@ Error processModule(Module &M, raw_ostream &OS) { Tool.generateEmbeddings(OS); } } else { - // Triplet generation mode - no vocabulary needed - if (!FunctionName.empty()) - // Process single function - if (const Function *F = M.getFunction(FunctionName)) - Tool.generateTriplets(*F, OS); - else - return createStringError(errc::invalid_argument, - "Function '%s' not found", - FunctionName.c_str()); - else - // Process all functions - Tool.generateTriplets(OS); + Tool.generateTriplets(OS); } return Error::success(); } @@ -284,8 +335,25 @@ int main(int argc, char **argv) { "information.\n"); // Validate command line options - if (Mode == TripletMode && Level.getNumOccurrences() > 0) - errs() << "Warning: --level option is ignored in triplet mode\n"; + if (Mode != EmbeddingMode) { + if (Level.getNumOccurrences() > 0) + errs() << "Warning: --level option is ignored\n"; + if (FunctionName.getNumOccurrences() > 0) + errs() << "Warning: --function option is ignored\n"; + } + + std::error_code EC; + raw_fd_ostream OS(OutputFilename, EC); + if (EC) { + errs() << "Error opening output file: " << EC.message() << "\n"; + return 1; + } + + if (Mode == EntityMode) { + // Just dump entity mappings without processing any IR + IR2VecTool::generateEntityMappings(OS); + return 0; + } // Parse the input LLVM IR file or stdin SMDiagnostic Err; @@ -296,13 +364,6 @@ int main(int argc, char **argv) { return 1; } - std::error_code EC; - raw_fd_ostream OS(OutputFilename, EC); - if (EC) { - errs() << "Error opening output file: " << EC.message() << "\n"; - return 1; - } - if (Error Err = processModule(*M, OS)) { handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EIB) { errs() << "Error: " << EIB.message() << "\n"; diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp index 7362154..f623342 100644 --- a/llvm/tools/llvm-rc/llvm-rc.cpp +++ b/llvm/tools/llvm-rc/llvm-rc.cpp @@ -201,7 +201,7 @@ std::string getMingwTriple() { Triple T(sys::getDefaultTargetTriple()); if (!isUsableArch(T.getArch())) T.setArch(getDefaultFallbackArch()); - if (T.isWindowsGNUEnvironment()) + if (T.isOSCygMing()) return T.str(); // Write out the literal form of the vendor/env here, instead of // constructing them with enum values (which end up with them in diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp index dce8e60..96e0a634 100644 --- a/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -412,10 +412,19 @@ const EnumEntry<COFF::DLLCharacteristics> PEDLLCharacteristics[] = { LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE), }; +// clang-format off static const EnumEntry<COFF::ExtendedDLLCharacteristics> PEExtendedDLLCharacteristics[] = { - LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT), + LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT ), + LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT_STRICT_MODE ), + LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_SET_CONTEXT_IP_VALIDATION_RELAXED_MODE), + LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_DYNAMIC_APIS_ALLOW_IN_PROC_ONLY ), + LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_RESERVED_1 ), + LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_RESERVED_2 ), + LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_FORWARD_CFI_COMPAT ), + LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_HOTPATCH_COMPATIBLE ), }; +// clang-format on static const EnumEntry<COFF::SectionCharacteristics> ImageSectionCharacteristics[] = { diff --git a/llvm/tools/spirv-tools/CMakeLists.txt b/llvm/tools/spirv-tools/CMakeLists.txt index c2c0f3e..5db7aec 100644 --- a/llvm/tools/spirv-tools/CMakeLists.txt +++ b/llvm/tools/spirv-tools/CMakeLists.txt @@ -5,10 +5,6 @@ if (NOT LLVM_INCLUDE_SPIRV_TOOLS_TESTS) return() endif () -if (NOT "SPIRV" IN_LIST LLVM_TARGETS_TO_BUILD) - message(FATAL_ERROR "Building SPIRV-Tools tests is unsupported without the SPIR-V target") -endif () - # SPIRV_DIS, SPIRV_VAL, SPIRV_AS and SPIRV_LINK variables can be used to provide paths to existing # spirv-dis, spirv-val, spirv-as, and spirv-link binaries, respectively. Otherwise, build them from # SPIRV-Tools source. |