aboutsummaryrefslogtreecommitdiff
path: root/llvm/tools
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools')
-rw-r--r--llvm/tools/llc/llc.cpp4
-rw-r--r--llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp207
-rw-r--r--llvm/tools/llvm-rc/llvm-rc.cpp2
-rw-r--r--llvm/tools/llvm-readobj/COFFDumper.cpp11
-rw-r--r--llvm/tools/llvm-readobj/ELFDumper.cpp119
-rw-r--r--llvm/tools/spirv-tools/CMakeLists.txt4
6 files changed, 238 insertions, 109 deletions
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index 93b4a50..b3d7185 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -733,8 +733,8 @@ static int compileModule(char **argv, LLVMContext &Context) {
reportError("target does not support generation of this file type");
}
- const_cast<TargetLoweringObjectFile *>(Target->getObjFileLowering())
- ->Initialize(MMIWP->getMMI().getContext(), *Target);
+ Target->getObjFileLowering()->Initialize(MMIWP->getMMI().getContext(),
+ *Target);
if (MIR) {
assert(MMIWP && "Forgot to create MMIWP?");
if (MIR->parseMachineFunctions(*M, MMIWP->getMMI()))
diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
index e1e5fad..f6ed94b 100644
--- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
+++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
@@ -9,13 +9,20 @@
/// \file
/// This file implements the IR2Vec embedding generation tool.
///
-/// This tool provides two main functionalities:
+/// This tool provides three main modes:
///
/// 1. Triplet Generation Mode (--mode=triplets):
-/// Generates triplets (opcode, type, operands) for vocabulary training.
-/// Usage: llvm-ir2vec --mode=triplets input.bc -o triplets.txt
+/// Generates numeric triplets (head, tail, relation) for vocabulary
+/// training. Output format: MAX_RELATION=N header followed by
+/// head\ttail\trelation lines. Relations: 0=Type, 1=Next, 2+=Arg0,Arg1,...
+/// Usage: llvm-ir2vec --mode=triplets input.bc -o train2id.txt
///
-/// 2. Embedding Generation Mode (--mode=embeddings):
+/// 2. Entities Generation Mode (--mode=entities):
+/// Generates entity mappings for vocabulary training.
+/// Output format: <total_entities> header followed by entity\tid lines.
+/// Usage: llvm-ir2vec --mode=entities input.bc -o entity2id.txt
+///
+/// 3. Embedding Generation Mode (--mode=embeddings):
/// Generates IR2Vec embeddings using a trained vocabulary.
/// Usage: llvm-ir2vec --mode=embeddings --ir2vec-vocab-path=vocab.json
/// --level=func input.bc -o embeddings.txt Levels: --level=inst
@@ -60,16 +67,19 @@ static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
enum ToolMode {
TripletMode, // Generate triplets for vocabulary training
+ EntityMode, // Generate entity mappings for vocabulary training
EmbeddingMode // Generate embeddings using trained vocabulary
};
-static cl::opt<ToolMode>
- Mode("mode", cl::desc("Tool operation mode:"),
- cl::values(clEnumValN(TripletMode, "triplets",
- "Generate triplets for vocabulary training"),
- clEnumValN(EmbeddingMode, "embeddings",
- "Generate embeddings using trained vocabulary")),
- cl::init(EmbeddingMode), cl::cat(IR2VecToolCategory));
+static cl::opt<ToolMode> Mode(
+ "mode", cl::desc("Tool operation mode:"),
+ cl::values(clEnumValN(TripletMode, "triplets",
+ "Generate triplets for vocabulary training"),
+ clEnumValN(EntityMode, "entities",
+ "Generate entity mappings for vocabulary training"),
+ clEnumValN(EmbeddingMode, "embeddings",
+ "Generate embeddings using trained vocabulary")),
+ cl::init(EmbeddingMode), cl::cat(IR2VecToolCategory));
static cl::opt<std::string>
FunctionName("function", cl::desc("Process specific function only"),
@@ -94,6 +104,13 @@ static cl::opt<EmbeddingLevel>
namespace {
+/// Relation types for triplet generation
+enum RelationType {
+ TypeRelation = 0, ///< Instruction to type relationship
+ NextRelation = 1, ///< Sequential instruction relationship
+ ArgRelation = 2 ///< Instruction to operand relationship (ArgRelation + N)
+};
+
/// Helper class for collecting IR triplets and generating embeddings
class IR2VecTool {
private:
@@ -111,29 +128,101 @@ public:
// option
MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
MAM.registerPass([&] { return IR2VecVocabAnalysis(); });
+ // This will throw an error if vocab is not found or invalid
Vocab = &MAM.getResult<IR2VecVocabAnalysis>(M);
return Vocab->isValid();
}
- /// Generate triplets for the entire module
+ /// Generate triplets for the module
+ /// Output format: MAX_RELATION=N header followed by relationships
void generateTriplets(raw_ostream &OS) const {
- for (const Function &F : M)
- generateTriplets(F, OS);
+ unsigned MaxRelation = NextRelation; // Track maximum relation ID
+ std::string Relationships;
+ raw_string_ostream RelOS(Relationships);
+
+ for (const Function &F : M) {
+ unsigned FuncMaxRelation = generateTriplets(F, RelOS);
+ MaxRelation = std::max(MaxRelation, FuncMaxRelation);
+ }
+
+ RelOS.flush();
+
+ // Write metadata header followed by relationships
+ OS << "MAX_RELATION=" << MaxRelation << '\n';
+ OS << Relationships;
}
/// Generate triplets for a single function
- void generateTriplets(const Function &F, raw_ostream &OS) const {
+ /// Returns the maximum relation ID used in this function
+ unsigned generateTriplets(const Function &F, raw_ostream &OS) const {
if (F.isDeclaration())
- return;
+ return 0;
+
+ unsigned MaxRelation = 1;
+ unsigned PrevOpcode = 0;
+ bool HasPrevOpcode = false;
+
+ for (const BasicBlock &BB : F) {
+ for (const auto &I : BB.instructionsWithoutDebug()) {
+ unsigned Opcode = Vocabulary::getNumericID(I.getOpcode());
+ unsigned TypeID = Vocabulary::getNumericID(I.getType()->getTypeID());
+
+ // Add "Next" relationship with previous instruction
+ if (HasPrevOpcode) {
+ OS << PrevOpcode << '\t' << Opcode << '\t' << NextRelation << '\n';
+ LLVM_DEBUG(dbgs()
+ << Vocabulary::getVocabKeyForOpcode(PrevOpcode + 1) << '\t'
+ << Vocabulary::getVocabKeyForOpcode(Opcode + 1) << '\t'
+ << "Next\n");
+ }
- std::string LocalOutput;
- raw_string_ostream LocalOS(LocalOutput);
+ // Add "Type" relationship
+ OS << Opcode << '\t' << TypeID << '\t' << TypeRelation << '\n';
+ LLVM_DEBUG(
+ dbgs() << Vocabulary::getVocabKeyForOpcode(Opcode + 1) << '\t'
+ << Vocabulary::getVocabKeyForTypeID(I.getType()->getTypeID())
+ << '\t' << "Type\n");
+
+ // Add "Arg" relationships
+ unsigned ArgIndex = 0;
+ for (const Use &U : I.operands()) {
+ unsigned OperandID = Vocabulary::getNumericID(U.get());
+ unsigned RelationID = ArgRelation + ArgIndex;
+ OS << Opcode << '\t' << OperandID << '\t' << RelationID << '\n';
+
+ LLVM_DEBUG({
+ StringRef OperandStr = Vocabulary::getVocabKeyForOperandKind(
+ Vocabulary::getOperandKind(U.get()));
+ dbgs() << Vocabulary::getVocabKeyForOpcode(Opcode + 1) << '\t'
+ << OperandStr << '\t' << "Arg" << ArgIndex << '\n';
+ });
+
+ ++ArgIndex;
+ }
+ // Only update MaxRelation if there were operands
+ if (ArgIndex > 0) {
+ MaxRelation = std::max(MaxRelation, ArgRelation + ArgIndex - 1);
+ }
+ PrevOpcode = Opcode;
+ HasPrevOpcode = true;
+ }
+ }
- for (const BasicBlock &BB : F)
- traverseBasicBlock(BB, LocalOS);
+ return MaxRelation;
+ }
- LocalOS.flush();
- OS << LocalOutput;
+ /// Dump entity ID to string mappings
+ static void generateEntityMappings(raw_ostream &OS) {
+ // FIXME: Currently, the generated entity mappings are not one-to-one;
+ // Multiple TypeIDs map to same string key (Like Half, BFloat, etc. map to
+ // FloatTy). This would hinder learning good seed embeddings.
+ // We should fix this in the future by ensuring unique string keys either by
+ // post-processing here without changing the mapping in ir2vec::Vocabulary,
+ // or by changing the Vocabulary generation logic to ensure unique keys.
+ auto EntityLen = Vocabulary::expectedSize();
+ OS << EntityLen << "\n";
+ for (unsigned EntityID = 0; EntityID < EntityLen; ++EntityID)
+ OS << Vocabulary::getStringKey(EntityID) << '\t' << EntityID << '\n';
}
/// Generate embeddings for the entire module
@@ -197,31 +286,6 @@ public:
}
}
}
-
-private:
- /// Process a single basic block for triplet generation
- void traverseBasicBlock(const BasicBlock &BB, raw_string_ostream &OS) const {
- // Consider only non-debug and non-pseudo instructions
- for (const auto &I : BB.instructionsWithoutDebug()) {
- StringRef OpcStr = Vocabulary::getVocabKeyForOpcode(I.getOpcode());
- StringRef TypeStr =
- Vocabulary::getVocabKeyForTypeID(I.getType()->getTypeID());
-
- OS << '\n' << OpcStr << ' ' << TypeStr << ' ';
-
- LLVM_DEBUG({
- I.print(dbgs());
- dbgs() << "\n";
- I.getType()->print(dbgs());
- dbgs() << " Type\n";
- });
-
- for (const Use &U : I.operands())
- OS << Vocabulary::getVocabKeyForOperandKind(
- Vocabulary::getOperandKind(U.get()))
- << ' ';
- }
- }
};
Error processModule(Module &M, raw_ostream &OS) {
@@ -230,11 +294,9 @@ Error processModule(Module &M, raw_ostream &OS) {
if (Mode == EmbeddingMode) {
// Initialize vocabulary for embedding generation
// Note: Requires --ir2vec-vocab-path option to be set
- if (!Tool.initializeVocabulary())
- return createStringError(
- errc::invalid_argument,
- "Failed to initialize IR2Vec vocabulary. "
- "Make sure to specify --ir2vec-vocab-path for embedding mode.");
+ auto VocabStatus = Tool.initializeVocabulary();
+ assert(VocabStatus && "Failed to initialize IR2Vec vocabulary");
+ (void)VocabStatus;
if (!FunctionName.empty()) {
// Process single function
@@ -249,18 +311,7 @@ Error processModule(Module &M, raw_ostream &OS) {
Tool.generateEmbeddings(OS);
}
} else {
- // Triplet generation mode - no vocabulary needed
- if (!FunctionName.empty())
- // Process single function
- if (const Function *F = M.getFunction(FunctionName))
- Tool.generateTriplets(*F, OS);
- else
- return createStringError(errc::invalid_argument,
- "Function '%s' not found",
- FunctionName.c_str());
- else
- // Process all functions
- Tool.generateTriplets(OS);
+ Tool.generateTriplets(OS);
}
return Error::success();
}
@@ -284,8 +335,25 @@ int main(int argc, char **argv) {
"information.\n");
// Validate command line options
- if (Mode == TripletMode && Level.getNumOccurrences() > 0)
- errs() << "Warning: --level option is ignored in triplet mode\n";
+ if (Mode != EmbeddingMode) {
+ if (Level.getNumOccurrences() > 0)
+ errs() << "Warning: --level option is ignored\n";
+ if (FunctionName.getNumOccurrences() > 0)
+ errs() << "Warning: --function option is ignored\n";
+ }
+
+ std::error_code EC;
+ raw_fd_ostream OS(OutputFilename, EC);
+ if (EC) {
+ errs() << "Error opening output file: " << EC.message() << "\n";
+ return 1;
+ }
+
+ if (Mode == EntityMode) {
+ // Just dump entity mappings without processing any IR
+ IR2VecTool::generateEntityMappings(OS);
+ return 0;
+ }
// Parse the input LLVM IR file or stdin
SMDiagnostic Err;
@@ -296,13 +364,6 @@ int main(int argc, char **argv) {
return 1;
}
- std::error_code EC;
- raw_fd_ostream OS(OutputFilename, EC);
- if (EC) {
- errs() << "Error opening output file: " << EC.message() << "\n";
- return 1;
- }
-
if (Error Err = processModule(*M, OS)) {
handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EIB) {
errs() << "Error: " << EIB.message() << "\n";
diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp
index 7362154..f623342 100644
--- a/llvm/tools/llvm-rc/llvm-rc.cpp
+++ b/llvm/tools/llvm-rc/llvm-rc.cpp
@@ -201,7 +201,7 @@ std::string getMingwTriple() {
Triple T(sys::getDefaultTargetTriple());
if (!isUsableArch(T.getArch()))
T.setArch(getDefaultFallbackArch());
- if (T.isWindowsGNUEnvironment())
+ if (T.isOSCygMing())
return T.str();
// Write out the literal form of the vendor/env here, instead of
// constructing them with enum values (which end up with them in
diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp
index dce8e60..96e0a634 100644
--- a/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -412,10 +412,19 @@ const EnumEntry<COFF::DLLCharacteristics> PEDLLCharacteristics[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE),
};
+// clang-format off
static const EnumEntry<COFF::ExtendedDLLCharacteristics>
PEExtendedDLLCharacteristics[] = {
- LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT_STRICT_MODE ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_SET_CONTEXT_IP_VALIDATION_RELAXED_MODE),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_DYNAMIC_APIS_ALLOW_IN_PROC_ONLY ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_RESERVED_1 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_CET_RESERVED_2 ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_FORWARD_CFI_COMPAT ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_EX_HOTPATCH_COMPATIBLE ),
};
+// clang-format on
static const EnumEntry<COFF::SectionCharacteristics>
ImageSectionCharacteristics[] = {
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 2699e10..94ce386 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -424,6 +424,9 @@ protected:
ArrayRef<Elf_Word> getShndxTable(const Elf_Shdr *Symtab) const;
+ void printSFrameHeader(const SFrameParser<ELFT::Endianness> &Parser);
+ void printSFrameFDEs(const SFrameParser<ELFT::Endianness> &Parser);
+
private:
mutable SmallVector<std::optional<VersionEntry>, 0> VersionMap;
};
@@ -6440,6 +6443,90 @@ template <typename ELFT> void ELFDumper<ELFT>::printMemtag() {
}
template <typename ELFT>
+void ELFDumper<ELFT>::printSFrameHeader(
+ const SFrameParser<ELFT::Endianness> &Parser) {
+ DictScope HeaderScope(W, "Header");
+
+ const sframe::Preamble<ELFT::Endianness> &Preamble = Parser.getPreamble();
+ W.printHex("Magic", Preamble.Magic.value());
+ W.printEnum("Version", Preamble.Version.value(), sframe::getVersions());
+ W.printFlags("Flags", Preamble.Flags.value(), sframe::getFlags());
+
+ const sframe::Header<ELFT::Endianness> &Header = Parser.getHeader();
+ W.printEnum("ABI", Header.ABIArch.value(), sframe::getABIs());
+
+ W.printNumber(("CFA fixed FP offset" +
+ Twine(Parser.usesFixedFPOffset() ? "" : " (unused)"))
+ .str(),
+ Header.CFAFixedFPOffset.value());
+
+ W.printNumber(("CFA fixed RA offset" +
+ Twine(Parser.usesFixedRAOffset() ? "" : " (unused)"))
+ .str(),
+ Header.CFAFixedRAOffset.value());
+
+ W.printNumber("Auxiliary header length", Header.AuxHdrLen.value());
+ W.printNumber("Num FDEs", Header.NumFDEs.value());
+ W.printNumber("Num FREs", Header.NumFREs.value());
+ W.printNumber("FRE subsection length", Header.FRELen.value());
+ W.printNumber("FDE subsection offset", Header.FDEOff.value());
+ W.printNumber("FRE subsection offset", Header.FREOff.value());
+
+ if (Expected<ArrayRef<uint8_t>> Aux = Parser.getAuxHeader())
+ W.printHexList("Auxiliary header", *Aux);
+ else
+ reportWarning(Aux.takeError(), FileName);
+}
+
+template <typename ELFT>
+void ELFDumper<ELFT>::printSFrameFDEs(
+ const SFrameParser<ELFT::Endianness> &Parser) {
+ typename SFrameParser<ELFT::Endianness>::FDERange FDEs;
+ if (Error Err = Parser.fdes().moveInto(FDEs)) {
+ reportWarning(std::move(Err), FileName);
+ return;
+ }
+
+ ListScope IndexScope(W, "Function Index");
+ for (auto It = FDEs.begin(); It != FDEs.end(); ++It) {
+ DictScope FDEScope(
+ W,
+ formatv("FuncDescEntry [{0}]", std::distance(FDEs.begin(), It)).str());
+
+ W.printHex("PC", Parser.getAbsoluteStartAddress(It));
+ W.printHex("Size", It->Size);
+ W.printHex("Start FRE Offset", It->StartFREOff);
+ W.printNumber("Num FREs", It->NumFREs);
+
+ {
+ DictScope InfoScope(W, "Info");
+ W.printEnum("FRE Type", It->getFREType(), sframe::getFRETypes());
+ W.printEnum("FDE Type", It->getFDEType(), sframe::getFDETypes());
+ switch (Parser.getHeader().ABIArch) {
+ case sframe::ABI::AArch64EndianBig:
+ case sframe::ABI::AArch64EndianLittle:
+ W.printEnum("PAuth Key", sframe::AArch64PAuthKey(It->getPAuthKey()),
+ sframe::getAArch64PAuthKeys());
+ break;
+ case sframe::ABI::AMD64EndianLittle:
+ // unused
+ break;
+ }
+
+ W.printHex("Raw", It->Info);
+ }
+
+ W.printHex(
+ ("Repetitive block size" +
+ Twine(It->getFDEType() == sframe::FDEType::PCMask ? "" : " (unused)"))
+ .str(),
+ It->RepSize);
+
+ W.printHex("Padding2", It->Padding2);
+ }
+}
+
+template <typename ELFT>
void ELFDumper<ELFT>::printSectionsAsSFrame(ArrayRef<std::string> Sections) {
constexpr endianness E = ELFT::Endianness;
for (object::SectionRef Section :
@@ -6456,8 +6543,8 @@ void ELFDumper<ELFT>::printSectionsAsSFrame(ArrayRef<std::string> Sections) {
continue;
}
- Expected<object::SFrameParser<E>> Parser =
- object::SFrameParser<E>::create(arrayRefFromStringRef(SectionContent));
+ Expected<object::SFrameParser<E>> Parser = object::SFrameParser<E>::create(
+ arrayRefFromStringRef(SectionContent), Section.getAddress());
if (!Parser) {
reportWarning(createError("invalid sframe section: " +
toString(Parser.takeError())),
@@ -6465,32 +6552,8 @@ void ELFDumper<ELFT>::printSectionsAsSFrame(ArrayRef<std::string> Sections) {
continue;
}
- DictScope HeaderScope(W, "Header");
-
- const sframe::Preamble<E> &Preamble = Parser->getPreamble();
- W.printHex("Magic", Preamble.Magic.value());
- W.printEnum("Version", Preamble.Version.value(), sframe::getVersions());
- W.printFlags("Flags", Preamble.Flags.value(), sframe::getFlags());
-
- const sframe::Header<E> &Header = Parser->getHeader();
- W.printEnum("ABI", Header.ABIArch.value(), sframe::getABIs());
-
- W.printNumber(("CFA fixed FP offset" +
- Twine(Parser->usesFixedFPOffset() ? "" : " (unused)"))
- .str(),
- Header.CFAFixedFPOffset.value());
-
- W.printNumber(("CFA fixed RA offset" +
- Twine(Parser->usesFixedRAOffset() ? "" : " (unused)"))
- .str(),
- Header.CFAFixedRAOffset.value());
-
- W.printNumber("Auxiliary header length", Header.AuxHdrLen.value());
- W.printNumber("Num FDEs", Header.NumFDEs.value());
- W.printNumber("Num FREs", Header.NumFREs.value());
- W.printNumber("FRE subsection length", Header.FRELen.value());
- W.printNumber("FDE subsection offset", Header.FDEOff.value());
- W.printNumber("FRE subsection offset", Header.FREOff.value());
+ printSFrameHeader(*Parser);
+ printSFrameFDEs(*Parser);
}
}
diff --git a/llvm/tools/spirv-tools/CMakeLists.txt b/llvm/tools/spirv-tools/CMakeLists.txt
index c2c0f3e..5db7aec 100644
--- a/llvm/tools/spirv-tools/CMakeLists.txt
+++ b/llvm/tools/spirv-tools/CMakeLists.txt
@@ -5,10 +5,6 @@ if (NOT LLVM_INCLUDE_SPIRV_TOOLS_TESTS)
return()
endif ()
-if (NOT "SPIRV" IN_LIST LLVM_TARGETS_TO_BUILD)
- message(FATAL_ERROR "Building SPIRV-Tools tests is unsupported without the SPIR-V target")
-endif ()
-
# SPIRV_DIS, SPIRV_VAL, SPIRV_AS and SPIRV_LINK variables can be used to provide paths to existing
# spirv-dis, spirv-val, spirv-as, and spirv-link binaries, respectively. Otherwise, build them from
# SPIRV-Tools source.