aboutsummaryrefslogtreecommitdiff
path: root/llvm/tools
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools')
-rw-r--r--llvm/tools/llvm-ir2vec/CMakeLists.txt10
-rw-r--r--llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp314
-rw-r--r--llvm/tools/llvm-objcopy/ObjcopyOptions.cpp2
3 files changed, 326 insertions, 0 deletions
diff --git a/llvm/tools/llvm-ir2vec/CMakeLists.txt b/llvm/tools/llvm-ir2vec/CMakeLists.txt
new file mode 100644
index 0000000..a4cf969
--- /dev/null
+++ b/llvm/tools/llvm-ir2vec/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_LINK_COMPONENTS
+ Analysis
+ Core
+ IRReader
+ Support
+ )
+
+add_llvm_tool(llvm-ir2vec
+ llvm-ir2vec.cpp
+ )
diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
new file mode 100644
index 0000000..e1e5fad
--- /dev/null
+++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
@@ -0,0 +1,314 @@
+//===- llvm-ir2vec.cpp - IR2Vec Embedding Generation Tool -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the IR2Vec embedding generation tool.
+///
+/// This tool provides two main functionalities:
+///
+/// 1. Triplet Generation Mode (--mode=triplets):
+/// Generates triplets (opcode, type, operands) for vocabulary training.
+/// Usage: llvm-ir2vec --mode=triplets input.bc -o triplets.txt
+///
+/// 2. Embedding Generation Mode (--mode=embeddings):
+/// Generates IR2Vec embeddings using a trained vocabulary.
+/// Usage: llvm-ir2vec --mode=embeddings --ir2vec-vocab-path=vocab.json
+/// --level=func input.bc -o embeddings.txt Levels: --level=inst
+/// (instructions), --level=bb (basic blocks), --level=func (functions)
+/// (See IR2Vec.cpp for more embedding generation options)
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IR2Vec.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassInstrumentation.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "ir2vec"
+
+namespace llvm {
+namespace ir2vec {
+
+static cl::OptionCategory IR2VecToolCategory("IR2Vec Tool Options");
+
+static cl::opt<std::string>
+ InputFilename(cl::Positional,
+ cl::desc("<input bitcode file or '-' for stdin>"),
+ cl::init("-"), cl::cat(IR2VecToolCategory));
+
+static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"),
+ cl::value_desc("filename"),
+ cl::init("-"),
+ cl::cat(IR2VecToolCategory));
+
+enum ToolMode {
+ TripletMode, // Generate triplets for vocabulary training
+ EmbeddingMode // Generate embeddings using trained vocabulary
+};
+
+static cl::opt<ToolMode>
+ Mode("mode", cl::desc("Tool operation mode:"),
+ cl::values(clEnumValN(TripletMode, "triplets",
+ "Generate triplets for vocabulary training"),
+ clEnumValN(EmbeddingMode, "embeddings",
+ "Generate embeddings using trained vocabulary")),
+ cl::init(EmbeddingMode), cl::cat(IR2VecToolCategory));
+
+static cl::opt<std::string>
+ FunctionName("function", cl::desc("Process specific function only"),
+ cl::value_desc("name"), cl::Optional, cl::init(""),
+ cl::cat(IR2VecToolCategory));
+
+enum EmbeddingLevel {
+ InstructionLevel, // Generate instruction-level embeddings
+ BasicBlockLevel, // Generate basic block-level embeddings
+ FunctionLevel // Generate function-level embeddings
+};
+
+static cl::opt<EmbeddingLevel>
+ Level("level", cl::desc("Embedding generation level (for embedding mode):"),
+ cl::values(clEnumValN(InstructionLevel, "inst",
+ "Generate instruction-level embeddings"),
+ clEnumValN(BasicBlockLevel, "bb",
+ "Generate basic block-level embeddings"),
+ clEnumValN(FunctionLevel, "func",
+ "Generate function-level embeddings")),
+ cl::init(FunctionLevel), cl::cat(IR2VecToolCategory));
+
+namespace {
+
+/// Helper class for collecting IR triplets and generating embeddings
+class IR2VecTool {
+private:
+ Module &M;
+ ModuleAnalysisManager MAM;
+ const Vocabulary *Vocab = nullptr;
+
+public:
+ explicit IR2VecTool(Module &M) : M(M) {}
+
+ /// Initialize the IR2Vec vocabulary analysis
+ bool initializeVocabulary() {
+ // Register and run the IR2Vec vocabulary analysis
+ // The vocabulary file path is specified via --ir2vec-vocab-path global
+ // option
+ MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
+ MAM.registerPass([&] { return IR2VecVocabAnalysis(); });
+ Vocab = &MAM.getResult<IR2VecVocabAnalysis>(M);
+ return Vocab->isValid();
+ }
+
+ /// Generate triplets for the entire module
+ void generateTriplets(raw_ostream &OS) const {
+ for (const Function &F : M)
+ generateTriplets(F, OS);
+ }
+
+ /// Generate triplets for a single function
+ void generateTriplets(const Function &F, raw_ostream &OS) const {
+ if (F.isDeclaration())
+ return;
+
+ std::string LocalOutput;
+ raw_string_ostream LocalOS(LocalOutput);
+
+ for (const BasicBlock &BB : F)
+ traverseBasicBlock(BB, LocalOS);
+
+ LocalOS.flush();
+ OS << LocalOutput;
+ }
+
+ /// Generate embeddings for the entire module
+ void generateEmbeddings(raw_ostream &OS) const {
+ if (!Vocab->isValid()) {
+ OS << "Error: Vocabulary is not valid. IR2VecTool not initialized.\n";
+ return;
+ }
+
+ for (const Function &F : M)
+ generateEmbeddings(F, OS);
+ }
+
+ /// Generate embeddings for a single function
+ void generateEmbeddings(const Function &F, raw_ostream &OS) const {
+ if (F.isDeclaration()) {
+ OS << "Function " << F.getName() << " is a declaration, skipping.\n";
+ return;
+ }
+
+ // Create embedder for this function
+ assert(Vocab->isValid() && "Vocabulary is not valid");
+ auto Emb = Embedder::create(IR2VecKind::Symbolic, F, *Vocab);
+ if (!Emb) {
+ OS << "Error: Failed to create embedder for function " << F.getName()
+ << "\n";
+ return;
+ }
+
+ OS << "Function: " << F.getName() << "\n";
+
+ // Generate embeddings based on the specified level
+ switch (Level) {
+ case FunctionLevel: {
+ Emb->getFunctionVector().print(OS);
+ break;
+ }
+ case BasicBlockLevel: {
+ const auto &BBVecMap = Emb->getBBVecMap();
+ for (const BasicBlock &BB : F) {
+ auto It = BBVecMap.find(&BB);
+ if (It != BBVecMap.end()) {
+ OS << BB.getName() << ":";
+ It->second.print(OS);
+ }
+ }
+ break;
+ }
+ case InstructionLevel: {
+ const auto &InstMap = Emb->getInstVecMap();
+ for (const BasicBlock &BB : F) {
+ for (const Instruction &I : BB) {
+ auto It = InstMap.find(&I);
+ if (It != InstMap.end()) {
+ I.print(OS);
+ It->second.print(OS);
+ }
+ }
+ }
+ break;
+ }
+ }
+ }
+
+private:
+ /// Process a single basic block for triplet generation
+ void traverseBasicBlock(const BasicBlock &BB, raw_string_ostream &OS) const {
+ // Consider only non-debug and non-pseudo instructions
+ for (const auto &I : BB.instructionsWithoutDebug()) {
+ StringRef OpcStr = Vocabulary::getVocabKeyForOpcode(I.getOpcode());
+ StringRef TypeStr =
+ Vocabulary::getVocabKeyForTypeID(I.getType()->getTypeID());
+
+ OS << '\n' << OpcStr << ' ' << TypeStr << ' ';
+
+ LLVM_DEBUG({
+ I.print(dbgs());
+ dbgs() << "\n";
+ I.getType()->print(dbgs());
+ dbgs() << " Type\n";
+ });
+
+ for (const Use &U : I.operands())
+ OS << Vocabulary::getVocabKeyForOperandKind(
+ Vocabulary::getOperandKind(U.get()))
+ << ' ';
+ }
+ }
+};
+
+Error processModule(Module &M, raw_ostream &OS) {
+ IR2VecTool Tool(M);
+
+ if (Mode == EmbeddingMode) {
+ // Initialize vocabulary for embedding generation
+ // Note: Requires --ir2vec-vocab-path option to be set
+ if (!Tool.initializeVocabulary())
+ return createStringError(
+ errc::invalid_argument,
+ "Failed to initialize IR2Vec vocabulary. "
+ "Make sure to specify --ir2vec-vocab-path for embedding mode.");
+
+ if (!FunctionName.empty()) {
+ // Process single function
+ if (const Function *F = M.getFunction(FunctionName))
+ Tool.generateEmbeddings(*F, OS);
+ else
+ return createStringError(errc::invalid_argument,
+ "Function '%s' not found",
+ FunctionName.c_str());
+ } else {
+ // Process all functions
+ Tool.generateEmbeddings(OS);
+ }
+ } else {
+ // Triplet generation mode - no vocabulary needed
+ if (!FunctionName.empty())
+ // Process single function
+ if (const Function *F = M.getFunction(FunctionName))
+ Tool.generateTriplets(*F, OS);
+ else
+ return createStringError(errc::invalid_argument,
+ "Function '%s' not found",
+ FunctionName.c_str());
+ else
+ // Process all functions
+ Tool.generateTriplets(OS);
+ }
+ return Error::success();
+}
+} // namespace
+} // namespace ir2vec
+} // namespace llvm
+
+int main(int argc, char **argv) {
+ using namespace llvm;
+ using namespace llvm::ir2vec;
+
+ InitLLVM X(argc, argv);
+ cl::HideUnrelatedOptions(IR2VecToolCategory);
+ cl::ParseCommandLineOptions(
+ argc, argv,
+ "IR2Vec - Embedding Generation Tool\n"
+ "Generates embeddings for a given LLVM IR and "
+ "supports triplet generation for vocabulary "
+ "training and embedding generation.\n\n"
+ "See https://llvm.org/docs/CommandGuide/llvm-ir2vec.html for more "
+ "information.\n");
+
+ // Validate command line options
+ if (Mode == TripletMode && Level.getNumOccurrences() > 0)
+ errs() << "Warning: --level option is ignored in triplet mode\n";
+
+ // Parse the input LLVM IR file or stdin
+ SMDiagnostic Err;
+ LLVMContext Context;
+ std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);
+ if (!M) {
+ Err.print(argv[0], errs());
+ return 1;
+ }
+
+ std::error_code EC;
+ raw_fd_ostream OS(OutputFilename, EC);
+ if (EC) {
+ errs() << "Error opening output file: " << EC.message() << "\n";
+ return 1;
+ }
+
+ if (Error Err = processModule(*M, OS)) {
+ handleAllErrors(std::move(Err), [&](const ErrorInfoBase &EIB) {
+ errs() << "Error: " << EIB.message() << "\n";
+ });
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
index 0d20959..175f77c 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
+++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
@@ -308,6 +308,8 @@ static const StringMap<MachineInfo> TargetMap{
// RISC-V
{"elf32-littleriscv", {ELF::EM_RISCV, false, true}},
{"elf64-littleriscv", {ELF::EM_RISCV, true, true}},
+ {"elf32-bigriscv", {ELF::EM_RISCV, false, false}},
+ {"elf64-bigriscv", {ELF::EM_RISCV, true, false}},
// PowerPC
{"elf32-powerpc", {ELF::EM_PPC, false, false}},
{"elf32-powerpcle", {ELF::EM_PPC, false, true}},