//===------------- llubi.cpp - LLVM UB-aware Interpreter --------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This utility provides an UB-aware interpreter for programs in LLVM bitcode. // It is not built on top of the existing ExecutionEngine interface, but instead // implements its own value representation, state tracking and interpreter loop. // //===----------------------------------------------------------------------===// #include "lib/Context.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Format.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt InputFile(cl::desc(""), cl::Positional, cl::init("-")); static cl::list InputArgv(cl::ConsumeAfter, cl::desc("...")); static cl::opt EntryFunc("entry-function", cl::desc("Specify the entry function (default = 'main') " "of the executable"), cl::value_desc("function"), cl::init("main")); static cl::opt FakeArgv0("fake-argv0", cl::desc("Override the 'argv[0]' value passed into the executing" " program"), cl::value_desc("executable")); static cl::opt Verbose("verbose", cl::desc("Print results for each instruction executed."), cl::init(false)); cl::OptionCategory InterpreterCategory("Interpreter Options"); static cl::opt MaxMem( "max-mem", cl::desc("Max amount of memory (in bytes) that can be allocated by the" " program, including stack, heap, and global variables." " Set to 0 to disable the limit."), cl::value_desc("N"), cl::init(0), cl::cat(InterpreterCategory)); static cl::opt MaxSteps("max-steps", cl::desc("Max number of instructions executed." " Set to 0 to disable the limit."), cl::value_desc("N"), cl::init(0), cl::cat(InterpreterCategory)); static cl::opt MaxStackDepth( "max-stack-depth", cl::desc("Max stack depth (default = 256). Set to 0 to disable the limit."), cl::value_desc("N"), cl::init(256), cl::cat(InterpreterCategory)); static cl::opt VScale("vscale", cl::desc("The value of llvm.vscale (default = 4)"), cl::value_desc("N"), cl::init(4), cl::cat(InterpreterCategory)); class VerboseEventHandler : public ubi::EventHandler { public: bool onInstructionExecuted(Instruction &I, const ubi::AnyValue &Result) override { if (Result.isNone()) { errs() << I << '\n'; } else { errs() << I << " => " << Result << '\n'; } return true; } void onImmediateUB(StringRef Msg) override { errs() << "Immediate UB detected: " << Msg << '\n'; } bool onBBJump(Instruction &I, BasicBlock &To) override { errs() << I << " jump to "; To.printAsOperand(errs(), /*PrintType=*/false); return true; } bool onFunctionEntry(Function &F, ArrayRef Args, CallBase *CallSite) override { errs() << "Entering function: " << F.getName() << '\n'; size_t ArgSize = F.arg_size(); for (auto &&[Idx, Arg] : enumerate(Args)) { if (Idx >= ArgSize) errs() << " vaarg[" << (Idx - ArgSize) << "] = " << Arg << '\n'; else errs() << " " << *F.getArg(Idx) << " = " << Arg << '\n'; } return true; } bool onFunctionExit(Function &F, const ubi::AnyValue &RetVal) override { errs() << "Exiting function: " << F.getName() << '\n'; return true; } void onUnrecognizedInstruction(Instruction &I) override { errs() << "Unrecognized instruction: " << I << '\n'; } }; int main(int argc, char **argv) { InitLLVM X(argc, argv); cl::ParseCommandLineOptions(argc, argv, "llvm ub-aware interpreter\n"); if (EntryFunc.empty()) { WithColor::error() << "--entry-function name cannot be empty\n"; return 1; } LLVMContext Context; // Load the bitcode... SMDiagnostic Err; std::unique_ptr Owner = parseIRFile(InputFile, Err, Context); Module *Mod = Owner.get(); if (!Mod) { Err.print(argv[0], errs()); return 1; } // If the user specifically requested an argv[0] to pass into the program, // do it now. if (!FakeArgv0.empty()) { InputFile = static_cast(FakeArgv0); } else { // Otherwise, if there is a .bc suffix on the executable strip it off, it // might confuse the program. if (StringRef(InputFile).ends_with(".bc")) InputFile.erase(InputFile.length() - 3); } // Add the module's name to the start of the vector of arguments to main(). InputArgv.insert(InputArgv.begin(), InputFile); // Initialize the execution context and set parameters. ubi::Context Ctx(*Mod); Ctx.setMemoryLimit(MaxMem); Ctx.setVScale(VScale); Ctx.setMaxSteps(MaxSteps); Ctx.setMaxStackDepth(MaxStackDepth); // Call the main function from M as if its signature were: // int main (int argc, char **argv) // using the contents of Args to determine argc & argv Function *EntryFn = Mod->getFunction(EntryFunc); if (!EntryFn) { WithColor::error() << '\'' << EntryFunc << "\' function not found in module.\n"; return 1; } TargetLibraryInfo TLI(Ctx.getTLIImpl()); Type *IntTy = IntegerType::get(Ctx.getContext(), TLI.getIntSize()); auto *MainFuncTy = FunctionType::get( IntTy, {IntTy, PointerType::getUnqual(Ctx.getContext())}, false); SmallVector Args; if (EntryFn->getFunctionType() == MainFuncTy) { Args.push_back( Ctx.getConstantValue(ConstantInt::get(IntTy, InputArgv.size()))); uint32_t PtrSize = Ctx.getDataLayout().getPointerSize(); uint64_t PtrsSize = PtrSize * (InputArgv.size() + 1); auto ArgvPtrsMem = Ctx.allocate(PtrsSize, 8, "argv", /*AS=*/0, ubi::MemInitKind::Zeroed); if (!ArgvPtrsMem) { WithColor::error() << "Failed to allocate memory for argv pointers.\n"; return 1; } for (const auto &[Idx, Arg] : enumerate(InputArgv)) { uint64_t Size = Arg.length() + 1; auto ArgvStrMem = Ctx.allocate(Size, 8, "argv_str", /*AS=*/0, ubi::MemInitKind::Zeroed); if (!ArgvStrMem) { WithColor::error() << "Failed to allocate memory for argv strings.\n"; return 1; } ubi::Pointer ArgPtr = Ctx.deriveFromMemoryObject(ArgvStrMem); ArgvStrMem->writeRawBytes(0, Arg.c_str(), Arg.length()); ArgvPtrsMem->writePointer(Idx * PtrSize, ArgPtr, Ctx.getDataLayout()); } Args.push_back(Ctx.deriveFromMemoryObject(ArgvPtrsMem)); } else if (!EntryFn->arg_empty()) { // If the signature does not match (e.g., llvm-reduce change the signature // of main), it will pass null values for all arguments. WithColor::warning() << "The signature of function '" << EntryFunc << "' does not match 'int main(int, char**)', passing null values for " "all arguments.\n"; Args.reserve(EntryFn->arg_size()); for (Argument &Arg : EntryFn->args()) Args.push_back(ubi::AnyValue::getNullValue(Ctx, Arg.getType())); } ubi::EventHandler NoopHandler; VerboseEventHandler VerboseHandler; ubi::AnyValue RetVal; if (!Ctx.runFunction(*EntryFn, Args, RetVal, Verbose ? VerboseHandler : NoopHandler)) { WithColor::error() << "Execution of function '" << EntryFunc << "' failed.\n"; return 1; } // If the function returns an integer, return that as the exit code. if (EntryFn->getReturnType()->isIntegerTy()) { assert(!RetVal.isNone() && "Expected a return value from entry function"); if (RetVal.isPoison()) { WithColor::error() << "Execution of function '" << EntryFunc << "' resulted in poison return value.\n"; return 1; } APInt Result = RetVal.asInteger(); return (int)Result.extractBitsAsZExtValue( std::min(Result.getBitWidth(), 8U), 0); } return 0; }