//===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Bitcode/BitcodeReader.h"
#include "MetadataLoader.h"
#include "ValueList.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Bitcode/BitcodeCommon.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <system_error>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
// Hidden command-line option "print-summary-global-ids" (initialised to
// false). Per its description it prints the global id for each value when
// reading the module summary.
static cl::opt<bool> PrintSummaryGUIDs(
"print-summary-global-ids", cl::init(false), cl::Hidden,
cl::desc(
"Print the global id for each value when reading the module summary"));
// Hidden command-line option "expand-constant-exprs". Per its description it
// is a testing aid that expands constant expressions to instructions.
static cl::opt<bool> ExpandConstantExprs(
"expand-constant-exprs", cl::Hidden,
cl::desc(
"Expand constant expressions to instructions for testing purposes"));
namespace {
// File-local magic constant. The value encodes a date (see the trailing
// comment); its consumer is not visible in this part of the file --
// NOTE(review): presumably used when decoding switch instruction records.
enum {
SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex
};
} // end anonymous namespace
/// Build a StringError carrying \p Message and tagged with the
/// BitcodeError::CorruptedBitcode error code.
static Error error(const Twine &Message) {
  std::error_code Code = make_error_code(BitcodeError::CorruptedBitcode);
  return make_error<StringError>(Message, Code);
}
/// Check that \p Stream begins with the bitcode magic: the bytes 'B', 'C'
/// followed by the four nibbles 0x0, 0xC, 0xE, 0xD.
///
/// Returns success when the header matches. Otherwise returns either the
/// error produced by the underlying read or an illegal_byte_sequence string
/// error describing the mismatch.
static Error hasInvalidBitcodeHeader(BitstreamCursor &Stream) {
  if (!Stream.canSkipToPos(4))
    return createStringError(std::errc::illegal_byte_sequence,
                             "file too small to contain bitcode header");

  // Reads Width bits from the stream and compares them against Wanted.
  auto ExpectBits = [&Stream](unsigned Width, unsigned Wanted) -> Error {
    Expected<SimpleBitstreamCursor::word_t> Got = Stream.Read(Width);
    if (!Got)
      return Got.takeError();
    if (Got.get() != Wanted)
      return createStringError(std::errc::illegal_byte_sequence,
                               "file doesn't start with bitcode header");
    return Error::success();
  };

  // Two full bytes first...
  for (unsigned Byte : {'B', 'C'})
    if (Error E = ExpectBits(8, Byte))
      return E;

  // ...then four 4-bit fields.
  for (unsigned Nibble : {0x0, 0xC, 0xE, 0xD})
    if (Error E = ExpectBits(4, Nibble))
      return E;

  return Error::success();
}
/// Create a BitstreamCursor over \p Buffer, positioned just past the bitcode
/// header.
///
/// The buffer size must be a multiple of four bytes. If the buffer starts
/// with a bitcode wrapper header, that header is skipped so the cursor only
/// covers the wrapped bitcode. Returns an error when the size, the wrapper
/// header, or the bitcode header is invalid.
static Expected<BitstreamCursor> initStream(MemoryBufferRef Buffer) {
  const size_t NumBytes = Buffer.getBufferSize();
  const unsigned char *BufPtr =
      (const unsigned char *)Buffer.getBufferStart();
  const unsigned char *BufEnd = BufPtr + NumBytes;

  if ((NumBytes & 3) != 0)
    return error("Invalid bitcode signature");

  // If we have a wrapper header, parse it and ignore the non-bc file contents.
  // The magic number is 0x0B17C0DE stored in little endian.
  if (isBitcodeWrapper(BufPtr, BufEnd) &&
      SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
    return error("Invalid bitcode wrapper header");

  BitstreamCursor Stream(ArrayRef<uint8_t>(BufPtr, BufEnd));
  if (Error HeaderErr = hasInvalidBitcodeHeader(Stream))
    return std::move(HeaderErr);

  return std::move(Stream);
}
/// Append the record elements from index \p Idx onwards to \p Result, one
/// element per character.
///
/// \returns true on failure (i.e. \p Idx lies past the end of \p Record, in
/// which case \p Result is left untouched), false on success.
template <typename StrTy>
static bool convertToString(ArrayRef<uint64_t> Record, unsigned Idx,
                            StrTy &Result) {
  const bool PastEnd = Idx > Record.size();
  if (!PastEnd)
    Result.append(Record.begin() + Idx, Record.end());
  return PastEnd;
}
// Remove the MD_tbaa metadata attachment from every instruction of every
// function in the module. Functions that are still materializable are left
// alone.
static void stripTBAA(Module *M) {
  for (Function &Fn : *M) {
    if (!Fn.isMaterializable()) {
      for (Instruction &Inst : instructions(Fn))
        Inst.setMetadata(LLVMContext::MD_tbaa, nullptr);
    }
  }
}
/// Read the "IDENTIFICATION_BLOCK_ID" block, do some basic enforcement on the
/// "epoch" encoded in the bitcode, and return the producer name if any.
///
/// \param Stream cursor positioned at the identification sub-block; the
///        block is entered here.
/// \returns the producer identification string accumulated from
///          IDENTIFICATION_CODE_STRING records (empty if none was present),
///          or an error if the block is malformed, contains an unknown
///          record, has an empty epoch record, or carries an epoch other
///          than bitc::BITCODE_CURRENT_EPOCH.
static Expected<std::string> readIdentificationBlock(BitstreamCursor &Stream) {
  if (Error Err = Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
    return std::move(Err);

  // Read all the records.
  SmallVector<uint64_t, 64> Record;
  std::string ProducerIdentification;

  while (true) {
    BitstreamEntry Entry;
    if (Error E = Stream.advance().moveInto(Entry))
      return std::move(E);

    switch (Entry.Kind) {
    default:
    case BitstreamEntry::Error:
      return error("Malformed block");
    case BitstreamEntry::EndBlock:
      return ProducerIdentification;
    case BitstreamEntry::Record:
      // The interesting case.
      break;
    }

    // Read a record.
    Record.clear();
    Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
    if (!MaybeBitCode)
      return MaybeBitCode.takeError();

    switch (MaybeBitCode.get()) {
    default: // Default behavior: reject
      return error("Invalid value");
    case bitc::IDENTIFICATION_CODE_STRING: // IDENTIFICATION: [strchr x N]
      // Index 0 is never past the end of the record, so this cannot fail.
      convertToString(Record, 0, ProducerIdentification);
      break;
    case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#]
      // A malformed file may carry an EPOCH record with no operands; reject
      // it instead of indexing past the end of the record.
      if (Record.empty())
        return error("Invalid epoch record");
      unsigned epoch = (unsigned)Record[0];
      if (epoch != bitc::BITCODE_CURRENT_EPOCH) {
        return error(
            Twine("Incompatible epoch: Bitcode '") + Twine(epoch) +
            "' vs current: '" + Twine(bitc::BITCODE_CURRENT_EPOCH) + "'");
      }
      break;
    }
    }
  }
}
/// Scan the top level of \p Stream for an identification block and return
/// the producer string it contains.
///
/// Other sub-blocks and any top-level records are skipped. Returns an empty
/// string if the end of the stream is reached without finding an
/// identification block, and an error on a malformed stream.
static Expected<std::string> readIdentificationCode(BitstreamCursor &Stream) {
  // We expect a number of well-defined blocks, though we don't necessarily
  // need to understand them all.
  for (;;) {
    if (Stream.AtEndOfStream())
      return "";

    BitstreamEntry Entry;
    if (Error AdvanceErr = Stream.advance().moveInto(Entry))
      return std::move(AdvanceErr);

    switch (Entry.Kind) {
    case BitstreamEntry::Record:
      // Top-level records carry nothing we need here.
      if (Error SkipErr = Stream.skipRecord(Entry.ID).takeError())
        return std::move(SkipErr);
      continue;

    case BitstreamEntry::SubBlock:
      if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID)
        return readIdentificationBlock(Stream);
      // Ignore other sub-blocks.
      if (Error SkipErr = Stream.SkipBlock())
        return std::move(SkipErr);
      continue;

    case BitstreamEntry::EndBlock:
    case BitstreamEntry::Error:
      return error("Malformed block");
    }
  }
}
/// Enter the module block at \p Stream and report whether any of its
/// SECTIONNAME records names an Objective-C category section.
///
/// \returns true as soon as a section name containing
///          "__DATA,__objc_catlist" or "__OBJC,__category" is seen, false if
///          the block ends without one, or an error on malformed input.
static Expected<bool> hasObjCCategoryInModule(BitstreamCursor &Stream) {
  if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
    return std::move(Err);

  SmallVector<uint64_t, 64> Record;

  // Read all the records for this module.
  for (;;) {
    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
    if (!MaybeEntry)
      return MaybeEntry.takeError();
    BitstreamEntry Entry = MaybeEntry.get();

    switch (Entry.Kind) {
    case BitstreamEntry::Record:
      // The interesting case.
      break;
    case BitstreamEntry::EndBlock:
      return false;
    case BitstreamEntry::SubBlock: // Handled for us already.
    case BitstreamEntry::Error:
      return error("Malformed block");
    }

    // Read a record into a scratch buffer that is emptied for each record.
    Record.clear();
    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
    if (!MaybeRecord)
      return MaybeRecord.takeError();

    // Default behavior, ignore unknown content.
    if (MaybeRecord.get() != bitc::MODULE_CODE_SECTIONNAME)
      continue;

    // SECTIONNAME: [strchr x N]
    std::string SectionName;
    if (convertToString(Record, 0, SectionName))
      return error("Invalid section name record");

    // Check for the i386 and other (x86_64, ARM) conventions
    const bool HasCatList =
        SectionName.find("__DATA,__objc_catlist") != std::string::npos;
    if (HasCatList ||
        SectionName.find("__OBJC,__category") != std::string::npos)
      return true;
  }
  llvm_unreachable("Exit infinite loop");
}
/// Walk the top level of \p Stream until a module block is found and report
/// whether that module contains an Objective-C category section.
///
/// Non-module sub-blocks and top-level records are skipped. Returns false if
/// an end-of-block is reached first, and an error on a malformed stream.
static Expected<bool> hasObjCCategory(BitstreamCursor &Stream) {
  // We expect a number of well-defined blocks, though we don't necessarily
  // need to understand them all.
  for (;;) {
    BitstreamEntry Entry;
    if (Error AdvanceErr = Stream.advance().moveInto(Entry))
      return std::move(AdvanceErr);

    switch (Entry.Kind) {
    case BitstreamEntry::SubBlock:
      if (Entry.ID == bitc::MODULE_BLOCK_ID)
        return hasObjCCategoryInModule(Stream);
      // Ignore other sub-blocks.
      if (Error SkipErr = Stream.SkipBlock())
        return std::move(SkipErr);
      continue;

    case BitstreamEntry::Record:
      if (Error SkipErr = Stream.skipRecord(Entry.ID).takeError())
        return std::move(SkipErr);
      continue;

    case BitstreamEntry::EndBlock:
      return false;

    case BitstreamEntry::Error:
      return error("Malformed block");
    }
  }
}
/// Enter the module block at \p Stream and return the target triple recorded
/// in it.
///
/// If several TRIPLE records are present the last one wins; if none is
/// present the result is an empty string. Unknown records are ignored.
/// Returns an error on malformed input.
static Expected<std::string> readModuleTriple(BitstreamCursor &Stream) {
  if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
    return std::move(Err);

  SmallVector<uint64_t, 64> Record;
  std::string Triple;

  // Read all the records for this module.
  for (;;) {
    Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
    if (!MaybeEntry)
      return MaybeEntry.takeError();
    BitstreamEntry Entry = MaybeEntry.get();

    switch (Entry.Kind) {
    case BitstreamEntry::Record:
      // The interesting case.
      break;
    case BitstreamEntry::EndBlock:
      return Triple;
    case BitstreamEntry::SubBlock: // Handled for us already.
    case BitstreamEntry::Error:
      return error("Malformed block");
    }

    // Read a record into a scratch buffer that is emptied for each record.
    Record.clear();
    Expected<unsigned> MaybeRecord = Stream.readRecord(Entry.ID, Record);
    if (!MaybeRecord)
      return MaybeRecord.takeError();

    // Default behavior, ignore unknown content.
    if (MaybeRecord.get() != bitc::MODULE_CODE_TRIPLE)
      continue;

    // TRIPLE: [strchr x N]
    std::string Parsed;
    if (convertToString(Record, 0, Parsed))
      return error("Invalid triple record");
    Triple = std::move(Parsed);
  }
  llvm_unreachable("Exit infinite loop");
}
/// Walk the top level of \p Stream until a module block is found and return
/// the target triple recorded in it.
///
/// Non-module sub-blocks and top-level records are skipped. Returns an empty
/// string if an end-of-block is reached first, and an error on a malformed
/// stream.
static Expected<std::string> readTriple(BitstreamCursor &Stream) {
  // We expect a number of well-defined blocks, though we don't necessarily
  // need to understand them all.
  for (;;) {
    Expected<BitstreamEntry> MaybeEntry = Stream.advance();
    if (!MaybeEntry)
      return MaybeEntry.takeError();
    BitstreamEntry Entry = MaybeEntry.get();

    switch (Entry.Kind) {
    case BitstreamEntry::SubBlock:
      if (Entry.ID == bitc::MODULE_BLOCK_ID)
        return readModuleTriple(Stream);
      // Ignore other sub-blocks.
      if (Error SkipErr = Stream.SkipBlock())
        return std::move(SkipErr);
      continue;

    case BitstreamEntry::Record: {
      llvm::Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID);
      if (!Skipped)
        return Skipped.takeError();
      continue;
    }

    case BitstreamEntry::EndBlock:
      return "";

    case BitstreamEntry::Error:
      return error("Malformed block");
    }
  }
}
namespace {
/// Common base for the readers declared later in this file
/// (BitcodeReader and ModuleSummaryIndexBitcodeReader derive from it). It
/// owns the bitstream cursor and its block-info, a reference to the string
/// table, and the producer identification used to decorate error messages.
class BitcodeReaderBase {
protected:
/// Take ownership of \p Stream and remember \p Strtab. The cursor is wired to
/// this object's BlockInfo member.
BitcodeReaderBase(BitstreamCursor Stream, StringRef Strtab)
: Stream(std::move(Stream)), Strtab(Strtab) {
// The parameter shadows the member, hence the explicit this->.
this->Stream.setBlockInfo(&BlockInfo);
}
/// Block-info storage registered with Stream in the constructor.
BitstreamBlockInfo BlockInfo;
/// Cursor over the bitcode being read.
BitstreamCursor Stream;
/// String table contents; a non-owning view.
StringRef Strtab;
/// In version 2 of the bitcode we store names of global values and comdats in
/// a string table rather than in the VST.
bool UseStrtab = false;
/// Validate a module version record and update UseStrtab accordingly.
/// Returns the module version on success.
Expected<unsigned> parseVersionRecord(ArrayRef<uint64_t> Record);
/// If this module uses a string table, pop the reference to the string table
/// and return the referenced string and the rest of the record. Otherwise
/// just return the record itself.
std::pair<StringRef, ArrayRef<uint64_t>>
readNameFromStrtab(ArrayRef<uint64_t> Record);
/// Defined outside this part of the file.
Error readBlockInfo();
// Contains an arbitrary and optional string identifying the bitcode producer
std::string ProducerIdentification;
/// Build an error for \p Message; when ProducerIdentification is non-empty
/// the producer and reader versions are appended to the message.
Error error(const Twine &Message);
};
} // end anonymous namespace
/// Produce a corrupted-bitcode error for \p Message. If a producer
/// identification is known, the producer string and this reader's LLVM
/// version are appended so mismatched toolchains are easy to spot.
Error BitcodeReaderBase::error(const Twine &Message) {
  std::string Text = Message.str();
  if (!ProducerIdentification.empty()) {
    Text += " (Producer: '";
    Text += ProducerIdentification;
    Text += "' Reader: 'LLVM " LLVM_VERSION_STRING "')";
  }
  // Delegate to the file-level helper of the same name.
  return ::error(Text);
}
/// Decode a module version record.
///
/// \param Record the record operands; the first one is the version.
/// \returns the module version (0, 1 or 2) on success, or an error when the
///          record is empty or the version is greater than 2. As a side
///          effect UseStrtab is set to whether the version is 2 or newer.
Expected<unsigned>
BitcodeReaderBase::parseVersionRecord(ArrayRef<uint64_t> Record) {
  if (Record.empty())
    return error("Invalid version record");

  unsigned Version = Record.front();
  if (Version > 2)
    return error("Invalid value");

  // Only version 2 (the maximum accepted above) uses the string table.
  UseStrtab = !(Version < 2);
  return Version;
}
/// If this module uses a string table, pop the (offset, size) string table
/// reference from the front of \p Record and return the referenced string
/// together with the rest of the record. Otherwise return an empty name and
/// the record unchanged.
///
/// An invalid reference -- a record too short to hold the offset/size pair,
/// or a range that does not lie entirely inside the string table -- yields an
/// empty name and an empty record, so that the caller's own record-size check
/// reports the problem.
std::pair<StringRef, ArrayRef<uint64_t>>
BitcodeReaderBase::readNameFromStrtab(ArrayRef<uint64_t> Record) {
  if (!UseStrtab)
    return {"", Record};

  // Invalid reference. Let the caller complain about the record being empty.
  if (Record.size() < 2)
    return {"", {}};

  const uint64_t Offset = Record[0];
  const uint64_t Size = Record[1];
  // Compare without forming Offset + Size: the sum can wrap around in 64 bits
  // and make an out-of-range reference look valid.
  if (Offset > Strtab.size() || Size > Strtab.size() - Offset)
    return {"", {}};

  return {StringRef(Strtab.data() + Offset, Size), Record.slice(2)};
}
namespace {
/// This represents a constant expression or constant aggregate using a custom
/// structure internal to the bitcode reader. Later, this structure will be
/// expanded by materializeValue() either into a constant expression/aggregate,
/// or into an instruction sequence at the point of use. This allows us to
/// upgrade bitcode using constant expressions even if this kind of constant
/// expression is no longer supported.
///
/// The operand value IDs are stored as trailing `unsigned` objects directly
/// after the object, so instances must be created through create().
class BitcodeConstant final : public Value,
TrailingObjects<BitcodeConstant, unsigned> {
friend TrailingObjects;
// Value subclass ID: Pick largest possible value to avoid any clashes.
static constexpr uint8_t SubclassID = 255;
public:
// Opcodes used for non-expressions. This includes constant aggregates
// (struct, array, vector) that might need expansion, as well as non-leaf
// constants that don't need expansion (no_cfi, dso_local, blockaddress),
// but still go through BitcodeConstant to avoid different uselist orders
// between the two cases.
static constexpr uint8_t ConstantStructOpcode = 255;
static constexpr uint8_t ConstantArrayOpcode = 254;
static constexpr uint8_t ConstantVectorOpcode = 253;
static constexpr uint8_t NoCFIOpcode = 252;
static constexpr uint8_t DSOLocalEquivalentOpcode = 251;
static constexpr uint8_t BlockAddressOpcode = 250;
// Lowest-valued special opcode; the special opcodes above count down from 255.
static constexpr uint8_t FirstSpecialOpcode = BlockAddressOpcode;
// Separate struct to make passing different number of parameters to
// BitcodeConstant::create() more convenient.
struct ExtraInfo {
uint8_t Opcode;
uint8_t Flags;
unsigned Extra;
Type *SrcElemTy;
// Intentionally implicit-friendly: only the opcode is mandatory, the
// remaining fields default to 0 / nullptr.
ExtraInfo(uint8_t Opcode, uint8_t Flags = 0, unsigned Extra = 0,
Type *SrcElemTy = nullptr)
: Opcode(Opcode), Flags(Flags), Extra(Extra), SrcElemTy(SrcElemTy) {}
};
// Either an instruction opcode or one of the special opcodes above.
uint8_t Opcode;
uint8_t Flags;
// Number of trailing operand IDs stored after this object.
unsigned NumOperands;
unsigned Extra; // GEP inrange index or blockaddress BB id.
Type *SrcElemTy; // GEP source element type.
private:
// Copies the fields of Info and writes OpIDs into the trailing storage.
// Private: the trailing storage must have been allocated by create().
BitcodeConstant(Type *Ty, const ExtraInfo &Info, ArrayRef<unsigned> OpIDs)
: Value(Ty, SubclassID), Opcode(Info.Opcode), Flags(Info.Flags),
NumOperands(OpIDs.size()), Extra(Info.Extra),
SrcElemTy(Info.SrcElemTy) {
std::uninitialized_copy(OpIDs.begin(), OpIDs.end(),
getTrailingObjects<unsigned>());
}
BitcodeConstant &operator=(const BitcodeConstant &) = delete;
public:
/// Allocate a BitcodeConstant from \p A with room for OpIDs.size() trailing
/// operand IDs and construct it in place.
static BitcodeConstant *create(BumpPtrAllocator &A, Type *Ty,
const ExtraInfo &Info,
ArrayRef<unsigned> OpIDs) {
void *Mem = A.Allocate(totalSizeToAlloc<unsigned>(OpIDs.size()),
alignof(BitcodeConstant));
return new (Mem) BitcodeConstant(Ty, Info, OpIDs);
}
/// Support for isa/cast/dyn_cast: identified by the reserved SubclassID.
static bool classof(const Value *V) { return V->getValueID() == SubclassID; }
/// The operand value IDs stored in the trailing storage.
ArrayRef<unsigned> getOperandIDs() const {
return makeArrayRef(getTrailingObjects<unsigned>(), NumOperands);
}
/// For GEPs only: the inrange index held in Extra, or std::nullopt when
/// Extra is the sentinel (unsigned)-1.
std::optional<unsigned> getInRangeIndex() const {
assert(Opcode == Instruction::GetElementPtr);
if (Extra == (unsigned)-1)
return std::nullopt;
return Extra;
}
/// Name of Opcode as reported by Instruction::getOpcodeName.
const char *getOpcodeName() const {
return Instruction::getOpcodeName(Opcode);
}
};
/// Reader that parses a bitcode stream into a Module and acts as the
/// GVMaterializer for it (it overrides materialize(), materializeModule(),
/// materializeMetadata(), setStripDebugInfo() and
/// getIdentifiedStructTypes()). Most member functions are only declared
/// here; their definitions are outside this part of the file.
class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
LLVMContext &Context;
Module *TheModule = nullptr;
// Next offset to start scanning for lazy parsing of function bodies.
uint64_t NextUnreadBit = 0;
// Last function offset found in the VST.
uint64_t LastFunctionBlockBit = 0;
bool SeenValueSymbolTable = false;
uint64_t VSTOffset = 0;
std::vector<std::string> SectionTable;
std::vector<std::string> GCTable;
std::vector<Type *> TypeList;
/// Track type IDs of contained types. Order is the same as the contained
/// types of a Type*. This is used during upgrades of typed pointer IR in
/// opaque pointer mode.
DenseMap<unsigned, SmallVector<unsigned, 1>> ContainedTypeIDs;
/// In some cases, we need to create a type ID for a type that was not
/// explicitly encoded in the bitcode, or we don't know about at the current
/// point. For example, a global may explicitly encode the value type ID, but
/// not have a type ID for the pointer to value type, for which we create a
/// virtual type ID instead. This map stores the new type ID that was created
/// for the given pair of Type and contained type ID.
DenseMap<std::pair<Type *, unsigned>, unsigned> VirtualTypeIDs;
DenseMap<Function *, unsigned> FunctionTypeIDs;
/// Allocator for BitcodeConstants. This should come before ValueList,
/// because the ValueList might hold ValueHandles to these constants, so
/// ValueList must be destroyed before Alloc.
BumpPtrAllocator Alloc;
BitcodeReaderValueList ValueList;
std::optional<MetadataLoader> MDLoader;
std::vector<Comdat *> ComdatList;
DenseSet<GlobalObject *> ImplicitComdatObjects;
SmallVector<Instruction *, 64> InstructionList;
std::vector<std::pair<GlobalVariable *, unsigned>> GlobalInits;
std::vector<std::pair<GlobalValue *, unsigned>> IndirectSymbolInits;
// A function paired with unsigned IDs for its personality function, prefix
// and prologue operands -- NOTE(review): presumably value IDs resolved once
// the constants are available; confirm against the definitions.
struct FunctionOperandInfo {
Function *F;
unsigned PersonalityFn;
unsigned Prefix;
unsigned Prologue;
};
std::vector<FunctionOperandInfo> FunctionOperands;
/// The set of attributes by index. Index zero in the file is for null, and
/// is thus not represented here. As such all indices are off by one.
std::vector<AttributeList> MAttributes;
/// The set of attribute groups.
std::map<unsigned, AttributeList> MAttributeGroups;
/// While parsing a function body, this is a list of the basic blocks for the
/// function.
std::vector<BasicBlock*> FunctionBBs;
// When reading the module header, this list is populated with functions that
// have bodies later in the file.
std::vector<Function*> FunctionsWithBodies;
// When intrinsic functions are encountered which require upgrading they are
// stored here with their replacement function.
using UpdatedIntrinsicMap = DenseMap<Function *, Function *>;
UpdatedIntrinsicMap UpgradedIntrinsics;
// Several operations happen after the module header has been read, but
// before function bodies are processed. This keeps track of whether
// we've done this yet.
bool SeenFirstFunctionBody = false;
/// When function bodies are initially scanned, this map contains info about
/// where to find deferred function body in the stream.
DenseMap<Function*, uint64_t> DeferredFunctionInfo;
/// When Metadata block is initially scanned when parsing the module, we may
/// choose to defer parsing of the metadata. This vector contains info about
/// which Metadata blocks are deferred.
std::vector<uint64_t> DeferredMetadataInfo;
/// These are basic blocks forward-referenced by block addresses. They are
/// inserted lazily into functions when they're loaded. The basic block ID is
/// its index into the vector.
DenseMap<Function *, std::vector<BasicBlock *>> BasicBlockFwdRefs;
std::deque<Function *> BasicBlockFwdRefQueue;
/// These are Functions that contain BlockAddresses which refer a different
/// Function. When parsing the different Function, queue Functions that refer
/// to the different Function. Those Functions must be materialized in order
/// to resolve their BlockAddress constants before the different Function
/// gets moved into another Module.
std::vector<Function *> BackwardRefFunctions;
/// Indicates that we are using a new encoding for instruction operands where
/// most operands in the current FUNCTION_BLOCK are encoded relative to the
/// instruction number, for a more compact encoding. Some instruction
/// operands are not relative to the instruction ID: basic block numbers, and
/// types. Once the old style function blocks have been phased out, we would
/// not need this flag.
bool UseRelativeIDs = false;
/// True if all functions will be materialized, negating the need to process
/// (e.g.) blockaddress forward references.
bool WillMaterializeAllForwardRefs = false;
bool StripDebugInfo = false;
TBAAVerifier TBAAVerifyHelper;
std::vector<std::string> BundleTags;
SmallVector<SyncScope::ID, 8> SSIDs;
public:
BitcodeReader(BitstreamCursor Stream, StringRef Strtab,
StringRef ProducerIdentification, LLVMContext &Context);
/// Materialize every function queued in BasicBlockFwdRefQueue, then every
/// function in BackwardRefFunctions.
Error materializeForwardReferencedFunctions();
Error materialize(GlobalValue *GV) override;
Error materializeModule() override;
std::vector<StructType *> getIdentifiedStructTypes() const override;
/// Main interface to parsing a bitcode buffer.
/// \returns an Error (the declared return type, not a bool); the failure
/// conditions are determined by the definition, which is outside this part
/// of the file.
Error parseBitcodeInto(
Module *M, bool ShouldLazyLoadMetadata, bool IsImporting,
DataLayoutCallbackTy DataLayoutCallback);
static uint64_t decodeSignRotatedValue(uint64_t V);
/// Materialize any deferred Metadata block.
Error materializeMetadata() override;
void setStripDebugInfo() override;
private:
std::vector<StructType *> IdentifiedStructTypes;
StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
StructType *createIdentifiedStructType(LLVMContext &Context);
// Sentinel type ID (all bits set).
static constexpr unsigned InvalidTypeID = ~0u;
Type *getTypeByID(unsigned ID);
Type *getPtrElementTypeByID(unsigned ID);
unsigned getContainedTypeID(unsigned ID, unsigned Idx = 0);
unsigned getVirtualTypeID(Type *Ty, ArrayRef<unsigned> ContainedTypeIDs = {});
Expected<Value *> materializeValue(unsigned ValID, BasicBlock *InsertBB);
Expected<Constant *> getValueForInitializer(unsigned ID);
/// Look up the function-local value with the given ID. Metadata-typed values
/// are wrapped in a MetadataAsValue; everything else goes through ValueList,
/// which may hand back a forward reference.
Value *getFnValueByID(unsigned ID, Type *Ty, unsigned TyID,
BasicBlock *ConstExprInsertBB) {
if (Ty && Ty->isMetadataTy())
return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID));
return ValueList.getValueFwdRef(ID, Ty, TyID, ConstExprInsertBB);
}
/// Fetch metadata by ID through the metadata loader. MDLoader is
/// dereferenced unconditionally, so it must have been set up already.
Metadata *getFnMetadataByID(unsigned ID) {
return MDLoader->getMetadataFwdRefOrLoad(ID);
}
/// Basic block of the function currently being parsed, or nullptr when ID is
/// out of range.
BasicBlock *getBasicBlock(unsigned ID) const {
if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID
return FunctionBBs[ID];
}
/// Attribute list for the 1-based index \p i, or an empty list when the
/// index is out of range.
AttributeList getAttributes(unsigned i) const {
// For i == 0 the unsigned subtraction wraps around to UINT_MAX, which fails
// the bounds check for any realistic table size, so index zero yields the
// empty list.
if (i-1 < MAttributes.size())
return MAttributes[i-1];
return AttributeList();
}
/// Read a value/type pair out of the specified record from slot 'Slot'.
/// Increment Slot past the number of slots used in the record. Return true on
/// failure.
bool getValueTypePair(const SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
unsigned InstNum, Value *&ResVal, unsigned &TypeID,
BasicBlock *ConstExprInsertBB) {
if (Slot == Record.size()) return true;
unsigned ValNo = (unsigned)Record[Slot++];
// Adjust the ValNo, if it was encoded relative to the InstNum.
if (UseRelativeIDs)
ValNo = InstNum - ValNo;
if (ValNo < InstNum) {
// If this is not a forward reference, just return the value we already
// have.
TypeID = ValueList.getTypeID(ValNo);
ResVal = getFnValueByID(ValNo, nullptr, TypeID, ConstExprInsertBB);
assert((!ResVal || ResVal->getType() == getTypeByID(TypeID)) &&
"Incorrect type ID stored for value");
return ResVal == nullptr;
}
// Forward reference: the type ID is encoded explicitly in the next slot.
if (Slot == Record.size())
return true;
TypeID = (unsigned)Record[Slot++];
ResVal = getFnValueByID(ValNo, getTypeByID(TypeID), TypeID,
ConstExprInsertBB);
return ResVal == nullptr;
}
/// Read a value out of the specified record from slot 'Slot'. Increment Slot
/// past the number of slots used by the value in the record. Return true if
/// there is an error.
bool popValue(const SmallVectorImpl<uint64_t> &Record, unsigned &Slot,
unsigned InstNum, Type *Ty, unsigned TyID, Value *&ResVal,
BasicBlock *ConstExprInsertBB) {
if (getValue(Record, Slot, InstNum, Ty, TyID, ResVal, ConstExprInsertBB))
return true;
// All values currently take a single record slot.
++Slot;
return false;
}
/// Like popValue, but does not increment the Slot number.
bool getValue(const SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty, unsigned TyID, Value *&ResVal,
BasicBlock *ConstExprInsertBB) {
ResVal = getValue(Record, Slot, InstNum, Ty, TyID, ConstExprInsertBB);
return ResVal == nullptr;
}
/// Version of getValue that returns ResVal directly, or 0 if there is an
/// error.
Value *getValue(const SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty, unsigned TyID,
BasicBlock *ConstExprInsertBB) {
if (Slot == Record.size()) return nullptr;
unsigned ValNo = (unsigned)Record[Slot];
// Adjust the ValNo, if it was encoded relative to the InstNum.
if (UseRelativeIDs)
ValNo = InstNum - ValNo;
return getFnValueByID(ValNo, Ty, TyID, ConstExprInsertBB);
}
/// Like getValue, but decodes signed VBRs.
Value *getValueSigned(const SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty, unsigned TyID,
BasicBlock *ConstExprInsertBB) {
if (Slot == Record.size()) return nullptr;
unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
// Adjust the ValNo, if it was encoded relative to the InstNum.
if (UseRelativeIDs)
ValNo = InstNum - ValNo;
return getFnValueByID(ValNo, Ty, TyID, ConstExprInsertBB);
}
/// Upgrades old-style typeless byval/sret/inalloca attributes by adding the
/// corresponding argument's pointee type. Also upgrades intrinsics that now
/// require an elementtype attribute.
Error propagateAttributeTypes(CallBase *CB, ArrayRef<unsigned> ArgsTys);
/// Converts alignment exponent (i.e. power of two (or zero)) to the
/// corresponding alignment to use. If alignment is too large, returns
/// a corresponding error code.
Error parseAlignmentValue(uint64_t Exponent, MaybeAlign &Alignment);
Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
// The default data-layout callback always answers std::nullopt.
Error parseModule(
uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false,
DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
return std::nullopt;
});
Error parseComdatRecord(ArrayRef<uint64_t> Record);
Error parseGlobalVarRecord(ArrayRef<uint64_t> Record);
Error parseFunctionRecord(ArrayRef<uint64_t> Record);
Error parseGlobalIndirectSymbolRecord(unsigned BitCode,
ArrayRef<uint64_t> Record);
Error parseAttributeBlock();
Error parseAttributeGroupBlock();
Error parseTypeTable();
Error parseTypeTableBody();
Error parseOperandBundleTags();
Error parseSyncScopeNames();
Expected<Value *> recordValue(SmallVectorImpl<uint64_t> &Record,
unsigned NameIndex, Triple &TT);
void setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, Function *F,
ArrayRef<uint64_t> Record);
Error parseValueSymbolTable(uint64_t Offset = 0);
Error parseGlobalValueSymbolTable();
Error parseConstants();
Error rememberAndSkipFunctionBodies();
Error rememberAndSkipFunctionBody();
/// Save the positions of the Metadata blocks and skip parsing the blocks.
Error rememberAndSkipMetadata();
Error typeCheckLoadStoreInst(Type *ValType, Type *PtrType);
Error parseFunctionBody(Function *F);
Error globalCleanup();
Error resolveGlobalAndIndirectSymbolInits();
Error parseUseLists();
Error findFunctionInStream(
Function *F,
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
SyncScope::ID getDecodedSyncScopeID(unsigned Val);
};
/// Class to manage reading and parsing function summary index bitcode
/// files/sections.
///
/// Results are written into the ModuleSummaryIndex passed to the constructor
/// (held by reference in TheIndex). Member functions are only declared here;
/// their definitions are outside this part of the file.
class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
/// The module index built during parsing.
ModuleSummaryIndex &TheIndex;
/// Indicates whether we have encountered a global value summary section
/// yet during parsing.
bool SeenGlobalValSummary = false;
/// Indicates whether we have already parsed the VST, used for error checking.
bool SeenValueSymbolTable = false;
/// Set to the offset of the VST recorded in the MODULE_CODE_VSTOFFSET record.
/// Used to enable on-demand parsing of the VST.
uint64_t VSTOffset = 0;
// Map to save ValueId to ValueInfo association that was recorded in the
// ValueSymbolTable. It is used after the VST is parsed to convert
// call graph edges read from the function summary from referencing
// callees by their ValueId to using the ValueInfo instead, which is how
// they are recorded in the summary index being built.
// We save a GUID which refers to the same global as the ValueInfo, but
// ignoring the linkage, i.e. for values other than local linkage they are
// identical (this is the second tuple member).
// The third tuple member is the real GUID of the ValueInfo.
DenseMap<unsigned,
std::tuple<ValueInfo, GlobalValue::GUID, GlobalValue::GUID>>
ValueIdToValueInfoMap;
/// Map populated during module path string table parsing, from the
/// module ID to a string reference owned by the index's module
/// path string table, used to correlate with combined index
/// summary records.
DenseMap<uint64_t, StringRef> ModuleIdMap;
/// Original source file name recorded in a bitcode record.
std::string SourceFileName;
/// The string identifier given to this module by the client, normally the
/// path to the bitcode file.
StringRef ModulePath;
/// For per-module summary indexes, the unique numerical identifier given to
/// this module by the client.
unsigned ModuleId;
/// Callback to ask whether a symbol is the prevailing copy when invoked
/// during combined index building.
std::function<bool(GlobalValue::GUID)> IsPrevailing;
/// Saves the stack ids from the STACK_IDS record to consult when adding stack
/// ids from the lists in the callsite and alloc entries to the index.
std::vector<uint64_t> StackIds;
public:
/// \p IsPrevailing is optional and defaults to an empty (null) callback.
ModuleSummaryIndexBitcodeReader(
BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
StringRef ModulePath, unsigned ModuleId,
std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
/// Entry point for parsing; the only public operation besides construction.
Error parseModule();
private:
void setValueGUID(uint64_t ValueID, StringRef ValueName,
GlobalValue::LinkageTypes Linkage,
StringRef SourceFileName);
Error parseValueSymbolTable(
uint64_t Offset,
DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap);
std::vector<ValueInfo> makeRefList(ArrayRef<uint64_t> Record);
std::vector<FunctionSummary::EdgeTy> makeCallList(ArrayRef<uint64_t> Record,
bool IsOldProfileFormat,
bool HasProfile,
bool HasRelBF);
Error parseEntireSummary(unsigned ID);
Error parseModuleStringTable();
void parseTypeIdCompatibleVtableSummaryRecord(ArrayRef<uint64_t> Record);
void parseTypeIdCompatibleVtableInfo(ArrayRef<uint64_t> Record, size_t &Slot,
TypeIdCompatibleVtableInfo &TypeId);
std::vector<FunctionSummary::ParamAccess>
parseParamAccesses(ArrayRef<uint64_t> Record);
// Returns the same tuple shape that ValueIdToValueInfoMap stores.
// NOTE(review): AllowNullValueInfo presumably relaxes a non-null check on the
// looked-up ValueInfo -- confirm against the definition.
template <bool AllowNullValueInfo = false>
std::tuple<ValueInfo, GlobalValue::GUID, GlobalValue::GUID>
getValueInfoFromValueId(unsigned ValueId);
void addThisModule();
ModuleSummaryIndex::ModuleInfo *getThisModule();
};
} // end anonymous namespace
/// Consume \p Err, emitting the message of every contained error through
/// \p Ctx, and convert it to a std::error_code.
///
/// \returns a default-constructed error code when \p Err is success;
///          otherwise the code of the last error handled.
std::error_code llvm::errorToErrorCodeAndEmitErrors(LLVMContext &Ctx,
                                                    Error Err) {
  if (!Err)
    return std::error_code();

  std::error_code Result;
  handleAllErrors(std::move(Err), [&Result, &Ctx](ErrorInfoBase &Info) {
    Result = Info.convertToErrorCode();
    Ctx.emitError(Info.message());
  });
  return Result;
}
/// Construct a reader over \p Stream.
///
/// The value list is sized from the stream's byte size and is given a
/// callback that routes materialization requests back to
/// materializeValue(). The producer identification is copied into the base
/// class so that error messages can mention it.
BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab,
                             StringRef ProducerIdentification,
                             LLVMContext &Context)
    : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context),
      ValueList(this->Stream.SizeInBytes(),
                [this](unsigned ID, BasicBlock *BB) {
                  return materializeValue(ID, BB);
                }) {
  // The parameter shadows the inherited member, hence the explicit this->.
  this->ProducerIdentification = ProducerIdentification.str();
}
/// Materialize every function that is the target of a still-unresolved
/// blockaddress forward reference, followed by every function recorded in
/// BackwardRefFunctions.
///
/// Returns immediately with success when WillMaterializeAllForwardRefs is
/// already set; the flag is also used here to prevent recursion. Fails if a
/// queued function with pending forward references cannot be materialized.
Error BitcodeReader::materializeForwardReferencedFunctions() {
  if (WillMaterializeAllForwardRefs)
    return Error::success();

  // Prevent recursion.
  WillMaterializeAllForwardRefs = true;

  for (; !BasicBlockFwdRefQueue.empty();) {
    Function *Pending = BasicBlockFwdRefQueue.front();
    BasicBlockFwdRefQueue.pop_front();
    assert(Pending && "Expected valid function");

    // Already materialized.
    if (BasicBlockFwdRefs.count(Pending) == 0)
      continue;

    // Check for a function that isn't materializable to prevent an infinite
    // loop. When parsing a blockaddress stored in a global variable, there
    // isn't a trivial way to check if a function will have a body without a
    // linear search through FunctionsWithBodies, so just check it here.
    if (!Pending->isMaterializable())
      return error("Never resolved function from blockaddress");

    // Try to materialize the function.
    if (Error MatErr = materialize(Pending))
      return MatErr;
  }
  assert(BasicBlockFwdRefs.empty() && "Function missing from queue");

  for (Function *Referrer : BackwardRefFunctions) {
    if (Error MatErr = materialize(Referrer))
      return MatErr;
  }
  BackwardRefFunctions.clear();

  // Reset state.
  WillMaterializeAllForwardRefs = false;
  return Error::success();
}
//===----------------------------------------------------------------------===//
// Helper functions to implement forward reference resolution, etc.
//===----------------------------------------------------------------------===//
/// Return true when \p Val is one of the old linkage encodings that carried an
/// implicit comdat:
///   1  - old WeakAnyLinkage
///   4  - old LinkOnceAnyLinkage
///   10 - old WeakODRLinkage
///   11 - old LinkOnceODRLinkage
/// Every other value yields false.
static bool hasImplicitComdat(size_t Val) {
  return Val == 1 || Val == 4 || Val == 10 || Val == 11;
}
/// Translate an encoded linkage value into a GlobalValue::LinkageTypes.
///
/// Obsolete encodings are folded into their current equivalents, the old
/// encodings 1, 4, 10 and 11 (which had an implicit comdat) share results with
/// 16-19, and any unrecognised value is treated as external linkage.
static GlobalValue::LinkageTypes getDecodedLinkage(unsigned Val) {
  switch (Val) {
  case 2:
    return GlobalValue::AppendingLinkage;
  case 3:
    return GlobalValue::InternalLinkage;
  case 7:
    return GlobalValue::ExternalWeakLinkage;
  case 8:
    return GlobalValue::CommonLinkage;
  case 12:
    return GlobalValue::AvailableExternallyLinkage;

  case 9:
  case 13: // Obsolete LinkerPrivateLinkage
  case 14: // Obsolete LinkerPrivateWeakLinkage
    return GlobalValue::PrivateLinkage;

  case 1: // Old value with implicit comdat.
  case 16:
    return GlobalValue::WeakAnyLinkage;
  case 10: // Old value with implicit comdat.
  case 17:
    return GlobalValue::WeakODRLinkage;
  case 4: // Old value with implicit comdat.
  case 18:
    return GlobalValue::LinkOnceAnyLinkage;
  case 11: // Old value with implicit comdat.
  case 19:
    return GlobalValue::LinkOnceODRLinkage;

  case 0:
  case 5:  // Obsolete DLLImportLinkage
  case 6:  // Obsolete DLLExportLinkage
  case 15: // Obsolete LinkOnceODRAutoHideLinkage
  default: // Map unknown/new linkages to external
    return GlobalValue::ExternalLinkage;
  }
}
/// Unpack \p RawFlags into a FunctionSummary::FFlags, one bit per field.
///
/// Bit positions, least significant first: ReadNone (0), ReadOnly (1),
/// NoRecurse (2), ReturnDoesNotAlias (3), NoInline (4), AlwaysInline (5),
/// NoUnwind (6), MayThrow (7), HasUnknownCall (8), MustBeUnreachable (9).
/// Higher bits are ignored.
static FunctionSummary::FFlags getDecodedFFlags(uint64_t RawFlags) {
  // Yields the bit of RawFlags at position Pos as 0 or 1.
  const auto BitAt = [RawFlags](unsigned Pos) {
    return (RawFlags >> Pos) & 0x1;
  };

  FunctionSummary::FFlags Decoded;
  Decoded.ReadNone = BitAt(0);
  Decoded.ReadOnly = BitAt(1);
  Decoded.NoRecurse = BitAt(2);
  Decoded.ReturnDoesNotAlias = BitAt(3);
  Decoded.NoInline = BitAt(4);
  Decoded.AlwaysInline = BitAt(5);
  Decoded.NoUnwind = BitAt(6);
  Decoded.MayThrow = BitAt(7);
  Decoded.HasUnknownCall = BitAt(8);
  Decoded.MustBeUnreachable = BitAt(9);
  return Decoded;
}
// Decode the flags for GlobalValue in the summary. Bit layout of RawFlags:
//
//   linkage: [0,4), notEligibleToImport: 4, live: 5, local: 6,
//   canAutoHide: 7, visibility: [8, 10).
//
// When Version is below 3 the value is reported as both not eligible for
// import and live, regardless of the corresponding bits.
static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
                                                            uint64_t Version) {
  // Summaries were not emitted before LLVM 3.9, so we don't need to upgrade
  // Linkage like getDecodedLinkage() above. Any future change to the linkage
  // enum and to getDecodedLinkage() will need to be taken into account here
  // as above.
  const auto Linkage =
      static_cast<GlobalValue::LinkageTypes>(RawFlags & 0xF); // 4 bits
  const auto Visibility =
      static_cast<GlobalValue::VisibilityTypes>((RawFlags >> 8) & 3); // 2 bits

  const bool IsPreV3 = Version < 3;
  const bool NotEligibleToImport = ((RawFlags >> 4) & 0x1) || IsPreV3;
  // The Live flag wasn't introduced until version 3. For dead stripping
  // to work correctly on earlier versions, we must conservatively treat all
  // values as live.
  const bool Live = ((RawFlags >> 5) & 0x1) || IsPreV3;
  const bool Local = ((RawFlags >> 6) & 0x1) != 0;
  const bool AutoHide = ((RawFlags >> 7) & 0x1) != 0;

  return GlobalValueSummary::GVFlags(Linkage, Visibility, NotEligibleToImport,
                                     Live, Local, AutoHide);
}
// Decode the flags for GlobalVariable in the summary. The three low bits
// become the first three boolean constructor arguments, in order, and the
// remaining bits (RawFlags >> 3) are converted to a
// GlobalObject::VCallVisibility.
// NOTE(review): the meaning of the three booleans is defined by the
// GVarFlags constructor, which is not visible here - confirm against it.
static GlobalVarSummary::GVarFlags getDecodedGVarFlags(uint64_t RawFlags) {
  const bool Bit0Set = (RawFlags & 0x1) != 0;
  const bool Bit1Set = (RawFlags & 0x2) != 0;
  const bool Bit2Set = (RawFlags & 0x4) != 0;
  const auto VCallVis =
      static_cast<GlobalObject::VCallVisibility>(RawFlags >> 3);
  return GlobalVarSummary::GVarFlags(Bit0Set, Bit1Set, Bit2Set, VCallVis);
}
/// Translate an encoded visibility value into GlobalValue::VisibilityTypes:
/// 1 is hidden, 2 is protected, and 0 or any unknown value is default.
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
  if (Val == 1)
    return GlobalValue::HiddenVisibility;
  if (Val == 2)
    return GlobalValue::ProtectedVisibility;
  // 0, and every unknown visibility, maps to default.
  return GlobalValue::DefaultVisibility;
}
/// Translate an encoded DLL storage class into
/// GlobalValue::DLLStorageClassTypes: 1 is dllimport, 2 is dllexport, and 0 or
/// any unknown value is the default storage class.
static GlobalValue::DLLStorageClassTypes
getDecodedDLLStorageClass(unsigned Val) {
  if (Val == 1)
    return GlobalValue::DLLImportStorageClass;
  if (Val == 2)
    return GlobalValue::DLLExportStorageClass;
  // 0, and every unknown value, maps to default.
  return GlobalValue::DefaultStorageClass;
}
/// Decode the dso_local encoding: only the value 1 yields true. Zero and all
/// unknown values map to false (preemptable).
static bool getDecodedDSOLocal(unsigned Val) {
  return Val == 1;
}
/// Translate an encoded thread-local mode into
/// GlobalVariable::ThreadLocalMode. Zero means not thread-local; 2, 3 and 4
/// select local-dynamic, initial-exec and local-exec; 1 and any unknown
/// non-zero value select general-dynamic.
static GlobalVariable::ThreadLocalMode getDecodedThreadLocalMode(unsigned Val) {
  switch (Val) {
  case 0:
    return GlobalVariable::NotThreadLocal;
  case 2:
    return GlobalVariable::LocalDynamicTLSModel;
  case 3:
    return GlobalVariable::InitialExecTLSModel;
  case 4:
    return GlobalVariable::LocalExecTLSModel;
  case 1:
  default: // Map unknown non-zero value to general dynamic.
    return GlobalVariable::GeneralDynamicTLSModel;
  }
}
/// Translate an encoded unnamed_addr value into GlobalVariable::UnnamedAddr:
/// 1 is Global, 2 is Local, and 0 or any unknown value is None.
static GlobalVariable::UnnamedAddr getDecodedUnnamedAddrType(unsigned Val) {
  if (Val == 1)
    return GlobalVariable::UnnamedAddr::Global;
  if (Val == 2)
    return GlobalVariable::UnnamedAddr::Local;
  // 0, and every unknown value, maps to UnnamedAddr::None.
  return GlobalVariable::UnnamedAddr::None;
}
/// Map a bitc::CAST_* code to the matching Instruction cast opcode.
///
/// \returns the opcode as an int, or -1 when \p Val is not one of the
/// recognised cast codes.
static int getDecodedCastOpcode(unsigned Val) {
  switch (Val) {
  case bitc::CAST_TRUNC:
    return Instruction::Trunc;
  case bitc::CAST_ZEXT:
    return Instruction::ZExt;
  case bitc::CAST_SEXT:
    return Instruction::SExt;
  case bitc::CAST_FPTOUI:
    return Instruction::FPToUI;
  case bitc::CAST_FPTOSI:
    return Instruction::FPToSI;
  case bitc::CAST_UITOFP:
    return Instruction::UIToFP;
  case bitc::CAST_SITOFP:
    return Instruction::SIToFP;
  case bitc::CAST_FPTRUNC:
    return Instruction::FPTrunc;
  case bitc::CAST_FPEXT:
    return Instruction::FPExt;
  case bitc::CAST_PTRTOINT:
    return Instruction::PtrToInt;
  case bitc::CAST_INTTOPTR:
    return Instruction::IntToPtr;
  case bitc::CAST_BITCAST:
    return Instruction::BitCast;
  case bitc::CAST_ADDRSPACECAST:
    return Instruction::AddrSpaceCast;
  default:
    // Unknown cast code.
    return -1;
  }
}
/// Map a bitc::UNOP_* code to an Instruction unary opcode for operand type
/// \p Ty.
///
/// \returns Instruction::FNeg when \p Val is bitc::UNOP_FNEG and \p Ty is a
/// floating-point (or FP vector) type; -1 in every other case, including when
/// \p Ty is neither an int/fp type nor a vector of one.
static int getDecodedUnaryOpcode(unsigned Val, Type *Ty) {
  const bool IsFP = Ty->isFPOrFPVectorTy();
  // UnOps are only valid for int/fp or vector of int/fp types
  if (!(IsFP || Ty->isIntOrIntVectorTy()))
    return -1;

  // FNeg is the only unary op handled here, and it requires an FP operand.
  if (Val == bitc::UNOP_FNEG && IsFP)
    return Instruction::FNeg;
  return -1;
}
/// Map a bitc::BINOP_* code to an Instruction binary opcode for operand type
/// \p Ty.
///
/// For floating-point (or FP vector) types, ADD/SUB/MUL/SDIV/SREM select
/// FAdd/FSub/FMul/FDiv/FRem and every other code is rejected. For integer (or
/// integer vector) types, each code selects its integer opcode.
///
/// \returns the opcode as an int, or -1 when the code is unknown, is not valid
/// for the type, or \p Ty is neither an int/fp type nor a vector of one.
static int getDecodedBinaryOpcode(unsigned Val, Type *Ty) {
  const bool IsFP = Ty->isFPOrFPVectorTy();
  // BinOps are only valid for int/fp or vector of int/fp types
  if (!(IsFP || Ty->isIntOrIntVectorTy()))
    return -1;

  if (IsFP) {
    // Only the arithmetic codes have a floating-point form.
    switch (Val) {
    case bitc::BINOP_ADD:
      return Instruction::FAdd;
    case bitc::BINOP_SUB:
      return Instruction::FSub;
    case bitc::BINOP_MUL:
      return Instruction::FMul;
    case bitc::BINOP_SDIV:
      return Instruction::FDiv;
    case bitc::BINOP_SREM:
      return Instruction::FRem;
    default:
      return -1;
    }
  }

  // Integer or integer-vector operand.
  switch (Val) {
  case bitc::BINOP_ADD:
    return Instruction::Add;
  case bitc::BINOP_SUB:
    return Instruction::Sub;
  case bitc::BINOP_MUL:
    return Instruction::Mul;
  case bitc::BINOP_UDIV:
    return Instruction::UDiv;
  case bitc::BINOP_SDIV:
    return Instruction::SDiv;
  case bitc::BINOP_UREM:
    return Instruction::URem;
  case bitc::BINOP_SREM:
    return Instruction::SRem;
  case bitc::BINOP_SHL:
    return Instruction::Shl;
  case bitc::BINOP_LSHR:
    return Instruction::LShr;
  case bitc::BINOP_ASHR:
    return Instruction::AShr;
  case bitc::BINOP_AND:
    return Instruction::And;
  case bitc::BINOP_OR:
    return Instruction::Or;
  case bitc::BINOP_XOR:
    return Instruction::Xor;
  default:
    return -1;
  }
}
static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
switch (Val) {
default: return AtomicRMWInst::BAD_BINOP;
case bitc::RMW_XCHG: return AtomicRMWInst::Xchg;
case bitc::RMW_ADD: return AtomicRMWInst::Add;
case bitc::RMW_SUB: return AtomicRMWInst::Sub;
case bitc::RMW_AND: return AtomicRMWInst::And;
case bitc::RMW_NAND: return AtomicRMWInst::Nand;
case bitc::RMW_OR: return AtomicRMWInst::Or;
case bitc::RMW_XOR: return AtomicRMWInst::Xor;
case bitc::RMW_MAX: return AtomicRMWInst::Max;
case bitc::RMW_MIN: return AtomicRMWInst::Min;
case bitc::RMW_UMAX: return AtomicRMWInst::UMax;
case bitc::RMW_UMIN: return AtomicRMWInst::UMin;
case bitc::RMW_FADD: return AtomicRMWInst::FAdd;
|