From a0f371a106f003bb6575bc8f19ab82121d8bc1c4 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 17 Apr 2017 17:51:36 +0000 Subject: Bitcode: Add a string table to the bitcode format. Add a top-level STRTAB block containing a string table blob, and start storing strings for module codes FUNCTION, GLOBALVAR, ALIAS, IFUNC and COMDAT in the string table. This change allows us to share names between globals and comdats as well as between modules, and improves the efficiency of loading bitcode files by no longer using a bit encoding for symbol names. Once we start writing the irsymtab to the bitcode file we will also be able to share strings between it and the module. On my machine, link time for Chromium for Linux with ThinLTO decreases by about 7% for no-op incremental builds or about 1% for full builds. Total bitcode file size decreases by about 3%. As discussed on llvm-dev: http://lists.llvm.org/pipermail/llvm-dev/2017-April/111732.html Differential Revision: https://reviews.llvm.org/D31838 llvm-svn: 300464 --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 315 ++++++++++++++++++++++-------- 1 file changed, 237 insertions(+), 78 deletions(-) (limited to 'llvm/lib/Bitcode/Reader/BitcodeReader.cpp') diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 24ab7e9..2de6bcc 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -372,15 +372,27 @@ Expected readTriple(BitstreamCursor &Stream) { class BitcodeReaderBase { protected: - BitcodeReaderBase(BitstreamCursor Stream) : Stream(std::move(Stream)) { + BitcodeReaderBase(BitstreamCursor Stream, StringRef Strtab) + : Stream(std::move(Stream)), Strtab(Strtab) { this->Stream.setBlockInfo(&BlockInfo); } BitstreamBlockInfo BlockInfo; BitstreamCursor Stream; + StringRef Strtab; + + /// In version 2 of the bitcode we store names of global values and comdats in + /// a string table rather than in the VST. + bool UseStrtab = false; Expected parseVersionRecord(ArrayRef Record); + /// If this module uses a string table, pop the reference to the string table + /// and return the referenced string and the rest of the record. Otherwise + /// just return the record itself. + std::pair> + readNameFromStrtab(ArrayRef Record); + bool readBlockInfo(); // Contains an arbitrary and optional string identifying the bitcode producer @@ -402,11 +414,22 @@ BitcodeReaderBase::parseVersionRecord(ArrayRef Record) { if (Record.size() < 1) return error("Invalid record"); unsigned ModuleVersion = Record[0]; - if (ModuleVersion > 1) + if (ModuleVersion > 2) return error("Invalid value"); + UseStrtab = ModuleVersion >= 2; return ModuleVersion; } +std::pair> +BitcodeReaderBase::readNameFromStrtab(ArrayRef Record) { + if (!UseStrtab) + return {"", Record}; + // Invalid reference. Let the caller complain about the record being empty. + if (Record[0] + Record[1] > Strtab.size()) + return {"", {}}; + return {StringRef(Strtab.data() + Record[0], Record[1]), Record.slice(2)}; +} + class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { LLVMContext &Context; Module *TheModule = nullptr; @@ -492,8 +515,8 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { std::vector BundleTags; public: - BitcodeReader(BitstreamCursor Stream, StringRef ProducerIdentification, - LLVMContext &Context); + BitcodeReader(BitstreamCursor Stream, StringRef Strtab, + StringRef ProducerIdentification, LLVMContext &Context); Error materializeForwardReferencedFunctions(); @@ -628,7 +651,10 @@ private: Expected recordValue(SmallVectorImpl &Record, unsigned NameIndex, Triple &TT); + void setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, Function *F, + ArrayRef Record); Error parseValueSymbolTable(uint64_t Offset = 0); + Error parseGlobalValueSymbolTable(); Error parseConstants(); Error rememberAndSkipFunctionBodies(); Error rememberAndSkipFunctionBody(); @@ -681,12 +707,15 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase { std::string SourceFileName; public: - ModuleSummaryIndexBitcodeReader( - BitstreamCursor Stream, ModuleSummaryIndex &TheIndex); + ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab, + ModuleSummaryIndex &TheIndex); Error parseModule(StringRef ModulePath); private: + void setValueGUID(uint64_t ValueID, StringRef ValueName, + GlobalValue::LinkageTypes Linkage, + StringRef SourceFileName); Error parseValueSymbolTable( uint64_t Offset, DenseMap &ValueIdToLinkageMap); @@ -716,10 +745,10 @@ std::error_code llvm::errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, return std::error_code(); } -BitcodeReader::BitcodeReader(BitstreamCursor Stream, +BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context) - : BitcodeReaderBase(std::move(Stream)), Context(Context), + : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context), ValueList(Context) { this->ProducerIdentification = ProducerIdentification; } @@ -1749,6 +1778,54 @@ static uint64_t jumpToValueSymbolTable(uint64_t Offset, return CurrentBit; } +void BitcodeReader::setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, + Function *F, + ArrayRef Record) { + // Note that we subtract 1 here because the offset is relative to one word + // before the start of the identification or module block, which was + // historically always the start of the regular bitcode header. + uint64_t FuncWordOffset = Record[1] - 1; + uint64_t FuncBitOffset = FuncWordOffset * 32; + DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; + // Set the LastFunctionBlockBit to point to the last function block. + // Later when parsing is resumed after function materialization, + // we can simply skip that last function block. + if (FuncBitOffset > LastFunctionBlockBit) + LastFunctionBlockBit = FuncBitOffset; +} + +/// Read a new-style GlobalValue symbol table. +Error BitcodeReader::parseGlobalValueSymbolTable() { + unsigned FuncBitcodeOffsetDelta = + Stream.getAbbrevIDWidth() + bitc::BlockIDWidth; + + if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + while (true) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return Error::success(); + case BitstreamEntry::Record: + break; + } + + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + case bitc::VST_CODE_FNENTRY: // [valueid, offset] + setDeferredFunctionInfo(FuncBitcodeOffsetDelta, + cast(ValueList[Record[0]]), Record); + break; + } + } +} + /// Parse the value symbol table at either the current parsing location or /// at the given bit offset if provided. Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { @@ -1756,8 +1833,18 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { // Pass in the Offset to distinguish between calling for the module-level // VST (where we want to jump to the VST offset) and the function-level // VST (where we don't). - if (Offset > 0) + if (Offset > 0) { CurrentBit = jumpToValueSymbolTable(Offset, Stream); + // If this module uses a string table, read this as a module-level VST. + if (UseStrtab) { + if (Error Err = parseGlobalValueSymbolTable()) + return Err; + Stream.JumpToBit(CurrentBit); + return Error::success(); + } + // Otherwise, the VST will be in a similar format to a function-level VST, + // and will contain symbol names. + } // Compute the delta between the bitcode indices in the VST (the word offset // to the word-aligned ENTER_SUBBLOCK for the function block, and that @@ -1818,23 +1905,10 @@ Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { return Err; Value *V = ValOrErr.get(); - auto *F = dyn_cast(V); // Ignore function offsets emitted for aliases of functions in older // versions of LLVM. - if (!F) - break; - - // Note that we subtract 1 here because the offset is relative to one word - // before the start of the identification or module block, which was - // historically always the start of the regular bitcode header. - uint64_t FuncWordOffset = Record[1] - 1; - uint64_t FuncBitOffset = FuncWordOffset * 32; - DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; - // Set the LastFunctionBlockBit to point to the last function block. - // Later when parsing is resumed after function materialization, - // we can simply skip that last function block. - if (FuncBitOffset > LastFunctionBlockBit) - LastFunctionBlockBit = FuncBitOffset; + if (auto *F = dyn_cast(V)) + setDeferredFunctionInfo(FuncBitcodeOffsetDelta, F, Record); break; } case bitc::VST_CODE_BBENTRY: { @@ -2626,15 +2700,24 @@ bool BitcodeReaderBase::readBlockInfo() { } Error BitcodeReader::parseComdatRecord(ArrayRef Record) { - // [selection_kind, name] - if (Record.size() < 2) + // v1: [selection_kind, name] + // v2: [strtab_offset, strtab_size, selection_kind] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + + if (Record.size() < 1) return error("Invalid record"); Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]); - std::string Name; - unsigned ComdatNameSize = Record[1]; - Name.reserve(ComdatNameSize); - for (unsigned i = 0; i != ComdatNameSize; ++i) - Name += (char)Record[2 + i]; + std::string OldFormatName; + if (!UseStrtab) { + if (Record.size() < 2) + return error("Invalid record"); + unsigned ComdatNameSize = Record[1]; + OldFormatName.reserve(ComdatNameSize); + for (unsigned i = 0; i != ComdatNameSize; ++i) + OldFormatName += (char)Record[2 + i]; + Name = OldFormatName; + } Comdat *C = TheModule->getOrInsertComdat(Name); C->setSelectionKind(SK); ComdatList.push_back(C); @@ -2642,9 +2725,13 @@ Error BitcodeReader::parseComdatRecord(ArrayRef Record) { } Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { - // [pointer type, isconst, initid, linkage, alignment, section, + // v1: [pointer type, isconst, initid, linkage, alignment, section, // visibility, threadlocal, unnamed_addr, externally_initialized, - // dllstorageclass, comdat] + // dllstorageclass, comdat] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + if (Record.size() < 6) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); @@ -2692,7 +2779,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { ExternallyInitialized = Record[9]; GlobalVariable *NewGV = - new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, "", + new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, Name, nullptr, TLM, AddressSpace, ExternallyInitialized); NewGV->setAlignment(Alignment); if (!Section.empty()) @@ -2724,9 +2811,13 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { } Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { - // [type, callingconv, isproto, linkage, paramattr, alignment, section, + // v1: [type, callingconv, isproto, linkage, paramattr, alignment, section, // visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat, - // prefixdata] + // prefixdata] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + if (Record.size() < 8) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); @@ -2742,7 +2833,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { return error("Invalid calling convention ID"); Function *Func = - Function::Create(FTy, GlobalValue::ExternalLinkage, "", TheModule); + Function::Create(FTy, GlobalValue::ExternalLinkage, Name, TheModule); Func->setCallingConv(CC); bool isProto = Record[2]; @@ -2810,11 +2901,15 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { Error BitcodeReader::parseGlobalIndirectSymbolRecord( unsigned BitCode, ArrayRef Record) { - // ALIAS_OLD: [alias type, aliasee val#, linkage] - // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, - // dllstorageclass] - // IFUNC: [alias type, addrspace, aliasee val#, linkage, - // visibility, dllstorageclass] + // v1 ALIAS_OLD: [alias type, aliasee val#, linkage] (name in VST) + // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, + // dllstorageclass] (name in VST) + // v1 IFUNC: [alias type, addrspace, aliasee val#, linkage, + // visibility, dllstorageclass] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + bool NewRecord = BitCode != bitc::MODULE_CODE_ALIAS_OLD; if (Record.size() < (3 + (unsigned)NewRecord)) return error("Invalid record"); @@ -2839,10 +2934,10 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord( GlobalIndirectSymbol *NewGA; if (BitCode == bitc::MODULE_CODE_ALIAS || BitCode == bitc::MODULE_CODE_ALIAS_OLD) - NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "", + NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, TheModule); else - NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "", + NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, nullptr, TheModule); // Old bitcode files didn't have visibility field. // Local linkage must have default visibility. @@ -4570,8 +4665,8 @@ std::vector BitcodeReader::getIdentifiedStructTypes() const { } ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader( - BitstreamCursor Cursor, ModuleSummaryIndex &TheIndex) - : BitcodeReaderBase(std::move(Cursor)), TheIndex(TheIndex) {} + BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex) + : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex) {} std::pair ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) { @@ -4580,12 +4675,32 @@ ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) { return VGI->second; } +void ModuleSummaryIndexBitcodeReader::setValueGUID( + uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage, + StringRef SourceFileName) { + std::string GlobalId = + GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName); + auto ValueGUID = GlobalValue::getGUID(GlobalId); + auto OriginalNameID = ValueGUID; + if (GlobalValue::isLocalLinkage(Linkage)) + OriginalNameID = GlobalValue::getGUID(ValueName); + if (PrintSummaryGUIDs) + dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is " + << ValueName << "\n"; + ValueIdToCallGraphGUIDMap[ValueID] = + std::make_pair(ValueGUID, OriginalNameID); +} + // Specialized value symbol table parser used when reading module index // blocks where we don't actually create global values. The parsed information // is saved in the bitcode reader for use when later parsing summaries. Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( uint64_t Offset, DenseMap &ValueIdToLinkageMap) { + // With a strtab the VST is not required to parse the summary. + if (UseStrtab) + return Error::success(); + assert(Offset > 0 && "Expected non-zero VST offset"); uint64_t CurrentBit = jumpToValueSymbolTable(Offset, Stream); @@ -4627,17 +4742,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; - std::string GlobalId = - GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName); - auto ValueGUID = GlobalValue::getGUID(GlobalId); - auto OriginalNameID = ValueGUID; - if (GlobalValue::isLocalLinkage(Linkage)) - OriginalNameID = GlobalValue::getGUID(ValueName); - if (PrintSummaryGUIDs) - dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is " - << ValueName << "\n"; - ValueIdToCallGraphGUIDMap[ValueID] = - std::make_pair(ValueGUID, OriginalNameID); + setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } @@ -4651,18 +4756,7 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; - std::string FunctionGlobalId = GlobalValue::getGlobalIdentifier( - ValueName, VLI->second, SourceFileName); - auto FunctionGUID = GlobalValue::getGUID(FunctionGlobalId); - auto OriginalNameID = FunctionGUID; - if (GlobalValue::isLocalLinkage(Linkage)) - OriginalNameID = GlobalValue::getGUID(ValueName); - if (PrintSummaryGUIDs) - dbgs() << "GUID " << FunctionGUID << "(" << OriginalNameID << ") is " - << ValueName << "\n"; - ValueIdToCallGraphGUIDMap[ValueID] = - std::make_pair(FunctionGUID, OriginalNameID); - + setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } @@ -4749,6 +4843,11 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { switch (BitCode) { default: break; // Default behavior, ignore unknown content. + case bitc::MODULE_CODE_VERSION: { + if (Error Err = parseVersionRecord(Record).takeError()) + return Err; + break; + } /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] case bitc::MODULE_CODE_SOURCE_FILENAME: { SmallString<128> ValueName; @@ -4783,17 +4882,26 @@ Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) { // was historically always the start of the regular bitcode header. VSTOffset = Record[0] - 1; break; - // GLOBALVAR: [pointer type, isconst, initid, linkage, ...] - // FUNCTION: [type, callingconv, isproto, linkage, ...] - // ALIAS: [alias type, addrspace, aliasee val#, linkage, ...] + // v1 GLOBALVAR: [pointer type, isconst, initid, linkage, ...] + // v1 FUNCTION: [type, callingconv, isproto, linkage, ...] + // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, ...] + // v2: [strtab offset, strtab size, v1] case bitc::MODULE_CODE_GLOBALVAR: case bitc::MODULE_CODE_FUNCTION: case bitc::MODULE_CODE_ALIAS: { - if (Record.size() <= 3) + StringRef Name; + ArrayRef GVRecord; + std::tie(Name, GVRecord) = readNameFromStrtab(Record); + if (GVRecord.size() <= 3) return error("Invalid record"); - uint64_t RawLinkage = Record[3]; + uint64_t RawLinkage = GVRecord[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); - ValueIdToLinkageMap[ValueId++] = Linkage; + if (!UseStrtab) { + ValueIdToLinkageMap[ValueId++] = Linkage; + break; + } + + setValueGUID(ValueId++, Name, Linkage, SourceFileName); break; } } @@ -4904,6 +5012,12 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary( switch (BitCode) { default: // Default behavior: ignore. break; + case bitc::FS_VALUE_GUID: { // [valueid, refguid] + uint64_t ValueID = Record[0]; + GlobalValue::GUID RefGUID = Record[1]; + ValueIdToCallGraphGUIDMap[ValueID] = std::make_pair(RefGUID, RefGUID); + break; + } // FS_PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid, // n x (valueid)] // FS_PERMODULE_PROFILE: [valueid, flags, instcount, numrefs, @@ -5208,6 +5322,35 @@ const std::error_category &llvm::BitcodeErrorCategory() { return *ErrorCategory; } +static Expected readStrtab(BitstreamCursor &Stream) { + if (Stream.EnterSubBlock(bitc::STRTAB_BLOCK_ID)) + return error("Invalid record"); + + StringRef Strtab; + while (1) { + BitstreamEntry Entry = Stream.advance(); + switch (Entry.Kind) { + case BitstreamEntry::EndBlock: + return Strtab; + + case BitstreamEntry::Error: + return error("Malformed block"); + + case BitstreamEntry::SubBlock: + if (Stream.SkipBlock()) + return error("Malformed block"); + break; + + case BitstreamEntry::Record: + StringRef Blob; + SmallVector Record; + if (Stream.readRecord(Entry.ID, Record, &Blob) == bitc::STRTAB_BLOB) + Strtab = Blob; + break; + } + } +} + //===----------------------------------------------------------------------===// // External interface //===----------------------------------------------------------------------===// @@ -5260,6 +5403,22 @@ llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { continue; } + if (Entry.ID == bitc::STRTAB_BLOCK_ID) { + Expected Strtab = readStrtab(Stream); + if (!Strtab) + return Strtab.takeError(); + // This string table is used by every preceding bitcode module that does + // not have its own string table. A bitcode file may have multiple + // string tables if it was created by binary concatenation, for example + // with "llvm-cat -b". + for (auto I = Modules.rbegin(), E = Modules.rend(); I != E; ++I) { + if (!I->Strtab.empty()) + break; + I->Strtab = *Strtab; + } + continue; + } + if (Stream.SkipBlock()) return error("Malformed block"); continue; @@ -5296,8 +5455,8 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll, } Stream.JumpToBit(ModuleBit); - auto *R = - new BitcodeReader(std::move(Stream), ProducerIdentification, Context); + auto *R = new BitcodeReader(std::move(Stream), Strtab, ProducerIdentification, + Context); std::unique_ptr M = llvm::make_unique(ModuleIdentifier, Context); @@ -5332,7 +5491,7 @@ Expected> BitcodeModule::getSummary() { Stream.JumpToBit(ModuleBit); auto Index = llvm::make_unique(); - ModuleSummaryIndexBitcodeReader R(std::move(Stream), *Index); + ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index); if (Error Err = R.parseModule(ModuleIdentifier)) return std::move(Err); -- cgit v1.1