From 6565a0d4b2c98722eb8fee9093cdde4f37928986 Mon Sep 17 00:00:00 2001
From: "Duncan P. N. Exon Smith"
Date: Sun, 27 Mar 2016 23:17:54 +0000
Subject: Reapply ~"Bitcode: Collect all MDString records into a single blob"

Spiritually reapply commit r264409 (reverted in r264410), albeit with a
bit of a redesign.

Firstly, avoid splitting the big blob into multiple chunks of strings.

r264409 imposed an arbitrary limit to avoid a massive allocation on the
shared 'Record' SmallVector. The bug with that commit only reproduced
when there were more than "chunk-size" strings. A test for this would
have been useless long-term, since we're liable to adjust the chunk-size
in the future.

Thus, eliminate the motivation for chunk-ing by storing the string sizes
in the blob. Here's the layout:

    vbr6: # of strings
    vbr6: offset-to-blob
    blob:
       [vbr6]: string lengths
       [char]: concatenated strings

Secondly, make the output of llvm-bcanalyzer readable.

I noticed when debugging r264409 that llvm-bcanalyzer was outputting a
massive blob all in one line. Past a small number, the strings were
impossible to split in my head, and the lines were way too long. This
version adds support in llvm-bcanalyzer for pretty-printing.

    num-strings = 3 {
      'abc'
      'def'
      'ghi'
    }

From the original commit:

Inspired by Mehdi's similar patch, http://reviews.llvm.org/D18342, this
should (a) slightly reduce bitcode size, since there is less record
overhead, and (b) greatly improve reading speed, since blobs are super
cheap to deserialize.
llvm-svn: 264551
---
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 72 ++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 24 deletions(-)

(limited to 'llvm/lib/Bitcode/Writer/BitcodeWriter.cpp')

diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8284d55..80da91a 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1347,31 +1347,65 @@ static void writeNamedMetadata(const Module &M, const ValueEnumerator &VE,
   }
 }
 
+static unsigned createMetadataStringsAbbrev(BitstreamWriter &Stream) {
+  BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRINGS));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of strings
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // offset to chars
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+  return Stream.EmitAbbrev(Abbv);
+}
+
+/// Write out a record for MDString.
+///
+/// All the metadata strings in a metadata block are emitted in a single
+/// record. The sizes and strings themselves are shoved into a blob.
+static void writeMetadataStrings(ArrayRef<const Metadata *> Strings,
+                                 BitstreamWriter &Stream,
+                                 SmallVectorImpl<uint64_t> &Record) {
+  if (Strings.empty())
+    return;
+
+  // Start the record with the number of strings.
+  Record.push_back(bitc::METADATA_STRINGS);
+  Record.push_back(Strings.size());
+
+  // Emit the sizes of the strings in the blob.
+  SmallString<256> Blob;
+  {
+    BitstreamWriter W(Blob);
+    for (const Metadata *MD : Strings)
+      W.EmitVBR(cast<MDString>(MD)->getLength(), 6);
+    W.FlushToWord();
+  }
+
+  // Add the offset to the strings to the record.
+  Record.push_back(Blob.size());
+
+  // Add the strings to the blob.
+  for (const Metadata *MD : Strings)
+    Blob.append(cast<MDString>(MD)->getString());
+
+  // Emit the final record.
+  Stream.EmitRecordWithBlob(createMetadataStringsAbbrev(Stream), Record, Blob);
+  Record.clear();
+}
+
 static void WriteModuleMetadata(const Module &M,
                                 const ValueEnumerator &VE,
                                 BitstreamWriter &Stream) {
-  const auto &MDs = VE.getMDs();
-  if (MDs.empty() && M.named_metadata_empty())
+  if (VE.getMDs().empty() && M.named_metadata_empty())
     return;
 
   Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
 
-  unsigned MDSAbbrev = 0;
-  if (VE.hasMDString()) {
-    // Abbrev for METADATA_STRING.
-    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
-    Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING));
-    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
-    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
-    MDSAbbrev = Stream.EmitAbbrev(Abbv);
-  }
-
   // Initialize MDNode abbreviations.
 #define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0;
 #include "llvm/IR/Metadata.def"
 
   SmallVector<uint64_t, 64> Record;
-  for (const Metadata *MD : MDs) {
+  writeMetadataStrings(VE.getMDStrings(), Stream, Record);
+  for (const Metadata *MD : VE.getNonMDStrings()) {
     if (const MDNode *N = dyn_cast<MDNode>(MD)) {
       assert(N->isResolved() && "Expected forward references to be resolved");
 
@@ -1385,17 +1419,7 @@ static void WriteModuleMetadata(const Module &M,
 #include "llvm/IR/Metadata.def"
       }
     }
-    if (const auto *MDC = dyn_cast<ConstantAsMetadata>(MD)) {
-      WriteValueAsMetadata(MDC, VE, Stream, Record);
-      continue;
-    }
-    const MDString *MDS = cast<MDString>(MD);
-    // Code: [strchar x N]
-    Record.append(MDS->bytes_begin(), MDS->bytes_end());
-
-    // Emit the finished record.
-    Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev);
-    Record.clear();
+    WriteValueAsMetadata(cast<ConstantAsMetadata>(MD), VE, Stream, Record);
   }
 
   writeNamedMetadata(M, VE, Stream, Record);
-- 
cgit v1.1