[codeview,pdb] Try really hard to conserve memory when reading.

PDBs can be extremely large. We're already mapping the entire PDB into the process's address space, but to make matters worse the blocks of the PDB are not arranged contiguously. So, when we have something like an array or a string embedded into the stream, we have to make a copy. Since it's convenient to use traditional data structures to iterate and manipulate these records, we need the memory to be contiguous. As a result of this, we were using roughly twice as much memory as the file size of the PDB, because every stream was copied out and re-stitched together contiguously. This patch addresses this by improving the MappedBlockStream to allocate from a BumpPtrAllocator only when a read spans discontiguous blocks. Furthermore, it introduces some data structures backed by a stream which can iterate over both fixed- and variable-length records of a PDB. Since everything is backed by a stream and not a buffer, we can read almost everything from the PDB with zero copies. Differential Revision: http://reviews.llvm.org/D20654 Reviewed By: ruiu llvm-svn: 270951
author: Zachary Turner <zturner@google.com> 2016-05-27 01:54:44 +0000
committer: Zachary Turner <zturner@google.com> 2016-05-27 01:54:44 +0000
commit: 8dbe3629a09a754464f1420ce3059676c986090a (patch)
tree: a332a7c4b100869604af7066e58aa664ac623289 /llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
parent: bd8e9542163f4218b6ad52df8c143c06263ff4a2 (diff)
download: llvm-8dbe3629a09a754464f1420ce3059676c986090a.zip
llvm-8dbe3629a09a754464f1420ce3059676c986090a.tar.gz
llvm-8dbe3629a09a754464f1420ce3059676c986090a.tar.bz2
1 files changed, 19 insertions, 11 deletions
diff --git a/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp b/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
index a542a51..7eae748 100644
--- a/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
+++ b/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
@@ -84,28 +84,28 @@ Error NameHashTable::load(codeview::StreamReader &Stream) {
     support::ulittle32_t ByteSize;
   };
 
-  Header H;
-  if (auto EC = Stream.readObject(&H))
+  const Header *H;
+  if (auto EC = Stream.readObject(H))
     return EC;
 
-  if (H.Signature != 0xEFFEEFFE)
+  if (H->Signature != 0xEFFEEFFE)
     return make_error<RawError>(raw_error_code::corrupt_file,
                                 "Invalid hash table signature");
-  if (H.HashVersion != 1 && H.HashVersion != 2)
+  if (H->HashVersion != 1 && H->HashVersion != 2)
     return make_error<RawError>(raw_error_code::corrupt_file,
                                 "Unsupported hash version");
 
-  Signature = H.Signature;
-  HashVersion = H.HashVersion;
-  if (auto EC = NamesBuffer.initialize(Stream, H.ByteSize))
+  Signature = H->Signature;
+  HashVersion = H->HashVersion;
+  if (auto EC = Stream.readStreamRef(NamesBuffer, H->ByteSize))
     return make_error<RawError>(raw_error_code::corrupt_file,
                                 "Invalid hash table byte length");
 
-  support::ulittle32_t HashCount;
-  if (auto EC = Stream.readObject(&HashCount))
+  const support::ulittle32_t *HashCount;
+  if (auto EC = Stream.readObject(HashCount))
     return EC;
 
-  std::vector<support::ulittle32_t> BucketArray(HashCount);
+  std::vector<support::ulittle32_t> BucketArray(*HashCount);
   if (auto EC = Stream.readArray<support::ulittle32_t>(BucketArray))
     return make_error<RawError>(raw_error_code::corrupt_file,
                                 "Could not read bucket array");
@@ -124,7 +124,15 @@ StringRef NameHashTable::getStringForID(uint32_t ID) const {
   if (ID == IDs[0])
     return StringRef();
 
-  return StringRef(NamesBuffer.str().begin() + ID);
+  // NamesBuffer is a buffer of null terminated strings back to back.  ID is
+  // the starting offset of the string we're looking for.  So just seek to
+  // the desired offset and read a null terminated string from that offset.
+  StringRef Result;
+  codeview::StreamReader NameReader(NamesBuffer);
+  NameReader.setOffset(ID);
+  if (auto EC = NameReader.readZeroString(Result))
+    consumeError(std::move(EC));
+  return Result;
 }
 
 uint32_t NameHashTable::getIDForString(StringRef Str) const {
author	Zachary Turner <zturner@google.com>	2016-05-27 01:54:44 +0000
committer	Zachary Turner <zturner@google.com>	2016-05-27 01:54:44 +0000
commit	8dbe3629a09a754464f1420ce3059676c986090a (patch)
tree	a332a7c4b100869604af7066e58aa664ac623289 /llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
parent	bd8e9542163f4218b6ad52df8c143c06263ff4a2 (diff)
download	llvm-8dbe3629a09a754464f1420ce3059676c986090a.zip llvm-8dbe3629a09a754464f1420ce3059676c986090a.tar.gz llvm-8dbe3629a09a754464f1420ce3059676c986090a.tar.bz2