diff options
author | Nico Weber <thakis@chromium.org> | 2021-04-07 09:42:11 -0400 |
---|---|---|
committer | Nico Weber <thakis@chromium.org> | 2021-04-07 09:42:11 -0400 |
commit | c22b09debddbaa00be3418b47ca33446ee8a8319 (patch) | |
tree | 8c3735d78d9db49c4757d4053b2df40a3f231866 /clang/lib/Basic/SourceManager.cpp | |
parent | 302e7480655b0d1c1a3a629b4ef446bf77dcb01a (diff) | |
download | llvm-c22b09debddbaa00be3418b47ca33446ee8a8319.zip llvm-c22b09debddbaa00be3418b47ca33446ee8a8319.tar.gz llvm-c22b09debddbaa00be3418b47ca33446ee8a8319.tar.bz2 |
Revert "[clang] Speedup line offset mapping computation"
This reverts commit 6951b72334bbe4c189c71751edc1e361d7b5632c.
Breaks several bots, see comments on https://reviews.llvm.org/D99409
Diffstat (limited to 'clang/lib/Basic/SourceManager.cpp')
-rw-r--r-- | clang/lib/Basic/SourceManager.cpp | 64 |
1 files changed, 11 insertions, 53 deletions
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 5aab2a4..cc275d4 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -1252,21 +1252,12 @@ unsigned SourceManager::getPresumedColumnNumber(SourceLocation Loc, return PLoc.getColumn(); } -// Check if mutli-byte word x has bytes between m and n, included. This may also -// catch bytes equal to n + 1. -// The returned value holds a 0x80 at each byte position that holds a match. -// see http://graphics.stanford.edu/~seander/bithacks.html#HasBetweenInWord -template <class T> -static constexpr inline T likelyhasbetween(T x, unsigned char m, - unsigned char n) { - return ((x - ~0UL / 255 * (n + 1)) & ~x & - (x & ~0UL / 255 * 127) + ~0UL / 255 * (127 - (m - 1))) & - ~0UL / 255 * 128; -} +#ifdef __SSE2__ +#include <emmintrin.h> +#endif LineOffsetMapping LineOffsetMapping::get(llvm::MemoryBufferRef Buffer, llvm::BumpPtrAllocator &Alloc) { - // Find the file offsets of all of the *physical* source lines. This does // not look at trigraphs, escaped newlines, or anything else tricky. SmallVector<unsigned, 256> LineOffsets; @@ -1277,51 +1268,18 @@ LineOffsetMapping LineOffsetMapping::get(llvm::MemoryBufferRef Buffer, const unsigned char *Buf = (const unsigned char *)Buffer.getBufferStart(); const unsigned char *End = (const unsigned char *)Buffer.getBufferEnd(); const std::size_t BufLen = End - Buf; - unsigned I = 0; - uint64_t Word; - - // scan sizeof(Word) bytes at a time for new lines. - // This is much faster than scanning each byte independently. - if (BufLen > sizeof(Word)) { - do { - memcpy(&Word, Buf + I, sizeof(Word)); - // no new line => jump over sizeof(Word) bytes. - auto Mask = likelyhasbetween(Word, '\n', '\r'); - if (!Mask) { - I += sizeof(Word); - continue; - } - - // At that point, Mask contains 0x80 set at each byte that holds a value - // in [\n, \r + 1 [ - - // Scan for the next newline - it's very likely there's one. - unsigned N = - llvm::countTrailingZeros(Mask) - 7; // -7 because 0x80 is the marker - Word >>= N; - I += N / 8 + 1; - unsigned char Byte = Word; - if (Byte == '\n') { - LineOffsets.push_back(I); - } else if (Byte == '\r') { + while (I < BufLen) { + // Use a fast check to catch both newlines + if (LLVM_UNLIKELY(Buf[I] <= std::max('\n', '\r'))) { + if (Buf[I] == '\n') { + LineOffsets.push_back(I + 1); + } else if (Buf[I] == '\r') { // If this is \r\n, skip both characters. - if (Buf[I] == '\n') + if (I + 1 < BufLen && Buf[I + 1] == '\n') ++I; - LineOffsets.push_back(I); + LineOffsets.push_back(I + 1); } - } while (I < BufLen - sizeof(Word) - 1); - } - - // Handle tail using a regular check. - while (I < BufLen) { - if (Buf[I] == '\n') { - LineOffsets.push_back(I + 1); - } else if (Buf[I] == '\r') { - // If this is \r\n, skip both characters. - if (I + 1 < BufLen && Buf[I + 1] == '\n') - ++I; - LineOffsets.push_back(I + 1); } ++I; } |