[ADT] Rewrite the StringRef::find implementation to be simpler, clearer,

and tremendously less reliant on the optimizer to fix things. The code is always necessarily looking for the entire length of the string when doing the equality tests in this find implementation, but it previously was needlessly re-checking the size each time among other annoyances. By writing this so simply an ddirectly in terms of memcmp, it also is about 8x faster in a debug build, which in turn makes FileCheck about 2x faster in 'ninja check-llvm'. This saves about 8% of the time for FileCheck-heavy parts of the test suite like the x86 backend tests. llvm-svn: 247269
author: Chandler Carruth <chandlerc@gmail.com> 2015-09-10 11:17:49 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2015-09-10 11:17:49 +0000
commit: 233edd20a774bbccad77641ebdcb52cd23c9afa5 (patch)
tree: 7d589d33d6b7ca87e7c63fb36a12570a289aac96 /llvm/lib/Support/StringRef.cpp
parent: e19facb9bc36ba9e5b33dcb3241f55ceeb26d29f (diff)
download: llvm-233edd20a774bbccad77641ebdcb52cd23c9afa5.zip
llvm-233edd20a774bbccad77641ebdcb52cd23c9afa5.tar.gz
llvm-233edd20a774bbccad77641ebdcb52cd23c9afa5.tar.bz2
1 files changed, 23 insertions, 16 deletions
diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp
index 88f9204..7ecff29 100644
--- a/llvm/lib/Support/StringRef.cpp
+++ b/llvm/lib/Support/StringRef.cpp
@@ -140,37 +140,44 @@ std::string StringRef::upper() const {
 /// \return - The index of the first occurrence of \arg Str, or npos if not
 /// found.
 size_t StringRef::find(StringRef Str, size_t From) const {
+  if (From > Length)
+    return npos;
+
+  const char *Needle = Str.data();
   size_t N = Str.size();
-  if (N > Length)
+  if (N == 0)
+    return From;
+
+  size_t Size = Length - From;
+  if (Size < N)
     return npos;
 
+  const char *Start = Data + From;
+  const char *Stop = Start + (Size - N + 1);
+
   // For short haystacks or unsupported needles fall back to the naive algorithm
-  if (Length < 16 || N > 255 || N == 0) {
-    for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
-      if (substr(i, N).equals(Str))
-        return i;
+  if (Size < 16 || N > 255) {
+    do {
+      if (std::memcmp(Start, Needle, N) == 0)
+        return Start - Data;
+      ++Start;
+    } while (Start < Stop);
     return npos;
   }
 
-  if (From >= Length)
-    return npos;
-
   // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
   uint8_t BadCharSkip[256];
   std::memset(BadCharSkip, N, 256);
   for (unsigned i = 0; i != N-1; ++i)
     BadCharSkip[(uint8_t)Str[i]] = N-1-i;
 
-  unsigned Len = Length-From, Pos = From;
-  while (Len >= N) {
-    if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
-      return Pos;
+  do {
+    if (std::memcmp(Start, Needle, N) == 0)
+      return Start - Data;
 
     // Otherwise skip the appropriate number of bytes.
-    uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]];
-    Len -= Skip;
-    Pos += Skip;
-  }
+    Start += BadCharSkip[(uint8_t)Start[N-1]];
+  } while (Start < Stop);
 
   return npos;
 }
author	Chandler Carruth <chandlerc@gmail.com>	2015-09-10 11:17:49 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2015-09-10 11:17:49 +0000
commit	233edd20a774bbccad77641ebdcb52cd23c9afa5 (patch)
tree	7d589d33d6b7ca87e7c63fb36a12570a289aac96 /llvm/lib/Support/StringRef.cpp
parent	e19facb9bc36ba9e5b33dcb3241f55ceeb26d29f (diff)
download	llvm-233edd20a774bbccad77641ebdcb52cd23c9afa5.zip llvm-233edd20a774bbccad77641ebdcb52cd23c9afa5.tar.gz llvm-233edd20a774bbccad77641ebdcb52cd23c9afa5.tar.bz2