aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog4
-rw-r--r--string/memmem.c127
2 files changed, 89 insertions, 42 deletions
diff --git a/ChangeLog b/ChangeLog
index 7ddec54..01fc098 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2019-06-12 Wilco Dijkstra <wdijkstr@arm.com>
+ * string/memmem.c (__memmem): Rewrite to improve performance.
+
+2019-06-12 Wilco Dijkstra <wdijkstr@arm.com>
+
* string/str-two-way.h (two_way_short_needle): Add inline to avoid
warning.
(two_way_long_needle): Block inlining.
diff --git a/string/memmem.c b/string/memmem.c
index 4bf733f..83ee75e 100644
--- a/string/memmem.c
+++ b/string/memmem.c
@@ -15,17 +15,13 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-/* This particular implementation was written by Eric Blake, 2008. */
-
#ifndef _LIBC
# include <config.h>
#endif
-/* Specification of memmem. */
#include <string.h>
#ifndef _LIBC
-# define __builtin_expect(expr, val) (expr)
# define __memmem memmem
#endif
@@ -36,51 +32,98 @@
#undef memmem
-/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK
- if NEEDLE_LEN is 0, otherwise NULL if NEEDLE is not found in
- HAYSTACK. */
+/* Hash character pairs so a small shift table can be used. All bits of
+ p[0] are included, but not all bits from p[-1]. So if two equal hashes
+ match on p[-1], p[0] matches too. Hash collisions are harmless and result
+ in smaller shifts. */
+#define hash2(p) (((size_t)(p)[0] - ((size_t)(p)[-1] << 3)) % sizeof (shift))
+
+/* Fast memmem algorithm with guaranteed linear-time performance.
+ Small needles up to size 2 use a dedicated linear search. Longer needles
+ up to size 256 use a novel modified Horspool algorithm. It hashes pairs
+ of characters to quickly skip past mismatches. The main search loop only
+ exits if the last 2 characters match, avoiding unnecessary calls to memcmp
+ and allowing for a larger skip if there is no match. A self-adapting
+ filtering check is used to quickly detect mismatches in long needles.
+ By limiting the needle length to 256, the shift table can be reduced to 8
+ bits per entry, lowering preprocessing overhead and minimizing cache effects.
+ The limit also implies worst-case performance is linear.
+ Needles larger than 256 characters use the linear-time Two-Way algorithm. */
void *
-__memmem (const void *haystack_start, size_t haystack_len,
- const void *needle_start, size_t needle_len)
+__memmem (const void *haystack, size_t hs_len,
+ const void *needle, size_t ne_len)
{
- /* Abstract memory is considered to be an array of 'unsigned char' values,
- not an array of 'char' values. See ISO C 99 section 6.2.6.1. */
- const unsigned char *haystack = (const unsigned char *) haystack_start;
- const unsigned char *needle = (const unsigned char *) needle_start;
-
- if (needle_len == 0)
- /* The first occurrence of the empty string is deemed to occur at
- the beginning of the string. */
- return (void *) haystack;
-
- /* Sanity check, otherwise the loop might search through the whole
- memory. */
- if (__glibc_unlikely (haystack_len < needle_len))
+ const unsigned char *hs = (const unsigned char *) haystack;
+ const unsigned char *ne = (const unsigned char *) needle;
+
+ if (ne_len == 0)
+ return (void *) hs;
+ if (ne_len == 1)
+ return (void *) memchr (hs, ne[0], hs_len);
+
+ /* Ensure haystack length is >= needle length. */
+ if (hs_len < ne_len)
return NULL;
- /* Use optimizations in memchr when possible, to reduce the search
- size of haystack using a linear algorithm with a smaller
- coefficient. However, avoid memchr for long needles, since we
- can often achieve sublinear performance. */
- if (needle_len < LONG_NEEDLE_THRESHOLD)
+ const unsigned char *end = hs + hs_len - ne_len;
+
+ if (ne_len == 2)
+ {
+ uint32_t nw = ne[0] << 16 | ne[1], hw = hs[0] << 16 | hs[1];
+ for (hs++; hs <= end && hw != nw; )
+ hw = hw << 16 | *++hs;
+ return hw == nw ? (void *)hs - 1 : NULL;
+ }
+
+ /* Use Two-Way algorithm for very long needles. */
+ if (__builtin_expect (ne_len > 256, 0))
+ return two_way_long_needle (hs, hs_len, ne, ne_len);
+
+ uint8_t shift[256];
+ size_t tmp, shift1;
+ size_t m1 = ne_len - 1;
+ size_t offset = 0;
+
+ memset (shift, 0, sizeof (shift));
+ for (int i = 1; i < m1; i++)
+ shift[hash2 (ne + i)] = i;
+ /* Shift1 is the amount we can skip after matching the hash of the
+ needle end but not the full needle. */
+ shift1 = m1 - shift[hash2 (ne + m1)];
+ shift[hash2 (ne + m1)] = m1;
+
+ for ( ; hs <= end; )
{
- haystack = memchr (haystack, *needle, haystack_len);
- if (!haystack || __builtin_expect (needle_len == 1, 0))
- return (void *) haystack;
- haystack_len -= haystack - (const unsigned char *) haystack_start;
- if (haystack_len < needle_len)
- return NULL;
- /* Check whether we have a match. This improves performance since we
- avoid the initialization overhead of the two-way algorithm. */
- if (memcmp (haystack, needle, needle_len) == 0)
- return (void *) haystack;
- return two_way_short_needle (haystack, haystack_len, needle, needle_len);
+ /* Skip past character pairs not in the needle. */
+ do
+ {
+ hs += m1;
+ tmp = shift[hash2 (hs)];
+ }
+ while (tmp == 0 && hs <= end);
+
+ /* If the match is not at the end of the needle, shift to the end
+ and continue until we match the hash of the needle end. */
+ hs -= tmp;
+ if (tmp < m1)
+ continue;
+
+ /* Hash of the last 2 characters matches. If the needle is long,
+ try to quickly filter out mismatches. */
+ if (m1 < 15 || memcmp (hs + offset, ne + offset, 8) == 0)
+ {
+ if (memcmp (hs, ne, m1) == 0)
+ return (void *) hs;
+
+ /* Adjust filter offset when it doesn't find the mismatch. */
+ offset = (offset >= 8 ? offset : m1) - 8;
+ }
+
+ /* Skip based on matching the hash of the needle end. */
+ hs += shift1;
}
- else
- return two_way_long_needle (haystack, haystack_len, needle, needle_len);
+ return NULL;
}
libc_hidden_def (__memmem)
weak_alias (__memmem, memmem)
libc_hidden_weak (memmem)
-
-#undef LONG_NEEDLE_THRESHOLD