From 20a71f2c8ab45b458416422af2eec2b7bc52f66b Mon Sep 17 00:00:00 2001
From: Maxim Kuvyrkov
Date: Tue, 29 May 2012 00:04:12 -0700
Subject: Optimize first-character loop of strstr, strcasestr and memmem.

---
 string/str-two-way.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'string')

diff --git a/string/str-two-way.h b/string/str-two-way.h
index 22e7539..e18dc91 100644
--- a/string/str-two-way.h
+++ b/string/str-two-way.h
@@ -258,14 +258,27 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
     }
   else
     {
+      /* The comparison always starts from needle[suffix], so cache it
+         and use an optimized first-character loop. */
+      unsigned char needle_suffix = CANON_ELEMENT (needle[suffix]);
+
       /* The two halves of needle are distinct; no extra memory is
          required, and any mismatch results in a maximal shift. */
       period = MAX (suffix, needle_len - suffix) + 1;
       j = 0;
       while (AVAILABLE (haystack, haystack_len, j, needle_len))
         {
+          /* TODO: The first-character loop can be sped up by adapting
+             longword-at-a-time implementation of memchr/strchr. */
+          if (needle_suffix
+              != CANON_ELEMENT (haystack[suffix + j]))
+            {
+              ++j;
+              continue;
+            }
+
           /* Scan for matches in right half. */
-          i = suffix;
+          i = suffix + 1;
           while (i < needle_len && (CANON_ELEMENT (needle[i])
                                     == CANON_ELEMENT (haystack[i + j])))
             ++i;
-- 
cgit v1.1