From 9a98f12331c336ad7433f51a2edd8531be2086c6 Mon Sep 17 00:00:00 2001 From: Steve Bennett Date: Thu, 21 Apr 2011 20:00:24 +1000 Subject: More efficient [string trimright] and [string last] By searching backwards in [string trimright], even in utf-8 strings. And using binary comparison in [string last] Signed-off-by: Steve Bennett --- jim.c | 72 ++++++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 39 insertions(+), 33 deletions(-) (limited to 'jim.c') diff --git a/jim.c b/jim.c index 70e9967..a7d7022 100644 --- a/jim.c +++ b/jim.c @@ -354,23 +354,11 @@ static int JimStringFirst(const char *s1, int l1, const char *s2, int l2, int id return -1; } +/** + * Note: Lengths and return value are in bytes, not chars. + */ static int JimStringLast(const char *s1, int l1, const char *s2, int l2) { -#ifdef JIM_UTF8 - int i = 0; - /* It is too hard to search backwards with utf-8, so just keep using JimStringFirst() - * until we find the last instance - */ - int result = -1; - /* Search is inclusive of l2 */ - l2++; - while ((i = JimStringFirst(s1, l1, s2, l2, i)) >= 0) { - int c; - result = i; - i += utf8_tounicode(s2 + i, &c); - } - return result; -#else const char *p; if (!l1 || !l2 || l1 > l2) @@ -383,9 +371,22 @@ static int JimStringLast(const char *s1, int l1, const char *s2, int l2) } } return -1; -#endif } +#ifdef JIM_UTF8 +/** + * Note: Lengths and return value are in chars. 
+ */ +static int JimStringLastUtf8(const char *s1, int l1, const char *s2, int l2) +{ + int n = JimStringLast(s1, utf8_index(s1, l1), s2, utf8_index(s2, l2)); + if (n > 0) { + n = utf8_strlen(s2, n); + } + return n; +} +#endif + int Jim_WideToString(char *buf, jim_wide wideValue) { const char *fmt = "%" JIM_WIDE_MODIFIER; @@ -2408,22 +2409,26 @@ static Jim_Obj *JimStringToUpper(Jim_Interp *interp, Jim_Obj *strObjPtr) return Jim_NewStringObjNoAlloc(interp, buf, len); } -/* Similar to strchr() except searches a UTF-8 string 'str' of byte length 'len' +/* Similar to memchr() except searches a UTF-8 string 'str' of byte length 'len' * for unicode character 'c'. - * Returns 1 if found or 0 if not + * Returns the position if found or NULL if not */ -static int utf8_strchr(const char *str, int len, int c) +static const char *utf8_memchr(const char *str, int len, int c) { +#ifdef JIM_UTF8 while (len) { int sc; int n = utf8_tounicode(str, &sc); if (sc == c) { - return 1; + return str; } str += n; len -= n; } - return 0; + return NULL; +#else + return memchr(str, c, len); +#endif } /** @@ -2439,7 +2444,7 @@ static const char *JimFindTrimLeft(const char *str, int len, const char *trimcha int c; int n = utf8_tounicode(str, &c); - if (utf8_strchr(trimchars, trimlen, c) == 0) { + if (utf8_memchr(trimchars, trimlen, c) == NULL) { /* Not a trim char, so stop */ break; } @@ -2458,26 +2463,23 @@ static const char *JimFindTrimLeft(const char *str, int len, const char *trimcha */ static const char *JimFindTrimRight(const char *str, int len, const char *trimchars, int trimlen) { - /* It is too hard to search backwards with utf-8, so just examine every char - * of the string and remember the point just after the last non-trim char - */ - const char *nontrim = NULL; - - /* XXX: Could optimize this for non-utf-8 by searching backwards */ + str += len; while (len) { int c; - int n = utf8_tounicode(str, &c); + int n = utf8_prev_len(str, len); - str += n; len -= n; + str -= n; + + n = 
utf8_tounicode(str, &c); - if (utf8_strchr(trimchars, trimlen, c) == 0) { - nontrim = str; + if (utf8_memchr(trimchars, trimlen, c) == NULL) { + return str + n; } } - return nontrim; + return NULL; } static const char default_trim_chars[] = " \t\n\r"; @@ -12536,7 +12538,11 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a Jim_SetResultInt(interp, JimStringFirst(s1, l1, s2, l2, idx)); } else { +#ifdef JIM_UTF8 + Jim_SetResultInt(interp, JimStringLastUtf8(s1, l1, s2, idx)); +#else Jim_SetResultInt(interp, JimStringLast(s1, l1, s2, idx)); +#endif } return JIM_OK; } -- cgit v1.1