aboutsummaryrefslogtreecommitdiff
path: root/jim.c
diff options
context:
space:
mode:
author Steve Bennett <steveb@workware.net.au> 2011-04-21 20:00:24 +1000
committer Steve Bennett <steveb@workware.net.au> 2011-04-21 20:46:07 +1000
commit 9a98f12331c336ad7433f51a2edd8531be2086c6 (patch)
tree bcab9a5b126063f726c4fd26bd5a3a22bdc45221 /jim.c
parent 827c2764f9a4bebff0947ad1fd3cd5fb1f787c4a (diff)
download jimtcl-9a98f12331c336ad7433f51a2edd8531be2086c6.zip
jimtcl-9a98f12331c336ad7433f51a2edd8531be2086c6.tar.gz
jimtcl-9a98f12331c336ad7433f51a2edd8531be2086c6.tar.bz2
More efficient [string trimright] and [string last]
By searching backwards in [string trimright], even in utf-8 strings, and by using binary comparison in [string last]. Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'jim.c')
-rw-r--r-- jim.c 72
1 files changed, 39 insertions, 33 deletions
diff --git a/jim.c b/jim.c
index 70e9967..a7d7022 100644
--- a/jim.c
+++ b/jim.c
@@ -354,23 +354,11 @@ static int JimStringFirst(const char *s1, int l1, const char *s2, int l2, int id
return -1;
}
+/**
+ * Note: Lengths and return value are in bytes, not chars.
+ */
static int JimStringLast(const char *s1, int l1, const char *s2, int l2)
{
-#ifdef JIM_UTF8
- int i = 0;
- /* It is too hard to search backwards with utf-8, so just keep using JimStringFirst()
- * until we find the last instance
- */
- int result = -1;
- /* Search is inclusive of l2 */
- l2++;
- while ((i = JimStringFirst(s1, l1, s2, l2, i)) >= 0) {
- int c;
- result = i;
- i += utf8_tounicode(s2 + i, &c);
- }
- return result;
-#else
const char *p;
if (!l1 || !l2 || l1 > l2)
@@ -383,9 +371,22 @@ static int JimStringLast(const char *s1, int l1, const char *s2, int l2)
}
}
return -1;
-#endif
}
+#ifdef JIM_UTF8
+/**
+ * Note: Lengths and return value are in chars.
+ */
+static int JimStringLastUtf8(const char *s1, int l1, const char *s2, int l2)
+{
+ int n = JimStringLast(s1, utf8_index(s1, l1), s2, utf8_index(s2, l2));
+ if (n > 0) {
+ n = utf8_strlen(s2, n);
+ }
+ return n;
+}
+#endif
+
int Jim_WideToString(char *buf, jim_wide wideValue)
{
const char *fmt = "%" JIM_WIDE_MODIFIER;
@@ -2408,22 +2409,26 @@ static Jim_Obj *JimStringToUpper(Jim_Interp *interp, Jim_Obj *strObjPtr)
return Jim_NewStringObjNoAlloc(interp, buf, len);
}
-/* Similar to strchr() except searches a UTF-8 string 'str' of byte length 'len'
+/* Similar to memchr() except searches a UTF-8 string 'str' of byte length 'len'
* for unicode character 'c'.
- * Returns 1 if found or 0 if not
+ * Returns the position if found or NULL if not
*/
-static int utf8_strchr(const char *str, int len, int c)
+static const char *utf8_memchr(const char *str, int len, int c)
{
+#ifdef JIM_UTF8
while (len) {
int sc;
int n = utf8_tounicode(str, &sc);
if (sc == c) {
- return 1;
+ return str;
}
str += n;
len -= n;
}
- return 0;
+ return NULL;
+#else
+ return memchr(str, c, len);
+#endif
}
/**
@@ -2439,7 +2444,7 @@ static const char *JimFindTrimLeft(const char *str, int len, const char *trimcha
int c;
int n = utf8_tounicode(str, &c);
- if (utf8_strchr(trimchars, trimlen, c) == 0) {
+ if (utf8_memchr(trimchars, trimlen, c) == NULL) {
/* Not a trim char, so stop */
break;
}
@@ -2458,26 +2463,23 @@ static const char *JimFindTrimLeft(const char *str, int len, const char *trimcha
*/
static const char *JimFindTrimRight(const char *str, int len, const char *trimchars, int trimlen)
{
- /* It is too hard to search backwards with utf-8, so just examine every char
- * of the string and remember the point just after the last non-trim char
- */
- const char *nontrim = NULL;
-
- /* XXX: Could optimize this for non-utf-8 by searching backwards */
+ str += len;
while (len) {
int c;
- int n = utf8_tounicode(str, &c);
+ int n = utf8_prev_len(str, len);
- str += n;
len -= n;
+ str -= n;
+
+ n = utf8_tounicode(str, &c);
- if (utf8_strchr(trimchars, trimlen, c) == 0) {
- nontrim = str;
+ if (utf8_memchr(trimchars, trimlen, c) == NULL) {
+ return str + n;
}
}
- return nontrim;
+ return NULL;
}
static const char default_trim_chars[] = " \t\n\r";
@@ -12536,7 +12538,11 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a
Jim_SetResultInt(interp, JimStringFirst(s1, l1, s2, l2, idx));
}
else {
+#ifdef JIM_UTF8
+ Jim_SetResultInt(interp, JimStringLastUtf8(s1, l1, s2, idx));
+#else
Jim_SetResultInt(interp, JimStringLast(s1, l1, s2, idx));
+#endif
}
return JIM_OK;
}