From 9a98f12331c336ad7433f51a2edd8531be2086c6 Mon Sep 17 00:00:00 2001 From: Steve Bennett Date: Thu, 21 Apr 2011 20:00:24 +1000 Subject: More efficient [string trimright] and [string last] By searching backwards in [string trimright], even in utf-8 strings. And using binary comparison in [string last] Signed-off-by: Steve Bennett --- utf8.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'utf8.c') diff --git a/utf8.c b/utf8.c index 3be9899..3b32a7f 100644 --- a/utf8.c +++ b/utf8.c @@ -10,6 +10,7 @@ #include #include #include +#include <assert.h> #include "utf8.h" /* This one is always implemented */ @@ -87,6 +88,27 @@ int utf8_charequal(const char *s1, const char *s2) return c1 == c2; } +int utf8_prev_len(const char *str, int len) +{ + int n = 1; + + assert(len > 0); + + /* Look up to len chars backward for a start-of-char byte */ + while (--len) { + if ((str[-n] & 0x80) == 0) { + /* Start of a 1-byte char */ + break; + } + if ((str[-n] & 0xc0) == 0xc0) { + /* Start of a multi-byte char */ + break; + } + n++; + } + return n; +} + int utf8_tounicode(const char *str, int *uc) { unsigned const char *s = (unsigned const char *)str; -- cgit v1.1