aboutsummaryrefslogtreecommitdiff
path: root/utf8.c
diff options
context:
space:
mode:
authorSteve Bennett <steveb@workware.net.au>2011-04-21 20:00:24 +1000
committerSteve Bennett <steveb@workware.net.au>2011-04-21 20:46:07 +1000
commit9a98f12331c336ad7433f51a2edd8531be2086c6 (patch)
treebcab9a5b126063f726c4fd26bd5a3a22bdc45221 /utf8.c
parent827c2764f9a4bebff0947ad1fd3cd5fb1f787c4a (diff)
downloadjimtcl-9a98f12331c336ad7433f51a2edd8531be2086c6.zip
jimtcl-9a98f12331c336ad7433f51a2edd8531be2086c6.tar.gz
jimtcl-9a98f12331c336ad7433f51a2edd8531be2086c6.tar.bz2
More efficient [string trimright] and [string last]
This is achieved by searching backwards in [string trimright], even in UTF-8 strings, and by using binary comparison in [string last]. Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'utf8.c')
-rw-r--r--utf8.c22
1 files changed, 22 insertions, 0 deletions
diff --git a/utf8.c b/utf8.c
index 3be9899..3b32a7f 100644
--- a/utf8.c
+++ b/utf8.c
@@ -10,6 +10,7 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
+#include <assert.h>
#include "utf8.h"
/* This one is always implemented */
@@ -87,6 +88,27 @@ int utf8_charequal(const char *s1, const char *s2)
return c1 == c2;
}
/**
 * Returns the length in bytes of the UTF-8 character that ends
 * immediately before 'str'.
 *
 * 'str' points just past the character of interest and 'len' is the
 * number of bytes that may be examined before 'str'. The scan steps
 * backward over continuation bytes (10xxxxxx) until it meets a
 * start-of-char byte: either a single-byte char (0xxxxxxx) or the lead
 * byte of a multi-byte char (11xxxxxx).
 *
 * Only str[-1] .. str[-(len - 1)] are ever read. If none of them is a
 * start byte, the result is 'len', so the return value is always in the
 * range 1..len for a valid call.
 *
 * 'len' must be greater than 0. This is asserted; if assertions are
 * compiled out (NDEBUG) and 'len' <= 0, the function returns 0 without
 * reading any memory rather than scanning backward without a bound.
 */
int utf8_prev_len(const char *str, int len)
{
    int n = 1;

    assert(len > 0);
    if (len <= 0) {
        /* Nothing precedes 'str'; never touch memory in this case */
        return 0;
    }

    /* Look at up to len - 1 bytes backward for a start-of-char byte */
    while (--len > 0) {
        if ((str[-n] & 0x80) == 0) {
            /* Start of a 1-byte char */
            break;
        }
        if ((str[-n] & 0xc0) == 0xc0) {
            /* Start of a multi-byte char */
            break;
        }
        /* Continuation byte: keep looking further back */
        n++;
    }
    return n;
}
+
int utf8_tounicode(const char *str, int *uc)
{
unsigned const char *s = (unsigned const char *)str;