From 9a98f12331c336ad7433f51a2edd8531be2086c6 Mon Sep 17 00:00:00 2001
From: Steve Bennett <steveb@workware.net.au>
Date: Thu, 21 Apr 2011 20:00:24 +1000
Subject: More efficient [string trimright] and [string last]

By searching backwards in [string trimright], even in utf-8 strings.
And using binary comparison in [string last]

Signed-off-by: Steve Bennett <steveb@workware.net.au>
---
 utf8.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'utf8.c')

diff --git a/utf8.c b/utf8.c
index 3be9899..3b32a7f 100644
--- a/utf8.c
+++ b/utf8.c
@@ -10,6 +10,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
+#include <assert.h>
 #include "utf8.h"
 
 /* This one is always implemented */
@@ -87,6 +88,27 @@ int utf8_charequal(const char *s1, const char *s2)
     return c1 == c2;
 }
 
+int utf8_prev_len(const char *str, int len)
+{
+    int n = 1; /* bytes stepped back from str so far; this is the value returned */
+
+    assert(len > 0); /* NOTE(review): if asserts are compiled out, len <= 0 leaves the loop below no longer limited by len */
+
+    /* Step backward from str[-1] to the nearest start-of-char byte, testing at most len-1 bytes */
+    while (--len) {
+        if ((str[-n] & 0x80) == 0) {
+            /* High bit clear: start of a 1-byte char */
+            break;
+        }
+        if ((str[-n] & 0xc0) == 0xc0) {
+            /* Top two bits set: start of a multi-byte char */
+            break;
+        }
+        n++; /* bit pattern 10xxxxxx (neither test matched): move one byte further back */
+    }
+    return n; /* equals the original len if the loop ran out without seeing a start byte */
+}
+
 int utf8_tounicode(const char *str, int *uc)
 {
     unsigned const char *s = (unsigned const char *)str;
-- 
cgit v1.1