From 338f5b5681181c771646b6f225f103be4b14ea03 Mon Sep 17 00:00:00 2001
From: Steve Bennett <steveb@workware.net.au>
Date: Mon, 28 Oct 2019 07:38:22 +1000
Subject: utf8_strlen: Improve performance

Use utf8_charlen() rather than utf8_tounicode(), since only the byte
length of each character is needed and not the decoded code point.
Note that utf8_charlen() now returns 1 instead of -1 for an invalid
UTF-8 start byte.

Reported-by: dbohdan <dbohdan@dbohdan.com>
Signed-off-by: Steve Bennett <steveb@workware.net.au>
---
 utf8.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'utf8.c')

diff --git a/utf8.c b/utf8.c
index ffee89d..405c20d 100644
--- a/utf8.c
+++ b/utf8.c
@@ -56,8 +56,8 @@ int utf8_charlen(int c)
     if ((c & 0xf8) == 0xf0) {
         return 4;
     }
-    /* Invalid sequence */
-    return -1;
+    /* Invalid sequence, so treat it as a single byte */
+    return 1;
 }
 
 int utf8_strlen(const char *str, int bytelen)
@@ -93,8 +93,7 @@ int utf8_index(const char *str, int index)
 {
     const char *s = str;
     while (index--) {
-        int c;
-        s += utf8_tounicode(s, &c);
+        s += utf8_charlen(*s);
     }
     return s - str;
 }
-- 
cgit v1.1