From 338f5b5681181c771646b6f225f103be4b14ea03 Mon Sep 17 00:00:00 2001
From: Steve Bennett
Date: Mon, 28 Oct 2019 07:38:22 +1000
Subject: utf8_strlen: Improve performance

Use utf8_charlen() rather than utf8_tounicode()

Note that utf8_charlen() now returns 1 instead of -1 for an invalid utf-8 start byte.

Reported-by: dbohdan
Signed-off-by: Steve Bennett
---
 utf8.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'utf8.c')

diff --git a/utf8.c b/utf8.c
index ffee89d..405c20d 100644
--- a/utf8.c
+++ b/utf8.c
@@ -56,8 +56,8 @@ int utf8_charlen(int c)
     if ((c & 0xf8) == 0xf0) {
         return 4;
     }
-    /* Invalid sequence */
-    return -1;
+    /* Invalid sequence, so treat it as a single byte */
+    return 1;
 }
 
 int utf8_strlen(const char *str, int bytelen)
@@ -93,8 +93,7 @@ int utf8_index(const char *str, int index)
 {
     const char *s = str;
     while (index--) {
-        int c;
-        s += utf8_tounicode(s, &c);
+        s += utf8_charlen(*s);
     }
     return s - str;
 }
-- 
cgit v1.1