diff options
author | Steve Bennett <steveb@workware.net.au> | 2019-10-28 07:38:22 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2019-10-28 07:43:47 +1000 |
commit | 338f5b5681181c771646b6f225f103be4b14ea03 (patch) | |
tree | f5f22e66725e52e6d281ed525f8f6643b22eddc8 /utf8.c | |
parent | 27ed764fa513ea55f92ea1e6be500fb22b30007b (diff) | |
download | jimtcl-338f5b5681181c771646b6f225f103be4b14ea03.zip jimtcl-338f5b5681181c771646b6f225f103be4b14ea03.tar.gz jimtcl-338f5b5681181c771646b6f225f103be4b14ea03.tar.bz2 |
utf8_strlen: Improve performance
Use utf8_charlen() rather than utf8_tounicode()
Note that utf8_charlen() now returns 1 instead of -1 for an invalid
utf-8 start byte.
Reported-by: dbohdan <dbohdan@dbohdan.com>
Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 7 |
1 files changed, 3 insertions, 4 deletions
@@ -56,8 +56,8 @@ int utf8_charlen(int c)
     if ((c & 0xf8) == 0xf0) {
         return 4;
     }
-    /* Invalid sequence */
-    return -1;
+    /* Invalid sequence, so treat it as a single byte */
+    return 1;
 }
 
 int utf8_strlen(const char *str, int bytelen)
@@ -93,8 +93,7 @@ int utf8_index(const char *str, int index)
 {
     const char *s = str;
     while (index--) {
-        int c;
-        s += utf8_tounicode(s, &c);
+        s += utf8_charlen(*s);
     }
     return s - str;
 }