diff options
author | Steve Bennett <steveb@workware.net.au> | 2019-10-28 07:38:22 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2019-10-28 07:43:47 +1000 |
commit | 338f5b5681181c771646b6f225f103be4b14ea03 (patch) | |
tree | f5f22e66725e52e6d281ed525f8f6643b22eddc8 | |
parent | 27ed764fa513ea55f92ea1e6be500fb22b30007b (diff) | |
download | jimtcl-338f5b5681181c771646b6f225f103be4b14ea03.zip jimtcl-338f5b5681181c771646b6f225f103be4b14ea03.tar.gz jimtcl-338f5b5681181c771646b6f225f103be4b14ea03.tar.bz2 |
utf8_strlen: Improve performance
Use utf8_charlen() rather than utf8_tounicode()
Note that utf8_charlen() now returns 1 instead of -1 for an invalid
utf-8 start byte.
Reported-by: dbohdan <dbohdan@dbohdan.com>
Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r-- | utf8.c | 7 |
-rw-r--r-- | utf8.h | 5 |
2 files changed, 5 insertions, 7 deletions
```diff
diff --git a/utf8.c b/utf8.c
--- a/utf8.c
+++ b/utf8.c
@@ -56,8 +56,8 @@ int utf8_charlen(int c)
     if ((c & 0xf8) == 0xf0) {
         return 4;
     }
-    /* Invalid sequence */
-    return -1;
+    /* Invalid sequence, so treat it as a single byte */
+    return 1;
 }
 
 int utf8_strlen(const char *str, int bytelen)
@@ -93,8 +93,7 @@ int utf8_index(const char *str, int index)
 {
     const char *s = str;
     while (index--) {
-        int c;
-        s += utf8_tounicode(s, &c);
+        s += utf8_charlen(*s);
     }
     return s - str;
 }
diff --git a/utf8.h b/utf8.h
--- a/utf8.h
+++ b/utf8.h
@@ -49,9 +49,8 @@ int utf8_fromunicode(char *p, unsigned uc);
 /**
  * Returns the length of the utf-8 sequence starting with 'c'.
  *
- * Returns 1-4, or -1 if this is not a valid start byte.
- *
- * Note that charlen=4 is not supported by the rest of the API.
+ * Returns 1-4.
+ * If 'c' is not a valid start byte, returns 1.
  */
 int utf8_charlen(int c);
 
```