diff options
author | Steve Bennett <steveb@workware.net.au> | 2011-10-10 08:19:01 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2011-10-10 08:19:01 +1000 |
commit | 510a65c687133c537b428992d0b1df6c336deaeb (patch) | |
tree | 4ce036628d4e46767088e3bfdadb032f23bd707a /utf8.c | |
parent | dfbde800afdabc83efc9ebe087b1aed6a90136d8 (diff) | |
download | jimtcl-510a65c687133c537b428992d0b1df6c336deaeb.zip jimtcl-510a65c687133c537b428992d0b1df6c336deaeb.tar.gz jimtcl-510a65c687133c537b428992d0b1df6c336deaeb.tar.bz2 |
Simplify the way Unicode case mapping is done
Smaller, faster and includes title-case characters.
Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'utf8.c')
-rw-r--r-- | utf8.c | 47 |
1 file changed, 19 insertions, 28 deletions
@@ -136,57 +136,48 @@ int utf8_tounicode(const char *str, int *uc) } struct casemap { - unsigned short code; /* code point */ - signed char lowerdelta; /* add for lowercase, or if -128 use the ext table */ - signed char upperdelta; /* add for uppercase, or offset into the ext table */ -}; - -/* Extended table for codepoints where |delta| > 127 */ -struct caseextmap { - unsigned short lower; - unsigned short upper; + unsigned short code; /* code point */ + unsigned short altcode; /* alternate case code point */ }; /* Generated mapping tables */ #include "_unicode_mapping.c" -#define NUMCASEMAP sizeof(unicode_case_mapping) / sizeof(*unicode_case_mapping) +#define ARRAYSIZE(A) sizeof(A) / sizeof(*(A)) static int cmp_casemap(const void *key, const void *cm) { return *(int *)key - (int)((const struct casemap *)cm)->code; } -static int utf8_map_case(int uc, int upper) +static int utf8_map_case(const struct casemap *mapping, int num, int ch) { - const struct casemap *cm = bsearch(&uc, unicode_case_mapping, NUMCASEMAP, sizeof(*unicode_case_mapping), cmp_casemap); + /* We only support 16 bit case mapping */ + if (ch <= 0xffff) { + const struct casemap *cm = + bsearch(&ch, mapping, num, sizeof(*mapping), cmp_casemap); - if (cm) { - if (cm->lowerdelta == -128) { - uc = upper ? unicode_extmap[cm->upperdelta].upper : unicode_extmap[cm->upperdelta].lower; - } - else { - uc += upper ? 
cm->upperdelta : cm->lowerdelta; + if (cm) { + return cm->altcode; } } - return uc; + return ch; } -int utf8_upper(int uc) +int utf8_upper(int ch) { - if (isascii(uc)) { - return toupper(uc); + if (isascii(ch)) { + return toupper(ch); } - return utf8_map_case(uc, 1); + return utf8_map_case(unicode_case_mapping_upper, ARRAYSIZE(unicode_case_mapping_upper), ch); } -int utf8_lower(int uc) +int utf8_lower(int ch) { - if (isascii(uc)) { - return tolower(uc); + if (isascii(ch)) { + return tolower(ch); } - - return utf8_map_case(uc, 0); + return utf8_map_case(unicode_case_mapping_lower, ARRAYSIZE(unicode_case_mapping_lower), ch); } #endif /* JIM_BOOTSTRAP */ |