From fbc62c271669a746eb4f0acbc2f156ba306365d4 Mon Sep 17 00:00:00 2001 From: Steve Bennett Date: Tue, 1 Nov 2011 10:41:08 -0400 Subject: Add support for [string totitle] Signed-off-by: Steve Bennett --- jim.c | 75 +++++++++++++++++++++++++++++++++++++------------------ parse-unidata.tcl | 24 ++++++++++++------ tests/string.test | 13 ++++++++++ utf8.c | 9 +++++++ utf8.h | 10 ++++++++ 5 files changed, 100 insertions(+), 31 deletions(-) diff --git a/jim.c b/jim.c index 9578e1d..2cc3389 100644 --- a/jim.c +++ b/jim.c @@ -2531,9 +2531,19 @@ Jim_Obj *Jim_StringRangeObj(Jim_Interp *interp, #endif } +static void JimStrCopyUpperLower(char *dest, const char *str, int uc) +{ + while (*str) { + int c; + str += utf8_tounicode(str, &c); + dest += utf8_fromunicode(dest, uc ? utf8_upper(c) : utf8_lower(c)); + } + *dest = 0; +} + static Jim_Obj *JimStringToLower(Jim_Interp *interp, Jim_Obj *strObjPtr) { - char *buf, *p; + char *buf; int len; const char *str; @@ -2541,33 +2551,46 @@ static Jim_Obj *JimStringToLower(Jim_Interp *interp, Jim_Obj *strObjPtr) str = Jim_GetString(strObjPtr, &len); - buf = p = Jim_Alloc(len + 1); - while (*str) { - int c; - str += utf8_tounicode(str, &c); - p += utf8_fromunicode(p, utf8_lower(c)); - } - *p = 0; + buf = Jim_Alloc(len + 1); + JimStrCopyUpperLower(buf, str, 0); return Jim_NewStringObjNoAlloc(interp, buf, len); } static Jim_Obj *JimStringToUpper(Jim_Interp *interp, Jim_Obj *strObjPtr) { - char *buf, *p; - int len; + char *buf; const char *str; + int len; - SetStringFromAny(interp, strObjPtr); + if (strObjPtr->typePtr != &stringObjType) { + SetStringFromAny(interp, strObjPtr); + } str = Jim_GetString(strObjPtr, &len); - buf = p = Jim_Alloc(len + 1); - while (*str) { - int c; - str += utf8_tounicode(str, &c); - p += utf8_fromunicode(p, utf8_upper(c)); + buf = Jim_Alloc(len + 1); + JimStrCopyUpperLower(buf, str, 1); + return Jim_NewStringObjNoAlloc(interp, buf, len); +} + +static Jim_Obj *JimStringToTitle(Jim_Interp *interp, Jim_Obj *strObjPtr) +{ + char *buf, *p; + int len; + int c; + const char *str; + + str = Jim_GetString(strObjPtr, &len); + if (len == 0) { + return strObjPtr; } - *p = 0; + buf = p = Jim_Alloc(len + 1); + + str += utf8_tounicode(str, &c); + p += utf8_fromunicode(p, utf8_title(c)); + + JimStrCopyUpperLower(p, str, 0); + return Jim_NewStringObjNoAlloc(interp, buf, len); } @@ -12639,15 +12662,15 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a int opt_case = 1; int option; static const char * const options[] = { - "bytelength", "length", "compare", "match", "equal", "is", "byterange", "range", "map", - "repeat", "reverse", "index", "first", "last", - "trim", "trimleft", "trimright", "tolower", "toupper", NULL + "bytelength", "length", "compare", "match", "equal", "is", "byterange", "range", + "map", "repeat", "reverse", "index", "first", "last", + "trim", "trimleft", "trimright", "tolower", "toupper", "totitle", NULL }; enum { - OPT_BYTELENGTH, OPT_LENGTH, OPT_COMPARE, OPT_MATCH, OPT_EQUAL, OPT_IS, OPT_BYTERANGE, OPT_RANGE, OPT_MAP, - OPT_REPEAT, OPT_REVERSE, OPT_INDEX, OPT_FIRST, OPT_LAST, - OPT_TRIM, OPT_TRIMLEFT, OPT_TRIMRIGHT, OPT_TOLOWER, OPT_TOUPPER + OPT_BYTELENGTH, OPT_LENGTH, OPT_COMPARE, OPT_MATCH, OPT_EQUAL, OPT_IS, OPT_BYTERANGE, OPT_RANGE, + OPT_MAP, OPT_REPEAT, OPT_REVERSE, OPT_INDEX, OPT_FIRST, OPT_LAST, + OPT_TRIM, OPT_TRIMLEFT, OPT_TRIMRIGHT, OPT_TOLOWER, OPT_TOUPPER, OPT_TOTITLE }; static const char * const nocase_options[] = { "-nocase", NULL @@ -12894,6 +12917,7 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a case OPT_TOLOWER: case OPT_TOUPPER: + case OPT_TOTITLE: if (argc != 3) { Jim_WrongNumArgs(interp, 2, argv, "string"); return JIM_ERR; @@ -12901,9 +12925,12 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a if (option == OPT_TOLOWER) { Jim_SetResult(interp, JimStringToLower(interp, argv[2])); } - else { + else if (option == OPT_TOUPPER) { Jim_SetResult(interp, JimStringToUpper(interp, argv[2])); } + else { + Jim_SetResult(interp, JimStringToTitle(interp, argv[2])); + } return JIM_OK; case OPT_IS: diff --git a/parse-unidata.tcl b/parse-unidata.tcl index 1a927a3..348a114 100644 --- a/parse-unidata.tcl +++ b/parse-unidata.tcl @@ -11,30 +11,40 @@ # to generate case mapping tables set map(lower) {} set map(upper) {} +set map(title) {} set f [open [lindex $argv 0]] while {[gets $f buf] >= 0} { - foreach {code name class x x x x x x x x x upper lower} [split $buf ";"] break - set code [string tolower 0x$code] - if {$code <= 0x7f} { + set title "" + set lower "" + set upper "" + foreach {code name class x x x x x x x x x upper lower title} [split $buf ";"] break + set codex [string tolower 0x$code] + if {$codex <= 0x7f} { continue } - if {$code > 0xffff} { + if {$codex > 0xffff} { break } if {![string match L* $class]} { continue } if {$upper ne ""} { - lappend map(upper) $code [string tolower 0x$upper] + lappend map(upper) $codex [string tolower 0x$upper] } if {$lower ne ""} { - lappend map(lower) $code [string tolower 0x$lower] + lappend map(lower) $codex [string tolower 0x$lower] + } + if {$title ne "" && $title ne $upper} { + if {$title eq $code} { + set title 0 + } + lappend map(title) $codex [string tolower 0x$title] } } close $f -foreach type {upper lower} { +foreach type {upper lower title} { puts "static const struct casemap unicode_case_mapping_$type\[\] = \{" foreach {code alt} $map($type) { puts "\t{ $code, $alt }," diff --git a/tests/string.test b/tests/string.test index 5da52df..4cb54ac 100644 --- a/tests/string.test +++ b/tests/string.test @@ -770,6 +770,19 @@ test string-16.6 {string toupper} { string toupper {123#$&*()} } {123#$&*()} +test string-17.1 {string totitle} -body { + string totitle +} -returnCodes error -match glob -result {wrong # args: should be "string totitle string*} +test string-17.3 {string totitle} { + string totitle abCDEf +} {Abcdef} +test string-17.4 {string totitle} { + string totitle "abc xYz" +} {Abc xyz} +test string-17.5 {string totitle} { + string totitle {123#$&*()} +} {123#$&*()} + test string-18.1 {string trim} { list [catch {string trim} msg] } {1} diff --git a/utf8.c b/utf8.c index cd1c88b..1368f00 100644 --- a/utf8.c +++ b/utf8.c @@ -180,4 +180,13 @@ int utf8_lower(int ch) return utf8_map_case(unicode_case_mapping_lower, ARRAYSIZE(unicode_case_mapping_lower), ch); } +int utf8_title(int ch) +{ + int newch = utf8_map_case(unicode_case_mapping_title, ARRAYSIZE(unicode_case_mapping_title), ch); + if (newch != ch) { + return newch ? newch : ch; + } + return utf8_upper(ch); +} + #endif /* JIM_BOOTSTRAP */ diff --git a/utf8.h b/utf8.h index 9ada93f..39da384 100644 --- a/utf8.h +++ b/utf8.h @@ -24,6 +24,7 @@ int utf8_fromunicode(char *p, unsigned short uc); #define utf8_strlen(S, B) ((B) < 0 ? strlen(S) : (B)) #define utf8_tounicode(S, CP) (*(CP) = (unsigned char)*(S), 1) #define utf8_upper(C) toupper(C) +#define utf8_title(C) toupper(C) #define utf8_lower(C) tolower(C) #define utf8_index(C, I) (I) #define utf8_charlen(C) 1 @@ -96,6 +97,15 @@ int utf8_prev_len(const char *str, int len); int utf8_upper(int uc); /** + * Returns the title-case variant of the given unicode codepoint. + * + * If none, returns utf8_upper(). + * + * Unicode code points > \uffff are returned unchanged. + */ +int utf8_title(int uc); + +/** * Returns the lower-case variant of the given unicode codepoint. * * NOTE: Use utf8_upper() in preference for case-insensitive matching. -- cgit v1.1