Add support for [string totitle]

Signed-off-by: Steve Bennett <steveb@workware.net.au>
author: Steve Bennett <steveb@workware.net.au> 2011-11-01 10:41:08 -0400
committer: Steve Bennett <steveb@workware.net.au> 2011-11-28 13:13:41 +1000
commit: fbc62c271669a746eb4f0acbc2f156ba306365d4 (patch)
tree: 142b2b83dfac664bc87b83accd26e53f693a6d5e
parent: 1e6e0d0351f8643ed08f88bd246bf8950c1d7fe1 (diff)
download: jimtcl-fbc62c271669a746eb4f0acbc2f156ba306365d4.zip
jimtcl-fbc62c271669a746eb4f0acbc2f156ba306365d4.tar.gz
jimtcl-fbc62c271669a746eb4f0acbc2f156ba306365d4.tar.bz2
5 files changed, 100 insertions, 31 deletions
diff --git a/jim.c b/jim.c
index 9578e1d..2cc3389 100644
--- a/jim.c
+++ b/jim.c
@@ -2531,9 +2531,19 @@ Jim_Obj *Jim_StringRangeObj(Jim_Interp *interp,
 #endif
 }
 
+static void JimStrCopyUpperLower(char *dest, const char *str, int uc)
+{
+    while (*str) {
+        int c;
+        str += utf8_tounicode(str, &c);
+        dest += utf8_fromunicode(dest, uc ? utf8_upper(c) : utf8_lower(c));
+    }
+    *dest = 0;
+}
+
 static Jim_Obj *JimStringToLower(Jim_Interp *interp, Jim_Obj *strObjPtr)
 {
-    char *buf, *p;
+    char *buf;
     int len;
     const char *str;
 
@@ -2541,33 +2551,46 @@ static Jim_Obj *JimStringToLower(Jim_Interp *interp, Jim_Obj *strObjPtr)
 
     str = Jim_GetString(strObjPtr, &len);
 
-    buf = p = Jim_Alloc(len + 1);
-    while (*str) {
-        int c;
-        str += utf8_tounicode(str, &c);
-        p += utf8_fromunicode(p, utf8_lower(c));
-    }
-    *p = 0;
+    buf = Jim_Alloc(len + 1);
+    JimStrCopyUpperLower(buf, str, 0);
     return Jim_NewStringObjNoAlloc(interp, buf, len);
 }
 
 static Jim_Obj *JimStringToUpper(Jim_Interp *interp, Jim_Obj *strObjPtr)
 {
-    char *buf, *p;
-    int len;
+    char *buf;
     const char *str;
+    int len;
 
-    SetStringFromAny(interp, strObjPtr);
+    if (strObjPtr->typePtr != &stringObjType) {
+        SetStringFromAny(interp, strObjPtr);
+    }
 
     str = Jim_GetString(strObjPtr, &len);
 
-    buf = p = Jim_Alloc(len + 1);
-    while (*str) {
-        int c;
-        str += utf8_tounicode(str, &c);
-        p += utf8_fromunicode(p, utf8_upper(c));
+    buf = Jim_Alloc(len + 1);
+    JimStrCopyUpperLower(buf, str, 1);
+    return Jim_NewStringObjNoAlloc(interp, buf, len);
+}
+
+static Jim_Obj *JimStringToTitle(Jim_Interp *interp, Jim_Obj *strObjPtr)
+{
+    char *buf, *p;
+    int len;
+    int c;
+    const char *str;
+
+    str = Jim_GetString(strObjPtr, &len);
+    if (len == 0) {
+        return strObjPtr;
     }
-    *p = 0;
+    buf = p = Jim_Alloc(len + 1);
+
+    str += utf8_tounicode(str, &c);
+    p += utf8_fromunicode(p, utf8_title(c));
+
+    JimStrCopyUpperLower(p, str, 0);
+
     return Jim_NewStringObjNoAlloc(interp, buf, len);
 }
 
@@ -12639,15 +12662,15 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a
     int opt_case = 1;
     int option;
     static const char * const options[] = {
-        "bytelength", "length", "compare", "match", "equal", "is", "byterange", "range", "map",
-        "repeat", "reverse", "index", "first", "last",
-        "trim", "trimleft", "trimright", "tolower", "toupper", NULL
+        "bytelength", "length", "compare", "match", "equal", "is", "byterange", "range",
+        "map", "repeat", "reverse", "index", "first", "last",
+        "trim", "trimleft", "trimright", "tolower", "toupper", "totitle", NULL
     };
     enum
     {
-        OPT_BYTELENGTH, OPT_LENGTH, OPT_COMPARE, OPT_MATCH, OPT_EQUAL, OPT_IS, OPT_BYTERANGE, OPT_RANGE, OPT_MAP,
-        OPT_REPEAT, OPT_REVERSE, OPT_INDEX, OPT_FIRST, OPT_LAST,
-        OPT_TRIM, OPT_TRIMLEFT, OPT_TRIMRIGHT, OPT_TOLOWER, OPT_TOUPPER
+        OPT_BYTELENGTH, OPT_LENGTH, OPT_COMPARE, OPT_MATCH, OPT_EQUAL, OPT_IS, OPT_BYTERANGE, OPT_RANGE,
+        OPT_MAP, OPT_REPEAT, OPT_REVERSE, OPT_INDEX, OPT_FIRST, OPT_LAST,
+        OPT_TRIM, OPT_TRIMLEFT, OPT_TRIMRIGHT, OPT_TOLOWER, OPT_TOUPPER, OPT_TOTITLE
     };
     static const char * const nocase_options[] = {
         "-nocase", NULL
@@ -12894,6 +12917,7 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a
 
         case OPT_TOLOWER:
         case OPT_TOUPPER:
+        case OPT_TOTITLE:
             if (argc != 3) {
                 Jim_WrongNumArgs(interp, 2, argv, "string");
                 return JIM_ERR;
@@ -12901,9 +12925,12 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a
             if (option == OPT_TOLOWER) {
                 Jim_SetResult(interp, JimStringToLower(interp, argv[2]));
             }
-            else {
+            else if (option == OPT_TOUPPER) {
                 Jim_SetResult(interp, JimStringToUpper(interp, argv[2]));
             }
+            else {
+                Jim_SetResult(interp, JimStringToTitle(interp, argv[2]));
+            }
             return JIM_OK;
 
         case OPT_IS:
diff --git a/parse-unidata.tcl b/parse-unidata.tcl
index 1a927a3..348a114 100644
--- a/parse-unidata.tcl
+++ b/parse-unidata.tcl
@@ -11,30 +11,40 @@
 # to generate case mapping tables
 set map(lower) {}
 set map(upper) {}
+set map(title) {}
 
 set f [open [lindex $argv 0]]
 while {[gets $f buf] >= 0} {
-	foreach {code name class x x x x x x x x x upper lower} [split $buf ";"] break
-	set code [string tolower 0x$code]
-	if {$code <= 0x7f} {
+	set title ""
+	set lower ""
+	set upper ""
+	foreach {code name class x x x x x x x x x upper lower title} [split $buf ";"] break
+	set codex [string tolower 0x$code]
+	if {$codex <= 0x7f} {
 		continue
 	}
-	if {$code > 0xffff} {
+	if {$codex > 0xffff} {
 		break
 	}
 	if {![string match L* $class]} {
 		continue
 	}
 	if {$upper ne ""} {
-		lappend map(upper) $code [string tolower 0x$upper]
+		lappend map(upper) $codex [string tolower 0x$upper]
 	}
 	if {$lower ne ""} {
-		lappend map(lower) $code [string tolower 0x$lower]
+		lappend map(lower) $codex [string tolower 0x$lower]
+	}
+	if {$title ne "" && $title ne $upper} {
+		if {$title eq $code} {
+			set title 0
+		}
+		lappend map(title) $codex [string tolower 0x$title]
 	}
 }
 close $f
 
-foreach type {upper lower} {
+foreach type {upper lower title} {
 	puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
 	foreach {code alt} $map($type) {
 		puts "\t{ $code, $alt },"
diff --git a/tests/string.test b/tests/string.test
index 5da52df..4cb54ac 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -770,6 +770,19 @@ test string-16.6 {string toupper} {
     string toupper {123#$&*()}
 } {123#$&*()}
 
+test string-17.1 {string totitle} -body {
+    string totitle
+}  -returnCodes error -match glob -result {wrong # args: should be "string totitle string*}
+test string-17.3 {string totitle} {
+    string totitle abCDEf
+} {Abcdef}
+test string-17.4 {string totitle} {
+    string totitle "abc xYz"
+} {Abc xyz}
+test string-17.5 {string totitle} {
+    string totitle {123#$&*()}
+} {123#$&*()}
+
 test string-18.1 {string trim} {
     list [catch {string trim} msg]
 } {1}
diff --git a/utf8.c b/utf8.c
index cd1c88b..1368f00 100644
--- a/utf8.c
+++ b/utf8.c
@@ -180,4 +180,13 @@ int utf8_lower(int ch)
     return utf8_map_case(unicode_case_mapping_lower, ARRAYSIZE(unicode_case_mapping_lower), ch);
 }
 
+int utf8_title(int ch)
+{
+    int newch = utf8_map_case(unicode_case_mapping_title, ARRAYSIZE(unicode_case_mapping_title), ch);
+    if (newch != ch) {
+        return newch ? newch : ch;
+    }
+    return utf8_upper(ch);
+}
+
 #endif /* JIM_BOOTSTRAP */
diff --git a/utf8.h b/utf8.h
index 9ada93f..39da384 100644
--- a/utf8.h
+++ b/utf8.h
@@ -24,6 +24,7 @@ int utf8_fromunicode(char *p, unsigned short uc);
 #define utf8_strlen(S, B) ((B) < 0 ? strlen(S) : (B))
 #define utf8_tounicode(S, CP) (*(CP) = (unsigned char)*(S), 1)
 #define utf8_upper(C) toupper(C)
+#define utf8_title(C) toupper(C)
 #define utf8_lower(C) tolower(C)
 #define utf8_index(C, I) (I)
 #define utf8_charlen(C) 1
@@ -96,6 +97,15 @@ int utf8_prev_len(const char *str, int len);
 int utf8_upper(int uc);
 
 /**
+ * Returns the title-case variant of the given unicode codepoint.
+ *
+ * If the codepoint has no title-case mapping of its own, returns utf8_upper(uc).
+ *
+ * Unicode code points > \uffff are returned unchanged.
+ */
+int utf8_title(int uc);
+
+/**
  * Returns the lower-case variant of the given unicode codepoint.
  *
  * NOTE: Use utf8_upper() in preference for case-insensitive matching.
author	Steve Bennett <steveb@workware.net.au>	2011-11-01 10:41:08 -0400
committer	Steve Bennett <steveb@workware.net.au>	2011-11-28 13:13:41 +1000
commit	fbc62c271669a746eb4f0acbc2f156ba306365d4 (patch)
tree	142b2b83dfac664bc87b83accd26e53f693a6d5e
parent	1e6e0d0351f8643ed08f88bd246bf8950c1d7fe1 (diff)
download	jimtcl-fbc62c271669a746eb4f0acbc2f156ba306365d4.zip jimtcl-fbc62c271669a746eb4f0acbc2f156ba306365d4.tar.gz jimtcl-fbc62c271669a746eb4f0acbc2f156ba306365d4.tar.bz2