aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Steve Bennett <steveb@workware.net.au> 2011-11-01 10:41:08 -0400
committer Steve Bennett <steveb@workware.net.au> 2011-11-28 13:13:41 +1000
commit fbc62c271669a746eb4f0acbc2f156ba306365d4 (patch)
tree 142b2b83dfac664bc87b83accd26e53f693a6d5e
parent 1e6e0d0351f8643ed08f88bd246bf8950c1d7fe1 (diff)
download jimtcl-fbc62c271669a746eb4f0acbc2f156ba306365d4.zip
jimtcl-fbc62c271669a746eb4f0acbc2f156ba306365d4.tar.gz
jimtcl-fbc62c271669a746eb4f0acbc2f156ba306365d4.tar.bz2
Add support for [string totitle]
Signed-off-by: Steve Bennett <steveb@workware.net.au>
-rw-r--r-- jim.c 75
-rw-r--r-- parse-unidata.tcl 24
-rw-r--r-- tests/string.test 13
-rw-r--r-- utf8.c 9
-rw-r--r-- utf8.h 10
5 files changed, 100 insertions, 31 deletions
diff --git a/jim.c b/jim.c
index 9578e1d..2cc3389 100644
--- a/jim.c
+++ b/jim.c
@@ -2531,9 +2531,19 @@ Jim_Obj *Jim_StringRangeObj(Jim_Interp *interp,
#endif
}
+static void JimStrCopyUpperLower(char *dest, const char *str, int uc) /* Copy NUL-terminated str into dest, upper-casing each character when uc is non-zero and lower-casing it otherwise. */
+{
+ while (*str) {
+ int c;
+ str += utf8_tounicode(str, &c); /* read the next character into c and advance str by the returned amount */
+ dest += utf8_fromunicode(dest, uc ? utf8_upper(c) : utf8_lower(c)); /* NOTE(review): no bound on dest; the visible callers allocate len + 1 bytes -- confirm a mapped character never encodes longer than its source */
+ }
+ *dest = 0; /* NUL-terminate the converted copy */
+}
+
static Jim_Obj *JimStringToLower(Jim_Interp *interp, Jim_Obj *strObjPtr)
{
- char *buf, *p;
+ char *buf;
int len;
const char *str;
@@ -2541,33 +2551,46 @@ static Jim_Obj *JimStringToLower(Jim_Interp *interp, Jim_Obj *strObjPtr)
str = Jim_GetString(strObjPtr, &len);
- buf = p = Jim_Alloc(len + 1);
- while (*str) {
- int c;
- str += utf8_tounicode(str, &c);
- p += utf8_fromunicode(p, utf8_lower(c));
- }
- *p = 0;
+ buf = Jim_Alloc(len + 1);
+ JimStrCopyUpperLower(buf, str, 0);
return Jim_NewStringObjNoAlloc(interp, buf, len);
}
static Jim_Obj *JimStringToUpper(Jim_Interp *interp, Jim_Obj *strObjPtr)
{
- char *buf, *p;
- int len;
+ char *buf;
const char *str;
+ int len;
- SetStringFromAny(interp, strObjPtr);
+ if (strObjPtr->typePtr != &stringObjType) { /* only convert objects that are not already of string type */
+ SetStringFromAny(interp, strObjPtr);
+ }
str = Jim_GetString(strObjPtr, &len);
- buf = p = Jim_Alloc(len + 1);
- while (*str) {
- int c;
- str += utf8_tounicode(str, &c);
- p += utf8_fromunicode(p, utf8_upper(c));
+ buf = Jim_Alloc(len + 1); /* sized from the source length plus one for the terminator */
+ JimStrCopyUpperLower(buf, str, 1); /* uc = 1 selects upper case; the helper also writes the terminating NUL */
+ return Jim_NewStringObjNoAlloc(interp, buf, len); /* NOTE(review): reports the source length as the result length -- confirm upper-casing cannot change the encoded size */
+}
+
+static Jim_Obj *JimStringToTitle(Jim_Interp *interp, Jim_Obj *strObjPtr) /* Build a new string object: first character of strObjPtr through utf8_title(), every following character lower-cased. */
+{
+ char *buf, *p;
+ int len;
+ int c;
+ const char *str;
+
+ str = Jim_GetString(strObjPtr, &len);
+ if (len == 0) { /* nothing to convert */
+ return strObjPtr; /* the argument itself is returned, not a copy */
}
- *p = 0;
+ buf = p = Jim_Alloc(len + 1); /* buf keeps the start of the result, p is the write cursor */
+
+ str += utf8_tounicode(str, &c); /* take the first character out of the source */
+ p += utf8_fromunicode(p, utf8_title(c)); /* and store its title-case form */
+
+ JimStrCopyUpperLower(p, str, 0); /* uc = 0: lower-case the remainder; the helper also writes the terminating NUL */
+
return Jim_NewStringObjNoAlloc(interp, buf, len);
}
@@ -12639,15 +12662,15 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a
int opt_case = 1;
int option;
static const char * const options[] = {
- "bytelength", "length", "compare", "match", "equal", "is", "byterange", "range", "map",
- "repeat", "reverse", "index", "first", "last",
- "trim", "trimleft", "trimright", "tolower", "toupper", NULL
+ "bytelength", "length", "compare", "match", "equal", "is", "byterange", "range",
+ "map", "repeat", "reverse", "index", "first", "last",
+ "trim", "trimleft", "trimright", "tolower", "toupper", "totitle", NULL
};
enum
{
- OPT_BYTELENGTH, OPT_LENGTH, OPT_COMPARE, OPT_MATCH, OPT_EQUAL, OPT_IS, OPT_BYTERANGE, OPT_RANGE, OPT_MAP,
- OPT_REPEAT, OPT_REVERSE, OPT_INDEX, OPT_FIRST, OPT_LAST,
- OPT_TRIM, OPT_TRIMLEFT, OPT_TRIMRIGHT, OPT_TOLOWER, OPT_TOUPPER
+ OPT_BYTELENGTH, OPT_LENGTH, OPT_COMPARE, OPT_MATCH, OPT_EQUAL, OPT_IS, OPT_BYTERANGE, OPT_RANGE,
+ OPT_MAP, OPT_REPEAT, OPT_REVERSE, OPT_INDEX, OPT_FIRST, OPT_LAST,
+ OPT_TRIM, OPT_TRIMLEFT, OPT_TRIMRIGHT, OPT_TOLOWER, OPT_TOUPPER, OPT_TOTITLE
};
static const char * const nocase_options[] = {
"-nocase", NULL
@@ -12894,6 +12917,7 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a
case OPT_TOLOWER:
case OPT_TOUPPER:
+ case OPT_TOTITLE:
if (argc != 3) {
Jim_WrongNumArgs(interp, 2, argv, "string");
return JIM_ERR;
@@ -12901,9 +12925,12 @@ static int Jim_StringCoreCommand(Jim_Interp *interp, int argc, Jim_Obj *const *a
if (option == OPT_TOLOWER) {
Jim_SetResult(interp, JimStringToLower(interp, argv[2]));
}
- else {
+ else if (option == OPT_TOUPPER) {
Jim_SetResult(interp, JimStringToUpper(interp, argv[2]));
}
+ else {
+ Jim_SetResult(interp, JimStringToTitle(interp, argv[2]));
+ }
return JIM_OK;
case OPT_IS:
diff --git a/parse-unidata.tcl b/parse-unidata.tcl
index 1a927a3..348a114 100644
--- a/parse-unidata.tcl
+++ b/parse-unidata.tcl
@@ -11,30 +11,40 @@
# to generate case mapping tables
set map(lower) {}
set map(upper) {}
+set map(title) {}
set f [open [lindex $argv 0]]
while {[gets $f buf] >= 0} {
- foreach {code name class x x x x x x x x x upper lower} [split $buf ";"] break
- set code [string tolower 0x$code]
- if {$code <= 0x7f} {
+ set title ""
+ set lower ""
+ set upper ""
+ foreach {code name class x x x x x x x x x upper lower title} [split $buf ";"] break
+ set codex [string tolower 0x$code]
+ if {$codex <= 0x7f} {
continue
}
- if {$code > 0xffff} {
+ if {$codex > 0xffff} {
break
}
if {![string match L* $class]} {
continue
}
if {$upper ne ""} {
- lappend map(upper) $code [string tolower 0x$upper]
+ lappend map(upper) $codex [string tolower 0x$upper]
}
if {$lower ne ""} {
- lappend map(lower) $code [string tolower 0x$lower]
+ lappend map(lower) $codex [string tolower 0x$lower]
+ }
+ if {$title ne "" && $title ne $upper} {
+ if {$title eq $code} {
+ set title 0
+ }
+ lappend map(title) $codex [string tolower 0x$title]
}
}
close $f
-foreach type {upper lower} {
+foreach type {upper lower title} {
puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
foreach {code alt} $map($type) {
puts "\t{ $code, $alt },"
diff --git a/tests/string.test b/tests/string.test
index 5da52df..4cb54ac 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -770,6 +770,19 @@ test string-16.6 {string toupper} {
string toupper {123#$&*()}
} {123#$&*()}
+test string-17.1 {string totitle} -body {
+ string totitle
+} -returnCodes error -match glob -result {wrong # args: should be "string totitle string*}
+test string-17.3 {string totitle} {
+ string totitle abCDEf
+} {Abcdef}
+test string-17.4 {string totitle} {
+ string totitle "abc xYz"
+} {Abc xyz}
+test string-17.5 {string totitle} {
+ string totitle {123#$&*()}
+} {123#$&*()}
+
test string-18.1 {string trim} {
list [catch {string trim} msg]
} {1}
diff --git a/utf8.c b/utf8.c
index cd1c88b..1368f00 100644
--- a/utf8.c
+++ b/utf8.c
@@ -180,4 +180,13 @@ int utf8_lower(int ch)
return utf8_map_case(unicode_case_mapping_lower, ARRAYSIZE(unicode_case_mapping_lower), ch);
}
+int utf8_title(int ch) /* Title-case ch: a differing result from the title table takes precedence (0 standing for ch itself); otherwise defer to utf8_upper(). */
+{
+ int newch = utf8_map_case(unicode_case_mapping_title, ARRAYSIZE(unicode_case_mapping_title), ch);
+ if (newch != ch) {
+ return newch ? newch : ch; /* parse-unidata.tcl emits 0 when a character's title form equals its own code */
+ }
+ return utf8_upper(ch); /* presumably utf8_map_case() hands back ch when the table has no entry -- confirm */
+}
+
#endif /* JIM_BOOTSTRAP */
diff --git a/utf8.h b/utf8.h
index 9ada93f..39da384 100644
--- a/utf8.h
+++ b/utf8.h
@@ -24,6 +24,7 @@ int utf8_fromunicode(char *p, unsigned short uc);
#define utf8_strlen(S, B) ((B) < 0 ? strlen(S) : (B))
#define utf8_tounicode(S, CP) (*(CP) = (unsigned char)*(S), 1)
#define utf8_upper(C) toupper(C)
+#define utf8_title(C) toupper(C)
#define utf8_lower(C) tolower(C)
#define utf8_index(C, I) (I)
#define utf8_charlen(C) 1
@@ -96,6 +97,15 @@ int utf8_prev_len(const char *str, int len);
int utf8_upper(int uc);
/**
+ * Returns the title-case variant of the given unicode codepoint.
+ *
+ * If none, returns utf8_upper().
+ *
+ * Unicode code points > \uffff are returned unchanged.
+ */
+int utf8_title(int uc);
+
+/**
* Returns the lower-case variant of the given unicode codepoint.
*
* NOTE: Use utf8_upper() in preference for case-insensitive matching.