diff options
-rw-r--r-- | tests/lsort.test | 20 | ||||
-rw-r--r-- | tests/utftcl.test | 5 | ||||
-rw-r--r-- | utf8.c | 15 |
3 files changed, 17 insertions, 23 deletions
diff --git a/tests/lsort.test b/tests/lsort.test index 69b7467..5808b89 100644 --- a/tests/lsort.test +++ b/tests/lsort.test @@ -203,24 +203,8 @@ test lsort-3.22 {lsort, unique sort with index} { } {0 4 5} test lsort-4.26 {DefaultCompare procedure, signed characters} utf8 { - set l [lsort [list "abc\u80" "abc"]] - set viewlist {} - foreach s $l { - set viewelem "" - set len [string length $s] - for {set i 0} {$i < $len} {incr i} { - set c [string index $s $i] - scan $c %c d - if {$d > 0 && $d < 128} { - append viewelem $c - } else { - append viewelem "\\[format %03o [expr {$d & 0xff}]]" - } - } - lappend viewlist $viewelem - } - set viewlist -} [list "abc" "abc\\200"] + lsort [list "abc\u80" "abc"] +} [list "abc" "abc\u80"] test lsort-5.1 "Sort case insensitive" { lsort -nocase {ba aB aa ce} diff --git a/tests/utftcl.test b/tests/utftcl.test index 33b8933..fac14ce 100644 --- a/tests/utftcl.test +++ b/tests/utftcl.test @@ -74,7 +74,8 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} { test utf-4.3 {Tcl_NumUtfChars: long string} { testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] } {7} -test utf-4.4 {Tcl_NumUtfChars: #u0000} { +# This is an invalid utf-8 sequence. Not minimal, so should return 2 +test utf-4.4 {Tcl_NumUtfChars: #u0000} tcl { testnumutfchars [bytestring "\xC0\x80"] } {1} test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} { @@ -86,7 +87,7 @@ test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} { test utf-4.7 {Tcl_NumUtfChars: long string, calc len} { testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1 } {7} -test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} { +test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} tcl { testnumutfchars [bytestring "\xC0\x80"] 1 } {1} @@ -131,19 +131,28 @@ int utf8_tounicode(const char *str, int *uc) if (s[0] < 0xe0) { if ((s[1] & 0xc0) == 0x80) { *uc = ((s[0] & ~0xc0) << 6) | (s[1] & ~0x80); - return 2; + if (*uc >= 0x80) { + return 2; + } + /* Otherwise this is an invalid sequence */ } } else if (s[0] < 0xf0) { if (((str[1] & 0xc0) == 0x80) && ((str[2] & 0xc0) == 0x80)) { *uc = ((s[0] & ~0xe0) << 12) | ((s[1] & ~0x80) << 6) | (s[2] & ~0x80); - return 3; + if (*uc >= 0x800) { + return 3; + } + /* Otherwise this is an invalid sequence */ } } else if (s[0] < 0xf8) { if (((str[1] & 0xc0) == 0x80) && ((str[2] & 0xc0) == 0x80) && ((str[3] & 0xc0) == 0x80)) { *uc = ((s[0] & ~0xf0) << 18) | ((s[1] & ~0x80) << 12) | ((s[2] & ~0x80) << 6) | (s[3] & ~0x80); - return 4; + if (*uc >= 0x10000) { + return 4; + } + /* Otherwise this is an invalid sequence */ } } |