about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- tests/lsort.test 20
-rw-r--r-- tests/utftcl.test 5
-rw-r--r-- utf8.c 15
3 files changed, 17 insertions, 23 deletions
diff --git a/tests/lsort.test b/tests/lsort.test
index 69b7467..5808b89 100644
--- a/tests/lsort.test
+++ b/tests/lsort.test
@@ -203,24 +203,8 @@ test lsort-3.22 {lsort, unique sort with index} {
} {0 4 5}
test lsort-4.26 {DefaultCompare procedure, signed characters} utf8 {
- set l [lsort [list "abc\u80" "abc"]]
- set viewlist {}
- foreach s $l {
- set viewelem ""
- set len [string length $s]
- for {set i 0} {$i < $len} {incr i} {
- set c [string index $s $i]
- scan $c %c d
- if {$d > 0 && $d < 128} {
- append viewelem $c
- } else {
- append viewelem "\\[format %03o [expr {$d & 0xff}]]"
- }
- }
- lappend viewlist $viewelem
- }
- set viewlist
-} [list "abc" "abc\\200"]
+ lsort [list "abc\u80" "abc"]
+} [list "abc" "abc\u80"]
test lsort-5.1 "Sort case insensitive" {
lsort -nocase {ba aB aa ce}
diff --git a/tests/utftcl.test b/tests/utftcl.test
index 33b8933..fac14ce 100644
--- a/tests/utftcl.test
+++ b/tests/utftcl.test
@@ -74,7 +74,8 @@ test utf-4.2 {Tcl_NumUtfChars: length 1} {
test utf-4.3 {Tcl_NumUtfChars: long string} {
testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"]
} {7}
-test utf-4.4 {Tcl_NumUtfChars: #u0000} {
+# \xC0\x80 is an invalid UTF-8 sequence (a non-minimal encoding of U+0000), so a strict decoder should return 2 rather than the 1 expected below.
+test utf-4.4 {Tcl_NumUtfChars: #u0000} tcl {
testnumutfchars [bytestring "\xC0\x80"]
} {1}
test utf-4.5 {Tcl_NumUtfChars: zero length, calc len} {
@@ -86,7 +87,7 @@ test utf-4.6 {Tcl_NumUtfChars: length 1, calc len} {
test utf-4.7 {Tcl_NumUtfChars: long string, calc len} {
testnumutfchars [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] 1
} {7}
-test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} {
+test utf-4.8 {Tcl_NumUtfChars: #u0000, calc len} tcl {
testnumutfchars [bytestring "\xC0\x80"] 1
} {1}
diff --git a/utf8.c b/utf8.c
index 2698f66..56a036b 100644
--- a/utf8.c
+++ b/utf8.c
@@ -131,19 +131,28 @@ int utf8_tounicode(const char *str, int *uc)
if (s[0] < 0xe0) {
if ((s[1] & 0xc0) == 0x80) {
*uc = ((s[0] & ~0xc0) << 6) | (s[1] & ~0x80);
- return 2;
+ if (*uc >= 0x80) {
+ return 2;
+ }
+ /* Otherwise this is an invalid sequence */
}
}
else if (s[0] < 0xf0) {
if (((str[1] & 0xc0) == 0x80) && ((str[2] & 0xc0) == 0x80)) {
*uc = ((s[0] & ~0xe0) << 12) | ((s[1] & ~0x80) << 6) | (s[2] & ~0x80);
- return 3;
+ if (*uc >= 0x800) {
+ return 3;
+ }
+ /* Otherwise this is an invalid sequence */
}
}
else if (s[0] < 0xf8) {
if (((str[1] & 0xc0) == 0x80) && ((str[2] & 0xc0) == 0x80) && ((str[3] & 0xc0) == 0x80)) {
*uc = ((s[0] & ~0xf0) << 18) | ((s[1] & ~0x80) << 12) | ((s[2] & ~0x80) << 6) | (s[3] & ~0x80);
- return 4;
+ if (*uc >= 0x10000) {
+ return 4;
+ }
+ /* Otherwise this is an invalid sequence */
}
}