aboutsummaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
author Steve Bennett <steveb@workware.net.au> 2011-07-27 09:31:03 +1000
committer Steve Bennett <steveb@workware.net.au> 2011-12-02 20:56:50 +1000
commit 1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4 (patch)
tree 5764121cb8c33a22892da4e0eb89205a600750d3 /tests
parent c2e5f7502026349106314843cad7f24020aad7fb (diff)
download jimtcl-1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4.zip
jimtcl-1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4.tar.gz
jimtcl-1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4.tar.bz2
Extend UTF-8 support past the BMP
Now codepoints up to U+1FFFFF are supported, including as literals with the new \u{NNNNNN} syntax (up to six hex digits).

Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'tests')
-rw-r--r-- tests/utf8.test 16
-rw-r--r-- tests/utftcl.test 5
2 files changed, 20 insertions, 1 deletions
diff --git a/tests/utf8.test b/tests/utf8.test
index 715df13..287f6b1 100644
--- a/tests/utf8.test
+++ b/tests/utf8.test
@@ -129,4 +129,20 @@ test utf8-7.3 {Upper, lower for titlecase utf-8} {
list [string toupper \u01c5] [string tolower \u01c5]
} "\u01c4 \u01c6"
+test utf8-8.1 {Chars outside the BMP} jim {
+ string length \u{12000}\u{13000}
+} 2
+
+test utf8-8.2 {Chars outside the BMP} jim {
+ string match "ab\[\u{12000}c\]d" ab\u{12000}d
+} 1
+
+test utf8-8.3 {Chars outside the BMP} jim {
+ string last d "ab\u{101fff}cd"
+} 4
+
+test utf8-8.4 {Longer sequences} {
+ string length \u12000
+} 2
+
testreport
diff --git a/tests/utftcl.test b/tests/utftcl.test
index db058c2..a58fdda 100644
--- a/tests/utftcl.test
+++ b/tests/utftcl.test
@@ -53,9 +53,12 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
string length [bytestring "\xE4\xb9\x8e"]
} {1}
-test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
+test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} tcl {
string length [bytestring "\xF4\xA2\xA2\xA2"]
} {4}
+test utf-2.9 {Tcl_UtfToUniChar: 4-byte UTF sequence} jim {
+ string length [bytestring "\xF4\xA2\xA2\xA2"]
+} {1}
test utf-3.1 {Tcl_UtfCharComplete} {
} {}