diff options
author | Steve Bennett <steveb@workware.net.au> | 2011-07-27 09:31:03 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2011-12-02 20:56:50 +1000 |
commit | 1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4 (patch) | |
tree | 5764121cb8c33a22892da4e0eb89205a600750d3 /tests | |
parent | c2e5f7502026349106314843cad7f24020aad7fb (diff) | |
download | jimtcl-1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4.zip jimtcl-1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4.tar.gz jimtcl-1d72edfab5cff0e7bbc1e1e22ae7b1b6bdc756b4.tar.bz2 |
Extend UTF-8 support past the BMP
Now codepoints up to U+1FFFFF are supported, including
as literals with the new \u{NNNNNN} syntax (up to six hex digits)
Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/utf8.test | 16 | ||||
-rw-r--r-- | tests/utftcl.test | 5 |
2 files changed, 20 insertions, 1 deletions
diff --git a/tests/utf8.test b/tests/utf8.test
index 715df13..287f6b1 100644
--- a/tests/utf8.test
+++ b/tests/utf8.test
@@ -129,4 +129,20 @@ test utf8-7.3 {Upper, lower for titlecase utf-8} {
     list [string toupper \u01c5] [string tolower \u01c5]
 } "\u01c4 \u01c6"
 
+test utf8-8.1 {Chars outside the BMP} jim {
+    string length \u{12000}\u{13000}
+} 2
+
+test utf8-8.2 {Chars outside the BMP} jim {
+    string match "ab\[\u{12000}c\]d" ab\u{12000}d
+} 1
+
+test utf8-8.3 {Chars outside the BMP} jim {
+    string last d "ab\u{101fff}cd"
+} 4
+
+test utf8-8.4 {Longer sequences} {
+    string length \u12000
+} 2
+
 testreport
diff --git a/tests/utftcl.test b/tests/utftcl.test
index db058c2..a58fdda 100644
--- a/tests/utftcl.test
+++ b/tests/utftcl.test
@@ -53,9 +53,12 @@ test utf-2.6 {Tcl_UtfToUniChar: lead (3-byte) followed by 1 trail} {
 test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} {
     string length [bytestring "\xE4\xb9\x8e"]
 } {1}
-test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} {
+test utf-2.8 {Tcl_UtfToUniChar: longer UTF sequences not supported} tcl {
     string length [bytestring "\xF4\xA2\xA2\xA2"]
 } {4}
+test utf-2.9 {Tcl_UtfToUniChar: 4-byte UTF sequence} jim {
+    string length [bytestring "\xF4\xA2\xA2\xA2"]
+} {1}
 
 test utf-3.1 {Tcl_UtfCharComplete} {
 } {}