diff options
author | Steve Bennett <steveb@workware.net.au> | 2010-10-20 16:01:17 +1000 |
---|---|---|
committer | Steve Bennett <steveb@workware.net.au> | 2010-11-17 07:57:37 +1000 |
commit | 9f6ad73686d6dc1fc8628be60a0d42a6ee20817c (patch) | |
tree | 455e400d7d49937b5814d824ff40461aee93b8ff /tests/utf8.test | |
parent | abac7fb5ee7d37150951b9618ba6a0ee57d98085 (diff) | |
download | jimtcl-9f6ad73686d6dc1fc8628be60a0d42a6ee20817c.zip jimtcl-9f6ad73686d6dc1fc8628be60a0d42a6ee20817c.tar.gz jimtcl-9f6ad73686d6dc1fc8628be60a0d42a6ee20817c.tar.bz2 |
Add UTF-8 support to Jim
Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'tests/utf8.test')
-rw-r--r-- | tests/utf8.test | 336 |
1 files changed, 336 insertions, 0 deletions
diff --git a/tests/utf8.test b/tests/utf8.test new file mode 100644 index 0000000..ba39128 --- /dev/null +++ b/tests/utf8.test @@ -0,0 +1,336 @@ +source testing.tcl + +ifutf8 { + +section "string tests" + +test utf8-1.1 "Pattern matching - ?" { + string match "abc?def" "abc\u00b5def" +} 1 + +test utf8-1.2 "Pattern matching - ?" { + string match "abc?def" "abc\u2704def" +} 1 + +test utf8-1.3 "Pattern utf-8 literal" { + string match "ab\u00b5\u2704?" "ab\u00b5\u2704x" +} 1 + +test utf8-1.4 "Pattern utf-8 char sets" { + string match "a\[b\u00b5\]\u2704?" "a\u00b5\u2704x" +} 1 + +test utf8-1.5 "Pattern utf-8 char sets" { + string match "a\[b\u00b5\]\u2704?" "a\u00b6\u2704x" +} 0 +test utf8-1.6 "Pattern utf-8 char sets" { + string match "a\[b\u00b5\]\u2704?" "ab\u2704x" +} 1 +test utf8-1.7 "Pattern utf-8 char sets" { + string match "a\[b\u00b5\]?" "a\u2704x" +} 0 +test utf8-1.8 "Pattern utf-8 char sets" { + string match "a\[\u00b5-\u00c3\]" "a\ubd" +} 1 +test utf8-1.9 "Pattern utf-8 char sets" { + string match "a\[\u00b5-\u00c3\]" "a\uc4" +} 0 + +test utf8-2.1 "Pattern utf-8 nocase" { + string match -nocase "a\u1edc\u1ef4*" "A\u1edd\u1ef5XX" +} 1 + +test utf8-2.2 "Pattern utf-8 case difference" { + string match "a\u1edc\u1ef4*" "A\u1edd\u1ef5XX" +} 0 + +test utf8-3.1 "lsearch -glob" { + lsearch -glob {1 d a\u00b5xyb c} a\ub5*b +} 2 + +test utf8-3.2 "switch -glob" { + switch -glob -- a\ub5xyb a\ub5*b { set x 1 } default { set x 0 } + set x +} 1 + +set x "\ub5test" +test utf8-3.3 "info procs" { + proc $x {} { info procs \[\ub5X]???? } + $x +} $x + +test utf8-3.3 "info commands" { + info commands \[\ub5X]???? +} $x + +test utf8-3.4 "proc name with invalid utf-8" { + catch { proc ab\xc2 {} {} } msg +} 0 + +test utf8-3.5 "rename to invalid name" { + catch { rename ab\xc2 ab\xc3 } msg +} 0 + +catch {rename ab\xc3 ""} + +test utf8-4.1 "split with utf-8" { + split "zy\u2702xw" x +} "zy\u2702 w" + +test utf8-4.2 "split with utf-8" { + split "zy\u2702xw" \u2702 +} "zy xw" + +test utf8-4.2 "split with utf-8" { + split "zy\u2702xw" {} +} "z y \u2702 x w" + +test utf8-5.1 "string first with utf-8" { + string first w "zy\u2702xw" +} 4 + +test utf8-5.2 "string first with utf-8" { + string first \u2702 "\ub5zy\u2702xw" +} 3 + +test utf8-5.3 "string first with utf-8" { + string first \u2704 "\ub5zy\u2702xw" +} -1 + +test utf8-5.4 "string first with utf-8" { + string first \u2702 "\ub5zy\u2702xw\u2702BB" +} 3 + +test utf8-6.1 "string last with utf-8" { + string last w "zy\u2702xw" +} 4 + +test utf8-6.2 "string last with utf-8" { + string last \u2702 "\ub5zy\u2702xw" +} 3 + +test utf8-6.3 "string last with utf-8" { + string last \u2704 "\ub5zy\u2702xw" +} -1 + +test utf8-6.4 "string last with utf-8" { + string last \u2702 "\ub5zy\u2702xw\u2702BB" +} 6 + +test utf8-7.1 "string reverse" { + string reverse \ub5Test\u2702 +} \u2702tseT\ub5 + +# This file contains a collection of tests for tclUtf.c +# Sourcing this file into Tcl runs the tests and generates output for +# errors. No output means no errors were found. +# +# Copyright (c) 1997 Sun Microsystems, Inc. +# Copyright (c) 1998-1999 by Scriptics Corporation. +# +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. +# +# RCS: @(#) $Id: utf.test,v 1.7 2000/04/10 17:19:05 ericm Exp $ + +catch {unset x} + +section "utf tests" + +test utf-1.1 {Tcl_UniCharToUtf: 1 byte sequences} { + set x \u01 +} [bytestring "\x01"] +test utf-1.2 {Tcl_UniCharToUtf: 2 byte sequences} { + set x "\u80" +} [bytestring "\xc2\x80"] +test utf-1.3 {Tcl_UniCharToUtf: 2 byte sequences} { + set x "\ue0" +} [bytestring "\xc3\xa0"] +test utf-1.4 {Tcl_UniCharToUtf: 3 byte sequences} { + set x "\u4e4e" +} [bytestring "\xe4\xb9\x8e"] + +test utf-2.1 {Tcl_UtfToUniChar: low ascii} { + string length "abc" +} {3} +test utf-2.4 {Tcl_UtfToUniChar: lead (2-byte) followed by trail} { + string length [bytestring "\xC2\xa2"] +} {1} +test utf-2.7 {Tcl_UtfToUniChar: lead (3-byte) followed by 2 trail} { + string length [bytestring "\xE4\xb9\x8e"] +} {1} + +test utf-3.1 {Tcl_UtfCharComplete} { +} {} + +test utf-4.1 {Tcl_NumUtfChars: zero length} { + string length "" +} {0} +test utf-4.2 {Tcl_NumUtfChars: length 1} { + string length [bytestring "\xC2\xA2"] +} {1} +test utf-4.3 {Tcl_NumUtfChars: long string} { + string length [bytestring "abc\xC2\xA2\xe4\xb9\x8e\uA2\u4e4e"] +} {7} + +test utf-5.1 {Tcl_UtfFindFirsts} { +} {} + +test utf-6.1 {Tcl_UtfNext} { +} {} + +test utf-7.1 {Tcl_UtfPrev} { +} {} + +test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { + string index abcd 0 +} {a} +test utf-8.2 {Tcl_UniCharAtIndex: index = 0} { + string index \u4e4e\u25a 0 +} "\u4e4e" +test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { + string index abcd 2 +} {c} +test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { + string index \u4e4e\u25a\uff\u543 2 +} "\uff" + +test utf-9.1 {Tcl_UtfAtIndex: index = 0} { + string range abcd 0 2 +} {abc} +test utf-9.2 {Tcl_UtfAtIndex: index > 0} { + string range \u4e4e\u25a\uff\u543klmnop 1 5 +} "\u25a\uff\u543kl" + + +test utf-10.1 {Tcl_UtfBackslash: dst == NULL} { + set x \n +} { +} +test utf-10.2 {Tcl_UtfBackslash: \u subst} { + set x \ua2 +} [bytestring "\xc2\xa2"] +test utf-10.3 {Tcl_UtfBackslash: longer \u subst} { + set x \u4e21 +} [bytestring "\xe4\xb8\xa1"] +test utf-10.4 {Tcl_UtfBackslash: stops at first non-hex} { + set x \u4e2k +} "[bytestring \xd3\xa2]k" +test utf-10.5 {Tcl_UtfBackslash: stops after 4 hex chars} { + set x \u4e216 +} "[bytestring \xe4\xb8\xa1]6" +proc bsCheck {char num} { + global errNum + test utf-10.$errNum "backslash substitution ($num)" { + scan $char %c value + set value + } $num + incr errNum +} +set errNum 6 +bsCheck \b 8 +bsCheck \e 101 +bsCheck \f 12 +bsCheck \n 10 +bsCheck \r 13 +bsCheck \t 9 +bsCheck \v 11 +bsCheck \{ 123 +bsCheck \} 125 +bsCheck \[ 91 +bsCheck \] 93 +bsCheck \$ 36 +bsCheck \ 32 +bsCheck \; 59 +bsCheck \\ 92 +bsCheck \Ca 67 +bsCheck \Ma 77 +bsCheck \CMa 67 +# prior to 8.3, this returned 8, as \8 as accepted as an +# octal value - but it isn't! [Bug: 3975] +bsCheck \8a 56 +bsCheck \14 12 +bsCheck \141 97 +bsCheck b\0 98 +bsCheck \x 120 +bsCheck \xa 10 +bsCheck \xA 10 +bsCheck \x41 65 +#bsCheck \x541 65 +bsCheck \u 117 +bsCheck \uk 117 +bsCheck \u41 65 +bsCheck \ua 10 +bsCheck \uA 10 +bsCheck \ue0 224 +bsCheck \ua1 161 +bsCheck \u4e21 20001 + +test utf-11.1 {Tcl_UtfToUpper} { + string toupper {} +} {} +test utf-11.2 {Tcl_UtfToUpper} { + string toupper abc +} ABC +test utf-11.3 {Tcl_UtfToUpper} { + string toupper \u00e3ab +} \u00c3AB +test utf-11.4 {Tcl_UtfToUpper} { + string toupper \u01e3ab +} \u01e2AB + +test utf-12.1 {Tcl_UtfToLower} { + string tolower {} +} {} +test utf-12.2 {Tcl_UtfToLower} { + string tolower ABC +} abc +test utf-12.3 {Tcl_UtfToLower} { + string tolower \u00c3AB +} \u00e3ab +test utf-12.4 {Tcl_UtfToLower} { + string tolower \u01e2AB +} \u01e3ab + + +test utf-14.1 {Tcl_UtfNcasecmp} { + string compare -nocase a b +} -1 +test utf-14.2 {Tcl_UtfNcasecmp} { + string compare -nocase b a +} 1 +test utf-14.3 {Tcl_UtfNcasecmp} { + string compare -nocase B a +} 1 +test utf-14.4 {Tcl_UtfNcasecmp} { + string compare -nocase aBcB abca +} 1 + +test utf-15.1 {Tcl_UniCharToUpper, negative delta} { + string toupper aA +} AA +test utf-15.2 {Tcl_UniCharToUpper, positive delta} { + string toupper \u0178\u00ff +} \u0178\u0178 +test utf-15.3 {Tcl_UniCharToUpper, no delta} { + string toupper ! +} ! + +test utf-16.1 {Tcl_UniCharToLower, negative delta} { + string tolower aA +} aa +test utf-16.2 {Tcl_UniCharToLower, positive delta} { + string tolower \u0178\u00ff +} \u00ff\u00ff +test utf-17.1 {Tcl_UniCharToLower, no delta} { + string tolower ! +} ! + +test utf-18.1 {append counts correctly} { + set x \u2702XYZ + append x \u2702XYZ + list [string length $x] [string bytelength $x] +} {8 12} + +testreport +} |