aboutsummaryrefslogtreecommitdiff
path: root/parse-unidata.tcl
diff options
context:
space:
mode:
authorSteve Bennett <steveb@workware.net.au>2010-10-20 16:01:17 +1000
committerSteve Bennett <steveb@workware.net.au>2010-11-17 07:57:37 +1000
commit9f6ad73686d6dc1fc8628be60a0d42a6ee20817c (patch)
tree455e400d7d49937b5814d824ff40461aee93b8ff /parse-unidata.tcl
parentabac7fb5ee7d37150951b9618ba6a0ee57d98085 (diff)
downloadjimtcl-9f6ad73686d6dc1fc8628be60a0d42a6ee20817c.zip
jimtcl-9f6ad73686d6dc1fc8628be60a0d42a6ee20817c.tar.gz
jimtcl-9f6ad73686d6dc1fc8628be60a0d42a6ee20817c.tar.bz2
Add UTF-8 support to Jim
Signed-off-by: Steve Bennett <steveb@workware.net.au>
Diffstat (limited to 'parse-unidata.tcl')
-rw-r--r--parse-unidata.tcl57
1 files changed, 57 insertions, 0 deletions
diff --git a/parse-unidata.tcl b/parse-unidata.tcl
new file mode 100644
index 0000000..9e41e1f
--- /dev/null
+++ b/parse-unidata.tcl
@@ -0,0 +1,57 @@
+#!/usr/bin/env tclsh
+# Generate C case-mapping tables from the Unicode Character Database.
+# Usage: parse-unidata.tcl UnicodeData.txt
+# Input: http://unicode.org/Public/UNIDATA/UnicodeData.txt
+# Writes unicode_case_mapping[] to stdout: one "{ code, l, u }" row for each
+# Lu/Ll character in 0x80..0xffff that has a case mapping other than itself;
+# l and u are the deltas from code to its lower and upper case forms. If either
+# delta exceeds +/-127, l is -128 and u is a row index into unicode_extmap[],
+# which stores the absolute "{ lower, upper }" code points.
+
+if {[llength $argv] == 0} {
+    puts stderr "Usage: $argv0 UnicodeData.txt"
+    exit 1
+}
+set f [open [lindex $argv 0]]
+set extoff 0
+# Must exist even if no row needs indirection, or the final foreach fails
+set jumptable {}
+puts "static const struct casemap unicode_case_mapping\[\] = \{"
+while {[gets $f buf] >= 0} {
+    # Fields used: 0=code point, 2=general category, 12=upper, 13=lower
+    foreach {code name class x x x x x x x x x upper lower} [split $buf ";"] break
+    set code 0x$code
+    if {$code <= 0x7f} continue
+    # Stops at the first code point past 0xffff (assumes ascending input)
+    if {$code > 0xffff} break
+    if {$class ne "Lu" && $class ne "Ll"} continue
+    # An empty mapping field is treated as the character mapping to itself
+    if {$upper eq ""} { set upper $code } else { set upper 0x$upper }
+    if {$lower eq ""} { set lower $code } else { set lower 0x$lower }
+    if {$upper == $code && $lower == $code} continue
+    set l [expr {$lower - $code}]
+    set u [expr {$upper - $code}]
+    if {abs($u) > 127 || abs($l) > 127} {
+        # Can't encode both in one byte, so use indirection
+        lappend jumptable $code $lower $upper
+        set l -128
+        set u $extoff
+        incr extoff
+        # The 256th indirect entry aborts the run
+        if {$extoff > 0xff} {
+            error "Too many entries in the offset table!"
+        }
+    }
+    # Only $code carries hex digits; fold them to lower case for the output
+    set entry [string tolower "$code, $l, $u"]
+    puts " { $entry },"
+}
+close $f
+puts "\};\n"
+
+# Now the jump table; $c is not emitted, rows are addressed by position
+puts "static const struct caseextmap unicode_extmap\[\] = \{"
+foreach {c l u} $jumptable {
+    puts " { $l, $u },"
+}
+puts "\};\n"