#!/usr/bin/env tclsh

# Parse the unicode data from: http://unicode.org/Public/UNIDATA/UnicodeData.txt
# to generate case mapping tables
#
# Usage: parse-unidata.tcl UnicodeData.txt > output
#
# Provenance: added as parse-unidata.tcl in commit
# 9f6ad73686d6dc1fc8628be60a0d42a6ee20817c, "Add UTF-8 support to Jim"
# (Steve Bennett, 2010-10-20).
#
# Two C tables are written to stdout:
#
#   unicode_case_mapping[]  One "code, l, u" entry for every Lu/Ll character
#                           in the range 0x80..0xffff that has a case mapping.
#                           Normally l and u are the lowercase and uppercase
#                           code points expressed as offsets from code.
#                           When either offset is outside -127..127, l is the
#                           marker value -128 and u is instead an index into
#                           unicode_extmap[].
#
#   unicode_extmap[]        One "lower, upper" entry (absolute code points)
#                           per indirected character, in index order.
#
# NOTE(review): "struct casemap" and "struct caseextmap" are declared
# elsewhere; the one-byte offset limit below presumably matches their field
# types -- confirm against those declarations.

if {[llength $argv] < 1} {
    puts stderr "Usage: $argv0 UnicodeData.txt"
    exit 1
}

set f [open [lindex $argv 0]]

# Next free index in the indirection table
set extoff 0

# Flat list of code/lower/upper triples needing indirection. Must exist even
# when nothing is ever appended, since it is read unconditionally at the end.
set jumptable {}

puts "static const struct casemap unicode_case_mapping\[\] = \{"
while {[gets $f buf] >= 0} {
    # An empty line would make the foreach below iterate zero times and
    # leave the field variables holding the previous record, so skip it.
    if {[string trim $buf] eq ""} {
        continue
    }

    # Destructure the semicolon separated record: field 0 is the code point,
    # field 2 the general category, fields 12 and 13 the simple uppercase
    # and lowercase mappings. The x placeholders discard fields 3..11.
    foreach {code name class x x x x x x x x x upper lower} [split $buf ";"] break
    set code 0x$code

    # ASCII is not included in the table
    if {$code <= 0x7f} {
        continue
    }
    # Only the BMP is handled. The input is in ascending code point order,
    # so nothing of interest follows the first character above 0xffff.
    if {$code > 0xffff} {
        break
    }
    # Only uppercase and lowercase letters are considered
    if {$class ne "Lu" && $class ne "Ll"} {
        continue
    }

    # A missing mapping means the character maps to itself
    if {$upper eq ""} {
        set upper $code
    } else {
        set upper 0x$upper
    }
    if {$lower eq ""} {
        set lower $code
    } else {
        set lower 0x$lower
    }
    if {$upper == $code && $lower == $code} {
        continue
    }

    set l [expr {$lower - $code}]
    set u [expr {$upper - $code}]
    if {abs($u) > 127 || abs($l) > 127} {
        # Can't encode both in one byte, so use indirection
        lappend jumptable $code $lower $upper
        set l -128
        set u $extoff
        incr extoff
        if {$extoff > 0xff} {
            error "Too many entries in the offset table!"
        }
    }

    # Lowercase the whole entry so hex digits are emitted consistently
    set entry [string tolower "$code, $l, $u"]
    puts " { $entry },"
}
close $f
puts "\};\n"

# Now the jump table
puts "static const struct caseextmap unicode_extmap\[\] = \{"
foreach {c l u} $jumptable {
    puts " { $l, $u },"
}
puts "\};\n"