diff options
Diffstat (limited to 'parse-unidata.tcl')
-rw-r--r-- | parse-unidata.tcl | 49 |
1 files changed, 14 insertions, 35 deletions
diff --git a/parse-unidata.tcl b/parse-unidata.tcl index 4b5ec3a..1a927a3 100644 --- a/parse-unidata.tcl +++ b/parse-unidata.tcl @@ -9,56 +9,35 @@ # Parse the unicode data from: http://unicode.org/Public/UNIDATA/UnicodeData.txt # to generate case mapping tables +set map(lower) {} +set map(upper) {} set f [open [lindex $argv 0]] -set extoff 0 -puts "static const struct casemap unicode_case_mapping\[\] = \{" while {[gets $f buf] >= 0} { foreach {code name class x x x x x x x x x upper lower} [split $buf ";"] break - set code 0x$code + set code [string tolower 0x$code] if {$code <= 0x7f} { continue } if {$code > 0xffff} { break } - if {$class ne "Lu" && $class ne "Ll"} { + if {![string match L* $class]} { continue } - if {$upper eq ""} { - set upper $code - } else { - set upper 0x$upper + if {$upper ne ""} { + lappend map(upper) $code [string tolower 0x$upper] } - if {$lower eq ""} { - set lower $code - } else { - set lower 0x$lower + if {$lower ne ""} { + lappend map(lower) $code [string tolower 0x$lower] } - if {$upper == $code && $lower == $code} { - continue - } - set l [expr {$lower - $code}] - set u [expr {$upper - $code}] - if {abs($u) > 127 || abs($l) > 127} { - # Can't encode both in one byte, so use indirection - lappend jumptable $code $lower $upper - set l -128 - set u $extoff - incr extoff - if {$extoff > 0xff} { - error "Too many entries in the offset table!" - } - } - set entry [string tolower "$code, $l, $u"] - puts " { $entry }," } close $f -puts "\};\n" -# Now the jump table -puts "static const struct caseextmap unicode_extmap\[\] = \{" -foreach {c l u} $jumptable { - puts " { $l, $u }," +foreach type {upper lower} { + puts "static const struct casemap unicode_case_mapping_$type\[\] = \{" + foreach {code alt} $map($type) { + puts "\t{ $code, $alt }," + } + puts "\};\n" } -puts "\};\n" |