aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/exp/locale/collate/table.go
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/exp/locale/collate/table.go')
-rw-r--r--libgo/go/exp/locale/collate/table.go150
1 files changed, 150 insertions, 0 deletions
diff --git a/libgo/go/exp/locale/collate/table.go b/libgo/go/exp/locale/collate/table.go
new file mode 100644
index 0000000..c25799b
--- /dev/null
+++ b/libgo/go/exp/locale/collate/table.go
@@ -0,0 +1,150 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package collate
+
+import (
+ "exp/norm"
+ "unicode/utf8"
+)
+
+// tableIndex describes where the data of a locale-specific table starts
+// within the main table. Both offsets are expressed in blocks: indexedTable
+// multiplies them by blockSize before slicing the main trie's index and
+// values.
+type tableIndex struct {
+	lookupOffset, valuesOffset uint32
+}
+
+// table holds all collation data for a given collation ordering.
+type table struct {
+ index trie // main trie; appendNext looks up the collation element for the next rune here
+
+ // expansion info: an expansion is stored as a count followed by that many collation elements
+ expandElem []uint32
+
+ // contraction info
+ contractTries contractTrieSet // source of the scanner used by matchContraction
+ contractElem []uint32 // collation elements of contractions, indexed by scan result plus offset
+ maxContractLen int // not referenced by the code visible in this file
+ variableTop uint32 // not referenced by the code visible in this file
+}
+
+// indexedTable returns a shallow copy of t in which the trie's index0 and
+// values0 are set to the parts of the main trie's index and values that
+// start at the block offsets given by idx. The receiver is not modified.
+func (t *table) indexedTable(idx tableIndex) *table {
+	// Offsets are stored in units of blocks; convert them to element offsets.
+	lookupStart := idx.lookupOffset * blockSize
+	valuesStart := idx.valuesOffset * blockSize
+
+	derived := new(table)
+	*derived = *t
+	derived.index.index0 = t.index.index[lookupStart:]
+	derived.index.values0 = t.index.values[valuesStart:]
+	return derived
+}
+
+// appendNext appends the weights corresponding to the next rune or
+// contraction in s. If a contraction is matched to a discontinuous
+// sequence of runes, the weights for the interstitial runes are
+// appended as well. It returns a new slice that includes the appended
+// weights and the number of bytes consumed from s.
+func (t *table) appendNext(w []weights, s []byte) ([]weights, int) {
+ v, sz := t.index.lookup(s)
+ ce := colElem(v)
+ tp := ce.ctype()
+ if tp == ceNormal {
+ w = append(w, getWeights(ce, s))
+ } else if tp == ceExpansionIndex {
+ w = t.appendExpansion(w, ce)
+ } else if tp == ceContractionIndex {
+ n := 0
+ w, n = t.matchContraction(w, ce, s[sz:])
+ sz += n
+ } else if tp == ceDecompose {
+ // Decompose using NFCK and replace tertiary weights.
+ t1, t2 := splitDecompose(ce)
+ i := len(w)
+ nfkd := norm.NFKD.Properties(s).Decomposition()
+ for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
+ w, p = t.appendNext(w, nfkd)
+ }
+ w[i].tertiary = t1
+ if i++; i < len(w) {
+ w[i].tertiary = t2
+ for i++; i < len(w); i++ {
+ w[i].tertiary = maxTertiary
+ }
+ }
+ }
+ return w, sz
+}
+
+func getWeights(ce colElem, s []byte) weights {
+ if ce == 0 { // implicit
+ r, _ := utf8.DecodeRune(s)
+ return weights{
+ primary: uint32(implicitPrimary(r)),
+ secondary: defaultSecondary,
+ tertiary: defaultTertiary,
+ }
+ }
+ return splitCE(ce)
+}
+
+// appendExpansion appends to w the weights of the expansion that ce refers
+// to and returns the extended slice. The expansion is stored in expandElem
+// as a count immediately followed by that many collation elements.
+func (t *table) appendExpansion(w []weights, ce colElem) []weights {
+	head := splitExpandIndex(ce)
+	first := head + 1
+	last := first + int(t.expandElem[head])
+	for _, elem := range t.expandElem[first:last] {
+		w = append(w, splitCE(colElem(elem)))
+	}
+	return w
+}
+
+func (t *table) matchContraction(w []weights, ce colElem, suffix []byte) ([]weights, int) {
+ index, n, offset := splitContractIndex(ce)
+
+ scan := t.contractTries.scanner(index, n, suffix)
+ buf := [norm.MaxSegmentSize]byte{}
+ bufp := 0
+ p := scan.scan(0)
+
+ if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
+ // By now we should have filtered most cases.
+ p0 := p
+ bufn := 0
+ rune := norm.NFC.Properties(suffix[p:])
+ p += rune.Size()
+ if prevCC := rune.TrailCCC(); prevCC != 0 {
+ // A gap may only occur in the last normalization segment.
+ // This also ensures that len(scan.s) < norm.MaxSegmentSize.
+ if end := norm.NFC.FirstBoundary(suffix[p:]); end != -1 {
+ scan.s = suffix[:p+end]
+ }
+ for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
+ rune = norm.NFC.Properties(suffix[p:])
+ if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
+ break
+ }
+ prevCC = rune.TrailCCC()
+ if pp := scan.scan(p); pp != p {
+ // Copy the interstitial runes for later processing.
+ bufn += copy(buf[bufn:], suffix[p0:p])
+ if scan.pindex == pp {
+ bufp = bufn
+ }
+ p, p0 = pp, pp
+ } else {
+ p += rune.Size()
+ }
+ }
+ }
+ }
+ // Append weights for the matched contraction, which may be an expansion.
+ i, n := scan.result()
+ ce = colElem(t.contractElem[i+offset])
+ if ce.ctype() == ceNormal {
+ w = append(w, splitCE(ce))
+ } else {
+ w = t.appendExpansion(w, ce)
+ }
+ // Append weights for the runes in the segment not part of the contraction.
+ for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
+ w, p = t.appendNext(w, b)
+ }
+ return w, n
+}