diff options
Diffstat (limited to 'libgo/go/exp/locale/collate/contract.go')
-rw-r--r-- | libgo/go/exp/locale/collate/contract.go | 145 |
1 files changed, 0 insertions, 145 deletions
diff --git a/libgo/go/exp/locale/collate/contract.go b/libgo/go/exp/locale/collate/contract.go deleted file mode 100644 index 7ce6b1f..0000000 --- a/libgo/go/exp/locale/collate/contract.go +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright 2012 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package collate - -import "unicode/utf8" - -// For a description of contractTrieSet, see exp/locale/collate/build/contract.go. - -type contractTrieSet []struct{ l, h, n, i uint8 } - -// ctScanner is used to match a trie to an input sequence. -// A contraction may match a non-contiguous sequence of bytes in an input string. -// For example, if there is a contraction for <a, combining_ring>, it should match -// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does -// not block combining_ring. -// ctScanner does not automatically skip over non-blocking non-starters, but rather -// retains the state of the last match and leaves it up to the user to continue -// the match at the appropriate points. -type ctScanner struct { - states contractTrieSet - s []byte - n int - index int - pindex int - done bool -} - -type ctScannerString struct { - states contractTrieSet - s string - n int - index int - pindex int - done bool -} - -func (t contractTrieSet) scanner(index, n int, b []byte) ctScanner { - return ctScanner{s: b, states: t[index:], n: n} -} - -func (t contractTrieSet) scannerString(index, n int, str string) ctScannerString { - return ctScannerString{s: str, states: t[index:], n: n} -} - -// result returns the offset i and bytes consumed p so far. If no suffix -// matched, i and p will be 0. -func (s *ctScanner) result() (i, p int) { - return s.index, s.pindex -} - -func (s *ctScannerString) result() (i, p int) { - return s.index, s.pindex -} - -const ( - final = 0 - noIndex = 0xFF -) - -// scan matches the longest suffix at the current location in the input -// and returns the number of bytes consumed. -func (s *ctScanner) scan(p int) int { - pr := p // the p at the rune start - str := s.s - states, n := s.states, s.n - for i := 0; i < n && p < len(str); { - e := states[i] - c := str[p] - // TODO: a significant number of contractions are of a form that - // cannot match discontiguous UTF-8 in a normalized string. We could let - // a negative value of e.n mean that we can set s.done = true and avoid - // the need for additional matches. - if c >= e.l { - if e.l == c { - p++ - if e.i != noIndex { - s.index = int(e.i) - s.pindex = p - } - if e.n != final { - i, states, n = 0, states[int(e.h)+n:], int(e.n) - if p >= len(str) || utf8.RuneStart(str[p]) { - s.states, s.n, pr = states, n, p - } - } else { - s.done = true - return p - } - continue - } else if e.n == final && c <= e.h { - p++ - s.done = true - s.index = int(c-e.l) + int(e.i) - s.pindex = p - return p - } - } - i++ - } - return pr -} - -// scan is a verbatim copy of ctScanner.scan. -func (s *ctScannerString) scan(p int) int { - pr := p // the p at the rune start - str := s.s - states, n := s.states, s.n - for i := 0; i < n && p < len(str); { - e := states[i] - c := str[p] - // TODO: a significant number of contractions are of a form that - // cannot match discontiguous UTF-8 in a normalized string. We could let - // a negative value of e.n mean that we can set s.done = true and avoid - // the need for additional matches. - if c >= e.l { - if e.l == c { - p++ - if e.i != noIndex { - s.index = int(e.i) - s.pindex = p - } - if e.n != final { - i, states, n = 0, states[int(e.h)+n:], int(e.n) - if p >= len(str) || utf8.RuneStart(str[p]) { - s.states, s.n, pr = states, n, p - } - } else { - s.done = true - return p - } - continue - } else if e.n == final && c <= e.h { - p++ - s.done = true - s.index = int(c-e.l) + int(e.i) - s.pindex = p - return p - } - } - i++ - } - return pr -} |