diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2012-03-02 20:01:37 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2012-03-02 20:01:37 +0000 |
commit | 501699af1603287b1b47ac450fd6eeb826aa76b1 (patch) | |
tree | 3eeb8918d39d675108073c8b76d6dd10586a608c /libgo/go/exp/norm/iter.go | |
parent | 34c5f21a387dc461042bafc3052ce6e1af786a77 (diff) | |
download | gcc-501699af1603287b1b47ac450fd6eeb826aa76b1.zip gcc-501699af1603287b1b47ac450fd6eeb826aa76b1.tar.gz gcc-501699af1603287b1b47ac450fd6eeb826aa76b1.tar.bz2 |
libgo: Update to weekly.2012-02-22 release.
From-SVN: r184819
Diffstat (limited to 'libgo/go/exp/norm/iter.go')
-rw-r--r-- | libgo/go/exp/norm/iter.go | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/libgo/go/exp/norm/iter.go b/libgo/go/exp/norm/iter.go new file mode 100644 index 0000000..761ba90 --- /dev/null +++ b/libgo/go/exp/norm/iter.go @@ -0,0 +1,286 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package norm + +const MaxSegmentSize = maxByteBufferSize + +// An Iter iterates over a string or byte slice, while normalizing it +// to a given Form. +type Iter struct { + rb reorderBuffer + info runeInfo // first character saved from previous iteration + next iterFunc // implementation of next depends on form + + p int // current position in input source + outStart int // start of current segment in output buffer + inStart int // start of current segment in input source + maxp int // position in output buffer after which not to start a new segment + maxseg int // for tracking an excess of combining characters + + tccc uint8 + done bool +} + +type iterFunc func(*Iter, []byte) int + +// SetInput initializes i to iterate over src after normalizing it to Form f. +func (i *Iter) SetInput(f Form, src []byte) { + i.rb.init(f, src) + if i.rb.f.composing { + i.next = nextComposed + } else { + i.next = nextDecomposed + } + i.p = 0 + if i.done = len(src) == 0; !i.done { + i.info = i.rb.f.info(i.rb.src, i.p) + } +} + +// SetInputString initializes i to iterate over src after normalizing it to Form f. +func (i *Iter) SetInputString(f Form, src string) { + i.rb.initString(f, src) + if i.rb.f.composing { + i.next = nextComposed + } else { + i.next = nextDecomposed + } + i.p = 0 + if i.done = len(src) == 0; !i.done { + i.info = i.rb.f.info(i.rb.src, i.p) + } +} + +// Pos returns the byte position at which the next call to Next will commence processing. +func (i *Iter) Pos() int { + return i.p +} + +// Done returns true if there is no more input to process. +func (i *Iter) Done() bool { + return i.done +} + +// Next writes f(i.input[i.Pos():n]...) to buffer buf, where n is the +// largest boundary of i.input such that the result fits in buf. +// It returns the number of bytes written to buf. +// len(buf) should be at least MaxSegmentSize. +// Done must be false before calling Next. +func (i *Iter) Next(buf []byte) int { + return i.next(i, buf) +} + +func (i *Iter) initNext(outn, inStart int) { + i.outStart = 0 + i.inStart = inStart + i.maxp = outn - MaxSegmentSize + i.maxseg = MaxSegmentSize +} + +// setStart resets the start of the new segment to the given position. +// It returns true if there is not enough room for the new segment. +func (i *Iter) setStart(outp, inp int) bool { + if outp > i.maxp { + return true + } + i.outStart = outp + i.inStart = inp + i.maxseg = outp + MaxSegmentSize + return false +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +// nextDecomposed is the implementation of Next for forms NFD and NFKD. +func nextDecomposed(i *Iter, out []byte) int { + var outp int + i.initNext(len(out), i.p) +doFast: + inCopyStart, outCopyStart := i.p, outp // invariant xCopyStart <= i.xStart + for { + if sz := int(i.info.size); sz <= 1 { + // ASCII or illegal byte. Either way, advance by 1. + i.p++ + outp++ + max := min(i.rb.nsrc, len(out)-outp+i.p) + if np := i.rb.src.skipASCII(i.p, max); np > i.p { + outp += np - i.p + i.p = np + if i.p >= i.rb.nsrc { + break + } + // ASCII may combine with consecutive runes. + if i.setStart(outp-1, i.p-1) { + i.p-- + outp-- + i.info.size = 1 + break + } + } + } else if d := i.info.decomposition(); d != nil { + i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p) + p := outp + len(d) + if p > i.maxseg && i.setStart(outp, i.p) { + return outp + } + copy(out[outp:], d) + outp = p + i.p += sz + inCopyStart, outCopyStart = i.p, outp + } else if r := i.rb.src.hangul(i.p); r != 0 { + i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p) + for { + outp += decomposeHangul(out[outp:], r) + i.p += hangulUTF8Size + if r = i.rb.src.hangul(i.p); r == 0 { + break + } + if i.setStart(outp, i.p) { + return outp + } + } + inCopyStart, outCopyStart = i.p, outp + } else { + p := outp + sz + if p > i.maxseg && i.setStart(outp, i.p) { + break + } + outp = p + i.p += sz + } + if i.p >= i.rb.nsrc { + break + } + prevCC := i.info.tccc + i.info = i.rb.f.info(i.rb.src, i.p) + if cc := i.info.ccc; cc == 0 { + if i.setStart(outp, i.p) { + break + } + } else if cc < prevCC { + goto doNorm + } + } + if inCopyStart != i.p { + i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p) + } + i.done = i.p >= i.rb.nsrc + return outp +doNorm: + // Insert what we have decomposed so far in the reorderBuffer. + // As we will only reorder, there will always be enough room. + i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p) + if !i.rb.insertDecomposed(out[i.outStart:outp]) { + // Start over to prevent decompositions from crossing segment boundaries. + // This is a rare occurance. + i.p = i.inStart + i.info = i.rb.f.info(i.rb.src, i.p) + } + outp = i.outStart + for { + if !i.rb.insert(i.rb.src, i.p, i.info) { + break + } + if i.p += int(i.info.size); i.p >= i.rb.nsrc { + outp += i.rb.flushCopy(out[outp:]) + i.done = true + return outp + } + i.info = i.rb.f.info(i.rb.src, i.p) + if i.info.ccc == 0 { + break + } + } + // new segment or too many combining characters: exit normalization + if outp += i.rb.flushCopy(out[outp:]); i.setStart(outp, i.p) { + return outp + } + goto doFast +} + +// nextComposed is the implementation of Next for forms NFC and NFKC. +func nextComposed(i *Iter, out []byte) int { + var outp int + i.initNext(len(out), i.p) +doFast: + inCopyStart, outCopyStart := i.p, outp // invariant xCopyStart <= i.xStart + var prevCC uint8 + for { + if !i.info.isYesC() { + goto doNorm + } + if cc := i.info.ccc; cc == 0 { + if i.setStart(outp, i.p) { + break + } + } else if cc < prevCC { + goto doNorm + } + prevCC = i.info.tccc + sz := int(i.info.size) + if sz == 0 { + sz = 1 // illegal rune: copy byte-by-byte + } + p := outp + sz + if p > i.maxseg && i.setStart(outp, i.p) { + break + } + outp = p + i.p += sz + max := min(i.rb.nsrc, len(out)-outp+i.p) + if np := i.rb.src.skipASCII(i.p, max); np > i.p { + outp += np - i.p + i.p = np + if i.p >= i.rb.nsrc { + break + } + // ASCII may combine with consecutive runes. + if i.setStart(outp-1, i.p-1) { + i.p-- + outp-- + i.info = runeInfo{size: 1} + break + } + } + if i.p >= i.rb.nsrc { + break + } + i.info = i.rb.f.info(i.rb.src, i.p) + } + if inCopyStart != i.p { + i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p) + } + i.done = i.p >= i.rb.nsrc + return outp +doNorm: + i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.inStart) + outp, i.p = i.outStart, i.inStart + i.info = i.rb.f.info(i.rb.src, i.p) + for { + if !i.rb.insert(i.rb.src, i.p, i.info) { + break + } + if i.p += int(i.info.size); i.p >= i.rb.nsrc { + i.rb.compose() + outp += i.rb.flushCopy(out[outp:]) + i.done = true + return outp + } + i.info = i.rb.f.info(i.rb.src, i.p) + if i.info.boundaryBefore() { + break + } + } + i.rb.compose() + if outp += i.rb.flushCopy(out[outp:]); i.setStart(outp, i.p) { + return outp + } + goto doFast +} |