diff options
Diffstat (limited to 'libgo/go/internal/bytealg')
-rw-r--r-- | libgo/go/internal/bytealg/bytealg.go | 128 | ||||
-rw-r--r-- | libgo/go/internal/bytealg/gccgo.go | 3 | ||||
-rw-r--r-- | libgo/go/internal/bytealg/index_generic.go | 38 |
3 files changed, 162 insertions, 7 deletions
diff --git a/libgo/go/internal/bytealg/bytealg.go b/libgo/go/internal/bytealg/bytealg.go index e46ee7c..abdba5f 100644 --- a/libgo/go/internal/bytealg/bytealg.go +++ b/libgo/go/internal/bytealg/bytealg.go @@ -2,8 +2,6 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build ignore_for_gccgo - package bytealg import ( @@ -22,4 +20,130 @@ const ( ) // MaxLen is the maximum length of the string to be searched for (argument b) in Index. +// If MaxLen is not 0, make sure MaxLen >= 4. var MaxLen int = 32 + +// FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev, +// IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate +// three of them without causing allocation? + +// PrimeRK is the prime base used in Rabin-Karp algorithm. +const PrimeRK = 16777619 + +// HashStrBytes returns the hash and the appropriate multiplicative +// factor for use in Rabin-Karp algorithm. +func HashStrBytes(sep []byte) (uint32, uint32) { + hash := uint32(0) + for i := 0; i < len(sep); i++ { + hash = hash*PrimeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, PrimeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} + +// HashStr returns the hash and the appropriate multiplicative +// factor for use in Rabin-Karp algorithm. +func HashStr(sep string) (uint32, uint32) { + hash := uint32(0) + for i := 0; i < len(sep); i++ { + hash = hash*PrimeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, PrimeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} + +// HashStrRevBytes returns the hash of the reverse of sep and the +// appropriate multiplicative factor for use in Rabin-Karp algorithm. +func HashStrRevBytes(sep []byte) (uint32, uint32) { + hash := uint32(0) + for i := len(sep) - 1; i >= 0; i-- { + hash = hash*PrimeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, PrimeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} + +// HashStrRev returns the hash of the reverse of sep and the +// appropriate multiplicative factor for use in Rabin-Karp algorithm. +func HashStrRev(sep string) (uint32, uint32) { + hash := uint32(0) + for i := len(sep) - 1; i >= 0; i-- { + hash = hash*PrimeRK + uint32(sep[i]) + } + var pow, sq uint32 = 1, PrimeRK + for i := len(sep); i > 0; i >>= 1 { + if i&1 != 0 { + pow *= sq + } + sq *= sq + } + return hash, pow +} + +// IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the +// first occurence of substr in s, or -1 if not present. +func IndexRabinKarpBytes(s, sep []byte) int { + // Rabin-Karp search + hashsep, pow := HashStrBytes(sep) + n := len(sep) + var h uint32 + for i := 0; i < n; i++ { + h = h*PrimeRK + uint32(s[i]) + } + if h == hashsep && Equal(s[:n], sep) { + return 0 + } + for i := n; i < len(s); { + h *= PrimeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ + if h == hashsep && Equal(s[i-n:i], sep) { + return i - n + } + } + return -1 +} + +// IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the +// first occurence of substr in s, or -1 if not present. +func IndexRabinKarp(s, substr string) int { + // Rabin-Karp search + hashss, pow := HashStr(substr) + n := len(substr) + var h uint32 + for i := 0; i < n; i++ { + h = h*PrimeRK + uint32(s[i]) + } + if h == hashss && s[:n] == substr { + return 0 + } + for i := n; i < len(s); { + h *= PrimeRK + h += uint32(s[i]) + h -= pow * uint32(s[i-n]) + i++ + if h == hashss && s[i-n:i] == substr { + return i - n + } + } + return -1 +} diff --git a/libgo/go/internal/bytealg/gccgo.go b/libgo/go/internal/bytealg/gccgo.go index 8c78b63..05b39e3 100644 --- a/libgo/go/internal/bytealg/gccgo.go +++ b/libgo/go/internal/bytealg/gccgo.go @@ -6,7 +6,4 @@ package bytealg -// MaxLen is the maximum length of the string to be searched for (argument b) in Index. -var MaxLen int = 32 - const MaxBruteForce = 64 diff --git a/libgo/go/internal/bytealg/index_generic.go b/libgo/go/internal/bytealg/index_generic.go index c595c23..3dc1c6b 100644 --- a/libgo/go/internal/bytealg/index_generic.go +++ b/libgo/go/internal/bytealg/index_generic.go @@ -17,8 +17,42 @@ func Index(a, b []byte) int { // IndexString returns the index of the first instance of b in a, or -1 if b is not present in a. // Requires 2 <= len(b) <= MaxLen. -func IndexString(a, b string) int { - panic("unimplemented") +func IndexString(s, substr string) int { + // This is a partial copy of strings.Index, here because bytes.IndexAny and bytes.LastIndexAny + // call bytealg.IndexString. Some platforms have an optimized assembly version of this function. + // This implementation is used for those that do not. Although the pure Go implementation here + // works for the case of len(b) > MaxLen, we do not require that its assembly implementation also + // supports the case of len(b) > MaxLen. And we do not guarantee that this function supports the + // case of len(b) > MaxLen. + n := len(substr) + c0 := substr[0] + c1 := substr[1] + i := 0 + t := len(s) - n + 1 + fails := 0 + for i < t { + if s[i] != c0 { + o := IndexByteString(s[i:t], c0) + if o < 0 { + return -1 + } + i += o + } + if s[i+1] == c1 && s[i:i+n] == substr { + return i + } + i++ + fails++ + if fails >= 4+i>>4 && i < t { + // See comment in src/bytes/bytes.go. + j := IndexRabinKarp(s[i:], substr) + if j < 0 { + return -1 + } + return i + j + } + } + return -1 } // Cutover reports the number of failures of IndexByte we should tolerate |