aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/internal/bytealg
diff options
context:
space:
mode:
Diffstat (limited to 'libgo/go/internal/bytealg')
-rw-r--r--libgo/go/internal/bytealg/bytealg.go128
-rw-r--r--libgo/go/internal/bytealg/gccgo.go3
-rw-r--r--libgo/go/internal/bytealg/index_generic.go38
3 files changed, 162 insertions, 7 deletions
diff --git a/libgo/go/internal/bytealg/bytealg.go b/libgo/go/internal/bytealg/bytealg.go
index e46ee7c..abdba5f 100644
--- a/libgo/go/internal/bytealg/bytealg.go
+++ b/libgo/go/internal/bytealg/bytealg.go
@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build ignore_for_gccgo
-
package bytealg
import (
@@ -22,4 +20,130 @@ const (
)
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
+// If MaxLen is not 0, make sure MaxLen >= 4.
var MaxLen int = 32
+
+// FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev,
+// IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate
+// three of them without causing allocation?
+
+// PrimeRK is the prime base used in Rabin-Karp algorithm.
+const PrimeRK = 16777619
+
+// HashStrBytes returns the hash and the appropriate multiplicative
+// factor for use in Rabin-Karp algorithm.
+func HashStrBytes(sep []byte) (uint32, uint32) {
+ hash := uint32(0)
+ for i := 0; i < len(sep); i++ {
+ hash = hash*PrimeRK + uint32(sep[i])
+ }
+ var pow, sq uint32 = 1, PrimeRK
+ for i := len(sep); i > 0; i >>= 1 {
+ if i&1 != 0 {
+ pow *= sq
+ }
+ sq *= sq
+ }
+ return hash, pow
+}
+
+// HashStr returns the hash and the appropriate multiplicative
+// factor for use in Rabin-Karp algorithm.
+func HashStr(sep string) (uint32, uint32) {
+ hash := uint32(0)
+ for i := 0; i < len(sep); i++ {
+ hash = hash*PrimeRK + uint32(sep[i])
+ }
+ var pow, sq uint32 = 1, PrimeRK
+ for i := len(sep); i > 0; i >>= 1 {
+ if i&1 != 0 {
+ pow *= sq
+ }
+ sq *= sq
+ }
+ return hash, pow
+}
+
+// HashStrRevBytes returns the hash of the reverse of sep and the
+// appropriate multiplicative factor for use in Rabin-Karp algorithm.
+func HashStrRevBytes(sep []byte) (uint32, uint32) {
+ hash := uint32(0)
+ for i := len(sep) - 1; i >= 0; i-- {
+ hash = hash*PrimeRK + uint32(sep[i])
+ }
+ var pow, sq uint32 = 1, PrimeRK
+ for i := len(sep); i > 0; i >>= 1 {
+ if i&1 != 0 {
+ pow *= sq
+ }
+ sq *= sq
+ }
+ return hash, pow
+}
+
+// HashStrRev returns the hash of the reverse of sep and the
+// appropriate multiplicative factor for use in Rabin-Karp algorithm.
+func HashStrRev(sep string) (uint32, uint32) {
+ hash := uint32(0)
+ for i := len(sep) - 1; i >= 0; i-- {
+ hash = hash*PrimeRK + uint32(sep[i])
+ }
+ var pow, sq uint32 = 1, PrimeRK
+ for i := len(sep); i > 0; i >>= 1 {
+ if i&1 != 0 {
+ pow *= sq
+ }
+ sq *= sq
+ }
+ return hash, pow
+}
+
+// IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
+// first occurence of substr in s, or -1 if not present.
+func IndexRabinKarpBytes(s, sep []byte) int {
+ // Rabin-Karp search
+ hashsep, pow := HashStrBytes(sep)
+ n := len(sep)
+ var h uint32
+ for i := 0; i < n; i++ {
+ h = h*PrimeRK + uint32(s[i])
+ }
+ if h == hashsep && Equal(s[:n], sep) {
+ return 0
+ }
+ for i := n; i < len(s); {
+ h *= PrimeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i-n])
+ i++
+ if h == hashsep && Equal(s[i-n:i], sep) {
+ return i - n
+ }
+ }
+ return -1
+}
+
+// IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
+// first occurence of substr in s, or -1 if not present.
+func IndexRabinKarp(s, substr string) int {
+ // Rabin-Karp search
+ hashss, pow := HashStr(substr)
+ n := len(substr)
+ var h uint32
+ for i := 0; i < n; i++ {
+ h = h*PrimeRK + uint32(s[i])
+ }
+ if h == hashss && s[:n] == substr {
+ return 0
+ }
+ for i := n; i < len(s); {
+ h *= PrimeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i-n])
+ i++
+ if h == hashss && s[i-n:i] == substr {
+ return i - n
+ }
+ }
+ return -1
+}
diff --git a/libgo/go/internal/bytealg/gccgo.go b/libgo/go/internal/bytealg/gccgo.go
index 8c78b63..05b39e3 100644
--- a/libgo/go/internal/bytealg/gccgo.go
+++ b/libgo/go/internal/bytealg/gccgo.go
@@ -6,7 +6,4 @@
package bytealg
-// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
-var MaxLen int = 32
-
const MaxBruteForce = 64
diff --git a/libgo/go/internal/bytealg/index_generic.go b/libgo/go/internal/bytealg/index_generic.go
index c595c23..3dc1c6b 100644
--- a/libgo/go/internal/bytealg/index_generic.go
+++ b/libgo/go/internal/bytealg/index_generic.go
@@ -17,8 +17,42 @@ func Index(a, b []byte) int {
// IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
// Requires 2 <= len(b) <= MaxLen.
-func IndexString(a, b string) int {
- panic("unimplemented")
+func IndexString(s, substr string) int {
+ // This is a partial copy of strings.Index, here because bytes.IndexAny and bytes.LastIndexAny
+ // call bytealg.IndexString. Some platforms have an optimized assembly version of this function.
+ // This implementation is used for those that do not. Although the pure Go implementation here
+ // works for the case of len(b) > MaxLen, we do not require that its assembly implementation also
+ // supports the case of len(b) > MaxLen. And we do not guarantee that this function supports the
+ // case of len(b) > MaxLen.
+ n := len(substr)
+ c0 := substr[0]
+ c1 := substr[1]
+ i := 0
+ t := len(s) - n + 1
+ fails := 0
+ for i < t {
+ if s[i] != c0 {
+ o := IndexByteString(s[i:t], c0)
+ if o < 0 {
+ return -1
+ }
+ i += o
+ }
+ if s[i+1] == c1 && s[i:i+n] == substr {
+ return i
+ }
+ i++
+ fails++
+ if fails >= 4+i>>4 && i < t {
+ // See comment in src/bytes/bytes.go.
+ j := IndexRabinKarp(s[i:], substr)
+ if j < 0 {
+ return -1
+ }
+ return i + j
+ }
+ }
+ return -1
}
// Cutover reports the number of failures of IndexByte we should tolerate