3 files changed, 162 insertions, 7 deletions
diff --git a/libgo/go/internal/bytealg/bytealg.go b/libgo/go/internal/bytealg/bytealg.go
index e46ee7c..abdba5f 100644
--- a/libgo/go/internal/bytealg/bytealg.go
+++ b/libgo/go/internal/bytealg/bytealg.go
@@ -2,8 +2,6 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// +build ignore_for_gccgo
-
 package bytealg
 
 import (
@@ -22,4 +20,130 @@ const (
 )
 
 // MaxLen is the maximum length of the string to be searched for (argument b) in Index.
+// If MaxLen is not 0, make sure MaxLen >= 4.
 var MaxLen int = 32
+
+// FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev,
+// IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate
+// three of them without causing allocation?
+
+// PrimeRK is the prime base used in Rabin-Karp algorithm.
+const PrimeRK = 16777619
+
+// HashStrBytes returns the hash and the appropriate multiplicative
+// factor for use in Rabin-Karp algorithm.
+func HashStrBytes(sep []byte) (uint32, uint32) {
+	hash := uint32(0)
+	for i := 0; i < len(sep); i++ {
+		hash = hash*PrimeRK + uint32(sep[i])
+	}
+	var pow, sq uint32 = 1, PrimeRK
+	for i := len(sep); i > 0; i >>= 1 {
+		if i&1 != 0 {
+			pow *= sq
+		}
+		sq *= sq
+	}
+	return hash, pow
+}
+
+// HashStr returns the hash and the appropriate multiplicative
+// factor for use in Rabin-Karp algorithm.
+func HashStr(sep string) (uint32, uint32) {
+	hash := uint32(0)
+	for i := 0; i < len(sep); i++ {
+		hash = hash*PrimeRK + uint32(sep[i])
+	}
+	var pow, sq uint32 = 1, PrimeRK
+	for i := len(sep); i > 0; i >>= 1 {
+		if i&1 != 0 {
+			pow *= sq
+		}
+		sq *= sq
+	}
+	return hash, pow
+}
+
+// HashStrRevBytes returns the hash of the reverse of sep and the
+// appropriate multiplicative factor for use in Rabin-Karp algorithm.
+func HashStrRevBytes(sep []byte) (uint32, uint32) {
+	hash := uint32(0)
+	for i := len(sep) - 1; i >= 0; i-- {
+		hash = hash*PrimeRK + uint32(sep[i])
+	}
+	var pow, sq uint32 = 1, PrimeRK
+	for i := len(sep); i > 0; i >>= 1 {
+		if i&1 != 0 {
+			pow *= sq
+		}
+		sq *= sq
+	}
+	return hash, pow
+}
+
+// HashStrRev returns the hash of the reverse of sep and the
+// appropriate multiplicative factor for use in Rabin-Karp algorithm.
+func HashStrRev(sep string) (uint32, uint32) {
+	hash := uint32(0)
+	for i := len(sep) - 1; i >= 0; i-- {
+		hash = hash*PrimeRK + uint32(sep[i])
+	}
+	var pow, sq uint32 = 1, PrimeRK
+	for i := len(sep); i > 0; i >>= 1 {
+		if i&1 != 0 {
+			pow *= sq
+		}
+		sq *= sq
+	}
+	return hash, pow
+}
+
+// IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the
+// first occurence of substr in s, or -1 if not present.
+func IndexRabinKarpBytes(s, sep []byte) int {
+	// Rabin-Karp search
+	hashsep, pow := HashStrBytes(sep)
+	n := len(sep)
+	var h uint32
+	for i := 0; i < n; i++ {
+		h = h*PrimeRK + uint32(s[i])
+	}
+	if h == hashsep && Equal(s[:n], sep) {
+		return 0
+	}
+	for i := n; i < len(s); {
+		h *= PrimeRK
+		h += uint32(s[i])
+		h -= pow * uint32(s[i-n])
+		i++
+		if h == hashsep && Equal(s[i-n:i], sep) {
+			return i - n
+		}
+	}
+	return -1
+}
+
+// IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the
+// first occurence of substr in s, or -1 if not present.
+func IndexRabinKarp(s, substr string) int {
+	// Rabin-Karp search
+	hashss, pow := HashStr(substr)
+	n := len(substr)
+	var h uint32
+	for i := 0; i < n; i++ {
+		h = h*PrimeRK + uint32(s[i])
+	}
+	if h == hashss && s[:n] == substr {
+		return 0
+	}
+	for i := n; i < len(s); {
+		h *= PrimeRK
+		h += uint32(s[i])
+		h -= pow * uint32(s[i-n])
+		i++
+		if h == hashss && s[i-n:i] == substr {
+			return i - n
+		}
+	}
+	return -1
+}
diff --git a/libgo/go/internal/bytealg/gccgo.go b/libgo/go/internal/bytealg/gccgo.go
index 8c78b63..05b39e3 100644
--- a/libgo/go/internal/bytealg/gccgo.go
+++ b/libgo/go/internal/bytealg/gccgo.go
@@ -6,7 +6,4 @@
 
 package bytealg
 
-// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
-var MaxLen int = 32
-
 const MaxBruteForce = 64
diff --git a/libgo/go/internal/bytealg/index_generic.go b/libgo/go/internal/bytealg/index_generic.go
index c595c23..3dc1c6b 100644
--- a/libgo/go/internal/bytealg/index_generic.go
+++ b/libgo/go/internal/bytealg/index_generic.go
@@ -17,8 +17,42 @@ func Index(a, b []byte) int {
 
 // IndexString returns the index of the first instance of b in a, or -1 if b is not present in a.
 // Requires 2 <= len(b) <= MaxLen.
-func IndexString(a, b string) int {
-	panic("unimplemented")
+func IndexString(s, substr string) int {
+	// This is a partial copy of strings.Index, here because bytes.IndexAny and bytes.LastIndexAny
+	// call bytealg.IndexString. Some platforms have an optimized assembly version of this function.
+	// This implementation is used for those that do not. Although the pure Go implementation here
+	// works for the case of len(b) > MaxLen, we do not require that its assembly implementation also
+	// supports the case of len(b) > MaxLen. And we do not guarantee that this function supports the
+	// case of len(b) > MaxLen.
+	n := len(substr)
+	c0 := substr[0]
+	c1 := substr[1]
+	i := 0
+	t := len(s) - n + 1
+	fails := 0
+	for i < t {
+		if s[i] != c0 {
+			o := IndexByteString(s[i:t], c0)
+			if o < 0 {
+				return -1
+			}
+			i += o
+		}
+		if s[i+1] == c1 && s[i:i+n] == substr {
+			return i
+		}
+		i++
+		fails++
+		if fails >= 4+i>>4 && i < t {
+			// See comment in src/bytes/bytes.go.
+			j := IndexRabinKarp(s[i:], substr)
+			if j < 0 {
+				return -1
+			}
+			return i + j
+		}
+	}
+	return -1
 }
 
 // Cutover reports the number of failures of IndexByte we should tolerate