libgo: update to Go 1.13beta1 release

Reviewed-on: https://go-review.googlesource.com/c/gofrontend/+/193497 From-SVN: r275473
author: Ian Lance Taylor <iant@golang.org> 2019-09-06 18:12:46 +0000
committer: Ian Lance Taylor <ian@gcc.gnu.org> 2019-09-06 18:12:46 +0000
commit: aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13 (patch)
tree: 7e63b06d1eec92beec6997c9d3ab47a5d6a835be /libgo/go/index
parent: 920ea3b8ba3164b61ac9490dfdfceb6936eda6dd (diff)
download: gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.zip
gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.tar.gz
gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.tar.bz2
6 files changed, 3174 insertions, 221 deletions
diff --git a/libgo/go/index/suffixarray/gen.go b/libgo/go/index/suffixarray/gen.go
new file mode 100644
index 0000000..8c3de55
--- /dev/null
+++ b/libgo/go/index/suffixarray/gen.go
@@ -0,0 +1,92 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ignore
+
+// Gen generates sais2.go by duplicating functions in sais.go
+// using different input types.
+// See the comment at the top of sais.go for details.
+package main
+
+import (
+	"bytes"
+	"io/ioutil"
+	"log"
+	"strings"
+)
+
+func main() {
+	log.SetPrefix("gen: ")
+	log.SetFlags(0)
+
+	data, err := ioutil.ReadFile("sais.go")
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	x := bytes.Index(data, []byte("\n\n"))
+	if x < 0 {
+		log.Fatal("cannot find blank line after copyright comment")
+	}
+
+	var buf bytes.Buffer
+	buf.Write(data[:x])
+	buf.WriteString("\n\n// Code generated by go generate; DO NOT EDIT.\n\npackage suffixarray\n")
+
+	for {
+		x := bytes.Index(data, []byte("\nfunc "))
+		if x < 0 {
+			break
+		}
+		data = data[x:]
+		p := bytes.IndexByte(data, '(')
+		if p < 0 {
+			p = len(data)
+		}
+		name := string(data[len("\nfunc "):p])
+
+		x = bytes.Index(data, []byte("\n}\n"))
+		if x < 0 {
+			log.Fatalf("cannot find end of func %s", name)
+		}
+		fn := string(data[:x+len("\n}\n")])
+		data = data[x+len("\n}"):]
+
+		if strings.HasSuffix(name, "_32") {
+			buf.WriteString(fix32.Replace(fn))
+		}
+		if strings.HasSuffix(name, "_8_32") {
+			// x_8_32 -> x_8_64 done above
+			fn = fix8_32.Replace(stripByteOnly(fn))
+			buf.WriteString(fn)
+			buf.WriteString(fix32.Replace(fn))
+		}
+	}
+
+	if err := ioutil.WriteFile("sais2.go", buf.Bytes(), 0666); err != nil {
+		log.Fatal(err)
+	}
+}
+
+var fix32 = strings.NewReplacer(
+	"32", "64",
+	"int32", "int64",
+)
+
+var fix8_32 = strings.NewReplacer(
+	"_8_32", "_32",
+	"byte", "int32",
+)
+
+func stripByteOnly(s string) string {
+	lines := strings.SplitAfter(s, "\n")
+	w := 0
+	for _, line := range lines {
+		if !strings.Contains(line, "256") && !strings.Contains(line, "byte-only") {
+			lines[w] = line
+			w++
+		}
+	}
+	return strings.Join(lines[:w], "")
+}
diff --git a/libgo/go/index/suffixarray/qsufsort.go b/libgo/go/index/suffixarray/qsufsort.go
deleted file mode 100644
index 9c36a98..0000000
--- a/libgo/go/index/suffixarray/qsufsort.go
+++ /dev/null
@@ -1,168 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This algorithm is based on "Faster Suffix Sorting"
-//   by N. Jesper Larsson and Kunihiko Sadakane
-// paper: http://www.larsson.dogma.net/ssrev-tr.pdf
-// code:  http://www.larsson.dogma.net/qsufsort.c
-
-// This algorithm computes the suffix array sa by computing its inverse.
-// Consecutive groups of suffixes in sa are labeled as sorted groups or
-// unsorted groups. For a given pass of the sorter, all suffixes are ordered
-// up to their first h characters, and sa is h-ordered. Suffixes in their
-// final positions and unambiguously sorted in h-order are in a sorted group.
-// Consecutive groups of suffixes with identical first h characters are an
-// unsorted group. In each pass of the algorithm, unsorted groups are sorted
-// according to the group number of their following suffix.
-
-// In the implementation, if sa[i] is negative, it indicates that i is
-// the first element of a sorted group of length -sa[i], and can be skipped.
-// An unsorted group sa[i:k] is given the group number of the index of its
-// last element, k-1. The group numbers are stored in the inverse slice (inv),
-// and when all groups are sorted, this slice is the inverse suffix array.
-
-package suffixarray
-
-import "sort"
-
-func qsufsort(data []byte) []int {
-	// initial sorting by first byte of suffix
-	sa := sortedByFirstByte(data)
-	if len(sa) < 2 {
-		return sa
-	}
-	// initialize the group lookup table
-	// this becomes the inverse of the suffix array when all groups are sorted
-	inv := initGroups(sa, data)
-
-	// the index starts 1-ordered
-	sufSortable := &suffixSortable{sa: sa, inv: inv, h: 1}
-
-	for sa[0] > -len(sa) { // until all suffixes are one big sorted group
-		// The suffixes are h-ordered, make them 2*h-ordered
-		pi := 0 // pi is first position of first group
-		sl := 0 // sl is negated length of sorted groups
-		for pi < len(sa) {
-			if s := sa[pi]; s < 0 { // if pi starts sorted group
-				pi -= s // skip over sorted group
-				sl += s // add negated length to sl
-			} else { // if pi starts unsorted group
-				if sl != 0 {
-					sa[pi+sl] = sl // combine sorted groups before pi
-					sl = 0
-				}
-				pk := inv[s] + 1 // pk-1 is last position of unsorted group
-				sufSortable.sa = sa[pi:pk]
-				sort.Sort(sufSortable)
-				sufSortable.updateGroups(pi)
-				pi = pk // next group
-			}
-		}
-		if sl != 0 { // if the array ends with a sorted group
-			sa[pi+sl] = sl // combine sorted groups at end of sa
-		}
-
-		sufSortable.h *= 2 // double sorted depth
-	}
-
-	for i := range sa { // reconstruct suffix array from inverse
-		sa[inv[i]] = i
-	}
-	return sa
-}
-
-func sortedByFirstByte(data []byte) []int {
-	// total byte counts
-	var count [256]int
-	for _, b := range data {
-		count[b]++
-	}
-	// make count[b] equal index of first occurrence of b in sorted array
-	sum := 0
-	for b := range count {
-		count[b], sum = sum, count[b]+sum
-	}
-	// iterate through bytes, placing index into the correct spot in sa
-	sa := make([]int, len(data))
-	for i, b := range data {
-		sa[count[b]] = i
-		count[b]++
-	}
-	return sa
-}
-
-func initGroups(sa []int, data []byte) []int {
-	// label contiguous same-letter groups with the same group number
-	inv := make([]int, len(data))
-	prevGroup := len(sa) - 1
-	groupByte := data[sa[prevGroup]]
-	for i := len(sa) - 1; i >= 0; i-- {
-		if b := data[sa[i]]; b < groupByte {
-			if prevGroup == i+1 {
-				sa[i+1] = -1
-			}
-			groupByte = b
-			prevGroup = i
-		}
-		inv[sa[i]] = prevGroup
-		if prevGroup == 0 {
-			sa[0] = -1
-		}
-	}
-	// Separate out the final suffix to the start of its group.
-	// This is necessary to ensure the suffix "a" is before "aba"
-	// when using a potentially unstable sort.
-	lastByte := data[len(data)-1]
-	s := -1
-	for i := range sa {
-		if sa[i] >= 0 {
-			if data[sa[i]] == lastByte && s == -1 {
-				s = i
-			}
-			if sa[i] == len(sa)-1 {
-				sa[i], sa[s] = sa[s], sa[i]
-				inv[sa[s]] = s
-				sa[s] = -1 // mark it as an isolated sorted group
-				break
-			}
-		}
-	}
-	return inv
-}
-
-type suffixSortable struct {
-	sa  []int
-	inv []int
-	h   int
-	buf []int // common scratch space
-}
-
-func (x *suffixSortable) Len() int           { return len(x.sa) }
-func (x *suffixSortable) Less(i, j int) bool { return x.inv[x.sa[i]+x.h] < x.inv[x.sa[j]+x.h] }
-func (x *suffixSortable) Swap(i, j int)      { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] }
-
-func (x *suffixSortable) updateGroups(offset int) {
-	bounds := x.buf[0:0]
-	group := x.inv[x.sa[0]+x.h]
-	for i := 1; i < len(x.sa); i++ {
-		if g := x.inv[x.sa[i]+x.h]; g > group {
-			bounds = append(bounds, i)
-			group = g
-		}
-	}
-	bounds = append(bounds, len(x.sa))
-	x.buf = bounds
-
-	// update the group numberings after all new groups are determined
-	prev := 0
-	for _, b := range bounds {
-		for i := prev; i < b; i++ {
-			x.inv[x.sa[i]] = offset + b - 1
-		}
-		if b-prev == 1 {
-			x.sa[prev] = -1
-		}
-		prev = b
-	}
-}
diff --git a/libgo/go/index/suffixarray/sais.go b/libgo/go/index/suffixarray/sais.go
new file mode 100644
index 0000000..b4496d2
--- /dev/null
+++ b/libgo/go/index/suffixarray/sais.go
@@ -0,0 +1,899 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Suffix array construction by induced sorting (SAIS).
+// See Ge Nong, Sen Zhang, and Wai Hong Chen,
+// "Two Efficient Algorithms for Linear Time Suffix Array Construction",
+// especially section 3 (https://ieeexplore.ieee.org/document/5582081).
+// See also http://zork.net/~st/jottings/sais.html.
+//
+// With optimizations inspired by Yuta Mori's sais-lite
+// (https://sites.google.com/site/yuta256/sais).
+//
+// And with other new optimizations.
+
+// Many of these functions are parameterized by the sizes of
+// the types they operate on. The generator gen.go makes
+// copies of these functions for use with other sizes.
+// Specifically:
+//
+// - A function with a name ending in _8_32 takes []byte and []int32 arguments
+//   and is duplicated into _32_32, _8_64, and _64_64 forms.
+//   The _32_32 and _64_64_ suffixes are shortened to plain _32 and _64.
+//   Any lines in the function body that contain the text "byte-only" or "256"
+//   are stripped when creating _32_32 and _64_64 forms.
+//   (Those lines are typically 8-bit-specific optimizations.)
+//
+// - A function with a name ending only in _32 operates on []int32
+//   and is duplicated into a _64 form. (Note that it may still take a []byte,
+//   but there is no need for a version of the function in which the []byte
+//   is widened to a full integer array.)
+
+// The overall runtime of this code is linear in the input size:
+// it runs a sequence of linear passes to reduce the problem to
+// a subproblem at most half as big, invokes itself recursively,
+// and then runs a sequence of linear passes to turn the answer
+// for the subproblem into the answer for the original problem.
+// This gives T(N) = O(N) + T(N/2) = O(N) + O(N/2) + O(N/4) + ... = O(N).
+//
+// The outline of the code, with the forward and backward scans
+// through O(N)-sized arrays called out, is:
+//
+// sais_I_N
+//	placeLMS_I_B
+//		bucketMax_I_B
+//			freq_I_B
+//				<scan +text> (1)
+//			<scan +freq> (2)
+//		<scan -text, random bucket> (3)
+//	induceSubL_I_B
+//		bucketMin_I_B
+//			freq_I_B
+//				<scan +text, often optimized away> (4)
+//			<scan +freq> (5)
+//		<scan +sa, random text, random bucket> (6)
+//	induceSubS_I_B
+//		bucketMax_I_B
+//			freq_I_B
+//				<scan +text, often optimized away> (7)
+//			<scan +freq> (8)
+//		<scan -sa, random text, random bucket> (9)
+//	assignID_I_B
+//		<scan +sa, random text substrings> (10)
+//	map_B
+//		<scan -sa> (11)
+//	recurse_B
+//		(recursive call to sais_B_B for a subproblem of size at most 1/2 input, often much smaller)
+//	unmap_I_B
+//		<scan -text> (12)
+//		<scan +sa> (13)
+//	expand_I_B
+//		bucketMax_I_B
+//			freq_I_B
+//				<scan +text, often optimized away> (14)
+//			<scan +freq> (15)
+//		<scan -sa, random text, random bucket> (16)
+//	induceL_I_B
+//		bucketMin_I_B
+//			freq_I_B
+//				<scan +text, often optimized away> (17)
+//			<scan +freq> (18)
+//		<scan +sa, random text, random bucket> (19)
+//	induceS_I_B
+//		bucketMax_I_B
+//			freq_I_B
+//				<scan +text, often optimized away> (20)
+//			<scan +freq> (21)
+//		<scan -sa, random text, random bucket> (22)
+//
+// Here, _B indicates the suffix array size (_32 or _64) and _I the input size (_8 or _B).
+//
+// The outline shows there are in general 22 scans through
+// O(N)-sized arrays for a given level of the recursion.
+// In the top level, operating on 8-bit input text,
+// the six freq scans are fixed size (256) instead of potentially
+// input-sized. Also, the frequency is counted once and cached
+// whenever there is room to do so (there is nearly always room in general,
+// and always room at the top level), which eliminates all but
+// the first freq_I_B text scans (that is, 5 of the 6).
+// So the top level of the recursion only does 22 - 6 - 5 = 11
+// input-sized scans and a typical level does 16 scans.
+//
+// The linear scans do not cost anywhere near as much as
+// the random accesses to the text made during a few of
+// the scans (specifically #6, #9, #16, #19, #22 marked above).
+// In real texts, there is not much but some locality to
+// the accesses, due to the repetitive structure of the text
+// (the same reason Burrows-Wheeler compression is so effective).
+// For random inputs, there is no locality, which makes those
+// accesses even more expensive, especially once the text
+// no longer fits in cache.
+// For example, running on 50 MB of Go source code, induceSubL_8_32
+// (which runs only once, at the top level of the recursion)
+// takes 0.44s, while on 50 MB of random input, it takes 2.55s.
+// Nearly all the relative slowdown is explained by the text access:
+//
+//		c0, c1 := text[k-1], text[k]
+//
+// That line runs for 0.23s on the Go text and 2.02s on random text.
+
+//go:generate go run gen.go
+
+package suffixarray
+
+// text_32 returns the suffix array for the input text.
+// It requires that len(text) fit in an int32
+// and that the caller zero sa.
+func text_32(text []byte, sa []int32) {
+	if int(int32(len(text))) != len(text) || len(text) != len(sa) {
+		panic("suffixarray: misuse of text_32")
+	}
+	sais_8_32(text, 256, sa, make([]int32, 2*256))
+}
+
+// sais_8_32 computes the suffix array of text.
+// The text must contain only values in [0, textMax).
+// The suffix array is stored in sa, which the caller
+// must ensure is already zeroed.
+// The caller must also provide temporary space tmp
+// with len(tmp) ≥ textMax. If len(tmp) ≥ 2*textMax
+// then the algorithm runs a little faster.
+// If sais_8_32 modifies tmp, it sets tmp[0] = -1 on return.
+func sais_8_32(text []byte, textMax int, sa, tmp []int32) {
+	if len(sa) != len(text) || len(tmp) < int(textMax) {
+		panic("suffixarray: misuse of sais_8_32")
+	}
+
+	// Trivial base cases. Sorting 0 or 1 things is easy.
+	if len(text) == 0 {
+		return
+	}
+	if len(text) == 1 {
+		sa[0] = 0
+		return
+	}
+
+	// Establish slices indexed by text character
+	// holding character frequency and bucket-sort offsets.
+	// If there's only enough tmp for one slice,
+	// we make it the bucket offsets and recompute
+	// the character frequency each time we need it.
+	var freq, bucket []int32
+	if len(tmp) >= 2*textMax {
+		freq, bucket = tmp[:textMax], tmp[textMax:2*textMax]
+		freq[0] = -1 // mark as uninitialized
+	} else {
+		freq, bucket = nil, tmp[:textMax]
+	}
+
+	// The SAIS algorithm.
+	// Each of these calls makes one scan through sa.
+	// See the individual functions for documentation
+	// about each's role in the algorithm.
+	numLMS := placeLMS_8_32(text, sa, freq, bucket)
+	if numLMS <= 1 {
+		// 0 or 1 items are already sorted. Do nothing.
+	} else {
+		induceSubL_8_32(text, sa, freq, bucket)
+		induceSubS_8_32(text, sa, freq, bucket)
+		length_8_32(text, sa, numLMS)
+		maxID := assignID_8_32(text, sa, numLMS)
+		if maxID < numLMS {
+			map_32(sa, numLMS)
+			recurse_32(sa, tmp, numLMS, maxID)
+			unmap_8_32(text, sa, numLMS)
+		} else {
+			// If maxID == numLMS, then each LMS-substring
+			// is unique, so the relative ordering of two LMS-suffixes
+			// is determined by just the leading LMS-substring.
+			// That is, the LMS-suffix sort order matches the
+			// (simpler) LMS-substring sort order.
+			// Copy the original LMS-substring order into the
+			// suffix array destination.
+			copy(sa, sa[len(sa)-numLMS:])
+		}
+		expand_8_32(text, freq, bucket, sa, numLMS)
+	}
+	induceL_8_32(text, sa, freq, bucket)
+	induceS_8_32(text, sa, freq, bucket)
+
+	// Mark for caller that we overwrote tmp.
+	tmp[0] = -1
+}
+
+// freq_8_32 returns the character frequencies
+// for text, as a slice indexed by character value.
+// If freq is nil, freq_8_32 uses and returns bucket.
+// If freq is non-nil, freq_8_32 assumes that freq[0] >= 0
+// means the frequencies are already computed.
+// If the frequency data is overwritten or uninitialized,
+// the caller must set freq[0] = -1 to force recomputation
+// the next time it is needed.
+func freq_8_32(text []byte, freq, bucket []int32) []int32 {
+	if freq != nil && freq[0] >= 0 {
+		return freq // already computed
+	}
+	if freq == nil {
+		freq = bucket
+	}
+
+	freq = freq[:256] // eliminate bounds check for freq[c] below
+	for i := range freq {
+		freq[i] = 0
+	}
+	for _, c := range text {
+		freq[c]++
+	}
+	return freq
+}
+
+// bucketMin_8_32 stores into bucket[c] the minimum index
+// in the bucket for character c in a bucket-sort of text.
+func bucketMin_8_32(text []byte, freq, bucket []int32) {
+	freq = freq_8_32(text, freq, bucket)
+	freq = freq[:256]     // establish len(freq) = 256, so 0 ≤ i < 256 below
+	bucket = bucket[:256] // eliminate bounds check for bucket[i] below
+	total := int32(0)
+	for i, n := range freq {
+		bucket[i] = total
+		total += n
+	}
+}
+
+// bucketMax_8_32 stores into bucket[c] the maximum index
+// in the bucket for character c in a bucket-sort of text.
+// The bucket indexes for c are [min, max).
+// That is, max is one past the final index in that bucket.
+func bucketMax_8_32(text []byte, freq, bucket []int32) {
+	freq = freq_8_32(text, freq, bucket)
+	freq = freq[:256]     // establish len(freq) = 256, so 0 ≤ i < 256 below
+	bucket = bucket[:256] // eliminate bounds check for bucket[i] below
+	total := int32(0)
+	for i, n := range freq {
+		total += n
+		bucket[i] = total
+	}
+}
+
+// The SAIS algorithm proceeds in a sequence of scans through sa.
+// Each of the following functions implements one scan,
+// and the functions appear here in the order they execute in the algorithm.
+
+// placeLMS_8_32 places into sa the indexes of the
+// final characters of the LMS substrings of text,
+// sorted into the rightmost ends of their correct buckets
+// in the suffix array.
+//
+// The imaginary sentinel character at the end of the text
+// is the final character of the final LMS substring, but there
+// is no bucket for the imaginary sentinel character,
+// which has a smaller value than any real character.
+// The caller must therefore pretend that sa[-1] == len(text).
+//
+// The text indexes of LMS-substring characters are always ≥ 1
+// (the first LMS-substring must be preceded by one or more L-type
+// characters that are not part of any LMS-substring),
+// so using 0 as a “not present” suffix array entry is safe,
+// both in this function and in most later functions
+// (until induceL_8_32 below).
+func placeLMS_8_32(text []byte, sa, freq, bucket []int32) int {
+	bucketMax_8_32(text, freq, bucket)
+
+	numLMS := 0
+	lastB := int32(-1)
+	bucket = bucket[:256] // eliminate bounds check for bucket[c1] below
+
+	// The next stanza of code (until the blank line) loop backward
+	// over text, stopping to execute a code body at each position i
+	// such that text[i] is an L-character and text[i+1] is an S-character.
+	// That is, i+1 is the position of the start of an LMS-substring.
+	// These could be hoisted out into a function with a callback,
+	// but at a significant speed cost. Instead, we just write these
+	// seven lines a few times in this source file. The copies below
+	// refer back to the pattern established by this original as the
+	// "LMS-substring iterator".
+	//
+	// In every scan through the text, c0, c1 are successive characters of text.
+	// In this backward scan, c0 == text[i] and c1 == text[i+1].
+	// By scanning backward, we can keep track of whether the current
+	// position is type-S or type-L according to the usual definition:
+	//
+	//	- position len(text) is type S with text[len(text)] == -1 (the sentinel)
+	//	- position i is type S if text[i] < text[i+1], or if text[i] == text[i+1] && i+1 is type S.
+	//	- position i is type L if text[i] > text[i+1], or if text[i] == text[i+1] && i+1 is type L.
+	//
+	// The backward scan lets us maintain the current type,
+	// update it when we see c0 != c1, and otherwise leave it alone.
+	// We want to identify all S positions with a preceding L.
+	// Position len(text) is one such position by definition, but we have
+	// nowhere to write it down, so we eliminate it by untruthfully
+	// setting isTypeS = false at the start of the loop.
+	c0, c1, isTypeS := byte(0), byte(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Bucket the index i+1 for the start of an LMS-substring.
+			b := bucket[c1] - 1
+			bucket[c1] = b
+			sa[b] = int32(i + 1)
+			lastB = b
+			numLMS++
+		}
+	}
+
+	// We recorded the LMS-substring starts but really want the ends.
+	// Luckily, with two differences, the start indexes and the end indexes are the same.
+	// The first difference is that the rightmost LMS-substring's end index is len(text),
+	// so the caller must pretend that sa[-1] == len(text), as noted above.
+	// The second difference is that the first leftmost LMS-substring start index
+	// does not end an earlier LMS-substring, so as an optimization we can omit
+	// that leftmost LMS-substring start index (the last one we wrote).
+	//
+	// Exception: if numLMS <= 1, the caller is not going to bother with
+	// the recursion at all and will treat the result as containing LMS-substring starts.
+	// In that case, we don't remove the final entry.
+	if numLMS > 1 {
+		sa[lastB] = 0
+	}
+	return numLMS
+}
+
+// induceSubL_8_32 inserts the L-type text indexes of LMS-substrings
+// into sa, assuming that the final characters of the LMS-substrings
+// are already inserted into sa, sorted by final character, and at the
+// right (not left) end of the corresponding character bucket.
+// Each LMS-substring has the form (as a regexp) /S+L+S/:
+// one or more S-type, one or more L-type, final S-type.
+// induceSubL_8_32 leaves behind only the leftmost L-type text
+// index for each LMS-substring. That is, it removes the final S-type
+// indexes that are present on entry, and it inserts but then removes
+// the interior L-type indexes too.
+// (Only the leftmost L-type index is needed by induceSubS_8_32.)
+func induceSubL_8_32(text []byte, sa, freq, bucket []int32) {
+	// Initialize positions for left side of character buckets.
+	bucketMin_8_32(text, freq, bucket)
+	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
+
+	// As we scan the array left-to-right, each sa[i] = j > 0 is a correctly
+	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type L.
+	// Because j-1 is type L, inserting it into sa now will sort it correctly.
+	// But we want to distinguish a j-1 with j-2 of type L from type S.
+	// We can process the former but want to leave the latter for the caller.
+	// We record the difference by negating j-1 if it is preceded by type S.
+	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
+	// happen at sa[i´] for some i´ > i, that is, in the portion of sa we have
+	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
+	// and so on, in sorted but not necessarily adjacent order, until it finds
+	// one preceded by an index of type S, at which point it must stop.
+	//
+	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
+	// and we flip sa[i] < 0 to -sa[i], so that the loop finishes with sa containing
+	// only the indexes of the leftmost L-type indexes for each LMS-substring.
+	//
+	// The suffix array sa therefore serves simultaneously as input, output,
+	// and a miraculously well-tailored work queue.
+
+	// placeLMS_8_32 left out the implicit entry sa[-1] == len(text),
+	// corresponding to the identified type-L index len(text)-1.
+	// Process it before the left-to-right scan of sa proper.
+	// See body in loop for commentary.
+	k := len(text) - 1
+	c0, c1 := text[k-1], text[k]
+	if c0 < c1 {
+		k = -k
+	}
+
+	// Cache recently used bucket index:
+	// we're processing suffixes in sorted order
+	// and accessing buckets indexed by the
+	// byte before the sorted order, which still
+	// has very good locality.
+	// Invariant: b is cached, possibly dirty copy of bucket[cB].
+	cB := c1
+	b := bucket[cB]
+	sa[b] = int32(k)
+	b++
+
+	for i := 0; i < len(sa); i++ {
+		j := int(sa[i])
+		if j == 0 {
+			// Skip empty entry.
+			continue
+		}
+		if j < 0 {
+			// Leave discovered type-S index for caller.
+			sa[i] = int32(-j)
+			continue
+		}
+		sa[i] = 0
+
+		// Index j was on work queue, meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is L-type, queue k for processing later in this loop.
+		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
+		k := j - 1
+		c0, c1 := text[k-1], text[k]
+		if c0 < c1 {
+			k = -k
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		sa[b] = int32(k)
+		b++
+	}
+}
+
+// induceSubS_8_32 inserts the S-type text indexes of LMS-substrings
+// into sa, assuming that the leftmost L-type text indexes are already
+// inserted into sa, sorted by LMS-substring suffix, and at the
+// left end of the corresponding character bucket.
+// Each LMS-substring has the form (as a regexp) /S+L+S/:
+// one or more S-type, one or more L-type, final S-type.
+// induceSubS_8_32 leaves behind only the leftmost S-type text
+// index for each LMS-substring, in sorted order, at the right end of sa.
+// That is, it removes the L-type indexes that are present on entry,
+// and it inserts but then removes the interior S-type indexes too,
+// leaving the LMS-substring start indexes packed into sa[len(sa)-numLMS:].
+// (Only the LMS-substring start indexes are processed by the recursion.)
+func induceSubS_8_32(text []byte, sa, freq, bucket []int32) {
+	// Initialize positions for right side of character buckets.
+	bucketMax_8_32(text, freq, bucket)
+	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
+
+	// Analogous to induceSubL_8_32 above,
+	// as we scan the array right-to-left, each sa[i] = j > 0 is a correctly
+	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type S.
+	// Because j-1 is type S, inserting it into sa now will sort it correctly.
+	// But we want to distinguish a j-1 with j-2 of type S from type L.
+	// We can process the former but want to leave the latter for the caller.
+	// We record the difference by negating j-1 if it is preceded by type L.
+	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
+	// happen at sa[i´] for some i´ < i, that is, in the portion of sa we have
+	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
+	// and so on, in sorted but not necessarily adjacent order, until it finds
+	// one preceded by an index of type L, at which point it must stop.
+	// That index (preceded by one of type L) is an LMS-substring start.
+	//
+	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
+	// and we flip sa[i] < 0 to -sa[i] and compact into the top of sa,
+	// so that the loop finishes with the top of sa containing exactly
+	// the LMS-substring start indexes, sorted by LMS-substring.
+
+	// Cache recently used bucket index:
+	cB := byte(0)
+	b := bucket[cB]
+
+	top := len(sa)
+	for i := len(sa) - 1; i >= 0; i-- {
+		j := int(sa[i])
+		if j == 0 {
+			// Skip empty entry.
+			continue
+		}
+		sa[i] = 0
+		if j < 0 {
+			// Leave discovered LMS-substring start index for caller.
+			top--
+			sa[top] = int32(-j)
+			continue
+		}
+
+		// Index j was on work queue, meaning k := j-1 is S-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is S-type, queue k for processing later in this loop.
+		// If k-1 is L-type (text[k-1] > text[k]), queue -k to save for the caller.
+		k := j - 1
+		c1 := text[k]
+		c0 := text[k-1]
+		if c0 > c1 {
+			k = -k
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		b--
+		sa[b] = int32(k)
+	}
+}
+
+// length_8_32 computes and records the length of each LMS-substring in text.
+// The length of the LMS-substring at index j is stored at sa[j/2],
+// avoiding the LMS-substring indexes already stored in the top half of sa.
+// (If index j is an LMS-substring start, then index j-1 is type L and cannot be.)
+// There are two exceptions, made for optimizations in name_8_32 below.
+//
+// First, the final LMS-substring is recorded as having length 0, which is otherwise
+// impossible, instead of giving it a length that includes the implicit sentinel.
+// This ensures the final LMS-substring has length unequal to all others
+// and therefore can be detected as different without text comparison
+// (it is unequal because it is the only one that ends in the implicit sentinel,
+// and the text comparison would be problematic since the implicit sentinel
+// is not actually present at text[len(text)]).
+//
+// Second, to avoid text comparison entirely, if an LMS-substring is very short,
+// sa[j/2] records its actual text instead of its length, so that if two such
+// substrings have matching “length,” the text need not be read at all.
+// The definition of “very short” is that the text bytes must pack into an uint32,
+// and the unsigned encoding e must be ≥ len(text), so that it can be
+// distinguished from a valid length.
+func length_8_32(text []byte, sa []int32, numLMS int) {
+	end := 0 // index of current LMS-substring end (0 indicates final LMS-substring)
+
+	// The encoding of N text bytes into a “length” word
+	// adds 1 to each byte, packs them into the bottom
+	// N*8 bits of a word, and then bitwise inverts the result.
+	// That is, the text sequence A B C (hex 41 42 43)
+	// encodes as ^uint32(0x42_43_44).
+	// LMS-substrings can never start or end with 0xFF.
+	// Adding 1 ensures the encoded byte sequence never
+	// starts or ends with 0x00, so that present bytes can be
+	// distinguished from zero-padding in the top bits,
+	// so the length need not be separately encoded.
+	// Inverting the bytes increases the chance that a
+	// 4-byte encoding will still be ≥ len(text).
+	// In particular, if the first byte is ASCII (<= 0x7E, so +1 <= 0x7F)
+	// then the high bit of the inversion will be set,
+	// making it clearly not a valid length (it would be a negative one).
+	//
+	// cx holds the pre-inverted encoding (the packed incremented bytes).
+	cx := uint32(0) // byte-only
+
+	// This stanza (until the blank line) is the "LMS-substring iterator",
+	// described in placeLMS_8_32 above, with one line added to maintain cx.
+	c0, c1, isTypeS := byte(0), byte(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		cx = cx<<8 | uint32(c1+1) // byte-only
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Index j = i+1 is the start of an LMS-substring.
+			// Compute length or encoded text to store in sa[j/2].
+			j := i + 1
+			var code int32
+			if end == 0 {
+				code = 0
+			} else {
+				code = int32(end - j)
+				if code <= 32/8 && ^cx >= uint32(len(text)) { // byte-only
+					code = int32(^cx) // byte-only
+				} // byte-only
+			}
+			sa[j>>1] = code
+			end = j + 1
+			cx = uint32(c1 + 1) // byte-only
+		}
+	}
+}
+
+// assignID_8_32 assigns a dense ID numbering to the
+// set of LMS-substrings respecting string ordering and equality,
+// returning the maximum assigned ID.
+// For example given the input "ababab", the LMS-substrings
+// are "aba", "aba", and "ab", renumbered as 2 2 1.
+// sa[len(sa)-numLMS:] holds the LMS-substring indexes
+// sorted in string order, so to assign numbers we can
+// consider each in turn, removing adjacent duplicates.
+// The new ID for the LMS-substring at index j is written to sa[j/2],
+// overwriting the length previously stored there (by length_8_32 above).
+func assignID_8_32(text []byte, sa []int32, numLMS int) int {
+	id := 0
+	lastLen := int32(-1) // impossible
+	lastPos := int32(0)
+	for _, j := range sa[len(sa)-numLMS:] {
+		// Is the LMS-substring at index j new, or is it the same as the last one we saw?
+		n := sa[j/2]
+		if n != lastLen {
+			goto New
+		}
+		if uint32(n) >= uint32(len(text)) {
+			// “Length” is really encoded full text, and they match.
+			goto Same
+		}
+		{
+			// Compare actual texts.
+			n := int(n)
+			this := text[j:][:n]
+			last := text[lastPos:][:n]
+			for i := 0; i < n; i++ {
+				if this[i] != last[i] {
+					goto New
+				}
+			}
+			goto Same
+		}
+	New:
+		id++
+		lastPos = j
+		lastLen = n
+	Same:
+		sa[j/2] = int32(id)
+	}
+	return id
+}
+
+// map_32 maps the LMS-substrings in text to their new IDs,
+// producing the subproblem for the recursion.
+// The mapping itself was mostly applied by assignID_8_32:
+// sa[i] is either 0, the ID for the LMS-substring at index 2*i,
+// or the ID for the LMS-substring at index 2*i+1.
+// To produce the subproblem we need only remove the zeros
+// and change ID into ID-1 (our IDs start at 1, but text chars start at 0).
+//
+// map_32 packs the result, which is the input to the recursion,
+// into the top of sa, so that the recursion result can be stored
+// in the bottom of sa, which sets up for expand_8_32 well.
+func map_32(sa []int32, numLMS int) {
+	w := len(sa)
+	for i := len(sa) / 2; i >= 0; i-- {
+		j := sa[i]
+		if j > 0 {
+			w--
+			sa[w] = j - 1
+		}
+	}
+}
+
+// recurse_32 calls sais_32 recursively to solve the subproblem we've built.
+// The subproblem is at the right end of sa, the suffix array result will be
+// written at the left end of sa, and the middle of sa is available for use as
+// temporary frequency and bucket storage.
+func recurse_32(sa, oldTmp []int32, numLMS, maxID int) {
+	dst, saTmp, text := sa[:numLMS], sa[numLMS:len(sa)-numLMS], sa[len(sa)-numLMS:]
+
+	// Set up temporary space for recursive call.
+	// We must pass sais_32 a tmp buffer wiith at least maxID entries.
+	//
+	// The subproblem is guaranteed to have length at most len(sa)/2,
+	// so that sa can hold both the subproblem and its suffix array.
+	// Nearly all the time, however, the subproblem has length < len(sa)/3,
+	// in which case there is a subproblem-sized middle of sa that
+	// we can reuse for temporary space (saTmp).
+	// When recurse_32 is called from sais_8_32, oldTmp is length 512
+	// (from text_32), and saTmp will typically be much larger, so we'll use saTmp.
+	// When deeper recursions come back to recurse_32, now oldTmp is
+	// the saTmp from the top-most recursion, it is typically larger than
+	// the current saTmp (because the current sa gets smaller and smaller
+	// as the recursion gets deeper), and we keep reusing that top-most
+	// large saTmp instead of the offered smaller ones.
+	//
+	// Why is the subproblem length so often just under len(sa)/3?
+	// See Nong, Zhang, and Chen, section 3.6 for a plausible explanation.
+	// In brief, the len(sa)/2 case would correspond to an SLSLSLSLSLSL pattern
+	// in the input, perfect alternation of larger and smaller input bytes.
+	// Real text doesn't do that. If each L-type index is randomly followed
+	// by either an L-type or S-type index, then half the substrings will
+	// be of the form SLS, but the other half will be longer. Of that half,
+	// half (a quarter overall) will be SLLS; an eighth will be SLLLS, and so on.
+	// Not counting the final S in each (which overlaps the first S in the next),
+	// This works out to an average length 2×½ + 3×¼ + 4×⅛ + ... = 3.
+	// The space we need is further reduced by the fact that many of the
+	// short patterns like SLS will often be the same character sequences
+	// repeated throughout the text, reducing maxID relative to numLMS.
+	//
+	// For short inputs, the averages may not run in our favor, but then we
+	// can often fall back to using the length-512 tmp available in the
+	// top-most call. (Also a short allocation would not be a big deal.)
+	//
+	// For pathological inputs, we fall back to allocating a new tmp of length
+	// max(maxID, numLMS/2). This level of the recursion needs maxID,
+	// and all deeper levels of the recursion will need no more than numLMS/2,
+	// so this one allocation is guaranteed to suffice for the entire stack
+	// of recursive calls.
+	tmp := oldTmp
+	if len(tmp) < len(saTmp) {
+		tmp = saTmp
+	}
+	if len(tmp) < numLMS {
+		// TestSAIS/forcealloc reaches this code.
+		n := maxID
+		if n < numLMS/2 {
+			n = numLMS / 2
+		}
+		tmp = make([]int32, n)
+	}
+
+	// sais_32 requires that the caller arrange to clear dst,
+	// because in general the caller may know dst is
+	// freshly-allocated and already cleared. But this one is not.
+	for i := range dst {
+		dst[i] = 0
+	}
+	sais_32(text, maxID, dst, tmp)
+}
+
+// unmap_8_32 unmaps the subproblem back to the original.
+// sa[:numLMS] is the LMS-substring numbers, which don't matter much anymore.
+// sa[len(sa)-numLMS:] is the sorted list of those LMS-substring numbers.
+// The key part is that if the list says K that means the K'th substring.
+// We can replace sa[:numLMS] with the indexes of the LMS-substrings.
+// Then if the list says K it really means sa[K].
+// Having mapped the list back to LMS-substring indexes,
+// we can place those into the right buckets.
+func unmap_8_32(text []byte, sa []int32, numLMS int) {
+	unmap := sa[len(sa)-numLMS:]
+	j := len(unmap)
+
+	// "LMS-substring iterator" (see placeLMS_8_32 above).
+	c0, c1, isTypeS := byte(0), byte(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Populate inverse map.
+			j--
+			unmap[j] = int32(i + 1)
+		}
+	}
+
+	// Apply inverse map to subproblem suffix array.
+	sa = sa[:numLMS]
+	for i := 0; i < len(sa); i++ {
+		sa[i] = unmap[sa[i]]
+	}
+}
+
+// expand_8_32 distributes the compacted, sorted LMS-suffix indexes
+// from sa[:numLMS] into the tops of the appropriate buckets in sa,
+// preserving the sorted order and making room for the L-type indexes
+// to be slotted into the sorted sequence by induceL_8_32.
+func expand_8_32(text []byte, freq, bucket, sa []int32, numLMS int) {
+	bucketMax_8_32(text, freq, bucket)
+	bucket = bucket[:256] // eliminate bound check for bucket[c] below
+
+	// Loop backward through sa, always tracking
+	// the next index to populate from sa[:numLMS].
+	// When we get to one, populate it.
+	// Zero the rest of the slots; they have dead values in them.
+	x := numLMS - 1
+	saX := sa[x]
+	c := text[saX]
+	b := bucket[c] - 1
+	bucket[c] = b
+
+	for i := len(sa) - 1; i >= 0; i-- {
+		if i != int(b) {
+			sa[i] = 0
+			continue
+		}
+		sa[i] = saX
+
+		// Load next entry to put down (if any).
+		if x > 0 {
+			x--
+			saX = sa[x] // TODO bounds check
+			c = text[saX]
+			b = bucket[c] - 1
+			bucket[c] = b
+		}
+	}
+}
+
+// induceL_8_32 inserts L-type text indexes into sa,
+// assuming that the leftmost S-type indexes are inserted
+// into sa, in sorted order, in the right bucket halves.
+// It leaves all the L-type indexes in sa, but the
+// leftmost L-type indexes are negated, to mark them
+// for processing by induceS_8_32.
+func induceL_8_32(text []byte, sa, freq, bucket []int32) {
+	// Initialize positions for left side of character buckets.
+	bucketMin_8_32(text, freq, bucket)
+	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
+
+	// This scan is similar to the one in induceSubL_8_32 above.
+	// That one arranges to clear all but the leftmost L-type indexes.
+	// This scan leaves all the L-type indexes and the original S-type
+	// indexes, but it negates the positive leftmost L-type indexes
+	// (the ones that induceS_8_32 needs to process).
+
+	// expand_8_32 left out the implicit entry sa[-1] == len(text),
+	// corresponding to the identified type-L index len(text)-1.
+	// Process it before the left-to-right scan of sa proper.
+	// See body in loop for commentary.
+	k := len(text) - 1
+	c0, c1 := text[k-1], text[k]
+	if c0 < c1 {
+		k = -k
+	}
+
+	// Cache recently used bucket index.
+	cB := c1
+	b := bucket[cB]
+	sa[b] = int32(k)
+	b++
+
+	for i := 0; i < len(sa); i++ {
+		j := int(sa[i])
+		if j <= 0 {
+			// Skip empty or negated entry (including negated zero).
+			continue
+		}
+
+		// Index j was on work queue, meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is L-type, queue k for processing later in this loop.
+		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
+		// If k is zero, k-1 doesn't exist, so we only need to leave it
+		// for the caller. The caller can't tell the difference between
+		// an empty slot and a non-empty zero, but there's no need
+		// to distinguish them anyway: the final suffix array will end up
+		// with one zero somewhere, and that will be a real zero.
+		k := j - 1
+		c1 := text[k]
+		if k > 0 {
+			if c0 := text[k-1]; c0 < c1 {
+				k = -k
+			}
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		sa[b] = int32(k)
+		b++
+	}
+}
+
+func induceS_8_32(text []byte, sa, freq, bucket []int32) {
+	// Initialize positions for right side of character buckets.
+	bucketMax_8_32(text, freq, bucket)
+	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
+
+	cB := byte(0)
+	b := bucket[cB]
+
+	for i := len(sa) - 1; i >= 0; i-- {
+		j := int(sa[i])
+		if j >= 0 {
+			// Skip non-flagged entry.
+			// (This loop can't see an empty entry; 0 means the real zero index.)
+			continue
+		}
+
+		// Negative j is a work queue entry; rewrite to positive j for final suffix array.
+		j = -j
+		sa[i] = int32(j)
+
+		// Index j was on work queue (encoded as -j but now decoded),
+		// meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is S-type, queue -k for processing later in this loop.
+		// If k-1 is L-type (text[k-1] > text[k]), queue k to save for the caller.
+		// If k is zero, k-1 doesn't exist, so we only need to leave it
+		// for the caller.
+		k := j - 1
+		c1 := text[k]
+		if k > 0 {
+			if c0 := text[k-1]; c0 <= c1 {
+				k = -k
+			}
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		b--
+		sa[b] = int32(k)
+	}
+}
diff --git a/libgo/go/index/suffixarray/sais2.go b/libgo/go/index/suffixarray/sais2.go
new file mode 100644
index 0000000..f124702
--- /dev/null
+++ b/libgo/go/index/suffixarray/sais2.go
@@ -0,0 +1,1741 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Code generated by go generate; DO NOT EDIT.
+
+package suffixarray
+
+func text_64(text []byte, sa []int64) {
+	if int(int64(len(text))) != len(text) || len(text) != len(sa) {
+		panic("suffixarray: misuse of text_64")
+	}
+	sais_8_64(text, 256, sa, make([]int64, 2*256))
+}
+
+func sais_8_64(text []byte, textMax int, sa, tmp []int64) {
+	if len(sa) != len(text) || len(tmp) < int(textMax) {
+		panic("suffixarray: misuse of sais_8_64")
+	}
+
+	// Trivial base cases. Sorting 0 or 1 things is easy.
+	if len(text) == 0 {
+		return
+	}
+	if len(text) == 1 {
+		sa[0] = 0
+		return
+	}
+
+	// Establish slices indexed by text character
+	// holding character frequency and bucket-sort offsets.
+	// If there's only enough tmp for one slice,
+	// we make it the bucket offsets and recompute
+	// the character frequency each time we need it.
+	var freq, bucket []int64
+	if len(tmp) >= 2*textMax {
+		freq, bucket = tmp[:textMax], tmp[textMax:2*textMax]
+		freq[0] = -1 // mark as uninitialized
+	} else {
+		freq, bucket = nil, tmp[:textMax]
+	}
+
+	// The SAIS algorithm.
+	// Each of these calls makes one scan through sa.
+	// See the individual functions for documentation
+	// about each's role in the algorithm.
+	numLMS := placeLMS_8_64(text, sa, freq, bucket)
+	if numLMS <= 1 {
+		// 0 or 1 items are already sorted. Do nothing.
+	} else {
+		induceSubL_8_64(text, sa, freq, bucket)
+		induceSubS_8_64(text, sa, freq, bucket)
+		length_8_64(text, sa, numLMS)
+		maxID := assignID_8_64(text, sa, numLMS)
+		if maxID < numLMS {
+			map_64(sa, numLMS)
+			recurse_64(sa, tmp, numLMS, maxID)
+			unmap_8_64(text, sa, numLMS)
+		} else {
+			// If maxID == numLMS, then each LMS-substring
+			// is unique, so the relative ordering of two LMS-suffixes
+			// is determined by just the leading LMS-substring.
+			// That is, the LMS-suffix sort order matches the
+			// (simpler) LMS-substring sort order.
+			// Copy the original LMS-substring order into the
+			// suffix array destination.
+			copy(sa, sa[len(sa)-numLMS:])
+		}
+		expand_8_64(text, freq, bucket, sa, numLMS)
+	}
+	induceL_8_64(text, sa, freq, bucket)
+	induceS_8_64(text, sa, freq, bucket)
+
+	// Mark for caller that we overwrote tmp.
+	tmp[0] = -1
+}
+
+func sais_32(text []int32, textMax int, sa, tmp []int32) {
+	if len(sa) != len(text) || len(tmp) < int(textMax) {
+		panic("suffixarray: misuse of sais_32")
+	}
+
+	// Trivial base cases. Sorting 0 or 1 things is easy.
+	if len(text) == 0 {
+		return
+	}
+	if len(text) == 1 {
+		sa[0] = 0
+		return
+	}
+
+	// Establish slices indexed by text character
+	// holding character frequency and bucket-sort offsets.
+	// If there's only enough tmp for one slice,
+	// we make it the bucket offsets and recompute
+	// the character frequency each time we need it.
+	var freq, bucket []int32
+	if len(tmp) >= 2*textMax {
+		freq, bucket = tmp[:textMax], tmp[textMax:2*textMax]
+		freq[0] = -1 // mark as uninitialized
+	} else {
+		freq, bucket = nil, tmp[:textMax]
+	}
+
+	// The SAIS algorithm.
+	// Each of these calls makes one scan through sa.
+	// See the individual functions for documentation
+	// about each's role in the algorithm.
+	numLMS := placeLMS_32(text, sa, freq, bucket)
+	if numLMS <= 1 {
+		// 0 or 1 items are already sorted. Do nothing.
+	} else {
+		induceSubL_32(text, sa, freq, bucket)
+		induceSubS_32(text, sa, freq, bucket)
+		length_32(text, sa, numLMS)
+		maxID := assignID_32(text, sa, numLMS)
+		if maxID < numLMS {
+			map_32(sa, numLMS)
+			recurse_32(sa, tmp, numLMS, maxID)
+			unmap_32(text, sa, numLMS)
+		} else {
+			// If maxID == numLMS, then each LMS-substring
+			// is unique, so the relative ordering of two LMS-suffixes
+			// is determined by just the leading LMS-substring.
+			// That is, the LMS-suffix sort order matches the
+			// (simpler) LMS-substring sort order.
+			// Copy the original LMS-substring order into the
+			// suffix array destination.
+			copy(sa, sa[len(sa)-numLMS:])
+		}
+		expand_32(text, freq, bucket, sa, numLMS)
+	}
+	induceL_32(text, sa, freq, bucket)
+	induceS_32(text, sa, freq, bucket)
+
+	// Mark for caller that we overwrote tmp.
+	tmp[0] = -1
+}
+
+func sais_64(text []int64, textMax int, sa, tmp []int64) {
+	if len(sa) != len(text) || len(tmp) < int(textMax) {
+		panic("suffixarray: misuse of sais_64")
+	}
+
+	// Trivial base cases. Sorting 0 or 1 things is easy.
+	if len(text) == 0 {
+		return
+	}
+	if len(text) == 1 {
+		sa[0] = 0
+		return
+	}
+
+	// Establish slices indexed by text character
+	// holding character frequency and bucket-sort offsets.
+	// If there's only enough tmp for one slice,
+	// we make it the bucket offsets and recompute
+	// the character frequency each time we need it.
+	var freq, bucket []int64
+	if len(tmp) >= 2*textMax {
+		freq, bucket = tmp[:textMax], tmp[textMax:2*textMax]
+		freq[0] = -1 // mark as uninitialized
+	} else {
+		freq, bucket = nil, tmp[:textMax]
+	}
+
+	// The SAIS algorithm.
+	// Each of these calls makes one scan through sa.
+	// See the individual functions for documentation
+	// about each's role in the algorithm.
+	numLMS := placeLMS_64(text, sa, freq, bucket)
+	if numLMS <= 1 {
+		// 0 or 1 items are already sorted. Do nothing.
+	} else {
+		induceSubL_64(text, sa, freq, bucket)
+		induceSubS_64(text, sa, freq, bucket)
+		length_64(text, sa, numLMS)
+		maxID := assignID_64(text, sa, numLMS)
+		if maxID < numLMS {
+			map_64(sa, numLMS)
+			recurse_64(sa, tmp, numLMS, maxID)
+			unmap_64(text, sa, numLMS)
+		} else {
+			// If maxID == numLMS, then each LMS-substring
+			// is unique, so the relative ordering of two LMS-suffixes
+			// is determined by just the leading LMS-substring.
+			// That is, the LMS-suffix sort order matches the
+			// (simpler) LMS-substring sort order.
+			// Copy the original LMS-substring order into the
+			// suffix array destination.
+			copy(sa, sa[len(sa)-numLMS:])
+		}
+		expand_64(text, freq, bucket, sa, numLMS)
+	}
+	induceL_64(text, sa, freq, bucket)
+	induceS_64(text, sa, freq, bucket)
+
+	// Mark for caller that we overwrote tmp.
+	tmp[0] = -1
+}
+
+func freq_8_64(text []byte, freq, bucket []int64) []int64 {
+	if freq != nil && freq[0] >= 0 {
+		return freq // already computed
+	}
+	if freq == nil {
+		freq = bucket
+	}
+
+	freq = freq[:256] // eliminate bounds check for freq[c] below
+	for i := range freq {
+		freq[i] = 0
+	}
+	for _, c := range text {
+		freq[c]++
+	}
+	return freq
+}
+
+func freq_32(text []int32, freq, bucket []int32) []int32 {
+	if freq != nil && freq[0] >= 0 {
+		return freq // already computed
+	}
+	if freq == nil {
+		freq = bucket
+	}
+
+	for i := range freq {
+		freq[i] = 0
+	}
+	for _, c := range text {
+		freq[c]++
+	}
+	return freq
+}
+
+func freq_64(text []int64, freq, bucket []int64) []int64 {
+	if freq != nil && freq[0] >= 0 {
+		return freq // already computed
+	}
+	if freq == nil {
+		freq = bucket
+	}
+
+	for i := range freq {
+		freq[i] = 0
+	}
+	for _, c := range text {
+		freq[c]++
+	}
+	return freq
+}
+
+func bucketMin_8_64(text []byte, freq, bucket []int64) {
+	freq = freq_8_64(text, freq, bucket)
+	freq = freq[:256]     // establish len(freq) = 256, so 0 ≤ i < 256 below
+	bucket = bucket[:256] // eliminate bounds check for bucket[i] below
+	total := int64(0)
+	for i, n := range freq {
+		bucket[i] = total
+		total += n
+	}
+}
+
+func bucketMin_32(text []int32, freq, bucket []int32) {
+	freq = freq_32(text, freq, bucket)
+	total := int32(0)
+	for i, n := range freq {
+		bucket[i] = total
+		total += n
+	}
+}
+
+func bucketMin_64(text []int64, freq, bucket []int64) {
+	freq = freq_64(text, freq, bucket)
+	total := int64(0)
+	for i, n := range freq {
+		bucket[i] = total
+		total += n
+	}
+}
+
+func bucketMax_8_64(text []byte, freq, bucket []int64) {
+	freq = freq_8_64(text, freq, bucket)
+	freq = freq[:256]     // establish len(freq) = 256, so 0 ≤ i < 256 below
+	bucket = bucket[:256] // eliminate bounds check for bucket[i] below
+	total := int64(0)
+	for i, n := range freq {
+		total += n
+		bucket[i] = total
+	}
+}
+
+func bucketMax_32(text []int32, freq, bucket []int32) {
+	freq = freq_32(text, freq, bucket)
+	total := int32(0)
+	for i, n := range freq {
+		total += n
+		bucket[i] = total
+	}
+}
+
+func bucketMax_64(text []int64, freq, bucket []int64) {
+	freq = freq_64(text, freq, bucket)
+	total := int64(0)
+	for i, n := range freq {
+		total += n
+		bucket[i] = total
+	}
+}
+
+func placeLMS_8_64(text []byte, sa, freq, bucket []int64) int {
+	bucketMax_8_64(text, freq, bucket)
+
+	numLMS := 0
+	lastB := int64(-1)
+	bucket = bucket[:256] // eliminate bounds check for bucket[c1] below
+
+	// The next stanza of code (until the blank line) loop backward
+	// over text, stopping to execute a code body at each position i
+	// such that text[i] is an L-character and text[i+1] is an S-character.
+	// That is, i+1 is the position of the start of an LMS-substring.
+	// These could be hoisted out into a function with a callback,
+	// but at a significant speed cost. Instead, we just write these
+	// seven lines a few times in this source file. The copies below
+	// refer back to the pattern established by this original as the
+	// "LMS-substring iterator".
+	//
+	// In every scan through the text, c0, c1 are successive characters of text.
+	// In this backward scan, c0 == text[i] and c1 == text[i+1].
+	// By scanning backward, we can keep track of whether the current
+	// position is type-S or type-L according to the usual definition:
+	//
+	//	- position len(text) is type S with text[len(text)] == -1 (the sentinel)
+	//	- position i is type S if text[i] < text[i+1], or if text[i] == text[i+1] && i+1 is type S.
+	//	- position i is type L if text[i] > text[i+1], or if text[i] == text[i+1] && i+1 is type L.
+	//
+	// The backward scan lets us maintain the current type,
+	// update it when we see c0 != c1, and otherwise leave it alone.
+	// We want to identify all S positions with a preceding L.
+	// Position len(text) is one such position by definition, but we have
+	// nowhere to write it down, so we eliminate it by untruthfully
+	// setting isTypeS = false at the start of the loop.
+	c0, c1, isTypeS := byte(0), byte(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Bucket the index i+1 for the start of an LMS-substring.
+			b := bucket[c1] - 1
+			bucket[c1] = b
+			sa[b] = int64(i + 1)
+			lastB = b
+			numLMS++
+		}
+	}
+
+	// We recorded the LMS-substring starts but really want the ends.
+	// Luckily, with two differences, the start indexes and the end indexes are the same.
+	// The first difference is that the rightmost LMS-substring's end index is len(text),
+	// so the caller must pretend that sa[-1] == len(text), as noted above.
+	// The second difference is that the first leftmost LMS-substring start index
+	// does not end an earlier LMS-substring, so as an optimization we can omit
+	// that leftmost LMS-substring start index (the last one we wrote).
+	//
+	// Exception: if numLMS <= 1, the caller is not going to bother with
+	// the recursion at all and will treat the result as containing LMS-substring starts.
+	// In that case, we don't remove the final entry.
+	if numLMS > 1 {
+		sa[lastB] = 0
+	}
+	return numLMS
+}
+
+func placeLMS_32(text []int32, sa, freq, bucket []int32) int {
+	bucketMax_32(text, freq, bucket)
+
+	numLMS := 0
+	lastB := int32(-1)
+
+	// The next stanza of code (until the blank line) loop backward
+	// over text, stopping to execute a code body at each position i
+	// such that text[i] is an L-character and text[i+1] is an S-character.
+	// That is, i+1 is the position of the start of an LMS-substring.
+	// These could be hoisted out into a function with a callback,
+	// but at a significant speed cost. Instead, we just write these
+	// seven lines a few times in this source file. The copies below
+	// refer back to the pattern established by this original as the
+	// "LMS-substring iterator".
+	//
+	// In every scan through the text, c0, c1 are successive characters of text.
+	// In this backward scan, c0 == text[i] and c1 == text[i+1].
+	// By scanning backward, we can keep track of whether the current
+	// position is type-S or type-L according to the usual definition:
+	//
+	//	- position len(text) is type S with text[len(text)] == -1 (the sentinel)
+	//	- position i is type S if text[i] < text[i+1], or if text[i] == text[i+1] && i+1 is type S.
+	//	- position i is type L if text[i] > text[i+1], or if text[i] == text[i+1] && i+1 is type L.
+	//
+	// The backward scan lets us maintain the current type,
+	// update it when we see c0 != c1, and otherwise leave it alone.
+	// We want to identify all S positions with a preceding L.
+	// Position len(text) is one such position by definition, but we have
+	// nowhere to write it down, so we eliminate it by untruthfully
+	// setting isTypeS = false at the start of the loop.
+	c0, c1, isTypeS := int32(0), int32(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Bucket the index i+1 for the start of an LMS-substring.
+			b := bucket[c1] - 1
+			bucket[c1] = b
+			sa[b] = int32(i + 1)
+			lastB = b
+			numLMS++
+		}
+	}
+
+	// We recorded the LMS-substring starts but really want the ends.
+	// Luckily, with two differences, the start indexes and the end indexes are the same.
+	// The first difference is that the rightmost LMS-substring's end index is len(text),
+	// so the caller must pretend that sa[-1] == len(text), as noted above.
+	// The second difference is that the first leftmost LMS-substring start index
+	// does not end an earlier LMS-substring, so as an optimization we can omit
+	// that leftmost LMS-substring start index (the last one we wrote).
+	//
+	// Exception: if numLMS <= 1, the caller is not going to bother with
+	// the recursion at all and will treat the result as containing LMS-substring starts.
+	// In that case, we don't remove the final entry.
+	if numLMS > 1 {
+		sa[lastB] = 0
+	}
+	return numLMS
+}
+
+func placeLMS_64(text []int64, sa, freq, bucket []int64) int {
+	bucketMax_64(text, freq, bucket)
+
+	numLMS := 0
+	lastB := int64(-1)
+
+	// The next stanza of code (until the blank line) loop backward
+	// over text, stopping to execute a code body at each position i
+	// such that text[i] is an L-character and text[i+1] is an S-character.
+	// That is, i+1 is the position of the start of an LMS-substring.
+	// These could be hoisted out into a function with a callback,
+	// but at a significant speed cost. Instead, we just write these
+	// seven lines a few times in this source file. The copies below
+	// refer back to the pattern established by this original as the
+	// "LMS-substring iterator".
+	//
+	// In every scan through the text, c0, c1 are successive characters of text.
+	// In this backward scan, c0 == text[i] and c1 == text[i+1].
+	// By scanning backward, we can keep track of whether the current
+	// position is type-S or type-L according to the usual definition:
+	//
+	//	- position len(text) is type S with text[len(text)] == -1 (the sentinel)
+	//	- position i is type S if text[i] < text[i+1], or if text[i] == text[i+1] && i+1 is type S.
+	//	- position i is type L if text[i] > text[i+1], or if text[i] == text[i+1] && i+1 is type L.
+	//
+	// The backward scan lets us maintain the current type,
+	// update it when we see c0 != c1, and otherwise leave it alone.
+	// We want to identify all S positions with a preceding L.
+	// Position len(text) is one such position by definition, but we have
+	// nowhere to write it down, so we eliminate it by untruthfully
+	// setting isTypeS = false at the start of the loop.
+	c0, c1, isTypeS := int64(0), int64(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Bucket the index i+1 for the start of an LMS-substring.
+			b := bucket[c1] - 1
+			bucket[c1] = b
+			sa[b] = int64(i + 1)
+			lastB = b
+			numLMS++
+		}
+	}
+
+	// We recorded the LMS-substring starts but really want the ends.
+	// Luckily, with two differences, the start indexes and the end indexes are the same.
+	// The first difference is that the rightmost LMS-substring's end index is len(text),
+	// so the caller must pretend that sa[-1] == len(text), as noted above.
+	// The second difference is that the first leftmost LMS-substring start index
+	// does not end an earlier LMS-substring, so as an optimization we can omit
+	// that leftmost LMS-substring start index (the last one we wrote).
+	//
+	// Exception: if numLMS <= 1, the caller is not going to bother with
+	// the recursion at all and will treat the result as containing LMS-substring starts.
+	// In that case, we don't remove the final entry.
+	if numLMS > 1 {
+		sa[lastB] = 0
+	}
+	return numLMS
+}
+
+func induceSubL_8_64(text []byte, sa, freq, bucket []int64) {
+	// Initialize positions for left side of character buckets.
+	bucketMin_8_64(text, freq, bucket)
+	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
+
+	// As we scan the array left-to-right, each sa[i] = j > 0 is a correctly
+	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type L.
+	// Because j-1 is type L, inserting it into sa now will sort it correctly.
+	// But we want to distinguish a j-1 with j-2 of type L from type S.
+	// We can process the former but want to leave the latter for the caller.
+	// We record the difference by negating j-1 if it is preceded by type S.
+	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
+	// happen at sa[i´] for some i´ > i, that is, in the portion of sa we have
+	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
+	// and so on, in sorted but not necessarily adjacent order, until it finds
+	// one preceded by an index of type S, at which point it must stop.
+	//
+	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
+	// and we flip sa[i] < 0 to -sa[i], so that the loop finishes with sa containing
+	// only the indexes of the leftmost L-type indexes for each LMS-substring.
+	//
+	// The suffix array sa therefore serves simultaneously as input, output,
+	// and a miraculously well-tailored work queue.
+
+	// placeLMS_8_64 left out the implicit entry sa[-1] == len(text),
+	// corresponding to the identified type-L index len(text)-1.
+	// Process it before the left-to-right scan of sa proper.
+	// See body in loop for commentary.
+	k := len(text) - 1
+	c0, c1 := text[k-1], text[k]
+	if c0 < c1 {
+		k = -k
+	}
+
+	// Cache recently used bucket index:
+	// we're processing suffixes in sorted order
+	// and accessing buckets indexed by the
+	// byte before the sorted order, which still
+	// has very good locality.
+	// Invariant: b is cached, possibly dirty copy of bucket[cB].
+	cB := c1
+	b := bucket[cB]
+	sa[b] = int64(k)
+	b++
+
+	for i := 0; i < len(sa); i++ {
+		j := int(sa[i])
+		if j == 0 {
+			// Skip empty entry.
+			continue
+		}
+		if j < 0 {
+			// Leave discovered type-S index for caller.
+			sa[i] = int64(-j)
+			continue
+		}
+		sa[i] = 0
+
+		// Index j was on work queue, meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is L-type, queue k for processing later in this loop.
+		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
+		k := j - 1
+		c0, c1 := text[k-1], text[k]
+		if c0 < c1 {
+			k = -k
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		sa[b] = int64(k)
+		b++
+	}
+}
+
+func induceSubL_32(text []int32, sa, freq, bucket []int32) {
+	// Initialize positions for left side of character buckets.
+	bucketMin_32(text, freq, bucket)
+
+	// As we scan the array left-to-right, each sa[i] = j > 0 is a correctly
+	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type L.
+	// Because j-1 is type L, inserting it into sa now will sort it correctly.
+	// But we want to distinguish a j-1 with j-2 of type L from type S.
+	// We can process the former but want to leave the latter for the caller.
+	// We record the difference by negating j-1 if it is preceded by type S.
+	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
+	// happen at sa[i´] for some i´ > i, that is, in the portion of sa we have
+	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
+	// and so on, in sorted but not necessarily adjacent order, until it finds
+	// one preceded by an index of type S, at which point it must stop.
+	//
+	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
+	// and we flip sa[i] < 0 to -sa[i], so that the loop finishes with sa containing
+	// only the indexes of the leftmost L-type indexes for each LMS-substring.
+	//
+	// The suffix array sa therefore serves simultaneously as input, output,
+	// and a miraculously well-tailored work queue.
+
+	// placeLMS_32 left out the implicit entry sa[-1] == len(text),
+	// corresponding to the identified type-L index len(text)-1.
+	// Process it before the left-to-right scan of sa proper.
+	// See body in loop for commentary.
+	k := len(text) - 1
+	c0, c1 := text[k-1], text[k]
+	if c0 < c1 {
+		k = -k
+	}
+
+	// Cache recently used bucket index:
+	// we're processing suffixes in sorted order
+	// and accessing buckets indexed by the
+	// int32 before the sorted order, which still
+	// has very good locality.
+	// Invariant: b is cached, possibly dirty copy of bucket[cB].
+	cB := c1
+	b := bucket[cB]
+	sa[b] = int32(k)
+	b++
+
+	for i := 0; i < len(sa); i++ {
+		j := int(sa[i])
+		if j == 0 {
+			// Skip empty entry.
+			continue
+		}
+		if j < 0 {
+			// Leave discovered type-S index for caller.
+			sa[i] = int32(-j)
+			continue
+		}
+		sa[i] = 0
+
+		// Index j was on work queue, meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is L-type, queue k for processing later in this loop.
+		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
+		k := j - 1
+		c0, c1 := text[k-1], text[k]
+		if c0 < c1 {
+			k = -k
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		sa[b] = int32(k)
+		b++
+	}
+}
+
+func induceSubL_64(text []int64, sa, freq, bucket []int64) {
+	// Initialize positions for left side of character buckets.
+	bucketMin_64(text, freq, bucket)
+
+	// As we scan the array left-to-right, each sa[i] = j > 0 is a correctly
+	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type L.
+	// Because j-1 is type L, inserting it into sa now will sort it correctly.
+	// But we want to distinguish a j-1 with j-2 of type L from type S.
+	// We can process the former but want to leave the latter for the caller.
+	// We record the difference by negating j-1 if it is preceded by type S.
+	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
+	// happen at sa[i´] for some i´ > i, that is, in the portion of sa we have
+	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
+	// and so on, in sorted but not necessarily adjacent order, until it finds
+	// one preceded by an index of type S, at which point it must stop.
+	//
+	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
+	// and we flip sa[i] < 0 to -sa[i], so that the loop finishes with sa containing
+	// only the indexes of the leftmost L-type indexes for each LMS-substring.
+	//
+	// The suffix array sa therefore serves simultaneously as input, output,
+	// and a miraculously well-tailored work queue.
+
+	// placeLMS_64 left out the implicit entry sa[-1] == len(text),
+	// corresponding to the identified type-L index len(text)-1.
+	// Process it before the left-to-right scan of sa proper.
+	// See body in loop for commentary.
+	k := len(text) - 1
+	c0, c1 := text[k-1], text[k]
+	if c0 < c1 {
+		k = -k
+	}
+
+	// Cache recently used bucket index:
+	// we're processing suffixes in sorted order
+	// and accessing buckets indexed by the
+	// int64 before the sorted order, which still
+	// has very good locality.
+	// Invariant: b is cached, possibly dirty copy of bucket[cB].
+	cB := c1
+	b := bucket[cB]
+	sa[b] = int64(k)
+	b++
+
+	for i := 0; i < len(sa); i++ {
+		j := int(sa[i])
+		if j == 0 {
+			// Skip empty entry.
+			continue
+		}
+		if j < 0 {
+			// Leave discovered type-S index for caller.
+			sa[i] = int64(-j)
+			continue
+		}
+		sa[i] = 0
+
+		// Index j was on work queue, meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is L-type, queue k for processing later in this loop.
+		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
+		k := j - 1
+		c0, c1 := text[k-1], text[k]
+		if c0 < c1 {
+			k = -k
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		sa[b] = int64(k)
+		b++
+	}
+}
+
+func induceSubS_8_64(text []byte, sa, freq, bucket []int64) {
+	// Initialize positions for right side of character buckets.
+	bucketMax_8_64(text, freq, bucket)
+	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
+
+	// Analogous to induceSubL_8_64 above,
+	// as we scan the array right-to-left, each sa[i] = j > 0 is a correctly
+	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type S.
+	// Because j-1 is type S, inserting it into sa now will sort it correctly.
+	// But we want to distinguish a j-1 with j-2 of type S from type L.
+	// We can process the former but want to leave the latter for the caller.
+	// We record the difference by negating j-1 if it is preceded by type L.
+	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
+	// happen at sa[i´] for some i´ < i, that is, in the portion of sa we have
+	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
+	// and so on, in sorted but not necessarily adjacent order, until it finds
+	// one preceded by an index of type L, at which point it must stop.
+	// That index (preceded by one of type L) is an LMS-substring start.
+	//
+	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
+	// and we flip sa[i] < 0 to -sa[i] and compact into the top of sa,
+	// so that the loop finishes with the top of sa containing exactly
+	// the LMS-substring start indexes, sorted by LMS-substring.
+
+	// Cache recently used bucket index:
+	cB := byte(0)
+	b := bucket[cB]
+
+	top := len(sa)
+	for i := len(sa) - 1; i >= 0; i-- {
+		j := int(sa[i])
+		if j == 0 {
+			// Skip empty entry.
+			continue
+		}
+		sa[i] = 0
+		if j < 0 {
+			// Leave discovered LMS-substring start index for caller.
+			top--
+			sa[top] = int64(-j)
+			continue
+		}
+
+		// Index j was on work queue, meaning k := j-1 is S-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is S-type, queue k for processing later in this loop.
+		// If k-1 is L-type (text[k-1] > text[k]), queue -k to save for the caller.
+		k := j - 1
+		c1 := text[k]
+		c0 := text[k-1]
+		if c0 > c1 {
+			k = -k
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		b--
+		sa[b] = int64(k)
+	}
+}
+
+func induceSubS_32(text []int32, sa, freq, bucket []int32) {
+	// Initialize positions for right side of character buckets.
+	bucketMax_32(text, freq, bucket)
+
+	// Analogous to induceSubL_32 above,
+	// as we scan the array right-to-left, each sa[i] = j > 0 is a correctly
+	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type S.
+	// Because j-1 is type S, inserting it into sa now will sort it correctly.
+	// But we want to distinguish a j-1 with j-2 of type S from type L.
+	// We can process the former but want to leave the latter for the caller.
+	// We record the difference by negating j-1 if it is preceded by type L.
+	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
+	// happen at sa[i´] for some i´ < i, that is, in the portion of sa we have
+	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
+	// and so on, in sorted but not necessarily adjacent order, until it finds
+	// one preceded by an index of type L, at which point it must stop.
+	// That index (preceded by one of type L) is an LMS-substring start.
+	//
+	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
+	// and we flip sa[i] < 0 to -sa[i] and compact into the top of sa,
+	// so that the loop finishes with the top of sa containing exactly
+	// the LMS-substring start indexes, sorted by LMS-substring.
+
+	// Cache recently used bucket index:
+	cB := int32(0)
+	b := bucket[cB]
+
+	top := len(sa)
+	for i := len(sa) - 1; i >= 0; i-- {
+		j := int(sa[i])
+		if j == 0 {
+			// Skip empty entry.
+			continue
+		}
+		sa[i] = 0
+		if j < 0 {
+			// Leave discovered LMS-substring start index for caller.
+			top--
+			sa[top] = int32(-j)
+			continue
+		}
+
+		// Index j was on work queue, meaning k := j-1 is S-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is S-type, queue k for processing later in this loop.
+		// If k-1 is L-type (text[k-1] > text[k]), queue -k to save for the caller.
+		k := j - 1
+		c1 := text[k]
+		c0 := text[k-1]
+		if c0 > c1 {
+			k = -k
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		b--
+		sa[b] = int32(k)
+	}
+}
+
+func induceSubS_64(text []int64, sa, freq, bucket []int64) {
+	// Initialize positions for right side of character buckets.
+	bucketMax_64(text, freq, bucket)
+
+	// Analogous to induceSubL_64 above,
+	// as we scan the array right-to-left, each sa[i] = j > 0 is a correctly
+	// sorted suffix array entry (for text[j:]) for which we know that j-1 is type S.
+	// Because j-1 is type S, inserting it into sa now will sort it correctly.
+	// But we want to distinguish a j-1 with j-2 of type S from type L.
+	// We can process the former but want to leave the latter for the caller.
+	// We record the difference by negating j-1 if it is preceded by type L.
+	// Either way, the insertion (into the text[j-1] bucket) is guaranteed to
+	// happen at sa[i´] for some i´ < i, that is, in the portion of sa we have
+	// yet to scan. A single pass therefore sees indexes j, j-1, j-2, j-3,
+	// and so on, in sorted but not necessarily adjacent order, until it finds
+	// one preceded by an index of type L, at which point it must stop.
+	// That index (preceded by one of type L) is an LMS-substring start.
+	//
+	// As we scan through the array, we clear the worked entries (sa[i] > 0) to zero,
+	// and we flip sa[i] < 0 to -sa[i] and compact into the top of sa,
+	// so that the loop finishes with the top of sa containing exactly
+	// the LMS-substring start indexes, sorted by LMS-substring.
+
+	// Cache recently used bucket index:
+	cB := int64(0)
+	b := bucket[cB]
+
+	top := len(sa)
+	for i := len(sa) - 1; i >= 0; i-- {
+		j := int(sa[i])
+		if j == 0 {
+			// Skip empty entry.
+			continue
+		}
+		sa[i] = 0
+		if j < 0 {
+			// Leave discovered LMS-substring start index for caller.
+			top--
+			sa[top] = int64(-j)
+			continue
+		}
+
+		// Index j was on work queue, meaning k := j-1 is S-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is S-type, queue k for processing later in this loop.
+		// If k-1 is L-type (text[k-1] > text[k]), queue -k to save for the caller.
+		k := j - 1
+		c1 := text[k]
+		c0 := text[k-1]
+		if c0 > c1 {
+			k = -k
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		b--
+		sa[b] = int64(k)
+	}
+}
+
+func length_8_64(text []byte, sa []int64, numLMS int) {
+	end := 0 // index of current LMS-substring end (0 indicates final LMS-substring)
+
+	// The encoding of N text bytes into a “length” word
+	// adds 1 to each byte, packs them into the bottom
+	// N*8 bits of a word, and then bitwise inverts the result.
+	// That is, the text sequence A B C (hex 41 42 43)
+	// encodes as ^uint64(0x42_43_44).
+	// LMS-substrings can never start or end with 0xFF.
+	// Adding 1 ensures the encoded byte sequence never
+	// starts or ends with 0x00, so that present bytes can be
+	// distinguished from zero-padding in the top bits,
+	// so the length need not be separately encoded.
+	// Inverting the bytes increases the chance that a
+	// 4-byte encoding will still be ≥ len(text).
+	// In particular, if the first byte is ASCII (<= 0x7E, so +1 <= 0x7F)
+	// then the high bit of the inversion will be set,
+	// making it clearly not a valid length (it would be a negative one).
+	//
+	// cx holds the pre-inverted encoding (the packed incremented bytes).
+	cx := uint64(0) // byte-only
+
+	// This stanza (until the blank line) is the "LMS-substring iterator",
+	// described in placeLMS_8_64 above, with one line added to maintain cx.
+	c0, c1, isTypeS := byte(0), byte(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		cx = cx<<8 | uint64(c1+1) // byte-only
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Index j = i+1 is the start of an LMS-substring.
+			// Compute length or encoded text to store in sa[j/2].
+			j := i + 1
+			var code int64
+			if end == 0 {
+				code = 0
+			} else {
+				code = int64(end - j)
+				if code <= 64/8 && ^cx >= uint64(len(text)) { // byte-only
+					code = int64(^cx) // byte-only
+				} // byte-only
+			}
+			sa[j>>1] = code
+			end = j + 1
+			cx = uint64(c1 + 1) // byte-only
+		}
+	}
+}
+
+func length_32(text []int32, sa []int32, numLMS int) {
+	end := 0 // index of current LMS-substring end (0 indicates final LMS-substring)
+
+	// The encoding of N text int32s into a “length” word
+	// adds 1 to each int32, packs them into the bottom
+	// N*8 bits of a word, and then bitwise inverts the result.
+	// That is, the text sequence A B C (hex 41 42 43)
+	// encodes as ^uint32(0x42_43_44).
+	// LMS-substrings can never start or end with 0xFF.
+	// Adding 1 ensures the encoded int32 sequence never
+	// starts or ends with 0x00, so that present int32s can be
+	// distinguished from zero-padding in the top bits,
+	// so the length need not be separately encoded.
+	// Inverting the int32s increases the chance that a
+	// 4-int32 encoding will still be ≥ len(text).
+	// In particular, if the first int32 is ASCII (<= 0x7E, so +1 <= 0x7F)
+	// then the high bit of the inversion will be set,
+	// making it clearly not a valid length (it would be a negative one).
+	//
+	// cx holds the pre-inverted encoding (the packed incremented int32s).
+
+	// This stanza (until the blank line) is the "LMS-substring iterator",
+	// described in placeLMS_32 above, with one line added to maintain cx.
+	c0, c1, isTypeS := int32(0), int32(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Index j = i+1 is the start of an LMS-substring.
+			// Compute length or encoded text to store in sa[j/2].
+			j := i + 1
+			var code int32
+			if end == 0 {
+				code = 0
+			} else {
+				code = int32(end - j)
+			}
+			sa[j>>1] = code
+			end = j + 1
+		}
+	}
+}
+
+func length_64(text []int64, sa []int64, numLMS int) {
+	end := 0 // index of current LMS-substring end (0 indicates final LMS-substring)
+
+	// The encoding of N text int64s into a “length” word
+	// adds 1 to each int64, packs them into the bottom
+	// N*8 bits of a word, and then bitwise inverts the result.
+	// That is, the text sequence A B C (hex 41 42 43)
+	// encodes as ^uint64(0x42_43_44).
+	// LMS-substrings can never start or end with 0xFF.
+	// Adding 1 ensures the encoded int64 sequence never
+	// starts or ends with 0x00, so that present int64s can be
+	// distinguished from zero-padding in the top bits,
+	// so the length need not be separately encoded.
+	// Inverting the int64s increases the chance that a
+	// 4-int64 encoding will still be ≥ len(text).
+	// In particular, if the first int64 is ASCII (<= 0x7E, so +1 <= 0x7F)
+	// then the high bit of the inversion will be set,
+	// making it clearly not a valid length (it would be a negative one).
+	//
+	// cx holds the pre-inverted encoding (the packed incremented int64s).
+
+	// This stanza (until the blank line) is the "LMS-substring iterator",
+	// described in placeLMS_64 above, with one line added to maintain cx.
+	c0, c1, isTypeS := int64(0), int64(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Index j = i+1 is the start of an LMS-substring.
+			// Compute length or encoded text to store in sa[j/2].
+			j := i + 1
+			var code int64
+			if end == 0 {
+				code = 0
+			} else {
+				code = int64(end - j)
+			}
+			sa[j>>1] = code
+			end = j + 1
+		}
+	}
+}
+
+func assignID_8_64(text []byte, sa []int64, numLMS int) int {
+	id := 0
+	lastLen := int64(-1) // impossible
+	lastPos := int64(0)
+	for _, j := range sa[len(sa)-numLMS:] {
+		// Is the LMS-substring at index j new, or is it the same as the last one we saw?
+		n := sa[j/2]
+		if n != lastLen {
+			goto New
+		}
+		if uint64(n) >= uint64(len(text)) {
+			// “Length” is really encoded full text, and they match.
+			goto Same
+		}
+		{
+			// Compare actual texts.
+			n := int(n)
+			this := text[j:][:n]
+			last := text[lastPos:][:n]
+			for i := 0; i < n; i++ {
+				if this[i] != last[i] {
+					goto New
+				}
+			}
+			goto Same
+		}
+	New:
+		id++
+		lastPos = j
+		lastLen = n
+	Same:
+		sa[j/2] = int64(id)
+	}
+	return id
+}
+
+func assignID_32(text []int32, sa []int32, numLMS int) int {
+	id := 0
+	lastLen := int32(-1) // impossible
+	lastPos := int32(0)
+	for _, j := range sa[len(sa)-numLMS:] {
+		// Is the LMS-substring at index j new, or is it the same as the last one we saw?
+		n := sa[j/2]
+		if n != lastLen {
+			goto New
+		}
+		if uint32(n) >= uint32(len(text)) {
+			// “Length” is really encoded full text, and they match.
+			goto Same
+		}
+		{
+			// Compare actual texts.
+			n := int(n)
+			this := text[j:][:n]
+			last := text[lastPos:][:n]
+			for i := 0; i < n; i++ {
+				if this[i] != last[i] {
+					goto New
+				}
+			}
+			goto Same
+		}
+	New:
+		id++
+		lastPos = j
+		lastLen = n
+	Same:
+		sa[j/2] = int32(id)
+	}
+	return id
+}
+
+func assignID_64(text []int64, sa []int64, numLMS int) int {
+	id := 0
+	lastLen := int64(-1) // impossible
+	lastPos := int64(0)
+	for _, j := range sa[len(sa)-numLMS:] {
+		// Is the LMS-substring at index j new, or is it the same as the last one we saw?
+		n := sa[j/2]
+		if n != lastLen {
+			goto New
+		}
+		if uint64(n) >= uint64(len(text)) {
+			// “Length” is really encoded full text, and they match.
+			goto Same
+		}
+		{
+			// Compare actual texts.
+			n := int(n)
+			this := text[j:][:n]
+			last := text[lastPos:][:n]
+			for i := 0; i < n; i++ {
+				if this[i] != last[i] {
+					goto New
+				}
+			}
+			goto Same
+		}
+	New:
+		id++
+		lastPos = j
+		lastLen = n
+	Same:
+		sa[j/2] = int64(id)
+	}
+	return id
+}
+
+func map_64(sa []int64, numLMS int) {
+	w := len(sa)
+	for i := len(sa) / 2; i >= 0; i-- {
+		j := sa[i]
+		if j > 0 {
+			w--
+			sa[w] = j - 1
+		}
+	}
+}
+
+func recurse_64(sa, oldTmp []int64, numLMS, maxID int) {
+	dst, saTmp, text := sa[:numLMS], sa[numLMS:len(sa)-numLMS], sa[len(sa)-numLMS:]
+
+	// Set up temporary space for recursive call.
+	// We must pass sais_64 a tmp buffer wiith at least maxID entries.
+	//
+	// The subproblem is guaranteed to have length at most len(sa)/2,
+	// so that sa can hold both the subproblem and its suffix array.
+	// Nearly all the time, however, the subproblem has length < len(sa)/3,
+	// in which case there is a subproblem-sized middle of sa that
+	// we can reuse for temporary space (saTmp).
+	// When recurse_64 is called from sais_8_64, oldTmp is length 512
+	// (from text_64), and saTmp will typically be much larger, so we'll use saTmp.
+	// When deeper recursions come back to recurse_64, now oldTmp is
+	// the saTmp from the top-most recursion, it is typically larger than
+	// the current saTmp (because the current sa gets smaller and smaller
+	// as the recursion gets deeper), and we keep reusing that top-most
+	// large saTmp instead of the offered smaller ones.
+	//
+	// Why is the subproblem length so often just under len(sa)/3?
+	// See Nong, Zhang, and Chen, section 3.6 for a plausible explanation.
+	// In brief, the len(sa)/2 case would correspond to an SLSLSLSLSLSL pattern
+	// in the input, perfect alternation of larger and smaller input bytes.
+	// Real text doesn't do that. If each L-type index is randomly followed
+	// by either an L-type or S-type index, then half the substrings will
+	// be of the form SLS, but the other half will be longer. Of that half,
+	// half (a quarter overall) will be SLLS; an eighth will be SLLLS, and so on.
+	// Not counting the final S in each (which overlaps the first S in the next),
+	// This works out to an average length 2×½ + 3×¼ + 4×⅛ + ... = 3.
+	// The space we need is further reduced by the fact that many of the
+	// short patterns like SLS will often be the same character sequences
+	// repeated throughout the text, reducing maxID relative to numLMS.
+	//
+	// For short inputs, the averages may not run in our favor, but then we
+	// can often fall back to using the length-512 tmp available in the
+	// top-most call. (Also a short allocation would not be a big deal.)
+	//
+	// For pathological inputs, we fall back to allocating a new tmp of length
+	// max(maxID, numLMS/2). This level of the recursion needs maxID,
+	// and all deeper levels of the recursion will need no more than numLMS/2,
+	// so this one allocation is guaranteed to suffice for the entire stack
+	// of recursive calls.
+	tmp := oldTmp
+	if len(tmp) < len(saTmp) {
+		tmp = saTmp
+	}
+	if len(tmp) < numLMS {
+		// TestSAIS/forcealloc reaches this code.
+		n := maxID
+		if n < numLMS/2 {
+			n = numLMS / 2
+		}
+		tmp = make([]int64, n)
+	}
+
+	// sais_64 requires that the caller arrange to clear dst,
+	// because in general the caller may know dst is
+	// freshly-allocated and already cleared. But this one is not.
+	for i := range dst {
+		dst[i] = 0
+	}
+	sais_64(text, maxID, dst, tmp)
+}
+
+func unmap_8_64(text []byte, sa []int64, numLMS int) {
+	unmap := sa[len(sa)-numLMS:]
+	j := len(unmap)
+
+	// "LMS-substring iterator" (see placeLMS_8_64 above).
+	c0, c1, isTypeS := byte(0), byte(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Populate inverse map.
+			j--
+			unmap[j] = int64(i + 1)
+		}
+	}
+
+	// Apply inverse map to subproblem suffix array.
+	sa = sa[:numLMS]
+	for i := 0; i < len(sa); i++ {
+		sa[i] = unmap[sa[i]]
+	}
+}
+
+func unmap_32(text []int32, sa []int32, numLMS int) {
+	unmap := sa[len(sa)-numLMS:]
+	j := len(unmap)
+
+	// "LMS-substring iterator" (see placeLMS_32 above).
+	c0, c1, isTypeS := int32(0), int32(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Populate inverse map.
+			j--
+			unmap[j] = int32(i + 1)
+		}
+	}
+
+	// Apply inverse map to subproblem suffix array.
+	sa = sa[:numLMS]
+	for i := 0; i < len(sa); i++ {
+		sa[i] = unmap[sa[i]]
+	}
+}
+
+func unmap_64(text []int64, sa []int64, numLMS int) {
+	unmap := sa[len(sa)-numLMS:]
+	j := len(unmap)
+
+	// "LMS-substring iterator" (see placeLMS_64 above).
+	c0, c1, isTypeS := int64(0), int64(0), false
+	for i := len(text) - 1; i >= 0; i-- {
+		c0, c1 = text[i], c0
+		if c0 < c1 {
+			isTypeS = true
+		} else if c0 > c1 && isTypeS {
+			isTypeS = false
+
+			// Populate inverse map.
+			j--
+			unmap[j] = int64(i + 1)
+		}
+	}
+
+	// Apply inverse map to subproblem suffix array.
+	sa = sa[:numLMS]
+	for i := 0; i < len(sa); i++ {
+		sa[i] = unmap[sa[i]]
+	}
+}
+
+func expand_8_64(text []byte, freq, bucket, sa []int64, numLMS int) {
+	bucketMax_8_64(text, freq, bucket)
+	bucket = bucket[:256] // eliminate bound check for bucket[c] below
+
+	// Loop backward through sa, always tracking
+	// the next index to populate from sa[:numLMS].
+	// When we get to one, populate it.
+	// Zero the rest of the slots; they have dead values in them.
+	x := numLMS - 1
+	saX := sa[x]
+	c := text[saX]
+	b := bucket[c] - 1
+	bucket[c] = b
+
+	for i := len(sa) - 1; i >= 0; i-- {
+		if i != int(b) {
+			sa[i] = 0
+			continue
+		}
+		sa[i] = saX
+
+		// Load next entry to put down (if any).
+		if x > 0 {
+			x--
+			saX = sa[x] // TODO bounds check
+			c = text[saX]
+			b = bucket[c] - 1
+			bucket[c] = b
+		}
+	}
+}
+
+func expand_32(text []int32, freq, bucket, sa []int32, numLMS int) {
+	bucketMax_32(text, freq, bucket)
+
+	// Loop backward through sa, always tracking
+	// the next index to populate from sa[:numLMS].
+	// When we get to one, populate it.
+	// Zero the rest of the slots; they have dead values in them.
+	x := numLMS - 1
+	saX := sa[x]
+	c := text[saX]
+	b := bucket[c] - 1
+	bucket[c] = b
+
+	for i := len(sa) - 1; i >= 0; i-- {
+		if i != int(b) {
+			sa[i] = 0
+			continue
+		}
+		sa[i] = saX
+
+		// Load next entry to put down (if any).
+		if x > 0 {
+			x--
+			saX = sa[x] // TODO bounds check
+			c = text[saX]
+			b = bucket[c] - 1
+			bucket[c] = b
+		}
+	}
+}
+
+func expand_64(text []int64, freq, bucket, sa []int64, numLMS int) {
+	bucketMax_64(text, freq, bucket)
+
+	// Loop backward through sa, always tracking
+	// the next index to populate from sa[:numLMS].
+	// When we get to one, populate it.
+	// Zero the rest of the slots; they have dead values in them.
+	x := numLMS - 1
+	saX := sa[x]
+	c := text[saX]
+	b := bucket[c] - 1
+	bucket[c] = b
+
+	for i := len(sa) - 1; i >= 0; i-- {
+		if i != int(b) {
+			sa[i] = 0
+			continue
+		}
+		sa[i] = saX
+
+		// Load next entry to put down (if any).
+		if x > 0 {
+			x--
+			saX = sa[x] // TODO bounds check
+			c = text[saX]
+			b = bucket[c] - 1
+			bucket[c] = b
+		}
+	}
+}
+
+func induceL_8_64(text []byte, sa, freq, bucket []int64) {
+	// Initialize positions for left side of character buckets.
+	bucketMin_8_64(text, freq, bucket)
+	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
+
+	// This scan is similar to the one in induceSubL_8_64 above.
+	// That one arranges to clear all but the leftmost L-type indexes.
+	// This scan leaves all the L-type indexes and the original S-type
+	// indexes, but it negates the positive leftmost L-type indexes
+	// (the ones that induceS_8_64 needs to process).
+
+	// expand_8_64 left out the implicit entry sa[-1] == len(text),
+	// corresponding to the identified type-L index len(text)-1.
+	// Process it before the left-to-right scan of sa proper.
+	// See body in loop for commentary.
+	k := len(text) - 1
+	c0, c1 := text[k-1], text[k]
+	if c0 < c1 {
+		k = -k
+	}
+
+	// Cache recently used bucket index.
+	cB := c1
+	b := bucket[cB]
+	sa[b] = int64(k)
+	b++
+
+	for i := 0; i < len(sa); i++ {
+		j := int(sa[i])
+		if j <= 0 {
+			// Skip empty or negated entry (including negated zero).
+			continue
+		}
+
+		// Index j was on work queue, meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is L-type, queue k for processing later in this loop.
+		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
+		// If k is zero, k-1 doesn't exist, so we only need to leave it
+		// for the caller. The caller can't tell the difference between
+		// an empty slot and a non-empty zero, but there's no need
+		// to distinguish them anyway: the final suffix array will end up
+		// with one zero somewhere, and that will be a real zero.
+		k := j - 1
+		c1 := text[k]
+		if k > 0 {
+			if c0 := text[k-1]; c0 < c1 {
+				k = -k
+			}
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		sa[b] = int64(k)
+		b++
+	}
+}
+
+func induceL_32(text []int32, sa, freq, bucket []int32) {
+	// Initialize positions for left side of character buckets.
+	bucketMin_32(text, freq, bucket)
+
+	// This scan is similar to the one in induceSubL_32 above.
+	// That one arranges to clear all but the leftmost L-type indexes.
+	// This scan leaves all the L-type indexes and the original S-type
+	// indexes, but it negates the positive leftmost L-type indexes
+	// (the ones that induceS_32 needs to process).
+
+	// expand_32 left out the implicit entry sa[-1] == len(text),
+	// corresponding to the identified type-L index len(text)-1.
+	// Process it before the left-to-right scan of sa proper.
+	// See body in loop for commentary.
+	k := len(text) - 1
+	c0, c1 := text[k-1], text[k]
+	if c0 < c1 {
+		k = -k
+	}
+
+	// Cache recently used bucket index.
+	cB := c1
+	b := bucket[cB]
+	sa[b] = int32(k)
+	b++
+
+	for i := 0; i < len(sa); i++ {
+		j := int(sa[i])
+		if j <= 0 {
+			// Skip empty or negated entry (including negated zero).
+			continue
+		}
+
+		// Index j was on work queue, meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is L-type, queue k for processing later in this loop.
+		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
+		// If k is zero, k-1 doesn't exist, so we only need to leave it
+		// for the caller. The caller can't tell the difference between
+		// an empty slot and a non-empty zero, but there's no need
+		// to distinguish them anyway: the final suffix array will end up
+		// with one zero somewhere, and that will be a real zero.
+		k := j - 1
+		c1 := text[k]
+		if k > 0 {
+			if c0 := text[k-1]; c0 < c1 {
+				k = -k
+			}
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		sa[b] = int32(k)
+		b++
+	}
+}
+
+func induceL_64(text []int64, sa, freq, bucket []int64) {
+	// Initialize positions for left side of character buckets.
+	bucketMin_64(text, freq, bucket)
+
+	// This scan is similar to the one in induceSubL_64 above.
+	// That one arranges to clear all but the leftmost L-type indexes.
+	// This scan leaves all the L-type indexes and the original S-type
+	// indexes, but it negates the positive leftmost L-type indexes
+	// (the ones that induceS_64 needs to process).
+
+	// expand_64 left out the implicit entry sa[-1] == len(text),
+	// corresponding to the identified type-L index len(text)-1.
+	// Process it before the left-to-right scan of sa proper.
+	// See body in loop for commentary.
+	k := len(text) - 1
+	c0, c1 := text[k-1], text[k]
+	if c0 < c1 {
+		k = -k
+	}
+
+	// Cache recently used bucket index.
+	cB := c1
+	b := bucket[cB]
+	sa[b] = int64(k)
+	b++
+
+	for i := 0; i < len(sa); i++ {
+		j := int(sa[i])
+		if j <= 0 {
+			// Skip empty or negated entry (including negated zero).
+			continue
+		}
+
+		// Index j was on work queue, meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is L-type, queue k for processing later in this loop.
+		// If k-1 is S-type (text[k-1] < text[k]), queue -k to save for the caller.
+		// If k is zero, k-1 doesn't exist, so we only need to leave it
+		// for the caller. The caller can't tell the difference between
+		// an empty slot and a non-empty zero, but there's no need
+		// to distinguish them anyway: the final suffix array will end up
+		// with one zero somewhere, and that will be a real zero.
+		k := j - 1
+		c1 := text[k]
+		if k > 0 {
+			if c0 := text[k-1]; c0 < c1 {
+				k = -k
+			}
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		sa[b] = int64(k)
+		b++
+	}
+}
+
+func induceS_8_64(text []byte, sa, freq, bucket []int64) {
+	// Initialize positions for right side of character buckets.
+	bucketMax_8_64(text, freq, bucket)
+	bucket = bucket[:256] // eliminate bounds check for bucket[cB] below
+
+	cB := byte(0)
+	b := bucket[cB]
+
+	for i := len(sa) - 1; i >= 0; i-- {
+		j := int(sa[i])
+		if j >= 0 {
+			// Skip non-flagged entry.
+			// (This loop can't see an empty entry; 0 means the real zero index.)
+			continue
+		}
+
+		// Negative j is a work queue entry; rewrite to positive j for final suffix array.
+		j = -j
+		sa[i] = int64(j)
+
+		// Index j was on work queue (encoded as -j but now decoded),
+		// meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is S-type, queue -k for processing later in this loop.
+		// If k-1 is L-type (text[k-1] > text[k]), queue k to save for the caller.
+		// If k is zero, k-1 doesn't exist, so we only need to leave it
+		// for the caller.
+		k := j - 1
+		c1 := text[k]
+		if k > 0 {
+			if c0 := text[k-1]; c0 <= c1 {
+				k = -k
+			}
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		b--
+		sa[b] = int64(k)
+	}
+}
+
+func induceS_32(text []int32, sa, freq, bucket []int32) {
+	// Initialize positions for right side of character buckets.
+	bucketMax_32(text, freq, bucket)
+
+	cB := int32(0)
+	b := bucket[cB]
+
+	for i := len(sa) - 1; i >= 0; i-- {
+		j := int(sa[i])
+		if j >= 0 {
+			// Skip non-flagged entry.
+			// (This loop can't see an empty entry; 0 means the real zero index.)
+			continue
+		}
+
+		// Negative j is a work queue entry; rewrite to positive j for final suffix array.
+		j = -j
+		sa[i] = int32(j)
+
+		// Index j was on work queue (encoded as -j but now decoded),
+		// meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is S-type, queue -k for processing later in this loop.
+		// If k-1 is L-type (text[k-1] > text[k]), queue k to save for the caller.
+		// If k is zero, k-1 doesn't exist, so we only need to leave it
+		// for the caller.
+		k := j - 1
+		c1 := text[k]
+		if k > 0 {
+			if c0 := text[k-1]; c0 <= c1 {
+				k = -k
+			}
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		b--
+		sa[b] = int32(k)
+	}
+}
+
+func induceS_64(text []int64, sa, freq, bucket []int64) {
+	// Initialize positions for right side of character buckets.
+	bucketMax_64(text, freq, bucket)
+
+	cB := int64(0)
+	b := bucket[cB]
+
+	for i := len(sa) - 1; i >= 0; i-- {
+		j := int(sa[i])
+		if j >= 0 {
+			// Skip non-flagged entry.
+			// (This loop can't see an empty entry; 0 means the real zero index.)
+			continue
+		}
+
+		// Negative j is a work queue entry; rewrite to positive j for final suffix array.
+		j = -j
+		sa[i] = int64(j)
+
+		// Index j was on work queue (encoded as -j but now decoded),
+		// meaning k := j-1 is L-type,
+		// so we can now place k correctly into sa.
+		// If k-1 is S-type, queue -k for processing later in this loop.
+		// If k-1 is L-type (text[k-1] > text[k]), queue k to save for the caller.
+		// If k is zero, k-1 doesn't exist, so we only need to leave it
+		// for the caller.
+		k := j - 1
+		c1 := text[k]
+		if k > 0 {
+			if c0 := text[k-1]; c0 <= c1 {
+				k = -k
+			}
+		}
+
+		if cB != c1 {
+			bucket[cB] = b
+			cB = c1
+			b = bucket[cB]
+		}
+		b--
+		sa[b] = int64(k)
+	}
+}
diff --git a/libgo/go/index/suffixarray/suffixarray.go b/libgo/go/index/suffixarray/suffixarray.go
index 0961ac4..9c169e7 100644
--- a/libgo/go/index/suffixarray/suffixarray.go
+++ b/libgo/go/index/suffixarray/suffixarray.go
@@ -19,21 +19,70 @@ package suffixarray
 import (
 	"bytes"
 	"encoding/binary"
+	"errors"
 	"io"
+	"math"
 	"regexp"
 	"sort"
 )
 
+// Can change for testing
+var maxData32 int = realMaxData32
+
+const realMaxData32 = math.MaxInt32
+
 // Index implements a suffix array for fast substring search.
 type Index struct {
 	data []byte
-	sa   []int // suffix array for data; len(sa) == len(data)
+	sa   ints // suffix array for data; sa.len() == len(data)
+}
+
+// An ints is either an []int32 or an []int64.
+// That is, one of them is empty, and one is the real data.
+// The int64 form is used when len(data) > maxData32
+type ints struct {
+	int32 []int32
+	int64 []int64
+}
+
+func (a *ints) len() int {
+	return len(a.int32) + len(a.int64)
+}
+
+func (a *ints) get(i int) int64 {
+	if a.int32 != nil {
+		return int64(a.int32[i])
+	}
+	return a.int64[i]
+}
+
+func (a *ints) set(i int, v int64) {
+	if a.int32 != nil {
+		a.int32[i] = int32(v)
+	} else {
+		a.int64[i] = v
+	}
+}
+
+func (a *ints) slice(i, j int) ints {
+	if a.int32 != nil {
+		return ints{a.int32[i:j], nil}
+	}
+	return ints{nil, a.int64[i:j]}
 }
 
 // New creates a new Index for data.
-// Index creation time is O(N*log(N)) for N = len(data).
+// Index creation time is O(N) for N = len(data).
 func New(data []byte) *Index {
-	return &Index{data, qsufsort(data)}
+	ix := &Index{data: data}
+	if len(data) <= maxData32 {
+		ix.sa.int32 = make([]int32, len(data))
+		text_32(data, ix.sa.int32)
+	} else {
+		ix.sa.int64 = make([]int64, len(data))
+		text_64(data, ix.sa.int64)
+	}
+	return ix
 }
 
 // writeInt writes an int x to w using buf to buffer the write.
@@ -44,19 +93,20 @@ func writeInt(w io.Writer, buf []byte, x int) error {
 }
 
 // readInt reads an int x from r using buf to buffer the read and returns x.
-func readInt(r io.Reader, buf []byte) (int, error) {
+func readInt(r io.Reader, buf []byte) (int64, error) {
 	_, err := io.ReadFull(r, buf[0:binary.MaxVarintLen64]) // ok to continue with error
 	x, _ := binary.Varint(buf)
-	return int(x), err
+	return x, err
 }
 
 // writeSlice writes data[:n] to w and returns n.
 // It uses buf to buffer the write.
-func writeSlice(w io.Writer, buf []byte, data []int) (n int, err error) {
+func writeSlice(w io.Writer, buf []byte, data ints) (n int, err error) {
 	// encode as many elements as fit into buf
 	p := binary.MaxVarintLen64
-	for ; n < len(data) && p+binary.MaxVarintLen64 <= len(buf); n++ {
-		p += binary.PutUvarint(buf[p:], uint64(data[n]))
+	m := data.len()
+	for ; n < m && p+binary.MaxVarintLen64 <= len(buf); n++ {
+		p += binary.PutUvarint(buf[p:], uint64(data.get(n)))
 	}
 
 	// update buffer size
@@ -67,15 +117,22 @@ func writeSlice(w io.Writer, buf []byte, data []int) (n int, err error) {
 	return
 }
 
+var errTooBig = errors.New("suffixarray: data too large")
+
 // readSlice reads data[:n] from r and returns n.
 // It uses buf to buffer the read.
-func readSlice(r io.Reader, buf []byte, data []int) (n int, err error) {
+func readSlice(r io.Reader, buf []byte, data ints) (n int, err error) {
 	// read buffer size
-	var size int
-	size, err = readInt(r, buf)
+	var size64 int64
+	size64, err = readInt(r, buf)
 	if err != nil {
 		return
 	}
+	if int64(int(size64)) != size64 || int(size64) < 0 {
+		// We never write chunks this big anyway.
+		return 0, errTooBig
+	}
+	size := int(size64)
 
 	// read buffer w/o the size
 	if _, err = io.ReadFull(r, buf[binary.MaxVarintLen64:size]); err != nil {
@@ -85,7 +142,7 @@ func readSlice(r io.Reader, buf []byte, data []int) (n int, err error) {
 	// decode as many elements as present in buf
 	for p := binary.MaxVarintLen64; p < size; n++ {
 		x, w := binary.Uvarint(buf[p:])
-		data[n] = int(x)
+		data.set(n, int64(x))
 		p += w
 	}
 
@@ -100,21 +157,31 @@ func (x *Index) Read(r io.Reader) error {
 	buf := make([]byte, bufSize)
 
 	// read length
-	n, err := readInt(r, buf)
+	n64, err := readInt(r, buf)
 	if err != nil {
 		return err
 	}
+	if int64(int(n64)) != n64 || int(n64) < 0 {
+		return errTooBig
+	}
+	n := int(n64)
 
 	// allocate space
-	if 2*n < cap(x.data) || cap(x.data) < n {
+	if 2*n < cap(x.data) || cap(x.data) < n || x.sa.int32 != nil && n > maxData32 || x.sa.int64 != nil && n <= maxData32 {
 		// new data is significantly smaller or larger than
 		// existing buffers - allocate new ones
 		x.data = make([]byte, n)
-		x.sa = make([]int, n)
+		x.sa.int32 = nil
+		x.sa.int64 = nil
+		if n <= maxData32 {
+			x.sa.int32 = make([]int32, n)
+		} else {
+			x.sa.int64 = make([]int64, n)
+		}
 	} else {
 		// re-use existing buffers
 		x.data = x.data[0:n]
-		x.sa = x.sa[0:n]
+		x.sa = x.sa.slice(0, n)
 	}
 
 	// read data
@@ -123,12 +190,13 @@ func (x *Index) Read(r io.Reader) error {
 	}
 
 	// read index
-	for sa := x.sa; len(sa) > 0; {
+	sa := x.sa
+	for sa.len() > 0 {
 		n, err := readSlice(r, buf, sa)
 		if err != nil {
 			return err
 		}
-		sa = sa[n:]
+		sa = sa.slice(n, sa.len())
 	}
 	return nil
 }
@@ -149,12 +217,13 @@ func (x *Index) Write(w io.Writer) error {
 	}
 
 	// write index
-	for sa := x.sa; len(sa) > 0; {
+	sa := x.sa
+	for sa.len() > 0 {
 		n, err := writeSlice(w, buf, sa)
 		if err != nil {
 			return err
 		}
-		sa = sa[n:]
+		sa = sa.slice(n, sa.len())
 	}
 	return nil
 }
@@ -167,18 +236,18 @@ func (x *Index) Bytes() []byte {
 }
 
 func (x *Index) at(i int) []byte {
-	return x.data[x.sa[i]:]
+	return x.data[x.sa.get(i):]
 }
 
 // lookupAll returns a slice into the matching region of the index.
 // The runtime is O(log(N)*len(s)).
-func (x *Index) lookupAll(s []byte) []int {
+func (x *Index) lookupAll(s []byte) ints {
 	// find matching suffix index range [i:j]
 	// find the first index where s would be the prefix
-	i := sort.Search(len(x.sa), func(i int) bool { return bytes.Compare(x.at(i), s) >= 0 })
+	i := sort.Search(x.sa.len(), func(i int) bool { return bytes.Compare(x.at(i), s) >= 0 })
 	// starting at i, find the first index at which s is not a prefix
-	j := i + sort.Search(len(x.sa)-i, func(j int) bool { return !bytes.HasPrefix(x.at(j+i), s) })
-	return x.sa[i:j]
+	j := i + sort.Search(x.sa.len()-i, func(j int) bool { return !bytes.HasPrefix(x.at(j+i), s) })
+	return x.sa.slice(i, j)
 }
 
 // Lookup returns an unsorted list of at most n indices where the byte string s
@@ -190,13 +259,22 @@ func (x *Index) lookupAll(s []byte) []int {
 func (x *Index) Lookup(s []byte, n int) (result []int) {
 	if len(s) > 0 && n != 0 {
 		matches := x.lookupAll(s)
-		if n < 0 || len(matches) < n {
-			n = len(matches)
+		count := matches.len()
+		if n < 0 || count < n {
+			n = count
 		}
-		// 0 <= n <= len(matches)
+		// 0 <= n <= count
 		if n > 0 {
 			result = make([]int, n)
-			copy(result, matches)
+			if matches.int32 != nil {
+				for i := range result {
+					result[i] = int(matches.int32[i])
+				}
+			} else {
+				for i := range result {
+					result[i] = int(matches.int64[i])
+				}
+			}
 		}
 	}
 	return
diff --git a/libgo/go/index/suffixarray/suffixarray_test.go b/libgo/go/index/suffixarray/suffixarray_test.go
index 644f00c..28090de 100644
--- a/libgo/go/index/suffixarray/suffixarray_test.go
+++ b/libgo/go/index/suffixarray/suffixarray_test.go
@@ -6,7 +6,11 @@ package suffixarray
 
 import (
 	"bytes"
+	"fmt"
+	"io/ioutil"
 	"math/rand"
+	"os"
+	"path/filepath"
 	"regexp"
 	"sort"
 	"strings"
@@ -207,10 +211,19 @@ func testLookups(t *testing.T, tc *testCase, x *Index, n int) {
 // index is used to hide the sort.Interface
 type index Index
 
-func (x *index) Len() int           { return len(x.sa) }
+func (x *index) Len() int           { return x.sa.len() }
 func (x *index) Less(i, j int) bool { return bytes.Compare(x.at(i), x.at(j)) < 0 }
-func (x *index) Swap(i, j int)      { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] }
-func (a *index) at(i int) []byte    { return a.data[a.sa[i]:] }
+func (x *index) Swap(i, j int) {
+	if x.sa.int32 != nil {
+		x.sa.int32[i], x.sa.int32[j] = x.sa.int32[j], x.sa.int32[i]
+	} else {
+		x.sa.int64[i], x.sa.int64[j] = x.sa.int64[j], x.sa.int64[i]
+	}
+}
+
+func (x *index) at(i int) []byte {
+	return x.data[x.sa.get(i):]
+}
 
 func testConstruction(t *testing.T, tc *testCase, x *Index) {
 	if !sort.IsSorted((*index)(x)) {
@@ -222,8 +235,12 @@ func equal(x, y *Index) bool {
 	if !bytes.Equal(x.data, y.data) {
 		return false
 	}
-	for i, j := range x.sa {
-		if j != y.sa[i] {
+	if x.sa.len() != y.sa.len() {
+		return false
+	}
+	n := x.sa.len()
+	for i := 0; i < n; i++ {
+		if x.sa.get(i) != y.sa.get(i) {
 			return false
 		}
 	}
@@ -238,16 +255,41 @@ func testSaveRestore(t *testing.T, tc *testCase, x *Index) int {
 	}
 	size := buf.Len()
 	var y Index
-	if err := y.Read(&buf); err != nil {
+	if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
+		t.Errorf("failed reading index %s (%s)", tc.name, err)
+	}
+	if !equal(x, &y) {
+		t.Errorf("restored index doesn't match saved index %s", tc.name)
+	}
+
+	old := maxData32
+	defer func() {
+		maxData32 = old
+	}()
+	// Reread as forced 32.
+	y = Index{}
+	maxData32 = realMaxData32
+	if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
 		t.Errorf("failed reading index %s (%s)", tc.name, err)
 	}
 	if !equal(x, &y) {
 		t.Errorf("restored index doesn't match saved index %s", tc.name)
 	}
+
+	// Reread as forced 64.
+	y = Index{}
+	maxData32 = -1
+	if err := y.Read(bytes.NewReader(buf.Bytes())); err != nil {
+		t.Errorf("failed reading index %s (%s)", tc.name, err)
+	}
+	if !equal(x, &y) {
+		t.Errorf("restored index doesn't match saved index %s", tc.name)
+	}
+
 	return size
 }
 
-func TestIndex(t *testing.T) {
+func testIndex(t *testing.T) {
 	for _, tc := range testCases {
 		x := New([]byte(tc.source))
 		testConstruction(t, &tc, x)
@@ -260,45 +302,314 @@ func TestIndex(t *testing.T) {
 	}
 }
 
+func TestIndex32(t *testing.T) {
+	testIndex(t)
+}
+
+func TestIndex64(t *testing.T) {
+	maxData32 = -1
+	defer func() {
+		maxData32 = realMaxData32
+	}()
+	testIndex(t)
+}
+
+func TestNew32(t *testing.T) {
+	test(t, func(x []byte) []int {
+		sa := make([]int32, len(x))
+		text_32(x, sa)
+		out := make([]int, len(sa))
+		for i, v := range sa {
+			out[i] = int(v)
+		}
+		return out
+	})
+}
+
+func TestNew64(t *testing.T) {
+	test(t, func(x []byte) []int {
+		sa := make([]int64, len(x))
+		text_64(x, sa)
+		out := make([]int, len(sa))
+		for i, v := range sa {
+			out[i] = int(v)
+		}
+		return out
+	})
+}
+
+// test tests an arbitrary suffix array construction function.
+// Generates many inputs, builds and checks suffix arrays.
+func test(t *testing.T, build func([]byte) []int) {
+	t.Run("ababab...", func(t *testing.T) {
+		// Very repetitive input has numLMS = len(x)/2-1
+		// at top level, the largest it can be.
+		// But maxID is only two (aba and ab$).
+		size := 100000
+		if testing.Short() {
+			size = 10000
+		}
+		x := make([]byte, size)
+		for i := range x {
+			x[i] = "ab"[i%2]
+		}
+		testSA(t, x, build)
+	})
+
+	t.Run("forcealloc", func(t *testing.T) {
+		// Construct a pathological input that forces
+		// recurse_32 to allocate a new temporary buffer.
+		// The input must have more than N/3 LMS-substrings,
+		// which we arrange by repeating an SLSLSLSLSLSL pattern
+		// like ababab... above, but then we must also arrange
+		// for a large number of distinct LMS-substrings.
+		// We use this pattern:
+		// 1 255 1 254 1 253 1 ... 1 2 1 255 2 254 2 253 2 252 2 ...
+		// This gives approximately 2¹⁵ distinct LMS-substrings.
+		// We need to repeat at least one substring, though,
+		// or else the recursion can be bypassed entirely.
+		x := make([]byte, 100000, 100001)
+		lo := byte(1)
+		hi := byte(255)
+		for i := range x {
+			if i%2 == 0 {
+				x[i] = lo
+			} else {
+				x[i] = hi
+				hi--
+				if hi <= lo {
+					lo++
+					if lo == 0 {
+						lo = 1
+					}
+					hi = 255
+				}
+			}
+		}
+		x[:cap(x)][len(x)] = 0 // for sais.New
+		testSA(t, x, build)
+	})
+
+	t.Run("exhaustive2", func(t *testing.T) {
+		// All inputs over {0,1} up to length 21.
+		// Runs in about 10 seconds on my laptop.
+		x := make([]byte, 30)
+		numFail := 0
+		for n := 0; n <= 21; n++ {
+			if n > 12 && testing.Short() {
+				break
+			}
+			x[n] = 0 // for sais.New
+			testRec(t, x[:n], 0, 2, &numFail, build)
+		}
+	})
+
+	t.Run("exhaustive3", func(t *testing.T) {
+		// All inputs over {0,1,2} up to length 14.
+		// Runs in about 10 seconds on my laptop.
+		x := make([]byte, 30)
+		numFail := 0
+		for n := 0; n <= 14; n++ {
+			if n > 8 && testing.Short() {
+				break
+			}
+			x[n] = 0 // for sais.New
+			testRec(t, x[:n], 0, 3, &numFail, build)
+		}
+	})
+}
+
+// testRec fills x[i:] with all possible combinations of values in [1,max]
+// and then calls testSA(t, x, build) for each one.
+func testRec(t *testing.T, x []byte, i, max int, numFail *int, build func([]byte) []int) {
+	if i < len(x) {
+		for x[i] = 1; x[i] <= byte(max); x[i]++ {
+			testRec(t, x, i+1, max, numFail, build)
+		}
+		return
+	}
+
+	if !testSA(t, x, build) {
+		*numFail++
+		if *numFail >= 10 {
+			t.Errorf("stopping after %d failures", *numFail)
+			t.FailNow()
+		}
+	}
+}
+
+// testSA tests the suffix array build function on the input x.
+// It constructs the suffix array and then checks that it is correct.
+func testSA(t *testing.T, x []byte, build func([]byte) []int) bool {
+	defer func() {
+		if e := recover(); e != nil {
+			t.Logf("build %v", x)
+			panic(e)
+		}
+	}()
+	sa := build(x)
+	if len(sa) != len(x) {
+		t.Errorf("build %v: len(sa) = %d, want %d", x, len(sa), len(x))
+		return false
+	}
+	for i := 0; i+1 < len(sa); i++ {
+		if sa[i] < 0 || sa[i] >= len(x) || sa[i+1] < 0 || sa[i+1] >= len(x) {
+			t.Errorf("build %s: sa out of range: %v\n", x, sa)
+			return false
+		}
+		if bytes.Compare(x[sa[i]:], x[sa[i+1]:]) >= 0 {
+			t.Errorf("build %v -> %v\nsa[%d:] = %d,%d out of order", x, sa, i, sa[i], sa[i+1])
+			return false
+		}
+	}
+
+	return true
+}
+
+var (
+	benchdata = make([]byte, 1e6)
+	benchrand = make([]byte, 1e6)
+)
+
 // Of all possible inputs, the random bytes have the least amount of substring
 // repetition, and the repeated bytes have the most. For most algorithms,
 // the running time of every input will be between these two.
 func benchmarkNew(b *testing.B, random bool) {
+	b.ReportAllocs()
 	b.StopTimer()
-	data := make([]byte, 1e6)
+	data := benchdata
 	if random {
-		for i := range data {
-			data[i] = byte(rand.Intn(256))
+		data = benchrand
+		if data[0] == 0 {
+			for i := range data {
+				data[i] = byte(rand.Intn(256))
+			}
 		}
 	}
 	b.StartTimer()
+	b.SetBytes(int64(len(data)))
 	for i := 0; i < b.N; i++ {
 		New(data)
 	}
 }
 
-func BenchmarkNewIndexRandom(b *testing.B) {
-	benchmarkNew(b, true)
+func makeText(name string) ([]byte, error) {
+	var data []byte
+	switch name {
+	case "opticks":
+		var err error
+		data, err = ioutil.ReadFile("../../testdata/Isaac.Newton-Opticks.txt")
+		if err != nil {
+			return nil, err
+		}
+	case "go":
+		err := filepath.Walk("../..", func(path string, info os.FileInfo, err error) error {
+			if err == nil && strings.HasSuffix(path, ".go") && !info.IsDir() {
+				file, err := ioutil.ReadFile(path)
+				if err != nil {
+					return err
+				}
+				data = append(data, file...)
+			}
+			return nil
+		})
+		if err != nil {
+			return nil, err
+		}
+	case "zero":
+		data = make([]byte, 50e6)
+	case "rand":
+		data = make([]byte, 50e6)
+		for i := range data {
+			data[i] = byte(rand.Intn(256))
+		}
+	}
+	return data, nil
 }
-func BenchmarkNewIndexRepeat(b *testing.B) {
-	benchmarkNew(b, false)
+
+func setBits(bits int) (cleanup func()) {
+	if bits == 32 {
+		maxData32 = realMaxData32
+	} else {
+		maxData32 = -1 // force use of 64-bit code
+	}
+	return func() {
+		maxData32 = realMaxData32
+	}
+}
+
+func BenchmarkNew(b *testing.B) {
+	for _, text := range []string{"opticks", "go", "zero", "rand"} {
+		b.Run("text="+text, func(b *testing.B) {
+			data, err := makeText(text)
+			if err != nil {
+				b.Fatal(err)
+			}
+			if testing.Short() && len(data) > 5e6 {
+				data = data[:5e6]
+			}
+			for _, size := range []int{100e3, 500e3, 1e6, 5e6, 10e6, 50e6} {
+				if len(data) < size {
+					continue
+				}
+				data := data[:size]
+				name := fmt.Sprintf("%dK", size/1e3)
+				if size >= 1e6 {
+					name = fmt.Sprintf("%dM", size/1e6)
+				}
+				b.Run("size="+name, func(b *testing.B) {
+					for _, bits := range []int{32, 64} {
+						if ^uint(0) == 0xffffffff && bits == 64 {
+							continue
+						}
+						b.Run(fmt.Sprintf("bits=%d", bits), func(b *testing.B) {
+							cleanup := setBits(bits)
+							defer cleanup()
+
+							b.SetBytes(int64(len(data)))
+							b.ReportAllocs()
+							for i := 0; i < b.N; i++ {
+								New(data)
+							}
+						})
+					}
+				})
+			}
+		})
+	}
 }
 
 func BenchmarkSaveRestore(b *testing.B) {
-	b.StopTimer()
 	r := rand.New(rand.NewSource(0x5a77a1)) // guarantee always same sequence
 	data := make([]byte, 1<<20)             // 1MB of data to index
 	for i := range data {
 		data[i] = byte(r.Intn(256))
 	}
-	x := New(data)
-	size := testSaveRestore(nil, nil, x)       // verify correctness
-	buf := bytes.NewBuffer(make([]byte, size)) // avoid growing
-	b.SetBytes(int64(size))
-	b.StartTimer()
-	for i := 0; i < b.N; i++ {
-		x.Write(buf)
-		var y Index
-		y.Read(buf)
+	for _, bits := range []int{32, 64} {
+		if ^uint(0) == 0xffffffff && bits == 64 {
+			continue
+		}
+		b.Run(fmt.Sprintf("bits=%d", bits), func(b *testing.B) {
+			cleanup := setBits(bits)
+			defer cleanup()
+
+			b.StopTimer()
+			x := New(data)
+			size := testSaveRestore(nil, nil, x)       // verify correctness
+			buf := bytes.NewBuffer(make([]byte, size)) // avoid growing
+			b.SetBytes(int64(size))
+			b.StartTimer()
+			b.ReportAllocs()
+			for i := 0; i < b.N; i++ {
+				buf.Reset()
+				if err := x.Write(buf); err != nil {
+					b.Fatal(err)
+				}
+				var y Index
+				if err := y.Read(buf); err != nil {
+					b.Fatal(err)
+				}
+			}
+		})
 	}
 }
author	Ian Lance Taylor <iant@golang.org>	2019-09-06 18:12:46 +0000
committer	Ian Lance Taylor <ian@gcc.gnu.org>	2019-09-06 18:12:46 +0000
commit	aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13 (patch)
tree	7e63b06d1eec92beec6997c9d3ab47a5d6a835be /libgo/go/index
parent	920ea3b8ba3164b61ac9490dfdfceb6936eda6dd (diff)
download	gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.zip gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.tar.gz gcc-aa8901e9bb0399d2c16f988ba2fe46eb0c0c5d13.tar.bz2