aboutsummaryrefslogtreecommitdiff
path: root/libgo/go/index
diff options
context:
space:
mode:
authorIan Lance Taylor <ian@gcc.gnu.org>2010-12-03 04:34:57 +0000
committerIan Lance Taylor <ian@gcc.gnu.org>2010-12-03 04:34:57 +0000
commit7a9389330e91acc3ed05deac2d198af25d13cf3c (patch)
tree38fe54a4f38ede5d949c915d66191f24a6fe5153 /libgo/go/index
parent1aa6700378e5188a853c018256113ce6e1fb5c05 (diff)
downloadgcc-7a9389330e91acc3ed05deac2d198af25d13cf3c.zip
gcc-7a9389330e91acc3ed05deac2d198af25d13cf3c.tar.gz
gcc-7a9389330e91acc3ed05deac2d198af25d13cf3c.tar.bz2
Add Go frontend, libgo library, and Go testsuite.
gcc/: * gcc.c (default_compilers): Add entry for ".go". * common.opt: Add -static-libgo as a driver option. * doc/install.texi (Configuration): Mention libgo as an option for --enable-shared. Mention go as an option for --enable-languages. * doc/invoke.texi (Overall Options): Mention .go as a file name suffix. Mention go as a -x option. * doc/frontends.texi (G++ and GCC): Mention Go as a supported language. * doc/sourcebuild.texi (Top Level): Mention libgo. * doc/standards.texi (Standards): Add section on Go language. Move references for other languages into their own section. * doc/contrib.texi (Contributors): Mention that I contributed the Go frontend. gcc/testsuite/: * lib/go.exp: New file. * lib/go-dg.exp: New file. * lib/go-torture.exp: New file. * lib/target-supports.exp (check_compile): Match // Go. From-SVN: r167407
Diffstat (limited to 'libgo/go/index')
-rw-r--r--libgo/go/index/suffixarray/suffixarray.go111
-rw-r--r--libgo/go/index/suffixarray/suffixarray_test.go161
2 files changed, 272 insertions, 0 deletions
diff --git a/libgo/go/index/suffixarray/suffixarray.go b/libgo/go/index/suffixarray/suffixarray.go
new file mode 100644
index 0000000..0a17472
--- /dev/null
+++ b/libgo/go/index/suffixarray/suffixarray.go
@@ -0,0 +1,111 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The suffixarray package implements substring search in logarithmic time
+// using an in-memory suffix array.
+//
+// Example use:
+//
+// // create index for some data
+// index := suffixarray.New(data)
+//
+// // lookup byte slice s
+// offsets1 := index.Lookup(s, -1) // the list of all indices where s occurs in data
+// offsets2 := index.Lookup(s, 3) // the list of at most 3 indices where s occurs in data
+//
+package suffixarray
+
+import (
+ "bytes"
+ "container/vector"
+ "sort"
+)
+
+// BUG(gri): For larger data (10MB) which contains very long (say 100000)
+// contiguous sequences of identical bytes, index creation time will be extremely slow.
+
+// TODO(gri): Use a more sophisticated algorithm to create the suffix array.
+
+
+// Index implements a suffix array for fast substring search.
+type Index struct {
+ data []byte
+ sa []int // suffix array for data
+}
+
+
+// New creates a new Index for data.
+// Index creation time is approximately O(N*log(N)) for N = len(data).
+//
+func New(data []byte) *Index {
+ sa := make([]int, len(data))
+ for i, _ := range sa {
+ sa[i] = i
+ }
+ x := &Index{data, sa}
+ sort.Sort((*index)(x))
+ return x
+}
+
+
+func (x *Index) at(i int) []byte {
+ return x.data[x.sa[i]:]
+}
+
+
+// Binary search according to "A Method of Programming", E.W. Dijkstra.
+func (x *Index) search(s []byte) int {
+ i, j := 0, len(x.sa)
+ // i < j for non-empty x
+ for i+1 < j {
+ // 0 <= i < j <= len(x.sa) && (x.at(i) <= s < x.at(j) || (s is not in x))
+ h := i + (j-i)/2 // i < h < j
+ if bytes.Compare(x.at(h), s) <= 0 {
+ i = h
+ } else { // s < x.at(h)
+ j = h
+ }
+ }
+ // i+1 == j for non-empty x
+ return i
+}
+
+
+// Lookup returns an unsorted list of at most n indices where the byte string s
+// occurs in the indexed data. If n < 0, all occurrences are returned.
+// The result is nil if s is empty, s is not found, or n == 0.
+// Lookup time is O((log(N) + len(result))*len(s)) where N is the
+// size of the indexed data.
+//
+func (x *Index) Lookup(s []byte, n int) []int {
+ var res vector.IntVector
+
+ if len(s) > 0 && n != 0 {
+ // find matching suffix index i
+ i := x.search(s)
+ // x.at(i) <= s < x.at(i+1)
+
+ // ignore the first suffix if it is < s
+ if i < len(x.sa) && bytes.Compare(x.at(i), s) < 0 {
+ i++
+ }
+
+ // collect the following suffixes with matching prefixes
+ for (n < 0 || len(res) < n) && i < len(x.sa) && bytes.HasPrefix(x.at(i), s) {
+ res.Push(x.sa[i])
+ i++
+ }
+ }
+
+ return res
+}
+
+
+// index is used to hide the sort.Interface
+type index Index
+
+func (x *index) Len() int { return len(x.sa) }
+func (x *index) Less(i, j int) bool { return bytes.Compare(x.at(i), x.at(j)) < 0 }
+func (x *index) Swap(i, j int) { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] }
+func (a *index) at(i int) []byte { return a.data[a.sa[i]:] }
diff --git a/libgo/go/index/suffixarray/suffixarray_test.go b/libgo/go/index/suffixarray/suffixarray_test.go
new file mode 100644
index 0000000..8280750
--- /dev/null
+++ b/libgo/go/index/suffixarray/suffixarray_test.go
@@ -0,0 +1,161 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package suffixarray
+
+import (
+ "container/vector"
+ "sort"
+ "strings"
+ "testing"
+)
+
+
+type testCase struct {
+ name string // name of test case
+ source string // source to index
+ lookups []string // strings to lookup
+}
+
+
+var testCases = []testCase{
+ {
+ "empty string",
+ "",
+ []string{
+ "",
+ "foo",
+ },
+ },
+
+ {
+ "all a's",
+ "aaaaaaaaaa", // 10 a's
+ []string{
+ "",
+ "a",
+ "aa",
+ "aaa",
+ "aaaa",
+ "aaaaa",
+ "aaaaaa",
+ "aaaaaaa",
+ "aaaaaaaa",
+ "aaaaaaaaa",
+ "aaaaaaaaaa",
+ "aaaaaaaaaaa", // 11 a's
+ },
+ },
+
+ {
+ "abc",
+ "abc",
+ []string{
+ "a",
+ "b",
+ "c",
+ "ab",
+ "bc",
+ "abc",
+ },
+ },
+
+ {
+ "barbara*3",
+ "barbarabarbarabarbara",
+ []string{
+ "a",
+ "bar",
+ "rab",
+ "arab",
+ "barbar",
+ },
+ },
+
+ {
+ "typing drill",
+ "Now is the time for all good men to come to the aid of their country.",
+ []string{
+ "Now",
+ "the time",
+ "to come the aid",
+ "is the time for all good men to come to the aid of their",
+ },
+ },
+}
+
+
+// find all occurences of s in source; report at most n occurences
+func find(src, s string, n int) []int {
+ var res vector.IntVector
+ if s != "" && n != 0 {
+ // find at most n occurences of s in src
+ for i := -1; n < 0 || len(res) < n; {
+ j := strings.Index(src[i+1:], s)
+ if j < 0 {
+ break
+ }
+ i += j + 1
+ res.Push(i)
+ }
+ }
+ return res
+}
+
+
+func testLookups(t *testing.T, src string, x *Index, tc *testCase, n int) {
+ for _, s := range tc.lookups {
+ res := x.Lookup([]byte(s), n)
+ exp := find(tc.source, s, n)
+
+ // check that the lengths match
+ if len(res) != len(exp) {
+ t.Errorf("test %q, lookup %q (n = %d): expected %d results; got %d", tc.name, s, n, len(exp), len(res))
+ }
+
+ // if n >= 0 the number of results is limited --- unless n >= all results,
+ // we may obtain different positions from the Index and from find (because
+ // Index may not find the results in the same order as find) => in general
+ // we cannot simply check that the res and exp lists are equal
+
+ // check that there are no duplicates
+ sort.SortInts(res)
+ for i, r := range res {
+ if i > 0 && res[i-1] == r {
+ t.Errorf("test %q, lookup %q, result %d (n = %d): found duplicate index %d", tc.name, s, i, n, r)
+ }
+ }
+
+ // check that each result is in fact a correct match
+ for i, r := range res {
+ if r < 0 || len(src) <= r {
+ t.Errorf("test %q, lookup %q, result %d (n = %d): index %d out of range [0, %d[", tc.name, s, i, n, r, len(src))
+ } else if !strings.HasPrefix(src[r:], s) {
+ t.Errorf("test %q, lookup %q, result %d (n = %d): index %d not a match", tc.name, s, i, n, r)
+ }
+ }
+
+ if n < 0 {
+ // all results computed - sorted res and exp must be equal
+ for i, r := range res {
+ e := exp[i]
+ if r != e {
+ t.Errorf("test %q, lookup %q, result %d: expected index %d; got %d", tc.name, s, i, e, r)
+ continue
+ }
+ }
+ }
+ }
+}
+
+
+func TestIndex(t *testing.T) {
+ for _, tc := range testCases {
+ x := New([]byte(tc.source))
+ testLookups(t, tc.source, x, &tc, 0)
+ testLookups(t, tc.source, x, &tc, 1)
+ testLookups(t, tc.source, x, &tc, 10)
+ testLookups(t, tc.source, x, &tc, -1)
+ }
+}