4 files changed, 116 insertions, 17 deletions
diff --git a/libgo/go/bufio/bufio.go b/libgo/go/bufio/bufio.go
index 3bbb933..6a70f70 100644
--- a/libgo/go/bufio/bufio.go
+++ b/libgo/go/bufio/bufio.go
@@ -179,7 +179,7 @@ func (b *Reader) Discard(n int) (discarded int, err error) {
 
 // Read reads data into p.
 // It returns the number of bytes read into p.
-// It calls Read at most once on the underlying Reader,
+// The bytes are taken from at most one Read on the underlying Reader,
 // hence n may be less than len(p).
 // At EOF, the count will be zero and err will be io.EOF.
 func (b *Reader) Read(p []byte) (n int, err error) {
diff --git a/libgo/go/bufio/example_test.go b/libgo/go/bufio/example_test.go
index 3da9141..4666e6d 100644
--- a/libgo/go/bufio/example_test.go
+++ b/libgo/go/bufio/example_test.go
@@ -80,3 +80,32 @@ func ExampleScanner_custom() {
 	// 5678
 	// Invalid input: strconv.ParseInt: parsing "1234567901234567890": value out of range
 }
+
+// Use a Scanner with a custom split function to parse a comma-separated
+// list with an empty final value.
+func ExampleScanner_emptyFinalToken() {
+	// Comma-separated list; last entry is empty.
+	const input = "1,2,3,4,"
+	scanner := bufio.NewScanner(strings.NewReader(input))
+	// Define a split function that separates on commas.
+	onComma := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
+		for i := 0; i < len(data); i++ {
+			if data[i] == ',' {
+				return i + 1, data[:i], nil
+			}
+		}
+		// There is one final token to be delivered, which may be the empty string.
+		// Returning bufio.ErrFinalToken here tells Scan there are no more tokens after this
+		// but does not trigger an error to be returned from Scan itself.
+		return 0, data, bufio.ErrFinalToken
+	}
+	scanner.Split(onComma)
+	// Scan.
+	for scanner.Scan() {
+		fmt.Printf("%q ", scanner.Text())
+	}
+	if err := scanner.Err(); err != nil {
+		fmt.Fprintln(os.Stderr, "reading input:", err)
+	}
+	// Output: "1" "2" "3" "4" ""
+}
diff --git a/libgo/go/bufio/scan.go b/libgo/go/bufio/scan.go
index 7a349fa..27a0f00 100644
--- a/libgo/go/bufio/scan.go
+++ b/libgo/go/bufio/scan.go
@@ -37,6 +37,8 @@ type Scanner struct {
 	end          int       // End of data in buf.
 	err          error     // Sticky error.
 	empties      int       // Count of successive empty tokens.
+	scanCalled   bool      // Scan has been called; buffer is in use.
+	done         bool      // Scan has finished.
 }
 
 // SplitFunc is the signature of the split function used to tokenize the
@@ -65,10 +67,13 @@ var (
 )
 
 const (
-	// MaxScanTokenSize is the maximum size used to buffer a token.
+	// MaxScanTokenSize is the maximum size used to buffer a token
+	// unless the user provides an explicit buffer with Scanner.Buffer.
 	// The actual maximum token size may be smaller as the buffer
 	// may need to include, for instance, a newline.
 	MaxScanTokenSize = 64 * 1024
+
+	startBufSize = 4096 // Size of initial allocation for buffer.
 )
 
 // NewScanner returns a new Scanner to read from r.
@@ -78,7 +83,6 @@ func NewScanner(r io.Reader) *Scanner {
 		r:            r,
 		split:        ScanLines,
 		maxTokenSize: MaxScanTokenSize,
-		buf:          make([]byte, 4096), // Plausible starting size; needn't be large.
 	}
 }
 
@@ -103,6 +107,16 @@ func (s *Scanner) Text() string {
 	return string(s.token)
 }
 
+// ErrFinalToken is a special sentinel error value. It is intended to be
+// returned by a Split function to indicate that the token being delivered
+// with the error is the last token and scanning should stop after this one.
+// After ErrFinalToken is received by Scan, scanning stops with no error.
+// The value is useful to stop processing early or when it is necessary to
+// deliver a final empty token. One could achieve the same behavior
+// with a custom error value but providing one here is tidier.
+// See the emptyFinalToken example for a use of this value.
+var ErrFinalToken = errors.New("final token")
+
 // Scan advances the Scanner to the next token, which will then be
 // available through the Bytes or Text method. It returns false when the
 // scan stops, either by reaching the end of the input or an error.
@@ -112,6 +126,10 @@ func (s *Scanner) Text() string {
 // Scan panics if the split function returns 100 empty tokens without
 // advancing the input. This is a common error mode for scanners.
 func (s *Scanner) Scan() bool {
+	if s.done {
+		return false
+	}
+	s.scanCalled = true
 	// Loop until we have a token.
 	for {
 		// See if we can get a token with what we already have.
@@ -120,6 +138,11 @@ func (s *Scanner) Scan() bool {
 		if s.end > s.start || s.err != nil {
 			advance, token, err := s.split(s.buf[s.start:s.end], s.err != nil)
 			if err != nil {
+				if err == ErrFinalToken {
+					s.token = token
+					s.done = true
+					return true
+				}
 				s.setErr(err)
 				return false
 			}
@@ -158,11 +181,16 @@ func (s *Scanner) Scan() bool {
 		}
 		// Is the buffer full? If so, resize.
 		if s.end == len(s.buf) {
-			if len(s.buf) >= s.maxTokenSize {
+			// Guarantee no overflow in the multiplication below.
+			const maxInt = int(^uint(0) >> 1)
+			if len(s.buf) >= s.maxTokenSize || len(s.buf) > maxInt/2 {
 				s.setErr(ErrTooLong)
 				return false
 			}
 			newSize := len(s.buf) * 2
+			if newSize == 0 {
+				newSize = startBufSize
+			}
 			if newSize > s.maxTokenSize {
 				newSize = s.maxTokenSize
 			}
@@ -217,9 +245,31 @@ func (s *Scanner) setErr(err error) {
 	}
 }
 
-// Split sets the split function for the Scanner. If called, it must be
-// called before Scan. The default split function is ScanLines.
+// Buffer sets the initial buffer to use when scanning and the maximum
+// size of buffer that may be allocated during scanning. The maximum
+// token size is the larger of max and cap(buf). If max <= cap(buf),
+// Scan will use this buffer only and do no allocation.
+//
+// By default, Scan uses an internal buffer and sets the
+// maximum token size to MaxScanTokenSize.
+//
+// Buffer panics if it is called after scanning has started.
+func (s *Scanner) Buffer(buf []byte, max int) {
+	if s.scanCalled {
+		panic("Buffer called after Scan")
+	}
+	s.buf = buf[0:cap(buf)]
+	s.maxTokenSize = max
+}
+
+// Split sets the split function for the Scanner.
+// The default split function is ScanLines.
+//
+// Split panics if it is called after scanning has started.
 func (s *Scanner) Split(split SplitFunc) {
+	if s.scanCalled {
+		panic("Split called after Scan")
+	}
 	s.split = split
 }
 
diff --git a/libgo/go/bufio/scan_test.go b/libgo/go/bufio/scan_test.go
index eea87cb..07b1a56d 100644
--- a/libgo/go/bufio/scan_test.go
+++ b/libgo/go/bufio/scan_test.go
@@ -429,33 +429,37 @@ func commaSplit(data []byte, atEOF bool) (advance int, token []byte, err error)
 			return i + 1, data[:i], nil
 		}
 	}
-	if !atEOF {
-		return 0, nil, nil
-	}
-	return 0, data, nil
+	return 0, data, ErrFinalToken
 }
 
-func TestEmptyTokens(t *testing.T) {
-	s := NewScanner(strings.NewReader("1,2,3,"))
-	values := []string{"1", "2", "3", ""}
+func testEmptyTokens(t *testing.T, text string, values []string) {
+	s := NewScanner(strings.NewReader(text))
 	s.Split(commaSplit)
 	var i int
-	for i = 0; i < len(values); i++ {
-		if !s.Scan() {
-			break
+	for i = 0; s.Scan(); i++ {
+		if i >= len(values) {
+			t.Fatalf("got %d fields, expected %d", i+1, len(values))
 		}
 		if s.Text() != values[i] {
 			t.Errorf("%d: expected %q got %q", i, values[i], s.Text())
 		}
 	}
 	if i != len(values) {
-		t.Errorf("got %d fields, expected %d", i, len(values))
+		t.Fatalf("got %d fields, expected %d", i, len(values))
 	}
 	if err := s.Err(); err != nil {
 		t.Fatal(err)
 	}
 }
 
+func TestEmptyTokens(t *testing.T) {
+	testEmptyTokens(t, "1,2,3,", []string{"1", "2", "3", ""})
+}
+
+func TestWithNoEmptyTokens(t *testing.T) {
+	testEmptyTokens(t, "1,2,3", []string{"1", "2", "3"})
+}
+
 func loopAtEOFSplit(data []byte, atEOF bool) (advance int, token []byte, err error) {
 	if len(data) > 0 {
 		return 1, data[:1], nil
@@ -522,3 +526,19 @@ func TestEmptyLinesOK(t *testing.T) {
 		t.Fatalf("stopped with %d left to process", c)
 	}
 }
+
+// Make sure we can read a huge token if a big enough buffer is provided.
+func TestHugeBuffer(t *testing.T) {
+	text := strings.Repeat("x", 2*MaxScanTokenSize)
+	s := NewScanner(strings.NewReader(text + "\n"))
+	s.Buffer(make([]byte, 100), 3*MaxScanTokenSize)
+	for s.Scan() {
+		token := s.Text()
+		if token != text {
+			t.Errorf("scan got incorrect token of length %d", len(token))
+		}
+	}
+	if s.Err() != nil {
+		t.Fatal("after scan:", s.Err())
+	}
+}