diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-03-16 23:05:44 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-03-16 23:05:44 +0000 |
commit | 5133f00ef8baab894d92de1e8b8baae59815a8b6 (patch) | |
tree | 44176975832a3faf1626836e70c97d5edd674122 /libgo/go/fmt | |
parent | f617201f55938fc89b532f2240bdf77bea946471 (diff) | |
download | gcc-5133f00ef8baab894d92de1e8b8baae59815a8b6.zip gcc-5133f00ef8baab894d92de1e8b8baae59815a8b6.tar.gz gcc-5133f00ef8baab894d92de1e8b8baae59815a8b6.tar.bz2 |
Update to current version of Go library (revision 94d654be2064).
From-SVN: r171076
Diffstat (limited to 'libgo/go/fmt')
-rw-r--r-- | libgo/go/fmt/doc.go | 48 | ||||
-rw-r--r-- | libgo/go/fmt/fmt_test.go | 21 | ||||
-rw-r--r-- | libgo/go/fmt/print.go | 61 | ||||
-rw-r--r-- | libgo/go/fmt/scan.go | 324 | ||||
-rw-r--r-- | libgo/go/fmt/scan_test.go | 243 |
5 files changed, 513 insertions, 184 deletions
diff --git a/libgo/go/fmt/doc.go b/libgo/go/fmt/doc.go index 191bf68..77ee62b 100644 --- a/libgo/go/fmt/doc.go +++ b/libgo/go/fmt/doc.go @@ -16,6 +16,7 @@ when printing structs, the plus flag (%+v) adds field names %#v a Go-syntax representation of the value %T a Go-syntax representation of the type of the value + %% a literal percent sign; consumes no value Boolean: %t the word true or false @@ -26,8 +27,10 @@ %o base 8 %x base 16, with lower-case letters for a-f %X base 16, with upper-case letters for A-F - %U unicode format: U+1234; same as "U+%x" with 4 digits default + %U Unicode format: U+1234; same as "U+%x" with 4 digits default Floating-point and complex constituents: + %b decimalless scientific notation with exponent a power + of two, in the manner of strconv.Ftoa32, e.g. -123456p-78 %e scientific notation, e.g. -1234.456e+78 %E scientific notation, e.g. -1234.456E+78 %f decimal point but no exponent, e.g. 123.456 @@ -44,14 +47,19 @@ There is no 'u' flag. Integers are printed unsigned if they have unsigned type. Similarly, there is no need to specify the size of the operand (int8, int64). - For numeric values, the width and precision flags control - formatting; width sets the width of the field, precision the - number of places after the decimal, if appropriate. The - format %6.2f prints 123.45. The width of a field is the number - of Unicode code points in the string. This differs from C's printf where - the field width is the number of bytes. Either or both of the - flags may be replaced with the character '*', causing their values - to be obtained from the next operand, which must be of type int. + The width and precision control formatting and are in units of Unicode + code points. (This differs from C's printf where the units are numbers + of bytes.) Either or both of the flags may be replaced with the + character '*', causing their values to be obtained from the next + operand, which must be of type int. + + For numeric values, width sets the width of the field and precision + sets the number of places after the decimal, if appropriate. For + example, the format %6.2f prints 123.45. + + For strings, width is the minimum number of characters to output, + padding with spaces if necessary, and precision is the maximum + number of characters to output, truncating if necessary. Other flags: + always print a sign for numeric values @@ -112,7 +120,7 @@ An analogous set of functions scans formatted text to yield values. Scan, Scanf and Scanln read from os.Stdin; Fscan, Fscanf and Fscanln read from a specified os.Reader; Sscan, - Sscanf and Sscanln read from an argument string. Sscanln, + Sscanf and Sscanln read from an argument string. Scanln, Fscanln and Sscanln stop scanning at a newline and require that the items be followed by one; Sscanf, Fscanf and Sscanf require newlines in the input to match newlines in the format; the other @@ -131,6 +139,10 @@ %e %E %f %F %g %g are all equivalent and scan any floating point or complex value %s and %v on strings scan a space-delimited token + The familiar base-setting prefixes 0 (octal) and 0x + (hexadecimal) are accepted when scanning integers without a + format or with the %v verb. + Width is interpreted in the input text (%5s means at most five runes of input will be read to scan a string) but there is no syntax for scanning with a precision (no %5.2f, just @@ -152,13 +164,15 @@ All arguments to be scanned must be either pointers to basic types or implementations of the Scanner interface. - Note: Fscan etc. can read one character (rune) past the - input they return, which means that a loop calling a scan - routine may skip some of the input. This is usually a - problem only when there is no space between input values. - However, if the reader provided to Fscan implements UnreadRune, + Note: Fscan etc. can read one character (rune) past the input + they return, which means that a loop calling a scan routine + may skip some of the input. This is usually a problem only + when there is no space between input values. If the reader + provided to Fscan implements ReadRune, that method will be used + to read characters. If the reader also implements UnreadRune, that method will be used to save the character and successive - calls will not lose data. To attach an UnreadRune method - to a reader without that capability, use bufio.NewReader. + calls will not lose data. To attach ReadRune and UnreadRune + methods to a reader without that capability, use + bufio.NewReader. */ package fmt diff --git a/libgo/go/fmt/fmt_test.go b/libgo/go/fmt/fmt_test.go index 3f085b7..c8aa609 100644 --- a/libgo/go/fmt/fmt_test.go +++ b/libgo/go/fmt/fmt_test.go @@ -311,9 +311,9 @@ var fmttests = []struct { // go syntax {"%#v", A{1, 2, "a", []int{1, 2}}, `fmt_test.A{i:1, j:0x2, s:"a", x:[]int{1, 2}}`}, - {"%#v", &b, "(*uint8)(PTR)"}, - {"%#v", TestFmtInterface, "(func(*testing.T))(PTR)"}, - {"%#v", make(chan int), "(chan int)(PTR)"}, + {"%#v", &b, "(*uint8)(0xPTR)"}, + {"%#v", TestFmtInterface, "(func(*testing.T))(0xPTR)"}, + {"%#v", make(chan int), "(chan int)(0xPTR)"}, {"%#v", uint64(1<<64 - 1), "0xffffffffffffffff"}, {"%#v", 1000000000, "1000000000"}, {"%#v", map[string]int{"a": 1, "b": 2}, `map[string] int{"a":1, "b":2}`}, @@ -365,14 +365,15 @@ var fmttests = []struct { {"%6T", &intVal, " *int"}, // %p - {"p0=%p", new(int), "p0=PTR"}, + {"p0=%p", new(int), "p0=0xPTR"}, {"p1=%s", &pValue, "p1=String(p)"}, // String method... - {"p2=%p", &pValue, "p2=PTR"}, // ... not called with %p + {"p2=%p", &pValue, "p2=0xPTR"}, // ... not called with %p + {"p4=%#p", new(int), "p4=PTR"}, // %p on non-pointers - {"%p", make(chan int), "PTR"}, - {"%p", make(map[int]int), "PTR"}, - {"%p", make([]int, 1), "PTR"}, + {"%p", make(chan int), "0xPTR"}, + {"%p", make(map[int]int), "0xPTR"}, + {"%p", make([]int, 1), "0xPTR"}, {"%p", 27, "%!p(int=27)"}, // not a pointer at all // erroneous things @@ -388,8 +389,8 @@ var fmttests = []struct { func TestSprintf(t *testing.T) { for _, tt := range fmttests { s := Sprintf(tt.fmt, tt.val) - if i := strings.Index(s, "0x"); i >= 0 && strings.Contains(tt.out, "PTR") { - j := i + 2 + if i := strings.Index(tt.out, "PTR"); i >= 0 { + j := i for ; j < len(s); j++ { c := s[j] if (c < '0' || c > '9') && (c < 'a' || c > 'f') && (c < 'A' || c > 'F') { diff --git a/libgo/go/fmt/print.go b/libgo/go/fmt/print.go index 96029a8..4e14fda 100644 --- a/libgo/go/fmt/print.go +++ b/libgo/go/fmt/print.go @@ -74,15 +74,42 @@ type pp struct { fmt fmt } -// A leaky bucket of reusable pp structures. -var ppFree = make(chan *pp, 100) +// A cache holds a set of reusable objects. +// The buffered channel holds the currently available objects. +// If more are needed, the cache creates them by calling new. +type cache struct { + saved chan interface{} + new func() interface{} +} + +func (c *cache) put(x interface{}) { + select { + case c.saved <- x: + // saved in cache + default: + // discard + } +} -// Allocate a new pp struct. Probably can grab the previous one from ppFree. -func newPrinter() *pp { - p, ok := <-ppFree - if !ok { - p = new(pp) +func (c *cache) get() interface{} { + select { + case x := <-c.saved: + return x // reused from cache + default: + return c.new() } + panic("not reached") +} + +func newCache(f func() interface{}) *cache { + return &cache{make(chan interface{}, 100), f} +} + +var ppFree = newCache(func() interface{} { return new(pp) }) + +// Allocate a new pp struct or grab a cached one. +func newPrinter() *pp { + p := ppFree.get().(*pp) p.fmt.init(&p.buf) return p } @@ -94,7 +121,7 @@ func (p *pp) free() { return } p.buf.Reset() - _ = ppFree <- p + ppFree.put(p) } func (p *pp) Width() (wid int, ok bool) { return p.fmt.wid, p.fmt.widPresent } @@ -321,11 +348,11 @@ func (p *pp) fmtInt64(v int64, verb int, value interface{}) { } } -// fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x by -// temporarily turning on the sharp flag. -func (p *pp) fmt0x64(v uint64) { +// fmt0x64 formats a uint64 in hexadecimal and prefixes it with 0x or +// not, as requested, by temporarily setting the sharp flag. +func (p *pp) fmt0x64(v uint64, leading0x bool) { sharp := p.fmt.sharp - p.fmt.sharp = true // turn on 0x + p.fmt.sharp = leading0x p.fmt.integer(int64(v), 16, unsigned, ldigits) p.fmt.sharp = sharp } @@ -357,7 +384,7 @@ func (p *pp) fmtUint64(v uint64, verb int, goSyntax bool, value interface{}) { p.fmt.integer(int64(v), 10, unsigned, ldigits) case 'v': if goSyntax { - p.fmt0x64(v) + p.fmt0x64(v, true) } else { p.fmt.integer(int64(v), 10, unsigned, ldigits) } @@ -507,11 +534,11 @@ func (p *pp) fmtPointer(field interface{}, value reflect.Value, verb int, goSynt if u == 0 { p.buf.Write(nilBytes) } else { - p.fmt0x64(uint64(v.Get())) + p.fmt0x64(uint64(v.Get()), true) } p.add(')') } else { - p.fmt0x64(uint64(u)) + p.fmt0x64(uint64(u), !p.fmt.sharp) } } @@ -774,7 +801,7 @@ BigSwitch: if v == 0 { p.buf.Write(nilBytes) } else { - p.fmt0x64(uint64(v)) + p.fmt0x64(uint64(v), true) } p.buf.WriteByte(')') break @@ -783,7 +810,7 @@ BigSwitch: p.buf.Write(nilAngleBytes) break } - p.fmt0x64(uint64(v)) + p.fmt0x64(uint64(v), true) case uintptrGetter: p.fmtPointer(field, value, verb, goSyntax) default: diff --git a/libgo/go/fmt/scan.go b/libgo/go/fmt/scan.go index ebbb171..c0f2bac 100644 --- a/libgo/go/fmt/scan.go +++ b/libgo/go/fmt/scan.go @@ -7,6 +7,7 @@ package fmt import ( "bytes" "io" + "math" "os" "reflect" "strconv" @@ -15,18 +16,11 @@ import ( "utf8" ) -// readRuner is the interface to something that can read runes. If -// the object provided to Scan does not satisfy this interface, the -// object will be wrapped by a readRune object. -type readRuner interface { - ReadRune() (rune int, size int, err os.Error) -} - -// unreadRuner is the interface to something that can unread runes. +// runeUnreader is the interface to something that can unread runes. // If the object provided to Scan does not satisfy this interface, // a local buffer will be used to back up the input, but its contents // will be lost when Scan returns. -type unreadRuner interface { +type runeUnreader interface { UnreadRune() os.Error } @@ -34,23 +28,30 @@ type unreadRuner interface { // Scanners may do rune-at-a-time scanning or ask the ScanState // to discover the next space-delimited token. type ScanState interface { - // GetRune reads the next rune (Unicode code point) from the input. - GetRune() (rune int, err os.Error) - // UngetRune causes the next call to GetRune to return the rune. - UngetRune() - // Width returns the value of the width option and whether it has been set. - // The unit is Unicode code points. - Width() (wid int, ok bool) + // ReadRune reads the next rune (Unicode code point) from the input. + // If invoked during Scanln, Fscanln, or Sscanln, ReadRune() will + // return EOF after returning the first '\n' or when reading beyond + // the specified width. + ReadRune() (rune int, size int, err os.Error) + // UnreadRune causes the next call to ReadRune to return the same rune. + UnreadRune() os.Error // Token returns the next space-delimited token from the input. If // a width has been specified, the returned token will be no longer // than the width. Token() (token string, err os.Error) + // Width returns the value of the width option and whether it has been set. + // The unit is Unicode code points. + Width() (wid int, ok bool) + // Because ReadRune is implemented by the interface, Read should never be + // called by the scanning routines and a valid implementation of + // ScanState may choose always to return an error from Read. + Read(buf []byte) (n int, err os.Error) } // Scanner is implemented by any value that has a Scan method, which scans // the input for the representation of a value and stores the result in the // receiver, which must be a pointer to be useful. The Scan method is called -// for any argument to Scan or Scanln that implements it. +// for any argument to Scan, Scanf, or Scanln that implements it. type Scanner interface { Scan(state ScanState, verb int) os.Error } @@ -102,18 +103,18 @@ func Sscanf(str string, format string, a ...interface{}) (n int, err os.Error) { // returns the number of items successfully scanned. If that is less // than the number of arguments, err will report why. func Fscan(r io.Reader, a ...interface{}) (n int, err os.Error) { - s := newScanState(r, true) + s, old := newScanState(r, true, false) n, err = s.doScan(a) - s.free() + s.free(old) return } // Fscanln is similar to Fscan, but stops scanning at a newline and // after the final item there must be a newline or EOF. func Fscanln(r io.Reader, a ...interface{}) (n int, err os.Error) { - s := newScanState(r, false) + s, old := newScanState(r, false, true) n, err = s.doScan(a) - s.free() + s.free(old) return } @@ -121,9 +122,9 @@ func Fscanln(r io.Reader, a ...interface{}) (n int, err os.Error) { // values into successive arguments as determined by the format. It // returns the number of items successfully parsed. func Fscanf(r io.Reader, format string, a ...interface{}) (n int, err os.Error) { - s := newScanState(r, false) + s, old := newScanState(r, false, false) n, err = s.doScanf(format, a) - s.free() + s.free(old) return } @@ -137,53 +138,70 @@ const EOF = -1 // ss is the internal implementation of ScanState. type ss struct { - rr readRuner // where to read input - buf bytes.Buffer // token accumulator - nlIsSpace bool // whether newline counts as white space - peekRune int // one-rune lookahead - prevRune int // last rune returned by GetRune - atEOF bool // already read EOF - maxWid int // max width of field, in runes - widPresent bool // width was specified - wid int // width consumed so far; used in accept() -} - -func (s *ss) GetRune() (rune int, err os.Error) { + rr io.RuneReader // where to read input + buf bytes.Buffer // token accumulator + peekRune int // one-rune lookahead + prevRune int // last rune returned by ReadRune + count int // runes consumed so far. + atEOF bool // already read EOF + ssave +} + +// ssave holds the parts of ss that need to be +// saved and restored on recursive scans. +type ssave struct { + validSave bool // is or was a part of an actual ss. + nlIsEnd bool // whether newline terminates scan + nlIsSpace bool // whether newline counts as white space + fieldLimit int // max value of ss.count for this field; fieldLimit <= limit + limit int // max value of ss.count. + maxWid int // width of this field. +} + +// The Read method is only in ScanState so that ScanState +// satisfies io.Reader. It will never be called when used as +// intended, so there is no need to make it actually work. +func (s *ss) Read(buf []byte) (n int, err os.Error) { + return 0, os.ErrorString("ScanState's Read should not be called. Use ReadRune") +} + +func (s *ss) ReadRune() (rune int, size int, err os.Error) { if s.peekRune >= 0 { + s.count++ rune = s.peekRune + size = utf8.RuneLen(rune) s.prevRune = rune s.peekRune = -1 return } - rune, _, err = s.rr.ReadRune() + if s.atEOF || s.nlIsEnd && s.prevRune == '\n' || s.count >= s.fieldLimit { + err = os.EOF + return + } + + rune, size, err = s.rr.ReadRune() if err == nil { + s.count++ s.prevRune = rune + } else if err == os.EOF { + s.atEOF = true } return } func (s *ss) Width() (wid int, ok bool) { - return s.maxWid, s.widPresent + if s.maxWid == hugeWid { + return 0, false + } + return s.maxWid, true } // The public method returns an error; this private one panics. // If getRune reaches EOF, the return value is EOF (-1). func (s *ss) getRune() (rune int) { - if s.atEOF { - return EOF - } - if s.peekRune >= 0 { - rune = s.peekRune - s.prevRune = rune - s.peekRune = -1 - return - } - rune, _, err := s.rr.ReadRune() - if err == nil { - s.prevRune = rune - } else if err != nil { + rune, _, err := s.ReadRune() + if err != nil { if err == os.EOF { - s.atEOF = true return EOF } s.error(err) @@ -191,35 +209,25 @@ func (s *ss) getRune() (rune int) { return } -// mustGetRune turns os.EOF into a panic(io.ErrUnexpectedEOF). +// mustReadRune turns os.EOF into a panic(io.ErrUnexpectedEOF). // It is called in cases such as string scanning where an EOF is a // syntax error. -func (s *ss) mustGetRune() (rune int) { - if s.atEOF { +func (s *ss) mustReadRune() (rune int) { + rune = s.getRune() + if rune == EOF { s.error(io.ErrUnexpectedEOF) } - if s.peekRune >= 0 { - rune = s.peekRune - s.peekRune = -1 - return - } - rune, _, err := s.rr.ReadRune() - if err != nil { - if err == os.EOF { - err = io.ErrUnexpectedEOF - } - s.error(err) - } return } - -func (s *ss) UngetRune() { - if u, ok := s.rr.(unreadRuner); ok { +func (s *ss) UnreadRune() os.Error { + if u, ok := s.rr.(runeUnreader); ok { u.UnreadRune() } else { s.peekRune = s.prevRune } + s.count-- + return nil } func (s *ss) error(err os.Error) { @@ -246,7 +254,7 @@ func (s *ss) Token() (tok string, err os.Error) { // readRune is a structure to enable reading UTF-8 encoded code points // from an io.Reader. It is used if the Reader given to the scanner does -// not already implement ReadRuner. +// not already implement io.RuneReader. type readRune struct { reader io.Reader buf [utf8.UTFMax]byte // used only inside ReadRune @@ -303,37 +311,53 @@ func (r *readRune) ReadRune() (rune int, size int, err os.Error) { } -// A leaky bucket of reusable ss structures. -var ssFree = make(chan *ss, 100) +var ssFree = newCache(func() interface{} { return new(ss) }) -// Allocate a new ss struct. Probably can grab the previous one from ssFree. -func newScanState(r io.Reader, nlIsSpace bool) *ss { - s, ok := <-ssFree - if !ok { - s = new(ss) +// Allocate a new ss struct or grab a cached one. +func newScanState(r io.Reader, nlIsSpace, nlIsEnd bool) (s *ss, old ssave) { + // If the reader is a *ss, then we've got a recursive + // call to Scan, so re-use the scan state. + s, ok := r.(*ss) + if ok { + old = s.ssave + s.limit = s.fieldLimit + s.nlIsEnd = nlIsEnd || s.nlIsEnd + s.nlIsSpace = nlIsSpace + return } - if rr, ok := r.(readRuner); ok { + + s = ssFree.get().(*ss) + if rr, ok := r.(io.RuneReader); ok { s.rr = rr } else { s.rr = &readRune{reader: r} } s.nlIsSpace = nlIsSpace + s.nlIsEnd = nlIsEnd + s.prevRune = -1 s.peekRune = -1 s.atEOF = false - s.maxWid = 0 - s.widPresent = false - return s + s.limit = hugeWid + s.fieldLimit = hugeWid + s.maxWid = hugeWid + s.validSave = true + return } // Save used ss structs in ssFree; avoid an allocation per invocation. -func (s *ss) free() { +func (s *ss) free(old ssave) { + // If it was used recursively, just restore the old state. + if old.validSave { + s.ssave = old + return + } // Don't hold on to ss structs with large buffers. if cap(s.buf.Bytes()) > 1024 { return } s.buf.Reset() s.rr = nil - _ = ssFree <- s + ssFree.put(s) } // skipSpace skips spaces and maybe newlines. @@ -354,7 +378,7 @@ func (s *ss) skipSpace(stopAtNewline bool) { return } if !unicode.IsSpace(rune) { - s.UngetRune() + s.UnreadRune() break } } @@ -366,13 +390,13 @@ func (s *ss) skipSpace(stopAtNewline bool) { func (s *ss) token() string { s.skipSpace(false) // read until white space or newline - for nrunes := 0; !s.widPresent || nrunes < s.maxWid; nrunes++ { + for { rune := s.getRune() if rune == EOF { break } if unicode.IsSpace(rune) { - s.UngetRune() + s.UnreadRune() break } s.buf.WriteRune(rune) @@ -391,28 +415,31 @@ var boolError = os.ErrorString("syntax error scanning boolean") // consume reads the next rune in the input and reports whether it is in the ok string. // If accept is true, it puts the character into the input token. func (s *ss) consume(ok string, accept bool) bool { - if s.wid >= s.maxWid { - return false - } rune := s.getRune() if rune == EOF { return false } - for i := 0; i < len(ok); i++ { - if int(ok[i]) == rune { - if accept { - s.buf.WriteRune(rune) - s.wid++ - } - return true + if strings.IndexRune(ok, rune) >= 0 { + if accept { + s.buf.WriteRune(rune) } + return true } if rune != EOF && accept { - s.UngetRune() + s.UnreadRune() } return false } +// peek reports whether the next character is in the ok string, without consuming it. +func (s *ss) peek(ok string) bool { + rune := s.getRune() + if rune != EOF { + s.UnreadRune() + } + return strings.IndexRune(ok, rune) >= 0 +} + // accept checks the next rune in the input. If it's a byte (sic) in the string, it puts it in the // buffer and returns true. Otherwise it return false. func (s *ss) accept(ok string) bool { @@ -436,7 +463,7 @@ func (s *ss) scanBool(verb int) bool { return false } // Syntax-checking a boolean is annoying. We're not fastidious about case. - switch s.mustGetRune() { + switch s.mustReadRune() { case '0': return false case '1': @@ -463,7 +490,7 @@ const ( hexadecimalDigits = "0123456789aAbBcCdDeEfF" sign = "+-" period = "." - exponent = "eE" + exponent = "eEp" ) // getBase returns the numeric base represented by the verb and its digit string. @@ -486,8 +513,8 @@ func (s *ss) getBase(verb int) (base int, digits string) { } // scanNumber returns the numerical string with specified digits starting here. -func (s *ss) scanNumber(digits string) string { - if !s.accept(digits) { +func (s *ss) scanNumber(digits string, haveDigits bool) string { + if !haveDigits && !s.accept(digits) { s.errorString("expected integer") } for s.accept(digits) { @@ -497,7 +524,7 @@ func (s *ss) scanNumber(digits string) string { // scanRune returns the next rune value in the input. func (s *ss) scanRune(bitSize int) int64 { - rune := int64(s.mustGetRune()) + rune := int64(s.mustReadRune()) n := uint(bitSize) x := (rune << (64 - n)) >> (64 - n) if x != rune { @@ -506,22 +533,44 @@ func (s *ss) scanRune(bitSize int) int64 { return rune } +// scanBasePrefix reports whether the integer begins with a 0 or 0x, +// and returns the base, digit string, and whether a zero was found. +// It is called only if the verb is %v. +func (s *ss) scanBasePrefix() (base int, digits string, found bool) { + if !s.peek("0") { + return 10, decimalDigits, false + } + s.accept("0") + found = true // We've put a digit into the token buffer. + // Special cases for '0' && '0x' + base, digits = 8, octalDigits + if s.peek("xX") { + s.consume("xX", false) + base, digits = 16, hexadecimalDigits + } + return +} + // scanInt returns the value of the integer represented by the next // token, checking for overflow. Any error is stored in s.err. func (s *ss) scanInt(verb int, bitSize int) int64 { if verb == 'c' { return s.scanRune(bitSize) } - base, digits := s.getBase(verb) s.skipSpace(false) + base, digits := s.getBase(verb) + haveDigits := false if verb == 'U' { if !s.consume("U", false) || !s.consume("+", false) { s.errorString("bad unicode format ") } } else { s.accept(sign) // If there's a sign, it will be left in the token buffer. + if verb == 'v' { + base, digits, haveDigits = s.scanBasePrefix() + } } - tok := s.scanNumber(digits) + tok := s.scanNumber(digits, haveDigits) i, err := strconv.Btoi64(tok, base) if err != nil { s.error(err) @@ -540,14 +589,17 @@ func (s *ss) scanUint(verb int, bitSize int) uint64 { if verb == 'c' { return uint64(s.scanRune(bitSize)) } - base, digits := s.getBase(verb) s.skipSpace(false) + base, digits := s.getBase(verb) + haveDigits := false if verb == 'U' { if !s.consume("U", false) || !s.consume("+", false) { s.errorString("bad unicode format ") } + } else if verb == 'v' { + base, digits, haveDigits = s.scanBasePrefix() } - tok := s.scanNumber(digits) + tok := s.scanNumber(digits, haveDigits) i, err := strconv.Btoui64(tok, base) if err != nil { s.error(err) @@ -621,6 +673,27 @@ func (s *ss) complexTokens() (real, imag string) { // convertFloat converts the string to a float64value. func (s *ss) convertFloat(str string, n int) float64 { + if p := strings.Index(str, "p"); p >= 0 { + // Atof doesn't handle power-of-2 exponents, + // but they're easy to evaluate. + f, err := strconv.AtofN(str[:p], n) + if err != nil { + // Put full string into error. + if e, ok := err.(*strconv.NumError); ok { + e.Num = str + } + s.error(err) + } + n, err := strconv.Atoi(str[p+1:]) + if err != nil { + // Put full string into error. + if e, ok := err.(*strconv.NumError); ok { + e.Num = str + } + s.error(err) + } + return math.Ldexp(f, n) + } f, err := strconv.AtofN(str, n) if err != nil { s.error(err) @@ -667,12 +740,12 @@ func (s *ss) convertString(verb int) (str string) { // quotedString returns the double- or back-quoted string represented by the next input characters. func (s *ss) quotedString() string { - quote := s.mustGetRune() + quote := s.mustReadRune() switch quote { case '`': // Back-quoted: Anything goes until EOF or back quote. for { - rune := s.mustGetRune() + rune := s.mustReadRune() if rune == quote { break } @@ -683,13 +756,13 @@ func (s *ss) quotedString() string { // Double-quoted: Include the quotes and let strconv.Unquote do the backslash escapes. s.buf.WriteRune(quote) for { - rune := s.mustGetRune() + rune := s.mustReadRune() s.buf.WriteRune(rune) if rune == '\\' { // In a legal backslash escape, no matter how long, only the character // immediately after the escape can itself be a backslash or quote. // Thus we only need to protect the first character after the backslash. - rune := s.mustGetRune() + rune := s.mustReadRune() s.buf.WriteRune(rune) } else if rune == '"' { break @@ -728,10 +801,10 @@ func (s *ss) hexByte() (b byte, ok bool) { return } if unicode.IsSpace(rune1) { - s.UngetRune() + s.UnreadRune() return } - rune2 := s.mustGetRune() + rune2 := s.mustReadRune() return byte(s.hexDigit(rune1)<<4 | s.hexDigit(rune2)), true } @@ -751,7 +824,9 @@ func (s *ss) hexString() string { return s.buf.String() } -const floatVerbs = "eEfFgGv" +const floatVerbs = "beEfFgGv" + +const hugeWid = 1 << 30 // scanOne scans a single value, deriving the scanner from the type of the argument. func (s *ss) scanOne(verb int, field interface{}) { @@ -761,14 +836,13 @@ func (s *ss) scanOne(verb int, field interface{}) { if v, ok := field.(Scanner); ok { err = v.Scan(s, verb) if err != nil { + if err == os.EOF { + err = io.ErrUnexpectedEOF + } s.error(err) } return } - if !s.widPresent { - s.maxWid = 1 << 30 // Huge - } - s.wid = 0 switch v := field.(type) { case *bool: *v = s.scanBool(verb) @@ -869,7 +943,6 @@ func errorHandler(errp *os.Error) { } // doScan does the real work for scanning without a format string. -// At the moment, it handles only pointers to basic types. func (s *ss) doScan(a []interface{}) (numProcessed int, err os.Error) { defer errorHandler(&err) for _, field := range a { @@ -930,9 +1003,9 @@ func (s *ss) advance(format string) (i int) { s.skipSpace(true) continue } - inputc := s.mustGetRune() + inputc := s.mustReadRune() if fmtc != inputc { - s.UngetRune() + s.UnreadRune() return -1 } i += w @@ -964,7 +1037,15 @@ func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.E i++ // % is one byte // do we have 20 (width)? - s.maxWid, s.widPresent, i = parsenum(format, i, end) + var widPresent bool + s.maxWid, widPresent, i = parsenum(format, i, end) + if !widPresent { + s.maxWid = hugeWid + } + s.fieldLimit = s.limit + if f := s.count + s.maxWid; f < s.fieldLimit { + s.fieldLimit = f + } c, w := utf8.DecodeRuneInString(format[i:]) i += w @@ -977,6 +1058,7 @@ func (s *ss) doScanf(format string, a []interface{}) (numProcessed int, err os.E s.scanOne(c, field) numProcessed++ + s.fieldLimit = s.limit } if numProcessed < len(a) { s.errorString("too many operands") diff --git a/libgo/go/fmt/scan_test.go b/libgo/go/fmt/scan_test.go index 78b9fbb..65adb02 100644 --- a/libgo/go/fmt/scan_test.go +++ b/libgo/go/fmt/scan_test.go @@ -6,6 +6,7 @@ package fmt_test import ( "bufio" + "bytes" . "fmt" "io" "math" @@ -87,21 +88,7 @@ type FloatTest struct { type Xs string func (x *Xs) Scan(state ScanState, verb int) os.Error { - var tok string - var c int - var err os.Error - wid, present := state.Width() - if !present { - tok, err = state.Token() - } else { - for i := 0; i < wid; i++ { - c, err = state.GetRune() - if err != nil { - break - } - tok += string(c) - } - } + tok, err := state.Token() if err != nil { return err } @@ -114,6 +101,26 @@ func (x *Xs) Scan(state ScanState, verb int) os.Error { var xVal Xs +// IntString accepts an integer followed immediately by a string. +// It tests the embedding of a scan within a scan. +type IntString struct { + i int + s string +} + +func (s *IntString) Scan(state ScanState, verb int) os.Error { + if _, err := Fscan(state, &s.i); err != nil { + return err + } + + if _, err := Fscan(state, &s.s); err != nil { + return err + } + return nil +} + +var intStringVal IntString + // myStringReader implements Read but not ReadRune, allowing us to test our readRune wrapper // type that creates something that can read runes given only Read(). type myStringReader struct { @@ -129,10 +136,20 @@ func newReader(s string) *myStringReader { } var scanTests = []ScanTest{ - // Numbers + // Basic types {"T\n", &boolVal, true}, // boolean test vals toggle to be sure they are written {"F\n", &boolVal, false}, // restored to zero value {"21\n", &intVal, 21}, + {"0\n", &intVal, 0}, + {"000\n", &intVal, 0}, + {"0x10\n", &intVal, 0x10}, + {"-0x10\n", &intVal, -0x10}, + {"0377\n", &intVal, 0377}, + {"-0377\n", &intVal, -0377}, + {"0\n", &uintVal, uint(0)}, + {"000\n", &uintVal, uint(0)}, + {"0x10\n", &uintVal, uint(0x10)}, + {"0377\n", &uintVal, uint(0377)}, {"22\n", &int8Val, int8(22)}, {"23\n", &int16Val, int16(23)}, {"24\n", &int32Val, int32(24)}, @@ -160,6 +177,10 @@ var scanTests = []ScanTest{ {"2.3\n", &float64Val, 2.3}, {"2.3e1\n", &float32Val, float32(2.3e1)}, {"2.3e2\n", &float64Val, 2.3e2}, + {"2.3p2\n", &float64Val, 2.3 * 4}, + {"2.3p+2\n", &float64Val, 2.3 * 4}, + {"2.3p+66\n", &float64Val, 2.3 * (1 << 32) * (1 << 32) * 4}, + {"2.3p-66\n", &float64Val, 2.3 / ((1 << 32) * (1 << 32) * 4)}, {"2.35\n", &stringVal, "2.35"}, {"2345678\n", &bytesVal, []byte("2345678")}, {"(3.4e1-2i)\n", &complex128Val, 3.4e1 - 2i}, @@ -186,8 +207,9 @@ var scanTests = []ScanTest{ {"114\n", &renamedStringVal, renamedString("114")}, {"115\n", &renamedBytesVal, renamedBytes([]byte("115"))}, - // Custom scanner. + // Custom scanners. {" vvv ", &xVal, Xs("vvv")}, + {" 1234hello", &intStringVal, IntString{1234, "hello"}}, // Fixed bugs {"2147483648\n", &int64Val, int64(2147483648)}, // was: integer overflow @@ -197,6 +219,8 @@ var scanfTests = []ScanfTest{ {"%v", "TRUE\n", &boolVal, true}, {"%t", "false\n", &boolVal, false}, {"%v", "-71\n", &intVal, -71}, + {"%v", "0377\n", &intVal, 0377}, + {"%v", "0x44\n", &intVal, 0x44}, {"%d", "72\n", &intVal, 72}, {"%c", "a\n", &intVal, 'a'}, {"%c", "\u5072\n", &intVal, 0x5072}, @@ -292,6 +316,7 @@ var f float64 var s, t string var c complex128 var x, y Xs +var z IntString var multiTests = []ScanfMultiTest{ {"", "", nil, nil, ""}, @@ -305,8 +330,9 @@ var multiTests = []ScanfMultiTest{ {"%d%s", "123abc", args(&i, &s), args(123, "abc"), ""}, {"%c%c%c", "2\u50c2X", args(&i, &j, &k), args('2', '\u50c2', 'X'), ""}, - // Custom scanner. + // Custom scanners. {"%2e%f", "eefffff", args(&x, &y), args(Xs("ee"), Xs("fffff")), ""}, + {"%4v%s", "12abcd", args(&z, &s), args(IntString{12, "ab"}, "cd"), ""}, // Errors {"%t", "23 18", args(&i), nil, "bad verb"}, @@ -329,7 +355,11 @@ func testScan(name string, t *testing.T, scan func(r io.Reader, a ...interface{} } n, err := scan(r, test.in) if err != nil { - t.Errorf("%s got error scanning %q: %s", name, test.text, err) + m := "" + if n > 0 { + m = Sprintf(" (%d fields ok)", n) + } + t.Errorf("%s got error scanning %q: %s%s", name, test.text, err, m) continue } if n != 1 { @@ -657,3 +687,178 @@ func TestUnreadRuneWithBufio(t *testing.T) { t.Errorf("expected αb; got %q", a) } } + +type TwoLines string + +// Attempt to read two lines into the object. Scanln should prevent this +// because it stops at newline; Scan and Scanf should be fine. +func (t *TwoLines) Scan(state ScanState, verb int) os.Error { + chars := make([]int, 0, 100) + for nlCount := 0; nlCount < 2; { + c, _, err := state.ReadRune() + if err != nil { + return err + } + chars = append(chars, c) + if c == '\n' { + nlCount++ + } + } + *t = TwoLines(string(chars)) + return nil +} + +func TestMultiLine(t *testing.T) { + input := "abc\ndef\n" + // Sscan should work + var tscan TwoLines + n, err := Sscan(input, &tscan) + if n != 1 { + t.Errorf("Sscan: expected 1 item; got %d", n) + } + if err != nil { + t.Errorf("Sscan: expected no error; got %s", err) + } + if string(tscan) != input { + t.Errorf("Sscan: expected %q; got %q", input, tscan) + } + // Sscanf should work + var tscanf TwoLines + n, err = Sscanf(input, "%s", &tscanf) + if n != 1 { + t.Errorf("Sscanf: expected 1 item; got %d", n) + } + if err != nil { + t.Errorf("Sscanf: expected no error; got %s", err) + } + if string(tscanf) != input { + t.Errorf("Sscanf: expected %q; got %q", input, tscanf) + } + // Sscanln should not work + var tscanln TwoLines + n, err = Sscanln(input, &tscanln) + if n != 0 { + t.Errorf("Sscanln: expected 0 items; got %d: %q", n, tscanln) + } + if err == nil { + t.Error("Sscanln: expected error; got none") + } else if err != io.ErrUnexpectedEOF { + t.Errorf("Sscanln: expected io.ErrUnexpectedEOF (ha!); got %s", err) + } +} + +// RecursiveInt accepts an string matching %d.%d.%d.... +// and parses it into a linked list. +// It allows us to benchmark recursive descent style scanners. +type RecursiveInt struct { + i int + next *RecursiveInt +} + +func (r *RecursiveInt) Scan(state ScanState, verb int) (err os.Error) { + _, err = Fscan(state, &r.i) + if err != nil { + return + } + next := new(RecursiveInt) + _, err = Fscanf(state, ".%v", next) + if err != nil { + if err == os.ErrorString("input does not match format") || err == io.ErrUnexpectedEOF { + err = nil + } + return + } + r.next = next + return +} + +// Perform the same scanning task as RecursiveInt.Scan +// but without recurring through scanner, so we can compare +// performance more directly. +func scanInts(r *RecursiveInt, b *bytes.Buffer) (err os.Error) { + r.next = nil + _, err = Fscan(b, &r.i) + if err != nil { + return + } + var c int + c, _, err = b.ReadRune() + if err != nil { + if err == os.EOF { + err = nil + } + return + } + if c != '.' { + return + } + next := new(RecursiveInt) + err = scanInts(next, b) + if err == nil { + r.next = next + } + return +} + +func makeInts(n int) []byte { + var buf bytes.Buffer + Fprintf(&buf, "1") + for i := 1; i < n; i++ { + Fprintf(&buf, ".%d", i+1) + } + return buf.Bytes() +} + +func TestScanInts(t *testing.T) { + testScanInts(t, scanInts) + testScanInts(t, func(r *RecursiveInt, b *bytes.Buffer) (err os.Error) { + _, err = Fscan(b, r) + return + }) +} + +const intCount = 1000 + +func testScanInts(t *testing.T, scan func(*RecursiveInt, *bytes.Buffer) os.Error) { + r := new(RecursiveInt) + ints := makeInts(intCount) + buf := bytes.NewBuffer(ints) + err := scan(r, buf) + if err != nil { + t.Error("unexpected error", err) + } + i := 1 + for ; r != nil; r = r.next { + if r.i != i { + t.Fatal("bad scan: expected %d got %d", i, r.i) + } + i++ + } + if i-1 != intCount { + t.Fatal("bad scan count: expected %d got %d", intCount, i-1) + } +} + +func BenchmarkScanInts(b *testing.B) { + b.ResetTimer() + ints := makeInts(intCount) + var r RecursiveInt + for i := b.N - 1; i >= 0; i-- { + buf := bytes.NewBuffer(ints) + b.StartTimer() + scanInts(&r, buf) + b.StopTimer() + } +} + +func BenchmarkScanRecursiveInt(b *testing.B) { + b.ResetTimer() + ints := makeInts(intCount) + var r RecursiveInt + for i := b.N - 1; i >= 0; i-- { + buf := bytes.NewBuffer(ints) + b.StartTimer() + Fscan(buf, &r) + b.StopTimer() + } +} |