diff options
author | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-10-26 23:57:58 +0000 |
---|---|---|
committer | Ian Lance Taylor <ian@gcc.gnu.org> | 2011-10-26 23:57:58 +0000 |
commit | d8f412571f8768df2d3239e72392dfeabbad1559 (patch) | |
tree | 19d182df05ead7ff8ba7ee00a7d57555e1383fdf /libgo/go/old | |
parent | e0c39d66d4f0607177b1cf8995dda56a667e07b3 (diff) | |
download | gcc-d8f412571f8768df2d3239e72392dfeabbad1559.zip gcc-d8f412571f8768df2d3239e72392dfeabbad1559.tar.gz gcc-d8f412571f8768df2d3239e72392dfeabbad1559.tar.bz2 |
Update Go library to last weekly.
From-SVN: r180552
Diffstat (limited to 'libgo/go/old')
-rw-r--r-- | libgo/go/old/netchan/common.go | 336 | ||||
-rw-r--r-- | libgo/go/old/netchan/export.go | 400 | ||||
-rw-r--r-- | libgo/go/old/netchan/import.go | 287 | ||||
-rw-r--r-- | libgo/go/old/netchan/netchan_test.go | 435 | ||||
-rw-r--r-- | libgo/go/old/regexp/all_test.go | 426 | ||||
-rw-r--r-- | libgo/go/old/regexp/find_test.go | 472 | ||||
-rw-r--r-- | libgo/go/old/regexp/regexp.go | 1488 | ||||
-rw-r--r-- | libgo/go/old/template/template_test.go | 7 |
8 files changed, 3846 insertions, 5 deletions
diff --git a/libgo/go/old/netchan/common.go b/libgo/go/old/netchan/common.go new file mode 100644 index 0000000..ac1ca12 --- /dev/null +++ b/libgo/go/old/netchan/common.go @@ -0,0 +1,336 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package netchan + +import ( + "gob" + "io" + "os" + "reflect" + "sync" + "time" +) + +// The direction of a connection from the client's perspective. +type Dir int + +const ( + Recv Dir = iota + Send +) + +func (dir Dir) String() string { + switch dir { + case Recv: + return "Recv" + case Send: + return "Send" + } + return "???" +} + +// Payload types +const ( + payRequest = iota // request structure follows + payError // error structure follows + payData // user payload follows + payAck // acknowledgement; no payload + payClosed // channel is now closed + payAckSend // payload has been delivered. +) + +// A header is sent as a prefix to every transmission. It will be followed by +// a request structure, an error structure, or an arbitrary user payload structure. +type header struct { + Id int + PayloadType int + SeqNum int64 +} + +// Sent with a header once per channel from importer to exporter to report +// that it wants to bind to a channel with the specified direction for count +// messages, with space for size buffered values. If count is -1, it means unlimited. +type request struct { + Name string + Count int64 + Size int + Dir Dir +} + +// Sent with a header to report an error. +type error struct { + Error string +} + +// Used to unify management of acknowledgements for import and export. +type unackedCounter interface { + unackedCount() int64 + ack() int64 + seq() int64 +} + +// A channel and its direction. +type chanDir struct { + ch reflect.Value + dir Dir +} + +// clientSet contains the objects and methods needed for tracking +// clients of an exporter and draining outstanding messages. 
+type clientSet struct { + mu sync.Mutex // protects access to channel and client maps + names map[string]*chanDir + clients map[unackedCounter]bool +} + +// Mutex-protected encoder and decoder pair. +type encDec struct { + decLock sync.Mutex + dec *gob.Decoder + encLock sync.Mutex + enc *gob.Encoder +} + +func newEncDec(conn io.ReadWriter) *encDec { + return &encDec{ + dec: gob.NewDecoder(conn), + enc: gob.NewEncoder(conn), + } +} + +// Decode an item from the connection. +func (ed *encDec) decode(value reflect.Value) os.Error { + ed.decLock.Lock() + err := ed.dec.DecodeValue(value) + if err != nil { + // TODO: tear down connection? + } + ed.decLock.Unlock() + return err +} + +// Encode a header and payload onto the connection. +func (ed *encDec) encode(hdr *header, payloadType int, payload interface{}) os.Error { + ed.encLock.Lock() + hdr.PayloadType = payloadType + err := ed.enc.Encode(hdr) + if err == nil { + if payload != nil { + err = ed.enc.Encode(payload) + } + } + if err != nil { + // TODO: tear down connection if there is an error? + } + ed.encLock.Unlock() + return err +} + +// See the comment for Exporter.Drain. +func (cs *clientSet) drain(timeout int64) os.Error { + startTime := time.Nanoseconds() + for { + pending := false + cs.mu.Lock() + // Any messages waiting for a client? + for _, chDir := range cs.names { + if chDir.ch.Len() > 0 { + pending = true + } + } + // Any unacknowledged messages? + for client := range cs.clients { + n := client.unackedCount() + if n > 0 { // Check for > rather than != just to be safe. + pending = true + break + } + } + cs.mu.Unlock() + if !pending { + break + } + if timeout > 0 && time.Nanoseconds()-startTime >= timeout { + return os.NewError("timeout") + } + time.Sleep(100 * 1e6) // 100 milliseconds + } + return nil +} + +// See the comment for Exporter.Sync. +func (cs *clientSet) sync(timeout int64) os.Error { + startTime := time.Nanoseconds() + // seq remembers the clients and their seqNum at point of entry. 
+ seq := make(map[unackedCounter]int64) + for client := range cs.clients { + seq[client] = client.seq() + } + for { + pending := false + cs.mu.Lock() + // Any unacknowledged messages? Look only at clients that existed + // when we started and are still in this client set. + for client := range seq { + if _, ok := cs.clients[client]; ok { + if client.ack() < seq[client] { + pending = true + break + } + } + } + cs.mu.Unlock() + if !pending { + break + } + if timeout > 0 && time.Nanoseconds()-startTime >= timeout { + return os.NewError("timeout") + } + time.Sleep(100 * 1e6) // 100 milliseconds + } + return nil +} + +// A netChan represents a channel imported or exported +// on a single connection. Flow is controlled by the receiving +// side by sending payAckSend messages when values +// are delivered into the local channel. +type netChan struct { + *chanDir + name string + id int + size int // buffer size of channel. + closed bool + + // sender-specific state + ackCh chan bool // buffered with space for all the acks we need + space int // available space. + + // receiver-specific state + sendCh chan reflect.Value // buffered channel of values received from other end. + ed *encDec // so that we can send acks. + count int64 // number of values still to receive. +} + +// Create a new netChan with the given name (only used for +// messages), id, direction, buffer size, and count. +// The connection to the other side is represented by ed. +func newNetChan(name string, id int, ch *chanDir, ed *encDec, size int, count int64) *netChan { + c := &netChan{chanDir: ch, name: name, id: id, size: size, ed: ed, count: count} + if c.dir == Send { + c.ackCh = make(chan bool, size) + c.space = size + } + return c +} + +// Close the channel. +func (nch *netChan) close() { + if nch.closed { + return + } + if nch.dir == Recv { + if nch.sendCh != nil { + // If the sender goroutine is active, close the channel to it. + // It will close nch.ch when it can. 
+ close(nch.sendCh) + } else { + nch.ch.Close() + } + } else { + nch.ch.Close() + close(nch.ackCh) + } + nch.closed = true +} + +// Send message from remote side to local receiver. +func (nch *netChan) send(val reflect.Value) { + if nch.dir != Recv { + panic("send on wrong direction of channel") + } + if nch.sendCh == nil { + // If possible, do local send directly and ack immediately. + if nch.ch.TrySend(val) { + nch.sendAck() + return + } + // Start sender goroutine to manage delayed delivery of values. + nch.sendCh = make(chan reflect.Value, nch.size) + go nch.sender() + } + select { + case nch.sendCh <- val: + // ok + default: + // TODO: should this be more resilient? + panic("netchan: remote sender sent more values than allowed") + } +} + +// sendAck sends an acknowledgment that a message has left +// the channel's buffer. If the messages remaining to be sent +// will fit in the channel's buffer, then we don't +// need to send an ack. +func (nch *netChan) sendAck() { + if nch.count < 0 || nch.count > int64(nch.size) { + nch.ed.encode(&header{Id: nch.id}, payAckSend, nil) + } + if nch.count > 0 { + nch.count-- + } +} + +// The sender process forwards items from the sending queue +// to the destination channel, acknowledging each item. +func (nch *netChan) sender() { + if nch.dir != Recv { + panic("sender on wrong direction of channel") + } + // When Exporter.Hangup is called, the underlying channel is closed, + // and so we may get a "too many operations on closed channel" error + // if there are outstanding messages in sendCh. + // Make sure that this doesn't panic the whole program. + defer func() { + if r := recover(); r != nil { + // TODO check that r is "too many operations", otherwise re-panic. + } + }() + for v := range nch.sendCh { + nch.ch.Send(v) + nch.sendAck() + } + nch.ch.Close() +} + +// Receive value from local side for sending to remote side. 
+func (nch *netChan) recv() (val reflect.Value, ok bool) { + if nch.dir != Send { + panic("recv on wrong direction of channel") + } + + if nch.space == 0 { + // Wait for buffer space. + <-nch.ackCh + nch.space++ + } + nch.space-- + return nch.ch.Recv() +} + +// acked is called when the remote side indicates that +// a value has been delivered. +func (nch *netChan) acked() { + if nch.dir != Send { + panic("recv on wrong direction of channel") + } + select { + case nch.ackCh <- true: + // ok + default: + // TODO: should this be more resilient? + panic("netchan: remote receiver sent too many acks") + } +} diff --git a/libgo/go/old/netchan/export.go b/libgo/go/old/netchan/export.go new file mode 100644 index 0000000..99d5d7e --- /dev/null +++ b/libgo/go/old/netchan/export.go @@ -0,0 +1,400 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* + Package netchan implements type-safe networked channels: + it allows the two ends of a channel to appear on different + computers connected by a network. It does this by transporting + data sent to a channel on one machine so it can be recovered + by a receive of a channel of the same type on the other. + + An exporter publishes a set of channels by name. An importer + connects to the exporting machine and imports the channels + by name. After importing the channels, the two machines can + use the channels in the usual way. + + Networked channels are not synchronized; they always behave + as if they are buffered channels of at least one element. +*/ +package netchan + +// BUG: can't use range clause to receive when using ImportNValues to limit the count. + +import ( + "log" + "io" + "net" + "os" + "reflect" + "strconv" + "sync" +) + +// Export + +// expLog is a logging convenience function. The first argument must be a string. 
+func expLog(args ...interface{}) { + args[0] = "netchan export: " + args[0].(string) + log.Print(args...) +} + +// An Exporter allows a set of channels to be published on a single +// network port. A single machine may have multiple Exporters +// but they must use different ports. +type Exporter struct { + *clientSet +} + +type expClient struct { + *encDec + exp *Exporter + chans map[int]*netChan // channels in use by client + mu sync.Mutex // protects remaining fields + errored bool // client has been sent an error + seqNum int64 // sequences messages sent to client; has value of highest sent + ackNum int64 // highest sequence number acknowledged + seqLock sync.Mutex // guarantees messages are in sequence, only locked under mu +} + +func newClient(exp *Exporter, conn io.ReadWriter) *expClient { + client := new(expClient) + client.exp = exp + client.encDec = newEncDec(conn) + client.seqNum = 0 + client.ackNum = 0 + client.chans = make(map[int]*netChan) + return client +} + +func (client *expClient) sendError(hdr *header, err string) { + error := &error{err} + expLog("sending error to client:", error.Error) + client.encode(hdr, payError, error) // ignore any encode error, hope client gets it + client.mu.Lock() + client.errored = true + client.mu.Unlock() +} + +func (client *expClient) newChan(hdr *header, dir Dir, name string, size int, count int64) *netChan { + exp := client.exp + exp.mu.Lock() + ech, ok := exp.names[name] + exp.mu.Unlock() + if !ok { + client.sendError(hdr, "no such channel: "+name) + return nil + } + if ech.dir != dir { + client.sendError(hdr, "wrong direction for channel: "+name) + return nil + } + nch := newNetChan(name, hdr.Id, ech, client.encDec, size, count) + client.chans[hdr.Id] = nch + return nch +} + +func (client *expClient) getChan(hdr *header, dir Dir) *netChan { + nch := client.chans[hdr.Id] + if nch == nil { + return nil + } + if nch.dir != dir { + client.sendError(hdr, "wrong direction for channel: "+nch.name) + } + return nch +} 
+ +// The function run manages sends and receives for a single client. For each +// (client Recv) request, this will launch a serveRecv goroutine to deliver +// the data for that channel, while (client Send) requests are handled as +// data arrives from the client. +func (client *expClient) run() { + hdr := new(header) + hdrValue := reflect.ValueOf(hdr) + req := new(request) + reqValue := reflect.ValueOf(req) + error := new(error) + for { + *hdr = header{} + if err := client.decode(hdrValue); err != nil { + if err != os.EOF { + expLog("error decoding client header:", err) + } + break + } + switch hdr.PayloadType { + case payRequest: + *req = request{} + if err := client.decode(reqValue); err != nil { + expLog("error decoding client request:", err) + break + } + if req.Size < 1 { + panic("netchan: remote requested " + strconv.Itoa(req.Size) + " values") + } + switch req.Dir { + case Recv: + // look up channel before calling serveRecv to + // avoid a lock around client.chans. + if nch := client.newChan(hdr, Send, req.Name, req.Size, req.Count); nch != nil { + go client.serveRecv(nch, *hdr, req.Count) + } + case Send: + client.newChan(hdr, Recv, req.Name, req.Size, req.Count) + // The actual sends will have payload type payData. + // TODO: manage the count? + default: + error.Error = "request: can't handle channel direction" + expLog(error.Error, req.Dir) + client.encode(hdr, payError, error) + } + case payData: + client.serveSend(*hdr) + case payClosed: + client.serveClosed(*hdr) + case payAck: + client.mu.Lock() + if client.ackNum != hdr.SeqNum-1 { + // Since the sequence number is incremented and the message is sent + // in a single instance of locking client.mu, the messages are guaranteed + // to be sent in order. Therefore receipt of acknowledgement N means + // all messages <=N have been seen by the recipient. We check anyway. 
+ expLog("sequence out of order:", client.ackNum, hdr.SeqNum) + } + if client.ackNum < hdr.SeqNum { // If there has been an error, don't back up the count. + client.ackNum = hdr.SeqNum + } + client.mu.Unlock() + case payAckSend: + if nch := client.getChan(hdr, Send); nch != nil { + nch.acked() + } + default: + log.Fatal("netchan export: unknown payload type", hdr.PayloadType) + } + } + client.exp.delClient(client) +} + +// Send all the data on a single channel to a client asking for a Recv. +// The header is passed by value to avoid issues of overwriting. +func (client *expClient) serveRecv(nch *netChan, hdr header, count int64) { + for { + val, ok := nch.recv() + if !ok { + if err := client.encode(&hdr, payClosed, nil); err != nil { + expLog("error encoding server closed message:", err) + } + break + } + // We hold the lock during transmission to guarantee messages are + // sent in sequence number order. Also, we increment first so the + // value of client.SeqNum is the value of the highest used sequence + // number, not one beyond. + client.mu.Lock() + client.seqNum++ + hdr.SeqNum = client.seqNum + client.seqLock.Lock() // guarantee ordering of messages + client.mu.Unlock() + err := client.encode(&hdr, payData, val.Interface()) + client.seqLock.Unlock() + if err != nil { + expLog("error encoding client response:", err) + client.sendError(&hdr, err.String()) + break + } + // Negative count means run forever. + if count >= 0 { + if count--; count <= 0 { + break + } + } + } +} + +// Receive and deliver locally one item from a client asking for a Send +// The header is passed by value to avoid issues of overwriting. +func (client *expClient) serveSend(hdr header) { + nch := client.getChan(&hdr, Recv) + if nch == nil { + return + } + // Create a new value for each received item. 
+ val := reflect.New(nch.ch.Type().Elem()).Elem() + if err := client.decode(val); err != nil { + expLog("value decode:", err, "; type ", nch.ch.Type()) + return + } + nch.send(val) +} + +// Report that client has closed the channel that is sending to us. +// The header is passed by value to avoid issues of overwriting. +func (client *expClient) serveClosed(hdr header) { + nch := client.getChan(&hdr, Recv) + if nch == nil { + return + } + nch.close() +} + +func (client *expClient) unackedCount() int64 { + client.mu.Lock() + n := client.seqNum - client.ackNum + client.mu.Unlock() + return n +} + +func (client *expClient) seq() int64 { + client.mu.Lock() + n := client.seqNum + client.mu.Unlock() + return n +} + +func (client *expClient) ack() int64 { + client.mu.Lock() + n := client.seqNum + client.mu.Unlock() + return n +} + +// Serve waits for incoming connections on the listener +// and serves the Exporter's channels on each. +// It blocks until the listener is closed. +func (exp *Exporter) Serve(listener net.Listener) { + for { + conn, err := listener.Accept() + if err != nil { + expLog("listen:", err) + break + } + go exp.ServeConn(conn) + } +} + +// ServeConn exports the Exporter's channels on conn. +// It blocks until the connection is terminated. +func (exp *Exporter) ServeConn(conn io.ReadWriter) { + exp.addClient(conn).run() +} + +// NewExporter creates a new Exporter that exports a set of channels. +func NewExporter() *Exporter { + e := &Exporter{ + clientSet: &clientSet{ + names: make(map[string]*chanDir), + clients: make(map[unackedCounter]bool), + }, + } + return e +} + +// ListenAndServe exports the exporter's channels through the +// given network and local address defined as in net.Listen. 
+func (exp *Exporter) ListenAndServe(network, localaddr string) os.Error { + listener, err := net.Listen(network, localaddr) + if err != nil { + return err + } + go exp.Serve(listener) + return nil +} + +// addClient creates a new expClient and records its existence +func (exp *Exporter) addClient(conn io.ReadWriter) *expClient { + client := newClient(exp, conn) + exp.mu.Lock() + exp.clients[client] = true + exp.mu.Unlock() + return client +} + +// delClient forgets the client existed +func (exp *Exporter) delClient(client *expClient) { + exp.mu.Lock() + delete(exp.clients, client) + exp.mu.Unlock() +} + +// Drain waits until all messages sent from this exporter/importer, including +// those not yet sent to any client and possibly including those sent while +// Drain was executing, have been received by the importer. In short, it +// waits until all the exporter's messages have been received by a client. +// If the timeout (measured in nanoseconds) is positive and Drain takes +// longer than that to complete, an error is returned. +func (exp *Exporter) Drain(timeout int64) os.Error { + // This wrapper function is here so the method's comment will appear in godoc. + return exp.clientSet.drain(timeout) +} + +// Sync waits until all clients of the exporter have received the messages +// that were sent at the time Sync was invoked. Unlike Drain, it does not +// wait for messages sent while it is running or messages that have not been +// dispatched to any client. If the timeout (measured in nanoseconds) is +// positive and Sync takes longer than that to complete, an error is +// returned. +func (exp *Exporter) Sync(timeout int64) os.Error { + // This wrapper function is here so the method's comment will appear in godoc. 
+ return exp.clientSet.sync(timeout) +} + +func checkChan(chT interface{}, dir Dir) (reflect.Value, os.Error) { + chanType := reflect.TypeOf(chT) + if chanType.Kind() != reflect.Chan { + return reflect.Value{}, os.NewError("not a channel") + } + if dir != Send && dir != Recv { + return reflect.Value{}, os.NewError("unknown channel direction") + } + switch chanType.ChanDir() { + case reflect.BothDir: + case reflect.SendDir: + if dir != Recv { + return reflect.Value{}, os.NewError("to import/export with Send, must provide <-chan") + } + case reflect.RecvDir: + if dir != Send { + return reflect.Value{}, os.NewError("to import/export with Recv, must provide chan<-") + } + } + return reflect.ValueOf(chT), nil +} + +// Export exports a channel of a given type and specified direction. The +// channel to be exported is provided in the call and may be of arbitrary +// channel type. +// Despite the literal signature, the effective signature is +// Export(name string, chT chan T, dir Dir) +func (exp *Exporter) Export(name string, chT interface{}, dir Dir) os.Error { + ch, err := checkChan(chT, dir) + if err != nil { + return err + } + exp.mu.Lock() + defer exp.mu.Unlock() + _, present := exp.names[name] + if present { + return os.NewError("channel name already being exported:" + name) + } + exp.names[name] = &chanDir{ch, dir} + return nil +} + +// Hangup disassociates the named channel from the Exporter and closes +// the channel. Messages in flight for the channel may be dropped. 
+func (exp *Exporter) Hangup(name string) os.Error { + exp.mu.Lock() + chDir, ok := exp.names[name] + if ok { + delete(exp.names, name) + } + // TODO drop all instances of channel from client sets + exp.mu.Unlock() + if !ok { + return os.NewError("netchan export: hangup: no such channel: " + name) + } + chDir.ch.Close() + return nil +} diff --git a/libgo/go/old/netchan/import.go b/libgo/go/old/netchan/import.go new file mode 100644 index 0000000..5a459e0 --- /dev/null +++ b/libgo/go/old/netchan/import.go @@ -0,0 +1,287 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package netchan + +import ( + "io" + "log" + "net" + "os" + "reflect" + "sync" + "time" +) + +// Import + +// impLog is a logging convenience function. The first argument must be a string. +func impLog(args ...interface{}) { + args[0] = "netchan import: " + args[0].(string) + log.Print(args...) +} + +// An Importer allows a set of channels to be imported from a single +// remote machine/network port. A machine may have multiple +// importers, even from the same machine/network port. +type Importer struct { + *encDec + chanLock sync.Mutex // protects access to channel map + names map[string]*netChan + chans map[int]*netChan + errors chan os.Error + maxId int + mu sync.Mutex // protects remaining fields + unacked int64 // number of unacknowledged sends. + seqLock sync.Mutex // guarantees messages are in sequence, only locked under mu +} + +// NewImporter creates a new Importer object to import a set of channels +// from the given connection. The Exporter must be available and serving when +// the Importer is created. 
+func NewImporter(conn io.ReadWriter) *Importer { + imp := new(Importer) + imp.encDec = newEncDec(conn) + imp.chans = make(map[int]*netChan) + imp.names = make(map[string]*netChan) + imp.errors = make(chan os.Error, 10) + imp.unacked = 0 + go imp.run() + return imp +} + +// Import imports a set of channels from the given network and address. +func Import(network, remoteaddr string) (*Importer, os.Error) { + conn, err := net.Dial(network, remoteaddr) + if err != nil { + return nil, err + } + return NewImporter(conn), nil +} + +// shutdown closes all channels for which we are receiving data from the remote side. +func (imp *Importer) shutdown() { + imp.chanLock.Lock() + for _, ich := range imp.chans { + if ich.dir == Recv { + ich.close() + } + } + imp.chanLock.Unlock() +} + +// Handle the data from a single imported data stream, which will +// have the form +// (response, data)* +// The response identifies by name which channel is transmitting data. +func (imp *Importer) run() { + // Loop on responses; requests are sent by ImportNValues() + hdr := new(header) + hdrValue := reflect.ValueOf(hdr) + ackHdr := new(header) + err := new(error) + errValue := reflect.ValueOf(err) + for { + *hdr = header{} + if e := imp.decode(hdrValue); e != nil { + if e != os.EOF { + impLog("header:", e) + imp.shutdown() + } + return + } + switch hdr.PayloadType { + case payData: + // done lower in loop + case payError: + if e := imp.decode(errValue); e != nil { + impLog("error:", e) + return + } + if err.Error != "" { + impLog("response error:", err.Error) + select { + case imp.errors <- os.NewError(err.Error): + continue // errors are not acknowledged + default: + imp.shutdown() + return + } + } + case payClosed: + nch := imp.getChan(hdr.Id, false) + if nch != nil { + nch.close() + } + continue // closes are not acknowledged. + case payAckSend: + // we can receive spurious acks if the channel is + // hung up, so we ask getChan to ignore any errors. 
+ nch := imp.getChan(hdr.Id, true) + if nch != nil { + nch.acked() + imp.mu.Lock() + imp.unacked-- + imp.mu.Unlock() + } + continue + default: + impLog("unexpected payload type:", hdr.PayloadType) + return + } + nch := imp.getChan(hdr.Id, false) + if nch == nil { + continue + } + if nch.dir != Recv { + impLog("cannot happen: receive from non-Recv channel") + return + } + // Acknowledge receipt + ackHdr.Id = hdr.Id + ackHdr.SeqNum = hdr.SeqNum + imp.encode(ackHdr, payAck, nil) + // Create a new value for each received item. + value := reflect.New(nch.ch.Type().Elem()).Elem() + if e := imp.decode(value); e != nil { + impLog("importer value decode:", e) + return + } + nch.send(value) + } +} + +func (imp *Importer) getChan(id int, errOk bool) *netChan { + imp.chanLock.Lock() + ich := imp.chans[id] + imp.chanLock.Unlock() + if ich == nil { + if !errOk { + impLog("unknown id in netchan request: ", id) + } + return nil + } + return ich +} + +// Errors returns a channel from which transmission and protocol errors +// can be read. Clients of the importer are not required to read the error +// channel for correct execution. However, if too many errors occur +// without being read from the error channel, the importer will shut down. +func (imp *Importer) Errors() chan os.Error { + return imp.errors +} + +// Import imports a channel of the given type, size and specified direction. +// It is equivalent to ImportNValues with a count of -1, meaning unbounded. +func (imp *Importer) Import(name string, chT interface{}, dir Dir, size int) os.Error { + return imp.ImportNValues(name, chT, dir, size, -1) +} + +// ImportNValues imports a channel of the given type and specified +// direction and then receives or transmits up to n values on that +// channel. A value of n==-1 implies an unbounded number of values. The +// channel will have buffer space for size values, or 1 value if size < 1. 
+// The channel to be bound to the remote site's channel is provided +// in the call and may be of arbitrary channel type. +// Despite the literal signature, the effective signature is +// ImportNValues(name string, chT chan T, dir Dir, size, n int) os.Error +// Example usage: +// imp, err := NewImporter("tcp", "netchanserver.mydomain.com:1234") +// if err != nil { log.Fatal(err) } +// ch := make(chan myType) +// err = imp.ImportNValues("name", ch, Recv, 1, 1) +// if err != nil { log.Fatal(err) } +// fmt.Printf("%+v\n", <-ch) +func (imp *Importer) ImportNValues(name string, chT interface{}, dir Dir, size, n int) os.Error { + ch, err := checkChan(chT, dir) + if err != nil { + return err + } + imp.chanLock.Lock() + defer imp.chanLock.Unlock() + _, present := imp.names[name] + if present { + return os.NewError("channel name already being imported:" + name) + } + if size < 1 { + size = 1 + } + id := imp.maxId + imp.maxId++ + nch := newNetChan(name, id, &chanDir{ch, dir}, imp.encDec, size, int64(n)) + imp.names[name] = nch + imp.chans[id] = nch + // Tell the other side about this channel. + hdr := &header{Id: id} + req := &request{Name: name, Count: int64(n), Dir: dir, Size: size} + if err = imp.encode(hdr, payRequest, req); err != nil { + impLog("request encode:", err) + return err + } + if dir == Send { + go func() { + for i := 0; n == -1 || i < n; i++ { + val, ok := nch.recv() + if !ok { + if err = imp.encode(hdr, payClosed, nil); err != nil { + impLog("error encoding client closed message:", err) + } + return + } + // We hold the lock during transmission to guarantee messages are + // sent in order. + imp.mu.Lock() + imp.unacked++ + imp.seqLock.Lock() + imp.mu.Unlock() + if err = imp.encode(hdr, payData, val.Interface()); err != nil { + impLog("error encoding client send:", err) + return + } + imp.seqLock.Unlock() + } + }() + } + return nil +} + +// Hangup disassociates the named channel from the Importer and closes +// the channel. 
Messages in flight for the channel may be dropped. +func (imp *Importer) Hangup(name string) os.Error { + imp.chanLock.Lock() + defer imp.chanLock.Unlock() + nc := imp.names[name] + if nc == nil { + return os.NewError("netchan import: hangup: no such channel: " + name) + } + delete(imp.names, name) + delete(imp.chans, nc.id) + nc.close() + return nil +} + +func (imp *Importer) unackedCount() int64 { + imp.mu.Lock() + n := imp.unacked + imp.mu.Unlock() + return n +} + +// Drain waits until all messages sent from this exporter/importer, including +// those not yet sent to any server and possibly including those sent while +// Drain was executing, have been received by the exporter. In short, it +// waits until all the importer's messages have been received. +// If the timeout (measured in nanoseconds) is positive and Drain takes +// longer than that to complete, an error is returned. +func (imp *Importer) Drain(timeout int64) os.Error { + startTime := time.Nanoseconds() + for imp.unackedCount() > 0 { + if timeout > 0 && time.Nanoseconds()-startTime >= timeout { + return os.NewError("timeout") + } + time.Sleep(100 * 1e6) + } + return nil +} diff --git a/libgo/go/old/netchan/netchan_test.go b/libgo/go/old/netchan/netchan_test.go new file mode 100644 index 0000000..8c0f9a6 --- /dev/null +++ b/libgo/go/old/netchan/netchan_test.go @@ -0,0 +1,435 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package netchan + +import ( + "net" + "strings" + "testing" + "time" +) + +const count = 10 // number of items in most tests +const closeCount = 5 // number of items when sender closes early + +const base = 23 + +func exportSend(exp *Exporter, n int, t *testing.T, done chan bool) { + ch := make(chan int) + err := exp.Export("exportedSend", ch, Send) + if err != nil { + t.Fatal("exportSend:", err) + } + go func() { + for i := 0; i < n; i++ { + ch <- base + i + } + close(ch) + if done != nil { + done <- true + } + }() +} + +func exportReceive(exp *Exporter, t *testing.T, expDone chan bool) { + ch := make(chan int) + err := exp.Export("exportedRecv", ch, Recv) + expDone <- true + if err != nil { + t.Fatal("exportReceive:", err) + } + for i := 0; i < count; i++ { + v, ok := <-ch + if !ok { + if i != closeCount { + t.Errorf("exportReceive expected close at %d; got one at %d", closeCount, i) + } + break + } + if v != base+i { + t.Errorf("export Receive: bad value: expected %d+%d=%d; got %d", base, i, base+i, v) + } + } +} + +func importSend(imp *Importer, n int, t *testing.T, done chan bool) { + ch := make(chan int) + err := imp.ImportNValues("exportedRecv", ch, Send, 3, -1) + if err != nil { + t.Fatal("importSend:", err) + } + go func() { + for i := 0; i < n; i++ { + ch <- base + i + } + close(ch) + if done != nil { + done <- true + } + }() +} + +func importReceive(imp *Importer, t *testing.T, done chan bool) { + ch := make(chan int) + err := imp.ImportNValues("exportedSend", ch, Recv, 3, count) + if err != nil { + t.Fatal("importReceive:", err) + } + for i := 0; i < count; i++ { + v, ok := <-ch + if !ok { + if i != closeCount { + t.Errorf("importReceive expected close at %d; got one at %d", closeCount, i) + } + break + } + if v != base+i { + t.Errorf("importReceive: bad value: expected %d+%d=%d; got %+d", base, i, base+i, v) + } + } + if done != nil { + done <- true + } +} + +func TestExportSendImportReceive(t *testing.T) { + exp, imp := pair(t) + exportSend(exp, 
count, t, nil) + importReceive(imp, t, nil) +} + +func TestExportReceiveImportSend(t *testing.T) { + exp, imp := pair(t) + expDone := make(chan bool) + done := make(chan bool) + go func() { + exportReceive(exp, t, expDone) + done <- true + }() + <-expDone + importSend(imp, count, t, nil) + <-done +} + +func TestClosingExportSendImportReceive(t *testing.T) { + exp, imp := pair(t) + exportSend(exp, closeCount, t, nil) + importReceive(imp, t, nil) +} + +func TestClosingImportSendExportReceive(t *testing.T) { + exp, imp := pair(t) + expDone := make(chan bool) + done := make(chan bool) + go func() { + exportReceive(exp, t, expDone) + done <- true + }() + <-expDone + importSend(imp, closeCount, t, nil) + <-done +} + +func TestErrorForIllegalChannel(t *testing.T) { + exp, imp := pair(t) + // Now export a channel. + ch := make(chan int, 1) + err := exp.Export("aChannel", ch, Send) + if err != nil { + t.Fatal("export:", err) + } + ch <- 1234 + close(ch) + // Now try to import a different channel. + ch = make(chan int) + err = imp.Import("notAChannel", ch, Recv, 1) + if err != nil { + t.Fatal("import:", err) + } + // Expect an error now. Start a timeout. + timeout := make(chan bool, 1) // buffered so closure will not hang around. + go func() { + time.Sleep(10e9) // very long, to give even really slow machines a chance. + timeout <- true + }() + select { + case err = <-imp.Errors(): + if strings.Index(err.String(), "no such channel") < 0 { + t.Error("wrong error for nonexistent channel:", err) + } + case <-timeout: + t.Error("import of nonexistent channel did not receive an error") + } +} + +// Not a great test but it does at least invoke Drain. +func TestExportDrain(t *testing.T) { + exp, imp := pair(t) + done := make(chan bool) + go func() { + exportSend(exp, closeCount, t, nil) + done <- true + }() + <-done + go importReceive(imp, t, done) + exp.Drain(0) + <-done +} + +// Not a great test but it does at least invoke Drain. 
+func TestImportDrain(t *testing.T) { + exp, imp := pair(t) + expDone := make(chan bool) + go exportReceive(exp, t, expDone) + <-expDone + importSend(imp, closeCount, t, nil) + imp.Drain(0) +} + +// Not a great test but it does at least invoke Sync. +func TestExportSync(t *testing.T) { + exp, imp := pair(t) + done := make(chan bool) + exportSend(exp, closeCount, t, nil) + go importReceive(imp, t, done) + exp.Sync(0) + <-done +} + +// Test hanging up the send side of an export. +// TODO: test hanging up the receive side of an export. +func TestExportHangup(t *testing.T) { + exp, imp := pair(t) + ech := make(chan int) + err := exp.Export("exportedSend", ech, Send) + if err != nil { + t.Fatal("export:", err) + } + // Prepare to receive two values. We'll actually deliver only one. + ich := make(chan int) + err = imp.ImportNValues("exportedSend", ich, Recv, 1, 2) + if err != nil { + t.Fatal("import exportedSend:", err) + } + // Send one value, receive it. + const Value = 1234 + ech <- Value + v := <-ich + if v != Value { + t.Fatal("expected", Value, "got", v) + } + // Now hang up the channel. Importer should see it close. + exp.Hangup("exportedSend") + v, ok := <-ich + if ok { + t.Fatal("expected channel to be closed; got value", v) + } +} + +// Test hanging up the send side of an import. +// TODO: test hanging up the receive side of an import. +func TestImportHangup(t *testing.T) { + exp, imp := pair(t) + ech := make(chan int) + err := exp.Export("exportedRecv", ech, Recv) + if err != nil { + t.Fatal("export:", err) + } + // Prepare to Send two values. We'll actually deliver only one. + ich := make(chan int) + err = imp.ImportNValues("exportedRecv", ich, Send, 1, 2) + if err != nil { + t.Fatal("import exportedRecv:", err) + } + // Send one value, receive it. + const Value = 1234 + ich <- Value + v := <-ech + if v != Value { + t.Fatal("expected", Value, "got", v) + } + // Now hang up the channel. Exporter should see it close. 
+ imp.Hangup("exportedRecv") + v, ok := <-ech + if ok { + t.Fatal("expected channel to be closed; got value", v) + } +} + +// loop back exportedRecv to exportedSend, +// but receive a value from ctlch before starting the loop. +func exportLoopback(exp *Exporter, t *testing.T) { + inch := make(chan int) + if err := exp.Export("exportedRecv", inch, Recv); err != nil { + t.Fatal("exportRecv") + } + + outch := make(chan int) + if err := exp.Export("exportedSend", outch, Send); err != nil { + t.Fatal("exportSend") + } + + ctlch := make(chan int) + if err := exp.Export("exportedCtl", ctlch, Recv); err != nil { + t.Fatal("exportRecv") + } + + go func() { + <-ctlch + for i := 0; i < count; i++ { + x := <-inch + if x != base+i { + t.Errorf("exportLoopback expected %d; got %d", i, x) + } + outch <- x + } + }() +} + +// This test checks that channel operations can proceed +// even when other concurrent operations are blocked. +func TestIndependentSends(t *testing.T) { + exp, imp := pair(t) + + exportLoopback(exp, t) + + importSend(imp, count, t, nil) + done := make(chan bool) + go importReceive(imp, t, done) + + // wait for export side to try to deliver some values. + time.Sleep(0.25e9) + + ctlch := make(chan int) + if err := imp.ImportNValues("exportedCtl", ctlch, Send, 1, 1); err != nil { + t.Fatal("importSend:", err) + } + ctlch <- 0 + + <-done +} + +// This test cross-connects a pair of exporter/importer pairs. +type value struct { + I int + Source string +} + +func TestCrossConnect(t *testing.T) { + e1, i1 := pair(t) + e2, i2 := pair(t) + + crossExport(e1, e2, t) + crossImport(i1, i2, t) +} + +// Export side of cross-traffic. 
+func crossExport(e1, e2 *Exporter, t *testing.T) { + s := make(chan value) + err := e1.Export("exportedSend", s, Send) + if err != nil { + t.Fatal("exportSend:", err) + } + + r := make(chan value) + err = e2.Export("exportedReceive", r, Recv) + if err != nil { + t.Fatal("exportReceive:", err) + } + + go crossLoop("export", s, r, t) +} + +// Import side of cross-traffic. +func crossImport(i1, i2 *Importer, t *testing.T) { + s := make(chan value) + err := i2.Import("exportedReceive", s, Send, 2) + if err != nil { + t.Fatal("import of exportedReceive:", err) + } + + r := make(chan value) + err = i1.Import("exportedSend", r, Recv, 2) + if err != nil { + t.Fatal("import of exported Send:", err) + } + + crossLoop("import", s, r, t) +} + +// Cross-traffic: send and receive 'count' numbers. +func crossLoop(name string, s, r chan value, t *testing.T) { + for si, ri := 0, 0; si < count && ri < count; { + select { + case s <- value{si, name}: + si++ + case v := <-r: + if v.I != ri { + t.Errorf("loop: bad value: expected %d, hello; got %+v", ri, v) + } + ri++ + } + } +} + +const flowCount = 100 + +// test flow control from exporter to importer. +func TestExportFlowControl(t *testing.T) { + exp, imp := pair(t) + + sendDone := make(chan bool, 1) + exportSend(exp, flowCount, t, sendDone) + + ch := make(chan int) + err := imp.ImportNValues("exportedSend", ch, Recv, 20, -1) + if err != nil { + t.Fatal("importReceive:", err) + } + + testFlow(sendDone, ch, flowCount, t) +} + +// test flow control from importer to exporter. 
+func TestImportFlowControl(t *testing.T) { + exp, imp := pair(t) + + ch := make(chan int) + err := exp.Export("exportedRecv", ch, Recv) + if err != nil { + t.Fatal("importReceive:", err) + } + + sendDone := make(chan bool, 1) + importSend(imp, flowCount, t, sendDone) + testFlow(sendDone, ch, flowCount, t) +} + +func testFlow(sendDone chan bool, ch <-chan int, N int, t *testing.T) { + go func() { + time.Sleep(0.5e9) + sendDone <- false + }() + + if <-sendDone { + t.Fatal("send did not block") + } + n := 0 + for i := range ch { + t.Log("after blocking, got value ", i) + n++ + } + if n != N { + t.Fatalf("expected %d values; got %d", N, n) + } +} + +func pair(t *testing.T) (*Exporter, *Importer) { + c0, c1 := net.Pipe() + exp := NewExporter() + go exp.ServeConn(c0) + imp := NewImporter(c1) + return exp, imp +} diff --git a/libgo/go/old/regexp/all_test.go b/libgo/go/old/regexp/all_test.go new file mode 100644 index 0000000..71edc4d --- /dev/null +++ b/libgo/go/old/regexp/all_test.go @@ -0,0 +1,426 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package regexp + +import ( + "os" + "strings" + "testing" +) + +var good_re = []string{ + ``, + `.`, + `^.$`, + `a`, + `a*`, + `a+`, + `a?`, + `a|b`, + `a*|b*`, + `(a*|b)(c*|d)`, + `[a-z]`, + `[a-abc-c\-\]\[]`, + `[a-z]+`, + `[]`, + `[abc]`, + `[^1234]`, + `[^\n]`, + `\!\\`, +} + +type stringError struct { + re string + err os.Error +} + +var bad_re = []stringError{ + {`*`, ErrBareClosure}, + {`+`, ErrBareClosure}, + {`?`, ErrBareClosure}, + {`(abc`, ErrUnmatchedLpar}, + {`abc)`, ErrUnmatchedRpar}, + {`x[a-z`, ErrUnmatchedLbkt}, + {`abc]`, ErrUnmatchedRbkt}, + {`[z-a]`, ErrBadRange}, + {`abc\`, ErrExtraneousBackslash}, + {`a**`, ErrBadClosure}, + {`a*+`, ErrBadClosure}, + {`a??`, ErrBadClosure}, + {`\x`, ErrBadBackslash}, +} + +func compileTest(t *testing.T, expr string, error os.Error) *Regexp { + re, err := Compile(expr) + if err != error { + t.Error("compiling `", expr, "`; unexpected error: ", err.String()) + } + return re +} + +func TestGoodCompile(t *testing.T) { + for i := 0; i < len(good_re); i++ { + compileTest(t, good_re[i], nil) + } +} + +func TestBadCompile(t *testing.T) { + for i := 0; i < len(bad_re); i++ { + compileTest(t, bad_re[i].re, bad_re[i].err) + } +} + +func matchTest(t *testing.T, test *FindTest) { + re := compileTest(t, test.pat, nil) + if re == nil { + return + } + m := re.MatchString(test.text) + if m != (len(test.matches) > 0) { + t.Errorf("MatchString failure on %s: %t should be %t", test, m, len(test.matches) > 0) + } + // now try bytes + m = re.Match([]byte(test.text)) + if m != (len(test.matches) > 0) { + t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0) + } +} + +func TestMatch(t *testing.T) { + for _, test := range findTests { + matchTest(t, &test) + } +} + +func matchFunctionTest(t *testing.T, test *FindTest) { + m, err := MatchString(test.pat, test.text) + if err == nil { + return + } + if m != (len(test.matches) > 0) { + t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) 
> 0) + } +} + +func TestMatchFunction(t *testing.T) { + for _, test := range findTests { + matchFunctionTest(t, &test) + } +} + +type ReplaceTest struct { + pattern, replacement, input, output string +} + +var replaceTests = []ReplaceTest{ + // Test empty input and/or replacement, with pattern that matches the empty string. + {"", "", "", ""}, + {"", "x", "", "x"}, + {"", "", "abc", "abc"}, + {"", "x", "abc", "xaxbxcx"}, + + // Test empty input and/or replacement, with pattern that does not match the empty string. + {"b", "", "", ""}, + {"b", "x", "", ""}, + {"b", "", "abc", "ac"}, + {"b", "x", "abc", "axc"}, + {"y", "", "", ""}, + {"y", "x", "", ""}, + {"y", "", "abc", "abc"}, + {"y", "x", "abc", "abc"}, + + // Multibyte characters -- verify that we don't try to match in the middle + // of a character. + {"[a-c]*", "x", "\u65e5", "x\u65e5x"}, + {"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"}, + + // Start and end of a string. + {"^[a-c]*", "x", "abcdabc", "xdabc"}, + {"[a-c]*$", "x", "abcdabc", "abcdx"}, + {"^[a-c]*$", "x", "abcdabc", "abcdabc"}, + {"^[a-c]*", "x", "abc", "x"}, + {"[a-c]*$", "x", "abc", "x"}, + {"^[a-c]*$", "x", "abc", "x"}, + {"^[a-c]*", "x", "dabce", "xdabce"}, + {"[a-c]*$", "x", "dabce", "dabcex"}, + {"^[a-c]*$", "x", "dabce", "dabce"}, + {"^[a-c]*", "x", "", "x"}, + {"[a-c]*$", "x", "", "x"}, + {"^[a-c]*$", "x", "", "x"}, + + {"^[a-c]+", "x", "abcdabc", "xdabc"}, + {"[a-c]+$", "x", "abcdabc", "abcdx"}, + {"^[a-c]+$", "x", "abcdabc", "abcdabc"}, + {"^[a-c]+", "x", "abc", "x"}, + {"[a-c]+$", "x", "abc", "x"}, + {"^[a-c]+$", "x", "abc", "x"}, + {"^[a-c]+", "x", "dabce", "dabce"}, + {"[a-c]+$", "x", "dabce", "dabce"}, + {"^[a-c]+$", "x", "dabce", "dabce"}, + {"^[a-c]+", "x", "", ""}, + {"[a-c]+$", "x", "", ""}, + {"^[a-c]+$", "x", "", ""}, + + // Other cases. 
+ {"abc", "def", "abcdefg", "defdefg"}, + {"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"}, + {"abc", "", "abcdabc", "d"}, + {"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"}, + {"abc", "d", "", ""}, + {"abc", "d", "abc", "d"}, + {".+", "x", "abc", "x"}, + {"[a-c]*", "x", "def", "xdxexfx"}, + {"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"}, + {"[a-c]*", "x", "abcbcdcdedef", "xdxdxexdxexfx"}, +} + +type ReplaceFuncTest struct { + pattern string + replacement func(string) string + input, output string +} + +var replaceFuncTests = []ReplaceFuncTest{ + {"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"}, + {"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"}, + {"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcydxyexyfxy"}, +} + +func TestReplaceAll(t *testing.T) { + for _, tc := range replaceTests { + re, err := Compile(tc.pattern) + if err != nil { + t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) + continue + } + actual := re.ReplaceAllString(tc.input, tc.replacement) + if actual != tc.output { + t.Errorf("%q.Replace(%q,%q) = %q; want %q", + tc.pattern, tc.input, tc.replacement, actual, tc.output) + } + // now try bytes + actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement))) + if actual != tc.output { + t.Errorf("%q.Replace(%q,%q) = %q; want %q", + tc.pattern, tc.input, tc.replacement, actual, tc.output) + } + } +} + +func TestReplaceAllFunc(t *testing.T) { + for _, tc := range replaceFuncTests { + re, err := Compile(tc.pattern) + if err != nil { + t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) + continue + } + actual := re.ReplaceAllStringFunc(tc.input, tc.replacement) + if actual != tc.output { + t.Errorf("%q.ReplaceFunc(%q,%q) = %q; want %q", + tc.pattern, tc.input, tc.replacement, actual, tc.output) + } + // now try bytes + actual = string(re.ReplaceAllFunc([]byte(tc.input), func(s []byte) []byte { return 
[]byte(tc.replacement(string(s))) })) + if actual != tc.output { + t.Errorf("%q.ReplaceFunc(%q,%q) = %q; want %q", + tc.pattern, tc.input, tc.replacement, actual, tc.output) + } + } +} + +type MetaTest struct { + pattern, output, literal string + isLiteral bool +} + +var metaTests = []MetaTest{ + {``, ``, ``, true}, + {`foo`, `foo`, `foo`, true}, + {`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator + {`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators + {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[{\]}\\\|,<\.>/\?~`, `!@#`, false}, +} + +func TestQuoteMeta(t *testing.T) { + for _, tc := range metaTests { + // Verify that QuoteMeta returns the expected string. + quoted := QuoteMeta(tc.pattern) + if quoted != tc.output { + t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`", + tc.pattern, quoted, tc.output) + continue + } + + // Verify that the quoted string is in fact treated as expected + // by Compile -- i.e. that it matches the original, unquoted string. + if tc.pattern != "" { + re, err := Compile(quoted) + if err != nil { + t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err) + continue + } + src := "abc" + tc.pattern + "def" + repl := "xyz" + replaced := re.ReplaceAllString(src, repl) + expected := "abcxyzdef" + if replaced != expected { + t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`", + tc.pattern, src, repl, replaced, expected) + } + } + } +} + +func TestLiteralPrefix(t *testing.T) { + for _, tc := range metaTests { + // Literal method needs to scan the pattern. 
+ re := MustCompile(tc.pattern) + str, complete := re.LiteralPrefix() + if complete != tc.isLiteral { + t.Errorf("LiteralPrefix(`%s`) = %t; want %t", tc.pattern, complete, tc.isLiteral) + } + if str != tc.literal { + t.Errorf("LiteralPrefix(`%s`) = `%s`; want `%s`", tc.pattern, str, tc.literal) + } + } +} + +type numSubexpCase struct { + input string + expected int +} + +var numSubexpCases = []numSubexpCase{ + {``, 0}, + {`.*`, 0}, + {`abba`, 0}, + {`ab(b)a`, 1}, + {`ab(.*)a`, 1}, + {`(.*)ab(.*)a`, 2}, + {`(.*)(ab)(.*)a`, 3}, + {`(.*)((a)b)(.*)a`, 4}, + {`(.*)(\(ab)(.*)a`, 3}, + {`(.*)(\(a\)b)(.*)a`, 3}, +} + +func TestNumSubexp(t *testing.T) { + for _, c := range numSubexpCases { + re := MustCompile(c.input) + n := re.NumSubexp() + if n != c.expected { + t.Errorf("NumSubexp for %q returned %d, expected %d", c.input, n, c.expected) + } + } +} + +func BenchmarkLiteral(b *testing.B) { + x := strings.Repeat("x", 50) + "y" + b.StopTimer() + re := MustCompile("y") + b.StartTimer() + for i := 0; i < b.N; i++ { + if !re.MatchString(x) { + println("no match!") + break + } + } +} + +func BenchmarkNotLiteral(b *testing.B) { + x := strings.Repeat("x", 50) + "y" + b.StopTimer() + re := MustCompile(".y") + b.StartTimer() + for i := 0; i < b.N; i++ { + if !re.MatchString(x) { + println("no match!") + break + } + } +} + +func BenchmarkMatchClass(b *testing.B) { + b.StopTimer() + x := strings.Repeat("xxxx", 20) + "w" + re := MustCompile("[abcdw]") + b.StartTimer() + for i := 0; i < b.N; i++ { + if !re.MatchString(x) { + println("no match!") + break + } + } +} + +func BenchmarkMatchClass_InRange(b *testing.B) { + b.StopTimer() + // 'b' is between 'a' and 'c', so the charclass + // range checking is no help here. 
+ x := strings.Repeat("bbbb", 20) + "c" + re := MustCompile("[ac]") + b.StartTimer() + for i := 0; i < b.N; i++ { + if !re.MatchString(x) { + println("no match!") + break + } + } +} + +func BenchmarkReplaceAll(b *testing.B) { + x := "abcdefghijklmnopqrstuvwxyz" + b.StopTimer() + re := MustCompile("[cjrw]") + b.StartTimer() + for i := 0; i < b.N; i++ { + re.ReplaceAllString(x, "") + } +} + +func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) { + b.StopTimer() + x := []byte("abcdefghijklmnopqrstuvwxyz") + re := MustCompile("^zbc(d|e)") + b.StartTimer() + for i := 0; i < b.N; i++ { + re.Match(x) + } +} + +func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) { + b.StopTimer() + x := []byte("abcdefghijklmnopqrstuvwxyz") + for i := 0; i < 15; i++ { + x = append(x, x...) + } + re := MustCompile("^zbc(d|e)") + b.StartTimer() + for i := 0; i < b.N; i++ { + re.Match(x) + } +} + +func BenchmarkAnchoredShortMatch(b *testing.B) { + b.StopTimer() + x := []byte("abcdefghijklmnopqrstuvwxyz") + re := MustCompile("^.bc(d|e)") + b.StartTimer() + for i := 0; i < b.N; i++ { + re.Match(x) + } +} + +func BenchmarkAnchoredLongMatch(b *testing.B) { + b.StopTimer() + x := []byte("abcdefghijklmnopqrstuvwxyz") + for i := 0; i < 15; i++ { + x = append(x, x...) + } + re := MustCompile("^.bc(d|e)") + b.StartTimer() + for i := 0; i < b.N; i++ { + re.Match(x) + } +} diff --git a/libgo/go/old/regexp/find_test.go b/libgo/go/old/regexp/find_test.go new file mode 100644 index 0000000..83b249e --- /dev/null +++ b/libgo/go/old/regexp/find_test.go @@ -0,0 +1,472 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package regexp + +import ( + "fmt" + "strings" + "testing" +) + +// For each pattern/text pair, what is the expected output of each function? 
+// We can derive the textual results from the indexed results, the non-submatch +// results from the submatched results, the single results from the 'all' results, +// and the byte results from the string results. Therefore the table includes +// only the FindAllStringSubmatchIndex result. +type FindTest struct { + pat string + text string + matches [][]int +} + +func (t FindTest) String() string { + return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text) +} + +var findTests = []FindTest{ + {``, ``, build(1, 0, 0)}, + {`^abcdefg`, "abcdefg", build(1, 0, 7)}, + {`a+`, "baaab", build(1, 1, 4)}, + {"abcd..", "abcdef", build(1, 0, 6)}, + {`a`, "a", build(1, 0, 1)}, + {`x`, "y", nil}, + {`b`, "abc", build(1, 1, 2)}, + {`.`, "a", build(1, 0, 1)}, + {`.*`, "abcdef", build(1, 0, 6)}, + {`^`, "abcde", build(1, 0, 0)}, + {`$`, "abcde", build(1, 5, 5)}, + {`^abcd$`, "abcd", build(1, 0, 4)}, + {`^bcd'`, "abcdef", nil}, + {`^abcd$`, "abcde", nil}, + {`a+`, "baaab", build(1, 1, 4)}, + {`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)}, + {`[a-z]+`, "abcd", build(1, 0, 4)}, + {`[^a-z]+`, "ab1234cd", build(1, 2, 6)}, + {`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)}, + {`[^\n]+`, "abcd\n", build(1, 0, 4)}, + {`[日本語]+`, "日本語日本語", build(1, 0, 18)}, + {`日本語+`, "日本語", build(1, 0, 9)}, + {`日本語+`, "日本語語語語", build(1, 0, 18)}, + {`()`, "", build(1, 0, 0, 0, 0)}, + {`(a)`, "a", build(1, 0, 1, 0, 1)}, + {`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)}, + {`(.*)`, "", build(1, 0, 0, 0, 0)}, + {`(.*)`, "abcd", build(1, 0, 4, 0, 4)}, + {`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)}, + {`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)}, + {`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)}, + {`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)}, + {`\a\b\f\n\r\t\v`, "\a\b\f\n\r\t\v", build(1, 0, 7)}, + {`[\a\b\f\n\r\t\v]+`, "\a\b\f\n\r\t\v", build(1, 0, 7)}, + + {`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)}, + {`(.*).*`, "ab", build(1, 0, 2, 0, 2)}, + {`[.]`, ".", 
build(1, 0, 1)}, + {`/$`, "/abc/", build(1, 4, 5)}, + {`/$`, "/abc", nil}, + + // multiple matches + {`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)}, + {`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)}, + {`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)}, + {`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)}, + {`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)}, + + // fixed bugs + {`ab$`, "cab", build(1, 1, 3)}, + {`axxb$`, "axxcb", nil}, + {`data`, "daXY data", build(1, 5, 9)}, + {`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)}, + {`zx+`, "zzx", build(1, 1, 3)}, + + // can backslash-escape any punctuation + {`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`, + `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, + {`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`, + `!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)}, + {"\\`", "`", build(1, 0, 1)}, + {"[\\`]+", "`", build(1, 0, 1)}, + + // long set of matches (longer than startSize) + { + ".", + "qwertyuiopasdfghjklzxcvbnm1234567890", + build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, + 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, + 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, + 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36), + }, +} + +// build is a helper to construct a [][]int by extracting n sequences from x. +// This represents n matches with len(x)/n submatches each. +func build(n int, x ...int) [][]int { + ret := make([][]int, n) + runLength := len(x) / n + j := 0 + for i := range ret { + ret[i] = make([]int, runLength) + copy(ret[i], x[j:]) + j += runLength + if j > len(x) { + panic("invalid build entry") + } + } + return ret +} + +// First the simple cases. 
+ +func TestFind(t *testing.T) { + for _, test := range findTests { + re := MustCompile(test.pat) + if re.String() != test.pat { + t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat) + } + result := re.Find([]byte(test.text)) + switch { + case len(test.matches) == 0 && len(result) == 0: + // ok + case test.matches == nil && result != nil: + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == nil: + t.Errorf("expected match; got none: %s", test) + case test.matches != nil && result != nil: + expect := test.text[test.matches[0][0]:test.matches[0][1]] + if expect != string(result) { + t.Errorf("expected %q got %q: %s", expect, result, test) + } + } + } +} + +func TestFindString(t *testing.T) { + for _, test := range findTests { + result := MustCompile(test.pat).FindString(test.text) + switch { + case len(test.matches) == 0 && len(result) == 0: + // ok + case test.matches == nil && result != "": + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == "": + // Tricky because an empty result has two meanings: no match or empty match. 
+ if test.matches[0][0] != test.matches[0][1] { + t.Errorf("expected match; got none: %s", test) + } + case test.matches != nil && result != "": + expect := test.text[test.matches[0][0]:test.matches[0][1]] + if expect != result { + t.Errorf("expected %q got %q: %s", expect, result, test) + } + } + } +} + +func testFindIndex(test *FindTest, result []int, t *testing.T) { + switch { + case len(test.matches) == 0 && len(result) == 0: + // ok + case test.matches == nil && result != nil: + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == nil: + t.Errorf("expected match; got none: %s", test) + case test.matches != nil && result != nil: + expect := test.matches[0] + if expect[0] != result[0] || expect[1] != result[1] { + t.Errorf("expected %v got %v: %s", expect, result, test) + } + } +} + +func TestFindIndex(t *testing.T) { + for _, test := range findTests { + testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t) + } +} + +func TestFindStringIndex(t *testing.T) { + for _, test := range findTests { + testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t) + } +} + +func TestFindReaderIndex(t *testing.T) { + for _, test := range findTests { + testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t) + } +} + +// Now come the simple All cases. 
+ +func TestFindAll(t *testing.T) { + for _, test := range findTests { + result := MustCompile(test.pat).FindAll([]byte(test.text), -1) + switch { + case test.matches == nil && result == nil: + // ok + case test.matches == nil && result != nil: + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == nil: + t.Errorf("expected match; got none: %s", test) + case test.matches != nil && result != nil: + if len(test.matches) != len(result) { + t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) + continue + } + for k, e := range test.matches { + expect := test.text[e[0]:e[1]] + if expect != string(result[k]) { + t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test) + } + } + } + } +} + +func TestFindAllString(t *testing.T) { + for _, test := range findTests { + result := MustCompile(test.pat).FindAllString(test.text, -1) + switch { + case test.matches == nil && result == nil: + // ok + case test.matches == nil && result != nil: + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == nil: + t.Errorf("expected match; got none: %s", test) + case test.matches != nil && result != nil: + if len(test.matches) != len(result) { + t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) + continue + } + for k, e := range test.matches { + expect := test.text[e[0]:e[1]] + if expect != result[k] { + t.Errorf("expected %q got %q: %s", expect, result, test) + } + } + } + } +} + +func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) { + switch { + case test.matches == nil && result == nil: + // ok + case test.matches == nil && result != nil: + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == nil: + t.Errorf("expected match; got none: %s", test) + case test.matches != nil && result != nil: + if len(test.matches) != len(result) { + t.Errorf("expected %d matches; got %d: %s", len(test.matches), 
len(result), test) + return + } + for k, e := range test.matches { + if e[0] != result[k][0] || e[1] != result[k][1] { + t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test) + } + } + } +} + +func TestFindAllIndex(t *testing.T) { + for _, test := range findTests { + testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t) + } +} + +func TestFindAllStringIndex(t *testing.T) { + for _, test := range findTests { + testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t) + } +} + +// Now come the Submatch cases. + +func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) { + if len(submatches) != len(result)*2 { + t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) + return + } + for k := 0; k < len(submatches); k += 2 { + if submatches[k] == -1 { + if result[k/2] != nil { + t.Errorf("match %d: expected nil got %q: %s", n, result, test) + } + continue + } + expect := test.text[submatches[k]:submatches[k+1]] + if expect != string(result[k/2]) { + t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) + return + } + } +} + +func TestFindSubmatch(t *testing.T) { + for _, test := range findTests { + result := MustCompile(test.pat).FindSubmatch([]byte(test.text)) + switch { + case test.matches == nil && result == nil: + // ok + case test.matches == nil && result != nil: + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == nil: + t.Errorf("expected match; got none: %s", test) + case test.matches != nil && result != nil: + testSubmatchBytes(&test, 0, test.matches[0], result, t) + } + } +} + +func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) { + if len(submatches) != len(result)*2 { + t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) + return + } + for k := 0; k < len(submatches); 
k += 2 { + if submatches[k] == -1 { + if result[k/2] != "" { + t.Errorf("match %d: expected nil got %q: %s", n, result, test) + } + continue + } + expect := test.text[submatches[k]:submatches[k+1]] + if expect != result[k/2] { + t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) + return + } + } +} + +func TestFindStringSubmatch(t *testing.T) { + for _, test := range findTests { + result := MustCompile(test.pat).FindStringSubmatch(test.text) + switch { + case test.matches == nil && result == nil: + // ok + case test.matches == nil && result != nil: + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == nil: + t.Errorf("expected match; got none: %s", test) + case test.matches != nil && result != nil: + testSubmatchString(&test, 0, test.matches[0], result, t) + } + } +} + +func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) { + if len(expect) != len(result) { + t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test) + return + } + for k, e := range expect { + if e != result[k] { + t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test) + } + } +} + +func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) { + switch { + case test.matches == nil && result == nil: + // ok + case test.matches == nil && result != nil: + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == nil: + t.Errorf("expected match; got none: %s", test) + case test.matches != nil && result != nil: + testSubmatchIndices(test, 0, test.matches[0], result, t) + } +} + +func TestFindSubmatchIndex(t *testing.T) { + for _, test := range findTests { + testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t) + } +} + +func TestFindStringSubmatchIndex(t *testing.T) { + for _, test := range findTests { + testFindSubmatchIndex(&test, 
MustCompile(test.pat).FindStringSubmatchIndex(test.text), t) + } +} + +func TestFindReaderSubmatchIndex(t *testing.T) { + for _, test := range findTests { + testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t) + } +} + +// Now come the monster AllSubmatch cases. + +func TestFindAllSubmatch(t *testing.T) { + for _, test := range findTests { + result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1) + switch { + case test.matches == nil && result == nil: + // ok + case test.matches == nil && result != nil: + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == nil: + t.Errorf("expected match; got none: %s", test) + case len(test.matches) != len(result): + t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) + case test.matches != nil && result != nil: + for k, match := range test.matches { + testSubmatchBytes(&test, k, match, result[k], t) + } + } + } +} + +func TestFindAllStringSubmatch(t *testing.T) { + for _, test := range findTests { + result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1) + switch { + case test.matches == nil && result == nil: + // ok + case test.matches == nil && result != nil: + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == nil: + t.Errorf("expected match; got none: %s", test) + case len(test.matches) != len(result): + t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) + case test.matches != nil && result != nil: + for k, match := range test.matches { + testSubmatchString(&test, k, match, result[k], t) + } + } + } +} + +func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) { + switch { + case test.matches == nil && result == nil: + // ok + case test.matches == nil && result != nil: + t.Errorf("expected no match; got one: %s", test) + case test.matches != nil && result == nil: + t.Errorf("expected match; 
got none: %s", test) + case len(test.matches) != len(result): + t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) + case test.matches != nil && result != nil: + for k, match := range test.matches { + testSubmatchIndices(test, k, match, result[k], t) + } + } +} + +func TestFindAllSubmatchIndex(t *testing.T) { + for _, test := range findTests { + testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t) + } +} + +func TestFindAllStringSubmatchIndex(t *testing.T) { + for _, test := range findTests { + testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t) + } +} diff --git a/libgo/go/old/regexp/regexp.go b/libgo/go/old/regexp/regexp.go new file mode 100644 index 0000000..e8d4c08 --- /dev/null +++ b/libgo/go/old/regexp/regexp.go @@ -0,0 +1,1488 @@ +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package regexp implements a simple regular expression library. +// +// The syntax of the regular expressions accepted is: +// +// regexp: +// concatenation { '|' concatenation } +// concatenation: +// { closure } +// closure: +// term [ '*' | '+' | '?' ] +// term: +// '^' +// '$' +// '.' +// character +// '[' [ '^' ] { character-range } ']' +// '(' regexp ')' +// character-range: +// character [ '-' character ] +// +// All characters are UTF-8-encoded code points. Backslashes escape special +// characters, including inside character classes. The standard Go character +// escapes are also recognized: \a \b \f \n \r \t \v. +// +// There are 16 methods of Regexp that match a regular expression and identify +// the matched text. Their names are matched by this regular expression: +// +// Find(All)?(String)?(Submatch)?(Index)? +// +// If 'All' is present, the routine matches successive non-overlapping +// matches of the entire expression. Empty matches abutting a preceding +// match are ignored. 
The return value is a slice containing the successive +// return values of the corresponding non-'All' routine. These routines take +// an extra integer argument, n; if n >= 0, the function returns at most n +// matches/submatches. +// +// If 'String' is present, the argument is a string; otherwise it is a slice +// of bytes; return values are adjusted as appropriate. +// +// If 'Submatch' is present, the return value is a slice identifying the +// successive submatches of the expression. Submatches are matches of +// parenthesized subexpressions within the regular expression, numbered from +// left to right in order of opening parenthesis. Submatch 0 is the match of +// the entire expression, submatch 1 the match of the first parenthesized +// subexpression, and so on. +// +// If 'Index' is present, matches and submatches are identified by byte index +// pairs within the input string: result[2*n:2*n+2] identifies the indexes of +// the nth submatch. The pair for n==0 identifies the match of the entire +// expression. If 'Index' is not present, the match is identified by the +// text of the match/submatch. If an index is negative, it means that +// subexpression did not match any string in the input. +// +// There is also a subset of the methods that can be applied to text read +// from a RuneReader: +// +// MatchReader, FindReaderIndex, FindReaderSubmatchIndex +// +// This set may grow. Note that regular expression matches may need to +// examine text beyond the text returned by a match, so the methods that +// match text from a RuneReader may read arbitrarily far into the input +// before returning. +// +// (There are a few other methods that do not match this pattern.) +// +package regexp + +import ( + "bytes" + "io" + "os" + "strings" + "utf8" +) + +var debug = false + +// Error is the local type for a parsing error. +type Error string + +func (e Error) String() string { + return string(e) +} + +// Error codes returned by failures to parse an expression.
var (
	ErrInternal            = Error("regexp: internal error")
	ErrUnmatchedLpar       = Error("regexp: unmatched '('")
	ErrUnmatchedRpar       = Error("regexp: unmatched ')'")
	ErrUnmatchedLbkt       = Error("regexp: unmatched '['")
	ErrUnmatchedRbkt       = Error("regexp: unmatched ']'")
	ErrBadRange            = Error("regexp: bad range in character class")
	ErrExtraneousBackslash = Error("regexp: extraneous backslash")
	ErrBadClosure          = Error("regexp: repeated closure (**, ++, etc.)")
	ErrBareClosure         = Error("regexp: closure applies to nothing")
	ErrBadBackslash        = Error("regexp: illegal backslash escape")
)

// Instruction kinds stored in instr.kind.
const (
	iStart     = iota // beginning of program
	iEnd              // end of program: success
	iBOT              // '^' beginning of text
	iEOT              // '$' end of text
	iChar             // 'a' regular character
	iCharClass        // [a-z] character class
	iAny              // '.' any character including newline
	iNotNL            // [^\n] special case: any character but newline
	iBra              // '(' parenthesized expression: 2*braNum for left, 2*braNum+1 for right
	iAlt              // '|' alternation
	iNop              // do nothing; makes it easy to link without patching
)

// An instruction executed by the NFA
type instr struct {
	kind  int    // the type of this instruction: iChar, iAny, etc.
	index int    // used only in debugging; could be eliminated
	next  *instr // the instruction to execute after this one
	// Special fields valid only for some items.
	char   int        // iChar
	braNum int        // iBra: even values open a group, odd values close it
	cclass *charClass // iCharClass
	left   *instr     // iAlt, other branch
}

// print writes a short description of the instruction with the builtin
// print. It is called by dump and by doExecute's "unknown instruction" path.
func (i *instr) print() {
	switch i.kind {
	case iStart:
		print("start")
	case iEnd:
		print("end")
	case iBOT:
		print("bot")
	case iEOT:
		print("eot")
	case iChar:
		print("char ", string(i.char))
	case iCharClass:
		i.cclass.print()
	case iAny:
		print("any")
	case iNotNL:
		print("notnl")
	case iBra:
		// The low bit of braNum distinguishes left (0) from right (1).
		if i.braNum&1 == 0 {
			print("bra", i.braNum/2)
		} else {
			print("ebra", i.braNum/2)
		}
	case iAlt:
		print("alt(", i.left.index, ")")
	case iNop:
		print("nop")
	}
}

// Regexp is the representation of a compiled regular expression.
// The public interface is entirely through methods.
// A Regexp is safe for concurrent use by multiple goroutines.
type Regexp struct {
	expr        string // the original expression
	prefix      string // initial plain text string
	prefixBytes []byte // initial plain text bytes
	inst        []*instr
	start       *instr // first instruction of machine
	prefixStart *instr // where to start if there is a prefix
	nbra        int    // number of brackets in expression, for subexpressions
}

// charClass is the payload of an iCharClass instruction: a set of inclusive
// character ranges, optionally negated.
type charClass struct {
	negate bool // is character class negated? ([^a-z])
	// slice of int, stored pairwise: [a-z] is (a,z); x is (x,x):
	ranges []int
	// cmin is the smallest range start and cmax the largest range end added
	// so far; matches uses them to reject out-of-span characters quickly.
	cmin, cmax int
}

// print writes a description of the class and its ranges with the builtin print.
func (cclass *charClass) print() {
	print("charclass")
	if cclass.negate {
		print(" (negated)")
	}
	for i := 0; i < len(cclass.ranges); i += 2 {
		l := cclass.ranges[i]
		r := cclass.ranges[i+1]
		if l == r {
			print(" [", string(l), "]")
		} else {
			print(" [", string(l), "-", string(r), "]")
		}
	}
}

// addRange appends the inclusive range a..b to the class and widens
// cmin/cmax to cover it.
func (cclass *charClass) addRange(a, b int) {
	// range is a through b inclusive
	cclass.ranges = append(cclass.ranges, a, b)
	if a < cclass.cmin {
		cclass.cmin = a
	}
	if b > cclass.cmax {
		cclass.cmax = b
	}
}

// matches reports whether c belongs to the class, taking negation into account.
func (cclass *charClass) matches(c int) bool {
	// Outside the overall span no range can contain c.
	if c < cclass.cmin || c > cclass.cmax {
		return cclass.negate
	}
	ranges := cclass.ranges
	for i := 0; i < len(ranges); i = i + 2 {
		if ranges[i] <= c && c <= ranges[i+1] {
			return !cclass.negate
		}
	}
	return cclass.negate
}

// newCharClass returns an iCharClass instruction holding an empty class.
// cmin and cmax start inverted so that the first addRange sets both.
func newCharClass() *instr {
	i := &instr{kind: iCharClass}
	i.cclass = new(charClass)
	i.cclass.ranges = make([]int, 0, 4)
	i.cclass.cmin = 0x10FFFF + 1 // MaxRune + 1
	i.cclass.cmax = -1
	return i
}

// add appends i to the program, records its position in i.index, and returns i.
func (re *Regexp) add(i *instr) *instr {
	i.index = len(re.inst)
	re.inst = append(re.inst, i)
	return i
}

// parser holds the state of the recursive-descent parse of re.expr.
type parser struct {
	re    *Regexp
	nlpar int // number of unclosed lpars
	pos   int // byte offset in re.expr of the character after ch
	ch    int // current character, or endOfText
}

// error aborts the parse by panicking with err; Compile recovers the panic
// and returns it as the error result.
func (p *parser) error(err Error) {
	panic(err)
}

// endOfText is the pseudo-character reported once the input is exhausted.
const endOfText = -1

// c returns the current character without advancing.
func (p *parser) c() int { return p.ch }

// nextc advances to the next character of the expression and returns it,
// or endOfText when the expression is exhausted.
func (p *parser) nextc() int {
	if p.pos >= len(p.re.expr) {
		p.ch = endOfText
	} else {
		c, w := utf8.DecodeRuneInString(p.re.expr[p.pos:])
		p.ch = c
		p.pos += w
	}
	return p.ch
}

// newParser returns a parser for re with the first character already loaded.
func newParser(re *Regexp) *parser {
	p := new(parser)
	p.re = re
	p.nextc() // load p.ch
	return p
}

// special reports whether c is one of the characters `\.+*?()|[]^$`.
func special(c int) bool {
	for _, r := range `\.+*?()|[]^$` {
		if c == r {
			return true
		}
	}
	return false
}

// ispunct reports whether c is one of the ASCII punctuation characters
// listed in the string below.
func ispunct(c int) bool {
	for _, r := range "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" {
		if c == r {
			return true
		}
	}
	return false
}

// escapes lists the letters accepted after a backslash; escaped holds the
// corresponding character values at the same indexes.
var escapes = []byte("abfnrtv")
var escaped = []byte("\a\b\f\n\r\t\v")

// escape returns the index of c in escapes, or -1 if c is not an escape letter.
func escape(c int) int {
	for i, b := range escapes {
		if int(b) == c {
			return i
		}
	}
	return -1
}

// checkBackslash returns the current character, resolving a backslash escape
// if one starts here. After an escape the parser is left on the escaped
// character, so callers still call nextc once to move past it.
func (p *parser) checkBackslash() int {
	c := p.c()
	if c == '\\' {
		c = p.nextc()
		switch {
		case c == endOfText:
			p.error(ErrExtraneousBackslash)
		case ispunct(c):
			// c is as delivered
		case escape(c) >= 0:
			c = int(escaped[escape(c)])
		default:
			p.error(ErrBadBackslash)
		}
	}
	return c
}

// charClass parses the body of a character class; the opening '[' has
// already been consumed. It stops on ']' or endOfText without consuming it,
// adds the resulting instruction to the program and returns it. Two common
// shapes are simplified: [^\n] becomes iNotNL and a single-character class
// such as [a] becomes iChar.
func (p *parser) charClass() *instr {
	i := newCharClass()
	cc := i.cclass
	if p.c() == '^' {
		cc.negate = true
		p.nextc()
	}
	left := -1 // pending low end of a range, or -1 if none
	for {
		switch c := p.c(); c {
		case ']', endOfText:
			if left >= 0 {
				p.error(ErrBadRange)
			}
			// Is it [^\n]?
			if cc.negate && len(cc.ranges) == 2 &&
				cc.ranges[0] == '\n' && cc.ranges[1] == '\n' {
				nl := &instr{kind: iNotNL}
				p.re.add(nl)
				return nl
			}
			// Special common case: "[a]" -> "a"
			if !cc.negate && len(cc.ranges) == 2 && cc.ranges[0] == cc.ranges[1] {
				c := &instr{kind: iChar, char: cc.ranges[0]}
				p.re.add(c)
				return c
			}
			p.re.add(i)
			return i
		case '-': // do this before backslash processing
			p.error(ErrBadRange)
		default:
			c = p.checkBackslash()
			p.nextc()
			switch {
			case left < 0: // first of pair
				if p.c() == '-' { // range
					p.nextc()
					left = c
				} else { // single char
					cc.addRange(c, c)
				}
			case left <= c: // second of pair
				cc.addRange(left, c)
				left = -1
			default:
				p.error(ErrBadRange)
			}
		}
	}
	panic("unreachable")
}

// term parses a single term and returns the first and last instructions of
// its fragment. It returns nil, nil without consuming input when the current
// character ends a concatenation ('|', ')' inside parentheses, or endOfText).
func (p *parser) term() (start, end *instr) {
	switch c := p.c(); c {
	case '|', endOfText:
		return nil, nil
	case '*', '+', '?':
		p.error(ErrBareClosure)
	case ')':
		if p.nlpar == 0 {
			p.error(ErrUnmatchedRpar)
		}
		return nil, nil
	case ']':
		p.error(ErrUnmatchedRbkt)
	case '^':
		p.nextc()
		start = p.re.add(&instr{kind: iBOT})
		return start, start
	case '$':
		p.nextc()
		start = p.re.add(&instr{kind: iEOT})
		return start, start
	case '.':
		p.nextc()
		start = p.re.add(&instr{kind: iAny})
		return start, start
	case '[':
		p.nextc()
		start = p.charClass()
		if p.c() != ']' {
			p.error(ErrUnmatchedLbkt)
		}
		p.nextc()
		return start, start
	case '(':
		p.nextc()
		p.nlpar++
		p.re.nbra++ // increment first so first subexpr is \1
		nbra := p.re.nbra
		start, end = p.regexp()
		if p.c() != ')' {
			p.error(ErrUnmatchedLpar)
		}
		p.nlpar--
		p.nextc()
		// Wrap the sub-expression in a left/right bracket pair.
		bra := &instr{kind: iBra, braNum: 2 * nbra}
		p.re.add(bra)
		ebra := &instr{kind: iBra, braNum: 2*nbra + 1}
		p.re.add(ebra)
		if start == nil {
			if end == nil {
				p.error(ErrInternal)
				return
			}
			start = ebra
		} else {
			end.next = ebra
		}
		bra.next = start
		return bra, ebra
	default:
		c = p.checkBackslash()
		p.nextc()
		start = &instr{kind: iChar, char: c}
		p.re.add(start)
		return start, start
	}
	panic("unreachable")
}

// closure parses a term followed by an optional '*', '+' or '?' and returns
// the resulting fragment. A second closure operator directly after the first
// is rejected with ErrBadClosure.
func (p *parser) closure() (start, end *instr) {
	start, end = p.term()
	if start == nil {
		return
	}
	switch p.c() {
	case '*':
		// (start,end)*:
		alt := &instr{kind: iAlt}
		p.re.add(alt)
		end.next = alt   // after end, do alt
		alt.left = start // alternate branch: return to start
		start = alt      // alt becomes new (start, end)
		end = alt
	case '+':
		// (start,end)+:
		alt := &instr{kind: iAlt}
		p.re.add(alt)
		end.next = alt   // after end, do alt
		alt.left = start // alternate branch: return to start
		end = alt        // start is unchanged; end is alt
	case '?':
		// (start,end)?:
		alt := &instr{kind: iAlt}
		p.re.add(alt)
		nop := &instr{kind: iNop}
		p.re.add(nop)
		alt.left = start // alternate branch is start
		alt.next = nop   // follow on to nop
		end.next = nop   // after end, go to nop
		start = alt      // start is now alt
		end = nop        // end is nop pointed to by both branches
	default:
		return
	}
	switch p.nextc() {
	case '*', '+', '?':
		p.error(ErrBadClosure)
	}
	return
}

// concatenation parses a sequence of closures and links them in order.
// An empty sequence is represented by a single iNop instruction.
func (p *parser) concatenation() (start, end *instr) {
	for {
		nstart, nend := p.closure()
		switch {
		case nstart == nil: // end of this concatenation
			if start == nil { // this is the empty string
				nop := p.re.add(&instr{kind: iNop})
				return nop, nop
			}
			return
		case start == nil: // this is first element of concatenation
			start, end = nstart, nend
		default:
			end.next = nstart
			end = nend
		}
	}
	panic("unreachable")
}

// regexp parses concatenations separated by '|'. Each '|' adds an iAlt whose
// two branches are the alternatives parsed so far and the new one, joined
// again at a shared iNop.
func (p *parser) regexp() (start, end *instr) {
	start, end = p.concatenation()
	for {
		switch p.c() {
		default:
			return
		case '|':
			p.nextc()
			nstart, nend := p.concatenation()
			alt := &instr{kind: iAlt}
			p.re.add(alt)
			alt.left = start
			alt.next = nstart
			nop := &instr{kind: iNop}
			p.re.add(nop)
			end.next = nop
			nend.next = nop
			start, end = alt, nop
		}
	}
	panic("unreachable")
}

// unNop follows next links from i until it reaches an instruction that is
// not an iNop, and returns that instruction.
func unNop(i *instr) *instr {
	for i.kind == iNop {
		i = i.next
	}
	return i
}

// eliminateNops rewires every next and left link so that it skips over iNop
// instructions. The iNop instructions themselves stay in re.inst.
func (re *Regexp) eliminateNops() {
	for _, inst := range re.inst {
		if inst.kind == iEnd {
			continue
		}
		inst.next = unNop(inst.next)
		if inst.kind == iAlt {
			inst.left = unNop(inst.left)
		}
	}
}

// dump prints the prefix and every instruction of the program; used when
// debug is set.
func (re *Regexp) dump() {
	print("prefix <", re.prefix, ">\n")
	for _, inst := range re.inst {
		print(inst.index, ": ")
		inst.print()
		if inst.kind != iEnd {
			print(" -> ", inst.next.index)
		}
		print("\n")
	}
}

// doParse compiles re.expr into re.inst: it brackets the parsed fragment
// with iStart and iEnd, removes nops from the links and computes the literal
// prefix. Parse errors are reported by panicking with an Error.
func (re *Regexp) doParse() {
	p := newParser(re)
	start := &instr{kind: iStart}
	re.add(start)
	s, e := p.regexp()
	start.next = s
	re.start = start
	e.next = re.add(&instr{kind: iEnd})

	if debug {
		re.dump()
		println()
	}

	re.eliminateNops()
	if debug {
		re.dump()
		println()
	}
	re.setPrefix()
	if debug {
		re.dump()
		println()
	}
}

// Extract regular text from the beginning of the pattern,
// possibly after a leading iBOT.
// That text can be used by doExecute to speed up matching.
func (re *Regexp) setPrefix() {
	var b []byte
	var utf = make([]byte, utf8.UTFMax)
	var inst *instr
	// First instruction is start; skip that.  Also skip any initial iBOT.
	inst = re.inst[0].next
	for inst.kind == iBOT {
		inst = inst.next
	}
Loop:
	for ; inst.kind != iEnd; inst = inst.next {
		// stop if this is not a char
		if inst.kind != iChar {
			break
		}
		// stop if this char can be followed by a match for an empty string,
		// which includes closures, ^, and $.
		switch inst.next.kind {
		case iBOT, iEOT, iAlt:
			break Loop
		}
		n := utf8.EncodeRune(utf, inst.char)
		b = append(b, utf[0:n]...)
	}
	// point prefixStart instruction to first non-CHAR after prefix
	re.prefixStart = inst
	re.prefixBytes = b
	re.prefix = string(b)
}

// String returns the source text used to compile the regular expression.
func (re *Regexp) String() string {
	return re.expr
}

// Compile parses a regular expression and returns, if successful, a Regexp
// object that can be used to match against text.
func Compile(str string) (regexp *Regexp, error os.Error) {
	regexp = new(Regexp)
	// doParse will panic if there is a parse error.
	defer func() {
		if e := recover(); e != nil {
			regexp = nil
			error = e.(Error) // Will re-panic if error was not an Error, e.g. nil-pointer exception
		}
	}()
	regexp.expr = str
	regexp.inst = make([]*instr, 0, 10)
	regexp.doParse()
	return
}

// MustCompile is like Compile but panics if the expression cannot be parsed.
// It simplifies safe initialization of global variables holding compiled regular
// expressions.
func MustCompile(str string) *Regexp {
	regexp, error := Compile(str)
	if error != nil {
		panic(`regexp: compiling "` + str + `": ` + error.String())
	}
	return regexp
}

// NumSubexp returns the number of parenthesized subexpressions in this Regexp.
func (re *Regexp) NumSubexp() int { return re.nbra }

// The match arena allows us to reduce the garbage generated by tossing
// match vectors away as we execute.  Matches are ref counted and returned
// to a free list when no longer active.  Increases a simple benchmark by 22X.
+type matchArena struct { + head *matchVec + len int // length of match vector + pos int + atBOT bool // whether we're at beginning of text + atEOT bool // whether we're at end of text +} + +type matchVec struct { + m []int // pairs of bracketing submatches. 0th is start,end + ref int + next *matchVec +} + +func (a *matchArena) new() *matchVec { + if a.head == nil { + const N = 10 + block := make([]matchVec, N) + for i := 0; i < N; i++ { + b := &block[i] + b.next = a.head + a.head = b + } + } + m := a.head + a.head = m.next + m.ref = 0 + if m.m == nil { + m.m = make([]int, a.len) + } + return m +} + +func (a *matchArena) free(m *matchVec) { + m.ref-- + if m.ref == 0 { + m.next = a.head + a.head = m + } +} + +func (a *matchArena) copy(m *matchVec) *matchVec { + m1 := a.new() + copy(m1.m, m.m) + return m1 +} + +func (a *matchArena) noMatch() *matchVec { + m := a.new() + for i := range m.m { + m.m[i] = -1 // no match seen; catches cases like "a(b)?c" on "ac" + } + m.ref = 1 + return m +} + +type state struct { + inst *instr // next instruction to execute + prefixed bool // this match began with a fixed prefix + match *matchVec +} + +// Append new state to to-do list. Leftmost-longest wins so avoid +// adding a state that's already active. The matchVec will be inc-ref'ed +// if it is assigned to a state. +func (a *matchArena) addState(s []state, inst *instr, prefixed bool, match *matchVec) []state { + switch inst.kind { + case iBOT: + if a.atBOT { + s = a.addState(s, inst.next, prefixed, match) + } + return s + case iEOT: + if a.atEOT { + s = a.addState(s, inst.next, prefixed, match) + } + return s + case iBra: + match.m[inst.braNum] = a.pos + s = a.addState(s, inst.next, prefixed, match) + return s + } + l := len(s) + // States are inserted in order so it's sufficient to see if we have the same + // instruction; no need to see if existing match is earlier (it is). 
+ for i := 0; i < l; i++ { + if s[i].inst == inst { + return s + } + } + s = append(s, state{inst, prefixed, match}) + match.ref++ + if inst.kind == iAlt { + s = a.addState(s, inst.left, prefixed, a.copy(match)) + // give other branch a copy of this match vector + s = a.addState(s, inst.next, prefixed, a.copy(match)) + } + return s +} + +// input abstracts different representations of the input text. It provides +// one-character lookahead. +type input interface { + step(pos int) (rune int, width int) // advance one rune + canCheckPrefix() bool // can we look ahead without losing info? + hasPrefix(re *Regexp) bool + index(re *Regexp, pos int) int +} + +// inputString scans a string. +type inputString struct { + str string +} + +func newInputString(str string) *inputString { + return &inputString{str: str} +} + +func (i *inputString) step(pos int) (int, int) { + if pos < len(i.str) { + return utf8.DecodeRuneInString(i.str[pos:len(i.str)]) + } + return endOfText, 0 +} + +func (i *inputString) canCheckPrefix() bool { + return true +} + +func (i *inputString) hasPrefix(re *Regexp) bool { + return strings.HasPrefix(i.str, re.prefix) +} + +func (i *inputString) index(re *Regexp, pos int) int { + return strings.Index(i.str[pos:], re.prefix) +} + +// inputBytes scans a byte slice. +type inputBytes struct { + str []byte +} + +func newInputBytes(str []byte) *inputBytes { + return &inputBytes{str: str} +} + +func (i *inputBytes) step(pos int) (int, int) { + if pos < len(i.str) { + return utf8.DecodeRune(i.str[pos:len(i.str)]) + } + return endOfText, 0 +} + +func (i *inputBytes) canCheckPrefix() bool { + return true +} + +func (i *inputBytes) hasPrefix(re *Regexp) bool { + return bytes.HasPrefix(i.str, re.prefixBytes) +} + +func (i *inputBytes) index(re *Regexp, pos int) int { + return bytes.Index(i.str[pos:], re.prefixBytes) +} + +// inputReader scans a RuneReader. 
type inputReader struct {
	r     io.RuneReader
	atEOT bool // set once ReadRune has returned an error
	pos   int  // sum of the widths of the runes read so far
}

// newInputReader wraps r as an input.
func newInputReader(r io.RuneReader) *inputReader {
	return &inputReader{r: r}
}

// step reads the next rune from the underlying reader. The reader can only
// move forward, so a request for any position other than the current one
// yields endOfText. A read error marks the input as finished.
func (i *inputReader) step(pos int) (int, int) {
	if !i.atEOT && pos != i.pos {
		return endOfText, 0
	}
	r, w, err := i.r.ReadRune()
	if err != nil {
		i.atEOT = true
		return endOfText, 0
	}
	i.pos += w
	return r, w
}

// canCheckPrefix is false: the prefix shortcut is disabled for readers.
func (i *inputReader) canCheckPrefix() bool {
	return false
}

// hasPrefix is never used for readers (canCheckPrefix is false); it returns false.
func (i *inputReader) hasPrefix(re *Regexp) bool {
	return false
}

// index is never used for readers (canCheckPrefix is false); it returns -1.
func (i *inputReader) index(re *Regexp, pos int) int {
	return -1
}

// Search match starting from pos bytes into the input.
// The result is the winning match vector: 2*(re.nbra+1) indexes stored as
// start/end pairs, with -1 for brackets that did not take part in the match.
// It returns nil when there is no match.
func (re *Regexp) doExecute(i input, pos int) []int {
	// s[in] holds the threads being stepped, s[out] the threads for the
	// next character; the two swap roles on every iteration.
	var s [2][]state
	s[0] = make([]state, 0, 10)
	s[1] = make([]state, 0, 10)
	in, out := 0, 1
	var final state
	found := false
	anchored := re.inst[0].next.kind == iBOT
	if anchored && pos > 0 {
		return nil
	}
	// fast check for initial plain substring
	if i.canCheckPrefix() && re.prefix != "" {
		advance := 0
		if anchored {
			if !i.hasPrefix(re) {
				return nil
			}
		} else {
			advance = i.index(re, pos)
			if advance == -1 {
				return nil
			}
		}
		pos += advance
	}
	// We look one character ahead so we can match $, which checks whether
	// we are at EOT.
	nextChar, nextWidth := i.step(pos)
	arena := &matchArena{
		len:   2 * (re.nbra + 1),
		pos:   pos,
		atBOT: pos == 0,
		atEOT: nextChar == endOfText,
	}
	for c, startPos := 0, pos; c != endOfText; {
		if !found && (pos == startPos || !anchored) {
			// prime the pump if we haven't seen a match yet
			match := arena.noMatch()
			match.m[0] = pos
			s[out] = arena.addState(s[out], re.start.next, false, match)
			arena.free(match) // if addState saved it, ref was incremented
		} else if len(s[out]) == 0 {
			// machine has completed
			break
		}
		in, out = out, in // old out state is new in state
		// clear out old state
		old := s[out]
		for _, state := range old {
			arena.free(state.match)
		}
		s[out] = old[0:0] // truncate state vector
		// Consume one character and refresh the one-character lookahead.
		c = nextChar
		thisPos := pos
		pos += nextWidth
		nextChar, nextWidth = i.step(pos)
		arena.atEOT = nextChar == endOfText
		arena.atBOT = false
		arena.pos = pos
		for _, st := range s[in] {
			switch st.inst.kind {
			case iBOT:
			case iEOT:
			case iChar:
				if c == st.inst.char {
					s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
				}
			case iCharClass:
				if st.inst.cclass.matches(c) {
					s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
				}
			case iAny:
				if c != endOfText {
					s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
				}
			case iNotNL:
				if c != endOfText && c != '\n' {
					s[out] = arena.addState(s[out], st.inst.next, st.prefixed, st.match)
				}
			case iBra:
			case iAlt:
			case iEnd:
				// choose leftmost longest
				if !found || // first
					st.match.m[0] < final.match.m[0] || // leftmost
					(st.match.m[0] == final.match.m[0] && thisPos > final.match.m[1]) { // longest
					if final.match != nil {
						arena.free(final.match)
					}
					final = st
					final.match.ref++
					final.match.m[1] = thisPos
				}
				found = true
			default:
				st.inst.print()
				panic("unknown instruction in execute")
			}
		}
	}
	if final.match == nil {
		return nil
	}
	// if match found, back up start of match by width of prefix.
	if final.prefixed && len(final.match.m) > 0 {
		final.match.m[0] -= len(re.prefix)
	}
	return final.match.m
}

// LiteralPrefix returns a literal string that must begin any match
// of the regular expression re.  It returns the boolean true if the
// literal string comprises the entire regular expression.
func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
	c := make([]int, len(re.inst)-2) // minus start and end.
	// First instruction is start; skip that.
	i := 0
	for inst := re.inst[0].next; inst.kind != iEnd; inst = inst.next {
		// stop if this is not a char
		if inst.kind != iChar {
			return string(c[:i]), false
		}
		c[i] = inst.char
		i++
	}
	return string(c[:i]), true
}

// MatchReader returns whether the Regexp matches the text read by the
// RuneReader.  The return value is a boolean: true for match, false for no
// match.
func (re *Regexp) MatchReader(r io.RuneReader) bool {
	return len(re.doExecute(newInputReader(r), 0)) > 0
}

// MatchString returns whether the Regexp matches the string s.
// The return value is a boolean: true for match, false for no match.
func (re *Regexp) MatchString(s string) bool { return len(re.doExecute(newInputString(s), 0)) > 0 }

// Match returns whether the Regexp matches the byte slice b.
// The return value is a boolean: true for match, false for no match.
func (re *Regexp) Match(b []byte) bool { return len(re.doExecute(newInputBytes(b), 0)) > 0 }

// MatchReader checks whether a textual regular expression matches the text
// read by the RuneReader.  More complicated queries need to use Compile and
// the full Regexp interface.
func MatchReader(pattern string, r io.RuneReader) (matched bool, error os.Error) {
	re, err := Compile(pattern)
	if err != nil {
		return false, err
	}
	return re.MatchReader(r), nil
}

// MatchString checks whether a textual regular expression
// matches a string.  More complicated queries need
// to use Compile and the full Regexp interface.
func MatchString(pattern string, s string) (matched bool, error os.Error) {
	re, err := Compile(pattern)
	if err != nil {
		return false, err
	}
	return re.MatchString(s), nil
}

// Match checks whether a textual regular expression
// matches a byte slice.  More complicated queries need
// to use Compile and the full Regexp interface.
func Match(pattern string, b []byte) (matched bool, error os.Error) {
	re, err := Compile(pattern)
	if err != nil {
		return false, err
	}
	return re.Match(b), nil
}

// ReplaceAllString returns a copy of src in which all matches for the Regexp
// have been replaced by repl.  No support is provided for expressions
// (e.g. \1 or $1) in the replacement string.
func (re *Regexp) ReplaceAllString(src, repl string) string {
	return re.ReplaceAllStringFunc(src, func(string) string { return repl })
}

// ReplaceAllStringFunc returns a copy of src in which all matches for the
// Regexp have been replaced by the return value of function repl (whose
// first argument is the matched string).  No support is provided for
// expressions (e.g. \1 or $1) in the replacement string.
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
	lastMatchEnd := 0 // end position of the most recent match
	searchPos := 0    // position where we next look for a match
	buf := new(bytes.Buffer)
	for searchPos <= len(src) {
		a := re.doExecute(newInputString(src), searchPos)
		if len(a) == 0 {
			break // no more matches
		}

		// Copy the unmatched characters before this match.
		io.WriteString(buf, src[lastMatchEnd:a[0]])

		// Now insert a copy of the replacement string, but not for a
		// match of the empty string immediately after another match.
		// (Otherwise, we get double replacement for patterns that
		// match both empty and nonempty strings.)
		if a[1] > lastMatchEnd || a[0] == 0 {
			io.WriteString(buf, repl(src[a[0]:a[1]]))
		}
		lastMatchEnd = a[1]

		// Advance past this match; always advance at least one character.
		_, width := utf8.DecodeRuneInString(src[searchPos:])
		if searchPos+width > a[1] {
			searchPos += width
		} else if searchPos+1 > a[1] {
			// This clause is only needed at the end of the input
			// string.  In that case, DecodeRuneInString returns width=0.
			searchPos++
		} else {
			searchPos = a[1]
		}
	}

	// Copy the unmatched characters after the last match.
	io.WriteString(buf, src[lastMatchEnd:])

	return buf.String()
}

// ReplaceAll returns a copy of src in which all matches for the Regexp
// have been replaced by repl.  No support is provided for expressions
// (e.g. \1 or $1) in the replacement text.
func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
	return re.ReplaceAllFunc(src, func([]byte) []byte { return repl })
}

// ReplaceAllFunc returns a copy of src in which all matches for the
// Regexp have been replaced by the return value of function repl (whose
// first argument is the matched []byte).  No support is provided for
// expressions (e.g. \1 or $1) in the replacement string.
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
	lastMatchEnd := 0 // end position of the most recent match
	searchPos := 0    // position where we next look for a match
	buf := new(bytes.Buffer)
	for searchPos <= len(src) {
		a := re.doExecute(newInputBytes(src), searchPos)
		if len(a) == 0 {
			break // no more matches
		}

		// Copy the unmatched characters before this match.
		buf.Write(src[lastMatchEnd:a[0]])

		// Now insert a copy of the replacement string, but not for a
		// match of the empty string immediately after another match.
		// (Otherwise, we get double replacement for patterns that
		// match both empty and nonempty strings.)
		if a[1] > lastMatchEnd || a[0] == 0 {
			buf.Write(repl(src[a[0]:a[1]]))
		}
		lastMatchEnd = a[1]

		// Advance past this match; always advance at least one character.
		_, width := utf8.DecodeRune(src[searchPos:])
		if searchPos+width > a[1] {
			searchPos += width
		} else if searchPos+1 > a[1] {
			// This clause is only needed at the end of the input
			// slice.  In that case, DecodeRune returns width=0.
			searchPos++
		} else {
			searchPos = a[1]
		}
	}

	// Copy the unmatched characters after the last match.
	buf.Write(src[lastMatchEnd:])

	return buf.Bytes()
}

// QuoteMeta returns a string that quotes all regular expression metacharacters
// inside the argument text; the returned string is a regular expression matching
// the literal text.  For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
func QuoteMeta(s string) string {
	// Worst case every byte needs a backslash in front of it.
	b := make([]byte, 2*len(s))

	// A byte loop is correct because all metacharacters are ASCII.
	j := 0
	for i := 0; i < len(s); i++ {
		if special(int(s[i])) {
			b[j] = '\\'
			j++
		}
		b[j] = s[i]
		j++
	}
	return string(b[0:j])
}

// Find matches in slice b if b is non-nil, otherwise find matches in string s.
// deliver is called with the match vector of each accepted match, in order,
// until n matches have been delivered or the input is exhausted.
func (re *Regexp) allMatches(s string, b []byte, n int, deliver func([]int)) {
	var end int
	if b == nil {
		end = len(s)
	} else {
		end = len(b)
	}

	for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; {
		var in input
		if b == nil {
			in = newInputString(s)
		} else {
			in = newInputBytes(b)
		}
		matches := re.doExecute(in, pos)
		if len(matches) == 0 {
			break
		}

		accept := true
		if matches[1] == pos {
			// We've found an empty match.
			if matches[0] == prevMatchEnd {
				// We don't allow an empty match right
				// after a previous match, so ignore it.
				accept = false
			}
			var width int
			// TODO: use step()
			if b == nil {
				_, width = utf8.DecodeRuneInString(s[pos:end])
			} else {
				_, width = utf8.DecodeRune(b[pos:end])
			}
			// Step over one character so the search makes progress;
			// at the end of the input, move past end to stop the loop.
			if width > 0 {
				pos += width
			} else {
				pos = end + 1
			}
		} else {
			pos = matches[1]
		}
		prevMatchEnd = matches[1]

		if accept {
			deliver(matches)
			i++
		}
	}
}

// Find returns a slice holding the text of the leftmost match in b of the regular expression.
// A return value of nil indicates no match.
func (re *Regexp) Find(b []byte) []byte {
	a := re.doExecute(newInputBytes(b), 0)
	if a == nil {
		return nil
	}
	return b[a[0]:a[1]]
}

// FindIndex returns a two-element slice of integers defining the location of
// the leftmost match in b of the regular expression.  The match itself is at
// b[loc[0]:loc[1]].
// A return value of nil indicates no match.
func (re *Regexp) FindIndex(b []byte) (loc []int) {
	a := re.doExecute(newInputBytes(b), 0)
	if a == nil {
		return nil
	}
	return a[0:2]
}

// FindString returns a string holding the text of the leftmost match in s of the regular
// expression.  If there is no match, the return value is an empty string,
// but it will also be empty if the regular expression successfully matches
// an empty string.  Use FindStringIndex or FindStringSubmatch if it is
// necessary to distinguish these cases.
func (re *Regexp) FindString(s string) string {
	a := re.doExecute(newInputString(s), 0)
	if a == nil {
		return ""
	}
	return s[a[0]:a[1]]
}

// FindStringIndex returns a two-element slice of integers defining the
// location of the leftmost match in s of the regular expression.  The match
// itself is at s[loc[0]:loc[1]].
// A return value of nil indicates no match.
+func (re *Regexp) FindStringIndex(s string) []int { + a := re.doExecute(newInputString(s), 0) + if a == nil { + return nil + } + return a[0:2] +} + +// FindReaderIndex returns a two-element slice of integers defining the +// location of the leftmost match of the regular expression in text read from +// the RuneReader. The match itself is at s[loc[0]:loc[1]]. A return +// value of nil indicates no match. +func (re *Regexp) FindReaderIndex(r io.RuneReader) []int { + a := re.doExecute(newInputReader(r), 0) + if a == nil { + return nil + } + return a[0:2] +} + +// FindSubmatch returns a slice of slices holding the text of the leftmost +// match of the regular expression in b and the matches, if any, of its +// subexpressions, as defined by the 'Submatch' descriptions in the package +// comment. +// A return value of nil indicates no match. +func (re *Regexp) FindSubmatch(b []byte) [][]byte { + a := re.doExecute(newInputBytes(b), 0) + if a == nil { + return nil + } + ret := make([][]byte, len(a)/2) + for i := range ret { + if a[2*i] >= 0 { + ret[i] = b[a[2*i]:a[2*i+1]] + } + } + return ret +} + +// FindSubmatchIndex returns a slice holding the index pairs identifying the +// leftmost match of the regular expression in b and the matches, if any, of +// its subexpressions, as defined by the 'Submatch' and 'Index' descriptions +// in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindSubmatchIndex(b []byte) []int { + return re.doExecute(newInputBytes(b), 0) +} + +// FindStringSubmatch returns a slice of strings holding the text of the +// leftmost match of the regular expression in s and the matches, if any, of +// its subexpressions, as defined by the 'Submatch' description in the +// package comment. +// A return value of nil indicates no match. 
+func (re *Regexp) FindStringSubmatch(s string) []string { + a := re.doExecute(newInputString(s), 0) + if a == nil { + return nil + } + ret := make([]string, len(a)/2) + for i := range ret { + if a[2*i] >= 0 { + ret[i] = s[a[2*i]:a[2*i+1]] + } + } + return ret +} + +// FindStringSubmatchIndex returns a slice holding the index pairs +// identifying the leftmost match of the regular expression in s and the +// matches, if any, of its subexpressions, as defined by the 'Submatch' and +// 'Index' descriptions in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindStringSubmatchIndex(s string) []int { + return re.doExecute(newInputString(s), 0) +} + +// FindReaderSubmatchIndex returns a slice holding the index pairs +// identifying the leftmost match of the regular expression of text read by +// the RuneReader, and the matches, if any, of its subexpressions, as defined +// by the 'Submatch' and 'Index' descriptions in the package comment. A +// return value of nil indicates no match. +func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int { + return re.doExecute(newInputReader(r), 0) +} + +const startSize = 10 // The size at which to start a slice in the 'All' routines. + +// FindAll is the 'All' version of Find; it returns a slice of all successive +// matches of the expression, as defined by the 'All' description in the +// package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAll(b []byte, n int) [][]byte { + if n < 0 { + n = len(b) + 1 + } + result := make([][]byte, 0, startSize) + re.allMatches("", b, n, func(match []int) { + result = append(result, b[match[0]:match[1]]) + }) + if len(result) == 0 { + return nil + } + return result +} + +// FindAllIndex is the 'All' version of FindIndex; it returns a slice of all +// successive matches of the expression, as defined by the 'All' description +// in the package comment. +// A return value of nil indicates no match. 
+func (re *Regexp) FindAllIndex(b []byte, n int) [][]int { + if n < 0 { + n = len(b) + 1 + } + result := make([][]int, 0, startSize) + re.allMatches("", b, n, func(match []int) { + result = append(result, match[0:2]) + }) + if len(result) == 0 { + return nil + } + return result +} + +// FindAllString is the 'All' version of FindString; it returns a slice of all +// successive matches of the expression, as defined by the 'All' description +// in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAllString(s string, n int) []string { + if n < 0 { + n = len(s) + 1 + } + result := make([]string, 0, startSize) + re.allMatches(s, nil, n, func(match []int) { + result = append(result, s[match[0]:match[1]]) + }) + if len(result) == 0 { + return nil + } + return result +} + +// FindAllStringIndex is the 'All' version of FindStringIndex; it returns a +// slice of all successive matches of the expression, as defined by the 'All' +// description in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAllStringIndex(s string, n int) [][]int { + if n < 0 { + n = len(s) + 1 + } + result := make([][]int, 0, startSize) + re.allMatches(s, nil, n, func(match []int) { + result = append(result, match[0:2]) + }) + if len(result) == 0 { + return nil + } + return result +} + +// FindAllSubmatch is the 'All' version of FindSubmatch; it returns a slice +// of all successive matches of the expression, as defined by the 'All' +// description in the package comment. +// A return value of nil indicates no match. 
+func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { + if n < 0 { + n = len(b) + 1 + } + result := make([][][]byte, 0, startSize) + re.allMatches("", b, n, func(match []int) { + slice := make([][]byte, len(match)/2) + for j := range slice { + if match[2*j] >= 0 { + slice[j] = b[match[2*j]:match[2*j+1]] + } + } + result = append(result, slice) + }) + if len(result) == 0 { + return nil + } + return result +} + +// FindAllSubmatchIndex is the 'All' version of FindSubmatchIndex; it returns +// a slice of all successive matches of the expression, as defined by the +// 'All' description in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int { + if n < 0 { + n = len(b) + 1 + } + result := make([][]int, 0, startSize) + re.allMatches("", b, n, func(match []int) { + result = append(result, match) + }) + if len(result) == 0 { + return nil + } + return result +} + +// FindAllStringSubmatch is the 'All' version of FindStringSubmatch; it +// returns a slice of all successive matches of the expression, as defined by +// the 'All' description in the package comment. +// A return value of nil indicates no match. +func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string { + if n < 0 { + n = len(s) + 1 + } + result := make([][]string, 0, startSize) + re.allMatches(s, nil, n, func(match []int) { + slice := make([]string, len(match)/2) + for j := range slice { + if match[2*j] >= 0 { + slice[j] = s[match[2*j]:match[2*j+1]] + } + } + result = append(result, slice) + }) + if len(result) == 0 { + return nil + } + return result +} + +// FindAllStringSubmatchIndex is the 'All' version of +// FindStringSubmatchIndex; it returns a slice of all successive matches of +// the expression, as defined by the 'All' description in the package +// comment. +// A return value of nil indicates no match. 
+func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int { + if n < 0 { + n = len(s) + 1 + } + result := make([][]int, 0, startSize) + re.allMatches(s, nil, n, func(match []int) { + result = append(result, match) + }) + if len(result) == 0 { + return nil + } + return result +} diff --git a/libgo/go/old/template/template_test.go b/libgo/go/old/template/template_test.go index eae8011..9595eb1 100644 --- a/libgo/go/old/template/template_test.go +++ b/libgo/go/old/template/template_test.go @@ -6,7 +6,6 @@ package template import ( "bytes" - "container/vector" "fmt" "io" "io/ioutil" @@ -42,7 +41,7 @@ type S struct { Empty []*T Emptystring string Null []*T - Vec *vector.Vector + Vec []interface{} True bool False bool Mp map[string]string @@ -497,9 +496,7 @@ func testAll(t *testing.T, parseFunc func(*Test) (*Template, os.Error)) { s.Pdata = []*T{&t1, &t2} s.Empty = []*T{} s.Null = nil - s.Vec = new(vector.Vector) - s.Vec.Push("elt1") - s.Vec.Push("elt2") + s.Vec = []interface{}{"elt1", "elt2"} s.True = true s.False = false s.Mp = make(map[string]string) |