diff options
author | Mark Wielaard <mark@gcc.gnu.org> | 2006-05-18 17:29:21 +0000 |
---|---|---|
committer | Mark Wielaard <mark@gcc.gnu.org> | 2006-05-18 17:29:21 +0000 |
commit | 4f9533c7722fa07511a94d005227961f4a4dec23 (patch) | |
tree | 9f9c470de62ee62fba1331a396450d728d2b1fad /libjava/classpath/gnu/regexp | |
parent | eaec4980e139903ae9b274d1abcf3a13946603a8 (diff) | |
download | gcc-4f9533c7722fa07511a94d005227961f4a4dec23.zip gcc-4f9533c7722fa07511a94d005227961f4a4dec23.tar.gz gcc-4f9533c7722fa07511a94d005227961f4a4dec23.tar.bz2 |
Imported GNU Classpath 0.90
Imported GNU Classpath 0.90
* scripts/makemake.tcl: LocaleData.java moved to gnu/java/locale.
* sources.am: Regenerated.
* gcj/javaprims.h: Regenerated.
* Makefile.in: Regenerated.
* gcj/Makefile.in: Regenerated.
* include/Makefile.in: Regenerated.
* testsuite/Makefile.in: Regenerated.
* gnu/java/lang/VMInstrumentationImpl.java: New override.
* gnu/java/net/local/LocalSocketImpl.java: Likewise.
* gnu/classpath/jdwp/VMMethod.java: Likewise.
* gnu/classpath/jdwp/VMVirtualMachine.java: Update to latest
interface.
* java/lang/Thread.java: Add UncaughtExceptionHandler.
* java/lang/reflect/Method.java: Implements GenericDeclaration and
isSynthetic(),
* java/lang/reflect/Field.java: Likewise.
* java/lang/reflect/Constructor.java
* java/lang/Class.java: Implements Type, GenericDeclaration,
getSimpleName() and getEnclosing*() methods.
* java/lang/Class.h: Add new public methods.
* java/lang/Math.java: Add signum(), ulp() and log10().
* java/lang/natMath.cc (log10): New function.
* java/security/VMSecureRandom.java: New override.
* java/util/logging/Logger.java: Updated to latest classpath
version.
* java/util/logging/LogManager.java: New override.
From-SVN: r113887
Diffstat (limited to 'libjava/classpath/gnu/regexp')
26 files changed, 1229 insertions, 499 deletions
diff --git a/libjava/classpath/gnu/regexp/BacktrackStack.java b/libjava/classpath/gnu/regexp/BacktrackStack.java new file mode 100644 index 0000000..0fc73f0 --- /dev/null +++ b/libjava/classpath/gnu/regexp/BacktrackStack.java @@ -0,0 +1,112 @@ +/* gnu/regexp/BacktrackStack.java + Copyright (C) 2006 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package gnu.regexp; + +/** + * An instance of this class represents a stack + * used for backtracking. + * + * @author Ito Kazumitsu</A> + */ +final class BacktrackStack { + + /** A set of data to be used for backtracking. */ + static class Backtrack { + /** REToken to which to go back */ + REToken token; + /** CharIndexed on which matches are being searched for. */ + CharIndexed input; + /** REMatch to be used by the REToken token. */ + REMatch match; + /** Some parameter used by the token's backtrack method. */ + Object param; + Backtrack(REToken token, CharIndexed input, REMatch match, Object param) { + this.token = token; + this.input = input; + // REMatch may change before backtracking is needed. So we + // keep a clone of it. + this.match = (REMatch) match.clone(); + this.param = param; + } + } + + Backtrack[] stack; + private int size; + private int capacity; + private static final int INITIAL_CAPACITY = 32; + private static final int CAPACITY_INCREMENT = 16; + + BacktrackStack() { + stack = new Backtrack[INITIAL_CAPACITY]; + size = 0; + capacity = INITIAL_CAPACITY; + } + + boolean empty() { + return size == 0; + } + + Backtrack peek() { + return stack[size - 1]; + } + + Backtrack pop() { + Backtrack bt = stack[--size]; + stack[size] = null; + return bt; + } + + void clear() { + for (int i = 0; i < size; i++) { + stack[i] = null; + } + size = 0; + } + + void push(Backtrack bt) { + if (size >= capacity) { + capacity += CAPACITY_INCREMENT; + Backtrack[] newStack = new Backtrack[capacity]; + System.arraycopy(stack, 0, newStack, 0, size); + stack = newStack; + } + stack[size++] = bt; + } + +} diff --git a/libjava/classpath/gnu/regexp/CharIndexed.java b/libjava/classpath/gnu/regexp/CharIndexed.java index df1d893..8aedc49 100644 --- a/libjava/classpath/gnu/regexp/CharIndexed.java +++ b/libjava/classpath/gnu/regexp/CharIndexed.java @@ -93,4 +93,24 @@ public interface CharIndexed { * Returns the effective length of this CharIndexed */ int length(); + + /** + * Sets the REMatch last found on this input. + */ + void setLastMatch(REMatch match); + + /** + * Returns the REMatch last found on this input. + */ + REMatch getLastMatch(); + + /** + * Returns the anchor. + */ + int getAnchor(); + + /** + * Sets the anchor. + */ + void setAnchor(int anchor); } diff --git a/libjava/classpath/gnu/regexp/CharIndexedCharArray.java b/libjava/classpath/gnu/regexp/CharIndexedCharArray.java index 1388d47..96a609b 100644 --- a/libjava/classpath/gnu/regexp/CharIndexedCharArray.java +++ b/libjava/classpath/gnu/regexp/CharIndexedCharArray.java @@ -1,5 +1,5 @@ /* gnu/regexp/CharIndexedCharArray.java - Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -36,36 +36,11 @@ obligated to do so. If you do not wish to do so, delete this exception statement from your version. */ package gnu.regexp; -import java.io.Serializable; +import java.nio.CharBuffer; -class CharIndexedCharArray implements CharIndexed, Serializable { - private char[] s; - private int anchor; +class CharIndexedCharArray extends CharIndexedCharSequence { CharIndexedCharArray(char[] str, int index) { - s = str; - anchor = index; - } - - public char charAt(int index) { - int pos = anchor + index; - return ((pos < s.length) && (pos >= 0)) ? s[pos] : OUT_OF_BOUNDS; - } - - public boolean isValid() { - return (anchor < s.length); - } - - public boolean move(int index) { - return ((anchor += index) < s.length); - } - - public CharIndexed lookBehind(int index, int length) { - if (length > (anchor + index)) length = anchor + index; - return new CharIndexedCharArray(s, anchor + index - length); - } - - public int length() { - return s.length - anchor; + super(CharBuffer.wrap(str), index); } } diff --git a/libjava/classpath/gnu/regexp/CharIndexedCharSequence.java b/libjava/classpath/gnu/regexp/CharIndexedCharSequence.java new file mode 100644 index 0000000..5bbe4ab --- /dev/null +++ b/libjava/classpath/gnu/regexp/CharIndexedCharSequence.java @@ -0,0 +1,82 @@ +/* gnu/regexp/CharIndexedCharSequence.java + Copyright (C) 2006 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package gnu.regexp; +import java.io.Serializable; + +class CharIndexedCharSequence implements CharIndexed, Serializable { + private CharSequence s; + private int anchor; + private int len; + + CharIndexedCharSequence(CharSequence s, int index) { + this.s = s; + len = s.length(); + anchor = index; + } + + public char charAt(int index) { + int pos = anchor + index; + return ((pos < len) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS; + } + + public boolean isValid() { + return (anchor < len); + } + + public boolean move(int index) { + return ((anchor += index) < len); + } + + public CharIndexed lookBehind(int index, int length) { + if (length > (anchor + index)) length = anchor + index; + return new CharIndexedCharSequence(s, anchor + index - length); + } + + public int length() { + return len - anchor; + } + + private REMatch lastMatch; + public void setLastMatch(REMatch match) { + lastMatch = (REMatch)match.clone(); + lastMatch.anchor = anchor; + } + public REMatch getLastMatch() { return lastMatch; } + public int getAnchor() { return anchor; } + public void setAnchor(int anchor) { this.anchor = anchor; } +} diff --git a/libjava/classpath/gnu/regexp/CharIndexedInputStream.java b/libjava/classpath/gnu/regexp/CharIndexedInputStream.java index d5225a7..290a94e 100644 --- a/libjava/classpath/gnu/regexp/CharIndexedInputStream.java +++ b/libjava/classpath/gnu/regexp/CharIndexedInputStream.java @@ -155,5 +155,27 @@ class CharIndexedInputStream implements CharIndexed { throw new UnsupportedOperationException( "difficult to tell the length for an input stream"); } + + public void setLastMatch(REMatch match) { + throw new UnsupportedOperationException( + "difficult to support setLastMatch for an input stream"); + } + + public REMatch getLastMatch() { + throw new UnsupportedOperationException( + "difficult to support getLastMatch for an input stream"); + } + + public int getAnchor() { + throw new UnsupportedOperationException( + "difficult to support getAnchor for an input stream"); + } + + public void setAnchor(int anchor) { + throw new UnsupportedOperationException( + "difficult to support setAnchor for an input stream"); + } + + } diff --git a/libjava/classpath/gnu/regexp/CharIndexedString.java b/libjava/classpath/gnu/regexp/CharIndexedString.java index fe4fa8f..2498265 100644 --- a/libjava/classpath/gnu/regexp/CharIndexedString.java +++ b/libjava/classpath/gnu/regexp/CharIndexedString.java @@ -1,5 +1,5 @@ /* gnu/regexp/CharIndexedString.java - Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -36,38 +36,9 @@ obligated to do so. If you do not wish to do so, delete this exception statement from your version. */ package gnu.regexp; -import java.io.Serializable; -class CharIndexedString implements CharIndexed, Serializable { - private String s; - private int anchor; - private int len; - +class CharIndexedString extends CharIndexedCharSequence { CharIndexedString(String str, int index) { - s = str; - len = s.length(); - anchor = index; - } - - public char charAt(int index) { - int pos = anchor + index; - return ((pos < len) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS; - } - - public boolean isValid() { - return (anchor < len); - } - - public boolean move(int index) { - return ((anchor += index) < len); - } - - public CharIndexed lookBehind(int index, int length) { - if (length > (anchor + index)) length = anchor + index; - return new CharIndexedString(s, anchor + index - length); - } - - public int length() { - return len - anchor; + super(str, index); } } diff --git a/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java b/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java index 9c9118d..d6b03bb 100644 --- a/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java +++ b/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java @@ -1,5 +1,5 @@ /* gnu/regexp/CharIndexedStringBuffer.java - Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -36,36 +36,10 @@ obligated to do so. If you do not wish to do so, delete this exception statement from your version. */ package gnu.regexp; -import java.io.Serializable; -class CharIndexedStringBuffer implements CharIndexed, Serializable { - private StringBuffer s; - private int anchor; +class CharIndexedStringBuffer extends CharIndexedCharSequence { CharIndexedStringBuffer(StringBuffer str, int index) { - s = str; - anchor = index; + super(str, index); } - - public char charAt(int index) { - int pos = anchor + index; - return ((pos < s.length()) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS; - } - - public boolean isValid() { - return (anchor < s.length()); - } - - public boolean move(int index) { - return ((anchor += index) < s.length()); - } - - public CharIndexed lookBehind(int index, int length) { - if (length > (anchor + index)) length = anchor + index; - return new CharIndexedStringBuffer(s, anchor + index - length); - } - - public int length() { - return s.length() - anchor; - } } diff --git a/libjava/classpath/gnu/regexp/RE.java b/libjava/classpath/gnu/regexp/RE.java index ef606a6..e0665f0 100644 --- a/libjava/classpath/gnu/regexp/RE.java +++ b/libjava/classpath/gnu/regexp/RE.java @@ -41,6 +41,7 @@ import java.io.Serializable; import java.util.Locale; import java.util.PropertyResourceBundle; import java.util.ResourceBundle; +import java.util.Stack; import java.util.Vector; /** @@ -78,13 +79,18 @@ import java.util.Vector; * <P> * * These methods all have similar argument lists. The input can be a - * String, a character array, a StringBuffer, or an + * CharIndexed, String, a character array, a StringBuffer, or an * InputStream of some sort. Note that when using an * InputStream, the stream read position cannot be guaranteed after * attempting a match (this is not a bug, but a consequence of the way * regular expressions work). Using an REMatchEnumeration can * eliminate most positioning problems. * + * Although the input object can be of various types, it is recommended + * that it should be a CharIndexed because {@link CharIndexed#getLastMatch()} + * can show the last match found on this input, which helps the expression + * \G work as the end of the previous match. + * * <P> * * The optional index argument specifies the offset from the beginning @@ -235,6 +241,17 @@ public class RE extends REToken { */ public static final int REG_REPLACE_USE_BACKSLASHESCAPE = 0x0200; + /** + * Compilation flag. Allow whitespace and comments in pattern. + * This is equivalent to the "/x" operator in Perl. + */ + public static final int REG_X_COMMENTS = 0x0400; + + /** + * Compilation flag. If set, REG_ICASE is effective only for US-ASCII. + */ + public static final int REG_ICASE_USASCII = 0x0800; + /** Returns a string representing the version of the gnu.regexp package. */ public static final String version() { return VERSION; @@ -334,6 +351,7 @@ public class RE extends REToken { // Precalculate these so we don't pay for the math every time we // need to access them. boolean insens = ((cflags & REG_ICASE) > 0); + boolean insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0); // Parse pattern into tokens. Does anyone know if it's more efficient // to use char[] than a String.charAt()? I'm assuming so. @@ -372,6 +390,31 @@ public class RE extends REToken { if (quot) unit.bk = false; + if (((cflags & REG_X_COMMENTS) > 0) && (!unit.bk) && (!quot)) { + if (Character.isWhitespace(unit.ch)) { + continue; + } + if (unit.ch == '#') { + for (int i = index; i < pLength; i++) { + if (pattern[i] == '\n') { + index = i + 1; + continue; + } + else if (pattern[i] == '\r') { + if (i + 1 < pLength && pattern[i + 1] == '\n') { + index = i + 2; + } + else { + index = i + 1; + } + continue; + } + } + index = pLength; + continue; + } + } + // ALTERNATION OPERATOR // \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT) // not available if RE_LIMITED_OPS is set @@ -420,6 +463,7 @@ public class RE extends REToken { else { addToken(currentToken); currentToken = new RETokenChar(subIndex,unit.ch,insens); + if (insensUSASCII) currentToken.unicodeAware = false; } } @@ -495,8 +539,8 @@ public class RE extends REToken { case 'd': case 'm': case 's': - // case 'u': not supported - // case 'x': not supported + case 'u': + case 'x': case '-': if (!syntax.get(RESyntax.RE_EMBEDDED_FLAGS)) break; // Set or reset syntax flags. @@ -535,8 +579,20 @@ public class RE extends REToken { newCflags |= REG_DOT_NEWLINE; flagIndex++; break; - // case 'u': not supported - // case 'x': not supported + case 'u': + if (negate) + newCflags |= REG_ICASE_USASCII; + else + newCflags &= ~REG_ICASE_USASCII; + flagIndex++; + break; + case 'x': + if (negate) + newCflags &= ~REG_X_COMMENTS; + else + newCflags |= REG_X_COMMENTS; + flagIndex++; + break; case '-': negate = true; flagIndex++; @@ -553,6 +609,7 @@ public class RE extends REToken { syntax = newSyntax; cflags = newCflags; insens = ((cflags & REG_ICASE) > 0); + insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0); // This can be treated as though it were a comment. comment = true; index = flagIndex - 1; @@ -565,6 +622,7 @@ public class RE extends REToken { syntax = newSyntax; cflags = newCflags; insens = ((cflags & REG_ICASE) > 0); + insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0); index = flagIndex -1; // Fall through to the next case. } @@ -673,6 +731,7 @@ public class RE extends REToken { syntax = savedSyntax; cflags = savedCflags; insens = ((cflags & REG_ICASE) > 0); + insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0); flagsSaved = false; } } // not a comment @@ -785,6 +844,7 @@ public class RE extends REToken { index = index - 2 + ce.len; addToken(currentToken); currentToken = new RETokenChar(subIndex,ce.ch,insens); + if (insensUSASCII) currentToken.unicodeAware = false; } // BACKREFERENCE OPERATOR @@ -812,6 +872,7 @@ public class RE extends REToken { int num = parseInt(pattern, numBegin, numEnd-numBegin, 10); currentToken = new RETokenBackRef(subIndex,num,insens); + if (insensUSASCII) currentToken.unicodeAware = false; index = numEnd; } @@ -860,6 +921,7 @@ public class RE extends REToken { else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false); + if (insensUSASCII) currentToken.unicodeAware = false; } // NON-DIGIT OPERATOR @@ -868,6 +930,7 @@ public class RE extends REToken { else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true); + if (insensUSASCII) currentToken.unicodeAware = false; } // NEWLINE ESCAPE @@ -892,6 +955,7 @@ public class RE extends REToken { else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false); + if (insensUSASCII) currentToken.unicodeAware = false; } // NON-WHITESPACE OPERATOR @@ -900,6 +964,7 @@ public class RE extends REToken { else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true); + if (insensUSASCII) currentToken.unicodeAware = false; } // TAB ESCAPE @@ -916,6 +981,7 @@ public class RE extends REToken { else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false); + if (insensUSASCII) currentToken.unicodeAware = false; } // NON-ALPHANUMERIC OPERATOR @@ -924,12 +990,19 @@ public class RE extends REToken { else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) { addToken(currentToken); currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true); + if (insensUSASCII) currentToken.unicodeAware = false; } // END OF STRING OPERATOR - // \Z + // \Z, \z - else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) { + // FIXME: \Z and \z are different in that if the input string + // ends with a line terminator, \Z matches the position before + // the final terminator. This special behavior of \Z is yet + // to be implemented. + + else if (unit.bk && (unit.ch == 'Z' || unit.ch == 'z') && + syntax.get(RESyntax.RE_STRING_ANCHORS)) { addToken(currentToken); currentToken = new RETokenEnd(subIndex,null); } @@ -945,6 +1018,7 @@ public class RE extends REToken { index = index - 2 + ce.len; addToken(currentToken); currentToken = new RETokenChar(subIndex,ce.ch,insens); + if (insensUSASCII) currentToken.unicodeAware = false; } // NAMED PROPERTY @@ -958,6 +1032,16 @@ public class RE extends REToken { index = index - 2 + np.len; addToken(currentToken); currentToken = getRETokenNamedProperty(subIndex,np,insens,index); + if (insensUSASCII) currentToken.unicodeAware = false; + } + + // END OF PREVIOUS MATCH + // \G + + else if (unit.bk && (unit.ch == 'G') && + syntax.get(RESyntax.RE_STRING_ANCHORS)) { + addToken(currentToken); + currentToken = new RETokenEndOfPreviousMatch(subIndex); } // NON-SPECIAL CHARACTER (or escape to make literal) @@ -966,6 +1050,7 @@ public class RE extends REToken { else { // not a special character addToken(currentToken); currentToken = new RETokenChar(subIndex,unit.ch,insens); + if (insensUSASCII) currentToken.unicodeAware = false; } } // end while @@ -1006,6 +1091,7 @@ public class RE extends REToken { throws REException { boolean insens = ((cflags & REG_ICASE) > 0); + boolean insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0); Vector options = new Vector(); Vector addition = new Vector(); boolean additionAndAppeared = false; @@ -1035,7 +1121,9 @@ public class RE extends REToken { if ((ch == '-') && (lastCharIsSet)) { if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); if ((ch = pattern[index]) == ']') { - options.addElement(new RETokenChar(subIndex,lastChar,insens)); + RETokenChar t = new RETokenChar(subIndex,lastChar,insens); + if (insensUSASCII) t.unicodeAware = false; + options.addElement(t); lastChar = '-'; } else { if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) { @@ -1045,7 +1133,9 @@ public class RE extends REToken { ch = ce.ch; index = index + ce.len - 1; } - options.addElement(new RETokenRange(subIndex,lastChar,ch,insens)); + RETokenRange t = new RETokenRange(subIndex,lastChar,ch,insens); + if (insensUSASCII) t.unicodeAware = false; + options.addElement(t); lastChar = 0; lastCharIsSet = false; index++; } @@ -1088,12 +1178,20 @@ public class RE extends REToken { asciiEsc = ce.ch; asciiEscIsSet = true; index = index - 1 + ce.len - 1; } - if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens)); + if (lastCharIsSet) { + RETokenChar t = new RETokenChar(subIndex,lastChar,insens); + if (insensUSASCII) t.unicodeAware = false; + options.addElement(t); + } if (posixID != -1) { - options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate)); + RETokenPOSIX t = new RETokenPOSIX(subIndex,posixID,insens,negate); + if (insensUSASCII) t.unicodeAware = false; + options.addElement(t); } else if (np != null) { - options.addElement(getRETokenNamedProperty(subIndex,np,insens,index)); + RETokenNamedProperty t = getRETokenNamedProperty(subIndex,np,insens,index); + if (insensUSASCII) t.unicodeAware = false; + options.addElement(t); } else if (asciiEscIsSet) { lastChar = asciiEsc; lastCharIsSet = true; } else { @@ -1104,8 +1202,11 @@ public class RE extends REToken { StringBuffer posixSet = new StringBuffer(); index = getPosixSet(pattern,index+1,posixSet); int posixId = RETokenPOSIX.intValue(posixSet.toString()); - if (posixId != -1) - options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false)); + if (posixId != -1) { + RETokenPOSIX t = new RETokenPOSIX(subIndex,posixId,insens,false); + if (insensUSASCII) t.unicodeAware = false; + options.addElement(t); + } } else if ((ch == '[') && (syntax.get(RESyntax.RE_NESTED_CHARCLASS))) { ParseCharClassResult result = parseCharClass( subIndex, pattern, index, pLength, cflags, syntax, 0); @@ -1158,14 +1259,22 @@ public class RE extends REToken { result.index: result.index - 1); } } else { - if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens)); + if (lastCharIsSet) { + RETokenChar t = new RETokenChar(subIndex,lastChar,insens); + if (insensUSASCII) t.unicodeAware = false; + options.addElement(t); + } lastChar = ch; lastCharIsSet = true; } if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index); } // while in list // Out of list, index is one past ']' - if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens)); + if (lastCharIsSet) { + RETokenChar t = new RETokenChar(subIndex,lastChar,insens); + if (insensUSASCII) t.unicodeAware = false; + options.addElement(t); + } ParseCharClassResult result = new ParseCharClassResult(); // Create a new RETokenOneOf @@ -1396,11 +1505,10 @@ public class RE extends REToken { return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS); REMatch m = new REMatch(numSubs, index, eflags); if (firstToken.match(input, m)) { - while (m != null) { + if (m != null) { if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) { return true; } - m = m.next; } } return false; @@ -1508,17 +1616,27 @@ public class RE extends REToken { } /* Implements abstract method REToken.match() */ - boolean match(CharIndexed input, REMatch mymatch) { + boolean match(CharIndexed input, REMatch mymatch) { if (firstToken == null) { return next(input, mymatch); } // Note the start of this subexpression - mymatch.start[subIndex] = mymatch.index; + mymatch.start1[subIndex] = mymatch.index; return firstToken.match(input, mymatch); } - + + REMatch findMatch(CharIndexed input, REMatch mymatch) { + if (mymatch.backtrackStack == null) + mymatch.backtrackStack = new BacktrackStack(); + boolean b = match(input, mymatch); + if (b) { + return mymatch; + } + return null; + } + /** * Returns the first match found in the input. If no match is found, * null is returned. @@ -1602,6 +1720,7 @@ public class RE extends REToken { */ best.end[0] = best.index; best.finish(input); + input.setLastMatch(best); return best; } } @@ -1942,19 +2061,34 @@ public class RE extends REToken { } void dump(StringBuffer os) { - os.append('('); + os.append("(?#startRE subIndex=" + subIndex + ")"); if (subIndex == 0) os.append("?:"); if (firstToken != null) firstToken.dumpAll(os); - os.append(')'); + if (subIndex == 0) + os.append(")"); + os.append("(?#endRE subIndex=" + subIndex + ")"); } // Cast input appropriately or throw exception - private static CharIndexed makeCharIndexed(Object input, int index) { - // We could let a String fall through to final input, but since - // it's the most likely input type, we check it first. - if (input instanceof String) + // This method was originally a private method, but has been made + // public because java.util.regex.Matcher uses this. + public static CharIndexed makeCharIndexed(Object input, int index) { + // The case where input is already a CharIndexed is supposed + // be the most likely because this is the case with + // java.util.regex.Matcher. + // We could let a String or a CharSequence fall through + // to final input, but since it'a very likely input type, + // we check it first. + if (input instanceof CharIndexed) { + CharIndexed ci = (CharIndexed) input; + ci.setAnchor(index); + return ci; + } + else if (input instanceof CharSequence) + return new CharIndexedCharSequence((CharSequence) input,index); + else if (input instanceof String) return new CharIndexedString((String) input,index); else if (input instanceof char[]) return new CharIndexedCharArray((char[]) input,index); @@ -1962,8 +2096,6 @@ public class RE extends REToken { return new CharIndexedStringBuffer((StringBuffer) input,index); else if (input instanceof InputStream) return new CharIndexedInputStream((InputStream) input,index); - else if (input instanceof CharIndexed) - return (CharIndexed) input; // do we lose index info? else return new CharIndexedString(input.toString(), index); } diff --git a/libjava/classpath/gnu/regexp/REMatch.java b/libjava/classpath/gnu/regexp/REMatch.java index 91a3c02..140a9c4 100644 --- a/libjava/classpath/gnu/regexp/REMatch.java +++ b/libjava/classpath/gnu/regexp/REMatch.java @@ -49,6 +49,7 @@ import java.io.Serializable; */ public final class REMatch implements Serializable, Cloneable { private String matchedText; + private CharIndexed matchedCharIndexed; // These variables are package scope for fast access within the engine int eflags; // execution flags this match was made using @@ -64,20 +65,28 @@ public final class REMatch implements Serializable, Cloneable { // Package scope; used by RE. int index; // used while matching to mark current match position in input + // start1[i] is set when the i-th subexp starts. And start1[i] is copied + // to start[i] when the i-th subexp ends. So start[i] keeps the previously + // assigned value while the i-th subexp is being processed. This makes + // backreference to the i-th subexp within the i-th subexp possible. int[] start; // start positions (relative to offset) for each (sub)exp. + int[] start1; // start positions (relative to offset) for each (sub)exp. int[] end; // end positions for the same - REMatch next; // other possibility (to avoid having to use arrays) + // start[i] == -1 or end[i] == -1 means that the start/end position is void. + // start[i] == p or end[i] == p where p < 0 and p != -1 means that + // the actual start/end position is (p+1). Start/end positions may + // become negative when the subexpression is in a RETokenLookBehind. boolean empty; // empty string matched. This flag is used only within // RETokenRepeated. - int matchFlags; // flags passed to match methods - static final int MF_FIND_ALL = 0x01; + + BacktrackStack backtrackStack; public Object clone() { try { REMatch copy = (REMatch) super.clone(); - copy.next = null; copy.start = (int[]) start.clone(); + copy.start1 = (int[]) start1.clone(); copy.end = (int[]) end.clone(); return copy; @@ -88,14 +97,15 @@ public final class REMatch implements Serializable, Cloneable { void assignFrom(REMatch other) { start = other.start; + start1 = other.start1; end = other.end; index = other.index; - // need to deep clone? - next = other.next; + backtrackStack = other.backtrackStack; } REMatch(int subs, int anchor, int eflags) { start = new int[subs+1]; + start1 = new int[subs+1]; end = new int[subs+1]; this.anchor = anchor; this.eflags = eflags; @@ -109,6 +119,7 @@ public final class REMatch implements Serializable, Cloneable { for (i = 0; i < end[0]; i++) sb.append(text.charAt(i)); matchedText = sb.toString(); + matchedCharIndexed = text; for (i = 0; i < start.length; i++) { // If any subexpressions didn't terminate, they don't count // TODO check if this code ever gets hit @@ -117,7 +128,7 @@ public final class REMatch implements Serializable, Cloneable { end[i] = -1; } } - next = null; // cut off alternates + backtrackStack = null; } /** Clears the current match and moves the offset to the new index. */ @@ -125,9 +136,9 @@ public final class REMatch implements Serializable, Cloneable { offset = index; this.index = 0; for (int i = 0; i < start.length; i++) { - start[i] = end[i] = -1; + start[i] = start1[i] = end[i] = -1; } - next = null; // cut off alternates + backtrackStack = null; } /** @@ -184,7 +195,19 @@ public final class REMatch implements Serializable, Cloneable { if ((sub >= start.length) || sub < 0) throw new IndexOutOfBoundsException("No group " + sub); if (start[sub] == -1) return null; - return (matchedText.substring(start[sub],end[sub])); + if (start[sub] >= 0 && end[sub] <= matchedText.length()) + return (matchedText.substring(start[sub],end[sub])); + else { + // This case occurs with RETokenLookAhead or RETokenLookBehind. + StringBuffer sb = new StringBuffer(); + int s = start[sub]; + int e = end[sub]; + if (s < 0) s += 1; + if (e < 0) e += 1; + for (int i = start[0] + s; i < start[0] + e; i++) + sb.append(matchedCharIndexed.charAt(i)); + return sb.toString(); + } } /** @@ -198,7 +221,8 @@ public final class REMatch implements Serializable, Cloneable { public int getSubStartIndex(int sub) { if (sub >= start.length) return -1; int x = start[sub]; - return (x == -1) ? x : offset + x; + return (x == -1) ? x : + (x >= 0) ? offset + x : offset + x + 1; } /** @@ -212,7 +236,8 @@ public final class REMatch implements Serializable, Cloneable { public int getStartIndex(int sub) { if (sub >= start.length) return -1; int x = start[sub]; - return (x == -1) ? x : offset + x; + return (x == -1) ? x : + (x >= 0) ? offset + x : offset + x + 1; } /** @@ -226,7 +251,8 @@ public final class REMatch implements Serializable, Cloneable { public int getSubEndIndex(int sub) { if (sub >= start.length) return -1; int x = end[sub]; - return (x == -1) ? x : offset + x; + return (x == -1) ? x : + (x >= 0) ? offset + x : offset + x + 1; } /** @@ -239,7 +265,8 @@ public final class REMatch implements Serializable, Cloneable { public int getEndIndex(int sub) { if (sub >= start.length) return -1; int x = end[sub]; - return (x == -1) ? x : offset + x; + return (x == -1) ? x : + (x >= 0) ? offset + x : offset + x + 1; } /** @@ -279,41 +306,19 @@ public final class REMatch implements Serializable, Cloneable { return output.toString(); } - static class REMatchList { - REMatch head; - REMatch tail; - REMatchList() { - head = tail = null; - } - /* Not used now. But we may need this some day? - void addHead(REMatch newone) { - if (head == null) { - head = newone; - tail = newone; - while (tail.next != null) { - tail = tail.next; - } - } - else { - REMatch tmp = newone; - while (tmp.next != null) tmp = tmp.next; - tmp.next = head; - head = newone; - } - } - */ - void addTail(REMatch newone) { - if (head == null) { - head = newone; - tail = newone; - } - else { - tail.next = newone; - } - while (tail.next != null) { - tail = tail.next; - } +/* The following are used for debugging purpose + static String d(REMatch m) { + if (m == null) return "null"; + else return "[" + m.index + "]"; + } + + String substringUptoIndex(CharIndexed input) { + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < index; i++) { + sb.append(input.charAt(i)); } + return sb.toString(); } +*/ } diff --git a/libjava/classpath/gnu/regexp/REToken.java b/libjava/classpath/gnu/regexp/REToken.java index 5f4659b..f2abc02 100644 --- a/libjava/classpath/gnu/regexp/REToken.java +++ b/libjava/classpath/gnu/regexp/REToken.java @@ -1,5 +1,5 @@ /* gnu/regexp/REToken.java - Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -43,6 +43,7 @@ abstract class REToken implements Serializable, Cloneable { protected REToken next = null; protected REToken uncle = null; protected int subIndex; + protected boolean unicodeAware = true; public Object clone() { try { @@ -70,30 +71,119 @@ abstract class REToken implements Serializable, Cloneable { } /** Returns true if the match succeeded, false if it failed. */ - abstract boolean match(CharIndexed input, REMatch mymatch); + boolean match(CharIndexed input, REMatch mymatch) { + REMatch m = matchThis(input, mymatch); + if (m == null) return false; + if (next(input, m)) { + mymatch.assignFrom(m); + return true; + } + return false; + } + + /** Returns true if the match succeeded, false if it failed. + * The matching is done against this REToken only. Chained + * tokens are not checked. + * This method is used to define the default match method. + * Simple subclasses of REToken, for example, such that + * matches only one character, should implement this method. + * Then the default match method will work. But complicated + * subclasses of REToken, which needs a special match method, + * do not have to implement this method. + */ + REMatch matchThis(CharIndexed input, REMatch mymatch) { + throw new UnsupportedOperationException( + "This REToken does not have a matchThis method"); + } /** Returns true if the rest of the tokens match, false if they fail. */ protected boolean next(CharIndexed input, REMatch mymatch) { - if (next == null) { - if (uncle == null) { - return true; - } else { - return uncle.match(input, mymatch); - } - } else { - return next.match(input, mymatch); - } + REToken nextToken = getNext(); + if (nextToken == null) return true; + return nextToken.match(input, mymatch); } - + + /** Returns the next REToken chained to this REToken. */ + REToken getNext() { + return (next != null ? next : uncle); + } + + /** Finds a match at the position specified by the given REMatch. + * If necessary, adds a BacktrackStack.Backtrack object to backtrackStack + * of the REmatch found this time so that another possible match + * may be found when backtrack is called. + * By default, nothing is added to the backtrackStack. + * @param CharIndexed input Input character sequence. + * @param mymatch Position at which a match should be found + * @return REMatch object if a match was found, null otherwise. + */ + REMatch findMatch(CharIndexed input, REMatch mymatch) { + boolean b = match(input, mymatch); + if (b) return mymatch; + return null; + } + + boolean returnsFixedLengthMatches() { + return false; + } + + int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) { + throw new UnsupportedOperationException( + "This token does not support findFixedLengthMatches"); + } + + /** + * Backtrack to another possibility. + * Ordinary REToken cannot do anything if this method is called. + */ + REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) { + throw new IllegalStateException("This token cannot be backtracked to"); + } + boolean chain(REToken token) { next = token; return true; // Token was accepted } - abstract void dump(StringBuffer os); + abstract void dump(StringBuffer os); void dumpAll(StringBuffer os) { dump(os); if (next != null) next.dumpAll(os); } + + public String toString() { + StringBuffer os = new StringBuffer(); + dump(os); + return os.toString(); + } + + /** + * Converts the character argument to lowercase. + * @param ch the character to be converted. + * @param unicodeAware If true, use java.lang.Character#toLowerCase; + * otherwise, only US-ASCII charactes can be converted. + * @return the lowercase equivalent of the character, if any; + * otherwise, the character itself. + */ + public static char toLowerCase(char ch, boolean unicodeAware) { + if (unicodeAware) return Character.toLowerCase(ch); + if (ch >= 'A' && ch <= 'Z') return (char)(ch + 'a' - 'A'); + return ch; + } + + /** + * Converts the character argument to uppercase. + * @param ch the character to be converted. + * @param unicodeAware If true, use java.lang.Character#toUpperCase; + * otherwise, only US-ASCII charactes can be converted. + * @return the uppercase equivalent of the character, if any; + * otherwise, the character itself. + */ + public static char toUpperCase(char ch, boolean unicodeAware) { + if (unicodeAware) return Character.toUpperCase(ch); + if (ch >= 'a' && ch <= 'z') return (char)(ch + 'A' - 'a'); + return ch; + } + } diff --git a/libjava/classpath/gnu/regexp/RETokenAny.java b/libjava/classpath/gnu/regexp/RETokenAny.java index 2b0967a..a37d956 100644 --- a/libjava/classpath/gnu/regexp/RETokenAny.java +++ b/libjava/classpath/gnu/regexp/RETokenAny.java @@ -1,5 +1,5 @@ /* gnu/regexp/RETokenAny.java - Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -59,15 +59,37 @@ final class RETokenAny extends REToken { return 1; } - boolean match(CharIndexed input, REMatch mymatch) { - char ch = input.charAt(mymatch.index); - if ((ch == CharIndexed.OUT_OF_BOUNDS) - || (!newline && (ch == '\n')) - || (matchNull && (ch == 0))) { - return false; + REMatch matchThis(CharIndexed input, REMatch mymatch) { + char ch = input.charAt(mymatch.index); + boolean retval = matchOneChar(ch); + if (retval) { + ++mymatch.index; + return mymatch; + } + return null; } - ++mymatch.index; - return next(input, mymatch); + + boolean matchOneChar(char ch) { + if ((ch == CharIndexed.OUT_OF_BOUNDS) + || (!newline && (ch == '\n')) + || (matchNull && (ch == 0))) { + return false; + } + return true; + } + + boolean returnsFixedLengthMatches() { return true; } + + int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) { + int index = mymatch.index; + int numRepeats = 0; + while (true) { + if (numRepeats >= max) break; + char ch = input.charAt(index++); + if (! matchOneChar(ch)) break; + numRepeats++; + } + return numRepeats; } void dump(StringBuffer os) { diff --git a/libjava/classpath/gnu/regexp/RETokenBackRef.java b/libjava/classpath/gnu/regexp/RETokenBackRef.java index 060a6cf..25ef952 100644 --- a/libjava/classpath/gnu/regexp/RETokenBackRef.java +++ b/libjava/classpath/gnu/regexp/RETokenBackRef.java @@ -1,5 +1,5 @@ /* gnu/regexp/RETokenBackRef.java - Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -50,30 +50,32 @@ final class RETokenBackRef extends REToken { // should implement getMinimumLength() -- any ideas? - boolean match(CharIndexed input, REMatch mymatch) { - if (num >= mymatch.start.length) return false; - if (num >= mymatch.end.length) return false; + REMatch matchThis(CharIndexed input, REMatch mymatch) { + if (num >= mymatch.start.length) return null; + if (num >= mymatch.end.length) return null; int b,e; b = mymatch.start[num]; e = mymatch.end[num]; - if ((b==-1)||(e==-1)) return false; // this shouldn't happen, but... + if ((b==-1)||(e==-1)) return null; // this shouldn't happen, but... + if (b < 0) b += 1; + if (e < 0) e += 1; for (int i=b; i<e; i++) { char c1 = input.charAt(mymatch.index+i-b); char c2 = input.charAt(i); if (c1 != c2) { if (insens) { - if (c1 != Character.toLowerCase(c2) && - c1 != Character.toUpperCase(c2)) { - return false; + if (c1 != toLowerCase(c2, unicodeAware) && + c1 != toUpperCase(c2, unicodeAware)) { + return null; } } else { - return false; + return null; } } } mymatch.index += e-b; - return next(input, mymatch); + return mymatch; } void dump(StringBuffer os) { diff --git a/libjava/classpath/gnu/regexp/RETokenChar.java b/libjava/classpath/gnu/regexp/RETokenChar.java index 5c087c6..1b3a748 100644 --- a/libjava/classpath/gnu/regexp/RETokenChar.java +++ b/libjava/classpath/gnu/regexp/RETokenChar.java @@ -1,5 +1,5 @@ /* gnu/regexp/RETokenChar.java - Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -44,8 +44,9 @@ final class RETokenChar extends REToken { RETokenChar(int subIndex, char c, boolean ins) { super(subIndex); + insens = ins; ch = new char [1]; - ch[0] = (insens = ins) ? Character.toLowerCase(c) : c; + ch[0] = c; } int getMinimumLength() { @@ -56,18 +57,50 @@ final class RETokenChar extends REToken { return ch.length; } - boolean match(CharIndexed input, REMatch mymatch) { + REMatch matchThis(CharIndexed input, REMatch mymatch) { + int z = ch.length; + if (matchOneString(input, mymatch.index)) { + mymatch.index += z; + return mymatch; + } + return null; + } + + boolean matchOneString(CharIndexed input, int index) { int z = ch.length; char c; for (int i=0; i<z; i++) { - c = input.charAt(mymatch.index+i); - if (( (insens) ? Character.toLowerCase(c) : c ) != ch[i]) { + c = input.charAt(index+i); + if (! charEquals(c, ch[i])) { return false; } } - mymatch.index += z; + return true; + } - return next(input, mymatch); + private boolean charEquals(char c1, char c2) { + if (c1 == c2) return true; + if (! insens) return false; + if (toLowerCase(c1, unicodeAware) == c2) return true; + if (toUpperCase(c1, unicodeAware) == c2) return true; + return false; + } + + boolean returnsFixedLengthMatches() { return true; } + + int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) { + int index = mymatch.index; + int numRepeats = 0; + int z = ch.length; + while (true) { + if (numRepeats >= max) break; + if (matchOneString(input, index)) { + index += z; + numRepeats++; + } + else break; + } + return numRepeats; } // Overrides REToken.chain() to optimize for strings diff --git a/libjava/classpath/gnu/regexp/RETokenEnd.java b/libjava/classpath/gnu/regexp/RETokenEnd.java index 788a964..deb2fc5 100644 --- a/libjava/classpath/gnu/regexp/RETokenEnd.java +++ b/libjava/classpath/gnu/regexp/RETokenEnd.java @@ -1,5 +1,5 @@ /* gnu/regexp/RETokenEnd.java - Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -53,24 +53,34 @@ final class RETokenEnd extends REToken { return 0; } - boolean match(CharIndexed input, REMatch mymatch) { + REMatch matchThis(CharIndexed input, REMatch mymatch) { char ch = input.charAt(mymatch.index); if (ch == CharIndexed.OUT_OF_BOUNDS) return ((mymatch.eflags & RE.REG_NOTEOL)>0) ? - false : next(input, mymatch); + null : mymatch; if (newline != null) { char z; int i = 0; // position in newline do { z = newline.charAt(i); - if (ch != z) return false; + if (ch != z) return null; ++i; ch = input.charAt(mymatch.index + i); } while (i < newline.length()); - return next(input, mymatch); + return mymatch; } - return false; + return null; + } + + boolean returnsFixedLengthMatches() { return true; } + + int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) { + REMatch m = (REMatch) mymatch.clone(); + REToken tk = (REToken) this.clone(); + tk.chain(null); + if (tk.match(input, m)) return max; + else return 0; } void dump(StringBuffer os) { diff --git a/libjava/classpath/gnu/regexp/RETokenEndOfPreviousMatch.java b/libjava/classpath/gnu/regexp/RETokenEndOfPreviousMatch.java new file mode 100644 index 0000000..167d10b --- /dev/null +++ b/libjava/classpath/gnu/regexp/RETokenEndOfPreviousMatch.java @@ -0,0 +1,72 @@ +/* gnu/regexp/RETokenEndOfPreviousMatch.java + Copyright (C) 2006 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package gnu.regexp; + +class RETokenEndOfPreviousMatch extends RETokenStart { + + RETokenEndOfPreviousMatch(int subIndex) { + super(subIndex, null); + } + + int getMaximumLength() { + return 0; + } + + REMatch matchThis(CharIndexed input, REMatch mymatch) { + REMatch lastMatch = input.getLastMatch(); + if (lastMatch == null) return super.matchThis(input, mymatch); + if (input.getAnchor()+mymatch.index == + lastMatch.anchor+lastMatch.index) { + return mymatch; + } + else { + return null; + } + } + + boolean returnsFixedLengthmatches() { return true; } + + int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) { + if (matchThis(input, mymatch) != null) return max; + else return 0; + } + + void dump(StringBuffer os) { + os.append("\\G"); + } +} diff --git a/libjava/classpath/gnu/regexp/RETokenEndSub.java b/libjava/classpath/gnu/regexp/RETokenEndSub.java index fe2969d..fca01c7 100644 --- a/libjava/classpath/gnu/regexp/RETokenEndSub.java +++ b/libjava/classpath/gnu/regexp/RETokenEndSub.java @@ -1,5 +1,5 @@ /* gnu/regexp/RETokenEndSub.java - Copyright (C) 2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -46,12 +46,21 @@ final class RETokenEndSub extends REToken { return 0; } - boolean match(CharIndexed input, REMatch mymatch) { + REMatch matchThis(CharIndexed input, REMatch mymatch) { + mymatch.start[subIndex] = mymatch.start1[subIndex]; mymatch.end[subIndex] = mymatch.index; - return next(input, mymatch); + return mymatch; } - + + REMatch findMatch(CharIndexed input, REMatch mymatch) { + mymatch.start[subIndex] = mymatch.start1[subIndex]; + mymatch.end[subIndex] = mymatch.index; + return super.findMatch(input, mymatch); + } + void dump(StringBuffer os) { // handled by RE + // But add something for debugging. + os.append("(?#RETokenEndSub subIndex=" + subIndex + ")"); } } diff --git a/libjava/classpath/gnu/regexp/RETokenIndependent.java b/libjava/classpath/gnu/regexp/RETokenIndependent.java index 2eb1472..8bb95c6 100644 --- a/libjava/classpath/gnu/regexp/RETokenIndependent.java +++ b/libjava/classpath/gnu/regexp/RETokenIndependent.java @@ -57,14 +57,16 @@ final class RETokenIndependent extends REToken return re.getMaximumLength(); } - boolean match(CharIndexed input, REMatch mymatch) + REMatch matchThis(CharIndexed input, REMatch mymatch) { - if (re.match(input, mymatch)) { + boolean b = re.match(input, mymatch); + if (b) { // Once we have found a match, we do not see other possible matches. - mymatch.next = null; - return next(input, mymatch); + if (mymatch.backtrackStack != null) mymatch.backtrackStack.clear(); + return mymatch; + } - return false; + return null; } void dump(StringBuffer os) { diff --git a/libjava/classpath/gnu/regexp/RETokenLookAhead.java b/libjava/classpath/gnu/regexp/RETokenLookAhead.java index b44dfa5..009872c 100644 --- a/libjava/classpath/gnu/regexp/RETokenLookAhead.java +++ b/libjava/classpath/gnu/regexp/RETokenLookAhead.java @@ -1,5 +1,5 @@ /* gnu/regexp/RETokenLookAhead.java - Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -56,28 +56,17 @@ final class RETokenLookAhead extends REToken return 0; } - boolean match(CharIndexed input, REMatch mymatch) + REMatch matchThis(CharIndexed input, REMatch mymatch) { REMatch trymatch = (REMatch)mymatch.clone(); - REMatch trymatch1 = (REMatch)mymatch.clone(); - REMatch newMatch = null; if (re.match(input, trymatch)) { - if (negative) return false; - if (next(input, trymatch1)) - newMatch = trymatch1; + if (negative) return null; + trymatch.index = mymatch.index; + return trymatch; } - - if (newMatch != null) { - if (negative) return false; - //else - mymatch.assignFrom(newMatch); - return true; - } - else { // no match - if (negative) - return next(input, mymatch); - //else - return false; + else { + if (negative) return mymatch; + return null; } } diff --git a/libjava/classpath/gnu/regexp/RETokenLookBehind.java b/libjava/classpath/gnu/regexp/RETokenLookBehind.java index a6c1b34..8311d1a 100644 --- a/libjava/classpath/gnu/regexp/RETokenLookBehind.java +++ b/libjava/classpath/gnu/regexp/RETokenLookBehind.java @@ -55,35 +55,37 @@ final class RETokenLookBehind extends REToken return 0; } - boolean match(CharIndexed input, REMatch mymatch) + REMatch matchThis(CharIndexed input, REMatch mymatch) { int max = re.getMaximumLength(); CharIndexed behind = input.lookBehind(mymatch.index, max); REMatch trymatch = (REMatch)mymatch.clone(); REMatch trymatch1 = (REMatch)mymatch.clone(); REMatch newMatch = null; - int curIndex = trymatch.index + behind.length() - input.length(); + int diff = behind.length() - input.length(); + int curIndex = trymatch.index + diff; trymatch.index = 0; + trymatch.offset = 0; RETokenMatchHereOnly stopper = new RETokenMatchHereOnly(curIndex); REToken re1 = (REToken) re.clone(); re1.chain(stopper); if (re1.match(behind, trymatch)) { - if (negative) return false; - if (next(input, trymatch1)) - newMatch = trymatch1; + if (negative) return null; + for (int i = 0; i < trymatch.start.length; i++) { + if (trymatch.start[i] != -1 && trymatch.end[i] != -1) { + trymatch.start[i] -= diff; + if (trymatch.start[i] < 0) trymatch.start[i] -= 1; + trymatch.end[i] -= diff; + if (trymatch.end[i] < 0) trymatch.end[i] -= 1; + } + } + trymatch.index = mymatch.index; + trymatch.offset = mymatch.offset; + return trymatch; } - - if (newMatch != null) { - if (negative) return false; - //else - mymatch.assignFrom(newMatch); - return true; - } - else { // no match - if (negative) - return next(input, mymatch); - //else - return false; + else { + if (negative) return mymatch; + return null; } } @@ -105,8 +107,8 @@ final class RETokenLookBehind extends REToken this.index = index; } - boolean match(CharIndexed input, REMatch mymatch) { - return index == mymatch.index; + REMatch matchThis(CharIndexed input, REMatch mymatch) { + return (index == mymatch.index ? mymatch : null); } void dump(StringBuffer os) {} diff --git a/libjava/classpath/gnu/regexp/RETokenNamedProperty.java b/libjava/classpath/gnu/regexp/RETokenNamedProperty.java index 13c1e41..6147e87 100644 --- a/libjava/classpath/gnu/regexp/RETokenNamedProperty.java +++ b/libjava/classpath/gnu/regexp/RETokenNamedProperty.java @@ -105,24 +105,43 @@ final class RETokenNamedProperty extends REToken { return 1; } - boolean match(CharIndexed input, REMatch mymatch) { - char ch = input.charAt(mymatch.index); + REMatch matchThis(CharIndexed input, REMatch mymatch) { + char ch = input.charAt(mymatch.index); + boolean retval = matchOneChar(ch); + if (retval) { + ++mymatch.index; + return mymatch; + } + return null; + } + + private boolean matchOneChar(char ch) { if (ch == CharIndexed.OUT_OF_BOUNDS) return false; boolean retval = handler.includes(ch); if (insens) { retval = retval || - handler.includes(Character.toUpperCase(ch)) || - handler.includes(Character.toLowerCase(ch)); + handler.includes(toUpperCase(ch, unicodeAware)) || + handler.includes(toLowerCase(ch, unicodeAware)); } if (negate) retval = !retval; - if (retval) { - ++mymatch.index; - return next(input, mymatch); + return retval; + } + + boolean returnsFixedLengthMatches() { return true; } + + int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) { + int index = mymatch.index; + int numRepeats = 0; + while (true) { + if (numRepeats >= max) break; + char ch = input.charAt(index++); + if (! matchOneChar(ch)) break; + numRepeats++; } - else return false; + return numRepeats; } void dump(StringBuffer os) { @@ -246,9 +265,6 @@ final class RETokenNamedProperty extends REToken { private static class POSIXHandler extends Handler { private RETokenPOSIX retoken; - private REMatch mymatch = new REMatch(0,0,0); - private char[] chars = new char[1]; - private CharIndexedCharArray ca = new CharIndexedCharArray(chars, 0); public POSIXHandler(String name) { int posixId = RETokenPOSIX.intValue(name.toLowerCase()); if (posixId != -1) @@ -257,9 +273,7 @@ final class RETokenNamedProperty extends REToken { throw new RuntimeException("Unknown posix ID: " + name); } public boolean includes(char c) { - chars[0] = c; - mymatch.index = 0; - return retoken.match(ca, mymatch); + return retoken.matchOneChar(c); } } diff --git a/libjava/classpath/gnu/regexp/RETokenOneOf.java b/libjava/classpath/gnu/regexp/RETokenOneOf.java index 260bc4b..f747d59 100644 --- a/libjava/classpath/gnu/regexp/RETokenOneOf.java +++ b/libjava/classpath/gnu/regexp/RETokenOneOf.java @@ -42,6 +42,9 @@ import java.util.Stack; final class RETokenOneOf extends REToken { private Vector options; private boolean negative; + // True if this RETokenOneOf is supposed to match only one character, + // which is typically the case of a character class expression. + private boolean matchesOneChar; private Vector addition; // This Vector addition is used to store nested character classes. @@ -76,12 +79,14 @@ final class RETokenOneOf extends REToken { this.negative = negative; for (int i = 0; i < optionsStr.length(); i++) options.addElement(new RETokenChar(subIndex,optionsStr.charAt(i),insens)); + matchesOneChar = true; } RETokenOneOf(int subIndex, Vector options, boolean negative) { super(subIndex); this.options = options; this.negative = negative; + matchesOneChar = negative; } RETokenOneOf(int subIndex, Vector options, Vector addition, boolean negative) { @@ -89,12 +94,11 @@ final class RETokenOneOf extends REToken { this.options = options; this.addition = addition; this.negative = negative; + matchesOneChar = (negative || addition != null); } int getMinimumLength() { - // (negative || addition != null) occurs when this token originates from - // character class expression. - if (negative || addition != null) return 1; + if (matchesOneChar) return 1; int min = Integer.MAX_VALUE; int x; for (int i=0; i < options.size(); i++) { @@ -105,9 +109,7 @@ final class RETokenOneOf extends REToken { } int getMaximumLength() { - // (negative || addition != null) occurs when this token originates from - // character class expression. - if (negative || addition != null) return 1; + if (matchesOneChar) return 1; int max = 0; int x; for (int i=0; i < options.size(); i++) { @@ -118,6 +120,11 @@ final class RETokenOneOf extends REToken { } boolean match(CharIndexed input, REMatch mymatch) { + if (matchesOneChar) return matchOneChar(input, mymatch); + else return matchOneRE(input, mymatch); + } + + boolean matchOneChar(CharIndexed input, REMatch mymatch) { REMatch tryMatch; boolean tryOnly; if (addition == null) { @@ -187,40 +194,80 @@ final class RETokenOneOf extends REToken { } private boolean matchP(CharIndexed input, REMatch mymatch, boolean tryOnly) { - boolean stopMatchingIfSatisfied = - (mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0; - REMatch.REMatchList newMatch = new REMatch.REMatchList(); REToken tk; for (int i=0; i < options.size(); i++) { - // In order that the backtracking can work, - // each option must be chained to the next token. - // But the chain method has some side effect, so - // we use clones. - tk = (REToken)((REToken) options.elementAt(i)).clone(); - if (! tryOnly) { - tk.chain(this.next); - tk.setUncle(this.uncle); - tk.subIndex = this.subIndex; - } + tk = (REToken) options.elementAt(i); REMatch tryMatch = (REMatch) mymatch.clone(); if (tk.match(input, tryMatch)) { // match was successful if (tryOnly) return true; - newMatch.addTail(tryMatch); - if (stopMatchingIfSatisfied) break; - } // is a match - } // try next option - if (tryOnly) return false; - - if (newMatch.head != null) { - // set contents of mymatch equal to newMatch + if (next(input, tryMatch)) { + mymatch.assignFrom(tryMatch); + return true; + } + } + } + return false; + } - // try each one that matched - mymatch.assignFrom(newMatch.head); + private boolean matchOneRE(CharIndexed input, REMatch mymatch) { + REMatch newMatch = findMatch(input, mymatch); + if (newMatch != null) { + mymatch.assignFrom(newMatch); return true; - } else { - return false; } - } + return false; + } + + REMatch findMatch(CharIndexed input, REMatch mymatch) { + if (matchesOneChar) return super.findMatch(input, mymatch); + return findMatch(input, mymatch, 0); + } + + REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) { + return findMatch(input, mymatch, ((Integer)param).intValue()); + } + + private REMatch findMatch(CharIndexed input, REMatch mymatch, int optionIndex) { + for (int i = optionIndex; i < options.size(); i++) { + REToken tk = (REToken) options.elementAt(i); + tk = (REToken) tk.clone(); + tk.chain(getNext()); + REMatch tryMatch = (REMatch) mymatch.clone(); + if (tryMatch.backtrackStack == null) { + tryMatch.backtrackStack = new BacktrackStack(); + } + boolean stackPushed = false; + if (i + 1 < options.size()) { + tryMatch.backtrackStack.push(new BacktrackStack.Backtrack( + this, input, mymatch, new Integer(i + 1))); + stackPushed = true; + } + boolean b = tk.match(input, tryMatch); + if (b) { + return tryMatch; + } + if (stackPushed) tryMatch.backtrackStack.pop(); + } + return null; + } + + boolean returnsFixedLengthMatches() { return matchesOneChar; } + + int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) { + if (!matchesOneChar) + return super.findFixedLengthMatches(input, mymatch, max); + int numRepeats = 0; + REMatch m = (REMatch) mymatch.clone(); + REToken tk = (REToken) this.clone(); + tk.chain(null); + while (true) { + if (numRepeats >= max) break; + m = tk.findMatch(input, m); + if (m == null) break; + numRepeats++; + } + return numRepeats; + } void dump(StringBuffer os) { os.append(negative ? "[^" : "(?:"); diff --git a/libjava/classpath/gnu/regexp/RETokenPOSIX.java b/libjava/classpath/gnu/regexp/RETokenPOSIX.java index 4182c6f..dbea98a 100644 --- a/libjava/classpath/gnu/regexp/RETokenPOSIX.java +++ b/libjava/classpath/gnu/regexp/RETokenPOSIX.java @@ -1,5 +1,5 @@ /* gnu/regexp/RETokenPOSIX.java - Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -85,8 +85,17 @@ final class RETokenPOSIX extends REToken { return 1; } - boolean match(CharIndexed input, REMatch mymatch) { - char ch = input.charAt(mymatch.index); + REMatch matchThis(CharIndexed input, REMatch mymatch) { + char ch = input.charAt(mymatch.index); + boolean retval = matchOneChar(ch); + if (retval) { + ++mymatch.index; + return mymatch; + } + return null; + } + + boolean matchOneChar(char ch) { if (ch == CharIndexed.OUT_OF_BOUNDS) return false; @@ -134,11 +143,21 @@ final class RETokenPOSIX extends REToken { } if (negated) retval = !retval; - if (retval) { - ++mymatch.index; - return next(input, mymatch); - } - else return false; + return retval; + } + + boolean returnsFixedLengthMatches() { return true; } + + int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) { + int index = mymatch.index; + int numRepeats = 0; + while (true) { + if (numRepeats >= max) break; + char ch = input.charAt(index++); + if (! matchOneChar(ch)) break; + numRepeats++; + } + return numRepeats; } void dump(StringBuffer os) { diff --git a/libjava/classpath/gnu/regexp/RETokenRange.java b/libjava/classpath/gnu/regexp/RETokenRange.java index 8a1ac86..9d3da32 100644 --- a/libjava/classpath/gnu/regexp/RETokenRange.java +++ b/libjava/classpath/gnu/regexp/RETokenRange.java @@ -1,5 +1,5 @@ /* gnu/regexp/RETokenRange.java - Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -56,23 +56,41 @@ final class RETokenRange extends REToken { return 1; } - boolean match(CharIndexed input, REMatch mymatch) { + REMatch matchThis(CharIndexed input, REMatch mymatch) { char c = input.charAt(mymatch.index); + if (matchOneChar(c)) { + ++mymatch.index; + return mymatch; + } + return null; + } + + boolean matchOneChar(char c) { if (c == CharIndexed.OUT_OF_BOUNDS) return false; boolean matches = (c >= lo) && (c <= hi); if (! matches && insens) { - char c1 = Character.toLowerCase(c); + char c1 = toLowerCase(c, unicodeAware); matches = (c1 >= lo) && (c1 <= hi); if (!matches) { - c1 = Character.toUpperCase(c); + c1 = toUpperCase(c, unicodeAware); matches = (c1 >= lo) && (c1 <= hi); } } - if (matches) { - ++mymatch.index; - return next(input, mymatch); + return matches; + } + + boolean returnsFixedLengthMatches() { return true; } + + int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) { + int index = mymatch.index; + int numRepeats = 0; + while (true) { + if (numRepeats >= max) break; + char ch = input.charAt(index++); + if (! matchOneChar(ch)) break; + numRepeats++; } - return false; + return numRepeats; } void dump(StringBuffer os) { diff --git a/libjava/classpath/gnu/regexp/RETokenRepeated.java b/libjava/classpath/gnu/regexp/RETokenRepeated.java index 2d019c5..1bad4c7 100644 --- a/libjava/classpath/gnu/regexp/RETokenRepeated.java +++ b/libjava/classpath/gnu/regexp/RETokenRepeated.java @@ -38,20 +38,27 @@ exception statement from your version. */ package gnu.regexp; -import java.util.Vector; -import java.util.Arrays; +// import java.util.Vector; +// import java.util.Stack; final class RETokenRepeated extends REToken { private REToken token; private int min,max; private boolean stingy; private boolean possessive; + private int tokenFixedLength; RETokenRepeated(int subIndex, REToken token, int min, int max) { super(subIndex); this.token = token; this.min = min; this.max = max; + if (token.returnsFixedLengthMatches()) { + tokenFixedLength = token.getMaximumLength(); + } + else { + tokenFixedLength = -1; + } } /** Sets the minimal matching mode to true. */ @@ -90,69 +97,235 @@ final class RETokenRepeated extends REToken { return (max * tmax); } - private static REMatch findDoables(REToken tk, - CharIndexed input, REMatch mymatch) { - - REMatch.REMatchList doables = new REMatch.REMatchList(); - - // try next repeat at all possible positions - for (REMatch current = mymatch; - current != null; current = current.next) { - REMatch recurrent = (REMatch) current.clone(); - int origin = recurrent.index; - tk = (REToken) tk.clone(); - tk.next = tk.uncle = null; - recurrent.matchFlags |= REMatch.MF_FIND_ALL; - if (tk.match(input, recurrent)) { - for (REMatch m = recurrent; m != null; m = m.next) { - m.matchFlags &= ~REMatch.MF_FIND_ALL; + // The comment "MUST make a clone" below means that some tests + // failed without doing clone(), + + private static class DoablesFinder { + private REToken tk; + private CharIndexed input; + private REMatch rematch; + private boolean findFirst; + + private DoablesFinder(REToken tk, CharIndexed input, REMatch mymatch) { + this.tk = tk; + this.input = input; + this.rematch = (REMatch) mymatch.clone(); // MUST make a clone + this.rematch.backtrackStack = new BacktrackStack(); + findFirst = true; + } + + private REMatch find() { + int origin = rematch.index; + REMatch rem; + if (findFirst) { + rem = tk.findMatch(input, rematch); + findFirst = false; + } + else { + while (true) { + if (rematch.backtrackStack.empty()) { + rem = null; + break; } - if (recurrent.index == origin) recurrent.empty = true; - // add all items in current to doables array - doables.addTail(recurrent); + BacktrackStack.Backtrack bt = rematch.backtrackStack.pop(); + rem = bt.token.backtrack(bt.input, bt.match, bt.param); + if (rem != null) break; } } - return doables.head; + if (rem == null) return null; + if (rem.index == origin) rem.empty = true; + rematch = rem; + return (REMatch) rem.clone(); // MUST make a clone. + } + + boolean noMore() { + return rematch.backtrackStack.empty(); + } } - // We do need to save every possible point, but the number of clone() - // invocations here is really a killer for performance on non-stingy - // repeat operators. I'm open to suggestions... + REMatch findMatch(CharIndexed input, REMatch mymatch) { + if (tokenFixedLength >= 0) return findMatchFixedLength(input, mymatch); + BacktrackStack stack = new BacktrackStack(); + stack.push(new StackedInfo(input, 0, mymatch, null, null)); + return findMatch(stack); + } - // Hypothetical question: can you have a RE that matches 1 times, - // 3 times, 5 times, but not 2 times or 4 times? Does having - // the subexpression back-reference operator allow that? + REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) { + if (tokenFixedLength >= 0) return backtrackFixedLength(input, mymatch, param); + return findMatch((BacktrackStack)param); + } - boolean match(CharIndexed input, REMatch mymatch) { + private static class StackedInfo extends BacktrackStack.Backtrack { + int numRepeats; + int[] visited; + DoablesFinder finder; + StackedInfo(CharIndexed input, int numRepeats, REMatch match, + int[] visited, DoablesFinder finder) { + super(null, input, match, null); + this.numRepeats = numRepeats; + this.visited = visited; + this.finder = finder; + } + } - boolean stopMatchingIfSatisfied = - (mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0; + private REMatch findMatch(BacktrackStack stack) { + // Avoid using recursive calls. + MAIN_LOOP: + while (true) { + + if (stack.empty()) return null; + StackedInfo si = (StackedInfo)(stack.peek()); + CharIndexed input = si.input; + int numRepeats = si.numRepeats; + REMatch mymatch = si.match; + int[] visited = si.visited; + DoablesFinder finder = si.finder; + + if (mymatch.backtrackStack == null) + mymatch.backtrackStack = new BacktrackStack(); + + if (numRepeats >= max) { + stack.pop(); + REMatch m1 = matchRest(input, mymatch); + if (m1 != null) { + if (! stack.empty()) { + m1.backtrackStack.push(new BacktrackStack.Backtrack( + this, input, mymatch, stack)); + } + return m1; + } + if (stingy) { + continue MAIN_LOOP; + } + return null; + } - REMatch newMatch = matchMinimum(input, mymatch); - if (newMatch == null) return false; + if (finder == null) { + finder = new DoablesFinder(token, input, mymatch); + si.finder = finder; + } - // Array of positions we have already visited - int[] visited = initVisited(); - for (REMatch m = newMatch; m != null; m = m.next) { - visited = addVisited(m.index, visited); + if (numRepeats < min) { + while (true) { + REMatch doable = finder.find(); + if (doable == null) { + if (stack.empty()) return null; + stack.pop(); + continue MAIN_LOOP; + } + if (finder.noMore()) stack.pop(); + int newNumRepeats = (doable.empty ? min : numRepeats + 1); + stack.push(new StackedInfo( + input, newNumRepeats, doable, visited, null)); + continue MAIN_LOOP; + } } - int max1 = decreaseMax(max, min); + if (visited == null) visited = initVisited(); - newMatch = _match(input, newMatch, max1, - stopMatchingIfSatisfied, visited); - if (newMatch != null) { - mymatch.assignFrom(newMatch); - return true; + if (stingy) { + REMatch nextMatch = finder.find(); + if (nextMatch != null && !nextMatch.empty) { + stack.push(new StackedInfo( + input, numRepeats + 1, nextMatch, visited, null)); + } + else { + stack.pop(); + } + REMatch m1 = matchRest(input, mymatch); + if (m1 != null) { + if (!stack.empty()) { + m1.backtrackStack.push(new BacktrackStack.Backtrack( + this, input, mymatch, stack)); + } + return m1; + } + else { + continue MAIN_LOOP; + } } - return false; - } - private static int decreaseMax(int m, int n) { - if (m == Integer.MAX_VALUE) return m; - return m - n; + visited = addVisited(mymatch.index, visited); + + DO_THIS: + do { + + boolean emptyMatchFound = false; + + DO_ONE_DOABLE: + while (true) { + + REMatch doable = finder.find(); + if (doable == null) { + break DO_THIS; + } + if (doable.empty) emptyMatchFound = true; + + if (!emptyMatchFound) { + int n = doable.index; + if (! visitedContains(n, visited)) { + visited = addVisited(n, visited); + } + else { + continue DO_ONE_DOABLE; + } + stack.push(new StackedInfo( + input, numRepeats + 1, doable, visited, null)); + REMatch m1 = findMatch(stack); + if (possessive) { + return m1; + } + if (m1 != null) { + m1.backtrackStack.push(new BacktrackStack.Backtrack( + this, input, mymatch, stack)); + return m1; + } + } + else { + REMatch m1 = matchRest(input, doable); + if (possessive) { + return m1; + } + if (m1 != null) { + if (! stack.empty()) { + m1.backtrackStack.push(new BacktrackStack.Backtrack( + this, input, mymatch, stack)); + } + return m1; + } + } + + } // DO_ONE_DOABLE + + } while (false); // DO_THIS only once; + + if (!stack.empty()) { + stack.pop(); + } + if (possessive) { + stack.clear(); + } + REMatch m1 = matchRest(input, mymatch); + if (m1 != null) { + if (! stack.empty()) { + m1.backtrackStack.push(new BacktrackStack.Backtrack( + this, input, mymatch, stack)); + } + return m1; + } + + } // MAIN_LOOP } + boolean match(CharIndexed input, REMatch mymatch) { + REMatch m1 = findMatch(input, mymatch); + if (m1 != null) { + mymatch.assignFrom(m1); + return true; + } + return false; + } + // Array visited is an array of character positions we have already // visited. visited[0] is used to store the effective length of the // array. @@ -183,134 +356,55 @@ final class RETokenRepeated extends REToken { return visited; } - private REMatch _match(CharIndexed input, REMatch mymatch, - int max1, boolean stopMatchingIfSatisfied, - int[] visited) { - - if (max1 == 0) { - return matchRest(input, mymatch); - } - max1 = decreaseMax(max1, 1); - - REMatch.REMatchList allResults = new REMatch.REMatchList(); - - // Depth-first search - - for (REMatch cur = mymatch; cur != null; cur = cur.next) { - - REMatch cur1 = (REMatch) cur.clone(); - - if (stingy) { - REMatch results = matchRest(input, cur1); - if (results != null) { - if (stopMatchingIfSatisfied) { - return results; - } - allResults.addTail(results); - } - } - - DO_THIS: - do { - - boolean emptyMatchFound = false; - REMatch doables = findDoables(token, input, cur1); - if (doables == null) break DO_THIS; - if (doables.empty) emptyMatchFound = true; - - if (!emptyMatchFound) { - REMatch.REMatchList list = new REMatch.REMatchList(); - for (REMatch m = doables; m != null; m = m.next) { - REMatch m1 = (REMatch) m.clone(); - int n = m1.index; - if (! visitedContains(n, visited)) { - visited = addVisited(n, visited); - list.addTail(m1); - } - } - if (list.head == null) break DO_THIS; - doables = list.head; - } - - for (REMatch m = doables; m != null; m = m.next) { - if (! emptyMatchFound) { - REMatch m1 = _match(input, m, max1, - stopMatchingIfSatisfied, visited); - if (possessive) return m1; - if (m1 != null) { - if (stopMatchingIfSatisfied) { - return m1; - } - allResults.addTail(m1); - } - } - else { - REMatch m1 = matchRest(input, m); - if (m1 != null) { - if (stopMatchingIfSatisfied) { - return m1; - } - allResults.addTail(m1); - } - } - } - - } while (false); // DO_THIS only once; - - // This point itself is a candidate. - if (!stingy) { - REMatch m2 = matchRest(input, cur1); - if (m2 != null) { - if (stopMatchingIfSatisfied) { - return m2; - } - allResults.addTail(m2); - } - } + private REMatch matchRest(CharIndexed input, final REMatch newMatch) { + if (next(input, newMatch)) { + return newMatch; } - - return allResults.head; + return null; } - private REMatch matchMinimum(CharIndexed input, final REMatch mymatch) { - // Possible positions for the next repeat to match at - REMatch newMatch = mymatch; - - // number of times we've matched so far - int numRepeats = 0; - - while (numRepeats < min) { - REMatch doables = findDoables(token, input, newMatch); - - // if none of the possibilities worked out, - // it means that minimum number of repeats could not be found. - if (doables == null) return null; - - // reassign where the next repeat can match - newMatch = doables; - - // increment how many repeats we've successfully found - ++numRepeats; - - if (newMatch.empty) break; - } - return newMatch; + private REMatch findMatchFixedLength(CharIndexed input, REMatch mymatch) { + if (mymatch.backtrackStack == null) + mymatch.backtrackStack = new BacktrackStack(); + int numRepeats = token.findFixedLengthMatches(input, (REMatch)mymatch.clone(), max); + if (numRepeats == Integer.MAX_VALUE) numRepeats = min; + int count = numRepeats - min + 1; + if (count <= 0) return null; + int index = 0; + if (!stingy) index = mymatch.index + (tokenFixedLength * numRepeats); + else index = mymatch.index + (tokenFixedLength * min); + return findMatchFixedLength(input, mymatch, index, count); } - private REMatch matchRest(CharIndexed input, final REMatch newMatch) { - REMatch current, single; - REMatch.REMatchList doneIndex = new REMatch.REMatchList(); - // Test all possible matches for this number of repeats - for (current = newMatch; current != null; current = current.next) { - // clone() separates a single match from the chain - single = (REMatch) current.clone(); - if (next(input, single)) { - // chain results to doneIndex - doneIndex.addTail(single); + private REMatch backtrackFixedLength(CharIndexed input, REMatch mymatch, + Object param) { + int[] params = (int[])param; + int index = params[0]; + int count = params[1]; + return findMatchFixedLength(input, mymatch, index, count); + } + + private REMatch findMatchFixedLength(CharIndexed input, REMatch mymatch, + int index, int count) { + REMatch tryMatch = (REMatch) mymatch.clone(); + while (true) { + tryMatch.index = index; + REMatch m = matchRest(input, tryMatch); + count--; + if (stingy) index += tokenFixedLength; + else index -= tokenFixedLength; + if (possessive) return m; + if (m != null) { + if (count > 0) { + m.backtrackStack.push(new BacktrackStack.Backtrack( + this, input, mymatch, + new int[] {index, count})); + } + return m; } + if (count <= 0) return null; } - return doneIndex.head; - } + } void dump(StringBuffer os) { os.append("(?:"); diff --git a/libjava/classpath/gnu/regexp/RETokenStart.java b/libjava/classpath/gnu/regexp/RETokenStart.java index 42e3c0b..b992bd6 100644 --- a/libjava/classpath/gnu/regexp/RETokenStart.java +++ b/libjava/classpath/gnu/regexp/RETokenStart.java @@ -1,5 +1,5 @@ /* gnu/regexp/RETokenStart.java - Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -49,7 +49,7 @@ class RETokenStart extends REToken { return 0; } - boolean match(CharIndexed input, REMatch mymatch) { + REMatch matchThis(CharIndexed input, REMatch mymatch) { // charAt(index-n) may be unknown on a Reader/InputStream. FIXME // Match after a newline if in multiline mode @@ -70,19 +70,26 @@ class RETokenStart extends REToken { ch = input.charAt(mymatch.index - len + i); } while (i < len); - if (found) return next(input, mymatch); + if (found) return mymatch; } } // Don't match at all if REG_NOTBOL is set. - if ((mymatch.eflags & RE.REG_NOTBOL) > 0) return false; + if ((mymatch.eflags & RE.REG_NOTBOL) > 0) return null; if ((mymatch.eflags & RE.REG_ANCHORINDEX) > 0) return (mymatch.anchor == mymatch.offset) ? - next(input, mymatch) : false; + mymatch : null; else return ((mymatch.index == 0) && (mymatch.offset == 0)) ? - next(input, mymatch) : false; + mymatch : null; + } + + boolean returnsFixedLengthmatches() { return true; } + + int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) { + if (matchThis(input, mymatch) != null) return max; + else return 0; } void dump(StringBuffer os) { diff --git a/libjava/classpath/gnu/regexp/RETokenWordBoundary.java b/libjava/classpath/gnu/regexp/RETokenWordBoundary.java index f86214b..1810339 100644 --- a/libjava/classpath/gnu/regexp/RETokenWordBoundary.java +++ b/libjava/classpath/gnu/regexp/RETokenWordBoundary.java @@ -1,5 +1,5 @@ /* gnu/regexp/RETokenWordBoundary.java - Copyright (C) 2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -58,7 +58,7 @@ final class RETokenWordBoundary extends REToken { } - boolean match(CharIndexed input, REMatch mymatch) { + REMatch matchThis(CharIndexed input, REMatch mymatch) { // Word boundary means input[index-1] was a word character // and input[index] is not, or input[index] is a word character // and input[index-1] was not @@ -94,7 +94,14 @@ final class RETokenWordBoundary extends REToken { if (negated) doNext = !doNext; - return (doNext ? next(input, mymatch) : false); + return (doNext ? mymatch : null); + } + + boolean returnsFixedLengthMatches() { return true; } + + int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) { + if(matchThis(input, mymatch) != null) return max; + else return 0; } void dump(StringBuffer os) { |