aboutsummaryrefslogtreecommitdiff
path: root/libjava/classpath/gnu/regexp
diff options
context:
space:
mode:
authorMark Wielaard <mark@gcc.gnu.org>2006-05-18 17:29:21 +0000
committerMark Wielaard <mark@gcc.gnu.org>2006-05-18 17:29:21 +0000
commit4f9533c7722fa07511a94d005227961f4a4dec23 (patch)
tree9f9c470de62ee62fba1331a396450d728d2b1fad /libjava/classpath/gnu/regexp
parenteaec4980e139903ae9b274d1abcf3a13946603a8 (diff)
downloadgcc-4f9533c7722fa07511a94d005227961f4a4dec23.zip
gcc-4f9533c7722fa07511a94d005227961f4a4dec23.tar.gz
gcc-4f9533c7722fa07511a94d005227961f4a4dec23.tar.bz2
Imported GNU Classpath 0.90
Imported GNU Classpath 0.90 * scripts/makemake.tcl: LocaleData.java moved to gnu/java/locale. * sources.am: Regenerated. * gcj/javaprims.h: Regenerated. * Makefile.in: Regenerated. * gcj/Makefile.in: Regenerated. * include/Makefile.in: Regenerated. * testsuite/Makefile.in: Regenerated. * gnu/java/lang/VMInstrumentationImpl.java: New override. * gnu/java/net/local/LocalSocketImpl.java: Likewise. * gnu/classpath/jdwp/VMMethod.java: Likewise. * gnu/classpath/jdwp/VMVirtualMachine.java: Update to latest interface. * java/lang/Thread.java: Add UncaughtExceptionHandler. * java/lang/reflect/Method.java: Implements GenericDeclaration and isSynthetic(), * java/lang/reflect/Field.java: Likewise. * java/lang/reflect/Constructor.java * java/lang/Class.java: Implements Type, GenericDeclaration, getSimpleName() and getEnclosing*() methods. * java/lang/Class.h: Add new public methods. * java/lang/Math.java: Add signum(), ulp() and log10(). * java/lang/natMath.cc (log10): New function. * java/security/VMSecureRandom.java: New override. * java/util/logging/Logger.java: Updated to latest classpath version. * java/util/logging/LogManager.java: New override. From-SVN: r113887
Diffstat (limited to 'libjava/classpath/gnu/regexp')
-rw-r--r--libjava/classpath/gnu/regexp/BacktrackStack.java112
-rw-r--r--libjava/classpath/gnu/regexp/CharIndexed.java20
-rw-r--r--libjava/classpath/gnu/regexp/CharIndexedCharArray.java33
-rw-r--r--libjava/classpath/gnu/regexp/CharIndexedCharSequence.java82
-rw-r--r--libjava/classpath/gnu/regexp/CharIndexedInputStream.java22
-rw-r--r--libjava/classpath/gnu/regexp/CharIndexedString.java35
-rw-r--r--libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java32
-rw-r--r--libjava/classpath/gnu/regexp/RE.java190
-rw-r--r--libjava/classpath/gnu/regexp/REMatch.java101
-rw-r--r--libjava/classpath/gnu/regexp/REToken.java116
-rw-r--r--libjava/classpath/gnu/regexp/RETokenAny.java40
-rw-r--r--libjava/classpath/gnu/regexp/RETokenBackRef.java22
-rw-r--r--libjava/classpath/gnu/regexp/RETokenChar.java47
-rw-r--r--libjava/classpath/gnu/regexp/RETokenEnd.java22
-rw-r--r--libjava/classpath/gnu/regexp/RETokenEndOfPreviousMatch.java72
-rw-r--r--libjava/classpath/gnu/regexp/RETokenEndSub.java17
-rw-r--r--libjava/classpath/gnu/regexp/RETokenIndependent.java12
-rw-r--r--libjava/classpath/gnu/regexp/RETokenLookAhead.java27
-rw-r--r--libjava/classpath/gnu/regexp/RETokenLookBehind.java40
-rw-r--r--libjava/classpath/gnu/regexp/RETokenNamedProperty.java42
-rw-r--r--libjava/classpath/gnu/regexp/RETokenOneOf.java111
-rw-r--r--libjava/classpath/gnu/regexp/RETokenPOSIX.java35
-rw-r--r--libjava/classpath/gnu/regexp/RETokenRange.java34
-rw-r--r--libjava/classpath/gnu/regexp/RETokenRepeated.java432
-rw-r--r--libjava/classpath/gnu/regexp/RETokenStart.java19
-rw-r--r--libjava/classpath/gnu/regexp/RETokenWordBoundary.java13
26 files changed, 1229 insertions, 499 deletions
diff --git a/libjava/classpath/gnu/regexp/BacktrackStack.java b/libjava/classpath/gnu/regexp/BacktrackStack.java
new file mode 100644
index 0000000..0fc73f0
--- /dev/null
+++ b/libjava/classpath/gnu/regexp/BacktrackStack.java
@@ -0,0 +1,112 @@
+/* gnu/regexp/BacktrackStack.java
+ Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+
+/**
+ * An instance of this class represents a stack
+ * used for backtracking.
+ *
+ * @author Ito Kazumitsu</A>
+ */
+final class BacktrackStack {
+
+ /** A set of data to be used for backtracking. */
+ static class Backtrack {
+ /** REToken to which to go back */
+ REToken token;
+ /** CharIndexed on which matches are being searched for. */
+ CharIndexed input;
+ /** REMatch to be used by the REToken token. */
+ REMatch match;
+ /** Some parameter used by the token's backtrack method. */
+ Object param;
+ Backtrack(REToken token, CharIndexed input, REMatch match, Object param) {
+ this.token = token;
+ this.input = input;
+ // REMatch may change before backtracking is needed. So we
+ // keep a clone of it.
+ this.match = (REMatch) match.clone();
+ this.param = param;
+ }
+ }
+
+ Backtrack[] stack;
+ private int size;
+ private int capacity;
+ private static final int INITIAL_CAPACITY = 32;
+ private static final int CAPACITY_INCREMENT = 16;
+
+ BacktrackStack() {
+ stack = new Backtrack[INITIAL_CAPACITY];
+ size = 0;
+ capacity = INITIAL_CAPACITY;
+ }
+
+ boolean empty() {
+ return size == 0;
+ }
+
+ Backtrack peek() {
+ return stack[size - 1];
+ }
+
+ Backtrack pop() {
+ Backtrack bt = stack[--size];
+ stack[size] = null;
+ return bt;
+ }
+
+ void clear() {
+ for (int i = 0; i < size; i++) {
+ stack[i] = null;
+ }
+ size = 0;
+ }
+
+ void push(Backtrack bt) {
+ if (size >= capacity) {
+ capacity += CAPACITY_INCREMENT;
+ Backtrack[] newStack = new Backtrack[capacity];
+ System.arraycopy(stack, 0, newStack, 0, size);
+ stack = newStack;
+ }
+ stack[size++] = bt;
+ }
+
+}
diff --git a/libjava/classpath/gnu/regexp/CharIndexed.java b/libjava/classpath/gnu/regexp/CharIndexed.java
index df1d893..8aedc49 100644
--- a/libjava/classpath/gnu/regexp/CharIndexed.java
+++ b/libjava/classpath/gnu/regexp/CharIndexed.java
@@ -93,4 +93,24 @@ public interface CharIndexed {
* Returns the effective length of this CharIndexed
*/
int length();
+
+ /**
+ * Sets the REMatch last found on this input.
+ */
+ void setLastMatch(REMatch match);
+
+ /**
+ * Returns the REMatch last found on this input.
+ */
+ REMatch getLastMatch();
+
+ /**
+ * Returns the anchor.
+ */
+ int getAnchor();
+
+ /**
+ * Sets the anchor.
+ */
+ void setAnchor(int anchor);
}
diff --git a/libjava/classpath/gnu/regexp/CharIndexedCharArray.java b/libjava/classpath/gnu/regexp/CharIndexedCharArray.java
index 1388d47..96a609b 100644
--- a/libjava/classpath/gnu/regexp/CharIndexedCharArray.java
+++ b/libjava/classpath/gnu/regexp/CharIndexedCharArray.java
@@ -1,5 +1,5 @@
/* gnu/regexp/CharIndexedCharArray.java
- Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -36,36 +36,11 @@ obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.regexp;
-import java.io.Serializable;
+import java.nio.CharBuffer;
-class CharIndexedCharArray implements CharIndexed, Serializable {
- private char[] s;
- private int anchor;
+class CharIndexedCharArray extends CharIndexedCharSequence {
CharIndexedCharArray(char[] str, int index) {
- s = str;
- anchor = index;
- }
-
- public char charAt(int index) {
- int pos = anchor + index;
- return ((pos < s.length) && (pos >= 0)) ? s[pos] : OUT_OF_BOUNDS;
- }
-
- public boolean isValid() {
- return (anchor < s.length);
- }
-
- public boolean move(int index) {
- return ((anchor += index) < s.length);
- }
-
- public CharIndexed lookBehind(int index, int length) {
- if (length > (anchor + index)) length = anchor + index;
- return new CharIndexedCharArray(s, anchor + index - length);
- }
-
- public int length() {
- return s.length - anchor;
+ super(CharBuffer.wrap(str), index);
}
}
diff --git a/libjava/classpath/gnu/regexp/CharIndexedCharSequence.java b/libjava/classpath/gnu/regexp/CharIndexedCharSequence.java
new file mode 100644
index 0000000..5bbe4ab
--- /dev/null
+++ b/libjava/classpath/gnu/regexp/CharIndexedCharSequence.java
@@ -0,0 +1,82 @@
+/* gnu/regexp/CharIndexedCharSequence.java
+ Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+import java.io.Serializable;
+
+class CharIndexedCharSequence implements CharIndexed, Serializable {
+ private CharSequence s;
+ private int anchor;
+ private int len;
+
+ CharIndexedCharSequence(CharSequence s, int index) {
+ this.s = s;
+ len = s.length();
+ anchor = index;
+ }
+
+ public char charAt(int index) {
+ int pos = anchor + index;
+ return ((pos < len) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS;
+ }
+
+ public boolean isValid() {
+ return (anchor < len);
+ }
+
+ public boolean move(int index) {
+ return ((anchor += index) < len);
+ }
+
+ public CharIndexed lookBehind(int index, int length) {
+ if (length > (anchor + index)) length = anchor + index;
+ return new CharIndexedCharSequence(s, anchor + index - length);
+ }
+
+ public int length() {
+ return len - anchor;
+ }
+
+ private REMatch lastMatch;
+ public void setLastMatch(REMatch match) {
+ lastMatch = (REMatch)match.clone();
+ lastMatch.anchor = anchor;
+ }
+ public REMatch getLastMatch() { return lastMatch; }
+ public int getAnchor() { return anchor; }
+ public void setAnchor(int anchor) { this.anchor = anchor; }
+}
diff --git a/libjava/classpath/gnu/regexp/CharIndexedInputStream.java b/libjava/classpath/gnu/regexp/CharIndexedInputStream.java
index d5225a7..290a94e 100644
--- a/libjava/classpath/gnu/regexp/CharIndexedInputStream.java
+++ b/libjava/classpath/gnu/regexp/CharIndexedInputStream.java
@@ -155,5 +155,27 @@ class CharIndexedInputStream implements CharIndexed {
throw new UnsupportedOperationException(
"difficult to tell the length for an input stream");
}
+
+ public void setLastMatch(REMatch match) {
+ throw new UnsupportedOperationException(
+ "difficult to support setLastMatch for an input stream");
+ }
+
+ public REMatch getLastMatch() {
+ throw new UnsupportedOperationException(
+ "difficult to support getLastMatch for an input stream");
+ }
+
+ public int getAnchor() {
+ throw new UnsupportedOperationException(
+ "difficult to support getAnchor for an input stream");
+ }
+
+ public void setAnchor(int anchor) {
+ throw new UnsupportedOperationException(
+ "difficult to support setAnchor for an input stream");
+ }
+
+
}
diff --git a/libjava/classpath/gnu/regexp/CharIndexedString.java b/libjava/classpath/gnu/regexp/CharIndexedString.java
index fe4fa8f..2498265 100644
--- a/libjava/classpath/gnu/regexp/CharIndexedString.java
+++ b/libjava/classpath/gnu/regexp/CharIndexedString.java
@@ -1,5 +1,5 @@
/* gnu/regexp/CharIndexedString.java
- Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -36,38 +36,9 @@ obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.regexp;
-import java.io.Serializable;
-class CharIndexedString implements CharIndexed, Serializable {
- private String s;
- private int anchor;
- private int len;
-
+class CharIndexedString extends CharIndexedCharSequence {
CharIndexedString(String str, int index) {
- s = str;
- len = s.length();
- anchor = index;
- }
-
- public char charAt(int index) {
- int pos = anchor + index;
- return ((pos < len) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS;
- }
-
- public boolean isValid() {
- return (anchor < len);
- }
-
- public boolean move(int index) {
- return ((anchor += index) < len);
- }
-
- public CharIndexed lookBehind(int index, int length) {
- if (length > (anchor + index)) length = anchor + index;
- return new CharIndexedString(s, anchor + index - length);
- }
-
- public int length() {
- return len - anchor;
+ super(str, index);
}
}
diff --git a/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java b/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java
index 9c9118d..d6b03bb 100644
--- a/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java
+++ b/libjava/classpath/gnu/regexp/CharIndexedStringBuffer.java
@@ -1,5 +1,5 @@
/* gnu/regexp/CharIndexedStringBuffer.java
- Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -36,36 +36,10 @@ obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.regexp;
-import java.io.Serializable;
-class CharIndexedStringBuffer implements CharIndexed, Serializable {
- private StringBuffer s;
- private int anchor;
+class CharIndexedStringBuffer extends CharIndexedCharSequence {
CharIndexedStringBuffer(StringBuffer str, int index) {
- s = str;
- anchor = index;
+ super(str, index);
}
-
- public char charAt(int index) {
- int pos = anchor + index;
- return ((pos < s.length()) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS;
- }
-
- public boolean isValid() {
- return (anchor < s.length());
- }
-
- public boolean move(int index) {
- return ((anchor += index) < s.length());
- }
-
- public CharIndexed lookBehind(int index, int length) {
- if (length > (anchor + index)) length = anchor + index;
- return new CharIndexedStringBuffer(s, anchor + index - length);
- }
-
- public int length() {
- return s.length() - anchor;
- }
}
diff --git a/libjava/classpath/gnu/regexp/RE.java b/libjava/classpath/gnu/regexp/RE.java
index ef606a6..e0665f0 100644
--- a/libjava/classpath/gnu/regexp/RE.java
+++ b/libjava/classpath/gnu/regexp/RE.java
@@ -41,6 +41,7 @@ import java.io.Serializable;
import java.util.Locale;
import java.util.PropertyResourceBundle;
import java.util.ResourceBundle;
+import java.util.Stack;
import java.util.Vector;
/**
@@ -78,13 +79,18 @@ import java.util.Vector;
* <P>
*
* These methods all have similar argument lists. The input can be a
- * String, a character array, a StringBuffer, or an
+ * CharIndexed, String, a character array, a StringBuffer, or an
* InputStream of some sort. Note that when using an
* InputStream, the stream read position cannot be guaranteed after
* attempting a match (this is not a bug, but a consequence of the way
* regular expressions work). Using an REMatchEnumeration can
* eliminate most positioning problems.
*
+ * Although the input object can be of various types, it is recommended
+ * that it should be a CharIndexed because {@link CharIndexed#getLastMatch()}
+ * can show the last match found on this input, which helps the expression
+ * \G work as the end of the previous match.
+ *
* <P>
*
* The optional index argument specifies the offset from the beginning
@@ -235,6 +241,17 @@ public class RE extends REToken {
*/
public static final int REG_REPLACE_USE_BACKSLASHESCAPE = 0x0200;
+ /**
+ * Compilation flag. Allow whitespace and comments in pattern.
+ * This is equivalent to the "/x" operator in Perl.
+ */
+ public static final int REG_X_COMMENTS = 0x0400;
+
+ /**
+ * Compilation flag. If set, REG_ICASE is effective only for US-ASCII.
+ */
+ public static final int REG_ICASE_USASCII = 0x0800;
+
/** Returns a string representing the version of the gnu.regexp package. */
public static final String version() {
return VERSION;
@@ -334,6 +351,7 @@ public class RE extends REToken {
// Precalculate these so we don't pay for the math every time we
// need to access them.
boolean insens = ((cflags & REG_ICASE) > 0);
+ boolean insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
// Parse pattern into tokens. Does anyone know if it's more efficient
// to use char[] than a String.charAt()? I'm assuming so.
@@ -372,6 +390,31 @@ public class RE extends REToken {
if (quot)
unit.bk = false;
+ if (((cflags & REG_X_COMMENTS) > 0) && (!unit.bk) && (!quot)) {
+ if (Character.isWhitespace(unit.ch)) {
+ continue;
+ }
+ if (unit.ch == '#') {
+ for (int i = index; i < pLength; i++) {
+ if (pattern[i] == '\n') {
+ index = i + 1;
+ continue;
+ }
+ else if (pattern[i] == '\r') {
+ if (i + 1 < pLength && pattern[i + 1] == '\n') {
+ index = i + 2;
+ }
+ else {
+ index = i + 1;
+ }
+ continue;
+ }
+ }
+ index = pLength;
+ continue;
+ }
+ }
+
// ALTERNATION OPERATOR
// \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
// not available if RE_LIMITED_OPS is set
@@ -420,6 +463,7 @@ public class RE extends REToken {
else {
addToken(currentToken);
currentToken = new RETokenChar(subIndex,unit.ch,insens);
+ if (insensUSASCII) currentToken.unicodeAware = false;
}
}
@@ -495,8 +539,8 @@ public class RE extends REToken {
case 'd':
case 'm':
case 's':
- // case 'u': not supported
- // case 'x': not supported
+ case 'u':
+ case 'x':
case '-':
if (!syntax.get(RESyntax.RE_EMBEDDED_FLAGS)) break;
// Set or reset syntax flags.
@@ -535,8 +579,20 @@ public class RE extends REToken {
newCflags |= REG_DOT_NEWLINE;
flagIndex++;
break;
- // case 'u': not supported
- // case 'x': not supported
+ case 'u':
+ if (negate)
+ newCflags |= REG_ICASE_USASCII;
+ else
+ newCflags &= ~REG_ICASE_USASCII;
+ flagIndex++;
+ break;
+ case 'x':
+ if (negate)
+ newCflags &= ~REG_X_COMMENTS;
+ else
+ newCflags |= REG_X_COMMENTS;
+ flagIndex++;
+ break;
case '-':
negate = true;
flagIndex++;
@@ -553,6 +609,7 @@ public class RE extends REToken {
syntax = newSyntax;
cflags = newCflags;
insens = ((cflags & REG_ICASE) > 0);
+ insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
// This can be treated as though it were a comment.
comment = true;
index = flagIndex - 1;
@@ -565,6 +622,7 @@ public class RE extends REToken {
syntax = newSyntax;
cflags = newCflags;
insens = ((cflags & REG_ICASE) > 0);
+ insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
index = flagIndex -1;
// Fall through to the next case.
}
@@ -673,6 +731,7 @@ public class RE extends REToken {
syntax = savedSyntax;
cflags = savedCflags;
insens = ((cflags & REG_ICASE) > 0);
+ insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
flagsSaved = false;
}
} // not a comment
@@ -785,6 +844,7 @@ public class RE extends REToken {
index = index - 2 + ce.len;
addToken(currentToken);
currentToken = new RETokenChar(subIndex,ce.ch,insens);
+ if (insensUSASCII) currentToken.unicodeAware = false;
}
// BACKREFERENCE OPERATOR
@@ -812,6 +872,7 @@ public class RE extends REToken {
int num = parseInt(pattern, numBegin, numEnd-numBegin, 10);
currentToken = new RETokenBackRef(subIndex,num,insens);
+ if (insensUSASCII) currentToken.unicodeAware = false;
index = numEnd;
}
@@ -860,6 +921,7 @@ public class RE extends REToken {
else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
addToken(currentToken);
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
+ if (insensUSASCII) currentToken.unicodeAware = false;
}
// NON-DIGIT OPERATOR
@@ -868,6 +930,7 @@ public class RE extends REToken {
else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
addToken(currentToken);
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
+ if (insensUSASCII) currentToken.unicodeAware = false;
}
// NEWLINE ESCAPE
@@ -892,6 +955,7 @@ public class RE extends REToken {
else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
addToken(currentToken);
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
+ if (insensUSASCII) currentToken.unicodeAware = false;
}
// NON-WHITESPACE OPERATOR
@@ -900,6 +964,7 @@ public class RE extends REToken {
else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
addToken(currentToken);
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
+ if (insensUSASCII) currentToken.unicodeAware = false;
}
// TAB ESCAPE
@@ -916,6 +981,7 @@ public class RE extends REToken {
else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
addToken(currentToken);
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
+ if (insensUSASCII) currentToken.unicodeAware = false;
}
// NON-ALPHANUMERIC OPERATOR
@@ -924,12 +990,19 @@ public class RE extends REToken {
else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
addToken(currentToken);
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
+ if (insensUSASCII) currentToken.unicodeAware = false;
}
// END OF STRING OPERATOR
- // \Z
+ // \Z, \z
- else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
+ // FIXME: \Z and \z are different in that if the input string
+ // ends with a line terminator, \Z matches the position before
+ // the final terminator. This special behavior of \Z is yet
+ // to be implemented.
+
+ else if (unit.bk && (unit.ch == 'Z' || unit.ch == 'z') &&
+ syntax.get(RESyntax.RE_STRING_ANCHORS)) {
addToken(currentToken);
currentToken = new RETokenEnd(subIndex,null);
}
@@ -945,6 +1018,7 @@ public class RE extends REToken {
index = index - 2 + ce.len;
addToken(currentToken);
currentToken = new RETokenChar(subIndex,ce.ch,insens);
+ if (insensUSASCII) currentToken.unicodeAware = false;
}
// NAMED PROPERTY
@@ -958,6 +1032,16 @@ public class RE extends REToken {
index = index - 2 + np.len;
addToken(currentToken);
currentToken = getRETokenNamedProperty(subIndex,np,insens,index);
+ if (insensUSASCII) currentToken.unicodeAware = false;
+ }
+
+ // END OF PREVIOUS MATCH
+ // \G
+
+ else if (unit.bk && (unit.ch == 'G') &&
+ syntax.get(RESyntax.RE_STRING_ANCHORS)) {
+ addToken(currentToken);
+ currentToken = new RETokenEndOfPreviousMatch(subIndex);
}
// NON-SPECIAL CHARACTER (or escape to make literal)
@@ -966,6 +1050,7 @@ public class RE extends REToken {
else { // not a special character
addToken(currentToken);
currentToken = new RETokenChar(subIndex,unit.ch,insens);
+ if (insensUSASCII) currentToken.unicodeAware = false;
}
} // end while
@@ -1006,6 +1091,7 @@ public class RE extends REToken {
throws REException {
boolean insens = ((cflags & REG_ICASE) > 0);
+ boolean insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
Vector options = new Vector();
Vector addition = new Vector();
boolean additionAndAppeared = false;
@@ -1035,7 +1121,9 @@ public class RE extends REToken {
if ((ch == '-') && (lastCharIsSet)) {
if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
if ((ch = pattern[index]) == ']') {
- options.addElement(new RETokenChar(subIndex,lastChar,insens));
+ RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
+ if (insensUSASCII) t.unicodeAware = false;
+ options.addElement(t);
lastChar = '-';
} else {
if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
@@ -1045,7 +1133,9 @@ public class RE extends REToken {
ch = ce.ch;
index = index + ce.len - 1;
}
- options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
+ RETokenRange t = new RETokenRange(subIndex,lastChar,ch,insens);
+ if (insensUSASCII) t.unicodeAware = false;
+ options.addElement(t);
lastChar = 0; lastCharIsSet = false;
index++;
}
@@ -1088,12 +1178,20 @@ public class RE extends REToken {
asciiEsc = ce.ch; asciiEscIsSet = true;
index = index - 1 + ce.len - 1;
}
- if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
+ if (lastCharIsSet) {
+ RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
+ if (insensUSASCII) t.unicodeAware = false;
+ options.addElement(t);
+ }
if (posixID != -1) {
- options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
+ RETokenPOSIX t = new RETokenPOSIX(subIndex,posixID,insens,negate);
+ if (insensUSASCII) t.unicodeAware = false;
+ options.addElement(t);
} else if (np != null) {
- options.addElement(getRETokenNamedProperty(subIndex,np,insens,index));
+ RETokenNamedProperty t = getRETokenNamedProperty(subIndex,np,insens,index);
+ if (insensUSASCII) t.unicodeAware = false;
+ options.addElement(t);
} else if (asciiEscIsSet) {
lastChar = asciiEsc; lastCharIsSet = true;
} else {
@@ -1104,8 +1202,11 @@ public class RE extends REToken {
StringBuffer posixSet = new StringBuffer();
index = getPosixSet(pattern,index+1,posixSet);
int posixId = RETokenPOSIX.intValue(posixSet.toString());
- if (posixId != -1)
- options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
+ if (posixId != -1) {
+ RETokenPOSIX t = new RETokenPOSIX(subIndex,posixId,insens,false);
+ if (insensUSASCII) t.unicodeAware = false;
+ options.addElement(t);
+ }
} else if ((ch == '[') && (syntax.get(RESyntax.RE_NESTED_CHARCLASS))) {
ParseCharClassResult result = parseCharClass(
subIndex, pattern, index, pLength, cflags, syntax, 0);
@@ -1158,14 +1259,22 @@ public class RE extends REToken {
result.index: result.index - 1);
}
} else {
- if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
+ if (lastCharIsSet) {
+ RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
+ if (insensUSASCII) t.unicodeAware = false;
+ options.addElement(t);
+ }
lastChar = ch; lastCharIsSet = true;
}
if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
} // while in list
// Out of list, index is one past ']'
- if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
+ if (lastCharIsSet) {
+ RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
+ if (insensUSASCII) t.unicodeAware = false;
+ options.addElement(t);
+ }
ParseCharClassResult result = new ParseCharClassResult();
// Create a new RETokenOneOf
@@ -1396,11 +1505,10 @@ public class RE extends REToken {
return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
REMatch m = new REMatch(numSubs, index, eflags);
if (firstToken.match(input, m)) {
- while (m != null) {
+ if (m != null) {
if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
return true;
}
- m = m.next;
}
}
return false;
@@ -1508,17 +1616,27 @@ public class RE extends REToken {
}
/* Implements abstract method REToken.match() */
- boolean match(CharIndexed input, REMatch mymatch) {
+ boolean match(CharIndexed input, REMatch mymatch) {
if (firstToken == null) {
return next(input, mymatch);
}
// Note the start of this subexpression
- mymatch.start[subIndex] = mymatch.index;
+ mymatch.start1[subIndex] = mymatch.index;
return firstToken.match(input, mymatch);
}
-
+
+ REMatch findMatch(CharIndexed input, REMatch mymatch) {
+ if (mymatch.backtrackStack == null)
+ mymatch.backtrackStack = new BacktrackStack();
+ boolean b = match(input, mymatch);
+ if (b) {
+ return mymatch;
+ }
+ return null;
+ }
+
/**
* Returns the first match found in the input. If no match is found,
* null is returned.
@@ -1602,6 +1720,7 @@ public class RE extends REToken {
*/
best.end[0] = best.index;
best.finish(input);
+ input.setLastMatch(best);
return best;
}
}
@@ -1942,19 +2061,34 @@ public class RE extends REToken {
}
void dump(StringBuffer os) {
- os.append('(');
+ os.append("(?#startRE subIndex=" + subIndex + ")");
if (subIndex == 0)
os.append("?:");
if (firstToken != null)
firstToken.dumpAll(os);
- os.append(')');
+ if (subIndex == 0)
+ os.append(")");
+ os.append("(?#endRE subIndex=" + subIndex + ")");
}
// Cast input appropriately or throw exception
- private static CharIndexed makeCharIndexed(Object input, int index) {
- // We could let a String fall through to final input, but since
- // it's the most likely input type, we check it first.
- if (input instanceof String)
+ // This method was originally a private method, but has been made
+ // public because java.util.regex.Matcher uses this.
+ public static CharIndexed makeCharIndexed(Object input, int index) {
+ // The case where input is already a CharIndexed is supposed
+ // be the most likely because this is the case with
+ // java.util.regex.Matcher.
+ // We could let a String or a CharSequence fall through
+ // to final input, but since it'a very likely input type,
+ // we check it first.
+ if (input instanceof CharIndexed) {
+ CharIndexed ci = (CharIndexed) input;
+ ci.setAnchor(index);
+ return ci;
+ }
+ else if (input instanceof CharSequence)
+ return new CharIndexedCharSequence((CharSequence) input,index);
+ else if (input instanceof String)
return new CharIndexedString((String) input,index);
else if (input instanceof char[])
return new CharIndexedCharArray((char[]) input,index);
@@ -1962,8 +2096,6 @@ public class RE extends REToken {
return new CharIndexedStringBuffer((StringBuffer) input,index);
else if (input instanceof InputStream)
return new CharIndexedInputStream((InputStream) input,index);
- else if (input instanceof CharIndexed)
- return (CharIndexed) input; // do we lose index info?
else
return new CharIndexedString(input.toString(), index);
}
diff --git a/libjava/classpath/gnu/regexp/REMatch.java b/libjava/classpath/gnu/regexp/REMatch.java
index 91a3c02..140a9c4 100644
--- a/libjava/classpath/gnu/regexp/REMatch.java
+++ b/libjava/classpath/gnu/regexp/REMatch.java
@@ -49,6 +49,7 @@ import java.io.Serializable;
*/
public final class REMatch implements Serializable, Cloneable {
private String matchedText;
+ private CharIndexed matchedCharIndexed;
// These variables are package scope for fast access within the engine
int eflags; // execution flags this match was made using
@@ -64,20 +65,28 @@ public final class REMatch implements Serializable, Cloneable {
// Package scope; used by RE.
int index; // used while matching to mark current match position in input
+ // start1[i] is set when the i-th subexp starts. And start1[i] is copied
+ // to start[i] when the i-th subexp ends. So start[i] keeps the previously
+ // assigned value while the i-th subexp is being processed. This makes
+ // backreference to the i-th subexp within the i-th subexp possible.
int[] start; // start positions (relative to offset) for each (sub)exp.
+ int[] start1; // start positions (relative to offset) for each (sub)exp.
int[] end; // end positions for the same
- REMatch next; // other possibility (to avoid having to use arrays)
+ // start[i] == -1 or end[i] == -1 means that the start/end position is void.
+ // start[i] == p or end[i] == p where p < 0 and p != -1 means that
+ // the actual start/end position is (p+1). Start/end positions may
+ // become negative when the subexpression is in a RETokenLookBehind.
boolean empty; // empty string matched. This flag is used only within
// RETokenRepeated.
- int matchFlags; // flags passed to match methods
- static final int MF_FIND_ALL = 0x01;
+
+ BacktrackStack backtrackStack;
public Object clone() {
try {
REMatch copy = (REMatch) super.clone();
- copy.next = null;
copy.start = (int[]) start.clone();
+ copy.start1 = (int[]) start1.clone();
copy.end = (int[]) end.clone();
return copy;
@@ -88,14 +97,15 @@ public final class REMatch implements Serializable, Cloneable {
void assignFrom(REMatch other) {
start = other.start;
+ start1 = other.start1;
end = other.end;
index = other.index;
- // need to deep clone?
- next = other.next;
+ backtrackStack = other.backtrackStack;
}
REMatch(int subs, int anchor, int eflags) {
start = new int[subs+1];
+ start1 = new int[subs+1];
end = new int[subs+1];
this.anchor = anchor;
this.eflags = eflags;
@@ -109,6 +119,7 @@ public final class REMatch implements Serializable, Cloneable {
for (i = 0; i < end[0]; i++)
sb.append(text.charAt(i));
matchedText = sb.toString();
+ matchedCharIndexed = text;
for (i = 0; i < start.length; i++) {
// If any subexpressions didn't terminate, they don't count
// TODO check if this code ever gets hit
@@ -117,7 +128,7 @@ public final class REMatch implements Serializable, Cloneable {
end[i] = -1;
}
}
- next = null; // cut off alternates
+ backtrackStack = null;
}
/** Clears the current match and moves the offset to the new index. */
@@ -125,9 +136,9 @@ public final class REMatch implements Serializable, Cloneable {
offset = index;
this.index = 0;
for (int i = 0; i < start.length; i++) {
- start[i] = end[i] = -1;
+ start[i] = start1[i] = end[i] = -1;
}
- next = null; // cut off alternates
+ backtrackStack = null;
}
/**
@@ -184,7 +195,19 @@ public final class REMatch implements Serializable, Cloneable {
if ((sub >= start.length) || sub < 0)
throw new IndexOutOfBoundsException("No group " + sub);
if (start[sub] == -1) return null;
- return (matchedText.substring(start[sub],end[sub]));
+ if (start[sub] >= 0 && end[sub] <= matchedText.length())
+ return (matchedText.substring(start[sub],end[sub]));
+ else {
+ // This case occurs with RETokenLookAhead or RETokenLookBehind.
+ StringBuffer sb = new StringBuffer();
+ int s = start[sub];
+ int e = end[sub];
+ if (s < 0) s += 1;
+ if (e < 0) e += 1;
+ for (int i = start[0] + s; i < start[0] + e; i++)
+ sb.append(matchedCharIndexed.charAt(i));
+ return sb.toString();
+ }
}
/**
@@ -198,7 +221,8 @@ public final class REMatch implements Serializable, Cloneable {
public int getSubStartIndex(int sub) {
if (sub >= start.length) return -1;
int x = start[sub];
- return (x == -1) ? x : offset + x;
+ return (x == -1) ? x :
+ (x >= 0) ? offset + x : offset + x + 1;
}
/**
@@ -212,7 +236,8 @@ public final class REMatch implements Serializable, Cloneable {
public int getStartIndex(int sub) {
if (sub >= start.length) return -1;
int x = start[sub];
- return (x == -1) ? x : offset + x;
+ return (x == -1) ? x :
+ (x >= 0) ? offset + x : offset + x + 1;
}
/**
@@ -226,7 +251,8 @@ public final class REMatch implements Serializable, Cloneable {
public int getSubEndIndex(int sub) {
if (sub >= start.length) return -1;
int x = end[sub];
- return (x == -1) ? x : offset + x;
+ return (x == -1) ? x :
+ (x >= 0) ? offset + x : offset + x + 1;
}
/**
@@ -239,7 +265,8 @@ public final class REMatch implements Serializable, Cloneable {
public int getEndIndex(int sub) {
if (sub >= start.length) return -1;
int x = end[sub];
- return (x == -1) ? x : offset + x;
+ return (x == -1) ? x :
+ (x >= 0) ? offset + x : offset + x + 1;
}
/**
@@ -279,41 +306,19 @@ public final class REMatch implements Serializable, Cloneable {
return output.toString();
}
- static class REMatchList {
- REMatch head;
- REMatch tail;
- REMatchList() {
- head = tail = null;
- }
- /* Not used now. But we may need this some day?
- void addHead(REMatch newone) {
- if (head == null) {
- head = newone;
- tail = newone;
- while (tail.next != null) {
- tail = tail.next;
- }
- }
- else {
- REMatch tmp = newone;
- while (tmp.next != null) tmp = tmp.next;
- tmp.next = head;
- head = newone;
- }
- }
- */
- void addTail(REMatch newone) {
- if (head == null) {
- head = newone;
- tail = newone;
- }
- else {
- tail.next = newone;
- }
- while (tail.next != null) {
- tail = tail.next;
- }
+/* The following are used for debugging purpose
+ static String d(REMatch m) {
+ if (m == null) return "null";
+ else return "[" + m.index + "]";
+ }
+
+ String substringUptoIndex(CharIndexed input) {
+ StringBuffer sb = new StringBuffer();
+ for (int i = 0; i < index; i++) {
+ sb.append(input.charAt(i));
}
+ return sb.toString();
}
+*/
}
diff --git a/libjava/classpath/gnu/regexp/REToken.java b/libjava/classpath/gnu/regexp/REToken.java
index 5f4659b..f2abc02 100644
--- a/libjava/classpath/gnu/regexp/REToken.java
+++ b/libjava/classpath/gnu/regexp/REToken.java
@@ -1,5 +1,5 @@
/* gnu/regexp/REToken.java
- Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -43,6 +43,7 @@ abstract class REToken implements Serializable, Cloneable {
protected REToken next = null;
protected REToken uncle = null;
protected int subIndex;
+ protected boolean unicodeAware = true;
public Object clone() {
try {
@@ -70,30 +71,119 @@ abstract class REToken implements Serializable, Cloneable {
}
/** Returns true if the match succeeded, false if it failed. */
- abstract boolean match(CharIndexed input, REMatch mymatch);
+ boolean match(CharIndexed input, REMatch mymatch) {
+ REMatch m = matchThis(input, mymatch);
+ if (m == null) return false;
+ if (next(input, m)) {
+ mymatch.assignFrom(m);
+ return true;
+ }
+ return false;
+ }
+
+ /** Returns true if the match succeeded, false if it failed.
+ * The matching is done against this REToken only. Chained
+ * tokens are not checked.
+ * This method is used to define the default match method.
+ * Simple subclasses of REToken, for example, such that
+ * matches only one character, should implement this method.
+ * Then the default match method will work. But complicated
+ * subclasses of REToken, which needs a special match method,
+ * do not have to implement this method.
+ */
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
+ throw new UnsupportedOperationException(
+ "This REToken does not have a matchThis method");
+ }
/** Returns true if the rest of the tokens match, false if they fail. */
protected boolean next(CharIndexed input, REMatch mymatch) {
- if (next == null) {
- if (uncle == null) {
- return true;
- } else {
- return uncle.match(input, mymatch);
- }
- } else {
- return next.match(input, mymatch);
- }
+ REToken nextToken = getNext();
+ if (nextToken == null) return true;
+ return nextToken.match(input, mymatch);
}
-
+
+ /** Returns the next REToken chained to this REToken. */
+ REToken getNext() {
+ return (next != null ? next : uncle);
+ }
+
+ /** Finds a match at the position specified by the given REMatch.
+ * If necessary, adds a BacktrackStack.Backtrack object to backtrackStack
+ * of the REmatch found this time so that another possible match
+ * may be found when backtrack is called.
+ * By default, nothing is added to the backtrackStack.
+ * @param CharIndexed input Input character sequence.
+ * @param mymatch Position at which a match should be found
+ * @return REMatch object if a match was found, null otherwise.
+ */
+ REMatch findMatch(CharIndexed input, REMatch mymatch) {
+ boolean b = match(input, mymatch);
+ if (b) return mymatch;
+ return null;
+ }
+
+ boolean returnsFixedLengthMatches() {
+ return false;
+ }
+
+ int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+ throw new UnsupportedOperationException(
+ "This token does not support findFixedLengthMatches");
+ }
+
+ /**
+ * Backtrack to another possibility.
+ * Ordinary REToken cannot do anything if this method is called.
+ */
+ REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) {
+ throw new IllegalStateException("This token cannot be backtracked to");
+ }
+
boolean chain(REToken token) {
next = token;
return true; // Token was accepted
}
- abstract void dump(StringBuffer os);
+ abstract void dump(StringBuffer os);
void dumpAll(StringBuffer os) {
dump(os);
if (next != null) next.dumpAll(os);
}
+
+ public String toString() {
+ StringBuffer os = new StringBuffer();
+ dump(os);
+ return os.toString();
+ }
+
+ /**
+ * Converts the character argument to lowercase.
+ * @param ch the character to be converted.
+ * @param unicodeAware If true, use java.lang.Character#toLowerCase;
+ * otherwise, only US-ASCII charactes can be converted.
+ * @return the lowercase equivalent of the character, if any;
+ * otherwise, the character itself.
+ */
+ public static char toLowerCase(char ch, boolean unicodeAware) {
+ if (unicodeAware) return Character.toLowerCase(ch);
+ if (ch >= 'A' && ch <= 'Z') return (char)(ch + 'a' - 'A');
+ return ch;
+ }
+
+ /**
+ * Converts the character argument to uppercase.
+ * @param ch the character to be converted.
+ * @param unicodeAware If true, use java.lang.Character#toUpperCase;
+ * otherwise, only US-ASCII charactes can be converted.
+ * @return the uppercase equivalent of the character, if any;
+ * otherwise, the character itself.
+ */
+ public static char toUpperCase(char ch, boolean unicodeAware) {
+ if (unicodeAware) return Character.toUpperCase(ch);
+ if (ch >= 'a' && ch <= 'z') return (char)(ch + 'A' - 'a');
+ return ch;
+ }
+
}
diff --git a/libjava/classpath/gnu/regexp/RETokenAny.java b/libjava/classpath/gnu/regexp/RETokenAny.java
index 2b0967a..a37d956 100644
--- a/libjava/classpath/gnu/regexp/RETokenAny.java
+++ b/libjava/classpath/gnu/regexp/RETokenAny.java
@@ -1,5 +1,5 @@
/* gnu/regexp/RETokenAny.java
- Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -59,15 +59,37 @@ final class RETokenAny extends REToken {
return 1;
}
- boolean match(CharIndexed input, REMatch mymatch) {
- char ch = input.charAt(mymatch.index);
- if ((ch == CharIndexed.OUT_OF_BOUNDS)
- || (!newline && (ch == '\n'))
- || (matchNull && (ch == 0))) {
- return false;
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
+ char ch = input.charAt(mymatch.index);
+ boolean retval = matchOneChar(ch);
+ if (retval) {
+ ++mymatch.index;
+ return mymatch;
+ }
+ return null;
}
- ++mymatch.index;
- return next(input, mymatch);
+
+ boolean matchOneChar(char ch) {
+ if ((ch == CharIndexed.OUT_OF_BOUNDS)
+ || (!newline && (ch == '\n'))
+ || (matchNull && (ch == 0))) {
+ return false;
+ }
+ return true;
+ }
+
+ boolean returnsFixedLengthMatches() { return true; }
+
+ int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+ int index = mymatch.index;
+ int numRepeats = 0;
+ while (true) {
+ if (numRepeats >= max) break;
+ char ch = input.charAt(index++);
+ if (! matchOneChar(ch)) break;
+ numRepeats++;
+ }
+ return numRepeats;
}
void dump(StringBuffer os) {
diff --git a/libjava/classpath/gnu/regexp/RETokenBackRef.java b/libjava/classpath/gnu/regexp/RETokenBackRef.java
index 060a6cf..25ef952 100644
--- a/libjava/classpath/gnu/regexp/RETokenBackRef.java
+++ b/libjava/classpath/gnu/regexp/RETokenBackRef.java
@@ -1,5 +1,5 @@
/* gnu/regexp/RETokenBackRef.java
- Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -50,30 +50,32 @@ final class RETokenBackRef extends REToken {
// should implement getMinimumLength() -- any ideas?
- boolean match(CharIndexed input, REMatch mymatch) {
- if (num >= mymatch.start.length) return false;
- if (num >= mymatch.end.length) return false;
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
+ if (num >= mymatch.start.length) return null;
+ if (num >= mymatch.end.length) return null;
int b,e;
b = mymatch.start[num];
e = mymatch.end[num];
- if ((b==-1)||(e==-1)) return false; // this shouldn't happen, but...
+ if ((b==-1)||(e==-1)) return null; // this shouldn't happen, but...
+ if (b < 0) b += 1;
+ if (e < 0) e += 1;
for (int i=b; i<e; i++) {
char c1 = input.charAt(mymatch.index+i-b);
char c2 = input.charAt(i);
if (c1 != c2) {
if (insens) {
- if (c1 != Character.toLowerCase(c2) &&
- c1 != Character.toUpperCase(c2)) {
- return false;
+ if (c1 != toLowerCase(c2, unicodeAware) &&
+ c1 != toUpperCase(c2, unicodeAware)) {
+ return null;
}
}
else {
- return false;
+ return null;
}
}
}
mymatch.index += e-b;
- return next(input, mymatch);
+ return mymatch;
}
void dump(StringBuffer os) {
diff --git a/libjava/classpath/gnu/regexp/RETokenChar.java b/libjava/classpath/gnu/regexp/RETokenChar.java
index 5c087c6..1b3a748 100644
--- a/libjava/classpath/gnu/regexp/RETokenChar.java
+++ b/libjava/classpath/gnu/regexp/RETokenChar.java
@@ -1,5 +1,5 @@
/* gnu/regexp/RETokenChar.java
- Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -44,8 +44,9 @@ final class RETokenChar extends REToken {
RETokenChar(int subIndex, char c, boolean ins) {
super(subIndex);
+ insens = ins;
ch = new char [1];
- ch[0] = (insens = ins) ? Character.toLowerCase(c) : c;
+ ch[0] = c;
}
int getMinimumLength() {
@@ -56,18 +57,50 @@ final class RETokenChar extends REToken {
return ch.length;
}
- boolean match(CharIndexed input, REMatch mymatch) {
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
+ int z = ch.length;
+ if (matchOneString(input, mymatch.index)) {
+ mymatch.index += z;
+ return mymatch;
+ }
+ return null;
+ }
+
+ boolean matchOneString(CharIndexed input, int index) {
int z = ch.length;
char c;
for (int i=0; i<z; i++) {
- c = input.charAt(mymatch.index+i);
- if (( (insens) ? Character.toLowerCase(c) : c ) != ch[i]) {
+ c = input.charAt(index+i);
+ if (! charEquals(c, ch[i])) {
return false;
}
}
- mymatch.index += z;
+ return true;
+ }
- return next(input, mymatch);
+ private boolean charEquals(char c1, char c2) {
+ if (c1 == c2) return true;
+ if (! insens) return false;
+ if (toLowerCase(c1, unicodeAware) == c2) return true;
+ if (toUpperCase(c1, unicodeAware) == c2) return true;
+ return false;
+ }
+
+ boolean returnsFixedLengthMatches() { return true; }
+
+ int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+ int index = mymatch.index;
+ int numRepeats = 0;
+ int z = ch.length;
+ while (true) {
+ if (numRepeats >= max) break;
+ if (matchOneString(input, index)) {
+ index += z;
+ numRepeats++;
+ }
+ else break;
+ }
+ return numRepeats;
}
// Overrides REToken.chain() to optimize for strings
diff --git a/libjava/classpath/gnu/regexp/RETokenEnd.java b/libjava/classpath/gnu/regexp/RETokenEnd.java
index 788a964..deb2fc5 100644
--- a/libjava/classpath/gnu/regexp/RETokenEnd.java
+++ b/libjava/classpath/gnu/regexp/RETokenEnd.java
@@ -1,5 +1,5 @@
/* gnu/regexp/RETokenEnd.java
- Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -53,24 +53,34 @@ final class RETokenEnd extends REToken {
return 0;
}
- boolean match(CharIndexed input, REMatch mymatch) {
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
char ch = input.charAt(mymatch.index);
if (ch == CharIndexed.OUT_OF_BOUNDS)
return ((mymatch.eflags & RE.REG_NOTEOL)>0) ?
- false : next(input, mymatch);
+ null : mymatch;
if (newline != null) {
char z;
int i = 0; // position in newline
do {
z = newline.charAt(i);
- if (ch != z) return false;
+ if (ch != z) return null;
++i;
ch = input.charAt(mymatch.index + i);
} while (i < newline.length());
- return next(input, mymatch);
+ return mymatch;
}
- return false;
+ return null;
+ }
+
+ boolean returnsFixedLengthMatches() { return true; }
+
+ int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+ REMatch m = (REMatch) mymatch.clone();
+ REToken tk = (REToken) this.clone();
+ tk.chain(null);
+ if (tk.match(input, m)) return max;
+ else return 0;
}
void dump(StringBuffer os) {
diff --git a/libjava/classpath/gnu/regexp/RETokenEndOfPreviousMatch.java b/libjava/classpath/gnu/regexp/RETokenEndOfPreviousMatch.java
new file mode 100644
index 0000000..167d10b
--- /dev/null
+++ b/libjava/classpath/gnu/regexp/RETokenEndOfPreviousMatch.java
@@ -0,0 +1,72 @@
+/* gnu/regexp/RETokenEndOfPreviousMatch.java
+ Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+
+class RETokenEndOfPreviousMatch extends RETokenStart {
+
+ RETokenEndOfPreviousMatch(int subIndex) {
+ super(subIndex, null);
+ }
+
+ int getMaximumLength() {
+ return 0;
+ }
+
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
+ REMatch lastMatch = input.getLastMatch();
+ if (lastMatch == null) return super.matchThis(input, mymatch);
+ if (input.getAnchor()+mymatch.index ==
+ lastMatch.anchor+lastMatch.index) {
+ return mymatch;
+ }
+ else {
+ return null;
+ }
+ }
+
+ boolean returnsFixedLengthmatches() { return true; }
+
+ int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+ if (matchThis(input, mymatch) != null) return max;
+ else return 0;
+ }
+
+ void dump(StringBuffer os) {
+ os.append("\\G");
+ }
+}
diff --git a/libjava/classpath/gnu/regexp/RETokenEndSub.java b/libjava/classpath/gnu/regexp/RETokenEndSub.java
index fe2969d..fca01c7 100644
--- a/libjava/classpath/gnu/regexp/RETokenEndSub.java
+++ b/libjava/classpath/gnu/regexp/RETokenEndSub.java
@@ -1,5 +1,5 @@
/* gnu/regexp/RETokenEndSub.java
- Copyright (C) 2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -46,12 +46,21 @@ final class RETokenEndSub extends REToken {
return 0;
}
- boolean match(CharIndexed input, REMatch mymatch) {
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
+ mymatch.start[subIndex] = mymatch.start1[subIndex];
mymatch.end[subIndex] = mymatch.index;
- return next(input, mymatch);
+ return mymatch;
}
-
+
+ REMatch findMatch(CharIndexed input, REMatch mymatch) {
+ mymatch.start[subIndex] = mymatch.start1[subIndex];
+ mymatch.end[subIndex] = mymatch.index;
+ return super.findMatch(input, mymatch);
+ }
+
void dump(StringBuffer os) {
// handled by RE
+ // But add something for debugging.
+ os.append("(?#RETokenEndSub subIndex=" + subIndex + ")");
}
}
diff --git a/libjava/classpath/gnu/regexp/RETokenIndependent.java b/libjava/classpath/gnu/regexp/RETokenIndependent.java
index 2eb1472..8bb95c6 100644
--- a/libjava/classpath/gnu/regexp/RETokenIndependent.java
+++ b/libjava/classpath/gnu/regexp/RETokenIndependent.java
@@ -57,14 +57,16 @@ final class RETokenIndependent extends REToken
return re.getMaximumLength();
}
- boolean match(CharIndexed input, REMatch mymatch)
+ REMatch matchThis(CharIndexed input, REMatch mymatch)
{
- if (re.match(input, mymatch)) {
+ boolean b = re.match(input, mymatch);
+ if (b) {
// Once we have found a match, we do not see other possible matches.
- mymatch.next = null;
- return next(input, mymatch);
+ if (mymatch.backtrackStack != null) mymatch.backtrackStack.clear();
+ return mymatch;
+
}
- return false;
+ return null;
}
void dump(StringBuffer os) {
diff --git a/libjava/classpath/gnu/regexp/RETokenLookAhead.java b/libjava/classpath/gnu/regexp/RETokenLookAhead.java
index b44dfa5..009872c 100644
--- a/libjava/classpath/gnu/regexp/RETokenLookAhead.java
+++ b/libjava/classpath/gnu/regexp/RETokenLookAhead.java
@@ -1,5 +1,5 @@
/* gnu/regexp/RETokenLookAhead.java
- Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -56,28 +56,17 @@ final class RETokenLookAhead extends REToken
return 0;
}
- boolean match(CharIndexed input, REMatch mymatch)
+ REMatch matchThis(CharIndexed input, REMatch mymatch)
{
REMatch trymatch = (REMatch)mymatch.clone();
- REMatch trymatch1 = (REMatch)mymatch.clone();
- REMatch newMatch = null;
if (re.match(input, trymatch)) {
- if (negative) return false;
- if (next(input, trymatch1))
- newMatch = trymatch1;
+ if (negative) return null;
+ trymatch.index = mymatch.index;
+ return trymatch;
}
-
- if (newMatch != null) {
- if (negative) return false;
- //else
- mymatch.assignFrom(newMatch);
- return true;
- }
- else { // no match
- if (negative)
- return next(input, mymatch);
- //else
- return false;
+ else {
+ if (negative) return mymatch;
+ return null;
}
}
diff --git a/libjava/classpath/gnu/regexp/RETokenLookBehind.java b/libjava/classpath/gnu/regexp/RETokenLookBehind.java
index a6c1b34..8311d1a 100644
--- a/libjava/classpath/gnu/regexp/RETokenLookBehind.java
+++ b/libjava/classpath/gnu/regexp/RETokenLookBehind.java
@@ -55,35 +55,37 @@ final class RETokenLookBehind extends REToken
return 0;
}
- boolean match(CharIndexed input, REMatch mymatch)
+ REMatch matchThis(CharIndexed input, REMatch mymatch)
{
int max = re.getMaximumLength();
CharIndexed behind = input.lookBehind(mymatch.index, max);
REMatch trymatch = (REMatch)mymatch.clone();
REMatch trymatch1 = (REMatch)mymatch.clone();
REMatch newMatch = null;
- int curIndex = trymatch.index + behind.length() - input.length();
+ int diff = behind.length() - input.length();
+ int curIndex = trymatch.index + diff;
trymatch.index = 0;
+ trymatch.offset = 0;
RETokenMatchHereOnly stopper = new RETokenMatchHereOnly(curIndex);
REToken re1 = (REToken) re.clone();
re1.chain(stopper);
if (re1.match(behind, trymatch)) {
- if (negative) return false;
- if (next(input, trymatch1))
- newMatch = trymatch1;
+ if (negative) return null;
+ for (int i = 0; i < trymatch.start.length; i++) {
+ if (trymatch.start[i] != -1 && trymatch.end[i] != -1) {
+ trymatch.start[i] -= diff;
+ if (trymatch.start[i] < 0) trymatch.start[i] -= 1;
+ trymatch.end[i] -= diff;
+ if (trymatch.end[i] < 0) trymatch.end[i] -= 1;
+ }
+ }
+ trymatch.index = mymatch.index;
+ trymatch.offset = mymatch.offset;
+ return trymatch;
}
-
- if (newMatch != null) {
- if (negative) return false;
- //else
- mymatch.assignFrom(newMatch);
- return true;
- }
- else { // no match
- if (negative)
- return next(input, mymatch);
- //else
- return false;
+ else {
+ if (negative) return mymatch;
+ return null;
}
}
@@ -105,8 +107,8 @@ final class RETokenLookBehind extends REToken
this.index = index;
}
- boolean match(CharIndexed input, REMatch mymatch) {
- return index == mymatch.index;
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
+ return (index == mymatch.index ? mymatch : null);
}
void dump(StringBuffer os) {}
diff --git a/libjava/classpath/gnu/regexp/RETokenNamedProperty.java b/libjava/classpath/gnu/regexp/RETokenNamedProperty.java
index 13c1e41..6147e87 100644
--- a/libjava/classpath/gnu/regexp/RETokenNamedProperty.java
+++ b/libjava/classpath/gnu/regexp/RETokenNamedProperty.java
@@ -105,24 +105,43 @@ final class RETokenNamedProperty extends REToken {
return 1;
}
- boolean match(CharIndexed input, REMatch mymatch) {
- char ch = input.charAt(mymatch.index);
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
+ char ch = input.charAt(mymatch.index);
+ boolean retval = matchOneChar(ch);
+ if (retval) {
+ ++mymatch.index;
+ return mymatch;
+ }
+ return null;
+ }
+
+ private boolean matchOneChar(char ch) {
if (ch == CharIndexed.OUT_OF_BOUNDS)
return false;
boolean retval = handler.includes(ch);
if (insens) {
retval = retval ||
- handler.includes(Character.toUpperCase(ch)) ||
- handler.includes(Character.toLowerCase(ch));
+ handler.includes(toUpperCase(ch, unicodeAware)) ||
+ handler.includes(toLowerCase(ch, unicodeAware));
}
if (negate) retval = !retval;
- if (retval) {
- ++mymatch.index;
- return next(input, mymatch);
+ return retval;
+ }
+
+ boolean returnsFixedLengthMatches() { return true; }
+
+ int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+ int index = mymatch.index;
+ int numRepeats = 0;
+ while (true) {
+ if (numRepeats >= max) break;
+ char ch = input.charAt(index++);
+ if (! matchOneChar(ch)) break;
+ numRepeats++;
}
- else return false;
+ return numRepeats;
}
void dump(StringBuffer os) {
@@ -246,9 +265,6 @@ final class RETokenNamedProperty extends REToken {
private static class POSIXHandler extends Handler {
private RETokenPOSIX retoken;
- private REMatch mymatch = new REMatch(0,0,0);
- private char[] chars = new char[1];
- private CharIndexedCharArray ca = new CharIndexedCharArray(chars, 0);
public POSIXHandler(String name) {
int posixId = RETokenPOSIX.intValue(name.toLowerCase());
if (posixId != -1)
@@ -257,9 +273,7 @@ final class RETokenNamedProperty extends REToken {
throw new RuntimeException("Unknown posix ID: " + name);
}
public boolean includes(char c) {
- chars[0] = c;
- mymatch.index = 0;
- return retoken.match(ca, mymatch);
+ return retoken.matchOneChar(c);
}
}
diff --git a/libjava/classpath/gnu/regexp/RETokenOneOf.java b/libjava/classpath/gnu/regexp/RETokenOneOf.java
index 260bc4b..f747d59 100644
--- a/libjava/classpath/gnu/regexp/RETokenOneOf.java
+++ b/libjava/classpath/gnu/regexp/RETokenOneOf.java
@@ -42,6 +42,9 @@ import java.util.Stack;
final class RETokenOneOf extends REToken {
private Vector options;
private boolean negative;
+ // True if this RETokenOneOf is supposed to match only one character,
+ // which is typically the case of a character class expression.
+ private boolean matchesOneChar;
private Vector addition;
// This Vector addition is used to store nested character classes.
@@ -76,12 +79,14 @@ final class RETokenOneOf extends REToken {
this.negative = negative;
for (int i = 0; i < optionsStr.length(); i++)
options.addElement(new RETokenChar(subIndex,optionsStr.charAt(i),insens));
+ matchesOneChar = true;
}
RETokenOneOf(int subIndex, Vector options, boolean negative) {
super(subIndex);
this.options = options;
this.negative = negative;
+ matchesOneChar = negative;
}
RETokenOneOf(int subIndex, Vector options, Vector addition, boolean negative) {
@@ -89,12 +94,11 @@ final class RETokenOneOf extends REToken {
this.options = options;
this.addition = addition;
this.negative = negative;
+ matchesOneChar = (negative || addition != null);
}
int getMinimumLength() {
- // (negative || addition != null) occurs when this token originates from
- // character class expression.
- if (negative || addition != null) return 1;
+ if (matchesOneChar) return 1;
int min = Integer.MAX_VALUE;
int x;
for (int i=0; i < options.size(); i++) {
@@ -105,9 +109,7 @@ final class RETokenOneOf extends REToken {
}
int getMaximumLength() {
- // (negative || addition != null) occurs when this token originates from
- // character class expression.
- if (negative || addition != null) return 1;
+ if (matchesOneChar) return 1;
int max = 0;
int x;
for (int i=0; i < options.size(); i++) {
@@ -118,6 +120,11 @@ final class RETokenOneOf extends REToken {
}
boolean match(CharIndexed input, REMatch mymatch) {
+ if (matchesOneChar) return matchOneChar(input, mymatch);
+ else return matchOneRE(input, mymatch);
+ }
+
+ boolean matchOneChar(CharIndexed input, REMatch mymatch) {
REMatch tryMatch;
boolean tryOnly;
if (addition == null) {
@@ -187,40 +194,80 @@ final class RETokenOneOf extends REToken {
}
private boolean matchP(CharIndexed input, REMatch mymatch, boolean tryOnly) {
- boolean stopMatchingIfSatisfied =
- (mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0;
- REMatch.REMatchList newMatch = new REMatch.REMatchList();
REToken tk;
for (int i=0; i < options.size(); i++) {
- // In order that the backtracking can work,
- // each option must be chained to the next token.
- // But the chain method has some side effect, so
- // we use clones.
- tk = (REToken)((REToken) options.elementAt(i)).clone();
- if (! tryOnly) {
- tk.chain(this.next);
- tk.setUncle(this.uncle);
- tk.subIndex = this.subIndex;
- }
+ tk = (REToken) options.elementAt(i);
REMatch tryMatch = (REMatch) mymatch.clone();
if (tk.match(input, tryMatch)) { // match was successful
if (tryOnly) return true;
- newMatch.addTail(tryMatch);
- if (stopMatchingIfSatisfied) break;
- } // is a match
- } // try next option
- if (tryOnly) return false;
-
- if (newMatch.head != null) {
- // set contents of mymatch equal to newMatch
+ if (next(input, tryMatch)) {
+ mymatch.assignFrom(tryMatch);
+ return true;
+ }
+ }
+ }
+ return false;
+ }
- // try each one that matched
- mymatch.assignFrom(newMatch.head);
+ private boolean matchOneRE(CharIndexed input, REMatch mymatch) {
+ REMatch newMatch = findMatch(input, mymatch);
+ if (newMatch != null) {
+ mymatch.assignFrom(newMatch);
return true;
- } else {
- return false;
}
- }
+ return false;
+ }
+
+ REMatch findMatch(CharIndexed input, REMatch mymatch) {
+ if (matchesOneChar) return super.findMatch(input, mymatch);
+ return findMatch(input, mymatch, 0);
+ }
+
+ REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) {
+ return findMatch(input, mymatch, ((Integer)param).intValue());
+ }
+
+ private REMatch findMatch(CharIndexed input, REMatch mymatch, int optionIndex) {
+ for (int i = optionIndex; i < options.size(); i++) {
+ REToken tk = (REToken) options.elementAt(i);
+ tk = (REToken) tk.clone();
+ tk.chain(getNext());
+ REMatch tryMatch = (REMatch) mymatch.clone();
+ if (tryMatch.backtrackStack == null) {
+ tryMatch.backtrackStack = new BacktrackStack();
+ }
+ boolean stackPushed = false;
+ if (i + 1 < options.size()) {
+ tryMatch.backtrackStack.push(new BacktrackStack.Backtrack(
+ this, input, mymatch, new Integer(i + 1)));
+ stackPushed = true;
+ }
+ boolean b = tk.match(input, tryMatch);
+ if (b) {
+ return tryMatch;
+ }
+ if (stackPushed) tryMatch.backtrackStack.pop();
+ }
+ return null;
+ }
+
+ boolean returnsFixedLengthMatches() { return matchesOneChar; }
+
+ int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+ if (!matchesOneChar)
+ return super.findFixedLengthMatches(input, mymatch, max);
+ int numRepeats = 0;
+ REMatch m = (REMatch) mymatch.clone();
+ REToken tk = (REToken) this.clone();
+ tk.chain(null);
+ while (true) {
+ if (numRepeats >= max) break;
+ m = tk.findMatch(input, m);
+ if (m == null) break;
+ numRepeats++;
+ }
+ return numRepeats;
+ }
void dump(StringBuffer os) {
os.append(negative ? "[^" : "(?:");
diff --git a/libjava/classpath/gnu/regexp/RETokenPOSIX.java b/libjava/classpath/gnu/regexp/RETokenPOSIX.java
index 4182c6f..dbea98a 100644
--- a/libjava/classpath/gnu/regexp/RETokenPOSIX.java
+++ b/libjava/classpath/gnu/regexp/RETokenPOSIX.java
@@ -1,5 +1,5 @@
/* gnu/regexp/RETokenPOSIX.java
- Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -85,8 +85,17 @@ final class RETokenPOSIX extends REToken {
return 1;
}
- boolean match(CharIndexed input, REMatch mymatch) {
- char ch = input.charAt(mymatch.index);
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
+ char ch = input.charAt(mymatch.index);
+ boolean retval = matchOneChar(ch);
+ if (retval) {
+ ++mymatch.index;
+ return mymatch;
+ }
+ return null;
+ }
+
+ boolean matchOneChar(char ch) {
if (ch == CharIndexed.OUT_OF_BOUNDS)
return false;
@@ -134,11 +143,21 @@ final class RETokenPOSIX extends REToken {
}
if (negated) retval = !retval;
- if (retval) {
- ++mymatch.index;
- return next(input, mymatch);
- }
- else return false;
+ return retval;
+ }
+
+ boolean returnsFixedLengthMatches() { return true; }
+
+ int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+ int index = mymatch.index;
+ int numRepeats = 0;
+ while (true) {
+ if (numRepeats >= max) break;
+ char ch = input.charAt(index++);
+ if (! matchOneChar(ch)) break;
+ numRepeats++;
+ }
+ return numRepeats;
}
void dump(StringBuffer os) {
diff --git a/libjava/classpath/gnu/regexp/RETokenRange.java b/libjava/classpath/gnu/regexp/RETokenRange.java
index 8a1ac86..9d3da32 100644
--- a/libjava/classpath/gnu/regexp/RETokenRange.java
+++ b/libjava/classpath/gnu/regexp/RETokenRange.java
@@ -1,5 +1,5 @@
/* gnu/regexp/RETokenRange.java
- Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -56,23 +56,41 @@ final class RETokenRange extends REToken {
return 1;
}
- boolean match(CharIndexed input, REMatch mymatch) {
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
char c = input.charAt(mymatch.index);
+ if (matchOneChar(c)) {
+ ++mymatch.index;
+ return mymatch;
+ }
+ return null;
+ }
+
+ boolean matchOneChar(char c) {
if (c == CharIndexed.OUT_OF_BOUNDS) return false;
boolean matches = (c >= lo) && (c <= hi);
if (! matches && insens) {
- char c1 = Character.toLowerCase(c);
+ char c1 = toLowerCase(c, unicodeAware);
matches = (c1 >= lo) && (c1 <= hi);
if (!matches) {
- c1 = Character.toUpperCase(c);
+ c1 = toUpperCase(c, unicodeAware);
matches = (c1 >= lo) && (c1 <= hi);
}
}
- if (matches) {
- ++mymatch.index;
- return next(input, mymatch);
+ return matches;
+ }
+
+ boolean returnsFixedLengthMatches() { return true; }
+
+ int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+ int index = mymatch.index;
+ int numRepeats = 0;
+ while (true) {
+ if (numRepeats >= max) break;
+ char ch = input.charAt(index++);
+ if (! matchOneChar(ch)) break;
+ numRepeats++;
}
- return false;
+ return numRepeats;
}
void dump(StringBuffer os) {
diff --git a/libjava/classpath/gnu/regexp/RETokenRepeated.java b/libjava/classpath/gnu/regexp/RETokenRepeated.java
index 2d019c5..1bad4c7 100644
--- a/libjava/classpath/gnu/regexp/RETokenRepeated.java
+++ b/libjava/classpath/gnu/regexp/RETokenRepeated.java
@@ -38,20 +38,27 @@ exception statement from your version. */
package gnu.regexp;
-import java.util.Vector;
-import java.util.Arrays;
+// import java.util.Vector;
+// import java.util.Stack;
final class RETokenRepeated extends REToken {
private REToken token;
private int min,max;
private boolean stingy;
private boolean possessive;
+ private int tokenFixedLength;
RETokenRepeated(int subIndex, REToken token, int min, int max) {
super(subIndex);
this.token = token;
this.min = min;
this.max = max;
+ if (token.returnsFixedLengthMatches()) {
+ tokenFixedLength = token.getMaximumLength();
+ }
+ else {
+ tokenFixedLength = -1;
+ }
}
/** Sets the minimal matching mode to true. */
@@ -90,69 +97,235 @@ final class RETokenRepeated extends REToken {
return (max * tmax);
}
- private static REMatch findDoables(REToken tk,
- CharIndexed input, REMatch mymatch) {
-
- REMatch.REMatchList doables = new REMatch.REMatchList();
-
- // try next repeat at all possible positions
- for (REMatch current = mymatch;
- current != null; current = current.next) {
- REMatch recurrent = (REMatch) current.clone();
- int origin = recurrent.index;
- tk = (REToken) tk.clone();
- tk.next = tk.uncle = null;
- recurrent.matchFlags |= REMatch.MF_FIND_ALL;
- if (tk.match(input, recurrent)) {
- for (REMatch m = recurrent; m != null; m = m.next) {
- m.matchFlags &= ~REMatch.MF_FIND_ALL;
+ // The comment "MUST make a clone" below means that some tests
+ // failed without doing clone(),
+
+ private static class DoablesFinder {
+ private REToken tk;
+ private CharIndexed input;
+ private REMatch rematch;
+ private boolean findFirst;
+
+ private DoablesFinder(REToken tk, CharIndexed input, REMatch mymatch) {
+ this.tk = tk;
+ this.input = input;
+ this.rematch = (REMatch) mymatch.clone(); // MUST make a clone
+ this.rematch.backtrackStack = new BacktrackStack();
+ findFirst = true;
+ }
+
+ private REMatch find() {
+ int origin = rematch.index;
+ REMatch rem;
+ if (findFirst) {
+ rem = tk.findMatch(input, rematch);
+ findFirst = false;
+ }
+ else {
+ while (true) {
+ if (rematch.backtrackStack.empty()) {
+ rem = null;
+ break;
}
- if (recurrent.index == origin) recurrent.empty = true;
- // add all items in current to doables array
- doables.addTail(recurrent);
+ BacktrackStack.Backtrack bt = rematch.backtrackStack.pop();
+ rem = bt.token.backtrack(bt.input, bt.match, bt.param);
+ if (rem != null) break;
}
}
- return doables.head;
+ if (rem == null) return null;
+ if (rem.index == origin) rem.empty = true;
+ rematch = rem;
+ return (REMatch) rem.clone(); // MUST make a clone.
+ }
+
+ boolean noMore() {
+ return rematch.backtrackStack.empty();
+ }
}
- // We do need to save every possible point, but the number of clone()
- // invocations here is really a killer for performance on non-stingy
- // repeat operators. I'm open to suggestions...
+ REMatch findMatch(CharIndexed input, REMatch mymatch) {
+ if (tokenFixedLength >= 0) return findMatchFixedLength(input, mymatch);
+ BacktrackStack stack = new BacktrackStack();
+ stack.push(new StackedInfo(input, 0, mymatch, null, null));
+ return findMatch(stack);
+ }
- // Hypothetical question: can you have a RE that matches 1 times,
- // 3 times, 5 times, but not 2 times or 4 times? Does having
- // the subexpression back-reference operator allow that?
+ REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) {
+ if (tokenFixedLength >= 0) return backtrackFixedLength(input, mymatch, param);
+ return findMatch((BacktrackStack)param);
+ }
- boolean match(CharIndexed input, REMatch mymatch) {
+ private static class StackedInfo extends BacktrackStack.Backtrack {
+ int numRepeats;
+ int[] visited;
+ DoablesFinder finder;
+ StackedInfo(CharIndexed input, int numRepeats, REMatch match,
+ int[] visited, DoablesFinder finder) {
+ super(null, input, match, null);
+ this.numRepeats = numRepeats;
+ this.visited = visited;
+ this.finder = finder;
+ }
+ }
- boolean stopMatchingIfSatisfied =
- (mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0;
+ private REMatch findMatch(BacktrackStack stack) {
+ // Avoid using recursive calls.
+ MAIN_LOOP:
+ while (true) {
+
+ if (stack.empty()) return null;
+ StackedInfo si = (StackedInfo)(stack.peek());
+ CharIndexed input = si.input;
+ int numRepeats = si.numRepeats;
+ REMatch mymatch = si.match;
+ int[] visited = si.visited;
+ DoablesFinder finder = si.finder;
+
+ if (mymatch.backtrackStack == null)
+ mymatch.backtrackStack = new BacktrackStack();
+
+ if (numRepeats >= max) {
+ stack.pop();
+ REMatch m1 = matchRest(input, mymatch);
+ if (m1 != null) {
+ if (! stack.empty()) {
+ m1.backtrackStack.push(new BacktrackStack.Backtrack(
+ this, input, mymatch, stack));
+ }
+ return m1;
+ }
+ if (stingy) {
+ continue MAIN_LOOP;
+ }
+ return null;
+ }
- REMatch newMatch = matchMinimum(input, mymatch);
- if (newMatch == null) return false;
+ if (finder == null) {
+ finder = new DoablesFinder(token, input, mymatch);
+ si.finder = finder;
+ }
- // Array of positions we have already visited
- int[] visited = initVisited();
- for (REMatch m = newMatch; m != null; m = m.next) {
- visited = addVisited(m.index, visited);
+ if (numRepeats < min) {
+ while (true) {
+ REMatch doable = finder.find();
+ if (doable == null) {
+ if (stack.empty()) return null;
+ stack.pop();
+ continue MAIN_LOOP;
+ }
+ if (finder.noMore()) stack.pop();
+ int newNumRepeats = (doable.empty ? min : numRepeats + 1);
+ stack.push(new StackedInfo(
+ input, newNumRepeats, doable, visited, null));
+ continue MAIN_LOOP;
+ }
}
- int max1 = decreaseMax(max, min);
+ if (visited == null) visited = initVisited();
- newMatch = _match(input, newMatch, max1,
- stopMatchingIfSatisfied, visited);
- if (newMatch != null) {
- mymatch.assignFrom(newMatch);
- return true;
+ if (stingy) {
+ REMatch nextMatch = finder.find();
+ if (nextMatch != null && !nextMatch.empty) {
+ stack.push(new StackedInfo(
+ input, numRepeats + 1, nextMatch, visited, null));
+ }
+ else {
+ stack.pop();
+ }
+ REMatch m1 = matchRest(input, mymatch);
+ if (m1 != null) {
+ if (!stack.empty()) {
+ m1.backtrackStack.push(new BacktrackStack.Backtrack(
+ this, input, mymatch, stack));
+ }
+ return m1;
+ }
+ else {
+ continue MAIN_LOOP;
+ }
}
- return false;
- }
- private static int decreaseMax(int m, int n) {
- if (m == Integer.MAX_VALUE) return m;
- return m - n;
+ visited = addVisited(mymatch.index, visited);
+
+ DO_THIS:
+ do {
+
+ boolean emptyMatchFound = false;
+
+ DO_ONE_DOABLE:
+ while (true) {
+
+ REMatch doable = finder.find();
+ if (doable == null) {
+ break DO_THIS;
+ }
+ if (doable.empty) emptyMatchFound = true;
+
+ if (!emptyMatchFound) {
+ int n = doable.index;
+ if (! visitedContains(n, visited)) {
+ visited = addVisited(n, visited);
+ }
+ else {
+ continue DO_ONE_DOABLE;
+ }
+ stack.push(new StackedInfo(
+ input, numRepeats + 1, doable, visited, null));
+ REMatch m1 = findMatch(stack);
+ if (possessive) {
+ return m1;
+ }
+ if (m1 != null) {
+ m1.backtrackStack.push(new BacktrackStack.Backtrack(
+ this, input, mymatch, stack));
+ return m1;
+ }
+ }
+ else {
+ REMatch m1 = matchRest(input, doable);
+ if (possessive) {
+ return m1;
+ }
+ if (m1 != null) {
+ if (! stack.empty()) {
+ m1.backtrackStack.push(new BacktrackStack.Backtrack(
+ this, input, mymatch, stack));
+ }
+ return m1;
+ }
+ }
+
+ } // DO_ONE_DOABLE
+
+ } while (false); // DO_THIS only once;
+
+ if (!stack.empty()) {
+ stack.pop();
+ }
+ if (possessive) {
+ stack.clear();
+ }
+ REMatch m1 = matchRest(input, mymatch);
+ if (m1 != null) {
+ if (! stack.empty()) {
+ m1.backtrackStack.push(new BacktrackStack.Backtrack(
+ this, input, mymatch, stack));
+ }
+ return m1;
+ }
+
+ } // MAIN_LOOP
}
+ boolean match(CharIndexed input, REMatch mymatch) {
+ REMatch m1 = findMatch(input, mymatch);
+ if (m1 != null) {
+ mymatch.assignFrom(m1);
+ return true;
+ }
+ return false;
+ }
+
// Array visited is an array of character positions we have already
// visited. visited[0] is used to store the effective length of the
// array.
@@ -183,134 +356,55 @@ final class RETokenRepeated extends REToken {
return visited;
}
- private REMatch _match(CharIndexed input, REMatch mymatch,
- int max1, boolean stopMatchingIfSatisfied,
- int[] visited) {
-
- if (max1 == 0) {
- return matchRest(input, mymatch);
- }
- max1 = decreaseMax(max1, 1);
-
- REMatch.REMatchList allResults = new REMatch.REMatchList();
-
- // Depth-first search
-
- for (REMatch cur = mymatch; cur != null; cur = cur.next) {
-
- REMatch cur1 = (REMatch) cur.clone();
-
- if (stingy) {
- REMatch results = matchRest(input, cur1);
- if (results != null) {
- if (stopMatchingIfSatisfied) {
- return results;
- }
- allResults.addTail(results);
- }
- }
-
- DO_THIS:
- do {
-
- boolean emptyMatchFound = false;
- REMatch doables = findDoables(token, input, cur1);
- if (doables == null) break DO_THIS;
- if (doables.empty) emptyMatchFound = true;
-
- if (!emptyMatchFound) {
- REMatch.REMatchList list = new REMatch.REMatchList();
- for (REMatch m = doables; m != null; m = m.next) {
- REMatch m1 = (REMatch) m.clone();
- int n = m1.index;
- if (! visitedContains(n, visited)) {
- visited = addVisited(n, visited);
- list.addTail(m1);
- }
- }
- if (list.head == null) break DO_THIS;
- doables = list.head;
- }
-
- for (REMatch m = doables; m != null; m = m.next) {
- if (! emptyMatchFound) {
- REMatch m1 = _match(input, m, max1,
- stopMatchingIfSatisfied, visited);
- if (possessive) return m1;
- if (m1 != null) {
- if (stopMatchingIfSatisfied) {
- return m1;
- }
- allResults.addTail(m1);
- }
- }
- else {
- REMatch m1 = matchRest(input, m);
- if (m1 != null) {
- if (stopMatchingIfSatisfied) {
- return m1;
- }
- allResults.addTail(m1);
- }
- }
- }
-
- } while (false); // DO_THIS only once;
-
- // This point itself is a candidate.
- if (!stingy) {
- REMatch m2 = matchRest(input, cur1);
- if (m2 != null) {
- if (stopMatchingIfSatisfied) {
- return m2;
- }
- allResults.addTail(m2);
- }
- }
+ private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
+ if (next(input, newMatch)) {
+ return newMatch;
}
-
- return allResults.head;
+ return null;
}
- private REMatch matchMinimum(CharIndexed input, final REMatch mymatch) {
- // Possible positions for the next repeat to match at
- REMatch newMatch = mymatch;
-
- // number of times we've matched so far
- int numRepeats = 0;
-
- while (numRepeats < min) {
- REMatch doables = findDoables(token, input, newMatch);
-
- // if none of the possibilities worked out,
- // it means that minimum number of repeats could not be found.
- if (doables == null) return null;
-
- // reassign where the next repeat can match
- newMatch = doables;
-
- // increment how many repeats we've successfully found
- ++numRepeats;
-
- if (newMatch.empty) break;
- }
- return newMatch;
+ private REMatch findMatchFixedLength(CharIndexed input, REMatch mymatch) {
+ if (mymatch.backtrackStack == null)
+ mymatch.backtrackStack = new BacktrackStack();
+ int numRepeats = token.findFixedLengthMatches(input, (REMatch)mymatch.clone(), max);
+ if (numRepeats == Integer.MAX_VALUE) numRepeats = min;
+ int count = numRepeats - min + 1;
+ if (count <= 0) return null;
+ int index = 0;
+ if (!stingy) index = mymatch.index + (tokenFixedLength * numRepeats);
+ else index = mymatch.index + (tokenFixedLength * min);
+ return findMatchFixedLength(input, mymatch, index, count);
}
- private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
- REMatch current, single;
- REMatch.REMatchList doneIndex = new REMatch.REMatchList();
- // Test all possible matches for this number of repeats
- for (current = newMatch; current != null; current = current.next) {
- // clone() separates a single match from the chain
- single = (REMatch) current.clone();
- if (next(input, single)) {
- // chain results to doneIndex
- doneIndex.addTail(single);
+ private REMatch backtrackFixedLength(CharIndexed input, REMatch mymatch,
+ Object param) {
+ int[] params = (int[])param;
+ int index = params[0];
+ int count = params[1];
+ return findMatchFixedLength(input, mymatch, index, count);
+ }
+
+ private REMatch findMatchFixedLength(CharIndexed input, REMatch mymatch,
+ int index, int count) {
+ REMatch tryMatch = (REMatch) mymatch.clone();
+ while (true) {
+ tryMatch.index = index;
+ REMatch m = matchRest(input, tryMatch);
+ count--;
+ if (stingy) index += tokenFixedLength;
+ else index -= tokenFixedLength;
+ if (possessive) return m;
+ if (m != null) {
+ if (count > 0) {
+ m.backtrackStack.push(new BacktrackStack.Backtrack(
+ this, input, mymatch,
+ new int[] {index, count}));
+ }
+ return m;
}
+ if (count <= 0) return null;
}
- return doneIndex.head;
- }
+ }
void dump(StringBuffer os) {
os.append("(?:");
diff --git a/libjava/classpath/gnu/regexp/RETokenStart.java b/libjava/classpath/gnu/regexp/RETokenStart.java
index 42e3c0b..b992bd6 100644
--- a/libjava/classpath/gnu/regexp/RETokenStart.java
+++ b/libjava/classpath/gnu/regexp/RETokenStart.java
@@ -1,5 +1,5 @@
/* gnu/regexp/RETokenStart.java
- Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -49,7 +49,7 @@ class RETokenStart extends REToken {
return 0;
}
- boolean match(CharIndexed input, REMatch mymatch) {
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
// charAt(index-n) may be unknown on a Reader/InputStream. FIXME
// Match after a newline if in multiline mode
@@ -70,19 +70,26 @@ class RETokenStart extends REToken {
ch = input.charAt(mymatch.index - len + i);
} while (i < len);
- if (found) return next(input, mymatch);
+ if (found) return mymatch;
}
}
// Don't match at all if REG_NOTBOL is set.
- if ((mymatch.eflags & RE.REG_NOTBOL) > 0) return false;
+ if ((mymatch.eflags & RE.REG_NOTBOL) > 0) return null;
if ((mymatch.eflags & RE.REG_ANCHORINDEX) > 0)
return (mymatch.anchor == mymatch.offset) ?
- next(input, mymatch) : false;
+ mymatch : null;
else
return ((mymatch.index == 0) && (mymatch.offset == 0)) ?
- next(input, mymatch) : false;
+ mymatch : null;
+ }
+
+ boolean returnsFixedLengthmatches() { return true; }
+
+ int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+ if (matchThis(input, mymatch) != null) return max;
+ else return 0;
}
void dump(StringBuffer os) {
diff --git a/libjava/classpath/gnu/regexp/RETokenWordBoundary.java b/libjava/classpath/gnu/regexp/RETokenWordBoundary.java
index f86214b..1810339 100644
--- a/libjava/classpath/gnu/regexp/RETokenWordBoundary.java
+++ b/libjava/classpath/gnu/regexp/RETokenWordBoundary.java
@@ -1,5 +1,5 @@
/* gnu/regexp/RETokenWordBoundary.java
- Copyright (C) 2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -58,7 +58,7 @@ final class RETokenWordBoundary extends REToken {
}
- boolean match(CharIndexed input, REMatch mymatch) {
+ REMatch matchThis(CharIndexed input, REMatch mymatch) {
// Word boundary means input[index-1] was a word character
// and input[index] is not, or input[index] is a word character
// and input[index-1] was not
@@ -94,7 +94,14 @@ final class RETokenWordBoundary extends REToken {
if (negated) doNext = !doNext;
- return (doNext ? next(input, mymatch) : false);
+ return (doNext ? mymatch : null);
+ }
+
+ boolean returnsFixedLengthMatches() { return true; }
+
+ int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+ if(matchThis(input, mymatch) != null) return max;
+ else return 0;
}
void dump(StringBuffer os) {