diff options
Diffstat (limited to 'libjava/classpath/gnu/java/util/regex')
25 files changed, 2371 insertions, 2371 deletions
diff --git a/libjava/classpath/gnu/java/util/regex/BacktrackStack.java b/libjava/classpath/gnu/java/util/regex/BacktrackStack.java index 6847261..b03fb87 100644 --- a/libjava/classpath/gnu/java/util/regex/BacktrackStack.java +++ b/libjava/classpath/gnu/java/util/regex/BacktrackStack.java @@ -50,16 +50,16 @@ final class BacktrackStack /** A set of data to be used for backtracking. */ static class Backtrack { - /** REToken to which to go back */ + /** REToken to which to go back */ REToken token; - /** CharIndexed on which matches are being searched for. */ + /** CharIndexed on which matches are being searched for. */ CharIndexed input; - /** REMatch to be used by the REToken token. */ + /** REMatch to be used by the REToken token. */ REMatch match; - /** Some parameter used by the token's backtrack method. */ + /** Some parameter used by the token's backtrack method. */ Object param; Backtrack (REToken token, CharIndexed input, REMatch match, - Object param) + Object param) { this.token = token; this.input = input; @@ -104,7 +104,7 @@ final class BacktrackStack { for (int i = 0; i < size; i++) { - stack[i] = null; + stack[i] = null; } size = 0; } @@ -113,10 +113,10 @@ final class BacktrackStack { if (size >= capacity) { - capacity += CAPACITY_INCREMENT; - Backtrack[]newStack = new Backtrack[capacity]; - System.arraycopy (stack, 0, newStack, 0, size); - stack = newStack; + capacity += CAPACITY_INCREMENT; + Backtrack[]newStack = new Backtrack[capacity]; + System.arraycopy (stack, 0, newStack, 0, size); + stack = newStack; } stack[size++] = bt; } diff --git a/libjava/classpath/gnu/java/util/regex/CharIndexed.java b/libjava/classpath/gnu/java/util/regex/CharIndexed.java index 070547d..de4b166 100644 --- a/libjava/classpath/gnu/java/util/regex/CharIndexed.java +++ b/libjava/classpath/gnu/java/util/regex/CharIndexed.java @@ -82,7 +82,7 @@ public interface CharIndexed * true if the new cursor position is valid or cursor position is at * the end of input. */ - boolean move1 (int index); // I cannot think of a better name for this. + boolean move1 (int index); // I cannot think of a better name for this. /** * Returns true if the most recent move() operation placed the cursor diff --git a/libjava/classpath/gnu/java/util/regex/CharIndexedInputStream.java b/libjava/classpath/gnu/java/util/regex/CharIndexedInputStream.java index e42710b..d634029 100644 --- a/libjava/classpath/gnu/java/util/regex/CharIndexedInputStream.java +++ b/libjava/classpath/gnu/java/util/regex/CharIndexedInputStream.java @@ -45,7 +45,7 @@ import java.io.InputStream; class CharIndexedInputStream implements CharIndexed { private static final int BUFFER_INCREMENT = 1024; - private static final int UNKNOWN = Integer.MAX_VALUE; // value for end + private static final int UNKNOWN = Integer.MAX_VALUE; // value for end private BufferedInputStream br; @@ -78,22 +78,22 @@ class CharIndexedInputStream implements CharIndexed { if (end == 1) return false; - end--; // closer to end + end--; // closer to end try { if (index != -1) - { - br.reset (); - } + { + br.reset (); + } int i = br.read (); br.mark (bufsize); if (i == -1) - { - end = 1; - cached = OUT_OF_BOUNDS; - return false; - } + { + end = 1; + cached = OUT_OF_BOUNDS; + return false; + } cached = (char) i; index = 1; } catch (IOException e) @@ -109,62 +109,62 @@ class CharIndexedInputStream implements CharIndexed { if (index == 0) { - return cached; + return cached; } else if (index >= end) { - return OUT_OF_BOUNDS; + return OUT_OF_BOUNDS; } else if (index == -1) { - return lookBehind[0]; + return lookBehind[0]; } else if (index == -2) { - return lookBehind[1]; + return lookBehind[1]; } else if (index < -2) { - return OUT_OF_BOUNDS; + return OUT_OF_BOUNDS; } else if (index >= bufsize) { - // Allocate more space in the buffer. - try - { - while (bufsize <= index) - bufsize += BUFFER_INCREMENT; - br.reset (); - br.mark (bufsize); - br.skip (index - 1); - } - catch (IOException e) - { - } + // Allocate more space in the buffer. + try + { + while (bufsize <= index) + bufsize += BUFFER_INCREMENT; + br.reset (); + br.mark (bufsize); + br.skip (index - 1); + } + catch (IOException e) + { + } } else if (this.index != index) { - try - { - br.reset (); - br.skip (index - 1); - } - catch (IOException e) - { - } + try + { + br.reset (); + br.skip (index - 1); + } + catch (IOException e) + { + } } char ch = OUT_OF_BOUNDS; try { int i = br.read (); - this.index = index + 1; // this.index is index of next pos relative to charAt(0) + this.index = index + 1; // this.index is index of next pos relative to charAt(0) if (i == -1) - { - // set flag that next should fail next time? - end = index; - return ch; - } + { + // set flag that next should fail next time? + end = index; + return ch; + } ch = (char) i; } catch (IOException ie) { diff --git a/libjava/classpath/gnu/java/util/regex/RE.java b/libjava/classpath/gnu/java/util/regex/RE.java index d064f7a..5e9974a 100644 --- a/libjava/classpath/gnu/java/util/regex/RE.java +++ b/libjava/classpath/gnu/java/util/regex/RE.java @@ -53,7 +53,7 @@ import java.util.ResourceBundle; * expressions. * <P> * A regular expression object (class RE) is compiled by constructing it - * from a String, StringBuffer or character array, with optional + * from a String, StringBuffer or character array, with optional * compilation flags (below) * and an optional syntax specification (see RESyntax; if not specified, * <code>RESyntax.RE_SYNTAX_PERL5</code> is used). @@ -110,7 +110,7 @@ import java.util.ResourceBundle; * <P> * You can optionally affect the execution environment by using a * combination of execution flags (constants listed below). - * + * * <P> * All operations on a regular expression are performed in a * thread-safe manner. @@ -281,7 +281,7 @@ public class RE extends REToken { if (messages == null) messages = - PropertyResourceBundle.getBundle (bundle, Locale.getDefault ()); + PropertyResourceBundle.getBundle (bundle, Locale.getDefault ()); return messages.getString (key); } @@ -335,7 +335,7 @@ public class RE extends REToken // internal constructor used for alternation private RE (REToken first, REToken last, int subs, int subIndex, - int minLength, int maxLength) + int minLength, int maxLength) { super (subIndex); firstToken = first; @@ -347,9 +347,9 @@ public class RE extends REToken } private RE (Object patternObj, int cflags, RESyntax syntax, int myIndex, - int nextSub) throws REException + int nextSub) throws REException { - super (myIndex); // Subexpression index of this token. + super (myIndex); // Subexpression index of this token. initialize (patternObj, cflags, syntax, myIndex, nextSub); } @@ -361,41 +361,41 @@ public class RE extends REToken // The meat of construction protected void initialize (Object patternObj, int cflags, RESyntax syntax, - int myIndex, int nextSub) throws REException + int myIndex, int nextSub) throws REException { char[] pattern; if (patternObj instanceof String) { - pattern = ((String) patternObj).toCharArray (); + pattern = ((String) patternObj).toCharArray (); } else if (patternObj instanceof char[]) { - pattern = (char[]) patternObj; + pattern = (char[]) patternObj; } else if (patternObj instanceof StringBuffer) { - pattern = new char[((StringBuffer) patternObj).length ()]; - ((StringBuffer) patternObj).getChars (0, pattern.length, pattern, 0); + pattern = new char[((StringBuffer) patternObj).length ()]; + ((StringBuffer) patternObj).getChars (0, pattern.length, pattern, 0); } else if (patternObj instanceof StringBuilder) { - pattern = new char[((StringBuilder) patternObj).length ()]; - ((StringBuilder) patternObj).getChars (0, pattern.length, pattern, 0); + pattern = new char[((StringBuilder) patternObj).length ()]; + ((StringBuilder) patternObj).getChars (0, pattern.length, pattern, 0); } else if (patternObj instanceof CPStringBuilder) { - pattern = new char[((CPStringBuilder) patternObj).length ()]; - ((CPStringBuilder) patternObj).getChars (0, pattern.length, pattern, - 0); + pattern = new char[((CPStringBuilder) patternObj).length ()]; + ((CPStringBuilder) patternObj).getChars (0, pattern.length, pattern, + 0); } else { - pattern = patternObj.toString ().toCharArray (); + pattern = patternObj.toString ().toCharArray (); } int pLength = pattern.length; - numSubs = 0; // Number of subexpressions in this token. + numSubs = 0; // Number of subexpressions in this token. ArrayList < REToken > branches = null; // linked list of tokens (sort of -- some closed loops can exist) @@ -429,956 +429,956 @@ public class RE extends REToken while (index < pLength) { - // read the next character unit (including backslash escapes) - index = getCharUnit (pattern, index, unit, quot); - - if (unit.bk) - if (unit.ch == 'Q') - { - quot = true; - continue; - } - else if (unit.ch == 'E') - { - quot = false; - continue; - } - if (quot) - unit.bk = false; - - if (((cflags & REG_X_COMMENTS) > 0) && (!unit.bk) && (!quot)) - { - if (Character.isWhitespace (unit.ch)) - { - continue; - } - if (unit.ch == '#') - { - for (int i = index; i < pLength; i++) - { - if (pattern[i] == '\n') - { - index = i + 1; - continue; - } - else if (pattern[i] == '\r') - { - if (i + 1 < pLength && pattern[i + 1] == '\n') - { - index = i + 2; - } - else - { - index = i + 1; - } - continue; - } - } - index = pLength; - continue; - } - } - - // ALTERNATION OPERATOR - // \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT) - // not available if RE_LIMITED_OPS is set - - // TODO: the '\n' literal here should be a test against REToken.newline, - // which unfortunately may be more than a single character. - if (((unit.ch == '|' - && (syntax.get (RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot))) - || (syntax.get (RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') - && !(unit.bk || quot))) - && !syntax.get (RESyntax.RE_LIMITED_OPS)) - { - // make everything up to here be a branch. create vector if nec. - addToken (currentToken); - RE theBranch = - new RE (firstToken, lastToken, numSubs, subIndex, minimumLength, - maximumLength); - minimumLength = 0; - maximumLength = 0; - if (branches == null) - { - branches = new ArrayList < REToken > (); - } - branches.add (theBranch); - firstToken = lastToken = currentToken = null; - } - - // INTERVAL OPERATOR: - // {x} | {x,} | {x,y} (RE_INTERVALS && RE_NO_BK_BRACES) - // \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES) - // - // OPEN QUESTION: - // what is proper interpretation of '{' at start of string? - // - // This method used to check "repeat.empty.token" to avoid such regexp - // as "(a*){2,}", but now "repeat.empty.token" is allowed. - - else if ((unit.ch == '{') && syntax.get (RESyntax.RE_INTERVALS) - && (syntax. - get (RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) - { - int newIndex = getMinMax (pattern, index, minMax, syntax); - if (newIndex > index) - { - if (minMax.first > minMax.second) - throw new - REException (getLocalizedMessage ("interval.order"), - REException.REG_BADRPT, newIndex); - if (currentToken == null) - throw new - REException (getLocalizedMessage ("repeat.no.token"), - REException.REG_BADRPT, newIndex); - if (currentToken instanceof RETokenRepeated) - throw new - REException (getLocalizedMessage ("repeat.chained"), - REException.REG_BADRPT, newIndex); - if (currentToken instanceof RETokenWordBoundary - || currentToken instanceof RETokenWordBoundary) - throw new - REException (getLocalizedMessage ("repeat.assertion"), - REException.REG_BADRPT, newIndex); - index = newIndex; - currentToken = - setRepeated (currentToken, minMax.first, minMax.second, - index); - } - else - { - addToken (currentToken); - currentToken = new RETokenChar (subIndex, unit.ch, insens); - if (insensUSASCII) - currentToken.unicodeAware = false; - } - } - - // LIST OPERATOR: - // [...] | [^...] - - else if ((unit.ch == '[') && !(unit.bk || quot)) - { - // Create a new RETokenOneOf - ParseCharClassResult result = - parseCharClass (subIndex, pattern, index, pLength, cflags, - syntax, 0); - addToken (currentToken); - currentToken = result.token; - index = result.index; - } - - // SUBEXPRESSIONS - // (...) | \(...\) depending on RE_NO_BK_PARENS - - else if ((unit.ch == '(') - && (syntax. - get (RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) - { - boolean pure = false; - boolean comment = false; - boolean lookAhead = false; - boolean lookBehind = false; - boolean independent = false; - boolean negativelh = false; - boolean negativelb = false; - if ((index + 1 < pLength) && (pattern[index] == '?')) - { - switch (pattern[index + 1]) - { - case '!': - if (syntax.get (RESyntax.RE_LOOKAHEAD)) - { - pure = true; - negativelh = true; - lookAhead = true; - index += 2; - } - break; - case '=': - if (syntax.get (RESyntax.RE_LOOKAHEAD)) - { - pure = true; - lookAhead = true; - index += 2; - } - break; - case '<': - // We assume that if the syntax supports look-ahead, - // it also supports look-behind. - if (syntax.get (RESyntax.RE_LOOKAHEAD)) - { - index++; - switch (pattern[index + 1]) - { - case '!': - pure = true; - negativelb = true; - lookBehind = true; - index += 2; - break; - case '=': - pure = true; - lookBehind = true; - index += 2; - } - } - break; - case '>': - // We assume that if the syntax supports look-ahead, - // it also supports independent group. - if (syntax.get (RESyntax.RE_LOOKAHEAD)) - { - pure = true; - independent = true; - index += 2; - } - break; - case 'i': - case 'd': - case 'm': - case 's': - case 'u': - case 'x': - case '-': - if (!syntax.get (RESyntax.RE_EMBEDDED_FLAGS)) - break; - // Set or reset syntax flags. - int flagIndex = index + 1; - int endFlag = -1; - RESyntax newSyntax = new RESyntax (syntax); - int newCflags = cflags; - boolean negate = false; - while (flagIndex < pLength && endFlag < 0) - { - switch (pattern[flagIndex]) - { - case 'i': - if (negate) - newCflags &= ~REG_ICASE; - else - newCflags |= REG_ICASE; - flagIndex++; - break; - case 'd': - if (negate) - newSyntax.setLineSeparator (RESyntax. - DEFAULT_LINE_SEPARATOR); - else - newSyntax.setLineSeparator ("\n"); - flagIndex++; - break; - case 'm': - if (negate) - newCflags &= ~REG_MULTILINE; - else - newCflags |= REG_MULTILINE; - flagIndex++; - break; - case 's': - if (negate) - newCflags &= ~REG_DOT_NEWLINE; - else - newCflags |= REG_DOT_NEWLINE; - flagIndex++; - break; - case 'u': - if (negate) - newCflags |= REG_ICASE_USASCII; - else - newCflags &= ~REG_ICASE_USASCII; - flagIndex++; - break; - case 'x': - if (negate) - newCflags &= ~REG_X_COMMENTS; - else - newCflags |= REG_X_COMMENTS; - flagIndex++; - break; - case '-': - negate = true; - flagIndex++; - break; - case ':': - case ')': - endFlag = pattern[flagIndex]; - break; - default: - throw new - REException (getLocalizedMessage - ("repeat.no.token"), - REException.REG_BADRPT, index); - } - } - if (endFlag == ')') - { - syntax = newSyntax; - cflags = newCflags; - insens = ((cflags & REG_ICASE) > 0); - insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0); - // This can be treated as though it were a comment. - comment = true; - index = flagIndex - 1; - break; - } - if (endFlag == ':') - { - savedSyntax = syntax; - savedCflags = cflags; - flagsSaved = true; - syntax = newSyntax; - cflags = newCflags; - insens = ((cflags & REG_ICASE) > 0); - insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0); - index = flagIndex - 1; - // Fall through to the next case. - } - else - { - throw new - REException (getLocalizedMessage - ("unmatched.paren"), - REException.REG_ESUBREG, index); - } - case ':': - if (syntax.get (RESyntax.RE_PURE_GROUPING)) - { - pure = true; - index += 2; - } - break; - case '#': - if (syntax.get (RESyntax.RE_COMMENTS)) - { - comment = true; - } - break; - default: - throw new - REException (getLocalizedMessage ("repeat.no.token"), - REException.REG_BADRPT, index); - } - } - - if (index >= pLength) - { - throw new - REException (getLocalizedMessage ("unmatched.paren"), - REException.REG_ESUBREG, index); - } - - // find end of subexpression - int endIndex = index; - int nextIndex = index; - int nested = 0; - - while (((nextIndex = - getCharUnit (pattern, endIndex, unit, false)) > 0) - && !(nested == 0 && (unit.ch == ')') - && (syntax. - get (RESyntax.RE_NO_BK_PARENS) ^ (unit.bk - || quot)))) - { - if ((endIndex = nextIndex) >= pLength) - throw new - REException (getLocalizedMessage ("subexpr.no.end"), - REException.REG_ESUBREG, nextIndex); - else - if ((unit.ch == '[') && !(unit.bk || quot)) - { - // I hate to do something similar to the LIST OPERATOR matters - // above, but ... - int listIndex = nextIndex; - if (listIndex < pLength && pattern[listIndex] == '^') - listIndex++; - if (listIndex < pLength && pattern[listIndex] == ']') - listIndex++; - int listEndIndex = -1; - int listNest = 0; - while (listIndex < pLength && listEndIndex < 0) - { - switch (pattern[listIndex++]) - { - case '\\': - listIndex++; - break; - case '[': - // Sun's API document says that regexp like "[a-d[m-p]]" - // is legal. Even something like "[[[^]]]]" is accepted. - listNest++; - if (listIndex < pLength - && pattern[listIndex] == '^') - listIndex++; - if (listIndex < pLength - && pattern[listIndex] == ']') - listIndex++; - break; - case ']': - if (listNest == 0) - listEndIndex = listIndex; - listNest--; - break; - } - } - if (listEndIndex >= 0) - { - nextIndex = listEndIndex; - if ((endIndex = nextIndex) >= pLength) - throw new - REException (getLocalizedMessage ("subexpr.no.end"), - REException.REG_ESUBREG, nextIndex); - else - continue; - } - throw new - REException (getLocalizedMessage ("subexpr.no.end"), - REException.REG_ESUBREG, nextIndex); - } - else if (unit.ch == '(' - && (syntax. - get (RESyntax.RE_NO_BK_PARENS) ^ (unit.bk - || quot))) - nested++; - else if (unit.ch == ')' - && (syntax. - get (RESyntax.RE_NO_BK_PARENS) ^ (unit.bk - || quot))) - nested--; - } - - // endIndex is now position at a ')','\)' - // nextIndex is end of string or position after ')' or '\)' - - if (comment) - index = nextIndex; - else - { // not a comment - // create RE subexpression as token. - addToken (currentToken); - if (!pure) - { - numSubs++; - } - - int useIndex = (pure || lookAhead || lookBehind - || independent) ? 0 : nextSub + numSubs; - currentToken = - new RE (String.valueOf (pattern, index, endIndex - index). - toCharArray (), cflags, syntax, useIndex, - nextSub + numSubs); - numSubs += ((RE) currentToken).getNumSubs (); - - if (lookAhead) - { - currentToken = - new RETokenLookAhead (currentToken, negativelh); - } - else if (lookBehind) - { - currentToken = - new RETokenLookBehind (currentToken, negativelb); - } - else if (independent) - { - currentToken = new RETokenIndependent (currentToken); - } - - index = nextIndex; - if (flagsSaved) - { - syntax = savedSyntax; - cflags = savedCflags; - insens = ((cflags & REG_ICASE) > 0); - insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0); - flagsSaved = false; - } - } // not a comment - } // subexpression - - // UNMATCHED RIGHT PAREN - // ) or \) throw exception if - // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) - else if (!syntax.get (RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) - && ((unit.ch == ')') - && (syntax. - get (RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) - { - throw new REException (getLocalizedMessage ("unmatched.paren"), - REException.REG_EPAREN, index); - } - - // START OF LINE OPERATOR - // ^ - - else if ((unit.ch == '^') && !(unit.bk || quot)) - { - addToken (currentToken); - currentToken = null; - RETokenStart token = null; - if ((cflags & REG_MULTILINE) > 0) - { - String sep = syntax.getLineSeparator (); - if (sep == null) - { - token = new RETokenStart (subIndex, null, true); - } - else - { - token = new RETokenStart (subIndex, sep); - } - } - else - { - token = new RETokenStart (subIndex, null); - } - addToken (token); - } - - // END OF LINE OPERATOR - // $ - - else if ((unit.ch == '$') && !(unit.bk || quot)) - { - addToken (currentToken); - currentToken = null; - RETokenEnd token = null; - if ((cflags & REG_MULTILINE) > 0) - { - String sep = syntax.getLineSeparator (); - if (sep == null) - { - token = new RETokenEnd (subIndex, null, true); - } - else - { - token = new RETokenEnd (subIndex, sep); - } - } - else - { - token = new RETokenEnd (subIndex, null); - } - addToken (token); - } - - // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null) - // . - - else if ((unit.ch == '.') && !(unit.bk || quot)) - { - addToken (currentToken); - currentToken = - new RETokenAny (subIndex, syntax.get (RESyntax.RE_DOT_NEWLINE) - || ((cflags & REG_DOT_NEWLINE) > 0), - syntax.get (RESyntax.RE_DOT_NOT_NULL)); - } - - // ZERO-OR-MORE REPEAT OPERATOR - // * - // - // This method used to check "repeat.empty.token" to avoid such regexp - // as "(a*)*", but now "repeat.empty.token" is allowed. - - else if ((unit.ch == '*') && !(unit.bk || quot)) - { - if (currentToken == null) - throw new REException (getLocalizedMessage ("repeat.no.token"), - REException.REG_BADRPT, index); - if (currentToken instanceof RETokenRepeated) - throw new REException (getLocalizedMessage ("repeat.chained"), - REException.REG_BADRPT, index); - if (currentToken instanceof RETokenWordBoundary - || currentToken instanceof RETokenWordBoundary) - throw new REException (getLocalizedMessage ("repeat.assertion"), - REException.REG_BADRPT, index); - currentToken = - setRepeated (currentToken, 0, Integer.MAX_VALUE, index); - } - - // ONE-OR-MORE REPEAT OPERATOR / POSSESSIVE MATCHING OPERATOR - // + | \+ depending on RE_BK_PLUS_QM - // not available if RE_LIMITED_OPS is set - // - // This method used to check "repeat.empty.token" to avoid such regexp - // as "(a*)+", but now "repeat.empty.token" is allowed. - - else if ((unit.ch == '+') && !syntax.get (RESyntax.RE_LIMITED_OPS) - && (!syntax. - get (RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) - { - if (currentToken == null) - throw new REException (getLocalizedMessage ("repeat.no.token"), - REException.REG_BADRPT, index); - - // Check for possessive matching on RETokenRepeated - if (currentToken instanceof RETokenRepeated) - { - RETokenRepeated tokenRep = (RETokenRepeated) currentToken; - if (syntax.get (RESyntax.RE_POSSESSIVE_OPS) - && !tokenRep.isPossessive () && !tokenRep.isStingy ()) - tokenRep.makePossessive (); - else - throw new - REException (getLocalizedMessage ("repeat.chained"), - REException.REG_BADRPT, index); - - } - else if (currentToken instanceof RETokenWordBoundary - || currentToken instanceof RETokenWordBoundary) - throw new REException (getLocalizedMessage ("repeat.assertion"), - REException.REG_BADRPT, index); - else - currentToken = - setRepeated (currentToken, 1, Integer.MAX_VALUE, index); - } - - // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR - // ? | \? depending on RE_BK_PLUS_QM - // not available if RE_LIMITED_OPS is set - // stingy matching if RE_STINGY_OPS is set and it follows a quantifier - - else if ((unit.ch == '?') && !syntax.get (RESyntax.RE_LIMITED_OPS) - && (!syntax. - get (RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) - { - if (currentToken == null) - throw new REException (getLocalizedMessage ("repeat.no.token"), - REException.REG_BADRPT, index); - - // Check for stingy matching on RETokenRepeated - if (currentToken instanceof RETokenRepeated) - { - RETokenRepeated tokenRep = (RETokenRepeated) currentToken; - if (syntax.get (RESyntax.RE_STINGY_OPS) - && !tokenRep.isStingy () && !tokenRep.isPossessive ()) - tokenRep.makeStingy (); - else - throw new - REException (getLocalizedMessage ("repeat.chained"), - REException.REG_BADRPT, index); - } - else if (currentToken instanceof RETokenWordBoundary - || currentToken instanceof RETokenWordBoundary) - throw new REException (getLocalizedMessage ("repeat.assertion"), - REException.REG_BADRPT, index); - else - currentToken = setRepeated (currentToken, 0, 1, index); - } - - // OCTAL CHARACTER - // \0377 - - else if (unit.bk && (unit.ch == '0') - && syntax.get (RESyntax.RE_OCTAL_CHAR)) - { - CharExpression ce = - getCharExpression (pattern, index - 2, pLength, syntax); - if (ce == null) - throw new REException ("invalid octal character", - REException.REG_ESCAPE, index); - index = index - 2 + ce.len; - addToken (currentToken); - currentToken = new RETokenChar (subIndex, ce.ch, insens); - if (insensUSASCII) - currentToken.unicodeAware = false; - } - - // BACKREFERENCE OPERATOR - // \1 \2 ... \9 and \10 \11 \12 ... - // not available if RE_NO_BK_REFS is set - // Perl recognizes \10, \11, and so on only if enough number of - // parentheses have opened before it, otherwise they are treated - // as aliases of \010, \011, ... (octal characters). In case of - // Sun's JDK, octal character expression must always begin with \0. - // We will do as JDK does. But FIXME, take a look at "(a)(b)\29". - // JDK treats \2 as a back reference to the 2nd group because - // there are only two groups. But in our poor implementation, - // we cannot help but treat \29 as a back reference to the 29th group. - - else if (unit.bk && Character.isDigit (unit.ch) - && !syntax.get (RESyntax.RE_NO_BK_REFS)) - { - addToken (currentToken); - int numBegin = index - 1; - int numEnd = pLength; - for (int i = index; i < pLength; i++) - { - if (!Character.isDigit (pattern[i])) - { - numEnd = i; - break; - } - } - int num = parseInt (pattern, numBegin, numEnd - numBegin, 10); - - currentToken = new RETokenBackRef (subIndex, num, insens); - if (insensUSASCII) - currentToken.unicodeAware = false; - index = numEnd; - } - - // START OF STRING OPERATOR - // \A if RE_STRING_ANCHORS is set - - else if (unit.bk && (unit.ch == 'A') - && syntax.get (RESyntax.RE_STRING_ANCHORS)) - { - addToken (currentToken); - currentToken = new RETokenStart (subIndex, null); - } - - // WORD BREAK OPERATOR - // \b if ???? - - else if (unit.bk && (unit.ch == 'b') - && syntax.get (RESyntax.RE_STRING_ANCHORS)) - { - addToken (currentToken); - currentToken = - new RETokenWordBoundary (subIndex, - RETokenWordBoundary. - BEGIN | RETokenWordBoundary.END, - false); - } - - // WORD BEGIN OPERATOR - // \< if ???? - else if (unit.bk && (unit.ch == '<')) - { - addToken (currentToken); - currentToken = - new RETokenWordBoundary (subIndex, RETokenWordBoundary.BEGIN, - false); - } - - // WORD END OPERATOR - // \> if ???? - else if (unit.bk && (unit.ch == '>')) - { - addToken (currentToken); - currentToken = - new RETokenWordBoundary (subIndex, RETokenWordBoundary.END, - false); - } - - // NON-WORD BREAK OPERATOR - // \B if ???? - - else if (unit.bk && (unit.ch == 'B') - && syntax.get (RESyntax.RE_STRING_ANCHORS)) - { - addToken (currentToken); - currentToken = - new RETokenWordBoundary (subIndex, - RETokenWordBoundary. - BEGIN | RETokenWordBoundary.END, true); - } - - - // DIGIT OPERATOR - // \d if RE_CHAR_CLASS_ESCAPES is set - - else if (unit.bk && (unit.ch == 'd') - && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) - { - addToken (currentToken); - currentToken = - new RETokenPOSIX (subIndex, RETokenPOSIX.DIGIT, insens, false); - if (insensUSASCII) - currentToken.unicodeAware = false; - } - - // NON-DIGIT OPERATOR - // \D - - else if (unit.bk && (unit.ch == 'D') - && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) - { - addToken (currentToken); - currentToken = - new RETokenPOSIX (subIndex, RETokenPOSIX.DIGIT, insens, true); - if (insensUSASCII) - currentToken.unicodeAware = false; - } - - // NEWLINE ESCAPE - // \n - - else if (unit.bk && (unit.ch == 'n')) - { - addToken (currentToken); - currentToken = new RETokenChar (subIndex, '\n', false); - } - - // RETURN ESCAPE - // \r - - else if (unit.bk && (unit.ch == 'r')) - { - addToken (currentToken); - currentToken = new RETokenChar (subIndex, '\r', false); - } - - // WHITESPACE OPERATOR - // \s if RE_CHAR_CLASS_ESCAPES is set - - else if (unit.bk && (unit.ch == 's') - && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) - { - addToken (currentToken); - currentToken = - new RETokenPOSIX (subIndex, RETokenPOSIX.SPACE, insens, false); - if (insensUSASCII) - currentToken.unicodeAware = false; - } - - // NON-WHITESPACE OPERATOR - // \S - - else if (unit.bk && (unit.ch == 'S') - && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) - { - addToken (currentToken); - currentToken = - new RETokenPOSIX (subIndex, RETokenPOSIX.SPACE, insens, true); - if (insensUSASCII) - currentToken.unicodeAware = false; - } - - // TAB ESCAPE - // \t - - else if (unit.bk && (unit.ch == 't')) - { - addToken (currentToken); - currentToken = new RETokenChar (subIndex, '\t', false); - } - - // ALPHANUMERIC OPERATOR - // \w - - else if (unit.bk && (unit.ch == 'w') - && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) - { - addToken (currentToken); - currentToken = - new RETokenPOSIX (subIndex, RETokenPOSIX.ALNUM, insens, false); - if (insensUSASCII) - currentToken.unicodeAware = false; - } - - // NON-ALPHANUMERIC OPERATOR - // \W - - else if (unit.bk && (unit.ch == 'W') - && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) - { - addToken (currentToken); - currentToken = - new RETokenPOSIX (subIndex, RETokenPOSIX.ALNUM, insens, true); - if (insensUSASCII) - currentToken.unicodeAware = false; - } - - // END OF STRING OPERATOR - // \Z, \z - - // FIXME: \Z and \z are different in that if the input string - // ends with a line terminator, \Z matches the position before - // the final terminator. This special behavior of \Z is yet - // to be implemented. - - else if (unit.bk && (unit.ch == 'Z' || unit.ch == 'z') && - syntax.get (RESyntax.RE_STRING_ANCHORS)) - { - addToken (currentToken); - currentToken = new RETokenEnd (subIndex, null); - } - - // HEX CHARACTER, UNICODE CHARACTER - // \x1B, \u1234 - - else - if ((unit.bk && (unit.ch == 'x') - && syntax.get (RESyntax.RE_HEX_CHAR)) || (unit.bk - && (unit.ch == 'u') - && syntax. - get (RESyntax. - RE_UNICODE_CHAR))) - { - CharExpression ce = - getCharExpression (pattern, index - 2, pLength, syntax); - if (ce == null) - throw new REException ("invalid hex character", - REException.REG_ESCAPE, index); - index = index - 2 + ce.len; - addToken (currentToken); - currentToken = new RETokenChar (subIndex, ce.ch, insens); - if (insensUSASCII) - currentToken.unicodeAware = false; - } - - // NAMED PROPERTY - // \p{prop}, \P{prop} - - else - if ((unit.bk && (unit.ch == 'p') - && syntax.get (RESyntax.RE_NAMED_PROPERTY)) || (unit.bk - && (unit.ch == - 'P') - && syntax. - get (RESyntax. - RE_NAMED_PROPERTY))) - { - NamedProperty np = getNamedProperty (pattern, index - 2, pLength); - if (np == null) - throw new REException ("invalid escape sequence", - REException.REG_ESCAPE, index); - index = index - 2 + np.len; - addToken (currentToken); - currentToken = - getRETokenNamedProperty (subIndex, np, insens, index); - if (insensUSASCII) - currentToken.unicodeAware = false; - } - - // END OF PREVIOUS MATCH - // \G - - else if (unit.bk && (unit.ch == 'G') && - syntax.get (RESyntax.RE_STRING_ANCHORS)) - { - addToken (currentToken); - currentToken = new RETokenEndOfPreviousMatch (subIndex); - } - - // NON-SPECIAL CHARACTER (or escape to make literal) - // c | \* for example - - else - { // not a special character - addToken (currentToken); - currentToken = new RETokenChar (subIndex, unit.ch, insens); - if (insensUSASCII) - currentToken.unicodeAware = false; - } - } // end while + // read the next character unit (including backslash escapes) + index = getCharUnit (pattern, index, unit, quot); + + if (unit.bk) + if (unit.ch == 'Q') + { + quot = true; + continue; + } + else if (unit.ch == 'E') + { + quot = false; + continue; + } + if (quot) + unit.bk = false; + + if (((cflags & REG_X_COMMENTS) > 0) && (!unit.bk) && (!quot)) + { + if (Character.isWhitespace (unit.ch)) + { + continue; + } + if (unit.ch == '#') + { + for (int i = index; i < pLength; i++) + { + if (pattern[i] == '\n') + { + index = i + 1; + continue; + } + else if (pattern[i] == '\r') + { + if (i + 1 < pLength && pattern[i + 1] == '\n') + { + index = i + 2; + } + else + { + index = i + 1; + } + continue; + } + } + index = pLength; + continue; + } + } + + // ALTERNATION OPERATOR + // \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT) + // not available if RE_LIMITED_OPS is set + + // TODO: the '\n' literal here should be a test against REToken.newline, + // which unfortunately may be more than a single character. + if (((unit.ch == '|' + && (syntax.get (RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot))) + || (syntax.get (RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') + && !(unit.bk || quot))) + && !syntax.get (RESyntax.RE_LIMITED_OPS)) + { + // make everything up to here be a branch. create vector if nec. + addToken (currentToken); + RE theBranch = + new RE (firstToken, lastToken, numSubs, subIndex, minimumLength, + maximumLength); + minimumLength = 0; + maximumLength = 0; + if (branches == null) + { + branches = new ArrayList < REToken > (); + } + branches.add (theBranch); + firstToken = lastToken = currentToken = null; + } + + // INTERVAL OPERATOR: + // {x} | {x,} | {x,y} (RE_INTERVALS && RE_NO_BK_BRACES) + // \{x\} | \{x,\} | \{x,y\} (RE_INTERVALS && !RE_NO_BK_BRACES) + // + // OPEN QUESTION: + // what is proper interpretation of '{' at start of string? + // + // This method used to check "repeat.empty.token" to avoid such regexp + // as "(a*){2,}", but now "repeat.empty.token" is allowed. + + else if ((unit.ch == '{') && syntax.get (RESyntax.RE_INTERVALS) + && (syntax. + get (RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) + { + int newIndex = getMinMax (pattern, index, minMax, syntax); + if (newIndex > index) + { + if (minMax.first > minMax.second) + throw new + REException (getLocalizedMessage ("interval.order"), + REException.REG_BADRPT, newIndex); + if (currentToken == null) + throw new + REException (getLocalizedMessage ("repeat.no.token"), + REException.REG_BADRPT, newIndex); + if (currentToken instanceof RETokenRepeated) + throw new + REException (getLocalizedMessage ("repeat.chained"), + REException.REG_BADRPT, newIndex); + if (currentToken instanceof RETokenWordBoundary + || currentToken instanceof RETokenWordBoundary) + throw new + REException (getLocalizedMessage ("repeat.assertion"), + REException.REG_BADRPT, newIndex); + index = newIndex; + currentToken = + setRepeated (currentToken, minMax.first, minMax.second, + index); + } + else + { + addToken (currentToken); + currentToken = new RETokenChar (subIndex, unit.ch, insens); + if (insensUSASCII) + currentToken.unicodeAware = false; + } + } + + // LIST OPERATOR: + // [...] | [^...] + + else if ((unit.ch == '[') && !(unit.bk || quot)) + { + // Create a new RETokenOneOf + ParseCharClassResult result = + parseCharClass (subIndex, pattern, index, pLength, cflags, + syntax, 0); + addToken (currentToken); + currentToken = result.token; + index = result.index; + } + + // SUBEXPRESSIONS + // (...) | \(...\) depending on RE_NO_BK_PARENS + + else if ((unit.ch == '(') + && (syntax. + get (RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) + { + boolean pure = false; + boolean comment = false; + boolean lookAhead = false; + boolean lookBehind = false; + boolean independent = false; + boolean negativelh = false; + boolean negativelb = false; + if ((index + 1 < pLength) && (pattern[index] == '?')) + { + switch (pattern[index + 1]) + { + case '!': + if (syntax.get (RESyntax.RE_LOOKAHEAD)) + { + pure = true; + negativelh = true; + lookAhead = true; + index += 2; + } + break; + case '=': + if (syntax.get (RESyntax.RE_LOOKAHEAD)) + { + pure = true; + lookAhead = true; + index += 2; + } + break; + case '<': + // We assume that if the syntax supports look-ahead, + // it also supports look-behind. + if (syntax.get (RESyntax.RE_LOOKAHEAD)) + { + index++; + switch (pattern[index + 1]) + { + case '!': + pure = true; + negativelb = true; + lookBehind = true; + index += 2; + break; + case '=': + pure = true; + lookBehind = true; + index += 2; + } + } + break; + case '>': + // We assume that if the syntax supports look-ahead, + // it also supports independent group. + if (syntax.get (RESyntax.RE_LOOKAHEAD)) + { + pure = true; + independent = true; + index += 2; + } + break; + case 'i': + case 'd': + case 'm': + case 's': + case 'u': + case 'x': + case '-': + if (!syntax.get (RESyntax.RE_EMBEDDED_FLAGS)) + break; + // Set or reset syntax flags. + int flagIndex = index + 1; + int endFlag = -1; + RESyntax newSyntax = new RESyntax (syntax); + int newCflags = cflags; + boolean negate = false; + while (flagIndex < pLength && endFlag < 0) + { + switch (pattern[flagIndex]) + { + case 'i': + if (negate) + newCflags &= ~REG_ICASE; + else + newCflags |= REG_ICASE; + flagIndex++; + break; + case 'd': + if (negate) + newSyntax.setLineSeparator (RESyntax. + DEFAULT_LINE_SEPARATOR); + else + newSyntax.setLineSeparator ("\n"); + flagIndex++; + break; + case 'm': + if (negate) + newCflags &= ~REG_MULTILINE; + else + newCflags |= REG_MULTILINE; + flagIndex++; + break; + case 's': + if (negate) + newCflags &= ~REG_DOT_NEWLINE; + else + newCflags |= REG_DOT_NEWLINE; + flagIndex++; + break; + case 'u': + if (negate) + newCflags |= REG_ICASE_USASCII; + else + newCflags &= ~REG_ICASE_USASCII; + flagIndex++; + break; + case 'x': + if (negate) + newCflags &= ~REG_X_COMMENTS; + else + newCflags |= REG_X_COMMENTS; + flagIndex++; + break; + case '-': + negate = true; + flagIndex++; + break; + case ':': + case ')': + endFlag = pattern[flagIndex]; + break; + default: + throw new + REException (getLocalizedMessage + ("repeat.no.token"), + REException.REG_BADRPT, index); + } + } + if (endFlag == ')') + { + syntax = newSyntax; + cflags = newCflags; + insens = ((cflags & REG_ICASE) > 0); + insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0); + // This can be treated as though it were a comment. + comment = true; + index = flagIndex - 1; + break; + } + if (endFlag == ':') + { + savedSyntax = syntax; + savedCflags = cflags; + flagsSaved = true; + syntax = newSyntax; + cflags = newCflags; + insens = ((cflags & REG_ICASE) > 0); + insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0); + index = flagIndex - 1; + // Fall through to the next case. + } + else + { + throw new + REException (getLocalizedMessage + ("unmatched.paren"), + REException.REG_ESUBREG, index); + } + case ':': + if (syntax.get (RESyntax.RE_PURE_GROUPING)) + { + pure = true; + index += 2; + } + break; + case '#': + if (syntax.get (RESyntax.RE_COMMENTS)) + { + comment = true; + } + break; + default: + throw new + REException (getLocalizedMessage ("repeat.no.token"), + REException.REG_BADRPT, index); + } + } + + if (index >= pLength) + { + throw new + REException (getLocalizedMessage ("unmatched.paren"), + REException.REG_ESUBREG, index); + } + + // find end of subexpression + int endIndex = index; + int nextIndex = index; + int nested = 0; + + while (((nextIndex = + getCharUnit (pattern, endIndex, unit, false)) > 0) + && !(nested == 0 && (unit.ch == ')') + && (syntax. + get (RESyntax.RE_NO_BK_PARENS) ^ (unit.bk + || quot)))) + { + if ((endIndex = nextIndex) >= pLength) + throw new + REException (getLocalizedMessage ("subexpr.no.end"), + REException.REG_ESUBREG, nextIndex); + else + if ((unit.ch == '[') && !(unit.bk || quot)) + { + // I hate to do something similar to the LIST OPERATOR matters + // above, but ... + int listIndex = nextIndex; + if (listIndex < pLength && pattern[listIndex] == '^') + listIndex++; + if (listIndex < pLength && pattern[listIndex] == ']') + listIndex++; + int listEndIndex = -1; + int listNest = 0; + while (listIndex < pLength && listEndIndex < 0) + { + switch (pattern[listIndex++]) + { + case '\\': + listIndex++; + break; + case '[': + // Sun's API document says that regexp like "[a-d[m-p]]" + // is legal. Even something like "[[[^]]]]" is accepted. + listNest++; + if (listIndex < pLength + && pattern[listIndex] == '^') + listIndex++; + if (listIndex < pLength + && pattern[listIndex] == ']') + listIndex++; + break; + case ']': + if (listNest == 0) + listEndIndex = listIndex; + listNest--; + break; + } + } + if (listEndIndex >= 0) + { + nextIndex = listEndIndex; + if ((endIndex = nextIndex) >= pLength) + throw new + REException (getLocalizedMessage ("subexpr.no.end"), + REException.REG_ESUBREG, nextIndex); + else + continue; + } + throw new + REException (getLocalizedMessage ("subexpr.no.end"), + REException.REG_ESUBREG, nextIndex); + } + else if (unit.ch == '(' + && (syntax. + get (RESyntax.RE_NO_BK_PARENS) ^ (unit.bk + || quot))) + nested++; + else if (unit.ch == ')' + && (syntax. + get (RESyntax.RE_NO_BK_PARENS) ^ (unit.bk + || quot))) + nested--; + } + + // endIndex is now position at a ')','\)' + // nextIndex is end of string or position after ')' or '\)' + + if (comment) + index = nextIndex; + else + { // not a comment + // create RE subexpression as token. + addToken (currentToken); + if (!pure) + { + numSubs++; + } + + int useIndex = (pure || lookAhead || lookBehind + || independent) ? 0 : nextSub + numSubs; + currentToken = + new RE (String.valueOf (pattern, index, endIndex - index). + toCharArray (), cflags, syntax, useIndex, + nextSub + numSubs); + numSubs += ((RE) currentToken).getNumSubs (); + + if (lookAhead) + { + currentToken = + new RETokenLookAhead (currentToken, negativelh); + } + else if (lookBehind) + { + currentToken = + new RETokenLookBehind (currentToken, negativelb); + } + else if (independent) + { + currentToken = new RETokenIndependent (currentToken); + } + + index = nextIndex; + if (flagsSaved) + { + syntax = savedSyntax; + cflags = savedCflags; + insens = ((cflags & REG_ICASE) > 0); + insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0); + flagsSaved = false; + } + } // not a comment + } // subexpression + + // UNMATCHED RIGHT PAREN + // ) or \) throw exception if + // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) + else if (!syntax.get (RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) + && ((unit.ch == ')') + && (syntax. + get (RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) + { + throw new REException (getLocalizedMessage ("unmatched.paren"), + REException.REG_EPAREN, index); + } + + // START OF LINE OPERATOR + // ^ + + else if ((unit.ch == '^') && !(unit.bk || quot)) + { + addToken (currentToken); + currentToken = null; + RETokenStart token = null; + if ((cflags & REG_MULTILINE) > 0) + { + String sep = syntax.getLineSeparator (); + if (sep == null) + { + token = new RETokenStart (subIndex, null, true); + } + else + { + token = new RETokenStart (subIndex, sep); + } + } + else + { + token = new RETokenStart (subIndex, null); + } + addToken (token); + } + + // END OF LINE OPERATOR + // $ + + else if ((unit.ch == '$') && !(unit.bk || quot)) + { + addToken (currentToken); + currentToken = null; + RETokenEnd token = null; + if ((cflags & REG_MULTILINE) > 0) + { + String sep = syntax.getLineSeparator (); + if (sep == null) + { + token = new RETokenEnd (subIndex, null, true); + } + else + { + token = new RETokenEnd (subIndex, sep); + } + } + else + { + token = new RETokenEnd (subIndex, null); + } + addToken (token); + } + + // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null) + // . + + else if ((unit.ch == '.') && !(unit.bk || quot)) + { + addToken (currentToken); + currentToken = + new RETokenAny (subIndex, syntax.get (RESyntax.RE_DOT_NEWLINE) + || ((cflags & REG_DOT_NEWLINE) > 0), + syntax.get (RESyntax.RE_DOT_NOT_NULL)); + } + + // ZERO-OR-MORE REPEAT OPERATOR + // * + // + // This method used to check "repeat.empty.token" to avoid such regexp + // as "(a*)*", but now "repeat.empty.token" is allowed. + + else if ((unit.ch == '*') && !(unit.bk || quot)) + { + if (currentToken == null) + throw new REException (getLocalizedMessage ("repeat.no.token"), + REException.REG_BADRPT, index); + if (currentToken instanceof RETokenRepeated) + throw new REException (getLocalizedMessage ("repeat.chained"), + REException.REG_BADRPT, index); + if (currentToken instanceof RETokenWordBoundary + || currentToken instanceof RETokenWordBoundary) + throw new REException (getLocalizedMessage ("repeat.assertion"), + REException.REG_BADRPT, index); + currentToken = + setRepeated (currentToken, 0, Integer.MAX_VALUE, index); + } + + // ONE-OR-MORE REPEAT OPERATOR / POSSESSIVE MATCHING OPERATOR + // + | \+ depending on RE_BK_PLUS_QM + // not available if RE_LIMITED_OPS is set + // + // This method used to check "repeat.empty.token" to avoid such regexp + // as "(a*)+", but now "repeat.empty.token" is allowed. + + else if ((unit.ch == '+') && !syntax.get (RESyntax.RE_LIMITED_OPS) + && (!syntax. + get (RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) + { + if (currentToken == null) + throw new REException (getLocalizedMessage ("repeat.no.token"), + REException.REG_BADRPT, index); + + // Check for possessive matching on RETokenRepeated + if (currentToken instanceof RETokenRepeated) + { + RETokenRepeated tokenRep = (RETokenRepeated) currentToken; + if (syntax.get (RESyntax.RE_POSSESSIVE_OPS) + && !tokenRep.isPossessive () && !tokenRep.isStingy ()) + tokenRep.makePossessive (); + else + throw new + REException (getLocalizedMessage ("repeat.chained"), + REException.REG_BADRPT, index); + + } + else if (currentToken instanceof RETokenWordBoundary + || currentToken instanceof RETokenWordBoundary) + throw new REException (getLocalizedMessage ("repeat.assertion"), + REException.REG_BADRPT, index); + else + currentToken = + setRepeated (currentToken, 1, Integer.MAX_VALUE, index); + } + + // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR + // ? | \? depending on RE_BK_PLUS_QM + // not available if RE_LIMITED_OPS is set + // stingy matching if RE_STINGY_OPS is set and it follows a quantifier + + else if ((unit.ch == '?') && !syntax.get (RESyntax.RE_LIMITED_OPS) + && (!syntax. + get (RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) + { + if (currentToken == null) + throw new REException (getLocalizedMessage ("repeat.no.token"), + REException.REG_BADRPT, index); + + // Check for stingy matching on RETokenRepeated + if (currentToken instanceof RETokenRepeated) + { + RETokenRepeated tokenRep = (RETokenRepeated) currentToken; + if (syntax.get (RESyntax.RE_STINGY_OPS) + && !tokenRep.isStingy () && !tokenRep.isPossessive ()) + tokenRep.makeStingy (); + else + throw new + REException (getLocalizedMessage ("repeat.chained"), + REException.REG_BADRPT, index); + } + else if (currentToken instanceof RETokenWordBoundary + || currentToken instanceof RETokenWordBoundary) + throw new REException (getLocalizedMessage ("repeat.assertion"), + REException.REG_BADRPT, index); + else + currentToken = setRepeated (currentToken, 0, 1, index); + } + + // OCTAL CHARACTER + // \0377 + + else if (unit.bk && (unit.ch == '0') + && syntax.get (RESyntax.RE_OCTAL_CHAR)) + { + CharExpression ce = + getCharExpression (pattern, index - 2, pLength, syntax); + if (ce == null) + throw new REException ("invalid octal character", + REException.REG_ESCAPE, index); + index = index - 2 + ce.len; + addToken (currentToken); + currentToken = new RETokenChar (subIndex, ce.ch, insens); + if (insensUSASCII) + currentToken.unicodeAware = false; + } + + // BACKREFERENCE OPERATOR + // \1 \2 ... \9 and \10 \11 \12 ... + // not available if RE_NO_BK_REFS is set + // Perl recognizes \10, \11, and so on only if enough number of + // parentheses have opened before it, otherwise they are treated + // as aliases of \010, \011, ... (octal characters). In case of + // Sun's JDK, octal character expression must always begin with \0. + // We will do as JDK does. But FIXME, take a look at "(a)(b)\29". + // JDK treats \2 as a back reference to the 2nd group because + // there are only two groups. But in our poor implementation, + // we cannot help but treat \29 as a back reference to the 29th group. + + else if (unit.bk && Character.isDigit (unit.ch) + && !syntax.get (RESyntax.RE_NO_BK_REFS)) + { + addToken (currentToken); + int numBegin = index - 1; + int numEnd = pLength; + for (int i = index; i < pLength; i++) + { + if (!Character.isDigit (pattern[i])) + { + numEnd = i; + break; + } + } + int num = parseInt (pattern, numBegin, numEnd - numBegin, 10); + + currentToken = new RETokenBackRef (subIndex, num, insens); + if (insensUSASCII) + currentToken.unicodeAware = false; + index = numEnd; + } + + // START OF STRING OPERATOR + // \A if RE_STRING_ANCHORS is set + + else if (unit.bk && (unit.ch == 'A') + && syntax.get (RESyntax.RE_STRING_ANCHORS)) + { + addToken (currentToken); + currentToken = new RETokenStart (subIndex, null); + } + + // WORD BREAK OPERATOR + // \b if ???? + + else if (unit.bk && (unit.ch == 'b') + && syntax.get (RESyntax.RE_STRING_ANCHORS)) + { + addToken (currentToken); + currentToken = + new RETokenWordBoundary (subIndex, + RETokenWordBoundary. + BEGIN | RETokenWordBoundary.END, + false); + } + + // WORD BEGIN OPERATOR + // \< if ???? + else if (unit.bk && (unit.ch == '<')) + { + addToken (currentToken); + currentToken = + new RETokenWordBoundary (subIndex, RETokenWordBoundary.BEGIN, + false); + } + + // WORD END OPERATOR + // \> if ???? + else if (unit.bk && (unit.ch == '>')) + { + addToken (currentToken); + currentToken = + new RETokenWordBoundary (subIndex, RETokenWordBoundary.END, + false); + } + + // NON-WORD BREAK OPERATOR + // \B if ???? + + else if (unit.bk && (unit.ch == 'B') + && syntax.get (RESyntax.RE_STRING_ANCHORS)) + { + addToken (currentToken); + currentToken = + new RETokenWordBoundary (subIndex, + RETokenWordBoundary. + BEGIN | RETokenWordBoundary.END, true); + } + + + // DIGIT OPERATOR + // \d if RE_CHAR_CLASS_ESCAPES is set + + else if (unit.bk && (unit.ch == 'd') + && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) + { + addToken (currentToken); + currentToken = + new RETokenPOSIX (subIndex, RETokenPOSIX.DIGIT, insens, false); + if (insensUSASCII) + currentToken.unicodeAware = false; + } + + // NON-DIGIT OPERATOR + // \D + + else if (unit.bk && (unit.ch == 'D') + && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) + { + addToken (currentToken); + currentToken = + new RETokenPOSIX (subIndex, RETokenPOSIX.DIGIT, insens, true); + if (insensUSASCII) + currentToken.unicodeAware = false; + } + + // NEWLINE ESCAPE + // \n + + else if (unit.bk && (unit.ch == 'n')) + { + addToken (currentToken); + currentToken = new RETokenChar (subIndex, '\n', false); + } + + // RETURN ESCAPE + // \r + + else if (unit.bk && (unit.ch == 'r')) + { + addToken (currentToken); + currentToken = new RETokenChar (subIndex, '\r', false); + } + + // WHITESPACE OPERATOR + // \s if RE_CHAR_CLASS_ESCAPES is set + + else if (unit.bk && (unit.ch == 's') + && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) + { + addToken (currentToken); + currentToken = + new RETokenPOSIX (subIndex, RETokenPOSIX.SPACE, insens, false); + if (insensUSASCII) + currentToken.unicodeAware = false; + } + + // NON-WHITESPACE OPERATOR + // \S + + else if (unit.bk && (unit.ch == 'S') + && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) + { + addToken (currentToken); + currentToken = + new RETokenPOSIX (subIndex, RETokenPOSIX.SPACE, insens, true); + if (insensUSASCII) + currentToken.unicodeAware = false; + } + + // TAB ESCAPE + // \t + + else if (unit.bk && (unit.ch == 't')) + { + addToken (currentToken); + currentToken = new RETokenChar (subIndex, '\t', false); + } + + // ALPHANUMERIC OPERATOR + // \w + + else if (unit.bk && (unit.ch == 'w') + && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) + { + addToken (currentToken); + currentToken = + new RETokenPOSIX (subIndex, RETokenPOSIX.ALNUM, insens, false); + if (insensUSASCII) + currentToken.unicodeAware = false; + } + + // NON-ALPHANUMERIC OPERATOR + // \W + + else if (unit.bk && (unit.ch == 'W') + && syntax.get (RESyntax.RE_CHAR_CLASS_ESCAPES)) + { + addToken (currentToken); + currentToken = + new RETokenPOSIX (subIndex, RETokenPOSIX.ALNUM, insens, true); + if (insensUSASCII) + currentToken.unicodeAware = false; + } + + // END OF STRING OPERATOR + // \Z, \z + + // FIXME: \Z and \z are different in that if the input string + // ends with a line terminator, \Z matches the position before + // the final terminator. This special behavior of \Z is yet + // to be implemented. + + else if (unit.bk && (unit.ch == 'Z' || unit.ch == 'z') && + syntax.get (RESyntax.RE_STRING_ANCHORS)) + { + addToken (currentToken); + currentToken = new RETokenEnd (subIndex, null); + } + + // HEX CHARACTER, UNICODE CHARACTER + // \x1B, \u1234 + + else + if ((unit.bk && (unit.ch == 'x') + && syntax.get (RESyntax.RE_HEX_CHAR)) || (unit.bk + && (unit.ch == 'u') + && syntax. + get (RESyntax. + RE_UNICODE_CHAR))) + { + CharExpression ce = + getCharExpression (pattern, index - 2, pLength, syntax); + if (ce == null) + throw new REException ("invalid hex character", + REException.REG_ESCAPE, index); + index = index - 2 + ce.len; + addToken (currentToken); + currentToken = new RETokenChar (subIndex, ce.ch, insens); + if (insensUSASCII) + currentToken.unicodeAware = false; + } + + // NAMED PROPERTY + // \p{prop}, \P{prop} + + else + if ((unit.bk && (unit.ch == 'p') + && syntax.get (RESyntax.RE_NAMED_PROPERTY)) || (unit.bk + && (unit.ch == + 'P') + && syntax. + get (RESyntax. + RE_NAMED_PROPERTY))) + { + NamedProperty np = getNamedProperty (pattern, index - 2, pLength); + if (np == null) + throw new REException ("invalid escape sequence", + REException.REG_ESCAPE, index); + index = index - 2 + np.len; + addToken (currentToken); + currentToken = + getRETokenNamedProperty (subIndex, np, insens, index); + if (insensUSASCII) + currentToken.unicodeAware = false; + } + + // END OF PREVIOUS MATCH + // \G + + else if (unit.bk && (unit.ch == 'G') && + syntax.get (RESyntax.RE_STRING_ANCHORS)) + { + addToken (currentToken); + currentToken = new RETokenEndOfPreviousMatch (subIndex); + } + + // NON-SPECIAL CHARACTER (or escape to make literal) + // c | \* for example + + else + { // not a special character + addToken (currentToken); + currentToken = new RETokenChar (subIndex, unit.ch, insens); + if (insensUSASCII) + currentToken.unicodeAware = false; + } + } // end while // Add final buffered token and an EndSub marker addToken (currentToken); if (branches != null) { - branches. - add (new - RE (firstToken, lastToken, numSubs, subIndex, minimumLength, - maximumLength)); - branches.trimToSize (); // compact the Vector - minimumLength = 0; - maximumLength = 0; - firstToken = lastToken = null; - addToken (new RETokenOneOf (subIndex, branches, false)); + branches. + add (new + RE (firstToken, lastToken, numSubs, subIndex, minimumLength, + maximumLength)); + branches.trimToSize (); // compact the Vector + minimumLength = 0; + maximumLength = 0; + firstToken = lastToken = null; + addToken (new RETokenOneOf (subIndex, branches, false)); } else addToken (new RETokenEndSub (subIndex)); @@ -1403,11 +1403,11 @@ public class RE extends REToken * @param syntax Syntax used to parse the pattern. */ private static ParseCharClassResult parseCharClass (int subIndex, - char[]pattern, - int index, int pLength, - int cflags, - RESyntax syntax, - int pflags) throws + char[]pattern, + int index, int pLength, + int cflags, + RESyntax syntax, + int pflags) throws REException { @@ -1425,256 +1425,256 @@ public class RE extends REToken boolean lastCharIsSet = false; if (index == pLength) throw new REException (getLocalizedMessage ("unmatched.bracket"), - REException.REG_EBRACK, index); + REException.REG_EBRACK, index); // Check for initial caret, negation if ((ch = pattern[index]) == '^') { - negative = true; - if (++index == pLength) - throw new REException (getLocalizedMessage ("class.no.end"), - REException.REG_EBRACK, index); - ch = pattern[index]; + negative = true; + if (++index == pLength) + throw new REException (getLocalizedMessage ("class.no.end"), + REException.REG_EBRACK, index); + ch = pattern[index]; } // Check for leading right bracket literal if (ch == ']') { - lastChar = ch; - lastCharIsSet = true; - if (++index == pLength) - throw new REException (getLocalizedMessage ("class.no.end"), - REException.REG_EBRACK, index); + lastChar = ch; + lastCharIsSet = true; + if (++index == pLength) + throw new REException (getLocalizedMessage ("class.no.end"), + REException.REG_EBRACK, index); } while ((ch = pattern[index++]) != ']') { - if ((ch == '-') && (lastCharIsSet)) - { - if (index == pLength) - throw new REException (getLocalizedMessage ("class.no.end"), - REException.REG_EBRACK, index); - if ((ch = pattern[index]) == ']') - { - RETokenChar t = new RETokenChar (subIndex, lastChar, insens); - if (insensUSASCII) - t.unicodeAware = false; - options.add (t); - lastChar = '-'; - } - else - { - if ((ch == '\\') - && syntax.get (RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) - { - CharExpression ce = - getCharExpression (pattern, index, pLength, syntax); - if (ce == null) - throw new REException ("invalid escape sequence", - REException.REG_ESCAPE, index); - ch = ce.ch; - index = index + ce.len - 1; - } - RETokenRange t = - new RETokenRange (subIndex, lastChar, ch, insens); - if (insensUSASCII) - t.unicodeAware = false; - options.add (t); - lastChar = 0; - lastCharIsSet = false; - index++; - } - } - else if ((ch == '\\') - && syntax.get (RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) - { - if (index == pLength) - throw new REException (getLocalizedMessage ("class.no.end"), - REException.REG_EBRACK, index); - int posixID = -1; - boolean negate = false; - char asciiEsc = 0; - boolean asciiEscIsSet = false; - NamedProperty np = null; - if (("dswDSW".indexOf (pattern[index]) != -1) - && syntax.get (RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) - { - switch (pattern[index]) - { - case 'D': - negate = true; - case 'd': - posixID = RETokenPOSIX.DIGIT; - break; - case 'S': - negate = true; - case 's': - posixID = RETokenPOSIX.SPACE; - break; - case 'W': - negate = true; - case 'w': - posixID = RETokenPOSIX.ALNUM; - break; - } - } - if (("pP".indexOf (pattern[index]) != -1) - && syntax.get (RESyntax.RE_NAMED_PROPERTY)) - { - np = getNamedProperty (pattern, index - 1, pLength); - if (np == null) - throw new REException ("invalid escape sequence", - REException.REG_ESCAPE, index); - index = index - 1 + np.len - 1; - } - else - { - CharExpression ce = - getCharExpression (pattern, index - 1, pLength, syntax); - if (ce == null) - throw new REException ("invalid escape sequence", - REException.REG_ESCAPE, index); - asciiEsc = ce.ch; - asciiEscIsSet = true; - index = index - 1 + ce.len - 1; - } - if (lastCharIsSet) - { - RETokenChar t = new RETokenChar (subIndex, lastChar, insens); - if (insensUSASCII) - t.unicodeAware = false; - options.add (t); - } - - if (posixID != -1) - { - RETokenPOSIX t = - new RETokenPOSIX (subIndex, posixID, insens, negate); - if (insensUSASCII) - t.unicodeAware = false; - options.add (t); - } - else if (np != null) - { - RETokenNamedProperty t = - getRETokenNamedProperty (subIndex, np, insens, index); - if (insensUSASCII) - t.unicodeAware = false; - options.add (t); - } - else if (asciiEscIsSet) - { - lastChar = asciiEsc; - lastCharIsSet = true; - } - else - { - lastChar = pattern[index]; - lastCharIsSet = true; - } - ++index; - } - else if ((ch == '[') && (syntax.get (RESyntax.RE_CHAR_CLASSES)) - && (index < pLength) && (pattern[index] == ':')) - { - CPStringBuilder posixSet = new CPStringBuilder (); - index = getPosixSet (pattern, index + 1, posixSet); - int posixId = RETokenPOSIX.intValue (posixSet.toString ()); - if (posixId != -1) - { - RETokenPOSIX t = - new RETokenPOSIX (subIndex, posixId, insens, false); - if (insensUSASCII) - t.unicodeAware = false; - options.add (t); - } - } - else if ((ch == '[') && (syntax.get (RESyntax.RE_NESTED_CHARCLASS))) - { - ParseCharClassResult result = - parseCharClass (subIndex, pattern, index, pLength, cflags, - syntax, 0); - addition.add (result.token); - addition.add ("|"); - index = result.index; - } - else if ((ch == '&') && - (syntax.get (RESyntax.RE_NESTED_CHARCLASS)) && - (index < pLength) && (pattern[index] == '&')) - { - if (returnAtAndOperator) - { - ParseCharClassResult result = new ParseCharClassResult (); - options.trimToSize (); - if (additionAndAppeared) - addition.add ("&"); - if (addition.size () == 0) - addition = null; - result.token = new RETokenOneOf (subIndex, - options, addition, negative); - result.index = index - 1; - result.returnAtAndOperator = true; - return result; - } - // The precedence of the operator "&&" is the lowest. - // So we postpone adding "&" until other elements - // are added. And we insert Boolean.FALSE at the - // beginning of the list of tokens following "&&". - // So, "&&[a-b][k-m]" will be stored in the Vecter - // addition in this order: - // Boolean.FALSE, [a-b], "|", [k-m], "|", "&" - if (additionAndAppeared) - addition.add ("&"); - addition.add (Boolean.FALSE); - additionAndAppeared = true; - - // The part on which "&&" operates may be either - // (1) explicitly enclosed by [] - // or - // (2) not enclosed by [] and terminated by the - // next "&&" or the end of the character list. - // Let the preceding else if block do the case (1). - // We must do something in case of (2). - if ((index + 1 < pLength) && (pattern[index + 1] != '[')) - { - ParseCharClassResult result = - parseCharClass (subIndex, pattern, index + 1, pLength, - cflags, syntax, - RETURN_AT_AND); - addition.add (result.token); - addition.add ("|"); - // If the method returned at the next "&&", it is OK. - // Otherwise we have eaten the mark of the end of this - // character list "]". In this case we must give back - // the end mark. - index = (result.returnAtAndOperator ? - result.index : result.index - 1); - } - } - else - { - if (lastCharIsSet) - { - RETokenChar t = new RETokenChar (subIndex, lastChar, insens); - if (insensUSASCII) - t.unicodeAware = false; - options.add (t); - } - lastChar = ch; - lastCharIsSet = true; - } - if (index == pLength) - throw new REException (getLocalizedMessage ("class.no.end"), - REException.REG_EBRACK, index); - } // while in list + if ((ch == '-') && (lastCharIsSet)) + { + if (index == pLength) + throw new REException (getLocalizedMessage ("class.no.end"), + REException.REG_EBRACK, index); + if ((ch = pattern[index]) == ']') + { + RETokenChar t = new RETokenChar (subIndex, lastChar, insens); + if (insensUSASCII) + t.unicodeAware = false; + options.add (t); + lastChar = '-'; + } + else + { + if ((ch == '\\') + && syntax.get (RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) + { + CharExpression ce = + getCharExpression (pattern, index, pLength, syntax); + if (ce == null) + throw new REException ("invalid escape sequence", + REException.REG_ESCAPE, index); + ch = ce.ch; + index = index + ce.len - 1; + } + RETokenRange t = + new RETokenRange (subIndex, lastChar, ch, insens); + if (insensUSASCII) + t.unicodeAware = false; + options.add (t); + lastChar = 0; + lastCharIsSet = false; + index++; + } + } + else if ((ch == '\\') + && syntax.get (RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) + { + if (index == pLength) + throw new REException (getLocalizedMessage ("class.no.end"), + REException.REG_EBRACK, index); + int posixID = -1; + boolean negate = false; + char asciiEsc = 0; + boolean asciiEscIsSet = false; + NamedProperty np = null; + if (("dswDSW".indexOf (pattern[index]) != -1) + && syntax.get (RESyntax.RE_CHAR_CLASS_ESC_IN_LISTS)) + { + switch (pattern[index]) + { + case 'D': + negate = true; + case 'd': + posixID = RETokenPOSIX.DIGIT; + break; + case 'S': + negate = true; + case 's': + posixID = RETokenPOSIX.SPACE; + break; + case 'W': + negate = true; + case 'w': + posixID = RETokenPOSIX.ALNUM; + break; + } + } + if (("pP".indexOf (pattern[index]) != -1) + && syntax.get (RESyntax.RE_NAMED_PROPERTY)) + { + np = getNamedProperty (pattern, index - 1, pLength); + if (np == null) + throw new REException ("invalid escape sequence", + REException.REG_ESCAPE, index); + index = index - 1 + np.len - 1; + } + else + { + CharExpression ce = + getCharExpression (pattern, index - 1, pLength, syntax); + if (ce == null) + throw new REException ("invalid escape sequence", + REException.REG_ESCAPE, index); + asciiEsc = ce.ch; + asciiEscIsSet = true; + index = index - 1 + ce.len - 1; + } + if (lastCharIsSet) + { + RETokenChar t = new RETokenChar (subIndex, lastChar, insens); + if (insensUSASCII) + t.unicodeAware = false; + options.add (t); + } + + if (posixID != -1) + { + RETokenPOSIX t = + new RETokenPOSIX (subIndex, posixID, insens, negate); + if (insensUSASCII) + t.unicodeAware = false; + options.add (t); + } + else if (np != null) + { + RETokenNamedProperty t = + getRETokenNamedProperty (subIndex, np, insens, index); + if (insensUSASCII) + t.unicodeAware = false; + options.add (t); + } + else if (asciiEscIsSet) + { + lastChar = asciiEsc; + lastCharIsSet = true; + } + else + { + lastChar = pattern[index]; + lastCharIsSet = true; + } + ++index; + } + else if ((ch == '[') && (syntax.get (RESyntax.RE_CHAR_CLASSES)) + && (index < pLength) && (pattern[index] == ':')) + { + CPStringBuilder posixSet = new CPStringBuilder (); + index = getPosixSet (pattern, index + 1, posixSet); + int posixId = RETokenPOSIX.intValue (posixSet.toString ()); + if (posixId != -1) + { + RETokenPOSIX t = + new RETokenPOSIX (subIndex, posixId, insens, false); + if (insensUSASCII) + t.unicodeAware = false; + options.add (t); + } + } + else if ((ch == '[') && (syntax.get (RESyntax.RE_NESTED_CHARCLASS))) + { + ParseCharClassResult result = + parseCharClass (subIndex, pattern, index, pLength, cflags, + syntax, 0); + addition.add (result.token); + addition.add ("|"); + index = result.index; + } + else if ((ch == '&') && + (syntax.get (RESyntax.RE_NESTED_CHARCLASS)) && + (index < pLength) && (pattern[index] == '&')) + { + if (returnAtAndOperator) + { + ParseCharClassResult result = new ParseCharClassResult (); + options.trimToSize (); + if (additionAndAppeared) + addition.add ("&"); + if (addition.size () == 0) + addition = null; + result.token = new RETokenOneOf (subIndex, + options, addition, negative); + result.index = index - 1; + result.returnAtAndOperator = true; + return result; + } + // The precedence of the operator "&&" is the lowest. + // So we postpone adding "&" until other elements + // are added. And we insert Boolean.FALSE at the + // beginning of the list of tokens following "&&". + // So, "&&[a-b][k-m]" will be stored in the Vecter + // addition in this order: + // Boolean.FALSE, [a-b], "|", [k-m], "|", "&" + if (additionAndAppeared) + addition.add ("&"); + addition.add (Boolean.FALSE); + additionAndAppeared = true; + + // The part on which "&&" operates may be either + // (1) explicitly enclosed by [] + // or + // (2) not enclosed by [] and terminated by the + // next "&&" or the end of the character list. + // Let the preceding else if block do the case (1). + // We must do something in case of (2). + if ((index + 1 < pLength) && (pattern[index + 1] != '[')) + { + ParseCharClassResult result = + parseCharClass (subIndex, pattern, index + 1, pLength, + cflags, syntax, + RETURN_AT_AND); + addition.add (result.token); + addition.add ("|"); + // If the method returned at the next "&&", it is OK. + // Otherwise we have eaten the mark of the end of this + // character list "]". In this case we must give back + // the end mark. + index = (result.returnAtAndOperator ? + result.index : result.index - 1); + } + } + else + { + if (lastCharIsSet) + { + RETokenChar t = new RETokenChar (subIndex, lastChar, insens); + if (insensUSASCII) + t.unicodeAware = false; + options.add (t); + } + lastChar = ch; + lastCharIsSet = true; + } + if (index == pLength) + throw new REException (getLocalizedMessage ("class.no.end"), + REException.REG_EBRACK, index); + } // while in list // Out of list, index is one past ']' if (lastCharIsSet) { - RETokenChar t = new RETokenChar (subIndex, lastChar, insens); - if (insensUSASCII) - t.unicodeAware = false; - options.add (t); + RETokenChar t = new RETokenChar (subIndex, lastChar, insens); + if (insensUSASCII) + t.unicodeAware = false; + options.add (t); } ParseCharClassResult result = new ParseCharClassResult (); @@ -1690,17 +1690,17 @@ public class RE extends REToken } private static int getCharUnit (char[]input, int index, CharUnit unit, - boolean quot) throws REException + boolean quot) throws REException { unit.ch = input[index++]; unit.bk = (unit.ch == '\\' - && (!quot || index >= input.length || input[index] == 'E')); + && (!quot || index >= input.length || input[index] == 'E')); if (unit.bk) if (index < input.length) - unit.ch = input[index++]; + unit.ch = input[index++]; else - throw new REException (getLocalizedMessage ("ends.with.backslash"), - REException.REG_ESCAPE, index); + throw new REException (getLocalizedMessage ("ends.with.backslash"), + REException.REG_ESCAPE, index); return index; } @@ -1709,7 +1709,7 @@ public class RE extends REToken int ret = 0; for (int i = pos; i < pos + len; i++) { - ret = ret * radix + Character.digit (input[i], radix); + ret = ret * radix + Character.digit (input[i], radix); } return ret; } @@ -1736,92 +1736,92 @@ public class RE extends REToken } private static CharExpression getCharExpression (char[]input, int pos, - int lim, RESyntax syntax) + int lim, RESyntax syntax) { CharExpression ce = new CharExpression (); char c = input[pos]; if (c == '\\') { - if (pos + 1 >= lim) - return null; - c = input[pos + 1]; - switch (c) - { - case 't': - ce.ch = '\t'; - ce.len = 2; - break; - case 'n': - ce.ch = '\n'; - ce.len = 2; - break; - case 'r': - ce.ch = '\r'; - ce.len = 2; - break; - case 'x': - case 'u': - if ((c == 'x' && syntax.get (RESyntax.RE_HEX_CHAR)) || - (c == 'u' && syntax.get (RESyntax.RE_UNICODE_CHAR))) - { - int l = 0; - int expectedLength = (c == 'x' ? 2 : 4); - for (int i = pos + 2; i < pos + 2 + expectedLength; i++) - { - if (i >= lim) - break; - if (!((input[i] >= '0' && input[i] <= '9') || - (input[i] >= 'A' && input[i] <= 'F') || - (input[i] >= 'a' && input[i] <= 'f'))) - break; - l++; - } - if (l != expectedLength) - return null; - ce.ch = (char) (parseInt (input, pos + 2, l, 16)); - ce.len = l + 2; - } - else - { - ce.ch = c; - ce.len = 2; - } - break; - case '0': - if (syntax.get (RESyntax.RE_OCTAL_CHAR)) - { - int l = 0; - for (int i = pos + 2; i < pos + 2 + 3; i++) - { - if (i >= lim) - break; - if (input[i] < '0' || input[i] > '7') - break; - l++; - } - if (l == 3 && input[pos + 2] > '3') - l--; - if (l <= 0) - return null; - ce.ch = (char) (parseInt (input, pos + 2, l, 8)); - ce.len = l + 2; - } - else - { - ce.ch = c; - ce.len = 2; - } - break; - default: - ce.ch = c; - ce.len = 2; - break; - } + if (pos + 1 >= lim) + return null; + c = input[pos + 1]; + switch (c) + { + case 't': + ce.ch = '\t'; + ce.len = 2; + break; + case 'n': + ce.ch = '\n'; + ce.len = 2; + break; + case 'r': + ce.ch = '\r'; + ce.len = 2; + break; + case 'x': + case 'u': + if ((c == 'x' && syntax.get (RESyntax.RE_HEX_CHAR)) || + (c == 'u' && syntax.get (RESyntax.RE_UNICODE_CHAR))) + { + int l = 0; + int expectedLength = (c == 'x' ? 2 : 4); + for (int i = pos + 2; i < pos + 2 + expectedLength; i++) + { + if (i >= lim) + break; + if (!((input[i] >= '0' && input[i] <= '9') || + (input[i] >= 'A' && input[i] <= 'F') || + (input[i] >= 'a' && input[i] <= 'f'))) + break; + l++; + } + if (l != expectedLength) + return null; + ce.ch = (char) (parseInt (input, pos + 2, l, 16)); + ce.len = l + 2; + } + else + { + ce.ch = c; + ce.len = 2; + } + break; + case '0': + if (syntax.get (RESyntax.RE_OCTAL_CHAR)) + { + int l = 0; + for (int i = pos + 2; i < pos + 2 + 3; i++) + { + if (i >= lim) + break; + if (input[i] < '0' || input[i] > '7') + break; + l++; + } + if (l == 3 && input[pos + 2] > '3') + l--; + if (l <= 0) + return null; + ce.ch = (char) (parseInt (input, pos + 2, l, 8)); + ce.len = l + 2; + } + else + { + ce.ch = c; + ce.len = 2; + } + break; + default: + ce.ch = c; + ce.len = 2; + break; + } } else { - ce.ch = input[pos]; - ce.len = 1; + ce.ch = input[pos]; + ce.len = 1; } ce.expr = new String (input, pos, ce.len); return ce; @@ -1846,60 +1846,60 @@ public class RE extends REToken } private static NamedProperty getNamedProperty (char[]input, int pos, - int lim) + int lim) { NamedProperty np = new NamedProperty (); char c = input[pos]; if (c == '\\') { - if (++pos >= lim) - return null; - c = input[pos++]; - switch (c) - { - case 'p': - np.negate = false; - break; - case 'P': - np.negate = true; - break; - default: - return null; - } - c = input[pos++]; - if (c == '{') - { - int p = -1; - for (int i = pos; i < lim; i++) - { - if (input[i] == '}') - { - p = i; - break; - } - } - if (p < 0) - return null; - int len = p - pos; - np.name = new String (input, pos, len); - np.len = len + 4; - } - else - { - np.name = new String (input, pos - 1, 1); - np.len = 3; - } - return np; + if (++pos >= lim) + return null; + c = input[pos++]; + switch (c) + { + case 'p': + np.negate = false; + break; + case 'P': + np.negate = true; + break; + default: + return null; + } + c = input[pos++]; + if (c == '{') + { + int p = -1; + for (int i = pos; i < lim; i++) + { + if (input[i] == '}') + { + p = i; + break; + } + } + if (p < 0) + return null; + int len = p - pos; + np.name = new String (input, pos, len); + np.len = len + 4; + } + else + { + np.name = new String (input, pos - 1, 1); + np.len = 3; + } + return np; } else return null; } private static RETokenNamedProperty getRETokenNamedProperty (int subIndex, - NamedProperty - np, - boolean insens, - int index) + NamedProperty + np, + boolean insens, + int index) throws REException { try @@ -1953,18 +1953,18 @@ public class RE extends REToken private boolean isMatchImpl (CharIndexed input, int index, int eflags) { - if (firstToken == null) // Trivial case + if (firstToken == null) // Trivial case return (input.charAt (0) == CharIndexed.OUT_OF_BOUNDS); REMatch m = new REMatch (numSubs, index, eflags); if (firstToken.match (input, m)) { - if (m != null) - { - if (input.charAt (m.index) == CharIndexed.OUT_OF_BOUNDS) - { - return true; - } - } + if (m != null) + { + if (input.charAt (m.index) == CharIndexed.OUT_OF_BOUNDS) + { + return true; + } + } } return false; } @@ -1984,10 +1984,10 @@ public class RE extends REToken { if (lastToken != null) { - lastToken.setUncle (uncle); + lastToken.setUncle (uncle); } else - super.setUncle (uncle); // to deal with empty subexpressions + super.setUncle (uncle); // to deal with empty subexpressions } // Overrides REToken.chain @@ -2063,25 +2063,25 @@ public class RE extends REToken // this has been changed since 1.03 to be non-overlapping matches private REMatch[] getAllMatchesImpl (CharIndexed input, int index, - int eflags) + int eflags) { List < REMatch > all = new ArrayList < REMatch > (); REMatch m = null; while ((m = getMatchImpl (input, index, eflags, null)) != null) { - all.add (m); - index = m.getEndIndex (); - if (m.end[0] == 0) - { // handle pathological case of zero-length match - index++; - input.move (1); - } - else - { - input.move (m.end[0]); - } - if (!input.isValid ()) - break; + all.add (m); + index = m.getEndIndex (); + if (m.end[0] == 0) + { // handle pathological case of zero-length match + index++; + input.move (1); + } + else + { + input.move (m.end[0]); + } + if (!input.isValid ()) + break; } return all.toArray (new REMatch[all.size ()]); } @@ -2092,7 +2092,7 @@ public class RE extends REToken input.setHitEnd (mymatch); if (firstToken == null) { - return next (input, mymatch); + return next (input, mymatch); } // Note the start of this subexpression @@ -2108,7 +2108,7 @@ public class RE extends REToken boolean b = match (input, mymatch); if (b) { - return mymatch; + return mymatch; } return null; } @@ -2168,78 +2168,78 @@ public class RE extends REToken * @param buffer The StringBuffer to save pre-match text in. * @return An REMatch instance referencing the match, or null if none. */ public REMatch getMatch (Object input, int index, int eflags, - CPStringBuilder buffer) + CPStringBuilder buffer) { return getMatchImpl (makeCharIndexed (input, index), index, eflags, - buffer); + buffer); } REMatch getMatchImpl (CharIndexed input, int anchor, int eflags, - CPStringBuilder buffer) + CPStringBuilder buffer) { boolean tryEntireMatch = ((eflags & REG_TRY_ENTIRE_MATCH) != 0); boolean doMove = ((eflags & REG_FIX_STARTING_POSITION) == 0); RE re = (tryEntireMatch ? (RE) this.clone () : this); if (tryEntireMatch) { - RETokenEnd reEnd = new RETokenEnd (0, null); - reEnd.setFake (true); - re.chain (reEnd); + RETokenEnd reEnd = new RETokenEnd (0, null); + reEnd.setFake (true); + re.chain (reEnd); } // Create a new REMatch to hold results REMatch mymatch = new REMatch (numSubs, anchor, eflags); do { - /* The following potimization is commented out because - the matching should be tried even if the length of - input is obviously too short in order that - java.util.regex.Matcher#hitEnd() may work correctly. - // Optimization: check if anchor + minimumLength > length - if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) { - */ - if (re.match (input, mymatch)) - { - REMatch best = mymatch; - // We assume that the match that coms first is the best. - // And the following "The longer, the better" rule has - // been commented out. The longest is not neccesarily - // the best. For example, "a" out of "aaa" is the best - // match for /a+?/. - /* - // Find best match of them all to observe leftmost longest - while ((mymatch = mymatch.next) != null) { - if (mymatch.index > best.index) { - best = mymatch; - } - } - */ - best.end[0] = best.index; - best.finish (input); - input.setLastMatch (best); - return best; - } - /* End of the optimization commented out - } - */ - mymatch.clear (++anchor); - // Append character to buffer if needed - if (buffer != null && input.charAt (0) != CharIndexed.OUT_OF_BOUNDS) - { - buffer.append (input.charAt (0)); - } - // java.util.regex.Matcher#hitEnd() requires that the search should - // be tried at the end of input, so we use move1(1) instead of move(1) + /* The following potimization is commented out because + the matching should be tried even if the length of + input is obviously too short in order that + java.util.regex.Matcher#hitEnd() may work correctly. + // Optimization: check if anchor + minimumLength > length + if (minimumLength == 0 || input.charAt(minimumLength-1) != CharIndexed.OUT_OF_BOUNDS) { + */ + if (re.match (input, mymatch)) + { + REMatch best = mymatch; + // We assume that the match that coms first is the best. + // And the following "The longer, the better" rule has + // been commented out. The longest is not neccesarily + // the best. For example, "a" out of "aaa" is the best + // match for /a+?/. + /* + // Find best match of them all to observe leftmost longest + while ((mymatch = mymatch.next) != null) { + if (mymatch.index > best.index) { + best = mymatch; + } + } + */ + best.end[0] = best.index; + best.finish (input); + input.setLastMatch (best); + return best; + } + /* End of the optimization commented out + } + */ + mymatch.clear (++anchor); + // Append character to buffer if needed + if (buffer != null && input.charAt (0) != CharIndexed.OUT_OF_BOUNDS) + { + buffer.append (input.charAt (0)); + } + // java.util.regex.Matcher#hitEnd() requires that the search should + // be tried at the end of input, so we use move1(1) instead of move(1) } while (doMove && input.move1 (1)); // Special handling at end of input for e.g. "$" if (minimumLength == 0) { - if (match (input, mymatch)) - { - mymatch.finish (input); - return mymatch; - } + if (match (input, mymatch)) + { + mymatch.finish (input); + return mymatch; + } } return null; @@ -2283,10 +2283,10 @@ public class RE extends REToken * set to the index position specified. */ public REMatchEnumeration getMatchEnumeration (Object input, int index, - int eflags) + int eflags) { return new REMatchEnumeration (this, makeCharIndexed (input, index), - index, eflags); + index, eflags); } @@ -2307,7 +2307,7 @@ public class RE extends REToken * Substitutes the replacement text for the first match found in the input * beginning at the specified index position. Specifying an index * effectively causes the regular expression engine to throw away the - * specified number of characters. + * specified number of characters. * * @param input The input text. * @param replace The replacement text, which may contain $x metacharacters (see REMatch.substituteInto). @@ -2335,14 +2335,14 @@ public class RE extends REToken * @see REMatch#substituteInto */ public String substitute (Object input, String replace, int index, - int eflags) + int eflags) { return substituteImpl (makeCharIndexed (input, index), replace, index, - eflags); + eflags); } private String substituteImpl (CharIndexed input, String replace, int index, - int eflags) + int eflags) { CPStringBuilder buffer = new CPStringBuilder (); REMatch m = getMatchImpl (input, index, eflags, buffer); @@ -2351,17 +2351,17 @@ public class RE extends REToken buffer.append (getReplacement (replace, m, eflags)); if (input.move (m.end[0])) { - do - { - buffer.append (input.charAt (0)); - } - while (input.move (1)); + do + { + buffer.append (input.charAt (0)); + } + while (input.move (1)); } return buffer.toString (); } /** - * Substitutes the replacement text for each non-overlapping match found + * Substitutes the replacement text for each non-overlapping match found * in the input text. * * @param input The input text. @@ -2375,7 +2375,7 @@ public class RE extends REToken } /** - * Substitutes the replacement text for each non-overlapping match found + * Substitutes the replacement text for each non-overlapping match found * in the input text, starting at the specified index. * * If the regular expression allows the empty string to match, it will @@ -2394,7 +2394,7 @@ public class RE extends REToken } /** - * Substitutes the replacement text for each non-overlapping match found + * Substitutes the replacement text for each non-overlapping match found * in the input text, starting at the specified index and using the * specified execution flags. * @@ -2407,35 +2407,35 @@ public class RE extends REToken * @see REMatch#substituteInto */ public String substituteAll (Object input, String replace, int index, - int eflags) + int eflags) { return substituteAllImpl (makeCharIndexed (input, index), replace, index, - eflags); + eflags); } private String substituteAllImpl (CharIndexed input, String replace, - int index, int eflags) + int index, int eflags) { CPStringBuilder buffer = new CPStringBuilder (); REMatch m; while ((m = getMatchImpl (input, index, eflags, buffer)) != null) { - buffer.append (getReplacement (replace, m, eflags)); - index = m.getEndIndex (); - if (m.end[0] == 0) - { - char ch = input.charAt (0); - if (ch != CharIndexed.OUT_OF_BOUNDS) - buffer.append (ch); - input.move (1); - } - else - { - input.move (m.end[0]); - } - - if (!input.isValid ()) - break; + buffer.append (getReplacement (replace, m, eflags)); + index = m.getEndIndex (); + if (m.end[0] == 0) + { + char ch = input.charAt (0); + if (ch != CharIndexed.OUT_OF_BOUNDS) + buffer.append (ch); + input.move (1); + } + else + { + input.move (m.end[0]); + } + + if (!input.isValid ()) + break; } return buffer.toString (); } @@ -2446,36 +2446,36 @@ public class RE extends REToken return replace; else { - if ((eflags & REG_REPLACE_USE_BACKSLASHESCAPE) > 0) - { - CPStringBuilder sb = new CPStringBuilder (); - int l = replace.length (); - for (int i = 0; i < l; i++) - { - char c = replace.charAt (i); - switch (c) - { - case '\\': - i++; - // Let StringIndexOutOfBoundsException be thrown. - sb.append (replace.charAt (i)); - break; - case '$': - int i1 = i + 1; - while (i1 < replace.length () && - Character.isDigit (replace.charAt (i1))) - i1++; - sb.append (m.substituteInto (replace.substring (i, i1))); - i = i1 - 1; - break; - default: - sb.append (c); - } - } - return sb.toString (); - } - else - return m.substituteInto (replace); + if ((eflags & REG_REPLACE_USE_BACKSLASHESCAPE) > 0) + { + CPStringBuilder sb = new CPStringBuilder (); + int l = replace.length (); + for (int i = 0; i < l; i++) + { + char c = replace.charAt (i); + switch (c) + { + case '\\': + i++; + // Let StringIndexOutOfBoundsException be thrown. + sb.append (replace.charAt (i)); + break; + case '$': + int i1 = i + 1; + while (i1 < replace.length () && + Character.isDigit (replace.charAt (i1))) + i1++; + sb.append (m.substituteInto (replace.substring (i, i1))); + i = i1 - 1; + break; + default: + sb.append (c); + } + } + return sb.toString (); + } + else + return m.substituteInto (replace); } } @@ -2493,45 +2493,45 @@ public class RE extends REToken if (firstToken == null) { - lastToken = firstToken = next; + lastToken = firstToken = next; } else { - // if chain returns false, it "rejected" the token due to - // an optimization, and next was combined with lastToken - if (lastToken.chain (next)) - { - lastToken = next; - } + // if chain returns false, it "rejected" the token due to + // an optimization, and next was combined with lastToken + if (lastToken.chain (next)) + { + lastToken = next; + } } } private static REToken setRepeated (REToken current, int min, int max, - int index) throws REException + int index) throws REException { if (current == null) throw new REException (getLocalizedMessage ("repeat.no.token"), - REException.REG_BADRPT, index); + REException.REG_BADRPT, index); return new RETokenRepeated (current.subIndex, current, min, max); } private static int getPosixSet (char[]pattern, int index, - CPStringBuilder buf) + CPStringBuilder buf) { // Precondition: pattern[index-1] == ':' // we will return pos of closing ']'. int i; for (i = index; i < (pattern.length - 1); i++) { - if ((pattern[i] == ':') && (pattern[i + 1] == ']')) - return i + 2; - buf.append (pattern[i]); + if ((pattern[i] == ':') && (pattern[i + 1] == ']')) + return i + 2; + buf.append (pattern[i]); } - return index; // didn't match up + return index; // didn't match up } private int getMinMax (char[]input, int index, IntPair minMax, - RESyntax syntax) throws REException + RESyntax syntax) throws REException { // Precondition: input[index-1] == '{', minMax != null @@ -2539,11 +2539,11 @@ public class RE extends REToken int startIndex = index; if (index == input.length) { - if (mustMatch) - throw new REException (getLocalizedMessage ("unmatched.brace"), - REException.REG_EBRACE, index); - else - return startIndex; + if (mustMatch) + throw new REException (getLocalizedMessage ("unmatched.brace"), + REException.REG_EBRACE, index); + else + return startIndex; } int min, max = 0; @@ -2553,20 +2553,20 @@ public class RE extends REToken // Read string of digits do { - index = getCharUnit (input, index, unit, false); - if (Character.isDigit (unit.ch)) - buf.append (unit.ch); + index = getCharUnit (input, index, unit, false); + if (Character.isDigit (unit.ch)) + buf.append (unit.ch); } while ((index != input.length) && Character.isDigit (unit.ch)); // Check for {} tomfoolery if (buf.length () == 0) { - if (mustMatch) - throw new REException (getLocalizedMessage ("interval.error"), - REException.REG_EBRACE, index); - else - return startIndex; + if (mustMatch) + throw new REException (getLocalizedMessage ("interval.error"), + REException.REG_EBRACE, index); + else + return startIndex; } min = Integer.parseInt (buf.toString ()); @@ -2575,8 +2575,8 @@ public class RE extends REToken max = min; else if (index == input.length) if (mustMatch) - throw new REException (getLocalizedMessage ("interval.no.end"), - REException.REG_EBRACE, index); + throw new REException (getLocalizedMessage ("interval.no.end"), + REException.REG_EBRACE, index); else return startIndex; else @@ -2585,28 +2585,28 @@ public class RE extends REToken buf = new CPStringBuilder (); // Read string of digits while (((index = - getCharUnit (input, index, unit, false)) != input.length) - && Character.isDigit (unit.ch)) - buf.append (unit.ch); + getCharUnit (input, index, unit, false)) != input.length) + && Character.isDigit (unit.ch)) + buf.append (unit.ch); if (! - ((unit.ch == '}') - && (syntax.get (RESyntax.RE_NO_BK_BRACES) ^ unit.bk))) - if (mustMatch) - throw new REException (getLocalizedMessage ("interval.error"), - REException.REG_EBRACE, index); + ((unit.ch == '}') + && (syntax.get (RESyntax.RE_NO_BK_BRACES) ^ unit.bk))) + if (mustMatch) + throw new REException (getLocalizedMessage ("interval.error"), + REException.REG_EBRACE, index); else return startIndex; // This is the case of {x,} if (buf.length () == 0) - max = Integer.MAX_VALUE; + max = Integer.MAX_VALUE; else - max = Integer.parseInt (buf.toString ()); + max = Integer.parseInt (buf.toString ()); } else if (mustMatch) throw new REException (getLocalizedMessage ("interval.error"), - REException.REG_EBRACE, index); + REException.REG_EBRACE, index); else return startIndex; @@ -2651,13 +2651,13 @@ public class RE extends REToken // be the most likely because this is the case with // java.util.regex.Matcher. // We could let a String or a CharSequence fall through - // to final input, but since it'a very likely input type, + // to final input, but since it'a very likely input type, // we check it first. if (input instanceof CharIndexed) { - CharIndexed ci = (CharIndexed) input; - ci.setAnchor (index); - return ci; + CharIndexed ci = (CharIndexed) input; + ci.setAnchor (index); + return ci; } else if (input instanceof CharSequence) return new CharIndexedCharSequence ((CharSequence) input, index); diff --git a/libjava/classpath/gnu/java/util/regex/REFilterInputStream.java b/libjava/classpath/gnu/java/util/regex/REFilterInputStream.java index c7acb71..c9fb346 100644 --- a/libjava/classpath/gnu/java/util/regex/REFilterInputStream.java +++ b/libjava/classpath/gnu/java/util/regex/REFilterInputStream.java @@ -70,7 +70,7 @@ public class REFilterInputStream extends FilterInputStream * * @param stream The InputStream to be filtered. * @param expr The regular expression to search for. - * @param replace The text pattern to replace matches with. + * @param replace The text pattern to replace matches with. */ public REFilterInputStream (InputStream stream, RE expr, String replace) { @@ -89,7 +89,7 @@ public class REFilterInputStream extends FilterInputStream // If we have buffered replace data, use it. if ((buffer != null) && (bufpos < buffer.length ())) { - return (int) buffer.charAt (bufpos++); + return (int) buffer.charAt (bufpos++); } // check if input is at a valid position @@ -99,18 +99,18 @@ public class REFilterInputStream extends FilterInputStream REMatch mymatch = new REMatch (expr.getNumSubs (), offset, 0); if (expr.match (stream, mymatch)) { - mymatch.end[0] = mymatch.index; - mymatch.finish (stream); - stream.move (mymatch.toString ().length ()); - offset += mymatch.toString ().length (); - buffer = mymatch.substituteInto (replace); - bufpos = 1; - - // This is prone to infinite loops if replace string turns out empty. - if (buffer.length () > 0) - { - return buffer.charAt (0); - } + mymatch.end[0] = mymatch.index; + mymatch.finish (stream); + stream.move (mymatch.toString ().length ()); + offset += mymatch.toString ().length (); + buffer = mymatch.substituteInto (replace); + bufpos = 1; + + // This is prone to infinite loops if replace string turns out empty. + if (buffer.length () > 0) + { + return buffer.charAt (0); + } } char ch = stream.charAt (0); if (ch == CharIndexed.OUT_OF_BOUNDS) @@ -120,9 +120,9 @@ public class REFilterInputStream extends FilterInputStream return ch; } - /** + /** * Returns false. REFilterInputStream does not support mark() and - * reset() methods. + * reset() methods. */ public boolean markSupported () { @@ -136,11 +136,11 @@ public class REFilterInputStream extends FilterInputStream int ok = 0; while (len-- > 0) { - i = read (); - if (i == -1) - return (ok == 0) ? -1 : ok; - b[off++] = (byte) i; - ok++; + i = read (); + if (i == -1) + return (ok == 0) ? -1 : ok; + b[off++] = (byte) i; + ok++; } return ok; } diff --git a/libjava/classpath/gnu/java/util/regex/REMatch.java b/libjava/classpath/gnu/java/util/regex/REMatch.java index d29972e..5940094 100644 --- a/libjava/classpath/gnu/java/util/regex/REMatch.java +++ b/libjava/classpath/gnu/java/util/regex/REMatch.java @@ -56,7 +56,7 @@ public final class REMatch implements Serializable, Cloneable private CharIndexed matchedCharIndexed; // These variables are package scope for fast access within the engine - int eflags; // execution flags this match was made using + int eflags; // execution flags this match was made using // Offset in source text where match was tried. This is zero-based; // the actual position in the source text is given by (offset + anchor). @@ -68,19 +68,19 @@ public final class REMatch implements Serializable, Cloneable int anchor; // Package scope; used by RE. - int index; // used while matching to mark current match position in input + int index; // used while matching to mark current match position in input // start1[i] is set when the i-th subexp starts. And start1[i] is copied // to start[i] when the i-th subexp ends. So start[i] keeps the previously // assigned value while the i-th subexp is being processed. This makes // backreference to the i-th subexp within the i-th subexp possible. - int[] start; // start positions (relative to offset) for each (sub)exp. - int[] start1; // start positions (relative to offset) for each (sub)exp. - int[] end; // end positions for the same + int[] start; // start positions (relative to offset) for each (sub)exp. + int[] start1; // start positions (relative to offset) for each (sub)exp. + int[] end; // end positions for the same // start[i] == -1 or end[i] == -1 means that the start/end position is void. // start[i] == p or end[i] == p where p < 0 and p != -1 means that // the actual start/end position is (p+1). Start/end positions may // become negative when the subexpression is in a RETokenLookBehind. - boolean empty; // empty string matched. This flag is used only within + boolean empty; // empty string matched. This flag is used only within // RETokenRepeated. BacktrackStack backtrackStack; @@ -99,7 +99,7 @@ public final class REMatch implements Serializable, Cloneable } catch (CloneNotSupportedException e) { - throw new Error (); // doesn't happen + throw new Error (); // doesn't happen } } @@ -133,13 +133,13 @@ public final class REMatch implements Serializable, Cloneable matchedCharIndexed = text; for (i = 0; i < start.length; i++) { - // If any subexpressions didn't terminate, they don't count - // TODO check if this code ever gets hit - if ((start[i] == -1) ^ (end[i] == -1)) - { - start[i] = -1; - end[i] = -1; - } + // If any subexpressions didn't terminate, they don't count + // TODO check if this code ever gets hit + if ((start[i] == -1) ^ (end[i] == -1)) + { + start[i] = -1; + end[i] = -1; + } } backtrackStack = null; } @@ -151,7 +151,7 @@ public final class REMatch implements Serializable, Cloneable this.index = 0; for (int i = 0; i < start.length; i++) { - start[i] = start1[i] = end[i] = -1; + start[i] = start1[i] = end[i] = -1; } backtrackStack = null; } @@ -160,7 +160,7 @@ public final class REMatch implements Serializable, Cloneable * Returns the string matching the pattern. This makes it convenient * to write code like the following: * <P> - * <code> + * <code> * REMatch myMatch = myExpression.getMatch(myString);<br> * if (myMatch != null) System.out.println("Regexp found: "+myMatch); * </code> @@ -194,7 +194,7 @@ public final class REMatch implements Serializable, Cloneable * myMatch.getEndIndex());</code> * <P> * But you can save yourself that work, since the <code>toString()</code> - * method (above) does exactly that for you. + * method (above) does exactly that for you. */ public int getEndIndex () { @@ -219,21 +219,21 @@ public final class REMatch implements Serializable, Cloneable return (matchedText.substring (start[sub], end[sub])); else { - // This case occurs with RETokenLookAhead or RETokenLookBehind. - CPStringBuilder sb = new CPStringBuilder (); - int s = start[sub]; - int e = end[sub]; - if (s < 0) - s += 1; - if (e < 0) - e += 1; - for (int i = start[0] + s; i < start[0] + e; i++) - sb.append (matchedCharIndexed.charAt (i)); - return sb.toString (); + // This case occurs with RETokenLookAhead or RETokenLookBehind. + CPStringBuilder sb = new CPStringBuilder (); + int s = start[sub]; + int e = end[sub]; + if (s < 0) + s += 1; + if (e < 0) + e += 1; + for (int i = start[0] + s; i < start[0] + e; i++) + sb.append (matchedCharIndexed.charAt (i)); + return sb.toString (); } } - /** + /** * Returns the index within the input string used to generate this match * where subexpression number <i>sub</i> begins, or <code>-1</code> if * the subexpression does not exist. The initial position is zero. @@ -249,7 +249,7 @@ public final class REMatch implements Serializable, Cloneable return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1; } - /** + /** * Returns the index within the input string used to generate this match * where subexpression number <i>sub</i> begins, or <code>-1</code> if * the subexpression does not exist. The initial position is zero. @@ -265,7 +265,7 @@ public final class REMatch implements Serializable, Cloneable return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1; } - /** + /** * Returns the index within the input string used to generate this match * where subexpression number <i>sub</i> ends, or <code>-1</code> if * the subexpression does not exist. The initial position is zero. @@ -281,7 +281,7 @@ public final class REMatch implements Serializable, Cloneable return (x == -1) ? x : (x >= 0) ? offset + x : offset + x + 1; } - /** + /** * Returns the index within the input string used to generate this match * where subexpression number <i>sub</i> ends, or <code>-1</code> if * the subexpression does not exist. The initial position is zero. @@ -314,30 +314,30 @@ public final class REMatch implements Serializable, Cloneable int pos; for (pos = 0; pos < input.length () - 1; pos++) { - if ((input.charAt (pos) == '$') - && (Character.isDigit (input.charAt (pos + 1)))) - { - int val = Character.digit (input.charAt (++pos), 10); - int pos1 = pos + 1; - while (pos1 < input.length () && - Character.isDigit (input.charAt (pos1))) - { - int val1 = - val * 10 + Character.digit (input.charAt (pos1), 10); - if (val1 >= start.length) - break; - pos1++; - val = val1; - } - pos = pos1 - 1; - - if (val < start.length) - { - output.append (toString (val)); - } - } - else - output.append (input.charAt (pos)); + if ((input.charAt (pos) == '$') + && (Character.isDigit (input.charAt (pos + 1)))) + { + int val = Character.digit (input.charAt (++pos), 10); + int pos1 = pos + 1; + while (pos1 < input.length () && + Character.isDigit (input.charAt (pos1))) + { + int val1 = + val * 10 + Character.digit (input.charAt (pos1), 10); + if (val1 >= start.length) + break; + pos1++; + val = val1; + } + pos = pos1 - 1; + + if (val < start.length) + { + output.append (toString (val)); + } + } + else + output.append (input.charAt (pos)); } if (pos < input.length ()) output.append (input.charAt (pos)); @@ -346,16 +346,16 @@ public final class REMatch implements Serializable, Cloneable /* The following are used for debugging purpose public static String d(REMatch m) { - if (m == null) return "null"; + if (m == null) return "null"; else return "[" + m.index + "]"; } public String substringUptoIndex(CharIndexed input) { - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < index; i++) { - sb.append(input.charAt(i)); - } - return sb.toString(); + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < index; i++) { + sb.append(input.charAt(i)); + } + return sb.toString(); } */ diff --git a/libjava/classpath/gnu/java/util/regex/REMatchEnumeration.java b/libjava/classpath/gnu/java/util/regex/REMatchEnumeration.java index f0c78be..04432d0 100644 --- a/libjava/classpath/gnu/java/util/regex/REMatchEnumeration.java +++ b/libjava/classpath/gnu/java/util/regex/REMatchEnumeration.java @@ -47,7 +47,7 @@ import java.util.NoSuchElementException; * An REMatchEnumeration enumerates regular expression matches over a * given input text. You obtain a reference to an enumeration using * the <code>getMatchEnumeration()</code> methods on an instance of - * RE. + * RE. * * <P> * @@ -59,15 +59,15 @@ import java.util.NoSuchElementException; * not need to be searched immediately. * * <P> - * + * * The enumerated type is especially useful when searching on a Reader * or InputStream, because the InputStream read position cannot be * guaranteed after calling <code>getMatch()</code> (see the * description of that method for an explanation of why). Enumeration * also saves a lot of overhead required when calling * <code>getMatch()</code> multiple times. - * - * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A> + * + * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A> */ public class REMatchEnumeration implements Enumeration < REMatch >, Serializable @@ -113,17 +113,17 @@ public class REMatchEnumeration { if (more == MAYBE) { - match = expr.getMatchImpl (input, index, eflags, buffer); - if (match != null) - { - input.move ((match.end[0] > 0) ? match.end[0] : 1); - - index = - (match.end[0] > 0) ? match.end[0] + match.offset : index + 1; - more = YES; - } - else - more = NO; + match = expr.getMatchImpl (input, index, eflags, buffer); + if (match != null) + { + input.move ((match.end[0] > 0) ? match.end[0] : 1); + + index = + (match.end[0] > 0) ? match.end[0] + match.offset : index + 1; + more = YES; + } + else + more = NO; } return (more == YES); } @@ -133,8 +133,8 @@ public class REMatchEnumeration { if (hasMoreElements ()) { - more = (input.isValid ())? MAYBE : NO; - return match; + more = (input.isValid ())? MAYBE : NO; + return match; } throw new NoSuchElementException (); } diff --git a/libjava/classpath/gnu/java/util/regex/RESyntax.java b/libjava/classpath/gnu/java/util/regex/RESyntax.java index 38d7056..2080cb7 100644 --- a/libjava/classpath/gnu/java/util/regex/RESyntax.java +++ b/libjava/classpath/gnu/java/util/regex/RESyntax.java @@ -94,7 +94,7 @@ public final class RESyntax implements Serializable /** * Syntax bit. Repetition and alternation operators are invalid - * at start and end of pattern and other places. + * at start and end of pattern and other places. * <B>Not implemented</B>. */ public static final int RE_CONTEXT_INVALID_OPS = 5; @@ -115,7 +115,7 @@ public final class RESyntax implements Serializable public static final int RE_INTERVALS = 8; /** - * Syntax bit. No alternation (|), match one-or-more (+), or + * Syntax bit. No alternation (|), match one-or-more (+), or * match zero-or-one (?) operators. */ public static final int RE_LIMITED_OPS = 9; @@ -123,7 +123,7 @@ public final class RESyntax implements Serializable /** * Syntax bit. Newline is an alternation operator. */ - public static final int RE_NEWLINE_ALT = 10; // impl. + public static final int RE_NEWLINE_ALT = 10; // impl. /** * Syntax bit. Intervals use { } instead of \{ \} @@ -317,7 +317,7 @@ public final class RESyntax implements Serializable * Emulates regular expression support in Larry Wall's perl, version 4, * using single line mode (/s modifier). */ - public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s) + public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s) /** * Predefined syntax. @@ -403,23 +403,23 @@ public final class RESyntax implements Serializable /* There is no official Perl spec, but here's a "best guess" */ - RE_SYNTAX_PERL4 = new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS).set (RE_CONTEXT_INDEP_ANCHORS).set (RE_CONTEXT_INDEP_OPS) // except for '{', apparently - .set (RE_INTERVALS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES).set (RE_CHAR_CLASS_ESCAPES) // \d,\D,\w,\W,\s,\S + RE_SYNTAX_PERL4 = new RESyntax ().set (RE_BACKSLASH_ESCAPE_IN_LISTS).set (RE_CONTEXT_INDEP_ANCHORS).set (RE_CONTEXT_INDEP_OPS) // except for '{', apparently + .set (RE_INTERVALS).set (RE_NO_BK_BRACES).set (RE_NO_BK_PARENS).set (RE_NO_BK_VBAR).set (RE_NO_EMPTY_RANGES).set (RE_CHAR_CLASS_ESCAPES) // \d,\D,\w,\W,\s,\S .makeFinal (); RE_SYNTAX_PERL4_S = new RESyntax (RE_SYNTAX_PERL4).set (RE_DOT_NEWLINE).makeFinal (); - RE_SYNTAX_PERL5 = new RESyntax (RE_SYNTAX_PERL4).set (RE_PURE_GROUPING) // (?:) - .set (RE_STINGY_OPS) // *?,??,+?,{}? - .set (RE_LOOKAHEAD) // (?=)(?!) - .set (RE_STRING_ANCHORS) // \A,\Z - .set (RE_CHAR_CLASS_ESC_IN_LISTS) // \d,\D,\w,\W,\s,\S within [] - .set (RE_COMMENTS) // (?#) - .set (RE_EMBEDDED_FLAGS) // (?imsx-imsx) - .set (RE_OCTAL_CHAR) // \0377 - .set (RE_HEX_CHAR) // \x1b - .set (RE_NAMED_PROPERTY) // \p{prop}, \P{prop} + RE_SYNTAX_PERL5 = new RESyntax (RE_SYNTAX_PERL4).set (RE_PURE_GROUPING) // (?:) + .set (RE_STINGY_OPS) // *?,??,+?,{}? + .set (RE_LOOKAHEAD) // (?=)(?!) + .set (RE_STRING_ANCHORS) // \A,\Z + .set (RE_CHAR_CLASS_ESC_IN_LISTS) // \d,\D,\w,\W,\s,\S within [] + .set (RE_COMMENTS) // (?#) + .set (RE_EMBEDDED_FLAGS) // (?imsx-imsx) + .set (RE_OCTAL_CHAR) // \0377 + .set (RE_HEX_CHAR) // \x1b + .set (RE_NAMED_PROPERTY) // \p{prop}, \P{prop} .makeFinal (); RE_SYNTAX_PERL5_S = @@ -427,9 +427,9 @@ public final class RESyntax implements Serializable RE_SYNTAX_JAVA_1_4 = new RESyntax (RE_SYNTAX_PERL5) // XXX - .set (RE_POSSESSIVE_OPS) // *+,?+,++,{}+ - .set (RE_UNICODE_CHAR) // \u1234 - .set (RE_NESTED_CHARCLASS) // [a-z&&[^p-r]] + .set (RE_POSSESSIVE_OPS) // *+,?+,++,{}+ + .set (RE_UNICODE_CHAR) // \u1234 + .set (RE_NESTED_CHARCLASS) // [a-z&&[^p-r]] .makeFinal (); } @@ -457,7 +457,7 @@ public final class RESyntax implements Serializable } /** - * Construct a new syntax object with all bits set the same + * Construct a new syntax object with all bits set the same * as the other syntax. */ public RESyntax (RESyntax other) @@ -474,7 +474,7 @@ public final class RESyntax implements Serializable } /** - * Set a given bit in this syntax. + * Set a given bit in this syntax. * * @param index the constant (RESyntax.RE_xxx) bit to set. * @return a reference to this object for easy chaining. @@ -488,7 +488,7 @@ public final class RESyntax implements Serializable } /** - * Clear a given bit in this syntax. + * Clear a given bit in this syntax. * * @param index the constant (RESyntax.RE_xxx) bit to clear. * @return a reference to this object for easy chaining. @@ -516,7 +516,7 @@ public final class RESyntax implements Serializable * characters that have specific meaning within the current syntax * can cause unexpected chronosynclastic infundibula. * - * @return this object for convenient chaining + * @return this object for convenient chaining */ public RESyntax setLineSeparator (String aSeparator) { diff --git a/libjava/classpath/gnu/java/util/regex/REToken.java b/libjava/classpath/gnu/java/util/regex/REToken.java index 681ac51..ed9b317 100644 --- a/libjava/classpath/gnu/java/util/regex/REToken.java +++ b/libjava/classpath/gnu/java/util/regex/REToken.java @@ -58,7 +58,7 @@ abstract class REToken implements Serializable, Cloneable } catch (CloneNotSupportedException e) { - throw new Error (); // doesn't happen + throw new Error (); // doesn't happen } } @@ -96,15 +96,15 @@ abstract class REToken implements Serializable, Cloneable { if (!fake) { - setHitEnd (input, mymatch); + setHitEnd (input, mymatch); } REMatch m = matchThis (input, mymatch); if (m == null) return false; if (next (input, m)) { - mymatch.assignFrom (m); - return true; + mymatch.assignFrom (m); + return true; } return false; } @@ -188,7 +188,7 @@ abstract class REToken implements Serializable, Cloneable boolean chain (REToken token) { next = token; - return true; // Token was accepted + return true; // Token was accepted } abstract void dump (CPStringBuilder os); diff --git a/libjava/classpath/gnu/java/util/regex/RETokenAny.java b/libjava/classpath/gnu/java/util/regex/RETokenAny.java index c002d05..f503968 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenAny.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenAny.java @@ -71,8 +71,8 @@ final class RETokenAny extends REToken boolean retval = matchOneChar (ch); if (retval) { - ++mymatch.index; - return mymatch; + ++mymatch.index; + return mymatch; } return null; } @@ -80,9 +80,9 @@ final class RETokenAny extends REToken boolean matchOneChar (char ch) { if ((ch == CharIndexed.OUT_OF_BOUNDS) - || (!newline && (ch == '\n')) || (matchNull && (ch == 0))) + || (!newline && (ch == '\n')) || (matchNull && (ch == 0))) { - return false; + return false; } return true; } @@ -98,12 +98,12 @@ final class RETokenAny extends REToken int numRepeats = 0; while (true) { - if (numRepeats >= max) - break; - char ch = input.charAt (index++); - if (!matchOneChar (ch)) - break; - numRepeats++; + if (numRepeats >= max) + break; + char ch = input.charAt (index++); + if (!matchOneChar (ch)) + break; + numRepeats++; } return numRepeats; } diff --git a/libjava/classpath/gnu/java/util/regex/RETokenBackRef.java b/libjava/classpath/gnu/java/util/regex/RETokenBackRef.java index 8f14622..52061ca 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenBackRef.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenBackRef.java @@ -64,30 +64,30 @@ final class RETokenBackRef extends REToken b = mymatch.start[num]; e = mymatch.end[num]; if ((b == -1) || (e == -1)) - return null; // this shouldn't happen, but... + return null; // this shouldn't happen, but... if (b < 0) b += 1; if (e < 0) e += 1; for (int i = b; i < e; i++) { - char c1 = input.charAt (mymatch.index + i - b); - char c2 = input.charAt (i); - if (c1 != c2) - { - if (insens) - { - if (c1 != toLowerCase (c2, unicodeAware) && - c1 != toUpperCase (c2, unicodeAware)) - { - return null; - } - } - else - { - return null; - } - } + char c1 = input.charAt (mymatch.index + i - b); + char c2 = input.charAt (i); + if (c1 != c2) + { + if (insens) + { + if (c1 != toLowerCase (c2, unicodeAware) && + c1 != toUpperCase (c2, unicodeAware)) + { + return null; + } + } + else + { + return null; + } + } } mymatch.index += e - b; return mymatch; diff --git a/libjava/classpath/gnu/java/util/regex/RETokenChar.java b/libjava/classpath/gnu/java/util/regex/RETokenChar.java index babcf3e..3469ecf 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenChar.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenChar.java @@ -67,8 +67,8 @@ final class RETokenChar extends REToken { if (matchOneString (input, mymatch.index)) { - mymatch.index += matchedLength; - return mymatch; + mymatch.index += matchedLength; + return mymatch; } // java.util.regex.Matcher#hitEnd() requires that the length of // partial match be counted. @@ -85,12 +85,12 @@ final class RETokenChar extends REToken char c; for (int i = 0; i < z; i++) { - c = input.charAt (index + i); - if (!charEquals (c, ch[i])) - { - return false; - } - ++matchedLength; + c = input.charAt (index + i); + if (!charEquals (c, ch[i])) + { + return false; + } + ++matchedLength; } return true; } @@ -120,15 +120,15 @@ final class RETokenChar extends REToken int z = ch.length; while (true) { - if (numRepeats >= max) - break; - if (matchOneString (input, index)) - { - index += z; - numRepeats++; - } - else - break; + if (numRepeats >= max) + break; + if (matchOneString (input, index)) + { + index += z; + numRepeats++; + } + else + break; } return numRepeats; } @@ -138,18 +138,18 @@ final class RETokenChar extends REToken { if (next instanceof RETokenChar && ((RETokenChar) next).insens == insens) { - RETokenChar cnext = (RETokenChar) next; - int newsize = ch.length + cnext.ch.length; + RETokenChar cnext = (RETokenChar) next; + int newsize = ch.length + cnext.ch.length; - char[] chTemp = new char[newsize]; + char[] chTemp = new char[newsize]; - System.arraycopy (ch, 0, chTemp, 0, ch.length); - System.arraycopy (cnext.ch, 0, chTemp, ch.length, cnext.ch.length); + System.arraycopy (ch, 0, chTemp, 0, ch.length); + System.arraycopy (cnext.ch, 0, chTemp, ch.length, cnext.ch.length); - ch = chTemp; - if (cnext.next == null) - return false; - return chain (cnext.next); + ch = chTemp; + if (cnext.next == null) + return false; + return chain (cnext.next); } else return super.chain (next); diff --git a/libjava/classpath/gnu/java/util/regex/RETokenEnd.java b/libjava/classpath/gnu/java/util/regex/RETokenEnd.java index 33e0feb..28d7823 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenEnd.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenEnd.java @@ -92,38 +92,38 @@ final class RETokenEnd extends REToken return ((mymatch.eflags & RE.REG_NOTEOL) > 0) ? null : mymatch; if (check_java_line_terminators) { - if (ch == '\n') - { - char ch1 = input.charAt (mymatch.index - 1); - if (ch1 == '\r') - return null; - return mymatch; - } - if (ch == '\r') - return mymatch; - if (ch == '\u0085') - return mymatch; // A next-line character - if (ch == '\u2028') - return mymatch; // A line-separator character - if (ch == '\u2029') - return mymatch; // A paragraph-separator character - return null; + if (ch == '\n') + { + char ch1 = input.charAt (mymatch.index - 1); + if (ch1 == '\r') + return null; + return mymatch; + } + if (ch == '\r') + return mymatch; + if (ch == '\u0085') + return mymatch; // A next-line character + if (ch == '\u2028') + return mymatch; // A line-separator character + if (ch == '\u2029') + return mymatch; // A paragraph-separator character + return null; } if (newline != null) { - char z; - int i = 0; // position in newline - do - { - z = newline.charAt (i); - if (ch != z) - return null; - ++i; - ch = input.charAt (mymatch.index + i); - } - while (i < newline.length ()); - - return mymatch; + char z; + int i = 0; // position in newline + do + { + z = newline.charAt (i); + if (ch != z) + return null; + ++i; + ch = input.charAt (mymatch.index + i); + } + while (i < newline.length ()); + + return mymatch; } return null; } diff --git a/libjava/classpath/gnu/java/util/regex/RETokenEndOfPreviousMatch.java b/libjava/classpath/gnu/java/util/regex/RETokenEndOfPreviousMatch.java index e4ad619..67a1b85 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenEndOfPreviousMatch.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenEndOfPreviousMatch.java @@ -58,13 +58,13 @@ class RETokenEndOfPreviousMatch extends RETokenStart if (lastMatch == null) return super.matchThis (input, mymatch); if (input.getAnchor () + mymatch.index == - lastMatch.anchor + lastMatch.index) + lastMatch.anchor + lastMatch.index) { - return mymatch; + return mymatch; } else { - return null; + return null; } } diff --git a/libjava/classpath/gnu/java/util/regex/RETokenIndependent.java b/libjava/classpath/gnu/java/util/regex/RETokenIndependent.java index 8ad728d..089aa18 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenIndependent.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenIndependent.java @@ -67,10 +67,10 @@ final class RETokenIndependent extends REToken boolean b = re.match (input, mymatch); if (b) { - // Once we have found a match, we do not see other possible matches. - if (mymatch.backtrackStack != null) - mymatch.backtrackStack.clear (); - return mymatch; + // Once we have found a match, we do not see other possible matches. + if (mymatch.backtrackStack != null) + mymatch.backtrackStack.clear (); + return mymatch; } return null; diff --git a/libjava/classpath/gnu/java/util/regex/RETokenLookAhead.java b/libjava/classpath/gnu/java/util/regex/RETokenLookAhead.java index 1dc6019..34625aa 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenLookAhead.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenLookAhead.java @@ -65,16 +65,16 @@ final class RETokenLookAhead extends REToken REMatch trymatch = (REMatch) mymatch.clone (); if (re.match (input, trymatch)) { - if (negative) - return null; - trymatch.index = mymatch.index; - return trymatch; + if (negative) + return null; + trymatch.index = mymatch.index; + return trymatch; } else { - if (negative) - return mymatch; - return null; + if (negative) + return mymatch; + return null; } } diff --git a/libjava/classpath/gnu/java/util/regex/RETokenLookBehind.java b/libjava/classpath/gnu/java/util/regex/RETokenLookBehind.java index f61c93c..c85e37f 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenLookBehind.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenLookBehind.java @@ -73,29 +73,29 @@ final class RETokenLookBehind extends REToken re1.chain (stopper); if (re1.match (behind, trymatch)) { - if (negative) - return null; - for (int i = 0; i < trymatch.start.length; i++) - { - if (trymatch.start[i] != -1 && trymatch.end[i] != -1) - { - trymatch.start[i] -= diff; - if (trymatch.start[i] < 0) - trymatch.start[i] -= 1; - trymatch.end[i] -= diff; - if (trymatch.end[i] < 0) - trymatch.end[i] -= 1; - } - } - trymatch.index = mymatch.index; - trymatch.offset = mymatch.offset; - return trymatch; + if (negative) + return null; + for (int i = 0; i < trymatch.start.length; i++) + { + if (trymatch.start[i] != -1 && trymatch.end[i] != -1) + { + trymatch.start[i] -= diff; + if (trymatch.start[i] < 0) + trymatch.start[i] -= 1; + trymatch.end[i] -= diff; + if (trymatch.end[i] < 0) + trymatch.end[i] -= 1; + } + } + trymatch.index = mymatch.index; + trymatch.offset = mymatch.offset; + return trymatch; } else { - if (negative) - return mymatch; - return null; + if (negative) + return mymatch; + return null; } } diff --git a/libjava/classpath/gnu/java/util/regex/RETokenNamedProperty.java b/libjava/classpath/gnu/java/util/regex/RETokenNamedProperty.java index 1683cb1..340da03 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenNamedProperty.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenNamedProperty.java @@ -96,7 +96,7 @@ final class RETokenNamedProperty extends REToken }; RETokenNamedProperty (int subIndex, String name, boolean insens, - boolean negate) throws REException + boolean negate) throws REException { super (subIndex); this.name = name; @@ -121,8 +121,8 @@ final class RETokenNamedProperty extends REToken boolean retval = matchOneChar (ch); if (retval) { - ++mymatch.index; - return mymatch; + ++mymatch.index; + return mymatch; } return null; } @@ -135,9 +135,9 @@ final class RETokenNamedProperty extends REToken boolean retval = handler.includes (ch); if (insens) { - retval = retval || - handler.includes (toUpperCase (ch, unicodeAware)) || - handler.includes (toLowerCase (ch, unicodeAware)); + retval = retval || + handler.includes (toUpperCase (ch, unicodeAware)) || + handler.includes (toLowerCase (ch, unicodeAware)); } if (negate) @@ -156,12 +156,12 @@ final class RETokenNamedProperty extends REToken int numRepeats = 0; while (true) { - if (numRepeats >= max) - break; - char ch = input.charAt (index++); - if (!matchOneChar (ch)) - break; - numRepeats++; + if (numRepeats >= max) + break; + char ch = input.charAt (index++); + if (!matchOneChar (ch)) + break; + numRepeats++; } return numRepeats; } @@ -179,37 +179,37 @@ final class RETokenNamedProperty extends REToken private Handler getHandler (String name) throws REException { if (name.equals ("Lower") || name.equals ("Upper") || - // name.equals("ASCII") || - name.equals ("Alpha") || - name.equals ("Digit") || - name.equals ("Alnum") || - name.equals ("Punct") || - name.equals ("Graph") || - name.equals ("Print") || - name.equals ("Blank") || - name.equals ("Cntrl") || - name.equals ("XDigit") || name.equals ("Space")) + // name.equals("ASCII") || + name.equals ("Alpha") || + name.equals ("Digit") || + name.equals ("Alnum") || + name.equals ("Punct") || + name.equals ("Graph") || + name.equals ("Print") || + name.equals ("Blank") || + name.equals ("Cntrl") || + name.equals ("XDigit") || name.equals ("Space")) { - return new POSIXHandler (name); + return new POSIXHandler (name); } if (name.startsWith ("In")) { - try - { - name = name.substring (2); - Character.UnicodeBlock block = - Character.UnicodeBlock.forName (name); - return new UnicodeBlockHandler (block); - } - catch (IllegalArgumentException e) - { - throw new REException ("Invalid Unicode block name: " + name, - REException.REG_ESCAPE, 0); - } + try + { + name = name.substring (2); + Character.UnicodeBlock block = + Character.UnicodeBlock.forName (name); + return new UnicodeBlockHandler (block); + } + catch (IllegalArgumentException e) + { + throw new REException ("Invalid Unicode block name: " + name, + REException.REG_ESCAPE, 0); + } } if (name.startsWith ("Is")) { - name = name.substring (2); + name = name.substring (2); } // "grouped properties" @@ -291,27 +291,27 @@ final class RETokenNamedProperty extends REToken if (name.equals ("all")) return new Handler () { - public boolean includes (char c) - { - return true; - } + public boolean includes (char c) + { + return true; + } }; if (name.startsWith ("java")) { - try - { - Method m = Character.class.getMethod ("is" + name.substring (4), - Character.TYPE); - return new JavaCategoryHandler (m); - } - catch (NoSuchMethodException e) - { - throw new REException ("Unsupported Java handler: " + name, e, - REException.REG_ESCAPE, 0); - } + try + { + Method m = Character.class.getMethod ("is" + name.substring (4), + Character.TYPE); + return new JavaCategoryHandler (m); + } + catch (NoSuchMethodException e) + { + throw new REException ("Unsupported Java handler: " + name, e, + REException.REG_ESCAPE, 0); + } } throw new REException ("unsupported name " + name, REException.REG_ESCAPE, - 0); + 0); } private static class POSIXHandler extends Handler @@ -321,9 +321,9 @@ final class RETokenNamedProperty extends REToken { int posixId = RETokenPOSIX.intValue (name.toLowerCase ()); if (posixId != -1) - retoken = new RETokenPOSIX (0, posixId, false, false); + retoken = new RETokenPOSIX (0, posixId, false, false); else - throw new RuntimeException ("Unknown posix ID: " + name); + throw new RuntimeException ("Unknown posix ID: " + name); } public boolean includes (char c) { @@ -355,8 +355,8 @@ final class RETokenNamedProperty extends REToken { int category = Character.getType (c); for (int i = 0; i < categories.length; i++) - if (category == categories[i]) - return true; + if (category == categories[i]) + return true; return false; } } @@ -394,15 +394,15 @@ final class RETokenNamedProperty extends REToken { try { - return (Boolean) method.invoke (null, c); + return (Boolean) method.invoke (null, c); } catch (IllegalAccessException e) { - throw new InternalError ("Unable to access method " + method); + throw new InternalError ("Unable to access method " + method); } catch (InvocationTargetException e) { - throw new InternalError ("Error invoking " + method); + throw new InternalError ("Error invoking " + method); } } } diff --git a/libjava/classpath/gnu/java/util/regex/RETokenOneOf.java b/libjava/classpath/gnu/java/util/regex/RETokenOneOf.java index fcae3c2..3bea889 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenOneOf.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenOneOf.java @@ -59,7 +59,7 @@ final class RETokenOneOf extends REToken // the basic part /2-7a-c/ is stored in the ArrayList options, and // the additional part /[f-k][m-z]&&[^p-v][st]/ is stored in the // ArrayList addition in the following order (Reverse Polish Notation): - // -- The matching result of the basic part is assumed here. + // -- The matching result of the basic part is assumed here. // [f-k] -- REToken // "|" -- or // [m-z] -- REToken @@ -80,7 +80,7 @@ final class RETokenOneOf extends REToken // \D --> new RETokenOneOf("0123456789",true, ..) RETokenOneOf (int subIndex, String optionsStr, boolean negative, - boolean insens) + boolean insens) { super (subIndex); options = new ArrayList < REToken > (); @@ -97,7 +97,7 @@ final class RETokenOneOf extends REToken } RETokenOneOf (int subIndex, List < REToken > options, - List < Object > addition, boolean negative) + List < Object > addition, boolean negative) { super (subIndex); this.options = options; @@ -114,8 +114,8 @@ final class RETokenOneOf extends REToken int x; for (REToken t:options) { - if ((x = t.getMinimumLength ()) < min) - min = x; + if ((x = t.getMinimumLength ()) < min) + min = x; } return min; } @@ -128,8 +128,8 @@ final class RETokenOneOf extends REToken int x; for (REToken t:options) { - if ((x = t.getMaximumLength ()) > max) - max = x; + if ((x = t.getMaximumLength ()) > max) + max = x; } return max; } @@ -149,13 +149,13 @@ final class RETokenOneOf extends REToken boolean tryOnly; if (addition == null) { - tryMatch = mymatch; - tryOnly = false; + tryMatch = mymatch; + tryOnly = false; } else { - tryMatch = (REMatch) mymatch.clone (); - tryOnly = true; + tryMatch = (REMatch) mymatch.clone (); + tryOnly = true; } boolean b = negative ? matchN (input, tryMatch, tryOnly) : matchP (input, tryMatch, tryOnly); @@ -166,36 +166,36 @@ final class RETokenOneOf extends REToken stack.push (new Boolean (b)); for (Object obj:addition) { - if (obj instanceof REToken) - { - b = ((REToken) obj).match (input, (REMatch) mymatch.clone ()); - stack.push (new Boolean (b)); - } - else if (obj instanceof Boolean) - { - stack.push ((Boolean) obj); - } - else if (obj.equals ("|")) - { - b = stack.pop (); - b = stack.pop () || b; - stack.push (new Boolean (b)); - } - else if (obj.equals ("&")) - { - b = stack.pop (); - b = stack.pop () && b; - stack.push (new Boolean (b)); - } - else - { - throw new RuntimeException ("Invalid object found"); - } + if (obj instanceof REToken) + { + b = ((REToken) obj).match (input, (REMatch) mymatch.clone ()); + stack.push (new Boolean (b)); + } + else if (obj instanceof Boolean) + { + stack.push ((Boolean) obj); + } + else if (obj.equals ("|")) + { + b = stack.pop (); + b = stack.pop () || b; + stack.push (new Boolean (b)); + } + else if (obj.equals ("&")) + { + b = stack.pop (); + b = stack.pop () && b; + stack.push (new Boolean (b)); + } + else + { + throw new RuntimeException ("Invalid object found"); + } } if (stack.pop ()) { - ++mymatch.index; - return next (input, mymatch); + ++mymatch.index; + return next (input, mymatch); } return false; } @@ -207,12 +207,12 @@ final class RETokenOneOf extends REToken for (REToken tk:options) { - REMatch tryMatch = (REMatch) mymatch.clone (); - if (tk.match (input, tryMatch)) - { // match was successful - return false; - } // is a match - } // try next option + REMatch tryMatch = (REMatch) mymatch.clone (); + if (tk.match (input, tryMatch)) + { // match was successful + return false; + } // is a match + } // try next option if (tryOnly) return true; @@ -224,17 +224,17 @@ final class RETokenOneOf extends REToken { for (REToken tk:options) { - REMatch tryMatch = (REMatch) mymatch.clone (); - if (tk.match (input, tryMatch)) - { // match was successful - if (tryOnly) - return true; - if (next (input, tryMatch)) - { - mymatch.assignFrom (tryMatch); - return true; - } - } + REMatch tryMatch = (REMatch) mymatch.clone (); + if (tk.match (input, tryMatch)) + { // match was successful + if (tryOnly) + return true; + if (next (input, tryMatch)) + { + mymatch.assignFrom (tryMatch); + return true; + } + } } return false; } @@ -244,8 +244,8 @@ final class RETokenOneOf extends REToken REMatch newMatch = findMatch (input, mymatch); if (newMatch != null) { - mymatch.assignFrom (newMatch); - return true; + mymatch.assignFrom (newMatch); + return true; } return false; } @@ -263,32 +263,32 @@ final class RETokenOneOf extends REToken } private REMatch findMatch (CharIndexed input, REMatch mymatch, - int optionIndex) + int optionIndex) { for (int i = optionIndex; i < options.size (); i++) { - REToken tk = options.get (i); - tk = (REToken) tk.clone (); - tk.chain (getNext ()); - REMatch tryMatch = (REMatch) mymatch.clone (); - if (tryMatch.backtrackStack == null) - { - tryMatch.backtrackStack = new BacktrackStack (); - } - boolean stackPushed = false; - if (i + 1 < options.size ()) - { - tryMatch.backtrackStack.push (new BacktrackStack. - Backtrack (this, input, mymatch, - i + 1)); - stackPushed = true; - } - if (tk.match (input, tryMatch)) - { - return tryMatch; - } - if (stackPushed) - tryMatch.backtrackStack.pop (); + REToken tk = options.get (i); + tk = (REToken) tk.clone (); + tk.chain (getNext ()); + REMatch tryMatch = (REMatch) mymatch.clone (); + if (tryMatch.backtrackStack == null) + { + tryMatch.backtrackStack = new BacktrackStack (); + } + boolean stackPushed = false; + if (i + 1 < options.size ()) + { + tryMatch.backtrackStack.push (new BacktrackStack. + Backtrack (this, input, mymatch, + i + 1)); + stackPushed = true; + } + if (tk.match (input, tryMatch)) + { + return tryMatch; + } + if (stackPushed) + tryMatch.backtrackStack.pop (); } return null; } @@ -308,12 +308,12 @@ final class RETokenOneOf extends REToken tk.chain (null); while (true) { - if (numRepeats >= max) - break; - m = tk.findMatch (input, m); - if (m == null) - break; - numRepeats++; + if (numRepeats >= max) + break; + m = tk.findMatch (input, m); + if (m == null) + break; + numRepeats++; } return numRepeats; } @@ -323,9 +323,9 @@ final class RETokenOneOf extends REToken os.append (negative ? "[^" : "(?:"); for (int i = 0; i < options.size (); i++) { - if (!negative && (i > 0)) - os.append ('|'); - options.get (i).dumpAll (os); + if (!negative && (i > 0)) + os.append ('|'); + options.get (i).dumpAll (os); } os.append (negative ? ']' : ')'); } diff --git a/libjava/classpath/gnu/java/util/regex/RETokenPOSIX.java b/libjava/classpath/gnu/java/util/regex/RETokenPOSIX.java index 2c80957..f0fd04b 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenPOSIX.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenPOSIX.java @@ -70,8 +70,8 @@ final class RETokenPOSIX extends REToken { for (int i = 0; i < s_nameTable.length; i++) { - if (s_nameTable[i].equals (key)) - return i; + if (s_nameTable[i].equals (key)) + return i; } return -1; } @@ -100,8 +100,8 @@ final class RETokenPOSIX extends REToken boolean retval = matchOneChar (ch); if (retval) { - ++mymatch.index; - return mymatch; + ++mymatch.index; + return mymatch; } return null; } @@ -115,49 +115,49 @@ final class RETokenPOSIX extends REToken switch (type) { case ALNUM: - // Note that there is some debate over whether '_' should be included - retval = Character.isLetterOrDigit (ch) || (ch == '_'); - break; + // Note that there is some debate over whether '_' should be included + retval = Character.isLetterOrDigit (ch) || (ch == '_'); + break; case ALPHA: - retval = Character.isLetter (ch); - break; + retval = Character.isLetter (ch); + break; case BLANK: - retval = ((ch == ' ') || (ch == '\t')); - break; + retval = ((ch == ' ') || (ch == '\t')); + break; case CNTRL: - retval = Character.isISOControl (ch); - break; + retval = Character.isISOControl (ch); + break; case DIGIT: - retval = Character.isDigit (ch); - break; + retval = Character.isDigit (ch); + break; case GRAPH: - retval = - (!(Character.isWhitespace (ch) || Character.isISOControl (ch))); - break; + retval = + (!(Character.isWhitespace (ch) || Character.isISOControl (ch))); + break; case LOWER: - retval = ((insens && Character.isLetter (ch)) - || Character.isLowerCase (ch)); - break; + retval = ((insens && Character.isLetter (ch)) + || Character.isLowerCase (ch)); + break; case PRINT: - retval = - (!(Character.isWhitespace (ch) || Character.isISOControl (ch))) - || (ch == ' '); - break; + retval = + (!(Character.isWhitespace (ch) || Character.isISOControl (ch))) + || (ch == ' '); + break; case PUNCT: - // This feels sloppy, especially for non-U.S. locales. - retval = ("`~!@#$%^&*()-_=+[]{}\\|;:'\"/?,.<>".indexOf (ch) != -1); - break; + // This feels sloppy, especially for non-U.S. locales. + retval = ("`~!@#$%^&*()-_=+[]{}\\|;:'\"/?,.<>".indexOf (ch) != -1); + break; case SPACE: - retval = Character.isWhitespace (ch); - break; + retval = Character.isWhitespace (ch); + break; case UPPER: - retval = ((insens && Character.isLetter (ch)) - || Character.isUpperCase (ch)); - break; + retval = ((insens && Character.isLetter (ch)) + || Character.isUpperCase (ch)); + break; case XDIGIT: - retval = (Character.isDigit (ch) - || ("abcdefABCDEF".indexOf (ch) != -1)); - break; + retval = (Character.isDigit (ch) + || ("abcdefABCDEF".indexOf (ch) != -1)); + break; } if (negated) @@ -176,12 +176,12 @@ final class RETokenPOSIX extends REToken int numRepeats = 0; while (true) { - if (numRepeats >= max) - break; - char ch = input.charAt (index++); - if (!matchOneChar (ch)) - break; - numRepeats++; + if (numRepeats >= max) + break; + char ch = input.charAt (index++); + if (!matchOneChar (ch)) + break; + numRepeats++; } return numRepeats; } diff --git a/libjava/classpath/gnu/java/util/regex/RETokenRange.java b/libjava/classpath/gnu/java/util/regex/RETokenRange.java index 2597d4d..8898ef5 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenRange.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenRange.java @@ -67,8 +67,8 @@ final class RETokenRange extends REToken char c = input.charAt (mymatch.index); if (matchOneChar (c)) { - ++mymatch.index; - return mymatch; + ++mymatch.index; + return mymatch; } return null; } @@ -80,13 +80,13 @@ final class RETokenRange extends REToken boolean matches = (c >= lo) && (c <= hi); if (!matches && insens) { - char c1 = toLowerCase (c, unicodeAware); - matches = (c1 >= lo) && (c1 <= hi); - if (!matches) - { - c1 = toUpperCase (c, unicodeAware); - matches = (c1 >= lo) && (c1 <= hi); - } + char c1 = toLowerCase (c, unicodeAware); + matches = (c1 >= lo) && (c1 <= hi); + if (!matches) + { + c1 = toUpperCase (c, unicodeAware); + matches = (c1 >= lo) && (c1 <= hi); + } } return matches; } @@ -102,12 +102,12 @@ final class RETokenRange extends REToken int numRepeats = 0; while (true) { - if (numRepeats >= max) - break; - char ch = input.charAt (index++); - if (!matchOneChar (ch)) - break; - numRepeats++; + if (numRepeats >= max) + break; + char ch = input.charAt (index++); + if (!matchOneChar (ch)) + break; + numRepeats++; } return numRepeats; } diff --git a/libjava/classpath/gnu/java/util/regex/RETokenRepeated.java b/libjava/classpath/gnu/java/util/regex/RETokenRepeated.java index 0ba880d..cd86353 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenRepeated.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenRepeated.java @@ -59,11 +59,11 @@ final class RETokenRepeated extends REToken this.max = max; if (token.returnsFixedLengthMatches ()) { - tokenFixedLength = token.getMaximumLength (); + tokenFixedLength = token.getMaximumLength (); } else { - tokenFixedLength = -1; + tokenFixedLength = -1; } } @@ -125,7 +125,7 @@ final class RETokenRepeated extends REToken { this.tk = tk; this.input = input; - this.rematch = (REMatch) mymatch.clone (); // MUST make a clone + this.rematch = (REMatch) mymatch.clone (); // MUST make a clone this.rematch.backtrackStack = new BacktrackStack (); findFirst = true; } @@ -135,31 +135,31 @@ final class RETokenRepeated extends REToken int origin = rematch.index; REMatch rem; if (findFirst) - { - rem = tk.findMatch (input, rematch); - findFirst = false; - } + { + rem = tk.findMatch (input, rematch); + findFirst = false; + } else - { - while (true) - { - if (rematch.backtrackStack.empty ()) - { - rem = null; - break; - } - BacktrackStack.Backtrack bt = rematch.backtrackStack.pop (); - rem = bt.token.backtrack (bt.input, bt.match, bt.param); - if (rem != null) - break; - } - } + { + while (true) + { + if (rematch.backtrackStack.empty ()) + { + rem = null; + break; + } + BacktrackStack.Backtrack bt = rematch.backtrackStack.pop (); + rem = bt.token.backtrack (bt.input, bt.match, bt.param); + if (rem != null) + break; + } + } if (rem == null) - return null; + return null; if (rem.index == origin) - rem.empty = true; + rem.empty = true; rematch = rem; - return (REMatch) rem.clone (); // MUST make a clone. + return (REMatch) rem.clone (); // MUST make a clone. } boolean noMore () @@ -190,7 +190,7 @@ final class RETokenRepeated extends REToken int[] visited; DoablesFinder finder; StackedInfo (CharIndexed input, int numRepeats, REMatch match, - int[]visited, DoablesFinder finder) + int[]visited, DoablesFinder finder) { super (null, input, match, null); this.numRepeats = numRepeats; @@ -214,7 +214,7 @@ final class RETokenRepeated extends REToken } private REMatch findMatch (BacktrackStack stack, - Deque < FindMatchControl > controlStack) + Deque < FindMatchControl > controlStack) { REMatch result = null; StackedInfo si = null; @@ -235,180 +235,180 @@ final class RETokenRepeated extends REToken while (true) { - // This is the second entry point of this method. - // If you want to call this method recursively but you do not need the - // result returned, just continue from this point. + // This is the second entry point of this method. + // If you want to call this method recursively but you do not need the + // result returned, just continue from this point. MAIN_LOOP: - while (true) - { - - if (stack.empty ()) - break MAIN_LOOP; - si = (StackedInfo) (stack.peek ()); - input = si.input; - numRepeats = si.numRepeats; - mymatch = si.match; - visited = si.visited; - finder = si.finder; - - if (mymatch.backtrackStack == null) - mymatch.backtrackStack = new BacktrackStack (); - - if (numRepeats >= max) - { - stack.pop (); - REMatch m1 = matchRest (input, mymatch); - if (m1 != null) - { - if (!stack.empty ()) - { - m1.backtrackStack.push (new BacktrackStack. - Backtrack (this, input, - mymatch, stack)); - } - result = m1; - break MAIN_LOOP; - } - if (stingy) - { - continue MAIN_LOOP; - } - break MAIN_LOOP; - } - - if (finder == null) - { - finder = new DoablesFinder (token, input, mymatch); - si.finder = finder; - } - - if (numRepeats < min) - { - while (true) - { - REMatch doable = finder.find (); - if (doable == null) - { - if (stack.empty ()) - return null; - stack.pop (); - continue MAIN_LOOP; - } - if (finder.noMore ()) - stack.pop (); - int newNumRepeats = (doable.empty ? min : numRepeats + 1); - stack. - push (new - StackedInfo (input, newNumRepeats, doable, - visited, null)); - continue MAIN_LOOP; - } - } - - if (visited == null) - visited = initVisited (); - - if (stingy) - { - REMatch nextMatch = finder.find (); - if (nextMatch != null && !nextMatch.empty) - { - stack. - push (new - StackedInfo (input, numRepeats + 1, nextMatch, - visited, null)); - } - else - { - stack.pop (); - } - REMatch m1 = matchRest (input, mymatch); - if (m1 != null) - { - if (!stack.empty ()) - { - m1.backtrackStack.push (new BacktrackStack. - Backtrack (this, input, - mymatch, stack)); - } - result = m1; - break MAIN_LOOP; - } - else - { - continue MAIN_LOOP; - } - } - - visited = addVisited (mymatch.index, visited); - - TryAnotherResult taresult = - tryAnother (stack, input, mymatch, numRepeats, finder, visited); - visited = taresult.visited; - switch (taresult.status) - { - case TryAnotherResult.TRY_FURTHER: - controlStack.push (new FindMatchControl (finder)); - continue MAIN_LOOP0; - case TryAnotherResult.RESULT_FOUND: - result = taresult.result; - break MAIN_LOOP; - } - - if (!stack.empty ()) - { - stack.pop (); - } - if (possessive) - { - stack.clear (); - } - REMatch m1 = matchRest (input, mymatch); - if (m1 != null) - { - if (!stack.empty ()) - { - m1.backtrackStack.push (new BacktrackStack. - Backtrack (this, input, mymatch, - stack)); - } - result = m1; - break MAIN_LOOP; - } - - } // MAIN_LOOP - - if (controlStack.isEmpty ()) - return result; - FindMatchControl control = controlStack.pop (); - if (possessive) - { - return result; - } - if (result != null) - { - result.backtrackStack.push (new BacktrackStack. - Backtrack (this, input, mymatch, - stack)); - return result; - } - - finder = control.finder; - - TryAnotherResult taresult = - tryAnother (stack, input, mymatch, numRepeats, finder, visited); - visited = taresult.visited; - switch (taresult.status) - { - case TryAnotherResult.TRY_FURTHER: - controlStack.push (new FindMatchControl (finder)); - continue MAIN_LOOP0; - case TryAnotherResult.RESULT_FOUND: - return taresult.result; - } - continue MAIN_LOOP0; - - } // MAIN_LOOP0 + while (true) + { + + if (stack.empty ()) + break MAIN_LOOP; + si = (StackedInfo) (stack.peek ()); + input = si.input; + numRepeats = si.numRepeats; + mymatch = si.match; + visited = si.visited; + finder = si.finder; + + if (mymatch.backtrackStack == null) + mymatch.backtrackStack = new BacktrackStack (); + + if (numRepeats >= max) + { + stack.pop (); + REMatch m1 = matchRest (input, mymatch); + if (m1 != null) + { + if (!stack.empty ()) + { + m1.backtrackStack.push (new BacktrackStack. + Backtrack (this, input, + mymatch, stack)); + } + result = m1; + break MAIN_LOOP; + } + if (stingy) + { + continue MAIN_LOOP; + } + break MAIN_LOOP; + } + + if (finder == null) + { + finder = new DoablesFinder (token, input, mymatch); + si.finder = finder; + } + + if (numRepeats < min) + { + while (true) + { + REMatch doable = finder.find (); + if (doable == null) + { + if (stack.empty ()) + return null; + stack.pop (); + continue MAIN_LOOP; + } + if (finder.noMore ()) + stack.pop (); + int newNumRepeats = (doable.empty ? min : numRepeats + 1); + stack. + push (new + StackedInfo (input, newNumRepeats, doable, + visited, null)); + continue MAIN_LOOP; + } + } + + if (visited == null) + visited = initVisited (); + + if (stingy) + { + REMatch nextMatch = finder.find (); + if (nextMatch != null && !nextMatch.empty) + { + stack. + push (new + StackedInfo (input, numRepeats + 1, nextMatch, + visited, null)); + } + else + { + stack.pop (); + } + REMatch m1 = matchRest (input, mymatch); + if (m1 != null) + { + if (!stack.empty ()) + { + m1.backtrackStack.push (new BacktrackStack. + Backtrack (this, input, + mymatch, stack)); + } + result = m1; + break MAIN_LOOP; + } + else + { + continue MAIN_LOOP; + } + } + + visited = addVisited (mymatch.index, visited); + + TryAnotherResult taresult = + tryAnother (stack, input, mymatch, numRepeats, finder, visited); + visited = taresult.visited; + switch (taresult.status) + { + case TryAnotherResult.TRY_FURTHER: + controlStack.push (new FindMatchControl (finder)); + continue MAIN_LOOP0; + case TryAnotherResult.RESULT_FOUND: + result = taresult.result; + break MAIN_LOOP; + } + + if (!stack.empty ()) + { + stack.pop (); + } + if (possessive) + { + stack.clear (); + } + REMatch m1 = matchRest (input, mymatch); + if (m1 != null) + { + if (!stack.empty ()) + { + m1.backtrackStack.push (new BacktrackStack. + Backtrack (this, input, mymatch, + stack)); + } + result = m1; + break MAIN_LOOP; + } + + } // MAIN_LOOP + + if (controlStack.isEmpty ()) + return result; + FindMatchControl control = controlStack.pop (); + if (possessive) + { + return result; + } + if (result != null) + { + result.backtrackStack.push (new BacktrackStack. + Backtrack (this, input, mymatch, + stack)); + return result; + } + + finder = control.finder; + + TryAnotherResult taresult = + tryAnother (stack, input, mymatch, numRepeats, finder, visited); + visited = taresult.visited; + switch (taresult.status) + { + case TryAnotherResult.TRY_FURTHER: + controlStack.push (new FindMatchControl (finder)); + continue MAIN_LOOP0; + case TryAnotherResult.RESULT_FOUND: + return taresult.result; + } + continue MAIN_LOOP0; + + } // MAIN_LOOP0 } private static class TryAnotherResult @@ -422,9 +422,9 @@ final class RETokenRepeated extends REToken } private TryAnotherResult tryAnother (BacktrackStack stack, - CharIndexed input, REMatch mymatch, - int numRepeats, DoablesFinder finder, - int[]visited) + CharIndexed input, REMatch mymatch, + int numRepeats, DoablesFinder finder, + int[]visited) { TryAnotherResult taresult = new TryAnotherResult (); @@ -437,58 +437,58 @@ final class RETokenRepeated extends REToken DO_ONE_DOABLE: while (true) - { - - REMatch doable = finder.find (); - if (doable == null) - { - break DO_THIS; - } - if (doable.empty) - emptyMatchFound = true; - - if (!emptyMatchFound) - { - int n = doable.index; - if (visitedContains (n, visited)) - { - continue DO_ONE_DOABLE; - } - visited = addVisited (n, visited); - stack. - push (new - StackedInfo (input, numRepeats + 1, doable, visited, - null)); - taresult.visited = visited; - taresult.status = TryAnotherResult.TRY_FURTHER; - return taresult; - } - else - { - REMatch m1 = matchRest (input, doable); - if (possessive) - { - taresult.result = m1; - taresult.status = TryAnotherResult.RESULT_FOUND; - return taresult; - } - if (m1 != null) - { - if (!stack.empty ()) - { - m1.backtrackStack.push (new BacktrackStack. - Backtrack (this, input, mymatch, - stack)); - } - taresult.result = m1; - taresult.status = TryAnotherResult.RESULT_FOUND; - return taresult; - } - } - - } // DO_ONE_DOABLE - - } // DO_THIS + { + + REMatch doable = finder.find (); + if (doable == null) + { + break DO_THIS; + } + if (doable.empty) + emptyMatchFound = true; + + if (!emptyMatchFound) + { + int n = doable.index; + if (visitedContains (n, visited)) + { + continue DO_ONE_DOABLE; + } + visited = addVisited (n, visited); + stack. + push (new + StackedInfo (input, numRepeats + 1, doable, visited, + null)); + taresult.visited = visited; + taresult.status = TryAnotherResult.TRY_FURTHER; + return taresult; + } + else + { + REMatch m1 = matchRest (input, doable); + if (possessive) + { + taresult.result = m1; + taresult.status = TryAnotherResult.RESULT_FOUND; + return taresult; + } + if (m1 != null) + { + if (!stack.empty ()) + { + m1.backtrackStack.push (new BacktrackStack. + Backtrack (this, input, mymatch, + stack)); + } + taresult.result = m1; + taresult.status = TryAnotherResult.RESULT_FOUND; + return taresult; + } + } + + } // DO_ONE_DOABLE + + } // DO_THIS taresult.status = TryAnotherResult.NOTHING_FOUND; return taresult; @@ -501,8 +501,8 @@ final class RETokenRepeated extends REToken REMatch m1 = findMatch (input, mymatch); if (m1 != null) { - mymatch.assignFrom (m1); - return true; + mymatch.assignFrom (m1); + return true; } return false; } @@ -523,8 +523,8 @@ final class RETokenRepeated extends REToken // simple linear search is faster than binary search. for (int i = 1; i < visited[0]; i++) { - if (n == visited[i]) - return true; + if (n == visited[i]) + return true; } return false; } @@ -535,9 +535,9 @@ final class RETokenRepeated extends REToken return visited; if (visited[0] >= visited.length - 1) { - int[] newvisited = new int[visited.length + 32]; - System.arraycopy (visited, 0, newvisited, 0, visited.length); - visited = newvisited; + int[] newvisited = new int[visited.length + 32]; + System.arraycopy (visited, 0, newvisited, 0, visited.length); + visited = newvisited; } visited[0]++; visited[visited[0]] = n; @@ -548,7 +548,7 @@ final class RETokenRepeated extends REToken { if (next (input, newMatch)) { - return newMatch; + return newMatch; } return null; } @@ -573,7 +573,7 @@ final class RETokenRepeated extends REToken } private REMatch backtrackFixedLength (CharIndexed input, REMatch mymatch, - Object param) + Object param) { int[] params = (int[]) param; int index = params[0]; @@ -582,34 +582,34 @@ final class RETokenRepeated extends REToken } private REMatch findMatchFixedLength (CharIndexed input, REMatch mymatch, - int index, int count) + int index, int count) { REMatch tryMatch = (REMatch) mymatch.clone (); while (true) { - tryMatch.index = index; - REMatch m = matchRest (input, tryMatch); - count--; - if (stingy) - index += tokenFixedLength; - else - index -= tokenFixedLength; - if (possessive) - return m; - if (m != null) - { - if (count > 0) - { - m.backtrackStack.push (new BacktrackStack. - Backtrack (this, input, mymatch, - new int[] - { - index, count})); - } - return m; - } - if (count <= 0) - return null; + tryMatch.index = index; + REMatch m = matchRest (input, tryMatch); + count--; + if (stingy) + index += tokenFixedLength; + else + index -= tokenFixedLength; + if (possessive) + return m; + if (m != null) + { + if (count > 0) + { + m.backtrackStack.push (new BacktrackStack. + Backtrack (this, input, mymatch, + new int[] + { + index, count})); + } + return m; + } + if (count <= 0) + return null; } } @@ -624,14 +624,14 @@ final class RETokenRepeated extends REToken os.append ('?'); else { - os.append ('{').append (min); - if (max > min) - { - os.append (','); - if (max != Integer.MAX_VALUE) - os.append (max); - } - os.append ('}'); + os.append ('{').append (min); + if (max > min) + { + os.append (','); + if (max != Integer.MAX_VALUE) + os.append (max); + } + os.append ('}'); } if (stingy) os.append ('?'); diff --git a/libjava/classpath/gnu/java/util/regex/RETokenStart.java b/libjava/classpath/gnu/java/util/regex/RETokenStart.java index 6a8d247..c57ba9c 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenStart.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenStart.java @@ -41,7 +41,7 @@ import gnu.java.lang.CPStringBuilder; class RETokenStart extends REToken { - private String newline; // matches after a newline + private String newline; // matches after a newline private boolean check_java_line_terminators; RETokenStart (int subIndex, String newline) @@ -58,13 +58,13 @@ class RETokenStart extends REToken this.check_java_line_terminators = b; } - @Override + @Override int getMaximumLength () { return 0; } - @Override + @Override REMatch matchThis (CharIndexed input, REMatch mymatch) { // charAt(index-n) may be unknown on a Reader/InputStream. FIXME @@ -72,52 +72,52 @@ class RETokenStart extends REToken if (check_java_line_terminators) { - char ch = input.charAt (mymatch.index - 1); - if (ch != CharIndexed.OUT_OF_BOUNDS) - { - if (ch == '\n') - return mymatch; - if (ch == '\r') - { - char ch1 = input.charAt (mymatch.index); - if (ch1 != '\n') - return mymatch; - return null; - } - if (ch == '\u0085') - return mymatch; // A next-line character - if (ch == '\u2028') - return mymatch; // A line-separator character - if (ch == '\u2029') - return mymatch; // A paragraph-separator character - } + char ch = input.charAt (mymatch.index - 1); + if (ch != CharIndexed.OUT_OF_BOUNDS) + { + if (ch == '\n') + return mymatch; + if (ch == '\r') + { + char ch1 = input.charAt (mymatch.index); + if (ch1 != '\n') + return mymatch; + return null; + } + if (ch == '\u0085') + return mymatch; // A next-line character + if (ch == '\u2028') + return mymatch; // A line-separator character + if (ch == '\u2029') + return mymatch; // A paragraph-separator character + } } if (newline != null) { - int len = newline.length (); - if (mymatch.offset >= len) - { - boolean found = true; - char z; - int i = 0; // position in REToken.newline - char ch = input.charAt (mymatch.index - len); - do - { - z = newline.charAt (i); - if (ch != z) - { - found = false; - break; - } - ++i; - ch = input.charAt (mymatch.index - len + i); - } - while (i < len); - - if (found) - return mymatch; - } + int len = newline.length (); + if (mymatch.offset >= len) + { + boolean found = true; + char z; + int i = 0; // position in REToken.newline + char ch = input.charAt (mymatch.index - len); + do + { + z = newline.charAt (i); + if (ch != z) + { + found = false; + break; + } + ++i; + ch = input.charAt (mymatch.index - len + i); + } + while (i < len); + + if (found) + return mymatch; + } } // Don't match at all if REG_NOTBOL is set. @@ -130,7 +130,7 @@ class RETokenStart extends REToken return ((mymatch.index == 0) && (mymatch.offset == 0)) ? mymatch : null; } - @Override + @Override boolean returnsFixedLengthMatches () { return true; diff --git a/libjava/classpath/gnu/java/util/regex/RETokenWordBoundary.java b/libjava/classpath/gnu/java/util/regex/RETokenWordBoundary.java index 9f758b6..04fd839 100644 --- a/libjava/classpath/gnu/java/util/regex/RETokenWordBoundary.java +++ b/libjava/classpath/gnu/java/util/regex/RETokenWordBoundary.java @@ -71,24 +71,24 @@ final class RETokenWordBoundary extends REToken // In the string "one two three", these positions match: // |o|n|e| |t|w|o| |t|h|r|e|e| // ^ ^ ^ ^ ^ ^ - boolean after = false; // is current character a letter or digit? - boolean before = false; // is previous character a letter or digit? + boolean after = false; // is current character a letter or digit? + boolean before = false; // is previous character a letter or digit? char ch; // TODO: Also check REG_ANCHORINDEX vs. anchor if (((mymatch.eflags & RE.REG_ANCHORINDEX) != RE.REG_ANCHORINDEX) - || (mymatch.offset + mymatch.index > mymatch.anchor)) + || (mymatch.offset + mymatch.index > mymatch.anchor)) { - if ((ch = - input.charAt (mymatch.index - 1)) != CharIndexed.OUT_OF_BOUNDS) - { - before = Character.isLetterOrDigit (ch) || (ch == '_'); - } + if ((ch = + input.charAt (mymatch.index - 1)) != CharIndexed.OUT_OF_BOUNDS) + { + before = Character.isLetterOrDigit (ch) || (ch == '_'); + } } if ((ch = input.charAt (mymatch.index)) != CharIndexed.OUT_OF_BOUNDS) { - after = Character.isLetterOrDigit (ch) || (ch == '_'); + after = Character.isLetterOrDigit (ch) || (ch == '_'); } // if (before) and (!after), we're at end (\>) @@ -97,11 +97,11 @@ final class RETokenWordBoundary extends REToken if ((where & BEGIN) == BEGIN) { - doNext = after && !before; + doNext = after && !before; } if ((where & END) == END) { - doNext ^= before && !after; + doNext ^= before && !after; } if (negated) @@ -127,15 +127,15 @@ final class RETokenWordBoundary extends REToken { if (where == (BEGIN | END)) { - os.append (negated ? "\\B" : "\\b"); + os.append (negated ? "\\B" : "\\b"); } else if (where == BEGIN) { - os.append ("\\<"); + os.append ("\\<"); } else { - os.append ("\\>"); + os.append ("\\>"); } } } diff --git a/libjava/classpath/gnu/java/util/regex/UncheckedRE.java b/libjava/classpath/gnu/java/util/regex/UncheckedRE.java index 5f8c749..7d215a6 100644 --- a/libjava/classpath/gnu/java/util/regex/UncheckedRE.java +++ b/libjava/classpath/gnu/java/util/regex/UncheckedRE.java @@ -47,11 +47,11 @@ package gnu.java.util.regex; * <P> * Note that this makes UncheckedRE dangerous if constructed with * dynamic data. Do not use UncheckedRE unless you are completely sure - * that all input being passed to it contains valid, well-formed + * that all input being passed to it contains valid, well-formed * regular expressions for the syntax specified. * * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A> - * @see gnu.java.util.regex.RE + * @see gnu.java.util.regex.RE * @since gnu.regexp 1.1.4 */ @@ -105,7 +105,7 @@ public final class UncheckedRE extends RE try { initialize (pattern, cflags, syntax, 0, 0); - } + } catch (REException e) { throw new RuntimeException (e.getMessage ()); |