diff options
author | Tom Tromey <tromey@gcc.gnu.org> | 2005-07-16 00:30:23 +0000 |
---|---|---|
committer | Tom Tromey <tromey@gcc.gnu.org> | 2005-07-16 00:30:23 +0000 |
commit | f911ba985aa7fe0096c386c5be385ac5825ea527 (patch) | |
tree | a0b991cf5866ae1d616639b906ac001811d74508 /libjava/classpath/java/util/regex | |
parent | 6f4434b39b261de5317dc81ddfdd94d2e1d62b11 (diff) | |
download | gcc-f911ba985aa7fe0096c386c5be385ac5825ea527.zip gcc-f911ba985aa7fe0096c386c5be385ac5825ea527.tar.gz gcc-f911ba985aa7fe0096c386c5be385ac5825ea527.tar.bz2 |
Initial revision
From-SVN: r102074
Diffstat (limited to 'libjava/classpath/java/util/regex')
-rw-r--r-- | libjava/classpath/java/util/regex/Matcher.java | 301 | ||||
-rw-r--r-- | libjava/classpath/java/util/regex/Pattern.java | 254 | ||||
-rw-r--r-- | libjava/classpath/java/util/regex/PatternSyntaxException.java | 132 | ||||
-rw-r--r-- | libjava/classpath/java/util/regex/package.html | 46 |
4 files changed, 733 insertions, 0 deletions
diff --git a/libjava/classpath/java/util/regex/Matcher.java b/libjava/classpath/java/util/regex/Matcher.java new file mode 100644 index 0000000..bd97ace --- /dev/null +++ b/libjava/classpath/java/util/regex/Matcher.java @@ -0,0 +1,301 @@ +/* Matcher.java -- Instance of a regular expression applied to a char sequence. + Copyright (C) 2002, 2004 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. 
If you do not wish to do so, delete this +exception statement from your version. */ + + +package java.util.regex; + +import gnu.regexp.REMatch; + +/** + * Instance of a regular expression applied to a char sequence. + * + * @since 1.4 + */ +public final class Matcher +{ + private Pattern pattern; + private CharSequence input; + private int position; + private int appendPosition; + private REMatch match; + + Matcher(Pattern pattern, CharSequence input) + { + this.pattern = pattern; + this.input = input; + } + + /** + * @param sb The target string buffer + * @param replacement The replacement string + * + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + * @exception IndexOutOfBoundsException If the replacement string refers + * to a capturing group that does not exist in the pattern + */ + public Matcher appendReplacement (StringBuffer sb, String replacement) + throws IllegalStateException + { + assertMatchOp(); + sb.append(input.subSequence(appendPosition, + match.getStartIndex()).toString()); + sb.append(match.substituteInto(replacement)); + appendPosition = match.getEndIndex(); + return this; + } + + /** + * @param sb The target string buffer + */ + public StringBuffer appendTail (StringBuffer sb) + { + sb.append(input.subSequence(appendPosition, input.length()).toString()); + return sb; + } + + /** + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + */ + public int end () + throws IllegalStateException + { + assertMatchOp(); + return match.getEndIndex(); + } + + /** + * @param group The index of a capturing group in this matcher's pattern + * + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + * @exception IndexOutOfBoundsException If the replacement string refers + * to a capturing group that does not exist in the pattern + */ + public int end (int group) + 
throws IllegalStateException + { + assertMatchOp(); + return match.getEndIndex(group); + } + + public boolean find () + { + boolean first = (match == null); + match = pattern.getRE().getMatch(input, position); + if (match != null) + { + int endIndex = match.getEndIndex(); + // Are we stuck at the same position? + if (!first && endIndex == position) + { + match = null; + // Not at the end of the input yet? + if (position < input.length() - 1) + { + position++; + return find(position); + } + else + return false; + } + position = endIndex; + return true; + } + return false; + } + + /** + * @param start The index to start the new pattern matching + * + * @exception IndexOutOfBoundsException If the replacement string refers + * to a capturing group that does not exist in the pattern + */ + public boolean find (int start) + { + match = pattern.getRE().getMatch(input, start); + if (match != null) + { + position = match.getEndIndex(); + return true; + } + return false; + } + + /** + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + */ + public String group () + { + assertMatchOp(); + return match.toString(); + } + + /** + * @param group The index of a capturing group in this matcher's pattern + * + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + * @exception IndexOutOfBoundsException If the replacement string refers + * to a capturing group that does not exist in the pattern + */ + public String group (int group) + throws IllegalStateException + { + assertMatchOp(); + return match.toString(group); + } + + /** + * @param replacement The replacement string + */ + public String replaceFirst (String replacement) + { + reset(); + // Semantics might not quite match + return pattern.getRE().substitute(input, replacement, position); + } + + /** + * @param replacement The replacement string + */ + public String replaceAll (String replacement) + 
{ + reset(); + return pattern.getRE().substituteAll(input, replacement, position); + } + + public int groupCount () + { + return pattern.getRE().getNumSubs(); + } + + public boolean lookingAt () + { + match = pattern.getRE().getMatch(input, 0); + if (match != null) + { + if (match.getStartIndex() == 0) + { + position = match.getEndIndex(); + return true; + } + match = null; + } + return false; + } + + /** + * Attempts to match the entire input sequence against the pattern. + * + * If the match succeeds then more information can be obtained via the + * start, end, and group methods. + * + * @see #start + * @see #end + * @see #group + */ + public boolean matches () + { + if (lookingAt()) + { + if (position == input.length()) + return true; + match = null; + } + return false; + } + + /** + * Returns the Pattern that is interpreted by this Matcher + */ + public Pattern pattern () + { + return pattern; + } + + public Matcher reset () + { + position = 0; + match = null; + return this; + } + + /** + * @param input The new input character sequence + */ + public Matcher reset (CharSequence input) + { + this.input = input; + return reset(); + } + + /** + * @param group The index of a capturing group in this matcher's pattern + * + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + */ + public int start () + throws IllegalStateException + { + assertMatchOp(); + return match.getStartIndex(); + } + + /** + * @param group The index of a capturing group in this matcher's pattern + * + * @exception IllegalStateException If no match has yet been attempted, + * or if the previous match operation failed + * @exception IndexOutOfBoundsException If the replacement string refers + * to a capturing group that does not exist in the pattern + */ + public int start (int group) + throws IllegalStateException + { + assertMatchOp(); + return match.getStartIndex(group); + } + + private void assertMatchOp() + { + if (match == 
null) throw new IllegalStateException(); + } +} diff --git a/libjava/classpath/java/util/regex/Pattern.java b/libjava/classpath/java/util/regex/Pattern.java new file mode 100644 index 0000000..6a31ef9 --- /dev/null +++ b/libjava/classpath/java/util/regex/Pattern.java @@ -0,0 +1,254 @@ +/* Pattern.java -- Compiled regular expression ready to be applied. + Copyright (C) 2002, 2004, 2005 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. 
If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package java.util.regex; + +import gnu.regexp.RE; +import gnu.regexp.REException; +import gnu.regexp.RESyntax; + +import java.io.Serializable; +import java.util.ArrayList; + + +/** + * Compiled regular expression ready to be applied. + * + * @since 1.4 + */ +public final class Pattern implements Serializable +{ + private static final long serialVersionUID = 5073258162644648461L; + + public static final int CANON_EQ = 128; + public static final int CASE_INSENSITIVE = 2; + public static final int COMMENTS = 4; + public static final int DOTALL = 32; + public static final int MULTILINE = 8; + public static final int UNICODE_CASE = 64; + public static final int UNIX_LINES = 1; + + private final String regex; + private final int flags; + + private final RE re; + + private Pattern (String regex, int flags) + throws PatternSyntaxException + { + this.regex = regex; + this.flags = flags; + + int gnuFlags = 0; + if ((flags & CASE_INSENSITIVE) != 0) + gnuFlags |= RE.REG_ICASE; + if ((flags & MULTILINE) != 0) + gnuFlags |= RE.REG_MULTILINE; + if ((flags & DOTALL) != 0) + gnuFlags |= RE.REG_DOT_NEWLINE; + // not yet supported: + // if ((flags & UNICODE_CASE) != 0) gnuFlags = + // if ((flags & CANON_EQ) != 0) gnuFlags = + + RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4; + if ((flags & UNIX_LINES) != 0) + { + // Use a syntax set with \n for linefeeds? + syntax = new RESyntax(syntax); + syntax.setLineSeparator("\n"); + } + + if ((flags & COMMENTS) != 0) + { + // Use a syntax with support for comments? 
+ } + + try + { + this.re = new RE(regex, gnuFlags, syntax); + } + catch (REException e) + { + throw new PatternSyntaxException(e.getMessage(), + regex, e.getPosition()); + } + } + + // package private accessor method + RE getRE() + { + return re; + } + + /** + * @param regex The regular expression + * + * @exception PatternSyntaxException If the expression's syntax is invalid + */ + public static Pattern compile (String regex) + throws PatternSyntaxException + { + return compile(regex, 0); + } + + /** + * @param regex The regular expression + * @param flags The match flags, a bit mask + * + * @exception PatternSyntaxException If the expression's syntax is invalid + * @exception IllegalArgumentException If bit values other than those + * corresponding to the defined match flags are set in flags + */ + public static Pattern compile (String regex, int flags) + throws PatternSyntaxException + { + // FIXME: check which flags are really accepted + if ((flags & ~0xEF) != 0) + throw new IllegalArgumentException (); + + return new Pattern (regex, flags); + } + + public int flags () + { + return this.flags; + } + + /** + * @param regex The regular expression + * @param input The character sequence to be matched + * + * @exception PatternSyntaxException If the expression's syntax is invalid + */ + public static boolean matches (String regex, CharSequence input) + { + return compile(regex).matcher(input).matches(); + } + + /** + * @param input The character sequence to be matched + */ + public Matcher matcher (CharSequence input) + { + return new Matcher(this, input); + } + + /** + * @param input The character sequence to be matched + */ + public String[] split (CharSequence input) + { + return split(input, 0); + } + + /** + * @param input The character sequence to be matched + * @param limit The result threshold + */ + public String[] split (CharSequence input, int limit) + { + Matcher matcher = new Matcher(this, input); + ArrayList list = new ArrayList(); + int empties = 0; 
+ int count = 0; + int start = 0; + int end; + boolean matched = matcher.find(); + + while (matched && (limit <= 0 || count < limit - 1)) + { + ++count; + end = matcher.start(); + if (start == end) + empties++; + else + { + while (empties > 0) + { + list.add(""); + empties--; + } + + String text = input.subSequence(start, end).toString(); + list.add(text); + } + start = matcher.end(); + matched = matcher.find(); + } + + // We matched nothing. + if (!matched && count == 0) + return new String[] { input.toString() }; + + // Is the last token empty? + boolean emptyLast = (start == input.length()); + + // Can/Must we add empties or an extra last token at the end? + if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast)) + { + if (limit > list.size()) + { + int max = limit - list.size(); + empties = (empties > max) ? max : empties; + } + while (empties > 0) + { + list.add(""); + empties--; + } + } + + // last token at end + if (limit != 0 || (limit == 0 && !emptyLast)) + { + String t = input.subSequence(start, input.length()).toString(); + if ("".equals(t) && limit == 0) + ; // Don't add. + else + list.add(t); + } + + String[] output = new String [list.size()]; + list.toArray(output); + return output; + } + + public String pattern () + { + return regex; + } +} diff --git a/libjava/classpath/java/util/regex/PatternSyntaxException.java b/libjava/classpath/java/util/regex/PatternSyntaxException.java new file mode 100644 index 0000000..0c80e11 --- /dev/null +++ b/libjava/classpath/java/util/regex/PatternSyntaxException.java @@ -0,0 +1,132 @@ +/* PatternSyntaxException - Indicates illegal pattern for regular expression. + Copyright (C) 2002 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. 
+ +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package java.util.regex; + +/** + * Indicates illegal pattern for regular expression. + * Includes state to inspect the pattern and what and where the expression + * was not a valid regular expression. + */ +public class PatternSyntaxException extends IllegalArgumentException +{ + private static final long serialVersionUID = -3864639126226059218L; + + /** + * Human readable description of the syntax error. + */ + private final String desc; + + /** + * The original pattern that contained the syntax error. 
+ */ + private final String pattern; + + /** + * Index of the first character in the String that was probably invalid, + * or -1 when unknown. + */ + private final int index; + + /** + * Creates a new PatternSyntaxException. + * + * @param description Human readable description of the syntax error. + * @param pattern The original pattern that contained the syntax error. + * @param index Index of the first character in the String that was + * probably invalid, or -1 when unknown. + */ + public PatternSyntaxException(String description, + String pattern, + int index) + { + super(description); + this.desc = description; + this.pattern = pattern; + this.index = index; + } + + /** + * Returns a human readable description of the syntax error. + */ + public String getDescription() + { + return desc; + } + + /** + * Returns the original pattern that contained the syntax error. + */ + public String getPattern() + { + return pattern; + } + + /** + * Returns the index of the first character in the String that was probably + * invalid, or -1 when unknown. + */ + public int getIndex() + { + return index; + } + + /** + * Returns a string containing a line with the description, a line with + * the original pattern and a line indicating with a ^ which character is + * probably the first invalid character in the pattern if the index is not + * negative. 
+ */ + public String getMessage() + { + String lineSep = System.getProperty("line.separator"); + StringBuffer sb = new StringBuffer(desc); + sb.append(lineSep); + sb.append('\t'); + sb.append(pattern); + if (index != -1) + { + sb.append(lineSep); + sb.append('\t'); + for (int i=0; i<index; i++) + sb.append(' '); + sb.append('^'); + } + return sb.toString(); + } + +} diff --git a/libjava/classpath/java/util/regex/package.html b/libjava/classpath/java/util/regex/package.html new file mode 100644 index 0000000..0573a36 --- /dev/null +++ b/libjava/classpath/java/util/regex/package.html @@ -0,0 +1,46 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN"> +<!-- package.html - describes classes in java.util.regex package. + Copyright (C) 2002 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. 
+ +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. --> + +<html> +<head><title>GNU Classpath - java.util.regex</title></head> + +<body> +<p>Regular expression patterns and matchers.</p> + +</body> +</html> |