From f911ba985aa7fe0096c386c5be385ac5825ea527 Mon Sep 17 00:00:00 2001 From: Tom Tromey Date: Sat, 16 Jul 2005 00:30:23 +0000 Subject: Initial revision From-SVN: r102074 --- libjava/classpath/java/io/StreamTokenizer.java | 708 +++++++++++++++++++++++++ 1 file changed, 708 insertions(+) create mode 100644 libjava/classpath/java/io/StreamTokenizer.java (limited to 'libjava/classpath/java/io/StreamTokenizer.java') diff --git a/libjava/classpath/java/io/StreamTokenizer.java b/libjava/classpath/java/io/StreamTokenizer.java new file mode 100644 index 0000000..bd7773b --- /dev/null +++ b/libjava/classpath/java/io/StreamTokenizer.java @@ -0,0 +1,708 @@ +/* StreamTokenizer.java -- parses streams of characters into tokens + Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package java.io; + +/** + * This class parses streams of characters into tokens. There are a + * million-zillion flags that can be set to control the parsing, as + * described under the various method headings. + * + * @author Warren Levy (warrenl@cygnus.com) + * @date October 25, 1998. + */ +/* Written using "Java Class Libraries", 2nd edition, ISBN 0-201-31002-3 + * "The Java Language Specification", ISBN 0-201-63451-1 + * plus online API docs for JDK 1.2 beta from http://www.javasoft.com. + * Status: Believed complete and correct. + */ + +public class StreamTokenizer +{ + /** A constant indicating that the end of the stream has been read. */ + public static final int TT_EOF = -1; + + /** A constant indicating that the end of the line has been read. */ + public static final int TT_EOL = '\n'; + + /** A constant indicating that a number token has been read. */ + public static final int TT_NUMBER = -2; + + /** A constant indicating that a word token has been read. */ + public static final int TT_WORD = -3; + + /** A constant indicating that no tokens have been read yet. */ + private static final int TT_NONE = -4; + + /** + * Contains the type of the token read resulting from a call to nextToken + * The rules are as follows: + * + */ + public int ttype = TT_NONE; + + /** The String associated with word and string tokens. */ + public String sval; + + /** The numeric value associated with number tokens. */ + public double nval; + + /* Indicates whether end-of-line is recognized as a token. */ + private boolean eolSignificant = false; + + /* Indicates whether word tokens are automatically made lower case. */ + private boolean lowerCase = false; + + /* Indicates whether C++ style comments are recognized and skipped. */ + private boolean slashSlash = false; + + /* Indicates whether C style comments are recognized and skipped. */ + private boolean slashStar = false; + + /* Attribute tables of each byte from 0x00 to 0xFF. */ + private boolean[] whitespace = new boolean[256]; + private boolean[] alphabetic = new boolean[256]; + private boolean[] numeric = new boolean[256]; + private boolean[] quote = new boolean[256]; + private boolean[] comment = new boolean[256]; + + /* The Reader associated with this class. */ + private PushbackReader in; + + /* Indicates if a token has been pushed back. */ + private boolean pushedBack = false; + + /* Contains the current line number of the reader. */ + private int lineNumber = 1; + + /** + * This method reads bytes from an InputStream and tokenizes + * them. For details on how this method operates by default, see + * StreamTokenizer(Reader). + * + * @param is The InputStream to read from + * + * @deprecated Since JDK 1.1. + */ + public StreamTokenizer(InputStream is) + { + this(new InputStreamReader(is)); + } + + /** + * This method initializes a new StreamTokenizer to read + * characters from a Reader and parse them. The char values + * have their hight bits masked so that the value is treated a character + * in the range of 0x0000 to 0x00FF. + *

+ * This constructor sets up the parsing table to parse the stream in the + * following manner: + *

+ * + * @param r The Reader to read chars from + */ + public StreamTokenizer(Reader r) + { + in = new PushbackReader(r); + + whitespaceChars(0x00, 0x20); + wordChars('A', 'Z'); + wordChars('a', 'z'); + wordChars(0xA0, 0xFF); + commentChar('/'); + quoteChar('\''); + quoteChar('"'); + parseNumbers(); + } + + /** + * This method sets the comment attribute on the specified + * character. Other attributes for the character are cleared. + * + * @param ch The character to set the comment attribute for, passed as an int + */ + public void commentChar(int ch) + { + if (ch >= 0 && ch <= 255) + { + comment[ch] = true; + whitespace[ch] = false; + alphabetic[ch] = false; + numeric[ch] = false; + quote[ch] = false; + } + } + + /** + * This method sets a flag that indicates whether or not the end of line + * sequence terminates and is a token. The defaults to false + * + * @param flag true if EOF is significant, false + * otherwise + */ + public void eolIsSignificant(boolean flag) + { + eolSignificant = flag; + } + + /** + * This method returns the current line number. Note that if the + * pushBack() method is called, it has no effect on the + * line number returned by this method. + * + * @return The current line number + */ + public int lineno() + { + return lineNumber; + } + + /** + * This method sets a flag that indicates whether or not alphabetic + * tokens that are returned should be converted to lower case. + * + * @param flag true to convert to lower case, + * false otherwise + */ + public void lowerCaseMode(boolean flag) + { + lowerCase = flag; + } + + private boolean isWhitespace(int ch) + { + return (ch >= 0 && ch <= 255 && whitespace[ch]); + } + + private boolean isAlphabetic(int ch) + { + return ((ch > 255) || (ch >= 0 && alphabetic[ch])); + } + + private boolean isNumeric(int ch) + { + return (ch >= 0 && ch <= 255 && numeric[ch]); + } + + private boolean isQuote(int ch) + { + return (ch >= 0 && ch <= 255 && quote[ch]); + } + + private boolean isComment(int ch) + { + return (ch >= 0 && ch <= 255 && comment[ch]); + } + + /** + * This method reads the next token from the stream. It sets the + * ttype variable to the appropriate token type and + * returns it. It also can set sval or nval + * as described below. The parsing strategy is as follows: + * + * + * @return The token type + * @exception IOException If an I/O error occurs + */ + public int nextToken() throws IOException + { + if (pushedBack) + { + pushedBack = false; + if (ttype != TT_NONE) + return ttype; + } + + sval = null; + int ch; + + // Skip whitespace. Deal with EOL along the way. + while (isWhitespace(ch = in.read())) + if (ch == '\n' || ch == '\r') + { + lineNumber++; + + // Throw away \n if in combination with \r. + if (ch == '\r' && (ch = in.read()) != '\n') + { + if (ch != TT_EOF) + in.unread(ch); + } + if (eolSignificant) + return (ttype = TT_EOL); + } + + if (ch == '/') + if ((ch = in.read()) == '/' && slashSlash) + { + while ((ch = in.read()) != '\n' && ch != '\r' && ch != TT_EOF) + ; + if (ch != TT_EOF) + in.unread(ch); + return nextToken(); // Recursive, but not too deep in normal cases + } + else if (ch == '*' && slashStar) + { + while (true) + { + ch = in.read(); + if (ch == '*') + { + if ((ch = in.read()) == '/') + break; + else if (ch != TT_EOF) + in.unread(ch); + } + else if (ch == '\n' || ch == '\r') + { + lineNumber++; + if (ch == '\r' && (ch = in.read()) != '\n') + { + if (ch != TT_EOF) + in.unread(ch); + } + } + else if (ch == TT_EOF) + { + break; + } + } + return nextToken(); // Recursive, but not too deep in normal cases + } + else + { + if (ch != TT_EOF) + in.unread(ch); + ch = '/'; + } + + if (ch == TT_EOF) + ttype = TT_EOF; + else if (isNumeric(ch)) + { + boolean isNegative = false; + if (ch == '-') + { + // Read ahead to see if this is an ordinary '-' rather than numeric. + ch = in.read(); + if (isNumeric(ch) && ch != '-') + { + isNegative = true; + } + else + { + if (ch != TT_EOF) + in.unread(ch); + return (ttype = '-'); + } + } + + StringBuffer tokbuf = new StringBuffer(); + tokbuf.append((char) ch); + + int decCount = 0; + while (isNumeric(ch = in.read()) && ch != '-') + if (ch == '.' && decCount++ > 0) + break; + else + tokbuf.append((char) ch); + + if (ch != TT_EOF) + in.unread(ch); + ttype = TT_NUMBER; + try + { + nval = Double.valueOf(tokbuf.toString()).doubleValue(); + } + catch (NumberFormatException _) + { + nval = 0.0; + } + if (isNegative) + nval = -nval; + } + else if (isAlphabetic(ch)) + { + StringBuffer tokbuf = new StringBuffer(); + tokbuf.append((char) ch); + while (isAlphabetic(ch = in.read()) || isNumeric(ch)) + tokbuf.append((char) ch); + if (ch != TT_EOF) + in.unread(ch); + ttype = TT_WORD; + sval = tokbuf.toString(); + if (lowerCase) + sval = sval.toLowerCase(); + } + else if (isComment(ch)) + { + while ((ch = in.read()) != '\n' && ch != '\r' && ch != TT_EOF) + ; + if (ch != TT_EOF) + in.unread(ch); + return nextToken(); // Recursive, but not too deep in normal cases. + } + else if (isQuote(ch)) + { + ttype = ch; + StringBuffer tokbuf = new StringBuffer(); + while ((ch = in.read()) != ttype && ch != '\n' && ch != '\r' && + ch != TT_EOF) + { + if (ch == '\\') + switch (ch = in.read()) + { + case 'a': ch = 0x7; + break; + case 'b': ch = '\b'; + break; + case 'f': ch = 0xC; + break; + case 'n': ch = '\n'; + break; + case 'r': ch = '\r'; + break; + case 't': ch = '\t'; + break; + case 'v': ch = 0xB; + break; + case '\n': ch = '\n'; + break; + case '\r': ch = '\r'; + break; + case '\"': + case '\'': + case '\\': + break; + default: + int ch1, nextch; + if ((nextch = ch1 = ch) >= '0' && ch <= '7') + { + ch -= '0'; + if ((nextch = in.read()) >= '0' && nextch <= '7') + { + ch = ch * 8 + nextch - '0'; + if ((nextch = in.read()) >= '0' && nextch <= '7' && + ch1 >= '0' && ch1 <= '3') + { + ch = ch * 8 + nextch - '0'; + nextch = in.read(); + } + } + } + + if (nextch != TT_EOF) + in.unread(nextch); + } + + tokbuf.append((char) ch); + } + + // Throw away matching quote char. + if (ch != ttype && ch != TT_EOF) + in.unread(ch); + + sval = tokbuf.toString(); + } + else + { + ttype = ch; + } + + return ttype; + } + + private void resetChar(int ch) + { + whitespace[ch] = alphabetic[ch] = numeric[ch] = quote[ch] = comment[ch] = + false; + } + + /** + * This method makes the specified character an ordinary character. This + * means that none of the attributes (whitespace, alphabetic, numeric, + * quote, or comment) will be set on this character. This character will + * parse as its own token. + * + * @param ch The character to make ordinary, passed as an int + */ + public void ordinaryChar(int ch) + { + if (ch >= 0 && ch <= 255) + resetChar(ch); + } + + /** + * This method makes all the characters in the specified range, range + * terminators included, ordinary. This means the none of the attributes + * (whitespace, alphabetic, numeric, quote, or comment) will be set on + * any of the characters in the range. This makes each character in this + * range parse as its own token. + * + * @param low The low end of the range of values to set the whitespace + * attribute for + * @param hi The high end of the range of values to set the whitespace + * attribute for + */ + public void ordinaryChars(int low, int hi) + { + if (low < 0) + low = 0; + if (hi > 255) + hi = 255; + for (int i = low; i <= hi; i++) + resetChar(i); + } + + /** + * This method sets the numeric attribute on the characters '0' - '9' and + * the characters '.' and '-'. + */ + public void parseNumbers() + { + for (int i = 0; i <= 9; i++) + numeric['0' + i] = true; + + numeric['.'] = true; + numeric['-'] = true; + } + + /** + * Puts the current token back into the StreamTokenizer so + * nextToken will return the same value on the next call. + * May cause the lineno method to return an incorrect value + * if lineno is called before the next call to nextToken. + */ + public void pushBack() + { + pushedBack = true; + } + + /** + * This method sets the quote attribute on the specified character. + * Other attributes for the character are cleared. + * + * @param ch The character to set the quote attribute for, passed as an int. + */ + public void quoteChar(int ch) + { + if (ch >= 0 && ch <= 255) + { + quote[ch] = true; + comment[ch] = false; + whitespace[ch] = false; + alphabetic[ch] = false; + numeric[ch] = false; + } + } + + /** + * This method removes all attributes (whitespace, alphabetic, numeric, + * quote, and comment) from all characters. It is equivalent to calling + * ordinaryChars(0x00, 0xFF). + * + * @see #ordinaryChars(int, int) + */ + public void resetSyntax() + { + ordinaryChars(0x00, 0xFF); + } + + /** + * This method sets a flag that indicates whether or not "C++" language style + * comments ("//" comments through EOL ) are handled by the parser. + * If this is true commented out sequences are skipped and + * ignored by the parser. This defaults to false. + * + * @param flag true to recognized and handle "C++" style + * comments, false otherwise + */ + public void slashSlashComments(boolean flag) + { + slashSlash = flag; + } + + /** + * This method sets a flag that indicates whether or not "C" language style + * comments (with nesting not allowed) are handled by the parser. + * If this is true commented out sequences are skipped and + * ignored by the parser. This defaults to false. + * + * @param flag true to recognized and handle "C" style comments, + * false otherwise + */ + public void slashStarComments(boolean flag) + { + slashStar = flag; + } + + /** + * This method returns the current token value as a String in + * the form "Token[x], line n", where 'n' is the current line numbers and + * 'x' is determined as follows. + *

+ *

+ */ + public String toString() + { + String tempstr; + if (ttype == TT_EOF) + tempstr = "EOF"; + else if (ttype == TT_EOL) + tempstr = "EOL"; + else if (ttype == TT_WORD) + tempstr = sval; + else if (ttype == TT_NUMBER) + tempstr = "n=" + nval; + else if (ttype == TT_NONE) + tempstr = "NOTHING"; + else // must be an ordinary char. + tempstr = "\'" + (char) ttype + "\'"; + + return "Token[" + tempstr + "], line " + lineno(); + } + + /** + * This method sets the whitespace attribute for all characters in the + * specified range, range terminators included. + * + * @param low The low end of the range of values to set the whitespace + * attribute for + * @param hi The high end of the range of values to set the whitespace + * attribute for + */ + public void whitespaceChars(int low, int hi) + { + if (low < 0) + low = 0; + if (hi > 255) + hi = 255; + for (int i = low; i <= hi; i++) + { + resetChar(i); + whitespace[i] = true; + } + } + + /** + * This method sets the alphabetic attribute for all characters in the + * specified range, range terminators included. + * + * @param low The low end of the range of values to set the alphabetic + * attribute for + * @param hi The high end of the range of values to set the alphabetic + * attribute for + */ + public void wordChars(int low, int hi) + { + if (low < 0) + low = 0; + if (hi > 255) + hi = 255; + for (int i = low; i <= hi; i++) + alphabetic[i] = true; + } +} -- cgit v1.1