From 05ae1c31cf55c746a49ff06bab5470c466e0b13d Mon Sep 17 00:00:00 2001 From: Oskar Liljeblad Date: Sun, 23 Jul 2000 19:52:13 +0200 Subject: StreamTokenizer.java: Merged with classpath. 2000-07-23 Oskar Liljeblad * java/io/StreamTokenizer.java: Merged with classpath. From-SVN: r35208 --- libjava/ChangeLog | 4 + libjava/java/io/StreamTokenizer.java | 298 +++++++++++++++++++++++++++++------ 2 files changed, 253 insertions(+), 49 deletions(-) diff --git a/libjava/ChangeLog b/libjava/ChangeLog index 7f26446..655081f 100644 --- a/libjava/ChangeLog +++ b/libjava/ChangeLog @@ -1,3 +1,7 @@ +2000-07-23 Oskar Liljeblad + + * java/io/StreamTokenizer.java: Merged with classpath. + 2000-07-20 Tom Tromey * Makefile.in: Rebuilt. diff --git a/libjava/java/io/StreamTokenizer.java b/libjava/java/io/StreamTokenizer.java index e763cbe..f0099d4 100644 --- a/libjava/java/io/StreamTokenizer.java +++ b/libjava/java/io/StreamTokenizer.java @@ -9,6 +9,10 @@ details. */ package java.io; /** + * This class parses streams of characters into tokens. There are a + * million-zillion flags that can be set to control the parsing, as + * described under the various method headings. + * * @author Warren Levy * @date October 25, 1998. */ @@ -20,25 +24,40 @@ package java.io; public class StreamTokenizer { - /* A constant indicating that the end of the stream has been read. */ + /** A constant indicating that the end of the stream has been read. */ public static final int TT_EOF = -1; - /* A constant indicating that the end of the line has been read. */ + /** A constant indicating that the end of the line has been read. */ public static final int TT_EOL = '\n'; - /* A constant indicating that a number token has been read. */ + /** A constant indicating that a number token has been read. */ public static final int TT_NUMBER = -2; - /* A constant indicating that a word token has been read. */ + /** A constant indicating that a word token has been read. 
*/ public static final int TT_WORD = -3; - /* Contains the type of the token read resulting from a call to nextToken. */ - public int ttype; - - /* The String associated with word and string tokens. */ + /** A constant indicating that no tokens have been read yet. */ + public static final int TT_NONE = -4; + + /** + * Contains the type of the token read resulting from a call to nextToken + * The rules are as follows: + *
    + *
  • For a token consisting of a single ordinary character, this is the + * value of that character. + *
  • For a quoted string, this is the value of the quote character + *
  • For a word, this is TT_WORD + *
  • For a number, this is TT_NUMBER + *
  • For the end of the line, this is TT_EOL + *
  • For the end of the stream, this is TT_EOF + *
+ */ public int ttype = TT_NONE; + + /** The String associated with word and string tokens. */ public String sval; - /* The numeric value associated with number tokens. */ + /** The numeric value associated with number tokens. */ public double nval; /* Indicates whether end-of-line is recognized as a token. */ @@ -54,11 +73,11 @@ public class StreamTokenizer private boolean slashStar = false; /* Attribute tables of each byte from 0x00 to 0xFF. */ - private boolean[] whitespace; - private boolean[] alphabetic; - private boolean[] numeric; - private boolean[] quote; - private boolean[] comment; + private boolean[] whitespace = new boolean[256]; + private boolean[] alphabetic = new boolean[256]; + private boolean[] numeric = new boolean[256]; + private boolean[] quote = new boolean[256]; + private boolean[] comment = new boolean[256]; /* The Reader associated with this class. */ private PushbackReader in; @@ -69,24 +88,45 @@ public class StreamTokenizer /* Contains the current line number of the reader. */ private int lineNumber = 1; - // Deprecated in JDK 1.1. + /** + * This constructor reads bytes from an InputStream and tokenizes + * them. For details on how this class operates by default, see + * StreamTokenizer(Reader). + * + * @param is The InputStream to read from + * + * @deprecated Since JDK 1.1. + */ public StreamTokenizer(InputStream is) { this(new InputStreamReader(is)); } + /** + * This constructor initializes a new StreamTokenizer to read + * characters from a Reader and parse them. The char values + * have their high bits masked so that the value is treated as a character + * in the range of 0x0000 to 0x00FF. + *

+ * This constructor sets up the parsing table to parse the stream in the + * following manner: + *

    + *
  • The values 'A' through 'Z', 'a' through 'z' and 0xA0 through 0xFF + * are initialized as alphabetic + *
  • The values 0x00 through 0x20 are initialized as whitespace + *
  • The values '\'' and '"' are initialized as quote characters + *
  • '/' is a comment character + *
  • Numbers will be parsed + *
  • EOL is not treated as significant + *
  • C and C++ (//) comments are not recognized + *
+ * + * @param r The Reader to read chars from + */ public StreamTokenizer(Reader r) { in = new PushbackReader(r); - whitespace = new boolean[256]; - alphabetic = new boolean[256]; - numeric = new boolean[256]; - quote = new boolean[256]; - comment = new boolean[256]; - for (int i = 0; i < 256; i++) - resetChar(i); - whitespaceChars(0x00, 0x20); wordChars('A', 'Z'); wordChars('a', 'z'); @@ -97,22 +137,48 @@ public class StreamTokenizer parseNumbers(); } + /** + * This method sets the comment attribute on the specified character. + * + * @param ch The character to set the comment attribute for, passed as an int + */ public void commentChar(int ch) { if (ch >= 0 && ch <= 255) comment[ch] = true; } + /** + * This method sets a flag that indicates whether or not the end of line + * sequence terminates and is a token. This defaults to false. + * + * @param flag true if EOL is significant, false + * otherwise + */ public void eolIsSignificant(boolean flag) { eolSignificant = flag; } + /** + * This method returns the current line number. Note that if the + * pushBack() method is called, it has no effect on the + * line number returned by this method. + * + * @return The current line number + */ public int lineno() { return lineNumber; } + /** + * This method sets a flag that indicates whether or not alphabetic + * tokens that are returned should be converted to lower case. 
+ * + * @param flag true to convert to lower case, + * false otherwise + */ public void lowerCaseMode(boolean flag) { lowerCase = flag; @@ -120,52 +186,88 @@ public class StreamTokenizer private boolean isWhitespace(int ch) { - if (ch >= 0 && ch <= 255) - return whitespace[ch]; - - return false; + return (ch >= 0 && ch <= 255 && whitespace[ch]); } private boolean isAlphabetic(int ch) { - if (ch >= 0 && ch <= 255) - return alphabetic[ch]; - else if (ch > 255) - return true; - - return false; + return ((ch > 255) || (ch >= 0 && alphabetic[ch])); } private boolean isNumeric(int ch) { - if (ch >= 0 && ch <= 255) - return numeric[ch]; - - return false; + return (ch >= 0 && ch <= 255 && numeric[ch]); } private boolean isQuote(int ch) { - if (ch >= 0 && ch <= 255) - return quote[ch]; - - return false; + return (ch >= 0 && ch <= 255 && quote[ch]); } private boolean isComment(int ch) { - if (ch >= 0 && ch <= 255) - return comment[ch]; - - return false; + return (ch >= 0 && ch <= 255 && comment[ch]); } + /** + * This method reads the next token from the stream. It sets the + * ttype variable to the appropriate token type and + * returns it. It also can set sval or nval + * as described below. The parsing strategy is as follows: + *
    + *
  • Skip any whitespace characters. + *
  • If a numeric character is encountered, attempt to parse a numeric + * value. Leading '-' characters indicate a numeric only if followed by + * another non-'-' numeric. The value of the numeric token is terminated + * by either the first non-numeric encountered, or the second occurrence of + * '-' or '.'. The token type returned is TT_NUMBER and nval + * is set to the value parsed. + *
  • If an alphabetic character is parsed, all subsequent characters + * are read until the first character that is neither alphabetic nor + * numeric is encountered. The token type returned is TT_WORD and the value parsed + * is stored in sval. If lower case mode is set, the token + * stored in sval is converted to lower case. The end of line + * sequence terminates a word only if EOL significance has been turned on. + * The start of a comment also terminates a word. Any character with a + * non-alphabetic and non-numeric attribute (such as white space, a quote, + * or a comment) is treated as non-alphabetic and terminates the word. + *
  • If a comment character is parsed, then all remaining characters on + * the current line are skipped and another token is parsed. Any EOL or + * EOF encountered is not discarded, but rather terminates the comment. + *
  • If a quote character is parsed, then all characters up to the + * second occurrence of the same quote character are parsed into a + * String. This String is stored as + * sval, but is not converted to lower case, even if lower case + * mode is enabled. The token type returned is the value of the quote + * character encountered. Any escape sequences + * (\b (backspace), \t (HTAB), \n (linefeed), \f (form feed), \r + * (carriage return), \" (double quote), \' (single quote), \\ + * (backslash), \XXX (octal escape)) are converted to the appropriate + * char values. Invalid escape sequences are left untranslated. + * Unicode escapes like ('\ u0000') are not recognized. + *
  • If the C++ comment sequence "//" is encountered, and the parser + * is configured to handle that sequence, then the remainder of the line + * is skipped and another token is read exactly as if a character with + * the comment attribute was encountered. + *
  • If the C comment sequence "/*" is encountered, and the parser + * is configured to handle that sequence, then all characters up to and + * including the comment terminator sequence are discarded and another + * token is parsed. + *
  • If all cases above are not met, then the character is an ordinary + * character that is parsed as a token by itself. The char encountered + * is returned as the token type. + *
+ * + * @return The token type + * @exception IOException If an I/O error occurs + */ public int nextToken() throws IOException { if (pushedBack) { pushedBack = false; - return ttype; + if (ttype != TT_NONE) + return ttype; } sval = null; @@ -355,12 +457,32 @@ public class StreamTokenizer false; } + /** + * This method makes the specified character an ordinary character. This + * means that none of the attributes (whitespace, alphabetic, numeric, + * quote, or comment) will be set on this character. This character will + * parse as its own token. + * + * @param ch The character to make ordinary, passed as an int + */ public void ordinaryChar(int ch) { if (ch >= 0 && ch <= 255) resetChar(ch); } + /** + * This method makes all the characters in the specified range, range + * terminators included, ordinary. This means that none of the attributes + * (whitespace, alphabetic, numeric, quote, or comment) will be set on + * any of the characters in the range. This makes each character in this + * range parse as its own token. + * + * @param low The low end of the range of values to make + * ordinary + * @param hi The high end of the range of values to make + * ordinary + */ public void ordinaryChars(int low, int hi) { if (low < 0) @@ -371,6 +493,10 @@ public class StreamTokenizer resetChar(i); } + /** + * This method sets the numeric attribute on the characters '0' - '9' and + * the characters '.' and '-'. + */ public void parseNumbers() { for (int i = 0; i <= 9; i++) @@ -380,6 +506,13 @@ public class StreamTokenizer numeric['-'] = true; } + /** + * This method pushes the current token back, so that the next call to + * nextToken() returns the same token type again instead of + * reading a new token. 
+ * Note that calling this method has no effect on the line number + * returned by lineno(). + */ public void pushBack() { // pushBack may cause the lineno method to return an incorrect value @@ -387,27 +520,74 @@ public class StreamTokenizer pushedBack = true; } + /** + * This method sets the quote attribute on the specified character. + * + * @param ch The character to set the quote attribute for, passed as an int. + */ public void quoteChar(int ch) { if (ch >= 0 && ch <= 255) quote[ch] = true; } + /** + * This method removes all attributes (whitespace, alphabetic, numeric, + * quote, and comment) from all characters. It is equivalent to calling + * ordinaryChars(0x00, 0xFF). + * + * @see #ordinaryChars(int, int) + */ public void resetSyntax() { ordinaryChars(0x00, 0xFF); } + /** + * This method sets a flag that indicates whether or not "C++" language style + * comments ("//" comments through EOL) are handled by the parser. + * If this is true, commented out sequences are skipped and + * ignored by the parser. This defaults to false. + * + * @param flag true to recognize and handle "C++" style + * comments, false otherwise + */ public void slashSlashComments(boolean flag) { slashSlash = flag; } + /** + * This method sets a flag that indicates whether or not "C" language style + * comments (with nesting not allowed) are handled by the parser. + * If this is true, commented out sequences are skipped and + * ignored by the parser. This defaults to false. + * + * @param flag true to recognize and handle "C" style comments, + * false otherwise + */ public void slashStarComments(boolean flag) { slashStar = flag; } + /** + * This method returns the current token value as a String in + * the form "Token[x], line n", where 'n' is the current line number and + * 'x' is determined as follows. + *

+ *

    + *
  • If no token has been read, then 'x' is "NOTHING" and 'n' is the current line number (initially 1) + *
  • If ttype is TT_EOF, then 'x' is "EOF" + *
  • If ttype is TT_EOL, then 'x' is "EOL" + *
  • If ttype is TT_WORD, then 'x' is sval + *
  • If ttype is TT_NUMBER, then 'x' is "n=strnval" where + * 'strnval' is String.valueOf(nval). + *
  • If ttype is a quote character, then 'x' is + * sval + *
  • For all other cases, 'x' is ttype + *
+ */ public String toString() { String tempstr; @@ -418,13 +598,24 @@ public class StreamTokenizer else if (ttype == TT_WORD) tempstr = sval; else if (ttype == TT_NUMBER) - tempstr = "n=" + Double.toString(nval); + tempstr = "n=" + nval; + else if (ttype == TT_NONE) + tempstr = "NOTHING"; else // must be an ordinary char. - tempstr = "\'" + (new Character((char) ttype)).toString() + "\'"; + tempstr = "\'" + (char) ttype + "\'"; - return "Token[" + tempstr + "], line " + Integer.toString(lineno()); + return "Token[" + tempstr + "], line " + lineno(); } + /** + * This method sets the whitespace attribute for all characters in the + * specified range, range terminators included. + * + * @param low The low end of the range of values to set the whitespace + * attribute for + * @param hi The high end of the range of values to set the whitespace + * attribute for + */ public void whitespaceChars(int low, int hi) { if (low < 0) @@ -435,6 +626,15 @@ public class StreamTokenizer whitespace[i] = true; } + /** + * This method sets the alphabetic attribute for all characters in the + * specified range, range terminators included. + * + * @param low The low end of the range of values to set the alphabetic + * attribute for + * @param hi The high end of the range of values to set the alphabetic + * attribute for + */ public void wordChars(int low, int hi) { if (low < 0) -- cgit v1.1