/* Language lexer for the GNU compiler for the Java(TM) language. Copyright (C) 1997, 1998 Free Software Foundation, Inc. Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com) This file is part of GNU CC. GNU CC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. GNU CC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU CC; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Java and all Java-based marks are trademarks or registered trademarks of Sun Microsystems, Inc. in the United States and other countries. The Free Software Foundation is independent of Sun Microsystems, Inc. */ /* It defines java_lex (yylex) that reads a Java ASCII source file possibly containing Unicode escape sequence or utf8 encoded characters and returns a token for everything found but comments, white spaces and line terminators. When necessary, it also fills the java_lval (yylval) union. It's implemented to be called by a re-entrant parser generated by Bison. The lexical analysis conforms to the Java grammar described in "The Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele. Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */ #include #include #include #ifdef JAVA_LEX_DEBUG #include #endif #ifdef inline /* javaop.h redefines inline as static */ #undef inline #endif #include "keyword.h" #ifndef SEEK_SET #include #endif void java_init_lex () { int java_lang_imported = 0; #ifndef JC1_LITE if (!java_lang_imported) { tree node = build_tree_list (build_expr_wfl (get_identifier ("java.lang"), NULL, 0, 0), NULL_TREE); read_import_dir (TREE_PURPOSE (node)); TREE_CHAIN (node) = ctxp->import_demand_list; ctxp->import_demand_list = node; java_lang_imported = 1; } if (!wfl_operator) wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0); if (!label_id) label_id = get_identifier ("$L"); ctxp->static_initialized = ctxp->non_static_initialized = ctxp->incomplete_class = NULL_TREE; bzero (ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0])); classpath = NULL; bzero (current_jcf, sizeof (JCF)); ctxp->current_parsed_class = NULL; ctxp->package = NULL_TREE; #endif ctxp->filename = input_filename; ctxp->lineno = lineno = 0; ctxp->p_line = NULL; ctxp->c_line = NULL; ctxp->unget_utf8_value = 0; ctxp->minus_seen = 0; ctxp->java_error_flag = 0; } static char * java_sprint_unicode (line, i) struct java_line *line; int i; { static char buffer [10]; if (line->unicode_escape_p [i] || line->line [i] > 128) sprintf (buffer, "\\u%04x", line->line [i]); else { buffer [0] = line->line [i]; buffer [1] = '\0'; } return buffer; } static unicode_t java_sneak_unicode () { return (ctxp->c_line->line [ctxp->c_line->current]); } static void java_unget_unicode (c) unicode_t c; { if (!ctxp->c_line->current) fatal ("can't unget unicode - java_unget_unicode"); ctxp->c_line->current--; ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0); } void java_allocate_new_line () { int i; unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0'); char ahead_escape_p = (ctxp->c_line ? ctxp->c_line->unicode_escape_ahead_p : 0); if (ctxp->c_line && !ctxp->c_line->white_space_only) { if (ctxp->p_line) { free (ctxp->p_line->unicode_escape_p); free (ctxp->p_line->line); free (ctxp->p_line); } ctxp->p_line = ctxp->c_line; ctxp->c_line = NULL; /* Reallocated */ } if (!ctxp->c_line) { ctxp->c_line = (struct java_line *)malloc (sizeof (struct java_line)); ctxp->c_line->max = JAVA_LINE_MAX; ctxp->c_line->line = (unicode_t *)malloc (sizeof (unicode_t)*ctxp->c_line->max); ctxp->c_line->unicode_escape_p = (char *)malloc (sizeof (char)*ctxp->c_line->max); ctxp->c_line->white_space_only = 0; } ctxp->c_line->line [0] = ctxp->c_line->size = 0; ctxp->c_line->char_col = ctxp->c_line->current = 0; if (ahead) { ctxp->c_line->line [ctxp->c_line->size] = ahead; ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p; ctxp->c_line->size++; } ctxp->c_line->ahead [0] = 0; ctxp->c_line->unicode_escape_ahead_p = 0; ctxp->c_line->lineno = ++lineno; ctxp->c_line->white_space_only = 1; } static unicode_t java_read_char () { int c; int c1, c2; if (ctxp->unget_utf8_value) { int to_return = ctxp->unget_utf8_value; ctxp->unget_utf8_value = 0; return (to_return); } c = GETC (); if (c < 128) return (unicode_t)c; if (c == EOF) return UEOF; else { if (c & 0xe0 == 0xc0) { c1 = GETC (); if (c1 & 0xc0 == 0x80) return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f)); } else if (c & 0xf0 == 0xe0) { c1 = GETC (); if (c1 & 0xc0 == 0x80) { c2 = GETC (); if (c2 & 0xc0 == 0x80) return (unicode_t)(((c & 0xf) << 12) + (( c1 & 0x3f) << 6) + (c2 & 0x3f)); } } java_lex_error ("Bad utf8 encoding", 0); } } static void java_store_unicode (l, c, unicode_escape_p) struct java_line *l; unicode_t c; int unicode_escape_p; { if (l->size == l->max) { l->max += JAVA_LINE_MAX; l->line = (unicode_t *)realloc (l->line, sizeof (unicode_t)*l->max); l->unicode_escape_p = (char *)realloc (l->unicode_escape_p, sizeof (char)*l->max); } l->line [l->size] = c; l->unicode_escape_p [l->size++] = unicode_escape_p; } static unicode_t java_read_unicode (term_context, unicode_escape_p) int term_context; int *unicode_escape_p; { unicode_t c; long i, base; c = java_read_char (); *unicode_escape_p = 0; if (c != '\\') return ((term_context ? c : java_lineterminator (c) ? '\n' : (unicode_t)c)); /* Count the number of preceeding '\' */ for (base = ftell (finput), i = base-2; c == '\\';) { fseek (finput, i--, SEEK_SET); c = java_read_char (); /* Will fail if reading utf8 stream. FIXME */ } fseek (finput, base, SEEK_SET); if ((base-i-3)%2 == 0) /* If odd number of \ seen */ { c = java_read_char (); if (c == 'u') { unsigned short unicode = 0; int shift = 12; /* Next should be 4 hex digits, otherwise it's an error. The hex value is converted into the unicode, pushed into the Unicode stream. */ for (shift = 12; shift >= 0; shift -= 4) { if ((c = java_read_char ()) == UEOF) return UEOF; if (c >= '0' && c <= '9') unicode |= (unicode_t)((c-'0') << shift); else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) unicode |= (unicode_t)(10+(c | 0x20)-'a' << shift); else java_lex_error ("Non hex digit in Unicode escape sequence", 0); } *unicode_escape_p = 1; return (term_context ? unicode : (java_lineterminator (c) ? '\n' : unicode)); } UNGETC (c); } return (unicode_t)'\\'; } static unicode_t java_get_unicode () { /* It's time to read a line when... */ if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size) { unicode_t c; java_allocate_new_line (); if (ctxp->c_line->line[0] != '\n') for (;;) { int unicode_escape_p; c = java_read_unicode (0, &unicode_escape_p); java_store_unicode (ctxp->c_line, c, unicode_escape_p); if (ctxp->c_line->white_space_only && !JAVA_WHITE_SPACE_P (c) && c!='\n') ctxp->c_line->white_space_only = 0; if ((c == '\n') || (c == UEOF)) break; } } ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0); JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]); return ctxp->c_line->line [ctxp->c_line->current++]; } static int java_lineterminator (c) unicode_t c; { int unicode_escape_p; if (c == '\n') /* CR */ { if ((c = java_read_unicode (1, &unicode_escape_p)) != '\r') { ctxp->c_line->ahead [0] = c; ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p; } return 1; } else if (c == '\r') /* LF */ { if ((c = java_read_unicode (1, &unicode_escape_p)) != '\n') { ctxp->c_line->ahead [0] = c; ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p; } return 1; } else return 0; } /* Parse the end of a C style comment */ static void java_parse_end_comment () { unicode_t c; for (c = java_get_unicode ();; c = java_get_unicode ()) { switch (c) { case UEOF: java_lex_error ("Comment not terminated at end of input", 0); case '*': switch (c = java_get_unicode ()) { case UEOF: java_lex_error ("Comment not terminated at end of input", 0); case '/': return; case '*': /* reparse only '*' */ java_unget_unicode (c); } } } } /* This function to be used only by JAVA_ID_CHAR_P (), otherwise it will return a wrong result. */ static int java_letter_or_digit_p (c) unicode_t c; { return _JAVA_LETTER_OR_DIGIT_P (c); } static unicode_t java_parse_escape_sequence () { unicode_t char_lit; unicode_t c; switch (c = java_get_unicode ()) { case 'b': return (unicode_t)0x8; case 't': return (unicode_t)0x9; case 'n': return (unicode_t)0xa; case 'f': return (unicode_t)0xc; case 'r': return (unicode_t)0xd; case '"': return (unicode_t)0x22; case '\'': return (unicode_t)0x27; case '\\': return (unicode_t)0x5c; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { int octal_escape[3]; int octal_escape_index = 0; for (; octal_escape_index < 3 && RANGE (c, '0', '9'); c = java_get_unicode ()) octal_escape [octal_escape_index++] = c; java_unget_unicode (c); if ((octal_escape_index == 3) && (octal_escape [0] > '3')) { java_lex_error ("Literal octal escape out of range", 0); return JAVA_CHAR_ERROR; } else { int i, shift; for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1); i < octal_escape_index; i++, shift -= 3) char_lit |= (octal_escape [i] - '0') << shift; return (char_lit); } break; } case '\n': return '\n'; /* ULT, caught latter as a specific error */ default: java_lex_error ("Illegal character in escape sequence", 0); return JAVA_CHAR_ERROR; } } int #ifdef JC1_LITE yylex (java_lval) #else java_lex (java_lval) #endif YYSTYPE *java_lval; { unicode_t c, first_unicode; int line_terminator; int ascii_index, all_ascii; char *string; /* Translation of the Unicode escape in the raw stream of Unicode characters. Takes care of line terminator. */ step1: /* Skip white spaces: SP, TAB and FF or ULT */ for (c = java_get_unicode (); c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ()) if (c == '\n') { ctxp->elc.line = ctxp->c_line->lineno; ctxp->elc.col = ctxp->c_line->char_col-2; } ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col); if (c == 0x1a) /* CTRL-Z */ { if ((c = java_get_unicode ()) == UEOF) return 0; /* Ok here */ else java_unget_unicode (c); /* Caught latter at the end the function */ } /* Handle EOF here */ if (c == UEOF) /* Should probably do something here... */ return 0; /* Take care of eventual comments. */ if (c == '/') { switch (c = java_get_unicode ()) { case '/': for (c = java_get_unicode ();;c = java_get_unicode ()) { if (c == UEOF) java_lex_error ("Comment not terminated at end of input", 0); if (c == '\n') /* ULT */ goto step1; } break; case '*': if ((c = java_get_unicode ()) == '*') { if ((c = java_get_unicode ()) == '/') goto step1; /* Empy documentation comment */ else /* Parsing the documentation section. We're looking for the @depracated pseudo keyword. the @deprecated tag must be at the beginning of a doc comment line (ignoring white space and any * character) */ { int valid_tag = 0, seen_star; while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n') { switch (c) { case '*': seen_star = 1; break; case '\n': /* ULT */ valid_tag = 1; break; default: seen_star = 0; } c = java_get_unicode(); } if (c == UEOF) java_lex_error ("Comment not terminated at end of input", 0); if (seen_star && (c == '/')) goto step1; /* End of documentation */ if (valid_tag && (c == '@')) { char deprecated [10]; int deprecated_index = 0; for (deprecated_index = 0, c = java_get_unicode (); deprecated_index < 10 && c != UEOF; c = java_get_unicode ()) deprecated [deprecated_index++] = c; if (c == UEOF) java_lex_error ("Comment not terminated at end of input", 0); java_unget_unicode (c); deprecated [deprecated_index] = '\0'; if (!strcmp (deprecated, "deprecated")) { /* Set global flag to be checked by class. FIXME */ warning ("deprecated implementation found"); } } } } else java_unget_unicode (c); java_parse_end_comment (); goto step1; break; default: java_unget_unicode (c); c = '/'; break; } } ctxp->elc.line = ctxp->c_line->lineno; ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1); if (ctxp->elc.col < 0) fatal ("ctxp->elc.col < 0 - java_lex"); /* Numeric literals */ if (JAVA_ASCII_DIGIT (c) || (c == '.')) { unicode_t peep; /* This section of code is borrowed from gcc/c-lex.c */ #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2) int parts[TOTAL_PARTS]; HOST_WIDE_INT high, low; /* End borrowed section */ char literal_token [256]; int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes; int i; int number_beginning = ctxp->c_line->current; /* We might have a . separator instead of a FP like .[0-9]* */ if (c == '.') { unicode_t peep = java_sneak_unicode (); if (!JAVA_ASCII_DIGIT (peep)) { JAVA_LEX_SEP('.'); BUILD_OPERATOR (DOT_TK); } } for (i = 0; i < TOTAL_PARTS; i++) parts [i] = 0; if (c == '0') { c = java_get_unicode (); if (c == 'x' || c == 'X') { radix = 16; c = java_get_unicode (); } else if (JAVA_ASCII_DIGIT (c)) radix = 8; else if (c == '.') { /* Push the '.' back and prepare for a FP parsing... */ java_unget_unicode (c); c = '0'; } else { /* We have a zero literal: 0, 0{f,F}, 0{d,D} */ JAVA_LEX_LIT ("0", 10); switch (c) { case 'L': case 'l': SET_LVAL_NODE_TYPE (integer_zero_node, long_type_node); return (INT_LIT_TK); case 'f': case 'F': SET_LVAL_NODE_TYPE (build_real (float_type_node, dconst0), float_type_node); return (FP_LIT_TK); case 'd': case 'D': SET_LVAL_NODE_TYPE (build_real (double_type_node, dconst0), double_type_node); return (FP_LIT_TK); default: java_unget_unicode (c); SET_LVAL_NODE_TYPE (integer_zero_node, int_type_node); return (INT_LIT_TK); } } } /* Parse the first part of the literal, until we find something which is not a number. */ while ((radix == 10 && JAVA_ASCII_DIGIT (c)) || (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) || (radix == 8 && JAVA_ASCII_OCTDIGIT (c))) { /* We store in a string (in case it turns out to be a FP) and in PARTS if we have to process a integer literal. */ int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a'); int count; literal_token [literal_index++] = c; /* This section of code if borrowed from gcc/c-lex.c */ for (count = 0; count < TOTAL_PARTS; count++) { parts[count] *= radix; if (count) { parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR); parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1; } else parts[0] += numeric; } if (parts [TOTAL_PARTS-1] != 0) overflow = 1; /* End borrowed section. */ c = java_get_unicode (); } /* If we have something from the FP char set but not a digit, parse a FP literal. */ if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c)) { int stage = 0; int seen_digit = (literal_index ? 1 : 0); int seen_exponent = 0; int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are double unless specified. */ if (radix != 10) java_lex_error ("Can't express non-decimal FP literal", 0); for (;;) { if (c == '.') { if (stage < 1) { stage = 1; literal_token [literal_index++ ] = c; c = java_get_unicode (); } else java_lex_error ("Invalid character in FP literal", 0); } if (c == 'e' || c == 'E') { if (stage < 2) { /* {E,e} must have seen at list a digit */ if (!seen_digit) java_lex_error ("Invalid FP literal", 0); seen_digit = 0; seen_exponent = 1; stage = 2; literal_token [literal_index++] = c; c = java_get_unicode (); } else java_lex_error ("Invalid character in FP literal", 0); } if ( c == 'f' || c == 'F' || c == 'd' || c == 'D') { fflag = ((c == 'd') || (c == 'D')) ? 0 : 1; stage = 4; /* So we fall through */ } if ((c=='-' || c =='+') && stage < 3) { stage = 3; literal_token [literal_index++] = c; c = java_get_unicode (); } if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) || (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) || (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) || (stage == 3 && JAVA_ASCII_DIGIT (c))) { if (JAVA_ASCII_DIGIT (c)) seen_digit = 1; literal_token [literal_index++ ] = c; c = java_get_unicode (); } else { jmp_buf handler; REAL_VALUE_TYPE value; #ifndef JC1_LITE tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE); #endif if (stage != 4) /* Don't push back fF/dD */ java_unget_unicode (c); /* An exponent (if any) must have seen a digit. */ if (seen_exponent && !seen_digit) java_lex_error ("Invalid FP literal", 0); literal_token [literal_index] = '\0'; JAVA_LEX_LIT (literal_token, radix); if (setjmp (handler)) { JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double")); value = DCONST0; } else { SET_FLOAT_HANDLER (handler); SET_REAL_VALUE_ATOF (value, REAL_VALUE_ATOF (literal_token, TYPE_MODE (type))); if (REAL_VALUE_ISINF (value)) JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double")); if (REAL_VALUE_ISNAN (value)) JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double")); SET_LVAL_NODE_TYPE (build_real (type, value), type); SET_FLOAT_HANDLER (NULL_PTR); return FP_LIT_TK; } } } } /* JAVA_ASCCI_FPCHAR (c) */ /* Here we get back to converting the integral literal. */ if (c == 'L' || c == 'l') long_suffix = 1; else if (radix == 16 && JAVA_ASCII_LETTER (c)) java_lex_error ("Digit out of range in hexadecimal literal", 0); else if (radix == 8 && JAVA_ASCII_DIGIT (c)) java_lex_error ("Digit out of range in octal literal", 0); else if (radix == 16 && !literal_index) java_lex_error ("No digit specified for hexadecimal literal", 0); else java_unget_unicode (c); #ifdef JAVA_LEX_DEBUG literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */ JAVA_LEX_LIT (literal_token, radix); #endif /* This section of code is borrowed from gcc/c-lex.c */ if (!overflow) { bytes = GET_TYPE_PRECISION (long_type_node); for (i = bytes; i < TOTAL_PARTS; i++) if (parts [i]) { overflow = 1; break; } } high = low = 0; for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++) { high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR)] << (i * HOST_BITS_PER_CHAR)); low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR); } /* End borrowed section. */ /* Range checking */ if (long_suffix) { /* 9223372036854775808L is valid if operand of a '-'. Otherwise 9223372036854775807L is the biggest `long' literal that can be expressed using a 10 radix. For other radixes, everything that fits withing 64 bits is OK. */ int hb = (high >> 31); if (overflow || (hb && low && radix == 10) || (hb && high & 0x7fffffff && radix == 10) || (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10)) JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal"); } else { /* 2147483648 is valid if operand of a '-'. Otherwise, 2147483647 is the biggest `int' literal that can be expressed using a 10 radix. For other radixes, everything that fits within 32 bits is OK. */ int hb = (low >> 31) & 0x1; if (overflow || high || (hb && low & 0x7fffffff && radix == 10) || (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10)) JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal"); } ctxp->minus_seen = 0; SET_LVAL_NODE_TYPE (build_int_2 (low, high), (long_suffix ? long_type_node : int_type_node)); return INT_LIT_TK; } ctxp->minus_seen = 0; /* Character literals */ if (c == '\'') { unicode_t char_lit; if ((c = java_get_unicode ()) == '\\') char_lit = java_parse_escape_sequence (); else char_lit = c; c = java_get_unicode (); if ((c == '\n') || (c == UEOF)) java_lex_error ("Character literal not terminated at end of line", 0); if (c != '\'') java_lex_error ("Syntax error in character literal", 0); if (c == JAVA_CHAR_ERROR) char_lit = 0; /* We silently convert it to zero */ JAVA_LEX_CHAR_LIT (char_lit); SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node); return CHAR_LIT_TK; } /* String literals */ if (c == '"') { int no_error; char *string; for (no_error = 1, c = java_get_unicode (); c != '"' && c != '\n'; c = java_get_unicode ()) { if (c == '\\') c = java_parse_escape_sequence (); no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0); if (c) java_unicode_2_utf8 (c); } if (c == '\n' || c == UEOF) /* ULT */ { lineno--; /* Refer to the line the terminator was seen */ java_lex_error ("String not terminated at end of line.", 0); lineno++; } obstack_1grow (&temporary_obstack, '\0'); string = obstack_finish (&temporary_obstack); if (!no_error || (c != '"')) *string = '\0'; /* Silently turns the string to an empty one */ JAVA_LEX_STR_LIT (string) #ifndef JC1_LITE if (*string) { extern struct obstack *expression_obstack; tree s = make_node (STRING_CST); TREE_STRING_LENGTH (s) = strlen (string); TREE_STRING_POINTER (s) = obstack_alloc (expression_obstack, strlen (string)); strcpy (TREE_STRING_POINTER (s), string); java_lval->node = s; } else java_lval->node = error_mark_node; #endif return STRING_LIT_TK; } /* Separator */ switch (c) { case '(': JAVA_LEX_SEP (c); BUILD_OPERATOR (OP_TK); case ')': JAVA_LEX_SEP (c); return CP_TK; case '{': JAVA_LEX_SEP (c); if (ctxp->ccb_indent == 1) ctxp->first_ccb_indent1 = lineno; ctxp->ccb_indent++; return OCB_TK; case '}': JAVA_LEX_SEP (c); ctxp->ccb_indent--; if (ctxp->ccb_indent == 1) ctxp->last_ccb_indent1 = lineno; return CCB_TK; case '[': JAVA_LEX_SEP (c); BUILD_OPERATOR (OSB_TK); case ']': JAVA_LEX_SEP (c); return CSB_TK; case ';': JAVA_LEX_SEP (c); return SC_TK; case ',': JAVA_LEX_SEP (c); return C_TK; case '.': JAVA_LEX_SEP (c); BUILD_OPERATOR (DOT_TK); /* return DOT_TK; */ } /* Operators */ switch (c) { case '=': if ((c = java_get_unicode ()) == '=') { BUILD_OPERATOR (EQ_TK); } else { /* Equals is used in two different locations. In the variable_declarator: rule, it has to be seen as '=' as opposed to being seen as an ordinary assignment operator in assignment_operators: rule. */ java_unget_unicode (c); BUILD_OPERATOR (ASSIGN_TK); } case '>': switch ((c = java_get_unicode ())) { case '=': BUILD_OPERATOR (GTE_TK); case '>': switch ((c = java_get_unicode ())) { case '>': if ((c = java_get_unicode ()) == '=') { BUILD_OPERATOR2 (ZRS_ASSIGN_TK); } else { java_unget_unicode (c); BUILD_OPERATOR (ZRS_TK); } case '=': BUILD_OPERATOR2 (SRS_ASSIGN_TK); default: java_unget_unicode (c); BUILD_OPERATOR (SRS_TK); } default: java_unget_unicode (c); BUILD_OPERATOR (GT_TK); } case '<': switch ((c = java_get_unicode ())) { case '=': BUILD_OPERATOR (LTE_TK); case '<': if ((c = java_get_unicode ()) == '=') { BUILD_OPERATOR2 (LS_ASSIGN_TK); } else { java_unget_unicode (c); BUILD_OPERATOR (LS_TK); } default: java_unget_unicode (c); BUILD_OPERATOR (LT_TK); } case '&': switch ((c = java_get_unicode ())) { case '&': BUILD_OPERATOR (BOOL_AND_TK); case '=': BUILD_OPERATOR2 (AND_ASSIGN_TK); default: java_unget_unicode (c); BUILD_OPERATOR (AND_TK); } case '|': switch ((c = java_get_unicode ())) { case '|': BUILD_OPERATOR (BOOL_OR_TK); case '=': BUILD_OPERATOR2 (OR_ASSIGN_TK); default: java_unget_unicode (c); BUILD_OPERATOR (OR_TK); } case '+': switch ((c = java_get_unicode ())) { case '+': BUILD_OPERATOR (INCR_TK); case '=': BUILD_OPERATOR2 (PLUS_ASSIGN_TK); default: java_unget_unicode (c); BUILD_OPERATOR (PLUS_TK); } case '-': switch ((c = java_get_unicode ())) { case '-': BUILD_OPERATOR (DECR_TK); case '=': BUILD_OPERATOR2 (MINUS_ASSIGN_TK); default: java_unget_unicode (c); ctxp->minus_seen = 1; BUILD_OPERATOR (MINUS_TK); } case '*': if ((c = java_get_unicode ()) == '=') { BUILD_OPERATOR2 (MULT_ASSIGN_TK); } else { java_unget_unicode (c); BUILD_OPERATOR (MULT_TK); } case '/': if ((c = java_get_unicode ()) == '=') { BUILD_OPERATOR2 (DIV_ASSIGN_TK); } else { java_unget_unicode (c); BUILD_OPERATOR (DIV_TK); } case '^': if ((c = java_get_unicode ()) == '=') { BUILD_OPERATOR2 (XOR_ASSIGN_TK); } else { java_unget_unicode (c); BUILD_OPERATOR (XOR_TK); } case '%': if ((c = java_get_unicode ()) == '=') { BUILD_OPERATOR2 (REM_ASSIGN_TK); } else { java_unget_unicode (c); BUILD_OPERATOR (REM_TK); } case '!': if ((c = java_get_unicode()) == '=') { BUILD_OPERATOR (NEQ_TK); } else { java_unget_unicode (c); BUILD_OPERATOR (NEG_TK); } case '?': JAVA_LEX_OP ("?"); BUILD_OPERATOR (REL_QM_TK); case ':': JAVA_LEX_OP (":"); BUILD_OPERATOR (REL_CL_TK); case '~': BUILD_OPERATOR (NOT_TK); } /* Keyword, boolean literal or null literal */ for (first_unicode = c, all_ascii = 1, ascii_index = 0; JAVA_ID_CHAR_P (c); c = java_get_unicode ()) { java_unicode_2_utf8 (c); if (all_ascii && c >= 128) all_ascii = 0; ascii_index++; } obstack_1grow (&temporary_obstack, '\0'); string = obstack_finish (&temporary_obstack); java_unget_unicode (c); /* If we have something all ascii, we consider a keyword, a boolean literal, a null literal or an all ASCII identifier. Otherwise, this is an identifier (possibly not respecting formation rule). */ if (all_ascii) { struct java_keyword *kw; if ((kw=java_keyword (string, ascii_index))) { JAVA_LEX_KW (string); switch (kw->token) { case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK: case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK: case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK: case PRIVATE_TK: SET_MODIFIER_CTX (kw->token); return MODIFIER_TK; case FLOAT_TK: SET_LVAL_NODE (float_type_node); return FP_TK; case DOUBLE_TK: SET_LVAL_NODE (double_type_node); return FP_TK; case BOOLEAN_TK: SET_LVAL_NODE (boolean_type_node); return BOOLEAN_TK; case BYTE_TK: SET_LVAL_NODE (byte_type_node); return INTEGRAL_TK; case SHORT_TK: SET_LVAL_NODE (short_type_node); return INTEGRAL_TK; case INT_TK: SET_LVAL_NODE (int_type_node); return INTEGRAL_TK; case LONG_TK: SET_LVAL_NODE (long_type_node); return INTEGRAL_TK; case CHAR_TK: SET_LVAL_NODE (char_type_node); return INTEGRAL_TK; /* Keyword based literals */ case TRUE_TK: case FALSE_TK: SET_LVAL_NODE ((kw->token == TRUE_TK ? boolean_true_node : boolean_false_node)); return BOOL_LIT_TK; case NULL_TK: SET_LVAL_NODE (null_pointer_node); return NULL_TK; /* We build an operator for SUPER, so we can keep its position */ case SUPER_TK: case THIS_TK: case RETURN_TK: case BREAK_TK: case CONTINUE_TK: BUILD_OPERATOR (kw->token); default: return kw->token; } } } /* We may have and ID here */ if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode)) { JAVA_LEX_ID (string); java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string)); return ID_TK; } /* Everything else is an invalid character in the input */ { char lex_error_buffer [128]; sprintf (lex_error_buffer, "Invalid character '%s' in input", java_sprint_unicode (ctxp->c_line, ctxp->c_line->current)); java_lex_error (lex_error_buffer, 1); } return 0; } static void java_unicode_2_utf8 (unicode) unicode_t unicode; { if (RANGE (unicode, 0x01, 0x7f)) obstack_1grow (&temporary_obstack, (char)unicode); else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0) { obstack_1grow (&temporary_obstack, (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6))); obstack_1grow (&temporary_obstack, (unsigned char)(0x80 | (unicode & 0x3f))); } else /* Range 0x800-0xffff */ { obstack_1grow (&temporary_obstack, (unsigned char)(0xe0 | (unicode & 0xf000) >> 12)); obstack_1grow (&temporary_obstack, (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6)); obstack_1grow (&temporary_obstack, (unsigned char)(0x80 | (unicode & 0x003f) >> 12)); } } #ifndef JC1_LITE static tree build_wfl_node (node) tree node; { return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col); } #endif static void java_lex_error (msg, forward) char *msg; int forward; { #ifndef JC1_LITE ctxp->elc.line = ctxp->c_line->lineno; ctxp->elc.col = ctxp->c_line->char_col-1+forward; /* Might be caught in the middle of some error report */ ctxp->java_error_flag = 0; java_error (NULL); java_error (msg); #endif } static int java_is_eol (fp, c) FILE *fp; int c; { int next; switch (c) { case '\n': next = getc (fp); if (next != '\r' && next != EOF) ungetc (next, fp); return 1; case '\r': return 1; default: return 0; } } char * java_get_line_col (filename, line, col) char *filename; int line, col; { #ifdef JC1_LITE return 0; #else /* Dumb implementation. Doesn't try to cache or optimize things. */ /* First line of the file is line 1, first column is 1 */ /* COL <= 0 means, at the CR/LF in LINE */ FILE *fp; int c, ccol, cline = 1; int current_line_col = 0; if (!(fp = fopen (filename, "r"))) fatal ("Can't open file - java_display_line_col"); while (cline != line) { c = getc (fp); if (c < 0) { static char msg[] = "<>"; obstack_grow (&temporary_obstack, msg, sizeof(msg)-1); goto have_line; } if (java_is_eol (fp, c)) cline++; } /* Gather the chars of the current line in a buffer */ for (;;) { c = getc (fp); if (c < 0 || java_is_eol (fp, c)) break; obstack_1grow (&temporary_obstack, c); current_line_col++; } have_line: obstack_1grow (&temporary_obstack, '\n'); if (col < 0) col = current_line_col; /* Place the '^' a the right position */ for (ccol = 1; ccol <= col; ccol++) obstack_1grow (&temporary_obstack, ' '); obstack_grow0 (&temporary_obstack, "^", 1); fclose (fp); return obstack_finish (&temporary_obstack); #endif }