aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Tromey <tromey@cygnus.com>2000-09-12 22:23:59 +0000
committerTom Tromey <tromey@gcc.gnu.org>2000-09-12 22:23:59 +0000
commitd19cbcb5e3dd83e2628d25d2cd23892a4cac83b0 (patch)
tree21cc6935b87686835780712e1d9a7d64eae418d0
parentee17a29049f330ff40a486e56826468a223323c2 (diff)
downloadgcc-d19cbcb5e3dd83e2628d25d2cd23892a4cac83b0.zip
gcc-d19cbcb5e3dd83e2628d25d2cd23892a4cac83b0.tar.gz
gcc-d19cbcb5e3dd83e2628d25d2cd23892a4cac83b0.tar.bz2
re GNATS gcj/33 (gcj mangles composed characters)
Fix for PR gcj/33: * jv-scan.c (help): Document --encoding. (options): Added `encoding' entry. (OPT_ENCODING): New define. (main): Handle --encoding. Include <langinfo.h> if nl_langinfo exists. * lang-options.h: Document --classpath, --CLASSPATH, --main, and --encoding. * jcf-parse.c Include <langinfo.h> if we have nl_langinfo. (parse_source_file): Correctly call java_init_lex. Added `finput' argument. Use nl_langinfo to determine default encoding. * java-tree.h (current_encoding): Declare. * parse.y (java_parser_context_restore_global): Don't restore `finput'. (java_parser_context_save_global): Don't set `finput' field. (java_pop_parser_context): Don't restore `finput'. Free old lexer if required. * lang.c (current_encoding): New global. (lang_decode_option): Recognize `-fencoding='. (finish_parse): Don't close finput. * parse.h (struct parser_ctxt): Removed `finput' and `unget_utf8_value' fields. Added `lexer' field. (java_init_lex): Fixed declaration. * lex.c (java_new_lexer): New function. (java_destroy_lexer): Likewise. (java_read_char): Added `lex' argument. Handle iconv case. (java_read_unicode): Added `lex' argument. Count backslashes in lexer structure. (java_init_lex): Added `finput' and `encoding' arguments. Set `lexer' field in ctxp. (BAD_UTF8_VALUE): Removed. (java_lex): Handle seeing UEOF in the middle of a string literal. * lex.h: Include <iconv.h> if HAVE_ICONV defined. (java_lexer): New structure. (UNGETC): Removed. (GETC): Removed. (DEFAULT_ENCODING): New define. (java_destroy_lexer): Declare. From-SVN: r36377
-rw-r--r--gcc/java/ChangeLog41
-rw-r--r--gcc/java/java-tree.h3
-rw-r--r--gcc/java/jcf-parse.c31
-rw-r--r--gcc/java/jv-scan.c27
-rw-r--r--gcc/java/lang-options.h6
-rw-r--r--gcc/java/lang.c14
-rw-r--r--gcc/java/lex.c292
-rw-r--r--gcc/java/lex.h41
-rw-r--r--gcc/java/parse.h5
-rw-r--r--gcc/java/parse.y7
10 files changed, 363 insertions, 104 deletions
diff --git a/gcc/java/ChangeLog b/gcc/java/ChangeLog
index 642f4a7..7b13f9a 100644
--- a/gcc/java/ChangeLog
+++ b/gcc/java/ChangeLog
@@ -1,5 +1,46 @@
2000-09-12 Tom Tromey <tromey@cygnus.com>
+ Fix for PR gcj/33:
+ * jv-scan.c (help): Document --encoding.
+ (options): Added `encoding' entry.
+ (OPT_ENCODING): New define.
+ (main): Handle --encoding.
+ Include <langinfo.h> if nl_langinfo exists.
+ * lang-options.h: Document --classpath, --CLASSPATH, --main, and
+ --encoding.
+ * jcf-parse.c: Include <langinfo.h> if we have nl_langinfo.
+ (parse_source_file): Correctly call java_init_lex. Added `finput'
+ argument. Use nl_langinfo to determine default encoding.
+ * java-tree.h (current_encoding): Declare.
+ * parse.y (java_parser_context_restore_global): Don't restore
+ `finput'.
+ (java_parser_context_save_global): Don't set `finput' field.
+ (java_pop_parser_context): Don't restore `finput'. Free old lexer
+ if required.
+ * lang.c (current_encoding): New global.
+ (lang_decode_option): Recognize `-fencoding='.
+ (finish_parse): Don't close finput.
+ * parse.h (struct parser_ctxt): Removed `finput' and
+ `unget_utf8_value' fields. Added `lexer' field.
+ (java_init_lex): Fixed declaration.
+ * lex.c (java_new_lexer): New function.
+ (java_destroy_lexer): Likewise.
+ (java_read_char): Added `lex' argument. Handle iconv case.
+ (java_read_unicode): Added `lex' argument. Count backslashes in
+ lexer structure.
+ (java_init_lex): Added `finput' and `encoding' arguments. Set
+ `lexer' field in ctxp.
+ (BAD_UTF8_VALUE): Removed.
+ (java_lex): Handle seeing UEOF in the middle of a string literal.
+ * lex.h: Include <iconv.h> if HAVE_ICONV defined.
+ (java_lexer): New structure.
+ (UNGETC): Removed.
+ (GETC): Removed.
+ (DEFAULT_ENCODING): New define.
+ (java_destroy_lexer): Declare.
+
+2000-09-12 Tom Tromey <tromey@cygnus.com>
+
Fix for PR gcj/343:
* lex.c (java_init_lex): Initialize java_io_serializable.
* parse.y (java_io_serializable): New global.
diff --git a/gcc/java/java-tree.h b/gcc/java/java-tree.h
index 94fdcae..18cdf7a 100644
--- a/gcc/java/java-tree.h
+++ b/gcc/java/java-tree.h
@@ -169,6 +169,9 @@ extern int flag_use_boehm_gc;
object to its synchronization structure. */
extern int flag_hash_synchronization;
+/* Encoding used for source files. */
+extern char *current_encoding;
+
/* The Java .class file that provides main_class; the main input file. */
extern struct JCF *current_jcf;
diff --git a/gcc/java/jcf-parse.c b/gcc/java/jcf-parse.c
index 02becc0..4b76f59 100644
--- a/gcc/java/jcf-parse.c
+++ b/gcc/java/jcf-parse.c
@@ -35,6 +35,10 @@ The Free Software Foundation is independent of Sun Microsystems, Inc. */
#include "toplev.h"
#include "parse.h"
+#ifdef HAVE_NL_LANGINFO
+#include <langinfo.h>
+#endif
+
/* A CONSTANT_Utf8 element is converted to an IDENTIFIER_NODE at parse time. */
#define JPOOL_UTF(JCF, INDEX) CPOOL_UTF(&(JCF)->cpool, INDEX)
#define JPOOL_UTF_LENGTH(JCF, INDEX) IDENTIFIER_LENGTH (JPOOL_UTF (JCF, INDEX))
@@ -83,7 +87,7 @@ static struct JCF main_jcf[1];
static tree give_name_to_class PARAMS ((JCF *jcf, int index));
static void parse_zip_file_entries PARAMS ((void));
static void process_zip_dir PARAMS ((void));
-static void parse_source_file PARAMS ((tree));
+static void parse_source_file PARAMS ((tree, FILE *));
static void jcf_parse_source PARAMS ((void));
static int jcf_figure_file_type PARAMS ((JCF *));
static int find_in_current_zip PARAMS ((const char *, struct JCF **));
@@ -564,6 +568,7 @@ static void
jcf_parse_source ()
{
tree file;
+ FILE *finput;
java_parser_context_save_global ();
java_push_parser_context ();
@@ -576,7 +581,7 @@ jcf_parse_source ()
if (!(finput = fopen (input_filename, "r")))
fatal ("input file `%s' just disappeared - jcf_parse_source",
input_filename);
- parse_source_file (file);
+ parse_source_file (file, finput);
if (fclose (finput))
fatal ("can't close input file `%s' stream - jcf_parse_source",
input_filename);
@@ -754,8 +759,9 @@ parse_class_file ()
/* Parse a source file, as pointed by the current value of INPUT_FILENAME. */
static void
-parse_source_file (file)
+parse_source_file (file, finput)
tree file;
+ FILE *finput;
{
int save_error_count = java_error_count;
/* Mark the file as parsed */
@@ -765,7 +771,21 @@ parse_source_file (file)
lang_init_source (1); /* Error msgs have no method prototypes */
- java_init_lex (); /* Initialize the parser */
+ /* There's no point in trying to find the current encoding unless we
+ are going to do something intelligent with it -- hence the test
+ for iconv. */
+#ifdef HAVE_ICONV
+#ifdef HAVE_NL_LANGINFO
+ setlocale (LC_CTYPE, "");
+ if (current_encoding == NULL)
+ current_encoding = nl_langinfo (CODESET);
+#endif /* HAVE_NL_LANGINFO */
+#endif /* HAVE_ICONV */
+ if (current_encoding == NULL || *current_encoding == '\0')
+ current_encoding = DEFAULT_ENCODING;
+
+ /* Initialize the parser */
+ java_init_lex (finput, current_encoding);
java_parse_abort_on_error ();
java_parse (); /* Parse and build partial tree nodes. */
@@ -796,6 +816,7 @@ yyparse ()
int several_files = 0;
char *list = xstrdup (input_filename), *next;
tree node, current_file_list = NULL_TREE;
+ FILE *finput;
do
{
@@ -901,7 +922,7 @@ yyparse ()
case JCF_SOURCE:
java_push_parser_context ();
java_parser_context_save_global ();
- parse_source_file (name);
+ parse_source_file (name, finput);
java_parser_context_restore_global ();
java_pop_parser_context (1);
break;
diff --git a/gcc/java/jv-scan.c b/gcc/java/jv-scan.c
index adb7ba3..ae9c91d 100644
--- a/gcc/java/jv-scan.c
+++ b/gcc/java/jv-scan.c
@@ -26,6 +26,10 @@ Boston, MA 02111-1307, USA. */
#include "version.h"
+#ifdef HAVE_NL_LANGINFO
+#include <langinfo.h>
+#endif
+
#include <getopt.h>
void fatal PARAMS ((const char *s, ...)) ATTRIBUTE_PRINTF_1 ATTRIBUTE_NORETURN;
@@ -61,6 +65,7 @@ int flag_list_filename = 0;
#define OPT_HELP LONG_OPT (0)
#define OPT_VERSION LONG_OPT (1)
+#define OPT_ENCODING LONG_OPT (2)
static struct option options[] =
{
@@ -69,6 +74,7 @@ static struct option options[] =
{ "print-main", no_argument, &flag_find_main, 1 },
{ "list-filename", no_argument, &flag_list_filename, 1 },
{ "list-class", no_argument, &flag_dump_class, 1 },
+ { "encoding", required_argument, NULL, OPT_ENCODING },
{ NULL, no_argument, NULL, 0 }
};
@@ -84,6 +90,7 @@ help ()
{
printf ("Usage: jv-scan [OPTION]... FILE...\n\n");
printf ("Print useful information read from Java source files.\n\n");
+ printf (" --encoding NAME Specify encoding of input file\n");
printf (" --print-main Print name of class containing `main'\n");
printf (" --list-class List all classes defined in file\n");
printf (" --list-filename Print input filename when listing class names\n");
@@ -114,6 +121,7 @@ DEFUN (main, (argc, argv),
{
int i = 1;
const char *output_file = NULL;
+ const char *encoding = NULL;
long ft;
int opt;
@@ -144,6 +152,10 @@ DEFUN (main, (argc, argv),
version ();
break;
+ case OPT_ENCODING:
+ encoding = optarg;
+ break;
+
default:
usage ();
break;
@@ -172,7 +184,20 @@ DEFUN (main, (argc, argv),
input_filename = argv [i];
if ( (finput = fopen (argv [i], "r")) )
{
- java_init_lex ();
+ /* There's no point in trying to find the current encoding
+ unless we are going to do something intelligent with it
+ -- hence the test for iconv. */
+#ifdef HAVE_ICONV
+#ifdef HAVE_NL_LANGINFO
+ setlocale (LC_CTYPE, "");
+ if (encoding == NULL)
+ encoding = nl_langinfo (CODESET);
+#endif /* HAVE_NL_LANGINFO */
+#endif /* HAVE_ICONV */
+ if (encoding == NULL || *encoding == '\0')
+ encoding = DEFAULT_ENCODING;
+
+ java_init_lex (finput, encoding);
yyparse ();
if (ftell (out) != ft)
fputc ('\n', out);
diff --git a/gcc/java/lang-options.h b/gcc/java/lang-options.h
index 630e6d8..2b207dc 100644
--- a/gcc/java/lang-options.h
+++ b/gcc/java/lang-options.h
@@ -42,8 +42,10 @@ DEFINE_LANG_NAME ("Java")
{ "-M", "Print dependencies to stdout" },
{ "-MM", "Print dependencies to stdout" },
#endif /* ! USE_CPPLIB */
- { "-fclasspath", "Set class path and suppress system path" },
- { "-fCLASSPATH", "Set class path" },
+ { "--classpath", "Set class path and suppress system path" },
+ { "--CLASSPATH", "Set class path" },
+ { "--main", "Choose class whose main method should be used" },
+ { "--encoding", "Choose input encoding (default is UTF-8)" },
{ "-I", "Add directory to class path" },
{ "-foutput-class-dir", "Directory where class files should be written" },
{ "-fuse-divide-subroutine", "" },
diff --git a/gcc/java/lang.c b/gcc/java/lang.c
index 5f95f2d..2dc33f2 100644
--- a/gcc/java/lang.c
+++ b/gcc/java/lang.c
@@ -121,6 +121,9 @@ int flag_hash_synchronization;
JNI, not CNI. */
int flag_jni = 0;
+/* The encoding of the source file. */
+char *current_encoding = NULL;
+
/* When non zero, report the now deprecated empty statements. */
int flag_extraneous_semicolon;
@@ -222,6 +225,13 @@ lang_decode_option (argc, argv)
return 1;
}
#undef ARG
+#define ARG "-fencoding="
+ if (strncmp (p, ARG, sizeof (ARG) - 1) == 0)
+ {
+ current_encoding = p + sizeof (ARG) - 1;
+ return 1;
+ }
+#undef ARG
if (p[0] == '-' && p[1] == 'f')
{
@@ -309,7 +319,9 @@ lang_decode_option (argc, argv)
return 0;
}
+/* Global open file. */
FILE *finput;
+
const char *
init_parse (filename)
const char *filename;
@@ -362,6 +374,7 @@ init_parse (filename)
}
}
}
+
init_lex ();
return filename;
@@ -370,7 +383,6 @@ init_parse (filename)
void
finish_parse ()
{
- fclose (finput);
jcf_dependency_write ();
}
diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index 535733f..4179b1d 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -24,15 +24,15 @@ of Sun Microsystems, Inc. in the United States and other countries.
The Free Software Foundation is independent of Sun Microsystems, Inc. */
/* It defines java_lex (yylex) that reads a Java ASCII source file
-possibly containing Unicode escape sequence or utf8 encoded characters
-and returns a token for everything found but comments, white spaces
-and line terminators. When necessary, it also fills the java_lval
-(yylval) union. It's implemented to be called by a re-entrant parser
-generated by Bison.
+ possibly containing Unicode escape sequence or utf8 encoded
+ characters and returns a token for everything found but comments,
+ white spaces and line terminators. When necessary, it also fills
+ the java_lval (yylval) union. It's implemented to be called by a
+ re-entrant parser generated by Bison.
-The lexical analysis conforms to the Java grammar described in "The
-Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
-Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
+ The lexical analysis conforms to the Java grammar described in "The
+ Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
+ Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
#include "keyword.h"
@@ -55,15 +55,18 @@ static int java_letter_or_digit_p PARAMS ((unicode_t));
static int java_parse_doc_section PARAMS ((unicode_t));
static void java_parse_end_comment PARAMS ((unicode_t));
static unicode_t java_get_unicode PARAMS ((void));
-static unicode_t java_read_unicode PARAMS ((int, int *));
+static unicode_t java_read_unicode PARAMS ((java_lexer *, int, int *));
static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
-static unicode_t java_read_char PARAMS ((void));
+static unicode_t java_read_char PARAMS ((java_lexer *));
static void java_allocate_new_line PARAMS ((void));
static void java_unget_unicode PARAMS ((void));
static unicode_t java_sneak_unicode PARAMS ((void));
+java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
void
-java_init_lex ()
+java_init_lex (finput, encoding)
+ FILE *finput;
+ const char *encoding;
{
#ifndef JC1_LITE
int java_lang_imported = 0;
@@ -114,9 +117,9 @@ java_init_lex ()
ctxp->lineno = lineno = 0;
ctxp->p_line = NULL;
ctxp->c_line = NULL;
- ctxp->unget_utf8_value = 0;
ctxp->minus_seen = 0;
ctxp->java_error_flag = 0;
+ ctxp->lexer = java_new_lexer (finput, encoding);
}
static char *
@@ -194,59 +197,180 @@ java_allocate_new_line ()
ctxp->c_line->white_space_only = 1;
}
-#define BAD_UTF8_VALUE 0xFFFE
-
-static unicode_t
-java_read_char ()
+/* Create a new lexer object. */
+java_lexer *
+java_new_lexer (finput, encoding)
+ FILE *finput;
+ const char *encoding;
{
- int c;
- int c1, c2;
+ java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
+ int enc_error = 0;
+
+ lex->finput = finput;
+ lex->bs_count = 0;
+ lex->unget_value = 0;
- if (ctxp->unget_utf8_value)
+#ifdef HAVE_ICONV
+ lex->handle = iconv_open ("UCS-2", encoding);
+ if (lex->handle == (iconv_t) -1)
{
- int to_return = ctxp->unget_utf8_value;
- ctxp->unget_utf8_value = 0;
- return (to_return);
+ /* FIXME: we should give a nice error based on errno here. */
+ enc_error = 1;
}
+ lex->first = -1;
+ lex->last = -1;
+#else /* HAVE_ICONV */
+ if (strcmp (encoding, DEFAULT_ENCODING))
+ enc_error = 1;
+#endif /* HAVE_ICONV */
- c = GETC ();
+ if (enc_error)
+ fatal ("unknown encoding: `%s'", encoding);
- if (c < 128)
- return (unicode_t)c;
- if (c == EOF)
- return UEOF;
- else
+ return lex;
+}
+
+void
+java_destroy_lexer (lex)
+ java_lexer *lex;
+{
+#ifdef HAVE_ICONV
+ iconv_close (lex->handle);
+#endif
+ free (lex);
+}
+
+static unicode_t
+java_read_char (lex)
+ java_lexer *lex;
+{
+ if (lex->unget_value)
{
- if ((c & 0xe0) == 0xc0)
- {
- c1 = GETC ();
- if ((c1 & 0xc0) == 0x80)
- return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
- c = c1;
- }
- else if ((c & 0xf0) == 0xe0)
- {
- c1 = GETC ();
- if ((c1 & 0xc0) == 0x80)
- {
- c2 = GETC ();
- if ((c2 & 0xc0) == 0x80)
- return (unicode_t)(((c & 0xf) << 12) +
- (( c1 & 0x3f) << 6) + (c2 & 0x3f));
- else
- c = c2;
- }
- else
- c = c1;
- }
- /* We looked for a UTF8 multi-byte sequence (since we saw an initial
- byte with the high bit set), but found invalid bytes instead.
- If the most recent byte was Ascii (and not EOF), we should
- unget it, in case it was a comment terminator or other delimitor. */
- if ((c & 0x80) == 0)
- UNGETC (c);
- return BAD_UTF8_VALUE;
+ unicode_t r = lex->unget_value;
+ lex->unget_value = 0;
+ return r;
}
+
+#ifdef HAVE_ICONV
+ {
+ char out[2];
+ size_t ir, inbytesleft, in_save, out_count;
+ char *inp, *outp;
+
+ while (1)
+ {
+ /* See if we need to read more data. If FIRST == 0 then the
+ previous conversion attempt ended in the middle of a
+ character at the end of the buffer. Otherwise we only have
+ to read if the buffer is empty. */
+ if (lex->first == 0 || lex->first >= lex->last)
+ {
+ int r;
+
+ if (lex->first >= lex->last)
+ {
+ lex->first = 0;
+ lex->last = 0;
+ }
+ if (feof (lex->finput))
+ return UEOF;
+ r = fread (&lex->buffer[lex->last], 1,
+ sizeof (lex->buffer) - lex->last,
+ lex->finput);
+ lex->last += r;
+ }
+
+ inbytesleft = lex->last - lex->first;
+
+ if (inbytesleft == 0)
+ {
+ /* We've tried to read and there is nothing left. */
+ return UEOF;
+ }
+
+ in_save = inbytesleft;
+ out_count = 2;
+ inp = &lex->buffer[lex->first];
+ outp = out;
+ ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
+ &outp, &out_count);
+ lex->first += in_save - inbytesleft;
+
+ if (out_count == 0)
+ {
+ /* Success. We assume that UCS-2 is big-endian. This
+ appears to be an ok assumption. */
+ unicode_t result;
+ result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
+ return result;
+ }
+
+ if (ir == (size_t) -1)
+ {
+ if (errno == EINVAL)
+ {
+ /* This is ok. This means that the end of our buffer
+ is in the middle of a character sequence. We just
+ move the valid part of the buffer to the beginning
+ to force a read. */
+ /* We use bcopy() because it should work for overlapping
+ regions. FIXME: use memmove() instead. */
+ bcopy (&lex->buffer[lex->first], &lex->buffer[0],
+ lex->last - lex->first);
+ lex->last -= lex->first;
+ lex->first = 0;
+ }
+ else
+ {
+ /* A more serious error. */
+ java_lex_error ("unrecognized character in input stream", 0);
+ return UEOF;
+ }
+ }
+ }
+ }
+#else /* HAVE_ICONV */
+ {
+ int c, c1, c2;
+ c = getc (lex->finput);
+
+ if (c < 128)
+ return (unicode_t)c;
+ if (c == EOF)
+ return UEOF;
+ else
+ {
+ if ((c & 0xe0) == 0xc0)
+ {
+ c1 = getc (lex->finput);
+ if ((c1 & 0xc0) == 0x80)
+ return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
+ c = c1;
+ }
+ else if ((c & 0xf0) == 0xe0)
+ {
+ c1 = getc (lex->finput);
+ if ((c1 & 0xc0) == 0x80)
+ {
+ c2 = getc (lex->finput);
+ if ((c2 & 0xc0) == 0x80)
+ return (unicode_t)(((c & 0xf) << 12) +
+ (( c1 & 0x3f) << 6) + (c2 & 0x3f));
+ else
+ c = c2;
+ }
+ else
+ c = c1;
+ }
+
+ /* We simply don't support invalid characters. */
+ java_lex_error ("malformed UTF-8 character", 0);
+ }
+ }
+#endif /* HAVE_ICONV */
+
+ /* We only get here on error. */
+ return UEOF;
}
static void
@@ -267,56 +391,54 @@ java_store_unicode (l, c, unicode_escape_p)
}
static unicode_t
-java_read_unicode (term_context, unicode_escape_p)
- int term_context;
- int *unicode_escape_p;
+java_read_unicode (lex, term_context, unicode_escape_p)
+ java_lexer *lex;
+ int term_context;
+ int *unicode_escape_p;
{
unicode_t c;
- long i, base;
- c = java_read_char ();
+ c = java_read_char (lex);
*unicode_escape_p = 0;
if (c != '\\')
- return ((term_context ? c :
- java_lineterminator (c) ? '\n' : (unicode_t)c));
-
- /* Count the number of preceeding '\' */
- for (base = ftell (finput), i = base-2; c == '\\';)
- {
- fseek (finput, i--, SEEK_SET);
- c = java_read_char (); /* Will fail if reading utf8 stream. FIXME */
+ {
+ lex->bs_count = 0;
+ return (term_context ? c : (java_lineterminator (c)
+ ? '\n'
+ : (unicode_t) c));
}
- fseek (finput, base, SEEK_SET);
- if ((base-i-3)%2 == 0) /* If odd number of \ seen */
+
+ ++lex->bs_count;
+ if ((lex->bs_count) % 2 == 1)
{
- c = java_read_char ();
+ /* Odd number of \ seen. */
+ c = java_read_char (lex);
if (c == 'u')
{
- unsigned short unicode = 0;
+ unicode_t unicode = 0;
int shift = 12;
/* Next should be 4 hex digits, otherwise it's an error.
The hex value is converted into the unicode, pushed into
the Unicode stream. */
for (shift = 12; shift >= 0; shift -= 4)
{
- if ((c = java_read_char ()) == UEOF)
+ if ((c = java_read_char (lex)) == UEOF)
return UEOF;
if (c >= '0' && c <= '9')
unicode |= (unicode_t)((c-'0') << shift);
else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
else
- java_lex_error
- ("Non hex digit in Unicode escape sequence", 0);
+ java_lex_error ("Non hex digit in Unicode escape sequence", 0);
}
*unicode_escape_p = 1;
- return (term_context ? unicode :
- (java_lineterminator (c) ? '\n' : unicode));
+ return (term_context
+ ? unicode : (java_lineterminator (c) ? '\n' : unicode));
}
- ctxp->unget_utf8_value = c;
+ lex->unget_value = c;
}
- return (unicode_t)'\\';
+ return (unicode_t) '\\';
}
static unicode_t
@@ -331,7 +453,7 @@ java_get_unicode ()
for (;;)
{
int unicode_escape_p;
- c = java_read_unicode (0, &unicode_escape_p);
+ c = java_read_unicode (ctxp->lexer, 0, &unicode_escape_p);
java_store_unicode (ctxp->c_line, c, unicode_escape_p);
if (ctxp->c_line->white_space_only
&& !JAVA_WHITE_SPACE_P (c) && c!='\n')
@@ -354,7 +476,7 @@ java_lineterminator (c)
else if (c == '\r') /* CR */
{
int unicode_escape_p;
- c = java_read_unicode (1, &unicode_escape_p);
+ c = java_read_unicode (ctxp->lexer, 1, &unicode_escape_p);
if (c == '\r')
{
/* In this case we will have another terminator. For some
@@ -363,7 +485,7 @@ java_lineterminator (c)
up in the actual text of the line, causing an error. So
instead we choose a very low-level method. FIXME: this
is incredibly ugly. */
- UNGETC (c);
+ ctxp->lexer->unget_value = c;
}
else if (c != '\n')
{
@@ -939,7 +1061,7 @@ java_lex (java_lval)
char *string;
for (no_error = 1, c = java_get_unicode ();
- c != '"' && c != '\n'; c = java_get_unicode ())
+ c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
{
if (c == '\\')
c = java_parse_escape_sequence ();
diff --git a/gcc/java/lex.h b/gcc/java/lex.h
index d4754ab..cf29aa1 100644
--- a/gcc/java/lex.h
+++ b/gcc/java/lex.h
@@ -35,6 +35,13 @@ extern int lineno;
/* A Unicode character, as read from the input file */
typedef unsigned short unicode_t;
+#ifdef HAVE_ICONV
+#include <iconv.h>
+#endif /* HAVE_ICONV */
+
+/* Default encoding to use if no encoding is specified. */
+#define DEFAULT_ENCODING "UTF-8"
+
/* Debug macro to print-out what we match */
#ifdef JAVA_LEX_DEBUG
#ifdef JAVA_LEX_DEBUG_CHAR
@@ -96,12 +103,38 @@ typedef struct _java_lc {
int col;
} java_lc;
+typedef struct java_lexer
+{
+ /* The file from which we're reading. */
+ FILE *finput;
-#define JAVA_LINE_MAX 80
+ /* Number of consecutive backslashes we've read. */
+ int bs_count;
+
+ /* If nonzero, a value that was pushed back. */
+ unicode_t unget_value;
+
+#ifdef HAVE_ICONV
+ /* The handle for the iconv converter we're using. */
+ iconv_t handle;
-/* Macro to read and unread bytes */
-#define UNGETC(c) ungetc(c, finput)
-#define GETC() getc(finput)
+ /* Bytes we've read from the file but have not sent to iconv. */
+ char buffer[1024];
+
+ /* Index of first valid character in buffer, -1 if no valid
+ characters. */
+ int first;
+
+ /* Index of last valid character in buffer, plus one. -1 if no
+ valid characters in buffer. */
+ int last;
+#endif /* HAVE_ICONV */
+} java_lexer;
+
+/* Destroy a lexer object. */
+extern void java_destroy_lexer PARAMS ((java_lexer *));
+
+#define JAVA_LINE_MAX 80
/* Build a location compound integer */
#define BUILD_LOCATION() ((ctxp->elc.line << 12) | (ctxp->elc.col & 0xfff))
diff --git a/gcc/java/parse.h b/gcc/java/parse.h
index 8071237..b1b0e8e 100644
--- a/gcc/java/parse.h
+++ b/gcc/java/parse.h
@@ -728,13 +728,12 @@ typedef struct _jdeplist {
struct parser_ctxt {
const char *filename; /* Current filename */
- FILE *finput; /* Current file input stream */
struct parser_ctxt *next;
+ java_lexer *lexer; /* Current lexer state */
char marker_begining; /* Marker. Should be a sub-struct */
struct java_line *p_line, *c_line; /* Previous and current line */
java_lc elc; /* Error's line column info */
- unicode_t unget_utf8_value; /* An unget utf8 value */
int ccb_indent; /* Keep track of {} indent, lexer */
int first_ccb_indent1; /* First { at ident level 1 */
int last_ccb_indent1; /* Last } at ident level 1 */
@@ -928,7 +927,7 @@ extern void reset_report PARAMS ((void));
/* Always in use, no matter what you compile */
void java_push_parser_context PARAMS ((void));
void java_pop_parser_context PARAMS ((int));
-void java_init_lex PARAMS ((void));
+void java_init_lex PARAMS ((FILE *, const char *));
extern void java_parser_context_save_global PARAMS ((void));
extern void java_parser_context_restore_global PARAMS ((void));
int yyparse PARAMS ((void));
diff --git a/gcc/java/parse.y b/gcc/java/parse.y
index 9c92e58..42f4206 100644
--- a/gcc/java/parse.y
+++ b/gcc/java/parse.y
@@ -2618,10 +2618,13 @@ java_pop_parser_context (generate)
next->incomplete_class = ctxp->incomplete_class;
next->gclass_list = ctxp->gclass_list;
lineno = ctxp->lineno;
- finput = ctxp->finput;
current_class = ctxp->current_class;
}
+ /* If the old and new lexers differ, then free the old one. */
+ if (ctxp->lexer && next && ctxp->lexer != next->lexer)
+ java_destroy_lexer (ctxp->lexer);
+
/* Set the single import class file flag to 0 for the current list
of imported things */
for (current = ctxp->import_list; current; current = TREE_CHAIN (current))
@@ -2661,7 +2664,6 @@ java_parser_context_save_global ()
else if (ctxp->saved_data)
create_new_parser_context (1);
- ctxp->finput = finput;
ctxp->lineno = lineno;
ctxp->current_class = current_class;
ctxp->filename = input_filename;
@@ -2675,7 +2677,6 @@ java_parser_context_save_global ()
void
java_parser_context_restore_global ()
{
- finput = ctxp->finput;
lineno = ctxp->lineno;
current_class = ctxp->current_class;
input_filename = ctxp->filename;