diff options
author | Tom Tromey <tromey@cygnus.com> | 2000-10-10 22:02:12 +0000 |
---|---|---|
committer | Tom Tromey <tromey@gcc.gnu.org> | 2000-10-10 22:02:12 +0000 |
commit | 2e165c53deb61806019c4a72f006abaa1c2379b3 (patch) | |
tree | 47943869eedfa993a7aba6e2b6e0474e725d44bd | |
parent | 67e570ea48ad148b9f2d6a4c27f372c4c7532768 (diff) | |
download | gcc-2e165c53deb61806019c4a72f006abaa1c2379b3.zip gcc-2e165c53deb61806019c4a72f006abaa1c2379b3.tar.gz gcc-2e165c53deb61806019c4a72f006abaa1c2379b3.tar.bz2 |
lex.c (java_new_lexer): Initialize out_first and out_last fields.
* lex.c (java_new_lexer): Initialize out_first and out_last
fields.
* lex.h (java_lexer): Added out_buffer, out_first, out_last.
From-SVN: r36830
-rw-r--r-- | gcc/java/ChangeLog | 6 |
-rw-r--r-- | gcc/java/lex.c | 142 |
-rw-r--r-- | gcc/java/lex.h | 14 |
3 files changed, 101 insertions, 61 deletions
diff --git a/gcc/java/ChangeLog b/gcc/java/ChangeLog index b63d751..99fa854 100644 --- a/gcc/java/ChangeLog +++ b/gcc/java/ChangeLog @@ -1,3 +1,9 @@ +2000-10-10 Tom Tromey <tromey@cygnus.com> + + * lex.c (java_new_lexer): Initialize out_first and out_last + fields. + * lex.h (java_lexer): Added out_buffer, out_first, out_last. + 2000-10-07 Alexandre Petit-Bianco <apbianco@cygnus.com> Patch contributed by Corey Minyard. diff --git a/gcc/java/lex.c b/gcc/java/lex.c index be1b139..8fb6a15 100644 --- a/gcc/java/lex.c +++ b/gcc/java/lex.c @@ -219,6 +219,8 @@ java_new_lexer (finput, encoding) } lex->first = -1; lex->last = -1; + lex->out_first = -1; + lex->out_last = -1; #else /* HAVE_ICONV */ if (strcmp (encoding, DEFAULT_ENCODING)) enc_error = 1; @@ -253,81 +255,99 @@ java_read_char (lex) #ifdef HAVE_ICONV { - char out[2]; - size_t ir, inbytesleft, in_save, out_count; + size_t ir, inbytesleft, in_save, out_count, out_save; char *inp, *outp; + unicode_t result; - while (1) + /* If there is data which has already been converted, use it. */ + if (lex->out_first == -1 || lex->out_first >= lex->out_last) { - /* See if we need to read more data. If FIRST == 0 then the - previous conversion attempt ended in the middle of a - character at the end of the buffer. Otherwise we only have - to read if the buffer is empty. */ - if (lex->first == 0 || lex->first >= lex->last) - { - int r; + lex->out_first = 0; + lex->out_last = 0; - if (lex->first >= lex->last) + while (1) + { + /* See if we need to read more data. If FIRST == 0 then + the previous conversion attempt ended in the middle of + a character at the end of the buffer. Otherwise we + only have to read if the buffer is empty. 
*/ + if (lex->first == 0 || lex->first >= lex->last) { - lex->first = 0; - lex->last = 0; + int r; + + if (lex->first >= lex->last) + { + lex->first = 0; + lex->last = 0; + } + if (feof (lex->finput)) + return UEOF; + r = fread (&lex->buffer[lex->last], 1, + sizeof (lex->buffer) - lex->last, + lex->finput); + lex->last += r; } - if (feof (lex->finput)) - return UEOF; - r = fread (&lex->buffer[lex->last], 1, - sizeof (lex->buffer) - lex->last, - lex->finput); - lex->last += r; - } - inbytesleft = lex->last - lex->first; - - if (inbytesleft == 0) - { - /* We've tried to read and there is nothing left. */ - return UEOF; - } + inbytesleft = lex->last - lex->first; + out_count = sizeof (lex->out_buffer) - lex->out_last; - in_save = inbytesleft; - out_count = 2; - inp = &lex->buffer[lex->first]; - outp = out; - ir = iconv (lex->handle, (const char **) &inp, &inbytesleft, - &outp, &out_count); - lex->first += in_save - inbytesleft; - - if (out_count == 0) - { - /* Success. We assume that UCS-2 is big-endian. This - appears to be an ok assumption. */ - unicode_t result; - result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1]; - return result; - } - - if (ir == (size_t) -1) - { - if (errno == EINVAL) + if (inbytesleft == 0) { - /* This is ok. This means that the end of our buffer - is in the middle of a character sequence. We just - move the valid part of the buffer to the beginning - to force a read. */ - /* We use bcopy() because it should work for - overlapping strings. Use memmove() instead... */ - bcopy (&lex->buffer[lex->first], &lex->buffer[0], - lex->last - lex->first); - lex->last -= lex->first; - lex->first = 0; + /* We've tried to read and there is nothing left. 
*/ + return UEOF; } - else + + in_save = inbytesleft; + out_save = out_count; + inp = &lex->buffer[lex->first]; + outp = &lex->out_buffer[lex->out_last]; + ir = iconv (lex->handle, (const char **) &inp, &inbytesleft, + &outp, &out_count); + lex->first += in_save - inbytesleft; + lex->out_last += out_save - out_count; + + /* If we converted anything at all, move along. */ + if (out_count != out_save) + break; + + if (ir == (size_t) -1) { - /* A more serious error. */ - java_lex_error ("unrecognized character in input stream", 0); - return UEOF; + if (errno == EINVAL) + { + /* This is ok. This means that the end of our buffer + is in the middle of a character sequence. We just + move the valid part of the buffer to the beginning + to force a read. */ + /* We use bcopy() because it should work for + overlapping strings. Use memmove() instead... */ + bcopy (&lex->buffer[lex->first], &lex->buffer[0], + lex->last - lex->first); + lex->last -= lex->first; + lex->first = 0; + } + else + { + /* A more serious error. */ + java_lex_error ("unrecognized character in input stream", + 0); + return UEOF; + } } } } + + if (lex->out_first == -1 || lex->out_first >= lex->out_last) + { + /* Don't have any data. */ + return UEOF; + } + + /* Success. We assume that UCS-2 is big-endian. This appears to + be an ok assumption. */ + result = ((((unsigned char) lex->out_buffer[lex->out_first]) << 8) + | (unsigned char) lex->out_buffer[lex->out_first + 1]); + lex->out_first += 2; + return result; } #else /* HAVE_ICONV */ { diff --git a/gcc/java/lex.h b/gcc/java/lex.h index 68cb1c0..71a030d 100644 --- a/gcc/java/lex.h +++ b/gcc/java/lex.h @@ -128,6 +128,20 @@ typedef struct java_lexer /* Index of last valid character in buffer, plus one. -1 if no valid characters in buffer. */ int last; + + /* This is a buffer of characters already converted by iconv. We + use `char' here because we're assuming that iconv() converts to + big-endian UCS-2, and then we convert it ourselves. 
*/ + char out_buffer[1024]; + + /* Index of first valid output character. -1 if no valid + characters. */ + int out_first; + + /* Index of last valid output character, plus one. -1 if no valid + characters. */ + int out_last; + #endif /* HAVE_ICONV */ } java_lexer; |