about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- gcc/java/ChangeLog 6
-rw-r--r-- gcc/java/lex.c 142
-rw-r--r-- gcc/java/lex.h 14
3 files changed, 101 insertions, 61 deletions
diff --git a/gcc/java/ChangeLog b/gcc/java/ChangeLog
index b63d751..99fa854 100644
--- a/gcc/java/ChangeLog
+++ b/gcc/java/ChangeLog
@@ -1,3 +1,9 @@
+2000-10-10 Tom Tromey <tromey@cygnus.com>
+
+ * lex.c (java_new_lexer): Initialize out_first and out_last
+ fields.
+ * lex.h (java_lexer): Added out_buffer, out_first, out_last.
+
2000-10-07 Alexandre Petit-Bianco <apbianco@cygnus.com>
Patch contributed by Corey Minyard.
diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index be1b139..8fb6a15 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -219,6 +219,8 @@ java_new_lexer (finput, encoding)
}
lex->first = -1;
lex->last = -1;
+ lex->out_first = -1;
+ lex->out_last = -1;
#else /* HAVE_ICONV */
if (strcmp (encoding, DEFAULT_ENCODING))
enc_error = 1;
@@ -253,81 +255,99 @@ java_read_char (lex)
#ifdef HAVE_ICONV
{
- char out[2];
- size_t ir, inbytesleft, in_save, out_count;
+ size_t ir, inbytesleft, in_save, out_count, out_save;
char *inp, *outp;
+ unicode_t result;
- while (1)
+ /* If there is data which has already been converted, use it. */
+ if (lex->out_first == -1 || lex->out_first >= lex->out_last)
{
- /* See if we need to read more data. If FIRST == 0 then the
- previous conversion attempt ended in the middle of a
- character at the end of the buffer. Otherwise we only have
- to read if the buffer is empty. */
- if (lex->first == 0 || lex->first >= lex->last)
- {
- int r;
+ lex->out_first = 0;
+ lex->out_last = 0;
- if (lex->first >= lex->last)
+ while (1)
+ {
+ /* See if we need to read more data. If FIRST == 0 then
+ the previous conversion attempt ended in the middle of
+ a character at the end of the buffer. Otherwise we
+ only have to read if the buffer is empty. */
+ if (lex->first == 0 || lex->first >= lex->last)
{
- lex->first = 0;
- lex->last = 0;
+ int r;
+
+ if (lex->first >= lex->last)
+ {
+ lex->first = 0;
+ lex->last = 0;
+ }
+ if (feof (lex->finput))
+ return UEOF;
+ r = fread (&lex->buffer[lex->last], 1,
+ sizeof (lex->buffer) - lex->last,
+ lex->finput);
+ lex->last += r;
}
- if (feof (lex->finput))
- return UEOF;
- r = fread (&lex->buffer[lex->last], 1,
- sizeof (lex->buffer) - lex->last,
- lex->finput);
- lex->last += r;
- }
- inbytesleft = lex->last - lex->first;
-
- if (inbytesleft == 0)
- {
- /* We've tried to read and there is nothing left. */
- return UEOF;
- }
+ inbytesleft = lex->last - lex->first;
+ out_count = sizeof (lex->out_buffer) - lex->out_last;
- in_save = inbytesleft;
- out_count = 2;
- inp = &lex->buffer[lex->first];
- outp = out;
- ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
- &outp, &out_count);
- lex->first += in_save - inbytesleft;
-
- if (out_count == 0)
- {
- /* Success. We assume that UCS-2 is big-endian. This
- appears to be an ok assumption. */
- unicode_t result;
- result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
- return result;
- }
-
- if (ir == (size_t) -1)
- {
- if (errno == EINVAL)
+ if (inbytesleft == 0)
{
- /* This is ok. This means that the end of our buffer
- is in the middle of a character sequence. We just
- move the valid part of the buffer to the beginning
- to force a read. */
- /* We use bcopy() because it should work for
- overlapping strings. Use memmove() instead... */
- bcopy (&lex->buffer[lex->first], &lex->buffer[0],
- lex->last - lex->first);
- lex->last -= lex->first;
- lex->first = 0;
+ /* We've tried to read and there is nothing left. */
+ return UEOF;
}
- else
+
+ in_save = inbytesleft;
+ out_save = out_count;
+ inp = &lex->buffer[lex->first];
+ outp = &lex->out_buffer[lex->out_last];
+ ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
+ &outp, &out_count);
+ lex->first += in_save - inbytesleft;
+ lex->out_last += out_save - out_count;
+
+ /* If we converted anything at all, move along. */
+ if (out_count != out_save)
+ break;
+
+ if (ir == (size_t) -1)
{
- /* A more serious error. */
- java_lex_error ("unrecognized character in input stream", 0);
- return UEOF;
+ if (errno == EINVAL)
+ {
+ /* This is ok. This means that the end of our buffer
+ is in the middle of a character sequence. We just
+ move the valid part of the buffer to the beginning
+ to force a read. */
+ /* We use bcopy() because it should work for
+ overlapping strings. Use memmove() instead... */
+ bcopy (&lex->buffer[lex->first], &lex->buffer[0],
+ lex->last - lex->first);
+ lex->last -= lex->first;
+ lex->first = 0;
+ }
+ else
+ {
+ /* A more serious error. */
+ java_lex_error ("unrecognized character in input stream",
+ 0);
+ return UEOF;
+ }
}
}
}
+
+ if (lex->out_first == -1 || lex->out_first >= lex->out_last)
+ {
+ /* Don't have any data. */
+ return UEOF;
+ }
+
+ /* Success. We assume that UCS-2 is big-endian. This appears to
+ be an ok assumption. */
+ result = ((((unsigned char) lex->out_buffer[lex->out_first]) << 8)
+ | (unsigned char) lex->out_buffer[lex->out_first + 1]);
+ lex->out_first += 2;
+ return result;
}
#else /* HAVE_ICONV */
{
diff --git a/gcc/java/lex.h b/gcc/java/lex.h
index 68cb1c0..71a030d 100644
--- a/gcc/java/lex.h
+++ b/gcc/java/lex.h
@@ -128,6 +128,20 @@ typedef struct java_lexer
/* Index of last valid character in buffer, plus one. -1 if no
valid characters in buffer. */
int last;
+
+ /* This is a buffer of characters already converted by iconv. We
+ use `char' here because we're assuming that iconv() converts to
+ big-endian UCS-2, and then we convert it ourselves. */
+ char out_buffer[1024];
+
+ /* Index of first valid output character. -1 if no valid
+ characters. */
+ int out_first;
+
+ /* Index of last valid output character, plus one. -1 if no valid
+ characters. */
+ int out_last;
+
#endif /* HAVE_ICONV */
} java_lexer;