lex.c (java_new_lexer): Initialize out_first and out_last fields.

* lex.c (java_new_lexer): Initialize out_first and out_last fields. * lex.h (java_lexer): Added out_buffer, out_first, out_last. From-SVN: r36830
author: Tom Tromey <tromey@cygnus.com> 2000-10-10 22:02:12 +0000
committer: Tom Tromey <tromey@gcc.gnu.org> 2000-10-10 22:02:12 +0000
commit: 2e165c53deb61806019c4a72f006abaa1c2379b3 (patch)
tree: 47943869eedfa993a7aba6e2b6e0474e725d44bd /gcc/java
parent: 67e570ea48ad148b9f2d6a4c27f372c4c7532768 (diff)
download: gcc-2e165c53deb61806019c4a72f006abaa1c2379b3.zip
gcc-2e165c53deb61806019c4a72f006abaa1c2379b3.tar.gz
gcc-2e165c53deb61806019c4a72f006abaa1c2379b3.tar.bz2
3 files changed, 101 insertions, 61 deletions
diff --git a/gcc/java/ChangeLog b/gcc/java/ChangeLog
index b63d751..99fa854 100644
--- a/gcc/java/ChangeLog
+++ b/gcc/java/ChangeLog
@@ -1,3 +1,9 @@
+2000-10-10  Tom Tromey  <tromey@cygnus.com>
+
+	* lex.c (java_new_lexer): Initialize out_first and out_last
+	fields.
+	* lex.h (java_lexer): Added out_buffer, out_first, out_last.
+
 2000-10-07  Alexandre Petit-Bianco  <apbianco@cygnus.com>
 
 	Patch contributed by Corey Minyard.
diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index be1b139..8fb6a15 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -219,6 +219,8 @@ java_new_lexer (finput, encoding)
     }
   lex->first = -1;
   lex->last = -1;
+  lex->out_first = -1;
+  lex->out_last = -1;
 #else /* HAVE_ICONV */
   if (strcmp (encoding, DEFAULT_ENCODING))
     enc_error = 1;
@@ -253,81 +255,99 @@ java_read_char (lex)
 
 #ifdef HAVE_ICONV
   {
-    char out[2];
-    size_t ir, inbytesleft, in_save, out_count;
+    size_t ir, inbytesleft, in_save, out_count, out_save;
     char *inp, *outp;
+    unicode_t result;
 
-    while (1)
+    /* If there is data which has already been converted, use it.  */
+    if (lex->out_first == -1 || lex->out_first >= lex->out_last)
       {
-	/* See if we need to read more data.  If FIRST == 0 then the
-	   previous conversion attempt ended in the middle of a
-	   character at the end of the buffer.  Otherwise we only have
-	   to read if the buffer is empty.  */
-	if (lex->first == 0 || lex->first >= lex->last)
-	  {
-	    int r;
+	lex->out_first = 0;
+	lex->out_last = 0;
 
-	    if (lex->first >= lex->last)
+	while (1)
+	  {
+	    /* See if we need to read more data.  If FIRST == 0 then
+	       the previous conversion attempt ended in the middle of
+	       a character at the end of the buffer.  Otherwise we
+	       only have to read if the buffer is empty.  */
+	    if (lex->first == 0 || lex->first >= lex->last)
 	      {
-		lex->first = 0;
-		lex->last = 0;
+		int r;
+
+		if (lex->first >= lex->last)
+		  {
+		    lex->first = 0;
+		    lex->last = 0;
+		  }
+		if (feof (lex->finput))
+		  return UEOF;
+		r = fread (&lex->buffer[lex->last], 1,
+			   sizeof (lex->buffer) - lex->last,
+			   lex->finput);
+		lex->last += r;
 	      }
-	    if (feof (lex->finput))
-	      return UEOF;
-	    r = fread (&lex->buffer[lex->last], 1,
-		       sizeof (lex->buffer) - lex->last,
-		       lex->finput);
-	    lex->last += r;
-	  }
 
-	inbytesleft = lex->last - lex->first;
-
-	if (inbytesleft == 0)
-	  {
-	    /* We've tried to read and there is nothing left.  */
-	    return UEOF;
-	  }
+	    inbytesleft = lex->last - lex->first;
+	    out_count = sizeof (lex->out_buffer) - lex->out_last;
 
-	in_save = inbytesleft;
-	out_count = 2;
-	inp = &lex->buffer[lex->first];
-	outp = out;
-	ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
-		    &outp, &out_count);
-	lex->first += in_save - inbytesleft;
-
-	if (out_count == 0)
-	  {
-	    /* Success.  We assume that UCS-2 is big-endian.  This
-	       appears to be an ok assumption.  */
-	    unicode_t result;
-	    result = (((unsigned char) out[0]) << 8) | (unsigned char) out[1];
-	    return result;
-	  }
-
-	if (ir == (size_t) -1)
-	  {
-	    if (errno == EINVAL)
+	    if (inbytesleft == 0)
 	      {
-		/* This is ok.  This means that the end of our buffer
-		   is in the middle of a character sequence.  We just
-		   move the valid part of the buffer to the beginning
-		   to force a read.  */
-		/* We use bcopy() because it should work for
-		   overlapping strings.  Use memmove() instead... */
-		bcopy (&lex->buffer[lex->first], &lex->buffer[0],
-		       lex->last - lex->first);
-		lex->last -= lex->first;
-		lex->first = 0;
+		/* We've tried to read and there is nothing left.  */
+		return UEOF;
 	      }
-	    else
+
+	    in_save = inbytesleft;
+	    out_save = out_count;
+	    inp = &lex->buffer[lex->first];
+	    outp = &lex->out_buffer[lex->out_last];
+	    ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
+			&outp, &out_count);
+	    lex->first += in_save - inbytesleft;
+	    lex->out_last += out_save - out_count;
+
+	    /* If we converted anything at all, move along.  */
+	    if (out_count != out_save)
+	      break;
+
+	    if (ir == (size_t) -1)
 	      {
-		/* A more serious error.  */
-		java_lex_error ("unrecognized character in input stream", 0);
-		return UEOF;
+		if (errno == EINVAL)
+		  {
+		    /* This is ok.  This means that the end of our buffer
+		       is in the middle of a character sequence.  We just
+		       move the valid part of the buffer to the beginning
+		       to force a read.  */
+		    /* We use bcopy() because it should work for
+		       overlapping strings.  Use memmove() instead... */
+		    bcopy (&lex->buffer[lex->first], &lex->buffer[0],
+			   lex->last - lex->first);
+		    lex->last -= lex->first;
+		    lex->first = 0;
+		  }
+		else
+		  {
+		    /* A more serious error.  */
+		    java_lex_error ("unrecognized character in input stream",
+				    0);
+		    return UEOF;
+		  }
 	      }
 	  }
       }
+
+    if (lex->out_first == -1 || lex->out_first >= lex->out_last)
+      {
+	/* Don't have any data.  */
+	return UEOF;
+      }
+
+    /* Success.  We assume that UCS-2 is big-endian.  This appears to
+       be an ok assumption.  */
+    result = ((((unsigned char) lex->out_buffer[lex->out_first]) << 8)
+	      | (unsigned char) lex->out_buffer[lex->out_first + 1]);
+    lex->out_first += 2;
+    return result;
   }
 #else /* HAVE_ICONV */
   {
diff --git a/gcc/java/lex.h b/gcc/java/lex.h
index 68cb1c0..71a030d 100644
--- a/gcc/java/lex.h
+++ b/gcc/java/lex.h
@@ -128,6 +128,20 @@ typedef struct java_lexer
   /* Index of last valid character in buffer, plus one.  -1 if no
      valid characters in buffer.  */
   int last;
+
+  /* This is a buffer of characters already converted by iconv.  We
+     use `char' here because we're assuming that iconv() converts to
+     big-endian UCS-2, and then we convert it ourselves.  */
+  char out_buffer[1024];
+
+  /* Index of first valid output character.  -1 if no valid
+     characters.  */
+  int out_first;
+
+  /* Index of last valid output character, plus one.  -1 if no valid
+     characters.  */
+  int out_last;
+
 #endif /* HAVE_ICONV */
 } java_lexer;
author	Tom Tromey <tromey@cygnus.com>	2000-10-10 22:02:12 +0000
committer	Tom Tromey <tromey@gcc.gnu.org>	2000-10-10 22:02:12 +0000
commit	2e165c53deb61806019c4a72f006abaa1c2379b3 (patch)
tree	47943869eedfa993a7aba6e2b6e0474e725d44bd /gcc/java
parent	67e570ea48ad148b9f2d6a4c27f372c4c7532768 (diff)
download	gcc-2e165c53deb61806019c4a72f006abaa1c2379b3.zip gcc-2e165c53deb61806019c4a72f006abaa1c2379b3.tar.gz gcc-2e165c53deb61806019c4a72f006abaa1c2379b3.tar.bz2