aboutsummaryrefslogtreecommitdiff
path: root/gcc/java/lex.c
diff options
context:
space:
mode:
authorPer Bothner <bothner@gcc.gnu.org>1999-03-13 11:21:38 -0800
committerPer Bothner <bothner@gcc.gnu.org>1999-03-13 11:21:38 -0800
commit45ec036eed0e74556fa7c8362469405d6f752898 (patch)
treeae957d164b86f4fe6639f911c2afd8096aa25bf0 /gcc/java/lex.c
parentb452ec852d0c8200297e6fce7205f3090aa8a01c (diff)
downloadgcc-45ec036eed0e74556fa7c8362469405d6f752898.zip
gcc-45ec036eed0e74556fa7c8362469405d6f752898.tar.gz
gcc-45ec036eed0e74556fa7c8362469405d6f752898.tar.bz2
lex.c (java_read_char): UNGET invalid non-initial utf8 character.
h * lex.c (java_read_char): UNGET invalid non-initial utf8 character. * lex.h (UNGETC): Change misleading macro. From-SVN: r25753
Diffstat (limited to 'gcc/java/lex.c')
-rw-r--r--gcc/java/lex.c13
1 files changed, 12 insertions, 1 deletions
diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index 8c40d47..54708ad 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -227,6 +227,7 @@ java_read_char ()
c1 = GETC ();
if ((c1 & 0xc0) == 0x80)
return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
+ c = c1;
}
else if ((c & 0xf0) == 0xe0)
{
@@ -237,8 +238,18 @@ java_read_char ()
if ((c2 & 0xc0) == 0x80)
return (unicode_t)(((c & 0xf) << 12) +
(( c1 & 0x3f) << 6) + (c2 & 0x3f));
+ else
+ c = c2;
}
+ else
+ c = c1;
}
+ /* We looked for a UTF8 multi-byte sequence (since we saw an initial
+ byte with the high bit set), but found invalid bytes instead.
+ If the most recent byte was ASCII (and not EOF), we should
+ unget it, in case it was a comment terminator or other delimiter. */
+ if ((c & 0x80) == 0)
+ UNGETC (c);
return BAD_UTF8_VALUE;
}
}
@@ -308,7 +319,7 @@ java_read_unicode (term_context, unicode_escape_p)
return (term_context ? unicode :
(java_lineterminator (c) ? '\n' : unicode));
}
- UNGETC (c);
+ ctxp->unget_utf8_value = c;
}
return (unicode_t)'\\';
}