aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tom Tromey <tromey@redhat.com> 2001-06-20 16:21:24 +0000
committer: Tom Tromey <tromey@gcc.gnu.org> 2001-06-20 16:21:24 +0000
commit: c01b7cdf97e69255dd4a5dddda782ba29a32b3d1 (patch)
tree: 532ce701f09afb858ec0ca8628468260e7cbbb80
parent: 5d291213315a18afad4054e4cdcfdd303ed42fd2 (diff)
download: gcc-c01b7cdf97e69255dd4a5dddda782ba29a32b3d1.zip
gcc-c01b7cdf97e69255dd4a5dddda782ba29a32b3d1.tar.gz
gcc-c01b7cdf97e69255dd4a5dddda782ba29a32b3d1.tar.bz2
re PR java/2319 (invalid UTF-8 sequences should be rejected)
* lex.c (java_read_char): Disallow invalid and overlong sequences. Fixes PR java/2319. From-SVN: r43475
-rw-r--r-- gcc/java/ChangeLog | 5
-rw-r--r-- gcc/java/lex.c | 33
2 files changed, 28 insertions, 10 deletions
diff --git a/gcc/java/ChangeLog b/gcc/java/ChangeLog
index f55ad6c..96e1ba5 100644
--- a/gcc/java/ChangeLog
+++ b/gcc/java/ChangeLog
@@ -1,3 +1,8 @@
+2001-06-19 Tom Tromey <tromey@redhat.com>
+
+ * lex.c (java_read_char): Disallow invalid and overlong
+ sequences. Fixes PR java/2319.
+
2001-06-05 Jeff Sturm <jsturm@one-point.com>
* decl.c (create_primitive_vtable): Don't call make_decl_rtl.
diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index 28a73e3..35cd317 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -454,15 +454,21 @@ java_read_char (lex)
if (c == EOF)
return UEOF;
if (c < 128)
- return (unicode_t)c;
+ return (unicode_t) c;
else
{
if ((c & 0xe0) == 0xc0)
{
c1 = getc (lex->finput);
if ((c1 & 0xc0) == 0x80)
- return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
- c = c1;
+ {
+ unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
+ /* Check for valid 2-byte characters. We explicitly
+ allow \0 because this encoding is common in the
+ Java world. */
+ if (r == 0 || (r >= 0x80 && r <= 0x7ff))
+ return r;
+ }
}
else if ((c & 0xf0) == 0xe0)
{
@@ -471,16 +477,23 @@ java_read_char (lex)
{
c2 = getc (lex->finput);
if ((c2 & 0xc0) == 0x80)
- return (unicode_t)(((c & 0xf) << 12) +
- (( c1 & 0x3f) << 6) + (c2 & 0x3f));
- else
- c = c2;
+ {
+ unicode_t r = (unicode_t)(((c & 0xf) << 12) +
+ (( c1 & 0x3f) << 6)
+ + (c2 & 0x3f));
+ /* Check for valid 3-byte characters.
+ Don't allow surrogate, \ufffe or \uffff. */
+ if (r >= 0x800 && r <= 0xffff
+ && ! (r >= 0xd800 && r <= 0xdfff)
+ && r != 0xfffe && r != 0xffff)
+ return r;
+ }
}
- else
- c = c1;
}
- /* We simply don't support invalid characters. */
+ /* We simply don't support invalid characters. We also
+ don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
+ cannot be valid Java characters. */
java_lex_error ("malformed UTF-8 character", 0);
}
}