diff options
author | Tom Tromey <tromey@redhat.com> | 2001-06-20 16:21:24 +0000 |
---|---|---|
committer | Tom Tromey <tromey@gcc.gnu.org> | 2001-06-20 16:21:24 +0000 |
commit | c01b7cdf97e69255dd4a5dddda782ba29a32b3d1 (patch) | |
tree | 532ce701f09afb858ec0ca8628468260e7cbbb80 /gcc/java | |
parent | 5d291213315a18afad4054e4cdcfdd303ed42fd2 (diff) | |
download | gcc-c01b7cdf97e69255dd4a5dddda782ba29a32b3d1.zip gcc-c01b7cdf97e69255dd4a5dddda782ba29a32b3d1.tar.gz gcc-c01b7cdf97e69255dd4a5dddda782ba29a32b3d1.tar.bz2 |
re PR java/2319 (invalid UTF-8 sequences should be rejected)
* lex.c (java_read_char): Disallow invalid and overlong
sequences. Fixes PR java/2319.
From-SVN: r43475
Diffstat (limited to 'gcc/java')
-rw-r--r-- | gcc/java/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/java/lex.c | 33 |
2 files changed, 28 insertions, 10 deletions
diff --git a/gcc/java/ChangeLog b/gcc/java/ChangeLog
index f55ad6c..96e1ba5 100644
--- a/gcc/java/ChangeLog
+++ b/gcc/java/ChangeLog
@@ -1,3 +1,8 @@
+2001-06-19 Tom Tromey <tromey@redhat.com>
+
+        * lex.c (java_read_char): Disallow invalid and overlong
+        sequences. Fixes PR java/2319.
+
 2001-06-05 Jeff Sturm <jsturm@one-point.com>
 
         * decl.c (create_primitive_vtable): Don't call make_decl_rtl.
diff --git a/gcc/java/lex.c b/gcc/java/lex.c
index 28a73e3..35cd317 100644
--- a/gcc/java/lex.c
+++ b/gcc/java/lex.c
@@ -454,15 +454,21 @@ java_read_char (lex)
       if (c == EOF)
         return UEOF;
       if (c < 128)
-        return (unicode_t)c;
+        return (unicode_t) c;
       else
         {
           if ((c & 0xe0) == 0xc0)
             {
               c1 = getc (lex->finput);
               if ((c1 & 0xc0) == 0x80)
-                return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
-              c = c1;
+                {
+                  unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
+                  /* Check for valid 2-byte characters. We explicitly
+                     allow \0 because this encoding is common in the
+                     Java world. */
+                  if (r == 0 || (r >= 0x80 && r <= 0x7ff))
+                    return r;
+                }
             }
           else if ((c & 0xf0) == 0xe0)
             {
@@ -471,16 +477,23 @@ java_read_char (lex)
                 {
                   c2 = getc (lex->finput);
                   if ((c2 & 0xc0) == 0x80)
-                    return (unicode_t)(((c & 0xf) << 12) +
-                                       (( c1 & 0x3f) << 6) + (c2 & 0x3f));
-                  else
-                    c = c2;
+                    {
+                      unicode_t r = (unicode_t)(((c & 0xf) << 12) +
+                                                (( c1 & 0x3f) << 6)
+                                                + (c2 & 0x3f));
+                      /* Check for valid 3-byte characters.
+                         Don't allow surrogate, \ufffe or \uffff. */
+                      if (r >= 0x800 && r <= 0xffff
+                          && ! (r >= 0xd800 && r <= 0xdfff)
+                          && r != 0xfffe && r != 0xffff)
+                        return r;
+                    }
                 }
-              else
-                c = c1;
             }
 
-          /* We simply don't support invalid characters. */
+          /* We simply don't support invalid characters. We also
+             don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
+             cannot be valid Java characters. */
           java_lex_error ("malformed UTF-8 character", 0);
         }
     }