about summary refs log tree commit diff
diff options
context:
space:
mode:
author Paolo Carlini <paolo@gcc.gnu.org> 2015-07-02 18:54:41 +0000
committer Paolo Carlini <paolo@gcc.gnu.org> 2015-07-02 18:54:41 +0000
commit fbb22910cfa4e4567b46fc8b74ccfad92fa745d8 (patch)
tree 2aac293a422c002719640bf2767fde8c557ab77b
parent a05d02b293b299352b9523875e96bf697f96baf4 (diff)
download gcc-fbb22910cfa4e4567b46fc8b74ccfad92fa745d8.zip
gcc-fbb22910cfa4e4567b46fc8b74ccfad92fa745d8.tar.gz
gcc-fbb22910cfa4e4567b46fc8b74ccfad92fa745d8.tar.bz2
re PR preprocessor/53690 ([C++11] \u0000 and \U00000000 are wrongly encoded as U+0001.)
/libcpp
2015-07-02  Paolo Carlini  <paolo.carlini@oracle.com>

	PR c++/53690
	* charset.c (_cpp_valid_ucn): Add cppchar_t * parameter and change
	return type to bool.  Fix encoding of \u0000 and \U00000000 in C++.
	(convert_ucn): Adjust call.
	* lex.c (forms_identifier_p): Likewise.
	* internal.h (_cpp_valid_ucn): Adjust declaration.

/gcc/testsuite
2015-07-02  Paolo Carlini  <paolo.carlini@oracle.com>

	PR c++/53690
	* g++.dg/cpp/pr53690.C: New.

From-SVN: r225353
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/g++.dg/cpp/pr53690.C 7
-rw-r--r-- libcpp/ChangeLog 11
-rw-r--r-- libcpp/charset.c 30
-rw-r--r-- libcpp/internal.h 7
-rw-r--r-- libcpp/lex.c 3
6 files changed, 43 insertions, 20 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8d7005f..530f8fc 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2015-07-02 Paolo Carlini <paolo.carlini@oracle.com>
+
+ PR c++/53690
+ * g++.dg/cpp/pr53690.C: New.
+
2015-07-02 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gcc.target/powerpc/vec-cmp.c: New test.
diff --git a/gcc/testsuite/g++.dg/cpp/pr53690.C b/gcc/testsuite/g++.dg/cpp/pr53690.C
new file mode 100644
index 0000000..ea91359
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/pr53690.C
@@ -0,0 +1,7 @@
+// PR c++/53690
+// { dg-do compile { target c++11 } }
+
+int array1[U'\U00000000' == 0 ? 1 : -1];
+int array2[U'\u0000' == 0 ? 1 : -1];
+int array3[u'\U00000000' == 0 ? 1 : -1];
+int array4[u'\u0000' == 0 ? 1 : -1];
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index 4e29802..ab259c5 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,12 @@
+2015-07-02 Paolo Carlini <paolo.carlini@oracle.com>
+
+ PR c++/53690
+ * charset.c (_cpp_valid_ucn): Add cppchar_t * parameter and change
+ return type to bool. Fix encoding of \u0000 and \U00000000 in C++.
+ (convert_ucn): Adjust call.
+ * lex.c (forms_identifier_p): Likewise.
+ * internal.h (_cpp_valid_ucn): Adjust declaration.
+
2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
Implement N4197 - Adding u8 character literals
@@ -5,7 +14,7 @@
(struct cpp_options): Add utf8_char_literals.
* init.c (struct lang_flags): Add utf8_char_literals;
(struct lang_flags lang_defaults): Add column for utf8_char_literals.
- * macro.c (stringify_arg()): Treat CPP_UTF8CHAR token;
+ * macro.c (stringify_arg()): Treat CPP_UTF8CHAR token;
* expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()):
Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens;
(cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token;
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 8e92bc6..5a1c929 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -972,21 +972,20 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
or 0060 (`), nor one in the range D800 through DFFF inclusive.
*PSTR must be preceded by "\u" or "\U"; it is assumed that the
- buffer end is delimited by a non-hex digit. Returns zero if the
- UCN has not been consumed.
+ buffer end is delimited by a non-hex digit. Returns false if the
+ UCN has not been consumed, true otherwise.
- Otherwise the nonzero value of the UCN, whether valid or invalid,
- is returned. Diagnostics are emitted for invalid values. PSTR
- is updated to point one beyond the UCN, or to the syntactically
- invalid character.
+ The value of the UCN, whether valid or invalid, is returned in *CP.
+ Diagnostics are emitted for invalid values. PSTR is updated to point
+ one beyond the UCN, or to the syntactically invalid character.
IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
an identifier, or 2 otherwise. */
-cppchar_t
+bool
_cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
const uchar *limit, int identifier_pos,
- struct normalize_state *nst)
+ struct normalize_state *nst, cppchar_t *cp)
{
cppchar_t result, c;
unsigned int length;
@@ -1030,8 +1029,11 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
multiple tokens in identifiers, so we can't give a helpful
error message in that case. */
if (length && identifier_pos)
- return 0;
-
+ {
+ *cp = 0;
+ return false;
+ }
+
*pstr = str;
if (length)
{
@@ -1079,10 +1081,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
(int) (str - base), base);
}
- if (result == 0)
- result = 1;
-
- return result;
+ *cp = result;
+ return true;
}
/* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate
@@ -1100,7 +1100,7 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
from++; /* Skip u/U. */
- ucn = _cpp_valid_ucn (pfile, &from, limit, 0, &nst);
+ _cpp_valid_ucn (pfile, &from, limit, 0, &nst, &ucn);
rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft);
if (rval)
diff --git a/libcpp/internal.h b/libcpp/internal.h
index 95cf9c2..abd464f 100644
--- a/libcpp/internal.h
+++ b/libcpp/internal.h
@@ -744,9 +744,10 @@ struct normalize_state
#define NORMALIZE_STATE_UPDATE_IDNUM(st, c) \
((st)->previous = (c), (st)->prev_class = 0)
-extern cppchar_t _cpp_valid_ucn (cpp_reader *, const unsigned char **,
- const unsigned char *, int,
- struct normalize_state *state);
+extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **,
+ const unsigned char *, int,
+ struct normalize_state *state,
+ cppchar_t *);
extern void _cpp_destroy_iconv (cpp_reader *);
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
unsigned char *, size_t, size_t,
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 8f2bdc8..0aa1090 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1244,9 +1244,10 @@ forms_identifier_p (cpp_reader *pfile, int first,
&& *buffer->cur == '\\'
&& (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
{
+ cppchar_t s;
buffer->cur += 2;
if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
- state))
+ state, &s))
return true;
buffer->cur -= 2;
}