aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Edward Smith-Rowland <3dw4rd@verizon.net> 2015-06-30 12:58:48 +0000
committer Edward Smith-Rowland <emsr@gcc.gnu.org> 2015-06-30 12:58:48 +0000
commit fe95b0366a02855988d4c626de2045dae6e11502 (patch)
tree 8b22f058594c24d03fcb8f25908876f4764a4e03 /gcc
parent 5b00d9211625c18148d3bacdc53c9f527557d063 (diff)
download gcc-fe95b0366a02855988d4c626de2045dae6e11502.zip
gcc-fe95b0366a02855988d4c626de2045dae6e11502.tar.gz
gcc-fe95b0366a02855988d4c626de2045dae6e11502.tar.bz2
Implement N4197 - Adding u8 character literals
libcpp:

2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>

Implement N4197 - Adding u8 character literals
* include/cpplib.h (UTF8CHAR, UTF8CHAR_USERDEF): New cpp tokens;
(struct cpp_options): Add utf8_char_literals.
* init.c (struct lang_flags): Add utf8_char_literals;
(struct lang_flags lang_defaults): Add column for utf8_char_literals.
* macro.c (stringify_arg()): Treat CPP_UTF8CHAR token;
* expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()):
Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens;
(cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token;
(eval_token(), _cpp_parse_expr()): Treat CPP_UTF8CHAR token.
* lex.c (lex_string(), _cpp_lex_direct()): Include CPP_UTF8CHAR tokens.
* charset.c (converter_for_type(), cpp_interpret_charconst()):
Treat CPP_UTF8CHAR token.

gcc/c-family:

2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>

Implement N4197 - Adding u8 character literals
* c-family/c-ada-spec.c (print_ada_macros()): Treat CPP_UTF8CHAR
like CPP_CHAR.
* c-family/c-common.c (c_parse_error()): print CPP_UTF8CHAR
and CPP_UTF8CHAR_USERDEF tokens.
* c-family/c-lex.c (c_lex_with_flags()): Treat CPP_UTF8CHAR_USERDEF
and CPP_UTF8CHAR tokens; (lex_charconst()): Treat CPP_UTF8CHAR token.

gcc/cp:

2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>

Implement N4197 - Adding u8 character literals
* parser.c (cp_parser_primary_expression()): Treat CPP_UTF8CHAR
and CPP_UTF8CHAR_USERDEF tokens;
(cp_parser_parenthesized_expression_list()): Treat CPP_UTF8CHAR token.

gcc/testsuite:

2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>

Implement N4197 - Adding u8 character literals
* g++.dg/cpp1z/utf8.C: New.
* g++.dg/cpp1z/utf8-neg.C: New.
* g++.dg/cpp1z/udlit-utf8char.C: New.

From-SVN: r225185
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/c-family/ChangeLog 10
-rw-r--r-- gcc/c-family/c-ada-spec.c 1
-rw-r--r-- gcc/c-family/c-common.c 9
-rw-r--r-- gcc/c-family/c-lex.c 4
-rw-r--r-- gcc/cp/ChangeLog 7
-rw-r--r-- gcc/cp/parser.c 3
-rw-r--r-- gcc/testsuite/ChangeLog 7
-rw-r--r-- gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C 8
-rw-r--r-- gcc/testsuite/g++.dg/cpp1z/utf8-neg.C 7
-rw-r--r-- gcc/testsuite/g++.dg/cpp1z/utf8.C 15
10 files changed, 69 insertions, 2 deletions
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog
index 0e03e7e..66ae6a8 100644
--- a/gcc/c-family/ChangeLog
+++ b/gcc/c-family/ChangeLog
@@ -1,3 +1,13 @@
+2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
+
+ Implement N4197 - Adding u8 character literals
+ * c-family/c-ada-spec.c (print_ada_macros()): Treat CPP_UTF8CHAR
+ like CPP_CHAR.
+ * c-family/c-common.c (c_parse_error()): print CPP_UTF8CHAR
+ and CPP_UTF8CHAR_USERDEF tokens.
+ * c-family/c-lex.c (c_lex_with_flags()): Treat CPP_UTF8CHAR_USERDEF
+ and CPP_UTF8CHAR tokens; (lex_charconst()): Treat CPP_UTF8CHAR token.
+
2015-06-29 Manuel López-Ibáñez <manu@gcc.gnu.org>
PR fortran/66605
diff --git a/gcc/c-family/c-ada-spec.c b/gcc/c-family/c-ada-spec.c
index ef3c5e3..41d612f 100644
--- a/gcc/c-family/c-ada-spec.c
+++ b/gcc/c-family/c-ada-spec.c
@@ -249,6 +249,7 @@ print_ada_macros (pretty_printer *pp, cpp_hashnode **macros, int max_ada_macros)
case CPP_WCHAR:
case CPP_CHAR16:
case CPP_CHAR32:
+ case CPP_UTF8CHAR:
case CPP_NAME:
case CPP_STRING:
case CPP_NUMBER:
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index ac42e4a..835fc2c 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -10256,7 +10256,8 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type,
else if (token_type == CPP_CHAR
|| token_type == CPP_WCHAR
|| token_type == CPP_CHAR16
- || token_type == CPP_CHAR32)
+ || token_type == CPP_CHAR32
+ || token_type == CPP_UTF8CHAR)
{
unsigned int val = TREE_INT_CST_LOW (value);
const char *prefix;
@@ -10275,6 +10276,9 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type,
case CPP_CHAR32:
prefix = "U";
break;
+ case CPP_UTF8CHAR:
+ prefix = "u8";
+ break;
}
if (val <= UCHAR_MAX && ISGRAPH (val))
@@ -10289,7 +10293,8 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type,
else if (token_type == CPP_CHAR_USERDEF
|| token_type == CPP_WCHAR_USERDEF
|| token_type == CPP_CHAR16_USERDEF
- || token_type == CPP_CHAR32_USERDEF)
+ || token_type == CPP_CHAR32_USERDEF
+ || token_type == CPP_UTF8CHAR_USERDEF)
message = catenate_messages (gmsgid,
" before user-defined character literal");
else if (token_type == CPP_STRING_USERDEF
diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c
index 9c56832..aa5d6e2 100644
--- a/gcc/c-family/c-lex.c
+++ b/gcc/c-family/c-lex.c
@@ -536,6 +536,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
case CPP_WCHAR_USERDEF:
case CPP_CHAR16_USERDEF:
case CPP_CHAR32_USERDEF:
+ case CPP_UTF8CHAR_USERDEF:
{
tree literal;
cpp_token temp_tok = *tok;
@@ -553,6 +554,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags,
case CPP_WCHAR:
case CPP_CHAR16:
case CPP_CHAR32:
+ case CPP_UTF8CHAR:
*value = lex_charconst (tok);
break;
@@ -1250,6 +1252,8 @@ lex_charconst (const cpp_token *token)
type = char32_type_node;
else if (token->type == CPP_CHAR16)
type = char16_type_node;
+ else if (token->type == CPP_UTF8CHAR)
+ type = char_type_node;
/* In C, a character constant has type 'int'.
In C++ 'char', but multi-char charconsts have type 'int'. */
else if (!c_dialect_cxx () || chars_seen > 1)
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 843b7ed..04fb007 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,10 @@
+2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
+
+ Implement N4197 - Adding u8 character literals
+ * parser.c (cp_parser_primary_expression()): Treat CPP_UTF8CHAR
+ and CPP_UTF8CHAR_USERDEF tokens;
+ (cp_parser_parenthesized_expression_list()): Treat CPP_UTF8CHAR token.
+
2015-06-29 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/65977
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index acf147c..bb3d636 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -4284,6 +4284,7 @@ cp_parser_primary_expression (cp_parser *parser,
case CPP_CHAR16:
case CPP_CHAR32:
case CPP_WCHAR:
+ case CPP_UTF8CHAR:
case CPP_NUMBER:
case CPP_PREPARSED_EXPR:
if (TREE_CODE (token->u.value) == USERDEF_LITERAL)
@@ -4345,6 +4346,7 @@ cp_parser_primary_expression (cp_parser *parser,
case CPP_CHAR16_USERDEF:
case CPP_CHAR32_USERDEF:
case CPP_WCHAR_USERDEF:
+ case CPP_UTF8CHAR_USERDEF:
return cp_parser_userdef_char_literal (parser);
case CPP_STRING:
@@ -6887,6 +6889,7 @@ cp_parser_parenthesized_expression_list (cp_parser* parser,
case CPP_WCHAR:
case CPP_CHAR16:
case CPP_CHAR32:
+ case CPP_UTF8CHAR:
/* If a parameter is literal zero alone, remember it
for -Wmemset-transposed-args warning. */
if (integer_zerop (tok->u.value)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f14bbb1..f766b0d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
+
+ Implement N4197 - Adding u8 character literals
+ * g++.dg/cpp1z/utf8.C: New.
+ * g++.dg/cpp1z/utf8-neg.C: New.
+ * g++.dg/cpp1z/udlit-utf8char.C: New.
+
2015-06-30 Marek Polacek <polacek@redhat.com>
* gcc.dg/fold-ior-2.c (fn4): Swap operands.
diff --git a/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C b/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C
new file mode 100644
index 0000000..fb9cdf18
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C
@@ -0,0 +1,8 @@
+// { dg-do compile }
+// { dg-options "-std=c++1z" }
+
+constexpr int
+operator""_foo(char c)
+{ return c * 100; }
+
+auto cc = u8'8'_foo;
diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C
new file mode 100644
index 0000000..339f0e3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c++1z" } */
+
+const static char c0 = u8''; // { dg-error "empty character" }
+const static char c1 = u8'ab'; // { dg-warning "multi-character character constant" }
+const static char c2 = u8'\u0124'; // { dg-warning "multi-character character constant" }
+const static char c3 = u8'\U00064321'; // { dg-warning "multi-character character constant" }
diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8.C b/gcc/testsuite/g++.dg/cpp1z/utf8.C
new file mode 100644
index 0000000..52816f8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/utf8.C
@@ -0,0 +1,15 @@
+// { dg-do compile }
+// { dg-options "-std=c++1z" }
+
+#include <cassert>
+#include <experimental/type_traits>
+
+auto c = 'c';
+auto u8c = u8'c';
+
+static_assert(std::experimental::is_same_v<decltype(u8c), decltype(c)>, "");
+
+auto u8s = u8"c";
+auto x = u8s[0];
+
+static_assert(std::experimental::is_same_v<decltype(u8c), decltype(x)>, "");