diff options
author | Edward Smith-Rowland <3dw4rd@verizon.net> | 2015-06-30 12:58:48 +0000 |
---|---|---|
committer | Edward Smith-Rowland <emsr@gcc.gnu.org> | 2015-06-30 12:58:48 +0000 |
commit | fe95b0366a02855988d4c626de2045dae6e11502 (patch) | |
tree | 8b22f058594c24d03fcb8f25908876f4764a4e03 /gcc | |
parent | 5b00d9211625c18148d3bacdc53c9f527557d063 (diff) | |
download | gcc-fe95b0366a02855988d4c626de2045dae6e11502.zip gcc-fe95b0366a02855988d4c626de2045dae6e11502.tar.gz gcc-fe95b0366a02855988d4c626de2045dae6e11502.tar.bz2 |
Implement N4197 - Adding u8 character literals
libcpp:
2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
Implement N4197 - Adding u8 character literals
* include/cpplib.h (UTF8CHAR, UTF8CHAR_USERDEF): New cpp tokens;
(struct cpp_options): Add utf8_char_literals.
* init.c (struct lang_flags): Add utf8_char_literals;
(struct lang_flags lang_defaults): Add column for utf8_char_literals.
* macro.c (stringify_arg()): Treat CPP_UTF8CHAR token.
* expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()):
Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens;
(cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token;
(eval_token(), _cpp_parse_expr()): Treat CPP_UTF8CHAR token.
* lex.c (lex_string(), _cpp_lex_direct()): Include CPP_UTF8CHAR tokens.
* charset.c (converter_for_type(), cpp_interpret_charconst()):
Treat CPP_UTF8CHAR token.
gcc/c-family:
2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
Implement N4197 - Adding u8 character literals
* c-family/c-ada-spec.c (print_ada_macros()): Treat CPP_UTF8CHAR
like CPP_CHAR.
* c-family/c-common.c (c_parse_error()): Print CPP_UTF8CHAR
and CPP_UTF8CHAR_USERDEF tokens.
* c-family/c-lex.c (c_lex_with_flags()): Treat CPP_UTF8CHAR_USERDEF
and CPP_UTF8CHAR tokens; (lex_charconst()): Treat CPP_UTF8CHAR token.
gcc/cp:
2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
Implement N4197 - Adding u8 character literals
* parser.c (cp_parser_primary_expression()): Treat CPP_UTF8CHAR
and CPP_UTF8CHAR_USERDEF tokens;
(cp_parser_parenthesized_expression_list()): Treat CPP_UTF8CHAR token.
gcc/testsuite:
2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
Implement N4197 - Adding u8 character literals
* g++.dg/cpp1z/utf8.C: New.
* g++.dg/cpp1z/utf8-neg.C: New.
* g++.dg/cpp1z/udlit-utf8char.C: New.
From-SVN: r225185
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/c-family/ChangeLog | 10 | ||||
-rw-r--r-- | gcc/c-family/c-ada-spec.c | 1 | ||||
-rw-r--r-- | gcc/c-family/c-common.c | 9 | ||||
-rw-r--r-- | gcc/c-family/c-lex.c | 4 | ||||
-rw-r--r-- | gcc/cp/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/cp/parser.c | 3 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C | 8 | ||||
-rw-r--r-- | gcc/testsuite/g++.dg/cpp1z/utf8-neg.C | 7 | ||||
-rw-r--r-- | gcc/testsuite/g++.dg/cpp1z/utf8.C | 15 |
10 files changed, 69 insertions, 2 deletions
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 0e03e7e..66ae6a8 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,13 @@ +2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> + + Implement N4197 - Adding u8 character literals + * c-family/c-ada-spec.c (print_ada_macros()): Treat CPP_UTF8CHAR + like CPP_CHAR. + * c-family/c-common.c (c_parse_error()): print CPP_UTF8CHAR + and CPP_UTF8CHAR_USERDEF tokens. + * c-family/c-lex.c (c_lex_with_flags()): Treat CPP_UTF8CHAR_USERDEF + and CPP_UTF8CHAR tokens; (lex_charconst()): Treat CPP_UTF8CHAR token. + 2015-06-29 Manuel López-Ibáñez <manu@gcc.gnu.org> PR fortran/66605 diff --git a/gcc/c-family/c-ada-spec.c b/gcc/c-family/c-ada-spec.c index ef3c5e3..41d612f 100644 --- a/gcc/c-family/c-ada-spec.c +++ b/gcc/c-family/c-ada-spec.c @@ -249,6 +249,7 @@ print_ada_macros (pretty_printer *pp, cpp_hashnode **macros, int max_ada_macros) case CPP_WCHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: case CPP_NAME: case CPP_STRING: case CPP_NUMBER: diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c index ac42e4a..835fc2c 100644 --- a/gcc/c-family/c-common.c +++ b/gcc/c-family/c-common.c @@ -10256,7 +10256,8 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type, else if (token_type == CPP_CHAR || token_type == CPP_WCHAR || token_type == CPP_CHAR16 - || token_type == CPP_CHAR32) + || token_type == CPP_CHAR32 + || token_type == CPP_UTF8CHAR) { unsigned int val = TREE_INT_CST_LOW (value); const char *prefix; @@ -10275,6 +10276,9 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type, case CPP_CHAR32: prefix = "U"; break; + case CPP_UTF8CHAR: + prefix = "u8"; + break; } if (val <= UCHAR_MAX && ISGRAPH (val)) @@ -10289,7 +10293,8 @@ c_parse_error (const char *gmsgid, enum cpp_ttype token_type, else if (token_type == CPP_CHAR_USERDEF || token_type == CPP_WCHAR_USERDEF || token_type == CPP_CHAR16_USERDEF - || token_type == CPP_CHAR32_USERDEF) + || token_type == 
CPP_CHAR32_USERDEF + || token_type == CPP_UTF8CHAR_USERDEF) message = catenate_messages (gmsgid, " before user-defined character literal"); else if (token_type == CPP_STRING_USERDEF diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c index 9c56832..aa5d6e2 100644 --- a/gcc/c-family/c-lex.c +++ b/gcc/c-family/c-lex.c @@ -536,6 +536,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags, case CPP_WCHAR_USERDEF: case CPP_CHAR16_USERDEF: case CPP_CHAR32_USERDEF: + case CPP_UTF8CHAR_USERDEF: { tree literal; cpp_token temp_tok = *tok; @@ -553,6 +554,7 @@ c_lex_with_flags (tree *value, location_t *loc, unsigned char *cpp_flags, case CPP_WCHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: *value = lex_charconst (tok); break; @@ -1250,6 +1252,8 @@ lex_charconst (const cpp_token *token) type = char32_type_node; else if (token->type == CPP_CHAR16) type = char16_type_node; + else if (token->type == CPP_UTF8CHAR) + type = char_type_node; /* In C, a character constant has type 'int'. In C++ 'char', but multi-char charconsts have type 'int'. */ else if (!c_dialect_cxx () || chars_seen > 1) diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 843b7ed..04fb007 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,10 @@ +2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> + + Implement N4197 - Adding u8 character literals + * parser.c (cp_parser_primary_expression()): Treat CPP_UTF8CHAR + and CPP_UTF8CHAR_USERDEF tokens; + (cp_parser_parenthesized_expression_list()): Treat CPP_UTF8CHAR token. 
+ 2015-06-29 Paolo Carlini <paolo.carlini@oracle.com> PR c++/65977 diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c index acf147c..bb3d636 100644 --- a/gcc/cp/parser.c +++ b/gcc/cp/parser.c @@ -4284,6 +4284,7 @@ cp_parser_primary_expression (cp_parser *parser, case CPP_CHAR16: case CPP_CHAR32: case CPP_WCHAR: + case CPP_UTF8CHAR: case CPP_NUMBER: case CPP_PREPARSED_EXPR: if (TREE_CODE (token->u.value) == USERDEF_LITERAL) @@ -4345,6 +4346,7 @@ cp_parser_primary_expression (cp_parser *parser, case CPP_CHAR16_USERDEF: case CPP_CHAR32_USERDEF: case CPP_WCHAR_USERDEF: + case CPP_UTF8CHAR_USERDEF: return cp_parser_userdef_char_literal (parser); case CPP_STRING: @@ -6887,6 +6889,7 @@ cp_parser_parenthesized_expression_list (cp_parser* parser, case CPP_WCHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: /* If a parameter is literal zero alone, remember it for -Wmemset-transposed-args warning. */ if (integer_zerop (tok->u.value) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f14bbb1..f766b0d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> + + Implement N4197 - Adding u8 character literals + * g++.dg/cpp1z/utf8.C: New. + * g++.dg/cpp1z/utf8-neg.C: New. + * g++.dg/cpp1z/udlit-utf8char.C: New. + 2015-06-30 Marek Polacek <polacek@redhat.com> * gcc.dg/fold-ior-2.c (fn4): Swap operands. 
diff --git a/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C b/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C new file mode 100644 index 0000000..fb9cdf18 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/udlit-utf8char.C @@ -0,0 +1,8 @@ +// { dg-do compile } +// { dg-options "-std=c++1z" } + +constexpr int +operator""_foo(char c) +{ return c * 100; } + +auto cc = u8'8'_foo; diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C new file mode 100644 index 0000000..339f0e3 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/utf8-neg.C @@ -0,0 +1,7 @@ +/* { dg-do compile } */ +/* { dg-options "-std=c++1z" } */ + +const static char c0 = u8''; // { dg-error "empty character" } +const static char c1 = u8'ab'; // { dg-warning "multi-character character constant" } +const static char c2 = u8'\u0124'; // { dg-warning "multi-character character constant" } +const static char c3 = u8'\U00064321'; // { dg-warning "multi-character character constant" } diff --git a/gcc/testsuite/g++.dg/cpp1z/utf8.C b/gcc/testsuite/g++.dg/cpp1z/utf8.C new file mode 100644 index 0000000..52816f8 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp1z/utf8.C @@ -0,0 +1,15 @@ +// { dg-do compile } +// { dg-options "-std=c++1z" } + +#include <cassert> +#include <experimental/type_traits> + +auto c = 'c'; +auto u8c = u8'c'; + +static_assert(std::experimental::is_same_v<decltype(u8c), decltype(c)>, ""); + +auto u8s = u8"c"; +auto x = u8s[0]; + +static_assert(std::experimental::is_same_v<decltype(u8c), decltype(x)>, ""); |