diff options
author | Tom Honermann <tom@honermann.net> | 2019-01-14 19:55:51 +0000 |
---|---|---|
committer | Jason Merrill <jason@gcc.gnu.org> | 2019-01-14 14:55:51 -0500 |
commit | 2d91f79dc990f81dcea89a5087cad566238b2456 (patch) | |
tree | ae9137b811a3a0bdada65dbb8a6ed5dba9d24b0e /gcc/c-family/c-common.c | |
parent | 23db6ced33c245c38c147c31011bbafa392e4328 (diff) | |
download | gcc-2d91f79dc990f81dcea89a5087cad566238b2456.zip gcc-2d91f79dc990f81dcea89a5087cad566238b2456.tar.gz gcc-2d91f79dc990f81dcea89a5087cad566238b2456.tar.bz2 |
Implement P0482R5, char8_t: A type for UTF-8 characters and strings
gcc/cp/
* cvt.c (type_promotes_to): Handle char8_t promotion.
* decl.c (grokdeclarator): Handle invalid type specifier
combinations involving char8_t.
* lex.c (init_reswords): Add char8_t as a reserved word.
* mangle.c (write_builtin_type): Add name mangling for char8_t (Du).
* parser.c (cp_keyword_starts_decl_specifier_p)
(cp_parser_simple_type_specifier): Recognize char8_t as a simple
type specifier.
(cp_parser_string_literal): Use char8_array_type_node for the type
of CPP_UTF8STRING.
(cp_parser_set_decl_spec_type): Tolerate char8_t typedefs in system
headers.
* rtti.c (emit_support_tinfos): type_info support for char8_t.
* tree.c (char_type_p): Recognize char8_t as a character type.
* typeck.c (string_conv_p): Handle conversions of u8 string
literals of char8_t type.
(check_literal_operator_args): Handle UDLs with u8 string literals
of char8_t type.
* typeck2.c (ordinary_char_type_p): New.
(digest_init_r): Disallow initializing a char array with a u8 string
literal.
gcc/c-family/
* c-common.c (c_common_reswords): Add char8_t.
(fix_string_type): Use char8_t for the type of u8 string literals.
(c_common_get_alias_set): char8_t doesn't alias.
(c_common_nodes_and_builtins): Define char8_t as a builtin type in
C++.
(c_stddef_cpp_builtins): Add __CHAR8_TYPE__.
(keyword_begins_type_specifier): Add RID_CHAR8.
* c-common.h (rid): Add RID_CHAR8.
(c_tree_index): Add CTI_CHAR8_TYPE and CTI_CHAR8_ARRAY_TYPE.
Define D_CXX_CHAR8_T and D_CXX_CHAR8_T_FLAGS.
Define char8_type_node and char8_array_type_node.
* c-cppbuiltin.c (cpp_atomic_builtins): Predefine
__GCC_ATOMIC_CHAR8_T_LOCK_FREE.
(c_cpp_builtins): Predefine __cpp_char8_t.
* c-lex.c (lex_string): Use char8_array_type_node as the type of
CPP_UTF8STRING.
(lex_charconst): Use char8_type_node as the type of CPP_UTF8CHAR.
* c-opts.c: If not otherwise specified, enable -fchar8_t when
targeting C++2a.
* c.opt: Add the -fchar8_t command line option.
libiberty/
* cp-demangle.c (cplus_demangle_builtin_types)
(cplus_demangle_type): Add name demangling for char8_t (Du).
* cp-demangle.h: Increase D_BUILTIN_TYPE_COUNT to accommodate the
new char8_t type.
From-SVN: r267923
Diffstat (limited to 'gcc/c-family/c-common.c')
-rw-r--r-- | gcc/c-family/c-common.c | 41 |
1 files changed, 39 insertions, 2 deletions
```diff
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index d2ea384..2a5a8e7 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -79,6 +79,7 @@ machine_mode c_default_pointer_mode = VOIDmode;
         tree signed_char_type_node;
         tree wchar_type_node;
 
+        tree char8_type_node;
         tree char16_type_node;
         tree char32_type_node;
 
@@ -128,6 +129,11 @@ machine_mode c_default_pointer_mode = VOIDmode;
 
         tree wchar_array_type_node;
 
+   Type `char8_t[SOMENUMBER]' or something like it.
+   Used when a UTF-8 string literal is created.
+
+        tree char8_array_type_node;
+
    Type `char16_t[SOMENUMBER]' or something like it.
    Used when a UTF-16 string literal is created.
 
@@ -452,6 +458,7 @@ const struct c_common_resword c_common_reswords[] =
   { "case", RID_CASE, 0 },
   { "catch", RID_CATCH, D_CXX_OBJC | D_CXXWARN },
   { "char", RID_CHAR, 0 },
+  { "char8_t", RID_CHAR8, D_CXX_CHAR8_T_FLAGS | D_CXXWARN },
   { "char16_t", RID_CHAR16, D_CXXONLY | D_CXX11 | D_CXXWARN },
   { "char32_t", RID_CHAR32, D_CXXONLY | D_CXX11 | D_CXXWARN },
   { "class", RID_CLASS, D_CXX_OBJC | D_CXXWARN },
@@ -748,6 +755,11 @@ fix_string_type (tree value)
       charsz = 1;
       e_type = char_type_node;
     }
+  else if (flag_char8_t && TREE_TYPE (value) == char8_array_type_node)
+    {
+      charsz = TYPE_PRECISION (char8_type_node) / BITS_PER_UNIT;
+      e_type = char8_type_node;
+    }
   else if (TREE_TYPE (value) == char16_array_type_node)
     {
       charsz = TYPE_PRECISION (char16_type_node) / BITS_PER_UNIT;
@@ -828,7 +840,8 @@ fix_string_type (tree value)
    CPP_STRING16, or CPP_STRING32. Return CPP_OTHER in case of error.
    This may not be exactly the string token type that initially created the
    string, since CPP_WSTRING is indistinguishable from the 16/32 bit
-   string type at this point.
+   string type, and CPP_UTF8STRING is indistinguishable from CPP_STRING
+   at this point.
 
    This effectively reverses part of the logic in lex_string and
    fix_string_type. */
@@ -3640,8 +3653,12 @@ c_common_get_alias_set (tree t)
   if (!TYPE_P (t))
     return -1;
 
+  /* Unlike char, char8_t doesn't alias. */
+  if (flag_char8_t && t == char8_type_node)
+    return -1;
+
   /* The C standard guarantees that any object may be accessed via an
-     lvalue that has character type. */
+     lvalue that has narrow character type (except char8_t). */
   if (t == char_type_node
       || t == signed_char_type_node
       || t == unsigned_char_type_node)
@@ -4050,6 +4067,7 @@ c_get_ident (const char *id)
 void
 c_common_nodes_and_builtins (void)
 {
+  int char8_type_size;
   int char16_type_size;
   int char32_type_size;
   int wchar_type_size;
@@ -4341,6 +4359,22 @@ c_common_nodes_and_builtins (void)
   wchar_array_type_node
     = build_array_type (wchar_type_node, array_domain_type);
 
+  /* Define 'char8_t'. */
+  char8_type_node = get_identifier (CHAR8_TYPE);
+  char8_type_node = TREE_TYPE (identifier_global_value (char8_type_node));
+  char8_type_size = TYPE_PRECISION (char8_type_node);
+  if (c_dialect_cxx ())
+    {
+      char8_type_node = make_unsigned_type (char8_type_size);
+
+      if (flag_char8_t)
+        record_builtin_type (RID_CHAR8, "char8_t", char8_type_node);
+    }
+
+  /* This is for UTF-8 string constants. */
+  char8_array_type_node
+    = build_array_type (char8_type_node, array_domain_type);
+
   /* Define 'char16_t'. */
   char16_type_node = get_identifier (CHAR16_TYPE);
   char16_type_node = TREE_TYPE (identifier_global_value (char16_type_node));
@@ -5138,6 +5172,8 @@ c_stddef_cpp_builtins(void)
   builtin_define_with_value ("__WINT_TYPE__", WINT_TYPE, 0);
   builtin_define_with_value ("__INTMAX_TYPE__", INTMAX_TYPE, 0);
   builtin_define_with_value ("__UINTMAX_TYPE__", UINTMAX_TYPE, 0);
+  if (flag_char8_t)
+    builtin_define_with_value ("__CHAR8_TYPE__", CHAR8_TYPE, 0);
   builtin_define_with_value ("__CHAR16_TYPE__", CHAR16_TYPE, 0);
   builtin_define_with_value ("__CHAR32_TYPE__", CHAR32_TYPE, 0);
   if (SIG_ATOMIC_TYPE)
@@ -7856,6 +7892,7 @@ keyword_begins_type_specifier (enum rid keyword)
     case RID_ACCUM:
     case RID_BOOL:
     case RID_WCHAR:
+    case RID_CHAR8:
     case RID_CHAR16:
     case RID_CHAR32:
     case RID_SAT:
```