about summary refs log tree commit diff
path: root/gcc/c-family
diff options
context:
space:
mode:
author: Tom Honermann <tom@honermann.net> 2019-01-14 19:55:51 +0000
committer: Jason Merrill <jason@gcc.gnu.org> 2019-01-14 14:55:51 -0500
commit: 2d91f79dc990f81dcea89a5087cad566238b2456 (patch)
tree: ae9137b811a3a0bdada65dbb8a6ed5dba9d24b0e /gcc/c-family
parent: 23db6ced33c245c38c147c31011bbafa392e4328 (diff)
download: gcc-2d91f79dc990f81dcea89a5087cad566238b2456.zip
gcc-2d91f79dc990f81dcea89a5087cad566238b2456.tar.gz
gcc-2d91f79dc990f81dcea89a5087cad566238b2456.tar.bz2
Implement P0482R5, char8_t: A type for UTF-8 characters and strings
gcc/cp/
	* cvt.c (type_promotes_to): Handle char8_t promotion.
	* decl.c (grokdeclarator): Handle invalid type specifier
	combinations involving char8_t.
	* lex.c (init_reswords): Add char8_t as a reserved word.
	* mangle.c (write_builtin_type): Add name mangling for char8_t (Du).
	* parser.c (cp_keyword_starts_decl_specifier_p)
	(cp_parser_simple_type_specifier): Recognize char8_t as a simple
	type specifier.
	(cp_parser_string_literal): Use char8_array_type_node for the type
	of CPP_UTF8STRING.
	(cp_parser_set_decl_spec_type): Tolerate char8_t typedefs in system
	headers.
	* rtti.c (emit_support_tinfos): type_info support for char8_t.
	* tree.c (char_type_p): Recognize char8_t as a character type.
	* typeck.c (string_conv_p): Handle conversions of u8 string
	literals of char8_t type.
	(check_literal_operator_args): Handle UDLs with u8 string literals
	of char8_t type.
	* typeck2.c (ordinary_char_type_p): New.
	(digest_init_r): Disallow initializing a char array with a u8 string
	literal.

gcc/c-family/
	* c-common.c (c_common_reswords): Add char8_t.
	(fix_string_type): Use char8_t for the type of u8 string literals.
	(c_common_get_alias_set): char8_t doesn't alias.
	(c_common_nodes_and_builtins): Define char8_t as a builtin type in
	C++.
	(c_stddef_cpp_builtins): Add __CHAR8_TYPE__.
	(keyword_begins_type_specifier): Add RID_CHAR8.
	* c-common.h (rid): Add RID_CHAR8.
	(c_tree_index): Add CTI_CHAR8_TYPE and CTI_CHAR8_ARRAY_TYPE.
	Define D_CXX_CHAR8_T and D_CXX_CHAR8_T_FLAGS.
	Define char8_type_node and char8_array_type_node.
	* c-cppbuiltin.c (cpp_atomic_builtins): Predefine
	__GCC_ATOMIC_CHAR8_T_LOCK_FREE.
	(c_cpp_builtins): Predefine __cpp_char8_t.
	* c-lex.c (lex_string): Use char8_array_type_node as the type of
	CPP_UTF8STRING.
	(lex_charconst): Use char8_type_node as the type of CPP_UTF8CHAR.
	* c-opts.c: If not otherwise specified, enable -fchar8_t when
	targeting C++2a.
	* c.opt: Add the -fchar8_t command line option.

libiberty/
	* cp-demangle.c (cplus_demangle_builtin_types)
	(cplus_demangle_type): Add name demangling for char8_t (Du).
	* cp-demangle.h: Increase D_BUILTIN_TYPE_COUNT to accommodate the
	new char8_t type.

From-SVN: r267923
Diffstat (limited to 'gcc/c-family')
-rw-r--r-- gcc/c-family/ChangeLog 24
-rw-r--r-- gcc/c-family/c-common.c 41
-rw-r--r-- gcc/c-family/c-common.h 33
-rw-r--r-- gcc/c-family/c-cppbuiltin.c 5
-rw-r--r-- gcc/c-family/c-lex.c 14
-rw-r--r-- gcc/c-family/c-opts.c 4
-rw-r--r-- gcc/c-family/c.opt 5
7 files changed, 110 insertions, 16 deletions
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog
index fe4c81f..0ff16ab 100644
--- a/gcc/c-family/ChangeLog
+++ b/gcc/c-family/ChangeLog
@@ -1,3 +1,27 @@
+2019-01-14 Tom Honermann <tom@honermann.net>
+
+ Implement P0482R5, char8_t: A type for UTF-8 characters and strings
+ * c-common.c (c_common_reswords): Add char8_t.
+ (fix_string_type): Use char8_t for the type of u8 string literals.
+ (c_common_get_alias_set): char8_t doesn't alias.
+ (c_common_nodes_and_builtins): Define char8_t as a builtin type in
+ C++.
+ (c_stddef_cpp_builtins): Add __CHAR8_TYPE__.
+ (keyword_begins_type_specifier): Add RID_CHAR8.
+ * c-common.h (rid): Add RID_CHAR8.
+ (c_tree_index): Add CTI_CHAR8_TYPE and CTI_CHAR8_ARRAY_TYPE.
+ Define D_CXX_CHAR8_T and D_CXX_CHAR8_T_FLAGS.
+ Define char8_type_node and char8_array_type_node.
+ * c-cppbuiltin.c (cpp_atomic_builtins): Predefine
+ __GCC_ATOMIC_CHAR8_T_LOCK_FREE.
+ (c_cpp_builtins): Predefine __cpp_char8_t.
+ * c-lex.c (lex_string): Use char8_array_type_node as the type of
+ CPP_UTF8STRING.
+ (lex_charconst): Use char8_type_node as the type of CPP_UTF8CHAR.
+ * c-opts.c: If not otherwise specified, enable -fchar8_t when
+ targeting C++2a.
+ * c.opt: Add the -fchar8_t command line option.
+
2019-01-14 Martin Sebor <msebor@redhat.com>
PR target/88638
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index d2ea384..2a5a8e7 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -79,6 +79,7 @@ machine_mode c_default_pointer_mode = VOIDmode;
tree signed_char_type_node;
tree wchar_type_node;
+ tree char8_type_node;
tree char16_type_node;
tree char32_type_node;
@@ -128,6 +129,11 @@ machine_mode c_default_pointer_mode = VOIDmode;
tree wchar_array_type_node;
+ Type `char8_t[SOMENUMBER]' or something like it.
+ Used when a UTF-8 string literal is created.
+
+ tree char8_array_type_node;
+
Type `char16_t[SOMENUMBER]' or something like it.
Used when a UTF-16 string literal is created.
@@ -452,6 +458,7 @@ const struct c_common_resword c_common_reswords[] =
{ "case", RID_CASE, 0 },
{ "catch", RID_CATCH, D_CXX_OBJC | D_CXXWARN },
{ "char", RID_CHAR, 0 },
+ { "char8_t", RID_CHAR8, D_CXX_CHAR8_T_FLAGS | D_CXXWARN },
{ "char16_t", RID_CHAR16, D_CXXONLY | D_CXX11 | D_CXXWARN },
{ "char32_t", RID_CHAR32, D_CXXONLY | D_CXX11 | D_CXXWARN },
{ "class", RID_CLASS, D_CXX_OBJC | D_CXXWARN },
@@ -748,6 +755,11 @@ fix_string_type (tree value)
charsz = 1;
e_type = char_type_node;
}
+ else if (flag_char8_t && TREE_TYPE (value) == char8_array_type_node)
+ {
+ charsz = TYPE_PRECISION (char8_type_node) / BITS_PER_UNIT;
+ e_type = char8_type_node;
+ }
else if (TREE_TYPE (value) == char16_array_type_node)
{
charsz = TYPE_PRECISION (char16_type_node) / BITS_PER_UNIT;
@@ -828,7 +840,8 @@ fix_string_type (tree value)
CPP_STRING16, or CPP_STRING32. Return CPP_OTHER in case of error.
This may not be exactly the string token type that initially created
the string, since CPP_WSTRING is indistinguishable from the 16/32 bit
- string type at this point.
+ string type, and CPP_UTF8STRING is indistinguishable from CPP_STRING
+ at this point.
This effectively reverses part of the logic in lex_string and
fix_string_type. */
@@ -3640,8 +3653,12 @@ c_common_get_alias_set (tree t)
if (!TYPE_P (t))
return -1;
+ /* Unlike char, char8_t doesn't alias. */
+ if (flag_char8_t && t == char8_type_node)
+ return -1;
+
/* The C standard guarantees that any object may be accessed via an
- lvalue that has character type. */
+ lvalue that has narrow character type (except char8_t). */
if (t == char_type_node
|| t == signed_char_type_node
|| t == unsigned_char_type_node)
@@ -4050,6 +4067,7 @@ c_get_ident (const char *id)
void
c_common_nodes_and_builtins (void)
{
+ int char8_type_size;
int char16_type_size;
int char32_type_size;
int wchar_type_size;
@@ -4341,6 +4359,22 @@ c_common_nodes_and_builtins (void)
wchar_array_type_node
= build_array_type (wchar_type_node, array_domain_type);
+ /* Define 'char8_t'. */
+ char8_type_node = get_identifier (CHAR8_TYPE);
+ char8_type_node = TREE_TYPE (identifier_global_value (char8_type_node));
+ char8_type_size = TYPE_PRECISION (char8_type_node);
+ if (c_dialect_cxx ())
+ {
+ char8_type_node = make_unsigned_type (char8_type_size);
+
+ if (flag_char8_t)
+ record_builtin_type (RID_CHAR8, "char8_t", char8_type_node);
+ }
+
+ /* This is for UTF-8 string constants. */
+ char8_array_type_node
+ = build_array_type (char8_type_node, array_domain_type);
+
/* Define 'char16_t'. */
char16_type_node = get_identifier (CHAR16_TYPE);
char16_type_node = TREE_TYPE (identifier_global_value (char16_type_node));
@@ -5138,6 +5172,8 @@ c_stddef_cpp_builtins(void)
builtin_define_with_value ("__WINT_TYPE__", WINT_TYPE, 0);
builtin_define_with_value ("__INTMAX_TYPE__", INTMAX_TYPE, 0);
builtin_define_with_value ("__UINTMAX_TYPE__", UINTMAX_TYPE, 0);
+ if (flag_char8_t)
+ builtin_define_with_value ("__CHAR8_TYPE__", CHAR8_TYPE, 0);
builtin_define_with_value ("__CHAR16_TYPE__", CHAR16_TYPE, 0);
builtin_define_with_value ("__CHAR32_TYPE__", CHAR32_TYPE, 0);
if (SIG_ATOMIC_TYPE)
@@ -7856,6 +7892,7 @@ keyword_begins_type_specifier (enum rid keyword)
case RID_ACCUM:
case RID_BOOL:
case RID_WCHAR:
+ case RID_CHAR8:
case RID_CHAR16:
case RID_CHAR32:
case RID_SAT:
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index 9f790bc..9fe90f3 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -180,6 +180,9 @@ enum rid
/* C++11 */
RID_CONSTEXPR, RID_DECLTYPE, RID_NOEXCEPT, RID_NULLPTR, RID_STATIC_ASSERT,
+ /* char8_t */
+ RID_CHAR8,
+
/* C++ concepts */
RID_CONCEPT, RID_REQUIRES,
@@ -287,6 +290,7 @@ extern GTY ((length ("(int) RID_MAX"))) tree *ridpointers;
enum c_tree_index
{
+ CTI_CHAR8_TYPE,
CTI_CHAR16_TYPE,
CTI_CHAR32_TYPE,
CTI_WCHAR_TYPE,
@@ -330,6 +334,7 @@ enum c_tree_index
CTI_UINTPTR_TYPE,
CTI_CHAR_ARRAY_TYPE,
+ CTI_CHAR8_ARRAY_TYPE,
CTI_CHAR16_ARRAY_TYPE,
CTI_CHAR32_ARRAY_TYPE,
CTI_WCHAR_ARRAY_TYPE,
@@ -409,20 +414,22 @@ extern machine_mode c_default_pointer_mode;
mask) is _true_. Thus for keywords which are present in all
languages the disable field is zero. */
-#define D_CONLY 0x001 /* C only (not in C++). */
-#define D_CXXONLY 0x002 /* C++ only (not in C). */
-#define D_C99 0x004 /* In C, C99 only. */
-#define D_CXX11 0x008 /* In C++, C++11 only. */
-#define D_EXT 0x010 /* GCC extension. */
-#define D_EXT89 0x020 /* GCC extension incorporated in C99. */
-#define D_ASM 0x040 /* Disabled by -fno-asm. */
-#define D_OBJC 0x080 /* In Objective C and neither C nor C++. */
-#define D_CXX_OBJC 0x100 /* In Objective C, and C++, but not C. */
-#define D_CXXWARN 0x200 /* In C warn with -Wcxx-compat. */
-#define D_CXX_CONCEPTS 0x400 /* In C++, only with concepts. */
-#define D_TRANSMEM 0X800 /* C++ transactional memory TS. */
+#define D_CONLY 0x0001 /* C only (not in C++). */
+#define D_CXXONLY 0x0002 /* C++ only (not in C). */
+#define D_C99 0x0004 /* In C, C99 only. */
+#define D_CXX11 0x0008 /* In C++, C++11 only. */
+#define D_EXT 0x0010 /* GCC extension. */
+#define D_EXT89 0x0020 /* GCC extension incorporated in C99. */
+#define D_ASM 0x0040 /* Disabled by -fno-asm. */
+#define D_OBJC 0x0080 /* In Objective C and neither C nor C++. */
+#define D_CXX_OBJC 0x0100 /* In Objective C, and C++, but not C. */
+#define D_CXXWARN 0x0200 /* In C warn with -Wcxx-compat. */
+#define D_CXX_CONCEPTS 0x0400 /* In C++, only with concepts. */
+#define D_TRANSMEM 0X0800 /* C++ transactional memory TS. */
+#define D_CXX_CHAR8_T 0X1000 /* In C++, only with -fchar8_t. */
#define D_CXX_CONCEPTS_FLAGS D_CXXONLY | D_CXX_CONCEPTS
+#define D_CXX_CHAR8_T_FLAGS D_CXXONLY | D_CXX_CHAR8_T
/* The reserved keyword table. */
extern const struct c_common_resword c_common_reswords[];
@@ -430,6 +437,7 @@ extern const struct c_common_resword c_common_reswords[];
/* The number of items in the reserved keyword table. */
extern const unsigned int num_c_common_reswords;
+#define char8_type_node c_global_trees[CTI_CHAR8_TYPE]
#define char16_type_node c_global_trees[CTI_CHAR16_TYPE]
#define char32_type_node c_global_trees[CTI_CHAR32_TYPE]
#define wchar_type_node c_global_trees[CTI_WCHAR_TYPE]
@@ -475,6 +483,7 @@ extern const unsigned int num_c_common_reswords;
#define truthvalue_false_node c_global_trees[CTI_TRUTHVALUE_FALSE]
#define char_array_type_node c_global_trees[CTI_CHAR_ARRAY_TYPE]
+#define char8_array_type_node c_global_trees[CTI_CHAR8_ARRAY_TYPE]
#define char16_array_type_node c_global_trees[CTI_CHAR16_ARRAY_TYPE]
#define char32_array_type_node c_global_trees[CTI_CHAR32_ARRAY_TYPE]
#define wchar_array_type_node c_global_trees[CTI_WCHAR_ARRAY_TYPE]
diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index 25b5c1a..c9b63ca 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -702,6 +702,9 @@ cpp_atomic_builtins (cpp_reader *pfile)
(have_swap[SWAP_INDEX (boolean_type_node)]? 2 : 1));
builtin_define_with_int_value ("__GCC_ATOMIC_CHAR_LOCK_FREE",
(have_swap[SWAP_INDEX (signed_char_type_node)]? 2 : 1));
+ if (flag_char8_t)
+ builtin_define_with_int_value ("__GCC_ATOMIC_CHAR8_T_LOCK_FREE",
+ (have_swap[SWAP_INDEX (char8_type_node)]? 2 : 1));
builtin_define_with_int_value ("__GCC_ATOMIC_CHAR16_T_LOCK_FREE",
(have_swap[SWAP_INDEX (char16_type_node)]? 2 : 1));
builtin_define_with_int_value ("__GCC_ATOMIC_CHAR32_T_LOCK_FREE",
@@ -1000,6 +1003,8 @@ c_cpp_builtins (cpp_reader *pfile)
cpp_define (pfile, "__cpp_template_template_args=201611");
if (flag_threadsafe_statics)
cpp_define (pfile, "__cpp_threadsafe_static_init=200806");
+ if (flag_char8_t)
+ cpp_define (pfile, "__cpp_char8_t=201811");
}
/* Note that we define this for C as well, so that we know if
__attribute__((cleanup)) will interface with EH. */
diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c
index d5ce9e9..0a368a3 100644
--- a/gcc/c-family/c-lex.c
+++ b/gcc/c-family/c-lex.c
@@ -1281,9 +1281,14 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
{
default:
case CPP_STRING:
- case CPP_UTF8STRING:
TREE_TYPE (value) = char_array_type_node;
break;
+ case CPP_UTF8STRING:
+ if (flag_char8_t)
+ TREE_TYPE (value) = char8_array_type_node;
+ else
+ TREE_TYPE (value) = char_array_type_node;
+ break;
case CPP_STRING16:
TREE_TYPE (value) = char16_array_type_node;
break;
@@ -1323,7 +1328,12 @@ lex_charconst (const cpp_token *token)
else if (token->type == CPP_CHAR16)
type = char16_type_node;
else if (token->type == CPP_UTF8CHAR)
- type = char_type_node;
+ {
+ if (flag_char8_t)
+ type = char8_type_node;
+ else
+ type = char_type_node;
+ }
/* In C, a character constant has type 'int'.
In C++ 'char', but multi-char charconsts have type 'int'. */
else if (!c_dialect_cxx () || chars_seen > 1)
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 2c22574..9660f51 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -996,6 +996,10 @@ c_common_post_options (const char **pfilename)
if (flag_sized_deallocation == -1)
flag_sized_deallocation = (cxx_dialect >= cxx14);
+ /* char8_t support is new in C++2A. */
+ if (flag_char8_t == -1)
+ flag_char8_t = (cxx_dialect >= cxx2a);
+
if (flag_extern_tls_init)
{
if (!TARGET_SUPPORTS_ALIASES || !SUPPORTS_WEAK)
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index d118e74..858beff 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -1300,6 +1300,11 @@ fcanonical-system-headers
C ObjC C++ ObjC++
Where shorter, use canonicalized paths to systems headers.
+fchar8_t
+C++ ObjC++ Var(flag_char8_t) Init(-1)
+Enable the char8_t fundamental type and use it as the type for UTF-8 string
+and character literals.
+
fcheck-pointer-bounds
C ObjC C++ ObjC++ LTO Deprecated
Deprecated in GCC 9. This switch has no effect.