about | summary | refs | log | tree | commit | diff
path: root/libcpp
diff options
context:
space:
mode:
author Edward Smith-Rowland <3dw4rd@verizon.net> 2015-06-30 12:58:48 +0000
committer Edward Smith-Rowland <emsr@gcc.gnu.org> 2015-06-30 12:58:48 +0000
commit fe95b0366a02855988d4c626de2045dae6e11502 (patch)
tree 8b22f058594c24d03fcb8f25908876f4764a4e03 /libcpp
parent 5b00d9211625c18148d3bacdc53c9f527557d063 (diff)
download gcc-fe95b0366a02855988d4c626de2045dae6e11502.zip
gcc-fe95b0366a02855988d4c626de2045dae6e11502.tar.gz
gcc-fe95b0366a02855988d4c626de2045dae6e11502.tar.bz2
Implement N4197 - Adding u8 character literals
libcpp:

2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>

Implement N4197 - Adding u8 character literals
* include/cpplib.h (UTF8CHAR, UTF8CHAR_USERDEF): New cpp tokens;
(struct cpp_options): Add utf8_char_literals.
* init.c (struct lang_flags): Add utf8_char_literals;
(struct lang_flags lang_defaults): Add column for utf8_char_literals.
* macro.c (stringify_arg()): Treat CPP_UTF8CHAR token;
* expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()):
Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens;
(cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token;
(eval_token(), _cpp_parse_expr()): Treat CPP_UTF8CHAR token.
* lex.c (lex_string(), _cpp_lex_direct()): Include CPP_UTF8CHAR tokens.
* charset.c (converter_for_type(), cpp_interpret_charconst()):
Treat CPP_UTF8CHAR token.

gcc/c-family:

2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>

Implement N4197 - Adding u8 character literals
* c-family/c-ada-spec.c (print_ada_macros()): Treat CPP_UTF8CHAR
like CPP_CHAR.
* c-family/c-common.c (c_parse_error()): print CPP_UTF8CHAR
and CPP_UTF8CHAR_USERDEF tokens.
* c-family/c-lex.c (c_lex_with_flags()): Treat CPP_UTF8CHAR_USERDEF
and CPP_UTF8CHAR tokens;
(lex_charconst()): Treat CPP_UTF8CHAR token.

gcc/cp:

2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>

Implement N4197 - Adding u8 character literals
* parser.c (cp_parser_primary_expression()): Treat CPP_UTF8CHAR
and CPP_UTF8CHAR_USERDEF tokens;
(cp_parser_parenthesized_expression_list()): Treat CPP_UTF8CHAR token.

gcc/testsuite:

2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>

Implement N4197 - Adding u8 character literals
* g++.dg/cpp1z/utf8.C: New.
* g++.dg/cpp1z/utf8-neg.C: New.
* g++.dg/cpp1z/udlit-utf8char.C: New.

From-SVN: r225185
Diffstat (limited to 'libcpp')
-rw-r--r-- libcpp/ChangeLog 16
-rw-r--r-- libcpp/charset.c 8
-rw-r--r-- libcpp/expr.c 9
-rw-r--r-- libcpp/include/cpplib.h 5
-rw-r--r-- libcpp/init.c 36
-rw-r--r-- libcpp/lex.c 6
-rw-r--r-- libcpp/macro.c 2
7 files changed, 58 insertions, 24 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index c7ac1e3..4e29802 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,19 @@
+2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
+
+ Implement N4197 - Adding u8 character literals
+ * include/cpplib.h (UTF8CHAR, UTF8CHAR_USERDEF): New cpp tokens;
+ (struct cpp_options): Add utf8_char_literals.
+ * init.c (struct lang_flags): Add utf8_char_literals;
+ (struct lang_flags lang_defaults): Add column for utf8_char_literals.
+ * macro.c (stringify_arg()): Treat CPP_UTF8CHAR token;
+ * expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()):
+ Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens;
+ (cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token;
+ (eval_token(), _cpp_parse_expr()): Treat CPP_UTF8CHAR token.
+ * lex.c (lex_string(), _cpp_lex_direct()): Include CPP_UTF8CHAR tokens.
+ * charset.c (converter_for_type(), cpp_interpret_charconst()):
+ Treat CPP_UTF8CHAR token.
+
2015-06-30 Uros Bizjak <ubizjak@gmail.com>
* lex.c (search_line_sse42) [__GCC_ASM_FLAG_OUTPUTS__]: New main
diff --git a/libcpp/charset.c b/libcpp/charset.c
index b427638..8e92bc6 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -1355,6 +1355,7 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
{
default:
return pfile->narrow_cset_desc;
+ case CPP_UTF8CHAR:
case CPP_UTF8STRING:
return pfile->utf8_cset_desc;
case CPP_CHAR16:
@@ -1611,11 +1612,12 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token,
unsigned int *pchars_seen, int *unsignedp)
{
cpp_string str = { 0, 0 };
- bool wide = (token->type != CPP_CHAR);
+ bool wide = (token->type != CPP_CHAR && token->type != CPP_UTF8CHAR);
+ int u8 = 2 * int(token->type == CPP_UTF8CHAR);
cppchar_t result;
- /* an empty constant will appear as L'', u'', U'' or '' */
- if (token->val.str.len == (size_t) (2 + wide))
+ /* An empty constant will appear as L'', u'', U'', u8'', or '' */
+ if (token->val.str.len == (size_t) (2 + wide + u8))
{
cpp_error (pfile, CPP_DL_ERROR, "empty character constant");
return 0;
diff --git a/libcpp/expr.c b/libcpp/expr.c
index b8e88c5..3dc5c0b 100644
--- a/libcpp/expr.c
+++ b/libcpp/expr.c
@@ -307,6 +307,8 @@ cpp_userdef_char_remove_type (enum cpp_ttype type)
return CPP_CHAR16;
else if (type == CPP_CHAR32_USERDEF)
return CPP_CHAR32;
+ else if (type == CPP_UTF8CHAR_USERDEF)
+ return CPP_UTF8CHAR;
else
return type;
}
@@ -325,6 +327,8 @@ cpp_userdef_char_add_type (enum cpp_ttype type)
return CPP_CHAR16_USERDEF;
else if (type == CPP_CHAR32)
return CPP_CHAR32_USERDEF;
+ else if (type == CPP_UTF8CHAR)
+ return CPP_UTF8CHAR_USERDEF;
else
return type;
}
@@ -350,7 +354,8 @@ cpp_userdef_char_p (enum cpp_ttype type)
if (type == CPP_CHAR_USERDEF
|| type == CPP_WCHAR_USERDEF
|| type == CPP_CHAR16_USERDEF
- || type == CPP_CHAR32_USERDEF)
+ || type == CPP_CHAR32_USERDEF
+ || type == CPP_UTF8CHAR_USERDEF)
return true;
else
return false;
@@ -1029,6 +1034,7 @@ eval_token (cpp_reader *pfile, const cpp_token *token,
case CPP_CHAR:
case CPP_CHAR16:
case CPP_CHAR32:
+ case CPP_UTF8CHAR:
{
cppchar_t cc = cpp_interpret_charconst (pfile, token,
&temp, &unsignedp);
@@ -1214,6 +1220,7 @@ _cpp_parse_expr (cpp_reader *pfile, bool is_if)
case CPP_WCHAR:
case CPP_CHAR16:
case CPP_CHAR32:
+ case CPP_UTF8CHAR:
case CPP_NAME:
case CPP_HASH:
if (!want_value)
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 1b1a53c..5eaea6b 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -119,6 +119,7 @@ struct _cpp_file;
TK(WCHAR, LITERAL) /* L'char' */ \
TK(CHAR16, LITERAL) /* u'char' */ \
TK(CHAR32, LITERAL) /* U'char' */ \
+ TK(UTF8CHAR, LITERAL) /* u8'char' */ \
TK(OTHER, LITERAL) /* stray punctuation */ \
\
TK(STRING, LITERAL) /* "string" */ \
@@ -133,6 +134,7 @@ struct _cpp_file;
TK(WCHAR_USERDEF, LITERAL) /* L'char'_suffix - C++-0x */ \
TK(CHAR16_USERDEF, LITERAL) /* u'char'_suffix - C++-0x */ \
TK(CHAR32_USERDEF, LITERAL) /* U'char'_suffix - C++-0x */ \
+ TK(UTF8CHAR_USERDEF, LITERAL) /* u8'char'_suffix - C++-0x */ \
TK(STRING_USERDEF, LITERAL) /* "string"_suffix - C++-0x */ \
TK(WSTRING_USERDEF, LITERAL) /* L"string"_suffix - C++-0x */ \
TK(STRING16_USERDEF, LITERAL) /* u"string"_suffix - C++-0x */ \
@@ -339,6 +341,9 @@ struct cpp_options
/* Nonzero means process u/U prefix literals (UTF-16/32). */
unsigned char uliterals;
+ /* Nonzero means process u8 prefixed character literals (UTF-8). */
+ unsigned char utf8_char_literals;
+
/* Nonzero means process r/R raw strings. If this is set, uliterals
must be set as well. */
unsigned char rliterals;
diff --git a/libcpp/init.c b/libcpp/init.c
index 1ebd709..2d5626f 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -90,26 +90,27 @@ struct lang_flags
char binary_constants;
char digit_separators;
char trigraphs;
+ char utf8_char_literals;
};
static const struct lang_flags lang_defaults[] =
-{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig */
- /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
- /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0 },
- /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 },
- /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1 },
- /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
- /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
- /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1 },
- /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 },
- /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
- /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0 },
- /* CXX11 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1 },
- /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0 },
- /* CXX14 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
- /* GNUCXX1Z */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0 },
- /* CXX1Z */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 },
- /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit */
+ /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
+ /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0 },
+ /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 },
+ /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0 },
+ /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0 },
+ /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0 },
+ /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 },
+ /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
+ /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0 },
+ /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0 },
+ /* CXX11 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0 },
+ /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0 },
+ /* CXX14 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 },
+ /* GNUCXX1Z */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1 },
+ /* CXX1Z */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 },
+ /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};
/* Sets internal flags correctly for a given language. */
@@ -133,6 +134,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
CPP_OPTION (pfile, binary_constants) = l->binary_constants;
CPP_OPTION (pfile, digit_separators) = l->digit_separators;
CPP_OPTION (pfile, trigraphs) = l->trigraphs;
+ CPP_OPTION (pfile, utf8_char_literals) = l->utf8_char_literals;
}
/* Initialize library global state. */
diff --git a/libcpp/lex.c b/libcpp/lex.c
index 5758e58..8f2bdc8 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1858,7 +1858,8 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
else if (terminator == '\'')
type = (*base == 'L' ? CPP_WCHAR :
*base == 'U' ? CPP_CHAR32 :
- *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
+ *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
+ : CPP_CHAR);
else
terminator = '>', type = CPP_HEADER_NAME;
@@ -2403,7 +2404,8 @@ _cpp_lex_direct (cpp_reader *pfile)
&& CPP_OPTION (pfile, rliterals))
|| (*buffer->cur == '8'
&& c == 'u'
- && (buffer->cur[1] == '"'
+ && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
+ && CPP_OPTION (pfile, utf8_char_literals)))
|| (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
&& CPP_OPTION (pfile, rliterals)))))
{
diff --git a/libcpp/macro.c b/libcpp/macro.c
index f76e10b..786c21b 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -531,7 +531,7 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg)
|| token->type == CPP_WSTRING || token->type == CPP_WCHAR
|| token->type == CPP_STRING32 || token->type == CPP_CHAR32
|| token->type == CPP_STRING16 || token->type == CPP_CHAR16
- || token->type == CPP_UTF8STRING
+ || token->type == CPP_UTF8STRING || token->type == CPP_UTF8CHAR
|| cpp_userdef_string_p (token->type)
|| cpp_userdef_char_p (token->type));