diff options
author | Edward Smith-Rowland <3dw4rd@verizon.net> | 2015-06-30 12:58:48 +0000 |
---|---|---|
committer | Edward Smith-Rowland <emsr@gcc.gnu.org> | 2015-06-30 12:58:48 +0000 |
commit | fe95b0366a02855988d4c626de2045dae6e11502 (patch) | |
tree | 8b22f058594c24d03fcb8f25908876f4764a4e03 /libcpp | |
parent | 5b00d9211625c18148d3bacdc53c9f527557d063 (diff) | |
download | gcc-fe95b0366a02855988d4c626de2045dae6e11502.zip gcc-fe95b0366a02855988d4c626de2045dae6e11502.tar.gz gcc-fe95b0366a02855988d4c626de2045dae6e11502.tar.bz2 |
Implement N4197 - Adding u8 character literals
libcpp:
2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
Implement N4197 - Adding u8 character literals
* include/cpplib.h (UTF8CHAR, UTF8CHAR_USERDEF): New cpp tokens;
(struct cpp_options): Add utf8_char_literals.
* init.c (struct lang_flags): Add utf8_char_literals;
(struct lang_flags lang_defaults): Add column for utf8_char_literals.
* macro.c (stringify_arg()): Treat CPP_UTF8CHAR token.
* expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()):
Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens;
(cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token;
(eval_token(), _cpp_parse_expr()): Treat CPP_UTF8CHAR token.
* lex.c (lex_string(), _cpp_lex_direct()): Include CPP_UTF8CHAR tokens.
* charset.c (converter_for_type(), cpp_interpret_charconst()):
Treat CPP_UTF8CHAR token.
gcc/c-family:
2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
Implement N4197 - Adding u8 character literals
* c-family/c-ada-spec.c (print_ada_macros()): Treat CPP_UTF8CHAR
like CPP_CHAR.
* c-family/c-common.c (c_parse_error()): Print CPP_UTF8CHAR
and CPP_UTF8CHAR_USERDEF tokens.
* c-family/c-lex.c (c_lex_with_flags()): Treat CPP_UTF8CHAR_USERDEF
and CPP_UTF8CHAR tokens; (lex_charconst()): Treat CPP_UTF8CHAR token.
gcc/cp:
2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
Implement N4197 - Adding u8 character literals
* parser.c (cp_parser_primary_expression()): Treat CPP_UTF8CHAR
and CPP_UTF8CHAR_USERDEF tokens;
(cp_parser_parenthesized_expression_list()): Treat CPP_UTF8CHAR token.
gcc/testsuite:
2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net>
Implement N4197 - Adding u8 character literals
* g++.dg/cpp1z/utf8.C: New.
* g++.dg/cpp1z/utf8-neg.C: New.
* g++.dg/cpp1z/udlit-utf8char.C: New.
From-SVN: r225185
Diffstat (limited to 'libcpp')
-rw-r--r-- | libcpp/ChangeLog | 16 | ||||
-rw-r--r-- | libcpp/charset.c | 8 | ||||
-rw-r--r-- | libcpp/expr.c | 9 | ||||
-rw-r--r-- | libcpp/include/cpplib.h | 5 | ||||
-rw-r--r-- | libcpp/init.c | 36 | ||||
-rw-r--r-- | libcpp/lex.c | 6 | ||||
-rw-r--r-- | libcpp/macro.c | 2 |
7 files changed, 58 insertions, 24 deletions
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index c7ac1e3..4e29802 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,19 @@ +2015-06-30 Edward Smith-Rowland <3dw4rd@verizon.net> + + Implement N4197 - Adding u8 character literals + * include/cpplib.h (UTF8CHAR, UTF8CHAR_USERDEF): New cpp tokens; + (struct cpp_options): Add utf8_char_literals. + * init.c (struct lang_flags): Add utf8_char_literals; + (struct lang_flags lang_defaults): Add column for utf8_char_literals. + * macro.c (stringify_arg()): Treat CPP_UTF8CHAR token; + * expr.c (cpp_userdef_char_remove_type(), cpp_userdef_char_add_type()): + Treat CPP_UTF8CHAR_USERDEF, CPP_UTF8CHAR tokens; + (cpp_userdef_char_p()): Treat CPP_UTF8CHAR_USERDEF token; + (eval_token(), _cpp_parse_expr()): Treat CPP_UTF8CHAR token. + * lex.c (lex_string(), _cpp_lex_direct()): Include CPP_UTF8CHAR tokens. + * charset.c (converter_for_type(), cpp_interpret_charconst()): + Treat CPP_UTF8CHAR token. + 2015-06-30 Uros Bizjak <ubizjak@gmail.com> * lex.c (search_line_sse42) [__GCC_ASM_FLAG_OUTPUTS__]: New main diff --git a/libcpp/charset.c b/libcpp/charset.c index b427638..8e92bc6 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -1355,6 +1355,7 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type) { default: return pfile->narrow_cset_desc; + case CPP_UTF8CHAR: case CPP_UTF8STRING: return pfile->utf8_cset_desc; case CPP_CHAR16: @@ -1611,11 +1612,12 @@ cpp_interpret_charconst (cpp_reader *pfile, const cpp_token *token, unsigned int *pchars_seen, int *unsignedp) { cpp_string str = { 0, 0 }; - bool wide = (token->type != CPP_CHAR); + bool wide = (token->type != CPP_CHAR && token->type != CPP_UTF8CHAR); + int u8 = 2 * int(token->type == CPP_UTF8CHAR); cppchar_t result; - /* an empty constant will appear as L'', u'', U'' or '' */ - if (token->val.str.len == (size_t) (2 + wide)) + /* An empty constant will appear as L'', u'', U'', u8'', or '' */ + if (token->val.str.len == (size_t) (2 + wide + u8)) { cpp_error 
(pfile, CPP_DL_ERROR, "empty character constant"); return 0; diff --git a/libcpp/expr.c b/libcpp/expr.c index b8e88c5..3dc5c0b 100644 --- a/libcpp/expr.c +++ b/libcpp/expr.c @@ -307,6 +307,8 @@ cpp_userdef_char_remove_type (enum cpp_ttype type) return CPP_CHAR16; else if (type == CPP_CHAR32_USERDEF) return CPP_CHAR32; + else if (type == CPP_UTF8CHAR_USERDEF) + return CPP_UTF8CHAR; else return type; } @@ -325,6 +327,8 @@ cpp_userdef_char_add_type (enum cpp_ttype type) return CPP_CHAR16_USERDEF; else if (type == CPP_CHAR32) return CPP_CHAR32_USERDEF; + else if (type == CPP_UTF8CHAR) + return CPP_UTF8CHAR_USERDEF; else return type; } @@ -350,7 +354,8 @@ cpp_userdef_char_p (enum cpp_ttype type) if (type == CPP_CHAR_USERDEF || type == CPP_WCHAR_USERDEF || type == CPP_CHAR16_USERDEF - || type == CPP_CHAR32_USERDEF) + || type == CPP_CHAR32_USERDEF + || type == CPP_UTF8CHAR_USERDEF) return true; else return false; @@ -1029,6 +1034,7 @@ eval_token (cpp_reader *pfile, const cpp_token *token, case CPP_CHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: { cppchar_t cc = cpp_interpret_charconst (pfile, token, &temp, &unsignedp); @@ -1214,6 +1220,7 @@ _cpp_parse_expr (cpp_reader *pfile, bool is_if) case CPP_WCHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: case CPP_NAME: case CPP_HASH: if (!want_value) diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h index 1b1a53c..5eaea6b 100644 --- a/libcpp/include/cpplib.h +++ b/libcpp/include/cpplib.h @@ -119,6 +119,7 @@ struct _cpp_file; TK(WCHAR, LITERAL) /* L'char' */ \ TK(CHAR16, LITERAL) /* u'char' */ \ TK(CHAR32, LITERAL) /* U'char' */ \ + TK(UTF8CHAR, LITERAL) /* u8'char' */ \ TK(OTHER, LITERAL) /* stray punctuation */ \ \ TK(STRING, LITERAL) /* "string" */ \ @@ -133,6 +134,7 @@ struct _cpp_file; TK(WCHAR_USERDEF, LITERAL) /* L'char'_suffix - C++-0x */ \ TK(CHAR16_USERDEF, LITERAL) /* u'char'_suffix - C++-0x */ \ TK(CHAR32_USERDEF, LITERAL) /* U'char'_suffix - C++-0x */ \ + TK(UTF8CHAR_USERDEF, 
LITERAL) /* u8'char'_suffix - C++-0x */ \ TK(STRING_USERDEF, LITERAL) /* "string"_suffix - C++-0x */ \ TK(WSTRING_USERDEF, LITERAL) /* L"string"_suffix - C++-0x */ \ TK(STRING16_USERDEF, LITERAL) /* u"string"_suffix - C++-0x */ \ @@ -339,6 +341,9 @@ struct cpp_options /* Nonzero means process u/U prefix literals (UTF-16/32). */ unsigned char uliterals; + /* Nonzero means process u8 prefixed character literals (UTF-8). */ + unsigned char utf8_char_literals; + /* Nonzero means process r/R raw strings. If this is set, uliterals must be set as well. */ unsigned char rliterals; diff --git a/libcpp/init.c b/libcpp/init.c index 1ebd709..2d5626f 100644 --- a/libcpp/init.c +++ b/libcpp/init.c @@ -90,26 +90,27 @@ struct lang_flags char binary_constants; char digit_separators; char trigraphs; + char utf8_char_literals; }; static const struct lang_flags lang_defaults[] = -{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig */ - /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0 }, - /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 }, - /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1 }, - /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, - /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, - /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1 }, - /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1 }, - /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0 }, - /* CXX11 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1 }, - /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0 }, - /* CXX14 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, - /* GNUCXX1Z */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0 }, - /* CXX1Z */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 }, - /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit */ + /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 
0, 0, 0, 0, 0 }, + /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, + /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0 }, + /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0 }, + /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0 }, + /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0 }, + /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0 }, + /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, + /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0 }, + /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0 }, + /* CXX11 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0 }, + /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0 }, + /* CXX14 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 }, + /* GNUCXX1Z */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1 }, + /* CXX1Z */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1 }, + /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; /* Sets internal flags correctly for a given language. */ @@ -133,6 +134,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang) CPP_OPTION (pfile, binary_constants) = l->binary_constants; CPP_OPTION (pfile, digit_separators) = l->digit_separators; CPP_OPTION (pfile, trigraphs) = l->trigraphs; + CPP_OPTION (pfile, utf8_char_literals) = l->utf8_char_literals; } /* Initialize library global state. */ diff --git a/libcpp/lex.c b/libcpp/lex.c index 5758e58..8f2bdc8 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -1858,7 +1858,8 @@ lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base) else if (terminator == '\'') type = (*base == 'L' ? CPP_WCHAR : *base == 'U' ? CPP_CHAR32 : - *base == 'u' ? CPP_CHAR16 : CPP_CHAR); + *base == 'u' ? (base[1] == '8' ? 
CPP_UTF8CHAR : CPP_CHAR16) + : CPP_CHAR); else terminator = '>', type = CPP_HEADER_NAME; @@ -2403,7 +2404,8 @@ _cpp_lex_direct (cpp_reader *pfile) && CPP_OPTION (pfile, rliterals)) || (*buffer->cur == '8' && c == 'u' - && (buffer->cur[1] == '"' + && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\'' + && CPP_OPTION (pfile, utf8_char_literals))) || (buffer->cur[1] == 'R' && buffer->cur[2] == '"' && CPP_OPTION (pfile, rliterals))))) { diff --git a/libcpp/macro.c b/libcpp/macro.c index f76e10b..786c21b 100644 --- a/libcpp/macro.c +++ b/libcpp/macro.c @@ -531,7 +531,7 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg) || token->type == CPP_WSTRING || token->type == CPP_WCHAR || token->type == CPP_STRING32 || token->type == CPP_CHAR32 || token->type == CPP_STRING16 || token->type == CPP_CHAR16 - || token->type == CPP_UTF8STRING + || token->type == CPP_UTF8STRING || token->type == CPP_UTF8CHAR || cpp_userdef_string_p (token->type) || cpp_userdef_char_p (token->type)); |