aboutsummaryrefslogtreecommitdiff
path: root/libcpp/charset.c
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2009-10-19 23:41:15 +0200
committerJakub Jelinek <jakub@gcc.gnu.org>2009-10-19 23:41:15 +0200
commit2c6e3f5540bc82374b7f71333f550cfccca93a74 (patch)
tree2156a8d43e7b0cb0eb28776c7a735b606e1caea8 /libcpp/charset.c
parent4d696ad0116dafa47b891ceb0db246ad8b81c8ff (diff)
downloadgcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.zip
gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.gz
gcc-2c6e3f5540bc82374b7f71333f550cfccca93a74.tar.bz2
charset.c (cpp_init_iconv): Initialize utf8_cset_desc.
* charset.c (cpp_init_iconv): Initialize utf8_cset_desc. (_cpp_destroy_iconv): Destroy utf8_cset_desc, char16_cset_desc and char32_cset_desc. (converter_for_type): Handle CPP_UTF8STRING. (cpp_interpret_string): Handle CPP_UTF8STRING and raw-strings. * directives.c (get__Pragma_string): Handle CPP_UTF8STRING. (parse_include): Reject raw strings. * include/cpplib.h (CPP_UTF8STRING): New token type. * internal.h (struct cpp_reader): Add utf8_cset_desc field. * lex.c (lex_raw_string): New function. (lex_string): Handle u8 string literals, call lex_raw_string for raw string literals. (_cpp_lex_direct): Call lex_string even for u8" and {,u,U,L,u8}R" sequences. * macro.c (stringify_arg): Handle CPP_UTF8STRING. * c-common.c (c_parse_error): Handle CPP_UTF8STRING. * c-lex.c (c_lex_with_flags): Likewise. Test C_LEX_STRING_NO_JOIN instead of C_LEX_RAW_STRINGS. (lex_string): Handle CPP_UTF8STRING. * c-parser.c (c_parser_postfix_expression): Likewise. * c-pragma.h (C_LEX_RAW_STRINGS): Rename to ... (C_LEX_STRING_NO_JOIN): ... this. * parser.c (cp_lexer_print_token, cp_parser_is_string_literal, cp_parser_string_literal, cp_parser_primary_expression): Likewise. (cp_lexer_get_preprocessor_token): Use C_LEX_STRING_JOIN instead of C_LEX_RAW_STRINGS. * gcc.dg/raw-string-1.c: New test. * gcc.dg/raw-string-2.c: New test. * gcc.dg/raw-string-3.c: New test. * gcc.dg/raw-string-4.c: New test. * gcc.dg/raw-string-5.c: New test. * gcc.dg/raw-string-6.c: New test. * gcc.dg/raw-string-7.c: New test. * gcc.dg/utf8-1.c: New test. * gcc.dg/utf8-2.c: New test. * gcc.dg/utf-badconcat2.c: New test. * gcc.dg/utf-dflt2.c: New test. * gcc.dg/cpp/include6.c: New test. * g++.dg/ext/raw-string-1.C: New test. * g++.dg/ext/raw-string-2.C: New test. * g++.dg/ext/raw-string-3.C: New test. * g++.dg/ext/raw-string-4.C: New test. * g++.dg/ext/raw-string-5.C: New test. * g++.dg/ext/raw-string-6.C: New test. * g++.dg/ext/raw-string-7.C: New test. * g++.dg/ext/utf8-1.C: New test. * g++.dg/ext/utf8-2.C: New test. * g++.dg/ext/utf-badconcat2.C: New test. * g++.dg/ext/utf-dflt2.C: New test. From-SVN: r152995
Diffstat (limited to 'libcpp/charset.c')
-rw-r--r--libcpp/charset.c52
1 files changed, 51 insertions, 1 deletions
diff --git a/libcpp/charset.c b/libcpp/charset.c
index bd24ec24..837ccd7 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -721,6 +721,8 @@ cpp_init_iconv (cpp_reader *pfile)
pfile->narrow_cset_desc = init_iconv_desc (pfile, ncset, SOURCE_CHARSET);
pfile->narrow_cset_desc.width = CPP_OPTION (pfile, char_precision);
+ pfile->utf8_cset_desc = init_iconv_desc (pfile, "UTF-8", SOURCE_CHARSET);
+ pfile->utf8_cset_desc.width = CPP_OPTION (pfile, char_precision);
pfile->char16_cset_desc = init_iconv_desc (pfile,
be ? "UTF-16BE" : "UTF-16LE",
SOURCE_CHARSET);
@@ -741,6 +743,12 @@ _cpp_destroy_iconv (cpp_reader *pfile)
{
if (pfile->narrow_cset_desc.func == convert_using_iconv)
iconv_close (pfile->narrow_cset_desc.cd);
+ if (pfile->utf8_cset_desc.func == convert_using_iconv)
+ iconv_close (pfile->utf8_cset_desc.cd);
+ if (pfile->char16_cset_desc.func == convert_using_iconv)
+ iconv_close (pfile->char16_cset_desc.cd);
+ if (pfile->char32_cset_desc.func == convert_using_iconv)
+ iconv_close (pfile->char32_cset_desc.cd);
if (pfile->wide_cset_desc.func == convert_using_iconv)
iconv_close (pfile->wide_cset_desc.cd);
}
@@ -1339,6 +1347,8 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
{
default:
return pfile->narrow_cset_desc;
+ case CPP_UTF8STRING:
+ return pfile->utf8_cset_desc;
case CPP_CHAR16:
case CPP_STRING16:
return pfile->char16_cset_desc;
@@ -1373,7 +1383,47 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
for (i = 0; i < count; i++)
{
p = from[i].text;
- if (*p == 'L' || *p == 'u' || *p == 'U') p++;
+ if (*p == 'u')
+ {
+ if (*++p == '8')
+ p++;
+ }
+ else if (*p == 'L' || *p == 'U') p++;
+ if (*p == 'R')
+ {
+ const uchar *prefix;
+
+ /* Skip over 'R"'. */
+ p += 2;
+ prefix = p;
+ while (*p != '[')
+ p++;
+ p++;
+ limit = from[i].text + from[i].len;
+ if (limit >= p + (p - prefix) + 1)
+ limit -= (p - prefix) + 1;
+
+ for (;;)
+ {
+ base = p;
+ while (p < limit && (*p != '\\' || (p[1] != 'u' && p[1] != 'U')))
+ p++;
+ if (p > base)
+ {
+ /* We have a run of normal characters; these can be fed
+ directly to convert_cset. */
+ if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
+ goto fail;
+ }
+ if (p == limit)
+ break;
+
+ p = convert_ucn (pfile, p + 1, limit, &tbuf, cvt);
+ }
+
+ continue;
+ }
+
p++; /* Skip leading quote. */
limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */