7 files changed, 167 insertions, 20 deletions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f76a52c..d8f7f11 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2011-08-18  Joseph Myers  <joseph@codesourcery.com>
+
+	* gcc.dg/c1x-uni-string-1.c, gcc.dg/c1x-uni-string-2.c: New tests.
+
 2011-08-18  Tobias Burnus  <burnus@net-b.de>
 
 	PR fortran/18918
diff --git a/gcc/testsuite/gcc.dg/c1x-uni-string-1.c b/gcc/testsuite/gcc.dg/c1x-uni-string-1.c
new file mode 100644
index 0000000..30a98c1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c1x-uni-string-1.c
@@ -0,0 +1,112 @@
+/* Test Unicode strings in C1X.  Test valid code.  */
+/* { dg-do run } */
+/* { dg-options "-std=c1x -pedantic-errors" } */
+
+/* More thorough tests are in c-c++-common/raw-string-*.c; this test
+   verifies the particular subset (Unicode but not raw strings) that
+   is in C1X.  */
+
+typedef __CHAR16_TYPE__ char16_t; /* Types come from predefined macros.  */
+typedef __CHAR32_TYPE__ char32_t;
+typedef __SIZE_TYPE__ size_t;
+
+extern void abort (void); /* Declared by hand; no header is included.  */
+extern void exit (int);
+extern int memcmp (const void *, const void *, size_t);
+
+#define R "(R)" /* Each raw-string prefix is a macro yielding a marker.  */
+#define u8R "(u8R)"
+#define uR "(uR)"
+#define UR "(UR)"
+#define LR "(LR)"
+#define u8 randomu8 /* Must not expand in u8"", u"" and U"" below.  */
+#define u randomu
+#define U randomU
+
+const char su8[] = u8"a\u010d"; /* Expect U+010D as UTF-8 bytes C4 8D.  */
+const char su8a[] = "a\xc4\x8d";
+
+const char16_t su16[] = u"\u0567"; /* Expect one char16_t unit plus 0.  */
+const char16_t su16a[] = { 0x0567, 0 };
+
+const char32_t su32[] = U"\u0123"; /* Expect one char32_t unit plus 0.  */
+const char32_t su32a[] = { 0x0123, 0 };
+
+const char tu[] = R"a"; /* Expect macro R then "a", not a raw string.  */
+const char tua[] = "(R)a";
+
+const char tu8[] = u8R"b"; /* Likewise for macro u8R.  */
+const char tu8a[] = "(u8R)b";
+
+const char tu16[] = uR"c"; /* Likewise for macro uR.  */
+const char tu16a[] = "(uR)c";
+
+const char tu32[] = UR"d"; /* Likewise for macro UR.  */
+const char tu32a[] = "(UR)d";
+
+const char tl[] = LR"e"; /* Likewise for macro LR.  */
+const char tla[] = "(LR)e";
+
+#define str(x) #x /* Stringify the argument as spelled.  */
+const char ts[] = str(u"a" U"b" u8"c"); /* Prefixes must survive intact.  */
+const char tsa[] = "u\"a\" U\"b\" u8\"c\"";
+
+/* GCC always uses UTF-16 and UTF-32 for char16_t and char32_t.  */
+#ifndef __STDC_UTF_16__
+#error "__STDC_UTF_16__ not defined"
+#endif
+#ifndef __STDC_UTF_32__
+#error "__STDC_UTF_32__ not defined"
+#endif
+#define xstr(x) str(x) /* Expand the argument first, then stringify.  */
+const char tm16[] = xstr(__STDC_UTF_16__); /* Expect the expansion "1".  */
+const char tm16a[] = "1";
+const char tm32[] = xstr(__STDC_UTF_32__); /* Expect the expansion "1".  */
+const char tm32a[] = "1";
+
+int
+main (void) /* Abort on the first failed check; exit (0) if all pass.  */
+{
+  if (sizeof (su8) != sizeof (su8a) /* u8"" literal vs. explicit bytes.  */
+      || memcmp (su8, su8a, sizeof (su8)) != 0)
+    abort ();
+  if (sizeof (su16) != sizeof (su16a) /* u"" literal vs. char16_t array.  */
+      || memcmp (su16, su16a, sizeof (su16)) != 0)
+    abort ();
+  if (sizeof (su32) != sizeof (su32a) /* U"" literal vs. char32_t array.  */
+      || memcmp (su32, su32a, sizeof (su32)) != 0)
+    abort ();
+  if (sizeof (tu) != sizeof (tua) /* R"a" must be macro R followed by "a".  */
+      || memcmp (tu, tua, sizeof (tu)) != 0)
+    abort ();
+  if (sizeof (tu8) != sizeof (tu8a) /* Same check for u8R"b".  */
+      || memcmp (tu8, tu8a, sizeof (tu8)) != 0)
+    abort ();
+  if (sizeof (tu16) != sizeof (tu16a) /* Same check for uR"c".  */
+      || memcmp (tu16, tu16a, sizeof (tu16)) != 0)
+    abort ();
+  if (sizeof (tu32) != sizeof (tu32a) /* Same check for UR"d".  */
+      || memcmp (tu32, tu32a, sizeof (tu32)) != 0)
+    abort ();
+  if (sizeof (tl) != sizeof (tla) /* Same check for LR"e".  */
+      || memcmp (tl, tla, sizeof (tl)) != 0)
+    abort ();
+  if (sizeof (ts) != sizeof (tsa) /* Stringified prefixed literals.  */
+      || memcmp (ts, tsa, sizeof (ts)) != 0)
+    abort ();
+  if (sizeof (tm16) != sizeof (tm16a) /* __STDC_UTF_16__ expands to 1.  */
+      || memcmp (tm16, tm16a, sizeof (tm16)) != 0)
+    abort ();
+  if (sizeof (tm32) != sizeof (tm32a) /* __STDC_UTF_32__ expands to 1.  */
+      || memcmp (tm32, tm32a, sizeof (tm32)) != 0)
+    abort ();
+  if (u'\u0123' != 0x0123) /* u'' constant has the code point's value.  */
+    abort ();
+  if (U'\u0456' != 0x0456) /* U'' constant has the code point's value.  */
+    abort ();
+#undef u8 /* Drop the randomu8 definition before redefining u8.  */
+#define u8 /* Empty: u8'a' below is then macro u8 followed by 'a'.  */
+  if (u8'a' != 'a') /* Only compiles if u8' is not a literal prefix.  */
+    abort ();
+  exit (0);
+}
diff --git a/gcc/testsuite/gcc.dg/c1x-uni-string-2.c b/gcc/testsuite/gcc.dg/c1x-uni-string-2.c
new file mode 100644
index 0000000..698b0c1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/c1x-uni-string-2.c
@@ -0,0 +1,8 @@
+/* Test Unicode strings in C1X.  Constraint: L"" and u8"" literals must not be concatenated.  */
+/* { dg-do compile } */
+/* { dg-options "-std=c1x -pedantic-errors" } */
+
+const void *p1 = L"a" u8"b"; /* { dg-error "concatenation" } */
+const void *p2 = L"a" "b" u8"c"; /* { dg-error "concatenation" } */
+const void *p3 = u8"a" L"b"; /* { dg-error "concatenation" } */
+const void *p4 = u8"a" "b" L"c"; /* { dg-error "concatenation" } */
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index e136852..ad51473 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,11 @@
+2011-08-18  Joseph Myers  <joseph@codesourcery.com>
+
+	* include/cpplib.h (struct cpp_options): Add rliterals.
+	* init.c (struct lang_flags, lang_defaults): Add rliterals.
+	(cpp_set_lang): Set rliterals option.
+	(cpp_init_builtins): Define __STDC_UTF_16__ and __STDC_UTF_32__.
+	* lex.c (_cpp_lex_direct): Only accept raw strings if rliterals.
+
 2011-08-15  Gabriel Charette  <gchare@google.com>
 
 	* include/line-map.h (LINEMAP_POSITION_FOR_COLUMN): Remove.
diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 55b0f1b..30b1e98 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -1,6 +1,6 @@
 /* Definitions for CPP library.
    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-   2004, 2005, 2007, 2008, 2009, 2010
+   2004, 2005, 2007, 2008, 2009, 2010, 2011
    Free Software Foundation, Inc.
    Written by Per Bothner, 1994-95.
 
@@ -315,6 +315,10 @@ struct cpp_options
   /* Nonzero means process u/U prefix literals (UTF-16/32).  */
   unsigned char uliterals;
 
+  /* Nonzero means process r/R raw strings.  If this is set, uliterals
+     must be set as well.  */
+  unsigned char rliterals;
+
   /* Nonzero means print names of header files (-H).  */
   unsigned char print_include_names;
 
diff --git a/libcpp/init.c b/libcpp/init.c
index 5ba6666..1cbb9dd 100644
--- a/libcpp/init.c
+++ b/libcpp/init.c
@@ -1,7 +1,7 @@
 /* CPP Library.
    Copyright (C) 1986, 1987, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008,
-   2009, 2010 Free Software Foundation, Inc.
+   2009, 2010, 2011 Free Software Foundation, Inc.
    Contributed by Per Bothner, 1994-95.
    Based on CCCP program by Paul Rubin, June 1986
    Adapted to ANSI C, Richard Stallman, Jan 1987
@@ -79,22 +79,23 @@ struct lang_flags
   char cplusplus_comments;
   char digraphs;
   char uliterals;
+  char rliterals;
 };
 
 static const struct lang_flags lang_defaults[] =
-{ /*              c99 c++ xnum xid std  //   digr ulit */
-  /* GNUC89   */  { 0,  0,  1,   0,  0,   1,   1,   0 },
-  /* GNUC99   */  { 1,  0,  1,   0,  0,   1,   1,   1 },
-  /* GNUC1X   */  { 1,  0,  1,   0,  0,   1,   1,   1 },
-  /* STDC89   */  { 0,  0,  0,   0,  1,   0,   0,   0 },
-  /* STDC94   */  { 0,  0,  0,   0,  1,   0,   1,   0 },
-  /* STDC99   */  { 1,  0,  1,   0,  1,   1,   1,   0 },
-  /* STDC1X   */  { 1,  0,  1,   0,  1,   1,   1,   0 },
-  /* GNUCXX   */  { 0,  1,  1,   0,  0,   1,   1,   0 },
-  /* CXX98    */  { 0,  1,  1,   0,  1,   1,   1,   0 },
-  /* GNUCXX0X */  { 1,  1,  1,   0,  0,   1,   1,   1 },
-  /* CXX0X    */  { 1,  1,  1,   0,  1,   1,   1,   1 },
-  /* ASM      */  { 0,  0,  1,   0,  0,   1,   0,   0 }
+{ /*              c99 c++ xnum xid std  //   digr ulit rlit */
+  /* GNUC89   */  { 0,  0,  1,   0,  0,   1,   1,   0,   0 },
+  /* GNUC99   */  { 1,  0,  1,   0,  0,   1,   1,   1,   1 },
+  /* GNUC1X   */  { 1,  0,  1,   0,  0,   1,   1,   1,   1 },
+  /* STDC89   */  { 0,  0,  0,   0,  1,   0,   0,   0,   0 },
+  /* STDC94   */  { 0,  0,  0,   0,  1,   0,   1,   0,   0 },
+  /* STDC99   */  { 1,  0,  1,   0,  1,   1,   1,   0,   0 },
+  /* STDC1X   */  { 1,  0,  1,   0,  1,   1,   1,   1,   0 },
+  /* GNUCXX   */  { 0,  1,  1,   0,  0,   1,   1,   0,   0 },
+  /* CXX98    */  { 0,  1,  1,   0,  1,   1,   1,   0,   0 },
+  /* GNUCXX0X */  { 1,  1,  1,   0,  0,   1,   1,   1,   1 },
+  /* CXX0X    */  { 1,  1,  1,   0,  1,   1,   1,   1,   1 },
+  /* ASM      */  { 0,  0,  1,   0,  0,   1,   0,   0,   0 }
   /* xid should be 1 for GNUC99, STDC99, GNUCXX, CXX98, GNUCXX0X, and
      CXX0X when no longer experimental (when all uses of identifiers
      in the compiler have been audited for correct handling of
@@ -118,6 +119,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
   CPP_OPTION (pfile, cplusplus_comments)	 = l->cplusplus_comments;
   CPP_OPTION (pfile, digraphs)			 = l->digraphs;
   CPP_OPTION (pfile, uliterals)			 = l->uliterals;
+  CPP_OPTION (pfile, rliterals)			 = l->rliterals;
 }
 
 /* Initialize library global state.  */
@@ -464,6 +466,13 @@ cpp_init_builtins (cpp_reader *pfile, int hosted)
   else if (CPP_OPTION (pfile, c99))
     _cpp_define_builtin (pfile, "__STDC_VERSION__ 199901L");
 
+  if (CPP_OPTION (pfile, uliterals)
+      && !CPP_OPTION (pfile, cplusplus))
+    {
+      _cpp_define_builtin (pfile, "__STDC_UTF_16__ 1");
+      _cpp_define_builtin (pfile, "__STDC_UTF_32__ 1");
+    }
+
   if (hosted)
     _cpp_define_builtin (pfile, "__STDC_HOSTED__ 1");
   else
diff --git a/libcpp/lex.c b/libcpp/lex.c
index d460b98..463b5c8 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -1,6 +1,6 @@
 /* CPP Library - lexical analysis.
-   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010
-   Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010,
+   2011 Free Software Foundation, Inc.
    Contributed by Per Bothner, 1994-95.
    Based on CCCP program by Paul Rubin, June 1986
    Adapted to ANSI C, Richard Stallman, Jan 1987
@@ -2007,18 +2007,20 @@ _cpp_lex_direct (cpp_reader *pfile)
     case 'R':
       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
 	 wide strings or raw strings.  */
-      if (c == 'L' || CPP_OPTION (pfile, uliterals))
+      if (c == 'L' || CPP_OPTION (pfile, rliterals)
+	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
 	{
 	  if ((*buffer->cur == '\'' && c != 'R')
 	      || *buffer->cur == '"'
 	      || (*buffer->cur == 'R'
 		  && c != 'R'
 		  && buffer->cur[1] == '"'
-		  && CPP_OPTION (pfile, uliterals))
+		  && CPP_OPTION (pfile, rliterals))
 	      || (*buffer->cur == '8'
 		  && c == 'u'
 		  && (buffer->cur[1] == '"'
-		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'))))
+		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
+			  && CPP_OPTION (pfile, rliterals)))))
 	    {
 	      lex_string (pfile, result, buffer->cur - 1);
 	      break;