aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Joseph Myers <joseph@codesourcery.com> 2014-11-06 21:08:52 +0000
committer: Joseph Myers <jsm28@gcc.gnu.org> 2014-11-06 21:08:52 +0000
commit: be5ffc59ad88c5cd1cf6f568ec73001228cf28d5 (patch)
tree: d1827c82a91ebb41554320d1b791e364d6dd406d /gcc
parent: 2f1b0141be5581d0795c3369dcb9eaf27abe35f7 (diff)
download: gcc-be5ffc59ad88c5cd1cf6f568ec73001228cf28d5.zip
gcc-be5ffc59ad88c5cd1cf6f568ec73001228cf28d5.tar.gz
gcc-be5ffc59ad88c5cd1cf6f568ec73001228cf28d5.tar.bz2
Preserve original spellings of extended identifiers.
This patch makes cpplib track the original spellings of extended identifiers, as well as the canonical UTF-8 version, in order to follow standard semantics properly without needing a convoluted and undocumented canonicalization in translation phase 1 (see bug 9449 comments 39-46 regarding such a canonicalization). The spelling is tracked in cpp_identifier and cpp_macro_arg without making cpp_token any larger. The original spelling is used for checks of duplicate macro definitions, stringizing (see the C++ tests added; this case is only an issue for C++ not C because C makes it implementation-defined whether a \ is inserted before the \ of a UCN in a string or character constant when stringizing, while C++ does not), pasting (relevant when the result is then stringized for C++) and when macro definitions are output as text (e.g. for -d options). Once a macro has been defined, only the original spelling of the argument names needs keeping in the argument list. While it is being defined, however, both spellings are needed: the original one for subsequent saving for checks of duplicate macro definitions, and the canonical one which is the node marked specially to generate macro argument tokens rather than normal identifier tokens. The buffer that is used to save the original values of the identifier tokens is changed so that it stores both those original values and a pointer to the canonical hash nodes, so that those canonical nodes can be found when their values need restoring after the macro definition has been parsed. I believe this covers the known standards issues in extended identifiers support (the remaining unimplemented C99 areas in GCC all being floating-point-related), except for C++ translation of extended characters to UCNs in phase 1 (which I have no plans to work on). 
There are however probably issues left with handling of extended identifiers in other places, as listed in <https://gcc.gnu.org/ml/gcc-patches/2014-11/msg00337.html> (those issues are generally the sort of thing that could be addressed as bugs outside development stage 1). (The bulk of the potential issues Zack was concerned about in 2003-5, that resulted in extended identifiers being disabled in the absence of -fextended-identifiers, were effectively eliminated by the audit and fixes I did in 2009, however; that todo list reflects what was left over after that audit.)

Bootstrapped with no regressions on x86_64-unknown-linux-gnu.

libcpp:
* include/cpp-id-data.h (struct cpp_macro): Update comment regarding parameters.
* include/cpplib.h (struct cpp_macro_arg, struct cpp_identifier): Add spelling fields.
(struct cpp_token): Update comment on macro_arg.
* internal.h (_cpp_save_parameter): Add extra argument.
(_cpp_spell_ident_ucns): New declaration.
* lex.c (lex_identifier): Add SPELLING argument. Set *SPELLING to original spelling of identifier.
(_cpp_lex_direct): Update calls to lex_identifier.
(_cpp_spell_ident_ucns): New function, factored out of cpp_spell_token.
(cpp_spell_token): Adjust FORSTRING argument semantics to return original spelling of identifiers. Use _cpp_spell_ident_ucns in !FORSTRING case.
(_cpp_equiv_tokens): Check spellings of identifiers and macro arguments are identical.
* macro.c (macro_arg_saved_data): New structure.
(paste_tokens): Use original spellings of identifiers from cpp_spell_token.
(_cpp_save_parameter): Add argument SPELLING. Save both canonical node and its value.
(parse_params): Update calls to _cpp_save_parameter.
(lex_expansion_token): Save spelling of macro argument tokens.
(_cpp_create_definition): Extract canonical node from saved data.
(cpp_macro_definition): Use UCNs in spelling of macro name. Use original spellings of macro argument tokens and identifiers.
* traditional.c (scan_parameters): Update call to _cpp_save_parameter.

gcc:
* doc/invoke.texi (-std=c99, -std=c11): Don't refer to corner cases of extended identifiers.

gcc/testsuite:
* g++.dg/cpp/ucnid-2.C, g++.dg/cpp/ucnid-3.C, gcc.dg/cpp/ucnid-11.c, gcc.dg/cpp/ucnid-12.c, gcc.dg/cpp/ucnid-13.c, gcc.dg/cpp/ucnid-14.c, gcc.dg/cpp/ucnid-15.c: New tests.

From-SVN: r217202
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 5
-rw-r--r-- gcc/doc/invoke.texi 5
-rw-r--r-- gcc/testsuite/ChangeLog 7
-rw-r--r-- gcc/testsuite/g++.dg/cpp/ucnid-2.C 17
-rw-r--r-- gcc/testsuite/g++.dg/cpp/ucnid-3.C 16
-rw-r--r-- gcc/testsuite/gcc.dg/cpp/ucnid-11.c 30
-rw-r--r-- gcc/testsuite/gcc.dg/cpp/ucnid-12.c 13
-rw-r--r-- gcc/testsuite/gcc.dg/cpp/ucnid-13.c 6
-rw-r--r-- gcc/testsuite/gcc.dg/cpp/ucnid-14.c 6
-rw-r--r-- gcc/testsuite/gcc.dg/cpp/ucnid-15.c 6
10 files changed, 108 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 57976f5..320e0d6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2014-11-06 Joseph Myers <joseph@codesourcery.com>
+
+ * doc/invoke.texi (-std=c99, -std=c11): Don't refer to corner
+ cases of extended identifiers.
+
2014-11-06 Eric Botcazou <ebotcazou@adacore.com>
* tree-cfgcleanup.c (fixup_noreturn_call): Do not perform DCE here.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 2f89284..0819804 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1674,7 +1674,7 @@ ISO C90 as modified in amendment 1.
@itemx iso9899:1999
@itemx iso9899:199x
ISO C99. This standard is substantially completely supported, modulo
-bugs, corner cases of extended identifiers and floating-point issues
+bugs and floating-point issues
(mainly but not entirely relating to optional C99 features from
Annexes F and G). See
@w{@uref{http://gcc.gnu.org/c99status.html}} for more information. The
@@ -1684,8 +1684,7 @@ names @samp{c9x} and @samp{iso9899:199x} are deprecated.
@itemx c1x
@itemx iso9899:2011
ISO C11, the 2011 revision of the ISO C standard. This standard is
-substantially completely supported, modulo bugs, corner cases of
-extended identifiers, floating-point issues
+substantially completely supported, modulo bugs, floating-point issues
(mainly but not entirely relating to optional C11 features from
Annexes F and G) and the optional Annexes K (Bounds-checking
interfaces) and L (Analyzability). The name @samp{c1x} is deprecated.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bc50a71..b1397f1 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2014-11-06 Joseph Myers <joseph@codesourcery.com>
+
+ * g++.dg/cpp/ucnid-2.C, g++.dg/cpp/ucnid-3.C,
+ gcc.dg/cpp/ucnid-11.c, gcc.dg/cpp/ucnid-12.c,
+ gcc.dg/cpp/ucnid-13.c, gcc.dg/cpp/ucnid-14.c,
+ gcc.dg/cpp/ucnid-15.c: New tests.
+
2014-11-06 Eric Botcazou <ebotcazou@adacore.com>
* gnat.dg/opt43.adb: New test.
diff --git a/gcc/testsuite/g++.dg/cpp/ucnid-2.C b/gcc/testsuite/g++.dg/cpp/ucnid-2.C
new file mode 100644
index 0000000..2ffb3c9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/ucnid-2.C
@@ -0,0 +1,17 @@
+/* Test stringization of identifiers with UCNs preserves spelling. */
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <string.h>
+
+#define h(s) #s
+#define str(s) h(s)
+
+int
+main ()
+{
+ if (strcmp (str (str (\u00c1)), "\"\\u00c1\""))
+ abort ();
+ if (strcmp (str (str (\u00C1)), "\"\\u00C1\""))
+ abort ();
+}
diff --git a/gcc/testsuite/g++.dg/cpp/ucnid-3.C b/gcc/testsuite/g++.dg/cpp/ucnid-3.C
new file mode 100644
index 0000000..0db9aaa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/ucnid-3.C
@@ -0,0 +1,16 @@
+/* Test pasting of identifiers with UCNs preserves spelling. */
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <string.h>
+
+#define c(s1, s2) s1 ## s2
+#define h(s) #s
+#define str(s) h(s)
+
+int
+main ()
+{
+ if (strcmp (str (str (c (\u00c1, \u00C1))), "\"\\u00c1\\u00C1\""))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-11.c b/gcc/testsuite/gcc.dg/cpp/ucnid-11.c
new file mode 100644
index 0000000..a44a3ea
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/ucnid-11.c
@@ -0,0 +1,30 @@
+/* Test spelling differences in UCNs are properly diagnosed for macro
+ redefinitions. */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -pedantic-errors" } */
+
+/* Different spelling of UCN in expansion. */
+#define m1 \u00c1 /* { dg-message "previous definition" } */
+#define m1 \u00C1 /* { dg-error "redefined" } */
+
+#define m1ok \u00c1
+#define m1ok \u00c1
+
+/* Different spelling of UCN in argument name. */
+#define m2(\u00c1) /* { dg-message "previous definition" } */
+#define m2(\u00C1) /* { dg-error "redefined" } */
+
+#define m2ok(\u00c1)
+#define m2ok(\u00c1)
+
+/* Same spelling in argument name but different spelling when used in
+ expansion. */
+#define m3(\u00c1) \u00c1 /* { dg-message "previous definition" } */
+#define m3(\u00c1) \u00C1 /* { dg-error "redefined" } */
+
+#define m3ok(\u00c1) \u00C1
+#define m3ok(\u00c1) \u00C1
+
+/* Different spelling of the macro name itself is OK. */
+#define m4ok\u00c1
+#define m4ok\u00C1
diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-12.c b/gcc/testsuite/gcc.dg/cpp/ucnid-12.c
new file mode 100644
index 0000000..2932768
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/ucnid-12.c
@@ -0,0 +1,13 @@
+/* Test spelling differences in UCNs in macro definitions still count
+ as the same identifier for macro expansion. */
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -pedantic-errors" } */
+
+#define m1\u00c1
+#ifndef m1\u00C1
+#error not defined
+#endif
+
+#define m2(\u00c1) \u00C1
+
+int i = m2 (0);
diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-13.c b/gcc/testsuite/gcc.dg/cpp/ucnid-13.c
new file mode 100644
index 0000000..b8778fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/ucnid-13.c
@@ -0,0 +1,6 @@
+/* Verify macros named with UCNs are output in -dD output with UCNs,
+ not UTF-8. */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -dD" } */
+/* { dg-final { scan-file ucnid-13.i "\\\\U000000c1" } } */
+#define \u00c1 1
diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-14.c b/gcc/testsuite/gcc.dg/cpp/ucnid-14.c
new file mode 100644
index 0000000..4653aff
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/ucnid-14.c
@@ -0,0 +1,6 @@
+/* Verify macro definitions with UCNs are output in -dD output with
+ the original spelling. */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -dD" } */
+/* { dg-final { scan-file ucnid-14.i "\\\\u00c1" } } */
+#define a \u00c1
diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-15.c b/gcc/testsuite/gcc.dg/cpp/ucnid-15.c
new file mode 100644
index 0000000..2d73292
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/ucnid-15.c
@@ -0,0 +1,6 @@
+/* Verify macro definitions with UCNs in argument names are output in
+ -dD output with the original spelling. */
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -dD" } */
+/* { dg-final { scan-file ucnid-15.i "#define a\\(\\\\u00c1\\) x:\\\\u00C1:y:\\\\u00c1:z" } } */
+#define a(\u00c1) x:\u00C1:y:\u00c1:z