aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorGeoffrey Keating <geoffk@apple.com>2005-03-15 00:36:33 +0000
committerGeoffrey Keating <geoffk@gcc.gnu.org>2005-03-15 00:36:33 +0000
commit50668cf626cf30043890f1000f500ce69a54fedb (patch)
treed3cd092701f32b8f84eec7a95a4e244aafcf795e /gcc
parentcd8b38b9eb3dfdc7709ad0088ff543a3a2df67ec (diff)
downloadgcc-50668cf626cf30043890f1000f500ce69a54fedb.zip
gcc-50668cf626cf30043890f1000f500ce69a54fedb.tar.gz
gcc-50668cf626cf30043890f1000f500ce69a54fedb.tar.bz2
Index: gcc/ChangeLog
2005-03-14 Geoffrey Keating <geoffk@apple.com> * doc/cppopts.texi (-fexec-charset): Add concept index entry. (-fwide-exec-charset): Likewise. (-finput-charset): Likewise. * doc/invoke.texi (Warning Options): Document -Wnormalized=. * c-opts.c (c_common_handle_option): Handle -Wnormalized=. * c.opt (Wnormalized): New. Index: libcpp/ChangeLog 2005-03-14 Geoffrey Keating <geoffk@apple.com> * init.c (cpp_create_reader): Default warn_normalize to normalized_C. * charset.c: Update for new format of ucnid.h. (ucn_valid_in_identifier): Update for new format of ucnid.h. Add NST parameter, and update it; update callers. (cpp_valid_ucn): Add NST parameter, update callers. Replace abort with cpp_error. (convert_ucn): Pass normalize_state to cpp_valid_ucn. * internal.h (struct normalize_state): New. (INITIAL_NORMALIZE_STATE): New. (NORMALIZE_STATE_RESULT): New. (NORMALIZE_STATE_UPDATE_IDNUM): New. (_cpp_valid_ucn): New. * lex.c (warn_about_normalization): New. (forms_identifier_p): Add normalize_state parameter, update callers. (lex_identifier): Add normalize_state parameter, update callers. Keep the state current. (lex_number): Likewise. (_cpp_lex_direct): Pass normalize_state to subroutines. Check it with warn_about_normalization. * makeucnid.c: New. * ucnid.h: Replace. * ucnid.pl: Remove. * ucnid.tab: Make appropriate for input to makeucnid.c. Remove comments about obsolete version of C++. * include/cpplib.h (enum cpp_normalize_level): New. (struct cpp_options): Add warn_normalize field. Index: gcc/testsuite/ChangeLog 2005-03-14 Geoffrey Keating <geoffk@apple.com> * gcc.dg/cpp/normalize-1.c: New. * gcc.dg/cpp/normalize-2.c: New. * gcc.dg/cpp/normalize-3.c: New. * gcc.dg/cpp/normalize-4.c: New. * gcc.dg/cpp/ucnid-4.c: New. * gcc.dg/cpp/ucnid-5.c: New. * g++.dg/cpp/normalize-1.C: New. * g++.dg/cpp/ucnid-1.C: New. From-SVN: r96459
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog9
-rw-r--r--gcc/c-opts.c13
-rw-r--r--gcc/c.opt4
-rw-r--r--gcc/doc/cppopts.texi3
-rw-r--r--gcc/doc/invoke.texi45
-rw-r--r--gcc/testsuite/ChangeLog11
-rw-r--r--gcc/testsuite/g++.dg/cpp/normalize-1.C34
-rw-r--r--gcc/testsuite/g++.dg/cpp/ucnid-1.C17
-rw-r--r--gcc/testsuite/gcc.dg/cpp/normalize-1.c34
-rw-r--r--gcc/testsuite/gcc.dg/cpp/normalize-2.c34
-rw-r--r--gcc/testsuite/gcc.dg/cpp/normalize-3.c34
-rw-r--r--gcc/testsuite/gcc.dg/cpp/normalize-4.c34
-rw-r--r--gcc/testsuite/gcc.dg/cpp/ucnid-4.c17
-rw-r--r--gcc/testsuite/gcc.dg/cpp/ucnid-5.c17
14 files changed, 306 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9150a8c..13e2a83 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2005-03-14 Geoffrey Keating <geoffk@apple.com>
+
+ * doc/cppopts.texi (-fexec-charset): Add concept index entry.
+ (-fwide-exec-charset): Likewise.
+ (-finput-charset): Likewise.
+ * doc/invoke.texi (Warning Options): Document -Wnormalized=.
+ * c-opts.c (c_common_handle_option): Handle -Wnormalized=.
+ * c.opt (Wnormalized): New.
+
2005-03-14 Devang Patel <dpatel@apple.com>
* doc/invoke.texi: Add reference to Visibility document.
diff --git a/gcc/c-opts.c b/gcc/c-opts.c
index 128c83a..731511a 100644
--- a/gcc/c-opts.c
+++ b/gcc/c-opts.c
@@ -460,6 +460,19 @@ c_common_handle_option (size_t scode, const char *arg, int value)
cpp_opts->warn_multichar = value;
break;
+ case OPT_Wnormalized_:
+ if (!value || (arg && strcasecmp (arg, "none") == 0))
+ cpp_opts->warn_normalize = normalized_none;
+ else if (!arg || strcasecmp (arg, "nfkc") == 0)
+ cpp_opts->warn_normalize = normalized_KC;
+ else if (strcasecmp (arg, "id") == 0)
+ cpp_opts->warn_normalize = normalized_identifier_C;
+ else if (strcasecmp (arg, "nfc") == 0)
+ cpp_opts->warn_normalize = normalized_C;
+ else
+ error ("argument %qs to %<-Wnormalized%> not recognized", arg);
+ break;
+
case OPT_Wreturn_type:
warn_return_type = value;
break;
diff --git a/gcc/c.opt b/gcc/c.opt
index 2582661..e5260f1 100644
--- a/gcc/c.opt
+++ b/gcc/c.opt
@@ -285,6 +285,10 @@ Wnonnull
C ObjC Var(warn_nonnull)
Warn about NULL being passed to argument slots marked as requiring non-NULL
+Wnormalized=
+C ObjC C++ ObjC++ Joined
+-Wnormalized=<none|id|nfc|nfkc> Warn about non-normalized Unicode strings
+
Wold-style-cast
C++ ObjC++ Var(warn_old_style_cast)
Warn if a C-style cast is used in a program
diff --git a/gcc/doc/cppopts.texi b/gcc/doc/cppopts.texi
index 872cffc..c6376c6 100644
--- a/gcc/doc/cppopts.texi
+++ b/gcc/doc/cppopts.texi
@@ -530,12 +530,14 @@ ignored. The default is 8.
@item -fexec-charset=@var{charset}
@opindex fexec-charset
+@cindex character set, execution
Set the execution character set, used for string and character
constants. The default is UTF-8. @var{charset} can be any encoding
supported by the system's @code{iconv} library routine.
@item -fwide-exec-charset=@var{charset}
@opindex fwide-exec-charset
+@cindex character set, wide execution
Set the wide execution character set, used for wide string and
character constants. The default is UTF-32 or UTF-16, whichever
corresponds to the width of @code{wchar_t}. As with
@@ -545,6 +547,7 @@ problems with encodings that do not fit exactly in @code{wchar_t}.
@item -finput-charset=@var{charset}
@opindex finput-charset
+@cindex character set, input
Set the input character set, used for translation from the character
set of the input file to the source character set used by GCC@. If the
locale does not specify, or GCC cannot get this information from the
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 51cebb5..2e08c4f 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -3039,6 +3039,51 @@ Do not warn if a multicharacter constant (@samp{'FOOF'}) is used.
Usually they indicate a typo in the user's code, as they have
implementation-defined values, and should not be used in portable code.
+@item -Wnormalized=<none|id|nfc|nfkc>
+@opindex Wnormalized
+@cindex NFC
+@cindex NFKC
+@cindex character set, input normalization
+In ISO C and ISO C++, two identifiers are different if they are
+different sequences of characters. However, sometimes when characters
+outside the basic ASCII character set are used, you can have two
+different character sequences that look the same. To avoid confusion,
+the ISO 10646 standard sets out some @dfn{normalization rules} which
+when applied ensure that two sequences that look the same are turned into
+the same sequence. GCC can warn you if you are using identifiers which
+have not been normalized; this option controls that warning.
+
+There are four levels of warning that GCC supports. The default is
+@option{-Wnormalized=nfc}, which warns about any identifier which is
+not in the ISO 10646 ``C'' normalized form, @dfn{NFC}. NFC is the
+recommended form for most uses.
+
+Unfortunately, there are some characters allowed in identifiers by
+ISO C and ISO C++ that, when turned into NFC, are not allowed in
+identifiers. That is, there's no way to use these symbols in portable
+ISO C or C++ and have all your identifiers in NFC.
+@option{-Wnormalized=id} suppresses the warning for these characters.
+It is hoped that future versions of the standards involved will correct
+this, which is why this option is not the default.
+
+You can switch the warning off for all characters by writing
+@option{-Wnormalized=none}. You would only want to do this if you
+were using some other normalization scheme (like ``D''), because
+otherwise you can easily create bugs that are literally impossible to see.
+
+Some characters in ISO 10646 have distinct meanings but look identical
+in some fonts or display methodologies, especially once formatting has
+been applied. For instance @code{\u207F}, ``SUPERSCRIPT LATIN SMALL
+LETTER N'', will display just like a regular @code{n} which has been
+placed in a superscript. ISO 10646 defines the @dfn{NFKC}
+normalization scheme to convert all these into a standard form as
+well, and GCC will warn if your code is not in NFKC if you use
+@option{-Wnormalized=nfkc}. This warning is comparable to warning
+about every identifier that contains the letter O because it might be
+confused with the digit 0, and so is not the default, but may be
+useful as a local coding convention if the programming environment
+cannot be fixed to display these characters distinctly.
+
@item -Wno-deprecated-declarations
@opindex Wno-deprecated-declarations
Do not warn about uses of functions, variables, and types marked as
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3053e82..647f155 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,14 @@
+2005-03-14 Geoffrey Keating <geoffk@apple.com>
+
+ * gcc.dg/cpp/normalize-1.c: New.
+ * gcc.dg/cpp/normalize-2.c: New.
+ * gcc.dg/cpp/normalize-3.c: New.
+ * gcc.dg/cpp/normalize-4.c: New.
+ * gcc.dg/cpp/ucnid-4.c: New.
+ * gcc.dg/cpp/ucnid-5.c: New.
+ * g++.dg/cpp/normalize-1.C: New.
+ * g++.dg/cpp/ucnid-1.C: New.
+
2005-03-14 Alexandre Oliva <aoliva@redhat.com>
* gcc.dg/pr18628.c: New.
diff --git a/gcc/testsuite/g++.dg/cpp/normalize-1.C b/gcc/testsuite/g++.dg/cpp/normalize-1.C
new file mode 100644
index 0000000..8c49602
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/normalize-1.C
@@ -0,0 +1,34 @@
+/* { dg-do preprocess } */
+/* { dg-options "-Wnormalized=id" } */
+
+\u00AA
+\u00B7
+\u0F43 /* { dg-warning "not in NFC" } */
+a\u05B8\u05B9\u05B9\u05BBb
+ a\u05BB\u05B9\u05B8\u05B9b /* { dg-warning "not in NFC" } */
+\u09CB
+\u09C7\u09BE /* { dg-warning "not in NFC" } */
+\u0B4B
+\u0B47\u0B3E /* { dg-warning "not in NFC" } */
+\u0BCA
+\u0BC6\u0BBE /* { dg-warning "not in NFC" } */
+\u0BCB
+\u0BC7\u0BBE /* { dg-warning "not in NFC" } */
+\u0CCA
+\u0CC6\u0CC2 /* { dg-warning "not in NFC" } */
+\u0D4A
+\u0D46\u0D3E /* { dg-warning "not in NFC" } */
+\u0D4B
+\u0D47\u0D3E /* { dg-warning "not in NFC" } */
+
+K
+\u212A
+
+\u03AC
+\u1F71 /* { dg-warning "not in NFC" } */
+
+\uAC00
+\u1100\u1161
+\uAC01
+\u1100\u1161\u11A8
+\uAC00\u11A8
diff --git a/gcc/testsuite/g++.dg/cpp/ucnid-1.C b/gcc/testsuite/g++.dg/cpp/ucnid-1.C
new file mode 100644
index 0000000..ccbb1ea
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/ucnid-1.C
@@ -0,0 +1,17 @@
+/* { dg-do preprocess } */
+/* { dg-options "-pedantic" } */
+
+\u00AA /* { dg-error "not valid in an identifier" } */
+\u00AB /* { dg-error "not valid in an identifier" } */
+\u00B6 /* { dg-error "not valid in an identifier" } */
+\u00BA /* { dg-error "not valid in an identifier" } */
+\u00C0
+\u00D6
+\u0384
+
+\u0669 /* { dg-error "not valid in an identifier" } */
+A\u0669 /* { dg-error "not valid in an identifier" } */
+0\u00BA /* { dg-error "not valid in an identifier" } */
+0\u0669 /* { dg-error "not valid in an identifier" } */
+\u0E59
+A\u0E59
diff --git a/gcc/testsuite/gcc.dg/cpp/normalize-1.c b/gcc/testsuite/gcc.dg/cpp/normalize-1.c
new file mode 100644
index 0000000..768e193
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/normalize-1.c
@@ -0,0 +1,34 @@
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99" } */
+
+\u00AA
+\u00B7
+\u0F43 /* { dg-warning "not in NFC" } */
+a\u05B8\u05B9\u05B9\u05BBb
+ a\u05BB\u05B9\u05B8\u05B9b /* { dg-warning "not in NFC" } */
+\u09CB
+\u09C7\u09BE /* { dg-warning "not in NFC" } */
+\u0B4B
+\u0B47\u0B3E /* { dg-warning "not in NFC" } */
+\u0BCA
+\u0BC6\u0BBE /* { dg-warning "not in NFC" } */
+\u0BCB
+\u0BC7\u0BBE /* { dg-warning "not in NFC" } */
+\u0CCA
+\u0CC6\u0CC2 /* { dg-warning "not in NFC" } */
+\u0D4A
+\u0D46\u0D3E /* { dg-warning "not in NFC" } */
+\u0D4B
+\u0D47\u0D3E /* { dg-warning "not in NFC" } */
+
+K
+\u212A /* { dg-warning "not in NFC" } */
+
+\u03AC
+\u1F71 /* { dg-warning "not in NFC" } */
+
+\uAC00
+\u1100\u1161 /* { dg-warning "not in NFC" } */
+\uAC01
+\u1100\u1161\u11A8 /* { dg-warning "not in NFC" } */
+\uAC00\u11A8 /* { dg-warning "not in NFC" } */
diff --git a/gcc/testsuite/gcc.dg/cpp/normalize-2.c b/gcc/testsuite/gcc.dg/cpp/normalize-2.c
new file mode 100644
index 0000000..28ef2f1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/normalize-2.c
@@ -0,0 +1,34 @@
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -Wnormalized=nfkc" } */
+
+\u00AA /* { dg-warning "not in NFKC" } */
+\u00B7
+\u0F43 /* { dg-warning "not in NFC" } */
+a\u05B8\u05B9\u05B9\u05BBb
+ a\u05BB\u05B9\u05B8\u05B9b /* { dg-warning "not in NFC" } */
+\u09CB
+\u09C7\u09BE /* { dg-warning "not in NFC" } */
+\u0B4B
+\u0B47\u0B3E /* { dg-warning "not in NFC" } */
+\u0BCA
+\u0BC6\u0BBE /* { dg-warning "not in NFC" } */
+\u0BCB
+\u0BC7\u0BBE /* { dg-warning "not in NFC" } */
+\u0CCA
+\u0CC6\u0CC2 /* { dg-warning "not in NFC" } */
+\u0D4A
+\u0D46\u0D3E /* { dg-warning "not in NFC" } */
+\u0D4B
+\u0D47\u0D3E /* { dg-warning "not in NFC" } */
+
+K
+\u212A /* { dg-warning "not in NFC" } */
+
+\u03AC
+\u1F71 /* { dg-warning "not in NFC" } */
+
+\uAC00
+\u1100\u1161 /* { dg-warning "not in NFC" } */
+\uAC01
+\u1100\u1161\u11A8 /* { dg-warning "not in NFC" } */
+\uAC00\u11A8 /* { dg-warning "not in NFC" } */
diff --git a/gcc/testsuite/gcc.dg/cpp/normalize-3.c b/gcc/testsuite/gcc.dg/cpp/normalize-3.c
new file mode 100644
index 0000000..0407492
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/normalize-3.c
@@ -0,0 +1,34 @@
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -Wnormalized=id" } */
+
+\u00AA
+\u00B7
+\u0F43 /* { dg-warning "not in NFC" } */
+a\u05B8\u05B9\u05B9\u05BBb
+ a\u05BB\u05B9\u05B8\u05B9b /* { dg-warning "not in NFC" } */
+\u09CB
+\u09C7\u09BE /* { dg-warning "not in NFC" } */
+\u0B4B
+\u0B47\u0B3E /* { dg-warning "not in NFC" } */
+\u0BCA
+\u0BC6\u0BBE /* { dg-warning "not in NFC" } */
+\u0BCB
+\u0BC7\u0BBE /* { dg-warning "not in NFC" } */
+\u0CCA
+\u0CC6\u0CC2 /* { dg-warning "not in NFC" } */
+\u0D4A
+\u0D46\u0D3E /* { dg-warning "not in NFC" } */
+\u0D4B
+\u0D47\u0D3E /* { dg-warning "not in NFC" } */
+
+K
+\u212A
+
+\u03AC
+\u1F71 /* { dg-warning "not in NFC" } */
+
+\uAC00
+\u1100\u1161
+\uAC01
+\u1100\u1161\u11A8
+\uAC00\u11A8
diff --git a/gcc/testsuite/gcc.dg/cpp/normalize-4.c b/gcc/testsuite/gcc.dg/cpp/normalize-4.c
new file mode 100644
index 0000000..1ee3ff5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/normalize-4.c
@@ -0,0 +1,34 @@
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -Wnormalized=none" } */
+
+\u00AA
+\u00B7
+\u0F43
+a\u05B8\u05B9\u05B9\u05BBb
+ a\u05BB\u05B9\u05B8\u05B9b
+\u09CB
+\u09C7\u09BE
+\u0B4B
+\u0B47\u0B3E
+\u0BCA
+\u0BC6\u0BBE
+\u0BCB
+\u0BC7\u0BBE
+\u0CCA
+\u0CC6\u0CC2
+\u0D4A
+\u0D46\u0D3E
+\u0D4B
+\u0D47\u0D3E
+
+K
+\u212A
+
+\u03AC
+\u1F71
+
+\uAC00
+\u1100\u1161
+\uAC01
+\u1100\u1161\u11A8
+\uAC00\u11A8
diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-4.c b/gcc/testsuite/gcc.dg/cpp/ucnid-4.c
new file mode 100644
index 0000000..e41a3f5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/ucnid-4.c
@@ -0,0 +1,17 @@
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99" } */
+
+\u00AA
+\u00AB /* { dg-error "not valid in an identifier" } */
+\u00B6 /* { dg-error "not valid in an identifier" } */
+\u00BA
+\u00C0
+\u00D6
+\u0384
+
+\u0669 /* { dg-error "not valid at the start of an identifier" } */
+A\u0669
+0\u00BA
+0\u0669
+\u0E59 /* { dg-error "not valid at the start of an identifier" } */
+A\u0E59
diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-5.c b/gcc/testsuite/gcc.dg/cpp/ucnid-5.c
new file mode 100644
index 0000000..8fcaeac
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/ucnid-5.c
@@ -0,0 +1,17 @@
+/* { dg-do preprocess } */
+/* { dg-options "-std=c99 -pedantic" } */
+
+\u00AA
+\u00AB /* { dg-error "not valid in an identifier" } */
+\u00B6 /* { dg-error "not valid in an identifier" } */
+\u00BA
+\u00C0
+\u00D6
+\u0384 /* { dg-error "not valid in an identifier" } */
+
+\u0669 /* { dg-error "not valid at the start of an identifier" } */
+A\u0669
+0\u00BA
+0\u0669
+\u0E59 /* { dg-error "not valid at the start of an identifier" } */
+A\u0E59