diff options
author | Geoffrey Keating <geoffk@apple.com> | 2005-03-15 00:36:33 +0000 |
---|---|---|
committer | Geoffrey Keating <geoffk@gcc.gnu.org> | 2005-03-15 00:36:33 +0000 |
commit | 50668cf626cf30043890f1000f500ce69a54fedb (patch) | |
tree | d3cd092701f32b8f84eec7a95a4e244aafcf795e /gcc | |
parent | cd8b38b9eb3dfdc7709ad0088ff543a3a2df67ec (diff) | |
download | gcc-50668cf626cf30043890f1000f500ce69a54fedb.zip gcc-50668cf626cf30043890f1000f500ce69a54fedb.tar.gz gcc-50668cf626cf30043890f1000f500ce69a54fedb.tar.bz2 |
Index: gcc/ChangeLog
2005-03-14 Geoffrey Keating <geoffk@apple.com>
* doc/cppopts.texi (-fexec-charset): Add concept index entry.
(-fwide-exec-charset): Likewise.
(-finput-charset): Likewise.
* doc/invoke.texi (Warning Options): Document -Wnormalized=.
* c-opts.c (c_common_handle_option): Handle -Wnormalized=.
* c.opt (Wnormalized): New.
Index: libcpp/ChangeLog
2005-03-14 Geoffrey Keating <geoffk@apple.com>
* init.c (cpp_create_reader): Default warn_normalize to normalized_C.
* charset.c: Update for new format of ucnid.h.
(ucn_valid_in_identifier): Update for new format of ucnid.h.
Add NST parameter, and update it; update callers.
(cpp_valid_ucn): Add NST parameter, update callers. Replace abort
with cpp_error.
(convert_ucn): Pass normalize_state to cpp_valid_ucn.
* internal.h (struct normalize_state): New.
(INITIAL_NORMALIZE_STATE): New.
(NORMALIZE_STATE_RESULT): New.
(NORMALIZE_STATE_UPDATE_IDNUM): New.
(_cpp_valid_ucn): New.
* lex.c (warn_about_normalization): New.
(forms_identifier_p): Add normalize_state parameter, update callers.
(lex_identifier): Add normalize_state parameter, update callers. Keep
the state current.
(lex_number): Likewise.
(_cpp_lex_direct): Pass normalize_state to subroutines. Check
it with warn_about_normalization.
* makeucnid.c: New.
* ucnid.h: Replace.
* ucnid.pl: Remove.
* ucnid.tab: Make appropriate for input to makeucnid.c. Remove
comments about obsolete version of C++.
* include/cpplib.h (enum cpp_normalize_level): New.
(struct cpp_options): Add warn_normalize field.
Index: gcc/testsuite/ChangeLog
2005-03-14 Geoffrey Keating <geoffk@apple.com>
* gcc.dg/cpp/normalize-1.c: New.
* gcc.dg/cpp/normalize-2.c: New.
* gcc.dg/cpp/normalize-3.c: New.
* gcc.dg/cpp/normalize-4.c: New.
* gcc.dg/cpp/ucnid-4.c: New.
* gcc.dg/cpp/ucnid-5.c: New.
* g++.dg/cpp/normalize-1.C: New.
* g++.dg/cpp/ucnid-1.C: New.
From-SVN: r96459
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 9 | ||||
-rw-r--r-- | gcc/c-opts.c | 13 | ||||
-rw-r--r-- | gcc/c.opt | 4 | ||||
-rw-r--r-- | gcc/doc/cppopts.texi | 3 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 45 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 11 | ||||
-rw-r--r-- | gcc/testsuite/g++.dg/cpp/normalize-1.C | 34 | ||||
-rw-r--r-- | gcc/testsuite/g++.dg/cpp/ucnid-1.C | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/cpp/normalize-1.c | 34 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/cpp/normalize-2.c | 34 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/cpp/normalize-3.c | 34 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/cpp/normalize-4.c | 34 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/cpp/ucnid-4.c | 17 | ||||
-rw-r--r-- | gcc/testsuite/gcc.dg/cpp/ucnid-5.c | 17 |
14 files changed, 306 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9150a8c..13e2a83 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2005-03-14 Geoffrey Keating <geoffk@apple.com> + + * doc/cppopts.texi (-fexec-charset): Add concept index entry. + (-fwide-exec-charset): Likewise. + (-finput-charset): Likewise. + * doc/invoke.texi (Warning Options): Document -Wnormalized=. + * c-opts.c (c_common_handle_option): Handle -Wnormalized=. + * c.opt (Wnormalized): New. + 2005-03-14 Devang Patel <dpatel@apple.com> * doc/invoke.texi: Add reference to Visibility document. diff --git a/gcc/c-opts.c b/gcc/c-opts.c index 128c83a..731511a 100644 --- a/gcc/c-opts.c +++ b/gcc/c-opts.c @@ -460,6 +460,19 @@ c_common_handle_option (size_t scode, const char *arg, int value) cpp_opts->warn_multichar = value; break; + case OPT_Wnormalized_: + if (!value || (arg && strcasecmp (arg, "none") == 0)) + cpp_opts->warn_normalize = normalized_none; + else if (!arg || strcasecmp (arg, "nfkc") == 0) + cpp_opts->warn_normalize = normalized_KC; + else if (strcasecmp (arg, "id") == 0) + cpp_opts->warn_normalize = normalized_identifier_C; + else if (strcasecmp (arg, "nfc") == 0) + cpp_opts->warn_normalize = normalized_C; + else + error ("argument %qs to %<-Wnormalized%> not recognized", arg); + break; + case OPT_Wreturn_type: warn_return_type = value; break; diff --git a/gcc/c.opt b/gcc/c.opt --- a/gcc/c.opt +++ b/gcc/c.opt @@ -285,6 +285,10 @@ Wnonnull C ObjC Var(warn_nonnull) Warn about NULL being passed to argument slots marked as requiring non-NULL +Wnormalized= +C ObjC C++ ObjC++ Joined +-Wnormalized=<id|nfc|nfkc> Warn about non-normalised Unicode strings + Wold-style-cast C++ ObjC++ Var(warn_old_style_cast) Warn if a C-style cast is used in a program diff --git a/gcc/doc/cppopts.texi b/gcc/doc/cppopts.texi index 872cffc..c6376c6 100644 --- a/gcc/doc/cppopts.texi +++ b/gcc/doc/cppopts.texi @@ -530,12 +530,14 @@ ignored. The default is 8. 
@item -fexec-charset=@var{charset} @opindex fexec-charset +@cindex character set, execution Set the execution character set, used for string and character constants. The default is UTF-8. @var{charset} can be any encoding supported by the system's @code{iconv} library routine. @item -fwide-exec-charset=@var{charset} @opindex fwide-exec-charset +@cindex character set, wide execution Set the wide execution character set, used for wide string and character constants. The default is UTF-32 or UTF-16, whichever corresponds to the width of @code{wchar_t}. As with @@ -545,6 +547,7 @@ problems with encodings that do not fit exactly in @code{wchar_t}. @item -finput-charset=@var{charset} @opindex finput-charset +@cindex character set, input Set the input character set, used for translation from the character set of the input file to the source character set used by GCC@. If the locale does not specify, or GCC cannot get this information from the diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 51cebb5..2e08c4f 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -3039,6 +3039,51 @@ Do not warn if a multicharacter constant (@samp{'FOOF'}) is used. Usually they indicate a typo in the user's code, as they have implementation-defined values, and should not be used in portable code. +@item -Wnormalized=<none|id|nfc|nfkc> +@opindex Wnormalized +@cindex NFC +@cindex NFKC +@cindex character set, input normalization +In ISO C and ISO C++, two identifiers are different if they are +different sequences of characters. However, sometimes when characters +outside the basic ASCII character set are used, you can have two +different character sequences that look the same. To avoid confusion, +the ISO 10646 standard sets out some @dfn{normalization rules} which +when applied ensure that two sequences that look the same are turned into +the same sequence. GCC can warn you if you are using identifiers which +have not been normalized; this option controls that warning. 
+ +There are four levels of warning that GCC supports. The default is +@option{-Wnormalized=nfc}, which warns about any identifier which is +not in the ISO 10646 ``C'' normalized form, @dfn{NFC}. NFC is the +recommended form for most uses. + +Unfortunately, there are some characters which ISO C and ISO C++ allow +in identifiers that when turned into NFC aren't allowable as +identifiers. That is, there's no way to use these symbols in portable +ISO C or C++ and have all your identifiers in NFC. +@option{-Wnormalized=id} suppresses the warning for these characters. +It is hoped that future versions of the standards involved will correct +this, which is why this option is not the default. + +You can switch the warning off for all characters by writing +@option{-Wnormalized=none}. You would only want to do this if you +were using some other normalization scheme (like ``D''), because +otherwise you can easily create bugs that are literally impossible to see. + +Some characters in ISO 10646 have distinct meanings but look identical +in some fonts or display methodologies, especially once formatting has +been applied. For instance @code{\u207F}, ``SUPERSCRIPT LATIN SMALL +LETTER N'', will display just like a regular @code{n} which has been +placed in a superscript. ISO 10646 defines the @dfn{NFKC} +normalisation scheme to convert all these into a standard form as +well, and GCC will warn if your code is not in NFKC if you use +@option{-Wnormalized=nfkc}. This warning is comparable to warning +about every identifier that contains the letter O because it might be +confused with the digit 0, and so is not the default, but may be +useful as a local coding convention if the programming environment is +unable to be fixed to display these characters distinctly. 
+ @item -Wno-deprecated-declarations @opindex Wno-deprecated-declarations Do not warn about uses of functions, variables, and types marked as diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 3053e82..647f155 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2005-03-14 Geoffrey Keating <geoffk@apple.com> + + * gcc.dg/cpp/normalize-1.c: New. + * gcc.dg/cpp/normalize-2.c: New. + * gcc.dg/cpp/normalize-3.c: New. + * gcc.dg/cpp/normalize-4.c: New. + * gcc.dg/cpp/ucnid-4.c: New. + * gcc.dg/cpp/ucnid-5.c: New. + * g++.dg/cpp/normalize-1.C: New. + * g++.dg/cpp/ucnid-1.C: New. + 2005-03-14 Alexandre Oliva <aoliva@redhat.com> * gcc.dg/pr18628.c: New. diff --git a/gcc/testsuite/g++.dg/cpp/normalize-1.C b/gcc/testsuite/g++.dg/cpp/normalize-1.C new file mode 100644 index 0000000..8c49602 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp/normalize-1.C @@ -0,0 +1,34 @@ +/* { dg-do preprocess } */ +/* { dg-options "-Wnormalized=id" } */ + +\u00AA +\u00B7 +\u0F43 /* { dg-warning "not in NFC" } */ +a\u05B8\u05B9\u05B9\u05BBb + a\u05BB\u05B9\u05B8\u05B9b /* { dg-warning "not in NFC" } */ +\u09CB +\u09C7\u09BE /* { dg-warning "not in NFC" } */ +\u0B4B +\u0B47\u0B3E /* { dg-warning "not in NFC" } */ +\u0BCA +\u0BC6\u0BBE /* { dg-warning "not in NFC" } */ +\u0BCB +\u0BC7\u0BBE /* { dg-warning "not in NFC" } */ +\u0CCA +\u0CC6\u0CC2 /* { dg-warning "not in NFC" } */ +\u0D4A +\u0D46\u0D3E /* { dg-warning "not in NFC" } */ +\u0D4B +\u0D47\u0D3E /* { dg-warning "not in NFC" } */ + +K +\u212A + +\u03AC +\u1F71 /* { dg-warning "not in NFC" } */ + +\uAC00 +\u1100\u1161 +\uAC01 +\u1100\u1161\u11A8 +\uAC00\u11A8 diff --git a/gcc/testsuite/g++.dg/cpp/ucnid-1.C b/gcc/testsuite/g++.dg/cpp/ucnid-1.C new file mode 100644 index 0000000..ccbb1ea --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp/ucnid-1.C @@ -0,0 +1,17 @@ +/* { dg-do preprocess } */ +/* { dg-options "-pedantic" } */ + +\u00AA /* { dg-error "not valid in an identifier" } */ +\u00AB /* { 
dg-error "not valid in an identifier" } */ +\u00B6 /* { dg-error "not valid in an identifier" } */ +\u00BA /* { dg-error "not valid in an identifier" } */ +\u00C0 +\u00D6 +\u0384 + +\u0669 /* { dg-error "not valid in an identifier" } */ +A\u0669 /* { dg-error "not valid in an identifier" } */ +0\u00BA /* { dg-error "not valid in an identifier" } */ +0\u0669 /* { dg-error "not valid in an identifier" } */ +\u0E59 +A\u0E59 diff --git a/gcc/testsuite/gcc.dg/cpp/normalize-1.c b/gcc/testsuite/gcc.dg/cpp/normalize-1.c new file mode 100644 index 0000000..768e193 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/normalize-1.c @@ -0,0 +1,34 @@ +/* { dg-do preprocess } */ +/* { dg-options "-std=c99" } */ + +\u00AA +\u00B7 +\u0F43 /* { dg-warning "not in NFC" } */ +a\u05B8\u05B9\u05B9\u05BBb + a\u05BB\u05B9\u05B8\u05B9b /* { dg-warning "not in NFC" } */ +\u09CB +\u09C7\u09BE /* { dg-warning "not in NFC" } */ +\u0B4B +\u0B47\u0B3E /* { dg-warning "not in NFC" } */ +\u0BCA +\u0BC6\u0BBE /* { dg-warning "not in NFC" } */ +\u0BCB +\u0BC7\u0BBE /* { dg-warning "not in NFC" } */ +\u0CCA +\u0CC6\u0CC2 /* { dg-warning "not in NFC" } */ +\u0D4A +\u0D46\u0D3E /* { dg-warning "not in NFC" } */ +\u0D4B +\u0D47\u0D3E /* { dg-warning "not in NFC" } */ + +K +\u212A /* { dg-warning "not in NFC" } */ + +\u03AC +\u1F71 /* { dg-warning "not in NFC" } */ + +\uAC00 +\u1100\u1161 /* { dg-warning "not in NFC" } */ +\uAC01 +\u1100\u1161\u11A8 /* { dg-warning "not in NFC" } */ +\uAC00\u11A8 /* { dg-warning "not in NFC" } */ diff --git a/gcc/testsuite/gcc.dg/cpp/normalize-2.c b/gcc/testsuite/gcc.dg/cpp/normalize-2.c new file mode 100644 index 0000000..28ef2f1 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/normalize-2.c @@ -0,0 +1,34 @@ +/* { dg-do preprocess } */ +/* { dg-options "-std=c99 -Wnormalized=nfkc" } */ + +\u00AA /* { dg-warning "not in NFKC" } */ +\u00B7 +\u0F43 /* { dg-warning "not in NFC" } */ +a\u05B8\u05B9\u05B9\u05BBb + a\u05BB\u05B9\u05B8\u05B9b /* { dg-warning "not in NFC" } */ +\u09CB 
+\u09C7\u09BE /* { dg-warning "not in NFC" } */ +\u0B4B +\u0B47\u0B3E /* { dg-warning "not in NFC" } */ +\u0BCA +\u0BC6\u0BBE /* { dg-warning "not in NFC" } */ +\u0BCB +\u0BC7\u0BBE /* { dg-warning "not in NFC" } */ +\u0CCA +\u0CC6\u0CC2 /* { dg-warning "not in NFC" } */ +\u0D4A +\u0D46\u0D3E /* { dg-warning "not in NFC" } */ +\u0D4B +\u0D47\u0D3E /* { dg-warning "not in NFC" } */ + +K +\u212A /* { dg-warning "not in NFC" } */ + +\u03AC +\u1F71 /* { dg-warning "not in NFC" } */ + +\uAC00 +\u1100\u1161 /* { dg-warning "not in NFC" } */ +\uAC01 +\u1100\u1161\u11A8 /* { dg-warning "not in NFC" } */ +\uAC00\u11A8 /* { dg-warning "not in NFC" } */ diff --git a/gcc/testsuite/gcc.dg/cpp/normalize-3.c b/gcc/testsuite/gcc.dg/cpp/normalize-3.c new file mode 100644 index 0000000..0407492 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/normalize-3.c @@ -0,0 +1,34 @@ +/* { dg-do preprocess } */ +/* { dg-options "-std=c99 -Wnormalized=id" } */ + +\u00AA +\u00B7 +\u0F43 /* { dg-warning "not in NFC" } */ +a\u05B8\u05B9\u05B9\u05BBb + a\u05BB\u05B9\u05B8\u05B9b /* { dg-warning "not in NFC" } */ +\u09CB +\u09C7\u09BE /* { dg-warning "not in NFC" } */ +\u0B4B +\u0B47\u0B3E /* { dg-warning "not in NFC" } */ +\u0BCA +\u0BC6\u0BBE /* { dg-warning "not in NFC" } */ +\u0BCB +\u0BC7\u0BBE /* { dg-warning "not in NFC" } */ +\u0CCA +\u0CC6\u0CC2 /* { dg-warning "not in NFC" } */ +\u0D4A +\u0D46\u0D3E /* { dg-warning "not in NFC" } */ +\u0D4B +\u0D47\u0D3E /* { dg-warning "not in NFC" } */ + +K +\u212A + +\u03AC +\u1F71 /* { dg-warning "not in NFC" } */ + +\uAC00 +\u1100\u1161 +\uAC01 +\u1100\u1161\u11A8 +\uAC00\u11A8 diff --git a/gcc/testsuite/gcc.dg/cpp/normalize-4.c b/gcc/testsuite/gcc.dg/cpp/normalize-4.c new file mode 100644 index 0000000..1ee3ff5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/normalize-4.c @@ -0,0 +1,34 @@ +/* { dg-do preprocess } */ +/* { dg-options "-std=c99 -Wnormalized=none" } */ + +\u00AA +\u00B7 +\u0F43 +a\u05B8\u05B9\u05B9\u05BBb + a\u05BB\u05B9\u05B8\u05B9b +\u09CB 
+\u09C7\u09BE +\u0B4B +\u0B47\u0B3E +\u0BCA +\u0BC6\u0BBE +\u0BCB +\u0BC7\u0BBE +\u0CCA +\u0CC6\u0CC2 +\u0D4A +\u0D46\u0D3E +\u0D4B +\u0D47\u0D3E + +K +\u212A + +\u03AC +\u1F71 + +\uAC00 +\u1100\u1161 +\uAC01 +\u1100\u1161\u11A8 +\uAC00\u11A8 diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-4.c b/gcc/testsuite/gcc.dg/cpp/ucnid-4.c new file mode 100644 index 0000000..e41a3f5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/ucnid-4.c @@ -0,0 +1,17 @@ +/* { dg-do preprocess } */ +/* { dg-options "-std=c99" } */ + +\u00AA +\u00AB /* { dg-error "not valid in an identifier" } */ +\u00B6 /* { dg-error "not valid in an identifier" } */ +\u00BA +\u00C0 +\u00D6 +\u0384 + +\u0669 /* { dg-error "not valid at the start of an identifier" } */ +A\u0669 +0\u00BA +0\u0669 +\u0E59 /* { dg-error "not valid at the start of an identifier" } */ +A\u0E59 diff --git a/gcc/testsuite/gcc.dg/cpp/ucnid-5.c b/gcc/testsuite/gcc.dg/cpp/ucnid-5.c new file mode 100644 index 0000000..8fcaeac --- /dev/null +++ b/gcc/testsuite/gcc.dg/cpp/ucnid-5.c @@ -0,0 +1,17 @@ +/* { dg-do preprocess } */ +/* { dg-options "-std=c99 -pedantic" } */ + +\u00AA +\u00AB /* { dg-error "not valid in an identifier" } */ +\u00B6 /* { dg-error "not valid in an identifier" } */ +\u00BA +\u00C0 +\u00D6 +\u0384 /* { dg-error "not valid in an identifier" } */ + +\u0669 /* { dg-error "not valid at the start of an identifier" } */ +A\u0669 +0\u00BA +0\u0669 +\u0E59 /* { dg-error "not valid at the start of an identifier" } */ +A\u0E59 |