aboutsummaryrefslogtreecommitdiff
path: root/gcc/ada/libgnat
diff options
context:
space:
mode:
author Arnaud Charlet <charlet@adacore.com> 2020-06-10 08:26:06 -0400
committer Pierre-Marie de Rodat <derodat@adacore.com> 2020-07-16 05:18:12 -0400
commit 8e640a5f32160fd786cc4722c8b46bbca7ba500a (patch)
tree 3c428cd1d4ba2585b9240fd889319769303aecd9 /gcc/ada/libgnat
parent 504dc34720756d4383f47fa48a0f40a5ad908146 (diff)
download gcc-8e640a5f32160fd786cc4722c8b46bbca7ba500a.zip
gcc-8e640a5f32160fd786cc4722c8b46bbca7ba500a.tar.gz
gcc-8e640a5f32160fd786cc4722c8b46bbca7ba500a.tar.bz2
[Ada] AI12-0004 Normalization and allowed characters
gcc/ada/
 * scng.adb (Scan): Detect wide characters not in NFKC.
 * libgnat/a-chahan.adb, libgnat/a-chahan.ads, libgnat/a-wichha.adb, libgnat/a-wichha.ads, libgnat/a-wichun.adb, libgnat/a-wichun.ads, libgnat/a-zchhan.adb, libgnat/a-zchhan.ads, libgnat/a-zchuni.adb, libgnat/a-zchuni.ads (Is_NFKC): New.
 * libgnat/s-utf_32.ads, libgnat/s-utf_32.adb (Is_UTF_32_NFKC): New.
Diffstat (limited to 'gcc/ada/libgnat')
-rw-r--r-- gcc/ada/libgnat/a-chahan.adb 11
-rw-r--r-- gcc/ada/libgnat/a-chahan.ads 1
-rw-r--r-- gcc/ada/libgnat/a-wichha.adb 7
-rw-r--r-- gcc/ada/libgnat/a-wichha.ads 6
-rw-r--r-- gcc/ada/libgnat/a-wichun.adb 9
-rw-r--r-- gcc/ada/libgnat/a-wichun.ads 8
-rw-r--r-- gcc/ada/libgnat/a-zchhan.adb 7
-rw-r--r-- gcc/ada/libgnat/a-zchhan.ads 6
-rw-r--r-- gcc/ada/libgnat/a-zchuni.adb 9
-rw-r--r-- gcc/ada/libgnat/a-zchuni.ads 6
-rw-r--r-- gcc/ada/libgnat/s-utf_32.adb 411
-rw-r--r-- gcc/ada/libgnat/s-utf_32.ads 6
12 files changed, 482 insertions, 5 deletions
diff --git a/gcc/ada/libgnat/a-chahan.adb b/gcc/ada/libgnat/a-chahan.adb
index faee41b..de66846 100644
--- a/gcc/ada/libgnat/a-chahan.adb
+++ b/gcc/ada/libgnat/a-chahan.adb
@@ -399,6 +399,17 @@ package body Ada.Characters.Handling is
return False;
end Is_Mark;
+ -------------
+ -- Is_NFKC --
+ -------------
+
+ function Is_NFKC (Item : Character) return Boolean is
+ begin
+ -- Item is reported as NFKC unless its position is one of the values
+ -- listed below. These are exactly the positions below 256 that appear
+ -- in the UTF_32_NFKC_QC_No table of s-utf_32.adb, from 16#00A0#
+ -- (NO-BREAK SPACE) up to 16#00BE# (VULGAR FRACTION THREE QUARTERS).
+ -- Keep this list in step with the start of that table.
+
+ return Character'Pos (Item) not in
+ 160 | 168 | 170 | 175 | 178 | 179 | 180 | 181 | 184 | 185 | 186 |
+ 188 | 189 | 190;
+ end Is_NFKC;
+
---------------------
-- Is_Other_Format --
---------------------
diff --git a/gcc/ada/libgnat/a-chahan.ads b/gcc/ada/libgnat/a-chahan.ads
index 957d623..04f975c 100644
--- a/gcc/ada/libgnat/a-chahan.ads
+++ b/gcc/ada/libgnat/a-chahan.ads
@@ -58,6 +58,7 @@ package Ada.Characters.Handling is
function Is_Other_Format (Item : Character) return Boolean;
function Is_Punctuation_Connector (Item : Character) return Boolean;
function Is_Space (Item : Character) return Boolean;
+ function Is_NFKC (Item : Character) return Boolean;
+ -- Returns True if Item could be present in a string normalized to
+ -- Normalization Form KC, otherwise returns False.
---------------------------------------------------
-- Conversion Functions for Character and String --
diff --git a/gcc/ada/libgnat/a-wichha.adb b/gcc/ada/libgnat/a-wichha.adb
index 7531ef6..feccc23 100644
--- a/gcc/ada/libgnat/a-wichha.adb
+++ b/gcc/ada/libgnat/a-wichha.adb
@@ -124,6 +124,13 @@ package body Ada.Wide_Characters.Handling is
function Is_Mark (Item : Wide_Character) return Boolean
renames Ada.Wide_Characters.Unicode.Is_Mark;
+ -------------
+ -- Is_NFKC --
+ -------------
+
+ function Is_NFKC (Item : Wide_Character) return Boolean
+ renames Ada.Wide_Characters.Unicode.Is_NFKC;
+ -- Completed by renaming: calls are forwarded unchanged to the function
+ -- of the same name in Ada.Wide_Characters.Unicode.
+
---------------------
-- Is_Other_Format --
---------------------
diff --git a/gcc/ada/libgnat/a-wichha.ads b/gcc/ada/libgnat/a-wichha.ads
index bb9452f..23eb468 100644
--- a/gcc/ada/libgnat/a-wichha.ads
+++ b/gcc/ada/libgnat/a-wichha.ads
@@ -101,6 +101,12 @@ package Ada.Wide_Characters.Handling is
-- Returns True if the Wide_Character designated by Item is categorized as
-- separator_space, otherwise returns False.
+ function Is_NFKC (Item : Wide_Character) return Boolean;
+ pragma Inline (Is_NFKC);
+ -- Returns True if the Wide_Character designated by Item could be present
+ -- in a string normalized to Normalization Form KC (as defined by Clause
+ -- 21 of ISO/IEC 10646:2017), otherwise returns False.
+ --
+ -- The body completes this declaration by renaming
+ -- Ada.Wide_Characters.Unicode.Is_NFKC.
+
function Is_Graphic (Item : Wide_Character) return Boolean;
pragma Inline (Is_Graphic);
-- Returns True if the Wide_Character designated by Item is categorized as
diff --git a/gcc/ada/libgnat/a-wichun.adb b/gcc/ada/libgnat/a-wichun.adb
index cfd84da..09cbad2 100644
--- a/gcc/ada/libgnat/a-wichun.adb
+++ b/gcc/ada/libgnat/a-wichun.adb
@@ -116,6 +116,15 @@ package body Ada.Wide_Characters.Unicode is
return G.Is_UTF_32_Non_Graphic (G.Category (C));
end Is_Non_Graphic;
+ -------------
+ -- Is_NFKC --
+ -------------
+
+ function Is_NFKC (U : Wide_Character) return Boolean is
+ begin
+ -- Map U to its code position and let G.Is_UTF_32_NFKC decide
+
+ return G.Is_UTF_32_NFKC (Wide_Character'Pos (U));
+ end Is_NFKC;
+
--------------
-- Is_Other --
--------------
diff --git a/gcc/ada/libgnat/a-wichun.ads b/gcc/ada/libgnat/a-wichun.ads
index c9eb938..9e42749 100644
--- a/gcc/ada/libgnat/a-wichun.ads
+++ b/gcc/ada/libgnat/a-wichun.ads
@@ -131,7 +131,7 @@ package Ada.Wide_Characters.Unicode is
pragma Inline (Is_Other);
-- Returns true iff U is an other format character, which means that it
-- can be used to extend an identifier, but is ignored for the purposes of
- -- matching of identiers, or if C is one of the corresponding categories,
+ -- matching of identifiers, or if C is one of the corresponding categories,
-- which are the following:
-- Other, Format (Cf)
@@ -150,6 +150,12 @@ package Ada.Wide_Characters.Unicode is
-- of the corresponding categories, which are the following:
-- Separator, Space (Zs)
+ function Is_NFKC (U : Wide_Character) return Boolean;
+ pragma Inline (Is_NFKC);
+ -- Returns True if the Wide_Character designated by U could be present
+ -- in a string normalized to Normalization Form KC (as defined by Clause
+ -- 21 of ISO/IEC 10646:2017), otherwise returns False.
+ --
+ -- The body evaluates Is_UTF_32_NFKC on Wide_Character'Pos (U).
+
function Is_Non_Graphic (U : Wide_Character) return Boolean;
function Is_Non_Graphic (C : Category) return Boolean;
pragma Inline (Is_Non_Graphic);
diff --git a/gcc/ada/libgnat/a-zchhan.adb b/gcc/ada/libgnat/a-zchhan.adb
index 4fd7eba..6930121 100644
--- a/gcc/ada/libgnat/a-zchhan.adb
+++ b/gcc/ada/libgnat/a-zchhan.adb
@@ -108,6 +108,13 @@ package body Ada.Wide_Wide_Characters.Handling is
function Is_Mark (Item : Wide_Wide_Character) return Boolean
renames Ada.Wide_Wide_Characters.Unicode.Is_Mark;
+ -------------
+ -- Is_NFKC --
+ -------------
+
+ function Is_NFKC (Item : Wide_Wide_Character) return Boolean
+ renames Ada.Wide_Wide_Characters.Unicode.Is_NFKC;
+ -- Completed by renaming: calls are forwarded unchanged to the function
+ -- of the same name in Ada.Wide_Wide_Characters.Unicode.
+
---------------------
-- Is_Other_Format --
---------------------
diff --git a/gcc/ada/libgnat/a-zchhan.ads b/gcc/ada/libgnat/a-zchhan.ads
index 354452b..74fab2a 100644
--- a/gcc/ada/libgnat/a-zchhan.ads
+++ b/gcc/ada/libgnat/a-zchhan.ads
@@ -98,6 +98,12 @@ package Ada.Wide_Wide_Characters.Handling is
-- Returns True if the Wide_Wide_Character designated by Item is
-- categorized as separator_space, otherwise returns false.
+ function Is_NFKC (Item : Wide_Wide_Character) return Boolean;
+ pragma Inline (Is_NFKC);
+ -- Returns True if the Wide_Wide_Character designated by Item could be
+ -- present in a string normalized to Normalization Form KC (as defined by
+ -- Clause 21 of ISO/IEC 10646:2017), otherwise returns False.
+ --
+ -- The body completes this declaration by renaming
+ -- Ada.Wide_Wide_Characters.Unicode.Is_NFKC.
+
function Is_Graphic (Item : Wide_Wide_Character) return Boolean;
pragma Inline (Is_Graphic);
-- Returns True if the Wide_Wide_Character designated by Item is
diff --git a/gcc/ada/libgnat/a-zchuni.adb b/gcc/ada/libgnat/a-zchuni.adb
index b754af9..203c3aa 100644
--- a/gcc/ada/libgnat/a-zchuni.adb
+++ b/gcc/ada/libgnat/a-zchuni.adb
@@ -107,6 +107,15 @@ package body Ada.Wide_Wide_Characters.Unicode is
return G.Is_UTF_32_Non_Graphic (G.Category (C));
end Is_Non_Graphic;
+ -------------
+ -- Is_NFKC --
+ -------------
+
+ function Is_NFKC (U : Wide_Wide_Character) return Boolean is
+ begin
+ -- Map U to its code position and let G.Is_UTF_32_NFKC decide
+
+ return G.Is_UTF_32_NFKC (Wide_Wide_Character'Pos (U));
+ end Is_NFKC;
+
--------------
-- Is_Other --
--------------
diff --git a/gcc/ada/libgnat/a-zchuni.ads b/gcc/ada/libgnat/a-zchuni.ads
index 162d18d..7f4a30b 100644
--- a/gcc/ada/libgnat/a-zchuni.ads
+++ b/gcc/ada/libgnat/a-zchuni.ads
@@ -147,6 +147,12 @@ package Ada.Wide_Wide_Characters.Unicode is
-- of the corresponding categories, which are the following:
-- Separator, Space (Zs)
+ function Is_NFKC (U : Wide_Wide_Character) return Boolean;
+ pragma Inline (Is_NFKC);
+ -- Returns True if the Wide_Wide_Character designated by U could be present
+ -- in a string normalized to Normalization Form KC (as defined by Clause
+ -- 21 of ISO/IEC 10646:2017), otherwise returns False.
+ --
+ -- The body evaluates Is_UTF_32_NFKC on Wide_Wide_Character'Pos (U).
+
function Is_Non_Graphic (U : Wide_Wide_Character) return Boolean;
function Is_Non_Graphic (C : Category) return Boolean;
pragma Inline (Is_Non_Graphic);
diff --git a/gcc/ada/libgnat/s-utf_32.adb b/gcc/ada/libgnat/s-utf_32.adb
index a722d62..a1346f3 100644
--- a/gcc/ada/libgnat/s-utf_32.adb
+++ b/gcc/ada/libgnat/s-utf_32.adb
@@ -49,7 +49,7 @@ package body System.UTF_32 is
----------------------
-- Note these tables are derived from those given in AI-285. For details
- -- see //www.ada-auth.org/cgi-bin/cvsweb.cgi/AIs/AI-00285.TXT?rev=1.22.
+ -- see www.ada-auth.org/cgi-bin/cvsweb.cgi/AIs/AI-00285.TXT?rev=1.22.
type UTF_32_Range is record
Lo : UTF_32;
@@ -6071,9 +6071,6 @@ package body System.UTF_32 is
40, -- DESERET CAPITAL LETTER LONG I .. DESERET CAPITAL LETTER EW
32); -- TAG LATIN CAPITAL LETTER A .. TAG LATIN CAPITAL LETTER Z
- pragma Warnings (On);
- -- Temporary until pragma Warnings at start can be activated ???
-
-- The following is a list of the 10646 names for CAPITAL LETTER entries
-- that have no matching SMALL LETTER entry and are thus not folded
@@ -6117,6 +6114,403 @@ package body System.UTF_32 is
-- GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
-- GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+ -- The following array includes all characters in the Unicode table with
+ -- the category NFKC_Quick_Check=No, taken from
+ -- www.unicode.org/Public/UCD/latest/ucd/DerivedNormalizationProps.txt
+
+ UTF_32_NFKC_QC_No : constant UTF_32_Ranges := (
+ (16#00A0#, 16#00A0#), -- NO-BREAK SPACE
+ (16#00A8#, 16#00A8#), -- DIAERESIS
+ (16#00AA#, 16#00AA#), -- FEMININE ORDINAL INDICATOR
+ (16#00AF#, 16#00AF#), -- MACRON
+ (16#00B2#, 16#00B3#), -- SUPERSCRIPT TWO..SUPERSCRIPT THREE
+ (16#00B4#, 16#00B4#), -- ACUTE ACCENT
+ (16#00B5#, 16#00B5#), -- MICRO SIGN
+ (16#00B8#, 16#00B8#), -- CEDILLA
+ (16#00B9#, 16#00B9#), -- SUPERSCRIPT ONE
+ (16#00BA#, 16#00BA#), -- MASCULINE ORDINAL INDICATOR
+ (16#00BC#, 16#00BE#), -- VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS
+ (16#0132#, 16#0133#), -- LATIN CAPITAL LIGATURE IJ..LATIN SMALL LIGATURE IJ
+ (16#013F#, 16#0140#), -- LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH MIDDLE DOT
+ (16#0149#, 16#0149#), -- LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+ (16#017F#, 16#017F#), -- LATIN SMALL LETTER LONG S
+ (16#01C4#, 16#01CC#), -- LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER NJ
+ (16#01F1#, 16#01F3#), -- LATIN CAPITAL LETTER DZ..LATIN SMALL LETTER DZ
+ (16#02B0#, 16#02B8#), -- MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
+ (16#02D8#, 16#02DD#), -- BREVE..DOUBLE ACUTE ACCENT
+ (16#02E0#, 16#02E4#), -- MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP
+ (16#0340#, 16#0341#), -- COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
+ (16#0343#, 16#0344#), -- COMBINING GREEK KORONIS..COMBINING GREEK DIALYTIKA TONOS
+ (16#0374#, 16#0374#), -- GREEK NUMERAL SIGN
+ (16#037A#, 16#037A#), -- GREEK YPOGEGRAMMENI
+ (16#037E#, 16#037E#), -- GREEK QUESTION MARK
+ (16#0384#, 16#0385#), -- GREEK TONOS..GREEK DIALYTIKA TONOS
+ (16#0387#, 16#0387#), -- GREEK ANO TELEIA
+ (16#03D0#, 16#03D6#), -- GREEK BETA SYMBOL..GREEK PI SYMBOL
+ (16#03F0#, 16#03F2#), -- GREEK KAPPA SYMBOL..GREEK LUNATE SIGMA SYMBOL
+ (16#03F4#, 16#03F5#), -- GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL
+ (16#03F9#, 16#03F9#), -- GREEK CAPITAL LUNATE SIGMA SYMBOL
+ (16#0587#, 16#0587#), -- ARMENIAN SMALL LIGATURE ECH YIWN
+ (16#0675#, 16#0678#), -- ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER HIGH HAMZA YEH
+ (16#0958#, 16#095F#), -- DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA
+ (16#09DC#, 16#09DD#), -- BENGALI LETTER RRA..BENGALI LETTER RHA
+ (16#09DF#, 16#09DF#), -- BENGALI LETTER YYA
+ (16#0A33#, 16#0A33#), -- GURMUKHI LETTER LLA
+ (16#0A36#, 16#0A36#), -- GURMUKHI LETTER SHA
+ (16#0A59#, 16#0A5B#), -- GURMUKHI LETTER KHHA..GURMUKHI LETTER ZA
+ (16#0A5E#, 16#0A5E#), -- GURMUKHI LETTER FA
+ (16#0B5C#, 16#0B5D#), -- ORIYA LETTER RRA..ORIYA LETTER RHA
+ (16#0E33#, 16#0E33#), -- THAI CHARACTER SARA AM
+ (16#0EB3#, 16#0EB3#), -- LAO VOWEL SIGN AM
+ (16#0EDC#, 16#0EDD#), -- LAO HO NO..LAO HO MO
+ (16#0F0C#, 16#0F0C#), -- TIBETAN MARK DELIMITER TSHEG BSTAR
+ (16#0F43#, 16#0F43#), -- TIBETAN LETTER GHA
+ (16#0F4D#, 16#0F4D#), -- TIBETAN LETTER DDHA
+ (16#0F52#, 16#0F52#), -- TIBETAN LETTER DHA
+ (16#0F57#, 16#0F57#), -- TIBETAN LETTER BHA
+ (16#0F5C#, 16#0F5C#), -- TIBETAN LETTER DZHA
+ (16#0F69#, 16#0F69#), -- TIBETAN LETTER KSSA
+ (16#0F73#, 16#0F73#), -- TIBETAN VOWEL SIGN II
+ (16#0F75#, 16#0F79#), -- TIBETAN VOWEL SIGN UU..TIBETAN VOWEL SIGN VOCALIC LL
+ (16#0F81#, 16#0F81#), -- TIBETAN VOWEL SIGN REVERSED II
+ (16#0F93#, 16#0F93#), -- TIBETAN SUBJOINED LETTER GHA
+ (16#0F9D#, 16#0F9D#), -- TIBETAN SUBJOINED LETTER DDHA
+ (16#0FA2#, 16#0FA2#), -- TIBETAN SUBJOINED LETTER DHA
+ (16#0FA7#, 16#0FA7#), -- TIBETAN SUBJOINED LETTER BHA
+ (16#0FAC#, 16#0FAC#), -- TIBETAN SUBJOINED LETTER DZHA
+ (16#0FB9#, 16#0FB9#), -- TIBETAN SUBJOINED LETTER KSSA
+ (16#10FC#, 16#10FC#), -- MODIFIER LETTER GEORGIAN NAR
+ (16#1D2C#, 16#1D2E#), -- MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B
+ (16#1D30#, 16#1D3A#), -- MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N
+ (16#1D3C#, 16#1D4D#), -- MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G
+ (16#1D4F#, 16#1D6A#), -- MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMALL LETTER CHI
+ (16#1D78#, 16#1D78#), -- MODIFIER LETTER CYRILLIC EN
+ (16#1D9B#, 16#1DBF#), -- MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
+ (16#1E9A#, 16#1E9B#), -- LATIN SMALL LETTER A WITH RIGHT HALF RING..LATIN SMALL LETTER LONG S WITH DOT ABOVE
+ (16#1F71#, 16#1F71#), -- GREEK SMALL LETTER ALPHA WITH OXIA
+ (16#1F73#, 16#1F73#), -- GREEK SMALL LETTER EPSILON WITH OXIA
+ (16#1F75#, 16#1F75#), -- GREEK SMALL LETTER ETA WITH OXIA
+ (16#1F77#, 16#1F77#), -- GREEK SMALL LETTER IOTA WITH OXIA
+ (16#1F79#, 16#1F79#), -- GREEK SMALL LETTER OMICRON WITH OXIA
+ (16#1F7B#, 16#1F7B#), -- GREEK SMALL LETTER UPSILON WITH OXIA
+ (16#1F7D#, 16#1F7D#), -- GREEK SMALL LETTER OMEGA WITH OXIA
+ (16#1FBB#, 16#1FBB#), -- GREEK CAPITAL LETTER ALPHA WITH OXIA
+ (16#1FBD#, 16#1FBD#), -- GREEK KORONIS
+ (16#1FBE#, 16#1FBE#), -- GREEK PROSGEGRAMMENI
+ (16#1FBF#, 16#1FC1#), -- GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI
+ (16#1FC9#, 16#1FC9#), -- GREEK CAPITAL LETTER EPSILON WITH OXIA
+ (16#1FCB#, 16#1FCB#), -- GREEK CAPITAL LETTER ETA WITH OXIA
+ (16#1FCD#, 16#1FCF#), -- GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI
+ (16#1FD3#, 16#1FD3#), -- GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+ (16#1FDB#, 16#1FDB#), -- GREEK CAPITAL LETTER IOTA WITH OXIA
+ (16#1FDD#, 16#1FDF#), -- GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
+ (16#1FE3#, 16#1FE3#), -- GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+ (16#1FEB#, 16#1FEB#), -- GREEK CAPITAL LETTER UPSILON WITH OXIA
+ (16#1FED#, 16#1FEF#), -- GREEK DIALYTIKA AND VARIA..GREEK VARIA
+ (16#1FF9#, 16#1FF9#), -- GREEK CAPITAL LETTER OMICRON WITH OXIA
+ (16#1FFB#, 16#1FFB#), -- GREEK CAPITAL LETTER OMEGA WITH OXIA
+ (16#1FFD#, 16#1FFE#), -- GREEK OXIA..GREEK DASIA
+ (16#2000#, 16#200A#), -- EN QUAD..HAIR SPACE
+ (16#2011#, 16#2011#), -- NON-BREAKING HYPHEN
+ (16#2017#, 16#2017#), -- DOUBLE LOW LINE
+ (16#2024#, 16#2026#), -- ONE DOT LEADER..HORIZONTAL ELLIPSIS
+ (16#202F#, 16#202F#), -- NARROW NO-BREAK SPACE
+ (16#2033#, 16#2034#), -- DOUBLE PRIME..TRIPLE PRIME
+ (16#2036#, 16#2037#), -- REVERSED DOUBLE PRIME..REVERSED TRIPLE PRIME
+ (16#203C#, 16#203C#), -- DOUBLE EXCLAMATION MARK
+ (16#203E#, 16#203E#), -- OVERLINE
+ (16#2047#, 16#2049#), -- DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK
+ (16#2057#, 16#2057#), -- QUADRUPLE PRIME
+ (16#205F#, 16#205F#), -- MEDIUM MATHEMATICAL SPACE
+ (16#2070#, 16#2070#), -- SUPERSCRIPT ZERO
+ (16#2071#, 16#2071#), -- SUPERSCRIPT LATIN SMALL LETTER I
+ (16#2074#, 16#2079#), -- SUPERSCRIPT FOUR..SUPERSCRIPT NINE
+ (16#207A#, 16#207C#), -- SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN
+ (16#207D#, 16#207D#), -- SUPERSCRIPT LEFT PARENTHESIS
+ (16#207E#, 16#207E#), -- SUPERSCRIPT RIGHT PARENTHESIS
+ (16#207F#, 16#207F#), -- SUPERSCRIPT LATIN SMALL LETTER N
+ (16#2080#, 16#2089#), -- SUBSCRIPT ZERO..SUBSCRIPT NINE
+ (16#208A#, 16#208C#), -- SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN
+ (16#208D#, 16#208D#), -- SUBSCRIPT LEFT PARENTHESIS
+ (16#208E#, 16#208E#), -- SUBSCRIPT RIGHT PARENTHESIS
+ (16#2090#, 16#209C#), -- LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T
+ (16#20A8#, 16#20A8#), -- RUPEE SIGN
+ (16#2100#, 16#2101#), -- ACCOUNT OF..ADDRESSED TO THE SUBJECT
+ (16#2102#, 16#2102#), -- DOUBLE-STRUCK CAPITAL C
+ (16#2103#, 16#2103#), -- DEGREE CELSIUS
+ (16#2105#, 16#2106#), -- CARE OF..CADA UNA
+ (16#2107#, 16#2107#), -- EULER CONSTANT
+ (16#2109#, 16#2109#), -- DEGREE FAHRENHEIT
+ (16#210A#, 16#2113#), -- SCRIPT SMALL G..SCRIPT SMALL L
+ (16#2115#, 16#2115#), -- DOUBLE-STRUCK CAPITAL N
+ (16#2116#, 16#2116#), -- NUMERO SIGN
+ (16#2119#, 16#211D#), -- DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R
+ (16#2120#, 16#2122#), -- SERVICE MARK..TRADE MARK SIGN
+ (16#2124#, 16#2124#), -- DOUBLE-STRUCK CAPITAL Z
+ (16#2126#, 16#2126#), -- OHM SIGN
+ (16#2128#, 16#2128#), -- BLACK-LETTER CAPITAL Z
+ (16#212A#, 16#212D#), -- KELVIN SIGN..BLACK-LETTER CAPITAL C
+ (16#212F#, 16#2131#), -- SCRIPT SMALL E..SCRIPT CAPITAL F
+ (16#2133#, 16#2134#), -- SCRIPT CAPITAL M..SCRIPT SMALL O
+ (16#2135#, 16#2138#), -- ALEF SYMBOL..DALET SYMBOL
+ (16#2139#, 16#2139#), -- INFORMATION SOURCE
+ (16#213B#, 16#213B#), -- FACSIMILE SIGN
+ (16#213C#, 16#213F#), -- DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI
+ (16#2140#, 16#2140#), -- DOUBLE-STRUCK N-ARY SUMMATION
+ (16#2145#, 16#2149#), -- DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J
+ (16#2150#, 16#215F#), -- VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE
+ (16#2160#, 16#217F#), -- ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND
+ (16#2189#, 16#2189#), -- VULGAR FRACTION ZERO THIRDS
+ (16#222C#, 16#222D#), -- DOUBLE INTEGRAL..TRIPLE INTEGRAL
+ (16#222F#, 16#2230#), -- SURFACE INTEGRAL..VOLUME INTEGRAL
+ (16#2329#, 16#2329#), -- LEFT-POINTING ANGLE BRACKET
+ (16#232A#, 16#232A#), -- RIGHT-POINTING ANGLE BRACKET
+ (16#2460#, 16#249B#), -- CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP
+ (16#249C#, 16#24E9#), -- PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z
+ (16#24EA#, 16#24EA#), -- CIRCLED DIGIT ZERO
+ (16#2A0C#, 16#2A0C#), -- QUADRUPLE INTEGRAL OPERATOR
+ (16#2A74#, 16#2A76#), -- DOUBLE COLON EQUAL..THREE CONSECUTIVE EQUALS SIGNS
+ (16#2ADC#, 16#2ADC#), -- FORKING
+ (16#2C7C#, 16#2C7D#), -- LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V
+ (16#2D6F#, 16#2D6F#), -- TIFINAGH MODIFIER LETTER LABIALIZATION MARK
+ (16#2E9F#, 16#2E9F#), -- CJK RADICAL MOTHER
+ (16#2EF3#, 16#2EF3#), -- CJK RADICAL C-SIMPLIFIED TURTLE
+ (16#2F00#, 16#2FD5#), -- KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
+ (16#3000#, 16#3000#), -- IDEOGRAPHIC SPACE
+ (16#3036#, 16#3036#), -- CIRCLED POSTAL MARK
+ (16#3038#, 16#303A#), -- HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY
+ (16#309B#, 16#309C#), -- KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+ (16#309F#, 16#309F#), -- HIRAGANA DIGRAPH YORI
+ (16#30FF#, 16#30FF#), -- KATAKANA DIGRAPH KOTO
+ (16#3131#, 16#318E#), -- HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
+ (16#3192#, 16#3195#), -- IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK
+ (16#3196#, 16#319F#), -- IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK
+ (16#3200#, 16#321E#), -- PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
+ (16#3220#, 16#3229#), -- PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN
+ (16#322A#, 16#3247#), -- PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO
+ (16#3250#, 16#3250#), -- PARTNERSHIP SIGN
+ (16#3251#, 16#325F#), -- CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE
+ (16#3260#, 16#327E#), -- CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U
+ (16#3280#, 16#3289#), -- CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN
+ (16#328A#, 16#32B0#), -- CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT
+ (16#32B1#, 16#32BF#), -- CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY
+ (16#32C0#, 16#33FF#), -- IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE GAL
+ (16#A69C#, 16#A69D#), -- MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN
+ (16#A770#, 16#A770#), -- MODIFIER LETTER US
+ (16#A7F8#, 16#A7F9#), -- MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE
+ (16#AB5C#, 16#AB5F#), -- MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
+ (16#AB69#, 16#AB69#), -- MODIFIER LETTER SMALL TURNED W
+ (16#F900#, 16#FA0D#), -- CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
+ (16#FA10#, 16#FA10#), -- CJK COMPATIBILITY IDEOGRAPH-FA10
+ (16#FA12#, 16#FA12#), -- CJK COMPATIBILITY IDEOGRAPH-FA12
+ (16#FA15#, 16#FA1E#), -- CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
+ (16#FA20#, 16#FA20#), -- CJK COMPATIBILITY IDEOGRAPH-FA20
+ (16#FA22#, 16#FA22#), -- CJK COMPATIBILITY IDEOGRAPH-FA22
+ (16#FA25#, 16#FA26#), -- CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
+ (16#FA2A#, 16#FA6D#), -- CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D
+ (16#FA70#, 16#FAD9#), -- CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
+ (16#FB00#, 16#FB06#), -- LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
+ (16#FB13#, 16#FB17#), -- ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
+ (16#FB1D#, 16#FB1D#), -- HEBREW LETTER YOD WITH HIRIQ
+ (16#FB1F#, 16#FB28#), -- HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV
+ (16#FB29#, 16#FB29#), -- HEBREW LETTER ALTERNATIVE PLUS SIGN
+ (16#FB2A#, 16#FB36#), -- HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH
+ (16#FB38#, 16#FB3C#), -- HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
+ (16#FB3E#, 16#FB3E#), -- HEBREW LETTER MEM WITH DAGESH
+ (16#FB40#, 16#FB41#), -- HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
+ (16#FB43#, 16#FB44#), -- HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
+ (16#FB46#, 16#FBB1#), -- HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
+ (16#FBD3#, 16#FD3D#), -- ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM
+ (16#FD50#, 16#FD8F#), -- ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
+ (16#FD92#, 16#FDC7#), -- ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
+ (16#FDF0#, 16#FDFB#), -- ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU
+ (16#FDFC#, 16#FDFC#), -- RIAL SIGN
+ (16#FE10#, 16#FE16#), -- PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK
+ (16#FE17#, 16#FE17#), -- PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET
+ (16#FE18#, 16#FE18#), -- PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET
+ (16#FE19#, 16#FE19#), -- PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
+ (16#FE30#, 16#FE30#), -- PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
+ (16#FE31#, 16#FE32#), -- PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH
+ (16#FE33#, 16#FE34#), -- PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE
+ (16#FE35#, 16#FE35#), -- PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
+ (16#FE36#, 16#FE36#), -- PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS
+ (16#FE37#, 16#FE37#), -- PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET
+ (16#FE38#, 16#FE38#), -- PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET
+ (16#FE39#, 16#FE39#), -- PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET
+ (16#FE3A#, 16#FE3A#), -- PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET
+ (16#FE3B#, 16#FE3B#), -- PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET
+ (16#FE3C#, 16#FE3C#), -- PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET
+ (16#FE3D#, 16#FE3D#), -- PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET
+ (16#FE3E#, 16#FE3E#), -- PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET
+ (16#FE3F#, 16#FE3F#), -- PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET
+ (16#FE40#, 16#FE40#), -- PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET
+ (16#FE41#, 16#FE41#), -- PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
+ (16#FE42#, 16#FE42#), -- PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
+ (16#FE43#, 16#FE43#), -- PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
+ (16#FE44#, 16#FE44#), -- PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
+ (16#FE47#, 16#FE47#), -- PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET
+ (16#FE48#, 16#FE48#), -- PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET
+ (16#FE49#, 16#FE4C#), -- DASHED OVERLINE..DOUBLE WAVY OVERLINE
+ (16#FE4D#, 16#FE4F#), -- DASHED LOW LINE..WAVY LOW LINE
+ (16#FE50#, 16#FE52#), -- SMALL COMMA..SMALL FULL STOP
+ (16#FE54#, 16#FE57#), -- SMALL SEMICOLON..SMALL EXCLAMATION MARK
+ (16#FE58#, 16#FE58#), -- SMALL EM DASH
+ (16#FE59#, 16#FE59#), -- SMALL LEFT PARENTHESIS
+ (16#FE5A#, 16#FE5A#), -- SMALL RIGHT PARENTHESIS
+ (16#FE5B#, 16#FE5B#), -- SMALL LEFT CURLY BRACKET
+ (16#FE5C#, 16#FE5C#), -- SMALL RIGHT CURLY BRACKET
+ (16#FE5D#, 16#FE5D#), -- SMALL LEFT TORTOISE SHELL BRACKET
+ (16#FE5E#, 16#FE5E#), -- SMALL RIGHT TORTOISE SHELL BRACKET
+ (16#FE5F#, 16#FE61#), -- SMALL NUMBER SIGN..SMALL ASTERISK
+ (16#FE62#, 16#FE62#), -- SMALL PLUS SIGN
+ (16#FE63#, 16#FE63#), -- SMALL HYPHEN-MINUS
+ (16#FE64#, 16#FE66#), -- SMALL LESS-THAN SIGN..SMALL EQUALS SIGN
+ (16#FE68#, 16#FE68#), -- SMALL REVERSE SOLIDUS
+ (16#FE69#, 16#FE69#), -- SMALL DOLLAR SIGN
+ (16#FE6A#, 16#FE6B#), -- SMALL PERCENT SIGN..SMALL COMMERCIAL AT
+ (16#FE70#, 16#FE72#), -- ARABIC FATHATAN ISOLATED FORM..ARABIC DAMMATAN ISOLATED FORM
+ (16#FE74#, 16#FE74#), -- ARABIC KASRATAN ISOLATED FORM
+ (16#FE76#, 16#FEFC#), -- ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+ (16#FF01#, 16#FF03#), -- FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN
+ (16#FF04#, 16#FF04#), -- FULLWIDTH DOLLAR SIGN
+ (16#FF05#, 16#FF07#), -- FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE
+ (16#FF08#, 16#FF08#), -- FULLWIDTH LEFT PARENTHESIS
+ (16#FF09#, 16#FF09#), -- FULLWIDTH RIGHT PARENTHESIS
+ (16#FF0A#, 16#FF0A#), -- FULLWIDTH ASTERISK
+ (16#FF0B#, 16#FF0B#), -- FULLWIDTH PLUS SIGN
+ (16#FF0C#, 16#FF0C#), -- FULLWIDTH COMMA
+ (16#FF0D#, 16#FF0D#), -- FULLWIDTH HYPHEN-MINUS
+ (16#FF0E#, 16#FF0F#), -- FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS
+ (16#FF10#, 16#FF19#), -- FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE
+ (16#FF1A#, 16#FF1B#), -- FULLWIDTH COLON..FULLWIDTH SEMICOLON
+ (16#FF1C#, 16#FF1E#), -- FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN
+ (16#FF1F#, 16#FF20#), -- FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT
+ (16#FF21#, 16#FF3A#), -- FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z
+ (16#FF3B#, 16#FF3B#), -- FULLWIDTH LEFT SQUARE BRACKET
+ (16#FF3C#, 16#FF3C#), -- FULLWIDTH REVERSE SOLIDUS
+ (16#FF3D#, 16#FF3D#), -- FULLWIDTH RIGHT SQUARE BRACKET
+ (16#FF3E#, 16#FF3E#), -- FULLWIDTH CIRCUMFLEX ACCENT
+ (16#FF3F#, 16#FF3F#), -- FULLWIDTH LOW LINE
+ (16#FF40#, 16#FF40#), -- FULLWIDTH GRAVE ACCENT
+ (16#FF41#, 16#FF5A#), -- FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z
+ (16#FF5B#, 16#FF5B#), -- FULLWIDTH LEFT CURLY BRACKET
+ (16#FF5C#, 16#FF5C#), -- FULLWIDTH VERTICAL LINE
+ (16#FF5D#, 16#FF5D#), -- FULLWIDTH RIGHT CURLY BRACKET
+ (16#FF5E#, 16#FF5E#), -- FULLWIDTH TILDE
+ (16#FF5F#, 16#FF5F#), -- FULLWIDTH LEFT WHITE PARENTHESIS
+ (16#FF60#, 16#FF60#), -- FULLWIDTH RIGHT WHITE PARENTHESIS
+ (16#FF61#, 16#FF61#), -- HALFWIDTH IDEOGRAPHIC FULL STOP
+ (16#FF62#, 16#FF62#), -- HALFWIDTH LEFT CORNER BRACKET
+ (16#FF63#, 16#FF63#), -- HALFWIDTH RIGHT CORNER BRACKET
+ (16#FF64#, 16#FF65#), -- HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT
+ (16#FF66#, 16#FF6F#), -- HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU
+ (16#FF70#, 16#FF70#), -- HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+ (16#FF71#, 16#FF9D#), -- HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N
+ (16#FF9E#, 16#FF9F#), -- HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+ (16#FFA0#, 16#FFBE#), -- HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH
+ (16#FFC2#, 16#FFC7#), -- HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
+ (16#FFCA#, 16#FFCF#), -- HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
+ (16#FFD2#, 16#FFD7#), -- HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
+ (16#FFDA#, 16#FFDC#), -- HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
+ (16#FFE0#, 16#FFE1#), -- FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN
+ (16#FFE2#, 16#FFE2#), -- FULLWIDTH NOT SIGN
+ (16#FFE3#, 16#FFE3#), -- FULLWIDTH MACRON
+ (16#FFE4#, 16#FFE4#), -- FULLWIDTH BROKEN BAR
+ (16#FFE5#, 16#FFE6#), -- FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN
+ (16#FFE8#, 16#FFE8#), -- HALFWIDTH FORMS LIGHT VERTICAL
+ (16#FFE9#, 16#FFEC#), -- HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW
+ (16#FFED#, 16#FFEE#), -- HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
+ (16#1D15E#, 16#1D164#), -- MUSICAL SYMBOL HALF NOTE..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
+ (16#1D1BB#, 16#1D1C0#), -- MUSICAL SYMBOL MINIMA..MUSICAL SYMBOL FUSA BLACK
+ (16#1D400#, 16#1D454#), -- MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
+ (16#1D456#, 16#1D49C#), -- MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
+ (16#1D49E#, 16#1D49F#), -- MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
+ (16#1D4A2#, 16#1D4A2#), -- MATHEMATICAL SCRIPT CAPITAL G
+ (16#1D4A5#, 16#1D4A6#), -- MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
+ (16#1D4A9#, 16#1D4AC#), -- MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
+ (16#1D4AE#, 16#1D4B9#), -- MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
+ (16#1D4BB#, 16#1D4BB#), -- MATHEMATICAL SCRIPT SMALL F
+ (16#1D4BD#, 16#1D4C3#), -- MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
+ (16#1D4C5#, 16#1D505#), -- MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
+ (16#1D507#, 16#1D50A#), -- MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
+ (16#1D50D#, 16#1D514#), -- MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
+ (16#1D516#, 16#1D51C#), -- MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
+ (16#1D51E#, 16#1D539#), -- MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
+ (16#1D53B#, 16#1D53E#), -- MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
+ (16#1D540#, 16#1D544#), -- MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
+ (16#1D546#, 16#1D546#), -- MATHEMATICAL DOUBLE-STRUCK CAPITAL O
+ (16#1D54A#, 16#1D550#), -- MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
+ (16#1D552#, 16#1D6A5#), -- MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
+ (16#1D6A8#, 16#1D6C0#), -- MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA
+ (16#1D6C1#, 16#1D6C1#), -- MATHEMATICAL BOLD NABLA
+ (16#1D6C2#, 16#1D6DA#), -- MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA
+ (16#1D6DB#, 16#1D6DB#), -- MATHEMATICAL BOLD PARTIAL DIFFERENTIAL
+ (16#1D6DC#, 16#1D6FA#), -- MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA
+ (16#1D6FB#, 16#1D6FB#), -- MATHEMATICAL ITALIC NABLA
+ (16#1D6FC#, 16#1D714#), -- MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA
+ (16#1D715#, 16#1D715#), -- MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL
+ (16#1D716#, 16#1D734#), -- MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA
+ (16#1D735#, 16#1D735#), -- MATHEMATICAL BOLD ITALIC NABLA
+ (16#1D736#, 16#1D74E#), -- MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA
+ (16#1D74F#, 16#1D74F#), -- MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL
+ (16#1D750#, 16#1D76E#), -- MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA
+ (16#1D76F#, 16#1D76F#), -- MATHEMATICAL SANS-SERIF BOLD NABLA
+ (16#1D770#, 16#1D788#), -- MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA
+ (16#1D789#, 16#1D789#), -- MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL
+ (16#1D78A#, 16#1D7A8#), -- MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA
+ (16#1D7A9#, 16#1D7A9#), -- MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA
+ (16#1D7AA#, 16#1D7C2#), -- MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA
+ (16#1D7C3#, 16#1D7C3#), -- MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL
+ (16#1D7C4#, 16#1D7CB#), -- MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA
+ (16#1D7CE#, 16#1D7FF#), -- MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
+ (16#1EE00#, 16#1EE03#), -- ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL
+ (16#1EE05#, 16#1EE1F#), -- ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF
+ (16#1EE21#, 16#1EE22#), -- ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM
+ (16#1EE24#, 16#1EE24#), -- ARABIC MATHEMATICAL INITIAL HEH
+ (16#1EE27#, 16#1EE27#), -- ARABIC MATHEMATICAL INITIAL HAH
+ (16#1EE29#, 16#1EE32#), -- ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF
+ (16#1EE34#, 16#1EE37#), -- ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH
+ (16#1EE39#, 16#1EE39#), -- ARABIC MATHEMATICAL INITIAL DAD
+ (16#1EE3B#, 16#1EE3B#), -- ARABIC MATHEMATICAL INITIAL GHAIN
+ (16#1EE42#, 16#1EE42#), -- ARABIC MATHEMATICAL TAILED JEEM
+ (16#1EE47#, 16#1EE47#), -- ARABIC MATHEMATICAL TAILED HAH
+ (16#1EE49#, 16#1EE49#), -- ARABIC MATHEMATICAL TAILED YEH
+ (16#1EE4B#, 16#1EE4B#), -- ARABIC MATHEMATICAL TAILED LAM
+ (16#1EE4D#, 16#1EE4F#), -- ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN
+ (16#1EE51#, 16#1EE52#), -- ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF
+ (16#1EE54#, 16#1EE54#), -- ARABIC MATHEMATICAL TAILED SHEEN
+ (16#1EE57#, 16#1EE57#), -- ARABIC MATHEMATICAL TAILED KHAH
+ (16#1EE59#, 16#1EE59#), -- ARABIC MATHEMATICAL TAILED DAD
+ (16#1EE5B#, 16#1EE5B#), -- ARABIC MATHEMATICAL TAILED GHAIN
+ (16#1EE5D#, 16#1EE5D#), -- ARABIC MATHEMATICAL TAILED DOTLESS NOON
+ (16#1EE5F#, 16#1EE5F#), -- ARABIC MATHEMATICAL TAILED DOTLESS QAF
+ (16#1EE61#, 16#1EE62#), -- ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM
+ (16#1EE64#, 16#1EE64#), -- ARABIC MATHEMATICAL STRETCHED HEH
+ (16#1EE67#, 16#1EE6A#), -- ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF
+ (16#1EE6C#, 16#1EE72#), -- ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF
+ (16#1EE74#, 16#1EE77#), -- ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH
+ (16#1EE79#, 16#1EE7C#), -- ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH
+ (16#1EE7E#, 16#1EE7E#), -- ARABIC MATHEMATICAL STRETCHED DOTLESS FEH
+ (16#1EE80#, 16#1EE89#), -- ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH
+ (16#1EE8B#, 16#1EE9B#), -- ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN
+ (16#1EEA1#, 16#1EEA3#), -- ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL
+ (16#1EEA5#, 16#1EEA9#), -- ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH
+ (16#1EEAB#, 16#1EEBB#), -- ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN
+ (16#1F100#, 16#1F10A#), -- DIGIT ZERO FULL STOP..DIGIT NINE COMMA
+ (16#1F110#, 16#1F12E#), -- PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
+ (16#1F130#, 16#1F14F#), -- SQUARED LATIN CAPITAL LETTER A..SQUARED WC
+ (16#1F16A#, 16#1F16C#), -- RAISED MC SIGN..RAISED MR SIGN
+ (16#1F190#, 16#1F190#), -- SQUARE DJ
+ (16#1F200#, 16#1F202#), -- SQUARE HIRAGANA HOKA..SQUARED KATAKANA SA
+ (16#1F210#, 16#1F23B#), -- SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D
+ (16#1F240#, 16#1F248#), -- TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
+ (16#1F250#, 16#1F251#), -- CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT
+ (16#1FBF0#, 16#1FBF9#), -- SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
+ (16#2F800#, 16#2FA1D#)); -- CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
+
+ pragma Warnings (On);
+ -- Temporary until pragma Warnings at start can be activated ???
+
type Decomposition_Mapping is record
Item : UTF_32;
First_Char_Mapping : UTF_32;
@@ -12001,6 +12395,15 @@ package body System.UTF_32 is
return Non_Graphic (C);
end Is_UTF_32_Non_Graphic;
+ --------------------
+ -- Is_UTF_32_NFKC --
+ --------------------
+
+ function Is_UTF_32_NFKC (U : UTF_32) return Boolean is
+ begin
+    --  Fast path: every code point below 16#A0# (160) is accepted
+    --  without consulting the table.
+
+    if U < 16#A0# then
+       return True;
+    end if;
+
+    --  Otherwise U is acceptable exactly when the search of
+    --  UTF_32_NFKC_QC_No yields 0.
+    --  NOTE(review): presumably 0 is Range_Search's "not found" result
+    --  and the table lists code points that cannot occur in NFKC text;
+    --  confirm against their declarations.
+
+    return Range_Search (U, UTF_32_NFKC_QC_No) = 0;
+ end Is_UTF_32_NFKC;
+
---------------------
-- Is_UTF_32_Other --
---------------------
diff --git a/gcc/ada/libgnat/s-utf_32.ads b/gcc/ada/libgnat/s-utf_32.ads
index b8e4e3e..e3f0e00 100644
--- a/gcc/ada/libgnat/s-utf_32.ads
+++ b/gcc/ada/libgnat/s-utf_32.ads
@@ -189,6 +189,12 @@ package System.UTF_32 is
-- letters to upper case using this routine. A corresponding routine to
-- fold to lower case is also provided.
+ function Is_UTF_32_NFKC (U : UTF_32) return Boolean;
+ pragma Inline (Is_UTF_32_NFKC);
+ -- Return True if U could be present in a string normalized to
+ -- Normalization Form KC (as defined by Clause 21 of ISO/IEC 10646:2017),
+ -- otherwise return False.
+
function Is_UTF_32_Basic (U : UTF_32) return Boolean;
pragma Inline (Is_UTF_32_Basic);
-- Return True if U has no Decomposition Mapping in the code charts of