aboutsummaryrefslogtreecommitdiff
path: root/libcpp/makeuname2c.cc
diff options
context:
space:
mode:
author: Jakub Jelinek <jakub@redhat.com> 2022-11-04 18:18:42 +0100
committer: Jakub Jelinek <jakub@redhat.com> 2022-11-04 18:18:42 +0100
commit: 2662d537b0397b3d8cd6e8ea0568d310f0b85f87 (patch)
tree: 33c9b87bb437081d996e35ec4d6c77f238f77580 /libcpp/makeuname2c.cc
parent: 26d2db895b05388fa215c43ee97a0aed3d45f3f8 (diff)
download: gcc-2662d537b0397b3d8cd6e8ea0568d310f0b85f87.zip
gcc-2662d537b0397b3d8cd6e8ea0568d310f0b85f87.tar.gz
gcc-2662d537b0397b3d8cd6e8ea0568d310f0b85f87.tar.bz2
libcpp: Update to Unicode 15
The following pseudo-patch regenerates the libcpp tables with Unicode 15.0.0, which added 4489 new characters.

As mentioned previously, this isn't just a matter of running the two libcpp/make*.cc programs on the new Unicode files; one also needs to manually update a table inside of makeuname2c.cc according to a table in the Unicode text (which is partially reflected in the text files, but e.g. in Unicode 14.0.0 not 100% accurately, in 15.0.0 actually accurately). I've also added a randomly chosen subset of those 4489 new characters to a testcase.

2022-11-04  Jakub Jelinek  <jakub@redhat.com>

gcc/testsuite/
	* c-c++-common/cpp/named-universal-char-escape-1.c: Add tests for some characters newly added in Unicode 15.0.0.

libcpp/
	* makeuname2c.cc (struct generated): Update from Unicode 15.0.0 table 4-8.
	* ucnid.h: Regenerated for Unicode 15.0.0.
	* uname2c.h: Likewise.
Diffstat (limited to 'libcpp/makeuname2c.cc')
-rw-r--r-- libcpp/makeuname2c.cc 9
1 files changed, 5 insertions, 4 deletions
diff --git a/libcpp/makeuname2c.cc b/libcpp/makeuname2c.cc
index 2b3c708..0254088 100644
--- a/libcpp/makeuname2c.cc
+++ b/libcpp/makeuname2c.cc
@@ -69,7 +69,7 @@ struct entry { const char *name; unsigned long codepoint; };
static struct entry *entries;
static unsigned long num_allocated, num_entries;
-/* Unicode 14 Table 4-8. */
+/* Unicode 15 Table 4-8. */
struct generated {
const char *prefix;
/* max_high is a workaround for UnicodeData.txt inconsistencies
@@ -81,13 +81,14 @@ struct generated {
static struct generated generated_ranges[] =
{ { "HANGUL SYLLABLE ", 0xac00, 0xd7a3, 0, 0, 0 }, /* NR1 rule */
{ "CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4dbf, 0, 1, 0 }, /* NR2 rules */
- { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9ffc, 0x9fff, 1, 0 },
- { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6dd, 0x2a6df, 1, 0 },
- { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b734, 0x2b738, 1, 0 },
+ { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9fff, 0, 1, 0 },
+ { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6df, 0, 1, 0 },
+ { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b739, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x2b740, 0x2b81d, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x2b820, 0x2cea1, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x2ceb0, 0x2ebe0, 0, 1, 0 },
{ "CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134a, 0, 1, 0 },
+ { "CJK UNIFIED IDEOGRAPH-", 0x31350, 0x323af, 0, 1, 0 },
{ "TANGUT IDEOGRAPH-", 0x17000, 0x187f7, 0, 2, 0 },
{ "TANGUT IDEOGRAPH-", 0x18d00, 0x18d08, 0, 2, 0 },
{ "KHITAN SMALL SCRIPT CHARACTER-", 0x18b00, 0x18cd5, 0, 3, 0 },