diff options
author | Jakub Jelinek <jakub@redhat.com> | 2022-11-04 18:18:42 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2022-11-04 18:18:42 +0100 |
commit | 2662d537b0397b3d8cd6e8ea0568d310f0b85f87 (patch) | |
tree | 33c9b87bb437081d996e35ec4d6c77f238f77580 /libcpp/makeuname2c.cc | |
parent | 26d2db895b05388fa215c43ee97a0aed3d45f3f8 (diff) | |
download | gcc-2662d537b0397b3d8cd6e8ea0568d310f0b85f87.zip gcc-2662d537b0397b3d8cd6e8ea0568d310f0b85f87.tar.gz gcc-2662d537b0397b3d8cd6e8ea0568d310f0b85f87.tar.bz2 |
libcpp: Update to Unicode 15
The following pseudo-patch regenerates the libcpp tables with Unicode 15.0.0
which added 4489 new characters.
As mentioned previously, this isn't just a matter of running the
two libcpp/make*.cc programs on the new Unicode files; one also needs
to manually update a table inside makeuname2c.cc according to
a table in the Unicode Standard text (which is only partially reflected
in the data files: e.g. not 100% accurately in Unicode 14.0.0, but
accurately in 15.0.0).
I've also added a randomly chosen subset of those 4489 new
characters to a testcase.
2022-11-04 Jakub Jelinek <jakub@redhat.com>
gcc/testsuite/
* c-c++-common/cpp/named-universal-char-escape-1.c: Add tests for some
characters newly added in Unicode 15.0.0.
libcpp/
* makeuname2c.cc (struct generated): Update from Unicode 15.0.0
table 4-8.
* ucnid.h: Regenerated for Unicode 15.0.0.
* uname2c.h: Likewise.
Diffstat (limited to 'libcpp/makeuname2c.cc')
-rw-r--r-- | libcpp/makeuname2c.cc | 9 |
1 file changed, 5 insertions, 4 deletions
```diff
diff --git a/libcpp/makeuname2c.cc b/libcpp/makeuname2c.cc
index 2b3c708..0254088 100644
--- a/libcpp/makeuname2c.cc
+++ b/libcpp/makeuname2c.cc
@@ -69,7 +69,7 @@ struct entry { const char *name; unsigned long codepoint; };
 static struct entry *entries;
 static unsigned long num_allocated, num_entries;
 
-/* Unicode 14 Table 4-8. */
+/* Unicode 15 Table 4-8. */
 struct generated {
   const char *prefix;
   /* max_high is a workaround for UnicodeData.txt inconsistencies
@@ -81,13 +81,14 @@ struct generated {
 static struct generated generated_ranges[] =
 { { "HANGUL SYLLABLE ", 0xac00, 0xd7a3, 0, 0, 0 }, /* NR1 rule */
   { "CJK UNIFIED IDEOGRAPH-", 0x3400, 0x4dbf, 0, 1, 0 }, /* NR2 rules */
-  { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9ffc, 0x9fff, 1, 0 },
-  { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6dd, 0x2a6df, 1, 0 },
-  { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b734, 0x2b738, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x4e00, 0x9fff, 0, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x20000, 0x2a6df, 0, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x2a700, 0x2b739, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x2b740, 0x2b81d, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x2b820, 0x2cea1, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x2ceb0, 0x2ebe0, 0, 1, 0 },
   { "CJK UNIFIED IDEOGRAPH-", 0x30000, 0x3134a, 0, 1, 0 },
+  { "CJK UNIFIED IDEOGRAPH-", 0x31350, 0x323af, 0, 1, 0 },
   { "TANGUT IDEOGRAPH-", 0x17000, 0x187f7, 0, 2, 0 },
   { "TANGUT IDEOGRAPH-", 0x18d00, 0x18d08, 0, 2, 0 },
   { "KHITAN SMALL SCRIPT CHARACTER-", 0x18b00, 0x18cd5, 0, 3, 0 },
```