aboutsummaryrefslogtreecommitdiff
path: root/libcpp/makeucnid.c
diff options
context:
space:
mode:
Diffstat (limited to 'libcpp/makeucnid.c')
-rw-r--r-- libcpp/makeucnid.c 87
1 files changed, 80 insertions, 7 deletions
diff --git a/libcpp/makeucnid.c b/libcpp/makeucnid.c
index d2c8d58..b3a0aab 100644
--- a/libcpp/makeucnid.c
+++ b/libcpp/makeucnid.c
@@ -17,7 +17,7 @@ along with this program; see the file COPYING3. If not see
/* Run this program as
./makeucnid ucnid.tab UnicodeData.txt DerivedNormalizationProps.txt \
- > ucnid.h
+ DerivedCoreProperties.txt > ucnid.h
*/
#include <stdio.h>
@@ -32,10 +32,12 @@ enum {
N99 = 4,
C11 = 8,
N11 = 16,
- all_languages = C99 | CXX | C11,
- not_NFC = 32,
- not_NFKC = 64,
- maybe_not_NFC = 128
+ CXX23 = 32,
+ NXX23 = 64,
+ all_languages = C99 | CXX | C11 | CXX23 | NXX23,
+ not_NFC = 128,
+ not_NFKC = 256,
+ maybe_not_NFC = 512
};
#define NUM_CODE_POINTS 0x110000
@@ -241,6 +243,74 @@ read_derived (const char *fname)
fclose (f);
}
+/* Read the DerivedCoreProperties.txt-format file FNAME and record its
+ XID_Start and XID_Continue properties in the flags[] table.
+
+ Each data line is parsed as "START[..END] ; PROPERTY ...", where START
+ and END are hexadecimal code points. Lines whose first character is
+ '#', '\n' or '\r' are skipped; ranges that end below 0x80 and lines
+ naming any other property leave flags[] unchanged.
+
+ XID_Start: CXX23 is set and NXX23 is cleared for every code point in
+ the range. XID_Continue: every code point in the range that does not
+ have CXX23 set yet gets both CXX23 and NXX23. An XID_Start code point
+ therefore ends up with CXX23 and without NXX23 whichever property line
+ is read first; a code point that is only XID_Continue ends up with
+ both bits, provided CXX23 was not already set on entry.
+ (Presumably NXX23 means "not valid as the first character of a C++23
+ identifier" -- confirm against the consumer of the generated table.)
+
+ fail () is called if the file cannot be opened, if a code point is
+ malformed or larger than MAX_CODE_POINT, if a range end is smaller than
+ its start, if the ';' separator is missing, and on a read error.
+ fail () is defined outside this excerpt; the statements following each
+ call assume it does not return -- TODO confirm.
+
+ NOTE(review): FNAME is never written through, so it could be declared
+ const char * like the parameter of read_derived; the locals i and j
+ are never used. */
+
+static void
+read_derivedcore (char *fname)
+{
+ FILE * f = fopen (fname, "r");
+
+ if (!f)
+ fail ("opening DerivedCoreProperties.txt");
+ for (;;)
+ {
+ char line[256]; /* NOTE(review): fgets returns a longer input line in pieces, and each piece is then parsed as if it were a new line. */
+ unsigned long codepoint_start, codepoint_end;
+ char *l;
+ int i, j; /* NOTE(review): unused in this function. */
+
+ if (!fgets (line, sizeof (line), f))
+ break; /* End of file or read error; ferror is checked after the loop. */
+ if (line[0] == '#' || line[0] == '\n' || line[0] == '\r')
+ continue; /* Line starts with '#' or with a line terminator. */
+ codepoint_start = strtoul (line, &l, 16); /* l is left just past the parsed number. */
+ if (l == line)
+ fail ("parsing DerivedCoreProperties.txt, reading code point");
+ if (codepoint_start > MAX_CODE_POINT)
+ fail ("parsing DerivedCoreProperties.txt, code point too large");
+
+ if (*l == '.' && l[1] == '.') /* "START..END" range. */
+ {
+ char *l2 = l + 2; /* Remembered to detect an END with no digits. */
+ codepoint_end = strtoul (l + 2, &l, 16);
+ if (l == l2 || codepoint_end < codepoint_start)
+ fail ("parsing DerivedCoreProperties.txt, reading code point");
+ if (codepoint_end > MAX_CODE_POINT)
+ fail ("parsing DerivedCoreProperties.txt, code point too large");
+ }
+ else
+ codepoint_end = codepoint_start; /* Single code point. */
+
+ while (*l == ' ')
+ l++;
+ if (*l++ != ';') /* The field separator is mandatory. */
+ fail ("parsing DerivedCoreProperties.txt, reading code point");
+
+ while (*l == ' ')
+ l++; /* l now points at the property name. */
+
+ if (codepoint_end < 0x80)
+ continue; /* Range ends below 0x80: nothing is recorded. NOTE(review): only the end is tested, so a range starting below 0x80 but ending at or above it would be recorded in full. */
+
+ if (strncmp (l, "XID_Start ", 10) == 0) /* The trailing space is part of the match. */
+ {
+ for (; codepoint_start <= codepoint_end; codepoint_start++)
+ flags[codepoint_start]
+ = (flags[codepoint_start] | CXX23) & ~NXX23;
+ }
+ else if (strncmp (l, "XID_Continue ", 13) == 0) /* The trailing space is part of the match. */
+ {
+ for (; codepoint_start <= codepoint_end; codepoint_start++)
+ if ((flags[codepoint_start] & CXX23) == 0) /* Leave code points that already have CXX23 untouched. */
+ flags[codepoint_start] |= CXX23 | NXX23;
+ }
+ }
+ if (ferror (f)) /* Tell a read error apart from plain end of file. */
+ fail ("reading DerivedCoreProperties.txt");
+ fclose (f);
+}
+
/* Write out the table.
The table consists of two words per entry. The first word is the flags
for the unicode code points up to and including the second word. */
@@ -261,12 +331,14 @@ write_table (void)
|| really_safe != (decomp[i][0] == 0)
|| combining_value[i] != last_combine)
{
- printf ("{ %s|%s|%s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n",
+ printf ("{ %s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n",
last_flag & C99 ? "C99" : " 0",
last_flag & N99 ? "N99" : " 0",
last_flag & CXX ? "CXX" : " 0",
last_flag & C11 ? "C11" : " 0",
last_flag & N11 ? "N11" : " 0",
+ last_flag & CXX23 ? "CXX23" : " 0",
+ last_flag & NXX23 ? "NXX23" : " 0",
really_safe ? "CID" : " 0",
last_flag & not_NFC ? " 0" : "NFC",
last_flag & not_NFKC ? " 0" : "NKC",
@@ -439,11 +511,12 @@ write_copyright (void)
int
main(int argc, char ** argv)
{
- if (argc != 4)
+ if (argc != 5)
fail ("too few arguments to makeucn");
read_ucnid (argv[1]);
read_table (argv[2]);
read_derived (argv[3]);
+ read_derivedcore (argv[4]);
write_copyright ();
write_table ();