aboutsummaryrefslogtreecommitdiff
path: root/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c
diff options
context:
space:
mode:
Diffstat (limited to 'iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c')
-rw-r--r--iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c160
1 files changed, 160 insertions, 0 deletions
diff --git a/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c
new file mode 100644
index 0000000..8389ade
--- /dev/null
+++ b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c
@@ -0,0 +1,160 @@
+/* Verify the BIG5HKSCS outputs that generate 2 wchar_t's (Bug 25734).
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <string.h>
+#include <locale.h>
+#include <wchar.h>
+#include <support/check.h>
+#include <support/support.h>
+
+/* A few BIG5-HKSCS characters map in two unicode code points.
+ They are:
+ /x88/x62 => <U00CA><U0304>
+ /x88/x64 => <U00CA><U030C>
+ /x88/xa3 => <U00EA><U0304>
+ /x88/xa5 => <U00EA><U030C>
+ Each of these is special cased in iconvdata/big5hkscs.c.
+ This test ensures that we correctly reset the shift state after
+ outputting any of these characters. We do this by converting
+ each them followed by converting an ASCII character. If we fail
+ to reset the shift state (bug 25734) then we'll see the last
+ character in the queue output again. */
+
+/* Each test has name, input bytes, and expected wide character
+ output. */
+struct testdata {
+ const char *name;
+ const char input[3];
+ wchar_t expected[3];
+};
+
+/* In BIG5-HKSCS (2008) there are 4 characters that generate multiple
+ wide characters. */
+struct testdata tests[4] = {
+ /* <H-8862>X => <U+00CA><U+0304>X */
+ { "<H-8862>", "\x88\x62\x58", { 0x00CA, 0x0304, 0x0058 } },
+ /* <H-8864>X => <U+00CA><U+030C>X */
+ { "<H-8864>", "\x88\x64\x58", { 0x00CA, 0x030C, 0x0058 } },
+ /* <H-88A3>X => <U+00EA><U+0304>X */
+ { "<H-88A3>", "\x88\xa3\x58", { 0x00EA, 0x0304, 0x0058 } },
+ /* <H-88A5>X => <U+00EA><U+030C>X */
+ { "<H-88A5>", "\x88\xa5\x58", { 0x00EA, 0x030C, 0x0058 } }
+};
+
+/* Each test is of the form:
+ - Translate first code sequence (two bytes)
+ - Translate second (zero bytes)
+ - Translate the third (one byte). */
+static int
+check_conversion (struct testdata test)
+{
+ int err = 0;
+ wchar_t wc;
+ mbstate_t st;
+ size_t ret;
+ const char *mbs = test.input;
+ int consumed = 0;
+ /* Input is always 3 bytes long. */
+ int inlen = 3;
+
+ memset (&st, 0, sizeof (st));
+ /* First conversion: Consumes first 2 bytes. */
+ ret = mbrtowc (&wc, mbs, inlen - consumed, &st);
+ if (ret != 2)
+ {
+ printf ("error: First conversion consumed only %zd bytes.\n", ret);
+ err++;
+ }
+ /* Advance the two consumed bytes. */
+ mbs += ret;
+ consumed += ret;
+ if (wc != test.expected[0])
+ {
+ printf ("error: Result of first conversion was wrong.\n");
+ err++;
+ }
+ /* Second conversion: Consumes 0 bytes. */
+ ret = mbrtowc (&wc, mbs, inlen - consumed, &st);
+ if (ret != 0)
+ {
+ printf ("error: Second conversion consumed only %zd bytes.\n", ret);
+ err++;
+ }
+ /* Advance the zero consumed bytes. */
+ mbs += ret;
+ consumed += ret;
+ if (wc != test.expected[1])
+ {
+ printf ("error: Result of second conversion was wrong.\n");
+ err++;
+ }
+ /* After the second conversion the state of the converter should be
+ in the initial state. It is in the initial state because the two
+ input BIG5-HKSCS bytes have been consumed and the 2 wchar_t's have
+ been output. */
+ if (mbsinit (&st) == 0)
+ {
+ printf ("error: Converter not in initial state.\n");
+ err++;
+ }
+ /* Third conversion: Consumes 1 byte (it's an ASCII character). */
+ ret = mbrtowc (&wc, mbs, inlen - consumed, &st);
+ if (ret != 1)
+ {
+ printf ("error: Third conversion consumed only %zd bytes.\n", ret);
+ err++;
+ }
+ /* Advance the one byte. */
+ mbs += ret;
+ consumed += ret;
+ if (wc != test.expected[2])
+ {
+ printf ("error: Result of third conversion was wrong.\n");
+ err++;
+ }
+ /* Return 0 if we saw no errors. */
+ return err;
+}
+
+static int
+do_test (void)
+{
+ int err = 0;
+ int ret;
+ /* Testing BIG5-HKSCS. */
+ xsetlocale (LC_ALL, "zh_HK.BIG5-HKSCS");
+
+ /* Run all the special conversions. */
+ for (int i = 0; i < (sizeof (tests) / sizeof (struct testdata)); i++)
+ {
+ printf ("Running test for %s\n", tests[i].name);
+ ret = check_conversion (tests[i]);
+ if (ret > 0)
+ printf ("Test %s failed.\n", tests[i].name);
+ err += ret;
+ }
+
+ /* Fail if any conversion had an error. */
+ if (err > 0)
+ FAIL_EXIT1 ("One or more conversions failed.");
+
+ return 0;
+}
+
+#include <support/test-driver.c>