diff options
-rw-r--r-- | iconvdata/Makefile | 17 | ||||
-rw-r--r-- | iconvdata/big5hkscs.c | 3 | ||||
-rw-r--r-- | iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c | 160 |
3 files changed, 176 insertions, 4 deletions
diff --git a/iconvdata/Makefile b/iconvdata/Makefile index c83962f..4ec2741 100644 --- a/iconvdata/Makefile +++ b/iconvdata/Makefile @@ -73,7 +73,7 @@ modules.so := $(addsuffix .so, $(modules)) ifeq (yes,$(build-shared)) tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ - bug-iconv10 bug-iconv11 bug-iconv12 + bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 ifeq ($(have-thread-library),yes) tests += bug-iconv3 endif @@ -275,16 +275,21 @@ endif endif endif -include ../Rules - ifeq ($(run-built-tests),yes) -LOCALES := de_DE.UTF-8 +LOCALES := \ + de_DE.UTF-8 \ + zh_HK.BIG5-HKSCS \ + $(NULL) + include ../gen-locales.mk $(objpfx)bug-iconv6.out: $(gen-locales) $(objpfx)tst-iconv7.out: $(gen-locales) +$(objpfx)tst-iconv-big5-hkscs-to-2ucs4.out: $(gen-locales) endif +include ../Rules + # Set libof-* for each routine. cpp-srcs-left := $(modules) $(generated-modules) $(libJIS-routines) \ $(libKSC-routines) $(libGB-routines) $(libCNS-routines) \ @@ -340,3 +345,7 @@ tst-tables-clean: $(objpfx)gconv-modules: gconv-modules cat $(sysdeps-gconv-modules) $^ > $@ + +# Test requires BIG5HKSCS. +$(objpfx)tst-iconv-big5-hkscs-to-2ucs4.out: $(objpfx)gconv-modules \ + $(addprefix $(objpfx),$(modules.so)) diff --git a/iconvdata/big5hkscs.c b/iconvdata/big5hkscs.c index 01fcfeb..ef32511 100644 --- a/iconvdata/big5hkscs.c +++ b/iconvdata/big5hkscs.c @@ -17895,6 +17895,9 @@ static struct else \ ++inptr; \ } \ + else \ + /* Clear the queue and proceed to output the saved character. */ \ + *statep = 0; \ \ put32 (outptr, ch); \ outptr += 4; \ diff --git a/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c new file mode 100644 index 0000000..8389ade --- /dev/null +++ b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c @@ -0,0 +1,160 @@ +/* Verify the BIG5HKSCS outputs that generate 2 wchar_t's (Bug 25734). + Copyright (C) 2020 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <string.h> +#include <locale.h> +#include <wchar.h> +#include <support/check.h> +#include <support/support.h> + +/* A few BIG5-HKSCS characters map in two unicode code points. + They are: + /x88/x62 => <U00CA><U0304> + /x88/x64 => <U00CA><U030C> + /x88/xa3 => <U00EA><U0304> + /x88/xa5 => <U00EA><U030C> + Each of these is special cased in iconvdata/big5hkscs.c. + This test ensures that we correctly reset the shift state after + outputting any of these characters. We do this by converting + each them followed by converting an ASCII character. If we fail + to reset the shift state (bug 25734) then we'll see the last + character in the queue output again. */ + +/* Each test has name, input bytes, and expected wide character + output. */ +struct testdata { + const char *name; + const char input[3]; + wchar_t expected[3]; +}; + +/* In BIG5-HKSCS (2008) there are 4 characters that generate multiple + wide characters. */ +struct testdata tests[4] = { + /* <H-8862>X => <U+00CA><U+0304>X */ + { "<H-8862>", "\x88\x62\x58", { 0x00CA, 0x0304, 0x0058 } }, + /* <H-8864>X => <U+00CA><U+030C>X */ + { "<H-8864>", "\x88\x64\x58", { 0x00CA, 0x030C, 0x0058 } }, + /* <H-88A3>X => <U+00EA><U+0304>X */ + { "<H-88A3>", "\x88\xa3\x58", { 0x00EA, 0x0304, 0x0058 } }, + /* <H-88A5>X => <U+00EA><U+030C>X */ + { "<H-88A5>", "\x88\xa5\x58", { 0x00EA, 0x030C, 0x0058 } } +}; + +/* Each test is of the form: + - Translate first code sequence (two bytes) + - Translate second (zero bytes) + - Translate the third (one byte). */ +static int +check_conversion (struct testdata test) +{ + int err = 0; + wchar_t wc; + mbstate_t st; + size_t ret; + const char *mbs = test.input; + int consumed = 0; + /* Input is always 3 bytes long. */ + int inlen = 3; + + memset (&st, 0, sizeof (st)); + /* First conversion: Consumes first 2 bytes. */ + ret = mbrtowc (&wc, mbs, inlen - consumed, &st); + if (ret != 2) + { + printf ("error: First conversion consumed only %zd bytes.\n", ret); + err++; + } + /* Advance the two consumed bytes. */ + mbs += ret; + consumed += ret; + if (wc != test.expected[0]) + { + printf ("error: Result of first conversion was wrong.\n"); + err++; + } + /* Second conversion: Consumes 0 bytes. */ + ret = mbrtowc (&wc, mbs, inlen - consumed, &st); + if (ret != 0) + { + printf ("error: Second conversion consumed only %zd bytes.\n", ret); + err++; + } + /* Advance the zero consumed bytes. */ + mbs += ret; + consumed += ret; + if (wc != test.expected[1]) + { + printf ("error: Result of second conversion was wrong.\n"); + err++; + } + /* After the second conversion the state of the converter should be + in the initial state. It is in the initial state because the two + input BIG5-HKSCS bytes have been consumed and the 2 wchar_t's have + been output. */ + if (mbsinit (&st) == 0) + { + printf ("error: Converter not in initial state.\n"); + err++; + } + /* Third conversion: Consumes 1 byte (it's an ASCII character). */ + ret = mbrtowc (&wc, mbs, inlen - consumed, &st); + if (ret != 1) + { + printf ("error: Third conversion consumed only %zd bytes.\n", ret); + err++; + } + /* Advance the one byte. */ + mbs += ret; + consumed += ret; + if (wc != test.expected[2]) + { + printf ("error: Result of third conversion was wrong.\n"); + err++; + } + /* Return 0 if we saw no errors. */ + return err; +} + +static int +do_test (void) +{ + int err = 0; + int ret; + /* Testing BIG5-HKSCS. */ + xsetlocale (LC_ALL, "zh_HK.BIG5-HKSCS"); + + /* Run all the special conversions. */ + for (int i = 0; i < (sizeof (tests) / sizeof (struct testdata)); i++) + { + printf ("Running test for %s\n", tests[i].name); + ret = check_conversion (tests[i]); + if (ret > 0) + printf ("Test %s failed.\n", tests[i].name); + err += ret; + } + + /* Fail if any conversion had an error. */ + if (err > 0) + FAIL_EXIT1 ("One or more conversions failed."); + + return 0; +} + +#include <support/test-driver.c> |