From 513aaa0d782f8fae36732d06ca59d658149f0139 Mon Sep 17 00:00:00 2001 From: Arjun Shankar Date: Wed, 23 Oct 2019 18:51:29 +0200 Subject: Add Transliterations for Unicode Misc. Mathematical Symbols-A/B [BZ #23132] This commit adds previously missing transliterations for several code points in the Unicode blocks "Miscellaneous Mathematical Symbols-A/B" - transliterated to their approximate ASCII representations. It also adds a corresponding iconv transliteration test. Reviewed-by: Carlos O'Donell --- localedata/Makefile | 4 +- localedata/locales/translit_neutral | 52 +++++++++++++++++- localedata/tst-iconv-math-trans.c | 104 ++++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 3 deletions(-) create mode 100644 localedata/tst-iconv-math-trans.c (limited to 'localedata') diff --git a/localedata/Makefile b/localedata/Makefile index ce6a750..89ba404 100644 --- a/localedata/Makefile +++ b/localedata/Makefile @@ -156,7 +156,7 @@ tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \ tst-leaks tst-mbswcs1 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 \ tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \ tst-strfmon1 tst-sscanf bug-setlocale1 tst-setlocale2 tst-setlocale3 \ - tst-wctype + tst-wctype tst-iconv-math-trans tests-static = bug-setlocale1-static tests += $(tests-static) ifeq (yes,$(build-shared)) @@ -287,6 +287,8 @@ LOCALES := \ $(NULL) include ../gen-locales.mk + +$(objpfx)tst-iconv-math-trans.out: $(gen-locales) endif include ../Rules diff --git a/localedata/locales/translit_neutral b/localedata/locales/translit_neutral index e3639c0..72f6622 100644 --- a/localedata/locales/translit_neutral +++ b/localedata/locales/translit_neutral @@ -743,10 +743,22 @@ include "translit_wide";"" "" % BLACK SMILING FACE "" -% MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET⟫ - "" +% MATHEMATICAL LEFT WHITE SQUARE BRACKET + "" +% MATHEMATICAL RIGHT WHITE SQUARE BRACKET + "" +% MATHEMATICAL LEFT ANGLE BRACKET + +% MATHEMATICAL RIGHT ANGLE BRACKET + % MATHEMATICAL LEFT DOUBLE ANGLE BRACKET "" +% MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET + "" +% MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET + "" +% MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET + "" % MATHEMATICAL LEFT FLATTENED PARENTHESIS % MATHEMATICAL RIGHT FLATTENED PARENTHESIS @@ -755,6 +767,42 @@ include "translit_wide";"" % MATHEMATICAL FALLING DIAGONAL +% TRIPLE VERTICAL BAR DELIMITER + "" +% LEFT WHITE CURLY BRACKET + "" +% RIGHT WHITE CURLY BRACKET + "" +% LEFT WHITE PARENTHESIS + "" +% RIGHT WHITE PARENTHESIS + "" +% Z NOTATION LEFT IMAGE BRACKET + "" +% Z NOTATION RIGHT IMAGE BRACKET + "" +% Z NOTATION LEFT BINDING BRACKET + "" +% Z NOTATION RIGHT BINDING BRACKET + "" +% EQUALS SIGN AND SLANTED PARALLEL + +% IDENTICAL TO AND SLANTED PARALLEL + +% REVERSE SOLIDUS OPERATOR + +% BIG SOLIDUS + +% BIG REVERSE SOLIDUS + +% LEFT-POINTING CURVED ANGLE BRACKET + +% RIGHT-POINTING CURVED ANGLE BRACKET + +% TINY + +% MINY + % LEFT ANGLE BRACKET % RIGHT ANGLE BRACKET diff --git a/localedata/tst-iconv-math-trans.c b/localedata/tst-iconv-math-trans.c new file mode 100644 index 0000000..32473bb --- /dev/null +++ b/localedata/tst-iconv-math-trans.c @@ -0,0 +1,104 @@ +/* Test some mathematical operator transliterations (BZ #23132) + + Copyright (C) 2019 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include +#include +#include +#include + +static int +do_test (void) +{ + iconv_t cd; + + /* str[] = "⟦ ⟧ ⟨ ⟩" + " ⟬ ⟭ ⦀" + " ⦃ ⦄ ⦅ ⦆" + " ⦇ ⦈ ⦉ ⦊" + " ⧣ ⧥ ⧵ ⧸ ⧹" + " ⧼ ⧽ ⧾ ⧿"; */ + + const char str[] = "\u27E6 \u27E7 \u27E8 \u27E9" + " \u27EC \u27ED \u2980" + " \u2983 \u2984 \u2985 \u2986" + " \u2987 \u2988 \u2989 \u298A" + " \u29E3 \u29E5 \u29F5 \u29F8 \u29F9" + " \u29FC \u29FD \u29FE \u29FF"; + + const char expected[] = "[| |] < >" + " (( )) |||" + " {| |} (( ))" + " (| |) <| |>" + " # # \\ / \\" + " < > + -"; + + char *inptr = (char *) str; + size_t inlen = strlen (str) + 1; + char outbuf[500]; + char *outptr = outbuf; + size_t outlen = sizeof (outbuf); + int result = 0; + size_t n; + + if (setlocale (LC_ALL, "en_US.UTF-8") == NULL) + FAIL_EXIT1 ("setlocale failed"); + + cd = iconv_open ("ASCII//TRANSLIT", "UTF-8"); + if (cd == (iconv_t) -1) + FAIL_EXIT1 ("iconv_open failed"); + + n = iconv (cd, &inptr, &inlen, &outptr, &outlen); + if (n != 24) + { + if (n == (size_t) -1) + printf ("iconv() returned error: %m\n"); + else + printf ("iconv() returned %Zd, expected 24\n", n); + result = 1; + } + if (inlen != 0) + { + puts ("not all input consumed"); + result = 1; + } + else if (inptr - str != strlen (str) + 1) + { + printf ("inptr wrong, advanced by %td\n", inptr - str); + result = 1; + } + if (memcmp (outbuf, expected, sizeof (expected)) != 0) + { + printf ("result wrong: \"%.*s\", expected: \"%s\"\n", + (int) (sizeof (outbuf) - outlen), outbuf, expected); + result = 1; + } + else if (outlen != sizeof (outbuf) - sizeof (expected)) + { + printf ("outlen wrong: %Zd, expected %Zd\n", outlen, + sizeof (outbuf) - 15); + result = 1; + } + else + printf ("output is \"%s\" which is OK\n", outbuf); + + return result; +} + +#include -- cgit v1.1