about summary refs log tree commit diff
path: root/newlib
diff options
context:
space:
mode:
author Jeff Johnston <jjohnstn@redhat.com> 2002-09-27 20:17:52 +0000
committer Jeff Johnston <jjohnstn@redhat.com> 2002-09-27 20:17:52 +0000
commit 1ed3c86dd72ba76b9a1048bbd4b756899f803302 (patch)
tree f57d496af02d21c651f7f16d89d90e032a62ba37 /newlib
parent 8921a7a7a3e72041187b63e6325abf10dc42884b (diff)
download newlib-1ed3c86dd72ba76b9a1048bbd4b756899f803302.zip
newlib-1ed3c86dd72ba76b9a1048bbd4b756899f803302.tar.gz
newlib-1ed3c86dd72ba76b9a1048bbd4b756899f803302.tar.bz2
2002-09-27 Jeff Johnston <jjohnstn@redhat.com>
* libc/ctype/jp2uc.c: Change to use multiple arrays in jp2uc.h. Also convert to EUCJP before using arrays. For values not in the conversion arrays, return WEOF.
* libc/ctype/jp2uc.h: Change from one array to a number of arrays to account for the fact that the originating table is not contiguous for the input values since some are invalid.
Diffstat (limited to 'newlib')
-rw-r--r-- newlib/ChangeLog 9
-rw-r--r-- newlib/libc/ctype/jp2uc.c 99
-rw-r--r-- newlib/libc/ctype/jp2uc.h 444
3 files changed, 292 insertions, 260 deletions
diff --git a/newlib/ChangeLog b/newlib/ChangeLog
index 90e5604..faab582 100644
--- a/newlib/ChangeLog
+++ b/newlib/ChangeLog
@@ -1,3 +1,12 @@
+2002-09-27 Jeff Johnston <jjohnstn@redhat.com>
+
+ * libc/ctype/jp2uc.c: Change to use multiple arrays in jp2uc.h.
+ Also convert to EUCJP before using arrays. For values not in
+ the conversion arrays, return WEOF.
+ * libc/ctype/jp2uc.h: Change from one array to a number of
+ arrays to account for the fact that the originating table
+ is not contiguous for the input values since some are invalid.
+
2002-09-24 Jeff Johnston <jjohnstn@redhat.com>
* libc/time/ctime.c: Fix prototype documentation.
diff --git a/newlib/libc/ctype/jp2uc.c b/newlib/libc/ctype/jp2uc.c
index 1e64b69..044c76b 100644
--- a/newlib/libc/ctype/jp2uc.c
+++ b/newlib/libc/ctype/jp2uc.c
@@ -40,47 +40,104 @@ wint_t
_DEFUN (__jp2uc, (c, type), wint_t c _AND int type)
{
int index, adj;
- unsigned char byte1, byte2, adj_byte1, adj_byte2;
+ unsigned char byte1, byte2;
+ wint_t ret;
- /* we actually use a table of JIS to Unicode. For SJIS, we simply
- note that SJIS is essentially JIS with the top bits on in each
- byte. For EUCJP, we essentially do a translation to JIS before
- accessing the table. */
+ /* we actually use tables of EUCJP to Unicode. For JIS, we simply
+ note that EUCJP is essentially JIS with the top bits on in each
+ byte and translate to EUCJP. For SJIS, we do a translation to EUCJP before
+ accessing the tables. */
switch (type)
{
case JP_JIS:
- index = ((c >> 8) - 0x21) * 0xfe + ((c & 0xff) - 0x21);
- break;
- case JP_SJIS:
- index = ((c >> 8) - 0xa1) * 0xfe + ((c & 0xff) - 0xa1);
+ byte1 = (c >> 8) + 0x80;
+ byte2 = (c & 0xff) + 0x80;
break;
case JP_EUCJP:
+ byte1 = (c >> 8);
+ byte2 = (c & 0xff);
+ break;
+ case JP_SJIS:
byte1 = c >> 8;
byte2 = c & 0xff;
- if (byte2 <= 0x7e || (byte2 & 0x1))
+ if (byte2 <= 0x9e)
{
- adj = -0x22;
- adj_byte2 = (byte2 & 0xfe) - 31;
+ adj = 0xa1 - 0x22;
+ byte2 = (byte2 - 31) + 0xa1;
}
else
{
- adj = -0x21;
- adj_byte2 = byte2 - (0x7e + 0x21);
+ adj = 0xa1 - 0x21;
+ byte2 = (byte2 - 126) + 0xa1;
}
if (byte1 <= 0x9f)
- adj_byte1 = ((byte1 - 112) >> 1) + adj;
+ byte1 = ((byte1 - 112) << 1) + adj;
else
- adj_byte1 = ((byte1 - 112) >> 1) + adj;
- index = adj_byte1 * 0xfe + adj_byte2;
+ byte1 = ((byte1 - 176) << 1) + adj;
+ break;
+ default:
+ return WEOF;
+ }
+
+ /* find conversion in jp2uc arrays */
+
+ /* handle larger ranges first */
+ if (byte1 >= 0xb0 && byte1 <= 0xcf && c <= 0xcfd3)
+ {
+ index = (byte1 - 0xb0) * 0xfe + (byte2 - 0xa1);
+ return b02cf[index];
+ }
+ else if (byte1 >= 0xd0 && byte1 <= 0xf4 && c <= 0xf4a6)
+ {
+ index = (byte1 - 0xd0) * 0xfe + (byte2 - 0xa1);
+ return d02f4[index];
+ }
+
+ /* handle smaller ranges here */
+ switch (byte1)
+ {
+ case 0xA1:
+ return (wint_t)a1[byte2 - 0xa1];
+ case 0xA2:
+ ret = a2[byte2 - 0xa1];
+ if (ret != 0)
+ return (wint_t)ret;
+ break;
+ case 0xA3:
+ if (a3[byte2 - 0xa1])
+ return (wint_t)(0xff00 + (byte2 - 0xa0));
+ break;
+ case 0xA4:
+ if (byte2 <= 0xf3)
+ return (wint_t)(0x3000 + (byte2 - 0x60));
+ break;
+ case 0xA5:
+ if (byte2 <= 0xf6)
+ return (wint_t)(0x3000 + byte2);
+ break;
+ case 0xA6:
+ ret = 0;
+ if (byte2 <= 0xd8)
+ ret = (wint_t)a6[byte2 - 0xa1];
+ if (ret != 0)
+ return ret;
+ break;
+ case 0xA7:
+ ret = 0;
+ if (byte2 <= 0xf1)
+ ret = (wint_t)a7[byte2 - 0xa1];
+ if (ret != 0)
+ return ret;
+ break;
+ case 0xA8:
+ if (byte2 <= 0xc0)
+ return (wint_t)a8[byte2 - 0xa1];
break;
default:
return WEOF;
}
- if (index < 0 || index > (sizeof(jp2uc) / sizeof(unsigned short)))
- return WEOF;
-
- return (wint_t)jp2uc[index];
+ return WEOF;
}
#endif /* MB_CAPABLE */
diff --git a/newlib/libc/ctype/jp2uc.h b/newlib/libc/ctype/jp2uc.h
index a38009d..04eb67d 100644
--- a/newlib/libc/ctype/jp2uc.h
+++ b/newlib/libc/ctype/jp2uc.h
@@ -1,6 +1,7 @@
/* based on eucjp-208A.txt */
-static unsigned short jp2uc[] = {
+/* a1 is contiguous from a1a1 to a1fe */
+static unsigned short a1[] = {
0x3000,
0x3001,
0x3002,
@@ -94,7 +95,11 @@ static unsigned short jp2uc[] = {
0x25CB,
0x25CF,
0x25CE,
- 0x25C7,
+ 0x25C7
+};
+
+/* a2 has a number of holes between a2a1 and a2fe which we fill with 0x0000 */
+static unsigned short a2[] = {
0x25C6,
0x25A1,
0x25A0,
@@ -109,6 +114,17 @@ static unsigned short jp2uc[] = {
0x2191,
0x2193,
0x3013,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
0x2208,
0x220B,
0x2286,
@@ -117,6 +133,14 @@ static unsigned short jp2uc[] = {
0x2283,
0x222A,
0x2229,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
0x2227,
0x2228,
0x00AC,
@@ -124,6 +148,17 @@ static unsigned short jp2uc[] = {
0x21D4,
0x2200,
0x2203,
+ 0x2229,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
0x2220,
0x22A5,
0x2312,
@@ -139,6 +174,13 @@ static unsigned short jp2uc[] = {
0x2235,
0x222B,
0x222C,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
0x212B,
0x2030,
0x266F,
@@ -147,238 +189,118 @@ static unsigned short jp2uc[] = {
0x2020,
0x2021,
0x00B6,
- 0x25EF,
- 0xFF10,
- 0xFF11,
- 0xFF12,
- 0xFF13,
- 0xFF14,
- 0xFF15,
- 0xFF16,
- 0xFF17,
- 0xFF18,
- 0xFF19,
- 0xFF21,
- 0xFF22,
- 0xFF23,
- 0xFF24,
- 0xFF25,
- 0xFF26,
- 0xFF27,
- 0xFF28,
- 0xFF29,
- 0xFF2A,
- 0xFF2B,
- 0xFF2C,
- 0xFF2D,
- 0xFF2E,
- 0xFF2F,
- 0xFF30,
- 0xFF31,
- 0xFF32,
- 0xFF33,
- 0xFF34,
- 0xFF35,
- 0xFF36,
- 0xFF37,
- 0xFF38,
- 0xFF39,
- 0xFF3A,
- 0xFF41,
- 0xFF42,
- 0xFF43,
- 0xFF44,
- 0xFF45,
- 0xFF46,
- 0xFF47,
- 0xFF48,
- 0xFF49,
- 0xFF4A,
- 0xFF4B,
- 0xFF4C,
- 0xFF4D,
- 0xFF4E,
- 0xFF4F,
- 0xFF50,
- 0xFF51,
- 0xFF52,
- 0xFF53,
- 0xFF54,
- 0xFF55,
- 0xFF56,
- 0xFF57,
- 0xFF58,
- 0xFF59,
- 0xFF5A,
- 0x3041,
- 0x3042,
- 0x3043,
- 0x3044,
- 0x3045,
- 0x3046,
- 0x3047,
- 0x3048,
- 0x3049,
- 0x304A,
- 0x304B,
- 0x304C,
- 0x304D,
- 0x304E,
- 0x304F,
- 0x3050,
- 0x3051,
- 0x3052,
- 0x3053,
- 0x3054,
- 0x3055,
- 0x3056,
- 0x3057,
- 0x3058,
- 0x3059,
- 0x305A,
- 0x305B,
- 0x305C,
- 0x305D,
- 0x305E,
- 0x305F,
- 0x3060,
- 0x3061,
- 0x3062,
- 0x3063,
- 0x3064,
- 0x3065,
- 0x3066,
- 0x3067,
- 0x3068,
- 0x3069,
- 0x306A,
- 0x306B,
- 0x306C,
- 0x306D,
- 0x306E,
- 0x306F,
- 0x3070,
- 0x3071,
- 0x3072,
- 0x3073,
- 0x3074,
- 0x3075,
- 0x3076,
- 0x3077,
- 0x3078,
- 0x3079,
- 0x307A,
- 0x307B,
- 0x307C,
- 0x307D,
- 0x307E,
- 0x307F,
- 0x3080,
- 0x3081,
- 0x3082,
- 0x3083,
- 0x3084,
- 0x3085,
- 0x3086,
- 0x3087,
- 0x3088,
- 0x3089,
- 0x308A,
- 0x308B,
- 0x308C,
- 0x308D,
- 0x308E,
- 0x308F,
- 0x3090,
- 0x3091,
- 0x3092,
- 0x3093,
- 0x30A1,
- 0x30A2,
- 0x30A3,
- 0x30A4,
- 0x30A5,
- 0x30A6,
- 0x30A7,
- 0x30A8,
- 0x30A9,
- 0x30AA,
- 0x30AB,
- 0x30AC,
- 0x30AD,
- 0x30AE,
- 0x30AF,
- 0x30B0,
- 0x30B1,
- 0x30B2,
- 0x30B3,
- 0x30B4,
- 0x30B5,
- 0x30B6,
- 0x30B7,
- 0x30B8,
- 0x30B9,
- 0x30BA,
- 0x30BB,
- 0x30BC,
- 0x30BD,
- 0x30BE,
- 0x30BF,
- 0x30C0,
- 0x30C1,
- 0x30C2,
- 0x30C3,
- 0x30C4,
- 0x30C5,
- 0x30C6,
- 0x30C7,
- 0x30C8,
- 0x30C9,
- 0x30CA,
- 0x30CB,
- 0x30CC,
- 0x30CD,
- 0x30CE,
- 0x30CF,
- 0x30D0,
- 0x30D1,
- 0x30D2,
- 0x30D3,
- 0x30D4,
- 0x30D5,
- 0x30D6,
- 0x30D7,
- 0x30D8,
- 0x30D9,
- 0x30DA,
- 0x30DB,
- 0x30DC,
- 0x30DD,
- 0x30DE,
- 0x30DF,
- 0x30E0,
- 0x30E1,
- 0x30E2,
- 0x30E3,
- 0x30E4,
- 0x30E5,
- 0x30E6,
- 0x30E7,
- 0x30E8,
- 0x30E9,
- 0x30EA,
- 0x30EB,
- 0x30EC,
- 0x30ED,
- 0x30EE,
- 0x30EF,
- 0x30F0,
- 0x30F1,
- 0x30F2,
- 0x30F3,
- 0x30F4,
- 0x30F5,
- 0x30F6,
+ 0x222C,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x25EF
+};
+
+
+/* a3a1 to a3fe is mostly contiguous. Conversion output values are
+ of the form 0xFFxx where xx is (yy - 0xA0) where the input is 0xA3yy */
+static unsigned char a3[] = {
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1,
+ 1
+};
+
+/* a4 is contiguous from a4a1 to a4f3 */
+/* transform = 0x30xx where xx = last byte - 0x60 */
+
+/* a5 is contiguous from a5a1 to a5f6 */
+/* transform = 0x30xx where xx = last byte */
+
+/* a6 is mostly contiguous from a6a1 to a6d8 */
+static unsigned short a6[] = {
0x0391,
0x0392,
0x0393,
@@ -403,6 +325,14 @@ static unsigned short jp2uc[] = {
0x03A7,
0x03A8,
0x03A9,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
0x03B1,
0x03B2,
0x03B3,
@@ -426,7 +356,11 @@ static unsigned short jp2uc[] = {
0x03C6,
0x03C7,
0x03C8,
- 0x03C9,
+ 0x03C9
+};
+
+/* a7 is mostly contiguous from a7a1 to a7f1 */
+static unsigned short a7[] = {
0x0410,
0x0411,
0x0412,
@@ -460,6 +394,21 @@ static unsigned short jp2uc[] = {
0x042D,
0x042E,
0x042F,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
+ 0x0000,
0x0430,
0x0431,
0x0432,
@@ -492,7 +441,11 @@ static unsigned short jp2uc[] = {
0x044C,
0x044D,
0x044E,
- 0x044F,
+ 0x044F
+};
+
+/* a8 is contiguous from a8a1 to a8c0 */
+static unsigned short a8[] = {
0x2500,
0x2502,
0x250C,
@@ -524,7 +477,13 @@ static unsigned short jp2uc[] = {
0x2530,
0x2525,
0x2538,
- 0x2542,
+ 0x2542
+};
+
+/* no conversion a9 to af */
+
+/* b0a1 to cfd3 is contiguous except for illegal sequences with 0xfe */
+static unsigned short b02cf[] = {
0x4E9C,
0x5516,
0x5A03,
@@ -3489,7 +3448,12 @@ static unsigned short jp2uc[] = {
0x6900,
0x6E7E,
0x7897,
- 0x8155,
+ 0x8155
+};
+
+/* d0a1 to f4a6 is contiguous */
+
+static unsigned short d02f4[] = {
0x5F0C,
0x4E10,
0x4E15,
@@ -6879,5 +6843,7 @@ static unsigned short jp2uc[] = {
0x9059,
0x7464,
0x51DC,
- 0x7199
+ 0x7199
};
+
+/* f5 to fe is non-existent */