From db6af3ebf46a83b885455dc03a3c2c1c2c2dedec Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 28 Dec 2011 06:19:42 -0500 Subject: Add uchar.h support, part 1 c16 support for locales other than the C locale is still missing. The ASCII to UTF-16 steps are registered with 1, 1, 2, 2 (one input byte, two output bytes) to match MIN_NEEDED_FROM and MIN_NEEDED_TO of ascii_utf16_loop; the UTF-16 to ASCII steps use 2, 2, 1, 1. --- iconv/gconv_builtin.h | 23 +++++++++++++++- iconv/gconv_int.h | 4 ++- iconv/gconv_simple.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 97 insertions(+), 5 deletions(-) (limited to 'iconv') diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h index ef9ab8d..fd736a4 100644 --- a/iconv/gconv_builtin.h +++ b/iconv/gconv_builtin.h @@ -1,5 +1,5 @@ /* Builtin transformations. - Copyright (C) 1997-1999, 2000-2002, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-1999, 2000-2002, 2006, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1997. @@ -122,3 +122,24 @@ BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1, __gconv_transform_internal_ucs2reverse, NULL, 4, 4, 2, 2) #endif + + +BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "UTF-16//", 1, "=ascii->UTF-16", + __gconv_transform_ascii_utf16, NULL, 1, 1, 2, 2) + +BUILTIN_TRANSFORMATION ("UTF-16//", "ANSI_X3.4-1968//", 1, "=UTF-16->ascii", + __gconv_transform_utf16_ascii, NULL, 2, 2, 1, 1) + +#if BYTE_ORDER == BIG_ENDIAN +BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "UTF-16BE//", 1, "=ascii->UTF-16BE", + __gconv_transform_ascii_utf16, NULL, 1, 1, 2, 2) + +BUILTIN_TRANSFORMATION ("UTF-16BE//", "ANSI_X3.4-1968//", 1, "=UTF-16BE->ascii", + __gconv_transform_utf16_ascii, NULL, 2, 2, 1, 1) +#else +BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "UTF-16LE//", 1, "=ascii->UTF-16LE", + __gconv_transform_ascii_utf16, NULL, 1, 1, 2, 2) + +BUILTIN_TRANSFORMATION ("UTF-16LE//", "ANSI_X3.4-1968//", 1, "=UTF-16LE->ascii", + __gconv_transform_utf16_ascii, NULL, 2, 2, 1, 1) +#endif diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h index fd11220..80253dd 100644 --- a/iconv/gconv_int.h +++ b/iconv/gconv_int.h @@
-1,4 +1,4 @@ -/* Copyright (C) 1997-2005, 2006, 2007 Free Software Foundation, Inc. +/* Copyright (C) 1997-2005, 2006, 2007, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1997. @@ -303,6 +303,8 @@ __BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4le); __BUILTIN_TRANSFORM (__gconv_transform_ucs4le_internal); __BUILTIN_TRANSFORM (__gconv_transform_internal_utf16); __BUILTIN_TRANSFORM (__gconv_transform_utf16_internal); +__BUILTIN_TRANSFORM (__gconv_transform_ascii_utf16); +__BUILTIN_TRANSFORM (__gconv_transform_utf16_ascii); # undef __BUITLIN_TRANSFORM /* Specialized conversion function for a single byte to INTERNAL, recognizing diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index e34f377..b0ef3e6 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -1,5 +1,5 @@ /* Simple transformations functions. - Copyright (C) 1997-2005, 2007, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 1997-2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1997. @@ -965,7 +965,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, cnt = 2; \ ch &= 0x1f; \ } \ - else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ + else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1)) \ { \ /* We expect three bytes. */ \ cnt = 3; \ @@ -1221,7 +1221,7 @@ ucs4le_internal_loop_single (struct __gconv_step *step, else \ { \ put16 (outptr, val); \ - outptr += sizeof (uint16_t); \ + outptr += sizeof (uint16_t); \ inptr += 4; \ } \ } @@ -1320,3 +1320,72 @@ ucs4le_internal_loop_single (struct __gconv_step *step, #define LOOP_NEED_FLAGS #include #include + + +/* Convert from ISO 646-IRV (ASCII) to UTF-16 in host byte order. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 1 +#define MIN_NEEDED_TO 2 +#define FROM_DIRECTION 1 +#define FROM_LOOP ascii_utf16_loop +#define TO_LOOP ascii_utf16_loop /* This is not used.
*/ +#define FUNCTION_NAME __gconv_transform_ascii_utf16 +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + if (__builtin_expect (*inptr > '\x7f', 0)) \ + { \ + /* The value is too large. We don't try transliteration here since \ + this is not an error because of the lack of possibilities to \ + represent the result. This is a genuine bug in the input since \ + ASCII does not allow such values. */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (1); \ + } \ + else \ + { \ + /* One ASCII byte becomes one 16-bit unit. */ \ + *((uint16_t *) outptr) = *inptr++; \ + outptr += sizeof (uint16_t); \ + } \ + } +#define LOOP_NEED_FLAGS +#include +#include + + +/* Convert from UTF-16 in host byte order to ISO 646-IRV (ASCII). */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 2 +#define MIN_NEEDED_TO 1 +#define FROM_DIRECTION 1 +#define FROM_LOOP utf16_ascii_loop +#define TO_LOOP utf16_ascii_loop /* This is not used. */ +#define FUNCTION_NAME __gconv_transform_utf16_ascii +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP +#define BODY \ + { \ + if (__builtin_expect (*((const uint16_t *) inptr) > 0x7f, 0)) \ + { \ + UNICODE_TAG_HANDLER (*((const uint16_t *) inptr), 2); \ + STANDARD_TO_LOOP_ERR_HANDLER (2); \ + } \ + else \ + { \ + /* The unit is in the ASCII range; store it as one byte. */ \ + *outptr++ = *((const uint16_t *) inptr); \ + inptr += sizeof (uint16_t); \ + } \ + } +#define LOOP_NEED_FLAGS +#include +#include -- cgit v1.1