From 755104edc75c53f4a0e7440334e944ad3c6b32fc Mon Sep 17 00:00:00 2001
From: Ulrich Drepper
Date: Mon, 18 Sep 2000 22:41:47 +0000
Subject: Update.

2000-09-18 Ulrich Drepper

    * version.h (VERSION): Bump to 2.1.94.

    * malloc/mtrace.c (mtrace): Mark stream as close on exec.

2000-09-17 Bruno Haible

    * iconvdata/utf-16.c (BODY for TO_LOOP): Reject UCS-4 input in the
    range 0xD800..0xDFFF.
    * iconvdata/unicode.c (BODY for TO_LOOP): Likewise.
    (BODY for FROM_LOOP): Likewise.
    * iconv/gconv_simple.c (ucs2_internal_loop): Likewise.
    (internal_ucs2_loop): Likewise.
    (ucs2reverse_internal_loop): Likewise.
    (internal_ucs2reverse_loop): Likewise.

2000-09-17 Bruno Haible

    * iconvdata/utf-16.c (gconv_init): Add missing slashes to encoding
    names.

2000-09-17 Bruno Haible

    * iconvdata/tst-table-from.c (main): Fix test for error on stdout.
    * iconvdata/tst-table-to.c (main): Likewise.

2000-09-17 Bruno Haible

    * iconvdata/iso-ir-165.c (__isoir165_from_tab): Renamed from
    __isoir165_tab.
    * iconvdata/cns11643.h (__cns11643l1_to_ucs4_tab): New declaration.
    * iconvdata/iso-2022-cn-ext.c: Include "cns11643.h".
    (GB7590_set, GB13132_set, CNS11643_3_set, CNS11643_4_set,
    CNS11643_5_set, CNS11643_6_set, CNS11643_7_set): Change enum values.
    (BODY for FROM_LOOP): Fix buffer overrun. Treat CNS11643 plane 3.
    Return __GCONV_INCOMPLETE_INPUT instead of __GCONV_EMPTY_INPUT.
    (BODY for TO_LOOP): Fix usage of `set' vs. `used'. Fix typo that
    caused GB2312 to be used instead of ISO-IR-165. Treat CNS11643
    plane 3. Fix shift sequences. Output announcement for SS2 and SS3
    encodings when needed. When outputting an announcement, don't clear
    most other announcements.

2000-09-17 Bruno Haible

    * iconvdata/iso-2022-cn.c (BODY for FROM_LOOP): Fix buffer overrun.
    (BODY for TO_LOOP): Fix usage of `set' vs. `used'.

2000-09-14 Bruno Haible

    * intl/Versions: Add bind_textdomain_codeset.
--- iconv/gconv_simple.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 9 deletions(-) (limited to 'iconv/gconv_simple.c') diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c index a41e1b5..70c43c8 100644 --- a/iconv/gconv_simple.c +++ b/iconv/gconv_simple.c @@ -773,7 +773,6 @@ ucs4le_internal_loop_single (struct __gconv_step *step, } \ else \ /* It's an one byte sequence. */ \ - /* XXX unaligned. */ \ *((uint32_t *) outptr)++ = *inptr++; \ } #define LOOP_NEED_FLAGS @@ -797,7 +796,6 @@ ucs4le_internal_loop_single (struct __gconv_step *step, #define LOOPFCT FROM_LOOP #define BODY \ { \ - /* XXX unaligned. */ \ if (__builtin_expect (*((uint32_t *) inptr), 0) > 0x7f) \ { \ STANDARD_ERR_HANDLER (4); \ @@ -1147,7 +1145,27 @@ ucs4le_internal_loop_single (struct __gconv_step *step, #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO #define LOOPFCT FROM_LOOP #define BODY \ - *((uint32_t *) outptr)++ = *((uint16_t *) inptr)++; + { \ + uint16_t u1 = *((uint16_t *) inptr); \ + \ + if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \ + { \ + /* Surrogate characters in UCS-2 input are not valid. Reject \ + them. (Catching this here is not security relevant.) */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 2; \ + ++*irreversible; \ + continue; \ + } \ + \ + *((uint32_t *) outptr)++ = u1; \ + inptr += 2; \ + } +#define LOOP_NEED_FLAGS #include #include @@ -1168,12 +1186,34 @@ ucs4le_internal_loop_single (struct __gconv_step *step, #define LOOPFCT FROM_LOOP #define BODY \ { \ - if (__builtin_expect (*((uint32_t *) inptr), 0) >= 0x10000) \ + uint32_t val = *((uint32_t *) inptr); \ + \ + if (__builtin_expect (val, 0) >= 0x10000) \ { \ STANDARD_ERR_HANDLER (4); \ } \ + else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \ + { \ + /* Surrogate characters in UCS-4 input are not valid. 
\ + We must catch this, because the UCS-2 output might be \ + interpreted as UTF-16 by other programs. If we let \ + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 4; \ + ++*irreversible; \ + continue; \ + } \ else \ - *((uint16_t *) outptr)++ = *((uint32_t *) inptr)++; \ + { \ + *((uint16_t *) outptr)++ = val; \ + inptr += 4; \ + } \ } #define LOOP_NEED_FLAGS #include @@ -1195,8 +1235,27 @@ ucs4le_internal_loop_single (struct __gconv_step *step, #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO #define LOOPFCT FROM_LOOP #define BODY \ - *((uint32_t *) outptr)++ = bswap_16 (*(uint16_t *) inptr); \ - inptr += 2; + { \ + uint16_t u1 = bswap_16 (*((uint16_t *) inptr)); \ + \ + if (__builtin_expect (u1 >= 0xd800 && u1 < 0xe000, 0)) \ + { \ + /* Surrogate characters in UCS-2 input are not valid. Reject \ + them. (Catching this here is not security relevant.) */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 2; \ + ++*irreversible; \ + continue; \ + } \ + \ + *((uint32_t *) outptr)++ = u1; \ + inptr += 2; \ + } +#define LOOP_NEED_FLAGS #include #include @@ -1222,8 +1281,28 @@ ucs4le_internal_loop_single (struct __gconv_step *step, { \ STANDARD_ERR_HANDLER (4); \ } \ - *((uint16_t *) outptr)++ = bswap_16 (val); \ - inptr += 4; \ + else if (__builtin_expect (val >= 0xd800 && val < 0xe000, 0)) \ + { \ + /* Surrogate characters in UCS-4 input are not valid. \ + We must catch this, because the UCS-2 output might be \ + interpreted as UTF-16 by other programs. If we let \ + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. */ \ + if (! 
ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 4; \ + ++*irreversible; \ + continue; \ + } \ + else \ + { \ + *((uint16_t *) outptr)++ = bswap_16 (val); \ + inptr += 4; \ + } \ } #define LOOP_NEED_FLAGS #include -- cgit v1.1