aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 12
-rw-r--r-- posix/regcomp.c 9
2 files changed, 16 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index 2ef08f0..2ee6a12 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2018-08-10 Paul Eggert <eggert@cs.ucla.edu>
+
+ regex: Gnulib unibyte RRI uses bytes not chars
+ Adjust the non-glibc code to agree with what Gawk needs for
+ rational range interpretation (RRI) for regular expression ranges.
+ In unibyte locales, Gawk wants ranges to use the underlying byte
+ rather than the character code point. This change does not affect
+ glibc proper.
+ * posix/regcomp.c (parse_byte) [!LIBC && RE_ENABLE_I18N]:
+ In unibyte locales, use the byte value rather than
+ running it through btowc.
+
2018-08-10 Joseph Myers <joseph@codesourcery.com>
* sysdeps/generic/math-tests-snan.h: New file.
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 3b0a3c6..e81652f 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -2684,15 +2684,14 @@ parse_dup_op (bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,
# ifdef RE_ENABLE_I18N
/* Convert the byte B to the corresponding wide character. In a
- unibyte locale, treat B as itself if it is an encoding error.
- In a multibyte locale, return WEOF if B is an encoding error. */
+ unibyte locale, treat B as itself. In a multibyte locale, return
+ WEOF if B is an encoding error. */
static wint_t
parse_byte (unsigned char b, re_charset_t *mbcset)
{
- wint_t wc = __btowc (b);
- return wc == WEOF && !mbcset ? b : wc;
+ return mbcset == NULL ? b : __btowc (b);
}
-#endif
+# endif
/* Local function for parse_bracket_exp only used in case of NOT _LIBC.
Build the range expression which starts from START_ELEM, and ends