Update.

2004-11-08 Ulrich Drepper <drepper@redhat.com>

  * posix/regcomp.c (utf8_sb_map): Define.
  (free_dfa_content): Don't free dfa->sb_char if it's a pointer to utf8_sb_map.
  (init_dfa): Use utf8_sb_map instead of initializing memory when the encoding is UTF-8.

  * posix/regcomp.c (init_dfa): Get the codeset name outside glibc as well. Check if it is spelled UTF8 as well as UTF-8, and check case-insensitively. Set dfa->map_notascii manually when outside glibc.
  * posix/regex_internal.c (build_wcs_upper_buffer) [!_LIBC]: Enable optimizations based on map_notascii.
  * posix/regex_internal.h [HAVE_LANGINFO_H || HAVE_LANGINFO_CODESET || _LIBC]: Include langinfo.h.

  * posix/regex_internal.h (struct re_backref_cache_entry): Add "more" field.
  * posix/regexec.c (check_dst_limits): Hoist computation of the source and destination bkref_idx out of the loop. Pass it to check_dst_limits_calc_pos.
  (check_dst_limits_calc_pos_1): New function, containing the recursive loop of check_dst_limits_calc_pos; uses the "more" field of struct re_backref_cache to control the loop.
  (check_dst_limits_calc_pos): Store into "boundaries" the position relative to lim's start and end positions. Do not accept eclosures, accept bkref_idx instead. Call check_dst_limits_calc_pos_1 to do the work.
  (sift_states_bkref): Use the "more" field of struct re_backref_cache to control the loop. A big "if" was turned into a continue and the function was reindented.
  (get_subexp): Use the "more" field of struct re_backref_cache to control the loop.
  (match_ctx_add_entry): Initialize the bkref_ents' "more" field.
  (search_cur_bkref_entry): Return -1 if out of bounds.

  * posix/regexec.c (empty_set): Remove.
  (sift_states_backward): Remove cur_src variable. Move inner loop to build_sifted_states.
  (build_sifted_states): Extract from sift_states_backward. Do not use empty_set.
  (update_cur_sifted_state): Do not use empty_set. Special case dest_nodes->nelem == 0.
author: Ulrich Drepper <drepper@redhat.com> 2004-11-08 22:49:44 +0000
committer: Ulrich Drepper <drepper@redhat.com> 2004-11-08 22:49:44 +0000
commit: e40a38b383fdbc616eb110e7cd6f780d010783cc (patch)
tree: 5a5a8e9acbf760879b7eefcc46414f1454199eea /posix/regex_internal.c
parent: d2c38eb3facb84db061289f20ff8a210f91e4115 (diff)
download: glibc-e40a38b383fdbc616eb110e7cd6f780d010783cc.zip
glibc-e40a38b383fdbc616eb110e7cd6f780d010783cc.tar.gz
glibc-e40a38b383fdbc616eb110e7cd6f780d010783cc.tar.bz2
1 file changed, 5 insertions, 11 deletions
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index 96f1137..609719f 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -293,9 +293,8 @@ build_wcs_upper_buffer (pstr)
   byte_idx = pstr->valid_len;
   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
 
-#ifdef _LIBC
-  /* The following optimization assumes that the wchar_t encoding is
-     always ISO 10646.  */
+  /* The following optimization assumes that ASCII characters can be
+     mapped to wide characters with a simple cast.  */
   if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
     {
       while (byte_idx < end_idx)
@@ -309,8 +308,7 @@ build_wcs_upper_buffer (pstr)
 	      pstr->mbs[byte_idx]
 		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
 	      /* The next step uses the assumption that wchar_t is encoded
-		 with ISO 10646: all ASCII values can be converted like
-		 this.  */
+		 ASCII-safe: all ASCII values can be converted like this.  */
 	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
 	      ++byte_idx;
 	      continue;
@@ -368,14 +366,11 @@ build_wcs_upper_buffer (pstr)
       return REG_NOERROR;
     }
   else
-#endif
     for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
       {
 	wchar_t wc;
 	const char *p;
-#ifdef _LIBC
-offsets_needed:
-#endif
+      offsets_needed:
 	remain_len = end_idx - byte_idx;
 	prev_st = pstr->cur_state;
 	if (BE (pstr->trans != NULL, 0))
@@ -647,7 +642,6 @@ re_string_reconstruct (pstr, idx, eflags)
 	      int wcs_idx;
 	      wint_t wc = WEOF;
 
-#ifdef _LIBC
 	      if (pstr->is_utf8)
 		{
 		  const unsigned char *raw, *p, *q, *end;
@@ -687,7 +681,7 @@ re_string_reconstruct (pstr, idx, eflags)
 			break;
 		      }
 		}
-#endif
+
 	      if (wc == WEOF)
 		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
 	      if (BE (pstr->valid_len, 0))
author	Ulrich Drepper <drepper@redhat.com>	2004-11-08 22:49:44 +0000
committer	Ulrich Drepper <drepper@redhat.com>	2004-11-08 22:49:44 +0000
commit	e40a38b383fdbc616eb110e7cd6f780d010783cc (patch)
tree	5a5a8e9acbf760879b7eefcc46414f1454199eea /posix/regex_internal.c
parent	d2c38eb3facb84db061289f20ff8a210f91e4115 (diff)
download	glibc-e40a38b383fdbc616eb110e7cd6f780d010783cc.zip glibc-e40a38b383fdbc616eb110e7cd6f780d010783cc.tar.gz glibc-e40a38b383fdbc616eb110e7cd6f780d010783cc.tar.bz2