From acb5ee2e561276d64c6e26ef4b82f59a4db5ae90 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 24 May 2000 20:22:51 +0000 Subject: Update. 2000-05-24 Ulrich Drepper * locale/programs/ld-collate.c (struct element_t): Add mbseqorder and wcseqorder members. (struct locale_collate_t): Likewise. (collate_finish): Assign collation sequence value to each character. Create tables for output. (collate_output): Write out tables with collation sequence information. * locale/C-collate.c: Provide C locale data for collation sequence table. * locale/langinfo.h: Add _NL_COLLATE_COLLSEQMB and _NL_COLLATE_COLLSEQWC. * locale/categories.def: Add entries for _NL_COLLATE_COLLSEQMB and _NL_COLLATE_COLLSEQWC. * posix/fnmatch.c: Define SUFFIX and WIDE_CHAR_VERSION before include fnmatch_loop.c. * posix/fnmatch_loop.c: Don't use strcoll while determining whether character is matched by range expression. Use collation sequence table. Outside glibc fall back on simple character value comparison. --- posix/fnmatch_loop.c | 135 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 118 insertions(+), 17 deletions(-) (limited to 'posix/fnmatch_loop.c') diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c index 5f6c057..831bd06 100644 --- a/posix/fnmatch_loop.c +++ b/posix/fnmatch_loop.c @@ -31,6 +31,16 @@ FCT (pattern, string, no_leading_period, flags) { register const CHAR *p = pattern, *n = string; register UCHAR c; +#ifdef _LIBC + const UCHAR *collseq = (const UCHAR *) + _NL_CURRENT(LC_COLLATE, CONCAT(_NL_COLLATE_COLLSEQ,SUFFIX)); +# ifdef WIDE_CHAR_VERSION + const wint_t *names = (const wint_t *) + _NL_CURRENT (LC_COLLATE, _NL_COLLATE_NAMES); + size_t size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE); + size_t layers = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS); +# endif +#endif while ((c = *p++) != L('\0')) { @@ -210,9 +220,9 @@ FCT (pattern, string, no_leading_period, flags) /* Leave room for the null. 
*/ CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; size_t c1 = 0; -# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) wctype_t wt; -# endif +#endif const CHAR *startp = p; for (;;) @@ -240,7 +250,7 @@ FCT (pattern, string, no_leading_period, flags) } str[c1] = L('\0'); -# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) +#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) wt = IS_CHAR_CLASS (str); if (wt == 0) /* Invalid character class name. */ @@ -248,7 +258,7 @@ FCT (pattern, string, no_leading_period, flags) if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) goto matched; -# else +#else if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n)) || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n)) || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n)) @@ -262,7 +272,7 @@ FCT (pattern, string, no_leading_period, flags) || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n)) || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n))) goto matched; -# endif +#endif } else if (c == L('\0')) /* [ (unterminated) loses. */ @@ -279,27 +289,117 @@ FCT (pattern, string, no_leading_period, flags) if (c == L('-') && *p != L(']')) { - /* It is a range. */ - CHAR lo[2]; - CHAR fc[2]; +#if _LIBC + /* We have to find the collation sequence + value for C. Collation sequence is nothing + we can regularly access. The sequence + value is defined by the order in which the + definitions of the collation values for the + various characters appear in the source + file. A strange concept, nowhere + documented. */ + int32_t fseqidx; + int32_t lseqidx; UCHAR cend = *p++; +# ifdef WIDE_CHAR_VERSION + size_t cnt; +# endif + if (!(flags & FNM_NOESCAPE) && cend == L('\\')) cend = *p++; if (cend == L('\0')) return FNM_NOMATCH; - lo[0] = cold; - lo[1] = L('\0'); - fc[0] = fn; - fc[1] = L('\0'); - if (STRCOLL (lo, fc) <= 0) +# ifdef WIDE_CHAR_VERSION + /* Search in the `names' array for the characters. 
*/ + fseqidx = fn % size; + cnt = 0; + while (names[fseqidx] != fn) { - CHAR hi[2]; - hi[0] = FOLD (cend); - hi[1] = L('\0'); - if (STRCOLL (fc, hi) <= 0) + if (++cnt == layers) + /* XXX We don't know anything about + the character we are supposed to + match. This means we are failing. */ + goto range_not_matched; + + fseqidx += size; + } + lseqidx = cold % size; + cnt = 0; + while (names[lseqidx] != cold) + { + if (++cnt == layers) + { + lseqidx = -1; + break; + } + lseqidx += size; + } +# else + fseqidx = fn; + lseqidx = cold; +# endif + + /* XXX It is not entirely clear to me how to handle + characters which are not mentioned in the + collation specification. */ + if ( +# ifdef WIDE_CHAR_VERSION + lseqidx == -1 || +# endif + collseq[lseqidx] <= collseq[fseqidx]) + { + /* We have to look at the upper bound. */ + int32_t hseqidx; + + cend = FOLD (cend); +# ifdef WIDE_CHAR_VERSION + hseqidx = cend % size; + cnt = 0; + while (names[hseqidx] != cend) + { + if (++cnt == layers) + { + /* Hum, no information about the upper + bound. The matching succeeds if the + lower bound is matched exactly. */ + if (lseqidx == -1 || cold != fn) + goto range_not_matched; + + goto matched; + } + } +# else + hseqidx = cend; +# endif + + if ( +# ifdef WIDE_CHAR_VERSION + (lseqidx == -1 + && collseq[fseqidx] == collseq[hseqidx]) || +# endif + collseq[fseqidx] <= collseq[hseqidx]) goto matched; } +# ifdef WIDE_CHAR_VERSION + range_not_matched: +# endif +#else + /* We use a boring value comparison of the character + values. This is better than comparing using + `strcoll' since the latter would have surprising + and sometimes fatal consequences. */ + UCHAR cend = *p++; + + if (!(flags & FNM_NOESCAPE) && cend == L('\\')) + cend = *p++; + if (cend == L('\0')) + return FNM_NOMATCH; + + /* It is a range. 
*/ + if (cold <= fn && fn <= cend) + goto matched; +#endif c = *p++; } @@ -371,3 +471,4 @@ FCT (pattern, string, no_leading_period, flags) #undef STRCOLL #undef L #undef BTOWC +#undef SUFFIX -- cgit v1.1