aboutsummaryrefslogtreecommitdiff
path: root/locale/weight.h
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com> 1999-12-25 23:41:39 +0000
committer: Ulrich Drepper <drepper@redhat.com> 1999-12-25 23:41:39 +0000
commit: 450bf66ef223ad83e7032920652445817865770b (patch)
tree: 1bfd6848a2453f4ad2c9cdca8e4e4c817e995798 /locale/weight.h
parent: ce40141c6b68a40687f460450e1d07a0a78e1559 (diff)
download: glibc-450bf66ef223ad83e7032920652445817865770b.zip
glibc-450bf66ef223ad83e7032920652445817865770b.tar.gz
glibc-450bf66ef223ad83e7032920652445817865770b.tar.bz2
Update.
1999-12-25  Ulrich Drepper  <drepper@cygnus.com>

	* locale/C-collate.c (_nl_C_LC_COLLATE): Add one more entry for the indirect table.
	* locale/langinfo.h: Likewise.
	* locale/categories.def: Likewise. Remove reference to postload functions.
	* locale/lc-collate.c (_nl_postload_collate): Removed. Also remove __collate_tablemb, __collate_weightmb, and __collate_extramb.
	* locale/localeinfo.h: Remove declaration for removed variables above. Remove prototype for _nl_get_era_entry.
	* locale/weight.h: Complete rewrite for new collate implementation.
	* locale/programs/ld-collate.c: Many changes to make output file usable in strxfrm/strcoll.
	* string/strxfrm.c: Complete rewrite for new collate implementation.
	* wcsmbs/wcsxfrm.c: Don't use strxfrm.c, implement dummy implementation locally.

1999-12-25  Shinya Hanataka  <hanataka@abyss.rim.or.jp>

	* locale/programs/ld-ctype.c (allocate_arrays): Correctly assign transformation values for chars >255.
	* wctype/wctrans.c: Return pointer unmodified.
Diffstat (limited to 'locale/weight.h')
-rw-r--r-- locale/weight.h 251
1 files changed, 83 insertions, 168 deletions
diff --git a/locale/weight.h b/locale/weight.h
index 6e31e2d..356ee57 100644
--- a/locale/weight.h
+++ b/locale/weight.h
@@ -17,191 +17,106 @@
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
-#include <alloca.h>
-#include <errno.h>
-#include <langinfo.h>
-#include "localeinfo.h"
-
-#ifndef STRING_TYPE
-# error STRING_TYPE not defined
-#endif
+/* Find index of weight. */
+static inline int32_t
+findidx (const unsigned char **cpp)
+{
+ int_fast32_t i = table[*(*cpp)++];
+ const unsigned char *cp;
-#ifndef USTRING_TYPE
-# error USTRING_TYPE not defined
-#endif
+ if (i >= 0)
+ /* This is an index into the weight table. Cool. */
+ return i;
-typedef struct weight_t
-{
- struct weight_t *prev;
- struct weight_t *next;
- struct data_pair
+ /* Oh well, more than one sequence starting with this byte.
+ Search for the correct one. */
+ cp = &extra[-i];
+ while (1)
{
- int number;
- const uint32_t *value;
- } data[0];
-} weight_t;
-
-
-/* The following five macros grant access to the values in the
- collate locale file that do not depend on byte order. */
-#ifndef USE_IN_EXTENDED_LOCALE_MODEL
-# define collate_nrules \
- (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES))
-# define collate_hash_size \
- (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_SIZE))
-# define collate_hash_layers \
- (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_HASH_LAYERS))
-# define collate_undefined \
- (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_UNDEFINED_WC))
-# define collate_rules \
- ((uint32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_RULES))
-
-static __inline void get_weight (const STRING_TYPE **str, weight_t *result);
-static __inline void
-get_weight (const STRING_TYPE **str, weight_t *result)
-#else
-# define collate_nrules \
- current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word
-# define collate_hash_size \
- current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_SIZE)].word
-# define collate_hash_layers \
- current->values[_NL_ITEM_INDEX (_NL_COLLATE_HASH_LAYERS)].word
-# define collate_undefined \
- current->values[_NL_ITEM_INDEX (_NL_COLLATE_UNDEFINED_WC)].word
-# define collate_rules \
- ((uint32_t *) current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULES)].string)
-
-static __inline void get_weight (const STRING_TYPE **str, weight_t *result,
- struct locale_data *current,
- const uint32_t *__collate_tablewc,
- const uint32_t *__collate_extrawc);
-static __inline void
-get_weight (const STRING_TYPE **str, weight_t *result,
- struct locale_data *current, const uint32_t *__collate_tablewc,
- const uint32_t *__collate_extrawc)
-#endif
-{
- unsigned int ch = *((USTRING_TYPE *) (*str))++;
- size_t slot;
+ size_t nhere;
+ const unsigned char *usrc = *cpp;
- if (sizeof (STRING_TYPE) == 1)
- slot = ch * (collate_nrules + 1);
- else
- {
- const size_t level_size = collate_hash_size * (collate_nrules + 1);
- size_t level;
+ /* The first thing is the index. */
+ i = *((int32_t *) cp);
+ cp += sizeof (int32_t);
- slot = (ch % collate_hash_size) * (collate_nrules + 1);
+ /* Next is the length of the byte sequence. These are always
+ short byte sequences so there is no reason to call any
+ function (even if they are inlined). */
+ nhere = *cp++;
- level = 0;
- while (__collate_tablewc[slot] != (uint32_t) ch)
+ if (i >= 0)
{
- if (__collate_tablewc[slot + 1] == 0
- || ++level >= collate_hash_layers)
- {
- size_t idx = collate_undefined;
- size_t cnt;
+ /* It is a single character. If it matches we found our
+ index. Note that at the end of each list there is an
+ entry of length zero which represents the single byte
+ sequence. The first (and here only) byte was tested
+ already. */
+ size_t cnt;
- for (cnt = 0; cnt < collate_nrules; ++cnt)
- {
- result->data[cnt].number = __collate_extrawc[idx++];
- result->data[cnt].value = &__collate_extrawc[idx];
- idx += result->data[cnt].number;
- }
- /* The Unix standard requires that a character outside
- the domain is signalled by setting `errno'. */
- __set_errno (EINVAL);
- return;
- }
- slot += level_size;
- }
- }
+ for (cnt = 0; cnt < nhere; ++cnt)
+ if (cp[cnt] != usrc[cnt])
+ break;
- if (__collate_tablewc[slot + 1] != (uint32_t) FORWARD_CHAR)
- {
- /* We have a simple form. One value for each weight. */
- size_t cnt;
+ if (cnt == nhere)
+ {
+ /* Found it. */
+ *cpp += nhere;
+ return i;
+ }
- for (cnt = 0; cnt < collate_nrules; ++cnt)
- {
- result->data[cnt].number = 1;
- result->data[cnt].value = &__collate_tablewc[slot + 1 + cnt];
+ /* Up to the next entry. */
+ cp += nhere;
}
- return;
- }
+ else
+ {
+ /* This is a range of characters. First decide whether the
+ current byte sequence lies in the range. */
+ size_t cnt;
+ size_t offset = 0;
- /* We now look for any collation element which starts with CH.
- There might none, but the last list member is a catch-all case
- because it is simple the character CH. The value of this entry
- might be the same as UNDEFINED. */
- slot = __collate_tablewc[slot + 2];
+ for (cnt = 0; cnt < nhere; ++cnt)
+ if (cp[cnt] != usrc[cnt])
+ break;
- while (1)
- {
- size_t idx;
+ if (cnt != nhere)
+ {
+ if (cp[cnt] > usrc[cnt])
+ {
+ /* Cannot be in this range. */
+ cp += 2 * nhere;
+ continue;
+ }
- /* This is a comparison between a uint32_t array (aka wchar_t) and
- an 8-bit string. */
- for (idx = 0; __collate_extrawc[slot + 2 + idx] != 0; ++idx)
- if (__collate_extrawc[slot + 2 + idx] != (uint32_t) (*str)[idx])
- break;
+ /* Test against the end of the range. */
+ for (cnt = 0; cnt < nhere; ++cnt)
+ if (cp[nhere + cnt] != usrc[cnt])
+ break;
- /* When the loop finished with all character of the collation
- element used, we found the longest prefix. */
- if (__collate_extrawc[slot + 2 + idx] == 0)
- {
- size_t cnt;
+ if (cnt != nhere && cp[nhere + cnt] < usrc[cnt])
+ {
+ /* Cannot be in this range. */
+ cp += 2 * nhere;
+ continue;
+ }
- *str += idx;
- idx += slot + 3;
- for (cnt = 0; cnt < collate_nrules; ++cnt)
- {
- result->data[cnt].number = __collate_extrawc[idx++];
- result->data[cnt].value = &__collate_extrawc[idx];
- idx += result->data[cnt].number;
+ /* This range matches the next characters. Now find
+ the offset in the indirect table. */
+ for (cnt = 0; cp[cnt] == usrc[cnt]; ++cnt);
+
+ do
+ {
+ offset <<= 8;
+ offset += usrc[cnt] - cp[cnt];
+ }
+ while (++cnt < nhere);
}
- return;
- }
- /* To next entry in list. */
- slot += __collate_extrawc[slot];
+ *cpp += nhere;
+ return offset;
+ }
}
-}
-
-/* To process a string efficiently we retrieve all information about
- the string at once. The following macro constructs a double linked
- list of this information. It is a macro because we use `alloca'
- and we use a double linked list because of the backward collation
- order.
-
- We have this strange extra macro since the functions which use the
- given locale (not the global one) cannot use the global tables. */
-#ifndef USE_IN_EXTENDED_LOCALE_MODEL
-# define call_get_weight(strp, newp) get_weight ((strp), (newp))
-#else
-# define call_get_weight(strp, newp) \
- get_weight ((strp), (newp), current, collate_table, collate_extra)
-#endif
-
-#define get_string(str, forw, backw) \
- do \
- { \
- weight_t *newp; \
- while (*str != '\0') \
- { \
- newp = (weight_t *) alloca (sizeof (weight_t) \
- + (collate_nrules \
- * sizeof (struct data_pair))); \
- \
- newp->prev = backw; \
- if (backw == NULL) \
- forw = newp; \
- else \
- backw->next = newp; \
- newp->next = NULL; \
- backw = newp; \
- call_get_weight (&str, newp); \
- } \
- } \
- while (0)
+ /* NOTREACHED */
+ return 0x43219876;
+}