Update.

1999-12-20  Ulrich Drepper  <drepper@cygnus.com>

	* locale/categories.def: Remove most of the collate definitions.
	* locale/langinfo.h: Comment out corresponding definitions.
	* locale/programs/locale-spec.c (locale_special): Don't recognize the collate names yet.
	* locale/programs/ld-collate.c: Correct and optimize computation of weights.  Set up list of all definitions correctly.  Start writing function to generate output file.
	* locale/programs/ld-ctype.c (allocate_arrays): Increment counter in loop to compute default mapping.
author: Ulrich Drepper <drepper@redhat.com> 1999-12-20 22:08:01 +0000
committer: Ulrich Drepper <drepper@redhat.com> 1999-12-20 22:08:01 +0000
commit: b06c53e77dd2f6598ac8c45fa8f021db1bc17145 (patch)
tree: 152a936d40224a7ddb60177347650a5d3726ca10 /locale
parent: 9197f26fd5948b29616a4183c00d8e2898712a05 (diff)
download: glibc-b06c53e77dd2f6598ac8c45fa8f021db1bc17145.zip
glibc-b06c53e77dd2f6598ac8c45fa8f021db1bc17145.tar.gz
glibc-b06c53e77dd2f6598ac8c45fa8f021db1bc17145.tar.bz2
5 files changed, 209 insertions, 55 deletions
diff --git a/locale/categories.def b/locale/categories.def
index 4a61787..d94840c 100644
--- a/locale/categories.def
+++ b/locale/categories.def
@@ -43,20 +43,6 @@ DEFINE_CATEGORY
  LC_COLLATE, "LC_COLLATE",
  (
   DEFINE_ELEMENT (_NL_COLLATE_NRULES,         "collate-nrules",           std, word)
-  DEFINE_ELEMENT (_NL_COLLATE_RULES,          "collate-rules",            std, string)
-  DEFINE_ELEMENT (_NL_COLLATE_HASH_SIZE,      "collate-hash-size",        std, word)
-  DEFINE_ELEMENT (_NL_COLLATE_HASH_LAYERS,    "collate-hash-layers",      std, word)
-  DEFINE_ELEMENT (_NL_COLLATE_TABLEWC,        "collate-tablewc",          std, string)
-  DEFINE_ELEMENT (_NL_COLLATE_UNDEFINED_WC,   "collate-undefined-wc",     std, word)
-  DEFINE_ELEMENT (_NL_COLLATE_EXTRAWC,        "collate-extrawc",          std, string)
-  DEFINE_ELEMENT (_NL_COLLATE_ELEM_HASH_SIZE, "collate-elem-hash-size",   std, word)
-  DEFINE_ELEMENT (_NL_COLLATE_ELEM_HASH,      "collate-elem-hash",        std, string)
-  DEFINE_ELEMENT (_NL_COLLATE_ELEM_STR_POOL,  "collate-elem-str-pool",    std, string)
-  DEFINE_ELEMENT (_NL_COLLATE_ELEM_VAL,       "collate-elem-val",         std, string)
-  DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH_SIZE, "collate-symb-hash-size",   std, word)
-  DEFINE_ELEMENT (_NL_COLLATE_SYMB_HASH,      "collate-symb-hash",        std, string)
-  DEFINE_ELEMENT (_NL_COLLATE_SYMB_STR_POOL,  "collate-symb-str-pool",    std, string)
-  DEFINE_ELEMENT (_NL_COLLATE_SYMB_CLASSWC,   "collate-symb-classwc",     std, string)
   ), _nl_postload_collate)
 
 
diff --git a/locale/langinfo.h b/locale/langinfo.h
index 1248a46..44f4064 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -231,12 +231,11 @@ enum
      This information is accessed by the strcoll and strxfrm functions.
      These `nl_langinfo' names are used only internally.  */
   _NL_COLLATE_NRULES = _NL_ITEM (LC_COLLATE, 0),
-  _NL_COLLATE_RULES,
+#if 0
+  _NL_COLLATE_TABLEMB,
   _NL_COLLATE_HASH_SIZE,
   _NL_COLLATE_HASH_LAYERS,
-  _NL_COLLATE_TABLEMB,
   _NL_COLLATE_TABLEWC,
-  _NL_COLLATE_UNDEFINED_MB,
   _NL_COLLATE_UNDEFINED_WC,
   _NL_COLLATE_EXTRAMB,
   _NL_COLLATE_EXTRAWC,
@@ -251,6 +250,7 @@ enum
   _NL_COLLATE_SYMB_STR_POOL,
   _NL_COLLATE_SYMB_CLASSMB,
   _NL_COLLATE_SYMB_CLASSWC,
+#endif
   _NL_NUM_LC_COLLATE,
 
   /* LC_CTYPE category: character classification.
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index 87005e8..52c4e3c 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -73,9 +73,17 @@ struct element_t
 
   const char *mbs;
   const uint32_t *wcs;
-  int mborder;
+  int *mborder;
   int wcorder;
 
+  /* The following is a bit mask whose bits are set if this element is
+     used in the appropriate level.  Interesting for the singlebyte
+     weight computation.
+
+     XXX The type here restricts the number of levels to 32.  It could
+     be changed if necessary but I doubt this is necessary.  */
+  unsigned int used_in_level;
+
   struct element_list_t *weights;
 
   /* Where does the definition come from.  */
@@ -191,8 +199,8 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
 
   newp = (struct element_t *) obstack_alloc (&collate->mempool,
 					     sizeof (*newp));
-  newp->name = name == NULL ? NULL : obstack_copy (&collate->mempool,
-						   name, namelen);
+  newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
+						    name, namelen);
   if (mbs != NULL)
     newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
   else
@@ -207,8 +215,9 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
     }
   else
     newp->wcs = NULL;
-  newp->mborder = 0;
+  newp->mborder = NULL;
   newp->wcorder = 0;
+  newp->used_in_level = 0;
 
   /* Will be allocated later.  */
   newp->weights = NULL;
@@ -477,7 +486,7 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate,
       else if (find_entry (&collate->elem_table, str, len,
 			   (void **) &result) != 0)
 	{
-	  /* It's also no collation element.  So it is an character
+	  /* It's also no collation element.  So it is a character
 	     element defined later.  */
 	  result = new_element (collate, NULL, 0, NULL, str, len);
 	  if (result != NULL)
@@ -493,11 +502,20 @@ find_element (struct linereader *ldfile, struct locale_collate_t *collate,
 static void
 unlink_element (struct locale_collate_t *collate)
 {
-  if (collate->cursor->next != NULL)
-    collate->cursor->next->last = collate->cursor->last;
-  if (collate->cursor->last != NULL)
-    collate->cursor->last->next = collate->cursor->next;
-  collate->cursor = collate->cursor->last;
+  if (collate->cursor == collate->start)
+    {
+      assert (collate->cursor->next == NULL);
+      assert (collate->cursor->last == NULL);
+      collate->cursor = NULL;
+    }
+  else
+    {
+      if (collate->cursor->next != NULL)
+	collate->cursor->next->last = collate->cursor->last;
+      if (collate->cursor->last != NULL)
+	collate->cursor->last->next = collate->cursor->next;
+      collate->cursor = collate->cursor->last;
+    }
 }
 
 
@@ -516,6 +534,11 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
   elem->next = collate->cursor ? collate->cursor->next : NULL;
   if (collate->cursor != NULL)
     collate->cursor->next = elem;
+  if (collate->start == NULL)
+    {
+      assert (collate->cursor == NULL);
+      collate->start = elem;
+    }
   elem->weights = (struct element_list_t *)
     obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
   memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
@@ -566,7 +589,8 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
 	  const char *cp = arg->val.str.startmb;
 	  int cnt = 0;
 	  struct element_t *charelem;
-	  void *base = obstack_base (&collate->mempool);
+	  struct element_t **weights = NULL;
+	  int max = 0;
 
 	  if (*cp == '\0')
 	    {
@@ -581,18 +605,17 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
 	      if (*cp == '<')
 		{
 		  /* Ahh, it's a bsymbol.  That's what we want.  */
-		  const char *startp = cp;
+		  const char *startp = ++cp;
 
-		  while (*++cp != '>')
+		  while (*cp != '>')
 		    {
 		      if (*cp == ldfile->escape_char)
 			++cp;
 		      if (*cp == '\0')
-			{
-			  /* It's a syntax error.  */
-			  obstack_free (&collate->mempool, base);
-			  goto syntax;
-			}
+			/* It's a syntax error.  */
+			goto syntax;
+
+		      ++cp;
 		    }
 
 		    charelem = find_element (ldfile, collate, startp,
@@ -606,7 +629,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
 		     what this means.  We interpret all characters in the
 		     string as if that would be bsymbols.  Otherwise we
 		     would have to match back to bsymbols somehow and this
-		     is also not what people normally expect.  */
+		     is normally not what people expect.  */
 		  charelem = find_element (ldfile, collate, cp++, 1, NULL);
 		}
 
@@ -618,14 +641,25 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
 		}
 
 	      /* Add the pointer.  */
-	      obstack_ptr_grow (&collate->mempool, charelem);
-	      ++cnt;
+	      if (cnt >= max)
+		{
+		  struct element_t **newp;
+		  max += 10;
+		  newp = (struct element_t **)
+		    alloca (max * sizeof (struct element_t *));
+		  memcpy (newp, weights, cnt * sizeof (struct element_t *));
+		  weights = newp;
+		}
+	      weights[cnt++] = charelem;
 	    }
 	  while (*cp != '\0');
 
 	  /* Now store the information.  */
 	  elem->weights[weight_cnt].w = (struct element_t **)
-	    obstack_finish (&collate->mempool);
+	    obstack_alloc (&collate->mempool,
+			   cnt * sizeof (struct element_t *));
+	  memcpy (elem->weights[weight_cnt].w, weights,
+		  cnt * sizeof (struct element_t *));
 	  elem->weights[weight_cnt].cnt = cnt;
 
 	  /* We don't need the string anymore.  */
@@ -946,10 +980,20 @@ order for `%.*s' already defined at %s:%zu"),
 
 		  /* Enqueue the new element.  */
 		  elem->last = collate->cursor;
-		  elem->next = collate->cursor->next;
-		  elem->last->next = elem;
-		  if (elem->next != NULL)
-		    elem->next->last = elem;
+		  if (collate->cursor != NULL)
+		    elem->next = NULL;
+		  else
+		    {
+		      elem->next = collate->cursor->next;
+		      elem->last->next = elem;
+		      if (elem->next != NULL)
+			elem->next->last = elem;
+		    }
+		  if (collate->start == NULL)
+		    {
+		      assert (collate->cursor == NULL);
+		      collate->start = elem;
+		    }
 		  collate->cursor = elem;
 
 		 /* Add the weight value.  We take them from the
@@ -1232,10 +1276,69 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
      The multibyte case is easy.  We simply sort into an array with
      256 elements.  */
   struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
-  int mbact = 2;
-  int wcact = 2;
-  struct element_t *runp = collate->start;
+  int mbact[nrules];
+  int wcact;
+  struct element_t *runp;
+  int i;
+  int need_undefined = 0;
+
+  /* If this assertion is hit change the type in `element_t'.  */
+  assert (nrules <= sizeof (runp->used_in_level) * 8);
+
+  /* Find out which elements are used at which level.  At the same
+     time we find out whether we have any undefined symbols.  */
+  runp = collate->start;
+  while (runp != NULL)
+    {
+      if (runp->mbs != NULL)
+	{
+	  for (i = 0; i < nrules; ++i)
+	    {
+	      int j;
+
+	      for (j = 0; j < runp->weights[i].cnt; ++j)
+		/* A NULL pointer as the weight means IGNORE.  */
+		if (runp->weights[i].w[j] != NULL)
+		  {
+		    if (runp->weights[i].w[j]->weights == NULL)
+		      {
+			error_at_line (0, 0, runp->file, runp->line,
+				       _("symbol `%s' not defined"),
+				       runp->weights[i].w[j]->name);
 
+			need_undefined = 1;
+			runp->weights[i].w[j] = &collate->undefined;
+		      }
+		    else
+		      /* Set the bit for the level.  */
+		      runp->weights[i].w[j]->used_in_level |= 1 << i;
+		  }
+	    }
+	}
+
+      /* Up to the next entry.  */
+      runp = runp->next;
+    }
+
+  /* Walk through the list of defined sequences and assign weights.  Also
+     create the data structure which will allow generating the single byte
+     character based tables.
+
+     Since the weights for each of the rules are only ever compared
+     to other weights for the same rule it is possible to assign more
+     compact weight values than simply counting all weights in
+     sequence.  We can assign weights starting from 2 for each rule
+     individually and only for those elements which are actually
+     used for this rule.
+
+     Why is this important?  It is not for the wide char table.  But
+     it is for the singlebyte output since here larger numbers have to
+     be encoded to make it possible to emit the value as a byte
+     string.  */
+  for (i = 0; i < nrules; ++i)
+    mbact[i] = 2;
+  wcact = 2;
+  runp = collate->start;
   while (runp != NULL)
     {
       if (runp->mbs != NULL)
@@ -1243,10 +1346,20 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
 	  struct element_t **eptr;
 
 	  /* Determine the order.  */
-	  runp->mborder = mbact++;
+	  if (runp->used_in_level != 0)
+	    {
+	      runp->mborder = (int *) obstack_alloc (&collate->mempool,
+						     nrules * sizeof (int));
+
+	      for (i = 0; i < nrules; ++i)
+		if ((runp->used_in_level & (1 << i)) != 0)
+		  runp->mborder[i] = mbact[i]++;
+		else
+		  runp->mborder[i] = 0;
+	    }
 
 	  /* Find the point where to insert in the list.  */
-	  eptr = &collate->mbheads[(unsigned int) runp->mbs[0]];
+	  eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
 	  while (*eptr != NULL)
 	    {
 	      /* Check which string is larger, the one we want to insert
@@ -1269,6 +1382,31 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
       /* Up to the next entry.  */
       runp = runp->next;
     }
+
+  /* Find out whether any of the `mbheads' entries is unset.  In this
+     case we use the UNDEFINED entry.  */
+  for (i = 1; i < 256; ++i)
+    if (collate->mbheads[i] == NULL)
+      {
+	need_undefined = 1;
+	collate->mbheads[i] = &collate->undefined;
+      }
+
+  /* Now determine whether the UNDEFINED entry is needed and if yes,
+     whether it was defined.  */
+  if (need_undefined && collate->undefined.file == NULL)
+    {
+      error (0, 0, _("no definition of `UNDEFINED'"));
+
+      /* Add UNDEFINED at the end.  */
+      collate->undefined.mborder =
+	(int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
+
+      for (i = 0; i < nrules; ++i)
+	collate->undefined.mborder[i] = mbact[i]++;
+
+      collate->undefined.wcorder = wcact++;
+    }
 }
 
 
@@ -1276,6 +1414,33 @@ void
 collate_output (struct localedef_t *locale, struct charmap_t *charmap,
 		const char *output_path)
 {
+  struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
+  const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
+  struct iovec iov[2 + nelems];
+  struct locale_file data;
+  uint32_t idx[nelems];
+  size_t cnt;
+
+  data.magic = LIMAGIC (LC_COLLATE);
+  data.n = nelems;
+  iov[0].iov_base = (void *) &data;
+  iov[0].iov_len = sizeof (data);
+
+  iov[1].iov_base = (void *) idx;
+  iov[1].iov_len = sizeof (idx);
+
+  idx[0] = iov[0].iov_len + iov[1].iov_len;
+  cnt = 0;
+
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_NRULES));
+  iov[2 + cnt].iov_base = &collate->nrules;
+  iov[2 + cnt].iov_len = sizeof (uint32_t);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
+
+  write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
 }
 
 
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c
index 5dcb013..86d0860 100644
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@@ -3071,11 +3071,14 @@ Computing table size for character classes might take a while..."),
 	  ctype->map32[idx][idx2] = ctype->map_collection[idx][idx2];
 
       while (idx2 < ctype->map_collection_act[idx])
-	if (ctype->map_collection[idx][idx2] != 0)
-	  *find_idx (ctype, &ctype->map32[idx],
-		     &ctype->map_collection_max[idx],
-		     &ctype->map_collection_act[idx],
-		     ctype->names[idx2]) = ctype->map_collection[idx][idx2];
+	{
+	  if (ctype->map_collection[idx][idx2] != 0)
+	    *find_idx (ctype, &ctype->map32[idx],
+		       &ctype->map_collection_max[idx],
+		       &ctype->map_collection_act[idx],
+		       ctype->names[idx2]) = ctype->map_collection[idx][idx2];
+	  ++idx2;
+	}
     }
 
   /* Extra array for class and map names.  */
diff --git a/locale/programs/locale-spec.c b/locale/programs/locale-spec.c
index 368306c..048dead 100644
--- a/locale/programs/locale-spec.c
+++ b/locale/programs/locale-spec.c
@@ -36,6 +36,7 @@ void
 locale_special (const char *name, int show_category_name,
 		int show_keyword_name)
 {
+#if 0
   /* "collate-elements": print collation elements of locale.  */
   if (strcmp (name, "collate-elements") == 0)
     {
@@ -59,7 +60,6 @@ locale_special (const char *name, int show_category_name,
 		printf ("%s<%s>", first ? "" : ";",
 			&__collate_element_strings[idx]);
 
-#if 0
 		/* We don't print the string.  This is only confusing
 		   because only the programs have to know the
 		   encoding.  The code is left in place because it
@@ -85,7 +85,6 @@ locale_special (const char *name, int show_category_name,
 
 		  putchar ('"');
 		}
-#endif
 		first = 0;
 	      }
 	}
@@ -125,4 +124,5 @@ locale_special (const char *name, int show_category_name,
       putchar ('\n');
       return;
     }
+#endif
 }
author	Ulrich Drepper <drepper@redhat.com>	1999-12-20 22:08:01 +0000
committer	Ulrich Drepper <drepper@redhat.com>	1999-12-20 22:08:01 +0000
commit	b06c53e77dd2f6598ac8c45fa8f021db1bc17145 (patch)
tree	152a936d40224a7ddb60177347650a5d3726ca10 /locale
parent	9197f26fd5948b29616a4183c00d8e2898712a05 (diff)
download	glibc-b06c53e77dd2f6598ac8c45fa8f021db1bc17145.zip glibc-b06c53e77dd2f6598ac8c45fa8f021db1bc17145.tar.gz glibc-b06c53e77dd2f6598ac8c45fa8f021db1bc17145.tar.bz2