CollationElementIterator.java, [...]: New versions from GNU classpath.

2004-06-01 Michael Koch <konqueror@gmx.de> * java/text/CollationElementIterator.java, java/text/CollationKey.java, java/text/RuleBasedCollator.java: New versions from GNU classpath. * testsuite/libjava.mauve/xfails: Removed all java.text.CollationElementIterator tests. From-SVN: r82510
author: Michael Koch <konqueror@gmx.de> 2004-05-31 22:16:31 +0000
committer: Michael Koch <mkoch@gcc.gnu.org> 2004-05-31 22:16:31 +0000
commit: 57807c317869610e07a85bf8bf95747638230ed7 (patch)
tree: 84eadd76d1df1c8b46bd05dd00699bc802f78611 /libjava/java
parent: f7dbd56c9a1e2605f54a05c2f613cf086f32634b (diff)
download: gcc-57807c317869610e07a85bf8bf95747638230ed7.zip
gcc-57807c317869610e07a85bf8bf95747638230ed7.tar.gz
gcc-57807c317869610e07a85bf8bf95747638230ed7.tar.bz2
3 files changed, 935 insertions, 287 deletions
diff --git a/libjava/java/text/CollationElementIterator.java b/libjava/java/text/CollationElementIterator.java
index 1b5a617..a4c2100 100644
--- a/libjava/java/text/CollationElementIterator.java
+++ b/libjava/java/text/CollationElementIterator.java
@@ -38,6 +38,8 @@ exception statement from your version. */
 
 package java.text;
 
+import java.util.Vector;
+
 /* Written using "Java Class Libraries", 2nd edition, plus online
  * API docs for JDK 1.2 from http://www.javasoft.com.
  * Status: Believed complete and correct to JDK 1.1.
@@ -74,13 +76,25 @@ public final class CollationElementIterator
   String text;
 
   /**
+   * This is the index into the collation decomposition where we are currently scanning.
+   */
+  int index;
+
+  /**
    * This is the index into the String where we are currently scanning.
    */
   int textIndex;
 
-  // A piece of lookahead.
-  boolean lookahead_set;
-  int lookahead;
+  /**
+   * Array containing the collation decomposition of the
+   * text given to the constructor.
+   */
+  private Object[] text_decomposition;
+
+  /**
+   * Array containing the index of the specified block.
+   */
+  private int[] text_indexes;
 
   /**
    * This method initializes a new instance of <code>CollationElementIterator</code>
@@ -97,6 +111,35 @@ public final class CollationElementIterator
     setText (text);    
   }
 
+  RuleBasedCollator.CollationElement nextBlock()
+  {
+    if (index >= text_decomposition.length)
+      return null;
+    
+    RuleBasedCollator.CollationElement e =
+      (RuleBasedCollator.CollationElement) text_decomposition[index];
+    
+    textIndex = text_indexes[index+1];
+
+    index++;
+
+    return e;
+  }
+
+  RuleBasedCollator.CollationElement previousBlock()
+  {
+    if (index == 0)
+      return null;
+    
+    index--;
+    RuleBasedCollator.CollationElement e =
+      (RuleBasedCollator.CollationElement) text_decomposition[index];
+
+    textIndex = text_indexes[index+1];
+    
+    return e;
+  }
+
   /**
    * This method returns the collation ordering value of the next character sequence
    * in the string (it may be an extended character following collation rules).
@@ -107,10 +150,29 @@ public final class CollationElementIterator
    */
   public int next()
   {
-    if (textIndex == text.length())
+    RuleBasedCollator.CollationElement e = nextBlock();
+
+    if (e == null)
       return NULLORDER;
+    
+    return e.getValue();
+  }
+
+  /**
+   * This method returns the collation ordering value of the previous character
+   * in the string.  This method will return <code>NULLORDER</code> if the
+   * beginning of the string was reached.
+   *
+   * @return The collation ordering value.
+   */
+  public int previous()
+  {
+    RuleBasedCollator.CollationElement e = previousBlock();
 
-    return collator.ceiNext (this);
+    if (e == null)
+      return NULLORDER;
+    
+    return e.getValue();
   }
 
   /**
@@ -133,9 +195,8 @@ public final class CollationElementIterator
    */
   public void reset()
   {
+    index = 0;
     textIndex = 0;
-    lookahead_set = false;
-    lookahead = 0;
   }
 
   /**
@@ -176,10 +237,152 @@ public final class CollationElementIterator
    */
   public void setText(String text)
   {
+    int idx = 0;
+    int idx_idx = 0;
+    int alreadyExpanded = 0;
+    int idxToMove = 0;
+
     this.text = text;
-    this.textIndex = 0;
-    this.lookahead_set = false;
-    this.lookahead = 0;
+    this.index = 0;
+
+    String work_text = text.intern();
+
+    Vector v = new Vector();
+    Vector vi = new Vector();
+
+    // Build element collection ordered as they come in "text".
+    while (idx < work_text.length())
+      {
+	String key, key_old;
+
+	Object object = null;
+	int p = 1;
+	
+	// IMPROVE: use a TreeMap with a prefix-ordering rule.
+	key_old = key = null;
+	do
+	  {
+	    if (object != null)
+	      key_old = key;
+	    key = work_text.substring (idx, idx+p);
+	    object = collator.prefix_tree.get (key);
+	    if (object != null && idx < alreadyExpanded)
+	      {
+		RuleBasedCollator.CollationElement prefix = (RuleBasedCollator.CollationElement)object;
+		if (prefix.expansion != null && 
+		    prefix.expansion.startsWith(work_text.substring(0, idx)))
+		{
+		  object = null;
+		  key = key_old;
+		}
+	      }
+	    p++;
+	  }
+	while (idx+p <= work_text.length());
+	
+	if (object == null)
+	  key = key_old;
+	
+	RuleBasedCollator.CollationElement prefix =
+	  (RuleBasedCollator.CollationElement) collator.prefix_tree.get (key);
+
+	/*
+	 * First case: There is no such sequence in the database.
+	 * We will have to build one from the context.
+	 */
+	if (prefix == null)
+	  {
+	    /*
+	     * We are dealing with sequences in an expansion. They
+	     * are treated as accented characters (tertiary order).
+	     */
+	    if (alreadyExpanded > 0)
+	      {
+		RuleBasedCollator.CollationElement e =
+		  collator.getDefaultAccentedElement (work_text.charAt (idx));
+		
+		v.add (e);
+		vi.add (new Integer(idx_idx));
+		idx++;
+		alreadyExpanded--;
+		if (alreadyExpanded == 0)
+		  {
+		    /* There is not any characters left in the expansion set.
+		     * We can increase the pointer in the source string.
+		     */
+		    idx_idx += idxToMove;
+		    idxToMove = 0; 
+		  }
+		else
+		  idx_idx++;
+	      }
+	    else
+	      {
+		/* This is a normal character. */
+		RuleBasedCollator.CollationElement e =
+		  collator.getDefaultElement (work_text.charAt (idx));
+		Integer i_ref = new Integer(idx_idx);
+
+		/* Don't forget to mark it as a special sequence so the
+		 * string can be ordered.
+		 */
+		v.add (RuleBasedCollator.SPECIAL_UNKNOWN_SEQ);
+		vi.add (i_ref);
+		v.add (e);
+		vi.add (i_ref);
+		idx_idx++;
+		idx++;
+	      }
+	    continue;
+	  }
+ 
+	/*
+	 * Second case: Here we have found a matching sequence.
+	 * Here we have an expansion string prepend it to the "work text" and
+	 * add the corresponding sorting element. We must also mark 
+	 */
+	if (prefix.expansion != null)
+	  {
+	    work_text = prefix.expansion
+	      + work_text.substring (idx+prefix.key.length());
+	    idx = 0;
+	    v.add (prefix);
+	    vi.add (new Integer(idx_idx));
+	    if (alreadyExpanded == 0)
+	      idxToMove = prefix.key.length();
+	    alreadyExpanded += prefix.expansion.length()-prefix.key.length();
+	  }
+	else
+	  {
+	    /* Third case: the simplest. We have got the prefix and it
+	     * has not to be expanded.
+	     */
+	    v.add (prefix);
+	    vi.add (new Integer(idx_idx));
+	    idx += prefix.key.length();
+	    /* If the sequence is in an expansion, we must decrease the
+	     * counter.
+	     */
+	    if (alreadyExpanded > 0)
+	      {
+		alreadyExpanded -= prefix.key.length();
+		if (alreadyExpanded == 0)
+		  {
+		    idx_idx += idxToMove;
+		    idxToMove = 0;
+		  }
+	      } else
+		idx_idx += prefix.key.length();
+	  }
+      }
+    
+    text_decomposition = v.toArray();
+    text_indexes = new int[vi.size()+1];
+    for (int i = 0; i < vi.size(); i++) 
+      {
+	text_indexes[i] = ((Integer)vi.elementAt(i)).intValue();
+      }
+    text_indexes[vi.size()] = text.length();
   }
 
   /**
@@ -215,4 +418,50 @@ public final class CollationElementIterator
   {
     return textIndex;
   }
+
+  /**
+   * This method sets the iteration index position into the current
+   * <code>String</code> to the specified value.  This value must not
+   * be negative and must not be greater than the last index position
+   * in the <code>String</code>.
+   *
+   * @param offset The new iteration index position.
+   *
+   * @exception IllegalArgumentException If the new offset is not valid.
+   */
+  public void setOffset(int offset)
+  {
+    if (offset < 0)
+      throw new IllegalArgumentException("Negative offset: " + offset);
+
+    if (offset > (text.length() - 1))
+      throw new IllegalArgumentException("Offset too large: " + offset);
+    
+    for (index = 0; index < text_decomposition.length; index++)
+      {	
+	if (offset <= text_indexes[index])
+	  break;
+      }
+    /*
+     * As text_indexes[0] == 0, we should not have to take care whether index is
+     * greater than 0. It is always.
+     */
+    if (text_indexes[index] == offset)
+      textIndex = offset;
+    else
+      textIndex = text_indexes[index-1];
+  }
+
+  /**
+   * This method returns the maximum length of any expansion sequence that
+   * ends with the specified collation order value.  (Whatever that means).
+   *
+   * @param value The collation order value
+   *
+   * @param The maximum length of an expansion sequence.
+   */
+  public int getMaxExpansion(int value)
+  {
+    return 1;
+  }
 }
diff --git a/libjava/java/text/CollationKey.java b/libjava/java/text/CollationKey.java
index abc28b2..4e6aca2 100644
--- a/libjava/java/text/CollationKey.java
+++ b/libjava/java/text/CollationKey.java
@@ -78,24 +78,13 @@ public final class CollationKey implements Comparable
   /**
    * This is the bit value for this key.
    */
-  private int[] key;
+  private byte[] key;
 
-  CollationKey(Collator collator, CollationElementIterator iter,
-	       String originalText, int strength)
+  CollationKey (Collator collator, String originalText, byte[] key)
   {
     this.collator = collator;
     this.originalText = originalText;
-
-    // Compute size of required array.
-    int size = 0;
-    while (RuleBasedCollator.next(iter, strength)
-	   != CollationElementIterator.NULLORDER)
-      ++size;
-
-    iter.reset();
-    key = new int[size];
-    for (int i = 0; i < size; i++)
-      key[i] = RuleBasedCollator.next(iter, strength);
+    this.key = key;
   }
 
   /**
@@ -205,15 +194,6 @@ public final class CollationKey implements Comparable
    */
   public byte[] toByteArray()
   {
-    byte[] r = new byte[4 * key.length];
-    int off = 0;
-    for (int i = 0; i < key.length; ++i)
-      {
-	r[off++] = (byte) ((key[i] >>> 24) & 255);
-	r[off++] = (byte) ((key[i] >>> 16) & 255);
-	r[off++] = (byte) ((key[i] >>>  8) & 255);
-	r[off++] = (byte) ((key[i]       ) & 255);
-      }
-    return r;
+    return key;
   }
 }
diff --git a/libjava/java/text/RuleBasedCollator.java b/libjava/java/text/RuleBasedCollator.java
index 64c2ac2..23f238e 100644
--- a/libjava/java/text/RuleBasedCollator.java
+++ b/libjava/java/text/RuleBasedCollator.java
@@ -38,7 +38,7 @@ exception statement from your version. */
 package java.text;
 
 import java.util.Enumeration;
-import java.util.Hashtable;
+import java.util.HashMap;
 import java.util.Vector;
 
 /* Written using "Java Class Libraries", 2nd edition, plus online
@@ -147,32 +147,104 @@ public class RuleBasedCollator extends Collator
    * This class describes what rank has a character (or a sequence of characters) 
    * in the lexicographic order. Each element in a rule has a collation element.
    */
-  final class CollationElement
+  final static class CollationElement
   {
     String key;
-    char relation;
-
-    CollationElement(String key, char relation)
+    int primary;
+    short secondary;
+    short tertiary;
+    short equality;
+    boolean ignore;
+    String expansion;
+
+    CollationElement(String key, int primary, short secondary, short tertiary,
+		     short equality, String expansion, boolean ignore)
     {
       this.key = key;
-      this.relation = relation;
+      this.primary = primary;
+      this.secondary = secondary;
+      this.tertiary = tertiary;
+      this.equality = equality;
+      this.ignore = ignore;
+      this.expansion = expansion;
+    }
+
+    final int getValue()
+    {
+      return (primary << 16) + (secondary << 8) + tertiary;
     }
   }
 
-  // True if we are using French-style accent ordering.
-  private boolean frenchAccents;
+  /**
+   * Basic collation instruction (internal format) to build the series of
+   * collation elements. It contains an instruction which specifies the new
+   * state of the generator. The sequence of instruction should not contain
+   * RESET (it is used by
+   * {@link #mergeRules(int,java.lang.String,java.util.Vector,java.util.Vector)})
+   * as a temporary state while merging two sets of instructions.
+   */
+  final static class CollationSorter
+  {
+    static final int GREATERP = 0;
+    static final int GREATERS = 1;
+    static final int GREATERT = 2;
+    static final int EQUAL = 3;
+    static final int RESET = 4;
+    static final int INVERSE_SECONDARY = 5;
+    
+    int comparisonType;
+    String textElement;
+    int hashText;
+    int offset;
+    boolean ignore;
+
+    String expansionOrdering;
+  }
 
   /**
    * This the the original rule string.
    */
   private String rules;
 
-  // This maps strings onto collation values.
-  private Hashtable map;
-  
-  // An entry in this hash means that more lookahead is required for
-  // the prefix string.
-  private Hashtable prefixes;
+  /**
+   * This is the table of collation element values
+   */
+  private Object[] ce_table;
+
+  /**
+   * Quick-prefix finder.
+   */
+  HashMap prefix_tree;
+
+  /**
+   * This is the value of the last sequence entered into
+   * <code>ce_table</code>. It is used to compute the
+   * ordering value of unspecified character.
+   */
+  private int last_primary_value;
+
+  /**
+   * This is the value of the last secondary sequence of the
+   * primary 0, entered into
+   * <code>ce_table</code>. It is used to compute the
+   * ordering value of an unspecified accented character.
+   */
+  private int last_tertiary_value;
+
+  /**
+   * This variable is true if accents need to be sorted
+   * in the other direction.
+   */
+  private boolean inverseAccentComparison;
+
+  /**
+   * This collation element is special to unknown sequence.
+   * The JDK uses it to mark and sort the characters which has
+   * no collation rules.
+   */
+  static final CollationElement SPECIAL_UNKNOWN_SEQ = 
+    new CollationElement("", (short) 32767, (short) 0, (short) 0,
+			 (short) 0, null, false);
   
   /**
    * This method initializes a new instance of <code>RuleBasedCollator</code>
@@ -192,128 +264,313 @@ public class RuleBasedCollator extends Collator
     
     this.rules = rules;
 
-    // We keep each rule in order in a vector.  At the end we traverse
-    // the vector and compute collation values from it.
-    int insertion_index = 0;
-    Vector vec = new Vector ();
+    buildCollationVector(parseString(rules));
+    buildPrefixAccess();
+  }
 
+  /**
+   * This method returns the number of common characters at the beginning
+   * of the string of the two parameters.
+   *
+   * @param prefix A string considered as a prefix to test against
+   * the other string.
+   * @param s A string to test the prefix against.
+   * @return The number of common characters.
+   */
+  static int findPrefixLength(String prefix, String s)
+  {
     int index;
-    StringBuffer argument = new StringBuffer();
-    int len = rules.length();
+    int len = prefix.length();
 
-    for (index = 0; index < len; ++index)
+    for (index = 0; index < len && index < s.length(); ++index)
       {
-	char c = rules.charAt(index);
+	if (prefix.charAt(index) != s.charAt(index))
+	  return index;
+      }
 
-	// Just skip whitespace.
-	if (Character.isWhitespace(c))
-	  continue;
 
-	// Modifier.
-	if (c == '@')
+    return index;
+  }
+
+  /**
+   * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
+   * checks whether it is possible to find an anchor point for the rules to be merged and
+   * then insert them at that precise point.
+   *
+   * @param offset Offset in the string containing rules of the beginning of the rules
+   * being merged in.
+   * @param starter Text of the rules being merged.
+   * @param main Repository of all already parsed rules.
+   * @param patch Rules to be merged into the repository.
+   * @throws ParseException if it is impossible to find an anchor point for the new rules.
+   */
+  private void mergeRules(int offset, String starter, Vector main, Vector patch)
+    throws ParseException 
+  {
+    Enumeration elements = main.elements();
+    int insertion_point = -1;
+    int max_length = 0;
+    
+    /* We must check that no rules conflict with another already present. If it
+     * is the case delete the old rule. 
+     */
+    
+    /* For the moment good old O(N^2) algorithm.
+     */
+    for (int i = 0; i < patch.size(); i++)
+      {
+	int j = 0;
+	
+	while (j < main.size())
 	  {
-	    frenchAccents = true;
-	    continue;
+	    CollationSorter rule1 = (CollationSorter) patch.elementAt(i);
+	    CollationSorter rule2 = (CollationSorter) main.elementAt(j);
+	    
+	    if (rule1.textElement.equals(rule2.textElement))
+	      main.removeElementAt(j);
+	    else
+	      j++;
 	  }
+      }
 
-	// Check for relation or reset operator.
-	if (! (c == '<' || c == ';' || c == ',' || c == '=' || c == '&'))
-	  throw new ParseException ("invalid character", index);
-
-	++index;
-	while (index < len)
+    // Find the insertion point... O(N)
+    for (int i = 0; i < main.size(); i++)
+      {
+	CollationSorter sorter = (CollationSorter) main.elementAt(i);
+	int length = findPrefixLength(starter, sorter.textElement);
+		
+	if (length > max_length)
 	  {
-	    if (! Character.isWhitespace(rules.charAt(index)))
-	      break;
-	    ++index;
+	    max_length = length;
+	    insertion_point = i+1;
 	  }
-	if (index == len)
-	  throw new ParseException ("missing argument", index);
-
-	int save = index;
-	index = text_argument (rules, index, argument);
-	if (argument.length() == 0)
-	  throw new ParseException ("invalid character", save);
-	String arg = argument.toString();
-	int item_index = -1;
-        
-        for (int j = 0; j < vec.size(); ++j)
-          {
-            CollationElement e = (CollationElement) vec.elementAt (j);
+      }
 
-            if (arg.equals (e.key))
-              {
-                item_index = j;
-                break;
-              }
-          }
+    if (insertion_point < 0)
+      throw new ParseException("no insertion point found for " + starter, offset);
+
+    if (max_length < starter.length())
+      {
+	/*
+	 * We need to expand the first entry. It must be sorted
+	 * like if it was the reference key itself (like the spec
+	 * said. So the first entry is special: the element is
+	 * replaced by the specified text element for the sorting.
+	 * This text replace the old one for comparisons. However
+	 * to preserve the behaviour we replace the first key (corresponding
+	 * to the found prefix) by a new code rightly ordered in the
+	 * sequence. The rest of the subsequence must be appended
+	 * to the end of the sequence.
+	 */
+	CollationSorter sorter = (CollationSorter) patch.elementAt(0);
+	CollationSorter expansionPrefix =
+	  (CollationSorter) main.elementAt(insertion_point-1);
 	
-	if (c != '&')
+	sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
+		
+	main.insertElementAt(sorter, insertion_point);
+	
+	/*
+	 * This is a new set of rules. Append to the list.
+	 */
+	patch.removeElementAt(0);
+	insertion_point++;
+      }
+
+    // Now insert all elements of patch at the insertion point.
+    for (int i = 0; i < patch.size(); i++)
+      main.insertElementAt(patch.elementAt(i), i+insertion_point);
+  }
+
+  /**
+   * This method parses a string and build a set of sorting instructions. The parsing
+   * may only be partial on the case the rules are to be merged sometime later.
+   * 
+   * @param stop_on_reset If this parameter is true then the parser stops when it
+   * encounters a reset instruction. In the other case, it tries to parse the subrules
+   * and merged it in the same repository.
+   * @param v Output vector for the set of instructions.
+   * @param base_offset Offset in the string to begin parsing.
+   * @param rules Rules to be parsed.
+   * @return -1 if the parser reached the end of the string, an integer representing the
+   * offset in the string at which it stopped parsing. 
+   * @throws ParseException if something turned wrong during the parsing. To get details
+   * decode the message.
+   */
+  private int subParseString(boolean stop_on_reset, Vector v,
+			     int base_offset, String rules)
+    throws ParseException
+  {
+    boolean ignoreChars = (base_offset == 0);
+    int operator = -1;
+    StringBuffer sb = new StringBuffer();
+    boolean doubleQuote = false;
+    boolean eatingChars = false;
+    boolean nextIsModifier = false;
+    boolean isModifier = false;
+    int i;
+    
+main_parse_loop:
+    for (i = 0; i < rules.length(); i++)
+      {
+	char c = rules.charAt(i);
+	int type = -1;
+	
+	if (!eatingChars &&
+	    ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
+	      continue;
+
+	isModifier = nextIsModifier;
+	nextIsModifier = false;
+
+	if (eatingChars && c != '\'')
 	  {
-	    // If the argument already appears in the vector, then we
-	    // must remove it in order to re-order.
-	    if (item_index != -1)
-	      {
-		vec.removeElementAt(item_index);
-		if (insertion_index >= item_index)
-		  --insertion_index;
-	      }
-	    CollationElement r = new CollationElement (arg, c);
-	    vec.insertElementAt(r, insertion_index);
-	    ++insertion_index;
+	    doubleQuote = false;
+	    sb.append(c);
+	    continue;
 	  }
-	else
+	if (doubleQuote && eatingChars)
 	  {
-	    // Reset.
-	    if (item_index == -1)
-	      throw
-		new ParseException ("argument to reset not previously seen",
-				    save);
-	    insertion_index = item_index + 1;
+	    sb.append(c);
+	    doubleQuote = false;
+	    continue;
 	  }
 
-	// Ugly: in this case the resulting INDEX comes from
-	// text_argument, which returns the index of the next
-	// character we should examine.
-	--index;
-      }
+	switch (c) {
+	case '!':
+	  throw new ParseException
+	    ("Modifier '!' is not yet supported by Classpath", i+base_offset);
+	case '<':
+	  type = CollationSorter.GREATERP;
+	  break;
+	case ';':
+	  type = CollationSorter.GREATERS;
+	  break;
+	case ',':
+	  type = CollationSorter.GREATERT;
+	  break;
+	case '=':
+	  type = CollationSorter.EQUAL;
+	  break;
+	case '\'':
+	  eatingChars = !eatingChars;
+	  doubleQuote = true;
+	  break;
+	case '@':
+	  if (ignoreChars)
+	    throw new ParseException
+	      ("comparison list has not yet been started. You may only use"
+	       + "(<,;=&)", i+base_offset);
+	  // Inverse the order of secondaries from now on.
+	  nextIsModifier = true;
+	  type = CollationSorter.INVERSE_SECONDARY;
+	  break;
+	case '&':
+	  type = CollationSorter.RESET;
+	  if (stop_on_reset)
+	    break main_parse_loop;
+	  break;
+	default:
+	  if (operator < 0)
+	    throw new ParseException
+	      ("operator missing at " + (i+base_offset), i+base_offset);
+	  if (!eatingChars &&
+	      ((c >= 0x21 && c <= 0x2F) 
+	       || (c >= 0x3A && c <= 0x40)
+	       || (c >= 0x5B && c <= 0x60)
+	       || (c >= 0x7B && c <= 0x7E)))
+	    throw new ParseException
+	      ("unquoted punctuation character '"+c+"'", i+base_offset);
+
+	  //type = ignoreChars ? CollationSorter.IGNORE : -1;
+	  sb.append(c);
+	  break;
+	}
 
-    // Now construct a hash table that maps strings onto their
-    // collation values.
-    int primary = 0;
-    int secondary = 0;
-    int tertiary = 0;
-    this.map = new Hashtable ();
-    this.prefixes = new Hashtable ();
-    Enumeration e = vec.elements();
-    while (e.hasMoreElements())
-      {
-	CollationElement r = (CollationElement) e.nextElement();
-	switch (r.relation)
+	if (type  < 0)
+	  continue;
+
+	if (operator < 0)
 	  {
-	  case '<':
-	    ++primary;
-	    secondary = 0;
-	    tertiary = 0;
-	    break;
-	  case ';':
-	    ++secondary;
-	    tertiary = 0;
-	    break;
-	  case ',':
-	    ++tertiary;
-	    break;
-	  case '=':
-	    break;
+	    operator = type;
+	    continue;
 	  }
-	// This must match CollationElementIterator.
-	map.put(r.key, new Integer (primary << 16
-				    | secondary << 8 | tertiary));
 
-	// Make a map of all lookaheads we might need.
-	for (int i = r.key.length() - 1; i >= 1; --i)
-	  prefixes.put(r.key.substring(0, i), Boolean.TRUE);
+	if (sb.length() == 0 && !isModifier)
+	  throw new ParseException
+	    ("text element empty at " + (i+base_offset), i+base_offset);
+
+	if (operator == CollationSorter.RESET)
+	  {
+	    /* Reposition in the sorting list at the position
+	     * indicated by the text element.
+	     */
+	    String subrules = rules.substring(i);
+	    Vector sorted_rules = new Vector();
+	    int idx;
+
+	    // Parse the subrules but do not iterate through all
+	    // sublist. This is the priviledge of the first call.
+	    idx = subParseString(true, sorted_rules, base_offset+i, subrules);
+    
+	    // Merge new parsed rules into the list.
+	    mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
+	    sb.setLength(0);
+	    
+	    // Reset state to none.
+	    operator = -1;
+	    type = -1;
+	    // We have found a new subrule at 'idx' but it has not been parsed.
+	    if (idx >= 0)
+	      {
+		i += idx-1;
+		continue main_parse_loop;
+	      }
+	    else
+		// No more rules.
+		break main_parse_loop;
+	  }
+
+	CollationSorter sorter = new CollationSorter();
+	
+	if (operator == CollationSorter.GREATERP)
+	  ignoreChars = false;
+
+	sorter.comparisonType = operator;
+	sorter.textElement = sb.toString();
+	sorter.hashText = sorter.textElement.hashCode();
+	sorter.offset = base_offset+rules.length();
+	sorter.ignore = ignoreChars;
+	sb.setLength(0);
+
+	v.add(sorter);
+	operator = type;
+      }
+
+    if (operator >= 0)
+      {
+	CollationSorter sorter = new CollationSorter();
+	int pos = rules.length() + base_offset;
+
+	if ((sb.length() != 0 && nextIsModifier)
+	    || (sb.length() == 0 && !nextIsModifier && !eatingChars))
+	  throw new ParseException("text element empty at " + pos, pos);
+
+	if (operator == CollationSorter.GREATERP)
+	  ignoreChars = false;
+
+	sorter.comparisonType = operator;
+	sorter.textElement = sb.toString();
+ 	sorter.hashText = sorter.textElement.hashCode();
+	sorter.offset = base_offset+pos;
+	sorter.ignore = ignoreChars;
+	v.add(sorter);
       }
+
+    if (i == rules.length())
+      return -1;
+    else
+      return i;
   }
 
   /**
@@ -323,90 +580,130 @@ public class RuleBasedCollator extends Collator
    */
   public Object clone()
   {
-    RuleBasedCollator c = (RuleBasedCollator) super.clone ();
-    c.map = (Hashtable) map.clone ();
-    c.prefixes = (Hashtable) map.clone ();
-    return c;
+    return super.clone();
   }
 
-  // A helper for CollationElementIterator.next().
-  int ceiNext (CollationElementIterator cei)
+  /**
+   * This method completely parses a string 'rules' containing sorting rules.
+   *
+   * @param rules String containing the rules to be parsed. 
+   * @return A set of sorting instructions stored in a Vector.
+   * @throws ParseException if something turned wrong during the parsing. To get details
+   * decode the message.
+   */
+  private Vector parseString(String rules) 
+    throws ParseException
   {
-    if (cei.lookahead_set)
-      {
-	cei.lookahead_set = false;
-	return cei.lookahead;
-      }
+    Vector v = new Vector();
 
-    int save = cei.textIndex;
-    int max = cei.text.length();
-    String s = null;
-
-    // It is possible to have a case where `abc' has a mapping, but
-    // neither `ab' nor `abd' do.  In this case we must treat `abd' as
-    // nothing special.
-    boolean found = false;
+    // result of the first subParseString is not absolute (may be -1 or a
+    // positive integer). But we do not care.
+    subParseString(false, v, 0, rules);
+    
+    return v;
+  }
 
-    int i;
-    for (i = save + 1; i <= max; ++i)
+  /**
+   * This method uses the sorting instructions built by {@link #parseString}
+   * to build collation elements which can be directly used to sort strings.
+   *
+   * @param parsedElements Parsed instructions stored in a Vector.
+   * @throws ParseException if the order of the instructions are not valid.
+   */
+  private void buildCollationVector(Vector parsedElements)
+    throws ParseException
+  {
+    int primary_seq = 0;
+    int last_tertiary_seq = 0;
+    short secondary_seq = 0;
+    short tertiary_seq = 0;
+    short equality_seq = 0;
+    boolean inverseComparisons = false;
+    final boolean DECREASING = false;
+    final boolean INCREASING = true;
+    boolean secondaryType = INCREASING;
+    Vector v = new Vector();
+
+    // elts is completely sorted.
+element_loop:
+    for (int i = 0; i < parsedElements.size(); i++)
       {
-	s = cei.text.substring(save, i);
-	if (prefixes.get(s) == null)
-	  break;
-	found = true;
-      }
-    // Assume s != null.
+	CollationSorter elt = (CollationSorter) parsedElements.elementAt(i);
+	boolean ignoreChar = false;
 
-    Object obj = map.get(s);
-    // The special case.
-    while (found && obj == null && s.length() > 1)
-      {
-	--i;
-	s = cei.text.substring(save, i);
-	obj = map.get(s);
+	switch (elt.comparisonType)
+	  {
+	  case CollationSorter.GREATERP:
+	    primary_seq++;
+	    if (inverseComparisons)
+	      {
+		secondary_seq = Short.MAX_VALUE;
+		secondaryType = DECREASING;
+	      }
+	    else
+	      {
+		secondary_seq = 0;
+		secondaryType = INCREASING;
+	      }
+	    tertiary_seq = 0;
+	    equality_seq = 0;
+	    inverseComparisons = false;
+	    break;
+	  case CollationSorter.GREATERS:
+	    if (secondaryType == DECREASING)
+	      secondary_seq--;
+	    else
+	      secondary_seq++;
+	    tertiary_seq = 0;
+	    equality_seq = 0;
+	    break;
+	  case CollationSorter.INVERSE_SECONDARY:
+	    inverseComparisons = true;
+	    continue element_loop;
+	  case CollationSorter.GREATERT:
+	    tertiary_seq++;
+	    if (primary_seq == 0)
+	      last_tertiary_seq = tertiary_seq;
+	    equality_seq = 0;
+	    break;
+	  case CollationSorter.EQUAL:
+	    equality_seq++;
+	    break;
+	  case CollationSorter.RESET:
+	    throw new ParseException
+	      ("Invalid reached state 'RESET'. Internal error", elt.offset);
+	  default:
+	    throw new ParseException
+	      ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
+	  }
+
+	v.add(new CollationElement(elt.textElement, primary_seq,
+				   secondary_seq, tertiary_seq,
+				   equality_seq, elt.expansionOrdering, elt.ignore));
       }
 
-    // Update state.
-    cei.textIndex = i;
+    this.inverseAccentComparison = inverseComparisons; 
 
-    if (obj == null)
-      {
-	// This idea, and the values, come from JDK.
-	// assert (s.length() == 1)
-	cei.lookahead_set = true;
-	cei.lookahead = s.charAt(0) << 8;
-	return 0x7fff << 16;
-      }
+    ce_table = v.toArray();
 
-    return ((Integer) obj).intValue();
+    last_primary_value = primary_seq+1;
+    last_tertiary_value = last_tertiary_seq+1;
   }
 
-  // A helper for compareTo() that returns the next character that has
-  // a nonzero ordering at the indicated strength.  This is also used
-  // in CollationKey.
-  static final int next (CollationElementIterator iter, int strength)
+  /**
+   * Build a tree where all keys are the texts of collation elements and data is
+   * the collation element itself. The tree is used when extracting all prefix
+   * for a given text.
+   */
+  private void buildPrefixAccess()
   {
-    while (true)
+    prefix_tree = new HashMap();
+
+    for (int i = 0; i < ce_table.length; i++)
       {
-	int os = iter.next();
-	if (os == CollationElementIterator.NULLORDER)
-	  return os;
-	int c = 0;
-	switch (strength)
-	  {
-	  case PRIMARY:
-	    c = os & ~0xffff;
-	    break;
-	  case SECONDARY:
-	    c = os & ~0x00ff;
-	    break;
-	  case TERTIARY:
-	  case IDENTICAL:
-	    c = os;
-	    break;
-	  }
-	if (c != 0)
-	  return c;
+	CollationElement e = (CollationElement) ce_table[i];
+
+	prefix_tree.put(e.key, e);
       }
   }
 
@@ -425,34 +722,115 @@ public class RuleBasedCollator extends Collator
   public int compare(String source, String target)
   {
     CollationElementIterator cs, ct;
+    CollationElement ord1block = null;
+    CollationElement ord2block = null;
+    boolean advance_block_1 = true;
+    boolean advance_block_2 = true;
 
-    cs = new CollationElementIterator(this, source);
-    ct = new CollationElementIterator(this, target);
+    cs = getCollationElementIterator(source);
+    ct = getCollationElementIterator(target);
 
     for(;;)
       {
-	int os = next (cs, strength);
-	int ot = next (ct, strength);
+	int ord1;
+	int ord2;
+
+	/*
+	 * We have to check whether the characters are ignorable.
+	 * If it is the case then forget them. 
+	 */
+	if (advance_block_1)
+	  {
+	    ord1block = cs.nextBlock();
+	    if (ord1block != null && ord1block.ignore)
+	      continue;
+	  }
+	
+	if (advance_block_2)
+	  {
+	    ord2block = ct.nextBlock();
+	    if (ord2block != null && ord2block.ignore)
+	      {
+	        advance_block_1 = false;
+	        continue;
+	      }
+	 }
+	else
+	  advance_block_2 = true;
 
-	if (os == CollationElementIterator.NULLORDER
-	    && ot == CollationElementIterator.NULLORDER)
-	  break;
-	else if (os == CollationElementIterator.NULLORDER)
+	if (!advance_block_1)
+	  advance_block_1 = true;
+
+	if (ord1block != null)
+	  ord1 = ord1block.getValue();
+	else
 	  {
-	    // Source string is shorter, so return "less than".
+	    if (ord2block == null)
+	      return 0;
 	    return -1;
 	  }
-	else if (ot == CollationElementIterator.NULLORDER)
+
+	if (ord2block == null)
+	  return 1;
+	
+	ord2 = ord2block.getValue();
+	
+	// We know chars are totally equal, so skip
+        if (ord1 == ord2)
 	  {
-	    // Target string is shorter, so return "greater than".
-	    return 1;
+	    if (getStrength() == IDENTICAL)
+	      if (!ord1block.key.equals(ord2block.key))
+		return ord1block.key.compareTo(ord2block.key);
+	    continue;
 	  }
 
-	if (os != ot)
-	  return os - ot;
-      }
+        // Check for primary strength differences
+        int prim1 = CollationElementIterator.primaryOrder(ord1); 
+        int prim2 = CollationElementIterator.primaryOrder(ord2); 
+	
+	if (prim1 == 0 && getStrength() < TERTIARY)
+	  {
+            advance_block_2 = false;
+	    continue;
+	  }
+	else if (prim2 == 0 && getStrength() < TERTIARY)
+	  {
+	    advance_block_1 = false;
+	    continue;
+	  }
+
+        if (prim1 < prim2)
+          return -1;
+        else if (prim1 > prim2)
+          return 1;
+        else if (getStrength() == PRIMARY)
+          continue;
+
+        // Check for secondary strength differences
+        int sec1 = CollationElementIterator.secondaryOrder(ord1);
+        int sec2 = CollationElementIterator.secondaryOrder(ord2);
+
+	if (sec1 < sec2)
+          return -1;
+        else if (sec1 > sec2)
+          return 1;
+        else if (getStrength() == SECONDARY)
+          continue;
+
+        // Check for tertiary differences
+        int tert1 = CollationElementIterator.tertiaryOrder(ord1);
+        int tert2 = CollationElementIterator.tertiaryOrder(ord2);
+
+        if (tert1 < tert2)
+          return -1;
+        else if (tert1 > tert2)
+          return 1;
+	else if (getStrength() == TERTIARY)
+	  continue;
 
-    return 0;
+	// Apparently JDK does this (at least for my test case).
+	return ord1block.key.compareTo(ord2block.key);    
+      }
   }
 
   /**
@@ -467,13 +845,54 @@ public class RuleBasedCollator extends Collator
    */
   public boolean equals(Object obj)
   {
-    if (! (obj instanceof RuleBasedCollator) || ! super.equals(obj))
+    if (obj == this)
+      return true;
+    else
       return false;
-    RuleBasedCollator rbc = (RuleBasedCollator) obj;
-    // FIXME: this is probably wrong.  Instead we should compare maps
-    // directly.
-    return (frenchAccents == rbc.frenchAccents
-	    && rules.equals(rbc.rules));
+  }
+
+  /**
+   * This method builds a default collation element without invoking
+   * the database created from the rules passed to the constructor.
+   *
+   * @param c Character which needs a collation element.
+   * @return A valid brand new CollationElement instance.
+   */
+  CollationElement getDefaultElement(char c)
+  {
+    int v;
+
+    // Preliminary support for generic accent sorting inversion (I don't know if all
+    // characters in the range should be sorted backward). This is the place
+    // to fix this if needed.
+    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
+      v = 0x0361 - ((int) c - 0x02B9);
+    else
+      v = (short) c;
+    return new CollationElement("" + c, last_primary_value + v,
+				(short) 0, (short) 0, (short) 0, null, false);
+  }
+
+  /**
+   * This method builds a default collation element for an accented character
+   * without invoking the database created from the rules passed to the constructor.
+   *
+   * @param c Character which needs a collation element.
+   * @return A valid brand new CollationElement instance.
+   */
+  CollationElement getDefaultAccentedElement(char c)
+  {
+    int v;
+
+    // Preliminary support for generic accent sorting inversion (I don't know if all
+    // characters in the range should be sorted backward). This is the place
+    // to fix this if needed.
+    if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
+      v = 0x0361 - ((int) c - 0x02B9);
+    else
+      v = (short) c;
+    return new CollationElement("" + c, (short) 0,
+				(short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
   }
 
   /**
@@ -489,13 +908,7 @@ public class RuleBasedCollator extends Collator
    */
   public CollationElementIterator getCollationElementIterator(String source)
   {
-    int len = source.length();
-    StringBuffer expand = new StringBuffer(len);
-    
-    for (int index = 0; index < len; ++index)
-      decomposeCharacter(source.charAt(index), expand);
-    
-    return new CollationElementIterator(this, expand.toString());
+    return new CollationElementIterator(this, source);
   }
 
   /**
@@ -517,7 +930,7 @@ public class RuleBasedCollator extends Collator
 	 c = source.next())
       decomposeCharacter(c, expand);
 
-    return new CollationElementIterator(this, expand.toString());
+    return getCollationElementIterator(expand.toString());
   }
 
   /**
@@ -533,8 +946,52 @@ public class RuleBasedCollator extends Collator
    */
   public CollationKey getCollationKey(String source)
   {
-    return new CollationKey(this, getCollationElementIterator(source), source,
-			    strength);
+    CollationElementIterator cei = getCollationElementIterator(source);
+    Vector vect = new Vector(25);
+
+    int ord = cei.next();
+    cei.reset(); //set to start of string
+
+    while (ord != CollationElementIterator.NULLORDER)
+      {
+	// If the primary order is null, it means this is an ignorable
+	// character.
+	if (CollationElementIterator.primaryOrder(ord) == 0)
+	  {
+            ord = cei.next();
+	    continue;
+	  }
+        switch (getStrength())
+          {
+            case PRIMARY:
+	      ord = CollationElementIterator.primaryOrder(ord);
+	      break;
+	      
+            case SECONDARY:
+	      ord = CollationElementIterator.primaryOrder(ord) << 8;
+	      ord |= CollationElementIterator.secondaryOrder(ord);
+
+            default:
+               break;
+          }
+
+        vect.add(new Integer(ord)); 
+	ord = cei.next(); //increment to next key
+      }
+
+    Object[] objarr = vect.toArray();
+    byte[] key = new byte[objarr.length * 4];
+
+    for (int i = 0; i < objarr.length; i++)
+      {
+        int j = ((Integer) objarr[i]).intValue();
+        key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
+        key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
+        key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
+        key [i * 4 + 3] = (byte) (j & 0x000000FF);
+      }
+
+    return new CollationKey(this, source, key);
   }
 
   /**
@@ -555,44 +1012,6 @@ public class RuleBasedCollator extends Collator
    */
   public int hashCode()
   {
-    return (frenchAccents ? 1231 : 1237
-	    ^ rules.hashCode()
-	    ^ map.hashCode()
-	    ^ prefixes.hashCode());
+    return System.identityHashCode(this);
   }
-
-  private final boolean is_special (char c)
-  {
-    // Rules from JCL book.
-    return ((c >= 0x0021 && c <= 0x002f)
-	    || (c >= 0x003a && c <= 0x0040)
-	    || (c >= 0x005b && c <= 0x0060)
-	    || (c >= 0x007b && c <= 0x007e));
-  }
-
-  private final int text_argument (String rules, int index,
-				   StringBuffer result)
-  {
-    result.setLength(0);
-    int len = rules.length();
-    while (index < len)
-      {
-	char c = rules.charAt (index);
-	if (c == '\''
-            && index + 2 < len
-	    && rules.charAt (index + 2) == '\'')
-          {
-            result.append (rules.charAt (index + 1));
-            index += 2;
-          }
-	else if (is_special (c))
-	  return index;
-        else if (!Character.isWhitespace (c))
-          result.append (c);
-        
-        ++index;
-      }
-    return index;
-  }
-
 }
author	Michael Koch <konqueror@gmx.de>	2004-05-31 22:16:31 +0000
committer	Michael Koch <mkoch@gcc.gnu.org>	2004-05-31 22:16:31 +0000
commit	57807c317869610e07a85bf8bf95747638230ed7 (patch)
tree	84eadd76d1df1c8b46bd05dd00699bc802f78611 /libjava/java
parent	f7dbd56c9a1e2605f54a05c2f613cf086f32634b (diff)
download	gcc-57807c317869610e07a85bf8bf95747638230ed7.zip gcc-57807c317869610e07a85bf8bf95747638230ed7.tar.gz gcc-57807c317869610e07a85bf8bf95747638230ed7.tar.bz2