1 files changed, 320 insertions, 32 deletions
diff --git a/libjava/classpath/java/awt/font/NumericShaper.java b/libjava/classpath/java/awt/font/NumericShaper.java
index efbdcd4..add1c6a 100644
--- a/libjava/classpath/java/awt/font/NumericShaper.java
+++ b/libjava/classpath/java/awt/font/NumericShaper.java
@@ -39,99 +39,387 @@ exception statement from your version. */
 package java.awt.font;
 
 import java.io.Serializable;
+import java.lang.Character.UnicodeBlock;
 
 /**
+ * This class handles numeric shaping.  A shaper can either be contextual
+ * or not.  A non-contextual shaper will always translate ASCII digits
+ * in its input into the target Unicode range.  A contextual shaper will
+ * change the target Unicode range depending on the characters it has
+ * previously processed.
+ *
  * @author Michael Koch
+ * @author Tom Tromey
+ *
  * @since 1.4
+ * @specnote This class does not handle LIMBU or OSMANYA.
+ * @specnote The JDK does not seem to properly handle ranges without a
+ * digit zero, such as TAMIL.  This implementation does.
  */
 public final class NumericShaper implements Serializable
 {
   private static final long serialVersionUID = -8022764705923730308L;
-  
+
+  /** Convenience constant representing all the valid Unicode ranges.  */
   public static final int ALL_RANGES  = 524287;
+
+  /**
+   * Constant representing the Unicode ARABIC range.  Shaping done
+   * using this range will translate to the arabic decimal characters.
+   * Use EASTERN_ARABIC if you want to shape to the eastern arabic
+   * (also known as the extended arabic) decimal characters.
+   */
   public static final int ARABIC  = 2;
+
+  /** Constant representing the Unicode BENGALI range.  */
   public static final int BENGALI  = 16;
+
+  /** Constant representing the Unicode DEVANAGARI range.  */
   public static final int DEVANAGARI  = 8;
+
+  /**
+   * Constant representing the Unicode extended arabic range.
+   * In Unicode there are two different sets of arabic digits;
+   * this selects the extended or eastern set.
+   */
   public static final int EASTERN_ARABIC  = 4;
+
+  /**
+   * Constant representing the Unicode ETHIOPIC range.  Note that
+   * there is no digit zero in this range; an ASCII digit zero
+   * is left unchanged when shaping to this range.
+   */
   public static final int ETHIOPIC  = 65536;
+
+  /**
+   * Constant representing the Unicode EUROPEAN range.  For
+   * contextual shaping purposes, characters in the various
+   * extended Latin character blocks are recognized as EUROPEAN.
+   */
   public static final int EUROPEAN  = 1;
+
+  /** Constant representing the Unicode GUJARATI range.  */
   public static final int GUJARATI  = 64;
+
+  /** Constant representing the Unicode GURMUKHI range.  */
   public static final int GURMUKHI  = 32;
+
+  /** Constant representing the Unicode KANNADA range.  */
   public static final int KANNADA  = 1024;
+
+  /** Constant representing the Unicode KHMER range.  */
   public static final int KHMER  = 131072;
+
+  /** Constant representing the Unicode LAO range.  */
   public static final int LAO  = 8192;
+
+  /** Constant representing the Unicode MALAYALAM range.  */
   public static final int MALAYALAM  = 2048;
+
+  /** Constant representing the Unicode MONGOLIAN range.  */
   public static final int MONGOLIAN  = 262144;
+
+  /** Constant representing the Unicode MYANMAR range.  */
   public static final int MYANMAR  = 32768;
+
+  /** Constant representing the Unicode ORIYA range.  */
   public static final int ORIYA  = 128;
+
+  /**
+   * Constant representing the Unicode TAMIL range.  Note that
+   * there is no digit zero in this range; an ASCII digit zero
+   * is left unchanged when shaping to this range.
+   */
   public static final int TAMIL  = 256;
+
+  /** Constant representing the Unicode TELUGU range.  */
   public static final int TELUGU  = 512;
+
+  /** Constant representing the Unicode THAI range.  */
   public static final int THAI  = 4096;
+
+  /** Constant representing the Unicode TIBETAN range.  */
   public static final int TIBETAN  = 16384;
 
-  private int ranges;
-  private int context;
-  
-  private NumericShaper (int ranges, int context)
+  /**
+   * This table holds the zero digits for each language.  This is hard-coded
+   * because the values will not change and the table layout is tied to the
+   * other constants in this class in any case.  In the two places where a
+   * language does not have a zero digit, the character immediately preceding
+   * the one digit is used instead.  These languages are special-cased in
+   * the shaping code.
+   */
+  private static final char[] zeroDigits =
   {
-    this.ranges = ranges;
-    this.context = context;
+    '0',      // EUROPEAN
+    '\u0660', // ARABIC
+    '\u06f0', // EASTERN_ARABIC
+    '\u0966', // DEVANAGARI
+    '\u09e6', // BENGALI
+    '\u0a66', // GURMUKHI
+    '\u0ae6', // GUJARATI
+    '\u0b66', // ORIYA
+    '\u0be6', // TAMIL - special case as there is no digit zero
+    '\u0c66', // TELUGU
+    '\u0ce6', // KANNADA
+    '\u0d66', // MALAYALAM
+    '\u0e50', // THAI
+    '\u0ed0', // LAO
+    '\u0f20', // TIBETAN
+    '\u1040', // MYANMAR
+    '\u1368', // ETHIOPIC - special case as there is no digit zero
+    '\u17e0', // KHMER
+    '\u1810'  // MONGOLIAN
+  };
+
+  /**
+   * The default initial context for this shaper, specified as
+   * an integer from 0 to 18.
+   */
+  private int key;
+
+  /**
+   * The target ranges handled by this shaper.  If the shaper
+   * is not contextual, the high bit of this field will be set.
+   * @specnote This was discovered by reading the serialization spec
+   */
+  private int mask;
+
+  /**
+   * Create a new numeric shaper.  The key given is a constant from
+   * this class, the constructor turns it into its internal form.
+   * @param key the key to use, as one of the manifest constants
+   * @param mask a mask of languages to shape for
+   */
+  private NumericShaper (int key, int mask)
+  {
+    // This internal form is a bit goofy, but it is specified by
+    // the serialization spec.
+    this.key = Integer.numberOfTrailingZeros(key);
+    this.mask = mask;
   }
 
-  public boolean equals (Object obj)
+  /**
+   * Return an integer representing all the languages for which this
+   * shaper will shape.  The result is taken by "or"ing together
+   * the constants representing the various languages.
+   */
+  public int getRanges ()
   {
-    if (! (obj instanceof NumericShaper))
-      return false;
+    return mask & ALL_RANGES;
+  }
 
-    NumericShaper tmp = (NumericShaper) obj;
-    
-    return (ranges == tmp.ranges
-            && context == tmp.context);
+  /**
+   * Return true if this shaper is contextual, false if it is not.
+   */
+  public boolean isContextual ()
+  {
+    return mask >= 0; // Sign bit marks non-contextual; empty mask is contextual.
   }
 
-  public static NumericShaper getContextualShaper (int ranges)
+  /**
+   * Shape the text in the given array.  The starting context will
+   * be the context passed to the shaper at creation time.
+   * @param text the text to shape
+   * @param start the index of the starting character of the array
+   * @param count the number of characters in the array
+   */
+  public void shape (char[] text, int start, int count)
   {
-    throw new Error ("not implemented");
+    shape (text, start, count, 1 << key);
   }
 
-  public static NumericShaper getContextualShaper (int ranges,
-                                                   int defaultContext)
+  /**
+   * Given a Unicode block object, return the corresponding language
+   * constant, or zero if the block is not recognized.  Character has a
+   * single ARABIC block covering both ARABIC and EASTERN_ARABIC, so this
+   * method returns EASTERN_ARABIC for it; the caller must substitute
+   * ARABIC when only ARABIC was requested.
+   * @param b the unicode block to classify
+   * @return the language constant, or zero if not recognized
+   */
+  private int classify(UnicodeBlock b)
   {
-    throw new Error ("not implemented");
+    if (b == null)
+      return 0;
+    // ARABIC is handled by the caller; from testing we know
+    // that EASTERN_ARABIC takes precedence.
+    if (b == UnicodeBlock.ARABIC)
+      return EASTERN_ARABIC;
+    if (b == UnicodeBlock.BENGALI)
+      return BENGALI;
+    if (b == UnicodeBlock.DEVANAGARI)
+      return DEVANAGARI;
+    if (b == UnicodeBlock.ETHIOPIC)
+      return ETHIOPIC;
+    if (b == UnicodeBlock.BASIC_LATIN
+        || b == UnicodeBlock.LATIN_1_SUPPLEMENT
+        || b == UnicodeBlock.LATIN_EXTENDED_A
+        || b == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
+        || b == UnicodeBlock.LATIN_EXTENDED_B)
+      return EUROPEAN;
+    if (b == UnicodeBlock.GUJARATI)
+      return GUJARATI;
+    if (b == UnicodeBlock.GURMUKHI)
+      return GURMUKHI;
+    if (b == UnicodeBlock.KANNADA)
+      return KANNADA;
+    if (b == UnicodeBlock.KHMER)
+      return KHMER;
+    if (b == UnicodeBlock.LAO)
+      return LAO;
+    if (b == UnicodeBlock.MALAYALAM)
+      return MALAYALAM;
+    if (b == UnicodeBlock.MONGOLIAN)
+      return MONGOLIAN;
+    if (b == UnicodeBlock.MYANMAR)
+      return MYANMAR;
+    if (b == UnicodeBlock.ORIYA)
+      return ORIYA;
+    if (b == UnicodeBlock.TAMIL)
+      return TAMIL;
+    if (b == UnicodeBlock.TELUGU)
+      return TELUGU;
+    if (b == UnicodeBlock.THAI)
+      return THAI;
+    if (b == UnicodeBlock.TIBETAN)
+      return TIBETAN;
+    return 0;
   }
 
-  public int getRanges ()
+  /**
+   * Shape the given text, using the indicated initial context.
+   * If this shaper is not a contextual shaper, then the given context
+   * will be ignored.
+   * @param text the text to shape
+   * @param start the index of the first character of the text to shape
+   * @param count the number of characters to shape in the text
+   * @param context the initial context
+   * @throws IllegalArgumentException if the initial context is invalid
+   */
+  public void shape (char[] text, int start, int count, int context)
   {
-    return ranges;
+    int currentContext;
+    if (isContextual())
+      {
+        if (Integer.bitCount(context) != 1 || (context & ~ALL_RANGES) != 0)
+          throw new IllegalArgumentException("invalid context argument");
+        // If the indicated context is not one we are handling, reset it.
+        if ((context & mask) == 0)
+          currentContext = -1;
+        else
+          currentContext = Integer.numberOfTrailingZeros(context);
+      }
+    else
+      currentContext = key;
+
+    for (int i = 0; i < count; ++i)
+      {
+        char c = text[start + i];
+        if (c >= '0' && c <= '9')
+          {
+            if (currentContext >= 0)
+              {
+                // Shape into the current context.
+                if (c == '0'
+                  && ((1 << currentContext) == TAMIL
+                      || (1 << currentContext) == ETHIOPIC))
+                  {
+                    // No digit 0 in this context; do nothing.
+                  }
+                else
+                  text[start + i]
+                    = (char) (zeroDigits[currentContext] + c - '0');
+              }
+          }
+        else if (isContextual())
+          {
+            // If c is in a selected group, switch currentContext; else keep it.
+            int group = classify(UnicodeBlock.of(c));
+            // Specially handle ARABIC.
+            if (group == EASTERN_ARABIC && (mask & EASTERN_ARABIC) == 0
+                && (mask & ARABIC) != 0)
+              group = ARABIC;
+            if ((mask & group) != 0)
+              {
+                // The character was classified as being in a group
+                // we recognize, and it was selected by the shaper.
+                // So, change the context.
+                currentContext = Integer.numberOfTrailingZeros(group);
+              }
+          }
+      }
   }
 
-  public static NumericShaper getShaper (int singleRange)
+  public boolean equals (Object obj)
   {
-    throw new Error ("not implemented");
+    if (! (obj instanceof NumericShaper))
+      return false;
+    NumericShaper tmp = (NumericShaper) obj;
+    return key == tmp.key && mask == tmp.mask;
   }
 
   public int hashCode ()
   {
-    throw new Error ("not implemented");
+    return key ^ mask;
   }
 
-  public boolean isContextual ()
+  public String toString ()
   {
-    throw new Error ("not implemented");
+    // For debugging only.
+    return "key=" + key + "; mask=" + mask;
   }
 
-  public void shape (char[] text, int start, int count)
+  /**
+   * Return a non-contextual shaper which can shape to a single range.
+   * All ASCII digits in the input text are translated to this language.
+   * @param singleRange the target language
+   * @return a non-contextual shaper for this language
+   * @throws IllegalArgumentException if the argument does not name a
+   * single language, as specified by the constants declared in this class
+   */
+  public static NumericShaper getShaper (int singleRange)
   {
-    shape (text, start, count, context);
+    if (Integer.bitCount(singleRange) != 1)
+      throw new IllegalArgumentException("argument must have one bit set");
+    if ((singleRange & ~ALL_RANGES) != 0)
+      throw new IllegalArgumentException("argument out of range");
+    return new NumericShaper(singleRange, Integer.MIN_VALUE | singleRange);
   }
 
-  public void shape (char[] text, int start, int count, int context)
+  /**
+   * Return a contextual shaper which can shape to any of the indicated
+   * languages.  The default initial context for this shaper is EUROPEAN.
+   * @param ranges the ranges to shape to
+   * @return a contextual shaper which will target any of these ranges
+   * @throws IllegalArgumentException if the argument specifies an
+   * unrecognized range
+   */
+  public static NumericShaper getContextualShaper (int ranges)
   {
-    throw new Error ("not implemented");
+    if ((ranges & ~ALL_RANGES) != 0)
+      throw new IllegalArgumentException("argument out of range");
+    return new NumericShaper(EUROPEAN, ranges);
   }
 
-  public String toString ()
+  /**
+   * Return a contextual shaper which can shape to any of the indicated
+   * languages.  The default initial context for this shaper is given as
+   * an argument.
+   * @param ranges the ranges to shape to
+   * @param defaultContext the default initial context
+   * @return a contextual shaper which will target any of these ranges
+   * @throws IllegalArgumentException if the ranges argument specifies an
+   * unrecognized range, or if the defaultContext argument does not specify
+   * a single valid range
+   */
+  public static NumericShaper getContextualShaper (int ranges,
+                                                   int defaultContext)
   {
-    throw new Error ("not implemented");
+    if (Integer.bitCount(defaultContext) != 1)
+      throw new IllegalArgumentException("context must have one bit set");
+    if ((ranges & ~ALL_RANGES) != 0 || (defaultContext & ~ALL_RANGES) != 0)
+      throw new IllegalArgumentException("argument out of range");
+    return new NumericShaper(defaultContext, ranges);
   }
 }