diff options
Diffstat (limited to 'libjava/classpath/java/awt/font/NumericShaper.java')
-rw-r--r-- | libjava/classpath/java/awt/font/NumericShaper.java | 352 |
1 files changed, 320 insertions, 32 deletions
diff --git a/libjava/classpath/java/awt/font/NumericShaper.java b/libjava/classpath/java/awt/font/NumericShaper.java index efbdcd4..add1c6a 100644 --- a/libjava/classpath/java/awt/font/NumericShaper.java +++ b/libjava/classpath/java/awt/font/NumericShaper.java @@ -39,99 +39,387 @@ exception statement from your version. */ package java.awt.font; import java.io.Serializable; +import java.lang.Character.UnicodeBlock; /** + * This class handles numeric shaping. A shaper can either be contextual + * or not. A non-contextual shaper will always translate ASCII digits + * in its input into the target Unicode range. A contextual shaper will + * change the target Unicode range depending on the characters it has + * previously processed. + * * @author Michael Koch + * @author Tom Tromey + * * @since 1.4 + * @specnote This class does not handle LIMBU or OSMANYA. + * @specnote The JDK does not seem to properly handle ranges without a + * digit zero, such as TAMIL. This implementation does. */ public final class NumericShaper implements Serializable { private static final long serialVersionUID = -8022764705923730308L; - + + /** Convenience constant representing all the valid Unicode ranges. */ public static final int ALL_RANGES = 524287; + + /** + * Constant representing the Unicode ARABIC range. Shaping done + * using this range will translate to the arabic decimal characters. + * Use EASTERN_ARABIC if you want to shape to the eastern arabic + * (also known as the extended arabic) decimal characters. + */ public static final int ARABIC = 2; + + /** Constant representing the Unicode BENGALI range. */ public static final int BENGALI = 16; + + /** Constant representing the Unicode DEVANAGARI range. */ public static final int DEVANAGARI = 8; + + /** + * Constant representing the Unicode extended arabic range. + * In Unicode there are two different sets of arabic digits; + * this selects the extended or eastern set. + */ public static final int EASTERN_ARABIC = 4; + + /** + * Constant representing the Unicode ETHIOPIC range. Note that + * there is no digit zero in this range; an ASCII digit zero + * is left unchanged when shaping to this range. + */ public static final int ETHIOPIC = 65536; + + /** + * Constant representing the Unicode EUROPEAN range. For + * contextual shaping purposes, characters in the various + * extended Latin character blocks are recognized as EUROPEAN. + */ public static final int EUROPEAN = 1; + + /** Constant representing the Unicode GUJARATI range. */ public static final int GUJARATI = 64; + + /** Constant representing the Unicode GURMUKHI range. */ public static final int GURMUKHI = 32; + + /** Constant representing the Unicode KANNADA range. */ public static final int KANNADA = 1024; + + /** Constant representing the Unicode KHMER range. */ public static final int KHMER = 131072; + + /** Constant representing the Unicode LAO range. */ public static final int LAO = 8192; + + /** Constant representing the Unicode MALAYALAM range. */ public static final int MALAYALAM = 2048; + + /** Constant representing the Unicode MONGOLIAN range. */ public static final int MONGOLIAN = 262144; + + /** Constant representing the Unicode MYANMAR range. */ public static final int MYANMAR = 32768; + + /** Constant representing the Unicode ORIYA range. */ public static final int ORIYA = 128; + + /** + * Constant representing the Unicode TAMIL range. Note that + * there is no digit zero in this range; an ASCII digit zero + * is left unchanged when shaping to this range. + */ public static final int TAMIL = 256; + + /** Constant representing the Unicode TELUGU range. */ public static final int TELUGU = 512; + + /** Constant representing the Unicode THAI range. */ public static final int THAI = 4096; + + /** Constant representing the Unicode TIBETAN range. */ public static final int TIBETAN = 16384; - private int ranges; - private int context; - - private NumericShaper (int ranges, int context) + /** + * This table holds the zero digits for each language. This is hard-coded + * because the values will not change and the table layout is tied to the + * other constants in this class in any case. In the two places where a + * language does not have a zero digit, the character immediately preceeding + * the one digit is used instead. These languages are special-cased in + * the shaping code. + */ + private static final char[] zeroDigits = { - this.ranges = ranges; - this.context = context; + '0', // EUROPEAN + '\u0660', // ARABIC + '\u06f0', // EASTERN_ARABIC + '\u0966', // DEVANAGARI + '\u09e6', // BENGALI + '\u0a66', // GURMUKHI + '\u0ae6', // GUJARATI + '\u0b66', // ORIYA + '\u0be6', // TAMIL - special case as there is no digit zero + '\u0c66', // TELUGU + '\u0ce6', // KANNADA + '\u0d66', // MALAYALAM + '\u0e50', // THAI + '\u0ed0', // LAO + '\u0f20', // TIBETAN + '\u1040', // MYANMAR + '\u1368', // ETHIOPIC - special case as there is no digit zero + '\u17e0', // KHMER + '\u1810' // MONGOLIAN + }; + + /** + * The default initial context for this shaper, specified as + * an integer from 0 to 18. + */ + private int key; + + /** + * The target ranges handled by this shaper. If the shaper + * is not contextual, the high bit of this field will be set. + * @specnote This was discovered by reading the serialization spec + */ + private int mask; + + /** + * Create a new numeric shaper. The key given is a constant from + * this class, the constructor turns it into its internal form. + * @param key the key to use, as one of the manifest constants + * @param mask a mask of languages to shape for + */ + private NumericShaper (int key, int mask) + { + // This internal form is a bit goofy, but it is specified by + // the serialization spec. + this.key = Integer.numberOfTrailingZeros(key); + this.mask = mask; } - public boolean equals (Object obj) + /** + * Return an integer representing all the languages for which this + * shaper will shape. The result is taken by "or"ing together + * the constants representing the various languages. + */ + public int getRanges () { - if (! (obj instanceof NumericShaper)) - return false; + return mask & ALL_RANGES; + } - NumericShaper tmp = (NumericShaper) obj; - - return (ranges == tmp.ranges - && context == tmp.context); + /** + * Return true if this shaper is contextual, false if it is not. + */ + public boolean isContextual () + { + return mask > 0; } - public static NumericShaper getContextualShaper (int ranges) + /** + * Shape the text in the given array. The starting context will + * be the context passed to the shaper at creation time. + * @param text the text to shape + * @param start the index of the starting character of the array + * @param count the number of characters in the array + */ + public void shape (char[] text, int start, int count) { - throw new Error ("not implemented"); + shape (text, start, count, 1 << key); } - public static NumericShaper getContextualShaper (int ranges, - int defaultContext) + /** + * Given a unicode block object, return corresponding language constant. + * If the block is not recognized, returns zero. Note that as there + * is no separate ARABIC block in Character, this case must + * be specially handled by the caller; EASTERN_ARABIC is preferred when + * both are specified. + * @param b the unicode block to classify + * @return the language constant, or zero if not recognized + */ + private int classify(UnicodeBlock b) { - throw new Error ("not implemented"); + if (b == null) + return 0; + // ARABIC is handled by the caller; from testing we know + // that EASTERN_ARABIC takes precedence. + if (b == UnicodeBlock.ARABIC) + return EASTERN_ARABIC; + if (b == UnicodeBlock.BENGALI) + return BENGALI; + if (b == UnicodeBlock.DEVANAGARI) + return DEVANAGARI; + if (b == UnicodeBlock.ETHIOPIC) + return ETHIOPIC; + if (b == UnicodeBlock.BASIC_LATIN + || b == UnicodeBlock.LATIN_1_SUPPLEMENT + || b == UnicodeBlock.LATIN_EXTENDED_A + || b == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL + || b == UnicodeBlock.LATIN_EXTENDED_B) + return EUROPEAN; + if (b == UnicodeBlock.GUJARATI) + return GUJARATI; + if (b == UnicodeBlock.GURMUKHI) + return GURMUKHI; + if (b == UnicodeBlock.KANNADA) + return KANNADA; + if (b == UnicodeBlock.KHMER) + return KHMER; + if (b == UnicodeBlock.LAO) + return LAO; + if (b == UnicodeBlock.MALAYALAM) + return MALAYALAM; + if (b == UnicodeBlock.MONGOLIAN) + return MONGOLIAN; + if (b == UnicodeBlock.MYANMAR) + return MYANMAR; + if (b == UnicodeBlock.ORIYA) + return ORIYA; + if (b == UnicodeBlock.TAMIL) + return TAMIL; + if (b == UnicodeBlock.TELUGU) + return TELUGU; + if (b == UnicodeBlock.THAI) + return THAI; + if (b == UnicodeBlock.TIBETAN) + return TIBETAN; + return 0; } - public int getRanges () + /** + * Shape the given text, using the indicated initial context. + * If this shaper is not a contextual shaper, then the given context + * will be ignored. + * @param text the text to shape + * @param start the index of the first character of the text to shape + * @param count the number of characters to shape in the text + * @param context the initial context + * @throws IllegalArgumentException if the initial context is invalid + */ + public void shape (char[] text, int start, int count, int context) { - return ranges; + int currentContext; + if (isContextual()) + { + if (Integer.bitCount(context) != 1 || (context & ~ALL_RANGES) != 0) + throw new IllegalArgumentException("invalid context argument"); + // If the indicated context is not one we are handling, reset it. + if ((context & mask) == 0) + currentContext = -1; + else + currentContext = Integer.numberOfTrailingZeros(context); + } + else + currentContext = key; + + for (int i = 0; i < count; ++i) + { + char c = text[start + i]; + if (c >= '0' && c <= '9') + { + if (currentContext >= 0) + { + // Shape into the current context. + if (c == '0' + && ((1 << currentContext) == TAMIL + || (1 << currentContext) == ETHIOPIC)) + { + // No digit 0 in this context; do nothing. + } + else + text[start + i] + = (char) (zeroDigits[currentContext] + c - '0'); + } + } + else if (isContextual()) + { + // if c is in a group, set currentContext; else reset it. + int group = classify(UnicodeBlock.of(c)); + // Specially handle ARABIC. + if (group == EASTERN_ARABIC && (mask & EASTERN_ARABIC) == 0 + && (mask & ARABIC) != 0) + group = ARABIC; + if ((mask & group) != 0) + { + // The character was classified as being in a group + // we recognize, and it was selected by the shaper. + // So, change the context. + currentContext = Integer.numberOfTrailingZeros(group); + } + } + } } - public static NumericShaper getShaper (int singleRange) + public boolean equals (Object obj) { - throw new Error ("not implemented"); + if (! (obj instanceof NumericShaper)) + return false; + NumericShaper tmp = (NumericShaper) obj; + return key == tmp.key && mask == tmp.mask; } public int hashCode () { - throw new Error ("not implemented"); + return key ^ mask; } - public boolean isContextual () + public String toString () { - throw new Error ("not implemented"); + // For debugging only. + return "key=" + key + "; mask=" + mask; } - public void shape (char[] text, int start, int count) + /** + * Return a non-contextual shaper which can shape to a single range. + * All ASCII digits in the input text are translated to this language. + * @param singleRange the target language + * @return a non-contextual shaper for this language + * @throws IllegalArgumentException if the argument does not name a + * single language, as specified by the constants declared in this class + */ + public static NumericShaper getShaper (int singleRange) { - shape (text, start, count, context); + if (Integer.bitCount(singleRange) != 1) + throw new IllegalArgumentException("more than one bit set in argument"); + if ((singleRange & ~ALL_RANGES) != 0) + throw new IllegalArgumentException("argument out of range"); + return new NumericShaper(singleRange, Integer.MIN_VALUE | singleRange); } - public void shape (char[] text, int start, int count, int context) + /** + * Return a contextual shaper which can shape to any of the indicated + * languages. The default initial context for this shaper is EUROPEAN. + * @param ranges the ranges to shape to + * @return a contextual shaper which will target any of these ranges + * @throws IllegalArgumentException if the argument specifies an + * unrecognized range + */ + public static NumericShaper getContextualShaper (int ranges) { - throw new Error ("not implemented"); + if ((ranges & ~ALL_RANGES) != 0) + throw new IllegalArgumentException("argument out of range"); + return new NumericShaper(EUROPEAN, ranges); } - public String toString () + /** + * Return a contextual shaper which can shape to any of the indicated + * languages. The default initial context for this shaper is given as + * an argument. + * @param ranges the ranges to shape to + * @param defaultContext the default initial context + * @return a contextual shaper which will target any of these ranges + * @throws IllegalArgumentException if the ranges argument specifies an + * unrecognized range, or if the defaultContext argument does not specify + * a single valid range + */ + public static NumericShaper getContextualShaper (int ranges, + int defaultContext) { - throw new Error ("not implemented"); + if (Integer.bitCount(defaultContext) != 1) + throw new IllegalArgumentException("more than one bit set in context"); + if ((ranges & ~ALL_RANGES) != 0 || (defaultContext & ~ALL_RANGES) != 0) + throw new IllegalArgumentException("argument out of range"); + return new NumericShaper(defaultContext, ranges); } } |