aboutsummaryrefslogtreecommitdiff
path: root/libjava/java/lang/Character.java
diff options
context:
space:
mode:
authorMark Wielaard <mark@klomp.org>2007-03-05 17:27:44 +0000
committerTom Tromey <tromey@gcc.gnu.org>2007-03-05 17:27:44 +0000
commit666ff4f65d256dec0974941605cf76db0b138a76 (patch)
tree7094a7844e13e49f92832894c3b7a7f8ff638d8b /libjava/java/lang/Character.java
parentb48a45922ddcdfd0aa2cd3ff24c7c158d20c1f2e (diff)
downloadgcc-666ff4f65d256dec0974941605cf76db0b138a76.zip
gcc-666ff4f65d256dec0974941605cf76db0b138a76.tar.gz
gcc-666ff4f65d256dec0974941605cf76db0b138a76.tar.bz2
Character.java: Re-merged with Classpath.
2007-03-05 Mark Wielaard <mark@klomp.org> * java/lang/Character.java: Re-merged with Classpath. * java/lang/natString.cc (nativeCompareTo): Renamed from compareTo. * java/lang/StringBuilder.java: Re-merged with Classpath. * java/lang/String.java: Re-merged with Classpath. (nativeCompareTo): Renamed from compareTo. * java/lang/StringBuffer.java: Re-merged with Classpath. * jni.cc (_Jv_JNI_GetAnyMethodID): Split calls to append. From-SVN: r122560
Diffstat (limited to 'libjava/java/lang/Character.java')
-rw-r--r--libjava/java/lang/Character.java448
1 files changed, 316 insertions, 132 deletions
diff --git a/libjava/java/lang/Character.java b/libjava/java/lang/Character.java
index ec6d2a4..0cc9d3c 100644
--- a/libjava/java/lang/Character.java
+++ b/libjava/java/lang/Character.java
@@ -1,5 +1,6 @@
/* java.lang.Character -- Wrapper class for char, and Unicode subsets
- Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006, 2007
+ Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -54,7 +55,7 @@ import java.util.Locale;
/**
* Wrapper class for the primitive char data type. In addition, this class
* allows one to retrieve property information and perform transformations
- * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
+ * on the defined characters in the Unicode Standard, Version 4.0.0.
* java.lang.Character is designed to be very dynamic, and as such, it
* retrieves information on the Unicode character set from a separate
* database, gnu.java.lang.CharData, which can be easily upgraded.
@@ -62,7 +63,7 @@ import java.util.Locale;
* <p>For predicates, boundaries are used to describe
* the set of characters for which the method will return true.
* This syntax uses fairly normal regular expression notation.
- * See 5.13 of the Unicode Standard, Version 3.0, for the
+ * See 5.13 of the Unicode Standard, Version 4.0, for the
* boundary specification.
*
* <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
@@ -72,10 +73,11 @@ import java.util.Locale;
* @author Paul N. Fisher
* @author Jochen Hoenicke
* @author Eric Blake (ebb9@email.byu.edu)
+ * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
* @since 1.0
- * @status updated to 1.4
+ * @status partly updated to 1.5; some things still missing
*/
-public final class Character implements Serializable, Comparable
+public final class Character implements Serializable, Comparable<Character>
{
/**
* A subset of Unicode blocks.
@@ -160,10 +162,8 @@ public final class Character implements Serializable, Comparable
/** The canonical name of the block according to the Unicode standard. */
private final String canonicalName;
- /** Constants for the <code>forName()</code> method */
- private static final int CANONICAL_NAME = 0;
- private static final int NO_SPACES_NAME = 1;
- private static final int CONSTANT_NAME = 2;
+ /** Enumeration for the <code>forName()</code> method */
+ private enum NameType { CANONICAL, NO_SPACES, CONSTANT; };
/**
* Constructor for strictly defined blocks.
@@ -173,7 +173,7 @@ public final class Character implements Serializable, Comparable
* @param name the block name
*/
private UnicodeBlock(int start, int end, String name,
- String canonicalName)
+ String canonicalName)
{
super(name);
this.start = start;
@@ -207,8 +207,8 @@ public final class Character implements Serializable, Comparable
public static UnicodeBlock of(int codePoint)
{
if (codePoint > MAX_CODE_POINT)
- throw new IllegalArgumentException("The supplied integer value is " +
- "too large to be a codepoint.");
+ throw new IllegalArgumentException("The supplied integer value is " +
+ "too large to be a codepoint.");
// Simple binary search for the correct block.
int low = 0;
int hi = sets.length - 1;
@@ -262,59 +262,51 @@ public final class Character implements Serializable, Comparable
*/
public static final UnicodeBlock forName(String blockName)
{
- int type;
+ NameType type;
if (blockName.indexOf(' ') != -1)
- type = CANONICAL_NAME;
+ type = NameType.CANONICAL;
else if (blockName.indexOf('_') != -1)
- type = CONSTANT_NAME;
+ type = NameType.CONSTANT;
else
- type = NO_SPACES_NAME;
+ type = NameType.NO_SPACES;
Collator usCollator = Collator.getInstance(Locale.US);
usCollator.setStrength(Collator.PRIMARY);
/* Special case for deprecated blocks not in sets */
switch (type)
{
- case CANONICAL_NAME:
+ case CANONICAL:
if (usCollator.compare(blockName, "Surrogates Area") == 0)
return SURROGATES_AREA;
break;
- case NO_SPACES_NAME:
+ case NO_SPACES:
if (usCollator.compare(blockName, "SurrogatesArea") == 0)
return SURROGATES_AREA;
break;
- case CONSTANT_NAME:
+ case CONSTANT:
if (usCollator.compare(blockName, "SURROGATES_AREA") == 0)
return SURROGATES_AREA;
break;
}
/* Other cases */
- int setLength = sets.length;
switch (type)
{
- case CANONICAL_NAME:
- for (int i = 0; i < setLength; i++)
- {
- UnicodeBlock block = sets[i];
- if (usCollator.compare(blockName, block.canonicalName) == 0)
- return block;
- }
+ case CANONICAL:
+ for (UnicodeBlock block : sets)
+ if (usCollator.compare(blockName, block.canonicalName) == 0)
+ return block;
break;
- case NO_SPACES_NAME:
- for (int i = 0; i < setLength; i++)
- {
- UnicodeBlock block = sets[i];
- String nsName = block.canonicalName.replaceAll(" ","");
- if (usCollator.compare(blockName, nsName) == 0)
- return block;
- }
- break;
- case CONSTANT_NAME:
- for (int i = 0; i < setLength; i++)
- {
- UnicodeBlock block = sets[i];
- if (usCollator.compare(blockName, block.toString()) == 0)
- return block;
- }
+ case NO_SPACES:
+ for (UnicodeBlock block : sets)
+ {
+ String nsName = block.canonicalName.replaceAll(" ","");
+ if (usCollator.compare(blockName, nsName) == 0)
+ return block;
+ }
+ break;
+ case CONSTANT:
+ for (UnicodeBlock block : sets)
+ if (usCollator.compare(blockName, block.toString()) == 0)
+ return block;
break;
}
throw new IllegalArgumentException("No Unicode block found for " +
@@ -1517,10 +1509,11 @@ public final class Character implements Serializable, Comparable
* this. These are also returned from calls to <code>of(int)</code>
* and <code>of(char)</code>.
*/
+ @Deprecated
public static final UnicodeBlock SURROGATES_AREA
= new UnicodeBlock(0xD800, 0xDFFF,
"SURROGATES_AREA",
- "Surrogates Area");
+ "Surrogates Area");
/**
* The defined subsets.
@@ -1699,11 +1692,78 @@ public final class Character implements Serializable, Comparable
public static final char MAX_VALUE = '\uFFFF';
/**
+ * The minimum Unicode 4.0 code point. This value is <code>0</code>.
+ * @since 1.5
+ */
+ public static final int MIN_CODE_POINT = 0;
+
+ /**
+ * The maximum Unicode 4.0 code point, which is greater than the range
+ * of the char data type.
+ * This value is <code>0x10FFFF</code>.
+ * @since 1.5
+ */
+ public static final int MAX_CODE_POINT = 0x10FFFF;
+
+ /**
+ * The minimum Unicode high surrogate code unit, or
+ * <emph>leading-surrogate</emph>, in the UTF-16 character encoding.
+ * This value is <code>'\uD800'</code>.
+ * @since 1.5
+ */
+ public static final char MIN_HIGH_SURROGATE = '\uD800';
+
+ /**
+ * The maximum Unicode high surrogate code unit, or
+ * <emph>leading-surrogate</emph>, in the UTF-16 character encoding.
+ * This value is <code>'\uDBFF'</code>.
+ * @since 1.5
+ */
+ public static final char MAX_HIGH_SURROGATE = '\uDBFF';
+
+ /**
+ * The minimum Unicode low surrogate code unit, or
+ * <emph>trailing-surrogate</emph>, in the UTF-16 character encoding.
+ * This value is <code>'\uDC00'</code>.
+ * @since 1.5
+ */
+ public static final char MIN_LOW_SURROGATE = '\uDC00';
+
+ /**
+ * The maximum Unicode low surrogate code unit, or
+ * <emph>trailing-surrogate</emph>, in the UTF-16 character encoding.
+ * This value is <code>'\uDFFF'</code>.
+ * @since 1.5
+ */
+ public static final char MAX_LOW_SURROGATE = '\uDFFF';
+
+ /**
+ * The minimum Unicode surrogate code unit in the UTF-16 character encoding.
+ * This value is <code>'\uD800'</code>.
+ * @since 1.5
+ */
+ public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
+
+ /**
+ * The maximum Unicode surrogate code unit in the UTF-16 character encoding.
+ * This value is <code>'\uDFFF'</code>.
+ * @since 1.5
+ */
+ public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
+
+ /**
+ * The lowest possible supplementary Unicode code point (the first code
+ * point outside the basic multilingual plane (BMP)).
+ * This value is <code>0x10000</code>.
+ */
+ public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
+
+ /**
* Class object representing the primitive char data type.
*
* @since 1.1
*/
- public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
+ public static final Class<Character> TYPE = (Class<Character>) VMClassLoader.getPrimitiveClass('C');
/**
* The number of bits needed to represent a <code>char</code>.
@@ -2089,71 +2149,6 @@ public final class Character implements Serializable, Comparable
private static final int MIRROR_MASK = 0x40;
/**
- * Min value for supplementary code point.
- *
- * @since 1.5
- */
- public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
-
- /**
- * Min value for code point.
- *
- * @since 1.5
- */
- public static final int MIN_CODE_POINT = 0;
-
-
- /**
- * Max value for code point.
- *
- * @since 1.5
- */
- public static final int MAX_CODE_POINT = 0x010ffff;
-
-
- /**
- * Minimum high surrogate code in UTF-16 encoding.
- *
- * @since 1.5
- */
- public static final char MIN_HIGH_SURROGATE = '\ud800';
-
- /**
- * Maximum high surrogate code in UTF-16 encoding.
- *
- * @since 1.5
- */
- public static final char MAX_HIGH_SURROGATE = '\udbff';
-
- /**
- * Minimum low surrogate code in UTF-16 encoding.
- *
- * @since 1.5
- */
- public static final char MIN_LOW_SURROGATE = '\udc00';
-
- /**
- * Maximum low surrogate code in UTF-16 encoding.
- *
- * @since 1.5
- */
- public static final char MAX_LOW_SURROGATE = '\udfff';
-
- /**
- * Minimum surrogate code in UTF-16 encoding.
- *
- * @since 1.5
- */
- public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
-
- /**
- * Maximum low surrogate code in UTF-16 encoding.
- *
- * @since 1.5
- */
- public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
-
- /**
* Grabs an attribute offset from the Unicode attribute database. The lower
* 5 bits are the character type, the next 2 bits are flags, and the top
* 9 bits are the offset into the attribute tables. Note that the top 9
@@ -2504,6 +2499,209 @@ public final class Character implements Serializable, Comparable
| (1 << MODIFIER_LETTER)
| (1 << OTHER_LETTER))) != 0;
}
+
+ /**
+ * Returns the index into the given CharSequence that is offset
+ * <code>codePointOffset</code> code points from <code>index</code>.
+ * @param seq the CharSequence
+ * @param index the start position in the CharSequence
+ * @param codePointOffset the number of code points offset from the start
+ * position
+ * @return the index into the CharSequence that is codePointOffset code
+ * points offset from index
+ *
+ * @throws NullPointerException if seq is null
+ * @throws IndexOutOfBoundsException if index is negative or greater than the
+ * length of the sequence.
+ * @throws IndexOutOfBoundsException if codePointOffset is positive and the
+ * subsequence from index to the end of seq has fewer than codePointOffset
+ * code points
+ * @throws IndexOutOfBoundsException if codePointOffset is negative and the
+ * subsequence from the start of seq to index has fewer than
+ * (-codePointOffset) code points
+ * @since 1.5
+ */
+ public static int offsetByCodePoints(CharSequence seq,
+ int index,
+ int codePointOffset)
+ {
+ int len = seq.length();
+ if (index < 0 || index > len)
+ throw new IndexOutOfBoundsException();
+
+ int numToGo = codePointOffset;
+ int offset = index;
+ int adjust = 1;
+ if (numToGo >= 0)
+ {
+ for (; numToGo > 0; offset++)
+ {
+ numToGo--;
+ if (Character.isHighSurrogate(seq.charAt(offset))
+ && (offset + 1) < len
+ && Character.isLowSurrogate(seq.charAt(offset + 1)))
+ offset++;
+ }
+ return offset;
+ }
+ else
+ {
+ numToGo *= -1;
+ for (; numToGo > 0;)
+ {
+ numToGo--;
+ offset--;
+ if (Character.isLowSurrogate(seq.charAt(offset))
+ && (offset - 1) >= 0
+ && Character.isHighSurrogate(seq.charAt(offset - 1)))
+ offset--;
+ }
+ return offset;
+ }
+ }
+
+ /**
+ * Returns the index into the given char subarray that is offset
+ * <code>codePointOffset</code> code points from <code>index</code>.
+ * @param a the char array
+ * @param start the start index of the subarray
+ * @param count the length of the subarray
+ * @param index the index to be offset
+ * @param codePointOffset the number of code points offset from <code>index
+ * </code>
+ * @return the index into the char array
+ *
+ * @throws NullPointerException if a is null
+ * @throws IndexOutOfBoundsException if start or count is negative or if
+ * start + count is greater than the length of the array
+ * @throws IndexOutOfBoundsException if index is less than start or larger
+ * than start + count
+ * @throws IndexOutOfBoundsException if codePointOffset is positive and the
+ * subarray from index to start + count - 1 has fewer than codePointOffset
+ * code points.
+ * @throws IndexOutOfBoundsException if codePointOffset is negative and the
+ * subarray from start to index - 1 has fewer than (-codePointOffset) code
+ * points
+ * @since 1.5
+
+ */
+ public static int offsetByCodePoints(char[] a,
+ int start,
+ int count,
+ int index,
+ int codePointOffset)
+ {
+ int len = a.length;
+ int end = start + count;
+ if (start < 0 || count < 0 || end > len || index < start || index > end)
+ throw new IndexOutOfBoundsException();
+
+ int numToGo = codePointOffset;
+ int offset = index;
+ int adjust = 1;
+ if (numToGo >= 0)
+ {
+ for (; numToGo > 0; offset++)
+ {
+ numToGo--;
+ if (Character.isHighSurrogate(a[offset])
+ && (offset + 1) < len
+ && Character.isLowSurrogate(a[offset + 1]))
+ offset++;
+ }
+ return offset;
+ }
+ else
+ {
+ numToGo *= -1;
+ for (; numToGo > 0;)
+ {
+ numToGo--;
+ offset--;
+ if (Character.isLowSurrogate(a[offset])
+ && (offset - 1) >= 0
+ && Character.isHighSurrogate(a[offset - 1]))
+ offset--;
+ if (offset < start)
+ throw new IndexOutOfBoundsException();
+ }
+ return offset;
+ }
+
+ }
+
+ /**
+ * Returns the number of Unicode code points in the specified range of the
+ * given CharSequence. The first char in the range is at position
+ * beginIndex and the last one is at position endIndex - 1. Paired
+ * surrogates (supplementary characters are represented by a pair of chars -
+ * one from the high surrogates and one from the low surrogates)
+ * count as just one code point.
+ * @param seq the CharSequence to inspect
+ * @param beginIndex the beginning of the range
+ * @param endIndex the end of the range
+ * @return the number of Unicode code points in the given range of the
+ * sequence
+ * @throws NullPointerException if seq is null
+ * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
+ * larger than the length of seq, or if beginIndex is greater than endIndex.
+ * @since 1.5
+ */
+ public static int codePointCount(CharSequence seq, int beginIndex,
+ int endIndex)
+ {
+ int len = seq.length();
+ if (beginIndex < 0 || endIndex > len || beginIndex > endIndex)
+ throw new IndexOutOfBoundsException();
+
+ int count = 0;
+ for (int i = beginIndex; i < endIndex; i++)
+ {
+ count++;
+ // If there is a pairing, count it only once.
+ if (isHighSurrogate(seq.charAt(i)) && (i + 1) < endIndex
+ && isLowSurrogate(seq.charAt(i + 1)))
+ i ++;
+ }
+ return count;
+ }
+
+ /**
+ * Returns the number of Unicode code points in the specified range of the
+ * given char array. The first char in the range is at position
+ * offset and the length of the range is count. Paired surrogates
+ * (supplementary characters are represented by a pair of chars -
+ * one from the high surrogates and one from the low surrogates)
+ * count as just one code point.
+ * @param a the char array to inspect
+ * @param offset the beginning of the range
+ * @param count the length of the range
+ * @return the number of Unicode code points in the given range of the
+ * array
+ * @throws NullPointerException if a is null
+ * @throws IndexOutOfBoundsException if offset or count is negative or if
+ * offset + countendIndex is larger than the length of a.
+ * @since 1.5
+ */
+ public static int codePointCount(char[] a, int offset,
+ int count)
+ {
+ int len = a.length;
+ int end = offset + count;
+ if (offset < 0 || count < 0 || end > len)
+ throw new IndexOutOfBoundsException();
+
+ int counter = 0;
+ for (int i = offset; i < end; i++)
+ {
+ counter++;
+ // If there is a pairing, count it only once.
+ if (isHighSurrogate(a[i]) && (i + 1) < end
+ && isLowSurrogate(a[i + 1]))
+ i ++;
+ }
+ return counter;
+ }
/**
* Determines if a character is a Unicode letter or a Unicode digit. This
@@ -3497,30 +3695,13 @@ public final class Character implements Serializable, Comparable
}
/**
- * Compares an object to this Character. Assuming the object is a
- * Character object, this method performs the same comparison as
- * compareTo(Character).
- *
- * @param o object to compare
- * @return the comparison value
- * @throws ClassCastException if o is not a Character object
- * @throws NullPointerException if o is null
- * @see #compareTo(Character)
- * @since 1.2
- */
- public int compareTo(Object o)
- {
- return compareTo((Character) o);
- }
-
- /**
* Returns an <code>Character</code> object wrapping the value.
* In contrast to the <code>Character</code> constructor, this method
* will cache some values. It is used by boxing conversion.
*
* @param val the value to wrap
* @return the <code>Character</code>
- *
+ *
* @since 1.5
*/
public static Character valueOf(char val)
@@ -3529,9 +3710,9 @@ public final class Character implements Serializable, Comparable
return new Character(val);
synchronized (charCache)
{
- if (charCache[val - MIN_VALUE] == null)
- charCache[val - MIN_VALUE] = new Character(val);
- return charCache[val - MIN_VALUE];
+ if (charCache[val - MIN_VALUE] == null)
+ charCache[val - MIN_VALUE] = new Character(val);
+ return charCache[val - MIN_VALUE];
}
}
@@ -3559,6 +3740,9 @@ public final class Character implements Serializable, Comparable
*/
public static char[] toChars(int codePoint)
{
+ if (!isValidCodePoint(codePoint))
+ throw new IllegalArgumentException("Illegal Unicode code point : "
+ + codePoint);
char[] result = new char[charCount(codePoint)];
int ignore = toChars(codePoint, result, 0);
return result;
@@ -3776,7 +3960,7 @@ public final class Character implements Serializable, Comparable
*/
public static int codePointAt(char[] chars, int index, int limit)
{
- if (index < 0 || index >= limit || limit < 0 || limit >= chars.length)
+ if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
throw new IndexOutOfBoundsException();
char high = chars[index];
if (! isHighSurrogate(high) || ++index >= limit)