aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Tromey <tromey@redhat.com>2006-01-07 00:46:28 +0000
committerTom Tromey <tromey@gcc.gnu.org>2006-01-07 00:46:28 +0000
commit37d41553c0da489e399559baca2e3affaeda13c1 (patch)
tree81b09ce77cc017aa1cefb4f34cac3d128da8a7fb
parent2b15cf3b5076c3c460d710c013d186d3365bf516 (diff)
downloadgcc-37d41553c0da489e399559baca2e3affaeda13c1.zip
gcc-37d41553c0da489e399559baca2e3affaeda13c1.tar.gz
gcc-37d41553c0da489e399559baca2e3affaeda13c1.tar.bz2
Character.java (SIZE, [...]): New fields from Classpath.
* java/lang/Character.java (SIZE, MAX_CACHE, charCache, MIN_SURROGATE, MAX_SURROGATE): New fields from Classpath. (MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE, MIN_LOW_SURROGATE, MAX_LOW_SURROGATE): Javadoc fixes. (valueOf, reverseBytes, isHighSurrogate, isLowSurrogate, isSurrogatePair, toCodePoint, codePointAt, codePointBefore): New methods from Classpath. * java/lang/String.java (codePointAt, codePointBefore, codePointCount, contains, replace): New methods from Classpath. (contentEquals): Declare. * java/lang/natString.cc (contentEquals): New method. From-SVN: r109445
-rw-r--r--libjava/ChangeLog14
-rw-r--r--libjava/java/lang/Character.java275
-rw-r--r--libjava/java/lang/String.java130
-rw-r--r--libjava/java/lang/natString.cc15
4 files changed, 426 insertions, 8 deletions
diff --git a/libjava/ChangeLog b/libjava/ChangeLog
index 76142c5..a5c5c40 100644
--- a/libjava/ChangeLog
+++ b/libjava/ChangeLog
@@ -1,3 +1,17 @@
+2006-01-06 Tom Tromey <tromey@redhat.com>
+
+ * java/lang/Character.java (SIZE, MAX_CACHE, charCache,
+ MIN_SURROGATE, MAX_SURROGATE): New fields from Classpath.
+ (MIN_HIGH_SURROGATE, MAX_HIGH_SURROGATE, MIN_LOW_SURROGATE,
+ MAX_LOW_SURROGATE): Javadoc fixes.
+ (valueOf, reverseBytes, isHighSurrogate, isLowSurrogate,
+ isSurrogatePair, toCodePoint, codePointAt, codePointBefore): New
+ methods from Classpath.
+ * java/lang/String.java (codePointAt, codePointBefore,
+ codePointCount, contains, replace): New methods from Classpath.
+ (contentEquals): Declare.
+ * java/lang/natString.cc (contentEquals): New method.
+
2005-12-26 Anthony Green <green@redhat.com>
* gnu/java/nio/SocketChannelImpl.java (read): Compute the right amount
diff --git a/libjava/java/lang/Character.java b/libjava/java/lang/Character.java
index aa29e0b..3cb73d0 100644
--- a/libjava/java/lang/Character.java
+++ b/libjava/java/lang/Character.java
@@ -1,5 +1,5 @@
/* java.lang.Character -- Wrapper class for char, and Unicode subsets
- Copyright (C) 1998, 1999, 2001, 2002, 2005 Free Software Foundation, Inc.
+ Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -1040,6 +1040,18 @@ public final class Character implements Serializable, Comparable
public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
/**
+ * The number of bits needed to represent a <code>char</code>.
+ * @since 1.5
+ */
+ public static final int SIZE = 16;
+
+ // This caches some Character values, and is used by boxing
+ // conversions via valueOf(). We must cache at least 0..127;
+ // this constant controls how much we actually cache.
+ private static final int MAX_CACHE = 127;
+ private static Character[] charCache = new Character[MAX_CACHE + 1];
+
+ /**
* Lu = Letter, Uppercase (Informative).
*
* @since 1.1
@@ -1434,34 +1446,48 @@ public final class Character implements Serializable, Comparable
/**
- * Minimum high surrrogate code in UTF-16 encoding.
+ * Minimum high surrogate code in UTF-16 encoding.
*
* @since 1.5
*/
public static final char MIN_HIGH_SURROGATE = '\ud800';
/**
- * Maximum high surrrogate code in UTF-16 encoding.
+ * Maximum high surrogate code in UTF-16 encoding.
*
* @since 1.5
*/
public static final char MAX_HIGH_SURROGATE = '\udbff';
/**
- * Minimum low surrrogate code in UTF-16 encoding.
+ * Minimum low surrogate code in UTF-16 encoding.
*
* @since 1.5
*/
public static final char MIN_LOW_SURROGATE = '\udc00';
/**
- * Maximum low surrrogate code in UTF-16 encoding.
+ * Maximum low surrogate code in UTF-16 encoding.
*
* @since 1.5
*/
public static final char MAX_LOW_SURROGATE = '\udfff';
/**
+ * Minimum surrogate code in UTF-16 encoding.
+ *
+ * @since 1.5
+ */
+ public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
+
+ /**
+ * Maximum surrogate code in UTF-16 encoding.
+ *
+ * @since 1.5
+ */
+ public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
+
+ /**
* Grabs an attribute offset from the Unicode attribute database. The lower
* 5 bits are the character type, the next 2 bits are flags, and the top
* 9 bits are the offset into the attribute tables. Note that the top 9
@@ -2213,6 +2239,37 @@ public final class Character implements Serializable, Comparable
}
/**
+ * Returns a <code>Character</code> object wrapping the value.
+ * In contrast to the <code>Character</code> constructor, this method
+ * will cache some values. It is used by boxing conversion.
+ *
+ * @param val the value to wrap
+ * @return the <code>Character</code>
+ *
+ * @since 1.5
+ */
+ public static Character valueOf(char val)
+ {
+   if (val <= MAX_CACHE)
+     synchronized (charCache)
+       {
+         if (charCache[val - MIN_VALUE] == null)
+           charCache[val - MIN_VALUE] = new Character(val);
+         return charCache[val - MIN_VALUE];
+       }
+   return new Character(val);
+ }
+
+ /**
+ * Swaps the high-order and the low-order byte of val.
+ * @since 1.5
+ */
+ public static char reverseBytes(char val)
+ {
+   return (char) (((val & 0xff) << 8) | ((val >>> 8) & 0xff));
+ }
+
+ /**
* Converts a unicode code point to a UTF-16 representation of that
* code point.
*
@@ -2280,7 +2337,7 @@ public final class Character implements Serializable, Comparable
* Return number of 16-bit characters required to represent the given
* code point.
*
- * @param codePoint a uncode code point
+ * @param codePoint a unicode code point
*
* @return 2 if codePoint >= 0x10000, 1 otherwise.
*
@@ -2325,4 +2382,210 @@ public final class Character implements Serializable, Comparable
{
return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT;
}
+
+ /**
+ * Tests whether a character is a UTF-16 high (leading) surrogate.
+ * @param ch the character to test
+ * @return true if ch lies between MIN_HIGH_SURROGATE and
+ * MAX_HIGH_SURROGATE, inclusive
+ * @since 1.5
+ */
+ public static boolean isHighSurrogate(char ch)
+ {
+   return MIN_HIGH_SURROGATE <= ch && ch <= MAX_HIGH_SURROGATE;
+ }
+
+ /**
+ * Tests whether a character is a UTF-16 low (trailing) surrogate.
+ * @param ch the character to test
+ * @return true if ch lies between MIN_LOW_SURROGATE and
+ * MAX_LOW_SURROGATE, inclusive
+ * @since 1.5
+ */
+ public static boolean isLowSurrogate(char ch)
+ {
+   return MIN_LOW_SURROGATE <= ch && ch <= MAX_LOW_SURROGATE;
+ }
+
+ /**
+ * Tests whether two characters, taken in order, form a surrogate
+ * pair: a high surrogate immediately followed by a low surrogate.
+ * @param ch1 the first character, expected to be the high surrogate
+ * @param ch2 the second character, expected to be the low surrogate
+ * @return true if the characters compose a surrogate pair
+ * @since 1.5
+ */
+ public static boolean isSurrogatePair(char ch1, char ch2)
+ {
+   if (! isHighSurrogate(ch1))
+     return false;
+   return isLowSurrogate(ch2);
+ }
+
+ /**
+ * Given a valid surrogate pair, this returns the corresponding
+ * code point.
+ * @param high the high character of the pair
+ * @param low the low character of the pair
+ * @return the corresponding code point
+ *
+ * @since 1.5
+ */
+ public static int toCodePoint(char high, char low)
+ {
+ return ((high - MIN_HIGH_SURROGATE) << 10) + (low - MIN_LOW_SURROGATE);
+ }
+
+ /**
+ * Reads the code point that starts at the given index of a
+ * CharSequence. When the char at that index is a high surrogate
+ * and the next char exists and is a low surrogate, the pair is
+ * combined into one supplementary code point; in every other case
+ * the char at the index is returned unchanged.
+ * @param sequence the CharSequence to read from
+ * @param index the zero-based index of the first char to examine
+ * @return the code point starting at index
+ * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
+ * @since 1.5
+ */
+ public static int codePointAt(CharSequence sequence, int index)
+ {
+   final int length = sequence.length();
+   if (index < 0 || index >= length)
+     throw new IndexOutOfBoundsException();
+   final char first = sequence.charAt(index);
+   final int next = index + 1;
+   if (isHighSurrogate(first) && next < length)
+     {
+       final char second = sequence.charAt(next);
+       if (isLowSurrogate(second))
+         return toCodePoint(first, second);
+     }
+   return first;
+ }
+
+ /**
+ * Reads the code point that starts at the given index of a char
+ * array, using the array length as the limit. This simply
+ * delegates to #codePointAt(char[], int, int).
+ *
+ * @param chars the character array in which to look
+ * @param index the zero-based index of the first char to examine
+ * @return the code point reported by the three-argument overload
+ * @throws IndexOutOfBoundsException if the three-argument overload
+ * rejects the index or the limit
+ * @since 1.5
+ */
+ public static int codePointAt(char[] chars, int index)
+ {
+   final int limit = chars.length;
+   return codePointAt(chars, index, limit);
+ }
+
+ /**
+ * Get the code point at the specified index in the character array.
+ * If the character is the start of a surrogate pair, and there is a
+ * following character within the specified range, and this
+ * character completes the pair, then the corresponding
+ * supplementary code point is returned. Otherwise, the character
+ * at the index is returned.
+ *
+ * @param chars the character array in which to look
+ * @param index the index of the codepoint to get, starting at 0
+ * @param limit the limit past which characters should not be examined
+ * @return the codepoint at the specified index
+ * @throws IndexOutOfBoundsException if index is negative or &gt;=
+ * limit, or if limit is negative or &gt; the length of the array
+ * @since 1.5
+ */
+ public static int codePointAt(char[] chars, int index, int limit)
+ {
+   // limit is exclusive, so limit == chars.length is a legal bound
+   // that lets the whole array be examined.
+   if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
+     throw new IndexOutOfBoundsException();
+   char high = chars[index];
+   if (! isHighSurrogate(high) || ++index >= limit)
+     return high;
+   char low = chars[index];
+   return isLowSurrogate(low) ? toCodePoint(high, low) : high;
+ }
+
+ /**
+ * Get the code point before the specified index. This checks the
+ * characters at <code>index-1</code> and <code>index-2</code> to see
+ * if they form a supplementary code point. If they do not, the
+ * character at <code>index-1</code> is returned.
+ * @param chars the character array
+ * @param index the index just past the codepoint to get, starting at 0
+ * @return the codepoint before the specified index
+ * @throws IndexOutOfBoundsException if index is less than 1 or
+ * greater than the length of the array
+ * @since 1.5
+ */
+ public static int codePointBefore(char[] chars, int index)
+ {
+   // The whole array may be examined, so the range starts at 0.
+   return codePointBefore(chars, index, 0);
+ }
+
+ /**
+ * Get the code point before the specified index. This is like
+ * #codePointAt(char[], int), but checks the characters at
+ * <code>index-1</code> and <code>index-2</code> to see if they form
+ * a supplementary code point. If they do not, the character at
+ * <code>index-1</code> is returned. The start parameter is used to
+ * limit the range of the array which may be examined.
+ *
+ * @param chars the character array
+ * @param index the index just past the codepoint to get, starting at 0
+ * @param start the index before which characters should not be examined
+ * @return the codepoint before the specified index
+ * @throws IndexOutOfBoundsException if index is &lt;= start or &gt;
+ * the length of the array, or if start is negative or &gt;= the
+ * length of the array
+ * @since 1.5
+ */
+ public static int codePointBefore(char[] chars, int index, int start)
+ {
+   // index == start must be rejected: it would read chars[start - 1],
+   // which lies before the range the caller allowed.
+   if (index <= start || index > chars.length
+       || start < 0 || start >= chars.length)
+     throw new IndexOutOfBoundsException();
+   --index;
+   char low = chars[index];
+   if (! isLowSurrogate(low) || --index < start)
+     return low;
+   char high = chars[index];
+   return isHighSurrogate(high) ? toCodePoint(high, low) : low;
+ }
+
+ /**
+ * Reads the code point that ends just before the given index of a
+ * CharSequence. The char at <code>index-1</code> is examined
+ * first; when it is a low surrogate and the char at
+ * <code>index-2</code> exists and is a high surrogate, the pair is
+ * combined into one supplementary code point. In every other case
+ * the char at <code>index-1</code> is returned unchanged.
+ * @param sequence the CharSequence to read from
+ * @param index the index just past the code point to get
+ * @return the code point ending just before index
+ * @throws IndexOutOfBoundsException if index is &lt; 1 or &gt; length()
+ * @since 1.5
+ */
+ public static int codePointBefore(CharSequence sequence, int index)
+ {
+   final int length = sequence.length();
+   if (index < 1 || index > length)
+     throw new IndexOutOfBoundsException();
+   final char last = sequence.charAt(index - 1);
+   if (isLowSurrogate(last) && index >= 2)
+     {
+       final char previous = sequence.charAt(index - 2);
+       if (isHighSurrogate(previous))
+         return toCodePoint(previous, last);
+     }
+   return last;
+ }
} // class Character
diff --git a/libjava/java/lang/String.java b/libjava/java/lang/String.java
index 95ad1fe..3e0bfbe 100644
--- a/libjava/java/lang/String.java
+++ b/libjava/java/lang/String.java
@@ -1,5 +1,5 @@
/* String.java -- immutable character sequences; the object of string literals
- Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
+ Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -455,6 +455,40 @@ public final class String implements Serializable, Comparable, CharSequence
public native char charAt(int index);
/**
+ * Get the code point at the specified index. This is like #charAt(int),
+ * but if the character is the start of a surrogate pair, and the
+ * following character completes the pair, then the corresponding
+ * supplementary code point is returned.
+ * @param index the index of the codepoint to get, starting at 0
+ * @return the codepoint at the specified index
+ * @throws IndexOutOfBoundsException if index is negative or &gt;= length()
+ * @since 1.5
+ */
+ public int codePointAt(int index)
+ {
+   // A String is immutable, so no locking is needed here. Use the
+   // CharSequence overload as we get better range checking this way.
+   return Character.codePointAt(this, index);
+ }
+
+ /**
+ * Get the code point before the specified index. This is like
+ * #codePointAt(int), but checks the characters at <code>index-1</code> and
+ * <code>index-2</code> to see if they form a supplementary code point.
+ * @param index the index just past the codepoint to get, starting at 0
+ * @return the codepoint before the specified index
+ * @throws IndexOutOfBoundsException if index is less than 1 or
+ * greater than length()
+ * @since 1.5
+ */
+ public int codePointBefore(int index)
+ {
+   // A String is immutable, so no locking is needed here. Use the
+   // CharSequence overload as we get better range checking this way.
+   return Character.codePointBefore(this, index);
+ }
+
+ /**
* Copies characters from this String starting at a specified start index,
* ending at a specified stop index, to a character array starting at
* a specified destination begin index.
@@ -566,6 +600,18 @@ public final class String implements Serializable, Comparable, CharSequence
public native boolean contentEquals(StringBuffer buffer);
/**
+ * Compares the given CharSequence to this String. This is true if
+ * the CharSequence has the same length as this String and the same
+ * character at every index at the moment of the call.
+ *
+ * @param seq the CharSequence to compare to
+ * @return true if CharSequence has the same character sequence
+ * @throws NullPointerException if the given CharSequence is null
+ * @since 1.5
+ */
+ public native boolean contentEquals(CharSequence seq);
+
+ /**
* Compares a String to this String, ignoring case. This does not handle
* multi-character capitalization exceptions; instead the comparison is
* made on a character-by-character basis, and is true if:<br><ul>
@@ -1259,6 +1305,88 @@ public final class String implements Serializable, Comparable, CharSequence
*/
public native String intern();
+ /**
+ * Return the number of code points between two indices in the
+ * <code>String</code>. An unpaired surrogate counts as a
+ * code point for this purpose. Characters outside the indicated
+ * range are not examined, even if the range ends in the middle of a
+ * surrogate pair.
+ *
+ * @param start the starting index
+ * @param end one past the ending index
+ * @return the number of code points
+ * @since 1.5
+ */
+ public synchronized int codePointCount(int start, int end)
+ {
+ if (start < 0 || end >= count || start > end)
+ throw new StringIndexOutOfBoundsException();
+
+ int count = 0;
+ while (start < end)
+ {
+ char base = charAt(start);
+ if (base < Character.MIN_HIGH_SURROGATE
+ || base > Character.MAX_HIGH_SURROGATE
+ || start == end
+ || start == count
+ || charAt(start + 1) < Character.MIN_LOW_SURROGATE
+ || charAt(start + 1) > Character.MAX_LOW_SURROGATE)
+ {
+ // Nothing.
+ }
+ else
+ {
+ // Surrogate pair.
+ ++start;
+ }
+ ++start;
+ ++count;
+ }
+ return count;
+ }
+
+ /**
+ * Tests whether the character sequence described by s occurs
+ * anywhere inside this String.
+ * @param s the CharSequence to search for
+ * @return true iff this String contains s
+ *
+ * @since 1.5
+ */
+ public boolean contains (CharSequence s)
+ {
+   return indexOf(s.toString()) >= 0;
+ }
+
+ /**
+ * Returns a string that is this string with all instances of the sequence
+ * represented by <code>target</code> replaced by the sequence in
+ * <code>replacement</code>. An empty target matches at every position.
+ * @param target the sequence to be replaced
+ * @param replacement the sequence used as the replacement
+ * @return the string constructed as above
+ * @since 1.5
+ */
+ public String replace (CharSequence target, CharSequence replacement)
+ {
+   String targetString = target.toString();
+   String replaceString = replacement.toString();
+   int targetLength = targetString.length();
+   // Skip one extra character after an empty match so the scan advances.
+   int step = replaceString.length() + (targetLength == 0 ? 1 : 0);
+   StringBuilder result = new StringBuilder(this);
+   int startPos = this.indexOf(targetString);
+   while (startPos != -1)
+     {
+       result.replace(startPos, startPos + targetLength, replaceString);
+       // Stop once past the end; an empty target would match there forever.
+       int next = startPos + step;
+       startPos = next > result.length() ? -1 : result.indexOf(targetString, next);
+     }
+   return result.toString();
+ }
+
private native void init(char[] chars, int offset, int count,
boolean dont_copy);
diff --git a/libjava/java/lang/natString.cc b/libjava/java/lang/natString.cc
index c8f3129..3f63081 100644
--- a/libjava/java/lang/natString.cc
+++ b/libjava/java/lang/natString.cc
@@ -1,6 +1,6 @@
// natString.cc - Implementation of java.lang.String native methods.
-/* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation
+/* Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation
This file is part of libgcj.
@@ -15,6 +15,7 @@ details. */
#include <gcj/cni.h>
#include <java/lang/Character.h>
+#include <java/lang/CharSequence.h>
#include <java/lang/String.h>
#include <java/lang/IndexOutOfBoundsException.h>
#include <java/lang/ArrayIndexOutOfBoundsException.h>
@@ -564,6 +565,18 @@ java::lang::String::contentEquals(java::lang::StringBuffer* buffer)
return true;
}
+jboolean
+java::lang::String::contentEquals(java::lang::CharSequence *seq)
+{
+  if (count != seq->length())
+    return false;
+  const jchar *chars = JvGetStringChars(this);
+  jint pos = 0;
+  while (pos < count && chars[pos] == seq->charAt(pos))
+    ++pos;
+  return pos == count;
+}
+
jchar
java::lang::String::charAt(jint i)
{