diff options
Diffstat (limited to 'libjava/classpath/gnu/java/text')
9 files changed, 640 insertions, 640 deletions
diff --git a/libjava/classpath/gnu/java/text/AttributedFormatBuffer.java b/libjava/classpath/gnu/java/text/AttributedFormatBuffer.java index c2aae96..2a89ae0 100644 --- a/libjava/classpath/gnu/java/text/AttributedFormatBuffer.java +++ b/libjava/classpath/gnu/java/text/AttributedFormatBuffer.java @@ -7,7 +7,7 @@ GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -56,13 +56,13 @@ public class AttributedFormatBuffer implements FormatBuffer private final ArrayList ranges; private final ArrayList attributes; private int[] a_ranges; - private HashMap[] a_attributes; + private HashMap[] a_attributes; private int startingRange; AttributedCharacterIterator.Attribute defaultAttr; /** * This constructor accepts a StringBuffer. If the buffer contains - * already some characters they will not be attributed. + * already some characters they will not be attributed. */ public AttributedFormatBuffer(CPStringBuilder buffer) { @@ -72,8 +72,8 @@ public class AttributedFormatBuffer implements FormatBuffer this.defaultAttr = null; if (buffer.length() != 0) { - this.startingRange = buffer.length(); - addAttribute(buffer.length(), null); + this.startingRange = buffer.length(); + addAttribute(buffer.length(), null); } else this.startingRange = -1; @@ -96,16 +96,16 @@ public class AttributedFormatBuffer implements FormatBuffer * * @param new_range A new range to insert in the list. * @param attr A new attribute to insert in the list. - */ + */ private final void addAttribute(int new_range, AttributedCharacterIterator.Attribute attr) { HashMap map; if (attr != null) { - map = new HashMap(); - map.put(attr, attr); - attributes.add(map); + map = new HashMap(); + map.put(attr, attr); + attributes.add(map); } else attributes.add(null); @@ -119,7 +119,7 @@ public class AttributedFormatBuffer implements FormatBuffer startingRange = 0; buffer.append(s); } - + public void append(String s, AttributedCharacterIterator.Attribute attr) { setDefaultAttribute(attr); @@ -135,11 +135,11 @@ public class AttributedFormatBuffer implements FormatBuffer setDefaultAttribute(null); if (ranges != null) { - for (int i = 0; i < ranges.length; i++) - { - this.ranges.add(new Integer(ranges[i] + curPos)); - this.attributes.add(attrs[i]); - } + for (int i = 0; i < ranges.length; i++) + { + this.ranges.add(new Integer(ranges[i] + curPos)); + this.attributes.add(attrs[i]); + } } startingRange = buffer.length(); buffer.append(s); @@ -168,7 +168,7 @@ public class AttributedFormatBuffer implements FormatBuffer if (startingRange != currentPos && startingRange >= 0) { - addAttribute(currentPos, defaultAttr); + addAttribute(currentPos, defaultAttr); } defaultAttr = attr; startingRange = currentPos; @@ -212,7 +212,7 @@ public class AttributedFormatBuffer implements FormatBuffer a_ranges = new int[ranges.size()]; for (int i = 0; i < a_ranges.length; i++) a_ranges[i] = ((Integer)(ranges.get (i))).intValue(); - + a_attributes = new HashMap[attributes.size()]; System.arraycopy(attributes.toArray(), 0, a_attributes, 0, a_attributes.length); } @@ -239,7 +239,7 @@ public class AttributedFormatBuffer implements FormatBuffer } /** - * This method returns the array containing the map on the + * This method returns the array containing the map on the * attributes. * * @return An array of {@link java.util.Map} containing the attributes. diff --git a/libjava/classpath/gnu/java/text/BaseBreakIterator.java b/libjava/classpath/gnu/java/text/BaseBreakIterator.java index 4afd8ae..b69f698 100644 --- a/libjava/classpath/gnu/java/text/BaseBreakIterator.java +++ b/libjava/classpath/gnu/java/text/BaseBreakIterator.java @@ -7,7 +7,7 @@ GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -70,7 +70,7 @@ public abstract class BaseBreakIterator extends BreakIterator /** * Return the first boundary after <code>pos</code>. - * This has the side effect of setting the index of the + * This has the side effect of setting the index of the * CharacterIterator. */ public int following (int pos) @@ -98,19 +98,19 @@ public abstract class BaseBreakIterator extends BreakIterator int r = iter.getIndex (); if (n > 0) { - while (n > 0 && r != DONE) - { - r = next (); - --n; - } + while (n > 0 && r != DONE) + { + r = next (); + --n; + } } else if (n < 0) { - while (n < 0 && r != DONE) - { - r = previous (); - ++n; - } + while (n < 0 && r != DONE) + { + r = previous (); + ++n; + } } return r; } diff --git a/libjava/classpath/gnu/java/text/CharacterBreakIterator.java b/libjava/classpath/gnu/java/text/CharacterBreakIterator.java index 5274543..565eb9b 100644 --- a/libjava/classpath/gnu/java/text/CharacterBreakIterator.java +++ b/libjava/classpath/gnu/java/text/CharacterBreakIterator.java @@ -7,7 +7,7 @@ GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -111,44 +111,44 @@ public class CharacterBreakIterator extends BaseBreakIterator char c; for (char prev = CharacterIterator.DONE; iter.getIndex() < end; prev = c) { - c = iter.next(); - if (c == CharacterIterator.DONE) - break; - int type = Character.getType(c); - - // Break after paragraph separators. - if (type == Character.PARAGRAPH_SEPARATOR) - break; - - // Now we need some lookahead. - char ahead = iter.next(); - iter.previous(); - if (ahead == CharacterIterator.DONE) - break; - int aheadType = Character.getType(ahead); - - if (aheadType != Character.NON_SPACING_MARK - && ! isLowSurrogate (ahead) - && ! isLVT (ahead)) - break; - if (! isLVT (c) && isLVT (ahead)) - break; - if (isL (c) && ! isLVT (ahead) - && aheadType != Character.NON_SPACING_MARK) - break; - if (isV (c) && ! isV (ahead) && !isT (ahead) - && aheadType != Character.NON_SPACING_MARK) - break; - if (isT (c) && ! isT (ahead) - && aheadType != Character.NON_SPACING_MARK) - break; - - if (! isHighSurrogate (c) && isLowSurrogate (ahead)) - break; - if (isHighSurrogate (c) && ! isLowSurrogate (ahead)) - break; - if (! isHighSurrogate (prev) && isLowSurrogate (c)) - break; + c = iter.next(); + if (c == CharacterIterator.DONE) + break; + int type = Character.getType(c); + + // Break after paragraph separators. + if (type == Character.PARAGRAPH_SEPARATOR) + break; + + // Now we need some lookahead. + char ahead = iter.next(); + iter.previous(); + if (ahead == CharacterIterator.DONE) + break; + int aheadType = Character.getType(ahead); + + if (aheadType != Character.NON_SPACING_MARK + && ! isLowSurrogate (ahead) + && ! isLVT (ahead)) + break; + if (! isLVT (c) && isLVT (ahead)) + break; + if (isL (c) && ! isLVT (ahead) + && aheadType != Character.NON_SPACING_MARK) + break; + if (isV (c) && ! isV (ahead) && !isT (ahead) + && aheadType != Character.NON_SPACING_MARK) + break; + if (isT (c) && ! isT (ahead) + && aheadType != Character.NON_SPACING_MARK) + break; + + if (! isHighSurrogate (c) && isLowSurrogate (ahead)) + break; + if (isHighSurrogate (c) && ! isLowSurrogate (ahead)) + break; + if (! isHighSurrogate (prev) && isLowSurrogate (c)) + break; } return iter.getIndex(); @@ -161,51 +161,51 @@ public class CharacterBreakIterator extends BaseBreakIterator while (iter.getIndex() >= iter.getBeginIndex()) { - char c = iter.previous(); - if (c == CharacterIterator.DONE) - break; - int type = Character.getType(c); - - if (type != Character.NON_SPACING_MARK - && ! isLowSurrogate (c) - && ! isLVT (c)) - break; - - // Now we need some lookahead. - char ahead = iter.previous(); - if (ahead == CharacterIterator.DONE) - { - iter.next(); - break; - } - char ahead2 = iter.previous(); - iter.next(); - iter.next(); - if (ahead2 == CharacterIterator.DONE) - break; - int aheadType = Character.getType(ahead); - - if (aheadType == Character.PARAGRAPH_SEPARATOR) - break; - - if (isLVT (c) && ! isLVT (ahead)) - break; - if (! isLVT (c) && type != Character.NON_SPACING_MARK - && isL (ahead)) - break; - if (! isV (c) && ! isT (c) && type != Character.NON_SPACING_MARK - && isV (ahead)) - break; - if (! isT (c) && type != Character.NON_SPACING_MARK - && isT (ahead)) - break; - - if (isLowSurrogate (c) && ! isHighSurrogate (ahead)) - break; - if (! isLowSurrogate (c) && isHighSurrogate (ahead)) - break; - if (isLowSurrogate (ahead) && ! isHighSurrogate (ahead2)) - break; + char c = iter.previous(); + if (c == CharacterIterator.DONE) + break; + int type = Character.getType(c); + + if (type != Character.NON_SPACING_MARK + && ! isLowSurrogate (c) + && ! isLVT (c)) + break; + + // Now we need some lookahead. + char ahead = iter.previous(); + if (ahead == CharacterIterator.DONE) + { + iter.next(); + break; + } + char ahead2 = iter.previous(); + iter.next(); + iter.next(); + if (ahead2 == CharacterIterator.DONE) + break; + int aheadType = Character.getType(ahead); + + if (aheadType == Character.PARAGRAPH_SEPARATOR) + break; + + if (isLVT (c) && ! isLVT (ahead)) + break; + if (! isLVT (c) && type != Character.NON_SPACING_MARK + && isL (ahead)) + break; + if (! isV (c) && ! isT (c) && type != Character.NON_SPACING_MARK + && isV (ahead)) + break; + if (! isT (c) && type != Character.NON_SPACING_MARK + && isT (ahead)) + break; + + if (isLowSurrogate (c) && ! isHighSurrogate (ahead)) + break; + if (! isLowSurrogate (c) && isHighSurrogate (ahead)) + break; + if (isLowSurrogate (ahead) && ! isHighSurrogate (ahead2)) + break; } return iter.getIndex(); diff --git a/libjava/classpath/gnu/java/text/FormatBuffer.java b/libjava/classpath/gnu/java/text/FormatBuffer.java index e6b6820..590b16c 100644 --- a/libjava/classpath/gnu/java/text/FormatBuffer.java +++ b/libjava/classpath/gnu/java/text/FormatBuffer.java @@ -7,7 +7,7 @@ GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -42,14 +42,14 @@ import java.util.HashMap; /** * This interface describes a modifiable buffer which contains attributed * characters. The implementation may or may not implements attributes. It - * aims to greatly simplify and clarify the implementation of java.text + * aims to greatly simplify and clarify the implementation of java.text * formatters. The buffer may be appended or have its tail cut. It may also * be completely cleant up. * * @author Guilhem Lavaux <guilhem@kaffe.org> * @date April 10, 2004 */ -public interface FormatBuffer +public interface FormatBuffer { /** * This method appends a simple string to the buffer. This part of @@ -72,8 +72,8 @@ public interface FormatBuffer /** * This method appends a simple string to the buffer. This part of * the buffer will be attributed using the specified ranges and attributes. - * To have an example on how to specify ranges see {@link gnu.java.text.FormatCharacterIterator}. - * + * To have an example on how to specify ranges see {@link gnu.java.text.FormatCharacterIterator}. + * * @param s The string to append to the buffer. * @param ranges The ranges describing how the attributes should be applied * to the string. @@ -121,7 +121,7 @@ public interface FormatBuffer * @param length Number of characters to cut at the end of the buffer. */ public void cutTail(int length); - + /** * This method resets completely the buffer. */ diff --git a/libjava/classpath/gnu/java/text/FormatCharacterIterator.java b/libjava/classpath/gnu/java/text/FormatCharacterIterator.java index 60773aa..889394c 100644 --- a/libjava/classpath/gnu/java/text/FormatCharacterIterator.java +++ b/libjava/classpath/gnu/java/text/FormatCharacterIterator.java @@ -1,4 +1,4 @@ -/* FormatCharacter.java -- Implementation of AttributedCharacterIterator for +/* FormatCharacter.java -- Implementation of AttributedCharacterIterator for formatters. Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. @@ -8,7 +8,7 @@ GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -78,7 +78,7 @@ public class FormatCharacterIterator implements AttributedCharacterIterator } /** - * This constructor take a string <code>s</code>, a set of ranges + * This constructor take a string <code>s</code>, a set of ranges * and the corresponding attributes. This is used to build an iterator. * The array <code>ranges</code> should be formatted as follow: * each element of <code>ranges</code> specifies the index in the string @@ -98,10 +98,10 @@ public class FormatCharacterIterator implements AttributedCharacterIterator this.ranges = ranges; this.attributes = attributes; } - - /* + + /* * The following methods are inherited from AttributedCharacterIterator, - * and thus are already documented. + * and thus are already documented. */ public Set getAllAttributeKeys() @@ -111,7 +111,7 @@ public class FormatCharacterIterator implements AttributedCharacterIterator else return new HashSet(); } - + public Map getAttributes() { if (attributes != null && attributes[attributeIndex] != null) @@ -119,7 +119,7 @@ public class FormatCharacterIterator implements AttributedCharacterIterator else return new HashMap(); } - + public Object getAttribute (AttributedCharacterIterator.Attribute attrib) { if (attributes != null && attributes[attributeIndex] != null) @@ -127,7 +127,7 @@ public class FormatCharacterIterator implements AttributedCharacterIterator else return null; } - + public int getRunLimit(Set reqAttrs) { if (attributes == null) @@ -138,19 +138,19 @@ public class FormatCharacterIterator implements AttributedCharacterIterator do { - currentAttrIndex++; - if (currentAttrIndex == attributes.length) - return formattedString.length(); - if (attributes[currentAttrIndex] == null) - break; - newKeys = attributes[currentAttrIndex].keySet(); + currentAttrIndex++; + if (currentAttrIndex == attributes.length) + return formattedString.length(); + if (attributes[currentAttrIndex] == null) + break; + newKeys = attributes[currentAttrIndex].keySet(); } while (newKeys.containsAll (reqAttrs)); return ranges[currentAttrIndex-1]; } - - public int getRunLimit (AttributedCharacterIterator.Attribute attribute) + + public int getRunLimit (AttributedCharacterIterator.Attribute attribute) { Set s = new HashSet(); @@ -164,38 +164,38 @@ public class FormatCharacterIterator implements AttributedCharacterIterator return formattedString.length(); if (attributes[attributeIndex] == null) { - for (int i=attributeIndex+1;i<attributes.length;i++) - if (attributes[i] != null) - return ranges[i-1]; - return formattedString.length(); + for (int i=attributeIndex+1;i<attributes.length;i++) + if (attributes[i] != null) + return ranges[i-1]; + return formattedString.length(); } return getRunLimit (attributes[attributeIndex].keySet()); } - + public int getRunStart (Set reqAttrs) { if (attributes == null) return formattedString.length(); - + int currentAttrIndex = attributeIndex; Set newKeys = null; do { - if (currentAttrIndex == 0) - return 0; + if (currentAttrIndex == 0) + return 0; - currentAttrIndex--; - if (attributes[currentAttrIndex] == null) - break; - newKeys = attributes[currentAttrIndex].keySet(); + currentAttrIndex--; + if (attributes[currentAttrIndex] == null) + break; + newKeys = attributes[currentAttrIndex].keySet(); } while (newKeys.containsAll (reqAttrs)); - + return (currentAttrIndex > 0) ? ranges[currentAttrIndex-1] : 0; - } - + } + public int getRunStart() { if (attributes == null) @@ -203,19 +203,19 @@ public class FormatCharacterIterator implements AttributedCharacterIterator if (attributes[attributeIndex] == null) { - for (int i=attributeIndex;i>0;i--) - if (attributes[i] != null) - return ranges[attributeIndex-1]; - return 0; + for (int i=attributeIndex;i>0;i--) + if (attributes[i] != null) + return ranges[attributeIndex-1]; + return 0; } return getRunStart (attributes[attributeIndex].keySet()); } - - public int getRunStart (AttributedCharacterIterator.Attribute attribute) + + public int getRunStart (AttributedCharacterIterator.Attribute attribute) { Set s = new HashSet(); - + s.add (attribute); return getRunStart (s); } @@ -224,7 +224,7 @@ public class FormatCharacterIterator implements AttributedCharacterIterator { return new FormatCharacterIterator (formattedString, ranges, attributes); } - + /* * The following methods are inherited from CharacterIterator and thus * are already documented. @@ -234,29 +234,29 @@ public class FormatCharacterIterator implements AttributedCharacterIterator { return formattedString.charAt (charIndex); } - + public char first() { charIndex = 0; attributeIndex = 0; return formattedString.charAt (0); } - + public int getBeginIndex() { return 0; } - + public int getEndIndex() { return formattedString.length(); } - + public int getIndex() { return charIndex; } - + public char last() { charIndex = formattedString.length()-1; @@ -264,53 +264,53 @@ public class FormatCharacterIterator implements AttributedCharacterIterator attributeIndex = attributes.length-1; return formattedString.charAt (charIndex); } - + public char next() { charIndex++; if (charIndex >= formattedString.length()) { - charIndex = getEndIndex(); - return DONE; + charIndex = getEndIndex(); + return DONE; } if (attributes != null) { - if (charIndex >= ranges[attributeIndex]) - attributeIndex++; + if (charIndex >= ranges[attributeIndex]) + attributeIndex++; } return formattedString.charAt (charIndex); } - + public char previous() { charIndex--; if (charIndex < 0) { - charIndex = 0; - return DONE; + charIndex = 0; + return DONE; } - + if (attributes != null) { - if (charIndex < ranges[attributeIndex]) - attributeIndex--; + if (charIndex < ranges[attributeIndex]) + attributeIndex--; } return formattedString.charAt (charIndex); } - + public char setIndex (int position) { if (position < 0 || position > formattedString.length()) throw new IllegalArgumentException ("position is out of range"); - + charIndex = position; if (attributes != null) { - for (attributeIndex=0;attributeIndex<attributes.length; - attributeIndex++) - if (ranges[attributeIndex] > charIndex) - break; - attributeIndex--; + for (attributeIndex=0;attributeIndex<attributes.length; + attributeIndex++) + if (ranges[attributeIndex] > charIndex) + break; + attributeIndex--; } if (charIndex == formattedString.length()) return DONE; @@ -334,64 +334,64 @@ public class FormatCharacterIterator implements AttributedCharacterIterator int i = 0, j = 0; debug("merging " + attributes.length + " attrs"); - + while (i < this.ranges.length && j < ranges.length) { - if (this.attributes[i] != null) - { - new_attributes.add (this.attributes[i]); - if (attributes[j] != null) - this.attributes[i].putAll (attributes[j]); - } - else - { - new_attributes.add (attributes[j]); - } - if (this.ranges[i] == ranges[j]) - { - new_ranges.add (new Integer (ranges[j])); - i++; - j++; - } - else if (this.ranges[i] < ranges[j]) - { - new_ranges.add (new Integer (this.ranges[i])); - i++; - } - else - { - new_ranges.add (new Integer (ranges[j])); - j++; - } + if (this.attributes[i] != null) + { + new_attributes.add (this.attributes[i]); + if (attributes[j] != null) + this.attributes[i].putAll (attributes[j]); + } + else + { + new_attributes.add (attributes[j]); + } + if (this.ranges[i] == ranges[j]) + { + new_ranges.add (new Integer (ranges[j])); + i++; + j++; + } + else if (this.ranges[i] < ranges[j]) + { + new_ranges.add (new Integer (this.ranges[i])); + i++; + } + else + { + new_ranges.add (new Integer (ranges[j])); + j++; + } } - + if (i != this.ranges.length) { - for (;i<this.ranges.length;i++) - { - new_attributes.add (this.attributes[i]); - new_ranges.add (new Integer (this.ranges[i])); - } + for (;i<this.ranges.length;i++) + { + new_attributes.add (this.attributes[i]); + new_ranges.add (new Integer (this.ranges[i])); + } } if (j != ranges.length) { - for (;j<ranges.length;j++) - { - new_attributes.add (attributes[j]); - new_ranges.add (new Integer (ranges[j])); - } + for (;j<ranges.length;j++) + { + new_attributes.add (attributes[j]); + new_ranges.add (new Integer (ranges[j])); + } } this.attributes = new HashMap[new_attributes.size()]; this.ranges = new int[new_ranges.size()]; System.arraycopy (new_attributes.toArray(), 0, this.attributes, - 0, this.attributes.length); + 0, this.attributes.length); for (i=0;i<new_ranges.size();i++) { - this.ranges[i] = ((Integer)new_ranges.elementAt (i)).intValue(); + this.ranges[i] = ((Integer)new_ranges.elementAt (i)).intValue(); } - + dumpTable(); } @@ -410,22 +410,22 @@ public class FormatCharacterIterator implements AttributedCharacterIterator do { - formattedString = formattedString + String.valueOf (c); - // TODO: Reduce the size of the output array. - more_attributes.add (iterator.getAttributes()); - more_ranges.add (new Integer (formattedString.length())); - // END TOOD - c = iterator.next(); - } + formattedString = formattedString + String.valueOf (c); + // TODO: Reduce the size of the output array. + more_attributes.add (iterator.getAttributes()); + more_ranges.add (new Integer (formattedString.length())); + // END TOOD + c = iterator.next(); + } while (c != DONE); HashMap[] new_attributes = new HashMap[attributes.length - + more_attributes.size()]; + + more_attributes.size()]; int[] new_ranges = new int[ranges.length + more_ranges.size()]; - + System.arraycopy (attributes, 0, new_attributes, 0, attributes.length); System.arraycopy (more_attributes.toArray(), 0, new_attributes, - attributes.length, more_attributes.size()); + attributes.length, more_attributes.size()); System.arraycopy (ranges, 0, new_ranges, 0, ranges.length); Object[] new_ranges_array = more_ranges.toArray(); @@ -458,7 +458,7 @@ public class FormatCharacterIterator implements AttributedCharacterIterator ranges = new_ranges; attributes = new_attributes; - } + } /** * This method appends a string without attributes. It is completely @@ -470,7 +470,7 @@ public class FormatCharacterIterator implements AttributedCharacterIterator public void append (String text) { append (text, null); - } + } /** * This method adds a set of attributes to a range of character. The @@ -481,7 +481,7 @@ public class FormatCharacterIterator implements AttributedCharacterIterator * @param range_start Lower bound of the range of characters which will receive the * attribute. * @param range_end Upper bound of the range of characters which will receive the - * attribute. + * attribute. * * @throws IllegalArgumentException if ranges are out of bounds. */ @@ -502,30 +502,30 @@ public class FormatCharacterIterator implements AttributedCharacterIterator private void dumpTable() { int start_range = 0; - + if (!DEBUG) return; System.out.println("Dumping internal table:"); for (int i = 0; i < ranges.length; i++) { - System.out.print("\t" + start_range + " => " + ranges[i] + ":"); - if (attributes[i] == null) - System.out.println("null"); - else - { - Set keyset = attributes[i].keySet(); - if (keyset != null) - { - Iterator keys = keyset.iterator(); - - while (keys.hasNext()) - System.out.print(" " + keys.next()); - } - else - System.out.println("keySet null"); - System.out.println(); - } + System.out.print("\t" + start_range + " => " + ranges[i] + ":"); + if (attributes[i] == null) + System.out.println("null"); + else + { + Set keyset = attributes[i].keySet(); + if (keyset != null) + { + Iterator keys = keyset.iterator(); + + while (keys.hasNext()) + System.out.print(" " + keys.next()); + } + else + System.out.println("keySet null"); + System.out.println(); + } } System.out.println(); System.out.flush(); diff --git a/libjava/classpath/gnu/java/text/LineBreakIterator.java b/libjava/classpath/gnu/java/text/LineBreakIterator.java index ad07479..7e44121 100644 --- a/libjava/classpath/gnu/java/text/LineBreakIterator.java +++ b/libjava/classpath/gnu/java/text/LineBreakIterator.java @@ -7,7 +7,7 @@ GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -65,23 +65,23 @@ public class LineBreakIterator extends BaseBreakIterator // Some methods to tell us different properties of characters. private final boolean isNb (char c) { - return (c == 0x00a0 // NO-BREAK SPACE - || c == 0x2011 // NON-BREAKING HYPHEN - || c == 0xfeff); // ZERO WITH NO-BREAK SPACE + return (c == 0x00a0 // NO-BREAK SPACE + || c == 0x2011 // NON-BREAKING HYPHEN + || c == 0xfeff); // ZERO WITH NO-BREAK SPACE } private final boolean isClose (int type) { return (type == Character.END_PUNCTUATION - // Unicode book says "comma, period, ...", which I take to - // mean "Po" class. - || type == Character.OTHER_PUNCTUATION); + // Unicode book says "comma, period, ...", which I take to + // mean "Po" class. + || type == Character.OTHER_PUNCTUATION); } private final boolean isIdeo (char c) { - return (c >= 0x3040 && c <= 0x309f // Hiragana - || c >= 0x30a0 && c <= 0x30ff // Katakana - || c >= 0x4e00 && c <= 0x9fff // Han - || c >= 0x3100 && c <= 0x312f); // Bopomofo + return (c >= 0x3040 && c <= 0x309f // Hiragana + || c >= 0x30a0 && c <= 0x30ff // Katakana + || c >= 0x4e00 && c <= 0x9fff // Han + || c >= 0x3100 && c <= 0x312f); // Bopomofo } public int next () @@ -92,54 +92,54 @@ public class LineBreakIterator extends BaseBreakIterator while (iter.getIndex() < end) { - char c = iter.current(); - int type = Character.getType(c); - - char n = iter.next(); - - if (n == CharacterIterator.DONE - || type == Character.PARAGRAPH_SEPARATOR - || type == Character.LINE_SEPARATOR) - break; - - // Handle two cases where we must scan for non-spacing marks. - int start = iter.getIndex(); - if (type == Character.SPACE_SEPARATOR - || type == Character.START_PUNCTUATION - || isIdeo (c)) - { - while (n != CharacterIterator.DONE - && Character.getType(n) == Character.NON_SPACING_MARK) - n = iter.next(); - if (n == CharacterIterator.DONE) - break; - - if (type == Character.SPACE_SEPARATOR) - { - int nt = Character.getType(n); - if (nt != Character.NON_SPACING_MARK - && nt != Character.SPACE_SEPARATOR - && ! isNb (n)) - break; - } - else if (type == Character.START_PUNCTUATION) - { - if (isIdeo (n)) - { - // Open punctuation followed by non spacing marks - // and then ideograph does not have a break in - // it. So skip all this. - start = iter.getIndex(); - } - } - else - { - // Ideograph preceded this character. - if (isClose (Character.getType(n))) - break; - } - } - iter.setIndex(start); + char c = iter.current(); + int type = Character.getType(c); + + char n = iter.next(); + + if (n == CharacterIterator.DONE + || type == Character.PARAGRAPH_SEPARATOR + || type == Character.LINE_SEPARATOR) + break; + + // Handle two cases where we must scan for non-spacing marks. + int start = iter.getIndex(); + if (type == Character.SPACE_SEPARATOR + || type == Character.START_PUNCTUATION + || isIdeo (c)) + { + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.NON_SPACING_MARK) + n = iter.next(); + if (n == CharacterIterator.DONE) + break; + + if (type == Character.SPACE_SEPARATOR) + { + int nt = Character.getType(n); + if (nt != Character.NON_SPACING_MARK + && nt != Character.SPACE_SEPARATOR + && ! isNb (n)) + break; + } + else if (type == Character.START_PUNCTUATION) + { + if (isIdeo (n)) + { + // Open punctuation followed by non spacing marks + // and then ideograph does not have a break in + // it. So skip all this. + start = iter.getIndex(); + } + } + else + { + // Ideograph preceded this character. + if (isClose (Character.getType(n))) + break; + } + } + iter.setIndex(start); } return iter.getIndex(); @@ -153,40 +153,40 @@ public class LineBreakIterator extends BaseBreakIterator while (iter.getIndex() >= start) { - char c = iter.previous(); - if (c == CharacterIterator.DONE) - break; - int type = Character.getType(c); - - char n = iter.previous(); - if (n == CharacterIterator.DONE) - break; - iter.next(); - - int nt = Character.getType(n); - // Break after paragraph separators. - if (nt == Character.PARAGRAPH_SEPARATOR - || nt == Character.LINE_SEPARATOR) - break; - - // Skip non-spacing marks. - int init = iter.getIndex(); - while (n != CharacterIterator.DONE && nt == Character.NON_SPACING_MARK) - { - n = iter.previous(); - nt = Character.getType(n); - } - - if (nt == Character.SPACE_SEPARATOR - && type != Character.SPACE_SEPARATOR - && type != Character.NON_SPACING_MARK - && ! isNb (c)) - break; - if (! isClose (type) && isIdeo (n)) - break; - if (isIdeo (c) && nt != Character.START_PUNCTUATION) - break; - iter.setIndex(init); + char c = iter.previous(); + if (c == CharacterIterator.DONE) + break; + int type = Character.getType(c); + + char n = iter.previous(); + if (n == CharacterIterator.DONE) + break; + iter.next(); + + int nt = Character.getType(n); + // Break after paragraph separators. + if (nt == Character.PARAGRAPH_SEPARATOR + || nt == Character.LINE_SEPARATOR) + break; + + // Skip non-spacing marks. + int init = iter.getIndex(); + while (n != CharacterIterator.DONE && nt == Character.NON_SPACING_MARK) + { + n = iter.previous(); + nt = Character.getType(n); + } + + if (nt == Character.SPACE_SEPARATOR + && type != Character.SPACE_SEPARATOR + && type != Character.NON_SPACING_MARK + && ! isNb (c)) + break; + if (! isClose (type) && isIdeo (n)) + break; + if (isIdeo (c) && nt != Character.START_PUNCTUATION) + break; + iter.setIndex(init); } return iter.getIndex(); diff --git a/libjava/classpath/gnu/java/text/SentenceBreakIterator.java b/libjava/classpath/gnu/java/text/SentenceBreakIterator.java index f91d269..4da9df2 100644 --- a/libjava/classpath/gnu/java/text/SentenceBreakIterator.java +++ b/libjava/classpath/gnu/java/text/SentenceBreakIterator.java @@ -7,7 +7,7 @@ GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -70,66 +70,66 @@ public class SentenceBreakIterator extends BaseBreakIterator while (iter.getIndex() < end) { - char c = iter.current(); - if (c == CharacterIterator.DONE) - break; - int type = Character.getType(c); - - char n = iter.next(); - if (n == CharacterIterator.DONE) - break; - - // Always break after paragraph separator. - if (type == Character.PARAGRAPH_SEPARATOR) - break; - - if (c == '!' || c == '?') - { - // Skip close punctuation. - while (n != CharacterIterator.DONE - && Character.getType(n) == Character.END_PUNCTUATION) - n = iter.next(); - // Skip (java) space, line and paragraph separators. - while (n != CharacterIterator.DONE && Character.isWhitespace(n)) - n = iter.next(); - - // There's always a break somewhere after `!' or `?'. - break; - } - - if (c == '.') - { - int save = iter.getIndex(); - // Skip close punctuation. - while (n != CharacterIterator.DONE - && Character.getType(n) == Character.END_PUNCTUATION) - n = iter.next(); - // Skip (java) space, line and paragraph separators. - // We keep count because we need at least one for this period to - // represent a terminator. - int spcount = 0; - while (n != CharacterIterator.DONE && Character.isWhitespace(n)) - { - n = iter.next(); - ++spcount; - } - if (spcount > 0) - { - int save2 = iter.getIndex(); - // Skip over open puncutation. - while (n != CharacterIterator.DONE - && Character.getType(n) == Character.START_PUNCTUATION) - n = iter.next(); - // Next character must not be lower case. - if (n == CharacterIterator.DONE - || ! Character.isLowerCase(n)) - { - iter.setIndex(save2); - break; - } - } - iter.setIndex(save); - } + char c = iter.current(); + if (c == CharacterIterator.DONE) + break; + int type = Character.getType(c); + + char n = iter.next(); + if (n == CharacterIterator.DONE) + break; + + // Always break after paragraph separator. + if (type == Character.PARAGRAPH_SEPARATOR) + break; + + if (c == '!' || c == '?') + { + // Skip close punctuation. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.END_PUNCTUATION) + n = iter.next(); + // Skip (java) space, line and paragraph separators. + while (n != CharacterIterator.DONE && Character.isWhitespace(n)) + n = iter.next(); + + // There's always a break somewhere after `!' or `?'. + break; + } + + if (c == '.') + { + int save = iter.getIndex(); + // Skip close punctuation. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.END_PUNCTUATION) + n = iter.next(); + // Skip (java) space, line and paragraph separators. + // We keep count because we need at least one for this period to + // represent a terminator. + int spcount = 0; + while (n != CharacterIterator.DONE && Character.isWhitespace(n)) + { + n = iter.next(); + ++spcount; + } + if (spcount > 0) + { + int save2 = iter.getIndex(); + // Skip over open puncutation. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.START_PUNCTUATION) + n = iter.next(); + // Next character must not be lower case. + if (n == CharacterIterator.DONE + || ! Character.isLowerCase(n)) + { + iter.setIndex(save2); + break; + } + } + iter.setIndex(save); + } } return iter.getIndex(); @@ -143,86 +143,86 @@ public class SentenceBreakIterator extends BaseBreakIterator while (iter.getIndex() >= start) { - char c = iter.previous(); - if (c == CharacterIterator.DONE) - break; - - char n = iter.previous(); - if (n == CharacterIterator.DONE) - break; - iter.next(); - int nt = Character.getType(n); - - if (! Character.isLowerCase(c) - && (nt == Character.START_PUNCTUATION - || Character.isWhitespace(n))) - { - int save = iter.getIndex(); - int save_nt = nt; - char save_n = n; - // Skip open punctuation. - while (n != CharacterIterator.DONE - && Character.getType(n) == Character.START_PUNCTUATION) - n = iter.previous(); - if (n == CharacterIterator.DONE) - break; - if (Character.isWhitespace(n)) - { - // Must have at least one (java) space after the `.'. - int save2 = iter.getIndex(); - while (n != CharacterIterator.DONE - && Character.isWhitespace(n)) - n = iter.previous(); - // Skip close punctuation. - while (n != CharacterIterator.DONE - && Character.getType(n) == Character.END_PUNCTUATION) - n = iter.previous(); - if (n == CharacterIterator.DONE || n == '.') - { - // Communicate location of actual end. - period = iter.getIndex(); - iter.setIndex(save2); - break; - } - } - iter.setIndex(save); - nt = save_nt; - n = save_n; - } - - if (nt == Character.PARAGRAPH_SEPARATOR) - { - // Communicate location of actual end. - period = iter.getIndex(); - break; - } - else if (Character.isWhitespace(n) - || nt == Character.END_PUNCTUATION) - { - int save = iter.getIndex(); - // Skip (java) space, line and paragraph separators. - while (n != CharacterIterator.DONE - && Character.isWhitespace(n)) - n = iter.previous(); - // Skip close punctuation. - while (n != CharacterIterator.DONE - && Character.getType(n) == Character.END_PUNCTUATION) - n = iter.previous(); - int here = iter.getIndex(); - iter.setIndex(save); - if (n == CharacterIterator.DONE || n == '!' || n == '?') - { - // Communicate location of actual end. - period = here; - break; - } - } - else if (n == '!' || n == '?') - { - // Communicate location of actual end. - period = iter.getIndex(); - break; - } + char c = iter.previous(); + if (c == CharacterIterator.DONE) + break; + + char n = iter.previous(); + if (n == CharacterIterator.DONE) + break; + iter.next(); + int nt = Character.getType(n); + + if (! Character.isLowerCase(c) + && (nt == Character.START_PUNCTUATION + || Character.isWhitespace(n))) + { + int save = iter.getIndex(); + int save_nt = nt; + char save_n = n; + // Skip open punctuation. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.START_PUNCTUATION) + n = iter.previous(); + if (n == CharacterIterator.DONE) + break; + if (Character.isWhitespace(n)) + { + // Must have at least one (java) space after the `.'. + int save2 = iter.getIndex(); + while (n != CharacterIterator.DONE + && Character.isWhitespace(n)) + n = iter.previous(); + // Skip close punctuation. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.END_PUNCTUATION) + n = iter.previous(); + if (n == CharacterIterator.DONE || n == '.') + { + // Communicate location of actual end. + period = iter.getIndex(); + iter.setIndex(save2); + break; + } + } + iter.setIndex(save); + nt = save_nt; + n = save_n; + } + + if (nt == Character.PARAGRAPH_SEPARATOR) + { + // Communicate location of actual end. + period = iter.getIndex(); + break; + } + else if (Character.isWhitespace(n) + || nt == Character.END_PUNCTUATION) + { + int save = iter.getIndex(); + // Skip (java) space, line and paragraph separators. + while (n != CharacterIterator.DONE + && Character.isWhitespace(n)) + n = iter.previous(); + // Skip close punctuation. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.END_PUNCTUATION) + n = iter.previous(); + int here = iter.getIndex(); + iter.setIndex(save); + if (n == CharacterIterator.DONE || n == '!' || n == '?') + { + // Communicate location of actual end. + period = here; + break; + } + } + else if (n == '!' || n == '?') + { + // Communicate location of actual end. + period = iter.getIndex(); + break; + } } return iter.getIndex(); diff --git a/libjava/classpath/gnu/java/text/StringFormatBuffer.java b/libjava/classpath/gnu/java/text/StringFormatBuffer.java index fc8d08e..2367fcc 100644 --- a/libjava/classpath/gnu/java/text/StringFormatBuffer.java +++ b/libjava/classpath/gnu/java/text/StringFormatBuffer.java @@ -7,7 +7,7 @@ GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -41,7 +41,7 @@ import java.util.HashMap; /** * This class is an implementation of a FormatBuffer without attributes. - * + * * @author Guilhem Lavaux <guilhem@kaffe.org> * @date April 10, 2004 */ @@ -64,7 +64,7 @@ public class StringFormatBuffer implements FormatBuffer { buffer.append(s); } - + public void append(String s, AttributedCharacterIterator.Attribute attr) { buffer.append(s); @@ -111,7 +111,7 @@ public class StringFormatBuffer implements FormatBuffer } /** - * This method returns the internal {@link java.lang.StringBuffer} which + * This method returns the internal {@link java.lang.StringBuffer} which * contains the string of character. */ public StringBuffer getBuffer() diff --git a/libjava/classpath/gnu/java/text/WordBreakIterator.java b/libjava/classpath/gnu/java/text/WordBreakIterator.java index f140369..fded4bf 100644 --- a/libjava/classpath/gnu/java/text/WordBreakIterator.java +++ b/libjava/classpath/gnu/java/text/WordBreakIterator.java @@ -7,7 +7,7 @@ GNU Classpath is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. - + GNU Classpath is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU @@ -84,78 +84,78 @@ public class WordBreakIterator extends BaseBreakIterator while (iter.getIndex() < end) { - char c = iter.current(); - if (c == CharacterIterator.DONE) - break; - int type = Character.getType(c); - - char n = iter.next(); - if (n == CharacterIterator.DONE) - break; - - // Break after paragraph separators. - if (type == Character.PARAGRAPH_SEPARATOR - || type == Character.LINE_SEPARATOR) - break; - - // Break between letters and non-letters. - // FIXME: we treat apostrophe as part of a word. This - // is an English-ism. - boolean is_letter = Character.isLetter(c); - if (c != '\'' && ! is_letter && type != Character.NON_SPACING_MARK - && Character.isLetter(n)) - break; - - // Always break after certain symbols, such as punctuation. - // This heuristic is derived from hints in the JCL book and is - // not part of Unicode. It seems to be right, however. - // FIXME: we treat apostrophe as part of a word. This - // is an English-ism. - if (c != '\'' - && (type == Character.DASH_PUNCTUATION - || type == Character.START_PUNCTUATION - || type == Character.END_PUNCTUATION - || type == Character.CONNECTOR_PUNCTUATION - || type == Character.OTHER_PUNCTUATION - || type == Character.MATH_SYMBOL - || type == Character.CURRENCY_SYMBOL - || type == Character.MODIFIER_SYMBOL - || type == Character.OTHER_SYMBOL - || type == Character.FORMAT - || type == Character.CONTROL)) - break; - - boolean is_hira = isHira (c); - boolean is_kata = isKata (c); - boolean is_han = isHan (c); - - // Special case Japanese. - if (! is_hira && ! is_kata && ! is_han - && type != Character.NON_SPACING_MARK - && (isHira (n) || isKata (n) || isHan (n))) - break; - - if (is_hira || is_kata || is_han || is_letter) - { - // Now we need to do some lookahead. We might need to do - // quite a bit of lookahead, so we save our position and - // restore it later. - int save = iter.getIndex(); - // Skip string of non spacing marks. - while (n != CharacterIterator.DONE - && Character.getType(n) == Character.NON_SPACING_MARK) - n = iter.next(); - if (n == CharacterIterator.DONE) - break; - if ((is_hira && ! isHira (n)) - || (is_kata && ! isHira (n) && ! isKata (n)) - || (is_han && ! isHira (n) && ! isHan (n)) - // FIXME: we treat apostrophe as part of a word. This - // is an English-ism. - || (is_letter && ! Character.isLetter(n) && n != '\'')) - break; - iter.setIndex(save); - } + char c = iter.current(); + if (c == CharacterIterator.DONE) + break; + int type = Character.getType(c); + + char n = iter.next(); + if (n == CharacterIterator.DONE) + break; + + // Break after paragraph separators. + if (type == Character.PARAGRAPH_SEPARATOR + || type == Character.LINE_SEPARATOR) + break; + + // Break between letters and non-letters. + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. + boolean is_letter = Character.isLetter(c); + if (c != '\'' && ! is_letter && type != Character.NON_SPACING_MARK + && Character.isLetter(n)) + break; + + // Always break after certain symbols, such as punctuation. + // This heuristic is derived from hints in the JCL book and is + // not part of Unicode. It seems to be right, however. + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. + if (c != '\'' + && (type == Character.DASH_PUNCTUATION + || type == Character.START_PUNCTUATION + || type == Character.END_PUNCTUATION + || type == Character.CONNECTOR_PUNCTUATION + || type == Character.OTHER_PUNCTUATION + || type == Character.MATH_SYMBOL + || type == Character.CURRENCY_SYMBOL + || type == Character.MODIFIER_SYMBOL + || type == Character.OTHER_SYMBOL + || type == Character.FORMAT + || type == Character.CONTROL)) + break; + + boolean is_hira = isHira (c); + boolean is_kata = isKata (c); + boolean is_han = isHan (c); + + // Special case Japanese. + if (! is_hira && ! is_kata && ! is_han + && type != Character.NON_SPACING_MARK + && (isHira (n) || isKata (n) || isHan (n))) + break; + + if (is_hira || is_kata || is_han || is_letter) + { + // Now we need to do some lookahead. We might need to do + // quite a bit of lookahead, so we save our position and + // restore it later. + int save = iter.getIndex(); + // Skip string of non spacing marks. + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.NON_SPACING_MARK) + n = iter.next(); + if (n == CharacterIterator.DONE) + break; + if ((is_hira && ! isHira (n)) + || (is_kata && ! isHira (n) && ! isKata (n)) + || (is_han && ! isHira (n) && ! isHan (n)) + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. + || (is_letter && ! Character.isLetter(n) && n != '\'')) + break; + iter.setIndex(save); + } } return iter.getIndex(); @@ -169,80 +169,80 @@ public class WordBreakIterator extends BaseBreakIterator while (iter.getIndex() >= start) { - char c = iter.previous(); - if (c == CharacterIterator.DONE) - break; - - boolean is_hira = isHira (c); - boolean is_kata = isKata (c); - boolean is_han = isHan (c); - boolean is_letter = Character.isLetter(c); - - char n = iter.previous(); - if (n == CharacterIterator.DONE) - break; - iter.next(); - int type = Character.getType(n); - // Break after paragraph separators. - if (type == Character.PARAGRAPH_SEPARATOR - || type == Character.LINE_SEPARATOR) - break; - - // Break between letters and non-letters. - // FIXME: we treat apostrophe as part of a word. This - // is an English-ism. - if (n != '\'' && ! Character.isLetter(n) - && type != Character.NON_SPACING_MARK - && is_letter) - break; - - // Always break after certain symbols, such as punctuation. - // This heuristic is derived from hints in the JCL book and is - // not part of Unicode. It seems to be right, however. - // FIXME: we treat apostrophe as part of a word. This - // is an English-ism. - if (n != '\'' - && (type == Character.DASH_PUNCTUATION - || type == Character.START_PUNCTUATION - || type == Character.END_PUNCTUATION - || type == Character.CONNECTOR_PUNCTUATION - || type == Character.OTHER_PUNCTUATION - || type == Character.MATH_SYMBOL - || type == Character.CURRENCY_SYMBOL - || type == Character.MODIFIER_SYMBOL - || type == Character.OTHER_SYMBOL - || type == Character.FORMAT - || type == Character.CONTROL)) - break; - - // Special case Japanese. - if ((is_hira || is_kata || is_han) - && ! isHira (n) && ! isKata (n) && ! isHan (n) - && type != Character.NON_SPACING_MARK) - break; - - // We might have to skip over non spacing marks to see what's - // on the other side. - if (! is_hira || (! is_letter && c != '\'')) - { - int save = iter.getIndex(); - while (n != CharacterIterator.DONE - && Character.getType(n) == Character.NON_SPACING_MARK) - n = iter.previous(); - iter.setIndex(save); - // This is a strange case: a bunch of non-spacing marks at - // the beginning. We treat the current location as a word - // break. - if (n == CharacterIterator.DONE) - break; - if ((isHira (n) && ! is_hira) - || (isKata (n) && ! is_hira && ! is_kata) - || (isHan (n) && ! is_hira && ! is_han) - // FIXME: we treat apostrophe as part of a word. This - // is an English-ism. - || (! is_letter && c != '\'' && Character.isLetter(n))) - break; - } + char c = iter.previous(); + if (c == CharacterIterator.DONE) + break; + + boolean is_hira = isHira (c); + boolean is_kata = isKata (c); + boolean is_han = isHan (c); + boolean is_letter = Character.isLetter(c); + + char n = iter.previous(); + if (n == CharacterIterator.DONE) + break; + iter.next(); + int type = Character.getType(n); + // Break after paragraph separators. + if (type == Character.PARAGRAPH_SEPARATOR + || type == Character.LINE_SEPARATOR) + break; + + // Break between letters and non-letters. + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. + if (n != '\'' && ! Character.isLetter(n) + && type != Character.NON_SPACING_MARK + && is_letter) + break; + + // Always break after certain symbols, such as punctuation. + // This heuristic is derived from hints in the JCL book and is + // not part of Unicode. It seems to be right, however. + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. + if (n != '\'' + && (type == Character.DASH_PUNCTUATION + || type == Character.START_PUNCTUATION + || type == Character.END_PUNCTUATION + || type == Character.CONNECTOR_PUNCTUATION + || type == Character.OTHER_PUNCTUATION + || type == Character.MATH_SYMBOL + || type == Character.CURRENCY_SYMBOL + || type == Character.MODIFIER_SYMBOL + || type == Character.OTHER_SYMBOL + || type == Character.FORMAT + || type == Character.CONTROL)) + break; + + // Special case Japanese. + if ((is_hira || is_kata || is_han) + && ! isHira (n) && ! isKata (n) && ! isHan (n) + && type != Character.NON_SPACING_MARK) + break; + + // We might have to skip over non spacing marks to see what's + // on the other side. + if (! is_hira || (! is_letter && c != '\'')) + { + int save = iter.getIndex(); + while (n != CharacterIterator.DONE + && Character.getType(n) == Character.NON_SPACING_MARK) + n = iter.previous(); + iter.setIndex(save); + // This is a strange case: a bunch of non-spacing marks at + // the beginning. We treat the current location as a word + // break. + if (n == CharacterIterator.DONE) + break; + if ((isHira (n) && ! is_hira) + || (isKata (n) && ! is_hira && ! is_kata) + || (isHan (n) && ! is_hira && ! is_han) + // FIXME: we treat apostrophe as part of a word. This + // is an English-ism. + || (! is_letter && c != '\'' && Character.isLetter(n))) + break; + } } return iter.getIndex(); |