aboutsummaryrefslogtreecommitdiff
path: root/libjava/gnu/java
diff options
context:
space:
mode:
authorJesse Rosenstock <jmr@ugcs.caltech.edu>2002-11-11 07:36:41 +0000
committerMichael Koch <mkoch@gcc.gnu.org>2002-11-11 07:36:41 +0000
commit8a423d779f598036c5e9048fb949494a7a98d564 (patch)
treeff604598749b5c8d280139c1d1f78191270f1841 /libjava/gnu/java
parentac7bc6bb2f59e4445e6f4469f558a40a913edbe1 (diff)
downloadgcc-8a423d779f598036c5e9048fb949494a7a98d564.zip
gcc-8a423d779f598036c5e9048fb949494a7a98d564.tar.gz
gcc-8a423d779f598036c5e9048fb949494a7a98d564.tar.bz2
ISO_8859_1.java, [...]: New files.
2002-11-11 Jesse Rosenstock <jmr@ugcs.caltech.edu> * gnu/java/nio/charset/ISO_8859_1.java, gnu/java/nio/charset/Provider.java, gnu/java/nio/charset/US_ASCII.java, gnu/java/nio/charset/UTF_16.java, gnu/java/nio/charset/UTF_16BE.java, gnu/java/nio/charset/UTF_16Decoder.java, gnu/java/nio/charset/UTF_16Encoder.java, gnu/java/nio/charset/UTF_16LE.java, gnu/java/nio/charset/UTF_8.java: New files. * Makefile.am (): Added new files. * Makefile.in: Regenerated. From-SVN: r59013
Diffstat (limited to 'libjava/gnu/java')
-rw-r--r--libjava/gnu/java/nio/charset/ISO_8859_1.java132
-rw-r--r--libjava/gnu/java/nio/charset/Provider.java135
-rw-r--r--libjava/gnu/java/nio/charset/US_ASCII.java137
-rw-r--r--libjava/gnu/java/nio/charset/UTF_16.java75
-rw-r--r--libjava/gnu/java/nio/charset/UTF_16BE.java75
-rw-r--r--libjava/gnu/java/nio/charset/UTF_16Decoder.java169
-rw-r--r--libjava/gnu/java/nio/charset/UTF_16Encoder.java153
-rw-r--r--libjava/gnu/java/nio/charset/UTF_16LE.java75
-rw-r--r--libjava/gnu/java/nio/charset/UTF_8.java279
9 files changed, 1230 insertions, 0 deletions
diff --git a/libjava/gnu/java/nio/charset/ISO_8859_1.java b/libjava/gnu/java/nio/charset/ISO_8859_1.java
new file mode 100644
index 0000000..f29fa26
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/ISO_8859_1.java
@@ -0,0 +1,132 @@
+/* ISO_8859_1.java --
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * The ISO-8859-1 charset, registered under the canonical name
+ * "ISO-8859-1" with the alias "ISO-LATIN-1".  Each byte corresponds to
+ * the char with the same unsigned value, so decoding never fails and
+ * encoding fails only for chars above 0xFF.
+ *
+ * @author Jesse Rosenstock
+ */
+final class ISO_8859_1 extends Charset
+{
+  ISO_8859_1 ()
+  {
+    super ("ISO-8859-1", new String[]{"ISO-LATIN-1"});
+  }
+
+  /**
+   * Reports whether <code>cs</code> is this charset or US-ASCII.
+   */
+  public boolean contains (Charset cs)
+  {
+    if (cs instanceof ISO_8859_1)
+      return true;
+    return cs instanceof US_ASCII;
+  }
+
+  public CharsetDecoder newDecoder ()
+  {
+    return new Decoder (this);
+  }
+
+  public CharsetEncoder newEncoder ()
+  {
+    return new Encoder (this);
+  }
+
+  /**
+   * Widens every input byte to the char with the same unsigned value.
+   */
+  private static final class Decoder extends CharsetDecoder
+  {
+    private Decoder (Charset cs)
+    {
+      super (cs, 1.0f, 1.0f);
+    }
+
+    protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
+    {
+      // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+      while (in.hasRemaining ())
+        {
+          // Check for room first so that no byte is consumed on overflow.
+          if (!out.hasRemaining ())
+            return CoderResult.OVERFLOW;
+
+          int unsigned = in.get () & 0xFF;
+          out.put ((char) unsigned);
+        }
+
+      return CoderResult.UNDERFLOW;
+    }
+  }
+
+  /**
+   * Narrows chars in the range 0x00-0xFF to a single byte; any other
+   * char is reported as unmappable.
+   */
+  private static final class Encoder extends CharsetEncoder
+  {
+    private Encoder (Charset cs)
+    {
+      super (cs, 1.0f, 1.0f);
+    }
+
+    protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
+    {
+      // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+      while (in.hasRemaining ())
+        {
+          // Peek at the next char; it is consumed only once it is
+          // known to be encodable and the output has room for it.
+          int at = in.position ();
+          char next = in.get (at);
+
+          if (next > 0xFF)
+            return CoderResult.unmappableForLength (1);
+          if (!out.hasRemaining ())
+            return CoderResult.OVERFLOW;
+
+          in.position (at + 1);
+          out.put ((byte) next);
+        }
+
+      return CoderResult.UNDERFLOW;
+    }
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/Provider.java b/libjava/gnu/java/nio/charset/Provider.java
new file mode 100644
index 0000000..13f6371
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/Provider.java
@@ -0,0 +1,135 @@
+/* Provider.java --
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.charset.Charset;
+import java.nio.charset.spi.CharsetProvider;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+
+/**
+ * Charset provider for the required charsets.  Used by
+ * {@link Charset#forName} and {@link Charset#availableCharsets}.
+ *
+ * <p>Charset names are matched without regard to case: every canonical
+ * name and alias is registered in lower case, and lookups lower-case the
+ * requested name before searching.</p>
+ *
+ * @author Jesse Rosenstock
+ * @see Charset
+ */
+public final class Provider extends CharsetProvider
+{
+  /** Lazily created shared instance; guarded by the class lock. */
+  private static Provider singleton;
+
+  /**
+   * Map from lower-cased charset name (canonical name or alias) to
+   * charset canonical name.
+   */
+  private final HashMap canonicalNames;
+
+  /**
+   * Map from canonical name to Charset.
+   * TODO: We may want to use soft references.  We would then need to keep
+   * track of the class name to regenerate the object.
+   */
+  private final HashMap charsets;
+
+  private Provider ()
+  {
+    canonicalNames = new HashMap ();
+    charsets = new HashMap ();
+
+    // US-ASCII aka ISO646-US
+    addCharset (new US_ASCII ());
+
+    // ISO-8859-1 aka ISO-LATIN-1
+    addCharset (new ISO_8859_1 ());
+
+    // UTF-8
+    addCharset (new UTF_8 ());
+
+    // UTF-16BE
+    addCharset (new UTF_16BE ());
+
+    // UTF-16LE
+    addCharset (new UTF_16LE ());
+
+    // UTF-16
+    addCharset (new UTF_16 ());
+  }
+
+  /**
+   * Returns a read-only iterator over the registered charsets.
+   */
+  public Iterator charsets ()
+  {
+    return Collections.unmodifiableCollection (charsets.values ())
+                      .iterator ();
+  }
+
+  /**
+   * Looks up a charset by canonical name or alias, ignoring case.
+   *
+   * @param charsetName the name to look up
+   * @return the matching charset, or <code>null</code> if the name is
+   *         <code>null</code> or not registered with this provider
+   */
+  public Charset charsetForName (String charsetName)
+  {
+    if (charsetName == null)
+      return null;
+
+    // An unknown name yields a null canonical name, which in turn
+    // yields null from the second lookup.
+    Object canonicalName = canonicalNames.get (normalize (charsetName));
+    return (Charset) charsets.get (canonicalName);
+  }
+
+  /**
+   * Folds a charset name to the form used as a key in
+   * <code>canonicalNames</code>.  A fixed locale is used so that the
+   * result does not depend on the default locale.
+   */
+  private static String normalize (String charsetName)
+  {
+    return charsetName.toLowerCase (java.util.Locale.ENGLISH);
+  }
+
+  /**
+   * Registers <code>cs</code> under its canonical name and all of its
+   * aliases.
+   */
+  private void addCharset (Charset cs)
+  {
+    String canonicalName = cs.name ();
+    charsets.put (canonicalName, cs);
+
+    // The canonical name is registered too, so that it can be found
+    // regardless of the case the caller used.
+    canonicalNames.put (normalize (canonicalName), canonicalName);
+
+    for (Iterator i = cs.aliases ().iterator (); i.hasNext (); )
+      canonicalNames.put (normalize ((String) i.next ()), canonicalName);
+  }
+
+  /**
+   * Returns the shared provider instance, creating it on first use.
+   */
+  public static synchronized Provider provider ()
+  {
+    if (singleton == null)
+      singleton = new Provider ();
+    return singleton;
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/US_ASCII.java b/libjava/gnu/java/nio/charset/US_ASCII.java
new file mode 100644
index 0000000..a1ff251
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/US_ASCII.java
@@ -0,0 +1,137 @@
+/* US_ASCII.java --
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * The US-ASCII charset, registered under the canonical name "US-ASCII"
+ * with the alias "ISO646-US".  Only the values 0x00-0x7F are valid, both
+ * as bytes and as chars.
+ *
+ * @author Jesse Rosenstock
+ */
+final class US_ASCII extends Charset
+{
+  US_ASCII ()
+  {
+    super ("US-ASCII", new String[]{"ISO646-US"});
+  }
+
+  /**
+   * Reports whether <code>cs</code> is the US-ASCII charset itself.
+   */
+  public boolean contains (Charset cs)
+  {
+    return cs instanceof US_ASCII;
+  }
+
+  public CharsetDecoder newDecoder ()
+  {
+    return new Decoder (this);
+  }
+
+  public CharsetEncoder newEncoder ()
+  {
+    return new Encoder (this);
+  }
+
+  /**
+   * Maps each non-negative byte to the char of the same value; a byte
+   * with the high bit set is reported as malformed input.
+   */
+  private static final class Decoder extends CharsetDecoder
+  {
+    private Decoder (Charset cs)
+    {
+      super (cs, 1.0f, 1.0f);
+    }
+
+    protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
+    {
+      // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+      while (in.hasRemaining ())
+        {
+          // Peek at the next byte; it is consumed only once it is
+          // known to be valid and the output has room for it.
+          int at = in.position ();
+          byte next = in.get (at);
+
+          if (next < 0)
+            return CoderResult.malformedForLength (1);
+          if (!out.hasRemaining ())
+            return CoderResult.OVERFLOW;
+
+          in.position (at + 1);
+          out.put ((char) next);
+        }
+
+      return CoderResult.UNDERFLOW;
+    }
+  }
+
+  /**
+   * Maps each char up to 0x7F to the byte of the same value; any other
+   * char is reported as unmappable.
+   */
+  private static final class Encoder extends CharsetEncoder
+  {
+    private Encoder (Charset cs)
+    {
+      super (cs, 1.0f, 1.0f);
+    }
+
+    protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
+    {
+      // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+      while (in.hasRemaining ())
+        {
+          int at = in.position ();
+          char next = in.get (at);
+
+          if (next > 0x7F)
+            return CoderResult.unmappableForLength (1);
+          if (!out.hasRemaining ())
+            return CoderResult.OVERFLOW;
+
+          in.position (at + 1);
+          out.put ((byte) next);
+        }
+
+      return CoderResult.UNDERFLOW;
+    }
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_16.java b/libjava/gnu/java/nio/charset/UTF_16.java
new file mode 100644
index 0000000..18c9be7
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_16.java
@@ -0,0 +1,75 @@
+/* UTF_16.java --
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * UTF-16 charset.
+ *
+ * <p>Decoding starts with an unknown byte order, which the decoder
+ * resolves from the input.  Encoding uses big-endian byte order and asks
+ * the encoder to emit a byte-order mark, as the Java platform requires
+ * of the "UTF-16" charset.</p>
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_16 extends Charset
+{
+  UTF_16 ()
+  {
+    super ("UTF-16", null);
+  }
+
+  /**
+   * Reports whether <code>cs</code> is one of the charsets implemented
+   * in this package.
+   */
+  public boolean contains (Charset cs)
+  {
+    return cs instanceof US_ASCII || cs instanceof ISO_8859_1
+      || cs instanceof UTF_8 || cs instanceof UTF_16BE
+      || cs instanceof UTF_16LE || cs instanceof UTF_16;
+  }
+
+  public CharsetDecoder newDecoder ()
+  {
+    return new UTF_16Decoder (this, UTF_16Decoder.UNKNOWN_ENDIAN);
+  }
+
+  public CharsetEncoder newEncoder ()
+  {
+    // The last argument is useByteOrderMark: plain UTF-16 output must
+    // start with a byte-order mark so that readers can detect the
+    // byte order.
+    return new UTF_16Encoder (this, UTF_16Encoder.BIG_ENDIAN, true);
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_16BE.java b/libjava/gnu/java/nio/charset/UTF_16BE.java
new file mode 100644
index 0000000..6fb28cd
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_16BE.java
@@ -0,0 +1,75 @@
+/* UTF_16BE.java --
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * UTF-16BE charset.
+ *
+ * <p>Both directions use a fixed big-endian byte order.  The encoder is
+ * asked not to emit a byte-order mark, as the Java platform requires of
+ * the "UTF-16BE" charset.</p>
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_16BE extends Charset
+{
+  UTF_16BE ()
+  {
+    super ("UTF-16BE", null);
+  }
+
+  /**
+   * Reports whether <code>cs</code> is one of the charsets implemented
+   * in this package.
+   */
+  public boolean contains (Charset cs)
+  {
+    return cs instanceof US_ASCII || cs instanceof ISO_8859_1
+      || cs instanceof UTF_8 || cs instanceof UTF_16BE
+      || cs instanceof UTF_16LE || cs instanceof UTF_16;
+  }
+
+  public CharsetDecoder newDecoder ()
+  {
+    return new UTF_16Decoder (this, UTF_16Decoder.BIG_ENDIAN);
+  }
+
+  public CharsetEncoder newEncoder ()
+  {
+    // The last argument is useByteOrderMark: the byte order is already
+    // fixed by the charset name, so no mark is written.
+    return new UTF_16Encoder (this, UTF_16Encoder.BIG_ENDIAN, false);
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_16Decoder.java b/libjava/gnu/java/nio/charset/UTF_16Decoder.java
new file mode 100644
index 0000000..c8e474d
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_16Decoder.java
@@ -0,0 +1,169 @@
+/* UTF_16Decoder.java --
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Decoder for UTF-16, UTF-16LE, and UTF-16BE.
+ *
+ * <p>Input is consumed two bytes at a time and combined into one
+ * <code>char</code> according to the current byte order.  When the
+ * decoder is created with {@link #UNKNOWN_ENDIAN}, the first code unit
+ * selects the byte order: the bytes FE FF select big endian, the bytes
+ * FF FE select little endian (either mark is consumed without producing
+ * output), and anything else selects big endian and is decoded as
+ * ordinary input.</p>
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_16Decoder extends CharsetDecoder
+{
+  // byte orders
+  static final int BIG_ENDIAN = 0;
+  static final int LITTLE_ENDIAN = 1;
+  static final int UNKNOWN_ENDIAN = 2;
+
+  private static final char BYTE_ORDER_MARK = '\uFEFF';
+  private static final char REVERSED_BYTE_ORDER_MARK = '\uFFFE';
+
+  /** Byte order given at construction; restored by implReset. */
+  private final int originalByteOrder;
+
+  /** Byte order currently in effect; may start as UNKNOWN_ENDIAN. */
+  private int byteOrder;
+
+  /**
+   * @param cs the charset this decoder belongs to
+   * @param byteOrder one of BIG_ENDIAN, LITTLE_ENDIAN or UNKNOWN_ENDIAN
+   */
+  UTF_16Decoder (Charset cs, int byteOrder)
+  {
+    super (cs, 0.5f, 1.0f);
+    this.originalByteOrder = byteOrder;
+    this.byteOrder = byteOrder;
+  }
+
+  /**
+   * Decodes as many complete code units as possible.
+   *
+   * <p>On return the input position is just past the last unit that was
+   * fully processed, so a rejected or incomplete unit is left unread.
+   * A surrogate pair is consumed and written as a whole or not at all.</p>
+   *
+   * @return UNDERFLOW when fewer than two bytes (or only half of a
+   *         surrogate pair) remain, OVERFLOW when <code>out</code> has
+   *         no room for the next char or pair, or a malformed-input
+   *         result of length 2 for an unpaired surrogate
+   */
+  protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
+  {
+    // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+
+    // inPos trails the buffer position and marks the end of the input
+    // that has been fully processed; the finally clause rewinds to it.
+    int inPos = in.position ();
+    try
+      {
+        while (in.remaining () >= 2)
+          {
+            byte b1 = in.get ();
+            byte b2 = in.get ();
+
+            // handle byte order mark
+            if (byteOrder == UNKNOWN_ENDIAN)
+              {
+                char mark = toChar (b1, b2);
+                if (mark == BYTE_ORDER_MARK)
+                  {
+                    byteOrder = BIG_ENDIAN;
+                    inPos += 2;
+                    continue;
+                  }
+                else if (mark == REVERSED_BYTE_ORDER_MARK)
+                  {
+                    byteOrder = LITTLE_ENDIAN;
+                    inPos += 2;
+                    continue;
+                  }
+                else
+                  {
+                    // assume big endian, do not skip the bytes,
+                    // continue with normal processing
+                    byteOrder = BIG_ENDIAN;
+                  }
+              }
+
+            char c = byteOrder == BIG_ENDIAN ? toChar (b1, b2)
+                                             : toChar (b2, b1);
+
+            if (0xD800 <= c && c <= 0xDFFF)
+              {
+                // c is a surrogate
+
+                // make sure c is a high surrogate
+                if (c > 0xDBFF)
+                  return CoderResult.malformedForLength (2);
+                if (in.remaining () < 2)
+                  return CoderResult.UNDERFLOW;
+                byte b3 = in.get ();
+                byte b4 = in.get ();
+                char d = byteOrder == BIG_ENDIAN ? toChar (b3, b4)
+                                                 : toChar (b4, b3);
+                // make sure d is a low surrogate
+                if (d < 0xDC00 || d > 0xDFFF)
+                  return CoderResult.malformedForLength (2);
+                // both halves of the pair must fit
+                if (out.remaining () < 2)
+                  return CoderResult.OVERFLOW;
+                out.put (c);
+                out.put (d);
+                inPos += 4;
+              }
+            else
+              {
+                if (!out.hasRemaining ())
+                  return CoderResult.OVERFLOW;
+                out.put (c);
+                inPos += 2;
+              }
+          }
+
+        return CoderResult.UNDERFLOW;
+      }
+    finally
+      {
+        in.position (inPos);
+      }
+  }
+
+  /**
+   * Combines two bytes into one char.  The bytes are masked first
+   * because <code>byte</code> is signed: without the mask a low byte of
+   * 0x80 or above would sign-extend and overwrite the high byte.
+   */
+  private static char toChar (byte high, byte low)
+  {
+    return (char) (((high & 0xFF) << 8) | (low & 0xFF));
+  }
+
+  /**
+   * Restores the byte order given at construction, so that byte order
+   * detection runs again on the next input.
+   */
+  protected void implReset ()
+  {
+    byteOrder = originalByteOrder;
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_16Encoder.java b/libjava/gnu/java/nio/charset/UTF_16Encoder.java
new file mode 100644
index 0000000..b0cb9ed
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_16Encoder.java
@@ -0,0 +1,153 @@
+/* UTF_16Encoder.java --
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * Encoder for UTF-16, UTF-16LE, and UTF-16BE.
+ *
+ * <p>Every char is written as two bytes in the configured byte order.
+ * If requested at construction, a byte-order mark (U+FEFF) is written
+ * once before the first char and again after each reset.</p>
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_16Encoder extends CharsetEncoder
+{
+  // byte orders
+  static final int BIG_ENDIAN = 0;
+  static final int LITTLE_ENDIAN = 1;
+
+  private static final char BYTE_ORDER_MARK = '\uFEFF';
+
+  private final int byteOrder;
+  private final boolean useByteOrderMark;
+
+  /** True until the byte-order mark has been written. */
+  private boolean needsByteOrderMark;
+
+  /**
+   * @param cs the charset this encoder belongs to
+   * @param byteOrder BIG_ENDIAN or LITTLE_ENDIAN
+   * @param useByteOrderMark whether output starts with a byte-order mark
+   */
+  UTF_16Encoder (Charset cs, int byteOrder, boolean useByteOrderMark)
+  {
+    // The replacement is U+FFFD written in the requested byte order.
+    super (cs, 2.0f,
+           useByteOrderMark ? 4.0f : 2.0f,
+           byteOrder == BIG_ENDIAN
+             ? new byte[] { (byte) 0xFF, (byte) 0xFD }
+             : new byte[] { (byte) 0xFD, (byte) 0xFF });
+    this.byteOrder = byteOrder;
+    this.useByteOrderMark = useByteOrderMark;
+    this.needsByteOrderMark = useByteOrderMark;
+  }
+
+  /**
+   * Encodes as many chars as possible.
+   *
+   * <p>On return the input position is just past the last char that was
+   * fully processed, so a rejected or incomplete char is left unread.
+   * A surrogate pair is consumed and written as a whole or not at all.</p>
+   *
+   * @return UNDERFLOW when the input is exhausted or ends with a high
+   *         surrogate, OVERFLOW when <code>out</code> has no room for
+   *         the byte-order mark or the next char or pair, or a
+   *         malformed-input result of length 1 for an unpaired surrogate
+   */
+  protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
+  {
+    // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+
+    if (needsByteOrderMark)
+      {
+        if (out.remaining () < 2)
+          return CoderResult.OVERFLOW;
+        put (out, BYTE_ORDER_MARK);
+        needsByteOrderMark = false;
+      }
+
+    // inPos trails the buffer position and marks the end of the input
+    // that has been fully processed; the finally clause rewinds to it.
+    int inPos = in.position ();
+    try
+      {
+        while (in.hasRemaining ())
+          {
+            char c = in.get ();
+
+            if (0xD800 <= c && c <= 0xDFFF)
+              {
+                // c is a surrogate
+
+                // make sure c is a high surrogate
+                if (c > 0xDBFF)
+                  return CoderResult.malformedForLength (1);
+                if (in.remaining () < 1)
+                  return CoderResult.UNDERFLOW;
+                char d = in.get ();
+                // make sure d is a low surrogate
+                if (d < 0xDC00 || d > 0xDFFF)
+                  return CoderResult.malformedForLength (1);
+                // both halves of the pair must fit
+                if (out.remaining () < 4)
+                  return CoderResult.OVERFLOW;
+                put (out, c);
+                put (out, d);
+                inPos += 2;
+              }
+            else
+              {
+                if (out.remaining () < 2)
+                  return CoderResult.OVERFLOW;
+                put (out, c);
+                inPos++;
+              }
+          }
+
+        return CoderResult.UNDERFLOW;
+      }
+    finally
+      {
+        in.position (inPos);
+      }
+  }
+
+  /**
+   * Writes <code>c</code> to <code>out</code> in the byte order
+   * specified by <code>byteOrder</code>.  The caller must make sure
+   * that <code>out</code> has at least two bytes remaining.
+   **/
+  private void put (ByteBuffer out, char c)
+  {
+    if (byteOrder == BIG_ENDIAN)
+      {
+        out.put ((byte) (c >> 8));
+        out.put ((byte) (c & 0xFF));
+      }
+    else
+      {
+        out.put ((byte) (c & 0xFF));
+        out.put ((byte) (c >> 8));
+      }
+  }
+
+  /**
+   * Re-arms the byte-order mark so that it is written again at the
+   * start of the next encoding operation, if one was requested.
+   */
+  protected void implReset ()
+  {
+    needsByteOrderMark = useByteOrderMark;
+  }
+
+  // TODO: override canEncode(char) and canEncode(CharSequence)
+  // for performance
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_16LE.java b/libjava/gnu/java/nio/charset/UTF_16LE.java
new file mode 100644
index 0000000..b914ae0
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_16LE.java
@@ -0,0 +1,75 @@
+/* UTF_16LE.java --
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * UTF-16LE charset.
+ *
+ * <p>Both directions use a fixed little-endian byte order.  The encoder
+ * is asked not to emit a byte-order mark, as the Java platform requires
+ * of the "UTF-16LE" charset.</p>
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_16LE extends Charset
+{
+  UTF_16LE ()
+  {
+    super ("UTF-16LE", null);
+  }
+
+  /**
+   * Reports whether <code>cs</code> is one of the charsets implemented
+   * in this package.
+   */
+  public boolean contains (Charset cs)
+  {
+    return cs instanceof US_ASCII || cs instanceof ISO_8859_1
+      || cs instanceof UTF_8 || cs instanceof UTF_16BE
+      || cs instanceof UTF_16LE || cs instanceof UTF_16;
+  }
+
+  public CharsetDecoder newDecoder ()
+  {
+    return new UTF_16Decoder (this, UTF_16Decoder.LITTLE_ENDIAN);
+  }
+
+  public CharsetEncoder newEncoder ()
+  {
+    // The last argument is useByteOrderMark: the byte order is already
+    // fixed by the charset name, so no mark is written.
+    return new UTF_16Encoder (this, UTF_16Encoder.LITTLE_ENDIAN, false);
+  }
+}
diff --git a/libjava/gnu/java/nio/charset/UTF_8.java b/libjava/gnu/java/nio/charset/UTF_8.java
new file mode 100644
index 0000000..aa623b2
--- /dev/null
+++ b/libjava/gnu/java/nio/charset/UTF_8.java
@@ -0,0 +1,279 @@
+/* UTF_8.java --
+ Copyright (C) 2002 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+02111-1307 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.java.nio.charset;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+/**
+ * UTF-8 charset.
+ *
+ * <p> UTF-8 references:
+ * <ul>
+ * <li> <a href="http://ietf.org/rfc/rfc2279.txt">RFC 2279</a>
+ * <li> The <a href="http://www.unicode.org/unicode/standard/standard.html">
+ * Unicode standard</a> and
+ * <a href="http://www.unicode.org/versions/corrigendum1.html">
+ * Corrigendum</a>
+ * </ul>
+ *
+ * @author Jesse Rosenstock
+ */
+final class UTF_8 extends Charset
+{
+ UTF_8 ()
+ {
+ super ("UTF-8", null);
+ }
+
+ public boolean contains (Charset cs)
+ {
+ return cs instanceof US_ASCII || cs instanceof ISO_8859_1
+ || cs instanceof UTF_8 || cs instanceof UTF_16BE
+ || cs instanceof UTF_16LE || cs instanceof UTF_16;
+ }
+
+ public CharsetDecoder newDecoder ()
+ {
+ return new Decoder (this);
+ }
+
+ public CharsetEncoder newEncoder ()
+ {
+ return new Encoder (this);
+ }
+
+ private static final class Decoder extends CharsetDecoder
+ {
+ private Decoder (Charset cs)
+ {
+ super (cs, 1.0f, 1.0f);
+ }
+
+ protected CoderResult decodeLoop (ByteBuffer in, CharBuffer out)
+ {
+ // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+ int inPos = 0;
+ try
+ {
+ while (in.hasRemaining ())
+ {
+ char c;
+ byte b1 = in.get ();
+ int highNibble = (b1 >> 4) & 0xF;
+
+ switch (highNibble)
+ {
+ case 0: case 1: case 2: case 3:
+ case 4: case 5: case 6: case 7:
+ if (out.remaining () < 1)
+ return CoderResult.OVERFLOW;
+ out.put ((char) b1);
+ inPos++;
+ break;
+
+ case 0xC: case 0xD:
+ byte b2;
+ if (in.remaining () < 1)
+ return CoderResult.UNDERFLOW;
+ if (out.remaining () < 1)
+ return CoderResult.OVERFLOW;
+ if (!isContinuation (b2 = in.get ()))
+ return CoderResult.malformedForLength (1);
+ c = (char) (((b1 & 0x1F) << 6) | (b2 & 0x3F));
+ // check that we had the shortest encoding
+ if (c <= 0x7F)
+ return CoderResult.malformedForLength (2);
+ out.put (c);
+ inPos += 2;
+ break;
+
+ case 0xE:
+ byte b3;
+ if (in.remaining () < 2)
+ return CoderResult.UNDERFLOW;
+ if (out.remaining () < 1)
+ return CoderResult.OVERFLOW;
+ if (!isContinuation (b2 = in.get ()))
+ return CoderResult.malformedForLength (1);
+ if (!isContinuation (b3 = in.get ()))
+ return CoderResult.malformedForLength (1);
+ c = (char) (((b1 & 0x0F) << 12)
+ | ((b2 & 0x3F) << 6)
+ | (b3 & 0x3F));
+ // check that we had the shortest encoding
+ if (c <= 0x7FF)
+ return CoderResult.malformedForLength (3);
+ out.put (c);
+ inPos += 3;
+ break;
+
+ default:
+ return CoderResult.malformedForLength (1);
+ }
+ }
+
+ return CoderResult.UNDERFLOW;
+ }
+ finally
+ {
+ // In case we did a get(), then encountered an error, reset the
+ // position to before the error. If there was no error, this
+ // will benignly reset the position to the value it already has.
+ in.position (inPos);
+ }
+ }
+
+ private static boolean isContinuation (byte b)
+ {
+ return (b & 0xC0) == 0x80;
+ }
+ }
+
+ private static final class Encoder extends CharsetEncoder
+ {
+ private Encoder (Charset cs)
+ {
+ // According to
+ // http://www-106.ibm.com/developerworks/unicode/library/utfencodingforms/index.html
+ // On average, English takes slightly over one unit per code point.
+ // Most Latin-script languages take about 1.1 bytes. Greek, Russian,
+ // Arabic and Hebrew take about 1.7 bytes, and most others (including
+ // Japanese, Chinese, Korean and Hindi) take about 3 bytes.
+ // We assume we will be dealing with latin scripts, and use 1.1
+ // for averageBytesPerChar.
+ super (cs, 1.1f, 4.0f);
+ }
+
+ protected CoderResult encodeLoop (CharBuffer in, ByteBuffer out)
+ {
+ int inPos = 0;
+ try
+ {
+ // TODO: Optimize this in the case in.hasArray() / out.hasArray()
+ while (in.hasRemaining ())
+ {
+ int remaining = out.remaining ();
+ char c = in.get ();
+
+ // UCS-4 range (hex.) UTF-8 octet sequence (binary)
+ // 0000 0000-0000 007F 0xxxxxxx
+ // 0000 0080-0000 07FF 110xxxxx 10xxxxxx
+ // 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
+
+ // Scalar Value UTF-16 byte 1 byte 2 byte 3 byte 4
+ // 0000 0000 0xxx xxxx 0000 0000 0xxx xxxx 0xxx xxxx
+ // 0000 0yyy yyxx xxxx 0000 0yyy yyxx xxxx 110y yyyy 10xx xxxx
+ // zzzz yyyy yyxx xxxx zzzz yyyy yyxx xxxx 1110 zzzz 10yy yyyy 10xx xxxx
+ // u uuuu zzzz yyyy yyxx xxxx 1101 10ww wwzz zzyy 1111 0uuu 10uu zzzz 10yy yyyy 10xx xxxx
+ // + 1101 11yy yyxx xxxx
+ // Note: uuuuu = wwww + 1
+
+ if (c <= 0x7F)
+ {
+ if (remaining < 1)
+ return CoderResult.OVERFLOW;
+ out.put ((byte) c);
+ inPos++;
+ }
+ else if (c <= 0x7FF)
+ {
+ if (remaining < 2)
+ return CoderResult.OVERFLOW;
+ out.put ((byte) (0xC0 | (c >> 6)));
+ out.put ((byte) (0x80 | (c & 0x3F)));
+ inPos++;
+ }
+ else if (0xD800 <= c && c <= 0xDFFF)
+ {
+ if (remaining < 4)
+ return CoderResult.OVERFLOW;
+
+ // we got a low surrogate without a preciding high one
+ if (c > 0xDBFF)
+ return CoderResult.malformedForLength (1);
+
+ // high surrogates
+ if (!in.hasRemaining ())
+ return CoderResult.UNDERFLOW;
+
+ char d = in.get ();
+
+ // make sure d is a low surrogate
+ if (d < 0xDC00 || d > 0xDFFF)
+ return CoderResult.malformedForLength (1);
+
+ // make the 32 bit value
+ // int value2 = (c - 0xD800) * 0x400 + (d - 0xDC00) + 0x10000;
+ int value = (((c & 0x3FF) << 10) | (d & 0x3FF)) + 0x10000;
+ // assert value == value2;
+ out.put ((byte) (0xF0 | (value >> 18)));
+ out.put ((byte) (0x80 | ((value >> 12) & 0x3F)));
+ out.put ((byte) (0x80 | ((value >> 6) & 0x3F)));
+ out.put ((byte) (0x80 | ((value ) & 0x3F)));
+
+ inPos += 2;
+ }
+ else
+ {
+ if (remaining < 3)
+ return CoderResult.OVERFLOW;
+
+ out.put ((byte) (0xE0 | (c >> 12)));
+ out.put ((byte) (0x80 | ((c >> 6) & 0x3F)));
+ out.put ((byte) (0x80 | (c & 0x3F)));
+ inPos++;
+ }
+ }
+
+ return CoderResult.UNDERFLOW;
+ }
+ finally
+ {
+ // In case we did a get(), then encountered an error, reset the
+ // position to before the error. If there was no error, this
+ // will benignly reset the position to the value it already has.
+ in.position (inPos);
+ }
+ }
+ }
+}