aboutsummaryrefslogtreecommitdiff
path: root/libjava/gnu
diff options
context:
space:
mode:
Diffstat (limited to 'libjava/gnu')
-rw-r--r--libjava/gnu/gcj/convert/IOConverter.java19
-rw-r--r--libjava/gnu/gcj/convert/Input_8859_1.java4
-rw-r--r--libjava/gnu/gcj/convert/Input_ASCII.java37
-rw-r--r--libjava/gnu/gcj/convert/Output_8859_1.java12
-rw-r--r--libjava/gnu/gcj/convert/Output_ASCII.java54
-rw-r--r--libjava/gnu/gcj/convert/natIconv.cc115
6 files changed, 220 insertions, 21 deletions
diff --git a/libjava/gnu/gcj/convert/IOConverter.java b/libjava/gnu/gcj/convert/IOConverter.java
index c986624..9b5fbad 100644
--- a/libjava/gnu/gcj/convert/IOConverter.java
+++ b/libjava/gnu/gcj/convert/IOConverter.java
@@ -18,6 +18,10 @@ public abstract class IOConverter
// Map encoding aliases to our canonical form.
static private Hashtable hash = new Hashtable ();
+ // True if we have to do byte-order conversions on iconv()
+ // arguments.
+ static protected boolean iconv_byte_swap;
+
static
{
// Manually maintained aliases. Note that the value must be our
@@ -25,6 +29,17 @@ public abstract class IOConverter
hash.put ("ISO-Latin-1", "8859_1");
// All aliases after this point are automatically generated by the
// `encodings.pl' script. Run it to make any corrections.
+ hash.put ("ANSI_X3.4-1968", "ASCII");
+ hash.put ("iso-ir-6", "ASCII");
+ hash.put ("ANSI_X3.4-1986", "ASCII");
+ hash.put ("ISO_646.irv:1991", "ASCII");
+ hash.put ("ASCII", "ASCII");
+ hash.put ("ISO646-US", "ASCII");
+ hash.put ("US-ASCII", "ASCII");
+ hash.put ("us", "ASCII");
+ hash.put ("IBM367", "ASCII");
+ hash.put ("cp367", "ASCII");
+ hash.put ("csASCII", "ASCII");
hash.put ("ISO_8859-1:1987", "8859_1");
hash.put ("iso-ir-100", "8859_1");
hash.put ("ISO_8859-1", "8859_1");
@@ -41,8 +56,12 @@ public abstract class IOConverter
hash.put ("Extended_UNIX_Code_Packed_Format_for_Japanese", "EUCJIS");
hash.put ("csEUCPkdFmtJapanese", "EUCJIS");
hash.put ("EUC-JP", "EUCJIS");
+
+ iconv_byte_swap = iconv_init ();
}
+ private static native boolean iconv_init ();
+
// Turn an alias into the canonical form.
protected static final String canonicalize (String name)
{
diff --git a/libjava/gnu/gcj/convert/Input_8859_1.java b/libjava/gnu/gcj/convert/Input_8859_1.java
index 6c70034..bd5f779 100644
--- a/libjava/gnu/gcj/convert/Input_8859_1.java
+++ b/libjava/gnu/gcj/convert/Input_8859_1.java
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999 Free Software Foundation
+/* Copyright (C) 1999, 2000 Free Software Foundation
This file is part of libgcj.
@@ -9,7 +9,7 @@ details. */
package gnu.gcj.convert;
/**
- * Convert ISO-Latin-1 (8851-1) text to Unicode.
+ * Convert ISO-Latin-1 (8859-1) text to Unicode.
* @author Per Bothner <bothner@cygnus.com>
* @date March 1999.
*/
diff --git a/libjava/gnu/gcj/convert/Input_ASCII.java b/libjava/gnu/gcj/convert/Input_ASCII.java
new file mode 100644
index 0000000..cb531e9
--- /dev/null
+++ b/libjava/gnu/gcj/convert/Input_ASCII.java
@@ -0,0 +1,37 @@
+/* Copyright (C) 2000 Free Software Foundation
+
+ This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
+details. */
+
+package gnu.gcj.convert;
+
+/**
+ * Convert ASCII text to Unicode.
+ * @date October 2000
+ */
+
+public class Input_ASCII extends BytesToUnicode
+{
+  /** Canonical name of the encoding handled by this decoder. */
+  public String getName()
+  {
+    return "ASCII";
+  }
+
+  /**
+   * Decode pending input bytes into <code>outbuffer</code>, beginning at
+   * index <code>outpos</code>.  At most <code>count</code> chars are
+   * stored, and never more than the number of bytes remaining between
+   * <code>inpos</code> and <code>inlength</code>.  Only the low seven
+   * bits of each input byte are kept.
+   * @return number of chars stored.
+   */
+  public int read (char[] outbuffer, int outpos, int count)
+  {
+    // Work on local copies of the fields; inpos is written back once,
+    // after the copy loop has finished.
+    final byte[] src = this.inbuffer;
+    final int start = this.inpos;
+    final int pending = this.inlength - start;
+    final int limit = (count > pending) ? pending : count;
+    int done = 0;
+    for (; done < limit; done++)
+      outbuffer[outpos + done] = (char) (src[start + done] & 0x7f);
+    this.inpos = start + done;
+    return done;
+  }
+}
diff --git a/libjava/gnu/gcj/convert/Output_8859_1.java b/libjava/gnu/gcj/convert/Output_8859_1.java
index ac04ad6..7ae6a61 100644
--- a/libjava/gnu/gcj/convert/Output_8859_1.java
+++ b/libjava/gnu/gcj/convert/Output_8859_1.java
@@ -1,4 +1,4 @@
-/* Copyright (C) 1999 Free Software Foundation
+/* Copyright (C) 1999, 2000 Free Software Foundation
This file is part of libgcj.
@@ -10,9 +10,9 @@ package gnu.gcj.convert;
/**
* Convert Unicode ISO-Latin-1 (8851-1) text.
- * The high-order byte of each character is truncated.
+ * Unrecognized characters are printed as `?'.
* @author Per Bothner <bothner@cygnus.com>
- * @date Match 1999.
+ * @date March 1999.
*/
public class Output_8859_1 extends UnicodeToBytes
@@ -30,7 +30,8 @@ public class Output_8859_1 extends UnicodeToBytes
inlength = avail;
for (int i = inlength; --i >= 0; )
{
- buf[count++] = (byte) inbuffer[inpos++];
+ char c = inbuffer[inpos++];
+ buf[count++] = (byte) ((c > 0xff) ? '?' : c);
}
this.count = count;
return inlength;
@@ -45,7 +46,8 @@ public class Output_8859_1 extends UnicodeToBytes
inlength = avail;
for (int i = inlength; --i >= 0; )
{
- buf[count++] = (byte) str.charAt(inpos++);
+ char c = str.charAt(inpos++);
+ buf[count++] = (byte) ((c > 0xff) ? '?' : c);
}
this.count = count;
return inlength;
diff --git a/libjava/gnu/gcj/convert/Output_ASCII.java b/libjava/gnu/gcj/convert/Output_ASCII.java
new file mode 100644
index 0000000..9f33645
--- /dev/null
+++ b/libjava/gnu/gcj/convert/Output_ASCII.java
@@ -0,0 +1,54 @@
+/* Copyright (C) 2000 Free Software Foundation
+
+ This file is part of libgcj.
+
+This software is copyrighted work licensed under the terms of the
+Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
+details. */
+
+package gnu.gcj.convert;
+
+/**
+ * Convert Unicode to ASCII.
+ * Unrecognized characters are printed as `?'.
+ * @date October 2000
+ */
+
+public class Output_ASCII extends UnicodeToBytes
+{
+  /** Canonical name of the encoding handled by this encoder. */
+  public String getName()
+  {
+    return "ASCII";
+  }
+
+  /** Map one char to its output byte; anything above 0x7f becomes `?'. */
+  private static byte encode (char ch)
+  {
+    if (ch > 0x7f)
+      return (byte) '?';
+    return (byte) ch;
+  }
+
+  /**
+   * Encode chars from <code>inbuffer</code>, starting at
+   * <code>inpos</code>, appending them to <code>buf</code> at
+   * <code>count</code>.  The request is clamped to the space left
+   * in <code>buf</code>.
+   * @return number of chars converted.
+   */
+  public int write (char[] inbuffer, int inpos, int inlength)
+  {
+    final byte[] dest = this.buf;
+    int destPos = this.count;
+    final int room = dest.length - destPos;
+    final int todo = (inlength > room) ? room : inlength;
+    for (int k = 0; k < todo; k++)
+      dest[destPos++] = encode (inbuffer[inpos + k]);
+    this.count = destPos;
+    return todo;
+  }
+
+  /**
+   * Encode chars from <code>str</code>, starting at <code>inpos</code>,
+   * appending them to <code>buf</code> at <code>count</code>.  The
+   * request is clamped to the space left in <code>buf</code>.  The
+   * <code>work</code> array is not used by this encoder.
+   * @return number of chars converted.
+   */
+  public int write (String str, int inpos, int inlength, char[] work)
+  {
+    final byte[] dest = this.buf;
+    int destPos = this.count;
+    final int room = dest.length - destPos;
+    final int todo = (inlength > room) ? room : inlength;
+    for (int k = 0; k < todo; k++)
+      dest[destPos++] = encode (str.charAt (inpos + k));
+    this.count = destPos;
+    return todo;
+  }
+}
diff --git a/libjava/gnu/gcj/convert/natIconv.cc b/libjava/gnu/gcj/convert/natIconv.cc
index 061779c..d346b14 100644
--- a/libjava/gnu/gcj/convert/natIconv.cc
+++ b/libjava/gnu/gcj/convert/natIconv.cc
@@ -44,13 +44,13 @@ gnu::gcj::convert::Input_iconv::init (jstring encoding)
iconv_t h = iconv_open ("UCS-2", buffer);
if (h == (iconv_t) -1)
- JvThrow (new java::io::UnsupportedEncodingException);
+ throw new java::io::UnsupportedEncodingException (encoding);
JvAssert (h != NULL);
handle = reinterpret_cast<gnu::gcj::RawData *> (h);
#else /* HAVE_ICONV */
// If no iconv, just throw an exception.
- JvThrow (new java::io::UnsupportedEncodingException);
+ throw new java::io::UnsupportedEncodingException (encoding);
#endif /* HAVE_ICONV */
}
@@ -75,7 +75,7 @@ gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer,
jchar *out = elements (outbuffer);
size_t inavail = inlength - inpos;
size_t old_in = inavail;
- size_t outavail = count;
+ size_t outavail = count * sizeof (jchar);
size_t old_out = outavail;
char *inbuf = (char *) &bytes[inpos];
@@ -86,8 +86,20 @@ gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer,
&outbuf, &outavail);
// FIXME: what if R==-1?
+ if (iconv_byte_swap)
+ {
+ // Swap the two bytes of every jchar that iconv() just produced.
+ // The store must go back through OUT at the same index it was read
+ // from.  OUTBUF is the cursor that was handed to iconv() (presumably
+ // a char *, as in Output_iconv::write), so indexing it would write a
+ // truncated value somewhere else and leave the converted characters
+ // unswapped.
+ size_t max = (old_out - outavail) / sizeof (jchar);
+ for (size_t i = 0; i < max; ++i)
+ {
+ jchar c = out[outpos + i];
+ out[outpos + i] = (jchar) (((c & 0xff) << 8) | ((c >> 8) & 0xff));
+ }
+ }
+
inpos += old_in - inavail;
- return old_out - outavail;
+ return (old_out - outavail) / sizeof (jchar);
#else /* HAVE_ICONV */
return -1;
#endif /* HAVE_ICONV */
@@ -104,13 +116,13 @@ gnu::gcj::convert::Output_iconv::init (jstring encoding)
iconv_t h = iconv_open (buffer, "UCS-2");
if (h == (iconv_t) -1)
- JvThrow (new java::io::UnsupportedEncodingException);
+ throw new java::io::UnsupportedEncodingException (encoding);
JvAssert (h != NULL);
handle = reinterpret_cast<gnu::gcj::RawData *> (h);
#else /* HAVE_ICONV */
// If no iconv, just throw an exception.
- JvThrow (new java::io::UnsupportedEncodingException);
+ throw new java::io::UnsupportedEncodingException (encoding);
#endif /* HAVE_ICONV */
}
@@ -128,14 +140,15 @@ gnu::gcj::convert::Output_iconv::finalize (void)
jint
gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer,
- jint inpos, jint count)
+ jint inpos, jint inlength)
{
#ifdef HAVE_ICONV
jchar *chars = elements (inbuffer);
jbyte *out = elements (buf);
+ jchar *temp_buffer = NULL;
- size_t inavail = count;
- size_t old_in = count;
+ size_t inavail = inlength * sizeof (jchar);
+ size_t old_in = inavail;
size_t outavail = buf->length - count;
size_t old_out = outavail;
@@ -143,14 +156,88 @@ gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer,
char *inbuf = (char *) &chars[inpos];
char *outbuf = (char *) &out[count];
- size_t r = iconv_adapter (iconv, (iconv_t) handle,
- &inbuf, &inavail,
- &outbuf, &outavail);
- // FIXME: what if R==-1?
+ if (iconv_byte_swap)
+ {
+ // Ugly performance penalty -- don't use losing systems!
+ temp_buffer = (jchar *) _Jv_Malloc (inlength * sizeof (jchar));
+ for (int i = 0; i < inlength; ++i)
+ {
+ // Byte swap.
+ jchar c = (((chars[inpos + i] & 0xff) << 8)
+ | ((chars[inpos + i] >> 8) & 0xff));
+ temp_buffer[i] = c;
+ }
+ inbuf = (char *) temp_buffer;
+ }
+
+ // If the conversion fails on the very first character, then we
+ // assume that the character can't be represented in the output
+ // encoding. There's nothing useful we can do here, so we simply
+ // omit that character. Note that we can't check `errno' because
+ // glibc 2.1.3 doesn't set it correctly. We could check it if we
+ // really needed to, but we'd have to disable support for 2.1.3.
+ size_t loop_old_in = old_in;
+ while (1)
+ {
+ size_t r = iconv_adapter (iconv, (iconv_t) handle,
+ &inbuf, &inavail,
+ &outbuf, &outavail);
+ if (r == -1 && inavail == loop_old_in)
+ {
+ inavail -= 2;
+ if (inavail == 0)
+ break;
+ loop_old_in -= 2;
+ inbuf += 2;
+ }
+ else
+ break;
+ }
+
+ if (temp_buffer != NULL)
+ _Jv_Free (temp_buffer);
count += old_out - outavail;
- return old_in - inavail;
+ return (old_in - inavail) / sizeof (jchar);
#else /* HAVE_ICONV */
return -1;
#endif /* HAVE_ICONV */
}
+
+// Probe the platform iconv() once to learn whether its UCS-2 data has
+// to be byte-swapped.  Returns true when converting the UTF-8 encoding
+// of \ufeff does not yield the jchar value 0xfeff; returns false when
+// it does, when the probe conversion is unavailable or incomplete, or
+// when libgcj was built without HAVE_ICONV.
+jboolean
+gnu::gcj::convert::IOConverter::iconv_init (void)
+{
+  // Some versions of iconv() always return their UCS-2 results in
+  // big-endian order, and they also require UCS-2 inputs to be in
+  // big-endian order.  For instance, glibc 2.1.3 does this.  If the
+  // UTF-8=>UCS-2 iconv converter has this feature, then we assume
+  // that all UCS-2 converters do.  (This might not be the best
+  // heuristic, but it is all we've got.)
+  jboolean result = false;
+#ifdef HAVE_ICONV
+  iconv_t handle = iconv_open ("UCS-2", "UTF-8");
+  if (handle != (iconv_t) -1)
+    {
+      jchar c;
+      unsigned char in[3];
+      char *inp, *outp;
+      size_t inc, outc, r;
+
+      // This is the UTF-8 encoding of \ufeff.
+      in[0] = 0xef;
+      in[1] = 0xbb;
+      in[2] = 0xbf;
+
+      inp = (char *) in;
+      inc = 3;
+      outp = (char *) &c;
+      outc = 2;
+
+      r = iconv_adapter (iconv, handle, &inp, &inc, &outp, &outc);
+      // Conversion must be complete for us to use the result.
+      if (r != (size_t) -1 && inc == 0 && outc == 0)
+        result = (c != 0xfeff);
+
+      // The descriptor was only needed for this probe; release it so
+      // it is not leaked.
+      iconv_close (handle);
+    }
+#endif /* HAVE_ICONV */
+  return result;
+}