diff options
Diffstat (limited to 'libjava/java/net/URLEncoder.java')
-rw-r--r-- | libjava/java/net/URLEncoder.java | 176 |
1 files changed, 129 insertions, 47 deletions
diff --git a/libjava/java/net/URLEncoder.java b/libjava/java/net/URLEncoder.java index 6590dcf..f39b300 100644 --- a/libjava/java/net/URLEncoder.java +++ b/libjava/java/net/URLEncoder.java @@ -1,71 +1,153 @@ -// URLEncoder.java - Provides a method for encoding strings according to -// application/x-www-form-urlencoded MIME type. +/* URLEncoder.java -- Class to convert strings to a properly encoded URL + Copyright (C) 1998, 1999, 2001 Free Software Foundation, Inc. -/* Copyright (C) 1999 Free Software Foundation +This file is part of GNU Classpath. - This file is part of libgcj. +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. -This software is copyrighted work licensed under the terms of the -Libgcj License. Please consult the file "LIBGCJ_LICENSE" for -details. */ +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +As a special exception, if you link this library with other files to +produce an executable, this library does not by itself cause the +resulting executable to be covered by the GNU General Public License. +This exception does not however invalidate any other reasons why the +executable file might be covered by the GNU General Public License. */ package java.net; -import java.io.UnsupportedEncodingException; -/** - * @author Warren Levy <warrenl@cygnus.com> - * @date April 22, 1999. - */ +import java.io.UnsupportedEncodingException; /** - * Written using on-line Java Platform 1.2 API Specification, as well + * Written using on-line Java Platform 1.2/1.4 API Specification, as well * as "The Java Class Libraries", 2nd edition (Addison-Wesley, 1998). * Status: Believed complete and correct. */ + /** + * This utility class contains static methods that converts a + * string into a fully encoded URL string in x-www-form-urlencoded + * format. This format replaces certain disallowed characters with + * encoded equivalents. All upper case and lower case letters in the + * US alphabet remain as is, the space character (' ') is replaced with + * '+' sign, and all other characters are converted to a "%XX" format + * where XX is the hexadecimal representation of that character in a + * certain encoding (by default "UTF-8"). + * <p> + * This method is very useful for encoding strings to be sent to CGI scripts + * + * @author Aaron M. Renn (arenn@urbanophile.com) + * @author Warren Levy <warrenl@cygnus.com> + * @author Mark Wielaard (mark@klomp.org) + */ public class URLEncoder { - // This method, per the JCL, is conservative in that it encodes - // some "allowable" characters as % triplets. + /** + * This method translates the passed in string into x-www-form-urlencoded + * format using the standard "UTF-8" character encoding to hex-encode the + * unsafe characters. + * + * @param s The String to convert + * + * @return The converted String + */ public static String encode(String s) { - // Get the bytes in ISO-Latin-1 (i.e. 8859_1) per the JCL. - // Even though it is the default in most cases, it's specified here - // just in case System.getProperty("file.encoding") is not "8859_1". - String result = ""; try { - byte[] buf = s.getBytes("8859_1"); - int start = 0; - for (int i = 0; i < buf.length; i++) - // For efficiency, check the byte in order of most likely - // possibility so as to minimize the number of comparisons. - // Hence, exclude all the alphanumeric & allowed special chars first. - if ((buf[i] >= 'a' && buf[i] <= 'z') || - (buf[i] >= 'A' && buf[i] <= 'Z') || - (buf[i] >= '0' && buf[i] <= '9') || - buf[i] == '-' || buf[i] == '_' || buf[i] == '.' || buf[i] == '*') - ; // This is the most likely case so exclude first for efficiency. - else if (buf[i] == ' ') - buf[i] = (byte) '+'; // Replace space char with plus symbol. - else - { - result = result + new String(buf, start, i - start, "8859_1") + - "%" + Integer.toHexString(((int) buf[i]) & 0xFF); - start = i + 1; - } - - // Append remainder of allowable chars from the string, if any. - if (start < buf.length) - result = result + - new String(buf, start, buf.length - start, "8859_1"); + return encode(s, "UTF-8"); } - catch (UnsupportedEncodingException ex) + catch (UnsupportedEncodingException uee) { - // This should never happen as "8859_1" is the default encoding. + // Should never happen since UTF-8 should always be supported return s; } + } + + /** + * This method translates the passed in string into x-www-form-urlencoded + * format using the character encoding to hex-encode the unsafe characters. + * + * @param s The String to convert + * @param encoding The encoding to use for unsafe characters + * + * @return The converted String + * + * @since 1.4 + */ + public static String encode(String s, String encoding) + throws UnsupportedEncodingException + { + StringBuffer result = new StringBuffer(); + int length = s.length(); + int start = 0; + int i = 0; - return result; + while (true) + { + while ( i < length && isSafe(s.charAt(i)) ) + i++; + + // Safe character can just be added + result.append(s.substring(start, i)); + + // Are we done? + if (i >= length) + return result.toString(); + else if (s.charAt(i) == ' ') + { + result.append('+'); // Replace space char with plus symbol. + i++; + } + else + { + // Get all unsafe characters + start = i; + char c; + while ( i < length && (c = s.charAt(i)) != ' ' && !isSafe(c) ) + i++; + + // Convert them to %XY encoded strings + String unsafe = s.substring(start,i); + byte bytes[] = unsafe.getBytes(encoding); + for (int j = 0; j < bytes.length; j++) + { + result.append('%'); + result.append(Integer.toHexString(((int) bytes[j]) & 0xFF)); + } + } + start = i; + } } -} + + /** + * Private static method that returns true if the given char is either + * a uppercase or lowercase letter from 'a' till 'z', or a digit froim + * '0' till '9', or one of the characters '-', '_', '.' or '*'. Such + * 'safe' character don't have to be url encoded. + */ + private static boolean isSafe(char c) + { + return ((c >= 'a' && c <= 'z') || + (c >= 'A' && c <= 'Z') || + (c >= '0' && c <= '9') || + c == '-' || c == '_' || c == '.' || c == '*'); + } + + /** + * Private constructor that does nothing. Included to avoid a default + * public constructor being created by the compiler. + */ + private URLEncoder() { } + +} // class URLEncoder |