diff options
Diffstat (limited to 'libjava/gnu/xml/util')
-rw-r--r-- | libjava/gnu/xml/util/DoParse.java | 306 | ||||
-rw-r--r-- | libjava/gnu/xml/util/DomParser.java | 804 | ||||
-rw-r--r-- | libjava/gnu/xml/util/Resolver.java | 263 | ||||
-rw-r--r-- | libjava/gnu/xml/util/SAXNullTransformerFactory.java | 675 | ||||
-rw-r--r-- | libjava/gnu/xml/util/XCat.java | 1609 | ||||
-rw-r--r-- | libjava/gnu/xml/util/XHTMLWriter.java | 112 | ||||
-rw-r--r-- | libjava/gnu/xml/util/XMLWriter.java | 1927 | ||||
-rw-r--r-- | libjava/gnu/xml/util/package.html | 20 |
8 files changed, 5716 insertions, 0 deletions
diff --git a/libjava/gnu/xml/util/DoParse.java b/libjava/gnu/xml/util/DoParse.java new file mode 100644 index 0000000..23721de --- /dev/null +++ b/libjava/gnu/xml/util/DoParse.java @@ -0,0 +1,306 @@ +/* DoParse.java -- + Copyright (C) 1999,2000,2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. 
*/ + +package gnu.xml.util; + +import java.io.*; + +import org.xml.sax.*; +import org.xml.sax.helpers.XMLReaderFactory; +import org.xml.sax.ext.*; + +// import gnu.xml.pipeline.*; +// Added imports directly to fix a GCJ problem. +import gnu.xml.pipeline.CallFilter; +import gnu.xml.pipeline.DomConsumer; +import gnu.xml.pipeline.EventConsumer; +import gnu.xml.pipeline.EventFilter; +import gnu.xml.pipeline.LinkFilter; +import gnu.xml.pipeline.NSFilter; +import gnu.xml.pipeline.PipelineFactory; +import gnu.xml.pipeline.TeeConsumer; +import gnu.xml.pipeline.TextConsumer; +import gnu.xml.pipeline.ValidationConsumer; +import gnu.xml.pipeline.WellFormednessFilter; +import gnu.xml.pipeline.XIncludeFilter; +import gnu.xml.pipeline.XsltFilter; + + +/** + * This class provides a driver which may be invoked from the command line + * to process a document using a SAX2 parser and a specified XML processing + * pipeline. + * This facilitates some common types of command line tools, such as parsing an + * XML document in order test it for well formedness or validity. + * + * <p>The SAX2 XMLReaderFactory should return a SAX2 XML parser which + * supports both of the standardized extension handlers (for declaration + * and lexical events). That parser will be used to produce events. + * + * <p>The first parameter to the command gives the name of the document that + * will be given to that processor. If it is a file name, it is converted + * to a URL first. + * + * <p>The second parameter describes a simple processing pipeline, and will + * be used as input to {@link gnu.xml.pipeline.PipelineFactory} + * methods which identify the processing to be done. 
Examples of such a + * pipeline include <pre> + * + * nsfix | validate <em>to validate the input document </em> + * nsfix | write ( stdout ) <em>to echo the file as XML text</em> + * dom | nsfix | write ( stdout ) <em>parse into DOM, print the result</em> + * </pre> + * + * <p> Relatively complex pipelines can be described on the command line, but + * not all interesting ones will require as little configuration as can be done + * in that way. Put filters like "nsfix", perhaps followed by "validate", + * at the front of the pipeline so they can be optimized out if a parser + * supports those modes natively. + * + * <p> If the parsing is aborted for any reason, the JVM will exit with a + * failure code. If a validating parse was done then both validation and + * well formedness errors will cause a failure. A non-validating parse + * will report failure on well formedness errors. + * + * @see gnu.xml.pipeline.PipelineFactory + * + * @author David Brownell + */ +final public class DoParse +{ + private DoParse () { /* no instances allowed */ } + + // first reported nonrecoverable error + private static SAXParseException fatal; + + // error categories + private static int errorCount; + private static int fatalCount; + + /** + * Command line invoker for this class; pass a filename or URL + * as the first argument, and a pipeline description as the second. + * Make sure to use filters to condition the input to stages that + * require it; an <em>nsfix</em> filter will be a common requirement, + * to restore syntax that SAX2 parsers delete by default. Some + * conditioning filters may be eliminated by setting parser options. + * (For example, "nsfix" can set the "namespace-prefixes" feature to + * a non-default value of "true". In the same way, "validate" can set + * the "validation" feature to "true".) 
+ */ + public static void main (String argv []) + throws IOException + { + int exitStatus = 1; + + if (argv.length != 2) { + System.err.println ("Usage: DoParse [filename|URL] pipeline-spec"); + System.err.println ("Example pipeline specs:"); + System.err.println (" 'nsfix | validate'"); + System.err.println ( + " ... restore namespace syntax, validate"); + System.err.println (" 'nsfix | write ( stdout )'"); + System.err.println ( + " ... restore namespace syntax, write to stdout as XML" + ); + System.exit (1); + } + + try { + // + // Get input source for specified document (or try ;-) + // + argv [0] = Resolver.getURL (argv [0]); + InputSource input = new InputSource (argv [0]); + + // + // Get the producer, using the system default parser (which + // can be overridden for this particular invocation). + // + // And the pipeline, using commandline options. + // + XMLReader producer; + EventConsumer consumer; + + producer = XMLReaderFactory.createXMLReader (); + + // + // XXX pipeline factory now has a pre-tokenized input + // method, use it ... that way at least some params + // can be written using quotes (have spaces, ...) + // + consumer = PipelineFactory.createPipeline (argv [1]); + + // + // XXX want commandline option for tweaking error handler. + // Want to be able to present warnings. + // + producer.setErrorHandler (new MyErrorHandler ()); + + // XXX need facility enabling resolving to local DTDs + + // + // Parse. The pipeline may get optimized a bit, so we + // can't always fail cleanly for validation without taking + // a look at the filter stages. + // + EventFilter.bind (producer, consumer); + producer.parse (input); + + try { + if (producer.getFeature ( + "http://org.xml/sax/features/validation")) + exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0; + else if (fatalCount == 0) + exitStatus = 0; + } catch (SAXException e) { + if (hasValidator (consumer)) + exitStatus = ((errorCount + fatalCount) > 0) ? 
1 : 0; + else if (fatalCount == 0) + exitStatus = 0; + } + + } catch (java.net.MalformedURLException e) { + System.err.println ("** Malformed URL: " + e.getMessage ()); + System.err.println ("Is '" + argv [0] + "' a non-existent file?"); + e.printStackTrace (); + // e.g. FNF + + } catch (SAXParseException e) { + if (e != fatal) { + System.err.print (printParseException ("Parsing Aborted", e)); + e.printStackTrace (); + if (e.getException () != null) { + System.err.println ("++ Wrapped exception:"); + e.getException ().printStackTrace (); + } + } + + } catch (SAXException e) { + Exception x = e; + if (e.getException () != null) + x = e.getException (); + x.printStackTrace (); + + } catch (Throwable t) { + t.printStackTrace (); + } + + System.exit (exitStatus); + } + + // returns true if saw a validator (before end or unrecognized node) + // false otherwise + private static boolean hasValidator (EventConsumer e) + { + if (e == null) + return false; + if (e instanceof ValidationConsumer) + return true; + if (e instanceof TeeConsumer) { + TeeConsumer t = (TeeConsumer) e; + return hasValidator (t.getFirst ()) + || hasValidator (t.getRest ()); + } + if (e instanceof WellFormednessFilter + || e instanceof NSFilter + ) + return hasValidator (((EventFilter)e).getNext ()); + + // else ... gee, we can't know. Assume not. 
+ + return false; + } + + static class MyErrorHandler implements ErrorHandler + { + // dump validation errors, but continue + public void error (SAXParseException e) + throws SAXParseException + { + errorCount++; + System.err.print (printParseException ("Error", e)); + } + + public void warning (SAXParseException e) + throws SAXParseException + { + // System.err.print (printParseException ("Warning", e)); + } + + // try to continue fatal errors, in case a parser reports more + public void fatalError (SAXParseException e) + throws SAXParseException + { + fatalCount++; + if (fatal == null) + fatal = e; + System.err.print (printParseException ("Nonrecoverable Error", e)); + } + } + + static private String printParseException ( + String label, + SAXParseException e + ) { + StringBuffer buf = new StringBuffer (); + int temp; + + buf.append ("** "); + buf.append (label); + buf.append (": "); + buf.append (e.getMessage ()); + buf.append ('\n'); + if (e.getSystemId () != null) { + buf.append (" URI: "); + buf.append (e.getSystemId ()); + buf.append ('\n'); + } + if ((temp = e.getLineNumber ()) != -1) { + buf.append (" line: "); + buf.append (temp); + buf.append ('\n'); + } + if ((temp = e.getColumnNumber ()) != -1) { + buf.append (" char: "); + buf.append (temp); + buf.append ('\n'); + } + + return buf.toString (); + } +} diff --git a/libjava/gnu/xml/util/DomParser.java b/libjava/gnu/xml/util/DomParser.java new file mode 100644 index 0000000..0a75d54 --- /dev/null +++ b/libjava/gnu/xml/util/DomParser.java @@ -0,0 +1,804 @@ +/* DomParser.java -- + Copyright (C) 1999,2000,2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. 
+ +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package gnu.xml.util; + +import java.util.Enumeration; +import java.util.Locale; + +import org.xml.sax.*; +import org.xml.sax.helpers.AttributesImpl; +import org.xml.sax.helpers.NamespaceSupport; +import org.xml.sax.ext.DeclHandler; +import org.xml.sax.ext.DefaultHandler2; +import org.xml.sax.ext.LexicalHandler; + +import org.w3c.dom.*; + + +/** + * This parser emits SAX2 parsing events as it traverses a DOM tree, using + * any conformant implementation of DOM. 
/**
 * SAX2 <code>XMLReader</code> which emits parsing events while walking a
 * DOM tree instead of reading text.  It exposes all SAX1 features,
 * and the following SAX2 features and properties (as
 * identified by standard URIs which are not fully provided here).  Note
 * that namespace lookup for a Level 1 DOM is not implemented: unless the
 * <em>namespaces</em> feature has been turned off, walking an element of
 * such a DOM throws a <code>RuntimeException</code>. </p>
 *
 * <table border="1" width='100%' cellpadding='3' cellspacing='0'>
 * <tr bgcolor='#ccccff'>
 *	<th><font size='+1'>Name</font></th>
 *	<th><font size='+1'>Notes</font></th></tr>
 *
 * <tr><td colspan=2><center><em>Features ... URL prefix is
 * <b>http://xml.org/sax/features/</b></em></center></td></tr>
 *
 * <tr><td>(URL)/external-general-entities</td>
 *	<td>false (does no parsing)</td></tr>
 * <tr><td>(URL)/external-parameter-entities</td>
 *	<td>false (does no parsing)</td></tr>
 * <tr><td>(URL)/namespaces</td>
 *	<td>Value is settable, defaulting to <em>true</em>; it may only be
 *	turned off while <em>namespace-prefixes</em> is on</td></tr>
 * <tr><td>(URL)/namespace-prefixes</td>
 *	<td>Value is settable, defaulting to <em>false</em>
 *	(<code>xmlns</code> attributes hidden, and names aren't prefixed)
 *	</td></tr>
 * <tr><td>(URL)/string-interning</td>
 *	<td>Value is fixed at <em>false</em> (DOM provides no
 *	guarantees as to interning)</td></tr>
 * <tr><td>(URL)/validation</td>
 *	<td>false (does no parsing)</td></tr>
 * <tr><td>(URL)/lexical-handler/parameter-entities</td>
 *	<td>false (DOM doesn't do parameter entities)</td></tr>
 *
 * <tr><td colspan=2><center><em>Properties ... URL prefix is
 * <b>http://xml.org/sax/properties/</b></em></center></td></tr>
 *
 * <tr><td>(URL)/dom-node</td>
 *	<td>This property may be set before parsing to hold the DOM
 *	node to walk; any arguments given to <em>parse</em>
 *	methods are ignored.  When retrieved
 *	during a parse, this value contains the "current" DOM node.
 *	</td></tr>
 * <tr><td>(URL)/declaration-handler</td>
 *	<td>A declaration handler may be provided.  Declaration of external
 *	general entities is exposed, but not parameter entities; none of the
 *	entity names reported here will begin with "%". </td></tr>
 * <tr><td>(URL)/lexical-handler</td>
 *	<td>A lexical handler may be provided.  While the start and end of
 *	any external subset are reported, expansion of other parameter
 *	entities (e.g. inside attribute list declarations) is not exposed.
 *	Expansion of general entities within attributes is also not exposed
 *	(see below).</td></tr>
 * </table>
 *
 * <P> The consequences of modifying a DOM document tree as it is being walked
 * by this "parser" are unspecified; don't do it! </P>
 *
 * @author David Brownell
 */
final public class DomParser implements XMLReader
{
    private static final String FEATURES = "http://xml.org/sax/features/";
    private static final String HANDLERS = "http://xml.org/sax/properties/";

    // Stuff used internally to route events correctly
    private DefaultHandler2	defaultHandler = new DefaultHandler2 ();

    // per-parse SAX stuff
    private ContentHandler	contentHandler = defaultHandler;
    private DTDHandler		dtdHandler = defaultHandler;
    private DeclHandler		declHandler = defaultHandler;
    private LexicalHandler	lexicalHandler = defaultHandler;

    // shared context
    private ErrorHandler	errHandler = defaultHandler;
    private EntityResolver	resolver = defaultHandler;
    private Locale		locale = Locale.getDefault ();

    // parser state
    private Node		start;
    private Node		current;	// non-null only during a walk
    private boolean		isL2;
    private boolean		showNamespaces = true;
    private boolean		showXML1_0 = false;
    private NamespaceSupport	prefixStack = new NamespaceSupport ();
    private boolean		isDocument;


    /**
     * Constructs an uninitialized <b>SAX2</b> parser.
     */
    public DomParser () {
    }

    /**
     * Constructs an <b>SAX2</b> parser initialized to traverse the specified
     * DOM tree.  If the node is a document, the startDocument() and
     * endDocument() calls bracket the calls exposing children.
     */
    public DomParser (Node node) {
	setStart (node);
    }


    // stuff that most components in an application should be sharing:
    // resolver and error locale.

    /**
     * <b>SAX2</b>: Returns the object used when resolving external
     * entities during parsing (both general and parameter entities).
     */
    public EntityResolver getEntityResolver ()
    {
	return resolver;
    }

    /**
     * <b>SAX1</b>: Provides an object which may be used when resolving external
     * entities during parsing (both general and parameter entities).
     * Passing null restores the default handler.
     */
    public void setEntityResolver (EntityResolver resolver)
    {
	if (resolver == null)
	    resolver = defaultHandler;
	this.resolver = resolver;
    }

    /**
     * <b>SAX1</b>: Identifies the locale which the parser should use for the
     * diagnostics it provides.  Passing null selects the default locale.
     *
     * @exception SAXException as defined in the specification for
     *	<em>org.xml.sax.Parser.setLocale()</em>
     */
    public void setLocale (Locale locale)
    throws SAXException
    {
	if (locale == null)
	    locale = Locale.getDefault ();
	this.locale = locale;
    }


    // different modules will tend to handle error handling the same,
    // but it may not be the same through the whole app

    /**
     * <b>SAX2</b>: Returns the object used to receive callbacks for XML
     * errors of all levels (fatal, nonfatal, warning).
     */
    public ErrorHandler getErrorHandler ()
    {
	return errHandler;
    }

    /**
     * <b>SAX1</b>: Provides an object which receives callbacks for XML errors
     * of all levels (fatal, nonfatal, warning).
     * Passing null restores the default handler.
     */
    public void setErrorHandler (ErrorHandler handler)
    {
	if (handler == null)
	    handler = defaultHandler;
	errHandler = handler;
    }


    // stuff different parts of a module will handle differently

    /**
     * <b>SAX2</b>: Returns the object used to report the logical
     * content of an XML document.
     */
    public ContentHandler getContentHandler ()
    {
	return contentHandler;
    }

    /**
     * <b>SAX2</b>: Assigns the object used to report the logical
     * content of an XML document.
     * Passing null restores the default handler.
     */
    public void setContentHandler (ContentHandler handler)
    {
	if (handler == null)
	    handler = defaultHandler;
	contentHandler = handler;
    }

    /**
     * <b>SAX2</b>: Returns the object used to process declarations related
     * to notations and unparsed entities.
     */
    public DTDHandler getDTDHandler ()
    {
	return dtdHandler;
    }

    /**
     * <b>SAX1</b>: Provides an object which may be used to intercept
     * declarations related to notations and unparsed entities.
     * Passing null restores the default handler.
     */
    public void setDTDHandler (DTDHandler handler)
    {
	if (handler == null)
	    handler = defaultHandler;
	dtdHandler = handler;
    }


    /**
     * <b>SAX1</b>: Parses the previously provided DOM document (the
     * input parameter is ignored).  When this returns, that same
     * document may be parsed again without needing a "reset".
     *
     * @param uri ignored (pass an empty string)
     * @exception SAXException as defined in the specification for
     *	<em>org.xml.sax.Parser.parse()</em>
     * @exception IllegalStateException if no DOM node has been provided,
     *	or if a walk is already in progress
     */
    public void parse (String uri) throws SAXException
    {
	parse ();
    }

    /**
     * <b>SAX1</b>: Parses the previously provided DOM document (the
     * input parameter is ignored).  When this returns, that same
     * document may be parsed again without needing a "reset".
     *
     * @param input ignored
     * @exception SAXException as defined in the specification for
     *	<em>org.xml.sax.Parser.parse()</em>
     * @exception IllegalStateException if no DOM node has been provided,
     *	or if a walk is already in progress
     */
    public void parse (InputSource input) throws SAXException
    {
	parse ();
    }

    // Claims the parser, walks the tree, and always restores the idle
    // state afterwards.
    private void parse () throws SAXException
    {
	// Claim the walk before entering the try block, so that a rejected
	// request can't run the cleanup below and disturb a walk that is
	// already in progress.
	synchronized (this) {
	    if (current != null)
		throw new IllegalStateException ("already walking tree");
	    if (start == null)
		throw new IllegalStateException ("no DOM node to walk");
	    current = start;
	}

	try {
	    walk ();
	} finally {
	    try {
		// for SAX conformance, endDocument must always be called
		if (isDocument)
		    contentHandler.endDocument ();
	    } finally {
		// become reusable even if endDocument itself failed
		current = null;
		prefixStack.reset ();
	    }
	}
    }

    // Tells whether the DOM owning this node claims "XML" "2.0" support.
    private boolean getIsL2 (Node node)
    {
	DOMImplementation	impl;
	Document		doc;

	if (node instanceof Document)
	    doc = (Document) node;
	else
	    doc = node.getOwnerDocument ();
	if (doc == null)
	    throw new RuntimeException ("? unowned node - L2 DTD ?");
	impl = doc.getImplementation ();
	return impl.hasFeature ("XML", "2.0");
    }


    /**
     * <b>SAX2</b>: Tells whether this parser supports the specified feature.
     *
     * @exception SAXNotRecognizedException for feature names other than
     *	those listed in the class documentation
     */
    public boolean getFeature (String name)
    throws SAXNotRecognizedException, SAXNotSupportedException
    {
	// basically, none are relevant -- they relate more to
	// parsing than to walking a "parse tree".

	// FIXME: DOM feature to expose interning?

	if ((FEATURES + "validation").equals (name)
		|| (FEATURES + "external-general-entities")
		    .equals (name)
		|| (FEATURES + "external-parameter-entities")
		    .equals (name)
		|| (FEATURES + "string-interning").equals (name)
		|| (FEATURES + "lexical-handler/parameter-entities")
		    .equals (name)
		)
	    return false;

	if ((FEATURES + "namespaces").equals (name))
	    return showNamespaces;
	if ((FEATURES + "namespace-prefixes").equals (name))
	    return showXML1_0;

	throw new SAXNotRecognizedException (name);
    }

    /**
     * <b>SAX2</b>:  Returns the specified property.  At this time only
     * the declaration and lexical handlers, and current the "DOM" node,
     * are supported.  A handler property which has not been set (or was
     * set to null) is reported as null.
     */
    public Object getProperty (String name)
    throws SAXNotRecognizedException, SAXNotSupportedException
    {
	if ((HANDLERS + "declaration-handler").equals (name))
	    return declHandler == defaultHandler ? null : declHandler;
	if ((HANDLERS + "lexical-handler").equals (name))
	    return lexicalHandler == defaultHandler ? null : lexicalHandler;

	if ((HANDLERS + "dom-node").equals (name))
	    return current;

	// unknown properties
	throw new SAXNotRecognizedException (name);
    }

    /**
     * <b>SAX2</b>:  Sets the state of features supported in this parser.
     * Only the namespace support features are mutable, and they may not
     * both be turned off.
     *
     * @exception IllegalStateException if called during a parse
     * @exception SAXNotSupportedException if the requested value can't
     *	be supported
     */
    public void setFeature (String name, boolean state)
    throws SAXNotRecognizedException, SAXNotSupportedException
    {
	if (current != null)
	    throw new IllegalStateException ("feature change midparse");

	boolean value = getFeature (name);

	if (value == state)
	    return;

	if ((FEATURES + "namespaces").equals (name)) {
	    if (!showXML1_0 && state == false)
		throw new SAXNotSupportedException ("Illegal namespace "
			+ "processing configuration");
	    showNamespaces = state;
	    return;
	}
	if ((FEATURES + "namespace-prefixes").equals (name)) {
	    if (!showNamespaces && state == false)
		throw new SAXNotSupportedException ("Illegal namespace "
			+ "processing configuration");
	    showXML1_0 = state;
	    return;
	}

	throw new SAXNotSupportedException (name);
    }

    /**
     * <b>SAX2</b>:  Assigns the specified property.  At this time only
     * declaration and lexical handlers, and the initial DOM document, are
     * supported.  These must not be changed to values of the wrong type.
     * Like SAX1 handlers, these handlers may be changed at any time;
     * assigning null removes a handler.
     * Like SAX1 input source or document URI, the initial DOM document
     * may not be changed during a parse.
     */
    public void setProperty (String name, Object state)
    throws SAXNotRecognizedException, SAXNotSupportedException
    {
	if ((HANDLERS + "declaration-handler").equals (name)) {
	    if (!(state instanceof DeclHandler || state == null))
		throw new SAXNotSupportedException (name);
	    // never store null: the walk calls the handler unconditionally
	    declHandler = (state == null)
		    ? (DeclHandler) defaultHandler
		    : (DeclHandler) state;
	    return;
	}

	if ((HANDLERS + "lexical-handler").equals (name)) {
	    if (!(state instanceof LexicalHandler || state == null))
		throw new SAXNotSupportedException (name);
	    // never store null: the walk calls the handler unconditionally
	    lexicalHandler = (state == null)
		    ? (LexicalHandler) defaultHandler
		    : (LexicalHandler) state;
	    return;
	}

	if ((HANDLERS + "dom-node").equals (name)) {
	    if (state == null || state instanceof Node) {
		if (current != null)
		    throw new SAXNotSupportedException (
			"property is readonly during parse:  " + name);
		setStart ((Node) state);
		return;
	    }
	    throw new SAXNotSupportedException ("not a DOM Node");
	}

	// unknown properties
	throw new SAXNotRecognizedException (name);
    }

    // Records the node to walk, and what is known about its DOM.
    private void setStart (Node property)
    {
	if (property == null) {
	    // forget everything derived from the previous start node
	    start = null;
	    isL2 = false;
	    isDocument = false;
	    return;
	}

	// may throw; do it before any state changes
	boolean	level2 = getIsL2 (property);

	start = property;
	isL2 = level2;
	isDocument = (property instanceof Document);
    }

    // Namespace lookup is only implemented on top of DOM L2 accessors.
    private void requireL2 ()
    {
	if (!isL2)
// XXX
	    throw new RuntimeException (
		"NYI, ns lookup when parsing L1 DOM");
    }

    // SAX2 wants an empty string where DOM reports "no namespace" as null.
    private static String namespaceOf (Node node)
    {
	String	ns = node.getNamespaceURI ();

	return (ns == null) ? "" : ns;
    }

    // Note:  SAX2 and DOM handle "local" names differently; nodes made
    // with DOM L1 methods have no local name, so use the node name.
    private static String localNameOf (Node node)
    {
	String	local = node.getLocalName ();

	return (local == null) ? node.getNodeName () : local;
    }

    // True only for "xmlns" and "xmlns:prefix" attribute names, not for
    // ordinary attributes whose names merely begin with those letters.
    private static boolean isNamespaceDecl (String name)
    {
	return "xmlns".equals (name) || name.startsWith ("xmlns:");
    }

    //
    // Non-recursive walk, using DOM state when backtracking is needed.
    // The caller has already set "current" to the start node.
    //
    private void walk ()
    throws SAXException
    {
	int			type;
	AttributesImpl		attrs = new AttributesImpl ();
	char			chars [];

	for (;;) {
	    type = current.getNodeType ();

	    //
	    // First, visit the current node, including any "start" calls
	    //
	    switch (type) {

	      case Node.DOCUMENT_NODE:
		contentHandler.startDocument ();
		break;

	      case Node.ELEMENT_NODE:
		startElement (current, attrs);
		attrs.clear ();
		break;

	      case Node.CDATA_SECTION_NODE:
		lexicalHandler.startCDATA ();
		chars = current.getNodeValue ().toCharArray ();
		contentHandler.characters (chars, 0, chars.length);
		lexicalHandler.endCDATA ();
		break;

	      case Node.COMMENT_NODE:
		chars = current.getNodeValue ().toCharArray ();
		lexicalHandler.comment (chars, 0, chars.length);
		break;

	      case Node.DOCUMENT_TYPE_NODE:
		reportDoctype ((DocumentType) current);
		break;

	      case Node.ENTITY_REFERENCE_NODE:
		// this isn't done except (a) in content, and
		// (b) not within a start tag (att value)
		lexicalHandler.startEntity (current.getNodeName ());
		break;

	      case Node.PROCESSING_INSTRUCTION_NODE:
		contentHandler.processingInstruction (
		    current.getNodeName (), current.getNodeValue ());
		break;

	      case Node.TEXT_NODE:
		chars = current.getNodeValue ().toCharArray ();
		contentHandler.characters (chars, 0, chars.length);
		break;

	      default:
		// e.g. fragments, entities, notations, attributes
		throw new SAXException ("Illegal DOM Node type in Document:  "
		    +  current.getNodeType ());
	    }

	    //
	    // Then, pick the next node to visit.  If the next node isn't
	    // a child, an "end" call may be needed before moving on.
	    // If there's no next node, we're done.
	    //
	    Node		next;

	    switch (type) {
	      case Node.DOCUMENT_NODE:
	      case Node.ELEMENT_NODE:
	      case Node.ENTITY_REFERENCE_NODE:
		//
		// For elements that can have children, visit those
		// children before any siblings (i.e. depth first)
		// and after visiting this node (i.e. preorder)
		//
		next = current.getFirstChild ();
		if (next != null) {
		    current = next;
		    break;
		}
		//
		// Else treat this like other childless nodes, but
		// handle this node's "end" immediately.
		//
		callEnd (current);

		// FALLTHROUGH

	      case Node.CDATA_SECTION_NODE:
	      case Node.COMMENT_NODE:
	      case Node.DOCUMENT_TYPE_NODE:
	      case Node.ENTITY_NODE:
	      case Node.PROCESSING_INSTRUCTION_NODE:
	      case Node.TEXT_NODE:
		//
		// Use next sibling, if there is one.
		// Else, climb up a level (calling "end")
		// until we find an ancestral sibling
		// or until we climb back to the start node (FINISH)
		//
		for (;;) {
		    // The start node's "end" has been reported by now;
		    // never wander off into its siblings or ancestors.
		    if (current == start)
			return;
		    if ((next = current.getNextSibling ()) != null)
			break;
		    current = current.getParentNode ();
		    if (current == null)
			return;
		    callEnd (current);
		}
		current = next;
		break;

	      default:
		throw new SAXException (
		    "Illegal DOM Node type found:  " +  current.getNodeType ());
	    }
	}
    }

    // Reports prefix mappings and startElement for one element node,
    // using (and filling) the caller's attribute list.
    private void startElement (Node element, AttributesImpl attrs)
    throws SAXException
    {
	NamedNodeMap	nodes = element.getAttributes ();
	int		length = nodes.getLength ();
	String		ns, local;

	prefixStack.pushContext ();
	for (int i = 0; i < length; i++) {
	    Attr	attr = (Attr) nodes.item (i);
	    String	name = attr.getNodeName ();

	    if (showNamespaces && isNamespaceDecl (name)) {
		String	prefix;
		String	uri;

		// NOTE: DOM L2 (CR2+ and REC) violate the
		// Namespaces REC, treat "xmlns" like a strange
		// attribute instead of a magic token
		if ("xmlns".equals (name))
		    prefix = "";
		else
		    prefix = name.substring (6);
		uri = attr.getNodeValue ();

		prefixStack.declarePrefix (prefix, uri);
		contentHandler.startPrefixMapping (prefix, uri);

		if (!showXML1_0)
		    continue;
	    }

	    //
	    // NOTE:  DOM doesn't record the attribute type info
	    // which SAX exposes; so this always reports CDATA.
	    //
	    // NOTE:  SAX doesn't expose the isSpecified info which
	    // DOM exposes; that's discarded here.  Similarly with
	    // the information DOM hides inside itself about what
	    // the default values for an attribute are.
	    //
	    if (showNamespaces) {
		requireL2 ();
		ns = namespaceOf (attr);
		local = localNameOf (attr);
	    } else
		ns = local = "";
	    attrs.addAttribute (ns, local, name,
		"CDATA", attr.getNodeValue ());
	}

	if (showNamespaces) {
	    requireL2 ();
	    ns = namespaceOf (element);
	    local = localNameOf (element);
	} else
	    ns = local = "";
	contentHandler.startElement (ns, local,
	    element.getNodeName (), attrs);
    }

    // Reports what DOM knows about a document type declaration:  the
    // DTD boundaries, an explanatory comment, notations, and external
    // (parsed or unparsed) general entities.
    private void reportDoctype (DocumentType doctype)
    throws SAXException
    {
	NamedNodeMap	nodes;
	int		length;
	String		message;
	char		buf [];

	//
	// Only DOM L2 supports recreating even some DTDs in full.
	//
	if (isL2) {
	    lexicalHandler.startDTD (doctype.getName (),
		    doctype.getPublicId (),
		    doctype.getSystemId ());
	} else
	    lexicalHandler.startDTD (doctype.getName (),
		    null, null);

	//
	// The only sure way to recreate is to provide both the
	// internal and external subsets.  Otherwise, only part
	// of the job can be done ... because from the DTD, DOM
	// discards both the critical data, like the attribute and
	// element declarations, as well as the PIs and comments
	// that are used to hold their documentation.
	//
	// Even the entity and notation declarations that it can
	// expose can't be recorded without proprietary extensions.
	//
	// We construct a comment to tell what we know about how
	// (in)complete this particular DTD really is.
	//

	//
	// Though DOM L2 lets the whole doctype be recreated,
	// SAX2 can't represent it (input or output).
	// So this will be the typical case.
	//
	if (isL2 && doctype.getInternalSubset () != null)
	    message =
	    " Full DTD known; can't be shown using SAX2. ";

	//
	// Otherwise, we'll concoct a partial DTD.  If there's
	// any more data here at all, it was provided using a
	// (proprietary) extension to DOM.
	//
	else
	    message =
	    " This DTD was was recreated using incomplete DOM L2 records. ";

	buf = message.toCharArray ();
	lexicalHandler.comment (buf, 0, buf.length);

	// report notations first
	nodes = doctype.getNotations ();
	length = nodes.getLength ();
	for (int i = 0; i < length; i++) {
	    Notation notation = (Notation) nodes.item (i);
	    dtdHandler.notationDecl (
		notation.getNodeName (),
		notation.getPublicId (),
		notation.getSystemId ());
	}

	// then parsed and unparsed external general entities
	nodes = doctype.getEntities ();
	length = nodes.getLength ();
	for (int i = 0; i < length; i++) {
	    Entity	entity = (Entity) nodes.item (i);
	    String	notation = entity.getNotationName ();

	    if (notation != null)
		dtdHandler.unparsedEntityDecl (
		    entity.getNodeName (),
		    entity.getPublicId (),
		    entity.getSystemId (),
		    notation);
	    else if (entity.getSystemId () != null)
		declHandler.externalEntityDecl (
		    entity.getNodeName (),
		    entity.getPublicId (),
		    entity.getSystemId ());

	    //
	    // NOTE:  DOM doesn't clearly provide internal
	    // entity support; but in case someone tries to
	    // fudge such support, we defend ourselves above.
	    //
	    // NOTE:  DOM doesn't expose parameter entities
	    // (thank you thank you thank you thank you)
	    //
	}

	//
	// NOTE:  DOM (levels 1 and 2) doesn't expose real
	// typing information (element or attribute decls),
	// as exposed by SAX2 declaration handlers.
	//
	lexicalHandler.endDTD ();
    }

    // Issues the "end" callbacks matching an earlier visit of a
    // container node.
    private void callEnd (Node node) throws SAXException
    {
	switch (node.getNodeType ()) {
	  // only these three container types may ever be found
	  // directly inside a Document.
	  case Node.DOCUMENT_NODE:
	    // for SAX conformance, endDocument must always
	    // be called ... it's done in a "finally" clause)
	    return;

	  case Node.ELEMENT_NODE:
	    if (showNamespaces) {
		requireL2 ();
		// report the same names that startElement reported
		contentHandler.endElement (
		    namespaceOf (node),
		    localNameOf (node),
		    node.getNodeName ());
		for (Enumeration e = prefixStack.getDeclaredPrefixes ();
			e.hasMoreElements ();
			) {
		    contentHandler.endPrefixMapping ((String) e.nextElement ());
		}
	    } else
		contentHandler.endElement ("", "", node.getNodeName ());
	    prefixStack.popContext ();
	    return;

	  case Node.ENTITY_REFERENCE_NODE:
	    // see above -- in content, outside start tags.
	    lexicalHandler.endEntity (node.getNodeName ());
	    return;

	  // these can be given at the top level
	  case Node.DOCUMENT_FRAGMENT_NODE:
	  case Node.ATTRIBUTE_NODE:
	    return;

	  default:
	    throw new SAXException (
		"Illegal DOM container type found:  "
			+  node.getNodeType ());
	}
    }
}
// diff --git a/libjava/gnu/xml/util/Resolver.java b/libjava/gnu/xml/util/Resolver.java new file mode 100644 index 0000000..4ca0787 --- /dev/null +++ b/libjava/gnu/xml/util/Resolver.java @@ -0,0 +1,263 @@ +/* Resolver.java -- + Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
+ +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package gnu.xml.util; + +import java.io.*; + +import java.util.Dictionary; +import java.util.Hashtable; + +import org.xml.sax.*; + + +/** + * Utility implementation of a SAX resolver, which can be used to improve + * network utilization of SAX based XML components. 
/**
 * Utility implementation of a SAX resolver, which can be used to improve
 * network utilization of SAX based XML components.  It does this by
 * supporting local caches of external entities.
 * SAX parsers <em>should</em> use such local caches when possible.
 *
 * @see XCat
 */
public class Resolver implements EntityResolver, Cloneable
{
    /**
     * Updates a dictionary used to map PUBLIC identifiers to file names,
     * so that it uses the mappings in a specified directory.
     * Pairs whose file does not exist are silently skipped.
     *
     * @param table The dictionary to update; each usable pair adds an
     *	entry mapping the PUBLIC identifier to a "file:" URL string.
     * @param mappings Array of string pairs, where the first member
     *	of each pair is a PUBLIC identifier and the second is the
     *	name of a file, relative to the specified directory.
     * @param directory File holding the specified files.
     *
     * @exception IOException as reported by {@link #fileToURL}
     */
    public static void addDirectoryMapping (
        Dictionary table,
        String mappings [][],
        File directory
    ) throws IOException
    {
        for (int i = 0; i < mappings.length; i++) {
            File file = new File (directory, mappings [i][1]);

            if (!file.exists ())	// ?? log a warning ??
                continue;

            table.put (mappings [i][0], fileToURL (file));
        }
    }

    // FIXME: these *URL routines don't quite belong here, except
    // that they're all in the same spirit of making it easy to
    // use local filesystem URIs with XML parsers.

    /**
     * Provides the URL for a named file, without relying on the JDK 1.2
     * {@link java.io.File#toURL File.toURL}() utility method.
     *
     * @param filename the file name to convert.  Relative file names
     *	are resolved the way the JVM resolves them (current to the
     *	process-global current working directory).
     * @return a "file:" URL string for the named file
     *
     * @exception IOException if the file does not exist
     */
    public static String fileNameToURL (String filename)
    throws IOException
    {
        return fileToURL (new File (filename));
    }

    /**
     * Provides the URL for a file, without relying on the JDK 1.2
     * {@link java.io.File#toURL File.toURL}() utility method.
     * The result is "file:" followed by the absolute path, using "/"
     * as the separator; directories get a trailing "/".  No URI
     * escaping is applied to the path.
     *
     * @param f the file to convert.  Relative file names
     *	are resolved the way the JVM resolves them (current to the
     *	process-global current working directory).
     * @return a "file:" URL string for the file
     *
     * @exception IOException if the file does not exist
     */
    public static String fileToURL (File f)
    throws IOException
    {
        String temp;

        // NOTE:  the javax.xml.parsers.DocumentBuilder and
        // javax.xml.transform.stream.StreamSource versions
        // of this don't have this test.  Some JVM versions
        // don't report this error sanely through URL code.
        if (!f.exists ())
            throw new IOException ("no such file: " + f.getName ());

        // FIXME: getAbsolutePath() seems buggy; I'm seeing components
        // like "/foo/../" which are clearly not "absolute"
        // and should have been resolved with the filesystem.

        // Substituting "/" would be wrong, "foo" may have been
        // symlinked ... the URL code will make that change
        // later, so that things can get _really_ broken!

        temp = f.getAbsolutePath ();

        if (File.separatorChar != '/')
            temp = temp.replace (File.separatorChar, '/');
        if (!temp.startsWith ("/"))
            temp = "/" + temp;
        if (!temp.endsWith ("/") && f.isDirectory ())
            temp = temp + "/";
        return "file:" + temp;
    }


    /**
     * Returns a URL string.  Note that if a malformed URL is provided, or
     * the parameter names a nonexistent file, the resulting URL may be
     * malformed.
     *
     * @param fileOrURL If this is the name of a file which exists,
     *	then its URL is returned.  Otherwise the argument is returned.
     * @return the file's URL, or the unmodified argument
     */
    public static String getURL (String fileOrURL)
    {
        try {
            return fileNameToURL (fileOrURL);
        } catch (Exception e) {
            // deliberate: anything that isn't an existing file is
            // assumed to already be a URL and is passed through.
            return fileOrURL;
        }
    }



    // note:  cloneable, this is just copied; unguarded against mods
    private Dictionary pubidMapping;

    /**
     * Constructs a resolver which understands how to map PUBLIC identifiers
     * to other URIs, typically for local copies of standard DTD components.
     *
     * @param dict maps PUBLIC identifiers to URIs.  This is not
     *	copied; subsequent modifications will be reported through the
     *	resolution operations.
     */
    public Resolver (Dictionary dict)
    {
        pubidMapping = dict;
    }


    // FIXME: want notion of a "system default" resolver, presumably
    // loaded with all sorts of useful stuff.  At the same time need
    // a notion of resolver chaining (failure --> next) so that subsystems
    // can set up things that won't interfere with other ones.

    /**
     * This parses most MIME content type strings that have
     * <em>charset=...</em> encoding declarations and returns the
     * specified encoding.  This conforms to RFC 3023, and is useful when
     * constructing InputSource objects from URLConnection objects or
     * other objects using MIME content typing.
     *
     * <p>The media type and the <em>charset</em> attribute name are
     * matched without regard to case.  When the attribute is missing,
     * has no "=", or has an empty value, the default for the media type
     * is returned: "ISO-8859-1" for <em>text/html</em>, "US-ASCII" for
     * any other <em>text/*</em> type, and null otherwise.
     *
     * @param contentType the MIME content type that will be parsed; must
     *	not be null.
     * @return the appropriate encoding, or null if the content type is
     *	not text and there's no usable <code>charset=...</code> attribute
     */
    static public String getEncoding (String contentType)
    {
        // currently a dumb parsing algorithm that works "mostly" and handles
        //	..anything...charset=ABC
        //	..anything...charset=ABC;otherAttr=DEF
        //	..anything...charset=ABC (comment);otherAttr=DEF
        //	..anything...charset= "ABC" (comment);otherAttr=DEF

        int temp;
        String encoding;
        String defValue = null;

        // media types are case-insensitive
        if (contentType.regionMatches (true, 0, "text/", 0, 5))
            defValue = contentType.regionMatches (true, 0, "text/html", 0, 9)
                ? "ISO-8859-1" : "US-ASCII";

        // Assumes 'charset' is only an attribute name, not part
        // of a value, comment, or other attribute name
        // ALSO assumes no escaped values like "\;" or "\)"
        temp = indexOfIgnoreCase (contentType, "charset");
        if (temp == -1)
            return defValue;

        // strip out everything up to '=' ...
        temp = contentType.indexOf ('=', temp);
        if (temp == -1)
            return defValue;
        encoding = contentType.substring (temp + 1);

        // ... and any subsequent attributes
        if ((temp = encoding.indexOf (';')) != -1)
            encoding = encoding.substring (0, temp);
        // ... and any comments after value
        if ((temp = encoding.indexOf ('(')) != -1)
            encoding = encoding.substring (0, temp);
        // ... then whitespace, and any (double) quotes
        encoding = encoding.trim ();
        if (encoding.startsWith ("\"")) {
            encoding = encoding.substring (1);
            // only drop a closing quote that is really there, so an
            // unterminated value doesn't lose its last character
            if (encoding.endsWith ("\""))
                encoding = encoding.substring (0, encoding.length () - 1);
        }

        // "charset=" or "charset=\"\"" names no encoding at all
        if (encoding.length () == 0)
            return defValue;
        return encoding;
    }

    /**
     * Finds the first occurrence of a word within some text, ignoring
     * case, without building a case-converted copy of the text.
     *
     * @return index of the first match, or -1 if there is none
     */
    private static int indexOfIgnoreCase (String text, String word)
    {
        int last = text.length () - word.length ();

        for (int i = 0; i <= last; i++) {
            if (text.regionMatches (true, i, word, 0, word.length ()))
                return i;
        }
        return -1;
    }


    /**
     * Uses a local dictionary of public identifiers to resolve URIs,
     * normally with the goal of minimizing network traffic or latencies.
     *
     * @param publicId the PUBLIC identifier to look up; may be null
     * @param systemId the SYSTEM identifier; not used by this resolver
     * @return an input source for the mapped URI, with its public ID
     *	set, or null when the public ID is null or has no mapping
     */
    public InputSource resolveEntity (String publicId, String systemId)
    throws IOException, SAXException
    {
        InputSource retval = null;
        String uri;

        if (publicId != null
                && ((uri = (String) pubidMapping.get (publicId)) != null)) {
            retval = new InputSource (uri);
            retval.setPublicId (publicId);
        }

        // Could do URN resolution here

        // URL resolution always done by parser

        // FIXME: chain to "next" resolver

        return retval;
    }
}
If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package gnu.xml.util; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.net.URL; +import java.net.URLConnection; +import java.util.Hashtable; +import java.util.Properties; + +import gnu.xml.dom.Consumer; +import gnu.xml.dom.DomDocument; +import gnu.xml.pipeline.DomConsumer; +import gnu.xml.pipeline.EventFilter; + +import javax.xml.transform.*; +import javax.xml.transform.dom.*; +import javax.xml.transform.sax.*; +import javax.xml.transform.stream.*; + +import org.xml.sax.*; +import org.xml.sax.helpers.XMLReaderFactory; +import org.xml.sax.helpers.LocatorImpl; + + +/** + * Implements null transforms. XSLT stylesheets are not supported. + * This class provides a way to translate three representations of + * XML data (SAX event stream, DOM tree, and XML text) into each other. 
+ * In essence it's a thinnish wrapper around basic SAX event + * <a href="../pipeline/package-summary.html">pipeline</a> facilities, which + * exposes only limited functionality. The <em>javax.xml.transform</em> + * functionality is implemented as follows: <ul> + * + * <li>The {@link javax.xml.transform.sax.SAXSource SAXSource} class + * just wraps an {@link XMLReader} and {@link InputSource}, while the + * {@link javax.xml.transform.sax.SAXResult SAXResult} class is less + * functional than a {@link gnu.xml.pipeline.EventConsumer EventConsumer}. + * (Notably, it drops all but one declaration from any DTD.)</li> + * + * <li>The {@link javax.xml.transform.dom.DOMSource DOMSource} class + * corresponds to special SAX parsers like {@link DomParser}, and the + * {@link javax.xml.transform.dom.DOMResult DOMResult} class corresponds + * to a {@link gnu.xml.pipeline.DomConsumer DomConsumer}.</li> + * + * <li>The {@link javax.xml.transform.stream.StreamSource StreamSource} + * class corresponds to a SAX {@link InputSource}, and the + * {@link javax.xml.transform.stream.StreamResult StreamResult} class + * corresponds to a {@link gnu.xml.pipeline.TextConsumer TextConsumer}.</li> + * + * </ul> + * + * <p><em>This implementation is preliminary.</em> + * + * @see gnu.xml.pipeline.XsltFilter + * + * @author David Brownell + */ +public class SAXNullTransformerFactory extends SAXTransformerFactory +{ + + private ErrorListener errListener; + private URIResolver uriResolver; + + /** Default constructor */ + public SAXNullTransformerFactory () { } + + // + // only has stuff that makes sense with null transforms + // + + /** + * Returns true if the requested feature is supported. + * All three kinds of input and output are accepted: + * XML text, SAX events, and DOM nodes. 
+ */ + public boolean getFeature (String feature) + { + return SAXTransformerFactory.FEATURE.equals (feature) + || SAXResult.FEATURE.equals (feature) + || SAXSource.FEATURE.equals (feature) + || DOMResult.FEATURE.equals (feature) + || DOMSource.FEATURE.equals (feature) + || StreamResult.FEATURE.equals (feature) + || StreamSource.FEATURE.equals (feature) + ; + } + + public void setFeature(String name, boolean value) + throws TransformerConfigurationException + { + throw new TransformerConfigurationException(name); + } + + + /** Throws an exception (no implementation attributes are supported) */ + public void setAttribute (String key, Object value) + { + throw new IllegalArgumentException (); + } + + /** Throws an exception (no implementation attributes are supported) */ + public Object getAttribute (String key) + { + throw new IllegalArgumentException (); + } + + /** (not yet implemented) */ + public Source getAssociatedStylesheet (Source source, + String media, + String title, + String charset) + throws TransformerConfigurationException + { + // parse, and find the appropriate xsl-stylesheet PI contents + throw new IllegalArgumentException (); + } + + public Transformer newTransformer () + throws TransformerConfigurationException + { + return new NullTransformer (); + } + + /** + * Returns a TransformerHandler that knows how to generate output + * in all three standard formats. Output text is generated using + * {@link XMLWriter}, and the GNU implementation of + * {@link DomDocument DOM} is used. + * + * @see SAXResult + * @see StreamResult + * @see DOMResult + */ + public TransformerHandler newTransformerHandler () + throws TransformerConfigurationException + { + NullTransformer transformer = new NullTransformer (); + return transformer.handler; + } + + // + // Stuff that depends on XSLT support, which we don't provide + // + private static final String noXSLT = "No XSLT support"; + + /** Throws an exception (XSLT is not supported). 
*/ + public Transformer newTransformer (Source stylesheet) + throws TransformerConfigurationException + { + throw new TransformerConfigurationException (noXSLT); + } + + /** Throws an exception (XSLT is not supported). */ + public Templates newTemplates (Source stylesheet) + throws TransformerConfigurationException + { + throw new TransformerConfigurationException (noXSLT); + } + + /** Throws an exception (XSLT is not supported). */ + public TemplatesHandler newTemplatesHandler () + throws TransformerConfigurationException + { + throw new TransformerConfigurationException (noXSLT); + } + + /** Throws an exception (XSLT is not supported). */ + public TransformerHandler newTransformerHandler (Source stylesheet) + throws TransformerConfigurationException + { + throw new TransformerConfigurationException (noXSLT); + } + + /** Throws an exception (XSLT is not supported). */ + public TransformerHandler newTransformerHandler (Templates stylesheet) + throws TransformerConfigurationException + { + throw new TransformerConfigurationException (noXSLT); + } + + /** Throws an exception (XSLT is not supported). */ + public XMLFilter newXMLFilter (Source stylesheet) + throws TransformerConfigurationException + { + throw new TransformerConfigurationException (noXSLT); + } + + /** Throws an exception (XSLT is not supported). */ + public XMLFilter newXMLFilter (Templates stylesheet) + throws TransformerConfigurationException + { + throw new TransformerConfigurationException (noXSLT); + } + + /** Returns the value assigned by {@link #setErrorListener}. */ + public ErrorListener getErrorListener () + { + return errListener; + } + + /** Assigns a value that would be used when parsing stylesheets */ + public void setErrorListener (ErrorListener e) + { + errListener = e; + } + + /** Returns the value assigned by {@link #setURIResolver}. 
*/ + public URIResolver getURIResolver () + { + return uriResolver; + } + + /** Assigns a value that would be used when parsing stylesheets */ + public void setURIResolver (URIResolver u) + { + uriResolver = u; + } + + + // + // Helper classes. These might in theory be subclassed + // by an XSLT implementation, if they were exported. + // + + static class DomTerminus + extends DomConsumer + { + + DomTerminus (DOMResult result) + throws SAXException + { + // won't really throw SAXException + super (DomDocument.class); + setHandler (new DomHandler (this, result)); + } + + } + + static class DomHandler + extends Consumer.Backdoor + { + + private DOMResult result; + + DomHandler (DomConsumer c, DOMResult r) + throws SAXException + { + // won't really throw SAXException + super (c); + result = r; + } + + public void endDocument () + throws SAXException + { + super.endDocument (); + result.setNode (getDocument ()); + } + + } + + private static OutputStream getOutputStream (String uri) + throws IOException + { + // JDK stupidity: file "protocol does not support output" ... + if (uri.startsWith ("file:")) + return new FileOutputStream (uri.substring (5)); + + // Otherwise ... + URL url = new URL (uri); + URLConnection conn = url.openConnection (); + + conn.setDoOutput (true); + return conn.getOutputStream (); + } + + + static class NullHandler + extends EventFilter + implements TransformerHandler + { + + private String systemId; + private Transformer transformer; + + NullHandler (Transformer t) + { + transformer = t; + } + + public Transformer getTransformer () + { + return transformer; + } + + public String getSystemId () + { + return systemId; + } + + public void setSystemId (String id) + { + systemId = id; + } + + public void setResult (Result result) + { + if (result.getSystemId () != null) + systemId = result.getSystemId (); + + try + { + + // output to partial SAX event stream? 
+ if (result instanceof SAXResult) + { + SAXResult r = (SAXResult) result; + + setContentHandler (r.getHandler ()); + setProperty (LEXICAL_HANDLER, r.getLexicalHandler ()); + // DTD info is filtered out by javax.transform + + // output to DOM tree? + } + else if (result instanceof DOMResult) + { + DomTerminus out = new DomTerminus ((DOMResult) result); + + setContentHandler (out.getContentHandler ()); + setProperty (LEXICAL_HANDLER, + out.getProperty (LEXICAL_HANDLER)); + // save DTD-derived info, if any. + setDTDHandler (out.getDTDHandler ()); + setProperty (DECL_HANDLER, + out.getProperty (DECL_HANDLER)); + + // node is saved into result on endDocument() + + // output to (XML) text? + } + else if (result instanceof StreamResult) + { + StreamResult r = (StreamResult) result; + XMLWriter out; + + // FIXME: when do output properties take effect? + // encoding, standalone decl, xml/xhtml/... ... + + // FIXME: maybe put nsfix filter up front + + try + { + if (r.getWriter () != null) + out = new XMLWriter (r.getWriter ()); + else if (r.getOutputStream () != null) + out = new XMLWriter (r.getOutputStream ()); + else if (r.getSystemId () != null) + out = new XMLWriter ( + getOutputStream (r.getSystemId ())); + else + throw new IllegalArgumentException ( + "bad StreamResult"); + } + catch (IOException e) + { + e.printStackTrace (); + // on jdk 1.4, pass the root cause ... + throw new IllegalArgumentException (e.getMessage ()); + } + + // out.setExpandingEntities (true); + // out.setPrettyPrinting (true); + // out.setXhtml (true); + + setContentHandler (out); + setProperty (LEXICAL_HANDLER, out); + // save DTD info, if any; why not? + setDTDHandler (out); + setProperty (DECL_HANDLER, out); + } + + } + catch (SAXException e) + { + // SAXNotSupportedException or SAXNotRecognizedException: + // "can't happen" ... but SAXException for DOM build probs + // could happen, so ... + // on jdk 1.4, pass the root cause ... 
+ throw new IllegalArgumentException (e.getMessage ()); + } + } + } + + // an interface that adds no value + static class LocatorAdapter + extends LocatorImpl + implements SourceLocator + { + + LocatorAdapter (SAXParseException e) + { + setSystemId (e.getSystemId ()); + setPublicId (e.getPublicId ()); + setLineNumber (e.getLineNumber ()); + setColumnNumber (e.getColumnNumber ()); + } + + } + + // another interface that adds no value + static class ListenerAdapter + implements ErrorHandler + { + + NullTransformer transformer; + + ListenerAdapter (NullTransformer t) + { + transformer = t; + } + + private TransformerException map (SAXParseException e) + { + return new TransformerException ( + e.getMessage (), + new LocatorAdapter (e), + e); + } + + public void error (SAXParseException e) + throws SAXParseException + { + try + { + if (transformer.errListener != null) + transformer.errListener.error (map (e)); + } + catch (TransformerException ex) + { + transformer.ex = ex; + throw e; + } + } + + public void fatalError (SAXParseException e) + throws SAXParseException + { + try + { + if (transformer.errListener != null) + transformer.errListener.fatalError (map (e)); + else + throw map (e); + } catch (TransformerException ex) { + transformer.ex = ex; + throw e; + } + } + + public void warning (SAXParseException e) + throws SAXParseException + { + try + { + if (transformer.errListener != null) + transformer.errListener.warning (map (e)); + } + catch (TransformerException ex) + { + transformer.ex = ex; + throw e; + } + } + } + + static class NullTransformer + extends Transformer + { + + private URIResolver uriResolver; + private Properties props = new Properties (); + private Hashtable params = new Hashtable (7); + + ErrorListener errListener = null; + TransformerException ex = null; + NullHandler handler; + + NullTransformer () + { + super (); + handler = new NullHandler (this); + } + + public ErrorListener getErrorListener () + { + return errListener; + } + + public void 
setErrorListener (ErrorListener e) + { + errListener = e; + } + + public URIResolver getURIResolver () + { + return uriResolver; + } + + public void setURIResolver (URIResolver u) + { + uriResolver = u; + } + + public void setOutputProperties (Properties p) + { + props = (Properties) p.clone (); + } + + public Properties getOutputProperties () + { + return (Properties) props.clone (); + } + + public void setOutputProperty (String name, String value) + { + props.setProperty (name, value); + } + + public String getOutputProperty (String name) + { + return props.getProperty (name); + } + + public void clearParameters () + { + params.clear (); + } + + public void setParameter (String name, Object value) + { + props.put (name, value); + } + + public Object getParameter (String name) + { + return props.get (name); + } + + public void transform (Source in, Result out) + throws TransformerException + { + try + { + XMLReader producer; + InputSource input; + + // Input from DOM? + if (in instanceof DOMSource) + { + DOMSource source = (DOMSource) in; + + if (source.getNode () == null) + throw new IllegalArgumentException ("no DOM node"); + producer = new DomParser (source.getNode ()); + input = null; + + // Input from SAX? + } + else if (in instanceof SAXSource) + { + SAXSource source = (SAXSource) in; + + producer = source.getXMLReader (); + if (producer == null) + producer = XMLReaderFactory.createXMLReader (); + + input = source.getInputSource (); + if (input == null) + { + if (source.getSystemId () != null) + input = new InputSource (source.getSystemId ()); + else + throw new IllegalArgumentException ( + "missing SAX input"); + } + + // Input from a stream or something? 
+ } + else + { + producer = XMLReaderFactory.createXMLReader (); + input = SAXSource.sourceToInputSource (in); + if (input == null) + throw new IllegalArgumentException ("missing input"); + } + + // preserve original namespace prefixes + try + { + producer.setFeature(handler.FEATURE_URI + "namespace-prefixes", + true); + } + catch (Exception e) + { + /* ignore */ + // FIXME if we couldn't, "NsFix" stage before the output .. + } + + // arrange the output + handler.setResult (out); + handler.bind (producer, handler); + + // then parse ... single element pipeline + producer.parse (input); + + } + catch (IOException e) + { + throw new TransformerException ("transform failed", e); + + } + catch (SAXException e) + { + if (ex == null && ex.getCause () == e) + throw ex; + else + throw new TransformerException ("transform failed", e); + + } + finally + { + ex = null; + } + } + } + +} diff --git a/libjava/gnu/xml/util/XCat.java b/libjava/gnu/xml/util/XCat.java new file mode 100644 index 0000000..269c97b --- /dev/null +++ b/libjava/gnu/xml/util/XCat.java @@ -0,0 +1,1609 @@ +/* XCat.java -- + Copyright (C) 2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. + +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. 
+ +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + + +package gnu.xml.util; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.net.URL; +import java.util.Enumeration; +import java.util.Hashtable; +import java.util.StringTokenizer; +import java.util.Stack; +import java.util.Vector; + +import org.xml.sax.Attributes; +import org.xml.sax.ErrorHandler; +import org.xml.sax.InputSource; +import org.xml.sax.Locator; +import org.xml.sax.SAXException; +import org.xml.sax.SAXNotRecognizedException; +import org.xml.sax.SAXParseException; +import org.xml.sax.XMLReader; + +import org.xml.sax.ext.DefaultHandler2; +import org.xml.sax.ext.EntityResolver2; + +import org.xml.sax.helpers.XMLReaderFactory; + +/** + * Packages <a href= + "http://www.oasis-open.org/committees/entity/spec-2001-08-06.html" + >OASIS XML Catalogs</a>, + * primarily for entity resolution by parsers. + * That specification defines an XML syntax for mappings between + * identifiers declared in DTDs (particularly PUBLIC identifiers) and + * locations. 
SAX has always supported such mappings, but conventions for + * an XML file syntax to maintain them have previously been lacking. + * + * <p> This has three main operational modes. The primary intended mode is + * to create a resolver, then preloading it with one or more site-standard + * catalogs before using it with one or more SAX parsers: <pre> + * XCat catalog = new XCat (); + * catalog.setErrorHandler (diagnosticErrorHandler); + * catalog.loadCatalog ("file:/local/catalogs/catalog.cat"); + * catalog.loadCatalog ("http://shared/catalog.cat"); + * ... + * catalog.disableLoading (); + * parser1.setEntityResolver (catalog); + * parser2.setEntityResolver (catalog); + * ...</pre> + * + * <p>A second mode is to arrange that your application uses instances of + * this class as its entity resolver, and automatically loads catalogs + * referenced by <em><?oasis-xml-catalog...?></em> processing + * instructions found before the DTD in documents it parses. + * It would then discard the resolver after each parse. + * + * <p> A third mode applies catalogs in contexts other than entity + * resolution for parsers. + * The {@link #resolveURI resolveURI()} method supports resolving URIs + * stored in XML application data, rather than inside DTDs. + * Catalogs would be loaded as shown above, and the catalog could + * be used concurrently for parser entity resolution and for + * application URI resolution. + * </p> + * + * <center><hr width='70%'></center> + * + * <p>Errors in catalogs implicitly loaded (during resolution) are ignored + * beyond being reported through any <em>ErrorHandler</em> assigned using + * {@link #setErrorHandler setErrorHandler()}. SAX exceptions + * thrown from such a handler won't abort resolution, although throwing a + * <em>RuntimeException</em> or <em>Error</em> will normally abort both + * resolution and parsing. 
Useful diagnostic information is available to + * any <em>ErrorHandler</em> used to report problems, or from any exception + * thrown from an explicit {@link #loadCatalog loadCatalog()} invocation. + * Applications can use that information as troubleshooting aids. + * + * <p>While this class requires <em>SAX2 Extensions 1.1</em> classes in + * its class path, basic functionality does not require using a SAX2 + * parser that supports the extended entity resolution functionality. + * See the original SAX1 + * {@link #resolveEntity(java.lang.String,java.lang.String) resolveEntity()} + * method for a list of restrictions which apply when it is used with + * older SAX parsers. + * + * @see EntityResolver2 + * + * @author David Brownell + */ +public class XCat implements EntityResolver2 +{ + private Catalog catalogs []; + private boolean usingPublic = true; + private boolean loadingPermitted = true; + private boolean unified = true; + private String parserClass; + private ErrorHandler errorHandler; + + // private EntityResolver next; // chain to next if we fail... + + // + // NOTE: This is a straightforward implementation, and if + // there are lots of "nextCatalog" or "delegate*" entries + // in use, two tweaks would be worth considering: + // + // - Centralize some sort of cache (key by URI) for individual + // resolvers. That'd avoid multiple copies of a given catalog. + // + // - Have resolution track what catalogs (+modes) have been + // searched. This would support loop detection. + // + + + /** + * Initializes without preloading a catalog. + * This API is convenient when you may want to arrange that catalogs + * are automatically loaded when explicitly referenced in documents, + * using the <em>oasis-xml-catalog</em> processing instruction. + * In such cases you won't usually be able to preload catalogs. + */ + public XCat () { } + + /** + * Initializes, and preloads a catalog using the default SAX parser. 
+ * This API is convenient when you operate with one or more standard + * catalogs. + * + * <p> This just delegates to {@link #loadCatalog loadCatalog()}; + * see it for exception information. + * + * @param uri absolute URI for the catalog file. + */ + public XCat (String uri) + throws SAXException, IOException + { loadCatalog (uri); } + + + /** + * Loads an OASIS XML Catalog. + * It is appended to the list of currently active catalogs, or + * reloaded if a catalog with the same URI was already loaded. + * Callers have control over what parser is used, how catalog parsing + * errors are reported, and whether URIs will be resolved consistently. + * + * <p> The OASIS specification says that errors detected when loading + * catalogs "must recover by ignoring the catalog entry file that + * failed, and proceeding." In this API, that action can be the + * responsibility of applications, when they explicitly load any + * catalog using this method. + * + * <p>Note that catalogs referenced by this one will not be loaded + * at this time. Catalogs referenced through <em>nextCatalog</em> + * or <em>delegate*</em> elements are normally loaded only if needed. + * + * @see #setErrorHandler + * @see #setParserClass + * @see #setUnified + * + * @param uri absolute URI for the catalog file. + * + * @exception IOException As thrown by the parser, typically to + * indicate problems reading data from that URI. + * @exception SAXException As thrown by the parser, typically to + * indicate problems parsing data from that URI. It may also + * be thrown if the parser doesn't support necessary handlers. + * @exception IllegalStateException When attempting to load a + * catalog after loading has been {@link #disableLoading disabled}, + * such as after any entity or URI lookup has been performed. 
+ */ + public synchronized void loadCatalog (String uri) + throws SAXException, IOException + { + Catalog catalog; + int index = -1; + + if (!loadingPermitted) + throw new IllegalStateException (); + + uri = normalizeURI (uri); + if (catalogs != null) { + // maybe just reload + for (index = 0; index < catalogs.length; index++) + if (uri.equals (catalogs [index].catalogURI)) + break; + } + catalog = loadCatalog (parserClass, errorHandler, uri, unified); + + // add to list of catalogs + if (catalogs == null) { + index = 0; + catalogs = new Catalog [1]; + } else if (index == catalogs.length) { + Catalog tmp []; + + tmp = new Catalog [index + 1]; + System.arraycopy (catalogs, 0, tmp, 0, index); + catalogs = tmp; + } + catalogs [index] = catalog; + } + + + /** + * "New Style" external entity resolution for parsers. + * Calls to this method prevent explicit loading of additional catalogs + * using {@link #loadCatalog loadCatalog()}. + * + * <p>This supports the full core catalog functionality for locating + * (and relocating) parsed entities that have been declared in a + * document's DTD. + * + * @param name Entity name, such as "dudley", "%nell", or "[dtd]". + * @param publicId Either a normalized public ID, or null. + * @param baseURI Absolute base URI associated with systemId. + * @param systemId URI found in entity declaration (may be + * relative to baseURI). + * + * @return Input source for accessing the external entity, or null + * if no mapping was found. The input source may have opened + * the stream, and will have a fully resolved URI. + * + * @see #getExternalSubset + */ + public InputSource resolveEntity ( + String name, // UNUSED ... systemId is always non-null + String publicId, + String baseURI, // UNUSED ... 
it just lets sysId be relative + String systemId + ) throws SAXException, IOException + { + if (loadingPermitted) + disableLoading (); + + try { + // steps as found in OASIS XML catalog spec 7.1.2 + // steps 1, 8 involve looping over the list of catalogs + for (int i = 0; i < catalogs.length; i++) { + InputSource retval; + retval = catalogs [i].resolve (usingPublic, publicId, systemId); + if (retval != null) + return retval;; + } + } catch (DoneDelegation x) { + // done! + } + // step 9 involves returning "no match" + return null; + } + + + /** + * "New Style" parser callback to add an external subset. + * For documents that don't include an external subset, this may + * return one according to <em>doctype</em> catalog entries. + * (This functionality is not a core part of the OASIS XML Catalog + * specification, though it's presented in an appendix.) + * If no such entry is defined, this returns null to indicate that + * this document will not be modified to include such a subset. + * Calls to this method prevent explicit loading of additional catalogs + * using {@link #loadCatalog loadCatalog()}. + * + * <p><em>Warning:</em> That catalog functionality can be dangerous. + * It can provide definitions of general entities, and thereby mask + * certain well formedess errors. + * + * @param name Name of the document element, either as declared in + * a DOCTYPE declaration or as observed in the text. + * @param baseURI Document's base URI (absolute). + * + * @return Input source for accessing the external subset, or null + * if no mapping was found. The input source may have opened + * the stream, and will have a fully resolved URI. 
+ */ + public InputSource getExternalSubset (String name, String baseURI) + throws SAXException, IOException + { + if (loadingPermitted) + disableLoading (); + try { + for (int i = 0; i < catalogs.length; i++) { + InputSource retval = catalogs [i].getExternalSubset (name); + if (retval != null) + return retval; + } + } catch (DoneDelegation x) { + // done! + } + return null; + } + + + /** + * "Old Style" external entity resolution for parsers. + * This API provides only core functionality. + * Calls to this method prevent explicit loading of additional catalogs + * using {@link #loadCatalog loadCatalog()}. + * + * <p>The functional limitations of this interface include:</p><ul> + * + * <li>Since system IDs will be absolutized before the resolver + * sees them, matching against relative URIs won't work. + * This may affect <em>system</em>, <em>rewriteSystem</em>, + * and <em>delegateSystem</em> catalog entries. + * + * <li>Because of that absolutization, documents declaring entities + * with system IDs using URI schemes that the JVM does not recognize + * may be unparsable. URI schemes such as <em>file:/</em>, + * <em>http://</em>, <em>https://</em>, and <em>ftp://</em> + * will usually work reliably. + * + * <li>Because missing external subsets can't be provided, the + * <em>doctype</em> catalog entries will be ignored. + * (The {@link #getExternalSubset getExternalSubset()} method is + * a "New Style" resolution option.) + * + * </ul> + * + * <p>Applications can tell whether this limited functionality will be + * used: if the feature flag associated with the {@link EntityResolver2} + * interface is not <em>true</em>, the limitations apply. Applications + * can't usually know whether a given document and catalog will trigger + * those limitations. The issue can only be bypassed by operational + * procedures such as not using catalogs or documents which involve + * those features. 
+ * + * @param publicId Either a normalized public ID, or null + * @param systemId Always an absolute URI. + * + * @return Input source for accessing the external entity, or null + * if no mapping was found. The input source may have opened + * the stream, and will have a fully resolved URI. + */ + final public InputSource resolveEntity (String publicId, String systemId) + throws SAXException, IOException + { + return resolveEntity (null, publicId, null, systemId); + } + + + /** + * Resolves a URI reference that's not defined to the DTD. + * This is intended for use with URIs found in document text, such as + * <em>xml-stylesheet</em> processing instructions and in attribute + * values, where they are not recognized as URIs by XML parsers. + * Calls to this method prevent explicit loading of additional catalogs + * using {@link #loadCatalog loadCatalog()}. + * + * <p>This functionality is supported by the OASIS XML Catalog + * specification, but will never be invoked by an XML parser. + * It corresponds closely to functionality for mapping system + * identifiers for entities declared in DTDs; closely enough that + * this implementation's default behavior is that they be + * identical, to minimize potential confusion. + * + * <p>This method could be useful when implementing the + * {@link javax.xml.transform.URIResolver} interface, wrapping the + * input source in a {@link javax.xml.transform.sax.SAXSource}. + * + * @see #isUnified + * @see #setUnified + * + * @param baseURI The relevant base URI as specified by the XML Base + * specification. This recognizes <em>xml:base</em> attributes + * as overriding the actual (physical) base URI. + * @param uri Either an absolute URI, or one relative to baseURI + * + * @return Input source for accessing the mapped URI, or null + * if no mapping was found. The input source may have opened + * the stream, and will have a fully resolved URI. 
+ */ + public InputSource resolveURI (String baseURI, String uri) + throws SAXException, IOException + { + if (loadingPermitted) + disableLoading (); + + // NOTE: baseURI isn't used here, but caller MUST have it, + // and heuristics _might_ use it in the future ... plus, + // it's symmetric with resolveEntity (). + + // steps 1, 6 involve looping + try { + for (int i = 0; i < catalogs.length; i++) { + InputSource tmp = catalogs [i].resolveURI (uri); + if (tmp != null) + return tmp; + } + } catch (DoneDelegation x) { + // done + } + // step 7 reports no match + return null; + } + + + /** + * Records that catalog loading is no longer permitted. + * Loading is automatically disabled when lookups are performed, + * and should be manually disabled when <em>startDTD()</em> (or + * any other DTD declaration callback) is invoked, or at the latest + * when the document root element is seen. + */ + public synchronized void disableLoading () + { + // NOTE: this method and loadCatalog() are synchronized + // so that it's impossible to load (top level) catalogs + // after lookups start. Likewise, deferred loading is also + // synchronized (for "next" and delegated catalogs) to + // ensure that parsers can share resolvers. + loadingPermitted = false; + } + + + /** + * Returns the error handler used to report catalog errors. + * Null is returned if the parser's default error handling + * will be used. + * + * @see #setErrorHandler + */ + public ErrorHandler getErrorHandler () + { return errorHandler; } + + /** + * Assigns the error handler used to report catalog errors. + * These errors may come either from the SAX2 parser or + * from the catalog parsing code driven by the parser. + * + * <p> If you're sharing the resolver between parsers, don't + * change this once lookups have begun. + * + * @see #getErrorHandler + * + * @param parser The error handler, or null saying to use the default + * (no diagnostics, and only fatal errors terminate loading). 
+ */ + public void setErrorHandler (ErrorHandler handler) + { errorHandler = handler; } + + + /** + * Returns the name of the SAX2 parser class used to parse catalogs. + * Null is returned if the system default is used. + * @see #setParserClass + */ + public String getParserClass () + { return parserClass; } + + /** + * Names the SAX2 parser class used to parse catalogs. + * + * <p> If you're sharing the resolver between parsers, don't change + * this once lookups have begun. + * + * <p> Note that in order to properly support the <em>xml:base</em> + * attribute and relative URI resolution, the SAX parser used to parse + * the catalog must provide a {@link Locator} and support the optional + * declaration and lexical handlers. + * + * @see #getParserClass + * + * @param parser The parser class name, or null saying to use the + * system default SAX2 parser. + */ + public void setParserClass (String parser) + { parserClass = parser; } + + + /** + * Returns true (the default) if all methods resolve + * a given URI in the same way. + * Returns false if calls resolving URIs as entities (such as + * {@link #resolveEntity resolveEntity()}) use different catalog entries + * than those resolving them as URIs ({@link #resolveURI resolveURI()}), + * which will generally produce different results. + * + * <p>The OASIS XML Catalog specification defines two related schemes + * to map URIs "as URIs" or "as system IDs". + * URIs use <em>uri</em>, <em>rewriteURI</em>, and <em>delegateURI</em> + * elements. System IDs do the same things with <em>systemId</em>, + * <em>rewriteSystemId</em>, and <em>delegateSystemId</em>. + * It's confusing and error prone to maintain two parallel copies of + * such data. Accordingly, this class makes that behavior optional. 
+ * The <em>unified</em> interpretation of URI mappings is preferred, + * since it prevents surprises where one URI gets mapped to different + * contents depending on whether the reference happens to have come + * from a DTD (or not). + * + * @see #setUnified + */ + public boolean isUnified () + { return unified; } + + /** + * Assigns the value of the flag returned by {@link #isUnified}. + * Set it to false to be strictly conformant with the OASIS XML Catalog + * specification. Set it to true to make all mappings for a given URI + * give the same result, regardless of the reason for the mapping. + * + * <p>Don't change this once you've loaded the first catalog. + * + * @param value new flag setting + */ + public void setUnified (boolean value) + { unified = value; } + + + /** + * Returns true (the default) if a catalog's public identifier + * mappings will be used. + * When false is returned, such mappings are ignored except when + * system IDs are discarded, such as for + * entities using the <em>urn:publicid:</em> URI scheme in their + * system identifiers. (See RFC 3151 for information about that + * URI scheme. Using it in system identifiers may not work well + * with many SAX parsers unless the <em>resolve-dtd-uris</em> + * feature flag is set to false.) + * @see #setUsingPublic + */ + public boolean isUsingPublic () + { return usingPublic; } + + /** + * Specifies which catalog search mode is used. + * By default, public identifier mappings are able to override system + * identifiers when both are available. + * Applications may choose to ignore public + * identifier mappings in such cases, so that system identifiers + * declared in DTDs will only be overridden by an explicit catalog + * match for that system ID. + * + * <p> If you're sharing the resolver between parsers, don't + * change this once lookups have begun. 
+ * @see #isUsingPublic + * + * @param value true to always use public identifier mappings, + * false to only use them for system ids using the <em>urn:publicid:</em> + * URI scheme. + */ + public void setUsingPublic (boolean value) + { usingPublic = value; } + + + + // hmm, what's this do? :) + private static Catalog loadCatalog ( + String parserClass, + ErrorHandler eh, + String uri, + boolean unified + ) throws SAXException, IOException + { + XMLReader parser; + Loader loader; + boolean doesIntern = false; + + if (parserClass == null) + parser = XMLReaderFactory.createXMLReader (); + else + parser = XMLReaderFactory.createXMLReader (parserClass); + if (eh != null) + parser.setErrorHandler (eh); + // resolve-dtd-entities is at default value (unrecognized == true) + + try { + doesIntern = parser.getFeature ( + "http://xml.org/sax/features/string-interning"); + } catch (SAXNotRecognizedException e) { } + + loader = new Loader (doesIntern, eh, unified); + loader.cat.parserClass = parserClass; + loader.cat.catalogURI = uri; + + parser.setContentHandler (loader); + parser.setProperty ( + "http://xml.org/sax/properties/declaration-handler", + loader); + parser.setProperty ( + "http://xml.org/sax/properties/lexical-handler", + loader); + parser.parse (uri); + + return loader.cat; + } + + // perform one or both the normalizations for public ids + private static String normalizePublicId (boolean full, String publicId) + { + if (publicId.startsWith ("urn:publicid:")) { + StringBuffer buf = new StringBuffer (); + char chars [] = publicId.toCharArray (); +boolean hasbug = false; + + for (int i = 13; i < chars.length; i++) { + switch (chars [i]) { + case '+': buf.append (' '); continue; + case ':': buf.append ("//"); continue; + case ';': buf.append ("::"); continue; + case '%': +// FIXME unhex that char! meanwhile, warn and fallthrough ... 
+ hasbug = true; + default: buf.append (chars [i]); continue; + } + } + publicId = buf.toString (); +if (hasbug) +System.err.println ("nyet unhexing public id: " + publicId); + full = true; + } + + // SAX parsers do everything except that URN mapping, but + // we can't trust other sources to normalize correctly + if (full) { + StringTokenizer tokens; + String token; + + tokens = new StringTokenizer (publicId, " \r\n"); + publicId = null; + while (tokens.hasMoreTokens ()) { + if (publicId == null) + publicId = tokens.nextToken (); + else + publicId += " " + tokens.nextToken (); + } + } + return publicId; + } + + private static boolean isUriExcluded (int c) + { return c <= 0x20 || c >= 0x7f || "\"<>^`{|}".indexOf (c) != -1; } + + private static int hexNibble (int c) + { + if (c < 10) + return c + '0'; + return ('a' - 10) + c; + } + + // handles URIs with "excluded" characters + private static String normalizeURI (String systemId) + { + int length = systemId.length (); + + for (int i = 0; i < length; i++) { + char c = systemId.charAt (i); + + // escape non-ASCII plus "excluded" characters + if (isUriExcluded (c)) { + byte buf []; + ByteArrayOutputStream out; + int b; + + // a JVM that doesn't know UTF8 and 8859_1 is unusable! + try { + buf = systemId.getBytes ("UTF8"); + out = new ByteArrayOutputStream (buf.length + 10); + + for (i = 0; i < buf.length; i++) { + b = buf [i] & 0x0ff; + if (isUriExcluded (b)) { + out.write ((int) '%'); + out.write (hexNibble (b >> 4)); + out.write (hexNibble (b & 0x0f)); + } else + out.write (b); + } + return out.toString ("8859_1"); + } catch (IOException e) { + throw new RuntimeException ( + "can't normalize URI: " + e.getMessage ()); + } + } + } + return systemId; + } + + // thrown to mark authoritative end of a search + private static class DoneDelegation extends SAXException + { + DoneDelegation () { } + } + + + /** + * Represents a OASIS XML Catalog, and encapsulates much of + * the catalog functionality. 
+ */ + private static class Catalog + { + // loading infrastructure + String catalogURI; + ErrorHandler eh; + boolean unified; + String parserClass; + + // catalog data + boolean hasPreference; + boolean usingPublic; + + Hashtable publicIds; + Hashtable publicDelegations; + + Hashtable systemIds; + Hashtable systemRewrites; + Hashtable systemDelegations; + + Hashtable uris; + Hashtable uriRewrites; + Hashtable uriDelegations; + + Hashtable doctypes; + + Vector next; + + // nonpublic! + Catalog () { } + + + // steps as found in OASIS XML catalog spec 7.1.2 + private InputSource locatePublicId (String publicId) + throws SAXException, IOException + { + // 5. return (first) 'public' entry + if (publicIds != null) { + String retval = (String) publicIds.get (publicId); + if (retval != null) { + // IF the URI is accessible ... + return new InputSource (retval); + } + } + + // 6. return delegatePublic catalog match [complex] + if (publicDelegations != null) + return checkDelegations (publicDelegations, publicId, + publicId, null); + + return null; + } + + // steps as found in OASIS XML catalog spec 7.1.2 or 7.2.2 + private InputSource mapURI ( + String uri, + Hashtable ids, + Hashtable rewrites, + Hashtable delegations + ) throws SAXException, IOException + { + // 7.1.2: 2. return (first) 'system' entry + // 7.2.2: 2. return (first) 'uri' entry + if (ids != null) { + String retval = (String) ids.get (uri); + if (retval != null) { + // IF the URI is accessible ... + return new InputSource (retval); + } + } + + // 7.1.2: 3. return 'rewriteSystem' entries + // 7.2.2: 3. 
return 'rewriteURI' entries + if (rewrites != null) { + String prefix = null; + String replace = null; + int prefixLen = -1; + + for (Enumeration e = rewrites.keys (); + e.hasMoreElements (); + /* NOP */) { + String temp = (String) e.nextElement (); + int len = -1; + + if (!uri.startsWith (temp)) + continue; + if (prefix != null + && (len = temp.length ()) < prefixLen) + continue; + prefix = temp; + prefixLen = len; + replace = (String) rewrites.get (temp); + } + if (prefix != null) { + StringBuffer buf = new StringBuffer (replace); + buf.append (uri.substring (prefixLen)); + // IF the URI is accessible ... + return new InputSource (buf.toString ()); + } + } + + // 7.1.2: 4. return 'delegateSystem' catalog match [complex] + // 7.2.2: 4. return 'delegateURI' catalog match [complex] + if (delegations != null) + return checkDelegations (delegations, uri, null, uri); + + return null; + } + + + /** + * Returns a URI for an external entity. + */ + public InputSource resolve ( + boolean usingPublic, + String publicId, + String systemId + ) throws SAXException, IOException + { + boolean preferSystem; + InputSource retval; + + if (hasPreference) + preferSystem = !this.usingPublic; + else + preferSystem = !usingPublic; + + if (publicId != null) + publicId = normalizePublicId (false, publicId); + + // behavior here matches section 7.1.1 of the oasis spec + if (systemId != null) { + if (systemId.startsWith ("urn:publicid:")) { + String temp = normalizePublicId (true, systemId); + if (publicId == null) { + publicId = temp; + systemId = null; + } else if (!publicId.equals (temp)) { + // error; ok to recover by: + systemId = null; + } + } else + systemId = normalizeURI (systemId); + } + + if (systemId == null && publicId == null) + return null; + + if (systemId != null) { + retval = mapURI (systemId, systemIds, systemRewrites, + systemDelegations); + if (retval != null) { + retval.setPublicId (publicId); + return retval; + } + } + + if (publicId != null + && !(systemId != null && 
preferSystem)) { + retval = locatePublicId (publicId); + if (retval != null) { + retval.setPublicId (publicId); + return retval; + } + } + + // 7. apply nextCatalog entries + if (next != null) { + int length = next.size (); + for (int i = 0; i < length; i++) { + Catalog n = getNext (i); + retval = n.resolve (usingPublic, publicId, systemId); + if (retval != null) + return retval; + } + } + + return null; + } + + /** + * Maps one URI into another, for resources that are not defined + * using XML external entity or notation syntax. + */ + public InputSource resolveURI (String uri) + throws SAXException, IOException + { + if (uri.startsWith ("urn:publicid:")) + return resolve (true, normalizePublicId (true, uri), null); + + InputSource retval; + + uri = normalizeURI (uri); + + // 7.2.2 steps 2-4 + retval = mapURI (uri, uris, uriRewrites, uriDelegations); + if (retval != null) + return retval; + + // 7.2.2 step 5. apply nextCatalog entries + if (next != null) { + int length = next.size (); + for (int i = 0; i < length; i++) { + Catalog n = getNext (i); + retval = n.resolveURI (uri); + if (retval != null) + return retval; + } + } + + return null; + } + + + /** + * Finds the external subset associated with a given root element. + */ + public InputSource getExternalSubset (String name) + throws SAXException, IOException + { + if (doctypes != null) { + String value = (String) doctypes.get (name); + if (value != null) { + // IF the URI is accessible ... 
+ return new InputSource (value); + } + } + if (next != null) { + int length = next.size (); + for (int i = 0; i < length; i++) { + Catalog n = getNext (i); + if (n == null) + continue; + InputSource retval = n.getExternalSubset (name); + if (retval != null) + return retval; + } + } + return null; + } + + private synchronized Catalog getNext (int i) + throws SAXException, IOException + { + Object obj; + + if (next == null || i < 0 || i >= next.size ()) + return null; + obj = next.elementAt (i); + if (obj instanceof Catalog) + return (Catalog) obj; + + // ok, we deferred reading that catalog till now. + // load and cache it. + Catalog cat = null; + + try { + cat = loadCatalog (parserClass, eh, (String) obj, unified); + next.setElementAt (cat, i); + } catch (SAXException e) { + // must fail quietly, says the OASIS spec + } catch (IOException e) { + // same applies here + } + return cat; + } + + private InputSource checkDelegations ( + Hashtable delegations, + String id, + String publicId, // only one of public/system + String systemId // will be non-null... + ) throws SAXException, IOException + { + Vector matches = null; + int length = 0; + + // first, see if any prefixes match. + for (Enumeration e = delegations.keys (); + e.hasMoreElements (); + /* NOP */) { + String prefix = (String) e.nextElement (); + + if (!id.startsWith (prefix)) + continue; + if (matches == null) + matches = new Vector (); + + // maintain in longer->shorter sorted order + // NOTE: assumes not many matches will fire! + int index; + + for (index = 0; index < length; index++) { + String temp = (String) matches.elementAt (index); + if (prefix.length () > temp.length ()) { + matches.insertElementAt (prefix, index); + break; + } + } + if (index == length) + matches.addElement (prefix); + length++; + } + if (matches == null) + return null; + + // now we know the list of catalogs to replace our "top level" + // list ... 
we use it here, rather than somehow going back and + // restarting, since this helps avoid reading most catalogs. + // this assumes stackspace won't be a problem. + for (int i = 0; i < length; i++) { + Catalog catalog = null; + InputSource result; + + // get this catalog. we may not have read it yet. + synchronized (delegations) { + Object prefix = matches.elementAt (i); + Object cat = delegations.get (prefix); + + if (cat instanceof Catalog) + catalog = (Catalog) cat; + else { + try { + // load and cache that catalog + catalog = loadCatalog (parserClass, eh, + (String) cat, unified); + delegations.put (prefix, catalog); + } catch (SAXException e) { + // must ignore, says the OASIS spec + } catch (IOException e) { + // same applies here + } + } + } + + // ignore failed loads, and proceed + if (catalog == null) + continue; + + // we have a catalog ... resolve! + // usingPublic value can't matter, there's no choice + result = catalog.resolve (true, publicId, systemId); + if (result != null) + return result; + } + + // if there were no successes, the entire + // lookup failed (all the way to top level) + throw new DoneDelegation (); + } + } + + + /** This is the namespace URI used for OASIS XML Catalogs. */ + private static final String catalogNamespace = + "urn:oasis:names:tc:entity:xmlns:xml:catalog"; + + + /** + * Loads/unmarshals one catalog. + */ + private static class Loader extends DefaultHandler2 + { + private boolean preInterned; + private ErrorHandler handler; + private boolean unified; + private int ignoreDepth; + private Locator locator; + private boolean started; + private Hashtable externals; + private Stack bases; + + Catalog cat = new Catalog (); + + + /** + * Constructor. + * @param flag true iff the parser already interns strings. + * @param eh Errors and warnings are delegated to this. + * @param unified true keeps one table for URI mappings; + * false matches OASIS spec, storing mappings + * for URIs and SYSTEM ids in parallel tables. 
+ */ + Loader (boolean flag, ErrorHandler eh, boolean unified) + { + preInterned = flag; + handler = eh; + this.unified = unified; + cat.unified = unified; + cat.eh = eh; + } + + + // strips out fragments + private String nofrag (String uri) + throws SAXException + { + if (uri.indexOf ('#') != -1) { + warn ("URI with fragment: " + uri); + uri = uri.substring (0, uri.indexOf ('#')); + } + return uri; + } + + // absolutizes relative URIs + private String absolutize (String uri) + throws SAXException + { + // avoid creating URLs if they're already absolutized, + // or if the URI is already using a known scheme + if (uri.startsWith ("file:/") + || uri.startsWith ("http:/") + || uri.startsWith ("https:/") + || uri.startsWith ("ftp:/") + || uri.startsWith ("urn:") + ) + return uri; + + // otherwise, let's hope the JDK handles this URI scheme. + try { + URL base = (URL) bases.peek (); + return new URL (base, uri).toString (); + } catch (Exception e) { + fatal ("can't absolutize URI: " + uri); + return null; + } + } + + // recoverable error + private void error (String message) + throws SAXException + { + if (handler == null) + return; + handler.error (new SAXParseException (message, locator)); + } + + // nonrecoverable error + private void fatal (String message) + throws SAXException + { + SAXParseException spe; + + spe = new SAXParseException (message, locator); + if (handler != null) + handler.fatalError (spe); + throw spe; + } + + // low severity problem + private void warn (String message) + throws SAXException + { + if (handler == null) + return; + handler.warning (new SAXParseException (message, locator)); + } + + // callbacks: + + public void setDocumentLocator (Locator l) + { locator = l; } + + public void startDocument () + throws SAXException + { + if (locator == null) + error ("no locator!"); + bases = new Stack (); + String uri = locator.getSystemId (); + try { + bases.push (new URL (uri)); + } catch (IOException e) { + fatal ("bad document base URI: " + uri); 
+ } + } + + public void endDocument () + throws SAXException + { + try { + if (!started) + error ("not a catalog!"); + } finally { + locator = null; + handler = null; + externals = null; + bases = null; + } + } + + // XML Base support for external entities. + + // NOTE: expects parser is in default "resolve-dtd-uris" mode. + public void externalEntityDecl (String name, String pub, String sys) + throws SAXException + { + if (externals == null) + externals = new Hashtable (); + if (externals.get (name) == null) + externals.put (name, pub); + } + + public void startEntity (String name) + throws SAXException + { + if (externals == null) + return; + String uri = (String) externals.get (name); + + // NOTE: breaks if an EntityResolver substitutes these URIs. + // If toplevel loader supports one, must intercept calls... + if (uri != null) { + try { + bases.push (new URL (uri)); + } catch (IOException e) { + fatal ("entity '" + name + "', bad URI: " + uri); + } + } + } + + public void endEntity (String name) + { + if (externals == null) + return; + String value = (String) externals.get (name); + + if (value != null) + bases.pop (); + } + + /** + * Processes catalog elements, saving their data. 
+ */ + public void startElement (String namespace, String local, + String qName, Attributes atts) + throws SAXException + { + // must ignore non-catalog elements, and their contents + if (ignoreDepth != 0 || !catalogNamespace.equals (namespace)) { + ignoreDepth++; + return; + } + + // basic sanity checks + if (!preInterned) + local = local.intern (); + if (!started) { + started = true; + if ("catalog" != local) + fatal ("root element not 'catalog': " + local); + } + + // Handle any xml:base attribute + String xmlbase = atts.getValue ("xml:base"); + + if (xmlbase != null) { + URL base = (URL) bases.peek (); + try { + base = new URL (base, xmlbase); + } catch (IOException e) { + fatal ("can't resolve xml:base attribute: " + xmlbase); + } + bases.push (base); + } else + bases.push (bases.peek ()); + + // fetch multi-element attributes, apply standard tweaks + // values (uri, catalog, rewritePrefix) get normalized too, + // as a precaution and since we may compare the values + String catalog = atts.getValue ("catalog"); + if (catalog != null) + catalog = normalizeURI (absolutize (catalog)); + + String rewritePrefix = atts.getValue ("rewritePrefix"); + if (rewritePrefix != null) + rewritePrefix = normalizeURI (absolutize (rewritePrefix)); + + String systemIdStartString; + systemIdStartString = atts.getValue ("systemIdStartString"); + if (systemIdStartString != null) { + systemIdStartString = normalizeURI (systemIdStartString); + // unmatchable <rewriteSystemId>, <delegateSystemId> elements + if (systemIdStartString.startsWith ("urn:publicid:")) { + error ("systemIdStartString is really a publicId!!"); + return; + } + } + + String uri = atts.getValue ("uri"); + if (uri != null) + uri = normalizeURI (absolutize (uri)); + + String uriStartString; + uriStartString = atts.getValue ("uriStartString"); + if (uriStartString != null) { + uriStartString = normalizeURI (uriStartString); + // unmatchable <rewriteURI>, <delegateURI> elements + if (uriStartString.startsWith 
("urn:publicid:")) { + error ("uriStartString is really a publicId!!"); + return; + } + } + + // strictly speaking "group" and "catalog" shouldn't nest + // ... arbitrary restriction, no evident motivation + +// FIXME stack "prefer" settings (two elements only!) and use +// them to populate different public mapping/delegation tables + + if ("catalog" == local || "group" == local) { + String prefer = atts.getValue ("prefer"); + + if (prefer != null && !"public".equals (prefer)) { + if (!"system".equals (prefer)) { + error ("in <" + local + " ... prefer='...'>, " + + "assuming 'public'"); + prefer = "public"; + } + } + if (prefer != null) { + if ("catalog" == local) { + cat.hasPreference = true; + cat.usingPublic = "public".equals (prefer); + } else { + if (!cat.hasPreference || cat.usingPublic + != "public".equals (prefer)) { +fatal ("<group prefer=...> case not handled"); + } + } + } else if ("group" == local && cat.hasPreference) { +fatal ("<group prefer=...> case not handled"); + } + + // + // PUBLIC ids: cleanly set up for id substitution + // + } else if ("public" == local) { + String publicId = atts.getValue ("publicId"); + String value = null; + + if (publicId == null || uri == null) { + error ("expecting <public publicId=... uri=.../>"); + return; + } + publicId = normalizePublicId (true, publicId); + uri = nofrag (uri); + if (cat.publicIds == null) + cat.publicIds = new Hashtable (); + else + value = (String) cat.publicIds.get (publicId); + if (value != null) { + if (!value.equals (uri)) + warn ("ignoring <public...> entry for " + publicId); + } else + cat.publicIds.put (publicId, uri); + + } else if ("delegatePublic" == local) { + String publicIdStartString; + Object value = null; + + publicIdStartString = atts.getValue ("publicIdStartString"); + if (publicIdStartString == null || catalog == null) { + error ("expecting <delegatePublic " + + "publicIdStartString=... 
catalog=.../>"); + return; + } + publicIdStartString = normalizePublicId (true, + publicIdStartString); + if (cat.publicDelegations == null) + cat.publicDelegations = new Hashtable (); + else + value = cat.publicDelegations.get (publicIdStartString); + if (value != null) { + if (!value.equals (catalog)) + warn ("ignoring <delegatePublic...> entry for " + + uriStartString); + } else + cat.publicDelegations.put (publicIdStartString, catalog); + + + // + // SYSTEM ids: need substitution due to operational issues + // + } else if ("system" == local) { + String systemId = atts.getValue ("systemId"); + String value = null; + + if (systemId == null || uri == null) { + error ("expecting <system systemId=... uri=.../>"); + return; + } + systemId = normalizeURI (systemId); + uri = nofrag (uri); + if (systemId.startsWith ("urn:publicid:")) { + error ("systemId is really a publicId!!"); + return; + } + if (cat.systemIds == null) { + cat.systemIds = new Hashtable (); + if (unified) + cat.uris = cat.systemIds; + } else + value = (String) cat.systemIds.get (systemId); + if (value != null) { + if (!value.equals (uri)) + warn ("ignoring <system...> entry for " + systemId); + } else + cat.systemIds.put (systemId, uri); + + } else if ("rewriteSystem" == local) { + String value = null; + + if (systemIdStartString == null || rewritePrefix == null + || systemIdStartString.length () == 0 + || rewritePrefix.length () == 0 + ) { + error ("expecting <rewriteSystem " + + "systemIdStartString=... 
rewritePrefix=.../>"); + return; + } + if (cat.systemRewrites == null) { + cat.systemRewrites = new Hashtable (); + if (unified) + cat.uriRewrites = cat.systemRewrites; + } else + value = (String) cat.systemRewrites.get ( + systemIdStartString); + if (value != null) { + if (!value.equals (rewritePrefix)) + warn ("ignoring <rewriteSystem...> entry for " + + systemIdStartString); + } else + cat.systemRewrites.put (systemIdStartString, + rewritePrefix); + + } else if ("delegateSystem" == local) { + Object value = null; + + if (systemIdStartString == null || catalog == null) { + error ("expecting <delegateSystem " + + "systemIdStartString=... catalog=.../>"); + return; + } + if (cat.systemDelegations == null) { + cat.systemDelegations = new Hashtable (); + if (unified) + cat.uriDelegations = cat.systemDelegations; + } else + value = cat.systemDelegations.get (systemIdStartString); + if (value != null) { + if (!value.equals (catalog)) + warn ("ignoring <delegateSystem...> entry for " + + uriStartString); + } else + cat.systemDelegations.put (systemIdStartString, catalog); + + + // + // URI: just like "system" ID support, except that + // fragment IDs are disallowed in "system" elements. + // + } else if ("uri" == local) { + String name = atts.getValue ("name"); + String value = null; + + if (name == null || uri == null) { + error ("expecting <uri name=... 
uri=.../>"); + return; + } + if (name.startsWith ("urn:publicid:")) { + error ("name is really a publicId!!"); + return; + } + name = normalizeURI (name); + if (cat.uris == null) { + cat.uris = new Hashtable (); + if (unified) + cat.systemIds = cat.uris; + } else + value = (String) cat.uris.get (name); + if (value != null) { + if (!value.equals (uri)) + warn ("ignoring <uri...> entry for " + name); + } else + cat.uris.put (name, uri); + + } else if ("rewriteURI" == local) { + String value = null; + + if (uriStartString == null || rewritePrefix == null + || uriStartString.length () == 0 + || rewritePrefix.length () == 0 + ) { + error ("expecting <rewriteURI " + + "uriStartString=... rewritePrefix=.../>"); + return; + } + if (cat.uriRewrites == null) { + cat.uriRewrites = new Hashtable (); + if (unified) + cat.systemRewrites = cat.uriRewrites; + } else + value = (String) cat.uriRewrites.get (uriStartString); + if (value != null) { + if (!value.equals (rewritePrefix)) + warn ("ignoring <rewriteURI...> entry for " + + uriStartString); + } else + cat.uriRewrites.put (uriStartString, rewritePrefix); + + } else if ("delegateURI" == local) { + Object value = null; + + if (uriStartString == null || catalog == null) { + error ("expecting <delegateURI " + + "uriStartString=... 
catalog=.../>"); + return; + } + if (cat.uriDelegations == null) { + cat.uriDelegations = new Hashtable (); + if (unified) + cat.systemDelegations = cat.uriDelegations; + } else + value = cat.uriDelegations.get (uriStartString); + if (value != null) { + if (!value.equals (catalog)) + warn ("ignoring <delegateURI...> entry for " + + uriStartString); + } else + cat.uriDelegations.put (uriStartString, catalog); + + // + // NON-DELEGATING approach to modularity + // + } else if ("nextCatalog" == local) { + if (catalog == null) { + error ("expecting <nextCatalog catalog=.../>"); + return; + } + if (cat.next == null) + cat.next = new Vector (); + cat.next.addElement (catalog); + + // + // EXTENSIONS from appendix E + // + } else if ("doctype" == local) { + String name = atts.getValue ("name"); + String value = null; + + if (name == null || uri == null) { + error ("expecting <doctype name=... uri=.../>"); + return; + } + name = normalizeURI (name); + if (cat.doctypes == null) + cat.doctypes = new Hashtable (); + else + value = (String) cat.doctypes.get (name); + if (value != null) { + if (!value.equals (uri)) + warn ("ignoring <doctype...> entry for " + + uriStartString); + } else + cat.doctypes.put (name, uri); + + + // + // RESERVED ... ignore (like reserved attributes) but warn + // + } else { + warn ("ignoring unknown catalog element: " + local); + ignoreDepth++; + } + } + + public void endElement (String uri, String local, String qName) + throws SAXException + { + if (ignoreDepth != 0) + ignoreDepth--; + else + bases.pop (); + } + } +} diff --git a/libjava/gnu/xml/util/XHTMLWriter.java b/libjava/gnu/xml/util/XHTMLWriter.java new file mode 100644 index 0000000..c1502b7 --- /dev/null +++ b/libjava/gnu/xml/util/XHTMLWriter.java @@ -0,0 +1,112 @@ +/* XHTMLWriter.java -- + Copyright (C) 1999,2000,2001 Free Software Foundation, Inc. + +This file is part of GNU Classpath. 
+ +GNU Classpath is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Classpath is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Classpath; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +02111-1307 USA. + +Linking this library statically or dynamically with other modules is +making a combined work based on this library. Thus, the terms and +conditions of the GNU General Public License cover the whole +combination. + +As a special exception, the copyright holders of this library give you +permission to link this library with independent modules to produce an +executable, regardless of the license terms of these independent +modules, and to copy and distribute the resulting executable under +terms of your choice, provided that you also meet, for each linked +independent module, the terms and conditions of the license of that +module. An independent module is a module which is not derived from +or based on this library. If you modify this library, you may extend +this exception to your version of the library, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. */ + +package gnu.xml.util; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; + + +/** + * This extends XMLWriter to create a class which defaults to writing + * XHTML text, preferring the US-ASCII encoding. 
It adds no unique
 * functionality, only changing the defaults slightly to simplify writing
 * XHTML processing components by providing a bean class whose properties
 * have more convenient defaults.  An artifact of using the US-ASCII
 * encoding is that no XML declaration is written, so that HTML tools
 * that can't accept them will not become confused.  Components can treat
 * the output as UTF-8, ISO-8859-1, or US-ASCII without incurring any
 * data loss.
 *
 * @author David Brownell
 */
public class XHTMLWriter extends XMLWriter
{
    /**
     * Constructs this handler with System.out used to write
     * SAX events using the US-ASCII encoding, as XHTML.
     *
     * @exception IOException declared because the stream constructor
     *	this delegates to may fail to set up its character encoder.
     */
    public XHTMLWriter ()
    throws IOException
    {
        this (System.out);
    }

    /**
     * Constructs this handler such that the specified output stream
     * is used to write SAX events in the US-ASCII encoding, as XHTML.
     *
     * @param out Where US-ASCII encoding of the stream of SAX
     *	events will be sent.
     * @exception IOException if the "8859_1" encoder used to wrap
     *	the stream is not available.
     */
    public XHTMLWriter (OutputStream out)
    throws IOException
    {
        // not all JVMs understand "ASCII" as an encoding name, so
        // we use 8859_1 (they all seem to handle that one) and
        // make the echo handler filter out non-ASCII characters
        this (new OutputStreamWriter (out, "8859_1"), "US-ASCII");
    }

    /**
     * Constructs this handler such that the specified writer
     * is used to write SAX events as XHTML.  No encoding name
     * is supplied to the superclass.
     *
     * @param out Where the stream of SAX events will be written.
     */
    public XHTMLWriter (Writer out)
    {
        this (out, null);
    }

    /**
     * Constructs this handler such that the specified writer
     * is used to write SAX events as XHTML, labeled with the specified
     * encoding.  This is the constructor all the others delegate to;
     * it is the one which switches the writer into XHTML mode.
     *
     * @param out Where the stream of SAX events will be written.
     * @param encoding If non-null, this names the encoding to be
     *	placed in the encoding declaration.
     */
    public XHTMLWriter (Writer out, String encoding)
    {
        super (out, encoding);
        setXhtml (true);
    }
}
diff --git a/libjava/gnu/xml/util/XMLWriter.java b/libjava/gnu/xml/util/XMLWriter.java
new file mode 100644
index 0000000..feb9e4f
--- /dev/null
+++ b/libjava/gnu/xml/util/XMLWriter.java
@@ -0,0 +1,1927 @@
/* XMLWriter.java --
   Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.

This file is part of GNU Classpath.

GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING.  If not, write to the
Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA.

Linking this library statically or dynamically with other modules is
making a combined work based on this library.  Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.

As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module.  An independent module is a module which is not derived from
or based on this library.  If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so.
If you do not wish to do so, delete this +exception statement from your version. */ + +package gnu.xml.util; + +import java.io.BufferedWriter; +import java.io.CharConversionException; +import java.io.IOException; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.Stack; + +import org.xml.sax.*; +import org.xml.sax.ext.*; +import org.xml.sax.helpers.*; + + +/** + * This class is a SAX handler which writes all its input as a well formed + * XML or XHTML document. If driven using SAX2 events, this output may + * include a recreated document type declaration, subject to limitations + * of SAX (no internal subset exposed) or DOM (the important declarations, + * with their documentation, are discarded). + * + * <p> By default, text is generated "as-is", but some optional modes + * are supported. Pretty-printing is supported, to make life easier + * for people reading the output. XHTML (1.0) output has can be made + * particularly pretty; all the built-in character entities are known. + * Canonical XML can also be generated, assuming the input is properly + * formed. + * + * <hr> + * + * <p> Some of the methods on this class are intended for applications to + * use directly, rather than as pure SAX2 event callbacks. Some of those + * methods access the JavaBeans properties (used to tweak output formats, + * for example canonicalization and pretty printing). Subclasses + * are expected to add new behaviors, not to modify current behavior, so + * many such methods are final.</p> + * + * <p> The <em>write*()</em> methods may be slightly simpler for some + * applications to use than direct callbacks. For example, they support + * a simple policy for encoding data items as the content of a single element. + * + * <p> To reuse an XMLWriter you must provide it with a new Writer, since + * this handler closes the writer it was given as part of its endDocument() + * handling. 
(XML documents have an end of input, and the way to encode + * that on a stream is to close it.) </p> + * + * <hr> + * + * <p> Note that any relative URIs in the source document, as found in + * entity and notation declarations, ought to have been fully resolved by + * the parser providing events to this handler. This means that the + * output text should only have fully resolved URIs, which may not be + * the desired behavior in cases where later binding is desired. </p> + * + * <p> <em>Note that due to SAX2 defaults, you may need to manually + * ensure that the input events are XML-conformant with respect to namespace + * prefixes and declarations. {@link gnu.xml.pipeline.NSFilter} is + * one solution to this problem, in the context of processing pipelines.</em> + * Something as simple as connecting this handler to a parser might not + * generate the correct output. Another workaround is to ensure that the + * <em>namespace-prefixes</em> feature is always set to true, if you're + * hooking this directly up to some XMLReader implementation. + * + * @see gnu.xml.pipeline.TextConsumer + * + * @author David Brownell + */ +public class XMLWriter + implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler +{ + // text prints/escapes differently depending on context + // CTX_ENTITY ... entity literal value + // CTX_ATTRIBUTE ... attribute literal value + // CTX_CONTENT ... content of an element + // CTX_UNPARSED ... CDATA, comment, PI, names, etc + // CTX_NAME ... name or nmtoken, no escapes possible + private static final int CTX_ENTITY = 1; + private static final int CTX_ATTRIBUTE = 2; + private static final int CTX_CONTENT = 3; + private static final int CTX_UNPARSED = 4; + private static final int CTX_NAME = 5; + +// FIXME: names (element, attribute, PI, notation, etc) are not +// currently written out with range checks (escapeChars). +// In non-XHTML, some names can't be directly written; panic! 
+ + private static String sysEOL; + + static { + try { + sysEOL = System.getProperty ("line.separator", "\n"); + + // don't use the system's EOL if it's illegal XML. + if (!isLineEnd (sysEOL)) + sysEOL = "\n"; + + } catch (SecurityException e) { + sysEOL = "\n"; + } + } + + private static boolean isLineEnd (String eol) + { + return "\n".equals (eol) + || "\r".equals (eol) + || "\r\n".equals (eol); + } + + private Writer out; + private boolean inCDATA; + private int elementNestLevel; + private String eol = sysEOL; + + private short dangerMask; + private StringBuffer stringBuf; + private Locator locator; + private ErrorHandler errHandler; + + private boolean expandingEntities = false; + private int entityNestLevel; + private boolean xhtml; + private boolean startedDoctype; + private String encoding; + + private boolean canonical; + private boolean inDoctype; + private boolean inEpilogue; + + // pretty printing controls + private boolean prettyPrinting; + private int column; + private boolean noWrap; + private Stack space = new Stack (); + + // this is not a hard'n'fast rule -- longer lines are OK, + // but are to be avoided. Here, prettyprinting is more to + // show structure "cleanly" than to be precise about it. + // better to have ragged layout than one line 24Kb long. + private static final int lineLength = 75; + + + /** + * Constructs this handler with System.out used to write SAX events + * using the UTF-8 encoding. Avoid using this except when you know + * it's safe to close System.out at the end of the document. + */ + public XMLWriter () throws IOException + { this (System.out); } + + /** + * Constructs a handler which writes all input to the output stream + * in the UTF-8 encoding, and closes it when endDocument is called. + * (Yes it's annoying that this throws an exception -- but there's + * really no way around it, since it's barely possible a JDK may + * exist somewhere that doesn't know how to emit UTF-8.) 
+ */ + public XMLWriter (OutputStream out) throws IOException + { + this (new OutputStreamWriter (out, "UTF8")); + } + + /** + * Constructs a handler which writes all input to the writer, and then + * closes the writer when the document ends. If an XML declaration is + * written onto the output, and this class can determine the name of + * the character encoding for this writer, that encoding name will be + * included in the XML declaration. + * + * <P> See the description of the constructor which takes an encoding + * name for imporant information about selection of encodings. + * + * @param writer XML text is written to this writer. + */ + public XMLWriter (Writer writer) + { + this (writer, null); + } + + /** + * Constructs a handler which writes all input to the writer, and then + * closes the writer when the document ends. If an XML declaration is + * written onto the output, this class will use the specified encoding + * name in that declaration. If no encoding name is specified, no + * encoding name will be declared unless this class can otherwise + * determine the name of the character encoding for this writer. + * + * <P> At this time, only the UTF-8 ("UTF8") and UTF-16 ("Unicode") + * output encodings are fully lossless with respect to XML data. If you + * use any other encoding you risk having your data be silently mangled + * on output, as the standard Java character encoding subsystem silently + * maps non-encodable characters to a question mark ("?") and will not + * report such errors to applications. + * + * <p> For a few other encodings the risk can be reduced. If the writer is + * a java.io.OutputStreamWriter, and uses either the ISO-8859-1 ("8859_1", + * "ISO8859_1", etc) or US-ASCII ("ASCII") encodings, content which + * can't be encoded in those encodings will be written safely. Where + * relevant, the XHTML entity names will be used; otherwise, numeric + * character references will be emitted. 
+ * + * <P> However, there remain a number of cases where substituting such + * entity or character references is not an option. Such references are + * not usable within a DTD, comment, PI, or CDATA section. Neither may + * they be used when element, attribute, entity, or notation names have + * the problematic characters. + * + * @param writer XML text is written to this writer. + * @param encoding if non-null, and an XML declaration is written, + * this is the name that will be used for the character encoding. + */ + public XMLWriter (Writer writer, String encoding) + { + setWriter (writer, encoding); + } + + private void setEncoding (String encoding) + { + if (encoding == null && out instanceof OutputStreamWriter) + encoding = ((OutputStreamWriter)out).getEncoding (); + + if (encoding != null) { + encoding = encoding.toUpperCase (); + + // Use official encoding names where we know them, + // avoiding the Java-only names. When using common + // encodings where we can easily tell if characters + // are out of range, we'll escape out-of-range + // characters using character refs for safety. + + // I _think_ these are all the main synonyms for these! + if ("UTF8".equals (encoding)) { + encoding = "UTF-8"; + } else if ("US-ASCII".equals (encoding) + || "ASCII".equals (encoding)) { + dangerMask = (short) 0xff80; + encoding = "US-ASCII"; + } else if ("ISO-8859-1".equals (encoding) + || "8859_1".equals (encoding) + || "ISO8859_1".equals (encoding)) { + dangerMask = (short) 0xff00; + encoding = "ISO-8859-1"; + } else if ("UNICODE".equals (encoding) + || "UNICODE-BIG".equals (encoding) + || "UNICODE-LITTLE".equals (encoding)) { + encoding = "UTF-16"; + + // TODO: UTF-16BE, UTF-16LE ... no BOM; what + // release of JDK supports those Unicode names? + } + + if (dangerMask != 0) + stringBuf = new StringBuffer (); + } + + this.encoding = encoding; + } + + + /** + * Resets the handler to write a new text document. + * + * @param writer XML text is written to this writer. 
+ * @param encoding if non-null, and an XML declaration is written, + * this is the name that will be used for the character encoding. + * + * @exception IllegalStateException if the current + * document hasn't yet ended (with {@link #endDocument}) + */ + final public void setWriter (Writer writer, String encoding) + { + if (out != null) + throw new IllegalStateException ( + "can't change stream in mid course"); + out = writer; + if (out != null) + setEncoding (encoding); + if (!(out instanceof BufferedWriter)) + out = new BufferedWriter (out); + space.push ("default"); + } + + /** + * Assigns the line ending style to be used on output. + * @param eolString null to use the system default; else + * "\n", "\r", or "\r\n". + */ + final public void setEOL (String eolString) + { + if (eolString == null) + eol = sysEOL; + else if (!isLineEnd (eolString)) + eol = eolString; + else + throw new IllegalArgumentException (eolString); + } + + /** + * Assigns the error handler to be used to present most fatal + * errors. + */ + public void setErrorHandler (ErrorHandler handler) + { + errHandler = handler; + } + + /** + * Used internally and by subclasses, this encapsulates the logic + * involved in reporting fatal errors. It uses locator information + * for good diagnostics, if available, and gives the application's + * ErrorHandler the opportunity to handle the error before throwing + * an exception. + */ + protected void fatal (String message, Exception e) + throws SAXException + { + SAXParseException x; + + if (locator == null) + x = new SAXParseException (message, null, null, -1, -1, e); + else + x = new SAXParseException (message, locator, e); + if (errHandler != null) + errHandler.fatalError (x); + throw x; + } + + + // JavaBeans properties + + /** + * Controls whether the output should attempt to follow the "transitional" + * XHTML rules so that it meets the "HTML Compatibility Guidelines" + * appendix in the XHTML specification. 
A "transitional" Document Type
     * Declaration (DTD) is placed near the beginning of the output document,
     * instead of whatever DTD would otherwise have been placed there, and
     * XHTML empty elements are printed specially.  When writing text in
     * US-ASCII or ISO-8859-1 encodings, the predefined XHTML internal
     * entity names are used (in preference to character references) when
     * writing content characters which can't be expressed in those encodings.
     *
     * <p> When this option is enabled, it is the caller's responsibility
     * to ensure that the input is otherwise valid as XHTML.  Things to
     * be careful of in all cases, as described in the appendix referenced
     * above, include:  <ul>
     *
     *	<li> Element and attribute names must be in lower case, both
     *		in the document and in any CSS style sheet.
     *	<li> All XML constructs must be valid as defined by the XHTML
     *		"transitional" DTD (including all familiar constructs,
     *		even deprecated ones).
     *	<li> The root element must be "html".
     *	<li> Elements that must be empty (such as <em>&lt;br&gt;</em>)
     *		must have no content.
     *	<li> Use both <em>lang</em> and <em>xml:lang</em> attributes
     *		when specifying language.
     *	<li> Similarly, use both <em>id</em> and <em>name</em> attributes
     *		when defining elements that may be referred to through
     *		URI fragment identifiers ... and make sure that the
     *		value is a legal NMTOKEN, since not all such HTML 4.0
     *		identifiers are valid in XML.
     *	<li> Be careful with character encodings; make sure you provide
     *		a <em>&lt;meta http-equiv="Content-type"
     *		content="text/xml;charset=..." /&gt;</em> element in
     *		the HTML "head" element, naming the same encoding
     *		used to create this handler.  Also, if that encoding
     *		is anything other than US-ASCII, make sure that if
     *		the document is given a MIME content type, it has
     *		a <em>charset=...</em> attribute with that encoding.
     *	</ul>
     *
     * <p> Additionally, some of the oldest browsers have additional
     * quirks, to address with guidelines such as: <ul>
     *
     *	<li> Processing instructions may be rendered, so avoid them.
     *		(Similarly for an XML declaration.)
     *	<li> Embedded style sheets and scripts should not contain XML
     *		markup delimiters:  &amp;, &lt;, and ]]&gt; are trouble.
     *	<li> Attribute values should not have line breaks or multiple
     *		consecutive white space characters.
     *	<li> Use no more than one of the deprecated (transitional)
     *		<em>&lt;isindex&gt;</em> elements.
     *	<li> Some boolean attributes (such as <em>compact, checked,
     *		disabled, readonly, selected,</em> and more) confuse
     *		some browsers, since they only understand minimized
     *		versions which are illegal in XML.
     *	</ul>
     *
     * <p> Also, some characteristics of the resulting output may be
     * a function of whether the document is later given a MIME
     * content type of <em>text/html</em> rather than one indicating
     * XML (<em>application/xml</em> or <em>text/xml</em>).  Worse,
     * some browsers ignore MIME content types and prefer to rely on URI
     * name suffixes -- so an "index.xml" could always be XML, never
     * XHTML, no matter its MIME type.
     *
     * @exception IllegalStateException if a document locator has
     *	already been assigned to this handler.
     */
    final public void setXhtml (boolean value)
    {
        if (locator != null)
            throw new IllegalStateException ("started parsing");
        xhtml = value;
        // turning XHTML output on switches canonical output off
        if (xhtml)
            canonical = false;
    }

    /**
     * Returns true if the output attempts to echo the input following
     * "transitional" XHTML rules and matching the "HTML Compatibility
     * Guidelines" so that an HTML version 3 browser can read the output
     * as HTML; returns false (the default) otherwise.
     */
    final public boolean isXhtml ()
    {
        return xhtml;
    }

    /**
     * Controls whether the output text contains references to
     * entities (the default), or instead contains the expanded
     * values of those entities.
+ */ + final public void setExpandingEntities (boolean value) + { + if (locator != null) + throw new IllegalStateException ("started parsing"); + expandingEntities = value; + if (!expandingEntities) + canonical = false; + } + + /** + * Returns true if the output will have no entity references; + * returns false (the default) otherwise. + */ + final public boolean isExpandingEntities () + { + return expandingEntities; + } + + /** + * Controls pretty-printing, which by default is not enabled + * (and currently is most useful for XHTML output). + * Pretty printing enables structural indentation, sorting of attributes + * by name, line wrapping, and potentially other mechanisms for making + * output more or less readable. + * + * <p> At this writing, structural indentation and line wrapping are + * enabled when pretty printing is enabled and the <em>xml:space</em> + * attribute has the value <em>default</em> (its other legal value is + * <em>preserve</em>, as defined in the XML specification). The three + * XHTML element types which use another value are recognized by their + * names (namespaces are ignored). + * + * <p> Also, for the record, the "pretty" aspect of printing here + * is more to provide basic structure on outputs that would otherwise + * risk being a single long line of text. For now, expect the + * structure to be ragged ... unless you'd like to submit a patch + * to make this be more strictly formatted! + * + * @exception IllegalStateException thrown if this method is invoked + * after output has begun. + */ + final public void setPrettyPrinting (boolean value) + { + if (locator != null) + throw new IllegalStateException ("started parsing"); + prettyPrinting = value; + if (prettyPrinting) + canonical = false; + } + + /** + * Returns value of flag controlling pretty printing. + */ + final public boolean isPrettyPrinting () + { + return prettyPrinting; + } + + + /** + * Sets the output style to be canonicalized. 
Input events must + * meet requirements that are slightly more stringent than the + * basic well-formedness ones, and include: <ul> + * + * <li> Namespace prefixes must not have been changed from those + * in the original document. (This may only be ensured by setting + * the SAX2 XMLReader <em>namespace-prefixes</em> feature flag; + * by default, it is cleared.) + * + * <li> Redundant namespace declaration attributes have been + * removed. (If an ancestor element defines a namespace prefix + * and that declaration hasn't been overriden, an element must + * not redeclare it.) + * + * <li> If comments are not to be included in the canonical output, + * they must first be removed from the input event stream; this + * <em>Canonical XML with comments</em> by default. + * + * <li> If the input character encoding was not UCS-based, the + * character data must have been normalized using Unicode + * Normalization Form C. (UTF-8 and UTF-16 are UCS-based.) + * + * <li> Attribute values must have been normalized, as is done + * by any conformant XML processor which processes all external + * parameter entities. + * + * <li> Similarly, attribute value defaulting has been performed. + * + * </ul> + * + * <p> Note that fragments of XML documents, as specified by an XPath + * node set, may be canonicalized. In such cases, elements may need + * some fixup (for <em>xml:*</em> attributes and application-specific + * context). + * + * @exception IllegalArgumentException if the output encoding + * is anything other than UTF-8. + */ + final public void setCanonical (boolean value) + { + if (value && !"UTF-8".equals (encoding)) + throw new IllegalArgumentException ("encoding != UTF-8"); + canonical = value; + if (canonical) { + prettyPrinting = xhtml = false; + expandingEntities = true; + eol = "\n"; + } + } + + + /** + * Returns value of flag controlling canonical output. + */ + final public boolean isCanonical () + { + return canonical; + } + + + /** + * Flushes the output stream. 
When this handler is used in long lived + * pipelines, it can be important to flush buffered state, for example + * so that it can reach the disk as part of a state checkpoint. + */ + final public void flush () + throws IOException + { + if (out != null) + out.flush (); + } + + + // convenience routines + +// FIXME: probably want a subclass that holds a lot of these... +// and maybe more! + + /** + * Writes the string as if characters() had been called on the contents + * of the string. This is particularly useful when applications act as + * producers and write data directly to event consumers. + */ + final public void write (String data) + throws SAXException + { + char buf [] = data.toCharArray (); + characters (buf, 0, buf.length); + } + + + /** + * Writes an element that has content consisting of a single string. + * @see #writeEmptyElement + * @see #startElement + */ + public void writeElement ( + String uri, + String localName, + String qName, + Attributes atts, + String content + ) throws SAXException + { + if (content == null || content.length () == 0) { + writeEmptyElement (uri, localName, qName, atts); + return; + } + startElement (uri, localName, qName, atts); + char chars [] = content.toCharArray (); + characters (chars, 0, chars.length); + endElement (uri, localName, qName); + } + + + /** + * Writes an element that has content consisting of a single integer, + * encoded as a decimal string. 
+ * @see #writeEmptyElement + * @see #startElement + */ + public void writeElement ( + String uri, + String localName, + String qName, + Attributes atts, + int content + ) throws SAXException + { + writeElement (uri, localName, qName, atts, Integer.toString (content)); + } + + + // SAX1 ContentHandler + /** <b>SAX1</b>: provides parser status information */ + final public void setDocumentLocator (Locator l) + { + locator = l; + } + + + // URL for dtd that validates against all normal HTML constructs + private static final String xhtmlFullDTD = + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"; + + + /** + * <b>SAX1</b>: indicates the beginning of a document parse. + * If you're writing (well formed) fragments of XML, neither + * this nor endDocument should be called. + */ + // NOT final + public void startDocument () + throws SAXException + { + try { + if (out == null) + throw new IllegalStateException ( + "null Writer given to XMLWriter"); + + // Not all parsers provide the locator we want; this also + // flags whether events are being sent to this object yet. + // We could only have this one call if we only printed whole + // documents ... but we also print fragments, so most of the + // callbacks here replicate this test. + + if (locator == null) + locator = new LocatorImpl (); + + // Unless the data is in US-ASCII or we're canonicalizing, write + // the XML declaration if we know the encoding. US-ASCII won't + // normally get mangled by web server confusion about the + // character encodings used. Plus, it's an easy way to + // ensure we can write ASCII that's unlikely to confuse + // elderly HTML parsers. 
+ + if (!canonical + && dangerMask != (short) 0xff80 + && encoding != null) { + rawWrite ("<?xml version='1.0'"); + rawWrite (" encoding='" + encoding + "'"); + rawWrite ("?>"); + newline (); + } + + if (xhtml) { + + rawWrite ("<!DOCTYPE html PUBLIC"); + newline (); + rawWrite (" '-//W3C//DTD XHTML 1.0 Transitional//EN'"); + newline (); + rawWrite (" '"); + // NOTE: URL (above) matches the REC + rawWrite (xhtmlFullDTD); + rawWrite ("'>"); + newline (); + newline (); + + // fake the rest of the handler into ignoring + // everything until the root element, so any + // XHTML DTD comments, PIs, etc are ignored + startedDoctype = true; + } + + entityNestLevel = 0; + + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** + * <b>SAX1</b>: indicates the completion of a parse. + * Note that all complete SAX event streams make this call, even + * if an error is reported during a parse. + */ + // NOT final + public void endDocument () + throws SAXException + { + try { + if (!canonical) { + newline (); + newline (); + } + out.close (); + out = null; + locator = null; + } catch (IOException e) { + fatal ("can't write", e); + } + } + + // XHTML elements declared as EMPTY print differently + final private static boolean isEmptyElementTag (String tag) + { + switch (tag.charAt (0)) { + case 'a': return "area".equals (tag); + case 'b': return "base".equals (tag) + || "basefont".equals (tag) + || "br".equals (tag); + case 'c': return "col".equals (tag); + case 'f': return "frame".equals (tag); + case 'h': return "hr".equals (tag); + case 'i': return "img".equals (tag) + || "input".equals (tag) + || "isindex".equals (tag); + case 'l': return "link".equals (tag); + case 'm': return "meta".equals (tag); + case 'p': return "param".equals (tag); + } + return false; + } + + private static boolean indentBefore (String tag) + { + // basically indent before block content + // and within structure like tables, lists + switch (tag.charAt (0)) { + case 'a': return 
"applet".equals (tag); + case 'b': return "body".equals (tag) + || "blockquote".equals (tag); + case 'c': return "center".equals (tag); + case 'f': return "frame".equals (tag) + || "frameset".equals (tag); + case 'h': return "head".equals (tag); + case 'm': return "meta".equals (tag); + case 'o': return "object".equals (tag); + case 'p': return "param".equals (tag) + || "pre".equals (tag); + case 's': return "style".equals (tag); + case 't': return "title".equals (tag) + || "td".equals (tag) + || "th".equals (tag); + } + // ... but not inline elements like "em", "b", "font" + return false; + } + + private static boolean spaceBefore (String tag) + { + // blank line AND INDENT before certain structural content + switch (tag.charAt (0)) { + case 'h': return "h1".equals (tag) + || "h2".equals (tag) + || "h3".equals (tag) + || "h4".equals (tag) + || "h5".equals (tag) + || "h6".equals (tag) + || "hr".equals (tag); + case 'l': return "li".equals (tag); + case 'o': return "ol".equals (tag); + case 'p': return "p".equals (tag); + case 't': return "table".equals (tag) + || "tr".equals (tag); + case 'u': return "ul".equals (tag); + } + return false; + } + + // XHTML DTDs say these three have xml:space="preserve" + private static boolean spacePreserve (String tag) + { + return "pre".equals (tag) + || "style".equals (tag) + || "script".equals (tag); + } + + /** + * <b>SAX2</b>: ignored. + */ + final public void startPrefixMapping (String prefix, String uri) + {} + + /** + * <b>SAX2</b>: ignored. + */ + final public void endPrefixMapping (String prefix) + {} + + private void writeStartTag ( + String name, + Attributes atts, + boolean isEmpty + ) throws SAXException, IOException + { + rawWrite ('<'); + rawWrite (name); + + // write out attributes ... sorting is particularly useful + // with output that's been heavily defaulted. 
+ if (atts != null && atts.getLength () != 0) { + + // Set up to write, with optional sorting + int indices [] = new int [atts.getLength ()]; + + for (int i= 0; i < indices.length; i++) + indices [i] = i; + + // optionally sort + +// FIXME: canon xml demands xmlns nodes go first, +// and sorting by URI first (empty first) then localname +// it should maybe use a different sort + + if (canonical || prettyPrinting) { + + // insertion sort by attribute name + for (int i = 1; i < indices.length; i++) { + int n = indices [i], j; + String s = atts.getQName (n); + + for (j = i - 1; j >= 0; j--) { + if (s.compareTo (atts.getQName (indices [j])) + >= 0) + break; + indices [j + 1] = indices [j]; + } + indices [j + 1] = n; + } + } + + // write, sorted or no + for (int i= 0; i < indices.length; i++) { + String s = atts.getQName (indices [i]); + + if (s == null || "".equals (s)) + throw new IllegalArgumentException ("no XML name"); + rawWrite (" "); + rawWrite (s); + rawWrite ("="); + writeQuotedValue (atts.getValue (indices [i]), + CTX_ATTRIBUTE); + } + } + if (isEmpty) + rawWrite (" /"); + rawWrite ('>'); + } + + /** + * <b>SAX2</b>: indicates the start of an element. + * When XHTML is in use, avoid attribute values with + * line breaks or multiple whitespace characters, since + * not all user agents handle them correctly. 
+ */ + final public void startElement ( + String uri, + String localName, + String qName, + Attributes atts + ) throws SAXException + { + startedDoctype = false; + + if (locator == null) + locator = new LocatorImpl (); + + if (qName == null || "".equals (qName)) + throw new IllegalArgumentException ("no XML name"); + + try { + if (entityNestLevel != 0) + return; + if (prettyPrinting) { + String whitespace = null; + + if (xhtml && spacePreserve (qName)) + whitespace = "preserve"; + else if (atts != null) + whitespace = atts.getValue ("xml:space"); + if (whitespace == null) + whitespace = (String) space.peek (); + space.push (whitespace); + + if ("default".equals (whitespace)) { + if (xhtml) { + if (spaceBefore (qName)) { + newline (); + doIndent (); + } else if (indentBefore (qName)) + doIndent (); + // else it's inlined, modulo line length + // FIXME: incrementing element nest level + // for inlined elements causes ugliness + } else + doIndent (); + } + } + elementNestLevel++; + writeStartTag (qName, atts, xhtml && isEmptyElementTag (qName)); + + if (xhtml) { +// FIXME: if this is an XHTML "pre" element, turn +// off automatic wrapping. + } + + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** + * Writes an empty element. 
+ * @see #startElement + */ + public void writeEmptyElement ( + String uri, + String localName, + String qName, + Attributes atts + ) throws SAXException + { + if (canonical) { + startElement (uri, localName, qName, atts); + endElement (uri, localName, qName); + } else { + try { + writeStartTag (qName, atts, true); + } catch (IOException e) { + fatal ("can't write", e); + } + } + } + + + /** <b>SAX2</b>: indicates the end of an element */ + final public void endElement (String uri, String localName, String qName) + throws SAXException + { + if (qName == null || "".equals (qName)) + throw new IllegalArgumentException ("no XML name"); + + try { + elementNestLevel--; + if (entityNestLevel != 0) + return; + if (xhtml && isEmptyElementTag (qName)) + return; + rawWrite ("</"); + rawWrite (qName); + rawWrite ('>'); + + if (prettyPrinting) { + if (!space.empty ()) + space.pop (); + else + fatal ("stack discipline", null); + } + if (elementNestLevel == 0) + inEpilogue = true; + + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** <b>SAX1</b>: reports content characters */ + final public void characters (char ch [], int start, int length) + throws SAXException + { + if (locator == null) + locator = new LocatorImpl (); + + try { + if (entityNestLevel != 0) + return; + if (inCDATA) { + escapeChars (ch, start, length, CTX_UNPARSED); + } else { + escapeChars (ch, start, length, CTX_CONTENT); + } + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** <b>SAX1</b>: reports ignorable whitespace */ + final public void ignorableWhitespace (char ch [], int start, int length) + throws SAXException + { + if (locator == null) + locator = new LocatorImpl (); + + try { + if (entityNestLevel != 0) + return; + // don't forget to map NL to CRLF, CR, etc + escapeChars (ch, start, length, CTX_CONTENT); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** + * <b>SAX1</b>: reports a PI. 
+ * This doesn't check for illegal target names, such as "xml" or "XML", + * or namespace-incompatible ones like "big:dog"; the caller is + * responsible for ensuring those names are legal. + */ + final public void processingInstruction (String target, String data) + throws SAXException + { + if (locator == null) + locator = new LocatorImpl (); + + // don't print internal subset for XHTML + if (xhtml && startedDoctype) + return; + + // ancient HTML browsers might render these ... their loss. + // to prevent: "if (xhtml) return;". + + try { + if (entityNestLevel != 0) + return; + if (canonical && inEpilogue) + newline (); + rawWrite ("<?"); + rawWrite (target); + rawWrite (' '); + escapeChars (data.toCharArray (), -1, -1, CTX_UNPARSED); + rawWrite ("?>"); + if (elementNestLevel == 0 && !(canonical && inEpilogue)) + newline (); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** <b>SAX1</b>: indicates a non-expanded entity reference */ + public void skippedEntity (String name) + throws SAXException + { + try { + rawWrite ("&"); + rawWrite (name); + rawWrite (";"); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + // SAX2 LexicalHandler + + /** <b>SAX2</b>: called before parsing CDATA characters */ + final public void startCDATA () + throws SAXException + { + if (locator == null) + locator = new LocatorImpl (); + + if (canonical) + return; + + try { + inCDATA = true; + if (entityNestLevel == 0) + rawWrite ("<![CDATA["); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** <b>SAX2</b>: called after parsing CDATA characters */ + final public void endCDATA () + throws SAXException + { + if (canonical) + return; + + try { + inCDATA = false; + if (entityNestLevel == 0) + rawWrite ("]]>"); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** + * <b>SAX2</b>: called when the doctype is partially parsed + * Note that this, like other doctype related calls, is ignored + * when XHTML is in use. 
+ */ + final public void startDTD (String name, String publicId, String systemId) + throws SAXException + { + if (locator == null) + locator = new LocatorImpl (); + if (xhtml) + return; + try { + inDoctype = startedDoctype = true; + if (canonical) + return; + rawWrite ("<!DOCTYPE "); + rawWrite (name); + rawWrite (' '); + + if (!expandingEntities) { + if (publicId != null) + rawWrite ("PUBLIC '" + publicId + "' '" + systemId + "' "); + else if (systemId != null) + rawWrite ("SYSTEM '" + systemId + "' "); + } + + rawWrite ('['); + newline (); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** <b>SAX2</b>: called after the doctype is parsed */ + final public void endDTD () + throws SAXException + { + inDoctype = false; + if (canonical || xhtml) + return; + try { + rawWrite ("]>"); + newline (); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** + * <b>SAX2</b>: called before parsing a general entity in content + */ + final public void startEntity (String name) + throws SAXException + { + try { + boolean writeEOL = true; + + // Predefined XHTML entities (for characters) will get + // mapped back later. + if (xhtml || expandingEntities) + return; + + entityNestLevel++; + if (name.equals ("[dtd]")) + return; + if (entityNestLevel != 1) + return; + if (!name.startsWith ("%")) { + writeEOL = false; + rawWrite ('&'); + } + rawWrite (name); + rawWrite (';'); + if (writeEOL) + newline (); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** + * <b>SAX2</b>: called after parsing a general entity in content + */ + final public void endEntity (String name) + throws SAXException + { + if (xhtml || expandingEntities) + return; + entityNestLevel--; + } + + /** + * <b>SAX2</b>: called when comments are parsed. + * When XHTML is used, the old HTML tradition of using comments + * to for inline CSS, or for JavaScript code is discouraged. 
+ * This is because XML processors are encouraged to discard, on + * the grounds that comments are for users (and perhaps text + * editors) not programs. Instead, use external scripts + */ + final public void comment (char ch [], int start, int length) + throws SAXException + { + if (locator == null) + locator = new LocatorImpl (); + + // don't print internal subset for XHTML + if (xhtml && startedDoctype) + return; + // don't print comment in doctype for canon xml + if (canonical && inDoctype) + return; + + try { + boolean indent; + + if (prettyPrinting && space.empty ()) + fatal ("stack discipline", null); + indent = prettyPrinting && "default".equals (space.peek ()); + if (entityNestLevel != 0) + return; + if (indent) + doIndent (); + if (canonical && inEpilogue) + newline (); + rawWrite ("<!--"); + escapeChars (ch, start, length, CTX_UNPARSED); + rawWrite ("-->"); + if (indent) + doIndent (); + if (elementNestLevel == 0 && !(canonical && inEpilogue)) + newline (); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + // SAX1 DTDHandler + + /** <b>SAX1</b>: called on notation declarations */ + final public void notationDecl (String name, + String publicId, String systemId) + throws SAXException + { + if (xhtml) + return; + try { + // At this time, only SAX2 callbacks start these. + if (!startedDoctype) + return; + + if (entityNestLevel != 0) + return; + rawWrite ("<!NOTATION " + name + " "); + if (publicId != null) + rawWrite ("PUBLIC \"" + publicId + '"'); + else + rawWrite ("SYSTEM "); + if (systemId != null) + rawWrite ('"' + systemId + '"'); + rawWrite (">"); + newline (); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** <b>SAX1</b>: called on unparsed entity declarations */ + final public void unparsedEntityDecl (String name, + String publicId, String systemId, + String notationName) + throws SAXException + { + if (xhtml) + return; + try { + // At this time, only SAX2 callbacks start these. 
+ if (!startedDoctype) { + // FIXME: write to temporary buffer, and make the start + // of the root element write these declarations. + return; + } + + if (entityNestLevel != 0) + return; + rawWrite ("<!ENTITY " + name + " "); + if (publicId != null) + rawWrite ("PUBLIC \"" + publicId + '"'); + else + rawWrite ("SYSTEM "); + rawWrite ('"' + systemId + '"'); + rawWrite (" NDATA " + notationName + ">"); + newline (); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + // SAX2 DeclHandler + + /** <b>SAX2</b>: called on attribute declarations */ + final public void attributeDecl (String eName, String aName, + String type, String mode, String value) + throws SAXException + { + if (xhtml) + return; + try { + // At this time, only SAX2 callbacks start these. + if (!startedDoctype) + return; + if (entityNestLevel != 0) + return; + rawWrite ("<!ATTLIST " + eName + ' ' + aName + ' '); + rawWrite (type); + rawWrite (' '); + if (mode != null) + rawWrite (mode + ' '); + if (value != null) + writeQuotedValue (value, CTX_ATTRIBUTE); + rawWrite ('>'); + newline (); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** <b>SAX2</b>: called on element declarations */ + final public void elementDecl (String name, String model) + throws SAXException + { + if (xhtml) + return; + try { + // At this time, only SAX2 callbacks start these. + if (!startedDoctype) + return; + if (entityNestLevel != 0) + return; + rawWrite ("<!ELEMENT " + name + ' ' + model + '>'); + newline (); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** <b>SAX2</b>: called on external entity declarations */ + final public void externalEntityDecl ( + String name, + String publicId, + String systemId) + throws SAXException + { + if (xhtml) + return; + try { + // At this time, only SAX2 callbacks start these. 
+ if (!startedDoctype) + return; + if (entityNestLevel != 0) + return; + rawWrite ("<!ENTITY "); + if (name.startsWith ("%")) { + rawWrite ("% "); + rawWrite (name.substring (1)); + } else + rawWrite (name); + if (publicId != null) + rawWrite (" PUBLIC \"" + publicId + '"'); + else + rawWrite (" SYSTEM "); + rawWrite ('"' + systemId + "\">"); + newline (); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + /** <b>SAX2</b>: called on internal entity declarations */ + final public void internalEntityDecl (String name, String value) + throws SAXException + { + if (xhtml) + return; + try { + // At this time, only SAX2 callbacks start these. + if (!startedDoctype) + return; + if (entityNestLevel != 0) + return; + rawWrite ("<!ENTITY "); + if (name.startsWith ("%")) { + rawWrite ("% "); + rawWrite (name.substring (1)); + } else + rawWrite (name); + rawWrite (' '); + writeQuotedValue (value, CTX_ENTITY); + rawWrite ('>'); + newline (); + } catch (IOException e) { + fatal ("can't write", e); + } + } + + private void writeQuotedValue (String value, int code) + throws SAXException, IOException + { + char buf [] = value.toCharArray (); + int off = 0, len = buf.length; + + // we can't add line breaks to attribute/entity/... values + noWrap = true; + rawWrite ('"'); + escapeChars (buf, off, len, code); + rawWrite ('"'); + noWrap = false; + } + + // From "HTMLlat1x.ent" ... names of entities for ISO-8859-1 + // (Latin/1) characters, all codes: 160-255 (0xA0-0xFF). + // Codes 128-159 have no assigned values. 
    // Indexed by (character code - 160).
    private static final String HTMLlat1x [] = {
	// 160
	"nbsp", "iexcl", "cent", "pound", "curren",
	"yen", "brvbar", "sect", "uml", "copy",

	// 170
	"ordf", "laquo", "not", "shy", "reg",
	"macr", "deg", "plusmn", "sup2", "sup3",

	// 180
	"acute", "micro", "para", "middot", "cedil",
	"sup1", "ordm", "raquo", "frac14", "frac12",

	// 190
	"frac34", "iquest", "Agrave", "Aacute", "Acirc",
	"Atilde", "Auml", "Aring", "AElig", "Ccedil",

	// 200
	"Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
	"Iacute", "Icirc", "Iuml", "ETH", "Ntilde",

	// 210
	"Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
	"times", "Oslash", "Ugrave", "Uacute", "Ucirc",

	// 220
	"Uuml", "Yacute", "THORN", "szlig", "agrave",
	"aacute", "acirc", "atilde", "auml", "aring",

	// 230
	"aelig", "ccedil", "egrave", "eacute", "ecirc",
	"euml", "igrave", "iacute", "icirc", "iuml",

	// 240
	"eth", "ntilde", "ograve", "oacute", "ocirc",
	"otilde", "ouml", "divide", "oslash", "ugrave",

	// 250
	"uacute", "ucirc", "uuml", "yacute", "thorn",
	"yuml"
    };

    // From "HTMLsymbolx.ent" ... some of the symbols that
    // we can conveniently handle.  Entities for the Greek
    // alphabet (upper and lower cases) are compact.
    // Indexed by (character code - 913); the null entry (code 930)
    // marks a code with no entity name in this table.
    private static final String HTMLsymbolx_GR [] = {
	// 913
	"Alpha", "Beta", "Gamma", "Delta", "Epsilon",
	"Zeta", "Eta", "Theta", "Iota", "Kappa",

	// 923
	"Lambda", "Mu", "Nu", "Xi", "Omicron",
	"Pi", "Rho", null, "Sigma", "Tau",

	// 933
	"Upsilon", "Phi", "Chi", "Psi", "Omega"
    };

    // Lower case Greek letters, indexed by (character code - 945).
    private static final String HTMLsymbolx_gr [] = {
	// 945
	"alpha", "beta", "gamma", "delta", "epsilon",
	"zeta", "eta", "theta", "iota", "kappa",

	// 955
	"lambda", "mu", "nu", "xi", "omicron",
	"pi", "rho", "sigmaf", "sigma", "tau",

	// 965
	"upsilon", "phi", "chi", "psi", "omega"
    };


    // General routine to write text and substitute predefined
    // entities (XML, and a special case for XHTML) as needed.
    //
    // buf/off/len select the characters to write; a negative "off"
    // means the whole buffer.  "code" is one of the CTX_* constants
    // and selects which characters need escaping.  Inside the loop,
    // "continue" leaves a character in the pending run of literal
    // text, while "break" out of the switch means "esc" holds a
    // replacement for it.
    private void escapeChars (char buf [], int off, int len, int code)
    throws SAXException, IOException
    {
	// index (relative to off) of the first character not yet written
	int	first = 0;

	if (off < 0) {
	    off = 0;
	    len = buf.length;
	}
	for (int i = 0; i < len; i++) {
	    String	esc;
	    char 	c = buf [off + i];

	    switch (c) {
	      // Note that CTX_ATTRIBUTE isn't explicitly tested here;
	      // all syntax delimiters are escaped in CTX_ATTRIBUTE,
	      // otherwise it's similar to CTX_CONTENT

	      // ampersand flags entity references; entity replacement
	      // text has unexpanded references, other text doesn't.
	      case '&':
		if (code == CTX_ENTITY || code == CTX_UNPARSED)
		    continue;
		esc = "amp";
		break;

	      // attributes and text may NOT have literal '<', but
	      // entities may have markup constructs
	      case '<':
		if (code == CTX_ENTITY || code == CTX_UNPARSED)
		    continue;
		esc = "lt";
		break;

	      // as above re markup constructs; but otherwise
	      // except when canonicalizing, this is for consistency
	      case '>':
		if (code == CTX_ENTITY || code == CTX_UNPARSED)
		    continue;
	        esc = "gt";
		break;
	      case '\'':
		if (code == CTX_CONTENT || code == CTX_UNPARSED)
		    continue;
		if (canonical)
		    continue;
		esc = "apos";
		break;

	      // needed when printing quoted attribute/entity values
	      case '"':
		if (code == CTX_CONTENT || code == CTX_UNPARSED)
		    continue;
		esc = "quot";
		break;

	      // make line ends work per host OS convention
	      case '\n':
		esc = eol;
		break;

	      //
	      // No other characters NEED special treatment ... except
	      // for encoding-specific issues, like whether the character
	      // can really be represented in that encoding.
	      //
	      default:
		//
		// There are characters we can never write safely; getting
		// them is an error.
		//
		//   (a) They're never legal in XML ... detected by range 
		//	checks, and (eventually) by remerging surrogate
		//	pairs on output.  (Easy error for apps to prevent.)
		//
		//   (b) This encoding can't represent them, and we
		//	can't make reference substitution (e.g. inside
		//	CDATA sections, names, PI data, etc).  (Hard for
		//	apps to prevent, except by using UTF-8 or UTF-16
		//	as their output encoding.)
		//
		// We know a very little bit about what characters
		// the US-ASCII and ISO-8859-1 encodings support.  For
		// other encodings we can't detect the second type of
		// error at all.  (Never an issue for UTF-8 or UTF-16.)
		//

// FIXME:  CR in CDATA is an error; in text, turn to a char ref

// FIXME:  CR/LF/TAB in attributes should become char refs

		if ((c > 0xfffd)
			|| ((c < 0x0020) && !((c == 0x0009)
				|| (c == 0x000A) || (c == 0x000D)))
			|| (((c & dangerMask) != 0)
			    && (code == CTX_UNPARSED))) {

		    // if case (b) in CDATA, we might end the section,
		    // write a reference, then restart ... possible
		    // in one DOM L3 draft.

		    throw new CharConversionException (
			    "Illegal or non-writable character: U+"
			    + Integer.toHexString (c));
		}

		//
		// If the output encoding represents the character
		// directly, let it do so!  Else we'll escape it.
		//
		if ((c & dangerMask) == 0)
		    continue;
		esc = null;

		// Avoid numeric refs where symbolic ones exist, as
		// symbolic ones make more sense to humans reading!
		if (xhtml) {
		    // all the HTMLlat1x.ent entities
		    // (all the "ISO-8859-1" characters)
		    if (c >= 160 && c <= 255)
			esc = HTMLlat1x [c - 160];

		    // not quite half the HTMLsymbolx.ent entities
		    else if (c >= 913 && c <= 937)
			esc = HTMLsymbolx_GR [c - 913];
		    else if (c >= 945 && c <= 969)
			esc = HTMLsymbolx_gr [c - 945];

		    else switch (c) {
			// all of the HTMLspecialx.ent entities
			case  338: esc = "OElig";	break;
			case  339: esc = "oelig";	break;
			case  352: esc = "Scaron";	break;
			case  353: esc = "scaron";	break;
			case  376: esc = "Yuml";	break;
			case  710: esc = "circ";	break;
			case  732: esc = "tilde";	break;
			case 8194: esc = "ensp";	break;
			case 8195: esc = "emsp";	break;
			case 8201: esc = "thinsp";	break;
			case 8204: esc = "zwnj";	break;
			case 8205: esc = "zwj";		break;
			case 8206: esc = "lrm";		break;
			case 8207: esc = "rlm";		break;
			case 8211: esc = "ndash";	break;
			case 8212: esc = "mdash";	break;
			case 8216: esc = "lsquo";	break;
			case 8217: esc = "rsquo";	break;
			case 8218: esc = "sbquo";	break;
			case 8220: esc = "ldquo";	break;
			case 8221: esc = "rdquo";	break;
			case 8222: esc = "bdquo";	break;
			case 8224: esc = "dagger";	break;
			case 8225: esc = "Dagger";	break;
			case 8240: esc = "permil";	break;
			case 8249: esc = "lsaquo";	break;
			case 8250: esc = "rsaquo";	break;
			case 8364: esc = "euro";	break;

			// the other HTMLsymbolx.ent entities
			case  402: esc = "fnof";	break;
			case  977: esc = "thetasym";	break;
			case  978: esc = "upsih";	break;
			case  982: esc = "piv";		break;
			case 8226: esc = "bull";	break;
			case 8230: esc = "hellip";	break;
			case 8242: esc = "prime";	break;
			case 8243: esc = "Prime";	break;
			case 8254: esc = "oline";	break;
			case 8260: esc = "frasl";	break;
			case 8472: esc = "weierp";	break;
			case 8465: esc = "image";	break;
			case 8476: esc = "real";	break;
			case 8482: esc = "trade";	break;
			case 8501: esc = "alefsym";	break;
			case 8592: esc = "larr";	break;
			case 8593: esc = "uarr";	break;
			case 8594: esc = "rarr";	break;
			case 8595: esc = "darr";	break;
			case 8596: esc = "harr";	break;
			case 8629: esc = "crarr";	break;
			case 8656: esc = "lArr";	break;
			case 8657: esc = "uArr";	break;
			case 8658: esc = "rArr";	break;
			case 8659: esc = "dArr";	break;
			case 8660: esc = "hArr";	break;
			case 8704: esc = "forall";	break;
			case 8706: esc = "part";	break;
			case 8707: esc = "exist";	break;
			case 8709: esc = "empty";	break;
			case 8711: esc = "nabla";	break;
			case 8712: esc = "isin";	break;
			case 8713: esc = "notin";	break;
			case 8715: esc = "ni";		break;
			case 8719: esc = "prod";	break;
			case 8721: esc = "sum";		break;
			case 8722: esc = "minus";	break;
			case 8727: esc = "lowast";	break;
			case 8730: esc = "radic";	break;
			case 8733: esc = "prop";	break;
			case 8734: esc = "infin";	break;
			case 8736: esc = "ang";		break;
			case 8743: esc = "and";		break;
			case 8744: esc = "or";		break;
			case 8745: esc = "cap";		break;
			case 8746: esc = "cup";		break;
			case 8747: esc = "int";		break;
			case 8756: esc = "there4";	break;
			case 8764: esc = "sim";		break;
			case 8773: esc = "cong";	break;
			case 8776: esc = "asymp";	break;
			case 8800: esc = "ne";		break;
			case 8801: esc = "equiv";	break;
			case 8804: esc = "le";		break;
			case 8805: esc = "ge";		break;
			case 8834: esc = "sub";		break;
			case 8835: esc = "sup";		break;
			case 8836: esc = "nsub";	break;
			case 8838: esc = "sube";	break;
			case 8839: esc = "supe";	break;
			case 8853: esc = "oplus";	break;
			case 8855: esc = "otimes";	break;
			case 8869: esc = "perp";	break;
			case 8901: esc = "sdot";	break;
			case 8968: esc = "lceil";	break;
			case 8969: esc = "rceil";	break;
			case 8970: esc = "lfloor";	break;
			case 8971: esc = "rfloor";	break;
			case 9001: esc = "lang";	break;
			case 9002: esc = "rang";	break;
			case 9674: esc = "loz";		break;
			case 9824: esc = "spades";	break;
			case 9827: esc = "clubs";	break;
			case 9829: esc = "hearts";	break;
			case 9830: esc = "diams";	break;
		    }
		}

		//
else escape with numeric char refs + if (esc == null) { + stringBuf.setLength (0); + stringBuf.append ("#x"); + stringBuf.append (Integer.toHexString (c).toUpperCase ()); + esc = stringBuf.toString (); + + // FIXME: We don't write surrogate pairs correctly. + // They should work as one ref per character, since + // each pair is one character. For reading back into + // Unicode, it matters beginning in Unicode 3.1 ... + } + break; + } + if (i != first) + rawWrite (buf, off + first, i - first); + first = i + 1; + if (esc == eol) + newline (); + else { + rawWrite ('&'); + rawWrite (esc); + rawWrite (';'); + } + } + if (first < len) + rawWrite (buf, off + first, len - first); + } + + + + private void newline () + throws SAXException, IOException + { + out.write (eol); + column = 0; + } + + private void doIndent () + throws SAXException, IOException + { + int space = elementNestLevel * 2; + + newline (); + column = space; + // track tabs only at line starts + while (space > 8) { + out.write ("\t"); + space -= 8; + } + while (space > 0) { + out.write (" "); + space -= 2; + } + } + + private void rawWrite (char c) + throws IOException + { + out.write (c); + column++; + } + + private void rawWrite (String s) + throws SAXException, IOException + { + if (prettyPrinting && "default".equals (space.peek ())) { + char data [] = s.toCharArray (); + rawWrite (data, 0, data.length); + } else { + out.write (s); + column += s.length (); + } + } + + // NOTE: if xhtml, the REC gives some rules about whitespace + // which we could follow ... notably, many places where conformant + // agents "must" consolidate/normalize whitespace. Line ends can + // be removed there, etc. This may not be the right place to do + // such mappings though. + + // Line buffering may help clarify algorithms and improve results. + + // It's likely xml:space needs more attention. 
+ + /** + * Writes length characters of buf, starting at offset, to out. + * When prettyPrinting is off, or the top of the space stack is not "default", the characters are written unchanged and column advances by length. + * Otherwise output is filled toward lineLength: if the text does not fit (and noWrap is not set) the line is broken just after the last space or tab that fits in the remaining width, or failing that just after the next space or tab; each break calls doIndent(). If no space or tab is found, the remainder is written unbroken. + * Reports fatal ("stack discipline", null) when pretty printing with an empty space stack. + * + * @param buf characters to write + * @param offset index in buf of the first character to write + * @param length number of characters to write + */ + private void rawWrite (char buf [], int offset, int length) + throws SAXException, IOException + { + boolean wrap; + + if (prettyPrinting && space.empty ()) + fatal ("stack discipline", null); + + wrap = prettyPrinting && "default".equals (space.peek ()); + if (!wrap) { + out.write (buf, offset, length); + column += length; + return; + } + + // we're pretty printing and want to fill lines out only + // to the desired line length. + while (length > 0) { + int target = lineLength - column; + boolean wrote = false; + + // Do we even have a problem? + if (target > length || noWrap) { + out.write (buf, offset, length); + column += length; + return; + } + + // break the line at a space character, trying to fill + // as much of the line as possible. + char c; + + for (int i = target - 1; i >= 0; i--) { + if ((c = buf [offset + i]) == ' ' || c == '\t') { + i++; + out.write (buf, offset, i); + doIndent (); + offset += i; + length -= i; + wrote = true; + break; + } + } + if (wrote) + continue; + + // no space character permitting break before target + // line length is filled. So, take the next one. + if (target < 0) + target = 0; + for (int i = target; i < length; i++) + if ((c = buf [offset + i]) == ' ' || c == '\t') { + i++; + out.write (buf, offset, i); + doIndent (); + offset += i; + length -= i; + wrote = true; + break; + } + if (wrote) + continue; + + // no such luck. + out.write (buf, offset, length); + column += length; + break; + } + } +} diff --git a/libjava/gnu/xml/util/package.html b/libjava/gnu/xml/util/package.html new file mode 100644 index 0000000..6e6c0d7 --- /dev/null +++ b/libjava/gnu/xml/util/package.html @@ -0,0 +1,20 @@ +<!DOCTYPE html PUBLIC + "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/1999/PR-xhtml1-19991210/DTD/xhtml1-transitional.dtd"> + +<html><head><title> org.brownell.xml package </title> </head> +<!-- +/* + * Copyright (C) 1999,2000 The Free Software Foundation, Inc. 
+ */ +--> +<body> + <p> This package contains XML utilities, including SAX2 XML writers + and a parser of DOM trees, plus a command line driver. + That <a href="DoParse.html">driver</a> + connects parsers to simple processing pipelines. + It can be handy for command line validation or + transformation tasks, possibly in batch mode, + or within Makefiles. </p> + +</body></html> |