about summary refs log tree commit diff
path: root/libjava/classpath/gnu/xml/aelfred2
diff options
context:
space:
mode:
Diffstat (limited to 'libjava/classpath/gnu/xml/aelfred2')
-rw-r--r-- libjava/classpath/gnu/xml/aelfred2/JAXPFactory.java 230
-rw-r--r-- libjava/classpath/gnu/xml/aelfred2/SAXDriver.java 1609
-rw-r--r-- libjava/classpath/gnu/xml/aelfred2/XmlParser.java 5831
-rw-r--r-- libjava/classpath/gnu/xml/aelfred2/XmlReader.java 373
-rw-r--r-- libjava/classpath/gnu/xml/aelfred2/package.html 506
5 files changed, 0 insertions, 8549 deletions
diff --git a/libjava/classpath/gnu/xml/aelfred2/JAXPFactory.java b/libjava/classpath/gnu/xml/aelfred2/JAXPFactory.java
deleted file mode 100644
index 0944427..0000000
--- a/libjava/classpath/gnu/xml/aelfred2/JAXPFactory.java
+++ /dev/null
@@ -1,230 +0,0 @@
-/* JAXPFactory.java --
- Copyright (C) 2001 Free Software Foundation, Inc.
-
-This file is part of GNU Classpath.
-
-GNU Classpath is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU Classpath is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU Classpath; see the file COPYING. If not, write to the
-Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301 USA.
-
-Linking this library statically or dynamically with other modules is
-making a combined work based on this library. Thus, the terms and
-conditions of the GNU General Public License cover the whole
-combination.
-
-As a special exception, the copyright holders of this library give you
-permission to link this library with independent modules to produce an
-executable, regardless of the license terms of these independent
-modules, and to copy and distribute the resulting executable under
-terms of your choice, provided that you also meet, for each linked
-independent module, the terms and conditions of the license of that
-module. An independent module is a module which is not derived from
-or based on this library. If you modify this library, you may extend
-this exception to your version of the library, but you are not
-obligated to do so. If you do not wish to do so, delete this
-exception statement from your version. */
-
-package gnu.xml.aelfred2;
-
-import java.util.Enumeration;
-import java.util.Hashtable;
-
-import org.xml.sax.Parser;
-import org.xml.sax.XMLReader;
-import org.xml.sax.SAXException;
-import org.xml.sax.SAXNotRecognizedException;
-import org.xml.sax.SAXNotSupportedException;
-import org.xml.sax.helpers.XMLReaderAdapter;
-
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.parsers.SAXParser;
-import javax.xml.parsers.SAXParserFactory;
-
-
-/**
- * Configurable factory to create an Ælfred2 JAXP parser; required
- * to bootstrap using JAXP. You should use SAX2 directly where possible,
- * rather than through JAXP, since that gives you better control.
- * This class would normally be configured as a platform default factory.
- *
- * @author David Brownell
- */
-public final class JAXPFactory
- extends SAXParserFactory
-{
-
- private Hashtable flags = new Hashtable();
-
- /**
- * Constructs a factory which normally returns a non-validating
- * parser.
- */
- public JAXPFactory()
- {
- }
-
- public SAXParser newSAXParser()
- throws ParserConfigurationException, SAXException
- {
- JaxpParser jaxp = new JaxpParser();
- Enumeration e = flags.keys();
- XMLReader parser = jaxp.getXMLReader();
-
- parser.setFeature(SAXDriver.FEATURE + "namespaces",
- isNamespaceAware());
- parser.setFeature(SAXDriver.FEATURE + "validation",
- isValidating());
- // that makes SAX2 feature flags trump JAXP
-
- while (e.hasMoreElements())
- {
- String uri = (String) e.nextElement();
- Boolean value = (Boolean) flags.get(uri);
- parser.setFeature(uri, value.booleanValue());
- }
-
- return jaxp;
- }
-
- // yes, this "feature transfer" mechanism doesn't play well
-
- public void setFeature(String name, boolean value)
- throws ParserConfigurationException, SAXNotRecognizedException,
- SAXNotSupportedException
- {
- try
- {
- // force "early" detection of errors where possible
- // (flags can't necessarily be set before parsing)
- new JaxpParser().getXMLReader().setFeature(name, value);
-
- flags.put(name, Boolean.valueOf(value));
- }
- catch (SAXNotRecognizedException e)
- {
- throw new SAXNotRecognizedException(name);
- }
- catch (SAXNotSupportedException e)
- {
- throw new SAXNotSupportedException(name);
- }
- catch (Exception e)
- {
- throw new ParserConfigurationException(e.getClass().getName()
- + ": "
- + e.getMessage());
- }
- }
-
- public boolean getFeature(String name)
- throws ParserConfigurationException, SAXNotRecognizedException,
- SAXNotSupportedException
- {
- Boolean value = (Boolean) flags.get(name);
-
- if (value != null)
- {
- return value.booleanValue();
- }
- else
- {
- try
- {
- return new JaxpParser().getXMLReader().getFeature(name);
- }
- catch (SAXNotRecognizedException e)
- {
- throw new SAXNotRecognizedException(name);
- }
- catch (SAXNotSupportedException e)
- {
- throw new SAXNotSupportedException(name);
- }
- catch (SAXException e)
- {
- throw new ParserConfigurationException(e.getClass().getName()
- + ": "
- + e.getMessage());
- }
- }
- }
-
- private static class JaxpParser
- extends SAXParser
- {
-
- private XmlReader ae2 = new XmlReader();
- private XMLReaderAdapter parser = null;
-
- JaxpParser()
- {
- }
-
- public void setProperty(String id, Object value)
- throws SAXNotRecognizedException, SAXNotSupportedException
- {
- ae2.setProperty(id, value);
- }
-
- public Object getProperty(String id)
- throws SAXNotRecognizedException, SAXNotSupportedException
- {
- return ae2.getProperty(id);
- }
-
- public Parser getParser()
- throws SAXException
- {
- if (parser == null)
- {
- parser = new XMLReaderAdapter(ae2);
- }
- return parser;
- }
-
- public XMLReader getXMLReader ()
- throws SAXException
- {
- return ae2;
- }
-
- public boolean isNamespaceAware()
- {
- try
- {
- return ae2.getFeature(SAXDriver.FEATURE + "namespaces");
- }
- catch (Exception e)
- {
- throw new Error();
- }
- }
-
- public boolean isValidating()
- {
- try
- {
- return ae2.getFeature(SAXDriver.FEATURE + "validation");
- }
- catch (Exception e)
- {
- throw new Error();
- }
- }
-
- // TODO isXIncludeAware()
-
- }
-
-}
diff --git a/libjava/classpath/gnu/xml/aelfred2/SAXDriver.java b/libjava/classpath/gnu/xml/aelfred2/SAXDriver.java
deleted file mode 100644
index 6ce1470..0000000
--- a/libjava/classpath/gnu/xml/aelfred2/SAXDriver.java
+++ /dev/null
@@ -1,1609 +0,0 @@
-/* SAXDriver.java --
- Copyright (C) 1999,2000,2001,2004 Free Software Foundation, Inc.
-
-This file is part of GNU Classpath.
-
-GNU Classpath is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU Classpath is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU Classpath; see the file COPYING. If not, write to the
-Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301 USA.
-
-Linking this library statically or dynamically with other modules is
-making a combined work based on this library. Thus, the terms and
-conditions of the GNU General Public License cover the whole
-combination.
-
-As a special exception, the copyright holders of this library give you
-permission to link this library with independent modules to produce an
-executable, regardless of the license terms of these independent
-modules, and to copy and distribute the resulting executable under
-terms of your choice, provided that you also meet, for each linked
-independent module, the terms and conditions of the license of that
-module. An independent module is a module which is not derived from
-or based on this library. If you modify this library, you may extend
-this exception to your version of the library, but you are not
-obligated to do so. If you do not wish to do so, delete this
-exception statement from your version.
-
-Portions derived from code which carried the following notice:
-
- Copyright (c) 1997, 1998 by Microstar Software Ltd.
-
- AElfred is free for both commercial and non-commercial use and
- redistribution, provided that Microstar's copyright and disclaimer are
- retained intact. You are free to modify AElfred for your own use and
- to redistribute AElfred with your modifications, provided that the
- modifications are clearly documented.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- merchantability or fitness for a particular purpose. Please use it AT
- YOUR OWN RISK.
-*/
-
-package gnu.xml.aelfred2;
-
-import java.io.*;
-
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.util.Locale;
-import java.util.Stack;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Enumeration;
-import java.util.Iterator;
-import java.util.List;
-
-import org.xml.sax.*;
-import org.xml.sax.ext.*;
-import org.xml.sax.helpers.NamespaceSupport;
-
-
-/**
- * An enhanced SAX2 version of Microstar's Ælfred XML parser.
- * The enhancements primarily relate to significant improvements in
- * conformance to the XML specification, and SAX2 support. Performance
- * has been improved. See the package level documentation for more
- * information.
- *
- * <table border="1" width='100%' cellpadding='3' cellspacing='0'>
- * <tr bgcolor='#ccccff'>
- * <th><font size='+1'>Name</font></th>
- * <th><font size='+1'>Notes</font></th></tr>
- *
- * <tr><td colspan=2><center><em>Features ... URL prefix is
- * <b>http://xml.org/sax/features/</b></em></center></td></tr>
- *
- * <tr><td>(URL)/external-general-entities</td>
- * <td>Value defaults to <em>true</em></td></tr>
- * <tr><td>(URL)/external-parameter-entities</td>
- * <td>Value defaults to <em>true</em></td></tr>
- * <tr><td>(URL)/is-standalone</td>
- * <td>(PRELIMINARY) Returns true iff the document's parsing
- * has started (some non-error event after <em>startDocument()</em>
- * was reported) and the document's standalone flag is set.</td></tr>
- * <tr><td>(URL)/namespace-prefixes</td>
- * <td>Value defaults to <em>false</em> (but XML 1.0 names are
- * always reported)</td></tr>
- * <tr><td>(URL)/lexical-handler/parameter-entities</td>
- * <td>Value is fixed at <em>true</em></td></tr>
- * <tr><td>(URL)/namespaces</td>
- * <td>Value defaults to <em>true</em></td></tr>
- * <tr><td>(URL)/resolve-dtd-uris</td>
- * <td>(PRELIMINARY) Value defaults to <em>true</em></td></tr>
- * <tr><td>(URL)/string-interning</td>
- * <td>Value is fixed at <em>true</em></td></tr>
- * <tr><td>(URL)/use-attributes2</td>
- * <td>(PRELIMINARY) Value is fixed at <em>true</em></td></tr>
- * <tr><td>(URL)/use-entity-resolver2</td>
- * <td>(PRELIMINARY) Value defaults to <em>true</em></td></tr>
- * <tr><td>(URL)/validation</td>
- * <td>Value is fixed at <em>false</em></td></tr>
- *
- * <tr><td colspan=2><center><em>Handler Properties ... URL prefix is
- * <b>http://xml.org/sax/properties/</b></em></center></td></tr>
- *
- * <tr><td>(URL)/declaration-handler</td>
- * <td>A declaration handler may be provided. </td></tr>
- * <tr><td>(URL)/lexical-handler</td>
- * <td>A lexical handler may be provided. </td></tr>
- * </table>
- *
- * <p>This parser currently implements the SAX1 Parser API, but
- * it may not continue to do so in the future.
- *
- * @author Written by David Megginson (version 1.2a from Microstar)
- * @author Updated by David Brownell &lt;dbrownell@users.sourceforge.net&gt;
- * @see org.xml.sax.Parser
- */
-final public class SAXDriver
- implements Locator, Attributes2, XMLReader, Parser, AttributeList
-{
-
- private final DefaultHandler2 base = new DefaultHandler2();
- private XmlParser parser;
-
- private EntityResolver entityResolver = base;
- private EntityResolver2 resolver2 = null;
- private ContentHandler contentHandler = base;
- private DTDHandler dtdHandler = base;
- private ErrorHandler errorHandler = base;
- private DeclHandler declHandler = base;
- private LexicalHandler lexicalHandler = base;
-
- private String elementName;
- private Stack entityStack;
-
- // one vector (of object/struct): faster, smaller
- private List attributesList;
-
- private boolean namespaces = true;
- private boolean xmlNames = false;
- private boolean extGE = true;
- private boolean extPE = true;
- private boolean resolveAll = true;
- private boolean useResolver2 = true;
-
- // package private to allow (read-only) access in XmlParser
- boolean stringInterning = true;
-
- private int attributeCount;
- private boolean attributes;
- private String[] nsTemp;
- private NamespaceSupport prefixStack;
-
- //
- // Constructor.
- //
-
- /**
- * Constructs a SAX Parser.
- */
- public SAXDriver()
- {
- reset();
- }
-
- private void reset()
- {
- elementName = null;
- entityStack = new Stack();
- attributesList = Collections.synchronizedList(new ArrayList());
- attributeCount = 0;
- attributes = false;
- nsTemp = new String[3];
- prefixStack = null;
- }
-
-
- //
- // Implementation of org.xml.sax.Parser.
- //
-
- /**
- * <b>SAX1</b>: Sets the locale used for diagnostics; currently,
- * only locales using the English language are supported.
- * @param locale The locale for which diagnostics will be generated
- */
- public void setLocale(Locale locale)
- throws SAXException
- {
- if ("en".equals(locale.getLanguage()))
- {
- return;
- }
- throw new SAXException ("AElfred2 only supports English locales.");
- }
-
- /**
- * <b>SAX2</b>: Returns the object used when resolving external
- * entities during parsing (both general and parameter entities).
- */
- public EntityResolver getEntityResolver()
- {
- return (entityResolver == base) ? null : entityResolver;
- }
-
- /**
- * <b>SAX1, SAX2</b>: Set the entity resolver for this parser.
- * @param resolver The object used to resolve external entities.
- */
- public void setEntityResolver(EntityResolver resolver)
- {
- if (resolver instanceof EntityResolver2)
- {
- resolver2 = (EntityResolver2) resolver;
- }
- else
- {
- resolver2 = null;
- }
- if (resolver == null)
- {
- resolver = base;
- }
- entityResolver = resolver;
- }
-
- /**
- * <b>SAX2</b>: Returns the object used to process declarations related
- * to notations and unparsed entities.
- */
- public DTDHandler getDTDHandler()
- {
- return (dtdHandler == base) ? null : dtdHandler;
- }
-
- /**
- * <b>SAX1, SAX2</b>: Set the DTD handler for this parser.
- * @param handler The object to receive DTD events.
- */
- public void setDTDHandler(DTDHandler handler)
- {
- if (handler == null)
- {
- handler = base;
- }
- this.dtdHandler = handler;
- }
-
-
- /**
- * <b>SAX1</b>: Set the document handler for this parser. If a
- * content handler was set, this document handler will supplant it.
- * The parser is set to report all XML 1.0 names rather than to
- * filter out "xmlns" attributes (the "namespace-prefixes" feature
- * is set to true).
- *
- * @deprecated SAX2 programs should use the XMLReader interface
- * and a ContentHandler.
- *
- * @param handler The object to receive document events.
- */
- public void setDocumentHandler(DocumentHandler handler)
- {
- contentHandler = new Adapter(handler);
- xmlNames = true;
- }
-
- /**
- * <b>SAX2</b>: Returns the object used to report the logical
- * content of an XML document.
- */
- public ContentHandler getContentHandler()
- {
- return (contentHandler == base) ? null : contentHandler;
- }
-
- /**
- * <b>SAX2</b>: Assigns the object used to report the logical
- * content of an XML document. If a document handler was set,
- * this content handler will supplant it (but XML 1.0 style name
- * reporting may remain enabled).
- */
- public void setContentHandler(ContentHandler handler)
- {
- if (handler == null)
- {
- handler = base;
- }
- contentHandler = handler;
- }
-
- /**
- * <b>SAX1, SAX2</b>: Set the error handler for this parser.
- * @param handler The object to receive error events.
- */
- public void setErrorHandler(ErrorHandler handler)
- {
- if (handler == null)
- {
- handler = base;
- }
- this.errorHandler = handler;
- }
-
- /**
- * <b>SAX2</b>: Returns the object used to receive callbacks for XML
- * errors of all levels (fatal, nonfatal, warning), or null if none was set.
- */
- public ErrorHandler getErrorHandler()
- {
- return (errorHandler == base) ? null : errorHandler;
- }
-
- /**
- * <b>SAX1, SAX2</b>: Auxiliary API to parse an XML document, used mostly
- * when no URI is available.
- * If you want anything useful to happen, you should set
- * at least one type of handler.
- * @param source The XML input source. Don't set 'encoding' unless
- * you know for a fact that it's correct.
- * @see #setEntityResolver
- * @see #setDTDHandler
- * @see #setContentHandler
- * @see #setErrorHandler
- * @exception SAXException The handlers may throw any SAXException,
- * and the parser normally throws SAXParseException objects.
- * @exception IOException IOExceptions are normally thrown through
- * the parser if there are problems reading the source document.
- */
- public void parse(InputSource source)
- throws SAXException, IOException
- {
- synchronized (base)
- {
- parser = new XmlParser();
- if (namespaces)
- {
- prefixStack = new NamespaceSupport();
- }
- else if (!xmlNames)
- {
- throw new IllegalStateException();
- }
- parser.setHandler(this);
-
- try
- {
- Reader r = source.getCharacterStream();
- InputStream in = source.getByteStream();
-
- parser.doParse(source.getSystemId(),
- source.getPublicId(),
- r,
- in,
- source.getEncoding());
- }
- catch (SAXException e)
- {
- throw e;
- }
- catch (IOException e)
- {
- throw e;
- }
- catch (RuntimeException e)
- {
- throw e;
- }
- catch (Exception e)
- {
- throw new SAXParseException(e.getMessage(), this, e);
- }
- finally
- {
- contentHandler.endDocument();
- reset();
- }
- }
- }
-
- /**
- * <b>SAX1, SAX2</b>: Preferred API to parse an XML document, using a
- * system identifier (URI).
- */
- public void parse(String systemId)
- throws SAXException, IOException
- {
- parse(new InputSource(systemId));
- }
-
- //
- // Implementation of SAX2 "XMLReader" interface
- //
- static final String FEATURE = "http://xml.org/sax/features/";
- static final String PROPERTY = "http://xml.org/sax/properties/";
-
- /**
- * <b>SAX2</b>: Tells the value of the specified feature flag.
- *
- * @exception SAXNotRecognizedException thrown if the feature flag
- * is neither built in, nor yet assigned.
- */
- public boolean getFeature(String featureId)
- throws SAXNotRecognizedException, SAXNotSupportedException
- {
- if ((FEATURE + "validation").equals(featureId))
- {
- return false;
- }
-
- // external entities (both types) are optionally included
- if ((FEATURE + "external-general-entities").equals(featureId))
- {
- return extGE;
- }
- if ((FEATURE + "external-parameter-entities").equals(featureId))
- {
- return extPE;
- }
-
- // element/attribute names are as written in document; no mangling
- if ((FEATURE + "namespace-prefixes").equals(featureId))
- {
- return xmlNames;
- }
-
- // report element/attribute namespaces?
- if ((FEATURE + "namespaces").equals(featureId))
- {
- return namespaces;
- }
-
- // all PEs and GEs are reported
- if ((FEATURE + "lexical-handler/parameter-entities").equals(featureId))
- {
- return true;
- }
-
- // default is true
- if ((FEATURE + "string-interning").equals(featureId))
- {
- return stringInterning;
- }
-
- // EXTENSIONS 1.1
-
- // always returns isSpecified info
- if ((FEATURE + "use-attributes2").equals(featureId))
- {
- return true;
- }
-
- // meaningful between startDocument/endDocument
- if ((FEATURE + "is-standalone").equals(featureId))
- {
- if (parser == null)
- {
- throw new SAXNotSupportedException(featureId);
- }
- return parser.isStandalone();
- }
-
- // optionally don't absolutize URIs in declarations
- if ((FEATURE + "resolve-dtd-uris").equals(featureId))
- {
- return resolveAll;
- }
-
- // optionally use resolver2 interface methods, if possible
- if ((FEATURE + "use-entity-resolver2").equals(featureId))
- {
- return useResolver2;
- }
-
- throw new SAXNotRecognizedException(featureId);
- }
-
- // package private
- DeclHandler getDeclHandler()
- {
- return declHandler;
- }
-
- // package private
- boolean resolveURIs()
- {
- return resolveAll;
- }
-
- /**
- * <b>SAX2</b>: Returns the specified property.
- *
- * @exception SAXNotRecognizedException thrown if the property value
- * is neither built in, nor yet stored.
- */
- public Object getProperty(String propertyId)
- throws SAXNotRecognizedException
- {
- if ((PROPERTY + "declaration-handler").equals(propertyId))
- {
- return (declHandler == base) ? null : declHandler;
- }
-
- if ((PROPERTY + "lexical-handler").equals(propertyId))
- {
- return (lexicalHandler == base) ? null : lexicalHandler;
- }
-
- // unknown properties
- throw new SAXNotRecognizedException(propertyId);
- }
-
- /**
- * <b>SAX2</b>: Sets the state of feature flags in this parser. Some
- * built-in feature flags are mutable.
- */
- public void setFeature(String featureId, boolean value)
- throws SAXNotRecognizedException, SAXNotSupportedException
- {
- boolean state;
-
- // Features with a defined value, we just change it if we can.
- state = getFeature (featureId);
-
- if (state == value)
- {
- return;
- }
- if (parser != null)
- {
- throw new SAXNotSupportedException("not while parsing");
- }
-
- if ((FEATURE + "namespace-prefixes").equals(featureId))
- {
- // in this implementation, this only affects xmlns reporting
- xmlNames = value;
- // forcibly prevent illegal parser state
- if (!xmlNames)
- {
- namespaces = true;
- }
- return;
- }
-
- if ((FEATURE + "namespaces").equals(featureId))
- {
- namespaces = value;
- // forcibly prevent illegal parser state
- if (!namespaces)
- {
- xmlNames = true;
- }
- return;
- }
-
- if ((FEATURE + "external-general-entities").equals(featureId))
- {
- extGE = value;
- return;
- }
- if ((FEATURE + "external-parameter-entities").equals(featureId))
- {
- extPE = value;
- return;
- }
- if ((FEATURE + "resolve-dtd-uris").equals(featureId))
- {
- resolveAll = value;
- return;
- }
-
- if ((FEATURE + "use-entity-resolver2").equals(featureId))
- {
- useResolver2 = value;
- return;
- }
-
- throw new SAXNotRecognizedException(featureId);
- }
-
- /**
- * <b>SAX2</b>: Assigns the specified property. Like SAX1 handlers,
- * these may be changed at any time.
- */
- public void setProperty(String propertyId, Object value)
- throws SAXNotRecognizedException, SAXNotSupportedException
- {
- // see if the property is recognized
- getProperty(propertyId);
-
- // Properties with a defined value, we just change it if we can.
-
- if ((PROPERTY + "declaration-handler").equals(propertyId))
- {
- if (value == null)
- {
- declHandler = base;
- }
- else if (!(value instanceof DeclHandler))
- {
- throw new SAXNotSupportedException(propertyId);
- }
- else
- {
- declHandler = (DeclHandler) value;
- }
- return ;
- }
-
- if ((PROPERTY + "lexical-handler").equals(propertyId))
- {
- if (value == null)
- {
- lexicalHandler = base;
- }
- else if (!(value instanceof LexicalHandler))
- {
- throw new SAXNotSupportedException(propertyId);
- }
- else
- {
- lexicalHandler = (LexicalHandler) value;
- }
- return;
- }
-
- throw new SAXNotSupportedException(propertyId);
- }
-
- //
- // This is where the driver receives XmlParser callbacks and translates
- // them into SAX callbacks. Some more callbacks have been added for
- // SAX2 support.
- //
-
- void startDocument()
- throws SAXException
- {
- contentHandler.setDocumentLocator(this);
- contentHandler.startDocument();
- attributesList.clear();
- }
-
- void skippedEntity(String name)
- throws SAXException
- {
- contentHandler.skippedEntity(name);
- }
-
- InputSource getExternalSubset(String name, String baseURI)
- throws SAXException, IOException
- {
- if (resolver2 == null || !useResolver2 || !extPE)
- {
- return null;
- }
- return resolver2.getExternalSubset(name, baseURI);
- }
-
- InputSource resolveEntity(boolean isPE, String name,
- InputSource in, String baseURI)
- throws SAXException, IOException
- {
- InputSource source;
-
- // external entities might be skipped
- if (isPE && !extPE)
- {
- return null;
- }
- if (!isPE && !extGE)
- {
- return null;
- }
-
- // ... or not
- lexicalHandler.startEntity(name);
- if (resolver2 != null && useResolver2)
- {
- source = resolver2.resolveEntity(name, in.getPublicId(),
- baseURI, in.getSystemId());
- if (source == null)
- {
- in.setSystemId(absolutize(baseURI,
- in.getSystemId(), false));
- source = in;
- }
- }
- else
- {
- in.setSystemId(absolutize(baseURI,
- in.getSystemId(),
- entityResolver != base));
- source = entityResolver.resolveEntity(in.getPublicId(),
- in.getSystemId());
- if (source == null)
- {
- source = in;
- }
- }
- startExternalEntity(name, source.getSystemId(), true);
- return source;
- }
-
- // absolutize a system ID relative to the specified base URI
- // (temporarily) package-visible for external entity decls
- String absolutize(String baseURI, String systemId, boolean nice)
- throws MalformedURLException, SAXException
- {
- // FIXME normalize system IDs -- when?
- // - Convert to UTF-8
- // - Map reserved and non-ASCII characters to %HH
-
- try
- {
- if (baseURI == null)
- {
- if (XmlParser.uriWarnings)
- {
- warn ("No base URI; hope this SYSTEM id is absolute: "
- + systemId);
- }
- return new URL(systemId).toString();
- }
- else
- {
- return new URL(new URL(baseURI), systemId).toString();
- }
- }
- catch (MalformedURLException e)
- {
- // Let unknown URI schemes pass through unless we need
- // the JVM to map them to i/o streams for us...
- if (!nice)
- {
- throw e;
- }
-
- // sometimes sysids for notations or unparsed entities
- // aren't really URIs...
- warn("Can't absolutize SYSTEM id: " + e.getMessage());
- return systemId;
- }
- }
-
- void startExternalEntity(String name, String systemId, boolean stackOnly)
- throws SAXException
- {
- // The following warning was deleted because the application has the
- // option of not setting systemId. Sun's JAXP or Xerces seems to
- // ignore this case.
- /*
- if (systemId == null)
- warn ("URI was not reported to parser for entity " + name);
- */
- if (!stackOnly) // spliced [dtd] needs startEntity
- {
- lexicalHandler.startEntity(name);
- }
- entityStack.push(systemId);
- }
-
- void endExternalEntity(String name)
- throws SAXException
- {
- if (!"[document]".equals(name))
- {
- lexicalHandler.endEntity(name);
- }
- entityStack.pop();
- }
-
- void startInternalEntity(String name)
- throws SAXException
- {
- lexicalHandler.startEntity(name);
- }
-
- void endInternalEntity(String name)
- throws SAXException
- {
- lexicalHandler.endEntity(name);
- }
-
- void doctypeDecl(String name, String publicId, String systemId)
- throws SAXException
- {
- lexicalHandler.startDTD(name, publicId, systemId);
-
- // ... the "name" is a declaration and should be given
- // to the DeclHandler (but sax2 doesn't).
-
- // the IDs for the external subset are lexical details,
- // as are the contents of the internal subset; but sax2
- // doesn't provide the internal subset "pre-parse"
- }
-
- void notationDecl(String name, String publicId, String systemId,
- String baseUri)
- throws SAXException
- {
- try
- {
- dtdHandler.notationDecl(name, publicId,
- (resolveAll && systemId != null)
- ? absolutize(baseUri, systemId, true)
- : systemId);
- }
- catch (IOException e)
- {
- // "can't happen"
- throw new SAXParseException(e.getMessage(), this, e);
- }
- }
-
- void unparsedEntityDecl(String name, String publicId, String systemId,
- String baseUri, String notation)
- throws SAXException
- {
- try
- {
- dtdHandler.unparsedEntityDecl(name, publicId,
- resolveAll
- ? absolutize(baseUri, systemId, true)
- : systemId,
- notation);
- }
- catch (IOException e)
- {
- // "can't happen"
- throw new SAXParseException(e.getMessage(), this, e);
- }
- }
-
- void endDoctype()
- throws SAXException
- {
- lexicalHandler.endDTD();
- }
-
- private void declarePrefix(String prefix, String uri)
- throws SAXException
- {
- int index = uri.indexOf(':');
-
- // many versions of nwalsh docbook stylesheets
- // have bogus URLs; so this can't be an error...
- if (index < 1 && uri.length() != 0)
- {
- warn("relative URI for namespace: " + uri);
- }
-
- // FIXME: char [0] must be ascii alpha; chars [1..index]
- // must be ascii alphanumeric or in "+-." [RFC 2396]
-
- //Namespace Constraints
- //name for xml prefix must be http://www.w3.org/XML/1998/namespace
- boolean prefixEquality = prefix.equals("xml");
- boolean uriEquality = uri.equals("http://www.w3.org/XML/1998/namespace");
- if ((prefixEquality || uriEquality) && !(prefixEquality && uriEquality))
- {
- fatal("xml is by definition bound to the namespace name " +
- "http://www.w3.org/XML/1998/namespace");
- }
-
- //xmlns prefix declaration is illegal but xml prefix declaration is legal...
- if (prefixEquality && uriEquality)
- {
- return;
- }
-
- //name for xmlns prefix must be http://www.w3.org/2000/xmlns/
- prefixEquality = prefix.equals("xmlns");
- uriEquality = uri.equals("http://www.w3.org/2000/xmlns/");
- if ((prefixEquality || uriEquality) && !(prefixEquality && uriEquality))
- {
- fatal("http://www.w3.org/2000/xmlns/ is by definition bound" +
- " to prefix xmlns");
- }
-
- //even if the uri is http://www.w3.org/2000/xmlns/
- // it is illegal to declare it
- if (prefixEquality && uriEquality)
- {
- fatal ("declaring the xmlns prefix is illegal");
- }
-
- uri = uri.intern();
- prefixStack.declarePrefix(prefix, uri);
- contentHandler.startPrefixMapping(prefix, uri);
- }
-
- void attribute(String qname, String value, boolean isSpecified)
- throws SAXException
- {
- if (!attributes)
- {
- attributes = true;
- if (namespaces)
- {
- prefixStack.pushContext();
- }
- }
-
- // process namespace decls immediately;
- // then maybe forget this as an attribute
- if (namespaces)
- {
- int index;
-
- // default NS declaration?
- if (stringInterning)
- {
- if ("xmlns" == qname)
- {
- declarePrefix("", value);
- if (!xmlNames)
- {
- return;
- }
- }
- // NS prefix declaration?
- else if ((index = qname.indexOf(':')) == 5
- && qname.startsWith("xmlns"))
- {
- String prefix = qname.substring(6);
-
- if (prefix.equals(""))
- {
- fatal("missing prefix " +
- "in namespace declaration attribute");
- }
- if (value.length() == 0)
- {
- verror("missing URI in namespace declaration attribute: "
- + qname);
- }
- else
- {
- declarePrefix(prefix, value);
- }
- if (!xmlNames)
- {
- return;
- }
- }
- }
- else
- {
- if ("xmlns".equals(qname))
- {
- declarePrefix("", value);
- if (!xmlNames)
- {
- return;
- }
- }
- // NS prefix declaration?
- else if ((index = qname.indexOf(':')) == 5
- && qname.startsWith("xmlns"))
- {
- String prefix = qname.substring(6);
-
- if (value.length() == 0)
- {
- verror("missing URI in namespace decl attribute: "
- + qname);
- }
- else
- {
- declarePrefix(prefix, value);
- }
- if (!xmlNames)
- {
- return;
- }
- }
- }
- }
- // remember this attribute ...
- attributeCount++;
-
- // attribute type comes from querying parser's DTD records
- attributesList.add(new Attribute(qname, value, isSpecified));
-
- }
-
- /**
-  * Reports the start of an element to the content handler, after
-  * resolving the namespace of every pending attribute and of the
-  * element name itself (when namespace processing is on).
-  *
-  * @param elname the element's qualified name
-  * @throws SAXException on an undeclared prefix, or from the handler
-  */
- void startElement(String elname)
-   throws SAXException {
-   ContentHandler handler = contentHandler;
-
-   //
-   // NOTE:  this implementation of namespace support adds something
-   // like six percent to parsing CPU time, in a large (~50 MB)
-   // document that doesn't use namespaces at all.  (Measured by PC
-   // sampling, with a bug where endElement processing was omitted.)
-   // [Measurement referred to older implementation, older JVM ...]
-   //
-   // It ought to become notably faster in such cases.  Most
-   // costs are the prefix stack calling Hashtable.get() (2%),
-   // String.hashCode() (1.5%) and about 1.3% each for pushing
-   // the context, and two chunks of name processing.
-   //
-
-   if (namespaces) {
-     if (!attributes) {
-       // no attribute() call opened a context for this element yet
-       prefixStack.pushContext();
-     } else {
-       // now we can patch up namespace refs; we saw all the
-       // declarations, so now we'll do the Right Thing
-       for (Iterator pending = attributesList.iterator();
-            pending.hasNext(); ) {
-         Attribute attr = (Attribute) pending.next();
-         String qname = attr.name;
-
-         // default NS declaration?
-         boolean defaultDecl = stringInterning
-           ? "xmlns" == qname
-           : "xmlns".equals(qname);
-         if (defaultDecl) {
-           continue;
-         }
-
-         // Illegal in the new Namespaces Draft
-         // should it be only in 1.1 docs??
-         if (qname.equals (":")) {
-           fatal("namespace names consisting of a single colon " +
-                 "character are invalid");
-         }
-
-         // NS prefix declaration?
-         if (qname.indexOf(':') == 5 && qname.startsWith("xmlns")) {
-           continue;
-         }
-
-         // it's not a NS decl; patch namespace info items
-         if (prefixStack.processName(qname, nsTemp, true) == null) {
-           fatal("undeclared attribute prefix in: " + qname);
-         } else {
-           attr.nameSpace = nsTemp[0];
-           attr.localName = nsTemp[1];
-         }
-       }
-     }
-   }
-
-   // save element name so attribute callbacks work
-   elementName = elname;
-   if (!namespaces) {
-     handler.startElement("", "", elname, this);
-   } else {
-     if (prefixStack.processName(elname, nsTemp, false) == null) {
-       fatal("undeclared element prefix in: " + elname);
-       nsTemp[0] = nsTemp[1] = "";
-     }
-     handler.startElement(nsTemp[0], nsTemp[1], elname, this);
-   }
-   // elementName = null;
-
-   // elements with no attributes are pretty common!
-   if (attributes) {
-     attributesList.clear();
-     attributeCount = 0;
-     attributes = false;
-   }
- }
-
- /**
-  * Reports the end of an element and, with namespace processing on,
-  * the end of every prefix mapping declared on it, then pops the
-  * element's prefix context.
-  *
-  * @param elname the element's qualified name
-  * @throws SAXException from the content handler
-  */
- void endElement(String elname)
-   throws SAXException {
-   ContentHandler handler = contentHandler;
-
-   if (namespaces) {
-     prefixStack.processName(elname, nsTemp, false);
-     handler.endElement(nsTemp[0], nsTemp[1], elname);
-
-     for (Enumeration declared = prefixStack.getDeclaredPrefixes();
-          declared.hasMoreElements(); ) {
-       handler.endPrefixMapping((String) declared.nextElement());
-     }
-     prefixStack.popContext();
-   } else {
-     handler.endElement("", "", elname);
-   }
- }
-
- void startCDATA() // forwards the start of a CDATA section to lexicalHandler
- throws SAXException
- {
- lexicalHandler.startCDATA();
- }
-
- void charData(char[] ch, int start, int length) // forwards character data to contentHandler.characters()
- throws SAXException
- {
- contentHandler.characters(ch, start, length);
- }
-
- void endCDATA() // forwards the end of a CDATA section to lexicalHandler
- throws SAXException
- {
- lexicalHandler.endCDATA();
- }
-
- void ignorableWhitespace(char[] ch, int start, int length) // forwards ignorable whitespace to contentHandler unchanged
- throws SAXException
- {
- contentHandler.ignorableWhitespace(ch, start, length);
- }
-
- void processingInstruction(String target, String data) // forwards a processing instruction to contentHandler
- throws SAXException
- {
- contentHandler.processingInstruction(target, data);
- }
-
- /**
-  * Passes a comment to the lexical handler, unless that handler is
-  * <code>base</code> (presumably the built-in default handler, for
-  * which the call is skipped).
-  */
- void comment(char[] ch, int start, int length)
-   throws SAXException {
-   if (lexicalHandler == base) {
-     return;
-   }
-   lexicalHandler.comment(ch, start, length);
- }
-
- /**
-  * Reports a fatal error to the error handler and then always throws
-  * it, using this driver as the Locator for the exception.
-  *
-  * @param message the error text
-  * @throws SAXException always
-  */
- void fatal(String message)
-   throws SAXException {
-   SAXParseException problem = new SAXParseException(message, this);
-
-   errorHandler.fatalError(problem);
-
-   // Even if the application can continue ... we can't!
-   throw problem;
- }
-
- // We can safely report a few validity errors that
- // make layered SAX2 DTD validation more conformant
- /**
-  * Reports a (recoverable) validity error to the error handler.
-  * Nothing is thrown here unless the handler itself throws.
-  */
- void verror(String message)
-   throws SAXException {
-   errorHandler.error(new SAXParseException(message, this));
- }
-
- /**
-  * Reports a warning to the error handler, located at the parser's
-  * current position.
-  */
- void warn(String message)
-   throws SAXException {
-   errorHandler.warning(new SAXParseException(message, this));
- }
-
- //
- // Implementation of org.xml.sax.Attributes.
- //
-
- /**
- * <b>SAX1 AttributeList, SAX2 Attributes</b> method
- * (don't invoke on parser);
- */
- public int getLength() // number of attributes currently held in attributesList
- {
- return attributesList.size();
- }
-
- /**
- * <b>SAX2 Attributes</b> method (don't invoke on parser);
- */
- public String getURI(int index)
- {
- if (index < 0 || index >= attributesList.size())
- {
- return null;
- }
- return ((Attribute) attributesList.get(index)).nameSpace;
- }
-
- /**
- * <b>SAX2 Attributes</b> method (don't invoke on parser);
- */
- // Returns the local part of the attribute name at this index: null when
- // the index is out of range, "" when no local name is known and
- // namespace processing is off.
- public String getLocalName(int index) {
-   if (index < 0 || index >= attributesList.size()) {
-     return null;
-   }
-   Attribute attr = (Attribute) attributesList.get(index);
-   if (attr.localName != null) {
-     return attr.localName;
-   }
-   if (!namespaces) {
-     return "";
-   }
-   // FIXME attr.localName is sometimes null, why?
-   // NOTE(review): the Attribute constructor never sets localName, and
-   // startElement() skips xmlns declarations when patching names, so
-   // those attributes appear to be the ones that reach this point.
-   // XXX fix this here for now
-   int colon = attr.name.indexOf(':');
-   if (colon == -1) {
-     attr.localName = attr.name;
-   } else {
-     attr.localName = attr.name.substring(colon + 1);
-   }
-   return attr.localName;
- }
-
- /**
- * <b>SAX2 Attributes</b> method (don't invoke on parser);
- */
- // Returns the qualified name at this index: null when the index is out
- // of range, "" when the stored name is null.
- public String getQName(int index) {
-   if (index < 0 || index >= attributesList.size()) {
-     return null;
-   }
-   String qname = ((Attribute) attributesList.get(index)).name;
-   if (qname == null) {
-     return "";
-   }
-   return qname;
- }
-
- /**
- * <b>SAX1 AttributeList</b> method (don't invoke on parser);
- */
- public String getName(int index) // SAX1 spelling; same result as getQName(index)
- {
- return getQName(index);
- }
-
- /**
- * <b>SAX1 AttributeList, SAX2 Attributes</b> method
- * (don't invoke on parser);
- */
- // Returns the declared type of the attribute at this index, as recorded
- // by the parser: null for an out-of-range index, "CDATA" when the parser
- // has no type for it, and "NMTOKEN" in place of "ENUMERATION".
- public String getType(int index) {
-   if (index < 0 || index >= attributesList.size()) {
-     return null;
-   }
-   String type = parser.getAttributeType(elementName, getQName(index));
-   if (type == null) {
-     return "CDATA";
-   }
-   // ... use DeclHandler.attributeDecl to see enumerations
-   // equals() instead of ==, so the mapping does not depend on the
-   // parser returning an interned string.
-   if ("ENUMERATION".equals(type)) {
-     return "NMTOKEN";
-   }
-   return type;
- }
-
- /**
- * <b>SAX1 AttributeList, SAX2 Attributes</b> method
- * (don't invoke on parser);
- */
- public String getValue(int index)
- {
- if (index < 0 || index >= attributesList.size())
- {
- return null;
- }
- return ((Attribute) attributesList.get(index)).value;
- }
-
- /**
- * <b>SAX2 Attributes</b> method (don't invoke on parser);
- */
- // Returns the index of the first attribute whose namespace URI and local
- // name both equal the arguments, or -1 when there is none.
- public int getIndex(String uri, String local) {
-   for (int i = 0, count = getLength(); i < count; i++) {
-     if (getURI(i).equals(uri) && getLocalName(i).equals(local)) {
-       return i;
-     }
-   }
-   return -1;
- }
-
- /**
- * <b>SAX2 Attributes</b> method (don't invoke on parser);
- */
- // Returns the index of the first attribute whose qualified name equals
- // xmlName, or -1 when there is none.
- public int getIndex(String xmlName) {
-   int count = getLength();
-   int i = 0;
-
-   while (i < count) {
-     if (getQName(i).equals(xmlName)) {
-       return i;
-     }
-     i++;
-   }
-   return -1;
- }
-
- /**
- * <b>SAX2 Attributes</b> method (don't invoke on parser);
- */
- // Type lookup by namespace URI and local name; null when no attribute matches.
- public String getType(String uri, String local) {
-   int position = getIndex(uri, local);
-   return (position < 0) ? null : getType(position);
- }
-
- /**
- * <b>SAX1 AttributeList, SAX2 Attributes</b> method
- * (don't invoke on parser);
- */
- // Type lookup by qualified name; null when no attribute matches.
- public String getType(String xmlName) {
-   int position = getIndex(xmlName);
-   return (position < 0) ? null : getType(position);
- }
-
- /**
- * <b>SAX Attributes</b> method (don't invoke on parser);
- */
- // Value lookup by namespace URI and local name; null when no attribute matches.
- public String getValue(String uri, String local) {
-   int position = getIndex(uri, local);
-   return (position < 0) ? null : getValue(position);
- }
-
- /**
- * <b>SAX1 AttributeList, SAX2 Attributes</b> method
- * (don't invoke on parser);
- */
- // Value lookup by qualified name; null when no attribute matches.
- public String getValue(String xmlName) {
-   int position = getIndex(xmlName);
-   return (position < 0) ? null : getValue(position);
- }
-
- //
- // Implementation of org.xml.sax.ext.Attributes2
- //
-
- /** @return false unless the attribute was declared in the DTD.
- * @throws java.lang.ArrayIndexOutOfBoundsException
- * When the supplied index does not identify an attribute.
- */
- // True when the parser has a declared type for the attribute at this
- // index.  The bounds test uses attributesList.size(), the same limit
- // every other indexed accessor in this class checks against.
- public boolean isDeclared(int index) {
-   if (index < 0 || index >= attributesList.size()) {
-     throw new ArrayIndexOutOfBoundsException();
-   }
-   String type = parser.getAttributeType(elementName, getQName(index));
-   return (type != null);
- }
-
- /** @return false unless the attribute was declared in the DTD.
- * @throws java.lang.IllegalArgumentException
- * When the supplied names do not identify an attribute.
- */
- // True when the parser has a declared type for the named attribute;
- // IllegalArgumentException when no attribute has that qualified name.
- public boolean isDeclared(String qName) {
-   if (getIndex(qName) < 0) {
-     throw new IllegalArgumentException();
-   }
-   return parser.getAttributeType(elementName, qName) != null;
- }
-
- /** @return false unless the attribute was declared in the DTD.
- * @throws java.lang.IllegalArgumentException
- * When the supplied names do not identify an attribute.
- */
- // True when the parser has a declared type for the attribute identified
- // by namespace URI and local name.  A failed lookup now raises the
- // IllegalArgumentException this method documents, instead of passing -1
- // to isDeclared(int) and surfacing an ArrayIndexOutOfBoundsException.
- public boolean isDeclared(String uri, String localName) {
-   int index = getIndex(uri, localName);
-   if (index < 0) {
-     throw new IllegalArgumentException();
-   }
-   return isDeclared(index);
- }
-
- /**
- * <b>SAX-ext Attributes2</b> method (don't invoke on parser);
- */
- // Returns the "specified" flag stored for the attribute at this index.
- // Out-of-range indexes are rejected explicitly, so the exception type no
- // longer depends on what the underlying list throws.
- public boolean isSpecified(int index) {
-   if (index < 0 || index >= attributesList.size()) {
-     throw new ArrayIndexOutOfBoundsException(index);
-   }
-   return ((Attribute) attributesList.get(index)).specified;
- }
-
- /**
- * <b>SAX-ext Attributes2</b> method (don't invoke on parser);
- */
- // Returns the "specified" flag of the attribute identified by namespace
- // URI and local name.  Throws IllegalArgumentException when no attribute
- // matches, rather than handing index -1 to the list.
- public boolean isSpecified(String uri, String local) {
-   int index = getIndex (uri, local);
-   if (index < 0) {
-     throw new IllegalArgumentException();
-   }
-   return isSpecified(index);
- }
-
- /**
- * <b>SAX-ext Attributes2</b> method (don't invoke on parser);
- */
- // Returns the "specified" flag of the attribute with this qualified
- // name.  Throws IllegalArgumentException when no attribute matches,
- // rather than handing index -1 to the list.
- public boolean isSpecified(String xmlName) {
-   int index = getIndex (xmlName);
-   if (index < 0) {
-     throw new IllegalArgumentException();
-   }
-   return isSpecified(index);
- }
-
- //
- // Implementation of org.xml.sax.Locator.
- //
-
- /**
- * <b>SAX Locator</b> method (don't invoke on parser);
- */
- public String getPublicId() // always null: public identifiers are not tracked here
- {
- return null; // FIXME track public IDs too
- }
-
- /**
- * <b>SAX Locator</b> method (don't invoke on parser);
- */
- public String getSystemId()
- {
- if (entityStack.empty())
- {
- return null;
- }
- else
- {
- return (String) entityStack.peek();
- }
- }
-
- /**
- * <b>SAX Locator</b> method (don't invoke on parser);
- */
- public int getLineNumber() // delegates to the parser's line counter
- {
- return parser.getLineNumber();
- }
-
- /**
- * <b>SAX Locator</b> method (don't invoke on parser);
- */
- public int getColumnNumber() // delegates to the parser's column counter
- {
- return parser.getColumnNumber();
- }
-
- // adapter between SAX2 content handler and SAX1 document handler callbacks
- /**
-  * Presents a SAX1 DocumentHandler as a SAX2 ContentHandler.  Callbacks
-  * that exist in both APIs are passed straight through; the SAX2-only
-  * ones (prefix mappings, skipped entities) are dropped.
-  */
- private static class Adapter
-   implements ContentHandler {
-
-   /** The SAX1 handler every supported callback is relayed to. */
-   private final DocumentHandler delegate;
-
-   Adapter(DocumentHandler dh) {
-     delegate = dh;
-   }
-
-   // ---- document level ----
-
-   public void setDocumentLocator(Locator l) {
-     delegate.setDocumentLocator(l);
-   }
-
-   public void startDocument()
-     throws SAXException {
-     delegate.startDocument();
-   }
-
-   public void endDocument()
-     throws SAXException {
-     delegate.endDocument();
-   }
-
-   // ---- elements ----
-
-   public void startElement(String namespace,
-                            String local,
-                            String name,
-                            Attributes attrs)
-     throws SAXException {
-     // the cast requires attrs to also implement the SAX1 AttributeList
-     delegate.startElement(name, (AttributeList) attrs);
-   }
-
-   public void endElement(String u, String l, String name)
-     throws SAXException {
-     delegate.endElement(name);
-   }
-
-   // ---- content ----
-
-   public void characters(char[] buf, int offset, int len)
-     throws SAXException {
-     delegate.characters(buf, offset, len);
-   }
-
-   public void ignorableWhitespace(char[] buf, int offset, int len)
-     throws SAXException {
-     delegate.ignorableWhitespace(buf, offset, len);
-   }
-
-   public void processingInstruction(String target, String data)
-     throws SAXException {
-     delegate.processingInstruction(target, data);
-   }
-
-   // ---- SAX2-only callbacks with no SAX1 counterpart ----
-
-   public void startPrefixMapping(String prefix, String uri) {
-     /* ignored */
-   }
-
-   public void endPrefixMapping(String prefix) {
-     /* ignored */
-   }
-
-   public void skippedEntity(String name) {
-     /* ignored */
-   }
- }
-
- /**
-  * Plain record for one attribute of the current start tag.
-  */
- private static class Attribute {
-
-   String name;        // qualified name, as passed to the constructor
-   String value;
-   String nameSpace;   // starts out as ""
-   String localName;   // not set by the constructor, so initially null
-   boolean specified;
-
-   Attribute(String name, String value, boolean specified) {
-     this.specified = specified;
-     this.nameSpace = "";
-     this.value = value;
-     this.name = name;
-   }
-
- }
-
-}
diff --git a/libjava/classpath/gnu/xml/aelfred2/XmlParser.java b/libjava/classpath/gnu/xml/aelfred2/XmlParser.java
deleted file mode 100644
index 813593d..0000000
--- a/libjava/classpath/gnu/xml/aelfred2/XmlParser.java
+++ /dev/null
@@ -1,5831 +0,0 @@
-/* XmlParser.java --
- Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
-
-This file is part of GNU Classpath.
-
-GNU Classpath is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU Classpath is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU Classpath; see the file COPYING. If not, write to the
-Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301 USA.
-
-Linking this library statically or dynamically with other modules is
-making a combined work based on this library. Thus, the terms and
-conditions of the GNU General Public License cover the whole
-combination.
-
-As a special exception, the copyright holders of this library give you
-permission to link this library with independent modules to produce an
-executable, regardless of the license terms of these independent
-modules, and to copy and distribute the resulting executable under
-terms of your choice, provided that you also meet, for each linked
-independent module, the terms and conditions of the license of that
-module. An independent module is a module which is not derived from
-or based on this library. If you modify this library, you may extend
-this exception to your version of the library, but you are not
-obligated to do so. If you do not wish to do so, delete this
-exception statement from your version.
-
-Partly derived from code which carried the following notice:
-
- Copyright (c) 1997, 1998 by Microstar Software Ltd.
-
- AElfred is free for both commercial and non-commercial use and
- redistribution, provided that Microstar's copyright and disclaimer are
- retained intact. You are free to modify AElfred for your own use and
- to redistribute AElfred with your modifications, provided that the
- modifications are clearly documented.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- merchantability or fitness for a particular purpose. Please use it AT
- YOUR OWN RISK.
-*/
-
-package gnu.xml.aelfred2;
-
-import gnu.java.security.action.GetPropertyAction;
-
-import java.io.BufferedInputStream;
-import java.io.CharConversionException;
-import java.io.EOFException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.IOException;
-import java.io.Reader;
-import java.io.UnsupportedEncodingException;
-import java.net.URL;
-import java.net.URLConnection;
-import java.security.AccessController;
-
-import java.util.Iterator;
-import java.util.HashMap;
-import java.util.LinkedList;
-
-import org.xml.sax.InputSource;
-import org.xml.sax.SAXException;
-
-
-/**
- * Parse XML documents and return parse events through call-backs.
- * Use the <code>SAXDriver</code> class as your entry point, as all
- * internal parser interfaces are subject to change.
- *
- * @author Written by David Megginson &lt;dmeggins@microstar.com&gt;
- * (version 1.2a with bugfixes)
- * @author Updated by David Brownell &lt;dbrownell@users.sourceforge.net&gt;
- * @see SAXDriver
- */
-final class XmlParser
-{
-
- // avoid slow per-character readCh()
- private final static boolean USE_CHEATS = true;
-
- ////////////////////////////////////////////////////////////////////////
- // Constants.
- ////////////////////////////////////////////////////////////////////////
-
- //
- // Constants for element content type.
- //
-
- /**
- * Constant: an element has not been declared.
- * @see #getElementContentType
- */
- public final static int CONTENT_UNDECLARED = 0;
-
- /**
- * Constant: the element has a content model of ANY.
- * @see #getElementContentType
- */
- public final static int CONTENT_ANY = 1;
-
- /**
- * Constant: the element has declared content of EMPTY.
- * @see #getElementContentType
- */
- public final static int CONTENT_EMPTY = 2;
-
- /**
- * Constant: the element has mixed content.
- * @see #getElementContentType
- */
- public final static int CONTENT_MIXED = 3;
-
- /**
- * Constant: the element has element content.
- * @see #getElementContentType
- */
- public final static int CONTENT_ELEMENTS = 4;
-
-
- //
- // Constants for the entity type.
- //
-
- /**
- * Constant: the entity has not been declared.
- * @see #getEntityType
- */
- public final static int ENTITY_UNDECLARED = 0;
-
- /**
- * Constant: the entity is internal.
- * @see #getEntityType
- */
- public final static int ENTITY_INTERNAL = 1;
-
- /**
- * Constant: the entity is external, non-parsable data.
- * @see #getEntityType
- */
- public final static int ENTITY_NDATA = 2;
-
- /**
- * Constant: the entity is external XML data.
- * @see #getEntityType
- */
- public final static int ENTITY_TEXT = 3;
-
- //
- // Attribute type constants are interned literal strings.
- //
-
- //
- // Constants for supported encodings. "external" is just a flag.
- //
- private final static int ENCODING_EXTERNAL = 0;
- private final static int ENCODING_UTF_8 = 1;
- private final static int ENCODING_ISO_8859_1 = 2;
- private final static int ENCODING_UCS_2_12 = 3;
- private final static int ENCODING_UCS_2_21 = 4;
- private final static int ENCODING_UCS_4_1234 = 5;
- private final static int ENCODING_UCS_4_4321 = 6;
- private final static int ENCODING_UCS_4_2143 = 7;
- private final static int ENCODING_UCS_4_3412 = 8;
- private final static int ENCODING_ASCII = 9;
-
- //
- // Constants for attribute default value.
- //
-
- /**
- * Constant: the attribute is not declared.
- * @see #getAttributeDefaultValueType
- */
- public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
-
- /**
- * Constant: the attribute has a literal default value specified.
- * @see #getAttributeDefaultValueType
- * @see #getAttributeDefaultValue
- */
- public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
-
- /**
- * Constant: the attribute was declared #IMPLIED.
- * @see #getAttributeDefaultValueType
- */
- public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
-
- /**
- * Constant: the attribute was declared #REQUIRED.
- * @see #getAttributeDefaultValueType
- */
- public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
-
- /**
- * Constant: the attribute was declared #FIXED.
- * @see #getAttributeDefaultValueType
- * @see #getAttributeDefaultValue
- */
- public final static int ATTRIBUTE_DEFAULT_FIXED = 34;
-
- //
- // Constants for input.
- //
- private final static int INPUT_NONE = 0;
- private final static int INPUT_INTERNAL = 1;
- private final static int INPUT_STREAM = 3;
- private final static int INPUT_READER = 5;
-
- //
- // Flags for reading literals.
- //
- // expand general entity refs (attribute values in dtd and content)
- private final static int LIT_ENTITY_REF = 2;
- // normalize this value (space chars) (attributes, public ids)
- private final static int LIT_NORMALIZE = 4;
- // literal is an attribute value
- private final static int LIT_ATTRIBUTE = 8;
- // don't expand parameter entities
- private final static int LIT_DISABLE_PE = 16;
- // don't expand [or parse] character refs
- private final static int LIT_DISABLE_CREF = 32;
- // don't parse general entity refs
- private final static int LIT_DISABLE_EREF = 64;
- // literal is a public ID value
- private final static int LIT_PUBID = 256;
-
- //
- // Flags affecting PE handling in DTDs (if expandPE is true).
- // PEs expand with space padding, except inside literals.
- //
- private final static int CONTEXT_NORMAL = 0;
- private final static int CONTEXT_LITERAL = 1;
-
- // Emit warnings for relative URIs with no base URI.
- static boolean uriWarnings;
- static {
-   // Read the flag through a privileged action; only the exact
-   // string "true" switches the warnings on.
-   GetPropertyAction lookup =
-     new GetPropertyAction("gnu.xml.aelfred2.XmlParser.uriWarnings");
-   uriWarnings = "true".equals(AccessController.doPrivileged(lookup));
- }
-
- //
- // The current XML handler interface.
- //
- private SAXDriver handler;
-
- //
- // I/O information.
- //
- private Reader reader; // current reader
- private InputStream is; // current input stream
- private int line; // current line number
- private int column; // current column number
- private int sourceType; // type of input source
- private LinkedList inputStack; // stack of input sources
- private URLConnection externalEntity; // current external entity
- private int encoding; // current character encoding
- private int currentByteCount; // bytes read from current source
- private InputSource scratch; // temporary
-
- //
- // Buffers for decoded but unparsed character input.
- //
- private char[] readBuffer;
- private int readBufferPos;
- private int readBufferLength;
- private int readBufferOverflow; // overflow from last data chunk.
-
- //
- // Buffer for undecoded raw byte input.
- //
- private final static int READ_BUFFER_MAX = 16384;
- private byte[] rawReadBuffer;
-
-
- //
- // Buffer for attribute values, char refs, DTD stuff.
- //
- private static int DATA_BUFFER_INITIAL = 4096;
- private char[] dataBuffer;
- private int dataBufferPos;
-
- //
- // Buffer for parsed names.
- //
- private static int NAME_BUFFER_INITIAL = 1024;
- private char[] nameBuffer;
- private int nameBufferPos;
-
- //
- // Save any standalone flag
- //
- private boolean docIsStandalone;
-
- //
- // Hashtables for DTD information on elements, entities, and notations.
- // Populated until we start ignoring decls (because of skipping a PE)
- //
- private HashMap elementInfo;
- private HashMap entityInfo;
- private HashMap notationInfo;
- private boolean skippedPE;
-
- //
- // Element type currently in force.
- //
- private String currentElement;
- private int currentElementContent;
-
- //
- // Stack of entity names, to detect recursion.
- //
- private LinkedList entityStack;
-
- //
- // PE expansion is enabled in most chunks of the DTD, not all.
- // When it's enabled, literals are treated differently.
- //
- private boolean inLiteral;
- private boolean expandPE;
- private boolean peIsError;
-
- //
- // can't report entity expansion inside two constructs:
- // - attribute expansions (internal entities only)
- // - markup declarations (parameter entities only)
- //
- private boolean doReport;
-
- //
- // Symbol table, for caching interned names.
- //
- // These show up wherever XML names or nmtokens are used: naming elements,
- // attributes, PIs, notations, entities, and enumerated attribute values.
- //
- // NOTE: This hashtable doesn't grow. The default size is intended to be
- // rather large for most documents. Example: one snapshot of the DocBook
- // XML 4.1 DTD used only about 350 such names. As a rule, only pathological
- // documents (ones that don't reuse names) should ever see much collision.
- //
- // Be sure that SYMBOL_TABLE_LENGTH always stays prime, for best hashing.
- // "2039" keeps the hash table size at about two memory pages on typical
- // 32 bit hardware.
- //
- private final static int SYMBOL_TABLE_LENGTH = 2039;
-
- private Object[][] symbolTable;
-
- //
- // Hash table of attributes found in current start tag.
- //
- private String[] tagAttributes;
- private int tagAttributePos;
-
- //
- // Utility flag: have we noticed a CR while reading the last
- // data chunk? If so, we will have to go back and normalise
- // CR or CR/LF line ends.
- //
- private boolean sawCR;
-
- //
- // Utility flag: are we in CDATA? If so, whitespace isn't ignorable.
- //
- private boolean inCDATA;
-
- //
- // Xml version.
- //
- private static final int XML_10 = 0;
- private static final int XML_11 = 1;
- private int xmlVersion = XML_10;
-
- //////////////////////////////////////////////////////////////////////
- // Constructors.
- ////////////////////////////////////////////////////////////////////////
-
- /**
- * Construct a new parser with no associated handler.
- * @see #setHandler
- * @see #parse
- */
- // package private
- XmlParser() // no setup here; doParse() refuses to run until setHandler() was called
- {
- }
-
- /**
- * Set the handler that will receive parsing events.
- * @param handler The handler to receive callback events.
- * @see #parse
- */
- // package private
- void setHandler(SAXDriver handler) // stores the driver that receives all parse callbacks
- {
- this.handler = handler;
- }
-
- /**
- * Parse an XML document from the character stream, byte stream, or URI
- * that you provide (in that order of preference). Any URI that you
- * supply will become the base URI for resolving relative URI, and may
- * be used to acquire a reader or byte stream.
- *
- * <p> Only one thread at a time may use this parser; since it is
- * private to this package, post-parse cleanup is done by the caller,
- * which MUST NOT REUSE the parser (just null it).
- *
- * @param systemId Absolute URI of the document; should never be null,
- * but may be so iff a reader <em>or</em> a stream is provided.
- * @param publicId The public identifier of the document, or null.
- * @param reader A character stream; must be null if stream isn't.
- * @param stream A byte input stream; must be null if reader isn't.
- * @param encoding The suggested encoding, or null if unknown.
- * @exception java.lang.Exception Basically SAXException or IOException
- */
- // package private
- void doParse(String systemId, String publicId, Reader reader,
-              InputStream stream, String encoding)
-   throws Exception {
-   if (handler == null) {
-     throw new IllegalStateException("no callback handler");
-   }
-
-   initializeVariables();
-
-   // predeclare the built-in entities here (replacement texts)
-   // we don't need to intern(), since we're guaranteed literals
-   // are always (globally) interned.
-   setInternalEntity("amp", "&#38;");
-   setInternalEntity("lt", "&#60;");
-   setInternalEntity("gt", "&#62;");
-   setInternalEntity("apos", "&#39;");
-   setInternalEntity("quot", "&#34;");
-
-   try {
-     // startDocument is reported first, then pushURL opens the
-     // document entity ... it might report an IO or encoding exception.
-     handler.startDocument();
-     pushURL(false, "[document]",
-             // default baseURI: null
-             new ExternalIdentifiers(publicId, systemId, null),
-             reader, stream, encoding, false);
-
-     parseDocument();
-   } catch (EOFException e) {
-     // empty input
-     error("empty document, with no root element.");
-   } finally {
-     // Close the caller's reader, then the caller's stream and the
-     // parser's own current stream; close failures are ignored.
-     if (reader != null) {
-       try {
-         reader.close();
-       } catch (IOException e) {
-         /* ignore */
-       }
-     }
-     InputStream[] toClose = { stream, is };
-     for (int i = 0; i < toClose.length; i++) {
-       if (toClose[i] == null) {
-         continue;
-       }
-       try {
-         toClose[i].close();
-       } catch (IOException e) {
-         /* ignore */
-       }
-     }
-     scratch = null;
-   }
- }
-
- //////////////////////////////////////////////////////////////////////
- // Error reporting.
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Report an error.
- * @param message The error message.
- * @param textFound The text that caused the error (or null).
- * @see SAXDriver#error
- * @see #line
- */
- // Builds the full message (appending the found / expected text when
- // given) and reports it as fatal through the handler.
- private void error(String message, String textFound, String textExpected)
-   throws SAXException {
-   String foundPart = (textFound == null)
-     ? ""
-     : " (found \"" + textFound + "\")";
-   String expectedPart = (textExpected == null)
-     ? ""
-     : " (expected \"" + textExpected + "\")";
-   String report = message + foundPart + expectedPart;
-
-   handler.fatal(report);
-
-   // "can't happen"
-   throw new SAXException(report);
- }
-
- /**
- * Report a serious error.
- * @param message The error message.
- * @param textFound The text that caused the error (or null).
- */
- private void error(String message, char textFound, String textExpected) // char overload: converts textFound to a String and delegates
- throws SAXException
- {
- error(message, Character.toString(textFound), textExpected);
- }
-
- /**
- * Report typical case fatal errors.
- */
- private void error(String message) // message-only form: passed unchanged to handler.fatal()
- throws SAXException
- {
- handler.fatal(message);
- }
-
- //////////////////////////////////////////////////////////////////////
- // Major syntactic productions.
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Parse an XML document.
- * <pre>
- * [1] document ::= prolog element Misc*
- * </pre>
- * <p>This is the top-level parsing function for a single XML
- * document. As a minimum, a well-formed document must have
- * a document element, and a valid document must have a prolog
- * (one with doctype) as well.
- */
- private void parseDocument()
- throws Exception
- {
- try
- { // added by MHK
- boolean sawDTD = parseProlog();
- require('<');
- parseElement(!sawDTD);
- }
- catch (EOFException ee)
- { // added by MHK
- error("premature end of file", "[EOF]", null);
- }
-
- try
- {
- parseMisc(); //skip all white, PIs, and comments
- char c = readCh(); //if this doesn't throw an exception...
- error("unexpected characters after document end", c, null);
- }
- catch (EOFException e)
- {
- return;
- }
- }
-
- static final char[] startDelimComment = { '<', '!', '-', '-' };
- static final char[] endDelimComment = { '-', '-' };
-
- /**
- * Skip a comment.
- * <pre>
- * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* "-->"
- * </pre>
- * <p> (The <code>&lt;!--</code> has already been read.)
- */
- // Reads up to the closing "--", requires the final '>', and reports
- // the collected text to the handler.  Parameter-entity expansion is
- // switched off while the comment body is read and restored afterwards.
- private void parseComment()
-   throws Exception {
-   boolean saved = expandPE;
-
-   expandPE = false;
-   parseUntil(endDelimComment);
-   require('>');
-   expandPE = saved;
-   handler.comment(dataBuffer, 0, dataBufferPos);
-   dataBufferPos = 0;
- }
-
- static final char[] startDelimPI = { '<', '?' };
- static final char[] endDelimPI = { '?', '>' };
-
- /**
- * Parse a processing instruction and do a call-back.
- * <pre>
- * [16] PI ::= '&lt;?' PITarget
- * (S (Char* - (Char* '?&gt;' Char*)))?
- * '?&gt;'
- * [17] PITarget ::= Name - ( ('X'|'x') ('M'|'m') ('L'|'l') )
- * </pre>
- * <p> (The <code>&lt;?</code> has already been read.)
- */
- private void parsePI()
-   throws SAXException, IOException {
-   boolean savedExpandPE = expandPE;
-
-   // no parameter-entity expansion inside a PI
-   expandPE = false;
-   String target = readNmtoken(true);
-   //NE08
-   if (target.indexOf(':') >= 0) {
-     error("Illegal character(':') in processing instruction name ",
-           target, null);
-   }
-   // any case variant of "xml" is rejected as a target
-   if ("xml".equalsIgnoreCase(target)) {
-     error("Illegal processing instruction target", target, null);
-   }
-   // either the PI ends right after the target, or whitespace and
-   // data follow up to the closing delimiter
-   boolean closedAtOnce = tryRead(endDelimPI);
-   if (!closedAtOnce) {
-     requireWhitespace();
-     parseUntil(endDelimPI);
-   }
-   expandPE = savedExpandPE;
-   handler.processingInstruction(target, dataBufferToString());
- }
-
- static final char[] endDelimCDATA = { ']', ']', '>' };
-
- private boolean isDirtyCurrentElement;
-
- /**
- * Parse a CDATA section.
- * <pre>
- * [18] CDSect ::= CDStart CData CDEnd
- * [19] CDStart ::= '&lt;![CDATA['
- * [20] CData ::= (Char* - (Char* ']]&gt;' Char*))
- * [21] CDEnd ::= ']]&gt;'
- * </pre>
- * <p> (The '&lt;![CDATA[' has already been read.)
- */
- private void parseCDSect() // reads up to the closing "]]>" delimiter, then flushes the data buffer
- throws Exception
- {
- parseUntil(endDelimCDATA);
- dataBufferFlush();
- }
-
- /**
- * Parse the prolog of an XML document.
- * <pre>
- * [22] prolog ::= XMLDecl? Misc* (Doctypedecl Misc*)?
- * </pre>
- * <p>We do not look for the XML declaration here, because it was
- * handled by pushURL ().
- * @see #pushURL
- * @return true if a DTD was read.
- */
- private boolean parseProlog()
-   throws Exception {
-   parseMisc();
-
-   // a doctype declaration is optional; report whether one was read
-   boolean sawDoctype = tryRead("<!DOCTYPE");
-   if (sawDoctype) {
-     parseDoctypedecl();
-     parseMisc();
-   }
-   return sawDoctype;
- }
-
- /**
-  * Checks that a version string contains only the characters allowed by
-  * <code>[26] VersionNum</code>: ASCII letters, ASCII digits, and the
-  * punctuation characters '_', '.', ':' and '-'.  An error is reported
-  * for each character outside that set.
-  *
-  * @param version the version literal read from a declaration
-  */
- private void checkLegalVersion(String version)
-   throws SAXException
- {
-   for (int i = 0, n = version.length(); i < n; i++)
-     {
-       char ch = version.charAt(i);
-       boolean legal = (ch >= '0' && ch <= '9')
-         || ch == '_' || ch == '.' || ch == ':' || ch == '-'
-         || (ch >= 'a' && ch <= 'z')
-         || (ch >= 'A' && ch <= 'Z');
-
-       if (!legal)
-         {
-           error ("illegal character in version", version, "1.0");
-         }
-     }
- }
-
- /**
-  * Parse the XML declaration.
-  * <pre>
-  * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?&gt;'
-  * [24] VersionInfo ::= S 'version' Eq
-  *        ("'" VersionNum "'" | '"' VersionNum '"' )
-  * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')*
-  * [32] SDDecl ::= S 'standalone' Eq
-  *        ( "'" ('yes' | 'no') "'" | '"' ('yes' | 'no') '"' )
-  * [80] EncodingDecl ::= S 'encoding' Eq
-  *        ( '"' EncName '"' | "'" EncName "'" )
-  * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
-  * </pre>
-  * <p> (The <code>&lt;?xml</code> and whitespace have already been read.)
-  * <p>Side effects: sets <code>xmlVersion</code> from the version
-  * literal, sets <code>docIsStandalone</code> when the standalone flag is
-  * "yes", and calls {@link #setupDecoding} for a declared encoding unless
-  * <code>ignoreEncoding</code> is set.
-  *
-  * @param ignoreEncoding if true, a declared encoding is still read and
-  *        returned, but the decoder is not reconfigured
-  * @return the encoding name exactly as read from the declaration (this
-  *         method does not change its case); or null if none was declared
-  * @see #parseTextDecl
-  * @see #setupDecoding
-  */
- private String parseXMLDecl(boolean ignoreEncoding)
-   throws SAXException, IOException
- {
-   String version;
-   String encodingName = null;
-   String standalone = null;
-   int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
-
-   // Read the version.
-   require("version");
-   parseEq();
-   checkLegalVersion(version = readLiteral(flags));
-   if (!version.equals("1.0"))
-     {
-       if (version.equals("1.1"))
-         {
-           // 1.1 is accepted, but the application is warned about it.
-           handler.warn("expected XML version 1.0, not: " + version);
-           xmlVersion = XML_11;
-         }
-       else
-         {
-           error("illegal XML version", version, "1.0 or 1.1");
-         }
-     }
-   else
-     {
-       xmlVersion = XML_10;
-     }
-
-   // Try reading an encoding declaration.
-   boolean white = tryWhitespace();
-
-   if (tryRead("encoding"))
-     {
-       if (!white)
-         {
-           error("whitespace required before 'encoding='");
-         }
-       parseEq();
-       encodingName = readLiteral(flags);
-       if (!ignoreEncoding)
-         {
-           setupDecoding(encodingName);
-         }
-     }
-
-   // Try reading a standalone declaration.  Whitespace is only looked
-   // for again if an encoding declaration consumed the earlier run;
-   // otherwise the result of the first check still applies.
-   if (encodingName != null)
-     {
-       white = tryWhitespace();
-     }
-   if (tryRead("standalone"))
-     {
-       if (!white)
-         {
-           error("whitespace required before 'standalone='");
-         }
-       parseEq();
-       standalone = readLiteral(flags);
-       if ("yes".equals(standalone))
-         {
-           docIsStandalone = true;
-         }
-       else if (!"no".equals(standalone))
-         {
-           error("standalone flag must be 'yes' or 'no'");
-         }
-     }
-
-   skipWhitespace();
-   require("?>");
-
-   return encodingName;
- }
-
- /**
- * Parse a text declaration.
- * <pre>
- * [79] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?&gt;'
- * [80] EncodingDecl ::= S 'encoding' Eq
- * ( '"' EncName '"' | "'" EncName "'" )
- * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
- * </pre>
- * <p> (The <code>&lt;?xml</code> and whitespace have already been read.)
- * <p>A version is optional here. Version "1.1" is reported as an error
- * when <code>xmlVersion</code> is <code>XML_10</code>; any version other
- * than "1.0" or "1.1" is also an error. The encoding is mandatory.
- * @param ignoreEncoding if true, the declared encoding is read and
- * returned but {@link #setupDecoding} is not called
- * @return the encoding name as read from the declaration; this method
- * does not change its case
- * @see #parseXMLDecl
- * @see #setupDecoding
- */
- private String parseTextDecl(boolean ignoreEncoding)
- throws SAXException, IOException
- {
- String encodingName = null;
- int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
-
- // Read an optional version.
- if (tryRead ("version"))
- {
- String version;
- parseEq();
- checkLegalVersion(version = readLiteral(flags));
-
- if (version.equals("1.1"))
- {
- if (xmlVersion == XML_10)
- {
- error("external subset has later version number.", "1.0",
- version);
- }
- handler.warn("expected XML version 1.0, not: " + version);
- xmlVersion = XML_11;
- }
- else if (!version.equals("1.0"))
- {
- error("illegal XML version", version, "1.0 or 1.1");
- }
- requireWhitespace();
- }
-
- // Read the encoding.
- require("encoding");
- parseEq();
- encodingName = readLiteral(flags);
- if (!ignoreEncoding)
- {
- setupDecoding(encodingName);
- }
- skipWhitespace();
- require("?>");
-
- return encodingName;
- }
-
- /**
-  * Sets up internal state so that we can decode an entity using the
-  * specified encoding.  This is used when we start to read an entity
-  * and we have been given knowledge of its encoding before we start to
-  * read any data (e.g. from a SAX input source or from a MIME type).
-  *
-  * <p> It is also used after autodetection, at which point only very
-  * limited adjustments to the encoding may be used (switching between
-  * related builtin decoders).
-  *
-  * <p> Encoding names are matched case-insensitively.  The name is
-  * upper-cased with a fixed locale so that the match does not depend on
-  * the default locale of the JVM (under a Turkish default locale,
-  * "iso-8859-1" would otherwise upper-case to a name with a dotted
-  * capital I and fail to match).
-  *
-  * @param encodingName The name of the encoding specified by the user.
-  * @exception IOException if the encoding isn't supported either
-  *  internally to this parser, or by the hosting JVM.
-  * @see #parseXMLDecl
-  * @see #parseTextDecl
-  */
- private void setupDecoding(String encodingName)
-   throws SAXException, IOException
- {
-   encodingName = encodingName.toUpperCase(java.util.Locale.ENGLISH);
-
-   // ENCODING_EXTERNAL indicates an encoding that wasn't
-   // autodetected ... we can use builtin decoders, or
-   // ones from the JVM (InputStreamReader).
-
-   // Otherwise we can only tweak what was autodetected, and
-   // only for single byte (ASCII derived) builtin encodings.
-
-   // ASCII-derived encodings
-   if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL)
-     {
-       if (encodingName.equals("ISO-8859-1")
-           || encodingName.equals("8859_1")
-           || encodingName.equals("ISO8859_1"))
-         {
-           encoding = ENCODING_ISO_8859_1;
-           return;
-         }
-       else if (encodingName.equals("US-ASCII")
-                || encodingName.equals("ASCII"))
-         {
-           encoding = ENCODING_ASCII;
-           return;
-         }
-       else if (encodingName.equals("UTF-8")
-                || encodingName.equals("UTF8"))
-         {
-           encoding = ENCODING_UTF_8;
-           return;
-         }
-       else if (encoding != ENCODING_EXTERNAL)
-         {
-           // used to start with a new reader ...
-           throw new UnsupportedEncodingException(encodingName);
-         }
-       // else fallthrough ...
-       // it's ASCII-ish and something other than a builtin
-     }
-
-   // Unicode and such
-   if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21)
-     {
-       if (!(encodingName.equals("ISO-10646-UCS-2")
-             || encodingName.equals("UTF-16")
-             || encodingName.equals("UTF-16BE")
-             || encodingName.equals("UTF-16LE")))
-         {
-           error("unsupported Unicode encoding", encodingName, "UTF-16");
-         }
-       return;
-     }
-
-   // four byte encodings
-   if (encoding == ENCODING_UCS_4_1234
-       || encoding == ENCODING_UCS_4_4321
-       || encoding == ENCODING_UCS_4_2143
-       || encoding == ENCODING_UCS_4_3412)
-     {
-       // Strictly:  "UCS-4" == "UTF-32BE"; also, "UTF-32LE" exists
-       if (!encodingName.equals("ISO-10646-UCS-4"))
-         {
-           error("unsupported 32-bit encoding", encodingName,
-                 "ISO-10646-UCS-4");
-         }
-       return;
-     }
-
-   // assert encoding == ENCODING_EXTERNAL
-   // if (encoding != ENCODING_EXTERNAL)
-   //     throw new RuntimeException ("encoding = " + encoding);
-
-   if (encodingName.equals("UTF-16BE"))
-     {
-       encoding = ENCODING_UCS_2_12;
-       return;
-     }
-   if (encodingName.equals("UTF-16LE"))
-     {
-       encoding = ENCODING_UCS_2_21;
-       return;
-     }
-
-   // We couldn't use the builtin decoders at all.  But we can try to
-   // create a reader, since we haven't messed up buffering.  Tweak
-   // the encoding name if necessary.
-
-   if (encodingName.equals("UTF-16")
-       || encodingName.equals("ISO-10646-UCS-2"))
-     {
-       encodingName = "Unicode";
-     }
-   // Ignoring all the EBCDIC aliases here
-
-   reader = new InputStreamReader(is, encodingName);
-   sourceType = INPUT_READER;
- }
-
- /**
-  * Consumes miscellaneous markup found outside the document element and
-  * the DOCTYPE declaration: any run of whitespace, processing
-  * instructions and comments.  Returns as soon as something else is
-  * next in the input.
-  * <pre>
-  * [27] Misc ::= Comment | PI | S
-  * </pre>
-  */
- private void parseMisc()
-   throws Exception
- {
-   for (;;)
-     {
-       skipWhitespace();
-       if (tryRead(startDelimPI))
-         {
-           parsePI();
-           continue;
-         }
-       if (tryRead(startDelimComment))
-         {
-           parseComment();
-           continue;
-         }
-       // Neither a PI nor a comment: the Misc run is over.
-       return;
-     }
- }
-
- /**
- * Parse a document type declaration.
- * <pre>
- * [28] doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S?
- * ('[' (markupdecl | PEReference | S)* ']' S?)? '&gt;'
- * </pre>
- * <p> (The <code>&lt;!DOCTYPE</code> has already been read.)
- * <p>Processing order: the root name and external identifiers are read
- * and reported through <code>handler.doctypeDecl</code>; the internal
- * subset, if present, is parsed; then the external subset is parsed,
- * taken either from the declared system identifier or, when none was
- * declared, from <code>handler.getExternalSubset</code>. Finally
- * <code>handler.endDoctype</code> is called.
- */
- private void parseDoctypedecl()
- throws Exception
- {
- String rootName;
- ExternalIdentifiers ids;
-
- // Read the document type name.
- requireWhitespace();
- rootName = readNmtoken(true);
-
- // Read the External subset's IDs
- skipWhitespace();
- ids = readExternalIds(false, true);
-
- // report (a) declaration of name, (b) lexical info (ids)
- handler.doctypeDecl(rootName, ids.publicId, ids.systemId);
-
- // Internal subset is parsed first, if present
- skipWhitespace();
- if (tryRead('['))
- {
-
- // loop until the subset ends
- while (true)
- {
- doReport = expandPE = true; // both flags are on only while whitespace is skipped between declarations
- skipWhitespace();
- doReport = expandPE = false;
- if (tryRead(']'))
- {
- break; // end of subset
- }
- else
- {
- // WFC, PEs in internal subset (only between decls)
- peIsError = expandPE = true;
- parseMarkupdecl();
- peIsError = expandPE = false;
- }
- }
- }
- skipWhitespace();
- require('>');
-
- // Read the external subset, if any
- InputSource subset;
-
- if (ids.systemId == null)
- {
- subset = handler.getExternalSubset(rootName,
- handler.getSystemId());
- }
- else
- {
- subset = null;
- }
- if (ids.systemId != null || subset != null)
- {
- pushString(null, ">"); // sentinel: the loop below stops when it reads this '>'
-
- // NOTE: [dtd] is so we say what SAX2 expects,
- // though it's misleading (subset, not entire dtd)
- if (ids.systemId != null)
- {
- pushURL(true, "[dtd]", ids, null, null, null, true);
- }
- else
- {
- handler.warn("modifying document by adding external subset");
- pushURL(true, "[dtd]",
- new ExternalIdentifiers(subset.getPublicId(),
- subset.getSystemId(),
- null),
- subset.getCharacterStream(),
- subset.getByteStream(),
- subset.getEncoding(),
- false);
- }
-
- // Loop until we end up back at '>'
- while (true)
- {
- doReport = expandPE = true;
- skipWhitespace();
- doReport = expandPE = false;
- if (tryRead('>'))
- {
- break;
- }
- else
- {
- expandPE = true;
- parseMarkupdecl();
- expandPE = false;
- }
- }
-
- // the ">" string isn't popped yet
- if (inputStack.size() != 1)
- {
- error("external subset has unmatched '>'");
- }
- }
-
- // done dtd
- handler.endDoctype();
- expandPE = false;
- doReport = true;
- }
-
- /**
- * Parse a markup declaration in the internal or external DTD subset.
- * <pre>
- * [29] markupdecl ::= elementdecl | Attlistdecl | EntityDecl
- * | NotationDecl | PI | Comment
- * [30] extSubsetDecl ::= (markupdecl | conditionalSect
- * | PEReference | S) *
- * </pre>
- * <p> Reading toplevel PE references is handled as a lexical issue
- * by the caller, as is whitespace.
- * <p> The read buffer in use when the declaration starts is remembered;
- * if a different buffer is current when the declaration ends, a validity
- * error for improper declaration/PE nesting is reported.
- */
- private void parseMarkupdecl()
- throws Exception
- {
- char[] saved = null;
- boolean savedPE = expandPE;
-
- // prevent "<%foo;" and ensures saved entity is right
- require('<');
- unread('<');
- expandPE = false; // off while the declaration keyword is matched; restored in each branch
-
- if (tryRead("<!ELEMENT"))
- {
- saved = readBuffer;
- expandPE = savedPE;
- parseElementDecl();
- }
- else if (tryRead("<!ATTLIST"))
- {
- saved = readBuffer;
- expandPE = savedPE;
- parseAttlistDecl();
- }
- else if (tryRead("<!ENTITY"))
- {
- saved = readBuffer;
- expandPE = savedPE;
- parseEntityDecl();
- }
- else if (tryRead("<!NOTATION"))
- {
- saved = readBuffer;
- expandPE = savedPE;
- parseNotationDecl();
- }
- else if (tryRead(startDelimPI))
- {
- saved = readBuffer;
- expandPE = savedPE;
- parsePI();
- }
- else if (tryRead(startDelimComment))
- {
- saved = readBuffer;
- expandPE = savedPE;
- parseComment();
- }
- else if (tryRead("<!["))
- {
- saved = readBuffer;
- expandPE = savedPE;
- if (inputStack.size() > 0) // a non-empty input stack is treated as "not in the internal subset"
- {
- parseConditionalSect(saved);
- }
- else
- {
- error("conditional sections illegal in internal subset");
- }
- }
- else
- {
- error("expected markup declaration");
- }
-
- // VC: Proper Decl/PE Nesting
- if (readBuffer != saved)
- {
- handler.verror("Illegal Declaration/PE nesting");
- }
- }
-
- /**
-  * Parse an element, with its tags.
-  * <pre>
-  * [39] element ::= EmptyElementTag | STag content ETag
-  * [40] STag ::= '&lt;' Name (S Attribute)* S? '&gt;'
-  * [44] EmptyElementTag ::= '&lt;' Name (S Attribute)* S? '/&gt;'
-  * </pre>
-  * <p> (The '&lt;' has already been read.)
-  * <p>NOTE: this method actually chains onto parseContent (), if necessary,
-  * and parseContent () will take care of calling parseETag ().
-  * <p>Attributes that are declared for the element but were not specified
-  * in the tag are reported with their default value, when they have one.
-  *
-  * @param maybeGetSubset if true, the application is asked (through
-  *        <code>handler.getExternalSubset</code>) whether it wants to
-  *        supply an external subset for this element's name; if it does,
-  *        that subset is parsed before the rest of the tag.
-  */
- private void parseElement(boolean maybeGetSubset)
-   throws Exception
- {
-   String gi;
-   char c;
-   int oldElementContent = currentElementContent;
-   String oldElement = currentElement;
-   ElementDecl element;
-
-   // This is the (global) counter for the
-   // array of specified attributes.
-   tagAttributePos = 0;
-
-   // Read the element type name.
-   gi = readNmtoken(true);
-
-   // If we saw no DTD, and this is the document root element,
-   // let the application modify the input stream by providing one.
-   if (maybeGetSubset)
-     {
-       InputSource subset = handler.getExternalSubset(gi,
-                                                      handler.getSystemId());
-       if (subset != null)
-         {
-           String publicId = subset.getPublicId();
-           String systemId = subset.getSystemId();
-
-           handler.warn("modifying document by adding DTD");
-           handler.doctypeDecl(gi, publicId, systemId);
-           // Sentinel: the loop below stops when it reads this '>'.
-           pushString(null, ">");
-
-           // NOTE:  [dtd] is so we say what SAX2 expects,
-           // though it's misleading (subset, not entire dtd)
-           pushURL(true, "[dtd]",
-                   new ExternalIdentifiers(publicId, systemId, null),
-                   subset.getCharacterStream(),
-                   subset.getByteStream(),
-                   subset.getEncoding(),
-                   false);
-
-           // Loop until we end up back at '>'
-           while (true)
-             {
-               doReport = expandPE = true;
-               skipWhitespace();
-               doReport = expandPE = false;
-               if (tryRead('>'))
-                 {
-                   break;
-                 }
-               else
-                 {
-                   expandPE = true;
-                   parseMarkupdecl();
-                   expandPE = false;
-                 }
-             }
-
-           // the ">" string isn't popped yet
-           if (inputStack.size() != 1)
-             {
-               error("external subset has unmatched '>'");
-             }
-
-           handler.endDoctype();
-         }
-     }
-
-   // Determine the current content type.
-   currentElement = gi;
-   element = (ElementDecl) elementInfo.get(gi);
-   currentElementContent = getContentType(element, CONTENT_ANY);
-
-   // Read the attributes, if any.
-   // After this loop, "c" is the closing delimiter.
-   boolean white = tryWhitespace();
-   c = readCh();
-   while (c != '/' && c != '>')
-     {
-       unread(c);
-       if (!white)
-         {
-           error("need whitespace between attributes");
-         }
-       parseAttribute(gi);
-       white = tryWhitespace();
-       c = readCh();
-     }
-
-   // Supply any defaulted attributes.
-   Iterator atts = declaredAttributes(element);
-   if (atts != null)
-     {
-       String aname;
-     loop:
-       while (atts.hasNext())
-         {
-           aname = (String) atts.next();
-           // See if it was specified.  Names are compared by value, as
-           // parseAttribute() does for its duplicate check, so the test
-           // does not depend on both strings being the same instance.
-           for (int i = 0; i < tagAttributePos; i++)
-             {
-               if (tagAttributes[i].equals(aname))
-                 {
-                   continue loop;
-                 }
-             }
-           // ... or has a default
-           String value = getAttributeDefaultValue(gi, aname);
-
-           if (value == null)
-             {
-               continue;
-             }
-           handler.attribute(aname, value, false);
-         }
-     }
-
-   // Figure out if this is a start tag
-   // or an empty element, and dispatch an
-   // event accordingly.
-   switch (c)
-     {
-     case '>':
-       handler.startElement(gi);
-       parseContent();
-       break;
-     case '/':
-       require('>');
-       handler.startElement(gi);
-       handler.endElement(gi);
-       break;
-     }
-
-   // Restore the previous state.
-   currentElement = oldElement;
-   currentElementContent = oldElementContent;
- }
-
- /**
- * Parse an attribute assignment.
- * <pre>
- * [41] Attribute ::= Name Eq AttValue
- * </pre>
- * @param name The name of the attribute's element.
- * @see SAXDriver#attribute
- */
- private void parseAttribute(String name)
- throws Exception
- {
- String aname;
- String type;
- String value;
- int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF;
-
- // Read the attribute name.
- aname = readNmtoken(true);
- type = getAttributeType(name, aname);
-
- // Parse '='
- parseEq();
-
- // Read the value, normalizing whitespace
- // unless it is CDATA.
- if (handler.stringInterning)
- {
- if (type == "CDATA" || type == null)
- {
- value = readLiteral(flags);
- }
- else
- {
- value = readLiteral(flags | LIT_NORMALIZE);
- }
- }
- else
- {
- if (type == null || type.equals("CDATA"))
- {
- value = readLiteral(flags);
- }
- else
- {
- value = readLiteral(flags | LIT_NORMALIZE);
- }
- }
-
- // WFC: no duplicate attributes
- for (int i = 0; i < tagAttributePos; i++)
- {
- if (aname.equals(tagAttributes [i]))
- {
- error("duplicate attribute", aname, null);
- }
- }
-
- // Inform the handler about the
- // attribute.
- handler.attribute(aname, value, true);
- dataBufferPos = 0;
-
- // Note that the attribute has been
- // specified.
- if (tagAttributePos == tagAttributes.length)
- {
- String newAttrib[] = new String[tagAttributes.length * 2];
- System.arraycopy(tagAttributes, 0, newAttrib, 0, tagAttributePos);
- tagAttributes = newAttrib;
- }
- tagAttributes[tagAttributePos++] = aname;
- }
-
- /**
- * Parse an equals sign surrounded by optional whitespace.
- * <pre>
- * [25] Eq ::= S? '=' S?
- * </pre>
- * <p>Whitespace on either side is skipped; the '=' itself is required.
- */
- private void parseEq()
- throws SAXException, IOException
- {
- skipWhitespace();
- require('=');
- skipWhitespace();
- }
-
- /**
- * Parse an end tag.
- * <pre>
- * [42] ETag ::= '&lt;/' Name S? '&gt;'
- * </pre>
- * <p>NOTE: parseContent () chains to here, we already read the
- * "&lt;/".
- * <p>The name is not read freely: the input is required to continue
- * with the name of the current element. The end of the element is then
- * reported to the handler.
- */
- private void parseETag()
- throws Exception
- {
- require(currentElement);
- skipWhitespace();
- require('>');
- handler.endElement(currentElement);
- // not re-reporting any SAXException re bogus end tags,
- // even though that diagnostic might be clearer ...
- }
-
- /**
- * Parse the content of an element.
- * <pre>
- * [43] content ::= (element | CharData | Reference
- * | CDSect | PI | Comment)*
- * [67] Reference ::= EntityRef | CharRef
- * </pre>
- * <p> NOTE: consumes ETag.
- * <p> Loops over character data and the delimiter that ends it, and
- * returns only after the end tag of the current element was parsed.
- * <code>isDirtyCurrentElement</code> is set after a reference and
- * cleared whenever markup starting with '&lt;' is dispatched.
- */
- private void parseContent()
- throws Exception
- {
- char c;
-
- while (true)
- {
- // consume characters (or ignorable whitespace) until delimiter
- parseCharData();
-
- // Handle delimiters
- c = readCh();
- switch (c)
- {
- case '&': // Found "&"
- c = readCh();
- if (c == '#')
- {
- parseCharRef();
- }
- else
- {
- unread(c);
- parseEntityRef(true);
- }
- isDirtyCurrentElement = true;
- break;
-
- case '<': // Found "<"
- dataBufferFlush();
- c = readCh();
- switch (c)
- {
- case '!': // Found "<!"
- c = readCh();
- switch (c)
- {
- case '-': // Found "<!-"
- require('-');
- isDirtyCurrentElement = false;
- parseComment();
- break;
- case '[': // Found "<!["
- isDirtyCurrentElement = false;
- require("CDATA[");
- handler.startCDATA();
- inCDATA = true;
- parseCDSect();
- inCDATA = false;
- handler.endCDATA();
- break;
- default:
- error("expected comment or CDATA section", c, null);
- break;
- }
- break;
-
- case '?': // Found "<?"
- isDirtyCurrentElement = false;
- parsePI();
- break;
-
- case '/': // Found "</"
- isDirtyCurrentElement = false;
- parseETag();
- return;
-
- default: // Found "<" followed by something else
- isDirtyCurrentElement = false;
- unread(c);
- parseElement(false);
- break;
- }
- }
- }
- }
-
- /**
- * Parse an element type declaration.
- * <pre>
- * [45] elementdecl ::= '&lt;!ELEMENT' S Name S contentspec S? '&gt;'
- * </pre>
- * <p> NOTE: the '&lt;!ELEMENT' has already been read.
- * <p> The content specification itself is handled by
- * {@link #parseContentspec}.
- */
- private void parseElementDecl()
- throws Exception
- {
- String name;
-
- requireWhitespace();
- // Read the element type name.
- name = readNmtoken(true);
-
- requireWhitespace();
- // Read the content model.
- parseContentspec(name);
-
- skipWhitespace();
- require('>');
- }
-
- /**
- * Content specification.
- * <pre>
- * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements
- * </pre>
- * <p>Records the element's content type (and, for mixed or element
- * content, the model string collected in the data buffer) through
- * <code>setElement</code>, and reports the declaration to the
- * declaration handler unless <code>skippedPE</code> is set.
- *
- * @param name the name of the element being declared
- */
- private void parseContentspec(String name)
- throws Exception
- {
- // FIXME: move elementDecl() into setElement(), pass EMPTY/ANY ...
- if (tryRead("EMPTY"))
- {
- setElement(name, CONTENT_EMPTY, null, null);
- if (!skippedPE)
- {
- handler.getDeclHandler().elementDecl(name, "EMPTY");
- }
- return;
- }
- else if (tryRead("ANY"))
- {
- setElement(name, CONTENT_ANY, null, null);
- if (!skippedPE)
- {
- handler.getDeclHandler().elementDecl(name, "ANY");
- }
- return;
- }
- else
- {
- String model;
- char[] saved;
-
- require('(');
- saved = readBuffer; // buffer holding the '(', checked against the one holding the matching ')'
- dataBufferAppend('(');
- skipWhitespace();
- if (tryRead("#PCDATA"))
- {
- dataBufferAppend("#PCDATA");
- parseMixed(saved);
- model = dataBufferToString();
- setElement(name, CONTENT_MIXED, model, null);
- }
- else
- {
- parseElements(saved);
- model = dataBufferToString();
- setElement(name, CONTENT_ELEMENTS, model, null);
- }
- if (!skippedPE)
- {
- handler.getDeclHandler().elementDecl(name, model);
- }
- }
- }
-
- /**
- * Parse an element-content model.
- * <pre>
- * [47] elements ::= (choice | seq) ('?' | '*' | '+')?
- * [49] choice ::= '(' S? cp (S? '|' S? cp)+ S? ')'
- * [50] seq ::= '(' S? cp (S? ',' S? cp)* S? ')'
- * </pre>
- *
- * <p> NOTE: the opening '(' and S have already been read.
- *
- * <p> The model text is accumulated in the data buffer without the
- * whitespace. The first separator seen (',' or '|') fixes the separator
- * for the whole group; a different one later is an error.
- *
- * @param saved Buffer for entity that should have the terminal ')'
- */
- private void parseElements(char[] saved)
- throws Exception
- {
- char c;
- char sep;
-
- // Parse the first content particle
- skipWhitespace();
- parseCp();
-
- // Check for end or for a separator.
- skipWhitespace();
- c = readCh();
- switch (c)
- {
- case ')':
- // VC: Proper Group/PE Nesting
- if (readBuffer != saved)
- {
- handler.verror("Illegal Group/PE nesting");
- }
-
- dataBufferAppend(')');
- c = readCh();
- switch (c)
- {
- case '*':
- case '+':
- case '?':
- dataBufferAppend(c);
- break;
- default:
- unread(c);
- }
- return;
- case ',': // Register the separator.
- case '|':
- sep = c;
- dataBufferAppend(c);
- break;
- default:
- error("bad separator in content model", c, null);
- return;
- }
-
- // Parse the rest of the content model.
- while (true)
- {
- skipWhitespace();
- parseCp();
- skipWhitespace();
- c = readCh();
- if (c == ')')
- {
- // VC: Proper Group/PE Nesting
- if (readBuffer != saved)
- {
- handler.verror("Illegal Group/PE nesting");
- }
-
- dataBufferAppend(')');
- break;
- }
- else if (c != sep)
- {
- error("bad separator in content model", c, null);
- return;
- }
- else
- {
- dataBufferAppend(c);
- }
- }
-
- // Check for the occurrence indicator.
- c = readCh();
- switch (c)
- {
- case '?':
- case '*':
- case '+':
- dataBufferAppend(c);
- return;
- default:
- unread(c);
- return;
- }
- }
-
- /**
-  * Parses a single content particle and appends its text to the data
-  * buffer.
-  * <pre>
-  * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
-  * </pre>
-  * <p>A particle that opens with '(' is a nested group and is handed to
-  * {@link #parseElements}; anything else is read as a name, followed by
-  * an optional occurrence indicator.
-  */
- private void parseCp()
-   throws Exception
- {
-   // Nested group: parseElements() deals with everything up to and
-   // including the matching ')' and its occurrence indicator.
-   if (tryRead('('))
-     {
-       dataBufferAppend('(');
-       parseElements(readBuffer);
-       return;
-     }
-
-   // Plain name, then an optional '?', '*' or '+'.
-   dataBufferAppend(readNmtoken(true));
-   char next = readCh();
-   if (next == '?' || next == '*' || next == '+')
-     {
-       dataBufferAppend(next);
-     }
-   else
-     {
-       unread(next);
-     }
- }
-
- /**
- * Parse mixed content.
- * <pre>
- * [51] Mixed ::= '(' S? ( '#PCDATA' (S? '|' S? Name)*) S? ')*'
- * | '(' S? ('#PCDATA') S? ')'
- * </pre>
- *
- * <p> NOTE: the opening '(' and '#PCDATA' have already been read and
- * appended to the data buffer by the caller.
- *
- * <p> For #PCDATA alone the model is recorded as ending in ")*" whether
- * or not the optional '*' is present in the input.
- *
- * @param saved Buffer for entity that should have the terminal ')'
- */
- private void parseMixed(char[] saved)
- throws Exception
- {
- // Check for PCDATA alone.
- skipWhitespace();
- if (tryRead(')'))
- {
- // VC: Proper Group/PE Nesting
- if (readBuffer != saved)
- {
- handler.verror("Illegal Group/PE nesting");
- }
-
- dataBufferAppend(")*");
- tryRead('*'); // the '*' is optional here; consume it if present
- return;
- }
-
- // Parse mixed content.
- skipWhitespace();
- while (!tryRead(")"))
- {
- require('|');
- dataBufferAppend('|');
- skipWhitespace();
- dataBufferAppend(readNmtoken(true));
- skipWhitespace();
- }
-
- // VC: Proper Group/PE Nesting
- if (readBuffer != saved)
- {
- handler.verror("Illegal Group/PE nesting");
- }
-
- require('*'); // with names listed, the trailing '*' is mandatory
- dataBufferAppend(")*");
- }
-
- /**
- * Parse an attribute list declaration.
- * <pre>
- * [52] AttlistDecl ::= '&lt;!ATTLIST' S Name AttDef* S? '&gt;'
- * </pre>
- * <p>NOTE: the '&lt;!ATTLIST' has already been read.
- * <p>Attribute definitions are read until the closing '&gt;'; each one
- * must be preceded by whitespace.
- */
- private void parseAttlistDecl()
- throws Exception
- {
- String elementName;
-
- requireWhitespace();
- elementName = readNmtoken(true);
- boolean white = tryWhitespace();
- while (!tryRead('>'))
- {
- if (!white)
- {
- error("whitespace required before attribute definition");
- }
- parseAttDef(elementName);
- white = tryWhitespace();
- }
- }
-
- /**
-  * Parses one attribute definition of an attribute list declaration.
-  * <pre>
-  * [53] AttDef ::= S Name S AttType S DefaultDecl
-  * </pre>
-  * <p>For enumerated and notation types, the list of allowed values is
-  * taken from the data buffer and passed on together with the type.
-  *
-  * @param elementName the element the attribute is declared for
-  */
- private void parseAttDef(String elementName)
-   throws Exception
- {
-   // Read the attribute name.
-   String name = readNmtoken(true);
-
-   // Read the attribute type.
-   requireWhitespace();
-   String type = readAttType();
-
-   // Only enumerated types carry a value list.  With string interning
-   // the type is compared by identity, otherwise by value.
-   boolean hasValueList = handler.stringInterning
-     ? ("ENUMERATION" == type || "NOTATION" == type)
-     : ("ENUMERATION".equals(type) || "NOTATION".equals(type));
-   String enumer = hasValueList ? dataBufferToString() : null;
-
-   // Read the default value.
-   requireWhitespace();
-   parseDefault(elementName, name, type, enumer);
- }
-
- /**
- * Parse the attribute type.
- * <pre>
- * [54] AttType ::= StringType | TokenizedType | EnumeratedType
- * [55] StringType ::= 'CDATA'
- * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
- * | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
- * [57] EnumeratedType ::= NotationType | Enumeration
- * </pre>
- * @return "ENUMERATION" for a parenthesized list of tokens; otherwise
- * the type keyword that was read. For any other name an error is
- * reported and null is returned.
- */
- private String readAttType()
- throws Exception
- {
- if (tryRead('('))
- {
- parseEnumeration(false);
- return "ENUMERATION";
- }
- else
- {
- String typeString = readNmtoken(true);
- if (handler.stringInterning) // keywords are compared by identity in this mode
- {
- if ("NOTATION" == typeString)
- {
- parseNotationType();
- return typeString;
- }
- else if ("CDATA" == typeString
- || "ID" == typeString
- || "IDREF" == typeString
- || "IDREFS" == typeString
- || "ENTITY" == typeString
- || "ENTITIES" == typeString
- || "NMTOKEN" == typeString
- || "NMTOKENS" == typeString)
- {
- return typeString;
- }
- }
- else
- {
- if ("NOTATION".equals(typeString))
- {
- parseNotationType();
- return typeString;
- }
- else if ("CDATA".equals(typeString)
- || "ID".equals(typeString)
- || "IDREF".equals(typeString)
- || "IDREFS".equals(typeString)
- || "ENTITY".equals(typeString)
- || "ENTITIES".equals(typeString)
- || "NMTOKEN".equals(typeString)
- || "NMTOKENS".equals(typeString))
- {
- return typeString;
- }
- }
- error("illegal attribute type", typeString, null);
- return null;
- }
- }
-
- /**
- * Parse an enumeration.
- * <pre>
- * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
- * </pre>
- * <p>NOTE: the '(' has already been read.
- * <p>The list is accumulated in the data buffer, with the parentheses
- * and '|' separators but without the whitespace.
- * @param isNames passed on to <code>readNmtoken</code> for every token;
- * parseNotationType() passes true, readAttType() passes false
- */
- private void parseEnumeration(boolean isNames)
- throws Exception
- {
- dataBufferAppend('(');
-
- // Read the first token.
- skipWhitespace();
- dataBufferAppend(readNmtoken(isNames));
- // Read the remaining tokens.
- skipWhitespace();
- while (!tryRead(')'))
- {
- require('|');
- dataBufferAppend('|');
- skipWhitespace();
- dataBufferAppend(readNmtoken (isNames));
- skipWhitespace();
- }
- dataBufferAppend(')');
- }
-
- /**
- * Parse a notation type for an attribute.
- * <pre>
- * [58] NotationType ::= 'NOTATION' S '(' S? Name
- * (S? '|' S? Name)* S? ')'
- * </pre>
- * <p>NOTE: the 'NOTATION' has already been read
- * <p>The list of names is parsed by {@link #parseEnumeration}.
- */
- private void parseNotationType()
- throws Exception
- {
- requireWhitespace();
- require('(');
-
- parseEnumeration(true);
- }
-
- /**
- * Parse the default value for an attribute.
- * <pre>
- * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
- * | (('#FIXED' S)? AttValue)
- * </pre>
- * <p>The attribute is recorded through <code>setAttribute</code> and,
- * unless <code>skippedPE</code> is set, reported to the declaration
- * handler. For that report the type of an enumerated attribute is
- * replaced by its value list, and a notation type is reported as
- * "NOTATION " followed by its value list.
- *
- * @param elementName the element the attribute belongs to
- * @param name the attribute name
- * @param type the attribute type as returned by readAttType()
- * @param enumer the value list for enumerated types, otherwise null
- */
- private void parseDefault(String elementName, String name,
- String type, String enumer)
- throws Exception
- {
- int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
- String value = null;
- int flags = LIT_ATTRIBUTE;
- boolean saved = expandPE;
- String defaultType = null;
-
- // LIT_ATTRIBUTE forces '<' checks now (ASAP) and turns whitespace
- // chars to spaces (doesn't matter when that's done if it doesn't
- // interfere with char refs expanding to whitespace).
-
- if (!skippedPE)
- {
- flags |= LIT_ENTITY_REF;
- if (handler.stringInterning)
- {
- if ("CDATA" != type)
- {
- flags |= LIT_NORMALIZE;
- }
- }
- else
- {
- if (!"CDATA".equals(type))
- {
- flags |= LIT_NORMALIZE;
- }
- }
- }
-
- expandPE = false; // off while the default declaration is read; restored below
- if (tryRead('#'))
- {
- if (tryRead("FIXED"))
- {
- defaultType = "#FIXED";
- valueType = ATTRIBUTE_DEFAULT_FIXED;
- requireWhitespace();
- value = readLiteral(flags);
- }
- else if (tryRead("REQUIRED"))
- {
- defaultType = "#REQUIRED";
- valueType = ATTRIBUTE_DEFAULT_REQUIRED;
- }
- else if (tryRead("IMPLIED"))
- {
- defaultType = "#IMPLIED";
- valueType = ATTRIBUTE_DEFAULT_IMPLIED;
- }
- else
- {
- error("illegal keyword for attribute default value");
- }
- }
- else
- {
- value = readLiteral(flags);
- }
- expandPE = saved;
- setAttribute(elementName, name, type, enumer, value, valueType);
- if (handler.stringInterning)
- {
- if ("ENUMERATION" == type)
- {
- type = enumer;
- }
- else if ("NOTATION" == type)
- {
- type = "NOTATION " + enumer;
- }
- }
- else
- {
- if ("ENUMERATION".equals(type))
- {
- type = enumer;
- }
- else if ("NOTATION".equals(type))
- {
- type = "NOTATION " + enumer;
- }
- }
- if (!skippedPE)
- {
- handler.getDeclHandler().attributeDecl(elementName, name, type,
- defaultType, value);
- }
- }
-
- /**
-  * Parse a conditional section.
-  * <pre>
-  * [61] conditionalSect ::= includeSect | ignoreSect
-  * [62] includeSect ::= '&lt;![' S? 'INCLUDE' S? '['
-  *        extSubsetDecl ']]&gt;'
-  * [63] ignoreSect ::= '&lt;![' S? 'IGNORE' S? '['
-  *        ignoreSectContents* ']]&gt;'
-  * [64] ignoreSectContents ::= Ignore
-  *        ('&lt;![' ignoreSectContents* ']]&gt;' Ignore )*
-  * [65] Ignore ::= Char* - (Char* ( '&lt;![' | ']]&gt;') Char* )
-  * </pre>
-  * <p> NOTE: the '&lt;![' has already been read.
-  * <p> An INCLUDE section is parsed as a sequence of markup declarations.
-  * An IGNORE section is skipped, counting nested '&lt;![' ... ']]&gt;'
-  * pairs so that the matching end is found.
-  *
-  * @param saved the read buffer that was current when the section
-  *        started; the '[' after the keyword must come from the same one
-  */
- private void parseConditionalSect(char[] saved)
-   throws Exception
- {
-   skipWhitespace();
-   if (tryRead("INCLUDE"))
-     {
-       skipWhitespace();
-       require('[');
-       // VC: Proper Conditional Section/PE Nesting
-       if (readBuffer != saved)
-         {
-           handler.verror("Illegal Conditional Section/PE nesting");
-         }
-       skipWhitespace();
-       while (!tryRead("]]>"))
-         {
-           parseMarkupdecl();
-           skipWhitespace();
-         }
-     }
-   else if (tryRead("IGNORE"))
-     {
-       skipWhitespace();
-       require('[');
-       // VC: Proper Conditional Section/PE Nesting
-       if (readBuffer != saved)
-         {
-           handler.verror("Illegal Conditional Section/PE nesting");
-         }
-       char c;
-       expandPE = false;
-       // "nest" is the number of sections still open; the section just
-       // entered counts as the first.
-       for (int nest = 1; nest > 0; )
-         {
-           c = readCh();
-           switch (c)
-             {
-             case '<':
-               if (tryRead("!["))
-                 {
-                   nest++;
-                 }
-               break;
-             case ']':
-               if (tryRead("]>"))
-                 {
-                   nest--;
-                 }
-             }
-         }
-       // NOTE(review): this switches expansion on unconditionally rather
-       // than restoring the value in effect on entry -- confirm intended.
-       expandPE = true;
-     }
-   else
-     {
-       error("conditional section must begin with INCLUDE or IGNORE");
-     }
- }
-
- private void parseCharRef() // convenience form of parseCharRef(boolean)
- throws SAXException, IOException
- {
- parseCharRef(true /* doFlush: flush the data buffer after the character is appended */);
- }
-
- /**
-  * Read and check a character reference without adding the character to
-  * the data buffer.
-  * <pre>
-  * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
-  * </pre>
-  * <p>NOTE: the '&#' has already been read.
-  * <p>Only ASCII digits are accepted, as the grammar requires.  The
-  * accumulated value stops growing once it exceeds U+10FFFF, so a
-  * reference with too many digits cannot overflow and wrap around to a
-  * legal code point; in that case the value shown in the error message
-  * is the truncated one.
-  */
- private void tryReadCharRef()
-   throws SAXException, IOException
- {
-   int value = 0;
-   char c;
-   // '&#x' introduces hexadecimal digits, plain '&#' decimal ones.
-   int radix = tryRead('x') ? 16 : 10;
-
-   while (true)
-     {
-       c = readCh();
-       if (c == ';')
-         {
-           break;
-         }
-       // Character.digit() also accepts non-ASCII digits, which the
-       // CharRef production does not allow.
-       int n = (c < 0x80) ? Character.digit(c, radix) : -1;
-       if (n == -1)
-         {
-           error("illegal character in character reference", c, null);
-           break;
-         }
-       // Once out of range the value stays out of range: no overflow.
-       if (value <= 0x0010ffff)
-         {
-           value = value * radix + n;
-         }
-     }
-
-   // check for character refs being legal XML
-   if ((value < 0x0020
-        && ! (value == '\n' || value == '\t' || value == '\r'))
-       || (value >= 0xD800 && value <= 0xDFFF)
-       || value == 0xFFFE || value == 0xFFFF
-       || value > 0x0010ffff)
-     {
-       error("illegal XML character reference U+"
-             + Integer.toHexString(value));
-     }
-
-   // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz
-   //  (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz:
-   if (value > 0x0010ffff)
-     {
-       // too big for surrogate
-       error("character reference " + value + " is too large for UTF-16",
-             Integer.toString(value), null);
-     }
-
- }
-
- /**
- * Read and interpret a character reference, appending the referenced
- * character to the data buffer.
- * <pre>
- * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
- * </pre>
- * <p>NOTE: the '&#' has already been read.
- *
- * <p>Only ASCII digits are accepted, as production [66] requires.  The
- * accumulated value stops growing once it exceeds U+10FFFF, so an
- * over-long digit string cannot wrap around <code>int</code> and end up
- * looking like a legal code point.
- *
- * @param doFlush when true, the data buffer is flushed to the handler
- *        after the character has been appended.
- */
- private void parseCharRef(boolean doFlush)
-   throws SAXException, IOException
- {
-   // a leading 'x' selects the hexadecimal form
-   final int radix = tryRead('x') ? 16 : 10;
-   int value = 0;
-
-   while (true)
-   {
-     char c = readCh();
-     if (c == ';')
-     {
-       break;
-     }
-     // Character.digit() also maps non-ASCII digits (fullwidth forms,
-     // Arabic-Indic digits, ...) which the grammar does not allow.
-     int n = (c < 0x0080) ? Character.digit(c, radix) : -1;
-     if (n == -1)
-     {
-       error("illegal character in character reference", c, null);
-       break;
-     }
-     // Use the digit value itself (the decimal branch used to add
-     // c - '0').  Once out of range the reference is already illegal;
-     // do not keep multiplying, or the int would eventually overflow.
-     if (value <= 0x0010ffff)
-     {
-       value = value * radix + n;
-     }
-   }
-
-   // check for character refs being legal XML
-   if ((value < 0x0020
-        && ! (value == '\n' || value == '\t' || value == '\r'))
-       || (value >= 0xD800 && value <= 0xDFFF)
-       || value == 0xFFFE || value == 0xFFFF
-       || value > 0x0010ffff)
-   {
-     error("illegal XML character reference U+"
-           + Integer.toHexString(value));
-   }
-
-   // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz
-   // (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz:
-   if (value <= 0x0000ffff)
-   {
-     // no surrogates needed
-     dataBufferAppend((char) value);
-   }
-   else if (value <= 0x0010ffff)
-   {
-     value -= 0x10000;
-     // > 16 bits, surrogate needed
-     dataBufferAppend((char) (0xd800 | (value >> 10)));
-     dataBufferAppend((char) (0xdc00 | (value & 0x0003ff)));
-   }
-   else
-   {
-     // too big for surrogate
-     error("character reference " + value + " is too large for UTF-16",
-           Integer.toString(value), null);
-   }
-   if (doFlush)
-   {
-     dataBufferFlush();
-   }
- }
-
- /**
- * Parse and expand an entity reference.
- * <pre>
- * [68] EntityRef ::= '&' Name ';'
- * </pre>
- * <p>NOTE: the '&amp;' has already been read.
- * <p>The name is read, the closing ';' is required, and the rest
- * depends on the type reported by <code>getEntityType</code>:
- * undeclared entities are reported, internal entities have their value
- * pushed as input, external text entities are pushed by URL when
- * allowed, and unparsed (NDATA) entities are always an error.
- * @param externalAllowed External entities are allowed here.
- */
- private void parseEntityRef(boolean externalAllowed)
- throws SAXException, IOException
- {
- String name;
-
- name = readNmtoken(true);
- require(';');
- switch (getEntityType(name))
- {
- case ENTITY_UNDECLARED:
- // NOTE: XML REC describes amazingly convoluted handling for
- // this case. Nothing as meaningful as being a WFness error
- // unless the processor might _legitimately_ not have seen a
- // declaration ... which is what this implements.
- String message;
-
- message = "reference to undeclared general entity " + name;
- if (skippedPE && !docIsStandalone)
- {
- // only a validity error here; otherwise error() is used below
- handler.verror(message);
- // we don't know this entity, and it might be external...
- if (externalAllowed)
- {
- handler.skippedEntity(name);
- }
- }
- else
- {
- error(message);
- }
- break;
- case ENTITY_INTERNAL:
- // make the entity's value the current input
- pushString(name, getEntityValue(name));
-
- //workaround for possible input pop before marking
- //the buffer reading position
- char t = readCh();
- unread(t);
- int bufferPosMark = readBufferPos;
-
- // Look ahead over the pushed value: every '&' must start either a
- // character reference or something shaped like an entity reference.
- // Nothing is reported to the handler; the read position is put back
- // to the mark afterwards.
- int end = readBufferPos + getEntityValue(name).length();
- for (int k = readBufferPos; k < end; k++)
- {
- t = readCh();
- if (t == '&')
- {
- t = readCh();
- if (t == '#')
- {
- //try to match a character ref
- tryReadCharRef();
-
- //everything has been read
- if (readBufferPos >= end)
- {
- break;
- }
- // resynchronise the loop index with what was consumed
- k = readBufferPos;
- continue;
- }
- else if (Character.isLetter(t))
- {
- //looks like an entity ref
- unread(t);
- readNmtoken(true);
- require(';');
-
- //everything has been read
- if (readBufferPos >= end)
- {
- break;
- }
- // resynchronise the loop index with what was consumed
- k = readBufferPos;
- continue;
- }
- // '&' followed by neither '#' nor a letter
- error(" malformed entity reference");
- }
-
- }
- // rewind to the mark taken before the look-ahead
- readBufferPos = bufferPosMark;
- break;
- case ENTITY_TEXT:
- if (externalAllowed)
- {
- pushURL(false, name, getEntityIds(name),
- null, null, null, true);
- }
- else
- {
- error("reference to external entity in attribute value.",
- name, null);
- }
- break;
- case ENTITY_NDATA:
- // an error either way; only the message differs
- if (externalAllowed)
- {
- error("unparsed entity reference in content", name, null);
- }
- else
- {
- error("reference to external entity in attribute value.",
- name, null);
- }
- break;
- default:
- // getEntityType returned a value not handled above
- throw new RuntimeException();
- }
- }
-
- /**
- * Parse a parameter entity reference and make its replacement the
- * current input.
- * <pre>
- * [69] PEReference ::= '%' Name ';'
- * </pre>
- * <p>NOTE: the '%' has already been read.
- * <p>Outside a literal the replacement is padded with one space on
- * each side; inside a literal it is pushed as is.
- */
- private void parsePEReference()
-   throws SAXException, IOException
- {
-   // parameter entities are looked up under a '%'-prefixed name
-   final String peName = "%" + readNmtoken(true);
-   require(';');
-
-   final int type = getEntityType(peName);
-   if (type == ENTITY_UNDECLARED)
-   {
-     // VC: Entity Declared
-     handler.verror("reference to undeclared parameter entity " + peName);
-
-     // we should disable handling of all subsequent declarations
-     // unless this is a standalone document (info discarded)
-   }
-   else if (type == ENTITY_INTERNAL)
-   {
-     if (!inLiteral)
-     {
-       pushString(peName, ' ' + getEntityValue(peName) + ' ');
-     }
-     else
-     {
-       pushString(peName, getEntityValue(peName));
-     }
-   }
-   else if (type == ENTITY_TEXT)
-   {
-     // pushed first, so presumably read after the entity's own text
-     if (!inLiteral)
-     {
-       pushString(null, " ");
-     }
-     pushURL(true, peName, getEntityIds(peName), null, null, null, true);
-     // pushed last, so presumably read before the entity's own text
-     if (!inLiteral)
-     {
-       pushString(null, " ");
-     }
-   }
-   // any other entity type: nothing to do
- }
-
- /**
- * Parse an entity declaration and register the entity.
- * <pre>
- * [70] EntityDecl ::= GEDecl | PEDecl
- * [71] GEDecl ::= '&lt;!ENTITY' S Name S EntityDef S? '&gt;'
- * [72] PEDecl ::= '&lt;!ENTITY' S '%' S Name S PEDef S? '&gt;'
- * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
- * [74] PEDef ::= EntityValue | ExternalID
- * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
- * | 'PUBLIC' S PubidLiteral S SystemLiteral
- * [76] NDataDecl ::= S 'NDATA' S Name
- * </pre>
- * <p>NOTE: the '&lt;!ENTITY' has already been read.
- * <p>Parameter entities are stored under a name prefixed with '%'.
- * External entities are neither registered nor reported while
- * <code>skippedPE</code> is set.
- */
- private void parseEntityDecl()
-   throws Exception
- {
-   // Check for a parameter entity; PE expansion is off while the '%'
-   // marker is being looked for.
-   expandPE = false;
-   requireWhitespace();
-   final boolean isParameterEntity = tryRead('%');
-   if (isParameterEntity)
-   {
-     requireWhitespace();
-   }
-   expandPE = true;
-
-   // Read the entity name, and prepend '%' if necessary.
-   String entityName = readNmtoken(true);
-   //NE08
-   if (entityName.indexOf(':') != -1)
-   {
-     error("Illegal character(':') in entity name ", entityName, null);
-   }
-   if (isParameterEntity)
-   {
-     entityName = "%" + entityName;
-   }
-
-   // Peek at the next character to tell a literal from external ids.
-   requireWhitespace();
-   final char next = readCh();
-   unread(next);
-   final boolean quoted = (next == '"' || next == '\'');
-
-   if (quoted)
-   {
-     // Internal entity ... replacement text has expanded refs
-     // to characters and PEs, but not to general entities
-     final String replacement = readLiteral(0);
-     setInternalEntity(entityName, replacement);
-   }
-   else
-   {
-     // Read the external IDs
-     final ExternalIdentifiers ids = readExternalIds(false, false);
-
-     // Check for NDATA declaration.
-     final boolean sawWhitespace = tryWhitespace();
-     if (!isParameterEntity && tryRead("NDATA"))
-     {
-       if (!sawWhitespace)
-       {
-         error("whitespace required before NDATA");
-       }
-       requireWhitespace();
-       final String notationName = readNmtoken(true);
-       if (!skippedPE)
-       {
-         setExternalEntity(entityName, ENTITY_NDATA, ids, notationName);
-         handler.unparsedEntityDecl(entityName, ids.publicId, ids.systemId,
-                                    ids.baseUri, notationName);
-       }
-     }
-     else if (!skippedPE)
-     {
-       setExternalEntity(entityName, ENTITY_TEXT, ids, null);
-       handler.getDeclHandler()
-         .externalEntityDecl(entityName, ids.publicId,
-                             handler.resolveURIs()
-                             // FIXME: ASSUMES not skipped
-                             // "false" forces error on bad URI
-                             ? handler.absolutize(ids.baseUri,
-                                                  ids.systemId,
-                                                  false)
-                             : ids.systemId);
-     }
-   }
-
-   // Finish the declaration.
-   skipWhitespace();
-   require('>');
- }
-
- /**
- * Parse a notation declaration and register the notation.
- * <pre>
- * [82] NotationDecl ::= '&lt;!NOTATION' S Name S
- * (ExternalID | PublicID) S? '&gt;'
- * [83] PublicID ::= 'PUBLIC' S PubidLiteral
- * </pre>
- * <P>NOTE: the '&lt;!NOTATION' has already been read.
- */
- private void parseNotationDecl()
-   throws Exception
- {
-   requireWhitespace();
-   final String notationName = readNmtoken(true);
-   //NE08
-   if (notationName.indexOf(':') != -1)
-   {
-     error("Illegal character(':') in notation name ", notationName, null);
-   }
-   requireWhitespace();
-
-   // Read the external identifiers, then register the notation.
-   final ExternalIdentifiers externalIds = readExternalIds(true, false);
-   setNotation(notationName, externalIds);
-
-   // Finish the declaration.
-   skipWhitespace();
-   require('>');
- }
-
- /**
- * Parse character data.
- * <pre>
- * [14] CharData ::= [^&lt;&amp;]* - ([^&lt;&amp;]* ']]&gt;' [^&lt;&amp;]*)
- * </pre>
- * <p>Text is reported to the handler straight out of the read buffer,
- * one buffer-sized run at a time, either as ignorable whitespace or as
- * character data.  Scanning stops at '&amp;' or '&lt;'; finding
- * ']]&gt;' is reported as an error.
- */
- private void parseCharData()
- throws Exception
- {
- char c;
- // 0 = ran off the end of the buffer, 1 = stopped at '&' or '<',
- // 2 = stopped at "]]>"
- int state = 0;
- boolean pureWhite = false;
-
- // assert (dataBufferPos == 0);
-
- // are we expecting pure whitespace? it might be dirty...
- if ((currentElementContent == CONTENT_ELEMENTS) && !isDirtyCurrentElement)
- {
- pureWhite = true;
- }
-
- // always report right out of readBuffer
- // to minimize (pointless) buffer copies
- while (true)
- {
- // line/column movement for the run scanned in this pass
- int lineAugment = 0;
- int columnAugment = 0;
- int i;
-
-loop:
- for (i = readBufferPos; i < readBufferLength; i++)
- {
- switch (c = readBuffer[i])
- {
- case '\n':
- lineAugment++;
- columnAugment = 0;
- // pureWhite unmodified
- break;
- case '\r': // should not happen!!
- case '\t':
- case ' ':
- // pureWhite unmodified
- columnAugment++;
- break;
- case '&':
- case '<':
- columnAugment++;
- // pureWhite unmodified
- // CLEAN end of text sequence
- state = 1;
- break loop;
- case ']':
- // that's not a whitespace char, and
- // can not terminate pure whitespace either
- pureWhite = false;
- // only looks for "]]>" when both following chars are in this buffer
- if ((i + 2) < readBufferLength)
- {
- if (readBuffer [i + 1] == ']'
- && readBuffer [i + 2] == '>')
- {
- // ERROR end of text sequence
- state = 2;
- break loop;
- }
- }
- else
- {
- // FIXME missing two end-of-buffer cases
- }
- columnAugment++;
- break;
- default:
- // the C1-control test only applies when xmlVersion == XML_11
- if ((c < 0x0020 || c > 0xFFFD)
- || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085)
- && xmlVersion == XML_11))
- {
- error("illegal XML character U+"
- + Integer.toHexString(c));
- }
- // that's not a whitespace char
- pureWhite = false;
- columnAugment++;
- }
- }
-
- // report text thus far
- if (lineAugment > 0)
- {
- line += lineAugment;
- column = columnAugment;
- }
- else
- {
- column += columnAugment;
- }
-
- // report characters/whitespace
- int length = i - readBufferPos;
-
- if (length != 0)
- {
- if (pureWhite)
- {
- handler.ignorableWhitespace(readBuffer,
- readBufferPos, length);
- }
- else
- {
- handler.charData(readBuffer, readBufferPos, length);
- }
- readBufferPos = i;
- }
-
- // a terminator was found in this pass; stop scanning
- if (state != 0)
- {
- break;
- }
-
- // fill next buffer from this entity, or
- // pop stack and continue with previous entity
- unread(readCh());
- }
- // remember that non-whitespace text was seen
- if (!pureWhite)
- {
- isDirtyCurrentElement = true;
- }
- // finish, maybe with error
- if (state != 1) // finish, no error
- {
- error("character data may not contain ']]>'");
- }
- }
-
- //////////////////////////////////////////////////////////////////////
- // High-level reading and scanning methods.
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Require at least one whitespace character, then skip any further
- * whitespace.  A non-whitespace character is reported through
- * <code>error</code>.
- */
- private void requireWhitespace()
-   throws SAXException, IOException
- {
-   final char first = readCh();
-   if (!isWhitespace(first))
-   {
-     error("whitespace required", first, null);
-     return;
-   }
-   skipWhitespace();
- }
-
- /**
- * Skip whitespace characters.
- * <pre>
- * [3] S ::= (#x20 | #x9 | #xd | #xa)+
- * </pre>
- * <p>When <code>USE_CHEATS</code> is set the read buffer is scanned
- * directly first.  If that scan meets a '%' while <code>expandPE</code>
- * is set, or reaches the end of the buffer, the read position is left
- * untouched and the character-by-character loop takes over.
- */
- private void skipWhitespace()
- throws SAXException, IOException
- {
- // Start with a little cheat. Most of
- // the time, the white space will fall
- // within the current read buffer; if
- // not, then fall through.
- if (USE_CHEATS)
- {
- int lineAugment = 0;
- int columnAugment = 0;
-
-loop:
- for (int i = readBufferPos; i < readBufferLength; i++)
- {
- switch (readBuffer[i])
- {
- case ' ':
- case '\t':
- case '\r':
- columnAugment++;
- break;
- case '\n':
- lineAugment++;
- columnAugment = 0;
- break;
- case '%':
- // leave the fast path; readBufferPos, line and column are
- // not updated here
- if (expandPE)
- {
- break loop;
- }
- // else fall through...
- default:
- // first non-whitespace character: commit position and
- // line/column bookkeeping, then return
- readBufferPos = i;
- if (lineAugment > 0)
- {
- line += lineAugment;
- column = columnAugment;
- }
- else
- {
- column += columnAugment;
- }
- return;
- }
- }
- }
-
- // OK, do it the slow way.
- char c = readCh ();
- while (isWhitespace(c))
- {
- c = readCh();
- }
- // put back the first non-whitespace character
- unread(c);
- }
-
- /**
- * Read a name or (when parsing an enumeration) name token.
- * <pre>
- * [5] Name ::= (Letter | '_' | ':') (NameChar)*
- * [7] Nmtoken ::= (NameChar)+
- * </pre>
- * <p>When <code>USE_CHEATS</code> is set the token is first looked for
- * entirely inside the current read buffer.  If no terminating
- * character is found there (or a '%' is met while
- * <code>expandPE</code> is set), the read position is left untouched
- * and the token is read again character by character into
- * <code>nameBuffer</code>.
- * @param isName true to apply the name-start checks to the first
- * character (a Name); false for a name token.
- * @return the interned name or name token.
- */
- private String readNmtoken(boolean isName)
- throws SAXException, IOException
- {
- char c;
-
- if (USE_CHEATS)
- {
-loop:
- for (int i = readBufferPos; i < readBufferLength; i++)
- {
- c = readBuffer[i];
- switch (c)
- {
- case '%':
- if (expandPE)
- {
- break loop;
- }
- // else fall through...
-
- // What may legitimately come AFTER a name/nmtoken?
- case '<': case '>': case '&':
- case ',': case '|': case '*': case '+': case '?':
- case ')':
- case '=':
- case '\'': case '"':
- case '[':
- case ' ': case '\t': case '\r': case '\n':
- case ';':
- case '/':
- int start = readBufferPos;
- // terminator met before any name character
- if (i == start)
- {
- error("name expected", readBuffer[i], null);
- }
- readBufferPos = i;
- return intern(readBuffer, start, i - start);
-
- default:
- // FIXME ... per IBM's OASIS test submission, these:
- // ? U+06dd
- // Combining U+309B
- //these switches are kind of ugly but at least we won't
- //have to go over the whole list for each char
- if (isName && i == readBufferPos)
- {
- // c2 keeps bits 4-7 of c and selects the inner case
- char c2 = (char) (c & 0x00f0);
- switch (c & 0xff00)
- {
- //starting with 01
- case 0x0100:
- switch (c2)
- {
- case 0x0030:
- if (c == 0x0132 || c == 0x0133 || c == 0x013f)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- break;
- case 0x0040:
- if (c == 0x0140 || c == 0x0149)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- break;
- case 0x00c0:
- if (c == 0x01c4 || c == 0x01cc)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- break;
- case 0x00f0:
- if (c == 0x01f1 || c == 0x01f3)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- break;
- case 0x00b0:
- // NOTE(review): neither value has 0xb0 in bits 4-7, so this
- // test can never be true here; intended values unknown
- if (c == 0x01f1 || c == 0x01f3)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- break;
- default:
- if (c == 0x017f)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- }
-
- break;
- //starting with 11
- case 0x1100:
- switch (c2)
- {
- case 0x0000:
- if (c == 0x1104 || c == 0x1108 ||
- c == 0x110a || c == 0x110d)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- break;
- case 0x0030:
- if (c == 0x113b || c == 0x113f)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- break;
- case 0x0040:
- if (c == 0x1141 || c == 0x114d
- || c == 0x114f )
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- break;
- case 0x0050:
- if (c == 0x1151 || c == 0x1156)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- break;
- case 0x0060:
- if (c == 0x1162 || c == 0x1164
- || c == 0x1166 || c == 0x116b
- || c == 0x116f)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- break;
- case 0x00b0:
- // NOTE(review): 0x116f can never match in this case arm
- // (its bits 4-7 are 0x60, handled above)
- if (c == 0x11b6 || c == 0x11b9
- || c == 0x11bb || c == 0x116f)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- break;
- default:
- if (c == 0x1174 || c == 0x119f
- || c == 0x11ac || c == 0x11c3
- || c == 0x11f1)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- }
- break;
- default:
- if (c == 0x0e46 || c == 0x1011
- || c == 0x212f || c == 0x0587
- || c == 0x0230 )
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- }
- }
- // punt on exact tests from Appendix A; approximate
- // them using the Unicode ID start/part rules
- if (i == readBufferPos && isName)
- {
- if (!Character.isUnicodeIdentifierStart(c)
- && c != ':' && c != '_')
- {
- error("Not a name start character, U+"
- + Integer.toHexString(c));
- }
- }
- else if (!Character.isUnicodeIdentifierPart(c)
- && c != '-' && c != ':' && c != '_' && c != '.'
- && !isExtender(c))
- {
- error("Not a name character, U+"
- + Integer.toHexString(c));
- }
- }
- }
- }
-
- // Slow path: accumulate the token in nameBuffer.
- nameBufferPos = 0;
-
- // Read characters until one that cannot be part of a name.
- while (true)
- {
- c = readCh();
- switch (c)
- {
- // unlike the fast path, '%' always ends the token here
- case '%':
- case '<': case '>': case '&':
- case ',': case '|': case '*': case '+': case '?':
- case ')':
- case '=':
- case '\'': case '"':
- case '[':
- case ' ': case '\t': case '\n': case '\r':
- case ';':
- case '/':
- unread(c);
- if (nameBufferPos == 0)
- {
- error ("name expected");
- }
- // punt on exact tests from Appendix A, but approximate them
- if (isName
- && !Character.isUnicodeIdentifierStart(nameBuffer[0])
- && ":_".indexOf(nameBuffer[0]) == -1)
- {
- error("Not a name start character, U+"
- + Integer.toHexString(nameBuffer[0]));
- }
- String s = intern(nameBuffer, 0, nameBufferPos);
- nameBufferPos = 0;
- return s;
- default:
- // punt on exact tests from Appendix A, but approximate them
-
- // the first character of a Name is checked at the terminator
- // instead, so it is skipped here
- if ((nameBufferPos != 0 || !isName)
- && !Character.isUnicodeIdentifierPart(c)
- && ":-_.".indexOf(c) == -1
- && !isExtender(c))
- {
- error("Not a name character, U+"
- + Integer.toHexString(c));
- }
- // grow nameBuffer when it is full
- if (nameBufferPos >= nameBuffer.length)
- {
- nameBuffer =
- (char[]) extendArray(nameBuffer,
- nameBuffer.length, nameBufferPos);
- }
- nameBuffer[nameBufferPos++] = c;
- }
- }
- }
-
- /**
- * Tell whether a character is one of the Extender characters that are
- * accepted inside names.
- */
- private static boolean isExtender(char c)
- {
-   // [88] Extender ::= ...
-   switch (c)
-   {
-     // the isolated code points
-     case 0x00b7: case 0x02d0: case 0x02d1: case 0x0387:
-     case 0x0640: case 0x0e46: case 0x0ec6: case 0x3005:
-       return true;
-     default:
-       break;
-   }
-   // everything else lives in one of three ranges at or above U+3031
-   if (c < 0x3031)
-   {
-     return false;
-   }
-   if (c <= 0x3035)
-   {
-     return true;
-   }
-   if (c >= 0x309d && c <= 0x309e)
-   {
-     return true;
-   }
-   return c >= 0x30fc && c <= 0x30fe;
- }
-
- /**
- * Read a literal. With matching single or double quotes as
- * delimiters (and not embedded!) this is used to parse:
- * <pre>
- * [9] EntityValue ::= ... ([^%&amp;] | PEReference | Reference)* ...
- * [10] AttValue ::= ... ([^<&] | Reference)* ...
- * [11] SystemLiteral ::= ... (URLchar - "'")* ...
- * [12] PubidLiteral ::= ... (PubidChar - "'")* ...
- * </pre>
- * as well as the quoted strings in XML and text declarations
- * (for version, encoding, and standalone) which have their
- * own constraints.
- * <p>The text is accumulated in the data buffer and returned as a
- * string.
- * @param flags a combination of the <code>LIT_*</code> bits tested
- * below; they select whitespace normalization and how references are
- * treated.
- * @return the literal's value, or null when the first character read
- * is not a quote (after reporting the error).
- */
- private String readLiteral(int flags)
- throws SAXException, IOException
- {
- char delim, c;
- int startLine = line;
- // saved so they can be put back after the literal has been read
- boolean saved = expandPE;
- boolean savedReport = doReport;
-
- // Find the first delimiter.
- delim = readCh();
- if (delim != '"' && delim != '\'')
- {
- error("expected '\"' or \"'\"", delim, null);
- return null;
- }
- inLiteral = true;
- if ((flags & LIT_DISABLE_PE) != 0)
- {
- expandPE = false;
- }
- doReport = false;
-
- // Each level of input source has its own buffer; remember
- // ours, so we won't read the ending delimiter from any
- // other input source, regardless of entity processing.
- char[] ourBuf = readBuffer;
-
- // Read the literal.
- try
- {
- c = readCh();
- // NOTE(review): ampRead is assigned below but never read in this method
- boolean ampRead = false;
-loop:
- while (! (c == delim && readBuffer == ourBuf))
- {
- switch (c)
- {
- // attributes and public ids are normalized
- // in almost the same ways
- case '\n':
- case '\r':
- if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
- {
- c = ' ';
- }
- break;
- case '\t':
- if ((flags & LIT_ATTRIBUTE) != 0)
- {
- c = ' ';
- }
- break;
- case '&':
- c = readCh();
- // Char refs are expanded immediately, except for
- // all the cases where it's deferred.
- if (c == '#')
- {
- if ((flags & LIT_DISABLE_CREF) != 0)
- {
- // keep the '&' as data; the '#' in c is appended after the switch
- dataBufferAppend('&');
- break;
- }
- parseCharRef(false /* Do not do flushDataBuffer */);
-
- // exotic WFness risk: this is an entity literal,
- // dataBuffer [dataBufferPos - 1] == '&', and
- // following chars are a _partial_ entity/char ref
-
- // It looks like an entity ref ...
- }
- else
- {
- unread(c);
- // Expand it?
- if ((flags & LIT_ENTITY_REF) > 0)
- {
- parseEntityRef(false);
- if (String.valueOf(readBuffer).equals("&#38;"))
- {
- ampRead = true;
- }
- //Is it just data?
- }
- else if ((flags & LIT_DISABLE_EREF) != 0)
- {
- dataBufferAppend('&');
-
- // OK, it will be an entity ref -- expanded later.
- }
- else
- {
- String name = readNmtoken(true);
- require(';');
- dataBufferAppend('&');
- dataBufferAppend(name);
- dataBufferAppend(';');
- }
- }
- // the reference has been dealt with; read on without appending c
- c = readCh();
- continue loop;
-
- case '<':
- // and why? Perhaps so "&foo;" expands the same
- // inside and outside an attribute?
- if ((flags & LIT_ATTRIBUTE) != 0)
- {
- error("attribute values may not contain '<'");
- }
- break;
-
- // We don't worry about case '%' and PE refs, readCh does.
-
- default:
- break;
- }
- dataBufferAppend(c);
- c = readCh();
- }
- }
- catch (EOFException e)
- {
- error("end of input while looking for delimiter (started on line "
- + startLine + ')', null, Character.toString(delim));
- }
- // put the saved parser state back (not done on the early return above)
- inLiteral = false;
- expandPE = saved;
- doReport = savedReport;
-
- // Normalise whitespace if necessary.
- if ((flags & LIT_NORMALIZE) > 0)
- {
- dataBufferNormalize();
- }
-
- // Return the value.
- return dataBufferToString();
- }
-
- /**
- * Try reading external identifiers.
- * A system identifier is not required for notations.
- * @param inNotation Are we parsing a notation decl?
- * @param isSubset Parsing external subset decl (may be omitted)?
- * @return An <code>ExternalIdentifiers</code> object whose
- * <code>publicId</code>, <code>systemId</code> and
- * <code>baseUri</code> fields hold the identifiers that were read;
- * fields that were not assigned keep their initial value.
- */
- private ExternalIdentifiers readExternalIds(boolean inNotation,
-                                             boolean isSubset)
-   throws Exception
- {
-   final ExternalIdentifiers result = new ExternalIdentifiers();
-   // identifiers are read verbatim: no reference expansion of any kind
-   final int literalFlags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
-
-   if (tryRead("PUBLIC"))
-   {
-     requireWhitespace();
-     result.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | literalFlags);
-     if (!inNotation)
-     {
-       requireWhitespace();
-       result.systemId = readLiteral(literalFlags);
-     }
-     else
-     {
-       // in a notation the system literal is optional: peek for a quote
-       skipWhitespace();
-       final char next = readCh();
-       unread(next);
-       if (next == '"' || next == '\'')
-       {
-         result.systemId = readLiteral(literalFlags);
-       }
-     }
-
-     // every character of the public id must be a legal PubidChar
-     final String publicId = result.publicId;
-     for (int pos = 0; pos < publicId.length(); pos++)
-     {
-       final char ch = publicId.charAt(pos);
-       final boolean legal =
-         (ch >= 'a' && ch <= 'z')
-         || (ch >= 'A' && ch <= 'Z')
-         || " \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(ch) != -1;
-       if (!legal)
-       {
-         error("illegal PUBLIC id character U+"
-               + Integer.toHexString(ch));
-       }
-     }
-   }
-   else if (tryRead("SYSTEM"))
-   {
-     requireWhitespace();
-     result.systemId = readLiteral(literalFlags);
-   }
-   else if (!isSubset)
-   {
-     error("missing SYSTEM or PUBLIC keyword");
-   }
-
-   final String systemId = result.systemId;
-   if (systemId != null)
-   {
-     if (systemId.indexOf('#') != -1)
-     {
-       handler.verror("SYSTEM id has a URI fragment: " + systemId);
-     }
-     result.baseUri = handler.getSystemId();
-     if (result.baseUri == null && uriWarnings)
-     {
-       handler.warn("No base URI; hope URI is absolute: "
-                    + systemId);
-     }
-   }
-
-   return result;
- }
-
- /**
- * Test if a character is whitespace.
- * <pre>
- * [3] S ::= (#x20 | #x9 | #xd | #xa)+
- * </pre>
- * @param c The character to test.
- * @return true if the character is a space, tab, carriage return or
- * line feed; false for everything else.
- */
- private final boolean isWhitespace(char c)
- {
-   switch (c)
-   {
-     case 0x20:
-     case 0x0a:
-     case 0x09:
-     case 0x0d:
-       return true;
-     default:
-       // any other character, including the remaining control codes
-       return false;
-   }
- }
-
- //////////////////////////////////////////////////////////////////////
- // Utility routines.
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Append one character to the data buffer, growing the buffer first
- * when it is full.
- */
- private void dataBufferAppend(char c)
- {
-   final int writeAt = dataBufferPos;
-   if (dataBuffer.length <= writeAt)
-   {
-     // no room left: get a larger array with the old contents copied
-     dataBuffer = (char[]) extendArray(dataBuffer,
-                                       dataBuffer.length, writeAt);
-   }
-   dataBuffer[writeAt] = c;
-   dataBufferPos = writeAt + 1;
- }
-
- /**
- * Append all characters of a string to the data buffer.
- */
- private void dataBufferAppend(String s)
- {
-   final char[] chars = s.toCharArray();
-   dataBufferAppend(chars, 0, chars.length);
- }
-
- /**
- * Append (part of) a character array to the data buffer.
- * @param ch the source array.
- * @param start index of the first character to copy.
- * @param length number of characters to copy.
- */
- private void dataBufferAppend(char[] ch, int start, int length)
- {
-   final int newEnd = dataBufferPos + length;
-
-   // make sure the buffer can hold everything up to newEnd
-   dataBuffer = (char[]) extendArray(dataBuffer, dataBuffer.length, newEnd);
-
-   System.arraycopy(ch, start, dataBuffer, dataBufferPos, length);
-   dataBufferPos = newEnd;
- }
-
- /**
- * Normalise space characters in the data buffer, in place: leading
- * and trailing spaces are dropped and every inner run of spaces is
- * reduced to a single space.  Only the ' ' character is considered.
- */
- private void dataBufferNormalize()
- {
-   int in = 0;
-   int limit = dataBufferPos;
-
-   // Drop spaces at both ends.
-   while (in < limit && dataBuffer[in] == ' ')
-   {
-     in++;
-   }
-   while (limit > in && dataBuffer[limit - 1] == ' ')
-   {
-     limit--;
-   }
-
-   // Copy to the left.  A run of spaces is remembered and written out
-   // as a single space in front of the next non-space character;
-   // because the tail was trimmed, such a character always follows.
-   int out = 0;
-   boolean pendingSpace = false;
-   for (; in < limit; in++)
-   {
-     final char ch = dataBuffer[in];
-     if (ch == ' ')
-     {
-       pendingSpace = true;
-       continue;
-     }
-     if (pendingSpace)
-     {
-       dataBuffer[out++] = ' ';
-       pendingSpace = false;
-     }
-     dataBuffer[out++] = ch;
-   }
-
-   // The new length is <= the old one.
-   dataBufferPos = out;
- }
-
- /**
- * Return the data buffer's contents as a string and empty the buffer.
- */
- private String dataBufferToString()
- {
-   final String contents = new String(dataBuffer, 0, dataBufferPos);
-   // the buffer is logically empty from here on
-   dataBufferPos = 0;
-   return contents;
- }
-
- /**
- * Flush the contents of the data buffer to the handler, as
- * appropriate, and reset the buffer for new input.
- * <p>In element-only content outside CDATA a buffer holding nothing
- * but whitespace is reported as ignorable whitespace; in every other
- * case the buffer is reported as character data.  An empty buffer
- * produces no callback.
- */
- private void dataBufferFlush()
-   throws SAXException
- {
-   if (dataBufferPos <= 0)
-   {
-     return;
-   }
-
-   boolean ignorable = false;
-   if (currentElementContent == CONTENT_ELEMENTS && !inCDATA)
-   {
-     // We can't just trust the buffer to be whitespace, there
-     // are (error) cases when it isn't
-     ignorable = true;
-     for (int i = 0; i < dataBufferPos; i++)
-     {
-       if (!isWhitespace(dataBuffer[i]))
-       {
-         ignorable = false;
-         break;
-       }
-     }
-   }
-
-   if (ignorable)
-   {
-     handler.ignorableWhitespace(dataBuffer, 0, dataBufferPos);
-   }
-   else
-   {
-     handler.charData(dataBuffer, 0, dataBufferPos);
-   }
-   dataBufferPos = 0;
- }
-
- /**
- * Require a string to appear, or throw an exception.
- * <p><em>Precondition:</em> Entity expansion is not required.
- * <p>The expected characters are compared straight from
- * <code>delim</code>; the data buffer is not used as scratch space
- * and is left untouched.
- * @param delim the exact character sequence that must come next.
- */
- private void require(String delim)
-   throws SAXException, IOException
- {
-   final int length = delim.length();
-
-   if (USE_CHEATS && length <= (readBufferLength - readBufferPos))
-   {
-     // Fast path: the whole string fits in what is left of the read
-     // buffer, so compare in place.
-     int offset = readBufferPos;
-
-     for (int i = 0; i < length; i++, offset++)
-     {
-       if (delim.charAt(i) != readBuffer[offset])
-       {
-         error ("required string", null, delim);
-       }
-     }
-     readBufferPos = offset;
-   }
-   else
-   {
-     // Slow path: go through readCh() one character at a time.
-     for (int i = 0; i < length; i++)
-     {
-       require(delim.charAt(i));
-     }
-   }
- }
-
- /**
- * Require a character to appear, or throw an exception.
- * @param delim the character that must be read next.
- */
- private void require(char delim)
-   throws SAXException, IOException
- {
-   final char found = readCh();
-   if (found == delim)
-   {
-     return;
-   }
-   error("required character", found, Character.toString(delim));
- }
-
- /**
- * Create an interned string from a character array.
- * &AElig;lfred uses this method to create an interned version
- * of all names and name tokens, so that it can test equality
- * with <code>==</code> instead of <code>String.equals ()</code>.
- *
- * <p>This is much more efficient than constructing a non-interned
- * string first, and then interning it.
- *
- * <p>A zero-length request is matched against an existing empty entry
- * like any other string, so repeated empty lookups no longer add
- * duplicate entries to the bucket.
- *
- * @param ch an array of characters for building the string.
- * @param start the starting position in the array.
- * @param length the number of characters to place in the string.
- * @return an interned string.
- * @see #intern (String)
- * @see java.lang.String#intern
- */
- public String intern(char[] ch, int start, int length)
- {
-   int index = 0;
-   int hash = 0;
-   Object[] bucket;
-
-   // Generate a hash code. This is a widely used string hash,
-   // often attributed to Brian Kernighan.
-   for (int i = start; i < start + length; i++)
-   {
-     hash = 31 * hash + ch[i];
-   }
-   hash = (hash & 0x7fffffff) % SYMBOL_TABLE_LENGTH;
-
-   // Get the bucket -- consists of {array,String} pairs
-   if ((bucket = symbolTable[hash]) == null)
-   {
-     // first string in this bucket
-     bucket = new Object[8];
-   }
-   else
-   {
-     // Search for a matching tuple, and
-     // return the string if we find one.
-     while (index < bucket.length)
-     {
-       char[] chFound = (char[]) bucket[index];
-
-       // Stop when we hit an empty entry.
-       if (chFound == null)
-       {
-         break;
-       }
-
-       // If they're the same length, check for a match.
-       if (chFound.length == length)
-       {
-         int matched = 0;
-         while (matched < length && ch[start + matched] == chFound[matched])
-         {
-           matched++;
-         }
-         // also true when length == 0
-         if (matched == length)
-         {
-           // That's it, we have a match!
-           return (String) bucket[index + 1];
-         }
-       }
-       index += 2;
-     }
-     // Not found -- we'll have to add it.
-
-     // Do we have to grow the bucket?
-     bucket = (Object[]) extendArray(bucket, bucket.length, index);
-   }
-   symbolTable[hash] = bucket;
-
-   // OK, add it to the end of the bucket -- "local" interning.
-   // Intern "globally" to let applications share interning benefits.
-   // That is, "!=" and "==" work on our strings, not just equals().
-   String s = new String(ch, start, length).intern();
-   bucket[index] = s.toCharArray();
-   bucket[index + 1] = s;
-   return s;
- }
-
- /**
- * Ensure the capacity of an array, allocating a new one if
- * necessary. Usually extends only for name hash collisions.
- * @param array a <code>char[]</code> or <code>Object[]</code>.
- * @param currentSize the array's current length; that many elements
- * are copied into a new array.
- * @param requiredSize the index that must become addressable.
- * @return the same array when <code>requiredSize</code> is below
- * <code>currentSize</code>; otherwise a larger copy.
- */
- private Object extendArray(Object array, int currentSize, int requiredSize)
- {
-   if (requiredSize < currentSize)
-   {
-     // already large enough
-     return array;
-   }
-
-   // double the size, or go just past the required index if doubling
-   // is not enough
-   final int doubled = currentSize * 2;
-   final int newSize = (doubled <= requiredSize) ? requiredSize + 1 : doubled;
-
-   final Object grown;
-   if (array instanceof char[])
-   {
-     grown = new char[newSize];
-   }
-   else if (array instanceof Object[])
-   {
-     grown = new Object[newSize];
-   }
-   else
-   {
-     // only the two array kinds above are supported
-     throw new RuntimeException();
-   }
-
-   System.arraycopy(array, 0, grown, 0, currentSize);
-   return grown;
- }
-
- //////////////////////////////////////////////////////////////////////
- // XML query routines.
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Report the parser's standalone flag.
- * @return the current value of <code>docIsStandalone</code>.
- */
- boolean isStandalone()
- {
-   return this.docIsStandalone;
- }
-
- //
- // Elements
- //
-
- /**
- * Return the content type recorded for an element declaration.
- * @param element the declaration, possibly null.
- * @param defaultType the value to return when there is no declaration
- * or its content type is <code>CONTENT_UNDECLARED</code>.
- */
- private int getContentType(ElementDecl element, int defaultType)
- {
-   if (element == null || element.contentType == CONTENT_UNDECLARED)
-   {
-     return defaultType;
-   }
-   return element.contentType;
- }
-
- /**
- * Look up the content type of an element.
- * @param name The element type name.
- * @return An integer constant representing the content type;
- * <code>CONTENT_UNDECLARED</code> when nothing is recorded for the
- * name.
- * @see #CONTENT_UNDECLARED
- * @see #CONTENT_ANY
- * @see #CONTENT_EMPTY
- * @see #CONTENT_MIXED
- * @see #CONTENT_ELEMENTS
- */
- public int getElementContentType(String name)
- {
-   return getContentType((ElementDecl) elementInfo.get(name),
-                         CONTENT_UNDECLARED);
- }
-
- /**
- * Register an element, or merge new information into the entry that
- * already exists for it.  Does nothing while <code>skippedPE</code>
- * is set.
- * @param name the element type name.
- * @param contentType the content type, or
- * <code>CONTENT_UNDECLARED</code> when the call only supplies
- * attributes.
- * @param contentModel the content model to record with the type.
- * @param attributes the attribute table, or null.
- */
- private void setElement(String name, int contentType,
-                         String contentModel, HashMap attributes)
-   throws SAXException
- {
-   if (skippedPE)
-   {
-     return;
-   }
-
-   ElementDecl decl = (ElementDecl) elementInfo.get(name);
-
-   // first <!ELEMENT ...> or <!ATTLIST ...> for this type?
-   if (decl == null)
-   {
-     decl = new ElementDecl();
-     decl.contentType = contentType;
-     decl.contentModel = contentModel;
-     decl.attributes = attributes;
-     elementInfo.put(name, decl);
-     return;
-   }
-
-   // first <!ATTLIST ...>, before <!ELEMENT ...> ?
-   if (contentType == CONTENT_UNDECLARED)
-   {
-     if (attributes != null)
-     {
-       decl.attributes = attributes;
-     }
-     return;
-   }
-
-   // <!ELEMENT ...> declaration
-   if (decl.contentType != CONTENT_UNDECLARED)
-   {
-     // VC: Unique Element Type Declaration
-     handler.verror("multiple declarations for element type: "
-                    + name);
-     return;
-   }
-
-   // ... following an associated <!ATTLIST ...>
-   decl.contentType = contentType;
-   decl.contentModel = contentModel;
- }
-
- /**
-  * Look up the attribute hash table for an element.
-  * @param name The element type name.
-  * @return The <code>attributes</code> table of the recorded
-  * declaration, or null when the element type is unknown.
-  */
- private HashMap getElementAttributes(String name)
- {
-   ElementDecl decl = (ElementDecl) elementInfo.get(name);
-   if (decl == null)
-     {
-       return null;
-     }
-   return decl.attributes;
- }
-
- //
- // Attributes
- //
-
- /**
-  * Get the declared attributes for an element declaration.
-  * @param element The element declaration; may be null.
-  * @return An iterator over the names of all the attributes declared
-  * for the element, or null when there is no declaration or it has
-  * no attribute table.
-  * @see #getAttributeType
-  * @see #getAttributeEnumeration
-  * @see #getAttributeDefaultValueType
-  * @see #getAttributeDefaultValue
-  */
- private Iterator declaredAttributes(ElementDecl element)
- {
-   if (element == null || element.attributes == null)
-     {
-       return null;
-     }
-   return element.attributes.keySet().iterator();
- }
-
- /**
-  * Get the declared attributes for an element type.
-  * @param elname The name of the element type.
-  * @return An iterator over the names of all the attributes declared
-  * for a specific element type, or null if none are recorded. The
-  * results will be valid only after the DTD (if any) has been parsed.
-  * @see #getAttributeType
-  * @see #getAttributeEnumeration
-  * @see #getAttributeDefaultValueType
-  * @see #getAttributeDefaultValue
-  */
- public Iterator declaredAttributes(String elname)
- {
-   ElementDecl decl = (ElementDecl) elementInfo.get(elname);
-   return declaredAttributes(decl);
- }
-
- /**
-  * Retrieve the declared type of an attribute.
-  * @param name The name of the associated element.
-  * @param aname The name of the attribute.
-  * @return A string denoting the type, or null
-  * indicating an undeclared attribute.
-  */
- public String getAttributeType(String name, String aname)
- {
-   AttributeDecl decl = getAttribute(name, aname);
-   if (decl == null)
-     {
-       return null;
-     }
-   return decl.type;
- }
-
- /**
-  * Retrieve the allowed values for an enumerated attribute type.
-  * @param name The name of the associated element.
-  * @param aname The name of the attribute.
-  * @return A string containing the token list, or null when the
-  * attribute is not declared.
-  */
- public String getAttributeEnumeration(String name, String aname)
- {
-   AttributeDecl decl = getAttribute(name, aname);
-   if (decl == null)
-     {
-       return null;
-     }
-   // assert: attribute.enumeration is "ENUMERATION" or "NOTATION"
-   return decl.enumeration;
- }
-
- /**
-  * Retrieve the default value of a declared attribute.
-  * @param name The name of the associated element.
-  * @param aname The name of the attribute.
-  * @return The recorded default value, or null if the attribute is
-  * undeclared or no value was recorded for it.
-  */
- public String getAttributeDefaultValue(String name, String aname)
- {
-   AttributeDecl decl = getAttribute(name, aname);
-   if (decl == null)
-     {
-       return null;
-     }
-   return decl.value;
- }
-
- /*
-
-// FIXME: Leaving this in, until W3C finally resolves the confusion
-// between parts of the XML 2nd REC about when entity declarations
-// are guaranteed to be known. Current code matches what section 5.1
-// (conformance) describes, but some readings of the self-contradicting
-// text in 4.1 (the "Entity Declared" WFC and VC) seem to expect that
-// attribute expansion/normalization must be deferred in some cases
-// (just TRY to identify them!).
-
- * Retrieve the expanded value of a declared attribute.
- * <p>General entities (and char refs) will be expanded (once).
- * @param name The name of the associated element.
- * @param aname The name of the attribute.
- * @return The expanded default value, or null if the attribute was
- * #IMPLIED or simply undeclared
- * @see #getAttributeDefaultValue
- public String getAttributeExpandedValue (String name, String aname)
- throws Exception
- {
- AttributeDecl attribute = getAttribute (name, aname);
-
- if (attribute == null) {
- return null;
- } else if (attribute.defaultValue == null && attribute.value != null) {
- // we MUST use the same buf for both quotes else the literal
- // can't be properly terminated
- char buf [] = new char [1];
- int flags = LIT_ENTITY_REF | LIT_ATTRIBUTE;
- String type = getAttributeType (name, aname);
-
- if (type != "CDATA" && type != null)
- flags |= LIT_NORMALIZE;
- buf [0] = '"';
- pushCharArray (null, buf, 0, 1);
- pushString (null, attribute.value);
- pushCharArray (null, buf, 0, 1);
- attribute.defaultValue = readLiteral (flags);
- }
- return attribute.defaultValue;
- }
- */
-
- /**
-  * Retrieve the default value mode of a declared attribute.
-  * @param name The name of the associated element.
-  * @param aname The name of the attribute.
-  * @return The recorded value type, or ATTRIBUTE_DEFAULT_UNDECLARED
-  * when the attribute is not declared.
-  * @see #ATTRIBUTE_DEFAULT_SPECIFIED
-  * @see #ATTRIBUTE_DEFAULT_IMPLIED
-  * @see #ATTRIBUTE_DEFAULT_REQUIRED
-  * @see #ATTRIBUTE_DEFAULT_FIXED
-  */
- public int getAttributeDefaultValueType(String name, String aname)
- {
-   AttributeDecl decl = getAttribute(name, aname);
-   if (decl == null)
-     {
-       return ATTRIBUTE_DEFAULT_UNDECLARED;
-     }
-   return decl.valueType;
- }
-
- /**
-  * Register an attribute declaration for later retrieval.
-  * <p>Nothing is recorded while <code>skippedPE</code> is set, and a
-  * second declaration of the same attribute is silently ignored.
-  * @param elName The name of the associated element.
-  * @param name The name of the attribute.
-  * @param type The attribute type.
-  * @param enumeration The enumeration token list, if any.
-  * @param value The default value, if any.
-  * @param valueType The default value mode.
-  */
- private void setAttribute(String elName, String name, String type,
-                           String enumeration, String value, int valueType)
-   throws Exception
- {
-   if (skippedPE)
-     {
-       return;
-     }
-
-   // Reuse the element's table, or start a new one.
-   HashMap table = getElementAttributes(elName);
-   if (table == null)
-     {
-       table = new HashMap();
-     }
-
-   // ignore multiple attribute declarations!
-   if (table.get(name) != null)
-     {
-       // warn ...
-       return;
-     }
-
-   AttributeDecl decl = new AttributeDecl();
-   decl.type = type;
-   decl.value = value;
-   decl.valueType = valueType;
-   decl.enumeration = enumeration;
-   table.put(name, decl);
-
-   // save; but don't overwrite any existing <!ELEMENT ...>
-   setElement(elName, CONTENT_UNDECLARED, null, table);
- }
-
- /**
-  * Retrieve the attribute declaration for the given element name and
-  * attribute name.
-  * @param elName The name of the associated element.
-  * @param name The name of the attribute.
-  * @return The recorded declaration, or null when the element has no
-  * attribute table or the table has no entry for the name.
-  */
- private AttributeDecl getAttribute(String elName, String name)
- {
-   HashMap table = getElementAttributes(elName);
-   if (table == null)
-     {
-       return null;
-     }
-   return (AttributeDecl) table.get(name);
- }
-
- //
- // Entities
- //
-
- /**
-  * Find the type of an entity.
-  * @param ename The name of the entity.
-  * @return An integer constant representing the entity type;
-  * ENTITY_UNDECLARED when nothing is recorded for the name.
-  * @see #ENTITY_UNDECLARED
-  * @see #ENTITY_INTERNAL
-  * @see #ENTITY_NDATA
-  * @see #ENTITY_TEXT
-  */
- public int getEntityType(String ename)
- {
-   EntityInfo info = (EntityInfo) entityInfo.get(ename);
-   if (info == null)
-     {
-       return ENTITY_UNDECLARED;
-     }
-   return info.type;
- }
-
- /**
-  * Return an external entity's identifiers.
-  * @param ename The name of the external entity.
-  * @return The identifiers recorded for the entity, or null if no
-  * entity of that name is recorded (or none were stored for it).
-  * @see #getEntityType
-  */
- public ExternalIdentifiers getEntityIds(String ename)
- {
-   EntityInfo info = (EntityInfo) entityInfo.get(ename);
-   if (info == null)
-     {
-       return null;
-     }
-   return info.ids;
- }
-
- /**
-  * Return an internal entity's replacement text.
-  * @param ename The name of the internal entity.
-  * @return The value recorded for the entity, or null if no entity
-  * of that name is recorded (or no value was stored for it).
-  * @see #getEntityType
-  */
- public String getEntityValue(String ename)
- {
-   EntityInfo info = (EntityInfo) entityInfo.get(ename);
-   if (info == null)
-     {
-       return null;
-     }
-   return info.value;
- }
-
- /**
-  * Register an internal entity declaration for later retrieval and
-  * report it to the declaration handler.
-  * <p>Nothing happens while <code>skippedPE</code> is set. Only the
-  * first declaration of a name is stored. The five names lt, gt,
-  * quot, apos and amp are stored but never reported.
-  * @param eName The entity name.
-  * @param value The entity's replacement text.
-  */
- private void setInternalEntity(String eName, String value)
-   throws SAXException
- {
-   if (skippedPE)
-     {
-       return;
-     }
-
-   if (entityInfo.get(eName) == null)
-     {
-       EntityInfo info = new EntityInfo();
-       info.type = ENTITY_INTERNAL;
-       info.value = value;
-       entityInfo.put(eName, info);
-     }
-
-   // Identity comparison is only used when the handler says names
-   // are interned; otherwise compare by content.
-   boolean predefined;
-   if (handler.stringInterning)
-     {
-       predefined = "lt" == eName || "gt" == eName || "quot" == eName
-                    || "apos" == eName || "amp" == eName;
-     }
-   else
-     {
-       predefined = "lt".equals(eName) || "gt".equals(eName)
-                    || "quot".equals(eName) || "apos".equals(eName)
-                    || "amp".equals(eName);
-     }
-
-   if (!predefined)
-     {
-       handler.getDeclHandler().internalEntityDecl(eName, value);
-     }
- }
-
- /**
-  * Register an external entity declaration for later retrieval.
-  * <p>Only the first declaration of a name is stored.
-  * @param eName The entity name.
-  * @param eClass The value stored as the entity's type.
-  * @param ids The entity's external identifiers.
-  * @param nName The notation name stored with the entity.
-  */
- private void setExternalEntity(String eName, int eClass,
-                                ExternalIdentifiers ids, String nName)
- {
-   if (entityInfo.get(eName) != null)
-     {
-       return;
-     }
-
-   EntityInfo info = new EntityInfo();
-   info.type = eClass;
-   info.ids = ids;
-   info.notationName = nName;
-   entityInfo.put(eName, info);
- }
-
- //
- // Notations.
- //
-
- /**
-  * Report a notation declaration, checking for duplicates.
-  * <p>Nothing happens while <code>skippedPE</code> is set. The
-  * declaration is reported to the handler before the duplicate
-  * check, so duplicates are reported too.
-  * @param nname The notation name.
-  * @param ids The notation's external identifiers.
-  */
- private void setNotation(String nname, ExternalIdentifiers ids)
-   throws SAXException
- {
-   if (skippedPE)
-     {
-       return;
-     }
-
-   handler.notationDecl(nname, ids.publicId, ids.systemId, ids.baseUri);
-
-   if (notationInfo.get(nname) != null)
-     {
-       // VC: Unique Notation Name
-       handler.verror("Duplicate notation name decl: " + nname);
-     }
-   else
-     {
-       notationInfo.put(nname, nname);
-     }
- }
-
- //
- // Location.
- //
-
- /**
-  * Return the current line number.
-  * @return the value of the <code>line</code> counter.
-  */
- public int getLineNumber()
- {
-   return line;
- }
-
- /**
-  * Return the current column number.
-  * @return the value of the <code>column</code> counter.
-  */
- public int getColumnNumber()
- {
-   return column;
- }
-
- //////////////////////////////////////////////////////////////////////
- // High-level I/O.
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Read a single character from the readBuffer.
- * <p>The readDataChunk () method maintains the buffer.
- * <p>If we hit the end of an entity, try to pop the stack and
- * keep going.
- * <p> (This approach doesn't really enforce XML's rules about
- * entity boundaries, but this is not currently a validating
- * parser).
- * <p>This routine also attempts to keep track of the current
- * position in external entities, but it's not entirely accurate.
- * <p>When <code>expandPE</code> is set, a '%' is not returned:
- * parsePEReference () is called and the next character is read
- * instead.
- * @return The next available input character.
- * @see #unread (char)
- * @see #readDataChunk
- * @see #readBuffer
- * @see #line
- */
- private char readCh()
- throws SAXException, IOException
- {
- // As long as there's nothing in the
- // read buffer, try reading more data
- // (for an external entity) or popping
- // the entity stack (for either).
- while (readBufferPos >= readBufferLength)
- {
- switch (sourceType)
- {
- case INPUT_READER:
- case INPUT_STREAM:
- readDataChunk();
- // Still empty after a refill: fall back to the enclosing
- // input, refilling that one too if it has nothing buffered.
- while (readBufferLength < 1)
- {
- popInput();
- if (readBufferLength < 1)
- {
- readDataChunk();
- }
- }
- break;
-
- default:
-
- popInput();
- break;
- }
- }
-
- char c = readBuffer[readBufferPos++];
-
- // Position tracking: a newline starts a new line at column 0.
- if (c == '\n')
- {
- line++;
- column = 0;
- }
- else
- {
- if (c == '<')
- {
- /* the most common return to parseContent () ... NOP */
- }
- // Reject control characters below U+0020 other than tab and CR
- // (newline was handled above), anything above U+FFFD, and, for
- // XML 1.1 only, U+007F through U+009F except U+0085.
- else if (((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD)
- || ((c >= 0x007f) && (c <= 0x009f) && (c != 0x0085)
- && xmlVersion == XML_11))
- {
- error("illegal XML character U+" + Integer.toHexString(c));
- }
-
- // If we're in the DTD and in a context where PEs get expanded,
- // do so ... 1/14/2000 errata identify those contexts. There
- // are also spots in the internal subset where PE refs are fatal
- // errors, hence yet another flag.
- else if (c == '%' && expandPE)
- {
- if (peIsError)
- {
- error("PE reference within decl in internal subset.");
- }
- parsePEReference();
- // The '%' itself is never returned (and never counted in column);
- // read again to get the next character.
- return readCh();
- }
- column++;
- }
-
- return c;
- }
-
- /**
-  * Push a single character back onto the current input stream.
-  * <p>The character is normally stored back into the readBuffer,
-  * just before the current read position. If the read position is
-  * already at the start of the buffer, the character is pushed as a
-  * new one-character input instead.
-  * <p>Pushing back a newline decrements the line counter and sets
-  * the column to -1.
-  * @param c The character to push back.
-  * @see #readCh
-  * @see #unread (char[])
-  * @see #readBuffer
-  */
- private void unread(char c)
-   throws SAXException
- {
-   if (c == '\n')
-     {
-       line--;
-       column = -1;
-     }
-
-   // Boundary condition: no room in front of the read position.
-   if (readBufferPos <= 0)
-     {
-       pushString(null, Character.toString(c));
-       return;
-     }
-
-   // Normal condition.
-   readBufferPos--;
-   readBuffer[readBufferPos] = c;
- }
-
- /**
-  * Push a char array back onto the current input stream.
-  * <p>When the read position is more than <code>length</code>
-  * characters into the buffer, the position is simply moved back;
-  * otherwise the characters are pushed as a new input.
-  * <p>NOTE: you must <em>never</em> push back characters that you
-  * haven't actually read: use pushString () instead.
-  * @param ch The characters to push back.
-  * @param length How many characters of <code>ch</code> to push back.
-  * @see #readCh
-  * @see #unread (char)
-  * @see #readBuffer
-  * @see #pushString
-  */
- private void unread(char[] ch, int length)
-   throws SAXException
- {
-   // Undo the line accounting for every newline being returned.
-   for (int pos = 0; pos < length; pos++)
-     {
-       if (ch[pos] != '\n')
-         {
-           continue;
-         }
-       line--;
-       column = -1;
-     }
-
-   if (length >= readBufferPos)
-     {
-       pushCharArray(null, ch, 0, length);
-     }
-   else
-     {
-       readBufferPos -= length;
-     }
- }
-
- /**
-  * Push, or skip, a new external input source.
-  * The source will be some kind of parsed entity, such as a PE
-  * (including the external DTD subset) or content for the body.
-  *
-  * <p>When <code>doResolve</code> is set, the handler is asked to
-  * resolve the entity. A null answer means the entity is skipped:
-  * a warning and a skippedEntity event are issued and, for a PE,
-  * <code>skippedPE</code> is set. Otherwise the supplied reader,
-  * stream and encoding are used directly.
-  *
-  * <p>An external encoding label is taken from the Content-Type of a
-  * connection opened here (never for "file" URLs). If there is none,
-  * the encoding is autodetected from the first bytes.
-  *
-  * @param isPE True if the entity is a parameter entity.
-  * @param ename The entity name, e.g. "[document]".
-  * @param ids The public ID, system ID and base URI of the entity.
-  * @param reader Character stream to read from, or null.
-  * @param stream Byte stream to read from, or null.
-  * @param encoding Encoding of the byte stream, or null.
-  * @param doResolve True to ask the handler to resolve the entity.
-  * @see SAXDriver#resolveEntity
-  * @see #pushString
-  * @see #sourceType
-  * @see #pushInput
-  * @see #detectEncoding
-  * @see #readBuffer
-  */
- private void pushURL(boolean isPE,
-                      String ename,
-                      ExternalIdentifiers ids,
-                      Reader reader,
-                      InputStream stream,
-                      String encoding,
-                      boolean doResolve)
-   throws SAXException, IOException
- {
-   boolean ignoreEncoding;
-   // True only when this call opened the URL connection itself, so
-   // that a connection left over from an enclosing input is never
-   // consulted for this entity's encoding label.
-   boolean openedConnection = false;
-   String systemId;
-   InputSource source;
-
-   if (!isPE)
-     {
-       dataBufferFlush();
-     }
-
-   scratch.setPublicId(ids.publicId);
-   scratch.setSystemId(ids.systemId);
-
-   // See if we should skip or substitute the entity.
-   // If we're not skipping, resolving reports startEntity()
-   // and updates the (handler's) stack of URIs.
-   if (doResolve)
-     {
-       // assert (stream == null && reader == null && encoding == null)
-       source = handler.resolveEntity(isPE, ename, scratch, ids.baseUri);
-       if (source == null)
-         {
-           handler.warn("skipping entity: " + ename);
-           handler.skippedEntity(ename);
-           if (isPE)
-             {
-               skippedPE = true;
-             }
-           return;
-         }
-
-       // we might be using alternate IDs/encoding
-       // A null system ID is accepted here: the application has the
-       // option of not setting it provided that it has set the
-       // character/byte stream.
-       systemId = source.getSystemId();
-     }
-   else
-     {
-       // "[document]", or "[dtd]" via getExternalSubset()
-       scratch.setCharacterStream(reader);
-       scratch.setByteStream(stream);
-       scratch.setEncoding(encoding);
-       source = scratch;
-       systemId = ids.systemId;
-       if (handler.stringInterning)
-         {
-           handler.startExternalEntity(ename, systemId,
-                                       "[document]" == ename);
-         }
-       else
-         {
-           handler.startExternalEntity(ename, systemId,
-                                       "[document]".equals(ename));
-         }
-     }
-
-   // we may have been given I/O streams directly
-   if (source.getCharacterStream() != null)
-     {
-       if (source.getByteStream() != null)
-         error("InputSource has two streams!");
-       reader = source.getCharacterStream();
-     }
-   else if (source.getByteStream() != null)
-     {
-       encoding = source.getEncoding();
-       if (encoding == null)
-         {
-           stream = source.getByteStream();
-         }
-       else
-         {
-           try
-             {
-               reader = new InputStreamReader(source.getByteStream(),
-                                              encoding);
-             }
-           catch (IOException e)
-             {
-               // The JVM has no decoder for this label; fall back to
-               // the raw stream and let setupDecoding () handle it.
-               stream = source.getByteStream();
-             }
-         }
-     }
-   else if (systemId == null)
-     {
-       error("InputSource has no URI!");
-     }
-   scratch.setCharacterStream(null);
-   scratch.setByteStream(null);
-   scratch.setEncoding(null);
-
-   // Push the existing status.
-   pushInput(ename);
-
-   // Create a new read buffer.
-   // (Note the four-character margin)
-   readBuffer = new char[READ_BUFFER_MAX + 4];
-   readBufferPos = 0;
-   readBufferLength = 0;
-   readBufferOverflow = -1;
-   is = null;
-   line = 1;
-   column = 0;
-   currentByteCount = 0;
-
-   // If there's an explicit character stream, just
-   // ignore encoding declarations.
-   if (reader != null)
-     {
-       sourceType = INPUT_READER;
-       this.reader = reader;
-       tryEncodingDecl(true);
-       return;
-     }
-
-   // Else we handle the conversion, and need to ensure
-   // it's done right.
-   sourceType = INPUT_STREAM;
-   if (stream != null)
-     {
-       is = stream;
-     }
-   else
-     {
-       // We have to open our own stream to the URL.
-       URL url = new URL(systemId);
-
-       externalEntity = url.openConnection();
-       externalEntity.connect();
-       is = externalEntity.getInputStream();
-       openedConnection = true;
-     }
-
-   // If we get to here, there must be
-   // an InputStream available.
-   if (!is.markSupported())
-     {
-       is = new BufferedInputStream(is);
-     }
-
-   // Get any external encoding label.
-   if (encoding == null && openedConnection)
-     {
-       // External labels can be untrustworthy; filesystems in
-       // particular often have the wrong default for content
-       // that wasn't locally originated. Those we autodetect.
-       if (!"file".equals(externalEntity.getURL().getProtocol()))
-         {
-           int temp;
-
-           // application/xml;charset=something;otherAttr=...
-           // ... with many variants on 'something'
-           encoding = externalEntity.getContentType();
-
-           // MHK code (fix for Saxon 5.5.1/007):
-           // protect against encoding==null
-           if (encoding == null)
-             {
-               temp = -1;
-             }
-           else
-             {
-               temp = encoding.indexOf("charset");
-             }
-
-           // RFC 2376 sez MIME text defaults to ASCII, but since the
-           // JDK will create a MIME type out of thin air, we always
-           // autodetect when there's no explicit charset attribute.
-           if (temp < 0)
-             {
-               encoding = null; // autodetect
-             }
-           else
-             {
-               // only this one attribute: keep the text from "charset"
-               // up to (not including) the next ';'. Cutting at the
-               // first ';' of the whole header would throw the charset
-               // attribute away, since it always follows one.
-               encoding = encoding.substring(temp);
-               if ((temp = encoding.indexOf(';')) > 0)
-                 {
-                   encoding = encoding.substring(0, temp);
-                 }
-               String attribute = encoding;
-
-               // "charset" is seven characters long
-               if ((temp = encoding.indexOf('=', 7)) > 0)
-                 {
-                   encoding = encoding.substring(temp + 1);
-
-                   // attributes can have comment fields (RFC 822)
-                   if ((temp = encoding.indexOf('(')) > 0)
-                     {
-                       encoding = encoding.substring(0, temp);
-                     }
-                   encoding = encoding.trim();
-
-                   // ... and values may be quoted; the opening quote
-                   // is then the first character of the value
-                   if (encoding.length() > 1 && encoding.charAt(0) == '"')
-                     {
-                       temp = encoding.indexOf('"', 1);
-                       if (temp > 0)
-                         {
-                           encoding = encoding.substring(1, temp).trim();
-                         }
-                     }
-
-                   if (encoding.length() == 0)
-                     {
-                       handler.warn("ignoring illegal MIME attribute: "
-                                    + attribute);
-                       encoding = null;
-                     }
-                 }
-               else
-                 {
-                   handler.warn("ignoring illegal MIME attribute: "
-                                + encoding);
-                   encoding = null;
-                 }
-             }
-         }
-     }
-
-   // if we got an external encoding label, use it ...
-   if (encoding != null)
-     {
-       this.encoding = ENCODING_EXTERNAL;
-       setupDecoding(encoding);
-       ignoreEncoding = true;
-
-       // ... else autodetect from first bytes.
-     }
-   else
-     {
-       detectEncoding();
-       ignoreEncoding = false;
-     }
-
-   // Read any XML or text declaration.
-   // If we autodetected, it may tell us the "real" encoding.
-   try
-     {
-       tryEncodingDecl(ignoreEncoding);
-     }
-   catch (UnsupportedEncodingException x)
-     {
-       encoding = x.getMessage();
-
-       // if we don't handle the declared encoding,
-       // try letting a JVM InputStreamReader do it
-       try
-         {
-           if (sourceType != INPUT_STREAM)
-             {
-               throw x;
-             }
-
-           is.reset();
-           readBufferPos = 0;
-           readBufferLength = 0;
-           readBufferOverflow = -1;
-           line = 1;
-           currentByteCount = column = 0;
-
-           sourceType = INPUT_READER;
-           this.reader = new InputStreamReader(is, encoding);
-           is = null;
-
-           tryEncodingDecl(true);
-
-         }
-       catch (IOException e)
-         {
-           error("unsupported text encoding",
-                 encoding,
-                 null);
-         }
-     }
- }
-
- /**
-  * Check for an encoding declaration. This is the second part of the
-  * XML encoding autodetection algorithm, relying on detectEncoding to
-  * get to the point that this part can read any encoding declaration
-  * in the document (using only US-ASCII characters).
-  *
-  * <p> Because this part starts to fill parser buffers with this data,
-  * it's tricky to setup a reader so that Java's built-in decoders can be
-  * used for the character encodings that aren't built in to this parser
-  * (such as EUC-JP, KOI8-R, Big5, etc).
-  *
-  * <p>A text declaration is parsed when the input stack is not
-  * empty, an XML declaration otherwise. If "&lt;?xml" is not
-  * followed by whitespace, those characters are pushed back.
-  *
-  * @param ignoreEncoding passed through to the declaration parser.
-  * @return whatever parseTextDecl () or parseXMLDecl () returns, or
-  * null when the input does not start with a declaration.
-  * @see #detectEncoding
-  */
- private String tryEncodingDecl(boolean ignoreEncoding)
-   throws SAXException, IOException
- {
-   // No XML/text declaration at all.
-   if (!tryRead("<?xml"))
-     {
-       return null;
-     }
-
-   // Read the XML/text declaration.
-   if (tryWhitespace())
-     {
-       return (inputStack.size() > 0)
-         ? parseTextDecl(ignoreEncoding)
-         : parseXMLDecl(ignoreEncoding);
-     }
-
-   // <?xml-stylesheet ...?> or similar
-   unread('l');
-   unread('m');
-   unread('x');
-   unread('?');
-   unread('<');
-   return null;
- }
-
- /**
-  * Attempt to detect the encoding of an entity.
-  * <p>The trick here (as suggested in the XML standard) is that
-  * any entity not in UTF-8, or in UCS-2 with a byte-order mark,
-  * <b>must</b> begin with an XML declaration or an encoding
-  * declaration; we simply have to look for "&lt;?xml" in various
-  * encodings.
-  * <p>This method has no way to distinguish among 8-bit encodings.
-  * Instead, it sets up for UTF-8, then (possibly) revises its assumption
-  * later in setupDecoding (). Any ASCII-derived 8-bit encoding
-  * should work, but most will be rejected later by setupDecoding ().
-  * <p>Up to four bytes are examined and the stream is then reset, so
-  * only a recognized byte-order mark is actually consumed. If the
-  * stream holds fewer than four bytes, the missing ones are compared
-  * as zero.
-  * @see #tryEncoding (byte[], byte, byte, byte, byte)
-  * @see #tryEncoding (byte[], byte, byte)
-  * @see #setupDecoding
-  */
- private void detectEncoding()
-   throws SAXException, IOException
- {
-   byte[] signature = new byte[4];
-
-   // Read the first four bytes for
-   // autodetection. A single read () may legitimately return
-   // fewer bytes than requested, so keep reading until the
-   // signature is full or the stream ends.
-   is.mark(4);
-   int filled = 0;
-   while (filled < signature.length)
-     {
-       int count = is.read(signature, filled, signature.length - filled);
-       if (count < 0)
-         {
-           break;
-         }
-       filled += count;
-     }
-   is.reset();
-
-   //
-   // FIRST: four byte encodings (who uses these?)
-   //
-   if (tryEncoding(signature, (byte) 0x00, (byte) 0x00,
-                   (byte) 0x00, (byte) 0x3c))
-     {
-       // UCS-4 must begin with "<?xml"
-       // 0x00 0x00 0x00 0x3c: UCS-4, big-endian (1234)
-       // "UTF-32BE"
-       encoding = ENCODING_UCS_4_1234;
-     }
-   else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x00,
-                        (byte) 0x00, (byte) 0x00))
-     {
-       // 0x3c 0x00 0x00 0x00: UCS-4, little-endian (4321)
-       // "UTF-32LE"
-       encoding = ENCODING_UCS_4_4321;
-     }
-   else if (tryEncoding(signature, (byte) 0x00, (byte) 0x00,
-                        (byte) 0x3c, (byte) 0x00))
-     {
-       // 0x00 0x00 0x3c 0x00: UCS-4, unusual (2143)
-       encoding = ENCODING_UCS_4_2143;
-     }
-   else if (tryEncoding(signature, (byte) 0x00, (byte) 0x3c,
-                        (byte) 0x00, (byte) 0x00))
-     {
-       // 0x00 0x3c 0x00 0x00: UCS-4, unusual (3412)
-       encoding = ENCODING_UCS_4_3412;
-
-       // 00 00 fe ff UCS_4_1234 (with BOM)
-       // ff fe 00 00 UCS_4_4321 (with BOM)
-     }
-
-   //
-   // SECOND: two byte encodings
-   // note ... with 1/14/2000 errata the XML spec identifies some
-   // more "broken UTF-16" autodetection cases, with no XML decl,
-   // which we don't handle here (that's legal too).
-   //
-   else if (tryEncoding(signature, (byte) 0xfe, (byte) 0xff))
-     {
-       // UCS-2 with a byte-order marker. (UTF-16)
-       // 0xfe 0xff: UCS-2, big-endian (12)
-       encoding = ENCODING_UCS_2_12;
-       // skip the byte-order mark
-       is.read(); is.read();
-     }
-   else if (tryEncoding(signature, (byte) 0xff, (byte) 0xfe))
-     {
-       // UCS-2 with a byte-order marker. (UTF-16)
-       // 0xff 0xfe: UCS-2, little-endian (21)
-       encoding = ENCODING_UCS_2_21;
-       // skip the byte-order mark
-       is.read(); is.read();
-     }
-   else if (tryEncoding(signature, (byte) 0x00, (byte) 0x3c,
-                        (byte) 0x00, (byte) 0x3f))
-     {
-       // UTF-16BE (otherwise, malformed UTF-16)
-       // 0x00 0x3c 0x00 0x3f: UCS-2, big-endian, no byte-order mark
-       encoding = ENCODING_UCS_2_12;
-       error("no byte-order mark for UCS-2 entity");
-     }
-   else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x00,
-                        (byte) 0x3f, (byte) 0x00))
-     {
-       // UTF-16LE (otherwise, malformed UTF-16)
-       // 0x3c 0x00 0x3f 0x00: UCS-2, little-endian, no byte-order mark
-       encoding = ENCODING_UCS_2_21;
-       error("no byte-order mark for UCS-2 entity");
-     }
-
-   //
-   // THIRD: ASCII-derived encodings, fixed and variable lengths
-   //
-   else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x3f,
-                        (byte) 0x78, (byte) 0x6d))
-     {
-       // ASCII derived
-       // 0x3c 0x3f 0x78 0x6d: UTF-8 or other 8-bit markup (read ENCODING)
-       encoding = ENCODING_UTF_8;
-       prefetchASCIIEncodingDecl();
-     }
-   else if (signature[0] == (byte) 0xef
-            && signature[1] == (byte) 0xbb
-            && signature[2] == (byte) 0xbf)
-     {
-       // 0xef 0xbb 0xbf: UTF-8 BOM (not part of document text)
-       // this un-needed notion slipped into XML 2nd ed through a
-       // "non-normative" erratum; now required by MSFT and UDDI,
-       // and E22 made it normative.
-       encoding = ENCODING_UTF_8;
-       // skip the byte-order mark
-       is.read(); is.read(); is.read();
-     }
-   else
-     {
-       // 4c 6f a7 94 ... we don't understand EBCDIC flavors
-       // ... but we COULD at least kick in some fixed code page
-
-       // (default) UTF-8 without encoding/XML declaration
-       encoding = ENCODING_UTF_8;
-     }
- }
-
- /**
-  * Check for a four-byte signature.
-  * <p>Utility routine for detectEncoding ().
-  * <p>Always looks for some part of "&lt;?XML" in a specific encoding.
-  * @param sig The first four bytes read.
-  * @param b1 The first byte of the signature
-  * @param b2 The second byte of the signature
-  * @param b3 The third byte of the signature
-  * @param b4 The fourth byte of the signature
-  * @return true if the first four bytes of <code>sig</code> are
-  * exactly b1, b2, b3, b4.
-  * @see #detectEncoding
-  */
- private static boolean tryEncoding(byte[] sig, byte b1, byte b2,
-                                    byte b3, byte b4)
- {
-   if (sig[0] != b1 || sig[1] != b2)
-     {
-       return false;
-     }
-   return sig[2] == b3 && sig[3] == b4;
- }
-
- /**
-  * Check for a two-byte signature.
-  * <p>Looks for a UCS-2 byte-order mark.
-  * <p>Utility routine for detectEncoding ().
-  * @param sig The first four bytes read.
-  * @param b1 The first byte of the signature
-  * @param b2 The second byte of the signature
-  * @return true if the first two bytes of <code>sig</code> are
-  * exactly b1, b2.
-  * @see #detectEncoding
-  */
- private static boolean tryEncoding(byte[] sig, byte b1, byte b2)
- {
-   if (sig[0] != b1)
-     {
-       return false;
-     }
-   return sig[1] == b2;
- }
-
- /**
-  * This method pushes a string back onto input.
-  * <p>It is useful either as the expansion of an internal entity,
-  * or for backtracking during the parse.
-  * <p>Call pushCharArray () to do the actual work.
-  * @param ename The name of the entity being expanded, or null.
-  * @param s The string to push back onto input.
-  * @see #pushCharArray
-  */
- private void pushString(String ename, String s)
-   throws SAXException
- {
-   pushCharArray(ename, s.toCharArray(), 0, s.length());
- }
-
- /**
-  * Push a new internal input source.
-  * <p>This method is useful for expanding an internal entity,
-  * or for unreading a string of characters. It makes the given
-  * array the new readBuffer, instead of characters converted from
-  * an input byte stream.
-  * <p>If an entity name is given and <code>doReport</code> is set,
-  * buffered data is flushed and the start of the internal entity is
-  * reported to the handler.
-  * @param ename The name of the entity being expanded, or null.
-  * @param ch The char array to push.
-  * @param start The index of the first character to read.
-  * @param length The number of characters to read.
-  * @see #pushString
-  * @see #pushURL
-  * @see #readBuffer
-  * @see #sourceType
-  * @see #pushInput
-  */
- private void pushCharArray(String ename, char[] ch, int start, int length)
-   throws SAXException
- {
-   // Push the existing status
-   pushInput(ename);
-   if (ename != null && doReport)
-     {
-       dataBufferFlush();
-       handler.startInternalEntity(ename);
-     }
-   sourceType = INPUT_INTERNAL;
-   readBuffer = ch;
-   readBufferPos = start;
-   // readCh () stops when readBufferPos reaches readBufferLength, so
-   // this is the index just past the last character, not a count.
-   readBufferLength = start + length;
-   readBufferOverflow = -1;
- }
-
- /**
-  * Save the current input source onto the stack.
-  * <p>This method saves all of the global variables associated with
-  * the current input source, so that they can be restored when a new
-  * input source has finished. It also tests for entity recursion:
-  * an error is reported if <code>ename</code> is already on the
-  * entity stack.
-  * <p>The entity name (possibly null) is always pushed onto the
-  * entity stack. Unless there is no current input (INPUT_NONE), the
-  * following variables are saved onto the input stack:
-  * <ol>
-  * <li>sourceType
-  * <li>externalEntity
-  * <li>readBuffer
-  * <li>readBufferPos
-  * <li>readBufferLength
-  * <li>readBufferOverflow
-  * <li>line
-  * <li>column
-  * <li>encoding
-  * <li>is
-  * <li>reader
-  * <li>currentByteCount
-  * </ol>
-  * @param ename The name of the entity (if any) causing the new input.
-  * @see #popInput
-  * @see #sourceType
-  * @see #externalEntity
-  * @see #readBuffer
-  * @see #readBufferPos
-  * @see #readBufferLength
-  * @see #line
-  * @see #encoding
-  */
- private void pushInput(String ename)
-   throws SAXException
- {
-   // Check for entity recursion.
-   if (ename != null)
-     {
-       Iterator entities = entityStack.iterator();
-       while (entities.hasNext())
-         {
-           String e = (String) entities.next();
-           // Compare by content: names are only guaranteed to be
-           // identical objects when string interning is enabled.
-           // equals () also handles the null entries on the stack.
-           if (ename.equals(e))
-             {
-               error("recursive reference to entity", ename, null);
-             }
-         }
-     }
-   entityStack.addLast(ename);
-
-   // Don't bother if there is no current input.
-   if (sourceType == INPUT_NONE)
-     {
-       return;
-     }
-
-   // Set up a snapshot of the current
-   // input source.
-   Input input = new Input();
-
-   input.sourceType = sourceType;
-   input.externalEntity = externalEntity;
-   input.readBuffer = readBuffer;
-   input.readBufferPos = readBufferPos;
-   input.readBufferLength = readBufferLength;
-   input.line = line;
-   input.encoding = encoding;
-   input.readBufferOverflow = readBufferOverflow;
-   input.is = is;
-   input.currentByteCount = currentByteCount;
-   input.column = column;
-   input.reader = reader;
-
-   // Push it onto the stack.
-   inputStack.addLast(input);
- }
-
- /**
-  * Restore a previous input source.
-  * <p>The end of the current entity is reported to the handler
-  * (and, for a stream or reader, the underlying source is closed);
-  * then all of the global variables saved by pushInput () are
-  * restored from the top of the input stack.
-  * @exception java.io.EOFException
-  * If there are no more entries on the input stack.
-  * @see #pushInput
-  * @see #sourceType
-  * @see #externalEntity
-  * @see #readBuffer
-  * @see #readBufferPos
-  * @see #readBufferLength
-  * @see #line
-  * @see #encoding
-  */
- private void popInput()
-   throws SAXException, IOException
- {
-   String ename = (String) entityStack.removeLast();
-
-   if (ename != null && doReport)
-     {
-       dataBufferFlush();
-     }
-
-   // Finish off the input that is being abandoned.
-   if (sourceType == INPUT_STREAM)
-     {
-       handler.endExternalEntity(ename);
-       is.close();
-     }
-   else if (sourceType == INPUT_READER)
-     {
-       handler.endExternalEntity(ename);
-       reader.close();
-     }
-   else if (sourceType == INPUT_INTERNAL)
-     {
-       if (ename != null && doReport)
-         {
-           handler.endInternalEntity(ename);
-         }
-     }
-
-   // Throw an EOFException if there
-   // is nothing else to pop.
-   if (inputStack.isEmpty())
-     {
-       throw new EOFException("no more input");
-     }
-
-   Input saved = (Input) inputStack.removeLast();
-
-   sourceType = saved.sourceType;
-   externalEntity = saved.externalEntity;
-   is = saved.is;
-   reader = saved.reader;
-   encoding = saved.encoding;
-   readBuffer = saved.readBuffer;
-   readBufferPos = saved.readBufferPos;
-   readBufferLength = saved.readBufferLength;
-   readBufferOverflow = saved.readBufferOverflow;
-   currentByteCount = saved.currentByteCount;
-   line = saved.line;
-   column = saved.column;
- }
-
- /**
-  * Return true if we can read the expected character.
-  * <p>Note that the character will be removed from the input stream
-  * on success, but will be put back on failure. Do not attempt to
-  * read the character again if the method succeeds.
-  * @param delim The character that should appear next; it is
-  * compared exactly.
-  * @return true if the character was successfully read, or false if
-  * it was not.
-  * @see #tryRead (String)
-  */
- private boolean tryRead(char delim)
-   throws SAXException, IOException
- {
-   char next = readCh();
-
-   // Push the character back if the match fails.
-   if (next != delim)
-     {
-       unread(next);
-       return false;
-     }
-   return true;
- }
-
- /**
-  * Return true if we can read the expected string.
-  * <p>This is simply a convenience method that delegates to the
-  * char array variant.
-  * <p>Note that the string will be removed from the input stream
-  * on success, but will be put back on failure. Do not attempt to
-  * read the string again if the method succeeds.
-  * @param delim The string that should appear next.
-  * @return true if the string was successfully read, or false if
-  * it was not.
-  * @see #tryRead (char)
-  */
- private boolean tryRead(String delim)
-   throws SAXException, IOException
- {
-   char[] expected = delim.toCharArray();
-   return tryRead(expected);
- }
-
- /**
-  * Return true if we can read the expected characters.
-  * <p>On a mismatch, the mismatching character and every character
-  * matched before it are pushed back onto the input.
-  * @param ch The characters that should appear next.
-  * @return true if all the characters were successfully read, or
-  * false if they were not.
-  */
- private boolean tryRead(char[] ch)
-   throws SAXException, IOException
- {
-   // Compare the input, character-
-   // by character.
-   for (int matched = 0; matched < ch.length; matched++)
-     {
-       char next = readCh();
-       if (next == ch[matched])
-         {
-           continue;
-         }
-
-       // Mismatch: undo this read, then the earlier matches.
-       unread(next);
-       if (matched != 0)
-         {
-           unread(ch, matched);
-         }
-       return false;
-     }
-   return true;
- }
-
- /**
-  * Return true if we can read some whitespace.
-  * <p>This is simply a convenience method.
-  * <p>If the next character is whitespace, skipWhitespace () is
-  * called to consume what follows; otherwise the character is
-  * pushed back.
-  * @return true if whitespace was found.
-  */
- private boolean tryWhitespace()
-   throws SAXException, IOException
- {
-   char next = readCh();
-   if (!isWhitespace(next))
-     {
-       unread(next);
-       return false;
-     }
-   skipWhitespace();
-   return true;
- }
-
- /**
- * Read all data until we find the specified string.
- * This is useful for scanning CDATA sections and PIs.
- * <p>This is inefficient right now, since it calls tryRead ()
- * for every character.
- * @param delim The string delimiter
- * @see #tryRead (String)
- * @see #readCh
- */
- private void parseUntil(String delim)
- throws SAXException, IOException
- {
- parseUntil(delim.toCharArray());
- }
-
- private void parseUntil(char[] delim)
- throws SAXException, IOException
- {
- char c;
- int startLine = line;
-
- try
- {
- while (!tryRead(delim))
- {
- c = readCh();
- dataBufferAppend(c);
- }
- }
- catch (EOFException e)
- {
- error("end of input while looking for delimiter "
- + "(started on line " + startLine
- + ')', null, new String(delim));
- }
- }
-
- //////////////////////////////////////////////////////////////////////
- // Low-level I/O.
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Prefetch US-ASCII XML/text decl from input stream into read buffer.
- * Doesn't buffer more than absolutely needed, so that when an encoding
- * decl says we need to create an InputStreamReader, we can discard our
- * buffer and reset(). Caller knows the first chars of the decl exist
- * in the input stream.
- */
- private void prefetchASCIIEncodingDecl()
- throws SAXException, IOException
- {
- int ch;
- readBufferPos = readBufferLength = 0;
-
- is.mark(readBuffer.length);
- while (true)
- {
- ch = is.read();
- readBuffer[readBufferLength++] = (char) ch;
- switch (ch)
- {
- case (int) '>':
- return;
- case -1:
- error("file ends before end of XML or encoding declaration.",
- null, "?>");
- }
- if (readBuffer.length == readBufferLength)
- {
- error("unfinished XML or encoding declaration");
- }
- }
- }
-
- /**
- * Read a chunk of data from an external input source.
- * <p>This is simply a front-end that fills the rawReadBuffer
- * with bytes, then calls the appropriate encoding handler.
- * @see #encoding
- * @see #rawReadBuffer
- * @see #readBuffer
- * @see #filterCR
- * @see #copyUtf8ReadBuffer
- * @see #copyIso8859_1ReadBuffer
- * @see #copyUcs2ReadBuffer
- * @see #copyUcs4ReadBuffer
- */
- private void readDataChunk()
- throws SAXException, IOException
- {
- // Number of chars (reader path) or bytes (stream path) obtained by
- // the read call below; zero or negative means nothing was read.
- int count;
-
- // See if we have any overflow (filterCR sets for CR at end).
- // A held-back CR is re-inserted at index 0 and new data is placed
- // after it (readBufferPos = 1).
- if (readBufferOverflow > -1)
- {
- readBuffer[0] = (char) readBufferOverflow;
- readBufferOverflow = -1;
- readBufferPos = 1;
- sawCR = true;
- }
- else
- {
- readBufferPos = 0;
- sawCR = false;
- }
-
- // input from a character stream.
- if (sourceType == INPUT_READER)
- {
- count = reader.read(readBuffer,
- readBufferPos, READ_BUFFER_MAX - readBufferPos);
- if (count < 0)
- {
- readBufferLength = readBufferPos;
- }
- else
- {
- readBufferLength = readBufferPos + count;
- }
- // The reader path always filters CRs (it does not consult sawCR).
- // NOTE(review): unlike the byte-stream path below, readBufferPos is
- // not reset to 0 here, so a re-inserted CR at index 0 lies before the
- // position filterCR starts from -- confirm this is intended.
- if (readBufferLength > 0)
- {
- filterCR(count >= 0);
- }
- sawCR = false;
- return;
- }
-
- // Read as many bytes as possible into the raw buffer.
- count = is.read(rawReadBuffer, 0, READ_BUFFER_MAX);
-
- // Dispatch to an encoding-specific reader method to populate
- // the readBuffer. In most parser speed profiles, these routines
- // show up at the top of the CPU usage chart.
- // Each routine writes from readBufferPos onward and sets
- // readBufferLength (and sawCR when it stores a CR).
- if (count > 0)
- {
- switch (encoding)
- {
- // one byte builtins
- case ENCODING_ASCII:
- // mask 0x0080: any byte with the high bit set is rejected
- copyIso8859_1ReadBuffer(count, (char) 0x0080);
- break;
- case ENCODING_UTF_8:
- copyUtf8ReadBuffer(count);
- break;
- case ENCODING_ISO_8859_1:
- // mask 0: no byte value is rejected
- copyIso8859_1ReadBuffer(count, (char) 0);
- break;
-
- // two byte builtins
- case ENCODING_UCS_2_12:
- copyUcs2ReadBuffer(count, 8, 0);
- break;
- case ENCODING_UCS_2_21:
- copyUcs2ReadBuffer(count, 0, 8);
- break;
-
- // four byte builtins
- case ENCODING_UCS_4_1234:
- copyUcs4ReadBuffer(count, 24, 16, 8, 0);
- break;
- case ENCODING_UCS_4_4321:
- copyUcs4ReadBuffer(count, 0, 8, 16, 24);
- break;
- case ENCODING_UCS_4_2143:
- copyUcs4ReadBuffer(count, 16, 24, 0, 8);
- break;
- case ENCODING_UCS_4_3412:
- copyUcs4ReadBuffer(count, 8, 0, 24, 16);
- break;
- }
- }
- else
- {
- // Nothing was read: the buffer holds only a re-inserted CR, if any.
- readBufferLength = readBufferPos;
- }
-
- // Consumers (and filterCR below) start from the beginning of the
- // buffer, which includes any re-inserted CR at index 0.
- readBufferPos = 0;
-
- // Filter out all carriage returns if we've seen any
- // (including any saved from a previous read)
- if (sawCR)
- {
- filterCR(count >= 0);
- sawCR = false;
-
- // must actively report EOF, lest some CRs get lost.
- // filterCR can hold back a lone trailing CR and leave the buffer
- // empty; in that case read again instead of returning no data.
- if (readBufferLength == 0 && count >= 0)
- {
- readDataChunk();
- }
- }
-
- // Account for the raw bytes consumed by this call.
- if (count > 0)
- {
- currentByteCount += count;
- }
- }
-
- /**
- * Filter carriage returns in the read buffer.
- * CRLF becomes LF; CR becomes LF.
- * @param moreData true iff more data might come from the same source
- * @see #readDataChunk
- * @see #readBuffer
- * @see #readBufferOverflow
- */
- private void filterCR(boolean moreData)
- {
- // i is the write index, j the read index; both start at readBufferPos
- // and the buffer is compacted in place (i never gets ahead of j).
- int i, j;
-
- readBufferOverflow = -1;
-
-loop:
- for (i = j = readBufferPos; j < readBufferLength; i++, j++)
- {
- switch (readBuffer[j])
- {
- case '\r':
- if (j == readBufferLength - 1)
- {
- // CR is the last character: whether it starts a CRLF pair cannot
- // be decided without the next chunk.
- if (moreData)
- {
- // Hold the CR back in readBufferOverflow for the next read.
- readBufferOverflow = '\r';
- readBufferLength--;
- }
- else // CR at end of buffer
- {
- readBuffer[i++] = '\n';
- }
- // Leave the loop without running its i++/j++ update.
- break loop;
- }
- else if (readBuffer[j + 1] == '\n')
- {
- // CRLF: skip the CR so the pair collapses to a single LF.
- j++;
- }
- readBuffer[i] = '\n';
- break;
-
- case '\n':
- default:
- readBuffer[i] = readBuffer[j];
- break;
- }
- }
- // The compacted data ends at the write index.
- readBufferLength = i;
- }
-
- /**
- * Convert a buffer of UTF-8-encoded bytes into UTF-16 characters.
- * <p>When readDataChunk () calls this method, the raw bytes are in
- * rawReadBuffer, and the final characters will appear in
- * readBuffer.
- * <p>Note that as of Unicode 3.1, good practice became a requirement,
- * so that each Unicode character has exactly one UTF-8 representation.
- * @param count The number of bytes to convert.
- * @see #readDataChunk
- * @see #rawReadBuffer
- * @see #readBuffer
- * @see #getNextUtf8Byte
- */
- private void copyUtf8ReadBuffer(int count)
- throws SAXException, IOException
- {
- // i indexes rawReadBuffer (input bytes), j indexes readBuffer
- // (output chars, starting at readBufferPos).
- int i = 0;
- int j = readBufferPos;
- int b1;
- char c = 0;
-
- /*
- // check once, so the runtime won't (if it's smart enough)
- if (count < 0 || count > rawReadBuffer.length)
- throw new ArrayIndexOutOfBoundsException (Integer.toString (count));
- */
-
- while (i < count)
- {
- // rawReadBuffer holds signed bytes, so any lead byte with the high
- // bit set shows up here as a negative value.
- b1 = rawReadBuffer[i++];
-
- // Determine whether we are dealing
- // with a one-, two-, three-, or four-
- // byte sequence.
- if (b1 < 0)
- {
- if ((b1 & 0xe0) == 0xc0)
- {
- // 2-byte sequence: 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
- c = (char) (((b1 & 0x1f) << 6)
- | getNextUtf8Byte(i++, count));
- // Values below 0x80 must use the one-byte form (overlong).
- if (c < 0x0080)
- {
- encodingError("Illegal two byte UTF-8 sequence",
- c, 0);
- }
-
- //Sec 2.11
- // [1] the two-character sequence #xD #xA
- // [2] the two-character sequence #xD #x85
- // The character is dropped when sawCR is set.
- if ((c == 0x0085 || c == 0x000a) && sawCR)
- {
- continue;
- }
-
- // Sec 2.11
- // [3] the single character #x85
-
- // NOTE(review): a '\r' is stored here, and c (0x85) is then also
- // stored by the common store at the end of the loop body, without
- // setting sawCR -- confirm this is intended.
- if (c == 0x0085 && xmlVersion == XML_11)
- {
- readBuffer[j++] = '\r';
- }
- }
- else if ((b1 & 0xf0) == 0xe0)
- {
- // 3-byte sequence:
- // zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
- // most CJKV characters
- c = (char) (((b1 & 0x0f) << 12) |
- (getNextUtf8Byte(i++, count) << 6) |
- getNextUtf8Byte(i++, count));
- //sec 2.11
- //[4] the single character #x2028
- // Stored as '\r' (and flagged via sawCR) under XML 1.1.
- if (c == 0x2028 && xmlVersion == XML_11)
- {
- readBuffer[j++] = '\r';
- sawCR = true;
- continue;
- }
- // Reject overlong forms and UTF-16 surrogate code points.
- if (c < 0x0800 || (c >= 0xd800 && c <= 0xdfff))
- {
- encodingError("Illegal three byte UTF-8 sequence",
- c, 0);
- }
- }
- else if ((b1 & 0xf8) == 0xf0)
- {
- // 4-byte sequence: 11101110wwwwzzzzyy + 110111yyyyxxxxxx
- // = 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
- // (uuuuu = wwww + 1)
- // "Surrogate Pairs" ... from the "Astral Planes"
- // Unicode 3.1 assigned the first characters there
- // (07 is an octal literal, i.e. the low three bits of the lead byte)
- int iso646 = b1 & 07;
- iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count);
- iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count);
- iso646 = (iso646 << 6) + getNextUtf8Byte(i++, count);
-
- // Values up to 0xffff must use a shorter form (overlong).
- if (iso646 <= 0xffff)
- {
- encodingError("Illegal four byte UTF-8 sequence",
- iso646, 0);
- }
- else
- {
- if (iso646 > 0x0010ffff)
- {
- encodingError("UTF-8 value out of range for Unicode",
- iso646, 0);
- }
- // Split into a UTF-16 surrogate pair (high, then low).
- iso646 -= 0x010000;
- readBuffer[j++] = (char) (0xd800 | (iso646 >> 10));
- readBuffer[j++] = (char) (0xdc00 | (iso646 & 0x03ff));
- continue;
- }
- }
- else
- {
- // The five and six byte encodings aren't supported;
- // they exceed the Unicode (and XML) range.
- encodingError("unsupported five or six byte UTF-8 sequence",
- 0xff & b1, i);
- // NOTREACHED
- c = 0;
- }
- }
- else
- {
- // 1-byte sequence: 000000000xxxxxxx = 0xxxxxxx
- // (US-ASCII character, "common" case, one branch to here)
- c = (char) b1;
- }
- // Common store for every path that did not "continue" above.
- readBuffer[j++] = c;
- if (c == '\r')
- {
- sawCR = true;
- }
- }
- // How many characters have we read?
- readBufferLength = j;
- }
-
- /**
- * Return the next byte value in a UTF-8 sequence.
- * If it is not possible to get a byte from the current
- * entity, throw an exception.
- * @param pos The current position in the rawReadBuffer.
- * @param count The number of bytes in the rawReadBuffer
- * @return The significant six bits of a non-initial byte in
- * a UTF-8 sequence.
- * @exception SAXException If an incomplete or malformed sequence is
- * reported as an encoding error.
- */
- private int getNextUtf8Byte(int pos, int count)
-   throws SAXException, IOException
- {
-   int val;
-
-   // Take the byte from the buffer if it is there, otherwise straight
-   // from the input stream (a sequence may straddle the chunk boundary).
-   if (pos < count)
-     {
-       // rawReadBuffer holds signed bytes; mask to 0..255 so that a
-       // diagnostic shows the real byte value (e.g. 0xc3) instead of a
-       // sign-extended one (0xffffffc3). The bit tests below are
-       // unaffected by the mask.
-       val = rawReadBuffer[pos] & 0xff;
-     }
-   else
-     {
-       val = is.read();
-       if (val == -1)
-         {
-           // Report the truncated sequence directly: there is no byte
-           // value to attach to the message.
-           error("unfinished multi-byte UTF-8 sequence at EOF");
-         }
-     }
-
-   // A continuation byte must look like 10xxxxxx.
-   if ((val & 0xc0) != 0x80)
-     {
-       encodingError("bad continuation of multi-byte UTF-8 sequence",
-                     val, pos + 1);
-     }
-
-   // Return the six significant bits.
-   return (val & 0x3f);
- }
-
- /**
- * Convert a buffer of US-ASCII or ISO-8859-1-encoded bytes into
- * UTF-16 characters.
- *
- * <p>When readDataChunk () calls this method, the raw bytes are in
- * rawReadBuffer, and the final characters will appear in
- * readBuffer.
- *
- * @param count The number of bytes to convert.
- * @param mask Bits that must not be set in any character: 0x0080 for
- * US-ASCII conversion, 0 for ISO-8859-1.
- * @see #readDataChunk
- * @see #rawReadBuffer
- * @see #readBuffer
- */
- private void copyIso8859_1ReadBuffer(int count, char mask)
-   throws IOException
- {
-   int out = readBufferPos;
-
-   for (int in = 0; in < count; in++)
-     {
-       // Each byte maps directly to the code point of the same value.
-       char c = (char) (rawReadBuffer[in] & 0xff);
-
-       // Any bit shared with the mask marks a character outside the
-       // permitted range.
-       if ((c & mask) != 0)
-         {
-           throw new CharConversionException("non-ASCII character U+"
-                                             + Integer.toHexString(c));
-         }
-
-       // Under XML 1.1, NEL (U+0085) is stored as a carriage return.
-       if (c == 0x0085 && xmlVersion == XML_11)
-         {
-           c = '\r';
-         }
-
-       readBuffer[out++] = c;
-       if (c == '\r')
-         {
-           sawCR = true;
-         }
-     }
-   readBufferLength = out;
- }
-
- /**
- * Convert a buffer of UCS-2-encoded bytes into UTF-16 characters
- * (as used in Java string manipulation).
- *
- * <p>When readDataChunk () calls this method, the raw bytes are in
- * rawReadBuffer, and the final characters will appear in
- * readBuffer.
- * @param count The number of bytes to convert.
- * @param shift1 The number of bits to shift byte 1.
- * @param shift2 The number of bits to shift byte 2
- * @see #readDataChunk
- * @see #rawReadBuffer
- * @see #readBuffer
- */
- private void copyUcs2ReadBuffer(int count, int shift1, int shift2)
-   throws SAXException
- {
-   int out = readBufferPos;
-
-   // Every character needs exactly two bytes.
-   if (count > 0 && (count & 1) != 0)
-     {
-       encodingError("odd number of bytes in UCS-2 encoding", -1, count);
-     }
-
-   // One loop per byte order keeps branching out of the inner loop.
-   // Only shift1 is consulted to pick the byte order.
-   if (shift1 == 0)
-     {
-       // Low byte first ("UTF-16-LE").
-       int in = 0;
-       while (in < count)
-         {
-           char c = (char) ((rawReadBuffer[in + 1] << 8)
-                            | (0xff & rawReadBuffer[in]));
-           readBuffer[out++] = c;
-           if (c == '\r')
-             {
-               sawCR = true;
-             }
-           in += 2;
-         }
-     }
-   else
-     {
-       // High byte first ("UTF-16-BE").
-       int in = 0;
-       while (in < count)
-         {
-           char c = (char) ((rawReadBuffer[in] << 8)
-                            | (0xff & rawReadBuffer[in + 1]));
-           readBuffer[out++] = c;
-           if (c == '\r')
-             {
-               sawCR = true;
-             }
-           in += 2;
-         }
-     }
-   readBufferLength = out;
- }
-
- /**
- * Convert a buffer of UCS-4-encoded bytes into UTF-16 characters.
- *
- * <p>When readDataChunk () calls this method, the raw bytes are in
- * rawReadBuffer, and the final characters will appear in
- * readBuffer.
- * <p>Java has Unicode chars, and this routine uses surrogate pairs
- * for ISO-10646 values between 0x00010000 and 0x0010ffff. An
- * exception is thrown if the ISO-10646 character has no Unicode
- * representation.
- *
- * @param count The number of bytes to convert.
- * @param shift1 The number of bits to shift byte 1.
- * @param shift2 The number of bits to shift byte 2
- * @param shift3 The number of bits to shift byte 3
- * @param shift4 The number of bits to shift byte 4
- * @see #readDataChunk
- * @see #rawReadBuffer
- * @see #readBuffer
- */
- private void copyUcs4ReadBuffer(int count, int shift1, int shift2,
-                                 int shift3, int shift4)
-   throws SAXException
- {
-   int j = readBufferPos;
-
-   // Every character needs exactly four bytes.
-   if (count > 0 && (count % 4) != 0)
-     {
-       encodingError("number of bytes in UCS-4 encoding " +
-                     "not divisible by 4",
-                     -1, count);
-     }
-   for (int i = 0; i < count; i += 4)
-     {
-       // Assemble the code point; the shifts encode the byte order.
-       int value = (((rawReadBuffer[i] & 0xff) << shift1) |
-                    ((rawReadBuffer[i + 1] & 0xff) << shift2) |
-                    ((rawReadBuffer[i + 2] & 0xff) << shift3) |
-                    ((rawReadBuffer[i + 3] & 0xff) << shift4));
-
-       if (value < 0 || value > 0x0010ffff)
-         {
-           // Outside the Unicode range. Negative values arise when the
-           // most significant byte has its high bit set.
-           encodingError("UCS-4 value out of range for Unicode",
-                         value, i);
-         }
-       else if (value <= 0x0000ffff)
-         {
-           // Basic Multilingual Plane, U+FFFF included: one char.
-           readBuffer[j++] = (char) value;
-           if (value == (int) '\r')
-             {
-               sawCR = true;
-             }
-         }
-       else
-         {
-           // Supplementary planes up to U+10FFFF: UTF-16 surrogate pair.
-           // The high surrogate is 0xD800 plus the top ten bits, the low
-           // surrogate 0xDC00 plus the bottom ten bits.
-           value -= 0x010000;
-           readBuffer[j++] = (char) (0xd800 | ((value >> 10) & 0x03ff));
-           readBuffer[j++] = (char) (0xdc00 | (value & 0x03ff));
-         }
-     }
-   readBufferLength = j;
- }
-
- /**
- * Report a character encoding error.
- */
- private void encodingError(String message, int value, int offset)
-   throws SAXException
- {
-   // A value of -1 means "no character code available"; the message is
-   // then reported as is. The offset argument is not used.
-   if (value != -1)
-     {
-       message = message + " (character code: 0x" +
-         Integer.toHexString(value) + ')';
-     }
-   // Always report: callers pass -1 for length and EOF problems, and
-   // those errors must not be dropped.
-   error(message);
- }
-
- //////////////////////////////////////////////////////////////////////
- // Local Variables.
- //////////////////////////////////////////////////////////////////////
-
- /**
- * Re-initialize the variables for each parse.
- */
- private void initializeVariables()
- {
-   // Position tracking starts at the first line.
-   line = 1;
-   column = 0;
-
-   // Parser mode flags all start cleared.
-   skippedPE = false;
-   inLiteral = false;
-   expandPE = false;
-   peIsError = false;
-   doReport = false;
-   inCDATA = false;
-
-   // No element is open yet.
-   currentElement = null;
-   currentElementContent = CONTENT_UNDECLARED;
-
-   // Fresh tables for DTD declarations.
-   elementInfo = new HashMap();
-   entityInfo = new HashMap();
-   notationInfo = new HashMap();
-
-   // Working buffers for character data and names.
-   dataBuffer = new char[DATA_BUFFER_INITIAL];
-   dataBufferPos = 0;
-   nameBuffer = new char[NAME_BUFFER_INITIAL];
-   nameBufferPos = 0;
-
-   // Input state: no source yet, empty stacks, no pending CR.
-   sourceType = INPUT_NONE;
-   externalEntity = null;
-   inputStack = new LinkedList();
-   entityStack = new LinkedList();
-   rawReadBuffer = new byte[READ_BUFFER_MAX];
-   readBufferOverflow = -1;
-   scratch = new InputSource();
-
-   // Attribute scratch space for the tag being parsed.
-   tagAttributes = new String[100];
-   tagAttributePos = 0;
-
-   // Empty symbol table.
-   symbolTable = new Object[SYMBOL_TABLE_LENGTH][];
- }
-
- /**
-  * Plain holder for a public identifier, a system identifier and a
-  * base URI.
-  */
- static class ExternalIdentifiers
- {
-
-   String publicId;
-   String systemId;
-   String baseUri;
-
-   /** Creates a holder with all three identifiers null. */
-   ExternalIdentifiers()
-   {
-     this(null, null, null);
-   }
-
-   /** Creates a holder with the given identifiers, stored as passed. */
-   ExternalIdentifiers(String publicId, String systemId, String baseUri)
-   {
-     this.baseUri = baseUri;
-     this.systemId = systemId;
-     this.publicId = publicId;
-   }
-
- }
-
- // Plain data holder describing one entity; it has no behavior.
- // NOTE(review): field meanings below are inferred from the names only
- // -- confirm against the code that fills this in.
- static class EntityInfo
- {
-
- // presumably the kind of entity
- int type;
- // external identifiers, if any
- ExternalIdentifiers ids;
- // presumably the replacement text
- String value;
- // presumably the notation of an unparsed entity
- String notationName;
-
- }
-
- // Plain data holder describing one attribute declaration; it has no
- // behavior.
- // NOTE(review): field meanings are inferred from the names only --
- // confirm against the code that fills this in.
- static class AttributeDecl
- {
-
- String type;
- String value;
- int valueType;
- String enumeration;
- String defaultValue;
-
- }
-
- // Plain data holder describing one element declaration; it has no
- // behavior.
- // NOTE(review): field meanings are inferred from the names only --
- // confirm against the code that fills this in.
- static class ElementDecl
- {
-
- int contentType;
- String contentModel;
- // raw (untyped) map; presumably keyed by attribute name -- verify
- HashMap attributes;
-
- }
-
- // Plain data holder for saved input state; it has no behavior.
- // The method ending just before tryRead(char) copies column and reader
- // back from an instance of this class into the parser's own fields.
- // NOTE(review): presumably the other fields are saved and restored the
- // same way -- confirm.
- static class Input
- {
-
- int sourceType;
- URLConnection externalEntity;
- char[] readBuffer;
- int readBufferPos;
- int readBufferLength;
- int line;
- int encoding;
- int readBufferOverflow;
- InputStream is;
- int currentByteCount;
- int column;
- Reader reader;
-
- }
-
-}
diff --git a/libjava/classpath/gnu/xml/aelfred2/XmlReader.java b/libjava/classpath/gnu/xml/aelfred2/XmlReader.java
deleted file mode 100644
index e0a0476..0000000
--- a/libjava/classpath/gnu/xml/aelfred2/XmlReader.java
+++ /dev/null
@@ -1,373 +0,0 @@
-/* XmlReader.java --
- Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
-
-This file is part of GNU Classpath.
-
-GNU Classpath is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU Classpath is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU Classpath; see the file COPYING. If not, write to the
-Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301 USA.
-
-Linking this library statically or dynamically with other modules is
-making a combined work based on this library. Thus, the terms and
-conditions of the GNU General Public License cover the whole
-combination.
-
-As a special exception, the copyright holders of this library give you
-permission to link this library with independent modules to produce an
-executable, regardless of the license terms of these independent
-modules, and to copy and distribute the resulting executable under
-terms of your choice, provided that you also meet, for each linked
-independent module, the terms and conditions of the license of that
-module. An independent module is a module which is not derived from
-or based on this library. If you modify this library, you may extend
-this exception to your version of the library, but you are not
-obligated to do so. If you do not wish to do so, delete this
-exception statement from your version. */
-
-package gnu.xml.aelfred2;
-
-import java.io.IOException;
-import java.util.Locale;
-
-import org.xml.sax.*;
-import org.xml.sax.ext.*;
-
-import gnu.xml.pipeline.EventFilter;
-import gnu.xml.pipeline.ValidationConsumer;
-
-
-/**
- * This SAX2 parser optionally layers a validator over the &AElig;lfred2
- * SAX2 parser. While this will not evaluate every XML validity constraint,
- * it does support all the validity constraints that are of any real utility
- * outside the strict SGML-compatible world. See the documentation for the
- * SAXDriver class for information about the SAX2 features and properties
- * that are supported, and documentation for the ValidationConsumer for
- * information about what validity constraints may not be supported.
- * (&AElig;lfred2 tests some of those, even in non-validating mode, to
- * achieve better conformance.)
- *
- * <p> Note that due to its internal construction, you can't change most
- * handlers until parse() returns. This diverges slightly from SAX, which
- * expects later binding to be supported. Early binding involves less
- * runtime overhead, which is an issue for event pipelines as used inside
- * this parser. Rather than relying on the parser to handle late binding
- * to your own handlers, do it yourself.
- *
- * @see SAXDriver
- * @see gnu.xml.pipeline.ValidationConsumer
- *
- * @author David Brownell
- */
-public final class XmlReader
-  implements XMLReader
-{
-
-  /**
-   * Error handler that rethrows every (nonfatal) error it is given,
-   * so that such errors abort the parse.
-   */
-  static class FatalErrorHandler
-    extends DefaultHandler2
-  {
-
-    public void error(SAXParseException e)
-      throws SAXException
-    {
-      throw e;
-    }
-
-  }
-
-  private SAXDriver aelfred2 = new SAXDriver();
-  private EventFilter filter = new EventFilter();
-  private boolean isValidating;
-  private boolean active;
-
-  /**
-   * Constructs a SAX Parser.
-   */
-  public XmlReader()
-  {
-  }
-
-  /**
-   * Constructs a SAX Parser, optionally treating validity errors
-   * as if they were fatal errors.
-   */
-  public XmlReader(boolean invalidIsFatal)
-  {
-    if (invalidIsFatal)
-      {
-        setErrorHandler(new FatalErrorHandler());
-      }
-  }
-
-  /**
-   * <b>SAX2</b>: Returns the object used to report the logical
-   * content of an XML document.
-   */
-  public ContentHandler getContentHandler()
-  {
-    return filter.getContentHandler();
-  }
-
-  /**
-   * <b>SAX2</b>: Assigns the object used to report the logical
-   * content of an XML document.
-   * @exception IllegalStateException if called mid-parse
-   */
-  public void setContentHandler(ContentHandler handler)
-  {
-    if (active)
-      {
-        throw new IllegalStateException("already parsing");
-      }
-    filter.setContentHandler(handler);
-  }
-
-  /**
-   * <b>SAX2</b>: Returns the object used to process declarations related
-   * to notations and unparsed entities.
-   */
-  public DTDHandler getDTDHandler()
-  {
-    return filter.getDTDHandler();
-  }
-
-  /**
-   * <b>SAX1</b> Assigns DTD handler
-   * @exception IllegalStateException if called mid-parse
-   */
-  public void setDTDHandler(DTDHandler handler)
-  {
-    if (active)
-      {
-        throw new IllegalStateException("already parsing");
-      }
-    filter.setDTDHandler(handler);
-  }
-
-  /**
-   * <b>SAX2</b>: Returns the object used when resolving external
-   * entities during parsing (both general and parameter entities).
-   */
-  public EntityResolver getEntityResolver()
-  {
-    return aelfred2.getEntityResolver();
-  }
-
-  /**
-   * <b>SAX1</b> Assigns parser's entity resolver
-   */
-  public void setEntityResolver(EntityResolver handler)
-  {
-    aelfred2.setEntityResolver(handler);
-  }
-
-  /**
-   * <b>SAX2</b>: Returns the object used to receive callbacks for XML
-   * errors of all levels (fatal, nonfatal, warning); this is never null;
-   */
-  public ErrorHandler getErrorHandler()
-  {
-    return aelfred2.getErrorHandler();
-  }
-
-  /**
-   * <b>SAX1</b> Assigns error handler
-   * @exception IllegalStateException if called mid-parse
-   */
-  public void setErrorHandler(ErrorHandler handler)
-  {
-    if (active)
-      {
-        throw new IllegalStateException("already parsing");
-      }
-    aelfred2.setErrorHandler(handler);
-  }
-
-  /**
-   * <b>SAX2</b>: Assigns the specified property.
-   * The filter is only updated when the new value is a different object
-   * from the current one.
-   * @exception IllegalStateException if called mid-parse
-   */
-  public void setProperty(String propertyId, Object value)
-    throws SAXNotRecognizedException, SAXNotSupportedException
-  {
-    if (active)
-      {
-        throw new IllegalStateException("already parsing");
-      }
-    if (getProperty(propertyId) != value)
-      {
-        filter.setProperty(propertyId, value);
-      }
-  }
-
-  /**
-   * <b>SAX2</b>: Returns the specified property.
-   * Only the declaration-handler and lexical-handler properties are
-   * recognized.
-   */
-  public Object getProperty(String propertyId)
-    throws SAXNotRecognizedException
-  {
-    if ((SAXDriver.PROPERTY + "declaration-handler").equals(propertyId)
-        || (SAXDriver.PROPERTY + "lexical-handler").equals(propertyId))
-      {
-        return filter.getProperty(propertyId);
-      }
-    throw new SAXNotRecognizedException(propertyId);
-  }
-
-  /**
-   * Turns on the underlying parser features enabled for validation:
-   * namespace prefix reporting and inclusion of external general and
-   * parameter entities.
-   */
-  private void forceValidating()
-    throws SAXNotRecognizedException, SAXNotSupportedException
-  {
-    aelfred2.setFeature(SAXDriver.FEATURE + "namespace-prefixes",
-                        true);
-    aelfred2.setFeature(SAXDriver.FEATURE + "external-general-entities",
-                        true);
-    aelfred2.setFeature(SAXDriver.FEATURE + "external-parameter-entities",
-                        true);
-  }
-
-  /**
-   * <b>SAX2</b>: Sets the state of features supported in this parser.
-   * Note that this parser requires reporting of namespace prefixes when
-   * validating.
-   */
-  public void setFeature(String featureId, boolean state)
-    throws SAXNotRecognizedException, SAXNotSupportedException
-  {
-    boolean value = getFeature(featureId);
-
-    // Nothing to do when the feature already has the requested state.
-    if (state == value)
-      {
-        return;
-      }
-
-    if ((SAXDriver.FEATURE + "validation").equals(featureId))
-      {
-        if (active)
-          {
-            throw new SAXNotSupportedException("already parsing");
-          }
-        if (state)
-          {
-            forceValidating();
-          }
-        isValidating = state;
-      }
-    else
-      {
-        aelfred2.setFeature(featureId, state);
-      }
-  }
-
-  /**
-   * <b>SAX2</b>: Tells whether this parser supports the specified feature.
-   * At this time, this directly parallels the underlying SAXDriver,
-   * except that validation is optionally supported.
-   *
-   * @see SAXDriver
-   */
-  public boolean getFeature(String featureId)
-    throws SAXNotRecognizedException, SAXNotSupportedException
-  {
-    if ((SAXDriver.FEATURE + "validation").equals(featureId))
-      {
-        return isValidating;
-      }
-
-    return aelfred2.getFeature(featureId);
-  }
-
-  /**
-   * <b>SAX1</b>: Sets the locale used for diagnostics; currently,
-   * only locales using the English language are supported.
-   * @param locale The locale for which diagnostics will be generated
-   */
-  public void setLocale(Locale locale)
-    throws SAXException
-  {
-    aelfred2.setLocale(locale);
-  }
-
-  /**
-   * <b>SAX1</b>: Preferred API to parse an XML document, using a
-   * system identifier (URI).
-   */
-  public void parse(String systemId)
-    throws SAXException, IOException
-  {
-    parse(new InputSource(systemId));
-  }
-
-  /**
-   * <b>SAX1</b>: Underlying API to parse an XML document, used
-   * directly when no URI is available. When this is invoked,
-   * and the parser is set to validate, some features will be
-   * automatically reset to appropriate values: for reporting
-   * namespace prefixes, and incorporating external entities.
-   *
-   * @param source The XML input source.
-   *
-   * @exception IllegalStateException if called mid-parse
-   * @exception SAXException The handlers may throw any SAXException,
-   *  and the parser normally throws SAXParseException objects.
-   * @exception IOException IOExceptions are normally passed through
-   *  the parser if there are problems reading the source document.
-   */
-  public void parse(InputSource source)
-    throws SAXException, IOException
-  {
-    synchronized (aelfred2)
-      {
-        if (active)
-          {
-            throw new IllegalStateException("already parsing");
-          }
-        active = true;
-      }
-
-    // From here on, every exit path must clear the "active" flag.
-    // Pipeline setup can throw too, so it sits inside the try block;
-    // otherwise a failed setup would leave this reader unusable.
-    try
-      {
-        EventFilter next;
-        boolean nsdecls;
-
-        // set up the output pipeline
-        if (isValidating)
-          {
-            forceValidating();
-            next = new ValidationConsumer(filter);
-          }
-        else
-          {
-            next = filter;
-          }
-
-        // connect pipeline and error handler
-        // don't let _this_ call to bind() affect xmlns* attributes
-        nsdecls = aelfred2.getFeature(SAXDriver.FEATURE
-                                      + "namespace-prefixes");
-        EventFilter.bind(aelfred2, next);
-        if (!nsdecls)
-          {
-            aelfred2.setFeature(SAXDriver.FEATURE + "namespace-prefixes",
-                                false);
-          }
-
-        // parse
-        aelfred2.parse(source);
-      }
-    finally
-      {
-        active = false;
-      }
-  }
-
-}
diff --git a/libjava/classpath/gnu/xml/aelfred2/package.html b/libjava/classpath/gnu/xml/aelfred2/package.html
deleted file mode 100644
index e204258..0000000
--- a/libjava/classpath/gnu/xml/aelfred2/package.html
+++ /dev/null
@@ -1,506 +0,0 @@
-<!DOCTYPE html PUBLIC
- '-//W3C//DTD XHTML 1.0 Transitional//EN'
- 'http://www.w3.org/TR/xhtml1/DTD/transitional.dtd'>
-
-<html><head>
- <title>package overview</title>
-<!--
-/*
- * Copyright (C) 1999,2000,2001 The Free Software Foundation, Inc.
- */
--->
-</head><body>
-
-<p> This package contains &AElig;lfred2, which includes an
-enhanced SAX2-compatible version of the &AElig;lfred
-non-validating XML parser, a modular (and hence optional)
-DTD validating parser, and modular (and hence optional)
-JAXP glue to those.
-Use these like any other SAX2 parsers. </p>
-
-<ul>
- <li><a href="#about">About &AElig;lfred</a><ul>
- <li><a href="#principles">Design Principles</a></li>
- <li><a href="#name">About the Name &AElig;lfred</a></li>
- <li><a href="#encodings">Character Encodings</a></li>
- <li><a href="#violations">Known Conformance Violations</a></li>
- <li><a href="#copyright">Licensing</a></li>
- </ul></li>
-
- <li><a href="#changes">Changes Since the Last Microstar Release</a><ul>
- <li><a href="#sax2">SAX2 Support</a></li>
- <li><a href="#validation">Validation</a></li>
- <li><a href="#smaller">You Want Smaller?</a></li>
- <li><a href="#bugfixes">Bugs Fixed</a></li>
- </ul></li>
-
-</ul>
-
-<h2><a name="about">About &AElig;lfred</a></h2>
-
-<p>&AElig;lfred is an XML parser written in the Java programming language.</p>
-
-<h3><a name="principles">Design Principles</a></h3>
-
-<p>In most Java applets and applications, XML should not be the central
-feature; instead, XML is the means to another end, such as loading
-configuration information, reading meta-data, or parsing transactions.</p>
-
-<p> When an XML parser is only a single component of a much larger
-program, it cannot be large, slow, or resource-intensive. With Java
-applets, in particular, code size is a significant issue. The standard
-modem is still not operating at 56 Kbaud, or sometimes even with data
-compression. Assuming an uncompressed 28.8 Kbaud modem, only about
-3 KBytes can be downloaded in one second; compression often doubles
-that speed, but a V.90 modem may not provide another doubling. When
-used with embedded processors, similar size concerns apply. </p>
-
-<p> &AElig;lfred is designed for easy and efficient use over the Internet,
-based on the following principles: </p> <ol>
-
-<li> &AElig;lfred must be as small as possible, so that it doesn't add too
- much to an applet's download time. </li>
-
-<li> &AElig;lfred must use as few class files as possible, to minimize the
- number of HTTP connections necessary. (The use of JAR files has made this
- less of a concern.) </li>
-
-<li> &AElig;lfred must be compatible with most or all Java implementations
- and platforms. (Write once, run anywhere.) </li>
-
-<li> &AElig;lfred must use as little memory as possible, so that it does
- not take away resources from the rest of your program. (It doesn't force
- you to use DOM or a similar costly data structure API.)</li>
-
-<li> &AElig;lfred must run as fast as possible, so that it does not slow down
- the rest of your program. </li>
-
-<li> &AElig;lfred must produce correct output for well-formed and valid
- documents, but need not reject every document that is not valid or
- not well-formed. (In &AElig;lfred2, correctness was a bigger concern
- than in the original version; and a validation option is available.) </li>
-
-<li> &AElig;lfred must provide full internationalization from the first
- release. (&AElig;lfred2 now automatically handles all encodings
- supported by the underlying JVM; previous versions handled only
- UTF-8, UTF-16, ASCII, and ISO-8859-1.)</li>
-
-</ol>
-
-<p>As you can see from this list, &AElig;lfred is designed for production
-use, but neither validation nor perfect conformance was a requirement.
-Good validating parsers exist, including one in this package,
-and you should use them as appropriate. (See conformance reviews
-available at <a href="http://www.xml.com/">http://www.xml.com</a>)
-</p>
-
-<p> One of the main goals of &AElig;lfred2 was to significantly improve
-conformance, while not significantly affecting the other goals stated above.
-Since the only use of this parser is with SAX, some classes could be
-removed, and so the overall size of &AElig;lfred was actually reduced.
-Subsequent performance work produced a notable speedup (over twenty
-percent on larger files). That is, the tradeoffs between speed, size, and
-conformance were re-targeted towards conformance and support of newer APIs
-(SAX2), with a positive performance impact. </p>
-
-<p> The role anticipated for this version of &AElig;lfred is as a
-lightweight Free Software SAX parser that can be used in essentially every
-Java program where the handful of conformance violations (noted below)
-are acceptable.
-That certainly includes applets, and
-nowadays one must also mention embedded systems as being even more
-size-critical.
-At this writing, all parsers that are more conformant are
-significantly larger, even when counting the optional
-validation support in this version of &AElig;lfred. </p>
-
-
-<h3><a name="name">About the Name <em>&AElig;lfred</em></a></h3>
-
-<p>&AElig;lfred the Great (AElfred in ASCII) was King of Wessex, and
-some say King of England, at the time of his death in 899 AD.
-&AElig;lfred introduced a wide-spread literacy program in the hope that
-his people would learn to read English, at least, if Latin was too
-difficult for them. This &AElig;lfred hopes to bring another sort of
-literacy to Java, using XML, at least, if full SGML is too difficult.</p>
-
-<p>The initial &AElig; ligature ("AE") is also a reminder that XML is
-not limited to ASCII.</p>
-
-
-<h3><a name="encodings">Character Encodings</a></h3>
-
-<p> The &AElig;lfred parser currently builds in support for a handful
-of input encodings. Of course these include UTF-8 and UTF-16, which
-all XML parsers are required to support:</p> <ul>
-
- <li> UTF-8 ... the standard eight bit encoding, used unless
- you provide an encoding declaration or a MIME charset tag.</li>
-
- <li> US-ASCII ... an extremely common seven bit encoding,
- which happens to be a subset of UTF-8 and ISO-8859-1 as well
- as many other encodings. XHTML web pages using US-ASCII
- (without an encoding declaration) are probably more
- widely interoperable than those in any other encoding. </li>
-
- <li> ISO-8859-1 ... includes accented characters used in
- much of western Europe (but excluding the Euro currency
- symbol).</li>
-
- <li> UTF-16 ... with several variants, this encodes each
- sixteen bit Unicode character in sixteen bits of output.
- Variants include UTF-16BE (big endian, no byte order mark),
- UTF-16LE (little endian, no byte order mark), and
- ISO-10646-UCS-2 (an older and less used encoding, using a
- version of Unicode without surrogate pairs). This is
- essentially the native encoding used by Java. </li>
-
- <li> ISO-10646-UCS-4 ... a seldom-used four byte encoding,
- also known as UTF-32BE. Four byte order variants are supported,
- including one known as UTF-32LE. Some operating systems
- standardized on UCS-4 despite its significant size penalty,
- in anticipation that Unicode (even with surrogate pairs)
- would eventually become limiting. UCS-4 permits encoding
- of non-Unicode characters, which Java can't represent (and
- XML doesn't allow).
- </li>
-
- </ul>
-
-<p> If you use any encoding other than UTF-8 or UTF-16 you should
-make sure to label your data appropriately: </p>
-
-<blockquote>
-&lt;?xml version="1.0" encoding="<b>ISO-8859-15</b>"?&gt;
-</blockquote>
-
-<p> Encodings accessed through <code>java.io.InputStreamReader</code>
-are now fully supported for both external labels (such as MIME types)
-and internal types (as shown above).
-There is one limitation in the support for internal labels:
-the encodings must be derived from the US-ASCII encoding;
-the EBCDIC family of encodings is not recognized.
-Note that Java defines its
-own encoding names, which don't always correspond to the standard
-Internet encoding names defined by the IETF/IANA, and that Java
-may even <em>require</em> use of nonstandard encoding names.
-Please report
-such problems; some of them can be worked around in this parser,
-and many can be worked around by using external labels.
-</p>
-
-<p>Note that if you are using the Euro symbol with a fixed-length
-eight bit encoding, you should probably be using the encoding label
-<em>iso-8859-15</em> or, with a Microsoft OS, <em>cp-1252</em>.
-Of course, UTF-8 and UTF-16 handle the Euro symbol directly.
-</p>
-
-
-<h3><a name="violations">Known Conformance Violations</a></h3>
-
-<p>Known conformance issues should be of negligible importance for
-most applications, and include: </p><ul>
-
- <li> Rather than following the voluminous "Appendix B" rules about
- what characters may appear in names (and name tokens), the Unicode
- rules embedded in <em>java.lang.Character</em> are used.
- This means mostly that some names are inappropriately accepted,
- though a few are inappropriately rejected. (It's much simpler
- to avoid that much special case code. Recent OASIS/NIST test
- cases may make these rules realistically testable.) </li>
-
- <li> Text containing "]]&gt;" is not rejected unless it fully resides
- in an internal buffer ... which is, thankfully, the typical case. This
- text is illegal, but sometimes appears in illegal attempts to
- nest CDATA sections. (Not catching that boundary condition
- substantially simplifies parsing text.) </li>
-
- <li> Surrogate characters that aren't correctly paired are ignored
- rather than rejected, unless they were encoded using UTF-8. (This
- simplifies parsing text.) Unicode 3.1 assigned the first characters
- to those character codes, in early 2001, so few documents (or tools)
- use such characters in any case. </li>
-
- <li> Declarations following references to an undefined parameter
- entity reference are not ignored. (Not maintaining and using state
- about this validity error simplifies declaration handling; few
- XML parsers address this constraint in any case.) </li>
-
- <li> Well formedness constraints for general entity references
- are not enforced. (The code to handle the "content" production
- is merged with the element parsing code, making it hard to reuse
- for this additional situation.) </li>
-
-</ul>
-
-<p> When tested against the July 12, 1999 version of the OASIS
-XML Conformance test suite, an earlier version passed 1057 of 1067 tests.
-That contrasts with the original version, which passed 867. The
-current parser is top-ranked in terms of conformance, as is its
-validating sibling (which has some additional conformance violations
-imposed on it by SAX2 API deficiencies as well as some of the more
-curious SGML layering artifacts found in the XML specification). </p>
-
-<p> The XML 1.0 specification itself was not without problems,
-and after some delays the W3C has come out with a revised
-"second edition" specification. While that doesn't resolve all
-the problems identified in the XML specification, many of the most
-egregious problems have been resolved. (You still need to drink
-magic Kool-Aid before some DTD-related issues make sense.)
-To the extent possible, this parser conforms to that second
-edition specification, and does well against corrected versions
-of the OASIS/NIST XML conformance test cases. See <a href=
-"http://xmlconf.sourceforge.net">http://xmlconf.sourceforge.net</a>
-for more information about SAX2/XML conformance testing. </p>
-
-
-<h3><a name="copyright">Copyright and distribution terms</a></h3>
-
-<p>
-The software in this package is distributed under the GNU General Public
-License (with a special exception described below).
-</p>
-
-<p>
-A copy of the GNU General Public License (GPL) is included in this distribution,
-in the file COPYING. If you do not have the source code, it is available at:
-
- <a href="http://www.gnu.org/software/classpath/">http://www.gnu.org/software/classpath/</a>
-</p>
-
-<pre>
- Linking this library statically or dynamically with other modules is
- making a combined work based on this library. Thus, the terms and
- conditions of the GNU General Public License cover the whole
- combination.
-
- As a special exception, the copyright holders of this library give you
- permission to link this library with independent modules to produce an
- executable, regardless of the license terms of these independent
- modules, and to copy and distribute the resulting executable under
- terms of your choice, provided that you also meet, for each linked
- independent module, the terms and conditions of the license of that
- module. An independent module is a module which is not derived from
- or based on this library. If you modify this library, you may extend
- this exception to your version of the library, but you are not
- obligated to do so. If you do not wish to do so, delete this
- exception statement from your version.
-
- Parts derived from code which carried the following notice:
-
- Copyright (c) 1997, 1998 by Microstar Software Ltd.
-
- AElfred is free for both commercial and non-commercial use and
- redistribution, provided that Microstar's copyright and disclaimer are
- retained intact. You are free to modify AElfred for your own use and
- to redistribute AElfred with your modifications, provided that the
- modifications are clearly documented.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- merchantability or fitness for a particular purpose. Please use it AT
- YOUR OWN RISK.
-</pre>
-
-<p> Some of this documentation was modified from the original
-&AElig;lfred README.txt file. All of it has been updated. </p>
-
-
-
-
-<h2><a name="changes">Changes Since the Last Microstar Release</a></h2>
-
-<p> As noted above, Microstar has not updated this parser since
-the summer of 1998, when it released version 1.2a on its web site.
-This release is intended to benefit the developer community by
-refocusing the API on SAX2, and improving conformance to the extent
-that most developers should not need to use another XML parser. </p>
-
-<p> The code has been cleaned up (referring to the XML 1.0 spec in
-all the production numbers in
-comments, rather than some preliminary draft, for one example) and
-has been sped up a bit as well.
-JAXP support has been added, although developers are still
-strongly encouraged to use the SAX2 APIs directly. </p>
-
-
-<h3><a name="sax2">SAX2 Support</a></h3>
-
-<p> The original version of &AElig;lfred did not support the
-SAX2 APIs. </p>
-
-<p> This version supports the SAX2 APIs, exposing the standard
-boolean feature descriptors. It supports the "DeclHandler" property
-to provide access to all DTD declarations not already exposed
-through the SAX1 API. The "LexicalHandler" property is supported,
-exposing entity boundaries (including the unnamed external subset) and
-things like comments and CDATA boundaries. SAX1 compatibility is
-currently provided.</p>
-
-
-<h3><a name="validation">Validation</a></h3>
-
-<p> In the 'pipeline' package in this same software distribution is an
-<a href="../pipeline/ValidationConsumer.html">XML Validation component</a>
-using any full SAX2 event stream (including all document type declarations)
-to validate. There is now a <a href="XmlReader.html">XmlReader</a> class
-which combines that class and this enhanced &AElig;lfred parser, creating
-an optionally validating SAX2 parser. </p>
-
-<p> As noted in the documentation for that validating component, certain
-validity constraints can't reliably be tested by a layered validator.
-These include all constraints relying on
-layering violations (exposing XML at the level of tokens or below,
-required since XML isn't a context-free grammar), some that
-SAX2 doesn't support, and a few others. The resulting validating
-parser is conformant enough for most applications that aren't doing
-strange SGML tricks with DTDs.
-Moreover, that validating filter can be used without
-a parser ... any application component that emits SAX event streams
-can DTD-validate its output on demand. </p>
-
-<h3><a name="smaller">You Want Smaller?</a></h3>
-
-<p> You'll have noticed that the original version of &AElig;lfred
-had small size as a top goal. &AElig;lfred2 normally includes a
-DTD validation layer, but you can package without that.
-Similarly, JAXP factory support is available but optional.
-Then the main added costs due to this revision are for
-supporting the SAX2 API itself; DTD validation is as
-cleanly layered as allowed by SAX2.</p>
-
-<h3><a name="bugfixes">Bugs Fixed</a></h3>
-
-<p> Bugs fixed in &AElig;lfred2 include: </p>
-
-<ol>
- <li> Originally &AElig;lfred didn't close file descriptors, which
- led to file descriptor leakage on programs which ran for any
- length of time. </li>
-
- <li> NOTATION declarations without system identifiers are
- now handled correctly. </li>
-
- <li> DTD events are now reported for all invocations of a
- given parser, not just the first one. </li>
-
- <li> More correct character handling: <ul>
-
- <li> Rejects out-of-range characters, both in text and in
- character references. </li>
-
- <li> Correctly handles character references that expand to
- surrogate pairs. </li>
-
- <li> Correctly handles UTF-8 encodings of surrogate pairs. </li>
-
- <li> Correctly handles Unicode 3.1 rules about illegal UTF-8
- encodings: there is only one legal encoding per character. </li>
-
- <li> PUBLIC identifiers are now rejected if they have illegal
- characters. </li>
-
- <li> The parser is more correct about what characters are allowed
- in names and name tokens. Uses Unicode rules (built in to Java)
- rather than the voluminous XML rules, although some extensions
- have been made to match XML rules more closely.</li>
-
- <li> Line ends are now normalized to newlines in all known
- cases. </li>
-
- </ul></li>
-
- <li> Certain validity errors were previously treated as well
- formedness violations. <ul>
-
- <li> Repeated declarations of an element type are no
- longer fatal errors. </li>
-
- <li> Undeclared parameter entity references are no longer
- fatal errors. </li>
-
- </ul></li>
-
- <li> Attribute handling is improved: <ul>
-
- <li> Whitespace must exist between attributes. </li>
-
- <li> Only one value for a given attribute is permitted. </li>
-
- <li> ATTLIST declarations don't need to declare attributes. </li>
-
- <li> Attribute values are normalized when required. </li>
-
- <li> Tabs in attribute values are normalized to spaces. </li>
-
- <li> Attribute values containing a literal "&lt;" are rejected. </li>
-
- </ul></li>
-
- <li> More correct entity handling: <ul>
-
- <li> Whitespace must precede NDATA when declaring unparsed
- entities.</li>
-
- <li> Parameter entity declarations may not have NDATA annotations. </li>
-
- <li> The XML specification has a bug in that it doesn't specify
- that certain contexts exist within which parameter entity
- expansion must not be performed. Lacking an official erratum,
- this parser now disables such expansion inside comments,
- processing instructions, ignored sections, public identifiers,
- and parts of entity declarations. </li>
-
- <li> Entity expansions that include quote characters no longer
- confuse parsing of strings using such expansions. </li>
-
- <li> Whitespace in the values of internal entities is not mapped
- to space characters. </li>
-
- <li> General Entity references in attribute defaults within the
- DTD now cause fatal errors when the entity is not defined at the
- time it is referenced. </li>
-
- <li> Malformed general entity references in entity declarations are
- now detected. </li>
-
- </ul></li>
-
- <li> Neither conditional sections
- nor parameter entity references within markup declarations
- are permitted in the internal subset. </li>
-
- <li> Processing instructions whose target names are "XML"
- (ignoring case) are now rejected. </li>
-
- <li> Comments may not include "--".</li>
-
- <li> Most "]]&gt;" sequences in text are rejected. </li>
-
- <li> Correct syntax for standalone declarations is enforced. </li>
-
- <li> Setting a locale for diagnostics only produces an exception
- if the language of that locale isn't English. </li>
-
- <li> Some more encoding names are recognized. These include the
- Unicode 3.0 variants of UTF-16 (UTF-16BE, UTF-16LE) as well as
- US-ASCII and a few commonly seen synonyms. </li>
-
- <li> Text (from character content, PIs, or comments) large enough
- not to fit into internal buffers is now handled correctly even in
- some cases which were originally handled incorrectly.</li>
-
- <li> Content is now reported for element types for which attributes
- have been declared, but no content model is known. (Such documents
- are invalid, but may still be well formed.) </li>
-
-</ol>
-
-<p> Other bugs may also have been fixed. </p>
-
-<p> For better overall validation support, some of the validity
-constraints that can't be verified using the SAX2 event stream
-are now reported directly by &AElig;lfred2. </p>
-
-</body></html>
-