/*-- $Id: SAXHandler.java,v 1.24 2001/10/07 21:22:01 bmclaugh Exp $ Copyright (C) 2000 Brett McLaughlin & Jason Hunter. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions, and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the disclaimer that follows these conditions in the documentation and/or other materials provided with the distribution. 3. The name "JDOM" must not be used to endorse or promote products derived from this software without prior written permission. For written permission, please contact license@jdom.org. 4. Products derived from this software may not be called "JDOM", nor may "JDOM" appear in their name, without prior written permission from the JDOM Project Management (pm@jdom.org). In addition, we request (but do not require) that you include in the end-user documentation provided with the redistribution and/or in the software itself an acknowledgement equivalent to the following: "This product includes software developed by the JDOM Project (http://www.jdom.org/)." Alternatively, the acknowledgment may be graphical using the logos available at http://www.jdom.org/images/logos. THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. This software consists of voluntary contributions made by many individuals on behalf of the JDOM Project and was originally created by Brett McLaughlin and Jason Hunter . For more information on the JDOM Project, please see . */ package org.jdom.input; import java.io.*; import java.lang.reflect.*; import java.net.*; import java.util.*; import org.jdom.*; import org.xml.sax.*; import org.xml.sax.ext.LexicalHandler; import org.xml.sax.ext.DeclHandler; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; /** *

SAXHandler supports SAXBuilder

*
 * @author Brett McLaughlin
 * @author Jason Hunter
 * @author Philip Nelson
 */
public class SAXHandler extends DefaultHandler implements LexicalHandler,
                                                          DeclHandler,
                                                          DTDHandler {

    /** CVS revision identifier of this source file. */
    private static final String CVS_ID =
      "@(#) $RCSfile: SAXHandler.java,v $ $Revision: 1.24 $ $Date: 2001/10/07 21:22:01 $ $Name: $";

    /** <code>Document</code> object being built; returned by getDocument() */
    private Document document;

    // Note: keeping a "current element" variable to avoid the constant
    // peek() calls to the top of the stack has shown to cause no noticeable
    // performance improvement.

    /** Stack of the elements currently open; the top is the element
     *  that receives new content */
    protected Stack stack;

    /** True while no element is open, i.e. the next element reported
     *  becomes the root; set again once the element stack empties */
    protected boolean atRoot;

    /** True between the startDTD and endDTD callbacks */
    protected boolean inDTD = false;

    /** True between the startCDATA and endCDATA callbacks */
    protected boolean inCDATA = false;

    /** Whether entities are expanded into normal content (default true) */
    private boolean expand = true;

    /** True while the content of a non-expanded entity is being skipped;
     *  most callbacks return immediately while this is set */
    protected boolean suppress = false;

    /** How many nested entities we're currently within */
    private int entityDepth = 0;

    /** Temporary holder for namespaces that have been declared with
      * startPrefixMapping, but are not yet available on the element */
    protected LinkedList declaredNamespaces;

    /** The namespaces in scope and actually attached to an element;
     *  seeded with the XML namespace by the constructor */
    protected LinkedList availableNamespaces;

    /** Accumulates the text handed to DocType.setInternalSubset in endDTD */
    private StringBuffer buffer = new StringBuffer();

    /** External entities declared in this document: entity name mapped to
     *  a two-element String array holding {publicId, systemId} */
    private Map externalEntities;

    /** The JDOMFactory used for JDOM object creation */
    private JDOMFactory factory;

    /** Whether ignorableWhitespace events are dropped instead of stored */
    private boolean ignoringWhite = false;

    /**

* This will set the Document to use. *

* * @param document Document being parsed. * @throws IOException when errors occur. * * @deprecated Deprecated in beta7, use SAXHandler() instead and let * SAXHandler create the Document, then retrieve it with getDocument() */ public SAXHandler(Document document) throws IOException { this(new DefaultJDOMFactory()); this.document = document; } /** *

*
     * Creates a new <code>SAXHandler</code> that listens to SAX events and
     * builds a JDOM Document from them. All JDOM objects are constructed
     * with a {@link DefaultJDOMFactory}.
     *
     * @throws IOException when errors occur.
     */
    public SAXHandler() throws IOException {
        this(new DefaultJDOMFactory());
    }

    /**

* This will create a new SAXHandler that listens to SAX * events and creates a JDOM Document. The objects will be constructed * using the provided factory. *

* * @param factory JDOMFactory to be used for constructing * objects * @throws IOException when errors occur. */ public SAXHandler(JDOMFactory factory) throws IOException { if (factory != null) { this.factory = factory; } else { this.factory = new DefaultJDOMFactory(); } atRoot = true; stack = new Stack(); declaredNamespaces = new LinkedList(); availableNamespaces = new LinkedList(); availableNamespaces.add(Namespace.XML_NAMESPACE); externalEntities = new HashMap(); document = this.factory.document((Element)null); } /** *

*
     * Returns the document this handler builds. The content is only
     * complete once parsing has finished.
     *
     * @return <code>Document</code> - Document that was built
     */
    public Document getDocument() {
        return this.document;
    }

    /**

*
     * Returns the factory this handler uses to construct JDOM objects.
     *
     * @return <code>JDOMFactory</code> - the factory used for
     * constructing objects.
     *
     * @see #SAXHandler(org.jdom.input.JDOMFactory)
     */
    public JDOMFactory getFactory() {
        return this.factory;
    }

    /**

*
     * Sets whether or not to expand entities during the build.
     * A true means to expand entities as normal content. A false means to
     * leave entities unexpanded as <code>EntityRef</code> objects, in which
     * case the entity's own content is skipped. The default is true.
     *
     * @param expand <code>boolean</code> indicating whether entity expansion
     * should occur.
     */
    public void setExpandEntities(boolean expand) {
        this.expand = expand;
    }

    /**

*
     * Tells whether entities are expanded into normal content during
     * the build.
     *
     * @return <code>boolean</code> - whether entity expansion
     * will occur during build.
     *
     * @see #setExpandEntities
     */
    public boolean getExpandEntities() {
        return this.expand;
    }

    /**

*
     * Specifies whether or not the parser should eliminate whitespace in
     * element content (sometimes known as "ignorable whitespace") when
     * building the document. Only whitespace which is contained within
     * element content that has an element only content model will be
     * eliminated (see XML Rec 3.2.1). For this setting to take effect
     * requires that validation be turned on. The default value of this
     * setting is <code>false</code>.
     *
     * @param ignoringWhite Whether to ignore ignorable whitespace
     */
    public void setIgnoringElementContentWhitespace(boolean ignoringWhite) {
        this.ignoringWhite = ignoringWhite;
    }

    /**

*
     * Tells whether the handler drops whitespace in element content
     * (sometimes known as "ignorable whitespace") when building the
     * document.
     *
     * @return <code>boolean</code> - whether ignorable whitespace will
     * be ignored during build.
     *
     * @see #setIgnoringElementContentWhitespace
     */
    public boolean getIgnoringElementContentWhitespace() {
        return this.ignoringWhite;
    }

    /**
     * This is called when the parser encounters an external entity
     * declaration.
     *

* * @param name entity name * @param publicId public id * @param systemId system id * @throws SAXException when things go wrong */ public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException { // Store the public and system ids for the name externalEntities.put(name, new String[]{publicId, systemId}); buffer.append(" \n"); } /** *

* This handles an attribute declaration in a DTD *

* * @param eName String element name of attribute * @param aName String attribute name * @param type String attribute type * @param valueDefault String default value of attribute * @param value String value of attribute */ public void attributeDecl(String eName, String aName, String type, String valueDefault, String value) throws SAXException { buffer.append(" \n"); } /** *

* Handle an element declaration in a DTD *

* * @param name String name of element * @param model String model of the element in DTD syntax */ public void elementDecl(String name, String model) throws SAXException { buffer.append(" \n"); } /** *

* Handle an internal entity declaration in a DTD. *

* * @param name String name of entity * @param value String value of the entity */ public void internalEntityDecl(String name, String value) throws SAXException { buffer.append(" \n"); } /** *

* This will indicate that a processing instruction (other than * the XML declaration) has been encountered. *

* * @param target String target of PI * @param data String * This will add the prefix mapping to the JDOM * Document object. *

* * @param prefix String namespace prefix. * @param uri String namespace URI. */ public void startPrefixMapping(String prefix, String uri) throws SAXException { if (suppress) return; Namespace ns = Namespace.getNamespace(prefix, uri); declaredNamespaces.add(ns); } /** *

* This will add the prefix mapping to the JDOM * Document object. *

* * @param prefix String namespace prefix. * @param uri String namespace URI. */ public void endPrefixMapping(String prefix) throws SAXException { if (suppress) return; // Remove the namespace from the available list // (Should find the namespace fast because recent adds // are at the front of the list. It may not be the head // tho because endPrefixMapping calls on the same element // can come in any order.) Iterator itr = availableNamespaces.iterator(); while (itr.hasNext()) { Namespace ns = (Namespace) itr.next(); if (prefix.equals(ns.getPrefix())) { itr.remove(); return; } } } /** *

* This reports the occurrence of an actual element. It will include * the element's attributes, with the exception of XML vocabulary * specific attributes, such as * xmlns:[namespace prefix] and * xsi:schemaLocation. *

* * @param namespaceURI String namespace URI this element * is associated with, or an empty * String * @param localName String name of element (with no * namespace prefix, if one is present) * @param qName String XML 1.0 version of element name: * [namespace prefix]:[localName] * @param atts Attributes list for this element * @throws SAXException when things go wrong */ public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if (suppress) return; Element element = null; if ((namespaceURI != null) && (!namespaceURI.equals(""))) { String prefix = ""; // Determine any prefix on the Element if (localName != qName) { int split = qName.indexOf(":"); prefix = qName.substring(0, split); } Namespace elementNamespace = Namespace.getNamespace(prefix, namespaceURI); element = factory.element(localName, elementNamespace); // Remove this namespace from those in the temp declared list /** * I've commented out these lines to ensure that element's that have a namespace * make those namespaces available to their attributes, which this seems to * break. However, I'm not 100% sure that this doesn't cause some other * problems. My gut feeling is "no", but I'm not sure, so I'm just commenting * it out. We'll remove for good in the next drop I think. * - Brett, 07/30/2001 */ /** Keeps these codes as it is, such that transferNamespaces won't adding * it to element's additional namespace (by addNamespaceDeclaration). *. -- Tom M. Yeh, 10/12/01 */ if (declaredNamespaces.size() > 0) { declaredNamespaces.remove(elementNamespace); } } else { element = factory.element(localName); } // Take leftover declared namespaces and add them to this element's // map of namespaces if (declaredNamespaces.size() > 0) { transferNamespaces(element); } // Handle attributes for (int i=0, len=atts.getLength(); i * This will take the supplied {@link Element} and * transfer its namespaces to the global namespace storage. *

*
     * Each namespace waiting in declaredNamespaces is pushed to the front
     * of availableNamespaces and registered on the element as an
     * additional namespace declaration; declaredNamespaces is then emptied.
     *
     * @param element <code>Element</code> that receives the pending
     *                namespace declarations.
     */
    private void transferNamespaces(Element element) {
        for (Iterator pending = declaredNamespaces.iterator();
             pending.hasNext(); ) {
            Namespace declared = (Namespace) pending.next();
            availableNamespaces.addFirst(declared);
            element.addNamespaceDeclaration(declared);
        }
        declaredNamespaces.clear();
    }

    /**

* For a given namespace prefix, this will return the * {@link Namespace} object for that prefix, * within the current scope. *

* * @param prefix namespace prefix. * @return Namespace - namespace for supplied prefix. */ /** Then, you don't need this method (and availableNamespaces) -- Tom M. Yeh private Namespace getNamespace(String prefix) { Iterator i = availableNamespaces.iterator(); while (i.hasNext()) { Namespace ns = (Namespace)i.next(); if (prefix.equals(ns.getPrefix())) { return ns; } } return Namespace.NO_NAMESPACE; } */ /** *

*
     * Receives character data found within an element and appends it to
     * the current element: as a CDATA node while inside a CDATA section,
     * as plain text otherwise. Empty runs, and anything reported while
     * entity content is being suppressed, are dropped.
     *
     * @param ch <code>char[]</code> character array with character data
     * @param start <code>int</code> index in array where data starts.
     * @param length <code>int</code> length of data.
     * @throws SAXException when things go wrong
     */
    public void characters(char[] ch, int start, int length)
        throws SAXException {

        if (suppress || length == 0) {
            return;
        }

        // Text is added regardless of inDTD: an earlier inDTD guard was
        // disabled because of problems with inline DTDs under Xerces.
        final String text = new String(ch, start, length);
        final Element target = getCurrentElement();
        if (inCDATA) {
            target.addContent(factory.cdata(text));
        } else {
            target.addContent(text);
        }
    }

    /**

*
     * Stores ignorable whitespace as text on the current element. Nothing
     * is stored when setIgnoringElementContentWhitespace(true) has been
     * called, when the run is empty, or while entity content is being
     * suppressed.
     *
     * @param ch <code>[]</code> - char array of ignorable whitespace
     * @param start <code>int</code> - starting position within array
     * @param length <code>int</code> - length of whitespace after start
     * @throws SAXException when things go wrong
     */
    public void ignorableWhitespace(char[] ch, int start, int length)
        throws SAXException {

        if (suppress || ignoringWhite || length == 0) {
            return;
        }

        String whitespace = new String(ch, start, length);
        getCurrentElement().addContent(whitespace);
    }

    /**

* Indicates the end of an element * (</[element name]>) is reached. Note that * the parser does not distinguish between empty * elements and non-empty elements, so this will occur uniformly. *

* * @param namespaceURI String URI of namespace this * element is associated with * @param localName String name of element without prefix * @param qName String name of element in XML 1.0 form * @throws SAXException when things go wrong */ public void endElement(String namespaceURI, String localName, String qName) throws SAXException { if (suppress) return; try { Element element = (Element)stack.pop(); // Remove the namespaces that this element makes available List addl = element.getAdditionalNamespaces(); if (addl.size() > 0) { availableNamespaces.removeAll(addl); } } catch (EmptyStackException ex1) { throw new SAXException( "Ill-formed XML document (missing opening tag for " + localName + ")"); } if (stack.empty()) { atRoot = true; } } /** *

* This will signify that a DTD is being parsed, and can be * used to ensure that comments and other lexical structures * in the DTD are not added to the JDOM Document * object. *

* * @param name String name of element listed in DTD * @param publicId String public ID of DTD * @param systemId String syste ID of DTD */ public void startDTD(String name, String publicId, String systemId) throws SAXException { document.setDocType( factory.docType(name, publicId, systemId)); inDTD = true; } /** *

* This signifies that the reading of the DTD is complete. *

*/ public void endDTD() throws SAXException { document.getDocType().setInternalSubset(buffer.toString()); inDTD = false; } public void startEntity(String name) throws SAXException { entityDepth++; if (expand || entityDepth > 1) { // Short cut out if we're expanding or if we're nested return; } // Ignore DTD references, and translate the standard 5 if ((!inDTD) && (!name.equals("amp")) && (!name.equals("lt")) && (!name.equals("gt")) && (!name.equals("apos")) && (!name.equals("quot"))) { if (!expand) { String pub = null; String sys = null; String[] ids = (String[]) externalEntities.get(name); if (ids != null) { pub = ids[0]; // may be null, that's OK sys = ids[1]; // may be null, that's OK } EntityRef entity = factory.entityRef(name, pub, sys); getCurrentElement().addContent(entity); suppress = true; } } } public void endEntity(String name) throws SAXException { entityDepth--; if (entityDepth == 0) { // No way are we suppressing if not in an entity, // regardless of the "expand" value suppress = false; } } /** *

* Report a CDATA section - ignored in SAXBuilder. *

*/ public void startCDATA() throws SAXException { if (suppress) return; inCDATA = true; } /** *

* Report a CDATA section - ignored in SAXBuilder. *

*/ public void endCDATA() throws SAXException { if (suppress) return; inCDATA = false; } /** *

* This reports that a comments is parsed. If not in the * DTD, this comment is added to the current JDOM * Element, or the Document itself * if at that level. *

* * @param ch ch[] array of comment characters. * @param start int index to start reading from. * @param length int length of data. */ public void comment(char[] ch, int start, int length) throws SAXException { if (suppress) return; String commentText = new String(ch, start, length); if (inDTD) { String comment = new String(ch, start, length); buffer.append(" \n"); return; } if ((!inDTD) && (!commentText.equals(""))) { if (stack.empty()) { document.addContent( factory.comment(commentText)); } else { getCurrentElement().addContent( factory.comment(commentText)); } } } /** *

* Handle the declaration of a Notation in a DTD *

* * @param name name of the notation * @param publicID the public ID of the notation * @param systemID the system ID of the notation */ public void notationDecl(String name, String publicID, String systemID) throws SAXException { buffer.append(" \n"); } /** *

* Handler for unparsed entity declarations in the DTD *

* * @param name String of the unparsed entity decl * @param publicId String of the unparsed entity decl * @param systemId String of the unparsed entity decl * @param notationName String of the unparsed entity decl */ public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) throws SAXException { buffer.append(" \n"); } /** *

*
     * Returns the element currently being parsed, without removing it
     * from the element stack.
     *
     * @return <code>Element</code> - element at the top of the stack.
     * @throws SAXException when no element is open
     */
    protected Element getCurrentElement() throws SAXException {
        if (stack.empty()) {
            throw new SAXException(
                "Ill-formed XML document (multiple root elements detected)");
        }
        return (Element) stack.peek();
    }
}