package org.jdom.input; import java.io.*; import java.lang.reflect.*; import java.net.*; import java.util.*; import org.jdom.*; import org.xml.sax.*; import org.xml.sax.ext.LexicalHandler; import org.xml.sax.ext.DeclHandler; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; /** *

SAXHandler supports SAXBuilder

* * @author Brett McLaughlin * @author Jason Hunter * @author Philip Nelson */ public class SAXHandler extends DefaultHandler implements LexicalHandler, DeclHandler, DTDHandler { private static final String CVS_ID = "@(#) $RCSfile: SAXHandler.java,v $ $Revision: 1.21 $ $Date: 2001/08/17 18:37:06 $ $Name: $"; /** Document object being built */ private Document document; // Note: keeping a "current element" variable to avoid the constant // peek() calls to the top of the stack has shown to cause no noticeable // performance improvement. /** Element stack */ protected Stack stack; /** Indicator of where in the document we are */ protected boolean atRoot; /** Indicator of whether we are in a DTD */ protected boolean inDTD = false; /** Indicator of whether we are in a CDATA */ protected boolean inCDATA = false; /** Indicator of whether we should expand entities */ private boolean expand = true; /** Indicator of whether we are actively suppressing (non-expanding) a current entity */ protected boolean suppress = false; /** How many nested entities we're currently within */ private int entityDepth = 0; /** Temporary holder for namespaces that have been declared with * startPrefixMapping, but are not yet available on the element */ protected LinkedList declaredNamespaces; /** The namespaces in scope and actually attached to an element */ protected LinkedList availableNamespaces; private Map externalEntities; /** The JDOMFactory used for JDOM object creation */ private JDOMFactory factory; /** Whether to ignore ignorable whitespace */ private boolean ignoringWhite = false; /** Temporary holder for the internal subset */ private StringBuffer buffer = new StringBuffer(); /** *

* This will create a new SAXHandler that listens to SAX * events and creates a JDOM Document. The objects will be constructed * using the default factory. *

* * @throws IOException when errors occur. */ public SAXHandler() throws IOException { this((JDOMFactory)null); } /** *

* This will set the Document to use. *

* * @param document Document being parsed. * @throws IOException when errors occur. * * @deprecated Deprecated in beta7, use SAXHandler() instead and let * SAXHandler create the Document, then retrieve it with getDocument() */ public SAXHandler(Document document) throws IOException { this(new DefaultJDOMFactory()); this.document = document; } /** *

* This will create a new SAXHandler that listens to SAX * events and creates a JDOM Document. The objects will be constructed * using the provided factory. *

* * @param factory JDOMFactory to be used for constructing * objects * @throws IOException when errors occur. */ public SAXHandler(JDOMFactory factory) throws IOException { if (factory != null) { this.factory = factory; } else { this.factory = new DefaultJDOMFactory(); } atRoot = true; stack = new Stack(); declaredNamespaces = new LinkedList(); availableNamespaces = new LinkedList(); availableNamespaces.add(Namespace.XML_NAMESPACE); externalEntities = new HashMap(); document = this.factory.document((Element)null); } /** *

* handle an attribute declaration in a DTD *

* * @param eName String element name of attribute * @param aName String attribute name * @param type String attribute type * @param valueDefault String default value of attribute * @param value String value of attribute */ public void attributeDecl(String eName, String aName, String type, String valueDefault, String value) { buffer.append(" \n"); } /** *

* This will report character data (within an element). *

* * @param ch char[] character array with character data * @param start int index in array where data starts. * @param length int length of data. * @throws SAXException when things go wrong */ public void characters(char[] ch, int start, int length) throws SAXException { if (suppress) return; if (length == 0) return; String data = new String(ch, start, length); if (inCDATA) { getCurrentElement().addContent(factory.cdata(data)); } else { getCurrentElement().addContent(data); } } /** *

* This reports that a comments is parsed. If not in the * DTD, this comment is added to the current JDOM * Element, or the Document itself * if at that level. *

* * @param ch ch[] array of comment characters. * @param start int index to start reading from. * @param length int length of data. */ public void comment(char[] ch, int start, int length) throws SAXException { if (suppress) return; if (inDTD) { String comment = new String(ch, start, length); buffer.append(" \n"); return; } String commentText = new String(ch, start, length); if ((!inDTD) && (!commentText.equals(""))) { if (stack.empty()) { document.addContent( factory.comment(commentText)); } else { getCurrentElement().addContent( factory.comment(commentText)); } } } /** *

* handle an element declaration in a DTD *

* * @param name String name of element * @param model String model of the element in DTD syntax */ public void elementDecl(String name, String model) { buffer.append(" \n"); } /** *

* Report a CDATA section - ignored in SAXBuilder. *

*/ public void endCDATA() throws SAXException { if (suppress) return; inCDATA = false; } /** *

* This signifies that the reading of the DTD is complete. *

*/ public void endDTD() throws SAXException { inDTD = false; document.getDocType().setInternalSubset(buffer.toString()); } /** *

* Indicates the end of an element * (</[element name]>) is reached. Note that * the parser does not distinguish between empty * elements and non-empty elements, so this will occur uniformly. *

* * @param namespaceURI String URI of namespace this * element is associated with * @param localName String name of element without prefix * @param qName String name of element in XML 1.0 form * @throws SAXException when things go wrong */ public void endElement(String namespaceURI, String localName, String qName) throws SAXException { if (suppress) return; try { Element element = (Element)stack.pop(); // Remove the namespaces that this element makes available List addl = element.getAdditionalNamespaces(); if (addl.size() > 0) { availableNamespaces.removeAll(addl); } } catch (EmptyStackException ex1) { throw new SAXException( "Ill-formed XML document (missing opening tag for " + localName + ")"); } if (stack.empty()) { atRoot = true; } } public void endEntity(String name) throws SAXException { entityDepth--; if (entityDepth == 0) { // No way are we suppressing if not in an entity, // regardless of the "expand" value suppress = false; } } /** *

* This will add the prefix mapping to the JDOM * Document object. *

* * @param prefix String namespace prefix. * @param uri String namespace URI. */ public void endPrefixMapping(String prefix) throws SAXException { if (suppress) return; // Remove the namespace from the available list // (Should find the namespace fast because recent adds // are at the front of the list. It may not be the head // tho because endPrefixMapping calls on the same element // can come in any order.) Iterator itr = availableNamespaces.iterator(); while (itr.hasNext()) { Namespace ns = (Namespace) itr.next(); if (prefix.equals(ns.getPrefix())) { itr.remove(); return; } } } /** * This is called when the parser encounters an external entity * declaration. *

* * @param name entity name * @param publicId public id * @param systemId system id * @throws SAXException when things go wrong */ public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException { // Store the public and system ids for the name externalEntities.put(name, new String[]{publicId, systemId}); buffer.append(" \n"); } /** *

* Returns the being-parsed element. *

* * @return Element - element at the top of the stack. */ protected Element getCurrentElement() throws SAXException { try { return (Element)(stack.peek()); } catch (EmptyStackException ex1) { throw new SAXException( "Ill-formed XML document (multiple root elements detected)"); } } /** *

* Returns the document. Should be called after parsing is complete. *

* * @return Document - Document that was built */ public Document getDocument() { return document; } /** *

* Returns whether or not entities will be expanded during the * build. *

* * @return boolean - whether entity expansion * will occur during build. * * @see #setExpandEntities */ public boolean getExpandEntities() { return expand; } /** *

* Returns the factory used for constructing objects. *

* * @return JDOMFactory - the factory used for * constructing objects. * * @see #SAXHandler(org.jdom.input.JDOMFactory) */ public JDOMFactory getFactory() { return factory; } /** *

* Returns whether or not the parser will elminate whitespace in * element content (sometimes known as "ignorable whitespace") when * building the document. *

* * @return boolean - whether ignorable whitespace will * be ignored during build. * * @see #setIgnoringElementContentWhitespace */ public boolean getIgnoringElementContentWhitespace() { return ignoringWhite; } /** *

* For a given namespace prefix, this will return the * {@link Namespace} object for that prefix, * within the current scope. *

* * @param prefix namespace prefix. * @return Namespace - namespace for supplied prefix. */ private Namespace getNamespace(String prefix) { Iterator i = availableNamespaces.iterator(); while (i.hasNext()) { Namespace ns = (Namespace)i.next(); if (prefix.equals(ns.getPrefix())) { return ns; } } return Namespace.NO_NAMESPACE; } /** *

* Capture ignorable whitespace as text. If * setIgnoringElementContentWhitespace(true) has been called then this * method does nothing. *

* * @param ch [] - char array of ignorable whitespace * @param start int - starting position within array * @param length int - length of whitespace after start * @throws SAXException when things go wrong */ public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { if (suppress) return; if (ignoringWhite) return; if (length == 0) return; getCurrentElement().addContent(new String(ch, start, length)); } /** *

* Handle the declaration of an entity in the internal subset *

* * @param name String name of entity listed in DTD * @param value String value of the entity */ public void internalEntityDecl(String name, String value) { buffer.append(" \n"); } /** *

* Handle the declaration of a Notation in a DTD *

* * @param name java.lang.String * @param publicID java.lang.String * @param systemID java.lang.String */ public void notationDecl(String name, String publicID, String systemID) { buffer.append(" \n"); } /** *

* This will indicate that a processing instruction (other than * the XML declaration) has been encountered. *

* * @param target String target of PI * @param data String * This sets whether or not to expand entities during the build. * A true means to expand entities as normal content. A false means to * leave entities unexpanded as EntityRef objects. The * default is true. *

* * @param expand boolean indicating whether entity expansion * should occur. */ public void setExpandEntities(boolean expand) { this.expand = expand; } /** *

* Specifies whether or not the parser should elminate whitespace in * element content (sometimes known as "ignorable whitespace") when * building the document. Only whitespace which is contained within * element content that has an element only content model will be * eliminated (see XML Rec 3.2.1). For this setting to take effect * requires that validation be turned on. The default value of this * setting is false. *

* * @param ignoringWhite Whether to ignore ignorable whitespace */ public void setIgnoringElementContentWhitespace(boolean ignoringWhite) { this.ignoringWhite = ignoringWhite; } /** *

* Report a CDATA section - ignored in SAXBuilder. *

*/ public void startCDATA() throws SAXException { if (suppress) return; inCDATA = true; } /** *

* This will signify that a DTD is being parsed, and can be * used to ensure that comments and other lexical structures * in the DTD are not added to the JDOM Document * object. *

* * @param name String name of element listed in DTD * @param publicId String public ID of DTD * @param systemId String syste ID of DTD */ public void startDTD(String name, String publicId, String systemId) throws SAXException { document.setDocType( factory.docType(name, publicId, systemId)); inDTD = true; } /** *

* This reports the occurrence of an actual element. It will include * the element's attributes, with the exception of XML vocabulary * specific attributes, such as * xmlns:[namespace prefix] and * xsi:schemaLocation. *

* * @param namespaceURI String namespace URI this element * is associated with, or an empty * String * @param localName String name of element (with no * namespace prefix, if one is present) * @param qName String XML 1.0 version of element name: * [namespace prefix]:[localName] * @param atts Attributes list for this element * @throws SAXException when things go wrong */ public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if (suppress) return; Element element = null; if ((namespaceURI != null) && (!namespaceURI.equals(""))) { String prefix = ""; // Determine any prefix on the Element if (localName != qName) { int split = qName.indexOf(":"); prefix = qName.substring(0, split); } Namespace elementNamespace = Namespace.getNamespace(prefix, namespaceURI); element = factory.element(localName, elementNamespace); // Remove this namespace from those in the temp declared list /** * I've commented out these lines to ensure that element's that have a namespace * make those namespaces available to their attributes, which this seems to * break. However, I'm not 100% sure that this doesn't cause some other * problems. My gut feeling is "no", but I'm not sure, so I'm just commenting * it out. We'll remove for good in the next drop I think. * - Brett, 07/30/2001 if (declaredNamespaces.size() > 0) { declaredNamespaces.remove(elementNamespace); } */ } else { element = factory.element(localName); } // Take leftover declared namespaces and add them to this element's // map of namespaces if (declaredNamespaces.size() > 0) { transferNamespaces(element); } // Handle attributes for (int i=0, len=atts.getLength(); i 1) { // Short cut out if we're expanding or if we're nested return; } // Ignore DTD references, and translate the standard 5 if ((!inDTD) && (!name.equals("amp")) && (!name.equals("lt")) && (!name.equals("gt")) && (!name.equals("apos")) && (!name.equals("quot"))) { if (!expand) { String pub = null; String sys = null; String[] ids = (String[]) externalEntities.get(name); if (ids != null) { pub = ids[0]; // may be null, that's OK sys = ids[1]; // may be null, that's OK } EntityRef entity = factory.entityRef(name, pub, sys); getCurrentElement().addContent(entity); suppress = true; } } } /** *

* This will add the prefix mapping to the JDOM * Document object. *

* * @param prefix String namespace prefix. * @param uri String namespace URI. */ public void startPrefixMapping(String prefix, String uri) throws SAXException { if (suppress) return; Namespace ns = Namespace.getNamespace(prefix, uri); declaredNamespaces.add(ns); } /** *

* This will take the supplied {@link Element} and * transfer its namespaces to the global namespace storage. *

* * @param element Element to read namespaces from. */ private void transferNamespaces(Element element) { Iterator i = declaredNamespaces.iterator(); while (i.hasNext()) { Namespace ns = (Namespace)i.next(); availableNamespaces.addFirst(ns); element.addNamespaceDeclaration(ns); } declaredNamespaces.clear(); } /** *

* handler for unparsed entity declarations in the DTD *

* * @param name String of the unparsed entity decl * @param publicId String of the unparsed entity decl * @param systemId String of the unparsed entity decl * @param notationName String of the unparsed entity decl */ public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) throws SAXException { buffer.append(" \n");} }