No subject

Fri Aug 6 17:04:17 PDT 2004

OK; I was able to do a clean round-trip with simple
XML documents (XML file -> StAX -> JDOM -> serialize to
file).

Oh, and the code compiled against the latest CVS source, but
not against b10; I guess there have been a few API
changes recently, to get them in for 1.0?

Let me know if it looks good. It'd also be interesting
to know how performance compares to SAX-based
alternatives (there shouldn't be huge differences but
who knows).

-+ Tatu +-

__________________________________
Do you Yahoo!?
Yahoo! Mail is new and improved - Check it out!
http://promotions.yahoo.com/new_mail
--0-1221077319-1087445116=:11648
Content-Type: text/x-java; name="StAXBuilder.java"
Content-Description: StAXBuilder.java
Content-Disposition: inline; filename="StAXBuilder.java"

package org.jdom.input;

import java.util.HashMap;

import org.jdom.*;

import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;

/**
 * Builds a JDOM {@link org.jdom.Document org.jdom.Document} using a
 * {@link javax.xml.stream.XMLStreamReader}.
 * <p>
 * Known limitations of this implementation:
 * <ul>
 *  <li>DTD events are skipped; no <code>DocType</code> is created.</li>
 *  <li>Character data (including white space) reported outside of the
 *    root element is dropped.</li>
 *  <li>Nested elements are handled by recursion, so nesting depth of the
 *    input is bounded by available stack space.</li>
 * </ul>
 */
public class StAXBuilder {

    /**
     * Map that contains conversion from textual attribute types StAX uses,
     * to int values JDOM uses. Both all-uppercase and all-lowercase
     * spellings are registered; any other spelling resolves to
     * {@link Attribute#UNDECLARED_TYPE}.
     */
    final static HashMap attrTypes = new HashMap(32);
    static {
        attrTypes.put("CDATA", new Integer(Attribute.CDATA_TYPE));
        attrTypes.put("cdata", new Integer(Attribute.CDATA_TYPE));
        attrTypes.put("ID", new Integer(Attribute.ID_TYPE));
        attrTypes.put("id", new Integer(Attribute.ID_TYPE));
        attrTypes.put("IDREF", new Integer(Attribute.IDREF_TYPE));
        attrTypes.put("idref", new Integer(Attribute.IDREF_TYPE));
        attrTypes.put("IDREFS", new Integer(Attribute.IDREFS_TYPE));
        attrTypes.put("idrefs", new Integer(Attribute.IDREFS_TYPE));
        attrTypes.put("ENTITY", new Integer(Attribute.ENTITY_TYPE));
        attrTypes.put("entity", new Integer(Attribute.ENTITY_TYPE));
        attrTypes.put("ENTITIES", new Integer(Attribute.ENTITIES_TYPE));
        attrTypes.put("entities", new Integer(Attribute.ENTITIES_TYPE));
        attrTypes.put("NMTOKEN", new Integer(Attribute.NMTOKEN_TYPE));
        attrTypes.put("nmtoken", new Integer(Attribute.NMTOKEN_TYPE));
        attrTypes.put("NMTOKENS", new Integer(Attribute.NMTOKENS_TYPE));
        attrTypes.put("nmtokens", new Integer(Attribute.NMTOKENS_TYPE));
        attrTypes.put("NOTATION", new Integer(Attribute.NOTATION_TYPE));
        attrTypes.put("notation", new Integer(Attribute.NOTATION_TYPE));
        attrTypes.put("ENUMERATED", new Integer(Attribute.ENUMERATED_TYPE));
        attrTypes.put("enumerated", new Integer(Attribute.ENUMERATED_TYPE));
    }

    /** The factory for creating new JDOM objects */
    private JDOMFactory factory = new DefaultJDOMFactory();

    /**
     * Default constructor. The builder starts out using a
     * {@link org.jdom.DefaultJDOMFactory}.
     */
    public StAXBuilder() {
    }

    /**
     * This sets a custom JDOMFactory for the builder.  Use this to build
     * the tree with your own subclasses of the JDOM classes.
     *
     * @param factory <code>JDOMFactory</code> to use
     */
    public void setFactory(JDOMFactory factory) {
        this.factory = factory;
    }

    /**
     * Returns the current {@link org.jdom.JDOMFactory} in use.
     * @return the factory in use
     */
    public JDOMFactory getFactory() {
        return factory;
    }

    /**
     * This will build a JDOM tree given a StAX stream reader.
     *
     * @param r Stream reader from which input is read.
     * @return <code>Document</code> - JDOM document object.
     * @throws XMLStreamException If the reader threw such exception (to
     *   indicate a parsing or I/O problem)
     */
    public Document build(XMLStreamReader r)
        throws XMLStreamException
    {
        /* Should we do sanity checking to see that r is positioned at
         * beginning? Not doing so will allow creating documents from
         * sub-trees, though?
         */
        Document doc = factory.document(null);
        buildTree(r, doc, null);
        return doc;
    }

    /**
     * This takes a <code>XMLStreamReader</code> and recursively builds up
     * a JDOM tree. Events are consumed until the end of the current level
     * (an END_ELEMENT or END_DOCUMENT event) is seen, or the reader has
     * no more events.
     *
     * @param r Stream reader from which events are read.
     * @param doc JDOM <code>Document</code> being built.
     * @param current <code>Element</code> that is current parent; null
     *   indicates we are at root level.
     * @throws XMLStreamException If the reader threw such exception, or
     *   reported an event type a stream reader should never produce.
     */
    private void buildTree(XMLStreamReader r, Document doc,
                           Element current)
        throws XMLStreamException
    {
        while (r.hasNext()) {
            Content child;

            switch (r.next()) {
            case XMLStreamConstants.CDATA:
                child = factory.cdata(r.getText());
                break;

            case XMLStreamConstants.CHARACTERS:
            case XMLStreamConstants.SPACE:
                /* Small complication: although (ignorable) white space
                 * is allowed in prolog/epilog, and StAX may report such
                 * event, JDOM barfs if trying to add it. Thus, let's just
                 * ignore all textual stuff outside the tree:
                 */
                child = (current == null) ? null : factory.text(r.getText());
                break;

            case XMLStreamConstants.COMMENT:
                child = factory.comment(r.getText());
                break;

            case XMLStreamConstants.DTD:
                /* !!! Note: StAX does not expose enough information about
                 *  doctype declaration (specifically, public and system id!);
                 *  should (re-)parse information... not yet implemented
                 */
                // TBI
                child = null;
                break;

            case XMLStreamConstants.END_DOCUMENT:
            case XMLStreamConstants.END_ELEMENT:
                /* Both of these indicate end of this level, actually; assuming
                 * reader does its own well-formedness checks, shouldn't need
                 * more checking here.
                 */
                return;

            case XMLStreamConstants.ENTITY_DECLARATION:
            case XMLStreamConstants.NOTATION_DECLARATION:
                /* Shouldn't really get these, but maybe some stream readers
                 * do provide the info. If so, better ignore it -- DTD event
                 * should have most/all we need.
                 */
                child = null;
                break;

            case XMLStreamConstants.ENTITY_REFERENCE:
                child = factory.entityRef(r.getLocalName());
                break;

            case XMLStreamConstants.PROCESSING_INSTRUCTION:
                child = factory.processingInstruction(r.getPITarget(), r.getPIData());
                break;

            case XMLStreamConstants.START_DOCUMENT:
                /* This should only be received at the beginning of document...
                 * so, should we indicate the problem or not?
                 */
                /* For now, let it pass: maybe some (broken) readers pass
                 * that info as first event in beginning of doc?
                 */
                child = null;
                break;

            case XMLStreamConstants.START_ELEMENT:
                /* The helper creates the element and attaches it to its
                 * parent; what remains is to recurse for its content.
                 * Since the element is already added, just loop again
                 * afterwards.
                 */
                buildTree(r, doc, buildElement(r, doc, current));
                continue;

                // Should never get these, from a stream reader:
            case XMLStreamConstants.ATTRIBUTE:
            case XMLStreamConstants.NAMESPACE:
                throw new XMLStreamException("Unexpected iterator event type: "+r.getEventType()+"; should not receive such types (broken stream reader?)");

            default:
                throw new XMLStreamException("Unrecognized iterator event type: "+r.getEventType()+"; should not receive such types (broken stream reader?)");
            }

            if (child != null) {
                if (current == null) {
                    factory.addContent(doc, child);
                } else {
                    factory.addContent(current, child);
                }
            }
        }
    }

    /**
     * Creates a JDOM element for the START_ELEMENT event the reader is
     * currently positioned at, attaches it to its parent, and copies over
     * namespace declarations and attributes. Does not advance the reader.
     *
     * @param r Stream reader positioned at a START_ELEMENT event.
     * @param doc JDOM <code>Document</code> being built; receives the
     *   element as its root when <code>current</code> is null.
     * @param current Parent <code>Element</code>, or null at root level.
     * @return The newly created (and already attached) element.
     */
    private Element buildElement(XMLStreamReader r, Document doc,
                                 Element current)
    {
        // Prefix is needed for special handling of the element's own namespace
        String elemPrefix = r.getPrefix();
        if (elemPrefix == null) {
            elemPrefix = "";
        }
        String elemURI = r.getNamespaceURI();
        Element newElem = factory.element(r.getLocalName(), elemPrefix,
                                          (elemURI == null) ? "" : elemURI);

        /* Element is attached before its attributes get resolved, so that
         * prefix lookups below can also see declarations made by ancestors.
         */
        if (current == null) { // at root
            doc.setRootElement(newElem);
        } else {
            factory.addContent(current, newElem);
        }

        // Any declared namespaces?
        for (int i = 0, len = r.getNamespaceCount(); i < len; ++i) {
            String prefix = r.getNamespacePrefix(i);
            /* StAX reports the default namespace declaration with a null
             * prefix; normalize so the comparison below can not fail with
             * a NullPointerException.
             */
            if (prefix == null) {
                prefix = "";
            }
            Namespace ns = Namespace.getNamespace(prefix, r.getNamespaceURI(i));
            // JDOM has special handling for element's "own" ns, which was
            // already set when the element was constructed:
            if (!prefix.equals(elemPrefix)) {
                factory.addNamespaceDeclaration(newElem, ns);
            }
        }

        // And then the attributes:
        for (int i = 0, len = r.getAttributeCount(); i < len; ++i) {
            int type = resolveAttrType(r.getAttributeType(i));
            String prefix = r.getAttributePrefix(i);
            Namespace ns;
            if (prefix == null || prefix.length() == 0) {
                // Attribute not in any namespace
                ns = Namespace.NO_NAMESPACE;
            } else {
                ns = newElem.getNamespace(prefix);
            }
            // Pass the resolved type along, so it is not lost
            Attribute attr = factory.attribute(r.getAttributeLocalName(i),
                                               r.getAttributeValue(i),
                                               type, ns);
            factory.setAttribute(newElem, attr);
        }

        return newElem;
    }

    /**
     * Converts textual attribute type reported by the stream reader into
     * matching JDOM attribute type constant.
     *
     * @param typeStr Attribute type as reported by the stream reader; may
     *   be null or empty.
     * @return Matching JDOM type constant, or
     *   {@link Attribute#UNDECLARED_TYPE} if the type is missing or not
     *   one of the known type names.
     */
    private static int resolveAttrType(String typeStr) {
        if (typeStr != null && typeStr.length() > 0) {
            Integer I = (Integer) attrTypes.get(typeStr);
            if (I != null) {
                return I.intValue();
            }
        }
        return Attribute.UNDECLARED_TYPE;
    }

    /**
     * Trivial test driver for testing functionality: parses the file given
     * as the only argument, builds a JDOM document from it and writes the
     * document to standard output.
     *
     * @param args Command line arguments; exactly one (name of the XML
     *   file to read) is expected.
     * @throws Exception If reading, parsing or outputting fails.
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("Usage: java ... [file]");
            System.exit(1);
        }
        String filename = args[0];
        /* Input is passed as a byte stream (instead of a Reader), so that
         * the parser determines the encoding from the document itself and
         * not from the platform default charset.
         */
        java.io.InputStream in = new java.io.FileInputStream(filename);
        Document domDoc;
        try {
            javax.xml.stream.XMLInputFactory f = javax.xml.stream.XMLInputFactory.newInstance();
            XMLStreamReader sr = f.createXMLStreamReader(in);
            try {
                domDoc = new StAXBuilder().build(sr);
            } finally {
                // Closing the stream reader does not close the underlying
                // input source; that is done separately below.
                sr.close();
            }
        } finally {
            in.close();
        }

        System.out.println("Done:");
        System.out.println("----- JDom -----");
        org.jdom.output.XMLOutputter outputter = new org.jdom.output.XMLOutputter();
        java.io.PrintWriter pw = new java.io.PrintWriter(System.out);
        outputter.output(domDoc, pw);
        pw.flush();
        System.out.println("----- /JDom -----");
    }

}

--0-1221077319-1087445116=:11648--