[jdom-interest] PATCH: thorough XMLOutputter change

Tue Jun 19 07:59:47 PDT 2001

Finally got my network card working again.  Anyone who wants to know
about PCMCIA support on Red Hat 7.1, just ask me :-)

Here's a diff for the changes I made to XMLOutputter over the weekend.

Note that using my test cases, I identified a bug in
suppressDeclaration -- the newly deprecated method.  Turns out it
didn't actually do anything :-) I say this not just to gloat, but to
report the awe and wonder I feel about Unit Testing.  I wrote a test
script, so every time I make a change I run "./build.sh && ./test.sh"
-- and so as soon as my merge compiled, it caught this bug for free.

     public void setSuppressDeclaration(boolean suppressDeclaration) {
-        this.omitDeclaration = omitDeclaration;
+        this.omitDeclaration = suppressDeclaration;
     }

I just noticed that the diff contains tab characters :-( This means
you should be careful about how you transfer it before applying the
patch.  If this is a problem, I can try to figure out how to get emacs
to convert tabs to spaces...

--- PATCH FOLLOWS:

Summary: (for the CVS log)

- fixed superfluous newline bug
- expanded comments on setLineSeparator
- fixed some typos in comments
- extracted methods: 
	maybePrintln(Writer, int)
	createNamespaceStack()
	printElementNamespace(...)
	printAdditionalNamespaces(...)
	isEmpty(Element)
	isStringOnly(List)
	isWhitespace(String)
	getElementText(Element)
- reorganized cases in printElementContent in order to extract
repeated conditional
- added debug method getSettings() and support methods toBytes(String)
and toByte(char)
- added protected inner NamespaceStack, to allow subclassing
- fixed bug in deprecated setSuppressDeclaration (!)

cvs diff -u src/java/org/jdom/output/XMLOutputter.java :

Index: src/java/org/jdom/output/XMLOutputter.java
===================================================================
RCS file: /home/cvspublic/jdom/src/java/org/jdom/output/XMLOutputter.java,v
retrieving revision 1.55
diff -u -r1.55 XMLOutputter.java
--- src/java/org/jdom/output/XMLOutputter.java	2001/06/18 20:21:00	1.55
+++ src/java/org/jdom/output/XMLOutputter.java	2001/06/19 13:33:58
@@ -244,7 +244,25 @@
      *  other end should normalize this.  --Rusty
      * </blockquote>
      *
+     * <p>
+     * To output "UNIX-style" documents, call
+     * <code>setLineSeparator("\n")</code>.  To output "Mac-style"
+     * documents, call <code>setLineSeparator("\r")</code>.  DOS-style
+     * documents use CR-LF ("\r\n"), which is the default.
+     * </p>
+     *
+     * <p>
+     * Note that this only applies to newlines generated by the
+     * outputter.  If you parse an XML document that contains newlines
+     * embedded inside a text node, and you do not call
+     * <code>setTextNormalize</code>, then the newlines will be output
+     * verbatim, as whatever you read them in as.  (If you <b>do</b>
+     * call <code>setTextNormalize</code>, then the newlines inside
+     * text nodes will be stripped and converted to a single space.)
+     * </p>
+     *
      * @see #setNewlines(boolean)
+     * @see #setTextNormalize(boolean)
      * @param separator <code>String</code> line separator to use.
      **/
     public void setLineSeparator(String separator) {
@@ -347,7 +365,7 @@
     /**
      * <p> This will set the indent <code>String</code> to use; this
      *   is usually a <code>String</code> of empty spaces. If you pass
-     *   null, mr the empty string (""), then no indentation will
+     *   null, or the empty string (""), then no indentation will
      *   happen. </p>
      * Default: none (null)
      *
@@ -380,7 +398,7 @@
     /**
      * <p>
      *   This will set the indent <code>String</code>'s size; an indentSize
-     *   of 4 would result in the indention being equivalent to the 
+     *   of 4 would result in the indentation being equivalent to the 
      *   <code>String</code> "&nbsp;&nbsp;&nbsp;&nbsp;" (four space chars).
      * </p>
      *
@@ -418,11 +436,27 @@
      * @param out <code>Writer</code> to write to
      */
     protected void maybePrintln(Writer out) throws IOException  {
+	maybePrintln(out, 0);
+    }
+
+    /**
+     * <p>     
+     * This will print a new line only if the newlines flag was set to
+     * true, and then print indents (only if indent is non-null)
+     * </p>
+     *
+     * @param out <code>Writer</code> to write to
+     * @param indentLevel current indent level (number of tabs)
+     */
+    protected void maybePrintln(Writer out, int indentLevel) throws IOException {
         if (newlines) {
             out.write(lineSeparator);
         }
-    }
+	indent(out, indentLevel);
 
+	//new Throwable("trace").printStackTrace(new PrintWriter(out));
+    }
+    
     /**
      * Get an OutputStreamWriter, use preferred encoding.
      */
@@ -494,7 +528,7 @@
             Object obj = i.next();
             if (obj instanceof Element) {
                 printElement(doc.getRootElement(), out, 0,
-                             new NamespaceStack());
+                             createNamespaceStack());
             }
             else if (obj instanceof Comment) {
                 printComment((Comment) obj, out);
@@ -526,7 +560,7 @@
     public void output(Element element, Writer out) throws IOException {
         // If this is the root element we could pre-initialize the 
         // namespace stack with the namespaces
-        printElement(element, out, 0, new NamespaceStack());
+        printElement(element, out, 0, createNamespaceStack());
         out.flush();
     }
     
@@ -558,7 +592,7 @@
     public void outputElementContent(Element element, Writer out)
                       throws IOException {
         List mixedContent = element.getMixedContent();
-        printElementContent(element, out, 0, new NamespaceStack(),
+        printElementContent(element, out, 0, createNamespaceStack(),
                             mixedContent);
         out.flush();
     }
@@ -864,16 +898,16 @@
             out.write("<?xml version=\"1.0\"");
             if (!omitEncoding) {
                 out.write(" encoding=\"" + encoding + "\"");
-            }
-            out.write("?>");
-
+	    }
+	    out.write("?>");
+	    
             // Print new line after decl always, even if no other new lines
             // Helps the output look better and is semantically
             // inconsequential
             out.write(lineSeparator);
         }
     }
-
+    
     /**
      * <p>
      * This will write the DOCTYPE declaration if one exists.
@@ -1003,63 +1037,16 @@
         // Mark our namespace starting point
         int previouslyDeclaredNamespaces = namespaces.size();
 
-        // Add namespace decl only if it's not the XML namespace and it's 
-        // not the NO_NAMESPACE with the prefix "" not yet mapped
-        // (we do output xmlns="" if the "" prefix was already used and we
-        // need to reclaim it for the NO_NAMESPACE)
-        Namespace ns = element.getNamespace();
-        if (ns != Namespace.XML_NAMESPACE &&
-            !(ns == Namespace.NO_NAMESPACE && namespaces.getURI("") == null)) {
-            String prefix = ns.getPrefix();        
-            String uri = namespaces.getURI(prefix);
-            if (!ns.getURI().equals(uri)) { // output a new namespace decl
-                namespaces.push(ns);
-                printNamespace(ns, out);
-            }
-        }
+	// print element's namespace (only if appropriate)
+	printElementNamespace(out, element, namespaces);
 
         // Print out additional namespace declarations
-        List additionalNamespaces = element.getAdditionalNamespaces();
-        if (additionalNamespaces != null) {
-            Iterator itr = additionalNamespaces.iterator();
-            while (itr.hasNext()) {
-                Namespace additional = (Namespace)itr.next();
-                String prefix = additional.getPrefix();        
-                String uri = namespaces.getURI(prefix);
-                if (!additional.getURI().equals(uri)) {
-                    namespaces.push(additional);
-                    printNamespace(additional, out);
-                }
-            }
-        }
+	printAdditionalNamespaces(out, element, namespaces);
 
         printAttributes(element.getAttributes(), element, out, namespaces);
-
-        // Calculate if the contents are String/CDATA only
-        // This helps later with the "empty" check
-        boolean stringOnly = true;
-        Iterator itr = mixedContent.iterator();
-        while (itr.hasNext()) {
-            Object o = itr.next();
-            if (!(o instanceof String) && !(o instanceof CDATA)) {
-                stringOnly = false;
-                break;
-            }
-        }
-
-        // Calculate if the content is empty
-        // We can handle "" string same as empty (CDATA has no effect here)
-        boolean empty = false;
-        if (stringOnly) {
-            String elementText =
-                textNormalize ? element.getTextNormalize() : element.getText();
-            if (elementText == null || elementText.equals("")) {
-                empty = true;
-            }
-        }
-
+	
         // If empty, print closing; if not empty, print content
-        if (empty) {
+        if (isEmpty(element)) {
             // Simply close up
             if (!expandEmptyElements) {
                 out.write(" />");
@@ -1098,24 +1085,9 @@
                                        int indentLevel,
                                        NamespaceStack namespaces,
                                        List mixedContent) throws IOException {
-        // get same local flags as printElement does
-        // a little redundant code-wise, but not performance-wise
-        boolean empty = mixedContent.size() == 0;
 
         // Calculate if the content is String/CDATA only
-        boolean stringOnly = true;
-        if (!empty) {
-            Iterator itr = mixedContent.iterator();
-            while (itr.hasNext()) {
-                Object o = itr.next();
-                if (!(o instanceof String) && !(o instanceof CDATA)) {
-                    stringOnly = false;
-                    break;
-                }
-            }
-        }
-
-        if (stringOnly) {
+        if (isStringOnly(mixedContent)) {
             Class justOutput = null;
             boolean endedWithWhite = false;
             Iterator itr = mixedContent.iterator();
@@ -1144,8 +1116,8 @@
                 }
             }
         }
-        else {
-            // Iterate through children
+        else {	// not stringOnly 	    
+	    // Iterate through children
             Object content = null;
             Class justOutput = null;
             boolean endedWithWhite = false;
@@ -1154,16 +1126,16 @@
             while (itr.hasNext()) {
                 content = itr.next();
                 // See if text, an element, a PI or a comment
-                if (content instanceof Comment) {
-                    if (!(justOutput == String.class && wasFullyWhite)) {
-                        maybePrintln(out);
-                        indent(out, indentLevel);
-                    }
-                    printComment((Comment) content, out);
-                    justOutput = Comment.class;
-                }
-                else if (content instanceof String) {
+                if (content instanceof String) {
                     String scontent = (String) content;
+
+		    // bugfix: don't print lines for whitespace that's
+		    // only sticking between close tags
+		    if (textNormalize && isWhitespace(scontent)) {
+			wasFullyWhite = true;
+			continue;
+		    }
+		    
                     if (justOutput == CDATA.class && 
                           textNormalize &&
                           startsWithWhite(scontent)) {
@@ -1171,40 +1143,13 @@
                     }
                     else if (justOutput != CDATA.class && 
                              justOutput != String.class) {
-                        maybePrintln(out);
-                        indent(out, indentLevel);
+                        maybePrintln(out, indentLevel);
                     }
                     printString(scontent, out);
                     endedWithWhite = endsWithWhite(scontent);
                     justOutput = String.class;
                     wasFullyWhite = (scontent.trim().length() == 0);
                 }
-                else if (content instanceof Element) {
-                    if (!(justOutput == String.class && wasFullyWhite)) {
-                        maybePrintln(out);
-                        indent(out, indentLevel);
-                    }
-                    printElement((Element) content, out,
-                                 indentLevel, namespaces);
-                    justOutput = Element.class;
-                }
-                else if (content instanceof EntityRef) {
-                    if (!(justOutput == String.class && wasFullyWhite)) {
-                        maybePrintln(out);
-                        indent(out, indentLevel);
-                    }
-                    printEntityRef((EntityRef) content, out);
-                    justOutput = EntityRef.class;
-                }
-                else if (content instanceof ProcessingInstruction) {
-                    if (!(justOutput == String.class && wasFullyWhite)) {
-                        maybePrintln(out);
-                        indent(out, indentLevel);
-                    }
-                    printProcessingInstruction((ProcessingInstruction) content,
-                                               out);
-                    justOutput = ProcessingInstruction.class;
-                }
                 else if (content instanceof CDATA) {
                     if (justOutput == String.class &&
                           textNormalize &&
@@ -1213,21 +1158,113 @@
                     }
                     else if (justOutput != String.class &&
                              justOutput != CDATA.class) {
-                        maybePrintln(out);
-                        indent(out, indentLevel);
+                        maybePrintln(out, indentLevel);
                     }
                     printCDATA((CDATA)content, out);
                     justOutput = CDATA.class;
                 }
-                // Unsupported types are *not* printed, nor should they exist
+                else {
+		    // not a string or CDATA
+
+		    // If we're not inside a whitespace run, then
+		    // print and indent before printing this content
+                    if (!(justOutput == String.class && wasFullyWhite)) {
+                        maybePrintln(out, indentLevel);
+                    }
+
+		    // Individual content printing cases
+		    if (content instanceof Comment) {
+			printComment((Comment) content, out);
+			justOutput = Comment.class;
+		    }
+		    else if (content instanceof Element) {
+			printElement((Element) content, out,
+				     indentLevel, namespaces);
+			justOutput = Element.class;
+		    }
+		    else if (content instanceof EntityRef) {
+			printEntityRef((EntityRef) content, out);
+			justOutput = EntityRef.class;
+		    }
+		    else if (content instanceof ProcessingInstruction) {
+			printProcessingInstruction((ProcessingInstruction) content,
+						   out);
+			justOutput = ProcessingInstruction.class;
+		    }
+
+		    // Unsupported types are *not* printed, nor should they exist
+		    // Should we throw an exception or assert?
+		} // else string/cdata
             }
-            maybePrintln(out);
-            indent(out, indentLevel - 1);
+	    // wrap up by "popping" the ident level and printing a newline
+	    maybePrintln(out, indentLevel - 1);
         }
     }  // printElementContent
 
+    /**
+     * @return true if string is all whitespace (space, tab, cr, lf only)
+     **/       
+    protected boolean isWhitespace(String s) {
+	char[] c = s.toCharArray();
+	for (int i=0; i<c.length; ++i) {
+ 	    if (" \t\n\r".indexOf(c[i]) == -1) {
+		return false;
+	    }
+	}
+	return true;
+    }
+
+    /**
+     * @return the text of the element, taking "textNormalize" into account
+     **/
+    protected String getElementText(Element element)
+    {
+	return textNormalize ? element.getTextNormalize() : element.getText();
+    }
 
     /**
+     * An empty element is defined as one with absolutely no content,
+     * or with only empty/null string content
+     *
+     * @return true if this is an empty element
+     **/
+    protected boolean isEmpty(Element element)
+    {
+        // Calculate if the content is empty
+        // We can handle "" string same as empty (CDATA has no effect here)	
+	List mixedContent = element.getMixedContent();
+	// an element with absolutely no content is empty
+	if (mixedContent.size() == 0)
+	    return true;
+	// an element with only string content, whose string content
+	// adds up to nothing, is empty
+	if (isStringOnly(mixedContent)) {
+            String elementText = getElementText(element);
+            if (elementText == null || elementText.equals("")) {
+		return true;
+            }
+	}
+	// anything else is not empty
+	return false;
+    }
+
+    /**
+     * @return true if the element's mixedContent list consists only of String or CDATA nodes (or is empty)
+     **/
+    protected boolean isStringOnly(List mixedContent)
+    {
+	// Calculate if the contents are String/CDATA only
+        Iterator itr = mixedContent.iterator();
+        while (itr.hasNext()) {
+            Object o = itr.next();
+            if (!(o instanceof String) && !(o instanceof CDATA)) {
+                return false;
+            }
+        }
+	return true;
+    }
+
+    /**
      * Print a string.  Escapes the element entities, trims interior
      * whitespace if necessary.
      **/     
@@ -1291,6 +1328,49 @@
     }
 
     /**
+     * Print and add an element's main namespace.  Called during printElement.
+     **/
+    protected void printElementNamespace(Writer out, Element element, NamespaceStack namespaces) throws IOException
+    {
+        // Add namespace decl only if it's not the XML namespace and it's 
+        // not the NO_NAMESPACE with the prefix "" not yet mapped
+        // (we do output xmlns="" if the "" prefix was already used and we
+        // need to reclaim it for the NO_NAMESPACE)
+        Namespace ns = element.getNamespace();
+        if (ns != Namespace.XML_NAMESPACE &&
+            !(ns == Namespace.NO_NAMESPACE && namespaces.getURI("") == null)) {
+            String prefix = ns.getPrefix();        
+            String uri = namespaces.getURI(prefix);
+            if (!ns.getURI().equals(uri)) { // output a new namespace decl
+                namespaces.push(ns);
+                printNamespace(ns, out);
+            }
+        }	
+    }
+
+    /**
+     * Print out additional namespace declarations for an Element.
+     * Called during printElement.
+     **/
+    protected void printAdditionalNamespaces(Writer out, Element element, NamespaceStack namespaces) throws IOException
+    {
+        // Print out additional namespace declarations
+        List additionalNamespaces = element.getAdditionalNamespaces();
+        if (additionalNamespaces != null) {
+            Iterator itr = additionalNamespaces.iterator();
+            while (itr.hasNext()) {
+                Namespace additional = (Namespace)itr.next();
+                String prefix = additional.getPrefix();        
+                String uri = namespaces.getURI(prefix);
+                if (!additional.getURI().equals(uri)) {
+                    namespaces.push(additional);
+                    printNamespace(additional, out);
+                }
+            }
+        }
+    }
+
+    /**
      * <p>
      * This will handle printing out an <code>{@link Attribute}</code> list.
      * </p>
@@ -1481,7 +1561,61 @@
         return (s.length() > 0 && s.charAt(s.length() - 1) <= ' ');
     }
 
+    public String getSettings() {
+	return
+	    "omitDeclaration = " + omitDeclaration + "\n" +
+	    "encoding = " + encoding + "\n" +
+	    "omitEncoding = " + omitEncoding + "\n" +
+	    "indent = '" + indent + "'" + "\n" +
+	    "expandEmptyElements = " + expandEmptyElements + "\n" +
+	    "newlines = " + newlines + "\n" +
+	    "lineSeparator = " + toBytes(lineSeparator) + "\n" +
+	    "textNormalize = " + textNormalize + "\n";
+    }
+
+    protected String toBytes(String x) {
+	StringBuffer buf = new StringBuffer();
+	for (int i=0; i<x.length(); ++i) {
+	    buf.append(toByte(x.charAt(i)));
+	}
+	return buf.toString();
+    }
+
+    protected String toByte(char ch) {
+	switch (ch) {
+	case '\r':
+	    return "\\r";
+	case '\n':
+	    return "\\n";
+	case '\t':
+	    return "\\t";
+	default:
+	    return ("[" + ((int)ch) + "]");
+	}
+    }
+
+    /**
+     * Factory for making new NamespaceStacks.  Relocated here to make
+     * it clearer.
+     **/
+    protected NamespaceStack createNamespaceStack() {
+	// actually returns a XMLOutputter.NamespaceStack (see below)
+	return new NamespaceStack();
+    }
+
     /**
+     * Our own null subclass of NamespaceStack.  This plays a little
+     * trick with Java access protection.  We want subclasses of
+     * XMLOutputter to be able to override protected methods that
+     * declare a NamespaceStack parameter, but we don't want to
+     * declare the parent NamespaceStack class as public.  
+     **/
+    protected class NamespaceStack
+	extends org.jdom.output.NamespaceStack
+    {
+    }
+
+    /**
      * <p> Ensure that text immediately preceded by or followed by an
      * element will be "padded" with a single space.  </p>
      * 
@@ -1510,7 +1644,7 @@
      * @deprecated Deprecated in beta7, use setOmitDeclaration() instead
      */
     public void setSuppressDeclaration(boolean suppressDeclaration) {
-        this.omitDeclaration = omitDeclaration;
+        this.omitDeclaration = suppressDeclaration;
     }
 
 }