[jdom-interest] getTextNormalize bug?
Tim Daly
tdaly at ans.net
Mon Nov 26 20:07:18 PST 2001
I wrote a simple program to read an XML file and write it to stdout.
The text of each Element is printed with the call:
Element element;
...
System.out.println(element.getTextNormalize());
During my testing I ran it on the build.xml file in jdom, thus:
java XMLCopy build.xml >foo.xml
Then I renamed foo.xml to build.xml and did:
./build.sh
The Ant program died with the message:
Character conversion error: "Unconvertible UTF-8 character beginning
with 0xa9" (line number may be too low).
The original line in the original build.xml file contains:
Copyright ©
(aside: we knew the problem was the copyright :-))
which got converted by getTextNormalize() into
Copyright (someStrangeCharacter)
Is this a bug in getTextNormalize?
My source code follows.
Tim Daly
daly at idsi.net
=====================================================================
package samples;
import org.jdom.*;
import org.jdom.input.SAXBuilder;
import org.jdom.input.DOMBuilder;
import org.jdom.output.*;
import java.util.*;
/**
 * Sample program that reads an XML document with JDOM, round-trips its root
 * element through a W3C DOM tree, and prints the resulting element tree as
 * indented markup on standard output.
 *
 * <p>All output is encoded as UTF-8. The markup printed here carries no XML
 * declaration, so a parser that later reads it will assume UTF-8; writing
 * through the platform default charset instead turns a character such as
 * the copyright sign into a lone 0xA9 byte, which is not valid UTF-8.
 *
 * <p>NOTE(review): element text and attribute values are printed as-is,
 * without escaping markup characters such as {@code <} or {@code &}.
 */
public class Count
{
    /** Names of the elements that have been opened but not yet closed. */
    static Stack stack = new Stack();

    /** Current indentation width, in spaces. */
    static int indent = 0;

    /** Standard output, wrapped so that characters are written as UTF-8. */
    private static final java.io.PrintStream out = createUtf8Out();

    /**
     * Wraps {@code System.out} in an auto-flushing stream that encodes
     * characters as UTF-8.
     *
     * @return the UTF-8 stream, or {@code System.out} itself if the
     *         encoding is unexpectedly unavailable
     */
    private static java.io.PrintStream createUtf8Out()
    {
        try
        {
            return new java.io.PrintStream(System.out, true, "UTF-8");
        }
        catch (java.io.UnsupportedEncodingException e)
        {
            // Fall back to the platform encoding rather than fail to start.
            return System.out;
        }
    }

    /**
     * Prints the indentation for one output line and adjusts the current
     * indentation level.
     *
     * <p>A negative {@code count} shrinks the indentation before the spaces
     * are printed (used for end tags); a positive {@code count} grows it
     * after they are printed (used for start tags); zero only prints.
     *
     * @param count amount to add to the current indentation
     */
    public static void doIndent(int count)
    {
        if (count < 0)
        {
            indent = indent + count;
        }
        for (int i = 0; i < indent; i++)
        {
            out.print(" ");
        }
        if (count > 0)
        {
            indent = indent + count;
        }
    }

    /**
     * Program entry point. Builds a JDOM document from {@code args[0]},
     * converts its root element to a DOM element and back to a JDOM
     * element, then prints that element tree.
     *
     * <p>Only the first argument is processed, although the usage message
     * mentions several URLs.
     *
     * @param args command-line arguments; {@code args[0]} names the document
     */
    public static void main(String[] args)
    {
        if (args.length == 0)
        {
            out.println("Usage: java Count URL1 URL2...");
            return;
        }
        SAXBuilder saxBuilder = new SAXBuilder();
        DOMBuilder domBuilder = new DOMBuilder();
        DOMOutputter domOutputter = new DOMOutputter();
        Document jdomDocument;
        org.w3c.dom.Element domElement;
        org.jdom.Element jdomElement;
        try
        {
            jdomDocument = saxBuilder.build(args[0]);
            domElement = domOutputter.output(jdomDocument.getRootElement());
            jdomElement = domBuilder.build(domElement);
            count(jdomElement);
        }
        catch (JDOMException e)
        {
            out.println(args[0] + " is not a well formed XML document.");
            out.println(e.getMessage());
        }
        finally
        {
            // Make sure nothing stays buffered in the UTF-8 wrapper.
            out.flush();
        }
    }

    /**
     * Prints each attribute in the list as {@code  name = "value"} on the
     * current output line.
     *
     * @param attributes list whose entries are cast to {@code Attribute}
     */
    public static void printAttributes(List attributes)
    {
        Iterator iterator = attributes.iterator();
        while (iterator.hasNext())
        {
            Object o = iterator.next();
            out.print(" " + ((Attribute) o).getName() + " = \"" +
                      ((Attribute) o).getValue() + "\"");
        }
    }

    /**
     * Recursively prints an element: its start tag with attributes, its
     * normalized text (if not empty), its child elements, and its end tag.
     *
     * <p>The normalized text is printed before any child element, whatever
     * its position in the element's content; content that is not an
     * {@code Element} is otherwise skipped.
     *
     * @param element the element to print
     */
    public static void count(Element element)
    {
        doIndent(1);
        out.print("<" + element.getName());
        // Remember the name so the matching end tag can be printed later.
        stack.push(element.getName());
        printAttributes(element.getAttributes());
        out.println(">");
        String text = element.getTextNormalize();
        if (!text.equals(""))
        {
            doIndent(0);
            out.println(" " + text);
        }
        List children = element.getContent();
        Iterator iterator = children.iterator();
        while (iterator.hasNext())
        {
            Object o = iterator.next();
            if (o instanceof Element)
            {
                count((Element) o);
            }
        }
        doIndent(-1);
        out.println("</" + (String) stack.pop() + ">");
    }
}
More information about the jdom-interest
mailing list