package test; import java.io.FileInputStream; import java.io.InputStream; import java.util.Iterator; import java.util.List; import org.apache.xerces.parsers.SAXParser; import org.jaxen.XPath; import org.jaxen.jdom.JDOMXPath; import org.jdom.Document; import org.jdom.Element; import org.jdom.input.SAXBuilder; import org.jdom.output.XMLOutputter; /** * * @author Per Norrman * */ public class Utf { private static String escape(char c) { StringBuffer b = new StringBuffer(); for (int i = 0; i < 4; i++) { b.append(Integer.toHexString(c & 0x000F).toUpperCase()); c >>>= 4; } b.append("u\\"); return b.reverse().toString(); } private static void decode(String s) { char[] ch = s.toCharArray(); for (int i = 0; i < ch.length; i++) { if(ch[i] < 128) { System.out.print(ch[i]); } else { System.out.print(escape(ch[i])); } } System.out.println(); } public static void main(String[] args) throws Exception { SAXBuilder builder = new SAXBuilder("org.apache.xerces.parsers.SAXParser"); InputStream in = Thread.currentThread().getContextClassLoader().getResourceAsStream("test/in-utf-8.xml"); Document doc = builder.build(in); XPath path = new JDOMXPath("//mattext"); List result = path.selectNodes(doc); System.out.println("size="+result.size()); for (Iterator iter = result.iterator(); iter.hasNext();) { Element element = (Element) iter.next(); System.out.println("-->" + element.getText()); decode(element.getText()); } } }