[jdom-interest] Outputting escaped entities in element text
Bradley S. Huffman
hip at a.cs.okstate.edu
Wed Oct 2 19:23:21 PDT 2002
Here's a patch to XMLOutputter that does what you want.
It leaves any substring matching "&#[0-9]+;" or "&#x[0-9a-fA-F]+;" in an
Attribute value or Text untouched. This may or may not be what most people
want, but now it's in the archive for those who wish to find and apply it :)
Brad
*** XMLOutputter.java Wed Oct 2 20:44:48 2002
--- XMLOutputter.old Wed Oct 2 20:36:40 2002
***************
*** 1660,1666 ****
entity = "&quot;";
break;
case '&' :
! entity = "&amp;";
break;
default :
entity = null;
--- 1660,1671 ----
entity = "&quot;";
break;
case '&' :
! if (escapeAmpersand(str, i)) {
! entity = "&amp;";
! }
! else {
! entity = null;
! }
break;
default :
entity = null;
***************
*** 1719,1725 ****
entity = "&gt;";
break;
case '&' :
! entity = "&amp;";
break;
default :
entity = null;
--- 1724,1735 ----
entity = "&gt;";
break;
case '&' :
! if (escapeAmpersand(str, i)) {
! entity = "&amp;";
! }
! else {
! entity = null;
! }
break;
default :
entity = null;
***************
*** 1752,1757 ****
--- 1762,1814 ----
return (buffer == null) ? str : buffer.toString();
}
+ /**
+  * Decides whether the '&' at position <code>start</code> of
+  * <code>str</code> has to be escaped.
+  *
+  * The ampersand is left alone only when it begins a complete numeric
+  * character reference, i.e. text matching "&#[0-9]+;" or
+  * "&#x[0-9a-fA-F]+;". In every other case it must be escaped.
+  *
+  * @param str   the string being escaped
+  * @param start index in <code>str</code> of the character to examine
+  * @return <code>false</code> if a well-formed numeric character
+  *         reference starts at <code>start</code>, <code>true</code>
+  *         otherwise
+  */
+ private boolean escapeAmpersand(String str, int start) {
+     // The shortest possible reference is "&#0;" (4 characters). This
+     // check also keeps the three charAt() calls below in range.
+     if ((str.length() - start) < 4) {
+         return true;
+     }
+
+     if (str.charAt(start) != '&' || str.charAt(start + 1) != '#') {
+         return true;
+     }
+     int index = start + 2;
+
+     // Only a lowercase 'x' introduces a hexadecimal reference in XML 1.0
+     // (production CharRef), so "&#X41;" is deliberately not recognised
+     // and its ampersand gets escaped.
+     boolean isHex = false;
+     if (str.charAt(index) == 'x') {
+         isHex = true;
+         index++;
+     }
+
+     // At least one digit is required between the prefix and the ';'.
+     boolean sawDigit = false;
+     for (; index < str.length(); index++) {
+         switch (str.charAt(index)) {
+             case '0': case '1': case '2': case '3': case '4':
+             case '5': case '6': case '7': case '8': case '9':
+                 sawDigit = true;
+                 break;
+
+             case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+             case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+                 if (!isHex) {
+                     return true;
+                 }
+                 sawDigit = true;
+                 break;
+
+             case ';':
+                 // "&#;" and "&#x;" carry no digits and are not
+                 // references, so their ampersand still needs escaping.
+                 return !sawDigit;
+
+             default:
+                 return true;
+         }
+     }
+     // Reached the end of the string without a terminating ';'.
+     return true;
+ }
+
/**
* Parse command-line arguments of the form <code>-omitEncoding
* -indentSize 3 …</code>.
More information about the jdom-interest
mailing list