[jdom-interest] Outputting escaped entities in element text

Bradley S. Huffman hip at a.cs.okstate.edu
Wed Oct 2 19:23:21 PDT 2002


Here's a patch to XMLOutputter that does what you want.

It leaves any substring matching "&#[0-9]+;" or "&#x[0-9a-fA-F]+;" in an
Attribute value or Text untouched. This may or may not be what most people
want, but now it's in the archive for those who wish to find and apply it :)

Brad

*** XMLOutputter.java	Wed Oct  2 20:44:48 2002
--- XMLOutputter.old	Wed Oct  2 20:36:40 2002
***************
*** 1660,1666 ****
                      entity = "&quot;";
                      break;
                  case '&' :
!                     entity = "&amp;";
                      break;
                  default :
                      entity = null;
--- 1660,1671 ----
                      entity = "&quot;";
                      break;
                  case '&' :
!                     if (escapeAmpersand(str, i)) {
!                         entity = "&amp;";
!                     }
!                     else {
!                         entity = null;
!                     }
                      break;
                  default :
                      entity = null;
***************
*** 1719,1725 ****
                      entity = "&gt;";
                      break;
                  case '&' :
!                     entity = "&amp;";
                      break;
                  default :
                      entity = null;
--- 1724,1735 ----
                      entity = "&gt;";
                      break;
                  case '&' :
!                     if (escapeAmpersand(str, i)) {
!                         entity = "&amp;";
!                     }
!                     else {
!                         entity = null;
!                     }
                      break;
                  default :
                      entity = null;
***************
*** 1752,1757 ****
--- 1762,1814 ----
          return (buffer == null) ? str : buffer.toString();
      }
  
+     /**
+      * Decide whether the '&' at <code>start</code> has to be escaped.
+      * It is left alone only when it begins a complete character reference
+      * of the form "&#[0-9]+;" or "&#x[0-9a-fA-F]+;". At least one digit is
+      * required, so malformed input such as "&#;" or "&#x;" is escaped.
+      *
+      * @param str   the text being escaped
+      * @param start index of the candidate '&' within <code>str</code>
+      * @return <code>false</code> if a character reference starts at
+      *         <code>start</code>, <code>true</code> otherwise
+      */
+     private boolean escapeAmpersand(String str, int start) {
+         // The shortest possible reference, e.g. "&#1;", is four characters.
+         if ((str.length() - start) < 4) {
+             return true;
+         }
+         if (str.charAt(start) != '&' || str.charAt(start + 1) != '#') {
+             return true;
+         }
+ 
+         int index = start + 2;
+ 
+         // Only a lowercase 'x' introduces a hexadecimal reference in XML.
+         boolean isHex = (str.charAt(index) == 'x');
+         if (isHex) {
+             index++;
+         }
+ 
+         int firstDigit = index;
+         for (; index < str.length(); index++) {
+             char c = str.charAt(index);
+             if (c == ';') {
+                 // A ';' directly after the prefix means there were no digits.
+                 return index == firstDigit;
+             }
+             // Hex letters are only legal after the "x" marker.
+             boolean decimal = (c >= '0' && c <= '9');
+             boolean hexOnly = (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+             if (!decimal && !(isHex && hexOnly)) {
+                 return true;
+             }
+         }
+ 
+         // Reached the end of the string without a terminating ';'.
+         return true;
+     }
+ 
      /**
       * Parse command-line arguments of the form <code>-omitEncoding
       * -indentSize 3 &#133;</code>.



More information about the jdom-interest mailing list