[jdom-interest] Base URI patch
Elliotte Rusty Harold
elharo at metalab.unc.edu
Wed May 7 10:39:40 PDT 2003
As discussed about a year ago, I have attached a patch that adds simple
base URI support to the JDOM Document class. So far this doesn't seem to
break anything. Some of the code for this was borrowed from XOM. I grant
the JDOM project the non-exclusive right to use this code under the JDOM
license.
While I was at it, I also corrected a few minor errors (extra hyphens) I
noticed in the Verifier class.
Step 2 will be writing the patch for the Builder class.
Step 3 will be adding base URI support to Element. This is by far the
trickiest step because of the need to handle xml:base attributes
sensibly. However, I've worked this out in XOM already so I've got a
pretty good idea of what we need.
--
Elliotte
-------------- next part --------------
? build
Index: src/java/org/jdom/Document.java
===================================================================
RCS file: /home/cvspublic/jdom/src/java/org/jdom/Document.java,v
retrieving revision 1.66
diff -r1.66 Document.java
98,99c98,100
< * as the root element and the supplied
< * <code>{@link DocType}</code> declaration.
---
> * as the root element, the supplied
> * <code>{@link DocType}</code> declaration, and the specified
> * base URI.
102a104
> * @param baseURI the URI from which this document was loaded.
107c109
< public Document(Element rootElement, DocType docType) {
---
> public Document(Element rootElement, DocType docType, String baseURI) {
111a114,131
> if (baseURI != null)
> setBaseURI(baseURI);
> }
>
> /**
> * This will create a new <code>Document</code>,
> * with the supplied <code>{@link Element}</code>
> * as the root element and the supplied
> * <code>{@link DocType}</code> declaration.
> *
> * @param rootElement <code>Element</code> for document root.
> * @param docType <code>DocType</code> declaration.
> * @throws IllegalAddException if the given docType object
> * is already attached to a document or the given
> * rootElement already has a parent
> */
> public Document(Element rootElement, DocType docType) {
> this(rootElement, docType, null);
125c145
< this(rootElement, null);
---
> this(rootElement, null, null);
436a457,487
> private String baseURI = null;
>
> /**
> *
> * <p>
> * Sets the effective URI from which this document was loaded,
> * and against which relative URLs in this document will be resolved.
> * </p>
> *
> * @param uri the base URI of this document
> *
> * @throws MalformedURIException if <code>URI</code> is
> * not a legal IRI
> */
> public final void setBaseURI(String uri) {
> this.baseURI = uri;
> }
>
> /**
> * <p>
> * Returns the URI from which this document was loaded,
> * or null if this is not known.
> * </p>
> *
> * @return the base URI of this document
> */
> public final String getBaseURI() {
> return baseURI;
> }
>
>
Index: src/java/org/jdom/Verifier.java
===================================================================
RCS file: /home/cvspublic/jdom/src/java/org/jdom/Verifier.java,v
retrieving revision 1.45
diff -r1.45 Verifier.java
86c86
< * @return <code>String</code> - reason name is illegal, or
---
> * @return <code>String</code> reason name is illegal, or
110c110
< * @return <code>String</code> - reason name is illegal, or
---
> * @return <code>String</code> reason name is illegal, or
150c150
< * @return <code>String</code> - reason name is illegal, or
---
> * @return <code>String</code> reason name is illegal, or
178c178
< * @return <code>String</code> - reason data is illegal, or
---
> * @return <code>String</code> reason data is illegal, or
201c201
< * @return <code>String</code> - reason name is illegal, or
---
> * @return <code>String</code> reason name is illegal, or
256c256
< * @return <code>String</code> - reason name is illegal, or
---
> * @return <code>String</code> reason name is illegal, or
288c288
< * @return <code>String</code> - reason for collision, or
---
> * @return <code>String</code> reason for collision, or
312c312
< * @return <code>String</code> - reason for collision, or
---
> * @return <code>String</code> reason for collision, or
332c332
< * @return <code>String</code> - reason for collision, or
---
> * @return <code>String</code> reason for collision, or
363c363
< * @return <code>String</code> - reason for collision, or
---
> * @return <code>String</code> reason for collision, or
382c382
< * @return <code>String</code> - reason for collision, or
---
> * @return <code>String</code> reason for collision, or
417c417
< * @return <code>String</code> - reason target is illegal, or
---
> * @return <code>String</code> reason target is illegal, or
453c453
< * @return <code>String</code> - reason data is illegal, or
---
> * @return <code>String</code> reason data is illegal, or
475c475
< * @return <code>String</code> - reason data is illegal, or
---
> * @return <code>String</code> reason data is illegal, or
525c525
< * @return <code>String</code> - reason public ID is illegal, or
---
> * @return <code>String</code> reason public ID is illegal, or
551c551
< * @return <code>String</code> - reason system literal is illegal, or
---
> * @return <code>String</code> reason system literal is illegal, or
577c577
< * @return <code>String</code> - reason the name is illegal, or
---
> * @return <code>String</code> reason the name is illegal, or
605a606,648
> /**
> * <p>
> * Checks a string to see if it is a legal RFC 2396 URI.
> * Both absolute and relative URIs are supported.
> * </p>
> *
> * @param uri <code>String</code> to check.
> * @return <code>String</code> reason the URI is illegal, or
> * <code>null</code> if OK.
> */
> public static String checkURI(String uri) {
> // URIs can be null or empty
> if ((uri == null) || (uri.equals(""))) {
> return null;
> }
>
> for (int i = 0; i < uri.length(); i++) {
> char test = uri.charAt(i);
> if (!isURICharacter(test)) {
> String msgNumber = "0x" + Integer.toHexString(test);
> if (test <= 0x09) msgNumber = "0x0" + Integer.toHexString(test);
> return "URIs cannot contain " + msgNumber;
> } // end if
> if (test == '%') { // must be followed by two hexadecimal digits
> try {
> char firstDigit = uri.charAt(i+1);
> char secondDigit = uri.charAt(i+2);
> if (!isHexDigit(firstDigit) || !isHexDigit(secondDigit)) {
> return "Percent signs in URIs must be followed by "
> + "exactly two hexadecimal digits.";
> }
>
> }
> catch (StringIndexOutOfBoundsException e) {
> return "Percent signs in URIs must be followed by "
> + "exactly two hexadecimal digits.";
> }
> }
> } // end for
>
> // If we got here, everything is OK
> return null;
> }
607a651,711
> * <p>
> * This is a utility function for determining whether a specified
> * Unicode character is a hexadecimal digit as defined in RFC 2396;
> * that is, one of the ASCII characters 0-9, a-f, or A-F.
> * </p>
> *
> * @param c <code>char</code> to check for hex digit.
> * @return true if it's allowed, false otherwise.
> */
> public static boolean isHexDigit(char c) {
>
> // I suspect most characters passed to this method will be
> // correct hexadecimal digits, so I test for the true cases
> // first. If this proves to be a performance bottleneck
> // a switch statement or lookup table
> // might optimize this.
> if (c >= '0' && c <= '9') return true;
> if (c >= 'A' && c <= 'F') return true;
> if (c >= 'a' && c <= 'f') return true;
>
> return false;
> }
>
> /**
> * <p>
> * This is a utility function for determining whether
> * a specified Unicode character is legal in URI references
> * as determined by RFC 2396.
> * </p>
> *
> * @param c <code>char</code> to check for URI reference compliance.
> * @return true if it's allowed, false otherwise.
> */
> private static boolean isURICharacter(char c) {
> if (c >= 'a' && c <= 'z') return true;
> if (c >= 'A' && c <= 'Z') return true;
> if (c >= '0' && c <= '9') return true;
> if (c == '/') return true;
> if (c == '-') return true;
> if (c == '.') return true;
> if (c == '?') return true;
> if (c == ':') return true;
> if (c == '@') return true;
> if (c == '&') return true;
> if (c == '=') return true;
> if (c == '+') return true;
> if (c == '$') return true;
> if (c == ',') return true;
> if (c == '%') return true;
>
> if (c == '_') return true;
> if (c == '!') return true;
> if (c == '~') return true;
> if (c == '*') return true;
> if (c == '\'') return true;
> if (c == '(') return true;
> if (c == ')') return true;
> return false;
> }
>
> /**
613c717
< * @return <code>boolean</code> - true if it's a character,
---
> * @return <code>boolean</code> true if it's a character,
636c740
< * @return <code>boolean</code> - true if it's a name character,
---
> * @return <code>boolean</code> true if it's a name character,
654c758
< * @return <code>boolean</code> - true if it's a name start character,
---
> * @return <code>boolean</code> true if it's a name start character,
669c773
< * @return <code>boolean</code> - true if it's letter or digit,
---
> * @return <code>boolean</code> true if it's letter or digit,
683c787
< * @return <code>String</code> - true if it's a letter, false otherwise.
---
> * @return <code>String</code> true if it's a letter, false otherwise.
907c1011
< * @return <code>boolean</code> - true if it's a combining character,
---
> * @return <code>boolean</code> true if it's a combining character,
1042c1146
< * @return <code>String</code> - true if it's an extender, false otherwise.
---
> * @return <code>String</code> true if it's an extender, false otherwise.
1072c1176
< * @return <code>boolean</code> - true if it's a digit, false otherwise.
---
> * @return <code>boolean</code> true if it's a digit, false otherwise.
1097c1201,1202
< }
---
> }
>
More information about the jdom-interest
mailing list