[jdom-interest] Verifier Namespace patches
Elliotte Rusty Harold
elharo at metalab.unc.edu
Thu Jun 27 07:49:07 PDT 2002
I've attached two patches for the Verifier and TestVerifier classes
related to testing namespace URIs. I think these do all the checks that
are possible using the general URI specification. Specifically:
1. Each URI reference contains only characters allowed by RFC 2396.
2. No more than one # is included.
3. Each percent sign is followed by two hexadecimal digits.
Since namespace URIs may be relative, and may be URI references, there's
not a lot more we can do.
For convenience, I've added two new public methods to the Verifier
class, which are used when testing the content of each URI:
public static boolean isURICharacter(char c)
public static boolean isHexDigit(char c)
However, if you'd prefer not to clutter the API, these could easily be
made private.
--
Elliotte
-------------- next part --------------
Index: src/java/org/jdom/Verifier.java
===================================================================
RCS file: /home/cvspublic/jdom/src/java/org/jdom/Verifier.java,v
retrieving revision 1.40
diff -d -u -r1.40 Verifier.java
--- src/java/org/jdom/Verifier.java 2002/06/26 01:44:26 1.40
+++ src/java/org/jdom/Verifier.java 2002/06/27 22:49:19
@@ -250,31 +250,52 @@
/**
* This will check the supplied name to see if it is legal for use as
- * a JDOM <code>{@link Namespace}</code> URI.
+ * a JDOM <code>{@link Namespace}</code> URI. (Technically it checks
+ * to see if this is a legal RFC 2396 URI reference, which is not quite the
+ * same thing as a URI.)
*
* @param uri <code>String</code> URI to check.
* @return <code>String</code> - reason name is illegal, or
* <code>null</code> if name is OK.
*/
public static String checkNamespaceURI(String uri) {
- // Manually do rules, since URIs can be null or empty
+ // URIs can be null or empty to indicate no namespace
if ((uri == null) || (uri.equals(""))) {
return null;
}
- // Cannot start with a number
- char first = uri.charAt(0);
- if (Character.isDigit(first)) {
- return "Namespace URIs cannot begin with a number";
- }
- // Cannot start with a $
- if (first == '$') {
- return "Namespace URIs cannot begin with a dollar sign ($)";
- }
- // Cannot start with a -
- if (first == '-') {
- return "Namespace URIs cannot begin with a hyphen (-)";
- }
+ // We need to make sure there are no more than one #
+ // in the proposed URI reference
+ int numberOfHashes = 0;
+ for (int i = 0; i < uri.length(); i++) {
+ char test = uri.charAt(i);
+ if (!isURICharacter(test)) {
+ if (test == '#') {
+ numberOfHashes++;
+ if (numberOfHashes > 1) {
+ return "URI references can contain at most one # character.";
+ }
+ }
+ else {
+ String msgNumber = "0x" + Integer.toHexString(test);
+ if (test <= 0x09) msgNumber = "0x0" + Integer.toHexString(test);
+ return "Namespace URIs cannot contain " + msgNumber;
+ }
+ } // end if
+ if (test == '%') { // must be followed by two hexadecimal digits
+ try {
+ char firstDigit = uri.charAt(i+1);
+ char secondDigit = uri.charAt(i+2);
+ if (!isHexDigit(firstDigit) || !isHexDigit(secondDigit)) {
+ return "Percent signs in URIs must be followed by exactly two hexadecimal digits.";
+ }
+
+ }
+ catch (StringIndexOutOfBoundsException e) {
+ return "Percent signs in URIs must be followed by exactly two hexadecimal digits.";
+ }
+ }
+ } // end for
// If we got here, everything is OK
return null;
@@ -1075,5 +1096,52 @@
if (c < 0x0F20) return false; if (c <= 0x0F29) return true;
return false;
- }
+ }
+
+ /**
+ * This is a utility function for determining whether a specified
+ * Unicode character is allowed in
+ * URIs as determined by RFC 2396. Note that the # character is allowed
+ * in URI references but <b>not</b> URIs. Thus this method returns false
+ * for that character.
+ *
+ * @param c <code>char</code> to check for URI compliance.
+ * @return <code>boolean</code> - true if it's allowed, false otherwise.
+ */
+ public static boolean isURICharacter(char c) {
+
+ if (c <= 0x0020) return false; if (c <= 0x0021) return true;
+ if (c <= 0x0023) return false; if (c <= 0x003B) return true;
+ if (c <= 0x003C) return false; if (c <= 0x003D) return true;
+
+ if (c <= 0x003E) return false; if (c <= 0x005A) return true;
+ if (c <= 0x005E) return false; if (c <= 0x005F) return true;
+ if (c <= 0x0060) return false; if (c <= 0x007A) return true;
+
+ if (c <= 0x007D) return false; if (c <= 0x007E) return true;
+
+ return false;
+ }
+
+ /**
+ * This is a utility function for determining whether a specified
+ * Unicode character is a hexadecimal digit as defined in RFC 2396;
+ * that is, one of the ASCII characters 0-9, a-f, or A-F
+ *
+ * @param c <code>char</code> to check for hex digit.
+ * @return <code>boolean</code> - true if it's allowed, false otherwise.
+ */
+ public static boolean isHexDigit(char c) {
+
+ // I suspect most characters passed to this method will be
+ // correct hexadecimal digits, so I test for the true cases
+ // first. If this proves to be a performance bottleneck a switch statement
+ // might optimize this.
+ if (c >= '0' && c <= '9') return true;
+ if (c >= 'A' && c <= 'F') return true;
+ if (c >= 'a' && c <= 'f') return true;
+
+ return false;
+ }
+
}
-------------- next part --------------
Index: src/java/org/jdom/test/cases/TestVerifier.java
===================================================================
RCS file: /home/cvspublic/jdom-test/src/java/org/jdom/test/cases/TestVerifier.java,v
retrieving revision 1.7
diff -d -u -r1.7 TestVerifier.java
--- src/java/org/jdom/test/cases/TestVerifier.java 2002/06/26 01:46:54 1.7
+++ src/java/org/jdom/test/cases/TestVerifier.java 2002/06/27 22:50:27
@@ -701,44 +701,65 @@
}
/**
- * Tests that checkNamespaceURI validates xml uri's.
- * A valid URI is alphanumeric characters and the reserved characters:
+ * Tests that checkNamespaceURI validates XML URI references.
+ * A valid URI is composed of alphanumeric ASCII characters and the reserved characters:
* ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
- *
- * The URI cannot begin with a digit, "-" or "$". It must have at least
- * one ":" separating the scheme from the scheme specific part
- *
- * XXX:TODO make this match the eventual specs for the Verifier class which is incomplete
+ * and the mark characters
+ * "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
*/
public void test_TCM__String_checkNamespaceURI_String() {
- //invalid start characters
- assertTrue("validated invalid URI with startin -" , !(Verifier.checkNamespaceURI('-' + "test")== null));
- assertTrue("validated invalid URI with starting digit" , !(Verifier.checkNamespaceURI("9") == null));
- assertTrue("validated invalid URI with starting $" , !(Verifier.checkNamespaceURI("$") == null));
- //valid tests
- assertTrue("invalidated valid null" , Verifier.checkNamespaceURI(null) == null);
- assertTrue("invalidated valid URI with :" , Verifier.checkNamespaceURI("test" + ':' + "local") == null);
- assertTrue("invalidated valid URI with _" , Verifier.checkNamespaceURI("test" + '_') == null);
- assertTrue("invalidated valid URI with ." , Verifier.checkNamespaceURI("test" + '.' + "URI") == null);
- assertTrue("invalidated valid URI with digit" , Verifier.checkNamespaceURI("test9") == null);
- assertTrue("invalidated valid URI with 0x00B7" , Verifier.checkNamespaceURI("test" + (char)0x00B7) == null);
- assertTrue("invalidated valid URI with 0x4E01" , Verifier.checkNamespaceURI("test" + (char)0x4E01) == null);
- assertTrue("invalidated valid URI with 0x0301" , Verifier.checkNamespaceURI("test" + (char)0x0301) == null);
- //check out of range values
-
- /** skip these tests until the time the checks are implemented
- assertTrue("validated invalid URI with xmlns" , !(Verifier.checkNamespaceURI("xmlns")== null));
- assertTrue("validated invalid URI with startin :" , !(Verifier.checkNamespaceURI(':' + "test")== null));
- assertTrue("validated invalid URI with starting ." , !(Verifier.checkNamespaceURI(".") == null));
-
+ //invalid characters
+ assertTrue("validated invalid URI with non-ASCII character" , Verifier.checkNamespaceURI("test" + (char)0x4E01) != null);
assertTrue("validated invalid URI with null" ,! (Verifier.checkNamespaceURI("test" + (char)0x0) == null));
assertTrue("validated invalid URI with null" ,! (Verifier.checkNamespaceURI("test" + (char)0x0 + "ing") == null));
assertTrue("validated invalid URI with null" ,! (Verifier.checkNamespaceURI((char)0x0 + "test") == null));
assertTrue("validated invalid URI with 0x01" ,! (Verifier.checkNamespaceURI((char)0x01 + "test") == null));
assertTrue("validated invalid URI with 0xD800" ,! (Verifier.checkNamespaceURI("test" + (char)0xD800) == null));
assertTrue("validated invalid URI with 0xD800" ,! (Verifier.checkNamespaceURI("test" + (char)0xD800 + "ing") == null));
- assertTrue("validated invalid URI with 0xD800" ,! (Verifier.checkNamespaceURI((char)0xD800 + "test") == null));
- */
+ assertTrue("validated invalid URI with 0xD800" ,! (Verifier.checkNamespaceURI((char)0xD800 + "test") == null));
+ assertTrue("validated invalid URI with 0x00B7" ,! (Verifier.checkNamespaceURI("test" + (char)0x00B7) == null));
+ assertTrue("validated invalid URI with 0x0301" ,! (Verifier.checkNamespaceURI("test" + (char)0x0301) == null));
+
+ // unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
+ assertTrue("validated invalid URI with {" , Verifier.checkNamespaceURI("test{") != null);
+ assertTrue("validated invalid URI with }" , Verifier.checkNamespaceURI("test}") != null);
+ assertTrue("validated invalid URI with |" , Verifier.checkNamespaceURI("test|") != null);
+ assertTrue("validated invalid URI with \\" , Verifier.checkNamespaceURI("test\\") != null);
+ assertTrue("validated invalid URI with ^" , Verifier.checkNamespaceURI("test^") != null);
+ assertTrue("validated invalid URI with [" , Verifier.checkNamespaceURI("test[") != null);
+ assertTrue("validated invalid URI with ]" , Verifier.checkNamespaceURI("test]") != null);
+ assertTrue("validated invalid URI with `" , Verifier.checkNamespaceURI("test`") != null);
+ // delims = "<" | ">" | "#" | "%" | <">
+ assertTrue("validated invalid URI with <" , Verifier.checkNamespaceURI("test<") != null);
+ assertTrue("validated invalid URI with >" , Verifier.checkNamespaceURI("test>") != null);
+ assertTrue("validated invalid URI with \"" , Verifier.checkNamespaceURI("test\"") != null);
+ // # is a special case. It is illegal in a URI, but it is legal in a URI reference,
+ // and namespace URIs are actually URI references. However, a URI reference may contain
+ // at most one of these characters
+ assertTrue("validated invalid URI with multiple #" , Verifier.checkNamespaceURI("test##") != null);
+ assertTrue("validated invalid URI with multiple #" , Verifier.checkNamespaceURI("#test#") != null);
+ assertTrue("validated invalid URI with multiple #" , Verifier.checkNamespaceURI("test#test#test") != null);
+
+
+ //valid tests
+ assertTrue("invalidated valid URI with starting -" , (Verifier.checkNamespaceURI('-' + "test")== null));
+ assertTrue("invalidated valid URI with starting digit" , (Verifier.checkNamespaceURI("9") == null));
+ assertTrue("invalidated valid URI with starting $" , (Verifier.checkNamespaceURI("$") == null));
+ assertTrue("invalidated valid null" , Verifier.checkNamespaceURI(null) == null);
+ assertTrue("invalidated valid URI with :" , Verifier.checkNamespaceURI("test" + ':' + "local") == null);
+ assertTrue("invalidated valid URI with _" , Verifier.checkNamespaceURI("test" + '_') == null);
+ assertTrue("invalidated valid URI with ." , Verifier.checkNamespaceURI("test" + '.' + "URI") == null);
+ assertTrue("invalidated valid URI with digit" , Verifier.checkNamespaceURI("test9") == null);
+ // # is a special case. It is illegal in a URI, but it is legal in a URI reference,
+ // and namespace URIs are actually URI references
+ assertTrue("invalidated valid URI with #" , Verifier.checkNamespaceURI("test#") == null);
+ assertTrue("invalidated valid URI with #" , Verifier.checkNamespaceURI("#test") == null);
+ assertTrue("invalidated valid URI with #" , Verifier.checkNamespaceURI("test#test") == null);
+
+ // Check percent escaping
+ assertTrue("invalidated valid URI with %AD" , Verifier.checkNamespaceURI("test%AD") == null);
+ assertTrue("validated invalid URI with % AD" , Verifier.checkNamespaceURI("test% AD") != null);
+ assertTrue("validated invalid URI with %0" , Verifier.checkNamespaceURI("test%0") != null);
}
More information about the jdom-interest
mailing list