[jdom-interest] malformed URL exception exception in saxbuilder.build due to unreachable URL

cliff palmer palmercliff at gmail.com
Thu Feb 9 15:26:22 PST 2012


code below:

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.sql.Clob;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Collections;
import java.util.Map;
import javax.sql.DataSource;
import oracle.sql.CLOB;
import org.apache.log4j.Appender;
import org.apache.log4j.EnhancedPatternLayout;
import org.apache.log4j.Logger;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Attribute;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;

public class ProfileXMLAttributes implements AppDriver {
	public String driverClassName;
	static Logger logLogger;
	Appender logAppender;
	EnhancedPatternLayout logLayout;
	DataSource dataSource;
	JdbcTemplate jdbcTemplate;
	Connection conn;
	Statement stmt;
	ResultSet rs;
	ResultSetMetaData rsmd;
	SQLClob sqlClob;
	SAXBuilder saxBuilder;
	Document xmlDoc;
	Format xmlFmt;
	XMLOutputter xmlOutputter;
	Element xmlElement;
	Element xPathElement;
	Element rootElement;
	String pathString;
	static SQLClob theSQLClob;
	Map<String, Integer> tagMap;
	String tagKey;
	Integer tagValue;
	int badXML = 0;
	int rowsRead = 0;
	boolean goodXML;
	String msgID;

	/* =========================================================
		init is called once by the driver program to set up the
		run environment for logging, JDBC, JDOM, etc
		========================================================= */

	@Override
	public void init() {
		logLogger = Logger.getLogger(ProfileXMLAttributes.class.getName());
		dataSource = (DataSource) main.context.getBean("datasource");
		jdbcTemplate = new JdbcTemplate(dataSource);
		try{
			conn = DriverManager.getConnection("jdbc:oracle:thin:@10.1.6.78:1521:kbfcubs",
"kbfcubs", "kbfcubs");
		} catch (SQLException e) {
			logLogger.debug("Exception initializing connection");
			e.printStackTrace();
		}
		stmt = null;
		try{
			stmt = conn.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE,
ResultSet.CONCUR_READ_ONLY);
			stmt.setFetchSize(10000);
		} catch (SQLException e) {
			logLogger.debug("Exception initializing statement");
			e.printStackTrace();
		}
		rs = null;
		try {
			String xmlQuery = "select xml_id, xml_contents from xml_table";
			rs = stmt.executeQuery(xmlQuery);
		} catch (SQLException e) {
			logLogger.debug("Exception executing query");
			e.printStackTrace();
		}
		saxBuilder = new SAXBuilder();
		xmlFmt =  Format.getPrettyFormat().setEncoding("UTF-8");
		xmlOutputter = new XMLOutputter(xmlFmt);
		structureString = new ArrayList<String>();
		tagMap = new HashMap<String, Integer>();
	}

	/* =========================================================
		process is called once by the driver program to do all
		the work.  For each row returned by the JDBC query it
		instantiates the xml document in JDOM and then calls
		doProcess passing the rootElement to step through the
		xml tags.
		========================================================= */

	@Override
	public void process() throws SQLException {
		while(rs.next()) {
			xmlDoc = null;
			rowsRead ++;
			msgID = rs.getString(1);
			try {
				xmlDoc = saxBuilder.build(rs.getString(2));
			} catch (JDOMException e) {
				e.printStackTrace();
			} catch (IOException e) {
				goodXML = false;
				e.printStackTrace();
			}
			try {
				rootElement = xmlDoc.getRootElement();
				doProcess(rootElement);
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
	}

	/* =========================================================
		doProcess is called once by process for each row in the
		JDBC result set.  It is also called recursively as the
		xml tags are discovered. Each xml tag is added to a
		hash map and counts of the occurances of each tag are
		accumulated.
		========================================================= */


	private void doProcess (Element currentElement) {
		tagKey = new String(currentElement.getName());
		tagValue = tagMap.get(tagKey);
		if(tagValue == null) {
			tagValue = 1;
		} else {
			tagValue += 1;
		}
		tagMap.put(tagKey, tagValue);
		Iterator<Element> itr = currentElement.getChildren().iterator();
		boolean hasChildren = false;
		while (itr.hasNext()) {
			hasChildren = true;
			Object childElement = itr.next();
		}
		if (hasChildren == true) {
			itr = currentElement.getChildren().iterator();
			hasChildren = false;
		}
		while (itr.hasNext()) {
			Object childElement = itr.next();
			doProcess((Element) childElement);
		}
	}
}


On 2/9/12, cliff palmer <palmercliff at gmail.com> wrote:
> Hi Rolf
> I will post the code later, (sorry late for a meeting) but to answer
> your questions:
> - this error occurs when there is an "xmlns" declaration.  Since this
> is the first instance of an "xmlns" declaration I've encountered with
> JDOM and all of the URLs in the "xmlns" declaration that I have found
> point to the same bad address, I don't know if the problem is related
> to lookup of the URL or just the presence of an "xmlns" declaration.
> - the problem is predictable and occurs for each xml document that
> uses this bad URL in an "xmlns" declaration.
> - I've used the code (I will post it, I promise) to parse over 3
> million xml documents, passing a string containing the xml document
> (not a URL).  The value I pass to saxbuilder.build is the returned
> string from the JDBC call ResultSet.getString using a column number
> parameter.  I haven't been altering or converting the string returned
> from JDBC.
>
> Thanks Rolf and I will post the code as soon as the suits are done with me.
>
> Cliff
>
> On Thu, Feb 9, 2012 at 5:40 PM, Rolf Lear <jdom at tuis.net> wrote:
>> Hi Cliff.
>>
>> I think there's been some good pointers already, but just to make things
>> crystal clear... can you perhaps post the relevant code snippet you are
>> using to parse the document, and perhaps the first few lines of the
>> actual
>> XML too.
>>
>> Also, does this problem happen with *all* xml documents (the first one),
>> or
>> with just some of them?
>>
>> My guess is that Oliver has the right idea with parsing the wrong
>> string....
>> remember that the SaxBuilder.build(String) method expects the String to be
>> a
>> URL, not the actual XML content..... YTour stack trace indicates you are
>> calling this method...
>>
>> See the code here:
>> https://github.com/hunterhacker/jdom/blob/jdom-1.x/core/src/java/org/jdom/input/SAXBuilder.java#L986
>>
>> Anyway, seeing your code would help....
>>
>> Rolf
>>
>>
>> On 09/02/2012 3:54 PM, cliff palmer wrote:
>>>
>>> I'm reading through several hundred thousand existing XML documents
>>> building counts of XML tags and have encountered a
>>> Java.net.MalformedURL Exception raised by saxBuilder.build because the
>>> xmlns points to a URL that can not be reached.
>>> I am using JDOM 1.1.2.
>>> Is there a call or parameter setting that will cause saxBuilder to
>>> ignore namespaces when parsing?
>>> Thanks!
>>> Cliff
>>> _______________________________________________
>>> To control your jdom-interest membership:
>>> http://www.jdom.org/mailman/options/jdom-interest/youraddr@yourhost.com
>>>
>>
>


More information about the jdom-interest mailing list