Index: XMLDocumentHandlerSAX.java
===================================================================
RCS file: /home/cvspublic/jakarta-lucene-sandbox/contributions/XML-Indexing-Demo/src/java/org/apache/lucenesandbox/xmlindexingdemo/XMLDocumentHandlerSAX.java,v
retrieving revision 1.2
diff -u -r1.2 XMLDocumentHandlerSAX.java
--- XMLDocumentHandlerSAX.java 11 Jan 2004 14:12:19 -0000 1.2
+++ XMLDocumentHandlerSAX.java 5 Apr 2004 20:11:39 -0000
@@ -1,53 +1,89 @@
 package org.apache.lucenesandbox.xmlindexingdemo;
 
-import org.xml.sax.*;
-import javax.xml.parsers.*;
-
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 
 import java.io.File;
 import java.io.IOException;
 
-public class XMLDocumentHandlerSAX
-    extends HandlerBase {
-  /** A buffer for each XML element */
-  private StringBuffer elementBuffer = new StringBuffer();
-
-  private Document mDocument;
-
-  // constructor
-  public XMLDocumentHandlerSAX(File xmlFile)
-      throws ParserConfigurationException, SAXException, IOException {
-    SAXParserFactory spf = SAXParserFactory.newInstance();
-
-    SAXParser parser = spf.newSAXParser();
-    parser.parse(xmlFile, this);
-  }
-
-  // call at document start
-  public void startDocument() {
-    mDocument = new Document();
-  }
-
-  // call at element start
-  public void startElement(String localName, AttributeList atts)
-      throws SAXException {
-    elementBuffer.setLength(0);
-  }
-
-  // call when cdata found
-  public void characters(char[] text, int start, int length) {
-    elementBuffer.append(text, start, length);
-  }
-
-  // call at element end
-  public void endElement(String localName)
-      throws SAXException {
-    mDocument.add(Field.Text(localName, elementBuffer.toString()));
-  }
-
-  public Document getDocument() {
-    return mDocument;
-  }
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class XMLDocumentHandlerSAX extends DefaultHandler {
+  /** Text collected since the most recent element start tag */
+  private StringBuffer elementBuffer = new StringBuffer();
+
+  private Document mDocument;
+
+  // constructor: parses xmlFile immediately, with this object as the SAX handler
+  public XMLDocumentHandlerSAX(File xmlFile)
+      throws ParserConfigurationException, SAXException, IOException {
+    SAXParserFactory spf = SAXParserFactory.newInstance();
+
+    // use validating parser?
+    //spf.setValidating(false);
+    // make parser name space aware?
+    //spf.setNamespaceAware(true);
+
+    SAXParser parser = spf.newSAXParser();
+    //System.out.println("parser is validating: " + parser.isValidating());
+    try {
+      parser.parse(xmlFile, this);
+    } catch (org.xml.sax.SAXParseException spe) {
+      System.out.println("SAXParser caught SAXParseException at line: " +
+          spe.getLineNumber() + " column " +
+          spe.getColumnNumber());
+    }
+  }
+
+  // call at document start: begin a fresh Lucene document
+  public void startDocument() throws SAXException {
+    mDocument = new Document();
+  }
+
+  // call at element start: dump attributes to stdout and reset the text buffer
+  public void startElement(String namespaceURI, String localName,
+      String qualifiedName, Attributes attrs) throws SAXException {
+    String eName = localName;
+    if ("".equals(eName)) {
+      eName = qualifiedName; // namespaceAware = false
+    }
+    // list the attribute(s)
+    if (attrs != null) {
+      for (int i = 0; i < attrs.getLength(); i++) {
+        String aName = attrs.getLocalName(i); // Attr name
+        if ("".equals(aName)) { aName = attrs.getQName(i); }
+        // perform application specific action on attribute(s)
+        // for now just dump out attribute name and value
+        System.out.println("attr " + aName+"="+attrs.getValue(i));
+      }
+    }
+    elementBuffer.setLength(0);
+  }
+
+  // call when character data found: append it to the current element buffer
+  public void characters(char[] text, int start, int length)
+      throws SAXException {
+    elementBuffer.append(text, start, length);
+  }
+
+  // call at element end: add the buffered text as a field named after the element
+  public void endElement(String namespaceURI, String simpleName,
+      String qualifiedName) throws SAXException {
+
+    String eName = simpleName;
+    if ("".equals(eName)) {
+      eName = qualifiedName; // namespaceAware = false
+    }
+
+    mDocument.add(Field.Text(eName, elementBuffer.toString()));
+  }
+
+  public Document getDocument() {
+    return mDocument;
+  }
 }