001 /* 002 $Id: XmlParser.java 4132 2006-10-18 08:24:58Z paulk $ 003 004 Copyright 2003 (C) James Strachan and Bob Mcwhirter. All Rights Reserved. 005 006 Redistribution and use of this software and associated documentation 007 ("Software"), with or without modification, are permitted provided 008 that the following conditions are met: 009 010 1. Redistributions of source code must retain copyright 011 statements and notices. Redistributions must also contain a 012 copy of this document. 013 014 2. Redistributions in binary form must reproduce the 015 above copyright notice, this list of conditions and the 016 following disclaimer in the documentation and/or other 017 materials provided with the distribution. 018 019 3. The name "groovy" must not be used to endorse or promote 020 products derived from this Software without prior written 021 permission of The Codehaus. For written permission, 022 please contact info@codehaus.org. 023 024 4. Products derived from this Software may not be called "groovy" 025 nor may "groovy" appear in their names without prior written 026 permission of The Codehaus. "groovy" is a registered 027 trademark of The Codehaus. 028 029 5. Due credit should be given to The Codehaus - 030 http://groovy.codehaus.org/ 031 032 THIS SOFTWARE IS PROVIDED BY THE CODEHAUS AND CONTRIBUTORS 033 ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT 034 NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 035 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 036 THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 037 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 038 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 039 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 040 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 041 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 042 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 043 OF THE POSSIBILITY OF SUCH DAMAGE. 044 045 */ 046 package groovy.util; 047 048 import groovy.xml.QName; 049 import groovy.xml.FactorySupport; 050 051 import java.io.File; 052 import java.io.FileInputStream; 053 import java.io.IOException; 054 import java.io.InputStream; 055 import java.io.Reader; 056 import java.io.StringReader; 057 import java.util.ArrayList; 058 import java.util.HashMap; 059 import java.util.List; 060 import java.util.Map; 061 062 import javax.xml.parsers.ParserConfigurationException; 063 import javax.xml.parsers.SAXParser; 064 import javax.xml.parsers.SAXParserFactory; 065 066 import org.xml.sax.*; 067 068 /** 069 * A helper class for parsing XML into a tree of Node instances for 070 * a simple way of processing XML. This parser does not preserve the 071 * XML InfoSet - if thats what you need try using W3C DOM, dom4j, JDOM, XOM etc. 072 * This parser ignores comments and processing instructions and converts the 073 * XML into a Node for each element in the XML with attributes 074 * and child Nodes and Strings. This simple model is sufficient for 075 * most simple use cases of processing XML. 076 * 077 * @author <a href="mailto:james@coredevelopers.net">James Strachan</a> 078 * @version $Revision: 4132 $ 079 */ 080 public class XmlParser implements ContentHandler { 081 082 private StringBuffer bodyText = new StringBuffer(); 083 private List stack = new ArrayList(); 084 private Locator locator; 085 private XMLReader reader; 086 private Node parent; 087 private boolean trimWhitespace = true; 088 089 public XmlParser() throws ParserConfigurationException, SAXException { 090 this(false, true); 091 } 092 093 public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException { 094 SAXParserFactory factory = FactorySupport.createSaxParserFactory(); 095 factory.setNamespaceAware(namespaceAware); 096 factory.setValidating(validating); 097 reader = factory.newSAXParser().getXMLReader(); 098 } 099 100 public XmlParser(XMLReader reader) { 101 this.reader = reader; 102 } 103 104 public XmlParser(SAXParser parser) throws SAXException { 105 reader = parser.getXMLReader(); 106 } 107 108 109 /** 110 * Parses the content of the given file as XML turning it into a tree 111 * of Nodes 112 */ 113 public Node parse(File file) throws IOException, SAXException { 114 InputSource input = new InputSource(new FileInputStream(file)); 115 input.setSystemId("file://" + file.getAbsolutePath()); 116 getXMLReader().parse(input); 117 return parent; 118 119 } 120 121 /** 122 * Parse the content of the specified input source into a tree of Nodes. 123 */ 124 public Node parse(InputSource input) throws IOException, SAXException { 125 getXMLReader().parse(input); 126 return parent; 127 } 128 129 /** 130 * Parse the content of the specified input stream into a tree of Nodes. 131 * Note that using this method will not provide the parser with any URI 132 * for which to find DTDs etc 133 */ 134 public Node parse(InputStream input) throws IOException, SAXException { 135 InputSource is = new InputSource(input); 136 getXMLReader().parse(is); 137 return parent; 138 } 139 140 /** 141 * Parse the content of the specified reader into a tree of Nodes. 142 * Note that using this method will not provide the parser with any URI 143 * for which to find DTDs etc 144 */ 145 public Node parse(Reader in) throws IOException, SAXException { 146 InputSource is = new InputSource(in); 147 getXMLReader().parse(is); 148 return parent; 149 } 150 151 /** 152 * Parse the content of the specified URI into a tree of Nodes 153 */ 154 public Node parse(String uri) throws IOException, SAXException { 155 InputSource is = new InputSource(uri); 156 getXMLReader().parse(is); 157 return parent; 158 } 159 160 /** 161 * A helper method to parse the given text as XML 162 * 163 * @param text 164 */ 165 public Node parseText(String text) throws IOException, SAXException { 166 return parse(new StringReader(text)); 167 } 168 // Delegated XMLReader methods 169 //------------------------------------------------------------------------ 170 171 /* (non-Javadoc) 172 * @see org.xml.sax.XMLReader#getDTDHandler() 173 */ 174 public DTDHandler getDTDHandler() { 175 return this.reader.getDTDHandler(); 176 } 177 178 /* (non-Javadoc) 179 * @see org.xml.sax.XMLReader#getEntityResolver() 180 */ 181 public EntityResolver getEntityResolver() { 182 return this.reader.getEntityResolver(); 183 } 184 185 /* (non-Javadoc) 186 * @see org.xml.sax.XMLReader#getErrorHandler() 187 */ 188 public ErrorHandler getErrorHandler() { 189 return this.reader.getErrorHandler(); 190 } 191 192 /* (non-Javadoc) 193 * @see org.xml.sax.XMLReader#getFeature(java.lang.String) 194 */ 195 public boolean getFeature(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException { 196 return this.reader.getFeature(uri); 197 } 198 199 /* (non-Javadoc) 200 * @see org.xml.sax.XMLReader#getProperty(java.lang.String) 201 */ 202 public Object getProperty(final String uri) throws SAXNotRecognizedException, SAXNotSupportedException { 203 return this.reader.getProperty(uri); 204 } 205 206 /* (non-Javadoc) 207 * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler) 208 */ 209 public void setDTDHandler(final DTDHandler dtdHandler) { 210 this.reader.setDTDHandler(dtdHandler); 211 } 212 213 /* (non-Javadoc) 214 * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver) 215 */ 216 public void setEntityResolver(final EntityResolver entityResolver) { 217 this.reader.setEntityResolver(entityResolver); 218 } 219 220 /* (non-Javadoc) 221 * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler) 222 */ 223 public void setErrorHandler(final ErrorHandler errorHandler) { 224 this.reader.setErrorHandler(errorHandler); 225 } 226 227 /* (non-Javadoc) 228 * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean) 229 */ 230 public void setFeature(final String uri, final boolean value) throws SAXNotRecognizedException, SAXNotSupportedException { 231 this.reader.setFeature(uri, value); 232 } 233 234 /* (non-Javadoc) 235 * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object) 236 */ 237 public void setProperty(final String uri, final Object value) throws SAXNotRecognizedException, SAXNotSupportedException { 238 this.reader.setProperty(uri, value); 239 } 240 241 // ContentHandler interface 242 //------------------------------------------------------------------------- 243 public void startDocument() throws SAXException { 244 parent = null; 245 } 246 247 public void endDocument() throws SAXException { 248 stack.clear(); 249 } 250 251 public void startElement(String namespaceURI, String localName, String qName, Attributes list) 252 throws SAXException { 253 addTextToNode(); 254 255 Object name = getElementName(namespaceURI, localName, qName); 256 257 int size = list.getLength(); 258 Map attributes = new HashMap(size); 259 for (int i = 0; i < size; i++) { 260 Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i)); 261 String value = list.getValue(i); 262 attributes.put(attributeName, value); 263 } 264 parent = new Node(parent, name, attributes, new ArrayList()); 265 stack.add(parent); 266 } 267 268 public void endElement(String namespaceURI, String localName, String qName) throws SAXException { 269 addTextToNode(); 270 271 if (!stack.isEmpty()) { 272 stack.remove(stack.size() - 1); 273 if (!stack.isEmpty()) { 274 parent = (Node) stack.get(stack.size() - 1); 275 } 276 } 277 } 278 279 public void characters(char buffer[], int start, int length) throws SAXException { 280 bodyText.append(buffer, start, length); 281 } 282 283 public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException { 284 } 285 286 public void endPrefixMapping(String prefix) throws SAXException { 287 } 288 289 public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException { 290 } 291 292 public void processingInstruction(String target, String data) throws SAXException { 293 } 294 295 public Locator getDocumentLocator() { 296 return locator; 297 } 298 299 public void setDocumentLocator(Locator locator) { 300 this.locator = locator; 301 } 302 303 public void skippedEntity(String name) throws SAXException { 304 } 305 306 // Implementation methods 307 //------------------------------------------------------------------------- 308 protected XMLReader getXMLReader() { 309 reader.setContentHandler(this); 310 return reader; 311 } 312 313 protected void addTextToNode() { 314 String text = bodyText.toString(); 315 if (trimWhitespace) { 316 text = text.trim(); 317 } 318 if (text.length() > 0) { 319 parent.children().add(text); 320 } 321 bodyText = new StringBuffer(); 322 } 323 324 protected Object getElementName(String namespaceURI, String localName, String qName) throws SAXException { 325 String name = localName; 326 if ((name == null) || (name.length() < 1)) { 327 name = qName; 328 } 329 if (namespaceURI == null || namespaceURI.length() <= 0) { 330 return name; 331 } 332 else { 333 return new QName(namespaceURI, name, qName); 334 } 335 } 336 }