001 /* 002 $Id: XmlParser.java,v 1.4 2004/04/15 17:35:14 jstrachan Exp $ 003 004 Copyright 2003 (C) James Strachan and Bob Mcwhirter. All Rights Reserved. 005 006 Redistribution and use of this software and associated documentation 007 ("Software"), with or without modification, are permitted provided 008 that the following conditions are met: 009 010 1. Redistributions of source code must retain copyright 011 statements and notices. Redistributions must also contain a 012 copy of this document. 013 014 2. Redistributions in binary form must reproduce the 015 above copyright notice, this list of conditions and the 016 following disclaimer in the documentation and/or other 017 materials provided with the distribution. 018 019 3. The name "groovy" must not be used to endorse or promote 020 products derived from this Software without prior written 021 permission of The Codehaus. For written permission, 022 please contact info@codehaus.org. 023 024 4. Products derived from this Software may not be called "groovy" 025 nor may "groovy" appear in their names without prior written 026 permission of The Codehaus. "groovy" is a registered 027 trademark of The Codehaus. 028 029 5. Due credit should be given to The Codehaus - 030 http://groovy.codehaus.org/ 031 032 THIS SOFTWARE IS PROVIDED BY THE CODEHAUS AND CONTRIBUTORS 033 ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT 034 NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 035 FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 036 THE CODEHAUS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 037 INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 038 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 039 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 040 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 041 STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 042 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 043 OF THE POSSIBILITY OF SUCH DAMAGE. 044 045 */ 046 package groovy.util; 047 048 import groovy.xml.QName; 049 050 import java.io.File; 051 import java.io.FileInputStream; 052 import java.io.IOException; 053 import java.io.InputStream; 054 import java.io.Reader; 055 import java.io.StringReader; 056 import java.security.AccessController; 057 import java.security.PrivilegedActionException; 058 import java.security.PrivilegedExceptionAction; 059 import java.util.ArrayList; 060 import java.util.HashMap; 061 import java.util.List; 062 import java.util.Map; 063 064 import javax.xml.parsers.ParserConfigurationException; 065 import javax.xml.parsers.SAXParser; 066 import javax.xml.parsers.SAXParserFactory; 067 068 import org.xml.sax.Attributes; 069 import org.xml.sax.ContentHandler; 070 import org.xml.sax.InputSource; 071 import org.xml.sax.Locator; 072 import org.xml.sax.SAXException; 073 import org.xml.sax.XMLReader; 074 075 /** 076 * A helper class for parsing XML into a tree of Node instances for 077 * a simple way of processing XML. This parser does not preserve the 078 * XML InfoSet - if thats what you need try using W3C DOM, dom4j, JDOM, XOM etc. 079 * This parser ignores comments and processing instructions and converts the 080 * XML into a Node for each element in the XML with attributes 081 * and child Nodes and Strings. This simple model is sufficient for 082 * most simple use cases of processing XML. 083 * 084 * @author <a href="mailto:james@coredevelopers.net">James Strachan</a> 085 * @version $Revision: 1.4 $ 086 */ 087 public class XmlParser implements ContentHandler { 088 089 private StringBuffer bodyText = new StringBuffer(); 090 private List stack = new ArrayList(); 091 private Locator locator; 092 private XMLReader reader; 093 private Node parent; 094 private boolean trimWhitespace = true; 095 096 public XmlParser() throws ParserConfigurationException, SAXException { 097 this(false, true); 098 } 099 100 public XmlParser(boolean validating, boolean namespaceAware) throws ParserConfigurationException, SAXException { 101 SAXParserFactory factory = null; 102 try { 103 factory = (SAXParserFactory) AccessController.doPrivileged(new PrivilegedExceptionAction() { 104 public Object run() throws ParserConfigurationException { 105 return SAXParserFactory.newInstance(); 106 } 107 }); 108 } catch (PrivilegedActionException pae) { 109 Exception e = pae.getException(); 110 if (e instanceof ParserConfigurationException) { 111 throw (ParserConfigurationException) e; 112 } else { 113 throw new RuntimeException(e); 114 } 115 } 116 factory.setNamespaceAware(namespaceAware); 117 factory.setValidating(validating); 118 119 SAXParser parser = factory.newSAXParser(); 120 reader = parser.getXMLReader(); 121 } 122 123 public XmlParser(XMLReader reader) { 124 this.reader = reader; 125 } 126 127 public XmlParser(SAXParser parser) throws SAXException { 128 reader = parser.getXMLReader(); 129 } 130 131 132 /** 133 * Parses the content of the given file as XML turning it into a tree 134 * of Nodes 135 */ 136 public Node parse(File file) throws IOException, SAXException { 137 138 InputSource input = new InputSource(new FileInputStream(file)); 139 input.setSystemId("file://" + file.getAbsolutePath()); 140 getXMLReader().parse(input); 141 return parent; 142 143 } 144 145 /** 146 * Parse the content of the specified input source into a tree of Nodes. 147 */ 148 public Node parse(InputSource input) throws IOException, SAXException { 149 getXMLReader().parse(input); 150 return parent; 151 } 152 153 /** 154 * Parse the content of the specified input stream into a tree of Nodes. 155 * Note that using this method will not provide the parser with any URI 156 * for which to find DTDs etc 157 */ 158 public Node parse(InputStream input) throws IOException, SAXException { 159 InputSource is = new InputSource(input); 160 getXMLReader().parse(is); 161 return parent; 162 } 163 164 /** 165 * Parse the content of the specified reader into a tree of Nodes. 166 * Note that using this method will not provide the parser with any URI 167 * for which to find DTDs etc 168 */ 169 public Node parse(Reader in) throws IOException, SAXException { 170 InputSource is = new InputSource(in); 171 getXMLReader().parse(is); 172 return parent; 173 } 174 175 /** 176 * Parse the content of the specified URI into a tree of Nodes 177 */ 178 public Node parse(String uri) throws IOException, SAXException { 179 InputSource is = new InputSource(uri); 180 getXMLReader().parse(is); 181 return parent; 182 } 183 184 /** 185 * A helper method to parse the given text as XML 186 * 187 * @param text 188 * @return 189 */ 190 public Node parseText(String text) throws IOException, SAXException { 191 return parse(new StringReader(text)); 192 } 193 194 195 // ContentHandler interface 196 //------------------------------------------------------------------------- 197 public void startDocument() throws SAXException { 198 parent = null; 199 } 200 201 public void endDocument() throws SAXException { 202 stack.clear(); 203 } 204 205 public void startElement(String namespaceURI, String localName, String qName, Attributes list) 206 throws SAXException { 207 addTextToNode(); 208 209 Object name = getElementName(namespaceURI, localName, qName); 210 211 int size = list.getLength(); 212 Map attributes = new HashMap(size); 213 for (int i = 0; i < size; i++) { 214 Object attributeName = getElementName(list.getURI(i), list.getLocalName(i), list.getQName(i)); 215 String value = list.getValue(i); 216 attributes.put(attributeName, value); 217 } 218 parent = new Node(parent, name, attributes, new ArrayList()); 219 stack.add(parent); 220 } 221 222 public void endElement(String namespaceURI, String localName, String qName) throws SAXException { 223 addTextToNode(); 224 225 if (!stack.isEmpty()) { 226 stack.remove(stack.size() - 1); 227 if (!stack.isEmpty()) { 228 parent = (Node) stack.get(stack.size() - 1); 229 } 230 } 231 } 232 233 public void characters(char buffer[], int start, int length) throws SAXException { 234 bodyText.append(buffer, start, length); 235 } 236 237 public void startPrefixMapping(String prefix, String namespaceURI) throws SAXException { 238 } 239 240 public void endPrefixMapping(String prefix) throws SAXException { 241 } 242 243 public void ignorableWhitespace(char buffer[], int start, int len) throws SAXException { 244 } 245 246 public void processingInstruction(String target, String data) throws SAXException { 247 } 248 249 public Locator getDocumentLocator() { 250 return locator; 251 } 252 253 public void setDocumentLocator(Locator locator) { 254 this.locator = locator; 255 } 256 257 public void skippedEntity(String name) throws SAXException { 258 } 259 260 // Implementation methods 261 //------------------------------------------------------------------------- 262 protected XMLReader getXMLReader() { 263 reader.setContentHandler(this); 264 return reader; 265 } 266 267 protected void addTextToNode() { 268 String text = bodyText.toString(); 269 if (trimWhitespace) { 270 text = text.trim(); 271 } 272 if (text.length() > 0) { 273 parent.children().add(text); 274 } 275 bodyText = new StringBuffer(); 276 } 277 278 protected Object getElementName(String namespaceURI, String localName, String qName) throws SAXException { 279 String name = localName; 280 if ((name == null) || (name.length() < 1)) { 281 name = qName; 282 } 283 if (namespaceURI == null || namespaceURI.length() <= 0) { 284 return name; 285 } 286 else { 287 return new QName(namespaceURI, name, qName); 288 } 289 } 290 }