001 // Copyright 2004, 2005 The Apache Software Foundation 002 // 003 // Licensed under the Apache License, Version 2.0 (the "License"); 004 // you may not use this file except in compliance with the License. 005 // You may obtain a copy of the License at 006 // 007 // http://www.apache.org/licenses/LICENSE-2.0 008 // 009 // Unless required by applicable law or agreed to in writing, software 010 // distributed under the License is distributed on an "AS IS" BASIS, 011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 // See the License for the specific language governing permissions and 013 // limitations under the License. 014 015 package org.apache.tapestry.util.xml; 016 017 import java.io.IOException; 018 import java.io.InputStream; 019 import java.net.URL; 020 import java.util.ArrayList; 021 import java.util.HashMap; 022 import java.util.List; 023 import java.util.Map; 024 025 import javax.xml.parsers.ParserConfigurationException; 026 import javax.xml.parsers.SAXParser; 027 import javax.xml.parsers.SAXParserFactory; 028 029 import org.apache.commons.logging.Log; 030 import org.apache.commons.logging.LogFactory; 031 import org.apache.hivemind.ApplicationRuntimeException; 032 import org.apache.hivemind.HiveMind; 033 import org.apache.hivemind.Location; 034 import org.apache.hivemind.Resource; 035 import org.apache.hivemind.impl.LocationImpl; 036 import org.apache.tapestry.Tapestry; 037 import org.apache.tapestry.util.RegexpMatcher; 038 import org.xml.sax.Attributes; 039 import org.xml.sax.InputSource; 040 import org.xml.sax.Locator; 041 import org.xml.sax.SAXException; 042 import org.xml.sax.SAXParseException; 043 import org.xml.sax.helpers.DefaultHandler; 044 045 /** 046 * A simplified version of {@link org.apache.commons.digester.Digester}. This version is without as 047 * many bells and whistles but has some key features needed when parsing a document (rather than a 048 * configuration file): <br> 049 * <ul> 050 * <li>Notifications for each bit of text 051 * </ul> 052 * <li>Tracking of exact location within the document.</li> 053 * </ul> 054 * <p> 055 * Like Digester, there's an object stack and a rule stack. The rules are much simpler (more 056 * coding), in that there's a one-to-one relationship between an element and a rule. 057 * <p> 058 * Based on SAX2. 059 * 060 * @author Howard Lewis Ship 061 * @since 3.0 062 */ 063 064 public class RuleDirectedParser extends DefaultHandler 065 { 066 private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class); 067 068 private Resource _documentLocation; 069 070 private List _ruleStack = new ArrayList(); 071 072 private List _objectStack = new ArrayList(); 073 074 private Object _documentObject; 075 076 private Locator _locator; 077 078 private int _line = -1; 079 080 private int _column = -1; 081 082 private Location _location; 083 084 private static SAXParserFactory _parserFactory; 085 086 private SAXParser _parser; 087 088 private RegexpMatcher _matcher; 089 090 private String _uri; 091 092 private String _localName; 093 094 private String _qName; 095 096 /** 097 * Map of {@link IRule}keyed on the local name of the element. 098 */ 099 private Map _ruleMap = new HashMap(); 100 101 /** 102 * Used to accumlate content provided by 103 * {@link org.xml.sax.ContentHandler#characters(char[], int, int)}. 104 */ 105 106 private StringBuffer _contentBuffer = new StringBuffer(); 107 108 /** 109 * Map of paths to external entities (such as the DTD) keyed on public id. 110 */ 111 112 private Map _entities = new HashMap(); 113 114 public Object parse(Resource documentLocation) 115 { 116 if (LOG.isDebugEnabled()) 117 LOG.debug("Parsing: " + documentLocation); 118 119 try 120 { 121 _documentLocation = documentLocation; 122 123 URL url = documentLocation.getResourceURL(); 124 125 if (url == null) 126 throw new DocumentParseException(Tapestry.format( 127 "RuleDrivenParser.resource-missing", 128 documentLocation), documentLocation); 129 130 return parse(url); 131 } 132 finally 133 { 134 _documentLocation = null; 135 _ruleStack.clear(); 136 _objectStack.clear(); 137 _documentObject = null; 138 139 _uri = null; 140 _localName = null; 141 _qName = null; 142 143 _line = -1; 144 _column = -1; 145 _location = null; 146 _locator = null; 147 148 _contentBuffer.setLength(0); 149 } 150 } 151 152 protected Object parse(URL url) 153 { 154 if (_parser == null) 155 _parser = constructParser(); 156 157 InputStream stream = null; 158 159 try 160 { 161 stream = url.openStream(); 162 } 163 catch (IOException ex) 164 { 165 throw new DocumentParseException(Tapestry.format( 166 "RuleDrivenParser.unable-to-open-resource", 167 url), _documentLocation, ex); 168 } 169 170 InputSource source = new InputSource(stream); 171 172 try 173 { 174 _parser.parse(source, this); 175 176 stream.close(); 177 } 178 catch (Exception ex) 179 { 180 throw new DocumentParseException(Tapestry.format( 181 "RuleDrivenParser.parse-error", 182 url, 183 ex.getMessage()), getLocation(), ex); 184 } 185 186 if (LOG.isDebugEnabled()) 187 LOG.debug("Document parsed as: " + _documentObject); 188 189 return _documentObject; 190 } 191 192 /** 193 * Returns an {@link ILocation}representing the current position within the document (depending 194 * on the parser, this may be accurate to column number level). 195 */ 196 197 public Location getLocation() 198 { 199 if (_locator == null) 200 return null; 201 202 int line = _locator.getLineNumber(); 203 int column = _locator.getColumnNumber(); 204 205 if (_line != line || _column != column) 206 { 207 _location = null; 208 _line = line; 209 _column = column; 210 } 211 212 if (_location == null) 213 _location = new LocationImpl(_documentLocation, _line, _column); 214 215 return _location; 216 } 217 218 /** 219 * Pushes an object onto the object stack. The first object pushed is the "document object", the 220 * root object returned by the parse. 221 */ 222 public void push(Object object) 223 { 224 if (_documentObject == null) 225 _documentObject = object; 226 227 push(_objectStack, object, "object stack"); 228 } 229 230 /** 231 * Returns the top object on the object stack. 232 */ 233 public Object peek() 234 { 235 return peek(_objectStack, 0); 236 } 237 238 /** 239 * Returns an object within the object stack, at depth. Depth 0 is the top object, depth 1 is 240 * the next-to-top object, etc. 241 */ 242 243 public Object peek(int depth) 244 { 245 return peek(_objectStack, depth); 246 } 247 248 /** 249 * Removes and returns the top object on the object stack. 250 */ 251 public Object pop() 252 { 253 return pop(_objectStack, "object stack"); 254 } 255 256 private Object pop(List list, String name) 257 { 258 Object result = list.remove(list.size() - 1); 259 260 if (LOG.isDebugEnabled()) 261 LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")"); 262 263 return result; 264 } 265 266 private Object peek(List list, int depth) 267 { 268 return list.get(list.size() - 1 - depth); 269 } 270 271 private void push(List list, Object object, String name) 272 { 273 if (LOG.isDebugEnabled()) 274 LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")"); 275 276 list.add(object); 277 } 278 279 /** 280 * Pushes a new rule onto the rule stack. 281 */ 282 283 protected void pushRule(IRule rule) 284 { 285 push(_ruleStack, rule, "rule stack"); 286 } 287 288 /** 289 * Returns the top rule on the stack. 290 */ 291 292 protected IRule peekRule() 293 { 294 return (IRule) peek(_ruleStack, 0); 295 } 296 297 protected IRule popRule() 298 { 299 return (IRule) pop(_ruleStack, "rule stack"); 300 } 301 302 public void addRule(String localElementName, IRule rule) 303 { 304 _ruleMap.put(localElementName, rule); 305 } 306 307 /** 308 * Registers a public id and corresponding input source. Generally, the source is a wrapper 309 * around an input stream to a package resource. 310 * 311 * @param publicId 312 * the public identifier to be registerred, generally the publicId of a DTD related 313 * to the document being parsed 314 * @param entityPath 315 * the resource path of the entity, typically a DTD file. Relative files names are 316 * expected to be stored in the same package as the class file, otherwise a leading 317 * slash is an absolute pathname within the classpath. 318 */ 319 320 public void registerEntity(String publicId, String entityPath) 321 { 322 if (LOG.isDebugEnabled()) 323 LOG.debug("Registering " + publicId + " as " + entityPath); 324 325 if (_entities == null) 326 _entities = new HashMap(); 327 328 _entities.put(publicId, entityPath); 329 } 330 331 protected IRule selectRule(String localName, Attributes attributes) 332 { 333 IRule rule = (IRule) _ruleMap.get(localName); 334 335 if (rule == null) 336 throw new DocumentParseException(Tapestry.format( 337 "RuleDrivenParser.no-rule-for-element", 338 localName), getLocation()); 339 340 return rule; 341 } 342 343 /** 344 * Uses the {@link Locator}to track the position in the document as a {@link ILocation}. This 345 * is invoked once (before the initial element is parsed) and the Locator is retained and 346 * queried as to the current file location. 347 * 348 * @see #getLocation() 349 */ 350 public void setDocumentLocator(Locator locator) 351 { 352 _locator = locator; 353 } 354 355 /** 356 * Accumulates the content in a buffer; the concatinated content is provided to the top rule 357 * just before any start or end tag. 358 */ 359 public void characters(char[] ch, int start, int length) throws SAXException 360 { 361 _contentBuffer.append(ch, start, length); 362 } 363 364 /** 365 * Pops the top rule off the stack and invokes {@link IRule#endElementt(RuleDirectedParser)}. 366 */ 367 public void endElement(String uri, String localName, String qName) throws SAXException 368 { 369 fireContentRule(); 370 371 _uri = uri; 372 _localName = localName; 373 _qName = qName; 374 375 popRule().endElement(this); 376 } 377 378 /** 379 * Ignorable content is ignored. 380 */ 381 public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException 382 { 383 } 384 385 /** 386 * Invokes {@link #selectRule(String, Attributes)}to choose a new rule, which is pushed onto 387 * the rule stack, then invokes {@link IRule#startElement(RuleDirectedParser, Attributes)}. 388 */ 389 public void startElement(String uri, String localName, String qName, Attributes attributes) 390 throws SAXException 391 { 392 fireContentRule(); 393 394 _uri = uri; 395 _localName = localName; 396 _qName = qName; 397 398 String name = extractName(uri, localName, qName); 399 400 IRule newRule = selectRule(name, attributes); 401 402 pushRule(newRule); 403 404 newRule.startElement(this, attributes); 405 } 406 407 private String extractName(String uri, String localName, String qName) 408 { 409 return HiveMind.isBlank(localName) ? qName : localName; 410 } 411 412 /** 413 * Uses {@link javax.xml.parsers.SAXParserFactory}to create a instance of a validation SAX2 414 * parser. 415 */ 416 protected synchronized SAXParser constructParser() 417 { 418 if (_parserFactory == null) 419 { 420 _parserFactory = SAXParserFactory.newInstance(); 421 configureParserFactory(_parserFactory); 422 } 423 424 try 425 { 426 return _parserFactory.newSAXParser(); 427 } 428 catch (SAXException ex) 429 { 430 throw new ApplicationRuntimeException(ex); 431 } 432 catch (ParserConfigurationException ex) 433 { 434 throw new ApplicationRuntimeException(ex); 435 } 436 437 } 438 439 /** 440 * Configures a {@link SAXParserFactory}before {@link SAXParserFactory#newSAXParser()}is 441 * invoked. The default implementation sets validating to true and namespaceAware to false, 442 */ 443 444 protected void configureParserFactory(SAXParserFactory factory) 445 { 446 factory.setValidating(true); 447 factory.setNamespaceAware(false); 448 } 449 450 /** 451 * Throws the exception. 452 */ 453 public void error(SAXParseException ex) throws SAXException 454 { 455 fatalError(ex); 456 } 457 458 /** 459 * Throws the exception. 460 */ 461 public void fatalError(SAXParseException ex) throws SAXException 462 { 463 // Sometimes, a bad parse "corrupts" a parser so that it doesn't 464 // work properly for future parses (of valid documents), 465 // so discard it here. 466 467 _parser = null; 468 469 throw ex; 470 } 471 472 /** 473 * Throws the exception. 474 */ 475 public void warning(SAXParseException ex) throws SAXException 476 { 477 fatalError(ex); 478 } 479 480 public InputSource resolveEntity(String publicId, String systemId) throws SAXException 481 { 482 String entityPath = null; 483 484 if (LOG.isDebugEnabled()) 485 LOG.debug("Attempting to resolve entity; publicId = " + publicId + " systemId = " 486 + systemId); 487 488 if (_entities != null) 489 entityPath = (String) _entities.get(publicId); 490 491 if (entityPath == null) 492 { 493 if (LOG.isDebugEnabled()) 494 LOG.debug("Entity not found, using " + systemId); 495 496 return null; 497 } 498 499 InputStream stream = getClass().getResourceAsStream(entityPath); 500 501 InputSource result = new InputSource(stream); 502 503 if (result != null && LOG.isDebugEnabled()) 504 LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")"); 505 506 return result; 507 } 508 509 /** 510 * Validates that the input value matches against the specified Perl5 pattern. If valid, the 511 * method simply returns. If not a match, then an error message is generated (using the errorKey 512 * and the input value) and a {@link InvalidStringException}is thrown. 513 */ 514 515 public void validate(String value, String pattern, String errorKey) 516 throws DocumentParseException 517 { 518 if (_matcher == null) 519 _matcher = new RegexpMatcher(); 520 521 if (_matcher.matches(pattern, value)) 522 return; 523 524 throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation()); 525 } 526 527 public Resource getDocumentLocation() 528 { 529 return _documentLocation; 530 } 531 532 /** 533 * Returns the localName for the current element. 534 * 535 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, 536 * java.lang.String, org.xml.sax.Attributes) 537 */ 538 public String getLocalName() 539 { 540 return _localName; 541 } 542 543 /** 544 * Returns the qualified name for the current element. 545 * 546 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, 547 * java.lang.String, org.xml.sax.Attributes) 548 */ 549 public String getQName() 550 { 551 return _qName; 552 } 553 554 /** 555 * Returns the URI for the current element. 556 * 557 * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, 558 * java.lang.String, org.xml.sax.Attributes) 559 */ 560 public String getUri() 561 { 562 return _uri; 563 } 564 565 private void fireContentRule() 566 { 567 String content = _contentBuffer.toString(); 568 _contentBuffer.setLength(0); 569 570 if (!_ruleStack.isEmpty()) 571 peekRule().content(this, content); 572 } 573 574 }