001// Copyright 2004, 2005 The Apache Software Foundation
002//
003// Licensed under the Apache License, Version 2.0 (the "License");
004// you may not use this file except in compliance with the License.
005// You may obtain a copy of the License at
006//
007//     http://www.apache.org/licenses/LICENSE-2.0
008//
009// Unless required by applicable law or agreed to in writing, software
010// distributed under the License is distributed on an "AS IS" BASIS,
011// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012// See the License for the specific language governing permissions and
013// limitations under the License.
014
015package org.apache.tapestry.util.xml;
016
017import java.io.IOException;
018import java.io.InputStream;
019import java.net.URL;
020import java.util.ArrayList;
021import java.util.HashMap;
022import java.util.List;
023import java.util.Map;
024
025import javax.xml.parsers.ParserConfigurationException;
026import javax.xml.parsers.SAXParser;
027import javax.xml.parsers.SAXParserFactory;
028
029import org.apache.commons.logging.Log;
030import org.apache.commons.logging.LogFactory;
031import org.apache.hivemind.ApplicationRuntimeException;
032import org.apache.hivemind.HiveMind;
033import org.apache.hivemind.Location;
034import org.apache.hivemind.Resource;
035import org.apache.hivemind.impl.LocationImpl;
036import org.apache.tapestry.Tapestry;
037import org.apache.tapestry.util.RegexpMatcher;
038import org.xml.sax.Attributes;
039import org.xml.sax.InputSource;
040import org.xml.sax.Locator;
041import org.xml.sax.SAXException;
042import org.xml.sax.SAXParseException;
043import org.xml.sax.helpers.DefaultHandler;
044
/**
 * A simplified version of {@link org.apache.commons.digester.Digester}. This version is without as
 * many bells and whistles but has some key features needed when parsing a document (rather than a
 * configuration file):
 * <ul>
 * <li>Notifications for each bit of text.</li>
 * <li>Tracking of exact location within the document.</li>
 * </ul>
 * <p>
 * Like Digester, there's an object stack and a rule stack. The rules are much simpler (more
 * coding), in that there's a one-to-one relationship between an element and a rule.
 * <p>
 * Based on SAX2.
 * 
 * @author Howard Lewis Ship
 * @since 3.0
 */
063
064public class RuleDirectedParser extends DefaultHandler
065{
066    private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class);
067
068    private Resource _documentLocation;
069
070    private List _ruleStack = new ArrayList();
071
072    private List _objectStack = new ArrayList();
073
074    private Object _documentObject;
075
076    private Locator _locator;
077
078    private int _line = -1;
079
080    private int _column = -1;
081
082    private Location _location;
083
084    private static SAXParserFactory _parserFactory;
085
086    private SAXParser _parser;
087
088    private RegexpMatcher _matcher;
089
090    private String _uri;
091
092    private String _localName;
093
094    private String _qName;
095
096    /**
097     * Map of {@link IRule}keyed on the local name of the element.
098     */
099    private Map _ruleMap = new HashMap();
100
101    /**
102     * Used to accumlate content provided by
103     * {@link org.xml.sax.ContentHandler#characters(char[], int, int)}.
104     */
105
106    private StringBuffer _contentBuffer = new StringBuffer();
107
108    /**
109     * Map of paths to external entities (such as the DTD) keyed on public id.
110     */
111
112    private Map _entities = new HashMap();
113
114    public Object parse(Resource documentLocation)
115    {
116        if (LOG.isDebugEnabled())
117            LOG.debug("Parsing: " + documentLocation);
118
119        try
120        {
121            _documentLocation = documentLocation;
122
123            URL url = documentLocation.getResourceURL();
124
125            if (url == null)
126                throw new DocumentParseException(Tapestry.format(
127                        "RuleDrivenParser.resource-missing",
128                        documentLocation), documentLocation);
129
130            return parse(url);
131        }
132        finally
133        {
134            _documentLocation = null;
135            _ruleStack.clear();
136            _objectStack.clear();
137            _documentObject = null;
138
139            _uri = null;
140            _localName = null;
141            _qName = null;
142
143            _line = -1;
144            _column = -1;
145            _location = null;
146            _locator = null;
147
148            _contentBuffer.setLength(0);
149        }
150    }
151
152    protected Object parse(URL url)
153    {
154        if (_parser == null)
155            _parser = constructParser();
156
157        InputStream stream = null;
158
159        try
160        {
161            stream = url.openStream();
162        }
163        catch (IOException ex)
164        {
165            throw new DocumentParseException(Tapestry.format(
166                    "RuleDrivenParser.unable-to-open-resource",
167                    url), _documentLocation, ex);
168        }
169
170        InputSource source = new InputSource(stream);
171
172        try
173        {
174            _parser.parse(source, this);
175
176            stream.close();
177        }
178        catch (Exception ex)
179        {
180            throw new DocumentParseException(Tapestry.format(
181                    "RuleDrivenParser.parse-error",
182                    url,
183                    ex.getMessage()), getLocation(), ex);
184        }
185
186        if (LOG.isDebugEnabled())
187            LOG.debug("Document parsed as: " + _documentObject);
188
189        return _documentObject;
190    }
191
192    /**
193     * Returns an {@link ILocation}representing the current position within the document (depending
194     * on the parser, this may be accurate to column number level).
195     */
196
197    public Location getLocation()
198    {
199        if (_locator == null)
200            return null;
201
202        int line = _locator.getLineNumber();
203        int column = _locator.getColumnNumber();
204
205        if (_line != line || _column != column)
206        {
207            _location = null;
208            _line = line;
209            _column = column;
210        }
211
212        if (_location == null)
213            _location = new LocationImpl(_documentLocation, _line, _column);
214
215        return _location;
216    }
217
218    /**
219     * Pushes an object onto the object stack. The first object pushed is the "document object", the
220     * root object returned by the parse.
221     */
222    public void push(Object object)
223    {
224        if (_documentObject == null)
225            _documentObject = object;
226
227        push(_objectStack, object, "object stack");
228    }
229
230    /**
231     * Returns the top object on the object stack.
232     */
233    public Object peek()
234    {
235        return peek(_objectStack, 0);
236    }
237
238    /**
239     * Returns an object within the object stack, at depth. Depth 0 is the top object, depth 1 is
240     * the next-to-top object, etc.
241     */
242
243    public Object peek(int depth)
244    {
245        return peek(_objectStack, depth);
246    }
247
248    /**
249     * Removes and returns the top object on the object stack.
250     */
251    public Object pop()
252    {
253        return pop(_objectStack, "object stack");
254    }
255
256    private Object pop(List list, String name)
257    {
258        Object result = list.remove(list.size() - 1);
259
260        if (LOG.isDebugEnabled())
261            LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")");
262
263        return result;
264    }
265
266    private Object peek(List list, int depth)
267    {
268        return list.get(list.size() - 1 - depth);
269    }
270
271    private void push(List list, Object object, String name)
272    {
273        if (LOG.isDebugEnabled())
274            LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")");
275
276        list.add(object);
277    }
278
279    /**
280     * Pushes a new rule onto the rule stack.
281     */
282
283    protected void pushRule(IRule rule)
284    {
285        push(_ruleStack, rule, "rule stack");
286    }
287
288    /**
289     * Returns the top rule on the stack.
290     */
291
292    protected IRule peekRule()
293    {
294        return (IRule) peek(_ruleStack, 0);
295    }
296
297    protected IRule popRule()
298    {
299        return (IRule) pop(_ruleStack, "rule stack");
300    }
301
302    public void addRule(String localElementName, IRule rule)
303    {
304        _ruleMap.put(localElementName, rule);
305    }
306
307    /**
308     * Registers a public id and corresponding input source. Generally, the source is a wrapper
309     * around an input stream to a package resource.
310     * 
311     * @param publicId
312     *            the public identifier to be registerred, generally the publicId of a DTD related
313     *            to the document being parsed
314     * @param entityPath
315     *            the resource path of the entity, typically a DTD file. Relative files names are
316     *            expected to be stored in the same package as the class file, otherwise a leading
317     *            slash is an absolute pathname within the classpath.
318     */
319
320    public void registerEntity(String publicId, String entityPath)
321    {
322        if (LOG.isDebugEnabled())
323            LOG.debug("Registering " + publicId + " as " + entityPath);
324
325        if (_entities == null)
326            _entities = new HashMap();
327
328        _entities.put(publicId, entityPath);
329    }
330
331    protected IRule selectRule(String localName, Attributes attributes)
332    {
333        IRule rule = (IRule) _ruleMap.get(localName);
334
335        if (rule == null)
336            throw new DocumentParseException(Tapestry.format(
337                    "RuleDrivenParser.no-rule-for-element",
338                    localName), getLocation());
339
340        return rule;
341    }
342
343    /**
344     * Uses the {@link Locator}to track the position in the document as a {@link ILocation}. This
345     * is invoked once (before the initial element is parsed) and the Locator is retained and
346     * queried as to the current file location.
347     * 
348     * @see #getLocation()
349     */
350    public void setDocumentLocator(Locator locator)
351    {
352        _locator = locator;
353    }
354
355    /**
356     * Accumulates the content in a buffer; the concatinated content is provided to the top rule
357     * just before any start or end tag.
358     */
359    public void characters(char[] ch, int start, int length) throws SAXException
360    {
361        _contentBuffer.append(ch, start, length);
362    }
363
364    /**
365     * Pops the top rule off the stack and invokes {@link IRule#endElementt(RuleDirectedParser)}.
366     */
367    public void endElement(String uri, String localName, String qName) throws SAXException
368    {
369        fireContentRule();
370
371        _uri = uri;
372        _localName = localName;
373        _qName = qName;
374
375        popRule().endElement(this);
376    }
377
378    /**
379     * Ignorable content is ignored.
380     */
381    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
382    {
383    }
384
385    /**
386     * Invokes {@link #selectRule(String, Attributes)}to choose a new rule, which is pushed onto
387     * the rule stack, then invokes {@link IRule#startElement(RuleDirectedParser, Attributes)}.
388     */
389    public void startElement(String uri, String localName, String qName, Attributes attributes)
390            throws SAXException
391    {
392        fireContentRule();
393
394        _uri = uri;
395        _localName = localName;
396        _qName = qName;
397
398        String name = extractName(uri, localName, qName);
399
400        IRule newRule = selectRule(name, attributes);
401
402        pushRule(newRule);
403
404        newRule.startElement(this, attributes);
405    }
406
407    private String extractName(String uri, String localName, String qName)
408    {
409        return HiveMind.isBlank(localName) ? qName : localName;
410    }
411
412    /**
413     * Uses {@link javax.xml.parsers.SAXParserFactory}to create a instance of a validation SAX2
414     * parser.
415     */
416    protected synchronized SAXParser constructParser()
417    {
418        if (_parserFactory == null)
419        {
420            _parserFactory = SAXParserFactory.newInstance();
421            configureParserFactory(_parserFactory);
422        }
423
424        try
425        {
426            return _parserFactory.newSAXParser();
427        }
428        catch (SAXException ex)
429        {
430            throw new ApplicationRuntimeException(ex);
431        }
432        catch (ParserConfigurationException ex)
433        {
434            throw new ApplicationRuntimeException(ex);
435        }
436
437    }
438
439    /**
440     * Configures a {@link SAXParserFactory}before {@link SAXParserFactory#newSAXParser()}is
441     * invoked. The default implementation sets validating to true and namespaceAware to false,
442     */
443
444    protected void configureParserFactory(SAXParserFactory factory)
445    {
446        factory.setValidating(true);
447        factory.setNamespaceAware(false);
448    }
449
450    /**
451     * Throws the exception.
452     */
453    public void error(SAXParseException ex) throws SAXException
454    {
455        fatalError(ex);
456    }
457
458    /**
459     * Throws the exception.
460     */
461    public void fatalError(SAXParseException ex) throws SAXException
462    {
463        // Sometimes, a bad parse "corrupts" a parser so that it doesn't
464        // work properly for future parses (of valid documents),
465        // so discard it here.
466
467        _parser = null;
468
469        throw ex;
470    }
471
472    /**
473     * Throws the exception.
474     */
475    public void warning(SAXParseException ex) throws SAXException
476    {
477        fatalError(ex);
478    }
479
480    public InputSource resolveEntity(String publicId, String systemId) throws SAXException
481    {
482        String entityPath = null;
483
484        if (LOG.isDebugEnabled())
485            LOG.debug("Attempting to resolve entity; publicId = " + publicId + " systemId = "
486                    + systemId);
487
488        if (_entities != null)
489            entityPath = (String) _entities.get(publicId);
490
491        if (entityPath == null)
492        {
493            if (LOG.isDebugEnabled())
494                LOG.debug("Entity not found, using " + systemId);
495
496            return null;
497        }
498
499        InputStream stream = getClass().getResourceAsStream(entityPath);
500
501        InputSource result = new InputSource(stream);
502
503        if (result != null && LOG.isDebugEnabled())
504            LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")");
505
506        return result;
507    }
508
509    /**
510     * Validates that the input value matches against the specified Perl5 pattern. If valid, the
511     * method simply returns. If not a match, then an error message is generated (using the errorKey
512     * and the input value) and a {@link InvalidStringException}is thrown.
513     */
514
515    public void validate(String value, String pattern, String errorKey)
516            throws DocumentParseException
517    {
518        if (_matcher == null)
519            _matcher = new RegexpMatcher();
520
521        if (_matcher.matches(pattern, value))
522            return;
523
524        throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation());
525    }
526
527    public Resource getDocumentLocation()
528    {
529        return _documentLocation;
530    }
531
532    /**
533     * Returns the localName for the current element.
534     * 
535     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
536     *      java.lang.String, org.xml.sax.Attributes)
537     */
538    public String getLocalName()
539    {
540        return _localName;
541    }
542
543    /**
544     * Returns the qualified name for the current element.
545     * 
546     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
547     *      java.lang.String, org.xml.sax.Attributes)
548     */
549    public String getQName()
550    {
551        return _qName;
552    }
553
554    /**
555     * Returns the URI for the current element.
556     * 
557     * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
558     *      java.lang.String, org.xml.sax.Attributes)
559     */
560    public String getUri()
561    {
562        return _uri;
563    }
564
565    private void fireContentRule()
566    {
567        String content = _contentBuffer.toString();
568        _contentBuffer.setLength(0);
569
570        if (!_ruleStack.isEmpty())
571            peekRule().content(this, content);
572    }
573
574}