001    // Copyright 2004, 2005 The Apache Software Foundation
002    //
003    // Licensed under the Apache License, Version 2.0 (the "License");
004    // you may not use this file except in compliance with the License.
005    // You may obtain a copy of the License at
006    //
007    //     http://www.apache.org/licenses/LICENSE-2.0
008    //
009    // Unless required by applicable law or agreed to in writing, software
010    // distributed under the License is distributed on an "AS IS" BASIS,
011    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012    // See the License for the specific language governing permissions and
013    // limitations under the License.
014    
015    package org.apache.tapestry.util.xml;
016    
017    import java.io.IOException;
018    import java.io.InputStream;
019    import java.net.URL;
020    import java.util.ArrayList;
021    import java.util.HashMap;
022    import java.util.List;
023    import java.util.Map;
024    
025    import javax.xml.parsers.ParserConfigurationException;
026    import javax.xml.parsers.SAXParser;
027    import javax.xml.parsers.SAXParserFactory;
028    
029    import org.apache.commons.logging.Log;
030    import org.apache.commons.logging.LogFactory;
031    import org.apache.hivemind.ApplicationRuntimeException;
032    import org.apache.hivemind.HiveMind;
033    import org.apache.hivemind.Location;
034    import org.apache.hivemind.Resource;
035    import org.apache.hivemind.impl.LocationImpl;
036    import org.apache.tapestry.Tapestry;
037    import org.apache.tapestry.util.RegexpMatcher;
038    import org.xml.sax.Attributes;
039    import org.xml.sax.InputSource;
040    import org.xml.sax.Locator;
041    import org.xml.sax.SAXException;
042    import org.xml.sax.SAXParseException;
043    import org.xml.sax.helpers.DefaultHandler;
044    
045    /**
 * A simplified version of {@link org.apache.commons.digester.Digester}. This version is without as
 * many bells and whistles but has some key features needed when parsing a document (rather than a
 * configuration file):
 * <ul>
 * <li>Notifications for each bit of text.</li>
 * <li>Tracking of exact location within the document.</li>
 * </ul>
054     * <p>
055     * Like Digester, there's an object stack and a rule stack. The rules are much simpler (more
056     * coding), in that there's a one-to-one relationship between an element and a rule.
057     * <p>
058     * Based on SAX2.
059     * 
060     * @author Howard Lewis Ship
061     * @since 3.0
062     */
063    
064    public class RuleDirectedParser extends DefaultHandler
065    {
066        private static final Log LOG = LogFactory.getLog(RuleDirectedParser.class);
067    
068        private Resource _documentLocation;
069    
070        private List _ruleStack = new ArrayList();
071    
072        private List _objectStack = new ArrayList();
073    
074        private Object _documentObject;
075    
076        private Locator _locator;
077    
078        private int _line = -1;
079    
080        private int _column = -1;
081    
082        private Location _location;
083    
084        private static SAXParserFactory _parserFactory;
085    
086        private SAXParser _parser;
087    
088        private RegexpMatcher _matcher;
089    
090        private String _uri;
091    
092        private String _localName;
093    
094        private String _qName;
095    
096        /**
097         * Map of {@link IRule}keyed on the local name of the element.
098         */
099        private Map _ruleMap = new HashMap();
100    
101        /**
102         * Used to accumlate content provided by
103         * {@link org.xml.sax.ContentHandler#characters(char[], int, int)}.
104         */
105    
106        private StringBuffer _contentBuffer = new StringBuffer();
107    
108        /**
109         * Map of paths to external entities (such as the DTD) keyed on public id.
110         */
111    
112        private Map _entities = new HashMap();
113    
114        public Object parse(Resource documentLocation)
115        {
116            if (LOG.isDebugEnabled())
117                LOG.debug("Parsing: " + documentLocation);
118    
119            try
120            {
121                _documentLocation = documentLocation;
122    
123                URL url = documentLocation.getResourceURL();
124    
125                if (url == null)
126                    throw new DocumentParseException(Tapestry.format(
127                            "RuleDrivenParser.resource-missing",
128                            documentLocation), documentLocation);
129    
130                return parse(url);
131            }
132            finally
133            {
134                _documentLocation = null;
135                _ruleStack.clear();
136                _objectStack.clear();
137                _documentObject = null;
138    
139                _uri = null;
140                _localName = null;
141                _qName = null;
142    
143                _line = -1;
144                _column = -1;
145                _location = null;
146                _locator = null;
147    
148                _contentBuffer.setLength(0);
149            }
150        }
151    
152        protected Object parse(URL url)
153        {
154            if (_parser == null)
155                _parser = constructParser();
156    
157            InputStream stream = null;
158    
159            try
160            {
161                stream = url.openStream();
162            }
163            catch (IOException ex)
164            {
165                throw new DocumentParseException(Tapestry.format(
166                        "RuleDrivenParser.unable-to-open-resource",
167                        url), _documentLocation, ex);
168            }
169    
170            InputSource source = new InputSource(stream);
171    
172            try
173            {
174                _parser.parse(source, this);
175    
176                stream.close();
177            }
178            catch (Exception ex)
179            {
180                throw new DocumentParseException(Tapestry.format(
181                        "RuleDrivenParser.parse-error",
182                        url,
183                        ex.getMessage()), getLocation(), ex);
184            }
185    
186            if (LOG.isDebugEnabled())
187                LOG.debug("Document parsed as: " + _documentObject);
188    
189            return _documentObject;
190        }
191    
192        /**
193         * Returns an {@link ILocation}representing the current position within the document (depending
194         * on the parser, this may be accurate to column number level).
195         */
196    
197        public Location getLocation()
198        {
199            if (_locator == null)
200                return null;
201    
202            int line = _locator.getLineNumber();
203            int column = _locator.getColumnNumber();
204    
205            if (_line != line || _column != column)
206            {
207                _location = null;
208                _line = line;
209                _column = column;
210            }
211    
212            if (_location == null)
213                _location = new LocationImpl(_documentLocation, _line, _column);
214    
215            return _location;
216        }
217    
218        /**
219         * Pushes an object onto the object stack. The first object pushed is the "document object", the
220         * root object returned by the parse.
221         */
222        public void push(Object object)
223        {
224            if (_documentObject == null)
225                _documentObject = object;
226    
227            push(_objectStack, object, "object stack");
228        }
229    
230        /**
231         * Returns the top object on the object stack.
232         */
233        public Object peek()
234        {
235            return peek(_objectStack, 0);
236        }
237    
238        /**
239         * Returns an object within the object stack, at depth. Depth 0 is the top object, depth 1 is
240         * the next-to-top object, etc.
241         */
242    
243        public Object peek(int depth)
244        {
245            return peek(_objectStack, depth);
246        }
247    
248        /**
249         * Removes and returns the top object on the object stack.
250         */
251        public Object pop()
252        {
253            return pop(_objectStack, "object stack");
254        }
255    
256        private Object pop(List list, String name)
257        {
258            Object result = list.remove(list.size() - 1);
259    
260            if (LOG.isDebugEnabled())
261                LOG.debug("Popped " + result + " off " + name + " (at " + getLocation() + ")");
262    
263            return result;
264        }
265    
266        private Object peek(List list, int depth)
267        {
268            return list.get(list.size() - 1 - depth);
269        }
270    
271        private void push(List list, Object object, String name)
272        {
273            if (LOG.isDebugEnabled())
274                LOG.debug("Pushing " + object + " onto " + name + " (at " + getLocation() + ")");
275    
276            list.add(object);
277        }
278    
279        /**
280         * Pushes a new rule onto the rule stack.
281         */
282    
283        protected void pushRule(IRule rule)
284        {
285            push(_ruleStack, rule, "rule stack");
286        }
287    
288        /**
289         * Returns the top rule on the stack.
290         */
291    
292        protected IRule peekRule()
293        {
294            return (IRule) peek(_ruleStack, 0);
295        }
296    
297        protected IRule popRule()
298        {
299            return (IRule) pop(_ruleStack, "rule stack");
300        }
301    
302        public void addRule(String localElementName, IRule rule)
303        {
304            _ruleMap.put(localElementName, rule);
305        }
306    
307        /**
308         * Registers a public id and corresponding input source. Generally, the source is a wrapper
309         * around an input stream to a package resource.
310         * 
311         * @param publicId
312         *            the public identifier to be registerred, generally the publicId of a DTD related
313         *            to the document being parsed
314         * @param entityPath
315         *            the resource path of the entity, typically a DTD file. Relative files names are
316         *            expected to be stored in the same package as the class file, otherwise a leading
317         *            slash is an absolute pathname within the classpath.
318         */
319    
320        public void registerEntity(String publicId, String entityPath)
321        {
322            if (LOG.isDebugEnabled())
323                LOG.debug("Registering " + publicId + " as " + entityPath);
324    
325            if (_entities == null)
326                _entities = new HashMap();
327    
328            _entities.put(publicId, entityPath);
329        }
330    
331        protected IRule selectRule(String localName, Attributes attributes)
332        {
333            IRule rule = (IRule) _ruleMap.get(localName);
334    
335            if (rule == null)
336                throw new DocumentParseException(Tapestry.format(
337                        "RuleDrivenParser.no-rule-for-element",
338                        localName), getLocation());
339    
340            return rule;
341        }
342    
343        /**
344         * Uses the {@link Locator}to track the position in the document as a {@link ILocation}. This
345         * is invoked once (before the initial element is parsed) and the Locator is retained and
346         * queried as to the current file location.
347         * 
348         * @see #getLocation()
349         */
350        public void setDocumentLocator(Locator locator)
351        {
352            _locator = locator;
353        }
354    
355        /**
356         * Accumulates the content in a buffer; the concatinated content is provided to the top rule
357         * just before any start or end tag.
358         */
359        public void characters(char[] ch, int start, int length) throws SAXException
360        {
361            _contentBuffer.append(ch, start, length);
362        }
363    
364        /**
365         * Pops the top rule off the stack and invokes {@link IRule#endElementt(RuleDirectedParser)}.
366         */
367        public void endElement(String uri, String localName, String qName) throws SAXException
368        {
369            fireContentRule();
370    
371            _uri = uri;
372            _localName = localName;
373            _qName = qName;
374    
375            popRule().endElement(this);
376        }
377    
378        /**
379         * Ignorable content is ignored.
380         */
381        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException
382        {
383        }
384    
385        /**
386         * Invokes {@link #selectRule(String, Attributes)}to choose a new rule, which is pushed onto
387         * the rule stack, then invokes {@link IRule#startElement(RuleDirectedParser, Attributes)}.
388         */
389        public void startElement(String uri, String localName, String qName, Attributes attributes)
390                throws SAXException
391        {
392            fireContentRule();
393    
394            _uri = uri;
395            _localName = localName;
396            _qName = qName;
397    
398            String name = extractName(uri, localName, qName);
399    
400            IRule newRule = selectRule(name, attributes);
401    
402            pushRule(newRule);
403    
404            newRule.startElement(this, attributes);
405        }
406    
407        private String extractName(String uri, String localName, String qName)
408        {
409            return HiveMind.isBlank(localName) ? qName : localName;
410        }
411    
412        /**
413         * Uses {@link javax.xml.parsers.SAXParserFactory}to create a instance of a validation SAX2
414         * parser.
415         */
416        protected synchronized SAXParser constructParser()
417        {
418            if (_parserFactory == null)
419            {
420                _parserFactory = SAXParserFactory.newInstance();
421                configureParserFactory(_parserFactory);
422            }
423    
424            try
425            {
426                return _parserFactory.newSAXParser();
427            }
428            catch (SAXException ex)
429            {
430                throw new ApplicationRuntimeException(ex);
431            }
432            catch (ParserConfigurationException ex)
433            {
434                throw new ApplicationRuntimeException(ex);
435            }
436    
437        }
438    
439        /**
440         * Configures a {@link SAXParserFactory}before {@link SAXParserFactory#newSAXParser()}is
441         * invoked. The default implementation sets validating to true and namespaceAware to false,
442         */
443    
444        protected void configureParserFactory(SAXParserFactory factory)
445        {
446            factory.setValidating(true);
447            factory.setNamespaceAware(false);
448        }
449    
450        /**
451         * Throws the exception.
452         */
453        public void error(SAXParseException ex) throws SAXException
454        {
455            fatalError(ex);
456        }
457    
458        /**
459         * Throws the exception.
460         */
461        public void fatalError(SAXParseException ex) throws SAXException
462        {
463            // Sometimes, a bad parse "corrupts" a parser so that it doesn't
464            // work properly for future parses (of valid documents),
465            // so discard it here.
466    
467            _parser = null;
468    
469            throw ex;
470        }
471    
472        /**
473         * Throws the exception.
474         */
475        public void warning(SAXParseException ex) throws SAXException
476        {
477            fatalError(ex);
478        }
479    
480        public InputSource resolveEntity(String publicId, String systemId) throws SAXException
481        {
482            String entityPath = null;
483    
484            if (LOG.isDebugEnabled())
485                LOG.debug("Attempting to resolve entity; publicId = " + publicId + " systemId = "
486                        + systemId);
487    
488            if (_entities != null)
489                entityPath = (String) _entities.get(publicId);
490    
491            if (entityPath == null)
492            {
493                if (LOG.isDebugEnabled())
494                    LOG.debug("Entity not found, using " + systemId);
495    
496                return null;
497            }
498    
499            InputStream stream = getClass().getResourceAsStream(entityPath);
500    
501            InputSource result = new InputSource(stream);
502    
503            if (result != null && LOG.isDebugEnabled())
504                LOG.debug("Resolved " + publicId + " as " + result + " (for " + entityPath + ")");
505    
506            return result;
507        }
508    
509        /**
510         * Validates that the input value matches against the specified Perl5 pattern. If valid, the
511         * method simply returns. If not a match, then an error message is generated (using the errorKey
512         * and the input value) and a {@link InvalidStringException}is thrown.
513         */
514    
515        public void validate(String value, String pattern, String errorKey)
516                throws DocumentParseException
517        {
518            if (_matcher == null)
519                _matcher = new RegexpMatcher();
520    
521            if (_matcher.matches(pattern, value))
522                return;
523    
524            throw new InvalidStringException(Tapestry.format(errorKey, value), value, getLocation());
525        }
526    
527        public Resource getDocumentLocation()
528        {
529            return _documentLocation;
530        }
531    
532        /**
533         * Returns the localName for the current element.
534         * 
535         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
536         *      java.lang.String, org.xml.sax.Attributes)
537         */
538        public String getLocalName()
539        {
540            return _localName;
541        }
542    
543        /**
544         * Returns the qualified name for the current element.
545         * 
546         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
547         *      java.lang.String, org.xml.sax.Attributes)
548         */
549        public String getQName()
550        {
551            return _qName;
552        }
553    
554        /**
555         * Returns the URI for the current element.
556         * 
557         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String,
558         *      java.lang.String, org.xml.sax.Attributes)
559         */
560        public String getUri()
561        {
562            return _uri;
563        }
564    
565        private void fireContentRule()
566        {
567            String content = _contentBuffer.toString();
568            _contentBuffer.setLength(0);
569    
570            if (!_ruleStack.isEmpty())
571                peekRule().content(this, content);
572        }
573    
574    }