001// Copyright 2004, 2005 The Apache Software Foundation
002//
003// Licensed under the Apache License, Version 2.0 (the "License");
004// you may not use this file except in compliance with the License.
005// You may obtain a copy of the License at
006//
007//     http://www.apache.org/licenses/LICENSE-2.0
008//
009// Unless required by applicable law or agreed to in writing, software
010// distributed under the License is distributed on an "AS IS" BASIS,
011// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012// See the License for the specific language governing permissions and
013// limitations under the License.
014
015package org.apache.hivemind.parse;
016
017import java.util.ArrayList;
018import java.util.HashMap;
019import java.util.List;
020import java.util.Map;
021
022import org.apache.hivemind.ApplicationRuntimeException;
023import org.apache.hivemind.HiveMind;
024import org.apache.hivemind.Location;
025import org.apache.hivemind.Resource;
026import org.apache.hivemind.impl.LocationImpl;
027import org.xml.sax.Attributes;
028import org.xml.sax.Locator;
029import org.xml.sax.SAXException;
030import org.xml.sax.SAXParseException;
031import org.xml.sax.helpers.DefaultHandler;
032
033/**
034 * Abstract super-class for parsers based around the SAX event model. This class provides support
035 * for managing a stack of elements, making it reasonable to establish relationships between
036 * elements. It also assists in setting the {@link org.apache.hivemind.Location} of elements as they
037 * are created.
038 * <p>
039 * This support is structured around both XML but is suited towards configuration files rather than
040 * documents, in that the <em>content</em> (parsable character data) within an element is
041 * concatinated together and tracked as a single blob.
042 * 
043 * @author Howard Lewis Ship
044 */
045public abstract class AbstractParser extends DefaultHandler
046{
047
048    /**
049     * The parser is built around a stack of these Items. This used to figure out the current state,
050     * the element being processed, and the matching descriptor object.
051     */
052    private static class Item
053    {
054        StringBuffer _buffer;
055
056        String _elementName;
057
058        boolean _ignoreCharacterData;
059
060        Object _object;
061
062        /**
063         * Prior state of the parser before this item was pushed.
064         */
065        int _priorState;
066
067        Item(String elementName, Object object, int priorState, boolean ignoreCharacterData)
068        {
069            _elementName = elementName;
070            _object = object;
071            _priorState = priorState;
072            _ignoreCharacterData = ignoreCharacterData;
073        }
074
075        void addContent(char[] buffer, int start, int length)
076        {
077            if (_ignoreCharacterData)
078                return;
079
080            if (_buffer == null)
081                _buffer = new StringBuffer(length);
082
083            _buffer.append(buffer, start, length);
084        }
085
086        String getContent()
087        {
088            if (_buffer != null)
089                return _buffer.toString().trim();
090
091            return null;
092        }
093    }
094
095    private int _currentColumn;
096
097    private int _currentLine;
098
099    private Location _location;
100
101    private Locator _locator;
102
103    private Resource _resource;
104
105    private List _stack;
106
107    private int _state;
108
109    private Item _top;
110
111    /**
112     * Accepts parseable character data from within an element and applies it to the top stack
113     * element. This may be invoked multiple times by the parser, and the overall data will
114     * accumulate. This content can be retrieved via {@link #peekContent()}.
115     */
116    public void characters(char[] ch, int start, int length) throws SAXException
117    {
118        _top.addContent(ch, start, length);
119    }
120
121    /**
122     * Invokes {@link #fatalError(SAXParseException)}.
123     */
124    public void error(SAXParseException ex) throws SAXException
125    {
126        fatalError(ex);
127    }
128
129    /**
130     * @param ex
131     *            exception to be thrown
132     * @throws SAXParseException
133     */
134    public void fatalError(SAXParseException ex) throws SAXException
135    {
136        throw ex;
137    }
138
139    /**
140     * Returns a "path" to the current element, as a series of element names seperated by slashes,
141     * i.e., "top/middle/leaf".
142     */
143    protected String getElementPath()
144    {
145        StringBuffer buffer = new StringBuffer();
146
147        int count = _stack.size();
148        for (int i = 0; i < count; i++)
149        {
150            if (i > 0)
151                buffer.append('/');
152
153            Item item = (Item) _stack.get(i);
154
155            buffer.append(item._elementName);
156        }
157
158        return buffer.toString();
159    }
160
161    /**
162     * Returns the current lcoation, as reported by the parser.
163     */
164    protected Location getLocation()
165    {
166        int line = _locator.getLineNumber();
167        int column = _locator.getColumnNumber();
168
169        if (line != _currentLine || column != _currentColumn)
170            _location = null;
171
172        if (_location == null)
173            _location = new LocationImpl(_resource, line, column);
174
175        _currentLine = line;
176        _currentColumn = column;
177
178        return _location;
179    }
180
181    /**
182     * Returns the {@link Resource} being parsed (as set by {@link #initializeParser(Resource, int)}.
183     */
184
185    protected Resource getResource()
186    {
187        return _resource;
188    }
189
190    /**
191     * Returns the current state of the parser. State is initially set by
192     * {@link #initializeParser(Resource, int)} and is later updated by
193     * {@link #push(String, Object, int, boolean)} and {@link #pop()}.
194     */
195    protected int getState()
196    {
197        return _state;
198    }
199
200    /**
201     * Initializes the parser; this should be called before any SAX parse events are received.
202     * 
203     * @param resource
204     *            the resource being parsed (used for some error messages)
205     * @param startState
206     *            the initial state of the parser (the interpretation of state is determined by
207     *            subclasses)
208     */
209    protected void initializeParser(Resource resource, int startState)
210    {
211        _resource = resource;
212        _stack = new ArrayList();
213
214        _location = null;
215        _state = startState;
216    }
217
218    /**
219     * Peeks at the top element on the stack, and returns its content (the accumuulated parseable
220     * character data directly enclosed by its start/end tags.
221     */
222    protected String peekContent()
223    {
224        return _top.getContent();
225    }
226
227    /**
228     * Peeks at the top element on the stack and returns its element name.
229     */
230    protected String peekElementName()
231    {
232        return _top._elementName;
233    }
234
235    /**
236     * Peeks at the top element on the stack and returns the object for that element.
237     */
238
239    protected Object peekObject()
240    {
241        return _top._object;
242    }
243
244    /**
245     * Invoked when the closing tag for an element is enountered {i.e, from
246     * {@link #endElement(String, String, String)}. This removes the corresponding item from the
247     * stack, and sets the parser state back to the (new) top element's state.
248     */
249    protected void pop()
250    {
251        int count = _stack.size();
252
253        _state = _top._priorState;
254
255        _stack.remove(count - 1);
256
257        if (count == 1)
258            _top = null;
259        else
260            _top = (Item) _stack.get(count - 2);
261    }
262
263    /**
264     * Enters a new state, pushing an object onto the stack. Invokes
265     * {@link #push(String, Object, int, boolean)}, and ignores character data within the element.
266     * 
267     * @param elementName
268     *            the element whose start tag was just parsed
269     * @param object
270     *            the object created to represent the new object
271     * @param state
272     *            the new state for the parse
273     */
274    protected void push(String elementName, Object object, int state)
275    {
276        push(elementName, object, state, true);
277    }
278
279    /**
280     * Enters a new state, pusubhing an object onto the stack. If the object implements
281     * {@link org.apache.hivemind.LocationHolder} then its location property is set to the
282     * current location.
283     * 
284     * @param elementName
285     *            the element whose start tag was just parsed
286     * @param object
287     *            the object created to represent the new object
288     * @param state
289     *            the new state for the parse
290     * @param ignoreCharacterData
291     *            if true, then any character data (typically whitespace) directly enclosed by the
292     *            element is ignored
293     */
294    protected void push(String elementName, Object object, int state, boolean ignoreCharacterData)
295    {
296        HiveMind.setLocation(object, getLocation());
297
298        Item item = new Item(elementName, object, _state, ignoreCharacterData);
299
300        _stack.add(item);
301
302        _top = item;
303        _state = state;
304    }
305
306    /**
307     * Resets all state after a parse.
308     */
309    protected void resetParser()
310    {
311        _resource = null;
312        _locator = null;
313        _stack = null;
314        _location = null;
315    }
316
317    /**
318     * Invoked by the parser, the locator is stored and later used by {@link #getLocation()}.
319     */
320    public void setDocumentLocator(Locator locator)
321    {
322        _locator = locator;
323    }
324
325    /**
326     * Forces a change to a specific state.
327     */
328    protected void setState(int state)
329    {
330        _state = state;
331    }
332
333    /**
334     * Invoked when an unexpected element is parsed (useful for parses that don't perform
335     * validation, or when there's no DTD).
336     * 
337     * @throws ApplicationRuntimeException
338     *             describing the situation
339     */
340    protected void unexpectedElement(String elementName)
341    {
342        throw new ApplicationRuntimeException(ParseMessages.unexpectedElement(
343                elementName,
344                getElementPath()), getLocation(), null);
345    }
346
347    /**
348     * Ocassionaly it is necessary to "change our mind" about what's on the top of the stack.
349     * 
350     * @param object
351     *            the new object for the top stack element
352     */
353    protected void updateObject(Object object)
354    {
355        _top._object = object;
356    }
357
358    /**
359     * Invokes {@link #fatalError(SAXParseException)}.
360     */
361    public void warning(SAXParseException ex) throws SAXException
362    {
363        fatalError(ex);
364    }
365
366    private Map constructAttributesMap(Attributes attributes)
367    {
368        Map result = new HashMap();
369        int count = attributes.getLength();
370
371        for (int i = 0; i < count; i++)
372        {
373            String key = attributes.getLocalName(i);
374
375            if (HiveMind.isBlank(key))
376                key = attributes.getQName(i);
377
378            String value = attributes.getValue(i);
379
380            result.put(key, value);
381        }
382
383        return result;
384    }
385
386    /**
387     * Invoked when an element's start tag is recognized. The element and attributes are provided to
388     * the subclass for further processing.
389     */
390    protected abstract void begin(String elementName, Map attributes);
391
392    /**
393     * Invoked when an element's close tag is recognized. The element is provided. The content of
394     * the element (the unparsed whitespace within the element's tags) is available via
395     * {@link #peekContent()}.
396     */
397
398    protected abstract void end(String elementName);
399
400    public void endElement(String uri, String localName, String qName) throws SAXException
401    {
402        end(getElementName(localName, qName));
403    }
404
405    public void startElement(String uri, String localName, String qName, Attributes attributes)
406            throws SAXException
407    {
408        String elementName = getElementName(localName, qName);
409
410        begin(elementName, constructAttributesMap(attributes));
411    }
412
413    private String getElementName(String localName, String qName)
414    {
415        return qName != null ? qName : localName;
416    }
417}