001// Copyright 2004, 2005 The Apache Software Foundation
002//
003// Licensed under the Apache License, Version 2.0 (the "License");
004// you may not use this file except in compliance with the License.
005// You may obtain a copy of the License at
006//
007//     http://www.apache.org/licenses/LICENSE-2.0
008//
009// Unless required by applicable law or agreed to in writing, software
010// distributed under the License is distributed on an "AS IS" BASIS,
011// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
012// See the License for the specific language governing permissions and
013// limitations under the License.
014
015package org.apache.tapestry.parse;
016
017import java.util.ArrayList;
018import java.util.Collections;
019import java.util.HashMap;
020import java.util.Iterator;
021import java.util.List;
022import java.util.Map;
023
024import org.apache.hivemind.ApplicationRuntimeException;
025import org.apache.hivemind.Location;
026import org.apache.hivemind.Resource;
027import org.apache.hivemind.impl.LocationImpl;
028import org.apache.oro.text.regex.MalformedPatternException;
029import org.apache.oro.text.regex.MatchResult;
030import org.apache.oro.text.regex.Pattern;
031import org.apache.oro.text.regex.PatternMatcher;
032import org.apache.oro.text.regex.Perl5Compiler;
033import org.apache.oro.text.regex.Perl5Matcher;
034import org.apache.tapestry.util.IdAllocator;
035
036/**
037 * Parses Tapestry templates, breaking them into a series of
038 * {@link org.apache.tapestry.parse.TemplateToken tokens}. Although often referred to as an "HTML
039 * template", there is no real requirement that the template be HTML. This parser can handle any
040 * reasonable SGML derived markup (including XML), but specifically works around the ambiguities of
041 * HTML reasonably.
042 * <p>
043 * Deployed as the tapestry.parse.TemplateParser service, using the threaded model.
044 * <p>
045 * Dynamic markup in Tapestry attempts to be invisible. Components are arbitrary tags containing a
 * <code>jwcid</code> attribute. Such components must be well balanced (have a matching close tag,
 * or end the tag with "<code>/&gt;</code>").
048 * <p>
 * Generally, the id specified in the template is matched against a component defined in the
050 * specification. However, implicit components are also possible. The jwcid attribute uses the
051 * syntax "<code>@Type</code>" for implicit components. Type is the component type, and may include a library id
052 *       prefix. Such a component is anonymous (but is given a unique id).
053 *       <p>
 *       (The unique ids assigned start with a dollar sign, which is normally not allowed for
 *       component ids ... this helps to make them stand out and assures that they do not conflict
 *       with user-defined component ids. These ids tend to propagate into URLs and become HTML
 *       element names and even JavaScript variable names ... the dollar sign is acceptable in these
 *       contexts as well).
059 *       <p>
 *       Implicit components may also be given a name using the syntax "
061 *       <code>componentId:@Type</code>". Such a component should <b>not </b> be defined in the
062 *       specification, but may still be accessed via
063 *       {@link org.apache.tapestry.IComponent#getComponent(String)}.
064 *       <p>
065 *       Both defined and implicit components may have additional attributes defined, simply by
066 *       including them in the template. They set formal or informal parameters of the component to
067 *       static strings.
068 *       {@link org.apache.tapestry.spec.IComponentSpecification#getAllowInformalParameters()}, if
069 *       false, will cause such attributes to be simply ignored. For defined components, conflicting
070 *       values defined in the template are ignored.
071 *       <p>
072 *       Attributes in component tags will become formal and informal parameters of the
073 *       corresponding component. Most attributes will be
074 *       <p>
 *       The parser removes the body of some tags (when the corresponding component doesn't
 *       {@link org.apache.tapestry.spec.IComponentSpecification#getAllowBody() allow a body}), and
 *       allows portions of the template to be completely removed.
078 *       <p>
079 *       The parser does a pretty thorough lexical analysis of the template, and reports a great
080 *       number of errors, including improper nesting of tags.
081 *       <p>
082 *       The parser supports <em>invisible localization</em>: The parser recognizes HTML of the
083 *       form: <code>&lt;span key="<i>value</i>"&gt; ... &lt;/span&gt;</code> and converts them
 *       into a {@link TokenType#LOCALIZATION} token. You may also specify a <code>raw</code>
085 *       attribute ... if the value is <code>true</code>, then the localized value is sent to the
086 *       client without filtering, which is appropriate if the value has any markup that should not
087 *       be escaped.
088 * @author Howard Lewis Ship, Geoff Longman
089 */
090
091public class TemplateParser implements ITemplateParser
092{
093    /**
094     * A "magic" component id that causes the tag with the id and its entire body to be ignored
095     * during parsing.
096     */
097
098    private static final String REMOVE_ID = "$remove$";
099
100    /**
101     * A "magic" component id that causes the tag to represent the true content of the template. Any
102     * content prior to the tag is discarded, and any content after the tag is ignored. The tag
103     * itself is not included.
104     */
105
106    private static final String CONTENT_ID = "$content$";
107
108    /**
     * The attribute, checked for in &lt;span&gt; tags, that signifies that the span is being used as
110     * an invisible localization.
111     * 
112     * @since 2.0.4
113     */
114
115    public static final String LOCALIZATION_KEY_ATTRIBUTE_NAME = "key";
116
117    /**
118     * Used with {@link #LOCALIZATION_KEY_ATTRIBUTE_NAME} to indicate a string that should be
119     * rendered "raw" (without escaping HTML). If not specified, defaults to "false". The value must
120     * equal "true" (caselessly).
121     * 
122     * @since 2.3
123     */
124
125    public static final String RAW_ATTRIBUTE_NAME = "raw";
126
127    /**
128     * Attribute name used to identify components.
129     * 
130     * @since 4.0
131     */
132
133    private String _componentAttributeName;
134
135    private static final String PROPERTY_NAME_PATTERN = "_?[a-zA-Z]\\w*";
136
137    /**
138     * Pattern used to recognize ordinary components (defined in the specification).
139     * 
140     * @since 3.0
141     */
142
143    public static final String SIMPLE_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")$";
144
145    /**
146     * Pattern used to recognize implicit components (whose type is defined in the template).
147     * Subgroup 1 is the id (which may be null) and subgroup 2 is the type (which may be qualified
148     * with a library prefix). Subgroup 4 is the library id, Subgroup 5 is the simple component
     * type, which may (as of 4.0) have slashes to delineate folders containing the component.
150     * 
151     * @since 3.0
152     */
153
154    public static final String IMPLICIT_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")?@((("
155            + PROPERTY_NAME_PATTERN + "):)?((" + PROPERTY_NAME_PATTERN + "/)*"
156            + PROPERTY_NAME_PATTERN + "))$";
157
158    private static final int IMPLICIT_ID_PATTERN_ID_GROUP = 1;
159
160    private static final int IMPLICIT_ID_PATTERN_TYPE_GROUP = 2;
161
162    private static final int IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP = 4;
163
164    private static final int IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP = 5;
165
166    private Pattern _simpleIdPattern;
167
168    private Pattern _implicitIdPattern;
169
170    private PatternMatcher _patternMatcher;
171
172    private IdAllocator _idAllocator = new IdAllocator();
173
174    private ITemplateParserDelegate _delegate;
175
176    /**
177     * Identifies the template being parsed; used with error messages.
178     */
179
180    private Resource _resourceLocation;
181
182    /**
183     * Shared instance of {@link Location} used by all {@link TextToken} instances in the template.
184     */
185
186    private Location _templateLocation;
187
188    /**
     * Location within the resource for the current line.
190     */
191
192    private Location _currentLocation;
193
194    /**
195     * Local reference to the template data that is to be parsed.
196     */
197
198    private char[] _templateData;
199
200    /**
201     * List of Tag
202     */
203
204    private List _stack = new ArrayList();
205
206    private static class Tag
207    {
208        // The element, i.e., <jwc> or virtually any other element (via jwcid attribute)
209        String _tagName;
210
211        // If true, the tag is a placeholder for a dynamic element
212        boolean _component;
213
214        // If true, the body of the tag is being ignored, and the
215        // ignore flag is cleared when the close tag is reached
216        boolean _ignoringBody;
217
218        // If true, then the entire tag (and its body) is being ignored
219        boolean _removeTag;
220
221        // If true, then the tag must have a balanced closing tag.
222        // This is always true for components.
223        boolean _mustBalance;
224
225        // The line on which the start tag exists
226        int _line;
227
228        // If true, then the parse ends when the closing tag is found.
229        boolean _content;
230
231        Tag(String tagName, int line)
232        {
233            _tagName = tagName;
234            _line = line;
235        }
236
237        boolean match(String matchTagName)
238        {
239            return _tagName.equalsIgnoreCase(matchTagName);
240        }
241    }
242
243    /**
244     * List of {@link TemplateToken}, this forms the ultimate response.
245     */
246
247    private List _tokens = new ArrayList();
248
249    /**
250     * The location of the 'cursor' within the template data. The advance() method moves this
251     * forward.
252     */
253
254    private int _cursor;
255
256    /**
257     * The start of the current block of static text, or -1 if no block is active.
258     */
259
260    private int _blockStart;
261
262    /**
263     * The current line number; tracked by advance(). Starts at 1.
264     */
265
266    private int _line;
267
268    /**
269     * Set to true when the body of a tag is being ignored. This is typically used to skip over the
     * body of a tag when its corresponding component doesn't allow a body, or when the special jwcid
271     * of $remove$ is used.
272     */
273
274    private boolean _ignoring;
275
276    /**
     * A {@link Map} of {@link String}s, used to store attributes collected while parsing a tag.
278     */
279
280    private Map _attributes = new HashMap();
281
282    /**
283     * A factory used to create template tokens.
284     */
285
286    private TemplateTokenFactory _factory;
287
288    public TemplateParser()
289    {
290        Perl5Compiler compiler = new Perl5Compiler();
291
292        try
293        {
294            _simpleIdPattern = compiler.compile(SIMPLE_ID_PATTERN);
295            _implicitIdPattern = compiler.compile(IMPLICIT_ID_PATTERN);
296        }
297        catch (MalformedPatternException ex)
298        {
299            throw new ApplicationRuntimeException(ex);
300        }
301
302        _patternMatcher = new Perl5Matcher();
303    }
304
305    /**
306     * Parses the template data into an array of {@link TemplateToken}s.
307     * <p>
308     * The parser is <i>decidedly </i> not threadsafe, so care should be taken that only a single
309     * thread accesses it.
310     * 
311     * @param templateData
312     *            the HTML template to parse. Some tokens will hold a reference to this array.
313     * @param delegate
314     *            object that "knows" about defined components
315     * @param resourceLocation
316     *            a description of where the template originated from, used with error messages.
317     */
318
319    public TemplateToken[] parse(char[] templateData, ITemplateParserDelegate delegate,
320            Resource resourceLocation) throws TemplateParseException
321    {
322        try
323        {
324            beforeParse(templateData, delegate, resourceLocation);
325
326            parse();
327
328            return (TemplateToken[]) _tokens.toArray(new TemplateToken[_tokens.size()]);
329        }
330        finally
331        {
332            afterParse();
333        }
334    }
335
336    /**
337     * perform default initialization of the parser.
338     */
339
340    protected void beforeParse(char[] templateData, ITemplateParserDelegate delegate,
341            Resource resourceLocation)
342    {
343        _templateData = templateData;
344        _resourceLocation = resourceLocation;
345        _templateLocation = new LocationImpl(resourceLocation);
346        _delegate = delegate;
347        _ignoring = false;
348        _line = 1;
349        _componentAttributeName = delegate.getComponentAttributeName();
350    }
351
352    /**
353     * Perform default cleanup after parsing completes.
354     */
355
356    protected void afterParse()
357    {
358        _delegate = null;
359        _templateData = null;
360        _resourceLocation = null;
361        _templateLocation = null;
362        _currentLocation = null;
363        _stack.clear();
364        _tokens.clear();
365        _attributes.clear();
366        _idAllocator.clear();
367    }
368
369    /**
370     * Used by the parser to report problems in the parse. Parsing <b>must </b> stop when a problem
371     * is reported.
372     * <p>
373     * The default implementation simply throws an exception that contains the message and location
374     * parameters.
375     * <p>
376     * Subclasses may override but <b>must </b> ensure they throw the required exception.
377     * 
378     * @param message
379     * @param location
380     * @param line
381     *            ignored by the default impl
382     * @param cursor
383     *            ignored by the default impl
384     * @throws TemplateParseException
385     *             always thrown in order to terminate the parse.
386     */
387
388    protected void templateParseProblem(String message, Location location, int line, int cursor)
389            throws TemplateParseException
390    {
391        throw new TemplateParseException(message, location);
392    }
393
    /**
     * Used by the parser to report tapestry runtime specific problems in the parse. Parsing
     * <b>must</b> stop when a problem is reported.
     * <p>
     * The default implementation simply rethrows the exception it is given, unchanged.
     * <p>
     * Subclasses may override but <b>must</b> ensure they rethrow the exception.
     * 
     * @param exception
     *            the runtime exception to report; rethrown as-is
     * @param line
     *            ignored by the default impl
     * @param cursor
     *            ignored by the default impl
     * @throws ApplicationRuntimeException
     *             always rethrown in order to terminate the parse.
     */

    protected void templateParseProblem(ApplicationRuntimeException exception, int line, int cursor)
            throws ApplicationRuntimeException
    {
        throw exception;
    }
416
417    /**
418     * Give subclasses access to the parse results.
419     */
420    protected List getTokens()
421    {
422        if (_tokens == null)
423            return Collections.EMPTY_LIST;
424
425        return _tokens;
426    }
427
428    /**
429     * Checks to see if the next few characters match a given pattern.
430     */
431
432    private boolean lookahead(char[] match)
433    {
434        try
435        {
436            for (int i = 0; i < match.length; i++)
437            {
438                if (_templateData[_cursor + i] != match[i])
439                    return false;
440            }
441
442            // Every character matched.
443
444            return true;
445        }
446        catch (IndexOutOfBoundsException ex)
447        {
448            return false;
449        }
450    }
451
452    private static final char[] COMMENT_START = new char[]
453    { '<', '!', '-', '-' };
454
455    private static final char[] COMMENT_END = new char[]
456    { '-', '-', '>' };
457
458    private static final char[] CLOSE_TAG = new char[]
459    { '<', '/' };
460
461    protected void parse() throws TemplateParseException
462    {
463        _cursor = 0;
464        _blockStart = -1;
465        int length = _templateData.length;
466
467        while (_cursor < length)
468        {
469            if (_templateData[_cursor] != '<')
470            {
471                if (_blockStart < 0 && !_ignoring)
472                    _blockStart = _cursor;
473
474                advance();
475                continue;
476            }
477
478            // OK, start of something.
479
480            if (lookahead(CLOSE_TAG))
481            {
482                closeTag();
483                continue;
484            }
485
486            if (lookahead(COMMENT_START))
487            {
488                skipComment();
489                continue;
490            }
491
492            // The start of some tag.
493
494            startTag();
495        }
496
497        // Usually there's some text at the end of the template (after the last closing tag) that
498        // should
499        // be added. Often the last few tags are static tags so we definately
500        // need to end the text block.
501
502        addTextToken(_templateData.length - 1);
503    }
504
505    /**
506     * Advance forward in the document until the end of the comment is reached. In addition, skip
507     * any whitespace following the comment.
508     */
509
510    private void skipComment() throws TemplateParseException
511    {
512        int length = _templateData.length;
513        int startLine = _line;
514
515        if (_blockStart < 0 && !_ignoring)
516            _blockStart = _cursor;
517
518        while (true)
519        {
520            if (_cursor >= length)
521                templateParseProblem(ParseMessages.commentNotEnded(startLine), new LocationImpl(
522                        _resourceLocation, startLine), startLine, _cursor);
523
524            if (lookahead(COMMENT_END))
525                break;
526
527            // Not the end of the comment, advance over it.
528
529            advance();
530        }
531
532        _cursor += COMMENT_END.length;
533        advanceOverWhitespace();
534    }
535
536    private void addTextToken(int end)
537    {
538        // No active block to add to.
539
540        if (_blockStart < 0)
541            return;
542
543        if (_blockStart <= end)
544        {
545            // This seems odd, shouldn't the location be the current location? I guess
546            // no errors are ever reported for a text token.
547
548            TemplateToken token = _factory.createTextToken(
549                    _templateData,
550                    _blockStart,
551                    end,
552                    _templateLocation);
553
554            _tokens.add(token);
555        }
556
557        _blockStart = -1;
558    }
559
560    private static final int WAIT_FOR_ATTRIBUTE_NAME = 0;
561
562    private static final int COLLECT_ATTRIBUTE_NAME = 1;
563
564    private static final int ADVANCE_PAST_EQUALS = 2;
565
566    private static final int WAIT_FOR_ATTRIBUTE_VALUE = 3;
567
568    private static final int COLLECT_QUOTED_VALUE = 4;
569
570    private static final int COLLECT_UNQUOTED_VALUE = 5;
571
572    private void startTag() throws TemplateParseException
573    {
574        int cursorStart = _cursor;
575        int length = _templateData.length;
576        String tagName = null;
577        boolean endOfTag = false;
578        boolean emptyTag = false;
579        int startLine = _line;
580        Location startLocation = new LocationImpl(_resourceLocation, startLine);
581
582        tagBeginEvent(startLine, _cursor);
583
584        advance();
585
586        // Collect the element type
587
588        while (_cursor < length)
589        {
590            char ch = _templateData[_cursor];
591
592            if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
593            {
594                tagName = new String(_templateData, cursorStart + 1, _cursor - cursorStart - 1);
595
596                break;
597            }
598
599            advance();
600        }
601
602        String attributeName = null;
603        int attributeNameStart = -1;
604        int attributeValueStart = -1;
605        int state = WAIT_FOR_ATTRIBUTE_NAME;
606        char quoteChar = 0;
607
608        _attributes.clear();
609
610        // Collect each attribute
611
612        while (!endOfTag)
613        {
614            if (_cursor >= length)
615            {
616                String message = (tagName == null) ? ParseMessages.unclosedUnknownTag(startLine)
617                        : ParseMessages.unclosedTag(tagName, startLine);
618
619                templateParseProblem(message, startLocation, startLine, cursorStart);
620            }
621
622            char ch = _templateData[_cursor];
623
624            switch (state)
625            {
626                case WAIT_FOR_ATTRIBUTE_NAME:
627
628                    // Ignore whitespace before the next attribute name, while
629                    // looking for the end of the current tag.
630
631                    if (ch == '/')
632                    {
633                        emptyTag = true;
634                        advance();
635                        break;
636                    }
637
638                    if (ch == '>')
639                    {
640                        endOfTag = true;
641                        break;
642                    }
643
644                    if (Character.isWhitespace(ch))
645                    {
646                        advance();
647                        break;
648                    }
649
650                    // Found non-whitespace, assume its the attribute name.
651                    // Note: could use a check here for non-alpha.
652
653                    attributeNameStart = _cursor;
654                    state = COLLECT_ATTRIBUTE_NAME;
655                    advance();
656                    break;
657
658                case COLLECT_ATTRIBUTE_NAME:
659
660                    // Looking for end of attribute name.
661
662                    if (ch == '=' || ch == '/' || ch == '>' || Character.isWhitespace(ch))
663                    {
664                        attributeName = new String(_templateData, attributeNameStart, _cursor
665                                - attributeNameStart);
666
667                        state = ADVANCE_PAST_EQUALS;
668                        break;
669                    }
670
671                    // Part of the attribute name
672
673                    advance();
674                    break;
675
676                case ADVANCE_PAST_EQUALS:
677
678                    // Looking for the '=' sign. May hit the end of the tag, or (for bare
679                    // attributes),
680                    // the next attribute name.
681
682                    if (ch == '/' || ch == '>')
683                    {
684                        // A bare attribute, which is not interesting to
685                        // us.
686
687                        state = WAIT_FOR_ATTRIBUTE_NAME;
688                        break;
689                    }
690
691                    if (Character.isWhitespace(ch))
692                    {
693                        advance();
694                        break;
695                    }
696
697                    if (ch == '=')
698                    {
699                        state = WAIT_FOR_ATTRIBUTE_VALUE;
700                        quoteChar = 0;
701                        attributeValueStart = -1;
702                        advance();
703                        break;
704                    }
705
706                    // Otherwise, an HTML style "bare" attribute (such as <select multiple>).
707                    // We aren't interested in those (we're just looking for the id or jwcid
708                    // attribute).
709
710                    state = WAIT_FOR_ATTRIBUTE_NAME;
711                    break;
712
713                case WAIT_FOR_ATTRIBUTE_VALUE:
714
715                    if (ch == '/' || ch == '>')
716                        templateParseProblem(ParseMessages.missingAttributeValue(
717                                tagName,
718                                _line,
719                                attributeName), getCurrentLocation(), _line, _cursor);
720
721                    // Ignore whitespace between '=' and the attribute value. Also, look
722                    // for initial quote.
723
724                    if (Character.isWhitespace(ch))
725                    {
726                        advance();
727                        break;
728                    }
729
730                    if (ch == '\'' || ch == '"')
731                    {
732                        quoteChar = ch;
733
734                        state = COLLECT_QUOTED_VALUE;
735                        advance();
736                        attributeValueStart = _cursor;
737                        attributeBeginEvent(attributeName, _line, attributeValueStart);
738                        break;
739                    }
740
741                    // Not whitespace or quote, must be start of unquoted attribute.
742
743                    state = COLLECT_UNQUOTED_VALUE;
744                    attributeValueStart = _cursor;
745                    attributeBeginEvent(attributeName, _line, attributeValueStart);
746                    break;
747
748                case COLLECT_QUOTED_VALUE:
749
750                    // Start collecting the quoted attribute value. Stop at the matching quote
751                    // character,
752                    // unless bare, in which case, stop at the next whitespace.
753
754                    if (ch == quoteChar)
755                    {
756                        String attributeValue = new String(_templateData, attributeValueStart,
757                                _cursor - attributeValueStart);
758
759                        attributeEndEvent(_cursor);
760
761                        addAttributeIfUnique(tagName, attributeName, attributeValue);
762
763                        // Advance over the quote.
764                        advance();
765                        state = WAIT_FOR_ATTRIBUTE_NAME;
766                        break;
767                    }
768
769                    advance();
770                    break;
771
772                case COLLECT_UNQUOTED_VALUE:
773
774                    // An unquoted attribute value ends with whitespace
775                    // or the end of the enclosing tag.
776
777                    if (ch == '/' || ch == '>' || Character.isWhitespace(ch))
778                    {
779                        String attributeValue = new String(_templateData, attributeValueStart,
780                                _cursor - attributeValueStart);
781
782                        attributeEndEvent(_cursor);
783                        addAttributeIfUnique(tagName, attributeName, attributeValue);
784
785                        state = WAIT_FOR_ATTRIBUTE_NAME;
786                        break;
787                    }
788
789                    advance();
790                    break;
791            }
792        }
793
794        tagEndEvent(_cursor);
795
796        // Check for invisible localizations
797
798        String localizationKey = findValueCaselessly(LOCALIZATION_KEY_ATTRIBUTE_NAME, _attributes);
799        String jwcId = findValueCaselessly(_componentAttributeName, _attributes);
800
801        if (localizationKey != null && tagName.equalsIgnoreCase("span") && jwcId == null)
802        {
803            if (_ignoring)
804                templateParseProblem(
805                        ParseMessages.componentMayNotBeIgnored(tagName, startLine),
806                        startLocation,
807                        startLine,
808                        cursorStart);
809
810            // If the tag isn't empty, then create a Tag instance to ignore the
811            // body of the tag.
812
813            if (!emptyTag)
814            {
815                Tag tag = new Tag(tagName, startLine);
816
817                tag._component = false;
818                tag._removeTag = true;
819                tag._ignoringBody = true;
820                tag._mustBalance = true;
821
822                _stack.add(tag);
823
824                // Start ignoring content until the close tag.
825
826                _ignoring = true;
827            }
828            else
829            {
830                // Cursor is at the closing carat, advance over it.
831                advance();
832                // TAPESTRY-359: *don't* skip whitespace
833            }
834
835            // End any open block.
836
837            addTextToken(cursorStart - 1);
838
839            boolean raw = checkBoolean(RAW_ATTRIBUTE_NAME, _attributes);
840
841            Map attributes = filter(_attributes, new String[]
842            { LOCALIZATION_KEY_ATTRIBUTE_NAME, RAW_ATTRIBUTE_NAME });
843
844            TemplateToken token = _factory.createLocalizationToken(
845                    tagName,
846                    localizationKey,
847                    raw,
848                    attributes,
849                    startLocation);
850
851            _tokens.add(token);
852
853            return;
854        }
855
856        if (jwcId != null)
857        {
858            processComponentStart(tagName, jwcId, emptyTag, startLine, cursorStart, startLocation);
859            return;
860        }
861
862        // A static tag (not a tag without a jwcid attribute).
863        // We need to record this so that we can match close tags later.
864
865        if (!emptyTag)
866        {
867            Tag tag = new Tag(tagName, startLine);
868            _stack.add(tag);
869        }
870
871        // If there wasn't an active block, then start one.
872
873        if (_blockStart < 0 && !_ignoring)
874            _blockStart = cursorStart;
875
876        advance();
877    }
878
879    /**
880     * @throws TemplateParseException
881     * @since 4.0
882     */
883
884    private void addAttributeIfUnique(String tagName, String attributeName, String attributeValue)
885            throws TemplateParseException
886    {
887
888        if (_attributes.containsKey(attributeName))
889            templateParseProblem(
890                    ParseMessages.duplicateTagAttribute(tagName, _line, attributeName),
891                    getCurrentLocation(),
892                    _line,
893                    _cursor);
894
895        _attributes.put(attributeName, attributeValue);
896    }
897
    /*
     * Note: the open tag for a component (including the special $remove$ and $content$ tags) is
     * processed by processComponentStart(), which is declared after the event notification
     * methods that follow.
     */
902
    /**
     * Notify that the beginning of a tag has been detected. Invoked by startTag() before any of
     * the tag has been consumed.
     * <p>
     * Default implementation does nothing.
     * 
     * @param startLine
     *            the line on which the tag starts
     * @param cursorPosition
     *            the index, within the template data, of the tag's opening '&lt;'
     */
    protected void tagBeginEvent(int startLine, int cursorPosition)
    {
    }
911
    /**
     * Notify that the end of the current tag has been detected. Invoked by startTag() once all of
     * the tag's attributes have been scanned.
     * <p>
     * Default implementation does nothing.
     * 
     * @param cursorPosition
     *            the index, within the template data, of the '&gt;' that ends the tag
     */
    protected void tagEndEvent(int cursorPosition)
    {
    }
920
    /**
     * Notify that the beginning of an attribute value has been detected. Not invoked for bare
     * attributes (those without a value).
     * <p>
     * Default implementation does nothing.
     * 
     * @param attributeName
     *            the name of the attribute, as it appears in the template
     * @param startLine
     *            the line on which the value starts
     * @param cursorPosition
     *            the index, within the template data, of the first character of the value (for a
     *            quoted value, the character after the opening quote)
     */
    protected void attributeBeginEvent(String attributeName, int startLine, int cursorPosition)
    {
    }
929
930    /**
931     * Notify that the end of the current attribute value has been detected.
932     * <p>
933     * Default implementation does nothing.
934     */
935    protected void attributeEndEvent(int cursorPosition)
936    {
937    }
938
939    private void processComponentStart(String tagName, String jwcId, boolean emptyTag,
940            int startLine, int cursorStart, Location startLocation) throws TemplateParseException
941    {
942        if (jwcId.equalsIgnoreCase(CONTENT_ID))
943        {
944            processContentTag(tagName, startLine, cursorStart, emptyTag);
945
946            return;
947        }
948
949        boolean isRemoveId = jwcId.equalsIgnoreCase(REMOVE_ID);
950
951        if (_ignoring && !isRemoveId)
952            templateParseProblem(
953                    ParseMessages.componentMayNotBeIgnored(tagName, startLine),
954                    startLocation,
955                    startLine,
956                    cursorStart);
957
958        String type = null;
959        boolean allowBody = false;
960
961        if (_patternMatcher.matches(jwcId, _implicitIdPattern))
962        {
963            MatchResult match = _patternMatcher.getMatch();
964
965            jwcId = match.group(IMPLICIT_ID_PATTERN_ID_GROUP);
966            type = match.group(IMPLICIT_ID_PATTERN_TYPE_GROUP);
967
968            String libraryId = match.group(IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP);
969            String simpleType = match.group(IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP);
970
971            // If (and this is typical) no actual component id was specified,
972            // then generate one on the fly.
973            // The allocated id for anonymous components is
974            // based on the simple (unprefixed) type, but starts
975            // with a leading dollar sign to ensure no conflicts
976            // with user defined component ids (which don't allow dollar signs
977            // in the id).
978            // New for 4.0: the component type may included slashes ('/'), but these
979            // are not valid identifiers, so we convert them to '$'.
980
981            if (jwcId == null)
982                jwcId = _idAllocator.allocateId("$" + simpleType.replace('/', '$'));
983
984            try
985            {
986                allowBody = _delegate.getAllowBody(libraryId, simpleType, startLocation);
987            }
988            catch (ApplicationRuntimeException e)
989            {
990                // give subclasses a chance to handle and rethrow
991                templateParseProblem(e, startLine, cursorStart);
992            }
993
994        }
995        else
996        {
997            if (!isRemoveId)
998            {
999                if (!_patternMatcher.matches(jwcId, _simpleIdPattern))
1000                    templateParseProblem(
1001                            ParseMessages.componentIdInvalid(tagName, startLine, jwcId),
1002                            startLocation,
1003                            startLine,
1004                            cursorStart);
1005
1006                if (!_delegate.getKnownComponent(jwcId))
1007                    templateParseProblem(
1008                            ParseMessages.unknownComponentId(tagName, startLine, jwcId),
1009                            startLocation,
1010                            startLine,
1011                            cursorStart);
1012
1013                try
1014                {
1015                    allowBody = _delegate.getAllowBody(jwcId, startLocation);
1016                }
1017                catch (ApplicationRuntimeException e)
1018                {
1019                    // give subclasses a chance to handle and rethrow
1020                    templateParseProblem(e, startLine, cursorStart);
1021                }
1022            }
1023        }
1024
1025        // Ignore the body if we're removing the entire tag,
1026        // of if the corresponding component doesn't allow
1027        // a body.
1028
1029        boolean ignoreBody = !emptyTag && (isRemoveId || !allowBody);
1030
1031        if (_ignoring && ignoreBody)
1032            templateParseProblem(ParseMessages.nestedIgnore(tagName, startLine), new LocationImpl(
1033                    _resourceLocation, startLine), startLine, cursorStart);
1034
1035        if (!emptyTag)
1036            pushNewTag(tagName, startLine, isRemoveId, ignoreBody);
1037
1038        // End any open block.
1039
1040        addTextToken(cursorStart - 1);
1041
1042        if (!isRemoveId)
1043        {
1044            addOpenToken(tagName, jwcId, type, startLocation);
1045
1046            if (emptyTag)
1047                _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
1048        }
1049
1050        advance();
1051    }
1052
1053    private void pushNewTag(String tagName, int startLine, boolean isRemoveId, boolean ignoreBody)
1054    {
1055        Tag tag = new Tag(tagName, startLine);
1056
1057        tag._component = !isRemoveId;
1058        tag._removeTag = isRemoveId;
1059
1060        tag._ignoringBody = ignoreBody;
1061
1062        _ignoring = tag._ignoringBody;
1063
1064        tag._mustBalance = true;
1065
1066        _stack.add(tag);
1067    }
1068
1069    private void processContentTag(String tagName, int startLine, int cursorStart, boolean emptyTag)
1070            throws TemplateParseException
1071    {
1072        if (_ignoring)
1073            templateParseProblem(
1074                    ParseMessages.contentBlockMayNotBeIgnored(tagName, startLine),
1075                    new LocationImpl(_resourceLocation, startLine),
1076                    startLine,
1077                    cursorStart);
1078
1079        if (emptyTag)
1080            templateParseProblem(
1081                    ParseMessages.contentBlockMayNotBeEmpty(tagName, startLine),
1082                    new LocationImpl(_resourceLocation, startLine),
1083                    startLine,
1084                    cursorStart);
1085
1086        _tokens.clear();
1087        _blockStart = -1;
1088
1089        Tag tag = new Tag(tagName, startLine);
1090
1091        tag._mustBalance = true;
1092        tag._content = true;
1093
1094        _stack.clear();
1095        _stack.add(tag);
1096
1097        advance();
1098    }
1099
1100    private void addOpenToken(String tagName, String jwcId, String type, Location location)
1101    {
1102        OpenToken token = _factory.createOpenToken(tagName, jwcId, type, location);
1103        _tokens.add(token);
1104
1105        if (_attributes.isEmpty())
1106            return;
1107
1108        Iterator i = _attributes.entrySet().iterator();
1109        while (i.hasNext())
1110        {
1111            Map.Entry entry = (Map.Entry) i.next();
1112
1113            String key = (String) entry.getKey();
1114
1115            if (key.equalsIgnoreCase(_componentAttributeName))
1116                continue;
1117
1118            String value = (String) entry.getValue();
1119
1120            addAttributeToToken(token, key, value);
1121        }
1122    }
1123
1124    /**
1125     * Adds the attribute to the token (identifying prefixes and whatnot is now done downstream).
1126     * 
1127     * @since 3.0
1128     */
1129
1130    private void addAttributeToToken(OpenToken token, String name, String attributeValue)
1131    {
1132        token.addAttribute(name, convertEntitiesToPlain(attributeValue));
1133    }
1134
1135    /**
1136     * Invoked to handle a closing tag, i.e., &lt;/foo&gt;. When a tag closes, it will match against
1137     * a tag on the open tag start. Preferably the top tag on the stack (if everything is well
1138     * balanced), but this is HTML, not XML, so many tags won't balance.
1139     * <p>
1140     * Once the matching tag is located, the question is ... is the tag dynamic or static? If
1141     * static, then the current text block is extended to include this close tag. If dynamic, then
1142     * the current text block is ended (before the '&lt;' that starts the tag) and a close token is
1143     * added.
1144     * <p>
1145     * In either case, the matching static element and anything above it is removed, and the cursor
1146     * is left on the character following the '&gt;'.
1147     */
1148
1149    private void closeTag() throws TemplateParseException
1150    {
1151        int cursorStart = _cursor;
1152        int length = _templateData.length;
1153        int startLine = _line;
1154
1155        Location startLocation = getCurrentLocation();
1156
1157        _cursor += CLOSE_TAG.length;
1158
1159        int tagStart = _cursor;
1160
1161        while (true)
1162        {
1163            if (_cursor >= length)
1164                templateParseProblem(
1165                        ParseMessages.incompleteCloseTag(startLine),
1166                        startLocation,
1167                        startLine,
1168                        cursorStart);
1169
1170            char ch = _templateData[_cursor];
1171
1172            if (ch == '>')
1173                break;
1174
1175            advance();
1176        }
1177
1178        String tagName = new String(_templateData, tagStart, _cursor - tagStart);
1179
1180        int stackPos = _stack.size() - 1;
1181        Tag tag = null;
1182
1183        while (stackPos >= 0)
1184        {
1185            tag = (Tag) _stack.get(stackPos);
1186
1187            if (tag.match(tagName))
1188                break;
1189
1190            if (tag._mustBalance)
1191                templateParseProblem(ParseMessages.improperlyNestedCloseTag(
1192                        tagName,
1193                        startLine,
1194                        tag._tagName,
1195                        tag._line), startLocation, startLine, cursorStart);
1196
1197            stackPos--;
1198        }
1199
1200        if (stackPos < 0)
1201            templateParseProblem(
1202                    ParseMessages.unmatchedCloseTag(tagName, startLine),
1203                    startLocation,
1204                    startLine,
1205                    cursorStart);
1206
1207        // Special case for the content tag
1208
1209        if (tag._content)
1210        {
1211            addTextToken(cursorStart - 1);
1212
1213            // Advance the cursor right to the end.
1214
1215            _cursor = length;
1216            _stack.clear();
1217            return;
1218        }
1219
1220        // When a component closes, add a CLOSE tag.
1221        if (tag._component)
1222        {
1223            addTextToken(cursorStart - 1);
1224
1225            _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation()));
1226        }
1227        else
1228        {
1229            // The close of a static tag. Unless removing the tag
1230            // entirely, make sure the block tag is part of a text block.
1231
1232            if (_blockStart < 0 && !tag._removeTag && !_ignoring)
1233                _blockStart = cursorStart;
1234        }
1235
1236        // Remove all elements at stackPos or above.
1237
1238        for (int i = _stack.size() - 1; i >= stackPos; i--)
1239            _stack.remove(i);
1240
1241        // Advance cursor past '>'
1242
1243        advance();
1244
1245        // If editting out the tag (i.e., $remove$) then kill any whitespace.
1246        // For components that simply don't contain a body, removeTag will
1247        // be false.
1248
1249        if (tag._removeTag)
1250            advanceOverWhitespace();
1251
1252        // If we were ignoring the body of the tag, then clear the ignoring
1253        // flag, since we're out of the body.
1254
1255        if (tag._ignoringBody)
1256            _ignoring = false;
1257    }
1258
1259    /**
1260     * Advances the cursor to the next character. If the end-of-line is reached, then increments the
1261     * line counter.
1262     */
1263
1264    private void advance()
1265    {
1266        int length = _templateData.length;
1267
1268        if (_cursor >= length)
1269            return;
1270
1271        char ch = _templateData[_cursor];
1272
1273        _cursor++;
1274
1275        if (ch == '\n')
1276        {
1277            _line++;
1278            _currentLocation = null;
1279            return;
1280        }
1281
1282        // A \r, or a \r\n also counts as a new line.
1283
1284        if (ch == '\r')
1285        {
1286            _line++;
1287            _currentLocation = null;
1288
1289            if (_cursor < length && _templateData[_cursor] == '\n')
1290                _cursor++;
1291
1292            return;
1293        }
1294
1295        // Not an end-of-line character.
1296
1297    }
1298
1299    private void advanceOverWhitespace()
1300    {
1301        int length = _templateData.length;
1302
1303        while (_cursor < length)
1304        {
1305            char ch = _templateData[_cursor];
1306            if (!Character.isWhitespace(ch))
1307                return;
1308
1309            advance();
1310        }
1311    }
1312
1313    /**
1314     * Returns a new Map that is a copy of the input Map with some key/value pairs removed. A list
1315     * of keys is passed in and matching keys (caseless comparison) from the input Map are excluded
1316     * from the output map. May return null (rather than return an empty Map).
1317     */
1318
1319    private Map filter(Map input, String[] removeKeys)
1320    {
1321        if (input == null || input.isEmpty())
1322            return null;
1323
1324        Map result = null;
1325
1326        Iterator i = input.entrySet().iterator();
1327
1328        nextkey: while (i.hasNext())
1329        {
1330            Map.Entry entry = (Map.Entry) i.next();
1331
1332            String key = (String) entry.getKey();
1333
1334            for (int j = 0; j < removeKeys.length; j++)
1335            {
1336                if (key.equalsIgnoreCase(removeKeys[j]))
1337                    continue nextkey;
1338            }
1339
1340            if (result == null)
1341                result = new HashMap(input.size());
1342
1343            result.put(key, entry.getValue());
1344        }
1345
1346        return result;
1347    }
1348
1349    /**
1350     * Searches a Map for given key, caselessly. The Map is expected to consist of Strings for keys
1351     * and values. Returns the value for the first key found that matches (caselessly) the input
1352     * key. Returns null if no value found.
1353     */
1354
1355    protected String findValueCaselessly(String key, Map map)
1356    {
1357        String result = (String) map.get(key);
1358
1359        if (result != null)
1360            return result;
1361
1362        Iterator i = map.entrySet().iterator();
1363        while (i.hasNext())
1364        {
1365            Map.Entry entry = (Map.Entry) i.next();
1366
1367            String entryKey = (String) entry.getKey();
1368
1369            if (entryKey.equalsIgnoreCase(key))
1370                return (String) entry.getValue();
1371        }
1372
1373        return null;
1374    }
1375
1376    /**
1377     * Conversions needed by {@link #convertEntitiesToPlain(String)}
1378     */
1379
1380    private static final String[] CONVERSIONS =
1381    { "&lt;", "<", "&gt;", ">", "&quot;", "\"", "&amp;", "&" };
1382
1383    /**
1384     * Provided a raw input string that has been recognized to be an expression, this removes excess
1385     * white space and converts &amp;amp;;, &amp;quot;; &amp;lt;; and &amp;gt;; to their normal
1386     * character values (otherwise its impossible to specify those values in expressions in the
1387     * template).
1388     */
1389
1390    private String convertEntitiesToPlain(String input)
1391    {
1392        int inputLength = input.length();
1393
1394        StringBuffer buffer = new StringBuffer(inputLength);
1395
1396        int cursor = 0;
1397
1398        outer: while (cursor < inputLength)
1399        {
1400            for (int i = 0; i < CONVERSIONS.length; i += 2)
1401            {
1402                String entity = CONVERSIONS[i];
1403                int entityLength = entity.length();
1404                String value = CONVERSIONS[i + 1];
1405
1406                if (cursor + entityLength > inputLength)
1407                    continue;
1408
1409                if (input.substring(cursor, cursor + entityLength).equals(entity))
1410                {
1411                    buffer.append(value);
1412                    cursor += entityLength;
1413                    continue outer;
1414                }
1415            }
1416
1417            buffer.append(input.charAt(cursor));
1418            cursor++;
1419        }
1420
1421        return buffer.toString().trim();
1422    }
1423
1424    /**
1425     * Returns true if the map contains the given key (caseless search) and the value is "true"
1426     * (caseless comparison).
1427     */
1428
1429    private boolean checkBoolean(String key, Map map)
1430    {
1431        String value = findValueCaselessly(key, map);
1432
1433        if (value == null)
1434            return false;
1435
1436        return value.equalsIgnoreCase("true");
1437    }
1438
1439    /**
1440     * Gets the current location within the file. This allows the location to be created only as
1441     * needed, and multiple objects on the same line can share the same Location instance.
1442     * 
1443     * @since 3.0
1444     */
1445
1446    protected Location getCurrentLocation()
1447    {
1448        if (_currentLocation == null)
1449            _currentLocation = new LocationImpl(_resourceLocation, _line);
1450
1451        return _currentLocation;
1452    }
1453
1454    public void setFactory(TemplateTokenFactory factory)
1455    {
1456        _factory = factory;
1457    }
1458
1459}