001 // Copyright 2004, 2005 The Apache Software Foundation 002 // 003 // Licensed under the Apache License, Version 2.0 (the "License"); 004 // you may not use this file except in compliance with the License. 005 // You may obtain a copy of the License at 006 // 007 // http://www.apache.org/licenses/LICENSE-2.0 008 // 009 // Unless required by applicable law or agreed to in writing, software 010 // distributed under the License is distributed on an "AS IS" BASIS, 011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012 // See the License for the specific language governing permissions and 013 // limitations under the License. 014 015 package org.apache.tapestry.parse; 016 017 import java.util.ArrayList; 018 import java.util.Collections; 019 import java.util.HashMap; 020 import java.util.Iterator; 021 import java.util.List; 022 import java.util.Map; 023 024 import org.apache.hivemind.ApplicationRuntimeException; 025 import org.apache.hivemind.Location; 026 import org.apache.hivemind.Resource; 027 import org.apache.hivemind.impl.LocationImpl; 028 import org.apache.oro.text.regex.MalformedPatternException; 029 import org.apache.oro.text.regex.MatchResult; 030 import org.apache.oro.text.regex.Pattern; 031 import org.apache.oro.text.regex.PatternMatcher; 032 import org.apache.oro.text.regex.Perl5Compiler; 033 import org.apache.oro.text.regex.Perl5Matcher; 034 import org.apache.tapestry.util.IdAllocator; 035 036 /** 037 * Parses Tapestry templates, breaking them into a series of 038 * {@link org.apache.tapestry.parse.TemplateToken tokens}. Although often referred to as an "HTML 039 * template", there is no real requirement that the template be HTML. This parser can handle any 040 * reasonable SGML derived markup (including XML), but specifically works around the ambiguities of 041 * HTML reasonably. 042 * <p> 043 * Deployed as the tapestry.parse.TemplateParser service, using the threaded model. 
* <p>
 * Dynamic markup in Tapestry attempts to be invisible. Components are arbitrary tags containing a
 * <code>jwcid</code> attribute. Such components must be well balanced (have a matching close tag,
 * or end the tag with "<code>/&gt;</code>").
 * <p>
 * Generally, the id specified in the template is matched against a component defined in the
 * specification. However, implicit components are also possible. The jwcid attribute uses the
 * syntax "<code>@Type</code>" for implicit components. Type is the component type, and may
 * include a library id prefix. Such a component is anonymous (but is given a unique id).
 * <p>
 * (The unique ids assigned start with a dollar sign, which is normally not allowed for
 * component ids ... this helps to make them stand out and assures that they do not conflict
 * with user-defined component ids. These ids tend to propagate into URLs and become HTML
 * element names and even JavaScript variable names ... the dollar sign is acceptable in these
 * contexts as well).
 * <p>
 * Implicit components may also be given a name using the syntax
 * "<code>componentId:@Type</code>". Such a component should <b>not</b> be defined in the
 * specification, but may still be accessed via
 * {@link org.apache.tapestry.IComponent#getComponent(String)}.
 * <p>
 * Both defined and implicit components may have additional attributes defined, simply by
 * including them in the template. They set formal or informal parameters of the component to
 * static strings.
 * {@link org.apache.tapestry.spec.IComponentSpecification#getAllowInformalParameters()}, if
 * false, will cause such attributes to be simply ignored. For defined components, conflicting
 * values defined in the template are ignored.
 * <p>
 * Attributes in component tags will become formal and informal parameters of the
 * corresponding component.
Most attributes will be
 * <p>
 * The parser removes the body of some tags (when the corresponding component doesn't
 * {@link org.apache.tapestry.spec.IComponentSpecification#getAllowBody() allow a body}), and
 * allows portions of the template to be completely removed.
 * <p>
 * The parser does a pretty thorough lexical analysis of the template, and reports a great
 * number of errors, including improper nesting of tags.
 * <p>
 * The parser supports <em>invisible localization</em>: The parser recognizes HTML of the
 * form: <code>&lt;span key="<i>value</i>"&gt; ... &lt;/span&gt;</code> and converts it
 * into a {@link TokenType#LOCALIZATION} token. You may also specify a <code>raw</code>
 * attribute ... if the value is <code>true</code>, then the localized value is sent to the
 * client without filtering, which is appropriate if the value has any markup that should not
 * be escaped.
 *
 * @author Howard Lewis Ship, Geoff Longman
 */

public class TemplateParser implements ITemplateParser
{
    /**
     * A "magic" component id that causes the tag with the id and its entire body to be ignored
     * during parsing.
     */

    private static final String REMOVE_ID = "$remove$";

    /**
     * A "magic" component id that causes the tag to represent the true content of the template. Any
     * content prior to the tag is discarded, and any content after the tag is ignored. The tag
     * itself is not included.
     */

    private static final String CONTENT_ID = "$content$";

    /**
     * The attribute, checked for in &lt;span&gt; tags, that signifies that the span is being used
     * as an invisible localization.
     *
     * @since 2.0.4
     */

    public static final String LOCALIZATION_KEY_ATTRIBUTE_NAME = "key";

    /**
     * Used with {@link #LOCALIZATION_KEY_ATTRIBUTE_NAME} to indicate a string that should be
     * rendered "raw" (without escaping HTML). If not specified, defaults to "false".
The value must
     * equal "true" (caselessly).
     *
     * @since 2.3
     */

    public static final String RAW_ATTRIBUTE_NAME = "raw";

    /**
     * Attribute name used to identify components. Obtained from the delegate at the start of
     * each parse.
     *
     * @since 4.0
     */

    private String _componentAttributeName;

    // An optional leading underscore, a letter, then any number of word characters.
    private static final String PROPERTY_NAME_PATTERN = "_?[a-zA-Z]\\w*";

    /**
     * Pattern used to recognize ordinary components (defined in the specification).
     *
     * @since 3.0
     */

    public static final String SIMPLE_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")$";

    /**
     * Pattern used to recognize implicit components (whose type is defined in the template).
     * Subgroup 1 is the id (which may be null) and subgroup 2 is the type (which may be qualified
     * with a library prefix). Subgroup 4 is the library id, Subgroup 5 is the simple component
     * type, which may (as of 4.0) have slashes to delineate folders containing the component.
     * (Subgroup 3 is the library id together with its trailing colon and subgroup 6 is the last
     * folder segment; neither is read by the parser.)
     *
     * @since 3.0
     */

    public static final String IMPLICIT_ID_PATTERN = "^(" + PROPERTY_NAME_PATTERN + ")?@((("
            + PROPERTY_NAME_PATTERN + "):)?((" + PROPERTY_NAME_PATTERN + "/)*"
            + PROPERTY_NAME_PATTERN + "))$";

    // Indexes of the capture groups of IMPLICIT_ID_PATTERN that the parser reads.

    private static final int IMPLICIT_ID_PATTERN_ID_GROUP = 1;

    private static final int IMPLICIT_ID_PATTERN_TYPE_GROUP = 2;

    private static final int IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP = 4;

    private static final int IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP = 5;

    // Compiled form of SIMPLE_ID_PATTERN.
    private Pattern _simpleIdPattern;

    // Compiled form of IMPLICIT_ID_PATTERN.
    private Pattern _implicitIdPattern;

    // Matcher used to test component ids against the two compiled patterns.
    private PatternMatcher _patternMatcher;

    // Allocates the unique ids given to anonymous implicit components.
    private IdAllocator _idAllocator = new IdAllocator();

    // Answers questions about components (known ids, whether a body is allowed).
    private ITemplateParserDelegate _delegate;

    /**
     * Identifies the template being parsed; used with error messages.
     */

    private Resource _resourceLocation;

    /**
     * Shared instance of {@link Location} used by all {@link TextToken} instances in the template.
*/

    private Location _templateLocation;

    /**
     * Location within the resource for the current line. Reset to null whenever the line number
     * changes.
     */

    private Location _currentLocation;

    /**
     * Local reference to the template data that is to be parsed.
     */

    private char[] _templateData;

    /**
     * List of {@link Tag}: the open tags that have not yet been closed, innermost last.
     */

    private List _stack = new ArrayList();

    /**
     * Bookkeeping record for one open tag on the stack.
     */
    private static class Tag
    {
        // The element, i.e., <jwc> or virtually any other element (via jwcid attribute)
        String _tagName;

        // If true, the tag is a placeholder for a dynamic element
        boolean _component;

        // If true, the body of the tag is being ignored, and the
        // ignore flag is cleared when the close tag is reached
        boolean _ignoringBody;

        // If true, then the entire tag (and its body) is being ignored
        boolean _removeTag;

        // If true, then the tag must have a balanced closing tag.
        // This is always true for components.
        boolean _mustBalance;

        // The line on which the start tag exists
        int _line;

        // If true, then the parse ends when the closing tag is found.
        boolean _content;

        Tag(String tagName, int line)
        {
            _tagName = tagName;
            _line = line;
        }

        // Tag names are compared without regard to case.
        boolean match(String matchTagName)
        {
            return _tagName.equalsIgnoreCase(matchTagName);
        }
    }

    /**
     * List of {@link TemplateToken}, this forms the ultimate response.
     */

    private List _tokens = new ArrayList();

    /**
     * The location of the 'cursor' within the template data. The advance() method moves this
     * forward.
     */

    private int _cursor;

    /**
     * The start of the current block of static text, or -1 if no block is active.
     */

    private int _blockStart;

    /**
     * The current line number; tracked by advance(). Starts at 1.
     */

    private int _line;

    /**
     * Set to true when the body of a tag is being ignored.
This is typically used to skip over the 270 * body of a tag when its corresponding component doesn't allow a body, or whe the special jwcid 271 * of $remove$ is used. 272 */ 273 274 private boolean _ignoring; 275 276 /** 277 * A {@link Map}of {@link String}s, used to store attributes collected while parsing a tag. 278 */ 279 280 private Map _attributes = new HashMap(); 281 282 /** 283 * A factory used to create template tokens. 284 */ 285 286 private TemplateTokenFactory _factory; 287 288 public TemplateParser() 289 { 290 Perl5Compiler compiler = new Perl5Compiler(); 291 292 try 293 { 294 _simpleIdPattern = compiler.compile(SIMPLE_ID_PATTERN); 295 _implicitIdPattern = compiler.compile(IMPLICIT_ID_PATTERN); 296 } 297 catch (MalformedPatternException ex) 298 { 299 throw new ApplicationRuntimeException(ex); 300 } 301 302 _patternMatcher = new Perl5Matcher(); 303 } 304 305 /** 306 * Parses the template data into an array of {@link TemplateToken}s. 307 * <p> 308 * The parser is <i>decidedly </i> not threadsafe, so care should be taken that only a single 309 * thread accesses it. 310 * 311 * @param templateData 312 * the HTML template to parse. Some tokens will hold a reference to this array. 313 * @param delegate 314 * object that "knows" about defined components 315 * @param resourceLocation 316 * a description of where the template originated from, used with error messages. 317 */ 318 319 public TemplateToken[] parse(char[] templateData, ITemplateParserDelegate delegate, 320 Resource resourceLocation) throws TemplateParseException 321 { 322 try 323 { 324 beforeParse(templateData, delegate, resourceLocation); 325 326 parse(); 327 328 return (TemplateToken[]) _tokens.toArray(new TemplateToken[_tokens.size()]); 329 } 330 finally 331 { 332 afterParse(); 333 } 334 } 335 336 /** 337 * perform default initialization of the parser. 
338 */ 339 340 protected void beforeParse(char[] templateData, ITemplateParserDelegate delegate, 341 Resource resourceLocation) 342 { 343 _templateData = templateData; 344 _resourceLocation = resourceLocation; 345 _templateLocation = new LocationImpl(resourceLocation); 346 _delegate = delegate; 347 _ignoring = false; 348 _line = 1; 349 _componentAttributeName = delegate.getComponentAttributeName(); 350 } 351 352 /** 353 * Perform default cleanup after parsing completes. 354 */ 355 356 protected void afterParse() 357 { 358 _delegate = null; 359 _templateData = null; 360 _resourceLocation = null; 361 _templateLocation = null; 362 _currentLocation = null; 363 _stack.clear(); 364 _tokens.clear(); 365 _attributes.clear(); 366 _idAllocator.clear(); 367 } 368 369 /** 370 * Used by the parser to report problems in the parse. Parsing <b>must </b> stop when a problem 371 * is reported. 372 * <p> 373 * The default implementation simply throws an exception that contains the message and location 374 * parameters. 375 * <p> 376 * Subclasses may override but <b>must </b> ensure they throw the required exception. 377 * 378 * @param message 379 * @param location 380 * @param line 381 * ignored by the default impl 382 * @param cursor 383 * ignored by the default impl 384 * @throws TemplateParseException 385 * always thrown in order to terminate the parse. 386 */ 387 388 protected void templateParseProblem(String message, Location location, int line, int cursor) 389 throws TemplateParseException 390 { 391 throw new TemplateParseException(message, location); 392 } 393 394 /** 395 * Used by the parser to report tapestry runtime specific problems in the parse. Parsing <b>must 396 * </b> stop when a problem is reported. 397 * <p> 398 * The default implementation simply rethrows the exception. 399 * <p> 400 * Subclasses may override but <b>must </b> ensure they rethrow the exception. 
401 * 402 * @param exception 403 * @param line 404 * ignored by the default impl 405 * @param cursor 406 * ignored by the default impl 407 * @throws ApplicationRuntimeException 408 * always rethrown in order to terminate the parse. 409 */ 410 411 protected void templateParseProblem(ApplicationRuntimeException exception, int line, int cursor) 412 throws ApplicationRuntimeException 413 { 414 throw exception; 415 } 416 417 /** 418 * Give subclasses access to the parse results. 419 */ 420 protected List getTokens() 421 { 422 if (_tokens == null) 423 return Collections.EMPTY_LIST; 424 425 return _tokens; 426 } 427 428 /** 429 * Checks to see if the next few characters match a given pattern. 430 */ 431 432 private boolean lookahead(char[] match) 433 { 434 try 435 { 436 for (int i = 0; i < match.length; i++) 437 { 438 if (_templateData[_cursor + i] != match[i]) 439 return false; 440 } 441 442 // Every character matched. 443 444 return true; 445 } 446 catch (IndexOutOfBoundsException ex) 447 { 448 return false; 449 } 450 } 451 452 private static final char[] COMMENT_START = new char[] 453 { '<', '!', '-', '-' }; 454 455 private static final char[] COMMENT_END = new char[] 456 { '-', '-', '>' }; 457 458 private static final char[] CLOSE_TAG = new char[] 459 { '<', '/' }; 460 461 protected void parse() throws TemplateParseException 462 { 463 _cursor = 0; 464 _blockStart = -1; 465 int length = _templateData.length; 466 467 while (_cursor < length) 468 { 469 if (_templateData[_cursor] != '<') 470 { 471 if (_blockStart < 0 && !_ignoring) 472 _blockStart = _cursor; 473 474 advance(); 475 continue; 476 } 477 478 // OK, start of something. 479 480 if (lookahead(CLOSE_TAG)) 481 { 482 closeTag(); 483 continue; 484 } 485 486 if (lookahead(COMMENT_START)) 487 { 488 skipComment(); 489 continue; 490 } 491 492 // The start of some tag. 493 494 startTag(); 495 } 496 497 // Usually there's some text at the end of the template (after the last closing tag) that 498 // should 499 // be added. 
Often the last few tags are static tags so we definately 500 // need to end the text block. 501 502 addTextToken(_templateData.length - 1); 503 } 504 505 /** 506 * Advance forward in the document until the end of the comment is reached. In addition, skip 507 * any whitespace following the comment. 508 */ 509 510 private void skipComment() throws TemplateParseException 511 { 512 int length = _templateData.length; 513 int startLine = _line; 514 515 if (_blockStart < 0 && !_ignoring) 516 _blockStart = _cursor; 517 518 while (true) 519 { 520 if (_cursor >= length) 521 templateParseProblem(ParseMessages.commentNotEnded(startLine), new LocationImpl( 522 _resourceLocation, startLine), startLine, _cursor); 523 524 if (lookahead(COMMENT_END)) 525 break; 526 527 // Not the end of the comment, advance over it. 528 529 advance(); 530 } 531 532 _cursor += COMMENT_END.length; 533 advanceOverWhitespace(); 534 } 535 536 private void addTextToken(int end) 537 { 538 // No active block to add to. 539 540 if (_blockStart < 0) 541 return; 542 543 if (_blockStart <= end) 544 { 545 // This seems odd, shouldn't the location be the current location? I guess 546 // no errors are ever reported for a text token. 
547 548 TemplateToken token = _factory.createTextToken( 549 _templateData, 550 _blockStart, 551 end, 552 _templateLocation); 553 554 _tokens.add(token); 555 } 556 557 _blockStart = -1; 558 } 559 560 private static final int WAIT_FOR_ATTRIBUTE_NAME = 0; 561 562 private static final int COLLECT_ATTRIBUTE_NAME = 1; 563 564 private static final int ADVANCE_PAST_EQUALS = 2; 565 566 private static final int WAIT_FOR_ATTRIBUTE_VALUE = 3; 567 568 private static final int COLLECT_QUOTED_VALUE = 4; 569 570 private static final int COLLECT_UNQUOTED_VALUE = 5; 571 572 private void startTag() throws TemplateParseException 573 { 574 int cursorStart = _cursor; 575 int length = _templateData.length; 576 String tagName = null; 577 boolean endOfTag = false; 578 boolean emptyTag = false; 579 int startLine = _line; 580 Location startLocation = new LocationImpl(_resourceLocation, startLine); 581 582 tagBeginEvent(startLine, _cursor); 583 584 advance(); 585 586 // Collect the element type 587 588 while (_cursor < length) 589 { 590 char ch = _templateData[_cursor]; 591 592 if (ch == '/' || ch == '>' || Character.isWhitespace(ch)) 593 { 594 tagName = new String(_templateData, cursorStart + 1, _cursor - cursorStart - 1); 595 596 break; 597 } 598 599 advance(); 600 } 601 602 String attributeName = null; 603 int attributeNameStart = -1; 604 int attributeValueStart = -1; 605 int state = WAIT_FOR_ATTRIBUTE_NAME; 606 char quoteChar = 0; 607 608 _attributes.clear(); 609 610 // Collect each attribute 611 612 while (!endOfTag) 613 { 614 if (_cursor >= length) 615 { 616 String message = (tagName == null) ? ParseMessages.unclosedUnknownTag(startLine) 617 : ParseMessages.unclosedTag(tagName, startLine); 618 619 templateParseProblem(message, startLocation, startLine, cursorStart); 620 } 621 622 char ch = _templateData[_cursor]; 623 624 switch (state) 625 { 626 case WAIT_FOR_ATTRIBUTE_NAME: 627 628 // Ignore whitespace before the next attribute name, while 629 // looking for the end of the current tag. 
630 631 if (ch == '/') 632 { 633 emptyTag = true; 634 advance(); 635 break; 636 } 637 638 if (ch == '>') 639 { 640 endOfTag = true; 641 break; 642 } 643 644 if (Character.isWhitespace(ch)) 645 { 646 advance(); 647 break; 648 } 649 650 // Found non-whitespace, assume its the attribute name. 651 // Note: could use a check here for non-alpha. 652 653 attributeNameStart = _cursor; 654 state = COLLECT_ATTRIBUTE_NAME; 655 advance(); 656 break; 657 658 case COLLECT_ATTRIBUTE_NAME: 659 660 // Looking for end of attribute name. 661 662 if (ch == '=' || ch == '/' || ch == '>' || Character.isWhitespace(ch)) 663 { 664 attributeName = new String(_templateData, attributeNameStart, _cursor 665 - attributeNameStart); 666 667 state = ADVANCE_PAST_EQUALS; 668 break; 669 } 670 671 // Part of the attribute name 672 673 advance(); 674 break; 675 676 case ADVANCE_PAST_EQUALS: 677 678 // Looking for the '=' sign. May hit the end of the tag, or (for bare 679 // attributes), 680 // the next attribute name. 681 682 if (ch == '/' || ch == '>') 683 { 684 // A bare attribute, which is not interesting to 685 // us. 686 687 state = WAIT_FOR_ATTRIBUTE_NAME; 688 break; 689 } 690 691 if (Character.isWhitespace(ch)) 692 { 693 advance(); 694 break; 695 } 696 697 if (ch == '=') 698 { 699 state = WAIT_FOR_ATTRIBUTE_VALUE; 700 quoteChar = 0; 701 attributeValueStart = -1; 702 advance(); 703 break; 704 } 705 706 // Otherwise, an HTML style "bare" attribute (such as <select multiple>). 707 // We aren't interested in those (we're just looking for the id or jwcid 708 // attribute). 709 710 state = WAIT_FOR_ATTRIBUTE_NAME; 711 break; 712 713 case WAIT_FOR_ATTRIBUTE_VALUE: 714 715 if (ch == '/' || ch == '>') 716 templateParseProblem(ParseMessages.missingAttributeValue( 717 tagName, 718 _line, 719 attributeName), getCurrentLocation(), _line, _cursor); 720 721 // Ignore whitespace between '=' and the attribute value. Also, look 722 // for initial quote. 
723 724 if (Character.isWhitespace(ch)) 725 { 726 advance(); 727 break; 728 } 729 730 if (ch == '\'' || ch == '"') 731 { 732 quoteChar = ch; 733 734 state = COLLECT_QUOTED_VALUE; 735 advance(); 736 attributeValueStart = _cursor; 737 attributeBeginEvent(attributeName, _line, attributeValueStart); 738 break; 739 } 740 741 // Not whitespace or quote, must be start of unquoted attribute. 742 743 state = COLLECT_UNQUOTED_VALUE; 744 attributeValueStart = _cursor; 745 attributeBeginEvent(attributeName, _line, attributeValueStart); 746 break; 747 748 case COLLECT_QUOTED_VALUE: 749 750 // Start collecting the quoted attribute value. Stop at the matching quote 751 // character, 752 // unless bare, in which case, stop at the next whitespace. 753 754 if (ch == quoteChar) 755 { 756 String attributeValue = new String(_templateData, attributeValueStart, 757 _cursor - attributeValueStart); 758 759 attributeEndEvent(_cursor); 760 761 addAttributeIfUnique(tagName, attributeName, attributeValue); 762 763 // Advance over the quote. 764 advance(); 765 state = WAIT_FOR_ATTRIBUTE_NAME; 766 break; 767 } 768 769 advance(); 770 break; 771 772 case COLLECT_UNQUOTED_VALUE: 773 774 // An unquoted attribute value ends with whitespace 775 // or the end of the enclosing tag. 
776 777 if (ch == '/' || ch == '>' || Character.isWhitespace(ch)) 778 { 779 String attributeValue = new String(_templateData, attributeValueStart, 780 _cursor - attributeValueStart); 781 782 attributeEndEvent(_cursor); 783 addAttributeIfUnique(tagName, attributeName, attributeValue); 784 785 state = WAIT_FOR_ATTRIBUTE_NAME; 786 break; 787 } 788 789 advance(); 790 break; 791 } 792 } 793 794 tagEndEvent(_cursor); 795 796 // Check for invisible localizations 797 798 String localizationKey = findValueCaselessly(LOCALIZATION_KEY_ATTRIBUTE_NAME, _attributes); 799 String jwcId = findValueCaselessly(_componentAttributeName, _attributes); 800 801 if (localizationKey != null && tagName.equalsIgnoreCase("span") && jwcId == null) 802 { 803 if (_ignoring) 804 templateParseProblem( 805 ParseMessages.componentMayNotBeIgnored(tagName, startLine), 806 startLocation, 807 startLine, 808 cursorStart); 809 810 // If the tag isn't empty, then create a Tag instance to ignore the 811 // body of the tag. 812 813 if (!emptyTag) 814 { 815 Tag tag = new Tag(tagName, startLine); 816 817 tag._component = false; 818 tag._removeTag = true; 819 tag._ignoringBody = true; 820 tag._mustBalance = true; 821 822 _stack.add(tag); 823 824 // Start ignoring content until the close tag. 825 826 _ignoring = true; 827 } 828 else 829 { 830 // Cursor is at the closing carat, advance over it. 831 advance(); 832 // TAPESTRY-359: *don't* skip whitespace 833 } 834 835 // End any open block. 
836 837 addTextToken(cursorStart - 1); 838 839 boolean raw = checkBoolean(RAW_ATTRIBUTE_NAME, _attributes); 840 841 Map attributes = filter(_attributes, new String[] 842 { LOCALIZATION_KEY_ATTRIBUTE_NAME, RAW_ATTRIBUTE_NAME }); 843 844 TemplateToken token = _factory.createLocalizationToken( 845 tagName, 846 localizationKey, 847 raw, 848 attributes, 849 startLocation); 850 851 _tokens.add(token); 852 853 return; 854 } 855 856 if (jwcId != null) 857 { 858 processComponentStart(tagName, jwcId, emptyTag, startLine, cursorStart, startLocation); 859 return; 860 } 861 862 // A static tag (not a tag without a jwcid attribute). 863 // We need to record this so that we can match close tags later. 864 865 if (!emptyTag) 866 { 867 Tag tag = new Tag(tagName, startLine); 868 _stack.add(tag); 869 } 870 871 // If there wasn't an active block, then start one. 872 873 if (_blockStart < 0 && !_ignoring) 874 _blockStart = cursorStart; 875 876 advance(); 877 } 878 879 /** 880 * @throws TemplateParseException 881 * @since 4.0 882 */ 883 884 private void addAttributeIfUnique(String tagName, String attributeName, String attributeValue) 885 throws TemplateParseException 886 { 887 888 if (_attributes.containsKey(attributeName)) 889 templateParseProblem( 890 ParseMessages.duplicateTagAttribute(tagName, _line, attributeName), 891 getCurrentLocation(), 892 _line, 893 _cursor); 894 895 _attributes.put(attributeName, attributeValue); 896 } 897 898 /** 899 * Processes a tag that is the open tag for a component (but also handles the $remove$ and 900 * $content$ tags). 901 */ 902 903 /** 904 * Notify that the beginning of a tag has been detected. 905 * <p> 906 * Default implementation does nothing. 907 */ 908 protected void tagBeginEvent(int startLine, int cursorPosition) 909 { 910 } 911 912 /** 913 * Notify that the end of the current tag has been detected. 914 * <p> 915 * Default implementation does nothing. 
916 */ 917 protected void tagEndEvent(int cursorPosition) 918 { 919 } 920 921 /** 922 * Notify that the beginning of an attribute value has been detected. 923 * <p> 924 * Default implementation does nothing. 925 */ 926 protected void attributeBeginEvent(String attributeName, int startLine, int cursorPosition) 927 { 928 } 929 930 /** 931 * Notify that the end of the current attribute value has been detected. 932 * <p> 933 * Default implementation does nothing. 934 */ 935 protected void attributeEndEvent(int cursorPosition) 936 { 937 } 938 939 private void processComponentStart(String tagName, String jwcId, boolean emptyTag, 940 int startLine, int cursorStart, Location startLocation) throws TemplateParseException 941 { 942 if (jwcId.equalsIgnoreCase(CONTENT_ID)) 943 { 944 processContentTag(tagName, startLine, cursorStart, emptyTag); 945 946 return; 947 } 948 949 boolean isRemoveId = jwcId.equalsIgnoreCase(REMOVE_ID); 950 951 if (_ignoring && !isRemoveId) 952 templateParseProblem( 953 ParseMessages.componentMayNotBeIgnored(tagName, startLine), 954 startLocation, 955 startLine, 956 cursorStart); 957 958 String type = null; 959 boolean allowBody = false; 960 961 if (_patternMatcher.matches(jwcId, _implicitIdPattern)) 962 { 963 MatchResult match = _patternMatcher.getMatch(); 964 965 jwcId = match.group(IMPLICIT_ID_PATTERN_ID_GROUP); 966 type = match.group(IMPLICIT_ID_PATTERN_TYPE_GROUP); 967 968 String libraryId = match.group(IMPLICIT_ID_PATTERN_LIBRARY_ID_GROUP); 969 String simpleType = match.group(IMPLICIT_ID_PATTERN_SIMPLE_TYPE_GROUP); 970 971 // If (and this is typical) no actual component id was specified, 972 // then generate one on the fly. 973 // The allocated id for anonymous components is 974 // based on the simple (unprefixed) type, but starts 975 // with a leading dollar sign to ensure no conflicts 976 // with user defined component ids (which don't allow dollar signs 977 // in the id). 
978 // New for 4.0: the component type may included slashes ('/'), but these 979 // are not valid identifiers, so we convert them to '$'. 980 981 if (jwcId == null) 982 jwcId = _idAllocator.allocateId("$" + simpleType.replace('/', '$')); 983 984 try 985 { 986 allowBody = _delegate.getAllowBody(libraryId, simpleType, startLocation); 987 } 988 catch (ApplicationRuntimeException e) 989 { 990 // give subclasses a chance to handle and rethrow 991 templateParseProblem(e, startLine, cursorStart); 992 } 993 994 } 995 else 996 { 997 if (!isRemoveId) 998 { 999 if (!_patternMatcher.matches(jwcId, _simpleIdPattern)) 1000 templateParseProblem( 1001 ParseMessages.componentIdInvalid(tagName, startLine, jwcId), 1002 startLocation, 1003 startLine, 1004 cursorStart); 1005 1006 if (!_delegate.getKnownComponent(jwcId)) 1007 templateParseProblem( 1008 ParseMessages.unknownComponentId(tagName, startLine, jwcId), 1009 startLocation, 1010 startLine, 1011 cursorStart); 1012 1013 try 1014 { 1015 allowBody = _delegate.getAllowBody(jwcId, startLocation); 1016 } 1017 catch (ApplicationRuntimeException e) 1018 { 1019 // give subclasses a chance to handle and rethrow 1020 templateParseProblem(e, startLine, cursorStart); 1021 } 1022 } 1023 } 1024 1025 // Ignore the body if we're removing the entire tag, 1026 // of if the corresponding component doesn't allow 1027 // a body. 1028 1029 boolean ignoreBody = !emptyTag && (isRemoveId || !allowBody); 1030 1031 if (_ignoring && ignoreBody) 1032 templateParseProblem(ParseMessages.nestedIgnore(tagName, startLine), new LocationImpl( 1033 _resourceLocation, startLine), startLine, cursorStart); 1034 1035 if (!emptyTag) 1036 pushNewTag(tagName, startLine, isRemoveId, ignoreBody); 1037 1038 // End any open block. 
1039 1040 addTextToken(cursorStart - 1); 1041 1042 if (!isRemoveId) 1043 { 1044 addOpenToken(tagName, jwcId, type, startLocation); 1045 1046 if (emptyTag) 1047 _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation())); 1048 } 1049 1050 advance(); 1051 } 1052 1053 private void pushNewTag(String tagName, int startLine, boolean isRemoveId, boolean ignoreBody) 1054 { 1055 Tag tag = new Tag(tagName, startLine); 1056 1057 tag._component = !isRemoveId; 1058 tag._removeTag = isRemoveId; 1059 1060 tag._ignoringBody = ignoreBody; 1061 1062 _ignoring = tag._ignoringBody; 1063 1064 tag._mustBalance = true; 1065 1066 _stack.add(tag); 1067 } 1068 1069 private void processContentTag(String tagName, int startLine, int cursorStart, boolean emptyTag) 1070 throws TemplateParseException 1071 { 1072 if (_ignoring) 1073 templateParseProblem( 1074 ParseMessages.contentBlockMayNotBeIgnored(tagName, startLine), 1075 new LocationImpl(_resourceLocation, startLine), 1076 startLine, 1077 cursorStart); 1078 1079 if (emptyTag) 1080 templateParseProblem( 1081 ParseMessages.contentBlockMayNotBeEmpty(tagName, startLine), 1082 new LocationImpl(_resourceLocation, startLine), 1083 startLine, 1084 cursorStart); 1085 1086 _tokens.clear(); 1087 _blockStart = -1; 1088 1089 Tag tag = new Tag(tagName, startLine); 1090 1091 tag._mustBalance = true; 1092 tag._content = true; 1093 1094 _stack.clear(); 1095 _stack.add(tag); 1096 1097 advance(); 1098 } 1099 1100 private void addOpenToken(String tagName, String jwcId, String type, Location location) 1101 { 1102 OpenToken token = _factory.createOpenToken(tagName, jwcId, type, location); 1103 _tokens.add(token); 1104 1105 if (_attributes.isEmpty()) 1106 return; 1107 1108 Iterator i = _attributes.entrySet().iterator(); 1109 while (i.hasNext()) 1110 { 1111 Map.Entry entry = (Map.Entry) i.next(); 1112 1113 String key = (String) entry.getKey(); 1114 1115 if (key.equalsIgnoreCase(_componentAttributeName)) 1116 continue; 1117 1118 String value = (String) 
entry.getValue(); 1119 1120 addAttributeToToken(token, key, value); 1121 } 1122 } 1123 1124 /** 1125 * Adds the attribute to the token (identifying prefixes and whatnot is now done downstream). 1126 * 1127 * @since 3.0 1128 */ 1129 1130 private void addAttributeToToken(OpenToken token, String name, String attributeValue) 1131 { 1132 token.addAttribute(name, convertEntitiesToPlain(attributeValue)); 1133 } 1134 1135 /** 1136 * Invoked to handle a closing tag, i.e., </foo>. When a tag closes, it will match against 1137 * a tag on the open tag start. Preferably the top tag on the stack (if everything is well 1138 * balanced), but this is HTML, not XML, so many tags won't balance. 1139 * <p> 1140 * Once the matching tag is located, the question is ... is the tag dynamic or static? If 1141 * static, then the current text block is extended to include this close tag. If dynamic, then 1142 * the current text block is ended (before the '<' that starts the tag) and a close token is 1143 * added. 1144 * <p> 1145 * In either case, the matching static element and anything above it is removed, and the cursor 1146 * is left on the character following the '>'. 
1147 */ 1148 1149 private void closeTag() throws TemplateParseException 1150 { 1151 int cursorStart = _cursor; 1152 int length = _templateData.length; 1153 int startLine = _line; 1154 1155 Location startLocation = getCurrentLocation(); 1156 1157 _cursor += CLOSE_TAG.length; 1158 1159 int tagStart = _cursor; 1160 1161 while (true) 1162 { 1163 if (_cursor >= length) 1164 templateParseProblem( 1165 ParseMessages.incompleteCloseTag(startLine), 1166 startLocation, 1167 startLine, 1168 cursorStart); 1169 1170 char ch = _templateData[_cursor]; 1171 1172 if (ch == '>') 1173 break; 1174 1175 advance(); 1176 } 1177 1178 String tagName = new String(_templateData, tagStart, _cursor - tagStart); 1179 1180 int stackPos = _stack.size() - 1; 1181 Tag tag = null; 1182 1183 while (stackPos >= 0) 1184 { 1185 tag = (Tag) _stack.get(stackPos); 1186 1187 if (tag.match(tagName)) 1188 break; 1189 1190 if (tag._mustBalance) 1191 templateParseProblem(ParseMessages.improperlyNestedCloseTag( 1192 tagName, 1193 startLine, 1194 tag._tagName, 1195 tag._line), startLocation, startLine, cursorStart); 1196 1197 stackPos--; 1198 } 1199 1200 if (stackPos < 0) 1201 templateParseProblem( 1202 ParseMessages.unmatchedCloseTag(tagName, startLine), 1203 startLocation, 1204 startLine, 1205 cursorStart); 1206 1207 // Special case for the content tag 1208 1209 if (tag._content) 1210 { 1211 addTextToken(cursorStart - 1); 1212 1213 // Advance the cursor right to the end. 1214 1215 _cursor = length; 1216 _stack.clear(); 1217 return; 1218 } 1219 1220 // When a component closes, add a CLOSE tag. 1221 if (tag._component) 1222 { 1223 addTextToken(cursorStart - 1); 1224 1225 _tokens.add(_factory.createCloseToken(tagName, getCurrentLocation())); 1226 } 1227 else 1228 { 1229 // The close of a static tag. Unless removing the tag 1230 // entirely, make sure the block tag is part of a text block. 
1231 1232 if (_blockStart < 0 && !tag._removeTag && !_ignoring) 1233 _blockStart = cursorStart; 1234 } 1235 1236 // Remove all elements at stackPos or above. 1237 1238 for (int i = _stack.size() - 1; i >= stackPos; i--) 1239 _stack.remove(i); 1240 1241 // Advance cursor past '>' 1242 1243 advance(); 1244 1245 // If editting out the tag (i.e., $remove$) then kill any whitespace. 1246 // For components that simply don't contain a body, removeTag will 1247 // be false. 1248 1249 if (tag._removeTag) 1250 advanceOverWhitespace(); 1251 1252 // If we were ignoring the body of the tag, then clear the ignoring 1253 // flag, since we're out of the body. 1254 1255 if (tag._ignoringBody) 1256 _ignoring = false; 1257 } 1258 1259 /** 1260 * Advances the cursor to the next character. If the end-of-line is reached, then increments the 1261 * line counter. 1262 */ 1263 1264 private void advance() 1265 { 1266 int length = _templateData.length; 1267 1268 if (_cursor >= length) 1269 return; 1270 1271 char ch = _templateData[_cursor]; 1272 1273 _cursor++; 1274 1275 if (ch == '\n') 1276 { 1277 _line++; 1278 _currentLocation = null; 1279 return; 1280 } 1281 1282 // A \r, or a \r\n also counts as a new line. 1283 1284 if (ch == '\r') 1285 { 1286 _line++; 1287 _currentLocation = null; 1288 1289 if (_cursor < length && _templateData[_cursor] == '\n') 1290 _cursor++; 1291 1292 return; 1293 } 1294 1295 // Not an end-of-line character. 1296 1297 } 1298 1299 private void advanceOverWhitespace() 1300 { 1301 int length = _templateData.length; 1302 1303 while (_cursor < length) 1304 { 1305 char ch = _templateData[_cursor]; 1306 if (!Character.isWhitespace(ch)) 1307 return; 1308 1309 advance(); 1310 } 1311 } 1312 1313 /** 1314 * Returns a new Map that is a copy of the input Map with some key/value pairs removed. A list 1315 * of keys is passed in and matching keys (caseless comparison) from the input Map are excluded 1316 * from the output map. May return null (rather than return an empty Map). 
1317 */ 1318 1319 private Map filter(Map input, String[] removeKeys) 1320 { 1321 if (input == null || input.isEmpty()) 1322 return null; 1323 1324 Map result = null; 1325 1326 Iterator i = input.entrySet().iterator(); 1327 1328 nextkey: while (i.hasNext()) 1329 { 1330 Map.Entry entry = (Map.Entry) i.next(); 1331 1332 String key = (String) entry.getKey(); 1333 1334 for (int j = 0; j < removeKeys.length; j++) 1335 { 1336 if (key.equalsIgnoreCase(removeKeys[j])) 1337 continue nextkey; 1338 } 1339 1340 if (result == null) 1341 result = new HashMap(input.size()); 1342 1343 result.put(key, entry.getValue()); 1344 } 1345 1346 return result; 1347 } 1348 1349 /** 1350 * Searches a Map for given key, caselessly. The Map is expected to consist of Strings for keys 1351 * and values. Returns the value for the first key found that matches (caselessly) the input 1352 * key. Returns null if no value found. 1353 */ 1354 1355 protected String findValueCaselessly(String key, Map map) 1356 { 1357 String result = (String) map.get(key); 1358 1359 if (result != null) 1360 return result; 1361 1362 Iterator i = map.entrySet().iterator(); 1363 while (i.hasNext()) 1364 { 1365 Map.Entry entry = (Map.Entry) i.next(); 1366 1367 String entryKey = (String) entry.getKey(); 1368 1369 if (entryKey.equalsIgnoreCase(key)) 1370 return (String) entry.getValue(); 1371 } 1372 1373 return null; 1374 } 1375 1376 /** 1377 * Conversions needed by {@link #convertEntitiesToPlain(String)} 1378 */ 1379 1380 private static final String[] CONVERSIONS = 1381 { "<", "<", ">", ">", """, "\"", "&", "&" }; 1382 1383 /** 1384 * Provided a raw input string that has been recognized to be an expression, this removes excess 1385 * white space and converts &amp;;, &quot;; &lt;; and &gt;; to their normal 1386 * character values (otherwise its impossible to specify those values in expressions in the 1387 * template). 
1388 */ 1389 1390 private String convertEntitiesToPlain(String input) 1391 { 1392 int inputLength = input.length(); 1393 1394 StringBuffer buffer = new StringBuffer(inputLength); 1395 1396 int cursor = 0; 1397 1398 outer: while (cursor < inputLength) 1399 { 1400 for (int i = 0; i < CONVERSIONS.length; i += 2) 1401 { 1402 String entity = CONVERSIONS[i]; 1403 int entityLength = entity.length(); 1404 String value = CONVERSIONS[i + 1]; 1405 1406 if (cursor + entityLength > inputLength) 1407 continue; 1408 1409 if (input.substring(cursor, cursor + entityLength).equals(entity)) 1410 { 1411 buffer.append(value); 1412 cursor += entityLength; 1413 continue outer; 1414 } 1415 } 1416 1417 buffer.append(input.charAt(cursor)); 1418 cursor++; 1419 } 1420 1421 return buffer.toString().trim(); 1422 } 1423 1424 /** 1425 * Returns true if the map contains the given key (caseless search) and the value is "true" 1426 * (caseless comparison). 1427 */ 1428 1429 private boolean checkBoolean(String key, Map map) 1430 { 1431 String value = findValueCaselessly(key, map); 1432 1433 if (value == null) 1434 return false; 1435 1436 return value.equalsIgnoreCase("true"); 1437 } 1438 1439 /** 1440 * Gets the current location within the file. This allows the location to be created only as 1441 * needed, and multiple objects on the same line can share the same Location instance. 1442 * 1443 * @since 3.0 1444 */ 1445 1446 protected Location getCurrentLocation() 1447 { 1448 if (_currentLocation == null) 1449 _currentLocation = new LocationImpl(_resourceLocation, _line); 1450 1451 return _currentLocation; 1452 } 1453 1454 public void setFactory(TemplateTokenFactory factory) 1455 { 1456 _factory = factory; 1457 } 1458 1459 }