001// Copyright 2004, 2005 The Apache Software Foundation 002// 003// Licensed under the Apache License, Version 2.0 (the "License"); 004// you may not use this file except in compliance with the License. 005// You may obtain a copy of the License at 006// 007// http://www.apache.org/licenses/LICENSE-2.0 008// 009// Unless required by applicable law or agreed to in writing, software 010// distributed under the License is distributed on an "AS IS" BASIS, 011// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 012// See the License for the specific language governing permissions and 013// limitations under the License. 014 015package org.apache.tapestry.util.text; 016 017/** 018 * An object that encodes a character according to rules of the HTML specification, 019 * so that it will be properly parsed by a browser irrespectively of the character 020 * encoding used in the HTML output. 021 * 022 * @author mb 023 * @since 4.0 024 */ 025public class MarkupCharacterTranslator implements ICharacterTranslator 026{ 027 private static final String SAFE_CHARACTERS = 028 "01234567890" 029 + "abcdefghijklmnopqrstuvwxyz" 030 + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 031 + "\t\n\r !#$%'()*+,-./:;=?@[\\]^_`{|}~"; 032 033 private static final String[][] ENTITIES = { 034 { "\"", """ }, 035 { "<", "<" }, 036 { ">", ">" }, 037 { "&", "&" } 038 }; 039 040 private static final ICharacterMatcher SAFE_MATCHER = new AsciiCharacterMatcher(SAFE_CHARACTERS); 041 private static final ICharacterTranslator ENTITY_TRANSLATOR = new AsciiCharacterTranslator(ENTITIES); 042 043 private boolean _encodeNonAscii; 044 private ICharacterMatcher _safeMatcher; 045 private ICharacterTranslator _entityTranslator; 046 047 public MarkupCharacterTranslator() 048 { 049 this(true); 050 } 051 052 public MarkupCharacterTranslator(boolean encodeNonAscii) 053 { 054 this(encodeNonAscii, SAFE_MATCHER, ENTITY_TRANSLATOR); 055 } 056 057 public MarkupCharacterTranslator(boolean encodeNonAscii, ICharacterMatcher safeMatcher, ICharacterTranslator entityTranslator) 058 { 059 _encodeNonAscii = encodeNonAscii; 060 _safeMatcher = safeMatcher; 061 _entityTranslator = entityTranslator; 062 } 063 064 public MarkupCharacterTranslator(boolean encodeNonAscii, String safeCharacters, String[][] entities) 065 { 066 _encodeNonAscii = encodeNonAscii; 067 _safeMatcher = new AsciiCharacterMatcher(safeCharacters); 068 _entityTranslator = new AsciiCharacterTranslator(entities); 069 } 070 071 /** 072 * @see org.apache.tapestry.util.text.IMarkupCharacterTranslator#translateAttribute(char) 073 */ 074 public String translate(char ch) { 075 // IE and Firefox do not handle characters between 128 and 159 well, 076 // so they have to be quoted as well 077 if (ch >= 160 && !_encodeNonAscii) 078 return null; 079 080 if (_safeMatcher.matches(ch)) 081 return null; 082 083 String entity = _entityTranslator.translate(ch); 084 if (entity != null) 085 return entity; 086 087 // needs to use a NumberFormat here to be fully compliant, 088 // but this is accepted fine by the browsers 089 return "&#" + (int) ch + ";"; 090 } 091}