// Copyright 2004, 2005 The Apache Software Foundation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
014
015package org.apache.tapestry.util.text;
016
017import java.io.BufferedReader;
018import java.io.IOException;
019import java.io.InputStream;
020import java.io.InputStreamReader;
021import java.io.Reader;
022import java.io.UnsupportedEncodingException;
023import java.util.Map;
024
025/**
026 * An object that loads a properties file from the provided input stream or reader.
027 * This class reads the property file exactly like java.util.Properties,
028 * except that it also allows the files to use an encoding other than ISO-8859-1
029 * and all non-ASCII characters are read correctly using the given encoding.
030 * In short, non-latin characters no longer need to be quoted using native2ascii.
031 * 
032 * @author mb
033 * @since 4.0
034 */
035public class LocalizedPropertiesLoader
036{
037    private static final String HEX_DIGITS = "0123456789ABCDEF";
038    
039    private static final ICharacterMatcher WHITESPACE = new WhitespaceMatcher(false);
040    private static final ICharacterMatcher LINE_SEPARATOR = new AsciiCharacterMatcher("\n\r");
041    private static final ICharacterMatcher NOT_LINE_SEPARATOR = new InverseMatcher(LINE_SEPARATOR);
042    private static final ICharacterMatcher KEY_VALUE_SEPARATOR = new AsciiCharacterMatcher("=:");
043    private static final ICharacterMatcher SEPARATOR = new AsciiCharacterMatcher("=:\r\n");
044    private static final ICharacterMatcher COMMENT = new AsciiCharacterMatcher("#!");
045    private static final ICharacterMatcher WHITESPACE_OR_SEPARATOR = 
046        new CompoundMatcher(new ICharacterMatcher[] { WHITESPACE, SEPARATOR });
047
048    private ExtendedReader _extendedReader;
049    
050    /**
051     * Creates a new loader that will load the properties from the given input stream
052     * using the default character encoding
053     * 
054     * @param ins the input stream to load the properties from
055     */
056    public LocalizedPropertiesLoader(InputStream ins)
057    {
058        this(new InputStreamReader(ins));
059    }
060    
061    /**
062     * Creates a new loader that will load the properties from the given input stream
063     * using the provided character encoding
064     * 
065     * @param ins the input stream to load the properties from
066     * @param encoding the character encoding the be used when reading from the stream
067     * @throws UnsupportedEncodingException if the name of the encoding cannot be recognized
068     */
069    public LocalizedPropertiesLoader(InputStream ins, String encoding) throws UnsupportedEncodingException
070    {
071        this(new InputStreamReader(ins, encoding));
072    }
073    
074    /**
075     * Creates a new loader that will load the properties from the given reader
076     * 
077     * @param reader the Reader to load the properties from
078     */
079    public LocalizedPropertiesLoader(Reader reader)
080    {
081        _extendedReader = new ExtendedReader(new BufferedReader(reader));
082    }
083    
084    /**
085     * Read the properties from the provided stream and store them into the given map
086     * 
087     * @param properties the map where the properties will be stored
088     * @throws IOException if an error occurs
089     */
090    public void load(Map properties) throws IOException
091    {
092        while (!isAtEndOfStream()) {
093                // we are at the beginning of a line.
094                // check whether it is a comment and if it is, skip it
095            int nextChar = _extendedReader.peek();
096            if (COMMENT.matches((char) nextChar)) {
097                _extendedReader.skipCharacters(NOT_LINE_SEPARATOR);
098                continue;
099            }
100            
101            _extendedReader.skipCharacters(WHITESPACE);
102            if (!isAtEndOfLine()) {
103                // this line does not consist only of whitespace. the next word is the key
104                String key = readQuotedLine(WHITESPACE_OR_SEPARATOR);
105                _extendedReader.skipCharacters(WHITESPACE);
106                
107                // if the next char is a key-value separator, read it and skip the following spaces
108                nextChar = _extendedReader.peek();
109                if (nextChar > 0 && KEY_VALUE_SEPARATOR.matches((char) nextChar)) {
110                    _extendedReader.read();
111                    _extendedReader.skipCharacters(WHITESPACE);
112                }
113
114                // finally, read the value
115                String value = readQuotedLine(LINE_SEPARATOR);
116                
117                properties.put(key, value);
118            }
119            _extendedReader.skipCharacters(LINE_SEPARATOR);
120        }
121    }
122    
123    
124    private boolean isAtEndOfStream() throws IOException
125    {
126        int nextChar = _extendedReader.peek();
127        return (nextChar < 0);
128    }
129    
130    
131    private boolean isAtEndOfLine() throws IOException
132    {
133        int nextChar = _extendedReader.peek();
134        if (nextChar < 0)
135            return true;
136        return LINE_SEPARATOR.matches((char) nextChar);
137    }
138    
139    
140    private String readQuotedLine(ICharacterMatcher terminators) throws IOException
141    {
142        StringBuffer buf = new StringBuffer();
143        
144        while (true) {
145            // see what the next char is
146            int nextChar = _extendedReader.peek();
147            
148            // if at end of stream or the char is one of the terminators, stop
149            if (nextChar < 0 || terminators.matches((char) nextChar))
150                break;
151
152            try {
153                // read the char (and possibly unquote it)
154                char ch = readQuotedChar();
155                buf.append(ch);
156            } catch (IgnoreCharacterException e) {
157                // simply ignore -- no character was read
158            }
159        }
160        
161        return buf.toString();
162    }
163    
164
165    private char readQuotedChar() throws IOException, IgnoreCharacterException
166    {
167        int nextChar = _extendedReader.read();
168        if (nextChar < 0)
169            throw new IgnoreCharacterException();
170        char ch = (char) nextChar;
171        
172        // if the char is not the quotation char, simply return it
173        if (ch != '\\') 
174            return ch;
175
176        // the character is a quotation character. unquote it
177        nextChar = _extendedReader.read();
178
179        // if at the end of the stream, stop
180        if (nextChar < 0)
181            throw new IgnoreCharacterException();
182        
183        ch = (char) nextChar;
184        switch (ch) {
185            case 'u' :
186                char res = 0;
187                for (int i = 0; i < 4; i++) {
188                    nextChar = _extendedReader.read();
189                    if (nextChar < 0)
190                        throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
191                    char digitChar = (char) nextChar;
192                    int digit = HEX_DIGITS.indexOf(Character.toUpperCase(digitChar));
193                    if (digit < 0)
194                        throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
195                    res = (char) (res * 16 + digit);
196                }
197                return res;
198
199            case '\r' :
200                // if the next char is \n, read it and fall through
201                nextChar = _extendedReader.peek();
202                if (nextChar == '\n')
203                    _extendedReader.read();
204            case '\n' :
205                _extendedReader.skipCharacters(WHITESPACE);
206                throw new IgnoreCharacterException();
207                
208            case 't' :  return '\t';
209            case 'n' :  return '\n';
210            case 'r' :  return '\r';
211            default:    return ch;
212        }
213    }
214    
215
216    private static class IgnoreCharacterException extends Exception 
217    {
218                private static final long serialVersionUID = 8366308710256427596L;
219    }    
220}