/**
 * Copyright 2005 Alan Green
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */


package org.codehaus.groovy.antlr;

import java.io.IOException;
import java.io.Reader;

import antlr.CharScanner;

/**
 * Translates GLS-defined unicode escapes into characters. Throws an exception
 * in the event of an invalid unicode escape being detected.
 *
 * <p>An escape is recognised as a backslash, followed by one or more
 * <code>u</code> characters, followed by exactly four hex digits. Every
 * character handed back to the caller is also written to the
 * {@link SourceBuffer} supplied at construction time, if there is one.</p>
 *
 * <p>No attempt has been made to optimise this class for speed or
 * space.</p>
 *
 * @version $Revision: 2222 $
 */
public class UnicodeEscapingReader extends Reader {

    /** Number of hex digits that make up one unicode escape. */
    private static final int HEX_DIGIT_COUNT = 4;

    private final Reader reader;
    private final SourceBuffer sourceBuffer;
    private CharScanner lexer;

    /** True when {@link #nextChar} holds a character that must be returned by the next read. */
    private boolean hasNextChar = false;
    private int nextChar;

    /**
     * Constructor.
     * @param reader The reader that this reader will filter over.
     * @param sourceBuffer Receives a copy of every character returned by
     *        this reader; may be <code>null</code>, in which case nothing
     *        is recorded.
     */
    public UnicodeEscapingReader(Reader reader, SourceBuffer sourceBuffer) {
        this.reader = reader;
        this.sourceBuffer = sourceBuffer;
    }

    /**
     * Sets the lexer that is using this reader. The lexer is only consulted
     * for the line and column reported when an invalid escape is found; if
     * none has been set, that error is reported without a position.
     * @param lexer The lexer reading from this reader.
     */
    public void setLexer(CharScanner lexer) {
        this.lexer = lexer;
    }

    /**
     * Reads characters from the underlying reader, one at a time via
     * {@link #read()}, so that escapes are translated.
     * @param cbuf Destination buffer.
     * @param off Index in <code>cbuf</code> at which to start storing.
     * @param len Maximum number of characters to read.
     * @return The number of characters stored, or -1 if the end of the
     *         stream was reached before any character could be stored.
     * @throws IOException If the underlying reader fails or an invalid
     *         unicode escape is encountered.
     * @see java.io.Reader#read(char[],int,int)
     */
    public int read(char[] cbuf, int off, int len) throws IOException {
        int c = 0;
        int count = 0;
        while (count < len && (c = read()) != -1) {
            cbuf[off + count] = (char) c;
            count++;
        }
        // Only report end-of-stream when nothing at all was stored.
        return (count == 0 && c == -1) ? -1 : count;
    }

    /**
     * Gets the next character from the underlying reader,
     * translating escapes as required.
     * @return The next (possibly translated) character, or the value the
     *         underlying reader returned if it was not part of an escape
     *         (including -1 at end of stream).
     * @throws IOException If the underlying reader fails or an invalid
     *         unicode escape is encountered.
     * @see java.io.Reader#read()
     */
    public int read() throws IOException {
        // A character held back by an earlier call is returned as-is,
        // without escape processing. In particular a backslash that
        // directly follows another backslash never starts an escape.
        if (hasNextChar) {
            hasNextChar = false;
            write(nextChar);
            return nextChar;
        }

        int c = reader.read();
        if (c != '\\') {
            // NOTE(review): -1 (end of stream) is forwarded to the source
            // buffer too; presumably SourceBuffer.write ignores it - confirm.
            write(c);
            return c;
        }

        // Have one backslash, continue if next char is 'u'
        c = reader.read();
        if (c != 'u') {
            // Not an escape: emit the backslash now and hold the
            // character after it for the next call.
            hasNextChar = true;
            nextChar = c;
            write('\\');
            return '\\';
        }

        // Swallow multiple 'u's
        do {
            c = reader.read();
        } while (c == 'u');

        // c is the first hex digit; three more must follow.
        int rv = hexValue(c);
        for (int i = 1; i < HEX_DIGIT_COUNT; i++) {
            rv = (rv << 4) | hexValue(reader.read());
        }
        write(rv);
        return rv;
    }

    /**
     * Copies the given character to the source buffer, if one was supplied.
     */
    private void write(int c) {
        if (sourceBuffer != null) {
            sourceBuffer.write(c);
        }
    }

    /**
     * Validates the given character as a hex digit and returns its
     * numeric value (0-15).
     */
    private int hexValue(int c) throws IOException {
        checkHexDigit(c);
        return Character.digit((char) c, 16);
    }

    /**
     * Checks that the given character is indeed a hex digit.
     * @throws IOException If it is not. The offending character is held
     *         back so that the next read returns it.
     */
    private void checkHexDigit(int c) throws IOException {
        boolean isHexDigit = (c >= '0' && c <= '9')
                || (c >= 'a' && c <= 'f')
                || (c >= 'A' && c <= 'F');
        if (isHexDigit) {
            return;
        }
        // Causes the invalid escape to be skipped
        hasNextChar = true;
        nextChar = c;
        String message = "Did not find four digit hex character code.";
        // The lexer is optional: without this guard a missing lexer would
        // replace the intended IOException with a NullPointerException.
        if (lexer != null) {
            message += " line: " + lexer.getLine() + " col:" + lexer.getColumn();
        }
        throw new IOException(message);
    }

    /**
     * Closes this reader by calling close on the underlying reader.
     * @see java.io.Reader#close()
     */
    public void close() throws IOException {
        reader.close();
    }
}