// License: GPL. For details, see LICENSE file.
package org.openstreetmap.josm.io;

import java.io.IOException;
import java.io.Reader;

import org.openstreetmap.josm.Main;

/**
 * Reader wrapper that gets rid of characters that are invalid in an XML 1.0
 * document.
 *
 * Although these characters are forbidden, in the real world they still appear
 * in XML files. Java's SAX parser throws an exception, so we have to filter
 * at a lower level.
 *
 * Only handles control characters (&lt;0x20). Invalid characters are replaced
 * by space (0x20).
 */
public class InvalidXmlCharacterFilter extends Reader {

    /** The wrapped reader that supplies the unfiltered characters. */
    private final Reader reader;

    /**
     * {@code true} until the first invalid character has been reported, so the
     * warning is logged once rather than once per replaced character.
     */
    public static boolean firstWarning = true;

    /**
     * Lookup table indexed by character code (0x00 to 0x1F); an entry is
     * {@code true} if that character gets replaced by this filter.
     */
    public static final boolean[] INVALID_CHARS;

    static {
        INVALID_CHARS = new boolean[0x20];
        for (int i = 0; i < INVALID_CHARS.length; ++i) {
            // Every control character is invalid except tab, LF and CR.
            INVALID_CHARS[i] = i != 0x9 && i != 0xA && i != 0xD;
        }
    }

    /**
     * Constructs a new filter on top of the given reader.
     *
     * @param reader the reader to read from and to close when this filter is closed
     */
    public InvalidXmlCharacterFilter(Reader reader) {
        this.reader = reader;
    }

    /**
     * Reads characters from the wrapped reader into {@code b} and replaces the
     * invalid ones in place.
     *
     * @param b destination buffer
     * @param off offset in {@code b} at which to start storing characters
     * @param len maximum number of characters to read
     * @return the value returned by the wrapped reader's {@code read}; -1 is passed through unchanged
     * @throws IOException if the wrapped reader throws it
     */
    @Override
    public int read(char[] b, int off, int len) throws IOException {
        int n = reader.read(b, off, len);
        if (n == -1) {
            return -1;
        }
        // Only the characters just read are filtered; the rest of the buffer is left alone.
        int end = off + n;
        for (int i = off; i < end; ++i) {
            b[i] = filter(b[i]);
        }
        return n;
    }

    /**
     * Closes the wrapped reader.
     *
     * @throws IOException if the wrapped reader throws it
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }

    /**
     * Maps a single character to its filtered value.
     *
     * @param in the character to check
     * @return a space (0x20) if {@code in} is flagged in {@link #INVALID_CHARS}, otherwise {@code in} unchanged
     */
    private char filter(char in) {
        // char is unsigned, so only the upper bound has to be checked before indexing the table.
        if (in < INVALID_CHARS.length && INVALID_CHARS[in]) {
            if (firstWarning) {
                Main.warn("Invalid xml character encountered.");
                firstWarning = false;
            }
            return ' ';
        }
        return in;
    }

}