001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.io;
003
004import java.io.IOException;
005import java.io.Reader;
006
007import org.openstreetmap.josm.Main;
008
009/**
010 * FilterInputStream that gets rid of characters that are invalid in an XML 1.0
011 * document.
012 *
013 * Although these characters are forbidden, in the real wold they still appear
014 * in XML files. Java's SAX parser throws an exception, so we have to filter
015 * at a lower level.
016 *
017 * Only handles control characters (<0x20). Invalid characters are replaced
018 * by space (0x20).
019 */
020public class InvalidXmlCharacterFilter extends Reader {
021
022    private Reader reader;
023
024    public static boolean firstWarning = true;
025
026    public static final boolean[] INVALID_CHARS;
027
028    static {
029        INVALID_CHARS = new boolean[0x20];
030        for (int i = 0; i < INVALID_CHARS.length; ++i) {
031            INVALID_CHARS[i] = true;
032        }
033        INVALID_CHARS[0x9] = false; // tab
034        INVALID_CHARS[0xA] = false; // LF
035        INVALID_CHARS[0xD] = false; // CR
036    }
037
038    public InvalidXmlCharacterFilter(Reader reader) {
039        this.reader = reader;
040    }
041
042    @Override
043    public int read(char[] b, int off, int len) throws IOException {
044        int n = reader.read(b, off, len);
045        if (n == -1) {
046            return -1;
047        }
048        for (int i = off; i < off + n; ++i) {
049            b[i] = filter(b[i]);
050        }
051        return n;
052    }
053
054    @Override
055    public void close() throws IOException {
056        reader.close();
057    }
058
059    private char filter(char in) {
060        if (in < 0x20 && in >= 0 && INVALID_CHARS[in]) {
061            if (firstWarning) {
062                Main.warn("Invalid xml character encountered.");
063                firstWarning = false;
064            }
065            return 0x20;
066        }
067        return in;
068    }
069
070}