001// License: GPL. See LICENSE file for details.
002package org.openstreetmap.josm.io;
003
004import java.io.IOException;
005import java.io.InputStream;
006import java.io.InputStreamReader;
007import java.io.PushbackInputStream;
008import java.io.UnsupportedEncodingException;
009
/**
 * An {@link InputStreamReader} that picks its character encoding from a
 * Unicode byte order mark (BOM) at the start of the stream.
 *
 * Recognised marks are UTF-8 ({@code EF BB BF}), UTF-16BE ({@code FE FF}),
 * UTF-16LE ({@code FF FE}), UTF-32BE ({@code 00 00 FE FF}) and
 * UTF-32LE ({@code FF FE 00 00}). Instances are obtained through
 * {@link #create(InputStream, String)}.
 */
public final class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest byte order mark that is recognised (UTF-32). */
    private static final int MAX_BOM_LENGTH = 4;

    /**
     * Wraps an input stream in a reader whose encoding is taken from the
     * stream's byte order mark, if one is present.
     *
     * @param input the stream to read from
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in
     *        which case the platform default charset is used.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if reading the leading bytes fails
     * @throws UnsupportedEncodingException if the selected encoding is not supported
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        byte[] bom = new byte[MAX_BOM_LENGTH];
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);

        // A single read() may legally deliver fewer bytes than requested even
        // though more follow, so keep reading until the buffer is full or the
        // stream ends. n is the number of valid bytes in bom.
        int n = 0;
        while (n < MAX_BOM_LENGTH) {
            int count = pushbackStream.read(bom, n, MAX_BOM_LENGTH - n);
            if (count <= 0) {
                break;
            }
            n += count;
        }

        String encoding = defaultEncoding;
        int bomLength = 0;
        // The UTF-32 marks are tested before the UTF-16 ones because the
        // UTF-32LE mark begins with the UTF-16LE mark.
        if (startsWith(bom, n, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (startsWith(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (startsWith(bom, n, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Return everything that was read beyond the BOM so the reader sees it.
        if (n > bomLength) {
            pushbackStream.unread(bom, bomLength, n - bomLength);
        }

        if (encoding == null) {
            return new UTFInputStreamReader(pushbackStream);
        } else {
            return new UTFInputStreamReader(pushbackStream, encoding);
        }
    }

    /**
     * Tells whether the first bytes of a buffer equal the given signature.
     * Only the first {@code available} bytes of the buffer are considered
     * valid, so a signature longer than that never matches.
     */
    private static boolean startsWith(byte[] data, int available, int... signature) {
        if (available < signature.length) {
            return false;
        }
        for (int i = 0; i < signature.length; i++) {
            if (data[i] != (byte) signature[i]) {
                return false;
            }
        }
        return true;
    }

    /** Creates a reader that decodes with the platform default charset. */
    private UTFInputStreamReader(InputStream in) {
        super(in);
    }

    /** Creates a reader that decodes with the named charset. */
    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }
}