001// License: GPL. See LICENSE file for details. 002package org.openstreetmap.josm.io; 003 004import java.io.IOException; 005import java.io.InputStream; 006import java.io.InputStreamReader; 007import java.io.PushbackInputStream; 008import java.io.UnsupportedEncodingException; 009 010/** 011 * Detects the different UTF encodings from byte order mark 012 */ 013public final class UTFInputStreamReader extends InputStreamReader { 014 015 /** 016 * converts input stream to reader 017 * @param defaultEncoding Used, when no BOM was recognized. Can be null. 018 * @return A reader with the correct encoding. Starts to read after the BOM. 019 */ 020 public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException { 021 byte[] bom = new byte[4]; 022 String encoding = defaultEncoding; 023 int unread; 024 PushbackInputStream pushbackStream = new PushbackInputStream(input, 4); 025 int n = pushbackStream.read(bom, 0, 4); 026 027 if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { 028 encoding = "UTF-8"; 029 unread = n - 3; 030 } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) { 031 encoding = "UTF-32BE"; 032 unread = n - 4; 033 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) { 034 encoding = "UTF-32LE"; 035 unread = n - 4; 036 } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { 037 encoding = "UTF-16BE"; 038 unread = n - 2; 039 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { 040 encoding = "UTF-16LE"; 041 unread = n - 2; 042 } else { 043 unread = n; 044 } 045 046 if (unread > 0) { 047 pushbackStream.unread(bom, (n - unread), unread); 048 } else if (unread < -1) { 049 pushbackStream.unread(bom, 0, 0); 050 } 051 052 if (encoding == null) { 053 return new UTFInputStreamReader(pushbackStream); 054 } else { 055 return new UTFInputStreamReader(pushbackStream, encoding); 056 } 057 } 058 059 private UTFInputStreamReader(InputStream in) { 060 super(in); 061 } 062 private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException { 063 super(in, cs); 064 } 065}