Class BinaryGuesser
- java.lang.Object
-
- org.apache.rat.document.impl.guesser.BinaryGuesser
-
public class BinaryGuesser extends java.lang.Object
TODO: factor into MIME guesser and MIME->binary guesser
-
-
Field Summary
Fields
Modifier and Type | Field | Description
static int
ASCII_CHAR_THRESHOLD
private static java.lang.String[]
AUDIO_EXTENSIONS
private static java.lang.String[]
BYTECODE_EXTENSIONS
private static java.nio.charset.Charset
CHARSET_FROM_FILE_ENCODING_OR_UTF8
private static java.lang.String[]
DATA_EXTENSIONS
private static java.lang.String
DOT
private static java.lang.String[]
EXE_EXTENSIONS
(package private) static java.lang.String
FILE_ENCODING
static int
HIGH_BYTES_RATIO
private static java.lang.String[]
IMAGE_EXTENSIONS
static java.lang.String
JAR_MANIFEST
static java.lang.String
JAVA
private static java.lang.String[]
KEYSTORE_EXTENSIONS
static int
NON_ASCII_THRESHOLD
private static java.lang.String[]
NON_BINARY_EXTENSIONS
Based on https://www.apache.org/dev/svn-eol-style.txt
static int
TOTAL_READ_RATIO
-
Constructor Summary
Constructors
Constructor | Description
BinaryGuesser()
-
Method Summary
All Methods | Static Methods | Concrete Methods
Modifier and Type | Method | Description
static boolean
containsExtension(java.lang.String name, java.lang.String[] exts)
static boolean
extensionMatches(java.lang.String name, java.lang.String[] exts)
(package private) static java.nio.charset.Charset
getFileEncodingOrUTF8AsFallback()
static boolean
isAudio(java.lang.String name)
static boolean
isBinary(java.io.InputStream in)
static boolean
isBinary(java.io.Reader in)
private static boolean
isBinary(java.lang.CharSequence taste)
static boolean
isBinary(java.lang.String name)
static boolean
isBinary(Document document)
static boolean
isBinaryData(java.lang.String name)
private static boolean
isBinaryDocument(Document document)
static boolean
isBytecode(java.lang.String name)
static boolean
isExecutable(java.lang.String name)
static boolean
isImage(java.lang.String name)
static boolean
isKeystore(java.lang.String name)
static boolean
isNonBinary(java.lang.String name)
-
-
-
Field Detail
-
DOT
private static final java.lang.String DOT
- See Also:
- Constant Field Values
-
FILE_ENCODING
static final java.lang.String FILE_ENCODING
- See Also:
- Constant Field Values
-
CHARSET_FROM_FILE_ENCODING_OR_UTF8
private static java.nio.charset.Charset CHARSET_FROM_FILE_ENCODING_OR_UTF8
-
DATA_EXTENSIONS
private static final java.lang.String[] DATA_EXTENSIONS
-
EXE_EXTENSIONS
private static final java.lang.String[] EXE_EXTENSIONS
-
KEYSTORE_EXTENSIONS
private static final java.lang.String[] KEYSTORE_EXTENSIONS
-
IMAGE_EXTENSIONS
private static final java.lang.String[] IMAGE_EXTENSIONS
-
BYTECODE_EXTENSIONS
private static final java.lang.String[] BYTECODE_EXTENSIONS
-
AUDIO_EXTENSIONS
private static final java.lang.String[] AUDIO_EXTENSIONS
-
NON_BINARY_EXTENSIONS
private static final java.lang.String[] NON_BINARY_EXTENSIONS
Based on https://www.apache.org/dev/svn-eol-style.txt
-
JAR_MANIFEST
public static final java.lang.String JAR_MANIFEST
- See Also:
- Constant Field Values
-
JAVA
public static final java.lang.String JAVA
- See Also:
- Constant Field Values
-
HIGH_BYTES_RATIO
public static final int HIGH_BYTES_RATIO
- See Also:
- Constant Field Values
-
TOTAL_READ_RATIO
public static final int TOTAL_READ_RATIO
- See Also:
- Constant Field Values
-
NON_ASCII_THRESHOLD
public static final int NON_ASCII_THRESHOLD
- See Also:
- Constant Field Values
-
ASCII_CHAR_THRESHOLD
public static final int ASCII_CHAR_THRESHOLD
- See Also:
- Constant Field Values
-
-
Method Detail
-
isBinaryDocument
private static boolean isBinaryDocument(Document document)
-
isBinary
private static boolean isBinary(java.lang.CharSequence taste)
-
isBinary
public static boolean isBinary(java.io.Reader in)
- Parameters:
in
- the file to check.
- Returns:
- true if the first few bytes of the stream hint at a binary file.
Any IOException is swallowed internally, and in that case the test returns false.
This method may lead to false negatives if the reader throws an exception because it cannot read characters from the underlying stream according to the reader's encoding.
-
isBinary
public static boolean isBinary(java.io.InputStream in)
- Parameters:
in
- the file to check.
- Returns:
- true if the first few bytes of the stream hint at a binary file.
Any IOException is swallowed internally, and in that case the test returns false.
This method will try to read bytes from the stream and translate them to characters according to the platform's default encoding. If any bytes cannot be translated to characters, it will assume the original data must be binary and return true.
-
getFileEncodingOrUTF8AsFallback
static java.nio.charset.Charset getFileEncodingOrUTF8AsFallback()
-
isBinaryData
public static final boolean isBinaryData(java.lang.String name)
- Parameters:
name
- current file name.
- Returns:
- whether the given name is binary.
-
isNonBinary
public static final boolean isNonBinary(java.lang.String name)
- Parameters:
name
- current file name.
- Returns:
- true if a file by that name is a known non-binary file.
-
isExecutable
public static final boolean isExecutable(java.lang.String name)
- Parameters:
name
- current file name.
- Returns:
- true if a file by that name is an executable/binary file.
-
containsExtension
public static boolean containsExtension(java.lang.String name, java.lang.String[] exts)
-
extensionMatches
public static boolean extensionMatches(java.lang.String name, java.lang.String[] exts)
-
isBytecode
public static boolean isBytecode(java.lang.String name)
-
isImage
public static final boolean isImage(java.lang.String name)
-
isKeystore
public static final boolean isKeystore(java.lang.String name)
-
isAudio
public static final boolean isAudio(java.lang.String name)
-
isBinary
public static final boolean isBinary(java.lang.String name)
- Parameters:
name
- file name.
- Returns:
- true if a file by that name is a known binary file.
-
isBinary
public static final boolean isBinary(Document document)
-
-