Leptonica  1.83.1
Image processing and image analysis suite
readfile.c File Reference
#include <string.h>
#include "allheaders.h"

Go to the source code of this file.

Functions

PIXA * pixaReadFiles (const char *dirname, const char *substr)
 
PIXA * pixaReadFilesSA (SARRAY *sa)
 
PIX * pixRead (const char *filename)
 
PIX * pixReadWithHint (const char *filename, l_int32 hint)
 
PIX * pixReadIndexed (SARRAY *sa, l_int32 index)
 
PIX * pixReadStream (FILE *fp, l_int32 hint)
 
l_ok pixReadHeader (const char *filename, l_int32 *pformat, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap)
 
l_ok findFileFormat (const char *filename, l_int32 *pformat)
 
l_ok findFileFormatStream (FILE *fp, l_int32 *pformat)
 
l_ok findFileFormatBuffer (const l_uint8 *buf, l_int32 *pformat)
 
l_int32 fileFormatIsTiff (FILE *fp)
 
PIX * pixReadMem (const l_uint8 *data, size_t size)
 
l_ok pixReadHeaderMem (const l_uint8 *data, size_t size, l_int32 *pformat, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap)
 
l_ok writeImageFileInfo (const char *filename, FILE *fpout, l_int32 headeronly)
 
l_ok ioFormatTest (const char *filename)
 

Variables

static const char * FILE_BMP = "/tmp/lept/format/file.bmp"
 
static const char * FILE_PNG = "/tmp/lept/format/file.png"
 
static const char * FILE_PNM = "/tmp/lept/format/file.pnm"
 
static const char * FILE_G3 = "/tmp/lept/format/file_g3.tif"
 
static const char * FILE_G4 = "/tmp/lept/format/file_g4.tif"
 
static const char * FILE_RLE = "/tmp/lept/format/file_rle.tif"
 
static const char * FILE_PB = "/tmp/lept/format/file_packbits.tif"
 
static const char * FILE_LZW = "/tmp/lept/format/file_lzw.tif"
 
static const char * FILE_ZIP = "/tmp/lept/format/file_zip.tif"
 
static const char * FILE_TIFF_JPEG = "/tmp/lept/format/file_jpeg.tif"
 
static const char * FILE_TIFF = "/tmp/lept/format/file.tif"
 
static const char * FILE_JPG = "/tmp/lept/format/file.jpg"
 
static const char * FILE_GIF = "/tmp/lept/format/file.gif"
 
static const char * FILE_WEBP = "/tmp/lept/format/file.webp"
 
static const char * FILE_JP2K = "/tmp/lept/format/file.jp2"
 
static const unsigned char JP2K_CODESTREAM [4] = { 0xff, 0x4f, 0xff, 0x51 }
 
static const unsigned char JP2K_IMAGE_DATA [12]
 
const char * ImageFileFormatExtensions []
 

Detailed Description

readfile.c: reads an image from a file into memory


     Top-level functions for reading images from file
          PIXA      *pixaReadFiles()
          PIXA      *pixaReadFilesSA()
          PIX       *pixRead()
          PIX       *pixReadWithHint()
          PIX       *pixReadIndexed()
          PIX       *pixReadStream()

     Read header information from file
          l_int32    pixReadHeader()

     Format finders
          l_int32    findFileFormat()
          l_int32    findFileFormatStream()
          l_int32    findFileFormatBuffer()
          l_int32    fileFormatIsTiff()

     Read from memory
          PIX       *pixReadMem()
          l_int32    pixReadHeaderMem()

     Output image file information
          l_ok       writeImageFileInfo()

     Test function for I/O with different formats
          l_int32    ioFormatTest()

 Supported file formats:
 (1) Reading is supported without any external libraries:
         bmp
         pnm   (including pbm, pgm, etc)
         spix  (raw serialized)
 (2) Reading is supported with installation of external libraries:
         png
         jpg   (standard jfif version)
         tiff  (including most varieties of compression)
         gif
         webp
         jp2 (jpeg 2000)
 (3) Other file types will get an "unknown format" error.

Definition in file readfile.c.

Function Documentation

◆ fileFormatIsTiff()

l_int32 fileFormatIsTiff ( FILE *  fp)

fileFormatIsTiff()

Parameters
[in] fp: file stream
Returns
1 if file is tiff; 0 otherwise or on error

Definition at line 780 of file readfile.c.

References findFileFormatStream().

Referenced by convertTiffMultipageToPdf(), convertTiffMultipageToPS(), extractG4DataFromFile(), and pixaReadMultipageTiff().

◆ findFileFormat()

l_ok findFileFormat ( const char *  filename,
l_int32 *  pformat 
)

findFileFormat()

Parameters
[in] filename
[out] pformat: found format
Returns
0 if OK, 1 on error or if format is not recognized

Definition at line 570 of file readfile.c.

Referenced by convertUnscaledToPdfData(), l_generateCIDataForPdf(), l_generateFlateDataPdf(), l_getIndexFromFile(), and writeMultipageTiffSA().

◆ findFileFormatBuffer()

l_ok findFileFormatBuffer ( const l_uint8 *  buf,
l_int32 *  pformat 
)

findFileFormatBuffer()

Parameters
[in] buf: byte buffer at least 12 bytes in size; we can't check
[out] pformat: found format
Returns
0 if OK, 1 on error or if format is not recognized
Notes:
     (1) This determines the file format from the first 12 bytes in
         the compressed data stream, which are stored in memory.
     (2) For tiff files, this returns IFF_TIFF.  The specific tiff
         compression is then determined using findTiffCompression().

Definition at line 653 of file readfile.c.

Referenced by pixReadHeaderMem(), pixReadMem(), and readHeaderMemJp2k().

◆ findFileFormatStream()

l_ok findFileFormatStream ( FILE *  fp,
l_int32 *  pformat 
)

findFileFormatStream()

Parameters
[in] fp: file stream
[out] pformat: found format
Returns
0 if OK, 1 on error or if format is not recognized
Notes:
     (1) Important: Side effect -- this resets fp to BOF.

Definition at line 603 of file readfile.c.

Referenced by fileFormatIsTiff(), freadHeaderTiff(), pixReadHeader(), and pixReadStream().

◆ ioFormatTest()

l_ok ioFormatTest ( const char *  filename)

ioFormatTest()

Parameters
[in] filename: input image file
Returns
0 if OK; 1 on error or if the test fails
Notes:
     (1) This writes and reads a set of output files losslessly
         in different formats to /tmp/lept/format/, and tests that the
         result before and after is unchanged.
     (2) This should work properly on input images of any depth,
         with and without colormaps.
     (3) All supported formats are tested for bmp, png, tiff and
         non-ascii pnm.  Ascii pnm also works (but who'd ever want
         to use it?)   We allow 2 bpp bmp, although it's not
         supported elsewhere.  And we don't support reading
         16 bpp png, although this can be turned on in pngio.c.
     (4) This silently skips png or tiff testing if HAVE_LIBPNG
         or HAVE_LIBTIFF are 0, respectively.

Definition at line 1261 of file readfile.c.

References boxCreate(), boxDestroy(), lept_mkdir(), pixClipRectangle(), pixClone(), pixDestroy(), pixGetDimensions(), and pixRead().

◆ pixaReadFiles()

PIXA* pixaReadFiles ( const char *  dirname,
const char *  substr 
)

pixaReadFiles()

Parameters
[in] dirname
[in] substr: [optional] substring filter on filenames; can be null
Returns
pixa, or NULL on error
Notes:
     (1) dirname is the full path for the directory.
     (2) substr is the part of the file name (excluding
         the directory) that is to be matched.  All matching
         filenames are read into the Pixa.  If substr is NULL,
         all filenames are read into the Pixa.

Definition at line 127 of file readfile.c.

References getSortedPathnamesInDirectory(), pixaReadFilesSA(), and sarrayDestroy().

◆ pixaReadFilesSA()

PIXA* pixaReadFilesSA ( SARRAY *  sa)

pixaReadFilesSA()

Parameters
[in] sa: full pathnames for all files
Returns
pixa, or NULL on error

Definition at line 152 of file readfile.c.

References L_INSERT, L_NOCOPY, pixaAddPix(), pixaCreate(), pixRead(), sarrayGetCount(), and sarrayGetString().

Referenced by convertToNUpPixa(), and pixaReadFiles().

◆ pixRead()

PIX* pixRead ( const char *  filename)

pixRead()

Parameters
[in] filename: with full pathname or in local directory
Returns
pix if OK; NULL on error
Notes:
     (1) See at top of file for supported formats.

Definition at line 189 of file readfile.c.

References fopenReadStream(), and pixReadStream().

Referenced by convertNumberedMasksToBoxaa(), convertToPdfData(), convertToPdfDataSegmented(), convertToPdfSegmented(), ioFormatTest(), l_generateFlateData(), pixaReadFilesSA(), pixReadIndexed(), pixWritePSEmbed(), saConvertFilesToPdfData(), and selReadFromColorImage().

◆ pixReadHeader()

l_ok pixReadHeader ( const char *  filename,
l_int32 *  pformat,
l_int32 *  pw,
l_int32 *  ph,
l_int32 *  pbps,
l_int32 *  pspp,
l_int32 *  piscmap 
)

pixReadHeader()

Parameters
[in] filename: with full pathname or in local directory
[out] pformat: [optional] file format
[out] pw, ph: [optional] width and height
[out] pbps: [optional] bits/sample
[out] pspp: [optional] samples/pixel 1, 3 or 4
[out] piscmap: [optional] 1 if cmap exists; 0 otherwise
Returns
0 if OK, 1 on error
Notes:
     (1) This reads the actual headers for jpeg, png, tiff and pnm.
         For bmp and gif, we cheat and read the entire file into a pix,
         from which we extract the "header" information.

Definition at line 434 of file readfile.c.

References findFileFormatStream(), and fopenReadStream().

Referenced by l_generateCIData(), and writeImageFileInfo().

◆ pixReadHeaderMem()

l_ok pixReadHeaderMem ( const l_uint8 *  data,
size_t  size,
l_int32 *  pformat,
l_int32 *  pw,
l_int32 *  ph,
l_int32 *  pbps,
l_int32 *  pspp,
l_int32 *  piscmap 
)

pixReadHeaderMem()

Parameters
[in] data: const; encoded
[in] size: size of data
[out] pformat: [optional] image format
[out] pw, ph: [optional] width and height
[out] pbps: [optional] bits/sample
[out] pspp: [optional] samples/pixel 1, 3 or 4
[out] piscmap: [optional] 1 if cmap exists; 0 otherwise
Returns
0 if OK, 1 on error
Notes:
     (1) This reads the actual headers for jpeg, png, tiff, jp2k and pnm.
         For bmp and gif, we cheat and read all the data into a pix,
         from which we extract the "header" information.
     (2) The amount of data required depends on the format.  For
         png, it requires less than 30 bytes, but for jpeg it can
         require most of the compressed file.  In practice, the data
         is typically the entire compressed file in memory.
     (3) findFileFormatBuffer() requires up to 12 bytes to decide on
         the format, which we require.

Definition at line 950 of file readfile.c.

References findFileFormatBuffer().

Referenced by pixcompCreateFromString().

◆ pixReadIndexed()

PIX* pixReadIndexed ( SARRAY *  sa,
l_int32  index 
)

pixReadIndexed()

Parameters
[in] sa: string array of full pathnames
[in] index: into pathname array
Returns
pix if OK; null if not found
Notes:
     (1) This function is useful for selecting image files from a
         directory, where the integer index is embedded into
         the file name.
     (2) This is typically done by generating the sarray using
         getNumberedPathnamesInDirectory(), so that the index
         pathname would have the number index in it.  The size
         of the sarray should be the largest number (plus 1) appearing
         in the file names, respecting the constraints in the
         call to getNumberedPathnamesInDirectory().
     (3) Consequently, for some indices into the sarray, there may
         be no pathnames in the directory containing that number.
         By convention, we place empty C strings ("") in those
         locations in the sarray, and it is not an error if such
         a string is encountered and no pix is returned.
         Therefore, the caller must verify that a pix is returned.
     (4) See convertSegmentedPagesToPS() in src/psio1.c for an
         example of usage.

Definition at line 273 of file readfile.c.

References L_NOCOPY, pixRead(), sarrayGetCount(), and sarrayGetString().

Referenced by dewarpShowResults().

◆ pixReadMem()

PIX* pixReadMem ( const l_uint8 *  data,
size_t  size 
)

pixReadMem()

Parameters
[in] data: const; encoded
[in] size: size of data
Returns
pix, or NULL on error
Notes:
     (1) This is a variation of pixReadStream(), where the data is read
         from a memory buffer rather than a file.
     (2) On Windows, this only reads tiff formatted files directly from
         memory.  For other formats, it writes to a temp file and
         decompresses from file.
     (3) findFileFormatBuffer() requires up to 12 bytes to decide on
         the format.  That determines the constraint here.  But in
         fact the data must contain the entire compressed string for
         the image.

Definition at line 822 of file readfile.c.

References findFileFormatBuffer().

Referenced by convertImageDataToPdf(), convertImageDataToPdfData(), and pixCreateFromPixcomp().

◆ pixReadStream()

PIX* pixReadStream ( FILE *  fp,
l_int32  hint 
)

pixReadStream()

Parameters
[in] fp: file stream
[in] hint: bitwise OR of L_HINT_* values for jpeg; 0 for no hint
Returns
pix if OK; NULL on error
Notes:
     (1) The hint only applies to jpeg.

Definition at line 312 of file readfile.c.

References findFileFormatStream().

Referenced by pixRead(), and pixReadWithHint().

◆ pixReadWithHint()

PIX* pixReadWithHint ( const char *  filename,
l_int32  hint 
)

pixReadWithHint()

Parameters
[in] filename: with full pathname or in local directory
[in] hint: bitwise OR of L_HINT_* values for jpeg; use 0 for no hint
Returns
pix if OK; NULL on error
Notes:
     (1) The hint is not binding, but may be used to optimize jpeg decoding.
         Use 0 for no hinting.

Definition at line 224 of file readfile.c.

References fopenReadStream(), and pixReadStream().

◆ writeImageFileInfo()

l_ok writeImageFileInfo ( const char *  filename,
FILE *  fpout,
l_int32  headeronly 
)

writeImageFileInfo()

Parameters
[in] filename: input file
[in] fpout: output file stream
[in] headeronly: 1 to read only the header; 0 to read both the header and the input file
Returns
0 if OK; 1 on error
Notes:
     (1) If headeronly == 0 and the image has spp == 4, this will
         also call pixDisplayLayersRGBA() to display the image
         in three views.
     (2) This is a debug function that changes the value of
         var_PNG_STRIP_16_TO_8 to 1 (the default).

Definition at line 1095 of file readfile.c.

References pixReadHeader().

Variable Documentation

◆ JP2K_IMAGE_DATA

const unsigned char JP2K_IMAGE_DATA[12]
static
Initial value:
= { 0x00, 0x00, 0x00, 0x0c,
0x6a, 0x50, 0x20, 0x20,
0x0d, 0x0a, 0x87, 0x0a }

Definition at line 102 of file readfile.c.