Leptonica  1.83.1
Image processing and image analysis suite
pdfio2.c File Reference
#include <string.h>
#include <math.h>
#include "allheaders.h"

Go to the source code of this file.

Macros

#define L_SMALLBUF   256
 
#define L_BIGBUF   2048 /* must be able to hold hex colormap */
 
#define DEBUG_MULTIPAGE   0
 

Functions

static L_COMP_DATA * l_generateJp2kData (const char *fname)
 
static L_COMP_DATA * pixGenerateFlateData (PIX *pixs, l_int32 ascii85flag)
 
static L_COMP_DATA * pixGenerateJpegData (PIX *pixs, l_int32 ascii85flag, l_int32 quality)
 
static L_COMP_DATA * pixGenerateJp2kData (PIX *pixs, l_int32 quality)
 
static L_COMP_DATA * pixGenerateG4Data (PIX *pixs, l_int32 ascii85flag)
 
static l_int32 l_generatePdf (l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
 
static void generateFixedStringsPdf (L_PDF_DATA *lpd)
 
static char * generateEscapeString (const char *str)
 
static void generateMediaboxPdf (L_PDF_DATA *lpd)
 
static l_int32 generatePageStringPdf (L_PDF_DATA *lpd)
 
static l_int32 generateContentStringPdf (L_PDF_DATA *lpd)
 
static l_int32 generatePreXStringsPdf (L_PDF_DATA *lpd)
 
static l_int32 generateColormapStringsPdf (L_PDF_DATA *lpd)
 
static void generateTrailerPdf (L_PDF_DATA *lpd)
 
static char * makeTrailerStringPdf (L_DNA *daloc)
 
static l_int32 generateOutputDataPdf (l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
 
static l_int32 parseTrailerPdf (L_BYTEA *bas, L_DNA **pda)
 
static char * generatePagesObjStringPdf (NUMA *napage)
 
static L_BYTEA * substituteObjectNumbers (L_BYTEA *bas, NUMA *na_objs)
 
static L_PDF_DATA * pdfdataCreate (const char *title)
 
static void pdfdataDestroy (L_PDF_DATA **plpd)
 
static L_COMP_DATA * pdfdataGetCid (L_PDF_DATA *lpd, l_int32 index)
 
l_ok pixConvertToPdfData (PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
 
l_ok ptraConcatenatePdfToData (L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t *pnbytes)
 
l_ok convertTiffMultipageToPdf (const char *filein, const char *fileout)
 
l_ok l_generateCIDataForPdf (const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid)
 
l_ok l_generateCIData (const char *fname, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
 
L_COMP_DATA * l_generateFlateDataPdf (const char *fname, PIX *pixs)
 
L_COMP_DATA * l_generateJpegData (const char *fname, l_int32 ascii85flag)
 
L_COMP_DATA * l_generateJpegDataMem (l_uint8 *data, size_t nbytes, l_int32 ascii85flag)
 
L_COMP_DATA * l_generateG4Data (const char *fname, l_int32 ascii85flag)
 
l_ok pixGenerateCIData (PIX *pixs, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
 
L_COMP_DATA * l_generateFlateData (const char *fname, l_int32 ascii85flag)
 
l_ok cidConvertToPdfData (L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes)
 
void l_CIDataDestroy (L_COMP_DATA **pcid)
 
void l_pdfSetG4ImageMask (l_int32 flag)
 
void l_pdfSetDateAndVersion (l_int32 flag)
 

Variables

static const l_int32 DefaultInputRes = 300
 
static l_int32 var_WRITE_G4_IMAGE_MASK = 1
 
static l_int32 var_WRITE_DATE_AND_VERSION = 1
 

Detailed Description


   Lower-level operations for generating pdf.

    Intermediate function for single page, multi-image conversion
         l_int32              pixConvertToPdfData()

    Intermediate function for generating multipage pdf output
         l_int32              ptraConcatenatePdfToData()

    Convert tiff multipage to pdf file
         l_int32              convertTiffMultipageToPdf()

    Generates the CID, transcoding under some conditions
         l_int32              l_generateCIDataForPdf()
         l_int32              l_generateCIData()

      Lower-level CID generation without transcoding
         L_COMP_DATA         *l_generateFlateDataPdf()
         L_COMP_DATA         *l_generateJpegData()
         L_COMP_DATA         *l_generateJpegDataMem()
         static L_COMP_DATA  *l_generateJp2kData()
         L_COMP_DATA         *l_generateG4Data()

      Lower-level CID generation with transcoding
         l_int32              pixGenerateCIData()
         L_COMP_DATA         *l_generateFlateData()
         static L_COMP_DATA  *pixGenerateFlateData()
         static L_COMP_DATA  *pixGenerateJpegData()
         static L_COMP_DATA  *pixGenerateJp2kData()
         static L_COMP_DATA  *pixGenerateG4Data()

      Other CID operations
         l_int32              cidConvertToPdfData()
         void                 l_CIDataDestroy()

    Helper functions for generating the output pdf string
         static l_int32       l_generatePdf()
         static void          generateFixedStringsPdf()
         static char         *generateEscapeString()
         static void          generateMediaboxPdf()
         static l_int32       generatePageStringPdf()
         static l_int32       generateContentStringPdf()
         static l_int32       generatePreXStringsPdf()
         static l_int32       generateColormapStringsPdf()
         static void          generateTrailerPdf()
         static char         *makeTrailerStringPdf()
         static l_int32       generateOutputDataPdf()

    Helper functions for generating multipage pdf output
         static l_int32       parseTrailerPdf()
         static char         *generatePagesObjStringPdf()
         static L_BYTEA      *substituteObjectNumbers()

    Create/destroy/access pdf data
         static L_PDF_DATA   *pdfdataCreate()
         static void          pdfdataDestroy()
         static L_COMP_DATA  *pdfdataGetCid()

    Set flags for special modes
         void                 l_pdfSetG4ImageMask()
         void                 l_pdfSetDateAndVersion()

Definition in file pdfio2.c.

Function Documentation

◆ cidConvertToPdfData()

l_ok cidConvertToPdfData ( L_COMP_DATA *  cid,
const char *  title,
l_uint8 **  pdata,
size_t *  pnbytes 
)

cidConvertToPdfData()

Parameters
[in] cid: compressed image data
[in] title: [optional] pdf title; can be null
[out] pdata: output pdf data for image
[out] pnbytes: size of output pdf data
Returns
0 if OK, 1 on error
Notes:
     (1) Caller must not destroy the cid.  It is absorbed in the
         lpd and destroyed by this function.

Definition at line 1591 of file pdfio2.c.

References L_Compressed_Data::res.

Referenced by pixcompFastConvertToPdfData().

◆ convertTiffMultipageToPdf()

l_ok convertTiffMultipageToPdf ( const char *  filein,
const char *  fileout 
)

convertTiffMultipageToPdf()

Parameters
[in] filein: (tiff)
[in] fileout: (pdf)
Returns
0 if OK, 1 on error
Notes:
     (1) A multipage tiff file can also be converted to PS, using
         convertTiffMultipageToPS()

Definition at line 480 of file pdfio2.c.

References fileFormatIsTiff(), fopenReadStream(), pixaConvertToPdf(), pixaDestroy(), and pixaReadMultipageTiff().

◆ generateEscapeString()

static char * generateEscapeString ( const char *  str)
static

generateEscapeString()

Parameters
[in] str: input string
Returns
hex escape string, or null on error
Notes:
     (1) If the input string is not ascii, returns null.
     (2) This takes an input ascii string and generates a hex
         ascii output string with 4 bytes out for each byte in.
         The feff code at the beginning tells the pdf interpreter
         that the data is to be interpreted as big-endian, 4 bytes
         at a time.  For ascii, the first two bytes are 0 and the
         last two bytes are less than 0x80.

Definition at line 1791 of file pdfio2.c.

References stringCat().

◆ generateOutputDataPdf()

static l_int32 generateOutputDataPdf ( l_uint8 **  pdata,
size_t *  pnbytes,
L_PDF_DATA *  lpd 
)
static

generateOutputDataPdf()

Parameters
[out] pdata: pdf data array
[out] pnbytes: size of pdf data array
[in] lpd: input data used to make pdf
Returns
0 if OK, 1 on error
Notes:
     (1) Only called from l_generatePdf().  On error, no data is returned.

Definition at line 2207 of file pdfio2.c.

References L_Pdf_Data::id, l_dnaGetIArray(), L_Pdf_Data::n, L_Pdf_Data::obj1, L_Pdf_Data::obj2, L_Pdf_Data::obj3, L_Pdf_Data::obj4, L_Pdf_Data::obj5, L_Pdf_Data::objloc, L_Pdf_Data::objsize, L_Pdf_Data::trailer, and L_Pdf_Data::xrefloc.

◆ l_CIDataDestroy()

void l_CIDataDestroy ( L_COMP_DATA **  pcid)

◆ l_generateCIData()

l_ok l_generateCIData ( const char *  fname,
l_int32  type,
l_int32  quality,
l_int32  ascii85,
L_COMP_DATA **  pcid 
)

l_generateCIData()

Parameters
[in] fname
[in] type: L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE, L_JP2K_ENCODE
[in] quality: for jpeg if transcoded: 1-100, 0 for default (75); for jp2k if transcoded: 27-45, 0 for default (34)
[in] ascii85: 0 for binary; 1 for ascii85-encoded
[out] pcid: compressed data
Returns
0 if OK, 1 on error
Notes:
     (1) This can be used for both PostScript and pdf.
     (2) Set ascii85:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (3) This attempts to compress according to the requested type.
         If this can't be done, it falls back to ordinary flate encoding.
     (4) This differs from l_generateCIDataForPdf(), which determines
         the file format and only works for pdf.

Definition at line 614 of file pdfio2.c.

References L_FLATE_ENCODE, L_G4_ENCODE, L_JP2K_ENCODE, L_JPEG_ENCODE, and pixReadHeader().

◆ l_generateCIDataForPdf()

l_ok l_generateCIDataForPdf ( const char *  fname,
PIX *  pix,
l_int32  quality,
L_COMP_DATA **  pcid 
)

l_generateCIDataForPdf()

Parameters
[in] fname: [optional] can be null
[in] pix: [optional] can be null
[in] quality: for jpeg if transcoded: 1-100, 0 for default (75); for jp2k if transcoded: 27-45, 0 for default (34)
[out] pcid: compressed data
Returns
0 if OK, 1 on error
Notes:
     (1) You must set either filename or pix.
     (2) Given an image file and optionally a pix raster of that data,
         this provides a CID that is compatible with PDF, preferably
         without transcoding.
     (3) The pix is included for efficiency, in case transcoding
         is required and the pix is available to the caller.
     (4) We don't try to open files named "stdin" or "-" for Tesseract
         compatibility reasons. We may remove this restriction
         in the future.
     (5) Note that tiff-g4 must be transcoded to properly handle byte
         order and perhaps photometry (e.g., min-is-black).  For a
         multipage tiff file, data will only be extracted from the
         first page, so this should not be invoked.

Definition at line 532 of file pdfio2.c.

References findFileFormat().

◆ l_generateFlateData()

L_COMP_DATA* l_generateFlateData ( const char *  fname,
l_int32  ascii85flag 
)

l_generateFlateData()

Parameters
[in] fname
[in] ascii85flag: 0 for gzipped; 1 for ascii85-encoded gzipped
Returns
cid flate compressed image data, or NULL on error
Notes:
     (1) The input image is converted to one of these 4 types:
          ~ 1 bpp
          ~ 8 bpp, no colormap
          ~ 8 bpp, colormap
          ~ 32 bpp rgb
     (2) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (3) Always transcodes (i.e., first decodes the png file)

Definition at line 1290 of file pdfio2.c.

References pixDestroy(), pixGenerateFlateData(), and pixRead().

Referenced by convertFlateToPSEmbed(), and convertFlateToPSString().

◆ l_generateFlateDataPdf()

L_COMP_DATA* l_generateFlateDataPdf ( const char *  fname,
PIX *  pixs 
)

l_generateFlateDataPdf()

Parameters
[in] fname: preferably png
[in] pixs: [optional] can be null
Returns
cid containing png data, or NULL on error
Notes:
     (1) If you hand this a png file, you are going to get
         png predictors embedded in the flate data. So it has
         come to this. http://xkcd.com/1022/
     (2) Exception: if the png is interlaced or if it is RGBA,
         it will be transcoded.
     (3) If transcoding is required, this will not have to read from
         file if a pix is input.

Definition at line 716 of file pdfio2.c.

References findFileFormat().

◆ l_generateG4Data()

L_COMP_DATA* l_generateG4Data ( const char *  fname,
l_int32  ascii85flag 
)

l_generateG4Data()

Parameters
[in] fname: of g4 compressed file
[in] ascii85flag: 0 for g4 compressed; 1 for ascii85-encoded g4
Returns
cid g4 compressed image data, or NULL on error
Notes:
     (1) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (2) This does not work for multipage tiff files.

Definition at line 1099 of file pdfio2.c.

References L_Compressed_Data::bps, L_Compressed_Data::data85, L_Compressed_Data::datacomp, extractG4DataFromFile(), fopenReadStream(), getTiffResolution(), L_Compressed_Data::h, L_G4_ENCODE, L_Compressed_Data::minisblack, L_Compressed_Data::nbytes85, L_Compressed_Data::nbytescomp, L_Compressed_Data::res, L_Compressed_Data::spp, tiffGetCount(), L_Compressed_Data::type, and L_Compressed_Data::w.

Referenced by convertG4ToPSEmbed(), and convertG4ToPSString().

◆ l_generateJp2kData()

static L_COMP_DATA * l_generateJp2kData ( const char *  fname)
static

l_generateJp2kData()

Parameters
[in] fname: of jp2k file
Returns
cid containing jp2k data, or NULL on error
Notes:
     (1) This is only called after the file is verified to be jp2k.

Definition at line 1047 of file pdfio2.c.

References L_Compressed_Data::datacomp, fopenReadStream(), l_binaryRead(), l_CIDataDestroy(), and readHeaderJp2k().

◆ l_generateJpegData()

L_COMP_DATA* l_generateJpegData ( const char *  fname,
l_int32  ascii85flag 
)

l_generateJpegData()

Parameters
[in] fname: of jpeg file
[in] ascii85flag: 0 for jpeg; 1 for ascii85-encoded jpeg
Returns
cid containing jpeg data, or NULL on error
Notes:
     (1) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (2) Most of this function is repeated in l_generateJpegDataMem(),
         which is required in pixcompFastConvertToPdfData().

Definition at line 913 of file pdfio2.c.

References fopenReadStream(), and readHeaderJpeg().

Referenced by convertJpegToPSEmbed(), and convertJpegToPSString().

◆ l_generateJpegDataMem()

L_COMP_DATA* l_generateJpegDataMem ( l_uint8 *  data,
size_t  nbytes,
l_int32  ascii85flag 
)

l_generateJpegDataMem()

Parameters
[in] data: of jpeg-encoded file
[in] nbytes: size of jpeg-encoded file
[in] ascii85flag: 0 for jpeg; 1 for ascii85-encoded jpeg
Returns
cid containing jpeg data, or NULL on error
Notes:
     (1) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)

Definition at line 987 of file pdfio2.c.

References L_Compressed_Data::bps, L_Compressed_Data::data85, L_Compressed_Data::datacomp, L_Compressed_Data::h, L_JPEG_ENCODE, L_Compressed_Data::nbytes85, L_Compressed_Data::nbytescomp, readHeaderMemJpeg(), readResolutionMemJpeg(), L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.

Referenced by pixcompFastConvertToPdfData().

◆ l_generatePdf()

static l_int32 l_generatePdf ( l_uint8 **  pdata,
size_t *  pnbytes,
L_PDF_DATA *  lpd 
)
static

l_generatePdf()

Parameters
[out] pdata: pdf array
[out] pnbytes: number of bytes in pdf array
[in] lpd: all the required input image data
Returns
0 if OK, 1 on error
Notes:
     (1) On error, no data is returned.
     (2) The objects are:
           1: Catalog
           2: Info
           3: Pages
           4: Page
           5: Contents  (rendering command)
           6 to 6+n-1: n XObjects
           6+n to 6+n+m-1: m colormaps

Definition at line 1683 of file pdfio2.c.

◆ l_pdfSetDateAndVersion()

void l_pdfSetDateAndVersion ( l_int32  flag)

l_pdfSetDateAndVersion()

Parameters
[in] flag: 1 for writing date/time and leptonica version; 0 for omitting this from the metadata
Returns
void
Notes:
     (1) The default is for writing this data.  For regression tests
         that compare output against golden files, it is useful to omit.

Definition at line 2619 of file pdfio2.c.

◆ l_pdfSetG4ImageMask()

void l_pdfSetG4ImageMask ( l_int32  flag)

l_pdfSetG4ImageMask()

Parameters
[in] flag: 1 for writing g4 data as fg only through a mask; 0 for writing fg and bg
Returns
void
Notes:
     (1) The default is for writing only the fg (through the mask).
         That way when you write a 1 bpp image, the bg is transparent,
         so any previously written image remains visible behind it.

Definition at line 2599 of file pdfio2.c.

◆ parseTrailerPdf()

static l_int32 parseTrailerPdf ( L_BYTEA *  bas,
L_DNA **  pda 
)
static

◆ pixConvertToPdfData()

l_ok pixConvertToPdfData ( PIX *  pix,
l_int32  type,
l_int32  quality,
l_uint8 **  pdata,
size_t *  pnbytes,
l_int32  x,
l_int32  y,
l_int32  res,
const char *  title,
L_PDF_DATA **  plpd,
l_int32  position 
)

pixConvertToPdfData()

Parameters
[in] pix: all depths; cmap OK
[in] type: L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE, L_JP2K_ENCODE
[in] quality: for jpeg: 1-100, 0 for default (75); for jp2k: 27-45, 0 for default (34)
[out] pdata: pdf array
[out] pnbytes: number of bytes in pdf array
[in] x, y: location of lower-left corner of image, in pixels, relative to the PostScript origin (0,0) at the lower-left corner of the page
[in] res: override the resolution of the input image, in ppi; use 0 to respect resolution embedded in the input
[in] title: [optional] pdf title; can be null
[in,out] plpd: ptr to lpd; created on the first invocation and returned until last image is processed
[in] position: in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, L_LAST_IMAGE
Returns
0 if OK, 1 on error
Notes:
     (1) If res == 0 and the input resolution field from the pix is 0,
         this will use DefaultInputRes.
     (2) This only writes data if it is the last image to be
         written on the page.
     (3) See comments in convertToPdf().

Definition at line 190 of file pdfio2.c.

References L_Compressed_Data::h, L_FIRST_IMAGE, L_FLATE_ENCODE, L_G4_ENCODE, L_JP2K_ENCODE, L_JPEG_ENCODE, pixGenerateCIData(), L_Compressed_Data::res, selectDefaultPdfEncoding(), and L_Compressed_Data::w.

Referenced by convertImageDataToPdfData(), convertToPdfData(), pixaConvertToPdfData(), pixConvertToPdf(), pixWriteMemPdf(), and saConvertFilesToPdfData().

◆ pixGenerateCIData()

l_ok pixGenerateCIData ( PIX *  pixs,
l_int32  type,
l_int32  quality,
l_int32  ascii85,
L_COMP_DATA **  pcid 
)

pixGenerateCIData()

Parameters
[in] pixs: 8 or 32 bpp, no colormap
[in] type: L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE or L_JP2K_ENCODE
[in] quality: for jpeg if transcoded: 1-100, 0 for default (75); for jp2k if transcoded: 27-45, 0 for default (34)
[in] ascii85: 0 for binary; 1 for ascii85-encoded
[out] pcid: compressed data
Returns
0 if OK, 1 on error
Notes:
     (1) Set ascii85:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (2) Do not accept images with an aspect ratio greater than 10.

Definition at line 1188 of file pdfio2.c.

References L_FLATE_ENCODE, L_G4_ENCODE, L_JP2K_ENCODE, L_JPEG_ENCODE, pixGetDimensions(), and selectDefaultPdfEncoding().

Referenced by pixConvertToPdfData().

◆ pixGenerateFlateData()

static L_COMP_DATA * pixGenerateFlateData ( PIX *  pixs,
l_int32  ascii85flag 
)
static

pixGenerateFlateData()

Parameters
[in] pixs
[in] ascii85flag: 0 for gzipped; 1 for ascii85-encoded gzipped
Returns
cid flate compressed image data, or NULL on error
Notes:
    (1) If called with an RGBA pix (spp == 4), the alpha channel
        will be removed, projecting a white background through
        any transparency.
    (2) If called with a colormapped pix, any transparency in the
        alpha component in the colormap will be ignored, as it is
        for all leptonica operations on colormapped pix.

Definition at line 1325 of file pdfio2.c.

References pixGetDimensions().

Referenced by l_generateFlateData().

◆ pixGenerateG4Data()

static L_COMP_DATA * pixGenerateG4Data ( PIX *  pixs,
l_int32  ascii85flag 
)
static

pixGenerateG4Data()

Parameters
[in] pixs: 1 bpp, no colormap
[in] ascii85flag: 0 for g4 compressed; 1 for ascii85-encoded g4
Returns
cid g4 compressed image data, or NULL on error
Notes:
     (1) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)

Definition at line 1547 of file pdfio2.c.

◆ pixGenerateJp2kData()

static L_COMP_DATA * pixGenerateJp2kData ( PIX *  pixs,
l_int32  quality 
)
static

pixGenerateJp2kData()

Parameters
[in] pixs: 8 or 32 bpp, no colormap
[in] quality: 0 for default, which is 34
Returns
cid jp2k compressed data, or NULL on error
Notes:
     (1) The quality can be set between 27 (very poor) and 45
         (nearly perfect).  Use 0 for default (34). Use 100 for lossless,
         but this is very expensive and not recommended.

Definition at line 1501 of file pdfio2.c.

◆ pixGenerateJpegData()

static L_COMP_DATA * pixGenerateJpegData ( PIX *  pixs,
l_int32  ascii85flag,
l_int32  quality 
)
static

pixGenerateJpegData()

Parameters
[in] pixs: 8, 16 or 32 bpp, no colormap
[in] ascii85flag: 0 for jpeg; 1 for ascii85-encoded jpeg
[in] quality: 0 for default, which is 75
Returns
cid jpeg compressed data, or NULL on error
Notes:
     (1) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (2) If 16 bpp, convert first to 8 bpp, using the MSB

Definition at line 1453 of file pdfio2.c.

◆ ptraConcatenatePdfToData()

l_ok ptraConcatenatePdfToData ( L_PTRA *  pa_data,
SARRAY *  sa,
l_uint8 **  pdata,
size_t *  pnbytes 
)

ptraConcatenatePdfToData()

Parameters
[in] pa_data: ptra array of pdf strings, each for a single-page pdf file
[in] sa: [optional] string array of pathnames for input pdf files; can be null
[out] pdata: concatenated pdf data in memory
[out] pnbytes: number of bytes in pdf data
Returns
0 if OK, 1 on error
Notes:
     (1) This only works with leptonica-formatted single-page pdf files.
         pdf files generated by other programs will have unpredictable
         (and usually bad) results.  The requirements for each pdf file:
           (a) The Catalog and Info objects are the first two.
           (b) Object 3 is Pages
           (c) Object 4 is Page
           (d) The remaining objects are Contents, XObjects, and ColorSpace
     (2) We remove trailers from each page, and append the full trailer
         for all pages at the end.
     (3) For all but the first file, remove the ID and the first 3
         objects (catalog, info, pages), so that each subsequent
         file has only objects of these classes:
             Page, Contents, XObject, ColorSpace (Indexed RGB).
         For those objects, we substitute these refs to objects
         in the local file:
             Page:  Parent(object 3), Contents, XObject(typically multiple)
             XObject:  [ColorSpace if indexed]
         The Pages object on the first page (object 3) has a Kids array
         of references to all the Page objects, with a Count equal
         to the number of pages.  Each Page object refers back to
         this parent.

Definition at line 318 of file pdfio2.c.

References l_byteaDestroy(), L_CLONE, l_dnaaAddDna(), l_dnaaCreate(), l_dnaaDestroy(), l_dnaaGetDna(), l_dnaDestroy(), l_dnaGetCount(), L_INSERT, L_NO_COMPACTION, L_NOCOPY, numaaAddNuma(), numaaCreate(), numaAddNumber(), numaCreate(), numaMakeConstant(), numaMakeSequence(), numaReplaceNumber(), numaSetValue(), parseTrailerPdf(), ptraCompactArray(), ptraGetActualCount(), ptraGetPtrToItem(), ptraRemove(), and sarrayGetString().

Referenced by convertSegmentedFilesToPdf(), pixaConvertToPdfData(), ptraConcatenatePdf(), saConcatenatePdfToData(), saConvertFilesToPdfData(), and saConvertUnscaledFilesToPdfData().

◆ substituteObjectNumbers()

static L_BYTEA * substituteObjectNumbers ( L_BYTEA *  bas,
NUMA *  na_objs 
)
static

substituteObjectNumbers()

Parameters
[in] bas: lba of a pdf object
[in] na_objs: object number mapping array
Returns
bad: lba of rewritten pdf for the object
Notes:
     (1) Interpret the first set of bytes as the object number,
         map to the new number, and write it out.
     (2) Find all occurrences of this 4-byte sequence: " 0 R"
     (3) Find the location and value of the integer preceding this,
         and map it to the new value.
     (4) Rewrite the object with new object numbers.

Definition at line 2432 of file pdfio2.c.

References arrayFindEachSequence(), arrayFindSequence(), l_byteaAppendData(), l_byteaAppendString(), l_byteaCreate(), l_byteaGetData(), l_dnaDestroy(), l_dnaGetCount(), l_dnaGetIArray(), numaGetCount(), and numaGetIArray().