Leptonica  1.83.1
Image processing and image analysis suite
utils2.c File Reference
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <string.h>
#include <stddef.h>
#include "allheaders.h"

Go to the source code of this file.

Functions

char * stringNew (const char *src)
 
l_ok stringCopy (char *dest, const char *src, l_int32 n)
 
char * stringCopySegment (const char *src, l_int32 start, l_int32 nbytes)
 
l_ok stringReplace (char **pdest, const char *src)
 
l_int32 stringLength (const char *src, size_t size)
 
l_int32 stringCat (char *dest, size_t size, const char *src)
 
char * stringConcatNew (const char *first,...)
 
char * stringJoin (const char *src1, const char *src2)
 
l_ok stringJoinIP (char **psrc1, const char *src2)
 
char * stringReverse (const char *src)
 
char * strtokSafe (char *cstr, const char *seps, char **psaveptr)
 
l_ok stringSplitOnToken (char *cstr, const char *seps, char **phead, char **ptail)
 
l_ok stringCheckForChars (const char *src, const char *chars, l_int32 *pfound)
 
char * stringRemoveChars (const char *src, const char *remchars)
 
char * stringReplaceEachSubstr (const char *src, const char *sub1, const char *sub2, l_int32 *pcount)
 
char * stringReplaceSubstr (const char *src, const char *sub1, const char *sub2, l_int32 *ploc, l_int32 *pfound)
 
L_DNA * stringFindEachSubstr (const char *src, const char *sub)
 
l_int32 stringFindSubstr (const char *src, const char *sub, l_int32 *ploc)
 
l_uint8 * arrayReplaceEachSequence (const l_uint8 *datas, size_t dataslen, const l_uint8 *seq, size_t seqlen, const l_uint8 *newseq, size_t newseqlen, size_t *pdatadlen, l_int32 *pcount)
 
L_DNA * arrayFindEachSequence (const l_uint8 *data, size_t datalen, const l_uint8 *sequence, size_t seqlen)
 
l_ok arrayFindSequence (const l_uint8 *data, size_t datalen, const l_uint8 *sequence, size_t seqlen, l_int32 *poffset, l_int32 *pfound)
 
void * reallocNew (void **pindata, size_t oldsize, size_t newsize)
 
l_uint8 * l_binaryRead (const char *filename, size_t *pnbytes)
 
l_uint8 * l_binaryReadStream (FILE *fp, size_t *pnbytes)
 
l_uint8 * l_binaryReadSelect (const char *filename, size_t start, size_t nbytes, size_t *pnread)
 
l_uint8 * l_binaryReadSelectStream (FILE *fp, size_t start, size_t nbytes, size_t *pnread)
 
l_ok l_binaryWrite (const char *filename, const char *operation, const void *data, size_t nbytes)
 
size_t nbytesInFile (const char *filename)
 
size_t fnbytesInFile (FILE *fp)
 
l_uint8 * l_binaryCopy (const l_uint8 *datas, size_t size)
 
l_ok l_binaryCompare (const l_uint8 *data1, size_t size1, const l_uint8 *data2, size_t size2, l_int32 *psame)
 
l_ok fileCopy (const char *srcfile, const char *newfile)
 
l_ok fileConcatenate (const char *srcfile, const char *destfile)
 
l_ok fileAppendString (const char *filename, const char *str)
 
l_ok fileSplitLinesUniform (const char *filename, l_int32 n, l_int32 save_empty, const char *rootpath, const char *ext)
 
FILE * fopenReadStream (const char *filename)
 
FILE * fopenWriteStream (const char *filename, const char *modestring)
 
FILE * fopenReadFromMemory (const l_uint8 *data, size_t size)
 
FILE * fopenWriteWinTempfile (void)
 
FILE * lept_fopen (const char *filename, const char *mode)
 
l_ok lept_fclose (FILE *fp)
 
void * lept_calloc (size_t nmemb, size_t size)
 
void lept_free (void *ptr)
 
l_int32 lept_mkdir (const char *subdir)
 
l_int32 lept_rmdir (const char *subdir)
 
void lept_direxists (const char *dir, l_int32 *pexists)
 
l_int32 lept_rm_match (const char *subdir, const char *substr)
 
l_int32 lept_rm (const char *subdir, const char *tail)
 
l_int32 lept_rmfile (const char *filepath)
 
l_int32 lept_mv (const char *srcfile, const char *newdir, const char *newtail, char **pnewpath)
 
l_int32 lept_cp (const char *srcfile, const char *newdir, const char *newtail, char **pnewpath)
 
void callSystemDebug (const char *cmd)
 
l_ok splitPathAtDirectory (const char *pathname, char **pdir, char **ptail)
 
l_ok splitPathAtExtension (const char *pathname, char **pbasename, char **pextension)
 
char * pathJoin (const char *dir, const char *fname)
 
char * appendSubdirs (const char *basedir, const char *subdirs)
 
l_ok convertSepCharsInPath (char *path, l_int32 type)
 
char * genPathname (const char *dir, const char *fname)
 
l_ok makeTempDirname (char *result, size_t nbytes, const char *subdir)
 
l_ok modifyTrailingSlash (char *path, size_t nbytes, l_int32 flag)
 
char * l_makeTempFilename (void)
 
l_int32 extractNumberFromFilename (const char *fname, l_int32 numpre, l_int32 numpost)
 

Detailed Description


     ------------------------------------------
     This file has these utilities:
  • safe string operations
  • find/replace operations on strings
  • read/write between file and memory
  • multi-platform file and directory operations
  • file name operations
     ------------------------------------------

     Safe string procs
         char *stringNew()
         l_int32 stringCopy()
         char *stringCopySegment()
         l_int32 stringReplace()
         l_int32 stringLength()
         l_int32 stringCat()
         char *stringConcatNew()
         char *stringJoin()
         l_int32 stringJoinIP()
         char *stringReverse()
         char *strtokSafe()
         l_int32 stringSplitOnToken()

     Find and replace string and array procs
         l_int32 stringCheckForChars()
         char *stringRemoveChars()
         char *stringReplaceEachSubstr()
         char *stringReplaceSubstr()
         L_DNA *stringFindEachSubstr()
         l_int32 stringFindSubstr()
         l_uint8 *arrayReplaceEachSequence()
         L_DNA *arrayFindEachSequence()
         l_int32 arrayFindSequence()

     Safe realloc
         void *reallocNew()

     Read and write between file and memory
         l_uint8 *l_binaryRead()
         l_uint8 *l_binaryReadStream()
         l_uint8 *l_binaryReadSelect()
         l_uint8 *l_binaryReadSelectStream()
         l_int32 l_binaryWrite()
         size_t nbytesInFile()
         size_t fnbytesInFile()

     Copy and compare in memory
         l_uint8 *l_binaryCopy()
         l_int32 l_binaryCompare()

     File copy operations
         l_int32 fileCopy()
         l_int32 fileConcatenate()
         l_int32 fileAppendString()

     File split operations
         l_int32 fileSplitLinesUniform()

     Multi-platform functions for opening file streams
         FILE *fopenReadStream()
         FILE *fopenWriteStream()
         FILE *fopenReadFromMemory()

     Opening a Windows tmpfile for writing
         FILE *fopenWriteWinTempfile()

     Multi-platform functions that avoid C-runtime boundary
     crossing with Windows DLLs
         FILE *lept_fopen()
         l_int32 lept_fclose()
         void *lept_calloc()
         void lept_free()

     Multi-platform file system operations in temp directories
         l_int32 lept_mkdir()
         l_int32 lept_rmdir()
         void lept_direxists()
         l_int32 lept_mv()
         l_int32 lept_rm_match()
         l_int32 lept_rm()
         l_int32 lept_rmfile()
         l_int32 lept_cp()

     Special debug/test function for calling 'system'
         void callSystemDebug()

     General file name operations
         l_int32 splitPathAtDirectory()
         l_int32 splitPathAtExtension()
         char *pathJoin()
         char *appendSubdirs()

     Special file name operations
         l_int32 convertSepCharsInPath()
         char *genPathname()
         l_int32 makeTempDirname()
         l_int32 modifyTrailingSlash()
         char *l_makeTempFilename()
         l_int32 extractNumberFromFilename()

     Notes on multi-platform development
     -----------------------------------
     This is important:
     (1) With the exception of splitPathAtDirectory(), splitPathAtExtension()
         and genPathname(), all input pathnames must have unix separators.
     (2) On Windows, when you specify a read or write to "/tmp/...",
         the filename is rewritten to use the Windows temp directory:
            /tmp  ==>  [Temp]...    (Windows)
     (3) This filename rewrite, along with the conversion from unix
         to Windows pathnames, happens in genPathname().
     (4) Use fopenReadStream() and fopenWriteStream() to open files,
         because these use genPathname() to find the platform-dependent
         filenames.  Likewise for l_binaryRead() and l_binaryWrite().
     (5) For moving, copying and removing files and directories that are in
         subdirectories of /tmp, use the lept_*() file system shell wrappers:
            lept_mkdir(), lept_rmdir(), lept_mv(), lept_rm() and lept_cp().
     (6) Use the lept_*() C library wrappers.  These work properly on
         Windows, where the same DLL must perform complementary operations
         on file streams (open/close) and heap memory (malloc/free):
            lept_fopen(), lept_fclose(), lept_calloc() and lept_free().
     (7) Why read and write files to temp directories?
         The library needs the ability to read and write ephemeral
         files to default places, both for generating debugging output
         and for supporting regression tests.  Applications also need
         this ability for debugging.
     (8) Why do the pathname rewrite on Windows?
         The goal is to have the library, and programs using the library,
         run on multiple platforms without changes.  The location of
         temporary files depends on the platform as well as the user's
         configuration.  Temp files on Windows are in some directory
         not known a priori.  To make everything work seamlessly on
         Windows, every time you open a file for reading or writing,
         use a special function such as fopenReadStream() or
         fopenWriteStream(); these call genPathname() to ensure that
         if it is a temp file, the correct path is used.  To indicate
         that this is a temp file, the application is written with the
         root directory of the path in a canonical form: "/tmp".
     (9) Why is it that multi-platform directory functions like lept_mkdir()
         and lept_rmdir(), as well as associated file functions like
         lept_rm(), lept_mv() and lept_cp(), only work in the temp dir?
         These functions were designed to provide easy manipulation of
         temp files.  The restriction to temp files is for safety -- to
         prevent an accidental deletion of important files.  For example,
         lept_rmdir() first deletes all files in a specified subdirectory
         of temp, and then removes the directory.

Definition in file utils2.c.

Function Documentation

◆ appendSubdirs()

char* appendSubdirs ( const char *  basedir,
const char *  subdirs 
)

appendSubdirs()

Parameters
[in]basedir
[in]subdirs
Returns
concatenated full directory path without trailing slash, or NULL on error
Notes:
     (1) Use unix pathname separators
     (2) Allocates a new string:  [basedir]/[subdirs]

Definition at line 2950 of file utils2.c.

References stringCat().

◆ arrayFindEachSequence()

L_DNA* arrayFindEachSequence ( const l_uint8 *  data,
size_t  datalen,
const l_uint8 *  sequence,
size_t  seqlen 
)

arrayFindEachSequence()

Parameters
[in] data: byte array
[in] datalen: length of data, in bytes
[in] sequence: subarray of bytes to find in data
[in] seqlen: length of sequence, in bytes
Returns
dna of offsets where the sequence is found, or NULL if none are found or on error
Notes:
     (1) The byte arrays data and sequence are not C strings,
         because they can contain null bytes.  Therefore, for each
         we must give the length of the array.
     (2) This finds every non-overlapping occurrence in data of sequence.
         After it finds each match, it moves forward by the length
         of the sequence before continuing the search.  So for example,
         if you search for the sequence 'aa' in the data 'baaabbb',
         you find one match at position 1.

Definition at line 1137 of file utils2.c.

References arrayFindSequence(), l_dnaAddNumber(), l_dnaCreate(), l_dnaDestroy(), and l_dnaGetCount().

Referenced by arrayReplaceEachSequence(), l_byteaFindEachSequence(), stringFindEachSubstr(), and substituteObjectNumbers().

◆ arrayFindSequence()

l_ok arrayFindSequence ( const l_uint8 *  data,
size_t  datalen,
const l_uint8 *  sequence,
size_t  seqlen,
l_int32 *  poffset,
l_int32 *  pfound 
)

arrayFindSequence()

Parameters
[in] data: byte array
[in] datalen: length of data, in bytes
[in] sequence: subarray of bytes to find in data
[in] seqlen: length of sequence, in bytes
[out] poffset: offset from beginning of data where the sequence begins
[out] pfound: 1 if sequence is found; 0 otherwise
Returns
0 if OK, 1 on error
Notes:
     (1) The byte arrays 'data' and 'sequence' are not C strings,
         because they can contain null bytes.  Therefore, for each
         we must give the length of the array.
     (2) This searches for the first occurrence in data of sequence,
         which consists of seqlen bytes.  The parameter seqlen
         must not exceed the actual length of the sequence byte array.
     (3) If the sequence is not found, the offset will be 0, so you
         must check found.

Definition at line 1195 of file utils2.c.

Referenced by arrayFindEachSequence(), parseTrailerPdf(), sarrayParseRange(), sarraySelectBySubstring(), and substituteObjectNumbers().

◆ arrayReplaceEachSequence()

l_uint8* arrayReplaceEachSequence ( const l_uint8 *  datas,
size_t  dataslen,
const l_uint8 *  seq,
size_t  seqlen,
const l_uint8 *  newseq,
size_t  newseqlen,
size_t *  pdatadlen,
l_int32 *  pcount 
)

arrayReplaceEachSequence()

Parameters
[in] datas: source byte array
[in] dataslen: length of source data, in bytes
[in] seq: subarray of bytes to find in source data
[in] seqlen: length of subarray, in bytes
[in] newseq: replacement subarray; can be null
[in] newseqlen: length of replacement subarray, in bytes
[out] pdatadlen: length of dest byte array, in bytes
[out] pcount: [optional] the number of times that seq is found in datas; 0 if not found
Returns
datad with all subarrays replaced (or removed)
Notes:
     (1) The byte arrays datas, seq and newseq are not C strings,
         because they can contain null bytes.  Therefore, for each
         we must give the length of the array.
     (2) If newseq == NULL, this just removes all instances of seq.
         Otherwise, it replaces every non-overlapping occurrence of
         seq in datas with newseq. A new array datad and its
         size are returned.  See arrayFindEachSequence() for more
         details on finding non-overlapping occurrences.
     (3) If no instances of seq are found, this returns a copy of datas.
     (4) The returned datad is null terminated.
     (5) Can use stringReplaceEachSubstr() if using C strings.

Definition at line 1048 of file utils2.c.

References arrayFindEachSequence(), l_binaryCopy(), l_dnaDestroy(), l_dnaGetCount(), and l_dnaGetIValue().

Referenced by stringReplaceEachSubstr().

◆ callSystemDebug()

void callSystemDebug ( const char *  cmd)

callSystemDebug()

Parameters
[in] cmd: command to be exec'd
Returns
void
Notes:
     (1) The C library 'system' call is only made through this function.
         It only works in debug/test mode, where the global variable
         LeptDebugOK == TRUE.  This variable is set to FALSE in the
         library as distributed, and calling this function will
         generate an error message.

Definition at line 2658 of file utils2.c.

◆ convertSepCharsInPath()

l_ok convertSepCharsInPath ( char *  path,
l_int32  type 
)

convertSepCharsInPath()

Parameters
[in] path
[in] type: UNIX_PATH_SEPCHAR, WIN_PATH_SEPCHAR
Returns
0 if OK, 1 on error
Notes:
     (1) In-place conversion.
     (2) Type is the resulting type:
           * UNIX_PATH_SEPCHAR:  '\' ==> '/'
           * WIN_PATH_SEPCHAR:   '/' ==> '\'
     (3) Virtually all path operations in leptonica use unix separators.
     (4) The backslash is a valid character in unix pathnames and should
         not be converted.  Each backslash needs to be escaped with a
         preceding backslash for the shell, but the actual filename
         does not include these escape characters.

Definition at line 3004 of file utils2.c.

Referenced by genPathname(), and splitPathAtDirectory().

◆ extractNumberFromFilename()

l_int32 extractNumberFromFilename ( const char *  fname,
l_int32  numpre,
l_int32  numpost 
)

extractNumberFromFilename()

Parameters
[in] fname
[in] numpre: number of characters before the digits to be found
[in] numpost: number of characters after the digits to be found
Returns
num number embedded in the filename; -1 on error or if not found
Notes:
     (1) The number is to be found in the basename, which is the
         filename without either the directory or the last extension.
     (2) When a number is found, it is non-negative.  If no number
         is found, this returns -1, without an error message.  The
         caller needs to check.

Definition at line 3340 of file utils2.c.

References splitPathAtDirectory(), and splitPathAtExtension().

Referenced by convertSortedToNumberedPathnames().

◆ fileAppendString()

l_ok fileAppendString ( const char *  filename,
const char *  str 
)

fileAppendString()

Parameters
[in] filename
[in] str: string to append to file
Returns
0 if OK, 1 on error

Definition at line 1741 of file utils2.c.

References fopenWriteStream().

◆ fileConcatenate()

l_ok fileConcatenate ( const char *  srcfile,
const char *  destfile 
)

fileConcatenate()

Parameters
[in] srcfile: append data from this file
[in] destfile: add data to this file
Returns
0 if OK, 1 on error

Definition at line 1715 of file utils2.c.

References l_binaryRead(), and l_binaryWrite().

◆ fileCopy()

l_ok fileCopy ( const char *  srcfile,
const char *  newfile 
)

fileCopy()

Parameters
[in] srcfile: copy from this file
[in] newfile: copy to this file
Returns
0 if OK, 1 on error

Definition at line 1687 of file utils2.c.

References l_binaryRead(), and l_binaryWrite().

Referenced by lept_cp(), and lept_mv().

◆ fileSplitLinesUniform()

l_ok fileSplitLinesUniform ( const char *  filename,
l_int32  n,
l_int32  save_empty,
const char *  rootpath,
const char *  ext 
)

fileSplitLinesUniform()

Parameters
[in] filename: input file
[in] n: number of output files (>= 1)
[in] save_empty: 1 to save empty lines; 0 to remove them
[in] rootpath: root pathname of output files
[in] ext: output extension, including the '.'; can be NULL
Returns
0 if OK, 1 on error
Notes:
     (1) This splits an input text file into n files with roughly
         equal numbers of text lines in each file.
     (2) If save_empty == 1, empty lines are included, and concatenation
         of the text in the split files will be identical to the original.
     (3) The output filenames are in the form:
              <rootpath>_N.<ext>, N = 1, ... n
     (4) This handles the temp directory pathname conversion on Windows:
             /tmp  ==>  [Windows Temp directory]
     (5) Files can also be sharded into sets of lines by the program 'split':
             split -n l/<n> <filename>
         Using 'split', the resulting files have approximately equal
         numbers of bytes, rather than equal numbers of lines.

Definition at line 1789 of file utils2.c.

References l_binaryRead(), l_binaryWrite(), numaDestroy(), numaGetIValue(), numaGetUniformBinSizes(), sarrayCreateLinesFromString(), sarrayDestroy(), sarrayGetCount(), and sarrayToStringRange().

◆ fnbytesInFile()

size_t fnbytesInFile ( FILE *  fp)

fnbytesInFile()

Parameters
[in] fp: file stream
Returns
nbytes in file; 0 on error

Definition at line 1581 of file utils2.c.

Referenced by freadHeaderPng(), freadHeaderSpix(), and nbytesInFile().

◆ fopenReadFromMemory()

FILE* fopenReadFromMemory ( const l_uint8 *  data,
size_t  size 
)

fopenReadFromMemory()

Parameters
[in] data, size
Returns
file stream, or NULL on error
Notes:
     (1) Work-around if fmemopen() not available.
     (2) Windows tmpfile() writes into the root C:\ directory, which
         requires admin privileges.  This also works around that.

Definition at line 1937 of file utils2.c.

References fopenWriteWinTempfile().

Referenced by boxaaReadMem(), boxaReadMem(), dewarpaReadMem(), dewarpReadMem(), dpixReadMem(), fpixReadMem(), l_dnaaReadMem(), l_dnaReadMem(), numaaReadMem(), numaReadMem(), pixaaReadMem(), pixacompReadMem(), pixaReadMem(), pixcmapReadMem(), pixReadMemJpeg(), pixReadMemPnm(), ptaaReadMem(), ptaReadMem(), readHeaderMemJpeg(), readHeaderMemPnm(), readResolutionMemJpeg(), recogReadMem(), and sarrayReadMem().

◆ fopenReadStream()

◆ fopenWriteStream()

FILE* fopenWriteStream ( const char *  filename,
const char *  modestring 
)

fopenWriteStream()

Parameters
[in]filename
[in]modestring
Returns
stream, or NULL on error
Notes:
     (1) This should be used whenever you want to run fopen() to
         write or append to a stream.  Never call fopen() directly.
     (2) This handles the temp directory pathname conversion on Windows:
             /tmp  ==>  [Windows Temp directory]

Definition at line 1905 of file utils2.c.

References genPathname().

Referenced by boxaaWrite(), boxaWrite(), ccbaWrite(), dewarpaWrite(), dewarpWrite(), dpixWrite(), fileAppendString(), fpixWrite(), gplotWrite(), kernelWrite(), l_binaryWrite(), l_byteaWrite(), l_dnaaWrite(), l_dnaWrite(), lept_fopen(), numaaWrite(), numaWrite(), pixaaWrite(), pixacompWrite(), pixaWrite(), pixcmapWrite(), pixWriteJpeg(), pixWritePng(), ptaaWrite(), ptaWrite(), recogWrite(), sarrayAppend(), sarrayWrite(), selaWrite(), and selWrite().

◆ fopenWriteWinTempfile()

FILE* fopenWriteWinTempfile ( void  )

fopenWriteWinTempfile()

Returns
file stream, or NULL on error
Notes:
     (1) The Windows version of tmpfile() writes into the root
         C:\ directory, which requires admin privileges.  This
         function provides an alternative implementation.

Definition at line 1981 of file utils2.c.

References l_makeTempFilename(), and lept_free().

Referenced by boxaaWriteMem(), boxaWriteMem(), dewarpaWriteMem(), dewarpWriteMem(), dpixWriteMem(), fopenReadFromMemory(), fpixWriteMem(), l_dnaaWriteMem(), l_dnaWriteMem(), numaaWriteMem(), numaWriteMem(), pixaaWriteMem(), pixacompWriteMem(), pixaWriteMem(), pixaWriteMemMultipageTiff(), pixcmapWriteMem(), pixWriteMemJpeg(), pixWriteMemPam(), pixWriteMemPnm(), ptaaWriteMem(), ptaWriteMem(), recogWriteMem(), and sarrayWriteMem().

◆ genPathname()

char* genPathname ( const char *  dir,
const char *  fname 
)

genPathname()

Parameters
[in] dir: [optional] directory or full path name, with or without the trailing '/'
[in] fname: [optional] file name within a directory
Returns
pathname either a directory or full path, or NULL on error
Notes:
     (1) This function generates actual paths in the following ways:
           * from two sub-parts (e.g., a directory and a file name).
           * from a single full path, placed in dir, with
             fname == NULL.
           * from the name of a file in the local directory placed in
             fname, with dir == NULL.
           * if in a "/tmp" directory and on Windows, the Windows
             temp directory is used.
     (2) On Windows, if the root of dir is '/tmp', this does a name
         translation:
            "/tmp"  ==>  [Temp] (Windows)
         where [Temp] is the Windows temp directory.
     (3) On unix, the TMPDIR variable is ignored.  No rewriting
         of temp directories is permitted.
     (4) There are four cases for the input:
         (a) dir is a directory and fname is defined: result is a full path
         (b) dir is a directory and fname is null: result is a directory
         (c) dir is a full path and fname is null: result is a full path
         (d) dir is null or an empty string: start in the current dir;
             result is a full path
     (5) In all cases, the resulting pathname is not terminated with a slash
     (6) The caller is responsible for freeing the returned pathname.

Definition at line 3068 of file utils2.c.

References convertSepCharsInPath(), stringCat(), stringCopy(), and stringNew().

Referenced by fopenReadStream(), fopenWriteStream(), getFilenamesInDirectory(), lept_cp(), lept_direxists(), lept_mv(), lept_rm(), lept_rm_match(), lept_rmdir(), makeTempDirname(), and openTiff().

◆ l_binaryCompare()

l_ok l_binaryCompare ( const l_uint8 *  data1,
size_t  size1,
const l_uint8 *  data2,
size_t  size2,
l_int32 *  psame 
)

l_binaryCompare()

Parameters
[in] data1
[in] size1: of data1
[in] data2
[in] size2: of data2
[out] psame: (1 if the same, 0 if different)
Returns
0 if OK, 1 on error
Notes:
     (1) This can also be used to compare C strings str1 and str2.
         If the string lengths are not known, use strlen():
           l_binaryCompare((l_uint8 *)str1, strlen(str1),
                           (l_uint8 *)str2, strlen(str2), &same);

Definition at line 1653 of file utils2.c.

Referenced by regTestCompareStrings().

◆ l_binaryCopy()

l_uint8* l_binaryCopy ( const l_uint8 *  datas,
size_t  size 
)

l_binaryCopy()

Parameters
[in] datas
[in] size: of data array
Returns
datad on heap, or NULL on error
Notes:
     (1) We add 4 bytes to the zeroed output because in some cases
         (e.g., string handling) it is important to have the data
         be null terminated.  This guarantees that after the memcpy,
         the result is automatically null terminated.

Definition at line 1619 of file utils2.c.

Referenced by arrayReplaceEachSequence(), l_byteaCopyData(), pixcompCreateFromString(), and pixcompFastConvertToPdfData().

◆ l_binaryRead()

◆ l_binaryReadSelect()

l_uint8* l_binaryReadSelect ( const char *  filename,
size_t  start,
size_t  nbytes,
size_t *  pnread 
)

l_binaryReadSelect()

Parameters
[in] filename
[in] start: first byte to read
[in] nbytes: number of bytes to read; use 0 to read to end of file
[out] pnread: number of bytes actually read
Returns
data, or NULL on error
Notes:
     (1) The returned array is terminated with a null byte so that it can
         be used to read ascii data from a file into a proper C string.

Definition at line 1423 of file utils2.c.

References fopenReadStream(), and l_binaryReadSelectStream().

◆ l_binaryReadSelectStream()

l_uint8* l_binaryReadSelectStream ( FILE *  fp,
size_t  start,
size_t  nbytes,
size_t *  pnread 
)

l_binaryReadSelectStream()

Parameters
[in] fp: file stream
[in] start: first byte to read
[in] nbytes: number of bytes to read; use 0 to read to end of file
[out] pnread: number of bytes actually read
Returns
null-terminated array, or NULL on error; reading 0 bytes is not an error
Notes:
     (1) The returned array is terminated with a null byte so that it can
         be used to read ascii data from a file into a proper C string.
         If the file to be read is empty and start == 0, an array
         with a single null byte is returned.
     (2) Side effect: the stream pointer is re-positioned to the
         beginning of the file.

Definition at line 1466 of file utils2.c.

Referenced by l_binaryReadSelect(), and l_binaryReadStream().

◆ l_binaryReadStream()

l_uint8* l_binaryReadStream ( FILE *  fp,
size_t *  pnbytes 
)

l_binaryReadStream()

Parameters
[in] fp: file stream opened to read; can be stdin
[out] pnbytes: number of bytes read
Returns
null-terminated array, or NULL on error; reading 0 bytes is not an error
Notes:
     (1) The returned array is terminated with a null byte so that it can
         be used to read ascii data from a file into a proper C string.
     (2) This can be used to capture data that is piped in via stdin,
         because it does not require seeking within the file.
     (3) For example, you can read an image from stdin into memory
         using shell redirection, with one of these shell commands:
              cat <imagefile> | readprog
              readprog < <imagefile>
         where readprog is:
              l_uint8 *data = l_binaryReadStream(stdin, &nbytes);
              Pix *pix = pixReadMem(data, nbytes);
         Cross-references for the functions used in this example:
           * pixReadMem():
               PIX * pixReadMem(const l_uint8 *data, size_t size)
               Definition: readfile.c:822
           * l_binaryReadStream():
               l_uint8 * l_binaryReadStream(FILE *fp, size_t *pnbytes)
               Definition: utils2.c:1358

Definition at line 1358 of file utils2.c.

References L_ByteBuffer::array, bbufferCreate(), bbufferDestroy(), bbufferExtendArray(), l_binaryReadSelectStream(), L_ByteBuffer::n, and L_ByteBuffer::nalloc.

Referenced by boxaaWriteMem(), boxaWriteMem(), ccbaReadStream(), dewarpaWriteMem(), dewarpWriteMem(), dpixWriteMem(), fpixWriteMem(), l_binaryRead(), l_byteaInitFromStream(), l_dnaaWriteMem(), l_dnaWriteMem(), numaaWriteMem(), numaWriteMem(), pixaaWriteMem(), pixacompWriteMem(), pixaWriteMem(), pixcmapWriteMem(), pixReadStreamBmp(), pixReadStreamSpix(), pixWriteMemJpeg(), pixWriteMemPam(), pixWriteMemPnm(), ptaaWriteMem(), ptaWriteMem(), recogWriteMem(), and sarrayWriteMem().

◆ l_binaryWrite()

l_ok l_binaryWrite ( const char *  filename,
const char *  operation,
const void *  data,
size_t  nbytes 
)

◆ l_makeTempFilename()

char* l_makeTempFilename ( void  )

l_makeTempFilename()

Returns
fname : heap allocated filename; returns NULL on failure.
Notes:
     (1) On unix, this makes a filename of the form
              "/tmp/lept.XXXXXX",
         where each X is a random character.
     (2) On Windows, this makes a filename of the form
              "/[Temp]/lp.XXXXXX".
     (3) On all systems, this fails if the file is not writable.
     (4) Safest usage is to write to a subdirectory in debug code.
     (5) The returned filename must be freed by the caller, using lept_free.
     (6) The tail of the filename has a '.', so that cygwin interprets
         the file as having an extension.  Otherwise, cygwin assumes it
         is an executable and appends ".exe" to the filename.
     (7) On unix, whenever possible use tmpfile() instead.  tmpfile()
         hides the file name, returns a stream opened for write,
         and deletes the temp file when the stream is closed.

Definition at line 3286 of file utils2.c.

References makeTempDirname(), stringConcatNew(), and stringNew().

Referenced by fopenWriteWinTempfile().

◆ lept_calloc()

void* lept_calloc ( size_t  nmemb,
size_t  size 
)

lept_calloc()

Parameters
[in] nmemb: number of members
[in] size: of each member
Returns
void ptr, or NULL on error
Notes:
     (1) For safety with Windows DLLs, this can be used in conjunction
         with lept_free() to avoid C-runtime boundary problems.
         Just use these two functions throughout your application.

Definition at line 2089 of file utils2.c.

◆ lept_cp()

l_int32 lept_cp ( const char *  srcfile,
const char *  newdir,
const char *  newtail,
char **  pnewpath 
)

lept_cp()

Parameters
[in] srcfile
[in] newdir: [optional]; can be NULL
[in] newtail: [optional]; can be NULL
[out] pnewpath: [optional] of actual path; can be NULL
Returns
0 on success, non-zero on failure
Notes:
     (1) This copies srcfile to /tmp or to a subdirectory of /tmp.
     (2) srcfile can either be a full path or relative to the
         current directory.
     (3) newdir can either specify an existing subdirectory of /tmp,
         or can be NULL.  In the latter case, the file will be written
         into /tmp.
     (4) newtail can either specify a filename tail or, if NULL,
         the filename is taken from src-tail, the tail of srcfile.
     (5) For debugging, the computed newpath can be returned.  It must
         be freed by the caller.
     (6) Reminders:
         (a) specify files using unix pathnames
         (b) for Windows, translates
                /tmp  ==>  [Temp]
             where [Temp] is the Windows temp directory
     (7) Examples:
         * newdir = NULL,    newtail = NULL    ==> /tmp/src-tail
         * newdir = NULL,    newtail = abc     ==> /tmp/abc
         * newdir = def/ghi, newtail = NULL    ==> /tmp/def/ghi/src-tail
         * newdir = def/ghi, newtail = abc     ==> /tmp/def/ghi/abc

Definition at line 2579 of file utils2.c.

References fileCopy(), genPathname(), makeTempDirname(), pathJoin(), and splitPathAtDirectory().

◆ lept_direxists()

void lept_direxists ( const char *  dir,
l_int32 *  pexists 
)

lept_direxists()

Parameters
[in] dir
[out] pexists: 1 if it exists; 0 otherwise
Returns
void
Notes:
     (1) Always use unix pathname separators.
     (2) By calling genPathname(), if the pathname begins with "/tmp"
         this does an automatic directory translation on Windows
         to a path in the Windows [Temp] directory:
            "/tmp"  ==>  [Temp] (Windows)

Definition at line 2285 of file utils2.c.

References genPathname().

Referenced by lept_rmdir().

◆ lept_fclose()

l_ok lept_fclose ( FILE *  fp)

lept_fclose()

Parameters
[in] fp: file stream
Returns
0 if OK, 1 on error
Notes:
     (1) This should be used by any application that accepts
         a file handle generated by a leptonica Windows DLL.

Definition at line 2065 of file utils2.c.

◆ lept_fopen()

FILE* lept_fopen ( const char *  filename,
const char *  mode 
)

lept_fopen()

Parameters
[in] filename
[in] mode: same as for fopen(); e.g., "rb"
Returns
stream or NULL on error
Notes:
     (1) This must be used by any application that passes
         a file handle to a leptonica Windows DLL.

Definition at line 2037 of file utils2.c.

References fopenReadStream(), fopenWriteStream(), and stringFindSubstr().

◆ lept_free()

void lept_free ( void *  ptr)

lept_free()

Parameters
[in]ptr
Notes:
     (1) This should be used by any application that accepts
         heap data allocated by a leptonica Windows DLL.

Definition at line 2110 of file utils2.c.

Referenced by fopenWriteWinTempfile().

◆ lept_mkdir()

◆ lept_mv()

l_int32 lept_mv ( const char *  srcfile,
const char *  newdir,
const char *  newtail,
char **  pnewpath 
)

lept_mv()

Parameters
[in]srcfile
[in]newdir[optional]; can be NULL
[in]newtail[optional]; can be NULL
[out]pnewpath[optional] of actual path; can be NULL
Returns
0 on success, non-zero on failure
Notes:
     (1) This moves srcfile to /tmp or to a subdirectory of /tmp.
     (2) srcfile can either be a full path or relative to the
         current directory.
     (3) newdir can either specify an existing subdirectory of /tmp
         or can be NULL.  In the latter case, the file will be written
         into /tmp.
     (4) newtail can either specify a filename tail or, if NULL,
         the filename is taken from src-tail, the tail of srcfile.
     (5) For debugging, the computed newpath can be returned.  It must
         be freed by the caller.
     (6) Reminders:
         (a) specify files using unix pathnames
         (b) for Windows, translates
                /tmp  ==>  [Temp]
             where [Temp] is the Windows temp directory
     (7) Examples:
         * newdir = NULL,    newtail = NULL    ==> /tmp/src-tail
         * newdir = NULL,    newtail = abc     ==> /tmp/abc
         * newdir = def/ghi, newtail = NULL    ==> /tmp/def/ghi/src-tail
         * newdir = def/ghi, newtail = abc     ==> /tmp/def/ghi/abc

Definition at line 2482 of file utils2.c.

References fileCopy(), genPathname(), makeTempDirname(), pathJoin(), and splitPathAtDirectory().

◆ lept_rm()

l_int32 lept_rm ( const char *  subdir,
const char *  tail 
)

lept_rm()

Parameters
[in]subdir[optional] subdir of '/tmp'; can be NULL
[in]tailfilename without the directory
Returns
0 on success, non-zero on failure
Notes:
     (1) By calling genPathname(), this does an automatic directory
         translation on Windows to a path in the Windows [Temp] directory:
            "/tmp/..."  ==>  [Temp]/... (Windows)

Definition at line 2391 of file utils2.c.

References genPathname(), lept_rmfile(), and makeTempDirname().

◆ lept_rm_match()

l_int32 lept_rm_match ( const char *  subdir,
const char *  substr 
)

lept_rm_match()

Parameters
[in]subdir[optional] if NULL, the removed files are in /tmp
[in]substr[optional] pattern to match in filename
Returns
0 on success, non-zero on failure
Notes:
     (1) This removes the matched files in /tmp or a subdirectory of /tmp.
         Use NULL for subdir if the files are in /tmp.
     (2) If substr == NULL, this removes all files in the directory.
         If substr == "" (empty), this removes no files.
         If both subdir == NULL and substr == NULL, this removes
         all files in /tmp.
     (3) Use unix pathname separators.
     (4) By calling genPathname(), if the pathname begins with "/tmp"
         this does an automatic directory translation on Windows
         to a path in the Windows [Temp] directory:
            "/tmp"  ==>  [Temp] (Windows)
     (5) Error conditions:
           * returns -1 if the directory is not found
           * returns the number of files (> 0) that it was unable to remove.

Definition at line 2343 of file utils2.c.

References genPathname(), getSortedPathnamesInDirectory(), L_NOCOPY, lept_rmfile(), makeTempDirname(), sarrayDestroy(), sarrayGetCount(), and sarrayGetString().

◆ lept_rmdir()

l_int32 lept_rmdir ( const char *  subdir)

lept_rmdir()

Parameters
[in]subdirof /tmp or its equivalent on Windows
Returns
0 on success, non-zero on failure
Notes:
     (1) subdir is a partial path that can consist of one or more
         directories.
     (2) This removes all files from the specified subdirectory of
         the root temp directory:
           /tmp    (unix)
           [Temp]  (Windows)
         and then removes the subdirectory.
     (3) The combination
           lept_rmdir(subdir);
           lept_mkdir(subdir);
         is guaranteed to give you an empty subdirectory.

Definition at line 2213 of file utils2.c.

References genPathname(), getFilenamesInDirectory(), L_NOCOPY, lept_direxists(), pathJoin(), sarrayDestroy(), sarrayGetCount(), and sarrayGetString().

Referenced by compareTilesByHisto(), convertToNUpFiles(), dewarpaApplyDisparity(), dewarpaShowArrays(), dewarpBuildLineModel(), dewarpBuildPageModel(), and dewarpShowResults().

◆ lept_rmfile()

l_int32 lept_rmfile ( const char *  filepath)

lept_rmfile()

Parameters
[in]filepathfull path to file including the directory
Returns
0 on success, non-zero on failure
Notes:
     (1) This removes the named file.
     (2) Use unix pathname separators.
     (3) There is no name translation.
     (4) Unlike the other lept_* functions in this section, this can remove
         any file -- it is not restricted to files that are in /tmp or a
         subdirectory of it.

Definition at line 2429 of file utils2.c.

Referenced by lept_rm(), and lept_rm_match().

◆ makeTempDirname()

l_ok makeTempDirname ( char *  result,
size_t  nbytes,
const char *  subdir 
)

makeTempDirname()

Parameters
[in]resultpreallocated on stack or heap and passed in
[in]nbytessize of result array, in bytes
[in]subdir[optional]; can be NULL or an empty string
Returns
0 if OK, 1 on error
Notes:
     (1) This generates the directory path for output temp files,
         written into result with unix separators.
     (2) Caller allocates result, large enough to hold the path,
         which is:
           /tmp/subdir       (unix)
           [Temp]/subdir     (Windows, macOS, iOS)
         where [Temp] is a path determined
            * on Windows: by GetTempPath()
            * on macOS, iOS: by confstr() (see man page)
         and subdir is in general a set of nested subdirectories:
           dir1/dir2/.../dirN
         which in use would not typically exceed 2 levels.
     (3) Usage example:
           char result[256];
           makeTempDirname(result, sizeof(result), "lept/golden");
    l_ok makeTempDirname(char *result, size_t nbytes, const char *subdir)
    makeTempDirname()
    Definition: utils2.c:3175

Definition at line 3175 of file utils2.c.

References genPathname(), pathJoin(), stringCopy(), and stringNew().

Referenced by l_makeTempFilename(), lept_cp(), lept_mv(), lept_rm(), and lept_rm_match().

◆ modifyTrailingSlash()

l_ok modifyTrailingSlash ( char *  path,
size_t  nbytes,
l_int32  flag 
)

modifyTrailingSlash()

Parameters
[in]pathpreallocated on stack or heap and passed in
[in]nbytessize of path array, in bytes
[in]flagL_ADD_TRAIL_SLASH or L_REMOVE_TRAIL_SLASH
Returns
0 if OK, 1 on error
Notes:
     (1) This carries out the requested action if necessary.

Definition at line 3238 of file utils2.c.

References L_ADD_TRAIL_SLASH, and L_REMOVE_TRAIL_SLASH.

◆ nbytesInFile()

size_t nbytesInFile ( const char *  filename)

nbytesInFile()

Parameters
[in]filename
Returns
nbytes in file; 0 on error

Definition at line 1559 of file utils2.c.

References fnbytesInFile(), and fopenReadStream().

Referenced by filesAreIdentical().

◆ pathJoin()

char* pathJoin ( const char *  dir,
const char *  fname 
)

pathJoin()

Parameters
[in]dir[optional] can be null
[in]fname[optional] can be null
Returns
specially concatenated path, or NULL on error
Notes:
     (1) Use unix-style pathname separators ('/').
     (2) fname can be the entire path, or part of the path containing
         at least one directory, or a tail without a directory, or NULL.
     (3) It produces a path that strips multiple slashes to a single
         slash, joins dir and fname by a slash, and has no trailing
         slashes (except in the cases where dir == "/" and
         fname == NULL, or vice versa).
     (4) If both dir and fname are null, produces an empty string.
     (5) Neither dir nor fname can begin with '..'.
     (6) The result is not canonicalized or tested for correctness:
         garbage in (e.g., /&%), garbage out.
     (7) Examples:
            //tmp// + //abc/  -->  /tmp/abc
            tmp/ + /abc/      -->  tmp/abc
            tmp/ + abc/       -->  tmp/abc
            /tmp/ + ///       -->  /tmp
            /tmp/ + NULL      -->  /tmp
            // + /abc//       -->  /abc
            // + NULL         -->  /
            NULL + /abc/def/  -->  /abc/def
            NULL + abc//      -->  abc
            NULL + //         -->  /
            NULL + NULL       -->  (empty string)
            "" + ""           -->  (empty string)
            "" + /            -->  /
            ".." + /etc/foo   -->  NULL
            /tmp + ".."       -->  NULL

Definition at line 2873 of file utils2.c.

References l_byteaAppendString(), l_byteaCreate(), sarrayCreate(), and stringNew().

Referenced by dewarpDebug(), getSortedPathnamesInDirectory(), lept_cp(), lept_mv(), lept_rmdir(), makeTempDirname(), and pixaSaveFont().

◆ reallocNew()

void* reallocNew ( void **  pindata,
size_t  oldsize,
size_t  newsize 
)

reallocNew()

Parameters
[in,out]pindatanulls indata before reallocing
[in]oldsizesize of input data to be copied, in bytes
[in]newsizesize of buffer to be reallocated in bytes
Returns
ptr to new data, or NULL on error

Action: !N.B. (3) and (4)!
     (1) Allocates memory, initialized to 0
     (2) Copies as much of the input data as possible
         to the new block, truncating the copy if necessary
     (3) Frees the input data
     (4) Zeroes the input data ptr

Notes:
     (1) If newsize == 0, frees input data and nulls ptr
     (2) If input data is null, only callocs new memory
     (3) This differs from realloc in that it always allocates
         new memory (if newsize > 0) and initializes it to 0,
         it requires the amount of old data to be copied,
         and it takes the address of the input ptr and
         nulls the handle.

Definition at line 1262 of file utils2.c.

Referenced by bbufferExtendArray(), ccbaExtendArray(), dewarpaExtendArraysToSize(), l_dnaSetCount(), lheapExtendArray(), lqueueExtendArray(), lstackExtendArray(), numaSetCount(), ptraExtendArray(), and selaExtendArray().

◆ splitPathAtDirectory()

l_ok splitPathAtDirectory ( const char *  pathname,
char **  pdir,
char **  ptail 
)

splitPathAtDirectory()

Parameters
[in]pathnamefull path; can be a directory
[out]pdir[optional] root directory name of input path, including trailing '/'
[out]ptail[optional] path tail, which is either the file name within the root directory or the last sub-directory in the path
Returns
0 if OK, 1 on error
Notes:
     (1) If you only want the tail, input null for the root directory ptr.
     (2) If you only want the root directory name, input null for the
         tail ptr.
     (3) This function makes decisions based only on the lexical
         structure of the input.  Examples:
           /usr/tmp/abc.d  -->  dir: /usr/tmp/       tail: abc.d
           /usr/tmp/       -->  dir: /usr/tmp/       tail: [empty string]
           /usr/tmp        -->  dir: /usr/           tail: tmp
           abc.d           -->  dir: [empty string]  tail: abc.d
     (4) Consider the first example above: /usr/tmp/abc.d.
         Suppose you want the stem of the file, abc, without either
         the directory or the extension.  This can be extracted in two steps:
             splitPathAtDirectory("/usr/tmp/abc.d", NULL, &tail);
                  [sets tail: "abc.d"]
             splitPathAtExtension(tail, &basename, NULL);
                  [sets basename: "abc"]
     (5) The input can have either forward (unix) or backward (win)
         slash separators.  The output has unix separators.
         Note that Win32 pathname functions generally accept both
         slash forms, but the Windows command line interpreter
         only accepts backward slashes, because forward slashes are
         used to demarcate switches (vs. dashes in unix).

Definition at line 2728 of file utils2.c.

References convertSepCharsInPath(), and stringNew().

Referenced by convertToNUpPixa(), extractNumberFromFilename(), fopenReadStream(), getRootNameFromArgv0(), l_genDescrString(), lept_cp(), lept_mv(), selReadFromColorImage(), and splitPathAtExtension().

◆ splitPathAtExtension()

l_ok splitPathAtExtension ( const char *  pathname,
char **  pbasename,
char **  pextension 
)

splitPathAtExtension()

Parameters
[in]pathnamefull path; can be a directory
[out]pbasename[optional] pathname not including the last dot and characters after that
[out]pextension[optional] path extension, which is the last dot and the characters after it. If there is no extension, it returns the empty string
Returns
0 if OK, 1 on error
Notes:
     (1) If you only want the extension, input null for the basename ptr.
     (2) If you only want the basename without extension, input null
         for the extension ptr.
     (3) This function makes decisions based only on the lexical
         structure of the input.  Examples:
           /usr/tmp/abc.jpg  -->  basename: /usr/tmp/abc    ext: .jpg
           /usr/tmp/.jpg     -->  basename: /usr/tmp/       ext: .jpg
           /usr/tmp.jpg/     -->  basename: /usr/tmp.jpg/   ext: [empty str]
           ./.jpg            -->  basename: ./              ext: .jpg
     (4) The input can have either forward (unix) or backward (win)
         slash separators.  The output has unix separators.
     (5) Note that basename, as used here, is different from the result
         of the unix program 'basename'.  Here, basename is the entire
         pathname up to a final extension and its preceding dot.

Definition at line 2796 of file utils2.c.

References splitPathAtDirectory(), stringJoin(), and stringNew().

Referenced by extractNumberFromFilename(), and selReadFromColorImage().

◆ stringCat()

l_int32 stringCat ( char *  dest,
size_t  size,
const char *  src 
)

stringCat()

Parameters
[in] dest  null-terminated byte buffer
[in] size  size of dest
[in] src   string; can be null or a NUL-terminated string
Returns
number of bytes added to dest; -1 on error
Notes:
     (1) Alternative implementation of strncat, that checks the input,
         is easier to use (since the size of the dest buffer is specified
         rather than the number of bytes to copy), and does not complain
         if src is null.
     (2) Never writes past end of dest.
     (3) If there is not enough room to append the src, which is an error,
         it does nothing.
     (4) N.B. The order of 2nd and 3rd args is reversed from that in
         strncat, as in the Windows function strcat_s().

Definition at line 413 of file utils2.c.

References stringLength().

Referenced by appendSubdirs(), generateEscapeString(), genPathname(), l_binaryWrite(), and stringJoin().

◆ stringCheckForChars()

l_ok stringCheckForChars ( const char *  src,
const char *  chars,
l_int32 *  pfound 
)

stringCheckForChars()

Parameters
[in]srcinput string; can be of zero length
[in]charsstring of chars to be searched for in src
[out] pfound  1 if any characters are found; 0 otherwise
Returns
0 if OK, 1 on error
Notes:
     (1) This can be used to sanitize an operation by checking for
         special characters that don't belong in a string.

Definition at line 765 of file utils2.c.

◆ stringConcatNew()

char* stringConcatNew ( const char *  first,
  ... 
)

stringConcatNew()

Parameters
[in]firstfirst string in list
[in]...NULL-terminated list of strings
Returns
result new string concatenating the input strings, or NULL if first == NULL
Notes:
     (1) The last arg in the list of strings must be NULL.
     (2) Caller must free the returned string.

Definition at line 459 of file utils2.c.

Referenced by l_makeTempFilename().

◆ stringCopy()

l_ok stringCopy ( char *  dest,
const char *  src,
l_int32  n 
)

stringCopy()

Parameters
[in]destexisting byte buffer
[in]srcstring [optional] can be null
[in]nmax number of characters to copy
Returns
0 if OK, 1 on error
Notes:
     (1) Relatively safe wrapper for strncpy, that checks the input,
         and does not complain if src is null or n < 1.
         If n < 1, this is a no-op.
     (2) dest needs to be at least n bytes in size.
     (3) We don't call strncpy() because valgrind complains about
         use of uninitialized values.

Definition at line 261 of file utils2.c.

Referenced by genPathname(), getImagelibVersions(), l_binaryWrite(), makeTempDirname(), stringCopySegment(), stringNew(), and strtokSafe().

◆ stringCopySegment()

char* stringCopySegment ( const char *  src,
l_int32  start,
l_int32  nbytes 
)

stringCopySegment()

Parameters
[in]srcstring
[in]startbyte position at start of segment
[in]nbytesnumber of bytes in the segment; use 0 to go to end
Returns
copy of segment, or NULL on error
Notes:
     (1) This is a variant of stringNew() that makes a new string
         from a segment of the input string.  The segment is specified
         by the starting position and the number of bytes.
     (2) The start location start must be within the string src.
     (3) The copy is truncated to the end of the source string.
         Use nbytes = 0 to copy to the end of src.

Definition at line 301 of file utils2.c.

References stringCopy().

◆ stringFindEachSubstr()

L_DNA* stringFindEachSubstr ( const char *  src,
const char *  sub 
)

stringFindEachSubstr()

Parameters
[in] src  input string; can be of zero length
[in] sub  substring to be searched for
Returns
dna of offsets where the sequence is found, or NULL if none are found or on error
Notes:
     (1) This finds every non-overlapping occurrence in src of sub.
         After it finds each match, it moves forward in src by the length
         of sub before continuing the search.  So for example,
         if you search for the sequence 'aa' in the data 'baaabbb',
         you find one match at position 1.

Definition at line 966 of file utils2.c.

References arrayFindEachSequence().

◆ stringFindSubstr()

l_int32 stringFindSubstr ( const char *  src,
const char *  sub,
l_int32 *  ploc 
)

stringFindSubstr()

Parameters
[in] src    input string; can be of zero length
[in] sub    substring to be searched for; must not be empty
[out] ploc  [optional] location of substring in src
Returns
1 if found; 0 if not found or on error
Notes:
     (1) This is a wrapper around strstr().  It finds the first
         instance of sub in src.  If the substring is not found
         and the location is returned, it has the value -1.
     (2) Both src and sub must be defined, and sub must have
         length of at least 1.

Definition at line 995 of file utils2.c.

Referenced by getRootNameFromArgv0(), and lept_fopen().

◆ stringJoin()

char* stringJoin ( const char *  src1,
const char *  src2 
)

stringJoin()

Parameters
[in]src1[optional] string; can be null
[in]src2[optional] string; can be null
Returns
concatenated string, or NULL on error
Notes:
     (1) This is a safe version of strcat; it makes a new string.
     (2) It is not an error if either or both of the strings
         are empty, or if either or both of the pointers are null.

Definition at line 506 of file utils2.c.

References stringCat().

Referenced by pixAddText(), splitPathAtExtension(), and stringJoinIP().

◆ stringJoinIP()

l_ok stringJoinIP ( char **  psrc1,
const char *  src2 
)

stringJoinIP()

Parameters
[in,out]psrc1address of string src1; cannot be on the stack
[in]src2[optional] string; can be null
Returns
0 if OK, 1 on error
Notes:
     (1) This is a safe in-place version of strcat.  The contents of
         src1 is replaced by the concatenation of src1 and src2.
     (2) It is not an error if either or both of the strings
         are empty (""), or if the pointers to the strings (*psrc1, src2)
         are null.
     (3) src1 should be initialized to null or an empty string
         before the first call.  Use one of these:
             char *src1 = NULL;
             char *src1 = stringNew("");
         Then call with:
             stringJoinIP(&src1, src2);
     (4) This can also be implemented as a macro:
             #define stringJoinIP(src1, src2) \
                 {tmpstr = stringJoin((src1),(src2)); \
                  LEPT_FREE(src1); \
                  (src1) = tmpstr;}
     (5) Another function to consider for joining many strings is
         stringConcatNew().

Definition at line 559 of file utils2.c.

References stringJoin().

Referenced by getImagelibVersions(), l_genCaseString(), and showExtractNumbers().

◆ stringLength()

l_int32 stringLength ( const char *  src,
size_t  size 
)

stringLength()

Parameters
[in] src   string; can be null or a NUL-terminated string
[in] size  size of src buffer
Returns
length of src in bytes.
Notes:
     (1) Safe implementation of strlen that only checks size bytes
         for trailing NUL.
     (2) Valid returned string lengths are between 0 and size - 1.
         If size bytes are checked without finding a NUL byte, then
         an error is indicated by returning size.

Definition at line 373 of file utils2.c.

Referenced by stringCat().

◆ stringNew()

◆ stringRemoveChars()

char* stringRemoveChars ( const char *  src,
const char *  remchars 
)

stringRemoveChars()

Parameters
[in]srcinput string; can be of zero length
[in]remcharsstring of chars to be removed from src
Returns
dest string with specified chars removed, or NULL on error

Definition at line 798 of file utils2.c.

References stringNew().

Referenced by morphSequenceVerify().

◆ stringReplace()

l_ok stringReplace ( char **  pdest,
const char *  src 
)

stringReplace()

Parameters
[out]pdeststring copy
[in]src[optional] string; can be null
Returns
0 if OK; 1 on error
Notes:
     (1) Frees any existing dest string
     (2) Puts a copy of src string in the dest
     (3) If either or both strings are null, does something reasonable.

Definition at line 339 of file utils2.c.

References stringNew().

Referenced by pixAddText(), pixSetText(), and selSetName().

◆ stringReplaceEachSubstr()

char* stringReplaceEachSubstr ( const char *  src,
const char *  sub1,
const char *  sub2,
l_int32 *  pcount 
)

stringReplaceEachSubstr()

Parameters
[in]srcinput string; can be of zero length
[in]sub1substring to be replaced
[in]sub2substring to put in; can be ""
[out]pcount[optional] the number of times that sub1 is found in src; 0 if not found
Returns
dest string with substring replaced, or NULL if the substring not found or on error.
Notes:
     (1) This is a wrapper for simple string substitution that uses
         the more general function arrayReplaceEachSequence().
     (2) This finds every non-overlapping occurrence of sub1 in
         src, and replaces it with sub2.  By "non-overlapping"
         we mean that after it finds each match, it removes the
         matching characters, replaces with the substitution string
         (if not empty), and continues.  For example, if you replace
         'aa' by 'X' in 'baaabbb', you find one match at position 1
         and return 'bXabbb'.
     (3) To only remove each instance of sub1, use "" for sub2
     (4) Returns a copy of src if sub1 and sub2 are the same.
     (5) If the input src is binary data that can have null characters,
         use arrayReplaceEachSequence() directly.

Definition at line 852 of file utils2.c.

References arrayReplaceEachSequence().

◆ stringReplaceSubstr()

char* stringReplaceSubstr ( const char *  src,
const char *  sub1,
const char *  sub2,
l_int32 *  ploc,
l_int32 *  pfound 
)

stringReplaceSubstr()

Parameters
[in]srcinput string; can be of zero length
[in]sub1substring to be replaced
[in]sub2substring to put in; can be ""
[in,out]ploc[optional] input start location for search; returns the loc after replacement
[out]pfound[optional] 1 if sub1 is found; 0 otherwise
Returns
dest string with substring replaced, or NULL on error.
Notes:
     (1) Replaces the first instance.
     (2) To remove sub1 without replacement, use "" for sub2.
     (3) Returns a copy of src if either no instance of sub1 is found,
         or if sub1 and sub2 are the same.
     (4) If ploc == NULL, the search will start at the beginning of src.
         If ploc != NULL, *ploc must be initialized to the byte offset
         within src from which the search starts.  To search the
         string from the beginning, set loc = 0 and input &loc.
         After finding sub1 and replacing it with sub2, loc will be
         returned as the next position after sub2 in the output string.
     (5) Note that the output string also includes all the characters
         from the input string that occur after the single substitution.

Definition at line 907 of file utils2.c.

References stringNew().

◆ stringReverse()

char* stringReverse ( const char *  src)

stringReverse()

Parameters
[in]srcstring
Returns
dest newly-allocated reversed string

Definition at line 581 of file utils2.c.

Referenced by barcodeDecode2of5(), barcodeDecode39(), barcodeDecode93(), barcodeDecodeCodabar(), barcodeDecodeEan13(), barcodeDecodeI2of5(), barcodeDecodeUpca(), and barcodeVerifyFormat().

◆ stringSplitOnToken()

l_ok stringSplitOnToken ( char *  cstr,
const char *  seps,
char **  phead,
char **  ptail 
)

stringSplitOnToken()

Parameters
[in] cstr    input string to be split; not altered
[in] seps    a string of character separators
[out] phead  ptr to copy of the input string, up to the first separator token encountered
[out] ptail  ptr to copy of the part of the input string starting with the first non-separator character that occurs after the first separator is found
Returns
0 if OK, 1 on error
Notes:
     (1) The input string is not altered; all split parts are new strings.
     (2) The split occurs around the first consecutive sequence of
         tokens encountered.
     (3) The head goes from the beginning of the string up to
         but not including the first token found.
     (4) The tail contains the second part of the string, starting
         with the first char in that part that is NOT a token.
     (5) If no separator token is found, 'head' contains a copy
         of the input string and 'tail' is null.

Definition at line 723 of file utils2.c.

References stringNew(), and strtokSafe().

◆ strtokSafe()

char* strtokSafe ( char *  cstr,
const char *  seps,
char **  psaveptr 
)

strtokSafe()

Parameters
[in] cstr       input string to be sequentially parsed; use NULL after the first call
[in] seps       a string of character separators
[out] psaveptr  ptr to the next char after the last encountered separator
Returns
substr a new string that is copied from the previous saveptr up to but not including the next separator character, or NULL if end of cstr.
Notes:
     (1) This is a thread-safe implementation of strtok.
     (2) It has the same interface as strtok_r.
     (3) It differs from strtok_r in usage in two respects:
         (a) the input string is not altered
         (b) each returned substring is newly allocated and must
             be freed after use.
     (4) Let me repeat that.  This is "safe" because the input
         string is not altered and because each returned string
         is newly allocated on the heap.
     (5) It is here because, surprisingly, some C libraries don't
         include strtok_r.
     (6) Important usage points:
         ~ Input the string to be parsed on the first invocation.
         ~ Then input NULL after that; the value returned in saveptr
           is used in all subsequent calls.
     (7) This is only slightly slower than strtok_r.

Definition at line 631 of file utils2.c.

References stringCopy().

Referenced by getImagelibVersions(), parseStringForNumbers(), and stringSplitOnToken().