![]() |
Leptonica
1.83.1
Image processing and image analysis suite
|
#include <string.h>
#include <dirent.h>
#include <sys/stat.h>
#include <limits.h>
#include <stdlib.h>
#include "allheaders.h"
#include "array_internal.h"
Go to the source code of this file.
Functions | |
static l_int32 | sarrayExtendArray (SARRAY *sa) |
SARRAY * | sarrayCreate (l_int32 n) |
SARRAY * | sarrayCreateInitialized (l_int32 n, const char *initstr) |
SARRAY * | sarrayCreateWordsFromString (const char *string) |
SARRAY * | sarrayCreateLinesFromString (const char *string, l_int32 blankflag) |
void | sarrayDestroy (SARRAY **psa) |
SARRAY * | sarrayCopy (SARRAY *sa) |
SARRAY * | sarrayClone (SARRAY *sa) |
l_ok | sarrayAddString (SARRAY *sa, const char *string, l_int32 copyflag) |
char * | sarrayRemoveString (SARRAY *sa, l_int32 index) |
l_ok | sarrayReplaceString (SARRAY *sa, l_int32 index, char *newstr, l_int32 copyflag) |
l_ok | sarrayClear (SARRAY *sa) |
l_int32 | sarrayGetCount (SARRAY *sa) |
char ** | sarrayGetArray (SARRAY *sa, l_int32 *pnalloc, l_int32 *pn) |
char * | sarrayGetString (SARRAY *sa, l_int32 index, l_int32 copyflag) |
char * | sarrayToString (SARRAY *sa, l_int32 addnlflag) |
char * | sarrayToStringRange (SARRAY *sa, l_int32 first, l_int32 nstrings, l_int32 addnlflag) |
SARRAY * | sarrayConcatUniformly (SARRAY *sa, l_int32 n, l_int32 addnlflag) |
l_ok | sarrayJoin (SARRAY *sa1, SARRAY *sa2) |
l_ok | sarrayAppendRange (SARRAY *sa1, SARRAY *sa2, l_int32 start, l_int32 end) |
l_ok | sarrayPadToSameSize (SARRAY *sa1, SARRAY *sa2, const char *padstring) |
SARRAY * | sarrayConvertWordsToLines (SARRAY *sa, l_int32 linesize) |
l_int32 | sarraySplitString (SARRAY *sa, const char *str, const char *separators) |
SARRAY * | sarraySelectBySubstring (SARRAY *sain, const char *substr) |
SARRAY * | sarraySelectRange (SARRAY *sain, l_int32 first, l_int32 last) |
l_int32 | sarrayParseRange (SARRAY *sa, l_int32 start, l_int32 *pactualstart, l_int32 *pend, l_int32 *pnewstart, const char *substr, l_int32 loc) |
SARRAY * | sarrayRead (const char *filename) |
SARRAY * | sarrayReadStream (FILE *fp) |
SARRAY * | sarrayReadMem (const l_uint8 *data, size_t size) |
l_ok | sarrayWrite (const char *filename, SARRAY *sa) |
l_ok | sarrayWriteStream (FILE *fp, SARRAY *sa) |
l_ok | sarrayWriteStderr (SARRAY *sa) |
l_ok | sarrayWriteMem (l_uint8 **pdata, size_t *psize, SARRAY *sa) |
l_ok | sarrayAppend (const char *filename, SARRAY *sa) |
SARRAY * | getNumberedPathnamesInDirectory (const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost, l_int32 maxnum) |
SARRAY * | getSortedPathnamesInDirectory (const char *dirname, const char *substr, l_int32 first, l_int32 nfiles) |
SARRAY * | convertSortedToNumberedPathnames (SARRAY *sa, l_int32 numpre, l_int32 numpost, l_int32 maxnum) |
SARRAY * | getFilenamesInDirectory (const char *dirname) |
Variables | |
static const l_uint32 | MaxPtrArraySize = 50000000 |
static const l_int32 | InitialPtrArraySize = 50 |
Create/Destroy/Copy SARRAY *sarrayCreate() SARRAY *sarrayCreateInitialized() SARRAY *sarrayCreateWordsFromString() SARRAY *sarrayCreateLinesFromString() void *sarrayDestroy() SARRAY *sarrayCopy() SARRAY *sarrayClone() Add/Remove string l_int32 sarrayAddString() static l_int32 sarrayExtendArray() char *sarrayRemoveString() l_int32 sarrayReplaceString() l_int32 sarrayClear() Accessors l_int32 sarrayGetCount() char **sarrayGetArray() char *sarrayGetString() Conversion back to string char *sarrayToString() char *sarrayToStringRange() Concatenate strings uniformly within the sarray SARRAY *sarrayConcatUniformly() Join 2 sarrays l_int32 sarrayJoin() l_int32 sarrayAppendRange() Pad an sarray to be the same size as another sarray l_int32 sarrayPadToSameSize() Convert word sarray to (formatted) line sarray SARRAY *sarrayConvertWordsToLines() Split string on separator list SARRAY *sarraySplitString() Filter sarray SARRAY *sarraySelectBySubstring() SARRAY *sarraySelectRange() l_int32 sarrayParseRange() Serialize for I/O SARRAY *sarrayRead() SARRAY *sarrayReadStream() SARRAY *sarrayReadMem() l_int32 sarrayWrite() l_int32 sarrayWriteStream() l_int32 sarrayWriteStderr() l_int32 sarrayWriteMem() l_int32 sarrayAppend() Directory filenames SARRAY *getNumberedPathnamesInDirectory() SARRAY *getSortedPathnamesInDirectory() SARRAY *convertSortedToNumberedPathnames() SARRAY *getFilenamesInDirectory() These functions are important for efficient manipulation of string data, and they have found widespread use in leptonica. For example: (1) to generate text files: e.g., PostScript and PDF wrappers around sets of images (2) to parse text files: e.g., extracting prototypes from the source to generate allheaders.h (3) to generate code for compilation: e.g., the fast dwa code for arbitrary structuring elements. Comments on usage: The user is responsible for correctly disposing of strings that have been extracted from sarrays. 
In the following, "str_not_owned" means the returned handle does not own the string, and "str_owned" means the returned handle owns the string.
Definition in file sarray1.c.
SARRAY* convertSortedToNumberedPathnames | ( | SARRAY * | sa, |
l_int32 | numpre, | ||
l_int32 | numpost, | ||
l_int32 | maxnum | ||
) |
convertSortedToNumberedPathnames()
[in] | sa | sorted pathnames including zero-padded integers |
[in] | numpre | number of characters in name before number |
[in] | numpost | number of characters in name after the number, up to a dot before an extension |
[in] | maxnum | only consider page numbers up to this value |
Notes: (1) Typically, numpre = numpost = 0; e.g., when the filename just has a number followed by an optional extension.
Definition at line 1797 of file sarray1.c.
References extractNumberFromFilename(), L_COPY, L_NOCOPY, sarrayCreate(), sarrayCreateInitialized(), sarrayGetCount(), sarrayGetString(), and sarrayReplaceString().
Referenced by getNumberedPathnamesInDirectory().
SARRAY* getFilenamesInDirectory | ( | const char * | dirname | ) |
[in] | dirname | directory name |
Notes: (1) The versions compiled under unix and cygwin use the POSIX C library commands for handling directories. For Windows, there is a separate implementation. (2) It returns an array of filename tails; i.e., only the part of the path after the last slash. (3) Use of the d_type field of dirent is not portable: "According to POSIX, the dirent structure contains a field char d_name[] of unspecified size, with at most NAME_MAX characters preceding the terminating null character. Use of other fields will harm the portability of your programs." (4) As a consequence of (3), we note several things: ~ MINGW doesn't have a d_type member. ~ Older versions of gcc (e.g., 2.95.3) return DT_UNKNOWN for d_type from all files. On these systems, this function will return directories (except for '.' and '..', which are eliminated using the d_name field).
Definition at line 1877 of file sarray1.c.
References genPathname(), L_COPY, sarrayAddString(), and sarrayCreate().
Referenced by getSortedPathnamesInDirectory(), and lept_rmdir().
SARRAY* getNumberedPathnamesInDirectory | ( | const char * | dirname, |
const char * | substr, | ||
l_int32 | numpre, | ||
l_int32 | numpost, | ||
l_int32 | maxnum | ||
) |
getNumberedPathnamesInDirectory()
[in] | dirname | directory name |
[in] | substr | [optional] substring filter on filenames; can be NULL |
[in] | numpre | number of characters in name before number |
[in] | numpost | number of characters in name after the number, up to a dot before an extension |
[in] | maxnum | only consider page numbers up to this value |
Notes: (1) Returns the full pathnames of the numbered filenames in the directory. The number in the filename is the index into the sarray. For indices for which there are no filenames, an empty string ("") is placed into the sarray. This makes reading numbered files very simple. For example, the image whose filename includes number N can be retrieved using pixReadIndexed(sa, N); (2) If substr is not NULL, only filenames that contain the substring can be included. If substr is NULL, all matching filenames are used. (3) If no numbered files are found, it returns an empty sarray, with no initialized strings. (4) It is assumed that the page number is contained within the basename (the filename without directory or extension). numpre is the number of characters in the basename preceding the actual page number; numpost is the number following the page number, up to either the end of the basename or a ".", whichever comes first. (5) This is useful when all filenames contain numbers that are not necessarily consecutive. 0-padding is not required. (6) To use an O(n) matching algorithm, the largest page number is found and two internal arrays of this size are created. This maximum is constrained not to exceed maxnum, to make sure that an unrealistically large number is not accidentally used to determine the array sizes.
Definition at line 1693 of file sarray1.c.
References convertSortedToNumberedPathnames(), getSortedPathnamesInDirectory(), sarrayCreate(), sarrayDestroy(), and sarrayGetCount().
Referenced by convertNumberedMasksToBoxaa(), and convertSegmentedFilesToPdf().
SARRAY* getSortedPathnamesInDirectory | ( | const char * | dirname, |
const char * | substr, | ||
l_int32 | first, | ||
l_int32 | nfiles | ||
) |
getSortedPathnamesInDirectory()
[in] | dirname | directory name |
[in] | substr | [optional] substring filter on filenames; can be NULL |
[in] | first | 0-based |
[in] | nfiles | use 0 for all to the end |
Notes: (1) Use substr to filter filenames in the directory. If substr == NULL, this takes all files. (2) The files in the directory, after optional filtering by the substring, are lexically sorted in increasing order. Use first and nfiles to select a contiguous set of files. (3) The full pathnames are returned for the requested sequence. If no files are found after filtering, returns an empty sarray.
Definition at line 1739 of file sarray1.c.
References getFilenamesInDirectory(), L_INSERT, L_NOCOPY, L_SORT_INCREASING, pathJoin(), sarrayAddString(), sarrayCreate(), sarrayDestroy(), sarrayGetCount(), sarrayGetString(), sarraySelectBySubstring(), and sarraySort().
Referenced by boxaaReadFromFiles(), concatenatePdf(), concatenatePdfToData(), convertFilesToPdf(), convertToNUpPixa(), convertUnscaledFilesToPdf(), getNumberedPathnamesInDirectory(), jbWordsInTextlines(), lept_rm_match(), pixaaReadFromFiles(), pixaReadFiles(), and writeMultipageTiff().
l_ok sarrayAddString | ( | SARRAY * | sa, |
const char * | string, | ||
l_int32 | copyflag | ||
) |
[in] | sa | string array |
[in] | string | string to be added |
[in] | copyflag | L_INSERT, L_NOCOPY or L_COPY |
Notes: (1) See usage comments at the top of this file. L_INSERT is equivalent to L_NOCOPY.
Definition at line 435 of file sarray1.c.
References Sarray::array, L_COPY, L_INSERT, L_NOCOPY, Sarray::n, Sarray::nalloc, sarrayExtendArray(), sarrayGetCount(), and stringNew().
Referenced by bmfGetLineStrings(), ccbaWriteSVGString(), convertToNUpPixa(), getFilenamesInDirectory(), getSortedPathnamesInDirectory(), l_genDataString(), numaConvertToSarray(), recogAddMissingClassStrings(), recogExtractNumbers(), recogGetClassIndex(), recogIsPaddingNeeded(), sarrayAppendRange(), sarrayConcatUniformly(), sarrayConvertWordsToLines(), sarrayCopy(), sarrayCreateInitialized(), sarrayCreateLinesFromString(), sarrayGenerateIntegers(), sarrayIntersectionByHmap(), sarrayJoin(), sarrayMakeWplsCode(), sarrayPadToSameSize(), sarrayRemoveDupsByHmap(), sarraySelectBySubstring(), sarraySelectRange(), sarraySortByIndex(), selaGetSelnames(), splitStringToParagraphs(), strcodeGenerate(), sudokuReadFile(), and transferRchToRcha().
l_ok sarrayAppend | ( | const char * | filename, |
SARRAY * | sa | ||
) |
[in] | filename | |
[in] | sa |
Definition at line 1627 of file sarray1.c.
References fopenWriteStream(), and sarrayWriteStream().
[in] | sa1 | to be added to |
[in] | sa2 | append specified range of strings in sa2 to sa1 |
[in] | start | index of first string of sa2 to append |
[in] | end | index of last string of sa2 to append; -1 to append to end of array |
Notes: (1) Copies of the strings in sarray2 are added to sarray1. (2) The [start ... end] range is truncated if necessary. (3) Use end == -1 to append to the end of sa2.
Definition at line 935 of file sarray1.c.
References L_COPY, L_NOCOPY, sarrayAddString(), sarrayGetCount(), and sarrayGetString().
l_ok sarrayClear | ( | SARRAY * | sa | ) |
[in] | sa | string array |
Definition at line 592 of file sarray1.c.
References Sarray::array, and Sarray::n.
[in] | sa | string array |
Definition at line 411 of file sarray1.c.
References Sarray::refcount.
Referenced by rchaExtract().
[in] | sa | string array |
[in] | n | number of strings in output sarray |
[in] | addnlflag | flag: 0 adds nothing to each substring 1 adds '\n' to each substring 2 adds ' ' to each substring 3 adds ',' to each substring |
Notes: (1) Divides sa into n essentially equal sets of strings, concatenates each set individually, and makes an output sarray with the n concatenations. n must not exceed the number of strings in sa. (2) If addnlflag != 0, adds '\n', ' ' or ',' after each substring.
Definition at line 844 of file sarray1.c.
References L_INSERT, numaDestroy(), numaGetIValue(), numaGetUniformBinSizes(), sarrayAddString(), sarrayCreate(), sarrayGetCount(), and sarrayToStringRange().
[in] | sa | sa of individual words |
[in] | linesize | max num of chars in each line |
Notes: (1) This is useful for re-typesetting text to a specific maximum line length. The individual words in the input sarray are concatenated into textlines. An input word string of zero length is taken to be a paragraph separator. Each time such a string is found, the current line is ended and a new line is also produced that contains just the string of zero length "". When the output sarray of lines is eventually converted to a string with newlines typically appended to each line string, the empty strings are just converted to newlines, producing the visible paragraph separation. (2) What happens when a word is larger than linesize? We write it out as a single line anyway! Words preceding or following this long word are placed on lines preceding or following the line with the long word. Why this choice? Long "words" found in text documents are typically URLs, and it's often desirable not to put newlines in the middle of a URL. The text display program e.g., text editor will typically wrap the long "word" to fit in the window.
Definition at line 1042 of file sarray1.c.
References L_COPY, L_INSERT, L_NOCOPY, sarrayAddString(), sarrayCreate(), sarrayDestroy(), sarrayGetCount(), sarrayGetString(), and sarrayToString().
[in] | sa | string array |
Definition at line 386 of file sarray1.c.
References Sarray::array, L_COPY, Sarray::n, Sarray::nalloc, sarrayAddString(), and sarrayCreate().
Referenced by sarraySelectBySubstring(), sarraySort(), sarrayUnionByAset(), and sarrayUnionByHmap().
SARRAY* sarrayCreate | ( | l_int32 | n | ) |
[in] | n | size of string ptr array to be alloc'd; use 0 for default |
Definition at line 169 of file sarray1.c.
Referenced by bmfGetLineStrings(), ccbaWriteSVGString(), convertSortedToNumberedPathnames(), convertToNUpPixa(), getFilenamesInDirectory(), getNumberedPathnamesInDirectory(), getSortedPathnamesInDirectory(), l_genDataString(), lept_mkdir(), numaConvertToSarray(), numaCreateFromString(), pathJoin(), pixColorMorphSequence(), pixGrayMorphSequence(), pixMorphCompSequence(), pixMorphCompSequenceDwa(), pixMorphSequence(), pixMorphSequenceDwa(), pixReadBarcodes(), rchaCreate(), recogAddMissingClassStrings(), recogExtractNumbers(), sarrayConcatUniformly(), sarrayConvertWordsToLines(), sarrayCopy(), sarrayCreateInitialized(), sarrayCreateLinesFromString(), sarrayCreateWordsFromString(), sarrayGenerateIntegers(), sarrayIntersectionByAset(), sarrayIntersectionByHmap(), sarrayLookupCSKV(), sarrayMakeInnerLoopDWACode(), sarrayMakeWplsCode(), sarrayRemoveDupsByHmap(), sarraySelectBySubstring(), sarraySelectRange(), sarraySortByIndex(), selaGetSelnames(), splitStringToParagraphs(), strcodeCreate(), and sudokuReadFile().
SARRAY* sarrayCreateInitialized | ( | l_int32 | n, |
const char * | initstr | ||
) |
[in] | n | size of string ptr array to be alloc'd |
[in] | initstr | string to be initialized on the full array |
Definition at line 197 of file sarray1.c.
References L_COPY, sarrayAddString(), and sarrayCreate().
Referenced by convertSortedToNumberedPathnames().
SARRAY* sarrayCreateLinesFromString | ( | const char * | string, |
l_int32 | blankflag | ||
) |
[in] | string | |
[in] | blankflag | 0 to exclude blank lines; 1 to include |
Notes: (1) This finds the number of line substrings, each of which ends with a newline, and puts a copy of each substring in a new sarray. (2) The newline characters are removed from each substring.
Definition at line 276 of file sarray1.c.
References L_INSERT, sarrayAddString(), sarrayCreate(), sarrayDestroy(), and stringNew().
Referenced by fileSplitLinesUniform(), kernelCreateFromFile(), parseTrailerPdf(), selaCreateFromFile(), splitStringToParagraphs(), strcodeCreateFromFile(), and sudokuReadFile().
SARRAY* sarrayCreateWordsFromString | ( | const char * | string | ) |
[in] | string |
Notes: (1) This finds the number of word substrings, creates an sarray of this size, and puts copies of each substring into the sarray.
Definition at line 228 of file sarray1.c.
References sarrayCreate().
Referenced by bmfGetLineStrings(), l_getIndexFromFile(), and sudokuReadFile().
void sarrayDestroy | ( | SARRAY ** | psa | ) |
[in,out] | psa | will be set to null before returning |
Notes: (1) Decrements the ref count and, if 0, destroys the sarray. (2) Always nulls the input ptr.
Definition at line 353 of file sarray1.c.
References Sarray::array, Sarray::n, and Sarray::refcount.
Referenced by bmfGetLineStrings(), boxaaReadFromFiles(), ccbaWriteSVGString(), concatenatePdf(), concatenatePdfToData(), convertFilesToPdf(), convertNumberedMasksToBoxaa(), convertSegmentedFilesToPdf(), convertToNUpPixa(), convertUnscaledFilesToPdf(), fileSplitLinesUniform(), getNumberedPathnamesInDirectory(), getSortedPathnamesInDirectory(), gplotDestroy(), kernelCreateFromFile(), l_genDataString(), l_getIndexFromFile(), lept_rm_match(), lept_rmdir(), parseTrailerPdf(), pixaaReadFromFiles(), pixaCompareInPdf(), pixaDisplayPairTiledInColumns(), pixaReadFiles(), rchaDestroy(), recogDestroy(), recogExtractNumbers(), recogPadDigitTrainingSet(), sarrayConvertWordsToLines(), sarrayCreateLinesFromString(), sarrayIntersectionByHmap(), sarrayUnionByAset(), sarrayUnionByHmap(), selaCreateFromFile(), splitStringToParagraphs(), strcodeCreateFromFile(), strcodeDestroy(), sudokuReadFile(), and writeMultipageTiff().
|
static |
[in] | sa | string array |
Notes: (1) Doubles the size of the string ptr array. (2) The max number of strings is 50M.
Definition at line 476 of file sarray1.c.
References Sarray::nalloc.
Referenced by sarrayAddString().
char** sarrayGetArray | ( | SARRAY * | sa, |
l_int32 * | pnalloc, | ||
l_int32 * | pn | ||
) |
[in] | sa | string array |
[out] | pnalloc | [optional] number of allocated string ptrs |
[out] | pn | [optional] number of allocated strings |
Notes: (1) Caution: the returned array is not a copy, so caller must not destroy it!
Definition at line 640 of file sarray1.c.
References Sarray::array, Sarray::n, and Sarray::nalloc.
Referenced by sarrayRemoveString().
l_int32 sarrayGetCount | ( | SARRAY * | sa | ) |
[in] | sa | string array |
Definition at line 617 of file sarray1.c.
References Sarray::n.
Referenced by bmfGetLineStrings(), bmfGetWordWidths(), boxaaReadFromFiles(), convertNumberedMasksToBoxaa(), convertSegmentedFilesToPdf(), convertSortedToNumberedPathnames(), convertToNUpPixa(), fileSplitLinesUniform(), getNumberedPathnamesInDirectory(), getSortedPathnamesInDirectory(), gplotGenDataFiles(), jbWordsInTextlines(), kernelCreateFromFile(), l_hmapCreateFromSarray(), lept_rm_match(), lept_rmdir(), morphSequenceVerify(), pixaAddTextlines(), pixaaReadFromFiles(), pixaConvertToNUpPixa(), pixaReadFilesSA(), pixaSetText(), pixReadIndexed(), recogAddDigitPadTemplates(), recogExtractNumbers(), saConcatenatePdfToData(), saConvertFilesToPdfData(), saConvertUnscaledFilesToPdfData(), sarrayAddString(), sarrayAppendRange(), sarrayConcatUniformly(), sarrayConvertWordsToLines(), sarrayIntersectionByAset(), sarrayIntersectionByHmap(), sarrayJoin(), sarrayLookupCSKV(), sarrayPadToSameSize(), sarrayParseRange(), sarrayReplaceString(), sarraySelectBySubstring(), sarraySelectRange(), sarraySort(), sarraySortByIndex(), sarrayToStringRange(), sarrayWriteStderr(), sarrayWriteStream(), selaCreateFromFile(), selCreateFromSArray(), showExtractNumbers(), splitStringToParagraphs(), strcodeCreateFromFile(), sudokuReadFile(), writeCustomTiffTags(), and writeMultipageTiffSA().
char* sarrayGetString | ( | SARRAY * | sa, |
l_int32 | index, | ||
l_int32 | copyflag | ||
) |
[in] | sa | string array |
[in] | index | to the index-th string |
[in] | copyflag | L_NOCOPY or L_COPY |
Notes: (1) See usage comments at the top of this file. (2) To get a pointer to the string itself, use L_NOCOPY. To get a copy of the string, use L_COPY.
Definition at line 673 of file sarray1.c.
References Sarray::array, L_COPY, L_NOCOPY, Sarray::n, and stringNew().
Referenced by bmfGetWordWidths(), boxaaReadFromFiles(), convertNumberedMasksToBoxaa(), convertSegmentedFilesToPdf(), convertSortedToNumberedPathnames(), convertToNUpPixa(), getSortedPathnamesInDirectory(), gplotGenDataFiles(), kernelCreateFromFile(), l_getIndexFromFile(), l_hmapCreateFromSarray(), lept_rm_match(), lept_rmdir(), morphSequenceVerify(), parseTrailerPdf(), pixaAddTextlines(), pixaaReadFromFiles(), pixaConvertToNUpPixa(), pixaReadFilesSA(), pixaSetText(), pixReadIndexed(), ptraConcatenatePdfToData(), recogAddAllSamples(), recogAddCharstrLabels(), recogAddDigitPadTemplates(), recogAddMissingClassStrings(), recogExtractNumbers(), recogGetClassString(), recogIsPaddingNeeded(), saConcatenatePdfToData(), saConvertFilesToPdfData(), saConvertUnscaledFilesToPdfData(), sarrayAppendRange(), sarrayConvertWordsToLines(), sarrayIntersectionByHmap(), sarrayJoin(), sarrayLookupCSKV(), sarrayParseRange(), sarrayRemoveDupsByHmap(), sarraySelectBySubstring(), sarraySelectRange(), sarraySortByIndex(), sarrayToStringRange(), selaCreateFromColorPixa(), selaCreateFromFile(), selCreateFromSArray(), showExtractNumbers(), splitStringToParagraphs(), strcodeCreateFromFile(), sudokuReadFile(), writeCustomTiffTags(), and writeMultipageTiffSA().
[in] | sa1 | to be added to |
[in] | sa2 | append to sa1 |
Notes: (1) Copies of the strings in sarray2 are added to sarray1.
Definition at line 894 of file sarray1.c.
References L_COPY, L_NOCOPY, sarrayAddString(), sarrayGetCount(), and sarrayGetString().
Referenced by sarrayUnionByAset(), and sarrayUnionByHmap().
[in] | sa1,sa2 | |
[in] | padstring |
Notes: (1) If two sarrays have different size, this adds enough instances of padstring to the smaller so that they are the same size. It is useful when two or more sarrays are being sequenced in parallel, and it is necessary to find a valid string at each index.
Definition at line 985 of file sarray1.c.
References L_COPY, sarrayAddString(), and sarrayGetCount().
l_int32 sarrayParseRange | ( | SARRAY * | sa, |
l_int32 | start, | ||
l_int32 * | pactualstart, | ||
l_int32 * | pend, | ||
l_int32 * | pnewstart, | ||
const char * | substr, | ||
l_int32 | loc | ||
) |
[in] | sa | input sarray |
[in] | start | index to start range search |
[out] | pactualstart | index of actual start; may be > 'start' |
[out] | pend | index of end |
[out] | pnewstart | index of start of next range |
[in] | substr | substring for matching at beginning of string |
[in] | loc | byte offset within the string for the pattern; use -1 if the location does not matter. |
Notes: (1) This finds the range of the next set of strings in SA, beginning the search at 'start', that does NOT have the substring 'substr' either at the indicated location in the string or anywhere in the string. The input variable 'loc' is the specified offset within the string; use -1 to indicate 'anywhere in the string'. (2) Always check the return value to verify that a valid range was found. (3) If a valid range is not found, the values of actstart, end and newstart are all set to the size of sa. (4) If this is the last valid range, newstart returns the value n. In use, this should be tested before calling the function. (5) Usage example. To find all the valid ranges in a file where the invalid lines begin with two dashes, copy each line in the file to a string in an sarray, and do: start = 0; while (!sarrayParseRange(sa, start, &actstart, &end, &start, "--", 0)) lept_stderr("start = %d, end = %d\n", actstart, end);
Definition at line 1267 of file sarray1.c.
References arrayFindSequence(), L_NOCOPY, sarrayGetCount(), and sarrayGetString().
SARRAY* sarrayRead | ( | const char * | filename | ) |
[in] | filename |
Definition at line 1350 of file sarray1.c.
References fopenReadStream(), and sarrayReadStream().
SARRAY* sarrayReadMem | ( | const l_uint8 * | data, |
size_t | size | ||
) |
[in] | data | serialization in ascii |
[in] | size | of data; can use strlen to get it |
Definition at line 1454 of file sarray1.c.
References fopenReadFromMemory(), and sarrayReadStream().
SARRAY* sarrayReadStream | ( | FILE * | fp | ) |
[in] | fp | file stream |
Notes: (1) We store the size of each string along with the string. The limit on the number of strings is 50M. The limit on the size of any string is 2^30 bytes. (2) This allows a string to have embedded newlines. By reading the entire string, as determined by its size, we are not affected by any number of embedded newlines. (3) It is OK for the sarray to be empty.
Definition at line 1386 of file sarray1.c.
References SARRAY_VERSION_NUMBER.
Referenced by sarrayRead(), and sarrayReadMem().
char* sarrayRemoveString | ( | SARRAY * | sa, |
l_int32 | index | ||
) |
[in] | sa | string array |
[in] | index | of string within sarray |
Definition at line 508 of file sarray1.c.
References Sarray::n, and sarrayGetArray().
l_ok sarrayReplaceString | ( | SARRAY * | sa, |
l_int32 | index, | ||
char * | newstr, | ||
l_int32 | copyflag | ||
) |
[in] | sa | string array |
[in] | index | of string within sarray to be replaced |
[in] | newstr | string to replace existing one |
[in] | copyflag | L_INSERT, L_COPY |
Notes: (1) This destroys an existing string and replaces it with the new string or a copy of it. (2) By design, an sarray is always compacted, so there are never any holes (null ptrs) in the ptr array up to the current count.
Definition at line 557 of file sarray1.c.
References Sarray::array, L_COPY, L_INSERT, sarrayGetCount(), and stringNew().
Referenced by convertSortedToNumberedPathnames().
[in] | sain | input sarray |
[in] | substr | [optional] substring for matching; can be NULL |
Notes: (1) This selects all strings in sain that have substr as a substring. Note that we can't use strncmp() because we're looking for a match to the substring anywhere within each filename. (2) If substr == NULL, returns a copy of the sarray.
Definition at line 1156 of file sarray1.c.
References arrayFindSequence(), L_COPY, L_NOCOPY, sarrayAddString(), sarrayCopy(), sarrayCreate(), sarrayGetCount(), and sarrayGetString().
Referenced by getSortedPathnamesInDirectory().
[in] | sain | input sarray |
[in] | first | index of first string to be selected |
[in] | last | index of last string to be selected; use 0 to go to the end of the sarray |
Notes: (1) This makes saout consisting of copies of all strings in sain in the index set [first ... last]. Use last == 0 to get all strings from first to the last string in the sarray.
Definition at line 1200 of file sarray1.c.
References L_COPY, L_INSERT, sarrayAddString(), sarrayCreate(), sarrayGetCount(), and sarrayGetString().
char* sarrayToString | ( | SARRAY * | sa, |
l_int32 | addnlflag | ||
) |
[in] | sa | string array |
[in] | addnlflag | flag: 0 adds nothing to each substring 1 adds '\n' to each substring 2 adds ' ' to each substring 3 adds ',' to each substring |
Notes: (1) Concatenates all the strings in the sarray, preserving all white space. (2) If addnlflag != 0, adds '\n', ' ' or ',' after each substring. (3) This function was NOT implemented as: for (i = 0; i < n; i++) strcat(dest, sarrayGetString(sa, i, L_NOCOPY)); Do you see why?
Definition at line 716 of file sarray1.c.
References sarrayToStringRange().
Referenced by ccbaWriteSVGString(), l_genDataString(), recogExtractNumbers(), sarrayConvertWordsToLines(), and splitStringToParagraphs().
char* sarrayToStringRange | ( | SARRAY * | sa, |
l_int32 | first, | ||
l_int32 | nstrings, | ||
l_int32 | addnlflag | ||
) |
[in] | sa | string array |
[in] | first | index of first string to use; starts with 0 |
[in] | nstrings | number of strings to append into the result; use 0 to append to the end of the sarray |
[in] | addnlflag | flag: 0 adds nothing to each substring 1 adds '\n' to each substring 2 adds ' ' to each substring 3 adds ',' to each substring |
Notes: (1) Concatenates the specified strings in the sarray, preserving all white space. (2) If addnlflag != 0, adds '\n', ' ' or ',' after each substring. (3) If the sarray is empty, this returns a string with just the character corresponding to addnlflag.
Definition at line 749 of file sarray1.c.
References L_NOCOPY, sarrayGetCount(), sarrayGetString(), and stringNew().
Referenced by bmfGetLineStrings(), fileSplitLinesUniform(), sarrayConcatUniformly(), and sarrayToString().
l_ok sarrayWrite | ( | const char * | filename, |
SARRAY * | sa | ||
) |
[in] | filename | |
[in] | sa | string array |
Definition at line 1480 of file sarray1.c.
References fopenWriteStream(), and sarrayWriteStream().
l_ok sarrayWriteMem | ( | l_uint8 ** | pdata, |
size_t * | psize, | ||
SARRAY * | sa | ||
) |
[out] | pdata | data of serialized sarray; ascii |
[out] | psize | size of returned data |
[in] | sa |
Notes: (1) Serializes a sarray in memory and puts the result in a buffer.
Definition at line 1578 of file sarray1.c.
References fopenWriteWinTempfile(), l_binaryReadStream(), and sarrayWriteStream().
l_ok sarrayWriteStderr | ( | SARRAY * | sa | ) |
[in] | sa | string array |
Definition at line 1545 of file sarray1.c.
References Sarray::array, lept_stderr(), SARRAY_VERSION_NUMBER, and sarrayGetCount().
Referenced by parseTrailerPdf(), and sarrayWriteStream().
l_ok sarrayWriteStream | ( | FILE * | fp, |
SARRAY * | sa | ||
) |
[in] | fp | file stream; use NULL to write to stderr |
[in] | sa | string array |
Notes: (1) This appends a '\n' to each string, which is stripped off by sarrayReadStream().
Definition at line 1515 of file sarray1.c.
References Sarray::array, SARRAY_VERSION_NUMBER, sarrayGetCount(), and sarrayWriteStderr().
Referenced by sarrayAppend(), sarrayWrite(), and sarrayWriteMem().