139 #include <config_auto.h>
145 #include <sys/stat.h>
149 #include "allheaders.h"
/* Upper bound on the number of string pointers in a pointer array.
 * The array-extension code refuses to grow an array that has already
 * reached this size and clamps a doubled allocation to it; the stream
 * reader rejects a declared string count above it.  The creation code
 * also compares the requested size against it. */
152 static const l_uint32 MaxPtrArraySize = 50000000;
173 if (n <= 0 || n > MaxPtrArraySize)
177 if ((sa->
array = (
char **)LEPT_CALLOC(n,
sizeof(
char *))) == NULL) {
179 return (
SARRAY *)ERROR_PTR(
"ptr array not made", __func__, NULL);
204 return (
SARRAY *)ERROR_PTR(
"n must be > 0", __func__, NULL);
206 return (
SARRAY *)ERROR_PTR(
"initstr not defined", __func__, NULL);
209 for (i = 0; i < n; i++)
230 char separators[] =
" \n\t";
231 l_int32 i, nsub, size, inword;
235 return (
SARRAY *)ERROR_PTR(
"textstr not defined", __func__, NULL);
238 size = strlen(
string);
241 for (i = 0; i < size; i++) {
242 if (inword == FALSE &&
243 (
string[i] !=
' ' &&
string[i] !=
'\t' &&
string[i] !=
'\n')) {
246 }
else if (inword == TRUE &&
247 (
string[i] ==
' ' ||
string[i] ==
'\t' ||
string[i] ==
'\n')) {
253 return (
SARRAY *)ERROR_PTR(
"sa not made", __func__, NULL);
254 sarraySplitString(sa,
string, separators);
279 l_int32 i, nsub, size, startptr;
280 char *cstring, *substring;
284 return (
SARRAY *)ERROR_PTR(
"textstr not defined", __func__, NULL);
287 size = strlen(
string);
289 for (i = 0; i < size; i++) {
290 if (
string[i] ==
'\n')
295 return (
SARRAY *)ERROR_PTR(
"sa not made", __func__, NULL);
299 if ((cstring =
stringNew(
string)) == NULL) {
301 return (
SARRAY *)ERROR_PTR(
"cstring not made", __func__, NULL);
305 for (i = 0; i < size; i++) {
306 if (cstring[i] ==
'\n') {
308 if (i > 0 && cstring[i - 1] ==
'\r')
309 cstring[i - 1] =
'\0';
310 if ((substring =
stringNew(cstring + startptr)) == NULL) {
313 return (
SARRAY *)ERROR_PTR(
"substring not made",
321 if (startptr < size) {
322 if ((substring =
stringNew(cstring + startptr)) == NULL) {
325 return (
SARRAY *)ERROR_PTR(
"substring not made",
333 sarraySplitString(sa,
string,
"\r\n");
359 L_WARNING(
"ptr address is NULL!\n", __func__);
362 if ((sa = *psa) == NULL)
367 for (i = 0; i < sa->
n; i++) {
369 LEPT_FREE(sa->
array[i]);
371 LEPT_FREE(sa->
array);
392 return (
SARRAY *)ERROR_PTR(
"sa not defined", __func__, NULL);
395 return (
SARRAY *)ERROR_PTR(
"csa not made", __func__, NULL);
397 for (i = 0; i < sa->
n; i++)
414 return (
SARRAY *)ERROR_PTR(
"sa not defined", __func__, NULL);
442 return ERROR_INT(
"sa not defined", __func__, 1);
444 return ERROR_INT(
"string not defined", __func__, 1);
446 return ERROR_INT(
"invalid copyflag", __func__, 1);
451 return ERROR_INT(
"extension failed", __func__, 1);
457 sa->
array[n] = (
char *)
string;
478 size_t oldsize, newsize;
481 return ERROR_INT(
"sa not defined", __func__, 1);
482 if (sa->
nalloc >= MaxPtrArraySize)
483 return ERROR_INT(
"sa at maximum ptr size; can't extend", __func__, 1);
484 oldsize = sa->
nalloc *
sizeof(
char *);
485 if (sa->
nalloc > MaxPtrArraySize / 2) {
486 newsize = MaxPtrArraySize *
sizeof(
char *);
487 sa->
nalloc = MaxPtrArraySize;
489 newsize = 2 * oldsize;
493 oldsize, newsize)) == NULL)
494 return ERROR_INT(
"new ptr array not returned", __func__, 1);
513 l_int32 i, n, nalloc;
516 return (
char *)ERROR_PTR(
"sa not defined", __func__, NULL);
519 return (
char *)ERROR_PTR(
"array not returned", __func__, NULL);
521 if (index < 0 || index >= n)
522 return (
char *)ERROR_PTR(
"array index out of bounds", __func__, NULL);
524 string = array[index];
530 for (i = index; i < n - 1; i++)
531 array[i] = array[i + 1];
566 return ERROR_INT(
"sa not defined", __func__, 1);
568 if (index < 0 || index >= n)
569 return ERROR_INT(
"array index out of bounds", __func__, 1);
571 return ERROR_INT(
"newstr not defined", __func__, 1);
573 return ERROR_INT(
"invalid copyflag", __func__, 1);
575 LEPT_FREE(sa->
array[index]);
580 sa->
array[index] = str;
597 return ERROR_INT(
"sa not defined", __func__, 1);
598 for (i = 0; i < sa->
n; i++) {
599 LEPT_FREE(sa->
array[i]);
620 return ERROR_INT(
"sa not defined", __func__, 0);
647 return (
char **)ERROR_PTR(
"sa not defined", __func__, NULL);
650 if (pnalloc) *pnalloc = sa->
nalloc;
678 return (
char *)ERROR_PTR(
"sa not defined", __func__, NULL);
679 if (index < 0 || index >= sa->
n)
680 return (
char *)ERROR_PTR(
"index not valid", __func__, NULL);
682 return (
char *)ERROR_PTR(
"invalid copyflag", __func__, NULL);
685 return sa->
array[index];
720 return (
char *)ERROR_PTR(
"sa not defined", __func__, NULL);
754 char *dest, *src, *str;
755 l_int32 n, i, last, size, index, len;
758 return (
char *)ERROR_PTR(
"sa not defined", __func__, NULL);
759 if (addnlflag != 0 && addnlflag != 1 && addnlflag != 2 && addnlflag != 3)
760 return (
char *)ERROR_PTR(
"invalid addnlflag", __func__, NULL);
776 return (
char *)ERROR_PTR(
"first not valid", __func__, NULL);
781 if (first < 0 || first >= n)
782 return (
char *)ERROR_PTR(
"first not valid", __func__, NULL);
783 if (nstrings == 0 || (nstrings > n - first))
784 nstrings = n - first;
785 last = first + nstrings - 1;
789 for (i = first; i <= last; i++) {
791 return (
char *)ERROR_PTR(
"str not found", __func__, NULL);
792 size += strlen(str) + 2;
794 if ((dest = (
char *)LEPT_CALLOC(size + 1,
sizeof(
char))) == NULL)
795 return (
char *)ERROR_PTR(
"dest not made", __func__, NULL);
799 for (i = first; i <= last; i++) {
802 memcpy(dest + index, src, len);
804 if (addnlflag == 1) {
807 }
else if (addnlflag == 2) {
810 }
else if (addnlflag == 3) {
848 l_int32 i, first, ntot, nstr;
854 return (
SARRAY *)ERROR_PTR(
"sa not defined", __func__, NULL);
857 return (
SARRAY *)ERROR_PTR(
"n must be >= 1", __func__, NULL);
859 L_ERROR(
"n = %d > ntot = %d\n", __func__, n, ntot);
862 if (addnlflag != 0 && addnlflag != 1 && addnlflag != 2 && addnlflag != 3)
863 return (
SARRAY *)ERROR_PTR(
"invalid addnlflag", __func__, NULL);
867 for (i = 0, first = 0; i < n; i++) {
901 return ERROR_INT(
"sa1 not defined", __func__, 1);
903 return ERROR_INT(
"sa2 not defined", __func__, 1);
906 for (i = 0; i < n; i++) {
909 L_ERROR(
"failed to add string at i = %d\n", __func__, i);
944 return ERROR_INT(
"sa1 not defined", __func__, 1);
946 return ERROR_INT(
"sa2 not defined", __func__, 1);
951 if (end < 0 || end >= n)
954 return ERROR_INT(
"start > end", __func__, 1);
956 for (i = start; i <= end; i++) {
987 const char *padstring)
992 return ERROR_INT(
"both sa1 and sa2 not defined", __func__, 1);
997 for (i = n1; i < n2; i++)
999 }
else if (n1 > n2) {
1000 for (i = n2; i < n1; i++)
1046 char emptystring[] =
"";
1047 l_int32 n, i, len, totlen;
1051 return (
SARRAY *)ERROR_PTR(
"sa not defined", __func__, NULL);
1057 for (i = 0; i < n; i++) {
1070 }
else if (totlen == 0 && len + 1 > linesize) {
1072 }
else if (totlen + len + 1 > linesize) {
1111 sarraySplitString(
SARRAY *sa,
1113 const char *separators)
1115 char *cstr, *substr, *saveptr;
1118 return ERROR_INT(
"sa not defined", __func__, 1);
1120 return ERROR_INT(
"str not defined", __func__, 1);
1122 return ERROR_INT(
"separators not defined", __func__, 1);
1126 substr =
strtokSafe(cstr, separators, &saveptr);
1129 while ((substr =
strtokSafe(NULL, separators, &saveptr)))
1160 l_int32 n, i, offset, found;
1164 return (
SARRAY *)ERROR_PTR(
"sain not defined", __func__, NULL);
1167 if (!substr || n == 0)
1171 for (i = 0; i < n; i++) {
1174 strlen(substr), &offset, &found);
1209 return (
SARRAY *)ERROR_PTR(
"sain not defined", __func__, NULL);
1210 if (first < 0) first = 0;
1212 if (last <= 0) last = n - 1;
1214 L_WARNING(
"last > n - 1; setting to n - 1\n", __func__);
1218 return (
SARRAY *)ERROR_PTR(
"first must be >= last", __func__, NULL);
1221 for (i = first; i <= last; i++) {
1269 l_int32 *pactualstart,
1276 l_int32 n, i, offset, found;
1279 return ERROR_INT(
"sa not defined", __func__, 1);
1280 if (!pactualstart || !pend || !pnewstart)
1281 return ERROR_INT(
"not all range addresses defined", __func__, 1);
1283 *pactualstart = *pend = *pnewstart = n;
1285 return ERROR_INT(
"substr not defined", __func__, 1);
1288 if (start < 0 || start >= n)
1290 for (i = start; i < n; i++) {
1293 strlen(substr), &offset, &found);
1297 if (!found || offset != loc)
break;
1305 *pactualstart = start;
1306 for (i = start + 1; i < n; i++) {
1309 strlen(substr), &offset, &found);
1313 if (found && offset == loc)
break;
1323 for (i = start; i < n; i++) {
1326 strlen(substr), &offset, &found);
1330 if (!found || offset != loc)
break;
1356 return (
SARRAY *)ERROR_PTR(
"filename not defined", __func__, NULL);
1359 return (
SARRAY *)ERROR_PTR(
"stream not opened", __func__, NULL);
1363 return (
SARRAY *)ERROR_PTR(
"sa not read", __func__, NULL);
1389 l_int32 i, n, size, index, bufsize, version, ignore, success;
1393 return (
SARRAY *)ERROR_PTR(
"stream not defined", __func__, NULL);
1395 if (fscanf(fp,
"\nSarray Version %d\n", &version) != 1)
1396 return (
SARRAY *)ERROR_PTR(
"not an sarray file", __func__, NULL);
1398 return (
SARRAY *)ERROR_PTR(
"invalid sarray version", __func__, NULL);
1399 if (fscanf(fp,
"Number of strings = %d\n", &n) != 1)
1400 return (
SARRAY *)ERROR_PTR(
"error on # strings", __func__, NULL);
1402 return (
SARRAY *)ERROR_PTR(
"num string ptrs <= 0", __func__, NULL);
1403 if (n > MaxPtrArraySize)
1404 return (
SARRAY *)ERROR_PTR(
"too many string ptrs", __func__, NULL);
1405 if (n == 0) L_INFO(
"the sarray is empty\n", __func__);
1409 return (
SARRAY *)ERROR_PTR(
"sa not made", __func__, NULL);
1411 stringbuf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char));
1413 for (i = 0; i < n; i++) {
1415 if ((fscanf(fp,
"%d[%d]:", &index, &size) != 2) || (size > (1 << 30))) {
1417 L_ERROR(
"error on string size\n", __func__);
1421 if (size > bufsize - 5) {
1422 LEPT_FREE(stringbuf);
1423 bufsize = (l_int32)(1.5 * size);
1424 stringbuf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char));
1427 if (fread(stringbuf, 1, size + 3, fp) != size + 3) {
1429 L_ERROR(
"error reading string\n", __func__);
1433 stringbuf[size + 2] =
'\0';
1437 ignore = fscanf(fp,
"\n");
1440 LEPT_FREE(stringbuf);
1461 return (
SARRAY *)ERROR_PTR(
"data not defined", __func__, NULL);
1463 return (
SARRAY *)ERROR_PTR(
"stream not opened", __func__, NULL);
1467 if (!sa) L_ERROR(
"sarray not read\n", __func__);
1487 return ERROR_INT(
"filename not defined", __func__, 1);
1489 return ERROR_INT(
"sa not defined", __func__, 1);
1492 return ERROR_INT(
"stream not opened", __func__, 1);
1496 return ERROR_INT(
"sa not written to stream", __func__, 1);
1521 return ERROR_INT(
"stream not defined", __func__, 1);
1527 fprintf(fp,
"Number of strings = %d\n", n);
1528 for (i = 0; i < n; i++) {
1529 len = strlen(sa->
array[i]);
1530 fprintf(fp,
" %d[%d]: %s\n", i, len, sa->
array[i]);
1550 return ERROR_INT(
"sa not defined", __func__, 1);
1555 for (i = 0; i < n; i++) {
1556 len = strlen(sa->
array[i]);
1585 if (pdata) *pdata = NULL;
1586 if (psize) *psize = 0;
1588 return ERROR_INT(
"&data not defined", __func__, 1);
1590 return ERROR_INT(
"&size not defined", __func__, 1);
1592 return ERROR_INT(
"sa not defined", __func__, 1);
1595 if ((fp = open_memstream((
char **)pdata, psize)) == NULL)
1596 return ERROR_INT(
"stream not opened", __func__, 1);
1600 *psize = *psize - 1;
1602 L_INFO(
"work-around: writing to a temp file\n", __func__);
1605 return ERROR_INT(
"tmpfile stream not opened", __func__, 1);
1607 if ((fp = tmpfile()) == NULL)
1608 return ERROR_INT(
"tmpfile stream not opened", __func__, 1);
1633 return ERROR_INT(
"filename not defined", __func__, 1);
1635 return ERROR_INT(
"sa not defined", __func__, 1);
1638 return ERROR_INT(
"stream not opened", __func__, 1);
1641 return ERROR_INT(
"sa not appended to stream", __func__, 1);
1703 return (
SARRAY *)ERROR_PTR(
"dirname not defined", __func__, NULL);
1706 return (
SARRAY *)ERROR_PTR(
"sa not made", __func__, NULL);
1744 char *fname, *fullname;
1746 SARRAY *sa, *safiles, *saout;
1749 return (
SARRAY *)ERROR_PTR(
"dirname not defined", __func__, NULL);
1752 return (
SARRAY *)ERROR_PTR(
"sa not made", __func__, NULL);
1757 L_WARNING(
"no files found\n", __func__);
1763 first = L_MIN(L_MAX(first, 0), n - 1);
1766 last = L_MIN(first + nfiles - 1, n - 1);
1769 for (i = first; i <= last; i++) {
1771 fullname =
pathJoin(dirname, fname);
1803 l_int32 i, nfiles, num, index;
1807 return (
SARRAY *)ERROR_PTR(
"sa not defined", __func__, NULL);
1816 for (i = nfiles - 1; i >= 0; i--) {
1819 if (num < 0)
continue;
1820 num = L_MIN(num + 1, maxnum);
1830 for (i = 0; i < nfiles; i++) {
1833 if (index < 0 || index >= num)
continue;
1835 if (str[0] !=
'\0') {
1836 L_WARNING(
"\n Multiple files with same number: %d\n",
1879 char dir[PATH_MAX + 1];
1880 char *realdir, *stat_path, *ignore;
1884 struct dirent *pdirentry;
1889 return (
SARRAY *)ERROR_PTR(
"dirname not defined", __func__, NULL);
1890 if (dirname[0] ==
'\0')
1891 return (
SARRAY *)ERROR_PTR(
"dirname is empty", __func__, NULL);
1907 ignore = realpath(dirname, dir);
1909 return (
SARRAY *)ERROR_PTR(
"dir not made", __func__, NULL);
1911 if ((pdir = opendir(realdir)) == NULL) {
1913 return (
SARRAY *)ERROR_PTR(
"pdir not opened", __func__, NULL);
1916 while ((pdirentry = readdir(pdir))) {
1917 #if HAVE_DIRFD && HAVE_FSTATAT
1921 stat_ret = fstatat(dfd, pdirentry->d_name, &st, 0);
1923 size = strlen(realdir) + strlen(pdirentry->d_name) + 2;
1924 if (size > PATH_MAX) {
1925 L_ERROR(
"size = %zu too large; skipping\n", __func__, size);
1928 stat_path = (
char *)LEPT_CALLOC(size, 1);
1929 snprintf(stat_path, size,
"%s/%s", realdir, pdirentry->d_name);
1930 stat_ret = stat(stat_path, &st);
1931 LEPT_FREE(stat_path);
1933 if (stat_ret == 0 && S_ISDIR(st.st_mode))
1945 #include <windows.h>
1952 HANDLE hFind = INVALID_HANDLE_VALUE;
1954 WIN32_FIND_DATAA ffd;
1957 return (
SARRAY *)ERROR_PTR(
"dirname not defined", __func__, NULL);
1963 if (strlen(pszDir) + 1 > MAX_PATH) {
1965 return (
SARRAY *)ERROR_PTR(
"dirname is too long", __func__, NULL);
1970 return (
SARRAY *)ERROR_PTR(
"safiles not made", __func__, NULL);
1973 hFind = FindFirstFileA(pszDir, &ffd);
1974 if (INVALID_HANDLE_VALUE == hFind) {
1977 return (
SARRAY *)ERROR_PTR(
"hFind not opened", __func__, NULL);
1980 while (FindNextFileA(hFind, &ffd) != 0) {
1981 if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
#define SARRAY_VERSION_NUMBER
void numaDestroy(NUMA **pna)
numaDestroy()
l_ok numaGetIValue(NUMA *na, l_int32 index, l_int32 *pival)
numaGetIValue()
NUMA * numaGetUniformBinSizes(l_int32 ntotal, l_int32 nbins)
numaGetUniformBinSizes()
SARRAY * sarraySelectBySubstring(SARRAY *sain, const char *substr)
sarraySelectBySubstring()
SARRAY * sarrayCreate(l_int32 n)
sarrayCreate()
SARRAY * sarrayRead(const char *filename)
sarrayRead()
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
l_ok sarrayWriteStderr(SARRAY *sa)
sarrayWriteStderr()
static const l_int32 InitialPtrArraySize
l_ok sarrayJoin(SARRAY *sa1, SARRAY *sa2)
sarrayJoin()
SARRAY * sarraySelectRange(SARRAY *sain, l_int32 first, l_int32 last)
sarraySelectRange()
char ** sarrayGetArray(SARRAY *sa, l_int32 *pnalloc, l_int32 *pn)
sarrayGetArray()
char * sarrayToStringRange(SARRAY *sa, l_int32 first, l_int32 nstrings, l_int32 addnlflag)
sarrayToStringRange()
l_ok sarrayPadToSameSize(SARRAY *sa1, SARRAY *sa2, const char *padstring)
sarrayPadToSameSize()
SARRAY * getFilenamesInDirectory(const char *dirname)
getFilenamesInDirectory()
l_ok sarrayReplaceString(SARRAY *sa, l_int32 index, char *newstr, l_int32 copyflag)
sarrayReplaceString()
l_ok sarrayAppend(const char *filename, SARRAY *sa)
sarrayAppend()
SARRAY * sarrayConvertWordsToLines(SARRAY *sa, l_int32 linesize)
sarrayConvertWordsToLines()
SARRAY * sarrayReadStream(FILE *fp)
sarrayReadStream()
SARRAY * sarrayClone(SARRAY *sa)
sarrayClone()
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
SARRAY * sarrayCreateWordsFromString(const char *string)
sarrayCreateWordsFromString()
SARRAY * sarrayCreateLinesFromString(const char *string, l_int32 blankflag)
sarrayCreateLinesFromString()
l_ok sarrayClear(SARRAY *sa)
sarrayClear()
char * sarrayRemoveString(SARRAY *sa, l_int32 index)
sarrayRemoveString()
static l_int32 sarrayExtendArray(SARRAY *sa)
sarrayExtendArray()
l_ok sarrayAddString(SARRAY *sa, const char *string, l_int32 copyflag)
sarrayAddString()
SARRAY * sarrayReadMem(const l_uint8 *data, size_t size)
sarrayReadMem()
SARRAY * sarrayCreateInitialized(l_int32 n, const char *initstr)
sarrayCreateInitialized()
l_ok sarrayWrite(const char *filename, SARRAY *sa)
sarrayWrite()
l_int32 sarrayParseRange(SARRAY *sa, l_int32 start, l_int32 *pactualstart, l_int32 *pend, l_int32 *pnewstart, const char *substr, l_int32 loc)
sarrayParseRange()
char * sarrayToString(SARRAY *sa, l_int32 addnlflag)
sarrayToString()
SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 first, l_int32 nfiles)
getSortedPathnamesInDirectory()
SARRAY * sarrayConcatUniformly(SARRAY *sa, l_int32 n, l_int32 addnlflag)
sarrayConcatUniformly()
l_ok sarrayWriteStream(FILE *fp, SARRAY *sa)
sarrayWriteStream()
l_ok sarrayWriteMem(l_uint8 **pdata, size_t *psize, SARRAY *sa)
sarrayWriteMem()
SARRAY * convertSortedToNumberedPathnames(SARRAY *sa, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
convertSortedToNumberedPathnames()
l_ok sarrayAppendRange(SARRAY *sa1, SARRAY *sa2, l_int32 start, l_int32 end)
sarrayAppendRange()
SARRAY * sarrayCopy(SARRAY *sa)
sarrayCopy()
SARRAY * getNumberedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
getNumberedPathnamesInDirectory()
SARRAY * sarraySort(SARRAY *saout, SARRAY *sain, l_int32 sortorder)
sarraySort()
void lept_stderr(const char *fmt,...)
lept_stderr()
char * stringNew(const char *src)
stringNew()
l_uint8 * l_binaryReadStream(FILE *fp, size_t *pnbytes)
l_binaryReadStream()
char * strtokSafe(char *cstr, const char *seps, char **psaveptr)
strtokSafe()
FILE * fopenReadFromMemory(const l_uint8 *data, size_t size)
fopenReadFromMemory()
FILE * fopenWriteStream(const char *filename, const char *modestring)
fopenWriteStream()
char * pathJoin(const char *dir, const char *fname)
pathJoin()
char * genPathname(const char *dir, const char *fname)
genPathname()
l_ok convertSepCharsInPath(char *path, l_int32 type)
convertSepCharsInPath()
char * stringJoin(const char *src1, const char *src2)
stringJoin()
void * reallocNew(void **pindata, size_t oldsize, size_t newsize)
reallocNew()
FILE * fopenWriteWinTempfile(void)
fopenWriteWinTempfile()
FILE * fopenReadStream(const char *filename)
fopenReadStream()
l_int32 extractNumberFromFilename(const char *fname, l_int32 numpre, l_int32 numpost)
extractNumberFromFilename()
l_ok arrayFindSequence(const l_uint8 *data, size_t datalen, const l_uint8 *sequence, size_t seqlen, l_int32 *poffset, l_int32 *pfound)
arrayFindSequence()