Leptonica  1.83.1
Image processing and image analysis suite
sarray2.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
71 #ifdef HAVE_CONFIG_H
72 #include <config_auto.h>
73 #endif /* HAVE_CONFIG_H */
74 
75 #include <string.h>
76 #include "allheaders.h"
77 #include "array_internal.h"
78 
79 /*----------------------------------------------------------------------*
80  * Sort *
81  *----------------------------------------------------------------------*/
97 SARRAY *
99  SARRAY *sain,
100  l_int32 sortorder)
101 {
102 char **array;
103 char *tmp;
104 l_int32 n, i, j, gap;
105 
106  if (!sain)
107  return (SARRAY *)ERROR_PTR("sain not defined", __func__, NULL);
108 
109  /* Make saout if necessary; otherwise do in-place */
110  if (!saout)
111  saout = sarrayCopy(sain);
112  else if (sain != saout)
113  return (SARRAY *)ERROR_PTR("invalid: not in-place", __func__, NULL);
114  array = saout->array; /* operate directly on the array */
115  n = sarrayGetCount(saout);
116 
117  /* Shell sort */
118  for (gap = n/2; gap > 0; gap = gap / 2) {
119  for (i = gap; i < n; i++) {
120  for (j = i - gap; j >= 0; j -= gap) {
121  if ((sortorder == L_SORT_INCREASING &&
122  stringCompareLexical(array[j], array[j + gap])) ||
123  (sortorder == L_SORT_DECREASING &&
124  stringCompareLexical(array[j + gap], array[j])))
125  {
126  tmp = array[j];
127  array[j] = array[j + gap];
128  array[j + gap] = tmp;
129  }
130  }
131  }
132  }
133 
134  return saout;
135 }
136 
137 
145 SARRAY *
147  NUMA *naindex)
148 {
149 char *str;
150 l_int32 i, n, index;
151 SARRAY *saout;
152 
153  if (!sain)
154  return (SARRAY *)ERROR_PTR("sain not defined", __func__, NULL);
155  if (!naindex)
156  return (SARRAY *)ERROR_PTR("naindex not defined", __func__, NULL);
157 
158  n = sarrayGetCount(sain);
159  saout = sarrayCreate(n);
160  for (i = 0; i < n; i++) {
161  numaGetIValue(naindex, i, &index);
162  str = sarrayGetString(sain, index, L_COPY);
163  sarrayAddString(saout, str, L_INSERT);
164  }
165 
166  return saout;
167 }
168 
169 
183 l_int32
184 stringCompareLexical(const char *str1,
185  const char *str2)
186 {
187 l_int32 i, len1, len2, len;
188 
189  if (!str1)
190  return ERROR_INT("str1 not defined", __func__, 1);
191  if (!str2)
192  return ERROR_INT("str2 not defined", __func__, 1);
193 
194  len1 = strlen(str1);
195  len2 = strlen(str2);
196  len = L_MIN(len1, len2);
197 
198  for (i = 0; i < len; i++) {
199  if (str1[i] == str2[i])
200  continue;
201  if (str1[i] > str2[i])
202  return 1;
203  else
204  return 0;
205  }
206 
207  if (len1 > len2)
208  return 1;
209  else
210  return 0;
211 }
212 
213 
214 /*----------------------------------------------------------------------*
215  * Set operations using aset (rbtree) *
216  *----------------------------------------------------------------------*/
223 L_ASET *
225 {
226 char *str;
227 l_int32 i, n;
228 l_uint64 hash;
229 L_ASET *set;
230 RB_TYPE key;
231 
232  if (!sa)
233  return (L_ASET *)ERROR_PTR("sa not defined", __func__, NULL);
234 
235  set = l_asetCreate(L_UINT_TYPE);
236  n = sarrayGetCount(sa);
237  for (i = 0; i < n; i++) {
238  str = sarrayGetString(sa, i, L_NOCOPY);
239  l_hashStringToUint64Fast(str, &hash);
240  key.utype = hash;
241  l_asetInsert(set, key);
242  }
243 
244  return set;
245 }
246 
247 
265 l_ok
267  SARRAY **psad)
268 {
269 char *str;
270 l_int32 i, n;
271 l_uint64 hash;
272 L_ASET *set;
273 RB_TYPE key;
274 SARRAY *sad;
275 
276  if (!psad)
277  return ERROR_INT("&sad not defined", __func__, 1);
278  *psad = NULL;
279  if (!sas)
280  return ERROR_INT("sas not defined", __func__, 1);
281 
282  set = l_asetCreate(L_UINT_TYPE);
283  sad = sarrayCreate(0);
284  *psad = sad;
285  n = sarrayGetCount(sas);
286  for (i = 0; i < n; i++) {
287  str = sarrayGetString(sas, i, L_NOCOPY);
288  l_hashStringToUint64Fast(str, &hash);
289  key.utype = hash;
290  if (!l_asetFind(set, key)) {
291  sarrayAddString(sad, str, L_COPY);
292  l_asetInsert(set, key);
293  }
294  }
295 
296  l_asetDestroy(&set);
297  return 0;
298 }
299 
300 
319 l_ok
321  SARRAY *sa2,
322  SARRAY **psad)
323 {
324 SARRAY *sa3;
325 
326  if (!psad)
327  return ERROR_INT("&sad not defined", __func__, 1);
328  *psad = NULL;
329  if (!sa1)
330  return ERROR_INT("sa1 not defined", __func__, 1);
331  if (!sa2)
332  return ERROR_INT("sa2 not defined", __func__, 1);
333 
334  /* Join */
335  sa3 = sarrayCopy(sa1);
336  if (sarrayJoin(sa3, sa2) == 1) {
337  sarrayDestroy(&sa3);
338  return ERROR_INT("join failed for sa3", __func__, 1);
339  }
340 
341  /* Eliminate duplicates */
342  sarrayRemoveDupsByAset(sa3, psad);
343  sarrayDestroy(&sa3);
344  return 0;
345 }
346 
347 
368 l_ok
370  SARRAY *sa2,
371  SARRAY **psad)
372 {
373 char *str;
374 l_int32 n1, n2, i, n;
375 l_uint64 hash;
376 L_ASET *set1, *set2;
377 RB_TYPE key;
378 SARRAY *sa_small, *sa_big, *sad;
379 
380  if (!psad)
381  return ERROR_INT("&sad not defined", __func__, 1);
382  *psad = NULL;
383  if (!sa1)
384  return ERROR_INT("sa1 not defined", __func__, 1);
385  if (!sa2)
386  return ERROR_INT("sa2 not defined", __func__, 1);
387 
388  /* Put the elements of the biggest array into a set */
389  n1 = sarrayGetCount(sa1);
390  n2 = sarrayGetCount(sa2);
391  sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */
392  sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */
393  set1 = l_asetCreateFromSarray(sa_big);
394 
395  /* Build up the intersection of strings */
396  sad = sarrayCreate(0);
397  *psad = sad;
398  n = sarrayGetCount(sa_small);
399  set2 = l_asetCreate(L_UINT_TYPE);
400  for (i = 0; i < n; i++) {
401  str = sarrayGetString(sa_small, i, L_NOCOPY);
402  l_hashStringToUint64Fast(str, &hash);
403  key.utype = hash;
404  if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
405  sarrayAddString(sad, str, L_COPY);
406  l_asetInsert(set2, key);
407  }
408  }
409 
410  l_asetDestroy(&set1);
411  l_asetDestroy(&set2);
412  return 0;
413 }
414 
415 
416 /*----------------------------------------------------------------------*
417  * Hashmap operations *
418  *----------------------------------------------------------------------*/
425 L_HASHMAP *
427 {
428 l_int32 i, n;
429 l_uint64 key;
430 char *str;
431 L_HASHITEM *hitem;
432 L_HASHMAP *hmap;
433 
434  if (!sa)
435  return (L_HASHMAP *)ERROR_PTR("sa not defined", __func__, NULL);
436 
437  n = sarrayGetCount(sa);
438  if ((hmap = l_hmapCreate(0.51 * n, 2)) == NULL)
439  return (L_HASHMAP *)ERROR_PTR("hmap not made", __func__, NULL);
440  for (i = 0; i < n; i++) {
441  str = sarrayGetString(sa, i, L_NOCOPY);
442  l_hashStringToUint64Fast(str, &key);
443  hitem = l_hmapLookup(hmap, key, i, L_HMAP_CREATE);
444  }
445  return hmap;
446 }
447 
448 
457 l_ok
459  SARRAY **psad,
460  L_HASHMAP **phmap)
461 {
462 l_int32 i, tabsize;
463 char *str;
464 SARRAY *sad;
465 L_HASHITEM *hitem;
466 L_HASHMAP *hmap;
467 
468  if (phmap) *phmap = NULL;
469  if (!psad)
470  return ERROR_INT("&sad not defined", __func__, 1);
471  *psad = NULL;
472  if (!sas)
473  return ERROR_INT("sas not defined", __func__, 1);
474 
475  /* Traverse the hashtable lists */
476  if ((hmap = l_hmapCreateFromSarray(sas)) == NULL)
477  return ERROR_INT("hmap not made", __func__, 1);
478  sad = sarrayCreate(0);
479  *psad = sad;
480  tabsize = hmap->tabsize;
481  for (i = 0; i < tabsize; i++) {
482  hitem = hmap->hashtab[i];
483  while (hitem) {
484  str = sarrayGetString(sas, hitem->val, L_COPY);
485  sarrayAddString(sad, str, L_INSERT);
486  hitem = hitem->next;
487  }
488  }
489 
490  if (phmap)
491  *phmap = hmap;
492  else
493  l_hmapDestroy(&hmap);
494  return 0;
495 }
496 
497 
506 l_ok
508  SARRAY *sa2,
509  SARRAY **psad)
510 {
511 SARRAY *sa3;
512 
513  if (!psad)
514  return ERROR_INT("&sad not defined", __func__, 1);
515  *psad = NULL;
516  if (!sa1)
517  return ERROR_INT("sa1 not defined", __func__, 1);
518  if (!sa2)
519  return ERROR_INT("sa2 not defined", __func__, 1);
520 
521  sa3 = sarrayCopy(sa1);
522  if (sarrayJoin(sa3, sa2) == 1) {
523  sarrayDestroy(&sa3);
524  return ERROR_INT("sa3 join failed", __func__, 1);
525  }
526  sarrayRemoveDupsByHmap(sa3, psad, NULL);
527  sarrayDestroy(&sa3);
528  return 0;
529 }
530 
531 
540 l_ok
542  SARRAY *sa2,
543  SARRAY **psad)
544 {
545 l_int32 i, n1, n2, n;
546 l_uint64 key;
547 char *str;
548 SARRAY *sa_small, *sa_big, *sa3, *sad;
549 L_HASHITEM *hitem;
550 L_HASHMAP *hmap;
551 
552  if (!psad)
553  return ERROR_INT("&sad not defined", __func__, 1);
554  *psad = NULL;
555  if (!sa1)
556  return ERROR_INT("sa1 not defined", __func__, 1);
557  if (!sa2)
558  return ERROR_INT("sa2 not defined", __func__, 1);
559 
560  /* Make a hashmap for the elements of the biggest array */
561  n1 = sarrayGetCount(sa1);
562  n2 = sarrayGetCount(sa2);
563  sa_small = (n1 < n2) ? sa1 : sa2; /* do not destroy sa_small */
564  sa_big = (n1 < n2) ? sa2 : sa1; /* do not destroy sa_big */
565  if ((hmap = l_hmapCreateFromSarray(sa_big)) == NULL)
566  return ERROR_INT("hmap not made", __func__, 1);
567 
568  /* Remove duplicates from the smallest array. Alternatively,
569  * we can skip this step and avoid counting duplicates in
570  * sa_small by modifying the count fields in the sa_big hashitems;
571  * e.g., see l_hmapIntersectionDna(). */
572  sarrayRemoveDupsByHmap(sa_small, &sa3, NULL);
573 
574  /* Go through sa3, the set of strings derived from the smallest array,
575  * hashing into the big array table. Any string found belongs to both,
576  * so add it to the output array. */
577  sad = sarrayCreate(0);
578  *psad = sad;
579  n = sarrayGetCount(sa3);
580  for (i = 0; i < n; i++) {
581  str = sarrayGetString(sa3, i, L_NOCOPY);
582  l_hashStringToUint64Fast(str, &key);
583  hitem = l_hmapLookup(hmap, key, i, L_HMAP_CHECK);
584  if (hitem)
585  sarrayAddString(sad, str, L_COPY);
586  }
587  l_hmapDestroy(&hmap);
588  sarrayDestroy(&sa3);
589  return 0;
590 }
591 
592 
593 /*----------------------------------------------------------------------*
594  * Miscellaneous operations *
595  *----------------------------------------------------------------------*/
602 SARRAY *
604 {
605 char buf[32];
606 l_int32 i;
607 SARRAY *sa;
608 
609  if ((sa = sarrayCreate(n)) == NULL)
610  return (SARRAY *)ERROR_PTR("sa not made", __func__, NULL);
611  for (i = 0; i < n; i++) {
612  snprintf(buf, sizeof(buf), "%d", i);
613  sarrayAddString(sa, buf, L_COPY);
614  }
615  return sa;
616 }
617 
618 
641 l_ok
643  const char *keystring,
644  char **pvalstring)
645 {
646 char *key, *val, *str;
647 l_int32 i, n;
648 SARRAY *sa1;
649 
650  if (!pvalstring)
651  return ERROR_INT("&valstring not defined", __func__, 1);
652  *pvalstring = NULL;
653  if (!sa)
654  return ERROR_INT("sa not defined", __func__, 1);
655  if (!keystring)
656  return ERROR_INT("keystring not defined", __func__, 1);
657 
658  n = sarrayGetCount(sa);
659  for (i = 0; i < n; i++) {
660  str = sarrayGetString(sa, i, L_NOCOPY);
661  sa1 = sarrayCreate(2);
662  sarraySplitString(sa1, str, ",");
663  if (sarrayGetCount(sa1) != 2) {
664  sarrayDestroy(&sa1);
665  continue;
666  }
667  key = sarrayGetString(sa1, 0, L_NOCOPY);
668  val = sarrayGetString(sa1, 1, L_NOCOPY);
669  if (!strcmp(key, keystring)) {
670  *pvalstring = stringNew(val);
671  sarrayDestroy(&sa1);
672  return 0;
673  }
674  sarrayDestroy(&sa1);
675  }
676 
677  return 0;
678 }
l_ok numaGetIValue(NUMA *na, l_int32 index, l_int32 *pival)
numaGetIValue()
Definition: numabasic.c:720
@ L_COPY
Definition: pix.h:505
@ L_NOCOPY
Definition: pix.h:503
@ L_INSERT
Definition: pix.h:504
@ L_SORT_DECREASING
Definition: pix.h:523
@ L_SORT_INCREASING
Definition: pix.h:522
SARRAY * sarrayCreate(l_int32 n)
sarrayCreate()
Definition: sarray1.c:169
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:673
l_ok sarrayJoin(SARRAY *sa1, SARRAY *sa2)
sarrayJoin()
Definition: sarray1.c:894
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:617
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:353
l_ok sarrayAddString(SARRAY *sa, const char *string, l_int32 copyflag)
sarrayAddString()
Definition: sarray1.c:435
SARRAY * sarrayCopy(SARRAY *sa)
sarrayCopy()
Definition: sarray1.c:386
SARRAY * sarraySortByIndex(SARRAY *sain, NUMA *naindex)
sarraySortByIndex()
Definition: sarray2.c:146
SARRAY * sarrayGenerateIntegers(l_int32 n)
sarrayGenerateIntegers()
Definition: sarray2.c:603
l_ok sarrayRemoveDupsByAset(SARRAY *sas, SARRAY **psad)
sarrayRemoveDupsByAset()
Definition: sarray2.c:266
l_ok sarrayIntersectionByHmap(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayIntersectionByHmap()
Definition: sarray2.c:541
SARRAY * sarraySort(SARRAY *saout, SARRAY *sain, l_int32 sortorder)
sarraySort()
Definition: sarray2.c:98
l_ok sarrayUnionByAset(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayUnionByAset()
Definition: sarray2.c:320
l_int32 stringCompareLexical(const char *str1, const char *str2)
stringCompareLexical()
Definition: sarray2.c:184
l_ok sarrayLookupCSKV(SARRAY *sa, const char *keystring, char **pvalstring)
sarrayLookupCSKV()
Definition: sarray2.c:642
l_ok sarrayUnionByHmap(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayUnionByHmap()
Definition: sarray2.c:507
l_ok sarrayIntersectionByAset(SARRAY *sa1, SARRAY *sa2, SARRAY **psad)
sarrayIntersectionByAset()
Definition: sarray2.c:369
L_ASET * l_asetCreateFromSarray(SARRAY *sa)
l_asetCreateFromSarray()
Definition: sarray2.c:224
L_HASHMAP * l_hmapCreateFromSarray(SARRAY *sa)
l_hmapCreateFromSarray()
Definition: sarray2.c:426
l_ok sarrayRemoveDupsByHmap(SARRAY *sas, SARRAY **psad, L_HASHMAP **phmap)
sarrayRemoveDupsByHmap()
Definition: sarray2.c:458
l_uint64 val
Definition: hashmap.h:117
struct L_Hashitem * next
Definition: hashmap.h:119
struct L_Hashitem ** hashtab
Definition: hashmap.h:106
l_int32 tabsize
Definition: hashmap.h:107
char ** array
Definition: rbtree.h:62
l_ok l_hashStringToUint64Fast(const char *str, l_uint64 *phash)
l_hashStringToUint64Fast()
Definition: utils1.c:759
char * stringNew(const char *src)
stringNew()
Definition: utils2.c:223