Leptonica  1.83.1
Image processing and image analysis suite
partition.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
45 #ifdef HAVE_CONFIG_H
46 #include <config_auto.h>
47 #endif /* HAVE_CONFIG_H */
48 
49 #include "allheaders.h"
50 
53  l_float32 size; /* sorting key */
54  BOX *box; /* region of the element */
55  BOXA *boxa; /* set of intersecting boxes */
56 };
57 typedef struct PartitionElement PARTEL;
58 
59 static PARTEL * partelCreate(BOX *box);
60 static void partelDestroy(PARTEL **ppartel);
61 static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag);
62 static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim,
63  l_float32 fract);
64 static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim,
65  l_float32 fract);
66 static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa,
67  l_float32 maxoverlap);
68 
69 static const l_int32 DefaultMaxPops = 20000;
70 
71 
72 #ifndef NO_CONSOLE_IO
73 #define OUTPUT_HEAP_STATS 0
74 #endif /* ~NO_CONSOLE_IO */
75 
76 /*------------------------------------------------------------------*
77  * Whitespace block extraction *
78  *------------------------------------------------------------------*/
191 BOXA *
193  BOX *box,
194  l_int32 sortflag,
195  l_int32 maxboxes,
196  l_float32 maxoverlap,
197  l_int32 maxperim,
198  l_float32 fract,
199  l_int32 maxpops)
200 {
201 l_int32 i, w, h, n, nsub, npush, npop;
202 BOX *boxsub;
203 BOXA *boxa, *boxa4, *boxasub, *boxad;
204 PARTEL *partel;
205 L_HEAP *lh;
206 
207  if (!boxas)
208  return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
209  if (sortflag != L_SORT_BY_WIDTH && sortflag != L_SORT_BY_HEIGHT &&
210  sortflag != L_SORT_BY_MIN_DIMENSION &&
211  sortflag != L_SORT_BY_MAX_DIMENSION &&
212  sortflag != L_SORT_BY_PERIMETER && sortflag != L_SORT_BY_AREA)
213  return (BOXA *)ERROR_PTR("invalid sort flag", __func__, NULL);
214  if (maxboxes < 1) {
215  maxboxes = 1;
216  L_WARNING("setting maxboxes = 1\n", __func__);
217  }
218  if (maxoverlap < 0.0 || maxoverlap > 1.0)
219  return (BOXA *)ERROR_PTR("invalid maxoverlap", __func__, NULL);
220  if (maxpops == 0)
221  maxpops = DefaultMaxPops;
222 
223  if (!box) {
224  boxaGetExtent(boxas, &w, &h, NULL);
225  box = boxCreate(0, 0, w, h);
226  }
227 
228  /* Prime the heap */
229  lh = lheapCreate(20, L_SORT_DECREASING);
230  partel = partelCreate(box);
231  partel->boxa = boxaCopy(boxas, L_CLONE);
232  partelSetSize(partel, sortflag);
233  lheapAdd(lh, partel);
234 
235  npush = 1;
236  npop = 0;
237  boxad = boxaCreate(0);
238  while (1) {
239  if ((partel = (PARTEL *)lheapRemove(lh)) == NULL) /* we're done */
240  break;
241 
242  npop++; /* How many boxes have we retrieved from the queue? */
243  if (npop > maxpops) {
244  partelDestroy(&partel);
245  break;
246  }
247 
248  /* Extract the contents */
249  boxa = boxaCopy(partel->boxa, L_CLONE);
250  box = boxClone(partel->box);
251  partelDestroy(&partel);
252 
253  /* Can we output this one? */
254  n = boxaGetCount(boxa);
255  if (n == 0) {
256  if (boxCheckIfOverlapIsBig(box, boxad, maxoverlap) == 0)
257  boxaAddBox(boxad, box, L_INSERT);
258  else
259  boxDestroy(&box);
260  boxaDestroy(&boxa);
261  if (boxaGetCount(boxad) >= maxboxes) /* we're done */
262  break;
263  continue;
264  }
265 
266 
267  /* Generate up to 4 subboxes and put them on the heap */
268  boxa4 = boxaGenerateSubboxes(box, boxa, maxperim, fract);
269  boxDestroy(&box);
270  nsub = boxaGetCount(boxa4);
271  for (i = 0; i < nsub; i++) {
272  boxsub = boxaGetBox(boxa4, i, L_CLONE);
273  boxasub = boxaIntersectsBox(boxa, boxsub);
274  partel = partelCreate(boxsub);
275  partel->boxa = boxasub;
276  partelSetSize(partel, sortflag);
277  lheapAdd(lh, partel);
278  boxDestroy(&boxsub);
279  }
280  npush += nsub; /* How many boxes have we put on the queue? */
281 
282 /* boxaWriteStderr(boxa4); */
283 
284  boxaDestroy(&boxa4);
285  boxaDestroy(&boxa);
286  }
287 
288 #if OUTPUT_HEAP_STATS
289  lept_stderr("Heap statistics:\n");
290  lept_stderr(" Number of boxes pushed: %d\n", npush);
291  lept_stderr(" Number of boxes popped: %d\n", npop);
292  lept_stderr(" Number of boxes on heap: %d\n", lheapGetCount(lh));
293 #endif /* OUTPUT_HEAP_STATS */
294 
295  /* Clean up the heap */
296  while ((partel = (PARTEL *)lheapRemove(lh)) != NULL)
297  partelDestroy(&partel);
298  lheapDestroy(&lh, FALSE);
299 
300  return boxad;
301 }
302 
303 
304 /*------------------------------------------------------------------*
305  * Helpers *
306  *------------------------------------------------------------------*/
313 static PARTEL *
315 {
316 PARTEL *partel;
317 
318  partel = (PARTEL *)LEPT_CALLOC(1, sizeof(PARTEL));
319  partel->box = boxCopy(box);
320  return partel;
321 }
322 
323 
330 static void
332 {
333 PARTEL *partel;
334 
335  if (ppartel == NULL) {
336  L_WARNING("ptr address is null!\n", __func__);
337  return;
338  }
339 
340  if ((partel = *ppartel) == NULL)
341  return;
342 
343  boxDestroy(&partel->box);
344  boxaDestroy(&partel->boxa);
345  LEPT_FREE(partel);
346  *ppartel = NULL;
347  return;
348 }
349 
350 
360 static l_int32
362  l_int32 sortflag)
363 {
364 l_int32 w, h;
365 
366  if (!partel)
367  return ERROR_INT("partel not defined", __func__, 1);
368 
369  boxGetGeometry(partel->box, NULL, NULL, &w, &h);
370  if (sortflag == L_SORT_BY_WIDTH)
371  partel->size = (l_float32)w;
372  else if (sortflag == L_SORT_BY_HEIGHT)
373  partel->size = (l_float32)h;
374  else if (sortflag == L_SORT_BY_MIN_DIMENSION)
375  partel->size = (l_float32)L_MIN(w, h);
376  else if (sortflag == L_SORT_BY_MAX_DIMENSION)
377  partel->size = (l_float32)L_MAX(w, h);
378  else if (sortflag == L_SORT_BY_PERIMETER)
379  partel->size = (l_float32)(w + h);
380  else if (sortflag == L_SORT_BY_AREA)
381  partel->size = (l_float32)(w * h);
382  else
383  return ERROR_INT("invalid sortflag", __func__, 1);
384  return 0;
385 }
386 
387 
401 static BOXA *
403  BOXA *boxa,
404  l_int32 maxperim,
405  l_float32 fract)
406 {
407 l_int32 x, y, w, h, xp, yp, wp, hp;
408 BOX *boxp; /* pivot box */
409 BOX *boxsub;
410 BOXA *boxa4;
411 
412  if (!box)
413  return (BOXA *)ERROR_PTR("box not defined", __func__, NULL);
414  if (!boxa)
415  return (BOXA *)ERROR_PTR("boxa not defined", __func__, NULL);
416 
417  boxa4 = boxaCreate(4);
418  boxp = boxaSelectPivotBox(box, boxa, maxperim, fract);
419  boxGetGeometry(box, &x, &y, &w, &h);
420  boxGetGeometry(boxp, &xp, &yp, &wp, &hp);
421  boxDestroy(&boxp);
422  if (xp > x) { /* left sub-box */
423  boxsub = boxCreate(x, y, xp - x, h);
424  boxaAddBox(boxa4, boxsub, L_INSERT);
425  }
426  if (yp > y) { /* top sub-box */
427  boxsub = boxCreate(x, y, w, yp - y);
428  boxaAddBox(boxa4, boxsub, L_INSERT);
429  }
430  if (xp + wp < x + w) { /* right sub-box */
431  boxsub = boxCreate(xp + wp, y, x + w - xp - wp, h);
432  boxaAddBox(boxa4, boxsub, L_INSERT);
433  }
434  if (yp + hp < y + h) { /* bottom sub-box */
435  boxsub = boxCreate(x, yp + hp, w, y + h - yp - hp);
436  boxaAddBox(boxa4, boxsub, L_INSERT);
437  }
438 
439  return boxa4;
440 }
441 
442 
477 static BOX *
479  BOXA *boxa,
480  l_int32 maxperim,
481  l_float32 fract)
482 {
483 l_int32 i, n, bw, bh, w, h;
484 l_int32 smallfound, minindex, perim, minsize;
485 l_float32 delx, dely, mindist, threshdist, dist, x, y, cx, cy;
486 BOX *boxt;
487 
488  if (!box)
489  return (BOX *)ERROR_PTR("box not defined", __func__, NULL);
490  if (!boxa)
491  return (BOX *)ERROR_PTR("boxa not defined", __func__, NULL);
492  n = boxaGetCount(boxa);
493  if (n == 0)
494  return (BOX *)ERROR_PTR("no boxes in boxa", __func__, NULL);
495  if (fract < 0.0 || fract > 1.0) {
496  L_WARNING("fract out of bounds; using 0.0\n", __func__);
497  fract = 0.0;
498  }
499 
500  boxGetGeometry(box, NULL, NULL, &w, &h);
501  boxGetCenter(box, &x, &y);
502  threshdist = fract * (w * w + h * h);
503  mindist = 1000000000.;
504  minindex = 0;
505  smallfound = FALSE;
506  for (i = 0; i < n; i++) {
507  boxt = boxaGetBox(boxa, i, L_CLONE);
508  boxGetGeometry(boxt, NULL, NULL, &bw, &bh);
509  boxGetCenter(boxt, &cx, &cy);
510  boxDestroy(&boxt);
511  if (bw + bh > maxperim)
512  continue;
513  smallfound = TRUE;
514  delx = cx - x;
515  dely = cy - y;
516  dist = delx * delx + dely * dely;
517  if (dist <= threshdist)
518  return boxaGetBox(boxa, i, L_COPY);
519  if (dist < mindist) {
520  minindex = i;
521  mindist = dist;
522  }
523  }
524 
525  /* If there are small boxes but none are within 'fract' of the
526  * centroid, return the nearest one. */
527  if (smallfound == TRUE)
528  return boxaGetBox(boxa, minindex, L_COPY);
529 
530  /* No small boxes; return the smallest of the large boxes */
531  minsize = 1000000000;
532  minindex = 0;
533  for (i = 0; i < n; i++) {
534  boxaGetBoxGeometry(boxa, i, NULL, NULL, &bw, &bh);
535  perim = bw + bh;
536  if (perim < minsize) {
537  minsize = perim;
538  minindex = i;
539  }
540  }
541  return boxaGetBox(boxa, minindex, L_COPY);
542 }
543 
544 
555 static l_int32
557  BOXA *boxa,
558  l_float32 maxoverlap)
559 {
560 l_int32 i, n, bigoverlap;
561 l_float32 fract;
562 BOX *boxt;
563 
564  if (!box)
565  return ERROR_INT("box not defined", __func__, 1);
566  if (!boxa)
567  return ERROR_INT("boxa not defined", __func__, 1);
568  if (maxoverlap < 0.0 || maxoverlap > 1.0)
569  return ERROR_INT("invalid maxoverlap", __func__, 1);
570 
571  n = boxaGetCount(boxa);
572  if (n == 0 || maxoverlap == 1.0)
573  return 0;
574 
575  bigoverlap = 0;
576  for (i = 0; i < n; i++) {
577  boxt = boxaGetBox(boxa, i, L_CLONE);
578  boxOverlapFraction(boxt, box, &fract);
579  boxDestroy(&boxt);
580  if (fract > maxoverlap) {
581  bigoverlap = 1;
582  break;
583  }
584  }
585 
586  return bigoverlap;
587 }
588 
589 
608 BOXA *
610  l_float32 maxoverlap)
611 {
612 l_int32 i, j, n, remove;
613 l_float32 fract;
614 BOX *box1, *box2;
615 BOXA *boxad;
616 
617  if (!boxas)
618  return (BOXA *)ERROR_PTR("boxas not defined", __func__, NULL);
619  if (maxoverlap < 0.0 || maxoverlap > 1.0)
620  return (BOXA *)ERROR_PTR("invalid maxoverlap", __func__, NULL);
621 
622  n = boxaGetCount(boxas);
623  if (n == 0 || maxoverlap == 1.0)
624  return boxaCopy(boxas, L_COPY);
625 
626  boxad = boxaCreate(0);
627  box2 = boxaGetBox(boxas, 0, L_COPY);
628  boxaAddBox(boxad, box2, L_INSERT);
629  for (j = 1; j < n; j++) { /* prune on j */
630  box2 = boxaGetBox(boxas, j, L_COPY);
631  remove = FALSE;
632  for (i = 0; i < j; i++) { /* test on i */
633  box1 = boxaGetBox(boxas, i, L_CLONE);
634  boxOverlapFraction(box1, box2, &fract);
635  boxDestroy(&box1);
636  if (fract > maxoverlap) {
637  remove = TRUE;
638  break;
639  }
640  }
641  if (remove == TRUE)
642  boxDestroy(&box2);
643  else
644  boxaAddBox(boxad, box2, L_INSERT);
645  }
646 
647  return boxad;
648 }
l_ok boxGetGeometry(const BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
Definition: boxbasic.c:301
BOXA * boxaCopy(BOXA *boxa, l_int32 copyflag)
boxaCopy()
Definition: boxbasic.c:475
void boxDestroy(BOX **pbox)
boxDestroy()
Definition: boxbasic.c:273
BOX * boxClone(BOX *box)
boxClone()
Definition: boxbasic.c:249
l_ok boxaGetBoxGeometry(BOXA *boxa, l_int32 index, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxaGetBoxGeometry()
Definition: boxbasic.c:796
l_ok boxaAddBox(BOXA *boxa, BOX *box, l_int32 copyflag)
boxaAddBox()
Definition: boxbasic.c:553
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:519
l_int32 boxaGetCount(const BOXA *boxa)
boxaGetCount()
Definition: boxbasic.c:661
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
Definition: boxbasic.c:702
BOX * boxCopy(BOX *box)
boxCopy()
Definition: boxbasic.c:230
BOX * boxCreate(l_int32 x, l_int32 y, l_int32 w, l_int32 h)
boxCreate()
Definition: boxbasic.c:171
BOXA * boxaCreate(l_int32 n)
boxaCreate()
Definition: boxbasic.c:442
BOXA * boxaIntersectsBox(BOXA *boxas, BOX *box)
boxaIntersectsBox()
Definition: boxfunc1.c:322
l_ok boxGetCenter(const BOX *box, l_float32 *pcx, l_float32 *pcy)
boxGetCenter()
Definition: boxfunc1.c:1538
l_ok boxOverlapFraction(BOX *box1, BOX *box2, l_float32 *pfract)
boxOverlapFraction()
Definition: boxfunc1.c:787
l_ok boxaGetExtent(BOXA *boxa, l_int32 *pw, l_int32 *ph, BOX **pbox)
boxaGetExtent()
Definition: boxfunc4.c:922
void lheapDestroy(L_HEAP **plh, l_int32 freeflag)
lheapDestroy()
Definition: heap.c:152
L_HEAP * lheapCreate(l_int32 n, l_int32 direction)
lheapCreate()
Definition: heap.c:112
l_int32 lheapGetCount(L_HEAP *lh)
lheapGetCount()
Definition: heap.c:273
l_ok lheapAdd(L_HEAP *lh, void *item)
lheapAdd()
Definition: heap.c:189
void * lheapRemove(L_HEAP *lh)
lheapRemove()
Definition: heap.c:243
static l_int32 boxCheckIfOverlapIsBig(BOX *box, BOXA *boxa, l_float32 maxoverlap)
boxCheckIfOverlapIsBig()
Definition: partition.c:556
static BOXA * boxaGenerateSubboxes(BOX *box, BOXA *boxa, l_int32 maxperim, l_float32 fract)
boxaGenerateSubboxes()
Definition: partition.c:402
static PARTEL * partelCreate(BOX *box)
partelCreate()
Definition: partition.c:314
static l_int32 partelSetSize(PARTEL *partel, l_int32 sortflag)
partelSetSize()
Definition: partition.c:361
BOXA * boxaGetWhiteblocks(BOXA *boxas, BOX *box, l_int32 sortflag, l_int32 maxboxes, l_float32 maxoverlap, l_int32 maxperim, l_float32 fract, l_int32 maxpops)
boxaGetWhiteblocks()
Definition: partition.c:192
static void partelDestroy(PARTEL **ppartel)
partelDestroy()
Definition: partition.c:331
BOXA * boxaPruneSortedOnOverlap(BOXA *boxas, l_float32 maxoverlap)
boxaPruneSortedOnOverlap()
Definition: partition.c:609
static BOX * boxaSelectPivotBox(BOX *box, BOXA *boxa, l_int32 maxperim, l_float32 fract)
boxaSelectPivotBox()
Definition: partition.c:478
@ L_SORT_BY_AREA
Definition: pix.h:537
@ L_SORT_BY_MIN_DIMENSION
Definition: pix.h:534
@ L_SORT_BY_PERIMETER
Definition: pix.h:536
@ L_SORT_BY_WIDTH
Definition: pix.h:532
@ L_SORT_BY_HEIGHT
Definition: pix.h:533
@ L_SORT_BY_MAX_DIMENSION
Definition: pix.h:535
@ L_COPY
Definition: pix.h:505
@ L_CLONE
Definition: pix.h:506
@ L_INSERT
Definition: pix.h:504
@ L_SORT_DECREASING
Definition: pix.h:523
Definition: heap.h:78
void lept_stderr(const char *fmt,...)
lept_stderr()
Definition: utils1.c:306