Leptonica  1.83.1
Image processing and image analysis suite
flipdetect.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
201 #ifdef HAVE_CONFIG_H
202 #include <config_auto.h>
203 #endif /* HAVE_CONFIG_H */
204 
205 #include <math.h>
206 #include "allheaders.h"
207 
/* Sels for pixOrientDetect() and pixMirrorDetect().
 *
 * Each pattern is handed to selCreateFromString(text, 5, 6, NULL), i.e.
 * it is read as 5 rows of 6 characters, so every string must contain
 * exactly 30 characters.  The four patterns are related by symmetry:
 *   textsel2 is textsel1 flipped left-right,
 *   textsel3 is textsel1 flipped top-bottom,
 *   textsel4 is textsel2 flipped top-bottom.
 *
 * NOTE(review): the blank padding inside the rows was restored to bring
 * each row back to 6 columns; confirm the exact column positions against
 * the upstream Leptonica source before relying on them. */
static const char *textsel1 = "x  oo "
                              "x oOo "
                              "x  o  "
                              "x     "
                              "xxxxxx";

static const char *textsel2 = " oo  x"
                              " oOo x"
                              "  o  x"
                              "     x"
                              "xxxxxx";

static const char *textsel3 = "xxxxxx"
                              "x     "
                              "x  o  "
                              "x oOo "
                              "x  oo ";

static const char *textsel4 = "xxxxxx"
                              "     x"
                              "  o  x"
                              " oOo x"
                              " oo  x";
232 
233  /* Parameters for determining orientation */
234 static const l_int32 DefaultMinUpDownCount = 70;
235 static const l_float32 DefaultMinUpDownConf = 8.0;
236 static const l_float32 DefaultMinUpDownRatio = 2.5;
237 
238  /* Parameters for determining mirror flip */
239 static const l_int32 DefaultMinMirrorFlipCount = 100;
240 static const l_float32 DefaultMinMirrorFlipConf = 5.0;
241 
242  /* Static debug function */
243 static void pixDebugFlipDetect(const char *filename, PIX *pixs,
244  PIX *pixhm, l_int32 enable);
245 
246 
247 /*----------------------------------------------------------------*
248  * High-level interface for detection and correction *
249  *----------------------------------------------------------------*/
273 PIX *
275  l_float32 minupconf,
276  l_float32 minratio,
277  l_float32 *pupconf,
278  l_float32 *pleftconf,
279  l_int32 *protation,
280  l_int32 debug)
281 {
282 l_int32 orient;
283 l_float32 upconf, leftconf;
284 PIX *pix1;
285 
286  if (!pixs || pixGetDepth(pixs) != 1)
287  return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", __func__, NULL);
288 
289  /* Get confidences for orientation */
290  pixUpDownDetect(pixs, &upconf, 0, 0, debug);
291  pix1 = pixRotate90(pixs, 1);
292  pixUpDownDetect(pix1, &leftconf, 0, 0, debug);
293  pixDestroy(&pix1);
294  if (pupconf) *pupconf = upconf;
295  if (pleftconf) *pleftconf = leftconf;
296 
297  /* Decide what to do */
298  makeOrientDecision(upconf,leftconf, minupconf, minratio, &orient, debug);
299 
300  /* Do it */
301  switch (orient)
302  {
304  L_INFO("text orientation not determined; no rotation\n", __func__);
305  if (protation) *protation = 0;
306  return pixCopy(NULL, pixs);
307  break;
308  case L_TEXT_ORIENT_UP:
309  L_INFO("text is oriented up; no rotation\n", __func__);
310  if (protation) *protation = 0;
311  return pixCopy(NULL, pixs);
312  break;
313  case L_TEXT_ORIENT_LEFT:
314  L_INFO("landscape; text oriented left; 90 cw rotation\n", __func__);
315  if (protation) *protation = 90;
316  return pixRotateOrth(pixs, 1);
317  break;
318  case L_TEXT_ORIENT_DOWN:
319  L_INFO("text oriented down; 180 cw rotation\n", __func__);
320  if (protation) *protation = 180;
321  return pixRotateOrth(pixs, 2);
322  break;
323  case L_TEXT_ORIENT_RIGHT:
324  L_INFO("landscape; text oriented right; 270 cw rotation\n", __func__);
325  if (protation) *protation = 270;
326  return pixRotateOrth(pixs, 3);
327  break;
328  default:
329  L_ERROR("invalid orient flag!\n", __func__);
330  return pixCopy(NULL, pixs);
331  }
332 }
333 
334 
335 /*----------------------------------------------------------------*
336  * Orientation detection (four 90 degree angles) *
337  *----------------------------------------------------------------*/
402 l_ok
404  l_float32 *pupconf,
405  l_float32 *pleftconf,
406  l_int32 mincount,
407  l_int32 debug)
408 {
409 PIX *pix1;
410 
411  if (!pixs || pixGetDepth(pixs) != 1)
412  return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
413  if (!pupconf && !pleftconf)
414  return ERROR_INT("nothing to do", __func__, 1);
415  if (mincount == 0)
416  mincount = DefaultMinUpDownCount;
417 
418  if (pupconf)
419  pixUpDownDetect(pixs, pupconf, mincount, 0, debug);
420  if (pleftconf) {
421  pix1 = pixRotate90(pixs, 1);
422  pixUpDownDetect(pix1, pleftconf, mincount, 0, debug);
423  pixDestroy(&pix1);
424  }
425 
426  return 0;
427 }
428 
429 
459 l_ok
460 makeOrientDecision(l_float32 upconf,
461  l_float32 leftconf,
462  l_float32 minupconf,
463  l_float32 minratio,
464  l_int32 *porient,
465  l_int32 debug)
466 {
467 l_float32 absupconf, absleftconf;
468 
469  if (!porient)
470  return ERROR_INT("&orient not defined", __func__, 1);
471  *porient = L_TEXT_ORIENT_UNKNOWN; /* default: no decision */
472  if (upconf == 0.0 || leftconf == 0.0) {
473  L_INFO("not enough confidence to get orientation\n", __func__);
474  return 0;
475  }
476 
477  if (minupconf == 0.0)
478  minupconf = DefaultMinUpDownConf;
479  if (minratio == 0.0)
480  minratio = DefaultMinUpDownRatio;
481  absupconf = L_ABS(upconf);
482  absleftconf = L_ABS(leftconf);
483 
484  /* Here are the four possible orientation decisions, based
485  * on satisfaction of two threshold constraints. */
486  if (upconf > minupconf && absupconf > minratio * absleftconf)
487  *porient = L_TEXT_ORIENT_UP;
488  else if (leftconf > minupconf && absleftconf > minratio * absupconf)
489  *porient = L_TEXT_ORIENT_LEFT;
490  else if (upconf < -minupconf && absupconf > minratio * absleftconf)
491  *porient = L_TEXT_ORIENT_DOWN;
492  else if (leftconf < -minupconf && absleftconf > minratio * absupconf)
493  *porient = L_TEXT_ORIENT_RIGHT;
494 
495  if (debug) {
496  lept_stderr("upconf = %7.3f, leftconf = %7.3f\n", upconf, leftconf);
497  if (*porient == L_TEXT_ORIENT_UNKNOWN)
498  lept_stderr("Confidence is low; no determination is made\n");
499  else if (*porient == L_TEXT_ORIENT_UP)
500  lept_stderr("Text is rightside-up\n");
501  else if (*porient == L_TEXT_ORIENT_LEFT)
502  lept_stderr("Text is rotated 90 deg ccw\n");
503  else if (*porient == L_TEXT_ORIENT_DOWN)
504  lept_stderr("Text is upside-down\n");
505  else /* *porient == L_TEXT_ORIENT_RIGHT */
506  lept_stderr("Text is rotated 90 deg cw\n");
507  }
508 
509  return 0;
510 }
511 
512 
551 l_ok
553  l_float32 *pconf,
554  l_int32 mincount,
555  l_int32 npixels,
556  l_int32 debug)
557 {
558 l_int32 countup, countdown, nmax;
559 l_float32 nup, ndown;
560 PIX *pix0, *pix1, *pix2, *pix3, *pixm;
561 SEL *sel1, *sel2, *sel3, *sel4;
562 
563  if (!pconf)
564  return ERROR_INT("&conf not defined", __func__, 1);
565  *pconf = 0.0;
566  if (!pixs || pixGetDepth(pixs) != 1)
567  return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
568  if (mincount == 0)
569  mincount = DefaultMinUpDownCount;
570  if (npixels < 0)
571  npixels = 0;
572 
573  if (debug) {
574  lept_mkdir("lept/orient");
575  }
576 
577  sel1 = selCreateFromString(textsel1, 5, 6, NULL);
578  sel2 = selCreateFromString(textsel2, 5, 6, NULL);
579  sel3 = selCreateFromString(textsel3, 5, 6, NULL);
580  sel4 = selCreateFromString(textsel4, 5, 6, NULL);
581 
582  /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
583  * This closes holes in x-height characters and joins them at
584  * the x-height. There is more noise in the descender detection
585  * from this, but it works fairly well. */
586  pix0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0);
587 
588  /* Optionally, make a mask of the word bounding boxes, shortening
589  * each of them by a fixed amount at each end. */
590  pixm = NULL;
591  if (npixels > 0) {
592  l_int32 i, nbox, x, y, w, h;
593  BOX *box;
594  BOXA *boxa;
595  pix1 = pixMorphSequence(pix0, "o10.1", 0);
596  boxa = pixConnComp(pix1, NULL, 8);
597  pixm = pixCreateTemplate(pix1);
598  pixDestroy(&pix1);
599  nbox = boxaGetCount(boxa);
600  for (i = 0; i < nbox; i++) {
601  box = boxaGetBox(boxa, i, L_CLONE);
602  boxGetGeometry(box, &x, &y, &w, &h);
603  if (w > 2 * npixels)
604  pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
605  PIX_SET, NULL, 0, 0);
606  boxDestroy(&box);
607  }
608  boxaDestroy(&boxa);
609  }
610 
611  /* Find the ascenders and optionally filter with pixm.
612  * For an explanation of the procedure used for counting the result
613  * of the HMT, see comments at the beginning of this function. */
614  pix1 = pixHMT(NULL, pix0, sel1);
615  pix2 = pixHMT(NULL, pix0, sel2);
616  pixOr(pix1, pix1, pix2);
617  if (pixm)
618  pixAnd(pix1, pix1, pixm);
619  pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
620  pixCountPixels(pix3, &countup, NULL);
621  pixDebugFlipDetect("/tmp/lept/orient/up.png", pixs, pix1, debug);
622  pixDestroy(&pix1);
623  pixDestroy(&pix2);
624  pixDestroy(&pix3);
625 
626  /* Find the ascenders and optionally filter with pixm. */
627  pix1 = pixHMT(NULL, pix0, sel3);
628  pix2 = pixHMT(NULL, pix0, sel4);
629  pixOr(pix1, pix1, pix2);
630  if (pixm)
631  pixAnd(pix1, pix1, pixm);
632  pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
633  pixCountPixels(pix3, &countdown, NULL);
634  pixDebugFlipDetect("/tmp/lept/orient/down.png", pixs, pix1, debug);
635  pixDestroy(&pix1);
636  pixDestroy(&pix2);
637  pixDestroy(&pix3);
638 
639  /* Evaluate statistically, generating a confidence that is
640  * related to the probability with a gaussian distribution. */
641  nup = (l_float32)(countup);
642  ndown = (l_float32)(countdown);
643  nmax = L_MAX(countup, countdown);
644  if (nmax > mincount)
645  *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));
646 
647  if (debug) {
648  if (pixm) pixWriteDebug("/tmp/lept/orient/pixm1.png", pixm, IFF_PNG);
649  lept_stderr("nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
650  nup, ndown, *pconf);
651  if (*pconf > DefaultMinUpDownConf)
652  lept_stderr("Text is rightside-up\n");
653  if (*pconf < -DefaultMinUpDownConf)
654  lept_stderr("Text is upside-down\n");
655  }
656 
657  pixDestroy(&pix0);
658  pixDestroy(&pixm);
659  selDestroy(&sel1);
660  selDestroy(&sel2);
661  selDestroy(&sel3);
662  selDestroy(&sel4);
663  return 0;
664 }
665 
666 
667 /*----------------------------------------------------------------*
668  * Left-right mirror detection *
669  *----------------------------------------------------------------*/
709 l_ok
711  l_float32 *pconf,
712  l_int32 mincount,
713  l_int32 debug)
714 {
715 l_int32 count1, count2, nmax;
716 l_float32 nleft, nright;
717 PIX *pix0, *pix1, *pix2, *pix3;
718 SEL *sel1, *sel2;
719 
720  if (!pconf)
721  return ERROR_INT("&conf not defined", __func__, 1);
722  *pconf = 0.0;
723  if (!pixs || pixGetDepth(pixs) != 1)
724  return ERROR_INT("pixs not defined or not 1 bpp", __func__, 1);
725  if (mincount == 0)
726  mincount = DefaultMinMirrorFlipCount;
727 
728  if (debug) {
729  lept_mkdir("lept/orient");
730  }
731 
732  sel1 = selCreateFromString(textsel1, 5, 6, NULL);
733  sel2 = selCreateFromString(textsel2, 5, 6, NULL);
734 
735  /* Fill x-height characters but not space between them, sort of. */
736  pix3 = pixMorphCompSequence(pixs, "d1.30", 0);
737  pixXor(pix3, pix3, pixs);
738  pix0 = pixMorphCompSequence(pixs, "c15.1", 0);
739  pixXor(pix0, pix0, pixs);
740  pixAnd(pix0, pix0, pix3);
741  pixOr(pix0, pix0, pixs);
742  pixDestroy(&pix3);
743 
744  /* Filter the right-facing characters. */
745  pix1 = pixHMT(NULL, pix0, sel1);
746  pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
747  pixCountPixels(pix3, &count1, NULL);
748  pixDebugFlipDetect("/tmp/lept/orient/right.png", pixs, pix1, debug);
749  pixDestroy(&pix1);
750  pixDestroy(&pix3);
751 
752  /* Filter the left-facing characters. */
753  pix2 = pixHMT(NULL, pix0, sel2);
754  pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0);
755  pixCountPixels(pix3, &count2, NULL);
756  pixDebugFlipDetect("/tmp/lept/orient/left.png", pixs, pix2, debug);
757  pixDestroy(&pix2);
758  pixDestroy(&pix3);
759 
760  nright = (l_float32)count1;
761  nleft = (l_float32)count2;
762  nmax = L_MAX(count1, count2);
763  pixDestroy(&pix0);
764  selDestroy(&sel1);
765  selDestroy(&sel2);
766 
767  if (nmax > mincount)
768  *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));
769 
770  if (debug) {
771  lept_stderr("nright = %f, nleft = %f\n", nright, nleft);
772  if (*pconf > DefaultMinMirrorFlipConf)
773  lept_stderr("Text is not mirror reversed\n");
774  if (*pconf < -DefaultMinMirrorFlipConf)
775  lept_stderr("Text is mirror reversed\n");
776  }
777 
778  return 0;
779 }
780 
781 
782 /*----------------------------------------------------------------*
783  * Static debug helper *
784  *----------------------------------------------------------------*/
785 /*
786  * \brief pixDebugFlipDetect()
787  *
788  * \param[in] filename for output debug file
789  * \param[in] pixs input to pix*Detect
790  * \param[in] pixhm hit-miss result from ascenders or descenders
791  * \param[in] enable 1 to enable this function; 0 to disable
792  * \return void
793  */
794 static void
795 pixDebugFlipDetect(const char *filename,
796  PIX *pixs,
797  PIX *pixhm,
798  l_int32 enable)
799 {
800 PIX *pixt, *pixthm;
801 
802  if (!enable) return;
803 
804  /* Display with red dot at counted locations */
805  pixt = pixConvert1To4Cmap(pixs);
806  pixthm = pixMorphSequence(pixhm, "d5.5", 0);
807  pixSetMaskedCmap(pixt, pixthm, 0, 0, 255, 0, 0);
808 
809  pixWriteDebug(filename, pixt, IFF_PNG);
810  pixDestroy(&pixthm);
811  pixDestroy(&pixt);
812  return;
813 }
PIX * pixReduceRankBinaryCascade(PIX *pixs, l_int32 level1, l_int32 level2, l_int32 level3, l_int32 level4)
pixReduceRankBinaryCascade()
Definition: binreduce.c:150
l_ok boxGetGeometry(const BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
Definition: boxbasic.c:301
void boxDestroy(BOX **pbox)
boxDestroy()
Definition: boxbasic.c:273
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:519
l_int32 boxaGetCount(const BOXA *boxa)
boxaGetCount()
Definition: boxbasic.c:661
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
Definition: boxbasic.c:702
BOXA * pixConnComp(PIX *pixs, PIXA **ppixa, l_int32 connectivity)
pixConnComp()
Definition: conncomp.c:152
PIX * pixOrientCorrect(PIX *pixs, l_float32 minupconf, l_float32 minratio, l_float32 *pupconf, l_float32 *pleftconf, l_int32 *protation, l_int32 debug)
pixOrientCorrect()
Definition: flipdetect.c:274
l_ok makeOrientDecision(l_float32 upconf, l_float32 leftconf, l_float32 minupconf, l_float32 minratio, l_int32 *porient, l_int32 debug)
makeOrientDecision()
Definition: flipdetect.c:460
l_ok pixMirrorDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug)
pixMirrorDetect()
Definition: flipdetect.c:710
l_ok pixUpDownDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug)
pixUpDownDetect()
Definition: flipdetect.c:552
l_ok pixOrientDetect(PIX *pixs, l_float32 *pupconf, l_float32 *pleftconf, l_int32 mincount, l_int32 debug)
pixOrientDetect()
Definition: flipdetect.c:403
PIX * pixHMT(PIX *pixd, PIX *pixs, SEL *sel)
pixHMT()
Definition: morph.c:338
PIX * pixMorphSequence(PIX *pixs, const char *sequence, l_int32 dispsep)
pixMorphSequence()
Definition: morphseq.c:137
PIX * pixMorphCompSequence(PIX *pixs, const char *sequence, l_int32 dispsep)
pixMorphCompSequence()
Definition: morphseq.c:302
l_ok pixSetMaskedCmap(PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 rval, l_int32 gval, l_int32 bval)
pixSetMaskedCmap()
Definition: paintcmap.c:686
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:608
PIX * pixCopy(PIX *pixd, const PIX *pixs)
pixCopy()
Definition: pix1.c:689
PIX * pixCreateTemplate(const PIX *pixs)
pixCreateTemplate()
Definition: pix1.c:380
l_ok pixCountPixels(PIX *pixs, l_int32 *pcount, l_int32 *tab8)
pixCountPixels()
Definition: pix3.c:1893
PIX * pixOr(PIX *pixd, PIX *pixs1, PIX *pixs2)
pixOr()
Definition: pix3.c:1530
PIX * pixAnd(PIX *pixd, PIX *pixs1, PIX *pixs2)
pixAnd()
Definition: pix3.c:1592
PIX * pixXor(PIX *pixd, PIX *pixs1, PIX *pixs2)
pixXor()
Definition: pix3.c:1654
@ L_CLONE
Definition: pix.h:506
@ L_TEXT_ORIENT_DOWN
Definition: pix.h:787
@ L_TEXT_ORIENT_UP
Definition: pix.h:785
@ L_TEXT_ORIENT_LEFT
Definition: pix.h:786
@ L_TEXT_ORIENT_RIGHT
Definition: pix.h:788
@ L_TEXT_ORIENT_UNKNOWN
Definition: pix.h:784
#define PIX_SET
Definition: pix.h:448
PIX * pixConvert1To4Cmap(PIX *pixs)
pixConvert1To4Cmap()
Definition: pixconv.c:2187
l_ok pixRasterop(PIX *pixd, l_int32 dx, l_int32 dy, l_int32 dw, l_int32 dh, l_int32 op, PIX *pixs, l_int32 sx, l_int32 sy)
pixRasterop()
Definition: rop.c:204
PIX * pixRotate90(PIX *pixs, l_int32 direction)
pixRotate90()
Definition: rotateorth.c:162
PIX * pixRotateOrth(PIX *pixs, l_int32 quads)
pixRotateOrth()
Definition: rotateorth.c:75
SEL * selCreateFromString(const char *text, l_int32 h, l_int32 w, const char *name)
selCreateFromString()
Definition: sel1.c:1541
void selDestroy(SEL **psel)
selDestroy()
Definition: sel1.c:336
void lept_stderr(const char *fmt,...)
lept_stderr()
Definition: utils1.c:306
l_int32 lept_mkdir(const char *subdir)
lept_mkdir()
Definition: utils2.c:2138