Leptonica  1.83.1
Image processing and image analysis suite
classapp.c File Reference
#include <string.h>
#include "allheaders.h"

Go to the source code of this file.

Macros

#define L_BUF_SIZE   512
 

Functions

static l_int32 testLineAlignmentX (NUMA *na1, NUMA *na2, l_int32 shiftx, l_int32 delx, l_int32 nperline)
 
static l_int32 countAlignedMatches (NUMA *nai1, NUMA *nai2, NUMA *nasx, NUMA *nasy, l_int32 n1, l_int32 n2, l_int32 delx, l_int32 dely, l_int32 nreq, l_int32 *psame, l_int32 debugflag)
 
static void printRowIndices (l_int32 *index1, l_int32 n1, l_int32 *index2, l_int32 n2)
 
l_ok jbCorrelation (const char *dirin, l_float32 thresh, l_float32 weight, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag)
 
l_ok jbRankHaus (const char *dirin, l_int32 size, l_float32 rank, l_int32 components, const char *rootname, l_int32 firstpage, l_int32 npages, l_int32 renderflag)
 
JBCLASSER * jbWordsInTextlines (const char *dirin, l_int32 reduction, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weight, NUMA **pnatl, l_int32 firstpage, l_int32 npages)
 
l_ok pixGetWordsInTextlines (PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, PIXA **ppixad, NUMA **pnai)
 
l_ok pixGetWordBoxesInTextlines (PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, NUMA **pnai)
 
l_ok pixFindWordAndCharacterBoxes (PIX *pixs, BOX *boxs, l_int32 thresh, BOXA **pboxaw, BOXAA **pboxaac, const char *debugdir)
 
NUMAA * boxaExtractSortedPattern (BOXA *boxa, NUMA *na)
 
l_ok numaaCompareImagesByBoxes (NUMAA *naa1, NUMAA *naa2, l_int32 nperline, l_int32 nreq, l_int32 maxshiftx, l_int32 maxshifty, l_int32 delx, l_int32 dely, l_int32 *psame, l_int32 debugflag)
 

Variables

static const l_int32 JB_WORDS_MIN_WIDTH = 5
 
static const l_int32 JB_WORDS_MIN_HEIGHT = 3
 

Detailed Description


     Top-level jb2 correlation and rank-hausdorff
        l_int32         jbCorrelation()
        l_int32         jbRankHaus()

     Extract and classify words in textline order
        JBCLASSER      *jbWordsInTextlines()
        l_int32         pixGetWordsInTextlines()
        l_int32         pixGetWordBoxesInTextlines()

     Extract word and character bounding boxes
        l_int32         pixFindWordAndCharacterBoxes()

     Use word bounding boxes to compare page images
        NUMAA          *boxaExtractSortedPattern()
        l_int32         numaaCompareImagesByBoxes()
        static l_int32  testLineAlignmentX()
        static l_int32  countAlignedMatches()
        static void     printRowIndices()

Definition in file classapp.c.

Macro Definition Documentation

◆ L_BUF_SIZE

#define L_BUF_SIZE   512

size of filename buffer

Definition at line 59 of file classapp.c.

Function Documentation

◆ boxaExtractSortedPattern()

NUMAA* boxaExtractSortedPattern ( BOXA *  boxa,
NUMA *  na 
)

boxaExtractSortedPattern()

Parameters
[in] boxa: typ. of word bounding boxes, in textline order
[in] na: index of textline for each box in boxa
Returns
naa NUMAA, where each numa represents one textline, or NULL on error
Notes:
     (1) The input is expected to come from pixGetWordBoxesInTextlines().
     (2) Each numa in the output consists of an average y coordinate
         of the first box in the textline, followed by pairs of
         x coordinates representing the left and right edges of each
         of the boxes in the textline.

Definition at line 658 of file classapp.c.

References boxaGetBox(), boxaGetCount(), boxDestroy(), boxGetGeometry(), L_CLONE, L_INSERT, numaaAddNuma(), numaaCreate(), numaAddNumber(), numaCreate(), and numaGetIValue().

◆ jbCorrelation()

l_ok jbCorrelation ( const char *  dirin,
l_float32  thresh,
l_float32  weight,
l_int32  components,
const char *  rootname,
l_int32  firstpage,
l_int32  npages,
l_int32  renderflag 
)

jbCorrelation()

Parameters
[in] dirin: directory of input images
[in] thresh: typically ~0.8
[in] weight: typically ~0.6
[in] components: JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
[in] rootname: for output files
[in] firstpage: 0-based
[in] npages: use 0 for all pages in dirin
[in] renderflag: 1 to render from templates; 0 to skip
Returns
0 if OK, 1 on error
Notes:
     (1) The images must be 1 bpp.  If they are not, you can convert
         them using convertFilesTo1bpp().
     (2) See prog/jbcorrelation for generating more output (e.g.,
         for debugging)

Definition at line 99 of file classapp.c.

References L_BUF_SIZE.

◆ jbRankHaus()

l_ok jbRankHaus ( const char *  dirin,
l_int32  size,
l_float32  rank,
l_int32  components,
const char *  rootname,
l_int32  firstpage,
l_int32  npages,
l_int32  renderflag 
)

jbRankHaus()

Parameters
[in] dirin: directory of input images
[in] size: of Sel used for dilation; typ. 2
[in] rank: rank value of match; typ. 0.97
[in] components: JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS
[in] rootname: for output files
[in] firstpage: 0-based
[in] npages: use 0 for all pages in dirin
[in] renderflag: 1 to render from templates; 0 to skip
Returns
0 if OK, 1 on error
Notes:
     (1) See prog/jbrankhaus for generating more output (e.g.,
         for debugging)

Definition at line 179 of file classapp.c.

References L_BUF_SIZE.

◆ jbWordsInTextlines()

JBCLASSER* jbWordsInTextlines ( const char *  dirin,
l_int32  reduction,
l_int32  maxwidth,
l_int32  maxheight,
l_float32  thresh,
l_float32  weight,
NUMA **  pnatl,
l_int32  firstpage,
l_int32  npages 
)

jbWordsInTextlines()

Parameters
[in] dirin: directory of input pages
[in] reduction: 1 for full res; 2 for half-res
[in] maxwidth: of word mask components, to be kept
[in] maxheight: of word mask components, to be kept
[in] thresh: on correlation; 0.80 is reasonable
[in] weight: for handling thick text; 0.6 is reasonable
[out] pnatl: numa with textline index for each component
[in] firstpage: 0-based
[in] npages: use 0 for all pages in dirin
Returns
classer for the set of pages
Notes:
     (1) This is a high-level function.  See prog/jbwords for example
         of usage.
     (2) Typically, use input of 75 - 150 ppi for finding words.

Definition at line 265 of file classapp.c.

References getSortedPathnamesInDirectory(), and sarrayGetCount().

◆ numaaCompareImagesByBoxes()

l_ok numaaCompareImagesByBoxes ( NUMAA *  naa1,
NUMAA *  naa2,
l_int32  nperline,
l_int32  nreq,
l_int32  maxshiftx,
l_int32  maxshifty,
l_int32  delx,
l_int32  dely,
l_int32 *  psame,
l_int32  debugflag 
)

numaaCompareImagesByBoxes()

Parameters
[in] naa1: for image 1, formatted by boxaExtractSortedPattern()
[in] naa2: for image 2, formatted by boxaExtractSortedPattern()
[in] nperline: number of box regions to be used in each textline
[in] nreq: number of complete row matches required
[in] maxshiftx: max allowed x shift between two patterns, in pixels
[in] maxshifty: max allowed y shift between two patterns, in pixels
[in] delx: max allowed difference in x data, after alignment
[in] dely: max allowed difference in y data, after alignment
[out] psame: 1 if nreq row matches are found; 0 otherwise
[in] debugflag: 1 for debug output
Returns
0 if OK, 1 on error
Notes:
     (1) Each input numaa describes a set of sorted bounding boxes
         (sorted by textline and, within each textline, from
         left to right) in the images from which they are derived.
         See boxaExtractSortedPattern() for a description of the data
         format in each of the input numaa.
     (2) This function does an alignment between the input
         descriptions of bounding boxes for two images. The
         input parameter nperline specifies the number of boxes
         to consider in each line when testing for a match, and
         nreq is the required number of lines that must be well-aligned
         to get a match.
     (3) Testing by alignment has 3 steps:
         (a) Generating the location of word bounding boxes from the
             images (prior to calling this function).
         (b) Listing all possible pairs of aligned rows, based on
             tolerances in horizontal and vertical positions of
             the boxes.  Specifically, all pairs of rows are enumerated
             whose first nperline boxes can be brought into close
             alignment, based on the delx parameter for boxes in the
             line and within the overall maxshiftx and maxshifty
             constraints.
         (c) Each pair, starting with the first, is used to search
             for a set of nreq - 1 other pairs that can all be aligned
             with a difference in global translation of not more
             than (delx, dely).

Definition at line 744 of file classapp.c.

References L_CLONE, numaaGetCount(), numaaGetNuma(), numaCreate(), numaDestroy(), numaGetCount(), and numaGetIValue().

◆ pixFindWordAndCharacterBoxes()

l_ok pixFindWordAndCharacterBoxes ( PIX *  pixs,
BOX *  boxs,
l_int32  thresh,
BOXA **  pboxaw,
BOXAA **  pboxaac,
const char *  debugdir 
)

pixFindWordAndCharacterBoxes()

Parameters
[in] pixs: 2, 4, 8 or 32 bpp; colormap OK; typ. 300 ppi
[in] boxs: [optional] region to select in pixs
[in] thresh: binarization threshold (typ. 100 - 150)
[out] pboxaw: return the word boxes
[out] pboxaac: return the character boxes
[in] debugdir: [optional] for debug images; use NULL to skip
Returns
0 if OK, 1 on error
Notes:
     (1) If boxs == NULL, the entire input image is used.
     (2) Having an input pix that is not 1bpp is necessary to reduce
         touching characters by using a low binarization threshold.
         Suggested thresholds are between 100 and 150.
     (3) The coordinates in the output boxes are global, with respect
         to the input image.

Definition at line 516 of file classapp.c.

◆ pixGetWordBoxesInTextlines()

l_ok pixGetWordBoxesInTextlines ( PIX *  pixs,
l_int32  minwidth,
l_int32  minheight,
l_int32  maxwidth,
l_int32  maxheight,
BOXA **  pboxad,
NUMA **  pnai 
)

pixGetWordBoxesInTextlines()

Parameters
[in] pixs: 1 bpp, typ. 75 - 150 ppi
[in] minwidth: of saved components; smaller are discarded
[in] minheight: of saved components; smaller are discarded
[in] maxwidth: of saved components; larger are discarded
[in] maxheight: of saved components; larger are discarded
[out] pboxad: word boxes sorted in textline order
[out] pnai: [optional] index of textline for each word
Returns
0 if OK, 1 on error
Notes:
     (1) The input should be at a resolution of between 75 and 150 ppi.
     (2) This is a special version of pixGetWordsInTextlines(), that
         just finds the word boxes in line order, with a numa
         giving the textline index for each word.
         See pixGetWordsInTextlines() for more details.

Definition at line 452 of file classapp.c.

References boxaaDestroy(), boxaaFlattenToBoxa(), boxaDestroy(), boxaSort2d(), L_CLONE, and numaDestroy().

◆ pixGetWordsInTextlines()

l_ok pixGetWordsInTextlines ( PIX *  pixs,
l_int32  minwidth,
l_int32  minheight,
l_int32  maxwidth,
l_int32  maxheight,
BOXA **  pboxad,
PIXA **  ppixad,
NUMA **  pnai 
)

pixGetWordsInTextlines()

Parameters
[in] pixs: 1 bpp, typ. 75 - 150 ppi
[in] minwidth: of saved components; smaller are discarded
[in] minheight: of saved components; smaller are discarded
[in] maxwidth: of saved components; larger are discarded
[in] maxheight: of saved components; larger are discarded
[out] pboxad: word boxes sorted in textline order
[out] ppixad: word images sorted in textline order
[out] pnai: index of textline for each word
Returns
0 if OK, 1 on error
Notes:
     (1) The input should be at a resolution of between 75 and 150 ppi.
     (2) The four size constraints on saved components are all
         scaled by reduction.
     (3) The results are word images (and their b.b.), extracted in
         textline order, at either full res or 2x reduction,
         and with a numa giving the textline index for each word.
     (4) The pixa and boxa interfaces should make this type of
         application simple to put together.  The steps are:
          ~ generate first estimate of word masks
          ~ get b.b. of these, and remove the small and big ones
          ~ extract pixa of the word images, using the b.b.
          ~ sort actual word images in textline order (2d)
          ~ flatten them to a pixa (1d), saving the textline index
            for each pix
     (5) In an actual application, it may be desirable to pre-filter
         the input image to remove large components, to extract
         single columns of text, and to deskew them.  For example,
         to remove both large components and small noisy components
         that can interfere with the statistics used to estimate
         parameters for segmenting by words, but still retain text lines,
         the following image preprocessing can be done:
               Pix *pixt = pixMorphSequence(pixs, "c40.1", 0);
               Pix *pixf = pixSelectBySize(pixt, 0, 60, 8,
                                    L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL);
               pixAnd(pixf, pixf, pixs);  // the filtered image
         The closing turns text lines into long blobs, but does not
         significantly increase their height.  But if there are many
         small connected components in a dense texture, this is likely
         to generate tall components that will be eliminated in pixf.

Definition at line 377 of file classapp.c.

References boxaaDestroy(), boxaDestroy(), boxaSort2d(), L_CLONE, L_COPY, numaaDestroy(), pixaaDestroy(), pixaaFlattenToPixa(), pixaCreateFromBoxa(), pixaDestroy(), pixaGetBoxa(), and pixaSort2dByIndex().

Variable Documentation

◆ JB_WORDS_MIN_HEIGHT

const l_int32 JB_WORDS_MIN_HEIGHT = 3
static

min. word height in pixels

Definition at line 61 of file classapp.c.

◆ JB_WORDS_MIN_WIDTH

const l_int32 JB_WORDS_MIN_WIDTH = 5
static

min. word width in pixels

Definition at line 60 of file classapp.c.