Leptonica  1.83.1
Image processing and image analysis suite
numafunc2.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
137 #ifdef HAVE_CONFIG_H
138 #include <config_auto.h>
139 #endif /* HAVE_CONFIG_H */
140 
141 #include <math.h>
142 #include "allheaders.h"
143 
144  /* bin sizes in numaMakeHistogram() */
145 static const l_int32 BinSizeArray[] = {2, 5, 10, 20, 50, 100, 200, 500, 1000,\
146  2000, 5000, 10000, 20000, 50000, 100000, 200000,\
147  500000, 1000000, 2000000, 5000000, 10000000,\
148  200000000, 50000000, 100000000};
149 static const l_int32 NBinSizes = 24;
150 
151 
152 #ifndef NO_CONSOLE_IO
153 #define DEBUG_HISTO 0
154 #define DEBUG_CROSSINGS 0
155 #define DEBUG_FREQUENCY 0
156 #endif /* ~NO_CONSOLE_IO */
157 
158 /*----------------------------------------------------------------------*
159  * Morphological operations *
160  *----------------------------------------------------------------------*/
182 NUMA *
184  l_int32 size)
185 {
186 l_int32 i, j, n, hsize, len;
187 l_float32 minval;
188 l_float32 *fa, *fas, *fad;
189 NUMA *nad;
190 
191  if (!nas)
192  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
193  if (size <= 0)
194  return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL);
195  if ((size & 1) == 0 ) {
196  L_WARNING("sel size must be odd; increasing by 1\n", __func__);
197  size++;
198  }
199 
200  if (size == 1)
201  return numaCopy(nas);
202 
203  /* Make a source fa (fas) that has an added (size / 2) boundary
204  * on left and right, contains a copy of nas in the interior region
205  * (between 'size' and 'size + n', and has large values
206  * inserted in the boundary (because it is an erosion). */
207  n = numaGetCount(nas);
208  hsize = size / 2;
209  len = n + 2 * hsize;
210  if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL)
211  return (NUMA *)ERROR_PTR("fas not made", __func__, NULL);
212  for (i = 0; i < hsize; i++)
213  fas[i] = 1.0e37;
214  for (i = hsize + n; i < len; i++)
215  fas[i] = 1.0e37;
216  fa = numaGetFArray(nas, L_NOCOPY);
217  for (i = 0; i < n; i++)
218  fas[hsize + i] = fa[i];
219 
220  nad = numaMakeConstant(0, n);
221  numaCopyParameters(nad, nas);
222  fad = numaGetFArray(nad, L_NOCOPY);
223  for (i = 0; i < n; i++) {
224  minval = 1.0e37; /* start big */
225  for (j = 0; j < size; j++)
226  minval = L_MIN(minval, fas[i + j]);
227  fad[i] = minval;
228  }
229 
230  LEPT_FREE(fas);
231  return nad;
232 }
233 
234 
249 NUMA *
251  l_int32 size)
252 {
253 l_int32 i, j, n, hsize, len;
254 l_float32 maxval;
255 l_float32 *fa, *fas, *fad;
256 NUMA *nad;
257 
258  if (!nas)
259  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
260  if (size <= 0)
261  return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL);
262  if ((size & 1) == 0 ) {
263  L_WARNING("sel size must be odd; increasing by 1\n", __func__);
264  size++;
265  }
266 
267  if (size == 1)
268  return numaCopy(nas);
269 
270  /* Make a source fa (fas) that has an added (size / 2) boundary
271  * on left and right, contains a copy of nas in the interior region
272  * (between 'size' and 'size + n', and has small values
273  * inserted in the boundary (because it is a dilation). */
274  n = numaGetCount(nas);
275  hsize = size / 2;
276  len = n + 2 * hsize;
277  if ((fas = (l_float32 *)LEPT_CALLOC(len, sizeof(l_float32))) == NULL)
278  return (NUMA *)ERROR_PTR("fas not made", __func__, NULL);
279  for (i = 0; i < hsize; i++)
280  fas[i] = -1.0e37;
281  for (i = hsize + n; i < len; i++)
282  fas[i] = -1.0e37;
283  fa = numaGetFArray(nas, L_NOCOPY);
284  for (i = 0; i < n; i++)
285  fas[hsize + i] = fa[i];
286 
287  nad = numaMakeConstant(0, n);
288  numaCopyParameters(nad, nas);
289  fad = numaGetFArray(nad, L_NOCOPY);
290  for (i = 0; i < n; i++) {
291  maxval = -1.0e37; /* start small */
292  for (j = 0; j < size; j++)
293  maxval = L_MAX(maxval, fas[i + j]);
294  fad[i] = maxval;
295  }
296 
297  LEPT_FREE(fas);
298  return nad;
299 }
300 
301 
316 NUMA *
318  l_int32 size)
319 {
320 NUMA *nat, *nad;
321 
322  if (!nas)
323  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
324  if (size <= 0)
325  return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL);
326  if ((size & 1) == 0 ) {
327  L_WARNING("sel size must be odd; increasing by 1\n", __func__);
328  size++;
329  }
330 
331  if (size == 1)
332  return numaCopy(nas);
333 
334  nat = numaErode(nas, size);
335  nad = numaDilate(nat, size);
336  numaDestroy(&nat);
337  return nad;
338 }
339 
340 
361 NUMA *
363  l_int32 size)
364 {
365 NUMA *nab, *nat1, *nat2, *nad;
366 
367  if (!nas)
368  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
369  if (size <= 0)
370  return (NUMA *)ERROR_PTR("size must be > 0", __func__, NULL);
371  if ((size & 1) == 0 ) {
372  L_WARNING("sel size must be odd; increasing by 1\n", __func__);
373  size++;
374  }
375 
376  if (size == 1)
377  return numaCopy(nas);
378 
379  nab = numaAddBorder(nas, size, size, 0); /* to preserve extensivity */
380  nat1 = numaDilate(nab, size);
381  nat2 = numaErode(nat1, size);
382  nad = numaRemoveBorder(nat2, size, size);
383  numaDestroy(&nab);
384  numaDestroy(&nat1);
385  numaDestroy(&nat2);
386  return nad;
387 }
388 
389 
390 /*----------------------------------------------------------------------*
391  * Other transforms *
392  *----------------------------------------------------------------------*/
406 NUMA *
408  l_float32 shift,
409  l_float32 scale)
410 {
411 l_int32 i, n;
412 l_float32 val;
413 NUMA *nad;
414 
415  if (!nas)
416  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
417  n = numaGetCount(nas);
418  if ((nad = numaCreate(n)) == NULL)
419  return (NUMA *)ERROR_PTR("nad not made", __func__, NULL);
420  numaCopyParameters(nad, nas);
421  for (i = 0; i < n; i++) {
422  numaGetFValue(nas, i, &val);
423  val = scale * (val + shift);
424  numaAddNumber(nad, val);
425  }
426  return nad;
427 }
428 
429 
441 l_ok
443  l_int32 first,
444  l_int32 last,
445  l_float32 *pmean,
446  l_float32 *pvar,
447  l_float32 *prvar)
448 {
449 l_int32 i, n, ni;
450 l_float32 sum, sumsq, val, mean, var;
451 
452  if (pmean) *pmean = 0.0;
453  if (pvar) *pvar = 0.0;
454  if (prvar) *prvar = 0.0;
455  if (!pmean && !pvar && !prvar)
456  return ERROR_INT("nothing requested", __func__, 1);
457  if (!na)
458  return ERROR_INT("na not defined", __func__, 1);
459  if ((n = numaGetCount(na)) == 0)
460  return ERROR_INT("na is empty", __func__, 1);
461  first = L_MAX(0, first);
462  if (last < 0) last = n - 1;
463  if (first >= n)
464  return ERROR_INT("invalid first", __func__, 1);
465  if (last >= n) {
466  L_WARNING("last = %d is beyond max index = %d; adjusting\n",
467  __func__, last, n - 1);
468  last = n - 1;
469  }
470  if (first > last)
471  return ERROR_INT("first > last\n", __func__, 1);
472  ni = last - first + 1;
473  sum = sumsq = 0.0;
474  for (i = first; i <= last; i++) {
475  numaGetFValue(na, i, &val);
476  sum += val;
477  sumsq += val * val;
478  }
479 
480  mean = sum / ni;
481  if (pmean)
482  *pmean = mean;
483  if (pvar || prvar) {
484  var = sumsq / ni - mean * mean;
485  if (pvar) *pvar = var;
486  if (prvar) *prvar = sqrtf(var);
487  }
488 
489  return 0;
490 }
491 
492 
524 l_ok
526  l_int32 wc,
527  NUMA **pnam,
528  NUMA **pnams,
529  NUMA **pnav,
530  NUMA **pnarv)
531 {
532 NUMA *nam, *nams;
533 
534  if (!nas)
535  return ERROR_INT("nas not defined", __func__, 1);
536  if (2 * wc + 1 > numaGetCount(nas))
537  L_WARNING("filter wider than input array!\n", __func__);
538 
539  if (!pnav && !pnarv) {
540  if (pnam) *pnam = numaWindowedMean(nas, wc);
541  if (pnams) *pnams = numaWindowedMeanSquare(nas, wc);
542  return 0;
543  }
544 
545  nam = numaWindowedMean(nas, wc);
546  nams = numaWindowedMeanSquare(nas, wc);
547  numaWindowedVariance(nam, nams, pnav, pnarv);
548  if (pnam)
549  *pnam = nam;
550  else
551  numaDestroy(&nam);
552  if (pnams)
553  *pnams = nams;
554  else
555  numaDestroy(&nams);
556  return 0;
557 }
558 
559 
573 NUMA *
575  l_int32 wc)
576 {
577 l_int32 i, n, n1, width;
578 l_float32 sum, norm;
579 l_float32 *fa1, *fad, *suma;
580 NUMA *na1, *nad;
581 
582  if (!nas)
583  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
584  n = numaGetCount(nas);
585  width = 2 * wc + 1; /* filter width */
586  if (width > n)
587  L_WARNING("filter wider than input array!\n", __func__);
588 
589  na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER);
590  n1 = n + 2 * wc;
591  fa1 = numaGetFArray(na1, L_NOCOPY);
592  nad = numaMakeConstant(0, n);
593  fad = numaGetFArray(nad, L_NOCOPY);
594 
595  /* Make sum array; note the indexing */
596  if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) {
597  numaDestroy(&na1);
598  numaDestroy(&nad);
599  return (NUMA *)ERROR_PTR("suma not made", __func__, NULL);
600  }
601  sum = 0.0;
602  suma[0] = 0.0;
603  for (i = 0; i < n1; i++) {
604  sum += fa1[i];
605  suma[i + 1] = sum;
606  }
607 
608  norm = 1. / (2 * wc + 1);
609  for (i = 0; i < n; i++)
610  fad[i] = norm * (suma[width + i] - suma[i]);
611 
612  LEPT_FREE(suma);
613  numaDestroy(&na1);
614  return nad;
615 }
616 
617 
631 NUMA *
633  l_int32 wc)
634 {
635 l_int32 i, n, n1, width;
636 l_float32 sum, norm;
637 l_float32 *fa1, *fad, *suma;
638 NUMA *na1, *nad;
639 
640  if (!nas)
641  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
642  n = numaGetCount(nas);
643  width = 2 * wc + 1; /* filter width */
644  if (width > n)
645  L_WARNING("filter wider than input array!\n", __func__);
646 
647  na1 = numaAddSpecifiedBorder(nas, wc, wc, L_MIRRORED_BORDER);
648  n1 = n + 2 * wc;
649  fa1 = numaGetFArray(na1, L_NOCOPY);
650  nad = numaMakeConstant(0, n);
651  fad = numaGetFArray(nad, L_NOCOPY);
652 
653  /* Make sum array; note the indexing */
654  if ((suma = (l_float32 *)LEPT_CALLOC(n1 + 1, sizeof(l_float32))) == NULL) {
655  numaDestroy(&na1);
656  numaDestroy(&nad);
657  return (NUMA *)ERROR_PTR("suma not made", __func__, NULL);
658  }
659  sum = 0.0;
660  suma[0] = 0.0;
661  for (i = 0; i < n1; i++) {
662  sum += fa1[i] * fa1[i];
663  suma[i + 1] = sum;
664  }
665 
666  norm = 1. / (2 * wc + 1);
667  for (i = 0; i < n; i++)
668  fad[i] = norm * (suma[width + i] - suma[i]);
669 
670  LEPT_FREE(suma);
671  numaDestroy(&na1);
672  return nad;
673 }
674 
675 
697 l_ok
699  NUMA *nams,
700  NUMA **pnav,
701  NUMA **pnarv)
702 {
703 l_int32 i, nm, nms;
704 l_float32 var;
705 l_float32 *fam, *fams, *fav, *farv;
706 NUMA *nav, *narv; /* variance and square root of variance */
707 
708  if (pnav) *pnav = NULL;
709  if (pnarv) *pnarv = NULL;
710  if (!pnav && !pnarv)
711  return ERROR_INT("neither &nav nor &narv are defined", __func__, 1);
712  if (!nam)
713  return ERROR_INT("nam not defined", __func__, 1);
714  if (!nams)
715  return ERROR_INT("nams not defined", __func__, 1);
716  nm = numaGetCount(nam);
717  nms = numaGetCount(nams);
718  if (nm != nms)
719  return ERROR_INT("sizes of nam and nams differ", __func__, 1);
720 
721  if (pnav) {
722  nav = numaMakeConstant(0, nm);
723  *pnav = nav;
724  fav = numaGetFArray(nav, L_NOCOPY);
725  }
726  if (pnarv) {
727  narv = numaMakeConstant(0, nm);
728  *pnarv = narv;
729  farv = numaGetFArray(narv, L_NOCOPY);
730  }
731  fam = numaGetFArray(nam, L_NOCOPY);
732  fams = numaGetFArray(nams, L_NOCOPY);
733 
734  for (i = 0; i < nm; i++) {
735  var = fams[i] - fam[i] * fam[i];
736  if (pnav)
737  fav[i] = var;
738  if (pnarv)
739  farv[i] = sqrtf(var);
740  }
741 
742  return 0;
743 }
744 
745 
763 NUMA *
765  l_int32 halfwin)
766 {
767 l_int32 i, n;
768 l_float32 medval;
769 NUMA *na1, *na2, *nad;
770 
771  if (!nas)
772  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
773  if ((n = numaGetCount(nas)) < 3)
774  return numaCopy(nas);
775  if (halfwin <= 0) {
776  L_ERROR("filter too small; returning a copy\n", __func__);
777  return numaCopy(nas);
778  }
779 
780  if (halfwin > (n - 1) / 2) {
781  halfwin = (n - 1) / 2;
782  L_INFO("reducing filter to halfwin = %d\n", __func__, halfwin);
783  }
784 
785  /* Add a border to both ends */
786  na1 = numaAddSpecifiedBorder(nas, halfwin, halfwin, L_MIRRORED_BORDER);
787 
788  /* Get the median value at the center of each window, corresponding
789  * to locations in the input nas. */
790  nad = numaCreate(n);
791  for (i = 0; i < n; i++) {
792  na2 = numaClipToInterval(na1, i, i + 2 * halfwin);
793  numaGetMedian(na2, &medval);
794  numaAddNumber(nad, medval);
795  numaDestroy(&na2);
796  }
797 
798  numaDestroy(&na1);
799  return nad;
800 }
801 
802 
810 NUMA *
812 {
813 l_int32 i, n, ival;
814 NUMA *nad;
815 
816  if (!nas)
817  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
818 
819  n = numaGetCount(nas);
820  if ((nad = numaCreate(n)) == NULL)
821  return (NUMA *)ERROR_PTR("nad not made", __func__, NULL);
822  numaCopyParameters(nad, nas);
823  for (i = 0; i < n; i++) {
824  numaGetIValue(nas, i, &ival);
825  numaAddNumber(nad, ival);
826  }
827  return nad;
828 }
829 
830 
831 /*----------------------------------------------------------------------*
832  * Histogram generation and statistics *
833  *----------------------------------------------------------------------*/
860 NUMA *
862  l_int32 maxbins,
863  l_int32 *pbinsize,
864  l_int32 *pbinstart)
865 {
866 l_int32 i, n, ival, hval;
867 l_int32 iminval, imaxval, range, binsize, nbins, ibin;
868 l_float32 val, ratio;
869 NUMA *nai, *nahist;
870 
871  if (pbinsize) *pbinsize = 0;
872  if (pbinstart) *pbinstart = 0;
873  if (!na)
874  return (NUMA *)ERROR_PTR("na not defined", __func__, NULL);
875  if (maxbins < 1)
876  return (NUMA *)ERROR_PTR("maxbins < 1", __func__, NULL);
877 
878  /* Determine input range */
879  numaGetMin(na, &val, NULL);
880  iminval = (l_int32)(val + 0.5);
881  numaGetMax(na, &val, NULL);
882  imaxval = (l_int32)(val + 0.5);
883  if (pbinstart == NULL) { /* clip negative vals; start from 0 */
884  iminval = 0;
885  if (imaxval < 0)
886  return (NUMA *)ERROR_PTR("all values < 0", __func__, NULL);
887  }
888 
889  /* Determine binsize */
890  range = imaxval - iminval + 1;
891  if (range > maxbins - 1) {
892  ratio = (l_float64)range / (l_float64)maxbins;
893  binsize = 0;
894  for (i = 0; i < NBinSizes; i++) {
895  if (ratio < BinSizeArray[i]) {
896  binsize = BinSizeArray[i];
897  break;
898  }
899  }
900  if (binsize == 0)
901  return (NUMA *)ERROR_PTR("numbers too large", __func__, NULL);
902  } else {
903  binsize = 1;
904  }
905  if (pbinsize) *pbinsize = binsize;
906  nbins = 1 + range / binsize; /* +1 seems to be sufficient */
907 
908  /* Redetermine iminval */
909  if (pbinstart && binsize > 1) {
910  if (iminval >= 0)
911  iminval = binsize * (iminval / binsize);
912  else
913  iminval = binsize * ((iminval - binsize + 1) / binsize);
914  }
915  if (pbinstart) *pbinstart = iminval;
916 
917 #if DEBUG_HISTO
918  lept_stderr(" imaxval = %d, range = %d, nbins = %d\n",
919  imaxval, range, nbins);
920 #endif /* DEBUG_HISTO */
921 
922  /* Use integerized data for input */
923  if ((nai = numaConvertToInt(na)) == NULL)
924  return (NUMA *)ERROR_PTR("nai not made", __func__, NULL);
925  n = numaGetCount(nai);
926 
927  /* Make histogram, converting value in input array
928  * into a bin number for this histogram array. */
929  if ((nahist = numaCreate(nbins)) == NULL) {
930  numaDestroy(&nai);
931  return (NUMA *)ERROR_PTR("nahist not made", __func__, NULL);
932  }
933  numaSetCount(nahist, nbins);
934  numaSetParameters(nahist, iminval, binsize);
935  for (i = 0; i < n; i++) {
936  numaGetIValue(nai, i, &ival);
937  ibin = (ival - iminval) / binsize;
938  if (ibin >= 0 && ibin < nbins) {
939  numaGetIValue(nahist, ibin, &hval);
940  numaSetValue(nahist, ibin, hval + 1.0);
941  }
942  }
943 
944  numaDestroy(&nai);
945  return nahist;
946 }
947 
948 
971 NUMA *
973  l_int32 maxbins)
974 {
975 l_int32 i, n, imin, imax, irange, ibin, ival, allints;
976 l_float32 minval, maxval, range, binsize, fval;
977 NUMA *nah;
978 
979  if (!na)
980  return (NUMA *)ERROR_PTR("na not defined", __func__, NULL);
981  maxbins = L_MAX(1, maxbins);
982 
983  /* Determine input range */
984  numaGetMin(na, &minval, NULL);
985  numaGetMax(na, &maxval, NULL);
986 
987  /* Determine if values are all integers */
988  n = numaGetCount(na);
989  numaHasOnlyIntegers(na, &allints);
990 
991  /* Do simple integer binning if possible */
992  if (allints && (maxval - minval < maxbins)) {
993  imin = (l_int32)minval;
994  imax = (l_int32)maxval;
995  irange = imax - imin + 1;
996  nah = numaCreate(irange);
997  numaSetCount(nah, irange); /* init */
998  numaSetParameters(nah, minval, 1.0);
999  for (i = 0; i < n; i++) {
1000  numaGetIValue(na, i, &ival);
1001  ibin = ival - imin;
1002  numaGetIValue(nah, ibin, &ival);
1003  numaSetValue(nah, ibin, ival + 1.0);
1004  }
1005 
1006  return nah;
1007  }
1008 
1009  /* Do float binning, even if the data is integers. */
1010  range = maxval - minval;
1011  binsize = range / (l_float32)maxbins;
1012  if (range == 0.0) {
1013  nah = numaCreate(1);
1014  numaSetParameters(nah, minval, binsize);
1015  numaAddNumber(nah, n);
1016  return nah;
1017  }
1018  nah = numaCreate(maxbins);
1019  numaSetCount(nah, maxbins);
1020  numaSetParameters(nah, minval, binsize);
1021  for (i = 0; i < n; i++) {
1022  numaGetFValue(na, i, &fval);
1023  ibin = (l_int32)((fval - minval) / binsize);
1024  ibin = L_MIN(ibin, maxbins - 1); /* "edge" case; stay in bounds */
1025  numaGetIValue(nah, ibin, &ival);
1026  numaSetValue(nah, ibin, ival + 1.0);
1027  }
1028 
1029  return nah;
1030 }
1031 
1032 
1053 NUMA *
1055  l_float32 binsize,
1056  l_float32 maxsize)
1057 {
1058 l_int32 i, n, nbins, ival, ibin;
1059 l_float32 val, maxval;
1060 NUMA *nad;
1061 
1062  if (!na)
1063  return (NUMA *)ERROR_PTR("na not defined", __func__, NULL);
1064  if (binsize <= 0.0)
1065  return (NUMA *)ERROR_PTR("binsize must be > 0.0", __func__, NULL);
1066  if (binsize > maxsize)
1067  binsize = maxsize; /* just one bin */
1068 
1069  numaGetMax(na, &maxval, NULL);
1070  n = numaGetCount(na);
1071  maxsize = L_MIN(maxsize, maxval);
1072  nbins = (l_int32)(maxsize / binsize) + 1;
1073 
1074 /* lept_stderr("maxsize = %7.3f, nbins = %d\n", maxsize, nbins); */
1075 
1076  if ((nad = numaCreate(nbins)) == NULL)
1077  return (NUMA *)ERROR_PTR("nad not made", __func__, NULL);
1078  numaSetParameters(nad, 0.0, binsize);
1079  numaSetCount(nad, nbins); /* interpret zeroes in bins as data */
1080  for (i = 0; i < n; i++) {
1081  numaGetFValue(na, i, &val);
1082  ibin = (l_int32)(val / binsize);
1083  if (ibin >= 0 && ibin < nbins) {
1084  numaGetIValue(nad, ibin, &ival);
1085  numaSetValue(nad, ibin, ival + 1.0);
1086  }
1087  }
1088 
1089  return nad;
1090 }
1091 
1092 
1100 NUMA *
1102  l_int32 newsize)
1103 {
1104 l_int32 i, j, ns, nd, index, count, val;
1105 l_float32 start, oldsize;
1106 NUMA *nad;
1107 
1108  if (!nas)
1109  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
1110  if (newsize <= 1)
1111  return (NUMA *)ERROR_PTR("newsize must be > 1", __func__, NULL);
1112  if ((ns = numaGetCount(nas)) == 0)
1113  return (NUMA *)ERROR_PTR("no bins in nas", __func__, NULL);
1114 
1115  nd = (ns + newsize - 1) / newsize;
1116  if ((nad = numaCreate(nd)) == NULL)
1117  return (NUMA *)ERROR_PTR("nad not made", __func__, NULL);
1118  numaGetParameters(nad, &start, &oldsize);
1119  numaSetParameters(nad, start, oldsize * newsize);
1120 
1121  for (i = 0; i < nd; i++) { /* new bins */
1122  count = 0;
1123  index = i * newsize;
1124  for (j = 0; j < newsize; j++) {
1125  if (index < ns) {
1126  numaGetIValue(nas, index, &val);
1127  count += val;
1128  index++;
1129  }
1130  }
1131  numaAddNumber(nad, count);
1132  }
1133 
1134  return nad;
1135 }
1136 
1137 
1146 NUMA *
1148  l_float32 tsum)
1149 {
1150 l_int32 i, ns;
1151 l_float32 sum, factor, fval;
1152 NUMA *nad;
1153 
1154  if (!nas)
1155  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
1156  if (tsum <= 0.0)
1157  return (NUMA *)ERROR_PTR("tsum must be > 0.0", __func__, NULL);
1158  if ((ns = numaGetCount(nas)) == 0)
1159  return (NUMA *)ERROR_PTR("no bins in nas", __func__, NULL);
1160 
1161  numaGetSum(nas, &sum);
1162  factor = tsum / sum;
1163 
1164  if ((nad = numaCreate(ns)) == NULL)
1165  return (NUMA *)ERROR_PTR("nad not made", __func__, NULL);
1166  numaCopyParameters(nad, nas);
1167 
1168  for (i = 0; i < ns; i++) {
1169  numaGetFValue(nas, i, &fval);
1170  fval *= factor;
1171  numaAddNumber(nad, fval);
1172  }
1173 
1174  return nad;
1175 }
1176 
1177 
1226 l_ok
1228  l_int32 maxbins,
1229  l_float32 *pmin,
1230  l_float32 *pmax,
1231  l_float32 *pmean,
1232  l_float32 *pvariance,
1233  l_float32 *pmedian,
1234  l_float32 rank,
1235  l_float32 *prval,
1236  NUMA **phisto)
1237 {
1238 l_int32 i, n;
1239 l_float32 minval, maxval, fval, mean, sum;
1240 NUMA *nah;
1241 
1242  if (pmin) *pmin = 0.0;
1243  if (pmax) *pmax = 0.0;
1244  if (pmean) *pmean = 0.0;
1245  if (pvariance) *pvariance = 0.0;
1246  if (pmedian) *pmedian = 0.0;
1247  if (prval) *prval = 0.0;
1248  if (phisto) *phisto = NULL;
1249  if (!na)
1250  return ERROR_INT("na not defined", __func__, 1);
1251  if ((n = numaGetCount(na)) == 0)
1252  return ERROR_INT("numa is empty", __func__, 1);
1253 
1254  numaGetMin(na, &minval, NULL);
1255  numaGetMax(na, &maxval, NULL);
1256  if (pmin) *pmin = minval;
1257  if (pmax) *pmax = maxval;
1258  if (pmean || pvariance) {
1259  sum = 0.0;
1260  for (i = 0; i < n; i++) {
1261  numaGetFValue(na, i, &fval);
1262  sum += fval;
1263  }
1264  mean = sum / (l_float32)n;
1265  if (pmean) *pmean = mean;
1266  }
1267  if (pvariance) {
1268  sum = 0.0;
1269  for (i = 0; i < n; i++) {
1270  numaGetFValue(na, i, &fval);
1271  sum += fval * fval;
1272  }
1273  *pvariance = sum / (l_float32)n - mean * mean;
1274  }
1275 
1276  if (!pmedian && !prval && !phisto)
1277  return 0;
1278 
1279  nah = numaMakeHistogramAuto(na, maxbins);
1280  if (pmedian)
1281  numaHistogramGetValFromRank(nah, 0.5, pmedian);
1282  if (prval)
1283  numaHistogramGetValFromRank(nah, rank, prval);
1284  if (phisto)
1285  *phisto = nah;
1286  else
1287  numaDestroy(&nah);
1288  return 0;
1289 }
1290 
1291 
1315 l_ok
1317  l_float32 startx,
1318  l_float32 deltax,
1319  l_float32 *pxmean,
1320  l_float32 *pxmedian,
1321  l_float32 *pxmode,
1322  l_float32 *pxvariance)
1323 {
1324  if (pxmean) *pxmean = 0.0;
1325  if (pxmedian) *pxmedian = 0.0;
1326  if (pxmode) *pxmode = 0.0;
1327  if (pxvariance) *pxvariance = 0.0;
1328  if (!nahisto)
1329  return ERROR_INT("nahisto not defined", __func__, 1);
1330 
1331  return numaGetHistogramStatsOnInterval(nahisto, startx, deltax, 0, -1,
1332  pxmean, pxmedian, pxmode,
1333  pxvariance);
1334 }
1335 
1336 
1362 l_ok
1364  l_float32 startx,
1365  l_float32 deltax,
1366  l_int32 ifirst,
1367  l_int32 ilast,
1368  l_float32 *pxmean,
1369  l_float32 *pxmedian,
1370  l_float32 *pxmode,
1371  l_float32 *pxvariance)
1372 {
1373 l_int32 i, n, imax;
1374 l_float32 sum, sumval, halfsum, moment, var, x, y, ymax;
1375 
1376  if (pxmean) *pxmean = 0.0;
1377  if (pxmedian) *pxmedian = 0.0;
1378  if (pxmode) *pxmode = 0.0;
1379  if (pxvariance) *pxvariance = 0.0;
1380  if (!nahisto)
1381  return ERROR_INT("nahisto not defined", __func__, 1);
1382  if (!pxmean && !pxmedian && !pxmode && !pxvariance)
1383  return ERROR_INT("nothing to compute", __func__, 1);
1384 
1385  n = numaGetCount(nahisto);
1386  ifirst = L_MAX(0, ifirst);
1387  if (ilast < 0) ilast = n - 1;
1388  if (ifirst >= n)
1389  return ERROR_INT("invalid ifirst", __func__, 1);
1390  if (ilast >= n) {
1391  L_WARNING("ilast = %d is beyond max index = %d; adjusting\n",
1392  __func__, ilast, n - 1);
1393  ilast = n - 1;
1394  }
1395  if (ifirst > ilast)
1396  return ERROR_INT("ifirst > ilast", __func__, 1);
1397  for (sum = 0.0, moment = 0.0, var = 0.0, i = ifirst; i <= ilast ; i++) {
1398  x = startx + i * deltax;
1399  numaGetFValue(nahisto, i, &y);
1400  sum += y;
1401  moment += x * y;
1402  var += x * x * y;
1403  }
1404  if (sum == 0.0) {
1405  L_INFO("sum is 0\n", __func__);
1406  return 0;
1407  }
1408 
1409  if (pxmean)
1410  *pxmean = moment / sum;
1411  if (pxvariance)
1412  *pxvariance = var / sum - moment * moment / (sum * sum);
1413 
1414  if (pxmedian) {
1415  halfsum = sum / 2.0;
1416  for (sumval = 0.0, i = ifirst; i <= ilast; i++) {
1417  numaGetFValue(nahisto, i, &y);
1418  sumval += y;
1419  if (sumval >= halfsum) {
1420  *pxmedian = startx + i * deltax;
1421  break;
1422  }
1423  }
1424  }
1425 
1426  if (pxmode) {
1427  imax = -1;
1428  ymax = -1.0e10;
1429  for (i = ifirst; i <= ilast; i++) {
1430  numaGetFValue(nahisto, i, &y);
1431  if (y > ymax) {
1432  ymax = y;
1433  imax = i;
1434  }
1435  }
1436  *pxmode = startx + imax * deltax;
1437  }
1438 
1439  return 0;
1440 }
1441 
1442 
1454 l_ok
1455 numaMakeRankFromHistogram(l_float32 startx,
1456  l_float32 deltax,
1457  NUMA *nasy,
1458  l_int32 npts,
1459  NUMA **pnax,
1460  NUMA **pnay)
1461 {
1462 l_int32 i, n;
1463 l_float32 sum, fval;
1464 NUMA *nan, *nar;
1465 
1466  if (pnax) *pnax = NULL;
1467  if (!pnay)
1468  return ERROR_INT("&nay not defined", __func__, 1);
1469  *pnay = NULL;
1470  if (!nasy)
1471  return ERROR_INT("nasy not defined", __func__, 1);
1472  if ((n = numaGetCount(nasy)) == 0)
1473  return ERROR_INT("no bins in nas", __func__, 1);
1474 
1475  /* Normalize and generate the rank array corresponding to
1476  * the binned histogram. */
1477  nan = numaNormalizeHistogram(nasy, 1.0);
1478  nar = numaCreate(n + 1); /* rank numa corresponding to nan */
1479  sum = 0.0;
1480  numaAddNumber(nar, sum); /* first element is 0.0 */
1481  for (i = 0; i < n; i++) {
1482  numaGetFValue(nan, i, &fval);
1483  sum += fval;
1484  numaAddNumber(nar, sum);
1485  }
1486 
1487  /* Compute rank array on full range with specified
1488  * number of points and correspondence to x-values. */
1489  numaInterpolateEqxInterval(startx, deltax, nar, L_LINEAR_INTERP,
1490  startx, startx + n * deltax, npts,
1491  pnax, pnay);
1492  numaDestroy(&nan);
1493  numaDestroy(&nar);
1494  return 0;
1495 }
1496 
1497 
1520 l_ok
1522  l_float32 rval,
1523  l_float32 *prank)
1524 {
1525 l_int32 i, ibinval, n;
1526 l_float32 startval, binsize, binval, maxval, fractval, total, sum, val;
1527 
1528  if (!prank)
1529  return ERROR_INT("prank not defined", __func__, 1);
1530  *prank = 0.0;
1531  if (!na)
1532  return ERROR_INT("na not defined", __func__, 1);
1533  numaGetParameters(na, &startval, &binsize);
1534  n = numaGetCount(na);
1535  if (rval < startval)
1536  return 0;
1537  maxval = startval + n * binsize;
1538  if (rval > maxval) {
1539  *prank = 1.0;
1540  return 0;
1541  }
1542 
1543  binval = (rval - startval) / binsize;
1544  ibinval = (l_int32)binval;
1545  if (ibinval >= n) {
1546  *prank = 1.0;
1547  return 0;
1548  }
1549  fractval = binval - (l_float32)ibinval;
1550 
1551  sum = 0.0;
1552  for (i = 0; i < ibinval; i++) {
1553  numaGetFValue(na, i, &val);
1554  sum += val;
1555  }
1556  numaGetFValue(na, ibinval, &val);
1557  sum += fractval * val;
1558  numaGetSum(na, &total);
1559  *prank = sum / total;
1560 
1561 /* lept_stderr("binval = %7.3f, rank = %7.3f\n", binval, *prank); */
1562 
1563  return 0;
1564 }
1565 
1566 
1589 l_ok
1591  l_float32 rank,
1592  l_float32 *prval)
1593 {
1594 l_int32 i, n;
1595 l_float32 startval, binsize, rankcount, total, sum, fract, val;
1596 
1597  if (!prval)
1598  return ERROR_INT("prval not defined", __func__, 1);
1599  *prval = 0.0;
1600  if (!na)
1601  return ERROR_INT("na not defined", __func__, 1);
1602  if (rank < 0.0) {
1603  L_WARNING("rank < 0; setting to 0.0\n", __func__);
1604  rank = 0.0;
1605  }
1606  if (rank > 1.0) {
1607  L_WARNING("rank > 1.0; setting to 1.0\n", __func__);
1608  rank = 1.0;
1609  }
1610 
1611  n = numaGetCount(na);
1612  numaGetParameters(na, &startval, &binsize);
1613  numaGetSum(na, &total);
1614  rankcount = rank * total; /* count that corresponds to rank */
1615  sum = 0.0;
1616  for (i = 0; i < n; i++) {
1617  numaGetFValue(na, i, &val);
1618  if (sum + val >= rankcount)
1619  break;
1620  sum += val;
1621  }
1622  if (val <= 0.0) /* can == 0 if rank == 0.0 */
1623  fract = 0.0;
1624  else /* sum + fract * val = rankcount */
1625  fract = (rankcount - sum) / val;
1626 
1627  /* The use of the fraction of a bin allows a simple calculation
1628  * for the histogram value at the given rank. */
1629  *prval = startval + binsize * ((l_float32)i + fract);
1630 
1631 /* lept_stderr("rank = %7.3f, val = %7.3f\n", rank, *prval); */
1632 
1633  return 0;
1634 }
1635 
1636 
1659 l_ok
1661  l_int32 nbins,
1662  NUMA **pnabinval)
1663 {
1664 NUMA *nabinval; /* average gray value in the bins */
1665 NUMA *naeach;
1666 l_int32 i, ntot, bincount, binindex, binsize;
1667 l_float32 sum, val, ave;
1668 
1669  if (!pnabinval)
1670  return ERROR_INT("&nabinval not defined", __func__, 1);
1671  *pnabinval = NULL;
1672  if (!na)
1673  return ERROR_INT("na not defined", __func__, 1);
1674  if (nbins < 2)
1675  return ERROR_INT("nbins must be > 1", __func__, 1);
1676 
1677  /* Get the number of items in each bin */
1678  ntot = numaGetCount(na);
1679  if ((naeach = numaGetUniformBinSizes(ntot, nbins)) == NULL)
1680  return ERROR_INT("naeach not made", __func__, 1);
1681 
1682  /* Get the average value in each bin */
1683  sum = 0.0;
1684  bincount = 0;
1685  binindex = 0;
1686  numaGetIValue(naeach, 0, &binsize);
1687  nabinval = numaCreate(nbins);
1688  for (i = 0; i < ntot; i++) {
1689  numaGetFValue(na, i, &val);
1690  bincount++;
1691  sum += val;
1692  if (bincount == binsize) { /* add bin entry */
1693  ave = sum / binsize;
1694  numaAddNumber(nabinval, ave);
1695  sum = 0.0;
1696  bincount = 0;
1697  binindex++;
1698  if (binindex == nbins) break;
1699  numaGetIValue(naeach, binindex, &binsize);
1700  }
1701  }
1702  *pnabinval = nabinval;
1703 
1704  numaDestroy(&naeach);
1705  return 0;
1706 }
1707 
1708 
1734 l_ok
1736  l_int32 nbins,
1737  NUMA **pnabinval,
1738  NUMA **pnarank)
1739 {
1740 NUMA *nabinval; /* average gray value in the bins */
1741 NUMA *naeach, *nan;
1742 l_int32 i, j, nxvals, occup, count, bincount, binindex, binsize;
1743 l_float32 sum, ave, ntot;
1744 
1745  if (pnarank) *pnarank = NULL;
1746  if (!pnabinval)
1747  return ERROR_INT("&nabinval not defined", __func__, 1);
1748  *pnabinval = NULL;
1749  if (!na)
1750  return ERROR_INT("na not defined", __func__, 1);
1751  if (nbins < 2)
1752  return ERROR_INT("nbins must be > 1", __func__, 1);
1753 
1754  nxvals = numaGetCount(na);
1755  numaGetSum(na, &ntot);
1756  occup = ntot / nxvals;
1757  if (occup < 1) L_INFO("average occupancy %d < 1\n", __func__, occup);
1758 
1759  /* Get the number of items in each bin */
1760  if ((naeach = numaGetUniformBinSizes(ntot, nbins)) == NULL)
1761  return ERROR_INT("naeach not made", __func__, 1);
1762 
1763  /* Get the average value in each bin */
1764  sum = 0.0;
1765  bincount = 0;
1766  binindex = 0;
1767  numaGetIValue(naeach, 0, &binsize);
1768  nabinval = numaCreate(nbins);
1769  for (i = 0; i < nxvals; i++) {
1770  numaGetIValue(na, i, &count);
1771  for (j = 0; j < count; j++) {
1772  bincount++;
1773  sum += i;
1774  if (bincount == binsize) { /* add bin entry */
1775  ave = sum / binsize;
1776  numaAddNumber(nabinval, ave);
1777  sum = 0.0;
1778  bincount = 0;
1779  binindex++;
1780  if (binindex == nbins) break;
1781  numaGetIValue(naeach, binindex, &binsize);
1782  }
1783  }
1784  if (binindex == nbins) break;
1785  }
1786  *pnabinval = nabinval;
1787  if (binindex != nbins)
1788  L_ERROR("binindex = %d != nbins = %d\n", __func__, binindex, nbins);
1789 
1790  /* Get cumulative normalized histogram (rank[gray value]).
1791  * This is the partial sum operating on the normalized histogram. */
1792  if (pnarank) {
1793  nan = numaNormalizeHistogram(na, 1.0);
1794  *pnarank = numaGetPartialSums(nan);
1795  numaDestroy(&nan);
1796  }
1797  numaDestroy(&naeach);
1798  return 0;
1799 }
1800 
1801 
1820 l_ok
1822  l_int32 nbins,
1823  NUMA **pnam)
1824 {
1825 NUMA *na1;
1826 l_int32 maxbins, type;
1827 l_float32 maxval, delx;
1828 
1829  if (!pnam)
1830  return ERROR_INT("&pnam not defined", __func__, 1);
1831  *pnam = NULL;
1832  if (!na)
1833  return ERROR_INT("na not defined", __func__, 1);
1834  if (numaGetCount(na) == 0)
1835  return ERROR_INT("na is empty", __func__, 1);
1836  if (nbins < 2)
1837  return ERROR_INT("nbins must be > 1", __func__, 1);
1838 
1839  /* Choose between sorted array and a histogram.
1840  * If the input array is has a small number of numbers with
1841  * a large maximum, we will sort it. At the other extreme, if
1842  * the array has many numbers with a small maximum, such as the
1843  * values of pixels in an 8 bpp grayscale image, generate a histogram.
1844  * If type comes back as L_BIN_SORT, use a histogram. */
1845  type = numaChooseSortType(na);
1846  if (type == L_SHELL_SORT) { /* sort the array */
1847  L_INFO("sort the array: input size = %d\n", __func__, numaGetCount(na));
1848  na1 = numaSort(NULL, na, L_SORT_INCREASING);
1849  numaDiscretizeSortedInBins(na1, nbins, pnam);
1850  numaDestroy(&na1);
1851  return 0;
1852  }
1853 
1854  /* Make the histogram. Assuming there are no negative values
1855  * in the array, if the max value in the array does not exceed
1856  * about 100000, the bin size for generating the histogram will
1857  * be 1; maxbins refers to the number of entries in the histogram. */
1858  L_INFO("use a histogram: input size = %d\n", __func__, numaGetCount(na));
1859  numaGetMax(na, &maxval, NULL);
1860  maxbins = L_MIN(100002, (l_int32)maxval + 2);
1861  na1 = numaMakeHistogram(na, maxbins, NULL, NULL);
1862 
1863  /* Warn if there is a scale change. This shouldn't happen
1864  * unless the max value is above 100000. */
1865  numaGetParameters(na1, NULL, &delx);
1866  if (delx > 1.0)
1867  L_WARNING("scale change: delx = %6.2f\n", __func__, delx);
1868 
1869  /* Rank bin the results */
1870  numaDiscretizeHistoInBins(na1, nbins, pnam, NULL);
1871  numaDestroy(&na1);
1872  return 0;
1873 }
1874 
1875 
1889 NUMA *
1890 numaGetUniformBinSizes(l_int32 ntotal,
1891  l_int32 nbins)
1892 {
1893 l_int32 i, start, end;
1894 NUMA *naeach;
1895 
1896  if (ntotal <= 0)
1897  return (NUMA *)ERROR_PTR("ntotal <= 0", __func__, NULL);
1898  if (nbins <= 0)
1899  return (NUMA *)ERROR_PTR("nbins <= 0", __func__, NULL);
1900 
1901  if ((naeach = numaCreate(nbins)) == NULL)
1902  return (NUMA *)ERROR_PTR("naeach not made", __func__, NULL);
1903  start = 0;
1904  for (i = 0; i < nbins; i++) {
1905  end = ntotal * (i + 1) / nbins;
1906  numaAddNumber(naeach, end - start);
1907  start = end;
1908  }
1909  return naeach;
1910 }
1911 
1912 
1913 /*----------------------------------------------------------------------*
1914  * Splitting a distribution *
1915  *----------------------------------------------------------------------*/
1965 l_ok
1967  l_float32 scorefract,
1968  l_int32 *psplitindex,
1969  l_float32 *pave1,
1970  l_float32 *pave2,
1971  l_float32 *pnum1,
1972  l_float32 *pnum2,
1973  NUMA **pnascore)
1974 {
1975 l_int32 i, n, bestsplit, minrange, maxrange, maxindex;
1976 l_float32 ave1, ave2, ave1prev, ave2prev;
1977 l_float32 num1, num2, num1prev, num2prev;
1978 l_float32 val, minval, sum, fract1;
1979 l_float32 norm, score, minscore, maxscore;
1980 NUMA *nascore, *naave1, *naave2, *nanum1, *nanum2;
1981 
1982  if (psplitindex) *psplitindex = 0;
1983  if (pave1) *pave1 = 0.0;
1984  if (pave2) *pave2 = 0.0;
1985  if (pnum1) *pnum1 = 0.0;
1986  if (pnum2) *pnum2 = 0.0;
1987  if (pnascore) *pnascore = NULL;
1988  if (!na)
1989  return ERROR_INT("na not defined", __func__, 1);
1990 
1991  n = numaGetCount(na);
1992  if (n <= 1)
1993  return ERROR_INT("n = 1 in histogram", __func__, 1);
1994  numaGetSum(na, &sum);
1995  if (sum <= 0.0)
1996  return ERROR_INT("sum <= 0.0", __func__, 1);
1997  norm = 4.0 / ((l_float32)(n - 1) * (n - 1));
1998  ave1prev = 0.0;
1999  numaGetHistogramStats(na, 0.0, 1.0, &ave2prev, NULL, NULL, NULL);
2000  num1prev = 0.0;
2001  num2prev = sum;
2002  maxindex = n / 2; /* initialize with something */
2003 
2004  /* Split the histogram with [0 ... i] in the lower part
2005  * and [i+1 ... n-1] in upper part. First, compute an otsu
2006  * score for each possible splitting. */
2007  if ((nascore = numaCreate(n)) == NULL)
2008  return ERROR_INT("nascore not made", __func__, 1);
2009  naave1 = (pave1) ? numaCreate(n) : NULL;
2010  naave2 = (pave2) ? numaCreate(n) : NULL;
2011  nanum1 = (pnum1) ? numaCreate(n) : NULL;
2012  nanum2 = (pnum2) ? numaCreate(n) : NULL;
2013  maxscore = 0.0;
2014  for (i = 0; i < n; i++) {
2015  numaGetFValue(na, i, &val);
2016  num1 = num1prev + val;
2017  if (num1 == 0)
2018  ave1 = ave1prev;
2019  else
2020  ave1 = (num1prev * ave1prev + i * val) / num1;
2021  num2 = num2prev - val;
2022  if (num2 == 0)
2023  ave2 = ave2prev;
2024  else
2025  ave2 = (num2prev * ave2prev - i * val) / num2;
2026  fract1 = num1 / sum;
2027  score = norm * (fract1 * (1 - fract1)) * (ave2 - ave1) * (ave2 - ave1);
2028  numaAddNumber(nascore, score);
2029  if (pave1) numaAddNumber(naave1, ave1);
2030  if (pave2) numaAddNumber(naave2, ave2);
2031  if (pnum1) numaAddNumber(nanum1, num1);
2032  if (pnum2) numaAddNumber(nanum2, num2);
2033  if (score > maxscore) {
2034  maxscore = score;
2035  maxindex = i;
2036  }
2037  num1prev = num1;
2038  num2prev = num2;
2039  ave1prev = ave1;
2040  ave2prev = ave2;
2041  }
2042 
2043  /* Next, for all contiguous scores within a specified fraction
2044  * of the max, choose the split point as the value with the
2045  * minimum in the histogram. */
2046  minscore = (1. - scorefract) * maxscore;
2047  for (i = maxindex - 1; i >= 0; i--) {
2048  numaGetFValue(nascore, i, &val);
2049  if (val < minscore)
2050  break;
2051  }
2052  minrange = i + 1;
2053  for (i = maxindex + 1; i < n; i++) {
2054  numaGetFValue(nascore, i, &val);
2055  if (val < minscore)
2056  break;
2057  }
2058  maxrange = i - 1;
2059  numaGetFValue(na, minrange, &minval);
2060  bestsplit = minrange;
2061  for (i = minrange + 1; i <= maxrange; i++) {
2062  numaGetFValue(na, i, &val);
2063  if (val < minval) {
2064  minval = val;
2065  bestsplit = i;
2066  }
2067  }
2068 
2069  /* Add one to the bestsplit value to get the threshold value,
2070  * because when we take a threshold, as in pixThresholdToBinary(),
2071  * we always choose the set with values below the threshold. */
2072  bestsplit = L_MIN(255, bestsplit + 1);
2073 
2074  if (psplitindex) *psplitindex = bestsplit;
2075  if (pave1) numaGetFValue(naave1, bestsplit, pave1);
2076  if (pave2) numaGetFValue(naave2, bestsplit, pave2);
2077  if (pnum1) numaGetFValue(nanum1, bestsplit, pnum1);
2078  if (pnum2) numaGetFValue(nanum2, bestsplit, pnum2);
2079 
2080  if (pnascore) { /* debug mode */
2081  lept_stderr("minrange = %d, maxrange = %d\n", minrange, maxrange);
2082  lept_stderr("minval = %10.0f\n", minval);
2083  gplotSimple1(nascore, GPLOT_PNG, "/tmp/lept/nascore",
2084  "Score for split distribution");
2085  *pnascore = nascore;
2086  } else {
2087  numaDestroy(&nascore);
2088  }
2089 
2090  if (pave1) numaDestroy(&naave1);
2091  if (pave2) numaDestroy(&naave2);
2092  if (pnum1) numaDestroy(&nanum1);
2093  if (pnum2) numaDestroy(&nanum2);
2094  return 0;
2095 }
2096 
2097 
2098 /*----------------------------------------------------------------------*
2099  * Comparing histograms *
2100  *----------------------------------------------------------------------*/
2125 l_ok
2127  NUMAA *naa2,
2128  NUMA **pnad)
2129 {
2130 l_int32 i, n, nt;
2131 l_float32 dist;
2132 NUMA *na1, *na2, *nad;
2133 
2134  if (!pnad)
2135  return ERROR_INT("&nad not defined", __func__, 1);
2136  *pnad = NULL;
2137  if (!naa1 || !naa2)
2138  return ERROR_INT("na1 and na2 not both defined", __func__, 1);
2139  n = numaaGetCount(naa1);
2140  if (n != numaaGetCount(naa2))
2141  return ERROR_INT("naa1 and naa2 numa counts differ", __func__, 1);
2142  nt = numaaGetNumberCount(naa1);
2143  if (nt != numaaGetNumberCount(naa2))
2144  return ERROR_INT("naa1 and naa2 number counts differ", __func__, 1);
2145  if (256 * n != nt) /* good enough check */
2146  return ERROR_INT("na sizes must be 256", __func__, 1);
2147 
2148  nad = numaCreate(n);
2149  *pnad = nad;
2150  for (i = 0; i < n; i++) {
2151  na1 = numaaGetNuma(naa1, i, L_CLONE);
2152  na2 = numaaGetNuma(naa2, i, L_CLONE);
2153  numaEarthMoverDistance(na1, na2, &dist);
2154  numaAddNumber(nad, dist / 255.); /* normalize to [0.0 - 1.0] */
2155  numaDestroy(&na1);
2156  numaDestroy(&na2);
2157  }
2158  return 0;
2159 }
2160 
2161 
2189 l_ok
2191  NUMA *na2,
2192  l_float32 *pdist)
2193 {
2194 l_int32 n, norm, i;
2195 l_float32 sum1, sum2, diff, total;
2196 l_float32 *array1, *array3;
2197 NUMA *na3;
2198 
2199  if (!pdist)
2200  return ERROR_INT("&dist not defined", __func__, 1);
2201  *pdist = 0.0;
2202  if (!na1 || !na2)
2203  return ERROR_INT("na1 and na2 not both defined", __func__, 1);
2204  n = numaGetCount(na1);
2205  if (n != numaGetCount(na2))
2206  return ERROR_INT("na1 and na2 have different size", __func__, 1);
2207 
2208  /* Generate na3; normalize to na1 if necessary */
2209  numaGetSum(na1, &sum1);
2210  numaGetSum(na2, &sum2);
2211  norm = (L_ABS(sum1 - sum2) < 0.00001 * L_ABS(sum1)) ? 1 : 0;
2212  if (!norm)
2213  na3 = numaTransform(na2, 0, sum1 / sum2);
2214  else
2215  na3 = numaCopy(na2);
2216  array1 = numaGetFArray(na1, L_NOCOPY);
2217  array3 = numaGetFArray(na3, L_NOCOPY);
2218 
2219  /* Move earth in n3 from array elements, to match n1 */
2220  total = 0;
2221  for (i = 1; i < n; i++) {
2222  diff = array1[i - 1] - array3[i - 1];
2223  array3[i] -= diff;
2224  total += L_ABS(diff);
2225  }
2226  *pdist = total / sum1;
2227 
2228  numaDestroy(&na3);
2229  return 0;
2230 }
2231 
2232 
2278 l_ok
2280  l_int32 wc,
2281  NUMA **pnam,
2282  NUMA **pnams,
2283  NUMA **pnav,
2284  NUMA **pnarv)
2285 {
2286 l_int32 i, j, n, nn;
2287 l_float32 **arrays;
2288 l_float32 mean, var, rvar;
2289 NUMA *na1, *na2, *na3, *na4;
2290 
2291  if (pnam) *pnam = NULL;
2292  if (pnams) *pnams = NULL;
2293  if (pnav) *pnav = NULL;
2294  if (pnarv) *pnarv = NULL;
2295  if (!pnam && !pnams && !pnav && !pnarv)
2296  return ERROR_INT("nothing requested", __func__, 1);
2297  if (!naa)
2298  return ERROR_INT("naa not defined", __func__, 1);
2299  n = numaaGetCount(naa);
2300  for (i = 0; i < n; i++) {
2301  nn = numaaGetNumaCount(naa, i);
2302  if (nn != 256) {
2303  L_ERROR("%d numbers in numa[%d]\n", __func__, nn, i);
2304  return 1;
2305  }
2306  }
2307 
2308  if (pnam) *pnam = numaCreate(256);
2309  if (pnams) *pnams = numaCreate(256);
2310  if (pnav) *pnav = numaCreate(256);
2311  if (pnarv) *pnarv = numaCreate(256);
2312 
2313  /* First, use mean smoothing, normalize each histogram,
2314  * and save all results in a 2D matrix. */
2315  arrays = (l_float32 **)LEPT_CALLOC(n, sizeof(l_float32 *));
2316  for (i = 0; i < n; i++) {
2317  na1 = numaaGetNuma(naa, i, L_CLONE);
2318  na2 = numaWindowedMean(na1, wc);
2319  na3 = numaNormalizeHistogram(na2, 10000.);
2320  arrays[i] = numaGetFArray(na3, L_COPY);
2321  numaDestroy(&na1);
2322  numaDestroy(&na2);
2323  numaDestroy(&na3);
2324  }
2325 
2326  /* Get stats between histograms */
2327  for (j = 0; j < 256; j++) {
2328  na4 = numaCreate(n);
2329  for (i = 0; i < n; i++) {
2330  numaAddNumber(na4, arrays[i][j]);
2331  }
2332  numaSimpleStats(na4, 0, -1, &mean, &var, &rvar);
2333  if (pnam) numaAddNumber(*pnam, mean);
2334  if (pnams) numaAddNumber(*pnams, mean * mean);
2335  if (pnav) numaAddNumber(*pnav, var);
2336  if (pnarv) numaAddNumber(*pnarv, rvar);
2337  numaDestroy(&na4);
2338  }
2339 
2340  for (i = 0; i < n; i++)
2341  LEPT_FREE(arrays[i]);
2342  LEPT_FREE(arrays);
2343  return 0;
2344 }
2345 
2346 
2347 /*----------------------------------------------------------------------*
2348  * Extrema finding *
2349  *----------------------------------------------------------------------*/
2366 NUMA *
2368  l_int32 nmax,
2369  l_float32 fract1,
2370  l_float32 fract2)
2371 {
2372 l_int32 i, k, n, maxloc, lloc, rloc;
2373 l_float32 fmaxval, sum, total, newtotal, val, lastval;
2374 l_float32 peakfract;
2375 NUMA *na, *napeak;
2376 
2377  if (!nas)
2378  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
2379  n = numaGetCount(nas);
2380  numaGetSum(nas, &total);
2381 
2382  /* We munge this copy */
2383  if ((na = numaCopy(nas)) == NULL)
2384  return (NUMA *)ERROR_PTR("na not made", __func__, NULL);
2385  if ((napeak = numaCreate(4 * nmax)) == NULL) {
2386  numaDestroy(&na);
2387  return (NUMA *)ERROR_PTR("napeak not made", __func__, NULL);
2388  }
2389 
2390  for (k = 0; k < nmax; k++) {
2391  numaGetSum(na, &newtotal);
2392  if (newtotal == 0.0) /* sanity check */
2393  break;
2394  numaGetMax(na, &fmaxval, &maxloc);
2395  sum = fmaxval;
2396  lastval = fmaxval;
2397  lloc = 0;
2398  for (i = maxloc - 1; i >= 0; --i) {
2399  numaGetFValue(na, i, &val);
2400  if (val == 0.0) {
2401  lloc = i + 1;
2402  break;
2403  }
2404  if (val > fract1 * fmaxval) {
2405  sum += val;
2406  lastval = val;
2407  continue;
2408  }
2409  if (lastval - val > fract2 * lastval) {
2410  sum += val;
2411  lastval = val;
2412  continue;
2413  }
2414  lloc = i;
2415  break;
2416  }
2417  lastval = fmaxval;
2418  rloc = n - 1;
2419  for (i = maxloc + 1; i < n; ++i) {
2420  numaGetFValue(na, i, &val);
2421  if (val == 0.0) {
2422  rloc = i - 1;
2423  break;
2424  }
2425  if (val > fract1 * fmaxval) {
2426  sum += val;
2427  lastval = val;
2428  continue;
2429  }
2430  if (lastval - val > fract2 * lastval) {
2431  sum += val;
2432  lastval = val;
2433  continue;
2434  }
2435  rloc = i;
2436  break;
2437  }
2438  peakfract = sum / total;
2439  numaAddNumber(napeak, lloc);
2440  numaAddNumber(napeak, maxloc);
2441  numaAddNumber(napeak, rloc);
2442  numaAddNumber(napeak, peakfract);
2443 
2444  for (i = lloc; i <= rloc; i++)
2445  numaSetValue(na, i, 0.0);
2446  }
2447 
2448  numaDestroy(&na);
2449  return napeak;
2450 }
2451 
2452 
2482 NUMA *
2484  l_float32 delta,
2485  NUMA **pnav)
2486 {
2487 l_int32 i, n, found, loc, direction;
2488 l_float32 startval, val, maxval, minval;
2489 NUMA *nav, *nad;
2490 
2491  if (pnav) *pnav = NULL;
2492  if (!nas)
2493  return (NUMA *)ERROR_PTR("nas not defined", __func__, NULL);
2494  if (delta < 0.0)
2495  return (NUMA *)ERROR_PTR("delta < 0", __func__, NULL);
2496 
2497  n = numaGetCount(nas);
2498  nad = numaCreate(0);
2499  nav = NULL;
2500  if (pnav) {
2501  nav = numaCreate(0);
2502  *pnav = nav;
2503  }
2504 
2505  /* We don't know if we'll find a peak or valley first,
2506  * but use the first element of nas as the reference point.
2507  * Break when we deviate by 'delta' from the first point. */
2508  numaGetFValue(nas, 0, &startval);
2509  found = FALSE;
2510  for (i = 1; i < n; i++) {
2511  numaGetFValue(nas, i, &val);
2512  if (L_ABS(val - startval) >= delta) {
2513  found = TRUE;
2514  break;
2515  }
2516  }
2517 
2518  if (!found)
2519  return nad; /* it's empty */
2520 
2521  /* Are we looking for a peak or a valley? */
2522  if (val > startval) { /* peak */
2523  direction = 1;
2524  maxval = val;
2525  } else {
2526  direction = -1;
2527  minval = val;
2528  }
2529  loc = i;
2530 
2531  /* Sweep through the rest of the array, recording alternating
2532  * peak/valley extrema. */
2533  for (i = i + 1; i < n; i++) {
2534  numaGetFValue(nas, i, &val);
2535  if (direction == 1 && val > maxval ) { /* new local max */
2536  maxval = val;
2537  loc = i;
2538  } else if (direction == -1 && val < minval ) { /* new local min */
2539  minval = val;
2540  loc = i;
2541  } else if (direction == 1 && (maxval - val >= delta)) {
2542  numaAddNumber(nad, loc); /* save the current max location */
2543  if (nav) numaAddNumber(nav, maxval);
2544  direction = -1; /* reverse: start looking for a min */
2545  minval = val;
2546  loc = i; /* current min location */
2547  } else if (direction == -1 && (val - minval >= delta)) {
2548  numaAddNumber(nad, loc); /* save the current min location */
2549  if (nav) numaAddNumber(nav, minval);
2550  direction = 1; /* reverse: start looking for a max */
2551  maxval = val;
2552  loc = i; /* current max location */
2553  }
2554  }
2555 
2556  /* Save the final extremum */
2557 /* numaAddNumber(nad, loc); */
2558  return nad;
2559 }
2560 
2561 
2588 l_ok
2590  l_int32 skip,
2591  l_int32 *pthresh,
2592  l_float32 *pfract)
2593 {
2594 l_int32 i, n, start, index, minloc, found;
2595 l_float32 val, pval, jval, minval, maxval, sum, partsum;
2596 l_float32 *fa;
2597 
2598  if (pfract) *pfract = 0.0;
2599  if (!pthresh)
2600  return ERROR_INT("&thresh not defined", __func__, 1);
2601  *pthresh = 0;
2602  if (!na)
2603  return ERROR_INT("na not defined", __func__, 1);
2604  if (skip <= 0) skip = 20;
2605 
2606  /* Test for constant value */
2607  numaGetMin(na, &minval, NULL);
2608  numaGetMax(na, &maxval, NULL);
2609  if (minval == maxval)
2610  return ERROR_INT("all array values are the same", __func__, 1);
2611 
2612  /* Look for the top of the first peak */
2613  n = numaGetCount(na);
2614  if (n < 256)
2615  L_WARNING("array size %d < 256\n", __func__, n);
2616  fa = numaGetFArray(na, L_NOCOPY);
2617  pval = fa[0];
2618  for (i = 1; i < n; i++) {
2619  val = fa[i];
2620  index = L_MIN(i + skip, n - 1);
2621  jval = fa[index];
2622  if (val < pval && jval < pval) /* near the top if not there */
2623  break;
2624  pval = val;
2625  }
2626 
2627  if (i > n - 5) /* just an increasing function */
2628  return ERROR_INT("top of first peak not found", __func__, 1);
2629 
2630  /* Look for the low point in the valley */
2631  found = FALSE;
2632  start = i;
2633  pval = fa[start];
2634  for (i = start + 1; i < n; i++) {
2635  val = fa[i];
2636  if (val <= pval) { /* appears to be going down */
2637  pval = val;
2638  } else { /* appears to be going up */
2639  index = L_MIN(i + skip, n - 1);
2640  jval = fa[index]; /* junp ahead by 'skip' */
2641  if (val > jval) { /* still going down; jump ahead */
2642  pval = jval;
2643  i = index;
2644  } else { /* really going up; passed the min */
2645  found = TRUE;
2646  break;
2647  }
2648  }
2649  }
2650  if (!found)
2651  return ERROR_INT("no minimum found", __func__, 1);
2652 
2653  /* Find the location of the minimum in the interval */
2654  minloc = index; /* likely passed the min; look backward */
2655  minval = fa[index];
2656  for (i = index - 1; i > index - skip; i--) {
2657  if (fa[i] < minval) {
2658  minval = fa[i];
2659  minloc = i;
2660  }
2661  }
2662 
2663  /* Is the minimum very near the end of the array? */
2664  if (minloc > n - 10)
2665  return ERROR_INT("minimum at end of array; invalid", __func__, 1);
2666  *pthresh = minloc;
2667 
2668  /* Find the fraction under the first peak */
2669  if (pfract) {
2670  numaGetSumOnInterval(na, 0, minloc, &partsum);
2671  numaGetSum(na, &sum);
2672  if (sum > 0.0)
2673  *pfract = partsum / sum;
2674  }
2675  return 0;
2676 }
2677 
2678 
2700 l_ok
2702  l_float32 minreversal,
2703  l_int32 *pnr,
2704  l_float32 *prd)
2705 {
2706 l_int32 i, n, nr, ival, binvals;
2707 l_int32 *ia;
2708 l_float32 fval, delx, len;
2709 NUMA *nat;
2710 
2711  if (pnr) *pnr = 0;
2712  if (prd) *prd = 0.0;
2713  if (!pnr && !prd)
2714  return ERROR_INT("neither &nr nor &rd are defined", __func__, 1);
2715  if (!nas)
2716  return ERROR_INT("nas not defined", __func__, 1);
2717  if ((n = numaGetCount(nas)) == 0) {
2718  L_INFO("nas is empty\n", __func__);
2719  return 0;
2720  }
2721  if (minreversal < 0.0)
2722  return ERROR_INT("minreversal < 0", __func__, 1);
2723 
2724  /* Decide if the only values are 0 and 1 */
2725  binvals = TRUE;
2726  for (i = 0; i < n; i++) {
2727  numaGetFValue(nas, i, &fval);
2728  if (fval != 0.0 && fval != 1.0) {
2729  binvals = FALSE;
2730  break;
2731  }
2732  }
2733 
2734  nr = 0;
2735  if (binvals) {
2736  if (minreversal > 1.0) {
2737  L_WARNING("binary values but minreversal > 1\n", __func__);
2738  } else {
2739  ia = numaGetIArray(nas);
2740  ival = ia[0];
2741  for (i = 1; i < n; i++) {
2742  if (ia[i] != ival) {
2743  nr++;
2744  ival = ia[i];
2745  }
2746  }
2747  LEPT_FREE(ia);
2748  }
2749  } else {
2750  nat = numaFindExtrema(nas, minreversal, NULL);
2751  nr = numaGetCount(nat);
2752  numaDestroy(&nat);
2753  }
2754  if (pnr) *pnr = nr;
2755  if (prd) {
2756  numaGetParameters(nas, NULL, &delx);
2757  len = delx * n;
2758  *prd = (l_float32)nr / len;
2759  }
2760 
2761  return 0;
2762 }
2763 
2764 
2765 /*----------------------------------------------------------------------*
2766  * Threshold crossings and frequency analysis *
2767  *----------------------------------------------------------------------*/
2794 l_ok
2796  NUMA *nay,
2797  l_float32 estthresh,
2798  l_float32 *pbestthresh)
2799 {
2800 l_int32 i, inrun, istart, iend, maxstart, maxend, runlen, maxrunlen;
2801 l_int32 val, maxval, nmax, count;
2802 l_float32 thresh, fmaxval, fmodeval;
2803 NUMA *nat, *nac;
2804 
2805  if (!pbestthresh)
2806  return ERROR_INT("&bestthresh not defined", __func__, 1);
2807  *pbestthresh = 0.0;
2808  if (!nay)
2809  return ERROR_INT("nay not defined", __func__, 1);
2810  if (numaGetCount(nay) < 2) {
2811  L_WARNING("nay count < 2; no threshold crossing\n", __func__);
2812  return 1;
2813  }
2814 
2815  /* Compute the number of crossings for different thresholds */
2816  nat = numaCreate(41);
2817  for (i = 0; i < 41; i++) {
2818  thresh = estthresh - 80.0 + 4.0 * i;
2819  nac = numaCrossingsByThreshold(nax, nay, thresh);
2820  numaAddNumber(nat, numaGetCount(nac));
2821  numaDestroy(&nac);
2822  }
2823 
2824  /* Find the center of the plateau of max crossings, which
2825  * extends from thresh[istart] to thresh[iend]. */
2826  numaGetMax(nat, &fmaxval, NULL);
2827  maxval = (l_int32)fmaxval;
2828  nmax = 0;
2829  for (i = 0; i < 41; i++) {
2830  numaGetIValue(nat, i, &val);
2831  if (val == maxval)
2832  nmax++;
2833  }
2834  if (nmax < 3) { /* likely accidental max; try the mode */
2835  numaGetMode(nat, &fmodeval, &count);
2836  if (count > nmax && fmodeval > 0.5 * fmaxval)
2837  maxval = (l_int32)fmodeval; /* use the mode */
2838  }
2839 
2840  inrun = FALSE;
2841  iend = 40;
2842  maxrunlen = 0, maxstart = 0, maxend = 0;
2843  for (i = 0; i < 41; i++) {
2844  numaGetIValue(nat, i, &val);
2845  if (val == maxval) {
2846  if (!inrun) {
2847  istart = i;
2848  inrun = TRUE;
2849  }
2850  continue;
2851  }
2852  if (inrun && (val != maxval)) {
2853  iend = i - 1;
2854  runlen = iend - istart + 1;
2855  inrun = FALSE;
2856  if (runlen > maxrunlen) {
2857  maxstart = istart;
2858  maxend = iend;
2859  maxrunlen = runlen;
2860  }
2861  }
2862  }
2863  if (inrun) {
2864  runlen = i - istart;
2865  if (runlen > maxrunlen) {
2866  maxstart = istart;
2867  maxend = i - 1;
2868  maxrunlen = runlen;
2869  }
2870  }
2871 
2872  *pbestthresh = estthresh - 80.0 + 2.0 * (l_float32)(maxstart + maxend);
2873 
2874 #if DEBUG_CROSSINGS
2875  lept_stderr("\nCrossings attain a maximum at %d thresholds, between:\n"
2876  " thresh[%d] = %5.1f and thresh[%d] = %5.1f\n",
2877  nmax, maxstart, estthresh - 80.0 + 4.0 * maxstart,
2878  maxend, estthresh - 80.0 + 4.0 * maxend);
2879  lept_stderr("The best choice: %5.1f\n", *pbestthresh);
2880  lept_stderr("Number of crossings at the 41 thresholds:");
2881  numaWriteStderr(nat);
2882 #endif /* DEBUG_CROSSINGS */
2883 
2884  numaDestroy(&nat);
2885  return 0;
2886 }
2887 
2888 
2903 NUMA *
2905  NUMA *nay,
2906  l_float32 thresh)
2907 {
2908 l_int32 i, n;
2909 l_float32 startx, delx;
2910 l_float32 xval1, xval2, yval1, yval2, delta1, delta2, crossval, fract;
2911 NUMA *nad;
2912 
2913  if (!nay)
2914  return (NUMA *)ERROR_PTR("nay not defined", __func__, NULL);
2915  n = numaGetCount(nay);
2916 
2917  if (nax && (numaGetCount(nax) != n))
2918  return (NUMA *)ERROR_PTR("nax and nay sizes differ", __func__, NULL);
2919 
2920  nad = numaCreate(0);
2921  if (n < 2) return nad;
2922  numaGetFValue(nay, 0, &yval1);
2923  numaGetParameters(nay, &startx, &delx);
2924  if (nax)
2925  numaGetFValue(nax, 0, &xval1);
2926  else
2927  xval1 = startx;
2928  for (i = 1; i < n; i++) {
2929  numaGetFValue(nay, i, &yval2);
2930  if (nax)
2931  numaGetFValue(nax, i, &xval2);
2932  else
2933  xval2 = startx + i * delx;
2934  delta1 = yval1 - thresh;
2935  delta2 = yval2 - thresh;
2936  if (delta1 == 0.0) {
2937  numaAddNumber(nad, xval1);
2938  } else if (delta2 == 0.0) {
2939  numaAddNumber(nad, xval2);
2940  } else if (delta1 * delta2 < 0.0) { /* crossing */
2941  fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
2942  crossval = xval1 + fract * (xval2 - xval1);
2943  numaAddNumber(nad, crossval);
2944  }
2945  xval1 = xval2;
2946  yval1 = yval2;
2947  }
2948 
2949  return nad;
2950 }
2951 
2952 
2967 NUMA *
2969  NUMA *nay,
2970  l_float32 delta)
2971 {
2972 l_int32 i, j, n, np, previndex, curindex;
2973 l_float32 startx, delx;
2974 l_float32 xval1, xval2, yval1, yval2, delta1, delta2;
2975 l_float32 prevval, curval, thresh, crossval, fract;
2976 NUMA *nap, *nad;
2977 
2978  if (!nay)
2979  return (NUMA *)ERROR_PTR("nay not defined", __func__, NULL);
2980 
2981  n = numaGetCount(nay);
2982  if (nax && (numaGetCount(nax) != n))
2983  return (NUMA *)ERROR_PTR("nax and nay sizes differ", __func__, NULL);
2984 
2985  /* Find the extrema. Also add last point in nay to get
2986  * the last transition (from the last peak to the end).
2987  * The number of crossings is 1 more than the number of extrema. */
2988  nap = numaFindExtrema(nay, delta, NULL);
2989  numaAddNumber(nap, n - 1);
2990  np = numaGetCount(nap);
2991  L_INFO("Number of crossings: %d\n", __func__, np);
2992 
2993  /* Do all computation in index units of nax or the delx of nay */
2994  nad = numaCreate(np); /* output crossing locations, in nax units */
2995  previndex = 0; /* prime the search with 1st point */
2996  numaGetFValue(nay, 0, &prevval); /* prime the search with 1st point */
2997  numaGetParameters(nay, &startx, &delx);
2998  for (i = 0; i < np; i++) {
2999  numaGetIValue(nap, i, &curindex);
3000  numaGetFValue(nay, curindex, &curval);
3001  thresh = (prevval + curval) / 2.0;
3002  if (nax)
3003  numaGetFValue(nax, previndex, &xval1);
3004  else
3005  xval1 = startx + previndex * delx;
3006  numaGetFValue(nay, previndex, &yval1);
3007  for (j = previndex + 1; j <= curindex; j++) {
3008  if (nax)
3009  numaGetFValue(nax, j, &xval2);
3010  else
3011  xval2 = startx + j * delx;
3012  numaGetFValue(nay, j, &yval2);
3013  delta1 = yval1 - thresh;
3014  delta2 = yval2 - thresh;
3015  if (delta1 == 0.0) {
3016  numaAddNumber(nad, xval1);
3017  break;
3018  } else if (delta2 == 0.0) {
3019  numaAddNumber(nad, xval2);
3020  break;
3021  } else if (delta1 * delta2 < 0.0) { /* crossing */
3022  fract = L_ABS(delta1) / L_ABS(yval1 - yval2);
3023  crossval = xval1 + fract * (xval2 - xval1);
3024  numaAddNumber(nad, crossval);
3025  break;
3026  }
3027  xval1 = xval2;
3028  yval1 = yval2;
3029  }
3030  previndex = curindex;
3031  prevval = curval;
3032  }
3033 
3034  numaDestroy(&nap);
3035  return nad;
3036 }
3037 
3038 
3076 l_ok
3078  l_float32 relweight,
3079  l_int32 nwidth,
3080  l_int32 nshift,
3081  l_float32 minwidth,
3082  l_float32 maxwidth,
3083  l_float32 *pbestwidth,
3084  l_float32 *pbestshift,
3085  l_float32 *pbestscore)
3086 {
3087 l_int32 i, j;
3088 l_float32 delwidth, delshift, width, shift, score;
3089 l_float32 bestwidth, bestshift, bestscore;
3090 
3091  if (pbestscore) *pbestscore = 0.0;
3092  if (pbestwidth) *pbestwidth = 0.0;
3093  if (pbestshift) *pbestshift = 0.0;
3094  if (!pbestwidth || !pbestshift)
3095  return ERROR_INT("&bestwidth and &bestshift not defined", __func__, 1);
3096  if (!nas)
3097  return ERROR_INT("nas not defined", __func__, 1);
3098 
3099  bestscore = bestwidth = bestshift = 0.0;
3100  delwidth = (maxwidth - minwidth) / (nwidth - 1.0);
3101  for (i = 0; i < nwidth; i++) {
3102  width = minwidth + delwidth * i;
3103  delshift = width / (l_float32)(nshift);
3104  for (j = 0; j < nshift; j++) {
3105  shift = j * delshift;
3106  numaEvalHaarSum(nas, width, shift, relweight, &score);
3107  if (score > bestscore) {
3108  bestscore = score;
3109  bestwidth = width;
3110  bestshift = shift;
3111 #if DEBUG_FREQUENCY
3112  lept_stderr("width = %7.3f, shift = %7.3f, score = %7.3f\n",
3113  width, shift, score);
3114 #endif /* DEBUG_FREQUENCY */
3115  }
3116  }
3117  }
3118 
3119  *pbestwidth = bestwidth;
3120  *pbestshift = bestshift;
3121  if (pbestscore)
3122  *pbestscore = bestscore;
3123  return 0;
3124 }
3125 
3126 
3159 l_ok
3161  l_float32 width,
3162  l_float32 shift,
3163  l_float32 relweight,
3164  l_float32 *pscore)
3165 {
3166 l_int32 i, n, nsamp, index;
3167 l_float32 score, weight, val;
3168 
3169  if (!pscore)
3170  return ERROR_INT("&score not defined", __func__, 1);
3171  *pscore = 0.0;
3172  if (!nas)
3173  return ERROR_INT("nas not defined", __func__, 1);
3174  if ((n = numaGetCount(nas)) < 2 * width)
3175  return ERROR_INT("nas size too small", __func__, 1);
3176 
3177  score = 0.0;
3178  nsamp = (l_int32)((n - shift) / width);
3179  for (i = 0; i < nsamp; i++) {
3180  index = (l_int32)(shift + i * width);
3181  weight = (i % 2) ? 1.0 : -1.0 * relweight;
3182  numaGetFValue(nas, index, &val);
3183  score += weight * val;
3184  }
3185 
3186  *pscore = 2.0 * width * score / (l_float32)n;
3187  return 0;
3188 }
3189 
3190 
3191 /*----------------------------------------------------------------------*
3192  * Generating numbers in a range under constraints *
3193  *----------------------------------------------------------------------*/
3214 NUMA *
3216  l_int32 last,
3217  l_int32 nmax,
3218  l_int32 use_pairs)
3219 {
3220 l_int32 i, nsets, val;
3221 l_float32 delta;
3222 NUMA *na;
3223 
3224  first = L_MAX(0, first);
3225  if (last < first)
3226  return (NUMA *)ERROR_PTR("last < first!", __func__, NULL);
3227  if (nmax < 1)
3228  return (NUMA *)ERROR_PTR("nmax < 1!", __func__, NULL);
3229 
3230  nsets = L_MIN(nmax, last - first + 1);
3231  if (use_pairs == 1)
3232  nsets = nsets / 2;
3233  if (nsets == 0)
3234  return (NUMA *)ERROR_PTR("nsets == 0", __func__, NULL);
3235 
3236  /* Select delta so that selection covers the full range if possible */
3237  if (nsets == 1) {
3238  delta = 0.0;
3239  } else {
3240  if (use_pairs == 0)
3241  delta = (l_float32)(last - first) / (nsets - 1);
3242  else
3243  delta = (l_float32)(last - first - 1) / (nsets - 1);
3244  }
3245 
3246  na = numaCreate(nsets);
3247  for (i = 0; i < nsets; i++) {
3248  val = (l_int32)(first + i * delta + 0.5);
3249  numaAddNumber(na, val);
3250  if (use_pairs == 1)
3251  numaAddNumber(na, val + 1);
3252  }
3253 
3254  return na;
3255 }
@ L_LINEAR_INTERP
Definition: array.h:92
@ L_MIRRORED_BORDER
Definition: array.h:100
l_ok gplotSimple1(NUMA *na, l_int32 outformat, const char *outroot, const char *title)
gplotSimple1()
Definition: gplot.c:649
l_ok numaAddNumber(NUMA *na, l_float32 val)
numaAddNumber()
Definition: numabasic.c:460
l_int32 numaaGetNumberCount(NUMAA *naa)
numaaGetNumberCount()
Definition: numabasic.c:1551
l_ok numaGetFValue(NUMA *na, l_int32 index, l_float32 *pval)
numaGetFValue()
Definition: numabasic.c:687
l_ok numaWriteStderr(NUMA *na)
numaWriteStderr()
Definition: numabasic.c:1212
NUMA * numaCreate(l_int32 n)
numaCreate()
Definition: numabasic.c:193
l_int32 numaaGetCount(NUMAA *naa)
numaaGetCount()
Definition: numabasic.c:1516
NUMA * numaaGetNuma(NUMAA *naa, l_int32 index, l_int32 accessflag)
numaaGetNuma()
Definition: numabasic.c:1617
l_int32 numaaGetNumaCount(NUMAA *naa, l_int32 index)
numaaGetNumaCount()
Definition: numabasic.c:1532
l_ok numaSetCount(NUMA *na, l_int32 newcount)
numaSetCount()
Definition: numabasic.c:655
void numaDestroy(NUMA **pna)
numaDestroy()
Definition: numabasic.c:357
l_ok numaSetValue(NUMA *na, l_int32 index, l_float32 val)
numaSetValue()
Definition: numabasic.c:750
l_int32 numaGetCount(NUMA *na)
numaGetCount()
Definition: numabasic.c:630
l_ok numaGetIValue(NUMA *na, l_int32 index, l_int32 *pival)
numaGetIValue()
Definition: numabasic.c:720
NUMA * numaCopy(NUMA *na)
numaCopy()
Definition: numabasic.c:387
l_int32 * numaGetIArray(NUMA *na)
numaGetIArray()
Definition: numabasic.c:807
l_ok numaGetParameters(NUMA *na, l_float32 *pstartx, l_float32 *pdelx)
numaGetParameters()
Definition: numabasic.c:882
l_ok numaCopyParameters(NUMA *nad, NUMA *nas)
numaCopyParameters()
Definition: numabasic.c:931
l_ok numaSetParameters(NUMA *na, l_float32 startx, l_float32 delx)
numaSetParameters()
Definition: numabasic.c:910
l_float32 * numaGetFArray(NUMA *na, l_int32 copyflag)
numaGetFArray()
Definition: numabasic.c:850
NUMA * numaSort(NUMA *naout, NUMA *nain, l_int32 sortorder)
numaSort()
Definition: numafunc1.c:2567
l_ok numaGetMode(NUMA *na, l_float32 *pval, l_int32 *pcount)
numaGetMode()
Definition: numafunc1.c:3443
NUMA * numaAddBorder(NUMA *nas, l_int32 left, l_int32 right, l_float32 val)
numaAddBorder()
Definition: numafunc1.c:869
l_ok numaHasOnlyIntegers(NUMA *na, l_int32 *pallints)
numaHasOnlyIntegers()
Definition: numafunc1.c:637
l_ok numaGetMedian(NUMA *na, l_float32 *pval)
numaGetMedian()
Definition: numafunc1.c:3296
l_ok numaGetSumOnInterval(NUMA *na, l_int32 first, l_int32 last, l_float32 *psum)
numaGetSumOnInterval()
Definition: numafunc1.c:596
NUMA * numaRemoveBorder(NUMA *nas, l_int32 left, l_int32 right)
numaRemoveBorder()
Definition: numafunc1.c:959
l_ok numaInterpolateEqxInterval(l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 type, l_float32 x0, l_float32 x1, l_int32 npts, NUMA **pnax, NUMA **pnay)
numaInterpolateEqxInterval()
Definition: numafunc1.c:1847
l_ok numaGetMin(NUMA *na, l_float32 *pminval, l_int32 *piminloc)
numaGetMin()
Definition: numafunc1.c:444
NUMA * numaMakeConstant(l_float32 val, l_int32 size)
numaMakeConstant()
Definition: numafunc1.c:820
NUMA * numaClipToInterval(NUMA *nas, l_int32 first, l_int32 last)
numaClipToInterval()
Definition: numafunc1.c:1137
l_ok numaGetMax(NUMA *na, l_float32 *pmaxval, l_int32 *pimaxloc)
numaGetMax()
Definition: numafunc1.c:485
NUMA * numaGetPartialSums(NUMA *na)
numaGetPartialSums()
Definition: numafunc1.c:564
l_int32 numaChooseSortType(NUMA *nas)
numaChooseSortType()
Definition: numafunc1.c:2521
NUMA * numaAddSpecifiedBorder(NUMA *nas, l_int32 left, l_int32 right, l_int32 type)
numaAddSpecifiedBorder()
Definition: numafunc1.c:910
l_ok numaGetSum(NUMA *na, l_float32 *psum)
numaGetSum()
Definition: numafunc1.c:525
NUMA * numaWindowedMeanSquare(NUMA *nas, l_int32 wc)
numaWindowedMeanSquare()
Definition: numafunc2.c:632
l_ok numaGetRankBinValues(NUMA *na, l_int32 nbins, NUMA **pnam)
numaGetRankBinValues()
Definition: numafunc2.c:1821
l_ok numaCountReversals(NUMA *nas, l_float32 minreversal, l_int32 *pnr, l_float32 *prd)
numaCountReversals()
Definition: numafunc2.c:2701
NUMA * numaMakeHistogram(NUMA *na, l_int32 maxbins, l_int32 *pbinsize, l_int32 *pbinstart)
numaMakeHistogram()
Definition: numafunc2.c:861
NUMA * numaFindExtrema(NUMA *nas, l_float32 delta, NUMA **pnav)
numaFindExtrema()
Definition: numafunc2.c:2483
NUMA * numaDilate(NUMA *nas, l_int32 size)
numaDilate()
Definition: numafunc2.c:250
NUMA * genConstrainedNumaInRange(l_int32 first, l_int32 last, l_int32 nmax, l_int32 use_pairs)
genConstrainedNumaInRange()
Definition: numafunc2.c:3215
l_ok numaEvalHaarSum(NUMA *nas, l_float32 width, l_float32 shift, l_float32 relweight, l_float32 *pscore)
numaEvalHaarSum()
Definition: numafunc2.c:3160
NUMA * numaConvertToInt(NUMA *nas)
numaConvertToInt()
Definition: numafunc2.c:811
l_ok numaEarthMoverDistance(NUMA *na1, NUMA *na2, l_float32 *pdist)
numaEarthMoverDistance()
Definition: numafunc2.c:2190
l_ok numaSelectCrossingThreshold(NUMA *nax, NUMA *nay, l_float32 estthresh, l_float32 *pbestthresh)
numaSelectCrossingThreshold()
Definition: numafunc2.c:2795
l_ok numaWindowedStats(NUMA *nas, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv)
numaWindowedStats()
Definition: numafunc2.c:525
NUMA * numaFindPeaks(NUMA *nas, l_int32 nmax, l_float32 fract1, l_float32 fract2)
numaFindPeaks()
Definition: numafunc2.c:2367
NUMA * numaWindowedMean(NUMA *nas, l_int32 wc)
numaWindowedMean()
Definition: numafunc2.c:574
l_ok numaWindowedVariance(NUMA *nam, NUMA *nams, NUMA **pnav, NUMA **pnarv)
numaWindowedVariance()
Definition: numafunc2.c:698
l_ok numaEvalBestHaarParameters(NUMA *nas, l_float32 relweight, l_int32 nwidth, l_int32 nshift, l_float32 minwidth, l_float32 maxwidth, l_float32 *pbestwidth, l_float32 *pbestshift, l_float32 *pbestscore)
numaEvalBestHaarParameters()
Definition: numafunc2.c:3077
l_ok numaDiscretizeHistoInBins(NUMA *na, l_int32 nbins, NUMA **pnabinval, NUMA **pnarank)
numaDiscretizeHistoInBins()
Definition: numafunc2.c:1735
l_ok numaGetHistogramStatsOnInterval(NUMA *nahisto, l_float32 startx, l_float32 deltax, l_int32 ifirst, l_int32 ilast, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance)
numaGetHistogramStatsOnInterval()
Definition: numafunc2.c:1363
NUMA * numaMakeHistogramClipped(NUMA *na, l_float32 binsize, l_float32 maxsize)
numaMakeHistogramClipped()
Definition: numafunc2.c:1054
NUMA * numaNormalizeHistogram(NUMA *nas, l_float32 tsum)
numaNormalizeHistogram()
Definition: numafunc2.c:1147
NUMA * numaErode(NUMA *nas, l_int32 size)
numaErode()
Definition: numafunc2.c:183
l_ok numaGetHistogramStats(NUMA *nahisto, l_float32 startx, l_float32 deltax, l_float32 *pxmean, l_float32 *pxmedian, l_float32 *pxmode, l_float32 *pxvariance)
numaGetHistogramStats()
Definition: numafunc2.c:1316
l_ok grayHistogramsToEMD(NUMAA *naa1, NUMAA *naa2, NUMA **pnad)
grayHistogramsToEMD()
Definition: numafunc2.c:2126
NUMA * numaTransform(NUMA *nas, l_float32 shift, l_float32 scale)
numaTransform()
Definition: numafunc2.c:407
NUMA * numaWindowedMedian(NUMA *nas, l_int32 halfwin)
numaWindowedMedian()
Definition: numafunc2.c:764
l_ok numaMakeRankFromHistogram(l_float32 startx, l_float32 deltax, NUMA *nasy, l_int32 npts, NUMA **pnax, NUMA **pnay)
numaMakeRankFromHistogram()
Definition: numafunc2.c:1455
l_ok numaSplitDistribution(NUMA *na, l_float32 scorefract, l_int32 *psplitindex, l_float32 *pave1, l_float32 *pave2, l_float32 *pnum1, l_float32 *pnum2, NUMA **pnascore)
numaSplitDistribution()
Definition: numafunc2.c:1966
l_ok numaHistogramGetRankFromVal(NUMA *na, l_float32 rval, l_float32 *prank)
numaHistogramGetRankFromVal()
Definition: numafunc2.c:1521
l_ok numaHistogramGetValFromRank(NUMA *na, l_float32 rank, l_float32 *prval)
numaHistogramGetValFromRank()
Definition: numafunc2.c:1590
l_ok numaFindLocForThreshold(NUMA *na, l_int32 skip, l_int32 *pthresh, l_float32 *pfract)
numaFindLocForThreshold()
Definition: numafunc2.c:2589
l_ok grayInterHistogramStats(NUMAA *naa, l_int32 wc, NUMA **pnam, NUMA **pnams, NUMA **pnav, NUMA **pnarv)
grayInterHistogramStats()
Definition: numafunc2.c:2279
l_ok numaSimpleStats(NUMA *na, l_int32 first, l_int32 last, l_float32 *pmean, l_float32 *pvar, l_float32 *prvar)
numaSimpleStats()
Definition: numafunc2.c:442
NUMA * numaGetUniformBinSizes(l_int32 ntotal, l_int32 nbins)
numaGetUniformBinSizes()
Definition: numafunc2.c:1890
NUMA * numaCrossingsByThreshold(NUMA *nax, NUMA *nay, l_float32 thresh)
numaCrossingsByThreshold()
Definition: numafunc2.c:2904
NUMA * numaRebinHistogram(NUMA *nas, l_int32 newsize)
numaRebinHistogram()
Definition: numafunc2.c:1101
NUMA * numaMakeHistogramAuto(NUMA *na, l_int32 maxbins)
numaMakeHistogramAuto()
Definition: numafunc2.c:972
NUMA * numaClose(NUMA *nas, l_int32 size)
numaClose()
Definition: numafunc2.c:362
l_ok numaDiscretizeSortedInBins(NUMA *na, l_int32 nbins, NUMA **pnabinval)
numaDiscretizeSortedInBins()
Definition: numafunc2.c:1660
NUMA * numaOpen(NUMA *nas, l_int32 size)
numaOpen()
Definition: numafunc2.c:317
l_ok numaGetStatsUsingHistogram(NUMA *na, l_int32 maxbins, l_float32 *pmin, l_float32 *pmax, l_float32 *pmean, l_float32 *pvariance, l_float32 *pmedian, l_float32 rank, l_float32 *prval, NUMA **phisto)
numaGetStatsUsingHistogram()
Definition: numafunc2.c:1227
NUMA * numaCrossingsByPeaks(NUMA *nax, NUMA *nay, l_float32 delta)
numaCrossingsByPeaks()
Definition: numafunc2.c:2968
@ L_COPY
Definition: pix.h:505
@ L_CLONE
Definition: pix.h:506
@ L_NOCOPY
Definition: pix.h:503
@ L_SHELL_SORT
Definition: pix.h:516
@ L_SORT_INCREASING
Definition: pix.h:522
void lept_stderr(const char *fmt,...)
lept_stderr()
Definition: utils1.c:306