Leptonica  1.83.1
Image processing and image analysis suite
psio1.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
110 #ifdef HAVE_CONFIG_H
111 #include <config_auto.h>
112 #endif /* HAVE_CONFIG_H */
113 
114 #include <string.h>
115 #include "allheaders.h"
116 
117 /* --------------------------------------------*/
118 #if USE_PSIO /* defined in environ.h */
119  /* --------------------------------------------*/
120 
121 /*-------------------------------------------------------------*
122  * Convert files in a directory to PS *
123  *-------------------------------------------------------------*/
124 /*
125  * \brief convertFilesToPS()
126  *
127  * \param[in] dirin input directory
128  * \param[in] substr [optional] substring filter on filenames; can be NULL
129  * \param[in] res typ. 300 or 600 ppi
130  * \param[in] fileout output ps file
131  * \return 0 if OK, 1 on error
132  *
133  * <pre>
134  * Notes:
135  * (1) This generates a PS file for all image files in a specified
136  * directory that contain the substr pattern to be matched.
137  * (2) Each image is written to a separate page in the output PS file.
138  * (3) All images are written compressed:
139  * * if tiffg4 --> use ccittg4
140  * * if jpeg --> use dct
141  * * all others --> use flate
142  * If the image is jpeg or tiffg4, we use the existing compressed
143  * strings for the encoding; otherwise, we read the image into
144  * a pix and flate-encode the pieces.
145  * (4) The resolution is often confusing. It is interpreted
146  * as the resolution of the output display device: "If the
147  * input image were digitized at 300 ppi, what would it
148  * look like when displayed at res ppi." So, for example,
149  * if res = 100 ppi, then the display pixels are 3x larger
150  * than the 300 ppi pixels, and the image will be rendered
151  * 3x larger.
152  * (5) The size of the PostScript file is independent of the resolution,
153  * because the entire file is encoded. The res parameter just
154  * tells the PS decomposer how to render the page. Therefore,
155  * for minimum file size without loss of visual information,
156  * if the output res is less than 300, you should downscale
157  * the image to the output resolution before wrapping in PS.
158  * (6) The "canvas" on which the image is rendered, at the given
159  * output resolution, is a standard page size (8.5 x 11 in).
160  * </pre>
161  */
162 l_ok
163 convertFilesToPS(const char *dirin,
164  const char *substr,
165  l_int32 res,
166  const char *fileout)
167 {
168 SARRAY *sa;
169 
170  if (!dirin)
171  return ERROR_INT("dirin not defined", __func__, 1);
172  if (!fileout)
173  return ERROR_INT("fileout not defined", __func__, 1);
174  if (res <= 0) {
175  L_INFO("setting res to 300 ppi\n", __func__);
176  res = 300;
177  }
178  if (res < 10 || res > 4000)
179  L_WARNING("res is typically in the range 300-600 ppi\n", __func__);
180 
181  /* Get all filtered and sorted full pathnames. */
182  sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
183 
184  /* Generate the PS file. Don't use bounding boxes. */
185  l_psWriteBoundingBox(FALSE);
186  sarrayConvertFilesToPS(sa, res, fileout);
187  l_psWriteBoundingBox(TRUE);
188  sarrayDestroy(&sa);
189  return 0;
190 }
191 
192 
193 /*
194 
195  * \brief sarrayConvertFilesToPS()
196  *
197  * \param[in] sa sarray of full path names
198  * \param[in] res typ. 300 or 600 ppi
199  * \param[in] fileout output ps file
200  * \return 0 if OK, 1 on error
201  *
202  * <pre>
203  * Notes:
204  * (1) See convertFilesToPS()
205  * </pre>
206  */
207 l_ok
208 sarrayConvertFilesToPS(SARRAY *sa,
209  l_int32 res,
210  const char *fileout)
211 {
212 char *fname;
213 l_int32 i, nfiles, index, ret, format;
214 
215  if (!sa)
216  return ERROR_INT("sa not defined", __func__, 1);
217  if (!fileout)
218  return ERROR_INT("fileout not defined", __func__, 1);
219  if (res <= 0) {
220  L_INFO("setting res to 300 ppi\n", __func__);
221  res = 300;
222  }
223  if (res < 10 || res > 4000)
224  L_WARNING("res is typically in the range 300-600 ppi\n", __func__);
225 
226  nfiles = sarrayGetCount(sa);
227  for (i = 0, index = 0; i < nfiles; i++) {
228  fname = sarrayGetString(sa, i, L_NOCOPY);
229  ret = pixReadHeader(fname, &format, NULL, NULL, NULL, NULL, NULL);
230  if (ret) continue;
231  if (format == IFF_UNKNOWN)
232  continue;
233 
234  writeImageCompressedToPSFile(fname, fileout, res, &index);
235  }
236 
237  return 0;
238 }
239 
240 
241 /*
242  * \brief convertFilesFittedToPS()
243  *
244  * \param[in] dirin input directory
245  * \param[in] substr [optional] substring filter on filenames; can be NULL
246  * \param[in] xpts desired size in printer points; use 0 for default
247  * \param[in] ypts desired size in printer points; use 0 for default
248  * \param[in] fileout output ps file
249  * \return 0 if OK, 1 on error
250  *
251  * <pre>
252  * Notes:
253  * (1) This generates a PS file for all files in a specified directory
254  * that contain the substr pattern to be matched.
255  * (2) Each image is written to a separate page in the output PS file.
256  * (3) All images are written compressed:
257  * * if tiffg4 --> use ccittg4
258  * * if jpeg --> use dct
259  * * all others --> use flate
260  * If the image is jpeg or tiffg4, we use the existing compressed
261  * strings for the encoding; otherwise, we read the image into
262  * a pix and flate-encode the pieces.
263  * (4) The resolution is internally determined such that the images
264  * are rendered, in at least one direction, at 100% of the given
265  * size in printer points. Use 0.0 for xpts or ypts to get
266  * the default value, which is 612.0 or 792.0, rsp.
267  * (5) The size of the PostScript file is independent of the resolution,
268  * because the entire file is encoded. The %xpts and %ypts
269  * parameter tells the PS decomposer how to render the page.
270  * </pre>
271  */
272 l_ok
273 convertFilesFittedToPS(const char *dirin,
274  const char *substr,
275  l_float32 xpts,
276  l_float32 ypts,
277  const char *fileout)
278 {
279 SARRAY *sa;
280 
281  if (!dirin)
282  return ERROR_INT("dirin not defined", __func__, 1);
283  if (!fileout)
284  return ERROR_INT("fileout not defined", __func__, 1);
285  if (xpts <= 0.0) {
286  L_INFO("setting xpts to 612.0 ppi\n", __func__);
287  xpts = 612.0;
288  }
289  if (ypts <= 0.0) {
290  L_INFO("setting ypts to 792.0 ppi\n", __func__);
291  ypts = 792.0;
292  }
293  if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
294  L_WARNING("xpts,ypts are typically in the range 500-800\n", __func__);
295 
296  /* Get all filtered and sorted full pathnames. */
297  sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
298 
299  /* Generate the PS file. Don't use bounding boxes. */
300  l_psWriteBoundingBox(FALSE);
301  sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout);
302  l_psWriteBoundingBox(TRUE);
303  sarrayDestroy(&sa);
304  return 0;
305 }
306 
307 
308 /*
309  * \brief sarrayConvertFilesFittedToPS()
310  *
311  * \param[in] sa sarray of full path names
312  * \param[in] xpts desired size in printer points; use 0 for default
313  * \param[in] ypts desired size in printer points; use 0 for default
314  * \param[in] fileout output ps file
315  * \return 0 if OK, 1 on error
316  *
317  * <pre>
318  * Notes:
319  * (1) See convertFilesFittedToPS()
320  * </pre>
321  */
322 l_ok
323 sarrayConvertFilesFittedToPS(SARRAY *sa,
324  l_float32 xpts,
325  l_float32 ypts,
326  const char *fileout)
327 {
328 char *fname;
329 l_int32 ret, i, w, h, nfiles, index, format, res;
330 
331  if (!sa)
332  return ERROR_INT("sa not defined", __func__, 1);
333  if (!fileout)
334  return ERROR_INT("fileout not defined", __func__, 1);
335  if (xpts <= 0.0) {
336  L_INFO("setting xpts to 612.0\n", __func__);
337  xpts = 612.0;
338  }
339  if (ypts <= 0.0) {
340  L_INFO("setting ypts to 792.0\n", __func__);
341  ypts = 792.0;
342  }
343  if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
344  L_WARNING("xpts,ypts are typically in the range 500-800\n", __func__);
345 
346  nfiles = sarrayGetCount(sa);
347  for (i = 0, index = 0; i < nfiles; i++) {
348  fname = sarrayGetString(sa, i, L_NOCOPY);
349  ret = pixReadHeader(fname, &format, &w, &h, NULL, NULL, NULL);
350  if (ret) continue;
351  if (format == IFF_UNKNOWN)
352  continue;
353 
354  /* Be sure the entire image is wrapped */
355  if (xpts * h < ypts * w)
356  res = (l_int32)((l_float32)w * 72.0 / xpts);
357  else
358  res = (l_int32)((l_float32)h * 72.0 / ypts);
359 
360  writeImageCompressedToPSFile(fname, fileout, res, &index);
361  }
362 
363  return 0;
364 }
365 
366 
367 /*
368  * \brief writeImageCompressedToPSFile()
369  *
370  * \param[in] filein input image file
371  * \param[in] fileout output ps file
372  * \param[in] res output printer resolution
373  * \param[in,out] pindex index of image in output ps file
374  * \return 0 if OK, 1 on error
375  *
376  * <pre>
377  * Notes:
378  * (1) This wraps a single page image in PS.
379  * (2) The input file can be in any format. It is compressed as follows:
380  * * if in tiffg4 --> use ccittg4
381  * * if in jpeg --> use dct
382  * * all others --> use flate
383  * (3) Before the first call, set %index = 0. %index is incremented
384  * if the page is successfully written. It is used to decide
385  * whether to write (index == 0) or append (index > 0) to the file.
386  * </pre>
387  */
388 l_ok
389 writeImageCompressedToPSFile(const char *filein,
390  const char *fileout,
391  l_int32 res,
392  l_int32 *pindex)
393 {
394 const char *op;
395 l_int32 format, retval;
396 
397  if (!pindex)
398  return ERROR_INT("&index not defined", __func__, 1);
399 
400  findFileFormat(filein, &format);
401  if (format == IFF_UNKNOWN) {
402  L_ERROR("format of %s not known\n", __func__, filein);
403  return 1;
404  }
405 
406  op = (*pindex == 0) ? "w" : "a";
407  if (format == IFF_JFIF_JPEG) {
408  retval = convertJpegToPS(filein, fileout, op, 0, 0,
409  res, 1.0, *pindex + 1, TRUE);
410  } else if (format == IFF_TIFF_G4) {
411  retval = convertG4ToPS(filein, fileout, op, 0, 0,
412  res, 1.0, *pindex + 1, FALSE, TRUE);
413  } else { /* all other image formats */
414  retval = convertFlateToPS(filein, fileout, op, 0, 0,
415  res, 1.0, *pindex + 1, TRUE);
416  }
417  if (retval == 0) (*pindex)++;
418 
419  return retval;
420 }
421 
422 
423 /*-------------------------------------------------------------*
424  * Convert mixed text/image files to PS *
425  *-------------------------------------------------------------*/
426 /*
427  * \brief convertSegmentedPagesToPS()
428  *
429  * \param[in] pagedir input page image directory
430  * \param[in] pagestr [optional] substring filter on page filenames;
431  * can be NULL
432  * \param[in] page_numpre number of characters in page name before number
433  * \param[in] maskdir input mask image directory
434  * \param[in] maskstr [optional] substring filter on mask filenames;
435  * can be NULL
436  * \param[in] mask_numpre number of characters in mask name before number
437  * \param[in] numpost number of characters in names after number
438  * \param[in] maxnum only consider page numbers up to this value
439  * \param[in] textscale scale of text output relative to pixs
440  * \param[in] imagescale scale of image output relative to pixs
441  * \param[in] threshold for binarization; typ. about 190; 0 for default
442  * \param[in] fileout output ps file
443  * \return 0 if OK, 1 on error
444  *
445  * <pre>
446  * Notes:
447  * (1) This generates a PS file for all page image and mask files in two
448  * specified directories and that contain the page numbers as
449  * specified below. The two directories can be the same, in which
450  * case the page and mask files are differentiated by the two
451  * substrings for string matches.
452  * (2) The page images are taken in lexicographic order.
453  * Mask images whose numbers match the page images are used to
454  * segment the page images. Page images without a matching
455  * mask image are scaled, thresholded and rendered entirely as text.
456  * (3) Each PS page is generated as a compressed representation of
457  * the page image, where the part of the image under the mask
458  * is suitably scaled and compressed as DCT (i.e., jpeg), and
459  * the remaining part of the page is suitably scaled, thresholded,
460  * compressed as G4 (i.e., tiff g4), and rendered by painting
461  * black through the resulting text mask.
462  * (4) The scaling is typically 2x down for the DCT component
463  * (%imagescale = 0.5) and 2x up for the G4 component
464  * (%textscale = 2.0).
465  * (5) The resolution is automatically set to fit to a
466  * letter-size (8.5 x 11 inch) page.
467  * (6) Both the DCT and the G4 encoding are PostScript level 2.
468  * (7) It is assumed that the page number is contained within
469  * the basename (the filename without directory or extension).
470  * %page_numpre is the number of characters in the page basename
471  * preceding the actual page number; %mask_numpre is likewise for
472  * the mask basename; %numpost is the number of characters
473  * following the page number. For example, for mask name
474  * mask_006.tif, mask_numpre = 5 ("mask_").
475  * (8) To render a page as is -- that is, with no thresholding
476  * of any pixels -- use a mask in the mask directory that is
477  * full size with all pixels set to 1. If the page is 1 bpp,
478  * it is not necessary to have a mask.
479  * </pre>
480  */
481 l_ok
482 convertSegmentedPagesToPS(const char *pagedir,
483  const char *pagestr,
484  l_int32 page_numpre,
485  const char *maskdir,
486  const char *maskstr,
487  l_int32 mask_numpre,
488  l_int32 numpost,
489  l_int32 maxnum,
490  l_float32 textscale,
491  l_float32 imagescale,
492  l_int32 threshold,
493  const char *fileout)
494 {
495 l_int32 pageno, i, npages;
496 PIX *pixs, *pixm;
497 SARRAY *sapage, *samask;
498 
499  if (!pagedir)
500  return ERROR_INT("pagedir not defined", __func__, 1);
501  if (!maskdir)
502  return ERROR_INT("maskdir not defined", __func__, 1);
503  if (!fileout)
504  return ERROR_INT("fileout not defined", __func__, 1);
505  if (threshold <= 0) {
506  L_INFO("setting threshold to 190\n", __func__);
507  threshold = 190;
508  }
509 
510  /* Get numbered full pathnames; max size of sarray is maxnum */
511  sapage = getNumberedPathnamesInDirectory(pagedir, pagestr,
512  page_numpre, numpost, maxnum);
513  samask = getNumberedPathnamesInDirectory(maskdir, maskstr,
514  mask_numpre, numpost, maxnum);
515  sarrayPadToSameSize(sapage, samask, "");
516  if ((npages = sarrayGetCount(sapage)) == 0) {
517  sarrayDestroy(&sapage);
518  sarrayDestroy(&samask);
519  return ERROR_INT("no matching pages found", __func__, 1);
520  }
521 
522  /* Generate the PS file */
523  pageno = 1;
524  for (i = 0; i < npages; i++) {
525  if ((pixs = pixReadIndexed(sapage, i)) == NULL)
526  continue;
527  pixm = pixReadIndexed(samask, i);
528  pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale,
529  threshold, pageno, fileout);
530  pixDestroy(&pixs);
531  pixDestroy(&pixm);
532  pageno++;
533  }
534 
535  sarrayDestroy(&sapage);
536  sarrayDestroy(&samask);
537  return 0;
538 }
539 
540 
541 /*
542  * \brief pixWriteSegmentedPageToPS()
543  *
544  * \param[in] pixs all depths; colormap ok
545  * \param[in] pixm [optional] 1 bpp segmentation mask over image region
546  * \param[in] textscale scale of text output relative to pixs
547  * \param[in] imagescale scale of image output relative to pixs
548  * \param[in] threshold for binarization; typ. about 190; 0 for default
549  * \param[in] pageno page number in set; use 1 for new output file
550  * \param[in] fileout output ps file
551  * \return 0 if OK, 1 on error
552  *
553  * <pre>
554  * Notes:
555  * (1) This generates the PS string for a mixed text/image page,
556  * and adds it to an existing file if %pageno > 1.
557  * The PS output is determined by fitting the result to
558  * a letter-size (8.5 x 11 inch) page.
559  * (2) The two images (pixs and pixm) are at the same resolution
560  * (typically 300 ppi). They are used to generate two compressed
561  * images, pixb and pixc, that are put directly into the output
562  * PS file.
563  * (3) pixb is the text component. In the PostScript world, we think of
564  * it as a mask through which we paint black. It is produced by
565  * scaling pixs by %textscale, and thresholding to 1 bpp.
566  * (4) pixc is the image component, which is that part of pixs under
567  * the mask pixm. It is scaled from pixs by %imagescale.
568  * (5) Typical values are textscale = 2.0 and imagescale = 0.5.
569  * (6) If pixm == NULL, the page has only text. If it is all black,
570  * the page is all image and has no text.
571  * (7) This can be used to write a multi-page PS file, by using
572  * sequential page numbers with the same output file. It can
573  * also be used to write separate PS files for each page,
574  * by using different output files with %pageno = 0 or 1.
575  * </pre>
576  */
577 l_ok
578 pixWriteSegmentedPageToPS(PIX *pixs,
579  PIX *pixm,
580  l_float32 textscale,
581  l_float32 imagescale,
582  l_int32 threshold,
583  l_int32 pageno,
584  const char *fileout)
585 {
586 l_int32 alltext, notext, d, ret;
587 l_uint32 val;
588 l_float32 scaleratio;
589 PIX *pixmi, *pixmis, *pixt, *pixg, *pixsc, *pixb, *pixc;
590 
591  if (!pixs)
592  return ERROR_INT("pixs not defined", __func__, 1);
593  if (!fileout)
594  return ERROR_INT("fileout not defined", __func__, 1);
595  if (imagescale <= 0.0 || textscale <= 0.0)
596  return ERROR_INT("relative scales must be > 0.0", __func__, 1);
597 
598  /* Analyze the page. Determine the ratio by which the
599  * binary text mask is scaled relative to the image part.
600  * If there is no image region (alltext == TRUE), the
601  * text mask will be rendered directly to fit the page,
602  * and scaleratio = 1.0. */
603  alltext = TRUE;
604  notext = FALSE;
605  scaleratio = 1.0;
606  if (pixm) {
607  pixZero(pixm, &alltext); /* pixm empty: all text */
608  if (alltext) {
609  pixm = NULL; /* treat it as not existing here */
610  } else {
611  pixmi = pixInvert(NULL, pixm);
612  pixZero(pixmi, &notext); /* pixm full; no text */
613  pixDestroy(&pixmi);
614  scaleratio = textscale / imagescale;
615  }
616  }
617 
618  if (pixGetDepth(pixs) == 1) { /* render tiff g4 */
619  pixb = pixClone(pixs);
620  pixc = NULL;
621  } else {
622  pixt = pixConvertTo8Or32(pixs, L_CLONE, 0); /* clone if possible */
623 
624  /* Get the binary text mask. Note that pixg cannot be a
625  * clone of pixs, because it may be altered by pixSetMasked(). */
626  pixb = NULL;
627  if (notext == FALSE) {
628  d = pixGetDepth(pixt);
629  if (d == 8)
630  pixg = pixCopy(NULL, pixt);
631  else /* d == 32 */
632  pixg = pixConvertRGBToLuminance(pixt);
633  if (pixm) /* clear out the image parts */
634  pixSetMasked(pixg, pixm, 255);
635  if (textscale == 1.0)
636  pixsc = pixClone(pixg);
637  else if (textscale >= 0.7)
638  pixsc = pixScaleGrayLI(pixg, textscale, textscale);
639  else
640  pixsc = pixScaleAreaMap(pixg, textscale, textscale);
641  pixb = pixThresholdToBinary(pixsc, threshold);
642  pixDestroy(&pixg);
643  pixDestroy(&pixsc);
644  }
645 
646  /* Get the scaled image region */
647  pixc = NULL;
648  if (pixm) {
649  if (imagescale == 1.0)
650  pixsc = pixClone(pixt); /* can possibly be a clone of pixs */
651  else
652  pixsc = pixScale(pixt, imagescale, imagescale);
653 
654  /* If pixm is not full, clear the pixels in pixsc
655  * corresponding to bg in pixm, where there can be text
656  * that is written through the mask pixb. Note that
657  * we could skip this and use pixsc directly in
658  * pixWriteMixedToPS(); however, clearing these
659  * non-image regions to a white background will reduce
660  * the size of pixc (relative to pixsc), and hence
661  * reduce the size of the PS file that is generated.
662  * Use a copy so that we don't accidentally alter pixs. */
663  if (notext == FALSE) {
664  pixmis = pixScale(pixm, imagescale, imagescale);
665  pixmi = pixInvert(NULL, pixmis);
666  val = (d == 8) ? 0xff : 0xffffff00;
667  pixc = pixCopy(NULL, pixsc);
668  pixSetMasked(pixc, pixmi, val); /* clear non-image part */
669  pixDestroy(&pixmis);
670  pixDestroy(&pixmi);
671  } else {
672  pixc = pixClone(pixsc);
673  }
674  pixDestroy(&pixsc);
675  }
676  pixDestroy(&pixt);
677  }
678 
679  /* Generate the PS file. Don't use bounding boxes. */
680  l_psWriteBoundingBox(FALSE);
681  ret = pixWriteMixedToPS(pixb, pixc, scaleratio, pageno, fileout);
682  l_psWriteBoundingBox(TRUE);
683  pixDestroy(&pixb);
684  pixDestroy(&pixc);
685  return ret;
686 }
687 
688 
689 /*
690  * \brief pixWriteMixedToPS()
691  *
692  * \param[in] pixb [optional] 1 bpp mask; typically for text
693  * \param[in] pixc [optional] 8 or 32 bpp image regions
694  * \param[in] scale scale factor for rendering pixb, relative to pixc;
695  * typ. 4.0
696  * \param[in] pageno page number in set; use 1 for new output file
697  * \param[in] fileout output ps file
698  * \return 0 if OK, 1 on error
699  *
700  * <pre>
701  * Notes:
702  * (1) This low level function generates the PS string for a mixed
703  * text/image page, and adds it to an existing file if
704  * %pageno > 1.
705  * (2) The two images (pixb and pixc) are typically generated at the
706  * resolution that they will be rendered in the PS file.
707  * (3) pixb is the text component. In the PostScript world, we think of
708  * it as a mask through which we paint black.
709  * (4) pixc is the (typically halftone) image component. It is
710  * white in the rest of the page. To minimize the size of the
711  * PS file, it should be rendered at a resolution that is at
712  * least equal to its actual resolution.
713  * (5) %scale gives the ratio of resolution of pixb to pixc.
714  * Typical resolutions are: 600 ppi for pixb, 150 ppi for pixc;
715  * so %scale = 4.0. If one of the images is not defined,
716  * the value of %scale is ignored.
717  * (6) We write pixc with DCT compression (jpeg). This is followed
718  * by painting the text as black through the mask pixb. If
719  * pixc doesn't exist (alltext), we write the text with the
720  * PS "image" operator instead of the "imagemask" operator,
721  * because ghostscript's ps2pdf is flaky when the latter is used.
722  * (7) The actual output resolution is determined by fitting the
723  * result to a letter-size (8.5 x 11 inch) page.
724  * </pre>
725  */
726 l_ok
727 pixWriteMixedToPS(PIX *pixb,
728  PIX *pixc,
729  l_float32 scale,
730  l_int32 pageno,
731  const char *fileout)
732 {
733 char *tname;
734 const char *op;
735 l_int32 resb, resc, endpage, maskop, ret;
736 
737  if (!pixb && !pixc)
738  return ERROR_INT("pixb and pixc both undefined", __func__, 1);
739  if (!fileout)
740  return ERROR_INT("fileout not defined", __func__, 1);
741 
742  /* Compute the resolution that fills a letter-size page. */
743  if (!pixc) {
744  resb = getResLetterPage(pixGetWidth(pixb), pixGetHeight(pixb), 0);
745  } else {
746  resc = getResLetterPage(pixGetWidth(pixc), pixGetHeight(pixc), 0);
747  if (pixb)
748  resb = (l_int32)(scale * resc);
749  }
750 
751  /* Write the jpeg image first */
752  if (pixc) {
753  tname = l_makeTempFilename();
754  pixWrite(tname, pixc, IFF_JFIF_JPEG);
755  endpage = (pixb) ? FALSE : TRUE;
756  op = (pageno <= 1) ? "w" : "a";
757  ret = convertJpegToPS(tname, fileout, op, 0, 0, resc, 1.0,
758  pageno, endpage);
759  lept_rmfile(tname);
760  LEPT_FREE(tname);
761  if (ret)
762  return ERROR_INT("jpeg data not written", __func__, 1);
763  }
764 
765  /* Write the binary data, either directly or, if there is
766  * a jpeg image on the page, through the mask. */
767  if (pixb) {
768  tname = l_makeTempFilename();
769  pixWrite(tname, pixb, IFF_TIFF_G4);
770  op = (pageno <= 1 && !pixc) ? "w" : "a";
771  maskop = (pixc) ? 1 : 0;
772  ret = convertG4ToPS(tname, fileout, op, 0, 0, resb, 1.0,
773  pageno, maskop, 1);
774  lept_rmfile(tname);
775  LEPT_FREE(tname);
776  if (ret)
777  return ERROR_INT("tiff data not written", __func__, 1);
778  }
779 
780  return 0;
781 }
782 
783 
784 /*-------------------------------------------------------------*
785  * Convert any image file to PS for embedding *
786  *-------------------------------------------------------------*/
787 /*
788  * \brief convertToPSEmbed()
789  *
790  * \param[in] filein input image file, any format
791  * \param[in] fileout output ps file
792  * \param[in] level PostScript compression: 1 (uncompressed), 2 or 3
793  * \return 0 if OK, 1 on error
794  *
795  * <pre>
796  * Notes:
797  * (1) This is a wrapper function that generates a PS file with
798  * a bounding box, from any input image file.
799  * (2) Do the best job of compression given the specified level.
800  * %level=3 does flate compression on anything that is not
801  * tiffg4 (1 bpp) or jpeg (8 bpp or rgb).
802  * (3) If %level=2 and the file is not tiffg4 or jpeg, it will
803  * first be written to file as jpeg with quality = 75.
804  * This will remove the colormap and cause some degradation
805  * in the image.
806  * (4) The bounding box is required when a program such as TeX
807  * (through epsf) places and rescales the image. It is
808  * sized for fitting the image to an 8.5 x 11.0 inch page.
809  * </pre>
810  */
811 l_ok
812 convertToPSEmbed(const char *filein,
813  const char *fileout,
814  l_int32 level)
815 {
816 char *tname;
817 l_int32 d, format;
818 PIX *pix, *pixs;
819 
820  if (!filein)
821  return ERROR_INT("filein not defined", __func__, 1);
822  if (!fileout)
823  return ERROR_INT("fileout not defined", __func__, 1);
824  if (level != 1 && level != 2 && level != 3) {
825  L_ERROR("invalid level specified; using level 2\n", __func__);
826  level = 2;
827  }
828 
829  if (level == 1) { /* no compression */
830  pixWritePSEmbed(filein, fileout);
831  return 0;
832  }
833 
834  /* Find the format and write out directly if in jpeg or tiff g4 */
835  findFileFormat(filein, &format);
836  if (format == IFF_JFIF_JPEG) {
837  convertJpegToPSEmbed(filein, fileout);
838  return 0;
839  } else if (format == IFF_TIFF_G4) {
840  convertG4ToPSEmbed(filein, fileout);
841  return 0;
842  } else if (format == IFF_UNKNOWN) {
843  L_ERROR("format of %s not known\n", __func__, filein);
844  return 1;
845  }
846 
847  /* If level 3, flate encode. */
848  if (level == 3) {
849  convertFlateToPSEmbed(filein, fileout);
850  return 0;
851  }
852 
853  /* OK, it's level 2, so we must convert to jpeg or tiff g4 */
854  if ((pixs = pixRead(filein)) == NULL)
855  return ERROR_INT("image not read from file", __func__, 1);
856  d = pixGetDepth(pixs);
857  if ((d == 2 || d == 4) && !pixGetColormap(pixs))
858  pix = pixConvertTo8(pixs, 0);
859  else if (d == 16)
860  pix = pixConvert16To8(pixs, L_MS_BYTE);
861  else
863  pixDestroy(&pixs);
864  if (!pix)
865  return ERROR_INT("converted pix not made", __func__, 1);
866 
867  d = pixGetDepth(pix);
868  tname = l_makeTempFilename();
869  if (d == 1) {
870  if (pixWrite(tname, pix, IFF_TIFF_G4)) {
871  LEPT_FREE(tname);
872  pixDestroy(&pix);
873  return ERROR_INT("g4 tiff not written", __func__, 1);
874  }
875  convertG4ToPSEmbed(tname, fileout);
876  } else {
877  if (pixWrite(tname, pix, IFF_JFIF_JPEG)) {
878  LEPT_FREE(tname);
879  pixDestroy(&pix);
880  return ERROR_INT("jpeg not written", __func__, 1);
881  }
882  convertJpegToPSEmbed(tname, fileout);
883  }
884 
885  lept_rmfile(tname);
886  LEPT_FREE(tname);
887  pixDestroy(&pix);
888  return 0;
889 }
890 
891 
892 /*-------------------------------------------------------------*
893  * Write all images in a pixa out to PS *
894  *-------------------------------------------------------------*/
895 /*
896  * \brief pixaWriteCompressedToPS()
897  *
898  * \param[in] pixa any set of images
899  * \param[in] fileout output ps file
900  * \param[in] res resolution for the set of input images
901  * \param[in] level PostScript compression capability: 2 or 3
902  * \return 0 if OK, 1 on error
903  *
904  * <pre>
905  * Notes:
906  * (1) This generates a PostScript file of multiple page images,
907  * all with bounding boxes.
908  * (2) See pixWriteCompressedToPS() for details.
909  * (3) To generate a pdf from %fileout, use:
910  * ps2pdf <infile.ps> <outfile.pdf>
911  * </pre>
912  */
913 l_ok
914 pixaWriteCompressedToPS(PIXA *pixa,
915  const char *fileout,
916  l_int32 res,
917  l_int32 level)
918 {
919 l_int32 i, n, index, ret;
920 PIX *pix;
921 
922  if (!pixa)
923  return ERROR_INT("pixa not defined", __func__, 1);
924  if (!fileout)
925  return ERROR_INT("fileout not defined", __func__, 1);
926  if (level != 2 && level != 3) {
927  L_ERROR("only levels 2 and 3 permitted; using level 2\n", __func__);
928  level = 2;
929  }
930 
931  index = 0;
932  n = pixaGetCount(pixa);
933  for (i = 0; i < n; i++) {
934  pix = pixaGetPix(pixa, i, L_CLONE);
935  ret = pixWriteCompressedToPS(pix, fileout, res, level, &index);
936  if (ret) L_ERROR("PS string not written for image %d\n", __func__, i);
937  pixDestroy(&pix);
938  }
939  return 0;
940 }
941 
942 
943 /*
944  * \brief pixWriteCompressedToPS()
945  *
946  * \param[in] pix any depth; colormap OK
947  * \param[in] fileout output ps file
948  * \param[in] res of input image
949  * \param[in] level PostScript compression capability: 2 or 3
950  * \param[in,out] pindex index of image in output ps file
951  * \return 0 if OK, 1 on error
952  *
953  * <pre>
954  * Notes:
955  * (1) This generates a PostScript string for %pix, and writes it
956  * to a file, with a bounding box.
957  * (2) *pindex keeps track of the number of images that have been
958  * written to %fileout. If this is the first image to be
959  * converted, set *pindex == 0 before passing it in. If the
960  * PostScript string is successfully generated, this will increment
961  * *pindex. If *pindex > 0, the PostScript string will be
962  * appended to %fileout.
963  * (3) PostScript level 2 enables lossless tiffg4 and lossy jpeg
964  * compression. Level 3 adds lossless flate (essentially gzip)
965  * compression.
966  * * For images with a colormap, lossless flate is often better in
967  * both quality and size than jpeg.
968  * * The decision for images without a colormap affects compression
969  * efficiency: %level2 (jpeg) is usually better than %level3 (flate)
970  * * Because jpeg does not handle 16 bpp, if %level == 2, the image
971  * is converted to 8 bpp (using MSB) and compressed with jpeg,
972  * cmap + level2: jpeg
973  * cmap + level3: flate
974  * 1 bpp: tiffg4
975  * 2 or 4 bpp + level2: jpeg
976  * 2 or 4 bpp + level3: flate
977  * 8 bpp + level2: jpeg
978  * 8 bpp + level3: flate
979  * 16 bpp + level2: jpeg [converted to 8 bpp, with warning]
980  * 16 bpp + level3: flate
981  * 32 bpp + level2: jpeg
982  * 32 bpp + level3: flate
983  * </pre>
984  */
985 l_ok
986 pixWriteCompressedToPS(PIX *pix,
987  const char *fileout,
988  l_int32 res,
989  l_int32 level,
990  l_int32 *pindex)
991 {
992 char *tname;
993 l_int32 writeout, d;
994 PIX *pixt;
995 PIXCMAP *cmap;
996 
997  if (!pix)
998  return ERROR_INT("pix not defined", __func__, 1);
999  if (!fileout)
1000  return ERROR_INT("fileout not defined", __func__, 1);
1001  if (level != 2 && level != 3) {
1002  L_ERROR("only levels 2 and 3 permitted; using level 2\n", __func__);
1003  level = 2;
1004  }
1005  if (!pindex)
1006  return ERROR_INT("&index not defined", __func__, 1);
1007 
1008  tname = l_makeTempFilename();
1009  writeout = TRUE;
1010  d = pixGetDepth(pix);
1011  cmap = pixGetColormap(pix);
1012  if (d == 1) {
1013  if (pixWrite(tname, pix, IFF_TIFF_G4))
1014  writeout = FALSE;
1015  } else if (level == 3) {
1016  if (pixWrite(tname, pix, IFF_PNG))
1017  writeout = FALSE;
1018  } else { /* level == 2 */
1019  if (cmap) {
1020  pixt = pixConvertForPSWrap(pix);
1021  if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1022  writeout = FALSE;
1023  pixDestroy(&pixt);
1024  } else if (d == 16) {
1025  L_WARNING("d = 16; converting to 8 bpp for jpeg\n", __func__);
1026  pixt = pixConvert16To8(pix, L_MS_BYTE);
1027  if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1028  writeout = FALSE;
1029  pixDestroy(&pixt);
1030  } else if (d == 2 || d == 4) {
1031  pixt = pixConvertTo8(pix, 0);
1032  if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1033  writeout = FALSE;
1034  pixDestroy(&pixt);
1035  } else if (d == 8 || d == 32) {
1036  if (pixWrite(tname, pix, IFF_JFIF_JPEG))
1037  writeout = FALSE;
1038  } else { /* shouldn't happen */
1039  L_ERROR("invalid depth with level 2: %d\n", __func__, d);
1040  writeout = FALSE;
1041  }
1042  }
1043 
1044  if (writeout)
1045  writeImageCompressedToPSFile(tname, fileout, res, pindex);
1046 
1047  if (lept_rmfile(tname) != 0)
1048  L_ERROR("temp file %s was not deleted\n", __func__, tname);
1049  LEPT_FREE(tname);
1050  return (writeout) ? 0 : 1;
1051 }
1052 
1053 /* --------------------------------------------*/
1054 #endif /* USE_PSIO */
1055 /* --------------------------------------------*/
PIX * pixThresholdToBinary(PIX *pixs, l_int32 thresh)
pixThresholdToBinary()
Definition: grayquant.c:443
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:608
PIX * pixCopy(PIX *pixd, const PIX *pixs)
pixCopy()
Definition: pix1.c:689
PIX * pixClone(PIX *pixs)
pixClone()
Definition: pix1.c:582
l_ok pixZero(PIX *pix, l_int32 *pempty)
pixZero()
Definition: pix3.c:1777
PIX * pixInvert(PIX *pixd, PIX *pixs)
pixInvert()
Definition: pix3.c:1481
l_ok pixSetMasked(PIX *pixd, PIX *pixm, l_uint32 val)
pixSetMasked()
Definition: pix3.c:163
@ REMOVE_CMAP_BASED_ON_SRC
Definition: pix.h:384
@ L_CLONE
Definition: pix.h:506
@ L_NOCOPY
Definition: pix.h:503
@ L_MS_BYTE
Definition: pix.h:642
l_int32 pixaGetCount(PIXA *pixa)
pixaGetCount()
Definition: pixabasic.c:629
PIX * pixaGetPix(PIXA *pixa, l_int32 index, l_int32 accesstype)
pixaGetPix()
Definition: pixabasic.c:647
PIX * pixConvertTo8Or32(PIX *pixs, l_int32 copyflag, l_int32 warnflag)
pixConvertTo8Or32()
Definition: pixconv.c:3400
PIX * pixRemoveColormap(PIX *pixs, l_int32 type)
pixRemoveColormap()
Definition: pixconv.c:324
PIX * pixConvertForPSWrap(PIX *pixs)
pixConvertForPSWrap()
Definition: pixconv.c:3823
PIX * pixConvertTo8(PIX *pixs, l_int32 cmapflag)
pixConvertTo8()
Definition: pixconv.c:3055
PIX * pixConvertRGBToLuminance(PIX *pixs)
pixConvertRGBToLuminance()
Definition: pixconv.c:732
PIX * pixConvert16To8(PIX *pixs, l_int32 type)
pixConvert16To8()
Definition: pixconv.c:1726
l_ok convertFlateToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 endpage)
convertFlateToPS()
Definition: psio2.c:1637
l_ok convertJpegToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 endpage)
convertJpegToPS()
Definition: psio2.c:782
l_ok pixWritePSEmbed(const char *filein, const char *fileout)
pixWritePSEmbed()
Definition: psio2.c:188
l_ok convertFlateToPSEmbed(const char *filein, const char *fileout)
convertFlateToPSEmbed()
Definition: psio2.c:1525
l_ok convertG4ToPSEmbed(const char *filein, const char *fileout)
convertG4ToPSEmbed()
Definition: psio2.c:1058
l_ok convertJpegToPSEmbed(const char *filein, const char *fileout)
convertJpegToPSEmbed()
Definition: psio2.c:668
l_ok convertG4ToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 maskflag, l_int32 endpage)
convertG4ToPS()
Definition: psio2.c:1162
l_int32 getResLetterPage(l_int32 w, l_int32 h, l_float32 fillfract)
getResLetterPage()
Definition: psio2.c:1954
l_ok pixReadHeader(const char *filename, l_int32 *pformat, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap)
pixReadHeader()
Definition: readfile.c:434
l_ok findFileFormat(const char *filename, l_int32 *pformat)
findFileFormat()
Definition: readfile.c:570
PIX * pixRead(const char *filename)
pixRead()
Definition: readfile.c:189
PIX * pixReadIndexed(SARRAY *sa, l_int32 index)
pixReadIndexed()
Definition: readfile.c:273
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:673
l_ok sarrayPadToSameSize(SARRAY *sa1, SARRAY *sa2, const char *padstring)
sarrayPadToSameSize()
Definition: sarray1.c:985
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:617
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:353
SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 first, l_int32 nfiles)
getSortedPathnamesInDirectory()
Definition: sarray1.c:1739
SARRAY * getNumberedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
getNumberedPathnamesInDirectory()
Definition: sarray1.c:1693
PIX * pixScale(PIX *pixs, l_float32 scalex, l_float32 scaley)
pixScale()
Definition: scale1.c:250
PIX * pixScaleGrayLI(PIX *pixs, l_float32 scalex, l_float32 scaley)
pixScaleGrayLI()
Definition: scale1.c:762
PIX * pixScaleAreaMap(PIX *pix, l_float32 scalex, l_float32 scaley)
pixScaleAreaMap()
Definition: scale1.c:1864
l_int32 lept_rmfile(const char *filepath)
lept_rmfile()
Definition: utils2.c:2429
char * l_makeTempFilename(void)
l_makeTempFilename()
Definition: utils2.c:3286