Leptonica  1.83.1
Image processing and image analysis suite
parseprotos.c
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*
28  * \file parseprotos.c
29  * <pre>
30  *
31  * char *parseForProtos()
32  *
33  * Static helpers
34  * static l_int32 getNextNonCommentLine()
35  * static l_int32 getNextNonBlankLine()
36  * static l_int32 getNextNonDoubleSlashLine()
37  * static l_int32 searchForProtoSignature()
38  * static char *captureProtoSignature()
39  * static char *cleanProtoSignature()
40  * static l_int32 skipToEndOfFunction()
41  * static l_int32 skipToMatchingBrace()
42  * static l_int32 skipToSemicolon()
43  * static l_int32 getOffsetForCharacter()
44  * static l_int32 getOffsetForMatchingRP()
45  * </pre>
46  */
47 
48 #ifdef HAVE_CONFIG_H
49 #include <config_auto.h>
50 #endif /* HAVE_CONFIG_H */
51 
52 #include <string.h>
53 #include "allheaders.h"
54 
55 #define L_BUF_SIZE 2048 /* max token size */
56 
57 static l_int32 getNextNonCommentLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
58 static l_int32 getNextNonBlankLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
59 static l_int32 getNextNonDoubleSlashLine(SARRAY *sa, l_int32 start,
60  l_int32 *pnext);
61 static l_int32 searchForProtoSignature(SARRAY *sa, l_int32 begin,
62  l_int32 *pstart, l_int32 *pstop, l_int32 *pcharindex,
63  l_int32 *pfound);
64 static char * captureProtoSignature(SARRAY *sa, l_int32 start, l_int32 stop,
65  l_int32 charindex);
66 static char * cleanProtoSignature(char *str);
67 static l_int32 skipToEndOfFunction(SARRAY *sa, l_int32 start,
68  l_int32 charindex, l_int32 *pnext);
69 static l_int32 skipToMatchingBrace(SARRAY *sa, l_int32 start,
70  l_int32 lbindex, l_int32 *prbline, l_int32 *prbindex);
71 static l_int32 skipToSemicolon(SARRAY *sa, l_int32 start,
72  l_int32 charindex, l_int32 *pnext);
73 static l_int32 getOffsetForCharacter(SARRAY *sa, l_int32 start, char tchar,
74  l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
75 static l_int32 getOffsetForMatchingRP(SARRAY *sa, l_int32 start,
76  l_int32 soffsetlp, l_int32 boffsetlp, l_int32 toffsetlp,
77  l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
78 
79 
80 /*
81  * \brief parseForProtos()
82  *
83  * \param[in] filein output of cpp
84  * \param[in] prestring [optional] string that prefaces each decl;
85  * use NULL to omit
86  * \return parsestr string of function prototypes, or NULL on error
87  *
88  * <pre>
89  * Notes:
90  * (1) We parse the output of cpp:
91  * cpp -ansi <filein>
92  * Three plans were attempted, with success on the third.
93  * (2) Plan 1. A cursory examination of the cpp output indicated that
94  * every function was preceded by a cpp comment statement.
95  * So we just need to look at statements beginning after comments.
96  * Unfortunately, this is NOT the case. Some functions start
97  * without cpp comment lines, typically when there are no
98  * comments in the source that immediately precede the function.
99  * (3) Plan 2. Consider the keywords in the language that start
100  * parts of the cpp file. Some, like 'enum', 'union' and
101  * 'struct', are followed after a while by '{', and eventually
102  * end with '}, plus an optional token and a final ';'.
103  * Others, like 'extern', 'static' and 'typedef', are never
104  * the beginnings of global function definitions. Function
105  * prototypes have one or more sets of '(' followed eventually
106  * by a ')', and end with ';'. But function definitions have
107  * tokens, followed by '(', more tokens, ')' and then
108  * immediately a '{'. We would generate a prototype from this
109  * by adding a ';' to all tokens up to the ')'. So we use
110  * these special tokens to decide what we are parsing. And
111  * whenever a function definition is found and the prototype
112  * extracted, we skip through the rest of the function
113  * past the corresponding '}'. This token ends a line, and
114  * is often on a line of its own. But as it turns out,
115  * the only keyword we need to consider is 'static'.
116  * (4) Plan 3. Consider the parentheses and braces for various
117  * declarations. A struct, enum, or union has a pair of
118  * braces followed by a semicolon. With the exception of an
119  * __attribute__ declaration for a struct, they cannot have parentheses
120  * before the left brace, but a struct can have lots of parentheses
121  * within the brace set. A function prototype has no braces.
122  * A function declaration can have sets of left and right
123  * parentheses, but these are followed by a left brace.
124  * So plan 3 looks at the way parentheses and braces are
125  * organized. Once the beginning of a function definition
126  * is found, the prototype is extracted and we search for
127  * the ending right brace.
128  * (5) To find the ending right brace, it is necessary to do some
129  * careful parsing. For example, in this file, we have
130  * left and right braces as characters, and these must not
131  * be counted. Somewhat more tricky, the file fhmtauto.c
132  * generates code, and includes a right brace in a string.
133  * So we must not include braces that are in strings. But how
134  * do we know if something is inside a string? Keep state,
135  * starting with not-inside, and every time you hit a double quote
136  * that is not escaped, toggle the condition. Any brace
137  * found in the state of being within a string is ignored.
138  * (6) When a prototype is extracted, it is put in a canonical
139  * form (i.e., cleaned up). Finally, we check that it is
140  * not static and save it. (If static, it is ignored).
141  * (7) The %prestring for unix is NULL; it is included here so that
142  * you can use Microsoft's declaration for importing or
143  * exporting to a dll. See environ.h for examples of use.
144  * Here, we set: %prestring = "LEPT_DLL ". Note in particular
145  * the space character that will separate 'LEPT_DLL' from
146  * the standard unix prototype that follows.
147  * </pre>
148  */
149 char *
150 parseForProtos(const char *filein,
151  const char *prestring)
152 {
153 char *strdata, *str, *newstr, *parsestr, *secondword;
154 l_int32 start, next, stop, charindex, found;
155 size_t nbytes;
156 SARRAY *sa, *saout, *satest;
157 
158  if (!filein)
159  return (char *)ERROR_PTR("filein not defined", __func__, NULL);
160 
161  /* Read in the cpp output into memory, one string for each
162  * line in the file, omitting blank lines. */
163  strdata = (char *)l_binaryRead(filein, &nbytes);
164  sa = sarrayCreateLinesFromString(strdata, 0);
165 
166  saout = sarrayCreate(0);
167  next = 0;
168  while (1) { /* repeat after each non-static prototype is extracted */
169  searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
170  if (!found)
171  break;
172 /* lept_stderr(" start = %d, stop = %d, charindex = %d\n",
173  start, stop, charindex); */
174  str = captureProtoSignature(sa, start, stop, charindex);
175 
176  /* Make sure that the signature found by cpp does not begin with
177  * static, extern or typedef. We get 'extern' declarations
178  * from header files, and with some versions of cpp running on
179  * #include <sys/stat.h> we get something of the form:
180  * extern ... (( ... )) ... ( ... ) { ...
181  * For this, the 1st '(' is the lp, the 2nd ')' is the rp,
182  * and there is a lot of garbage between the rp and the lp.
183  * It is easiest to simply reject any signature that starts
184  * with 'extern'. Note also that an 'extern' token has been
185  * prepended to each prototype, so the 'static' or
186  * 'extern' keywords we are looking for, if they exist,
187  * would be the second word. We also have a typedef in
188  * bmpio.c that has the form:
189  * typedef struct __attribute__((....)) { ...} ... ;
190  * This is avoided by blacklisting 'typedef' along with 'extern'
191  * and 'static'. */
192  satest = sarrayCreateWordsFromString(str);
193  secondword = sarrayGetString(satest, 1, L_NOCOPY);
194  if (strcmp(secondword, "static") && /* not static */
195  strcmp(secondword, "extern") && /* not extern */
196  strcmp(secondword, "typedef")) { /* not typedef */
197  if (prestring) { /* prepend it to the prototype */
198  newstr = stringJoin(prestring, str);
199  sarrayAddString(saout, newstr, L_INSERT);
200  LEPT_FREE(str);
201  } else {
202  sarrayAddString(saout, str, L_INSERT);
203  }
204  } else {
205  LEPT_FREE(str);
206  }
207  sarrayDestroy(&satest);
208 
209  skipToEndOfFunction(sa, stop, charindex, &next);
210  if (next == -1) break;
211  }
212 
213  /* Flatten into a string with newlines between prototypes */
214  parsestr = sarrayToString(saout, 1);
215  LEPT_FREE(strdata);
216  sarrayDestroy(&sa);
217  sarrayDestroy(&saout);
218 
219  return parsestr;
220 }
221 
222 
223 /*
224  * \brief getNextNonCommentLine()
225  *
226  * \param[in] sa output from cpp, by line)
227  * \param[in] start starting index to search)
228  * \param[out] pnext index of first uncommented line after the start line
229  * \return 0 if OK, o on error
230  *
231  * <pre>
232  * Notes:
233  * (1) Skips over all consecutive comment lines, beginning at 'start'
234  * (2) If all lines to the end are '#' comments, return next = -1
235  * </pre>
236  */
237 static l_int32
238 getNextNonCommentLine(SARRAY *sa,
239  l_int32 start,
240  l_int32 *pnext)
241 {
242 char *str;
243 l_int32 i, n;
244 
245  if (!sa)
246  return ERROR_INT("sa not defined", __func__, 1);
247  if (!pnext)
248  return ERROR_INT("&pnext not defined", __func__, 1);
249 
250  /* Init for situation where this line and all following are comments */
251  *pnext = -1;
252 
253  n = sarrayGetCount(sa);
254  for (i = start; i < n; i++) {
255  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
256  return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
257  if (str[0] != '#') {
258  *pnext = i;
259  return 0;
260  }
261  }
262 
263  return 0;
264 }
265 
266 
267 /*
268  * \brief getNextNonBlankLine()
269  *
270  * \param[in] sa output from cpp, by line
271  * \param[in] start starting index to search
272  * \param[out] pnext index of first nonblank line after the start line
273  * \return 0 if OK, 1 on error
274  *
275  * <pre>
276  * Notes:
277  * (1) Skips over all consecutive blank lines, beginning at 'start'
278  * (2) A blank line has only whitespace characters (' ', '\t', '\n', '\r')
279  * (3) If all lines to the end are blank, return next = -1
280  * </pre>
281  */
282 static l_int32
283 getNextNonBlankLine(SARRAY *sa,
284  l_int32 start,
285  l_int32 *pnext)
286 {
287 char *str;
288 l_int32 i, j, n, len;
289 
290  if (!sa)
291  return ERROR_INT("sa not defined", __func__, 1);
292  if (!pnext)
293  return ERROR_INT("&pnext not defined", __func__, 1);
294 
295  /* Init for situation where this line and all following are blank */
296  *pnext = -1;
297 
298  n = sarrayGetCount(sa);
299  for (i = start; i < n; i++) {
300  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
301  return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
302  len = strlen(str);
303  for (j = 0; j < len; j++) {
304  if (str[j] != ' ' && str[j] != '\t'
305  && str[j] != '\n' && str[j] != '\r') { /* non-blank */
306  *pnext = i;
307  return 0;
308  }
309  }
310  }
311 
312  return 0;
313 }
314 
315 
316 /*
317  * \brief getNextNonDoubleSlashLine()
318  *
319  * \param[in] sa output from cpp, by line
320  * \param[in] start starting index to search
321  * \param[out] pnext index of first uncommented line after the start line
322  * \return 0 if OK, 1 on error
323  *
324  * <pre>
325  * Notes:
326  * (1) Skips over all consecutive '//' lines, beginning at 'start'
327  * (2) If all lines to the end start with '//', return next = -1
328  * </pre>
329  */
330 static l_int32
331 getNextNonDoubleSlashLine(SARRAY *sa,
332  l_int32 start,
333  l_int32 *pnext)
334 {
335 char *str;
336 l_int32 i, n, len;
337 
338  if (!sa)
339  return ERROR_INT("sa not defined", __func__, 1);
340  if (!pnext)
341  return ERROR_INT("&pnext not defined", __func__, 1);
342 
343  /* Init for situation where this line and all following
344  * start with '//' */
345  *pnext = -1;
346 
347  n = sarrayGetCount(sa);
348  for (i = start; i < n; i++) {
349  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
350  return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
351  len = strlen(str);
352  if (len < 2 || str[0] != '/' || str[1] != '/') {
353  *pnext = i;
354  return 0;
355  }
356  }
357 
358  return 0;
359 }
360 
361 
362 /*
363  * \brief searchForProtoSignature()
364  *
365  * \param[in] sa output from cpp, by line
366  * \param[in] begin beginning index to search
367  * \param[out] pstart starting index for function definition
368  * \param[out] pstop index of line on which proto is completed
369  * \param[out] pcharindex char index of completing ')' character
370  * \param[out] pfound 1 if valid signature is found; 0 otherwise
371  * \return 0 if OK, 1 on error
372  *
373  * <pre>
374  * Notes:
375  * (1) If this returns found == 0, it means that there are no
376  * more function definitions in the file. Caller must check
377  * this value and exit the loop over the entire cpp file.
378  * (2) This follows plan 3 (see above). We skip comment and blank
379  * lines at the beginning. Then we don't check for keywords.
380  * Instead, find the relative locations of the first occurrences
381  * of these four tokens: left parenthesis (lp), right
382  * parenthesis (rp), left brace (lb) and semicolon (sc).
383  * (3) The signature of a function definition looks like this:
384  * .... '(' .... ')' '{'
385  * where the lp and rp must both precede the lb, with only
386  * whitespace between the rp and the lb. The '....'
387  * are sets of tokens that have no braces.
388  * (4) If a function definition is found, this returns found = 1,
389  * with 'start' being the first line of the definition and
390  * 'charindex' being the position of the ')' in line 'stop'
391  * at the end of the arg list.
392  * </pre>
393  */
394 static l_int32
395 searchForProtoSignature(SARRAY *sa,
396  l_int32 begin,
397  l_int32 *pstart,
398  l_int32 *pstop,
399  l_int32 *pcharindex,
400  l_int32 *pfound)
401 {
402 l_int32 next, rbline, rbindex, scline;
403 l_int32 soffsetlp, soffsetrp, soffsetlb, soffsetsc;
404 l_int32 boffsetlp, boffsetrp, boffsetlb, boffsetsc;
405 l_int32 toffsetlp, toffsetrp, toffsetlb, toffsetsc;
406 
407  if (!sa)
408  return ERROR_INT("sa not defined", __func__, 1);
409  if (!pstart)
410  return ERROR_INT("&start not defined", __func__, 1);
411  if (!pstop)
412  return ERROR_INT("&stop not defined", __func__, 1);
413  if (!pcharindex)
414  return ERROR_INT("&charindex not defined", __func__, 1);
415  if (!pfound)
416  return ERROR_INT("&found not defined", __func__, 1);
417 
418  *pfound = FALSE;
419 
420  while (1) {
421 
422  /* Skip over sequential '#' comment lines */
423  getNextNonCommentLine(sa, begin, &next);
424  if (next == -1) return 0;
425  if (next != begin) {
426  begin = next;
427  continue;
428  }
429 
430  /* Skip over sequential blank lines */
431  getNextNonBlankLine(sa, begin, &next);
432  if (next == -1) return 0;
433  if (next != begin) {
434  begin = next;
435  continue;
436  }
437 
438  /* Skip over sequential lines starting with '//' */
439  getNextNonDoubleSlashLine(sa, begin, &next);
440  if (next == -1) return 0;
441  if (next != begin) {
442  begin = next;
443  continue;
444  }
445 
446  /* Search for specific character sequence patterns; namely
447  * a lp, a matching rp, a lb and a semicolon.
448  * Abort the search if no lp is found. */
449  getOffsetForCharacter(sa, next, '(', &soffsetlp, &boffsetlp,
450  &toffsetlp);
451  if (soffsetlp == -1)
452  break;
453  getOffsetForMatchingRP(sa, next, soffsetlp, boffsetlp, toffsetlp,
454  &soffsetrp, &boffsetrp, &toffsetrp);
455  getOffsetForCharacter(sa, next, '{', &soffsetlb, &boffsetlb,
456  &toffsetlb);
457  getOffsetForCharacter(sa, next, ';', &soffsetsc, &boffsetsc,
458  &toffsetsc);
459 
460  /* We've found a lp. Now weed out the case where a matching
461  * rp and a lb are not both found. */
462  if (soffsetrp == -1 || soffsetlb == -1)
463  break;
464 
465  /* Check if a left brace occurs before a left parenthesis;
466  * if so, skip it */
467  if (toffsetlb < toffsetlp) {
468  skipToMatchingBrace(sa, next + soffsetlb, boffsetlb,
469  &rbline, &rbindex);
470  skipToSemicolon(sa, rbline, rbindex, &scline);
471  begin = scline + 1;
472  continue;
473  }
474 
475  /* Check if a semicolon occurs before a left brace or
476  * a left parenthesis; if so, skip it */
477  if ((soffsetsc != -1) &&
478  (toffsetsc < toffsetlb || toffsetsc < toffsetlp)) {
479  skipToSemicolon(sa, next, 0, &scline);
480  begin = scline + 1;
481  continue;
482  }
483 
484  /* OK, it should be a function definition. We haven't
485  * checked that there is only white space between the
486  * rp and lb, but we've only seen problems with two
487  * extern inlines in sys/stat.h, and this is handled
488  * later by eliminating any prototype beginning with 'extern'. */
489  *pstart = next;
490  *pstop = next + soffsetrp;
491  *pcharindex = boffsetrp;
492  *pfound = TRUE;
493  break;
494  }
495 
496  return 0;
497 }
498 
499 
500 /*
501  * \brief captureProtoSignature()
502  *
503  * \param[in] sa output from cpp, by line
504  * \param[in] start starting index to search; never a comment line
505  * \param[in] stop index of line on which pattern is completed
506  * \param[in] charindex char index of completing ')' character
507  * \return cleanstr prototype string, or NULL on error
508  *
509  * <pre>
510  * Notes:
511  * (1) Return all characters, ending with a ';' after the ')'
512  * </pre>
513  */
514 static char *
515 captureProtoSignature(SARRAY *sa,
516  l_int32 start,
517  l_int32 stop,
518  l_int32 charindex)
519 {
520 char *str, *newstr, *protostr, *cleanstr;
521 SARRAY *sap;
522 l_int32 i;
523 
524  if (!sa)
525  return (char *)ERROR_PTR("sa not defined", __func__, NULL);
526 
527  sap = sarrayCreate(0);
528  for (i = start; i < stop; i++) {
529  str = sarrayGetString(sa, i, L_COPY);
530  sarrayAddString(sap, str, L_INSERT);
531  }
532  str = sarrayGetString(sa, stop, L_COPY);
533  str[charindex + 1] = '\0';
534  newstr = stringJoin(str, ";");
535  sarrayAddString(sap, newstr, L_INSERT);
536  LEPT_FREE(str);
537  protostr = sarrayToString(sap, 2);
538  sarrayDestroy(&sap);
539  cleanstr = cleanProtoSignature(protostr);
540  LEPT_FREE(protostr);
541 
542  return cleanstr;
543 }
544 
545 
546 /*
547  * \brief cleanProtoSignature()
548  *
549  * \param[in] instr input prototype string
550  * \return cleanstr clean prototype string, or NULL on error
551  *
552  * <pre>
553  * Notes:
554  * (1) Adds 'extern' at beginning and regularizes spaces
555  * between tokens.
556  * </pre>
557  */
558 static char *
559 cleanProtoSignature(char *instr)
560 {
561 char *str, *cleanstr;
562 char buf[L_BUF_SIZE];
563 char externstring[] = "extern";
564 l_int32 i, j, nwords, nchars, index, len;
565 SARRAY *sa, *saout;
566 
567  if (!instr)
568  return (char *)ERROR_PTR("instr not defined", __func__, NULL);
569 
570  sa = sarrayCreateWordsFromString(instr);
571  nwords = sarrayGetCount(sa);
572  saout = sarrayCreate(0);
573  sarrayAddString(saout, externstring, L_COPY);
574  for (i = 0; i < nwords; i++) {
575  str = sarrayGetString(sa, i, L_NOCOPY);
576  nchars = strlen(str);
577  index = 0;
578  for (j = 0; j < nchars; j++) {
579  if (index > L_BUF_SIZE - 6) {
580  sarrayDestroy(&sa);
581  sarrayDestroy(&saout);
582  return (char *)ERROR_PTR("token too large", __func__, NULL);
583  }
584  if (str[j] == '(') {
585  buf[index++] = ' ';
586  buf[index++] = '(';
587  buf[index++] = ' ';
588  } else if (str[j] == ')') {
589  buf[index++] = ' ';
590  buf[index++] = ')';
591  } else {
592  buf[index++] = str[j];
593  }
594  }
595  buf[index] = '\0';
596  sarrayAddString(saout, buf, L_COPY);
597  }
598 
599  /* Flatten to a prototype string with spaces added after
600  * each word, and remove the last space */
601  cleanstr = sarrayToString(saout, 2);
602  len = strlen(cleanstr);
603  cleanstr[len - 1] = '\0';
604 
605  sarrayDestroy(&sa);
606  sarrayDestroy(&saout);
607  return cleanstr;
608 }
609 
610 
611 /*
612  * \brief skipToEndOfFunction()
613  *
614  * \param[in] sa output from cpp, by line
615  * \param[in] start index of starting line with left bracket to search
616  * \param[in] lbindex starting char index for left bracket
617  * \param[out] pnext index of line following the ending '}' for function
618  * \return 0 if OK, 1 on error
619  */
620 static l_int32
621 skipToEndOfFunction(SARRAY *sa,
622  l_int32 start,
623  l_int32 lbindex,
624  l_int32 *pnext)
625 {
626 l_int32 end, rbindex;
627 l_int32 soffsetlb, boffsetlb, toffsetlb;
628 
629  if (!sa)
630  return ERROR_INT("sa not defined", __func__, 1);
631  if (!pnext)
632  return ERROR_INT("&next not defined", __func__, 1);
633 
634  getOffsetForCharacter(sa, start, '{', &soffsetlb, &boffsetlb,
635  &toffsetlb);
636  skipToMatchingBrace(sa, start + soffsetlb, boffsetlb, &end, &rbindex);
637  if (end == -1) { /* shouldn't happen! */
638  *pnext = -1;
639  return 1;
640  }
641 
642  *pnext = end + 1;
643  return 0;
644 }
645 
646 
647 /*
648  * \brief skipToMatchingBrace()
649  *
650  * \param[in] sa output from cpp, by line
651  * \param[in] start index of starting line with left bracket to search
652  * \param[in] lbindex starting char index for left bracket
653  * \param[out] pstop index of line with the matching right bracket
654  * \param[out] prbindex char index of matching right bracket
655  * \return 0 if OK, 1 on error
656  *
657  * <pre>
658  * Notes:
659  * (1) If the matching right brace is not found, returns
660  * stop = -1. This shouldn't happen.
661  * </pre>
662  */
663 static l_int32
664 skipToMatchingBrace(SARRAY *sa,
665  l_int32 start,
666  l_int32 lbindex,
667  l_int32 *pstop,
668  l_int32 *prbindex)
669 {
670 char *str;
671 l_int32 i, j, jstart, n, sumbrace, found, instring, nchars;
672 
673  if (!sa)
674  return ERROR_INT("sa not defined", __func__, 1);
675  if (!pstop)
676  return ERROR_INT("&stop not defined", __func__, 1);
677  if (!prbindex)
678  return ERROR_INT("&rbindex not defined", __func__, 1);
679 
680  instring = 0; /* init to FALSE; toggle on double quotes */
681  *pstop = -1;
682  n = sarrayGetCount(sa);
683  sumbrace = 1;
684  found = FALSE;
685  for (i = start; i < n; i++) {
686  str = sarrayGetString(sa, i, L_NOCOPY);
687  jstart = 0;
688  if (i == start)
689  jstart = lbindex + 1;
690  nchars = strlen(str);
691  for (j = jstart; j < nchars; j++) {
692  /* Toggle the instring state every time you encounter
693  * a double quote that is NOT escaped. */
694  if (j == jstart && str[j] == '\"')
695  instring = 1 - instring;
696  if (j > jstart && str[j] == '\"' && str[j-1] != '\\')
697  instring = 1 - instring;
698  /* Record the braces if they are neither a literal character
699  * nor within a string. */
700  if (str[j] == '{' && str[j+1] != '\'' && !instring) {
701  sumbrace++;
702  } else if (str[j] == '}' && str[j+1] != '\'' && !instring) {
703  sumbrace--;
704  if (sumbrace == 0) {
705  found = TRUE;
706  *prbindex = j;
707  break;
708  }
709  }
710  }
711  if (found) {
712  *pstop = i;
713  return 0;
714  }
715  }
716 
717  return ERROR_INT("matching right brace not found", __func__, 1);
718 }
719 
720 
721 /*
722  * \brief skipToSemicolon()
723  *
724  * \param[in] sa output from cpp, by line
725  * \param[in] start index of starting line to search
726  * \param[in] charindex starting char index for search
727  * \param[out] pnext index of line containing the next ';'
728  * \return 0 if OK, 1 on error
729  *
730  * <pre>
731  * Notes:
732  * (1) If the semicolon isn't found, returns next = -1.
733  * This shouldn't happen.
734  * (2) This is only used in contexts where the semicolon is
735  * not within a string.
736  * </pre>
737  */
738 static l_int32
739 skipToSemicolon(SARRAY *sa,
740  l_int32 start,
741  l_int32 charindex,
742  l_int32 *pnext)
743 {
744 char *str;
745 l_int32 i, j, n, jstart, nchars, found;
746 
747  if (!sa)
748  return ERROR_INT("sa not defined", __func__, 1);
749  if (!pnext)
750  return ERROR_INT("&next not defined", __func__, 1);
751 
752  *pnext = -1;
753  n = sarrayGetCount(sa);
754  found = FALSE;
755  for (i = start; i < n; i++) {
756  str = sarrayGetString(sa, i, L_NOCOPY);
757  jstart = 0;
758  if (i == start)
759  jstart = charindex + 1;
760  nchars = strlen(str);
761  for (j = jstart; j < nchars; j++) {
762  if (str[j] == ';') {
763  found = TRUE;;
764  break;
765  }
766  }
767  if (found) {
768  *pnext = i;
769  return 0;
770  }
771  }
772 
773  return ERROR_INT("semicolon not found", __func__, 1);
774 }
775 
776 
777 /*
778  * \brief getOffsetForCharacter()
779  *
780  * \param[in] sa output from cpp, by line
781  * \param[in] start starting index in sa to search;
782  * never a comment line
783  * \param[in] tchar we are searching for the first instance of this
784  * \param[out] psoffset offset in strings from start index
785  * \param[out] pboffset offset in bytes within string in which
786  * the character is first found
787  * \param[out] ptoffset offset in total bytes from beginning of string
788  * indexed by 'start' to the location where
789  * the character is first found
790  * \return 0 if OK, 1 on error
791  *
792  * <pre>
793  * Notes:
794  * (1) We are searching for the first instance of 'tchar', starting
795  * at the beginning of the string indexed by start.
796  * (2) If the character is not found, soffset is returned as -1,
797  * and the other offsets are set to very large numbers. The
798  * caller must check the value of soffset.
799  * (3) This is only used in contexts where it is not necessary to
800  * consider if the character is inside a string.
801  * </pre>
802  */
803 static l_int32
804 getOffsetForCharacter(SARRAY *sa,
805  l_int32 start,
806  char tchar,
807  l_int32 *psoffset,
808  l_int32 *pboffset,
809  l_int32 *ptoffset)
810 {
811 char *str;
812 l_int32 i, j, n, nchars, totchars, found;
813 
814  if (!sa)
815  return ERROR_INT("sa not defined", __func__, 1);
816  if (!psoffset)
817  return ERROR_INT("&soffset not defined", __func__, 1);
818  if (!pboffset)
819  return ERROR_INT("&boffset not defined", __func__, 1);
820  if (!ptoffset)
821  return ERROR_INT("&toffset not defined", __func__, 1);
822 
823  *psoffset = -1; /* init to not found */
824  *pboffset = 100000000;
825  *ptoffset = 100000000;
826 
827  n = sarrayGetCount(sa);
828  found = FALSE;
829  totchars = 0;
830  for (i = start; i < n; i++) {
831  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
832  return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
833  nchars = strlen(str);
834  for (j = 0; j < nchars; j++) {
835  if (str[j] == tchar) {
836  found = TRUE;
837  break;
838  }
839  }
840  if (found)
841  break;
842  totchars += nchars;
843  }
844 
845  if (found) {
846  *psoffset = i - start;
847  *pboffset = j;
848  *ptoffset = totchars + j;
849  }
850 
851  return 0;
852 }
853 
854 
855 /*
856  * \brief getOffsetForMatchingRP()
857  *
858  * \param[in] sa output from cpp, by line
859  * \param[in] start starting index in sa to search;
860  * never a comment line
861  * \param[in] soffsetlp string offset to first LP
862  * \param[in] boffsetlp byte offset within string to first LP
863  * \param[in] toffsetlp total byte offset to first LP
864  * \param[out] psoffset offset in strings from start index
865  * \param[out] pboffset offset in bytes within string in which
866  * the matching RP is found
867  * \param[out] ptoffset offset in total bytes from beginning of string
868  * indexed by 'start' to the location where
869  * the matching RP is found
870  * \return 0 if OK, 1 on error
871  *
872  * <pre>
873  * Notes:
874  * (1) We are searching for the matching right parenthesis (RP) that
875  * corresponds to the first LP found beginning at the string
876  * indexed by start.
877  * (2) If the matching RP is not found, soffset is returned as -1,
878  * and the other offsets are set to very large numbers. The
879  * caller must check the value of soffset.
880  * (3) This is only used in contexts where it is not necessary to
881  * consider if the character is inside a string.
882  * (4) We must do this because although most arg lists have a single
883  * left and right parenthesis, it is possible to construct
884  * more complicated prototype declarations, such as those
885  * where functions are passed in. The C++ rules for prototypes
886  * are strict, and require that for functions passed in as args,
887  * the function name arg be placed in parenthesis, as well
888  * as its arg list, thus incurring two extra levels of parentheses.
889  * </pre>
890  */
891 static l_int32
892 getOffsetForMatchingRP(SARRAY *sa,
893  l_int32 start,
894  l_int32 soffsetlp,
895  l_int32 boffsetlp,
896  l_int32 toffsetlp,
897  l_int32 *psoffset,
898  l_int32 *pboffset,
899  l_int32 *ptoffset)
900 {
901 char *str;
902 l_int32 i, j, n, nchars, totchars, leftmatch, firstline, jstart, found;
903 
904  if (!sa)
905  return ERROR_INT("sa not defined", __func__, 1);
906  if (!psoffset)
907  return ERROR_INT("&soffset not defined", __func__, 1);
908  if (!pboffset)
909  return ERROR_INT("&boffset not defined", __func__, 1);
910  if (!ptoffset)
911  return ERROR_INT("&toffset not defined", __func__, 1);
912 
913  *psoffset = -1; /* init to not found */
914  *pboffset = 100000000;
915  *ptoffset = 100000000;
916 
917  n = sarrayGetCount(sa);
918  found = FALSE;
919  totchars = toffsetlp;
920  leftmatch = 1; /* count of (LP - RP); we're finished when it goes to 0. */
921  firstline = start + soffsetlp;
922  for (i = firstline; i < n; i++) {
923  if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
924  return ERROR_INT("str not returned; shouldn't happen", __func__, 1);
925  nchars = strlen(str);
926  jstart = 0;
927  if (i == firstline)
928  jstart = boffsetlp + 1;
929  for (j = jstart; j < nchars; j++) {
930  if (str[j] == '(')
931  leftmatch++;
932  else if (str[j] == ')')
933  leftmatch--;
934  if (leftmatch == 0) {
935  found = TRUE;
936  break;
937  }
938  }
939  if (found)
940  break;
941  if (i == firstline)
942  totchars += nchars - boffsetlp;
943  else
944  totchars += nchars;
945  }
946 
947  if (found) {
948  *psoffset = i - start;
949  *pboffset = j;
950  *ptoffset = totchars + j;
951  }
952 
953  return 0;
954 }
#define L_BUF_SIZE
Definition: classapp.c:59
@ L_COPY
Definition: pix.h:505
@ L_NOCOPY
Definition: pix.h:503
@ L_INSERT
Definition: pix.h:504
SARRAY * sarrayCreate(l_int32 n)
sarrayCreate()
Definition: sarray1.c:169
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:673
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:617
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:353
SARRAY * sarrayCreateWordsFromString(const char *string)
sarrayCreateWordsFromString()
Definition: sarray1.c:228
SARRAY * sarrayCreateLinesFromString(const char *string, l_int32 blankflag)
sarrayCreateLinesFromString()
Definition: sarray1.c:276
l_ok sarrayAddString(SARRAY *sa, const char *string, l_int32 copyflag)
sarrayAddString()
Definition: sarray1.c:435
char * sarrayToString(SARRAY *sa, l_int32 addnlflag)
sarrayToString()
Definition: sarray1.c:716
char * stringJoin(const char *src1, const char *src2)
stringJoin()
Definition: utils2.c:506
l_uint8 * l_binaryRead(const char *filename, size_t *pnbytes)
l_binaryRead()
Definition: utils2.c:1310