Libcroco
cr-input.c
Go to the documentation of this file.
1 /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 8-*- */
2 
3 /*
4  * This file is part of The Croco Library
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of version 2.1 of the GNU Lesser General Public
8  * License as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
18  * USA
19  *
20  * Author: Dodji Seketeli
21  * See COPYRIGHTS file for copyright information.
22  */
23 
24 #include "stdio.h"
25 #include <string.h>
26 #include "cr-input.h"
27 #include "cr-enc-handler.h"
28 
29 /**
30  *@CRInput:
31  *
32  *The definition of the #CRInput class.
33  */
34 
35 /*******************
36  *Private type defs
37  *******************/
38 
39 /**
40  *The private attributes of
41  *the #CRInputPriv class.
42  */
43 struct _CRInputPriv {
44  /*
45  *The input buffer
46  */
47  guchar *in_buf;
48  gulong in_buf_size;
49 
50  gulong nb_bytes;
51 
52  /*
53  *The index of the next byte
54  *to be read.
55  */
57 
58  /*
59  *The current line number
60  */
61  gulong line;
62 
63  /*
64  *The current col number
65  */
66  gulong col;
67 
68  gboolean end_of_line;
69  gboolean end_of_input;
70 
71  /*
72  *the reference count of this
73  *instance.
74  */
75  guint ref_count;
76  gboolean free_in_buf;
77 };
78 
79 #define PRIVATE(object) (object)->priv
80 
81 /***************************
82  *private constants
83  **************************/
/*
 *Size, in bytes, of the chunks used to load a file.
 *Parenthesized so that the expansion stays a single operand
 *whatever operator surrounds it.
 */
#define CR_INPUT_MEM_CHUNK_SIZE (1024 * 4)
85 
86 static CRInput *cr_input_new_real (void);
87 
88 static CRInput *
89 cr_input_new_real (void)
90 {
91  CRInput *result = NULL;
92 
93  result = g_try_malloc (sizeof (CRInput));
94  if (!result) {
95  cr_utils_trace_info ("Out of memory");
96  return NULL;
97  }
98  memset (result, 0, sizeof (CRInput));
99 
100  PRIVATE (result) = g_try_malloc (sizeof (CRInputPriv));
101  if (!PRIVATE (result)) {
102  cr_utils_trace_info ("Out of memory");
103  g_free (result);
104  return NULL;
105  }
106  memset (PRIVATE (result), 0, sizeof (CRInputPriv));
107  PRIVATE (result)->free_in_buf = TRUE;
108  return result;
109 }
110 
111 /****************
112  *Public methods
113  ***************/
114 
115 /**
116  * cr_input_new_from_buf:
117  *@a_buf: the memory buffer to create the input stream from.
118  *The #CRInput keeps this pointer so user should not free it !.
119  *@a_len: the size of the input buffer.
120  *@a_enc: the buffer's encoding.
121  *@a_free_buf: if set to TRUE, this a_buf will be freed
122  *at the destruction of this instance. If set to false, it is up
123  *to the caller to free it.
124  *
125  *Creates a new input stream from a memory buffer.
126  *Returns the newly built instance of #CRInput.
127  */
128 CRInput *
129 cr_input_new_from_buf (guchar * a_buf,
130  gulong a_len,
131  enum CREncoding a_enc,
132  gboolean a_free_buf)
133 {
134  CRInput *result = NULL;
135  enum CRStatus status = CR_OK;
136  CREncHandler *enc_handler = NULL;
137  gulong len = a_len;
138 
139  g_return_val_if_fail (a_buf, NULL);
140 
141  result = cr_input_new_real ();
142  g_return_val_if_fail (result, NULL);
143 
144  /*transform the encoding in utf8 */
145  if (a_enc != CR_UTF_8) {
146  enc_handler = cr_enc_handler_get_instance (a_enc);
147  if (!enc_handler) {
148  goto error;
149  }
150 
152  (enc_handler, a_buf, &len,
153  &PRIVATE (result)->in_buf,
154  &PRIVATE (result)->in_buf_size);
155  if (status != CR_OK)
156  goto error;
157  PRIVATE (result)->free_in_buf = TRUE;
158  if (a_free_buf == TRUE && a_buf) {
159  g_free (a_buf) ;
160  a_buf = NULL ;
161  }
162  PRIVATE (result)->nb_bytes = PRIVATE (result)->in_buf_size;
163  } else {
164  PRIVATE (result)->in_buf = (guchar *) a_buf;
165  PRIVATE (result)->in_buf_size = a_len;
166  PRIVATE (result)->nb_bytes = a_len;
167  PRIVATE (result)->free_in_buf = a_free_buf;
168  }
169  PRIVATE (result)->line = 1;
170  PRIVATE (result)->col = 0;
171  return result;
172 
173  error:
174  if (result) {
175  cr_input_destroy (result);
176  result = NULL;
177  }
178 
179  return NULL;
180 }
181 
182 /**
183  * cr_input_new_from_uri:
184  *@a_file_uri: the file to create *the input stream from.
185  *@a_enc: the encoding of the file *to create the input from.
186  *
187  *Creates a new input stream from
188  *a file.
189  *
190  *Returns the newly created input stream if
191  *this method could read the file and create it,
192  *NULL otherwise.
193  */
194 
195 CRInput *
196 cr_input_new_from_uri (const gchar * a_file_uri, enum CREncoding a_enc)
197 {
198  CRInput *result = NULL;
199  enum CRStatus status = CR_OK;
200  FILE *file_ptr = NULL;
201  guchar tmp_buf[CR_INPUT_MEM_CHUNK_SIZE] = { 0 };
202  gulong nb_read = 0,
203  len = 0,
204  buf_size = 0;
205  gboolean loop = TRUE;
206  guchar *buf = NULL;
207 
208  g_return_val_if_fail (a_file_uri, NULL);
209 
210  file_ptr = fopen (a_file_uri, "r");
211 
212  if (file_ptr == NULL) {
213 
214 #ifdef CR_DEBUG
215  cr_utils_trace_debug ("could not open file");
216 #endif
217  g_warning ("Could not open file %s\n", a_file_uri);
218 
219  return NULL;
220  }
221 
222  /*load the file */
223  while (loop) {
224  nb_read = fread (tmp_buf, 1 /*read bytes */ ,
225  CR_INPUT_MEM_CHUNK_SIZE /*nb of bytes */ ,
226  file_ptr);
227 
228  if (nb_read != CR_INPUT_MEM_CHUNK_SIZE) {
229  /*we read less chars than we wanted */
230  if (feof (file_ptr)) {
231  /*we reached eof */
232  loop = FALSE;
233  } else {
234  /*a pb occurred !! */
235  cr_utils_trace_debug ("an io error occurred");
236  status = CR_ERROR;
237  goto cleanup;
238  }
239  }
240 
241  if (status == CR_OK) {
242  /*read went well */
243  buf = g_realloc (buf, len + CR_INPUT_MEM_CHUNK_SIZE);
244  memcpy (buf + len, tmp_buf, nb_read);
245  len += nb_read;
246  buf_size += CR_INPUT_MEM_CHUNK_SIZE;
247  }
248  }
249 
250  if (status == CR_OK) {
251  result = cr_input_new_from_buf (buf, len, a_enc, TRUE);
252  if (!result) {
253  goto cleanup;
254  }
255  /*
256  *we should free buf here because it's own by CRInput.
257  *(see the last parameter of cr_input_new_from_buf().
258  */
259  buf = NULL;
260  }
261 
262  cleanup:
263  if (file_ptr) {
264  fclose (file_ptr);
265  file_ptr = NULL;
266  }
267 
268  if (buf) {
269  g_free (buf);
270  buf = NULL;
271  }
272 
273  return result;
274 }
275 
276 /**
277  * cr_input_destroy:
278  *@a_this: the current instance of #CRInput.
279  *
280  *The destructor of the #CRInput class.
281  */
282 void
284 {
285  if (a_this == NULL)
286  return;
287 
288  if (PRIVATE (a_this)) {
289  if (PRIVATE (a_this)->in_buf && PRIVATE (a_this)->free_in_buf) {
290  g_free (PRIVATE (a_this)->in_buf);
291  PRIVATE (a_this)->in_buf = NULL;
292  }
293 
294  g_free (PRIVATE (a_this));
295  PRIVATE (a_this) = NULL;
296  }
297 
298  g_free (a_this);
299 }
300 
301 /**
302  * cr_input_ref:
303  *@a_this: the current instance of #CRInput.
304  *
305  *Increments the reference count of the current
306  *instance of #CRInput.
307  */
308 void
310 {
311  g_return_if_fail (a_this && PRIVATE (a_this));
312 
313  PRIVATE (a_this)->ref_count++;
314 }
315 
316 /**
317  * cr_input_unref:
318  *@a_this: the current instance of #CRInput.
319  *
320  *Decrements the reference count of this instance
321  *of #CRInput. If the reference count goes down to
322  *zero, this instance is destroyed.
323  *
324  * Returns TRUE if the instance of #CRInput got destroyed, false otherwise.
325  */
326 gboolean
328 {
329  g_return_val_if_fail (a_this && PRIVATE (a_this), FALSE);
330 
331  if (PRIVATE (a_this)->ref_count) {
332  PRIVATE (a_this)->ref_count--;
333  }
334 
335  if (PRIVATE (a_this)->ref_count == 0) {
336  cr_input_destroy (a_this);
337  return TRUE;
338  }
339  return FALSE;
340 }
341 
342 /**
343  * cr_input_end_of_input:
344  *@a_this: the current instance of #CRInput.
345  *@a_end_of_input: out parameter. Is set to TRUE if
346  *the current instance has reached the end of its input buffer,
347  *FALSE otherwise.
348  *
349  *Tests wether the current instance of
350  *#CRInput has reached its input buffer.
351  *
352  * Returns CR_OK upon successful completion, an error code otherwise.
353  * Note that all the out parameters of this method are valid if
354  * and only if this method returns CR_OK.
355  */
356 enum CRStatus
357 cr_input_end_of_input (CRInput const * a_this, gboolean * a_end_of_input)
358 {
359  g_return_val_if_fail (a_this && PRIVATE (a_this)
360  && a_end_of_input, CR_BAD_PARAM_ERROR);
361 
362  *a_end_of_input = (PRIVATE (a_this)->next_byte_index
363  >= PRIVATE (a_this)->in_buf_size) ? TRUE : FALSE;
364 
365  return CR_OK;
366 }
367 
368 /**
369  * cr_input_get_nb_bytes_left:
370  *@a_this: the current instance of #CRInput.
371  *
372  *Returns the number of bytes left in the input stream
373  *before the end, -1 in case of error.
374  */
375 glong
377 {
378  g_return_val_if_fail (a_this && PRIVATE (a_this), -1);
379  g_return_val_if_fail (PRIVATE (a_this)->nb_bytes
380  <= PRIVATE (a_this)->in_buf_size, -1);
381  g_return_val_if_fail (PRIVATE (a_this)->next_byte_index
382  <= PRIVATE (a_this)->nb_bytes, -1);
383 
384  if (PRIVATE (a_this)->end_of_input)
385  return 0;
386 
387  return PRIVATE (a_this)->nb_bytes - PRIVATE (a_this)->next_byte_index;
388 }
389 
390 /**
391  * cr_input_read_byte:
392  *@a_this: the current instance of #CRInput.
393  *@a_byte: out parameter the returned byte.
394  *
395  *Gets the next byte of the input.
396  *Updates the state of the input so that
397  *the next invocation of this method returns
398  *the next coming byte.
399  *
400  *Returns CR_OK upon successful completion, an error code
401  *otherwise. All the out parameters of this method are valid if
402  *and only if this method returns CR_OK.
403  */
404 enum CRStatus
405 cr_input_read_byte (CRInput * a_this, guchar * a_byte)
406 {
407  gulong nb_bytes_left = 0;
408 
409  g_return_val_if_fail (a_this && PRIVATE (a_this)
410  && a_byte, CR_BAD_PARAM_ERROR);
411 
412  g_return_val_if_fail (PRIVATE (a_this)->next_byte_index <=
413  PRIVATE (a_this)->nb_bytes, CR_BAD_PARAM_ERROR);
414 
415  if (PRIVATE (a_this)->end_of_input == TRUE)
416  return CR_END_OF_INPUT_ERROR;
417 
418  nb_bytes_left = cr_input_get_nb_bytes_left (a_this);
419 
420  if (nb_bytes_left < 1) {
421  return CR_END_OF_INPUT_ERROR;
422  }
423 
424  *a_byte = PRIVATE (a_this)->in_buf[PRIVATE (a_this)->next_byte_index];
425 
426  if (PRIVATE (a_this)->nb_bytes -
427  PRIVATE (a_this)->next_byte_index < 2) {
428  PRIVATE (a_this)->end_of_input = TRUE;
429  } else {
430  PRIVATE (a_this)->next_byte_index++;
431  }
432 
433  return CR_OK;
434 }
435 
436 /**
437  * cr_input_read_char:
438  *@a_this: the current instance of CRInput.
439  *@a_char: out parameter. The read character.
440  *
441  *Reads an unicode character from the current instance of
442  *#CRInput.
443  *
444  *Returns CR_OK upon successful completion, an error code
445  *otherwise.
446  */
447 enum CRStatus
448 cr_input_read_char (CRInput * a_this, guint32 * a_char)
449 {
450  enum CRStatus status = CR_OK;
451  gulong consumed = 0,
452  nb_bytes_left = 0;
453 
454  g_return_val_if_fail (a_this && PRIVATE (a_this) && a_char,
456 
457  if (PRIVATE (a_this)->end_of_input == TRUE)
458  return CR_END_OF_INPUT_ERROR;
459 
460  nb_bytes_left = cr_input_get_nb_bytes_left (a_this);
461 
462  if (nb_bytes_left < 1) {
463  return CR_END_OF_INPUT_ERROR;
464  }
465 
467  (PRIVATE (a_this)->in_buf
468  +
469  PRIVATE (a_this)->next_byte_index,
470  nb_bytes_left, a_char, &consumed);
471 
472  if (status == CR_OK) {
473  /*update next byte index */
474  PRIVATE (a_this)->next_byte_index += consumed;
475 
476  /*update line and column number */
477  if (PRIVATE (a_this)->end_of_line == TRUE) {
478  PRIVATE (a_this)->col = 1;
479  PRIVATE (a_this)->line++;
480  PRIVATE (a_this)->end_of_line = FALSE;
481  } else if (*a_char != '\n') {
482  PRIVATE (a_this)->col++;
483  }
484 
485  if (*a_char == '\n') {
486  PRIVATE (a_this)->end_of_line = TRUE;
487  }
488  }
489 
490  return status;
491 }
492 
493 /**
494  * cr_input_set_line_num:
495  *@a_this: the "this pointer" of the current instance of #CRInput.
496  *@a_line_num: the new line number.
497  *
498  *Setter of the current line number.
499  *
500  *Return CR_OK upon successful completion, an error code otherwise.
501  */
502 enum CRStatus
503 cr_input_set_line_num (CRInput * a_this, glong a_line_num)
504 {
505  g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
506 
507  PRIVATE (a_this)->line = a_line_num;
508 
509  return CR_OK;
510 }
511 
512 /**
513  * cr_input_get_line_num:
514  *@a_this: the "this pointer" of the current instance of #CRInput.
515  *@a_line_num: the returned line number.
516  *
517  *Getter of the current line number.
518  *
519  *Returns CR_OK upon successful completion, an error code otherwise.
520  */
521 enum CRStatus
522 cr_input_get_line_num (CRInput const * a_this, glong * a_line_num)
523 {
524  g_return_val_if_fail (a_this && PRIVATE (a_this)
525  && a_line_num, CR_BAD_PARAM_ERROR);
526 
527  *a_line_num = PRIVATE (a_this)->line;
528 
529  return CR_OK;
530 }
531 
532 /**
533  * cr_input_set_column_num:
534  *@a_this: the "this pointer" of the current instance of #CRInput.
535  *@a_col: the new column number.
536  *
537  *Setter of the current column number.
538  *
539  *Returns CR_OK upon successful completion, an error code otherwise.
540  */
541 enum CRStatus
542 cr_input_set_column_num (CRInput * a_this, glong a_col)
543 {
544  g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
545 
546  PRIVATE (a_this)->col = a_col;
547 
548  return CR_OK;
549 }
550 
551 /**
552  * cr_input_get_column_num:
553  *@a_this: the "this pointer" of the current instance of #CRInput.
554  *@a_col: out parameter
555  *
556  *Getter of the current column number.
557  *
558  *Returns CR_OK upon successful completion, an error code otherwise.
559  */
560 enum CRStatus
561 cr_input_get_column_num (CRInput const * a_this, glong * a_col)
562 {
563  g_return_val_if_fail (a_this && PRIVATE (a_this) && a_col,
565 
566  *a_col = PRIVATE (a_this)->col;
567 
568  return CR_OK;
569 }
570 
571 /**
572  * cr_input_increment_line_num:
573  *@a_this: the "this pointer" of the current instance of #CRInput.
574  *@a_increment: the increment to add to the line number.
575  *
576  *Increments the current line number.
577  *
578  *Returns CR_OK upon successful completion, an error code otherwise.
579  */
580 enum CRStatus
581 cr_input_increment_line_num (CRInput * a_this, glong a_increment)
582 {
583  g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
584 
585  PRIVATE (a_this)->line += a_increment;
586 
587  return CR_OK;
588 }
589 
590 /**
591  * cr_input_increment_col_num:
592  *@a_this: the "this pointer" of the current instance of #CRInput.
593  *@a_increment: the increment to add to the column number.
594  *
595  *Increments the current column number.
596  *
597  *Returns CR_OK upon successful completion, an error code otherwise.
598  */
599 enum CRStatus
600 cr_input_increment_col_num (CRInput * a_this, glong a_increment)
601 {
602  g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
603 
604  PRIVATE (a_this)->col += a_increment;
605 
606  return CR_OK;
607 }
608 
609 /**
610  * cr_input_consume_char:
611  *@a_this: the this pointer.
612  *@a_char: the character to consume. If set to zero,
613  *consumes any character.
614  *
615  *Consumes the next character of the input stream if
616  *and only if that character equals a_char.
617  *
618  *Returns CR_OK upon successful completion, CR_PARSING_ERROR if
619  *next char is different from a_char, an other error code otherwise
620  */
621 enum CRStatus
622 cr_input_consume_char (CRInput * a_this, guint32 a_char)
623 {
624  guint32 c;
625  enum CRStatus status;
626 
627  g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
628 
629  if ((status = cr_input_peek_char (a_this, &c)) != CR_OK) {
630  return status;
631  }
632 
633  if (c == a_char || a_char == 0) {
634  status = cr_input_read_char (a_this, &c);
635  } else {
636  return CR_PARSING_ERROR;
637  }
638 
639  return status;
640 }
641 
642 /**
643  * cr_input_consume_chars:
644  *@a_this: the this pointer of the current instance of #CRInput.
645  *@a_char: the character to consume.
646  *@a_nb_char: in/out parameter. The number of characters to consume.
647  *If set to a negative value, the function will consume all the occurences
648  *of a_char found.
649  *After return, if the return value equals CR_OK, this variable contains
650  *the number of characters actually consumed.
651  *
652  *Consumes up to a_nb_char occurences of the next contiguous characters
653  *which equal a_char. Note that the next character of the input stream
654  **MUST* equal a_char to trigger the consumption, or else, the error
655  *code CR_PARSING_ERROR is returned.
656  *If the number of contiguous characters that equals a_char is less than
657  *a_nb_char, then this function consumes all the characters it can consume.
658  *
659  *Returns CR_OK if at least one character has been consumed, an error code
660  *otherwise.
661  */
662 enum CRStatus
663 cr_input_consume_chars (CRInput * a_this, guint32 a_char, gulong * a_nb_char)
664 {
665  enum CRStatus status = CR_OK;
666  gulong nb_consumed = 0;
667 
668  g_return_val_if_fail (a_this && PRIVATE (a_this) && a_nb_char,
670 
671  g_return_val_if_fail (a_char != 0 || a_nb_char != NULL,
673 
674  for (nb_consumed = 0; ((status == CR_OK)
675  && (*a_nb_char > 0
676  && nb_consumed < *a_nb_char));
677  nb_consumed++) {
678  status = cr_input_consume_char (a_this, a_char);
679  }
680 
681  *a_nb_char = nb_consumed;
682 
683  if ((nb_consumed > 0)
684  && ((status == CR_PARSING_ERROR)
685  || (status == CR_END_OF_INPUT_ERROR))) {
686  status = CR_OK;
687  }
688 
689  return status;
690 }
691 
692 /**
693  * cr_input_consume_white_spaces:
694  *@a_this: the "this pointer" of the current instance of #CRInput.
695  *@a_nb_chars: in/out parameter. The number of white spaces to
696  *consume. After return, holds the number of white spaces actually consumed.
697  *
698  *Same as cr_input_consume_chars() but this one consumes white
699  *spaces.
700  *
701  *Returns CR_OK upon successful completion, an error code otherwise.
702  */
703 enum CRStatus
704 cr_input_consume_white_spaces (CRInput * a_this, gulong * a_nb_chars)
705 {
706  enum CRStatus status = CR_OK;
707  guint32 cur_char = 0,
708  nb_consumed = 0;
709 
710  g_return_val_if_fail (a_this && PRIVATE (a_this) && a_nb_chars,
712 
713  for (nb_consumed = 0;
714  ((*a_nb_chars > 0) && (nb_consumed < *a_nb_chars));
715  nb_consumed++) {
716  status = cr_input_peek_char (a_this, &cur_char);
717  if (status != CR_OK)
718  break;
719 
720  /*if the next char is a white space, consume it ! */
721  if (cr_utils_is_white_space (cur_char) == TRUE) {
722  status = cr_input_read_char (a_this, &cur_char);
723  if (status != CR_OK)
724  break;
725  continue;
726  }
727 
728  break;
729 
730  }
731 
732  *a_nb_chars = (gulong) nb_consumed;
733 
734  if (nb_consumed && status == CR_END_OF_INPUT_ERROR) {
735  status = CR_OK;
736  }
737 
738  return status;
739 }
740 
741 /**
742  * cr_input_peek_char:
743  *@a_this: the current instance of #CRInput.
744  *@a_char: out parameter. The returned character.
745  *
746  *Same as cr_input_read_char() but does not update the
747  *internal state of the input stream. The next call
748  *to cr_input_peek_char() or cr_input_read_char() will thus
749  *return the same character as the current one.
750  *
751  *Returns CR_OK upon successful completion, an error code
752  *otherwise.
753  */
754 enum CRStatus
755 cr_input_peek_char (CRInput const * a_this, guint32 * a_char)
756 {
757  enum CRStatus status = CR_OK;
758  gulong consumed = 0,
759  nb_bytes_left = 0;
760 
761  g_return_val_if_fail (a_this && PRIVATE (a_this)
762  && a_char, CR_BAD_PARAM_ERROR);
763 
764  if (PRIVATE (a_this)->next_byte_index >=
765  PRIVATE (a_this)->in_buf_size) {
766  return CR_END_OF_INPUT_ERROR;
767  }
768 
769  nb_bytes_left = cr_input_get_nb_bytes_left (a_this);
770 
771  if (nb_bytes_left < 1) {
772  return CR_END_OF_INPUT_ERROR;
773  }
774 
776  (PRIVATE (a_this)->in_buf +
777  PRIVATE (a_this)->next_byte_index,
778  nb_bytes_left, a_char, &consumed);
779 
780  return status;
781 }
782 
783 /**
784  * cr_input_peek_byte:
785  *@a_this: the current instance of #CRInput.
786  *@a_origin: the origin to consider in the calculation
787  *of the position of the byte to peek.
788  *@a_offset: the offset of the byte to peek, starting from
789  *the origin specified by a_origin.
790  *@a_byte: out parameter the peeked byte.
791  *
792  *Gets a byte from the input stream,
793  *starting from the current position in the input stream.
794  *Unlike cr_input_peek_next_byte() this method
795  *does not update the state of the current input stream.
796  *Subsequent calls to cr_input_peek_byte with the same arguments
797  *will return the same byte.
798  *
799  *Returns CR_OK upon successful completion or,
800  *CR_BAD_PARAM_ERROR if at least one of the parameters is invalid;
801  *CR_OUT_OF_BOUNDS_ERROR if the indexed byte is out of bounds.
802  */
803 enum CRStatus
804 cr_input_peek_byte (CRInput const * a_this, enum CRSeekPos a_origin,
805  gulong a_offset, guchar * a_byte)
806 {
807  gulong abs_offset = 0;
808 
809  g_return_val_if_fail (a_this && PRIVATE (a_this)
810  && a_byte, CR_BAD_PARAM_ERROR);
811 
812  switch (a_origin) {
813 
814  case CR_SEEK_CUR:
815  abs_offset = PRIVATE (a_this)->next_byte_index - 1 + a_offset;
816  break;
817 
818  case CR_SEEK_BEGIN:
819  abs_offset = a_offset;
820  break;
821 
822  case CR_SEEK_END:
823  abs_offset = PRIVATE (a_this)->in_buf_size - 1 - a_offset;
824  break;
825 
826  default:
827  return CR_BAD_PARAM_ERROR;
828  }
829 
830  if (abs_offset < PRIVATE (a_this)->in_buf_size) {
831 
832  *a_byte = PRIVATE (a_this)->in_buf[abs_offset];
833 
834  return CR_OK;
835 
836  } else {
837  return CR_END_OF_INPUT_ERROR;
838  }
839 }
840 
841 /**
842  * cr_input_peek_byte2:
843  *@a_this: the current byte input stream.
844  *@a_offset: the offset of the byte to peek, starting
845  *from the current input position pointer.
846  *@a_eof: out parameter. Is set to true is we reach end of
847  *stream. If set to NULL by the caller, this parameter is not taken
848  *in account.
849  *
850  *Same as cr_input_peek_byte() but with a simplified
851  *interface.
852  *
853  *Returns the read byte or 0 if something bad happened.
854  */
855 guchar
856 cr_input_peek_byte2 (CRInput const * a_this, gulong a_offset, gboolean * a_eof)
857 {
858  guchar result = 0;
859  enum CRStatus status = CR_ERROR;
860 
861  g_return_val_if_fail (a_this && PRIVATE (a_this), 0);
862 
863  if (a_eof)
864  *a_eof = FALSE;
865 
866  status = cr_input_peek_byte (a_this, CR_SEEK_CUR, a_offset, &result);
867 
868  if ((status == CR_END_OF_INPUT_ERROR)
869  && a_eof)
870  *a_eof = TRUE;
871 
872  return result;
873 }
874 
875 /**
876  * cr_input_get_byte_addr:
877  *@a_this: the current instance of #CRInput.
878  *@a_offset: the offset of the byte in the input stream starting
879  *from the beginning of the stream.
880  *
881  *Gets the memory address of the byte located at a given offset
882  *in the input stream.
883  *
884  *Returns the address, otherwise NULL if an error occurred.
885  */
886 guchar *
887 cr_input_get_byte_addr (CRInput * a_this, gulong a_offset)
888 {
889  g_return_val_if_fail (a_this && PRIVATE (a_this), NULL);
890 
891  if (a_offset >= PRIVATE (a_this)->nb_bytes) {
892  return NULL;
893  }
894 
895  return &PRIVATE (a_this)->in_buf[a_offset];
896 }
897 
898 /**
899  * cr_input_get_cur_byte_addr:
900  *@a_this: the current input stream
901  *@a_offset: out parameter. The returned address.
902  *
903  *Gets the address of the current character pointer.
904  *
905  *Returns CR_OK upon successful completion, an error code otherwise.
906  */
907 enum CRStatus
908 cr_input_get_cur_byte_addr (CRInput * a_this, guchar ** a_offset)
909 {
910  g_return_val_if_fail (a_this && PRIVATE (a_this) && a_offset,
912 
913  if (!PRIVATE (a_this)->next_byte_index) {
915  }
916 
917  *a_offset = cr_input_get_byte_addr
918  (a_this, PRIVATE (a_this)->next_byte_index - 1);
919 
920  return CR_OK;
921 }
922 
923 /**
924  * cr_input_seek_index:
925  *@a_this: the current instance of #CRInput.
926  *@a_origin: the origin to consider during the calculation
927  *of the absolute position of the new "current byte index".
928  *@a_pos: the relative offset of the new "current byte index."
929  *This offset is relative to the origin a_origin.
930  *
931  *Sets the "current byte index" of the current instance
932  *of #CRInput. Next call to cr_input_get_byte() will return
933  *the byte next after the new "current byte index".
934  *
935  *Returns CR_OK upon successful completion otherwise returns
936  *CR_BAD_PARAM_ERROR if at least one of the parameters is not valid
937  *or CR_OUT_BOUNDS_ERROR in case of error.
938  */
939 enum CRStatus
940 cr_input_seek_index (CRInput * a_this, enum CRSeekPos a_origin, gint a_pos)
941 {
942 
943  glong abs_offset = 0;
944 
945  g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
946 
947  switch (a_origin) {
948 
949  case CR_SEEK_CUR:
950  abs_offset = PRIVATE (a_this)->next_byte_index - 1 + a_pos;
951  break;
952 
953  case CR_SEEK_BEGIN:
954  abs_offset = a_pos;
955  break;
956 
957  case CR_SEEK_END:
958  abs_offset = PRIVATE (a_this)->in_buf_size - 1 - a_pos;
959  break;
960 
961  default:
962  return CR_BAD_PARAM_ERROR;
963  }
964 
965  if ((abs_offset > 0)
966  && (gulong) abs_offset < PRIVATE (a_this)->nb_bytes) {
967 
968  /*update the input stream's internal state */
969  PRIVATE (a_this)->next_byte_index = abs_offset + 1;
970 
971  return CR_OK;
972  }
973 
974  return CR_OUT_OF_BOUNDS_ERROR;
975 }
976 
977 /**
978  * cr_input_get_cur_pos:
979  *@a_this: the current instance of #CRInput.
980  *@a_pos: out parameter. The returned position.
981  *
982  *Gets the position of the "current byte index" which
983  *is basically the position of the last returned byte in the
984  *input stream.
985  *
986  *Returns CR_OK upon successful completion. Otherwise,
987  *CR_BAD_PARAMETER_ERROR if at least one of the arguments is invalid.
988  *CR_START_OF_INPUT if no call to either cr_input_read_byte()
989  *or cr_input_seek_index() have been issued before calling
990  *cr_input_get_cur_pos()
991  *Note that the out parameters of this function are valid if and only if this
992  *function returns CR_OK.
993  */
994 enum CRStatus
995 cr_input_get_cur_pos (CRInput const * a_this, CRInputPos * a_pos)
996 {
997  g_return_val_if_fail (a_this && PRIVATE (a_this) && a_pos,
999 
1000  a_pos->next_byte_index = PRIVATE (a_this)->next_byte_index;
1001  a_pos->line = PRIVATE (a_this)->line;
1002  a_pos->col = PRIVATE (a_this)->col;
1003  a_pos->end_of_line = PRIVATE (a_this)->end_of_line;
1004  a_pos->end_of_file = PRIVATE (a_this)->end_of_input;
1005 
1006  return CR_OK;
1007 }
1008 
1009 /**
1010  * cr_input_get_parsing_location:
1011  *@a_this: the current instance of #CRInput
1012  *@a_loc: the set parsing location.
1013  *
1014  *Gets the current parsing location.
1015  *The Parsing location is a public datastructure that
1016  *represents the current line/column/byte offset/ in the input
1017  *stream.
1018  *
1019  *Returns CR_OK upon successful completion, an error
1020  *code otherwise.
1021  */
1022 enum CRStatus
1024  CRParsingLocation *a_loc)
1025 {
1026  g_return_val_if_fail (a_this
1027  && PRIVATE (a_this)
1028  && a_loc,
1030 
1031  a_loc->line = PRIVATE (a_this)->line ;
1032  a_loc->column = PRIVATE (a_this)->col ;
1033  if (PRIVATE (a_this)->next_byte_index) {
1034  a_loc->byte_offset = PRIVATE (a_this)->next_byte_index - 1 ;
1035  } else {
1036  a_loc->byte_offset = PRIVATE (a_this)->next_byte_index ;
1037  }
1038  return CR_OK ;
1039 }
1040 
1041 /**
1042  * cr_input_get_cur_index:
1043  *@a_this: the "this pointer" of the current instance of
1044  *#CRInput
1045  *@a_index: out parameter. The returned index.
1046  *
1047  *Getter of the next byte index.
1048  *It actually returns the index of the
1049  *next byte to be read.
1050  *
1051  *Returns CR_OK upon successful completion, an error code
1052  *otherwise.
1053  */
1054 enum CRStatus
1055 cr_input_get_cur_index (CRInput const * a_this, glong * a_index)
1056 {
1057  g_return_val_if_fail (a_this && PRIVATE (a_this)
1058  && a_index, CR_BAD_PARAM_ERROR);
1059 
1060  *a_index = PRIVATE (a_this)->next_byte_index;
1061 
1062  return CR_OK;
1063 }
1064 
1065 /**
1066  * cr_input_set_cur_index:
1067  *@a_this: the "this pointer" of the current instance
1068  *of #CRInput .
1069  *@a_index: the new index to set.
1070  *
1071  *Setter of the next byte index.
1072  *It sets the index of the next byte to be read.
1073  *
1074  *Returns CR_OK upon successful completion, an error code otherwise.
1075  */
1076 enum CRStatus
1077 cr_input_set_cur_index (CRInput * a_this, glong a_index)
1078 {
1079  g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1080 
1081  PRIVATE (a_this)->next_byte_index = a_index;
1082 
1083  return CR_OK;
1084 }
1085 
1086 /**
1087  * cr_input_set_end_of_file:
1088  *@a_this: the current instance of #CRInput.
1089  *@a_eof: the new end of file flag.
1090  *
1091  *Sets the end of file flag.
1092  *
1093  *Returns CR_OK upon successful completion, an error code otherwise.
1094  */
1095 enum CRStatus
1096 cr_input_set_end_of_file (CRInput * a_this, gboolean a_eof)
1097 {
1098  g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1099 
1100  PRIVATE (a_this)->end_of_input = a_eof;
1101 
1102  return CR_OK;
1103 }
1104 
1105 /**
1106  * cr_input_get_end_of_file:
1107  *@a_this: the current instance of #CRInput.
1108  *@a_eof: out parameter the place to put the end of
1109  *file flag.
1110  *
1111  *Gets the end of file flag.
1112  *
1113  *Returns CR_OK upon successful completion, an error code otherwise.
1114  */
1115 enum CRStatus
1116 cr_input_get_end_of_file (CRInput const * a_this, gboolean * a_eof)
1117 {
1118  g_return_val_if_fail (a_this && PRIVATE (a_this)
1119  && a_eof, CR_BAD_PARAM_ERROR);
1120 
1121  *a_eof = PRIVATE (a_this)->end_of_input;
1122 
1123  return CR_OK;
1124 }
1125 
1126 /**
1127  * cr_input_set_end_of_line:
1128  *@a_this: the current instance of #CRInput.
1129  *@a_eol: the new end of line flag.
1130  *
1131  *Sets the end of line flag.
1132  *
1133  *Returns CR_OK upon successful completion, an error code
1134  *otherwise.
1135  */
1136 enum CRStatus
1137 cr_input_set_end_of_line (CRInput * a_this, gboolean a_eol)
1138 {
1139  g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR);
1140 
1141  PRIVATE (a_this)->end_of_line = a_eol;
1142 
1143  return CR_OK;
1144 }
1145 
1146 /**
1147  * cr_input_get_end_of_line:
1148  *@a_this: the current instance of #CRInput
1149  *@a_eol: out parameter. The place to put
1150  *the returned flag
1151  *
1152  *Gets the end of line flag of the current input.
1153  *
1154  *Returns CR_OK upon successful completion, an error code
1155  *otherwise.
1156  */
1157 enum CRStatus
1158 cr_input_get_end_of_line (CRInput const * a_this, gboolean * a_eol)
1159 {
1160  g_return_val_if_fail (a_this && PRIVATE (a_this)
1161  && a_eol, CR_BAD_PARAM_ERROR);
1162 
1163  *a_eol = PRIVATE (a_this)->end_of_line;
1164 
1165  return CR_OK;
1166 }
1167 
1168 /**
1169  * cr_input_set_cur_pos:
1170  *@a_this: the "this pointer" of the current instance of
1171  *#CRInput.
1172  *@a_pos: the new position.
1173  *
1174  *Sets the current position in the input stream.
1175  *
1176  * Returns CR_OK upon successful completion, an error code otherwise.
1177  */
1178 enum CRStatus
1179 cr_input_set_cur_pos (CRInput * a_this, CRInputPos const * a_pos)
1180 {
1181  g_return_val_if_fail (a_this && PRIVATE (a_this) && a_pos,
1183 
1184  cr_input_set_column_num (a_this, a_pos->col);
1185  cr_input_set_line_num (a_this, a_pos->line);
1186  cr_input_set_cur_index (a_this, a_pos->next_byte_index);
1187  cr_input_set_end_of_line (a_this, a_pos->end_of_line);
1188  cr_input_set_end_of_file (a_this, a_pos->end_of_file);
1189 
1190  return CR_OK;
1191 }
typedef struct _CREncHandler CREncHandler
guchar * cr_input_get_byte_addr(CRInput *a_this, gulong a_offset)
cr_input_get_byte_addr: @a_this: the current instance of CRInput.
Definition: cr-input.c:887
enum CRStatus cr_input_get_parsing_location(CRInput const *a_this, CRParsingLocation *a_loc)
cr_input_get_parsing_location: @a_this: the current instance of CRInput @a_loc: the set parsing locat...
Definition: cr-input.c:1023
enum CRStatus cr_input_read_char(CRInput *a_this, guint32 *a_char)
cr_input_read_char: @a_this: the current instance of CRInput.
Definition: cr-input.c:448
gboolean end_of_line
Definition: cr-input.h:59
CREncHandler * cr_enc_handler_get_instance(enum CREncoding a_enc)
cr_enc_handler_get_instance: @a_enc: the encoding of the Handler.
@CRInput:
Definition: cr-input.c:43
guint ref_count
Definition: cr-input.c:75
glong next_byte_index
Definition: cr-input.h:60
gulong in_buf_size
Definition: cr-input.c:48
gboolean end_of_file
Definition: cr-input.h:58
enum CRStatus cr_input_get_end_of_line(CRInput const *a_this, gboolean *a_eol)
cr_input_get_end_of_line: @a_this: the current instance of CRInput @a_eol: out parameter.
Definition: cr-input.c:1158
enum CRStatus cr_input_get_line_num(CRInput const *a_this, glong *a_line_num)
cr_input_get_line_num: @a_this: the "this pointer" of the current instance of CRInput.
Definition: cr-input.c:522
enum CRStatus cr_input_set_cur_index(CRInput *a_this, glong a_index)
cr_input_set_cur_index: @a_this: the "this pointer" of the current instance of CRInput .
Definition: cr-input.c:1077
gulong col
Definition: cr-input.c:66
enum CRStatus cr_input_consume_char(CRInput *a_this, guint32 a_char)
cr_input_consume_char: @a_this: the this pointer.
Definition: cr-input.c:622
CREncoding
Encoding values.
Definition: cr-utils.h:84
enum CRStatus cr_input_peek_byte(CRInput const *a_this, enum CRSeekPos a_origin, gulong a_offset, guchar *a_byte)
cr_input_peek_byte: @a_this: the current instance of CRInput.
Definition: cr-input.c:804
enum CRStatus cr_input_get_cur_byte_addr(CRInput *a_this, guchar **a_offset)
cr_input_get_cur_byte_addr: @a_this: the current input stream @a_offset: out parameter.
Definition: cr-input.c:908
CRStatus
The status type returned by the methods of the croco library.
Definition: cr-utils.h:43
gboolean end_of_input
Definition: cr-input.c:69
The CRInput class provides the abstraction of a UTF-8-encoded character stream.
Definition: cr-input.h:47
enum CRStatus cr_input_consume_chars(CRInput *a_this, guint32 a_char, gulong *a_nb_char)
cr_input_consume_chars: @a_this: the this pointer of the current instance of CRInput.
Definition: cr-input.c:663
enum CRStatus cr_input_set_end_of_file(CRInput *a_this, gboolean a_eof)
cr_input_set_end_of_file: @a_this: the current instance of CRInput.
Definition: cr-input.c:1096
enum CRStatus cr_input_set_line_num(CRInput *a_this, glong a_line_num)
cr_input_set_line_num: @a_this: the "this pointer" of the current instance of CRInput.
Definition: cr-input.c:503
enum CRStatus cr_input_increment_col_num(CRInput *a_this, glong a_increment)
cr_input_increment_col_num: @a_this: the "this pointer" of the current instance of CRInput.
Definition: cr-input.c:600
Definition: cr-utils.h:44
CRSeekPos
Values used by cr_input_seek_position().
Definition: cr-utils.h:75
enum CRStatus cr_input_get_column_num(CRInput const *a_this, glong *a_col)
cr_input_get_column_num: @a_this: the "this pointer" of the current instance of CRInput.
Definition: cr-input.c:561
gulong next_byte_index
Definition: cr-input.c:56
gboolean end_of_line
Definition: cr-input.c:68
enum CRStatus cr_utils_read_char_from_utf8_buf(const guchar *a_in, gulong a_in_len, guint32 *a_out, gulong *a_consumed)
Reads a character from a UTF-8 buffer.
Definition: cr-utils.c:428
#define PRIVATE(object)
Definition: cr-input.c:79
enum CRStatus cr_input_read_byte(CRInput *a_this, guchar *a_byte)
cr_input_read_byte: @a_this: the current instance of CRInput.
Definition: cr-input.c:405
void cr_input_destroy(CRInput *a_this)
cr_input_destroy: @a_this: the current instance of CRInput.
Definition: cr-input.c:283
gulong line
Definition: cr-input.c:61
glong line
Definition: cr-input.h:56
enum CRStatus cr_input_get_end_of_file(CRInput const *a_this, gboolean *a_eof)
cr_input_get_end_of_file: @a_this: the current instance of CRInput.
Definition: cr-input.c:1116
The declaration of the CREncHandler class.
guchar * in_buf
Definition: cr-input.c:47
CRInput * cr_input_new_from_uri(const gchar *a_file_uri, enum CREncoding a_enc)
cr_input_new_from_uri: @a_file_uri: the file to create *the input stream from.
Definition: cr-input.c:196
gulong nb_bytes
Definition: cr-input.c:50
enum CRStatus cr_input_seek_index(CRInput *a_this, enum CRSeekPos a_origin, gint a_pos)
cr_input_seek_index: @a_this: the current instance of CRInput.
Definition: cr-input.c:940
enum CRStatus cr_input_consume_white_spaces(CRInput *a_this, gulong *a_nb_chars)
cr_input_consume_white_spaces: @a_this: the "this pointer" of the current instance of CRInput.
Definition: cr-input.c:704
enum CRStatus cr_enc_handler_convert_input(CREncHandler *a_this, const guchar *a_in, gulong *a_in_len, guchar **a_out, gulong *a_out_len)
cr_enc_handler_convert_input: @a_this: the current instance of CREncHandler.
enum CRStatus cr_input_end_of_input(CRInput const *a_this, gboolean *a_end_of_input)
cr_input_end_of_input: @a_this: the current instance of CRInput.
Definition: cr-input.c:357
enum CRStatus cr_input_set_cur_pos(CRInput *a_this, CRInputPos const *a_pos)
cr_input_set_cur_pos: @a_this: the "this pointer" of the current instance of CRInput.
Definition: cr-input.c:1179
CRInput * cr_input_new_from_buf(guchar *a_buf, gulong a_len, enum CREncoding a_enc, gboolean a_free_buf)
cr_input_new_from_buf: @a_buf: the memory buffer to create the input stream from.
Definition: cr-input.c:129
enum CRStatus cr_input_set_end_of_line(CRInput *a_this, gboolean a_eol)
cr_input_set_end_of_line: @a_this: the current instance of CRInput.
Definition: cr-input.c:1137
#define CR_INPUT_MEM_CHUNK_SIZE
Definition: cr-input.c:84
enum CRStatus cr_input_increment_line_num(CRInput *a_this, glong a_increment)
cr_input_increment_line_num: @a_this: the "this pointer" of the current instance of CRInput.
Definition: cr-input.c:581
enum CRStatus cr_input_peek_char(CRInput const *a_this, guint32 *a_char)
cr_input_peek_char: @a_this: the current instance of CRInput.
Definition: cr-input.c:755
The libcroco basic input stream class declaration file.
#define cr_utils_trace_debug(a_msg)
Trace a debug message.
Definition: cr-utils.h:137
enum CRStatus cr_input_get_cur_pos(CRInput const *a_this, CRInputPos *a_pos)
cr_input_get_cur_pos: @a_this: the current instance of CRInput.
Definition: cr-input.c:995
gboolean free_in_buf
Definition: cr-input.c:76
gboolean cr_utils_is_white_space(guint32 a_char)
Returns TRUE if a_char is a white space as defined in the css spec in chap 4.1.1.
Definition: cr-utils.c:1181
gboolean cr_input_unref(CRInput *a_this)
cr_input_unref: @a_this: the current instance of CRInput.
Definition: cr-input.c:327
guchar cr_input_peek_byte2(CRInput const *a_this, gulong a_offset, gboolean *a_eof)
cr_input_peek_byte2: @a_this: the current byte input stream.
Definition: cr-input.c:856
glong col
Definition: cr-input.h:57
glong cr_input_get_nb_bytes_left(CRInput const *a_this)
cr_input_get_nb_bytes_left: @a_this: the current instance of CRInput.
Definition: cr-input.c:376
enum CRStatus cr_input_get_cur_index(CRInput const *a_this, glong *a_index)
cr_input_get_cur_index: @a_this: the "this pointer" of the current instance of CRInput @a_index: out ...
Definition: cr-input.c:1055
enum CRStatus cr_input_set_column_num(CRInput *a_this, glong a_col)
cr_input_set_column_num: @a_this: the "this pointer" of the current instance of CRInput.
Definition: cr-input.c:542
#define cr_utils_trace_info(a_msg)
Traces an info message.
Definition: cr-utils.h:127
void cr_input_ref(CRInput *a_this)
cr_input_ref: @a_this: the current instance of CRInput.
Definition: cr-input.c:309