/* Libcroco */
00001 /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 8-*- */ 00002 00003 /* 00004 * This file is part of The Croco Library 00005 * 00006 * This program is free software; you can redistribute it and/or 00007 * modify it under the terms of version 2.1 of the GNU Lesser General Public 00008 * License as published by the Free Software Foundation. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU Lesser General Public License 00016 * along with this program; if not, write to the Free Software 00017 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00018 * USA 00019 * 00020 * Author: Dodji Seketeli 00021 * See COPYRIGHTS file for copyright information. 00022 */ 00023 00024 #include "stdio.h" 00025 #include <string.h> 00026 #include "cr-input.h" 00027 #include "cr-enc-handler.h" 00028 00029 /** 00030 *@CRInput: 00031 * 00032 *The definition of the #CRInput class. 00033 */ 00034 00035 /******************* 00036 *Private type defs 00037 *******************/ 00038 00039 /** 00040 *The private attributes of 00041 *the #CRInputPriv class. 00042 */ 00043 struct _CRInputPriv { 00044 /* 00045 *The input buffer 00046 */ 00047 guchar *in_buf; 00048 gulong in_buf_size; 00049 00050 gulong nb_bytes; 00051 00052 /* 00053 *The index of the next byte 00054 *to be read. 00055 */ 00056 gulong next_byte_index; 00057 00058 /* 00059 *The current line number 00060 */ 00061 gulong line; 00062 00063 /* 00064 *The current col number 00065 */ 00066 gulong col; 00067 00068 gboolean end_of_line; 00069 gboolean end_of_input; 00070 00071 /* 00072 *the reference count of this 00073 *instance. 
00074 */ 00075 guint ref_count; 00076 gboolean free_in_buf; 00077 }; 00078 00079 #define PRIVATE(object) (object)->priv 00080 00081 /*************************** 00082 *private constants 00083 **************************/ 00084 #define CR_INPUT_MEM_CHUNK_SIZE 1024 * 4 00085 00086 static CRInput *cr_input_new_real (void); 00087 00088 static CRInput * 00089 cr_input_new_real (void) 00090 { 00091 CRInput *result = NULL; 00092 00093 result = g_try_malloc (sizeof (CRInput)); 00094 if (!result) { 00095 cr_utils_trace_info ("Out of memory"); 00096 return NULL; 00097 } 00098 memset (result, 0, sizeof (CRInput)); 00099 00100 PRIVATE (result) = g_try_malloc (sizeof (CRInputPriv)); 00101 if (!PRIVATE (result)) { 00102 cr_utils_trace_info ("Out of memory"); 00103 g_free (result); 00104 return NULL; 00105 } 00106 memset (PRIVATE (result), 0, sizeof (CRInputPriv)); 00107 PRIVATE (result)->free_in_buf = TRUE; 00108 return result; 00109 } 00110 00111 /**************** 00112 *Public methods 00113 ***************/ 00114 00115 /** 00116 * cr_input_new_from_buf: 00117 *@a_buf: the memory buffer to create the input stream from. 00118 *The #CRInput keeps this pointer so user should not free it !. 00119 *@a_len: the size of the input buffer. 00120 *@a_enc: the buffer's encoding. 00121 *@a_free_buf: if set to TRUE, this a_buf will be freed 00122 *at the destruction of this instance. If set to false, it is up 00123 *to the caller to free it. 00124 * 00125 *Creates a new input stream from a memory buffer. 00126 *Returns the newly built instance of #CRInput. 
00127 */ 00128 CRInput * 00129 cr_input_new_from_buf (guchar * a_buf, 00130 gulong a_len, 00131 enum CREncoding a_enc, 00132 gboolean a_free_buf) 00133 { 00134 CRInput *result = NULL; 00135 enum CRStatus status = CR_OK; 00136 CREncHandler *enc_handler = NULL; 00137 gulong len = a_len; 00138 00139 g_return_val_if_fail (a_buf, NULL); 00140 00141 result = cr_input_new_real (); 00142 g_return_val_if_fail (result, NULL); 00143 00144 /*transform the encoding in utf8 */ 00145 if (a_enc != CR_UTF_8) { 00146 enc_handler = cr_enc_handler_get_instance (a_enc); 00147 if (!enc_handler) { 00148 goto error; 00149 } 00150 00151 status = cr_enc_handler_convert_input 00152 (enc_handler, a_buf, &len, 00153 &PRIVATE (result)->in_buf, 00154 &PRIVATE (result)->in_buf_size); 00155 if (status != CR_OK) 00156 goto error; 00157 PRIVATE (result)->free_in_buf = TRUE; 00158 if (a_free_buf == TRUE && a_buf) { 00159 g_free (a_buf) ; 00160 a_buf = NULL ; 00161 } 00162 PRIVATE (result)->nb_bytes = PRIVATE (result)->in_buf_size; 00163 } else { 00164 PRIVATE (result)->in_buf = (guchar *) a_buf; 00165 PRIVATE (result)->in_buf_size = a_len; 00166 PRIVATE (result)->nb_bytes = a_len; 00167 PRIVATE (result)->free_in_buf = a_free_buf; 00168 } 00169 PRIVATE (result)->line = 1; 00170 PRIVATE (result)->col = 0; 00171 return result; 00172 00173 error: 00174 if (result) { 00175 cr_input_destroy (result); 00176 result = NULL; 00177 } 00178 00179 return NULL; 00180 } 00181 00182 /** 00183 * cr_input_new_from_uri: 00184 *@a_file_uri: the file to create *the input stream from. 00185 *@a_enc: the encoding of the file *to create the input from. 00186 * 00187 *Creates a new input stream from 00188 *a file. 00189 * 00190 *Returns the newly created input stream if 00191 *this method could read the file and create it, 00192 *NULL otherwise. 
00193 */ 00194 00195 CRInput * 00196 cr_input_new_from_uri (const gchar * a_file_uri, enum CREncoding a_enc) 00197 { 00198 CRInput *result = NULL; 00199 enum CRStatus status = CR_OK; 00200 FILE *file_ptr = NULL; 00201 guchar tmp_buf[CR_INPUT_MEM_CHUNK_SIZE] = { 0 }; 00202 gulong nb_read = 0, 00203 len = 0, 00204 buf_size = 0; 00205 gboolean loop = TRUE; 00206 guchar *buf = NULL; 00207 00208 g_return_val_if_fail (a_file_uri, NULL); 00209 00210 file_ptr = fopen (a_file_uri, "r"); 00211 00212 if (file_ptr == NULL) { 00213 00214 #ifdef CR_DEBUG 00215 cr_utils_trace_debug ("could not open file"); 00216 #endif 00217 g_warning ("Could not open file %s\n", a_file_uri); 00218 00219 return NULL; 00220 } 00221 00222 /*load the file */ 00223 while (loop) { 00224 nb_read = fread (tmp_buf, 1 /*read bytes */ , 00225 CR_INPUT_MEM_CHUNK_SIZE /*nb of bytes */ , 00226 file_ptr); 00227 00228 if (nb_read != CR_INPUT_MEM_CHUNK_SIZE) { 00229 /*we read less chars than we wanted */ 00230 if (feof (file_ptr)) { 00231 /*we reached eof */ 00232 loop = FALSE; 00233 } else { 00234 /*a pb occured !! */ 00235 cr_utils_trace_debug ("an io error occured"); 00236 status = CR_ERROR; 00237 goto cleanup; 00238 } 00239 } 00240 00241 if (status == CR_OK) { 00242 /*read went well */ 00243 buf = g_realloc (buf, len + CR_INPUT_MEM_CHUNK_SIZE); 00244 memcpy (buf + len, tmp_buf, nb_read); 00245 len += nb_read; 00246 buf_size += CR_INPUT_MEM_CHUNK_SIZE; 00247 } 00248 } 00249 00250 if (status == CR_OK) { 00251 result = cr_input_new_from_buf (buf, len, a_enc, TRUE); 00252 if (!result) { 00253 goto cleanup; 00254 } 00255 /* 00256 *we should free buf here because it's own by CRInput. 00257 *(see the last parameter of cr_input_new_from_buf(). 
00258 */ 00259 buf = NULL ; 00260 } 00261 00262 cleanup: 00263 if (file_ptr) { 00264 fclose (file_ptr); 00265 file_ptr = NULL; 00266 } 00267 00268 if (buf) { 00269 g_free (buf); 00270 buf = NULL; 00271 } 00272 00273 return result; 00274 } 00275 00276 /** 00277 * cr_input_destroy: 00278 *@a_this: the current instance of #CRInput. 00279 * 00280 *The destructor of the #CRInput class. 00281 */ 00282 void 00283 cr_input_destroy (CRInput * a_this) 00284 { 00285 if (a_this == NULL) 00286 return; 00287 00288 if (PRIVATE (a_this)) { 00289 if (PRIVATE (a_this)->in_buf && PRIVATE (a_this)->free_in_buf) { 00290 g_free (PRIVATE (a_this)->in_buf); 00291 PRIVATE (a_this)->in_buf = NULL; 00292 } 00293 00294 g_free (PRIVATE (a_this)); 00295 PRIVATE (a_this) = NULL; 00296 } 00297 00298 g_free (a_this); 00299 } 00300 00301 /** 00302 * cr_input_ref: 00303 *@a_this: the current instance of #CRInput. 00304 * 00305 *Increments the reference count of the current 00306 *instance of #CRInput. 00307 */ 00308 void 00309 cr_input_ref (CRInput * a_this) 00310 { 00311 g_return_if_fail (a_this && PRIVATE (a_this)); 00312 00313 PRIVATE (a_this)->ref_count++; 00314 } 00315 00316 /** 00317 * cr_input_unref: 00318 *@a_this: the current instance of #CRInput. 00319 * 00320 *Decrements the reference count of this instance 00321 *of #CRInput. If the reference count goes down to 00322 *zero, this instance is destroyed. 00323 * 00324 * Returns TRUE if the instance of #CRInput got destroyed, false otherwise. 00325 */ 00326 gboolean 00327 cr_input_unref (CRInput * a_this) 00328 { 00329 g_return_val_if_fail (a_this && PRIVATE (a_this), FALSE); 00330 00331 if (PRIVATE (a_this)->ref_count) { 00332 PRIVATE (a_this)->ref_count--; 00333 } 00334 00335 if (PRIVATE (a_this)->ref_count == 0) { 00336 cr_input_destroy (a_this); 00337 return TRUE; 00338 } 00339 return FALSE; 00340 } 00341 00342 /** 00343 * cr_input_end_of_input: 00344 *@a_this: the current instance of #CRInput. 00345 *@a_end_of_input: out parameter. 
Is set to TRUE if 00346 *the current instance has reached the end of its input buffer, 00347 *FALSE otherwise. 00348 * 00349 *Tests wether the current instance of 00350 *#CRInput has reached its input buffer. 00351 * 00352 * Returns CR_OK upon successful completion, an error code otherwise. 00353 * Note that all the out parameters of this method are valid if 00354 * and only if this method returns CR_OK. 00355 */ 00356 enum CRStatus 00357 cr_input_end_of_input (CRInput const * a_this, gboolean * a_end_of_input) 00358 { 00359 g_return_val_if_fail (a_this && PRIVATE (a_this) 00360 && a_end_of_input, CR_BAD_PARAM_ERROR); 00361 00362 *a_end_of_input = (PRIVATE (a_this)->next_byte_index 00363 >= PRIVATE (a_this)->in_buf_size) ? TRUE : FALSE; 00364 00365 return CR_OK; 00366 } 00367 00368 /** 00369 * cr_input_get_nb_bytes_left: 00370 *@a_this: the current instance of #CRInput. 00371 * 00372 *Returns the number of bytes left in the input stream 00373 *before the end, -1 in case of error. 00374 */ 00375 glong 00376 cr_input_get_nb_bytes_left (CRInput const * a_this) 00377 { 00378 g_return_val_if_fail (a_this && PRIVATE (a_this), -1); 00379 g_return_val_if_fail (PRIVATE (a_this)->nb_bytes 00380 <= PRIVATE (a_this)->in_buf_size, -1); 00381 g_return_val_if_fail (PRIVATE (a_this)->next_byte_index 00382 <= PRIVATE (a_this)->nb_bytes, -1); 00383 00384 if (PRIVATE (a_this)->end_of_input) 00385 return 0; 00386 00387 return PRIVATE (a_this)->nb_bytes - PRIVATE (a_this)->next_byte_index; 00388 } 00389 00390 /** 00391 * cr_input_read_byte: 00392 *@a_this: the current instance of #CRInput. 00393 *@a_byte: out parameter the returned byte. 00394 * 00395 *Gets the next byte of the input. 00396 *Updates the state of the input so that 00397 *the next invocation of this method returns 00398 *the next coming byte. 00399 * 00400 *Returns CR_OK upon successful completion, an error code 00401 *otherwise. 
All the out parameters of this method are valid if 00402 *and only if this method returns CR_OK. 00403 */ 00404 enum CRStatus 00405 cr_input_read_byte (CRInput * a_this, guchar * a_byte) 00406 { 00407 g_return_val_if_fail (a_this && PRIVATE (a_this) 00408 && a_byte, CR_BAD_PARAM_ERROR); 00409 00410 g_return_val_if_fail (PRIVATE (a_this)->next_byte_index <= 00411 PRIVATE (a_this)->nb_bytes, CR_BAD_PARAM_ERROR); 00412 00413 if (PRIVATE (a_this)->end_of_input == TRUE) 00414 return CR_END_OF_INPUT_ERROR; 00415 00416 *a_byte = PRIVATE (a_this)->in_buf[PRIVATE (a_this)->next_byte_index]; 00417 00418 if (PRIVATE (a_this)->nb_bytes - 00419 PRIVATE (a_this)->next_byte_index < 2) { 00420 PRIVATE (a_this)->end_of_input = TRUE; 00421 } else { 00422 PRIVATE (a_this)->next_byte_index++; 00423 } 00424 00425 return CR_OK; 00426 } 00427 00428 /** 00429 * cr_input_read_char: 00430 *@a_this: the current instance of CRInput. 00431 *@a_char: out parameter. The read character. 00432 * 00433 *Reads an unicode character from the current instance of 00434 *#CRInput. 00435 * 00436 *Returns CR_OK upon successful completion, an error code 00437 *otherwise. 
00438 */ 00439 enum CRStatus 00440 cr_input_read_char (CRInput * a_this, guint32 * a_char) 00441 { 00442 enum CRStatus status = CR_OK; 00443 gulong consumed = 0, 00444 nb_bytes_left = 0; 00445 00446 g_return_val_if_fail (a_this && PRIVATE (a_this) && a_char, 00447 CR_BAD_PARAM_ERROR); 00448 00449 if (PRIVATE (a_this)->end_of_input == TRUE) 00450 return CR_END_OF_INPUT_ERROR; 00451 00452 nb_bytes_left = cr_input_get_nb_bytes_left (a_this); 00453 00454 if (nb_bytes_left < 1) { 00455 return CR_END_OF_INPUT_ERROR; 00456 } 00457 00458 status = cr_utils_read_char_from_utf8_buf 00459 (PRIVATE (a_this)->in_buf 00460 + 00461 PRIVATE (a_this)->next_byte_index, 00462 nb_bytes_left, a_char, &consumed); 00463 00464 if (status == CR_OK) { 00465 /*update next byte index */ 00466 PRIVATE (a_this)->next_byte_index += consumed; 00467 00468 /*update line and column number */ 00469 if (PRIVATE (a_this)->end_of_line == TRUE) { 00470 PRIVATE (a_this)->col = 1; 00471 PRIVATE (a_this)->line++; 00472 PRIVATE (a_this)->end_of_line = FALSE; 00473 } else if (*a_char != '\n') { 00474 PRIVATE (a_this)->col++; 00475 } 00476 00477 if (*a_char == '\n') { 00478 PRIVATE (a_this)->end_of_line = TRUE; 00479 } 00480 00481 } 00482 00483 return status; 00484 } 00485 00486 /** 00487 * cr_input_set_line_num: 00488 *@a_this: the "this pointer" of the current instance of #CRInput. 00489 *@a_line_num: the new line number. 00490 * 00491 *Setter of the current line number. 00492 * 00493 *Return CR_OK upon successful completion, an error code otherwise. 00494 */ 00495 enum CRStatus 00496 cr_input_set_line_num (CRInput * a_this, glong a_line_num) 00497 { 00498 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 00499 00500 PRIVATE (a_this)->line = a_line_num; 00501 00502 return CR_OK; 00503 } 00504 00505 /** 00506 * cr_input_get_line_num: 00507 *@a_this: the "this pointer" of the current instance of #CRInput. 00508 *@a_line_num: the returned line number. 
00509 * 00510 *Getter of the current line number. 00511 * 00512 *Returns CR_OK upon successful completion, an error code otherwise. 00513 */ 00514 enum CRStatus 00515 cr_input_get_line_num (CRInput const * a_this, glong * a_line_num) 00516 { 00517 g_return_val_if_fail (a_this && PRIVATE (a_this) 00518 && a_line_num, CR_BAD_PARAM_ERROR); 00519 00520 *a_line_num = PRIVATE (a_this)->line; 00521 00522 return CR_OK; 00523 } 00524 00525 /** 00526 * cr_input_set_column_num: 00527 *@a_this: the "this pointer" of the current instance of #CRInput. 00528 *@a_col: the new column number. 00529 * 00530 *Setter of the current column number. 00531 * 00532 *Returns CR_OK upon successful completion, an error code otherwise. 00533 */ 00534 enum CRStatus 00535 cr_input_set_column_num (CRInput * a_this, glong a_col) 00536 { 00537 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 00538 00539 PRIVATE (a_this)->col = a_col; 00540 00541 return CR_OK; 00542 } 00543 00544 /** 00545 * cr_input_get_column_num: 00546 *@a_this: the "this pointer" of the current instance of #CRInput. 00547 *@a_col: out parameter 00548 * 00549 *Getter of the current column number. 00550 * 00551 *Returns CR_OK upon successful completion, an error code otherwise. 00552 */ 00553 enum CRStatus 00554 cr_input_get_column_num (CRInput const * a_this, glong * a_col) 00555 { 00556 g_return_val_if_fail (a_this && PRIVATE (a_this) && a_col, 00557 CR_BAD_PARAM_ERROR); 00558 00559 *a_col = PRIVATE (a_this)->col; 00560 00561 return CR_OK; 00562 } 00563 00564 /** 00565 * cr_input_increment_line_num: 00566 *@a_this: the "this pointer" of the current instance of #CRInput. 00567 *@a_increment: the increment to add to the line number. 00568 * 00569 *Increments the current line number. 00570 * 00571 *Returns CR_OK upon successful completion, an error code otherwise. 
00572 */ 00573 enum CRStatus 00574 cr_input_increment_line_num (CRInput * a_this, glong a_increment) 00575 { 00576 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 00577 00578 PRIVATE (a_this)->line += a_increment; 00579 00580 return CR_OK; 00581 } 00582 00583 /** 00584 * cr_input_increment_col_num: 00585 *@a_this: the "this pointer" of the current instance of #CRInput. 00586 *@a_increment: the increment to add to the column number. 00587 * 00588 *Increments the current column number. 00589 * 00590 *Returns CR_OK upon successful completion, an error code otherwise. 00591 */ 00592 enum CRStatus 00593 cr_input_increment_col_num (CRInput * a_this, glong a_increment) 00594 { 00595 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 00596 00597 PRIVATE (a_this)->col += a_increment; 00598 00599 return CR_OK; 00600 } 00601 00602 /** 00603 * cr_input_consume_char: 00604 *@a_this: the this pointer. 00605 *@a_char: the character to consume. If set to zero, 00606 *consumes any character. 00607 * 00608 *Consumes the next character of the input stream if 00609 *and only if that character equals a_char. 00610 * 00611 *Returns CR_OK upon successful completion, CR_PARSING_ERROR if 00612 *next char is different from a_char, an other error code otherwise 00613 */ 00614 enum CRStatus 00615 cr_input_consume_char (CRInput * a_this, guint32 a_char) 00616 { 00617 guint32 c; 00618 enum CRStatus status; 00619 00620 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 00621 00622 if ((status = cr_input_peek_char (a_this, &c)) != CR_OK) { 00623 return status; 00624 } 00625 00626 if (c == a_char || a_char == 0) { 00627 status = cr_input_read_char (a_this, &c); 00628 } else { 00629 return CR_PARSING_ERROR; 00630 } 00631 00632 return status; 00633 } 00634 00635 /** 00636 * cr_input_consume_chars: 00637 *@a_this: the this pointer of the current instance of #CRInput. 00638 *@a_char: the character to consume. 
00639 *@a_nb_char: in/out parameter. The number of characters to consume. 00640 *If set to a negative value, the function will consume all the occurences 00641 *of a_char found. 00642 *After return, if the return value equals CR_OK, this variable contains 00643 *the number of characters actually consumed. 00644 * 00645 *Consumes up to a_nb_char occurences of the next contiguous characters 00646 *which equal a_char. Note that the next character of the input stream 00647 **MUST* equal a_char to trigger the consumption, or else, the error 00648 *code CR_PARSING_ERROR is returned. 00649 *If the number of contiguous characters that equals a_char is less than 00650 *a_nb_char, then this function consumes all the characters it can consume. 00651 * 00652 *Returns CR_OK if at least one character has been consumed, an error code 00653 *otherwise. 00654 */ 00655 enum CRStatus 00656 cr_input_consume_chars (CRInput * a_this, guint32 a_char, gulong * a_nb_char) 00657 { 00658 enum CRStatus status = CR_OK; 00659 gulong nb_consumed = 0; 00660 00661 g_return_val_if_fail (a_this && PRIVATE (a_this) && a_nb_char, 00662 CR_BAD_PARAM_ERROR); 00663 00664 g_return_val_if_fail (a_char != 0 || a_nb_char != NULL, 00665 CR_BAD_PARAM_ERROR); 00666 00667 for (nb_consumed = 0; ((status == CR_OK) 00668 && (*a_nb_char > 0 00669 && nb_consumed < *a_nb_char)); 00670 nb_consumed++) { 00671 status = cr_input_consume_char (a_this, a_char); 00672 } 00673 00674 *a_nb_char = nb_consumed; 00675 00676 if ((nb_consumed > 0) 00677 && ((status == CR_PARSING_ERROR) 00678 || (status == CR_END_OF_INPUT_ERROR))) { 00679 status = CR_OK; 00680 } 00681 00682 return status; 00683 } 00684 00685 /** 00686 * cr_input_consume_white_spaces: 00687 *@a_this: the "this pointer" of the current instance of #CRInput. 00688 *@a_nb_chars: in/out parameter. The number of white spaces to 00689 *consume. After return, holds the number of white spaces actually consumed. 
00690 * 00691 *Same as cr_input_consume_chars() but this one consumes white 00692 *spaces. 00693 * 00694 *Returns CR_OK upon successful completion, an error code otherwise. 00695 */ 00696 enum CRStatus 00697 cr_input_consume_white_spaces (CRInput * a_this, gulong * a_nb_chars) 00698 { 00699 enum CRStatus status = CR_OK; 00700 guint32 cur_char = 0, 00701 nb_consumed = 0; 00702 00703 g_return_val_if_fail (a_this && PRIVATE (a_this) && a_nb_chars, 00704 CR_BAD_PARAM_ERROR); 00705 00706 for (nb_consumed = 0; 00707 ((*a_nb_chars > 0) && (nb_consumed < *a_nb_chars)); 00708 nb_consumed++) { 00709 status = cr_input_peek_char (a_this, &cur_char); 00710 if (status != CR_OK) 00711 break; 00712 00713 /*if the next char is a white space, consume it ! */ 00714 if (cr_utils_is_white_space (cur_char) == TRUE) { 00715 status = cr_input_read_char (a_this, &cur_char); 00716 if (status != CR_OK) 00717 break; 00718 continue; 00719 } 00720 00721 break; 00722 00723 } 00724 00725 if (nb_consumed && status == CR_END_OF_INPUT_ERROR) { 00726 status = CR_OK; 00727 } 00728 00729 return status; 00730 } 00731 00732 /** 00733 * cr_input_peek_char: 00734 *@a_this: the current instance of #CRInput. 00735 *@a_char: out parameter. The returned character. 00736 * 00737 *Same as cr_input_read_char() but does not update the 00738 *internal state of the input stream. The next call 00739 *to cr_input_peek_char() or cr_input_read_char() will thus 00740 *return the same character as the current one. 00741 * 00742 *Returns CR_OK upon successful completion, an error code 00743 *otherwise. 
00744 */ 00745 enum CRStatus 00746 cr_input_peek_char (CRInput const * a_this, guint32 * a_char) 00747 { 00748 enum CRStatus status = CR_OK; 00749 glong consumed = 0, 00750 nb_bytes_left = 0; 00751 00752 g_return_val_if_fail (a_this && PRIVATE (a_this) 00753 && a_char, CR_BAD_PARAM_ERROR); 00754 00755 if (PRIVATE (a_this)->next_byte_index >= 00756 PRIVATE (a_this)->in_buf_size) { 00757 return CR_END_OF_INPUT_ERROR; 00758 } 00759 00760 nb_bytes_left = cr_input_get_nb_bytes_left (a_this); 00761 00762 if (nb_bytes_left < 1) { 00763 return CR_END_OF_INPUT_ERROR; 00764 } 00765 00766 status = cr_utils_read_char_from_utf8_buf 00767 (PRIVATE (a_this)->in_buf + 00768 PRIVATE (a_this)->next_byte_index, 00769 nb_bytes_left, a_char, &consumed); 00770 00771 return status; 00772 } 00773 00774 /** 00775 * cr_input_peek_byte: 00776 *@a_this: the current instance of #CRInput. 00777 *@a_origin: the origin to consider in the calculation 00778 *of the position of the byte to peek. 00779 *@a_offset: the offset of the byte to peek, starting from 00780 *the origin specified by a_origin. 00781 *@a_byte: out parameter the peeked byte. 00782 * 00783 *Gets a byte from the input stream, 00784 *starting from the current position in the input stream. 00785 *Unlike cr_input_peek_next_byte() this method 00786 *does not update the state of the current input stream. 00787 *Subsequent calls to cr_input_peek_byte with the same arguments 00788 *will return the same byte. 00789 * 00790 *Returns CR_OK upon successful completion or, 00791 *CR_BAD_PARAM_ERROR if at least one of the parameters is invalid; 00792 *CR_OUT_OF_BOUNDS_ERROR if the indexed byte is out of bounds. 
00793 */ 00794 enum CRStatus 00795 cr_input_peek_byte (CRInput const * a_this, enum CRSeekPos a_origin, 00796 gulong a_offset, guchar * a_byte) 00797 { 00798 gulong abs_offset = 0; 00799 00800 g_return_val_if_fail (a_this && PRIVATE (a_this) 00801 && a_byte, CR_BAD_PARAM_ERROR); 00802 00803 switch (a_origin) { 00804 00805 case CR_SEEK_CUR: 00806 abs_offset = PRIVATE (a_this)->next_byte_index - 1 + a_offset; 00807 break; 00808 00809 case CR_SEEK_BEGIN: 00810 abs_offset = a_offset; 00811 break; 00812 00813 case CR_SEEK_END: 00814 abs_offset = PRIVATE (a_this)->in_buf_size - 1 - a_offset; 00815 break; 00816 00817 default: 00818 return CR_BAD_PARAM_ERROR; 00819 } 00820 00821 if (abs_offset < PRIVATE (a_this)->in_buf_size) { 00822 00823 *a_byte = PRIVATE (a_this)->in_buf[abs_offset]; 00824 00825 return CR_OK; 00826 00827 } else { 00828 return CR_END_OF_INPUT_ERROR; 00829 } 00830 } 00831 00832 /** 00833 * cr_input_peek_byte2: 00834 *@a_this: the current byte input stream. 00835 *@a_offset: the offset of the byte to peek, starting 00836 *from the current input position pointer. 00837 *@a_eof: out parameter. Is set to true is we reach end of 00838 *stream. If set to NULL by the caller, this parameter is not taken 00839 *in account. 00840 * 00841 *Same as cr_input_peek_byte() but with a simplified 00842 *interface. 00843 * 00844 *Returns the read byte or 0 if something bad happened. 00845 */ 00846 guchar 00847 cr_input_peek_byte2 (CRInput const * a_this, gulong a_offset, gboolean * a_eof) 00848 { 00849 guchar result = 0; 00850 enum CRStatus status = CR_ERROR; 00851 00852 g_return_val_if_fail (a_this && PRIVATE (a_this), 0); 00853 00854 if (a_eof) 00855 *a_eof = FALSE; 00856 00857 status = cr_input_peek_byte (a_this, CR_SEEK_CUR, a_offset, &result); 00858 00859 if ((status == CR_END_OF_INPUT_ERROR) 00860 && a_eof) 00861 *a_eof = TRUE; 00862 00863 return result; 00864 } 00865 00866 /** 00867 * cr_input_get_byte_addr: 00868 *@a_this: the current instance of #CRInput. 
00869 *@a_offset: the offset of the byte in the input stream starting 00870 *from the beginning of the stream. 00871 * 00872 *Gets the memory address of the byte located at a given offset 00873 *in the input stream. 00874 * 00875 *Returns the address, otherwise NULL if an error occured. 00876 */ 00877 guchar * 00878 cr_input_get_byte_addr (CRInput * a_this, gulong a_offset) 00879 { 00880 g_return_val_if_fail (a_this && PRIVATE (a_this), NULL); 00881 00882 if (a_offset >= PRIVATE (a_this)->nb_bytes) { 00883 return NULL; 00884 } 00885 00886 return &PRIVATE (a_this)->in_buf[a_offset]; 00887 } 00888 00889 /** 00890 * cr_input_get_cur_byte_addr: 00891 *@a_this: the current input stream 00892 *@a_offset: out parameter. The returned address. 00893 * 00894 *Gets the address of the current character pointer. 00895 * 00896 *Returns CR_OK upon successful completion, an error code otherwise. 00897 */ 00898 enum CRStatus 00899 cr_input_get_cur_byte_addr (CRInput * a_this, guchar ** a_offset) 00900 { 00901 g_return_val_if_fail (a_this && PRIVATE (a_this) && a_offset, 00902 CR_BAD_PARAM_ERROR); 00903 00904 if (!PRIVATE (a_this)->next_byte_index) { 00905 return CR_START_OF_INPUT_ERROR; 00906 } 00907 00908 *a_offset = cr_input_get_byte_addr 00909 (a_this, PRIVATE (a_this)->next_byte_index - 1); 00910 00911 return CR_OK; 00912 } 00913 00914 /** 00915 * cr_input_seek_index: 00916 *@a_this: the current instance of #CRInput. 00917 *@a_origin: the origin to consider during the calculation 00918 *of the absolute position of the new "current byte index". 00919 *@a_pos: the relative offset of the new "current byte index." 00920 *This offset is relative to the origin a_origin. 00921 * 00922 *Sets the "current byte index" of the current instance 00923 *of #CRInput. Next call to cr_input_get_byte() will return 00924 *the byte next after the new "current byte index". 
00925 * 00926 *Returns CR_OK upon successful completion otherwise returns 00927 *CR_BAD_PARAM_ERROR if at least one of the parameters is not valid 00928 *or CR_OUT_BOUNDS_ERROR in case of error. 00929 */ 00930 enum CRStatus 00931 cr_input_seek_index (CRInput * a_this, enum CRSeekPos a_origin, gint a_pos) 00932 { 00933 00934 glong abs_offset = 0; 00935 00936 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 00937 00938 switch (a_origin) { 00939 00940 case CR_SEEK_CUR: 00941 abs_offset = PRIVATE (a_this)->next_byte_index - 1 + a_pos; 00942 break; 00943 00944 case CR_SEEK_BEGIN: 00945 abs_offset = a_pos; 00946 break; 00947 00948 case CR_SEEK_END: 00949 abs_offset = PRIVATE (a_this)->in_buf_size - 1 - a_pos; 00950 break; 00951 00952 default: 00953 return CR_BAD_PARAM_ERROR; 00954 } 00955 00956 if ((abs_offset > 0) 00957 && (gulong) abs_offset < PRIVATE (a_this)->nb_bytes) { 00958 00959 /*update the input stream's internal state */ 00960 PRIVATE (a_this)->next_byte_index = abs_offset + 1; 00961 00962 return CR_OK; 00963 } 00964 00965 return CR_OUT_OF_BOUNDS_ERROR; 00966 } 00967 00968 /** 00969 * cr_input_get_cur_pos: 00970 *@a_this: the current instance of #CRInput. 00971 *@a_pos: out parameter. The returned position. 00972 * 00973 *Gets the position of the "current byte index" which 00974 *is basically the position of the last returned byte in the 00975 *input stream. 00976 * 00977 *Returns CR_OK upon successful completion. Otherwise, 00978 *CR_BAD_PARAMETER_ERROR if at least one of the arguments is invalid. 00979 *CR_START_OF_INPUT if no call to either cr_input_read_byte() 00980 *or cr_input_seek_index() have been issued before calling 00981 *cr_input_get_cur_pos() 00982 *Note that the out parameters of this function are valid if and only if this 00983 *function returns CR_OK. 
00984 */ 00985 enum CRStatus 00986 cr_input_get_cur_pos (CRInput const * a_this, CRInputPos * a_pos) 00987 { 00988 g_return_val_if_fail (a_this && PRIVATE (a_this) && a_pos, 00989 CR_BAD_PARAM_ERROR); 00990 00991 a_pos->next_byte_index = PRIVATE (a_this)->next_byte_index; 00992 a_pos->line = PRIVATE (a_this)->line; 00993 a_pos->col = PRIVATE (a_this)->col; 00994 a_pos->end_of_line = PRIVATE (a_this)->end_of_line; 00995 a_pos->end_of_file = PRIVATE (a_this)->end_of_input; 00996 00997 return CR_OK; 00998 } 00999 01000 /** 01001 * cr_input_get_parsing_location: 01002 *@a_this: the current instance of #CRInput 01003 *@a_loc: the set parsing location. 01004 * 01005 *Gets the current parsing location. 01006 *The Parsing location is a public datastructure that 01007 *represents the current line/column/byte offset/ in the input 01008 *stream. 01009 * 01010 *Returns CR_OK upon successful completion, an error 01011 *code otherwise. 01012 */ 01013 enum CRStatus 01014 cr_input_get_parsing_location (CRInput const *a_this, 01015 CRParsingLocation *a_loc) 01016 { 01017 g_return_val_if_fail (a_this 01018 && PRIVATE (a_this) 01019 && a_loc, 01020 CR_BAD_PARAM_ERROR) ; 01021 01022 a_loc->line = PRIVATE (a_this)->line ; 01023 a_loc->column = PRIVATE (a_this)->col ; 01024 if (PRIVATE (a_this)->next_byte_index) { 01025 a_loc->byte_offset = PRIVATE (a_this)->next_byte_index - 1 ; 01026 } else { 01027 a_loc->byte_offset = PRIVATE (a_this)->next_byte_index ; 01028 } 01029 return CR_OK ; 01030 } 01031 01032 /** 01033 * cr_input_get_cur_index: 01034 *@a_this: the "this pointer" of the current instance of 01035 *#CRInput 01036 *@a_index: out parameter. The returned index. 01037 * 01038 *Getter of the next byte index. 01039 *It actually returns the index of the 01040 *next byte to be read. 01041 * 01042 *Returns CR_OK upon successful completion, an error code 01043 *otherwise. 
01044 */ 01045 enum CRStatus 01046 cr_input_get_cur_index (CRInput const * a_this, glong * a_index) 01047 { 01048 g_return_val_if_fail (a_this && PRIVATE (a_this) 01049 && a_index, CR_BAD_PARAM_ERROR); 01050 01051 *a_index = PRIVATE (a_this)->next_byte_index; 01052 01053 return CR_OK; 01054 } 01055 01056 /** 01057 * cr_input_set_cur_index: 01058 *@a_this: the "this pointer" of the current instance 01059 *of #CRInput . 01060 *@a_index: the new index to set. 01061 * 01062 *Setter of the next byte index. 01063 *It sets the index of the next byte to be read. 01064 * 01065 *Returns CR_OK upon successful completion, an error code otherwise. 01066 */ 01067 enum CRStatus 01068 cr_input_set_cur_index (CRInput * a_this, glong a_index) 01069 { 01070 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 01071 01072 PRIVATE (a_this)->next_byte_index = a_index; 01073 01074 return CR_OK; 01075 } 01076 01077 /** 01078 * cr_input_set_end_of_file: 01079 *@a_this: the current instance of #CRInput. 01080 *@a_eof: the new end of file flag. 01081 * 01082 *Sets the end of file flag. 01083 * 01084 *Returns CR_OK upon successful completion, an error code otherwise. 01085 */ 01086 enum CRStatus 01087 cr_input_set_end_of_file (CRInput * a_this, gboolean a_eof) 01088 { 01089 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 01090 01091 PRIVATE (a_this)->end_of_input = a_eof; 01092 01093 return CR_OK; 01094 } 01095 01096 /** 01097 * cr_input_get_end_of_file: 01098 *@a_this: the current instance of #CRInput. 01099 *@a_eof: out parameter the place to put the end of 01100 *file flag. 01101 * 01102 *Gets the end of file flag. 01103 * 01104 *Returns CR_OK upon successful completion, an error code otherwise. 
01105 */ 01106 enum CRStatus 01107 cr_input_get_end_of_file (CRInput const * a_this, gboolean * a_eof) 01108 { 01109 g_return_val_if_fail (a_this && PRIVATE (a_this) 01110 && a_eof, CR_BAD_PARAM_ERROR); 01111 01112 *a_eof = PRIVATE (a_this)->end_of_input; 01113 01114 return CR_OK; 01115 } 01116 01117 /** 01118 * cr_input_set_end_of_line: 01119 *@a_this: the current instance of #CRInput. 01120 *@a_eol: the new end of line flag. 01121 * 01122 *Sets the end of line flag. 01123 * 01124 *Returns CR_OK upon successful completion, an error code 01125 *otherwise. 01126 */ 01127 enum CRStatus 01128 cr_input_set_end_of_line (CRInput * a_this, gboolean a_eol) 01129 { 01130 g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); 01131 01132 PRIVATE (a_this)->end_of_line = a_eol; 01133 01134 return CR_OK; 01135 } 01136 01137 /** 01138 * cr_input_get_end_of_line: 01139 *@a_this: the current instance of #CRInput 01140 *@a_eol: out parameter. The place to put 01141 *the returned flag 01142 * 01143 *Gets the end of line flag of the current input. 01144 * 01145 *Returns CR_OK upon successful completion, an error code 01146 *otherwise. 01147 */ 01148 enum CRStatus 01149 cr_input_get_end_of_line (CRInput const * a_this, gboolean * a_eol) 01150 { 01151 g_return_val_if_fail (a_this && PRIVATE (a_this) 01152 && a_eol, CR_BAD_PARAM_ERROR); 01153 01154 *a_eol = PRIVATE (a_this)->end_of_line; 01155 01156 return CR_OK; 01157 } 01158 01159 /** 01160 * cr_input_set_cur_pos: 01161 *@a_this: the "this pointer" of the current instance of 01162 *#CRInput. 01163 *@a_pos: the new position. 01164 * 01165 *Sets the current position in the input stream. 01166 * 01167 * Returns CR_OK upon successful completion, an error code otherwise. 
01168 */ 01169 enum CRStatus 01170 cr_input_set_cur_pos (CRInput * a_this, CRInputPos const * a_pos) 01171 { 01172 g_return_val_if_fail (a_this && PRIVATE (a_this) && a_pos, 01173 CR_BAD_PARAM_ERROR); 01174 01175 cr_input_set_column_num (a_this, a_pos->col); 01176 cr_input_set_line_num (a_this, a_pos->line); 01177 cr_input_set_cur_index (a_this, a_pos->next_byte_index); 01178 cr_input_set_end_of_line (a_this, a_pos->end_of_line); 01179 cr_input_set_end_of_file (a_this, a_pos->end_of_file); 01180 01181 return CR_OK; 01182 }