Main Page | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

cr-tknzr.c File Reference

The definition of the CRTknzr (tokenizer) class. More...

#include "string.h"
#include "cr-tknzr.h"
#include "cr-doc-handler.h"

Go to the source code of this file.

Data Structures

struct  _CRTknzrPriv

Defines

#define PRIVATE(obj)   ((obj)->priv)
#define IS_NUM(a_char)   (((a_char) >= '0' && (a_char) <= '9')?TRUE:FALSE)
 Returns TRUE if the character is a digit ([0-9]), FALSE otherwise.

#define CHECK_PARSING_STATUS(status, is_exception)
 Checks if 'status' equals CR_OK.

#define PEEK_NEXT_CHAR(a_tknzr, a_to_char)
 Peeks the next char from the input stream of the current tokenizer.

#define READ_NEXT_CHAR(a_tknzr, to_char)
 Reads the next char from the input stream of the current parser.

#define RECORD_INITIAL_POS(a_tknzr, a_pos)
 Gets information about the current position in the input of the parser.

#define RECORD_CUR_BYTE_ADDR(a_tknzr, a_addr)
 Gets the address of the current byte inside the parser input.

#define PEEK_BYTE(a_tknzr, a_offset, a_byte_ptr)
 Peeks a byte from the topmost parser input at a given offset from the current position.

#define BYTE(a_input, a_n, a_eof)   cr_input_peek_byte2 (a_input, a_n, a_eof)
#define READ_NEXT_BYTE(a_tknzr, a_byte_ptr)
 Reads a byte from the topmost parser input stream.

#define SKIP_BYTES(a_tknzr, a_nb_bytes)
 Skips a given number of bytes in the topmost parser input.

#define SKIP_CHARS(a_tknzr, a_nb_chars)
 Skip utf8 encoded characters.

#define ENSURE_PARSING_COND(condition)   if (! (condition)) {status = CR_PARSING_ERROR; goto error ;}
 Tests the condition and if it is false, sets status to "CR_PARSING_ERROR" and goto the 'error' label.


Functions

CRTknzr * cr_tknzr_new (CRInput *a_input)
CRTknzr * cr_tknzr_new_from_buf (const guchar *a_buf, gulong a_len, enum CREncoding a_enc, gboolean a_free_at_destroy)
CRTknzr * cr_tknzr_new_from_uri (const guchar *a_file_uri, enum CREncoding a_enc)
void cr_tknzr_ref (CRTknzr *a_this)
gboolean cr_tknzr_unref (CRTknzr *a_this)
enum CRStatus cr_tknzr_set_input (CRTknzr *a_this, CRInput *a_input)
enum CRStatus cr_tknzr_get_input (CRTknzr *a_this, CRInput **a_input)
enum CRStatus cr_tknzr_read_byte (CRTknzr *a_this, guchar *a_byte)
 Reads the next byte from the parser input stream.

enum CRStatus cr_tknzr_read_char (CRTknzr *a_this, guint32 *a_char)
 Reads the next char from the parser input stream.

enum CRStatus cr_tknzr_peek_char (CRTknzr *a_this, guint32 *a_char)
 Peeks a char from the parser input stream.

enum CRStatus cr_tknzr_peek_byte (CRTknzr *a_this, gulong a_offset, guchar *a_byte)
 Peeks a byte ahead at a given position in the parser input stream.

guchar cr_tknzr_peek_byte2 (CRTknzr *a_this, gulong a_offset, gboolean *a_eof)
 Same as cr_tknzr_peek_byte() but this api returns the byte peeked.

glong cr_tknzr_get_nb_bytes_left (CRTknzr *a_this)
 Gets the number of bytes left in the topmost input stream associated to this parser.

enum CRStatus cr_tknzr_get_cur_pos (CRTknzr *a_this, CRInputPos *a_pos)
enum CRStatus cr_tknzr_get_cur_byte_addr (CRTknzr *a_this, guchar **a_addr)
enum CRStatus cr_tknzr_seek_index (CRTknzr *a_this, enum CRSeekPos a_origin, gint a_pos)
enum CRStatus cr_tknzr_consume_chars (CRTknzr *a_this, guint32 a_char, glong *a_nb_char)
enum CRStatus cr_tknzr_set_cur_pos (CRTknzr *a_this, CRInputPos *a_pos)
enum CRStatus cr_tknzr_unget_token (CRTknzr *a_this, CRToken *a_token)
enum CRStatus cr_tknzr_get_next_token (CRTknzr *a_this, CRToken **a_tk)
 Returns the next token of the input stream.

enum CRStatus cr_tknzr_parse_token (CRTknzr *a_this, enum CRTokenType a_type, enum CRTokenExtraType a_et, gpointer a_res, gpointer a_extra_res)
void cr_tknzr_destroy (CRTknzr *a_this)


Detailed Description

The definition of the CRTknzr (tokenizer) class.

Definition in file cr-tknzr.c.


Define Documentation

#define BYTE(a_input, a_n, a_eof)   cr_input_peek_byte2 (a_input, a_n, a_eof)
 

Definition at line 173 of file cr-tknzr.c.

#define CHECK_PARSING_STATUS(status, is_exception)
 

Value:

if ((status) != CR_OK) \
{ \
        if (is_exception == FALSE) \
        { \
                status = CR_PARSING_ERROR ; \
        } \
        goto error ; \
}
Checks if 'status' equals CR_OK.

If not, goto the 'error' label.

Parameters:
status the status (of type enum CRStatus) to test.
is_exception if set to FALSE, the final status returned by the current function will be CR_PARSING_ERROR. If set to TRUE, the final status will be the current value of the 'status' variable.

Definition at line 85 of file cr-tknzr.c.

#define ENSURE_PARSING_COND(condition)   if (! (condition)) {status = CR_PARSING_ERROR; goto error ;}
 

Tests the condition and if it is false, sets status to "CR_PARSING_ERROR" and goto the 'error' label.

Parameters:
condition the condition to test.

Definition at line 225 of file cr-tknzr.c.

#define IS_NUM(a_char)   (((a_char) >= '0' && (a_char) <= '9')?TRUE:FALSE)
 

Returns TRUE if the character is a digit ([0-9]), FALSE otherwise.

Parameters:
a_char the char to test.

Definition at line 74 of file cr-tknzr.c.

#define PEEK_BYTE(a_tknzr, a_offset, a_byte_ptr)
 

Value:

status = cr_tknzr_peek_byte (a_tknzr, \
                             a_offset, \
                             a_byte_ptr) ; \
CHECK_PARSING_STATUS (status, TRUE) ;
Peeks a byte from the topmost parser input at a given offset from the current position.

If it fails, goto the "error:" label.

Parameters:
a_tknzr the current instance of CRTknzr.
a_offset the offset of the byte to peek, the current byte having the offset '0'.
a_byte_ptr out parameter a pointer (guchar*) to where the peeked char is to be stored.

Definition at line 166 of file cr-tknzr.c.

#define PEEK_NEXT_CHAR(a_tknzr, a_to_char)
 

Value:

{\
status = cr_tknzr_peek_char  (a_tknzr, a_to_char) ; \
CHECK_PARSING_STATUS (status, TRUE) \
}
Peeks the next char from the input stream of the current tokenizer.

Invokes CHECK_PARSING_STATUS on the status returned by cr_tknzr_peek_char().

Parameters:
a_tknzr the current instance of CRTknzr.
a_to_char a pointer to the char where to store the char peeked.

Definition at line 105 of file cr-tknzr.c.

#define PRIVATE(obj)   ((obj)->priv)
 

Definition at line 68 of file cr-tknzr.c.

#define READ_NEXT_BYTE(a_tknzr, a_byte_ptr)
 

Value:

status = \
cr_input_read_byte (PRIVATE (a_tknzr)->input, a_byte_ptr) ;\
CHECK_PARSING_STATUS (status, TRUE) ;
Reads a byte from the topmost parser input stream.

If it fails, goto the "error" label.

Parameters:
a_tknzr the current instance of CRTknzr.
a_byte_ptr the guchar * where to put the read char.

Definition at line 183 of file cr-tknzr.c.

#define READ_NEXT_CHAR(a_tknzr, to_char)
 

Value:

status = cr_tknzr_read_char (a_tknzr, to_char) ;\
CHECK_PARSING_STATUS (status, TRUE)
Reads the next char from the input stream of the current parser.

In case of error, jumps to the "error:" label located in the function where this macro is called.

Parameters:
a_tknzr the current instance of CRTknzr.
to_char a pointer to the guint32 char where to store the character read.

Definition at line 121 of file cr-tknzr.c.

#define RECORD_CUR_BYTE_ADDR(a_tknzr, a_addr)
 

Value:

status = cr_input_get_cur_byte_addr \
            (PRIVATE (a_tknzr)->input, a_addr) ; \
CHECK_PARSING_STATUS (status, TRUE)
Gets the address of the current byte inside the parser input.

Parameters:
a_tknzr the current instance of CRTknzr.
a_addr out parameter. A pointer (guchar*) to where the address must be put.

Definition at line 149 of file cr-tknzr.c.

#define RECORD_INITIAL_POS(a_tknzr, a_pos)
 

Value:

status = cr_input_get_cur_pos (PRIVATE  \
(a_tknzr)->input, a_pos) ; \
g_return_val_if_fail (status == CR_OK, status)
Gets information about the current position in the input of the parser.

In case of failure, this macro returns from the calling function and returns a status code of type enum CRStatus.

Parameters:
a_tknzr the current instance of CRTknzr.
a_pos out parameter. A pointer to the position inside the current parser input.

Definition at line 136 of file cr-tknzr.c.

#define SKIP_BYTES(a_tknzr, a_nb_bytes)
 

Value:

status = cr_input_seek_index (PRIVATE (a_tknzr)->input, \
                                     CR_SEEK_CUR, a_nb_bytes) ; \
CHECK_PARSING_STATUS (status, TRUE) ;
Skips a given number of bytes in the topmost parser input.

Don't update line and column number. In case of error, jumps to the "error:" label of the surrounding function.

Parameters:
a_tknzr the current instance of CRTknzr.
a_nb_bytes the number of bytes to skip.

Definition at line 197 of file cr-tknzr.c.

#define SKIP_CHARS(a_tknzr, a_nb_chars)
 

Value:

{ \
glong nb_chars = a_nb_chars ; \
status = cr_input_consume_chars \
     (PRIVATE (a_tknzr)->input,0, &nb_chars) ; \
CHECK_PARSING_STATUS (status, TRUE) ; \
}
Skip utf8 encoded characters.

Updates line and column numbers.

Parameters:
a_tknzr the current instance of CRTknzr.
a_nb_chars the number of chars to skip. Must be of type glong.

Definition at line 210 of file cr-tknzr.c.


Function Documentation

enum CRStatus cr_tknzr_consume_chars (CRTknzr *a_this, guint32 a_char, glong *a_nb_char)
 

Definition at line 2040 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_consume_chars(), cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

void cr_tknzr_destroy (CRTknzr *a_this)
 

Definition at line 2824 of file cr-tknzr.c.

References cr_input_unref(), cr_token_destroy(), and PRIVATE.

Referenced by cr_tknzr_unref().

enum CRStatus cr_tknzr_get_cur_byte_addr (CRTknzr *a_this, guchar **a_addr)
 

Definition at line 2000 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_get_cur_byte_addr(), cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

enum CRStatus cr_tknzr_get_cur_pos (CRTknzr *a_this, CRInputPos *a_pos)
 

Definition at line 1981 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_get_cur_pos(), cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

enum CRStatus cr_tknzr_get_input (CRTknzr *a_this, CRInput **a_input)
 

Definition at line 1813 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, CR_OK, and PRIVATE.

glong cr_tknzr_get_nb_bytes_left (CRTknzr *a_this)
 

Gets the number of bytes left in the topmost input stream associated to this parser.

Parameters:
a_this the current instance of CRTknzr
Returns:
the number of bytes left or -1 in case of error.

Definition at line 1962 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_get_nb_bytes_left(), cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

enum CRStatus cr_tknzr_get_next_token (CRTknzr *a_this, CRToken **a_tk)
 

Returns the next token of the input stream.

This method is really central. Each parsing method calls it.

Parameters:
a_this the current tokenizer.
a_tk out parameter. The returned token. To avoid memory leaks, *a_tk must be NULL on entry.
Returns:
CR_OK upon successful completion, an error code otherwise.

Definition at line 2100 of file cr-tknzr.c.

References ANGLE_DEG_ET, ANGLE_GRAD_ET, ANGLE_RAD_ET, BYTE, CHECK_PARSING_STATUS, CR_BAD_PARAM_ERROR, CR_END_OF_INPUT_ERROR, cr_input_get_end_of_file(), cr_input_peek_char(), CR_OK, cr_tknzr_set_cur_pos(), cr_token_destroy(), cr_token_new(), cr_token_set_angle(), cr_token_set_atkeyword(), cr_token_set_bc(), cr_token_set_bo(), cr_token_set_cbc(), cr_token_set_cbo(), cr_token_set_cdc(), cr_token_set_cdo(), cr_token_set_charset_sym(), cr_token_set_comment(), cr_token_set_dashmatch(), cr_token_set_delim(), cr_token_set_dimen(), cr_token_set_ems(), cr_token_set_exs(), cr_token_set_font_face_sym(), cr_token_set_freq(), cr_token_set_function(), cr_token_set_hash(), cr_token_set_ident(), cr_token_set_import_sym(), cr_token_set_important_sym(), cr_token_set_includes(), cr_token_set_length(), cr_token_set_media_sym(), cr_token_set_number(), cr_token_set_page_sym(), cr_token_set_pc(), cr_token_set_percentage(), cr_token_set_po(), cr_token_set_rgb(), cr_token_set_s(), cr_token_set_semicolon(), cr_token_set_string(), cr_token_set_time(), cr_token_set_uri(), cr_utils_is_nonascii(), CRRgb, CRStatus, ENSURE_PARSING_COND, FREQ_HZ_ET, FREQ_KHZ_ET, LENGTH_CM_ET, LENGTH_IN_ET, LENGTH_MM_ET, LENGTH_PC_ET, LENGTH_PT_ET, LENGTH_PX_ET, NUM_ANGLE_DEG, NUM_ANGLE_GRAD, NUM_ANGLE_RAD, NUM_FREQ_HZ, NUM_FREQ_KHZ, NUM_LENGTH_CM, NUM_LENGTH_EM, NUM_LENGTH_EX, NUM_LENGTH_IN, NUM_LENGTH_MM, NUM_LENGTH_PC, NUM_LENGTH_PT, NUM_LENGTH_PX, NUM_PERCENTAGE, NUM_TIME_MS, NUM_TIME_S, NUM_UNKNOWN_TYPE, PEEK_NEXT_CHAR, PRIVATE, READ_NEXT_CHAR, RECORD_INITIAL_POS, SKIP_CHARS, TIME_MS_ET, TIME_S_ET, and _CRNum::type.

Referenced by cr_parser_parse_charset(), cr_parser_parse_font_face(), cr_parser_parse_media(), cr_parser_parse_page(), cr_parser_parse_statement_core(), cr_parser_try_to_skip_spaces_and_comments(), and cr_tknzr_parse_token().

CRTknzr* cr_tknzr_new (CRInput *a_input)
 

Definition at line 1694 of file cr-tknzr.c.

References cr_tknzr_set_input(), cr_utils_trace_info, and CRTknzr.

Referenced by cr_parser_new_from_input(), cr_tknzr_new_from_buf(), and cr_tknzr_new_from_uri().

CRTknzr* cr_tknzr_new_from_buf (const guchar *a_buf, gulong a_len, enum CREncoding a_enc, gboolean a_free_at_destroy)
 

Definition at line 1733 of file cr-tknzr.c.

References cr_input_new_from_buf(), cr_tknzr_new(), and CRTknzr.

Referenced by cr_parser_parse_buf().

CRTknzr* cr_tknzr_new_from_uri (const guchar *a_file_uri, enum CREncoding a_enc)
 

Definition at line 1751 of file cr-tknzr.c.

References cr_input_new_from_uri(), cr_tknzr_new(), and CRTknzr.

Referenced by cr_parser_new_from_file(), and cr_parser_parse_file().

enum CRStatus cr_tknzr_parse_token (CRTknzr *a_this, enum CRTokenType a_type, enum CRTokenExtraType a_et, gpointer a_res, gpointer a_extra_res)
 

Definition at line 2706 of file cr-tknzr.c.

References ANGLE_TK, ATKEYWORD_TK, CDC_TK, CDO_TK, CHARSET_SYM_TK, COMMENT_TK, CR_BAD_PARAM_ERROR, CR_OK, CR_PARSING_ERROR, cr_tknzr_get_next_token(), cr_tknzr_unget_token(), cr_token_destroy(), CRStatus, DASHMATCH_TK, DELIM_TK, _CRToken::dimen, DIMEN_TK, EMS_TK, EXS_TK, _CRToken::extra_type, FONT_FACE_SYM_TK, FREQ_TK, FUNCTION_TK, HASH_TK, IDENT_TK, IMPORT_SYM_TK, IMPORTANT_SYM_TK, INCLUDES_TK, LENGTH_TK, MEDIA_SYM_TK, NO_TK, NUMBER_TK, PAGE_SYM_TK, PERCENTAGE_TK, PRIVATE, S_TK, STRING_TK, TIME_TK, _CRToken::type, _CRToken::u, UNICODERANGE_TK, and URI_TK.

enum CRStatus cr_tknzr_peek_byte (CRTknzr *a_this, gulong a_offset, guchar *a_byte)
 

Peeks a byte ahead at a given position in the parser input stream.

Parameters:
a_this the current instance of CRTknzr.
a_offset the offset of the peeked byte starting from the current byte in the parser input stream.
a_byte out parameter. The peeked byte upon successful completion.
Returns:
CR_OK upon successful completion, an error code otherwise.

Definition at line 1915 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_peek_byte(), cr_input_set_cur_pos(), CR_SEEK_CUR, cr_token_destroy(), and PRIVATE.

Referenced by cr_parser_parse_expr().

guchar cr_tknzr_peek_byte2 (CRTknzr *a_this, gulong a_offset, gboolean *a_eof)
 

Same as cr_tknzr_peek_byte() but this api returns the byte peeked.

Parameters:
a_this the current instance of CRTknzr.
a_offset the offset of the peeked byte starting from the current byte in the parser input stream.
a_eof out parameter. If not NULL, is set to TRUE if we reached end of file, FALSE otherwise. If the caller sets it to NULL, this parameter is just ignored.
Returns:
the peeked byte.

Definition at line 1944 of file cr-tknzr.c.

References cr_input_peek_byte2(), and PRIVATE.

enum CRStatus cr_tknzr_peek_char (CRTknzr *a_this, guint32 *a_char)
 

Peeks a char from the parser input stream.

To "peek a char" means reads the next char without consuming it. Subsequent calls to this function return the same char.

Parameters:
a_this the current instance of CRTknzr.
a_char out parameter. The peeked char upon successful completion.
Returns:
CR_OK upon successful completion, an error code otherwise.

Definition at line 1885 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_peek_char(), cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

Referenced by cr_parser_parse_import().

enum CRStatus cr_tknzr_read_byte (CRTknzr *a_this, guchar *a_byte)
 

Reads the next byte from the parser input stream.

Parameters:
a_this the "this pointer" of the current instance of CRTknzr.
a_byte out parameter. The place where to store the byte read.
Returns:
CR_OK upon successful completion, an error code otherwise.

Definition at line 1838 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_read_byte(), and PRIVATE.

enum CRStatus cr_tknzr_read_char (CRTknzr *a_this, guint32 *a_char)
 

Reads the next char from the parser input stream.

Parameters:
a_this the current instance of CRTknzr.
a_char out parameter. The read char.
Returns:
CR_OK upon successful completion, an error code otherwise.

Definition at line 1857 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_read_char(), cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

void cr_tknzr_ref (CRTknzr *a_this)
 

Definition at line 1766 of file cr-tknzr.c.

References PRIVATE.

Referenced by cr_parser_set_tknzr().

enum CRStatus cr_tknzr_seek_index (CRTknzr *a_this, enum CRSeekPos a_origin, gint a_pos)
 

Definition at line 2019 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_seek_index(), cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

enum CRStatus cr_tknzr_set_cur_pos (CRTknzr *a_this, CRInputPos *a_pos)
 

Definition at line 2060 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_set_cur_pos(), cr_token_destroy(), and PRIVATE.

Referenced by cr_parser_parse_charset(), cr_parser_parse_declaration(), cr_parser_parse_expr(), cr_parser_parse_font_face(), cr_parser_parse_import(), cr_parser_parse_media(), cr_parser_parse_page(), cr_parser_parse_ruleset(), cr_parser_parse_statement_core(), and cr_tknzr_get_next_token().

enum CRStatus cr_tknzr_set_input (CRTknzr *a_this, CRInput *a_input)
 

Definition at line 1793 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, cr_input_ref(), cr_input_unref(), CR_OK, and PRIVATE.

Referenced by cr_tknzr_new().

enum CRStatus cr_tknzr_unget_token (CRTknzr *a_this, CRToken *a_token)
 

Definition at line 2077 of file cr-tknzr.c.

References CR_BAD_PARAM_ERROR, CR_OK, and PRIVATE.

Referenced by cr_parser_parse_page(), cr_parser_parse_statement_core(), cr_parser_try_to_skip_spaces_and_comments(), and cr_tknzr_parse_token().

gboolean cr_tknzr_unref (CRTknzr *a_this)
 

Definition at line 1774 of file cr-tknzr.c.

References cr_tknzr_destroy(), and PRIVATE.

Referenced by cr_parser_destroy(), and cr_parser_set_tknzr().


Generated on Wed Oct 1 01:36:52 2003 for Libcroco by doxygen 1.3.3