• Main Page
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

ext/psych/parser.c

Go to the documentation of this file.
00001 #include <psych.h>
00002 
/* Class objects created in Init_psych_parser. */
00003 VALUE cPsychParser;      /* the "Parser" class defined under mPsych */
00004 VALUE ePsychSyntaxError; /* the "SyntaxError" class defined under mPsych; raised by parse */
00005 
/*
 * Method-name IDs, each interned once in Init_psych_parser from the string
 * matching its suffix ("read", "empty", "start_stream", ...).
 */
00006 static ID id_read;           /* sent to IO-like input by io_reader */
00007 static ID id_empty;          /* handler callback for YAML_NO_EVENT */
00008 static ID id_start_stream;   /* handler callback for YAML_STREAM_START_EVENT */
00009 static ID id_end_stream;     /* handler callback for YAML_STREAM_END_EVENT */
00010 static ID id_start_document; /* handler callback for YAML_DOCUMENT_START_EVENT */
00011 static ID id_end_document;   /* handler callback for YAML_DOCUMENT_END_EVENT */
00012 static ID id_alias;          /* handler callback for YAML_ALIAS_EVENT */
00013 static ID id_scalar;         /* handler callback for YAML_SCALAR_EVENT */
00014 static ID id_start_sequence; /* handler callback for YAML_SEQUENCE_START_EVENT */
00015 static ID id_end_sequence;   /* handler callback for YAML_SEQUENCE_END_EVENT */
00016 static ID id_start_mapping;  /* handler callback for YAML_MAPPING_START_EVENT */
00017 static ID id_end_mapping;    /* handler callback for YAML_MAPPING_END_EVENT */
00018 
/*
 * Associate the encoding index _yaml_enc with _str and, when _internal_enc
 * is non-zero, replace _str with the result of rb_str_export_to_enc().
 *
 * _str must be a modifiable lvalue because it may be reassigned.  Arguments
 * are evaluated more than once, so they must be free of side effects.
 */
#define PSYCH_TRANSCODE(_str, _yaml_enc, _internal_enc) \
  do { \
    rb_enc_associate_index((_str), (_yaml_enc)); \
    if (_internal_enc) { \
      (_str) = rb_str_export_to_enc((_str), (_internal_enc)); \
    } \
  } while (0)
00025 
00026 static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read)
00027 {
00028     VALUE io = (VALUE)data;
00029     VALUE string = rb_funcall(io, id_read, 1, INT2NUM(size));
00030 
00031     *read = 0;
00032 
00033     if(! NIL_P(string)) {
00034         void * str = (void *)StringValuePtr(string);
00035         *read = (size_t)RSTRING_LEN(string);
00036         memcpy(buf, str, *read);
00037     }
00038 
00039     return 1;
00040 }
00041 
00042 /*
00043  * call-seq:
00044  *    parser.parse(yaml)
00045  *
00046  * Parse the YAML document contained in +yaml+.  Events will be called on
00047  * the handler set on the parser instance.
00048  *
00049  * See Psych::Parser and Psych::Parser#handler
00050  */
00051 static VALUE parse(VALUE self, VALUE yaml)
00052 {
00053     yaml_parser_t parser;
00054     yaml_event_t event;
00055     int done = 0;
00056     int tainted = 0;
00057 #ifdef HAVE_RUBY_ENCODING_H
00058     int encoding = rb_enc_find_index("ASCII-8BIT");
00059     rb_encoding * internal_enc;
00060 #endif
00061     VALUE handler = rb_iv_get(self, "@handler");
00062 
00063 
00064     yaml_parser_initialize(&parser);
00065 
00066     if (OBJ_TAINTED(yaml)) tainted = 1;
00067 
00068     if(rb_respond_to(yaml, id_read)) {
00069         yaml_parser_set_input(&parser, io_reader, (void *)yaml);
00070         if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1;
00071     } else {
00072         StringValue(yaml);
00073         yaml_parser_set_input_string(
00074                 &parser,
00075                 (const unsigned char *)RSTRING_PTR(yaml),
00076                 (size_t)RSTRING_LEN(yaml)
00077                 );
00078     }
00079 
00080     while(!done) {
00081         if(!yaml_parser_parse(&parser, &event)) {
00082             size_t line   = parser.mark.line + 1;
00083             size_t column = parser.mark.column;
00084 
00085             yaml_parser_delete(&parser);
00086             rb_raise(ePsychSyntaxError, "couldn't parse YAML at line %d column %d",
00087                     (int)line, (int)column);
00088         }
00089 
00090         switch(event.type) {
00091           case YAML_STREAM_START_EVENT:
00092 
00093 #ifdef HAVE_RUBY_ENCODING_H
00094             switch(event.data.stream_start.encoding) {
00095               case YAML_ANY_ENCODING:
00096                 break;
00097               case YAML_UTF8_ENCODING:
00098                 encoding = rb_enc_find_index("UTF-8");
00099                 break;
00100               case YAML_UTF16LE_ENCODING:
00101                 encoding = rb_enc_find_index("UTF-16LE");
00102                 break;
00103               case YAML_UTF16BE_ENCODING:
00104                 encoding = rb_enc_find_index("UTF-16BE");
00105                 break;
00106               default:
00107                 break;
00108             }
00109             internal_enc = rb_default_internal_encoding();
00110 #endif
00111 
00112             rb_funcall(handler, id_start_stream, 1,
00113                        INT2NUM((long)event.data.stream_start.encoding)
00114                 );
00115             break;
00116           case YAML_DOCUMENT_START_EVENT:
00117             {
00118                 /* Get a list of tag directives (if any) */
00119                 VALUE tag_directives = rb_ary_new();
00120                 /* Grab the document version */
00121                 VALUE version = event.data.document_start.version_directive ?
00122                     rb_ary_new3(
00123                         (long)2,
00124                         INT2NUM((long)event.data.document_start.version_directive->major),
00125                         INT2NUM((long)event.data.document_start.version_directive->minor)
00126                         ) : rb_ary_new();
00127 
00128                 if(event.data.document_start.tag_directives.start) {
00129                     yaml_tag_directive_t *start =
00130                         event.data.document_start.tag_directives.start;
00131                     yaml_tag_directive_t *end =
00132                         event.data.document_start.tag_directives.end;
00133                     for(; start != end; start++) {
00134                         VALUE handle = Qnil;
00135                         VALUE prefix = Qnil;
00136                         if(start->handle) {
00137                             handle = rb_str_new2((const char *)start->handle);
00138                             if (tainted) OBJ_TAINT(handle);
00139 #ifdef HAVE_RUBY_ENCODING_H
00140                             PSYCH_TRANSCODE(handle, encoding, internal_enc);
00141 #endif
00142                         }
00143 
00144                         if(start->prefix) {
00145                             prefix = rb_str_new2((const char *)start->prefix);
00146                             if (tainted) OBJ_TAINT(prefix);
00147 #ifdef HAVE_RUBY_ENCODING_H
00148                             PSYCH_TRANSCODE(prefix, encoding, internal_enc);
00149 #endif
00150                         }
00151 
00152                         rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
00153                     }
00154                 }
00155                 rb_funcall(handler, id_start_document, 3,
00156                            version, tag_directives,
00157                            event.data.document_start.implicit == 1 ? Qtrue : Qfalse
00158                     );
00159             }
00160             break;
00161           case YAML_DOCUMENT_END_EVENT:
00162             rb_funcall(handler, id_end_document, 1,
00163                        event.data.document_end.implicit == 1 ? Qtrue : Qfalse
00164                 );
00165             break;
00166           case YAML_ALIAS_EVENT:
00167             {
00168                 VALUE alias = Qnil;
00169                 if(event.data.alias.anchor) {
00170                     alias = rb_str_new2((const char *)event.data.alias.anchor);
00171                     if (tainted) OBJ_TAINT(alias);
00172 #ifdef HAVE_RUBY_ENCODING_H
00173                     PSYCH_TRANSCODE(alias, encoding, internal_enc);
00174 #endif
00175                 }
00176 
00177                 rb_funcall(handler, id_alias, 1, alias);
00178             }
00179             break;
00180           case YAML_SCALAR_EVENT:
00181             {
00182                 VALUE anchor = Qnil;
00183                 VALUE tag = Qnil;
00184                 VALUE plain_implicit, quoted_implicit, style;
00185                 VALUE val = rb_str_new(
00186                     (const char *)event.data.scalar.value,
00187                     (long)event.data.scalar.length
00188                     );
00189                 if (tainted) OBJ_TAINT(val);
00190 
00191 #ifdef HAVE_RUBY_ENCODING_H
00192                 PSYCH_TRANSCODE(val, encoding, internal_enc);
00193 #endif
00194 
00195                 if(event.data.scalar.anchor) {
00196                     anchor = rb_str_new2((const char *)event.data.scalar.anchor);
00197                     if (tainted) OBJ_TAINT(anchor);
00198 #ifdef HAVE_RUBY_ENCODING_H
00199                     PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00200 #endif
00201                 }
00202 
00203                 if(event.data.scalar.tag) {
00204                     tag = rb_str_new2((const char *)event.data.scalar.tag);
00205                     if (tainted) OBJ_TAINT(tag);
00206 #ifdef HAVE_RUBY_ENCODING_H
00207                     PSYCH_TRANSCODE(tag, encoding, internal_enc);
00208 #endif
00209                 }
00210 
00211                 plain_implicit =
00212                     event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;
00213 
00214                 quoted_implicit =
00215                     event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;
00216 
00217                 style = INT2NUM((long)event.data.scalar.style);
00218 
00219                 rb_funcall(handler, id_scalar, 6,
00220                            val, anchor, tag, plain_implicit, quoted_implicit, style);
00221             }
00222             break;
00223           case YAML_SEQUENCE_START_EVENT:
00224             {
00225                 VALUE anchor = Qnil;
00226                 VALUE tag = Qnil;
00227                 VALUE implicit, style;
00228                 if(event.data.sequence_start.anchor) {
00229                     anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
00230                     if (tainted) OBJ_TAINT(anchor);
00231 #ifdef HAVE_RUBY_ENCODING_H
00232                     PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00233 #endif
00234                 }
00235 
00236                 tag = Qnil;
00237                 if(event.data.sequence_start.tag) {
00238                     tag = rb_str_new2((const char *)event.data.sequence_start.tag);
00239                     if (tainted) OBJ_TAINT(tag);
00240 #ifdef HAVE_RUBY_ENCODING_H
00241                     PSYCH_TRANSCODE(tag, encoding, internal_enc);
00242 #endif
00243                 }
00244 
00245                 implicit =
00246                     event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;
00247 
00248                 style = INT2NUM((long)event.data.sequence_start.style);
00249 
00250                 rb_funcall(handler, id_start_sequence, 4,
00251                            anchor, tag, implicit, style);
00252             }
00253             break;
00254           case YAML_SEQUENCE_END_EVENT:
00255             rb_funcall(handler, id_end_sequence, 0);
00256             break;
00257           case YAML_MAPPING_START_EVENT:
00258             {
00259                 VALUE anchor = Qnil;
00260                 VALUE tag = Qnil;
00261                 VALUE implicit, style;
00262                 if(event.data.mapping_start.anchor) {
00263                     anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
00264                     if (tainted) OBJ_TAINT(anchor);
00265 #ifdef HAVE_RUBY_ENCODING_H
00266                     PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00267 #endif
00268                 }
00269 
00270                 if(event.data.mapping_start.tag) {
00271                     tag = rb_str_new2((const char *)event.data.mapping_start.tag);
00272                     if (tainted) OBJ_TAINT(tag);
00273 #ifdef HAVE_RUBY_ENCODING_H
00274                     PSYCH_TRANSCODE(tag, encoding, internal_enc);
00275 #endif
00276                 }
00277 
00278                 implicit =
00279                     event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;
00280 
00281                 style = INT2NUM((long)event.data.mapping_start.style);
00282 
00283                 rb_funcall(handler, id_start_mapping, 4,
00284                            anchor, tag, implicit, style);
00285             }
00286             break;
00287           case YAML_MAPPING_END_EVENT:
00288             rb_funcall(handler, id_end_mapping, 0);
00289             break;
00290           case YAML_NO_EVENT:
00291             rb_funcall(handler, id_empty, 0);
00292             break;
00293           case YAML_STREAM_END_EVENT:
00294             rb_funcall(handler, id_end_stream, 0);
00295             done = 1;
00296             break;
00297         }
00298         yaml_event_delete(&event);
00299     }
00300 
00301     return self;
00302 }
00303 
00304 void Init_psych_parser()
00305 {
00306 #if 0
00307     mPsych = rb_define_module("Psych");
00308 #endif
00309 
00310     cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject);
00311 
00312     /* Any encoding: Let the parser choose the encoding */
00313     rb_define_const(cPsychParser, "ANY", INT2NUM(YAML_ANY_ENCODING));
00314 
00315     /* UTF-8 Encoding */
00316     rb_define_const(cPsychParser, "UTF8", INT2NUM(YAML_UTF8_ENCODING));
00317 
00318     /* UTF-16-LE Encoding with BOM */
00319     rb_define_const(cPsychParser, "UTF16LE", INT2NUM(YAML_UTF16LE_ENCODING));
00320 
00321     /* UTF-16-BE Encoding with BOM */
00322     rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING));
00323 
00324     ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError);
00325 
00326     rb_define_method(cPsychParser, "parse", parse, 1);
00327 
00328     id_read           = rb_intern("read");
00329     id_empty          = rb_intern("empty");
00330     id_start_stream   = rb_intern("start_stream");
00331     id_end_stream     = rb_intern("end_stream");
00332     id_start_document = rb_intern("start_document");
00333     id_end_document   = rb_intern("end_document");
00334     id_alias          = rb_intern("alias");
00335     id_scalar         = rb_intern("scalar");
00336     id_start_sequence = rb_intern("start_sequence");
00337     id_end_sequence   = rb_intern("end_sequence");
00338     id_start_mapping  = rb_intern("start_mapping");
00339     id_end_mapping    = rb_intern("end_mapping");
00340 }
00341 /* vim: set noet sws=4 sw=4: */
00342 

Generated on Sat Jul 7 2012 15:29:09 for Ruby by  doxygen 1.7.1