00001 #include <psych.h>
00002
00003 VALUE cPsychParser;
00004 VALUE ePsychSyntaxError;
00005
00006 static ID id_read;
00007 static ID id_empty;
00008 static ID id_start_stream;
00009 static ID id_end_stream;
00010 static ID id_start_document;
00011 static ID id_end_document;
00012 static ID id_alias;
00013 static ID id_scalar;
00014 static ID id_start_sequence;
00015 static ID id_end_sequence;
00016 static ID id_start_mapping;
00017 static ID id_end_mapping;
00018
/*
 * Associate the encoding index `yaml_enc_` with the string `str_`, then, if
 * `internal_enc_` is non-NULL, replace `str_` with the value returned by
 * rb_str_export_to_enc().
 *
 * `str_` is assigned to, so it must be a modifiable lvalue of type VALUE.
 */
#define PSYCH_TRANSCODE(str_, yaml_enc_, internal_enc_) \
    do { \
        rb_enc_associate_index((str_), (yaml_enc_)); \
        if ((internal_enc_)) { \
            (str_) = rb_str_export_to_enc((str_), (internal_enc_)); \
        } \
    } while (0)
00025
00026 static int io_reader(void * data, unsigned char *buf, size_t size, size_t *read)
00027 {
00028 VALUE io = (VALUE)data;
00029 VALUE string = rb_funcall(io, id_read, 1, INT2NUM(size));
00030
00031 *read = 0;
00032
00033 if(! NIL_P(string)) {
00034 void * str = (void *)StringValuePtr(string);
00035 *read = (size_t)RSTRING_LEN(string);
00036 memcpy(buf, str, *read);
00037 }
00038
00039 return 1;
00040 }
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051 static VALUE parse(VALUE self, VALUE yaml)
00052 {
00053 yaml_parser_t parser;
00054 yaml_event_t event;
00055 int done = 0;
00056 int tainted = 0;
00057 #ifdef HAVE_RUBY_ENCODING_H
00058 int encoding = rb_enc_find_index("ASCII-8BIT");
00059 rb_encoding * internal_enc;
00060 #endif
00061 VALUE handler = rb_iv_get(self, "@handler");
00062
00063
00064 yaml_parser_initialize(&parser);
00065
00066 if (OBJ_TAINTED(yaml)) tainted = 1;
00067
00068 if(rb_respond_to(yaml, id_read)) {
00069 yaml_parser_set_input(&parser, io_reader, (void *)yaml);
00070 if (RTEST(rb_obj_is_kind_of(yaml, rb_cIO))) tainted = 1;
00071 } else {
00072 StringValue(yaml);
00073 yaml_parser_set_input_string(
00074 &parser,
00075 (const unsigned char *)RSTRING_PTR(yaml),
00076 (size_t)RSTRING_LEN(yaml)
00077 );
00078 }
00079
00080 while(!done) {
00081 if(!yaml_parser_parse(&parser, &event)) {
00082 size_t line = parser.mark.line + 1;
00083 size_t column = parser.mark.column;
00084
00085 yaml_parser_delete(&parser);
00086 rb_raise(ePsychSyntaxError, "couldn't parse YAML at line %d column %d",
00087 (int)line, (int)column);
00088 }
00089
00090 switch(event.type) {
00091 case YAML_STREAM_START_EVENT:
00092
00093 #ifdef HAVE_RUBY_ENCODING_H
00094 switch(event.data.stream_start.encoding) {
00095 case YAML_ANY_ENCODING:
00096 break;
00097 case YAML_UTF8_ENCODING:
00098 encoding = rb_enc_find_index("UTF-8");
00099 break;
00100 case YAML_UTF16LE_ENCODING:
00101 encoding = rb_enc_find_index("UTF-16LE");
00102 break;
00103 case YAML_UTF16BE_ENCODING:
00104 encoding = rb_enc_find_index("UTF-16BE");
00105 break;
00106 default:
00107 break;
00108 }
00109 internal_enc = rb_default_internal_encoding();
00110 #endif
00111
00112 rb_funcall(handler, id_start_stream, 1,
00113 INT2NUM((long)event.data.stream_start.encoding)
00114 );
00115 break;
00116 case YAML_DOCUMENT_START_EVENT:
00117 {
00118
00119 VALUE tag_directives = rb_ary_new();
00120
00121 VALUE version = event.data.document_start.version_directive ?
00122 rb_ary_new3(
00123 (long)2,
00124 INT2NUM((long)event.data.document_start.version_directive->major),
00125 INT2NUM((long)event.data.document_start.version_directive->minor)
00126 ) : rb_ary_new();
00127
00128 if(event.data.document_start.tag_directives.start) {
00129 yaml_tag_directive_t *start =
00130 event.data.document_start.tag_directives.start;
00131 yaml_tag_directive_t *end =
00132 event.data.document_start.tag_directives.end;
00133 for(; start != end; start++) {
00134 VALUE handle = Qnil;
00135 VALUE prefix = Qnil;
00136 if(start->handle) {
00137 handle = rb_str_new2((const char *)start->handle);
00138 if (tainted) OBJ_TAINT(handle);
00139 #ifdef HAVE_RUBY_ENCODING_H
00140 PSYCH_TRANSCODE(handle, encoding, internal_enc);
00141 #endif
00142 }
00143
00144 if(start->prefix) {
00145 prefix = rb_str_new2((const char *)start->prefix);
00146 if (tainted) OBJ_TAINT(prefix);
00147 #ifdef HAVE_RUBY_ENCODING_H
00148 PSYCH_TRANSCODE(prefix, encoding, internal_enc);
00149 #endif
00150 }
00151
00152 rb_ary_push(tag_directives, rb_ary_new3((long)2, handle, prefix));
00153 }
00154 }
00155 rb_funcall(handler, id_start_document, 3,
00156 version, tag_directives,
00157 event.data.document_start.implicit == 1 ? Qtrue : Qfalse
00158 );
00159 }
00160 break;
00161 case YAML_DOCUMENT_END_EVENT:
00162 rb_funcall(handler, id_end_document, 1,
00163 event.data.document_end.implicit == 1 ? Qtrue : Qfalse
00164 );
00165 break;
00166 case YAML_ALIAS_EVENT:
00167 {
00168 VALUE alias = Qnil;
00169 if(event.data.alias.anchor) {
00170 alias = rb_str_new2((const char *)event.data.alias.anchor);
00171 if (tainted) OBJ_TAINT(alias);
00172 #ifdef HAVE_RUBY_ENCODING_H
00173 PSYCH_TRANSCODE(alias, encoding, internal_enc);
00174 #endif
00175 }
00176
00177 rb_funcall(handler, id_alias, 1, alias);
00178 }
00179 break;
00180 case YAML_SCALAR_EVENT:
00181 {
00182 VALUE anchor = Qnil;
00183 VALUE tag = Qnil;
00184 VALUE plain_implicit, quoted_implicit, style;
00185 VALUE val = rb_str_new(
00186 (const char *)event.data.scalar.value,
00187 (long)event.data.scalar.length
00188 );
00189 if (tainted) OBJ_TAINT(val);
00190
00191 #ifdef HAVE_RUBY_ENCODING_H
00192 PSYCH_TRANSCODE(val, encoding, internal_enc);
00193 #endif
00194
00195 if(event.data.scalar.anchor) {
00196 anchor = rb_str_new2((const char *)event.data.scalar.anchor);
00197 if (tainted) OBJ_TAINT(anchor);
00198 #ifdef HAVE_RUBY_ENCODING_H
00199 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00200 #endif
00201 }
00202
00203 if(event.data.scalar.tag) {
00204 tag = rb_str_new2((const char *)event.data.scalar.tag);
00205 if (tainted) OBJ_TAINT(tag);
00206 #ifdef HAVE_RUBY_ENCODING_H
00207 PSYCH_TRANSCODE(tag, encoding, internal_enc);
00208 #endif
00209 }
00210
00211 plain_implicit =
00212 event.data.scalar.plain_implicit == 0 ? Qfalse : Qtrue;
00213
00214 quoted_implicit =
00215 event.data.scalar.quoted_implicit == 0 ? Qfalse : Qtrue;
00216
00217 style = INT2NUM((long)event.data.scalar.style);
00218
00219 rb_funcall(handler, id_scalar, 6,
00220 val, anchor, tag, plain_implicit, quoted_implicit, style);
00221 }
00222 break;
00223 case YAML_SEQUENCE_START_EVENT:
00224 {
00225 VALUE anchor = Qnil;
00226 VALUE tag = Qnil;
00227 VALUE implicit, style;
00228 if(event.data.sequence_start.anchor) {
00229 anchor = rb_str_new2((const char *)event.data.sequence_start.anchor);
00230 if (tainted) OBJ_TAINT(anchor);
00231 #ifdef HAVE_RUBY_ENCODING_H
00232 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00233 #endif
00234 }
00235
00236 tag = Qnil;
00237 if(event.data.sequence_start.tag) {
00238 tag = rb_str_new2((const char *)event.data.sequence_start.tag);
00239 if (tainted) OBJ_TAINT(tag);
00240 #ifdef HAVE_RUBY_ENCODING_H
00241 PSYCH_TRANSCODE(tag, encoding, internal_enc);
00242 #endif
00243 }
00244
00245 implicit =
00246 event.data.sequence_start.implicit == 0 ? Qfalse : Qtrue;
00247
00248 style = INT2NUM((long)event.data.sequence_start.style);
00249
00250 rb_funcall(handler, id_start_sequence, 4,
00251 anchor, tag, implicit, style);
00252 }
00253 break;
00254 case YAML_SEQUENCE_END_EVENT:
00255 rb_funcall(handler, id_end_sequence, 0);
00256 break;
00257 case YAML_MAPPING_START_EVENT:
00258 {
00259 VALUE anchor = Qnil;
00260 VALUE tag = Qnil;
00261 VALUE implicit, style;
00262 if(event.data.mapping_start.anchor) {
00263 anchor = rb_str_new2((const char *)event.data.mapping_start.anchor);
00264 if (tainted) OBJ_TAINT(anchor);
00265 #ifdef HAVE_RUBY_ENCODING_H
00266 PSYCH_TRANSCODE(anchor, encoding, internal_enc);
00267 #endif
00268 }
00269
00270 if(event.data.mapping_start.tag) {
00271 tag = rb_str_new2((const char *)event.data.mapping_start.tag);
00272 if (tainted) OBJ_TAINT(tag);
00273 #ifdef HAVE_RUBY_ENCODING_H
00274 PSYCH_TRANSCODE(tag, encoding, internal_enc);
00275 #endif
00276 }
00277
00278 implicit =
00279 event.data.mapping_start.implicit == 0 ? Qfalse : Qtrue;
00280
00281 style = INT2NUM((long)event.data.mapping_start.style);
00282
00283 rb_funcall(handler, id_start_mapping, 4,
00284 anchor, tag, implicit, style);
00285 }
00286 break;
00287 case YAML_MAPPING_END_EVENT:
00288 rb_funcall(handler, id_end_mapping, 0);
00289 break;
00290 case YAML_NO_EVENT:
00291 rb_funcall(handler, id_empty, 0);
00292 break;
00293 case YAML_STREAM_END_EVENT:
00294 rb_funcall(handler, id_end_stream, 0);
00295 done = 1;
00296 break;
00297 }
00298 yaml_event_delete(&event);
00299 }
00300
00301 return self;
00302 }
00303
00304 void Init_psych_parser()
00305 {
00306 #if 0
00307 mPsych = rb_define_module("Psych");
00308 #endif
00309
00310 cPsychParser = rb_define_class_under(mPsych, "Parser", rb_cObject);
00311
00312
00313 rb_define_const(cPsychParser, "ANY", INT2NUM(YAML_ANY_ENCODING));
00314
00315
00316 rb_define_const(cPsychParser, "UTF8", INT2NUM(YAML_UTF8_ENCODING));
00317
00318
00319 rb_define_const(cPsychParser, "UTF16LE", INT2NUM(YAML_UTF16LE_ENCODING));
00320
00321
00322 rb_define_const(cPsychParser, "UTF16BE", INT2NUM(YAML_UTF16BE_ENCODING));
00323
00324 ePsychSyntaxError = rb_define_class_under(mPsych, "SyntaxError", rb_eSyntaxError);
00325
00326 rb_define_method(cPsychParser, "parse", parse, 1);
00327
00328 id_read = rb_intern("read");
00329 id_empty = rb_intern("empty");
00330 id_start_stream = rb_intern("start_stream");
00331 id_end_stream = rb_intern("end_stream");
00332 id_start_document = rb_intern("start_document");
00333 id_end_document = rb_intern("end_document");
00334 id_alias = rb_intern("alias");
00335 id_scalar = rb_intern("scalar");
00336 id_start_sequence = rb_intern("start_sequence");
00337 id_end_sequence = rb_intern("end_sequence");
00338 id_start_mapping = rb_intern("start_mapping");
00339 id_end_mapping = rb_intern("end_mapping");
00340 }
00341
00342