Ruby  1.9.3p429(2013-05-15revision40747)
marshal.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  marshal.c -
4 
5  $Author: usa $
6  created at: Thu Apr 27 16:30:01 JST 1995
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/ruby.h"
13 #include "ruby/io.h"
14 #include "ruby/st.h"
15 #include "ruby/util.h"
16 #include "ruby/encoding.h"
17 #include "internal.h"
18 
19 #include <math.h>
20 #ifdef HAVE_FLOAT_H
21 #include <float.h>
22 #endif
23 #ifdef HAVE_IEEEFP_H
24 #include <ieeefp.h>
25 #endif
26 
27 #define BITSPERSHORT (2*CHAR_BIT)
28 #define SHORTMASK ((1<<BITSPERSHORT)-1)
29 #define SHORTDN(x) RSHIFT((x),BITSPERSHORT)
30 
31 #if SIZEOF_SHORT == SIZEOF_BDIGITS
32 #define SHORTLEN(x) (x)
33 #else
34 static long
35 shortlen(long len, BDIGIT *ds)
36 {
37  BDIGIT num;
38  int offset = 0;
39 
40  num = ds[len-1];
41  while (num) {
42  num = SHORTDN(num);
43  offset++;
44  }
45  return (len - 1)*sizeof(BDIGIT)/2 + offset;
46 }
47 #define SHORTLEN(x) shortlen((x),d)
48 #endif
49 
50 #define MARSHAL_MAJOR 4
51 #define MARSHAL_MINOR 8
52 
53 #define TYPE_NIL '0'
54 #define TYPE_TRUE 'T'
55 #define TYPE_FALSE 'F'
56 #define TYPE_FIXNUM 'i'
57 
58 #define TYPE_EXTENDED 'e'
59 #define TYPE_UCLASS 'C'
60 #define TYPE_OBJECT 'o'
61 #define TYPE_DATA 'd'
62 #define TYPE_USERDEF 'u'
63 #define TYPE_USRMARSHAL 'U'
64 #define TYPE_FLOAT 'f'
65 #define TYPE_BIGNUM 'l'
66 #define TYPE_STRING '"'
67 #define TYPE_REGEXP '/'
68 #define TYPE_ARRAY '['
69 #define TYPE_HASH '{'
70 #define TYPE_HASH_DEF '}'
71 #define TYPE_STRUCT 'S'
72 #define TYPE_MODULE_OLD 'M'
73 #define TYPE_CLASS 'c'
74 #define TYPE_MODULE 'm'
75 
76 #define TYPE_SYMBOL ':'
77 #define TYPE_SYMLINK ';'
78 
79 #define TYPE_IVAR 'I'
80 #define TYPE_LINK '@'
81 
85 
86 typedef struct {
89  VALUE (*dumper)(VALUE);
90  VALUE (*loader)(VALUE, VALUE);
92 
95 
96 static int
98 {
100  rb_gc_mark(p->newclass);
101  rb_gc_mark(p->oldclass);
102  return ST_CONTINUE;
103 }
104 
105 static void
107 {
108  if (!tbl) return;
110 }
111 
112 void
113 rb_marshal_define_compat(VALUE newclass, VALUE oldclass, VALUE (*dumper)(VALUE), VALUE (*loader)(VALUE, VALUE))
114 {
115  marshal_compat_t *compat;
116  rb_alloc_func_t allocator = rb_get_alloc_func(newclass);
117 
118  if (!allocator) {
119  rb_raise(rb_eTypeError, "no allocator");
120  }
121 
122  compat = ALLOC(marshal_compat_t);
123  compat->newclass = Qnil;
124  compat->oldclass = Qnil;
125  compat->newclass = newclass;
126  compat->oldclass = oldclass;
127  compat->dumper = dumper;
128  compat->loader = loader;
129 
130  st_insert(compat_allocator_tbl, (st_data_t)allocator, (st_data_t)compat);
131 }
132 
133 #define MARSHAL_INFECTION (FL_TAINT|FL_UNTRUSTED)
134 typedef char ruby_check_marshal_viral_flags[MARSHAL_INFECTION == (int)MARSHAL_INFECTION ? 1 : -1];
135 
136 struct dump_arg {
143 };
144 
147  struct dump_arg *arg;
148  int limit;
149 };
150 
151 static void
153 {
154  if (!arg->symbols) {
155  rb_raise(rb_eRuntimeError, "Marshal.dump reentered at %s",
156  rb_id2name(sym));
157  }
158 }
159 
160 static void clear_dump_arg(struct dump_arg *arg);
161 
162 static void
163 mark_dump_arg(void *ptr)
164 {
165  struct dump_arg *p = ptr;
166  if (!p->symbols)
167  return;
168  rb_mark_set(p->data);
170  rb_gc_mark(p->str);
171 }
172 
/*
 * Free callback for the dump_arg wrapper: releases the tables owned by
 * the structure through clear_dump_arg(), then the structure itself.
 */
static void
free_dump_arg(void *ptr)
{
    struct dump_arg *arg = ptr;

    clear_dump_arg(arg);
    xfree(arg);
}
179 
180 static size_t
181 memsize_dump_arg(const void *ptr)
182 {
183  return ptr ? sizeof(struct dump_arg) : 0;
184 }
185 
187  "dump_arg",
189 };
190 
191 static const char *
192 must_not_be_anonymous(const char *type, VALUE path)
193 {
194  char *n = RSTRING_PTR(path);
195 
196  if (!rb_enc_asciicompat(rb_enc_get(path))) {
197  /* cannot occur? */
198  rb_raise(rb_eTypeError, "can't dump non-ascii %s name", type);
199  }
200  if (n[0] == '#') {
201  rb_raise(rb_eTypeError, "can't dump anonymous %s %.*s", type,
202  (int)RSTRING_LEN(path), n);
203  }
204  return n;
205 }
206 
207 static VALUE
209 {
210  VALUE path = rb_class_path(klass);
211  const char *n;
212 
213  n = must_not_be_anonymous((TYPE(klass) == T_CLASS ? "class" : "module"), path);
214  if (rb_path_to_class(path) != rb_class_real(klass)) {
215  rb_raise(rb_eTypeError, "%s can't be referred to", n);
216  }
217  return path;
218 }
219 
220 static void w_long(long, struct dump_arg*);
221 static void w_encoding(VALUE obj, long num, struct dump_call_arg *arg);
222 
223 static void
224 w_nbyte(const char *s, long n, struct dump_arg *arg)
225 {
226  VALUE buf = arg->str;
227  rb_str_buf_cat(buf, s, n);
228  RBASIC(buf)->flags |= arg->infection;
229  if (arg->dest && RSTRING_LEN(buf) >= BUFSIZ) {
230  rb_io_write(arg->dest, buf);
231  rb_str_resize(buf, 0);
232  }
233 }
234 
/* Writes the single byte c to the dump output. */
static void
w_byte(char c, struct dump_arg *arg)
{
    const char *one = &c;

    w_nbyte(one, 1, arg);
}
240 
/*
 * Writes a length-prefixed byte sequence: the count n encoded by w_long(),
 * followed by the n bytes at s.
 */
static void
w_bytes(const char *s, long n, struct dump_arg *arg)
{
    /* length prefix first ... */
    w_long(n, arg);
    /* ... then the payload itself */
    w_nbyte(s, n, arg);
}
247 
248 #define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
249 
/*
 * Writes the low 16 bits of x as two bytes, least significant byte first.
 */
static void
w_short(int x, struct dump_arg *arg)
{
    char low = (char)(x & 0xff);
    char high = (char)((x >> 8) & 0xff);

    w_byte(low, arg);
    w_byte(high, arg);
}
256 
/*
 * Writes x in the compact marshal integer encoding:
 *
 *   0           -> a single 0 byte
 *   1 .. 122    -> a single byte x + 5
 *   -123 .. -1  -> a single byte (x - 5) & 0xff
 *   otherwise   -> a count byte (n for non-negative, -n for negative
 *                  values) followed by the n low-order bytes of x, least
 *                  significant first, where n is the number of bytes after
 *                  which the remaining high part is all zeros or all ones
 *
 * When long is wider than 4 bytes, raises TypeError if x does not fit in
 * 32 bits (i.e. x shifted right by 31 is neither 0 nor -1).
 */
static void
w_long(long x, struct dump_arg *arg)
{
    char out[sizeof(long)+1];
    int n, k;

#if SIZEOF_LONG > 4
    {
        long high = RSHIFT(x, 31);

        if (high != 0 && high != -1) {
            /* big long does not fit in 4 bytes */
            rb_raise(rb_eTypeError, "long too big to dump");
        }
    }
#endif

    if (x == 0) {
        w_byte(0, arg);
        return;
    }
    if (x > 0 && x < 123) {
        w_byte((char)(x + 5), arg);
        return;
    }
    if (x < 0 && x > -124) {
        w_byte((char)((x - 5)&0xff), arg);
        return;
    }

    /* out[1..n] collects the payload; out[0] receives the signed count */
    for (n = 1; n < (int)sizeof(long)+1; n++) {
        out[n] = (char)(x & 0xff);
        x = RSHIFT(x,8);
        if (x == 0 || x == -1) {
            out[0] = (char)(x == 0 ? n : -n);
            break;
        }
    }
    for (k = 0; k <= n; k++) {
        w_byte(out[k], arg);
    }
}
299 
#ifdef DBL_MANT_DIG
#define DECIMAL_MANT (53-16) /* from IEEE754 double precision */

#if DBL_MANT_DIG > 32
#define MANT_BITS 32
#elif DBL_MANT_DIG > 24
#define MANT_BITS 24
#elif DBL_MANT_DIG > 16
#define MANT_BITS 16
#else
#define MANT_BITS 8
#endif

/*
 * Refines d with the binary mantissa tail that may follow the decimal
 * text of a dumped float.
 *
 *   d   - value already parsed from the decimal text
 *   buf - bytes that follow the decimal text
 *   len - number of bytes at buf
 *
 * The tail is used only when it is at least two bytes long and starts
 * with a zero byte (the binary mantissa mark); otherwise d is returned
 * unchanged.  With a tail, the magnitude of d is scaled so that its
 * leading DECIMAL_MANT bits form the integer part, the fraction is
 * discarded, and the tail bytes are added below it as big-endian groups
 * of at most MANT_BITS/8 bytes.  The result is scaled back and the
 * original sign is restored.
 */
static double
load_mantissa(double d, const char *buf, long len)
{
    const long group = MANT_BITS / 8;
    int exponent, negative, scale = 0;

    if (!len) return d;
    len--;
    if (len <= 0) return d;
    if (*buf++) return d;       /* no binary mantissa mark */

    negative = d < 0;
    /* keep only the integer part of the scaled magnitude */
    modf(ldexp(frexp(fabs(d), &exponent), DECIMAL_MANT), &d);

    while (len > 0) {
        long take = len < group ? len : group;
        unsigned long m = 0;
        long k;

        for (k = 0; k < take; k++) {
            m = (m << 8) | (*buf++ & 0xff);
        }
        scale -= 8 * (int)take;
        d += ldexp((double)m, scale);
        len -= group;
    }

    d = ldexp(d, exponent - DECIMAL_MANT);
    if (negative) d = -d;
    return d;
}
#else
#define load_mantissa(d, buf, len) (d)
#endif
347 
348 #ifdef DBL_DIG
349 #define FLOAT_DIG (DBL_DIG+2)
350 #else
351 #define FLOAT_DIG 17
352 #endif
353 
354 static void
355 w_float(double d, struct dump_arg *arg)
356 {
357  char *ruby_dtoa(double d_, int mode, int ndigits, int *decpt, int *sign, char **rve);
358  char buf[FLOAT_DIG + (DECIMAL_MANT + 7) / 8 + 10];
359 
360  if (isinf(d)) {
361  if (d < 0) w_cstr("-inf", arg);
362  else w_cstr("inf", arg);
363  }
364  else if (isnan(d)) {
365  w_cstr("nan", arg);
366  }
367  else if (d == 0.0) {
368  if (1.0/d < 0) w_cstr("-0", arg);
369  else w_cstr("0", arg);
370  }
371  else {
372  int decpt, sign, digs, len = 0;
373  char *e, *p = ruby_dtoa(d, 0, 0, &decpt, &sign, &e);
374  if (sign) buf[len++] = '-';
375  digs = (int)(e - p);
376  if (decpt < -3 || decpt > digs) {
377  buf[len++] = p[0];
378  if (--digs > 0) buf[len++] = '.';
379  memcpy(buf + len, p + 1, digs);
380  len += digs;
381  len += snprintf(buf + len, sizeof(buf) - len, "e%d", decpt - 1);
382  }
383  else if (decpt > 0) {
384  memcpy(buf + len, p, decpt);
385  len += decpt;
386  if ((digs -= decpt) > 0) {
387  buf[len++] = '.';
388  memcpy(buf + len, p + decpt, digs);
389  len += digs;
390  }
391  }
392  else {
393  buf[len++] = '0';
394  buf[len++] = '.';
395  if (decpt) {
396  memset(buf + len, '0', -decpt);
397  len -= decpt;
398  }
399  memcpy(buf + len, p, digs);
400  len += digs;
401  }
402  xfree(p);
403  w_bytes(buf, len, arg);
404  }
405 }
406 
407 static void
408 w_symbol(ID id, struct dump_arg *arg)
409 {
410  VALUE sym;
411  st_data_t num;
412  int encidx = -1;
413 
414  if (st_lookup(arg->symbols, id, &num)) {
415  w_byte(TYPE_SYMLINK, arg);
416  w_long((long)num, arg);
417  }
418  else {
419  sym = rb_id2str(id);
420  if (!sym) {
421  rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, id);
422  }
423  encidx = rb_enc_get_index(sym);
424  if (encidx == rb_usascii_encindex() ||
426  encidx = -1;
427  }
428  else {
429  w_byte(TYPE_IVAR, arg);
430  }
431  w_byte(TYPE_SYMBOL, arg);
432  w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
433  st_add_direct(arg->symbols, id, arg->symbols->num_entries);
434  if (encidx != -1) {
435  struct dump_call_arg c_arg;
436  c_arg.limit = 1;
437  c_arg.arg = arg;
438  w_encoding(sym, 0, &c_arg);
439  }
440  }
441 }
442 
443 static void
445 {
446  must_not_be_anonymous("class", s);
447  w_symbol(rb_intern_str(s), arg);
448 }
449 
450 static void w_object(VALUE,struct dump_arg*,int);
451 
452 static int
454 {
455  w_object(key, arg->arg, arg->limit);
456  w_object(value, arg->arg, arg->limit);
457  return ST_CONTINUE;
458 }
459 
460 static void
461 w_extended(VALUE klass, struct dump_arg *arg, int check)
462 {
463  if (check && FL_TEST(klass, FL_SINGLETON)) {
464  if (RCLASS_M_TBL(klass)->num_entries ||
465  (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) {
466  rb_raise(rb_eTypeError, "singleton can't be dumped");
467  }
468  klass = RCLASS_SUPER(klass);
469  }
470  while (BUILTIN_TYPE(klass) == T_ICLASS) {
471  VALUE path = rb_class_name(RBASIC(klass)->klass);
472  w_byte(TYPE_EXTENDED, arg);
473  w_unique(path, arg);
474  klass = RCLASS_SUPER(klass);
475  }
476 }
477 
478 static void
479 w_class(char type, VALUE obj, struct dump_arg *arg, int check)
480 {
481  VALUE path;
482  st_data_t real_obj;
483  VALUE klass;
484 
485  if (st_lookup(arg->compat_tbl, (st_data_t)obj, &real_obj)) {
486  obj = (VALUE)real_obj;
487  }
488  klass = CLASS_OF(obj);
489  w_extended(klass, arg, check);
490  w_byte(type, arg);
491  path = class2path(rb_class_real(klass));
492  w_unique(path, arg);
493 }
494 
495 static void
496 w_uclass(VALUE obj, VALUE super, struct dump_arg *arg)
497 {
498  VALUE klass = CLASS_OF(obj);
499 
500  w_extended(klass, arg, TRUE);
501  klass = rb_class_real(klass);
502  if (klass != super) {
503  w_byte(TYPE_UCLASS, arg);
504  w_unique(class2path(klass), arg);
505  }
506 }
507 
508 static int
509 w_obj_each(ID id, VALUE value, struct dump_call_arg *arg)
510 {
511  if (id == rb_id_encoding()) return ST_CONTINUE;
512  if (id == rb_intern("E")) return ST_CONTINUE;
513  w_symbol(id, arg->arg);
514  w_object(value, arg->arg, arg->limit);
515  return ST_CONTINUE;
516 }
517 
518 static void
519 w_encoding(VALUE obj, long num, struct dump_call_arg *arg)
520 {
521  int encidx = rb_enc_get_index(obj);
522  rb_encoding *enc = 0;
523  st_data_t name;
524 
525  if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) {
526  w_long(num, arg->arg);
527  return;
528  }
529  w_long(num + 1, arg->arg);
530 
531  /* special treatment for US-ASCII and UTF-8 */
532  if (encidx == rb_usascii_encindex()) {
533  w_symbol(rb_intern("E"), arg->arg);
534  w_object(Qfalse, arg->arg, arg->limit + 1);
535  return;
536  }
537  else if (encidx == rb_utf8_encindex()) {
538  w_symbol(rb_intern("E"), arg->arg);
539  w_object(Qtrue, arg->arg, arg->limit + 1);
540  return;
541  }
542 
543  w_symbol(rb_id_encoding(), arg->arg);
544  do {
545  if (!arg->arg->encodings)
547  else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name))
548  break;
549  name = (st_data_t)rb_str_new2(rb_enc_name(enc));
550  st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name);
551  } while (0);
552  w_object(name, arg->arg, arg->limit + 1);
553 }
554 
555 static void
557 {
558  long num = tbl ? tbl->num_entries : 0;
559 
560  w_encoding(obj, num, arg);
561  if (tbl) {
563  }
564 }
565 
566 static void
568 {
569  VALUE *ptr;
570  long i, len, num;
571 
572  len = ROBJECT_NUMIV(obj);
573  ptr = ROBJECT_IVPTR(obj);
574  num = 0;
575  for (i = 0; i < len; i++)
576  if (ptr[i] != Qundef)
577  num += 1;
578 
579  w_encoding(obj, num, arg);
580  if (num != 0) {
582  }
583 }
584 
585 static void
587 {
588  struct dump_call_arg c_arg;
589  st_table *ivtbl = 0;
590  st_data_t num;
591  int hasiv = 0;
592 #define has_ivars(obj, ivtbl) (((ivtbl) = rb_generic_ivar_table(obj)) != 0 || \
593  (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj)))
594 
595  if (limit == 0) {
596  rb_raise(rb_eArgError, "exceed depth limit");
597  }
598 
599  limit--;
600  c_arg.limit = limit;
601  c_arg.arg = arg;
602 
603  if (st_lookup(arg->data, obj, &num)) {
604  w_byte(TYPE_LINK, arg);
605  w_long((long)num, arg);
606  return;
607  }
608 
609  if (obj == Qnil) {
610  w_byte(TYPE_NIL, arg);
611  }
612  else if (obj == Qtrue) {
613  w_byte(TYPE_TRUE, arg);
614  }
615  else if (obj == Qfalse) {
616  w_byte(TYPE_FALSE, arg);
617  }
618  else if (FIXNUM_P(obj)) {
619 #if SIZEOF_LONG <= 4
620  w_byte(TYPE_FIXNUM, arg);
621  w_long(FIX2INT(obj), arg);
622 #else
623  if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
624  w_byte(TYPE_FIXNUM, arg);
625  w_long(FIX2LONG(obj), arg);
626  }
627  else {
628  w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
629  }
630 #endif
631  }
632  else if (SYMBOL_P(obj)) {
633  w_symbol(SYM2ID(obj), arg);
634  }
635  else {
636  arg->infection |= (int)FL_TEST(obj, MARSHAL_INFECTION);
637 
638  if (rb_respond_to(obj, s_mdump)) {
639  volatile VALUE v;
640 
641  st_add_direct(arg->data, obj, arg->data->num_entries);
642 
643  v = rb_funcall(obj, s_mdump, 0, 0);
644  check_dump_arg(arg, s_mdump);
645  hasiv = has_ivars(obj, ivtbl);
646  if (hasiv) w_byte(TYPE_IVAR, arg);
647  w_class(TYPE_USRMARSHAL, obj, arg, FALSE);
648  w_object(v, arg, limit);
649  if (hasiv) w_ivar(obj, ivtbl, &c_arg);
650  return;
651  }
652  if (rb_respond_to(obj, s_dump)) {
653  VALUE v;
654  st_table *ivtbl2 = 0;
655  int hasiv2;
656 
657  v = rb_funcall(obj, s_dump, 1, INT2NUM(limit));
658  check_dump_arg(arg, s_dump);
659  if (TYPE(v) != T_STRING) {
660  rb_raise(rb_eTypeError, "_dump() must return string");
661  }
662  hasiv = has_ivars(obj, ivtbl);
663  if (hasiv) w_byte(TYPE_IVAR, arg);
664  if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) {
665  w_byte(TYPE_IVAR, arg);
666  }
667  w_class(TYPE_USERDEF, obj, arg, FALSE);
668  w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg);
669  if (hasiv2) {
670  w_ivar(v, ivtbl2, &c_arg);
671  }
672  else if (hasiv) {
673  w_ivar(obj, ivtbl, &c_arg);
674  }
675  st_add_direct(arg->data, obj, arg->data->num_entries);
676  return;
677  }
678 
679  st_add_direct(arg->data, obj, arg->data->num_entries);
680 
681  hasiv = has_ivars(obj, ivtbl);
682  {
683  st_data_t compat_data;
684  rb_alloc_func_t allocator = rb_get_alloc_func(RBASIC(obj)->klass);
685  if (st_lookup(compat_allocator_tbl,
686  (st_data_t)allocator,
687  &compat_data)) {
688  marshal_compat_t *compat = (marshal_compat_t*)compat_data;
689  VALUE real_obj = obj;
690  obj = compat->dumper(real_obj);
691  st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
692  if (obj != real_obj && !ivtbl) hasiv = 0;
693  }
694  }
695  if (hasiv) w_byte(TYPE_IVAR, arg);
696 
697  switch (BUILTIN_TYPE(obj)) {
698  case T_CLASS:
699  if (FL_TEST(obj, FL_SINGLETON)) {
700  rb_raise(rb_eTypeError, "singleton class can't be dumped");
701  }
702  w_byte(TYPE_CLASS, arg);
703  {
704  volatile VALUE path = class2path(obj);
705  w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
706  }
707  break;
708 
709  case T_MODULE:
710  w_byte(TYPE_MODULE, arg);
711  {
712  VALUE path = class2path(obj);
713  w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
714  }
715  break;
716 
717  case T_FLOAT:
718  w_byte(TYPE_FLOAT, arg);
719  w_float(RFLOAT_VALUE(obj), arg);
720  break;
721 
722  case T_BIGNUM:
723  w_byte(TYPE_BIGNUM, arg);
724  {
725  char sign = RBIGNUM_SIGN(obj) ? '+' : '-';
726  long len = RBIGNUM_LEN(obj);
727  BDIGIT *d = RBIGNUM_DIGITS(obj);
728 
729  w_byte(sign, arg);
730  w_long(SHORTLEN(len), arg); /* w_short? */
731  while (len--) {
732 #if SIZEOF_BDIGITS > SIZEOF_SHORT
733  BDIGIT num = *d;
734  int i;
735 
736  for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) {
737  w_short(num & SHORTMASK, arg);
738  num = SHORTDN(num);
739  if (len == 0 && num == 0) break;
740  }
741 #else
742  w_short(*d, arg);
743 #endif
744  d++;
745  }
746  }
747  break;
748 
749  case T_STRING:
750  w_uclass(obj, rb_cString, arg);
751  w_byte(TYPE_STRING, arg);
752  w_bytes(RSTRING_PTR(obj), RSTRING_LEN(obj), arg);
753  break;
754 
755  case T_REGEXP:
756  w_uclass(obj, rb_cRegexp, arg);
757  w_byte(TYPE_REGEXP, arg);
758  {
759  int opts = rb_reg_options(obj);
760  w_bytes(RREGEXP_SRC_PTR(obj), RREGEXP_SRC_LEN(obj), arg);
761  w_byte((char)opts, arg);
762  }
763  break;
764 
765  case T_ARRAY:
766  w_uclass(obj, rb_cArray, arg);
767  w_byte(TYPE_ARRAY, arg);
768  {
769  long i, len = RARRAY_LEN(obj);
770 
771  w_long(len, arg);
772  for (i=0; i<RARRAY_LEN(obj); i++) {
773  w_object(RARRAY_PTR(obj)[i], arg, limit);
774  if (len != RARRAY_LEN(obj)) {
775  rb_raise(rb_eRuntimeError, "array modified during dump");
776  }
777  }
778  }
779  break;
780 
781  case T_HASH:
782  w_uclass(obj, rb_cHash, arg);
783  if (NIL_P(RHASH_IFNONE(obj))) {
784  w_byte(TYPE_HASH, arg);
785  }
786  else if (FL_TEST(obj, FL_USER2)) {
787  /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */
788  rb_raise(rb_eTypeError, "can't dump hash with default proc");
789  }
790  else {
791  w_byte(TYPE_HASH_DEF, arg);
792  }
793  w_long(RHASH_SIZE(obj), arg);
794  rb_hash_foreach(obj, hash_each, (st_data_t)&c_arg);
795  if (!NIL_P(RHASH_IFNONE(obj))) {
796  w_object(RHASH_IFNONE(obj), arg, limit);
797  }
798  break;
799 
800  case T_STRUCT:
801  w_class(TYPE_STRUCT, obj, arg, TRUE);
802  {
803  long len = RSTRUCT_LEN(obj);
804  VALUE mem;
805  long i;
806 
807  w_long(len, arg);
808  mem = rb_struct_members(obj);
809  for (i=0; i<len; i++) {
810  w_symbol(SYM2ID(RARRAY_PTR(mem)[i]), arg);
811  w_object(RSTRUCT_PTR(obj)[i], arg, limit);
812  }
813  }
814  break;
815 
816  case T_OBJECT:
817  w_class(TYPE_OBJECT, obj, arg, TRUE);
818  w_objivar(obj, &c_arg);
819  break;
820 
821  case T_DATA:
822  {
823  VALUE v;
824 
825  if (!rb_respond_to(obj, s_dump_data)) {
827  "no _dump_data is defined for class %s",
828  rb_obj_classname(obj));
829  }
830  v = rb_funcall(obj, s_dump_data, 0);
832  w_class(TYPE_DATA, obj, arg, TRUE);
833  w_object(v, arg, limit);
834  }
835  break;
836 
837  default:
838  rb_raise(rb_eTypeError, "can't dump %s",
839  rb_obj_classname(obj));
840  break;
841  }
842  }
843  if (hasiv) {
844  w_ivar(obj, ivtbl, &c_arg);
845  }
846 }
847 
848 static void
850 {
851  if (!arg->symbols) return;
852  st_free_table(arg->symbols);
853  arg->symbols = 0;
854  st_free_table(arg->data);
855  arg->data = 0;
857  arg->compat_tbl = 0;
858  if (arg->encodings) {
859  st_free_table(arg->encodings);
860  arg->encodings = 0;
861  }
862 }
863 
/*
 * call-seq:
 *      dump( obj [, anIO] , limit=-1 ) -> anIO
 *
 * Serializes obj and all descendant objects. If anIO is
 * specified, the serialized data will be written to it, otherwise the
 * data will be returned as a String. If limit is specified, the
 * traversal of subobjects will be limited to that depth. If limit is
 * negative, no checking of depth will be performed.
 *
 *     class Klass
 *       def initialize(str)
 *         @str = str
 *       end
 *       def say_hello
 *         @str
 *       end
 *     end
 *
 * (produces no output)
 *
 *     o = Klass.new("hello\n")
 *     data = Marshal.dump(o)
 *     obj = Marshal.load(data)
 *     obj.say_hello  #=> "hello\n"
 *
 * Marshal can't dump the following objects:
 * * anonymous Class/Module.
 * * objects which are related to the system (ex: Dir, File::Stat, IO, File,
 *   Socket and so on)
 * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
 *   ThreadGroup, Continuation
 * * objects which define singleton methods
 */
898 static VALUE
900 {
901  VALUE obj, port, a1, a2;
902  int limit = -1;
903  struct dump_arg *arg;
904  volatile VALUE wrapper;
905 
906  port = Qnil;
907  rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
908  if (argc == 3) {
909  if (!NIL_P(a2)) limit = NUM2INT(a2);
910  if (NIL_P(a1)) goto type_error;
911  port = a1;
912  }
913  else if (argc == 2) {
914  if (FIXNUM_P(a1)) limit = FIX2INT(a1);
915  else if (NIL_P(a1)) goto type_error;
916  else port = a1;
917  }
918  wrapper = TypedData_Make_Struct(rb_cData, struct dump_arg, &dump_arg_data, arg);
919  arg->dest = 0;
920  arg->symbols = st_init_numtable();
921  arg->data = st_init_numtable();
922  arg->infection = 0;
923  arg->compat_tbl = st_init_numtable();
924  arg->encodings = 0;
925  arg->str = rb_str_buf_new(0);
926  if (!NIL_P(port)) {
927  if (!rb_respond_to(port, s_write)) {
928  type_error:
929  rb_raise(rb_eTypeError, "instance of IO needed");
930  }
931  arg->dest = port;
932  if (rb_respond_to(port, s_binmode)) {
933  rb_funcall2(port, s_binmode, 0, 0);
935  }
936  }
937  else {
938  port = arg->str;
939  }
940 
941  w_byte(MARSHAL_MAJOR, arg);
942  w_byte(MARSHAL_MINOR, arg);
943 
944  w_object(obj, arg, limit);
945  if (arg->dest) {
946  rb_io_write(arg->dest, arg->str);
947  rb_str_resize(arg->str, 0);
948  }
949  clear_dump_arg(arg);
950  RB_GC_GUARD(wrapper);
951 
952  return port;
953 }
954 
955 struct load_arg {
957  long offset;
963 };
964 
965 static void
967 {
968  if (!arg->symbols) {
969  rb_raise(rb_eRuntimeError, "Marshal.load reentered at %s",
970  rb_id2name(sym));
971  }
972 }
973 
974 static void clear_load_arg(struct load_arg *arg);
975 
976 static void
977 mark_load_arg(void *ptr)
978 {
979  struct load_arg *p = ptr;
980  if (!p->symbols)
981  return;
982  rb_mark_tbl(p->data);
984 }
985 
/*
 * Free callback for the load_arg wrapper: releases what the structure
 * owns through clear_load_arg(), then the structure itself.
 */
static void
free_load_arg(void *ptr)
{
    struct load_arg *arg = ptr;

    clear_load_arg(arg);
    xfree(arg);
}
992 
993 static size_t
994 memsize_load_arg(const void *ptr)
995 {
996  return ptr ? sizeof(struct load_arg) : 0;
997 }
998 
1000  "load_arg",
1002 };
1003 
1004 #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
1005 static VALUE r_entry0(VALUE v, st_index_t num, struct load_arg *arg);
1006 static VALUE r_object(struct load_arg *arg);
1007 static ID r_symbol(struct load_arg *arg);
1008 static VALUE path2class(VALUE path);
1009 
1010 static st_index_t
1012 {
1013  st_index_t idx = arg->data->num_entries;
1014 
1015  st_insert(arg->data, (st_data_t)idx, (st_data_t)Qundef);
1016  return idx;
1017 }
1018 
1019 static int
1021 {
1022  int c;
1023 
1024  if (TYPE(arg->src) == T_STRING) {
1025  if (RSTRING_LEN(arg->src) > arg->offset) {
1026  c = (unsigned char)RSTRING_PTR(arg->src)[arg->offset++];
1027  }
1028  else {
1029  rb_raise(rb_eArgError, "marshal data too short");
1030  }
1031  }
1032  else {
1033  VALUE src = arg->src;
1034  VALUE v = rb_funcall2(src, s_getbyte, 0, 0);
1035  check_load_arg(arg, s_getbyte);
1036  if (NIL_P(v)) rb_eof_error();
1037  c = (unsigned char)NUM2CHR(v);
1038  }
1039  return c;
1040 }
1041 
1042 static void
1044 {
1045  rb_raise(rb_eTypeError, "long too big for this architecture (size "
1046  STRINGIZE(SIZEOF_LONG)", given %d)", size);
1047 }
1048 
1049 #undef SIGN_EXTEND_CHAR
1050 #if __STDC__
1051 # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
1052 #else /* not __STDC__ */
1053 /* As in Harbison and Steele. */
1054 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
1055 #endif
1056 
1057 static long
1059 {
1060  register long x;
1061  int c = SIGN_EXTEND_CHAR(r_byte(arg));
1062  long i;
1063 
1064  if (c == 0) return 0;
1065  if (c > 0) {
1066  if (4 < c && c < 128) {
1067  return c - 5;
1068  }
1069  if (c > (int)sizeof(long)) long_toobig(c);
1070  x = 0;
1071  for (i=0;i<c;i++) {
1072  x |= (long)r_byte(arg) << (8*i);
1073  }
1074  }
1075  else {
1076  if (-129 < c && c < -4) {
1077  return c + 5;
1078  }
1079  c = -c;
1080  if (c > (int)sizeof(long)) long_toobig(c);
1081  x = -1;
1082  for (i=0;i<c;i++) {
1083  x &= ~((long)0xff << (8*i));
1084  x |= (long)r_byte(arg) << (8*i);
1085  }
1086  }
1087  return x;
1088 }
1089 
1090 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
1091 
1092 static VALUE
1093 r_bytes0(long len, struct load_arg *arg)
1094 {
1095  VALUE str;
1096 
1097  if (len == 0) return rb_str_new(0, 0);
1098  if (TYPE(arg->src) == T_STRING) {
1099  if (RSTRING_LEN(arg->src) - arg->offset >= len) {
1100  str = rb_str_new(RSTRING_PTR(arg->src)+arg->offset, len);
1101  arg->offset += len;
1102  }
1103  else {
1104  too_short:
1105  rb_raise(rb_eArgError, "marshal data too short");
1106  }
1107  }
1108  else {
1109  VALUE src = arg->src;
1110  VALUE n = LONG2NUM(len);
1111  str = rb_funcall2(src, s_read, 1, &n);
1112  check_load_arg(arg, s_read);
1113  if (NIL_P(str)) goto too_short;
1114  StringValue(str);
1115  if (RSTRING_LEN(str) != len) goto too_short;
1116  arg->infection |= (int)FL_TEST(str, MARSHAL_INFECTION);
1117  }
1118  return str;
1119 }
1120 
1121 static int
1123 {
1124  if (id == rb_id_encoding()) {
1125  int idx = rb_enc_find_index(StringValueCStr(val));
1126  return idx;
1127  }
1128  else if (id == rb_intern("E")) {
1129  if (val == Qfalse) return rb_usascii_encindex();
1130  else if (val == Qtrue) return rb_utf8_encindex();
1131  /* bogus ignore */
1132  }
1133  return -1;
1134 }
1135 
1136 static ID
1138 {
1139  st_data_t id;
1140  long num = r_long(arg);
1141 
1142  if (st_lookup(arg->symbols, num, &id)) {
1143  return (ID)id;
1144  }
1145  rb_raise(rb_eArgError, "bad symbol");
1146 }
1147 
1148 static ID
1149 r_symreal(struct load_arg *arg, int ivar)
1150 {
1151  volatile VALUE s = r_bytes(arg);
1152  ID id;
1153  int idx = -1;
1154  st_index_t n = arg->symbols->num_entries;
1155 
1156  st_insert(arg->symbols, (st_data_t)n, (st_data_t)0);
1157  if (ivar) {
1158  long num = r_long(arg);
1159  while (num-- > 0) {
1160  id = r_symbol(arg);
1161  idx = id2encidx(id, r_object(arg));
1162  }
1163  }
1164  if (idx < 0) idx = rb_usascii_encindex();
1165  rb_enc_associate_index(s, idx);
1166  id = rb_intern_str(s);
1167  st_insert(arg->symbols, (st_data_t)n, (st_data_t)id);
1168 
1169  return id;
1170 }
1171 
1172 static ID
1174 {
1175  int type, ivar = 0;
1176 
1177  again:
1178  switch ((type = r_byte(arg))) {
1179  case TYPE_IVAR:
1180  ivar = 1;
1181  goto again;
1182  case TYPE_SYMBOL:
1183  return r_symreal(arg, ivar);
1184  case TYPE_SYMLINK:
1185  if (ivar) {
1186  rb_raise(rb_eArgError, "dump format error (symlink with encoding)");
1187  }
1188  return r_symlink(arg);
1189  default:
1190  rb_raise(rb_eArgError, "dump format error for symbol(0x%x)", type);
1191  break;
1192  }
1193 }
1194 
1195 static VALUE
1197 {
1198  return rb_id2str(r_symbol(arg));
1199 }
1200 
1201 static VALUE
1203 {
1204  return r_bytes(arg);
1205 }
1206 
1207 static VALUE
1209 {
1210  st_data_t real_obj = (VALUE)Qundef;
1211  if (st_lookup(arg->compat_tbl, v, &real_obj)) {
1212  st_insert(arg->data, num, (st_data_t)real_obj);
1213  }
1214  else {
1215  st_insert(arg->data, num, (st_data_t)v);
1216  }
1217  if (arg->infection &&
1218  TYPE(v) != T_CLASS && TYPE(v) != T_MODULE) {
1219  FL_SET(v, arg->infection);
1220  if ((VALUE)real_obj != Qundef)
1221  FL_SET((VALUE)real_obj, arg->infection);
1222  }
1223  return v;
1224 }
1225 
1226 static VALUE
1228 {
1229  st_data_t data;
1230  if (st_lookup(arg->compat_tbl, v, &data)) {
1231  VALUE real_obj = (VALUE)data;
1232  rb_alloc_func_t allocator = rb_get_alloc_func(CLASS_OF(real_obj));
1233  st_data_t key = v;
1234  if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1235  marshal_compat_t *compat = (marshal_compat_t*)data;
1236  compat->loader(real_obj, v);
1237  }
1238  st_delete(arg->compat_tbl, &key, 0);
1239  v = real_obj;
1240  }
1241  if (arg->proc) {
1242  v = rb_funcall(arg->proc, s_call, 1, v);
1243  check_load_arg(arg, s_call);
1244  }
1245  return v;
1246 }
1247 
1248 static void
1249 r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
1250 {
1251  long len;
1252 
1253  len = r_long(arg);
1254  if (len > 0) {
1255  do {
1256  ID id = r_symbol(arg);
1257  VALUE val = r_object(arg);
1258  int idx = id2encidx(id, val);
1259  if (idx >= 0) {
1260  rb_enc_associate_index(obj, idx);
1261  if (has_encoding) *has_encoding = TRUE;
1262  }
1263  else {
1264  rb_ivar_set(obj, id, val);
1265  }
1266  } while (--len > 0);
1267  }
1268 }
1269 
1270 static VALUE
1272 {
1273  VALUE v = rb_path_to_class(path);
1274 
1275  if (TYPE(v) != T_CLASS) {
1276  rb_raise(rb_eArgError, "%.*s does not refer to class",
1277  (int)RSTRING_LEN(path), RSTRING_PTR(path));
1278  }
1279  return v;
1280 }
1281 
1282 static VALUE
1284 {
1285  VALUE v = rb_path_to_class(path);
1286 
1287  if (TYPE(v) != T_MODULE) {
1288  rb_raise(rb_eArgError, "%.*s does not refer to module",
1289  (int)RSTRING_LEN(path), RSTRING_PTR(path));
1290  }
1291  return v;
1292 }
1293 
1294 static VALUE
1296 {
1297  VALUE klass;
1298  st_data_t data;
1299  rb_alloc_func_t allocator;
1300 
1301  klass = path2class(path);
1302 
1303  allocator = rb_get_alloc_func(klass);
1304  if (st_lookup(compat_allocator_tbl, (st_data_t)allocator, &data)) {
1305  marshal_compat_t *compat = (marshal_compat_t*)data;
1306  VALUE real_obj = rb_obj_alloc(klass);
1307  VALUE obj = rb_obj_alloc(compat->oldclass);
1308  st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj);
1309  return obj;
1310  }
1311 
1312  return rb_obj_alloc(klass);
1313 }
1314 
1315 static VALUE
1316 r_object0(struct load_arg *arg, int *ivp, VALUE extmod)
1317 {
1318  VALUE v = Qnil;
1319  int type = r_byte(arg);
1320  long id;
1321  st_data_t link;
1322 
1323  switch (type) {
1324  case TYPE_LINK:
1325  id = r_long(arg);
1326  if (!st_lookup(arg->data, (st_data_t)id, &link)) {
1327  rb_raise(rb_eArgError, "dump format error (unlinked)");
1328  }
1329  v = (VALUE)link;
1330  if (arg->proc) {
1331  v = rb_funcall(arg->proc, s_call, 1, v);
1332  check_load_arg(arg, s_call);
1333  }
1334  break;
1335 
1336  case TYPE_IVAR:
1337  {
1338  int ivar = TRUE;
1339 
1340  v = r_object0(arg, &ivar, extmod);
1341  if (ivar) r_ivar(v, NULL, arg);
1342  }
1343  break;
1344 
1345  case TYPE_EXTENDED:
1346  {
1347  VALUE m = path2module(r_unique(arg));
1348 
1349  if (NIL_P(extmod)) extmod = rb_ary_new2(0);
1350  rb_ary_push(extmod, m);
1351 
1352  v = r_object0(arg, 0, extmod);
1353  while (RARRAY_LEN(extmod) > 0) {
1354  m = rb_ary_pop(extmod);
1355  rb_extend_object(v, m);
1356  }
1357  }
1358  break;
1359 
1360  case TYPE_UCLASS:
1361  {
1362  VALUE c = path2class(r_unique(arg));
1363 
1364  if (FL_TEST(c, FL_SINGLETON)) {
1365  rb_raise(rb_eTypeError, "singleton can't be loaded");
1366  }
1367  v = r_object0(arg, 0, extmod);
1368  if (rb_special_const_p(v) || TYPE(v) == T_OBJECT || TYPE(v) == T_CLASS) {
1369  format_error:
1370  rb_raise(rb_eArgError, "dump format error (user class)");
1371  }
1372  if (TYPE(v) == T_MODULE || !RTEST(rb_class_inherited_p(c, RBASIC(v)->klass))) {
1373  VALUE tmp = rb_obj_alloc(c);
1374 
1375  if (TYPE(v) != TYPE(tmp)) goto format_error;
1376  }
1377  RBASIC(v)->klass = c;
1378  }
1379  break;
1380 
1381  case TYPE_NIL:
1382  v = Qnil;
1383  v = r_leave(v, arg);
1384  break;
1385 
1386  case TYPE_TRUE:
1387  v = Qtrue;
1388  v = r_leave(v, arg);
1389  break;
1390 
1391  case TYPE_FALSE:
1392  v = Qfalse;
1393  v = r_leave(v, arg);
1394  break;
1395 
1396  case TYPE_FIXNUM:
1397  {
1398  long i = r_long(arg);
1399  v = LONG2FIX(i);
1400  }
1401  v = r_leave(v, arg);
1402  break;
1403 
1404  case TYPE_FLOAT:
1405  {
1406  double d;
1407  VALUE str = r_bytes(arg);
1408  const char *ptr = RSTRING_PTR(str);
1409 
1410  if (strcmp(ptr, "nan") == 0) {
1411  d = NAN;
1412  }
1413  else if (strcmp(ptr, "inf") == 0) {
1414  d = INFINITY;
1415  }
1416  else if (strcmp(ptr, "-inf") == 0) {
1417  d = -INFINITY;
1418  }
1419  else {
1420  char *e;
1421  d = strtod(ptr, &e);
1422  d = load_mantissa(d, e, RSTRING_LEN(str) - (e - ptr));
1423  }
1424  v = DBL2NUM(d);
1425  v = r_entry(v, arg);
1426  v = r_leave(v, arg);
1427  }
1428  break;
1429 
1430  case TYPE_BIGNUM:
1431  {
1432  long len;
1433  BDIGIT *digits;
1434  volatile VALUE data;
1435 
1436  NEWOBJ(big, struct RBignum);
1437  OBJSETUP(big, rb_cBignum, T_BIGNUM);
1438  RBIGNUM_SET_SIGN(big, (r_byte(arg) == '+'));
1439  len = r_long(arg);
1440  data = r_bytes0(len * 2, arg);
1441 #if SIZEOF_BDIGITS == SIZEOF_SHORT
1442  rb_big_resize((VALUE)big, len);
1443 #else
1444  rb_big_resize((VALUE)big, (len + 1) * 2 / sizeof(BDIGIT));
1445 #endif
1446  digits = RBIGNUM_DIGITS(big);
1447  MEMCPY(digits, RSTRING_PTR(data), char, len * 2);
1448 #if SIZEOF_BDIGITS > SIZEOF_SHORT
1449  MEMZERO((char *)digits + len * 2, char,
1450  RBIGNUM_LEN(big) * sizeof(BDIGIT) - len * 2);
1451 #endif
1452  len = RBIGNUM_LEN(big);
1453  while (len > 0) {
1454  unsigned char *p = (unsigned char *)digits;
1455  BDIGIT num = 0;
1456 #if SIZEOF_BDIGITS > SIZEOF_SHORT
1457  int shift = 0;
1458  int i;
1459 
1460  for (i=0; i<SIZEOF_BDIGITS; i++) {
1461  num |= (int)p[i] << shift;
1462  shift += 8;
1463  }
1464 #else
1465  num = p[0] | (p[1] << 8);
1466 #endif
1467  *digits++ = num;
1468  len--;
1469  }
1470  v = rb_big_norm((VALUE)big);
1471  v = r_entry(v, arg);
1472  v = r_leave(v, arg);
1473  }
1474  break;
1475 
1476  case TYPE_STRING:
1477  v = r_entry(r_string(arg), arg);
1478  v = r_leave(v, arg);
1479  break;
1480 
1481  case TYPE_REGEXP:
1482  {
1483  volatile VALUE str = r_bytes(arg);
1484  int options = r_byte(arg);
1485  int has_encoding = FALSE;
1486  st_index_t idx = r_prepare(arg);
1487 
1488  if (ivp) {
1489  r_ivar(str, &has_encoding, arg);
1490  *ivp = FALSE;
1491  }
1492  if (!has_encoding) {
1493  /* 1.8 compatibility; remove escapes undefined in 1.8 */
1494  char *ptr = RSTRING_PTR(str), *dst = ptr, *src = ptr;
1495  long len = RSTRING_LEN(str);
1496  long bs = 0;
1497  for (; len-- > 0; *dst++ = *src++) {
1498  switch (*src) {
1499  case '\\': bs++; break;
1500  case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1501  case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
1502  case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
1503  case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
1504  case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
1505  if (bs & 1) --dst;
1506  default: bs = 0; break;
1507  }
1508  }
1509  rb_str_set_len(str, dst - ptr);
1510  }
1511  v = r_entry0(rb_reg_new_str(str, options), idx, arg);
1512  v = r_leave(v, arg);
1513  }
1514  break;
1515 
1516  case TYPE_ARRAY:
1517  {
1518  volatile long len = r_long(arg); /* gcc 2.7.2.3 -O2 bug?? */
1519 
1520  v = rb_ary_new2(len);
1521  v = r_entry(v, arg);
1522  while (len--) {
1523  rb_ary_push(v, r_object(arg));
1524  }
1525  v = r_leave(v, arg);
1526  }
1527  break;
1528 
1529  case TYPE_HASH:
1530  case TYPE_HASH_DEF:
1531  {
1532  long len = r_long(arg);
1533 
1534  v = rb_hash_new();
1535  v = r_entry(v, arg);
1536  while (len--) {
1537  VALUE key = r_object(arg);
1538  VALUE value = r_object(arg);
1539  rb_hash_aset(v, key, value);
1540  }
1541  if (type == TYPE_HASH_DEF) {
1542  RHASH_IFNONE(v) = r_object(arg);
1543  }
1544  v = r_leave(v, arg);
1545  }
1546  break;
1547 
1548  case TYPE_STRUCT:
1549  {
1550  VALUE mem, values;
1551  volatile long i; /* gcc 2.7.2.3 -O2 bug?? */
1552  ID slot;
1553  st_index_t idx = r_prepare(arg);
1554  VALUE klass = path2class(r_unique(arg));
1555  long len = r_long(arg);
1556 
1557  v = rb_obj_alloc(klass);
1558  if (TYPE(v) != T_STRUCT) {
1559  rb_raise(rb_eTypeError, "class %s not a struct", rb_class2name(klass));
1560  }
1561  mem = rb_struct_s_members(klass);
1562  if (RARRAY_LEN(mem) != len) {
1563  rb_raise(rb_eTypeError, "struct %s not compatible (struct size differs)",
1564  rb_class2name(klass));
1565  }
1566 
1567  v = r_entry0(v, idx, arg);
1568  values = rb_ary_new2(len);
1569  for (i=0; i<len; i++) {
1570  slot = r_symbol(arg);
1571 
1572  if (RARRAY_PTR(mem)[i] != ID2SYM(slot)) {
1573  rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)",
1574  rb_class2name(klass),
1575  rb_id2name(slot),
1576  rb_id2name(SYM2ID(RARRAY_PTR(mem)[i])));
1577  }
1578  rb_ary_push(values, r_object(arg));
1579  }
1580  rb_struct_initialize(v, values);
1581  v = r_leave(v, arg);
1582  }
1583  break;
1584 
1585  case TYPE_USERDEF:
1586  {
1587  VALUE klass = path2class(r_unique(arg));
1588  VALUE data;
1589 
1590  if (!rb_respond_to(klass, s_load)) {
1591  rb_raise(rb_eTypeError, "class %s needs to have method `_load'",
1592  rb_class2name(klass));
1593  }
1594  data = r_string(arg);
1595  if (ivp) {
1596  r_ivar(data, NULL, arg);
1597  *ivp = FALSE;
1598  }
1599  v = rb_funcall(klass, s_load, 1, data);
1600  check_load_arg(arg, s_load);
1601  v = r_entry(v, arg);
1602  v = r_leave(v, arg);
1603  }
1604  break;
1605 
1606  case TYPE_USRMARSHAL:
1607  {
1608  VALUE klass = path2class(r_unique(arg));
1609  VALUE data;
1610 
1611  v = rb_obj_alloc(klass);
1612  if (!NIL_P(extmod)) {
1613  while (RARRAY_LEN(extmod) > 0) {
1614  VALUE m = rb_ary_pop(extmod);
1615  rb_extend_object(v, m);
1616  }
1617  }
1618  if (!rb_respond_to(v, s_mload)) {
1619  rb_raise(rb_eTypeError, "instance of %s needs to have method `marshal_load'",
1620  rb_class2name(klass));
1621  }
1622  v = r_entry(v, arg);
1623  data = r_object(arg);
1624  rb_funcall(v, s_mload, 1, data);
1625  check_load_arg(arg, s_mload);
1626  v = r_leave(v, arg);
1627  }
1628  break;
1629 
1630  case TYPE_OBJECT:
1631  {
1632  st_index_t idx = r_prepare(arg);
1633  v = obj_alloc_by_path(r_unique(arg), arg);
1634  if (TYPE(v) != T_OBJECT) {
1635  rb_raise(rb_eArgError, "dump format error");
1636  }
1637  v = r_entry0(v, idx, arg);
1638  r_ivar(v, NULL, arg);
1639  v = r_leave(v, arg);
1640  }
1641  break;
1642 
1643  case TYPE_DATA:
1644  {
1645  VALUE klass = path2class(r_unique(arg));
1646  if (rb_respond_to(klass, s_alloc)) {
1647  static int warn = TRUE;
1648  if (warn) {
1649  rb_warn("define `allocate' instead of `_alloc'");
1650  warn = FALSE;
1651  }
1652  v = rb_funcall(klass, s_alloc, 0);
1653  check_load_arg(arg, s_alloc);
1654  }
1655  else {
1656  v = rb_obj_alloc(klass);
1657  }
1658  if (TYPE(v) != T_DATA) {
1659  rb_raise(rb_eArgError, "dump format error");
1660  }
1661  v = r_entry(v, arg);
1662  if (!rb_respond_to(v, s_load_data)) {
1664  "class %s needs to have instance method `_load_data'",
1665  rb_class2name(klass));
1666  }
1667  rb_funcall(v, s_load_data, 1, r_object0(arg, 0, extmod));
1669  v = r_leave(v, arg);
1670  }
1671  break;
1672 
1673  case TYPE_MODULE_OLD:
1674  {
1675  volatile VALUE str = r_bytes(arg);
1676 
1677  v = rb_path_to_class(str);
1678  v = r_entry(v, arg);
1679  v = r_leave(v, arg);
1680  }
1681  break;
1682 
1683  case TYPE_CLASS:
1684  {
1685  volatile VALUE str = r_bytes(arg);
1686 
1687  v = path2class(str);
1688  v = r_entry(v, arg);
1689  v = r_leave(v, arg);
1690  }
1691  break;
1692 
1693  case TYPE_MODULE:
1694  {
1695  volatile VALUE str = r_bytes(arg);
1696 
1697  v = path2module(str);
1698  v = r_entry(v, arg);
1699  v = r_leave(v, arg);
1700  }
1701  break;
1702 
1703  case TYPE_SYMBOL:
1704  if (ivp) {
1705  v = ID2SYM(r_symreal(arg, *ivp));
1706  *ivp = FALSE;
1707  }
1708  else {
1709  v = ID2SYM(r_symreal(arg, 0));
1710  }
1711  v = r_leave(v, arg);
1712  break;
1713 
1714  case TYPE_SYMLINK:
1715  v = ID2SYM(r_symlink(arg));
1716  break;
1717 
1718  default:
1719  rb_raise(rb_eArgError, "dump format error(0x%x)", type);
1720  break;
1721  }
1722  return v;
1723 }
1724 
1725 static VALUE
1727 {
1728  return r_object0(arg, 0, Qnil);
1729 }
1730 
1731 static void
1733 {
1734  if (!arg->symbols) return;
1735  st_free_table(arg->symbols);
1736  arg->symbols = 0;
1737  st_free_table(arg->data);
1738  arg->data = 0;
1739  st_free_table(arg->compat_tbl);
1740  arg->compat_tbl = 0;
1741 }
1742 
1743 /*
1744  * call-seq:
1745  * load( source [, proc] ) -> obj
1746  * restore( source [, proc] ) -> obj
1747  *
1748  * Returns the result of converting the serialized data in source into a
1749  * Ruby object (possibly with associated subordinate objects). source
1750  * may be either an instance of IO or an object that responds to
1751  * to_str. If proc is specified, it will be passed each object as it
1752  * is deserialized.
1753  */
1754 static VALUE
1756 {
1757  VALUE port, proc;
1758  int major, minor, infection = 0;
1759  VALUE v;
1760  volatile VALUE wrapper;
1761  struct load_arg *arg;
1762 
1763  rb_scan_args(argc, argv, "11", &port, &proc);
1764  v = rb_check_string_type(port);
1765  if (!NIL_P(v)) {
1766  infection = (int)FL_TEST(port, MARSHAL_INFECTION); /* original taintedness */
1767  port = v;
1768  }
1769  else if (rb_respond_to(port, s_getbyte) && rb_respond_to(port, s_read)) {
1770  if (rb_respond_to(port, s_binmode)) {
1771  rb_funcall2(port, s_binmode, 0, 0);
1772  }
1773  infection = (int)(FL_TAINT | FL_TEST(port, FL_UNTRUSTED));
1774  }
1775  else {
1776  rb_raise(rb_eTypeError, "instance of IO needed");
1777  }
1778  wrapper = TypedData_Make_Struct(rb_cData, struct load_arg, &load_arg_data, arg);
1779  arg->infection = infection;
1780  arg->src = port;
1781  arg->offset = 0;
1782  arg->symbols = st_init_numtable();
1783  arg->data = st_init_numtable();
1784  arg->compat_tbl = st_init_numtable();
1785  arg->proc = 0;
1786 
1787  major = r_byte(arg);
1788  minor = r_byte(arg);
1789  if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
1790  clear_load_arg(arg);
1791  rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
1792 \tformat version %d.%d required; %d.%d given",
1793  MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
1794  }
1795  if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
1796  rb_warn("incompatible marshal file format (can be read)\n\
1797 \tformat version %d.%d required; %d.%d given",
1798  MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
1799  }
1800 
1801  if (!NIL_P(proc)) arg->proc = proc;
1802  v = r_object(arg);
1803  clear_load_arg(arg);
1804  RB_GC_GUARD(wrapper);
1805 
1806  return v;
1807 }
1808 
1809 /*
1810  * The marshaling library converts collections of Ruby objects into a
1811  * byte stream, allowing them to be stored outside the currently
1812  * active script. This data may subsequently be read and the original
1813  * objects reconstituted.
1814  *
1815  * Marshaled data has major and minor version numbers stored along
1816  * with the object information. In normal use, marshaling can only
1817  * load data written with the same major version number and an equal
1818  * or lower minor version number. If Ruby's ``verbose'' flag is set
1819  * (normally using -d, -v, -w, or --verbose) a warning is issued unless
1820  * the minor number also matches exactly. Marshal versioning is independent of
1821  * Ruby's version numbers. You can extract the version by reading the
1822  * first two bytes of marshaled data.
1823  *
1824  * str = Marshal.dump("thing")
1825  * RUBY_VERSION #=> "1.9.0"
1826  * str[0].ord #=> 4
1827  * str[1].ord #=> 8
1828  *
1829  * Some objects cannot be dumped: if the objects to be dumped include
1830  * bindings, procedure or method objects, instances of class IO, or
1831  * singleton objects, a TypeError will be raised.
1832  *
1833  * If your class has special serialization needs (for example, if you
1834  * want to serialize in some specific format), or if it contains
1835  * objects that would otherwise not be serializable, you can implement
1836  * your own serialization strategy.
1837  *
1838  * There are two methods of doing this, your object can define either
1839  * marshal_dump and marshal_load or _dump and _load. marshal_dump will take
1840  * precedence over _dump if both are defined. marshal_dump may result in
1841  * smaller Marshal strings.
1842  *
1843  * == marshal_dump and marshal_load
1844  *
1845  * When dumping an object the method marshal_dump will be called.
1846  * marshal_dump must return a result containing the information necessary for
1847  * marshal_load to reconstitute the object. The result can be any object.
1848  *
1849  * When loading an object dumped using marshal_dump the object is first
1850  * allocated then marshal_load is called with the result from marshal_dump.
1851  * marshal_load must recreate the object from the information in the result.
1852  *
1853  * Example:
1854  *
1855  * class MyObj
1856  * def initialize name, version, data
1857  * @name = name
1858  * @version = version
1859  * @data = data
1860  * end
1861  *
1862  * def marshal_dump
1863  * [@name, @version]
1864  * end
1865  *
1866  * def marshal_load array
1867  * @name, @version = array
1868  * end
1869  * end
1870  *
1871  * == _dump and _load
1872  *
1873  * Use _dump and _load when you need to allocate the object you're restoring
1874  * yourself.
1875  *
1876  * When dumping an object the instance method _dump is called with an Integer
1877  * which indicates the maximum depth of objects to dump (a value of -1 implies
1878  * that you should disable depth checking). _dump must return a String
1879  * containing the information necessary to reconstitute the object.
1880  *
1881  * The class method _load should take a String and use it to return an object
1882  * of the same class.
1883  *
1884  * Example:
1885  *
1886  * class MyObj
1887  * def initialize name, version, data
1888  * @name = name
1889  * @version = version
1890  * @data = data
1891  * end
1892  *
1893  * def _dump level
1894  * [@name, @version].join ':'
1895  * end
1896  *
1897  * def self._load args
1898  * new(*args.split(':'))
1899  * end
1900  * end
1901  *
1902  * Since Marshal.dump outputs a string you can have _dump return a Marshal
1903  * string which is Marshal.loaded in _load for complex objects.
1904  */
1905 void
1907 {
1908 #undef rb_intern
1909 #define rb_intern(str) rb_intern_const(str)
1910 
1911  VALUE rb_mMarshal = rb_define_module("Marshal");
1912 
1913  s_dump = rb_intern("_dump");
1914  s_load = rb_intern("_load");
1915  s_mdump = rb_intern("marshal_dump");
1916  s_mload = rb_intern("marshal_load");
1917  s_dump_data = rb_intern("_dump_data");
1918  s_load_data = rb_intern("_load_data");
1919  s_alloc = rb_intern("_alloc");
1920  s_call = rb_intern("call");
1921  s_getbyte = rb_intern("getbyte");
1922  s_read = rb_intern("read");
1923  s_write = rb_intern("write");
1924  s_binmode = rb_intern("binmode");
1925 
1926  rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
1927  rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
1928  rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
1929 
1930  rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
1931  rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
1932 
1933  compat_allocator_tbl = st_init_numtable();
1935  Data_Wrap_Struct(rb_cData, mark_marshal_compat_t, 0, compat_allocator_tbl);
1937 }
1938 
1939 VALUE
1941 {
1942  int argc = 1;
1943  VALUE argv[2];
1944 
1945  argv[0] = obj;
1946  argv[1] = port;
1947  if (!NIL_P(port)) argc = 2;
1948  return marshal_dump(argc, argv);
1949 }
1950 
1951 VALUE
1953 {
1954  return marshal_load(1, &port);
1955 }
1956