Ruby  2.0.0p648(2015-12-16revision53162)
encoding.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  encoding.c -
4 
5  $Author: usa $
6  created at: Thu May 24 17:23:27 JST 2007
7 
8  Copyright (C) 2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/ruby.h"
13 #include "ruby/encoding.h"
14 #include "internal.h"
15 #include "regenc.h"
16 #include <ctype.h>
17 #ifndef NO_LOCALE_CHARMAP
18 #ifdef __CYGWIN__
19 #include <windows.h>
20 #endif
21 #ifdef HAVE_LANGINFO_H
22 #include <langinfo.h>
23 #endif
24 #endif
25 #include "ruby/util.h"
26 
27 #if defined __GNUC__ && __GNUC__ >= 4
28 #pragma GCC visibility push(default)
29 int rb_enc_register(const char *name, rb_encoding *encoding);
30 void rb_enc_set_base(const char *name, const char *orig);
31 void rb_encdb_declare(const char *name);
32 int rb_encdb_replicate(const char *name, const char *orig);
33 int rb_encdb_dummy(const char *name);
34 int rb_encdb_alias(const char *alias, const char *orig);
35 void rb_encdb_set_unicode(int index);
36 #pragma GCC visibility pop
37 #endif
38 
39 static ID id_encoding;
42 
44  const char *name;
47 };
48 
49 static struct {
51  int count;
52  int size;
54 } enc_table;
55 
56 void rb_enc_init(void);
57 
58 #define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
59 #define UNSPECIFIED_ENCODING INT_MAX
60 
61 #define ENCODING_NAMELEN_MAX 63
62 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
63 
64 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
65 
66 static int load_encoding(const char *name);
67 
/*
 * Size callback listed in the "encoding" data-type table.
 * It reports no additional memory for the wrapped pointer.
 */
static size_t
enc_memsize(const void *p)
{
    (void)p; /* the wrapped pointer is not inspected */
    return (size_t)0;
}
73 
75  "encoding",
76  {0, 0, enc_memsize,},
77 };
78 
79 #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
80 
81 static VALUE
82 enc_new(rb_encoding *encoding)
83 {
84  return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, encoding);
85 }
86 
87 static VALUE
89 {
90  VALUE list, enc;
91 
92  if (!(list = rb_encoding_list)) {
93  rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
94  }
95  enc = rb_ary_entry(list, idx);
96  if (NIL_P(enc)) {
97  rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
98  }
99  return enc;
100 }
101 
102 VALUE
104 {
105  int idx;
106  if (!encoding) return Qnil;
107  idx = ENC_TO_ENCINDEX(encoding);
108  return rb_enc_from_encoding_index(idx);
109 }
110 
111 static int enc_autoload(rb_encoding *);
112 
113 static int
115 {
116  int index = rb_enc_to_index(enc);
117  if (rb_enc_from_index(index) != enc)
118  return -1;
119  if (enc_autoload_p(enc)) {
120  index = enc_autoload(enc);
121  }
122  return index;
123 }
124 
125 static int
127 {
128  if (SPECIAL_CONST_P(obj) || !rb_typeddata_is_kind_of(obj, &encoding_data_type)) {
129  return -1;
130  }
131  return check_encoding(RDATA(obj)->data);
132 }
133 
134 static int
136 {
137  int index = enc_check_encoding(enc);
138  if (index < 0) {
139  rb_raise(rb_eTypeError, "wrong argument type %s (expected Encoding)",
140  rb_obj_classname(enc));
141  }
142  return index;
143 }
144 
145 int
147 {
148  int idx;
149 
150  idx = enc_check_encoding(enc);
151  if (idx >= 0) {
152  return idx;
153  }
154  else if (NIL_P(enc = rb_check_string_type(enc))) {
155  return -1;
156  }
157  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
158  return -1;
159  }
160  return rb_enc_find_index(StringValueCStr(enc));
161 }
162 
163 /* Returns encoding index or UNSPECIFIED_ENCODING */
164 static int
166 {
167  int idx;
168 
169  StringValue(enc);
170  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
171  rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
172  }
174  return idx;
175 }
176 
177 static int
179 {
180  int idx = str_find_encindex(enc);
181  if (idx < 0) {
182  rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
183  }
184  return idx;
185 }
186 
187 static rb_encoding *
189 {
190  return rb_enc_from_index(str_to_encindex(enc));
191 }
192 
193 rb_encoding *
195 {
196  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
197  return str_to_encoding(enc);
198 }
199 
200 rb_encoding *
202 {
203  int idx;
204  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
205  idx = str_find_encindex(enc);
206  if (idx < 0) return NULL;
207  return rb_enc_from_index(idx);
208 }
209 
210 void
212 {
213 }
214 
215 static int
216 enc_table_expand(int newsize)
217 {
218  struct rb_encoding_entry *ent;
219  int count = newsize;
220 
221  if (enc_table.size >= newsize) return newsize;
222  newsize = (newsize + 7) / 8 * 8;
223  ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
224  if (!ent) return -1;
225  memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
226  enc_table.list = ent;
227  enc_table.size = newsize;
228  return count;
229 }
230 
231 static int
232 enc_register_at(int index, const char *name, rb_encoding *encoding)
233 {
234  struct rb_encoding_entry *ent = &enc_table.list[index];
235  VALUE list;
236 
237  if (!valid_encoding_name_p(name)) return -1;
238  if (!ent->name) {
239  ent->name = name = strdup(name);
240  }
241  else if (STRCASECMP(name, ent->name)) {
242  return -1;
243  }
244  if (!ent->enc) {
245  ent->enc = xmalloc(sizeof(rb_encoding));
246  }
247  if (encoding) {
248  *ent->enc = *encoding;
249  }
250  else {
251  memset(ent->enc, 0, sizeof(*ent->enc));
252  }
253  encoding = ent->enc;
254  encoding->name = name;
255  encoding->ruby_encoding_index = index;
256  st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
257  list = rb_encoding_list;
258  if (list && NIL_P(rb_ary_entry(list, index))) {
259  /* initialize encoding data */
260  rb_ary_store(list, index, enc_new(encoding));
261  }
262  return index;
263 }
264 
265 static int
266 enc_register(const char *name, rb_encoding *encoding)
267 {
268  int index = enc_table.count;
269 
270  if ((index = enc_table_expand(index + 1)) < 0) return -1;
271  enc_table.count = index;
272  return enc_register_at(index - 1, name, encoding);
273 }
274 
275 static void set_encoding_const(const char *, rb_encoding *);
276 int rb_enc_registered(const char *name);
277 
278 int
279 rb_enc_register(const char *name, rb_encoding *encoding)
280 {
281  int index = rb_enc_registered(name);
282 
283  if (index >= 0) {
284  rb_encoding *oldenc = rb_enc_from_index(index);
285  if (STRCASECMP(name, rb_enc_name(oldenc))) {
286  index = enc_register(name, encoding);
287  }
288  else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
289  enc_register_at(index, name, encoding);
290  }
291  else {
292  rb_raise(rb_eArgError, "encoding %s is already registered", name);
293  }
294  }
295  else {
296  index = enc_register(name, encoding);
298  }
299  return index;
300 }
301 
302 void
303 rb_encdb_declare(const char *name)
304 {
305  int idx = rb_enc_registered(name);
306  if (idx < 0) {
307  idx = enc_register(name, 0);
308  }
310 }
311 
312 static void
314 {
315  if (rb_enc_registered(name) >= 0) {
316  rb_raise(rb_eArgError, "encoding %s is already registered", name);
317  }
318 }
319 
320 static rb_encoding*
322 {
323  rb_encoding *enc = enc_table.list[index].enc;
324 
325  enc_table.list[index].base = base;
326  if (rb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
327  return enc;
328 }
329 
330 /* for encdb.h
331  * Set base encoding for encodings which are not replicas
332  * but not in their own files.
333  */
334 void
335 rb_enc_set_base(const char *name, const char *orig)
336 {
337  int idx = rb_enc_registered(name);
338  int origidx = rb_enc_registered(orig);
339  set_base_encoding(idx, rb_enc_from_index(origidx));
340 }
341 
342 int
343 rb_enc_replicate(const char *name, rb_encoding *encoding)
344 {
345  int idx;
346 
347  enc_check_duplication(name);
348  idx = enc_register(name, encoding);
349  set_base_encoding(idx, encoding);
351  return idx;
352 }
353 
354 /*
355  * call-seq:
356  * enc.replicate(name) -> encoding
357  *
358  * Returns a replicated encoding of _enc_ whose name is _name_.
359  * The new encoding should have the same byte structure of _enc_.
360  * If _name_ is used by another encoding, raise ArgumentError.
361  *
362  */
363 static VALUE
365 {
368  rb_to_encoding(encoding)));
369 }
370 
371 static int
372 enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
373 {
374  if (idx < 0) {
375  idx = enc_register(name, origenc);
376  }
377  else {
378  idx = enc_register_at(idx, name, origenc);
379  }
380  if (idx >= 0) {
381  set_base_encoding(idx, origenc);
383  }
384  return idx;
385 }
386 
387 int
388 rb_encdb_replicate(const char *name, const char *orig)
389 {
390  int origidx = rb_enc_registered(orig);
391  int idx = rb_enc_registered(name);
392 
393  if (origidx < 0) {
394  origidx = enc_register(orig, 0);
395  }
396  return enc_replicate_with_index(name, rb_enc_from_index(origidx), idx);
397 }
398 
399 int
401 {
402  int index = rb_enc_replicate(name, rb_ascii8bit_encoding());
403  rb_encoding *enc = enc_table.list[index].enc;
404 
405  ENC_SET_DUMMY(enc);
406  return index;
407 }
408 
409 int
410 rb_encdb_dummy(const char *name)
411 {
413  rb_enc_registered(name));
414  rb_encoding *enc = enc_table.list[index].enc;
415 
416  ENC_SET_DUMMY(enc);
417  return index;
418 }
419 
420 /*
421  * call-seq:
422  * enc.dummy? -> true or false
423  *
424  * Returns true for dummy encodings.
425  * A dummy encoding is an encoding for which character handling is not properly
426  * implemented.
427  * It is used for stateful encodings.
428  *
429  * Encoding::ISO_2022_JP.dummy? #=> true
430  * Encoding::UTF_8.dummy? #=> false
431  *
432  */
433 static VALUE
435 {
436  return ENC_DUMMY_P(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
437 }
438 
439 /*
440  * call-seq:
441  * enc.ascii_compatible? -> true or false
442  *
443  * Returns whether ASCII-compatible or not.
444  *
445  * Encoding::UTF_8.ascii_compatible? #=> true
446  * Encoding::UTF_16BE.ascii_compatible? #=> false
447  *
448  */
449 static VALUE
451 {
452  return rb_enc_asciicompat(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
453 }
454 
455 /*
456  * Returns 1 when the encoding is Unicode series other than UTF-7 else 0.
457  */
458 int
460 {
461  return ONIGENC_IS_UNICODE(enc);
462 }
463 
464 static st_data_t
466 {
467  return (st_data_t)strdup((const char *)name);
468 }
469 
470 /*
471  * Returns copied alias name when the key is added for st_table,
472  * else returns NULL.
473  */
474 static int
475 enc_alias_internal(const char *alias, int idx)
476 {
477  return st_insert2(enc_table.names, (st_data_t)alias, (st_data_t)idx,
478  enc_dup_name);
479 }
480 
481 static int
482 enc_alias(const char *alias, int idx)
483 {
484  if (!valid_encoding_name_p(alias)) return -1;
485  if (!enc_alias_internal(alias, idx))
487  return idx;
488 }
489 
490 int
491 rb_enc_alias(const char *alias, const char *orig)
492 {
493  int idx;
494 
495  enc_check_duplication(alias);
496  if (!enc_table.list) {
497  rb_enc_init();
498  }
499  if ((idx = rb_enc_find_index(orig)) < 0) {
500  return -1;
501  }
502  return enc_alias(alias, idx);
503 }
504 
/*
 * Alias declaration used while building the encoding database.
 *
 * If `orig` is not registered yet it is registered first with no definition.
 * Returns the result of enc_alias() for the resulting index.
 */
int
rb_encdb_alias(const char *alias, const char *orig)
{
    const int known = rb_enc_registered(orig);
    const int target = (known >= 0) ? known : enc_register(orig, 0);

    return enc_alias(alias, target);
}
515 
516 void
518 {
520 }
521 
522 enum {
527 };
528 
531 
532 void
534 {
536  if (!enc_table.names) {
538  }
539 #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
542  ENC_REGISTER(US_ASCII);
543 #undef ENC_REGISTER
545 }
546 
547 rb_encoding *
549 {
550  if (!enc_table.list) {
551  rb_enc_init();
552  }
553  if (index < 0 || enc_table.count <= index) {
554  return 0;
555  }
556  return enc_table.list[index].enc;
557 }
558 
559 int
561 {
562  st_data_t idx = 0;
563 
564  if (!name) return -1;
565  if (!enc_table.list) return -1;
566  if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
567  return (int)idx;
568  }
569  return -1;
570 }
571 
572 static VALUE
574 {
575  int safe = rb_safe_level();
576  return rb_require_safe(enclib, safe > 3 ? 3 : safe);
577 }
578 
579 static int
580 load_encoding(const char *name)
581 {
582  VALUE enclib = rb_sprintf("enc/%s.so", name);
583  VALUE verbose = ruby_verbose;
585  VALUE errinfo;
586  VALUE loaded;
587  char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
588  int idx;
589 
590  while (s < e) {
591  if (!ISALNUM(*s)) *s = '_';
592  else if (ISUPPER(*s)) *s = (char)TOLOWER(*s);
593  ++s;
594  }
595  FL_UNSET(enclib, FL_TAINT|FL_UNTRUSTED);
596  OBJ_FREEZE(enclib);
598  ruby_debug = Qfalse;
599  errinfo = rb_errinfo();
600  loaded = rb_protect(require_enc, enclib, 0);
601  ruby_verbose = verbose;
602  ruby_debug = debug;
603  rb_set_errinfo(errinfo);
604  if (NIL_P(loaded)) return -1;
605  if ((idx = rb_enc_registered(name)) < 0) return -1;
606  if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
607  return idx;
608 }
609 
610 static int
612 {
613  int i;
614  rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
615 
616  if (base) {
617  i = 0;
618  do {
619  if (i >= enc_table.count) return -1;
620  } while (enc_table.list[i].enc != base && (++i, 1));
621  if (enc_autoload_p(base)) {
622  if (enc_autoload(base) < 0) return -1;
623  }
624  i = ENC_TO_ENCINDEX(enc);
625  enc_register_at(i, rb_enc_name(enc), base);
626  }
627  else {
628  i = load_encoding(rb_enc_name(enc));
629  }
630  return i;
631 }
632 
633 /* Return encoding index or UNSPECIFIED_ENCODING from encoding name */
634 int
636 {
637  int i = rb_enc_registered(name);
638  rb_encoding *enc;
639 
640  if (i < 0) {
641  i = load_encoding(name);
642  }
643  else if (!(enc = rb_enc_from_index(i))) {
644  if (i != UNSPECIFIED_ENCODING) {
645  rb_raise(rb_eArgError, "encoding %s is not registered", name);
646  }
647  }
648  else if (enc_autoload_p(enc)) {
649  if (enc_autoload(enc) < 0) {
650  rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
651  name);
652  return 0;
653  }
654  }
655  return i;
656 }
657 
658 rb_encoding *
659 rb_enc_find(const char *name)
660 {
661  int idx = rb_enc_find_index(name);
662  if (idx < 0) idx = 0;
663  return rb_enc_from_index(idx);
664 }
665 
666 static inline int
668 {
669  if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj);
670  switch (BUILTIN_TYPE(obj)) {
671  case T_STRING:
672  case T_REGEXP:
673  case T_FILE:
674  return TRUE;
675  case T_DATA:
676  if (is_data_encoding(obj)) return TRUE;
677  default:
678  return FALSE;
679  }
680 }
681 
682 ID
684 {
685  CONST_ID(id_encoding, "encoding");
686  return id_encoding;
687 }
688 
689 int
691 {
692  int i = -1;
693  VALUE tmp;
694 
695  if (SPECIAL_CONST_P(obj)) {
696  if (!SYMBOL_P(obj)) return -1;
697  obj = rb_id2str(SYM2ID(obj));
698  }
699  switch (BUILTIN_TYPE(obj)) {
700  as_default:
701  default:
702  case T_STRING:
703  case T_REGEXP:
704  i = ENCODING_GET_INLINED(obj);
705  if (i == ENCODING_INLINE_MAX) {
706  VALUE iv;
707 
708  iv = rb_ivar_get(obj, rb_id_encoding());
709  i = NUM2INT(iv);
710  }
711  break;
712  case T_FILE:
713  tmp = rb_funcall(obj, rb_intern("internal_encoding"), 0, 0);
714  if (NIL_P(tmp)) obj = rb_funcall(obj, rb_intern("external_encoding"), 0, 0);
715  else obj = tmp;
716  if (NIL_P(obj)) break;
717  case T_DATA:
718  if (is_data_encoding(obj)) {
719  i = enc_check_encoding(obj);
720  }
721  else {
722  goto as_default;
723  }
724  break;
725  }
726  return i;
727 }
728 
729 static void
730 enc_set_index(VALUE obj, int idx)
731 {
732  if (idx < ENCODING_INLINE_MAX) {
733  ENCODING_SET_INLINED(obj, idx);
734  return;
735  }
737  rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
738 }
739 
740 void
741 rb_enc_set_index(VALUE obj, int idx)
742 {
743  rb_check_frozen(obj);
744  enc_set_index(obj, idx);
745 }
746 
747 VALUE
749 {
750 /* enc_check_capable(obj);*/
751  rb_check_frozen(obj);
752  if (rb_enc_get_index(obj) == idx)
753  return obj;
754  if (SPECIAL_CONST_P(obj)) {
755  rb_raise(rb_eArgError, "cannot set encoding");
756  }
757  if (!ENC_CODERANGE_ASCIIONLY(obj) ||
759  ENC_CODERANGE_CLEAR(obj);
760  }
761  enc_set_index(obj, idx);
762  return obj;
763 }
764 
765 VALUE
767 {
768  return rb_enc_associate_index(obj, rb_enc_to_index(enc));
769 }
770 
773 {
774  return rb_enc_from_index(rb_enc_get_index(obj));
775 }
776 
779 {
780  rb_encoding *enc = rb_enc_compatible(str1, str2);
781  if (!enc)
782  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
783  rb_enc_name(rb_enc_get(str1)),
784  rb_enc_name(rb_enc_get(str2)));
785  return enc;
786 }
787 
790 {
791  int idx1, idx2;
792  rb_encoding *enc1, *enc2;
793  int isstr1, isstr2;
794 
795  idx1 = rb_enc_get_index(str1);
796  idx2 = rb_enc_get_index(str2);
797 
798  if (idx1 < 0 || idx2 < 0)
799  return 0;
800 
801  if (idx1 == idx2) {
802  return rb_enc_from_index(idx1);
803  }
804  enc1 = rb_enc_from_index(idx1);
805  enc2 = rb_enc_from_index(idx2);
806 
807  isstr2 = RB_TYPE_P(str2, T_STRING);
808  if (isstr2 && RSTRING_LEN(str2) == 0)
809  return enc1;
810  isstr1 = RB_TYPE_P(str1, T_STRING);
811  if (isstr1 && RSTRING_LEN(str1) == 0)
812  return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
813  if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
814  return 0;
815  }
816 
817  /* objects whose encoding is the same of contents */
818  if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
819  return enc1;
820  if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
821  return enc2;
822 
823  if (!isstr1) {
824  VALUE tmp = str1;
825  int idx0 = idx1;
826  str1 = str2;
827  str2 = tmp;
828  idx1 = idx2;
829  idx2 = idx0;
830  idx0 = isstr1;
831  isstr1 = isstr2;
832  isstr2 = idx0;
833  }
834  if (isstr1) {
835  int cr1, cr2;
836 
837  cr1 = rb_enc_str_coderange(str1);
838  if (isstr2) {
839  cr2 = rb_enc_str_coderange(str2);
840  if (cr1 != cr2) {
841  /* may need to handle ENC_CODERANGE_BROKEN */
842  if (cr1 == ENC_CODERANGE_7BIT) return enc2;
843  if (cr2 == ENC_CODERANGE_7BIT) return enc1;
844  }
845  if (cr2 == ENC_CODERANGE_7BIT) {
846  return enc1;
847  }
848  }
849  if (cr1 == ENC_CODERANGE_7BIT)
850  return enc2;
851  }
852  return 0;
853 }
854 
855 void
857 {
859 }
860 
861 
862 /*
863  * call-seq:
864  * obj.encoding -> encoding
865  *
866  * Returns the Encoding object that represents the encoding of obj.
867  */
868 
869 VALUE
871 {
872  int idx = rb_enc_get_index(obj);
873  if (idx < 0) {
874  rb_raise(rb_eTypeError, "unknown encoding");
875  }
876  return rb_enc_from_encoding_index(idx);
877 }
878 
879 int
880 rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
881 {
882  return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
883 }
884 
885 int
886 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
887 {
888  int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
889  if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
890  return MBCLEN_CHARFOUND_LEN(n);
891  else {
892  int min = rb_enc_mbminlen(enc);
893  return min <= e-p ? min : (int)(e-p);
894  }
895 }
896 
897 int
898 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
899 {
900  int n;
901  if (e <= p)
903  n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
904  if (e-p < n)
905  return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
906  return n;
907 }
908 
909 int
910 rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
911 {
912  unsigned int c, l;
913  if (e <= p)
914  return -1;
915  if (rb_enc_asciicompat(enc)) {
916  c = (unsigned char)*p;
917  if (!ISASCII(c))
918  return -1;
919  if (len) *len = 1;
920  return c;
921  }
922  l = rb_enc_precise_mbclen(p, e, enc);
923  if (!MBCLEN_CHARFOUND_P(l))
924  return -1;
925  c = rb_enc_mbc_to_codepoint(p, e, enc);
926  if (!rb_enc_isascii(c, enc))
927  return -1;
928  if (len) *len = l;
929  return c;
930 }
931 
932 unsigned int
933 rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
934 {
935  int r;
936  if (e <= p)
937  rb_raise(rb_eArgError, "empty string");
938  r = rb_enc_precise_mbclen(p, e, enc);
939  if (!MBCLEN_CHARFOUND_P(r)) {
940  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
941  }
942  if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
943  return rb_enc_mbc_to_codepoint(p, e, enc);
944 }
945 
946 #undef rb_enc_codepoint
947 unsigned int
948 rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
949 {
950  return rb_enc_codepoint_len(p, e, 0, enc);
951 }
952 
953 int
955 {
956  int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
957  if (n == 0) {
958  rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
959  }
960  return n;
961 }
962 
963 int
965 {
967 }
968 
969 int
971 {
973 }
974 
975 /*
976  * call-seq:
977  * enc.inspect -> string
978  *
979  * Returns a string which represents the encoding for programmers.
980  *
981  * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
982  * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
983  */
984 static VALUE
986 {
987  VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self),
989  (enc_dummy_p(self) ? " (dummy)" : ""));
991  return str;
992 }
993 
994 /*
995  * call-seq:
996  * enc.name -> string
997  *
998  * Returns the name of the encoding.
999  *
1000  * Encoding::UTF_8.name #=> "UTF-8"
1001  */
1002 static VALUE
1004 {
1006 }
1007 
1008 static int
1010 {
1011  VALUE *arg = (VALUE *)args;
1012 
1013  if ((int)idx == (int)arg[0]) {
1014  VALUE str = rb_usascii_str_new2((char *)name);
1015  OBJ_FREEZE(str);
1016  rb_ary_push(arg[1], str);
1017  }
1018  return ST_CONTINUE;
1019 }
1020 
1021 /*
1022  * call-seq:
1023  * enc.names -> array
1024  *
1025  * Returns the list of name and aliases of the encoding.
1026  *
1027  * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
1028  */
1029 static VALUE
1031 {
1032  VALUE args[2];
1033 
1034  args[0] = (VALUE)rb_to_encoding_index(self);
1035  args[1] = rb_ary_new2(0);
1036  st_foreach(enc_table.names, enc_names_i, (st_data_t)args);
1037  return args[1];
1038 }
1039 
1040 /*
1041  * call-seq:
1042  * Encoding.list -> [enc1, enc2, ...]
1043  *
1044  * Returns the list of loaded encodings.
1045  *
1046  * Encoding.list
1047  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1048  * #<Encoding:ISO-2022-JP (dummy)>]
1049  *
1050  * Encoding.find("US-ASCII")
1051  * #=> #<Encoding:US-ASCII>
1052  *
1053  * Encoding.list
1054  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1055  * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
1056  *
1057  */
1058 static VALUE
1060 {
1061  VALUE ary = rb_ary_new2(0);
1063  return ary;
1064 }
1065 
1066 /*
1067  * call-seq:
1068  * Encoding.find(string) -> enc
1069  *
1070  * Search the encoding with specified <i>name</i>.
1071  * <i>name</i> should be a string.
1072  *
1073  * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
1074  *
1075  * Names which this method accept are encoding names and aliases
1076  * including following special aliases
1077  *
1078  * "external":: default external encoding
1079  * "internal":: default internal encoding
1080  * "locale":: locale encoding
1081  * "filesystem":: filesystem encoding
1082  *
1083  * An ArgumentError is raised when there is no encoding with <i>name</i>.
1084  * Only <code>Encoding.find("internal")</code> however returns nil
1085  * when no encoding named "internal", in other words, when Ruby has no
1086  * default internal encoding.
1087  */
1088 static VALUE
1090 {
1091  int idx;
1092  if (RB_TYPE_P(enc, T_DATA) && is_data_encoding(enc))
1093  return enc;
1094  idx = str_to_encindex(enc);
1095  if (idx == UNSPECIFIED_ENCODING) return Qnil;
1096  return rb_enc_from_encoding_index(idx);
1097 }
1098 
1099 /*
1100  * call-seq:
1101  * Encoding.compatible?(obj1, obj2) -> enc or nil
1102  *
1103  * Checks the compatibility of two objects.
1104  *
1105  * If the objects are both strings they are compatible when they are
1106  * concatenatable. The encoding of the concatenated string will be returned
1107  * if they are compatible, nil if they are not.
1108  *
1109  * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
1110  * #=> #<Encoding:ISO-8859-1>
1111  *
1112  * Encoding.compatible?(
1113  * "\xa1".force_encoding("iso-8859-1"),
1114  * "\xa1\xa1".force_encoding("euc-jp"))
1115  * #=> nil
1116  *
1117  * If the objects are non-strings their encodings are compatible when they
1118  * have an encoding and:
1119  * * Either encoding is US-ASCII compatible
1120  * * One of the encodings is a 7-bit encoding
1121  *
1122  */
1123 static VALUE
1124 enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
1125 {
1126  rb_encoding *enc;
1127 
1128  if (!enc_capable(str1)) return Qnil;
1129  if (!enc_capable(str2)) return Qnil;
1130  enc = rb_enc_compatible(str1, str2);
1131  if (!enc) return Qnil;
1132  return rb_enc_from_encoding(enc);
1133 }
1134 
1135 /* :nodoc: */
1136 static VALUE
1138 {
1139  rb_scan_args(argc, argv, "01", 0);
1140  return enc_name(self);
1141 }
1142 
1143 /* :nodoc: */
1144 static VALUE
1145 enc_load(VALUE klass, VALUE str)
1146 {
1147  return enc_find(klass, str);
1148 }
1149 
1150 rb_encoding *
1152 {
1153  if (!enc_table.list) {
1154  rb_enc_init();
1155  }
1156  return enc_table.list[ENCINDEX_ASCII].enc;
1157 }
1158 
1159 int
1161 {
1162  return ENCINDEX_ASCII;
1163 }
1164 
1165 rb_encoding *
1167 {
1168  if (!enc_table.list) {
1169  rb_enc_init();
1170  }
1171  return enc_table.list[ENCINDEX_UTF_8].enc;
1172 }
1173 
1174 int
1176 {
1177  return ENCINDEX_UTF_8;
1178 }
1179 
1180 rb_encoding *
1182 {
1183  if (!enc_table.list) {
1184  rb_enc_init();
1185  }
1186  return enc_table.list[ENCINDEX_US_ASCII].enc;
1187 }
1188 
1189 int
1191 {
1192  return ENCINDEX_US_ASCII;
1193 }
1194 
1195 int
1197 {
1199  int idx;
1200 
1201  if (NIL_P(charmap))
1202  idx = rb_usascii_encindex();
1203  else if ((idx = rb_enc_find_index(StringValueCStr(charmap))) < 0)
1204  idx = rb_ascii8bit_encindex();
1205 
1206  if (rb_enc_registered("locale") < 0) enc_alias_internal("locale", idx);
1207 
1208  return idx;
1209 }
1210 
1211 rb_encoding *
1213 {
1215 }
1216 
1217 static int
1219 {
1220  int idx;
1221 #if defined NO_LOCALE_CHARMAP
1223 #elif defined _WIN32 || defined __CYGWIN__
1224  char cp[sizeof(int) * 8 / 3 + 4];
1225  snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP());
1226  idx = rb_enc_find_index(cp);
1227  if (idx < 0) idx = rb_ascii8bit_encindex();
1228 #else
1230 #endif
1231 
1232  enc_alias_internal("filesystem", idx);
1233  return idx;
1234 }
1235 
1236 int
1238 {
1239  int idx = rb_enc_registered("filesystem");
1240  if (idx < 0)
1241  idx = rb_ascii8bit_encindex();
1242  return idx;
1243 }
1244 
1245 rb_encoding *
1247 {
1249 }
1250 
1252  int index; /* -2 => not yet set, -1 => nil */
1254 };
1255 
1257 
1258 static int
1259 enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
1260 {
1261  int overridden = FALSE;
1262 
1263  if (def->index != -2)
1264  /* Already set */
1265  overridden = TRUE;
1266 
1267  if (NIL_P(encoding)) {
1268  def->index = -1;
1269  def->enc = 0;
1270  st_insert(enc_table.names, (st_data_t)strdup(name),
1272  }
1273  else {
1274  def->index = rb_enc_to_index(rb_to_encoding(encoding));
1275  def->enc = 0;
1276  enc_alias_internal(name, def->index);
1277  }
1278 
1279  if (def == &default_external)
1281 
1282  return overridden;
1283 }
1284 
1285 rb_encoding *
1287 {
1288  if (default_external.enc) return default_external.enc;
1289 
1290  if (default_external.index >= 0) {
1291  default_external.enc = rb_enc_from_index(default_external.index);
1292  return default_external.enc;
1293  }
1294  else {
1295  return rb_locale_encoding();
1296  }
1297 }
1298 
1299 VALUE
1301 {
1303 }
1304 
1305 /*
1306  * call-seq:
1307  * Encoding.default_external -> enc
1308  *
1309  * Returns default external encoding.
1310  *
1311  * The default external encoding is used by default for strings created from
1312  * the following locations:
1313  *
1314  * * CSV
1315  * * File data read from disk
1316  * * SDBM
1317  * * StringIO
1318  * * Zlib::GzipReader
1319  * * Zlib::GzipWriter
1320  * * String#inspect
1321  * * Regexp#inspect
1322  *
1323  * While strings created from these locations will have this encoding, the
1324  * encoding may not be valid. Be sure to check String#valid_encoding?.
1325  *
1326  * File data written to disk will be transcoded to the default external
1327  * encoding when written.
1328  *
1329  * The default external encoding is initialized by the locale or -E option.
1330  */
1331 static VALUE
1333 {
1334  return rb_enc_default_external();
1335 }
1336 
1337 void
1339 {
1340  if (NIL_P(encoding)) {
1341  rb_raise(rb_eArgError, "default external can not be nil");
1342  }
1343  enc_set_default_encoding(&default_external, encoding,
1344  "external");
1345 }
1346 
1347 /*
1348  * call-seq:
1349  * Encoding.default_external = enc
1350  *
1351  * Sets default external encoding. You should not set
1352  * Encoding::default_external in ruby code as strings created before changing
1353  * the value may have a different encoding from strings created after the value
1354  * was changed. Instead, you should use <tt>ruby -E</tt> to invoke ruby with
1355  * the correct default_external.
1356  *
1357  * See Encoding::default_external for information on how the default external
1358  * encoding is used.
1359  */
1360 static VALUE
1362 {
1363  rb_warning("setting Encoding.default_external");
1364  rb_enc_set_default_external(encoding);
1365  return encoding;
1366 }
1367 
1368 static struct default_encoding default_internal = {-2};
1369 
1370 rb_encoding *
1372 {
1373  if (!default_internal.enc && default_internal.index >= 0) {
1374  default_internal.enc = rb_enc_from_index(default_internal.index);
1375  }
1376  return default_internal.enc; /* can be NULL */
1377 }
1378 
1379 VALUE
1381 {
1382  /* Note: These functions cope with default_internal not being set */
1384 }
1385 
1386 /*
1387  * call-seq:
1388  * Encoding.default_internal -> enc
1389  *
1390  * Returns default internal encoding. Strings will be transcoded to the
1391  * default internal encoding in the following places if the default internal
1392  * encoding is not nil:
1393  *
1394  * * CSV
1395  * * Etc.sysconfdir and Etc.systmpdir
1396  * * File data read from disk
1397  * * File names from Dir
1398  * * Integer#chr
1399  * * String#inspect and Regexp#inspect
1400  * * Strings returned from Curses
1401  * * Strings returned from Readline
1402  * * Strings returned from SDBM
1403  * * Time#zone
1404  * * Values from ENV
1405  * * Values in ARGV including $PROGRAM_NAME
1406  * * __FILE__
1407  *
1408  * Additionally String#encode and String#encode! use the default internal
1409  * encoding if no encoding is given.
1410  *
1411  * The locale encoding (__ENCODING__), not default_internal, is used as the
1412  * encoding of created strings.
1413  *
1414  * Encoding::default_internal is initialized by the source file's
1415  * internal_encoding or -E option.
1416  */
1417 static VALUE
1419 {
1420  return rb_enc_default_internal();
1421 }
1422 
1423 void
1425 {
1426  enc_set_default_encoding(&default_internal, encoding,
1427  "internal");
1428 }
1429 
1430 /*
1431  * call-seq:
1432  * Encoding.default_internal = enc or nil
1433  *
1434  * Sets default internal encoding or removes default internal encoding when
1435  * passed nil. You should not set Encoding::default_internal in ruby code as
1436  * strings created before changing the value may have a different encoding
1437  * from strings created after the change. Instead you should use
1438  * <tt>ruby -E</tt> to invoke ruby with the correct default_internal.
1439  *
1440  * See Encoding::default_internal for information on how the default internal
1441  * encoding is used.
1442  */
1443 static VALUE
1445 {
1446  rb_warning("setting Encoding.default_internal");
1447  rb_enc_set_default_internal(encoding);
1448  return encoding;
1449 }
1450 
1451 /*
1452  * call-seq:
1453  * Encoding.locale_charmap -> string
1454  *
1455  * Returns the locale charmap name.
1456  * It returns nil if no appropriate information is available.
1457  *
1458  * Debian GNU/Linux
1459  * LANG=C
1460  * Encoding.locale_charmap #=> "ANSI_X3.4-1968"
1461  * LANG=ja_JP.EUC-JP
1462  * Encoding.locale_charmap #=> "EUC-JP"
1463  *
1464  * SunOS 5
1465  * LANG=C
1466  * Encoding.locale_charmap #=> "646"
1467  * LANG=ja
1468  * Encoding.locale_charmap #=> "eucJP"
1469  *
1470  * The result is highly platform dependent.
1471  * So Encoding.find(Encoding.locale_charmap) may cause an error.
1472  * If you need some encoding object even for unknown locale,
1473  * Encoding.find("locale") can be used.
1474  *
1475  */
1476 VALUE
1478 {
1479 #if defined NO_LOCALE_CHARMAP
1480  return rb_usascii_str_new2("ASCII-8BIT");
1481 #elif defined _WIN32 || defined __CYGWIN__
1482  const char *codeset = 0;
1483  char cp[sizeof(int) * 3 + 4];
1484 # ifdef __CYGWIN__
1485  const char *nl_langinfo_codeset(void);
1486  codeset = nl_langinfo_codeset();
1487 # endif
1488  if (!codeset) {
1489  UINT codepage = GetConsoleCP();
1490  if (!codepage) codepage = GetACP();
1491  snprintf(cp, sizeof(cp), "CP%d", codepage);
1492  codeset = cp;
1493  }
1494  return rb_usascii_str_new2(codeset);
1495 #elif defined HAVE_LANGINFO_H
1496  char *codeset;
1497  codeset = nl_langinfo(CODESET);
1498  return rb_usascii_str_new2(codeset);
1499 #else
1500  return Qnil;
1501 #endif
1502 }
1503 
1504 static void
1506 {
1507  VALUE encoding = rb_enc_from_encoding(enc);
1508  char *s = (char *)name;
1509  int haslower = 0, hasupper = 0, valid = 0;
1510 
1511  if (ISDIGIT(*s)) return;
1512  if (ISUPPER(*s)) {
1513  hasupper = 1;
1514  while (*++s && (ISALNUM(*s) || *s == '_')) {
1515  if (ISLOWER(*s)) haslower = 1;
1516  }
1517  }
1518  if (!*s) {
1519  if (s - name > ENCODING_NAMELEN_MAX) return;
1520  valid = 1;
1521  rb_define_const(rb_cEncoding, name, encoding);
1522  }
1523  if (!valid || haslower) {
1524  size_t len = s - name;
1525  if (len > ENCODING_NAMELEN_MAX) return;
1526  if (!haslower || !hasupper) {
1527  do {
1528  if (ISLOWER(*s)) haslower = 1;
1529  if (ISUPPER(*s)) hasupper = 1;
1530  } while (*++s && (!haslower || !hasupper));
1531  len = s - name;
1532  }
1533  len += strlen(s);
1534  if (len++ > ENCODING_NAMELEN_MAX) return;
1535  MEMCPY(s = ALLOCA_N(char, len), name, char, len);
1536  name = s;
1537  if (!valid) {
1538  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1539  for (; *s; ++s) {
1540  if (!ISALNUM(*s)) *s = '_';
1541  }
1542  if (hasupper) {
1543  rb_define_const(rb_cEncoding, name, encoding);
1544  }
1545  }
1546  if (haslower) {
1547  for (s = (char *)name; *s; ++s) {
1548  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1549  }
1550  rb_define_const(rb_cEncoding, name, encoding);
1551  }
1552  }
1553 }
1554 
1555 static int
1557 {
1558  VALUE ary = (VALUE)arg;
1559  VALUE str = rb_usascii_str_new2((char *)name);
1560  OBJ_FREEZE(str);
1561  rb_ary_push(ary, str);
1562  return ST_CONTINUE;
1563 }
1564 
1565 /*
1566  * call-seq:
1567  * Encoding.name_list -> ["enc1", "enc2", ...]
1568  *
1569  * Returns the list of available encoding names.
1570  *
1571  * Encoding.name_list
1572  * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
1573  * "ISO-8859-1", "Shift_JIS", "EUC-JP",
1574  * "Windows-31J",
1575  * "BINARY", "CP932", "eucJP"]
1576  *
1577  */
1578 
1579 static VALUE
1581 {
1582  VALUE ary = rb_ary_new2(enc_table.names->num_entries);
1584  return ary;
1585 }
1586 
1587 static int
1589 {
1590  VALUE *p = (VALUE *)arg;
1591  VALUE aliases = p[0], ary = p[1];
1592  int idx = (int)orig;
1593  VALUE key, str = rb_ary_entry(ary, idx);
1594 
1595  if (NIL_P(str)) {
1597 
1598  if (!enc) return ST_CONTINUE;
1599  if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) {
1600  return ST_CONTINUE;
1601  }
1602  str = rb_usascii_str_new2(rb_enc_name(enc));
1603  OBJ_FREEZE(str);
1604  rb_ary_store(ary, idx, str);
1605  }
1606  key = rb_usascii_str_new2((char *)name);
1607  OBJ_FREEZE(key);
1608  rb_hash_aset(aliases, key, str);
1609  return ST_CONTINUE;
1610 }
1611 
1612 /*
1613  * call-seq:
1614  * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
1615  *
1616  * Returns a hash mapping each available encoding alias to its original encoding name.
1617  *
1618  * Encoding.aliases
1619  * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1986"=>"US-ASCII",
1620  * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
1621  *
1622  */
1623 
1624 static VALUE
1626 {
1627  VALUE aliases[2];
1628  aliases[0] = rb_hash_new();
1629  aliases[1] = rb_ary_new();
1631  return aliases[0];
1632 }
1633 
1634 /*
1635  * An Encoding instance represents a character encoding usable in Ruby. It is
1636  * defined as a constant under the Encoding namespace. It has a name and
1637  * optionally, aliases:
1638  *
1639  * Encoding::ISO_8859_1.name
1640  * #=> "ISO-8859-1"
1641  *
1642  * Encoding::ISO_8859_1.names
1643  * #=> ["ISO-8859-1", "ISO8859-1"]
1644  *
1645  * Ruby methods dealing with encodings return or accept Encoding instances as
1646  * arguments (when a method accepts an Encoding instance as an argument, it
1647  * can be passed an Encoding name or alias instead).
1648  *
1649  * "some string".encoding
1650  * #=> #<Encoding:UTF-8>
1651  *
1652  * string = "some string".encode(Encoding::ISO_8859_1)
1653  * #=> "some string"
1654  * string.encoding
1655  * #=> #<Encoding:ISO-8859-1>
1656  *
1657  * "some string".encode "ISO-8859-1"
1658  * #=> "some string"
1659  *
1660  * <code>Encoding::ASCII_8BIT</code> is a special encoding that is usually
1661  * used for a byte string, not a character string. But as the name suggests,
1662  * its characters in the range of ASCII are considered as ASCII characters.
1663  * This is useful when you use ASCII-8BIT characters with other ASCII
1664  * compatible characters.
1665  *
1666  * == Changing an encoding
1667  *
1668  * The associated Encoding of a String can be changed in two different ways.
1669  *
1670  * First, it is possible to set the Encoding of a string to a new Encoding
1671  * without changing the internal byte representation of the string, with
1672  * String#force_encoding. This is how you can tell Ruby the correct encoding
1673  * of a string.
1674  *
1675  * string
1676  * #=> "R\xC3\xA9sum\xC3\xA9"
1677  * string.encoding
1678  * #=> #<Encoding:ISO-8859-1>
1679  * string.force_encoding(Encoding::UTF_8)
1680  * #=> "R\u00E9sum\u00E9"
1681  *
1682  * Second, it is possible to transcode a string, i.e. translate its internal
1683  * byte representation to another encoding. Its associated encoding is also
1684  * set to the other encoding. See String#encode for the various forms of
1685  * transcoding, and the Encoding::Converter class for additional control over
1686  * the transcoding process.
1687  *
1688  * string
1689  * #=> "R\u00E9sum\u00E9"
1690  * string.encoding
1691  * #=> #<Encoding:UTF-8>
1692  * string = string.encode!(Encoding::ISO_8859_1)
1693  * #=> "R\xE9sum\xE9"
1694  * string.encoding
1695  * #=> #<Encoding:ISO-8859-1>
1696  *
1697  * == Script encoding
1698  *
1699  * All Ruby script code has an associated Encoding which any String literal
1700  * created in the source code will be associated to.
1701  *
1702  * The default script encoding is <code>Encoding::US_ASCII</code>, but it can
1703  * be changed by a magic comment on the first line of the source code file (or
1704  * second line, if there is a shebang line on the first). The comment must
1705  * contain the word <code>coding</code> or <code>encoding</code>, followed
1706  * by a colon, space and the Encoding name or alias:
1707  *
1708  * # encoding: UTF-8
1709  *
1710  * "some string".encoding
1711  * #=> #<Encoding:UTF-8>
1712  *
1713  * The <code>__ENCODING__</code> keyword returns the script encoding of the file
1714  * in which the keyword is written:
1715  *
1716  * # encoding: ISO-8859-1
1717  *
1718  * __ENCODING__
1719  * #=> #<Encoding:ISO-8859-1>
1720  *
1721  * <code>ruby -K</code> will change the default locale encoding, but this is
1722  * not recommended. Ruby source files should declare their script encoding by a
1723  * magic comment even when they only depend on US-ASCII strings or regular
1724  * expressions.
1725  *
1726  * == Locale encoding
1727  *
1728  * The default encoding of the environment. Usually derived from locale.
1729  *
1730  * see Encoding.locale_charmap, Encoding.find('locale')
1731  *
1732  * == Filesystem encoding
1733  *
1734  * The default encoding of strings from the filesystem of the environment.
1735  * This is used for strings of file names or paths.
1736  *
1737  * see Encoding.find('filesystem')
1738  *
1739  * == External encoding
1740  *
1741  * Each IO object has an external encoding which indicates the encoding that
1742  * Ruby will use to read its data. By default Ruby sets the external encoding
1743  * of an IO object to the default external encoding. The default external
1744  * encoding is set by locale encoding or the interpreter <code>-E</code> option.
1745  * Encoding.default_external returns the current value of the external
1746  * encoding.
1747  *
1748  * ENV["LANG"]
1749  * #=> "UTF-8"
1750  * Encoding.default_external
1751  * #=> #<Encoding:UTF-8>
1752  *
1753  * $ ruby -E ISO-8859-1 -e "p Encoding.default_external"
1754  * #<Encoding:ISO-8859-1>
1755  *
1756  * $ LANG=C ruby -e 'p Encoding.default_external'
1757  * #<Encoding:US-ASCII>
1758  *
1759  * The default external encoding may also be set through
1760  * Encoding.default_external=, but you should not do this as strings created
1761  * before and after the change will have inconsistent encodings. Instead use
1762  * <code>ruby -E</code> to invoke ruby with the correct external encoding.
1763  *
1764  * When you know that the actual encoding of the data of an IO object is not
1765  * the default external encoding, you can reset its external encoding with
1766  * IO#set_encoding or set it at IO object creation (see IO.new options).
1767  *
1768  * == Internal encoding
1769  *
1770  * To process the data of an IO object which has an encoding different
1771  * from its external encoding, you can set its internal encoding. Ruby will use
1772  * this internal encoding to transcode the data when it is read from the IO
1773  * object.
1774  *
1775  * Conversely, when data is written to the IO object it is transcoded from the
1776  * internal encoding to the external encoding of the IO object.
1777  *
1778  * The internal encoding of an IO object can be set with
1779  * IO#set_encoding or at IO object creation (see IO.new options).
1780  *
1781  * The internal encoding is optional and when not set, the Ruby default
1782  * internal encoding is used. If not explicitly set this default internal
1783  * encoding is +nil+ meaning that by default, no transcoding occurs.
1784  *
1785  * The default internal encoding can be set with the interpreter option
1786  * <code>-E</code>. Encoding.default_internal returns the current internal
1787  * encoding.
1788  *
1789  * $ ruby -e 'p Encoding.default_internal'
1790  * nil
1791  *
1792  * $ ruby -E ISO-8859-1:UTF-8 -e "p [Encoding.default_external, \
1793  * Encoding.default_internal]"
1794  * [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>]
1795  *
1796  * The default internal encoding may also be set through
1797  * Encoding.default_internal=, but you should not do this as strings created
1798  * before and after the change will have inconsistent encodings. Instead use
1799  * <code>ruby -E</code> to invoke ruby with the correct internal encoding.
1800  *
1801  * == IO encoding example
1802  *
1803  * In the following example a UTF-8 encoded string "R\u00E9sum\u00E9" is transcoded for
1804  * output to ISO-8859-1 encoding, then read back in and transcoded to UTF-8:
1805  *
1806  * string = "R\u00E9sum\u00E9"
1807  *
1808  * open("transcoded.txt", "w:ISO-8859-1") do |io|
1809  * io.write(string)
1810  * end
1811  *
1812  * puts "raw text:"
1813  * p File.binread("transcoded.txt")
1814  * puts
1815  *
1816  * open("transcoded.txt", "r:ISO-8859-1:UTF-8") do |io|
1817  * puts "transcoded text:"
1818  * p io.read
1819  * end
1820  *
1821  * While writing the file, the internal encoding is not specified as it is
1822  * only necessary for reading. While reading the file both the internal and
1823  * external encoding must be specified to obtain the correct result.
1824  *
1825  * $ ruby t.rb
1826  * raw text:
1827  * "R\xE9sum\xE9"
1828  *
1829  * transcoded text:
1830  * "R\u00E9sum\u00E9"
1831  *
1832  */
1833 
1834 void
1836 {
1837 #undef rb_intern
1838 #define rb_intern(str) rb_intern_const(str)
1839  VALUE list;
1840  int i;
1841 
1842  rb_cEncoding = rb_define_class("Encoding", rb_cObject);
1845  rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
1846  rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0);
1847  rb_define_method(rb_cEncoding, "name", enc_name, 0);
1848  rb_define_method(rb_cEncoding, "names", enc_names, 0);
1849  rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
1850  rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0);
1851  rb_define_method(rb_cEncoding, "replicate", enc_replicate, 1);
1857 
1858  rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
1860 
1866 
1867  list = rb_ary_new2(enc_table.count);
1868  RBASIC(list)->klass = 0;
1871 
1872  for (i = 0; i < enc_table.count; ++i) {
1873  rb_ary_push(list, enc_new(enc_table.list[i].enc));
1874  }
1875 }
1876 
1877 /* locale insensitive ctype functions */
1878 
1879 #define ctype_test(c, ctype) \
1880  (rb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), (ctype)))
1881 
1882 int rb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); }
1883 int rb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); }
1884 int rb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); }
1885 int rb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); }
1886 int rb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); }
1887 int rb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); }
1888 int rb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); }
1889 int rb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); }
1890 int rb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); }
1891 int rb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); }
1892 int rb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); }
1893 int rb_isxdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_XDIGIT); }
1894 
/* Lower-case an ASCII code; any non-ASCII value is returned unchanged. */
int
rb_tolower(int c)
{
    if (!rb_isascii(c)) {
        return c;
    }
    return ONIGENC_ASCII_CODE_TO_LOWER_CASE(c);
}
1900 
/* Upper-case an ASCII code; any non-ASCII value is returned unchanged. */
int
rb_toupper(int c)
{
    if (!rb_isascii(c)) {
        return c;
    }
    return ONIGENC_ASCII_CODE_TO_UPPER_CASE(c);
}
1906 
static void enc_set_index(VALUE obj, int idx)
Definition: encoding.c:730
static int rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
Definition: encoding.c:1556
rb_encoding OnigEncodingUS_ASCII
#define ONIGENC_CTYPE_BLANK
Definition: oniguruma.h:198
#define ONIGENC_CTYPE_PUNCT
Definition: oniguruma.h:204
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:954
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:690
#define MBCLEN_CHARFOUND_P(ret)
Definition: encoding.h:138
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:778
VALUE rb_ary_entry(VALUE ary, long offset)
Definition: array.c:1088
#define MBCLEN_CHARFOUND_LEN(ret)
Definition: encoding.h:139
void rb_bug(const char *fmt,...)
Definition: error.c:295
#define rb_enc_mbc_to_codepoint(p, e, enc)
Definition: encoding.h:155
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:856
#define FALSE
Definition: nkf.h:174
void rb_enc_set_base(const char *name, const char *orig)
Definition: encoding.c:335
size_t strlen(const char *)
#define INT2NUM(x)
Definition: ruby.h:1178
int i
Definition: win32ole.c:784
Definition: st.h:77
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Definition: encoding.h:73
VALUE rb_id2str(ID id)
Definition: ripper.c:16946
VALUE rb_cEncoding
Definition: encoding.c:40
static VALUE enc_load(VALUE klass, VALUE str)
Definition: encoding.c:1145
#define NUM2INT(x)
Definition: ruby.h:622
int count
Definition: encoding.c:51
void rb_undef_alloc_func(VALUE)
Definition: vm_method.c:493
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1497
static int rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
Definition: encoding.c:1588
static VALUE enc_inspect(VALUE self)
Definition: encoding.c:985
static rb_encoding * set_base_encoding(int index, rb_encoding *base)
Definition: encoding.c:321
#define FL_TAINT
Definition: ruby.h:1115
#define CLASS_OF(v)
Definition: ruby.h:448
static VALUE rb_enc_name_list(VALUE klass)
Definition: encoding.c:1580
static VALUE enc_list(VALUE klass)
Definition: encoding.c:1059
static int enc_register_at(int index, const char *name, rb_encoding *encoding)
Definition: encoding.c:232
int rb_toupper(int c)
Definition: encoding.c:1902
#define Qtrue
Definition: ruby.h:434
int st_insert(st_table *, st_data_t, st_data_t)
void Init_Encoding(void)
Definition: encoding.c:1835
#define TypedData_Wrap_Struct(klass, data_type, sval)
Definition: ruby.h:1016
static int str_to_encindex(VALUE enc)
Definition: encoding.c:178
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:194
#define ENC_CODERANGE_CLEAR(obj)
Definition: encoding.h:65
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:103
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:970
const char * nl_langinfo_codeset(void)
Definition: langinfo.c:64
VALUE rb_eTypeError
Definition: error.c:516
st_table * names
Definition: encoding.c:53
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1371
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:822
#define SYM2ID(x)
Definition: ruby.h:364
int rb_usascii_encindex(void)
Definition: encoding.c:1190
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:789
static VALUE enc_names(VALUE self)
Definition: encoding.c:1030
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
Definition: vm_eval.c:773
int rb_isblank(int c)
Definition: encoding.c:1884
static int enc_table_expand(int newsize)
Definition: encoding.c:216
VALUE rb_protect(VALUE(*proc)(VALUE), VALUE data, int *state)
Definition: eval.c:771
#define ENCODING_GET_INLINED(obj)
Definition: encoding.h:46
int rb_enc_str_coderange(VALUE)
Definition: string.c:327
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:933
static VALUE enc_new(rb_encoding *encoding)
Definition: encoding.c:82
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1788
VALUE rb_ivar_get(VALUE, ID)
Definition: variable.c:1116
unsigned int flags
Definition: oniguruma.h:178
#define ONIGENC_CTYPE_CNTRL
Definition: oniguruma.h:199
void rb_enc_set_default_external(VALUE encoding)
Definition: encoding.c:1338
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:766
int rb_isupper(int c)
Definition: encoding.c:1892
static VALUE rb_enc_aliases(VALUE klass)
Definition: encoding.c:1625
static VALUE set_default_external(VALUE klass, VALUE encoding)
Definition: encoding.c:1361
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:886
int rb_isprint(int c)
Definition: encoding.c:1889
#define DATA_PTR(dta)
Definition: ruby.h:985
VALUE rb_locale_charmap(VALUE klass)
Definition: encoding.c:1477
const char * alias
Definition: nkf.c:1151
#define ENC_SET_DUMMY(enc)
Definition: encoding.h:232
#define ENC_REGISTER(enc)
int rb_enc_registered(const char *name)
Definition: encoding.c:560
ID rb_id_encoding(void)
Definition: encoding.c:683
int rb_isdigit(int c)
Definition: encoding.c:1886
static int enc_alias_internal(const char *alias, int idx)
Definition: encoding.c:475
#define ISDIGIT(c)
#define ONIGENC_CTYPE_ALNUM
Definition: oniguruma.h:209
int rb_filesystem_encindex(void)
Definition: encoding.c:1237
void rb_enc_init(void)
Definition: encoding.c:533
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1166
#define RDATA(obj)
Definition: ruby.h:1103
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1362
#define ONIGENC_CTYPE_ALPHA
Definition: oniguruma.h:197
#define ENC_CODERANGE_7BIT
Definition: encoding.h:58
const char * rb_obj_classname(VALUE)
Definition: variable.c:396
static VALUE enc_dummy_p(VALUE enc)
Definition: encoding.c:434
static VALUE rb_enc_from_encoding_index(int idx)
Definition: encoding.c:88
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:964
Win32OLEIDispatch * p
Definition: win32ole.c:786
Definition: nkf.c:87
#define ONIGENC_CTYPE_UPPER
Definition: oniguruma.h:206
int st_insert2(st_table *, st_data_t, st_data_t, st_data_t(*)(st_data_t))
int args
Definition: win32ole.c:785
st_table * st_init_strcasetable(void)
Definition: st.c:296
#define ctype_test(c, ctype)
Definition: encoding.c:1879
#define RB_TYPE_P(obj, type)
Definition: ruby.h:1537
void rb_encdb_set_unicode(int index)
Definition: encoding.c:517
#define FL_UNTRUSTED
Definition: ruby.h:1116
int st_lookup(st_table *, st_data_t, st_data_t *)
static struct @4 enc_table
static int str_find_encindex(VALUE enc)
Definition: encoding.c:165
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:146
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1286
#define ONIGENC_CTYPE_DIGIT
Definition: oniguruma.h:200
#define ONIGENC_MBC_ENC_LEN(enc, p, e)
Definition: oniguruma.h:263
int rb_isxdigit(int c)
Definition: encoding.c:1893
int rb_ispunct(int c)
Definition: encoding.c:1890
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:880
RUBY_EXTERN VALUE rb_cObject
Definition: ruby.h:1426
Definition: encoding.c:43
#define ONIGENC_CTYPE_XDIGIT
Definition: oniguruma.h:207
#define rb_enc_isascii(c, enc)
Definition: encoding.h:174
#define RSTRING_END(str)
Definition: ruby.h:870
int rb_isspace(int c)
Definition: encoding.c:1891
int rb_typeddata_is_kind_of(VALUE obj, const rb_data_type_t *data_type)
Definition: error.c:478
VALUE rb_ary_replace(VALUE copy, VALUE orig)
Definition: array.c:3156
VALUE rb_ary_new(void)
Definition: array.c:424
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1160
VALUE rb_enc_default_external(void)
Definition: encoding.c:1300
#define snprintf
Definition: subst.h:6
#define NIL_P(v)
Definition: ruby.h:446
#define ISASCII(c)
Definition: ruby.h:1629
static VALUE enc_name(VALUE self)
Definition: encoding.c:1003
#define ENC_CODERANGE_ASCIIONLY(obj)
Definition: encoding.h:62
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:488
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:741
int rb_enc_replicate(const char *name, rb_encoding *encoding)
Definition: encoding.c:343
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2204
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:719
#define ENCODING_COUNT
Definition: encoding.c:58
#define ISALNUM(c)
Definition: ruby.h:1635
static void set_encoding_const(const char *, rb_encoding *)
Definition: encoding.c:1505
static VALUE enc_dump(int argc, VALUE *argv, VALUE self)
Definition: encoding.c:1137
int rb_encdb_alias(const char *alias, const char *orig)
Definition: encoding.c:506
int argc
Definition: ruby.c:130
#define Qfalse
Definition: ruby.h:433
VALUE rb_require_safe(VALUE, int)
Definition: load.c:934
int rb_locale_encindex(void)
Definition: encoding.c:1196
#define realloc
Definition: ripper.c:99
#define ALLOCA_N(type, n)
Definition: ruby.h:1227
static rb_encoding * str_to_encoding(VALUE enc)
Definition: encoding.c:188
void rb_gc_register_mark_object(VALUE obj)
Definition: gc.c:2982
#define ISUPPER(c)
Definition: ruby.h:1633
#define MEMCPY(p1, p2, type, n)
Definition: ruby.h:1242
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:748
VALUE rb_eEncCompatError
Definition: error.c:523
#define OBJ_FREEZE(x)
Definition: ruby.h:1164
#define rb_enc_mbminlen(enc)
Definition: encoding.h:127
#define ISLOWER(c)
Definition: ruby.h:1634
const char * name
Definition: encoding.c:44
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)
Definition: oniguruma.h:254
int rb_isgraph(int c)
Definition: encoding.c:1887
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: oniguruma.h:269
static int enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
Definition: encoding.c:1259
rb_encoding * rb_find_encoding(VALUE enc)
Definition: encoding.c:201
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:910
#define RSTRING_LEN(str)
Definition: ruby.h:862
int rb_encdb_dummy(const char *name)
Definition: encoding.c:410
static int enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:266
#define TRUE
Definition: nkf.h:175
#define T_DATA
Definition: ruby.h:500
static int enc_check_encoding(VALUE obj)
Definition: encoding.c:126
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1275
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:898
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:459
#define rb_enc_name(enc)
Definition: encoding.h:124
VALUE rb_hash_new(void)
Definition: hash.c:234
#define strdup(s)
Definition: util.h:69
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1570
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)
Definition: regenc.h:189
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1128
#define ONIGENC_CTYPE_PRINT
Definition: oniguruma.h:203
unsigned long ID
Definition: ruby.h:105
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1181
int rb_encdb_replicate(const char *name, const char *orig)
Definition: encoding.c:388
#define ENCODING_NAMELEN_MAX
Definition: encoding.c:61
#define Qnil
Definition: ruby.h:435
int rb_define_dummy_encoding(const char *name)
Definition: encoding.c:400
static struct default_encoding default_internal
Definition: encoding.c:1368
const char * name
Definition: oniguruma.h:162
#define BUILTIN_TYPE(x)
Definition: ruby.h:510
#define debug(x)
Definition: _sdbm.c:51
unsigned long VALUE
Definition: ruby.h:104
static VALUE enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
Definition: encoding.c:1124
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1212
#define RBASIC(obj)
Definition: ruby.h:1094
#define ONIGENC_IS_ASCII_CODE(code)
Definition: regenc.h:187
int rb_utf8_encindex(void)
Definition: encoding.c:1175
#define ENCODING_SET_INLINED(obj, i)
Definition: encoding.h:33
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:870
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: encoding.h:235
static int enc_autoload(rb_encoding *)
Definition: encoding.c:611
#define rb_enc_asciicompat(enc)
Definition: encoding.h:184
#define ONIGENC_CTYPE_SPACE
Definition: oniguruma.h:205
int rb_islower(int c)
Definition: encoding.c:1888
static VALUE set_default_internal(VALUE klass, VALUE encoding)
Definition: encoding.c:1444
#define ONIGENC_IS_UNICODE(enc)
Definition: oniguruma.h:229
#define enc_autoload_p(enc)
Definition: encoding.c:64
#define FL_UNSET(x, f)
Definition: ruby.h:1150
#define UChar
Definition: oniguruma.h:110
static void enc_check_duplication(const char *name)
Definition: encoding.c:313
#define StringValueCStr(v)
Definition: ruby.h:548
static size_t enc_memsize(const void *p)
Definition: encoding.c:69
#define RSTRING_PTR(str)
Definition: ruby.h:866
static ID id_encoding
Definition: encoding.c:39
VALUE rb_usascii_str_new2(const char *)
static int enc_names_i(st_data_t name, st_data_t idx, st_data_t args)
Definition: encoding.c:1009
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:772
static VALUE get_default_external(VALUE klass)
Definition: encoding.c:1332
int size
Definition: encoding.c:52
static struct default_encoding default_external
Definition: encoding.c:1256
#define ONIGENC_FLAG_UNICODE
Definition: oniguruma.h:215
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e)
Definition: oniguruma.h:258
#define ENCODING_INLINE_MAX
Definition: encoding.h:29
#define xmalloc
Definition: defines.h:64
void rb_set_errinfo(VALUE err)
Definition: eval.c:1442
static VALUE enc_replicate(VALUE encoding, VALUE name)
Definition: encoding.c:364
rb_encoding * enc
Definition: encoding.c:1253
static int enc_set_filesystem_encoding(void)
Definition: encoding.c:1218
VALUE rb_check_string_type(VALUE)
Definition: string.c:1509
void rb_enc_set_default_internal(VALUE encoding)
Definition: encoding.c:1424
static VALUE enc_ascii_compatible_p(VALUE enc)
Definition: encoding.c:450
uint8_t key[16]
Definition: random.c:1370
#define valid_encoding_name_p(name)
Definition: encoding.c:62
#define T_STRING
Definition: ruby.h:490
int rb_enc_alias(const char *alias, const char *orig)
Definition: encoding.c:491
static VALUE require_enc(VALUE enclib)
Definition: encoding.c:573
#define is_data_encoding(obj)
Definition: encoding.c:79
struct rb_encoding_entry * list
Definition: encoding.c:50
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1246
static int enc_capable(VALUE obj)
Definition: encoding.c:667
#define T_FILE
Definition: ruby.h:496
static const rb_data_type_t encoding_data_type
Definition: encoding.c:74
#define rb_isascii(c)
Definition: ruby.h:1612
#define TOLOWER(c)
Definition: ruby.h:1641
static st_data_t enc_dup_name(st_data_t name)
Definition: encoding.c:465
int rb_isalnum(int c)
Definition: encoding.c:1882
VALUE rb_enc_default_internal(void)
Definition: encoding.c:1380
VALUE rb_ary_new2(long capa)
Definition: array.c:417
static int check_encoding(rb_encoding *enc)
Definition: encoding.c:114
static VALUE get_default_internal(VALUE klass)
Definition: encoding.c:1418
#define rb_safe_level()
Definition: tcltklib.c:94
int rb_tolower(int c)
Definition: encoding.c:1896
#define ruby_debug
Definition: ruby.h:1364
#define ENC_TO_ENCINDEX(enc)
Definition: encoding.h:229
const char * name
Definition: nkf.c:208
static int enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
Definition: encoding.c:372
#define rb_errinfo()
Definition: tcltklib.c:89
unsigned long st_data_t
Definition: st.h:35
unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:948
#define STRCASECMP(s1, s2)
Definition: ruby.h:1645
#define rb_intern(str)
#define rb_enc_to_index(enc)
Definition: encoding.h:86
static int enc_alias(const char *alias, int idx)
Definition: encoding.c:482
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1151
void rb_warning(const char *fmt,...)
Definition: error.c:234
int rb_enc_find_index(const char *name)
Definition: encoding.c:635
int rb_iscntrl(int c)
Definition: encoding.c:1885
#define rb_check_frozen(obj)
Definition: intern.h:258
#define CONST_ID(var, str)
Definition: ruby.h:1318
int rb_enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:279
#define SPECIAL_CONST_P(x)
Definition: ruby.h:1143
Definition: nkf.c:108
static VALUE rb_encoding_list
Definition: encoding.c:41
#define ONIGENC_CTYPE_GRAPH
Definition: oniguruma.h:201
void rb_encdb_declare(const char *name)
Definition: encoding.c:303
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:340
void rb_gc_mark_encodings(void)
Definition: encoding.c:211
#define SYMBOL_P(x)
Definition: ruby.h:362
#define NULL
Definition: _sdbm.c:102
VALUE rb_hash_aset(VALUE, VALUE, VALUE)
#define UNSPECIFIED_ENCODING
Definition: encoding.c:59
rb_encoding OnigEncodingUTF_8
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1344
#define ruby_verbose
Definition: ruby.h:1363
int st_foreach(st_table *, int(*)(ANYARGS), st_data_t)
Definition: st.c:1006
#define ONIGENC_CTYPE_LOWER
Definition: oniguruma.h:202
void rb_warn(const char *fmt,...)
Definition: error.c:221
rb_encoding * enc
Definition: encoding.c:45
VALUE rb_eArgError
Definition: error.c:517
static int load_encoding(const char *name)
Definition: encoding.c:580
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:659
#define T_REGEXP
Definition: ruby.h:491
char ** argv
Definition: ruby.c:131
#define StringValue(v)
Definition: ruby.h:546
static VALUE enc_find(VALUE klass, VALUE enc)
Definition: encoding.c:1089
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)
Definition: regenc.h:188
#define ENC_DUMMY_P(enc)
Definition: encoding.h:231
int rb_isalpha(int c)
Definition: encoding.c:1883
static int must_encoding(VALUE enc)
Definition: encoding.c:135
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:548
rb_encoding * base
Definition: encoding.c:46