Ruby 2.0.0p247 (2013-06-27 revision 41674)
encoding.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  encoding.c -
4 
5  $Author: nobu $
6  created at: Thu May 24 17:23:27 JST 2007
7 
8  Copyright (C) 2007 Yukihiro Matsumoto
9 
10 **********************************************************************/
11 
12 #include "ruby/ruby.h"
13 #include "ruby/encoding.h"
14 #include "internal.h"
15 #include "regenc.h"
16 #include <ctype.h>
17 #ifndef NO_LOCALE_CHARMAP
18 #ifdef __CYGWIN__
19 #include <windows.h>
20 #endif
21 #ifdef HAVE_LANGINFO_H
22 #include <langinfo.h>
23 #endif
24 #endif
25 #include "ruby/util.h"
26 
27 #if defined __GNUC__ && __GNUC__ >= 4
28 #pragma GCC visibility push(default)
29 int rb_enc_register(const char *name, rb_encoding *encoding);
30 void rb_enc_set_base(const char *name, const char *orig);
31 void rb_encdb_declare(const char *name);
32 int rb_encdb_replicate(const char *name, const char *orig);
33 int rb_encdb_dummy(const char *name);
34 int rb_encdb_alias(const char *alias, const char *orig);
35 void rb_encdb_set_unicode(int index);
36 #pragma GCC visibility pop
37 #endif
38 
39 static ID id_encoding;
42 
44  const char *name;
47 };
48 
49 static struct {
51  int count;
52  int size;
54 } enc_table;
55 
56 void rb_enc_init(void);
57 
58 #define ENCODING_COUNT ENCINDEX_BUILTIN_MAX
59 #define UNSPECIFIED_ENCODING INT_MAX
60 
61 #define ENCODING_NAMELEN_MAX 63
62 #define valid_encoding_name_p(name) ((name) && strlen(name) <= ENCODING_NAMELEN_MAX)
63 
64 #define enc_autoload_p(enc) (!rb_enc_mbmaxlen(enc))
65 
66 static int load_encoding(const char *name);
67 
/*
 * Size callback listed in the "encoding" data-type table.
 * The argument is ignored and the reported size is always 0 bytes.
 */
static size_t
enc_memsize(const void *p)
{
    const size_t reported_bytes = 0;

    (void)p;
    return reported_bytes;
}
73 
75  "encoding",
76  {0, 0, enc_memsize,},
77 };
78 
79 #define is_data_encoding(obj) (RTYPEDDATA_P(obj) && RTYPEDDATA_TYPE(obj) == &encoding_data_type)
80 
81 static VALUE
82 enc_new(rb_encoding *encoding)
83 {
84  return TypedData_Wrap_Struct(rb_cEncoding, &encoding_data_type, encoding);
85 }
86 
87 static VALUE
89 {
90  VALUE list, enc;
91 
92  if (!(list = rb_encoding_list)) {
93  rb_bug("rb_enc_from_encoding_index(%d): no rb_encoding_list", idx);
94  }
95  enc = rb_ary_entry(list, idx);
96  if (NIL_P(enc)) {
97  rb_bug("rb_enc_from_encoding_index(%d): not created yet", idx);
98  }
99  return enc;
100 }
101 
102 VALUE
104 {
105  int idx;
106  if (!encoding) return Qnil;
107  idx = ENC_TO_ENCINDEX(encoding);
108  return rb_enc_from_encoding_index(idx);
109 }
110 
111 static int enc_autoload(rb_encoding *);
112 
113 static int
115 {
116  int index = rb_enc_to_index(enc);
117  if (rb_enc_from_index(index) != enc)
118  return -1;
119  if (enc_autoload_p(enc)) {
120  index = enc_autoload(enc);
121  }
122  return index;
123 }
124 
125 static int
127 {
128  if (SPECIAL_CONST_P(obj) || !rb_typeddata_is_kind_of(obj, &encoding_data_type)) {
129  return -1;
130  }
131  return check_encoding(RDATA(obj)->data);
132 }
133 
134 static int
136 {
137  int index = enc_check_encoding(enc);
138  if (index < 0) {
139  rb_raise(rb_eTypeError, "wrong argument type %s (expected Encoding)",
140  rb_obj_classname(enc));
141  }
142  return index;
143 }
144 
145 int
147 {
148  int idx;
149 
150  idx = enc_check_encoding(enc);
151  if (idx >= 0) {
152  return idx;
153  }
154  else if (NIL_P(enc = rb_check_string_type(enc))) {
155  return -1;
156  }
157  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
158  return -1;
159  }
160  return rb_enc_find_index(StringValueCStr(enc));
161 }
162 
163 /* Returns encoding index or UNSPECIFIED_ENCODING */
164 static int
166 {
167  int idx;
168 
169  StringValue(enc);
170  if (!rb_enc_asciicompat(rb_enc_get(enc))) {
171  rb_raise(rb_eArgError, "invalid name encoding (non ASCII)");
172  }
174  return idx;
175 }
176 
177 static int
179 {
180  int idx = str_find_encindex(enc);
181  if (idx < 0) {
182  rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc));
183  }
184  return idx;
185 }
186 
187 static rb_encoding *
189 {
190  return rb_enc_from_index(str_to_encindex(enc));
191 }
192 
193 rb_encoding *
195 {
196  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
197  return str_to_encoding(enc);
198 }
199 
200 rb_encoding *
202 {
203  int idx;
204  if (enc_check_encoding(enc) >= 0) return RDATA(enc)->data;
205  idx = str_find_encindex(enc);
206  if (idx < 0) return NULL;
207  return rb_enc_from_index(idx);
208 }
209 
210 void
212 {
213 }
214 
215 static int
216 enc_table_expand(int newsize)
217 {
218  struct rb_encoding_entry *ent;
219  int count = newsize;
220 
221  if (enc_table.size >= newsize) return newsize;
222  newsize = (newsize + 7) / 8 * 8;
223  ent = realloc(enc_table.list, sizeof(*enc_table.list) * newsize);
224  if (!ent) return -1;
225  memset(ent + enc_table.size, 0, sizeof(*ent)*(newsize - enc_table.size));
226  enc_table.list = ent;
227  enc_table.size = newsize;
228  return count;
229 }
230 
231 static int
232 enc_register_at(int index, const char *name, rb_encoding *encoding)
233 {
234  struct rb_encoding_entry *ent = &enc_table.list[index];
235  VALUE list;
236 
237  if (!valid_encoding_name_p(name)) return -1;
238  if (!ent->name) {
239  ent->name = name = strdup(name);
240  }
241  else if (STRCASECMP(name, ent->name)) {
242  return -1;
243  }
244  if (!ent->enc) {
245  ent->enc = xmalloc(sizeof(rb_encoding));
246  }
247  if (encoding) {
248  *ent->enc = *encoding;
249  }
250  else {
251  memset(ent->enc, 0, sizeof(*ent->enc));
252  }
253  encoding = ent->enc;
254  encoding->name = name;
255  encoding->ruby_encoding_index = index;
256  st_insert(enc_table.names, (st_data_t)name, (st_data_t)index);
257  list = rb_encoding_list;
258  if (list && NIL_P(rb_ary_entry(list, index))) {
259  /* initialize encoding data */
260  rb_ary_store(list, index, enc_new(encoding));
261  }
262  return index;
263 }
264 
265 static int
266 enc_register(const char *name, rb_encoding *encoding)
267 {
268  int index = enc_table.count;
269 
270  if ((index = enc_table_expand(index + 1)) < 0) return -1;
271  enc_table.count = index;
272  return enc_register_at(index - 1, name, encoding);
273 }
274 
275 static void set_encoding_const(const char *, rb_encoding *);
276 int rb_enc_registered(const char *name);
277 
278 int
279 rb_enc_register(const char *name, rb_encoding *encoding)
280 {
281  int index = rb_enc_registered(name);
282 
283  if (index >= 0) {
284  rb_encoding *oldenc = rb_enc_from_index(index);
285  if (STRCASECMP(name, rb_enc_name(oldenc))) {
286  index = enc_register(name, encoding);
287  }
288  else if (enc_autoload_p(oldenc) || !ENC_DUMMY_P(oldenc)) {
289  enc_register_at(index, name, encoding);
290  }
291  else {
292  rb_raise(rb_eArgError, "encoding %s is already registered", name);
293  }
294  }
295  else {
296  index = enc_register(name, encoding);
298  }
299  return index;
300 }
301 
302 void
303 rb_encdb_declare(const char *name)
304 {
305  int idx = rb_enc_registered(name);
306  if (idx < 0) {
307  idx = enc_register(name, 0);
308  }
310 }
311 
312 static void
314 {
315  if (rb_enc_registered(name) >= 0) {
316  rb_raise(rb_eArgError, "encoding %s is already registered", name);
317  }
318 }
319 
320 static rb_encoding*
322 {
323  rb_encoding *enc = enc_table.list[index].enc;
324 
325  enc_table.list[index].base = base;
326  if (rb_enc_dummy_p(base)) ENC_SET_DUMMY(enc);
327  return enc;
328 }
329 
330 /* for encdb.h
331  * Set base encoding for encodings which are not replicas
332  * but not in their own files.
333  */
334 void
335 rb_enc_set_base(const char *name, const char *orig)
336 {
337  int idx = rb_enc_registered(name);
338  int origidx = rb_enc_registered(orig);
339  set_base_encoding(idx, rb_enc_from_index(origidx));
340 }
341 
342 int
343 rb_enc_replicate(const char *name, rb_encoding *encoding)
344 {
345  int idx;
346 
347  enc_check_duplication(name);
348  idx = enc_register(name, encoding);
349  set_base_encoding(idx, encoding);
351  return idx;
352 }
353 
354 /*
355  * call-seq:
356  * enc.replicate(name) -> encoding
357  *
358  * Returns a replicated encoding of _enc_ whose name is _name_.
359  * The new encoding should have the same byte structure as _enc_.
360  * If _name_ is used by another encoding, raises ArgumentError.
361  *
362  */
363 static VALUE
365 {
368  rb_to_encoding(encoding)));
369 }
370 
371 static int
372 enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
373 {
374  if (idx < 0) {
375  idx = enc_register(name, origenc);
376  }
377  else {
378  idx = enc_register_at(idx, name, origenc);
379  }
380  if (idx >= 0) {
381  set_base_encoding(idx, origenc);
383  }
384  return idx;
385 }
386 
/*
 * Register +name+ as a replica of the encoding named +orig+.
 *
 * When +orig+ is not registered yet, it is first registered with no
 * definition (a NULL rb_encoding). If +name+ is already registered its
 * existing index is reused; otherwise a new slot is appended.
 *
 * Returns the index of the replica, or -1 on failure, including when the
 * registration of +orig+ fails.
 */
int
rb_encdb_replicate(const char *name, const char *orig)
{
    int origidx = rb_enc_registered(orig);
    int idx = rb_enc_registered(name);

    if (origidx < 0) {
        origidx = enc_register(orig, 0);
        /* a negative index is not a table slot; do not replicate from it */
        if (origidx < 0) return -1;
    }
    return enc_replicate_with_index(name, rb_enc_from_index(origidx), idx);
}
398 
399 int
401 {
403  rb_encoding *enc = enc_table.list[index].enc;
404 
405  ENC_SET_DUMMY(enc);
406  return index;
407 }
408 
409 int
410 rb_encdb_dummy(const char *name)
411 {
413  rb_enc_registered(name));
414  rb_encoding *enc = enc_table.list[index].enc;
415 
416  ENC_SET_DUMMY(enc);
417  return index;
418 }
419 
420 /*
421  * call-seq:
422  * enc.dummy? -> true or false
423  *
424  * Returns true for dummy encodings.
425  * A dummy encoding is an encoding for which character handling is not properly
426  * implemented.
427  * It is used for stateful encodings.
428  *
429  * Encoding::ISO_2022_JP.dummy? #=> true
430  * Encoding::UTF_8.dummy? #=> false
431  *
432  */
433 static VALUE
435 {
436  return ENC_DUMMY_P(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
437 }
438 
439 /*
440  * call-seq:
441  * enc.ascii_compatible? -> true or false
442  *
443  * Returns whether ASCII-compatible or not.
444  *
445  * Encoding::UTF_8.ascii_compatible? #=> true
446  * Encoding::UTF_16BE.ascii_compatible? #=> false
447  *
448  */
449 static VALUE
451 {
452  return rb_enc_asciicompat(enc_table.list[must_encoding(enc)].enc) ? Qtrue : Qfalse;
453 }
454 
455 /*
456  * Returns 1 when the encoding is Unicode series other than UTF-7 else 0.
457  */
458 int
460 {
461  return ONIGENC_IS_UNICODE(enc);
462 }
463 
464 static st_data_t
466 {
467  return (st_data_t)strdup((const char *)name);
468 }
469 
470 /*
471  * Returns copied alias name when the key is added for st_table,
472  * else returns NULL.
473  */
474 static int
475 enc_alias_internal(const char *alias, int idx)
476 {
477  return st_insert2(enc_table.names, (st_data_t)alias, (st_data_t)idx,
478  enc_dup_name);
479 }
480 
481 static int
482 enc_alias(const char *alias, int idx)
483 {
484  if (!valid_encoding_name_p(alias)) return -1;
485  if (!enc_alias_internal(alias, idx))
487  return idx;
488 }
489 
490 int
491 rb_enc_alias(const char *alias, const char *orig)
492 {
493  int idx;
494 
495  enc_check_duplication(alias);
496  if (!enc_table.list) {
497  rb_enc_init();
498  }
499  if ((idx = rb_enc_find_index(orig)) < 0) {
500  return -1;
501  }
502  return enc_alias(alias, idx);
503 }
504 
/*
 * Define +alias+ as another name for the encoding named +orig+.
 *
 * When +orig+ is not registered yet, it is first registered with no
 * definition (a NULL rb_encoding).
 *
 * Returns the result of enc_alias(), or -1 when +orig+ cannot be
 * registered.
 */
int
rb_encdb_alias(const char *alias, const char *orig)
{
    int idx = rb_enc_registered(orig);

    if (idx < 0) {
        idx = enc_register(orig, 0);
        /* never record an alias that points at a negative index */
        if (idx < 0) return -1;
    }
    return enc_alias(alias, idx);
}
515 
516 void
518 {
520 }
521 
522 enum {
527 };
528 
531 
532 void
534 {
536  if (!enc_table.names) {
538  }
539 #define ENC_REGISTER(enc) enc_register_at(ENCINDEX_##enc, rb_enc_name(&OnigEncoding##enc), &OnigEncoding##enc)
542  ENC_REGISTER(US_ASCII);
543 #undef ENC_REGISTER
545 }
546 
547 rb_encoding *
549 {
550  if (!enc_table.list) {
551  rb_enc_init();
552  }
553  if (index < 0 || enc_table.count <= index) {
554  return 0;
555  }
556  return enc_table.list[index].enc;
557 }
558 
559 int
561 {
562  st_data_t idx = 0;
563 
564  if (!name) return -1;
565  if (!enc_table.list) return -1;
566  if (st_lookup(enc_table.names, (st_data_t)name, &idx)) {
567  return (int)idx;
568  }
569  return -1;
570 }
571 
572 static VALUE
574 {
575  int safe = rb_safe_level();
576  return rb_require_safe(enclib, safe > 3 ? 3 : safe);
577 }
578 
579 static int
580 load_encoding(const char *name)
581 {
582  VALUE enclib = rb_sprintf("enc/%s.so", name);
583  VALUE verbose = ruby_verbose;
585  VALUE loaded;
586  char *s = RSTRING_PTR(enclib) + 4, *e = RSTRING_END(enclib) - 3;
587  int idx;
588 
589  while (s < e) {
590  if (!ISALNUM(*s)) *s = '_';
591  else if (ISUPPER(*s)) *s = (char)TOLOWER(*s);
592  ++s;
593  }
594  FL_UNSET(enclib, FL_TAINT|FL_UNTRUSTED);
595  OBJ_FREEZE(enclib);
597  ruby_debug = Qfalse;
598  loaded = rb_protect(require_enc, enclib, 0);
599  ruby_verbose = verbose;
600  ruby_debug = debug;
602  if (NIL_P(loaded)) return -1;
603  if ((idx = rb_enc_registered(name)) < 0) return -1;
604  if (enc_autoload_p(enc_table.list[idx].enc)) return -1;
605  return idx;
606 }
607 
608 static int
610 {
611  int i;
612  rb_encoding *base = enc_table.list[ENC_TO_ENCINDEX(enc)].base;
613 
614  if (base) {
615  i = 0;
616  do {
617  if (i >= enc_table.count) return -1;
618  } while (enc_table.list[i].enc != base && (++i, 1));
619  if (enc_autoload_p(base)) {
620  if (enc_autoload(base) < 0) return -1;
621  }
622  i = ENC_TO_ENCINDEX(enc);
623  enc_register_at(i, rb_enc_name(enc), base);
624  }
625  else {
626  i = load_encoding(rb_enc_name(enc));
627  }
628  return i;
629 }
630 
631 /* Return encoding index or UNSPECIFIED_ENCODING from encoding name */
632 int
634 {
635  int i = rb_enc_registered(name);
636  rb_encoding *enc;
637 
638  if (i < 0) {
639  i = load_encoding(name);
640  }
641  else if (!(enc = rb_enc_from_index(i))) {
642  if (i != UNSPECIFIED_ENCODING) {
643  rb_raise(rb_eArgError, "encoding %s is not registered", name);
644  }
645  }
646  else if (enc_autoload_p(enc)) {
647  if (enc_autoload(enc) < 0) {
648  rb_warn("failed to load encoding (%s); use ASCII-8BIT instead",
649  name);
650  return 0;
651  }
652  }
653  return i;
654 }
655 
656 rb_encoding *
657 rb_enc_find(const char *name)
658 {
659  int idx = rb_enc_find_index(name);
660  if (idx < 0) idx = 0;
661  return rb_enc_from_index(idx);
662 }
663 
664 static inline int
666 {
667  if (SPECIAL_CONST_P(obj)) return SYMBOL_P(obj);
668  switch (BUILTIN_TYPE(obj)) {
669  case T_STRING:
670  case T_REGEXP:
671  case T_FILE:
672  return TRUE;
673  case T_DATA:
674  if (is_data_encoding(obj)) return TRUE;
675  default:
676  return FALSE;
677  }
678 }
679 
680 ID
682 {
683  CONST_ID(id_encoding, "encoding");
684  return id_encoding;
685 }
686 
687 int
689 {
690  int i = -1;
691  VALUE tmp;
692 
693  if (SPECIAL_CONST_P(obj)) {
694  if (!SYMBOL_P(obj)) return -1;
695  obj = rb_id2str(SYM2ID(obj));
696  }
697  switch (BUILTIN_TYPE(obj)) {
698  as_default:
699  default:
700  case T_STRING:
701  case T_REGEXP:
702  i = ENCODING_GET_INLINED(obj);
703  if (i == ENCODING_INLINE_MAX) {
704  VALUE iv;
705 
706  iv = rb_ivar_get(obj, rb_id_encoding());
707  i = NUM2INT(iv);
708  }
709  break;
710  case T_FILE:
711  tmp = rb_funcall(obj, rb_intern("internal_encoding"), 0, 0);
712  if (NIL_P(tmp)) obj = rb_funcall(obj, rb_intern("external_encoding"), 0, 0);
713  else obj = tmp;
714  if (NIL_P(obj)) break;
715  case T_DATA:
716  if (is_data_encoding(obj)) {
717  i = enc_check_encoding(obj);
718  }
719  else {
720  goto as_default;
721  }
722  break;
723  }
724  return i;
725 }
726 
727 static void
729 {
730  if (idx < ENCODING_INLINE_MAX) {
731  ENCODING_SET_INLINED(obj, idx);
732  return;
733  }
735  rb_ivar_set(obj, rb_id_encoding(), INT2NUM(idx));
736 }
737 
738 void
740 {
741  rb_check_frozen(obj);
742  enc_set_index(obj, idx);
743 }
744 
745 VALUE
747 {
748 /* enc_check_capable(obj);*/
749  rb_check_frozen(obj);
750  if (rb_enc_get_index(obj) == idx)
751  return obj;
752  if (SPECIAL_CONST_P(obj)) {
753  rb_raise(rb_eArgError, "cannot set encoding");
754  }
755  if (!ENC_CODERANGE_ASCIIONLY(obj) ||
757  ENC_CODERANGE_CLEAR(obj);
758  }
759  enc_set_index(obj, idx);
760  return obj;
761 }
762 
763 VALUE
765 {
766  return rb_enc_associate_index(obj, rb_enc_to_index(enc));
767 }
768 
771 {
772  return rb_enc_from_index(rb_enc_get_index(obj));
773 }
774 
777 {
778  rb_encoding *enc = rb_enc_compatible(str1, str2);
779  if (!enc)
780  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
781  rb_enc_name(rb_enc_get(str1)),
782  rb_enc_name(rb_enc_get(str2)));
783  return enc;
784 }
785 
788 {
789  int idx1, idx2;
790  rb_encoding *enc1, *enc2;
791  int isstr1, isstr2;
792 
793  idx1 = rb_enc_get_index(str1);
794  idx2 = rb_enc_get_index(str2);
795 
796  if (idx1 < 0 || idx2 < 0)
797  return 0;
798 
799  if (idx1 == idx2) {
800  return rb_enc_from_index(idx1);
801  }
802  enc1 = rb_enc_from_index(idx1);
803  enc2 = rb_enc_from_index(idx2);
804 
805  isstr2 = RB_TYPE_P(str2, T_STRING);
806  if (isstr2 && RSTRING_LEN(str2) == 0)
807  return enc1;
808  isstr1 = RB_TYPE_P(str1, T_STRING);
809  if (isstr1 && RSTRING_LEN(str1) == 0)
810  return (rb_enc_asciicompat(enc1) && rb_enc_str_asciionly_p(str2)) ? enc1 : enc2;
811  if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) {
812  return 0;
813  }
814 
815  /* objects whose encoding is the same of contents */
816  if (!isstr2 && idx2 == ENCINDEX_US_ASCII)
817  return enc1;
818  if (!isstr1 && idx1 == ENCINDEX_US_ASCII)
819  return enc2;
820 
821  if (!isstr1) {
822  VALUE tmp = str1;
823  int idx0 = idx1;
824  str1 = str2;
825  str2 = tmp;
826  idx1 = idx2;
827  idx2 = idx0;
828  idx0 = isstr1;
829  isstr1 = isstr2;
830  isstr2 = idx0;
831  }
832  if (isstr1) {
833  int cr1, cr2;
834 
835  cr1 = rb_enc_str_coderange(str1);
836  if (isstr2) {
837  cr2 = rb_enc_str_coderange(str2);
838  if (cr1 != cr2) {
839  /* may need to handle ENC_CODERANGE_BROKEN */
840  if (cr1 == ENC_CODERANGE_7BIT) return enc2;
841  if (cr2 == ENC_CODERANGE_7BIT) return enc1;
842  }
843  if (cr2 == ENC_CODERANGE_7BIT) {
844  return enc1;
845  }
846  }
847  if (cr1 == ENC_CODERANGE_7BIT)
848  return enc2;
849  }
850  return 0;
851 }
852 
853 void
855 {
857 }
858 
859 
860 /*
861  * call-seq:
862  * obj.encoding -> encoding
863  *
864  * Returns the Encoding object that represents the encoding of obj.
865  */
866 
867 VALUE
869 {
870  int idx = rb_enc_get_index(obj);
871  if (idx < 0) {
872  rb_raise(rb_eTypeError, "unknown encoding");
873  }
874  return rb_enc_from_encoding_index(idx);
875 }
876 
877 int
878 rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
879 {
880  return ONIGENC_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
881 }
882 
883 int
884 rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
885 {
886  int n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
887  if (MBCLEN_CHARFOUND_P(n) && MBCLEN_CHARFOUND_LEN(n) <= e-p)
888  return MBCLEN_CHARFOUND_LEN(n);
889  else {
890  int min = rb_enc_mbminlen(enc);
891  return min <= e-p ? min : (int)(e-p);
892  }
893 }
894 
895 int
896 rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
897 {
898  int n;
899  if (e <= p)
901  n = ONIGENC_PRECISE_MBC_ENC_LEN(enc, (UChar*)p, (UChar*)e);
902  if (e-p < n)
903  return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n-(int)(e-p));
904  return n;
905 }
906 
907 int
908 rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
909 {
910  unsigned int c, l;
911  if (e <= p)
912  return -1;
913  if (rb_enc_asciicompat(enc)) {
914  c = (unsigned char)*p;
915  if (!ISASCII(c))
916  return -1;
917  if (len) *len = 1;
918  return c;
919  }
920  l = rb_enc_precise_mbclen(p, e, enc);
921  if (!MBCLEN_CHARFOUND_P(l))
922  return -1;
923  c = rb_enc_mbc_to_codepoint(p, e, enc);
924  if (!rb_enc_isascii(c, enc))
925  return -1;
926  if (len) *len = l;
927  return c;
928 }
929 
930 unsigned int
931 rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
932 {
933  int r;
934  if (e <= p)
935  rb_raise(rb_eArgError, "empty string");
936  r = rb_enc_precise_mbclen(p, e, enc);
937  if (!MBCLEN_CHARFOUND_P(r)) {
938  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(enc));
939  }
940  if (len_p) *len_p = MBCLEN_CHARFOUND_LEN(r);
941  return rb_enc_mbc_to_codepoint(p, e, enc);
942 }
943 
944 #undef rb_enc_codepoint
945 unsigned int
946 rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
947 {
948  return rb_enc_codepoint_len(p, e, 0, enc);
949 }
950 
951 int
953 {
954  int n = ONIGENC_CODE_TO_MBCLEN(enc,c);
955  if (n == 0) {
956  rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc));
957  }
958  return n;
959 }
960 
961 int
963 {
965 }
966 
967 int
969 {
971 }
972 
973 /*
974  * call-seq:
975  * enc.inspect -> string
976  *
977  * Returns a string which represents the encoding for programmers.
978  *
979  * Encoding::UTF_8.inspect #=> "#<Encoding:UTF-8>"
980  * Encoding::ISO_2022_JP.inspect #=> "#<Encoding:ISO-2022-JP (dummy)>"
981  */
982 static VALUE
984 {
985  VALUE str = rb_sprintf("#<%s:%s%s>", rb_obj_classname(self),
987  (enc_dummy_p(self) ? " (dummy)" : ""));
989  return str;
990 }
991 
992 /*
993  * call-seq:
994  * enc.name -> string
995  *
996  * Returns the name of the encoding.
997  *
998  * Encoding::UTF_8.name #=> "UTF-8"
999  */
1000 static VALUE
1002 {
1004 }
1005 
1006 static int
1008 {
1009  VALUE *arg = (VALUE *)args;
1010 
1011  if ((int)idx == (int)arg[0]) {
1012  VALUE str = rb_usascii_str_new2((char *)name);
1013  OBJ_FREEZE(str);
1014  rb_ary_push(arg[1], str);
1015  }
1016  return ST_CONTINUE;
1017 }
1018 
1019 /*
1020  * call-seq:
1021  * enc.names -> array
1022  *
1023  * Returns the list of name and aliases of the encoding.
1024  *
1025  * Encoding::WINDOWS_31J.names #=> ["Windows-31J", "CP932", "csWindows31J"]
1026  */
1027 static VALUE
1029 {
1030  VALUE args[2];
1031 
1032  args[0] = (VALUE)rb_to_encoding_index(self);
1033  args[1] = rb_ary_new2(0);
1034  st_foreach(enc_table.names, enc_names_i, (st_data_t)args);
1035  return args[1];
1036 }
1037 
1038 /*
1039  * call-seq:
1040  * Encoding.list -> [enc1, enc2, ...]
1041  *
1042  * Returns the list of loaded encodings.
1043  *
1044  * Encoding.list
1045  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1046  * #<Encoding:ISO-2022-JP (dummy)>]
1047  *
1048  * Encoding.find("US-ASCII")
1049  * #=> #<Encoding:US-ASCII>
1050  *
1051  * Encoding.list
1052  * #=> [#<Encoding:ASCII-8BIT>, #<Encoding:UTF-8>,
1053  * #<Encoding:US-ASCII>, #<Encoding:ISO-2022-JP (dummy)>]
1054  *
1055  */
1056 static VALUE
1058 {
1059  VALUE ary = rb_ary_new2(0);
1061  return ary;
1062 }
1063 
1064 /*
1065  * call-seq:
1066  * Encoding.find(string) -> enc
1067  * Encoding.find(symbol) -> enc
1068  *
1069  * Search the encoding with specified <i>name</i>.
1070  * <i>name</i> should be a string or symbol.
1071  *
1072  * Encoding.find("US-ASCII") #=> #<Encoding:US-ASCII>
1073  * Encoding.find(:Shift_JIS) #=> #<Encoding:Shift_JIS>
1074  *
1075  * Names which this method accepts are encoding names and aliases,
1076  * including the following special aliases:
1077  *
1078  * "external":: default external encoding
1079  * "internal":: default internal encoding
1080  * "locale":: locale encoding
1081  * "filesystem":: filesystem encoding
1082  *
1083  * An ArgumentError is raised when there is no encoding with <i>name</i>.
1084  * Only <code>Encoding.find("internal")</code>, however, returns nil
1085  * when there is no encoding named "internal", in other words, when Ruby has no
1086  * default internal encoding.
1087  */
1088 static VALUE
1090 {
1091  int idx;
1092  if (RB_TYPE_P(enc, T_DATA) && is_data_encoding(enc))
1093  return enc;
1094  idx = str_to_encindex(enc);
1095  if (idx == UNSPECIFIED_ENCODING) return Qnil;
1096  return rb_enc_from_encoding_index(idx);
1097 }
1098 
1099 /*
1100  * call-seq:
1101  * Encoding.compatible?(obj1, obj2) -> enc or nil
1102  *
1103  * Checks the compatibility of two objects.
1104  *
1105  * If the objects are both strings they are compatible when they are
1106  * concatenatable. The encoding of the concatenated string will be returned
1107  * if they are compatible, nil if they are not.
1108  *
1109  * Encoding.compatible?("\xa1".force_encoding("iso-8859-1"), "b")
1110  * #=> #<Encoding:ISO-8859-1>
1111  *
1112  * Encoding.compatible?(
1113  * "\xa1".force_encoding("iso-8859-1"),
1114  * "\xa1\xa1".force_encoding("euc-jp"))
1115  * #=> nil
1116  *
1117  * If the objects are non-strings their encodings are compatible when they
1118  * have an encoding and:
1119  * * Either encoding is US-ASCII compatible
1120  * * One of the encodings is a 7-bit encoding
1121  *
1122  */
1123 static VALUE
1125 {
1126  rb_encoding *enc;
1127 
1128  if (!enc_capable(str1)) return Qnil;
1129  if (!enc_capable(str2)) return Qnil;
1130  enc = rb_enc_compatible(str1, str2);
1131  if (!enc) return Qnil;
1132  return rb_enc_from_encoding(enc);
1133 }
1134 
1135 /* :nodoc: */
1136 static VALUE
1138 {
1139  rb_scan_args(argc, argv, "01", 0);
1140  return enc_name(self);
1141 }
1142 
1143 /* :nodoc: */
1144 static VALUE
1146 {
1147  return enc_find(klass, str);
1148 }
1149 
1150 rb_encoding *
1152 {
1153  if (!enc_table.list) {
1154  rb_enc_init();
1155  }
1156  return enc_table.list[ENCINDEX_ASCII].enc;
1157 }
1158 
1159 int
1161 {
1162  return ENCINDEX_ASCII;
1163 }
1164 
1165 rb_encoding *
1167 {
1168  if (!enc_table.list) {
1169  rb_enc_init();
1170  }
1171  return enc_table.list[ENCINDEX_UTF_8].enc;
1172 }
1173 
1174 int
1176 {
1177  return ENCINDEX_UTF_8;
1178 }
1179 
1180 rb_encoding *
1182 {
1183  if (!enc_table.list) {
1184  rb_enc_init();
1185  }
1186  return enc_table.list[ENCINDEX_US_ASCII].enc;
1187 }
1188 
1189 int
1191 {
1192  return ENCINDEX_US_ASCII;
1193 }
1194 
1195 int
1197 {
1199  int idx;
1200 
1201  if (NIL_P(charmap))
1202  idx = rb_usascii_encindex();
1203  else if ((idx = rb_enc_find_index(StringValueCStr(charmap))) < 0)
1204  idx = rb_ascii8bit_encindex();
1205 
1206  if (rb_enc_registered("locale") < 0) enc_alias_internal("locale", idx);
1207 
1208  return idx;
1209 }
1210 
1211 rb_encoding *
1213 {
1215 }
1216 
1217 static int
1219 {
1220  int idx;
1221 #if defined NO_LOCALE_CHARMAP
1223 #elif defined _WIN32 || defined __CYGWIN__
1224  char cp[sizeof(int) * 8 / 3 + 4];
1225  snprintf(cp, sizeof cp, "CP%d", AreFileApisANSI() ? GetACP() : GetOEMCP());
1226  idx = rb_enc_find_index(cp);
1227  if (idx < 0) idx = rb_ascii8bit_encindex();
1228 #else
1230 #endif
1231 
1232  enc_alias_internal("filesystem", idx);
1233  return idx;
1234 }
1235 
1236 int
1238 {
1239  int idx = rb_enc_registered("filesystem");
1240  if (idx < 0)
1241  idx = rb_ascii8bit_encindex();
1242  return idx;
1243 }
1244 
1245 rb_encoding *
1247 {
1249 }
1250 
1252  int index; /* -2 => not yet set, -1 => nil */
1254 };
1255 
1257 
1258 static int
1259 enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
1260 {
1261  int overridden = FALSE;
1262 
1263  if (def->index != -2)
1264  /* Already set */
1265  overridden = TRUE;
1266 
1267  if (NIL_P(encoding)) {
1268  def->index = -1;
1269  def->enc = 0;
1270  st_insert(enc_table.names, (st_data_t)strdup(name),
1272  }
1273  else {
1274  def->index = rb_enc_to_index(rb_to_encoding(encoding));
1275  def->enc = 0;
1276  enc_alias_internal(name, def->index);
1277  }
1278 
1279  if (def == &default_external)
1281 
1282  return overridden;
1283 }
1284 
1285 rb_encoding *
1287 {
1288  if (default_external.enc) return default_external.enc;
1289 
1290  if (default_external.index >= 0) {
1291  default_external.enc = rb_enc_from_index(default_external.index);
1292  return default_external.enc;
1293  }
1294  else {
1295  return rb_locale_encoding();
1296  }
1297 }
1298 
1299 VALUE
1301 {
1303 }
1304 
1305 /*
1306  * call-seq:
1307  * Encoding.default_external -> enc
1308  *
1309  * Returns default external encoding.
1310  *
1311  * The default external encoding is used by default for strings created from
1312  * the following locations:
1313  *
1314  * * CSV
1315  * * File data read from disk
1316  * * SDBM
1317  * * StringIO
1318  * * Zlib::GzipReader
1319  * * Zlib::GzipWriter
1320  * * String#inspect
1321  * * Regexp#inspect
1322  *
1323  * While strings created from these locations will have this encoding, the
1324  * encoding may not be valid. Be sure to check String#valid_encoding?.
1325  *
1326  * File data written to disk will be transcoded to the default external
1327  * encoding when written.
1328  *
1329  * The default external encoding is initialized by the locale or -E option.
1330  */
1331 static VALUE
1333 {
1334  return rb_enc_default_external();
1335 }
1336 
1337 void
1339 {
1340  if (NIL_P(encoding)) {
1341  rb_raise(rb_eArgError, "default external can not be nil");
1342  }
1343  enc_set_default_encoding(&default_external, encoding,
1344  "external");
1345 }
1346 
1347 /*
1348  * call-seq:
1349  * Encoding.default_external = enc
1350  *
1351  * Sets default external encoding. You should not set
1352  * Encoding::default_external in ruby code as strings created before changing
1353  * the value may have a different encoding from strings created after the value
1354  * was changed. Instead, you should use <tt>ruby -E</tt> to invoke ruby with
1355  * the correct default_external.
1356  *
1357  * See Encoding::default_external for information on how the default external
1358  * encoding is used.
1359  */
1360 static VALUE
1362 {
1363  rb_warning("setting Encoding.default_external");
1364  rb_enc_set_default_external(encoding);
1365  return encoding;
1366 }
1367 
1368 static struct default_encoding default_internal = {-2};
1369 
1370 rb_encoding *
1372 {
1373  if (!default_internal.enc && default_internal.index >= 0) {
1374  default_internal.enc = rb_enc_from_index(default_internal.index);
1375  }
1376  return default_internal.enc; /* can be NULL */
1377 }
1378 
1379 VALUE
1381 {
1382  /* Note: These functions cope with default_internal not being set */
1384 }
1385 
1386 /*
1387  * call-seq:
1388  * Encoding.default_internal -> enc
1389  *
1390  * Returns default internal encoding. Strings will be transcoded to the
1391  * default internal encoding in the following places if the default internal
1392  * encoding is not nil:
1393  *
1394  * * CSV
1395  * * Etc.sysconfdir and Etc.systmpdir
1396  * * File data read from disk
1397  * * File names from Dir
1398  * * Integer#chr
1399  * * String#inspect and Regexp#inspect
1400  * * Strings returned from Curses
1401  * * Strings returned from Readline
1402  * * Strings returned from SDBM
1403  * * Time#zone
1404  * * Values from ENV
1405  * * Values in ARGV including $PROGRAM_NAME
1406  * * __FILE__
1407  *
1408  * Additionally String#encode and String#encode! use the default internal
1409  * encoding if no encoding is given.
1410  *
1411  * The locale encoding (__ENCODING__), not default_internal, is used as the
1412  * encoding of created strings.
1413  *
1414  * Encoding::default_internal is initialized by the source file's
1415  * internal_encoding or -E option.
1416  */
1417 static VALUE
1419 {
1420  return rb_enc_default_internal();
1421 }
1422 
1423 void
1425 {
1426  enc_set_default_encoding(&default_internal, encoding,
1427  "internal");
1428 }
1429 
1430 /*
1431  * call-seq:
1432  * Encoding.default_internal = enc or nil
1433  *
1434  * Sets default internal encoding or removes default internal encoding when
1435  * passed nil. You should not set Encoding::default_internal in ruby code as
1436  * strings created before changing the value may have a different encoding
1437  * from strings created after the change. Instead you should use
1438  * <tt>ruby -E</tt> to invoke ruby with the correct default_internal.
1439  *
1440  * See Encoding::default_internal for information on how the default internal
1441  * encoding is used.
1442  */
1443 static VALUE
1445 {
1446  rb_warning("setting Encoding.default_internal");
1447  rb_enc_set_default_internal(encoding);
1448  return encoding;
1449 }
1450 
1451 /*
1452  * call-seq:
1453  * Encoding.locale_charmap -> string
1454  *
1455  * Returns the locale charmap name.
1456  * It returns nil if no appropriate information is available.
1457  *
1458  * Debian GNU/Linux
1459  * LANG=C
1460  * Encoding.locale_charmap #=> "ANSI_X3.4-1968"
1461  * LANG=ja_JP.EUC-JP
1462  * Encoding.locale_charmap #=> "EUC-JP"
1463  *
1464  * SunOS 5
1465  * LANG=C
1466  * Encoding.locale_charmap #=> "646"
1467  * LANG=ja
1468  * Encoding.locale_charmap #=> "eucJP"
1469  *
1470  * The result is highly platform dependent.
1471  * So Encoding.find(Encoding.locale_charmap) may cause an error.
1472  * If you need some encoding object even for unknown locale,
1473  * Encoding.find("locale") can be used.
1474  *
1475  */
/*
 * Returns the locale charmap name as a US-ASCII string (or nil), chosen
 * at compile time:
 *   - NO_LOCALE_CHARMAP:     the fixed string "ASCII-8BIT";
 *   - _WIN32 / __CYGWIN__:   nl_langinfo_codeset() on Cygwin; when that
 *                            is unavailable or yields NULL, "CP<n>" built
 *                            from the console code page, or from the ANSI
 *                            code page when GetConsoleCP() returns 0;
 *   - HAVE_LANGINFO_H:       whatever nl_langinfo(CODESET) reports;
 *   - otherwise:             nil.
 *
 * NOTE(review): listing line 1477 (the declarator) is absent from this
 * extract; the cross-reference index gives it as
 * rb_locale_charmap(VALUE klass).
 */
1476 VALUE
1478 {
1479 #if defined NO_LOCALE_CHARMAP
1480  return rb_usascii_str_new2("ASCII-8BIT");
1481 #elif defined _WIN32 || defined __CYGWIN__
1482  const char *codeset = 0;
/* Scratch buffer for the "CP<n>" fallback name built below. */
1483  char cp[sizeof(int) * 3 + 4];
1484 # ifdef __CYGWIN__
/* Declared locally; the index lists its definition in langinfo.c. */
1485  const char *nl_langinfo_codeset(void);
1486  codeset = nl_langinfo_codeset();
1487 # endif
1488  if (!codeset) {
1489  UINT codepage = GetConsoleCP();
/* A zero console code page falls back to the ANSI code page. */
1490  if (!codepage) codepage = GetACP();
/* NOTE(review): codepage is a UINT but is formatted with %d; %u would
 * match the argument type. */
1491  snprintf(cp, sizeof(cp), "CP%d", codepage);
1492  codeset = cp;
1493  }
1494  return rb_usascii_str_new2(codeset);
1495 #elif defined HAVE_LANGINFO_H
1496  char *codeset;
1497  codeset = nl_langinfo(CODESET);
1498  return rb_usascii_str_new2(codeset);
1499 #else
1500  return Qnil;
1501 #endif
1502 }
1503 
/*
 * Defines constant(s) under rb_cEncoding that refer to the Encoding
 * object for `enc`, deriving the constant name(s) from `name`:
 *   - a name starting with a digit gets no constant;
 *   - a name that already looks like a constant (uppercase first letter,
 *     then only alphanumerics or '_') is defined verbatim;
 *   - otherwise a sanitized copy is made in which a lowercase first
 *     letter is upcased and every non-alphanumeric character becomes
 *     '_'; that form is defined only if the name had an uppercase letter;
 *   - if the name contains any lowercase letter, an all-uppercase form
 *     is defined as well.
 * Names longer than ENCODING_NAMELEN_MAX are ignored.
 *
 * NOTE(review): listing line 1505 (the declarator) is absent from this
 * extract; the cross-reference index gives it as
 * set_encoding_const(const char *, rb_encoding *) -- the body uses the
 * parameter names `name` and `enc`.
 */
1504 static void
1506 {
1507  VALUE encoding = rb_enc_from_encoding(enc);
1508  char *s = (char *)name;
1509  int haslower = 0, hasupper = 0, valid = 0;
1510 
1511  if (ISDIGIT(*s)) return;
/* Scan the longest constant-like prefix, noting any lowercase letter. */
1512  if (ISUPPER(*s)) {
1513  hasupper = 1;
1514  while (*++s && (ISALNUM(*s) || *s == '_')) {
1515  if (ISLOWER(*s)) haslower = 1;
1516  }
1517  }
/* The scan consumed the whole string: usable as a constant name as is. */
1518  if (!*s) {
1519  if (s - name > ENCODING_NAMELEN_MAX) return;
1520  valid = 1;
1521  rb_define_const(rb_cEncoding, name, encoding);
1522  }
1523  if (!valid || haslower) {
1524  size_t len = s - name;
1525  if (len > ENCODING_NAMELEN_MAX) return;
/* Keep scanning from where the first pass stopped until both a
 * lowercase and an uppercase letter have been seen, or the string ends. */
1526  if (!haslower || !hasupper) {
1527  do {
1528  if (ISLOWER(*s)) haslower = 1;
1529  if (ISUPPER(*s)) hasupper = 1;
1530  } while (*++s && (!haslower || !hasupper));
1531  len = s - name;
1532  }
/* len becomes the full name length; the post-increment then adds room
 * for the terminating NUL so the copy below includes it. */
1533  len += strlen(s);
1534  if (len++ > ENCODING_NAMELEN_MAX) return;
/* Work on a stack copy so the caller's string is never modified. */
1535  MEMCPY(s = ALLOCA_N(char, len), name, char, len);
1536  name = s;
1537  if (!valid) {
1538  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1539  for (; *s; ++s) {
1540  if (!ISALNUM(*s)) *s = '_';
1541  }
1542  if (hasupper) {
1543  rb_define_const(rb_cEncoding, name, encoding);
1544  }
1545  }
/* Additionally define the all-uppercase spelling. */
1546  if (haslower) {
1547  for (s = (char *)name; *s; ++s) {
1548  if (ISLOWER(*s)) *s = ONIGENC_ASCII_CODE_TO_UPPER_CASE((int)*s);
1549  }
1550  rb_define_const(rb_cEncoding, name, encoding);
1551  }
1552  }
1553 }
1554 
/*
 * Per-entry callback: wraps the key `name` in a frozen US-ASCII string,
 * appends it to the array passed through `arg`, and returns ST_CONTINUE.
 *
 * NOTE(review): listing line 1556 (the declarator) is absent from this
 * extract; the cross-reference index gives it as
 * rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg).
 */
1555 static int
1557 {
1558  VALUE ary = (VALUE)arg;
1559  VALUE str = rb_usascii_str_new2((char *)name);
1560  OBJ_FREEZE(str);
1561  rb_ary_push(ary, str);
1562  return ST_CONTINUE;
1563 }
1564 
1565 /*
1566  * call-seq:
1567  * Encoding.name_list -> ["enc1", "enc2", ...]
1568  *
1569  * Returns the list of available encoding names.
1570  *
1571  * Encoding.name_list
1572  * #=> ["US-ASCII", "ASCII-8BIT", "UTF-8",
1573  * "ISO-8859-1", "Shift_JIS", "EUC-JP",
1574  * "Windows-31J",
1575  * "BINARY", "CP932", "eucJP"]
1576  *
1577  */
1578 
/*
 * Builds the array returned for the call-seq documented above, with
 * capacity preallocated to the number of entries in enc_table.names.
 *
 * NOTE(review): listing lines 1580 (the declarator) and 1583 are absent
 * from this extract.  The cross-reference index gives the declarator as
 * rb_enc_name_list(VALUE klass); line 1583 presumably iterates
 * enc_table.names with rb_enc_name_list_i to fill `ary` -- verify against
 * the original file, since as listed nothing populates the array.
 */
1579 static VALUE
1581 {
1582  VALUE ary = rb_ary_new2(enc_table.names->num_entries);
1584  return ary;
1585 }
1586 
/*
 * Per-entry callback that records `name` => canonical encoding name in a
 * hash.  `arg` points at two VALUEs: [0] the result hash, [1] an array
 * that caches the canonical-name string per encoding index.  An entry
 * whose name equals the encoding's own name (case-insensitively) is
 * skipped while the cache slot for that index is still empty.
 * Always returns ST_CONTINUE.
 *
 * NOTE(review): listing lines 1588 (the declarator) and 1596 are absent
 * from this extract.  The cross-reference index gives the declarator as
 * rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg);
 * line 1596 presumably declares `enc` from `idx` -- verify against the
 * original file.
 */
1587 static int
1589 {
1590  VALUE *p = (VALUE *)arg;
1591  VALUE aliases = p[0], ary = p[1];
1592  int idx = (int)orig;
1593  VALUE key, str = rb_ary_entry(ary, idx);
1594 
/* No cached canonical-name string for this index yet. */
1595  if (NIL_P(str)) {
1597 
1598  if (!enc) return ST_CONTINUE;
/* The entry is the encoding's own name, not an alias. */
1599  if (STRCASECMP((char*)name, rb_enc_name(enc)) == 0) {
1600  return ST_CONTINUE;
1601  }
1602  str = rb_usascii_str_new2(rb_enc_name(enc));
1603  OBJ_FREEZE(str);
1604  rb_ary_store(ary, idx, str);
1605  }
1606  key = rb_usascii_str_new2((char *)name);
1607  OBJ_FREEZE(key);
1608  rb_hash_aset(aliases, key, str);
1609  return ST_CONTINUE;
1610 }
1611 
1612 /*
1613  * call-seq:
1614  * Encoding.aliases -> {"alias1" => "orig1", "alias2" => "orig2", ...}
1615  *
1616  * Returns the hash of available encoding alias and original encoding name.
1617  *
1618  * Encoding.aliases
1619  * #=> {"BINARY"=>"ASCII-8BIT", "ASCII"=>"US-ASCII", "ANSI_X3.4-1968"=>"US-ASCII",
1620  * "SJIS"=>"Shift_JIS", "eucJP"=>"EUC-JP", "CP932"=>"Windows-31J"}
1621  *
1622  */
1623 
/*
 * Builds the hash returned for the call-seq documented above.  A
 * two-element VALUE array is prepared: [0] a new hash (the result) and
 * [1] a new array, matching the argument layout that
 * rb_enc_aliases_enc_i reads.
 *
 * NOTE(review): listing lines 1625 (the declarator) and 1630 are absent
 * from this extract.  The cross-reference index gives the declarator as
 * rb_enc_aliases(VALUE klass); line 1630 presumably runs the iteration
 * that fills the hash -- verify against the original file.
 */
1624 static VALUE
1626 {
1627  VALUE aliases[2];
1628  aliases[0] = rb_hash_new();
1629  aliases[1] = rb_ary_new();
1631  return aliases[0];
1632 }
1633 
1634 /*
1635  * An Encoding instance represents a character encoding usable in Ruby. It is
1636  * defined as a constant under the Encoding namespace. It has a name and
1637  * optionally, aliases:
1638  *
1639  * Encoding::ISO_8859_1.name
1640  * #=> "ISO-8859-1"
1641  *
1642  * Encoding::ISO_8859_1.names
1643  * #=> ["ISO-8859-1", "ISO8859-1"]
1644  *
1645  * Ruby methods dealing with encodings return or accept Encoding instances as
1646  * arguments (when a method accepts an Encoding instance as an argument, it
1647  * can be passed an Encoding name or alias instead).
1648  *
1649  * "some string".encoding
1650  * #=> #<Encoding:UTF-8>
1651  *
1652  * string = "some string".encode(Encoding::ISO_8859_1)
1653  * #=> "some string"
1654  * string.encoding
1655  * #=> #<Encoding:ISO-8859-1>
1656  *
1657  * "some string".encode "ISO-8859-1"
1658  * #=> "some string"
1659  *
1660  * <code>Encoding::ASCII_8BIT</code> is a special encoding that is usually
1661  * used for a byte string, not a character string. But as the name suggests,
1662  * its characters in the range of ASCII are considered as ASCII characters.
1663  * This is useful when you use ASCII-8BIT characters with other ASCII
1664  * compatible characters.
1665  *
1666  * == Changing an encoding
1667  *
1668  * The associated Encoding of a String can be changed in two different ways.
1669  *
1670  * First, it is possible to set the Encoding of a string to a new Encoding
1671  * without changing the internal byte representation of the string, with
1672  * String#force_encoding. This is how you can tell Ruby the correct encoding
1673  * of a string.
1674  *
1675  * string
1676  * #=> "R\xC3\xA9sum\xC3\xA9"
1677  * string.encoding
1678  * #=> #<Encoding:ISO-8859-1>
1679  * string.force_encoding(Encoding::UTF_8)
1680  * #=> "R\u00E9sum\u00E9"
1681  *
1682  * Second, it is possible to transcode a string, i.e. translate its internal
1683  * byte representation to another encoding. Its associated encoding is also
1684  * set to the other encoding. See String#encode for the various forms of
1685  * transcoding, and the Encoding::Converter class for additional control over
1686  * the transcoding process.
1687  *
1688  * string
1689  * #=> "R\u00E9sum\u00E9"
1690  * string.encoding
1691  * #=> #<Encoding:UTF-8>
1692  * string = string.encode!(Encoding::ISO_8859_1)
1693  * #=> "R\xE9sum\xE9"
1694  * string.encoding
1695  * #=> #<Encoding:ISO-8859-1>
1696  *
1697  * == Script encoding
1698  *
1699  * All Ruby script code has an associated Encoding which any String literal
1700  * created in the source code will be associated to.
1701  *
1702  * The default script encoding is <code>Encoding::UTF_8</code> as of Ruby 2.0 (it was <code>Encoding::US_ASCII</code> in 1.9), but it can
1703  * be changed by a magic comment on the first line of the source code file (or
1704  * second line, if there is a shebang line on the first). The comment must
1705  * contain the word <code>coding</code> or <code>encoding</code>, followed
1706  * by a colon, space and the Encoding name or alias:
1707  *
1708  * # encoding: UTF-8
1709  *
1710  * "some string".encoding
1711  * #=> #<Encoding:UTF-8>
1712  *
1713  * The <code>__ENCODING__</code> keyword returns the script encoding of the file
1714  * which the keyword is written:
1715  *
1716  * # encoding: ISO-8859-1
1717  *
1718  * __ENCODING__
1719  * #=> #<Encoding:ISO-8859-1>
1720  *
1721  * <code>ruby -K</code> will change the default locale encoding, but this is
1722  * not recommended. Ruby source files should declare their script encoding by a
1723  * magic comment even when they only depend on US-ASCII strings or regular
1724  * expressions.
1725  *
1726  * == Locale encoding
1727  *
1728  * The default encoding of the environment. Usually derived from locale.
1729  *
1730  * see Encoding.locale_charmap, Encoding.find('locale')
1731  *
1732  * == Filesystem encoding
1733  *
1734  * The default encoding of strings from the filesystem of the environment.
1735  * This is used for strings of file names or paths.
1736  *
1737  * see Encoding.find('filesystem')
1738  *
1739  * == External encoding
1740  *
1741  * Each IO object has an external encoding which indicates the encoding that
1742  * Ruby will use to read its data. By default Ruby sets the external encoding
1743  * of an IO object to the default external encoding. The default external
1744  * encoding is set by locale encoding or the interpreter <code>-E</code> option.
1745  * Encoding.default_external returns the current value of the external
1746  * encoding.
1747  *
1748  * ENV["LANG"]
1749  * #=> "UTF-8"
1750  * Encoding.default_external
1751  * #=> #<Encoding:UTF-8>
1752  *
1753  * $ ruby -E ISO-8859-1 -e "p Encoding.default_external"
1754  * #<Encoding:ISO-8859-1>
1755  *
1756  * $ LANG=C ruby -e 'p Encoding.default_external'
1757  * #<Encoding:US-ASCII>
1758  *
1759  * The default external encoding may also be set through
1760  * Encoding.default_external=, but you should not do this as strings created
1761  * before and after the change will have inconsistent encodings. Instead use
1762  * <code>ruby -E</code> to invoke ruby with the correct external encoding.
1763  *
1764  * When you know that the actual encoding of the data of an IO object is not
1765  * the default external encoding, you can reset its external encoding with
1766  * IO#set_encoding or set it at IO object creation (see IO.new options).
1767  *
1768  * == Internal encoding
1769  *
1770  * To process the data of an IO object which has an encoding different
1771  * from its external encoding, you can set its internal encoding. Ruby will use
1772  * this internal encoding to transcode the data when it is read from the IO
1773  * object.
1774  *
1775  * Conversely, when data is written to the IO object it is transcoded from the
1776  * internal encoding to the external encoding of the IO object.
1777  *
1778  * The internal encoding of an IO object can be set with
1779  * IO#set_encoding or at IO object creation (see IO.new options).
1780  *
1781  * The internal encoding is optional and when not set, the Ruby default
1782  * internal encoding is used. If not explicitly set this default internal
1783  * encoding is +nil+ meaning that by default, no transcoding occurs.
1784  *
1785  * The default internal encoding can be set with the interpreter option
1786  * <code>-E</code>. Encoding.default_internal returns the current internal
1787  * encoding.
1788  *
1789  * $ ruby -e 'p Encoding.default_internal'
1790  * nil
1791  *
1792  * $ ruby -E ISO-8859-1:UTF-8 -e "p [Encoding.default_external, \
1793  * Encoding.default_internal]"
1794  * [#<Encoding:ISO-8859-1>, #<Encoding:UTF-8>]
1795  *
1796  * The default internal encoding may also be set through
1797  * Encoding.default_internal=, but you should not do this as strings created
1798  * before and after the change will have inconsistent encodings. Instead use
1799  * <code>ruby -E</code> to invoke ruby with the correct internal encoding.
1800  *
1801  * == IO encoding example
1802  *
1803  * In the following example a UTF-8 encoded string "R\u00E9sum\u00E9" is transcoded for
1804  * output to ISO-8859-1 encoding, then read back in and transcoded to UTF-8:
1805  *
1806  * string = "R\u00E9sum\u00E9"
1807  *
1808  * open("transcoded.txt", "w:ISO-8859-1") do |io|
1809  * io.write(string)
1810  * end
1811  *
1812  * puts "raw text:"
1813  * p File.binread("transcoded.txt")
1814  * puts
1815  *
1816  * open("transcoded.txt", "r:ISO-8859-1:UTF-8") do |io|
1817  * puts "transcoded text:"
1818  * p io.read
1819  * end
1820  *
1821  * While writing the file, the internal encoding is not specified as it is
1822  * only necessary for reading. While reading the file both the internal and
1823  * external encoding must be specified to obtain the correct result.
1824  *
1825  * $ ruby t.rb
1826  * raw text:
1827  * "R\xE9sum\xE9"
1828  *
1829  * transcoded text:
1830  * "R\u00E9sum\u00E9"
1831  *
1832  */
1833 
/*
 * Sets up the Encoding class: defines the class under rb_cObject,
 * registers the instance methods listed below, and fills a list array
 * with one wrapper object (enc_new) per entry in enc_table.
 *
 * NOTE(review): this extract is missing listing lines 1835 (the
 * declarator; the cross-reference index gives Init_Encoding(void)),
 * 1843-1844, 1852-1856, 1859, 1861-1865 and 1869-1870, so further
 * registrations made by the original function are not visible here.
 */
1834 void
1836 {
/* From here on rb_intern(str) expands to rb_intern_const(str). */
1837 #undef rb_intern
1838 #define rb_intern(str) rb_intern_const(str)
1839  VALUE list;
1840  int i;
1841 
1842  rb_cEncoding = rb_define_class("Encoding", rb_cObject);
/* "to_s" and "name" share the same implementation (enc_name). */
1845  rb_define_method(rb_cEncoding, "to_s", enc_name, 0);
1846  rb_define_method(rb_cEncoding, "inspect", enc_inspect, 0);
1847  rb_define_method(rb_cEncoding, "name", enc_name, 0);
1848  rb_define_method(rb_cEncoding, "names", enc_names, 0);
1849  rb_define_method(rb_cEncoding, "dummy?", enc_dummy_p, 0);
1850  rb_define_method(rb_cEncoding, "ascii_compatible?", enc_ascii_compatible_p, 0);
1851  rb_define_method(rb_cEncoding, "replicate", enc_replicate, 1);
1857 
1858  rb_define_method(rb_cEncoding, "_dump", enc_dump, -1);
1860 
1866 
1867  list = rb_ary_new2(enc_table.count);
/* NOTE(review): clearing klass presumably hides this array from
 * Ruby-level code -- confirm. */
1868  RBASIC(list)->klass = 0;
1871 
1872  for (i = 0; i < enc_table.count; ++i) {
1873  rb_ary_push(list, enc_new(enc_table.list[i].enc));
1874  }
1875 }
1876 
1877 /* locale insensitive ctype functions */
1878 
/*
 * ctype_test(c, ctype) is true only when rb_isascii(c) holds and
 * ONIGENC_IS_ASCII_CODE_CTYPE reports that c belongs to class `ctype`.
 * The macro expands `c` twice; every use below passes a plain int
 * parameter, so there are no repeated side effects.
 *
 * Each rb_is*() function applies ctype_test with the matching
 * ONIGENC_CTYPE_* class.
 *
 * NOTE(review): listing line 1893 is absent from this extract; the
 * cross-reference index places rb_isxdigit(int c) there.
 */
1879 #define ctype_test(c, ctype) \
1880  (rb_isascii(c) && ONIGENC_IS_ASCII_CODE_CTYPE((c), (ctype)))
1881 
1882 int rb_isalnum(int c) { return ctype_test(c, ONIGENC_CTYPE_ALNUM); }
1883 int rb_isalpha(int c) { return ctype_test(c, ONIGENC_CTYPE_ALPHA); }
1884 int rb_isblank(int c) { return ctype_test(c, ONIGENC_CTYPE_BLANK); }
1885 int rb_iscntrl(int c) { return ctype_test(c, ONIGENC_CTYPE_CNTRL); }
1886 int rb_isdigit(int c) { return ctype_test(c, ONIGENC_CTYPE_DIGIT); }
1887 int rb_isgraph(int c) { return ctype_test(c, ONIGENC_CTYPE_GRAPH); }
1888 int rb_islower(int c) { return ctype_test(c, ONIGENC_CTYPE_LOWER); }
1889 int rb_isprint(int c) { return ctype_test(c, ONIGENC_CTYPE_PRINT); }
1890 int rb_ispunct(int c) { return ctype_test(c, ONIGENC_CTYPE_PUNCT); }
1891 int rb_isspace(int c) { return ctype_test(c, ONIGENC_CTYPE_SPACE); }
1892 int rb_isupper(int c) { return ctype_test(c, ONIGENC_CTYPE_UPPER); }
1894 
/*
 * NOTE(review): listing lines 1896 (the declarator) and 1898 (the body)
 * are absent from this extract; the cross-reference index gives the
 * declarator as rb_tolower(int c).  Restore both from the original file.
 */
1895 int
1897 {
1899 }
1900 
/*
 * NOTE(review): listing lines 1902 (the declarator) and 1904 (the body)
 * are absent from this extract; the cross-reference index gives the
 * declarator as rb_toupper(int c).  Restore both from the original file.
 */
1901 int
1903 {
1905 }
1906 
VALUE data
Definition: tcltklib.c:3368
static void enc_set_index(VALUE obj, int idx)
Definition: encoding.c:728
static int rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg)
Definition: encoding.c:1556
#define RB_TYPE_P(obj, type)
rb_encoding OnigEncodingUS_ASCII
volatile VALUE tmp
Definition: tcltklib.c:10209
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:952
int rb_enc_get_index(VALUE obj)
Definition: encoding.c:688
ssize_t n
Definition: bigdecimal.c:5655
volatile VALUE ary
Definition: tcltklib.c:9713
#define ONIGENC_CTYPE_GRAPH
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:776
VP_EXPORT int
Definition: bigdecimal.c:5050
VALUE rb_ary_entry(VALUE ary, long offset)
Definition: array.c:1101
void rb_bug(const char *fmt,...)
Definition: error.c:290
VALUE rb_require_safe(VALUE, int)
Definition: load.c:911
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:854
#define FALSE
Definition: nkf.h:174
void rb_enc_set_base(const char *name, const char *orig)
Definition: encoding.c:335
size_t strlen(const char *)
gz enc2
Definition: zlib.c:2272
const char * rb_obj_classname(VALUE)
Definition: variable.c:391
VALUE rb_id2str(ID id)
Definition: ripper.c:16007
Win32OLEIDispatch * p
Definition: win32ole.c:786
#define RSTRING_END(str)
VALUE rb_cEncoding
Definition: encoding.c:40
static VALUE enc_load(VALUE klass, VALUE str)
Definition: encoding.c:1145
int count
Definition: encoding.c:51
#define ONIGENC_CTYPE_PUNCT
int st_lookup(st_table *, st_data_t, st_data_t *)
int ruby_encoding_index
Definition: ripper.y:178
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1493
static int rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg)
Definition: encoding.c:1588
static VALUE enc_inspect(VALUE self)
Definition: encoding.c:983
static rb_encoding * set_base_encoding(int index, rb_encoding *base)
Definition: encoding.c:321
#define ENC_SET_DUMMY(enc)
#define ONIGENC_CTYPE_XDIGIT
static VALUE rb_enc_name_list(VALUE klass)
Definition: encoding.c:1580
static int enc_register_at(int index, const char *name, rb_encoding *encoding)
Definition: encoding.c:232
#define rb_usascii_str_new2
#define rb_enc_codepoint(p, e, enc)
int rb_toupper(int c)
Definition: encoding.c:1902
void Init_Encoding(void)
Definition: encoding.c:1835
ssize_t i
Definition: bigdecimal.c:5655
#define UChar
#define rb_check_frozen(obj)
static int str_to_encindex(VALUE enc)
Definition: encoding.c:178
#define rb_enc_name(enc)
#define ONIGENC_CTYPE_ALNUM
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:194
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:103
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:968
const char * nl_langinfo_codeset(void)
Definition: langinfo.c:64
VALUE rb_eTypeError
Definition: error.c:511
#define OBJ_FREEZE(x)
st_table * names
Definition: encoding.c:53
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1371
VALUE enc
Definition: tcltklib.c:10311
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:822
#define ONIGENC_CTYPE_LOWER
st_table * st_init_strcasetable(void)
Definition: st.c:296
int st_insert2(st_table *, st_data_t, st_data_t, st_data_t(*)(st_data_t))
int rb_usascii_encindex(void)
Definition: encoding.c:1190
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:787
static VALUE enc_names(VALUE self)
Definition: encoding.c:1028
#define RSTRING_PTR(str)
#define CLASS_OF(v)
NIL_P(eventloop_thread)
Definition: tcltklib.c:4068
int rb_isblank(int c)
Definition: encoding.c:1884
int safe
Definition: tcltklib.c:6404
static int enc_table_expand(int newsize)
Definition: encoding.c:216
VALUE rb_protect(VALUE(*proc)(VALUE), VALUE data, int *state)
Definition: eval.c:771
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
Definition: vm_eval.c:774
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:931
static VALUE enc_new(rb_encoding *encoding)
Definition: encoding.c:82
void * realloc()
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1780
#define ONIGENC_CTYPE_SPACE
unsigned int flags
Definition: ripper.y:179
return Qtrue
Definition: tcltklib.c:9610
void rb_enc_set_default_external(VALUE encoding)
Definition: encoding.c:1338
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:764
int rb_isupper(int c)
Definition: encoding.c:1892
#define T_FILE
static VALUE rb_enc_aliases(VALUE klass)
Definition: encoding.c:1625
static VALUE set_default_external(VALUE klass, VALUE encoding)
Definition: encoding.c:1361
int index
Definition: tcltklib.c:4478
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:884
int rb_isprint(int c)
Definition: encoding.c:1889
VALUE rb_locale_charmap(VALUE klass)
Definition: encoding.c:1477
VALUE enc_name
Definition: tcltklib.c:8445
const char * alias
Definition: nkf.c:1151
#define rb_enc_to_index(enc)
r
Definition: bigdecimal.c:1196
#define ENC_REGISTER(enc)
#define FL_UNTRUSTED
int rb_enc_registered(const char *name)
Definition: encoding.c:560
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e)
ID rb_id_encoding(void)
Definition: encoding.c:681
int rb_isdigit(int c)
Definition: encoding.c:1886
static int enc_alias_internal(const char *alias, int idx)
Definition: encoding.c:475
#define ISDIGIT(c)
#define ONIGENC_IS_ASCII_CODE(code)
int rb_filesystem_encindex(void)
Definition: encoding.c:1237
void rb_enc_init(void)
Definition: encoding.c:533
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1166
#define ONIGENC_CTYPE_CNTRL
VALUE VALUE args
Definition: tcltklib.c:2561
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1358
VALUE rb_ivar_get(VALUE, ID)
Definition: variable.c:1111
#define ENC_CODERANGE_ASCIIONLY(obj)
static VALUE enc_dummy_p(VALUE enc)
Definition: encoding.c:434
const char * name
Definition: ripper.y:163
static VALUE rb_enc_from_encoding_index(int idx)
Definition: encoding.c:88
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:962
#define ONIGENC_ASCII_CODE_TO_UPPER_CASE(c)
Definition: nkf.c:87
unsigned long st_data_t
Definition: ripper.y:35
#define ctype_test(c, ctype)
Definition: encoding.c:1879
void rb_encdb_set_unicode(int index)
Definition: encoding.c:517
static struct @4 enc_table
static int str_find_encindex(VALUE enc)
Definition: encoding.c:165
int rb_to_encoding_index(VALUE enc)
Definition: encoding.c:146
#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n)
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1286
memset(y->frac+ix+1, 0,(y->Prec-(ix+1))*sizeof(BDIGIT))
#define ENCODING_INLINE_MAX
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
return Qfalse
Definition: tcltklib.c:6779
int rb_isxdigit(int c)
Definition: encoding.c:1893
#define Qnil
Definition: tcltklib.c:1896
#define STRCASECMP(s1, s2)
int rb_ispunct(int c)
Definition: encoding.c:1890
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:878
Definition: encoding.c:43
static VALUE char * str
Definition: tcltklib.c:3547
int rb_isspace(int c)
Definition: encoding.c:1891
int rb_typeddata_is_kind_of(VALUE obj, const rb_data_type_t *data_type)
Definition: error.c:473
VALUE rb_ary_replace(VALUE copy, VALUE orig)
Definition: array.c:3168
VALUE rb_ary_new(void)
Definition: array.c:424
#define StringValueCStr(v)
int rb_ascii8bit_encindex(void)
Definition: encoding.c:1160
unsigned long ID
Definition: ripper.y:105
VALUE rb_enc_default_external(void)
Definition: encoding.c:1300
#define ONIGENC_CTYPE_UPPER
void rb_define_const(VALUE, const char *, VALUE)
Definition: variable.c:2197
#define ISASCII(c)
Definition: ruby.h:1629
#define ONIGENC_CTYPE_ALPHA
#define ENC_CODERANGE_CLEAR(obj)
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:499
static VALUE VALUE obj
Definition: tcltklib.c:3158
#define RSTRING_LEN(str)
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:739
int rb_enc_replicate(const char *name, rb_encoding *encoding)
Definition: encoding.c:343
int idx
Definition: tcltklib.c:9716
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:719
#define ENCODING_COUNT
Definition: encoding.c:58
#define ISALNUM(c)
Definition: ruby.h:1635
static void set_encoding_const(const char *, rb_encoding *)
Definition: encoding.c:1505
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: ripper.y:235
#define T_STRING
#define MBCLEN_CHARFOUND_P(ret)
static VALUE enc_dump(int argc, VALUE *argv, VALUE self)
Definition: encoding.c:1137
int rb_encdb_alias(const char *alias, const char *orig)
Definition: encoding.c:506
#define xmalloc
int rb_locale_encindex(void)
Definition: encoding.c:1196
static rb_encoding * str_to_encoding(VALUE enc)
Definition: encoding.c:188
#define TypedData_Wrap_Struct(klass, data_type, sval)
Tcl_Obj * enc_list
Definition: tcltklib.c:10154
#define ISUPPER(c)
Definition: ruby.h:1633
VALUE rb_enc_associate_index(VALUE obj, int idx)
Definition: encoding.c:746
VALUE rb_eEncCompatError
Definition: error.c:518
#define ISLOWER(c)
Definition: ruby.h:1634
#define ALLOCA_N(type, n)
const char * name
Definition: encoding.c:44
static int VALUE key
Definition: tkutil.c:265
#define rb_enc_mbc_to_codepoint(p, e, enc)
#define ONIGENC_CTYPE_BLANK
int rb_isgraph(int c)
Definition: encoding.c:1887
static int enc_set_default_encoding(struct default_encoding *def, VALUE encoding, const char *name)
Definition: encoding.c:1259
rb_encoding * rb_find_encoding(VALUE enc)
Definition: encoding.c:201
VALUE * argv
Definition: tcltklib.c:1971
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:908
VALUE rb_hash_aset(VALUE, VALUE, VALUE)
int st_foreach(st_table *, int(*)(ANYARGS), st_data_t)
Definition: st.c:1000
int rb_encdb_dummy(const char *name)
Definition: encoding.c:410
#define rb_enc_mbminlen(enc)
static int enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:266
#define TRUE
Definition: nkf.h:175
#define ENC_DUMMY_P(enc)
static int enc_check_encoding(VALUE obj)
Definition: encoding.c:126
VALUE rb_sprintf(const char *format,...)
Definition: sprintf.c:1270
#define StringValue(v)
#define RDATA(obj)
#define MBCLEN_CHARFOUND_LEN(ret)
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:896
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:459
#define rb_isascii(c)
#define T_REGEXP
#define TOLOWER(c)
register char * s
Definition: os2.c:56
#define CONST_ID(var, str)
void rb_gc_register_mark_object(VALUE)
Definition: gc.c:2980
#define strdup(s)
Definition: util.h:69
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1566
static int min(int a, int b)
Definition: strftime.c:131
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1181
int rb_encdb_replicate(const char *name, const char *orig)
Definition: encoding.c:388
#define ENCODING_NAMELEN_MAX
Definition: encoding.c:61
int rb_define_dummy_encoding(const char *name)
Definition: encoding.c:400
#define FL_TAINT
static struct default_encoding default_internal
Definition: encoding.c:1368
#define debug(x)
Definition: _sdbm.c:52
int argc
Definition: tcltklib.c:1970
static VALUE enc_compatible_p(VALUE klass, VALUE str1, VALUE str2)
Definition: encoding.c:1124
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1212
int rb_utf8_encindex(void)
Definition: encoding.c:1175
#define ENCODING_SET_INLINED(obj, i)
#define ONIGENC_IS_UNICODE(enc)
void rb_undef_alloc_func(VALUE)
Definition: vm_method.c:482
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:868
static int enc_autoload(rb_encoding *)
Definition: encoding.c:609
int rb_islower(int c)
Definition: encoding.c:1888
ruby_verbose
Definition: tcltklib.c:5818
static VALUE set_default_internal(VALUE klass, VALUE encoding)
Definition: encoding.c:1444
VpDivd * c
Definition: bigdecimal.c:1205
#define enc_autoload_p(enc)
Definition: encoding.c:64
#define ONIGENC_FLAG_UNICODE
#define MEMCPY(p1, p2, type, n)
static void enc_check_duplication(const char *name)
Definition: encoding.c:313
static size_t enc_memsize(const void *p)
Definition: encoding.c:69
VALUE rb_ivar_set(VALUE, ID, VALUE)
Definition: variable.c:1123
arg
Definition: ripper.y:1312
static ID id_encoding
Definition: encoding.c:39
static int enc_names_i(st_data_t name, st_data_t idx, st_data_t args)
Definition: encoding.c:1007
#define ENC_CODERANGE_7BIT
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:770
static VALUE get_default_external(VALUE klass)
Definition: encoding.c:1332
int size
Definition: encoding.c:52
static struct default_encoding default_external
Definition: encoding.c:1256
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
#define SYMBOL_P(x)
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:340
void rb_set_errinfo(VALUE err)
Definition: eval.c:1436
#define ONIGENC_MBC_ENC_LEN(enc, p, e)
static VALUE enc_replicate(VALUE encoding, VALUE name)
Definition: encoding.c:364
DATA_PTR(self)
rb_encoding * enc
Definition: encoding.c:1253
static int enc_set_filesystem_encoding(void)
Definition: encoding.c:1218
ruby_debug
Definition: tcltklib.c:5817
RUBY_EXTERN VALUE rb_cObject
Definition: ripper.y:1426
void rb_enc_set_default_internal(VALUE encoding)
Definition: encoding.c:1424
static VALUE enc_ascii_compatible_p(VALUE enc)
Definition: encoding.c:450
#define RBASIC(obj)
#define valid_encoding_name_p(name)
Definition: encoding.c:62
#define ONIGENC_CTYPE_DIGIT
int rb_enc_alias(const char *alias, const char *orig)
Definition: encoding.c:491
klass
Definition: tcltklib.c:3504
#define INT2NUM(x)
static VALUE require_enc(VALUE enclib)
Definition: encoding.c:573
#define is_data_encoding(obj)
Definition: encoding.c:79
struct rb_encoding_entry * list
Definition: encoding.c:50
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1246
static int enc_capable(VALUE obj)
Definition: encoding.c:665
static const rb_data_type_t encoding_data_type
Definition: encoding.c:74
int st_insert(st_table *, st_data_t, st_data_t)
static st_data_t enc_dup_name(st_data_t name)
Definition: encoding.c:465
int rb_isalnum(int c)
Definition: encoding.c:1882
#define ONIGENC_ASCII_CODE_TO_LOWER_CASE(c)
VALUE rb_enc_default_internal(void)
Definition: encoding.c:1380
VALUE rb_ary_new2(long capa)
Definition: array.c:417
static int check_encoding(rb_encoding *enc)
Definition: encoding.c:114
static VALUE get_default_internal(VALUE klass)
Definition: encoding.c:1418
#define rb_safe_level()
Definition: tcltklib.c:94
int rb_tolower(int c)
Definition: encoding.c:1896
#define rb_enc_asciicompat(enc)
#define NUM2INT(x)
static int enc_replicate_with_index(const char *name, rb_encoding *origenc, int idx)
Definition: encoding.c:372
VALUE rb_hash_new(void)
Definition: hash.c:234
#define BUILTIN_TYPE(x)
BDIGIT e
Definition: bigdecimal.c:5085
#define rb_enc_isascii(c, enc)
unsigned long VALUE
Definition: ripper.y:104
static int enc_alias(const char *alias, int idx)
Definition: encoding.c:482
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1151
void rb_warning(const char *fmt,...)
Definition: error.c:229
int rb_enc_find_index(const char *name)
Definition: encoding.c:633
int rb_iscntrl(int c)
Definition: encoding.c:1885
int rb_enc_register(const char *name, rb_encoding *encoding)
Definition: encoding.c:279
Definition: nkf.c:108
static VALUE rb_encoding_list
Definition: encoding.c:41
#define snprintf
#define SPECIAL_CONST_P(x)
void rb_encdb_declare(const char *name)
Definition: encoding.c:303
#define ONIGENC_CTYPE_PRINT
#define rb_intern(str)
void rb_gc_mark_encodings(void)
Definition: encoding.c:211
#define NULL
Definition: _sdbm.c:103
#define T_DATA
const char * name
Definition: nkf.c:208
#define UNSPECIFIED_ENCODING
Definition: encoding.c:59
VALUE rb_check_string_type(VALUE)
Definition: string.c:1508
rb_encoding OnigEncodingUTF_8
#define ENC_TO_ENCINDEX(enc)
int rb_enc_str_coderange(VALUE)
Definition: string.c:327
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1340
void rb_warn(const char *fmt,...)
Definition: error.c:216
#define SYM2ID(x)
rb_encoding * enc
Definition: encoding.c:45
VALUE rb_eArgError
Definition: error.c:512
static int load_encoding(const char *name)
Definition: encoding.c:580
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:657
STATIC void unsigned char * cp
Definition: crypt.c:307
#define FL_UNSET(x, f)
static VALUE enc_find(VALUE klass, VALUE enc)
Definition: encoding.c:1089
#define ENCODING_GET_INLINED(obj)
int rb_isalpha(int c)
Definition: encoding.c:1883
static int must_encoding(VALUE enc)
Definition: encoding.c:135
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:548
size_t len
Definition: tcltklib.c:3568
rb_encoding * base
Definition: encoding.c:46