Ruby 2.0.0p645 (2015-04-13 revision 50299)
string.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  string.c -
4 
5  $Author: usa $
6  created at: Mon Aug 9 17:12:58 JST 1993
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9  Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
10  Copyright (C) 2000 Information-technology Promotion Agency, Japan
11 
12 **********************************************************************/
13 
14 #include "ruby/ruby.h"
15 #include "ruby/re.h"
16 #include "ruby/encoding.h"
17 #include "vm_core.h"
18 #include "internal.h"
19 #include "probes.h"
20 #include <assert.h>
21 
22 #define BEG(no) (regs->beg[(no)])
23 #define END(no) (regs->end[(no)])
24 
25 #include <math.h>
26 #include <ctype.h>
27 
28 #ifdef HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 
32 #define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
33 
34 #undef rb_str_new_cstr
35 #undef rb_tainted_str_new_cstr
36 #undef rb_usascii_str_new_cstr
37 #undef rb_external_str_new_cstr
38 #undef rb_locale_str_new_cstr
39 #undef rb_str_new2
40 #undef rb_str_new3
41 #undef rb_str_new4
42 #undef rb_str_new5
43 #undef rb_tainted_str_new2
44 #undef rb_usascii_str_new2
45 #undef rb_str_dup_frozen
46 #undef rb_str_buf_new_cstr
47 #undef rb_str_buf_new2
48 #undef rb_str_buf_cat2
49 #undef rb_str_cat2
50 
51 static VALUE rb_str_clear(VALUE str);
52 
55 
56 #define RUBY_MAX_CHAR_LEN 16
57 #define STR_TMPLOCK FL_USER7
58 #define STR_NOEMBED FL_USER1
59 #define STR_SHARED FL_USER2 /* = ELTS_SHARED */
60 #define STR_ASSOC FL_USER3
61 #define STR_SHARED_P(s) FL_ALL((s), STR_NOEMBED|ELTS_SHARED)
62 #define STR_ASSOC_P(s) FL_ALL((s), STR_NOEMBED|STR_ASSOC)
63 #define STR_NOCAPA (STR_NOEMBED|ELTS_SHARED|STR_ASSOC)
64 #define STR_NOCAPA_P(s) (FL_TEST((s),STR_NOEMBED) && FL_ANY((s),ELTS_SHARED|STR_ASSOC))
65 #define STR_UNSET_NOCAPA(s) do {\
66  if (FL_TEST((s),STR_NOEMBED)) FL_UNSET((s),(ELTS_SHARED|STR_ASSOC));\
67 } while (0)
68 
69 
/*
 * Switch a string to heap representation.  Besides setting STR_NOEMBED this
 * also zeroes the embedded-length bits in the flags word, so any length that
 * must survive has to be read before this macro runs.
 */
70 #define STR_SET_NOEMBED(str) do {\
71  FL_SET((str), STR_NOEMBED);\
72  STR_SET_EMBED_LEN((str), 0);\
73 } while (0)
/* Switch back to embedded representation (only clears the flag; the
 * embedded length is not touched here). */
74 #define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED)
75 #define STR_EMBED_P(str) (!FL_TEST((str), STR_NOEMBED))
/*
 * Store n as the embedded length, which is kept in the flags word under
 * RSTRING_EMBED_LEN_MASK.  `n' is evaluated exactly once (copied to tmp_n);
 * `str' is evaluated twice.  No range check is made on n.
 */
76 #define STR_SET_EMBED_LEN(str, n) do { \
77  long tmp_n = (n);\
78  RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
79  RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
80 } while (0)
81 
/*
 * Set the byte length of str to n, writing it to whichever place the current
 * representation uses: the flags word when embedded, as.heap.len otherwise.
 * Only the length field changes; buffer contents and terminator are the
 * caller's responsibility.
 */
82 #define STR_SET_LEN(str, n) do { \
83  if (STR_EMBED_P(str)) {\
84  STR_SET_EMBED_LEN((str), (n));\
85  }\
86  else {\
87  RSTRING(str)->as.heap.len = (n);\
88  }\
89 } while (0)
90 
/*
 * Decrement the byte length of str by one, for either representation.
 * No check is made that the length is greater than zero.
 */
91 #define STR_DEC_LEN(str) do {\
92  if (STR_EMBED_P(str)) {\
93  long n = RSTRING_LEN(str);\
94  n--;\
95  STR_SET_EMBED_LEN((str), n);\
96  }\
97  else {\
98  RSTRING(str)->as.heap.len--;\
99  }\
100 } while (0)
101 
/*
 * Give str room for `capacity' bytes plus one extra byte.
 *
 *  - Embedded string, capacity <= RSTRING_EMBED_LEN_MAX: nothing happens.
 *  - Embedded string, larger capacity: a fresh buffer of capacity+1 bytes is
 *    allocated, the current RSTRING_LEN bytes are copied (the byte after
 *    them is NOT copied), and the string is switched to heap representation.
 *    The statement order matters: the length is stored into as.heap.len
 *    before STR_SET_NOEMBED, because that macro zeroes the embedded length.
 *  - Heap string: the buffer is always reallocated to capacity+1 bytes;
 *    as.heap.aux.capa is only updated when the string is not STR_NOCAPA_P.
 *
 * Both arguments are evaluated several times, so they must be free of side
 * effects.  The length itself is never changed by this macro.
 */
102 #define RESIZE_CAPA(str,capacity) do {\
103  if (STR_EMBED_P(str)) {\
104  if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
105  char *tmp = ALLOC_N(char, (capacity)+1);\
106  memcpy(tmp, RSTRING_PTR(str), RSTRING_LEN(str));\
107  RSTRING(str)->as.heap.ptr = tmp;\
108  RSTRING(str)->as.heap.len = RSTRING_LEN(str);\
109  STR_SET_NOEMBED(str);\
110  RSTRING(str)->as.heap.aux.capa = (capacity);\
111  }\
112  }\
113  else {\
114  REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+1);\
115  if (!STR_NOCAPA_P(str))\
116  RSTRING(str)->as.heap.aux.capa = (capacity);\
117  }\
118 } while (0)
119 
/*
 * Coderange shortcuts.  Both go through rb_enc_str_coderange(), which (as
 * defined later in this file) scans the string and caches the result when
 * the coderange is still unknown -- so these may do O(length) work.
 */
120 #define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
121 #define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN)
122 
/* Encoding object of str, looked up from the encoding index on the object. */
123 #define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
124 
125 static inline int
127 {
128  rb_encoding *enc;
129 
130  /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
131  if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
132  return 1;
133 
134  enc = STR_ENC_GET(str);
135  if (rb_enc_mbmaxlen(enc) == 1)
136  return 1;
137 
138  /* Conservative. Possibly single byte.
139  * "\xa1" in Shift_JIS for example. */
140  return 0;
141 }
142 
144 
/*
 * Find the first byte in [p, e) for which ISASCII() is false.
 *
 * Returns a pointer to that byte, or NULL when every byte in the range is
 * ASCII (including the empty range).
 *
 * When SIZEOF_VALUE is 4 or 8 and the range is longer than two VALUE words,
 * the middle of the range is scanned one aligned VALUE at a time by testing
 * the top bit of every byte at once; the unaligned head and the tail are
 * scanned byte by byte.
 *
 * Note that NONASCII_MASK is #defined inside this function body and stays
 * defined afterwards; later code in this file tests it with #ifdef.
 */
145 static inline const char *
146 search_nonascii(const char *p, const char *e)
147 {
148 #if SIZEOF_VALUE == 8
149 # define NONASCII_MASK 0x8080808080808080ULL
150 #elif SIZEOF_VALUE == 4
151 # define NONASCII_MASK 0x80808080UL
152 #endif
153 #ifdef NONASCII_MASK
154  if ((int)sizeof(VALUE) * 2 < e - p) {
155  const VALUE *s, *t;
156  const VALUE lowbits = sizeof(VALUE) - 1;
 /* s = p rounded up to the next VALUE-aligned address */
157  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
 /* unaligned head: byte-wise */
158  while (p < (const char *)s) {
159  if (!ISASCII(*p))
160  return p;
161  p++;
162  }
 /* t = e rounded down to a VALUE-aligned address */
163  t = (const VALUE*)(~lowbits & (VALUE)e);
 /* aligned middle: stop at the first word containing a byte with its
  * top bit set; the exact byte is located by the loop below */
164  while (s < t) {
165  if (*s & NONASCII_MASK) {
166  t = s;
167  break;
168  }
169  s++;
170  }
171  p = (const char *)t;
172  }
173 #endif
 /* tail (or the whole range when the word scan was not used) */
174  while (p < e) {
175  if (!ISASCII(*p))
176  return p;
177  p++;
178  }
179  return NULL;
180 }
181 
182 static int
183 coderange_scan(const char *p, long len, rb_encoding *enc)
184 {
185  const char *e = p + len;
186 
187  if (rb_enc_to_index(enc) == 0) {
188  /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
189  p = search_nonascii(p, e);
191  }
192 
193  if (rb_enc_asciicompat(enc)) {
194  p = search_nonascii(p, e);
195  if (!p) {
196  return ENC_CODERANGE_7BIT;
197  }
198  while (p < e) {
199  int ret = rb_enc_precise_mbclen(p, e, enc);
200  if (!MBCLEN_CHARFOUND_P(ret)) {
201  return ENC_CODERANGE_BROKEN;
202  }
203  p += MBCLEN_CHARFOUND_LEN(ret);
204  if (p < e) {
205  p = search_nonascii(p, e);
206  if (!p) {
207  return ENC_CODERANGE_VALID;
208  }
209  }
210  }
211  if (e < p) {
212  return ENC_CODERANGE_BROKEN;
213  }
214  return ENC_CODERANGE_VALID;
215  }
216 
217  while (p < e) {
218  int ret = rb_enc_precise_mbclen(p, e, enc);
219 
220  if (!MBCLEN_CHARFOUND_P(ret)) {
221  return ENC_CODERANGE_BROKEN;
222  }
223  p += MBCLEN_CHARFOUND_LEN(ret);
224  }
225  if (e < p) {
226  return ENC_CODERANGE_BROKEN;
227  }
228  return ENC_CODERANGE_VALID;
229 }
230 
231 long
232 rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
233 {
234  const char *p = s;
235 
236  if (*cr == ENC_CODERANGE_BROKEN)
237  return e - s;
238 
239  if (rb_enc_to_index(enc) == 0) {
240  /* enc is ASCII-8BIT. ASCII-8BIT string never be broken. */
241  p = search_nonascii(p, e);
243  return e - s;
244  }
245  else if (rb_enc_asciicompat(enc)) {
246  p = search_nonascii(p, e);
247  if (!p) {
248  if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
249  return e - s;
250  }
251  while (p < e) {
252  int ret = rb_enc_precise_mbclen(p, e, enc);
253  if (!MBCLEN_CHARFOUND_P(ret)) {
255  return p - s;
256  }
257  p += MBCLEN_CHARFOUND_LEN(ret);
258  if (p < e) {
259  p = search_nonascii(p, e);
260  if (!p) {
261  *cr = ENC_CODERANGE_VALID;
262  return e - s;
263  }
264  }
265  }
267  return p - s;
268  }
269  else {
270  while (p < e) {
271  int ret = rb_enc_precise_mbclen(p, e, enc);
272  if (!MBCLEN_CHARFOUND_P(ret)) {
274  return p - s;
275  }
276  p += MBCLEN_CHARFOUND_LEN(ret);
277  }
279  return p - s;
280  }
281 }
282 
283 static inline void
285 {
286  rb_enc_set_index(str1, ENCODING_GET(str2));
287 }
288 
289 static void
291 {
292  /* this function is designed for copying encoding and coderange
293  * from src to new string "dest" which is made from the part of src.
294  */
295  str_enc_copy(dest, src);
296  if (RSTRING_LEN(dest) == 0) {
297  if (!rb_enc_asciicompat(STR_ENC_GET(src)))
299  else
301  return;
302  }
303  switch (ENC_CODERANGE(src)) {
304  case ENC_CODERANGE_7BIT:
306  break;
307  case ENC_CODERANGE_VALID:
308  if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
311  else
313  break;
314  default:
315  break;
316  }
317 }
318 
319 static void
321 {
322  str_enc_copy(dest, src);
323  ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
324 }
325 
326 int
328 {
329  int cr = ENC_CODERANGE(str);
330 
331  if (cr == ENC_CODERANGE_UNKNOWN) {
332  rb_encoding *enc = STR_ENC_GET(str);
333  cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
334  ENC_CODERANGE_SET(str, cr);
335  }
336  return cr;
337 }
338 
339 int
341 {
342  rb_encoding *enc = STR_ENC_GET(str);
343 
344  if (!rb_enc_asciicompat(enc))
345  return FALSE;
346  else if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
347  return TRUE;
348  return FALSE;
349 }
350 
351 static inline void
352 str_mod_check(VALUE s, const char *p, long len)
353 {
354  if (RSTRING_PTR(s) != p || RSTRING_LEN(s) != len){
355  rb_raise(rb_eRuntimeError, "string modified");
356  }
357 }
358 
359 size_t
361 {
362  if (STR_EMBED_P(str)) {
363  return RSTRING_EMBED_LEN_MAX;
364  }
365  else if (STR_NOCAPA_P(str)) {
366  return RSTRING(str)->as.heap.len;
367  }
368  else {
369  return RSTRING(str)->as.heap.aux.capa;
370  }
371 }
372 
373 static inline VALUE
375 {
376  NEWOBJ_OF(str, struct RString, klass, T_STRING);
377 
378  str->as.heap.ptr = 0;
379  str->as.heap.len = 0;
380  str->as.heap.aux.capa = 0;
381 
382  return (VALUE)str;
383 }
384 
385 static inline VALUE
387 {
390  }
391  return str_alloc(klass);
392 }
393 
394 static VALUE
395 str_new(VALUE klass, const char *ptr, long len)
396 {
397  VALUE str;
398 
399  if (len < 0) {
400  rb_raise(rb_eArgError, "negative string size (or size too big)");
401  }
402 
405  }
406 
407  str = str_alloc(klass);
408  if (len > RSTRING_EMBED_LEN_MAX) {
409  RSTRING(str)->as.heap.aux.capa = len;
410  RSTRING(str)->as.heap.ptr = ALLOC_N(char,len+1);
411  STR_SET_NOEMBED(str);
412  }
413  else if (len == 0) {
415  }
416  if (ptr) {
417  memcpy(RSTRING_PTR(str), ptr, len);
418  }
419  STR_SET_LEN(str, len);
420  RSTRING_PTR(str)[len] = '\0';
421  return str;
422 }
423 
424 VALUE
425 rb_str_new(const char *ptr, long len)
426 {
427  return str_new(rb_cString, ptr, len);
428 }
429 
430 VALUE
431 rb_usascii_str_new(const char *ptr, long len)
432 {
433  VALUE str = rb_str_new(ptr, len);
435  return str;
436 }
437 
438 VALUE
439 rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
440 {
441  VALUE str = rb_str_new(ptr, len);
442  rb_enc_associate(str, enc);
443  return str;
444 }
445 
446 VALUE
447 rb_str_new_cstr(const char *ptr)
448 {
449  if (!ptr) {
450  rb_raise(rb_eArgError, "NULL pointer given");
451  }
452  return rb_str_new(ptr, strlen(ptr));
453 }
454 
456 #define rb_str_new2 rb_str_new_cstr
457 
458 VALUE
459 rb_usascii_str_new_cstr(const char *ptr)
460 {
461  VALUE str = rb_str_new2(ptr);
463  return str;
464 }
465 
467 #define rb_usascii_str_new2 rb_usascii_str_new_cstr
468 
469 VALUE
470 rb_tainted_str_new(const char *ptr, long len)
471 {
472  VALUE str = rb_str_new(ptr, len);
473 
474  OBJ_TAINT(str);
475  return str;
476 }
477 
478 VALUE
479 rb_tainted_str_new_cstr(const char *ptr)
480 {
481  VALUE str = rb_str_new2(ptr);
482 
483  OBJ_TAINT(str);
484  return str;
485 }
486 
488 #define rb_tainted_str_new2 rb_tainted_str_new_cstr
489 
490 VALUE
492 {
494  rb_econv_t *ec;
496  long len, olen;
497  VALUE econv_wrapper;
498  VALUE newstr;
499  const unsigned char *start, *sp;
500  unsigned char *dest, *dp;
501  size_t converted_output = 0;
502 
503  if (!to) return str;
504  if (!from) from = rb_enc_get(str);
505  if (from == to) return str;
506  if ((rb_enc_asciicompat(to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) ||
507  to == rb_ascii8bit_encoding()) {
508  if (STR_ENC_GET(str) != to) {
509  str = rb_str_dup(str);
510  rb_enc_associate(str, to);
511  }
512  return str;
513  }
514 
515  len = RSTRING_LEN(str);
516  newstr = rb_str_new(0, len);
517  olen = len;
518 
519  econv_wrapper = rb_obj_alloc(rb_cEncodingConverter);
520  RBASIC(econv_wrapper)->klass = 0;
521  ec = rb_econv_open_opts(from->name, to->name, ecflags, ecopts);
522  if (!ec) return str;
523  DATA_PTR(econv_wrapper) = ec;
524 
525  sp = (unsigned char*)RSTRING_PTR(str);
526  start = sp;
527  while ((dest = (unsigned char*)RSTRING_PTR(newstr)),
528  (dp = dest + converted_output),
529  (ret = rb_econv_convert(ec, &sp, start + len, &dp, dest + olen, 0)),
531  /* destination buffer short */
532  size_t converted_input = sp - start;
533  size_t rest = len - converted_input;
534  converted_output = dp - dest;
535  rb_str_set_len(newstr, converted_output);
536  if (converted_input && converted_output &&
537  rest < (LONG_MAX / converted_output)) {
538  rest = (rest * converted_output) / converted_input;
539  }
540  else {
541  rest = olen;
542  }
543  olen += rest < 2 ? 2 : rest;
544  rb_str_resize(newstr, olen);
545  }
546  DATA_PTR(econv_wrapper) = 0;
547  rb_econv_close(ec);
548  rb_gc_force_recycle(econv_wrapper);
549  switch (ret) {
550  case econv_finished:
551  len = dp - (unsigned char*)RSTRING_PTR(newstr);
552  rb_str_set_len(newstr, len);
553  rb_enc_associate(newstr, to);
554  return newstr;
555 
556  default:
557  /* some error, return original */
558  return str;
559  }
560 }
561 
562 VALUE
564 {
565  return rb_str_conv_enc_opts(str, from, to, 0, Qnil);
566 }
567 
568 VALUE
569 rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
570 {
571  VALUE str;
572 
573  str = rb_tainted_str_new(ptr, len);
574  if (eenc == rb_usascii_encoding() &&
577  return str;
578  }
579  rb_enc_associate(str, eenc);
580  return rb_str_conv_enc(str, eenc, rb_default_internal_encoding());
581 }
582 
583 VALUE
584 rb_external_str_new(const char *ptr, long len)
585 {
587 }
588 
589 VALUE
590 rb_external_str_new_cstr(const char *ptr)
591 {
593 }
594 
595 VALUE
596 rb_locale_str_new(const char *ptr, long len)
597 {
599 }
600 
601 VALUE
602 rb_locale_str_new_cstr(const char *ptr)
603 {
605 }
606 
607 VALUE
608 rb_filesystem_str_new(const char *ptr, long len)
609 {
611 }
612 
613 VALUE
615 {
617 }
618 
619 VALUE
621 {
623 }
624 
625 VALUE
627 {
628  return rb_str_conv_enc(str, STR_ENC_GET(str), rb_locale_encoding());
629 }
630 
631 VALUE
633 {
634  return rb_str_conv_enc(str, STR_ENC_GET(str), enc);
635 }
636 
637 static VALUE
639 {
640  if (RSTRING_LEN(str) <= RSTRING_EMBED_LEN_MAX) {
641  STR_SET_EMBED(str2);
642  memcpy(RSTRING_PTR(str2), RSTRING_PTR(str), RSTRING_LEN(str)+1);
643  STR_SET_EMBED_LEN(str2, RSTRING_LEN(str));
644  }
645  else {
646  str = rb_str_new_frozen(str);
647  FL_SET(str2, STR_NOEMBED);
648  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
649  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
650  RSTRING(str2)->as.heap.aux.shared = str;
651  FL_SET(str2, ELTS_SHARED);
652  }
653  return str2;
654 }
655 
656 static VALUE
658 {
660  rb_enc_cr_str_exact_copy(str2, str);
661  return str2;
662 }
663 
664 static VALUE
666 {
667  return str_replace_shared(str_alloc(klass), str);
668 }
669 
670 static VALUE
672 {
673  return str_new_shared(klass, str);
674 }
675 
676 VALUE
678 {
679  VALUE str2 = str_new3(rb_obj_class(str), str);
680 
681  OBJ_INFECT(str2, str);
682  return str2;
683 }
684 
686 #define rb_str_new3 rb_str_new_shared
687 
688 static VALUE
689 str_new4(VALUE klass, VALUE str)
690 {
691  VALUE str2;
692 
693  str2 = str_alloc(klass);
694  STR_SET_NOEMBED(str2);
695  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
696  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
697  if (STR_SHARED_P(str)) {
698  VALUE shared = RSTRING(str)->as.heap.aux.shared;
699  assert(OBJ_FROZEN(shared));
700  FL_SET(str2, ELTS_SHARED);
701  RSTRING(str2)->as.heap.aux.shared = shared;
702  }
703  else {
704  FL_SET(str, ELTS_SHARED);
705  RSTRING(str)->as.heap.aux.shared = str2;
706  }
707  rb_enc_cr_str_exact_copy(str2, str);
708  OBJ_INFECT(str2, str);
709  return str2;
710 }
711 
712 VALUE
714 {
715  VALUE klass, str;
716 
717  if (OBJ_FROZEN(orig)) return orig;
718  klass = rb_obj_class(orig);
719  if (STR_SHARED_P(orig) && (str = RSTRING(orig)->as.heap.aux.shared)) {
720  long ofs;
721  assert(OBJ_FROZEN(str));
722  ofs = RSTRING_LEN(str) - RSTRING_LEN(orig);
723  if ((ofs > 0) || (klass != RBASIC(str)->klass) ||
724  ((RBASIC(str)->flags ^ RBASIC(orig)->flags) & (FL_TAINT|FL_UNTRUSTED)) ||
725  ENCODING_GET(str) != ENCODING_GET(orig)) {
726  str = str_new3(klass, str);
727  RSTRING(str)->as.heap.ptr += ofs;
728  RSTRING(str)->as.heap.len -= ofs;
729  rb_enc_cr_str_exact_copy(str, orig);
730  OBJ_INFECT(str, orig);
731  }
732  }
733  else if (STR_EMBED_P(orig)) {
734  str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
735  rb_enc_cr_str_exact_copy(str, orig);
736  OBJ_INFECT(str, orig);
737  }
738  else if (STR_ASSOC_P(orig)) {
739  VALUE assoc = RSTRING(orig)->as.heap.aux.shared;
740  FL_UNSET(orig, STR_ASSOC);
741  str = str_new4(klass, orig);
742  FL_SET(str, STR_ASSOC);
743  RSTRING(str)->as.heap.aux.shared = assoc;
744  }
745  else {
746  str = str_new4(klass, orig);
747  }
748  OBJ_FREEZE(str);
749  return str;
750 }
751 
753 #define rb_str_new4 rb_str_new_frozen
754 
755 VALUE
756 rb_str_new_with_class(VALUE obj, const char *ptr, long len)
757 {
758  return str_new(rb_obj_class(obj), ptr, len);
759 }
760 
/*
 * Legacy name rb_str_new5.  Within the rest of this file the #define below
 * makes rb_str_new5 expand to rb_str_new_with_class.
 * NOTE(review): RUBY_ALIAS_FUNCTION is defined elsewhere; presumably it also
 * emits a callable rb_str_new5 symbol forwarding to rb_str_new_with_class --
 * confirm against its definition.
 */
761 RUBY_ALIAS_FUNCTION(rb_str_new5(VALUE obj, const char *ptr, long len),
762  rb_str_new_with_class, (obj, ptr, len))
763 #define rb_str_new5 rb_str_new_with_class
764 
765 static VALUE
766 str_new_empty(VALUE str)
767 {
768  VALUE v = rb_str_new5(str, 0, 0);
769  rb_enc_copy(v, str);
770  OBJ_INFECT(v, str);
771  return v;
772 }
773 
774 #define STR_BUF_MIN_SIZE 128
775 
776 VALUE
777 rb_str_buf_new(long capa)
778 {
779  VALUE str = str_alloc(rb_cString);
780 
781  if (capa < STR_BUF_MIN_SIZE) {
782  capa = STR_BUF_MIN_SIZE;
783  }
784  FL_SET(str, STR_NOEMBED);
785  RSTRING(str)->as.heap.aux.capa = capa;
786  RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1);
787  RSTRING(str)->as.heap.ptr[0] = '\0';
788 
789  return str;
790 }
791 
792 VALUE
793 rb_str_buf_new_cstr(const char *ptr)
794 {
795  VALUE str;
796  long len = strlen(ptr);
797 
798  str = rb_str_buf_new(len);
799  rb_str_buf_cat(str, ptr, len);
800 
801  return str;
802 }
803 
805 #define rb_str_buf_new2 rb_str_buf_new_cstr
806 
807 VALUE
808 rb_str_tmp_new(long len)
809 {
810  return str_new(0, 0, len);
811 }
812 
813 void *
814 rb_alloc_tmp_buffer(volatile VALUE *store, long len)
815 {
816  VALUE s = rb_str_tmp_new(len);
817  *store = s;
818  return RSTRING_PTR(s);
819 }
820 
821 void
822 rb_free_tmp_buffer(volatile VALUE *store)
823 {
824  VALUE s = *store;
825  *store = 0;
826  if (s) rb_str_clear(s);
827 }
828 
829 void
831 {
832  if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
833  xfree(RSTRING(str)->as.heap.ptr);
834  }
835 }
836 
837 RUBY_FUNC_EXPORTED size_t
839 {
840  if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
841  return RSTRING(str)->as.heap.aux.capa + 1; /* termlen */
842  }
843  else {
844  return 0;
845  }
846 }
847 
848 VALUE
850 {
851  return rb_convert_type(str, T_STRING, "String", "to_str");
852 }
853 
854 static inline void str_discard(VALUE str);
855 
856 void
858 {
859  rb_encoding *enc;
860  int cr;
861  if (str == str2) return;
862  enc = STR_ENC_GET(str2);
863  cr = ENC_CODERANGE(str2);
864  str_discard(str);
865  OBJ_INFECT(str, str2);
866  if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) {
867  STR_SET_EMBED(str);
868  memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1);
869  STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
870  rb_enc_associate(str, enc);
871  ENC_CODERANGE_SET(str, cr);
872  return;
873  }
874  STR_SET_NOEMBED(str);
875  STR_UNSET_NOCAPA(str);
876  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
877  RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
878  if (STR_NOCAPA_P(str2)) {
879  FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA);
880  RSTRING(str)->as.heap.aux.shared = RSTRING(str2)->as.heap.aux.shared;
881  }
882  else {
883  RSTRING(str)->as.heap.aux.capa = RSTRING(str2)->as.heap.aux.capa;
884  }
885  STR_SET_EMBED(str2); /* abandon str2 */
886  RSTRING_PTR(str2)[0] = 0;
887  STR_SET_EMBED_LEN(str2, 0);
888  rb_enc_associate(str, enc);
889  ENC_CODERANGE_SET(str, cr);
890 }
891 
892 static ID id_to_s;
893 
894 VALUE
896 {
897  VALUE str;
898 
899  if (RB_TYPE_P(obj, T_STRING)) {
900  return obj;
901  }
902  str = rb_funcall(obj, id_to_s, 0);
903  if (!RB_TYPE_P(str, T_STRING))
904  return rb_any_to_s(obj);
905  if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
906  return str;
907 }
908 
909 static VALUE
911 {
912  long len;
913 
914  len = RSTRING_LEN(str2);
915  if (STR_ASSOC_P(str2)) {
916  str2 = rb_str_new4(str2);
917  }
918  if (STR_SHARED_P(str2)) {
919  VALUE shared = RSTRING(str2)->as.heap.aux.shared;
920  assert(OBJ_FROZEN(shared));
921  STR_SET_NOEMBED(str);
922  RSTRING(str)->as.heap.len = len;
923  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
924  FL_SET(str, ELTS_SHARED);
925  FL_UNSET(str, STR_ASSOC);
926  RSTRING(str)->as.heap.aux.shared = shared;
927  }
928  else {
929  str_replace_shared(str, str2);
930  }
931 
932  OBJ_INFECT(str, str2);
933  rb_enc_cr_str_exact_copy(str, str2);
934  return str;
935 }
936 
937 static VALUE
939 {
940  VALUE dup = str_alloc(klass);
941  str_replace(dup, str);
942  return dup;
943 }
944 
945 VALUE
947 {
948  return str_duplicate(rb_obj_class(str), str);
949 }
950 
951 VALUE
953 {
957  }
958  return str_replace(str_alloc(rb_cString), str);
959 }
960 
961 /*
962  * call-seq:
963  * String.new(str="") -> new_str
964  *
965  * Returns a new string object containing a copy of <i>str</i>.
966  */
967 
968 static VALUE
970 {
971  VALUE orig;
972 
973  if (argc > 0 && rb_scan_args(argc, argv, "01", &orig) == 1)
974  rb_str_replace(str, orig);
975  return str;
976 }
977 
978 static inline long
979 enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
980 {
981  long c;
982  const char *q;
983 
984  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
985  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
986  }
987  else if (rb_enc_asciicompat(enc)) {
988  c = 0;
989  if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) {
990  while (p < e) {
991  if (ISASCII(*p)) {
992  q = search_nonascii(p, e);
993  if (!q)
994  return c + (e - p);
995  c += q - p;
996  p = q;
997  }
998  p += rb_enc_fast_mbclen(p, e, enc);
999  c++;
1000  }
1001  }
1002  else {
1003  while (p < e) {
1004  if (ISASCII(*p)) {
1005  q = search_nonascii(p, e);
1006  if (!q)
1007  return c + (e - p);
1008  c += q - p;
1009  p = q;
1010  }
1011  p += rb_enc_mbclen(p, e, enc);
1012  c++;
1013  }
1014  }
1015  return c;
1016  }
1017 
1018  for (c=0; p<e; c++) {
1019  p += rb_enc_mbclen(p, e, enc);
1020  }
1021  return c;
1022 }
1023 
1024 long
1025 rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
1026 {
1027  return enc_strlen(p, e, enc, ENC_CODERANGE_UNKNOWN);
1028 }
1029 
/*
 * Count the characters in the byte range [p, e) under enc and report the
 * coderange discovered along the way through *cr.
 *
 * *cr is reset to 0 on entry.  For fixed-width encodings the function
 * returns immediately with *cr still 0 (i.e. nothing is learned about the
 * coderange on that path).  Otherwise *cr ends up as:
 *   ENC_CODERANGE_7BIT    no multibyte character was examined,
 *   ENC_CODERANGE_VALID   OR-ed in for every well-formed character,
 *   ENC_CODERANGE_BROKEN  assigned for every malformed sequence.
 * NOTE(review): once BROKEN has been assigned, a later well-formed character
 * still executes `*cr |= ENC_CODERANGE_VALID'; the final value therefore
 * depends on the numeric values of these constants, which are defined
 * elsewhere -- confirm BROKEN is preserved by that OR.
 *
 * A malformed sequence counts as one character; the scan then advances by
 * one byte (ASCII-compatible encodings) or by the encoding's minimum
 * character length, clamped to e (other encodings).
 */
1030 long
1031 rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
1032 {
1033  long c;
1034  const char *q;
1035  int ret;
1036 
1037  *cr = 0;
1038  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
 /* fixed width: byte count divided by the width, rounded up */
1039  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
1040  }
1041  else if (rb_enc_asciicompat(enc)) {
1042  c = 0;
1043  while (p < e) {
1044  if (ISASCII(*p)) {
 /* skip a whole run of ASCII bytes at once */
1045  q = search_nonascii(p, e);
1046  if (!q) {
1047  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1048  return c + (e - p);
1049  }
1050  c += q - p;
1051  p = q;
1052  }
1053  ret = rb_enc_precise_mbclen(p, e, enc);
1054  if (MBCLEN_CHARFOUND_P(ret)) {
1055  *cr |= ENC_CODERANGE_VALID;
1056  p += MBCLEN_CHARFOUND_LEN(ret);
1057  }
1058  else {
1059  *cr = ENC_CODERANGE_BROKEN;
1060  p++;
1061  }
1062  c++;
1063  }
1064  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1065  return c;
1066  }
1067 
 /* variable width, not ASCII-compatible: validate every character */
1068  for (c=0; p<e; c++) {
1069  ret = rb_enc_precise_mbclen(p, e, enc);
1070  if (MBCLEN_CHARFOUND_P(ret)) {
1071  *cr |= ENC_CODERANGE_VALID;
1072  p += MBCLEN_CHARFOUND_LEN(ret);
1073  }
1074  else {
1075  *cr = ENC_CODERANGE_BROKEN;
1076  if (p + rb_enc_mbminlen(enc) <= e)
1077  p += rb_enc_mbminlen(enc);
1078  else
1079  p = e;
1080  }
1081  }
1082  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1083  return c;
1084 }
1085 
1086 #ifdef NONASCII_MASK
/* True for every byte that is not a UTF-8 continuation byte (10xxxxxx). */
1087 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1088 
1089 /*
1090  * UTF-8 leading bytes have either the 0xxxxxxx or the 11xxxxxx
1091  * bit representation. (see http://en.wikipedia.org/wiki/UTF-8)
1092  * Therefore, the following pseudo code can detect a UTF-8 leading byte.
1093  *
1094  * if (!(byte & 0x80))
1095  * byte |= 0x40; // turn on bit6
1096  * return ((byte>>6) & 1); // bit6 tells whether it is a leading byte.
1097  *
1098  * This function applies the above logic to every byte of the word `*s'
1099  * at once and then sums the per-byte results.
1100  */
1101 static inline VALUE
1102 count_utf8_lead_bytes_with_word(const VALUE *s)
1103 {
1104  VALUE d = *s;
1105 
1106  /* Transform so that bit0 of each byte says "UTF-8 leading byte". */
 /* bit6 |= !bit7 within each byte, then move bit6 down to bit0 and
  * keep only that bit (NONASCII_MASK >> 7 is 0x01 in every byte) */
1107  d |= ~(d>>1);
1108  d >>= 6;
1109  d &= NONASCII_MASK >> 7;
1110 
1111  /* Sum the per-byte bits into the lowest byte. */
1112  d += (d>>8);
1113  d += (d>>16);
1114 #if SIZEOF_VALUE == 8
1115  d += (d>>32);
1116 #endif
 /* at most sizeof(VALUE) (<= 8) lead bytes per word, so 4 bits suffice */
1117  return (d&0xF);
1118 }
1119 #endif
1120 
1121 static long
1123 {
1124  const char *p, *e;
1125  long n;
1126  int cr;
1127 
1128  if (single_byte_optimizable(str)) return RSTRING_LEN(str);
1129  if (!enc) enc = STR_ENC_GET(str);
1130  p = RSTRING_PTR(str);
1131  e = RSTRING_END(str);
1132  cr = ENC_CODERANGE(str);
1133 #ifdef NONASCII_MASK
1134  if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1135  enc == rb_utf8_encoding()) {
1136 
1137  VALUE len = 0;
1138  if ((int)sizeof(VALUE) * 2 < e - p) {
1139  const VALUE *s, *t;
1140  const VALUE lowbits = sizeof(VALUE) - 1;
1141  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1142  t = (const VALUE*)(~lowbits & (VALUE)e);
1143  while (p < (const char *)s) {
1144  if (is_utf8_lead_byte(*p)) len++;
1145  p++;
1146  }
1147  while (s < t) {
1148  len += count_utf8_lead_bytes_with_word(s);
1149  s++;
1150  }
1151  p = (const char *)s;
1152  }
1153  while (p < e) {
1154  if (is_utf8_lead_byte(*p)) len++;
1155  p++;
1156  }
1157  return (long)len;
1158  }
1159 #endif
1160  n = rb_enc_strlen_cr(p, e, enc, &cr);
1161  if (cr) {
1162  ENC_CODERANGE_SET(str, cr);
1163  }
1164  return n;
1165 }
1166 
1167 long
1169 {
1170  return str_strlen(str, STR_ENC_GET(str));
1171 }
1172 
1173 /*
1174  * call-seq:
1175  * str.length -> integer
1176  * str.size -> integer
1177  *
1178  * Returns the character length of <i>str</i>.
1179  */
1180 
1181 VALUE
1183 {
1184  long len;
1185 
1186  len = str_strlen(str, STR_ENC_GET(str));
1187  return LONG2NUM(len);
1188 }
1189 
1190 /*
1191  * call-seq:
1192  * str.bytesize -> integer
1193  *
1194  * Returns the length of +str+ in bytes.
1195  *
1196  * "\x80\u3042".bytesize #=> 4
1197  * "hello".bytesize #=> 5
1198  */
1199 
1200 static VALUE
1202 {
1203  return LONG2NUM(RSTRING_LEN(str));
1204 }
1205 
1206 /*
1207  * call-seq:
1208  * str.empty? -> true or false
1209  *
1210  * Returns <code>true</code> if <i>str</i> has a length of zero.
1211  *
1212  * "hello".empty? #=> false
1213  * " ".empty? #=> false
1214  * "".empty? #=> true
1215  */
1216 
1217 static VALUE
1219 {
1220  if (RSTRING_LEN(str) == 0)
1221  return Qtrue;
1222  return Qfalse;
1223 }
1224 
1225 /*
1226  * call-seq:
1227  * str + other_str -> new_str
1228  *
1229  * Concatenation---Returns a new <code>String</code> containing
1230  * <i>other_str</i> concatenated to <i>str</i>.
1231  *
1232  * "Hello from " + self.to_s #=> "Hello from main"
1233  */
1234 
1235 VALUE
1237 {
1238  VALUE str3;
1239  rb_encoding *enc;
1240 
1241  StringValue(str2);
1242  enc = rb_enc_check(str1, str2);
1243  str3 = rb_str_new(0, RSTRING_LEN(str1)+RSTRING_LEN(str2));
1244  memcpy(RSTRING_PTR(str3), RSTRING_PTR(str1), RSTRING_LEN(str1));
1245  memcpy(RSTRING_PTR(str3) + RSTRING_LEN(str1),
1246  RSTRING_PTR(str2), RSTRING_LEN(str2));
1247  RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0';
1248 
1249  if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
1250  OBJ_TAINT(str3);
1253  return str3;
1254 }
1255 
1256 /*
1257  * call-seq:
1258  * str * integer -> new_str
1259  *
1260  * Copy --- Returns a new String containing +integer+ copies of the receiver.
1261  * +integer+ must be greater than or equal to 0.
1262  *
1263  * "Ho! " * 3 #=> "Ho! Ho! Ho! "
1264  * "Ho! " * 0 #=> ""
1265  */
1266 
1267 VALUE
1269 {
1270  VALUE str2;
1271  long n, len;
1272  char *ptr2;
1273 
1274  len = NUM2LONG(times);
1275  if (len < 0) {
1276  rb_raise(rb_eArgError, "negative argument");
1277  }
1278  if (len && LONG_MAX/len < RSTRING_LEN(str)) {
1279  rb_raise(rb_eArgError, "argument too big");
1280  }
1281 
1282  str2 = rb_str_new5(str, 0, len *= RSTRING_LEN(str));
1283  ptr2 = RSTRING_PTR(str2);
1284  if (len) {
1285  n = RSTRING_LEN(str);
1286  memcpy(ptr2, RSTRING_PTR(str), n);
1287  while (n <= len/2) {
1288  memcpy(ptr2 + n, ptr2, n);
1289  n *= 2;
1290  }
1291  memcpy(ptr2 + n, ptr2, len-n);
1292  }
1293  ptr2[RSTRING_LEN(str2)] = '\0';
1294  OBJ_INFECT(str2, str);
1295  rb_enc_cr_str_copy_for_substr(str2, str);
1296 
1297  return str2;
1298 }
1299 
1300 /*
1301  * call-seq:
1302  * str % arg -> new_str
1303  *
1304  * Format---Uses <i>str</i> as a format specification, and returns the result
1305  * of applying it to <i>arg</i>. If the format specification contains more than
1306  * one substitution, then <i>arg</i> must be an <code>Array</code> or <code>Hash</code>
1307  * containing the values to be substituted. See <code>Kernel::sprintf</code> for
1308  * details of the format string.
1309  *
1310  * "%05d" % 123 #=> "00123"
1311  * "%-5s: %08x" % [ "ID", self.object_id ] #=> "ID : 200e14d6"
1312  * "foo = %{foo}" % { :foo => 'bar' } #=> "foo = bar"
1313  */
1314 
1315 static VALUE
1317 {
1318  volatile VALUE tmp = rb_check_array_type(arg);
1319 
1320  if (!NIL_P(tmp)) {
1321  return rb_str_format(RARRAY_LENINT(tmp), RARRAY_PTR(tmp), str);
1322  }
1323  return rb_str_format(1, &arg, str);
1324 }
1325 
1326 static inline void
1328 {
1329  if (FL_TEST(str, STR_TMPLOCK)) {
1330  rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
1331  }
1332  rb_check_frozen(str);
1333  if (!OBJ_UNTRUSTED(str) && rb_safe_level() >= 4)
1334  rb_raise(rb_eSecurityError, "Insecure: can't modify string");
1335 }
1336 
1337 static inline int
1339 {
1340  str_modifiable(str);
1341  if (!STR_SHARED_P(str)) return 1;
1342  if (STR_EMBED_P(str)) return 1;
1343  return 0;
1344 }
1345 
1346 static void
1348 {
1349  char *ptr;
1350  long len = RSTRING_LEN(str);
1351  long capa = len + expand;
1352 
1353  if (len > capa) len = capa;
1354  ptr = ALLOC_N(char, capa + 1);
1355  if (RSTRING_PTR(str)) {
1356  memcpy(ptr, RSTRING_PTR(str), len);
1357  }
1358  STR_SET_NOEMBED(str);
1359  STR_UNSET_NOCAPA(str);
1360  ptr[len] = 0;
1361  RSTRING(str)->as.heap.ptr = ptr;
1362  RSTRING(str)->as.heap.len = len;
1363  RSTRING(str)->as.heap.aux.capa = capa;
1364 }
1365 
1366 #define str_make_independent(str) str_make_independent_expand((str), 0L)
1367 
1368 void
1370 {
1371  if (!str_independent(str))
1372  str_make_independent(str);
1373  ENC_CODERANGE_CLEAR(str);
1374 }
1375 
1376 void
1377 rb_str_modify_expand(VALUE str, long expand)
1378 {
1379  if (expand < 0) {
1380  rb_raise(rb_eArgError, "negative expanding string size");
1381  }
1382  if (!str_independent(str)) {
1383  str_make_independent_expand(str, expand);
1384  }
1385  else if (expand > 0) {
1386  long len = RSTRING_LEN(str);
1387  long capa = len + expand;
1388  if (!STR_EMBED_P(str)) {
1389  REALLOC_N(RSTRING(str)->as.heap.ptr, char, capa+1);
1390  STR_UNSET_NOCAPA(str);
1391  RSTRING(str)->as.heap.aux.capa = capa;
1392  }
1393  else if (capa > RSTRING_EMBED_LEN_MAX) {
1394  str_make_independent_expand(str, expand);
1395  }
1396  }
1397  ENC_CODERANGE_CLEAR(str);
1398 }
1399 
1400 /* As rb_str_modify(), but don't clear coderange */
1401 static void
1403 {
1404  if (!str_independent(str))
1405  str_make_independent(str);
1406  if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN)
1407  /* Force re-scan later */
1408  ENC_CODERANGE_CLEAR(str);
1409 }
1410 
1411 static inline void
1413 {
1414  str_modifiable(str);
1415  if (!STR_SHARED_P(str) && !STR_EMBED_P(str)) {
1416  xfree(RSTRING_PTR(str));
1417  RSTRING(str)->as.heap.ptr = 0;
1418  RSTRING(str)->as.heap.len = 0;
1419  }
1420 }
1421 
1422 void
1424 {
1425  /* sanity check */
1426  rb_check_frozen(str);
1427  if (STR_ASSOC_P(str)) {
1428  /* already associated */
1429  rb_ary_concat(RSTRING(str)->as.heap.aux.shared, add);
1430  }
1431  else {
1432  if (STR_SHARED_P(str)) {
1433  VALUE assoc = RSTRING(str)->as.heap.aux.shared;
1434  str_make_independent(str);
1435  if (STR_ASSOC_P(assoc)) {
1436  assoc = RSTRING(assoc)->as.heap.aux.shared;
1437  rb_ary_concat(assoc, add);
1438  add = assoc;
1439  }
1440  }
1441  else if (STR_EMBED_P(str)) {
1442  str_make_independent(str);
1443  }
1444  else if (RSTRING(str)->as.heap.aux.capa != RSTRING_LEN(str)) {
1445  RESIZE_CAPA(str, RSTRING_LEN(str));
1446  }
1447  FL_SET(str, STR_ASSOC);
1448  RBASIC(add)->klass = 0;
1449  RSTRING(str)->as.heap.aux.shared = add;
1450  }
1451 }
1452 
1453 VALUE
1455 {
1456  if (STR_SHARED_P(str)) str = RSTRING(str)->as.heap.aux.shared;
1457  if (STR_ASSOC_P(str)) {
1458  return RSTRING(str)->as.heap.aux.shared;
1459  }
1460  return Qfalse;
1461 }
1462 
1463 void
1465 {
1466  rb_encoding *enc = rb_enc_get(str);
1467  if (!rb_enc_asciicompat(enc)) {
1468  rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
1469  }
1470 }
1471 
1472 VALUE
1473 rb_string_value(volatile VALUE *ptr)
1474 {
1475  VALUE s = *ptr;
1476  if (!RB_TYPE_P(s, T_STRING)) {
1477  s = rb_str_to_str(s);
1478  *ptr = s;
1479  }
1480  return s;
1481 }
1482 
1483 char *
1485 {
1486  VALUE str = rb_string_value(ptr);
1487  return RSTRING_PTR(str);
1488 }
1489 
1490 char *
1492 {
1493  VALUE str = rb_string_value(ptr);
1494  char *s = RSTRING_PTR(str);
1495  long len = RSTRING_LEN(str);
1496 
1497  if (!s || memchr(s, 0, len)) {
1498  rb_raise(rb_eArgError, "string contains null byte");
1499  }
1500  if (s[len]) {
1501  rb_str_modify(str);
1502  s = RSTRING_PTR(str);
1503  s[RSTRING_LEN(str)] = 0;
1504  }
1505  return s;
1506 }
1507 
1508 VALUE
1510 {
1511  str = rb_check_convert_type(str, T_STRING, "String", "to_str");
1512  return str;
1513 }
1514 
1515 /*
1516  * call-seq:
1517  * String.try_convert(obj) -> string or nil
1518  *
1519  * Try to convert <i>obj</i> into a String, using to_str method.
1520  * Returns converted string or nil if <i>obj</i> cannot be converted
1521  * for any reason.
1522  *
1523  * String.try_convert("str") #=> "str"
1524  * String.try_convert(/re/) #=> nil
1525  */
1526 static VALUE
1528 {
1529  return rb_check_string_type(str);
1530 }
1531 
/*
 * Advance from p by up to *nthp characters of encoding enc without
 * going past e, and return the position reached.  *nthp is overwritten
 * with the local counter as it stands on return.
 *
 * Single-byte and fixed-width encodings are handled with plain pointer
 * arithmetic (the counter is stored back unchanged and the result is
 * clamped to e).  For other ASCII-compatible encodings, runs of ASCII
 * bytes are skipped with search_nonascii() and remaining characters are
 * stepped over with rb_enc_mbclen(); e is returned when the end is hit
 * before the counter reaches zero.  Any other encoding is walked one
 * character at a time.
 */
1532 static char*
1533 str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
1534 {
1535  long nth = *nthp;
1536  if (rb_enc_mbmaxlen(enc) == 1) {
1537  p += nth;
1538  }
1539  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1540  p += nth * rb_enc_mbmaxlen(enc);
1541  }
1542  else if (rb_enc_asciicompat(enc)) {
1543  const char *p2, *e2;
1544  int n;
1545 
1546  while (p < e && 0 < nth) {
 /* e2: where we would land if every remaining character were one byte */
1547  e2 = p + nth;
1548  if (e < e2) {
1549  *nthp = nth;
1550  return (char *)e;
1551  }
1552  if (ISASCII(*p)) {
1553  p2 = search_nonascii(p, e2);
1554  if (!p2) {
 /* only ASCII up to e2: all remaining characters consumed */
1555  nth -= e2 - p;
1556  *nthp = nth;
1557  return (char *)e2;
1558  }
1559  nth -= p2 - p;
1560  p = p2;
1561  }
1562  n = rb_enc_mbclen(p, e, enc);
1563  p += n;
1564  nth--;
1565  }
1566  *nthp = nth;
1567  if (nth != 0) {
1568  return (char *)e;
1569  }
1570  return (char *)p;
1571  }
1572  else {
1573  while (p < e && nth--) {
1574  p += rb_enc_mbclen(p, e, enc);
1575  }
1576  }
1577  if (p > e) p = e;
1578  *nthp = nth;
1579  return (char*)p;
1580 }
1581 
1582 char*
1583 rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
1584 {
1585  return str_nth_len(p, e, &nth, enc);
1586 }
1587 
1588 static char*
1589 str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1590 {
1591  if (singlebyte)
1592  p += nth;
1593  else {
1594  p = str_nth_len(p, e, &nth, enc);
1595  }
1596  if (!p) return 0;
1597  if (p > e) p = e;
1598  return (char *)p;
1599 }
1600 
1601 /* char offset to byte offset */
1602 static long
1603 str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1604 {
1605  const char *pp = str_nth(p, e, nth, enc, singlebyte);
1606  if (!pp) return e - p;
1607  return pp - p;
1608 }
1609 
1610 long
1611 rb_str_offset(VALUE str, long pos)
1612 {
1613  return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
1615 }
1616 
1617 #ifdef NONASCII_MASK
/*
 * UTF-8 specific variant of the nth-character search: advance from p
 * until *nthp lead bytes have been passed or e is reached, and return
 * that position.  The remaining count is stored back into *nthp.
 *
 * When both the buffer and the count are larger than two VALUE words,
 * lead bytes are first counted a whole VALUE at a time between the
 * aligned bounds s and t; the final byte-wise loop finishes the job.
 */
1618 static char *
1619 str_utf8_nth(const char *p, const char *e, long *nthp)
1620 {
1621  long nth = *nthp;
1622  if ((int)SIZEOF_VALUE * 2 < e - p && (int)SIZEOF_VALUE * 2 < nth) {
1623  const VALUE *s, *t;
1624  const VALUE lowbits = sizeof(VALUE) - 1;
 /* s: p rounded up, t: e rounded down to a VALUE boundary */
1625  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1626  t = (const VALUE*)(~lowbits & (VALUE)e);
 /* byte-wise until p is aligned */
1627  while (p < (const char *)s) {
1628  if (is_utf8_lead_byte(*p)) nth--;
1629  p++;
1630  }
 /* word-wise while a full word cannot overshoot the remaining count */
1631  do {
1632  nth -= count_utf8_lead_bytes_with_word(s);
1633  s++;
1634  } while (s < t && (int)sizeof(VALUE) <= nth);
1635  p = (char *)s;
1636  }
1637  while (p < e) {
1638  if (is_utf8_lead_byte(*p)) {
1639  if (nth == 0) break;
1640  nth--;
1641  }
1642  p++;
1643  }
1644  *nthp = nth;
1645  return (char *)p;
1646 }
1647 
/*
 * Byte distance from p to the position str_utf8_nth() reports for the
 * nth character of the UTF-8 data in [p, e).
 */
static long
str_utf8_offset(const char *p, const char *e, long nth)
{
    long remaining = nth;
    const char *hit = str_utf8_nth(p, e, &remaining);

    return hit - p;
}
1654 #endif
1655 
1656 /* byte offset to char offset */
1657 long
1658 rb_str_sublen(VALUE str, long pos)
1659 {
1660  if (single_byte_optimizable(str) || pos < 0)
1661  return pos;
1662  else {
1663  char *p = RSTRING_PTR(str);
1664  return enc_strlen(p, p + pos, STR_ENC_GET(str), ENC_CODERANGE(str));
1665  }
1666 }
1667 
1668 VALUE
1669 rb_str_subseq(VALUE str, long beg, long len)
1670 {
1671  VALUE str2;
1672 
1673  if (RSTRING_LEN(str) == beg + len &&
1674  RSTRING_EMBED_LEN_MAX < len) {
1675  str2 = rb_str_new_shared(rb_str_new_frozen(str));
1676  rb_str_drop_bytes(str2, beg);
1677  }
1678  else {
1679  str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len);
1680  RB_GC_GUARD(str);
1681  }
1682 
1683  rb_enc_cr_str_copy_for_substr(str2, str);
1684  OBJ_INFECT(str2, str);
1685 
1686  return str2;
1687 }
1688 
/*
 * Resolve a character-based slice request on str.
 *
 * beg is a character index (negative values count from the end) and
 * *lenp a character count.  On success the pointer to the first byte of
 * the slice is returned and *lenp is replaced by the slice length in
 * bytes.  0 is returned when the request cannot be satisfied, e.g. a
 * negative length or a start outside the string.
 */
1689 static char *
1690 rb_str_subpos(VALUE str, long beg, long *lenp)
1691 {
1692  long len = *lenp;
1693  long slen = -1L;
1694  long blen = RSTRING_LEN(str);
1695  rb_encoding *enc = STR_ENC_GET(str);
1696  char *p, *s = RSTRING_PTR(str), *e = s + blen;
1697 
1698  if (len < 0) return 0;
1699  if (!blen) {
1700  len = 0;
1701  }
 /* one byte per character: plain index arithmetic */
1702  if (single_byte_optimizable(str)) {
1703  if (beg > blen) return 0;
1704  if (beg < 0) {
1705  beg += blen;
1706  if (beg < 0) return 0;
1707  }
1708  if (beg + len > blen)
1709  len = blen - beg;
1710  if (len < 0) return 0;
1711  p = s + beg;
1712  goto end;
1713  }
1714  if (beg < 0) {
1715  if (len > -beg) len = -beg;
 /* slice is near the end: walk backwards instead of counting the whole string */
1716  if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
1717  beg = -beg;
1718  while (beg-- > len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
1719  p = e;
1720  if (!p) return 0;
1721  while (len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
1722  if (!p) return 0;
1723  len = e - p;
1724  goto end;
1725  }
1726  else {
1727  slen = str_strlen(str, enc);
1728  beg += slen;
1729  if (beg < 0) return 0;
1730  p = s + beg;
1731  if (len == 0) goto end;
1732  }
1733  }
1734  else if (beg > 0 && beg > RSTRING_LEN(str)) {
1735  return 0;
1736  }
1737  if (len == 0) {
1738  if (beg > str_strlen(str, enc)) return 0;
1739  p = s + beg;
1740  }
1741 #ifdef NONASCII_MASK
 /* valid UTF-8: use the lead-byte counting helpers */
1742  else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1743  enc == rb_utf8_encoding()) {
1744  p = str_utf8_nth(s, e, &beg);
1745  if (beg > 0) return 0;
1746  len = str_utf8_offset(p, e, len);
1747  }
1748 #endif
 /* fixed-width encoding: scale indexes by the character size */
1749  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1750  int char_sz = rb_enc_mbmaxlen(enc);
1751 
1752  p = s + beg * char_sz;
1753  if (p > e) {
1754  return 0;
1755  }
1756  else if (len * char_sz > e - p)
1757  len = e - p;
1758  else
1759  len *= char_sz;
1760  }
 /* general case: walk characters; beg holds what is left of the start index */
1761  else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
1762  if (beg > 0) return 0;
1763  len = 0;
1764  }
1765  else {
1766  len = str_offset(p, e, len, enc, 0);
1767  }
1768  end:
1769  *lenp = len;
1770  RB_GC_GUARD(str);
1771  return p;
1772 }
1773 
1774 VALUE
1775 rb_str_substr(VALUE str, long beg, long len)
1776 {
1777  VALUE str2;
1778  char *p = rb_str_subpos(str, beg, &len);
1779 
1780  if (!p) return Qnil;
1781  if (len > RSTRING_EMBED_LEN_MAX && p + len == RSTRING_END(str)) {
1782  str2 = rb_str_new4(str);
1783  str2 = str_new3(rb_obj_class(str2), str2);
1784  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
1785  RSTRING(str2)->as.heap.len = len;
1786  }
1787  else {
1788  str2 = rb_str_new5(str, p, len);
1789  OBJ_INFECT(str2, str);
1790  RB_GC_GUARD(str);
1791  }
1792  rb_enc_cr_str_copy_for_substr(str2, str);
1793 
1794  return str2;
1795 }
1796 
1797 VALUE
1799 {
1800  if (STR_ASSOC_P(str)) {
1801  VALUE ary = RSTRING(str)->as.heap.aux.shared;
1802  OBJ_FREEZE(ary);
1803  }
1804  return rb_obj_freeze(str);
1805 }
1806 
/*
 * From this point on, rb_str_dup_frozen expands to rb_str_new_frozen.
 * NOTE(review): the listing jumps from line 1805 to 1808, so a line that
 * preceded this define appears to be missing -- confirm against the
 * upstream file.
 */
1808 #define rb_str_dup_frozen rb_str_new_frozen
1809 
1810 VALUE
1811 rb_str_locktmp(VALUE str)
1812 {
1813  if (FL_TEST(str, STR_TMPLOCK)) {
1814  rb_raise(rb_eRuntimeError, "temporal locking already locked string");
1815  }
1816  FL_SET(str, STR_TMPLOCK);
1817  return str;
1818 }
1819 
1820 VALUE
1822 {
1823  if (!FL_TEST(str, STR_TMPLOCK)) {
1824  rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string");
1825  }
1826  FL_UNSET(str, STR_TMPLOCK);
1827  return str;
1828 }
1829 
1830 VALUE
1832 {
1833  rb_str_locktmp(str);
1834  return rb_ensure(func, arg, rb_str_unlocktmp, str);
1835 }
1836 
1837 void
1838 rb_str_set_len(VALUE str, long len)
1839 {
1840  long capa;
1841 
1842  str_modifiable(str);
1843  if (STR_SHARED_P(str)) {
1844  rb_raise(rb_eRuntimeError, "can't set length of shared string");
1845  }
1846  if (len > (capa = (long)rb_str_capacity(str))) {
1847  rb_bug("probable buffer overflow: %ld for %ld", len, capa);
1848  }
1849  STR_SET_LEN(str, len);
1850  RSTRING_PTR(str)[len] = '\0';
1851 }
1852 
/*
 * Change the byte length of str to len and return str.
 *
 * Raises ArgumentError for a negative len.  The cached code range is
 * cleared.  Depending on the current representation and on len the
 * string stays embedded, is moved from the heap back into the embedded
 * area, is detached from a shared buffer, or has its heap buffer
 * reallocated; a terminator is always written after the last byte.
 * Bytes added by growing are not initialized here.
 */
1853 VALUE
1854 rb_str_resize(VALUE str, long len)
1855 {
1856  long slen;
1857  int independent;
1858 
1859  if (len < 0) {
1860  rb_raise(rb_eArgError, "negative string size (or size too big)");
1861  }
1862 
1863  independent = str_independent(str);
1864  ENC_CODERANGE_CLEAR(str);
1865  slen = RSTRING_LEN(str);
1866  {
1867  long capa;
1868  if (STR_EMBED_P(str)) {
1869  if (len == slen) return str;
 /* still fits in the embedded area */
1870  if (len + 1 <= RSTRING_EMBED_LEN_MAX + 1) {
1871  STR_SET_EMBED_LEN(str, len);
1872  RSTRING(str)->as.ary[len] = '\0';
1873  return str;
1874  }
1875  str_make_independent_expand(str, len - slen);
1876  STR_SET_NOEMBED(str);
1877  }
 /* heap string that now fits in the embedded area */
1878  else if (len <= RSTRING_EMBED_LEN_MAX) {
1879  char *ptr = RSTRING(str)->as.heap.ptr;
1880  STR_SET_EMBED(str);
1881  if (slen > len) slen = len;
1882  if (slen > 0) MEMCPY(RSTRING(str)->as.ary, ptr, char, slen);
1883  RSTRING(str)->as.ary[len] = '\0';
1884  STR_SET_EMBED_LEN(str, len);
 /* only a privately owned buffer may be released */
1885  if (independent) xfree(ptr);
1886  return str;
1887  }
1888  else if (!independent) {
1889  if (len == slen) return str;
1890  str_make_independent_expand(str, len - slen);
1891  }
 /* reallocate when too small, or when more than min(len, 1024) bytes would be wasted */
1892  else if ((capa = RSTRING(str)->as.heap.aux.capa) < len ||
1893  (capa - len) > (len < 1024 ? len : 1024)) {
1894  REALLOC_N(RSTRING(str)->as.heap.ptr, char, len+1);
1895  RSTRING(str)->as.heap.aux.capa = len;
1896  }
1897  else if (len == slen) return str;
1898  RSTRING(str)->as.heap.len = len;
1899  RSTRING(str)->as.heap.ptr[len] = '\0'; /* sentinel */
1900  }
1901  return str;
1902 }
1903 
/*
 * Append len bytes at ptr to str, growing the buffer as needed, and
 * return str.
 *
 * ptr may point into str itself: its offset is recorded before the
 * buffer can move and the pointer is rebuilt afterwards.  Raises
 * ArgumentError when the combined length would not fit in a long.
 *
 * NOTE(review): for len == 0 this returns 0 rather than str; the callers
 * visible in this part of the file either check for zero beforehand or
 * ignore the result.
 */
1904 static VALUE
1905 str_buf_cat(VALUE str, const char *ptr, long len)
1906 {
1907  long capa, total, off = -1;
1908 
 /* remember the offset if the source lies inside str's own buffer */
1909  if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) {
1910  off = ptr - RSTRING_PTR(str);
1911  }
1912  rb_str_modify(str);
1913  if (len == 0) return 0;
1914  if (STR_ASSOC_P(str)) {
 /* drop the association so that the aux slot can hold the capacity */
1915  FL_UNSET(str, STR_ASSOC);
1916  capa = RSTRING(str)->as.heap.aux.capa = RSTRING_LEN(str);
1917  }
1918  else if (STR_EMBED_P(str)) {
1919  capa = RSTRING_EMBED_LEN_MAX;
1920  }
1921  else {
1922  capa = RSTRING(str)->as.heap.aux.capa;
1923  }
1924  if (RSTRING_LEN(str) >= LONG_MAX - len) {
1925  rb_raise(rb_eArgError, "string sizes too big");
1926  }
1927  total = RSTRING_LEN(str)+len;
1928  if (capa <= total) {
 /* grow by doubling; near LONG_MAX fall back to rounding up to 4096 */
1929  while (total > capa) {
1930  if (capa + 1 >= LONG_MAX / 2) {
1931  capa = (total + 4095) / 4096 * 4096;
1932  break;
1933  }
1934  capa = (capa + 1) * 2;
1935  }
1936  RESIZE_CAPA(str, capa);
1937  }
1938  if (off != -1) {
1939  ptr = RSTRING_PTR(str) + off;
1940  }
1941  memcpy(RSTRING_PTR(str) + RSTRING_LEN(str), ptr, len);
1942  STR_SET_LEN(str, total);
1943  RSTRING_PTR(str)[total] = '\0'; /* sentinel */
1944 
1945  return str;
1946 }
1947 
/* Append the C string ptr to str with str_buf_cat(), using strlen(ptr) as the length. */
1948 #define str_buf_cat2(str, ptr) str_buf_cat((str), (ptr), strlen(ptr))
1949 
1950 VALUE
1951 rb_str_buf_cat(VALUE str, const char *ptr, long len)
1952 {
1953  if (len == 0) return str;
1954  if (len < 0) {
1955  rb_raise(rb_eArgError, "negative string size (or size too big)");
1956  }
1957  return str_buf_cat(str, ptr, len);
1958 }
1959 
1960 VALUE
1961 rb_str_buf_cat2(VALUE str, const char *ptr)
1962 {
1963  return rb_str_buf_cat(str, ptr, strlen(ptr));
1964 }
1965 
1966 VALUE
1967 rb_str_cat(VALUE str, const char *ptr, long len)
1968 {
1969  if (len < 0) {
1970  rb_raise(rb_eArgError, "negative string size (or size too big)");
1971  }
1972  if (STR_ASSOC_P(str)) {
1973  char *p;
1974  rb_str_modify_expand(str, len);
1975  p = RSTRING(str)->as.heap.ptr;
1976  memcpy(p + RSTRING(str)->as.heap.len, ptr, len);
1977  len = RSTRING(str)->as.heap.len += len;
1978  p[len] = '\0'; /* sentinel */
1979  return str;
1980  }
1981 
1982  return rb_str_buf_cat(str, ptr, len);
1983 }
1984 
1985 VALUE
1986 rb_str_cat2(VALUE str, const char *ptr)
1987 {
1988  return rb_str_cat(str, ptr, strlen(ptr));
1989 }
1990 
/*
 * Append len bytes at ptr, which are in the encoding ptr_encindex with
 * code range ptr_cr, to str, and set the encoding and code range of the
 * result.  Returns str.
 *
 * When ptr_cr_ret is non-null it receives the code range of the
 * appended bytes as determined here (they are scanned when ptr_cr is
 * unknown and the scan is needed).  Raises Encoding::CompatibilityError
 * when the two encodings cannot be combined, and ArgumentError for a
 * negative len.
 */
1991 static VALUE
1992 rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len,
1993  int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
1994 {
1995  int str_encindex = ENCODING_GET(str);
1996  int res_encindex;
1997  int str_cr, res_cr;
1998 
1999  str_cr = ENC_CODERANGE(str);
2000 
2001  if (str_encindex == ptr_encindex) {
 /* same encoding: scanning ptr is pointless if str itself is unknown */
2002  if (str_cr == ENC_CODERANGE_UNKNOWN)
2003  ptr_cr = ENC_CODERANGE_UNKNOWN;
2004  else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
2005  ptr_cr = coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
2006  }
2007  }
2008  else {
2009  rb_encoding *str_enc = rb_enc_from_index(str_encindex);
2010  rb_encoding *ptr_enc = rb_enc_from_index(ptr_encindex);
 /* an ASCII-incompatible side only mixes with an empty counterpart */
2011  if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
2012  if (len == 0)
2013  return str;
2014  if (RSTRING_LEN(str) == 0) {
2015  rb_str_buf_cat(str, ptr, len);
2016  ENCODING_CODERANGE_SET(str, ptr_encindex, ptr_cr);
2017  return str;
2018  }
2019  goto incompatible;
2020  }
2021  if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
2022  ptr_cr = coderange_scan(ptr, len, ptr_enc);
2023  }
2024  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2025  if (ENCODING_IS_ASCII8BIT(str) || ptr_cr != ENC_CODERANGE_7BIT) {
2026  str_cr = rb_enc_str_coderange(str);
2027  }
2028  }
2029  }
2030  if (ptr_cr_ret)
2031  *ptr_cr_ret = ptr_cr;
2032 
 /* different encodings are acceptable only if at least one side is 7bit */
2033  if (str_encindex != ptr_encindex &&
2034  str_cr != ENC_CODERANGE_7BIT &&
2035  ptr_cr != ENC_CODERANGE_7BIT) {
2036  incompatible:
2037  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
2038  rb_enc_name(rb_enc_from_index(str_encindex)),
2039  rb_enc_name(rb_enc_from_index(ptr_encindex)));
2040  }
2041 
 /* choose the encoding and code range of the concatenation */
2042  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2043  res_encindex = str_encindex;
2044  res_cr = ENC_CODERANGE_UNKNOWN;
2045  }
2046  else if (str_cr == ENC_CODERANGE_7BIT) {
2047  if (ptr_cr == ENC_CODERANGE_7BIT) {
2048  res_encindex = str_encindex;
2049  res_cr = ENC_CODERANGE_7BIT;
2050  }
2051  else {
2052  res_encindex = ptr_encindex;
2053  res_cr = ptr_cr;
2054  }
2055  }
2056  else if (str_cr == ENC_CODERANGE_VALID) {
2057  res_encindex = str_encindex;
2058  if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID)
2059  res_cr = str_cr;
2060  else
2061  res_cr = ptr_cr;
2062  }
2063  else { /* str_cr == ENC_CODERANGE_BROKEN */
2064  res_encindex = str_encindex;
2065  res_cr = str_cr;
2066  if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN;
2067  }
2068 
2069  if (len < 0) {
2070  rb_raise(rb_eArgError, "negative string size (or size too big)");
2071  }
2072  str_buf_cat(str, ptr, len);
2073  ENCODING_CODERANGE_SET(str, res_encindex, res_cr);
2074  return str;
2075 }
2076 
2077 VALUE
2078 rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
2079 {
2080  return rb_enc_cr_str_buf_cat(str, ptr, len,
2082 }
2083 
2084 VALUE
2085 rb_str_buf_cat_ascii(VALUE str, const char *ptr)
2086 {
2087  /* ptr must reference NUL terminated ASCII string. */
2088  int encindex = ENCODING_GET(str);
2089  rb_encoding *enc = rb_enc_from_index(encindex);
2090  if (rb_enc_asciicompat(enc)) {
2091  return rb_enc_cr_str_buf_cat(str, ptr, strlen(ptr),
2092  encindex, ENC_CODERANGE_7BIT, 0);
2093  }
2094  else {
2095  char *buf = ALLOCA_N(char, rb_enc_mbmaxlen(enc));
2096  while (*ptr) {
2097  unsigned int c = (unsigned char)*ptr;
2098  int len = rb_enc_codelen(c, enc);
2099  rb_enc_mbcput(c, buf, enc);
2100  rb_enc_cr_str_buf_cat(str, buf, len,
2101  encindex, ENC_CODERANGE_VALID, 0);
2102  ptr++;
2103  }
2104  return str;
2105  }
2106 }
2107 
2108 VALUE
2110 {
2111  int str2_cr;
2112 
2113  str2_cr = ENC_CODERANGE(str2);
2114 
2115  rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
2116  ENCODING_GET(str2), str2_cr, &str2_cr);
2117 
2118  OBJ_INFECT(str, str2);
2119  ENC_CODERANGE_SET(str2, str2_cr);
2120 
2121  return str;
2122 }
2123 
2124 VALUE
2126 {
2127  rb_encoding *enc;
2128  int cr, cr2;
2129  long len2;
2130 
2131  StringValue(str2);
2132  if ((len2 = RSTRING_LEN(str2)) > 0 && STR_ASSOC_P(str)) {
2133  long len = RSTRING_LEN(str) + len2;
2134  enc = rb_enc_check(str, str2);
2135  cr = ENC_CODERANGE(str);
2136  if ((cr2 = ENC_CODERANGE(str2)) > cr) cr = cr2;
2137  rb_str_modify_expand(str, len2);
2138  memcpy(RSTRING(str)->as.heap.ptr + RSTRING(str)->as.heap.len,
2139  RSTRING_PTR(str2), len2+1);
2140  RSTRING(str)->as.heap.len = len;
2141  rb_enc_associate(str, enc);
2142  ENC_CODERANGE_SET(str, cr);
2143  OBJ_INFECT(str, str2);
2144  return str;
2145  }
2146  return rb_str_buf_append(str, str2);
2147 }
2148 
2149 /*
2150  * call-seq:
2151  * str << integer -> str
2152  * str.concat(integer) -> str
2153  * str << obj -> str
2154  * str.concat(obj) -> str
2155  *
2156  * Append---Concatenates the given object to <i>str</i>. If the object is a
2157  * <code>Integer</code>, it is considered as a codepoint, and is converted
2158  * to a character before concatenation.
2159  *
2160  * a = "hello "
2161  * a << "world" #=> "hello world"
2162  * a.concat(33) #=> "hello world!"
2163  */
2164 
2165 VALUE
2167 {
2168  unsigned int code;
2169  rb_encoding *enc = STR_ENC_GET(str1);
2170 
2171  if (FIXNUM_P(str2) || RB_TYPE_P(str2, T_BIGNUM)) {
2172  if (rb_num_to_uint(str2, &code) == 0) {
2173  }
2174  else if (FIXNUM_P(str2)) {
2175  rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
2176  }
2177  else {
2178  rb_raise(rb_eRangeError, "bignum out of char range");
2179  }
2180  }
2181  else {
2182  return rb_str_append(str1, str2);
2183  }
2184 
2185  if (enc == rb_usascii_encoding()) {
2186  /* US-ASCII automatically extended to ASCII-8BIT */
2187  char buf[1];
2188  buf[0] = (char)code;
2189  if (code > 0xFF) {
2190  rb_raise(rb_eRangeError, "%u out of char range", code);
2191  }
2192  rb_str_cat(str1, buf, 1);
2193  if (code > 127) {
2196  }
2197  }
2198  else {
2199  long pos = RSTRING_LEN(str1);
2200  int cr = ENC_CODERANGE(str1);
2201  int len;
2202  char *buf;
2203 
2204  switch (len = rb_enc_codelen(code, enc)) {
2206  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2207  break;
2209  case 0:
2210  rb_raise(rb_eRangeError, "%u out of char range", code);
2211  break;
2212  }
2213  buf = ALLOCA_N(char, len + 1);
2214  rb_enc_mbcput(code, buf, enc);
2215  if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
2216  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2217  }
2218  rb_str_resize(str1, pos+len);
2219  memcpy(RSTRING_PTR(str1) + pos, buf, len);
2220  if (cr == ENC_CODERANGE_7BIT && code > 127)
2221  cr = ENC_CODERANGE_VALID;
2222  ENC_CODERANGE_SET(str1, cr);
2223  }
2224  return str1;
2225 }
2226 
2227 /*
2228  * call-seq:
2229  * str.prepend(other_str) -> str
2230  *
2231  * Prepend---Prepend the given string to <i>str</i>.
2232  *
2233  * a = "world"
2234  * a.prepend("hello ") #=> "hello world"
2235  * a #=> "hello world"
2236  */
2237 
2238 static VALUE
2240 {
2241  StringValue(str2);
2242  StringValue(str);
2243  rb_str_update(str, 0L, 0L, str2);
2244  return str;
2245 }
2246 
2247 st_index_t
2249 {
2250  int e = ENCODING_GET(str);
2251  if (e && rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
2252  e = 0;
2253  }
2254  return rb_memhash((const void *)RSTRING_PTR(str), RSTRING_LEN(str)) ^ e;
2255 }
2256 
2257 int
2259 {
2260  long len;
2261 
2262  if (!rb_str_comparable(str1, str2)) return 1;
2263  if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) &&
2264  memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) {
2265  return 0;
2266  }
2267  return 1;
2268 }
2269 
2270 /*
2271  * call-seq:
2272  * str.hash -> fixnum
2273  *
2274  * Return a hash based on the string's length and content.
2275  */
2276 
2277 static VALUE
2279 {
2280  st_index_t hval = rb_str_hash(str);
2281  return INT2FIX(hval);
2282 }
2283 
/* Smaller of a and b.  An argument may be evaluated more than once. */
2284 #define lesser(a,b) (((a)>(b))?(b):(a))
2285 
2286 int
2288 {
2289  int idx1, idx2;
2290  int rc1, rc2;
2291 
2292  if (RSTRING_LEN(str1) == 0) return TRUE;
2293  if (RSTRING_LEN(str2) == 0) return TRUE;
2294  idx1 = ENCODING_GET(str1);
2295  idx2 = ENCODING_GET(str2);
2296  if (idx1 == idx2) return TRUE;
2297  rc1 = rb_enc_str_coderange(str1);
2298  rc2 = rb_enc_str_coderange(str2);
2299  if (rc1 == ENC_CODERANGE_7BIT) {
2300  if (rc2 == ENC_CODERANGE_7BIT) return TRUE;
2302  return TRUE;
2303  }
2304  if (rc2 == ENC_CODERANGE_7BIT) {
2306  return TRUE;
2307  }
2308  return FALSE;
2309 }
2310 
2311 int
2313 {
2314  long len1, len2;
2315  const char *ptr1, *ptr2;
2316  int retval;
2317 
2318  if (str1 == str2) return 0;
2319  RSTRING_GETMEM(str1, ptr1, len1);
2320  RSTRING_GETMEM(str2, ptr2, len2);
2321  if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
2322  if (len1 == len2) {
2323  if (!rb_str_comparable(str1, str2)) {
2324  if (ENCODING_GET(str1) > ENCODING_GET(str2))
2325  return 1;
2326  return -1;
2327  }
2328  return 0;
2329  }
2330  if (len1 > len2) return 1;
2331  return -1;
2332  }
2333  if (retval > 0) return 1;
2334  return -1;
2335 }
2336 
2337 /* expect tail call optimization */
2338 static VALUE
2339 str_eql(const VALUE str1, const VALUE str2)
2340 {
2341  const long len = RSTRING_LEN(str1);
2342  const char *ptr1, *ptr2;
2343 
2344  if (len != RSTRING_LEN(str2)) return Qfalse;
2345  if (!rb_str_comparable(str1, str2)) return Qfalse;
2346  if ((ptr1 = RSTRING_PTR(str1)) == (ptr2 = RSTRING_PTR(str2)))
2347  return Qtrue;
2348  if (memcmp(ptr1, ptr2, len) == 0)
2349  return Qtrue;
2350  return Qfalse;
2351 }
2352 
2353 /*
2354  * call-seq:
2355  * str == obj -> true or false
2356  *
2357  * Equality---If <i>obj</i> is not a <code>String</code>, returns
2358  * <code>false</code>. Otherwise, returns <code>true</code> if <i>str</i>
2359  * <code><=></code> <i>obj</i> returns zero.
2360  */
2361 
2362 VALUE
2364 {
2365  if (str1 == str2) return Qtrue;
2366  if (!RB_TYPE_P(str2, T_STRING)) {
2367  if (!rb_respond_to(str2, rb_intern("to_str"))) {
2368  return Qfalse;
2369  }
2370  return rb_equal(str2, str1);
2371  }
2372  return str_eql(str1, str2);
2373 }
2374 
2375 /*
2376  * call-seq:
2377  * str.eql?(other) -> true or false
2378  *
2379  * Two strings are equal if they have the same length and content.
2380  */
2381 
2382 static VALUE
2384 {
2385  if (str1 == str2) return Qtrue;
2386  if (!RB_TYPE_P(str2, T_STRING)) return Qfalse;
2387  return str_eql(str1, str2);
2388 }
2389 
2390 /*
2391  * call-seq:
2392  * string <=> other_string -> -1, 0, +1 or nil
2393  *
2394  *
2395  * Comparison---Returns -1, 0, +1 or nil depending on whether +string+ is less
2396  * than, equal to, or greater than +other_string+.
2397  *
2398  * +nil+ is returned if the two values are incomparable.
2399  *
2400  * If the strings are of different lengths, and the strings are equal when
2401  * compared up to the shortest length, then the longer string is considered
2402  * greater than the shorter one.
2403  *
2404  * <code><=></code> is the basis for the methods <code><</code>,
2405  * <code><=</code>, <code>></code>, <code>>=</code>, and
2406  * <code>between?</code>, included from module Comparable. The method
2407  * String#== does not use Comparable#==.
2408  *
2409  * "abcdef" <=> "abcde" #=> 1
2410  * "abcdef" <=> "abcdef" #=> 0
2411  * "abcdef" <=> "abcdefg" #=> -1
2412  * "abcdef" <=> "ABCDEF" #=> 1
2413  */
2414 
2415 static VALUE
2417 {
2418  int result;
2419 
2420  if (!RB_TYPE_P(str2, T_STRING)) {
2421  VALUE tmp = rb_check_funcall(str2, rb_intern("to_str"), 0, 0);
2422  if (RB_TYPE_P(tmp, T_STRING)) {
2423  result = rb_str_cmp(str1, tmp);
2424  }
2425  else {
2426  return rb_invcmp(str1, str2);
2427  }
2428  }
2429  else {
2430  result = rb_str_cmp(str1, str2);
2431  }
2432  return INT2FIX(result);
2433 }
2434 
2435 /*
2436  * call-seq:
2437  * str.casecmp(other_str) -> -1, 0, +1 or nil
2438  *
2439  * Case-insensitive version of <code>String#<=></code>.
2440  *
2441  * "abcdef".casecmp("abcde") #=> 1
2442  * "aBcDeF".casecmp("abcdef") #=> 0
2443  * "abcdef".casecmp("abcdefg") #=> -1
2444  * "abcdef".casecmp("ABCDEF") #=> 0
2445  */
2446 
2447 static VALUE
2449 {
2450  long len;
2451  rb_encoding *enc;
2452  char *p1, *p1end, *p2, *p2end;
2453 
2454  StringValue(str2);
2455  enc = rb_enc_compatible(str1, str2);
2456  if (!enc) {
2457  return Qnil;
2458  }
2459 
2460  p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
2461  p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
2462  if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
2463  while (p1 < p1end && p2 < p2end) {
2464  if (*p1 != *p2) {
2465  unsigned int c1 = TOUPPER(*p1 & 0xff);
2466  unsigned int c2 = TOUPPER(*p2 & 0xff);
2467  if (c1 != c2)
2468  return INT2FIX(c1 < c2 ? -1 : 1);
2469  }
2470  p1++;
2471  p2++;
2472  }
2473  }
2474  else {
2475  while (p1 < p1end && p2 < p2end) {
2476  int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
2477  int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
2478 
2479  if (0 <= c1 && 0 <= c2) {
2480  c1 = TOUPPER(c1);
2481  c2 = TOUPPER(c2);
2482  if (c1 != c2)
2483  return INT2FIX(c1 < c2 ? -1 : 1);
2484  }
2485  else {
2486  int r;
2487  l1 = rb_enc_mbclen(p1, p1end, enc);
2488  l2 = rb_enc_mbclen(p2, p2end, enc);
2489  len = l1 < l2 ? l1 : l2;
2490  r = memcmp(p1, p2, len);
2491  if (r != 0)
2492  return INT2FIX(r < 0 ? -1 : 1);
2493  if (l1 != l2)
2494  return INT2FIX(l1 < l2 ? -1 : 1);
2495  }
2496  p1 += l1;
2497  p2 += l2;
2498  }
2499  }
2500  if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return INT2FIX(0);
2501  if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return INT2FIX(1);
2502  return INT2FIX(-1);
2503 }
2504 
/*
 * Search str for sub, starting at character index offset (negative
 * values count from the end).
 *
 * Returns a byte offset into str on success -- the caller below converts
 * it with rb_str_sublen() -- and -1 when sub is a broken string, the
 * offset is out of range, or no match exists.  Byte matches that do not
 * start on a character boundary are skipped.
 */
2505 static long
2506 rb_str_index(VALUE str, VALUE sub, long offset)
2507 {
2508  long pos;
2509  char *s, *sptr, *e;
2510  long len, slen;
2511  rb_encoding *enc;
2512 
2513  enc = rb_enc_check(str, sub);
2514  if (is_broken_string(sub)) {
2515  return -1;
2516  }
 /* character counts, used only for the range checks */
2517  len = str_strlen(str, enc);
2518  slen = str_strlen(sub, enc);
2519  if (offset < 0) {
2520  offset += len;
2521  if (offset < 0) return -1;
2522  }
2523  if (len - offset < slen) return -1;
2524  s = RSTRING_PTR(str);
2525  e = s + RSTRING_LEN(str);
2526  if (offset) {
 /* from here on offset is a byte offset */
2527  offset = str_offset(s, RSTRING_END(str), offset, enc, single_byte_optimizable(str));
2528  s += offset;
2529  }
2530  if (slen == 0) return offset;
2531  /* need proceed one character at a time */
2532  sptr = RSTRING_PTR(sub);
2533  slen = RSTRING_LEN(sub);
2534  len = RSTRING_LEN(str) - offset;
2535  for (;;) {
2536  char *t;
2537  pos = rb_memsearch(sptr, slen, s, len, enc);
2538  if (pos < 0) return pos;
 /* accept the hit only if it starts at a character head */
2539  t = rb_enc_right_char_head(s, s+pos, e, enc);
2540  if (t == s + pos) break;
2541  if ((len -= t - s) <= 0) return -1;
2542  offset += t - s;
2543  s = t;
2544  }
2545  return pos + offset;
2546 }
2547 
2548 
2549 /*
2550  * call-seq:
2551  * str.index(substring [, offset]) -> fixnum or nil
2552  * str.index(regexp [, offset]) -> fixnum or nil
2553  *
2554  * Returns the index of the first occurrence of the given <i>substring</i> or
2555  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2556  * found. If the second parameter is present, it specifies the position in the
2557  * string to begin the search.
2558  *
2559  * "hello".index('e') #=> 1
2560  * "hello".index('lo') #=> 3
2561  * "hello".index('a') #=> nil
2562  * "hello".index(?e) #=> 1
2563  * "hello".index(/[aeiou]/, -3) #=> 4
2564  */
2565 
2566 static VALUE
2568 {
2569  VALUE sub;
2570  VALUE initpos;
2571  long pos;
2572 
2573  if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
2574  pos = NUM2LONG(initpos);
2575  }
2576  else {
2577  pos = 0;
2578  }
2579  if (pos < 0) {
2580  pos += str_strlen(str, STR_ENC_GET(str));
2581  if (pos < 0) {
2582  if (RB_TYPE_P(sub, T_REGEXP)) {
2584  }
2585  return Qnil;
2586  }
2587  }
2588 
2589  if (SPECIAL_CONST_P(sub)) goto generic;
2590  switch (BUILTIN_TYPE(sub)) {
2591  case T_REGEXP:
2592  if (pos > str_strlen(str, STR_ENC_GET(str)))
2593  return Qnil;
2594  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2595  rb_enc_check(str, sub), single_byte_optimizable(str));
2596 
2597  pos = rb_reg_search(sub, str, pos, 0);
2598  pos = rb_str_sublen(str, pos);
2599  break;
2600 
2601  generic:
2602  default: {
2603  VALUE tmp;
2604 
2605  tmp = rb_check_string_type(sub);
2606  if (NIL_P(tmp)) {
2607  rb_raise(rb_eTypeError, "type mismatch: %s given",
2608  rb_obj_classname(sub));
2609  }
2610  sub = tmp;
2611  }
2612  /* fall through */
2613  case T_STRING:
2614  pos = rb_str_index(str, sub, pos);
2615  pos = rb_str_sublen(str, pos);
2616  break;
2617  }
2618 
2619  if (pos == -1) return Qnil;
2620  return LONG2NUM(pos);
2621 }
2622 
/*
 * Search str backwards for sub, starting at character index pos.
 *
 * Returns the character index of the match, or -1 when sub is a broken
 * string, is longer than str, or does not occur at or before pos.  A
 * start position too close to the end is pulled back so that sub still
 * fits.
 */
2623 static long
2624 rb_str_rindex(VALUE str, VALUE sub, long pos)
2625 {
2626  long len, slen;
2627  char *s, *sbeg, *e, *t;
2628  rb_encoding *enc;
2629  int singlebyte = single_byte_optimizable(str);
2630 
2631  enc = rb_enc_check(str, sub);
2632  if (is_broken_string(sub)) {
2633  return -1;
2634  }
2635  len = str_strlen(str, enc);
2636  slen = str_strlen(sub, enc);
2637  /* substring longer than string */
2638  if (len < slen) return -1;
2639  if (len - pos < slen) {
2640  pos = len - slen;
2641  }
2642  if (len == 0) {
2643  return pos;
2644  }
2645  sbeg = RSTRING_PTR(str);
2646  e = RSTRING_END(str);
2647  t = RSTRING_PTR(sub);
 /* from here on slen is the byte length of sub */
2648  slen = RSTRING_LEN(sub);
2649  s = str_nth(sbeg, e, pos, enc, singlebyte);
 /* step back one character at a time, comparing bytes at each position */
2650  while (s) {
2651  if (memcmp(s, t, slen) == 0) {
2652  return pos;
2653  }
2654  if (pos == 0) break;
2655  pos--;
2656  s = rb_enc_prev_char(sbeg, s, e, enc);
2657  }
2658  return -1;
2659 }
2660 
2661 
2662 /*
2663  * call-seq:
2664  * str.rindex(substring [, fixnum]) -> fixnum or nil
2665  * str.rindex(regexp [, fixnum]) -> fixnum or nil
2666  *
2667  * Returns the index of the last occurrence of the given <i>substring</i> or
2668  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2669  * found. If the second parameter is present, it specifies the position in the
2670  * string to end the search---characters beyond this point will not be
2671  * considered.
2672  *
2673  * "hello".rindex('e') #=> 1
2674  * "hello".rindex('l') #=> 3
2675  * "hello".rindex('a') #=> nil
2676  * "hello".rindex(?e) #=> 1
2677  * "hello".rindex(/[aeiou]/, -2) #=> 1
2678  */
2679 
2680 static VALUE
2682 {
2683  VALUE sub;
2684  VALUE vpos;
2685  rb_encoding *enc = STR_ENC_GET(str);
2686  long pos, len = str_strlen(str, enc);
2687 
2688  if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
2689  pos = NUM2LONG(vpos);
2690  if (pos < 0) {
2691  pos += len;
2692  if (pos < 0) {
2693  if (RB_TYPE_P(sub, T_REGEXP)) {
2695  }
2696  return Qnil;
2697  }
2698  }
2699  if (pos > len) pos = len;
2700  }
2701  else {
2702  pos = len;
2703  }
2704 
2705  if (SPECIAL_CONST_P(sub)) goto generic;
2706  switch (BUILTIN_TYPE(sub)) {
2707  case T_REGEXP:
2708  /* enc = rb_get_check(str, sub); */
2709  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2711 
2712  if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
2713  pos = rb_reg_search(sub, str, pos, 1);
2714  pos = rb_str_sublen(str, pos);
2715  }
2716  if (pos >= 0) return LONG2NUM(pos);
2717  break;
2718 
2719  generic:
2720  default: {
2721  VALUE tmp;
2722 
2723  tmp = rb_check_string_type(sub);
2724  if (NIL_P(tmp)) {
2725  rb_raise(rb_eTypeError, "type mismatch: %s given",
2726  rb_obj_classname(sub));
2727  }
2728  sub = tmp;
2729  }
2730  /* fall through */
2731  case T_STRING:
2732  pos = rb_str_rindex(str, sub, pos);
2733  if (pos >= 0) return LONG2NUM(pos);
2734  break;
2735  }
2736  return Qnil;
2737 }
2738 
2739 /*
2740  * call-seq:
2741  * str =~ obj -> fixnum or nil
2742  *
2743  * Match---If <i>obj</i> is a <code>Regexp</code>, uses it as a pattern to match
2744  * against <i>str</i>, and returns the position where the match starts, or
2745  * <code>nil</code> if there is no match. Otherwise, invokes
2746  * <i>obj.=~</i>, passing <i>str</i> as an argument. The default
2747  * <code>=~</code> in <code>Object</code> returns <code>nil</code>.
2748  *
2749  * Note: <code>str =~ regexp</code> is not the same as
2750  * <code>regexp =~ str</code>. Strings captured from named capture groups
2751  * are assigned to local variables only in the second case.
2752  *
2753  * "cat o' 9 tails" =~ /\d/ #=> 7
2754  * "cat o' 9 tails" =~ 9 #=> nil
2755  */
2756 
2757 static VALUE
2759 {
2760  if (SPECIAL_CONST_P(y)) goto generic;
2761  switch (BUILTIN_TYPE(y)) {
2762  case T_STRING:
2763  rb_raise(rb_eTypeError, "type mismatch: String given");
2764 
2765  case T_REGEXP:
2766  return rb_reg_match(y, x);
2767 
2768  generic:
2769  default:
2770  return rb_funcall(y, rb_intern("=~"), 1, x);
2771  }
2772 }
2773 
2774 
2775 static VALUE get_pat(VALUE, int);
2776 
2777 
2778 /*
2779  * call-seq:
2780  * str.match(pattern) -> matchdata or nil
2781  * str.match(pattern, pos) -> matchdata or nil
2782  *
2783  * Converts <i>pattern</i> to a <code>Regexp</code> (if it isn't already one),
2784  * then invokes its <code>match</code> method on <i>str</i>. If the second
2785  * parameter is present, it specifies the position in the string to begin the
2786  * search.
2787  *
2788  * 'hello'.match('(.)\1') #=> #<MatchData "ll" 1:"l">
2789  * 'hello'.match('(.)\1')[0] #=> "ll"
2790  * 'hello'.match(/(.)\1/)[0] #=> "ll"
2791  * 'hello'.match('xx') #=> nil
2792  *
2793  * If a block is given, invokes the block with the MatchData if the match succeeds, so
2794  * that you can write
2795  *
2796  * str.match(pat) {|m| ...}
2797  *
2798  * instead of
2799  *
2800  * if m = str.match(pat)
2801  * ...
2802  * end
2803  *
2804  * The return value is a value from block execution in this case.
2805  */
2806 
2807 static VALUE
2809 {
2810  VALUE re, result;
2811  if (argc < 1)
2812  rb_check_arity(argc, 1, 2);
2813  re = argv[0];
2814  argv[0] = str;
2815  result = rb_funcall2(get_pat(re, 0), rb_intern("match"), argc, argv);
2816  if (!NIL_P(result) && rb_block_given_p()) {
2817  return rb_yield(result);
2818  }
2819  return result;
2820 }
2821 
2826 };
2827 
2828 static enum neighbor_char
2829 enc_succ_char(char *p, long len, rb_encoding *enc)
2830 {
2831  long i;
2832  int l;
2833  while (1) {
2834  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--)
2835  p[i] = '\0';
2836  if (i < 0)
2837  return NEIGHBOR_WRAPPED;
2838  ++((unsigned char*)p)[i];
2839  l = rb_enc_precise_mbclen(p, p+len, enc);
2840  if (MBCLEN_CHARFOUND_P(l)) {
2841  l = MBCLEN_CHARFOUND_LEN(l);
2842  if (l == len) {
2843  return NEIGHBOR_FOUND;
2844  }
2845  else {
2846  memset(p+l, 0xff, len-l);
2847  }
2848  }
2849  if (MBCLEN_INVALID_P(l) && i < len-1) {
2850  long len2;
2851  int l2;
2852  for (len2 = len-1; 0 < len2; len2--) {
2853  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
2854  if (!MBCLEN_INVALID_P(l2))
2855  break;
2856  }
2857  memset(p+len2+1, 0xff, len-(len2+1));
2858  }
2859  }
2860 }
2861 
2862 static enum neighbor_char
2863 enc_pred_char(char *p, long len, rb_encoding *enc)
2864 {
2865  long i;
2866  int l;
2867  while (1) {
2868  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--)
2869  p[i] = '\xff';
2870  if (i < 0)
2871  return NEIGHBOR_WRAPPED;
2872  --((unsigned char*)p)[i];
2873  l = rb_enc_precise_mbclen(p, p+len, enc);
2874  if (MBCLEN_CHARFOUND_P(l)) {
2875  l = MBCLEN_CHARFOUND_LEN(l);
2876  if (l == len) {
2877  return NEIGHBOR_FOUND;
2878  }
2879  else {
2880  memset(p+l, 0, len-l);
2881  }
2882  }
2883  if (MBCLEN_INVALID_P(l) && i < len-1) {
2884  long len2;
2885  int l2;
2886  for (len2 = len-1; 0 < len2; len2--) {
2887  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
2888  if (!MBCLEN_INVALID_P(l2))
2889  break;
2890  }
2891  memset(p+len2+1, 0, len-(len2+1));
2892  }
2893  }
2894 }
2895 
2896 /*
2897  overwrite +p+ by succeeding letter in +enc+ and returns
2898  NEIGHBOR_FOUND or NEIGHBOR_WRAPPED.
2899  When NEIGHBOR_WRAPPED, carried-out letter is stored into carry.
2900  assuming each ranges are successive, and mbclen
2901  never change in each ranges.
2902  NEIGHBOR_NOT_CHAR is returned if invalid character or the range has only one
2903  character.
2904  */
2905 static enum neighbor_char
2906 enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
2907 {
2908  enum neighbor_char ret;
2909  unsigned int c;
2910  int ctype;
2911  int range;
2912  char save[ONIGENC_CODE_TO_MBC_MAXLEN];
2913 
2914  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2915  if (rb_enc_isctype(c, ONIGENC_CTYPE_DIGIT, enc))
2916  ctype = ONIGENC_CTYPE_DIGIT;
2917  else if (rb_enc_isctype(c, ONIGENC_CTYPE_ALPHA, enc))
2918  ctype = ONIGENC_CTYPE_ALPHA;
2919  else
2920  return NEIGHBOR_NOT_CHAR;
2921 
2922  MEMCPY(save, p, char, len);
2923  ret = enc_succ_char(p, len, enc);
2924  if (ret == NEIGHBOR_FOUND) {
2925  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2926  if (rb_enc_isctype(c, ctype, enc))
2927  return NEIGHBOR_FOUND;
2928  }
2929  MEMCPY(p, save, char, len);
2930  range = 1;
2931  while (1) {
2932  MEMCPY(save, p, char, len);
2933  ret = enc_pred_char(p, len, enc);
2934  if (ret == NEIGHBOR_FOUND) {
2935  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2936  if (!rb_enc_isctype(c, ctype, enc)) {
2937  MEMCPY(p, save, char, len);
2938  break;
2939  }
2940  }
2941  else {
2942  MEMCPY(p, save, char, len);
2943  break;
2944  }
2945  range++;
2946  }
2947  if (range == 1) {
2948  return NEIGHBOR_NOT_CHAR;
2949  }
2950 
2951  if (ctype != ONIGENC_CTYPE_DIGIT) {
2952  MEMCPY(carry, p, char, len);
2953  return NEIGHBOR_WRAPPED;
2954  }
2955 
2956  MEMCPY(carry, p, char, len);
2957  enc_succ_char(carry, len, enc);
2958  return NEIGHBOR_WRAPPED;
2959 }
2960 
2961 
2962 /*
2963  * call-seq:
2964  * str.succ -> new_str
2965  * str.next -> new_str
2966  *
2967  * Returns the successor to <i>str</i>. The successor is calculated by
2968  * incrementing characters starting from the rightmost alphanumeric (or
2969  * the rightmost character if there are no alphanumerics) in the
2970  * string. Incrementing a digit always results in another digit, and
2971  * incrementing a letter results in another letter of the same case.
2972  * Incrementing nonalphanumerics uses the underlying character set's
2973  * collating sequence.
2974  *
2975  * If the increment generates a ``carry,'' the character to the left of
2976  * it is incremented. This process repeats until there is no carry,
2977  * adding an additional character if necessary.
2978  *
2979  * "abcd".succ #=> "abce"
2980  * "THX1138".succ #=> "THX1139"
2981  * "<<koala>>".succ #=> "<<koalb>>"
2982  * "1999zzz".succ #=> "2000aaa"
2983  * "ZZZ9999".succ #=> "AAAA0000"
2984  * "***".succ #=> "**+"
2985  */
2986 
2987 VALUE
2989 {
2990  rb_encoding *enc;
2991  VALUE str;
2992  char *sbeg, *s, *e, *last_alnum = 0;
2993  int c = -1;
2994  long l;
2995  char carry[ONIGENC_CODE_TO_MBC_MAXLEN] = "\1";
2996  long carry_pos = 0, carry_len = 1;
2997  enum neighbor_char neighbor = NEIGHBOR_FOUND;
2998 
2999  str = rb_str_new5(orig, RSTRING_PTR(orig), RSTRING_LEN(orig));
3000  rb_enc_cr_str_copy_for_substr(str, orig);
3001  OBJ_INFECT(str, orig);
3002  if (RSTRING_LEN(str) == 0) return str;
3003 
3004  enc = STR_ENC_GET(orig);
3005  sbeg = RSTRING_PTR(str);
3006  s = e = sbeg + RSTRING_LEN(str);
3007 
3008  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
3009  if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
3010  if (ISALPHA(*last_alnum) ? ISDIGIT(*s) :
3011  ISDIGIT(*last_alnum) ? ISALPHA(*s) : 0) {
3012  s = last_alnum;
3013  break;
3014  }
3015  }
3016  if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue;
3017  neighbor = enc_succ_alnum_char(s, l, enc, carry);
3018  switch (neighbor) {
3019  case NEIGHBOR_NOT_CHAR:
3020  continue;
3021  case NEIGHBOR_FOUND:
3022  return str;
3023  case NEIGHBOR_WRAPPED:
3024  last_alnum = s;
3025  break;
3026  }
3027  c = 1;
3028  carry_pos = s - sbeg;
3029  carry_len = l;
3030  }
3031  if (c == -1) { /* str contains no alnum */
3032  s = e;
3033  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
3034  enum neighbor_char neighbor;
3035  if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue;
3036  neighbor = enc_succ_char(s, l, enc);
3037  if (neighbor == NEIGHBOR_FOUND)
3038  return str;
3039  if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
3040  /* wrapped to \0...\0. search next valid char. */
3041  enc_succ_char(s, l, enc);
3042  }
3043  if (!rb_enc_asciicompat(enc)) {
3044  MEMCPY(carry, s, char, l);
3045  carry_len = l;
3046  }
3047  carry_pos = s - sbeg;
3048  }
3049  }
3050  RESIZE_CAPA(str, RSTRING_LEN(str) + carry_len);
3051  s = RSTRING_PTR(str) + carry_pos;
3052  memmove(s + carry_len, s, RSTRING_LEN(str) - carry_pos);
3053  memmove(s, carry, carry_len);
3054  STR_SET_LEN(str, RSTRING_LEN(str) + carry_len);
3055  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3056  rb_enc_str_coderange(str);
3057  return str;
3058 }
3059 
3060 
3061 /*
3062  * call-seq:
3063  * str.succ! -> str
3064  * str.next! -> str
3065  *
3066  * Equivalent to <code>String#succ</code>, but modifies the receiver in
3067  * place.
3068  */
3069 
3070 static VALUE
3072 {
3074 
3075  return str;
3076 }
3077 
3078 
3079 /*
3080  * call-seq:
3081  * str.upto(other_str, exclusive=false) {|s| block } -> str
3082  * str.upto(other_str, exclusive=false) -> an_enumerator
3083  *
3084  * Iterates through successive values, starting at <i>str</i> and
3085  * ending at <i>other_str</i> inclusive, passing each value in turn to
3086  * the block. The <code>String#succ</code> method is used to generate
3087  * each value. If optional second argument exclusive is omitted or is false,
3088  * the last value will be included; otherwise it will be excluded.
3089  *
3090  * If no block is given, an enumerator is returned instead.
3091  *
3092  * "a8".upto("b6") {|s| print s, ' ' }
3093  * for s in "a8".."b6"
3094  * print s, ' '
3095  * end
3096  *
3097  * <em>produces:</em>
3098  *
3099  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3100  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3101  *
3102  * If <i>str</i> and <i>other_str</i> contain only ASCII numeric characters,
3103  * both are recognized as decimal numbers. In addition, the width of
3104  * string (e.g. leading zeros) is handled appropriately.
3105  *
3106  * "9".upto("11").to_a #=> ["9", "10", "11"]
3107  * "25".upto("5").to_a #=> []
3108  * "07".upto("11").to_a #=> ["07", "08", "09", "10", "11"]
3109  */
3110 
3111 static VALUE
3113 {
3114  VALUE end, exclusive;
3115  VALUE current, after_end;
3116  ID succ;
3117  int n, excl, ascii;
3118  rb_encoding *enc;
3119 
3120  rb_scan_args(argc, argv, "11", &end, &exclusive);
3121  RETURN_ENUMERATOR(beg, argc, argv);
3122  excl = RTEST(exclusive);
3123  CONST_ID(succ, "succ");
3124  StringValue(end);
3125  enc = rb_enc_check(beg, end);
3126  ascii = (is_ascii_string(beg) && is_ascii_string(end));
3127  /* single character */
3128  if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
3129  char c = RSTRING_PTR(beg)[0];
3130  char e = RSTRING_PTR(end)[0];
3131 
3132  if (c > e || (excl && c == e)) return beg;
3133  for (;;) {
3134  rb_yield(rb_enc_str_new(&c, 1, enc));
3135  if (!excl && c == e) break;
3136  c++;
3137  if (excl && c == e) break;
3138  }
3139  return beg;
3140  }
3141  /* both edges are all digits */
3142  if (ascii && ISDIGIT(RSTRING_PTR(beg)[0]) && ISDIGIT(RSTRING_PTR(end)[0])) {
3143  char *s, *send;
3144  VALUE b, e;
3145  int width;
3146 
3147  s = RSTRING_PTR(beg); send = RSTRING_END(beg);
3148  width = rb_long2int(send - s);
3149  while (s < send) {
3150  if (!ISDIGIT(*s)) goto no_digits;
3151  s++;
3152  }
3153  s = RSTRING_PTR(end); send = RSTRING_END(end);
3154  while (s < send) {
3155  if (!ISDIGIT(*s)) goto no_digits;
3156  s++;
3157  }
3158  b = rb_str_to_inum(beg, 10, FALSE);
3159  e = rb_str_to_inum(end, 10, FALSE);
3160  if (FIXNUM_P(b) && FIXNUM_P(e)) {
3161  long bi = FIX2LONG(b);
3162  long ei = FIX2LONG(e);
3163  rb_encoding *usascii = rb_usascii_encoding();
3164 
3165  while (bi <= ei) {
3166  if (excl && bi == ei) break;
3167  rb_yield(rb_enc_sprintf(usascii, "%.*ld", width, bi));
3168  bi++;
3169  }
3170  }
3171  else {
3172  ID op = excl ? '<' : rb_intern("<=");
3174 
3175  args[0] = INT2FIX(width);
3176  while (rb_funcall(b, op, 1, e)) {
3177  args[1] = b;
3178  rb_yield(rb_str_format(numberof(args), args, fmt));
3179  b = rb_funcall(b, succ, 0, 0);
3180  }
3181  }
3182  return beg;
3183  }
3184  /* normal case */
3185  no_digits:
3186  n = rb_str_cmp(beg, end);
3187  if (n > 0 || (excl && n == 0)) return beg;
3188 
3189  after_end = rb_funcall(end, succ, 0, 0);
3190  current = rb_str_dup(beg);
3191  while (!rb_str_equal(current, after_end)) {
3192  VALUE next = Qnil;
3193  if (excl || !rb_str_equal(current, end))
3194  next = rb_funcall(current, succ, 0, 0);
3195  rb_yield(current);
3196  if (NIL_P(next)) break;
3197  current = next;
3198  StringValue(current);
3199  if (excl && rb_str_equal(current, end)) break;
3200  if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
3201  break;
3202  }
3203 
3204  return beg;
3205 }
3206 
3207 static VALUE
3208 rb_str_subpat(VALUE str, VALUE re, VALUE backref)
3209 {
3210  if (rb_reg_search(re, str, 0, 0) >= 0) {
3212  int nth = rb_reg_backref_number(match, backref);
3213  return rb_reg_nth_match(nth, match);
3214  }
3215  return Qnil;
3216 }
3217 
3218 static VALUE
3220 {
3221  long idx;
3222 
3223  if (FIXNUM_P(indx)) {
3224  idx = FIX2LONG(indx);
3225 
3226  num_index:
3227  str = rb_str_substr(str, idx, 1);
3228  if (!NIL_P(str) && RSTRING_LEN(str) == 0) return Qnil;
3229  return str;
3230  }
3231 
3232  if (SPECIAL_CONST_P(indx)) goto generic;
3233  switch (BUILTIN_TYPE(indx)) {
3234  case T_REGEXP:
3235  return rb_str_subpat(str, indx, INT2FIX(0));
3236 
3237  case T_STRING:
3238  if (rb_str_index(str, indx, 0) != -1)
3239  return rb_str_dup(indx);
3240  return Qnil;
3241 
3242  generic:
3243  default:
3244  /* check if indx is Range */
3245  {
3246  long beg, len;
3247  VALUE tmp;
3248 
3249  len = str_strlen(str, STR_ENC_GET(str));
3250  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
3251  case Qfalse:
3252  break;
3253  case Qnil:
3254  return Qnil;
3255  default:
3256  tmp = rb_str_substr(str, beg, len);
3257  return tmp;
3258  }
3259  }
3260  idx = NUM2LONG(indx);
3261  goto num_index;
3262  }
3263 
3264  UNREACHABLE;
3265 }
3266 
3267 
3268 /*
3269  * call-seq:
3270  * str[index] -> new_str or nil
3271  * str[start, length] -> new_str or nil
3272  * str[range] -> new_str or nil
3273  * str[regexp] -> new_str or nil
3274  * str[regexp, capture] -> new_str or nil
3275  * str[match_str] -> new_str or nil
3276  * str.slice(index) -> new_str or nil
3277  * str.slice(start, length) -> new_str or nil
3278  * str.slice(range) -> new_str or nil
3279  * str.slice(regexp) -> new_str or nil
3280  * str.slice(regexp, capture) -> new_str or nil
3281  * str.slice(match_str) -> new_str or nil
3282  *
3283  * Element Reference --- If passed a single +index+, returns a substring of
3284  * one character at that index. If passed a +start+ index and a +length+,
3285  * returns a substring containing +length+ characters starting at the
3286  * +index+. If passed a +range+, its beginning and end are interpreted as
3287  * offsets delimiting the substring to be returned.
3288  *
3289  * In these three cases, if an index is negative, it is counted from the end
3290  * of the string. For the +start+ and +range+ cases the starting index
3291  * is just before a character and an index matching the string's size.
3292  * Additionally, an empty string is returned when the starting index for a
3293  * character range is at the end of the string.
3294  *
3295  * Returns +nil+ if the initial index falls outside the string or the length
3296  * is negative.
3297  *
3298  * If a +Regexp+ is supplied, the matching portion of the string is
3299  * returned. If a +capture+, which may be a capture group index or name,
3300  * follows the regular expression, that component of the MatchData is
3301  * returned instead.
3302  *
3303  * If a +match_str+ is given, that string is returned if it occurs in
3304  * the string.
3305  *
3306  * Returns +nil+ if the regular expression does not match or the match string
3307  * cannot be found.
3308  *
3309  * a = "hello there"
3310  *
3311  * a[1] #=> "e"
3312  * a[2, 3] #=> "llo"
3313  * a[2..3] #=> "ll"
3314  *
3315  * a[-3, 2] #=> "er"
3316  * a[7..-2] #=> "her"
3317  * a[-4..-2] #=> "her"
3318  * a[-2..-4] #=> ""
3319  *
3320  * a[11, 0] #=> ""
3321  * a[11] #=> nil
3322  * a[12, 0] #=> nil
3323  * a[12..-1] #=> nil
3324  *
3325  * a[/[aeiou](.)\1/] #=> "ell"
3326  * a[/[aeiou](.)\1/, 0] #=> "ell"
3327  * a[/[aeiou](.)\1/, 1] #=> "l"
3328  * a[/[aeiou](.)\1/, 2] #=> nil
3329  *
3330  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "non_vowel"] #=> "l"
3331  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "vowel"] #=> "e"
3332  *
3333  * a["lo"] #=> "lo"
3334  * a["bye"] #=> nil
3335  */
3336 
3337 static VALUE
3339 {
3340  if (argc == 2) {
3341  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3342  return rb_str_subpat(str, argv[0], argv[1]);
3343  }
3344  return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
3345  }
3346  rb_check_arity(argc, 1, 2);
3347  return rb_str_aref(str, argv[0]);
3348 }
3349 
3350 VALUE
3351 rb_str_drop_bytes(VALUE str, long len)
3352 {
3353  char *ptr = RSTRING_PTR(str);
3354  long olen = RSTRING_LEN(str), nlen;
3355 
3356  str_modifiable(str);
3357  if (len > olen) len = olen;
3358  nlen = olen - len;
3359  if (nlen <= RSTRING_EMBED_LEN_MAX) {
3360  char *oldptr = ptr;
3361  int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|ELTS_SHARED));
3362  STR_SET_EMBED(str);
3363  STR_SET_EMBED_LEN(str, nlen);
3364  ptr = RSTRING(str)->as.ary;
3365  memmove(ptr, oldptr + len, nlen);
3366  if (fl == STR_NOEMBED) xfree(oldptr);
3367  }
3368  else {
3369  if (!STR_SHARED_P(str)) rb_str_new4(str);
3370  ptr = RSTRING(str)->as.heap.ptr += len;
3371  RSTRING(str)->as.heap.len = nlen;
3372  }
3373  ptr[nlen] = 0;
3374  ENC_CODERANGE_CLEAR(str);
3375  return str;
3376 }
3377 
3378 static void
3379 rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
3380 {
3381  if (beg == 0 && RSTRING_LEN(val) == 0) {
3382  rb_str_drop_bytes(str, len);
3383  OBJ_INFECT(str, val);
3384  return;
3385  }
3386 
3387  rb_str_modify(str);
3388  if (len < RSTRING_LEN(val)) {
3389  /* expand string */
3390  RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len + 1);
3391  }
3392 
3393  if (RSTRING_LEN(val) != len) {
3394  memmove(RSTRING_PTR(str) + beg + RSTRING_LEN(val),
3395  RSTRING_PTR(str) + beg + len,
3396  RSTRING_LEN(str) - (beg + len));
3397  }
3398  if (RSTRING_LEN(val) < beg && len < 0) {
3399  MEMZERO(RSTRING_PTR(str) + RSTRING_LEN(str), char, -len);
3400  }
3401  if (RSTRING_LEN(val) > 0) {
3402  memmove(RSTRING_PTR(str)+beg, RSTRING_PTR(val), RSTRING_LEN(val));
3403  }
3404  STR_SET_LEN(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len);
3405  if (RSTRING_PTR(str)) {
3406  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3407  }
3408  OBJ_INFECT(str, val);
3409 }
3410 
3411 static void
3412 rb_str_splice(VALUE str, long beg, long len, VALUE val)
3413 {
3414  long slen;
3415  char *p, *e;
3416  rb_encoding *enc;
3417  int singlebyte = single_byte_optimizable(str);
3418  int cr;
3419 
3420  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
3421 
3422  StringValue(val);
3423  enc = rb_enc_check(str, val);
3424  slen = str_strlen(str, enc);
3425 
3426  if (slen < beg) {
3427  out_of_range:
3428  rb_raise(rb_eIndexError, "index %ld out of string", beg);
3429  }
3430  if (beg < 0) {
3431  if (-beg > slen) {
3432  goto out_of_range;
3433  }
3434  beg += slen;
3435  }
3436  if (slen < len || slen < beg + len) {
3437  len = slen - beg;
3438  }
3439  str_modify_keep_cr(str);
3440  p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte);
3441  if (!p) p = RSTRING_END(str);
3442  e = str_nth(p, RSTRING_END(str), len, enc, singlebyte);
3443  if (!e) e = RSTRING_END(str);
3444  /* error check */
3445  beg = p - RSTRING_PTR(str); /* physical position */
3446  len = e - p; /* physical length */
3447  rb_str_splice_0(str, beg, len, val);
3448  rb_enc_associate(str, enc);
3450  if (cr != ENC_CODERANGE_BROKEN)
3451  ENC_CODERANGE_SET(str, cr);
3452 }
3453 
3454 void
3455 rb_str_update(VALUE str, long beg, long len, VALUE val)
3456 {
3457  rb_str_splice(str, beg, len, val);
3458 }
3459 
3460 static void
3462 {
3463  int nth;
3464  VALUE match;
3465  long start, end, len;
3466  rb_encoding *enc;
3467  struct re_registers *regs;
3468 
3469  if (rb_reg_search(re, str, 0, 0) < 0) {
3470  rb_raise(rb_eIndexError, "regexp not matched");
3471  }
3472  match = rb_backref_get();
3473  nth = rb_reg_backref_number(match, backref);
3474  regs = RMATCH_REGS(match);
3475  if (nth >= regs->num_regs) {
3476  out_of_range:
3477  rb_raise(rb_eIndexError, "index %d out of regexp", nth);
3478  }
3479  if (nth < 0) {
3480  if (-nth >= regs->num_regs) {
3481  goto out_of_range;
3482  }
3483  nth += regs->num_regs;
3484  }
3485 
3486  start = BEG(nth);
3487  if (start == -1) {
3488  rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
3489  }
3490  end = END(nth);
3491  len = end - start;
3492  StringValue(val);
3493  enc = rb_enc_check(str, val);
3494  rb_str_splice_0(str, start, len, val);
3495  rb_enc_associate(str, enc);
3496 }
3497 
3498 static VALUE
3500 {
3501  long idx, beg;
3502 
3503  if (FIXNUM_P(indx)) {
3504  idx = FIX2LONG(indx);
3505  num_index:
3506  rb_str_splice(str, idx, 1, val);
3507  return val;
3508  }
3509 
3510  if (SPECIAL_CONST_P(indx)) goto generic;
3511  switch (TYPE(indx)) {
3512  case T_REGEXP:
3513  rb_str_subpat_set(str, indx, INT2FIX(0), val);
3514  return val;
3515 
3516  case T_STRING:
3517  beg = rb_str_index(str, indx, 0);
3518  if (beg < 0) {
3519  rb_raise(rb_eIndexError, "string not matched");
3520  }
3521  beg = rb_str_sublen(str, beg);
3522  rb_str_splice(str, beg, str_strlen(indx, 0), val);
3523  return val;
3524 
3525  generic:
3526  default:
3527  /* check if indx is Range */
3528  {
3529  long beg, len;
3530  if (rb_range_beg_len(indx, &beg, &len, str_strlen(str, 0), 2)) {
3531  rb_str_splice(str, beg, len, val);
3532  return val;
3533  }
3534  }
3535  idx = NUM2LONG(indx);
3536  goto num_index;
3537  }
3538 }
3539 
3540 /*
3541  * call-seq:
3542  * str[fixnum] = new_str
3543  * str[fixnum, fixnum] = new_str
3544  * str[range] = aString
3545  * str[regexp] = new_str
3546  * str[regexp, fixnum] = new_str
3547  * str[regexp, name] = new_str
3548  * str[other_str] = new_str
3549  *
3550  * Element Assignment---Replaces some or all of the content of <i>str</i>. The
3551  * portion of the string affected is determined using the same criteria as
3552  * <code>String#[]</code>. If the replacement string is not the same length as
3553  * the text it is replacing, the string will be adjusted accordingly. If the
3554  * regular expression or string used as the index doesn't match a position
3555  * in the string, <code>IndexError</code> is raised. If the regular expression
3556  * form is used, the optional second <code>Fixnum</code> allows you to specify
3557  * which portion of the match to replace (effectively using the
3558  * <code>MatchData</code> indexing rules). The forms that take a
3559  * <code>Fixnum</code> will raise an <code>IndexError</code> if the value is
3560  * out of range; the <code>Range</code> form will raise a
3561  * <code>RangeError</code>, and the <code>Regexp</code> and <code>String</code>
3562  * will raise an <code>IndexError</code> on negative match.
3563  */
3564 
3565 static VALUE
3567 {
3568  if (argc == 3) {
3569  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3570  rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
3571  }
3572  else {
3573  rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
3574  }
3575  return argv[2];
3576  }
3577  rb_check_arity(argc, 2, 3);
3578  return rb_str_aset(str, argv[0], argv[1]);
3579 }
3580 
3581 /*
3582  * call-seq:
3583  * str.insert(index, other_str) -> str
3584  *
3585  * Inserts <i>other_str</i> before the character at the given
3586  * <i>index</i>, modifying <i>str</i>. Negative indices count from the
3587  * end of the string, and insert <em>after</em> the given character.
3588  * The intent is to insert <i>other_str</i> so that it starts at the given
3589  * <i>index</i>.
3590  *
3591  * "abcd".insert(0, 'X') #=> "Xabcd"
3592  * "abcd".insert(3, 'X') #=> "abcXd"
3593  * "abcd".insert(4, 'X') #=> "abcdX"
3594  * "abcd".insert(-3, 'X') #=> "abXcd"
3595  * "abcd".insert(-1, 'X') #=> "abcdX"
3596  */
3597 
3598 static VALUE
3600 {
3601  long pos = NUM2LONG(idx);
3602 
3603  if (pos == -1) {
3604  return rb_str_append(str, str2);
3605  }
3606  else if (pos < 0) {
3607  pos++;
3608  }
3609  rb_str_splice(str, pos, 0, str2);
3610  return str;
3611 }
3612 
3613 
3614 /*
3615  * call-seq:
3616  * str.slice!(fixnum) -> new_str or nil
3617  * str.slice!(fixnum, fixnum) -> new_str or nil
3618  * str.slice!(range) -> new_str or nil
3619  * str.slice!(regexp) -> new_str or nil
3620  * str.slice!(other_str) -> new_str or nil
3621  *
3622  * Deletes the specified portion from <i>str</i>, and returns the portion
3623  * deleted.
3624  *
3625  * string = "this is a string"
3626  * string.slice!(2) #=> "i"
3627  * string.slice!(3..6) #=> " is "
3628  * string.slice!(/s.*t/) #=> "sa st"
3629  * string.slice!("r") #=> "r"
3630  * string #=> "thing"
3631  */
3632 
3633 static VALUE
3635 {
3636  VALUE result;
3637  VALUE buf[3];
3638  int i;
3639 
3640  rb_check_arity(argc, 1, 2);
3641  for (i=0; i<argc; i++) {
3642  buf[i] = argv[i];
3643  }
3644  str_modify_keep_cr(str);
3645  result = rb_str_aref_m(argc, buf, str);
3646  if (!NIL_P(result)) {
3647  buf[i] = rb_str_new(0,0);
3648  rb_str_aset_m(argc+1, buf, str);
3649  }
3650  return result;
3651 }
3652 
3653 static VALUE
3654 get_pat(VALUE pat, int quote)
3655 {
3656  VALUE val;
3657 
3658  switch (TYPE(pat)) {
3659  case T_REGEXP:
3660  return pat;
3661 
3662  case T_STRING:
3663  break;
3664 
3665  default:
3666  val = rb_check_string_type(pat);
3667  if (NIL_P(val)) {
3668  Check_Type(pat, T_REGEXP);
3669  }
3670  pat = val;
3671  }
3672 
3673  if (quote) {
3674  pat = rb_reg_quote(pat);
3675  }
3676 
3677  return rb_reg_regcomp(pat);
3678 }
3679 
3680 
3681 /*
3682  * call-seq:
3683  * str.sub!(pattern, replacement) -> str or nil
3684  * str.sub!(pattern) {|match| block } -> str or nil
3685  *
3686  * Performs the same substitution as String#sub in-place.
3687  *
3688  * Returns +str+ if a substitution was performed or +nil+ if no substitution
3689  * was performed.
3690  */
3691 
3692 static VALUE
3694 {
3695  VALUE pat, repl, hash = Qnil;
3696  int iter = 0;
3697  int tainted = 0;
3698  int untrusted = 0;
3699  long plen;
3700  int min_arity = rb_block_given_p() ? 1 : 2;
3701 
3702  rb_check_arity(argc, min_arity, 2);
3703  if (argc == 1) {
3704  iter = 1;
3705  }
3706  else {
3707  repl = argv[1];
3708  hash = rb_check_hash_type(argv[1]);
3709  if (NIL_P(hash)) {
3710  StringValue(repl);
3711  }
3712  if (OBJ_TAINTED(repl)) tainted = 1;
3713  if (OBJ_UNTRUSTED(repl)) untrusted = 1;
3714  }
3715 
3716  pat = get_pat(argv[0], 1);
3717  str_modifiable(str);
3718  if (rb_reg_search(pat, str, 0, 0) >= 0) {
3719  rb_encoding *enc;
3720  int cr = ENC_CODERANGE(str);
3722  struct re_registers *regs = RMATCH_REGS(match);
3723  long beg0 = BEG(0);
3724  long end0 = END(0);
3725  char *p, *rp;
3726  long len, rlen;
3727 
3728  if (iter || !NIL_P(hash)) {
3729  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
3730 
3731  if (iter) {
3732  repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
3733  }
3734  else {
3735  repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
3736  repl = rb_obj_as_string(repl);
3737  }
3738  str_mod_check(str, p, len);
3739  rb_check_frozen(str);
3740  }
3741  else {
3742  repl = rb_reg_regsub(repl, str, regs, pat);
3743  }
3744  enc = rb_enc_compatible(str, repl);
3745  if (!enc) {
3747  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
3748  if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
3749  coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
3750  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
3751  rb_enc_name(str_enc),
3752  rb_enc_name(STR_ENC_GET(repl)));
3753  }
3754  enc = STR_ENC_GET(repl);
3755  }
3756  rb_str_modify(str);
3757  rb_enc_associate(str, enc);
3758  if (OBJ_TAINTED(repl)) tainted = 1;
3759  if (OBJ_UNTRUSTED(repl)) untrusted = 1;
3760  if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
3761  int cr2 = ENC_CODERANGE(repl);
3762  if (cr2 == ENC_CODERANGE_BROKEN ||
3763  (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
3764  cr = ENC_CODERANGE_UNKNOWN;
3765  else
3766  cr = cr2;
3767  }
3768  plen = end0 - beg0;
3769  rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl);
3770  len = RSTRING_LEN(str);
3771  if (rlen > plen) {
3772  RESIZE_CAPA(str, len + rlen - plen);
3773  }
3774  p = RSTRING_PTR(str);
3775  if (rlen != plen) {
3776  memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
3777  }
3778  memcpy(p + beg0, rp, rlen);
3779  len += rlen - plen;
3780  STR_SET_LEN(str, len);
3781  RSTRING_PTR(str)[len] = '\0';
3782  ENC_CODERANGE_SET(str, cr);
3783  if (tainted) OBJ_TAINT(str);
3784  if (untrusted) OBJ_UNTRUST(str);
3785 
3786  return str;
3787  }
3788  return Qnil;
3789 }
3790 
3791 
3792 /*
3793  * call-seq:
3794  * str.sub(pattern, replacement) -> new_str
3795  * str.sub(pattern, hash) -> new_str
3796  * str.sub(pattern) {|match| block } -> new_str
3797  *
3798  * Returns a copy of +str+ with the _first_ occurrence of +pattern+
3799  * replaced by the second argument. The +pattern+ is typically a Regexp; if
3800  * given as a String, any regular expression metacharacters it contains will
3801  * be interpreted literally, e.g. <code>'\\\d'</code> will match a backslash
3802  * followed by 'd', instead of a digit.
3803  *
3804  * If +replacement+ is a String it will be substituted for the matched text.
3805  * It may contain back-references to the pattern's capture groups of the form
3806  * <code>"\\d"</code>, where <i>d</i> is a group number, or
3807  * <code>"\\k<n>"</code>, where <i>n</i> is a group name. If it is a
3808  * double-quoted string, both back-references must be preceded by an
3809  * additional backslash. However, within +replacement+ the special match
3810  * variables, such as <code>$&</code>, will not refer to the current match.
3811  *
3812  * If the second argument is a Hash, and the matched text is one of its keys,
3813  * the corresponding value is the replacement string.
3814  *
3815  * In the block form, the current match string is passed in as a parameter,
3816  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
3817  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
3818  * returned by the block will be substituted for the match on each call.
3819  *
3820  * The result inherits any tainting in the original string or any supplied
3821  * replacement string.
3822  *
3823  * "hello".sub(/[aeiou]/, '*') #=> "h*llo"
3824  * "hello".sub(/([aeiou])/, '<\1>') #=> "h<e>llo"
3825  * "hello".sub(/./) {|s| s.ord.to_s + ' ' } #=> "104 ello"
3826  * "hello".sub(/(?<foo>[aeiou])/, '*\k<foo>*') #=> "h*e*llo"
3827  * 'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV)
3828  * #=> "Is /bin/bash your preferred shell?"
3829  */
3830 
/*
 * Non-destructive counterpart of rb_str_sub_bang(): duplicates str, runs
 * rb_str_sub_bang() on the duplicate and returns the duplicate.  The result
 * of rb_str_sub_bang() is discarded, so the duplicate is returned
 * unconditionally.
 * NOTE(review): the signature line (listing line 3832) is missing from this
 * listing; the body uses argc, argv and str.
 */
3831 static VALUE
3833 {
3834  str = rb_str_dup(str);
3835  rb_str_sub_bang(argc, argv, str);
3836  return str;
3837 }
3838 
/*
 * Shared worker for global substitution: finds every match of argv[0] in
 * str and assembles a new buffer (dest) in which each match is replaced.
 *
 * argc, argv - argv[0] is the pattern; with argc == 2, argv[1] is the
 *              replacement, either a Hash or a value accepted by
 *              StringValue(); with argc == 1 each replacement is obtained
 *              by yielding the match.
 * str        - string to scan.
 * bang       - non-zero: return Qnil when nothing matches, otherwise move
 *              the result into str; zero: return a separate string.
 *
 * NOTE(review): listing lines 3850 and 3885 are missing from this listing;
 * str_enc is used below without a visible declaration.
 */
3839 static VALUE
3840 str_gsub(int argc, VALUE *argv, VALUE str, int bang)
3841 {
3842  VALUE pat, val, repl, match, dest, hash = Qnil;
3843  struct re_registers *regs;
3844  long beg, n;
3845  long beg0, end0;
3846  long offset, blen, slen, len, last;
3847  int iter = 0;
3848  char *sp, *cp;
3849  int tainted = 0;
3851 
3852  switch (argc) {
3853  case 1:
      /* pattern only: replacements are produced by rb_yield() in the loop */
3854  RETURN_ENUMERATOR(str, argc, argv);
3855  iter = 1;
3856  break;
3857  case 2:
      /* explicit replacement: a Hash is used as a lookup table, anything
       * else must convert to a String */
3858  repl = argv[1];
3859  hash = rb_check_hash_type(argv[1]);
3860  if (NIL_P(hash)) {
3861  StringValue(repl);
3862  }
3863  if (OBJ_TAINTED(repl)) tainted = 1;
3864  break;
3865  default:
3866  rb_check_arity(argc, 1, 2);
3867  }
3868 
3869  pat = get_pat(argv[0], 1);
3870  beg = rb_reg_search(pat, str, 0, 0);
3871  if (beg < 0) {
3872  if (bang) return Qnil; /* no match, no substitution */
3873  return rb_str_dup(str);
3874  }
3875 
3876  offset = 0;
3877  n = 0;
3878  blen = RSTRING_LEN(str) + 30; /* len + margin */
3879  dest = rb_str_buf_new(blen);
      /* remember the original buffer and length for str_mod_check() below */
3880  sp = RSTRING_PTR(str);
3881  slen = RSTRING_LEN(str);
3882  cp = sp;
3883  str_enc = STR_ENC_GET(str);
3884  rb_enc_associate(dest, str_enc);
3886 
      /* one iteration per match; offset is where the next search starts and
       * cp points at the first byte of str not yet copied into dest */
3887  do {
3888  n++;
3889  match = rb_backref_get();
3890  regs = RMATCH_REGS(match);
3891  beg0 = BEG(0);
3892  end0 = END(0);
3893  if (iter || !NIL_P(hash)) {
3894  if (iter) {
3895  val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
3896  }
3897  else {
3898  val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
3899  val = rb_obj_as_string(val);
3900  }
      /* NOTE(review): presumably verifies that the block or hash lookup
       * did not modify str (original pointer and length are passed) --
       * confirm against str_mod_check() */
3901  str_mod_check(str, sp, slen);
3902  if (val == dest) { /* paranoid check [ruby-dev:24827] */
3903  rb_raise(rb_eRuntimeError, "block should not cheat");
3904  }
3905  }
3906  else {
3907  val = rb_reg_regsub(repl, str, regs, pat);
3908  }
3909 
3910  if (OBJ_TAINTED(val)) tainted = 1;
3911 
3912  len = beg0 - offset; /* copy pre-match substr */
3913  if (len) {
3914  rb_enc_str_buf_cat(dest, cp, len, str_enc);
3915  }
3916 
3917  rb_str_buf_append(dest, val);
3918 
      /* keep the previous search offset; it is reused after the loop */
3919  last = offset;
3920  offset = end0;
3921  if (beg0 == end0) {
3922  /*
3923  * Always consume at least one character of the input string
3924  * in order to prevent infinite loops.
3925  */
3926  if (RSTRING_LEN(str) <= end0) break;
3927  len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
3928  rb_enc_str_buf_cat(dest, RSTRING_PTR(str)+end0, len, str_enc);
3929  offset = end0 + len;
3930  }
3931  cp = RSTRING_PTR(str) + offset;
3932  if (offset > RSTRING_LEN(str)) break;
3933  beg = rb_reg_search(pat, str, offset, 0);
3934  } while (beg >= 0);
      /* copy whatever follows the last match */
3935  if (RSTRING_LEN(str) > offset) {
3936  rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc);
3937  }
      /* NOTE(review): the return value is ignored; presumably this search is
       * repeated from the saved offset for its effect on the match data left
       * behind for the caller -- confirm */
3938  rb_reg_search(pat, str, last, 0);
3939  if (bang) {
3940  rb_str_shared_replace(str, dest);
3941  }
3942  else {
      /* give the result the class of the receiver and propagate its flags */
3943  RBASIC(dest)->klass = rb_obj_class(str);
3944  OBJ_INFECT(dest, str);
3945  str = dest;
3946  }
3947 
3948  if (tainted) OBJ_TAINT(str);
3949  return str;
3950 }
3951 
3952 
3953 /*
3954  * call-seq:
3955  * str.gsub!(pattern, replacement) -> str or nil
3956  * str.gsub!(pattern) {|match| block } -> str or nil
3957  * str.gsub!(pattern) -> an_enumerator
3958  *
3959  * Performs the substitutions of <code>String#gsub</code> in place, returning
3960  * <i>str</i>, or <code>nil</code> if no substitutions were performed.
3961  * If no block and no <i>replacement</i> is given, an enumerator is returned instead.
3962  */
3963 
/*
 * In-place variant: calls str_modify_keep_cr() on str, then delegates to
 * str_gsub() with bang = 1 and returns its result (Qnil when str_gsub()
 * finds no match).
 * NOTE(review): the signature line (listing line 3965) is missing from this
 * listing; the body uses argc, argv and str.
 */
3964 static VALUE
3966 {
3967  str_modify_keep_cr(str);
3968  return str_gsub(argc, argv, str, 1);
3969 }
3970 
3971 
3972 /*
3973  * call-seq:
3974  * str.gsub(pattern, replacement) -> new_str
3975  * str.gsub(pattern, hash) -> new_str
3976  * str.gsub(pattern) {|match| block } -> new_str
3977  * str.gsub(pattern) -> enumerator
3978  *
3979  * Returns a copy of <i>str</i> with <em>all</em> occurrences of
3980  * <i>pattern</i> replaced by the second argument. The <i>pattern</i> is
3981  * typically a <code>Regexp</code>; if given as a <code>String</code>, any
3982  * regular expression metacharacters it contains will be interpreted
3983  * literally, e.g. <code>'\\\d'</code> will match a backslash followed by 'd',
3984  * instead of a digit.
3985  *
3986  * If <i>replacement</i> is a <code>String</code> it will be substituted for
3987  * the matched text. It may contain back-references to the pattern's capture
3988  * groups of the form <code>\\\d</code>, where <i>d</i> is a group number, or
3989  * <code>\\\k<n></code>, where <i>n</i> is a group name. If it is a
3990  * double-quoted string, both back-references must be preceded by an
3991  * additional backslash. However, within <i>replacement</i> the special match
3992  * variables, such as <code>$&</code>, will not refer to the current match.
3993  *
3994  * If the second argument is a <code>Hash</code>, and the matched text is one
3995  * of its keys, the corresponding value is the replacement string.
3996  *
3997  * In the block form, the current match string is passed in as a parameter,
3998  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
3999  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
4000  * returned by the block will be substituted for the match on each call.
4001  *
4002  * The result inherits any tainting in the original string or any supplied
4003  * replacement string.
4004  *
4005  * When neither a block nor a second argument is supplied, an
4006  * <code>Enumerator</code> is returned.
4007  *
4008  * "hello".gsub(/[aeiou]/, '*') #=> "h*ll*"
4009  * "hello".gsub(/([aeiou])/, '<\1>') #=> "h<e>ll<o>"
4010  * "hello".gsub(/./) {|s| s.ord.to_s + ' '} #=> "104 101 108 108 111 "
4011  * "hello".gsub(/(?<foo>[aeiou])/, '{\k<foo>}') #=> "h{e}ll{o}"
4012  * 'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*') #=> "h3ll*"
4013  */
4014 
/*
 * Non-destructive variant: delegates to str_gsub() with bang = 0 and
 * returns its result.
 * NOTE(review): the signature line (listing line 4016) is missing from this
 * listing; the body uses argc, argv and str.
 */
4015 static VALUE
4017 {
4018  return str_gsub(argc, argv, str, 0);
4019 }
4020 
4021 
4022 /*
4023  * call-seq:
4024  * str.replace(other_str) -> str
4025  *
4026  * Replaces the contents and taintedness of <i>str</i> with the corresponding
4027  * values in <i>other_str</i>.
4028  *
4029  * s = "hello" #=> "hello"
4030  * s.replace "world" #=> "world"
4031  */
4032 
/*
 * Replaces the contents of str with those of str2 and returns the result of
 * str_replace().  Replacing a string with itself is a no-op that returns
 * str.
 * NOTE(review): the signature line (listing line 4034) is missing from this
 * listing; the body uses str and str2.
 */
4033 VALUE
4035 {
      /* run the modifiability check first, even for the self-replace case */
4036  str_modifiable(str);
4037  if (str == str2) return str;
4038 
4039  StringValue(str2);
4040  str_discard(str);
4041  return str_replace(str, str2);
4042 }
4043 
4044 /*
4045  * call-seq:
4046  * string.clear -> string
4047  *
4048  * Makes string empty.
4049  *
4050  * a = "abcde"
4051  * a.clear #=> ""
4052  */
4053 
/*
 * Empties str in place and returns it: after str_discard() the string is
 * switched to the embedded representation with length 0 and a terminating
 * 0 byte.
 * NOTE(review): listing lines 4055 (signature), 4062 and 4064 (the
 * statements of the if/else below) are missing from this listing.
 */
4054 static VALUE
4056 {
4057  str_discard(str);
4058  STR_SET_EMBED(str);
4059  STR_SET_EMBED_LEN(str, 0);
4060  RSTRING_PTR(str)[0] = 0;
      /* branch on whether the string's encoding is ASCII-compatible */
4061  if (rb_enc_asciicompat(STR_ENC_GET(str)))
4063  else
4065  return str;
4066 }
4067 
4068 /*
4069  * call-seq:
4070  * string.chr -> string
4071  *
4072  * Returns a one-character string at the beginning of the string.
4073  *
4074  * a = "abcde"
4075  * a.chr #=> "a"
4076  */
4077 
/*
 * Returns rb_str_substr(str, 0, 1), i.e. the substring of length 1 that
 * starts at offset 0.
 * NOTE(review): the signature line (listing line 4079) is missing from this
 * listing; the body uses str.
 */
4078 static VALUE
4080 {
4081  return rb_str_substr(str, 0, 1);
4082 }
4083 
4084 /*
4085  * call-seq:
4086  * str.getbyte(index) -> 0 .. 255
4087  *
4088  * Returns the <i>index</i>th byte as an integer, or <code>nil</code> if <i>index</i> is out of range.
4089  */
/*
 * Returns the byte at position index as a Fixnum in 0..255, or Qnil when
 * the position lies outside the string.
 * NOTE(review): the signature line (listing line 4091) is missing from this
 * listing; the body uses str and index.
 */
4090 static VALUE
4092 {
4093  long pos = NUM2LONG(index);
4094 
      /* a negative position counts back from the end of the string */
4095  if (pos < 0)
4096  pos += RSTRING_LEN(str);
4097  if (pos < 0 || RSTRING_LEN(str) <= pos)
4098  return Qnil;
4099 
      /* go through unsigned char so bytes >= 0x80 are not sign-extended */
4100  return INT2FIX((unsigned char)RSTRING_PTR(str)[pos]);
4101 }
4102 
4103 /*
4104  * call-seq:
4105  * str.setbyte(index, integer) -> integer
4106  *
4107  * Sets the <i>index</i>th byte to <i>integer</i>.
4108  */
/*
 * Stores value at byte position index of str and returns value.  Raises
 * IndexError when index lies outside -len .. len-1.
 * NOTE(review): the signature line (listing line 4110) is missing from this
 * listing; the body uses str, index and value.
 */
4109 static VALUE
4111 {
4112  long pos = NUM2LONG(index);
4113  int byte = NUM2INT(value);
4114 
4115  rb_str_modify(str);
4116 
      /* range check is done before normalising, so the error message
       * reports the position as given by the caller */
4117  if (pos < -RSTRING_LEN(str) || RSTRING_LEN(str) <= pos)
4118  rb_raise(rb_eIndexError, "index %ld out of string", pos);
4119  if (pos < 0)
4120  pos += RSTRING_LEN(str);
4121 
      /* the int is narrowed to char by this assignment */
4122  RSTRING_PTR(str)[pos] = byte;
4123 
4124  return value;
4125 }
4126 
/*
 * Returns the substring of str that starts at byte offset beg and spans at
 * most len bytes, or Qnil when beg is past the end, still negative after
 * being counted from the end, or len is negative.
 *
 * str - source string.
 * beg - byte offset; a negative value counts from the end.
 * len - requested byte count; clamped to what is available.
 *
 * NOTE(review): listing lines 4162, 4164, 4169 and 4172 (the statements
 * inside the coderange if/else and switch near the end) are missing from
 * this listing.
 */
4127 static VALUE
4128 str_byte_substr(VALUE str, long beg, long len)
4129 {
4130  char *p, *s = RSTRING_PTR(str);
4131  long n = RSTRING_LEN(str);
4132  VALUE str2;
4133 
4134  if (beg > n || len < 0) return Qnil;
4135  if (beg < 0) {
4136  beg += n;
4137  if (beg < 0) return Qnil;
4138  }
      /* clamp the length to the bytes available after beg */
4139  if (beg + len > n)
4140  len = n - beg;
4141  if (len <= 0) {
4142  len = 0;
4143  p = 0;
4144  }
4145  else
4146  p = s + beg;
4147 
      /* a substring too long to embed that reaches the end of str: build it
       * from rb_str_new4()/str_new3() and then narrow ptr/len to the last
       * len bytes (NOTE(review): presumably this shares the buffer instead
       * of copying -- confirm) */
4148  if (len > RSTRING_EMBED_LEN_MAX && beg + len == n) {
4149  str2 = rb_str_new4(str);
4150  str2 = str_new3(rb_obj_class(str2), str2);
4151  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
4152  RSTRING(str2)->as.heap.len = len;
4153  }
4154  else {
4155  str2 = rb_str_new5(str, p, len);
4156  }
4157 
4158  str_enc_copy(str2, str);
4159 
4160  if (RSTRING_LEN(str2) == 0) {
4161  if (!rb_enc_asciicompat(STR_ENC_GET(str)))
4163  else
4165  }
4166  else {
4167  switch (ENC_CODERANGE(str)) {
4168  case ENC_CODERANGE_7BIT:
4170  break;
4171  default:
4173  break;
4174  }
4175  }
4176 
4177  OBJ_INFECT(str2, str);
4178 
4179  return str2;
4180 }
4181 
/*
 * Single-argument byte indexing: indx is either an integer position, which
 * yields a one-byte substring, or a Range, which yields the bytes it
 * covers.  Returns Qnil when the position or range is outside the string.
 * NOTE(review): the signature line (listing line 4183) is missing from this
 * listing; the body uses str and indx.
 */
4182 static VALUE
4184 {
4185  long idx;
4186  switch (TYPE(indx)) {
4187  case T_FIXNUM:
4188  idx = FIX2LONG(indx);
4189 
4190  num_index:
4191  str = str_byte_substr(str, idx, 1);
      /* an empty result (idx == length) is reported as nil as well */
4192  if (NIL_P(str) || RSTRING_LEN(str) == 0) return Qnil;
4193  return str;
4194 
4195  default:
4196  /* check if indx is Range */
4197  {
4198  long beg, len = RSTRING_LEN(str);
4199 
4200  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
      /* Qfalse: fall out of the switch and treat indx as a number */
4201  case Qfalse:
4202  break;
4203  case Qnil:
4204  return Qnil;
4205  default:
4206  return str_byte_substr(str, beg, len);
4207  }
4208  }
4209  idx = NUM2LONG(indx);
4210  goto num_index;
4211  }
4212 
4213  UNREACHABLE;
4214 }
4215 
4216 /*
4217  * call-seq:
4218  * str.byteslice(fixnum) -> new_str or nil
4219  * str.byteslice(fixnum, fixnum) -> new_str or nil
4220  * str.byteslice(range) -> new_str or nil
4221  *
4222  * Byte Reference---If passed a single <code>Fixnum</code>, returns a
4223  * substring of one byte at that position. If passed two <code>Fixnum</code>
4224  * objects, returns a substring starting at the offset given by the first, and
4225  * a length given by the second. If given a <code>Range</code>, a substring containing
4226  * bytes at offsets given by the range is returned. In all three cases, if
4227  * an offset is negative, it is counted from the end of <i>str</i>. Returns
4228  * <code>nil</code> if the initial offset falls outside the string, the length
4229  * is negative, or the beginning of the range is greater than the end.
4230  * The resulting string keeps the encoding of the original string.
4231  *
4232  * "hello".byteslice(1) #=> "e"
4233  * "hello".byteslice(-1) #=> "o"
4234  * "hello".byteslice(1, 2) #=> "el"
4235  * "\x80\u3042".byteslice(1, 3) #=> "\u3042"
4236  * "\x03\u3042\xff".byteslice(1..3) #=> "\u3042"
4237  */
4238 
/*
 * Entry point for byte slicing: with two arguments, both are converted with
 * NUM2LONG() and passed to str_byte_substr() as offset and length; with one
 * argument the work is delegated to str_byte_aref().  Any other argument
 * count is rejected by rb_check_arity().
 * NOTE(review): the signature line (listing line 4240) is missing from this
 * listing; the body uses argc, argv and str.
 */
4239 static VALUE
4241 {
4242  if (argc == 2) {
4243  return str_byte_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
4244  }
4245  rb_check_arity(argc, 1, 2);
4246  return str_byte_aref(str, argv[0]);
4247 }
4248 
4249 /*
4250  * call-seq:
4251  * str.reverse -> new_str
4252  *
4253  * Returns a new string with the characters from <i>str</i> in reverse order.
4254  *
4255  * "stressed".reverse #=> "desserts"
4256  */
4257 
/*
 * Builds and returns a new string holding the characters of str in reverse
 * order.  Strings of length 0 or 1 are simply duplicated.
 * NOTE(review): listing lines 4259 (signature), 4303, 4306 and 4309 are
 * missing from this listing; the bodies of the if/else on `single` near the
 * end are therefore not visible.
 */
4258 static VALUE
4260 {
4261  rb_encoding *enc;
4262  VALUE rev;
4263  char *s, *e, *p;
      /* stays 1 only while every character seen is one byte below 0x80 */
4264  int single = 1;
4265 
4266  if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
4267  enc = STR_ENC_GET(str);
4268  rev = rb_str_new5(str, 0, RSTRING_LEN(str));
      /* s walks str forwards while p fills rev from its end backwards */
4269  s = RSTRING_PTR(str); e = RSTRING_END(str);
4270  p = RSTRING_END(rev);
4271 
      /* always true here: lengths <= 1 returned above */
4272  if (RSTRING_LEN(str) > 1) {
4273  if (single_byte_optimizable(str)) {
      /* byte-wise reversal */
4274  while (s < e) {
4275  *--p = *s++;
4276  }
4277  }
4278  else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID) {
      /* character-wise reversal using rb_enc_fast_mbclen() */
4279  while (s < e) {
4280  int clen = rb_enc_fast_mbclen(s, e, enc);
4281 
4282  if (clen > 1 || (*s & 0x80)) single = 0;
4283  p -= clen;
4284  memcpy(p, s, clen);
4285  s += clen;
4286  }
4287  }
4288  else {
      /* same loop, but character lengths come from rb_enc_mbclen() */
4289  while (s < e) {
4290  int clen = rb_enc_mbclen(s, e, enc);
4291 
4292  if (clen > 1 || (*s & 0x80)) single = 0;
4293  p -= clen;
4294  memcpy(p, s, clen);
4295  s += clen;
4296  }
4297  }
4298  }
4299  STR_SET_LEN(rev, RSTRING_LEN(str));
4300  OBJ_INFECT(rev, str);
4301  if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) {
4302  if (single) {
4304  }
4305  else {
4307  }
4308  }
4310 
4311  return rev;
4312 }
4313 
4314 
4315 /*
4316  * call-seq:
4317  * str.reverse! -> str
4318  *
4319  * Reverses <i>str</i> in place.
4320  */
4321 
/*
 * Reverses str in place and returns it.  When single_byte_optimizable(str)
 * holds, bytes are swapped pairwise from both ends.  Strings of length 0 or
 * 1 only go through str_modify_keep_cr().
 * NOTE(review): listing lines 4323 (signature) and 4339 (the statement of
 * the inner else branch, i.e. the non-single-byte case) are missing from
 * this listing.
 */
4322 static VALUE
4324 {
4325  if (RSTRING_LEN(str) > 1) {
4326  if (single_byte_optimizable(str)) {
4327  char *s, *e, c;
4328 
4329  str_modify_keep_cr(str);
4330  s = RSTRING_PTR(str);
4331  e = RSTRING_END(str) - 1;
      /* swap the outermost bytes and move both cursors inwards */
4332  while (s < e) {
4333  c = *s;
4334  *s++ = *e;
4335  *e-- = c;
4336  }
4337  }
4338  else {
4340  }
4341  }
4342  else {
4343  str_modify_keep_cr(str);
4344  }
4345  return str;
4346 }
4347 
4348 
4349 /*
4350  * call-seq:
4351  * str.include? other_str -> true or false
4352  *
4353  * Returns <code>true</code> if <i>str</i> contains the given string or
4354  * character.
4355  *
4356  * "hello".include? "lo" #=> true
4357  * "hello".include? "ol" #=> false
4358  * "hello".include? ?h #=> true
4359  */
4360 
/*
 * Returns Qtrue when rb_str_index() finds arg in str starting from offset
 * 0, Qfalse when it returns -1.  arg is converted with StringValue() first.
 * NOTE(review): the signature line (listing line 4362) is missing from this
 * listing; the body uses str and arg.
 */
4361 static VALUE
4363 {
4364  long i;
4365 
4366  StringValue(arg);
4367  i = rb_str_index(str, arg, 0);
4368 
4369  if (i == -1) return Qfalse;
4370  return Qtrue;
4371 }
4372 
4373 
4374 /*
4375  * call-seq:
4376  * str.to_i(base=10) -> integer
4377  *
4378  * Returns the result of interpreting leading characters in <i>str</i> as an
4379  * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
4380  * end of a valid number are ignored. If there is not a valid number at the
4381  * start of <i>str</i>, <code>0</code> is returned. This method never raises an
4382  * exception when <i>base</i> is valid.
4383  *
4384  * "12345".to_i #=> 12345
4385  * "99 red balloons".to_i #=> 99
4386  * "0a".to_i #=> 0
4387  * "0a".to_i(16) #=> 10
4388  * "hello".to_i #=> 0
4389  * "1100101".to_i(2) #=> 101
4390  * "1100101".to_i(8) #=> 294977
4391  * "1100101".to_i(10) #=> 1100101
4392  * "1100101".to_i(16) #=> 17826049
4393  */
4394 
/*
 * Converts str to an integer via rb_str_to_inum().  The base defaults to 10
 * when no argument is given; otherwise the single optional argument is
 * converted with NUM2INT().  A negative base raises ArgumentError.
 * NOTE(review): the signature line (listing line 4396) is missing from this
 * listing; the body uses argc, argv and str.
 */
4395 static VALUE
4397 {
4398  int base;
4399 
4400  if (argc == 0) base = 10;
4401  else {
4402  VALUE b;
4403 
4404  rb_scan_args(argc, argv, "01", &b);
4405  base = NUM2INT(b);
4406  }
      /* only negative values are rejected here; any further validation of
       * the base is left to rb_str_to_inum() */
4407  if (base < 0) {
4408  rb_raise(rb_eArgError, "invalid radix %d", base);
4409  }
4410  return rb_str_to_inum(str, base, FALSE);
4411 }
4412 
4413 
4414 /*
4415  * call-seq:
4416  * str.to_f -> float
4417  *
4418  * Returns the result of interpreting leading characters in <i>str</i> as a
4419  * floating point number. Extraneous characters past the end of a valid number
4420  * are ignored. If there is not a valid number at the start of <i>str</i>,
4421  * <code>0.0</code> is returned. This method never raises an exception.
4422  *
4423  * "123.45e1".to_f #=> 1234.5
4424  * "45.67 degrees".to_f #=> 45.67
4425  * "thx1138".to_f #=> 0.0
4426  */
4427 
/*
 * Returns the result of rb_str_to_dbl(str, FALSE) wrapped with DBL2NUM().
 * NOTE(review): the signature line (listing line 4429) is missing from this
 * listing; the body uses str.
 */
4428 static VALUE
4430 {
4431  return DBL2NUM(rb_str_to_dbl(str, FALSE));
4432 }
4433 
4434 
4435 /*
4436  * call-seq:
4437  * str.to_s -> str
4438  * str.to_str -> str
4439  *
4440  * Returns the receiver, or a String copy of it when the receiver's class is not String itself.
4441  */
4442 
/*
 * Returns str itself when its class is exactly rb_cString; otherwise
 * returns str_duplicate(rb_cString, str).
 * NOTE(review): the signature line (listing line 4444) is missing from this
 * listing; the body uses str.
 */
4443 static VALUE
4445 {
4446  if (rb_obj_class(str) != rb_cString) {
4447  return str_duplicate(rb_cString, str);
4448  }
4449  return str;
4450 }
4451 
4452 #if 0
4453 static void
4454 str_cat_char(VALUE str, unsigned int c, rb_encoding *enc)
4455 {
4456  char s[RUBY_MAX_CHAR_LEN];
4457  int n = rb_enc_codelen(c, enc);
4458 
4459  rb_enc_mbcput(c, s, enc);
4460  rb_enc_str_buf_cat(str, s, n, enc);
4461 }
4462 #endif
4463 
4464 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
4465 
4466 int
4467 rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
4468 {
4469  char buf[CHAR_ESC_LEN + 1];
4470  int l;
4471 
4472 #if SIZEOF_INT > 4
4473  c &= 0xffffffff;
4474 #endif
4475  if (unicode_p) {
4476  if (c < 0x7F && ISPRINT(c)) {
4477  snprintf(buf, CHAR_ESC_LEN, "%c", c);
4478  }
4479  else if (c < 0x10000) {
4480  snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c);
4481  }
4482  else {
4483  snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c);
4484  }
4485  }
4486  else {
4487  if (c < 0x100) {
4488  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c);
4489  }
4490  else {
4491  snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c);
4492  }
4493  }
4494  l = (int)strlen(buf); /* CHAR_ESC_LEN cannot exceed INT_MAX */
4495  rb_str_buf_cat(result, buf, l);
4496  return l;
4497 }
4498 
4499 /*
4500  * call-seq:
4501  * str.inspect -> string
4502  *
4503  * Returns a printable version of _str_, surrounded by quote marks,
4504  * with special characters escaped.
4505  *
4506  * str = "hello"
4507  * str[3] = "\b"
4508  * str.inspect #=> "\"hel\\bo\""
4509  */
4510 
/*
 * Builds and returns a double-quoted, escaped rendering of str.  Runs of
 * bytes that can be emitted unchanged are copied in one piece (prev marks
 * the start of the pending run, p the scan position); everything else is
 * flushed and replaced by an escape sequence.
 * NOTE(review): listing lines 4512 (signature), 4517-4518 and 4574 are
 * missing from this listing; `result` and `resenc` are used below without
 * a visible declaration.
 */
4511 VALUE
4513 {
4514  rb_encoding *enc = STR_ENC_GET(str);
4515  const char *p, *pend, *prev;
4516  char buf[CHAR_ESC_LEN + 1];
4519  int unicode_p = rb_enc_unicode_p(enc);
4520  int asciicompat = rb_enc_asciicompat(enc);
      /* looked up once and cached across calls */
4521  static rb_encoding *utf16, *utf32;
4522 
4523  if (!utf16) utf16 = rb_enc_find("UTF-16");
4524  if (!utf32) utf32 = rb_enc_find("UTF-32");
      /* the result encoding falls back to the default external encoding,
       * and to US-ASCII when that is not ASCII-compatible */
4525  if (resenc == NULL) resenc = rb_default_external_encoding();
4526  if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
4527  rb_enc_associate(result, resenc);
4528  str_buf_cat2(result, "\"");
4529 
4530  p = RSTRING_PTR(str); pend = RSTRING_END(str);
4531  prev = p;
      /* for the UTF-16 / UTF-32 encodings, pick the BE or LE variant from
       * the leading byte-order mark; without one the string is not treated
       * as Unicode.
       * NOTE(review): q[1]..q[3] are read without a visible check that the
       * string holds that many bytes -- confirm this is safe for short
       * strings */
4532  if (enc == utf16) {
4533  const unsigned char *q = (const unsigned char *)p;
4534  if (q[0] == 0xFE && q[1] == 0xFF)
4535  enc = rb_enc_find("UTF-16BE");
4536  else if (q[0] == 0xFF && q[1] == 0xFE)
4537  enc = rb_enc_find("UTF-16LE");
4538  else
4539  unicode_p = 0;
4540  }
4541  else if (enc == utf32) {
4542  const unsigned char *q = (const unsigned char *)p;
4543  if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF)
4544  enc = rb_enc_find("UTF-32BE");
4545  else if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF)
4546  enc = rb_enc_find("UTF-32LE");
4547  else
4548  unicode_p = 0;
4549  }
4550  while (p < pend) {
4551  unsigned int c, cc;
4552  int n;
4553 
4554  n = rb_enc_precise_mbclen(p, pend, enc);
4555  if (!MBCLEN_CHARFOUND_P(n)) {
      /* no valid character here: flush the pending run, then emit up to
       * mbminlen bytes (bounded by the end of the string) as \xNN */
4556  if (p > prev) str_buf_cat(result, prev, p - prev);
4557  n = rb_enc_mbminlen(enc);
4558  if (pend < p + n)
4559  n = (int)(pend - p);
4560  while (n--) {
4561  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
4562  str_buf_cat(result, buf, strlen(buf));
4563  prev = ++p;
4564  }
4565  continue;
4566  }
4567  n = MBCLEN_CHARFOUND_LEN(n);
4568  c = rb_enc_mbc_to_codepoint(p, pend, enc);
      /* from here on p points past the current character; p - n is its
       * first byte */
4569  p += n;
      /* '"', '\\' and a '#' followed by '$', '@' or '{' get a backslash
       * prefix */
4570  if ((asciicompat || unicode_p) &&
4571  (c == '"'|| c == '\\' ||
4572  (c == '#' &&
4573  p < pend &&
4575  (cc = rb_enc_codepoint(p,pend,enc),
4576  (cc == '$' || cc == '@' || cc == '{'))))) {
4577  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4578  str_buf_cat2(result, "\\");
4579  if (asciicompat || enc == resenc) {
      /* the character itself becomes the start of the next pending run */
4580  prev = p - n;
4581  continue;
4582  }
4583  }
      /* control characters with a one-letter escape */
4584  switch (c) {
4585  case '\n': cc = 'n'; break;
4586  case '\r': cc = 'r'; break;
4587  case '\t': cc = 't'; break;
4588  case '\f': cc = 'f'; break;
4589  case '\013': cc = 'v'; break;
4590  case '\010': cc = 'b'; break;
4591  case '\007': cc = 'a'; break;
4592  case 033: cc = 'e'; break;
4593  default: cc = 0; break;
4594  }
4595  if (cc) {
4596  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4597  buf[0] = '\\';
4598  buf[1] = (char)cc;
4599  str_buf_cat(result, buf, 2);
4600  prev = p;
4601  continue;
4602  }
      /* printable characters stay in the pending run and are copied
       * verbatim later */
4603  if ((enc == resenc && rb_enc_isprint(c, enc)) ||
4604  (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
4605  continue;
4606  }
4607  else {
      /* everything else is written as a numeric escape */
4608  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4609  rb_str_buf_cat_escaped_char(result, c, unicode_p);
4610  prev = p;
4611  continue;
4612  }
4613  }
      /* flush the final pending run and close the quote */
4614  if (p > prev) str_buf_cat(result, prev, p - prev);
4615  str_buf_cat2(result, "\"");
4616 
4617  OBJ_INFECT(result, str);
4618  return result;
4619 }
4620 
4621 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
4622 
4623 /*
4624  * call-seq:
4625  * str.dump -> new_str
4626  *
4627  * Produces a version of +str+ with all non-printing characters replaced by
4628  * <code>\nnn</code> notation and all special characters escaped.
4629  *
4630  * "hello \n ''".dump #=> "\"hello \\n ''\""
4631  */
4632 
/*
 * Returns a new string holding a quoted, escaped, byte-oriented rendering
 * of str.  Works in two passes over the bytes of str: the first computes
 * the exact length of the output, the second writes it into a string
 * allocated with that length.  For encodings that are not ASCII-compatible
 * a `.force_encoding("NAME")` suffix is appended.
 * NOTE(review): listing lines 4634 (signature) and 4760 are missing from
 * this listing.
 */
4633 VALUE
4635 {
4636  rb_encoding *enc = rb_enc_get(str);
4637  long len;
4638  const char *p, *pend;
4639  char *q, *qend;
4640  VALUE result;
4641  int u8 = (enc == rb_utf8_encoding());
4642 
      /* pass 1: compute the output length */
4643  len = 2; /* "" */
4644  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
4645  while (p < pend) {
4646  unsigned char c = *p++;
4647  switch (c) {
4648  case '"': case '\\':
4649  case '\n': case '\r':
4650  case '\t': case '\f':
4651  case '\013': case '\010': case '\007': case '\033':
4652  len += 2;
4653  break;
4654 
4655  case '#':
      /* '#' needs a backslash only when '$', '@' or '{' follows */
4656  len += IS_EVSTR(p, pend) ? 2 : 1;
4657  break;
4658 
4659  default:
4660  if (ISPRINT(c)) {
4661  len++;
4662  }
4663  else {
4664  if (u8) { /* \u{NN} */
4665  int n = rb_enc_precise_mbclen(p-1, pend, enc);
      /* testing n-1 restricts this branch to characters longer than one
       * byte; the writing pass below applies the same test */
4666  if (MBCLEN_CHARFOUND_P(n-1)) {
4667  unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
      /* one byte per hex digit beyond the first, plus 5 for `\u{`, the
       * first digit and `}` */
4668  while (cc >>= 4) len++;
4669  len += 5;
4670  p += MBCLEN_CHARFOUND_LEN(n)-1;
4671  break;
4672  }
4673  }
4674  len += 4; /* \xNN */
4675  }
4676  break;
4677  }
4678  }
4679  if (!rb_enc_asciicompat(enc)) {
4680  len += 19; /* ".force_encoding('')" */
4681  len += strlen(enc->name);
4682  }
4683 
      /* pass 2: write the output; qend leaves room for the terminating NUL
       * that snprintf() writes */
4684  result = rb_str_new5(str, 0, len);
4685  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
4686  q = RSTRING_PTR(result); qend = q + len + 1;
4687 
4688  *q++ = '"';
4689  while (p < pend) {
4690  unsigned char c = *p++;
4691 
4692  if (c == '"' || c == '\\') {
4693  *q++ = '\\';
4694  *q++ = c;
4695  }
4696  else if (c == '#') {
4697  if (IS_EVSTR(p, pend)) *q++ = '\\';
4698  *q++ = '#';
4699  }
4700  else if (c == '\n') {
4701  *q++ = '\\';
4702  *q++ = 'n';
4703  }
4704  else if (c == '\r') {
4705  *q++ = '\\';
4706  *q++ = 'r';
4707  }
4708  else if (c == '\t') {
4709  *q++ = '\\';
4710  *q++ = 't';
4711  }
4712  else if (c == '\f') {
4713  *q++ = '\\';
4714  *q++ = 'f';
4715  }
4716  else if (c == '\013') {
4717  *q++ = '\\';
4718  *q++ = 'v';
4719  }
4720  else if (c == '\010') {
4721  *q++ = '\\';
4722  *q++ = 'b';
4723  }
4724  else if (c == '\007') {
4725  *q++ = '\\';
4726  *q++ = 'a';
4727  }
4728  else if (c == '\033') {
4729  *q++ = '\\';
4730  *q++ = 'e';
4731  }
4732  else if (ISPRINT(c)) {
4733  *q++ = c;
4734  }
4735  else {
4736  *q++ = '\\';
4737  if (u8) {
4738  int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
4739  if (MBCLEN_CHARFOUND_P(n)) {
4740  int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
4741  p += n;
4742  snprintf(q, qend-q, "u{%x}", cc);
4743  q += strlen(q);
4744  continue;
4745  }
4746  }
4747  snprintf(q, qend-q, "x%02X", c);
      /* 'x' plus two hex digits */
4748  q += 3;
4749  }
4750  }
4751  *q++ = '"';
4752  *q = '\0';
4753  if (!rb_enc_asciicompat(enc)) {
4754  snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name);
4755  enc = rb_ascii8bit_encoding();
4756  }
4757  OBJ_INFECT(result, str);
4758  /* result from dump is ASCII */
4759  rb_enc_associate(result, enc);
4761  return result;
4762 }
4763 
4764 
/*
 * Raises Encoding::CompatibilityError (rb_eEncCompatError) when enc is a
 * dummy encoding according to rb_enc_dummy_p(); otherwise does nothing.
 * NOTE(review): the signature line (listing line 4766) is missing from this
 * listing; the body uses enc.
 */
4765 static void
4767 {
4768  if (rb_enc_dummy_p(enc)) {
4769  rb_raise(rb_eEncCompatError, "incompatible encoding with this operation: %s",
4770  rb_enc_name(enc));
4771  }
4772 }
4773 
4774 /*
4775  * call-seq:
4776  * str.upcase! -> str or nil
4777  *
4778  * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
4779  * were made.
4780  * Note: case replacement is effective only in ASCII region.
4781  */
4782 
/*
 * Upcases str in place.  Returns str when at least one character was
 * changed and Qnil otherwise.
 * NOTE(review): listing lines 4784 (signature) and 4793 are missing from
 * this listing.
 */
4783 static VALUE
4785 {
4786  rb_encoding *enc;
4787  char *s, *send;
4788  int modify = 0;
4789  int n;
4790 
4791  str_modify_keep_cr(str);
4792  enc = STR_ENC_GET(str);
4794  s = RSTRING_PTR(str); send = RSTRING_END(str);
4795  if (single_byte_optimizable(str)) {
      /* byte-wise path: only ASCII 'a'..'z' are converted */
4796  while (s < send) {
4797  unsigned int c = *(unsigned char*)s;
4798 
4799  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
4800  *s = 'A' + (c - 'a');
4801  modify = 1;
4802  }
4803  s++;
4804  }
4805  }
4806  else {
4807  int ascompat = rb_enc_asciicompat(enc);
4808 
4809  while (s < send) {
4810  unsigned int c;
4811 
      /* in ASCII-compatible encodings, bytes below 0x80 are handled
       * directly without decoding */
4812  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
4813  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
4814  *s = 'A' + (c - 'a');
4815  modify = 1;
4816  }
4817  s++;
4818  }
4819  else {
      /* decode one character; n receives its byte length */
4820  c = rb_enc_codepoint_len(s, send, &n, enc);
4821  if (rb_enc_islower(c, enc)) {
4822  /* assuming toupper returns codepoint with same size */
4823  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
4824  modify = 1;
4825  }
4826  s += n;
4827  }
4828  }
4829  }
4830 
4831  if (modify) return str;
4832  return Qnil;
4833 }
4834 
4835 
4836 /*
4837  * call-seq:
4838  * str.upcase -> new_str
4839  *
4840  * Returns a copy of <i>str</i> with all lowercase letters replaced with their
4841  * uppercase counterparts. The operation is locale insensitive---only
4842  * characters ``a'' to ``z'' are affected.
4843  * Note: case replacement is effective only in ASCII region.
4844  *
4845  * "hEllO".upcase #=> "HELLO"
4846  */
4847 
/*
 * Non-destructive variant: duplicates str, applies rb_str_upcase_bang() to
 * the duplicate and returns the duplicate whether or not it was changed.
 * NOTE(review): the signature line (listing line 4849) is missing from this
 * listing; the body uses str.
 */
4848 static VALUE
4850 {
4851  str = rb_str_dup(str);
4852  rb_str_upcase_bang(str);
4853  return str;
4854 }
4855 
4856 
4857 /*
4858  * call-seq:
4859  * str.downcase! -> str or nil
4860  *
4861  * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
4862  * changes were made.
4863  * Note: case replacement is effective only in ASCII region.
4864  */
4865 
/*
 * Downcases str in place.  Returns str when at least one character was
 * changed and Qnil otherwise.
 * NOTE(review): listing lines 4867 (signature) and 4875 are missing from
 * this listing.
 */
4866 static VALUE
4868 {
4869  rb_encoding *enc;
4870  char *s, *send;
4871  int modify = 0;
4872 
4873  str_modify_keep_cr(str);
4874  enc = STR_ENC_GET(str);
4876  s = RSTRING_PTR(str); send = RSTRING_END(str);
4877  if (single_byte_optimizable(str)) {
      /* byte-wise path: only ASCII 'A'..'Z' are converted */
4878  while (s < send) {
4879  unsigned int c = *(unsigned char*)s;
4880 
4881  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
4882  *s = 'a' + (c - 'A');
4883  modify = 1;
4884  }
4885  s++;
4886  }
4887  }
4888  else {
4889  int ascompat = rb_enc_asciicompat(enc);
4890 
4891  while (s < send) {
4892  unsigned int c;
4893  int n;
4894 
      /* in ASCII-compatible encodings, bytes below 0x80 are handled
       * directly without decoding */
4895  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
4896  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
4897  *s = 'a' + (c - 'A');
4898  modify = 1;
4899  }
4900  s++;
4901  }
4902  else {
      /* decode one character; n receives its byte length */
4903  c = rb_enc_codepoint_len(s, send, &n, enc);
4904  if (rb_enc_isupper(c, enc)) {
4905  /* assuming tolower returns codepoint with same size */
4906  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
4907  modify = 1;
4908  }
4909  s += n;
4910  }
4911  }
4912  }
4913 
4914  if (modify) return str;
4915  return Qnil;
4916 }
4917 
4918 
4919 /*
4920  * call-seq:
4921  * str.downcase -> new_str
4922  *
4923  * Returns a copy of <i>str</i> with all uppercase letters replaced with their
4924  * lowercase counterparts. The operation is locale insensitive---only
4925  * characters ``A'' to ``Z'' are affected.
4926  * Note: case replacement is effective only in ASCII region.
4927  *
4928  * "hEllO".downcase #=> "hello"
4929  */
4930 
/*
 * Non-destructive variant: duplicates str, applies rb_str_downcase_bang()
 * to the duplicate and returns the duplicate whether or not it was changed.
 * NOTE(review): the signature line (listing line 4932) is missing from this
 * listing; the body uses str.
 */
4931 static VALUE
4933 {
4934  str = rb_str_dup(str);
4935  rb_str_downcase_bang(str);
4936  return str;
4937 }
4938 
4939 
4940 /*
4941  * call-seq:
4942  * str.capitalize! -> str or nil
4943  *
4944  * Modifies <i>str</i> by converting the first character to uppercase and the
4945  * remainder to lowercase. Returns <code>nil</code> if no changes are made.
4946  * Note: case conversion is effective only in ASCII region.
4947  *
4948  * a = "hello"
4949  * a.capitalize! #=> "Hello"
4950  * a #=> "Hello"
4951  * a.capitalize! #=> nil
4952  */
4953 
/*
 * Capitalizes str in place: the first character is upcased when it is
 * lower case, every following upper-case character is downcased.  Returns
 * str when something changed, Qnil otherwise (including for an empty
 * string).
 * NOTE(review): listing lines 4955 (signature) and 4965 are missing from
 * this listing.
 */
4954 static VALUE
4956 {
4957  rb_encoding *enc;
4958  char *s, *send;
4959  int modify = 0;
4960  unsigned int c;
4961  int n;
4962 
4963  str_modify_keep_cr(str);
4964  enc = STR_ENC_GET(str);
4966  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
4967  s = RSTRING_PTR(str); send = RSTRING_END(str);
4968 
      /* first character: n receives its byte length */
4969  c = rb_enc_codepoint_len(s, send, &n, enc);
4970  if (rb_enc_islower(c, enc)) {
4971  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
4972  modify = 1;
4973  }
4974  s += n;
      /* remaining characters */
4975  while (s < send) {
4976  c = rb_enc_codepoint_len(s, send, &n, enc);
4977  if (rb_enc_isupper(c, enc)) {
4978  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
4979  modify = 1;
4980  }
4981  s += n;
4982  }
4983 
4984  if (modify) return str;
4985  return Qnil;
4986 }
4987 
4988 
4989 /*
4990  * call-seq:
4991  * str.capitalize -> new_str
4992  *
4993  * Returns a copy of <i>str</i> with the first character converted to uppercase
4994  * and the remainder to lowercase.
4995  * Note: case conversion is effective only in ASCII region.
4996  *
4997  * "hello".capitalize #=> "Hello"
4998  * "HELLO".capitalize #=> "Hello"
4999  * "123ABC".capitalize #=> "123abc"
5000  */
5001 
/*
 * Non-destructive variant: duplicates str and returns the duplicate.
 * NOTE(review): listing lines 5003 (signature) and 5006 are missing from
 * this listing, so the statement between the duplication and the return is
 * not visible here.
 */
5002 static VALUE
5004 {
5005  str = rb_str_dup(str);
5007  return str;
5008 }
5009 
5010 
5011 /*
5012  * call-seq:
5013  * str.swapcase! -> str or nil
5014  *
5015  * Equivalent to <code>String#swapcase</code>, but modifies the receiver in
5016  * place, returning <i>str</i>, or <code>nil</code> if no changes were made.
5017  * Note: case conversion is effective only in ASCII region.
5018  */
5019 
/*
 * Swaps the case of every character of str in place.  Returns str when at
 * least one character was changed and Qnil otherwise.
 * NOTE(review): listing lines 5021 (signature) and 5030 are missing from
 * this listing.
 */
5020 static VALUE
5022 {
5023  rb_encoding *enc;
5024  char *s, *send;
5025  int modify = 0;
5026  int n;
5027 
5028  str_modify_keep_cr(str);
5029  enc = STR_ENC_GET(str);
5031  s = RSTRING_PTR(str); send = RSTRING_END(str);
5032  while (s < send) {
      /* decode one character; n receives its byte length */
5033  unsigned int c = rb_enc_codepoint_len(s, send, &n, enc);
5034 
5035  if (rb_enc_isupper(c, enc)) {
5036  /* assuming tolower returns codepoint with same size */
5037  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
5038  modify = 1;
5039  }
5040  else if (rb_enc_islower(c, enc)) {
5041  /* assuming toupper returns codepoint with same size */
5042  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
5043  modify = 1;
5044  }
5045  s += n;
5046  }
5047 
5048  if (modify) return str;
5049  return Qnil;
5050 }
5051 
5052 
5053 /*
5054  * call-seq:
5055  * str.swapcase -> new_str
5056  *
5057  * Returns a copy of <i>str</i> with uppercase alphabetic characters converted
5058  * to lowercase and lowercase characters converted to uppercase.
5059  * Note: case conversion is effective only in ASCII region.
5060  *
5061  * "Hello".swapcase #=> "hELLO"
5062  * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11"
5063  */
5064 
/*
 * Non-destructive String#swapcase: swaps case on a duplicate of the
 * receiver and returns that duplicate.
 */
5065 static VALUE
5066 rb_str_swapcase(VALUE str)
5067 {
5068  str = rb_str_dup(str);
5069  rb_str_swapcase_bang(str);
5070  return str;
5071 }
5072 
/* Convenience alias for a pointer to unsigned bytes. */
5073 typedef unsigned char *USTR;
5074 
/*
 * Cursor over a tr-style character specification (e.g. "a-y", "^aeiou"),
 * consumed one code point at a time by trnext().
 */
5075 struct tr {
 /* non-zero while trnext() is enumerating the inside of a "c1-c2" range */
5076  int gen;
 /* now: code point most recently produced; max: upper bound of the current range */
5077  unsigned int now, max;
 /* p: next unread byte of the specification; pend: one past its last byte */
5078  char *p, *pend;
5079 };
5080 
/*
 * Returns the next code point described by the tr specification behind t,
 * or (unsigned int)-1 once the specification is exhausted.
 *
 * A backslash that is not the last character escapes the character after
 * it. "c1-c2" (with '-' not last) starts a range: c1 is returned at once
 * and later calls enumerate up to c2 inclusive. A descending range raises
 * ArgumentError.
 */
5081 static unsigned int
5082 trnext(struct tr *t, rb_encoding *enc)
5083 {
5084  int n;
5085 
5086  for (;;) {
5087  if (!t->gen) {
5088 nextpart:
5089  if (t->p == t->pend) return -1;
 /* skip an escaping backslash unless it is the final character */
5090  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '\\' && t->p + n < t->pend) {
5091  t->p += n;
5092  }
5093  t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5094  t->p += n;
 /* a '-' followed by at least one more character introduces a range */
5095  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '-' && t->p + n < t->pend) {
5096  t->p += n;
5097  if (t->p < t->pend) {
5098  unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5099  t->p += n;
5100  if (t->now > c) {
 /* only print the offending endpoints when both are ASCII */
5101  if (t->now < 0x80 && c < 0x80) {
5102  rb_raise(rb_eArgError,
5103  "invalid range \"%c-%c\" in string transliteration",
5104  t->now, c);
5105  }
5106  else {
5107  rb_raise(rb_eArgError, "invalid range in string transliteration");
5108  }
5109  continue; /* not reached */
5110  }
5111  t->gen = 1;
5112  t->max = c;
5113  }
5114  }
5115  return t->now;
5116  }
5117  else {
 /* step to the next code point the encoding can represent (MBCLEN > 0) */
5118  while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
5119  if (t->now == t->max) {
5120  t->gen = 0;
5121  goto nextpart;
5122  }
5123  }
5124  if (t->now < t->max) {
5125  return t->now;
5126  }
5127  else {
 /* reached the upper bound: emit it and leave range mode */
5128  t->gen = 0;
5129  return t->max;
5130  }
5131  }
5132  }
5133 }
5134 
5135 static VALUE rb_str_delete_bang(int,VALUE*,VALUE);
5136 
/*
 * Shared engine for String#tr!, #tr_s! (and, through their callers, #tr and
 * #tr_s). Translates str in place: characters listed in src are replaced by
 * the corresponding characters of repl (repl is padded with its last
 * character). A leading '^' in src negates the set. With sflag non-zero,
 * runs of the same replacement character are collapsed to one.
 *
 * Returns str when it was modified, Qnil otherwise. An empty repl delegates
 * to rb_str_delete_bang().
 *
 * Code points below 256 are mapped through the trans[] array; larger ones
 * through a lazily created Hash.
 */
5137 static VALUE
5138 tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
5139 {
5140  const unsigned int errc = -1;
5141  unsigned int trans[256];
5142  rb_encoding *enc, *e1, *e2;
5143  struct tr trsrc, trrepl;
5144  int cflag = 0;
5145  unsigned int c, c0, last = 0;
5146  int modify = 0, i, l;
5147  char *s, *send;
5148  VALUE hash = 0;
5149  int singlebyte = single_byte_optimizable(str);
5150  int cr;
5151 
 /* downgrade a 7-bit code range to VALID as soon as a non-ASCII character is emitted */
5152 #define CHECK_IF_ASCII(c) \
5153  (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
5154  (cr = ENC_CODERANGE_VALID) : 0)
5155 
5156  StringValue(src);
5157  StringValue(repl);
5158  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5159  if (RSTRING_LEN(repl) == 0) {
5160  return rb_str_delete_bang(1, &src, str);
5161  }
5162 
5163  cr = ENC_CODERANGE(str);
5164  e1 = rb_enc_check(str, src);
5165  e2 = rb_enc_check(str, repl);
5166  if (e1 == e2) {
5167  enc = e1;
5168  }
5169  else {
5170  enc = rb_enc_check(src, repl);
5171  }
5172  trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
 /* a leading '^' negates the set, but only when something follows it */
5173  if (RSTRING_LEN(src) > 1 &&
5174  rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) == '^' &&
5175  trsrc.p + l < trsrc.pend) {
5176  cflag = 1;
5177  trsrc.p += l;
5178  }
5179  trrepl.p = RSTRING_PTR(repl);
5180  trrepl.pend = trrepl.p + RSTRING_LEN(repl);
5181  trsrc.gen = trrepl.gen = 0;
5182  trsrc.now = trrepl.now = 0;
5183  trsrc.max = trrepl.max = 0;
5184 
5185  if (cflag) {
 /* negated set: mark listed characters as "keep" (errc), map the rest to the last replacer */
5186  for (i=0; i<256; i++) {
5187  trans[i] = 1;
5188  }
5189  while ((c = trnext(&trsrc, enc)) != errc) {
5190  if (c < 256) {
5191  trans[c] = errc;
5192  }
5193  else {
5194  if (!hash) hash = rb_hash_new();
5195  rb_hash_aset(hash, UINT2NUM(c), Qtrue);
5196  }
5197  }
5198  while ((c = trnext(&trrepl, enc)) != errc)
5199  /* retrieve last replacer */;
5200  last = trrepl.now;
5201  for (i=0; i<256; i++) {
5202  if (trans[i] != errc) {
5203  trans[i] = last;
5204  }
5205  }
5206  }
5207  else {
5208  unsigned int r;
5209 
5210  for (i=0; i<256; i++) {
5211  trans[i] = errc;
5212  }
5213  while ((c = trnext(&trsrc, enc)) != errc) {
5214  r = trnext(&trrepl, enc);
 /* repl exhausted: keep reusing its last character */
5215  if (r == errc) r = trrepl.now;
5216  if (c < 256) {
5217  trans[c] = r;
5218  if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
5219  }
5220  else {
5221  if (!hash) hash = rb_hash_new();
5222  rb_hash_aset(hash, UINT2NUM(c), UINT2NUM(r));
5223  }
5224  }
5225  }
5226 
 /* restart from 7BIT; CHECK_IF_ASCII raises it again if non-ASCII output is produced */
5227  if (cr == ENC_CODERANGE_VALID)
5228  cr = ENC_CODERANGE_7BIT;
5229  str_modify_keep_cr(str);
5230  s = RSTRING_PTR(str); send = RSTRING_END(str);
5231  if (sflag) {
 /* squeeze variant: build the result in a fresh buffer, dropping repeated replacements */
5232  int clen, tlen;
5233  long offset, max = RSTRING_LEN(str);
5234  unsigned int save = -1;
5235  char *buf = ALLOC_N(char, max), *t = buf;
5236 
5237  while (s < send) {
5238  int may_modify = 0;
5239 
5240  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5241  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5242 
5243  s += clen;
5244  if (c < 256) {
5245  c = trans[c];
5246  }
5247  else if (hash) {
5248  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5249  if (NIL_P(tmp)) {
5250  if (cflag) c = last;
5251  else c = errc;
5252  }
5253  else if (cflag) c = errc;
5254  else c = NUM2INT(tmp);
5255  }
5256  else {
 /* no hash means src lists no code point >= 256, so under '^' this one is in the set */
5257  c = cflag ? last : errc;
5258  }
5259  if (c != (unsigned int)-1) {
5260  if (save == c) {
5261  CHECK_IF_ASCII(c);
5262  continue;
5263  }
5264  save = c;
5265  tlen = rb_enc_codelen(c, enc);
5266  modify = 1;
5267  }
5268  else {
5269  save = -1;
5270  c = c0;
5271  if (enc != e1) may_modify = 1;
5272  }
 /* grow the output buffer until the next character plus a terminator fits */
5273  while (t - buf + tlen >= max) {
5274  offset = t - buf;
5275  max *= 2;
5276  REALLOC_N(buf, char, max);
5277  t = buf + offset;
5278  }
5279  rb_enc_mbcput(c, t, enc);
5280  if (may_modify && memcmp(s, t, tlen) != 0) {
5281  modify = 1;
5282  }
5283  CHECK_IF_ASCII(c);
5284  t += tlen;
5285  }
5286  if (!STR_EMBED_P(str)) {
5287  xfree(RSTRING(str)->as.heap.ptr);
5288  }
5289  *t = '\0';
5290  RSTRING(str)->as.heap.ptr = buf;
5291  RSTRING(str)->as.heap.len = t - buf;
5292  STR_SET_NOEMBED(str);
5293  RSTRING(str)->as.heap.aux.capa = max;
5294  }
5295  else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) {
 /* every character is one byte before and after: translate in place */
5296  while (s < send) {
5297  c = (unsigned char)*s;
5298  if (trans[c] != errc) {
5299  if (!cflag) {
5300  c = trans[c];
5301  *s = c;
5302  modify = 1;
5303  }
5304  else {
5305  *s = last;
5306  modify = 1;
5307  }
5308  }
5309  CHECK_IF_ASCII(c);
5310  s++;
5311  }
5312  }
5313  else {
 /* general multibyte path: rebuild into a new buffer; sizes are long so large strings do not truncate */
5314  int clen, tlen;
5315  long offset, max = (long)(RSTRING_LEN(str) * 1.2);
5316  char *buf = ALLOC_N(char, max), *t = buf;
5317 
5318  while (s < send) {
5319  int may_modify = 0;
5320  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5321  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5322 
5323  if (c < 256) {
5324  c = trans[c];
5325  }
5326  else if (hash) {
5327  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5328  if (NIL_P(tmp)) {
5329  if (cflag) c = last;
5330  else c = errc;
5331  }
5332  else if (cflag) c = errc;
5333  else c = NUM2INT(tmp);
5334  }
5335  else {
5336  c = cflag ? last : errc;
5337  }
5338  if (c != errc) {
5339  tlen = rb_enc_codelen(c, enc);
5340  modify = 1;
5341  }
5342  else {
5343  c = c0;
5344  if (enc != e1) may_modify = 1;
5345  }
5346  while (t - buf + tlen >= max) {
5347  offset = t - buf;
5348  max *= 2;
5349  REALLOC_N(buf, char, max);
5350  t = buf + offset;
5351  }
5352  if (s != t) {
5353  rb_enc_mbcput(c, t, enc);
5354  if (may_modify && memcmp(s, t, tlen) != 0) {
5355  modify = 1;
5356  }
5357  }
5358  CHECK_IF_ASCII(c);
5359  s += clen;
5360  t += tlen;
5361  }
5362  if (!STR_EMBED_P(str)) {
5363  xfree(RSTRING(str)->as.heap.ptr);
5364  }
5365  *t = '\0';
5366  RSTRING(str)->as.heap.ptr = buf;
5367  RSTRING(str)->as.heap.len = t - buf;
5368  STR_SET_NOEMBED(str);
5369  RSTRING(str)->as.heap.aux.capa = max;
5370  }
5371 
5372  if (modify) {
5373  if (cr != ENC_CODERANGE_BROKEN)
5374  ENC_CODERANGE_SET(str, cr);
5375  rb_enc_associate(str, enc);
5376  return str;
5377  }
5378  return Qnil;
5379 }
5380 
5381 
5382 /*
5383  * call-seq:
5384  * str.tr!(from_str, to_str) -> str or nil
5385  *
5386  * Translates <i>str</i> in place, using the same rules as
5387  * <code>String#tr</code>. Returns <i>str</i>, or <code>nil</code> if no
5388  * changes were made.
5389  */
5390 
/* String#tr!: in-place translation without squeezing (sflag = 0). */
5391 static VALUE
5392 rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
5393 {
5394  return tr_trans(str, src, repl, 0);
5395 }
5396 
5397 
5398 /*
5399  * call-seq:
5400  * str.tr(from_str, to_str) -> new_str
5401  *
5402  * Returns a copy of +str+ with the characters in +from_str+ replaced by the
5403  * corresponding characters in +to_str+. If +to_str+ is shorter than
5404  * +from_str+, it is padded with its last character in order to maintain the
5405  * correspondence.
5406  *
5407  * "hello".tr('el', 'ip') #=> "hippo"
5408  * "hello".tr('aeiou', '*') #=> "h*ll*"
5409  * "hello".tr('aeiou', 'AA*') #=> "hAll*"
5410  *
5411  * Both strings may use the <code>c1-c2</code> notation to denote ranges of
5412  * characters, and +from_str+ may start with a <code>^</code>, which denotes
5413  * all characters except those listed.
5414  *
5415  * "hello".tr('a-y', 'b-z') #=> "ifmmp"
5416  * "hello".tr('^aeiou', '*') #=> "*e**o"
5417  *
5418  * The backslash character <code>\</code> can be used to escape
5419  * <code>^</code> or <code>-</code> and is otherwise ignored unless it
5420  * appears at the end of a range or the end of the +from_str+ or +to_str+:
5421  *
5422  * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld"
5423  * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld"
5424  *
5425  * "hello\r\nworld".tr("\r", "") #=> "hello\nworld"
5426  * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold"
5427  * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld"
5428  *
5429  * "X['\\b']".tr("X\\", "") #=> "['b']"
5430  * "X['\\b']".tr("X-\\]", "") #=> "'b'"
5431  */
5432 
/* String#tr: translates a duplicate of the receiver and returns the duplicate. */
5433 static VALUE
5434 rb_str_tr(VALUE str, VALUE src, VALUE repl)
5435 {
5436  str = rb_str_dup(str);
5437  tr_trans(str, src, repl, 0);
5438  return str;
5439 }
5440 
/* 256 per-byte membership flags plus one extra slot (index 256) holding the negation state */
5441 #define TR_TABLE_SIZE 257
/*
 * Folds one character-set specification (str) into the running intersection
 * used by delete/squeeze/count.
 *
 *   stable  - membership flags for code points < 256; stable[256] records
 *             whether every specification seen so far was negated
 *   first   - non-zero for the first specification: resets stable
 *   tablep  - Hash of listed code points >= 256 from non-negated specs (0 if none)
 *   ctablep - Hash of code points >= 256 listed by negated ('^') specs (0 if none)
 */
5442 static void
5443 tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first,
5444  VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
5445 {
5446  const unsigned int errc = -1;
5447  char buf[256];
5448  struct tr tr;
5449  unsigned int c;
5450  VALUE table = 0, ptable = 0;
5451  int i, l, cflag = 0;
5452 
5453  tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
5454  tr.gen = tr.now = tr.max = 0;
5455 
 /* a leading '^' on a specification longer than one byte negates it */
5456  if (RSTRING_LEN(str) > 1 && rb_enc_ascget(tr.p, tr.pend, &l, enc) == '^') {
5457  cflag = 1;
5458  tr.p += l;
5459  }
5460  if (first) {
5461  for (i=0; i<256; i++) {
5462  stable[i] = 1;
5463  }
5464  stable[256] = cflag;
5465  }
5466  else if (stable[256] && !cflag) {
5467  stable[256] = 0;
5468  }
 /* buf starts as "nothing listed" (or "everything" when negated) for this specification */
5469  for (i=0; i<256; i++) {
5470  buf[i] = cflag;
5471  }
5472 
5473  while ((c = trnext(&tr, enc)) != errc) {
5474  if (c < 256) {
5475  buf[c & 0xff] = !cflag;
5476  }
5477  else {
5478  VALUE key = UINT2NUM(c);
5479 
 /* create (or reuse) the Hash for this specification on the first wide code point */
5480  if (!table && (first || *tablep || stable[256])) {
5481  if (cflag) {
5482  ptable = *ctablep;
5483  table = ptable ? ptable : rb_hash_new();
5484  *ctablep = table;
5485  }
5486  else {
5487  table = rb_hash_new();
5488  ptable = *tablep;
5489  *tablep = table;
5490  }
5491  }
5492  if (table && (!ptable || (cflag ^ !NIL_P(rb_hash_aref(ptable, key))))) {
5493  rb_hash_aset(table, key, Qtrue);
5494  }
5495  }
5496  }
 /* intersect this specification with what earlier ones allowed */
5497  for (i=0; i<256; i++) {
5498  stable[i] = stable[i] && buf[i];
5499  }
 /* a non-negated specification with no wide characters empties the wide set */
5500  if (!table && !cflag) {
5501  *tablep = 0;
5502  }
5503 }
5504 
5505 
/*
 * Tests whether code point c belongs to the set built by tr_setup_table().
 * Code points below 256 are looked up in table[]; larger ones consult the
 * del Hash (listed) and nodel Hash (excluded by negation), falling back to
 * table[256] when neither decides. Returns non-zero when c is in the set.
 */
5506 static int
5507 tr_find(unsigned int c, char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
5508 {
5509  if (c < 256) {
5510  return table[c] != 0;
5511  }
5512  else {
5513  VALUE v = UINT2NUM(c);
5514 
5515  if (del) {
 /* listed in del and not vetoed by nodel */
5516  if (!NIL_P(rb_hash_lookup(del, v)) &&
5517  (!nodel || NIL_P(rb_hash_lookup(nodel, v)))) {
5518  return TRUE;
5519  }
5520  }
5521  else if (nodel && !NIL_P(rb_hash_lookup(nodel, v))) {
5522  return FALSE;
5523  }
5524  return table[256] ? TRUE : FALSE;
5525  }
5526 }
5527 
5528 /*
5529  * call-seq:
5530  * str.delete!([other_str]+) -> str or nil
5531  *
5532  * Performs a <code>delete</code> operation in place, returning <i>str</i>, or
5533  * <code>nil</code> if <i>str</i> was not modified.
5534  */
5535 
/*
 * String#delete!: removes, in place, every character that belongs to the
 * intersection of the sets described by argv[0..argc-1].
 * Returns str when something was removed, Qnil otherwise (including when
 * str is empty). At least one argument is required.
 */
5536 static VALUE
5537 rb_str_delete_bang(int argc, VALUE *argv, VALUE str)
5538 {
5539  char squeez[TR_TABLE_SIZE];
5540  rb_encoding *enc = 0;
5541  char *s, *send, *t;
5542  VALUE del = 0, nodel = 0;
5543  int modify = 0;
5544  int i, ascompat, cr;
5545 
5546  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5547  rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);
5548  for (i=0; i<argc; i++) {
5549  VALUE s = argv[i];
5550 
5551  StringValue(s);
5552  enc = rb_enc_check(str, s);
5553  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5554  }
5555 
5556  str_modify_keep_cr(str);
5557  ascompat = rb_enc_asciicompat(enc);
 /* s reads, t writes; kept characters are compacted toward the front */
5558  s = t = RSTRING_PTR(str);
5559  send = RSTRING_END(str);
5560  cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
5561  while (s < send) {
5562  unsigned int c;
5563  int clen;
5564 
 /* fast path: a single ASCII byte in an ASCII-compatible encoding */
5565  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5566  if (squeez[c]) {
5567  modify = 1;
5568  }
5569  else {
5570  if (t != s) *t = c;
5571  t++;
5572  }
5573  s++;
5574  }
5575  else {
5576  c = rb_enc_codepoint_len(s, send, &clen, enc);
5577 
5578  if (tr_find(c, squeez, del, nodel)) {
5579  modify = 1;
5580  }
5581  else {
5582  if (t != s) rb_enc_mbcput(c, t, enc);
5583  t += clen;
 /* a kept non-ASCII character means the result is no longer 7-bit */
5584  if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
5585  }
5586  s += clen;
5587  }
5588  }
5589  *t = '\0';
5590  STR_SET_LEN(str, t - RSTRING_PTR(str));
5591  ENC_CODERANGE_SET(str, cr);
5592 
5593  if (modify) return str;
5594  return Qnil;
5595 }
5596 
5597 
5598 /*
5599  * call-seq:
5600  * str.delete([other_str]+) -> new_str
5601  *
5602  * Returns a copy of <i>str</i> with all characters in the intersection of its
5603  * arguments deleted. Uses the same rules for building the set of characters as
5604  * <code>String#count</code>.
5605  *
5606  * "hello".delete "l","lo" #=> "heo"
5607  * "hello".delete "lo" #=> "he"
5608  * "hello".delete "aeiou", "^e" #=> "hell"
5609  * "hello".delete "ej-m" #=> "ho"
5610  */
5611 
/* String#delete: applies delete! to a duplicate of the receiver and returns the duplicate. */
5612 static VALUE
5613 rb_str_delete(int argc, VALUE *argv, VALUE str)
5614 {
5615  str = rb_str_dup(str);
5616  rb_str_delete_bang(argc, argv, str);
5617  return str;
5618 }
5619 
5620 
5621 /*
5622  * call-seq:
5623  * str.squeeze!([other_str]*) -> str or nil
5624  *
5625  * Squeezes <i>str</i> in place, returning either <i>str</i>, or
5626  * <code>nil</code> if no changes were made.
5627  */
5628 
/*
 * String#squeeze!: collapses, in place, runs of an identical character into
 * a single occurrence. With no arguments every character is eligible;
 * otherwise only characters in the intersection of the argument sets are.
 * Returns str when its length changed, Qnil otherwise.
 */
5629 static VALUE
5630 rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
5631 {
5632  char squeez[TR_TABLE_SIZE];
5633  rb_encoding *enc = 0;
5634  VALUE del = 0, nodel = 0;
5635  char *s, *send, *t;
5636  int i, modify = 0;
5637  int ascompat, singlebyte = single_byte_optimizable(str);
5638  unsigned int save;
5639 
5640  if (argc == 0) {
5641  enc = STR_ENC_GET(str);
5642  }
5643  else {
5644  for (i=0; i<argc; i++) {
5645  VALUE s = argv[i];
5646 
5647  StringValue(s);
5648  enc = rb_enc_check(str, s);
 /* the byte-wise loop is only valid when every argument is single-byte too */
5649  if (singlebyte && !single_byte_optimizable(s))
5650  singlebyte = 0;
5651  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5652  }
5653  }
5654 
5655  str_modify_keep_cr(str);
5656  s = t = RSTRING_PTR(str);
5657  if (!s || RSTRING_LEN(str) == 0) return Qnil;
5658  send = RSTRING_END(str);
 /* save holds the previously kept character; -1 matches no real code point */
5659  save = -1;
5660  ascompat = rb_enc_asciicompat(enc);
5661 
5662  if (singlebyte) {
5663  while (s < send) {
5664  unsigned int c = *(unsigned char*)s++;
 /* keep c unless it repeats the previous character and is squeezable */
5665  if (c != save || (argc > 0 && !squeez[c])) {
5666  *t++ = save = c;
5667  }
5668  }
5669  } else {
5670  while (s < send) {
5671  unsigned int c;
5672  int clen;
5673 
5674  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5675  if (c != save || (argc > 0 && !squeez[c])) {
5676  *t++ = save = c;
5677  }
5678  s++;
5679  }
5680  else {
5681  c = rb_enc_codepoint_len(s, send, &clen, enc);
5682 
5683  if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
5684  if (t != s) rb_enc_mbcput(c, t, enc);
5685  save = c;
5686  t += clen;
5687  }
5688  s += clen;
5689  }
5690  }
5691  }
5692 
5693  *t = '\0';
 /* a shorter result is the only evidence that something was squeezed */
5694  if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
5695  STR_SET_LEN(str, t - RSTRING_PTR(str));
5696  modify = 1;
5697  }
5698 
5699  if (modify) return str;
5700  return Qnil;
5701 }
5702 
5703 
5704 /*
5705  * call-seq:
5706  * str.squeeze([other_str]*) -> new_str
5707  *
5708  * Builds a set of characters from the <i>other_str</i> parameter(s) using the
5709  * procedure described for <code>String#count</code>. Returns a new string
5710  * where runs of the same character that occur in this set are replaced by a
5711  * single character. If no arguments are given, all runs of identical
5712  * characters are replaced by a single character.
5713  *
5714  * "yellow moon".squeeze #=> "yelow mon"
5715  * "  now   is  the".squeeze(" ") #=> " now is the"
5716  * "putters shoot balls".squeeze("m-z") #=> "puters shot balls"
5717  */
5718 
/* String#squeeze: applies squeeze! to a duplicate of the receiver and returns the duplicate. */
5719 static VALUE
5720 rb_str_squeeze(int argc, VALUE *argv, VALUE str)
5721 {
5722  str = rb_str_dup(str);
5723  rb_str_squeeze_bang(argc, argv, str);
5724  return str;
5725 }
5726 
5727 
5728 /*
5729  * call-seq:
5730  * str.tr_s!(from_str, to_str) -> str or nil
5731  *
5732  * Performs <code>String#tr_s</code> processing on <i>str</i> in place,
5733  * returning <i>str</i>, or <code>nil</code> if no changes were made.
5734  */
5735 
/* String#tr_s!: in-place translation that also squeezes repeated replacements (sflag = 1). */
5736 static VALUE
5737 rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
5738 {
5739  return tr_trans(str, src, repl, 1);
5740 }
5741 
5742 
5743 /*
5744  * call-seq:
5745  * str.tr_s(from_str, to_str) -> new_str
5746  *
5747  * Processes a copy of <i>str</i> as described under <code>String#tr</code>,
5748  * then removes duplicate characters in regions that were affected by the
5749  * translation.
5750  *
5751  * "hello".tr_s('l', 'r') #=> "hero"
5752  * "hello".tr_s('el', '*') #=> "h*o"
5753  * "hello".tr_s('el', 'hx') #=> "hhxo"
5754  */
5755 
/* String#tr_s: runs the squeezing translation on a duplicate of the receiver and returns the duplicate. */
5756 static VALUE
5757 rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
5758 {
5759  str = rb_str_dup(str);
5760  tr_trans(str, src, repl, 1);
5761  return str;
5762 }
5763 
5764 
5765 /*
5766  * call-seq:
5767  * str.count([other_str]+) -> fixnum
5768  *
5769  * Each +other_str+ parameter defines a set of characters to count. The
5770  * intersection of these sets defines the characters to count in +str+. Any
5771  * +other_str+ that starts with a caret <code>^</code> is negated. The
5772  * sequence <code>c1-c2</code> means all characters between c1 and c2. The
5773  * backslash character <code>\</code> can be used to escape <code>^</code> or
5774  * <code>-</code> and is otherwise ignored unless it appears at the end of a
5775  * sequence or the end of a +other_str+.
5776  *
5777  * a = "hello world"
5778  * a.count "lo" #=> 5
5779  * a.count "lo", "o" #=> 2
5780  * a.count "hello", "^l" #=> 4
5781  * a.count "ej-m" #=> 4
5782  *
5783  * "hello^world".count "\\^aeiou" #=> 4
5784  * "hello-world".count "a\\-eo" #=> 4
5785  *
5786  * c = "hello world\\r\\n"
5787  * c.count "\\" #=> 2
5788  * c.count "\\A" #=> 0
5789  * c.count "X-\\w" #=> 3
5790  */
5791 
/*
 * String#count: counts the characters of str that belong to the
 * intersection of the sets described by the arguments (at least one is
 * required). Returns the count as an Integer.
 *
 * Counters are long so strings longer than INT_MAX bytes do not overflow.
 */
5792 static VALUE
5793 rb_str_count(int argc, VALUE *argv, VALUE str)
5794 {
5795  char table[TR_TABLE_SIZE];
5796  rb_encoding *enc = 0;
5797  VALUE del = 0, nodel = 0, tstr;
5798  char *s, *send;
5799  long i;
5800  int ascompat;
5801 
5802  rb_check_arity(argc, 1, UNLIMITED_ARGUMENTS);
5803 
5804  tstr = argv[0];
5805  StringValue(tstr);
5806  enc = rb_enc_check(str, tstr);
5807  if (argc == 1) {
5808  const char *ptstr;
 /* fast path: one single-byte pattern on a non-broken string is a plain byte scan */
5809  if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
5810  (ptstr = RSTRING_PTR(tstr),
5811  ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (const unsigned char *)ptstr, (const unsigned char *)ptstr+1)) &&
5812  !is_broken_string(str)) {
5813  long n = 0;
5814  int clen;
5815  unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
5816 
5817  s = RSTRING_PTR(str);
5818  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
5819  send = RSTRING_END(str);
5820  while (s < send) {
5821  if (*(unsigned char*)s++ == c) n++;
5822  }
5823  return LONG2NUM(n);
5824  }
5825  }
5826 
 /* general path: intersect all argument sets, then test each character */
5827  tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
5828  for (i=1; i<argc; i++) {
5829  tstr = argv[i];
5830  StringValue(tstr);
5831  enc = rb_enc_check(str, tstr);
5832  tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
5833  }
5834 
5835  s = RSTRING_PTR(str);
5836  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
5837  send = RSTRING_END(str);
5838  ascompat = rb_enc_asciicompat(enc);
 /* i is reused from here on as the match counter */
5839  i = 0;
5840  while (s < send) {
5841  unsigned int c;
5842 
5843  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5844  if (table[c]) {
5845  i++;
5846  }
5847  s++;
5848  }
5849  else {
5850  int clen;
5851  c = rb_enc_codepoint_len(s, send, &clen, enc);
5852  if (tr_find(c, table, del, nodel)) {
5853  i++;
5854  }
5855  s += clen;
5856  }
5857  }
5858 
5859  return LONG2NUM(i);
5860 }
5861 
/*
 * Byte-indexed lookup table: 1 for the ASCII whitespace bytes 0x09-0x0D
 * (TAB, LF, VT, FF, CR) and 0x20 (space), 0 for every other byte value.
 * Each row below covers 16 consecutive byte values.
 */
5862 static const char isspacetable[256] = {
5863  0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
5864  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5865  1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5866  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5867  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5868  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5869  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5870  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5871  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5872  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5873  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5874  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5875  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5876  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5877  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5878  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
5879 };
5880 
/* Table lookup; the cast keeps the index in 0..255 even for negative char values. */
5881 #define ascii_isspace(c) isspacetable[(unsigned char)(c)]
5882 
5883 /*
5884  * call-seq:
5885  * str.split(pattern=$;, [limit]) -> anArray
5886  *
5887  * Divides <i>str</i> into substrings based on a delimiter, returning an array
5888  * of these substrings.
5889  *
5890  * If <i>pattern</i> is a <code>String</code>, then its contents are used as
5891  * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
5892  * space, <i>str</i> is split on whitespace, with leading whitespace and runs
5893  * of contiguous whitespace characters ignored.
5894  *
5895  * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
5896  * pattern matches. Whenever the pattern matches a zero-length string,
5897  * <i>str</i> is split into individual characters. If <i>pattern</i> contains
5898  * groups, the respective matches will be returned in the array as well.
5899  *
5900  * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
5901  * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
5902  * split on whitespace as if ` ' were specified.
5903  *
5904  * If the <i>limit</i> parameter is omitted, trailing null fields are
5905  * suppressed. If <i>limit</i> is a positive number, at most that number of
5906  * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
5907  * string is returned as the only entry in an array). If negative, there is no
5908  * limit to the number of fields returned, and trailing null fields are not
5909  * suppressed.
5910  *
5911  * When the input +str+ is empty an empty Array is returned as the string is
5912  * considered to have no fields to split.
5913  *
5914  * " now's  the time".split #=> ["now's", "the", "time"]
5915  * " now's  the time".split(' ') #=> ["now's", "the", "time"]
5916  * " now's  the time".split(/ /) #=> ["", "now's", "", "the", "time"]
5917  * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
5918  * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
5919  * "hello".split(//, 3) #=> ["h", "e", "llo"]
5920  * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
5921  *
5922  * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
5923  * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
5924  * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
5925  * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
5926  *
5927  * "".split(',', -1) #=> []
5928  */
5929 
/*
 * String#split(pattern = $;, limit = nil).
 *
 * Chooses one of three strategies: "awk" (whitespace splitting, used for a
 * nil pattern with nil $; or a single-space string), "string" (literal
 * separator search) or "regexp". Returns a new Array of substrings; with no
 * limit given, trailing empty fields are removed.
 */
5930 static VALUE
5931 rb_str_split_m(int argc, VALUE *argv, VALUE str)
5932 {
5933  rb_encoding *enc;
5934  VALUE spat;
5935  VALUE limit;
5936  enum {awk, string, regexp} split_type;
5937  long beg, end, i = 0;
5938  int lim = 0;
5939  VALUE result, tmp;
5940 
5941  if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
5942  lim = NUM2INT(limit);
 /* zero or negative limit: no field cap (limit becomes nil, lim keeps the sign) */
5943  if (lim <= 0) limit = Qnil;
5944  else if (lim == 1) {
5945  if (RSTRING_LEN(str) == 0)
5946  return rb_ary_new2(0);
5947  return rb_ary_new3(1, str);
5948  }
5949  i = 1;
5950  }
5951 
5952  enc = STR_ENC_GET(str);
5953  if (NIL_P(spat)) {
5954  if (!NIL_P(rb_fs)) {
5955  spat = rb_fs;
5956  goto fs_set;
5957  }
5958  split_type = awk;
5959  }
5960  else {
5961  fs_set:
5962  if (RB_TYPE_P(spat, T_STRING)) {
5963  rb_encoding *enc2 = STR_ENC_GET(spat);
5964 
5965  split_type = string;
5966  if (RSTRING_LEN(spat) == 0) {
5967  /* Special case - split into chars */
5968  spat = rb_reg_regcomp(spat);
5969  split_type = regexp;
5970  }
5971  else if (rb_enc_asciicompat(enc2) == 1) {
5972  if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
5973  split_type = awk;
5974  }
5975  }
5976  else {
5977  int l;
5978  if (rb_enc_ascget(RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' &&
5979  RSTRING_LEN(spat) == l) {
5980  split_type = awk;
5981  }
5982  }
5983  }
5984  else {
5985  spat = get_pat(spat, 1);
5986  split_type = regexp;
5987  }
5988  }
5989 
5990  result = rb_ary_new();
5991  beg = 0;
5992  if (split_type == awk) {
 /* skip is 1 while inside a whitespace run, 0 while inside a field */
5993  char *ptr = RSTRING_PTR(str);
5994  char *eptr = RSTRING_END(str);
5995  char *bptr = ptr;
5996  int skip = 1;
5997  unsigned int c;
5998 
5999  end = beg;
6000  if (is_ascii_string(str)) {
6001  while (ptr < eptr) {
6002  c = (unsigned char)*ptr++;
6003  if (skip) {
6004  if (ascii_isspace(c)) {
6005  beg = ptr - bptr;
6006  }
6007  else {
6008  end = ptr - bptr;
6009  skip = 0;
6010  if (!NIL_P(limit) && lim <= i) break;
6011  }
6012  }
6013  else if (ascii_isspace(c)) {
6014  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6015  skip = 1;
6016  beg = ptr - bptr;
6017  if (!NIL_P(limit)) ++i;
6018  }
6019  else {
6020  end = ptr - bptr;
6021  }
6022  }
6023  }
6024  else {
6025  while (ptr < eptr) {
6026  int n;
6027 
6028  c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
6029  ptr += n;
6030  if (skip) {
6031  if (rb_isspace(c)) {
6032  beg = ptr - bptr;
6033  }
6034  else {
6035  end = ptr - bptr;
6036  skip = 0;
6037  if (!NIL_P(limit) && lim <= i) break;
6038  }
6039  }
6040  else if (rb_isspace(c)) {
6041  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6042  skip = 1;
6043  beg = ptr - bptr;
6044  if (!NIL_P(limit)) ++i;
6045  }
6046  else {
6047  end = ptr - bptr;
6048  }
6049  }
6050  }
6051  }
6052  else if (split_type == string) {
6053  char *ptr = RSTRING_PTR(str);
6054  char *temp = ptr;
6055  char *eptr = RSTRING_END(str);
6056  char *sptr = RSTRING_PTR(spat);
6057  long slen = RSTRING_LEN(spat);
6058 
6059  if (is_broken_string(str)) {
6060  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
6061  }
6062  if (is_broken_string(spat)) {
6063  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(spat)));
6064  }
6065  enc = rb_enc_check(str, spat);
6066  while (ptr < eptr &&
6067  (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
6068  /* Check we are at the start of a char */
6069  char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
6070  if (t != ptr + end) {
6071  ptr = t;
6072  continue;
6073  }
6074  rb_ary_push(result, rb_str_subseq(str, ptr - temp, end));
6075  ptr += end + slen;
6076  if (!NIL_P(limit) && lim <= ++i) break;
6077  }
6078  beg = ptr - temp;
6079  }
6080  else {
6081  char *ptr = RSTRING_PTR(str);
6082  long len = RSTRING_LEN(str);
6083  long start = beg;
6084  long idx;
6085  int last_null = 0;
6086  struct re_registers *regs;
6087 
6088  while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
6089  regs = RMATCH_REGS(rb_backref_get());
 /* zero-length match at the search start: emit one character instead of looping forever */
6090  if (start == end && BEG(0) == END(0)) {
6091  if (!ptr) {
6092  rb_ary_push(result, str_new_empty(str));
6093  break;
6094  }
6095  else if (last_null == 1) {
6096  rb_ary_push(result, rb_str_subseq(str, beg,
6097  rb_enc_fast_mbclen(ptr+beg,
6098  ptr+len,
6099  enc)));
6100  beg = start;
6101  }
6102  else {
6103  if (ptr+start == ptr+len)
6104  start++;
6105  else
6106  start += rb_enc_fast_mbclen(ptr+start,ptr+len,enc);
6107  last_null = 1;
6108  continue;
6109  }
6110  }
6111  else {
6112  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6113  beg = start = END(0);
6114  }
6115  last_null = 0;
6116 
 /* captured groups of the separator are appended as fields as well */
6117  for (idx=1; idx < regs->num_regs; idx++) {
6118  if (BEG(idx) == -1) continue;
6119  if (BEG(idx) == END(idx))
6120  tmp = str_new_empty(str);
6121  else
6122  tmp = rb_str_subseq(str, BEG(idx), END(idx)-BEG(idx));
6123  rb_ary_push(result, tmp);
6124  }
6125  if (!NIL_P(limit) && lim <= ++i) break;
6126  }
6127  }
 /* append whatever follows the last separator */
6128  if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
6129  if (RSTRING_LEN(str) == beg)
6130  tmp = str_new_empty(str);
6131  else
6132  tmp = rb_str_subseq(str, beg, RSTRING_LEN(str)-beg);
6133  rb_ary_push(result, tmp);
6134  }
 /* no limit argument at all: strip trailing empty fields */
6135  if (NIL_P(limit) && lim == 0) {
6136  long len;
6137  while ((len = RARRAY_LEN(result)) > 0 &&
6138  (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
6139  rb_ary_pop(result);
6140  }
6141 
6142  return result;
6143 }
6144 
/*
 * C-level convenience wrapper around rb_str_split_m(): converts str with
 * StringValue, wraps the NUL-terminated separator sep0 in a new Ruby String
 * and splits with that single argument (no limit). Returns the resulting Array.
 */
6145 VALUE
6146 rb_str_split(VALUE str, const char *sep0)
6147 {
6148  VALUE sep;
6149 
6150  StringValue(str);
6151  sep = rb_str_new2(sep0);
6152  return rb_str_split_m(1, &sep, str);
6153 }
6154 
6155 
/*
 * Shared implementation of String#each_line (wantarray == 0) and
 * String#lines (wantarray != 0).
 *
 * Splits str on the record separator (argv[0], default rb_rs). Each line,
 * separator included, is either yielded to the block or pushed onto an
 * Array. A nil separator produces the whole string as one line; an empty
 * separator selects paragraph mode (runs of newlines).
 *
 * Returns the Array when collecting, otherwise the original receiver; with
 * no block and wantarray == 0 an Enumerator is returned instead.
 */
6156 static VALUE
6157 rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
6158 {
6159  rb_encoding *enc;
6160  VALUE rs;
6161  unsigned int newline;
6162  const char *p, *pend, *s, *ptr;
6163  long len, rslen;
6164  VALUE line;
6165  int n;
6166  VALUE orig = str;
 /* ary is only assigned and read on the array-collecting path */
6167  VALUE UNINITIALIZED_VAR(ary);
6168 
6169  if (argc == 0) {
6170  rs = rb_rs;
6171  }
6172  else {
6173  rb_scan_args(argc, argv, "01", &rs);
6174  }
6175 
6176  if (rb_block_given_p()) {
6177  if (wantarray) {
6178 #if 0 /* next major */
6179  rb_warn("given block not used");
6180  ary = rb_ary_new();
6181 #else
6182  rb_warning("passing a block to String#lines is deprecated");
6183  wantarray = 0;
6184 #endif
6185  }
6186  }
6187  else {
6188  if (wantarray)
6189  ary = rb_ary_new();
6190  else
6191  RETURN_ENUMERATOR(str, argc, argv);
6192  }
6193 
6194  if (NIL_P(rs)) {
6195  if (wantarray) {
6196  rb_ary_push(ary, str);
6197  return ary;
6198  }
6199  else {
6200  rb_yield(str);
6201  return orig;
6202  }
6203  }
 /* iterate over the result of rb_str_new4(); str_mod_check() below guards its buffer */
6204  str = rb_str_new4(str);
6205  ptr = p = s = RSTRING_PTR(str);
6206  pend = p + RSTRING_LEN(str);
6207  len = RSTRING_LEN(str);
6208  StringValue(rs);
6209  if (rs == rb_default_rs) {
 /* default separator: scan for '\n' bytes and confirm each is a real newline character */
6210  enc = rb_enc_get(str);
6211  while (p < pend) {
6212  char *p0;
6213 
6214  p = memchr(p, '\n', pend - p);
6215  if (!p) break;
6216  p0 = rb_enc_left_char_head(s, p, pend, enc);
6217  if (!rb_enc_is_newline(p0, pend, enc)) {
6218  p++;
6219  continue;
6220  }
6221  p = p0 + rb_enc_mbclen(p0, pend, enc);
6222  line = rb_str_subseq(str, s - ptr, p - s);
6223  if (wantarray)
6224  rb_ary_push(ary, line);
6225  else
6226  rb_yield(line);
6227  str_mod_check(str, ptr, len);
6228  s = p;
6229  }
6230  goto finish;
6231  }
6232 
6233  enc = rb_enc_check(str, rs);
6234  rslen = RSTRING_LEN(rs);
6235  if (rslen == 0) {
6236  newline = '\n';
6237  }
6238  else {
 /* the first code point of rs is used as a cheap pre-filter before memcmp */
6239  newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
6240  }
6241 
6242  while (p < pend) {
6243  unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
6244 
6245  again:
 /* paragraph mode: swallow the whole run of consecutive newlines */
6246  if (rslen == 0 && c == newline) {
6247  p += n;
6248  if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) {
6249  goto again;
6250  }
6251  while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) {
6252  p += n;
6253  }
6254  p -= n;
6255  }
6256  if (c == newline &&
6257  (rslen <= 1 ||
6258  (pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) {
6259  const char *pp = p + (rslen ? rslen : n);
6260  line = rb_str_subseq(str, s - ptr, pp - s);
6261  if (wantarray)
6262  rb_ary_push(ary, line);
6263  else
6264  rb_yield(line);
6265  str_mod_check(str, ptr, len);
6266  s = pp;
6267  }
6268  p += n;
6269  }
6270 
6271  finish:
 /* emit the trailing text that is not terminated by a separator */
6272  if (s != pend) {
6273  line = rb_str_subseq(str, s - ptr, pend - s);
6274  if (wantarray)
6275  rb_ary_push(ary, line);
6276  else
6277  rb_yield(line);
6278  RB_GC_GUARD(str);
6279  }
6280 
6281  if (wantarray)
6282  return ary;
6283  else
6284  return orig;
6285 }
6286 
6287 /*
6288  * call-seq:
6289  * str.each_line(separator=$/) {|substr| block } -> str
6290  * str.each_line(separator=$/) -> an_enumerator
6291  *
6292  * Splits <i>str</i> using the supplied parameter as the record
6293  * separator (<code>$/</code> by default), passing each substring in
6294  * turn to the supplied block. If a zero-length record separator is
6295  * supplied, the string is split into paragraphs delimited by
6296  * multiple successive newlines.
6297  *
6298  * If no block is given, an enumerator is returned instead.
6299  *
6300  * print "Example one\n"
6301  * "hello\nworld".each_line {|s| p s}
6302  * print "Example two\n"
6303  * "hello\nworld".each_line('l') {|s| p s}
6304  * print "Example three\n"
6305  * "hello\n\n\nworld".each_line('') {|s| p s}
6306  *
6307  * <em>produces:</em>
6308  *
6309  * Example one
6310  * "hello\n"
6311  * "world"
6312  * Example two
6313  * "hel"
6314  * "l"
6315  * "o\nworl"
6316  * "d"
6317  * Example three
6318  * "hello\n\n\n"
6319  * "world"
6320  */
6321 
6322 static VALUE
6324 {
6325  return rb_str_enumerate_lines(argc, argv, str, 0);
6326 }
6327 
6328 /*
6329  * call-seq:
6330  * str.lines(separator=$/) -> an_array
6331  *
6332  * Returns an array of lines in <i>str</i> split using the supplied
6333  * record separator (<code>$/</code> by default). This is a
6334  * shorthand for <code>str.each_line(separator).to_a</code>.
6335  *
6336  * If a block is given, which is a deprecated form, works the same as
6337  * <code>each_line</code>.
6338  */
6339 
6340 static VALUE
6342 {
6343  return rb_str_enumerate_lines(argc, argv, str, 1);
6344 }
6345 
6346 static VALUE
6348 {
6349  return LONG2FIX(RSTRING_LEN(str));
6350 }
6351 
/*
 * Shared worker for the byte enumeration methods: walks the bytes of str
 * from first to last and hands each one over as a Fixnum masked with 0xff.
 *
 * wantarray set, no block: every byte is pushed onto ary and ary is returned.
 * Block given: every byte is yielded and str itself is returned; when
 * wantarray was also set, a deprecation warning is issued first and
 * wantarray is cleared so the call behaves like the yielding form.
 *
 * NOTE(review): this listing jumps over source lines 6356 and 6373, so the
 * declaration of ary and the body of the no-block, non-array branch are not
 * visible here -- confirm against the real file.
 */
6352 static VALUE
6353 rb_str_enumerate_bytes(VALUE str, int wantarray)
6354 {
6355  long i;
6357 
6358  if (rb_block_given_p()) {
6359  if (wantarray) {
6360 #if 0 /* next major */
6361  rb_warn("given block not used");
6362  ary = rb_ary_new();
6363 #else
6364  rb_warning("passing a block to String#bytes is deprecated");
6365  wantarray = 0;
6366 #endif
6367  }
6368  }
6369  else {
6370  if (wantarray)
6371  ary = rb_ary_new2(RSTRING_LEN(str)); /* sized up front: one slot per byte */
6372  else
6374  }
6375 
6376  for (i=0; i<RSTRING_LEN(str); i++) { /* length and pointer are re-read on every pass, not cached */
6377  if (wantarray)
6378  rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6379  else
6380  rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6381  }
6382  if (wantarray)
6383  return ary;
6384  else
6385  return str;
6386 }
6387 
6388 /*
6389  * call-seq:
6390  * str.each_byte {|fixnum| block } -> str
6391  * str.each_byte -> an_enumerator
6392  *
6393  * Passes each byte in <i>str</i> to the given block, or returns an
6394  * enumerator if no block is given.
6395  *
6396  * "hello".each_byte {|c| print c, ' ' }
6397  *
6398  * <em>produces:</em>
6399  *
6400  * 104 101 108 108 111
6401  */
6402 
6403 static VALUE
6405 {
6406  return rb_str_enumerate_bytes(str, 0);
6407 }
6408 
6409 /*
6410  * call-seq:
6411  * str.bytes -> an_array
6412  *
6413  * Returns an array of bytes in <i>str</i>. This is a shorthand for
6414  * <code>str.each_byte.to_a</code>.
6415  *
6416  * If a block is given, which is a deprecated form, works the same as
6417  * <code>each_byte</code>.
6418  */
6419 
6420 static VALUE
6422 {
6423  return rb_str_enumerate_bytes(str, 1);
6424 }
6425 
6426 static VALUE
6428 {
6429  long len = RSTRING_LEN(str);
6430  if (!single_byte_optimizable(str)) {
6431  const char *ptr = RSTRING_PTR(str);
6432  rb_encoding *enc = rb_enc_get(str);
6433  const char *end_ptr = ptr + len;
6434  for (len = 0; ptr < end_ptr; ++len) {
6435  ptr += rb_enc_mbclen(ptr, end_ptr, enc);
6436  }
6437  }
6438  return LONG2FIX(len);
6439 }
6440 
/*
 * Shared worker for the character enumeration methods: cuts str into
 * one-character substrings and either collects them in ary (wantarray set,
 * no block) or yields them one by one.
 *
 * Returns ary in array mode, otherwise the receiver as originally passed
 * (orig), not the working copy made below.  A block combined with wantarray
 * triggers a deprecation warning and falls back to yielding.
 *
 * NOTE(review): this listing jumps over source lines 6449 and 6466, so the
 * declaration of ary and the body of the no-block, non-array branch are not
 * visible here -- confirm against the real file.
 */
6441 static VALUE
6442 rb_str_enumerate_chars(VALUE str, int wantarray)
6443 {
6444  VALUE orig = str;
6445  VALUE substr;
6446  long i, len, n;
6447  const char *ptr;
6448  rb_encoding *enc;
6450 
6451  if (rb_block_given_p()) {
6452  if (wantarray) {
6453 #if 0 /* next major */
6454  rb_warn("given block not used");
6455  ary = rb_ary_new();
6456 #else
6457  rb_warning("passing a block to String#chars is deprecated");
6458  wantarray = 0;
6459 #endif
6460  }
6461  }
6462  else {
6463  if (wantarray)
6464  ary = rb_ary_new();
6465  else
6467  }
6468 
6469  str = rb_str_new4(str); /* iterate over this object instead of the receiver; presumably a frozen snapshot -- TODO confirm */
6470  ptr = RSTRING_PTR(str);
6471  len = RSTRING_LEN(str);
6472  enc = rb_enc_get(str);
6473  switch (ENC_CODERANGE(str)) {
6474  case ENC_CODERANGE_VALID:
6475  case ENC_CODERANGE_7BIT:
6476  for (i = 0; i < len; i += n) { /* these two code ranges use the fast length routine */
6477  n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
6478  substr = rb_str_subseq(str, i, n);
6479  if (wantarray)
6480  rb_ary_push(ary, substr);
6481  else
6482  rb_yield(substr);
6483  }
6484  break;
6485  default:
6486  for (i = 0; i < len; i += n) { /* any other code range: same loop, but with rb_enc_mbclen */
6487  n = rb_enc_mbclen(ptr + i, ptr + len, enc);
6488  substr = rb_str_subseq(str, i, n);
6489  if (wantarray)
6490  rb_ary_push(ary, substr);
6491  else
6492  rb_yield(substr);
6493  }
6494  }
6495  RB_GC_GUARD(str); /* ptr points into str, so str has to stay referenced up to here */
6496  if (wantarray)
6497  return ary;
6498  else
6499  return orig;
6500 }
6501 
6502 /*
6503  * call-seq:
6504  * str.each_char {|cstr| block } -> str
6505  * str.each_char -> an_enumerator
6506  *
6507  * Passes each character in <i>str</i> to the given block, or returns
6508  * an enumerator if no block is given.
6509  *
6510  * "hello".each_char {|c| print c, ' ' }
6511  *
6512  * <em>produces:</em>
6513  *
6514  * h e l l o
6515  */
6516 
6517 static VALUE
6519 {
6520  return rb_str_enumerate_chars(str, 0);
6521 }
6522 
6523 /*
6524  * call-seq:
6525  * str.chars -> an_array
6526  *
6527  * Returns an array of characters in <i>str</i>. This is a shorthand
6528  * for <code>str.each_char.to_a</code>.
6529  *
6530  * If a block is given, which is a deprecated form, works the same as
6531  * <code>each_char</code>.
6532  */
6533 
6534 static VALUE
6536 {
6537  return rb_str_enumerate_chars(str, 1);
6538 }
6539 
6540 
/*
 * Shared worker for the codepoint enumeration methods (invoked elsewhere in
 * this file as rb_str_enumerate_codepoints(str, wantarray)): decodes str one
 * character at a time and either pushes each codepoint onto ary or yields it.
 *
 * Returns ary in array mode, otherwise the receiver as originally passed
 * (orig).  Strings for which single_byte_optimizable() holds are handed
 * straight to rb_str_enumerate_bytes().
 *
 * NOTE(review): because that delegation happens before the block check, a
 * deprecated "codepoints { }" call on such a string is reported with the
 * String#bytes warning text of rb_str_enumerate_bytes, not the
 * String#codepoints text below.
 *
 * NOTE(review): this listing jumps over source lines 6542, 6549 and 6569
 * (signature, declaration of ary, no-block non-array branch) -- confirm
 * against the real file.
 */
6541 static VALUE
6543 {
6544  VALUE orig = str;
6545  int n;
6546  unsigned int c;
6547  const char *ptr, *end;
6548  rb_encoding *enc;
6550 
6551  if (single_byte_optimizable(str))
6552  return rb_str_enumerate_bytes(str, wantarray);
6553 
6554  if (rb_block_given_p()) {
6555  if (wantarray) {
6556 #if 0 /* next major */
6557  rb_warn("given block not used");
6558  ary = rb_ary_new();
6559 #else
6560  rb_warning("passing a block to String#codepoints is deprecated");
6561  wantarray = 0;
6562 #endif
6563  }
6564  }
6565  else {
6566  if (wantarray)
6567  ary = rb_ary_new();
6568  else
6570  }
6571 
6572  str = rb_str_new4(str); /* iterate over this object instead of the receiver; presumably a frozen snapshot -- TODO confirm */
6573  ptr = RSTRING_PTR(str);
6574  end = RSTRING_END(str);
6575  enc = STR_ENC_GET(str);
6576  while (ptr < end) {
6577  c = rb_enc_codepoint_len(ptr, end, &n, enc); /* n receives the byte length of the character just decoded */
6578  if (wantarray)
6579  rb_ary_push(ary, UINT2NUM(c));
6580  else
6581  rb_yield(UINT2NUM(c));
6582  ptr += n;
6583  }
6584  RB_GC_GUARD(str); /* ptr/end point into str, so str has to stay referenced up to here */
6585  if (wantarray)
6586  return ary;
6587  else
6588  return orig;
6589 }
6590 
6591 /*
6592  * call-seq:
6593  * str.each_codepoint {|integer| block } -> str
6594  * str.each_codepoint -> an_enumerator
6595  *
6596  * Passes the <code>Integer</code> ordinal of each character in <i>str</i>,
6597  * also known as a <i>codepoint</i> when applied to Unicode strings, to the
6598  * given block.
6599  *
6600  * If no block is given, an enumerator is returned instead.
6601  *
6602  * "hello\u0639".each_codepoint {|c| print c, ' ' }
6603  *
6604  * <em>produces:</em>
6605  *
6606  * 104 101 108 108 111 1593
6607  */
6608 
6609 static VALUE
6611 {
6612  return rb_str_enumerate_codepoints(str, 0);
6613 }
6614 
6615 /*
6616  * call-seq:
6617  * str.codepoints -> an_array
6618  *
6619  * Returns an array of the <code>Integer</code> ordinals of the
6620  * characters in <i>str</i>. This is a shorthand for
6621  * <code>str.each_codepoint.to_a</code>.
6622  *
6623  * If a block is given, which is a deprecated form, works the same as
6624  * <code>each_codepoint</code>.
6625  */
6626 
6627 static VALUE
6629 {
6630  return rb_str_enumerate_codepoints(str, 1);
6631 }
6632 
6633 
/*
 * Helper used by the chop implementations (called as chopped_length(str)):
 * returns the byte length str would have after its last character is
 * removed.  When that last character is "\n" and the character before it is
 * "\r", both are dropped.  Returns 0 when there is no previous character to
 * step back to.
 *
 * NOTE(review): the signature line (source line 6635) is missing from this
 * listing.
 */
6634 static long
6636 {
6637  rb_encoding *enc = STR_ENC_GET(str);
6638  const char *p, *p2, *beg, *end;
6639 
6640  beg = RSTRING_PTR(str);
6641  end = beg + RSTRING_LEN(str);
6642  if (beg > end) return 0;
6643  p = rb_enc_prev_char(beg, end, end, enc); /* start of the last character */
6644  if (!p) return 0;
6645  if (p > beg && rb_enc_ascget(p, end, 0, enc) == '\n') {
6646  p2 = rb_enc_prev_char(beg, p, end, enc); /* character in front of the "\n" */
6647  if (p2 && rb_enc_ascget(p2, end, 0, enc) == '\r') p = p2;
6648  }
6649  return p - beg;
6650 }
6651 
6652 /*
6653  * call-seq:
6654  * str.chop! -> str or nil
6655  *
6656  * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
6657  * or <code>nil</code> if <i>str</i> is the empty string. See also
6658  * <code>String#chomp!</code>.
6659  */
6660 
6661 static VALUE
6663 {
6664  str_modify_keep_cr(str);
6665  if (RSTRING_LEN(str) > 0) {
6666  long len;
6667  len = chopped_length(str);
6668  STR_SET_LEN(str, len);
6669  RSTRING_PTR(str)[len] = '\0';
6670  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
6671  ENC_CODERANGE_CLEAR(str);
6672  }
6673  return str;
6674  }
6675  return Qnil;
6676 }
6677 
6678 
6679 /*
6680  * call-seq:
6681  * str.chop -> new_str
6682  *
6683  * Returns a new <code>String</code> with the last character removed. If the
6684  * string ends with <code>\r\n</code>, both characters are removed. Applying
6685  * <code>chop</code> to an empty string returns an empty
6686  * string. <code>String#chomp</code> is often a safer alternative, as it leaves
6687  * the string unchanged if it doesn't end in a record separator.
6688  *
6689  * "string\r\n".chop #=> "string"
6690  * "string\n\r".chop #=> "string\n"
6691  * "string\n".chop #=> "string"
6692  * "string".chop #=> "strin"
6693  * "x".chop.chop #=> ""
6694  */
6695 
6696 static VALUE
6698 {
6699  return rb_str_subseq(str, 0, chopped_length(str));
6700 }
6701 
6702 
6703 /*
6704  * call-seq:
6705  * str.chomp!(separator=$/) -> str or nil
6706  *
6707  * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
6708  * returning <i>str</i>, or <code>nil</code> if no modifications were made.
6709  */
6710 
/*
 * In-place chomp (invoked elsewhere in this file as
 * rb_str_chomp_bang(argc, argv, str)): removes a trailing record separator
 * from str.  Returns str when something was removed and Qnil when the
 * string is left as it was.
 *
 * Separator selection:
 *  - no argument: rb_rs is used; when that is rb_default_rs the
 *    "smart chomp" path removes a trailing "\n", "\r\n" or "\r".
 *  - one optional argument: taken as the separator.
 *  - nil separator: nothing is removed.
 *  - empty separator: every trailing "\n" (each optionally preceded by one
 *    "\r") is removed.
 *  - the one-byte separator "\n": handled by the smart chomp path as well.
 *  - anything else: removed only if str ends with exactly those bytes and
 *    the match starts on a character boundary.
 *
 * NOTE(review): the signature line (source line 6712) is missing from this
 * listing.
 */
6711 static VALUE
6713 {
6714  rb_encoding *enc;
6715  VALUE rs;
6716  int newline;
6717  char *p, *pp, *e;
6718  long len, rslen;
6719 
6720  str_modify_keep_cr(str);
6721  len = RSTRING_LEN(str);
6722  if (len == 0) return Qnil;
6723  p = RSTRING_PTR(str);
6724  e = p + len;
6725  if (argc == 0) {
6726  rs = rb_rs;
6727  if (rs == rb_default_rs) {
6728  smart_chomp:
6729  enc = rb_enc_get(str);
6730  if (rb_enc_mbminlen(enc) > 1) { /* encodings whose smallest character is wider than one byte */
6731  pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
6732  if (rb_enc_is_newline(pp, e, enc)) {
6733  e = pp;
6734  }
6735  pp = e - rb_enc_mbminlen(enc);
6736  if (pp >= p) {
6737  pp = rb_enc_left_char_head(p, pp, e, enc);
6738  if (rb_enc_ascget(pp, e, 0, enc) == '\r') {
6739  e = pp;
6740  }
6741  }
6742  if (e == RSTRING_END(str)) { /* e never moved: nothing to remove */
6743  return Qnil;
6744  }
6745  len = e - RSTRING_PTR(str);
6746  STR_SET_LEN(str, len);
6747  }
6748  else { /* byte-wise check of the last one or two bytes */
6749  if (RSTRING_PTR(str)[len-1] == '\n') {
6750  STR_DEC_LEN(str);
6751  if (RSTRING_LEN(str) > 0 &&
6752  RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
6753  STR_DEC_LEN(str);
6754  }
6755  }
6756  else if (RSTRING_PTR(str)[len-1] == '\r') {
6757  STR_DEC_LEN(str);
6758  }
6759  else {
6760  return Qnil;
6761  }
6762  }
6763  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6764  return str;
6765  }
6766  }
6767  else {
6768  rb_scan_args(argc, argv, "01", &rs);
6769  }
6770  if (NIL_P(rs)) return Qnil;
6771  StringValue(rs);
6772  rslen = RSTRING_LEN(rs);
6773  if (rslen == 0) { /* empty separator: strip the whole run of trailing newlines */
6774  while (len>0 && p[len-1] == '\n') {
6775  len--;
6776  if (len>0 && p[len-1] == '\r')
6777  len--;
6778  }
6779  if (len < RSTRING_LEN(str)) {
6780  STR_SET_LEN(str, len);
6781  RSTRING_PTR(str)[len] = '\0';
6782  return str;
6783  }
6784  return Qnil;
6785  }
6786  if (rslen > len) return Qnil; /* separator longer than the string cannot match */
6787  newline = RSTRING_PTR(rs)[rslen-1];
6788  if (rslen == 1 && newline == '\n')
6789  goto smart_chomp;
6790 
6791  enc = rb_enc_check(str, rs);
6792  if (is_broken_string(rs)) {
6793  return Qnil;
6794  }
6795  pp = e - rslen;
6796  if (p[len-1] == newline && /* compare the last byte first, full memcmp only if it matches */
6797  (rslen <= 1 ||
6798  memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
6799  if (rb_enc_left_char_head(p, pp, e, enc) != pp) /* match would start inside a character */
6800  return Qnil;
6801  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
6802  ENC_CODERANGE_CLEAR(str);
6803  }
6804  STR_SET_LEN(str, RSTRING_LEN(str) - rslen);
6805  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6806  return str;
6807  }
6808  return Qnil;
6809 }
6810 
6811 
6812 /*
6813  * call-seq:
6814  * str.chomp(separator=$/) -> new_str
6815  *
6816  * Returns a new <code>String</code> with the given record separator removed
6817  * from the end of <i>str</i> (if present). If <code>$/</code> has not been
6818  * changed from the default Ruby record separator, then <code>chomp</code> also
6819  * removes carriage return characters (that is, it will remove <code>\n</code>,
6820  * <code>\r</code>, and <code>\r\n</code>). If <code>$/</code> is an empty string,
6821  * it will remove all trailing newlines from the string.
6822  *
6823  * "hello".chomp #=> "hello"
6824  * "hello\n".chomp #=> "hello"
6825  * "hello\r\n".chomp #=> "hello"
6826  * "hello\n\r".chomp #=> "hello\n"
6827  * "hello\r".chomp #=> "hello"
6828  * "hello \n there".chomp #=> "hello \n there"
6829  * "hello".chomp("llo") #=> "he"
6830  * "hello\r\n\r\n".chomp('') #=> "hello"
6831  * "hello\r\n\r\r\n".chomp('') #=> "hello\r\n\r"
6832  */
6833 
6834 static VALUE
6836 {
6837  str = rb_str_dup(str);
6838  rb_str_chomp_bang(argc, argv, str);
6839  return str;
6840 }
6841 
6842 /*
6843  * call-seq:
6844  * str.lstrip! -> self or nil
6845  *
6846  * Removes leading whitespace from <i>str</i>, returning <code>nil</code> if no
6847  * change was made. See also <code>String#rstrip!</code> and
6848  * <code>String#strip!</code>.
6849  *
6850  * " hello ".lstrip! #=> "hello "
6851  * "hello".lstrip! #=> nil
6852  */
6853 
/*
 * In-place lstrip (invoked elsewhere in this file as
 * rb_str_lstrip_bang(str)): skips leading characters for which rb_isspace()
 * holds, then moves the remaining bytes to the front of the buffer.
 * Returns str when at least one character was removed, Qnil otherwise
 * (including for an empty string).
 *
 * NOTE(review): the signature line (source line 6855) is missing from this
 * listing.
 */
6854 static VALUE
6856 {
6857  rb_encoding *enc;
6858  char *s, *t, *e;
6859 
6860  str_modify_keep_cr(str);
6861  enc = STR_ENC_GET(str);
6862  s = RSTRING_PTR(str);
6863  if (!s || RSTRING_LEN(str) == 0) return Qnil;
6864  e = t = RSTRING_END(str);
6865  /* remove spaces at head */
6866  while (s < e) {
6867  int n;
6868  unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
6869 
6870  if (!rb_isspace(cc)) break;
6871  s += n;
6872  }
6873 
6874  if (s > RSTRING_PTR(str)) { /* s advanced, so there is something to drop */
6875  STR_SET_LEN(str, t-s);
6876  memmove(RSTRING_PTR(str), s, RSTRING_LEN(str)); /* source and destination overlap, hence memmove */
6877  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6878  return str;
6879  }
6880  return Qnil;
6881 }
6882 
6883 
6884 /*
6885  * call-seq:
6886  * str.lstrip -> new_str
6887  *
6888  * Returns a copy of <i>str</i> with leading whitespace removed. See also
6889  * <code>String#rstrip</code> and <code>String#strip</code>.
6890  *
6891  * " hello ".lstrip #=> "hello "
6892  * "hello".lstrip #=> "hello"
6893  */
6894 
6895 static VALUE
6897 {
6898  str = rb_str_dup(str);
6899  rb_str_lstrip_bang(str);
6900  return str;
6901 }
6902 
6903 
6904 /*
6905  * call-seq:
6906  * str.rstrip! -> self or nil
6907  *
6908  * Removes trailing whitespace from <i>str</i>, returning <code>nil</code> if
6909  * no change was made. See also <code>String#lstrip!</code> and
6910  * <code>String#strip!</code>.
6911  *
6912  * " hello ".rstrip! #=> " hello"
6913  * "hello".rstrip! #=> nil
6914  */
6915 
/*
 * In-place rstrip (invoked elsewhere in this file as
 * rb_str_rstrip_bang(str)): cuts trailing whitespace and NUL characters off
 * the end of str.  Returns str when the string got shorter, Qnil otherwise
 * (including for an empty string).
 *
 * NOTE(review): the signature line (source line 6917) and source line 6924
 * are missing from this listing -- confirm against the real file.
 */
6916 static VALUE
6918 {
6919  rb_encoding *enc;
6920  char *s, *t, *e;
6921 
6922  str_modify_keep_cr(str);
6923  enc = STR_ENC_GET(str);
6925  s = RSTRING_PTR(str);
6926  if (!s || RSTRING_LEN(str) == 0) return Qnil;
6927  t = e = RSTRING_END(str);
6928 
6929  /* remove trailing spaces or '\0's */
6930  if (single_byte_optimizable(str)) { /* byte-wise backward scan */
6931  unsigned char c;
6932  while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--;
6933  }
6934  else { /* step backwards one character at a time */
6935  char *tp;
6936 
6937  while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
6938  unsigned int c = rb_enc_codepoint(tp, e, enc);
6939  if (c && !rb_isspace(c)) break; /* stop at the first character that is neither NUL nor space */
6940  t = tp;
6941  }
6942  }
6943  if (t < e) { /* t moved left: truncate at t */
6944  long len = t-RSTRING_PTR(str);
6945 
6946  STR_SET_LEN(str, len);
6947  RSTRING_PTR(str)[len] = '\0';
6948  return str;
6949  }
6950  return Qnil;
6951 }
6952 
6953 
6954 /*
6955  * call-seq:
6956  * str.rstrip -> new_str
6957  *
6958  * Returns a copy of <i>str</i> with trailing whitespace removed. See also
6959  * <code>String#lstrip</code> and <code>String#strip</code>.
6960  *
6961  * " hello ".rstrip #=> " hello"
6962  * "hello".rstrip #=> "hello"
6963  */
6964 
6965 static VALUE
6967 {
6968  str = rb_str_dup(str);
6969  rb_str_rstrip_bang(str);
6970  return str;
6971 }
6972 
6973 
6974 /*
6975  * call-seq:
6976  * str.strip! -> str or nil
6977  *
6978  * Removes leading and trailing whitespace from <i>str</i>. Returns
6979  * <code>nil</code> if <i>str</i> was not altered.
6980  */
6981 
/*
 * In-place strip (invoked elsewhere in this file as rb_str_strip_bang(str)):
 * runs rb_str_lstrip_bang() and then rb_str_rstrip_bang() on str.  Returns
 * Qnil only when both calls returned nil, otherwise str.
 *
 * NOTE(review): the signature line (source line 6983) is missing from this
 * listing.
 */
6982 static VALUE
6984 {
6985  VALUE l = rb_str_lstrip_bang(str);
6986  VALUE r = rb_str_rstrip_bang(str);
6987 
6988  if (NIL_P(l) && NIL_P(r)) return Qnil;
6989  return str;
6990 }
6991 
6992 
6993 /*
6994  * call-seq:
6995  * str.strip -> new_str
6996  *
6997  * Returns a copy of <i>str</i> with leading and trailing whitespace removed.
6998  *
6999  * " hello ".strip #=> "hello"
7000  * "\tgoodbye\r\n".strip #=> "goodbye"
7001  */
7002 
7003 static VALUE
7005 {
7006  str = rb_str_dup(str);
7007  rb_str_strip_bang(str);
7008  return str;
7009 }
7010 
/*
 * Performs a single search step for the scan loop: looks for pat in str
 * beginning at offset *start.
 *
 * On a match, *start is moved to where the next search should begin and the
 * return value is
 *   - the whole matched string when the pattern has no groups
 *     (num_regs == 1), or
 *   - an array holding the match of every group 1..num_regs-1.
 * Returns Qnil when nothing matches; *start is left untouched in that case.
 *
 * BEG(n)/END(n) are the file-local macros for regs->beg[n] / regs->end[n].
 */
7011 static VALUE
7012 scan_once(VALUE str, VALUE pat, long *start)
7013 {
7014  VALUE result, match;
7015  struct re_registers *regs;
7016  int i;
7017 
7018  if (rb_reg_search(pat, str, *start, 0) >= 0) {
7019  match = rb_backref_get();
7020  regs = RMATCH_REGS(match);
7021  if (BEG(0) == END(0)) { /* empty match: the position must still advance */
7022  rb_encoding *enc = STR_ENC_GET(str);
7023  /*
7024  * Always consume at least one character of the input string
7025  */
7026  if (RSTRING_LEN(str) > END(0))
7027  *start = END(0)+rb_enc_fast_mbclen(RSTRING_PTR(str)+END(0),
7028  RSTRING_END(str), enc);
7029  else
7030  *start = END(0)+1; /* empty match at the very end: move past the end */
7031  }
7032  else {
7033  *start = END(0);
7034  }
7035  if (regs->num_regs == 1) {
7036  return rb_reg_nth_match(0, match);
7037  }
7038  result = rb_ary_new2(regs->num_regs);
7039  for (i=1; i < regs->num_regs; i++) { /* group 0 (the whole match) is skipped */
7040  rb_ary_push(result, rb_reg_nth_match(i, match));
7041  }
7042 
7043  return result;
7044  }
7045  return Qnil;
7046 }
7047 
7048 
7049 /*
7050  * call-seq:
7051  * str.scan(pattern) -> array
7052  * str.scan(pattern) {|match, ...| block } -> str
7053  *
7054  * Both forms iterate through <i>str</i>, matching the pattern (which may be a
7055  * <code>Regexp</code> or a <code>String</code>). For each match, a result is
7056  * generated and either added to the result array or passed to the block. If
7057  * the pattern contains no groups, each individual result consists of the
7058  * matched string, <code>$&</code>. If the pattern contains groups, each
7059  * individual result is itself an array containing one entry per group.
7060  *
7061  * a = "cruel world"
7062  * a.scan(/\w+/) #=> ["cruel", "world"]
7063  * a.scan(/.../) #=> ["cru", "el ", "wor"]
7064  * a.scan(/(...)/) #=> [["cru"], ["el "], ["wor"]]
7065  * a.scan(/(..)(..)/) #=> [["cr", "ue"], ["l ", "wo"]]
7066  *
7067  * And the block form:
7068  *
7069  * a.scan(/\w+/) {|w| print "<<#{w}>> " }
7070  * print "\n"
7071  * a.scan(/(.)(.)/) {|x,y| print y, x }
7072  * print "\n"
7073  *
7074  * <em>produces:</em>
7075  *
7076  * <<cruel>> <<world>>
7077  * rceu lowlr
7078  */
7079 
7080 static VALUE
7082 {
7083  VALUE result;
7084  long start = 0;
7085  long last = -1, prev = 0;
7086  char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);
7087 
7088  pat = get_pat(pat, 1);
7089  if (!rb_block_given_p()) {
7090  VALUE ary = rb_ary_new();
7091 
7092  while (!NIL_P(result = scan_once(str, pat, &start))) {
7093  last = prev;
7094  prev = start;
7095  rb_ary_push(ary, result);
7096  }
7097  if (last >= 0) rb_reg_search(pat, str, last, 0);
7098  return ary;
7099  }
7100 
7101  while (!NIL_P(result = scan_once(str, pat, &start))) {
7102  last = prev;
7103  prev = start;
7104  rb_yield(result);
7105  str_mod_check(str, p, len);
7106  }
7107  if (last >= 0) rb_reg_search(pat, str, last, 0);
7108  return str;
7109 }
7110 
7111 
7112 /*
7113  * call-seq:
7114  * str.hex -> integer
7115  *
7116  * Treats leading characters from <i>str</i> as a string of hexadecimal digits
7117  * (with an optional sign and an optional <code>0x</code>) and returns the
7118  * corresponding number. Zero is returned on error.
7119  *
7120  * "0x0a".hex #=> 10
7121  * "-1234".hex #=> -4660
7122  * "0".hex #=> 0
7123  * "wombat".hex #=> 0
7124  */
7125 
7126 static VALUE
7128 {
7129  return rb_str_to_inum(str, 16, FALSE);
7130 }
7131 
7132 
7133 /*
7134  * call-seq:
7135  * str.oct -> integer
7136  *
7137  * Treats leading characters of <i>str</i> as a string of octal digits (with an
7138  * optional sign) and returns the corresponding number. Returns 0 if the
7139  * conversion fails.
7140  *
7141  * "123".oct #=> 83
7142  * "-377".oct #=> -255
7143  * "bad".oct #=> 0
7144  * "0377bad".oct #=> 255
7145  */
7146 
7147 static VALUE
7149 {
7150  return rb_str_to_inum(str, -8, FALSE);
7151 }
7152 
7153 
7154 /*
7155  * call-seq:
7156  * str.crypt(salt_str) -> new_str
7157  *
7158  * Applies a one-way cryptographic hash to <i>str</i> by invoking the
7159  * standard library function <code>crypt(3)</code> with the given
7160  * salt string. While the format and the result are system and
7161  * implementation dependent, using a salt matching the regular
7162  * expression <code>\A[a-zA-Z0-9./]{2}</code> should be valid and
7163  * safe on any platform, in which only the first two characters are
7164  * significant.
7165  *
7166  * This method is for use in system specific scripts, so if you want
7167  * a cross-platform hash function consider using Digest or OpenSSL
7168  * instead.
7169  */
7170 
7171 static VALUE
7173 {
7174  extern char *crypt(const char *, const char *);
7175  VALUE result;
7176  const char *s, *saltp;
7177  char *res;
7178 #ifdef BROKEN_CRYPT
7179  char salt_8bit_clean[3];
7180 #endif
7181 
7182  StringValue(salt);
7183  if (RSTRING_LEN(salt) < 2)
7184  rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
7185 
7186  s = RSTRING_PTR(str);
7187  if (!s) s = "";
7188  saltp = RSTRING_PTR(salt);
7189 #ifdef BROKEN_CRYPT
7190  if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
7191  salt_8bit_clean[0] = saltp[0] & 0x7f;
7192  salt_8bit_clean[1] = saltp[1] & 0x7f;
7193  salt_8bit_clean[2] = '\0';
7194  saltp = salt_8bit_clean;
7195  }
7196 #endif
7197  res = crypt(s, saltp);
7198  if (!res) {
7199  rb_sys_fail("crypt");
7200  }
7201  result = rb_str_new2(res);
7202  OBJ_INFECT(result, str);
7203  OBJ_INFECT(result, salt);
7204  return result;
7205 }
7206 
7207 
7208 /*
7209  * call-seq:
7210  * str.intern -> symbol
7211  * str.to_sym -> symbol
7212  *
7213  * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
7214  * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
7215  *
7216  * "Koala".intern #=> :Koala
7217  * s = 'cat'.to_sym #=> :cat
7218  * s == :cat #=> true
7219  * s = '@cat'.to_sym #=> :@cat
7220  * s == :@cat #=> true
7221  *
7222  * This can also be used to create symbols that cannot be represented using the
7223  * <code>:xxx</code> notation.
7224  *
7225  * 'cat and dog'.to_sym #=> :"cat and dog"
7226  */
7227 
7228 VALUE
7230 {
7231  VALUE str = RB_GC_GUARD(s);
7232  ID id;
7233 
7234  id = rb_intern_str(str);
7235  return ID2SYM(id);
7236 }
7237 
7238 
7239 /*
7240  * call-seq:
7241  * str.ord -> integer
7242  *
7243  * Returns the <code>Integer</code> ordinal of a one-character string.
7244  *
7245  * "a".ord #=> 97
7246  */
7247 
7248 VALUE
7250 {
7251  unsigned int c;
7252 
7254  return UINT2NUM(c);
7255 }
7256 /*
7257  * call-seq:
7258  * str.sum(n=16) -> integer
7259  *
7260  * Returns a basic <em>n</em>-bit checksum of the characters in <i>str</i>,
7261  * where <em>n</em> is the optional <code>Fixnum</code> parameter, defaulting
7262  * to 16. The result is simply the sum of the binary value of each character in
7263  * <i>str</i> modulo <code>2**n</code>. This is not a particularly good
7264  * checksum.
7265  */
7266 
7267 static VALUE
7269 {
7270  VALUE vbits;
7271  int bits;
7272  char *ptr, *p, *pend;
7273  long len;
7274  VALUE sum = INT2FIX(0);
7275  unsigned long sum0 = 0;
7276 
7277  if (argc == 0) {
7278  bits = 16;
7279  }
7280  else {
7281  rb_scan_args(argc, argv, "01", &vbits);
7282  bits = NUM2INT(vbits);
7283  }
7284  ptr = p = RSTRING_PTR(str);
7285  len = RSTRING_LEN(str);
7286  pend = p + len;
7287 
7288  while (p < pend) {
7289  if (FIXNUM_MAX - UCHAR_MAX < sum0) {
7290  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7291  str_mod_check(str, ptr, len);
7292  sum0 = 0;
7293  }
7294  sum0 += (unsigned char)*p;
7295  p++;
7296  }
7297 
7298  if (bits == 0) {
7299  if (sum0) {
7300  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7301  }
7302  }
7303  else {
7304  if (sum == INT2FIX(0)) {
7305  if (bits < (int)sizeof(long)*CHAR_BIT) {
7306  sum0 &= (((unsigned long)1)<<bits)-1;
7307  }
7308  sum = LONG2FIX(sum0);
7309  }
7310  else {
7311  VALUE mod;
7312 
7313  if (sum0) {
7314  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7315  }
7316 
7317  mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits));
7318  mod = rb_funcall(mod, '-', 1, INT2FIX(1));
7319  sum = rb_funcall(sum, '&', 1, mod);
7320  }
7321  }
7322  return sum;
7323 }
7324 
/*
 * Common implementation of the padding methods.  argv holds the target
 * width and an optional pad string (a single space when omitted); jflag
 * selects where the padding goes:
 *   'l'        - everything on the right (left-justified text)
 *   'r'        - everything on the left  (right-justified text)
 *   any other  - n/2 pad characters on the left, the rest on the right
 *
 * Lengths are counted in characters of the encoding shared by str and pad.
 * When width is negative or not larger than the length of str, a duplicate
 * of str is returned.  Raises ArgumentError for an empty pad string
 * ("zero width padding") and when the resulting byte size would overflow
 * ("argument too big").
 */
7325 static VALUE
7326 rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
7327 {
7328  rb_encoding *enc;
7329  VALUE w;
7330  long width, len, flen = 1, fclen = 1; /* flen: pad length in bytes, fclen: pad length in characters */
7331  VALUE res;
7332  char *p;
7333  const char *f = " ";
7334  long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
7335  volatile VALUE pad;
7336  int singlebyte = 1, cr;
7337 
7338  rb_scan_args(argc, argv, "11", &w, &pad);
7339  enc = STR_ENC_GET(str);
7340  width = NUM2LONG(w);
7341  if (argc == 2) {
7342  StringValue(pad);
7343  enc = rb_enc_check(str, pad);
7344  f = RSTRING_PTR(pad);
7345  flen = RSTRING_LEN(pad);
7346  fclen = str_strlen(pad, enc);
7347  singlebyte = single_byte_optimizable(pad);
7348  if (flen == 0 || fclen == 0) {
7349  rb_raise(rb_eArgError, "zero width padding");
7350  }
7351  }
7352  len = str_strlen(str, enc);
7353  if (width < 0 || len >= width) return rb_str_dup(str);
7354  n = width - len; /* total number of pad characters to add */
7355  llen = (jflag == 'l') ? 0 : ((jflag == 'r') ? n : n/2);
7356  rlen = n - llen;
7357  cr = ENC_CODERANGE(str);
7358  if (flen > 1) { /* llen2/rlen2: bytes of the trailing partial pad copy on each side */
7359  llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
7360  rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
7361  }
7362  size = RSTRING_LEN(str);
7363  if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen || /* each step is checked before the value grows */
7364  (len *= flen) >= LONG_MAX - llen2 - rlen2 ||
7365  (len += llen2 + rlen2) >= LONG_MAX - size) {
7366  rb_raise(rb_eArgError, "argument too big");
7367  }
7368  len += size; /* len is now the byte size of the result */
7369  res = rb_str_new5(str, 0, len);
7370  p = RSTRING_PTR(res);
7371  if (flen <= 1) { /* one-byte pad: fill directly */
7372  memset(p, *f, llen);
7373  p += llen;
7374  }
7375  else { /* whole copies of pad first, then the partial one */
7376  while (llen >= fclen) {
7377  memcpy(p,f,flen);
7378  p += flen;
7379  llen -= fclen;
7380  }
7381  if (llen > 0) {
7382  memcpy(p, f, llen2);
7383  p += llen2;
7384  }
7385  }
7386  memcpy(p, RSTRING_PTR(str), size);
7387  p += size;
7388  if (flen <= 1) { /* right side, same scheme as the left */
7389  memset(p, *f, rlen);
7390  p += rlen;
7391  }
7392  else {
7393  while (rlen >= fclen) {
7394  memcpy(p,f,flen);
7395  p += flen;
7396  rlen -= fclen;
7397  }
7398  if (rlen > 0) {
7399  memcpy(p, f, rlen2);
7400  p += rlen2;
7401  }
7402  }
7403  *p = '\0';
7404  STR_SET_LEN(res, p-RSTRING_PTR(res));
7405  OBJ_INFECT(res, str);
7406  if (!NIL_P(pad)) OBJ_INFECT(res, pad);
7407  rb_enc_associate(res, enc);
7408  if (argc == 2)
7409  cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(pad)); /* combine the code ranges of str and pad */
7410  if (cr != ENC_CODERANGE_BROKEN)
7411  ENC_CODERANGE_SET(res, cr);
7412  return res;
7413 }
7414 
7415 
7416 /*
7417  * call-seq:
7418  * str.ljust(integer, padstr=' ') -> new_str
7419  *
7420  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7421  * <code>String</code> of length <i>integer</i> with <i>str</i> left justified
7422  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7423  *
7424  * "hello".ljust(4) #=> "hello"
7425  * "hello".ljust(20) #=> "hello "
7426  * "hello".ljust(20, '1234') #=> "hello123412341234123"
7427  */
7428 
7429 static VALUE
7431 {
7432  return rb_str_justify(argc, argv, str, 'l');
7433 }
7434 
7435 
7436 /*
7437  * call-seq:
7438  * str.rjust(integer, padstr=' ') -> new_str
7439  *
7440  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7441  * <code>String</code> of length <i>integer</i> with <i>str</i> right justified
7442  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7443  *
7444  * "hello".rjust(4) #=> "hello"
7445  * "hello".rjust(20) #=> " hello"
7446  * "hello".rjust(20, '1234') #=> "123412341234123hello"
7447  */
7448 
7449 static VALUE
7451 {
7452  return rb_str_justify(argc, argv, str, 'r');
7453 }
7454 
7455 
7456 /*
7457  * call-seq:
7458  * str.center(width, padstr=' ') -> new_str
7459  *
7460  * Centers +str+ in +width+. If +width+ is greater than the length of +str+,
7461  * returns a new String of length +width+ with +str+ centered and padded with
7462  * +padstr+; otherwise, returns +str+.
7463  *
7464  * "hello".center(4) #=> "hello"
7465  * "hello".center(20) #=> " hello "
7466  * "hello".center(20, '123') #=> "1231231hello12312312"
7467  */
7468 
7469 static VALUE
7471 {
7472  return rb_str_justify(argc, argv, str, 'c');
7473 }
7474 
7475 /*
7476  * call-seq:
7477  * str.partition(sep) -> [head, sep, tail]
7478  * str.partition(regexp) -> [head, match, tail]
7479  *
7480  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string
7481  * and returns the part before it, the match, and the part
7482  * after it.
7483  * If it is not found, returns <i>str</i> and two empty strings.
7484  *
7485  * "hello".partition("l") #=> ["he", "l", "lo"]
7486  * "hello".partition("x") #=> ["hello", "", ""]
7487  * "hello".partition(/.l/) #=> ["h", "el", "lo"]
7488  */
7489 
7490 static VALUE
7492 {
7493  long pos;
7494  int regex = FALSE;
7495 
7496  if (RB_TYPE_P(sep, T_REGEXP)) {
7497  pos = rb_reg_search(sep, str, 0, 0);
7498  regex = TRUE;
7499  }
7500  else {
7501  VALUE tmp;
7502 
7503  tmp = rb_check_string_type(sep);
7504  if (NIL_P(tmp)) {
7505  rb_raise(rb_eTypeError, "type mismatch: %s given",
7506  rb_obj_classname(sep));
7507  }
7508  sep = tmp;
7509  pos = rb_str_index(str, sep, 0);
7510  }
7511  if (pos < 0) {
7512  failed:
7513  return rb_ary_new3(3, str, str_new_empty(str), str_new_empty(str));
7514  }
7515  if (regex) {
7516  sep = rb_str_subpat(str, sep, INT2FIX(0));
7517  if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
7518  }
7519  return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
7520  sep,
7521  rb_str_subseq(str, pos+RSTRING_LEN(sep),
7522  RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
7523 }
7524 
7525 /*
7526  * call-seq:
7527  * str.rpartition(sep) -> [head, sep, tail]
7528  * str.rpartition(regexp) -> [head, match, tail]
7529  *
7530  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string from the end
7531  * of the string, and returns the part before it, the match, and the part
7532  * after it.
7533  * If it is not found, returns two empty strings and <i>str</i>.
7534  *
7535  * "hello".rpartition("l") #=> ["hel", "l", "o"]
7536  * "hello".rpartition("x") #=> ["", "", "hello"]
7537  * "hello".rpartition(/.l/) #=> ["he", "ll", "o"]
7538  */
7539 
7540 static VALUE
7542 {
7543  long pos = RSTRING_LEN(str);
7544  int regex = FALSE;
7545 
7546  if (RB_TYPE_P(sep, T_REGEXP)) {
7547  pos = rb_reg_search(sep, str, pos, 1);
7548  regex = TRUE;
7549  }
7550  else {
7551  VALUE tmp;
7552 
7553  tmp = rb_check_string_type(sep);
7554  if (NIL_P(tmp)) {
7555  rb_raise(rb_eTypeError, "type mismatch: %s given",
7556  rb_obj_classname(sep));
7557  }
7558  sep = tmp;
7559  pos = rb_str_sublen(str, pos);
7560  pos = rb_str_rindex(str, sep, pos);
7561  }
7562  if (pos < 0) {
7563  return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), str);
7564  }
7565  if (regex) {
7566  sep = rb_reg_nth_match(0, rb_backref_get());
7567  }
7568  return rb_ary_new3(3, rb_str_substr(str, 0, pos),
7569  sep,
7570  rb_str_substr(str,pos+str_strlen(sep,STR_ENC_GET(sep)),RSTRING_LEN(str)));
7571 }
7572 
7573 /*
7574  * call-seq:
7575  * str.start_with?([prefixes]+) -> true or false
7576  *
7577  * Returns true if +str+ starts with one of the +prefixes+ given.
7578  *
7579  * "hello".start_with?("hell") #=> true
7580  *
7581  * # returns true if one of the prefixes matches.
7582  * "hello".start_with?("heaven", "hell") #=> true
7583  * "hello".start_with?("heaven", "paradise") #=> false
7584  */
7585 
7586 static VALUE
7588 {
7589  int i;
7590 
7591  for (i=0; i<argc; i++) {
7592  VALUE tmp = argv[i];
7593  StringValue(tmp);
7594  rb_enc_check(str, tmp);
7595  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7596  if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7597  return Qtrue;
7598  }
7599  return Qfalse;
7600 }
7601 
7602 /*
7603  * call-seq:
7604  * str.end_with?([suffixes]+) -> true or false
7605  *
7606  * Returns true if +str+ ends with one of the +suffixes+ given.
7607  */
7608 
7609 static VALUE
7611 {
7612  int i;
7613  char *p, *s, *e;
7614  rb_encoding *enc;
7615 
7616  for (i=0; i<argc; i++) {
7617  VALUE tmp = argv[i];
7618  StringValue(tmp);
7619  enc = rb_enc_check(str, tmp);
7620  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7621  p = RSTRING_PTR(str);
7622  e = p + RSTRING_LEN(str);
7623  s = e - RSTRING_LEN(tmp);
7624  if (rb_enc_left_char_head(p, s, e, enc) != s)
7625  continue;
7626  if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7627  return Qtrue;
7628  }
7629  return Qfalse;
7630 }
7631 
7632 void
7634 {
7635  if (!NIL_P(val) && !RB_TYPE_P(val, T_STRING)) {
7636  rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
7637  }
7638  *var = val;
7639 }
7640 
7641 
7642 /*
7643  * call-seq:
7644  * str.force_encoding(encoding) -> str
7645  *
7646  * Changes the encoding to +encoding+ and returns self.
7647  */
7648 
7649 static VALUE
7651 {
7652  str_modifiable(str);
7653  rb_enc_associate(str, rb_to_encoding(enc));
7654  ENC_CODERANGE_CLEAR(str);
7655  return str;
7656 }
7657 
7658 /*
7659  * call-seq:
7660  * str.b -> str
7661  *
7662  * Returns a copied string whose encoding is ASCII-8BIT.
7663  */
7664 
7665 static VALUE
7667 {
7668  VALUE str2 = str_alloc(rb_cString);
7669  str_replace_shared_without_enc(str2, str);
7670  OBJ_INFECT(str2, str);
7672  return str2;
7673 }
7674 
7675 /*
7676  * call-seq:
7677  * str.valid_encoding? -> true or false
7678  *
7679  * Returns true for a string which is encoded correctly.
7680  *
7681  * "\xc2\xa1".force_encoding("UTF-8").valid_encoding? #=> true
7682  * "\xc2".force_encoding("UTF-8").valid_encoding? #=> false
7683  * "\x80".force_encoding("UTF-8").valid_encoding? #=> false
7684  */
7685 
7686 static VALUE
7688 {
7689  int cr = rb_enc_str_coderange(str);
7690 
7691  return cr == ENC_CODERANGE_BROKEN ? Qfalse : Qtrue;
7692 }
7693 
7694 /*
7695  * call-seq:
7696  * str.ascii_only? -> true or false
7697  *
7698  * Returns true for a string which has only ASCII characters.
7699  *
7700  * "abc".force_encoding("UTF-8").ascii_only? #=> true
7701  * "abc\u{6666}".force_encoding("UTF-8").ascii_only? #=> false
7702  */
7703 
7704 static VALUE
7706 {
7707  int cr = rb_enc_str_coderange(str);
7708 
7709  return cr == ENC_CODERANGE_7BIT ? Qtrue : Qfalse;
7710 }
7711 
7726 VALUE
7727 rb_str_ellipsize(VALUE str, long len)
7728 {
7729  static const char ellipsis[] = "...";
7730  const long ellipsislen = sizeof(ellipsis) - 1;
7731  rb_encoding *const enc = rb_enc_get(str);
7732  const long blen = RSTRING_LEN(str);
7733  const char *const p = RSTRING_PTR(str), *e = p + blen;
7734  VALUE estr, ret = 0;
7735 
7736  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
7737  if (len * rb_enc_mbminlen(enc) >= blen ||
7738  (e = rb_enc_nth(p, e, len, enc)) - p == blen) {
7739  ret = str;
7740  }
7741  else if (len <= ellipsislen ||
7742  !(e = rb_enc_step_back(p, e, e, len = ellipsislen, enc))) {
7743  if (rb_enc_asciicompat(enc)) {
7744  ret = rb_str_new_with_class(str, ellipsis, len);
7745  rb_enc_associate(ret, enc);
7746  }
7747  else {
7748  estr = rb_usascii_str_new(ellipsis, len);
7749  ret = rb_str_encode(estr, rb_enc_from_encoding(enc), 0, Qnil);
7750  }
7751  }
7752  else if (ret = rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
7753  rb_str_cat(ret, ellipsis, ellipsislen);
7754  }
7755  else {
7756  estr = rb_str_encode(rb_usascii_str_new(ellipsis, ellipsislen),
7757  rb_enc_from_encoding(enc), 0, Qnil);
7758  rb_str_append(ret, estr);
7759  }
7760  return ret;
7761 }
7762 
7763 /**********************************************************************
7764  * Document-class: Symbol
7765  *
7766  * <code>Symbol</code> objects represent names and some strings
7767  * inside the Ruby
7768  * interpreter. They are generated using the <code>:name</code> and
7769  * <code>:"string"</code> literal
7770  * syntax, and by the various <code>to_sym</code> methods. The same
7771  * <code>Symbol</code> object will be created for a given name or string
7772  * for the duration of a program's execution, regardless of the context
7773  * or meaning of that name. Thus if <code>Fred</code> is a constant in
7774  * one context, a method in another, and a class in a third, the
7775  * <code>Symbol</code> <code>:Fred</code> will be the same object in
7776  * all three contexts.
7777  *
7778  * module One
7779  * class Fred
7780  * end
7781  * $f1 = :Fred
7782  * end
7783  * module Two
7784  * Fred = 1
7785  * $f2 = :Fred
7786  * end
7787  * def Fred()
7788  * end
7789  * $f3 = :Fred
7790  * $f1.object_id #=> 2514190
7791  * $f2.object_id #=> 2514190
7792  * $f3.object_id #=> 2514190
7793  *
7794  */
7795 
7796 
7797 /*
7798  * call-seq:
7799  * sym == obj -> true or false
7800  *
7801  * Equality---If <i>sym</i> and <i>obj</i> are exactly the same
7802  * symbol, returns <code>true</code>.
7803  */
7804 
7805 static VALUE
7806 sym_equal(VALUE sym1, VALUE sym2)
7807 {
7808  if (sym1 == sym2) return Qtrue;
7809  return Qfalse;
7810 }
7811 
7812 
7813 static int
7814 sym_printable(const char *s, const char *send, rb_encoding *enc)
7815 {
7816  while (s < send) {
7817  int n;
7818  int c = rb_enc_codepoint_len(s, send, &n, enc);
7819 
7820  if (!rb_enc_isprint(c, enc)) return FALSE;
7821  s += n;
7822  }
7823  return TRUE;
7824 }
7825 
7826 int
7828 {
7829  rb_encoding *enc;
7830  const char *ptr;
7831  long len;
7833 
7834  if (resenc == NULL) resenc = rb_default_external_encoding();
7835  enc = STR_ENC_GET(sym);
7836  ptr = RSTRING_PTR(sym);
7837  len = RSTRING_LEN(sym);
7838  if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (long)strlen(ptr) ||
7839  !rb_enc_symname_p(ptr, enc) || !sym_printable(ptr, ptr + len, enc)) {
7840  return FALSE;
7841  }
7842  return TRUE;
7843 }
7844 
7845 VALUE
7847 {
7848  rb_encoding *enc;
7849  const char *ptr;
7850  long len;
7851  rb_encoding *resenc;
7852 
7853  Check_Type(str, T_STRING);
7854  resenc = rb_default_internal_encoding();
7855  if (resenc == NULL) resenc = rb_default_external_encoding();
7856  enc = STR_ENC_GET(str);
7857  ptr = RSTRING_PTR(str);
7858  len = RSTRING_LEN(str);
7859  if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
7860  !sym_printable(ptr, ptr + len, enc)) {
7861  return rb_str_inspect(str);
7862  }
7863  return str;
7864 }
7865 
7866 VALUE
7868 {
7869  return rb_str_quote_unprintable(rb_id2str(id));
7870 }
7871 
7872 /*
7873  * call-seq:
7874  * sym.inspect -> string
7875  *
7876  * Returns the representation of <i>sym</i> as a symbol literal.
7877  *
7878  * :fred.inspect #=> ":fred"
7879  */
7880 
7881 static VALUE
7883 {
7884  VALUE str;
7885  const char *ptr;
7886  long len;
7887  ID id = SYM2ID(sym);
7888  char *dest;
7889 
7890  sym = rb_id2str(id);
7891  if (!rb_str_symname_p(sym)) {
7892  str = rb_str_inspect(sym);
7893  len = RSTRING_LEN(str);
7894  rb_str_resize(str, len + 1);
7895  dest = RSTRING_PTR(str);
7896  memmove(dest + 1, dest, len);
7897  dest[0] = ':';
7898  }
7899  else {
7900  rb_encoding *enc = STR_ENC_GET(sym);
7901  ptr = RSTRING_PTR(sym);
7902  len = RSTRING_LEN(sym);
7903  str = rb_enc_str_new(0, len + 1, enc);
7904  dest = RSTRING_PTR(str);
7905  dest[0] = ':';
7906  memcpy(dest + 1, ptr, len);
7907  }
7908  return str;
7909 }
7910 
7911 
7912 /*
7913  * call-seq:
7914  * sym.id2name -> string
7915  * sym.to_s -> string
7916  *
7917  * Returns the name or string corresponding to <i>sym</i>.
7918  *
7919  * :fred.id2name #=> "fred"
7920  */
7921 
7922 
7923 VALUE
7925 {
7926  ID id = SYM2ID(sym);
7927 
7928  return str_new3(rb_cString, rb_id2str(id));
7929 }
7930 
7931 
7932 /*
7933  * call-seq:
7934  * sym.to_sym -> sym
7935  * sym.intern -> sym
7936  *
7937  * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
7938  * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
7939  * in this case.
7940  */
7941 
7942 static VALUE
7944 {
7945  return sym;
7946 }
7947 
7948 static VALUE
7949 sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
7950 {
7951  VALUE obj;
7952 
7953  if (argc < 1) {
7954  rb_raise(rb_eArgError, "no receiver given");
7955  }
7956  obj = argv[0];
7957  return rb_funcall_with_block(obj, (ID)sym, argc - 1, argv + 1, passed_proc);
7958 }
7959 
7960 /*
7961  * call-seq:
7962  * sym.to_proc
7963  *
7964  * Returns a _Proc_ object which responds to the given method by _sym_.
7965  *
7966  * (1..3).collect(&:to_s) #=> ["1", "2", "3"]
7967  */
7968 
7969 static VALUE
7971 {
7972  static VALUE sym_proc_cache = Qfalse;
7973  enum {SYM_PROC_CACHE_SIZE = 67};
7974  VALUE proc;
7975  long id, index;
7976  VALUE *aryp;
7977 
7978  if (!sym_proc_cache) {
7979  sym_proc_cache = rb_ary_tmp_new(SYM_PROC_CACHE_SIZE * 2);
7980  rb_gc_register_mark_object(sym_proc_cache);
7981  rb_ary_store(sym_proc_cache, SYM_PROC_CACHE_SIZE*2 - 1, Qnil);
7982  }
7983 
7984  id = SYM2ID(sym);
7985  index = (id % SYM_PROC_CACHE_SIZE) << 1;
7986 
7987  aryp = RARRAY_PTR(sym_proc_cache);
7988  if (aryp[index] == sym) {
7989  return aryp[index + 1];
7990  }
7991  else {
7992  proc = rb_proc_new(sym_call, (VALUE)id);
7993  aryp[index] = sym;
7994  aryp[index + 1] = proc;
7995  return proc;
7996  }
7997 }
7998 
7999 /*
8000  * call-seq:
8001  *
8002  * sym.succ
8003  *
8004  * Same as <code>sym.to_s.succ.intern</code>.
8005  */
8006 
8007 static VALUE
8009 {
8010  return rb_str_intern(rb_str_succ(rb_sym_to_s(sym)));
8011 }
8012 
8013 /*
8014  * call-seq:
8015  *
8016  * symbol <=> other_symbol -> -1, 0, +1 or nil
8017  *
8018  * Compares +symbol+ with +other_symbol+ after calling #to_s on each of the
8019  * symbols. Returns -1, 0, +1 or nil depending on whether +symbol+ is less
8020  * than, equal to, or greater than +other_symbol+.
8021  *
8022  * +nil+ is returned if the two values are incomparable.
8023  *
8024  * See String#<=> for more information.
8025  */
8026 
8027 static VALUE
8029 {
8030  if (!SYMBOL_P(other)) {
8031  return Qnil;
8032  }
8033  return rb_str_cmp_m(rb_sym_to_s(sym), rb_sym_to_s(other));
8034 }
8035 
8036 /*
8037  * call-seq:
8038  *
8039  * sym.casecmp(other) -> -1, 0, +1 or nil
8040  *
8041  * Case-insensitive version of <code>Symbol#<=></code>.
8042  */
8043 
8044 static VALUE
8046 {
8047  if (!SYMBOL_P(other)) {
8048  return Qnil;
8049  }
8050  return rb_str_casecmp(rb_sym_to_s(sym), rb_sym_to_s(other));
8051 }
8052 
8053 /*
8054  * call-seq:
8055  * sym =~ obj -> fixnum or nil
8056  *
8057  * Returns <code>sym.to_s =~ obj</code>.
8058  */
8059 
8060 static VALUE
8062 {
8063  return rb_str_match(rb_sym_to_s(sym), other);
8064 }
8065 
8066 /*
8067  * call-seq:
8068  * sym[idx] -> char
8069  * sym[b, n] -> string
8070  *
8071  * Returns <code>sym.to_s[]</code>.
8072  */
8073 
8074 static VALUE
8076 {
8077  return rb_str_aref_m(argc, argv, rb_sym_to_s(sym));
8078 }
8079 
8080 /*
8081  * call-seq:
8082  * sym.length -> integer
8083  *
8084  * Same as <code>sym.to_s.length</code>.
8085  */
8086 
8087 static VALUE
8089 {
8090  return rb_str_length(rb_id2str(SYM2ID(sym)));
8091 }
8092 
8093 /*
8094  * call-seq:
8095  * sym.empty? -> true or false
8096  *
8097  * Returns whether _sym_ is :"" or not.
8098  */
8099 
8100 static VALUE
8102 {
8103  return rb_str_empty(rb_id2str(SYM2ID(sym)));
8104 }
8105 
8106 /*
8107  * call-seq:
8108  * sym.upcase -> symbol
8109  *
8110  * Same as <code>sym.to_s.upcase.intern</code>.
8111  */
8112 
8113 static VALUE
8115 {
8117 }
8118 
8119 /*
8120  * call-seq:
8121  * sym.downcase -> symbol
8122  *
8123  * Same as <code>sym.to_s.downcase.intern</code>.
8124  */
8125 
8126 static VALUE
8128 {
8130 }
8131 
8132 /*
8133  * call-seq:
8134  * sym.capitalize -> symbol
8135  *
8136  * Same as <code>sym.to_s.capitalize.intern</code>.
8137  */
8138 
8139 static VALUE
8141 {
8143 }
8144 
8145 /*
8146  * call-seq:
8147  * sym.swapcase -> symbol
8148  *
8149  * Same as <code>sym.to_s.swapcase.intern</code>.
8150  */
8151 
8152 static VALUE
8154 {
8156 }
8157 
8158 /*
8159  * call-seq:
8160  * sym.encoding -> encoding
8161  *
8162  * Returns the Encoding object that represents the encoding of _sym_.
8163  */
8164 
8165 static VALUE
8167 {
8168  return rb_obj_encoding(rb_id2str(SYM2ID(sym)));
8169 }
8170 
8171 ID
8173 {
8174  VALUE tmp;
8175 
8176  switch (TYPE(name)) {
8177  default:
8178  tmp = rb_check_string_type(name);
8179  if (NIL_P(tmp)) {
8180  tmp = rb_inspect(name);
8181  rb_raise(rb_eTypeError, "%s is not a symbol",
8182  RSTRING_PTR(tmp));
8183  }
8184  name = tmp;
8185  /* fall through */
8186  case T_STRING:
8187  name = rb_str_intern(name);
8188  /* fall through */
8189  case T_SYMBOL:
8190  return SYM2ID(name);
8191  }
8192 
8193  UNREACHABLE;
8194 }
8195 
8196 /*
8197  * A <code>String</code> object holds and manipulates an arbitrary sequence of
8198  * bytes, typically representing characters. String objects may be created
8199  * using <code>String::new</code> or as literals.
8200  *
8201  * Because of aliasing issues, users of strings should be aware of the methods
8202  * that modify the contents of a <code>String</code> object. Typically,
8203  * methods with names ending in ``!'' modify their receiver, while those
8204  * without a ``!'' return a new <code>String</code>. However, there are
8205  * exceptions, such as <code>String#[]=</code>.
8206  *
8207  */
8208 
8209 void
8211 {
8212 #undef rb_intern
8213 #define rb_intern(str) rb_intern_const(str)
8214 
8215  rb_cString = rb_define_class("String", rb_cObject);
8219  rb_define_method(rb_cString, "initialize", rb_str_init, -1);
8220  rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);
8224  rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
8226  rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
8232  rb_define_method(rb_cString, "insert", rb_str_insert, 2);
8233  rb_define_method(rb_cString, "length", rb_str_length, 0);
8235  rb_define_method(rb_cString, "bytesize", rb_str_bytesize, 0);
8236  rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
8243  rb_define_method(rb_cString, "upto", rb_str_upto, -1);
8246  rb_define_method(rb_cString, "replace", rb_str_replace, 1);
8249  rb_define_method(rb_cString, "getbyte", rb_str_getbyte, 1);
8250  rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
8251  rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
8252 
8253  rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
8256  rb_define_method(rb_cString, "to_str", rb_str_to_s, 0);
8257  rb_define_method(rb_cString, "inspect", rb_str_inspect, 0);
8259 
8260  rb_define_method(rb_cString, "upcase", rb_str_upcase, 0);
8261  rb_define_method(rb_cString, "downcase", rb_str_downcase, 0);
8262  rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0);
8263  rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0);
8264 
8269 
8273  rb_define_method(rb_cString, "lines", rb_str_lines, -1);
8276  rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0);
8277  rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
8279  rb_define_method(rb_cString, "concat", rb_str_concat, 1);
8281  rb_define_method(rb_cString, "prepend", rb_str_prepend, 1);
8283  rb_define_method(rb_cString, "intern", rb_str_intern, 0);
8284  rb_define_method(rb_cString, "to_sym", rb_str_intern, 0);
8286 
8287  rb_define_method(rb_cString, "include?", rb_str_include, 1);
8288  rb_define_method(rb_cString, "start_with?", rb_str_start_with, -1);
8289  rb_define_method(rb_cString, "end_with?", rb_str_end_with, -1);
8290 
8292 
8293  rb_define_method(rb_cString, "ljust", rb_str_ljust, -1);
8294  rb_define_method(rb_cString, "rjust", rb_str_rjust, -1);
8295  rb_define_method(rb_cString, "center", rb_str_center, -1);
8296 
8297  rb_define_method(rb_cString, "sub", rb_str_sub, -1);
8298  rb_define_method(rb_cString, "gsub", rb_str_gsub, -1);
8300  rb_define_method(rb_cString, "chomp", rb_str_chomp, -1);
8302  rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
8303  rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
8304 
8312 
8315  rb_define_method(rb_cString, "delete", rb_str_delete, -1);
8316  rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1);
8317  rb_define_method(rb_cString, "count", rb_str_count, -1);
8318 
8323 
8324  rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
8325  rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
8326  rb_define_method(rb_cString, "each_char", rb_str_each_char, 0);
8327  rb_define_method(rb_cString, "each_codepoint", rb_str_each_codepoint, 0);
8328 
8329  rb_define_method(rb_cString, "sum", rb_str_sum, -1);
8330 
8331  rb_define_method(rb_cString, "slice", rb_str_aref_m, -1);
8333 
8334  rb_define_method(rb_cString, "partition", rb_str_partition, 1);
8335  rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
8336 
8337  rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
8338  rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
8340  rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
8342 
8343  id_to_s = rb_intern("to_s");
8344 
8345  rb_fs = Qnil;
8346  rb_define_variable("$;", &rb_fs);
8347  rb_define_variable("$-F", &rb_fs);
8348 
8349  rb_cSymbol = rb_define_class("Symbol", rb_cObject);
8353  rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
8354 
8357  rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
8359  rb_define_method(rb_cSymbol, "id2name", rb_sym_to_s, 0);
8360  rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0);
8361  rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
8362  rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
8363  rb_define_method(rb_cSymbol, "succ", sym_succ, 0);
8364  rb_define_method(rb_cSymbol, "next", sym_succ, 0);
8365 
8366  rb_define_method(rb_cSymbol, "<=>", sym_cmp, 1);
8367  rb_define_method(rb_cSymbol, "casecmp", sym_casecmp, 1);
8369 
8370  rb_define_method(rb_cSymbol, "[]", sym_aref, -1);
8371  rb_define_method(rb_cSymbol, "slice", sym_aref, -1);
8372  rb_define_method(rb_cSymbol, "length", sym_length, 0);
8373  rb_define_method(rb_cSymbol, "size", sym_length, 0);
8374  rb_define_method(rb_cSymbol, "empty?", sym_empty, 0);
8375  rb_define_method(rb_cSymbol, "match", sym_match, 1);
8376 
8377  rb_define_method(rb_cSymbol, "upcase", sym_upcase, 0);
8378  rb_define_method(rb_cSymbol, "downcase", sym_downcase, 0);
8379  rb_define_method(rb_cSymbol, "capitalize", sym_capitalize, 0);
8380  rb_define_method(rb_cSymbol, "swapcase", sym_swapcase, 0);
8381 
8382  rb_define_method(rb_cSymbol, "encoding", sym_encoding, 0);
8383 }
static int str_independent(VALUE str)
Definition: string.c:1338
#define rb_enc_islower(c, enc)
#define FIXNUM_MAX
#define RB_TYPE_P(obj, type)
static VALUE sym_upcase(VALUE sym)
Definition: string.c:8114
RARRAY_PTR(q->result)[0]
static long chopped_length(VALUE str)
Definition: string.c:6635
VALUE rb_str_associated(VALUE)
Definition: string.c:1454
volatile VALUE tmp
Definition: tcltklib.c:10208
static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str)
Definition: string.c:638
Definition: string.c:5075
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:954
VALUE rb_str_locktmp_ensure(VALUE str, VALUE(*func)(VALUE), VALUE arg)
Definition: string.c:1831
static VALUE rb_str_bytesize(VALUE str)
Definition: string.c:1201
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
static VALUE str_buf_cat(VALUE str, const char *ptr, long len)
Definition: string.c:1905
ssize_t n
Definition: bigdecimal.c:5676
#define RSTRING(obj)
VALUE sym
Definition: tkutil.c:1298
VALUE rb_str_times(VALUE, VALUE)
Definition: string.c:1268
static long rb_str_rindex(VALUE str, VALUE sub, long pos)
Definition: string.c:2624
volatile VALUE ary
Definition: tcltklib.c:9712
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:778
VP_EXPORT int
Definition: bigdecimal.c:5071
VALUE rb_str_ellipsize(VALUE, long)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
Definition: string.c:7727
VALUE rb_ary_pop(VALUE ary)
Definition: array.c:866
#define RESIZE_CAPA(str, capacity)
Definition: string.c:102
VALUE rb_any_to_s(VALUE)
Definition: object.c:393
UChar * pat
Definition: regerror.c:392
void rb_bug(const char *fmt,...)
Definition: error.c:295
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:856
#define FALSE
Definition: nkf.h:174
VALUE rb_str_resurrect(VALUE str)
Definition: string.c:952
#define rb_hash_lookup
Definition: tcltklib.c:268
code
Definition: tcltklib.c:3380
size_t strlen(const char *)
gz enc2
Definition: zlib.c:2272
#define OBJ_INFECT(x, s)
#define CHECK_IF_ASCII(c)
gz ec
Definition: zlib.c:2273
#define TOUPPER(c)
const char * rb_obj_classname(VALUE)
Definition: variable.c:396
VALUE rb_str_buf_cat_ascii(VALUE, const char *)
Definition: string.c:2085
VALUE rb_id2str(ID id)
Definition: ripper.c:16946
Win32OLEIDispatch * p
Definition: win32ole.c:786
#define RSTRING_END(str)
static int sym_printable(const char *s, const char *send, rb_encoding *enc)
Definition: string.c:7814
#define UNLIMITED_ARGUMENTS
#define rb_tainted_str_new2
#define FL_TEST(x, f)
static int max(int a, int b)
Definition: strftime.c:141
#define ascii_isspace(c)
Definition: string.c:5881
static int coderange_scan(const char *p, long len, rb_encoding *enc)
Definition: string.c:183
VALUE rb_str_tmp_new(long)
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1497
VALUE rb_str_buf_append(VALUE, VALUE)
Definition: string.c:2109
static VALUE rb_str_to_f(VALUE str)
Definition: string.c:4429
VALUE rb_sym_to_s(VALUE)
Definition: string.c:7924
volatile VALUE str_enc
Definition: tkutil.c:458
static VALUE rb_str_oct(VALUE str)
Definition: string.c:7148
VALUE rb_string_value(volatile VALUE *)
Definition: string.c:1473
#define FL_SET(x, f)
VALUE rb_str_quote_unprintable(VALUE)
Definition: string.c:7846
#define STR_NOCAPA
Definition: string.c:63
static VALUE rb_str_scan(VALUE str, VALUE pat)
Definition: string.c:7081
VALUE proc
Definition: tcltklib.c:2958
static VALUE rb_str_gsub(int argc, VALUE *argv, VALUE str)
Definition: string.c:4016
static VALUE rb_str_match(VALUE x, VALUE y)
Definition: string.c:2758
static int VALUE table
Definition: tcltklib.c:10137
SSL_METHOD *(* func)(void)
Definition: ossl_ssl.c:108
#define rb_usascii_str_new2
#define rb_enc_codepoint(p, e, enc)
int rb_str_cmp(VALUE, VALUE)
Definition: string.c:2312
static void rb_enc_cr_str_copy_for_substr(VALUE dest, VALUE src)
Definition: string.c:290
VALUE rb_external_str_new_cstr(const char *)
Definition: string.c:590
void rb_gc_force_recycle(VALUE)
Definition: gc.c:2963
ssize_t i
Definition: bigdecimal.c:5676
unsigned char * USTR
Definition: string.c:5073
char * rb_string_value_ptr(volatile VALUE *)
Definition: string.c:1484
static unsigned int trnext(struct tr *t, rb_encoding *enc)
Definition: string.c:5082
VALUE rb_str_locktmp(VALUE)
#define rb_check_frozen(obj)
#define is_broken_string(str)
Definition: string.c:121
RUBY_EXTERN void * memmove(void *, const void *, size_t)
Definition: memmove.c:7
#define rb_enc_right_char_head(s, p, e, enc)
static VALUE sym_swapcase(VALUE sym)
Definition: string.c:8153
#define rb_enc_name(enc)
static VALUE rb_str_b(VALUE str)
Definition: string.c:7666
char * pend
Definition: string.c:5078
void Init_String(void)
Definition: string.c:8210
VALUE rb_str_subseq(VALUE, long, long)
Definition: string.c:1669
static VALUE rb_str_clear(VALUE str)
Definition: string.c:4055
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:194
VALUE rb_str_new_cstr(const char *)
Definition: string.c:447
int ret
Definition: tcltklib.c:280
#define STR_UNSET_NOCAPA(s)
Definition: string.c:65
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:103
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:970
RUBY_EXTERN VALUE rb_cSymbol
Definition: ripper.y:1458
VALUE rb_obj_freeze(VALUE)
Definition: object.c:1012
long rb_str_strlen(VALUE)
Definition: string.c:1168
VALUE rb_eTypeError
Definition: error.c:516
int rb_num_to_uint(VALUE val, unsigned int *ret)
Definition: numeric.c:130
#define OBJ_FREEZE(x)
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
#define OBJ_TAINTED(x)
static VALUE str_gsub(int argc, VALUE *argv, VALUE str, int bang)
Definition: string.c:3840
#define UNREACHABLE
Definition: ruby.h:40
static VALUE rb_str_succ_bang(VALUE str)
Definition: string.c:3071
static VALUE rb_str_enumerate_bytes(VALUE str, int wantarray)
Definition: string.c:6353
static VALUE rb_str_each_line(int argc, VALUE *argv, VALUE str)
Definition: string.c:6323
#define rb_enc_prev_char(s, p, e, enc)
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1371
VALUE enc
Definition: tcltklib.c:10310
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:822
static VALUE str_new3(VALUE klass, VALUE str)
Definition: string.c:671
VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE)
Definition: re.c:3281
int rb_usascii_encindex(void)
Definition: encoding.c:1190
VALUE rb_str_export(VALUE)
Definition: string.c:620
#define TYPE(x)
static VALUE rb_str_prepend(VALUE str, VALUE str2)
Definition: string.c:2239
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:789
static VALUE rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3965
VALUE rb_ary_tmp_new(long capa)
Definition: array.c:465
#define RSTRING_PTR(str)
#define CLASS_OF(v)
NIL_P(eventloop_thread)
Definition: tcltklib.c:4067
VALUE rb_enc_str_new(const char *, long, rb_encoding *)
Definition: string.c:439
static VALUE rb_str_codepoints(VALUE str)
Definition: string.c:6628
#define str_buf_cat2(str, ptr)
Definition: string.c:1948
static VALUE rb_str_swapcase_bang(VALUE str)
Definition: string.c:5021
int rb_str_comparable(VALUE, VALUE)
Definition: string.c:2287
VALUE rb_str_buf_cat2(VALUE, const char *)
Definition: string.c:1961
static VALUE rb_str_rstrip(VALUE str)
Definition: string.c:6966
static VALUE rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
Definition: string.c:7326
VALUE var
Definition: tcltklib.c:5516
static VALUE rb_str_include(VALUE str, VALUE arg)
Definition: string.c:4362
static void rb_str_check_dummy_enc(rb_encoding *enc)
Definition: string.c:4766
#define xfree
#define str_make_independent(str)
Definition: string.c:1366
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
Definition: vm_eval.c:773
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:933
register C_block * tp
Definition: crypt.c:311
long rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
Definition: string.c:1031
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1788
VALUE rb_funcall_with_block(VALUE, ID, int, const VALUE *, VALUE)
Definition: vm_eval.c:833
char * p
Definition: string.c:5078
char * rb_enc_nth(const char *, const char *, long, rb_encoding *)
Definition: string.c:1583
static VALUE sym_downcase(VALUE sym)
Definition: string.c:8127
VALUE rb_proc_new(VALUE(*)(ANYARGS), VALUE)
Definition: proc.c:2018
VALUE rb_str_succ(VALUE)
Definition: string.c:2988
static VALUE str_replace(VALUE str, VALUE str2)
Definition: string.c:910
return Qtrue
Definition: tcltklib.c:9609
#define rb_str_new4
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:766
VALUE rb_obj_class(VALUE)
Definition: object.c:194
#define RETURN_ENUMERATOR(obj, argc, argv)
static VALUE rb_str_to_i(int argc, VALUE *argv, VALUE str)
Definition: string.c:4396
static VALUE rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3634
#define rb_enc_left_char_head(s, p, e, enc)
#define STR_NOEMBED
Definition: string.c:58
VALUE rb_external_str_new(const char *, long)
Definition: string.c:584
int index
Definition: tcltklib.c:4477
RUBY_EXTERN VALUE rb_fs
Definition: ripper.y:487
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:886
VALUE rb_ary_new3(long n,...)
Definition: array.c:432
static VALUE rb_str_empty(VALUE str)
Definition: string.c:1218
static VALUE rb_str_chars(VALUE str)
Definition: string.c:6535
VALUE rb_eSecurityError
Definition: error.c:525
static VALUE rb_str_reverse_bang(VALUE str)
Definition: string.c:4323
void rb_include_module(VALUE klass, VALUE module)
Definition: class.c:684
#define rb_enc_to_index(enc)
static VALUE rb_str_center(int argc, VALUE *argv, VALUE str)
Definition: string.c:7470
static VALUE rb_str_each_char_size(VALUE str)
Definition: string.c:6427
VALUE rb_locale_str_new(const char *, long)
Definition: string.c:596
r
Definition: bigdecimal.c:1210
#define FL_UNTRUSTED
#define rb_str_new2
VALUE rb_reg_nth_match(int, VALUE)
Definition: re.c:1457
static VALUE rb_str_subpat(VALUE str, VALUE re, VALUE backref)
Definition: string.c:3208
void rb_must_asciicompat(VALUE)
Definition: string.c:1464
static VALUE rb_str_aset_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3566
VALUE rb_str_unlocktmp(VALUE)
Definition: string.c:1821
static VALUE rb_str_upcase_bang(VALUE str)
Definition: string.c:4784
#define ISDIGIT(c)
unsigned int last
Definition: nkf.c:4310
static VALUE rb_str_format_m(VALUE str, VALUE arg)
Definition: string.c:1316
#define STR_SET_NOEMBED(str)
Definition: string.c:70
#define ENCODING_IS_ASCII8BIT(obj)
#define STR_DEC_LEN(str)
Definition: string.c:91
VALUE rb_str_substr(VALUE, long, long)
Definition: string.c:1775
#define numberof(array)
Definition: string.c:32
static long str_strlen(VALUE str, rb_encoding *enc)
Definition: string.c:1122
static VALUE rb_str_chomp(int argc, VALUE *argv, VALUE str)
Definition: string.c:6835
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1166
#define ID2SYM(x)
#define BEG(no)
Definition: string.c:22
VALUE VALUE args
Definition: tcltklib.c:2560
static VALUE sym_length(VALUE sym)
Definition: string.c:8088
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1362
#define CHAR_ESC_LEN
Definition: string.c:4464
#define ENC_CODERANGE_BROKEN
VALUE rb_sym_all_symbols(void)
Definition: ripper.c:17044
static VALUE empty_str_alloc(VALUE klass)
Definition: string.c:386
static VALUE rb_str_upcase(VALUE str)
Definition: string.c:4849
#define LONG2NUM(x)
VALUE rb_str_append(VALUE, VALUE)
Definition: string.c:2125
VALUE rb_str_new_frozen(VALUE)
Definition: string.c:713
static VALUE rb_str_hash_m(VALUE str)
Definition: string.c:2278
static int tr_find(unsigned int c, char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
Definition: string.c:5507
int rb_reg_backref_number(VALUE match, VALUE backref)
Definition: re.c:1075
#define rb_enc_isctype(c, t, enc)
VALUE rb_equal(VALUE, VALUE)
Definition: object.c:56
VALUE rb_str_concat(VALUE, VALUE)
Definition: string.c:2166
static VALUE rb_str_aset(VALUE str, VALUE indx, VALUE val)
Definition: string.c:3499
VALUE rb_str_replace(VALUE, VALUE)
Definition: string.c:4034
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
VALUE rb_str_to_str(VALUE)
Definition: string.c:849
void rb_str_modify_expand(VALUE, long)
Definition: string.c:1377
VALUE rb_eRangeError
Definition: error.c:520
d
Definition: strlcat.c:58
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
Definition: sprintf.c:1256
VALUE rb_str_equal(VALUE str1, VALUE str2)
Definition: string.c:2363
const char * name
Definition: ripper.y:163
const char * fmt
Definition: tcltklib.c:841
#define ENCODING_GET(obj)
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:964
static VALUE rb_str_insert(VALUE str, VALUE idx, VALUE str2)
Definition: string.c:3599
#define ISALPHA(c)
Definition: ruby.h:1636
#define MEMZERO(p, type, n)
static VALUE sym_equal(VALUE sym1, VALUE sym2)
Definition: string.c:7806
static VALUE sym_inspect(VALUE sym)
Definition: string.c:7882
static VALUE rb_str_partition(VALUE str, VALUE sep)
Definition: string.c:7491
VALUE rb_usascii_str_new(const char *, long)
Definition: string.c:431
static long str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1603
static VALUE rb_str_ljust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7430
gz ecflags
Definition: zlib.c:2274
VALUE hash
Definition: tkutil.c:267
VALUE rb_str_dump(VALUE)
Definition: string.c:4634
void rb_str_update(VALUE, long, long, VALUE)
Definition: string.c:3455
#define STR_SHARED_P(s)
Definition: string.c:61
static VALUE rb_str_setbyte(VALUE str, VALUE index, VALUE value)
Definition: string.c:4110
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1286
memset(y->frac+ix+1, 0,(y->Prec-(ix+1))*sizeof(BDIGIT))
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
VALUE rb_mComparable
Definition: compar.c:14
neighbor_char
Definition: string.c:2822
static VALUE rb_str_capitalize_bang(VALUE str)
Definition: string.c:4955
int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
Definition: string.c:4467
static VALUE rb_str_strip(VALUE str)
Definition: string.c:7004
#define FIXNUM_P(f)
return Qfalse
Definition: tcltklib.c:6778
#define rb_intern_str(string)
Definition: generator.h:17
unsigned int now
Definition: string.c:5077
int rb_block_given_p(void)
Definition: eval.c:672
#define RARRAY_LEN(a)
long rb_str_offset(VALUE, long)
Definition: string.c:1611
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Definition: transcode.c:1446
size_t rb_str_capacity(VALUE)
Definition: string.c:360
#define rb_enc_step_back(s, p, e, n, enc)
static VALUE rb_str_split_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:5931
#define Qnil
Definition: tcltklib.c:1895
#define val
Definition: tcltklib.c:1948
static int single_byte_optimizable(VALUE str)
Definition: string.c:126
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:880
static void rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3379
void rb_free_tmp_buffer(volatile VALUE *store)
Definition: string.c:822
VALUE rb_eRuntimeError
Definition: error.c:515
static VALUE sym_to_sym(VALUE sym)
Definition: string.c:7943
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
Definition: bignum.c:777
static VALUE str_new_shared(VALUE klass, VALUE str)
Definition: string.c:665
int rb_str_symname_p(VALUE)
Definition: string.c:7827
static VALUE rb_str_rpartition(VALUE str, VALUE sep)
Definition: string.c:7541
static VALUE char * str
Definition: tcltklib.c:3546
int rb_isspace(int c)
Definition: encoding.c:1891
static VALUE rb_str_crypt(VALUE str, VALUE salt)
Definition: string.c:7172
VALUE rb_locale_str_new_cstr(const char *)
Definition: string.c:602
static VALUE rb_str_cmp_m(VALUE str1, VALUE str2)
Definition: string.c:2416
VALUE rb_ary_new(void)
Definition: array.c:424
#define Check_Type(v, t)
static void str_modify_keep_cr(VALUE str)
Definition: string.c:1402
#define dp(v)
Definition: vm_debug.h:23
int flags
Definition: tcltklib.c:3022
unsigned long ID
Definition: ripper.y:105
#define STR_BUF_MIN_SIZE
Definition: string.c:774
#define STR_SET_EMBED(str)
Definition: string.c:74
static VALUE rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5757
#define ISASCII(c)
Definition: ruby.h:1629
#define ONIGENC_CTYPE_ALPHA
#define ENC_CODERANGE_CLEAR(obj)
VALUE rb_str_cat2(VALUE, const char *)
Definition: string.c:1986
#define add(x, y)
Definition: date_strftime.c:23
static VALUE rb_str_delete(int argc, VALUE *argv, VALUE str)
Definition: string.c:5613
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:488
static VALUE VALUE obj
Definition: tcltklib.c:3157
#define RSTRING_LEN(str)
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:741
#define INT2FIX(i)
void rb_str_shared_replace(VALUE, VALUE)
Definition: string.c:857
static VALUE rb_str_each_byte_size(VALUE str, VALUE args)
Definition: string.c:6347
int idx
Definition: tcltklib.c:9715
static VALUE rb_str_enumerate_chars(VALUE str, int wantarray)
Definition: string.c:6442
static VALUE rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
Definition: string.c:6157
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:719
static VALUE rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5737
#define RUBY_DTRACE_STRING_CREATE_ENABLED()
Definition: probes.h:63
#define FIX2LONG(x)
void rb_backref_set(VALUE)
Definition: vm.c:836
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: ripper.y:235
#define T_STRING
#define END(no)
Definition: string.c:23
#define MBCLEN_CHARFOUND_P(ret)
#define ENC_CODERANGE_AND(a, b)
#define rb_enc_isprint(c, enc)
#define STR_ENC_GET(str)
Definition: string.c:123
static VALUE rb_str_strip_bang(VALUE str)
Definition: string.c:6983
double rb_str_to_dbl(VALUE, int)
Definition: object.c:2672
VALUE rb_cEncodingConverter
Definition: transcode.c:25
#define rb_sourcefile()
Definition: tcltklib.c:97
#define STR_SET_EMBED_LEN(str, n)
Definition: string.c:76
VALUE rb_str_freeze(VALUE)
Definition: string.c:1798
#define range(low, item, hi)
Definition: date_strftime.c:21
VALUE rb_check_hash_type(VALUE)
Definition: hash.c:461
#define LONG_MAX
Definition: ruby.h:201
static VALUE rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5392
#define RUBY_FUNC_EXPORTED
Definition: defines.h:184
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
VALUE rb_eEncCompatError
Definition: error.c:523
static VALUE VALUE assoc
Definition: tkutil.c:545
VALUE rb_str_buf_new_cstr(const char *)
Definition: string.c:793
unsigned int max
Definition: string.c:5077
#define DBL2NUM(dbl)
#define ALLOCA_N(type, n)
VALUE rb_check_funcall(VALUE, ID, int, VALUE *)
Definition: vm_eval.c:408
static VALUE sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
Definition: string.c:7949
#define ENC_CODERANGE_UNKNOWN
void rb_str_setter(VALUE, ID, VALUE *)
Definition: string.c:7633
VALUE rb_eIndexError
Definition: error.c:518
static VALUE rb_str_rjust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7450
static int VALUE key
Definition: tkutil.c:265
#define rb_enc_mbc_to_codepoint(p, e, enc)
#define ENC_CODERANGE_SET(obj, cr)
VALUE rb_reg_match(VALUE, VALUE)
Definition: re.c:2741
register int hval
Definition: lex.c:89
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
VALUE rb_str_new_shared(VALUE)
Definition: string.c:677
static VALUE sym_capitalize(VALUE sym)
Definition: string.c:8140
VALUE rb_str_buf_cat(VALUE, const char *, long)
Definition: string.c:1951
VALUE rb_str_dup(VALUE)
Definition: string.c:946
VALUE rb_tainted_str_new_cstr(const char *)
Definition: string.c:479
VALUE rb_filesystem_str_new(const char *, long)
Definition: string.c:608
#define rb_long2int(n)
VALUE rb_obj_as_string(VALUE)
Definition: string.c:895
VALUE * argv
Definition: tcltklib.c:1970
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:910
static VALUE sym_cmp(VALUE sym, VALUE other)
Definition: string.c:8028
#define sub(x, y)
Definition: date_strftime.c:24
VALUE rb_hash_aset(VALUE, VALUE, VALUE)
static void rb_str_splice(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3412
VALUE rb_yield(VALUE)
Definition: vm_eval.c:933
VALUE rb_tainted_str_new(const char *, long)
static VALUE str_eql(const VALUE str1, const VALUE str2)
Definition: string.c:2339
VALUE rb_str_resize(VALUE, long)
Definition: string.c:1854
memcpy(buf+1, str, len)
#define RTEST(v)
const int id
Definition: nkf.c:209
static VALUE sym_encoding(VALUE sym)
Definition: string.c:8166
VALUE rb_str_format(int, const VALUE *, VALUE)
Definition: sprintf.c:443
static VALUE rb_str_swapcase(VALUE str)
Definition: string.c:5066
VALUE rb_str_export_to_enc(VALUE, rb_encoding *)
Definition: string.c:632
#define rb_enc_mbminlen(enc)
#define RUBY_MAX_CHAR_LEN
Definition: string.c:56
#define TRUE
Definition: nkf.h:175
static VALUE rb_str_byteslice(int argc, VALUE *argv, VALUE str)
Definition: string.c:4240
q result
Definition: tcltklib.c:7069
void * rb_alloc_tmp_buffer(volatile VALUE *store, long len)
Definition: string.c:814
VALUE rb_str_split(VALUE, const char *)
Definition: string.c:6146
volatile VALUE value
Definition: tcltklib.c:9441
#define StringValue(v)
void rb_econv_close(rb_econv_t *ec)
Definition: transcode.c:1702
#define rb_enc_mbcput(c, buf, enc)
long rb_memsearch(const void *, long, const void *, long, rb_encoding *)
Definition: re.c:227
#define MBCLEN_CHARFOUND_LEN(ret)
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:898
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:459
#define STR_TMPLOCK
Definition: string.c:57
#define T_REGEXP
register char * s
Definition: os2.c:56
long rb_str_coderange_scan_restartable(const char *, const char *, rb_encoding *, int *)
Definition: string.c:232
int rb_enc_symname_p(const char *name, rb_encoding *enc)
Definition: ripper.c:16630
static VALUE rb_str_tr(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5434
#define CONST_ID(var, str)
static VALUE rb_str_chop_bang(VALUE str)
Definition: string.c:6662
void rb_gc_register_mark_object(VALUE)
Definition: gc.c:2982
#define STR_ASSOC
Definition: string.c:60
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1570
static VALUE rb_str_enumerate_codepoints(VALUE str, int wantarray)
Definition: string.c:6542
static VALUE rb_str_squeeze(int argc, VALUE *argv, VALUE str)
Definition: string.c:5720
long rb_reg_search(VALUE, VALUE, long, int)
Definition: re.c:1352
static VALUE str_duplicate(VALUE klass, VALUE str)
Definition: string.c:938
char * rb_string_value_cstr(volatile VALUE *)
Definition: string.c:1491
VALUE retval
Definition: tcltklib.c:7829
#define no_digits()
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1181
static VALUE rb_str_aref_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3338
static VALUE sym_to_proc(VALUE sym)
Definition: string.c:7970
VALUE rb_str_inspect(VALUE)
Definition: string.c:4512
#define OBJ_FROZEN(x)
#define RB_GC_GUARD(v)
static VALUE rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:5630
RUBY_EXTERN char * crypt(const char *, const char *)
#define FL_TAINT
static VALUE get_pat(VALUE, int)
Definition: string.c:3654
#define T_FIXNUM
static enum neighbor_char enc_pred_char(char *p, long len, rb_encoding *enc)
Definition: string.c:2863
int argc
Definition: tcltklib.c:1969
VALUE rb_str_buf_new(long)
Definition: string.c:777
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1212
static VALUE rb_str_lstrip_bang(VALUE str)
Definition: string.c:6855
static VALUE str_new(VALUE klass, const char *ptr, long len)
Definition: string.c:395
static VALUE str_alloc(VALUE klass)
Definition: string.c:374
#define UNINITIALIZED_VAR(x)
Definition: vm_core.h:121
#define ELTS_SHARED
#define RUBY_ALIAS_FUNCTION(prot, name, args)
Definition: defines.h:249
static VALUE rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3693
static VALUE rb_str_is_ascii_only_p(VALUE str)
Definition: string.c:7705
void rb_undef_alloc_func(VALUE)
Definition: vm_method.c:493
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:870
#define RUBY_DTRACE_STRING_CREATE(arg0, arg1, arg2)
Definition: probes.h:64
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Definition: transcode.c:2570
static VALUE rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:6712
VALUE rb_ensure(VALUE(*b_proc)(ANYARGS), VALUE data1, VALUE(*e_proc)(ANYARGS), VALUE data2)
Definition: eval.c:804
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
static VALUE sym_casecmp(VALUE sym, VALUE other)
Definition: string.c:8045
static char * str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
Definition: string.c:1533
int rb_sourceline(void)
Definition: vm.c:884
static VALUE rb_str_getbyte(VALUE str, VALUE index)
Definition: string.c:4091
static void rb_enc_cr_str_exact_copy(VALUE dest, VALUE src)
Definition: string.c:320
void rb_sys_fail(const char *mesg)
Definition: error.c:1907
static VALUE rb_str_chr(VALUE str)
Definition: string.c:4079
#define rb_str_new3
RUBY_EXTERN VALUE rb_cString
Definition: ripper.y:1456
Real * b
Definition: bigdecimal.c:1196
static const char * search_nonascii(const char *p, const char *e)
Definition: string.c:146
VALUE rb_id_quote_unprintable(ID)
Definition: string.c:7867
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Definition: transcode.c:2867
return ptr
Definition: tcltklib.c:784
static void str_modifiable(VALUE str)
Definition: string.c:1327
VpDivd * c
Definition: bigdecimal.c:1219
static VALUE rb_str_bytes(VALUE str)
Definition: string.c:6421
static VALUE rb_str_index_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2567
#define CHAR_BIT
Definition: ruby.h:208
static VALUE rb_str_match_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2808
#define RSTRING_EMBED_LEN_MAX
static void str_mod_check(VALUE s, const char *p, long len)
Definition: string.c:352
static VALUE rb_str_lines(int argc, VALUE *argv, VALUE str)
Definition: string.c:6341
static const char isspacetable[256]
Definition: string.c:5862
#define T_BIGNUM
#define MEMCPY(p1, p2, type, n)
static VALUE scan_once(VALUE str, VALUE pat, long *start)
Definition: string.c:7012
static VALUE rb_str_sub(int argc, VALUE *argv, VALUE str)
Definition: string.c:3832
VALUE rb_str_export_locale(VALUE)
Definition: string.c:626
#define ENC_CODERANGE_VALID
static VALUE rb_str_s_try_convert(VALUE dummy, VALUE str)
Definition: string.c:1527
gz end
Definition: zlib.c:2270
ID rb_to_id(VALUE)
Definition: string.c:8172
#define RMATCH_REGS(obj)
Definition: re.h:54
static VALUE sym_succ(VALUE sym)
Definition: string.c:8008
static VALUE rb_str_end_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7610
arg
Definition: ripper.y:1317
static void str_enc_copy(VALUE str1, VALUE str2)
Definition: string.c:284
VALUE src
Definition: tcltklib.c:7952
void rb_str_modify(VALUE)
Definition: string.c:1369
#define rb_str_buf_new2
#define NEWOBJ_OF(obj, type, klass, flags)
#define T_SYMBOL
VALUE rb_str_cat(VALUE, const char *, long)
Definition: string.c:1967
static ID id_to_s
Definition: string.c:892
VALUE rb_str_length(VALUE)
Definition: string.c:1182
#define ENC_CODERANGE_7BIT
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:772
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Definition: string.c:563
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
int size
Definition: encoding.c:52
static VALUE rb_str_hex(VALUE str)
Definition: string.c:7127
static char * str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1589
#define f
#define NUM2LONG(x)
static VALUE rb_str_reverse(VALUE str)
Definition: string.c:4259
static VALUE rb_str_downcase(VALUE str)
Definition: string.c:4932
#define SYMBOL_P(x)
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
RUBY_EXTERN VALUE rb_default_rs
Definition: ripper.y:490
static VALUE rb_str_valid_encoding_p(VALUE str)
Definition: string.c:7687
static VALUE rb_str_each_byte(VALUE str)
Definition: string.c:6404
static VALUE rb_str_chop(VALUE str)
Definition: string.c:6697
long rb_str_sublen(VALUE, long)
Definition: string.c:1658
static VALUE rb_str_count(int argc, VALUE *argv, VALUE str)
Definition: string.c:5793
#define STR_SET_LEN(str, n)
Definition: string.c:82
static VALUE rb_str_eql(VALUE str1, VALUE str2)
Definition: string.c:2383
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:340
static void rb_str_subpat_set(VALUE str, VALUE re, VALUE backref, VALUE val)
Definition: string.c:3461
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *)
Definition: string.c:569
int t
Definition: ripper.c:14660
void rb_str_associate(VALUE, VALUE)
Definition: string.c:1423
static long enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
Definition: string.c:979
static VALUE rb_str_lstrip(VALUE str)
Definition: string.c:6896
#define MBCLEN_INVALID_P(ret)
st_index_t rb_memhash(const void *ptr, long len)
Definition: random.c:1422
int num_regs
Definition: ripper.y:617
#define ENC_CODERANGE(obj)
#define lesser(a, b)
Definition: string.c:2284
VALUE rb_check_array_type(VALUE ary)
Definition: array.c:557
static enum neighbor_char enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
Definition: string.c:2906
DATA_PTR(self)
#define rb_str_dup_frozen
static VALUE sym_match(VALUE sym, VALUE other)
Definition: string.c:8061
VALUE rb_reg_quote(VALUE)
Definition: re.c:2960
static long rb_str_index(VALUE str, VALUE sub, long offset)
Definition: string.c:2506
st_index_t rb_str_hash(VALUE)
Definition: string.c:2248
#define rb_str_new5
RUBY_EXTERN VALUE rb_cObject
Definition: ripper.y:1426
static VALUE rb_str_upto(int argc, VALUE *argv, VALUE beg)
Definition: string.c:3112
st_data_t st_index_t
Definition: ripper.y:63
static VALUE str_byte_substr(VALUE str, long beg, long len)
Definition: string.c:4128
#define ALLOC_N(type, n)
#define LONG2FIX(i)
#define RBASIC(obj)
VALUE rb_str_new_with_class(VALUE, const char *, long)
rb_econv_result_t
Definition: ripper.y:242
#define STR_EMBED_P(str)
Definition: string.c:75
static VALUE tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
Definition: string.c:5138
#define STR_NOCAPA_P(s)
Definition: string.c:64
#define RARRAY_LENINT(ary)
#define ONIGENC_CTYPE_DIGIT
klass
Definition: tcltklib.c:3503
#define UINT2NUM(x)
#define INT2NUM(x)
static VALUE rb_str_capitalize(VALUE str)
Definition: string.c:5003
void rb_define_variable(const char *, VALUE *)
Definition: variable.c:594
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1246
static VALUE rb_str_init(int argc, VALUE *argv, VALUE str)
Definition: string.c:969
int rb_respond_to(VALUE, ID)
Definition: vm_method.c:1598
#define ONIGENC_CODE_TO_MBC_MAXLEN
RUBY_EXTERN VALUE rb_rs
Definition: ripper.y:489
static VALUE rb_str_rstrip_bang(VALUE str)
Definition: string.c:6917
static VALUE rb_str_each_char(VALUE str)
Definition: string.c:6518
#define ISPRINT(c)
Definition: ruby.h:1631
static VALUE str_replace_shared(VALUE str2, VALUE str)
Definition: string.c:657
VALUE rb_backref_get(void)
Definition: vm.c:830
static void str_make_independent_expand(VALUE str, long expand)
Definition: string.c:1347
VALUE rb_ary_concat(VALUE x, VALUE y)
Definition: array.c:3370
static VALUE rb_str_start_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7587
VALUE rb_ary_new2(long capa)
Definition: array.c:417
#define OBJ_UNTRUST(x)
VALUE rb_str_new(const char *, long)
Definition: string.c:425
#define rb_safe_level()
Definition: tcltklib.c:94
#define rb_enc_is_newline(p, end, enc)
VALUE rb_str_drop_bytes(VALUE, long)
Definition: string.c:3351
static void str_discard(VALUE str)
Definition: string.c:1412
Real * res
Definition: bigdecimal.c:1247
#define assert(condition)
Definition: ossl.h:45
VALUE rb_range_beg_len(VALUE, long *, long *, long, int)
Definition: range.c:990
#define rb_enc_asciicompat(enc)
#define NUM2INT(x)
VALUE rb_hash_new(void)
Definition: hash.c:234
size_t rb_str_memsize(VALUE)
Definition: string.c:838
VALUE rb_obj_alloc(VALUE)
Definition: object.c:1740
const char * rb_id2name(ID id)
Definition: ripper.c:17012
int gen
Definition: string.c:5076
VALUE rb_str_intern(VALUE)
Definition: string.c:7229
unsigned long sum
Definition: zlib.c:397
static VALUE sym_empty(VALUE sym)
Definition: string.c:8101
static VALUE rb_str_to_s(VALUE str)
Definition: string.c:4444
#define rb_enc_isupper(c, enc)
#define rb_check_arity(argc, min, max)
#define BUILTIN_TYPE(x)
static VALUE str_byte_aref(VALUE str, VALUE indx)
Definition: string.c:4183
#define OBJ_UNTRUSTED(x)
BDIGIT e
Definition: bigdecimal.c:5106
#define rb_enc_isascii(c, enc)
int rb_str_hash_cmp(VALUE, VALUE)
Definition: string.c:2258
#define SIZEOF_VALUE
VALUE rb_hash_aref(VALUE, VALUE)
Definition: hash.c:570
VALUE rb_funcall2(VALUE, ID, int, const VALUE *)
Calls a method.
Definition: vm_eval.c:804
unsigned long VALUE
Definition: ripper.y:104
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1151
static VALUE rb_str_downcase_bang(VALUE str)
Definition: string.c:4867
static VALUE rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len, int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
Definition: string.c:1992
void rb_warning(const char *fmt,...)
Definition: error.c:234
#define ONIGERR_INVALID_CODE_POINT_VALUE
#define RREGEXP(obj)
#define RSTRING_GETMEM(str, ptrvar, lenvar)
static VALUE rb_str_sum(int argc, VALUE *argv, VALUE str)
Definition: string.c:7268
static void tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
Definition: string.c:5443
gz ecopts
Definition: zlib.c:2275
#define is_ascii_string(str)
Definition: string.c:120
long salt
Definition: crypt.c:507
#define RREGEXP_SRC_LEN(r)
#define snprintf
#define SPECIAL_CONST_P(x)
#define OBJ_TAINT(x)
static VALUE rb_str_casecmp(VALUE str1, VALUE str2)
Definition: string.c:2448
#define rb_intern(str)
BDIGIT v
Definition: bigdecimal.c:5677
#define mod(x, y)
Definition: date_strftime.c:28
VALUE rb_filesystem_str_new_cstr(const char *)
Definition: string.c:614
static char * rb_str_subpos(VALUE str, long beg, long *lenp)
Definition: string.c:1690
VALUE rb_str_ord(VALUE s)
Definition: string.c:7249
#define STR_ASSOC_P(s)
Definition: string.c:62
static VALUE sym_aref(int argc, VALUE *argv, VALUE sym)
Definition: string.c:8075
#define NULL
Definition: _sdbm.c:102
q
Definition: tcltklib.c:2967
const char * name
Definition: nkf.c:208
VALUE rb_invcmp(VALUE x, VALUE y)
Definition: compar.c:42
static VALUE rb_str_aref(VALUE str, VALUE indx)
Definition: string.c:3219
VALUE rb_check_string_type(VALUE)
Definition: string.c:1509
#define REALLOC_N(var, type, n)
VALUE rb_reg_regcomp(VALUE)
Definition: re.c:2542
int rb_enc_str_coderange(VALUE)
Definition: string.c:327
long rb_enc_strlen(const char *, const char *, rb_encoding *)
Definition: string.c:1025
static int match(VALUE str, VALUE pat, VALUE hash, int(*cb)(VALUE, VALUE))
Definition: date_parse.c:273
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1344
volatile VALUE current
Definition: tcltklib.c:7111
#define rb_enc_mbmaxlen(enc)
static VALUE rb_str_delete_bang(int, VALUE *, VALUE)
Definition: string.c:5537
void rb_warn(const char *fmt,...)
Definition: error.c:221
#define SYM2ID(x)
VALUE rb_eArgError
Definition: error.c:517
VALUE rb_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2425
static VALUE rb_str_force_encoding(VALUE str, VALUE enc)
Definition: string.c:7650
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:659
#define IS_EVSTR(p, e)
Definition: string.c:4621
VALUE rb_check_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2438
void rb_str_free(VALUE)
Definition: string.c:830
static VALUE rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2681
VALUE rb_usascii_str_new_cstr(const char *)
#define TR_TABLE_SIZE
Definition: string.c:5441
int dummy
Definition: tcltklib.c:4482
static VALUE rb_str_each_codepoint(VALUE str)
Definition: string.c:6610
STATIC void unsigned char * cp
Definition: crypt.c:307
VALUE rb_str_plus(VALUE, VALUE)
Definition: string.c:1236
#define FL_UNSET(x, f)
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Definition: string.c:2078
static enum neighbor_char enc_succ_char(char *p, long len, rb_encoding *enc)
Definition: string.c:2829
VALUE rb_inspect(VALUE)
Definition: object.c:411
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:548
size_t len
Definition: tcltklib.c:3567
void rb_str_set_len(VALUE, long)
Definition: string.c:1838