Ruby  2.0.0p247(2013-06-27revision41674)
string.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  string.c -
4 
5  $Author: nagachika $
6  created at: Mon Aug 9 17:12:58 JST 1993
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9  Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
10  Copyright (C) 2000 Information-technology Promotion Agency, Japan
11 
12 **********************************************************************/
13 
14 #include "ruby/ruby.h"
15 #include "ruby/re.h"
16 #include "ruby/encoding.h"
17 #include "vm_core.h"
18 #include "internal.h"
19 #include "probes.h"
20 #include <assert.h>
21 
22 #define BEG(no) (regs->beg[(no)])
23 #define END(no) (regs->end[(no)])
24 
25 #include <math.h>
26 #include <ctype.h>
27 
28 #ifdef HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 
32 #define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
33 
34 #undef rb_str_new_cstr
35 #undef rb_tainted_str_new_cstr
36 #undef rb_usascii_str_new_cstr
37 #undef rb_external_str_new_cstr
38 #undef rb_locale_str_new_cstr
39 #undef rb_str_new2
40 #undef rb_str_new3
41 #undef rb_str_new4
42 #undef rb_str_new5
43 #undef rb_tainted_str_new2
44 #undef rb_usascii_str_new2
45 #undef rb_str_dup_frozen
46 #undef rb_str_buf_new_cstr
47 #undef rb_str_buf_new2
48 #undef rb_str_buf_cat2
49 #undef rb_str_cat2
50 
51 static VALUE rb_str_clear(VALUE str);
52 
55 
56 #define RUBY_MAX_CHAR_LEN 16
57 #define STR_TMPLOCK FL_USER7
58 #define STR_NOEMBED FL_USER1
59 #define STR_SHARED FL_USER2 /* = ELTS_SHARED */
60 #define STR_ASSOC FL_USER3
61 #define STR_SHARED_P(s) FL_ALL((s), STR_NOEMBED|ELTS_SHARED)
62 #define STR_ASSOC_P(s) FL_ALL((s), STR_NOEMBED|STR_ASSOC)
63 #define STR_NOCAPA (STR_NOEMBED|ELTS_SHARED|STR_ASSOC)
64 #define STR_NOCAPA_P(s) (FL_TEST((s),STR_NOEMBED) && FL_ANY((s),ELTS_SHARED|STR_ASSOC))
65 #define STR_UNSET_NOCAPA(s) do {\
66  if (FL_TEST((s),STR_NOEMBED)) FL_UNSET((s),(ELTS_SHARED|STR_ASSOC));\
67 } while (0)
68 
69 
70 #define STR_SET_NOEMBED(str) do {\
71  FL_SET((str), STR_NOEMBED);\
72  STR_SET_EMBED_LEN((str), 0);\
73 } while (0)
74 #define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED)
75 #define STR_EMBED_P(str) (!FL_TEST((str), STR_NOEMBED))
/*
 * STR_SET_EMBED_LEN(str, n): store the length n in the object's flag word.
 * The bits selected by RSTRING_EMBED_LEN_MASK are cleared first and then
 * n, shifted left by RSTRING_EMBED_LEN_SHIFT, is OR-ed in.  n is evaluated
 * exactly once (copied into tmp_n); str is evaluated twice.
 */
76 #define STR_SET_EMBED_LEN(str, n) do { \
77  long tmp_n = (n);\
78  RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
79  RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
80 } while (0)
81 
/*
 * STR_SET_LEN(str, n): set the byte length of str to n.
 * When STR_EMBED_P(str) holds the length goes into the flag word via
 * STR_SET_EMBED_LEN; otherwise it is written to as.heap.len.
 * str is evaluated more than once, so pass a side-effect-free expression.
 */
82 #define STR_SET_LEN(str, n) do { \
83  if (STR_EMBED_P(str)) {\
84  STR_SET_EMBED_LEN((str), (n));\
85  }\
86  else {\
87  RSTRING(str)->as.heap.len = (n);\
88  }\
89 } while (0)
90 
/*
 * STR_DEC_LEN(str): decrease the byte length of str by one.
 * For an embedded string the current length is read with RSTRING_LEN,
 * decremented, and written back through STR_SET_EMBED_LEN; for a heap
 * string as.heap.len is decremented in place.  No check is made that the
 * length is greater than zero.
 */
91 #define STR_DEC_LEN(str) do {\
92  if (STR_EMBED_P(str)) {\
93  long n = RSTRING_LEN(str);\
94  n--;\
95  STR_SET_EMBED_LEN((str), n);\
96  }\
97  else {\
98  RSTRING(str)->as.heap.len--;\
99  }\
100 } while (0)
101 
/*
 * RESIZE_CAPA(str, capacity): make room for `capacity' bytes (plus one
 * extra byte) in str.
 *
 *  - Embedded string, capacity > RSTRING_EMBED_LEN_MAX: allocate
 *    capacity+1 bytes, copy the current RSTRING_LEN(str) bytes into the new
 *    buffer, install it as as.heap.ptr / as.heap.len, mark the string
 *    STR_NOEMBED and record the capacity in as.heap.aux.capa.
 *  - Embedded string, capacity fits: nothing is done.
 *  - Heap string: the buffer is reallocated to capacity+1 bytes; aux.capa
 *    is updated only when STR_NOCAPA_P(str) is false (aux is a union that
 *    then holds `shared' instead of `capa').
 *
 * Both arguments are evaluated several times.
 */
102 #define RESIZE_CAPA(str,capacity) do {\
103  if (STR_EMBED_P(str)) {\
104  if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
105  char *tmp = ALLOC_N(char, (capacity)+1);\
106  memcpy(tmp, RSTRING_PTR(str), RSTRING_LEN(str));\
107  RSTRING(str)->as.heap.ptr = tmp;\
108  RSTRING(str)->as.heap.len = RSTRING_LEN(str);\
109  STR_SET_NOEMBED(str);\
110  RSTRING(str)->as.heap.aux.capa = (capacity);\
111  }\
112  }\
113  else {\
114  REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+1);\
115  if (!STR_NOCAPA_P(str))\
116  RSTRING(str)->as.heap.aux.capa = (capacity);\
117  }\
118 } while (0)
119 
120 #define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
121 #define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN)
122 
123 #define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
124 
125 static inline int
127 {
128  rb_encoding *enc;
129 
130  /* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
131  if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
132  return 1;
133 
134  enc = STR_ENC_GET(str);
135  if (rb_enc_mbmaxlen(enc) == 1)
136  return 1;
137 
138  /* Conservative. Possibly single byte.
139  * "\xa1" in Shift_JIS for example. */
140  return 0;
141 }
142 
144 
/*
 * Return a pointer to the first byte in [p, e) for which ISASCII() is
 * false, or NULL when every byte in the range is ASCII.
 *
 * When SIZEOF_VALUE is 4 or 8 and the range is longer than two VALUEs, the
 * scan runs in three phases: single bytes up to the first VALUE-aligned
 * address, whole VALUE words tested against NONASCII_MASK (the high bit of
 * every byte), and finally single bytes from the first suspicious word (or
 * from the last aligned address) to the end.
 *
 * NONASCII_MASK is defined here and is also used by code further down the
 * file.
 */
static inline const char *
search_nonascii(const char *p, const char *e)
{
#if SIZEOF_VALUE == 8
# define NONASCII_MASK 0x8080808080808080ULL
#elif SIZEOF_VALUE == 4
# define NONASCII_MASK 0x80808080UL
#endif
#ifdef NONASCII_MASK
    if ((int)sizeof(VALUE) * 2 < e - p) {
        const VALUE align_mask = sizeof(VALUE) - 1;
        const VALUE *word = (const VALUE *)(~align_mask & ((VALUE)p + align_mask));
        const VALUE *word_end;

        /* Unaligned head: check byte by byte. */
        for (; p < (const char *)word; p++) {
            if (!ISASCII(*p))
                return p;
        }

        /* Aligned middle: skip words in which no byte has its high bit set. */
        word_end = (const VALUE *)(~align_mask & (VALUE)e);
        while (word < word_end && !(*word & NONASCII_MASK)) {
            word++;
        }

        /* Resume the byte scan at the first suspicious word (or the tail). */
        p = (const char *)word;
    }
#endif
    for (; p < e; p++) {
        if (!ISASCII(*p))
            return p;
    }
    return NULL;
}
181 
182 static int
183 coderange_scan(const char *p, long len, rb_encoding *enc)
184 {
185  const char *e = p + len;
186 
187  if (rb_enc_to_index(enc) == 0) {
188  /* enc is ASCII-8BIT. An ASCII-8BIT string can never be broken. */
189  p = search_nonascii(p, e);
191  }
192 
193  if (rb_enc_asciicompat(enc)) {
194  p = search_nonascii(p, e);
195  if (!p) {
196  return ENC_CODERANGE_7BIT;
197  }
198  while (p < e) {
199  int ret = rb_enc_precise_mbclen(p, e, enc);
200  if (!MBCLEN_CHARFOUND_P(ret)) {
201  return ENC_CODERANGE_BROKEN;
202  }
203  p += MBCLEN_CHARFOUND_LEN(ret);
204  if (p < e) {
205  p = search_nonascii(p, e);
206  if (!p) {
207  return ENC_CODERANGE_VALID;
208  }
209  }
210  }
211  if (e < p) {
212  return ENC_CODERANGE_BROKEN;
213  }
214  return ENC_CODERANGE_VALID;
215  }
216 
217  while (p < e) {
218  int ret = rb_enc_precise_mbclen(p, e, enc);
219 
220  if (!MBCLEN_CHARFOUND_P(ret)) {
221  return ENC_CODERANGE_BROKEN;
222  }
223  p += MBCLEN_CHARFOUND_LEN(ret);
224  }
225  if (e < p) {
226  return ENC_CODERANGE_BROKEN;
227  }
228  return ENC_CODERANGE_VALID;
229 }
230 
231 long
232 rb_str_coderange_scan_restartable(const char *s, const char *e, rb_encoding *enc, int *cr)
233 {
234  const char *p = s;
235 
236  if (*cr == ENC_CODERANGE_BROKEN)
237  return e - s;
238 
239  if (rb_enc_to_index(enc) == 0) {
240  /* enc is ASCII-8BIT. An ASCII-8BIT string can never be broken. */
241  p = search_nonascii(p, e);
243  return e - s;
244  }
245  else if (rb_enc_asciicompat(enc)) {
246  p = search_nonascii(p, e);
247  if (!p) {
248  if (*cr != ENC_CODERANGE_VALID) *cr = ENC_CODERANGE_7BIT;
249  return e - s;
250  }
251  while (p < e) {
252  int ret = rb_enc_precise_mbclen(p, e, enc);
253  if (!MBCLEN_CHARFOUND_P(ret)) {
255  return p - s;
256  }
257  p += MBCLEN_CHARFOUND_LEN(ret);
258  if (p < e) {
259  p = search_nonascii(p, e);
260  if (!p) {
261  *cr = ENC_CODERANGE_VALID;
262  return e - s;
263  }
264  }
265  }
267  return p - s;
268  }
269  else {
270  while (p < e) {
271  int ret = rb_enc_precise_mbclen(p, e, enc);
272  if (!MBCLEN_CHARFOUND_P(ret)) {
274  return p - s;
275  }
276  p += MBCLEN_CHARFOUND_LEN(ret);
277  }
279  return p - s;
280  }
281 }
282 
283 static inline void
285 {
286  rb_enc_set_index(str1, ENCODING_GET(str2));
287 }
288 
289 static void
291 {
292  /* this function is designed for copying encoding and coderange
293  * from src to new string "dest" which is made from the part of src.
294  */
295  str_enc_copy(dest, src);
296  if (RSTRING_LEN(dest) == 0) {
297  if (!rb_enc_asciicompat(STR_ENC_GET(src)))
299  else
301  return;
302  }
303  switch (ENC_CODERANGE(src)) {
304  case ENC_CODERANGE_7BIT:
306  break;
307  case ENC_CODERANGE_VALID:
308  if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
311  else
313  break;
314  default:
315  break;
316  }
317 }
318 
319 static void
321 {
322  str_enc_copy(dest, src);
323  ENC_CODERANGE_SET(dest, ENC_CODERANGE(src));
324 }
325 
326 int
328 {
329  int cr = ENC_CODERANGE(str);
330 
331  if (cr == ENC_CODERANGE_UNKNOWN) {
332  rb_encoding *enc = STR_ENC_GET(str);
333  cr = coderange_scan(RSTRING_PTR(str), RSTRING_LEN(str), enc);
334  ENC_CODERANGE_SET(str, cr);
335  }
336  return cr;
337 }
338 
339 int
341 {
342  rb_encoding *enc = STR_ENC_GET(str);
343 
344  if (!rb_enc_asciicompat(enc))
345  return FALSE;
346  else if (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
347  return TRUE;
348  return FALSE;
349 }
350 
351 static inline void
352 str_mod_check(VALUE s, const char *p, long len)
353 {
354  if (RSTRING_PTR(s) != p || RSTRING_LEN(s) != len){
355  rb_raise(rb_eRuntimeError, "string modified");
356  }
357 }
358 
359 size_t
361 {
362  if (STR_EMBED_P(str)) {
363  return RSTRING_EMBED_LEN_MAX;
364  }
365  else if (STR_NOCAPA_P(str)) {
366  return RSTRING(str)->as.heap.len;
367  }
368  else {
369  return RSTRING(str)->as.heap.aux.capa;
370  }
371 }
372 
373 static inline VALUE
375 {
376  NEWOBJ_OF(str, struct RString, klass, T_STRING);
377 
378  str->as.heap.ptr = 0;
379  str->as.heap.len = 0;
380  str->as.heap.aux.capa = 0;
381 
382  return (VALUE)str;
383 }
384 
385 static inline VALUE
387 {
390  }
391  return str_alloc(klass);
392 }
393 
394 static VALUE
395 str_new(VALUE klass, const char *ptr, long len)
396 {
397  VALUE str;
398 
399  if (len < 0) {
400  rb_raise(rb_eArgError, "negative string size (or size too big)");
401  }
402 
405  }
406 
407  str = str_alloc(klass);
408  if (len > RSTRING_EMBED_LEN_MAX) {
409  RSTRING(str)->as.heap.aux.capa = len;
410  RSTRING(str)->as.heap.ptr = ALLOC_N(char,len+1);
411  STR_SET_NOEMBED(str);
412  }
413  else if (len == 0) {
415  }
416  if (ptr) {
417  memcpy(RSTRING_PTR(str), ptr, len);
418  }
419  STR_SET_LEN(str, len);
420  RSTRING_PTR(str)[len] = '\0';
421  return str;
422 }
423 
424 VALUE
425 rb_str_new(const char *ptr, long len)
426 {
427  return str_new(rb_cString, ptr, len);
428 }
429 
430 VALUE
431 rb_usascii_str_new(const char *ptr, long len)
432 {
433  VALUE str = rb_str_new(ptr, len);
435  return str;
436 }
437 
438 VALUE
439 rb_enc_str_new(const char *ptr, long len, rb_encoding *enc)
440 {
441  VALUE str = rb_str_new(ptr, len);
442  rb_enc_associate(str, enc);
443  return str;
444 }
445 
446 VALUE
447 rb_str_new_cstr(const char *ptr)
448 {
449  if (!ptr) {
450  rb_raise(rb_eArgError, "NULL pointer given");
451  }
452  return rb_str_new(ptr, strlen(ptr));
453 }
454 
456 #define rb_str_new2 rb_str_new_cstr
457 
458 VALUE
459 rb_usascii_str_new_cstr(const char *ptr)
460 {
461  VALUE str = rb_str_new2(ptr);
463  return str;
464 }
465 
467 #define rb_usascii_str_new2 rb_usascii_str_new_cstr
468 
469 VALUE
470 rb_tainted_str_new(const char *ptr, long len)
471 {
472  VALUE str = rb_str_new(ptr, len);
473 
474  OBJ_TAINT(str);
475  return str;
476 }
477 
478 VALUE
479 rb_tainted_str_new_cstr(const char *ptr)
480 {
481  VALUE str = rb_str_new2(ptr);
482 
483  OBJ_TAINT(str);
484  return str;
485 }
486 
488 #define rb_tainted_str_new2 rb_tainted_str_new_cstr
489 
490 VALUE
492 {
494  rb_econv_t *ec;
496  long len, olen;
497  VALUE econv_wrapper;
498  VALUE newstr;
499  const unsigned char *start, *sp;
500  unsigned char *dest, *dp;
501  size_t converted_output = 0;
502 
503  if (!to) return str;
504  if (!from) from = rb_enc_get(str);
505  if (from == to) return str;
506  if ((rb_enc_asciicompat(to) && ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) ||
507  to == rb_ascii8bit_encoding()) {
508  if (STR_ENC_GET(str) != to) {
509  str = rb_str_dup(str);
510  rb_enc_associate(str, to);
511  }
512  return str;
513  }
514 
515  len = RSTRING_LEN(str);
516  newstr = rb_str_new(0, len);
517  olen = len;
518 
519  econv_wrapper = rb_obj_alloc(rb_cEncodingConverter);
520  RBASIC(econv_wrapper)->klass = 0;
521  ec = rb_econv_open_opts(from->name, to->name, ecflags, ecopts);
522  if (!ec) return str;
523  DATA_PTR(econv_wrapper) = ec;
524 
525  sp = (unsigned char*)RSTRING_PTR(str);
526  start = sp;
527  while ((dest = (unsigned char*)RSTRING_PTR(newstr)),
528  (dp = dest + converted_output),
529  (ret = rb_econv_convert(ec, &sp, start + len, &dp, dest + olen, 0)),
531  /* destination buffer short */
532  size_t converted_input = sp - start;
533  size_t rest = len - converted_input;
534  converted_output = dp - dest;
535  rb_str_set_len(newstr, converted_output);
536  if (converted_input && converted_output &&
537  rest < (LONG_MAX / converted_output)) {
538  rest = (rest * converted_output) / converted_input;
539  }
540  else {
541  rest = olen;
542  }
543  olen += rest < 2 ? 2 : rest;
544  rb_str_resize(newstr, olen);
545  }
546  DATA_PTR(econv_wrapper) = 0;
547  rb_econv_close(ec);
548  rb_gc_force_recycle(econv_wrapper);
549  switch (ret) {
550  case econv_finished:
551  len = dp - (unsigned char*)RSTRING_PTR(newstr);
552  rb_str_set_len(newstr, len);
553  rb_enc_associate(newstr, to);
554  return newstr;
555 
556  default:
557  /* some error, return original */
558  return str;
559  }
560 }
561 
562 VALUE
564 {
565  return rb_str_conv_enc_opts(str, from, to, 0, Qnil);
566 }
567 
568 VALUE
569 rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc)
570 {
571  VALUE str;
572 
573  str = rb_tainted_str_new(ptr, len);
574  if (eenc == rb_usascii_encoding() &&
577  return str;
578  }
579  rb_enc_associate(str, eenc);
580  return rb_str_conv_enc(str, eenc, rb_default_internal_encoding());
581 }
582 
583 VALUE
584 rb_external_str_new(const char *ptr, long len)
585 {
587 }
588 
589 VALUE
590 rb_external_str_new_cstr(const char *ptr)
591 {
593 }
594 
595 VALUE
596 rb_locale_str_new(const char *ptr, long len)
597 {
599 }
600 
601 VALUE
602 rb_locale_str_new_cstr(const char *ptr)
603 {
605 }
606 
607 VALUE
608 rb_filesystem_str_new(const char *ptr, long len)
609 {
611 }
612 
613 VALUE
615 {
617 }
618 
619 VALUE
621 {
623 }
624 
625 VALUE
627 {
628  return rb_str_conv_enc(str, STR_ENC_GET(str), rb_locale_encoding());
629 }
630 
631 VALUE
633 {
634  return rb_str_conv_enc(str, STR_ENC_GET(str), enc);
635 }
636 
637 static VALUE
639 {
640  if (RSTRING_LEN(str) <= RSTRING_EMBED_LEN_MAX) {
641  STR_SET_EMBED(str2);
642  memcpy(RSTRING_PTR(str2), RSTRING_PTR(str), RSTRING_LEN(str)+1);
643  STR_SET_EMBED_LEN(str2, RSTRING_LEN(str));
644  }
645  else {
646  str = rb_str_new_frozen(str);
647  FL_SET(str2, STR_NOEMBED);
648  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
649  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
650  RSTRING(str2)->as.heap.aux.shared = str;
651  FL_SET(str2, ELTS_SHARED);
652  }
653  return str2;
654 }
655 
656 static VALUE
658 {
660  rb_enc_cr_str_exact_copy(str2, str);
661  return str2;
662 }
663 
664 static VALUE
666 {
667  return str_replace_shared(str_alloc(klass), str);
668 }
669 
670 static VALUE
672 {
673  return str_new_shared(klass, str);
674 }
675 
676 VALUE
678 {
679  VALUE str2 = str_new3(rb_obj_class(str), str);
680 
681  OBJ_INFECT(str2, str);
682  return str2;
683 }
684 
686 #define rb_str_new3 rb_str_new_shared
687 
688 static VALUE
689 str_new4(VALUE klass, VALUE str)
690 {
691  VALUE str2;
692 
693  str2 = str_alloc(klass);
694  STR_SET_NOEMBED(str2);
695  RSTRING(str2)->as.heap.len = RSTRING_LEN(str);
696  RSTRING(str2)->as.heap.ptr = RSTRING_PTR(str);
697  if (STR_SHARED_P(str)) {
698  VALUE shared = RSTRING(str)->as.heap.aux.shared;
699  assert(OBJ_FROZEN(shared));
700  FL_SET(str2, ELTS_SHARED);
701  RSTRING(str2)->as.heap.aux.shared = shared;
702  }
703  else {
704  FL_SET(str, ELTS_SHARED);
705  RSTRING(str)->as.heap.aux.shared = str2;
706  }
707  rb_enc_cr_str_exact_copy(str2, str);
708  OBJ_INFECT(str2, str);
709  return str2;
710 }
711 
712 VALUE
714 {
715  VALUE klass, str;
716 
717  if (OBJ_FROZEN(orig)) return orig;
718  klass = rb_obj_class(orig);
719  if (STR_SHARED_P(orig) && (str = RSTRING(orig)->as.heap.aux.shared)) {
720  long ofs;
721  assert(OBJ_FROZEN(str));
722  ofs = RSTRING_LEN(str) - RSTRING_LEN(orig);
723  if ((ofs > 0) || (klass != RBASIC(str)->klass) ||
724  ((RBASIC(str)->flags ^ RBASIC(orig)->flags) & (FL_TAINT|FL_UNTRUSTED)) ||
725  ENCODING_GET(str) != ENCODING_GET(orig)) {
726  str = str_new3(klass, str);
727  RSTRING(str)->as.heap.ptr += ofs;
728  RSTRING(str)->as.heap.len -= ofs;
729  rb_enc_cr_str_exact_copy(str, orig);
730  OBJ_INFECT(str, orig);
731  }
732  }
733  else if (STR_EMBED_P(orig)) {
734  str = str_new(klass, RSTRING_PTR(orig), RSTRING_LEN(orig));
735  rb_enc_cr_str_exact_copy(str, orig);
736  OBJ_INFECT(str, orig);
737  }
738  else if (STR_ASSOC_P(orig)) {
739  VALUE assoc = RSTRING(orig)->as.heap.aux.shared;
740  FL_UNSET(orig, STR_ASSOC);
741  str = str_new4(klass, orig);
742  FL_SET(str, STR_ASSOC);
743  RSTRING(str)->as.heap.aux.shared = assoc;
744  }
745  else {
746  str = str_new4(klass, orig);
747  }
748  OBJ_FREEZE(str);
749  return str;
750 }
751 
753 #define rb_str_new4 rb_str_new_frozen
754 
755 VALUE
756 rb_str_new_with_class(VALUE obj, const char *ptr, long len)
757 {
758  return str_new(rb_obj_class(obj), ptr, len);
759 }
760 
761 RUBY_ALIAS_FUNCTION(rb_str_new5(VALUE obj, const char *ptr, long len),
762  rb_str_new_with_class, (obj, ptr, len))
763 #define rb_str_new5 rb_str_new_with_class
764 
765 static VALUE
766 str_new_empty(VALUE str)
767 {
768  VALUE v = rb_str_new5(str, 0, 0);
769  rb_enc_copy(v, str);
770  OBJ_INFECT(v, str);
771  return v;
772 }
773 
774 #define STR_BUF_MIN_SIZE 128
775 
776 VALUE
777 rb_str_buf_new(long capa)
778 {
779  VALUE str = str_alloc(rb_cString);
780 
781  if (capa < STR_BUF_MIN_SIZE) {
782  capa = STR_BUF_MIN_SIZE;
783  }
784  FL_SET(str, STR_NOEMBED);
785  RSTRING(str)->as.heap.aux.capa = capa;
786  RSTRING(str)->as.heap.ptr = ALLOC_N(char, capa+1);
787  RSTRING(str)->as.heap.ptr[0] = '\0';
788 
789  return str;
790 }
791 
792 VALUE
793 rb_str_buf_new_cstr(const char *ptr)
794 {
795  VALUE str;
796  long len = strlen(ptr);
797 
798  str = rb_str_buf_new(len);
799  rb_str_buf_cat(str, ptr, len);
800 
801  return str;
802 }
803 
805 #define rb_str_buf_new2 rb_str_buf_new_cstr
806 
807 VALUE
808 rb_str_tmp_new(long len)
809 {
810  return str_new(0, 0, len);
811 }
812 
813 void *
814 rb_alloc_tmp_buffer(volatile VALUE *store, long len)
815 {
816  VALUE s = rb_str_tmp_new(len);
817  *store = s;
818  return RSTRING_PTR(s);
819 }
820 
821 void
822 rb_free_tmp_buffer(volatile VALUE *store)
823 {
824  VALUE s = *store;
825  *store = 0;
826  if (s) rb_str_clear(s);
827 }
828 
829 void
831 {
832  if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
833  xfree(RSTRING(str)->as.heap.ptr);
834  }
835 }
836 
837 RUBY_FUNC_EXPORTED size_t
839 {
840  if (!STR_EMBED_P(str) && !STR_SHARED_P(str)) {
841  return RSTRING(str)->as.heap.aux.capa;
842  }
843  else {
844  return 0;
845  }
846 }
847 
848 VALUE
850 {
851  return rb_convert_type(str, T_STRING, "String", "to_str");
852 }
853 
854 static inline void str_discard(VALUE str);
855 
856 void
858 {
859  rb_encoding *enc;
860  int cr;
861  if (str == str2) return;
862  enc = STR_ENC_GET(str2);
863  cr = ENC_CODERANGE(str2);
864  str_discard(str);
865  OBJ_INFECT(str, str2);
866  if (RSTRING_LEN(str2) <= RSTRING_EMBED_LEN_MAX) {
867  STR_SET_EMBED(str);
868  memcpy(RSTRING_PTR(str), RSTRING_PTR(str2), RSTRING_LEN(str2)+1);
869  STR_SET_EMBED_LEN(str, RSTRING_LEN(str2));
870  rb_enc_associate(str, enc);
871  ENC_CODERANGE_SET(str, cr);
872  return;
873  }
874  STR_SET_NOEMBED(str);
875  STR_UNSET_NOCAPA(str);
876  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
877  RSTRING(str)->as.heap.len = RSTRING_LEN(str2);
878  if (STR_NOCAPA_P(str2)) {
879  FL_SET(str, RBASIC(str2)->flags & STR_NOCAPA);
880  RSTRING(str)->as.heap.aux.shared = RSTRING(str2)->as.heap.aux.shared;
881  }
882  else {
883  RSTRING(str)->as.heap.aux.capa = RSTRING(str2)->as.heap.aux.capa;
884  }
885  STR_SET_EMBED(str2); /* abandon str2 */
886  RSTRING_PTR(str2)[0] = 0;
887  STR_SET_EMBED_LEN(str2, 0);
888  rb_enc_associate(str, enc);
889  ENC_CODERANGE_SET(str, cr);
890 }
891 
892 static ID id_to_s;
893 
894 VALUE
896 {
897  VALUE str;
898 
899  if (RB_TYPE_P(obj, T_STRING)) {
900  return obj;
901  }
902  str = rb_funcall(obj, id_to_s, 0);
903  if (!RB_TYPE_P(str, T_STRING))
904  return rb_any_to_s(obj);
905  if (OBJ_TAINTED(obj)) OBJ_TAINT(str);
906  return str;
907 }
908 
909 static VALUE
911 {
912  long len;
913 
914  len = RSTRING_LEN(str2);
915  if (STR_ASSOC_P(str2)) {
916  str2 = rb_str_new4(str2);
917  }
918  if (STR_SHARED_P(str2)) {
919  VALUE shared = RSTRING(str2)->as.heap.aux.shared;
920  assert(OBJ_FROZEN(shared));
921  STR_SET_NOEMBED(str);
922  RSTRING(str)->as.heap.len = len;
923  RSTRING(str)->as.heap.ptr = RSTRING_PTR(str2);
924  FL_SET(str, ELTS_SHARED);
925  FL_UNSET(str, STR_ASSOC);
926  RSTRING(str)->as.heap.aux.shared = shared;
927  }
928  else {
929  str_replace_shared(str, str2);
930  }
931 
932  OBJ_INFECT(str, str2);
933  rb_enc_cr_str_exact_copy(str, str2);
934  return str;
935 }
936 
937 static VALUE
939 {
940  VALUE dup = str_alloc(klass);
941  str_replace(dup, str);
942  return dup;
943 }
944 
945 VALUE
947 {
948  return str_duplicate(rb_obj_class(str), str);
949 }
950 
951 VALUE
953 {
957  }
958  return str_replace(str_alloc(rb_cString), str);
959 }
960 
961 /*
962  * call-seq:
963  * String.new(str="") -> new_str
964  *
965  * Returns a new string object containing a copy of <i>str</i>.
966  */
967 
968 static VALUE
970 {
971  VALUE orig;
972 
973  if (argc > 0 && rb_scan_args(argc, argv, "01", &orig) == 1)
974  rb_str_replace(str, orig);
975  return str;
976 }
977 
978 static inline long
979 enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
980 {
981  long c;
982  const char *q;
983 
984  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
985  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
986  }
987  else if (rb_enc_asciicompat(enc)) {
988  c = 0;
989  if (cr == ENC_CODERANGE_7BIT || cr == ENC_CODERANGE_VALID) {
990  while (p < e) {
991  if (ISASCII(*p)) {
992  q = search_nonascii(p, e);
993  if (!q)
994  return c + (e - p);
995  c += q - p;
996  p = q;
997  }
998  p += rb_enc_fast_mbclen(p, e, enc);
999  c++;
1000  }
1001  }
1002  else {
1003  while (p < e) {
1004  if (ISASCII(*p)) {
1005  q = search_nonascii(p, e);
1006  if (!q)
1007  return c + (e - p);
1008  c += q - p;
1009  p = q;
1010  }
1011  p += rb_enc_mbclen(p, e, enc);
1012  c++;
1013  }
1014  }
1015  return c;
1016  }
1017 
1018  for (c=0; p<e; c++) {
1019  p += rb_enc_mbclen(p, e, enc);
1020  }
1021  return c;
1022 }
1023 
1024 long
1025 rb_enc_strlen(const char *p, const char *e, rb_encoding *enc)
1026 {
1027  return enc_strlen(p, e, enc, ENC_CODERANGE_UNKNOWN);
1028 }
1029 
1030 long
1031 rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
1032 {
1033  long c;
1034  const char *q;
1035  int ret;
1036 
1037  *cr = 0;
1038  if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1039  return (e - p + rb_enc_mbminlen(enc) - 1) / rb_enc_mbminlen(enc);
1040  }
1041  else if (rb_enc_asciicompat(enc)) {
1042  c = 0;
1043  while (p < e) {
1044  if (ISASCII(*p)) {
1045  q = search_nonascii(p, e);
1046  if (!q) {
1047  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1048  return c + (e - p);
1049  }
1050  c += q - p;
1051  p = q;
1052  }
1053  ret = rb_enc_precise_mbclen(p, e, enc);
1054  if (MBCLEN_CHARFOUND_P(ret)) {
1055  *cr |= ENC_CODERANGE_VALID;
1056  p += MBCLEN_CHARFOUND_LEN(ret);
1057  }
1058  else {
1059  *cr = ENC_CODERANGE_BROKEN;
1060  p++;
1061  }
1062  c++;
1063  }
1064  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1065  return c;
1066  }
1067 
1068  for (c=0; p<e; c++) {
1069  ret = rb_enc_precise_mbclen(p, e, enc);
1070  if (MBCLEN_CHARFOUND_P(ret)) {
1071  *cr |= ENC_CODERANGE_VALID;
1072  p += MBCLEN_CHARFOUND_LEN(ret);
1073  }
1074  else {
1075  *cr = ENC_CODERANGE_BROKEN;
1076  if (p + rb_enc_mbminlen(enc) <= e)
1077  p += rb_enc_mbminlen(enc);
1078  else
1079  p = e;
1080  }
1081  }
1082  if (!*cr) *cr = ENC_CODERANGE_7BIT;
1083  return c;
1084 }
1085 
1086 #ifdef NONASCII_MASK
1087 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1088 
1089 /*
1090  * UTF-8 leading bytes have either 0xxxxxxx or 11xxxxxx
1091  * bit representation. (see http://en.wikipedia.org/wiki/UTF-8)
1092  * Therefore, following pseudo code can detect UTF-8 leading byte.
1093  *
1094  * if (!(byte & 0x80))
1095  * byte |= 0x40; // turn on bit6
1096  * return ((byte>>6) & 1); // bit6 represent it's leading byte or not.
1097  *
1098  * This function applies the above logic to every byte of the argument
1099  * word `s' in parallel, and then sums the per-byte results.
1100  */
1101 static inline VALUE
1102 count_utf8_lead_bytes_with_word(const VALUE *s)
1103 {
1104  VALUE d = *s;
1105 
1106  /* Transform into bit0 represent UTF-8 leading or not. */
1107  d |= ~(d>>1);
1108  d >>= 6;
1109  d &= NONASCII_MASK >> 7;
1110 
1111  /* Gather every bytes. */
1112  d += (d>>8);
1113  d += (d>>16);
1114 #if SIZEOF_VALUE == 8
1115  d += (d>>32);
1116 #endif
1117  return (d&0xF);
1118 }
1119 #endif
1120 
1121 static long
1123 {
1124  const char *p, *e;
1125  long n;
1126  int cr;
1127 
1128  if (single_byte_optimizable(str)) return RSTRING_LEN(str);
1129  if (!enc) enc = STR_ENC_GET(str);
1130  p = RSTRING_PTR(str);
1131  e = RSTRING_END(str);
1132  cr = ENC_CODERANGE(str);
1133 #ifdef NONASCII_MASK
1134  if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1135  enc == rb_utf8_encoding()) {
1136 
1137  VALUE len = 0;
1138  if ((int)sizeof(VALUE) * 2 < e - p) {
1139  const VALUE *s, *t;
1140  const VALUE lowbits = sizeof(VALUE) - 1;
1141  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1142  t = (const VALUE*)(~lowbits & (VALUE)e);
1143  while (p < (const char *)s) {
1144  if (is_utf8_lead_byte(*p)) len++;
1145  p++;
1146  }
1147  while (s < t) {
1148  len += count_utf8_lead_bytes_with_word(s);
1149  s++;
1150  }
1151  p = (const char *)s;
1152  }
1153  while (p < e) {
1154  if (is_utf8_lead_byte(*p)) len++;
1155  p++;
1156  }
1157  return (long)len;
1158  }
1159 #endif
1160  n = rb_enc_strlen_cr(p, e, enc, &cr);
1161  if (cr) {
1162  ENC_CODERANGE_SET(str, cr);
1163  }
1164  return n;
1165 }
1166 
1167 long
1169 {
1170  return str_strlen(str, STR_ENC_GET(str));
1171 }
1172 
1173 /*
1174  * call-seq:
1175  * str.length -> integer
1176  * str.size -> integer
1177  *
1178  * Returns the character length of <i>str</i>.
1179  */
1180 
1181 VALUE
1183 {
1184  long len;
1185 
1186  len = str_strlen(str, STR_ENC_GET(str));
1187  return LONG2NUM(len);
1188 }
1189 
1190 /*
1191  * call-seq:
1192  * str.bytesize -> integer
1193  *
1194  * Returns the length of +str+ in bytes.
1195  *
1196  * "\x80\u3042".bytesize #=> 4
1197  * "hello".bytesize #=> 5
1198  */
1199 
1200 static VALUE
1202 {
1203  return LONG2NUM(RSTRING_LEN(str));
1204 }
1205 
1206 /*
1207  * call-seq:
1208  * str.empty? -> true or false
1209  *
1210  * Returns <code>true</code> if <i>str</i> has a length of zero.
1211  *
1212  * "hello".empty? #=> false
1213  * " ".empty? #=> false
1214  * "".empty? #=> true
1215  */
1216 
1217 static VALUE
1219 {
1220  if (RSTRING_LEN(str) == 0)
1221  return Qtrue;
1222  return Qfalse;
1223 }
1224 
1225 /*
1226  * call-seq:
1227  * str + other_str -> new_str
1228  *
1229  * Concatenation---Returns a new <code>String</code> containing
1230  * <i>other_str</i> concatenated to <i>str</i>.
1231  *
1232  * "Hello from " + self.to_s #=> "Hello from main"
1233  */
1234 
1235 VALUE
1237 {
1238  VALUE str3;
1239  rb_encoding *enc;
1240 
1241  StringValue(str2);
1242  enc = rb_enc_check(str1, str2);
1243  str3 = rb_str_new(0, RSTRING_LEN(str1)+RSTRING_LEN(str2));
1244  memcpy(RSTRING_PTR(str3), RSTRING_PTR(str1), RSTRING_LEN(str1));
1245  memcpy(RSTRING_PTR(str3) + RSTRING_LEN(str1),
1246  RSTRING_PTR(str2), RSTRING_LEN(str2));
1247  RSTRING_PTR(str3)[RSTRING_LEN(str3)] = '\0';
1248 
1249  if (OBJ_TAINTED(str1) || OBJ_TAINTED(str2))
1250  OBJ_TAINT(str3);
1253  return str3;
1254 }
1255 
1256 /*
1257  * call-seq:
1258  * str * integer -> new_str
1259  *
1260  * Copy --- Returns a new String containing +integer+ copies of the receiver.
1261  * +integer+ must be greater than or equal to 0.
1262  *
1263  * "Ho! " * 3 #=> "Ho! Ho! Ho! "
1264  * "Ho! " * 0 #=> ""
1265  */
1266 
1267 VALUE
1269 {
1270  VALUE str2;
1271  long n, len;
1272  char *ptr2;
1273 
1274  len = NUM2LONG(times);
1275  if (len < 0) {
1276  rb_raise(rb_eArgError, "negative argument");
1277  }
1278  if (len && LONG_MAX/len < RSTRING_LEN(str)) {
1279  rb_raise(rb_eArgError, "argument too big");
1280  }
1281 
1282  str2 = rb_str_new5(str, 0, len *= RSTRING_LEN(str));
1283  ptr2 = RSTRING_PTR(str2);
1284  if (len) {
1285  n = RSTRING_LEN(str);
1286  memcpy(ptr2, RSTRING_PTR(str), n);
1287  while (n <= len/2) {
1288  memcpy(ptr2 + n, ptr2, n);
1289  n *= 2;
1290  }
1291  memcpy(ptr2 + n, ptr2, len-n);
1292  }
1293  ptr2[RSTRING_LEN(str2)] = '\0';
1294  OBJ_INFECT(str2, str);
1295  rb_enc_cr_str_copy_for_substr(str2, str);
1296 
1297  return str2;
1298 }
1299 
1300 /*
1301  * call-seq:
1302  * str % arg -> new_str
1303  *
1304  * Format---Uses <i>str</i> as a format specification, and returns the result
1305  * of applying it to <i>arg</i>. If the format specification contains more than
1306  * one substitution, then <i>arg</i> must be an <code>Array</code> or <code>Hash</code>
1307  * containing the values to be substituted. See <code>Kernel::sprintf</code> for
1308  * details of the format string.
1309  *
1310  * "%05d" % 123 #=> "00123"
1311  * "%-5s: %08x" % [ "ID", self.object_id ] #=> "ID : 200e14d6"
1312  * "foo = %{foo}" % { :foo => 'bar' } #=> "foo = bar"
1313  */
1314 
1315 static VALUE
1317 {
1318  volatile VALUE tmp = rb_check_array_type(arg);
1319 
1320  if (!NIL_P(tmp)) {
1321  return rb_str_format(RARRAY_LENINT(tmp), RARRAY_PTR(tmp), str);
1322  }
1323  return rb_str_format(1, &arg, str);
1324 }
1325 
1326 static inline void
1328 {
1329  if (FL_TEST(str, STR_TMPLOCK)) {
1330  rb_raise(rb_eRuntimeError, "can't modify string; temporarily locked");
1331  }
1332  rb_check_frozen(str);
1333  if (!OBJ_UNTRUSTED(str) && rb_safe_level() >= 4)
1334  rb_raise(rb_eSecurityError, "Insecure: can't modify string");
1335 }
1336 
1337 static inline int
1339 {
1340  str_modifiable(str);
1341  if (!STR_SHARED_P(str)) return 1;
1342  if (STR_EMBED_P(str)) return 1;
1343  return 0;
1344 }
1345 
1346 static void
1348 {
1349  char *ptr;
1350  long len = RSTRING_LEN(str);
1351  long capa = len + expand;
1352 
1353  if (len > capa) len = capa;
1354  ptr = ALLOC_N(char, capa + 1);
1355  if (RSTRING_PTR(str)) {
1356  memcpy(ptr, RSTRING_PTR(str), len);
1357  }
1358  STR_SET_NOEMBED(str);
1359  STR_UNSET_NOCAPA(str);
1360  ptr[len] = 0;
1361  RSTRING(str)->as.heap.ptr = ptr;
1362  RSTRING(str)->as.heap.len = len;
1363  RSTRING(str)->as.heap.aux.capa = capa;
1364 }
1365 
1366 #define str_make_independent(str) str_make_independent_expand((str), 0L)
1367 
1368 void
1370 {
1371  if (!str_independent(str))
1372  str_make_independent(str);
1373  ENC_CODERANGE_CLEAR(str);
1374 }
1375 
1376 void
1377 rb_str_modify_expand(VALUE str, long expand)
1378 {
1379  if (expand < 0) {
1380  rb_raise(rb_eArgError, "negative expanding string size");
1381  }
1382  if (!str_independent(str)) {
1383  str_make_independent_expand(str, expand);
1384  }
1385  else if (expand > 0) {
1386  long len = RSTRING_LEN(str);
1387  long capa = len + expand;
1388  if (!STR_EMBED_P(str)) {
1389  REALLOC_N(RSTRING(str)->as.heap.ptr, char, capa+1);
1390  RSTRING(str)->as.heap.aux.capa = capa;
1391  }
1392  else if (capa > RSTRING_EMBED_LEN_MAX) {
1393  str_make_independent_expand(str, expand);
1394  }
1395  }
1396  ENC_CODERANGE_CLEAR(str);
1397 }
1398 
1399 /* As rb_str_modify(), but don't clear coderange */
1400 static void
1402 {
1403  if (!str_independent(str))
1404  str_make_independent(str);
1405  if (ENC_CODERANGE(str) == ENC_CODERANGE_BROKEN)
1406  /* Force re-scan later */
1407  ENC_CODERANGE_CLEAR(str);
1408 }
1409 
1410 static inline void
1412 {
1413  str_modifiable(str);
1414  if (!STR_SHARED_P(str) && !STR_EMBED_P(str)) {
1415  xfree(RSTRING_PTR(str));
1416  RSTRING(str)->as.heap.ptr = 0;
1417  RSTRING(str)->as.heap.len = 0;
1418  }
1419 }
1420 
1421 void
1423 {
1424  /* sanity check */
1425  rb_check_frozen(str);
1426  if (STR_ASSOC_P(str)) {
1427  /* already associated */
1428  rb_ary_concat(RSTRING(str)->as.heap.aux.shared, add);
1429  }
1430  else {
1431  if (STR_SHARED_P(str)) {
1432  VALUE assoc = RSTRING(str)->as.heap.aux.shared;
1433  str_make_independent(str);
1434  if (STR_ASSOC_P(assoc)) {
1435  assoc = RSTRING(assoc)->as.heap.aux.shared;
1436  rb_ary_concat(assoc, add);
1437  add = assoc;
1438  }
1439  }
1440  else if (STR_EMBED_P(str)) {
1441  str_make_independent(str);
1442  }
1443  else if (RSTRING(str)->as.heap.aux.capa != RSTRING_LEN(str)) {
1444  RESIZE_CAPA(str, RSTRING_LEN(str));
1445  }
1446  FL_SET(str, STR_ASSOC);
1447  RBASIC(add)->klass = 0;
1448  RSTRING(str)->as.heap.aux.shared = add;
1449  }
1450 }
1451 
1452 VALUE
1454 {
1455  if (STR_SHARED_P(str)) str = RSTRING(str)->as.heap.aux.shared;
1456  if (STR_ASSOC_P(str)) {
1457  return RSTRING(str)->as.heap.aux.shared;
1458  }
1459  return Qfalse;
1460 }
1461 
1462 void
1464 {
1465  rb_encoding *enc = rb_enc_get(str);
1466  if (!rb_enc_asciicompat(enc)) {
1467  rb_raise(rb_eEncCompatError, "ASCII incompatible encoding: %s", rb_enc_name(enc));
1468  }
1469 }
1470 
1471 VALUE
1472 rb_string_value(volatile VALUE *ptr)
1473 {
1474  VALUE s = *ptr;
1475  if (!RB_TYPE_P(s, T_STRING)) {
1476  s = rb_str_to_str(s);
1477  *ptr = s;
1478  }
1479  return s;
1480 }
1481 
1482 char *
1484 {
1485  VALUE str = rb_string_value(ptr);
1486  return RSTRING_PTR(str);
1487 }
1488 
1489 char *
1491 {
1492  VALUE str = rb_string_value(ptr);
1493  char *s = RSTRING_PTR(str);
1494  long len = RSTRING_LEN(str);
1495 
1496  if (!s || memchr(s, 0, len)) {
1497  rb_raise(rb_eArgError, "string contains null byte");
1498  }
1499  if (s[len]) {
1500  rb_str_modify(str);
1501  s = RSTRING_PTR(str);
1502  s[RSTRING_LEN(str)] = 0;
1503  }
1504  return s;
1505 }
1506 
1507 VALUE
1509 {
1510  str = rb_check_convert_type(str, T_STRING, "String", "to_str");
1511  return str;
1512 }
1513 
1514 /*
1515  * call-seq:
1516  * String.try_convert(obj) -> string or nil
1517  *
1518  * Try to convert <i>obj</i> into a String, using to_str method.
1519  * Returns converted string or nil if <i>obj</i> cannot be converted
1520  * for any reason.
1521  *
1522  * String.try_convert("str") #=> "str"
1523  * String.try_convert(/re/) #=> nil
1524  */
1525 static VALUE
1527 {
1528  return rb_check_string_type(str);
1529 }
1530 
/*
 * Starting at p, move forward by up to *nthp characters of encoding enc
 * without going past e, and return the position reached.
 *
 * *nthp is updated on return.  In the ASCII-compatible and generic
 * branches it is decremented as characters are consumed; in the two
 * arithmetic branches (single-byte and fixed-width encodings) the count
 * is written back unchanged and only the pointer is clamped to e.
 */
1531 static char*
1532 str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
1533 {
1534  long nth = *nthp;
/* every character is one byte: plain pointer arithmetic */
1535  if (rb_enc_mbmaxlen(enc) == 1) {
1536  p += nth;
1537  }
/* fixed-width encoding: multiply by the character width */
1538  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1539  p += nth * rb_enc_mbmaxlen(enc);
1540  }
1541  else if (rb_enc_asciicompat(enc)) {
1542  const char *p2, *e2;
1543  int n;
1544 
1545  while (p < e && 0 < nth) {
1546  e2 = p + nth;
/* fewer than nth bytes are left, so the nth character cannot exist */
1547  if (e < e2) {
1548  *nthp = nth;
1549  return (char *)e;
1550  }
1551  if (ISASCII(*p)) {
/* search_nonascii() is defined elsewhere; as used here it yields the
 * first non-ASCII byte in [p, e2) or NULL when there is none */
1552  p2 = search_nonascii(p, e2);
1553  if (!p2) {
1554  nth -= e2 - p;
1555  *nthp = nth;
1556  return (char *)e2;
1557  }
1558  nth -= p2 - p;
1559  p = p2;
1560  }
/* step over one (multibyte) character */
1561  n = rb_enc_mbclen(p, e, enc);
1562  p += n;
1563  nth--;
1564  }
1565  *nthp = nth;
1566  if (nth != 0) {
1567  return (char *)e;
1568  }
1569  return (char *)p;
1570  }
1571  else {
/* generic variable-width encoding: one character per iteration */
1572  while (p < e && nth--) {
1573  p += rb_enc_mbclen(p, e, enc);
1574  }
1575  }
1576  if (p > e) p = e;
1577  *nthp = nth;
1578  return (char*)p;
1579 }
1580 
1581 char*
1582 rb_enc_nth(const char *p, const char *e, long nth, rb_encoding *enc)
1583 {
1584  return str_nth_len(p, e, &nth, enc);
1585 }
1586 
1587 static char*
1588 str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1589 {
1590  if (singlebyte)
1591  p += nth;
1592  else {
1593  p = str_nth_len(p, e, &nth, enc);
1594  }
1595  if (!p) return 0;
1596  if (p > e) p = e;
1597  return (char *)p;
1598 }
1599 
1600 /* char offset to byte offset */
1601 static long
1602 str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
1603 {
1604  const char *pp = str_nth(p, e, nth, enc, singlebyte);
1605  if (!pp) return e - p;
1606  return pp - p;
1607 }
1608 
1609 long
1610 rb_str_offset(VALUE str, long pos)
1611 {
1612  return str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
1614 }
1615 
1616 #ifdef NONASCII_MASK
/*
 * UTF-8 specific search for the character at index *nthp in [p, e).
 *
 * Returns the position reached and stores the number of characters
 * still outstanding in *nthp (0 when the requested character was found).
 * is_utf8_lead_byte() and count_utf8_lead_bytes_with_word() are defined
 * outside this excerpt; from their use here they test one byte and count
 * lead bytes in one VALUE-sized word respectively.
 */
1617 static char *
1618 str_utf8_nth(const char *p, const char *e, long *nthp)
1619 {
1620  long nth = *nthp;
/* word-at-a-time path, taken only when both the input and the count
 * exceed two words */
1621  if ((int)SIZEOF_VALUE * 2 < e - p && (int)SIZEOF_VALUE * 2 < nth) {
1622  const VALUE *s, *t;
1623  const VALUE lowbits = sizeof(VALUE) - 1;
/* s: p rounded up, t: e rounded down, to a VALUE-size boundary */
1624  s = (const VALUE*)(~lowbits & ((VALUE)p + lowbits));
1625  t = (const VALUE*)(~lowbits & (VALUE)e);
/* handle the unaligned head byte by byte */
1626  while (p < (const char *)s) {
1627  if (is_utf8_lead_byte(*p)) nth--;
1628  p++;
1629  }
1630  do {
1631  nth -= count_utf8_lead_bytes_with_word(s);
1632  s++;
1633  } while (s < t && (int)sizeof(VALUE) <= nth);
1634  p = (char *)s;
1635  }
/* byte-wise tail: stop on the lead byte reached when the count is 0 */
1636  while (p < e) {
1637  if (is_utf8_lead_byte(*p)) {
1638  if (nth == 0) break;
1639  nth--;
1640  }
1641  p++;
1642  }
1643  *nthp = nth;
1644  return (char *)p;
1645 }
1646 
1647 static long
1648 str_utf8_offset(const char *p, const char *e, long nth)
1649 {
1650  const char *pp = str_utf8_nth(p, e, &nth);
1651  return pp - p;
1652 }
1653 #endif
1654 
1655 /* byte offset to char offset */
1656 long
1657 rb_str_sublen(VALUE str, long pos)
1658 {
1659  if (single_byte_optimizable(str) || pos < 0)
1660  return pos;
1661  else {
1662  char *p = RSTRING_PTR(str);
1663  return enc_strlen(p, p + pos, STR_ENC_GET(str), ENC_CODERANGE(str));
1664  }
1665 }
1666 
1667 VALUE
1668 rb_str_subseq(VALUE str, long beg, long len)
1669 {
1670  VALUE str2;
1671 
1672  if (RSTRING_LEN(str) == beg + len &&
1673  RSTRING_EMBED_LEN_MAX < len) {
1674  str2 = rb_str_new_shared(rb_str_new_frozen(str));
1675  rb_str_drop_bytes(str2, beg);
1676  }
1677  else {
1678  str2 = rb_str_new5(str, RSTRING_PTR(str)+beg, len);
1679  RB_GC_GUARD(str);
1680  }
1681 
1682  rb_enc_cr_str_copy_for_substr(str2, str);
1683  OBJ_INFECT(str2, str);
1684 
1685  return str2;
1686 }
1687 
/*
 * Translate a character-based range (beg, *lenp) of str into bytes.
 *
 * Returns a pointer to the first byte of the range and stores the byte
 * length of the range in *lenp.  Returns 0 when the requested length is
 * negative or the start lies outside the string.  A negative beg counts
 * from the end of the string.
 */
1688 static char *
1689 rb_str_subpos(VALUE str, long beg, long *lenp)
1690 {
1691  long len = *lenp;
1692  long slen = -1L;
1693  long blen = RSTRING_LEN(str);
1694  rb_encoding *enc = STR_ENC_GET(str);
1695  char *p, *s = RSTRING_PTR(str), *e = s + blen;
1696 
1697  if (len < 0) return 0;
1698  if (!blen) {
1699  len = 0;
1700  }
/* fast path: character index equals byte index */
1701  if (single_byte_optimizable(str)) {
1702  if (beg > blen) return 0;
1703  if (beg < 0) {
1704  beg += blen;
1705  if (beg < 0) return 0;
1706  }
1707  if (beg + len > blen)
1708  len = blen - beg;
1709  if (len < 0) return 0;
1710  p = s + beg;
1711  goto end;
1712  }
1713  if (beg < 0) {
1714  if (len > -beg) len = -beg;
/* start is close to the end relative to the string size: walk
 * backwards from e instead of measuring the whole string */
1715  if (-beg * rb_enc_mbmaxlen(enc) < RSTRING_LEN(str) / 8) {
1716  beg = -beg;
/* first move e back to the end of the requested range ... */
1717  while (beg-- > len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
1718  p = e;
1719  if (!p) return 0;
/* ... then move p back over the len characters of the range */
1720  while (len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
1721  if (!p) return 0;
1722  len = e - p;
1723  goto end;
1724  }
1725  else {
/* otherwise turn beg into a non-negative character index */
1726  slen = str_strlen(str, enc);
1727  beg += slen;
1728  if (beg < 0) return 0;
1729  p = s + beg;
1730  if (len == 0) goto end;
1731  }
1732  }
/* a character index beyond the byte length is certainly out of range */
1733  else if (beg > 0 && beg > RSTRING_LEN(str)) {
1734  return 0;
1735  }
1736  if (len == 0) {
1737  if (beg > str_strlen(str, enc)) return 0;
1738  p = s + beg;
1739  }
1740 #ifdef NONASCII_MASK
/* valid UTF-8: use the lead-byte counting helpers */
1741  else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID &&
1742  enc == rb_utf8_encoding()) {
1743  p = str_utf8_nth(s, e, &beg);
1744  if (beg > 0) return 0;
1745  len = str_utf8_offset(p, e, len);
1746  }
1747 #endif
/* fixed-width encoding: scale indices by the character size */
1748  else if (rb_enc_mbmaxlen(enc) == rb_enc_mbminlen(enc)) {
1749  int char_sz = rb_enc_mbmaxlen(enc);
1750 
1751  p = s + beg * char_sz;
1752  if (p > e) {
1753  return 0;
1754  }
1755  else if (len * char_sz > e - p)
1756  len = e - p;
1757  else
1758  len *= char_sz;
1759  }
/* general case: walk beg characters; reaching e yields an empty range,
 * or failure when characters were still outstanding */
1760  else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
1761  if (beg > 0) return 0;
1762  len = 0;
1763  }
1764  else {
1765  len = str_offset(p, e, len, enc, 0);
1766  }
1767  end:
1768  *lenp = len;
1769  RB_GC_GUARD(str);
1770  return p;
1771 }
1772 
1773 VALUE
1774 rb_str_substr(VALUE str, long beg, long len)
1775 {
1776  VALUE str2;
1777  char *p = rb_str_subpos(str, beg, &len);
1778 
1779  if (!p) return Qnil;
1780  if (len > RSTRING_EMBED_LEN_MAX && p + len == RSTRING_END(str)) {
1781  str2 = rb_str_new4(str);
1782  str2 = str_new3(rb_obj_class(str2), str2);
1783  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
1784  RSTRING(str2)->as.heap.len = len;
1785  }
1786  else {
1787  str2 = rb_str_new5(str, p, len);
1788  rb_enc_cr_str_copy_for_substr(str2, str);
1789  OBJ_INFECT(str2, str);
1790  RB_GC_GUARD(str);
1791  }
1792 
1793  return str2;
1794 }
1795 
1796 VALUE
1798 {
1799  if (STR_ASSOC_P(str)) {
1800  VALUE ary = RSTRING(str)->as.heap.aux.shared;
1801  OBJ_FREEZE(ary);
1802  }
1803  return rb_obj_freeze(str);
1804 }
1805 
/* From here on rb_str_dup_frozen expands to rb_str_new_frozen. */
1807 #define rb_str_dup_frozen rb_str_new_frozen
1808 
1809 VALUE
1810 rb_str_locktmp(VALUE str)
1811 {
1812  if (FL_TEST(str, STR_TMPLOCK)) {
1813  rb_raise(rb_eRuntimeError, "temporal locking already locked string");
1814  }
1815  FL_SET(str, STR_TMPLOCK);
1816  return str;
1817 }
1818 
1819 VALUE
1821 {
1822  if (!FL_TEST(str, STR_TMPLOCK)) {
1823  rb_raise(rb_eRuntimeError, "temporal unlocking already unlocked string");
1824  }
1825  FL_UNSET(str, STR_TMPLOCK);
1826  return str;
1827 }
1828 
1829 void
1830 rb_str_set_len(VALUE str, long len)
1831 {
1832  long capa;
1833 
1834  str_modifiable(str);
1835  if (STR_SHARED_P(str)) {
1836  rb_raise(rb_eRuntimeError, "can't set length of shared string");
1837  }
1838  if (len > (capa = (long)rb_str_capacity(str))) {
1839  rb_bug("probable buffer overflow: %ld for %ld", len, capa);
1840  }
1841  STR_SET_LEN(str, len);
1842  RSTRING_PTR(str)[len] = '\0';
1843 }
1844 
/*
 * Change the byte length of str to len and return str.
 *
 * Raises ArgumentError when len is negative.  The cached code range is
 * always cleared.  Depending on the old and new length the string stays
 * embedded, moves from embedded to heap storage, moves from heap back to
 * embedded storage, or has its heap buffer replaced or reallocated.  A
 * NUL terminator is written at the new length whenever the length
 * changes.
 */
1845 VALUE
1846 rb_str_resize(VALUE str, long len)
1847 {
1848  long slen;
1849  int independent;
1850 
1851  if (len < 0) {
1852  rb_raise(rb_eArgError, "negative string size (or size too big)");
1853  }
1854 
1855  independent = str_independent(str);
1856  ENC_CODERANGE_CLEAR(str);
1857  slen = RSTRING_LEN(str);
1858  if (len != slen) {
1859  if (STR_EMBED_P(str)) {
/* embedded and still fits: only the length and terminator change */
1860  if (len <= RSTRING_EMBED_LEN_MAX) {
1861  STR_SET_EMBED_LEN(str, len);
1862  RSTRING(str)->as.ary[len] = '\0';
1863  return str;
1864  }
/* embedded but too long now: move the contents to a heap buffer */
1865  str_make_independent_expand(str, len - slen);
1866  STR_SET_NOEMBED(str);
1867  }
/* heap string that now fits in the embedded area: copy it back in */
1868  else if (len <= RSTRING_EMBED_LEN_MAX) {
1869  char *ptr = RSTRING(str)->as.heap.ptr;
1870  STR_SET_EMBED(str);
1871  if (slen > len) slen = len;
1872  if (slen > 0) MEMCPY(RSTRING(str)->as.ary, ptr, char, slen);
1873  RSTRING(str)->as.ary[len] = '\0';
1874  STR_SET_EMBED_LEN(str, len);
/* the old heap buffer is freed only when str_independent() reported
 * the string as independent */
1875  if (independent) xfree(ptr);
1876  return str;
1877  }
1878  else if (!independent) {
1879  str_make_independent_expand(str, len - slen);
1880  }
/* own heap buffer: reallocate when growing, or when shrinking by more
 * than 1024 bytes */
1881  else if (slen < len || slen - len > 1024) {
1882  REALLOC_N(RSTRING(str)->as.heap.ptr, char, len+1);
1883  }
1884  if (!STR_NOCAPA_P(str)) {
1885  RSTRING(str)->as.heap.aux.capa = len;
1886  }
1887  RSTRING(str)->as.heap.len = len;
1888  RSTRING(str)->as.heap.ptr[len] = '\0'; /* sentinel */
1889  }
1890  return str;
1891 }
1892 
1893 static VALUE
1894 str_buf_cat(VALUE str, const char *ptr, long len)
1895 {
1896  long capa, total, off = -1;
1897 
1898  if (ptr >= RSTRING_PTR(str) && ptr <= RSTRING_END(str)) {
1899  off = ptr - RSTRING_PTR(str);
1900  }
1901  rb_str_modify(str);
1902  if (len == 0) return 0;
1903  if (STR_ASSOC_P(str)) {
1904  FL_UNSET(str, STR_ASSOC);
1905  capa = RSTRING(str)->as.heap.aux.capa = RSTRING_LEN(str);
1906  }
1907  else if (STR_EMBED_P(str)) {
1908  capa = RSTRING_EMBED_LEN_MAX;
1909  }
1910  else {
1911  capa = RSTRING(str)->as.heap.aux.capa;
1912  }
1913  if (RSTRING_LEN(str) >= LONG_MAX - len) {
1914  rb_raise(rb_eArgError, "string sizes too big");
1915  }
1916  total = RSTRING_LEN(str)+len;
1917  if (capa <= total) {
1918  while (total > capa) {
1919  if (capa + 1 >= LONG_MAX / 2) {
1920  capa = (total + 4095) / 4096;
1921  break;
1922  }
1923  capa = (capa + 1) * 2;
1924  }
1925  RESIZE_CAPA(str, capa);
1926  }
1927  if (off != -1) {
1928  ptr = RSTRING_PTR(str) + off;
1929  }
1930  memcpy(RSTRING_PTR(str) + RSTRING_LEN(str), ptr, len);
1931  STR_SET_LEN(str, total);
1932  RSTRING_PTR(str)[total] = '\0'; /* sentinel */
1933 
1934  return str;
1935 }
1936 
1937 #define str_buf_cat2(str, ptr) str_buf_cat((str), (ptr), strlen(ptr))
1938 
1939 VALUE
1940 rb_str_buf_cat(VALUE str, const char *ptr, long len)
1941 {
1942  if (len == 0) return str;
1943  if (len < 0) {
1944  rb_raise(rb_eArgError, "negative string size (or size too big)");
1945  }
1946  return str_buf_cat(str, ptr, len);
1947 }
1948 
1949 VALUE
1950 rb_str_buf_cat2(VALUE str, const char *ptr)
1951 {
1952  return rb_str_buf_cat(str, ptr, strlen(ptr));
1953 }
1954 
1955 VALUE
1956 rb_str_cat(VALUE str, const char *ptr, long len)
1957 {
1958  if (len < 0) {
1959  rb_raise(rb_eArgError, "negative string size (or size too big)");
1960  }
1961  if (STR_ASSOC_P(str)) {
1962  char *p;
1963  rb_str_modify_expand(str, len);
1964  p = RSTRING(str)->as.heap.ptr;
1965  memcpy(p + RSTRING(str)->as.heap.len, ptr, len);
1966  len = RSTRING(str)->as.heap.len += len;
1967  p[len] = '\0'; /* sentinel */
1968  return str;
1969  }
1970 
1971  return rb_str_buf_cat(str, ptr, len);
1972 }
1973 
1974 VALUE
1975 rb_str_cat2(VALUE str, const char *ptr)
1976 {
1977  return rb_str_cat(str, ptr, strlen(ptr));
1978 }
1979 
/*
 * Append len bytes at ptr, which carry encoding index ptr_encindex and
 * code range ptr_cr, to str, then set the encoding index and code range
 * of the result.  Returns str.
 *
 * If ptr_cr_ret is non-NULL it receives the code range of the appended
 * bytes as known at that point (it may have been determined by scanning).
 * Raises Encoding::CompatibilityError (rb_eEncCompatError) when the two
 * encodings cannot be combined, and ArgumentError when len is negative.
 */
1980 static VALUE
1981 rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len,
1982  int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
1983 {
1984  int str_encindex = ENCODING_GET(str);
1985  int res_encindex;
1986  int str_cr, res_cr;
1987 
1988  str_cr = ENC_CODERANGE(str);
1989 
1990  if (str_encindex == ptr_encindex) {
/* same encoding: scan the new bytes only when the receiver's range is
 * known, since the result stays unknown otherwise */
1991  if (str_cr == ENC_CODERANGE_UNKNOWN)
1992  ptr_cr = ENC_CODERANGE_UNKNOWN;
1993  else if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
1994  ptr_cr = coderange_scan(ptr, len, rb_enc_from_index(ptr_encindex));
1995  }
1996  }
1997  else {
1998  rb_encoding *str_enc = rb_enc_from_index(str_encindex);
1999  rb_encoding *ptr_enc = rb_enc_from_index(ptr_encindex);
/* differing encodings where one is not ASCII compatible: allowed only
 * when one side is empty */
2000  if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
2001  if (len == 0)
2002  return str;
2003  if (RSTRING_LEN(str) == 0) {
2004  rb_str_buf_cat(str, ptr, len);
2005  ENCODING_CODERANGE_SET(str, ptr_encindex, ptr_cr);
2006  return str;
2007  }
2008  goto incompatible;
2009  }
2010  if (ptr_cr == ENC_CODERANGE_UNKNOWN) {
2011  ptr_cr = coderange_scan(ptr, len, ptr_enc);
2012  }
2013  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2014  if (ENCODING_IS_ASCII8BIT(str) || ptr_cr != ENC_CODERANGE_7BIT) {
2015  str_cr = rb_enc_str_coderange(str);
2016  }
2017  }
2018  }
2019  if (ptr_cr_ret)
2020  *ptr_cr_ret = ptr_cr;
2021 
/* differing encodings are acceptable only if at least one side is 7-bit */
2022  if (str_encindex != ptr_encindex &&
2023  str_cr != ENC_CODERANGE_7BIT &&
2024  ptr_cr != ENC_CODERANGE_7BIT) {
2025  incompatible:
2026  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
2027  rb_enc_name(rb_enc_from_index(str_encindex)),
2028  rb_enc_name(rb_enc_from_index(ptr_encindex)));
2029  }
2030 
/* choose the encoding index and code range of the concatenation */
2031  if (str_cr == ENC_CODERANGE_UNKNOWN) {
2032  res_encindex = str_encindex;
2033  res_cr = ENC_CODERANGE_UNKNOWN;
2034  }
2035  else if (str_cr == ENC_CODERANGE_7BIT) {
2036  if (ptr_cr == ENC_CODERANGE_7BIT) {
2037  res_encindex = str_encindex;
2038  res_cr = ENC_CODERANGE_7BIT;
2039  }
2040  else {
/* 7-bit receiver takes over the encoding of the appended bytes */
2041  res_encindex = ptr_encindex;
2042  res_cr = ptr_cr;
2043  }
2044  }
2045  else if (str_cr == ENC_CODERANGE_VALID) {
2046  res_encindex = str_encindex;
2047  if (ptr_cr == ENC_CODERANGE_7BIT || ptr_cr == ENC_CODERANGE_VALID)
2048  res_cr = str_cr;
2049  else
2050  res_cr = ptr_cr;
2051  }
2052  else { /* str_cr == ENC_CODERANGE_BROKEN */
2053  res_encindex = str_encindex;
2054  res_cr = str_cr;
/* appending bytes to a broken string resets the range to unknown */
2055  if (0 < len) res_cr = ENC_CODERANGE_UNKNOWN;
2056  }
2057 
2058  if (len < 0) {
2059  rb_raise(rb_eArgError, "negative string size (or size too big)");
2060  }
2061  str_buf_cat(str, ptr, len);
2062  ENCODING_CODERANGE_SET(str, res_encindex, res_cr);
2063  return str;
2064 }
2065 
2066 VALUE
2067 rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *ptr_enc)
2068 {
2069  return rb_enc_cr_str_buf_cat(str, ptr, len,
2071 }
2072 
2073 VALUE
2074 rb_str_buf_cat_ascii(VALUE str, const char *ptr)
2075 {
2076  /* ptr must reference NUL terminated ASCII string. */
2077  int encindex = ENCODING_GET(str);
2078  rb_encoding *enc = rb_enc_from_index(encindex);
2079  if (rb_enc_asciicompat(enc)) {
2080  return rb_enc_cr_str_buf_cat(str, ptr, strlen(ptr),
2081  encindex, ENC_CODERANGE_7BIT, 0);
2082  }
2083  else {
2084  char *buf = ALLOCA_N(char, rb_enc_mbmaxlen(enc));
2085  while (*ptr) {
2086  unsigned int c = (unsigned char)*ptr;
2087  int len = rb_enc_codelen(c, enc);
2088  rb_enc_mbcput(c, buf, enc);
2089  rb_enc_cr_str_buf_cat(str, buf, len,
2090  encindex, ENC_CODERANGE_VALID, 0);
2091  ptr++;
2092  }
2093  return str;
2094  }
2095 }
2096 
2097 VALUE
2099 {
2100  int str2_cr;
2101 
2102  str2_cr = ENC_CODERANGE(str2);
2103 
2104  rb_enc_cr_str_buf_cat(str, RSTRING_PTR(str2), RSTRING_LEN(str2),
2105  ENCODING_GET(str2), str2_cr, &str2_cr);
2106 
2107  OBJ_INFECT(str, str2);
2108  ENC_CODERANGE_SET(str2, str2_cr);
2109 
2110  return str;
2111 }
2112 
2113 VALUE
2115 {
2116  rb_encoding *enc;
2117  int cr, cr2;
2118  long len2;
2119 
2120  StringValue(str2);
2121  if ((len2 = RSTRING_LEN(str2)) > 0 && STR_ASSOC_P(str)) {
2122  long len = RSTRING_LEN(str) + len2;
2123  enc = rb_enc_check(str, str2);
2124  cr = ENC_CODERANGE(str);
2125  if ((cr2 = ENC_CODERANGE(str2)) > cr) cr = cr2;
2126  rb_str_modify_expand(str, len2);
2127  memcpy(RSTRING(str)->as.heap.ptr + RSTRING(str)->as.heap.len,
2128  RSTRING_PTR(str2), len2+1);
2129  RSTRING(str)->as.heap.len = len;
2130  rb_enc_associate(str, enc);
2131  ENC_CODERANGE_SET(str, cr);
2132  OBJ_INFECT(str, str2);
2133  return str;
2134  }
2135  return rb_str_buf_append(str, str2);
2136 }
2137 
2138 /*
2139  * call-seq:
2140  * str << integer -> str
2141  * str.concat(integer) -> str
2142  * str << obj -> str
2143  * str.concat(obj) -> str
2144  *
2145  * Append---Concatenates the given object to <i>str</i>. If the object is a
2146  * <code>Integer</code>, it is considered as a codepoint, and is converted
2147  * to a character before concatenation.
2148  *
2149  * a = "hello "
2150  * a << "world" #=> "hello world"
2151  * a.concat(33) #=> "hello world!"
2152  */
2153 
2154 VALUE
2156 {
2157  unsigned int code;
2158  rb_encoding *enc = STR_ENC_GET(str1);
2159 
2160  if (FIXNUM_P(str2) || RB_TYPE_P(str2, T_BIGNUM)) {
2161  if (rb_num_to_uint(str2, &code) == 0) {
2162  }
2163  else if (FIXNUM_P(str2)) {
2164  rb_raise(rb_eRangeError, "%ld out of char range", FIX2LONG(str2));
2165  }
2166  else {
2167  rb_raise(rb_eRangeError, "bignum out of char range");
2168  }
2169  }
2170  else {
2171  return rb_str_append(str1, str2);
2172  }
2173 
2174  if (enc == rb_usascii_encoding()) {
2175  /* US-ASCII automatically extended to ASCII-8BIT */
2176  char buf[1];
2177  buf[0] = (char)code;
2178  if (code > 0xFF) {
2179  rb_raise(rb_eRangeError, "%u out of char range", code);
2180  }
2181  rb_str_cat(str1, buf, 1);
2182  if (code > 127) {
2185  }
2186  }
2187  else {
2188  long pos = RSTRING_LEN(str1);
2189  int cr = ENC_CODERANGE(str1);
2190  int len;
2191  char *buf;
2192 
2193  switch (len = rb_enc_codelen(code, enc)) {
2195  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2196  break;
2198  case 0:
2199  rb_raise(rb_eRangeError, "%u out of char range", code);
2200  break;
2201  }
2202  buf = ALLOCA_N(char, len + 1);
2203  rb_enc_mbcput(code, buf, enc);
2204  if (rb_enc_precise_mbclen(buf, buf + len + 1, enc) != len) {
2205  rb_raise(rb_eRangeError, "invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
2206  }
2207  rb_str_resize(str1, pos+len);
2208  memcpy(RSTRING_PTR(str1) + pos, buf, len);
2209  if (cr == ENC_CODERANGE_7BIT && code > 127)
2210  cr = ENC_CODERANGE_VALID;
2211  ENC_CODERANGE_SET(str1, cr);
2212  }
2213  return str1;
2214 }
2215 
2216 /*
2217  * call-seq:
2218  * str.prepend(other_str) -> str
2219  *
2220  * Prepend---Prepend the given string to <i>str</i>.
2221  *
2222  * a = "world"
2223  * a.prepend("hello ") #=> "hello world"
2224  * a #=> "hello world"
2225  */
2226 
2227 static VALUE
2229 {
2230  StringValue(str2);
2231  StringValue(str);
2232  rb_str_update(str, 0L, 0L, str2);
2233  return str;
2234 }
2235 
2236 st_index_t
2238 {
2239  int e = ENCODING_GET(str);
2240  if (e && rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT) {
2241  e = 0;
2242  }
2243  return rb_memhash((const void *)RSTRING_PTR(str), RSTRING_LEN(str)) ^ e;
2244 }
2245 
2246 int
2248 {
2249  long len;
2250 
2251  if (!rb_str_comparable(str1, str2)) return 1;
2252  if (RSTRING_LEN(str1) == (len = RSTRING_LEN(str2)) &&
2253  memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), len) == 0) {
2254  return 0;
2255  }
2256  return 1;
2257 }
2258 
2259 /*
2260  * call-seq:
2261  * str.hash -> fixnum
2262  *
2263  * Return a hash based on the string's length and content.
2264  */
2265 
2266 static VALUE
2268 {
2269  st_index_t hval = rb_str_hash(str);
2270  return INT2FIX(hval);
2271 }
2272 
/* Smaller of a and b.  Each argument may be evaluated twice. */
2273 #define lesser(a,b) (((a)>(b))?(b):(a))
2274 
2275 int
2277 {
2278  int idx1, idx2;
2279  int rc1, rc2;
2280 
2281  if (RSTRING_LEN(str1) == 0) return TRUE;
2282  if (RSTRING_LEN(str2) == 0) return TRUE;
2283  idx1 = ENCODING_GET(str1);
2284  idx2 = ENCODING_GET(str2);
2285  if (idx1 == idx2) return TRUE;
2286  rc1 = rb_enc_str_coderange(str1);
2287  rc2 = rb_enc_str_coderange(str2);
2288  if (rc1 == ENC_CODERANGE_7BIT) {
2289  if (rc2 == ENC_CODERANGE_7BIT) return TRUE;
2291  return TRUE;
2292  }
2293  if (rc2 == ENC_CODERANGE_7BIT) {
2295  return TRUE;
2296  }
2297  return FALSE;
2298 }
2299 
2300 int
2302 {
2303  long len1, len2;
2304  const char *ptr1, *ptr2;
2305  int retval;
2306 
2307  if (str1 == str2) return 0;
2308  RSTRING_GETMEM(str1, ptr1, len1);
2309  RSTRING_GETMEM(str2, ptr2, len2);
2310  if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
2311  if (len1 == len2) {
2312  if (!rb_str_comparable(str1, str2)) {
2313  if (ENCODING_GET(str1) > ENCODING_GET(str2))
2314  return 1;
2315  return -1;
2316  }
2317  return 0;
2318  }
2319  if (len1 > len2) return 1;
2320  return -1;
2321  }
2322  if (retval > 0) return 1;
2323  return -1;
2324 }
2325 
2326 /* expect tail call optimization */
2327 static VALUE
2328 str_eql(const VALUE str1, const VALUE str2)
2329 {
2330  const long len = RSTRING_LEN(str1);
2331  const char *ptr1, *ptr2;
2332 
2333  if (len != RSTRING_LEN(str2)) return Qfalse;
2334  if (!rb_str_comparable(str1, str2)) return Qfalse;
2335  if ((ptr1 = RSTRING_PTR(str1)) == (ptr2 = RSTRING_PTR(str2)))
2336  return Qtrue;
2337  if (memcmp(ptr1, ptr2, len) == 0)
2338  return Qtrue;
2339  return Qfalse;
2340 }
2341 /*
2342  * call-seq:
2343  * str == obj -> true or false
2344  *
2345  * Equality---If <i>obj</i> is not a <code>String</code>, returns
2346  * <code>false</code>. Otherwise, returns <code>true</code> if <i>str</i>
2347  * <code><=></code> <i>obj</i> returns zero.
2348  */
2349 
2350 VALUE
2352 {
2353  if (str1 == str2) return Qtrue;
2354  if (!RB_TYPE_P(str2, T_STRING)) {
2355  if (!rb_respond_to(str2, rb_intern("to_str"))) {
2356  return Qfalse;
2357  }
2358  return rb_equal(str2, str1);
2359  }
2360  return str_eql(str1, str2);
2361 }
2362 
2363 /*
2364  * call-seq:
2365  * str.eql?(other) -> true or false
2366  *
2367  * Two strings are equal if they have the same length and content.
2368  */
2369 
2370 static VALUE
2372 {
2373  if (str1 == str2) return Qtrue;
2374  if (!RB_TYPE_P(str2, T_STRING)) return Qfalse;
2375  return str_eql(str1, str2);
2376 }
2377 
2378 /*
2379  * call-seq:
2380  * string <=> other_string -> -1, 0, +1 or nil
2381  *
2382  *
2383  * Comparison---Returns -1, 0, +1 or nil depending on whether +string+ is less
2384  * than, equal to, or greater than +other_string+.
2385  *
2386  * +nil+ is returned if the two values are incomparable.
2387  *
2388  * If the strings are of different lengths, and the strings are equal when
2389  * compared up to the shortest length, then the longer string is considered
2390  * greater than the shorter one.
2391  *
2392  * <code><=></code> is the basis for the methods <code><</code>,
2393  * <code><=</code>, <code>></code>, <code>>=</code>, and
2394  * <code>between?</code>, included from module Comparable. The method
2395  * String#== does not use Comparable#==.
2396  *
2397  * "abcdef" <=> "abcde" #=> 1
2398  * "abcdef" <=> "abcdef" #=> 0
2399  * "abcdef" <=> "abcdefg" #=> -1
2400  * "abcdef" <=> "ABCDEF" #=> 1
2401  */
2402 
2403 static VALUE
2405 {
2406  int result;
2407 
2408  if (!RB_TYPE_P(str2, T_STRING)) {
2409  VALUE tmp = rb_check_funcall(str2, rb_intern("to_str"), 0, 0);
2410  if (RB_TYPE_P(tmp, T_STRING)) {
2411  result = rb_str_cmp(str1, tmp);
2412  }
2413  else {
2414  return rb_invcmp(str1, str2);
2415  }
2416  }
2417  else {
2418  result = rb_str_cmp(str1, str2);
2419  }
2420  return INT2FIX(result);
2421 }
2422 
2423 /*
2424  * call-seq:
2425  * str.casecmp(other_str) -> -1, 0, +1 or nil
2426  *
2427  * Case-insensitive version of <code>String#<=></code>.
2428  *
2429  * "abcdef".casecmp("abcde") #=> 1
2430  * "aBcDeF".casecmp("abcdef") #=> 0
2431  * "abcdef".casecmp("abcdefg") #=> -1
2432  * "abcdef".casecmp("ABCDEF") #=> 0
2433  */
2434 
2435 static VALUE
2437 {
2438  long len;
2439  rb_encoding *enc;
2440  char *p1, *p1end, *p2, *p2end;
2441 
2442  StringValue(str2);
2443  enc = rb_enc_compatible(str1, str2);
2444  if (!enc) {
2445  return Qnil;
2446  }
2447 
2448  p1 = RSTRING_PTR(str1); p1end = RSTRING_END(str1);
2449  p2 = RSTRING_PTR(str2); p2end = RSTRING_END(str2);
2450  if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
2451  while (p1 < p1end && p2 < p2end) {
2452  if (*p1 != *p2) {
2453  unsigned int c1 = TOUPPER(*p1 & 0xff);
2454  unsigned int c2 = TOUPPER(*p2 & 0xff);
2455  if (c1 != c2)
2456  return INT2FIX(c1 < c2 ? -1 : 1);
2457  }
2458  p1++;
2459  p2++;
2460  }
2461  }
2462  else {
2463  while (p1 < p1end && p2 < p2end) {
2464  int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
2465  int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
2466 
2467  if (0 <= c1 && 0 <= c2) {
2468  c1 = TOUPPER(c1);
2469  c2 = TOUPPER(c2);
2470  if (c1 != c2)
2471  return INT2FIX(c1 < c2 ? -1 : 1);
2472  }
2473  else {
2474  int r;
2475  l1 = rb_enc_mbclen(p1, p1end, enc);
2476  l2 = rb_enc_mbclen(p2, p2end, enc);
2477  len = l1 < l2 ? l1 : l2;
2478  r = memcmp(p1, p2, len);
2479  if (r != 0)
2480  return INT2FIX(r < 0 ? -1 : 1);
2481  if (l1 != l2)
2482  return INT2FIX(l1 < l2 ? -1 : 1);
2483  }
2484  p1 += l1;
2485  p2 += l2;
2486  }
2487  }
2488  if (RSTRING_LEN(str1) == RSTRING_LEN(str2)) return INT2FIX(0);
2489  if (RSTRING_LEN(str1) > RSTRING_LEN(str2)) return INT2FIX(1);
2490  return INT2FIX(-1);
2491 }
2492 
/*
 * Search str for sub, starting at character offset `offset` (a negative
 * offset counts from the end).
 *
 * Returns a byte offset into str (callers in this file convert it with
 * rb_str_sublen()), or -1 when sub is not found, is a broken string, or
 * cannot fit after the start offset.  rb_enc_check() is called on the two
 * strings first.
 */
2493 static long
2494 rb_str_index(VALUE str, VALUE sub, long offset)
2495 {
2496  long pos;
2497  char *s, *sptr, *e;
2498  long len, slen;
2499  rb_encoding *enc;
2500 
2501  enc = rb_enc_check(str, sub);
2502  if (is_broken_string(sub)) {
2503  return -1;
2504  }
/* lengths in characters, used for the range checks */
2505  len = str_strlen(str, enc);
2506  slen = str_strlen(sub, enc);
2507  if (offset < 0) {
2508  offset += len;
2509  if (offset < 0) return -1;
2510  }
2511  if (len - offset < slen) return -1;
2512  s = RSTRING_PTR(str);
2513  e = s + RSTRING_LEN(str);
2514  if (offset) {
/* from here on offset is a byte offset */
2515  offset = str_offset(s, RSTRING_END(str), offset, enc, single_byte_optimizable(str));
2516  s += offset;
2517  }
2518  if (slen == 0) return offset;
2519  /* need proceed one character at a time */
2520  sptr = RSTRING_PTR(sub);
/* lengths in bytes, used for the search itself */
2521  slen = RSTRING_LEN(sub);
2522  len = RSTRING_LEN(str) - offset;
2523  for (;;) {
2524  char *t;
2525  pos = rb_memsearch(sptr, slen, s, len, enc);
2526  if (pos < 0) return pos;
/* accept the hit only when it starts on a character boundary;
 * otherwise resume the search from the next character head */
2527  t = rb_enc_right_char_head(s, s+pos, e, enc);
2528  if (t == s + pos) break;
2529  if ((len -= t - s) <= 0) return -1;
2530  offset += t - s;
2531  s = t;
2532  }
2533  return pos + offset;
2534 }
2535 
2536 
2537 /*
2538  * call-seq:
2539  * str.index(substring [, offset]) -> fixnum or nil
2540  * str.index(regexp [, offset]) -> fixnum or nil
2541  *
2542  * Returns the index of the first occurrence of the given <i>substring</i> or
2543  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2544  * found. If the second parameter is present, it specifies the position in the
2545  * string to begin the search.
2546  *
2547  * "hello".index('e') #=> 1
2548  * "hello".index('lo') #=> 3
2549  * "hello".index('a') #=> nil
2550  * "hello".index(?e) #=> 1
2551  * "hello".index(/[aeiou]/, -3) #=> 4
2552  */
2553 
2554 static VALUE
2556 {
2557  VALUE sub;
2558  VALUE initpos;
2559  long pos;
2560 
2561  if (rb_scan_args(argc, argv, "11", &sub, &initpos) == 2) {
2562  pos = NUM2LONG(initpos);
2563  }
2564  else {
2565  pos = 0;
2566  }
2567  if (pos < 0) {
2568  pos += str_strlen(str, STR_ENC_GET(str));
2569  if (pos < 0) {
2570  if (RB_TYPE_P(sub, T_REGEXP)) {
2572  }
2573  return Qnil;
2574  }
2575  }
2576 
2577  if (SPECIAL_CONST_P(sub)) goto generic;
2578  switch (BUILTIN_TYPE(sub)) {
2579  case T_REGEXP:
2580  if (pos > str_strlen(str, STR_ENC_GET(str)))
2581  return Qnil;
2582  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2583  rb_enc_check(str, sub), single_byte_optimizable(str));
2584 
2585  pos = rb_reg_search(sub, str, pos, 0);
2586  pos = rb_str_sublen(str, pos);
2587  break;
2588 
2589  generic:
2590  default: {
2591  VALUE tmp;
2592 
2593  tmp = rb_check_string_type(sub);
2594  if (NIL_P(tmp)) {
2595  rb_raise(rb_eTypeError, "type mismatch: %s given",
2596  rb_obj_classname(sub));
2597  }
2598  sub = tmp;
2599  }
2600  /* fall through */
2601  case T_STRING:
2602  pos = rb_str_index(str, sub, pos);
2603  pos = rb_str_sublen(str, pos);
2604  break;
2605  }
2606 
2607  if (pos == -1) return Qnil;
2608  return LONG2NUM(pos);
2609 }
2610 
/*
 * Search str backwards for sub, starting at character index pos.
 *
 * Returns the character index of the match, or -1 when there is none,
 * when sub is a broken string, or when sub is longer than str.  A pos too
 * close to the end for sub to fit is first pulled back to len - slen.
 */
2611 static long
2612 rb_str_rindex(VALUE str, VALUE sub, long pos)
2613 {
2614  long len, slen;
2615  char *s, *sbeg, *e, *t;
2616  rb_encoding *enc;
2617  int singlebyte = single_byte_optimizable(str);
2618 
2619  enc = rb_enc_check(str, sub);
2620  if (is_broken_string(sub)) {
2621  return -1;
2622  }
/* lengths in characters */
2623  len = str_strlen(str, enc);
2624  slen = str_strlen(sub, enc);
2625  /* substring longer than string */
2626  if (len < slen) return -1;
2627  if (len - pos < slen) {
2628  pos = len - slen;
2629  }
2630  if (len == 0) {
2631  return pos;
2632  }
2633  sbeg = RSTRING_PTR(str);
2634  e = RSTRING_END(str);
2635  t = RSTRING_PTR(sub);
/* from here slen is the byte length of sub, as memcmp() needs */
2636  slen = RSTRING_LEN(sub);
2637  s = str_nth(sbeg, e, pos, enc, singlebyte);
/* step back one character at a time, keeping pos in sync with s */
2638  while (s) {
2639  if (memcmp(s, t, slen) == 0) {
2640  return pos;
2641  }
2642  if (pos == 0) break;
2643  pos--;
2644  s = rb_enc_prev_char(sbeg, s, e, enc);
2645  }
2646  return -1;
2647 }
2648 
2649 
2650 /*
2651  * call-seq:
2652  * str.rindex(substring [, fixnum]) -> fixnum or nil
2653  * str.rindex(regexp [, fixnum]) -> fixnum or nil
2654  *
2655  * Returns the index of the last occurrence of the given <i>substring</i> or
2656  * pattern (<i>regexp</i>) in <i>str</i>. Returns <code>nil</code> if not
2657  * found. If the second parameter is present, it specifies the position in the
2658  * string to end the search---characters beyond this point will not be
2659  * considered.
2660  *
2661  * "hello".rindex('e') #=> 1
2662  * "hello".rindex('l') #=> 3
2663  * "hello".rindex('a') #=> nil
2664  * "hello".rindex(?e) #=> 1
2665  * "hello".rindex(/[aeiou]/, -2) #=> 1
2666  */
2667 
2668 static VALUE
2670 {
     /* NOTE(review): the declarator line (listing line 2669) is absent from
      * this extract; the body reads argc, argv and str. */
2671  VALUE sub;
2672  VALUE vpos;
2673  rb_encoding *enc = STR_ENC_GET(str);
2674  long pos, len = str_strlen(str, enc);
2675 
     /* One required argument (the pattern) and one optional end position. */
2676  if (rb_scan_args(argc, argv, "11", &sub, &vpos) == 2) {
2677  pos = NUM2LONG(vpos);
     /* A negative position counts from the end; still negative after the
      * adjustment means "before the string", so nothing can be found. */
2678  if (pos < 0) {
2679  pos += len;
2680  if (pos < 0) {
     /* NOTE(review): listing line 2682 (the body of this if) is absent
      * from this extract. */
2681  if (RB_TYPE_P(sub, T_REGEXP)) {
2683  }
2684  return Qnil;
2685  }
2686  }
2687  if (pos > len) pos = len;
2688  }
2689  else {
     /* No position given: start the backward search at the end. */
2690  pos = len;
2691  }
2692 
     /* Immediate values have no BUILTIN_TYPE; send them to the conversion
      * branch below. */
2693  if (SPECIAL_CONST_P(sub)) goto generic;
2694  switch (BUILTIN_TYPE(sub)) {
2695  case T_REGEXP:
2696  /* enc = rb_get_check(str, sub); */
     /* NOTE(review): the continuation of this call (listing line 2698) is
      * absent from this extract. */
2697  pos = str_offset(RSTRING_PTR(str), RSTRING_END(str), pos,
2699 
2700  if (!RREGEXP(sub)->ptr || RREGEXP_SRC_LEN(sub)) {
     /* Last argument 1: search in reverse; the result is then converted
      * with rb_str_sublen() before being returned as the index. */
2701  pos = rb_reg_search(sub, str, pos, 1);
2702  pos = rb_str_sublen(str, pos);
2703  }
2704  if (pos >= 0) return LONG2NUM(pos);
2705  break;
2706 
2707  generic:
2708  default: {
2709  VALUE tmp;
2710 
     /* Anything that is neither Regexp nor String must convert to a
      * String, otherwise TypeError. */
2711  tmp = rb_check_string_type(sub);
2712  if (NIL_P(tmp)) {
2713  rb_raise(rb_eTypeError, "type mismatch: %s given",
2714  rb_obj_classname(sub));
2715  }
2716  sub = tmp;
2717  }
2718  /* fall through */
2719  case T_STRING:
2720  pos = rb_str_rindex(str, sub, pos);
2721  if (pos >= 0) return LONG2NUM(pos);
2722  break;
2723  }
     /* Not found. */
2724  return Qnil;
2725 }
2726 
2727 /*
2728  * call-seq:
2729  * str =~ obj -> fixnum or nil
2730  *
2731  * Match---If <i>obj</i> is a <code>Regexp</code>, use it as a pattern to match
2732  * against <i>str</i>, and returns the position where the match starts, or
2733  * <code>nil</code> if there is no match. Otherwise, invokes
2734  * <i>obj.=~</i>, passing <i>str</i> as an argument. The default
2735  * <code>=~</code> in <code>Object</code> returns <code>nil</code>.
2736  *
2737  * Note: <code>str =~ regexp</code> is not the same as
2738  * <code>regexp =~ str</code>. Strings captured from named capture groups
2739  * are assigned to local variables only in the second case.
2740  *
2741  * "cat o' 9 tails" =~ /\d/ #=> 7
2742  * "cat o' 9 tails" =~ 9 #=> nil
2743  */
2744 
2745 static VALUE
2747 {
     /* NOTE(review): the declarator line (listing line 2746) is absent from
      * this extract; the body reads x (the receiver string) and y (the
      * right-hand operand). */
     /* Immediate values have no BUILTIN_TYPE; dispatch them generically. */
2748  if (SPECIAL_CONST_P(y)) goto generic;
2749  switch (BUILTIN_TYPE(y)) {
     /* String =~ String is rejected outright. */
2750  case T_STRING:
2751  rb_raise(rb_eTypeError, "type mismatch: String given");
2752 
     /* Regexp operand: let the regexp match against the string. */
2753  case T_REGEXP:
2754  return rb_reg_match(y, x);
2755 
     /* Anything else: call y's own =~ with the string as argument. */
2756  generic:
2757  default:
2758  return rb_funcall(y, rb_intern("=~"), 1, x);
2759  }
2760 }
2761 
2762 
2763 static VALUE get_pat(VALUE, int);
2764 
2765 
2766 /*
2767  * call-seq:
2768  * str.match(pattern) -> matchdata or nil
2769  * str.match(pattern, pos) -> matchdata or nil
2770  *
2771  * Converts <i>pattern</i> to a <code>Regexp</code> (if it isn't already one),
2772  * then invokes its <code>match</code> method on <i>str</i>. If the second
2773  * parameter is present, it specifies the position in the string to begin the
2774  * search.
2775  *
2776  * 'hello'.match('(.)\1') #=> #<MatchData "ll" 1:"l">
2777  * 'hello'.match('(.)\1')[0] #=> "ll"
2778  * 'hello'.match(/(.)\1/)[0] #=> "ll"
2779  * 'hello'.match('xx') #=> nil
2780  *
2781  * If a block is given, invokes the block with the MatchData if the match succeeds, so
2782  * that you can write
2783  *
2784  * str.match(pat) {|m| ...}
2785  *
2786  * instead of
2787  *
2788  * if m = str.match(pat)
2789  * ...
2790  * end
2791  *
2792  * The return value is a value from block execution in this case.
2793  */
2794 
2795 static VALUE
2797 {
     /* NOTE(review): the declarator line (listing line 2796) is absent from
      * this extract; the body reads argc, argv and str. */
2798  VALUE re, result;
     /* Arity is only checked here for the too-few case; argc is otherwise
      * forwarded unchanged to the pattern's "match" method below. */
2799  if (argc < 1)
2800  rb_check_arity(argc, 1, 2);
     /* Swap roles: the pattern becomes the receiver and the string takes
      * its place as the first argument (argv is modified in place). */
2801  re = argv[0];
2802  argv[0] = str;
2803  result = rb_funcall2(get_pat(re, 0), rb_intern("match"), argc, argv);
     /* With a block and a non-nil result, the block's value is returned. */
2804  if (!NIL_P(result) && rb_block_given_p()) {
2805  return rb_yield(result);
2806  }
2807  return result;
2808 }
2809 
2814 };
2815 
2816 static enum neighbor_char
2817 enc_succ_char(char *p, long len, rb_encoding *enc)
2818 {
2819  long i;
2820  int l;
2821  while (1) {
2822  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0xff; i--)
2823  p[i] = '\0';
2824  if (i < 0)
2825  return NEIGHBOR_WRAPPED;
2826  ++((unsigned char*)p)[i];
2827  l = rb_enc_precise_mbclen(p, p+len, enc);
2828  if (MBCLEN_CHARFOUND_P(l)) {
2829  l = MBCLEN_CHARFOUND_LEN(l);
2830  if (l == len) {
2831  return NEIGHBOR_FOUND;
2832  }
2833  else {
2834  memset(p+l, 0xff, len-l);
2835  }
2836  }
2837  if (MBCLEN_INVALID_P(l) && i < len-1) {
2838  long len2;
2839  int l2;
2840  for (len2 = len-1; 0 < len2; len2--) {
2841  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
2842  if (!MBCLEN_INVALID_P(l2))
2843  break;
2844  }
2845  memset(p+len2+1, 0xff, len-(len2+1));
2846  }
2847  }
2848 }
2849 
2850 static enum neighbor_char
2851 enc_pred_char(char *p, long len, rb_encoding *enc)
2852 {
2853  long i;
2854  int l;
2855  while (1) {
2856  for (i = len-1; 0 <= i && (unsigned char)p[i] == 0; i--)
2857  p[i] = '\xff';
2858  if (i < 0)
2859  return NEIGHBOR_WRAPPED;
2860  --((unsigned char*)p)[i];
2861  l = rb_enc_precise_mbclen(p, p+len, enc);
2862  if (MBCLEN_CHARFOUND_P(l)) {
2863  l = MBCLEN_CHARFOUND_LEN(l);
2864  if (l == len) {
2865  return NEIGHBOR_FOUND;
2866  }
2867  else {
2868  memset(p+l, 0, len-l);
2869  }
2870  }
2871  if (MBCLEN_INVALID_P(l) && i < len-1) {
2872  long len2;
2873  int l2;
2874  for (len2 = len-1; 0 < len2; len2--) {
2875  l2 = rb_enc_precise_mbclen(p, p+len2, enc);
2876  if (!MBCLEN_INVALID_P(l2))
2877  break;
2878  }
2879  memset(p+len2+1, 0, len-(len2+1));
2880  }
2881  }
2882 }
2883 
2884 /*
2885  overwrite +p+ by succeeding letter in +enc+ and returns
2886  NEIGHBOR_FOUND or NEIGHBOR_WRAPPED.
2887  When NEIGHBOR_WRAPPED, carried-out letter is stored into carry.
2888  assuming each ranges are successive, and mbclen
2889  never change in each ranges.
2890  NEIGHBOR_NOT_CHAR is returned if invalid character or the range has only one
2891  character.
2892  */
2893 static enum neighbor_char
2894 enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
2895 {
2896  enum neighbor_char ret;
2897  unsigned int c;
2898  int ctype;
2899  int range;
2900  char save[ONIGENC_CODE_TO_MBC_MAXLEN];
2901 
2902  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2903  if (rb_enc_isctype(c, ONIGENC_CTYPE_DIGIT, enc))
2904  ctype = ONIGENC_CTYPE_DIGIT;
2905  else if (rb_enc_isctype(c, ONIGENC_CTYPE_ALPHA, enc))
2906  ctype = ONIGENC_CTYPE_ALPHA;
2907  else
2908  return NEIGHBOR_NOT_CHAR;
2909 
2910  MEMCPY(save, p, char, len);
2911  ret = enc_succ_char(p, len, enc);
2912  if (ret == NEIGHBOR_FOUND) {
2913  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2914  if (rb_enc_isctype(c, ctype, enc))
2915  return NEIGHBOR_FOUND;
2916  }
2917  MEMCPY(p, save, char, len);
2918  range = 1;
2919  while (1) {
2920  MEMCPY(save, p, char, len);
2921  ret = enc_pred_char(p, len, enc);
2922  if (ret == NEIGHBOR_FOUND) {
2923  c = rb_enc_mbc_to_codepoint(p, p+len, enc);
2924  if (!rb_enc_isctype(c, ctype, enc)) {
2925  MEMCPY(p, save, char, len);
2926  break;
2927  }
2928  }
2929  else {
2930  MEMCPY(p, save, char, len);
2931  break;
2932  }
2933  range++;
2934  }
2935  if (range == 1) {
2936  return NEIGHBOR_NOT_CHAR;
2937  }
2938 
2939  if (ctype != ONIGENC_CTYPE_DIGIT) {
2940  MEMCPY(carry, p, char, len);
2941  return NEIGHBOR_WRAPPED;
2942  }
2943 
2944  MEMCPY(carry, p, char, len);
2945  enc_succ_char(carry, len, enc);
2946  return NEIGHBOR_WRAPPED;
2947 }
2948 
2949 
2950 /*
2951  * call-seq:
2952  * str.succ -> new_str
2953  * str.next -> new_str
2954  *
2955  * Returns the successor to <i>str</i>. The successor is calculated by
2956  * incrementing characters starting from the rightmost alphanumeric (or
2957  * the rightmost character if there are no alphanumerics) in the
2958  * string. Incrementing a digit always results in another digit, and
2959  * incrementing a letter results in another letter of the same case.
2960  * Incrementing nonalphanumerics uses the underlying character set's
2961  * collating sequence.
2962  *
2963  * If the increment generates a ``carry,'' the character to the left of
2964  * it is incremented. This process repeats until there is no carry,
2965  * adding an additional character if necessary.
2966  *
2967  * "abcd".succ #=> "abce"
2968  * "THX1138".succ #=> "THX1139"
2969  * "<<koala>>".succ #=> "<<koalb>>"
2970  * "1999zzz".succ #=> "2000aaa"
2971  * "ZZZ9999".succ #=> "AAAA0000"
2972  * "***".succ #=> "**+"
2973  */
2974 
2975 VALUE
2977 {
     /* NOTE(review): the declarator line (listing line 2976) is absent from
      * this extract; the body reads orig and never writes to it. */
2978  rb_encoding *enc;
2979  VALUE str;
2980  char *sbeg, *s, *e, *last_alnum = 0;
2981  int c = -1;
2982  long l;
     /* Default carry: the single byte "\1", inserted at offset 0 unless a
      * wrapped character overrides carry/carry_pos/carry_len below. */
2983  char carry[ONIGENC_CODE_TO_MBC_MAXLEN] = "\1";
2984  long carry_pos = 0, carry_len = 1;
2985  enum neighbor_char neighbor = NEIGHBOR_FOUND;
2986 
     /* All work happens on a copy; an empty string is returned as is. */
2987  str = rb_str_new5(orig, RSTRING_PTR(orig), RSTRING_LEN(orig));
2988  rb_enc_cr_str_copy_for_substr(str, orig);
2989  OBJ_INFECT(str, orig);
2990  if (RSTRING_LEN(str) == 0) return str;
2991 
2992  enc = STR_ENC_GET(orig);
2993  sbeg = RSTRING_PTR(str);
2994  s = e = sbeg + RSTRING_LEN(str);
2995 
     /* First pass: walk the characters right to left, incrementing
      * alphanumerics until one of them does not wrap. */
2996  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
     /* After skipping non-alphanumerics, stop carrying when the next
      * character is of the other kind (letter vs. digit) than the last
      * one that wrapped; the carry then goes in at last_alnum. */
2997  if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
2998  if (ISALPHA(*last_alnum) ? ISDIGIT(*s) :
2999  ISDIGIT(*last_alnum) ? ISALPHA(*s) : 0) {
3000  s = last_alnum;
3001  break;
3002  }
3003  }
3004  if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue;
3005  neighbor = enc_succ_alnum_char(s, l, enc, carry);
3006  switch (neighbor) {
3007  case NEIGHBOR_NOT_CHAR:
3008  continue;
3009  case NEIGHBOR_FOUND:
3010  return str;
3011  case NEIGHBOR_WRAPPED:
3012  last_alnum = s;
3013  break;
3014  }
     /* Reached only for NEIGHBOR_WRAPPED: remember where a carry would
      * have to be inserted and how long it is. */
3015  c = 1;
3016  carry_pos = s - sbeg;
3017  carry_len = l;
3018  }
3019  if (c == -1) { /* str contains no alnum */
     /* Second pass: increment raw characters right to left instead. */
3020  s = e;
3021  while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
3022  enum neighbor_char neighbor;
3023  if ((l = rb_enc_precise_mbclen(s, e, enc)) <= 0) continue;
3024  neighbor = enc_succ_char(s, l, enc);
3025  if (neighbor == NEIGHBOR_FOUND)
3026  return str;
3027  if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
3028  /* wrapped to \0...\0. search next valid char. */
3029  enc_succ_char(s, l, enc);
3030  }
     /* For encodings that are not ASCII compatible the default "\1"
      * carry is replaced by the wrapped character itself. */
3031  if (!rb_enc_asciicompat(enc)) {
3032  MEMCPY(carry, s, char, l);
3033  carry_len = l;
3034  }
3035  carry_pos = s - sbeg;
3036  }
3037  }
     /* Every incremented position wrapped: grow the string by carry_len
      * and insert the carry at carry_pos. */
3038  RESIZE_CAPA(str, RSTRING_LEN(str) + carry_len);
3039  s = RSTRING_PTR(str) + carry_pos;
3040  memmove(s + carry_len, s, RSTRING_LEN(str) - carry_pos);
3041  memmove(s, carry, carry_len);
3042  STR_SET_LEN(str, RSTRING_LEN(str) + carry_len);
3043  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3044  rb_enc_str_coderange(str);
3045  return str;
3046 }
3047 
3048 
3049 /*
3050  * call-seq:
3051  * str.succ! -> str
3052  * str.next! -> str
3053  *
3054  * Equivalent to <code>String#succ</code>, but modifies the receiver in
3055  * place.
3056  */
3057 
3058 static VALUE
3060 {
     /* NOTE(review): listing lines 3059 (the declarator) and 3061 (the only
      * statement before the return) are absent from this extract; only the
      * return of str is visible here. */
3062 
3063  return str;
3064 }
3065 
3066 
3067 /*
3068  * call-seq:
3069  * str.upto(other_str, exclusive=false) {|s| block } -> str
3070  * str.upto(other_str, exclusive=false) -> an_enumerator
3071  *
3072  * Iterates through successive values, starting at <i>str</i> and
3073  * ending at <i>other_str</i> inclusive, passing each value in turn to
3074  * the block. The <code>String#succ</code> method is used to generate
3075  * each value. If optional second argument exclusive is omitted or is false,
3076  * the last value will be included; otherwise it will be excluded.
3077  *
3078  * If no block is given, an enumerator is returned instead.
3079  *
3080  * "a8".upto("b6") {|s| print s, ' ' }
3081  * for s in "a8".."b6"
3082  * print s, ' '
3083  * end
3084  *
3085  * <em>produces:</em>
3086  *
3087  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3088  * a8 a9 b0 b1 b2 b3 b4 b5 b6
3089  *
3090  * If <i>str</i> and <i>other_str</i> contain only ASCII numeric characters,
3091  * both are recognized as decimal numbers. In addition, the width of
3092  * string (e.g. leading zeros) is handled appropriately.
3093  *
3094  * "9".upto("11").to_a #=> ["9", "10", "11"]
3095  * "25".upto("5").to_a #=> []
3096  * "07".upto("11").to_a #=> ["07", "08", "09", "10", "11"]
3097  */
3098 
3099 static VALUE
3101 {
     /* NOTE(review): the declarator line (listing line 3100) is absent from
      * this extract; the body reads argc, argv and beg (the receiver). */
3102  VALUE end, exclusive;
3103  VALUE current, after_end;
3104  ID succ;
3105  int n, excl, ascii;
3106  rb_encoding *enc;
3107 
3108  rb_scan_args(argc, argv, "11", &end, &exclusive);
3109  RETURN_ENUMERATOR(beg, argc, argv);
3110  excl = RTEST(exclusive);
3111  CONST_ID(succ, "succ");
3112  StringValue(end);
3113  enc = rb_enc_check(beg, end);
3114  ascii = (is_ascii_string(beg) && is_ascii_string(end));
3115  /* single character */
3116  if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
3117  char c = RSTRING_PTR(beg)[0];
3118  char e = RSTRING_PTR(end)[0];
3119 
     /* Empty range: nothing is yielded. */
3120  if (c > e || (excl && c == e)) return beg;
     /* Step the byte itself; no succ call is needed in this case. */
3121  for (;;) {
3122  rb_yield(rb_enc_str_new(&c, 1, enc));
3123  if (!excl && c == e) break;
3124  c++;
3125  if (excl && c == e) break;
3126  }
3127  return beg;
3128  }
3129  /* both edges are all digits */
3130  if (ascii && ISDIGIT(RSTRING_PTR(beg)[0]) && ISDIGIT(RSTRING_PTR(end)[0])) {
3131  char *s, *send;
3132  VALUE b, e;
3133  int width;
3134 
     /* width is the byte length of beg; it is the minimum field width
      * used when the numbers are formatted back into strings. */
3135  s = RSTRING_PTR(beg); send = RSTRING_END(beg);
3136  width = rb_long2int(send - s);
     /* Any non-digit in either string falls back to the generic loop. */
3137  while (s < send) {
3138  if (!ISDIGIT(*s)) goto no_digits;
3139  s++;
3140  }
3141  s = RSTRING_PTR(end); send = RSTRING_END(end);
3142  while (s < send) {
3143  if (!ISDIGIT(*s)) goto no_digits;
3144  s++;
3145  }
3146  b = rb_str_to_inum(beg, 10, FALSE);
3147  e = rb_str_to_inum(end, 10, FALSE);
     /* Both bounds fit in a Fixnum: count with C longs. */
3148  if (FIXNUM_P(b) && FIXNUM_P(e)) {
3149  long bi = FIX2LONG(b);
3150  long ei = FIX2LONG(e);
3151  rb_encoding *usascii = rb_usascii_encoding();
3152 
3153  while (bi <= ei) {
3154  if (excl && bi == ei) break;
3155  rb_yield(rb_enc_sprintf(usascii, "%.*ld", width, bi));
3156  bi++;
3157  }
3158  }
3159  else {
     /* Otherwise compare and step through method calls on b.
      * NOTE(review): listing line 3161, which must declare args and fmt,
      * is absent from this extract. */
3160  ID op = excl ? '<' : rb_intern("<=");
3162 
3163  args[0] = INT2FIX(width);
3164  while (rb_funcall(b, op, 1, e)) {
3165  args[1] = b;
3166  rb_yield(rb_str_format(numberof(args), args, fmt));
3167  b = rb_funcall(b, succ, 0, 0);
3168  }
3169  }
3170  return beg;
3171  }
3172  /* normal case */
3173  no_digits:
3174  n = rb_str_cmp(beg, end);
3175  if (n > 0 || (excl && n == 0)) return beg;
3176 
     /* Iterate with succ until the value just past end is reached. */
3177  after_end = rb_funcall(end, succ, 0, 0);
3178  current = rb_str_dup(beg);
3179  while (!rb_str_equal(current, after_end)) {
3180  VALUE next = Qnil;
     /* The successor is computed before yielding; it stays nil once
      * current equals end in the inclusive case, which ends the loop
      * after the yield. */
3181  if (excl || !rb_str_equal(current, end))
3182  next = rb_funcall(current, succ, 0, 0);
3183  rb_yield(current);
3184  if (NIL_P(next)) break;
3185  current = next;
3186  StringValue(current);
3187  if (excl && rb_str_equal(current, end)) break;
     /* Stop once the value is longer than end, or empty. */
3188  if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
3189  break;
3190  }
3191 
3192  return beg;
3193 }
3194 
3195 static VALUE
3196 rb_str_subpat(VALUE str, VALUE re, VALUE backref)
3197 {
3198  if (rb_reg_search(re, str, 0, 0) >= 0) {
3200  int nth = rb_reg_backref_number(match, backref);
3201  return rb_reg_nth_match(nth, match);
3202  }
3203  return Qnil;
3204 }
3205 
3206 static VALUE
3208 {
3209  long idx;
3210 
3211  if (FIXNUM_P(indx)) {
3212  idx = FIX2LONG(indx);
3213 
3214  num_index:
3215  str = rb_str_substr(str, idx, 1);
3216  if (!NIL_P(str) && RSTRING_LEN(str) == 0) return Qnil;
3217  return str;
3218  }
3219 
3220  if (SPECIAL_CONST_P(indx)) goto generic;
3221  switch (BUILTIN_TYPE(indx)) {
3222  case T_REGEXP:
3223  return rb_str_subpat(str, indx, INT2FIX(0));
3224 
3225  case T_STRING:
3226  if (rb_str_index(str, indx, 0) != -1)
3227  return rb_str_dup(indx);
3228  return Qnil;
3229 
3230  generic:
3231  default:
3232  /* check if indx is Range */
3233  {
3234  long beg, len;
3235  VALUE tmp;
3236 
3237  len = str_strlen(str, STR_ENC_GET(str));
3238  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
3239  case Qfalse:
3240  break;
3241  case Qnil:
3242  return Qnil;
3243  default:
3244  tmp = rb_str_substr(str, beg, len);
3245  return tmp;
3246  }
3247  }
3248  idx = NUM2LONG(indx);
3249  goto num_index;
3250  }
3251 
3252  UNREACHABLE;
3253 }
3254 
3255 
3256 /*
3257  * call-seq:
3258  * str[index] -> new_str or nil
3259  * str[start, length] -> new_str or nil
3260  * str[range] -> new_str or nil
3261  * str[regexp] -> new_str or nil
3262  * str[regexp, capture] -> new_str or nil
3263  * str[match_str] -> new_str or nil
3264  * str.slice(index) -> new_str or nil
3265  * str.slice(start, length) -> new_str or nil
3266  * str.slice(range) -> new_str or nil
3267  * str.slice(regexp) -> new_str or nil
3268  * str.slice(regexp, capture) -> new_str or nil
3269  * str.slice(match_str) -> new_str or nil
3270  *
3271  * Element Reference --- If passed a single +index+, returns a substring of
3272  * one character at that index. If passed a +start+ index and a +length+,
3273  * returns a substring containing +length+ characters starting at the
3274  * +index+. If passed a +range+, its beginning and end are interpreted as
3275  * offsets delimiting the substring to be returned.
3276  *
3277  * In these three cases, if an index is negative, it is counted from the end
3278  * of the string. For the +start+ and +range+ cases the starting index
3279  * is just before a character and an index matching the string's size.
3280  * Additionally, an empty string is returned when the starting index for a
3281  * character range is at the end of the string.
3282  *
3283  * Returns +nil+ if the initial index falls outside the string or the length
3284  * is negative.
3285  *
3286  * If a +Regexp+ is supplied, the matching portion of the string is
3287  * returned. If a +capture+, which may be a capture group index or name,
3288  * follows the regular expression, that component of the MatchData is
3289  * returned instead.
3290  *
3291  * If a +match_str+ is given, that string is returned if it occurs in
3292  * the string.
3293  *
3294  * Returns +nil+ if the regular expression does not match or the match string
3295  * cannot be found.
3296  *
3297  * a = "hello there"
3298  *
3299  * a[1] #=> "e"
3300  * a[2, 3] #=> "llo"
3301  * a[2..3] #=> "ll"
3302  *
3303  * a[-3, 2] #=> "er"
3304  * a[7..-2] #=> "her"
3305  * a[-4..-2] #=> "her"
3306  * a[-2..-4] #=> ""
3307  *
3308  * a[11, 0] #=> ""
3309  * a[11] #=> nil
3310  * a[12, 0] #=> nil
3311  * a[12..-1] #=> nil
3312  *
3313  * a[/[aeiou](.)\1/] #=> "ell"
3314  * a[/[aeiou](.)\1/, 0] #=> "ell"
3315  * a[/[aeiou](.)\1/, 1] #=> "l"
3316  * a[/[aeiou](.)\1/, 2] #=> nil
3317  *
3318  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "non_vowel"] #=> "l"
3319  * a[/(?<vowel>[aeiou])(?<non_vowel>[^aeiou])/, "vowel"] #=> "e"
3320  *
3321  * a["lo"] #=> "lo"
3322  * a["bye"] #=> nil
3323  */
3324 
3325 static VALUE
3327 {
3328  if (argc == 2) {
3329  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3330  return rb_str_subpat(str, argv[0], argv[1]);
3331  }
3332  return rb_str_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
3333  }
3334  rb_check_arity(argc, 1, 2);
3335  return rb_str_aref(str, argv[0]);
3336 }
3337 
3338 VALUE
3339 rb_str_drop_bytes(VALUE str, long len)
3340 {
3341  char *ptr = RSTRING_PTR(str);
3342  long olen = RSTRING_LEN(str), nlen;
3343 
3344  str_modifiable(str);
3345  if (len > olen) len = olen;
3346  nlen = olen - len;
3347  if (nlen <= RSTRING_EMBED_LEN_MAX) {
3348  char *oldptr = ptr;
3349  int fl = (int)(RBASIC(str)->flags & (STR_NOEMBED|ELTS_SHARED));
3350  STR_SET_EMBED(str);
3351  STR_SET_EMBED_LEN(str, nlen);
3352  ptr = RSTRING(str)->as.ary;
3353  memmove(ptr, oldptr + len, nlen);
3354  if (fl == STR_NOEMBED) xfree(oldptr);
3355  }
3356  else {
3357  if (!STR_SHARED_P(str)) rb_str_new4(str);
3358  ptr = RSTRING(str)->as.heap.ptr += len;
3359  RSTRING(str)->as.heap.len = nlen;
3360  }
3361  ptr[nlen] = 0;
3362  ENC_CODERANGE_CLEAR(str);
3363  return str;
3364 }
3365 
3366 static void
3367 rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
3368 {
3369  if (beg == 0 && RSTRING_LEN(val) == 0) {
3370  rb_str_drop_bytes(str, len);
3371  OBJ_INFECT(str, val);
3372  return;
3373  }
3374 
3375  rb_str_modify(str);
3376  if (len < RSTRING_LEN(val)) {
3377  /* expand string */
3378  RESIZE_CAPA(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len + 1);
3379  }
3380 
3381  if (RSTRING_LEN(val) != len) {
3382  memmove(RSTRING_PTR(str) + beg + RSTRING_LEN(val),
3383  RSTRING_PTR(str) + beg + len,
3384  RSTRING_LEN(str) - (beg + len));
3385  }
3386  if (RSTRING_LEN(val) < beg && len < 0) {
3387  MEMZERO(RSTRING_PTR(str) + RSTRING_LEN(str), char, -len);
3388  }
3389  if (RSTRING_LEN(val) > 0) {
3390  memmove(RSTRING_PTR(str)+beg, RSTRING_PTR(val), RSTRING_LEN(val));
3391  }
3392  STR_SET_LEN(str, RSTRING_LEN(str) + RSTRING_LEN(val) - len);
3393  if (RSTRING_PTR(str)) {
3394  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
3395  }
3396  OBJ_INFECT(str, val);
3397 }
3398 
/*
 * Replaces +len+ characters of +str+, starting at character index +beg+,
 * with +val+.  Raises IndexError for a negative +len+ or a +beg+ outside
 * the string; a negative +beg+ counts from the end.  The character range
 * is translated to a byte range and handed to rb_str_splice_0().
 */
3399 static void
3400 rb_str_splice(VALUE str, long beg, long len, VALUE val)
3401 {
3402  long slen;
3403  char *p, *e;
3404  rb_encoding *enc;
3405  int singlebyte = single_byte_optimizable(str);
3406  int cr;
3407 
3408  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
3409 
3410  StringValue(val);
3411  enc = rb_enc_check(str, val);
3412  slen = str_strlen(str, enc);
3413 
3414  if (slen < beg) {
3415  out_of_range:
3416  rb_raise(rb_eIndexError, "index %ld out of string", beg);
3417  }
3418  if (beg < 0) {
3419  if (-beg > slen) {
3420  goto out_of_range;
3421  }
3422  beg += slen;
3423  }
     /* Clamp the length so the span ends at the end of the string. */
3424  if (slen < len || slen < beg + len) {
3425  len = slen - beg;
3426  }
3427  str_modify_keep_cr(str);
     /* Locate the span's first byte (p) and the byte just past it (e);
      * a null result from str_nth() is taken to mean the end of str. */
3428  p = str_nth(RSTRING_PTR(str), RSTRING_END(str), beg, enc, singlebyte);
3429  if (!p) p = RSTRING_END(str);
3430  e = str_nth(p, RSTRING_END(str), len, enc, singlebyte);
3431  if (!e) e = RSTRING_END(str);
3432  /* error check */
3433  beg = p - RSTRING_PTR(str); /* physical position */
3434  len = e - p; /* physical length */
3435  rb_str_splice_0(str, beg, len, val);
3436  rb_enc_associate(str, enc);
     /* NOTE(review): listing line 3437 is absent from this extract; as
      * shown, cr is read below without any visible assignment. */
3438  if (cr != ENC_CODERANGE_BROKEN)
3439  ENC_CODERANGE_SET(str, cr);
3440 }
3441 
/*
 * Non-static wrapper that forwards its arguments unchanged to
 * rb_str_splice().
 */
3442 void
3443 rb_str_update(VALUE str, long beg, long len, VALUE val)
3444 {
3445  rb_str_splice(str, beg, len, val);
3446 }
3447 
3448 static void
3450 {
3451  int nth;
3452  VALUE match;
3453  long start, end, len;
3454  rb_encoding *enc;
3455  struct re_registers *regs;
3456 
3457  if (rb_reg_search(re, str, 0, 0) < 0) {
3458  rb_raise(rb_eIndexError, "regexp not matched");
3459  }
3460  match = rb_backref_get();
3461  nth = rb_reg_backref_number(match, backref);
3462  regs = RMATCH_REGS(match);
3463  if (nth >= regs->num_regs) {
3464  out_of_range:
3465  rb_raise(rb_eIndexError, "index %d out of regexp", nth);
3466  }
3467  if (nth < 0) {
3468  if (-nth >= regs->num_regs) {
3469  goto out_of_range;
3470  }
3471  nth += regs->num_regs;
3472  }
3473 
3474  start = BEG(nth);
3475  if (start == -1) {
3476  rb_raise(rb_eIndexError, "regexp group %d not matched", nth);
3477  }
3478  end = END(nth);
3479  len = end - start;
3480  StringValue(val);
3481  enc = rb_enc_check(str, val);
3482  rb_str_splice_0(str, start, len, val);
3483  rb_enc_associate(str, enc);
3484 }
3485 
3486 static VALUE
3488 {
3489  long idx, beg;
3490 
3491  if (FIXNUM_P(indx)) {
3492  idx = FIX2LONG(indx);
3493  num_index:
3494  rb_str_splice(str, idx, 1, val);
3495  return val;
3496  }
3497 
3498  if (SPECIAL_CONST_P(indx)) goto generic;
3499  switch (TYPE(indx)) {
3500  case T_REGEXP:
3501  rb_str_subpat_set(str, indx, INT2FIX(0), val);
3502  return val;
3503 
3504  case T_STRING:
3505  beg = rb_str_index(str, indx, 0);
3506  if (beg < 0) {
3507  rb_raise(rb_eIndexError, "string not matched");
3508  }
3509  beg = rb_str_sublen(str, beg);
3510  rb_str_splice(str, beg, str_strlen(indx, 0), val);
3511  return val;
3512 
3513  generic:
3514  default:
3515  /* check if indx is Range */
3516  {
3517  long beg, len;
3518  if (rb_range_beg_len(indx, &beg, &len, str_strlen(str, 0), 2)) {
3519  rb_str_splice(str, beg, len, val);
3520  return val;
3521  }
3522  }
3523  idx = NUM2LONG(indx);
3524  goto num_index;
3525  }
3526 }
3527 
3528 /*
3529  * call-seq:
3530  * str[fixnum] = new_str
3531  * str[fixnum, fixnum] = new_str
3532  * str[range] = aString
3533  * str[regexp] = new_str
3534  * str[regexp, fixnum] = new_str
3535  * str[regexp, name] = new_str
3536  * str[other_str] = new_str
3537  *
3538  * Element Assignment---Replaces some or all of the content of <i>str</i>. The
3539  * portion of the string affected is determined using the same criteria as
3540  * <code>String#[]</code>. If the replacement string is not the same length as
3541  * the text it is replacing, the string will be adjusted accordingly. If the
3542  * regular expression or string used as the index doesn't match a position
3543  * in the string, <code>IndexError</code> is raised. If the regular expression
3544  * form is used, the optional second <code>Fixnum</code> allows you to specify
3545  * which portion of the match to replace (effectively using the
3546  * <code>MatchData</code> indexing rules). The forms that take a
3547  * <code>Fixnum</code> will raise an <code>IndexError</code> if the value is
3548  * out of range; the <code>Range</code> form will raise a
3549  * <code>RangeError</code>, and the <code>Regexp</code> and <code>String</code>
3550  * will raise an <code>IndexError</code> on negative match.
3551  */
3552 
3553 static VALUE
3555 {
3556  if (argc == 3) {
3557  if (RB_TYPE_P(argv[0], T_REGEXP)) {
3558  rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
3559  }
3560  else {
3561  rb_str_splice(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]), argv[2]);
3562  }
3563  return argv[2];
3564  }
3565  rb_check_arity(argc, 2, 3);
3566  return rb_str_aset(str, argv[0], argv[1]);
3567 }
3568 
3569 /*
3570  * call-seq:
3571  * str.insert(index, other_str) -> str
3572  *
3573  * Inserts <i>other_str</i> before the character at the given
3574  * <i>index</i>, modifying <i>str</i>. Negative indices count from the
3575  * end of the string, and insert <em>after</em> the given character.
3576  * The intent is to insert <i>other_str</i> so that it starts at the given
3577  * <i>index</i>.
3578  *
3579  * "abcd".insert(0, 'X') #=> "Xabcd"
3580  * "abcd".insert(3, 'X') #=> "abcXd"
3581  * "abcd".insert(4, 'X') #=> "abcdX"
3582  * "abcd".insert(-3, 'X') #=> "abXcd"
3583  * "abcd".insert(-1, 'X') #=> "abcdX"
3584  */
3585 
3586 static VALUE
3588 {
     /* NOTE(review): the declarator line (listing line 3587) is absent from
      * this extract; the body reads str, idx and str2. */
3589  long pos = NUM2LONG(idx);
3590 
     /* -1 means "after the last character", which is a plain append. */
3591  if (pos == -1) {
3592  return rb_str_append(str, str2);
3593  }
     /* Other negative indices insert after the addressed character, i.e.
      * before the one that follows it, hence the shift by one. */
3594  else if (pos < 0) {
3595  pos++;
3596  }
     /* A zero-length splice at pos inserts without removing anything. */
3597  rb_str_splice(str, pos, 0, str2);
3598  return str;
3599 }
3600 
3601 
3602 /*
3603  * call-seq:
3604  * str.slice!(fixnum) -> new_str or nil
3605  * str.slice!(fixnum, fixnum) -> new_str or nil
3606  * str.slice!(range) -> new_str or nil
3607  * str.slice!(regexp) -> new_str or nil
3608  * str.slice!(other_str) -> new_str or nil
3609  *
3610  * Deletes the specified portion from <i>str</i>, and returns the portion
3611  * deleted.
3612  *
3613  * string = "this is a string"
3614  * string.slice!(2) #=> "i"
3615  * string.slice!(3..6) #=> " is "
3616  * string.slice!(/s.*t/) #=> "sa st"
3617  * string.slice!("r") #=> "r"
3618  * string #=> "thing"
3619  */
3620 
3621 static VALUE
3623 {
     /* NOTE(review): the declarator line (listing line 3622) is absent from
      * this extract; the body reads argc, argv and str. */
3624  VALUE result;
     /* Room for the (at most two) index arguments plus the replacement
      * value appended below. */
3625  VALUE buf[3];
3626  int i;
3627 
3628  rb_check_arity(argc, 1, 2);
3629  for (i=0; i<argc; i++) {
3630  buf[i] = argv[i];
3631  }
3632  str_modify_keep_cr(str);
     /* Look the portion up first; only when something was selected is it
      * then replaced by an empty string, using the same index arguments. */
3633  result = rb_str_aref_m(argc, buf, str);
3634  if (!NIL_P(result)) {
     /* i == argc here, so this fills the slot after the indices. */
3635  buf[i] = rb_str_new(0,0);
3636  rb_str_aset_m(argc+1, buf, str);
3637  }
3638  return result;
3639 }
3640 
3641 static VALUE
3642 get_pat(VALUE pat, int quote)
3643 {
3644  VALUE val;
3645 
3646  switch (TYPE(pat)) {
3647  case T_REGEXP:
3648  return pat;
3649 
3650  case T_STRING:
3651  break;
3652 
3653  default:
3654  val = rb_check_string_type(pat);
3655  if (NIL_P(val)) {
3656  Check_Type(pat, T_REGEXP);
3657  }
3658  pat = val;
3659  }
3660 
3661  if (quote) {
3662  pat = rb_reg_quote(pat);
3663  }
3664 
3665  return rb_reg_regcomp(pat);
3666 }
3667 
3668 
3669 /*
3670  * call-seq:
3671  * str.sub!(pattern, replacement) -> str or nil
3672  * str.sub!(pattern) {|match| block } -> str or nil
3673  *
3674  * Performs the same substitution as String#sub in-place.
3675  *
3676  * Returns +str+ if a substitution was performed or +nil+ if no substitution
3677  * was performed.
3678  */
3679 
3680 static VALUE
3682 {
3683  VALUE pat, repl, hash = Qnil;
3684  int iter = 0;
3685  int tainted = 0;
3686  int untrusted = 0;
3687  long plen;
3688  int min_arity = rb_block_given_p() ? 1 : 2;
3689 
3690  rb_check_arity(argc, min_arity, 2);
3691  if (argc == 1) {
3692  iter = 1;
3693  }
3694  else {
3695  repl = argv[1];
3696  hash = rb_check_hash_type(argv[1]);
3697  if (NIL_P(hash)) {
3698  StringValue(repl);
3699  }
3700  if (OBJ_TAINTED(repl)) tainted = 1;
3701  if (OBJ_UNTRUSTED(repl)) untrusted = 1;
3702  }
3703 
3704  pat = get_pat(argv[0], 1);
3705  str_modifiable(str);
3706  if (rb_reg_search(pat, str, 0, 0) >= 0) {
3707  rb_encoding *enc;
3708  int cr = ENC_CODERANGE(str);
3710  struct re_registers *regs = RMATCH_REGS(match);
3711  long beg0 = BEG(0);
3712  long end0 = END(0);
3713  char *p, *rp;
3714  long len, rlen;
3715 
3716  if (iter || !NIL_P(hash)) {
3717  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
3718 
3719  if (iter) {
3720  repl = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
3721  }
3722  else {
3723  repl = rb_hash_aref(hash, rb_str_subseq(str, beg0, end0 - beg0));
3724  repl = rb_obj_as_string(repl);
3725  }
3726  str_mod_check(str, p, len);
3727  rb_check_frozen(str);
3728  }
3729  else {
3730  repl = rb_reg_regsub(repl, str, regs, pat);
3731  }
3732  enc = rb_enc_compatible(str, repl);
3733  if (!enc) {
3735  p = RSTRING_PTR(str); len = RSTRING_LEN(str);
3736  if (coderange_scan(p, beg0, str_enc) != ENC_CODERANGE_7BIT ||
3737  coderange_scan(p+end0, len-end0, str_enc) != ENC_CODERANGE_7BIT) {
3738  rb_raise(rb_eEncCompatError, "incompatible character encodings: %s and %s",
3739  rb_enc_name(str_enc),
3740  rb_enc_name(STR_ENC_GET(repl)));
3741  }
3742  enc = STR_ENC_GET(repl);
3743  }
3744  rb_str_modify(str);
3745  rb_enc_associate(str, enc);
3746  if (OBJ_TAINTED(repl)) tainted = 1;
3747  if (OBJ_UNTRUSTED(repl)) untrusted = 1;
3748  if (ENC_CODERANGE_UNKNOWN < cr && cr < ENC_CODERANGE_BROKEN) {
3749  int cr2 = ENC_CODERANGE(repl);
3750  if (cr2 == ENC_CODERANGE_BROKEN ||
3751  (cr == ENC_CODERANGE_VALID && cr2 == ENC_CODERANGE_7BIT))
3752  cr = ENC_CODERANGE_UNKNOWN;
3753  else
3754  cr = cr2;
3755  }
3756  plen = end0 - beg0;
3757  rp = RSTRING_PTR(repl); rlen = RSTRING_LEN(repl);
3758  len = RSTRING_LEN(str);
3759  if (rlen > plen) {
3760  RESIZE_CAPA(str, len + rlen - plen);
3761  }
3762  p = RSTRING_PTR(str);
3763  if (rlen != plen) {
3764  memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
3765  }
3766  memcpy(p + beg0, rp, rlen);
3767  len += rlen - plen;
3768  STR_SET_LEN(str, len);
3769  RSTRING_PTR(str)[len] = '\0';
3770  ENC_CODERANGE_SET(str, cr);
3771  if (tainted) OBJ_TAINT(str);
3772  if (untrusted) OBJ_UNTRUST(str);
3773 
3774  return str;
3775  }
3776  return Qnil;
3777 }
3778 
3779 
3780 /*
3781  * call-seq:
3782  * str.sub(pattern, replacement) -> new_str
3783  * str.sub(pattern, hash) -> new_str
3784  * str.sub(pattern) {|match| block } -> new_str
3785  *
3786  * Returns a copy of +str+ with the _first_ occurrence of +pattern+
3787  * replaced by the second argument. The +pattern+ is typically a Regexp; if
3788  * given as a String, any regular expression metacharacters it contains will
3789  * be interpreted literally, e.g. <code>'\\\d'</code> will match a backslash
3790  * followed by 'd', instead of a digit.
3791  *
3792  * If +replacement+ is a String it will be substituted for the matched text.
3793  * It may contain back-references to the pattern's capture groups of the form
3794  * <code>"\\d"</code>, where <i>d</i> is a group number, or
3795  * <code>"\\k<n>"</code>, where <i>n</i> is a group name. If it is a
3796  * double-quoted string, both back-references must be preceded by an
3797  * additional backslash. However, within +replacement+ the special match
3798  * variables, such as <code>$&</code>, will not refer to the current match.
3799  *
3800  * If the second argument is a Hash, and the matched text is one of its keys,
3801  * the corresponding value is the replacement string.
3802  *
3803  * In the block form, the current match string is passed in as a parameter,
3804  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
3805  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
3806  * returned by the block will be substituted for the match on each call.
3807  *
3808  * The result inherits any tainting in the original string or any supplied
3809  * replacement string.
3810  *
3811  * "hello".sub(/[aeiou]/, '*') #=> "h*llo"
3812  * "hello".sub(/([aeiou])/, '<\1>') #=> "h<e>llo"
3813  * "hello".sub(/./) {|s| s.ord.to_s + ' ' } #=> "104 ello"
3814  * "hello".sub(/(?<foo>[aeiou])/, '*\k<foo>*') #=> "h*e*llo"
3815  * 'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV)
3816  * #=> "Is /bin/bash your preferred shell?"
3817  */
3818 
3819 static VALUE
3821 {
3822  str = rb_str_dup(str);
3823  rb_str_sub_bang(argc, argv, str);
3824  return str;
3825 }
3826 
3827 static VALUE
3828 str_gsub(int argc, VALUE *argv, VALUE str, int bang)
3829 {
3830  VALUE pat, val, repl, match, dest, hash = Qnil;
3831  struct re_registers *regs;
3832  long beg, n;
3833  long beg0, end0;
3834  long offset, blen, slen, len, last;
3835  int iter = 0;
3836  char *sp, *cp;
3837  int tainted = 0;
3839 
3840  switch (argc) {
3841  case 1:
3842  RETURN_ENUMERATOR(str, argc, argv);
3843  iter = 1;
3844  break;
3845  case 2:
3846  repl = argv[1];
3847  hash = rb_check_hash_type(argv[1]);
3848  if (NIL_P(hash)) {
3849  StringValue(repl);
3850  }
3851  if (OBJ_TAINTED(repl)) tainted = 1;
3852  break;
3853  default:
3854  rb_check_arity(argc, 1, 2);
3855  }
3856 
3857  pat = get_pat(argv[0], 1);
3858  beg = rb_reg_search(pat, str, 0, 0);
3859  if (beg < 0) {
3860  if (bang) return Qnil; /* no match, no substitution */
3861  return rb_str_dup(str);
3862  }
3863 
3864  offset = 0;
3865  n = 0;
3866  blen = RSTRING_LEN(str) + 30; /* len + margin */
3867  dest = rb_str_buf_new(blen);
3868  sp = RSTRING_PTR(str);
3869  slen = RSTRING_LEN(str);
3870  cp = sp;
3871  str_enc = STR_ENC_GET(str);
3872  rb_enc_associate(dest, str_enc);
3874 
3875  do {
3876  n++;
3877  match = rb_backref_get();
3878  regs = RMATCH_REGS(match);
3879  beg0 = BEG(0);
3880  end0 = END(0);
3881  if (iter || !NIL_P(hash)) {
3882  if (iter) {
3883  val = rb_obj_as_string(rb_yield(rb_reg_nth_match(0, match)));
3884  }
3885  else {
3886  val = rb_hash_aref(hash, rb_str_subseq(str, BEG(0), END(0) - BEG(0)));
3887  val = rb_obj_as_string(val);
3888  }
3889  str_mod_check(str, sp, slen);
3890  if (val == dest) { /* paranoid check [ruby-dev:24827] */
3891  rb_raise(rb_eRuntimeError, "block should not cheat");
3892  }
3893  }
3894  else {
3895  val = rb_reg_regsub(repl, str, regs, pat);
3896  }
3897 
3898  if (OBJ_TAINTED(val)) tainted = 1;
3899 
3900  len = beg - offset; /* copy pre-match substr */
3901  if (len) {
3902  rb_enc_str_buf_cat(dest, cp, len, str_enc);
3903  }
3904 
3905  rb_str_buf_append(dest, val);
3906 
3907  last = offset;
3908  offset = end0;
3909  if (beg0 == end0) {
3910  /*
3911  * Always consume at least one character of the input string
3912  * in order to prevent infinite loops.
3913  */
3914  if (RSTRING_LEN(str) <= end0) break;
3915  len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0, RSTRING_END(str), str_enc);
3916  rb_enc_str_buf_cat(dest, RSTRING_PTR(str)+end0, len, str_enc);
3917  offset = end0 + len;
3918  }
3919  cp = RSTRING_PTR(str) + offset;
3920  if (offset > RSTRING_LEN(str)) break;
3921  beg = rb_reg_search(pat, str, offset, 0);
3922  } while (beg >= 0);
3923  if (RSTRING_LEN(str) > offset) {
3924  rb_enc_str_buf_cat(dest, cp, RSTRING_LEN(str) - offset, str_enc);
3925  }
3926  rb_reg_search(pat, str, last, 0);
3927  if (bang) {
3928  rb_str_shared_replace(str, dest);
3929  }
3930  else {
3931  RBASIC(dest)->klass = rb_obj_class(str);
3932  OBJ_INFECT(dest, str);
3933  str = dest;
3934  }
3935 
3936  if (tainted) OBJ_TAINT(str);
3937  return str;
3938 }
3939 
3940 
3941 /*
3942  * call-seq:
3943  * str.gsub!(pattern, replacement) -> str or nil
3944  * str.gsub!(pattern) {|match| block } -> str or nil
3945  * str.gsub!(pattern) -> an_enumerator
3946  *
3947  * Performs the substitutions of <code>String#gsub</code> in place, returning
3948  * <i>str</i>, or <code>nil</code> if no substitutions were performed.
3949  * If no block and no <i>replacement</i> is given, an enumerator is returned instead.
3950  */
3951 
3952 static VALUE
3954 {
3955  str_modify_keep_cr(str);
3956  return str_gsub(argc, argv, str, 1);
3957 }
3958 
3959 
3960 /*
3961  * call-seq:
3962  * str.gsub(pattern, replacement) -> new_str
3963  * str.gsub(pattern, hash) -> new_str
3964  * str.gsub(pattern) {|match| block } -> new_str
3965  * str.gsub(pattern) -> enumerator
3966  *
3967  * Returns a copy of <i>str</i> with <em>all</em> occurrences of
3968  * <i>pattern</i> substituted for the second argument. The <i>pattern</i> is
3969  * typically a <code>Regexp</code>; if given as a <code>String</code>, any
3970  * regular expression metacharacters it contains will be interpreted
3971  * literally, e.g. <code>'\\\d'</code> will match a backslash followed by 'd',
3972  * instead of a digit.
3973  *
3974  * If <i>replacement</i> is a <code>String</code> it will be substituted for
3975  * the matched text. It may contain back-references to the pattern's capture
3976  * groups of the form <code>\\\d</code>, where <i>d</i> is a group number, or
3977  * <code>\\\k<n></code>, where <i>n</i> is a group name. If it is a
3978  * double-quoted string, both back-references must be preceded by an
3979  * additional backslash. However, within <i>replacement</i> the special match
3980  * variables, such as <code>$&</code>, will not refer to the current match.
3981  *
3982  * If the second argument is a <code>Hash</code>, and the matched text is one
3983  * of its keys, the corresponding value is the replacement string.
3984  *
3985  * In the block form, the current match string is passed in as a parameter,
3986  * and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
3987  * <code>$&</code>, and <code>$'</code> will be set appropriately. The value
3988  * returned by the block will be substituted for the match on each call.
3989  *
3990  * The result inherits any tainting in the original string or any supplied
3991  * replacement string.
3992  *
3993  * When neither a block nor a second argument is supplied, an
3994  * <code>Enumerator</code> is returned.
3995  *
3996  * "hello".gsub(/[aeiou]/, '*') #=> "h*ll*"
3997  * "hello".gsub(/([aeiou])/, '<\1>') #=> "h<e>ll<o>"
3998  * "hello".gsub(/./) {|s| s.ord.to_s + ' '} #=> "104 101 108 108 111 "
3999  * "hello".gsub(/(?<foo>[aeiou])/, '{\k<foo>}') #=> "h{e}ll{o}"
4000  * 'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*') #=> "h3ll*"
4001  */
4002 
4003 static VALUE
4005 {
4006  return str_gsub(argc, argv, str, 0);
4007 }
4008 
4009 
4010 /*
4011  * call-seq:
4012  * str.replace(other_str) -> str
4013  *
4014  * Replaces the contents and taintedness of <i>str</i> with the corresponding
4015  * values in <i>other_str</i>.
4016  *
4017  * s = "hello" #=> "hello"
4018  * s.replace "world" #=> "world"
4019  */
4020 
4021 VALUE
4023 {
4024  str_modifiable(str);
4025  if (str == str2) return str;
4026 
4027  StringValue(str2);
4028  str_discard(str);
4029  return str_replace(str, str2);
4030 }
4031 
4032 /*
4033  * call-seq:
4034  * string.clear -> string
4035  *
4036  * Makes string empty.
4037  *
4038  * a = "abcde"
4039  * a.clear #=> ""
4040  */
4041 
4042 static VALUE
4044 {
4045  str_discard(str);
4046  STR_SET_EMBED(str);
4047  STR_SET_EMBED_LEN(str, 0);
4048  RSTRING_PTR(str)[0] = 0;
4049  if (rb_enc_asciicompat(STR_ENC_GET(str)))
4051  else
4053  return str;
4054 }
4055 
4056 /*
4057  * call-seq:
4058  * string.chr -> string
4059  *
4060  * Returns a one-character string at the beginning of the string.
4061  *
4062  * a = "abcde"
4063  * a.chr #=> "a"
4064  */
4065 
4066 static VALUE
4068 {
4069  return rb_str_substr(str, 0, 1);
4070 }
4071 
4072 /*
4073  * call-seq:
4074  * str.getbyte(index) -> 0 .. 255
4075  *
4076  * returns the <i>index</i>th byte as an integer.
4077  */
4078 static VALUE
4080 {
4081  long pos = NUM2LONG(index);
4082 
4083  if (pos < 0)
4084  pos += RSTRING_LEN(str);
4085  if (pos < 0 || RSTRING_LEN(str) <= pos)
4086  return Qnil;
4087 
4088  return INT2FIX((unsigned char)RSTRING_PTR(str)[pos]);
4089 }
4090 
4091 /*
4092  * call-seq:
4093  * str.setbyte(index, integer) -> integer
4094  *
4095  * Sets the <i>index</i>th byte to <i>integer</i>.
4096  */
4097 static VALUE
4099 {
4100  long pos = NUM2LONG(index);
4101  int byte = NUM2INT(value);
4102 
4103  rb_str_modify(str);
4104 
4105  if (pos < -RSTRING_LEN(str) || RSTRING_LEN(str) <= pos)
4106  rb_raise(rb_eIndexError, "index %ld out of string", pos);
4107  if (pos < 0)
4108  pos += RSTRING_LEN(str);
4109 
4110  RSTRING_PTR(str)[pos] = byte;
4111 
4112  return value;
4113 }
4114 
4115 static VALUE
4116 str_byte_substr(VALUE str, long beg, long len)
4117 {
4118  char *p, *s = RSTRING_PTR(str);
4119  long n = RSTRING_LEN(str);
4120  VALUE str2;
4121 
4122  if (beg > n || len < 0) return Qnil;
4123  if (beg < 0) {
4124  beg += n;
4125  if (beg < 0) return Qnil;
4126  }
4127  if (beg + len > n)
4128  len = n - beg;
4129  if (len <= 0) {
4130  len = 0;
4131  p = 0;
4132  }
4133  else
4134  p = s + beg;
4135 
4136  if (len > RSTRING_EMBED_LEN_MAX && beg + len == n) {
4137  str2 = rb_str_new4(str);
4138  str2 = str_new3(rb_obj_class(str2), str2);
4139  RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
4140  RSTRING(str2)->as.heap.len = len;
4141  }
4142  else {
4143  str2 = rb_str_new5(str, p, len);
4144  }
4145 
4146  str_enc_copy(str2, str);
4147 
4148  if (RSTRING_LEN(str2) == 0) {
4149  if (!rb_enc_asciicompat(STR_ENC_GET(str)))
4151  else
4153  }
4154  else {
4155  switch (ENC_CODERANGE(str)) {
4156  case ENC_CODERANGE_7BIT:
4158  break;
4159  default:
4161  break;
4162  }
4163  }
4164 
4165  OBJ_INFECT(str2, str);
4166 
4167  return str2;
4168 }
4169 
4170 static VALUE
4172 {
4173  long idx;
4174  switch (TYPE(indx)) {
4175  case T_FIXNUM:
4176  idx = FIX2LONG(indx);
4177 
4178  num_index:
4179  str = str_byte_substr(str, idx, 1);
4180  if (NIL_P(str) || RSTRING_LEN(str) == 0) return Qnil;
4181  return str;
4182 
4183  default:
4184  /* check if indx is Range */
4185  {
4186  long beg, len = RSTRING_LEN(str);
4187 
4188  switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
4189  case Qfalse:
4190  break;
4191  case Qnil:
4192  return Qnil;
4193  default:
4194  return str_byte_substr(str, beg, len);
4195  }
4196  }
4197  idx = NUM2LONG(indx);
4198  goto num_index;
4199  }
4200 
4201  UNREACHABLE;
4202 }
4203 
4204 /*
4205  * call-seq:
4206  * str.byteslice(fixnum) -> new_str or nil
4207  * str.byteslice(fixnum, fixnum) -> new_str or nil
4208  * str.byteslice(range) -> new_str or nil
4209  *
4210  * Byte Reference---If passed a single <code>Fixnum</code>, returns a
4211  * substring of one byte at that position. If passed two <code>Fixnum</code>
4212  * objects, returns a substring starting at the offset given by the first, and
4213  * a length given by the second. If given a <code>Range</code>, a substring containing
4214  * bytes at offsets given by the range is returned. In all three cases, if
4215  * an offset is negative, it is counted from the end of <i>str</i>. Returns
4216  * <code>nil</code> if the initial offset falls outside the string, the length
4217  * is negative, or the beginning of the range is greater than the end.
4218  * The resulting string keeps the encoding of the original string.
4219  *
4220  * "hello".byteslice(1) #=> "e"
4221  * "hello".byteslice(-1) #=> "o"
4222  * "hello".byteslice(1, 2) #=> "el"
4223  * "\x80\u3042".byteslice(1, 3) #=> "\u3042"
4224  * "\x03\u3042\xff".byteslice(1..3) #=> "\u3042"
4225  */
4226 
4227 static VALUE
4229 {
4230  if (argc == 2) {
4231  return str_byte_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
4232  }
4233  rb_check_arity(argc, 1, 2);
4234  return str_byte_aref(str, argv[0]);
4235 }
4236 
4237 /*
4238  * call-seq:
4239  * str.reverse -> new_str
4240  *
4241  * Returns a new string with the characters from <i>str</i> in reverse order.
4242  *
4243  * "stressed".reverse #=> "desserts"
4244  */
4245 
4246 static VALUE
4248 {
4249  rb_encoding *enc;
4250  VALUE rev;
4251  char *s, *e, *p;
4252  int single = 1;
4253 
4254  if (RSTRING_LEN(str) <= 1) return rb_str_dup(str);
4255  enc = STR_ENC_GET(str);
4256  rev = rb_str_new5(str, 0, RSTRING_LEN(str));
4257  s = RSTRING_PTR(str); e = RSTRING_END(str);
4258  p = RSTRING_END(rev);
4259 
4260  if (RSTRING_LEN(str) > 1) {
4261  if (single_byte_optimizable(str)) {
4262  while (s < e) {
4263  *--p = *s++;
4264  }
4265  }
4266  else if (ENC_CODERANGE(str) == ENC_CODERANGE_VALID) {
4267  while (s < e) {
4268  int clen = rb_enc_fast_mbclen(s, e, enc);
4269 
4270  if (clen > 1 || (*s & 0x80)) single = 0;
4271  p -= clen;
4272  memcpy(p, s, clen);
4273  s += clen;
4274  }
4275  }
4276  else {
4277  while (s < e) {
4278  int clen = rb_enc_mbclen(s, e, enc);
4279 
4280  if (clen > 1 || (*s & 0x80)) single = 0;
4281  p -= clen;
4282  memcpy(p, s, clen);
4283  s += clen;
4284  }
4285  }
4286  }
4287  STR_SET_LEN(rev, RSTRING_LEN(str));
4288  OBJ_INFECT(rev, str);
4289  if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) {
4290  if (single) {
4292  }
4293  else {
4295  }
4296  }
4298 
4299  return rev;
4300 }
4301 
4302 
4303 /*
4304  * call-seq:
4305  * str.reverse! -> str
4306  *
4307  * Reverses <i>str</i> in place.
4308  */
4309 
4310 static VALUE
4312 {
4313  if (RSTRING_LEN(str) > 1) {
4314  if (single_byte_optimizable(str)) {
4315  char *s, *e, c;
4316 
4317  str_modify_keep_cr(str);
4318  s = RSTRING_PTR(str);
4319  e = RSTRING_END(str) - 1;
4320  while (s < e) {
4321  c = *s;
4322  *s++ = *e;
4323  *e-- = c;
4324  }
4325  }
4326  else {
4328  }
4329  }
4330  else {
4331  str_modify_keep_cr(str);
4332  }
4333  return str;
4334 }
4335 
4336 
4337 /*
4338  * call-seq:
4339  * str.include? other_str -> true or false
4340  *
4341  * Returns <code>true</code> if <i>str</i> contains the given string or
4342  * character.
4343  *
4344  * "hello".include? "lo" #=> true
4345  * "hello".include? "ol" #=> false
4346  * "hello".include? ?h #=> true
4347  */
4348 
4349 static VALUE
4351 {
4352  long i;
4353 
4354  StringValue(arg);
4355  i = rb_str_index(str, arg, 0);
4356 
4357  if (i == -1) return Qfalse;
4358  return Qtrue;
4359 }
4360 
4361 
4362 /*
4363  * call-seq:
4364  * str.to_i(base=10) -> integer
4365  *
4366  * Returns the result of interpreting leading characters in <i>str</i> as an
4367  * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
4368  * end of a valid number are ignored. If there is not a valid number at the
4369  * start of <i>str</i>, <code>0</code> is returned. This method never raises an
4370  * exception when <i>base</i> is valid.
4371  *
4372  * "12345".to_i #=> 12345
4373  * "99 red balloons".to_i #=> 99
4374  * "0a".to_i #=> 0
4375  * "0a".to_i(16) #=> 10
4376  * "hello".to_i #=> 0
4377  * "1100101".to_i(2) #=> 101
4378  * "1100101".to_i(8) #=> 294977
4379  * "1100101".to_i(10) #=> 1100101
4380  * "1100101".to_i(16) #=> 17826049
4381  */
4382 
4383 static VALUE
4385 {
4386  int base;
4387 
4388  if (argc == 0) base = 10;
4389  else {
4390  VALUE b;
4391 
4392  rb_scan_args(argc, argv, "01", &b);
4393  base = NUM2INT(b);
4394  }
4395  if (base < 0) {
4396  rb_raise(rb_eArgError, "invalid radix %d", base);
4397  }
4398  return rb_str_to_inum(str, base, FALSE);
4399 }
4400 
4401 
4402 /*
4403  * call-seq:
4404  * str.to_f -> float
4405  *
4406  * Returns the result of interpreting leading characters in <i>str</i> as a
4407  * floating point number. Extraneous characters past the end of a valid number
4408  * are ignored. If there is not a valid number at the start of <i>str</i>,
4409  * <code>0.0</code> is returned. This method never raises an exception.
4410  *
4411  * "123.45e1".to_f #=> 1234.5
4412  * "45.67 degrees".to_f #=> 45.67
4413  * "thx1138".to_f #=> 0.0
4414  */
4415 
4416 static VALUE
4418 {
4419  return DBL2NUM(rb_str_to_dbl(str, FALSE));
4420 }
4421 
4422 
4423 /*
4424  * call-seq:
4425  * str.to_s -> str
4426  * str.to_str -> str
4427  *
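4428  * Returns the receiver; if the receiver's class is not String itself (e.g. a subclass), a String copy of it is returned instead.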
4429  */
4430 
4431 static VALUE
4433 {
4434  if (rb_obj_class(str) != rb_cString) {
4435  return str_duplicate(rb_cString, str);
4436  }
4437  return str;
4438 }
4439 
4440 #if 0
4441 static void
4442 str_cat_char(VALUE str, unsigned int c, rb_encoding *enc)
4443 {
4444  char s[RUBY_MAX_CHAR_LEN];
4445  int n = rb_enc_codelen(c, enc);
4446 
4447  rb_enc_mbcput(c, s, enc);
4448  rb_enc_str_buf_cat(str, s, n, enc);
4449 }
4450 #endif
4451 
4452 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
4453 
4454 int
4455 rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
4456 {
4457  char buf[CHAR_ESC_LEN + 1];
4458  int l;
4459 
4460 #if SIZEOF_INT > 4
4461  c &= 0xffffffff;
4462 #endif
4463  if (unicode_p) {
4464  if (c < 0x7F && ISPRINT(c)) {
4465  snprintf(buf, CHAR_ESC_LEN, "%c", c);
4466  }
4467  else if (c < 0x10000) {
4468  snprintf(buf, CHAR_ESC_LEN, "\\u%04X", c);
4469  }
4470  else {
4471  snprintf(buf, CHAR_ESC_LEN, "\\u{%X}", c);
4472  }
4473  }
4474  else {
4475  if (c < 0x100) {
4476  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", c);
4477  }
4478  else {
4479  snprintf(buf, CHAR_ESC_LEN, "\\x{%X}", c);
4480  }
4481  }
4482  l = (int)strlen(buf); /* CHAR_ESC_LEN cannot exceed INT_MAX */
4483  rb_str_buf_cat(result, buf, l);
4484  return l;
4485 }
4486 
4487 /*
4488  * call-seq:
4489  * str.inspect -> string
4490  *
4491  * Returns a printable version of _str_, surrounded by quote marks,
4492  * with special characters escaped.
4493  *
4494  * str = "hello"
4495  * str[3] = "\b"
4496  * str.inspect #=> "\"hel\\bo\""
4497  */
4498 
4499 VALUE
4501 {
4502  rb_encoding *enc = STR_ENC_GET(str);
4503  const char *p, *pend, *prev;
4504  char buf[CHAR_ESC_LEN + 1];
4507  int unicode_p = rb_enc_unicode_p(enc);
4508  int asciicompat = rb_enc_asciicompat(enc);
4509  static rb_encoding *utf16, *utf32;
4510 
4511  if (!utf16) utf16 = rb_enc_find("UTF-16");
4512  if (!utf32) utf32 = rb_enc_find("UTF-32");
4513  if (resenc == NULL) resenc = rb_default_external_encoding();
4514  if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding();
4515  rb_enc_associate(result, resenc);
4516  str_buf_cat2(result, "\"");
4517 
4518  p = RSTRING_PTR(str); pend = RSTRING_END(str);
4519  prev = p;
4520  if (enc == utf16) {
4521  const unsigned char *q = (const unsigned char *)p;
4522  if (q[0] == 0xFE && q[1] == 0xFF)
4523  enc = rb_enc_find("UTF-16BE");
4524  else if (q[0] == 0xFF && q[1] == 0xFE)
4525  enc = rb_enc_find("UTF-16LE");
4526  else
4527  unicode_p = 0;
4528  }
4529  else if (enc == utf32) {
4530  const unsigned char *q = (const unsigned char *)p;
4531  if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF)
4532  enc = rb_enc_find("UTF-32BE");
4533  else if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF)
4534  enc = rb_enc_find("UTF-32LE");
4535  else
4536  unicode_p = 0;
4537  }
4538  while (p < pend) {
4539  unsigned int c, cc;
4540  int n;
4541 
4542  n = rb_enc_precise_mbclen(p, pend, enc);
4543  if (!MBCLEN_CHARFOUND_P(n)) {
4544  if (p > prev) str_buf_cat(result, prev, p - prev);
4545  n = rb_enc_mbminlen(enc);
4546  if (pend < p + n)
4547  n = (int)(pend - p);
4548  while (n--) {
4549  snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377);
4550  str_buf_cat(result, buf, strlen(buf));
4551  prev = ++p;
4552  }
4553  continue;
4554  }
4555  n = MBCLEN_CHARFOUND_LEN(n);
4556  c = rb_enc_mbc_to_codepoint(p, pend, enc);
4557  p += n;
4558  if ((asciicompat || unicode_p) &&
4559  (c == '"'|| c == '\\' ||
4560  (c == '#' &&
4561  p < pend &&
4563  (cc = rb_enc_codepoint(p,pend,enc),
4564  (cc == '$' || cc == '@' || cc == '{'))))) {
4565  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4566  str_buf_cat2(result, "\\");
4567  if (asciicompat || enc == resenc) {
4568  prev = p - n;
4569  continue;
4570  }
4571  }
4572  switch (c) {
4573  case '\n': cc = 'n'; break;
4574  case '\r': cc = 'r'; break;
4575  case '\t': cc = 't'; break;
4576  case '\f': cc = 'f'; break;
4577  case '\013': cc = 'v'; break;
4578  case '\010': cc = 'b'; break;
4579  case '\007': cc = 'a'; break;
4580  case 033: cc = 'e'; break;
4581  default: cc = 0; break;
4582  }
4583  if (cc) {
4584  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4585  buf[0] = '\\';
4586  buf[1] = (char)cc;
4587  str_buf_cat(result, buf, 2);
4588  prev = p;
4589  continue;
4590  }
4591  if ((enc == resenc && rb_enc_isprint(c, enc)) ||
4592  (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c))) {
4593  continue;
4594  }
4595  else {
4596  if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
4597  rb_str_buf_cat_escaped_char(result, c, unicode_p);
4598  prev = p;
4599  continue;
4600  }
4601  }
4602  if (p > prev) str_buf_cat(result, prev, p - prev);
4603  str_buf_cat2(result, "\"");
4604 
4605  OBJ_INFECT(result, str);
4606  return result;
4607 }
4608 
4609 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
4610 
4611 /*
4612  * call-seq:
4613  * str.dump -> new_str
4614  *
4615  * Produces a version of +str+ with all non-printing characters replaced by
4616  * <code>\nnn</code> notation and all special characters escaped.
4617  *
4618  * "hello \n ''".dump #=> "\"hello \\n ''\""
4619  */
4620 
4621 VALUE
4623 {
4624  rb_encoding *enc = rb_enc_get(str);
4625  long len;
4626  const char *p, *pend;
4627  char *q, *qend;
4628  VALUE result;
4629  int u8 = (enc == rb_utf8_encoding());
4630 
4631  len = 2; /* "" */
4632  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
4633  while (p < pend) {
4634  unsigned char c = *p++;
4635  switch (c) {
4636  case '"': case '\\':
4637  case '\n': case '\r':
4638  case '\t': case '\f':
4639  case '\013': case '\010': case '\007': case '\033':
4640  len += 2;
4641  break;
4642 
4643  case '#':
4644  len += IS_EVSTR(p, pend) ? 2 : 1;
4645  break;
4646 
4647  default:
4648  if (ISPRINT(c)) {
4649  len++;
4650  }
4651  else {
4652  if (u8) { /* \u{NN} */
4653  int n = rb_enc_precise_mbclen(p-1, pend, enc);
4654  if (MBCLEN_CHARFOUND_P(n-1)) {
4655  unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
4656  while (cc >>= 4) len++;
4657  len += 5;
4658  p += MBCLEN_CHARFOUND_LEN(n)-1;
4659  break;
4660  }
4661  }
4662  len += 4; /* \xNN */
4663  }
4664  break;
4665  }
4666  }
4667  if (!rb_enc_asciicompat(enc)) {
4668  len += 19; /* ".force_encoding('')" */
4669  len += strlen(enc->name);
4670  }
4671 
4672  result = rb_str_new5(str, 0, len);
4673  p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
4674  q = RSTRING_PTR(result); qend = q + len + 1;
4675 
4676  *q++ = '"';
4677  while (p < pend) {
4678  unsigned char c = *p++;
4679 
4680  if (c == '"' || c == '\\') {
4681  *q++ = '\\';
4682  *q++ = c;
4683  }
4684  else if (c == '#') {
4685  if (IS_EVSTR(p, pend)) *q++ = '\\';
4686  *q++ = '#';
4687  }
4688  else if (c == '\n') {
4689  *q++ = '\\';
4690  *q++ = 'n';
4691  }
4692  else if (c == '\r') {
4693  *q++ = '\\';
4694  *q++ = 'r';
4695  }
4696  else if (c == '\t') {
4697  *q++ = '\\';
4698  *q++ = 't';
4699  }
4700  else if (c == '\f') {
4701  *q++ = '\\';
4702  *q++ = 'f';
4703  }
4704  else if (c == '\013') {
4705  *q++ = '\\';
4706  *q++ = 'v';
4707  }
4708  else if (c == '\010') {
4709  *q++ = '\\';
4710  *q++ = 'b';
4711  }
4712  else if (c == '\007') {
4713  *q++ = '\\';
4714  *q++ = 'a';
4715  }
4716  else if (c == '\033') {
4717  *q++ = '\\';
4718  *q++ = 'e';
4719  }
4720  else if (ISPRINT(c)) {
4721  *q++ = c;
4722  }
4723  else {
4724  *q++ = '\\';
4725  if (u8) {
4726  int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
4727  if (MBCLEN_CHARFOUND_P(n)) {
4728  int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
4729  p += n;
4730  snprintf(q, qend-q, "u{%x}", cc);
4731  q += strlen(q);
4732  continue;
4733  }
4734  }
4735  snprintf(q, qend-q, "x%02X", c);
4736  q += 3;
4737  }
4738  }
4739  *q++ = '"';
4740  *q = '\0';
4741  if (!rb_enc_asciicompat(enc)) {
4742  snprintf(q, qend-q, ".force_encoding(\"%s\")", enc->name);
4743  enc = rb_ascii8bit_encoding();
4744  }
4745  OBJ_INFECT(result, str);
4746  /* result from dump is ASCII */
4747  rb_enc_associate(result, enc);
4749  return result;
4750 }
4751 
4752 
4753 static void
4755 {
4756  if (rb_enc_dummy_p(enc)) {
4757  rb_raise(rb_eEncCompatError, "incompatible encoding with this operation: %s",
4758  rb_enc_name(enc));
4759  }
4760 }
4761 
4762 /*
4763  * call-seq:
4764  * str.upcase! -> str or nil
4765  *
4766  * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
4767  * were made.
4768  * Note: case replacement is effective only in ASCII region.
4769  */
4770 
4771 static VALUE
4773 {
4774  rb_encoding *enc;
4775  char *s, *send;
4776  int modify = 0;
4777  int n;
4778 
4779  str_modify_keep_cr(str);
4780  enc = STR_ENC_GET(str);
4782  s = RSTRING_PTR(str); send = RSTRING_END(str);
4783  if (single_byte_optimizable(str)) {
4784  while (s < send) {
4785  unsigned int c = *(unsigned char*)s;
4786 
4787  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
4788  *s = 'A' + (c - 'a');
4789  modify = 1;
4790  }
4791  s++;
4792  }
4793  }
4794  else {
4795  int ascompat = rb_enc_asciicompat(enc);
4796 
4797  while (s < send) {
4798  unsigned int c;
4799 
4800  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
4801  if (rb_enc_isascii(c, enc) && 'a' <= c && c <= 'z') {
4802  *s = 'A' + (c - 'a');
4803  modify = 1;
4804  }
4805  s++;
4806  }
4807  else {
4808  c = rb_enc_codepoint_len(s, send, &n, enc);
4809  if (rb_enc_islower(c, enc)) {
4810  /* assuming toupper returns codepoint with same size */
4811  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
4812  modify = 1;
4813  }
4814  s += n;
4815  }
4816  }
4817  }
4818 
4819  if (modify) return str;
4820  return Qnil;
4821 }
4822 
4823 
4824 /*
4825  * call-seq:
4826  * str.upcase -> new_str
4827  *
4828  * Returns a copy of <i>str</i> with all lowercase letters replaced with their
4829  * uppercase counterparts. The operation is locale insensitive---only
4830  * characters ``a'' to ``z'' are affected.
4831  * Note: case replacement is effective only in ASCII region.
4832  *
4833  * "hEllO".upcase #=> "HELLO"
4834  */
4835 
4836 static VALUE
4838 {
4839  str = rb_str_dup(str);
4840  rb_str_upcase_bang(str);
4841  return str;
4842 }
4843 
4844 
4845 /*
4846  * call-seq:
4847  * str.downcase! -> str or nil
4848  *
4849  * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
4850  * changes were made.
4851  * Note: case replacement is effective only in ASCII region.
4852  */
4853 
4854 static VALUE
4856 {
4857  rb_encoding *enc;
4858  char *s, *send;
4859  int modify = 0;
4860 
4861  str_modify_keep_cr(str);
4862  enc = STR_ENC_GET(str);
4864  s = RSTRING_PTR(str); send = RSTRING_END(str);
4865  if (single_byte_optimizable(str)) {
4866  while (s < send) {
4867  unsigned int c = *(unsigned char*)s;
4868 
4869  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
4870  *s = 'a' + (c - 'A');
4871  modify = 1;
4872  }
4873  s++;
4874  }
4875  }
4876  else {
4877  int ascompat = rb_enc_asciicompat(enc);
4878 
4879  while (s < send) {
4880  unsigned int c;
4881  int n;
4882 
4883  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
4884  if (rb_enc_isascii(c, enc) && 'A' <= c && c <= 'Z') {
4885  *s = 'a' + (c - 'A');
4886  modify = 1;
4887  }
4888  s++;
4889  }
4890  else {
4891  c = rb_enc_codepoint_len(s, send, &n, enc);
4892  if (rb_enc_isupper(c, enc)) {
4893  /* assuming toupper returns codepoint with same size */
4894  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
4895  modify = 1;
4896  }
4897  s += n;
4898  }
4899  }
4900  }
4901 
4902  if (modify) return str;
4903  return Qnil;
4904 }
4905 
4906 
4907 /*
4908  * call-seq:
4909  * str.downcase -> new_str
4910  *
4911  * Returns a copy of <i>str</i> with all uppercase letters replaced with their
4912  * lowercase counterparts. The operation is locale insensitive---only
4913  * characters ``A'' to ``Z'' are affected.
4914  * Note: case replacement is effective only in ASCII region.
4915  *
4916  * "hEllO".downcase #=> "hello"
4917  */
4918 
4919 static VALUE
4921 {
4922  str = rb_str_dup(str);
4923  rb_str_downcase_bang(str);
4924  return str;
4925 }
4926 
4927 
4928 /*
4929  * call-seq:
4930  * str.capitalize! -> str or nil
4931  *
4932  * Modifies <i>str</i> by converting the first character to uppercase and the
4933  * remainder to lowercase. Returns <code>nil</code> if no changes are made.
4934  * Note: case conversion is effective only in ASCII region.
4935  *
4936  * a = "hello"
4937  * a.capitalize! #=> "Hello"
4938  * a #=> "Hello"
4939  * a.capitalize! #=> nil
4940  */
4941 
4942 static VALUE
4944 {
4945  rb_encoding *enc;
4946  char *s, *send;
4947  int modify = 0;
4948  unsigned int c;
4949  int n;
4950 
4951  str_modify_keep_cr(str);
4952  enc = STR_ENC_GET(str);
4954  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
4955  s = RSTRING_PTR(str); send = RSTRING_END(str);
4956 
4957  c = rb_enc_codepoint_len(s, send, &n, enc);
4958  if (rb_enc_islower(c, enc)) {
4959  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
4960  modify = 1;
4961  }
4962  s += n;
4963  while (s < send) {
4964  c = rb_enc_codepoint_len(s, send, &n, enc);
4965  if (rb_enc_isupper(c, enc)) {
4966  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
4967  modify = 1;
4968  }
4969  s += n;
4970  }
4971 
4972  if (modify) return str;
4973  return Qnil;
4974 }
4975 
4976 
4977 /*
4978  * call-seq:
4979  * str.capitalize -> new_str
4980  *
4981  * Returns a copy of <i>str</i> with the first character converted to uppercase
4982  * and the remainder to lowercase.
4983  * Note: case conversion is effective only in ASCII region.
4984  *
4985  * "hello".capitalize #=> "Hello"
4986  * "HELLO".capitalize #=> "Hello"
4987  * "123ABC".capitalize #=> "123abc"
4988  */
4989 
4990 static VALUE
4992 {
4993  str = rb_str_dup(str);
4995  return str;
4996 }
4997 
4998 
4999 /*
5000  * call-seq:
5001  * str.swapcase! -> str or nil
5002  *
5003  * Equivalent to <code>String#swapcase</code>, but modifies the receiver in
5004  * place, returning <i>str</i>, or <code>nil</code> if no changes were made.
5005  * Note: case conversion is effective only in ASCII region.
5006  */
5007 
5008 static VALUE
5010 {
5011  rb_encoding *enc;
5012  char *s, *send;
5013  int modify = 0;
5014  int n;
5015 
5016  str_modify_keep_cr(str);
5017  enc = STR_ENC_GET(str);
5019  s = RSTRING_PTR(str); send = RSTRING_END(str);
5020  while (s < send) {
5021  unsigned int c = rb_enc_codepoint_len(s, send, &n, enc);
5022 
5023  if (rb_enc_isupper(c, enc)) {
5024  /* assuming toupper returns codepoint with same size */
5025  rb_enc_mbcput(rb_enc_tolower(c, enc), s, enc);
5026  modify = 1;
5027  }
5028  else if (rb_enc_islower(c, enc)) {
5029  /* assuming tolower returns codepoint with same size */
5030  rb_enc_mbcput(rb_enc_toupper(c, enc), s, enc);
5031  modify = 1;
5032  }
5033  s += n;
5034  }
5035 
5036  if (modify) return str;
5037  return Qnil;
5038 }
5039 
5040 
5041 /*
5042  * call-seq:
5043  * str.swapcase -> new_str
5044  *
5045  * Returns a copy of <i>str</i> with uppercase alphabetic characters converted
5046  * to lowercase and lowercase characters converted to uppercase.
5047  * Note: case conversion is effective only in ASCII region.
5048  *
5049  * "Hello".swapcase #=> "hELLO"
5050  * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11"
5051  */
5052 
5053 static VALUE
5055 {
5056  str = rb_str_dup(str);
5057  rb_str_swapcase_bang(str);
5058  return str;
5059 }
5060 
typedef unsigned char *USTR;

/*
 * Cursor over a tr-style character specification ("a-z", "^abc", ...).
 * trnext() advances it and hands back one code point per call.
 */
struct tr {
    int gen;            /* non-zero while a "c1-c2" range is being expanded */
    unsigned int now;   /* code point produced most recently */
    unsigned int max;   /* upper bound of the range being expanded */
    char *p;            /* next unread byte of the specification */
    char *pend;         /* one past the last byte of the specification */
};
5068 
5069 static unsigned int
5070 trnext(struct tr *t, rb_encoding *enc)
5071 {
5072  int n;
5073 
5074  for (;;) {
5075  if (!t->gen) {
5076 nextpart:
5077  if (t->p == t->pend) return -1;
5078  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '\\' && t->p + n < t->pend) {
5079  t->p += n;
5080  }
5081  t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5082  t->p += n;
5083  if (rb_enc_ascget(t->p, t->pend, &n, enc) == '-' && t->p + n < t->pend) {
5084  t->p += n;
5085  if (t->p < t->pend) {
5086  unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
5087  t->p += n;
5088  if (t->now > c) {
5089  if (t->now < 0x80 && c < 0x80) {
5091  "invalid range \"%c-%c\" in string transliteration",
5092  t->now, c);
5093  }
5094  else {
5095  rb_raise(rb_eArgError, "invalid range in string transliteration");
5096  }
5097  continue; /* not reached */
5098  }
5099  t->gen = 1;
5100  t->max = c;
5101  }
5102  }
5103  return t->now;
5104  }
5105  else {
5106  while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
5107  if (t->now == t->max) {
5108  t->gen = 0;
5109  goto nextpart;
5110  }
5111  }
5112  if (t->now < t->max) {
5113  return t->now;
5114  }
5115  else {
5116  t->gen = 0;
5117  return t->max;
5118  }
5119  }
5120  }
5121 }
5122 
5123 static VALUE rb_str_delete_bang(int,VALUE*,VALUE);
5124 
5125 static VALUE
5126 tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
5127 {
5128  const unsigned int errc = -1;
5129  unsigned int trans[256];
5130  rb_encoding *enc, *e1, *e2;
5131  struct tr trsrc, trrepl;
5132  int cflag = 0;
5133  unsigned int c, c0, last = 0;
5134  int modify = 0, i, l;
5135  char *s, *send;
5136  VALUE hash = 0;
5137  int singlebyte = single_byte_optimizable(str);
5138  int cr;
5139 
5140 #define CHECK_IF_ASCII(c) \
5141  (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
5142  (cr = ENC_CODERANGE_VALID) : 0)
5143 
5144  StringValue(src);
5145  StringValue(repl);
5146  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5147  if (RSTRING_LEN(repl) == 0) {
5148  return rb_str_delete_bang(1, &src, str);
5149  }
5150 
5151  cr = ENC_CODERANGE(str);
5152  e1 = rb_enc_check(str, src);
5153  e2 = rb_enc_check(str, repl);
5154  if (e1 == e2) {
5155  enc = e1;
5156  }
5157  else {
5158  enc = rb_enc_check(src, repl);
5159  }
5160  trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
5161  if (RSTRING_LEN(src) > 1 &&
5162  rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) == '^' &&
5163  trsrc.p + l < trsrc.pend) {
5164  cflag = 1;
5165  trsrc.p += l;
5166  }
5167  trrepl.p = RSTRING_PTR(repl);
5168  trrepl.pend = trrepl.p + RSTRING_LEN(repl);
5169  trsrc.gen = trrepl.gen = 0;
5170  trsrc.now = trrepl.now = 0;
5171  trsrc.max = trrepl.max = 0;
5172 
5173  if (cflag) {
5174  for (i=0; i<256; i++) {
5175  trans[i] = 1;
5176  }
5177  while ((c = trnext(&trsrc, enc)) != errc) {
5178  if (c < 256) {
5179  trans[c] = errc;
5180  }
5181  else {
5182  if (!hash) hash = rb_hash_new();
5183  rb_hash_aset(hash, UINT2NUM(c), Qtrue);
5184  }
5185  }
5186  while ((c = trnext(&trrepl, enc)) != errc)
5187  /* retrieve last replacer */;
5188  last = trrepl.now;
5189  for (i=0; i<256; i++) {
5190  if (trans[i] != errc) {
5191  trans[i] = last;
5192  }
5193  }
5194  }
5195  else {
5196  unsigned int r;
5197 
5198  for (i=0; i<256; i++) {
5199  trans[i] = errc;
5200  }
5201  while ((c = trnext(&trsrc, enc)) != errc) {
5202  r = trnext(&trrepl, enc);
5203  if (r == errc) r = trrepl.now;
5204  if (c < 256) {
5205  trans[c] = r;
5206  if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
5207  }
5208  else {
5209  if (!hash) hash = rb_hash_new();
5210  rb_hash_aset(hash, UINT2NUM(c), UINT2NUM(r));
5211  }
5212  }
5213  }
5214 
5215  if (cr == ENC_CODERANGE_VALID)
5216  cr = ENC_CODERANGE_7BIT;
5217  str_modify_keep_cr(str);
5218  s = RSTRING_PTR(str); send = RSTRING_END(str);
5219  if (sflag) {
5220  int clen, tlen;
5221  long offset, max = RSTRING_LEN(str);
5222  unsigned int save = -1;
5223  char *buf = ALLOC_N(char, max), *t = buf;
5224 
5225  while (s < send) {
5226  int may_modify = 0;
5227 
5228  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5229  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5230 
5231  s += clen;
5232  if (c < 256) {
5233  c = trans[c];
5234  }
5235  else if (hash) {
5236  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5237  if (NIL_P(tmp)) {
5238  if (cflag) c = last;
5239  else c = errc;
5240  }
5241  else if (cflag) c = errc;
5242  else c = NUM2INT(tmp);
5243  }
5244  else {
5245  c = errc;
5246  }
5247  if (c != (unsigned int)-1) {
5248  if (save == c) {
5249  CHECK_IF_ASCII(c);
5250  continue;
5251  }
5252  save = c;
5253  tlen = rb_enc_codelen(c, enc);
5254  modify = 1;
5255  }
5256  else {
5257  save = -1;
5258  c = c0;
5259  if (enc != e1) may_modify = 1;
5260  }
5261  while (t - buf + tlen >= max) {
5262  offset = t - buf;
5263  max *= 2;
5264  REALLOC_N(buf, char, max);
5265  t = buf + offset;
5266  }
5267  rb_enc_mbcput(c, t, enc);
5268  if (may_modify && memcmp(s, t, tlen) != 0) {
5269  modify = 1;
5270  }
5271  CHECK_IF_ASCII(c);
5272  t += tlen;
5273  }
5274  if (!STR_EMBED_P(str)) {
5275  xfree(RSTRING(str)->as.heap.ptr);
5276  }
5277  *t = '\0';
5278  RSTRING(str)->as.heap.ptr = buf;
5279  RSTRING(str)->as.heap.len = t - buf;
5280  STR_SET_NOEMBED(str);
5281  RSTRING(str)->as.heap.aux.capa = max;
5282  }
5283  else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) {
5284  while (s < send) {
5285  c = (unsigned char)*s;
5286  if (trans[c] != errc) {
5287  if (!cflag) {
5288  c = trans[c];
5289  *s = c;
5290  modify = 1;
5291  }
5292  else {
5293  *s = last;
5294  modify = 1;
5295  }
5296  }
5297  CHECK_IF_ASCII(c);
5298  s++;
5299  }
5300  }
5301  else {
5302  int clen, tlen, max = (int)(RSTRING_LEN(str) * 1.2);
5303  long offset;
5304  char *buf = ALLOC_N(char, max), *t = buf;
5305 
5306  while (s < send) {
5307  int may_modify = 0;
5308  c0 = c = rb_enc_codepoint_len(s, send, &clen, e1);
5309  tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
5310 
5311  if (c < 256) {
5312  c = trans[c];
5313  }
5314  else if (hash) {
5315  VALUE tmp = rb_hash_lookup(hash, UINT2NUM(c));
5316  if (NIL_P(tmp)) {
5317  if (cflag) c = last;
5318  else c = errc;
5319  }
5320  else if (cflag) c = errc;
5321  else c = NUM2INT(tmp);
5322  }
5323  else {
5324  c = cflag ? last : errc;
5325  }
5326  if (c != errc) {
5327  tlen = rb_enc_codelen(c, enc);
5328  modify = 1;
5329  }
5330  else {
5331  c = c0;
5332  if (enc != e1) may_modify = 1;
5333  }
5334  while (t - buf + tlen >= max) {
5335  offset = t - buf;
5336  max *= 2;
5337  REALLOC_N(buf, char, max);
5338  t = buf + offset;
5339  }
5340  if (s != t) {
5341  rb_enc_mbcput(c, t, enc);
5342  if (may_modify && memcmp(s, t, tlen) != 0) {
5343  modify = 1;
5344  }
5345  }
5346  CHECK_IF_ASCII(c);
5347  s += clen;
5348  t += tlen;
5349  }
5350  if (!STR_EMBED_P(str)) {
5351  xfree(RSTRING(str)->as.heap.ptr);
5352  }
5353  *t = '\0';
5354  RSTRING(str)->as.heap.ptr = buf;
5355  RSTRING(str)->as.heap.len = t - buf;
5356  STR_SET_NOEMBED(str);
5357  RSTRING(str)->as.heap.aux.capa = max;
5358  }
5359 
5360  if (modify) {
5361  if (cr != ENC_CODERANGE_BROKEN)
5362  ENC_CODERANGE_SET(str, cr);
5363  rb_enc_associate(str, enc);
5364  return str;
5365  }
5366  return Qnil;
5367 }
5368 
5369 
5370 /*
5371  * call-seq:
5372  * str.tr!(from_str, to_str) -> str or nil
5373  *
5374  * Translates <i>str</i> in place, using the same rules as
5375  * <code>String#tr</code>. Returns <i>str</i>, or <code>nil</code> if no
5376  * changes were made.
5377  */
5378 
5379 static VALUE
5381 {
5382  return tr_trans(str, src, repl, 0);
5383 }
5384 
5385 
5386 /*
5387  * call-seq:
5388  * str.tr(from_str, to_str) -> new_str
5389  *
5390  * Returns a copy of +str+ with the characters in +from_str+ replaced by the
5391  * corresponding characters in +to_str+. If +to_str+ is shorter than
5392  * +from_str+, it is padded with its last character in order to maintain the
5393  * correspondence.
5394  *
5395  * "hello".tr('el', 'ip') #=> "hippo"
5396  * "hello".tr('aeiou', '*') #=> "h*ll*"
5397  * "hello".tr('aeiou', 'AA*') #=> "hAll*"
5398  *
5399  * Both strings may use the <code>c1-c2</code> notation to denote ranges of
5400  * characters, and +from_str+ may start with a <code>^</code>, which denotes
5401  * all characters except those listed.
5402  *
5403  * "hello".tr('a-y', 'b-z') #=> "ifmmp"
5404  * "hello".tr('^aeiou', '*') #=> "*e**o"
5405  *
5406  * The backslash character <code>\</code> can be used to escape
5407  * <code>^</code> or <code>-</code> and is otherwise ignored unless it
5408  * appears at the end of a range or the end of the +from_str+ or +to_str+:
5409  *
5410  * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld"
5411  * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld"
5412  *
5413  * "hello\r\nworld".tr("\r", "") #=> "hello\nworld"
5414  * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold"
5415  * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld"
5416  *
5417  * "X['\\b']".tr("X\\", "") #=> "['b']"
5418  * "X['\\b']".tr("X-\\]", "") #=> "'b'"
5419  */
5420 
5421 static VALUE
5423 {
5424  str = rb_str_dup(str);
5425  tr_trans(str, src, repl, 0);
5426  return str;
5427 }
5428 
5429 #define TR_TABLE_SIZE 257
5430 static void
5431 tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first,
5432  VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
5433 {
5434  const unsigned int errc = -1;
5435  char buf[256];
5436  struct tr tr;
5437  unsigned int c;
5438  VALUE table = 0, ptable = 0;
5439  int i, l, cflag = 0;
5440 
5441  tr.p = RSTRING_PTR(str); tr.pend = tr.p + RSTRING_LEN(str);
5442  tr.gen = tr.now = tr.max = 0;
5443 
5444  if (RSTRING_LEN(str) > 1 && rb_enc_ascget(tr.p, tr.pend, &l, enc) == '^') {
5445  cflag = 1;
5446  tr.p += l;
5447  }
5448  if (first) {
5449  for (i=0; i<256; i++) {
5450  stable[i] = 1;
5451  }
5452  stable[256] = cflag;
5453  }
5454  else if (stable[256] && !cflag) {
5455  stable[256] = 0;
5456  }
5457  for (i=0; i<256; i++) {
5458  buf[i] = cflag;
5459  }
5460 
5461  while ((c = trnext(&tr, enc)) != errc) {
5462  if (c < 256) {
5463  buf[c & 0xff] = !cflag;
5464  }
5465  else {
5466  VALUE key = UINT2NUM(c);
5467 
5468  if (!table && (first || *tablep || stable[256])) {
5469  if (cflag) {
5470  ptable = *ctablep;
5471  table = ptable ? ptable : rb_hash_new();
5472  *ctablep = table;
5473  }
5474  else {
5475  table = rb_hash_new();
5476  ptable = *tablep;
5477  *tablep = table;
5478  }
5479  }
5480  if (table && (!ptable || (cflag ^ !NIL_P(rb_hash_aref(ptable, key))))) {
5481  rb_hash_aset(table, key, Qtrue);
5482  }
5483  }
5484  }
5485  for (i=0; i<256; i++) {
5486  stable[i] = stable[i] && buf[i];
5487  }
5488  if (!table && !cflag) {
5489  *tablep = 0;
5490  }
5491 }
5492 
5493 
5494 static int
5495 tr_find(unsigned int c, char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
5496 {
5497  if (c < 256) {
5498  return table[c] != 0;
5499  }
5500  else {
5501  VALUE v = UINT2NUM(c);
5502 
5503  if (del) {
5504  if (!NIL_P(rb_hash_lookup(del, v)) &&
5505  (!nodel || NIL_P(rb_hash_lookup(nodel, v)))) {
5506  return TRUE;
5507  }
5508  }
5509  else if (nodel && !NIL_P(rb_hash_lookup(nodel, v))) {
5510  return FALSE;
5511  }
5512  return table[256] ? TRUE : FALSE;
5513  }
5514 }
5515 
5516 /*
5517  * call-seq:
5518  * str.delete!([other_str]+) -> str or nil
5519  *
5520  * Performs a <code>delete</code> operation in place, returning <i>str</i>, or
5521  * <code>nil</code> if <i>str</i> was not modified.
5522  */
5523 
5524 static VALUE
5526 {
5527  char squeez[TR_TABLE_SIZE];
5528  rb_encoding *enc = 0;
5529  char *s, *send, *t;
5530  VALUE del = 0, nodel = 0;
5531  int modify = 0;
5532  int i, ascompat, cr;
5533 
5534  if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str)) return Qnil;
5536  for (i=0; i<argc; i++) {
5537  VALUE s = argv[i];
5538 
5539  StringValue(s);
5540  enc = rb_enc_check(str, s);
5541  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5542  }
5543 
5544  str_modify_keep_cr(str);
5545  ascompat = rb_enc_asciicompat(enc);
5546  s = t = RSTRING_PTR(str);
5547  send = RSTRING_END(str);
5548  cr = ascompat ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
5549  while (s < send) {
5550  unsigned int c;
5551  int clen;
5552 
5553  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5554  if (squeez[c]) {
5555  modify = 1;
5556  }
5557  else {
5558  if (t != s) *t = c;
5559  t++;
5560  }
5561  s++;
5562  }
5563  else {
5564  c = rb_enc_codepoint_len(s, send, &clen, enc);
5565 
5566  if (tr_find(c, squeez, del, nodel)) {
5567  modify = 1;
5568  }
5569  else {
5570  if (t != s) rb_enc_mbcput(c, t, enc);
5571  t += clen;
5572  if (cr == ENC_CODERANGE_7BIT) cr = ENC_CODERANGE_VALID;
5573  }
5574  s += clen;
5575  }
5576  }
5577  *t = '\0';
5578  STR_SET_LEN(str, t - RSTRING_PTR(str));
5579  ENC_CODERANGE_SET(str, cr);
5580 
5581  if (modify) return str;
5582  return Qnil;
5583 }
5584 
5585 
5586 /*
5587  * call-seq:
5588  * str.delete([other_str]+) -> new_str
5589  *
5590  * Returns a copy of <i>str</i> with all characters in the intersection of its
5591  * arguments deleted. Uses the same rules for building the set of characters as
5592  * <code>String#count</code>.
5593  *
5594  * "hello".delete "l","lo" #=> "heo"
5595  * "hello".delete "lo" #=> "he"
5596  * "hello".delete "aeiou", "^e" #=> "hell"
5597  * "hello".delete "ej-m" #=> "ho"
5598  */
5599 
5600 static VALUE
5602 {
5603  str = rb_str_dup(str);
5604  rb_str_delete_bang(argc, argv, str);
5605  return str;
5606 }
5607 
5608 
5609 /*
5610  * call-seq:
5611  * str.squeeze!([other_str]*) -> str or nil
5612  *
5613  * Squeezes <i>str</i> in place, returning either <i>str</i>, or
5614  * <code>nil</code> if no changes were made.
5615  */
5616 
5617 static VALUE
5619 {
5620  char squeez[TR_TABLE_SIZE];
5621  rb_encoding *enc = 0;
5622  VALUE del = 0, nodel = 0;
5623  char *s, *send, *t;
5624  int i, modify = 0;
5625  int ascompat, singlebyte = single_byte_optimizable(str);
5626  unsigned int save;
5627 
5628  if (argc == 0) {
5629  enc = STR_ENC_GET(str);
5630  }
5631  else {
5632  for (i=0; i<argc; i++) {
5633  VALUE s = argv[i];
5634 
5635  StringValue(s);
5636  enc = rb_enc_check(str, s);
5637  if (singlebyte && !single_byte_optimizable(s))
5638  singlebyte = 0;
5639  tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
5640  }
5641  }
5642 
5643  str_modify_keep_cr(str);
5644  s = t = RSTRING_PTR(str);
5645  if (!s || RSTRING_LEN(str) == 0) return Qnil;
5646  send = RSTRING_END(str);
5647  save = -1;
5648  ascompat = rb_enc_asciicompat(enc);
5649 
5650  if (singlebyte) {
5651  while (s < send) {
5652  unsigned int c = *(unsigned char*)s++;
5653  if (c != save || (argc > 0 && !squeez[c])) {
5654  *t++ = save = c;
5655  }
5656  }
5657  } else {
5658  while (s < send) {
5659  unsigned int c;
5660  int clen;
5661 
5662  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5663  if (c != save || (argc > 0 && !squeez[c])) {
5664  *t++ = save = c;
5665  }
5666  s++;
5667  }
5668  else {
5669  c = rb_enc_codepoint_len(s, send, &clen, enc);
5670 
5671  if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
5672  if (t != s) rb_enc_mbcput(c, t, enc);
5673  save = c;
5674  t += clen;
5675  }
5676  s += clen;
5677  }
5678  }
5679  }
5680 
5681  *t = '\0';
5682  if (t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
5683  STR_SET_LEN(str, t - RSTRING_PTR(str));
5684  modify = 1;
5685  }
5686 
5687  if (modify) return str;
5688  return Qnil;
5689 }
5690 
5691 
5692 /*
5693  * call-seq:
5694  * str.squeeze([other_str]*) -> new_str
5695  *
5696  * Builds a set of characters from the <i>other_str</i> parameter(s) using the
5697  * procedure described for <code>String#count</code>. Returns a new string
5698  * where runs of the same character that occur in this set are replaced by a
5699  * single character. If no arguments are given, all runs of identical
5700  * characters are replaced by a single character.
5701  *
5702  * "yellow moon".squeeze #=> "yelow mon"
5703  * "  now   is  the".squeeze(" ") #=> " now is the"
5704  * "putters shoot balls".squeeze("m-z") #=> "puters shot balls"
5705  */
5706 
5707 static VALUE
5709 {
5710  str = rb_str_dup(str);
5711  rb_str_squeeze_bang(argc, argv, str);
5712  return str;
5713 }
5714 
5715 
5716 /*
5717  * call-seq:
5718  * str.tr_s!(from_str, to_str) -> str or nil
5719  *
5720  * Performs <code>String#tr_s</code> processing on <i>str</i> in place,
5721  * returning <i>str</i>, or <code>nil</code> if no changes were made.
5722  */
5723 
5724 static VALUE
5726 {
5727  return tr_trans(str, src, repl, 1);
5728 }
5729 
5730 
5731 /*
5732  * call-seq:
5733  * str.tr_s(from_str, to_str) -> new_str
5734  *
5735  * Processes a copy of <i>str</i> as described under <code>String#tr</code>,
5736  * then removes duplicate characters in regions that were affected by the
5737  * translation.
5738  *
5739  * "hello".tr_s('l', 'r') #=> "hero"
5740  * "hello".tr_s('el', '*') #=> "h*o"
5741  * "hello".tr_s('el', 'hx') #=> "hhxo"
5742  */
5743 
5744 static VALUE
5746 {
5747  str = rb_str_dup(str);
5748  tr_trans(str, src, repl, 1);
5749  return str;
5750 }
5751 
5752 
5753 /*
5754  * call-seq:
5755  * str.count([other_str]+) -> fixnum
5756  *
5757  * Each +other_str+ parameter defines a set of characters to count. The
5758  * intersection of these sets defines the characters to count in +str+. Any
5759  * +other_str+ that starts with a caret <code>^</code> is negated. The
5760  * sequence <code>c1-c2</code> means all characters between c1 and c2. The
5761  * backslash character <code>\</code> can be used to escape <code>^</code> or
5762  * <code>-</code> and is otherwise ignored unless it appears at the end of a
5763  * sequence or the end of an +other_str+.
5764  *
5765  * a = "hello world"
5766  * a.count "lo" #=> 5
5767  * a.count "lo", "o" #=> 2
5768  * a.count "hello", "^l" #=> 4
5769  * a.count "ej-m" #=> 4
5770  *
5771  * "hello^world".count "\\^aeiou" #=> 4
5772  * "hello-world".count "a\\-eo" #=> 4
5773  *
5774  * c = "hello world\\r\\n"
5775  * c.count "\\" #=> 2
5776  * c.count "\\A" #=> 0
5777  * c.count "X-\\w" #=> 3
5778  */
5779 
5780 static VALUE
5782 {
5783  char table[TR_TABLE_SIZE];
5784  rb_encoding *enc = 0;
5785  VALUE del = 0, nodel = 0;
5786  char *s, *send;
5787  int i;
5788  int ascompat;
5789 
5791  for (i=0; i<argc; i++) {
5792  VALUE tstr = argv[i];
5793  unsigned char c;
5794 
5795  StringValue(tstr);
5796  enc = rb_enc_check(str, tstr);
5797  if (argc == 1 && RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
5798  (c = RSTRING_PTR(tstr)[0]) < 0x80 && !is_broken_string(str)) {
5799  int n = 0;
5800 
5801  s = RSTRING_PTR(str);
5802  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
5803  send = RSTRING_END(str);
5804  while (s < send) {
5805  if (*(unsigned char*)s++ == c) n++;
5806  }
5807  return INT2NUM(n);
5808  }
5809  tr_setup_table(tstr, table, i==0, &del, &nodel, enc);
5810  }
5811 
5812  s = RSTRING_PTR(str);
5813  if (!s || RSTRING_LEN(str) == 0) return INT2FIX(0);
5814  send = RSTRING_END(str);
5815  ascompat = rb_enc_asciicompat(enc);
5816  i = 0;
5817  while (s < send) {
5818  unsigned int c;
5819 
5820  if (ascompat && (c = *(unsigned char*)s) < 0x80) {
5821  if (table[c]) {
5822  i++;
5823  }
5824  s++;
5825  }
5826  else {
5827  int clen;
5828  c = rb_enc_codepoint_len(s, send, &clen, enc);
5829  if (tr_find(c, table, del, nodel)) {
5830  i++;
5831  }
5832  s += clen;
5833  }
5834  }
5835 
5836  return INT2NUM(i);
5837 }
5838 
/*
 * Lookup table for ascii_isspace(): non-zero only for TAB, LF, VT, FF, CR
 * and SPACE.  Only the prefix up to SPACE is spelled out; the remaining
 * elements are zero-initialized.
 */
static const char isspacetable[256] = {
    /* 0x00 - 0x07 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 - 0x0f */ 0, 1, 1, 1, 1, 1, 0, 0,   /* \t \n \v \f \r */
    /* 0x10 - 0x17 */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 - 0x1f */ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20        */ 1                         /* ' ' */
};

/* Non-zero when c is one of the six ASCII whitespace characters. */
#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
5859 
5860 /*
5861  * call-seq:
5862  * str.split(pattern=$;, [limit]) -> anArray
5863  *
5864  * Divides <i>str</i> into substrings based on a delimiter, returning an array
5865  * of these substrings.
5866  *
5867  * If <i>pattern</i> is a <code>String</code>, then its contents are used as
5868  * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
5869  * space, <i>str</i> is split on whitespace, with leading whitespace and runs
5870  * of contiguous whitespace characters ignored.
5871  *
5872  * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
5873  * pattern matches. Whenever the pattern matches a zero-length string,
5874  * <i>str</i> is split into individual characters. If <i>pattern</i> contains
5875  * groups, the respective matches will be returned in the array as well.
5876  *
5877  * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
5878  * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
5879  * split on whitespace as if ` ' were specified.
5880  *
5881  * If the <i>limit</i> parameter is omitted, trailing null fields are
5882  * suppressed. If <i>limit</i> is a positive number, at most that number of
5883  * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
5884  * string is returned as the only entry in an array). If negative, there is no
5885  * limit to the number of fields returned, and trailing null fields are not
5886  * suppressed.
5887  *
5888  * When the input +str+ is empty an empty Array is returned as the string is
5889  * considered to have no fields to split.
5890  *
5891  * " now's  the time".split #=> ["now's", "the", "time"]
5892  * " now's  the time".split(' ') #=> ["now's", "the", "time"]
5893  * " now's  the time".split(/ /) #=> ["", "now's", "", "the", "time"]
5894  * "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
5895  * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
5896  * "hello".split(//, 3) #=> ["h", "e", "llo"]
5897  * "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
5898  *
5899  * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
5900  * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
5901  * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
5902  * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
5903  *
5904  * "".split(',', -1) #=> []
5905  */
5906 
5907 static VALUE
5909 {
5910  rb_encoding *enc;
5911  VALUE spat;
5912  VALUE limit;
5913  enum {awk, string, regexp} split_type;
5914  long beg, end, i = 0;
5915  int lim = 0;
5916  VALUE result, tmp;
5917 
5918  if (rb_scan_args(argc, argv, "02", &spat, &limit) == 2) {
5919  lim = NUM2INT(limit);
5920  if (lim <= 0) limit = Qnil;
5921  else if (lim == 1) {
5922  if (RSTRING_LEN(str) == 0)
5923  return rb_ary_new2(0);
5924  return rb_ary_new3(1, str);
5925  }
5926  i = 1;
5927  }
5928 
5929  enc = STR_ENC_GET(str);
5930  if (NIL_P(spat)) {
5931  if (!NIL_P(rb_fs)) {
5932  spat = rb_fs;
5933  goto fs_set;
5934  }
5935  split_type = awk;
5936  }
5937  else {
5938  fs_set:
5939  if (RB_TYPE_P(spat, T_STRING)) {
5940  rb_encoding *enc2 = STR_ENC_GET(spat);
5941 
5942  split_type = string;
5943  if (RSTRING_LEN(spat) == 0) {
5944  /* Special case - split into chars */
5945  spat = rb_reg_regcomp(spat);
5946  split_type = regexp;
5947  }
5948  else if (rb_enc_asciicompat(enc2) == 1) {
5949  if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' '){
5950  split_type = awk;
5951  }
5952  }
5953  else {
5954  int l;
5955  if (rb_enc_ascget(RSTRING_PTR(spat), RSTRING_END(spat), &l, enc2) == ' ' &&
5956  RSTRING_LEN(spat) == l) {
5957  split_type = awk;
5958  }
5959  }
5960  }
5961  else {
5962  spat = get_pat(spat, 1);
5963  split_type = regexp;
5964  }
5965  }
5966 
5967  result = rb_ary_new();
5968  beg = 0;
5969  if (split_type == awk) {
5970  char *ptr = RSTRING_PTR(str);
5971  char *eptr = RSTRING_END(str);
5972  char *bptr = ptr;
5973  int skip = 1;
5974  unsigned int c;
5975 
5976  end = beg;
5977  if (is_ascii_string(str)) {
5978  while (ptr < eptr) {
5979  c = (unsigned char)*ptr++;
5980  if (skip) {
5981  if (ascii_isspace(c)) {
5982  beg = ptr - bptr;
5983  }
5984  else {
5985  end = ptr - bptr;
5986  skip = 0;
5987  if (!NIL_P(limit) && lim <= i) break;
5988  }
5989  }
5990  else if (ascii_isspace(c)) {
5991  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
5992  skip = 1;
5993  beg = ptr - bptr;
5994  if (!NIL_P(limit)) ++i;
5995  }
5996  else {
5997  end = ptr - bptr;
5998  }
5999  }
6000  }
6001  else {
6002  while (ptr < eptr) {
6003  int n;
6004 
6005  c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
6006  ptr += n;
6007  if (skip) {
6008  if (rb_isspace(c)) {
6009  beg = ptr - bptr;
6010  }
6011  else {
6012  end = ptr - bptr;
6013  skip = 0;
6014  if (!NIL_P(limit) && lim <= i) break;
6015  }
6016  }
6017  else if (rb_isspace(c)) {
6018  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6019  skip = 1;
6020  beg = ptr - bptr;
6021  if (!NIL_P(limit)) ++i;
6022  }
6023  else {
6024  end = ptr - bptr;
6025  }
6026  }
6027  }
6028  }
6029  else if (split_type == string) {
6030  char *ptr = RSTRING_PTR(str);
6031  char *temp = ptr;
6032  char *eptr = RSTRING_END(str);
6033  char *sptr = RSTRING_PTR(spat);
6034  long slen = RSTRING_LEN(spat);
6035 
6036  if (is_broken_string(str)) {
6037  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
6038  }
6039  if (is_broken_string(spat)) {
6040  rb_raise(rb_eArgError, "invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(spat)));
6041  }
6042  enc = rb_enc_check(str, spat);
6043  while (ptr < eptr &&
6044  (end = rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
6045  /* Check we are at the start of a char */
6046  char *t = rb_enc_right_char_head(ptr, ptr + end, eptr, enc);
6047  if (t != ptr + end) {
6048  ptr = t;
6049  continue;
6050  }
6051  rb_ary_push(result, rb_str_subseq(str, ptr - temp, end));
6052  ptr += end + slen;
6053  if (!NIL_P(limit) && lim <= ++i) break;
6054  }
6055  beg = ptr - temp;
6056  }
6057  else {
6058  char *ptr = RSTRING_PTR(str);
6059  long len = RSTRING_LEN(str);
6060  long start = beg;
6061  long idx;
6062  int last_null = 0;
6063  struct re_registers *regs;
6064 
6065  while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
6066  regs = RMATCH_REGS(rb_backref_get());
6067  if (start == end && BEG(0) == END(0)) {
6068  if (!ptr) {
6069  rb_ary_push(result, str_new_empty(str));
6070  break;
6071  }
6072  else if (last_null == 1) {
6073  rb_ary_push(result, rb_str_subseq(str, beg,
6074  rb_enc_fast_mbclen(ptr+beg,
6075  ptr+len,
6076  enc)));
6077  beg = start;
6078  }
6079  else {
6080  if (ptr+start == ptr+len)
6081  start++;
6082  else
6083  start += rb_enc_fast_mbclen(ptr+start,ptr+len,enc);
6084  last_null = 1;
6085  continue;
6086  }
6087  }
6088  else {
6089  rb_ary_push(result, rb_str_subseq(str, beg, end-beg));
6090  beg = start = END(0);
6091  }
6092  last_null = 0;
6093 
6094  for (idx=1; idx < regs->num_regs; idx++) {
6095  if (BEG(idx) == -1) continue;
6096  if (BEG(idx) == END(idx))
6097  tmp = str_new_empty(str);
6098  else
6099  tmp = rb_str_subseq(str, BEG(idx), END(idx)-BEG(idx));
6100  rb_ary_push(result, tmp);
6101  }
6102  if (!NIL_P(limit) && lim <= ++i) break;
6103  }
6104  }
6105  if (RSTRING_LEN(str) > 0 && (!NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
6106  if (RSTRING_LEN(str) == beg)
6107  tmp = str_new_empty(str);
6108  else
6109  tmp = rb_str_subseq(str, beg, RSTRING_LEN(str)-beg);
6110  rb_ary_push(result, tmp);
6111  }
6112  if (NIL_P(limit) && lim == 0) {
6113  long len;
6114  while ((len = RARRAY_LEN(result)) > 0 &&
6115  (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
6116  rb_ary_pop(result);
6117  }
6118 
6119  return result;
6120 }
6121 
6122 VALUE
6123 rb_str_split(VALUE str, const char *sep0)
6124 {
6125  VALUE sep;
6126 
6127  StringValue(str);
6128  sep = rb_str_new2(sep0);
6129  return rb_str_split_m(1, &sep, str);
6130 }
6131 
6132 
6133 static VALUE
6134 rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
6135 {
6136  rb_encoding *enc;
6137  VALUE rs;
6138  unsigned int newline;
6139  const char *p, *pend, *s, *ptr;
6140  long len, rslen;
6141  VALUE line;
6142  int n;
6143  VALUE orig = str;
6145 
6146  if (argc == 0) {
6147  rs = rb_rs;
6148  }
6149  else {
6150  rb_scan_args(argc, argv, "01", &rs);
6151  }
6152 
6153  if (rb_block_given_p()) {
6154  if (wantarray) {
6155 #if 0 /* next major */
6156  rb_warn("given block not used");
6157  ary = rb_ary_new();
6158 #else
6159  rb_warning("passing a block to String#lines is deprecated");
6160  wantarray = 0;
6161 #endif
6162  }
6163  }
6164  else {
6165  if (wantarray)
6166  ary = rb_ary_new();
6167  else
6168  RETURN_ENUMERATOR(str, argc, argv);
6169  }
6170 
6171  if (NIL_P(rs)) {
6172  if (wantarray) {
6173  rb_ary_push(ary, str);
6174  return ary;
6175  }
6176  else {
6177  rb_yield(str);
6178  return orig;
6179  }
6180  }
6181  str = rb_str_new4(str);
6182  ptr = p = s = RSTRING_PTR(str);
6183  pend = p + RSTRING_LEN(str);
6184  len = RSTRING_LEN(str);
6185  StringValue(rs);
6186  if (rs == rb_default_rs) {
6187  enc = rb_enc_get(str);
6188  while (p < pend) {
6189  char *p0;
6190 
6191  p = memchr(p, '\n', pend - p);
6192  if (!p) break;
6193  p0 = rb_enc_left_char_head(s, p, pend, enc);
6194  if (!rb_enc_is_newline(p0, pend, enc)) {
6195  p++;
6196  continue;
6197  }
6198  p = p0 + rb_enc_mbclen(p0, pend, enc);
6199  line = rb_str_subseq(str, s - ptr, p - s);
6200  if (wantarray)
6201  rb_ary_push(ary, line);
6202  else
6203  rb_yield(line);
6204  str_mod_check(str, ptr, len);
6205  s = p;
6206  }
6207  goto finish;
6208  }
6209 
6210  enc = rb_enc_check(str, rs);
6211  rslen = RSTRING_LEN(rs);
6212  if (rslen == 0) {
6213  newline = '\n';
6214  }
6215  else {
6216  newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
6217  }
6218 
6219  while (p < pend) {
6220  unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
6221 
6222  again:
6223  if (rslen == 0 && c == newline) {
6224  p += n;
6225  if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) {
6226  goto again;
6227  }
6228  while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) {
6229  p += n;
6230  }
6231  p -= n;
6232  }
6233  if (c == newline &&
6234  (rslen <= 1 ||
6235  (pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) {
6236  const char *pp = p + (rslen ? rslen : n);
6237  line = rb_str_subseq(str, s - ptr, pp - s);
6238  if (wantarray)
6239  rb_ary_push(ary, line);
6240  else
6241  rb_yield(line);
6242  str_mod_check(str, ptr, len);
6243  s = pp;
6244  }
6245  p += n;
6246  }
6247 
6248  finish:
6249  if (s != pend) {
6250  line = rb_str_subseq(str, s - ptr, pend - s);
6251  if (wantarray)
6252  rb_ary_push(ary, line);
6253  else
6254  rb_yield(line);
6255  RB_GC_GUARD(str);
6256  }
6257 
6258  if (wantarray)
6259  return ary;
6260  else
6261  return orig;
6262 }
6263 
6264 /*
6265  * call-seq:
6266  * str.each_line(separator=$/) {|substr| block } -> str
6267  * str.each_line(separator=$/) -> an_enumerator
6268  *
6269  * Splits <i>str</i> using the supplied parameter as the record
6270  * separator (<code>$/</code> by default), passing each substring in
6271  * turn to the supplied block. If a zero-length record separator is
6272  * supplied, the string is split into paragraphs delimited by
6273  * multiple successive newlines.
6274  *
6275  * If no block is given, an enumerator is returned instead.
6276  *
6277  * print "Example one\n"
6278  * "hello\nworld".each_line {|s| p s}
6279  * print "Example two\n"
6280  * "hello\nworld".each_line('l') {|s| p s}
6281  * print "Example three\n"
6282  * "hello\n\n\nworld".each_line('') {|s| p s}
6283  *
6284  * <em>produces:</em>
6285  *
6286  * Example one
6287  * "hello\n"
6288  * "world"
6289  * Example two
6290  * "hel"
6291  * "l"
6292  * "o\nworl"
6293  * "d"
6294  * Example three
6295  * "hello\n\n\n"
6296  * "world"
6297  */
6298 
/* Entry point documented above as String#each_line: forwards its arguments to
 * rb_str_enumerate_lines with a final argument of 0 (presumably the
 * wantarray flag, i.e. "yield, do not collect" -- confirm against the callee).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6299 static VALUE
6301 {
6302  return rb_str_enumerate_lines(argc, argv, str, 0);
6303 }
6304 
6305 /*
6306  * call-seq:
6307  * str.lines(separator=$/) -> an_array
6308  *
6309  * Returns an array of lines in <i>str</i> split using the supplied
6310  * record separator (<code>$/</code> by default). This is a
6311  * shorthand for <code>str.each_line(separator).to_a</code>.
6312  *
6313  * If a block is given, which is a deprecated form, works the same as
6314  * <code>each_line</code>.
6315  */
6316 
/* Entry point documented above as String#lines: forwards its arguments to
 * rb_str_enumerate_lines with a final argument of 1 (presumably the
 * wantarray flag, i.e. "collect into an array" -- confirm against the callee).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6317 static VALUE
6319 {
6320  return rb_str_enumerate_lines(argc, argv, str, 1);
6321 }
6322 
/* Returns the byte length of str (RSTRING_LEN) as a Fixnum.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; presumably this is the size callback for the
 * each_byte enumerator -- confirm. */
6323 static VALUE
6325 {
6326  return LONG2FIX(RSTRING_LEN(str));
6327 }
6328 
/*
 * Walks str byte by byte. Each byte is converted to a Fixnum in 0..255 and is
 * either pushed onto an array (wantarray != 0) or yielded to the block.
 * Returns the array when collecting, otherwise str itself.
 *
 * NOTE(review): two lines are missing from this listing: the declaration of
 * `ary` (listing line 6333) and the statement belonging to the `else` at
 * listing line 6349 (listing line 6350; presumably the "return an enumerator"
 * path described in the each_byte docs -- confirm against upstream).
 */
6329 static VALUE
6330 rb_str_enumerate_bytes(VALUE str, int wantarray)
6331 {
6332  long i;
6334 
6335  if (rb_block_given_p()) {
6336  if (wantarray) {
6337 #if 0 /* next major */
6338  rb_warn("given block not used");
6339  ary = rb_ary_new();
6340 #else
 /* A block passed to the array-returning form is still honoured, with a
  * deprecation warning: switch to yielding instead of collecting. */
6341  rb_warning("passing a block to String#bytes is deprecated");
6342  wantarray = 0;
6343 #endif
6344  }
6345  }
6346  else {
6347  if (wantarray)
6348  ary = rb_ary_new2(RSTRING_LEN(str));
6349  else
6351  }
6352 
 /* RSTRING_LEN/RSTRING_PTR are re-read on every iteration rather than cached. */
6353  for (i=0; i<RSTRING_LEN(str); i++) {
6354  if (wantarray)
6355  rb_ary_push(ary, INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6356  else
6357  rb_yield(INT2FIX(RSTRING_PTR(str)[i] & 0xff));
6358  }
6359  if (wantarray)
6360  return ary;
6361  else
6362  return str;
6363 }
6364 
6365 /*
6366  * call-seq:
6367  * str.each_byte {|fixnum| block } -> str
6368  * str.each_byte -> an_enumerator
6369  *
6370  * Passes each byte in <i>str</i> to the given block, or returns an
6371  * enumerator if no block is given.
6372  *
6373  * "hello".each_byte {|c| print c, ' ' }
6374  *
6375  * <em>produces:</em>
6376  *
6377  * 104 101 108 108 111
6378  */
6379 
/* Entry point documented above as String#each_byte: calls
 * rb_str_enumerate_bytes with wantarray = 0 (yield each byte).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6380 static VALUE
6382 {
6383  return rb_str_enumerate_bytes(str, 0);
6384 }
6385 
6386 /*
6387  * call-seq:
6388  * str.bytes -> an_array
6389  *
6390  * Returns an array of bytes in <i>str</i>. This is a shorthand for
6391  * <code>str.each_byte.to_a</code>.
6392  *
6393  * If a block is given, which is a deprecated form, works the same as
6394  * <code>each_byte</code>.
6395  */
6396 
/* Entry point documented above as String#bytes: calls
 * rb_str_enumerate_bytes with wantarray = 1 (collect bytes into an array).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6397 static VALUE
6399 {
6400  return rb_str_enumerate_bytes(str, 1);
6401 }
6402 
/* Returns the number of characters in str as a Fixnum. When
 * single_byte_optimizable(str) holds, that is the byte length; otherwise the
 * string is walked one character at a time with rb_enc_mbclen.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing; presumably this is the size callback for the
 * character/codepoint enumerators -- confirm. */
6403 static VALUE
6405 {
6406  long len = RSTRING_LEN(str);
6407  if (!single_byte_optimizable(str)) {
6408  const char *ptr = RSTRING_PTR(str);
6409  rb_encoding *enc = rb_enc_get(str);
6410  const char *end_ptr = ptr + len;
 /* len is reused as the character counter from here on. */
6411  for (len = 0; ptr < end_ptr; ++len) {
6412  ptr += rb_enc_mbclen(ptr, end_ptr, enc);
6413  }
6414  }
6415  return LONG2FIX(len);
6416 }
6417 
/*
 * Walks str character by character. Each character is produced as a substring
 * (rb_str_subseq) and is either pushed onto an array (wantarray != 0) or
 * yielded to the block. Returns the array when collecting, otherwise the
 * string the caller passed in (orig).
 *
 * NOTE(review): two lines are missing from this listing: the declaration of
 * `ary` (listing line 6426) and the statement belonging to the `else` at
 * listing line 6442 (listing line 6443; presumably the "return an enumerator"
 * path described in the each_char docs -- confirm against upstream).
 */
6418 static VALUE
6419 rb_str_enumerate_chars(VALUE str, int wantarray)
6420 {
6421  VALUE orig = str;
6422  VALUE substr;
6423  long i, len, n;
6424  const char *ptr;
6425  rb_encoding *enc;
6427 
6428  if (rb_block_given_p()) {
6429  if (wantarray) {
6430 #if 0 /* next major */
6431  rb_warn("given block not used");
6432  ary = rb_ary_new();
6433 #else
 /* A block passed to the array-returning form is still honoured, with a
  * deprecation warning: switch to yielding instead of collecting. */
6434  rb_warning("passing a block to String#chars is deprecated");
6435  wantarray = 0;
6436 #endif
6437  }
6438  }
6439  else {
6440  if (wantarray)
6441  ary = rb_ary_new();
6442  else
6444  }
6445 
 /* Iterate over the result of rb_str_new4(str) rather than str itself;
  * presumably a frozen snapshot so the block cannot disturb ptr/len --
  * confirm against rb_str_new4. */
6446  str = rb_str_new4(str);
6447  ptr = RSTRING_PTR(str);
6448  len = RSTRING_LEN(str);
6449  enc = rb_enc_get(str);
6450  switch (ENC_CODERANGE(str)) {
 /* Known-valid or 7-bit content uses rb_enc_fast_mbclen ... */
6451  case ENC_CODERANGE_VALID:
6452  case ENC_CODERANGE_7BIT:
6453  for (i = 0; i < len; i += n) {
6454  n = rb_enc_fast_mbclen(ptr + i, ptr + len, enc);
6455  substr = rb_str_subseq(str, i, n);
6456  if (wantarray)
6457  rb_ary_push(ary, substr);
6458  else
6459  rb_yield(substr);
6460  }
6461  break;
 /* ... any other code range uses rb_enc_mbclen. */
6462  default:
6463  for (i = 0; i < len; i += n) {
6464  n = rb_enc_mbclen(ptr + i, ptr + len, enc);
6465  substr = rb_str_subseq(str, i, n);
6466  if (wantarray)
6467  rb_ary_push(ary, substr);
6468  else
6469  rb_yield(substr);
6470  }
6471  }
6472  RB_GC_GUARD(str);
6473  if (wantarray)
6474  return ary;
6475  else
6476  return orig;
6477 }
6478 
6479 /*
6480  * call-seq:
6481  * str.each_char {|cstr| block } -> str
6482  * str.each_char -> an_enumerator
6483  *
6484  * Passes each character in <i>str</i> to the given block, or returns
6485  * an enumerator if no block is given.
6486  *
6487  * "hello".each_char {|c| print c, ' ' }
6488  *
6489  * <em>produces:</em>
6490  *
6491  * h e l l o
6492  */
6493 
/* Entry point documented above as String#each_char: calls
 * rb_str_enumerate_chars with wantarray = 0 (yield each character).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6494 static VALUE
6496 {
6497  return rb_str_enumerate_chars(str, 0);
6498 }
6499 
6500 /*
6501  * call-seq:
6502  * str.chars -> an_array
6503  *
6504  * Returns an array of characters in <i>str</i>. This is a shorthand
6505  * for <code>str.each_char.to_a</code>.
6506  *
6507  * If a block is given, which is a deprecated form, works the same as
6508  * <code>each_char</code>.
6509  */
6510 
/* Entry point documented above as String#chars: calls
 * rb_str_enumerate_chars with wantarray = 1 (collect characters into an array).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6511 static VALUE
6513 {
6514  return rb_str_enumerate_chars(str, 1);
6515 }
6516 
6517 
/*
 * Walks str codepoint by codepoint. Each codepoint is converted with UINT2NUM
 * and is either pushed onto an array (wantarray != 0) or yielded to the block.
 * Returns the array when collecting, otherwise the string the caller passed
 * in (orig). Strings for which single_byte_optimizable() holds are handed to
 * rb_str_enumerate_bytes instead.
 *
 * NOTE(review): three lines are missing from this listing: the declarator
 * (listing line 6519; the callers below invoke it as
 * rb_str_enumerate_codepoints(str, 0|1)), the declaration of `ary` (listing
 * line 6526), and the statement belonging to the `else` at listing line 6545
 * (listing line 6546; presumably the "return an enumerator" path -- confirm).
 */
6518 static VALUE
6520 {
6521  VALUE orig = str;
6522  int n;
6523  unsigned int c;
6524  const char *ptr, *end;
6525  rb_encoding *enc;
6527 
6528  if (single_byte_optimizable(str))
6529  return rb_str_enumerate_bytes(str, wantarray);
6530 
6531  if (rb_block_given_p()) {
6532  if (wantarray) {
6533 #if 0 /* next major */
6534  rb_warn("given block not used");
6535  ary = rb_ary_new();
6536 #else
 /* A block passed to the array-returning form is still honoured, with a
  * deprecation warning: switch to yielding instead of collecting. */
6537  rb_warning("passing a block to String#codepoints is deprecated");
6538  wantarray = 0;
6539 #endif
6540  }
6541  }
6542  else {
6543  if (wantarray)
6544  ary = rb_ary_new();
6545  else
6547  }
6548 
 /* Iterate over the result of rb_str_new4(str) rather than str itself;
  * presumably a frozen snapshot so the block cannot disturb ptr/end --
  * confirm against rb_str_new4. */
6549  str = rb_str_new4(str);
6550  ptr = RSTRING_PTR(str);
6551  end = RSTRING_END(str);
6552  enc = STR_ENC_GET(str);
6553  while (ptr < end) {
 /* n receives the byte length of the character just decoded. */
6554  c = rb_enc_codepoint_len(ptr, end, &n, enc);
6555  if (wantarray)
6556  rb_ary_push(ary, UINT2NUM(c));
6557  else
6558  rb_yield(UINT2NUM(c));
6559  ptr += n;
6560  }
6561  RB_GC_GUARD(str);
6562  if (wantarray)
6563  return ary;
6564  else
6565  return orig;
6566 }
6567 
6568 /*
6569  * call-seq:
6570  * str.each_codepoint {|integer| block } -> str
6571  * str.each_codepoint -> an_enumerator
6572  *
6573  * Passes the <code>Integer</code> ordinal of each character in <i>str</i>
6574  * (also known as a <i>codepoint</i> when applied to Unicode strings) to the
6575  * given block.
6576  *
6577  * If no block is given, an enumerator is returned instead.
6578  *
6579  * "hello\u0639".each_codepoint {|c| print c, ' ' }
6580  *
6581  * <em>produces:</em>
6582  *
6583  * 104 101 108 108 111 1593
6584  */
6585 
/* Entry point documented above as String#each_codepoint: calls
 * rb_str_enumerate_codepoints with a second argument of 0 (yield each codepoint).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6586 static VALUE
6588 {
6589  return rb_str_enumerate_codepoints(str, 0);
6590 }
6591 
6592 /*
6593  * call-seq:
6594  * str.codepoints -> an_array
6595  *
6596  * Returns an array of the <code>Integer</code> ordinals of the
6597  * characters in <i>str</i>. This is a shorthand for
6598  * <code>str.each_codepoint.to_a</code>.
6599  *
6600  * If a block is given, which is a deprecated form, works the same as
6601  * <code>each_codepoint</code>.
6602  */
6603 
/* Entry point documented above as String#codepoints: calls
 * rb_str_enumerate_codepoints with a second argument of 1 (collect into an array).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6604 static VALUE
6606 {
6607  return rb_str_enumerate_codepoints(str, 1);
6608 }
6609 
6610 
/*
 * Computes the byte length str would have after "chop": the offset of the
 * last character, or of the "\r" before it when the string ends in "\r\n".
 * Returns 0 when no previous character can be found.
 *
 * NOTE(review): the declarator line is missing from this listing; the
 * callers below invoke this as chopped_length(str).
 */
6611 static long
6613 {
6614  rb_encoding *enc = STR_ENC_GET(str);
6615  const char *p, *p2, *beg, *end;
6616 
6617  beg = RSTRING_PTR(str);
6618  end = beg + RSTRING_LEN(str);
6619  if (beg > end) return 0;
 /* p = start of the last character. */
6620  p = rb_enc_prev_char(beg, end, end, enc);
6621  if (!p) return 0;
 /* If the last character is "\n" and the one before it is "\r", drop both. */
6622  if (p > beg && rb_enc_ascget(p, end, 0, enc) == '\n') {
6623  p2 = rb_enc_prev_char(beg, p, end, enc);
6624  if (p2 && rb_enc_ascget(p2, end, 0, enc) == '\r') p = p2;
6625  }
6626  return p - beg;
6627 }
6628 
6629 /*
6630  * call-seq:
6631  * str.chop! -> str or nil
6632  *
6633  * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
6634  * or <code>nil</code> if <i>str</i> is the empty string. See also
6635  * <code>String#chomp!</code>.
6636  */
6637 
/* In-place chop (documented above as String#chop!). For a non-empty string it
 * truncates to chopped_length(str), re-terminates the buffer, and returns str;
 * for an empty string it returns nil.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6638 static VALUE
6640 {
6641  str_modify_keep_cr(str);
6642  if (RSTRING_LEN(str) > 0) {
6643  long len;
6644  len = chopped_length(str);
6645  STR_SET_LEN(str, len);
6646  RSTRING_PTR(str)[len] = '\0';
 /* The cached code range is cleared unless it is 7BIT. */
6647  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
6648  ENC_CODERANGE_CLEAR(str);
6649  }
6650  return str;
6651  }
6652  return Qnil;
6653 }
6654 
6655 
6656 /*
6657  * call-seq:
6658  * str.chop -> new_str
6659  *
6660  * Returns a new <code>String</code> with the last character removed. If the
6661  * string ends with <code>\r\n</code>, both characters are removed. Applying
6662  * <code>chop</code> to an empty string returns an empty
6663  * string. <code>String#chomp</code> is often a safer alternative, as it leaves
6664  * the string unchanged if it doesn't end in a record separator.
6665  *
6666  * "string\r\n".chop #=> "string"
6667  * "string\n\r".chop #=> "string\n"
6668  * "string\n".chop #=> "string"
6669  * "string".chop #=> "strin"
6670  * "x".chop.chop #=> ""
6671  */
6672 
/* Non-destructive chop (documented above as String#chop): returns the
 * substring of str covering bytes [0, chopped_length(str)).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6673 static VALUE
6675 {
6676  return rb_str_subseq(str, 0, chopped_length(str));
6677 }
6678 
6679 
6680 /*
6681  * call-seq:
6682  * str.chomp!(separator=$/) -> str or nil
6683  *
6684  * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
6685  * returning <i>str</i>, or <code>nil</code> if no modifications were made.
6686  */
6687 
/*
 * In-place chomp (documented above as String#chomp!). Returns str when
 * something was removed and nil otherwise. The separator is the optional
 * argument, or rb_rs when no argument is given:
 *   - default separator, or an explicit "\n": "smart chomp", which removes a
 *     trailing "\n", "\r\n" or "\r";
 *   - nil: nothing is removed;
 *   - empty string: every trailing "\n" / "\r\n" is removed;
 *   - anything else: removed only if str ends with it and the match starts on
 *     a character boundary.
 *
 * NOTE(review): the declarator line is missing from this listing; the caller
 * below invokes this as rb_str_chomp_bang(argc, argv, str).
 */
6688 static VALUE
6690 {
6691  rb_encoding *enc;
6692  VALUE rs;
6693  int newline;
6694  char *p, *pp, *e;
6695  long len, rslen;
6696 
6697  str_modify_keep_cr(str);
6698  len = RSTRING_LEN(str);
6699  if (len == 0) return Qnil;
6700  p = RSTRING_PTR(str);
6701  e = p + len;
6702  if (argc == 0) {
6703  rs = rb_rs;
6704  if (rs == rb_default_rs) {
 /* Also the jump target for an explicit "\n" separator (see the goto below). */
6705  smart_chomp:
6706  enc = rb_enc_get(str);
6707  if (rb_enc_mbminlen(enc) > 1) {
 /* Encodings whose minimum character width exceeds one byte: step back by
  * whole characters, first over a newline, then over a "\r". */
6708  pp = rb_enc_left_char_head(p, e-rb_enc_mbminlen(enc), e, enc);
6709  if (rb_enc_is_newline(pp, e, enc)) {
6710  e = pp;
6711  }
6712  pp = e - rb_enc_mbminlen(enc);
6713  if (pp >= p) {
6714  pp = rb_enc_left_char_head(p, pp, e, enc);
6715  if (rb_enc_ascget(pp, e, 0, enc) == '\r') {
6716  e = pp;
6717  }
6718  }
 /* e never moved: nothing to remove. */
6719  if (e == RSTRING_END(str)) {
6720  return Qnil;
6721  }
6722  len = e - RSTRING_PTR(str);
6723  STR_SET_LEN(str, len);
6724  }
6725  else {
 /* Byte-wise path: "\n" (optionally preceded by "\r"), or a lone "\r". */
6726  if (RSTRING_PTR(str)[len-1] == '\n') {
6727  STR_DEC_LEN(str);
6728  if (RSTRING_LEN(str) > 0 &&
6729  RSTRING_PTR(str)[RSTRING_LEN(str)-1] == '\r') {
6730  STR_DEC_LEN(str);
6731  }
6732  }
6733  else if (RSTRING_PTR(str)[len-1] == '\r') {
6734  STR_DEC_LEN(str);
6735  }
6736  else {
6737  return Qnil;
6738  }
6739  }
6740  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6741  return str;
6742  }
6743  }
6744  else {
6745  rb_scan_args(argc, argv, "01", &rs);
6746  }
6747  if (NIL_P(rs)) return Qnil;
6748  StringValue(rs);
6749  rslen = RSTRING_LEN(rs);
6750  if (rslen == 0) {
 /* Empty separator: strip every trailing "\n" or "\r\n". */
6751  while (len>0 && p[len-1] == '\n') {
6752  len--;
6753  if (len>0 && p[len-1] == '\r')
6754  len--;
6755  }
6756  if (len < RSTRING_LEN(str)) {
6757  STR_SET_LEN(str, len);
6758  RSTRING_PTR(str)[len] = '\0';
6759  return str;
6760  }
6761  return Qnil;
6762  }
6763  if (rslen > len) return Qnil;
 /* `newline` holds the LAST byte of the separator, used as a cheap pre-check. */
6764  newline = RSTRING_PTR(rs)[rslen-1];
6765  if (rslen == 1 && newline == '\n')
6766  goto smart_chomp;
6767 
6768  enc = rb_enc_check(str, rs);
6769  if (is_broken_string(rs)) {
6770  return Qnil;
6771  }
6772  pp = e - rslen;
6773  if (p[len-1] == newline &&
6774  (rslen <= 1 ||
6775  memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
 /* Reject a byte-level match that begins in the middle of a character. */
6776  if (rb_enc_left_char_head(p, pp, e, enc) != pp)
6777  return Qnil;
6778  if (ENC_CODERANGE(str) != ENC_CODERANGE_7BIT) {
6779  ENC_CODERANGE_CLEAR(str);
6780  }
6781  STR_SET_LEN(str, RSTRING_LEN(str) - rslen);
6782  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6783  return str;
6784  }
6785  return Qnil;
6786 }
6787 
6788 
6789 /*
6790  * call-seq:
6791  * str.chomp(separator=$/) -> new_str
6792  *
6793  * Returns a new <code>String</code> with the given record separator removed
6794  * from the end of <i>str</i> (if present). If <code>$/</code> has not been
6795  * changed from the default Ruby record separator, then <code>chomp</code> also
6796  * removes carriage return characters (that is it will remove <code>\n</code>,
6797  * <code>\r</code>, and <code>\r\n</code>).
6798  *
6799  * "hello".chomp #=> "hello"
6800  * "hello\n".chomp #=> "hello"
6801  * "hello\r\n".chomp #=> "hello"
6802  * "hello\n\r".chomp #=> "hello\n"
6803  * "hello\r".chomp #=> "hello"
6804  * "hello \n there".chomp #=> "hello \n there"
6805  * "hello".chomp("llo") #=> "he"
6806  */
6807 
/* Non-destructive chomp (documented above as String#chomp): duplicates str,
 * applies rb_str_chomp_bang to the copy, and returns the copy whether or not
 * anything was removed.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6808 static VALUE
6810 {
6811  str = rb_str_dup(str);
6812  rb_str_chomp_bang(argc, argv, str);
6813  return str;
6814 }
6815 
6816 /*
6817  * call-seq:
6818  * str.lstrip! -> self or nil
6819  *
6820  * Removes leading whitespace from <i>str</i>, returning <code>nil</code> if no
6821  * change was made. See also <code>String#rstrip!</code> and
6822  * <code>String#strip!</code>.
6823  *
6824  * " hello ".lstrip! #=> "hello "
6825  * "hello".lstrip! #=> nil
6826  */
6827 
/*
 * In-place lstrip (documented above as String#lstrip!). Skips leading
 * characters for which rb_isspace() is true, then shifts the remainder to the
 * front of the buffer. Returns str if anything was removed, nil otherwise
 * (including for an empty string).
 *
 * NOTE(review): the declarator line is missing from this listing; the callers
 * below invoke this as rb_str_lstrip_bang(str).
 */
6828 static VALUE
6830 {
6831  rb_encoding *enc;
6832  char *s, *t, *e;
6833 
6834  str_modify_keep_cr(str);
6835  enc = STR_ENC_GET(str);
6836  s = RSTRING_PTR(str);
6837  if (!s || RSTRING_LEN(str) == 0) return Qnil;
6838  e = t = RSTRING_END(str);
6839  /* remove spaces at head */
6840  while (s < e) {
6841  int n;
6842  unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
6843 
6844  if (!rb_isspace(cc)) break;
6845  s += n;
6846  }
6847 
6848  if (s > RSTRING_PTR(str)) {
 /* Length is updated first so that RSTRING_LEN below is the new length;
  * memmove is used because source and destination overlap. */
6849  STR_SET_LEN(str, t-s);
6850  memmove(RSTRING_PTR(str), s, RSTRING_LEN(str));
6851  RSTRING_PTR(str)[RSTRING_LEN(str)] = '\0';
6852  return str;
6853  }
6854  return Qnil;
6855 }
6856 
6857 
6858 /*
6859  * call-seq:
6860  * str.lstrip -> new_str
6861  *
6862  * Returns a copy of <i>str</i> with leading whitespace removed. See also
6863  * <code>String#rstrip</code> and <code>String#strip</code>.
6864  *
6865  * " hello ".lstrip #=> "hello "
6866  * "hello".lstrip #=> "hello"
6867  */
6868 
/* Non-destructive lstrip (documented above as String#lstrip): duplicates str,
 * applies rb_str_lstrip_bang to the copy, and returns the copy.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6869 static VALUE
6871 {
6872  str = rb_str_dup(str);
6873  rb_str_lstrip_bang(str);
6874  return str;
6875 }
6876 
6877 
6878 /*
6879  * call-seq:
6880  * str.rstrip! -> self or nil
6881  *
6882  * Removes trailing whitespace from <i>str</i>, returning <code>nil</code> if
6883  * no change was made. See also <code>String#lstrip!</code> and
6884  * <code>String#strip!</code>.
6885  *
6886  * " hello ".rstrip! #=> " hello"
6887  * "hello".rstrip! #=> nil
6888  */
6889 
/*
 * In-place rstrip (documented above as String#rstrip!). Removes trailing
 * whitespace and trailing NUL characters. Returns str if anything was
 * removed, nil otherwise (including for an empty string).
 *
 * NOTE(review): two lines are missing from this listing: the declarator
 * (the caller below invokes this as rb_str_rstrip_bang(str)) and listing
 * line 6898, whose content cannot be determined from here.
 */
6890 static VALUE
6892 {
6893  rb_encoding *enc;
6894  char *s, *t, *e;
6895 
6896  str_modify_keep_cr(str);
6897  enc = STR_ENC_GET(str);
6899  s = RSTRING_PTR(str);
6900  if (!s || RSTRING_LEN(str) == 0) return Qnil;
6901  t = e = RSTRING_END(str);
6902 
6903  /* remove trailing spaces or '\0's */
6904  if (single_byte_optimizable(str)) {
 /* Byte-wise scan backwards from the end. */
6905  unsigned char c;
6906  while (s < t && ((c = *(t-1)) == '\0' || ascii_isspace(c))) t--;
6907  }
6908  else {
6909  char *tp;
6910 
 /* Character-wise scan backwards; stops at the first codepoint that is
  * neither 0 nor whitespace. */
6911  while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
6912  unsigned int c = rb_enc_codepoint(tp, e, enc);
6913  if (c && !rb_isspace(c)) break;
6914  t = tp;
6915  }
6916  }
6917  if (t < e) {
6918  long len = t-RSTRING_PTR(str);
6919 
6920  STR_SET_LEN(str, len);
6921  RSTRING_PTR(str)[len] = '\0';
6922  return str;
6923  }
6924  return Qnil;
6925 }
6926 
6927 
6928 /*
6929  * call-seq:
6930  * str.rstrip -> new_str
6931  *
6932  * Returns a copy of <i>str</i> with trailing whitespace removed. See also
6933  * <code>String#lstrip</code> and <code>String#strip</code>.
6934  *
6935  * " hello ".rstrip #=> " hello"
6936  * "hello".rstrip #=> "hello"
6937  */
6938 
/* Non-destructive rstrip (documented above as String#rstrip): duplicates str,
 * applies rb_str_rstrip_bang to the copy, and returns the copy.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6939 static VALUE
6941 {
6942  str = rb_str_dup(str);
6943  rb_str_rstrip_bang(str);
6944  return str;
6945 }
6946 
6947 
6948 /*
6949  * call-seq:
6950  * str.strip! -> str or nil
6951  *
6952  * Removes leading and trailing whitespace from <i>str</i>. Returns
6953  * <code>nil</code> if <i>str</i> was not altered.
6954  */
6955 
/* In-place strip (documented above as String#strip!): runs
 * rb_str_lstrip_bang and then rb_str_rstrip_bang on str. Returns nil only if
 * both returned nil (nothing changed), otherwise str.
 * NOTE(review): the declarator line is missing from this listing; the caller
 * below invokes this as rb_str_strip_bang(str). */
6956 static VALUE
6958 {
6959  VALUE l = rb_str_lstrip_bang(str);
6960  VALUE r = rb_str_rstrip_bang(str);
6961 
6962  if (NIL_P(l) && NIL_P(r)) return Qnil;
6963  return str;
6964 }
6965 
6966 
6967 /*
6968  * call-seq:
6969  * str.strip -> new_str
6970  *
6971  * Returns a copy of <i>str</i> with leading and trailing whitespace removed.
6972  *
6973  * " hello ".strip #=> "hello"
6974  * "\tgoodbye\r\n".strip #=> "goodbye"
6975  */
6976 
/* Non-destructive strip (documented above as String#strip): duplicates str,
 * applies rb_str_strip_bang to the copy, and returns the copy.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
6977 static VALUE
6979 {
6980  str = rb_str_dup(str);
6981  rb_str_strip_bang(str);
6982  return str;
6983 }
6984 
6985 static VALUE
6986 scan_once(VALUE str, VALUE pat, long *start)
6987 {
6988  VALUE result, match;
6989  struct re_registers *regs;
6990  int i;
6991 
6992  if (rb_reg_search(pat, str, *start, 0) >= 0) {
6993  match = rb_backref_get();
6994  regs = RMATCH_REGS(match);
6995  if (BEG(0) == END(0)) {
6996  rb_encoding *enc = STR_ENC_GET(str);
6997  /*
6998  * Always consume at least one character of the input string
6999  */
7000  if (RSTRING_LEN(str) > END(0))
7001  *start = END(0)+rb_enc_fast_mbclen(RSTRING_PTR(str)+END(0),
7002  RSTRING_END(str), enc);
7003  else
7004  *start = END(0)+1;
7005  }
7006  else {
7007  *start = END(0);
7008  }
7009  if (regs->num_regs == 1) {
7010  return rb_reg_nth_match(0, match);
7011  }
7012  result = rb_ary_new2(regs->num_regs);
7013  for (i=1; i < regs->num_regs; i++) {
7014  rb_ary_push(result, rb_reg_nth_match(i, match));
7015  }
7016 
7017  return result;
7018  }
7019  return Qnil;
7020 }
7021 
7022 
7023 /*
7024  * call-seq:
7025  * str.scan(pattern) -> array
7026  * str.scan(pattern) {|match, ...| block } -> str
7027  *
7028  * Both forms iterate through <i>str</i>, matching the pattern (which may be a
7029  * <code>Regexp</code> or a <code>String</code>). For each match, a result is
7030  * generated and either added to the result array or passed to the block. If
7031  * the pattern contains no groups, each individual result consists of the
7032  * matched string, <code>$&</code>. If the pattern contains groups, each
7033  * individual result is itself an array containing one entry per group.
7034  *
7035  * a = "cruel world"
7036  * a.scan(/\w+/) #=> ["cruel", "world"]
7037  * a.scan(/.../) #=> ["cru", "el ", "wor"]
7038  * a.scan(/(...)/) #=> [["cru"], ["el "], ["wor"]]
7039  * a.scan(/(..)(..)/) #=> [["cr", "ue"], ["l ", "wo"]]
7040  *
7041  * And the block form:
7042  *
7043  * a.scan(/\w+/) {|w| print "<<#{w}>> " }
7044  * print "\n"
7045  * a.scan(/(.)(.)/) {|x,y| print y, x }
7046  * print "\n"
7047  *
7048  * <em>produces:</em>
7049  *
7050  * <<cruel>> <<world>>
7051  * rceu lowlr
7052  */
7053 
/*
 * Repeatedly calls scan_once over str (documented above as String#scan).
 * Without a block the per-match results are collected into an array, which
 * is returned; with a block each result is yielded and str is returned.
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace.
 */
7054 static VALUE
7056 {
7057  VALUE result;
7058  long start = 0;
 /* prev/last trail `start` by one and two iterations respectively, so after
  * the loop `last` is the offset from which the final successful search began. */
7059  long last = -1, prev = 0;
7060  char *p = RSTRING_PTR(str); long len = RSTRING_LEN(str);
7061 
7062  pat = get_pat(pat, 1);
7063  if (!rb_block_given_p()) {
7064  VALUE ary = rb_ary_new();
7065 
7066  while (!NIL_P(result = scan_once(str, pat, &start))) {
7067  last = prev;
7068  prev = start;
7069  rb_ary_push(ary, result);
7070  }
 /* Re-run the last successful search; presumably to leave the backref
  * pointing at the final match rather than at the failed one -- confirm. */
7071  if (last >= 0) rb_reg_search(pat, str, last, 0);
7072  return ary;
7073  }
7074 
7075  while (!NIL_P(result = scan_once(str, pat, &start))) {
7076  last = prev;
7077  prev = start;
7078  rb_yield(result);
 /* Checked after every yield with the pointer/length captured on entry;
  * presumably raises if the block modified str -- confirm in str_mod_check. */
7079  str_mod_check(str, p, len);
7080  }
7081  if (last >= 0) rb_reg_search(pat, str, last, 0);
7082  return str;
7083 }
7084 
7085 
7086 /*
7087  * call-seq:
7088  * str.hex -> integer
7089  *
7090  * Treats leading characters from <i>str</i> as a string of hexadecimal digits
7091  * (with an optional sign and an optional <code>0x</code>) and returns the
7092  * corresponding number. Zero is returned on error.
7093  *
7094  * "0x0a".hex #=> 10
7095  * "-1234".hex #=> -4660
7096  * "0".hex #=> 0
7097  * "wombat".hex #=> 0
7098  */
7099 
/* Documented above as String#hex: converts str with rb_str_to_inum using
 * base 16 and a final argument of FALSE (presumably "do not raise on bad
 * input" -- confirm against rb_str_to_inum).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
7100 static VALUE
7102 {
7103  return rb_str_to_inum(str, 16, FALSE);
7104 }
7105 
7106 
7107 /*
7108  * call-seq:
7109  * str.oct -> integer
7110  *
7111  * Treats leading characters of <i>str</i> as a string of octal digits (with an
7112  * optional sign) and returns the corresponding number. Returns 0 if the
7113  * conversion fails.
7114  *
7115  * "123".oct #=> 83
7116  * "-377".oct #=> -255
7117  * "bad".oct #=> 0
7118  * "0377bad".oct #=> 255
7119  */
7120 
/* Documented above as String#oct: converts str with rb_str_to_inum using
 * base -8 and a final argument of FALSE.
 * NOTE(review): the meaning of the negative base is defined by
 * rb_str_to_inum, which is not visible here -- confirm. The declarator line
 * (function name and parameter list) is also missing from this listing. */
7121 static VALUE
7123 {
7124  return rb_str_to_inum(str, -8, FALSE);
7125 }
7126 
7127 
7128 /*
7129  * call-seq:
7130  * str.crypt(salt_str) -> new_str
7131  *
7132  * Applies a one-way cryptographic hash to <i>str</i> by invoking the
7133  * standard library function <code>crypt(3)</code> with the given
7134  * salt string. While the format and the result are system and
7135  * implementation dependent, using a salt matching the regular
7136  * expression <code>\A[a-zA-Z0-9./]{2}</code> should be valid and
7137  * safe on any platform, in which only the first two characters are
7138  * significant.
7139  *
7140  * This method is for use in system specific scripts, so if you want
7141  * a cross-platform hash function consider using Digest or OpenSSL
7142  * instead.
7143  */
7144 
/*
 * Documented above as String#crypt: hashes str with crypt(3) using salt and
 * returns the result as a new string that inherits taint from both inputs.
 * Raises ArgumentError when salt is shorter than two bytes and a system error
 * (rb_sys_fail) when crypt() returns NULL.
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace.
 */
7145 static VALUE
7147 {
7148  extern char *crypt(const char *, const char *);
7149  VALUE result;
7150  const char *s, *saltp;
7151  char *res;
7152 #ifdef BROKEN_CRYPT
7153  char salt_8bit_clean[3];
7154 #endif
7155 
7156  StringValue(salt);
7157  if (RSTRING_LEN(salt) < 2)
7158  rb_raise(rb_eArgError, "salt too short (need >=2 bytes)");
7159 
7160  s = RSTRING_PTR(str);
7161  if (!s) s = "";
7162  saltp = RSTRING_PTR(salt);
7163 #ifdef BROKEN_CRYPT
 /* On platforms flagged BROKEN_CRYPT, a salt with a non-ASCII byte in its
  * first two positions is replaced by a 7-bit-masked two-byte copy. */
7164  if (!ISASCII((unsigned char)saltp[0]) || !ISASCII((unsigned char)saltp[1])) {
7165  salt_8bit_clean[0] = saltp[0] & 0x7f;
7166  salt_8bit_clean[1] = saltp[1] & 0x7f;
7167  salt_8bit_clean[2] = '\0';
7168  saltp = salt_8bit_clean;
7169  }
7170 #endif
7171  res = crypt(s, saltp);
7172  if (!res) {
7173  rb_sys_fail("crypt");
7174  }
7175  result = rb_str_new2(res);
7176  OBJ_INFECT(result, str);
7177  OBJ_INFECT(result, salt);
7178  return result;
7179 }
7180 
7181 
7182 /*
7183  * call-seq:
7184  * str.intern -> symbol
7185  * str.to_sym -> symbol
7186  *
7187  * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
7188  * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
7189  *
7190  * "Koala".intern #=> :Koala
7191  * s = 'cat'.to_sym #=> :cat
7192  * s == :cat #=> true
7193  * s = '@cat'.to_sym #=> :@cat
7194  * s == :@cat #=> true
7195  *
7196  * This can also be used to create symbols that cannot be represented using the
7197  * <code>:xxx</code> notation.
7198  *
7199  * 'cat and dog'.to_sym #=> :"cat and dog"
7200  */
7201 
/* Documented above as String#intern / String#to_sym: interns the string with
 * rb_intern_str and returns the resulting ID wrapped as a Symbol.
 * NOTE(review): the declarator line is missing from this listing; the body
 * reads a single parameter named `s`. */
7202 VALUE
7204 {
 /* The argument is passed through RB_GC_GUARD before use. */
7205  VALUE str = RB_GC_GUARD(s);
7206  ID id;
7207 
7208  id = rb_intern_str(str);
7209  return ID2SYM(id);
7210 }
7211 
7212 
7213 /*
7214  * call-seq:
7215  * str.ord -> integer
7216  *
7217  * Return the <code>Integer</code> ordinal of a one-character string.
7218  *
7219  * "a".ord #=> 97
7220  */
7221 
/* Documented above as String#ord: returns the codepoint held in `c` as an
 * Integer (UINT2NUM).
 * NOTE(review): two lines are missing from this listing: the declarator
 * (listing line 7223) and the statement that assigns `c` (listing line 7227).
 * As shown, `c` is returned without a visible assignment -- restore both from
 * upstream before compiling. */
7222 VALUE
7224 {
7225  unsigned int c;
7226 
7228  return UINT2NUM(c);
7229 }
7230 /*
7231  * call-seq:
7232  * str.sum(n=16) -> integer
7233  *
7234  * Returns a basic <em>n</em>-bit checksum of the bytes in <i>str</i>,
7235  * where <em>n</em> is the optional <code>Fixnum</code> parameter, defaulting
7236  * to 16. The result is the sum of the binary value of each byte in
7237  * <i>str</i>, masked with <code>2**n - 1</code> (that is, modulo
7238  * <code>2**n</code>). This is not a particularly good checksum.
7239  */
7240 
7241 static VALUE
7243 {
7244  VALUE vbits;
7245  int bits;
7246  char *ptr, *p, *pend;
7247  long len;
7248  VALUE sum = INT2FIX(0);
7249  unsigned long sum0 = 0;
7250 
7251  if (argc == 0) {
7252  bits = 16;
7253  }
7254  else {
7255  rb_scan_args(argc, argv, "01", &vbits);
7256  bits = NUM2INT(vbits);
7257  }
7258  ptr = p = RSTRING_PTR(str);
7259  len = RSTRING_LEN(str);
7260  pend = p + len;
7261 
7262  while (p < pend) {
7263  if (FIXNUM_MAX - UCHAR_MAX < sum0) {
7264  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7265  str_mod_check(str, ptr, len);
7266  sum0 = 0;
7267  }
7268  sum0 += (unsigned char)*p;
7269  p++;
7270  }
7271 
7272  if (bits == 0) {
7273  if (sum0) {
7274  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7275  }
7276  }
7277  else {
7278  if (sum == INT2FIX(0)) {
7279  if (bits < (int)sizeof(long)*CHAR_BIT) {
7280  sum0 &= (((unsigned long)1)<<bits)-1;
7281  }
7282  sum = LONG2FIX(sum0);
7283  }
7284  else {
7285  VALUE mod;
7286 
7287  if (sum0) {
7288  sum = rb_funcall(sum, '+', 1, LONG2FIX(sum0));
7289  }
7290 
7291  mod = rb_funcall(INT2FIX(1), rb_intern("<<"), 1, INT2FIX(bits));
7292  mod = rb_funcall(mod, '-', 1, INT2FIX(1));
7293  sum = rb_funcall(sum, '&', 1, mod);
7294  }
7295  }
7296  return sum;
7297 }
7298 
7299 static VALUE
7300 rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
7301 {
7302  rb_encoding *enc;
7303  VALUE w;
7304  long width, len, flen = 1, fclen = 1;
7305  VALUE res;
7306  char *p;
7307  const char *f = " ";
7308  long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
7309  volatile VALUE pad;
7310  int singlebyte = 1, cr;
7311 
7312  rb_scan_args(argc, argv, "11", &w, &pad);
7313  enc = STR_ENC_GET(str);
7314  width = NUM2LONG(w);
7315  if (argc == 2) {
7316  StringValue(pad);
7317  enc = rb_enc_check(str, pad);
7318  f = RSTRING_PTR(pad);
7319  flen = RSTRING_LEN(pad);
7320  fclen = str_strlen(pad, enc);
7321  singlebyte = single_byte_optimizable(pad);
7322  if (flen == 0 || fclen == 0) {
7323  rb_raise(rb_eArgError, "zero width padding");
7324  }
7325  }
7326  len = str_strlen(str, enc);
7327  if (width < 0 || len >= width) return rb_str_dup(str);
7328  n = width - len;
7329  llen = (jflag == 'l') ? 0 : ((jflag == 'r') ? n : n/2);
7330  rlen = n - llen;
7331  cr = ENC_CODERANGE(str);
7332  if (flen > 1) {
7333  llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
7334  rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
7335  }
7336  size = RSTRING_LEN(str);
7337  if ((len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
7338  (len *= flen) >= LONG_MAX - llen2 - rlen2 ||
7339  (len += llen2 + rlen2) >= LONG_MAX - size) {
7340  rb_raise(rb_eArgError, "argument too big");
7341  }
7342  len += size;
7343  res = rb_str_new5(str, 0, len);
7344  p = RSTRING_PTR(res);
7345  if (flen <= 1) {
7346  memset(p, *f, llen);
7347  p += llen;
7348  }
7349  else {
7350  while (llen >= fclen) {
7351  memcpy(p,f,flen);
7352  p += flen;
7353  llen -= fclen;
7354  }
7355  if (llen > 0) {
7356  memcpy(p, f, llen2);
7357  p += llen2;
7358  }
7359  }
7360  memcpy(p, RSTRING_PTR(str), size);
7361  p += size;
7362  if (flen <= 1) {
7363  memset(p, *f, rlen);
7364  p += rlen;
7365  }
7366  else {
7367  while (rlen >= fclen) {
7368  memcpy(p,f,flen);
7369  p += flen;
7370  rlen -= fclen;
7371  }
7372  if (rlen > 0) {
7373  memcpy(p, f, rlen2);
7374  p += rlen2;
7375  }
7376  }
7377  *p = '\0';
7378  STR_SET_LEN(res, p-RSTRING_PTR(res));
7379  OBJ_INFECT(res, str);
7380  if (!NIL_P(pad)) OBJ_INFECT(res, pad);
7381  rb_enc_associate(res, enc);
7382  if (argc == 2)
7383  cr = ENC_CODERANGE_AND(cr, ENC_CODERANGE(pad));
7384  if (cr != ENC_CODERANGE_BROKEN)
7385  ENC_CODERANGE_SET(res, cr);
7386  return res;
7387 }
7388 
7389 
7390 /*
7391  * call-seq:
7392  * str.ljust(integer, padstr=' ') -> new_str
7393  *
7394  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7395  * <code>String</code> of length <i>integer</i> with <i>str</i> left justified
7396  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7397  *
7398  * "hello".ljust(4) #=> "hello"
7399  * "hello".ljust(20) #=> "hello "
7400  * "hello".ljust(20, '1234') #=> "hello123412341234123"
7401  */
7402 
/* Documented above as String#ljust: calls rb_str_justify with jflag 'l',
 * which places all padding to the right of str.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
7403 static VALUE
7405 {
7406  return rb_str_justify(argc, argv, str, 'l');
7407 }
7408 
7409 
7410 /*
7411  * call-seq:
7412  * str.rjust(integer, padstr=' ') -> new_str
7413  *
7414  * If <i>integer</i> is greater than the length of <i>str</i>, returns a new
7415  * <code>String</code> of length <i>integer</i> with <i>str</i> right justified
7416  * and padded with <i>padstr</i>; otherwise, returns <i>str</i>.
7417  *
7418  * "hello".rjust(4) #=> "hello"
7419  * "hello".rjust(20) #=> " hello"
7420  * "hello".rjust(20, '1234') #=> "123412341234123hello"
7421  */
7422 
/* Documented above as String#rjust: calls rb_str_justify with jflag 'r',
 * which places all padding to the left of str.
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
7423 static VALUE
7425 {
7426  return rb_str_justify(argc, argv, str, 'r');
7427 }
7428 
7429 
7430 /*
7431  * call-seq:
7432  * str.center(width, padstr=' ') -> new_str
7433  *
7434  * Centers +str+ in +width+. If +width+ is greater than the length of +str+,
7435  * returns a new String of length +width+ with +str+ centered and padded with
7436  * +padstr+; otherwise, returns +str+.
7437  *
7438  * "hello".center(4) #=> "hello"
7439  * "hello".center(20) #=> " hello "
7440  * "hello".center(20, '123') #=> "1231231hello12312312"
7441  */
7442 
/* Documented above as String#center: calls rb_str_justify with jflag 'c',
 * which splits the padding between both sides (the left side gets n/2).
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace. */
7443 static VALUE
7445 {
7446  return rb_str_justify(argc, argv, str, 'c');
7447 }
7448 
7449 /*
7450  * call-seq:
7451  * str.partition(sep) -> [head, sep, tail]
7452  * str.partition(regexp) -> [head, match, tail]
7453  *
7454  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string
7455  * and returns the part before it, the match, and the part
7456  * after it.
7457  * If it is not found, returns <i>str</i> and two empty strings.
7458  *
7459  * "hello".partition("l") #=> ["he", "l", "lo"]
7460  * "hello".partition("x") #=> ["hello", "", ""]
7461  * "hello".partition(/.l/) #=> ["h", "el", "lo"]
7462  */
7463 
/*
 * Documented above as String#partition: finds the first occurrence of sep
 * (a Regexp, or anything convertible to String) in str and returns the
 * three-element array [head, match, tail]. When nothing is found the result
 * is [str, "", ""]. Raises TypeError when sep is neither a Regexp nor
 * convertible to a String.
 *
 * NOTE(review): the declarator line (function name and parameter list) is
 * missing from this listing between the return type and the opening brace.
 */
7464 static VALUE
7466 {
7467  long pos;
7468  int regex = FALSE;
7469 
7470  if (RB_TYPE_P(sep, T_REGEXP)) {
7471  pos = rb_reg_search(sep, str, 0, 0);
7472  regex = TRUE;
7473  }
7474  else {
7475  VALUE tmp;
7476 
7477  tmp = rb_check_string_type(sep);
7478  if (NIL_P(tmp)) {
7479  rb_raise(rb_eTypeError, "type mismatch: %s given",
7480  rb_obj_classname(sep));
7481  }
7482  sep = tmp;
7483  pos = rb_str_index(str, sep, 0);
7484  }
7485  if (pos < 0) {
7486  failed:
7487  return rb_ary_new3(3, str, str_new_empty(str), str_new_empty(str));
7488  }
7489  if (regex) {
 /* Replace the Regexp with the text it matched. */
7490  sep = rb_str_subpat(str, sep, INT2FIX(0));
 /* An empty match at offset 0 is reported as "not found". */
7491  if (pos == 0 && RSTRING_LEN(sep) == 0) goto failed;
7492  }
 /* pos and RSTRING_LEN(sep) are used as byte offsets with rb_str_subseq. */
7493  return rb_ary_new3(3, rb_str_subseq(str, 0, pos),
7494  sep,
7495  rb_str_subseq(str, pos+RSTRING_LEN(sep),
7496  RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
7497 }
7498 
7499 /*
7500  * call-seq:
7501  * str.rpartition(sep) -> [head, sep, tail]
7502  * str.rpartition(regexp) -> [head, match, tail]
7503  *
7504  * Searches <i>sep</i> or pattern (<i>regexp</i>) in the string from the end
7505  * of the string, and returns the part before it, the match, and the part
7506  * after it.
7507  * If it is not found, returns two empty strings and <i>str</i>.
7508  *
7509  * "hello".rpartition("l") #=> ["hel", "l", "o"]
7510  * "hello".rpartition("x") #=> ["", "", "hello"]
7511  * "hello".rpartition(/.l/) #=> ["he", "ll", "o"]
7512  */
7513 
7514 static VALUE
7516 {
7517  long pos = RSTRING_LEN(str);
7518  int regex = FALSE;
7519 
7520  if (RB_TYPE_P(sep, T_REGEXP)) {
7521  pos = rb_reg_search(sep, str, pos, 1);
7522  regex = TRUE;
7523  }
7524  else {
7525  VALUE tmp;
7526 
7527  tmp = rb_check_string_type(sep);
7528  if (NIL_P(tmp)) {
7529  rb_raise(rb_eTypeError, "type mismatch: %s given",
7530  rb_obj_classname(sep));
7531  }
7532  sep = tmp;
7533  pos = rb_str_sublen(str, pos);
7534  pos = rb_str_rindex(str, sep, pos);
7535  }
7536  if (pos < 0) {
7537  return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), str);
7538  }
7539  if (regex) {
7540  sep = rb_reg_nth_match(0, rb_backref_get());
7541  }
7542  return rb_ary_new3(3, rb_str_substr(str, 0, pos),
7543  sep,
7544  rb_str_substr(str,pos+str_strlen(sep,STR_ENC_GET(sep)),RSTRING_LEN(str)));
7545 }
7546 
7547 /*
7548  * call-seq:
7549  * str.start_with?([prefixes]+) -> true or false
7550  *
7551  * Returns true if +str+ starts with one of the +prefixes+ given.
7552  *
7553  * "hello".start_with?("hell") #=> true
7554  *
7555  * # returns true if one of the prefixes matches.
7556  * "hello".start_with?("heaven", "hell") #=> true
7557  * "hello".start_with?("heaven", "paradise") #=> false
7558  */
7559 
7560 static VALUE
7562 {
7563  int i;
7564 
7565  for (i=0; i<argc; i++) {
7566  VALUE tmp = argv[i];
7567  StringValue(tmp);
7568  rb_enc_check(str, tmp);
7569  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7570  if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7571  return Qtrue;
7572  }
7573  return Qfalse;
7574 }
7575 
7576 /*
7577  * call-seq:
7578  * str.end_with?([suffixes]+) -> true or false
7579  *
7580  * Returns true if +str+ ends with one of the +suffixes+ given.
7581  */
7582 
7583 static VALUE
7585 {
7586  int i;
7587  char *p, *s, *e;
7588  rb_encoding *enc;
7589 
7590  for (i=0; i<argc; i++) {
7591  VALUE tmp = argv[i];
7592  StringValue(tmp);
7593  enc = rb_enc_check(str, tmp);
7594  if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
7595  p = RSTRING_PTR(str);
7596  e = p + RSTRING_LEN(str);
7597  s = e - RSTRING_LEN(tmp);
7598  if (rb_enc_left_char_head(p, s, e, enc) != s)
7599  continue;
7600  if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
7601  return Qtrue;
7602  }
7603  return Qfalse;
7604 }
7605 
7606 void
7608 {
7609  if (!NIL_P(val) && !RB_TYPE_P(val, T_STRING)) {
7610  rb_raise(rb_eTypeError, "value of %s must be String", rb_id2name(id));
7611  }
7612  *var = val;
7613 }
7614 
7615 
7616 /*
7617  * call-seq:
7618  * str.force_encoding(encoding) -> str
7619  *
7620  * Changes the encoding to +encoding+ and returns self.
7621  */
7622 
7623 static VALUE
7625 {
7626  str_modifiable(str);
7627  rb_enc_associate(str, rb_to_encoding(enc));
7628  ENC_CODERANGE_CLEAR(str);
7629  return str;
7630 }
7631 
7632 /*
7633  * call-seq:
7634  * str.b -> str
7635  *
7636  * Returns a copied string whose encoding is ASCII-8BIT.
7637  */
7638 
7639 static VALUE
7641 {
7642  VALUE str2 = str_alloc(rb_cString);
7643  str_replace_shared_without_enc(str2, str);
7644  OBJ_INFECT(str2, str);
7646  return str2;
7647 }
7648 
7649 /*
7650  * call-seq:
7651  * str.valid_encoding? -> true or false
7652  *
7653  * Returns true for a string which is encoded correctly.
7654  *
7655  * "\xc2\xa1".force_encoding("UTF-8").valid_encoding? #=> true
7656  * "\xc2".force_encoding("UTF-8").valid_encoding? #=> false
7657  * "\x80".force_encoding("UTF-8").valid_encoding? #=> false
7658  */
7659 
7660 static VALUE
7662 {
7663  int cr = rb_enc_str_coderange(str);
7664 
7665  return cr == ENC_CODERANGE_BROKEN ? Qfalse : Qtrue;
7666 }
7667 
7668 /*
7669  * call-seq:
7670  * str.ascii_only? -> true or false
7671  *
7672  * Returns true for a string which has only ASCII characters.
7673  *
7674  * "abc".force_encoding("UTF-8").ascii_only? #=> true
7675  * "abc\u{6666}".force_encoding("UTF-8").ascii_only? #=> false
7676  */
7677 
7678 static VALUE
7680 {
7681  int cr = rb_enc_str_coderange(str);
7682 
7683  return cr == ENC_CODERANGE_7BIT ? Qtrue : Qfalse;
7684 }
7685 
7700 VALUE
7701 rb_str_ellipsize(VALUE str, long len)
7702 {
7703  static const char ellipsis[] = "...";
7704  const long ellipsislen = sizeof(ellipsis) - 1;
7705  rb_encoding *const enc = rb_enc_get(str);
7706  const long blen = RSTRING_LEN(str);
7707  const char *const p = RSTRING_PTR(str), *e = p + blen;
7708  VALUE estr, ret = 0;
7709 
7710  if (len < 0) rb_raise(rb_eIndexError, "negative length %ld", len);
7711  if (len * rb_enc_mbminlen(enc) >= blen ||
7712  (e = rb_enc_nth(p, e, len, enc)) - p == blen) {
7713  ret = str;
7714  }
7715  else if (len <= ellipsislen ||
7716  !(e = rb_enc_step_back(p, e, e, len = ellipsislen, enc))) {
7717  if (rb_enc_asciicompat(enc)) {
7718  ret = rb_str_new_with_class(str, ellipsis, len);
7719  rb_enc_associate(ret, enc);
7720  }
7721  else {
7722  estr = rb_usascii_str_new(ellipsis, len);
7723  ret = rb_str_encode(estr, rb_enc_from_encoding(enc), 0, Qnil);
7724  }
7725  }
7726  else if (ret = rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
7727  rb_str_cat(ret, ellipsis, ellipsislen);
7728  }
7729  else {
7730  estr = rb_str_encode(rb_usascii_str_new(ellipsis, ellipsislen),
7731  rb_enc_from_encoding(enc), 0, Qnil);
7732  rb_str_append(ret, estr);
7733  }
7734  return ret;
7735 }
7736 
7737 /**********************************************************************
7738  * Document-class: Symbol
7739  *
7740  * <code>Symbol</code> objects represent names and some strings
7741  * inside the Ruby
7742  * interpreter. They are generated using the <code>:name</code> and
7743  * <code>:"string"</code> literals
7744  * syntax, and by the various <code>to_sym</code> methods. The same
7745  * <code>Symbol</code> object will be created for a given name or string
7746  * for the duration of a program's execution, regardless of the context
7747  * or meaning of that name. Thus if <code>Fred</code> is a constant in
7748  * one context, a method in another, and a class in a third, the
7749  * <code>Symbol</code> <code>:Fred</code> will be the same object in
7750  * all three contexts.
7751  *
7752  * module One
7753  * class Fred
7754  * end
7755  * $f1 = :Fred
7756  * end
7757  * module Two
7758  * Fred = 1
7759  * $f2 = :Fred
7760  * end
7761  * def Fred()
7762  * end
7763  * $f3 = :Fred
7764  * $f1.object_id #=> 2514190
7765  * $f2.object_id #=> 2514190
7766  * $f3.object_id #=> 2514190
7767  *
7768  */
7769 
7770 
7771 /*
7772  * call-seq:
7773  * sym == obj -> true or false
7774  *
7775  * Equality---If <i>sym</i> and <i>obj</i> are exactly the same
7776  * symbol, returns <code>true</code>.
7777  */
7778 
7779 static VALUE
7780 sym_equal(VALUE sym1, VALUE sym2)
7781 {
7782  if (sym1 == sym2) return Qtrue;
7783  return Qfalse;
7784 }
7785 
7786 
7787 static int
7788 sym_printable(const char *s, const char *send, rb_encoding *enc)
7789 {
7790  while (s < send) {
7791  int n;
7792  int c = rb_enc_codepoint_len(s, send, &n, enc);
7793 
7794  if (!rb_enc_isprint(c, enc)) return FALSE;
7795  s += n;
7796  }
7797  return TRUE;
7798 }
7799 
7800 int
7802 {
7803  rb_encoding *enc;
7804  const char *ptr;
7805  long len;
7807 
7808  if (resenc == NULL) resenc = rb_default_external_encoding();
7809  enc = STR_ENC_GET(sym);
7810  ptr = RSTRING_PTR(sym);
7811  len = RSTRING_LEN(sym);
7812  if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) || len != (long)strlen(ptr) ||
7813  !rb_enc_symname_p(ptr, enc) || !sym_printable(ptr, ptr + len, enc)) {
7814  return FALSE;
7815  }
7816  return TRUE;
7817 }
7818 
7819 VALUE
7821 {
7822  rb_encoding *enc;
7823  const char *ptr;
7824  long len;
7825  rb_encoding *resenc;
7826 
7827  Check_Type(str, T_STRING);
7828  resenc = rb_default_internal_encoding();
7829  if (resenc == NULL) resenc = rb_default_external_encoding();
7830  enc = STR_ENC_GET(str);
7831  ptr = RSTRING_PTR(str);
7832  len = RSTRING_LEN(str);
7833  if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
7834  !sym_printable(ptr, ptr + len, enc)) {
7835  return rb_str_inspect(str);
7836  }
7837  return str;
7838 }
7839 
7840 VALUE
7842 {
7843  return rb_str_quote_unprintable(rb_id2str(id));
7844 }
7845 
7846 /*
7847  * call-seq:
7848  * sym.inspect -> string
7849  *
7850  * Returns the representation of <i>sym</i> as a symbol literal.
7851  *
7852  * :fred.inspect #=> ":fred"
7853  */
7854 
7855 static VALUE
7857 {
7858  VALUE str;
7859  const char *ptr;
7860  long len;
7861  ID id = SYM2ID(sym);
7862  char *dest;
7863 
7864  sym = rb_id2str(id);
7865  if (!rb_str_symname_p(sym)) {
7866  str = rb_str_inspect(sym);
7867  len = RSTRING_LEN(str);
7868  rb_str_resize(str, len + 1);
7869  dest = RSTRING_PTR(str);
7870  memmove(dest + 1, dest, len);
7871  dest[0] = ':';
7872  }
7873  else {
7874  rb_encoding *enc = STR_ENC_GET(sym);
7875  ptr = RSTRING_PTR(sym);
7876  len = RSTRING_LEN(sym);
7877  str = rb_enc_str_new(0, len + 1, enc);
7878  dest = RSTRING_PTR(str);
7879  dest[0] = ':';
7880  memcpy(dest + 1, ptr, len);
7881  }
7882  return str;
7883 }
7884 
7885 
7886 /*
7887  * call-seq:
7888  * sym.id2name -> string
7889  * sym.to_s -> string
7890  *
7891  * Returns the name or string corresponding to <i>sym</i>.
7892  *
7893  * :fred.id2name #=> "fred"
7894  */
7895 
7896 
7897 VALUE
7899 {
7900  ID id = SYM2ID(sym);
7901 
7902  return str_new3(rb_cString, rb_id2str(id));
7903 }
7904 
7905 
7906 /*
7907  * call-seq:
7908  * sym.to_sym -> sym
7909  * sym.intern -> sym
7910  *
7911  * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
7912  * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
7913  * in this case.
7914  */
7915 
7916 static VALUE
7918 {
7919  return sym;
7920 }
7921 
7922 static VALUE
7923 sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
7924 {
7925  VALUE obj;
7926 
7927  if (argc < 1) {
7928  rb_raise(rb_eArgError, "no receiver given");
7929  }
7930  obj = argv[0];
7931  return rb_funcall_with_block(obj, (ID)sym, argc - 1, argv + 1, passed_proc);
7932 }
7933 
7934 /*
7935  * call-seq:
7936  * sym.to_proc
7937  *
7938  * Returns a _Proc_ object which responds to the given method by _sym_.
7939  *
7940  * (1..3).collect(&:to_s) #=> ["1", "2", "3"]
7941  */
7942 
7943 static VALUE
7945 {
7946  static VALUE sym_proc_cache = Qfalse;
7947  enum {SYM_PROC_CACHE_SIZE = 67};
7948  VALUE proc;
7949  long id, index;
7950  VALUE *aryp;
7951 
7952  if (!sym_proc_cache) {
7953  sym_proc_cache = rb_ary_tmp_new(SYM_PROC_CACHE_SIZE * 2);
7954  rb_gc_register_mark_object(sym_proc_cache);
7955  rb_ary_store(sym_proc_cache, SYM_PROC_CACHE_SIZE*2 - 1, Qnil);
7956  }
7957 
7958  id = SYM2ID(sym);
7959  index = (id % SYM_PROC_CACHE_SIZE) << 1;
7960 
7961  aryp = RARRAY_PTR(sym_proc_cache);
7962  if (aryp[index] == sym) {
7963  return aryp[index + 1];
7964  }
7965  else {
7966  proc = rb_proc_new(sym_call, (VALUE)id);
7967  aryp[index] = sym;
7968  aryp[index + 1] = proc;
7969  return proc;
7970  }
7971 }
7972 
7973 /*
7974  * call-seq:
7975  *
7976  * sym.succ
7977  *
7978  * Same as <code>sym.to_s.succ.intern</code>.
7979  */
7980 
7981 static VALUE
7983 {
7984  return rb_str_intern(rb_str_succ(rb_sym_to_s(sym)));
7985 }
7986 
7987 /*
7988  * call-seq:
7989  *
7990  * symbol <=> other_symbol -> -1, 0, +1 or nil
7991  *
7992  * Compares +symbol+ with +other_symbol+ after calling #to_s on each of the
7993  * symbols. Returns -1, 0, +1 or nil depending on whether +symbol+ is less
7994  * than, equal to, or greater than +other_symbol+.
7995  *
7996  * +nil+ is returned if the two values are incomparable.
7997  *
7998  * See String#<=> for more information.
7999  */
8000 
8001 static VALUE
8003 {
8004  if (!SYMBOL_P(other)) {
8005  return Qnil;
8006  }
8007  return rb_str_cmp_m(rb_sym_to_s(sym), rb_sym_to_s(other));
8008 }
8009 
8010 /*
8011  * call-seq:
8012  *
8013  * sym.casecmp(other) -> -1, 0, +1 or nil
8014  *
8015  * Case-insensitive version of <code>Symbol#<=></code>.
8016  */
8017 
8018 static VALUE
8020 {
8021  if (!SYMBOL_P(other)) {
8022  return Qnil;
8023  }
8024  return rb_str_casecmp(rb_sym_to_s(sym), rb_sym_to_s(other));
8025 }
8026 
8027 /*
8028  * call-seq:
8029  * sym =~ obj -> fixnum or nil
8030  *
8031  * Returns <code>sym.to_s =~ obj</code>.
8032  */
8033 
8034 static VALUE
8036 {
8037  return rb_str_match(rb_sym_to_s(sym), other);
8038 }
8039 
8040 /*
8041  * call-seq:
8042  * sym[idx] -> char
8043  * sym[b, n] -> string
8044  *
8045  * Returns <code>sym.to_s[]</code>.
8046  */
8047 
8048 static VALUE
8050 {
8051  return rb_str_aref_m(argc, argv, rb_sym_to_s(sym));
8052 }
8053 
8054 /*
8055  * call-seq:
8056  * sym.length -> integer
8057  *
8058  * Same as <code>sym.to_s.length</code>.
8059  */
8060 
8061 static VALUE
8063 {
8064  return rb_str_length(rb_id2str(SYM2ID(sym)));
8065 }
8066 
8067 /*
8068  * call-seq:
8069  * sym.empty? -> true or false
8070  *
8071  * Returns whether _sym_ is :"" or not.
8072  */
8073 
8074 static VALUE
8076 {
8077  return rb_str_empty(rb_id2str(SYM2ID(sym)));
8078 }
8079 
8080 /*
8081  * call-seq:
8082  * sym.upcase -> symbol
8083  *
8084  * Same as <code>sym.to_s.upcase.intern</code>.
8085  */
8086 
8087 static VALUE
8089 {
8091 }
8092 
8093 /*
8094  * call-seq:
8095  * sym.downcase -> symbol
8096  *
8097  * Same as <code>sym.to_s.downcase.intern</code>.
8098  */
8099 
8100 static VALUE
8102 {
8104 }
8105 
8106 /*
8107  * call-seq:
8108  * sym.capitalize -> symbol
8109  *
8110  * Same as <code>sym.to_s.capitalize.intern</code>.
8111  */
8112 
8113 static VALUE
8115 {
8117 }
8118 
8119 /*
8120  * call-seq:
8121  * sym.swapcase -> symbol
8122  *
8123  * Same as <code>sym.to_s.swapcase.intern</code>.
8124  */
8125 
8126 static VALUE
8128 {
8130 }
8131 
8132 /*
8133  * call-seq:
8134  * sym.encoding -> encoding
8135  *
8136  * Returns the Encoding object that represents the encoding of _sym_.
8137  */
8138 
8139 static VALUE
8141 {
8142  return rb_obj_encoding(rb_id2str(SYM2ID(sym)));
8143 }
8144 
8145 ID
8147 {
8148  VALUE tmp;
8149 
8150  switch (TYPE(name)) {
8151  default:
8152  tmp = rb_check_string_type(name);
8153  if (NIL_P(tmp)) {
8154  tmp = rb_inspect(name);
8155  rb_raise(rb_eTypeError, "%s is not a symbol",
8156  RSTRING_PTR(tmp));
8157  }
8158  name = tmp;
8159  /* fall through */
8160  case T_STRING:
8161  name = rb_str_intern(name);
8162  /* fall through */
8163  case T_SYMBOL:
8164  return SYM2ID(name);
8165  }
8166 
8167  UNREACHABLE;
8168 }
8169 
8170 /*
8171  * A <code>String</code> object holds and manipulates an arbitrary sequence of
8172  * bytes, typically representing characters. String objects may be created
8173  * using <code>String::new</code> or as literals.
8174  *
8175  * Because of aliasing issues, users of strings should be aware of the methods
8176  * that modify the contents of a <code>String</code> object. Typically,
8177  * methods with names ending in ``!'' modify their receiver, while those
8178  * without a ``!'' return a new <code>String</code>. However, there are
8179  * exceptions, such as <code>String#[]=</code>.
8180  *
8181  */
8182 
8183 void
8185 {
8186 #undef rb_intern
8187 #define rb_intern(str) rb_intern_const(str)
8188 
8189  rb_cString = rb_define_class("String", rb_cObject);
8193  rb_define_method(rb_cString, "initialize", rb_str_init, -1);
8194  rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);
8198  rb_define_method(rb_cString, "eql?", rb_str_eql, 1);
8200  rb_define_method(rb_cString, "casecmp", rb_str_casecmp, 1);
8206  rb_define_method(rb_cString, "insert", rb_str_insert, 2);
8207  rb_define_method(rb_cString, "length", rb_str_length, 0);
8209  rb_define_method(rb_cString, "bytesize", rb_str_bytesize, 0);
8210  rb_define_method(rb_cString, "empty?", rb_str_empty, 0);
8217  rb_define_method(rb_cString, "upto", rb_str_upto, -1);
8220  rb_define_method(rb_cString, "replace", rb_str_replace, 1);
8223  rb_define_method(rb_cString, "getbyte", rb_str_getbyte, 1);
8224  rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
8225  rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
8226 
8227  rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
8230  rb_define_method(rb_cString, "to_str", rb_str_to_s, 0);
8231  rb_define_method(rb_cString, "inspect", rb_str_inspect, 0);
8233 
8234  rb_define_method(rb_cString, "upcase", rb_str_upcase, 0);
8235  rb_define_method(rb_cString, "downcase", rb_str_downcase, 0);
8236  rb_define_method(rb_cString, "capitalize", rb_str_capitalize, 0);
8237  rb_define_method(rb_cString, "swapcase", rb_str_swapcase, 0);
8238 
8243 
8247  rb_define_method(rb_cString, "lines", rb_str_lines, -1);
8250  rb_define_method(rb_cString, "codepoints", rb_str_codepoints, 0);
8251  rb_define_method(rb_cString, "reverse", rb_str_reverse, 0);
8253  rb_define_method(rb_cString, "concat", rb_str_concat, 1);
8255  rb_define_method(rb_cString, "prepend", rb_str_prepend, 1);
8257  rb_define_method(rb_cString, "intern", rb_str_intern, 0);
8258  rb_define_method(rb_cString, "to_sym", rb_str_intern, 0);
8260 
8261  rb_define_method(rb_cString, "include?", rb_str_include, 1);
8262  rb_define_method(rb_cString, "start_with?", rb_str_start_with, -1);
8263  rb_define_method(rb_cString, "end_with?", rb_str_end_with, -1);
8264 
8266 
8267  rb_define_method(rb_cString, "ljust", rb_str_ljust, -1);
8268  rb_define_method(rb_cString, "rjust", rb_str_rjust, -1);
8269  rb_define_method(rb_cString, "center", rb_str_center, -1);
8270 
8271  rb_define_method(rb_cString, "sub", rb_str_sub, -1);
8272  rb_define_method(rb_cString, "gsub", rb_str_gsub, -1);
8274  rb_define_method(rb_cString, "chomp", rb_str_chomp, -1);
8276  rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0);
8277  rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0);
8278 
8286 
8289  rb_define_method(rb_cString, "delete", rb_str_delete, -1);
8290  rb_define_method(rb_cString, "squeeze", rb_str_squeeze, -1);
8291  rb_define_method(rb_cString, "count", rb_str_count, -1);
8292 
8297 
8298  rb_define_method(rb_cString, "each_line", rb_str_each_line, -1);
8299  rb_define_method(rb_cString, "each_byte", rb_str_each_byte, 0);
8300  rb_define_method(rb_cString, "each_char", rb_str_each_char, 0);
8301  rb_define_method(rb_cString, "each_codepoint", rb_str_each_codepoint, 0);
8302 
8303  rb_define_method(rb_cString, "sum", rb_str_sum, -1);
8304 
8305  rb_define_method(rb_cString, "slice", rb_str_aref_m, -1);
8307 
8308  rb_define_method(rb_cString, "partition", rb_str_partition, 1);
8309  rb_define_method(rb_cString, "rpartition", rb_str_rpartition, 1);
8310 
8311  rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
8312  rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
8314  rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
8316 
8317  id_to_s = rb_intern("to_s");
8318 
8319  rb_fs = Qnil;
8320  rb_define_variable("$;", &rb_fs);
8321  rb_define_variable("$-F", &rb_fs);
8322 
8323  rb_cSymbol = rb_define_class("Symbol", rb_cObject);
8327  rb_define_singleton_method(rb_cSymbol, "all_symbols", rb_sym_all_symbols, 0); /* in parse.y */
8328 
8331  rb_define_method(rb_cSymbol, "inspect", sym_inspect, 0);
8333  rb_define_method(rb_cSymbol, "id2name", rb_sym_to_s, 0);
8334  rb_define_method(rb_cSymbol, "intern", sym_to_sym, 0);
8335  rb_define_method(rb_cSymbol, "to_sym", sym_to_sym, 0);
8336  rb_define_method(rb_cSymbol, "to_proc", sym_to_proc, 0);
8337  rb_define_method(rb_cSymbol, "succ", sym_succ, 0);
8338  rb_define_method(rb_cSymbol, "next", sym_succ, 0);
8339 
8340  rb_define_method(rb_cSymbol, "<=>", sym_cmp, 1);
8341  rb_define_method(rb_cSymbol, "casecmp", sym_casecmp, 1);
8343 
8344  rb_define_method(rb_cSymbol, "[]", sym_aref, -1);
8345  rb_define_method(rb_cSymbol, "slice", sym_aref, -1);
8346  rb_define_method(rb_cSymbol, "length", sym_length, 0);
8347  rb_define_method(rb_cSymbol, "size", sym_length, 0);
8348  rb_define_method(rb_cSymbol, "empty?", sym_empty, 0);
8349  rb_define_method(rb_cSymbol, "match", sym_match, 1);
8350 
8351  rb_define_method(rb_cSymbol, "upcase", sym_upcase, 0);
8352  rb_define_method(rb_cSymbol, "downcase", sym_downcase, 0);
8353  rb_define_method(rb_cSymbol, "capitalize", sym_capitalize, 0);
8354  rb_define_method(rb_cSymbol, "swapcase", sym_swapcase, 0);
8355 
8356  rb_define_method(rb_cSymbol, "encoding", sym_encoding, 0);
8357 }
static int str_independent(VALUE str)
Definition: string.c:1338
#define rb_enc_islower(c, enc)
#define FIXNUM_MAX
#define RB_TYPE_P(obj, type)
static VALUE sym_upcase(VALUE sym)
Definition: string.c:8088
RARRAY_PTR(q->result)[0]
static long chopped_length(VALUE str)
Definition: string.c:6612
VALUE rb_str_associated(VALUE)
Definition: string.c:1453
volatile VALUE tmp
Definition: tcltklib.c:10209
static VALUE str_replace_shared_without_enc(VALUE str2, VALUE str)
Definition: string.c:638
Definition: string.c:5063
int rb_enc_codelen(int c, rb_encoding *enc)
Definition: encoding.c:952
static VALUE rb_str_bytesize(VALUE str)
Definition: string.c:1201
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
static VALUE str_buf_cat(VALUE str, const char *ptr, long len)
Definition: string.c:1894
ssize_t n
Definition: bigdecimal.c:5655
#define RSTRING(obj)
VALUE sym
Definition: tkutil.c:1299
VALUE rb_str_times(VALUE, VALUE)
Definition: string.c:1268
static long rb_str_rindex(VALUE str, VALUE sub, long pos)
Definition: string.c:2612
volatile VALUE ary
Definition: tcltklib.c:9713
rb_encoding * rb_enc_check(VALUE str1, VALUE str2)
Definition: encoding.c:776
VP_EXPORT int
Definition: bigdecimal.c:5050
VALUE rb_str_ellipsize(VALUE, long)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
Definition: string.c:7701
VALUE rb_ary_pop(VALUE ary)
Definition: array.c:879
#define RESIZE_CAPA(str, capacity)
Definition: string.c:102
VALUE rb_any_to_s(VALUE)
Definition: object.c:384
UChar * pat
Definition: regerror.c:390
void rb_bug(const char *fmt,...)
Definition: error.c:290
void rb_enc_copy(VALUE obj1, VALUE obj2)
Definition: encoding.c:854
#define FALSE
Definition: nkf.h:174
VALUE rb_str_resurrect(VALUE str)
Definition: string.c:952
#define rb_hash_lookup
Definition: tcltklib.c:268
code
Definition: tcltklib.c:3381
size_t strlen(const char *)
gz enc2
Definition: zlib.c:2272
#define OBJ_INFECT(x, s)
#define CHECK_IF_ASCII(c)
gz ec
Definition: zlib.c:2273
#define TOUPPER(c)
const char * rb_obj_classname(VALUE)
Definition: variable.c:391
VALUE rb_str_buf_cat_ascii(VALUE, const char *)
Definition: string.c:2074
VALUE rb_id2str(ID id)
Definition: ripper.c:16007
Win32OLEIDispatch * p
Definition: win32ole.c:786
#define RSTRING_END(str)
static int sym_printable(const char *s, const char *send, rb_encoding *enc)
Definition: string.c:7788
#define UNLIMITED_ARGUMENTS
#define rb_tainted_str_new2
#define FL_TEST(x, f)
static int max(int a, int b)
Definition: strftime.c:141
#define ascii_isspace(c)
Definition: string.c:5858
static int coderange_scan(const char *p, long len, rb_encoding *enc)
Definition: string.c:183
VALUE rb_str_tmp_new(long)
void rb_define_singleton_method(VALUE obj, const char *name, VALUE(*func)(ANYARGS), int argc)
Defines a singleton method for obj.
Definition: class.c:1493
VALUE rb_str_buf_append(VALUE, VALUE)
Definition: string.c:2098
static VALUE rb_str_to_f(VALUE str)
Definition: string.c:4417
VALUE rb_sym_to_s(VALUE)
Definition: string.c:7898
volatile VALUE str_enc
Definition: tkutil.c:458
static VALUE rb_str_oct(VALUE str)
Definition: string.c:7122
VALUE rb_string_value(volatile VALUE *)
Definition: string.c:1472
#define FL_SET(x, f)
VALUE rb_str_quote_unprintable(VALUE)
Definition: string.c:7820
#define STR_NOCAPA
Definition: string.c:63
static VALUE rb_str_scan(VALUE str, VALUE pat)
Definition: string.c:7055
VALUE proc
Definition: tcltklib.c:2959
static VALUE rb_str_gsub(int argc, VALUE *argv, VALUE str)
Definition: string.c:4004
static VALUE rb_str_match(VALUE x, VALUE y)
Definition: string.c:2746
static int VALUE table
Definition: tcltklib.c:10138
#define rb_usascii_str_new2
#define rb_enc_codepoint(p, e, enc)
int rb_str_cmp(VALUE, VALUE)
Definition: string.c:2301
static void rb_enc_cr_str_copy_for_substr(VALUE dest, VALUE src)
Definition: string.c:290
VALUE rb_external_str_new_cstr(const char *)
Definition: string.c:590
void rb_gc_force_recycle(VALUE)
Definition: gc.c:2961
ssize_t i
Definition: bigdecimal.c:5655
unsigned char * USTR
Definition: string.c:5061
char * rb_string_value_ptr(volatile VALUE *)
Definition: string.c:1483
static unsigned int trnext(struct tr *t, rb_encoding *enc)
Definition: string.c:5070
VALUE rb_str_locktmp(VALUE)
#define rb_check_frozen(obj)
#define is_broken_string(str)
Definition: string.c:121
RUBY_EXTERN void * memmove(void *, const void *, size_t)
Definition: memmove.c:7
#define rb_enc_right_char_head(s, p, e, enc)
static VALUE sym_swapcase(VALUE sym)
Definition: string.c:8127
#define rb_enc_name(enc)
static VALUE rb_str_b(VALUE str)
Definition: string.c:7640
char * pend
Definition: string.c:5066
void Init_String(void)
Definition: string.c:8184
VALUE rb_str_subseq(VALUE, long, long)
Definition: string.c:1668
static VALUE rb_str_clear(VALUE str)
Definition: string.c:4043
rb_encoding * rb_to_encoding(VALUE enc)
Definition: encoding.c:194
VALUE rb_str_new_cstr(const char *)
Definition: string.c:447
int ret
Definition: tcltklib.c:280
#define STR_UNSET_NOCAPA(s)
Definition: string.c:65
VALUE rb_enc_from_encoding(rb_encoding *encoding)
Definition: encoding.c:103
int rb_enc_tolower(int c, rb_encoding *enc)
Definition: encoding.c:968
RUBY_EXTERN VALUE rb_cSymbol
Definition: ripper.y:1458
VALUE rb_obj_freeze(VALUE)
Definition: object.c:971
long rb_str_strlen(VALUE)
Definition: string.c:1168
VALUE rb_eTypeError
Definition: error.c:511
int rb_num_to_uint(VALUE val, unsigned int *ret)
Definition: numeric.c:122
#define OBJ_FREEZE(x)
void rb_define_alloc_func(VALUE, rb_alloc_func_t)
#define OBJ_TAINTED(x)
static VALUE str_gsub(int argc, VALUE *argv, VALUE str, int bang)
Definition: string.c:3828
#define UNREACHABLE
Definition: ruby.h:40
static VALUE rb_str_succ_bang(VALUE str)
Definition: string.c:3059
static VALUE rb_str_enumerate_bytes(VALUE str, int wantarray)
Definition: string.c:6330
static VALUE rb_str_each_line(int argc, VALUE *argv, VALUE str)
Definition: string.c:6300
#define rb_enc_prev_char(s, p, e, enc)
rb_encoding * rb_default_internal_encoding(void)
Definition: encoding.c:1371
VALUE enc
Definition: tcltklib.c:10311
VALUE rb_ary_push(VALUE ary, VALUE item)
Definition: array.c:822
static VALUE str_new3(VALUE klass, VALUE str)
Definition: string.c:671
VALUE rb_reg_regsub(VALUE, VALUE, struct re_registers *, VALUE)
Definition: re.c:3286
int rb_usascii_encindex(void)
Definition: encoding.c:1190
VALUE rb_str_export(VALUE)
Definition: string.c:620
#define TYPE(x)
static VALUE rb_str_prepend(VALUE str, VALUE str2)
Definition: string.c:2228
rb_encoding * rb_enc_compatible(VALUE str1, VALUE str2)
Definition: encoding.c:787
static VALUE rb_str_gsub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3953
VALUE rb_ary_tmp_new(long capa)
Definition: array.c:465
#define RSTRING_PTR(str)
#define CLASS_OF(v)
NIL_P(eventloop_thread)
Definition: tcltklib.c:4068
VALUE rb_enc_str_new(const char *, long, rb_encoding *)
Definition: string.c:439
static VALUE rb_str_codepoints(VALUE str)
Definition: string.c:6605
#define str_buf_cat2(str, ptr)
Definition: string.c:1937
static VALUE rb_str_swapcase_bang(VALUE str)
Definition: string.c:5009
int rb_str_comparable(VALUE, VALUE)
Definition: string.c:2276
VALUE rb_str_buf_cat2(VALUE, const char *)
Definition: string.c:1950
static VALUE rb_str_rstrip(VALUE str)
Definition: string.c:6940
static VALUE rb_str_justify(int argc, VALUE *argv, VALUE str, char jflag)
Definition: string.c:7300
VALUE var
Definition: tcltklib.c:5517
static VALUE rb_str_include(VALUE str, VALUE arg)
Definition: string.c:4350
static void rb_str_check_dummy_enc(rb_encoding *enc)
Definition: string.c:4754
#define xfree
#define str_make_independent(str)
Definition: string.c:1366
VALUE rb_funcall(VALUE, ID, int,...)
Calls a method.
Definition: vm_eval.c:774
unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_encoding *enc)
Definition: encoding.c:931
register C_block * tp
Definition: crypt.c:311
long rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr)
Definition: string.c:1031
void rb_raise(VALUE exc, const char *fmt,...)
Definition: error.c:1780
VALUE rb_funcall_with_block(VALUE, ID, int, const VALUE *, VALUE)
Definition: vm_eval.c:834
char * p
Definition: string.c:5066
char * rb_enc_nth(const char *, const char *, long, rb_encoding *)
Definition: string.c:1582
static VALUE sym_downcase(VALUE sym)
Definition: string.c:8101
VALUE rb_proc_new(VALUE(*)(ANYARGS), VALUE)
Definition: proc.c:2018
VALUE rb_str_succ(VALUE)
Definition: string.c:2976
static VALUE str_replace(VALUE str, VALUE str2)
Definition: string.c:910
return Qtrue
Definition: tcltklib.c:9610
#define rb_str_new4
VALUE rb_enc_associate(VALUE obj, rb_encoding *enc)
Definition: encoding.c:764
VALUE rb_obj_class(VALUE)
Definition: object.c:194
#define RETURN_ENUMERATOR(obj, argc, argv)
static VALUE rb_str_to_i(int argc, VALUE *argv, VALUE str)
Definition: string.c:4384
static VALUE rb_str_slice_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3622
#define rb_enc_left_char_head(s, p, e, enc)
#define STR_NOEMBED
Definition: string.c:58
VALUE rb_external_str_new(const char *, long)
Definition: string.c:584
int index
Definition: tcltklib.c:4478
RUBY_EXTERN VALUE rb_fs
Definition: ripper.y:485
int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:884
VALUE rb_ary_new3(long n,...)
Definition: array.c:432
static VALUE rb_str_empty(VALUE str)
Definition: string.c:1218
static VALUE rb_str_chars(VALUE str)
Definition: string.c:6512
VALUE rb_eSecurityError
Definition: error.c:520
static VALUE rb_str_reverse_bang(VALUE str)
Definition: string.c:4311
void rb_include_module(VALUE klass, VALUE module)
Definition: class.c:695
#define rb_enc_to_index(enc)
static VALUE rb_str_center(int argc, VALUE *argv, VALUE str)
Definition: string.c:7444
static VALUE rb_str_each_char_size(VALUE str)
Definition: string.c:6404
VALUE rb_locale_str_new(const char *, long)
Definition: string.c:596
r
Definition: bigdecimal.c:1196
#define FL_UNTRUSTED
#define rb_str_new2
VALUE rb_reg_nth_match(int, VALUE)
Definition: re.c:1457
static VALUE rb_str_subpat(VALUE str, VALUE re, VALUE backref)
Definition: string.c:3196
void rb_must_asciicompat(VALUE)
Definition: string.c:1463
static VALUE rb_str_aset_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3554
VALUE rb_str_unlocktmp(VALUE)
Definition: string.c:1820
static VALUE rb_str_upcase_bang(VALUE str)
Definition: string.c:4772
#define ISDIGIT(c)
unsigned int last
Definition: nkf.c:4310
static VALUE rb_str_format_m(VALUE str, VALUE arg)
Definition: string.c:1316
#define STR_SET_NOEMBED(str)
Definition: string.c:70
#define ENCODING_IS_ASCII8BIT(obj)
#define STR_DEC_LEN(str)
Definition: string.c:91
VALUE rb_str_substr(VALUE, long, long)
Definition: string.c:1774
#define numberof(array)
Definition: string.c:32
static long str_strlen(VALUE str, rb_encoding *enc)
Definition: string.c:1122
static VALUE rb_str_chomp(int argc, VALUE *argv, VALUE str)
Definition: string.c:6809
rb_encoding * rb_utf8_encoding(void)
Definition: encoding.c:1166
#define ID2SYM(x)
#define BEG(no)
Definition: string.c:22
VALUE VALUE args
Definition: tcltklib.c:2561
static VALUE sym_length(VALUE sym)
Definition: string.c:8062
void rb_undef_method(VALUE klass, const char *name)
Definition: class.c:1358
#define CHAR_ESC_LEN
Definition: string.c:4452
#define ENC_CODERANGE_BROKEN
VALUE rb_sym_all_symbols(void)
Definition: ripper.c:16100
static VALUE empty_str_alloc(VALUE klass)
Definition: string.c:386
static VALUE rb_str_upcase(VALUE str)
Definition: string.c:4837
#define LONG2NUM(x)
VALUE rb_str_append(VALUE, VALUE)
Definition: string.c:2114
VALUE rb_str_new_frozen(VALUE)
Definition: string.c:713
static VALUE rb_str_hash_m(VALUE str)
Definition: string.c:2267
static int tr_find(unsigned int c, char table[TR_TABLE_SIZE], VALUE del, VALUE nodel)
Definition: string.c:5495
int rb_reg_backref_number(VALUE match, VALUE backref)
Definition: re.c:1075
#define rb_enc_isctype(c, t, enc)
VALUE rb_equal(VALUE, VALUE)
Definition: object.c:56
VALUE rb_str_concat(VALUE, VALUE)
Definition: string.c:2155
static VALUE rb_str_aset(VALUE str, VALUE indx, VALUE val)
Definition: string.c:3487
VALUE rb_str_replace(VALUE, VALUE)
Definition: string.c:4022
VALUE rb_str_to_str(VALUE)
Definition: string.c:849
void rb_str_modify_expand(VALUE, long)
Definition: string.c:1377
VALUE rb_eRangeError
Definition: error.c:515
d
Definition: strlcat.c:58
VALUE rb_enc_sprintf(rb_encoding *enc, const char *format,...)
Definition: sprintf.c:1251
VALUE rb_str_equal(VALUE str1, VALUE str2)
Definition: string.c:2351
const char * name
Definition: ripper.y:163
const char * fmt
Definition: tcltklib.c:841
#define ENCODING_GET(obj)
int rb_enc_toupper(int c, rb_encoding *enc)
Definition: encoding.c:962
static VALUE rb_str_insert(VALUE str, VALUE idx, VALUE str2)
Definition: string.c:3587
#define ISALPHA(c)
Definition: ruby.h:1636
#define MEMZERO(p, type, n)
static VALUE sym_equal(VALUE sym1, VALUE sym2)
Definition: string.c:7780
static VALUE sym_inspect(VALUE sym)
Definition: string.c:7856
static VALUE rb_str_partition(VALUE str, VALUE sep)
Definition: string.c:7465
VALUE rb_usascii_str_new(const char *, long)
Definition: string.c:431
static long str_offset(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1602
static VALUE rb_str_ljust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7404
gz ecflags
Definition: zlib.c:2274
VALUE hash
Definition: tkutil.c:267
VALUE rb_str_dump(VALUE)
Definition: string.c:4622
void rb_str_update(VALUE, long, long, VALUE)
Definition: string.c:3443
#define STR_SHARED_P(s)
Definition: string.c:61
static VALUE rb_str_setbyte(VALUE str, VALUE index, VALUE value)
Definition: string.c:4098
rb_encoding * rb_default_external_encoding(void)
Definition: encoding.c:1286
memset(y->frac+ix+1, 0,(y->Prec-(ix+1))*sizeof(BDIGIT))
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
VALUE rb_mComparable
Definition: compar.c:14
neighbor_char
Definition: string.c:2810
static VALUE rb_str_capitalize_bang(VALUE str)
Definition: string.c:4943
int rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p)
Definition: string.c:4455
static VALUE rb_str_strip(VALUE str)
Definition: string.c:6978
#define FIXNUM_P(f)
return Qfalse
Definition: tcltklib.c:6779
#define rb_intern_str(string)
Definition: generator.h:17
unsigned int now
Definition: string.c:5065
int rb_block_given_p(void)
Definition: eval.c:672
#define RARRAY_LEN(a)
long rb_str_offset(VALUE, long)
Definition: string.c:1610
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Definition: transcode.c:1446
size_t rb_str_capacity(VALUE)
Definition: string.c:360
#define rb_enc_step_back(s, p, e, n, enc)
static VALUE rb_str_split_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:5908
#define Qnil
Definition: tcltklib.c:1896
#define val
Definition: tcltklib.c:1949
static int single_byte_optimizable(VALUE str)
Definition: string.c:126
int rb_enc_fast_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:878
static void rb_str_splice_0(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3367
void rb_free_tmp_buffer(volatile VALUE *store)
Definition: string.c:822
VALUE rb_eRuntimeError
Definition: error.c:510
static VALUE sym_to_sym(VALUE sym)
Definition: string.c:7917
VALUE rb_str_to_inum(VALUE str, int base, int badcheck)
Definition: bignum.c:777
static VALUE str_new_shared(VALUE klass, VALUE str)
Definition: string.c:665
int rb_str_symname_p(VALUE)
Definition: string.c:7801
static VALUE rb_str_rpartition(VALUE str, VALUE sep)
Definition: string.c:7515
static VALUE char * str
Definition: tcltklib.c:3547
int rb_isspace(int c)
Definition: encoding.c:1891
static VALUE rb_str_crypt(VALUE str, VALUE salt)
Definition: string.c:7146
VALUE rb_locale_str_new_cstr(const char *)
Definition: string.c:602
static VALUE rb_str_cmp_m(VALUE str1, VALUE str2)
Definition: string.c:2404
VALUE rb_ary_new(void)
Definition: array.c:424
#define Check_Type(v, t)
static void str_modify_keep_cr(VALUE str)
Definition: string.c:1401
#define dp(v)
Definition: vm_debug.h:23
int flags
Definition: tcltklib.c:3023
unsigned long ID
Definition: ripper.y:105
#define STR_BUF_MIN_SIZE
Definition: string.c:774
#define STR_SET_EMBED(str)
Definition: string.c:74
static VALUE rb_str_tr_s(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5745
#define ISASCII(c)
Definition: ruby.h:1629
#define ONIGENC_CTYPE_ALPHA
#define ENC_CODERANGE_CLEAR(obj)
VALUE rb_str_cat2(VALUE, const char *)
Definition: string.c:1975
#define add(x, y)
Definition: date_strftime.c:23
static VALUE rb_str_delete(int argc, VALUE *argv, VALUE str)
Definition: string.c:5601
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
Definition: class.c:499
static VALUE VALUE obj
Definition: tcltklib.c:3158
#define RSTRING_LEN(str)
void rb_enc_set_index(VALUE obj, int idx)
Definition: encoding.c:739
#define INT2FIX(i)
void rb_str_shared_replace(VALUE, VALUE)
Definition: string.c:857
static VALUE rb_str_each_byte_size(VALUE str, VALUE args)
Definition: string.c:6324
int idx
Definition: tcltklib.c:9716
static VALUE rb_str_enumerate_chars(VALUE str, int wantarray)
Definition: string.c:6419
static VALUE rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
Definition: string.c:6134
void rb_ary_store(VALUE ary, long idx, VALUE val)
Definition: array.c:719
static VALUE rb_str_tr_s_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5725
#define RUBY_DTRACE_STRING_CREATE_ENABLED()
Definition: probes.h:63
#define FIX2LONG(x)
void rb_backref_set(VALUE)
Definition: vm.c:768
static int rb_enc_dummy_p(rb_encoding *enc)
Definition: ripper.y:235
#define T_STRING
#define END(no)
Definition: string.c:23
#define MBCLEN_CHARFOUND_P(ret)
#define ENC_CODERANGE_AND(a, b)
#define rb_enc_isprint(c, enc)
#define STR_ENC_GET(str)
Definition: string.c:123
static VALUE rb_str_strip_bang(VALUE str)
Definition: string.c:6957
double rb_str_to_dbl(VALUE, int)
Definition: object.c:2600
VALUE rb_cEncodingConverter
Definition: transcode.c:25
#define rb_sourcefile()
Definition: tcltklib.c:97
#define STR_SET_EMBED_LEN(str, n)
Definition: string.c:76
VALUE rb_str_freeze(VALUE)
Definition: string.c:1797
#define range(low, item, hi)
Definition: date_strftime.c:21
VALUE rb_check_hash_type(VALUE)
Definition: hash.c:461
#define LONG_MAX
Definition: ruby.h:201
static VALUE rb_str_tr_bang(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5380
#define RUBY_FUNC_EXPORTED
Definition: defines.h:184
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
VALUE rb_eEncCompatError
Definition: error.c:518
static VALUE VALUE assoc
Definition: tkutil.c:545
VALUE rb_str_buf_new_cstr(const char *)
Definition: string.c:793
unsigned int max
Definition: string.c:5065
#define DBL2NUM(dbl)
#define ALLOCA_N(type, n)
VALUE rb_check_funcall(VALUE, ID, int, VALUE *)
Definition: vm_eval.c:408
static VALUE sym_call(VALUE args, VALUE sym, int argc, VALUE *argv, VALUE passed_proc)
Definition: string.c:7923
#define ENC_CODERANGE_UNKNOWN
void rb_str_setter(VALUE, ID, VALUE *)
Definition: string.c:7607
VALUE rb_eIndexError
Definition: error.c:513
static VALUE rb_str_rjust(int argc, VALUE *argv, VALUE str)
Definition: string.c:7424
static int VALUE key
Definition: tkutil.c:265
#define rb_enc_mbc_to_codepoint(p, e, enc)
#define ENC_CODERANGE_SET(obj, cr)
VALUE rb_reg_match(VALUE, VALUE)
Definition: re.c:2746
register int hval
Definition: lex.c:89
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
VALUE rb_str_new_shared(VALUE)
Definition: string.c:677
static VALUE sym_capitalize(VALUE sym)
Definition: string.c:8114
VALUE rb_str_buf_cat(VALUE, const char *, long)
Definition: string.c:1940
VALUE rb_str_dup(VALUE)
Definition: string.c:946
VALUE rb_tainted_str_new_cstr(const char *)
Definition: string.c:479
VALUE rb_filesystem_str_new(const char *, long)
Definition: string.c:608
#define rb_long2int(n)
VALUE rb_obj_as_string(VALUE)
Definition: string.c:895
VALUE * argv
Definition: tcltklib.c:1971
int rb_enc_ascget(const char *p, const char *e, int *len, rb_encoding *enc)
Definition: encoding.c:908
static VALUE sym_cmp(VALUE sym, VALUE other)
Definition: string.c:8002
#define sub(x, y)
Definition: date_strftime.c:24
VALUE rb_hash_aset(VALUE, VALUE, VALUE)
static void rb_str_splice(VALUE str, long beg, long len, VALUE val)
Definition: string.c:3400
VALUE rb_yield(VALUE)
Definition: vm_eval.c:934
VALUE rb_tainted_str_new(const char *, long)
static VALUE str_eql(const VALUE str1, const VALUE str2)
Definition: string.c:2328
VALUE rb_str_resize(VALUE, long)
Definition: string.c:1846
memcpy(buf+1, str, len)
#define RTEST(v)
const int id
Definition: nkf.c:209
static VALUE sym_encoding(VALUE sym)
Definition: string.c:8140
VALUE rb_str_format(int, const VALUE *, VALUE)
Definition: sprintf.c:439
static VALUE rb_str_swapcase(VALUE str)
Definition: string.c:5054
VALUE rb_str_export_to_enc(VALUE, rb_encoding *)
Definition: string.c:632
#define rb_enc_mbminlen(enc)
#define RUBY_MAX_CHAR_LEN
Definition: string.c:56
#define TRUE
Definition: nkf.h:175
static VALUE rb_str_byteslice(int argc, VALUE *argv, VALUE str)
Definition: string.c:4228
q result
Definition: tcltklib.c:7070
void * rb_alloc_tmp_buffer(volatile VALUE *store, long len)
Definition: string.c:814
VALUE rb_str_split(VALUE, const char *)
Definition: string.c:6123
volatile VALUE value
Definition: tcltklib.c:9442
#define StringValue(v)
void rb_econv_close(rb_econv_t *ec)
Definition: transcode.c:1702
#define rb_enc_mbcput(c, buf, enc)
long rb_memsearch(const void *, long, const void *, long, rb_encoding *)
Definition: re.c:227
#define MBCLEN_CHARFOUND_LEN(ret)
int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc)
Definition: encoding.c:896
int rb_enc_unicode_p(rb_encoding *enc)
Definition: encoding.c:459
#define STR_TMPLOCK
Definition: string.c:57
#define T_REGEXP
register char * s
Definition: os2.c:56
long rb_str_coderange_scan_restartable(const char *, const char *, rb_encoding *, int *)
Definition: string.c:232
int rb_enc_symname_p(const char *name, rb_encoding *enc)
Definition: ripper.c:15703
static VALUE rb_str_tr(VALUE str, VALUE src, VALUE repl)
Definition: string.c:5422
#define CONST_ID(var, str)
static VALUE rb_str_chop_bang(VALUE str)
Definition: string.c:6639
void rb_gc_register_mark_object(VALUE)
Definition: gc.c:2980
#define STR_ASSOC
Definition: string.c:60
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Definition: class.c:1566
static VALUE rb_str_enumerate_codepoints(VALUE str, int wantarray)
Definition: string.c:6519
static VALUE rb_str_squeeze(int argc, VALUE *argv, VALUE str)
Definition: string.c:5708
long rb_reg_search(VALUE, VALUE, long, int)
Definition: re.c:1352
static VALUE str_duplicate(VALUE klass, VALUE str)
Definition: string.c:938
char * rb_string_value_cstr(volatile VALUE *)
Definition: string.c:1490
VALUE retval
Definition: tcltklib.c:7830
#define no_digits()
rb_encoding * rb_usascii_encoding(void)
Definition: encoding.c:1181
static VALUE rb_str_aref_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:3326
static VALUE sym_to_proc(VALUE sym)
Definition: string.c:7944
VALUE rb_str_inspect(VALUE)
Definition: string.c:4500
#define OBJ_FROZEN(x)
#define RB_GC_GUARD(v)
static VALUE rb_str_squeeze_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:5618
RUBY_EXTERN char * crypt(const char *, const char *)
#define FL_TAINT
static VALUE get_pat(VALUE, int)
Definition: string.c:3642
#define T_FIXNUM
static enum neighbor_char enc_pred_char(char *p, long len, rb_encoding *enc)
Definition: string.c:2851
int argc
Definition: tcltklib.c:1970
VALUE rb_str_buf_new(long)
Definition: string.c:777
rb_encoding * rb_locale_encoding(void)
Definition: encoding.c:1212
static VALUE rb_str_lstrip_bang(VALUE str)
Definition: string.c:6829
static VALUE str_new(VALUE klass, const char *ptr, long len)
Definition: string.c:395
static VALUE str_alloc(VALUE klass)
Definition: string.c:374
#define UNINITIALIZED_VAR(x)
Definition: vm_core.h:121
#define ELTS_SHARED
#define RUBY_ALIAS_FUNCTION(prot, name, args)
Definition: defines.h:249
static VALUE rb_str_sub_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:3681
static VALUE rb_str_is_ascii_only_p(VALUE str)
Definition: string.c:7679
void rb_undef_alloc_func(VALUE)
Definition: vm_method.c:482
VALUE rb_obj_encoding(VALUE obj)
Definition: encoding.c:868
#define RUBY_DTRACE_STRING_CREATE(arg0, arg1, arg2)
Definition: probes.h:64
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Definition: transcode.c:2570
static VALUE rb_str_chomp_bang(int argc, VALUE *argv, VALUE str)
Definition: string.c:6689
int memcmp(const void *s1, const void *s2, size_t len)
Definition: memcmp.c:7
static VALUE sym_casecmp(VALUE sym, VALUE other)
Definition: string.c:8019
static char * str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
Definition: string.c:1532
int rb_sourceline(void)
Definition: vm.c:816
static VALUE rb_str_getbyte(VALUE str, VALUE index)
Definition: string.c:4079
static void rb_enc_cr_str_exact_copy(VALUE dest, VALUE src)
Definition: string.c:320
void rb_sys_fail(const char *mesg)
Definition: error.c:1899
static VALUE rb_str_chr(VALUE str)
Definition: string.c:4067
#define rb_str_new3
RUBY_EXTERN VALUE rb_cString
Definition: ripper.y:1456
Real * b
Definition: bigdecimal.c:1182
static const char * search_nonascii(const char *p, const char *e)
Definition: string.c:146
VALUE rb_id_quote_unprintable(ID)
Definition: string.c:7841
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Definition: transcode.c:2867
return ptr
Definition: tcltklib.c:784
static void str_modifiable(VALUE str)
Definition: string.c:1327
VpDivd * c
Definition: bigdecimal.c:1205
static VALUE rb_str_bytes(VALUE str)
Definition: string.c:6398
static VALUE rb_str_index_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2555
#define CHAR_BIT
Definition: ruby.h:208
static VALUE rb_str_match_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2796
#define RSTRING_EMBED_LEN_MAX
static void str_mod_check(VALUE s, const char *p, long len)
Definition: string.c:352
static VALUE rb_str_lines(int argc, VALUE *argv, VALUE str)
Definition: string.c:6318
static const char isspacetable[256]
Definition: string.c:5839
#define T_BIGNUM
#define MEMCPY(p1, p2, type, n)
static VALUE scan_once(VALUE str, VALUE pat, long *start)
Definition: string.c:6986
static VALUE rb_str_sub(int argc, VALUE *argv, VALUE str)
Definition: string.c:3820
VALUE rb_str_export_locale(VALUE)
Definition: string.c:626
#define ENC_CODERANGE_VALID
static VALUE rb_str_s_try_convert(VALUE dummy, VALUE str)
Definition: string.c:1526
gz end
Definition: zlib.c:2270
ID rb_to_id(VALUE)
Definition: string.c:8146
#define RMATCH_REGS(obj)
Definition: re.h:54
static VALUE sym_succ(VALUE sym)
Definition: string.c:7982
static VALUE rb_str_end_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7584
arg
Definition: ripper.y:1312
static void str_enc_copy(VALUE str1, VALUE str2)
Definition: string.c:284
VALUE src
Definition: tcltklib.c:7953
void rb_str_modify(VALUE)
Definition: string.c:1369
#define rb_str_buf_new2
#define NEWOBJ_OF(obj, type, klass, flags)
#define T_SYMBOL
VALUE rb_str_cat(VALUE, const char *, long)
Definition: string.c:1956
static ID id_to_s
Definition: string.c:892
VALUE rb_str_length(VALUE)
Definition: string.c:1182
#define ENC_CODERANGE_7BIT
rb_encoding * rb_enc_get(VALUE obj)
Definition: encoding.c:770
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Definition: string.c:563
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
int size
Definition: encoding.c:52
static VALUE rb_str_hex(VALUE str)
Definition: string.c:7101
static char * str_nth(const char *p, const char *e, long nth, rb_encoding *enc, int singlebyte)
Definition: string.c:1588
#define f
#define NUM2LONG(x)
static VALUE rb_str_reverse(VALUE str)
Definition: string.c:4247
static VALUE rb_str_downcase(VALUE str)
Definition: string.c:4920
#define SYMBOL_P(x)
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
RUBY_EXTERN VALUE rb_default_rs
Definition: ripper.y:488
static VALUE rb_str_valid_encoding_p(VALUE str)
Definition: string.c:7661
static VALUE rb_str_each_byte(VALUE str)
Definition: string.c:6381
static VALUE rb_str_chop(VALUE str)
Definition: string.c:6674
long rb_str_sublen(VALUE, long)
Definition: string.c:1657
static VALUE rb_str_count(int argc, VALUE *argv, VALUE str)
Definition: string.c:5781
#define STR_SET_LEN(str, n)
Definition: string.c:82
static VALUE rb_str_eql(VALUE str1, VALUE str2)
Definition: string.c:2371
int rb_enc_str_asciionly_p(VALUE)
Definition: string.c:340
static void rb_str_subpat_set(VALUE str, VALUE re, VALUE backref, VALUE val)
Definition: string.c:3449
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *)
Definition: string.c:569
int t
Definition: ripper.c:13760
void rb_str_associate(VALUE, VALUE)
Definition: string.c:1422
static long enc_strlen(const char *p, const char *e, rb_encoding *enc, int cr)
Definition: string.c:979
static VALUE rb_str_lstrip(VALUE str)
Definition: string.c:6870
#define MBCLEN_INVALID_P(ret)
st_index_t rb_memhash(const void *ptr, long len)
Definition: random.c:1422
int num_regs
Definition: ripper.y:615
#define ENC_CODERANGE(obj)
#define lesser(a, b)
Definition: string.c:2273
VALUE rb_check_array_type(VALUE ary)
Definition: array.c:557
static enum neighbor_char enc_succ_alnum_char(char *p, long len, rb_encoding *enc, char *carry)
Definition: string.c:2894
DATA_PTR(self)
#define rb_str_dup_frozen
static VALUE sym_match(VALUE sym, VALUE other)
Definition: string.c:8035
VALUE rb_reg_quote(VALUE)
Definition: re.c:2965
static long rb_str_index(VALUE str, VALUE sub, long offset)
Definition: string.c:2494
st_index_t rb_str_hash(VALUE)
Definition: string.c:2237
#define rb_str_new5
RUBY_EXTERN VALUE rb_cObject
Definition: ripper.y:1426
static VALUE rb_str_upto(int argc, VALUE *argv, VALUE beg)
Definition: string.c:3100
st_data_t st_index_t
Definition: ripper.y:63
static VALUE str_byte_substr(VALUE str, long beg, long len)
Definition: string.c:4116
#define ALLOC_N(type, n)
#define LONG2FIX(i)
#define RBASIC(obj)
VALUE rb_str_new_with_class(VALUE, const char *, long)
rb_econv_result_t
Definition: ripper.y:242
#define STR_EMBED_P(str)
Definition: string.c:75
static VALUE tr_trans(VALUE str, VALUE src, VALUE repl, int sflag)
Definition: string.c:5126
#define STR_NOCAPA_P(s)
Definition: string.c:64
#define RARRAY_LENINT(ary)
#define ONIGENC_CTYPE_DIGIT
klass
Definition: tcltklib.c:3504
#define UINT2NUM(x)
#define INT2NUM(x)
static VALUE rb_str_capitalize(VALUE str)
Definition: string.c:4991
void rb_define_variable(const char *, VALUE *)
Definition: variable.c:589
rb_encoding * rb_filesystem_encoding(void)
Definition: encoding.c:1246
static VALUE rb_str_init(int argc, VALUE *argv, VALUE str)
Definition: string.c:969
int rb_respond_to(VALUE, ID)
Definition: vm_method.c:1557
#define ONIGENC_CODE_TO_MBC_MAXLEN
RUBY_EXTERN VALUE rb_rs
Definition: ripper.y:487
static VALUE rb_str_rstrip_bang(VALUE str)
Definition: string.c:6891
static VALUE rb_str_each_char(VALUE str)
Definition: string.c:6495
#define ISPRINT(c)
Definition: ruby.h:1631
static VALUE str_replace_shared(VALUE str2, VALUE str)
Definition: string.c:657
VALUE rb_backref_get(void)
Definition: vm.c:762
static void str_make_independent_expand(VALUE str, long expand)
Definition: string.c:1347
VALUE rb_ary_concat(VALUE x, VALUE y)
Definition: array.c:3382
static VALUE rb_str_start_with(int argc, VALUE *argv, VALUE str)
Definition: string.c:7561
VALUE rb_ary_new2(long capa)
Definition: array.c:417
#define OBJ_UNTRUST(x)
VALUE rb_str_new(const char *, long)
Definition: string.c:425
#define rb_safe_level()
Definition: tcltklib.c:94
#define rb_enc_is_newline(p, end, enc)
VALUE rb_str_drop_bytes(VALUE, long)
Definition: string.c:3339
static void str_discard(VALUE str)
Definition: string.c:1411
Real * res
Definition: bigdecimal.c:1233
#define assert(condition)
Definition: ossl.h:45
VALUE rb_range_beg_len(VALUE, long *, long *, long, int)
Definition: range.c:987
#define rb_enc_asciicompat(enc)
#define NUM2INT(x)
VALUE rb_hash_new(void)
Definition: hash.c:234
size_t rb_str_memsize(VALUE)
Definition: string.c:838
VALUE rb_obj_alloc(VALUE)
Definition: object.c:1702
const char * rb_id2name(ID id)
Definition: ripper.c:16068
int gen
Definition: string.c:5064
VALUE rb_str_intern(VALUE)
Definition: string.c:7203
unsigned long sum
Definition: zlib.c:400
static VALUE sym_empty(VALUE sym)
Definition: string.c:8075
static VALUE rb_str_to_s(VALUE str)
Definition: string.c:4432
#define rb_enc_isupper(c, enc)
#define rb_check_arity(argc, min, max)
#define BUILTIN_TYPE(x)
static VALUE str_byte_aref(VALUE str, VALUE indx)
Definition: string.c:4171
#define OBJ_UNTRUSTED(x)
BDIGIT e
Definition: bigdecimal.c:5085
#define rb_enc_isascii(c, enc)
int rb_str_hash_cmp(VALUE, VALUE)
Definition: string.c:2247
#define SIZEOF_VALUE
VALUE rb_hash_aref(VALUE, VALUE)
Definition: hash.c:560
VALUE rb_funcall2(VALUE, ID, int, const VALUE *)
Calls a method.
Definition: vm_eval.c:805
unsigned long VALUE
Definition: ripper.y:104
rb_encoding * rb_ascii8bit_encoding(void)
Definition: encoding.c:1151
static VALUE rb_str_downcase_bang(VALUE str)
Definition: string.c:4855
static VALUE rb_enc_cr_str_buf_cat(VALUE str, const char *ptr, long len, int ptr_encindex, int ptr_cr, int *ptr_cr_ret)
Definition: string.c:1981
void rb_warning(const char *fmt,...)
Definition: error.c:229
#define ONIGERR_INVALID_CODE_POINT_VALUE
#define RREGEXP(obj)
#define RSTRING_GETMEM(str, ptrvar, lenvar)
static VALUE rb_str_sum(int argc, VALUE *argv, VALUE str)
Definition: string.c:7242
static void tr_setup_table(VALUE str, char stable[TR_TABLE_SIZE], int first, VALUE *tablep, VALUE *ctablep, rb_encoding *enc)
Definition: string.c:5431
gz ecopts
Definition: zlib.c:2275
#define is_ascii_string(str)
Definition: string.c:120
long salt
Definition: crypt.c:507
#define RREGEXP_SRC_LEN(r)
#define snprintf
#define SPECIAL_CONST_P(x)
#define OBJ_TAINT(x)
static VALUE rb_str_casecmp(VALUE str1, VALUE str2)
Definition: string.c:2436
#define rb_intern(str)
BDIGIT v
Definition: bigdecimal.c:5656
#define mod(x, y)
Definition: date_strftime.c:28
VALUE rb_filesystem_str_new_cstr(const char *)
Definition: string.c:614
static char * rb_str_subpos(VALUE str, long beg, long *lenp)
Definition: string.c:1689
VALUE rb_str_ord(VALUE s)
Definition: string.c:7223
#define STR_ASSOC_P(s)
Definition: string.c:62
static VALUE sym_aref(int argc, VALUE *argv, VALUE sym)
Definition: string.c:8049
#define NULL
Definition: _sdbm.c:103
q
Definition: tcltklib.c:2968
const char * name
Definition: nkf.c:208
VALUE rb_invcmp(VALUE x, VALUE y)
Definition: compar.c:42
static VALUE rb_str_aref(VALUE str, VALUE indx)
Definition: string.c:3207
VALUE rb_check_string_type(VALUE)
Definition: string.c:1508
#define REALLOC_N(var, type, n)
VALUE rb_reg_regcomp(VALUE)
Definition: re.c:2547
int rb_enc_str_coderange(VALUE)
Definition: string.c:327
long rb_enc_strlen(const char *, const char *, rb_encoding *)
Definition: string.c:1025
static int match(VALUE str, VALUE pat, VALUE hash, int(*cb)(VALUE, VALUE))
Definition: date_parse.c:273
void rb_define_method(VALUE klass, const char *name, VALUE(*func)(ANYARGS), int argc)
Definition: class.c:1340
volatile VALUE current
Definition: tcltklib.c:7112
#define rb_enc_mbmaxlen(enc)
static VALUE rb_str_delete_bang(int, VALUE *, VALUE)
Definition: string.c:5525
void rb_warn(const char *fmt,...)
Definition: error.c:216
#define SYM2ID(x)
VALUE rb_eArgError
Definition: error.c:512
VALUE rb_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2349
static VALUE rb_str_force_encoding(VALUE str, VALUE enc)
Definition: string.c:7624
rb_encoding * rb_enc_find(const char *name)
Definition: encoding.c:657
#define IS_EVSTR(p, e)
Definition: string.c:4609
VALUE rb_check_convert_type(VALUE, int, const char *, const char *)
Definition: object.c:2364
void rb_str_free(VALUE)
Definition: string.c:830
static VALUE rb_str_rindex_m(int argc, VALUE *argv, VALUE str)
Definition: string.c:2669
VALUE rb_usascii_str_new_cstr(const char *)
#define TR_TABLE_SIZE
Definition: string.c:5429
int dummy
Definition: tcltklib.c:4483
static VALUE rb_str_each_codepoint(VALUE str)
Definition: string.c:6587
STATIC void unsigned char * cp
Definition: crypt.c:307
VALUE rb_str_plus(VALUE, VALUE)
Definition: string.c:1236
#define FL_UNSET(x, f)
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Definition: string.c:2067
static enum neighbor_char enc_succ_char(char *p, long len, rb_encoding *enc)
Definition: string.c:2817
VALUE rb_inspect(VALUE)
Definition: object.c:402
rb_encoding * rb_enc_from_index(int index)
Definition: encoding.c:548
size_t len
Definition: tcltklib.c:3568
void rb_str_set_len(VALUE, long)
Definition: string.c:1830