/* Shorthand accessors: BEG(no)/END(no) read entry `no` of the beg[] and end[]
 * arrays reached through a pointer named `regs`.  The macros do not take
 * `regs` as an argument, so a suitable `regs` must be in scope wherever they
 * are expanded. */
20 #define BEG(no) (regs->beg[(no)])
21 #define END(no) (regs->end[(no)])
/* Number of elements in `array`, cast to int.  Only meaningful for a real
 * array object: if a pointer is passed, sizeof yields the pointer size and
 * the result is wrong. */
30 #define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
/* Drop any macro definitions of these string constructor/helper names that
 * may have been inherited from headers.
 * NOTE(review): presumably done so the names can be (re)defined later in
 * this file without being macro-expanded -- confirm against the full source. */
32 #undef rb_str_new_cstr
33 #undef rb_tainted_str_new_cstr
34 #undef rb_usascii_str_new_cstr
35 #undef rb_external_str_new_cstr
36 #undef rb_locale_str_new_cstr
41 #undef rb_tainted_str_new2
42 #undef rb_usascii_str_new2
43 #undef rb_str_dup_frozen
44 #undef rb_str_buf_new_cstr
45 #undef rb_str_buf_new2
46 #undef rb_str_buf_cat2
/* NOTE(review): presumably an upper bound on the byte length of one
 * character; the rationale for 16 is not visible here. */
54 #define RUBY_MAX_CHAR_LEN 16
/* String-specific names for the generic per-object user flag bits. */
55 #define STR_TMPLOCK FL_USER7
56 #define STR_NOEMBED FL_USER1
57 #define STR_SHARED FL_USER2
58 #define STR_ASSOC FL_USER3
/* Predicates over those flags.  Note that they test ELTS_SHARED rather than
 * STR_SHARED; the relationship between the two names is defined outside this
 * view -- TODO confirm they denote the same bit. */
/* Both STR_NOEMBED and ELTS_SHARED are set. */
59 #define STR_SHARED_P(s) FL_ALL((s), STR_NOEMBED|ELTS_SHARED)
/* Both STR_NOEMBED and STR_ASSOC are set. */
60 #define STR_ASSOC_P(s) FL_ALL((s), STR_NOEMBED|STR_ASSOC)
/* Mask combining STR_NOEMBED with the shared and assoc bits. */
61 #define STR_NOCAPA (STR_NOEMBED|ELTS_SHARED|STR_ASSOC)
/* STR_NOEMBED is set and at least one of ELTS_SHARED / STR_ASSOC is set.
 * Later code only stores as.heap.aux.capa when this is false. */
62 #define STR_NOCAPA_P(s) (FL_TEST((s),STR_NOEMBED) && FL_ANY((s),ELTS_SHARED|STR_ASSOC))
63 #define STR_UNSET_NOCAPA(s) do {\
64 if (FL_TEST((s),STR_NOEMBED)) FL_UNSET((s),(ELTS_SHARED|STR_ASSOC));\
68 #define STR_SET_NOEMBED(str) do {\
69 FL_SET((str), STR_NOEMBED);\
70 STR_SET_EMBED_LEN((str), 0);\
/* Mark `str` as embedded by clearing its STR_NOEMBED flag. */
72 #define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED)
/* True when `str` does not have STR_NOEMBED set. */
73 #define STR_EMBED_P(str) (!FL_TEST((str), STR_NOEMBED))
74 #define STR_SET_EMBED_LEN(str, n) do { \
76 RBASIC(str)->flags &= ~RSTRING_EMBED_LEN_MASK;\
77 RBASIC(str)->flags |= (tmp_n) << RSTRING_EMBED_LEN_SHIFT;\
80 #define STR_SET_LEN(str, n) do { \
81 if (STR_EMBED_P(str)) {\
82 STR_SET_EMBED_LEN((str), (n));\
85 RSTRING(str)->as.heap.len = (n);\
89 #define STR_DEC_LEN(str) do {\
90 if (STR_EMBED_P(str)) {\
91 long n = RSTRING_LEN(str);\
93 STR_SET_EMBED_LEN((str), n);\
96 RSTRING(str)->as.heap.len--;\
100 #define RESIZE_CAPA(str,capacity) do {\
101 if (STR_EMBED_P(str)) {\
102 if ((capacity) > RSTRING_EMBED_LEN_MAX) {\
103 char *tmp = ALLOC_N(char, (capacity)+1);\
104 memcpy(tmp, RSTRING_PTR(str), RSTRING_LEN(str));\
105 RSTRING(str)->as.heap.ptr = tmp;\
106 RSTRING(str)->as.heap.len = RSTRING_LEN(str);\
107 STR_SET_NOEMBED(str);\
108 RSTRING(str)->as.heap.aux.capa = (capacity);\
112 REALLOC_N(RSTRING(str)->as.heap.ptr, char, (capacity)+1);\
113 if (!STR_NOCAPA_P(str))\
114 RSTRING(str)->as.heap.aux.capa = (capacity);\
/* True when rb_enc_str_coderange(str) reports ENC_CODERANGE_7BIT. */
118 #define is_ascii_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT)
/* True when rb_enc_str_coderange(str) reports ENC_CODERANGE_BROKEN. */
119 #define is_broken_string(str) (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN)

/* Encoding of `str`, obtained by passing its ENCODING_GET() index to
 * rb_enc_from_index(). */
121 #define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
143 static inline const char *
146 #if SIZEOF_VALUE == 8
147 # define NONASCII_MASK 0x8080808080808080ULL
148 #elif SIZEOF_VALUE == 4
149 # define NONASCII_MASK 0x80808080UL
152 if ((
int)
sizeof(
VALUE) * 2 < e - p) {
155 s = (
const VALUE*)(~lowbits & ((
VALUE)p + lowbits));
156 while (p < (
const char *)s) {
163 if (*s & NONASCII_MASK) {
183 const char *e = p +
len;
363 return RSTRING(str)->as.heap.len;
366 return RSTRING(str)->as.heap.aux.capa;
376 str->as.heap.ptr = 0;
377 str->as.heap.len = 0;
378 str->as.heap.aux.capa = 0;
/* Alias: rb_str_new2 expands to rb_str_new_cstr. */
441 #define rb_str_new2 rb_str_new_cstr
/* Alias: rb_usascii_str_new2 expands to rb_usascii_str_new_cstr. */
452 #define rb_usascii_str_new2 rb_usascii_str_new_cstr
/* Alias: rb_tainted_str_new2 expands to rb_tainted_str_new_cstr. */
473 #define rb_tainted_str_new2 rb_tainted_str_new_cstr
482 const unsigned char *sp;
486 if (from == to)
return str;
511 len = len < 2 ? 2 : len * 2;
615 RSTRING(str2)->as.heap.aux.shared = str;
/* Alias: rb_str_new3 expands to rb_str_new_shared. */
645 #define rb_str_new3 rb_str_new_shared
660 RSTRING(str2)->as.heap.aux.shared = shared;
664 RSTRING(str)->as.heap.aux.shared = str2;
682 if ((ofs > 0) || (klass !=
RBASIC(str)->klass) ||
686 RSTRING(str)->as.heap.ptr += ofs;
687 RSTRING(str)->as.heap.len -= ofs;
700 str = str_new4(klass, orig);
702 RSTRING(str)->as.heap.aux.shared = assoc;
705 str = str_new4(klass, orig);
/* Alias: rb_str_new4 expands to rb_str_new_frozen. */
712 #define rb_str_new4 rb_str_new_frozen
/* Alias: rb_str_new5 expands to rb_str_new_with_class. */
722 #define rb_str_new5 rb_str_new_with_class
725 str_new_empty(
VALUE str)
/* NOTE(review): presumably the smallest capacity used when creating a string
 * buffer; the code that consumes this value is not visible here -- confirm. */
733 #define STR_BUF_MIN_SIZE 128
744 RSTRING(str)->as.heap.aux.capa = capa;
746 RSTRING(str)->as.heap.ptr[0] =
'\0';
/* Alias: rb_str_buf_new2 expands to rb_str_buf_new_cstr. */
764 #define rb_str_buf_new2 rb_str_buf_new_cstr
800 return RSTRING(str)->as.heap.aux.capa;
820 if (str == str2)
return;
839 RSTRING(str)->as.heap.aux.shared =
RSTRING(str2)->as.heap.aux.shared;
885 RSTRING(str)->as.heap.aux.shared = shared;
928 if (argc > 0 &&
rb_scan_args(argc, argv,
"01", &orig) == 1)
973 for (c=0; p<e; c++) {
1023 for (c=0; p<e; c++) {
1041 #ifdef NONASCII_MASK
/* True unless the top two bits of `c` are 10, i.e. unless `c` is a UTF-8
 * continuation byte (0x80..0xBF).  ASCII bytes and multi-byte lead bytes both
 * count as "lead" here, so counting lead bytes counts characters. */
1042 #define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
1057 count_utf8_lead_bytes_with_word(
const VALUE *s)
1064 d &= NONASCII_MASK >> 7;
1069 #if SIZEOF_VALUE == 8
1088 #ifdef NONASCII_MASK
1093 if ((
int)
sizeof(
VALUE) * 2 < e - p) {
1096 s = (
const VALUE*)(~lowbits & ((
VALUE)p + lowbits));
1098 while (p < (
const char *)s) {
1099 if (is_utf8_lead_byte(*p)) len++;
1103 len += count_utf8_lead_bytes_with_word(s);
1106 p = (
const char *)s;
1109 if (is_utf8_lead_byte(*p)) len++;
1237 while (n <= len/2) {
1238 memcpy(ptr2 + n, ptr2, n);
1241 memcpy(ptr2 + n, ptr2, len-n);
1301 long capa = len + expand;
1303 if (len > capa) len = capa;
1304 ptr =
ALLOC_N(
char, capa + 1);
1311 RSTRING(str)->as.heap.ptr = ptr;
1313 RSTRING(str)->as.heap.aux.capa = capa;
/* Convenience form of str_make_independent_expand() with an `expand`
 * argument of 0. */
1316 #define str_make_independent(str) str_make_independent_expand((str), 0L)
1335 else if (expand > 0) {
1337 long capa = len + expand;
1340 RSTRING(str)->as.heap.aux.capa = capa;
1366 RSTRING(str)->as.heap.ptr = 0;
1367 RSTRING(str)->as.heap.len = 0;
1385 assoc =
RSTRING(assoc)->as.heap.aux.shared;
1407 return RSTRING(str)->as.heap.aux.shared;
1437 if (!s || memchr(s, 0, len)) {
1483 const char *p2, *e2;
1486 while (p < e && 0 < nth) {
1512 while (p < e && nth--) {
1544 const char *pp =
str_nth(p, e, nth, enc, singlebyte);
1545 if (!pp)
return e -
p;
1556 #ifdef NONASCII_MASK
1558 str_utf8_nth(
const char *
p,
const char *e,
long *nthp)
1564 s = (
const VALUE*)(~lowbits & ((
VALUE)p + lowbits));
1566 while (p < (
const char *)s) {
1567 if (is_utf8_lead_byte(*p)) nth--;
1571 nth -= count_utf8_lead_bytes_with_word(s);
1573 }
while (s < t && (
int)
sizeof(
VALUE) <= nth);
1577 if (is_utf8_lead_byte(*p)) {
1578 if (nth == 0)
break;
1588 str_utf8_offset(
const char *p,
const char *e,
long nth)
1590 const char *pp = str_utf8_nth(p, e, &nth);
1634 if (len < 0)
return Qnil;
1642 if (beg < 0)
return Qnil;
1655 if (len > -beg) len = -beg;
1660 if (!p)
return Qnil;
1662 if (!p)
return Qnil;
1668 if (beg < 0)
return Qnil;
1678 #ifdef NONASCII_MASK
1681 p = str_utf8_nth(s, e, &beg);
1682 if (beg > 0)
return Qnil;
1683 len = str_utf8_offset(p, e, len);
1689 p = s + beg * char_sz;
1693 else if (len * char_sz > e - p)
1698 else if ((p =
str_nth_len(s, e, &beg, enc)) == e) {
1699 if (beg > 0)
return Qnil;
/* Alias: rb_str_dup_frozen expands to rb_str_new_frozen. */
1732 #define rb_str_dup_frozen rb_str_new_frozen
1764 rb_bug(
"probable buffer overflow: %ld for %ld", len, capa);
1794 char *ptr =
RSTRING(str)->as.heap.ptr;
1796 if (slen > len) slen =
len;
1797 if (slen > 0)
MEMCPY(
RSTRING(str)->as.ary, ptr,
char, slen);
1800 if (independent)
xfree(ptr);
1803 else if (!independent) {
1806 else if (slen < len || slen - len > 1024) {
1821 long capa, total, off = -1;
1827 if (len == 0)
return 0;
1836 capa =
RSTRING(str)->as.heap.aux.capa;
1842 if (capa <= total) {
1843 while (total > capa) {
1845 capa = (total + 4095) / 4096;
1848 capa = (capa + 1) * 2;
/* Call str_buf_cat() with the length taken from strlen(ptr).  `ptr` must be a
 * NUL-terminated C string and is evaluated twice, so it should have no side
 * effects. */
1862 #define str_buf_cat2(str, ptr) str_buf_cat((str), (ptr), strlen(ptr))
1867 if (len == 0)
return str;
1889 p =
RSTRING(str)->as.heap.ptr;
1890 memcpy(p +
RSTRING(str)->as.heap.len, ptr, len);
1907 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
1915 if (str_encindex == ptr_encindex) {
1945 *ptr_cr_ret = ptr_cr;
1947 if (str_encindex != ptr_encindex &&
1957 res_encindex = str_encindex;
1962 res_encindex = str_encindex;
1966 res_encindex = ptr_encindex;
1971 res_encindex = str_encindex;
1978 res_encindex = str_encindex;
2011 unsigned int c = (
unsigned char)*ptr;
2102 buf[0] = (char)code;
/* Smaller of `a` and `b` (yields `a` when they compare equal).  Each argument
 * may be evaluated twice, so avoid arguments with side effects. */
2198 #define lesser(a,b) (((a)>(b))?(b):(a))
2210 if (idx1 == idx2)
return TRUE;
2229 const char *ptr1, *ptr2;
2232 if (str1 == str2)
return 0;
2235 if (ptr1 == ptr2 || (retval =
memcmp(ptr1, ptr2,
lesser(len1, len2))) == 0) {
2244 if (len1 > len2)
return 1;
2247 if (retval > 0)
return 1;
2256 const char *ptr1, *ptr2;
2262 if (
memcmp(ptr1, ptr2, len) == 0)
2278 if (str1 == str2)
return Qtrue;
2298 if (str1 == str2)
return Qtrue;
2371 char *p1, *p1end, *p2, *p2end;
2382 while (p1 < p1end && p2 < p2end) {
2384 unsigned int c1 =
TOUPPER(*p1 & 0xff);
2385 unsigned int c2 =
TOUPPER(*p2 & 0xff);
2387 return INT2FIX(c1 < c2 ? -1 : 1);
2394 while (p1 < p1end && p2 < p2end) {
2398 if (0 <= c1 && 0 <= c2) {
2402 return INT2FIX(c1 < c2 ? -1 : 1);
2408 len = l1 < l2 ? l1 : l2;
2411 return INT2FIX(r < 0 ? -1 : 1);
2413 return INT2FIX(l1 < l2 ? -1 : 1);
2440 if (offset < 0)
return -1;
2442 if (len - offset < slen)
return -1;
2449 if (slen == 0)
return offset;
2457 if (pos < 0)
return pos;
2459 if (t == s + pos)
break;
2460 if ((len -= t - s) <= 0)
return -1;
2464 return pos + offset;
2492 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
2508 switch (
TYPE(sub)) {
2536 if (pos == -1)
return Qnil;
2544 char *s, *sbeg, *e, *t;
2555 if (len < slen)
return -1;
2556 if (len - pos < slen) {
2566 s =
str_nth(sbeg, e, pos, enc, singlebyte);
2568 if (
memcmp(s, t, slen) == 0) {
2571 if (pos == 0)
break;
2605 if (
rb_scan_args(argc, argv,
"11", &sub, &vpos) == 2) {
2616 if (pos > len) pos =
len;
2622 switch (
TYPE(sub)) {
2632 if (pos >= 0)
return LONG2NUM(pos);
2648 if (pos >= 0)
return LONG2NUM(pos);
2743 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
2747 ++((
unsigned char*)p)[
i];
2755 memset(p+l, 0xff, len-l);
2761 for (len2 = len-1; 0 < len2; len2--) {
2766 memset(p+len2+1, 0xff, len-(len2+1));
2777 for (i = len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
2781 --((
unsigned char*)p)[
i];
2789 memset(p+l, 0, len-l);
2795 for (len2 = len-1; 0 < len2; len2--) {
2800 memset(p+len2+1, 0, len-(len2+1));
2831 MEMCPY(save, p,
char, len);
2838 MEMCPY(p, save,
char, len);
2841 MEMCPY(save, p,
char, len);
2846 MEMCPY(p, save,
char, len);
2851 MEMCPY(p, save,
char, len);
2861 MEMCPY(carry, p,
char, len);
2865 MEMCPY(carry, p,
char, len);
2901 char *sbeg, *s, *e, *last_alnum = 0;
2905 long carry_pos = 0, carry_len = 1;
2937 carry_pos = s - sbeg;
2953 MEMCPY(carry, s,
char, l);
2956 carry_pos = s - sbeg;
3023 VALUE end, exclusive;
3024 VALUE current, after_end;
3031 excl =
RTEST(exclusive);
3041 if (c > e || (excl && c == e))
return beg;
3044 if (!excl && c == e)
break;
3046 if (excl && c == e)
break;
3075 if (excl && bi == ei)
break;
3096 if (n > 0 || (excl && n == 0))
return beg;
3105 if (
NIL_P(next))
break;
3132 switch (
TYPE(indx)) {
3244 if (len > olen) len = olen;
3252 memmove(ptr, oldptr + len, nlen);
3258 RSTRING(str)->as.heap.len = nlen;
3323 if (slen < len || slen < beg + len) {
3352 long start, end,
len;
3390 switch (
TYPE(indx)) {
3526 if (argc < 1 || 2 < argc) {
3529 for (i=0; i<
argc; i++) {
3534 if (!
NIL_P(result)) {
3546 switch (
TYPE(pat)) {
3591 else if (argc == 2) {
3616 if (iter || !
NIL_P(hash)) {
3664 memmove(p + beg0 + rlen, p + beg0 + plen, len - beg0 - plen);
3666 memcpy(p + beg0, rp, rlen);
3735 long offset, blen, slen,
len,
last;
3761 if (bang)
return Qnil;
3782 if (iter || !
NIL_P(hash)) {
3818 offset = end0 +
len;
3857 return str_gsub(argc, argv, str, 1);
3907 return str_gsub(argc, argv, str, 0);
3926 if (str == str2)
return str;
4023 if (beg > n || len < 0)
return Qnil;
4026 if (beg < 0)
return Qnil;
4075 switch (
TYPE(indx)) {
4172 if (clen > 1 || (*s & 0x80)) single = 0;
4182 if (clen > 1 || (*s & 0x80)) single = 0;
4259 if (i == -1)
return Qfalse;
4290 if (argc == 0) base = 10;
/* NOTE(review): presumably the buffer size reserved for one escaped
 * character sequence; the sizing rationale is not visible here -- confirm. */
4354 #define CHAR_ESC_LEN 13
4369 else if (c < 0x10000) {
4405 const char *
p, *pend, *prev;
4423 const unsigned char *q = (
const unsigned char *)p;
4424 if (q[0] == 0xFE && q[1] == 0xFF)
4426 else if (q[0] == 0xFF && q[1] == 0xFE)
4431 else if (enc == utf32) {
4432 const unsigned char *q = (
const unsigned char *)p;
4433 if (q[0] == 0 && q[1] == 0 && q[2] == 0xFE && q[3] == 0xFF)
4435 else if (q[3] == 0 && q[2] == 0 && q[1] == 0xFE && q[0] == 0xFF)
4446 if (p > prev)
str_buf_cat(result, prev, p - prev);
4449 n = (int)(pend - p);
4460 if ((asciicompat || unicode_p) &&
4461 (c ==
'"'|| c ==
'\\' ||
4466 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
4467 if (p - n > prev)
str_buf_cat(result, prev, p - n - prev);
4469 if (asciicompat || enc == resenc) {
4475 case '\n': cc =
'n';
break;
4476 case '\r': cc =
'r';
break;
4477 case '\t': cc =
't';
break;
4478 case '\f': cc =
'f';
break;
4479 case '\013': cc =
'v';
break;
4480 case '\010': cc =
'b';
break;
4481 case '\007': cc =
'a';
break;
4482 case 033: cc =
'e';
break;
4483 default: cc = 0;
break;
4486 if (p - n > prev)
str_buf_cat(result, prev, p - n - prev);
4498 if (p - n > prev)
str_buf_cat(result, prev, p - n - prev);
4504 if (p > prev)
str_buf_cat(result, prev, p - prev);
/* True when `p` is still before `e` and *p is one of '$', '@' or '{'.  The
 * bounds test comes first, so *p is never read when p has reached e.  Code
 * later in this file uses it after seeing '#' to decide whether to emit a
 * backslash.  `p` is evaluated several times. */
4511 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
4526 const char *
p, *pend;
4534 unsigned char c = *p++;
4536 case '"':
case '\\':
4537 case '\n':
case '\r':
4538 case '\t':
case '\f':
4539 case '\013':
case '\010':
case '\007':
case '\033':
4556 while (cc >>= 4) len++;
4578 unsigned char c = *p++;
4580 if (c ==
'"' || c ==
'\\') {
4584 else if (c ==
'#') {
4585 if (
IS_EVSTR(p, pend)) *q++ =
'\\';
4588 else if (c ==
'\n') {
4592 else if (c ==
'\r') {
4596 else if (c ==
'\t') {
4600 else if (c ==
'\f') {
4604 else if (c ==
'\013') {
4608 else if (c ==
'\010') {
4612 else if (c ==
'\007') {
4616 else if (c ==
'\033') {
4642 snprintf(q, qend-q,
".force_encoding(\"%s\")", enc->
name);
4685 unsigned int c = *(
unsigned char*)s;
4688 *s =
'A' + (c -
'a');
4700 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
4702 *s =
'A' + (c -
'a');
4719 if (modify)
return str;
4767 unsigned int c = *(
unsigned char*)s;
4770 *s =
'a' + (c -
'A');
4783 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
4785 *s =
'a' + (c -
'A');
4802 if (modify)
return str;
4872 if (modify)
return str;
4936 if (modify)
return str;
4976 if (t->
p == t->
pend)
return -1;
4977 if (t->
p < t->
pend - 1 && *t->
p ==
'\\') {
4982 if (t->
p < t->
pend - 1 && *t->
p ==
'-') {
4984 if (t->
p < t->
pend) {
4988 if (t->
now < 0x80 && c < 0x80) {
4990 "invalid range \"%c-%c\" in string transliteration",
5004 else if (++t->
now < t->
max) {
5019 const unsigned int errc = -1;
5020 unsigned int trans[256];
5022 struct tr trsrc, trrepl;
5024 unsigned int c, c0,
last = 0;
5025 int modify = 0,
i, l;
5031 #define CHECK_IF_ASCII(c) \
5032 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
5033 (cr = ENC_CODERANGE_VALID) : 0)
5054 trsrc.
p + l < trsrc.
pend) {
5060 trsrc.
gen = trrepl.
gen = 0;
5061 trsrc.
now = trrepl.
now = 0;
5062 trsrc.
max = trrepl.
max = 0;
5065 for (
i=0;
i<256;
i++) {
5068 while ((c =
trnext(&trsrc, enc)) != errc) {
5077 while ((c =
trnext(&trrepl, enc)) != errc)
5080 for (
i=0;
i<256;
i++) {
5081 if (trans[
i] != errc) {
5089 for (
i=0;
i<256;
i++) {
5092 while ((c =
trnext(&trsrc, enc)) != errc) {
5093 r =
trnext(&trrepl, enc);
5094 if (r == errc) r = trrepl.
now;
5113 unsigned int save = -1;
5129 if (cflag) c =
last;
5132 else if (cflag) c = errc;
5138 if (c != (
unsigned int)-1) {
5150 if (enc != e1) may_modify = 1;
5152 while (t - buf + tlen >= max) {
5159 if (may_modify &&
memcmp(s, t, tlen) != 0) {
5176 c = (
unsigned char)*s;
5177 if (trans[c] != errc) {
5208 if (cflag) c =
last;
5211 else if (cflag) c = errc;
5215 c = cflag ? last : errc;
5223 if (enc != e1) may_modify = 1;
5225 while (t - buf + tlen >= max) {
5233 if (may_modify &&
memcmp(s, t, tlen) != 0) {
5273 return tr_trans(str, src, repl, 0);
/* NOTE(review): presumably 256 per-byte entries plus one extra slot; nearby
 * code stores a flag at index 256 of a table, which would fit this size --
 * confirm that the table is declared with TR_TABLE_SIZE. */
5305 #define TR_TABLE_SIZE 257
5310 const unsigned int errc = -1;
5314 VALUE table = 0, ptable = 0;
5315 int i, l, cflag = 0;
5325 for (i=0; i<256; i++) {
5328 stable[256] = cflag;
5330 else if (stable[256] && !cflag) {
5333 for (i=0; i<256; i++) {
5337 while ((c =
trnext(&tr, enc)) != errc) {
5339 buf[c & 0xff] = !cflag;
5360 for (i=0; i<256; i++) {
5361 stable[
i] = stable[
i] && buf[
i];
5370 return table[c] != 0;
5402 VALUE del = 0, nodel = 0;
5404 int i, ascompat, cr;
5410 for (i=0; i<
argc; i++) {
5427 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
5440 if (
tr_find(c, squeez, del, nodel)) {
5455 if (modify)
return str;
5496 VALUE del = 0, nodel = 0;
5506 for (i=0; i<
argc; i++) {
5526 unsigned int c = *(
unsigned char*)s++;
5527 if (c != save || (argc > 0 && !squeez[c])) {
5536 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
5537 if (c != save || (argc > 0 && !squeez[c])) {
5545 if (c != save || (argc > 0 && !
tr_find(c, squeez, del, nodel))) {
5561 if (modify)
return str;
5601 return tr_trans(str, src, repl, 1);
5648 VALUE del = 0, nodel = 0;
5656 for (i=0; i<
argc; i++) {
5670 if (*(
unsigned char*)s++ == c) n++;
5685 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
5694 if (
tr_find(c, table, del, nodel)) {
5705 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
5706 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5707 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5708 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5709 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5710 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5711 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5712 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5713 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5714 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5715 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5716 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5717 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5718 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5719 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5720 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
/* Table-driven whitespace test: looks `c` up in isspacetable.  The cast to
 * unsigned char keeps the index in 0..255 even when `c` is a negative char. */
5723 #define ascii_isspace(c) isspacetable[(unsigned char)(c)]
5773 enum {awk, string, regexp} split_type;
5774 long beg, end,
i = 0;
5778 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
5780 if (lim <= 0) limit =
Qnil;
5781 else if (lim == 1) {
5802 split_type = string;
5806 split_type = regexp;
5823 split_type = regexp;
5829 if (split_type == awk) {
5838 while (ptr < eptr) {
5839 c = (
unsigned char)*ptr++;
5847 if (!
NIL_P(limit) && lim <=
i)
break;
5862 while (ptr < eptr) {
5874 if (!
NIL_P(limit) && lim <= i)
break;
5889 else if (split_type ==
string) {
5903 while (ptr < eptr &&
5904 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
5907 if (t != ptr + end) {
5913 if (!
NIL_P(limit) && lim <= ++i)
break;
5927 if (start == end &&
BEG(0) ==
END(0)) {
5932 else if (last_null == 1) {
5940 if (ptr+start == ptr+len)
5950 beg = start =
END(0);
5954 for (idx=1; idx < regs->
num_regs; idx++) {
5955 if (
BEG(idx) == -1)
continue;
5956 if (
BEG(idx) ==
END(idx))
5957 tmp = str_new_empty(str);
5962 if (!
NIL_P(limit) && lim <= ++i)
break;
5967 tmp = str_new_empty(str);
5972 if (
NIL_P(limit) && lim == 0) {
6035 unsigned int newline;
6036 const char *
p, *pend, *s, *ptr;
6063 p = memchr(p,
'\n', pend - p);
6094 if (rslen == 0 && c == newline) {
6107 line =
rb_str_new5(str, s, p - s + (rslen ? rslen : n));
6112 s = p + (rslen ? rslen : n);
6194 for (i = 0; i <
len; i += n) {
6200 for (i = 0; i <
len; i += n) {
6235 const char *ptr, *
end;
6256 const char *
p, *p2, *
beg, *
end;
6260 if (beg > end)
return 0;
6343 if (len == 0)
return Qnil;
6395 while (len>0 && p[len-1] ==
'\n') {
6397 if (len>0 && p[len-1] ==
'\r')
6407 if (rslen > len)
return Qnil;
6409 if (rslen == 1 && newline ==
'\n')
6417 if (p[len-1] == newline &&
6550 while (s < t && ((c = *(t-1)) ==
'\0' ||
ascii_isspace(c))) t--;
6657 for (i=1; i < regs->
num_regs; i++) {
6703 long last = -1, prev = 0;
6795 extern char *
crypt(
const char *,
const char *);
6797 const char *s, *saltp;
6800 char salt_8bit_clean[3];
6811 if (!
ISASCII((
unsigned char)saltp[0]) || !
ISASCII((
unsigned char)saltp[1])) {
6812 salt_8bit_clean[0] = saltp[0] & 0x7f;
6813 salt_8bit_clean[1] = saltp[1] & 0x7f;
6814 salt_8bit_clean[2] =
'\0';
6815 saltp = salt_8bit_clean;
6818 res =
crypt(s, saltp);
6893 char *ptr, *
p, *pend;
6896 unsigned long sum0 = 0;
6915 sum0 += (
unsigned char)*p;
6926 if (bits < (
int)
sizeof(
long)*
CHAR_BIT) {
6927 sum0 &= (((
unsigned long)1)<<bits)-1;
6951 long width,
len, flen = 1, fclen = 1;
6954 const char *f =
" ";
6955 long n,
size, llen, rlen, llen2 = 0, rlen2 = 0;
6957 int singlebyte = 1, cr;
6969 if (flen == 0 || fclen == 0) {
6974 if (width < 0 || len >= width)
return rb_str_dup(str);
6976 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
6980 llen2 =
str_offset(f, f + flen, llen % fclen, enc, singlebyte);
6981 rlen2 =
str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
6984 if ((len = llen / fclen + rlen / fclen) >=
LONG_MAX / flen ||
6985 (len *= flen) >=
LONG_MAX - llen2 - rlen2 ||
6986 (len += llen2 + rlen2) >=
LONG_MAX - size) {
6993 memset(p, *f, llen);
6997 while (llen >= fclen) {
7003 memcpy(p, f, llen2);
7010 memset(p, *f, rlen);
7014 while (rlen >= fclen) {
7020 memcpy(p, f, rlen2);
7134 return rb_ary_new3(3, str, str_new_empty(str), str_new_empty(str));
7138 if (pos == 0 &&
RSTRING_LEN(sep) == 0)
goto failed;
7184 return rb_ary_new3(3, str_new_empty(str), str_new_empty(str), str);
7215 for (i=0; i<
argc; i++) {
7217 if (
NIL_P(tmp))
continue;
7240 for (i=0; i<
argc; i++) {
7242 if (
NIL_P(tmp))
continue;
7336 static const char ellipsis[] =
"...";
7337 const long ellipsislen =
sizeof(ellipsis) - 1;
7340 const char *
const p =
RSTRING_PTR(str), *e = p + blen;
7341 VALUE estr, ret = 0;
7345 (e =
rb_enc_nth(p, e, len, enc)) - p == blen) {
7348 else if (len <= ellipsislen ||
7415 if (sym1 == sym2)
return Qtrue;
7472 memcpy(dest + 1, ptr, len);
7539 enum {SYM_PROC_CACHE_SIZE = 67};
7544 if (!sym_proc_cache) {
7551 index = (
id % SYM_PROC_CACHE_SIZE) << 1;
7554 if (aryp[index] == sym) {
7555 return aryp[index + 1];
7560 aryp[index + 1] = proc;
7736 switch (
TYPE(name)) {
/* From this point on, rb_intern(str) expands to rb_intern_const(str).
 * NOTE(review): presumably intended for call sites that pass string
 * literals -- confirm against the uses that follow. */
7772 #define rb_intern(str) rb_intern_const(str)