Ruby
1.9.3p484 (2013-11-22 revision 43786)
Main Page
Modules
Data Structures
Files
File List
Globals
include
ruby
encoding.h
Go to the documentation of this file.
1
/**********************************************************************

  encoding.h -

  $Author: matz $
  created at: Thu May 24 11:49:41 JST 2007

  Copyright (C) 2007 Yukihiro Matsumoto

**********************************************************************/
11
12
#ifndef RUBY_ENCODING_H
13
#define RUBY_ENCODING_H 1
14
15
#if defined(__cplusplus)
16
extern
"C"
{
17
#if 0
18
}
/* satisfy cc-mode */
19
#endif
20
#endif
21
22
#include <stdarg.h>
#include "ruby/oniguruma.h"
24
25
#if defined __GNUC__ && __GNUC__ >= 4
26
#pragma GCC visibility push(default)
27
#endif
28
29
/*
 * Layout of the encoding index kept inline in an object's flags word:
 * ENCODING_INLINE_MAX is the largest value the inline field can hold,
 * ENCODING_SHIFT is the field's bit offset (FL_USHIFT+10), and
 * ENCODING_MASK selects the field's bits.
 */
#define ENCODING_INLINE_MAX 1023
#define ENCODING_SHIFT (FL_USHIFT+10)
#define ENCODING_MASK (((VALUE)ENCODING_INLINE_MAX)<<ENCODING_SHIFT)
32
33
/*
 * Store encoding index i directly in the flags word of obj: clear the
 * inline encoding field, then OR in i shifted into position.  No range
 * check is made here, so i must fit in the field.  obj is evaluated
 * twice; do not pass an expression with side effects.
 */
#define ENCODING_SET_INLINED(obj,i) do {\
    RBASIC(obj)->flags &= ~ENCODING_MASK;\
    RBASIC(obj)->flags |= (VALUE)(i) << ENCODING_SHIFT;\
} while (0)
37
/*
 * Set the encoding index of obj to i.  Both arguments are evaluated
 * exactly once into locals.  An index below ENCODING_INLINE_MAX is
 * written into the inline flags field; any other index is handed to
 * rb_enc_set_index().
 */
#define ENCODING_SET(obj,i) do {\
    VALUE rb_encoding_set_obj = (obj); \
    int encoding_set_enc_index = (i); \
    if (encoding_set_enc_index < ENCODING_INLINE_MAX) \
        ENCODING_SET_INLINED(rb_encoding_set_obj, encoding_set_enc_index); \
    else \
        rb_enc_set_index(rb_encoding_set_obj, encoding_set_enc_index); \
} while (0)
45
46
/*
 * Raw value of the inline encoding field in obj's flags word.
 * The whole expansion is parenthesized so the cast cannot bind to
 * tokens that follow the macro call.
 */
#define ENCODING_GET_INLINED(obj) ((int)((RBASIC(obj)->flags & ENCODING_MASK)>>ENCODING_SHIFT))

/*
 * Encoding index of obj.  When the inline field equals
 * ENCODING_INLINE_MAX the index is not taken from the field and
 * rb_enc_get_index() is called instead.  obj is evaluated more than
 * once; do not pass an expression with side effects.
 */
#define ENCODING_GET(obj) \
    (ENCODING_GET_INLINED(obj) != ENCODING_INLINE_MAX ? \
     ENCODING_GET_INLINED(obj) : \
     rb_enc_get_index(obj))

/* True when the inline encoding field of obj is 0. */
#define ENCODING_IS_ASCII8BIT(obj) (ENCODING_GET_INLINED(obj) == 0)
53
54
/* NOTE(review): presumably the maximum length of an encoding name -- confirm against the code that uses it. */
#define ENCODING_MAXNAMELEN 42
55
56
/*
 * Code range state, kept in the FL_USER8 and FL_USER9 bits of an
 * object's flags word.  UNKNOWN is neither bit, 7BIT is FL_USER8,
 * VALID is FL_USER9 and BROKEN is both.
 */
#define ENC_CODERANGE_MASK ((int)(FL_USER8|FL_USER9))
#define ENC_CODERANGE_UNKNOWN 0
#define ENC_CODERANGE_7BIT ((int)FL_USER8)
#define ENC_CODERANGE_VALID ((int)FL_USER9)
#define ENC_CODERANGE_BROKEN ((int)(FL_USER8|FL_USER9))

/* Code range bits currently set on obj. */
#define ENC_CODERANGE(obj) ((int)RBASIC(obj)->flags & ENC_CODERANGE_MASK)
/* True when the code range of obj is exactly ENC_CODERANGE_7BIT. */
#define ENC_CODERANGE_ASCIIONLY(obj) (ENC_CODERANGE(obj) == ENC_CODERANGE_7BIT)
/*
 * Replace the code range bits of obj with cr, leaving all other flag
 * bits untouched.  obj is evaluated twice.
 */
#define ENC_CODERANGE_SET(obj,cr) (RBASIC(obj)->flags = \
                                   (RBASIC(obj)->flags & ~ENC_CODERANGE_MASK) | (cr))
/* Reset the code range of obj to 0 (ENC_CODERANGE_UNKNOWN). */
#define ENC_CODERANGE_CLEAR(obj) ENC_CODERANGE_SET((obj),0)
66
67
/*
 * Combine two code ranges (assumed ASCII compatibility):
 *   a == 7BIT   -> b
 *   a == VALID  -> VALID when b is 7BIT, otherwise b
 *   any other a -> UNKNOWN
 * a and b may each be evaluated more than once.
 */
#define ENC_CODERANGE_AND(a, b) \
    ((a) == ENC_CODERANGE_7BIT ? (b) : \
     (a) == ENC_CODERANGE_VALID ? ((b) == ENC_CODERANGE_7BIT ? ENC_CODERANGE_VALID : (b)) : \
     ENC_CODERANGE_UNKNOWN)
72
73
/*
 * Set both the encoding index and the code range of obj.  obj is
 * evaluated once into a local, then ENCODING_SET and ENC_CODERANGE_SET
 * are applied to it in that order.
 */
#define ENCODING_CODERANGE_SET(obj, encindex, cr) \
    do { \
        VALUE rb_encoding_coderange_obj = (obj); \
        ENCODING_SET(rb_encoding_coderange_obj, (encindex)); \
        ENC_CODERANGE_SET(rb_encoding_coderange_obj, (cr)); \
    } while (0)
79
80
/* rb_encoding is an alias for OnigEncodingType. */
typedef OnigEncodingType rb_encoding;
81
82
int
rb_char_to_option_kcode
(
int
c,
int
*option,
int
*kcode);
83
84
int
rb_enc_replicate
(
const
char
*,
rb_encoding
*);
85
int
rb_define_dummy_encoding
(
const
char
*);
86
#define rb_enc_to_index(enc) ((enc) ? ENC_TO_ENCINDEX(enc) : 0)
87
int
rb_enc_get_index
(
VALUE
obj);
88
void
rb_enc_set_index
(
VALUE
obj,
int
encindex);
89
int
rb_enc_find_index
(
const
char
*
name
);
90
int
rb_to_encoding_index
(
VALUE
);
91
rb_encoding
*
rb_to_encoding
(
VALUE
);
92
rb_encoding
*
rb_enc_get
(
VALUE
);
93
rb_encoding
*
rb_enc_compatible
(
VALUE
,
VALUE
);
94
rb_encoding
*
rb_enc_check
(
VALUE
,
VALUE
);
95
VALUE
rb_enc_associate_index
(
VALUE
,
int
);
96
VALUE
rb_enc_associate
(
VALUE
,
rb_encoding
*);
97
void
rb_enc_copy
(
VALUE
dst,
VALUE
src);
98
99
VALUE
rb_enc_str_new
(
const
char
*,
long
,
rb_encoding
*);
100
VALUE
rb_enc_reg_new
(
const
char
*,
long
,
rb_encoding
*,
int
);
101
PRINTF_ARGS
(
VALUE
rb_enc_sprintf
(
rb_encoding
*,
const
char
*, ...), 2, 3);
102
VALUE
rb_enc_vsprintf
(
rb_encoding
*,
const
char
*, va_list);
103
long
rb_enc_strlen
(
const
char
*,
const
char
*,
rb_encoding
*);
104
char
*
rb_enc_nth
(
const
char
*,
const
char
*,
long
,
rb_encoding
*);
105
VALUE
rb_obj_encoding
(
VALUE
);
106
VALUE
rb_enc_str_buf_cat
(
VALUE
str,
const
char
*ptr,
long
len
,
rb_encoding
*enc);
107
VALUE
rb_enc_uint_chr
(
unsigned
int
code,
rb_encoding
*enc);
108
109
VALUE
rb_external_str_new_with_enc
(
const
char
*ptr,
long
len
,
rb_encoding
*);
110
VALUE
rb_str_export_to_enc
(
VALUE
,
rb_encoding
*);
111
VALUE
rb_str_conv_enc
(
VALUE
str,
rb_encoding
*from,
rb_encoding
*to);
112
VALUE
rb_str_conv_enc_opts
(
VALUE
str,
rb_encoding
*from,
rb_encoding
*to,
int
ecflags,
VALUE
ecopts);
113
114
/* index -> rb_encoding */
115
rb_encoding
*
rb_enc_from_index
(
int
idx);
116
117
/* name -> rb_encoding */
118
rb_encoding
*
rb_enc_find
(
const
char
*
name
);
119
120
/*
 * Field accessors.  Each expansion is fully parenthesized so it behaves
 * as a single primary expression wherever the macro is used.
 */

/* rb_encoding * -> name */
#define rb_enc_name(enc) ((enc)->name)

/* rb_encoding * -> minlen/maxlen */
#define rb_enc_mbminlen(enc) ((enc)->min_enc_len)
#define rb_enc_mbmaxlen(enc) ((enc)->max_enc_len)
126
127
/* -> mbclen (no error notification: 0 < ret <= e-p, no exception) */
128
int
rb_enc_mbclen
(
const
char
*
p
,
const
char
*e,
rb_encoding
*enc);
129
130
/* -> mbclen (only for valid encoding) */
131
int
rb_enc_fast_mbclen
(
const
char
*
p
,
const
char
*e,
rb_encoding
*enc);
132
133
/* -> chlen, invalid or needmore */
134
int
rb_enc_precise_mbclen
(
const
char
*
p
,
const
char
*e,
rb_encoding
*enc);
135
#define MBCLEN_CHARFOUND_P(ret) ONIGENC_MBCLEN_CHARFOUND_P(ret)
136
#define MBCLEN_CHARFOUND_LEN(ret) ONIGENC_MBCLEN_CHARFOUND_LEN(ret)
137
#define MBCLEN_INVALID_P(ret) ONIGENC_MBCLEN_INVALID_P(ret)
138
#define MBCLEN_NEEDMORE_P(ret) ONIGENC_MBCLEN_NEEDMORE_P(ret)
139
#define MBCLEN_NEEDMORE_LEN(ret) ONIGENC_MBCLEN_NEEDMORE_LEN(ret)
140
141
/* -> 0x00..0x7f, -1 */
142
int
rb_enc_ascget
(
const
char
*
p
,
const
char
*e,
int
*
len
,
rb_encoding
*enc);
143
144
145
/* -> code (and len) or raise exception */
146
unsigned
int
rb_enc_codepoint_len
(
const
char
*
p
,
const
char
*e,
int
*
len
,
rb_encoding
*enc);
147
148
/* prototype for obsolete function */
149
unsigned
int
rb_enc_codepoint
(
const
char
*
p
,
const
char
*e,
rb_encoding
*enc);
150
/* overriding macro */
151
#define rb_enc_codepoint(p,e,enc) rb_enc_codepoint_len((p),(e),0,(enc))
152
#define rb_enc_mbc_to_codepoint(p, e, enc) ONIGENC_MBC_TO_CODE((enc),(UChar*)(p),(UChar*)(e))
153
154
/* -> codelen>0 or raise exception */
155
int
rb_enc_codelen
(
int
code,
rb_encoding
*enc);
156
157
/* code,ptr,encoding -> write buf */
158
#define rb_enc_mbcput(c,buf,enc) ONIGENC_CODE_TO_MBC((enc),(c),(UChar*)(buf))
159
160
/* start, ptr, end, encoding -> prev_char */
161
#define rb_enc_prev_char(s,p,e,enc) ((char *)onigenc_get_prev_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
162
/* start, ptr, end, encoding -> next_char */
163
#define rb_enc_left_char_head(s,p,e,enc) ((char *)onigenc_get_left_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
164
#define rb_enc_right_char_head(s,p,e,enc) ((char *)onigenc_get_right_adjust_char_head((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e)))
165
#define rb_enc_step_back(s,p,e,n,enc) ((char *)onigenc_step_back((enc),(UChar*)(s),(UChar*)(p),(UChar*)(e),(int)(n)))
166
167
/* ptr, ptr, encoding -> newline_or_not */
168
#define rb_enc_is_newline(p,end,enc) ONIGENC_IS_MBC_NEWLINE((enc),(UChar*)(p),(UChar*)(end))
169
170
/*
 * Character-class tests on code point c in encoding enc; each one
 * forwards to the ONIGENC_IS_CODE_* macro of the same class.
 */
#define rb_enc_isctype(c,t,enc) ONIGENC_IS_CODE_CTYPE((enc),(c),(t))
/* enc is accepted for symmetry with the other tests but is not used. */
#define rb_enc_isascii(c,enc) ONIGENC_IS_CODE_ASCII((c))
#define rb_enc_isalpha(c,enc) ONIGENC_IS_CODE_ALPHA((enc),(c))
#define rb_enc_islower(c,enc) ONIGENC_IS_CODE_LOWER((enc),(c))
#define rb_enc_isupper(c,enc) ONIGENC_IS_CODE_UPPER((enc),(c))
#define rb_enc_ispunct(c,enc) ONIGENC_IS_CODE_PUNCT((enc),(c))
#define rb_enc_isalnum(c,enc) ONIGENC_IS_CODE_ALNUM((enc),(c))
#define rb_enc_isprint(c,enc) ONIGENC_IS_CODE_PRINT((enc),(c))
#define rb_enc_isspace(c,enc) ONIGENC_IS_CODE_SPACE((enc),(c))
#define rb_enc_isdigit(c,enc) ONIGENC_IS_CODE_DIGIT((enc),(c))

/*
 * True when the minimum character length of enc is 1 and enc is not a
 * dummy encoding.  enc is evaluated twice.
 */
#define rb_enc_asciicompat(enc) (rb_enc_mbminlen(enc)==1 && !rb_enc_dummy_p(enc))
182
183
int
rb_enc_casefold
(
char
*to,
const
char
*
p
,
const
char
*e,
rb_encoding
*enc);
184
int
rb_enc_toupper
(
int
c,
rb_encoding
*enc);
185
int
rb_enc_tolower
(
int
c,
rb_encoding
*enc);
186
ID
rb_intern3
(
const
char
*,
long
,
rb_encoding
*);
187
ID
rb_interned_id_p
(
const
char
*,
long
,
rb_encoding
*);
188
int
rb_enc_symname_p
(
const
char
*,
rb_encoding
*);
189
int
rb_enc_symname2_p
(
const
char
*,
long
,
rb_encoding
*);
190
int
rb_enc_str_coderange
(
VALUE
);
191
long
rb_str_coderange_scan_restartable
(
const
char
*,
const
char
*,
rb_encoding
*,
int
*);
192
int
rb_enc_str_asciionly_p
(
VALUE
);
193
#define rb_enc_str_asciicompat_p(str) rb_enc_asciicompat(rb_enc_get(str))
194
VALUE
rb_enc_from_encoding
(
rb_encoding
*enc);
195
int
rb_enc_unicode_p
(
rb_encoding
*enc);
196
rb_encoding
*
rb_ascii8bit_encoding
(
void
);
197
rb_encoding
*
rb_utf8_encoding
(
void
);
198
rb_encoding
*
rb_usascii_encoding
(
void
);
199
rb_encoding
*
rb_locale_encoding
(
void
);
200
rb_encoding
*
rb_filesystem_encoding
(
void
);
201
rb_encoding
*
rb_default_external_encoding
(
void
);
202
rb_encoding
*
rb_default_internal_encoding
(
void
);
203
int
rb_ascii8bit_encindex
(
void
);
204
int
rb_utf8_encindex
(
void
);
205
int
rb_usascii_encindex
(
void
);
206
int
rb_locale_encindex
(
void
);
207
int
rb_filesystem_encindex
(
void
);
208
VALUE
rb_enc_default_external
(
void
);
209
VALUE
rb_enc_default_internal
(
void
);
210
void
rb_enc_set_default_external
(
VALUE
encoding);
211
void
rb_enc_set_default_internal
(
VALUE
encoding);
212
VALUE
rb_locale_charmap
(
VALUE
klass);
213
long
rb_memsearch
(
const
void
*,
long
,
const
void
*,
long
,
rb_encoding
*);
214
char
*
rb_enc_path_next
(
const
char
*,
const
char
*,
rb_encoding
*);
215
char
*
rb_enc_path_skip_prefix
(
const
char
*,
const
char
*,
rb_encoding
*);
216
char
*
rb_enc_path_last_separator
(
const
char
*,
const
char
*,
rb_encoding
*);
217
char
*
rb_enc_path_end
(
const
char
*,
const
char
*,
rb_encoding
*);
218
const
char
*
ruby_enc_find_basename
(
const
char
*
name
,
long
*baselen,
long
*alllen,
rb_encoding
*enc);
219
const
char
*
ruby_enc_find_extname
(
const
char
*
name
,
long
*
len
,
rb_encoding
*enc);
220
221
RUBY_EXTERN
VALUE
rb_cEncoding
;
222
/*
 * The ruby_encoding_index field packs two things: the encoding index in
 * its low 24 bits (ENC_INDEX_MASK) and a dummy marker in bit 24
 * (ENC_DUMMY_FLAG).
 */
#define ENC_DUMMY_FLAG (1<<24)
#define ENC_INDEX_MASK (~(~0U<<24))

/*
 * Encoding index of enc with the flag bits masked off.  The expansion
 * is fully parenthesized so the cast cannot bind to following tokens.
 */
#define ENC_TO_ENCINDEX(enc) ((int)((enc)->ruby_encoding_index & ENC_INDEX_MASK))

/* Non-zero when the dummy flag is set on enc. */
#define ENC_DUMMY_P(enc) ((enc)->ruby_encoding_index & ENC_DUMMY_FLAG)
/* Set the dummy flag on enc, leaving the index bits unchanged. */
#define ENC_SET_DUMMY(enc) ((enc)->ruby_encoding_index |= ENC_DUMMY_FLAG)
229
230
static
inline
int
231
rb_enc_dummy_p
(
rb_encoding
*enc)
232
{
233
return
ENC_DUMMY_P
(enc) != 0;
234
}
235
236
/* econv stuff */
237
238
/*
 * Result codes of type rb_econv_result_t, the return type of
 * rb_econv_convert().  Enumerators take the default values 0..6 in the
 * order listed.
 */
typedef enum {
    econv_invalid_byte_sequence,
    econv_undefined_conversion,
    econv_destination_buffer_full,
    econv_source_buffer_empty,
    econv_finished,
    econv_after_output,
    econv_incomplete_input
} rb_econv_result_t;
247
248
/* Only the struct tag is declared here; the members are not visible in this header. */
typedef struct rb_econv_t rb_econv_t;
249
250
VALUE
rb_str_encode
(
VALUE
str,
VALUE
to,
int
ecflags,
VALUE
ecopts);
251
int
rb_econv_has_convpath_p
(
const
char
* from_encoding,
const
char
*
to_encoding
);
252
253
int
rb_econv_prepare_options
(
VALUE
opthash,
VALUE
*ecopts,
int
ecflags);
254
int
rb_econv_prepare_opts
(
VALUE
opthash,
VALUE
*ecopts);
255
256
rb_econv_t
*
rb_econv_open
(
const
char
*
source_encoding
,
const
char
*
destination_encoding
,
int
ecflags);
257
rb_econv_t
*
rb_econv_open_opts
(
const
char
*
source_encoding
,
const
char
*
destination_encoding
,
int
ecflags,
VALUE
ecopts);
258
259
rb_econv_result_t
rb_econv_convert
(
rb_econv_t
*ec,
260
const
unsigned
char
**source_buffer_ptr,
const
unsigned
char
*source_buffer_end,
261
unsigned
char
**destination_buffer_ptr,
unsigned
char
*destination_buffer_end,
262
int
flags
);
263
void
rb_econv_close
(
rb_econv_t
*ec);
264
265
/* result: 0:success -1:failure */
266
int
rb_econv_set_replacement
(
rb_econv_t
*ec,
const
unsigned
char
*str,
size_t
len
,
const
char
*encname);
267
268
/* result: 0:success -1:failure */
269
int
rb_econv_decorate_at_first
(
rb_econv_t
*ec,
const
char
*decorator_name);
270
int
rb_econv_decorate_at_last
(
rb_econv_t
*ec,
const
char
*decorator_name);
271
272
VALUE
rb_econv_open_exc
(
const
char
*senc,
const
char
*denc,
int
ecflags);
273
274
/* result: 0:success -1:failure */
275
int
rb_econv_insert_output
(
rb_econv_t
*ec,
276
const
unsigned
char
*str,
size_t
len
,
const
char
*str_encoding);
277
278
/* encoding that rb_econv_insert_output doesn't need conversion */
279
const
char
*
rb_econv_encoding_to_insert_output
(
rb_econv_t
*ec);
280
281
/* raise an error if the last rb_econv_convert is error */
282
void
rb_econv_check_error
(
rb_econv_t
*ec);
283
284
/* returns an exception object or nil */
285
VALUE
rb_econv_make_exception
(
rb_econv_t
*ec);
286
287
int
rb_econv_putbackable
(
rb_econv_t
*ec);
288
void
rb_econv_putback
(
rb_econv_t
*ec,
unsigned
char
*
p
,
int
n);
289
290
/* returns the corresponding ASCII compatible encoding for encname,
291
* or NULL if encname is not ASCII incompatible encoding. */
292
const
char
*
rb_econv_asciicompat_encoding
(
const
char
*encname);
293
294
VALUE
rb_econv_str_convert
(
rb_econv_t
*ec,
VALUE
src,
int
flags
);
295
VALUE
rb_econv_substr_convert
(
rb_econv_t
*ec,
VALUE
src,
long
byteoff,
long
bytesize,
int
flags
);
296
VALUE
rb_econv_str_append
(
rb_econv_t
*ec,
VALUE
src,
VALUE
dst,
int
flags
);
297
VALUE
rb_econv_substr_append
(
rb_econv_t
*ec,
VALUE
src,
long
byteoff,
long
bytesize,
VALUE
dst,
int
flags
);
298
299
void
rb_econv_binmode
(
rb_econv_t
*ec);
300
301
/* flags for rb_econv_open */

/* Low byte: error handling.  Bits 0-3 cover invalid input, bits 4-7 undefined conversions. */
#define ECONV_ERROR_HANDLER_MASK                0x000000ff

#define ECONV_INVALID_MASK                      0x0000000f
#define ECONV_INVALID_REPLACE                   0x00000002

#define ECONV_UNDEF_MASK                        0x000000f0
#define ECONV_UNDEF_REPLACE                     0x00000020
#define ECONV_UNDEF_HEX_CHARREF                 0x00000030

/* Second byte: decorators.  The newline decorators occupy its low six bits. */
#define ECONV_DECORATOR_MASK                    0x0000ff00
#define ECONV_NEWLINE_DECORATOR_MASK            0x00003f00
#define ECONV_NEWLINE_DECORATOR_READ_MASK       0x00000f00
#define ECONV_NEWLINE_DECORATOR_WRITE_MASK      0x00003000

#define ECONV_UNIVERSAL_NEWLINE_DECORATOR       0x00000100
#define ECONV_CRLF_NEWLINE_DECORATOR            0x00001000
#define ECONV_CR_NEWLINE_DECORATOR              0x00002000
#define ECONV_XML_TEXT_DECORATOR                0x00004000
#define ECONV_XML_ATTR_CONTENT_DECORATOR        0x00008000

#define ECONV_STATEFUL_DECORATOR_MASK           0x00f00000
#define ECONV_XML_ATTR_QUOTE_DECORATOR          0x00100000

/* CRLF is the default newline decorator on _WIN32 or when RUBY_TEST_CRLF_ENVIRONMENT is defined; otherwise none. */
#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
#define ECONV_DEFAULT_NEWLINE_DECORATOR ECONV_CRLF_NEWLINE_DECORATOR
#else
#define ECONV_DEFAULT_NEWLINE_DECORATOR 0
#endif

/* end of flags for rb_econv_open */
333
334
/* flags for rb_econv_convert */
#define ECONV_PARTIAL_INPUT                     0x00010000
#define ECONV_AFTER_OUTPUT                      0x00020000
/* end of flags for rb_econv_convert */
338
339
#if defined __GNUC__ && __GNUC__ >= 4
340
#pragma GCC visibility pop
341
#endif
342
343
#if defined(__cplusplus)
344
#if 0
345
{
/* satisfy cc-mode */
346
#endif
347
}
/* extern "C" { */
348
#endif
349
350
#endif
/* RUBY_ENCODING_H */
351
Generated on Fri Nov 22 2013 07:04:10 for Ruby by
1.8.3