Ruby  2.0.0p247(2013-06-27revision41674)
scanner.c
Go to the documentation of this file.
1 
2 /*
3  * Introduction
4  * ************
5  *
6  * The following notes assume that you are familiar with the YAML specification
7  * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in
8  * some cases we are less restrictive than it requires.
9  *
10  * The process of transforming a YAML stream into a sequence of events is
11  * divided into two steps: Scanning and Parsing.
12  *
13  * The Scanner transforms the input stream into a sequence of tokens, while the
14  * parser transforms the sequence of tokens produced by the Scanner into a
15  * sequence of parsing events.
16  *
17  * The Scanner is rather clever and complicated. The Parser, on the contrary,
18  * is a straightforward implementation of a recursive-descent parser (or,
19  * LL(1) parser, as it is usually called).
20  *
21  * Actually there are two issues of Scanning that might be called "clever", the
22  * rest is quite straightforward. The issues are "block collection start" and
23  * "simple keys". Both issues are explained below in detail.
24  *
25  * Here the Scanning step is explained and implemented. We start with the list
26  * of all the tokens produced by the Scanner together with short descriptions.
27  *
28  * Now, tokens:
29  *
30  * STREAM-START(encoding) # The stream start.
31  * STREAM-END # The stream end.
32  * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
33  * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
34  * DOCUMENT-START # '---'
35  * DOCUMENT-END # '...'
36  * BLOCK-SEQUENCE-START # Indentation increase denoting a block
37  * BLOCK-MAPPING-START # sequence or a block mapping.
38  * BLOCK-END # Indentation decrease.
39  * FLOW-SEQUENCE-START # '['
40  * FLOW-SEQUENCE-END # ']'
41  * FLOW-MAPPING-START # '{'
42  * FLOW-MAPPING-END # '}'
43  * BLOCK-ENTRY # '-'
44  * FLOW-ENTRY # ','
45  * KEY # '?' or nothing (simple keys).
46  * VALUE # ':'
47  * ALIAS(anchor) # '*anchor'
48  * ANCHOR(anchor) # '&anchor'
49  * TAG(handle,suffix) # '!handle!suffix'
50  * SCALAR(value,style) # A scalar.
51  *
52  * The following two tokens are "virtual" tokens denoting the beginning and the
53  * end of the stream:
54  *
55  * STREAM-START(encoding)
56  * STREAM-END
57  *
58  * We pass the information about the input stream encoding with the
59  * STREAM-START token.
60  *
61  * The next two tokens are responsible for directives:
62  *
63  * VERSION-DIRECTIVE(major,minor)
64  * TAG-DIRECTIVE(handle,prefix)
65  *
66  * Example:
67  *
68  * %YAML 1.1
69  * %TAG ! !foo
70  * %TAG !yaml! tag:yaml.org,2002:
71  * ---
72  *
73  * The corresponding sequence of tokens:
74  *
75  * STREAM-START(utf-8)
76  * VERSION-DIRECTIVE(1,1)
77  * TAG-DIRECTIVE("!","!foo")
78  * TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
79  * DOCUMENT-START
80  * STREAM-END
81  *
82  * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83  * line.
84  *
85  * The document start and end indicators are represented by:
86  *
87  * DOCUMENT-START
88  * DOCUMENT-END
89  *
90  * Note that if a YAML stream contains an implicit document (without '---'
91  * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92  * produced.
93  *
94  * In the following examples, we present whole documents together with the
95  * produced tokens.
96  *
97  * 1. An implicit document:
98  *
99  * 'a scalar'
100  *
101  * Tokens:
102  *
103  * STREAM-START(utf-8)
104  * SCALAR("a scalar",single-quoted)
105  * STREAM-END
106  *
107  * 2. An explicit document:
108  *
109  * ---
110  * 'a scalar'
111  * ...
112  *
113  * Tokens:
114  *
115  * STREAM-START(utf-8)
116  * DOCUMENT-START
117  * SCALAR("a scalar",single-quoted)
118  * DOCUMENT-END
119  * STREAM-END
120  *
121  * 3. Several documents in a stream:
122  *
123  * 'a scalar'
124  * ---
125  * 'another scalar'
126  * ---
127  * 'yet another scalar'
128  *
129  * Tokens:
130  *
131  * STREAM-START(utf-8)
132  * SCALAR("a scalar",single-quoted)
133  * DOCUMENT-START
134  * SCALAR("another scalar",single-quoted)
135  * DOCUMENT-START
136  * SCALAR("yet another scalar",single-quoted)
137  * STREAM-END
138  *
139  * We have already introduced the SCALAR token above. The following tokens are
140  * used to describe aliases, anchors, tags, and scalars:
141  *
142  * ALIAS(anchor)
143  * ANCHOR(anchor)
144  * TAG(handle,suffix)
145  * SCALAR(value,style)
146  *
147  * The following series of examples illustrates the usage of these tokens:
148  *
149  * 1. A recursive sequence:
150  *
151  * &A [ *A ]
152  *
153  * Tokens:
154  *
155  * STREAM-START(utf-8)
156  * ANCHOR("A")
157  * FLOW-SEQUENCE-START
158  * ALIAS("A")
159  * FLOW-SEQUENCE-END
160  * STREAM-END
161  *
162  * 2. A tagged scalar:
163  *
164  * !!float "3.14" # A good approximation.
165  *
166  * Tokens:
167  *
168  * STREAM-START(utf-8)
169  * TAG("!!","float")
170  * SCALAR("3.14",double-quoted)
171  * STREAM-END
172  *
173  * 3. Various scalar styles:
174  *
175  * --- # Implicit empty plain scalars do not produce tokens.
176  * --- a plain scalar
177  * --- 'a single-quoted scalar'
178  * --- "a double-quoted scalar"
179  * --- |-
180  * a literal scalar
181  * --- >-
182  * a folded
183  * scalar
184  *
185  * Tokens:
186  *
187  * STREAM-START(utf-8)
188  * DOCUMENT-START
189  * DOCUMENT-START
190  * SCALAR("a plain scalar",plain)
191  * DOCUMENT-START
192  * SCALAR("a single-quoted scalar",single-quoted)
193  * DOCUMENT-START
194  * SCALAR("a double-quoted scalar",double-quoted)
195  * DOCUMENT-START
196  * SCALAR("a literal scalar",literal)
197  * DOCUMENT-START
198  * SCALAR("a folded scalar",folded)
199  * STREAM-END
200  *
201  * Now it's time to review collection-related tokens. We will start with
202  * flow collections:
203  *
204  * FLOW-SEQUENCE-START
205  * FLOW-SEQUENCE-END
206  * FLOW-MAPPING-START
207  * FLOW-MAPPING-END
208  * FLOW-ENTRY
209  * KEY
210  * VALUE
211  *
212  * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213  * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214  * correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the
215  * indicators '?' and ':', which are used for denoting mapping keys and values,
216  * are represented by the KEY and VALUE tokens.
217  *
218  * The following examples show flow collections:
219  *
220  * 1. A flow sequence:
221  *
222  * [item 1, item 2, item 3]
223  *
224  * Tokens:
225  *
226  * STREAM-START(utf-8)
227  * FLOW-SEQUENCE-START
228  * SCALAR("item 1",plain)
229  * FLOW-ENTRY
230  * SCALAR("item 2",plain)
231  * FLOW-ENTRY
232  * SCALAR("item 3",plain)
233  * FLOW-SEQUENCE-END
234  * STREAM-END
235  *
236  * 2. A flow mapping:
237  *
238  * {
239  * a simple key: a value, # Note that the KEY token is produced.
240  * ? a complex key: another value,
241  * }
242  *
243  * Tokens:
244  *
245  * STREAM-START(utf-8)
246  * FLOW-MAPPING-START
247  * KEY
248  * SCALAR("a simple key",plain)
249  * VALUE
250  * SCALAR("a value",plain)
251  * FLOW-ENTRY
252  * KEY
253  * SCALAR("a complex key",plain)
254  * VALUE
255  * SCALAR("another value",plain)
256  * FLOW-ENTRY
257  * FLOW-MAPPING-END
258  * STREAM-END
259  *
260  * A simple key is a key which is not denoted by the '?' indicator. Note that
261  * the Scanner still produces the KEY token whenever it encounters a simple key.
262  *
263  * For scanning block collections, the following tokens are used (note that we
264  * repeat KEY and VALUE here):
265  *
266  * BLOCK-SEQUENCE-START
267  * BLOCK-MAPPING-START
268  * BLOCK-END
269  * BLOCK-ENTRY
270  * KEY
271  * VALUE
272  *
273  * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274  * increase that precedes a block collection (cf. the INDENT token in Python).
275  * The token BLOCK-END denotes indentation decrease that ends a block collection
276  * (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
277  * that make detection of these tokens more complex.
278  *
279  * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280  * '-', '?', and ':' correspondingly.
281  *
282  * The following examples show how the tokens BLOCK-SEQUENCE-START,
283  * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284  *
285  * 1. Block sequences:
286  *
287  * - item 1
288  * - item 2
289  * -
290  * - item 3.1
291  * - item 3.2
292  * -
293  * key 1: value 1
294  * key 2: value 2
295  *
296  * Tokens:
297  *
298  * STREAM-START(utf-8)
299  * BLOCK-SEQUENCE-START
300  * BLOCK-ENTRY
301  * SCALAR("item 1",plain)
302  * BLOCK-ENTRY
303  * SCALAR("item 2",plain)
304  * BLOCK-ENTRY
305  * BLOCK-SEQUENCE-START
306  * BLOCK-ENTRY
307  * SCALAR("item 3.1",plain)
308  * BLOCK-ENTRY
309  * SCALAR("item 3.2",plain)
310  * BLOCK-END
311  * BLOCK-ENTRY
312  * BLOCK-MAPPING-START
313  * KEY
314  * SCALAR("key 1",plain)
315  * VALUE
316  * SCALAR("value 1",plain)
317  * KEY
318  * SCALAR("key 2",plain)
319  * VALUE
320  * SCALAR("value 2",plain)
321  * BLOCK-END
322  * BLOCK-END
323  * STREAM-END
324  *
325  * 2. Block mappings:
326  *
327  * a simple key: a value # The KEY token is produced here.
328  * ? a complex key
329  * : another value
330  * a mapping:
331  * key 1: value 1
332  * key 2: value 2
333  * a sequence:
334  * - item 1
335  * - item 2
336  *
337  * Tokens:
338  *
339  * STREAM-START(utf-8)
340  * BLOCK-MAPPING-START
341  * KEY
342  * SCALAR("a simple key",plain)
343  * VALUE
344  * SCALAR("a value",plain)
345  * KEY
346  * SCALAR("a complex key",plain)
347  * VALUE
348  * SCALAR("another value",plain)
349  * KEY
350  * SCALAR("a mapping",plain)
351  * BLOCK-MAPPING-START
352  * KEY
353  * SCALAR("key 1",plain)
354  * VALUE
355  * SCALAR("value 1",plain)
356  * KEY
357  * SCALAR("key 2",plain)
358  * VALUE
359  * SCALAR("value 2",plain)
360  * BLOCK-END
361  * KEY
362  * SCALAR("a sequence",plain)
363  * VALUE
364  * BLOCK-SEQUENCE-START
365  * BLOCK-ENTRY
366  * SCALAR("item 1",plain)
367  * BLOCK-ENTRY
368  * SCALAR("item 2",plain)
369  * BLOCK-END
370  * BLOCK-END
371  * STREAM-END
372  *
373  * YAML does not always require a new block collection to start on a new
374  * line. If the current line contains only '-', '?', and ':' indicators, a new
375  * block collection may start at the current line. The following examples
376  * illustrate this case:
377  *
378  * 1. Collections in a sequence:
379  *
380  * - - item 1
381  * - item 2
382  * - key 1: value 1
383  * key 2: value 2
384  * - ? complex key
385  * : complex value
386  *
387  * Tokens:
388  *
389  * STREAM-START(utf-8)
390  * BLOCK-SEQUENCE-START
391  * BLOCK-ENTRY
392  * BLOCK-SEQUENCE-START
393  * BLOCK-ENTRY
394  * SCALAR("item 1",plain)
395  * BLOCK-ENTRY
396  * SCALAR("item 2",plain)
397  * BLOCK-END
398  * BLOCK-ENTRY
399  * BLOCK-MAPPING-START
400  * KEY
401  * SCALAR("key 1",plain)
402  * VALUE
403  * SCALAR("value 1",plain)
404  * KEY
405  * SCALAR("key 2",plain)
406  * VALUE
407  * SCALAR("value 2",plain)
408  * BLOCK-END
409  * BLOCK-ENTRY
410  * BLOCK-MAPPING-START
411  * KEY
412  * SCALAR("complex key",plain)
413  * VALUE
414  * SCALAR("complex value",plain)
415  * BLOCK-END
416  * BLOCK-END
417  * STREAM-END
418  *
419  * 2. Collections in a mapping:
420  *
421  * ? a sequence
422  * : - item 1
423  * - item 2
424  * ? a mapping
425  * : key 1: value 1
426  * key 2: value 2
427  *
428  * Tokens:
429  *
430  * STREAM-START(utf-8)
431  * BLOCK-MAPPING-START
432  * KEY
433  * SCALAR("a sequence",plain)
434  * VALUE
435  * BLOCK-SEQUENCE-START
436  * BLOCK-ENTRY
437  * SCALAR("item 1",plain)
438  * BLOCK-ENTRY
439  * SCALAR("item 2",plain)
440  * BLOCK-END
441  * KEY
442  * SCALAR("a mapping",plain)
443  * VALUE
444  * BLOCK-MAPPING-START
445  * KEY
446  * SCALAR("key 1",plain)
447  * VALUE
448  * SCALAR("value 1",plain)
449  * KEY
450  * SCALAR("key 2",plain)
451  * VALUE
452  * SCALAR("value 2",plain)
453  * BLOCK-END
454  * BLOCK-END
455  * STREAM-END
456  *
457  * YAML also permits non-indented sequences if they are included in a block
458  * mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
459  *
460  * key:
461  * - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
462  * - item 2
463  *
464  * Tokens:
465  *
466  * STREAM-START(utf-8)
467  * BLOCK-MAPPING-START
468  * KEY
469  * SCALAR("key",plain)
470  * VALUE
471  * BLOCK-ENTRY
472  * SCALAR("item 1",plain)
473  * BLOCK-ENTRY
474  * SCALAR("item 2",plain)
475  * BLOCK-END
476  */
477 
478 #include "yaml_private.h"
479 
480 /*
481  * Ensure that at least `length` unread characters are buffered, refilling via
482  * yaml_parser_update_buffer() if not. 1 on success, 0 on failure (reader error or memory error).
483  */
484 
485 #define CACHE(parser,length) \
486  (parser->unread >= (length) \
487  ? 1 \
488  : yaml_parser_update_buffer(parser, (length)))
489 
490 /*
491  * Advance the buffer pointer: SKIP over one character, SKIP_LINE over one line break (0 if none).
492  */
493 
494 #define SKIP(parser) \
495  (parser->mark.index ++, \
496  parser->mark.column ++, \
497  parser->unread --, \
498  parser->buffer.pointer += WIDTH(parser->buffer))
499 
500 #define SKIP_LINE(parser) \
501  (IS_CRLF(parser->buffer) ? \
502  (parser->mark.index += 2, \
503  parser->mark.column = 0, \
504  parser->mark.line ++, \
505  parser->unread -= 2, \
506  parser->buffer.pointer += 2) : \
507  IS_BREAK(parser->buffer) ? \
508  (parser->mark.index ++, \
509  parser->mark.column = 0, \
510  parser->mark.line ++, \
511  parser->unread --, \
512  parser->buffer.pointer += WIDTH(parser->buffer)) : 0)
513 
514 /*
515  * Copy a character to a string buffer and advance the mark; yields 1, or 0 if STRING_EXTEND fails.
516  */
517 
518 #define READ(parser,string) \
519  (STRING_EXTEND(parser,string) ? \
520  (COPY(string,parser->buffer), \
521  parser->mark.index ++, \
522  parser->mark.column ++, \
523  parser->unread --, \
524  1) : 0)
525 
526 /*
527  * Copy a line break to a string buffer (CR LF, CR, LF and NEL become LF; LS/PS are kept) and advance pointers.
528  */
529 
530 #define READ_LINE(parser,string) \
531  (STRING_EXTEND(parser,string) ? \
532  (((CHECK_AT(parser->buffer,'\r',0) \
533  && CHECK_AT(parser->buffer,'\n',1)) ? /* CR LF -> LF */ \
534  (*((string).pointer++) = (yaml_char_t) '\n', \
535  parser->buffer.pointer += 2, \
536  parser->mark.index += 2, \
537  parser->mark.column = 0, \
538  parser->mark.line ++, \
539  parser->unread -= 2) : \
540  (CHECK_AT(parser->buffer,'\r',0) \
541  || CHECK_AT(parser->buffer,'\n',0)) ? /* CR|LF -> LF */ \
542  (*((string).pointer++) = (yaml_char_t) '\n', \
543  parser->buffer.pointer ++, \
544  parser->mark.index ++, \
545  parser->mark.column = 0, \
546  parser->mark.line ++, \
547  parser->unread --) : \
548  (CHECK_AT(parser->buffer,'\xC2',0) \
549  && CHECK_AT(parser->buffer,'\x85',1)) ? /* NEL -> LF */ \
550  (*((string).pointer++) = (yaml_char_t) '\n', \
551  parser->buffer.pointer += 2, \
552  parser->mark.index ++, \
553  parser->mark.column = 0, \
554  parser->mark.line ++, \
555  parser->unread --) : \
556  (CHECK_AT(parser->buffer,'\xE2',0) && \
557  CHECK_AT(parser->buffer,'\x80',1) && \
558  (CHECK_AT(parser->buffer,'\xA8',2) || \
559  CHECK_AT(parser->buffer,'\xA9',2))) ? /* LS|PS -> LS|PS */ \
560  (*((string).pointer++) = *(parser->buffer.pointer++), \
561  *((string).pointer++) = *(parser->buffer.pointer++), \
562  *((string).pointer++) = *(parser->buffer.pointer++), \
563  parser->mark.index ++, \
564  parser->mark.column = 0, \
565  parser->mark.line ++, \
566  parser->unread --) : 0), \
567  1) : 0)
568 
569 /*
570  * Public API declarations.
571  */
572 
573 YAML_DECLARE(int)
575 
576 /*
577  * Error handling.
578  */
579 
580 static int
581 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
582  yaml_mark_t context_mark, const char *problem);
583 
584 /*
585  * High-level token API.
586  */
587 
588 YAML_DECLARE(int)
590 
591 static int
593 
594 /*
595  * Potential simple keys.
596  */
597 
598 static int
600 
601 static int
603 
604 static int
606 
607 static int
609 
610 static int
612 
613 /*
614  * Indentation treatment.
615  */
616 
617 static int
618 yaml_parser_roll_indent(yaml_parser_t *parser, int column,
619  int number, yaml_token_type_t type, yaml_mark_t mark);
620 
621 static int
622 yaml_parser_unroll_indent(yaml_parser_t *parser, int column);
623 
624 /*
625  * Token fetchers.
626  */
627 
628 static int
630 
631 static int
633 
634 static int
636 
637 static int
639  yaml_token_type_t type);
640 
641 static int
643  yaml_token_type_t type);
644 
645 static int
647  yaml_token_type_t type);
648 
649 static int
651 
652 static int
654 
655 static int
657 
658 static int
660 
661 static int
663 
664 static int
666 
667 static int
668 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
669 
670 static int
671 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
672 
673 static int
675 
676 /*
677  * Token scanners.
678  */
679 
680 static int
682 
683 static int
685 
686 static int
688  yaml_mark_t start_mark, yaml_char_t **name);
689 
690 static int
692  yaml_mark_t start_mark, int *major, int *minor);
693 
694 static int
696  yaml_mark_t start_mark, int *number);
697 
698 static int
700  yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
701 
702 static int
704  yaml_token_type_t type);
705 
706 static int
708 
709 static int
710 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
711  yaml_mark_t start_mark, yaml_char_t **handle);
712 
713 static int
714 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
715  yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
716 
717 static int
718 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
719  yaml_mark_t start_mark, yaml_string_t *string);
720 
721 static int
723  int literal);
724 
725 static int
727  int *indent, yaml_string_t *breaks,
728  yaml_mark_t start_mark, yaml_mark_t *end_mark);
729 
730 static int
732  int single);
733 
734 static int
736 
737 /*
738  * Get the next token.
739  */
740 
741 YAML_DECLARE(int)
743 {
744  assert(parser); /* Non-NULL parser object is expected. */
745  assert(token); /* Non-NULL token object is expected. */
746 
747  /* Erase the token object. */
748 
749  memset(token, 0, sizeof(yaml_token_t));
750 
751  /* No tokens after STREAM-END or error. */
752 
753  if (parser->stream_end_produced || parser->error) {
754  return 1;
755  }
756 
757  /* Ensure that the tokens queue contains enough tokens. */
758 
759  if (!parser->token_available) {
760  if (!yaml_parser_fetch_more_tokens(parser))
761  return 0;
762  }
763 
764  /* Fetch the next token from the queue. */
765 
766  *token = DEQUEUE(parser, parser->tokens);
767  parser->token_available = 0;
768  parser->tokens_parsed ++;
769 
770  if (token->type == YAML_STREAM_END_TOKEN) {
771  parser->stream_end_produced = 1;
772  }
773 
774  return 1;
775 }
776 
777 /*
778  * Set the scanner error and return 0.
779  */
780 
781 static int
783  yaml_mark_t context_mark, const char *problem)
784 {
785  parser->error = YAML_SCANNER_ERROR;
786  parser->context = context;
787  parser->context_mark = context_mark;
788  parser->problem = problem;
789  parser->problem_mark = parser->mark;
790 
791  return 0;
792 }
793 
794 /*
795  * Ensure that the tokens queue contains at least one token which can be
796  * returned to the Parser.
797  */
798 
799 YAML_DECLARE(int)
801 {
802  int need_more_tokens;
803 
804  /* While we need more tokens to fetch, do it. */
805 
806  while (1)
807  {
808  /*
809  * Check if we really need to fetch more tokens.
810  */
811 
812  need_more_tokens = 0;
813 
814  if (parser->tokens.head == parser->tokens.tail)
815  {
816  /* Queue is empty. */
817 
818  need_more_tokens = 1;
819  }
820  else
821  {
822  yaml_simple_key_t *simple_key;
823 
824  /* Check if any potential simple key may occupy the head position. */
825 
826  if (!yaml_parser_stale_simple_keys(parser))
827  return 0;
828 
829  for (simple_key = parser->simple_keys.start;
830  simple_key != parser->simple_keys.top; simple_key++) {
831  if (simple_key->possible
832  && simple_key->token_number == parser->tokens_parsed) {
833  need_more_tokens = 1;
834  break;
835  }
836  }
837  }
838 
839  /* We are finished. */
840 
841  if (!need_more_tokens)
842  break;
843 
844  /* Fetch the next token. */
845 
846  if (!yaml_parser_fetch_next_token(parser))
847  return 0;
848  }
849 
850  parser->token_available = 1;
851 
852  return 1;
853 }
854 
855 /*
856  * The dispatcher for token fetchers.
857  */
858 
859 static int
861 {
862  /* Ensure that the buffer is initialized. */
863 
864  if (!CACHE(parser, 1))
865  return 0;
866 
867  /* Check if we just started scanning. Fetch STREAM-START then. */
868 
869  if (!parser->stream_start_produced)
870  return yaml_parser_fetch_stream_start(parser);
871 
872  /* Eat whitespaces and comments until we reach the next token. */
873 
874  if (!yaml_parser_scan_to_next_token(parser))
875  return 0;
876 
877  /* Remove obsolete potential simple keys. */
878 
879  if (!yaml_parser_stale_simple_keys(parser))
880  return 0;
881 
882  /* Check the indentation level against the current column. */
883 
884  if (!yaml_parser_unroll_indent(parser, parser->mark.column))
885  return 0;
886 
887  /*
888  * Ensure that the buffer contains at least 4 characters. 4 is the length
889  * of the longest indicators ('--- ' and '... ').
890  */
891 
892  if (!CACHE(parser, 4))
893  return 0;
894 
895  /* Is it the end of the stream? */
896 
897  if (IS_Z(parser->buffer))
898  return yaml_parser_fetch_stream_end(parser);
899 
900  /* Is it a directive? */
901 
902  if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
903  return yaml_parser_fetch_directive(parser);
904 
905  /* Is it the document start indicator? */
906 
907  if (parser->mark.column == 0
908  && CHECK_AT(parser->buffer, '-', 0)
909  && CHECK_AT(parser->buffer, '-', 1)
910  && CHECK_AT(parser->buffer, '-', 2)
911  && IS_BLANKZ_AT(parser->buffer, 3))
914 
915  /* Is it the document end indicator? */
916 
917  if (parser->mark.column == 0
918  && CHECK_AT(parser->buffer, '.', 0)
919  && CHECK_AT(parser->buffer, '.', 1)
920  && CHECK_AT(parser->buffer, '.', 2)
921  && IS_BLANKZ_AT(parser->buffer, 3))
924 
925  /* Is it the flow sequence start indicator? */
926 
927  if (CHECK(parser->buffer, '['))
930 
931  /* Is it the flow mapping start indicator? */
932 
933  if (CHECK(parser->buffer, '{'))
936 
937  /* Is it the flow sequence end indicator? */
938 
939  if (CHECK(parser->buffer, ']'))
942 
943  /* Is it the flow mapping end indicator? */
944 
945  if (CHECK(parser->buffer, '}'))
948 
949  /* Is it the flow entry indicator? */
950 
951  if (CHECK(parser->buffer, ','))
952  return yaml_parser_fetch_flow_entry(parser);
953 
954  /* Is it the block entry indicator? */
955 
956  if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
957  return yaml_parser_fetch_block_entry(parser);
958 
959  /* Is it the key indicator? */
960 
961  if (CHECK(parser->buffer, '?')
962  && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
963  return yaml_parser_fetch_key(parser);
964 
965  /* Is it the value indicator? */
966 
967  if (CHECK(parser->buffer, ':')
968  && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
969  return yaml_parser_fetch_value(parser);
970 
971  /* Is it an alias? */
972 
973  if (CHECK(parser->buffer, '*'))
975 
976  /* Is it an anchor? */
977 
978  if (CHECK(parser->buffer, '&'))
980 
981  /* Is it a tag? */
982 
983  if (CHECK(parser->buffer, '!'))
984  return yaml_parser_fetch_tag(parser);
985 
986  /* Is it a literal scalar? */
987 
988  if (CHECK(parser->buffer, '|') && !parser->flow_level)
989  return yaml_parser_fetch_block_scalar(parser, 1);
990 
991  /* Is it a folded scalar? */
992 
993  if (CHECK(parser->buffer, '>') && !parser->flow_level)
994  return yaml_parser_fetch_block_scalar(parser, 0);
995 
996  /* Is it a single-quoted scalar? */
997 
998  if (CHECK(parser->buffer, '\''))
999  return yaml_parser_fetch_flow_scalar(parser, 1);
1000 
1001  /* Is it a double-quoted scalar? */
1002 
1003  if (CHECK(parser->buffer, '"'))
1004  return yaml_parser_fetch_flow_scalar(parser, 0);
1005 
1006  /*
1007  * Is it a plain scalar?
1008  *
1009  * A plain scalar may start with any non-blank characters except
1010  *
1011  * '-', '?', ':', ',', '[', ']', '{', '}',
1012  * '#', '&', '*', '!', '|', '>', '\'', '\"',
1013  * '%', '@', '`'.
1014  *
1015  * In the block context (and, for the '-' indicator, in the flow context
1016  * too), it may also start with the characters
1017  *
1018  * '-', '?', ':'
1019  *
1020  * if it is followed by a non-space character.
1021  *
1022  * The last rule is more restrictive than the specification requires.
1023  */
1024 
1025  if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
1026  || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
1027  || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
1028  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
1029  || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
1030  || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
1031  || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
1032  || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
1033  || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
1034  || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
1035  (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
1036  (!parser->flow_level &&
1037  (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
1038  && !IS_BLANKZ_AT(parser->buffer, 1)))
1039  return yaml_parser_fetch_plain_scalar(parser);
1040 
1041  /*
1042  * If we don't determine the token type so far, it is an error.
1043  */
1044 
1045  return yaml_parser_set_scanner_error(parser,
1046  "while scanning for the next token", parser->mark,
1047  "found character that cannot start any token");
1048 }
1049 
1050 /*
1051  * Check the list of potential simple keys and remove the positions that
1052  * cannot contain simple keys anymore.
1053  */
1054 
1055 static int
1057 {
1058  yaml_simple_key_t *simple_key;
1059 
1060  /* Check for a potential simple key for each flow level. */
1061 
1062  for (simple_key = parser->simple_keys.start;
1063  simple_key != parser->simple_keys.top; simple_key ++)
1064  {
1065  /*
1066  * The specification requires that a simple key
1067  *
1068  * - is limited to a single line,
1069  * - is shorter than 1024 characters.
1070  */
1071 
1072  if (simple_key->possible
1073  && (simple_key->mark.line < parser->mark.line
1074  || simple_key->mark.index+1024 < parser->mark.index)) {
1075 
1076  /* Check if the potential simple key to be removed is required. */
1077 
1078  if (simple_key->required) {
1079  return yaml_parser_set_scanner_error(parser,
1080  "while scanning a simple key", simple_key->mark,
1081  "could not find expected ':'");
1082  }
1083 
1084  simple_key->possible = 0;
1085  }
1086  }
1087 
1088  return 1;
1089 }
1090 
1091 /*
1092  * Check if a simple key may start at the current position and add it if
1093  * needed.
1094  */
1095 
1096 static int
1098 {
1099  /*
1100  * A simple key is required at the current position if the scanner is in
1101  * the block context and the current column coincides with the indentation
1102  * level.
1103  */
1104 
1105  int required = (!parser->flow_level
1106  && parser->indent == (int)parser->mark.column);
1107 
1108  /*
1109  * A simple key is required only when it is the first token in the current
1110  * line. Therefore it is always allowed. But we add a check anyway.
1111  */
1112 
1113  assert(parser->simple_key_allowed || !required); /* Impossible. */
1114 
1115  /*
1116  * If the current position may start a simple key, save it.
1117  */
1118 
1119  if (parser->simple_key_allowed)
1120  {
1121  yaml_simple_key_t simple_key;
1122  simple_key.possible = 1;
1123  simple_key.required = required;
1124  simple_key.token_number =
1125  parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
1126  simple_key.mark = parser->mark;
1127 
1128  if (!yaml_parser_remove_simple_key(parser)) return 0;
1129 
1130  *(parser->simple_keys.top-1) = simple_key;
1131  }
1132 
1133  return 1;
1134 }
1135 
1136 /*
1137  * Remove a potential simple key at the current flow level.
1138  */
1139 
1140 static int
1142 {
1143  yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1144 
1145  if (simple_key->possible)
1146  {
1147  /* If the key is required, it is an error. */
1148 
1149  if (simple_key->required) {
1150  return yaml_parser_set_scanner_error(parser,
1151  "while scanning a simple key", simple_key->mark,
1152  "could not find expected ':'");
1153  }
1154  }
1155 
1156  /* Remove the key from the stack. */
1157 
1158  simple_key->possible = 0;
1159 
1160  return 1;
1161 }
1162 
1163 /*
1164  * Increase the flow level and resize the simple key list if needed.
1165  */
1166 
1167 static int
1169 {
1170  yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
1171 
1172  /* Reset the simple key on the next level. */
1173 
1174  if (!PUSH(parser, parser->simple_keys, empty_simple_key))
1175  return 0;
1176 
1177  /* Increase the flow level. */
1178 
1179  parser->flow_level++;
1180 
1181  return 1;
1182 }
1183 
1184 /*
1185  * Decrease the flow level.
1186  */
1187 
1188 static int
1190 {
1191  yaml_simple_key_t dummy_key; /* Used to eliminate a compiler warning. */
1192 
1193  if (parser->flow_level) {
1194  parser->flow_level --;
1195  dummy_key = POP(parser, parser->simple_keys);
1196  }
1197 
1198  return 1;
1199 }
1200 
1201 /*
1202  * Push the current indentation level to the stack and set the new level if
1203  * the current column is greater than the indentation level. In this case,
1204  * append or insert the specified token into the token queue.
1205  *
1206  */
1207 
1208 static int
1210  int number, yaml_token_type_t type, yaml_mark_t mark)
1211 {
1213 
1214  /* In the flow context, do nothing. */
1215 
1216  if (parser->flow_level)
1217  return 1;
1218 
1219  if (parser->indent < column)
1220  {
1221  /*
1222  * Push the current indentation level to the stack and set the new
1223  * indentation level.
1224  */
1225 
1226  if (!PUSH(parser, parser->indents, parser->indent))
1227  return 0;
1228 
1229  parser->indent = column;
1230 
1231  /* Create a token and insert it into the queue. */
1232 
1233  TOKEN_INIT(token, type, mark, mark);
1234 
1235  if (number == -1) {
1236  if (!ENQUEUE(parser, parser->tokens, token))
1237  return 0;
1238  }
1239  else {
1240  if (!QUEUE_INSERT(parser,
1241  parser->tokens, number - parser->tokens_parsed, token))
1242  return 0;
1243  }
1244  }
1245 
1246  return 1;
1247 }
1248 
1249 /*
1250  * Pop indentation levels from the indents stack until the current level
1251  * becomes less than or equal to the column. For each indentation level, append
1252  * the BLOCK-END token.
1253  */
1254 
1255 
1256 static int
1258 {
1260 
1261  /* In the flow context, do nothing. */
1262 
1263  if (parser->flow_level)
1264  return 1;
1265 
1266  /* Loop through the indentation levels in the stack. */
1267 
1268  while (parser->indent > column)
1269  {
1270  /* Create a token and append it to the queue. */
1271 
1272  TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
1273 
1274  if (!ENQUEUE(parser, parser->tokens, token))
1275  return 0;
1276 
1277  /* Pop the indentation level. */
1278 
1279  parser->indent = POP(parser, parser->indents);
1280  }
1281 
1282  return 1;
1283 }
1284 
1285 /*
1286  * Initialize the scanner and produce the STREAM-START token.
1287  */
1288 
1289 static int
1291 {
1292  yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
1294 
1295  /* Set the initial indentation. */
1296 
1297  parser->indent = -1;
1298 
1299  /* Initialize the simple key stack. */
1300 
1301  if (!PUSH(parser, parser->simple_keys, simple_key))
1302  return 0;
1303 
1304  /* A simple key is allowed at the beginning of the stream. */
1305 
1306  parser->simple_key_allowed = 1;
1307 
1308  /* We have started. */
1309 
1310  parser->stream_start_produced = 1;
1311 
1312  /* Create the STREAM-START token and append it to the queue. */
1313 
1314  STREAM_START_TOKEN_INIT(token, parser->encoding,
1315  parser->mark, parser->mark);
1316 
1317  if (!ENQUEUE(parser, parser->tokens, token))
1318  return 0;
1319 
1320  return 1;
1321 }
1322 
1323 /*
1324  * Produce the STREAM-END token and shut down the scanner.
1325  */
1326 
1327 static int
1329 {
1331 
1332  /* Force new line. */
1333 
1334  if (parser->mark.column != 0) {
1335  parser->mark.column = 0;
1336  parser->mark.line ++;
1337  }
1338 
1339  /* Reset the indentation level. */
1340 
1341  if (!yaml_parser_unroll_indent(parser, -1))
1342  return 0;
1343 
1344  /* Reset simple keys. */
1345 
1346  if (!yaml_parser_remove_simple_key(parser))
1347  return 0;
1348 
1349  parser->simple_key_allowed = 0;
1350 
1351  /* Create the STREAM-END token and append it to the queue. */
1352 
1353  STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
1354 
1355  if (!ENQUEUE(parser, parser->tokens, token))
1356  return 0;
1357 
1358  return 1;
1359 }
1360 
1361 /*
1362  * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1363  */
1364 
1365 static int
1367 {
1369 
1370  /* Reset the indentation level. */
1371 
1372  if (!yaml_parser_unroll_indent(parser, -1))
1373  return 0;
1374 
1375  /* Reset simple keys. */
1376 
1377  if (!yaml_parser_remove_simple_key(parser))
1378  return 0;
1379 
1380  parser->simple_key_allowed = 0;
1381 
1382  /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
1383 
1384  if (!yaml_parser_scan_directive(parser, &token))
1385  return 0;
1386 
1387  /* Append the token to the queue. */
1388 
1389  if (!ENQUEUE(parser, parser->tokens, token)) {
1390  yaml_token_delete(&token);
1391  return 0;
1392  }
1393 
1394  return 1;
1395 }
1396 
1397 /*
1398  * Produce the DOCUMENT-START or DOCUMENT-END token.
1399  */
1400 
1401 static int
1404 {
1405  yaml_mark_t start_mark, end_mark;
1407 
1408  /* Reset the indentation level. */
1409 
1410  if (!yaml_parser_unroll_indent(parser, -1))
1411  return 0;
1412 
1413  /* Reset simple keys. */
1414 
1415  if (!yaml_parser_remove_simple_key(parser))
1416  return 0;
1417 
1418  parser->simple_key_allowed = 0;
1419 
1420  /* Consume the token. */
1421 
1422  start_mark = parser->mark;
1423 
1424  SKIP(parser);
1425  SKIP(parser);
1426  SKIP(parser);
1427 
1428  end_mark = parser->mark;
1429 
1430  /* Create the DOCUMENT-START or DOCUMENT-END token. */
1431 
1432  TOKEN_INIT(token, type, start_mark, end_mark);
1433 
1434  /* Append the token to the queue. */
1435 
1436  if (!ENQUEUE(parser, parser->tokens, token))
1437  return 0;
1438 
1439  return 1;
1440 }
1441 
1442 /*
1443  * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1444  */
1445 
1446 static int
1449 {
1450  yaml_mark_t start_mark, end_mark;
1452 
1453  /* The indicators '[' and '{' may start a simple key. */
1454 
1455  if (!yaml_parser_save_simple_key(parser))
1456  return 0;
1457 
1458  /* Increase the flow level. */
1459 
1460  if (!yaml_parser_increase_flow_level(parser))
1461  return 0;
1462 
1463  /* A simple key may follow the indicators '[' and '{'. */
1464 
1465  parser->simple_key_allowed = 1;
1466 
1467  /* Consume the token. */
1468 
1469  start_mark = parser->mark;
1470  SKIP(parser);
1471  end_mark = parser->mark;
1472 
1473  /* Create the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. */
1474 
1475  TOKEN_INIT(token, type, start_mark, end_mark);
1476 
1477  /* Append the token to the queue. */
1478 
1479  if (!ENQUEUE(parser, parser->tokens, token))
1480  return 0;
1481 
1482  return 1;
1483 }
1484 
1485 /*
1486  * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1487  */
1488 
1489 static int
1492 {
1493  yaml_mark_t start_mark, end_mark;
1495 
1496  /* Reset any potential simple key on the current flow level. */
1497 
1498  if (!yaml_parser_remove_simple_key(parser))
1499  return 0;
1500 
1501  /* Decrease the flow level. */
1502 
1503  if (!yaml_parser_decrease_flow_level(parser))
1504  return 0;
1505 
1506  /* No simple keys after the indicators ']' and '}'. */
1507 
1508  parser->simple_key_allowed = 0;
1509 
1510  /* Consume the token. */
1511 
1512  start_mark = parser->mark;
1513  SKIP(parser);
1514  end_mark = parser->mark;
1515 
1516  /* Create the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. */
1517 
1518  TOKEN_INIT(token, type, start_mark, end_mark);
1519 
1520  /* Append the token to the queue. */
1521 
1522  if (!ENQUEUE(parser, parser->tokens, token))
1523  return 0;
1524 
1525  return 1;
1526 }
1527 
1528 /*
1529  * Produce the FLOW-ENTRY token.
1530  */
1531 
1532 static int
1534 {
1535  yaml_mark_t start_mark, end_mark;
1537 
1538  /* Reset any potential simple keys on the current flow level. */
1539 
1540  if (!yaml_parser_remove_simple_key(parser))
1541  return 0;
1542 
1543  /* Simple keys are allowed after ','. */
1544 
1545  parser->simple_key_allowed = 1;
1546 
1547  /* Consume the token. */
1548 
1549  start_mark = parser->mark;
1550  SKIP(parser);
1551  end_mark = parser->mark;
1552 
1553  /* Create the FLOW-ENTRY token and append it to the queue. */
1554 
1555  TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
1556 
1557  if (!ENQUEUE(parser, parser->tokens, token))
1558  return 0;
1559 
1560  return 1;
1561 }
1562 
1563 /*
1564  * Produce the BLOCK-ENTRY token.
1565  */
1566 
1567 static int
1569 {
1570  yaml_mark_t start_mark, end_mark;
1572 
1573  /* Check if the scanner is in the block context. */
1574 
1575  if (!parser->flow_level)
1576  {
1577  /* Check if we are allowed to start a new entry. */
1578 
1579  if (!parser->simple_key_allowed) {
1580  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1581  "block sequence entries are not allowed in this context");
1582  }
1583 
1584  /* Add the BLOCK-SEQUENCE-START token if needed. */
1585 
1586  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1588  return 0;
1589  }
1590  else
1591  {
1592  /*
1593  * It is an error for the '-' indicator to occur in the flow context,
1594  * but we let the Parser detect and report about it because the Parser
1595  * is able to point to the context.
1596  */
1597  }
1598 
1599  /* Reset any potential simple keys on the current flow level. */
1600 
1601  if (!yaml_parser_remove_simple_key(parser))
1602  return 0;
1603 
1604  /* Simple keys are allowed after '-'. */
1605 
1606  parser->simple_key_allowed = 1;
1607 
1608  /* Consume the token. */
1609 
1610  start_mark = parser->mark;
1611  SKIP(parser);
1612  end_mark = parser->mark;
1613 
1614  /* Create the BLOCK-ENTRY token and append it to the queue. */
1615 
1616  TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
1617 
1618  if (!ENQUEUE(parser, parser->tokens, token))
1619  return 0;
1620 
1621  return 1;
1622 }
1623 
1624 /*
1625  * Produce the KEY token.
1626  */
1627 
1628 static int
1630 {
1631  yaml_mark_t start_mark, end_mark;
1633 
1634  /* In the block context, additional checks are required. */
1635 
1636  if (!parser->flow_level)
1637  {
1638  /* Check if we are allowed to start a new key (not necessarily simple). */
1639 
1640  if (!parser->simple_key_allowed) {
1641  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1642  "mapping keys are not allowed in this context");
1643  }
1644 
1645  /* Add the BLOCK-MAPPING-START token if needed. */
1646 
1647  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1649  return 0;
1650  }
1651 
1652  /* Reset any potential simple keys on the current flow level. */
1653 
1654  if (!yaml_parser_remove_simple_key(parser))
1655  return 0;
1656 
1657  /* Simple keys are allowed after '?' in the block context. */
1658 
1659  parser->simple_key_allowed = (!parser->flow_level);
1660 
1661  /* Consume the token. */
1662 
1663  start_mark = parser->mark;
1664  SKIP(parser);
1665  end_mark = parser->mark;
1666 
1667  /* Create the KEY token and append it to the queue. */
1668 
1669  TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
1670 
1671  if (!ENQUEUE(parser, parser->tokens, token))
1672  return 0;
1673 
1674  return 1;
1675 }
1676 
1677 /*
1678  * Produce the VALUE token.
1679  */
1680 
1681 static int
1683 {
1684  yaml_mark_t start_mark, end_mark;
1686  yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1687 
1688  /* Have we found a simple key? */
1689 
1690  if (simple_key->possible)
1691  {
1692 
1693  /* Create the KEY token and insert it into the queue. */
1694 
1695  TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
1696 
1697  if (!QUEUE_INSERT(parser, parser->tokens,
1698  simple_key->token_number - parser->tokens_parsed, token))
1699  return 0;
1700 
1701  /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
1702 
1703  if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
1704  simple_key->token_number,
1705  YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1706  return 0;
1707 
1708  /* Remove the simple key. */
1709 
1710  simple_key->possible = 0;
1711 
1712  /* A simple key cannot follow another simple key. */
1713 
1714  parser->simple_key_allowed = 0;
1715  }
1716  else
1717  {
1718  /* The ':' indicator follows a complex key. */
1719 
1720  /* In the block context, extra checks are required. */
1721 
1722  if (!parser->flow_level)
1723  {
1724  /* Check if we are allowed to start a complex value. */
1725 
1726  if (!parser->simple_key_allowed) {
1727  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1728  "mapping values are not allowed in this context");
1729  }
1730 
1731  /* Add the BLOCK-MAPPING-START token if needed. */
1732 
1733  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1735  return 0;
1736  }
1737 
1738  /* Simple keys after ':' are allowed in the block context. */
1739 
1740  parser->simple_key_allowed = (!parser->flow_level);
1741  }
1742 
1743  /* Consume the token. */
1744 
1745  start_mark = parser->mark;
1746  SKIP(parser);
1747  end_mark = parser->mark;
1748 
1749  /* Create the VALUE token and append it to the queue. */
1750 
1751  TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
1752 
1753  if (!ENQUEUE(parser, parser->tokens, token))
1754  return 0;
1755 
1756  return 1;
1757 }
1758 
1759 /*
1760  * Produce the ALIAS or ANCHOR token.
1761  */
1762 
1763 static int
1765 {
1767 
1768  /* An anchor or an alias could be a simple key. */
1769 
1770  if (!yaml_parser_save_simple_key(parser))
1771  return 0;
1772 
1773  /* A simple key cannot follow an anchor or an alias. */
1774 
1775  parser->simple_key_allowed = 0;
1776 
1777  /* Create the ALIAS or ANCHOR token and append it to the queue. */
1778 
1779  if (!yaml_parser_scan_anchor(parser, &token, type))
1780  return 0;
1781 
1782  if (!ENQUEUE(parser, parser->tokens, token)) {
1783  yaml_token_delete(&token);
1784  return 0;
1785  }
1786  return 1;
1787 }
1788 
1789 /*
1790  * Produce the TAG token.
1791  */
1792 
1793 static int
1795 {
1797 
1798  /* A tag could be a simple key. */
1799 
1800  if (!yaml_parser_save_simple_key(parser))
1801  return 0;
1802 
1803  /* A simple key cannot follow a tag. */
1804 
1805  parser->simple_key_allowed = 0;
1806 
1807  /* Create the TAG token and append it to the queue. */
1808 
1809  if (!yaml_parser_scan_tag(parser, &token))
1810  return 0;
1811 
1812  if (!ENQUEUE(parser, parser->tokens, token)) {
1813  yaml_token_delete(&token);
1814  return 0;
1815  }
1816 
1817  return 1;
1818 }
1819 
1820 /*
1821  * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1822  */
1823 
1824 static int
1826 {
1828 
1829  /* Remove any potential simple keys. */
1830 
1831  if (!yaml_parser_remove_simple_key(parser))
1832  return 0;
1833 
1834  /* A simple key may follow a block scalar. */
1835 
1836  parser->simple_key_allowed = 1;
1837 
1838  /* Create the SCALAR token and append it to the queue. */
1839 
1840  if (!yaml_parser_scan_block_scalar(parser, &token, literal))
1841  return 0;
1842 
1843  if (!ENQUEUE(parser, parser->tokens, token)) {
1844  yaml_token_delete(&token);
1845  return 0;
1846  }
1847 
1848  return 1;
1849 }
1850 
1851 /*
1852  * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1853  */
1854 
1855 static int
1857 {
1859 
1860  /* A flow scalar could be a simple key. */
1861 
1862  if (!yaml_parser_save_simple_key(parser))
1863  return 0;
1864 
1865  /* A simple key cannot follow a flow scalar. */
1866 
1867  parser->simple_key_allowed = 0;
1868 
1869  /* Create the SCALAR token and append it to the queue. */
1870 
1871  if (!yaml_parser_scan_flow_scalar(parser, &token, single))
1872  return 0;
1873 
1874  if (!ENQUEUE(parser, parser->tokens, token)) {
1875  yaml_token_delete(&token);
1876  return 0;
1877  }
1878 
1879  return 1;
1880 }
1881 
1882 /*
1883  * Produce the SCALAR(...,plain) token.
1884  */
1885 
1886 static int
1888 {
1890 
1891  /* A plain scalar could be a simple key. */
1892 
1893  if (!yaml_parser_save_simple_key(parser))
1894  return 0;
1895 
1896  /* A simple key cannot follow a flow scalar. */
1897 
1898  parser->simple_key_allowed = 0;
1899 
1900  /* Create the SCALAR token and append it to the queue. */
1901 
1902  if (!yaml_parser_scan_plain_scalar(parser, &token))
1903  return 0;
1904 
1905  if (!ENQUEUE(parser, parser->tokens, token)) {
1906  yaml_token_delete(&token);
1907  return 0;
1908  }
1909 
1910  return 1;
1911 }
1912 
1913 /*
1914  * Eat whitespaces and comments until the next token is found.
1915  */
1916 
1917 static int
1919 {
1920  /* Loop until the next token is found. */
1921 
1922  while (1)
1923  {
1924  /* Allow the BOM mark to start a line. */
1925 
1926  if (!CACHE(parser, 1)) return 0;
1927 
1928  if (parser->mark.column == 0 && IS_BOM(parser->buffer))
1929  SKIP(parser);
1930 
1931  /*
1932  * Eat whitespaces.
1933  *
1934  * Tabs are allowed:
1935  *
1936  * - in the flow context;
1937  * - in the block context, but not at the beginning of the line or
1938  * after '-', '?', or ':' (complex value).
1939  */
1940 
1941  if (!CACHE(parser, 1)) return 0;
1942 
1943  while (CHECK(parser->buffer,' ') ||
1944  ((parser->flow_level || !parser->simple_key_allowed) &&
1945  CHECK(parser->buffer, '\t'))) {
1946  SKIP(parser);
1947  if (!CACHE(parser, 1)) return 0;
1948  }
1949 
1950  /* Eat a comment until a line break. */
1951 
1952  if (CHECK(parser->buffer, '#')) {
1953  while (!IS_BREAKZ(parser->buffer)) {
1954  SKIP(parser);
1955  if (!CACHE(parser, 1)) return 0;
1956  }
1957  }
1958 
1959  /* If it is a line break, eat it. */
1960 
1961  if (IS_BREAK(parser->buffer))
1962  {
1963  if (!CACHE(parser, 2)) return 0;
1964  SKIP_LINE(parser);
1965 
1966  /* In the block context, a new line may start a simple key. */
1967 
1968  if (!parser->flow_level) {
1969  parser->simple_key_allowed = 1;
1970  }
1971  }
1972  else
1973  {
1974  /* We have found a token. */
1975 
1976  break;
1977  }
1978  }
1979 
1980  return 1;
1981 }
1982 
1983 /*
1984  * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
1985  *
1986  * Scope:
1987  * %YAML 1.1 # a comment \n
1988  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1989  * %TAG !yaml! tag:yaml.org,2002: \n
1990  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1991  */
1992 
1993 int
1995 {
1996  yaml_mark_t start_mark, end_mark;
1997  yaml_char_t *name = NULL;
1998  int major, minor;
1999  yaml_char_t *handle = NULL, *prefix = NULL;
2000 
2001  /* Eat '%'. */
2002 
2003  start_mark = parser->mark;
2004 
2005  SKIP(parser);
2006 
2007  /* Scan the directive name. */
2008 
2009  if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2010  goto error;
2011 
2012  /* Is it a YAML directive? */
2013 
2014  if (strcmp((char *)name, "YAML") == 0)
2015  {
2016  /* Scan the VERSION directive value. */
2017 
2018  if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2019  &major, &minor))
2020  goto error;
2021 
2022  end_mark = parser->mark;
2023 
2024  /* Create a VERSION-DIRECTIVE token. */
2025 
2026  VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
2027  start_mark, end_mark);
2028  }
2029 
2030  /* Is it a TAG directive? */
2031 
2032  else if (strcmp((char *)name, "TAG") == 0)
2033  {
2034  /* Scan the TAG directive value. */
2035 
2036  if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2037  &handle, &prefix))
2038  goto error;
2039 
2040  end_mark = parser->mark;
2041 
2042  /* Create a TAG-DIRECTIVE token. */
2043 
2044  TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
2045  start_mark, end_mark);
2046  }
2047 
2048  /* Unknown directive. */
2049 
2050  else
2051  {
2052  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2053  start_mark, "found uknown directive name");
2054  goto error;
2055  }
2056 
2057  /* Eat the rest of the line including any comments. */
2058 
2059  if (!CACHE(parser, 1)) goto error;
2060 
2061  while (IS_BLANK(parser->buffer)) {
2062  SKIP(parser);
2063  if (!CACHE(parser, 1)) goto error;
2064  }
2065 
2066  if (CHECK(parser->buffer, '#')) {
2067  while (!IS_BREAKZ(parser->buffer)) {
2068  SKIP(parser);
2069  if (!CACHE(parser, 1)) goto error;
2070  }
2071  }
2072 
2073  /* Check if we are at the end of the line. */
2074 
2075  if (!IS_BREAKZ(parser->buffer)) {
2076  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2077  start_mark, "did not find expected comment or line break");
2078  goto error;
2079  }
2080 
2081  /* Eat a line break. */
2082 
2083  if (IS_BREAK(parser->buffer)) {
2084  if (!CACHE(parser, 2)) goto error;
2085  SKIP_LINE(parser);
2086  }
2087 
2088  yaml_free(name);
2089 
2090  return 1;
2091 
2092 error:
2093  yaml_free(prefix);
2094  yaml_free(handle);
2095  yaml_free(name);
2096  return 0;
2097 }
2098 
2099 /*
2100  * Scan the directive name.
2101  *
2102  * Scope:
2103  * %YAML 1.1 # a comment \n
2104  * ^^^^
2105  * %TAG !yaml! tag:yaml.org,2002: \n
2106  * ^^^
2107  */
2108 
2109 static int
2111  yaml_mark_t start_mark, yaml_char_t **name)
2112 {
2113  yaml_string_t string = NULL_STRING;
2114 
2115  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2116 
2117  /* Consume the directive name. */
2118 
2119  if (!CACHE(parser, 1)) goto error;
2120 
2121  while (IS_ALPHA(parser->buffer))
2122  {
2123  if (!READ(parser, string)) goto error;
2124  if (!CACHE(parser, 1)) goto error;
2125  }
2126 
2127  /* Check if the name is empty. */
2128 
2129  if (string.start == string.pointer) {
2130  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2131  start_mark, "could not find expected directive name");
2132  goto error;
2133  }
2134 
2135  /* Check for an blank character after the name. */
2136 
2137  if (!IS_BLANKZ(parser->buffer)) {
2138  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2139  start_mark, "found unexpected non-alphabetical character");
2140  goto error;
2141  }
2142 
2143  *name = string.start;
2144 
2145  return 1;
2146 
2147 error:
2148  STRING_DEL(parser, string);
2149  return 0;
2150 }
2151 
2152 /*
2153  * Scan the value of VERSION-DIRECTIVE.
2154  *
2155  * Scope:
2156  * %YAML 1.1 # a comment \n
2157  * ^^^^^^
2158  */
2159 
2160 static int
2162  yaml_mark_t start_mark, int *major, int *minor)
2163 {
2164  /* Eat whitespaces. */
2165 
2166  if (!CACHE(parser, 1)) return 0;
2167 
2168  while (IS_BLANK(parser->buffer)) {
2169  SKIP(parser);
2170  if (!CACHE(parser, 1)) return 0;
2171  }
2172 
2173  /* Consume the major version number. */
2174 
2175  if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2176  return 0;
2177 
2178  /* Eat '.'. */
2179 
2180  if (!CHECK(parser->buffer, '.')) {
2181  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2182  start_mark, "did not find expected digit or '.' character");
2183  }
2184 
2185  SKIP(parser);
2186 
2187  /* Consume the minor version number. */
2188 
2189  if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2190  return 0;
2191 
2192  return 1;
2193 }
2194 
2195 #define MAX_NUMBER_LENGTH 9
2196 
2197 /*
2198  * Scan the version number of VERSION-DIRECTIVE.
2199  *
2200  * Scope:
2201  * %YAML 1.1 # a comment \n
2202  * ^
2203  * %YAML 1.1 # a comment \n
2204  * ^
2205  */
2206 
2207 static int
2209  yaml_mark_t start_mark, int *number)
2210 {
2211  int value = 0;
2212  size_t length = 0;
2213 
2214  /* Repeat while the next character is digit. */
2215 
2216  if (!CACHE(parser, 1)) return 0;
2217 
2218  while (IS_DIGIT(parser->buffer))
2219  {
2220  /* Check if the number is too long. */
2221 
2222  if (++length > MAX_NUMBER_LENGTH) {
2223  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2224  start_mark, "found extremely long version number");
2225  }
2226 
2227  value = value*10 + AS_DIGIT(parser->buffer);
2228 
2229  SKIP(parser);
2230 
2231  if (!CACHE(parser, 1)) return 0;
2232  }
2233 
2234  /* Check if the number was present. */
2235 
2236  if (!length) {
2237  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2238  start_mark, "did not find expected version number");
2239  }
2240 
2241  *number = value;
2242 
2243  return 1;
2244 }
2245 
2246 /*
2247  * Scan the value of a TAG-DIRECTIVE token.
2248  *
2249  * Scope:
2250  * %TAG !yaml! tag:yaml.org,2002: \n
2251  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2252  */
2253 
2254 static int
2256  yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2257 {
2258  yaml_char_t *handle_value = NULL;
2259  yaml_char_t *prefix_value = NULL;
2260 
2261  /* Eat whitespaces. */
2262 
2263  if (!CACHE(parser, 1)) goto error;
2264 
2265  while (IS_BLANK(parser->buffer)) {
2266  SKIP(parser);
2267  if (!CACHE(parser, 1)) goto error;
2268  }
2269 
2270  /* Scan a handle. */
2271 
2272  if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2273  goto error;
2274 
2275  /* Expect a whitespace. */
2276 
2277  if (!CACHE(parser, 1)) goto error;
2278 
2279  if (!IS_BLANK(parser->buffer)) {
2280  yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2281  start_mark, "did not find expected whitespace");
2282  goto error;
2283  }
2284 
2285  /* Eat whitespaces. */
2286 
2287  while (IS_BLANK(parser->buffer)) {
2288  SKIP(parser);
2289  if (!CACHE(parser, 1)) goto error;
2290  }
2291 
2292  /* Scan a prefix. */
2293 
2294  if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value))
2295  goto error;
2296 
2297  /* Expect a whitespace or line break. */
2298 
2299  if (!CACHE(parser, 1)) goto error;
2300 
2301  if (!IS_BLANKZ(parser->buffer)) {
2302  yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2303  start_mark, "did not find expected whitespace or line break");
2304  goto error;
2305  }
2306 
2307  *handle = handle_value;
2308  *prefix = prefix_value;
2309 
2310  return 1;
2311 
2312 error:
2313  yaml_free(handle_value);
2314  yaml_free(prefix_value);
2315  return 0;
2316 }
2317 
2318 static int
2321 {
2322  int length = 0;
2323  yaml_mark_t start_mark, end_mark;
2324  yaml_string_t string = NULL_STRING;
2325 
2326  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2327 
2328  /* Eat the indicator character. */
2329 
2330  start_mark = parser->mark;
2331 
2332  SKIP(parser);
2333 
2334  /* Consume the value. */
2335 
2336  if (!CACHE(parser, 1)) goto error;
2337 
2338  while (IS_ALPHA(parser->buffer)) {
2339  if (!READ(parser, string)) goto error;
2340  if (!CACHE(parser, 1)) goto error;
2341  length ++;
2342  }
2343 
2344  end_mark = parser->mark;
2345 
2346  /*
2347  * Check if length of the anchor is greater than 0 and it is followed by
2348  * a whitespace character or one of the indicators:
2349  *
2350  * '?', ':', ',', ']', '}', '%', '@', '`'.
2351  */
2352 
2353  if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
2354  || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
2355  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
2356  || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
2357  || CHECK(parser->buffer, '`'))) {
2359  "while scanning an anchor" : "while scanning an alias", start_mark,
2360  "did not find expected alphabetic or numeric character");
2361  goto error;
2362  }
2363 
2364  /* Create a token. */
2365 
2366  if (type == YAML_ANCHOR_TOKEN) {
2367  ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2368  }
2369  else {
2370  ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2371  }
2372 
2373  return 1;
2374 
2375 error:
2376  STRING_DEL(parser, string);
2377  return 0;
2378 }
2379 
2380 /*
2381  * Scan a TAG token.
2382  */
2383 
2384 static int
2386 {
2387  yaml_char_t *handle = NULL;
2388  yaml_char_t *suffix = NULL;
2389  yaml_mark_t start_mark, end_mark;
2390 
2391  start_mark = parser->mark;
2392 
2393  /* Check if the tag is in the canonical form. */
2394 
2395  if (!CACHE(parser, 2)) goto error;
2396 
2397  if (CHECK_AT(parser->buffer, '<', 1))
2398  {
2399  /* Set the handle to '' */
2400 
2401  handle = yaml_malloc(1);
2402  if (!handle) goto error;
2403  handle[0] = '\0';
2404 
2405  /* Eat '!<' */
2406 
2407  SKIP(parser);
2408  SKIP(parser);
2409 
2410  /* Consume the tag value. */
2411 
2412  if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2413  goto error;
2414 
2415  /* Check for '>' and eat it. */
2416 
2417  if (!CHECK(parser->buffer, '>')) {
2418  yaml_parser_set_scanner_error(parser, "while scanning a tag",
2419  start_mark, "did not find the expected '>'");
2420  goto error;
2421  }
2422 
2423  SKIP(parser);
2424  }
2425  else
2426  {
2427  /* The tag has either the '!suffix' or the '!handle!suffix' form. */
2428 
2429  /* First, try to scan a handle. */
2430 
2431  if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2432  goto error;
2433 
2434  /* Check if it is, indeed, handle. */
2435 
2436  if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2437  {
2438  /* Scan the suffix now. */
2439 
2440  if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2441  goto error;
2442  }
2443  else
2444  {
2445  /* It wasn't a handle after all. Scan the rest of the tag. */
2446 
2447  if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix))
2448  goto error;
2449 
2450  /* Set the handle to '!'. */
2451 
2452  yaml_free(handle);
2453  handle = yaml_malloc(2);
2454  if (!handle) goto error;
2455  handle[0] = '!';
2456  handle[1] = '\0';
2457 
2458  /*
2459  * A special case: the '!' tag. Set the handle to '' and the
2460  * suffix to '!'.
2461  */
2462 
2463  if (suffix[0] == '\0') {
2464  yaml_char_t *tmp = handle;
2465  handle = suffix;
2466  suffix = tmp;
2467  }
2468  }
2469  }
2470 
2471  /* Check the character which ends the tag. */
2472 
2473  if (!CACHE(parser, 1)) goto error;
2474 
2475  if (!IS_BLANKZ(parser->buffer)) {
2476  yaml_parser_set_scanner_error(parser, "while scanning a tag",
2477  start_mark, "did not find expected whitespace or line break");
2478  goto error;
2479  }
2480 
2481  end_mark = parser->mark;
2482 
2483  /* Create a token. */
2484 
2485  TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);
2486 
2487  return 1;
2488 
2489 error:
2490  yaml_free(handle);
2491  yaml_free(suffix);
2492  return 0;
2493 }
2494 
2495 /*
2496  * Scan a tag handle.
2497  */
2498 
2499 static int
2501  yaml_mark_t start_mark, yaml_char_t **handle)
2502 {
2503  yaml_string_t string = NULL_STRING;
2504 
2505  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2506 
2507  /* Check the initial '!' character. */
2508 
2509  if (!CACHE(parser, 1)) goto error;
2510 
2511  if (!CHECK(parser->buffer, '!')) {
2512  yaml_parser_set_scanner_error(parser, directive ?
2513  "while scanning a tag directive" : "while scanning a tag",
2514  start_mark, "did not find expected '!'");
2515  goto error;
2516  }
2517 
2518  /* Copy the '!' character. */
2519 
2520  if (!READ(parser, string)) goto error;
2521 
2522  /* Copy all subsequent alphabetical and numerical characters. */
2523 
2524  if (!CACHE(parser, 1)) goto error;
2525 
2526  while (IS_ALPHA(parser->buffer))
2527  {
2528  if (!READ(parser, string)) goto error;
2529  if (!CACHE(parser, 1)) goto error;
2530  }
2531 
2532  /* Check if the trailing character is '!' and copy it. */
2533 
2534  if (CHECK(parser->buffer, '!'))
2535  {
2536  if (!READ(parser, string)) goto error;
2537  }
2538  else
2539  {
2540  /*
2541  * It's either the '!' tag or not really a tag handle. If it's a %TAG
2542  * directive, it's an error. If it's a tag token, it must be a part of
2543  * URI.
2544  */
2545 
2546  if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
2547  yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
2548  start_mark, "did not find expected '!'");
2549  goto error;
2550  }
2551  }
2552 
2553  *handle = string.start;
2554 
2555  return 1;
2556 
2557 error:
2558  STRING_DEL(parser, string);
2559  return 0;
2560 }
2561 
2562 /*
2563  * Scan a tag URI.
2564  */
2565 
2566 static int
2568  yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
2569 {
2570  size_t length = head ? strlen((char *)head) : 0;
2571  yaml_string_t string = NULL_STRING;
2572 
2573  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2574 
2575  /* Resize the string to include the head. */
2576 
2577  while (string.end - string.start <= (int)length) {
2578  if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
2579  parser->error = YAML_MEMORY_ERROR;
2580  goto error;
2581  }
2582  }
2583 
2584  /*
2585  * Copy the head if needed.
2586  *
2587  * Note that we don't copy the leading '!' character.
2588  */
2589 
2590  if (length > 1) {
2591  memcpy(string.start, head+1, length-1);
2592  string.pointer += length-1;
2593  }
2594 
2595  /* Scan the tag. */
2596 
2597  if (!CACHE(parser, 1)) goto error;
2598 
2599  /*
2600  * The set of characters that may appear in URI is as follows:
2601  *
2602  * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2603  * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
2604  * '%'.
2605  */
2606 
2607  while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
2608  || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
2609  || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
2610  || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
2611  || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
2612  || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '.')
2613  || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
2614  || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
2615  || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
2616  || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
2617  || CHECK(parser->buffer, '%'))
2618  {
2619  /* Check if it is a URI-escape sequence. */
2620 
2621  if (CHECK(parser->buffer, '%')) {
2622  if (!yaml_parser_scan_uri_escapes(parser,
2623  directive, start_mark, &string)) goto error;
2624  }
2625  else {
2626  if (!READ(parser, string)) goto error;
2627  }
2628 
2629  length ++;
2630  if (!CACHE(parser, 1)) goto error;
2631  }
2632 
2633  /* Check if the tag is non-empty. */
2634 
2635  if (!length) {
2636  if (!STRING_EXTEND(parser, string))
2637  goto error;
2638 
2639  yaml_parser_set_scanner_error(parser, directive ?
2640  "while parsing a %TAG directive" : "while parsing a tag",
2641  start_mark, "did not find expected tag URI");
2642  goto error;
2643  }
2644 
2645  *uri = string.start;
2646 
2647  return 1;
2648 
2649 error:
2650  STRING_DEL(parser, string);
2651  return 0;
2652 }
2653 
2654 /*
2655  * Decode an URI-escape sequence corresponding to a single UTF-8 character.
2656  */
2657 
2658 static int
2660  yaml_mark_t start_mark, yaml_string_t *string)
2661 {
2662  int width = 0;
2663 
2664  /* Decode the required number of characters. */
2665 
2666  do {
2667 
2668  unsigned char octet = 0;
2669 
2670  /* Check for a URI-escaped octet. */
2671 
2672  if (!CACHE(parser, 3)) return 0;
2673 
2674  if (!(CHECK(parser->buffer, '%')
2675  && IS_HEX_AT(parser->buffer, 1)
2676  && IS_HEX_AT(parser->buffer, 2))) {
2677  return yaml_parser_set_scanner_error(parser, directive ?
2678  "while parsing a %TAG directive" : "while parsing a tag",
2679  start_mark, "did not find URI escaped octet");
2680  }
2681 
2682  /* Get the octet. */
2683 
2684  octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);
2685 
2686  /* If it is the leading octet, determine the length of the UTF-8 sequence. */
2687 
2688  if (!width)
2689  {
2690  width = (octet & 0x80) == 0x00 ? 1 :
2691  (octet & 0xE0) == 0xC0 ? 2 :
2692  (octet & 0xF0) == 0xE0 ? 3 :
2693  (octet & 0xF8) == 0xF0 ? 4 : 0;
2694  if (!width) {
2695  return yaml_parser_set_scanner_error(parser, directive ?
2696  "while parsing a %TAG directive" : "while parsing a tag",
2697  start_mark, "found an incorrect leading UTF-8 octet");
2698  }
2699  }
2700  else
2701  {
2702  /* Check if the trailing octet is correct. */
2703 
2704  if ((octet & 0xC0) != 0x80) {
2705  return yaml_parser_set_scanner_error(parser, directive ?
2706  "while parsing a %TAG directive" : "while parsing a tag",
2707  start_mark, "found an incorrect trailing UTF-8 octet");
2708  }
2709  }
2710 
2711  /* Copy the octet and move the pointers. */
2712 
2713  *(string->pointer++) = octet;
2714  SKIP(parser);
2715  SKIP(parser);
2716  SKIP(parser);
2717 
2718  } while (--width);
2719 
2720  return 1;
2721 }
2722 
2723 /*
2724  * Scan a block scalar.
2725  */
2726 
2727 static int
2729  int literal)
2730 {
2731  yaml_mark_t start_mark;
2732  yaml_mark_t end_mark;
2733  yaml_string_t string = NULL_STRING;
2734  yaml_string_t leading_break = NULL_STRING;
2735  yaml_string_t trailing_breaks = NULL_STRING;
2736  int chomping = 0;
2737  int increment = 0;
2738  int indent = 0;
2739  int leading_blank = 0;
2740  int trailing_blank = 0;
2741 
2742  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2743  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
2744  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
2745 
2746  /* Eat the indicator '|' or '>'. */
2747 
2748  start_mark = parser->mark;
2749 
2750  SKIP(parser);
2751 
2752  /* Scan the additional block scalar indicators. */
2753 
2754  if (!CACHE(parser, 1)) goto error;
2755 
2756  /* Check for a chomping indicator. */
2757 
2758  if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
2759  {
2760  /* Set the chomping method and eat the indicator. */
2761 
2762  chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2763 
2764  SKIP(parser);
2765 
2766  /* Check for an indentation indicator. */
2767 
2768  if (!CACHE(parser, 1)) goto error;
2769 
2770  if (IS_DIGIT(parser->buffer))
2771  {
2772  /* Check that the indentation is greater than 0. */
2773 
2774  if (CHECK(parser->buffer, '0')) {
2775  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2776  start_mark, "found an indentation indicator equal to 0");
2777  goto error;
2778  }
2779 
2780  /* Get the indentation level and eat the indicator. */
2781 
2782  increment = AS_DIGIT(parser->buffer);
2783 
2784  SKIP(parser);
2785  }
2786  }
2787 
2788  /* Do the same as above, but in the opposite order. */
2789 
2790  else if (IS_DIGIT(parser->buffer))
2791  {
2792  if (CHECK(parser->buffer, '0')) {
2793  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2794  start_mark, "found an indentation indicator equal to 0");
2795  goto error;
2796  }
2797 
2798  increment = AS_DIGIT(parser->buffer);
2799 
2800  SKIP(parser);
2801 
2802  if (!CACHE(parser, 1)) goto error;
2803 
2804  if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
2805  chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2806 
2807  SKIP(parser);
2808  }
2809  }
2810 
2811  /* Eat whitespaces and comments to the end of the line. */
2812 
2813  if (!CACHE(parser, 1)) goto error;
2814 
2815  while (IS_BLANK(parser->buffer)) {
2816  SKIP(parser);
2817  if (!CACHE(parser, 1)) goto error;
2818  }
2819 
2820  if (CHECK(parser->buffer, '#')) {
2821  while (!IS_BREAKZ(parser->buffer)) {
2822  SKIP(parser);
2823  if (!CACHE(parser, 1)) goto error;
2824  }
2825  }
2826 
2827  /* Check if we are at the end of the line. */
2828 
2829  if (!IS_BREAKZ(parser->buffer)) {
2830  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2831  start_mark, "did not find expected comment or line break");
2832  goto error;
2833  }
2834 
2835  /* Eat a line break. */
2836 
2837  if (IS_BREAK(parser->buffer)) {
2838  if (!CACHE(parser, 2)) goto error;
2839  SKIP_LINE(parser);
2840  }
2841 
2842  end_mark = parser->mark;
2843 
2844  /* Set the indentation level if it was specified. */
2845 
2846  if (increment) {
2847  indent = parser->indent >= 0 ? parser->indent+increment : increment;
2848  }
2849 
2850  /* Scan the leading line breaks and determine the indentation level if needed. */
2851 
2852  if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
2853  start_mark, &end_mark)) goto error;
2854 
2855  /* Scan the block scalar content. */
2856 
2857  if (!CACHE(parser, 1)) goto error;
2858 
2859  while ((int)parser->mark.column == indent && !IS_Z(parser->buffer))
2860  {
2861  /*
2862  * We are at the beginning of a non-empty line.
2863  */
2864 
2865  /* Is it a trailing whitespace? */
2866 
2867  trailing_blank = IS_BLANK(parser->buffer);
2868 
2869  /* Check if we need to fold the leading line break. */
2870 
2871  if (!literal && (*leading_break.start == '\n')
2872  && !leading_blank && !trailing_blank)
2873  {
2874  /* Do we need to join the lines by space? */
2875 
2876  if (*trailing_breaks.start == '\0') {
2877  if (!STRING_EXTEND(parser, string)) goto error;
2878  *(string.pointer ++) = ' ';
2879  }
2880 
2881  CLEAR(parser, leading_break);
2882  }
2883  else {
2884  if (!JOIN(parser, string, leading_break)) goto error;
2885  CLEAR(parser, leading_break);
2886  }
2887 
2888  /* Append the remaining line breaks. */
2889 
2890  if (!JOIN(parser, string, trailing_breaks)) goto error;
2891  CLEAR(parser, trailing_breaks);
2892 
2893  /* Is it a leading whitespace? */
2894 
2895  leading_blank = IS_BLANK(parser->buffer);
2896 
2897  /* Consume the current line. */
2898 
2899  while (!IS_BREAKZ(parser->buffer)) {
2900  if (!READ(parser, string)) goto error;
2901  if (!CACHE(parser, 1)) goto error;
2902  }
2903 
2904  /* Consume the line break. */
2905 
2906  if (!CACHE(parser, 2)) goto error;
2907 
2908  if (!READ_LINE(parser, leading_break)) goto error;
2909 
2910  /* Eat the following indentation spaces and line breaks. */
2911 
2913  &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
2914  }
2915 
2916  /* Chomp the tail. */
2917 
2918  if (chomping != -1) {
2919  if (!JOIN(parser, string, leading_break)) goto error;
2920  }
2921  if (chomping == 1) {
2922  if (!JOIN(parser, string, trailing_breaks)) goto error;
2923  }
2924 
2925  /* Create a token. */
2926 
2927  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
2929  start_mark, end_mark);
2930 
2931  STRING_DEL(parser, leading_break);
2932  STRING_DEL(parser, trailing_breaks);
2933 
2934  return 1;
2935 
2936 error:
2937  STRING_DEL(parser, string);
2938  STRING_DEL(parser, leading_break);
2939  STRING_DEL(parser, trailing_breaks);
2940 
2941  return 0;
2942 }
2943 
2944 /*
2945  * Scan indentation spaces and line breaks for a block scalar. Determine the
2946  * indentation level if needed.
2947  */
2948 
2949 static int
2951  int *indent, yaml_string_t *breaks,
2952  yaml_mark_t start_mark, yaml_mark_t *end_mark)
2953 {
2954  int max_indent = 0;
2955 
2956  *end_mark = parser->mark;
2957 
2958  /* Eat the indentation spaces and line breaks. */
2959 
2960  while (1)
2961  {
2962  /* Eat the indentation spaces. */
2963 
2964  if (!CACHE(parser, 1)) return 0;
2965 
2966  while ((!*indent || (int)parser->mark.column < *indent)
2967  && IS_SPACE(parser->buffer)) {
2968  SKIP(parser);
2969  if (!CACHE(parser, 1)) return 0;
2970  }
2971 
2972  if ((int)parser->mark.column > max_indent)
2973  max_indent = (int)parser->mark.column;
2974 
2975  /* Check for a tab character messing the indentation. */
2976 
2977  if ((!*indent || (int)parser->mark.column < *indent)
2978  && IS_TAB(parser->buffer)) {
2979  return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2980  start_mark, "found a tab character where an indentation space is expected");
2981  }
2982 
2983  /* Have we found a non-empty line? */
2984 
2985  if (!IS_BREAK(parser->buffer)) break;
2986 
2987  /* Consume the line break. */
2988 
2989  if (!CACHE(parser, 2)) return 0;
2990  if (!READ_LINE(parser, *breaks)) return 0;
2991  *end_mark = parser->mark;
2992  }
2993 
2994  /* Determine the indentation level if needed. */
2995 
2996  if (!*indent) {
2997  *indent = max_indent;
2998  if (*indent < parser->indent + 1)
2999  *indent = parser->indent + 1;
3000  if (*indent < 1)
3001  *indent = 1;
3002  }
3003 
3004  return 1;
3005 }
3006 
3007 /*
3008  * Scan a quoted scalar.
3009  */
3010 
3011 static int
3013  int single)
3014 {
3015  yaml_mark_t start_mark;
3016  yaml_mark_t end_mark;
3017  yaml_string_t string = NULL_STRING;
3018  yaml_string_t leading_break = NULL_STRING;
3019  yaml_string_t trailing_breaks = NULL_STRING;
3020  yaml_string_t whitespaces = NULL_STRING;
3021  int leading_blanks;
3022 
3023  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3024  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3025  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3026  if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3027 
3028  /* Eat the left quote. */
3029 
3030  start_mark = parser->mark;
3031 
3032  SKIP(parser);
3033 
3034  /* Consume the content of the quoted scalar. */
3035 
3036  while (1)
3037  {
3038  /* Check that there are no document indicators at the beginning of the line. */
3039 
3040  if (!CACHE(parser, 4)) goto error;
3041 
3042  if (parser->mark.column == 0 &&
3043  ((CHECK_AT(parser->buffer, '-', 0) &&
3044  CHECK_AT(parser->buffer, '-', 1) &&
3045  CHECK_AT(parser->buffer, '-', 2)) ||
3046  (CHECK_AT(parser->buffer, '.', 0) &&
3047  CHECK_AT(parser->buffer, '.', 1) &&
3048  CHECK_AT(parser->buffer, '.', 2))) &&
3049  IS_BLANKZ_AT(parser->buffer, 3))
3050  {
3051  yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3052  start_mark, "found unexpected document indicator");
3053  goto error;
3054  }
3055 
3056  /* Check for EOF. */
3057 
3058  if (IS_Z(parser->buffer)) {
3059  yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3060  start_mark, "found unexpected end of stream");
3061  goto error;
3062  }
3063 
3064  /* Consume non-blank characters. */
3065 
3066  if (!CACHE(parser, 2)) goto error;
3067 
3068  leading_blanks = 0;
3069 
3070  while (!IS_BLANKZ(parser->buffer))
3071  {
3072  /* Check for an escaped single quote. */
3073 
3074  if (single && CHECK_AT(parser->buffer, '\'', 0)
3075  && CHECK_AT(parser->buffer, '\'', 1))
3076  {
3077  if (!STRING_EXTEND(parser, string)) goto error;
3078  *(string.pointer++) = '\'';
3079  SKIP(parser);
3080  SKIP(parser);
3081  }
3082 
3083  /* Check for the right quote. */
3084 
3085  else if (CHECK(parser->buffer, single ? '\'' : '"'))
3086  {
3087  break;
3088  }
3089 
3090  /* Check for an escaped line break. */
3091 
3092  else if (!single && CHECK(parser->buffer, '\\')
3093  && IS_BREAK_AT(parser->buffer, 1))
3094  {
3095  if (!CACHE(parser, 3)) goto error;
3096  SKIP(parser);
3097  SKIP_LINE(parser);
3098  leading_blanks = 1;
3099  break;
3100  }
3101 
3102  /* Check for an escape sequence. */
3103 
3104  else if (!single && CHECK(parser->buffer, '\\'))
3105  {
3106  size_t code_length = 0;
3107 
3108  if (!STRING_EXTEND(parser, string)) goto error;
3109 
3110  /* Check the escape character. */
3111 
3112  switch (parser->buffer.pointer[1])
3113  {
3114  case '0':
3115  *(string.pointer++) = '\0';
3116  break;
3117 
3118  case 'a':
3119  *(string.pointer++) = '\x07';
3120  break;
3121 
3122  case 'b':
3123  *(string.pointer++) = '\x08';
3124  break;
3125 
3126  case 't':
3127  case '\t':
3128  *(string.pointer++) = '\x09';
3129  break;
3130 
3131  case 'n':
3132  *(string.pointer++) = '\x0A';
3133  break;
3134 
3135  case 'v':
3136  *(string.pointer++) = '\x0B';
3137  break;
3138 
3139  case 'f':
3140  *(string.pointer++) = '\x0C';
3141  break;
3142 
3143  case 'r':
3144  *(string.pointer++) = '\x0D';
3145  break;
3146 
3147  case 'e':
3148  *(string.pointer++) = '\x1B';
3149  break;
3150 
3151  case ' ':
3152  *(string.pointer++) = '\x20';
3153  break;
3154 
3155  case '"':
3156  *(string.pointer++) = '"';
3157  break;
3158 
3159  case '\'':
3160  *(string.pointer++) = '\'';
3161  break;
3162 
3163  case '\\':
3164  *(string.pointer++) = '\\';
3165  break;
3166 
3167  case 'N': /* NEL (#x85) */
3168  *(string.pointer++) = '\xC2';
3169  *(string.pointer++) = '\x85';
3170  break;
3171 
3172  case '_': /* #xA0 */
3173  *(string.pointer++) = '\xC2';
3174  *(string.pointer++) = '\xA0';
3175  break;
3176 
3177  case 'L': /* LS (#x2028) */
3178  *(string.pointer++) = '\xE2';
3179  *(string.pointer++) = '\x80';
3180  *(string.pointer++) = '\xA8';
3181  break;
3182 
3183  case 'P': /* PS (#x2029) */
3184  *(string.pointer++) = '\xE2';
3185  *(string.pointer++) = '\x80';
3186  *(string.pointer++) = '\xA9';
3187  break;
3188 
3189  case 'x':
3190  code_length = 2;
3191  break;
3192 
3193  case 'u':
3194  code_length = 4;
3195  break;
3196 
3197  case 'U':
3198  code_length = 8;
3199  break;
3200 
3201  default:
3202  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3203  start_mark, "found unknown escape character");
3204  goto error;
3205  }
3206 
3207  SKIP(parser);
3208  SKIP(parser);
3209 
3210  /* Consume an arbitrary escape code. */
3211 
3212  if (code_length)
3213  {
3214  unsigned int value = 0;
3215  size_t k;
3216 
3217  /* Scan the character value. */
3218 
3219  if (!CACHE(parser, code_length)) goto error;
3220 
3221  for (k = 0; k < code_length; k ++) {
3222  if (!IS_HEX_AT(parser->buffer, k)) {
3223  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3224  start_mark, "did not find expected hexdecimal number");
3225  goto error;
3226  }
3227  value = (value << 4) + AS_HEX_AT(parser->buffer, k);
3228  }
3229 
3230  /* Check the value and write the character. */
3231 
3232  if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
3233  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3234  start_mark, "found invalid Unicode character escape code");
3235  goto error;
3236  }
3237 
3238  if (value <= 0x7F) {
3239  *(string.pointer++) = value;
3240  }
3241  else if (value <= 0x7FF) {
3242  *(string.pointer++) = 0xC0 + (value >> 6);
3243  *(string.pointer++) = 0x80 + (value & 0x3F);
3244  }
3245  else if (value <= 0xFFFF) {
3246  *(string.pointer++) = 0xE0 + (value >> 12);
3247  *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3248  *(string.pointer++) = 0x80 + (value & 0x3F);
3249  }
3250  else {
3251  *(string.pointer++) = 0xF0 + (value >> 18);
3252  *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
3253  *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3254  *(string.pointer++) = 0x80 + (value & 0x3F);
3255  }
3256 
3257  /* Advance the pointer. */
3258 
3259  for (k = 0; k < code_length; k ++) {
3260  SKIP(parser);
3261  }
3262  }
3263  }
3264 
3265  else
3266  {
3267  /* It is a non-escaped non-blank character. */
3268 
3269  if (!READ(parser, string)) goto error;
3270  }
3271 
3272  if (!CACHE(parser, 2)) goto error;
3273  }
3274 
3275  /* Check if we are at the end of the scalar. */
3276 
3277  if (CHECK(parser->buffer, single ? '\'' : '"'))
3278  break;
3279 
3280  /* Consume blank characters. */
3281 
3282  if (!CACHE(parser, 1)) goto error;
3283 
3284  while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3285  {
3286  if (IS_BLANK(parser->buffer))
3287  {
3288  /* Consume a space or a tab character. */
3289 
3290  if (!leading_blanks) {
3291  if (!READ(parser, whitespaces)) goto error;
3292  }
3293  else {
3294  SKIP(parser);
3295  }
3296  }
3297  else
3298  {
3299  if (!CACHE(parser, 2)) goto error;
3300 
3301  /* Check if it is a first line break. */
3302 
3303  if (!leading_blanks)
3304  {
3305  CLEAR(parser, whitespaces);
3306  if (!READ_LINE(parser, leading_break)) goto error;
3307  leading_blanks = 1;
3308  }
3309  else
3310  {
3311  if (!READ_LINE(parser, trailing_breaks)) goto error;
3312  }
3313  }
3314  if (!CACHE(parser, 1)) goto error;
3315  }
3316 
3317  /* Join the whitespaces or fold line breaks. */
3318 
3319  if (leading_blanks)
3320  {
3321  /* Do we need to fold line breaks? */
3322 
3323  if (leading_break.start[0] == '\n') {
3324  if (trailing_breaks.start[0] == '\0') {
3325  if (!STRING_EXTEND(parser, string)) goto error;
3326  *(string.pointer++) = ' ';
3327  }
3328  else {
3329  if (!JOIN(parser, string, trailing_breaks)) goto error;
3330  CLEAR(parser, trailing_breaks);
3331  }
3332  CLEAR(parser, leading_break);
3333  }
3334  else {
3335  if (!JOIN(parser, string, leading_break)) goto error;
3336  if (!JOIN(parser, string, trailing_breaks)) goto error;
3337  CLEAR(parser, leading_break);
3338  CLEAR(parser, trailing_breaks);
3339  }
3340  }
3341  else
3342  {
3343  if (!JOIN(parser, string, whitespaces)) goto error;
3344  CLEAR(parser, whitespaces);
3345  }
3346  }
3347 
3348  /* Eat the right quote. */
3349 
3350  SKIP(parser);
3351 
3352  end_mark = parser->mark;
3353 
3354  /* Create a token. */
3355 
3356  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3358  start_mark, end_mark);
3359 
3360  STRING_DEL(parser, leading_break);
3361  STRING_DEL(parser, trailing_breaks);
3362  STRING_DEL(parser, whitespaces);
3363 
3364  return 1;
3365 
3366 error:
3367  STRING_DEL(parser, string);
3368  STRING_DEL(parser, leading_break);
3369  STRING_DEL(parser, trailing_breaks);
3370  STRING_DEL(parser, whitespaces);
3371 
3372  return 0;
3373 }
3374 
3375 /*
3376  * Scan a plain scalar.
3377  */
3378 
3379 static int
3381 {
3382  yaml_mark_t start_mark;
3383  yaml_mark_t end_mark;
3384  yaml_string_t string = NULL_STRING;
3385  yaml_string_t leading_break = NULL_STRING;
3386  yaml_string_t trailing_breaks = NULL_STRING;
3387  yaml_string_t whitespaces = NULL_STRING;
3388  int leading_blanks = 0;
3389  int indent = parser->indent+1;
3390 
3391  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3392  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3393  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3394  if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3395 
3396  start_mark = end_mark = parser->mark;
3397 
3398  /* Consume the content of the plain scalar. */
3399 
3400  while (1)
3401  {
3402  /* Check for a document indicator. */
3403 
3404  if (!CACHE(parser, 4)) goto error;
3405 
3406  if (parser->mark.column == 0 &&
3407  ((CHECK_AT(parser->buffer, '-', 0) &&
3408  CHECK_AT(parser->buffer, '-', 1) &&
3409  CHECK_AT(parser->buffer, '-', 2)) ||
3410  (CHECK_AT(parser->buffer, '.', 0) &&
3411  CHECK_AT(parser->buffer, '.', 1) &&
3412  CHECK_AT(parser->buffer, '.', 2))) &&
3413  IS_BLANKZ_AT(parser->buffer, 3)) break;
3414 
3415  /* Check for a comment. */
3416 
3417  if (CHECK(parser->buffer, '#'))
3418  break;
3419 
3420  /* Consume non-blank characters. */
3421 
3422  while (!IS_BLANKZ(parser->buffer))
3423  {
3424  /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */
3425 
3426  if (parser->flow_level
3427  && CHECK(parser->buffer, ':')
3428  && !IS_BLANKZ_AT(parser->buffer, 1)) {
3429  yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3430  start_mark, "found unexpected ':'");
3431  goto error;
3432  }
3433 
3434  /* Check for indicators that may end a plain scalar. */
3435 
3436  if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
3437  || (parser->flow_level &&
3438  (CHECK(parser->buffer, ',') || CHECK(parser->buffer, ':')
3439  || CHECK(parser->buffer, '?') || CHECK(parser->buffer, '[')
3440  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
3441  || CHECK(parser->buffer, '}'))))
3442  break;
3443 
3444  /* Check if we need to join whitespaces and breaks. */
3445 
3446  if (leading_blanks || whitespaces.start != whitespaces.pointer)
3447  {
3448  if (leading_blanks)
3449  {
3450  /* Do we need to fold line breaks? */
3451 
3452  if (leading_break.start[0] == '\n') {
3453  if (trailing_breaks.start[0] == '\0') {
3454  if (!STRING_EXTEND(parser, string)) goto error;
3455  *(string.pointer++) = ' ';
3456  }
3457  else {
3458  if (!JOIN(parser, string, trailing_breaks)) goto error;
3459  CLEAR(parser, trailing_breaks);
3460  }
3461  CLEAR(parser, leading_break);
3462  }
3463  else {
3464  if (!JOIN(parser, string, leading_break)) goto error;
3465  if (!JOIN(parser, string, trailing_breaks)) goto error;
3466  CLEAR(parser, leading_break);
3467  CLEAR(parser, trailing_breaks);
3468  }
3469 
3470  leading_blanks = 0;
3471  }
3472  else
3473  {
3474  if (!JOIN(parser, string, whitespaces)) goto error;
3475  CLEAR(parser, whitespaces);
3476  }
3477  }
3478 
3479  /* Copy the character. */
3480 
3481  if (!READ(parser, string)) goto error;
3482 
3483  end_mark = parser->mark;
3484 
3485  if (!CACHE(parser, 2)) goto error;
3486  }
3487 
3488  /* Is it the end? */
3489 
3490  if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
3491  break;
3492 
3493  /* Consume blank characters. */
3494 
3495  if (!CACHE(parser, 1)) goto error;
3496 
3497  while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3498  {
3499  if (IS_BLANK(parser->buffer))
3500  {
3501  /* Check for tab character that abuse indentation. */
3502 
3503  if (leading_blanks && (int)parser->mark.column < indent
3504  && IS_TAB(parser->buffer)) {
3505  yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3506  start_mark, "found a tab character that violates indentation");
3507  goto error;
3508  }
3509 
3510  /* Consume a space or a tab character. */
3511 
3512  if (!leading_blanks) {
3513  if (!READ(parser, whitespaces)) goto error;
3514  }
3515  else {
3516  SKIP(parser);
3517  }
3518  }
3519  else
3520  {
3521  if (!CACHE(parser, 2)) goto error;
3522 
3523  /* Check if it is a first line break. */
3524 
3525  if (!leading_blanks)
3526  {
3527  CLEAR(parser, whitespaces);
3528  if (!READ_LINE(parser, leading_break)) goto error;
3529  leading_blanks = 1;
3530  }
3531  else
3532  {
3533  if (!READ_LINE(parser, trailing_breaks)) goto error;
3534  }
3535  }
3536  if (!CACHE(parser, 1)) goto error;
3537  }
3538 
3539  /* Check indentation level. */
3540 
3541  if (!parser->flow_level && (int)parser->mark.column < indent)
3542  break;
3543  }
3544 
3545  /* Create a token. */
3546 
3547  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3548  YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);
3549 
3550  /* Note that we change the 'simple_key_allowed' flag. */
3551 
3552  if (leading_blanks) {
3553  parser->simple_key_allowed = 1;
3554  }
3555 
3556  STRING_DEL(parser, leading_break);
3557  STRING_DEL(parser, trailing_breaks);
3558  STRING_DEL(parser, whitespaces);
3559 
3560  return 1;
3561 
3562 error:
3563  STRING_DEL(parser, string);
3564  STRING_DEL(parser, leading_break);
3565  STRING_DEL(parser, trailing_breaks);
3566  STRING_DEL(parser, whitespaces);
3567 
3568  return 0;
3569 }
3570 
The double-quoted scalar style.
Definition: yaml.h:174
volatile VALUE tmp
Definition: tcltklib.c:10209
#define PUSH(x)
Definition: bigdecimal.c:64
VP_EXPORT int
Definition: bigdecimal.c:5050
A BLOCK-SEQUENCE-START token.
Definition: yaml.h:232
The pointer position.
Definition: yaml.h:145
static int yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
Definition: scanner.c:1994
static int yaml_parser_fetch_directive(yaml_parser_t *parser)
Definition: scanner.c:1366
size_t strlen(const char *)
yaml_token_t * tail
The tail of the tokens queue.
Definition: yaml.h:1200
int minor
Definition: tcltklib.c:110
struct yaml_parser_s::@38 buffer
The working buffer.
A FLOW-SEQUENCE-START token.
Definition: yaml.h:239
#define NULL_STRING
Definition: yaml_private.h:115
A VALUE token.
Definition: yaml.h:254
#define IS_BOM(string)
Definition: yaml_private.h:287
static int yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, int single)
Definition: scanner.c:3012
Cannot allocate or reallocate a block of memory.
Definition: yaml.h:127
static int yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
Definition: scanner.c:2385
#define ALIAS_TOKEN_INIT(token, token_value, start_mark, end_mark)
Definition: yaml_private.h:495
static int yaml_parser_fetch_stream_start(yaml_parser_t *parser)
Definition: scanner.c:1290
A BLOCK-END token.
Definition: yaml.h:236
yaml_string_extend(yaml_char_t **start, yaml_char_t **pointer, yaml_char_t **end)
Definition: api.c:74
#define AS_DIGIT(string)
Definition: yaml_private.h:206
struct yaml_parser_s::@40 tokens
The tokens queue.
static int yaml_parser_roll_indent(yaml_parser_t *parser, int column, int number, yaml_token_type_t type, yaml_mark_t mark)
Definition: scanner.c:1209
int stream_start_produced
Have we started to scan the input stream?
Definition: yaml.h:1183
static int yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
Definition: scanner.c:1825
static int yaml_parser_fetch_block_entry(yaml_parser_t *parser)
Definition: scanner.c:1568
yaml_encoding_t encoding
The input encoding.
Definition: yaml.h:1165
The parser structure.
Definition: yaml.h:1081
A BLOCK-SEQUENCE-END token.
Definition: yaml.h:234
#define SKIP(parser)
Definition: scanner.c:494
A FLOW-ENTRY token.
Definition: yaml.h:250
#define READ(parser, string)
Definition: scanner.c:518
static int yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1490
static int yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix)
Definition: scanner.c:2255
static int yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, int literal)
Definition: scanner.c:2728
unsigned char yaml_char_t
The character type (UTF-8 octet).
Definition: yaml.h:78
const unsigned char * start
The string start pointer.
Definition: yaml.h:1123
const char * context
The error context.
Definition: yaml.h:1099
yaml_mark_t mark
The position mark.
Definition: yaml.h:1004
#define IS_BREAK_AT(string, offset)
Definition: yaml_private.h:318
yaml_char_t * pointer
Definition: yaml_private.h:103
static int yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
Definition: scanner.c:1856
#define READ_LINE(parser, string)
Definition: scanner.c:530
int indent
The current indentation level.
Definition: yaml.h:1220
#define IS_BLANK(string)
Definition: yaml_private.h:312
static int yaml_parser_scan_version_directive_value(yaml_parser_t *parser, yaml_mark_t start_mark, int *major, int *minor)
Definition: scanner.c:2161
#define TAG_DIRECTIVE_TOKEN_INIT(token, token_handle, token_prefix, start_mark, end_mark)
Definition: yaml_private.h:519
static int yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1447
static int yaml_parser_unroll_indent(yaml_parser_t *parser, int column)
Definition: scanner.c:1257
yaml_char_t * pointer
The current position of the buffer.
Definition: yaml.h:1144
An ALIAS token.
Definition: yaml.h:257
yaml_mark_t mark
The mark of the current position.
Definition: yaml.h:1171
#define head
Definition: st.c:107
#define IS_HEX_AT(string, offset)
Definition: yaml_private.h:212
yaml_char_t * start
Definition: yaml_private.h:101
static int yaml_parser_remove_simple_key(yaml_parser_t *parser)
Definition: scanner.c:1141
#define DEQUEUE(context, queue)
Definition: yaml_private.h:463
A FLOW-SEQUENCE-END token.
Definition: yaml.h:241
#define INITIAL_STRING_SIZE
Definition: yaml_private.h:78
#define STRING_DEL(context, string)
Definition: yaml_private.h:133
#define IS_SPACE(string)
Definition: yaml_private.h:295
yaml_free(void *ptr)
Definition: api.c:51
The folded scalar style.
Definition: yaml.h:179
memset(y->frac+ix+1, 0,(y->Prec-(ix+1))*sizeof(BDIGIT))
#define AS_HEX_AT(string, offset)
Definition: yaml_private.h:226
int * top
The top of the stack.
Definition: yaml.h:1216
#define QUEUE_INSERT(context, queue, index, value)
Definition: yaml_private.h:466
static int yaml_parser_fetch_next_token(yaml_parser_t *parser)
Definition: scanner.c:860
int required
Is a simple key required?
Definition: yaml.h:998
int simple_key_allowed
May a simple key occur at the current position?
Definition: yaml.h:1223
#define CACHE(parser, length)
Definition: scanner.c:485
#define IS_TAB(string)
Definition: yaml_private.h:303
static int yaml_parser_fetch_document_indicator(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1402
static int yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, yaml_mark_t start_mark, yaml_string_t *string)
Definition: scanner.c:2659
static int yaml_parser_fetch_stream_end(yaml_parser_t *parser)
Definition: scanner.c:1328
static int yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, yaml_mark_t context_mark, const char *problem)
Definition: scanner.c:782
#define IS_BREAKZ(string)
Definition: yaml_private.h:344
#define CHECK(string, octet)
Definition: yaml_private.h:170
#define IS_ALPHA(string)
Definition: yaml_private.h:187
static int yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, int *indent, yaml_string_t *breaks, yaml_mark_t start_mark, yaml_mark_t *end_mark)
Definition: scanner.c:2950
yaml_token_delete(yaml_token_t *token)
Free any memory allocated for a token object.
Definition: api.c:578
#define TOKEN_INIT(token, token_type, token_start_mark, token_end_mark)
Definition: yaml_private.h:482
static int yaml_parser_fetch_tag(yaml_parser_t *parser)
Definition: scanner.c:1794
struct yaml_parser_s::@41 indents
The indentation levels stack.
#define YAML_DECLARE(type)
The public API declaration.
Definition: yaml.h:38
ID token
Definition: ripper.c:15558
A FLOW-MAPPING-START token.
Definition: yaml.h:243
static int yaml_parser_decrease_flow_level(yaml_parser_t *parser)
Definition: scanner.c:1189
#define TAG_TOKEN_INIT(token, token_handle, token_suffix, start_mark, end_mark)
Definition: yaml_private.h:503
#define IS_BLANK_AT(string, offset)
Definition: yaml_private.h:309
yaml_error_type_t error
Error type.
Definition: yaml.h:1089
memcpy(buf+1, str, len)
#define IS_DIGIT(string)
Definition: yaml_private.h:197
static int yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, yaml_mark_t start_mark, yaml_char_t **handle)
Definition: scanner.c:2500
#define STREAM_START_TOKEN_INIT(token, token_encoding, start_mark, end_mark)
Definition: yaml_private.h:488
static int yaml_parser_fetch_value(yaml_parser_t *parser)
Definition: scanner.c:1682
volatile VALUE value
Definition: tcltklib.c:9442
#define CHECK_AT(string, octet, offset)
Definition: yaml_private.h:163
#define const
Definition: strftime.c:102
size_t token_number
The number of the token.
Definition: yaml.h:1001
static int yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
Definition: scanner.c:1533
static int yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
Definition: scanner.c:3380
size_t length
Definition: tcltklib.c:4559
static int yaml_parser_increase_flow_level(yaml_parser_t *parser)
Definition: scanner.c:1168
int type
Definition: tcltklib.c:111
A STREAM-END token.
Definition: yaml.h:220
enum yaml_token_type_e yaml_token_type_t
Token types.
struct parser_params * parser
Definition: ripper.c:4578
yaml_token_t * head
The head of the tokens queue.
Definition: yaml.h:1198
#define ENQUEUE(context, queue, value)
Definition: yaml_private.h:454
An ANCHOR token.
Definition: yaml.h:259
#define JOIN(context, string_a, string_b)
Definition: yaml_private.h:146
#define ANCHOR_TOKEN_INIT(token, token_value, start_mark, end_mark)
Definition: yaml_private.h:499
#define SCALAR_TOKEN_INIT(token, token_value, token_length, token_style, start_mark, end_mark)
Definition: yaml_private.h:508
yaml_mark_t context_mark
The context position.
Definition: yaml.h:1101
static int yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1764
struct yaml_parser_s::@42 simple_keys
The stack of simple keys.
A FLOW-MAPPING-END token.
Definition: yaml.h:245
yaml_mark_t problem_mark
The problem position.
Definition: yaml.h:1097
size_t tokens_parsed
The number of tokens fetched from the queue.
Definition: yaml.h:1204
static int yaml_parser_save_simple_key(yaml_parser_t *parser)
Definition: scanner.c:1097
gz end
Definition: zlib.c:2270
#define STRING_EXTEND(context, string)
Definition: yaml_private.h:137
#define IS_BLANKZ_AT(string, offset)
Definition: yaml_private.h:359
A DOCUMENT-START token.
Definition: yaml.h:227
static int yaml_parser_scan_to_next_token(yaml_parser_t *parser)
Definition: scanner.c:1918
#define STRING_INIT(context, string, size)
Definition: yaml_private.h:124
#define STREAM_END_TOKEN_INIT(token, start_mark, end_mark)
Definition: yaml_private.h:492
if(RB_TYPE_P(r, T_FLOAT))
Definition: bigdecimal.c:1186
static int yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
Definition: scanner.c:2567
This structure holds information about a potential simple key.
Definition: yaml.h:993
#define MAX_NUMBER_LENGTH
Definition: scanner.c:2195
The plain scalar style.
Definition: yaml.h:169
The literal scalar style.
Definition: yaml.h:177
static int yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, yaml_token_type_t type)
Definition: scanner.c:2319
size_t line
The position line.
Definition: yaml.h:150
#define POP(context, stack)
Definition: yaml_private.h:436
A DOCUMENT-END token.
Definition: yaml.h:229
#define IS_Z(string)
Definition: yaml_private.h:276
static VALUE mark(VALUE self)
Definition: psych_parser.c:523
static int yaml_parser_stale_simple_keys(yaml_parser_t *parser)
Definition: scanner.c:1056
Cannot scan the input stream.
Definition: yaml.h:132
static int yaml_parser_fetch_key(yaml_parser_t *parser)
Definition: scanner.c:1629
size_t index
The position index.
Definition: yaml.h:147
static int yaml_parser_scan_version_directive_number(yaml_parser_t *parser, yaml_mark_t start_mark, int *number)
Definition: scanner.c:2208
#define IS_BLANKZ(string)
Definition: yaml_private.h:362
yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
Scan the input stream and produce the next token.
Definition: scanner.c:742
#define assert(condition)
Definition: ossl.h:45
yaml_malloc(size_t size)
Definition: api.c:31
const char * problem
Error description.
Definition: yaml.h:1091
int possible
Is a simple key possible?
Definition: yaml.h:995
A BLOCK-ENTRY token.
Definition: yaml.h:248
yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
Definition: scanner.c:800
int major
Definition: tcltklib.c:109
static int yaml_parser_scan_directive_name(yaml_parser_t *parser, yaml_mark_t start_mark, yaml_char_t **name)
Definition: scanner.c:2110
#define VERSION_DIRECTIVE_TOKEN_INIT(token, token_major, token_minor, start_mark, end_mark)
Definition: yaml_private.h:514
#define IS_BREAK(string)
Definition: yaml_private.h:330
size_t column
The position column.
Definition: yaml.h:153
#define NULL
Definition: _sdbm.c:103
const char * name
Definition: nkf.c:208
A KEY token.
Definition: yaml.h:252
int flow_level
The number of unclosed '[' and '{' indicators.
Definition: yaml.h:1189
#define SKIP_LINE(parser)
Definition: scanner.c:500
The token structure.
Definition: yaml.h:267
#define CLEAR(context, string)
Definition: yaml_private.h:142
static int yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
Definition: scanner.c:1887
The single-quoted scalar style.
Definition: yaml.h:172