Ruby  2.0.0p594(2014-10-27revision48167)
scanner.c
Go to the documentation of this file.
1 
2 /*
3  * Introduction
4  * ************
5  *
6  * The following notes assume that you are familiar with the YAML specification
7  * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in
8  * some cases we are less restrictive than it requires.
9  *
10  * The process of transforming a YAML stream into a sequence of events is
11  * divided into two steps: Scanning and Parsing.
12  *
13  * The Scanner transforms the input stream into a sequence of tokens, while the
14  * Parser transforms the sequence of tokens produced by the Scanner into a
15  * sequence of parsing events.
16  *
17  * The Scanner is rather clever and complicated. The Parser, on the contrary,
18  * is a straightforward implementation of a recursive-descent parser (or,
19  * LL(1) parser, as it is usually called).
20  *
21  * Actually there are two issues of Scanning that might be called "clever", the
22  * rest is quite straightforward. The issues are "block collection start" and
23  * "simple keys". Both issues are explained below in detail.
24  *
25  * Here the Scanning step is explained and implemented. We start with the list
26  * of all the tokens produced by the Scanner together with short descriptions.
27  *
28  * Now, tokens:
29  *
30  * STREAM-START(encoding) # The stream start.
31  * STREAM-END # The stream end.
32  * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
33  * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
34  * DOCUMENT-START # '---'
35  * DOCUMENT-END # '...'
36  * BLOCK-SEQUENCE-START # Indentation increase denoting a block
37  * BLOCK-MAPPING-START # sequence or a block mapping.
38  * BLOCK-END # Indentation decrease.
39  * FLOW-SEQUENCE-START # '['
40  * FLOW-SEQUENCE-END # ']'
41  * FLOW-MAPPING-START # '{'
42  * FLOW-MAPPING-END # '}'
43  * BLOCK-ENTRY # '-'
44  * FLOW-ENTRY # ','
45  * KEY # '?' or nothing (simple keys).
46  * VALUE # ':'
47  * ALIAS(anchor) # '*anchor'
48  * ANCHOR(anchor) # '&anchor'
49  * TAG(handle,suffix) # '!handle!suffix'
50  * SCALAR(value,style) # A scalar.
51  *
52  * The following two tokens are "virtual" tokens denoting the beginning and the
53  * end of the stream:
54  *
55  * STREAM-START(encoding)
56  * STREAM-END
57  *
58  * We pass the information about the input stream encoding with the
59  * STREAM-START token.
60  *
61  * The next two tokens are responsible for directives:
62  *
63  * VERSION-DIRECTIVE(major,minor)
64  * TAG-DIRECTIVE(handle,prefix)
65  *
66  * Example:
67  *
68  * %YAML 1.1
69  * %TAG ! !foo
70  * %TAG !yaml! tag:yaml.org,2002:
71  * ---
72  *
73  * The corresponding sequence of tokens:
74  *
75  * STREAM-START(utf-8)
76  * VERSION-DIRECTIVE(1,1)
77  * TAG-DIRECTIVE("!","!foo")
78  * TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
79  * DOCUMENT-START
80  * STREAM-END
81  *
82  * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83  * line.
84  *
85  * The document start and end indicators are represented by:
86  *
87  * DOCUMENT-START
88  * DOCUMENT-END
89  *
90  * Note that if a YAML stream contains an implicit document (without '---'
91  * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92  * produced.
93  *
94  * In the following examples, we present whole documents together with the
95  * produced tokens.
96  *
97  * 1. An implicit document:
98  *
99  * 'a scalar'
100  *
101  * Tokens:
102  *
103  * STREAM-START(utf-8)
104  * SCALAR("a scalar",single-quoted)
105  * STREAM-END
106  *
107  * 2. An explicit document:
108  *
109  * ---
110  * 'a scalar'
111  * ...
112  *
113  * Tokens:
114  *
115  * STREAM-START(utf-8)
116  * DOCUMENT-START
117  * SCALAR("a scalar",single-quoted)
118  * DOCUMENT-END
119  * STREAM-END
120  *
121  * 3. Several documents in a stream:
122  *
123  * 'a scalar'
124  * ---
125  * 'another scalar'
126  * ---
127  * 'yet another scalar'
128  *
129  * Tokens:
130  *
131  * STREAM-START(utf-8)
132  * SCALAR("a scalar",single-quoted)
133  * DOCUMENT-START
134  * SCALAR("another scalar",single-quoted)
135  * DOCUMENT-START
136  * SCALAR("yet another scalar",single-quoted)
137  * STREAM-END
138  *
139  * We have already introduced the SCALAR token above. The following tokens are
140  * used to describe aliases, anchors, tags, and scalars:
141  *
142  * ALIAS(anchor)
143  * ANCHOR(anchor)
144  * TAG(handle,suffix)
145  * SCALAR(value,style)
146  *
147  * The following series of examples illustrate the usage of these tokens:
148  *
149  * 1. A recursive sequence:
150  *
151  * &A [ *A ]
152  *
153  * Tokens:
154  *
155  * STREAM-START(utf-8)
156  * ANCHOR("A")
157  * FLOW-SEQUENCE-START
158  * ALIAS("A")
159  * FLOW-SEQUENCE-END
160  * STREAM-END
161  *
162  * 2. A tagged scalar:
163  *
164  * !!float "3.14" # A good approximation.
165  *
166  * Tokens:
167  *
168  * STREAM-START(utf-8)
169  * TAG("!!","float")
170  * SCALAR("3.14",double-quoted)
171  * STREAM-END
172  *
173  * 3. Various scalar styles:
174  *
175  * --- # Implicit empty plain scalars do not produce tokens.
176  * --- a plain scalar
177  * --- 'a single-quoted scalar'
178  * --- "a double-quoted scalar"
179  * --- |-
180  * a literal scalar
181  * --- >-
182  * a folded
183  * scalar
184  *
185  * Tokens:
186  *
187  * STREAM-START(utf-8)
188  * DOCUMENT-START
189  * DOCUMENT-START
190  * SCALAR("a plain scalar",plain)
191  * DOCUMENT-START
192  * SCALAR("a single-quoted scalar",single-quoted)
193  * DOCUMENT-START
194  * SCALAR("a double-quoted scalar",double-quoted)
195  * DOCUMENT-START
196  * SCALAR("a literal scalar",literal)
197  * DOCUMENT-START
198  * SCALAR("a folded scalar",folded)
199  * STREAM-END
200  *
201  * Now it's time to review collection-related tokens. We will start with
202  * flow collections:
203  *
204  * FLOW-SEQUENCE-START
205  * FLOW-SEQUENCE-END
206  * FLOW-MAPPING-START
207  * FLOW-MAPPING-END
208  * FLOW-ENTRY
209  * KEY
210  * VALUE
211  *
212  * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213  * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214  * correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the
215  * indicators '?' and ':', which are used for denoting mapping keys and values,
216  * are represented by the KEY and VALUE tokens.
217  *
218  * The following examples show flow collections:
219  *
220  * 1. A flow sequence:
221  *
222  * [item 1, item 2, item 3]
223  *
224  * Tokens:
225  *
226  * STREAM-START(utf-8)
227  * FLOW-SEQUENCE-START
228  * SCALAR("item 1",plain)
229  * FLOW-ENTRY
230  * SCALAR("item 2",plain)
231  * FLOW-ENTRY
232  * SCALAR("item 3",plain)
233  * FLOW-SEQUENCE-END
234  * STREAM-END
235  *
236  * 2. A flow mapping:
237  *
238  * {
239  * a simple key: a value, # Note that the KEY token is produced.
240  * ? a complex key: another value,
241  * }
242  *
243  * Tokens:
244  *
245  * STREAM-START(utf-8)
246  * FLOW-MAPPING-START
247  * KEY
248  * SCALAR("a simple key",plain)
249  * VALUE
250  * SCALAR("a value",plain)
251  * FLOW-ENTRY
252  * KEY
253  * SCALAR("a complex key",plain)
254  * VALUE
255  * SCALAR("another value",plain)
256  * FLOW-ENTRY
257  * FLOW-MAPPING-END
258  * STREAM-END
259  *
260  * A simple key is a key which is not denoted by the '?' indicator. Note that
261  * the Scanner still produces the KEY token whenever it encounters a simple key.
262  *
263  * For scanning block collections, the following tokens are used (note that we
264  * repeat KEY and VALUE here):
265  *
266  * BLOCK-SEQUENCE-START
267  * BLOCK-MAPPING-START
268  * BLOCK-END
269  * BLOCK-ENTRY
270  * KEY
271  * VALUE
272  *
273  * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274  * increase that precedes a block collection (cf. the INDENT token in Python).
275  * The token BLOCK-END denotes indentation decrease that ends a block collection
276  * (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
277  * that make detection of these tokens more complex.
278  *
279  * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280  * '-', '?', and ':' correspondingly.
281  *
282  * The following examples show how the tokens BLOCK-SEQUENCE-START,
283  * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284  *
285  * 1. Block sequences:
286  *
287  * - item 1
288  * - item 2
289  * -
290  * - item 3.1
291  * - item 3.2
292  * -
293  * key 1: value 1
294  * key 2: value 2
295  *
296  * Tokens:
297  *
298  * STREAM-START(utf-8)
299  * BLOCK-SEQUENCE-START
300  * BLOCK-ENTRY
301  * SCALAR("item 1",plain)
302  * BLOCK-ENTRY
303  * SCALAR("item 2",plain)
304  * BLOCK-ENTRY
305  * BLOCK-SEQUENCE-START
306  * BLOCK-ENTRY
307  * SCALAR("item 3.1",plain)
308  * BLOCK-ENTRY
309  * SCALAR("item 3.2",plain)
310  * BLOCK-END
311  * BLOCK-ENTRY
312  * BLOCK-MAPPING-START
313  * KEY
314  * SCALAR("key 1",plain)
315  * VALUE
316  * SCALAR("value 1",plain)
317  * KEY
318  * SCALAR("key 2",plain)
319  * VALUE
320  * SCALAR("value 2",plain)
321  * BLOCK-END
322  * BLOCK-END
323  * STREAM-END
324  *
325  * 2. Block mappings:
326  *
327  * a simple key: a value # The KEY token is produced here.
328  * ? a complex key
329  * : another value
330  * a mapping:
331  * key 1: value 1
332  * key 2: value 2
333  * a sequence:
334  * - item 1
335  * - item 2
336  *
337  * Tokens:
338  *
339  * STREAM-START(utf-8)
340  * BLOCK-MAPPING-START
341  * KEY
342  * SCALAR("a simple key",plain)
343  * VALUE
344  * SCALAR("a value",plain)
345  * KEY
346  * SCALAR("a complex key",plain)
347  * VALUE
348  * SCALAR("another value",plain)
349  * KEY
350  * SCALAR("a mapping",plain)
     * VALUE
351  * BLOCK-MAPPING-START
352  * KEY
353  * SCALAR("key 1",plain)
354  * VALUE
355  * SCALAR("value 1",plain)
356  * KEY
357  * SCALAR("key 2",plain)
358  * VALUE
359  * SCALAR("value 2",plain)
360  * BLOCK-END
361  * KEY
362  * SCALAR("a sequence",plain)
363  * VALUE
364  * BLOCK-SEQUENCE-START
365  * BLOCK-ENTRY
366  * SCALAR("item 1",plain)
367  * BLOCK-ENTRY
368  * SCALAR("item 2",plain)
369  * BLOCK-END
370  * BLOCK-END
371  * STREAM-END
372  *
373  * YAML does not always require a new block collection to start on a new
374  * line. If the current line contains only '-', '?', and ':' indicators, a new
375  * block collection may start at the current line. The following examples
376  * illustrate this case:
377  *
378  * 1. Collections in a sequence:
379  *
380  * - - item 1
381  * - item 2
382  * - key 1: value 1
383  * key 2: value 2
384  * - ? complex key
385  * : complex value
386  *
387  * Tokens:
388  *
389  * STREAM-START(utf-8)
390  * BLOCK-SEQUENCE-START
391  * BLOCK-ENTRY
392  * BLOCK-SEQUENCE-START
393  * BLOCK-ENTRY
394  * SCALAR("item 1",plain)
395  * BLOCK-ENTRY
396  * SCALAR("item 2",plain)
397  * BLOCK-END
398  * BLOCK-ENTRY
399  * BLOCK-MAPPING-START
400  * KEY
401  * SCALAR("key 1",plain)
402  * VALUE
403  * SCALAR("value 1",plain)
404  * KEY
405  * SCALAR("key 2",plain)
406  * VALUE
407  * SCALAR("value 2",plain)
408  * BLOCK-END
409  * BLOCK-ENTRY
410  * BLOCK-MAPPING-START
411  * KEY
412  * SCALAR("complex key",plain)
413  * VALUE
414  * SCALAR("complex value",plain)
415  * BLOCK-END
416  * BLOCK-END
417  * STREAM-END
418  *
419  * 2. Collections in a mapping:
420  *
421  * ? a sequence
422  * : - item 1
423  * - item 2
424  * ? a mapping
425  * : key 1: value 1
426  * key 2: value 2
427  *
428  * Tokens:
429  *
430  * STREAM-START(utf-8)
431  * BLOCK-MAPPING-START
432  * KEY
433  * SCALAR("a sequence",plain)
434  * VALUE
435  * BLOCK-SEQUENCE-START
436  * BLOCK-ENTRY
437  * SCALAR("item 1",plain)
438  * BLOCK-ENTRY
439  * SCALAR("item 2",plain)
440  * BLOCK-END
441  * KEY
442  * SCALAR("a mapping",plain)
443  * VALUE
444  * BLOCK-MAPPING-START
445  * KEY
446  * SCALAR("key 1",plain)
447  * VALUE
448  * SCALAR("value 1",plain)
449  * KEY
450  * SCALAR("key 2",plain)
451  * VALUE
452  * SCALAR("value 2",plain)
453  * BLOCK-END
454  * BLOCK-END
455  * STREAM-END
456  *
457  * YAML also permits non-indented sequences if they are nested in a block
458  * mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
459  *
460  * key:
461  * - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
462  * - item 2
463  *
464  * Tokens:
465  *
466  * STREAM-START(utf-8)
467  * BLOCK-MAPPING-START
468  * KEY
469  * SCALAR("key",plain)
470  * VALUE
471  * BLOCK-ENTRY
472  * SCALAR("item 1",plain)
473  * BLOCK-ENTRY
474  * SCALAR("item 2",plain)
475  * BLOCK-END
476  */
477 
478 #include "yaml_private.h"
479 
/*
 * Ensure that the buffer contains at least `length` unread characters.
 *
 * Evaluates to 1 when enough characters are already buffered; otherwise it
 * evaluates to the result of yaml_parser_update_buffer(), i.e. 1 on success
 * and 0 on failure (reader error or memory error).
 *
 * Both arguments are parenthesized in the expansion so that any pointer
 * expression (not just a plain identifier) can be passed as `parser`.
 * Note that the arguments may be evaluated more than once.
 */

#define CACHE(parser,length)                                                \
    ((parser)->unread >= (length)                                           \
        ? 1                                                                 \
        : yaml_parser_update_buffer((parser), (length)))
489 
/*
 * Advance the buffer pointer past the current (non-break) character.
 *
 * Increments the mark's index and column, decrements the number of unread
 * characters, and moves the buffer pointer forward by WIDTH() of the
 * character at the current position.  The expression evaluates to the new
 * buffer pointer.
 *
 * `parser` is parenthesized in the expansion so that any pointer expression
 * can be passed; it is evaluated several times, so it must be free of side
 * effects.
 */

#define SKIP(parser)                                                        \
    ((parser)->mark.index ++,                                               \
     (parser)->mark.column ++,                                              \
     (parser)->unread --,                                                   \
     (parser)->buffer.pointer += WIDTH((parser)->buffer))
499 
/*
 * Advance the buffer pointer past the line break at the current position.
 *
 * - CR LF counts as two characters: index and unread are adjusted by 2 and
 *   the pointer moves two bytes.
 * - Any other break counts as one character and the pointer moves by
 *   WIDTH() of that character.
 *
 * In both cases the column is reset to 0 and the line number is incremented.
 * If the current character is not a line break, nothing is changed and the
 * expression evaluates to 0.
 *
 * `parser` is parenthesized in the expansion so that any pointer expression
 * can be passed; it is evaluated several times, so it must be free of side
 * effects.
 */

#define SKIP_LINE(parser)                                                   \
    (IS_CRLF((parser)->buffer) ?                                            \
        ((parser)->mark.index += 2,                                         \
         (parser)->mark.column = 0,                                         \
         (parser)->mark.line ++,                                            \
         (parser)->unread -= 2,                                             \
         (parser)->buffer.pointer += 2) :                                   \
     IS_BREAK((parser)->buffer) ?                                           \
        ((parser)->mark.index ++,                                           \
         (parser)->mark.column = 0,                                         \
         (parser)->mark.line ++,                                            \
         (parser)->unread --,                                               \
         (parser)->buffer.pointer += WIDTH((parser)->buffer)) : 0)
513 
514 /*
515  * Copy a character to a string buffer and advance pointers.
     *
     * The string is first grown with STRING_EXTEND(); if that fails the macro
     * evaluates to 0 and nothing is copied.  On success the character is
     * copied with COPY(), the mark's index and column are incremented, the
     * unread count is decremented, and the macro evaluates to 1.
     *
     * NOTE(review): the buffer pointer is not advanced explicitly here;
     * presumably COPY() advances both pointers -- confirm against its
     * definition.
516  */
517 
518 #define READ(parser,string) \
519  (STRING_EXTEND(parser,string) ? \
520  (COPY(string,parser->buffer), \
521  parser->mark.index ++, \
522  parser->mark.column ++, \
523  parser->unread --, \
524  1) : 0)
525 
526 /*
527  * Copy a line break character to a string buffer and advance pointers.
     *
     * The string is first grown with STRING_EXTEND(); if that fails the macro
     * evaluates to 0.  Otherwise the break at the current position is
     * handled as follows and the macro evaluates to 1:
     *
     *   CR LF        -> a single '\n' is stored; two bytes and two characters
     *                   (index, unread) are consumed.
     *   CR or LF     -> a single '\n' is stored; one byte is consumed.
     *   NEL (C2 85)  -> a single '\n' is stored; two bytes but one character
     *                   are consumed.
     *   LS/PS        -> the three bytes (E2 80 A8 / E2 80 A9) are copied
     *                   unchanged; one character is consumed.
     *
     * In every case the column is reset to 0 and the line is incremented.
     * If the current position holds none of these breaks, nothing is copied;
     * the result of the inner conditional is discarded, so the macro still
     * evaluates to 1.
528  */
529 
530 #define READ_LINE(parser,string) \
531  (STRING_EXTEND(parser,string) ? \
532  (((CHECK_AT(parser->buffer,'\r',0) \
533  && CHECK_AT(parser->buffer,'\n',1)) ? /* CR LF -> LF */ \
534  (*((string).pointer++) = (yaml_char_t) '\n', \
535  parser->buffer.pointer += 2, \
536  parser->mark.index += 2, \
537  parser->mark.column = 0, \
538  parser->mark.line ++, \
539  parser->unread -= 2) : \
540  (CHECK_AT(parser->buffer,'\r',0) \
541  || CHECK_AT(parser->buffer,'\n',0)) ? /* CR|LF -> LF */ \
542  (*((string).pointer++) = (yaml_char_t) '\n', \
543  parser->buffer.pointer ++, \
544  parser->mark.index ++, \
545  parser->mark.column = 0, \
546  parser->mark.line ++, \
547  parser->unread --) : \
548  (CHECK_AT(parser->buffer,'\xC2',0) \
549  && CHECK_AT(parser->buffer,'\x85',1)) ? /* NEL -> LF */ \
550  (*((string).pointer++) = (yaml_char_t) '\n', \
551  parser->buffer.pointer += 2, \
552  parser->mark.index ++, \
553  parser->mark.column = 0, \
554  parser->mark.line ++, \
555  parser->unread --) : \
556  (CHECK_AT(parser->buffer,'\xE2',0) && \
557  CHECK_AT(parser->buffer,'\x80',1) && \
558  (CHECK_AT(parser->buffer,'\xA8',2) || \
559  CHECK_AT(parser->buffer,'\xA9',2))) ? /* LS|PS -> LS|PS */ \
560  (*((string).pointer++) = *(parser->buffer.pointer++), \
561  *((string).pointer++) = *(parser->buffer.pointer++), \
562  *((string).pointer++) = *(parser->buffer.pointer++), \
563  parser->mark.index ++, \
564  parser->mark.column = 0, \
565  parser->mark.line ++, \
566  parser->unread --) : 0), \
567  1) : 0)
568 
569 /*
570  * Public API declarations.
571  */
572 
573 YAML_DECLARE(int)
575 
576 /*
577  * Error handling.
578  */
579 
580 static int
581 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
582  yaml_mark_t context_mark, const char *problem);
583 
584 /*
585  * High-level token API.
586  */
587 
588 YAML_DECLARE(int)
590 
591 static int
593 
594 /*
595  * Potential simple keys.
596  */
597 
598 static int
600 
601 static int
603 
604 static int
606 
607 static int
609 
610 static int
612 
613 /*
614  * Indentation treatment.
615  */
616 
617 static int
618 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
619  ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark);
620 
621 static int
622 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column);
623 
624 /*
625  * Token fetchers.
626  */
627 
628 static int
630 
631 static int
633 
634 static int
636 
637 static int
639  yaml_token_type_t type);
640 
641 static int
643  yaml_token_type_t type);
644 
645 static int
647  yaml_token_type_t type);
648 
649 static int
651 
652 static int
654 
655 static int
657 
658 static int
660 
661 static int
663 
664 static int
666 
667 static int
668 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
669 
670 static int
671 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
672 
673 static int
675 
676 /*
677  * Token scanners.
678  */
679 
680 static int
682 
683 static int
685 
686 static int
688  yaml_mark_t start_mark, yaml_char_t **name);
689 
690 static int
692  yaml_mark_t start_mark, int *major, int *minor);
693 
694 static int
696  yaml_mark_t start_mark, int *number);
697 
698 static int
700  yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
701 
702 static int
704  yaml_token_type_t type);
705 
706 static int
708 
709 static int
710 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
711  yaml_mark_t start_mark, yaml_char_t **handle);
712 
713 static int
714 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive,
715  yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
716 
717 static int
718 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
719  yaml_mark_t start_mark, yaml_string_t *string);
720 
721 static int
723  int literal);
724 
725 static int
727  int *indent, yaml_string_t *breaks,
728  yaml_mark_t start_mark, yaml_mark_t *end_mark);
729 
730 static int
732  int single);
733 
734 static int
736 
737 /*
738  * Get the next token.
     *
     * The output token is always zeroed first.  If STREAM-END has already
     * been produced, or the parser is in an error state, the function
     * returns 1 and leaves the token zeroed.  Otherwise it makes sure a
     * token is available (fetching more if necessary), dequeues it into
     * *token, and remembers when the dequeued token is STREAM-END.
     *
     * Returns 1 on success and 0 if fetching more tokens failed.
739  */
740 
741 YAML_DECLARE(int)
743 {
744  assert(parser); /* Non-NULL parser object is expected. */
745  assert(token); /* Non-NULL token object is expected. */
746 
747  /* Erase the token object. */
748 
749  memset(token, 0, sizeof(yaml_token_t));
750 
751  /* No tokens after STREAM-END or error: succeed with the zeroed token. */
752 
753  if (parser->stream_end_produced || parser->error) {
754  return 1;
755  }
756 
757  /* Ensure that the tokens queue contains enough tokens. */
758 
759  if (!parser->token_available) {
760  if (!yaml_parser_fetch_more_tokens(parser))
761  return 0;
762  }
763 
764  /* Fetch the next token from the queue. */
765 
766  *token = DEQUEUE(parser, parser->tokens);
767  parser->token_available = 0; /* The head token has been consumed. */
768  parser->tokens_parsed ++;
769 
770  if (token->type == YAML_STREAM_END_TOKEN) {
771  parser->stream_end_produced = 1;
772  }
773 
774  return 1;
775 }
776 
777 /*
778  * Set the scanner error and return 0.
     *
     * Records YAML_SCANNER_ERROR together with the given context string,
     * context mark and problem description.  The problem mark is taken from
     * the parser's current mark.  The constant 0 return value lets callers
     * write `return yaml_parser_set_scanner_error(...)` on their failure
     * path.
779  */
780 
781 static int
783  yaml_mark_t context_mark, const char *problem)
784 {
785  parser->error = YAML_SCANNER_ERROR;
786  parser->context = context;
787  parser->context_mark = context_mark;
788  parser->problem = problem;
789  parser->problem_mark = parser->mark;
790 
791  return 0;
792 }
793 
794 /*
795  * Ensure that the tokens queue contains at least one token which can be
796  * returned to the Parser.
     *
     * Tokens keep being fetched while the queue is empty, or while some
     * still-possible simple key refers to the token at the head of the queue
     * (its token_number equals tokens_parsed).  In the latter case a KEY
     * token might yet have to be inserted before the head, so the head
     * cannot be handed out.
     *
     * On success token_available is set and 1 is returned; 0 is returned if
     * checking for stale simple keys or fetching a token fails.
797  */
798 
799 YAML_DECLARE(int)
801 {
802  int need_more_tokens;
803 
804  /* While we need more tokens to fetch, do it. */
805 
806  while (1)
807  {
808  /*
809  * Check if we really need to fetch more tokens.
810  */
811 
812  need_more_tokens = 0;
813 
814  if (parser->tokens.head == parser->tokens.tail)
815  {
816  /* Queue is empty. */
817 
818  need_more_tokens = 1;
819  }
820  else
821  {
822  yaml_simple_key_t *simple_key;
823 
824  /* Check if any potential simple key may occupy the head position. */
825 
826  if (!yaml_parser_stale_simple_keys(parser))
827  return 0;
828 
829  for (simple_key = parser->simple_keys.start;
830  simple_key != parser->simple_keys.top; simple_key++) {
831  if (simple_key->possible
832  && simple_key->token_number == parser->tokens_parsed) {
833  need_more_tokens = 1;
834  break;
835  }
836  }
837  }
838 
839  /* We are finished. */
840 
841  if (!need_more_tokens)
842  break;
843 
844  /* Fetch the next token. */
845 
846  if (!yaml_parser_fetch_next_token(parser))
847  return 0;
848  }
849 
850  parser->token_available = 1;
851 
852  return 1;
853 }
854 
855 /*
856  * The dispatcher for token fetchers.
     *
     * Steps, in order:
     *   1. make sure the input buffer is initialized;
     *   2. produce STREAM-START if it has not been produced yet;
     *   3. skip whitespace and comments up to the next token;
     *   4. drop stale simple keys and unroll indentation to the current
     *      column;
     *   5. look at up to 4 buffered characters and hand over to the fetcher
     *      that matches the indicator at the current position.
     *
     * Returns the result of the selected fetcher, or 0 after setting a
     * scanner error when no token can start at the current character.
     *
     * NOTE(review): in this listing several conditions below (document
     * start/end, '[', '{', ']', '}', '*', '&') are not followed by a visible
     * statement; confirm the corresponding fetch calls against the full
     * source file.
857  */
858 
859 static int
861 {
862  /* Ensure that the buffer is initialized. */
863 
864  if (!CACHE(parser, 1))
865  return 0;
866 
867  /* Check if we just started scanning. Fetch STREAM-START then. */
868 
869  if (!parser->stream_start_produced)
870  return yaml_parser_fetch_stream_start(parser);
871 
872  /* Eat whitespaces and comments until we reach the next token. */
873 
874  if (!yaml_parser_scan_to_next_token(parser))
875  return 0;
876 
877  /* Remove obsolete potential simple keys. */
878 
879  if (!yaml_parser_stale_simple_keys(parser))
880  return 0;
881 
882  /* Check the indentation level against the current column. */
883 
884  if (!yaml_parser_unroll_indent(parser, parser->mark.column))
885  return 0;
886 
887  /*
888  * Ensure that the buffer contains at least 4 characters. 4 is the length
889  * of the longest indicators ('--- ' and '... ').
890  */
891 
892  if (!CACHE(parser, 4))
893  return 0;
894 
895  /* Is it the end of the stream? */
896 
897  if (IS_Z(parser->buffer))
898  return yaml_parser_fetch_stream_end(parser);
899 
900  /* Is it a directive? */
901 
902  if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
903  return yaml_parser_fetch_directive(parser);
904 
905  /* Is it the document start indicator? */
906 
907  if (parser->mark.column == 0
908  && CHECK_AT(parser->buffer, '-', 0)
909  && CHECK_AT(parser->buffer, '-', 1)
910  && CHECK_AT(parser->buffer, '-', 2)
911  && IS_BLANKZ_AT(parser->buffer, 3))
914 
915  /* Is it the document end indicator? */
916 
917  if (parser->mark.column == 0
918  && CHECK_AT(parser->buffer, '.', 0)
919  && CHECK_AT(parser->buffer, '.', 1)
920  && CHECK_AT(parser->buffer, '.', 2)
921  && IS_BLANKZ_AT(parser->buffer, 3))
924 
925  /* Is it the flow sequence start indicator? */
926 
927  if (CHECK(parser->buffer, '['))
930 
931  /* Is it the flow mapping start indicator? */
932 
933  if (CHECK(parser->buffer, '{'))
936 
937  /* Is it the flow sequence end indicator? */
938 
939  if (CHECK(parser->buffer, ']'))
942 
943  /* Is it the flow mapping end indicator? */
944 
945  if (CHECK(parser->buffer, '}'))
948 
949  /* Is it the flow entry indicator? */
950 
951  if (CHECK(parser->buffer, ','))
952  return yaml_parser_fetch_flow_entry(parser);
953 
954  /* Is it the block entry indicator? */
955 
956  if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
957  return yaml_parser_fetch_block_entry(parser);
958 
959  /* Is it the key indicator? */
960 
961  if (CHECK(parser->buffer, '?')
962  && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
963  return yaml_parser_fetch_key(parser);
964 
965  /* Is it the value indicator? */
966 
967  if (CHECK(parser->buffer, ':')
968  && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
969  return yaml_parser_fetch_value(parser);
970 
971  /* Is it an alias? */
972 
973  if (CHECK(parser->buffer, '*'))
975 
976  /* Is it an anchor? */
977 
978  if (CHECK(parser->buffer, '&'))
980 
981  /* Is it a tag? */
982 
983  if (CHECK(parser->buffer, '!'))
984  return yaml_parser_fetch_tag(parser);
985 
986  /* Is it a literal scalar? (Block scalars exist only outside flow context.) */
987 
988  if (CHECK(parser->buffer, '|') && !parser->flow_level)
989  return yaml_parser_fetch_block_scalar(parser, 1);
990 
991  /* Is it a folded scalar? (Block scalars exist only outside flow context.) */
992 
993  if (CHECK(parser->buffer, '>') && !parser->flow_level)
994  return yaml_parser_fetch_block_scalar(parser, 0);
995 
996  /* Is it a single-quoted scalar? */
997 
998  if (CHECK(parser->buffer, '\''))
999  return yaml_parser_fetch_flow_scalar(parser, 1);
1000 
1001  /* Is it a double-quoted scalar? */
1002 
1003  if (CHECK(parser->buffer, '"'))
1004  return yaml_parser_fetch_flow_scalar(parser, 0);
1005 
1006  /*
1007  * Is it a plain scalar?
1008  *
1009  * A plain scalar may start with any non-blank characters except
1010  *
1011  * '-', '?', ':', ',', '[', ']', '{', '}',
1012  * '#', '&', '*', '!', '|', '>', '\'', '\"',
1013  * '%', '@', '`'.
1014  *
1015  * In the block context (and, for the '-' indicator, in the flow context
1016  * too), it may also start with the characters
1017  *
1018  * '-', '?', ':'
1019  *
1020  * if it is followed by a non-space character.
1021  *
1022  * The last rule is more restrictive than the specification requires.
1023  */
1024 
1025  if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
1026  || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
1027  || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
1028  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
1029  || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
1030  || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
1031  || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
1032  || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
1033  || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
1034  || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
1035  (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
1036  (!parser->flow_level &&
1037  (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
1038  && !IS_BLANKZ_AT(parser->buffer, 1)))
1039  return yaml_parser_fetch_plain_scalar(parser);
1040 
1041  /*
1042  * If we don't determine the token type so far, it is an error.
1043  */
1044 
1045  return yaml_parser_set_scanner_error(parser,
1046  "while scanning for the next token", parser->mark,
1047  "found character that cannot start any token");
1048 }
1049 
1050 /*
1051  * Check the list of potential simple keys and remove the positions that
1052  * cannot contain simple keys anymore.
     *
     * A still-possible key becomes stale once the scanner has moved to a
     * later line than the key's mark, or more than 1024 index positions past
     * it.  A stale key is marked as not possible; if it was required, a
     * scanner error is set instead and 0 is returned.  Returns 1 otherwise.
1053  */
1054 
1055 static int
1057 {
1058  yaml_simple_key_t *simple_key;
1059 
1060  /* Check for a potential simple key for each flow level. */
1061 
1062  for (simple_key = parser->simple_keys.start;
1063  simple_key != parser->simple_keys.top; simple_key ++)
1064  {
1065  /*
1066  * The specification requires that a simple key
1067  *
1068  * - is limited to a single line,
1069  * - is shorter than 1024 characters.
1070  */
1071 
1072  if (simple_key->possible
1073  && (simple_key->mark.line < parser->mark.line
1074  || simple_key->mark.index+1024 < parser->mark.index)) {
1075 
1076  /* Check if the potential simple key to be removed is required. */
1077 
1078  if (simple_key->required) {
1079  return yaml_parser_set_scanner_error(parser,
1080  "while scanning a simple key", simple_key->mark,
1081  "could not find expected ':'");
1082  }
1083 
1084  simple_key->possible = 0;
1085  }
1086  }
1087 
1088  return 1;
1089 }
1090 
1091 /*
1092  * Check if a simple key may start at the current position and add it if
1093  * needed.
     *
     * When simple keys are allowed here, the entry on top of the simple key
     * stack is replaced by a new candidate recording the current mark and
     * the number the next queued token will receive.  Returns 0 if removing
     * the previous candidate fails (it was required), 1 otherwise.
1094  */
1095 
1096 static int
1098 {
1099  /*
1100  * A simple key is required at the current position if the scanner is in
1101  * the block context and the current column coincides with the indentation
1102  * level.
1103  */
1104 
1105  int required = (!parser->flow_level
1106  && parser->indent == (ptrdiff_t)parser->mark.column);
1107 
1108  /*
1109  * A simple key is required only when it is the first token in the current
1110  * line. Therefore it is always allowed. But we add a check anyway.
1111  */
1112 
1113  assert(parser->simple_key_allowed || !required); /* Impossible. */
1114 
1115  /*
1116  * If the current position may start a simple key, save it.
1117  */
1118 
1119  if (parser->simple_key_allowed)
1120  {
1121  yaml_simple_key_t simple_key;
1122  simple_key.possible = 1;
1123  simple_key.required = required;
     /* Tokens already handed out plus tokens still waiting in the queue. */
1124  simple_key.token_number =
1125  parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
1126  simple_key.mark = parser->mark;
1127 
1128  if (!yaml_parser_remove_simple_key(parser)) return 0;
1129 
     /* Overwrite the entry on top of the stack (the current flow level). */
1130  *(parser->simple_keys.top-1) = simple_key;
1131  }
1132 
1133  return 1;
1134 }
1135 
1136 /*
1137  * Remove a potential simple key at the current flow level.
     *
     * Operates on the entry on top of the simple key stack.  If that entry
     * is still possible and was required, a scanner error is set and 0 is
     * returned; otherwise the entry is marked as not possible and 1 is
     * returned.
1138  */
1139 
1140 static int
1142 {
1143  yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1144 
1145  if (simple_key->possible)
1146  {
1147  /* If the key is required, it is an error. */
1148 
1149  if (simple_key->required) {
1150  return yaml_parser_set_scanner_error(parser,
1151  "while scanning a simple key", simple_key->mark,
1152  "could not find expected ':'");
1153  }
1154  }
1155 
1156  /* Mark the key as no longer possible (the stack entry itself stays). */
1157 
1158  simple_key->possible = 0;
1159 
1160  return 1;
1161 }
1162 
1163 /*
1164  * Increase the flow level and resize the simple key list if needed.
     *
     * Pushes an empty simple key entry for the new level and then increments
     * flow_level.  Returns 0 if the push fails, or if flow_level is already
     * INT_MAX (reported as YAML_MEMORY_ERROR); returns 1 otherwise.
     *
     * NOTE(review): the overflow check runs after the push, so on that
     * failure path the extra simple key entry stays on the stack -- confirm
     * this is acceptable for a parser already in the error state.
1165  */
1166 
1167 static int
1169 {
1170  yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
1171 
1172  /* Reset the simple key on the next level. */
1173 
1174  if (!PUSH(parser, parser->simple_keys, empty_simple_key))
1175  return 0;
1176 
1177  /* Increase the flow level. */
1178 
1179  if (parser->flow_level == INT_MAX) {
1180  parser->error = YAML_MEMORY_ERROR;
1181  return 0;
1182  }
1183 
1184  parser->flow_level++;
1185 
1186  return 1;
1187 }
1188 
1189 /*
1190  * Decrease the flow level.
     *
     * If the scanner is inside a flow collection, decrements flow_level and
     * pops that level's entry from the simple key stack (the popped value is
     * discarded).  Does nothing at flow level 0.  Always returns 1.
1191  */
1192 
1193 static int
1195 {
1196  if (parser->flow_level) {
1197  parser->flow_level --;
1198  (void)POP(parser, parser->simple_keys);
1199  }
1200 
1201  return 1;
1202 }
1203 
1204 /*
1205  * Push the current indentation level to the stack and set the new level
1206  * if the current column is greater than the indentation level. In this case,
1207  * append or insert the specified token into the token queue.
1208  *
     * Parameters:
     *   column - the candidate new indentation level;
     *   number - -1 to append the token to the queue, otherwise the token
     *            number at which it is inserted (converted to a queue
     *            position by subtracting tokens_parsed);
     *   type   - the type of the token to create;
     *   mark   - used as both the start and the end mark of the token.
     *
     * Does nothing in the flow context.  Returns 1 on success and 0 on
     * failure (push/enqueue/insert failure, or a column that does not fit
     * into an int, reported as YAML_MEMORY_ERROR).
     *
     * NOTE(review): `token` is used below but its declaration is not visible
     * in this listing.
1209  */
1210 
1211 static int
1213  ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
1214 {
1216 
1217  /* In the flow context, do nothing. */
1218 
1219  if (parser->flow_level)
1220  return 1;
1221 
1222  if (parser->indent < column)
1223  {
1224  /*
1225  * Push the current indentation level to the stack and set the new
1226  * indentation level.
1227  */
1228 
1229  if (!PUSH(parser, parser->indents, parser->indent))
1230  return 0;
1231 
     /* parser->indent is an int: reject columns that would not fit. */
1232 #if PTRDIFF_MAX > INT_MAX
1233  if (column > INT_MAX) {
1234  parser->error = YAML_MEMORY_ERROR;
1235  return 0;
1236  }
1237 #endif
1238 
1239  parser->indent = (int)column;
1240 
1241  /* Create a token and insert it into the queue. */
1242 
1243  TOKEN_INIT(token, type, mark, mark);
1244 
1245  if (number == -1) {
1246  if (!ENQUEUE(parser, parser->tokens, token))
1247  return 0;
1248  }
1249  else {
1250  if (!QUEUE_INSERT(parser,
1251  parser->tokens, number - parser->tokens_parsed, token))
1252  return 0;
1253  }
1254  }
1255 
1256  return 1;
1257 }
1258 
1259 /*
1260  * Pop indentation levels from the indents stack until the current level
1261  * becomes less or equal to the column. For each indentation level, append
1262  * the BLOCK-END token.
     *
     * Does nothing in the flow context.  Each BLOCK-END token uses the
     * parser's current mark as both its start and end mark.  Returns 1 on
     * success and 0 if a token cannot be enqueued.
     *
     * NOTE(review): `token` is used below but its declaration is not visible
     * in this listing.
1263  */
1264 
1265 
1266 static int
1268 {
1270 
1271  /* In the flow context, do nothing. */
1272 
1273  if (parser->flow_level)
1274  return 1;
1275 
1276  /* Loop through the indentation levels in the stack. */
1277 
1278  while (parser->indent > column)
1279  {
1280  /* Create a token and append it to the queue. */
1281 
1282  TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
1283 
1284  if (!ENQUEUE(parser, parser->tokens, token))
1285  return 0;
1286 
1287  /* Pop the indentation level. */
1288 
1289  parser->indent = POP(parser, parser->indents);
1290  }
1291 
1292  return 1;
1293 }
1294 
1295 /*
1296  * Initialize the scanner and produce the STREAM-START token.
     *
     * Sets the indentation to -1, pushes the initial (empty) simple key
     * entry, allows a simple key at the current position, marks the stream
     * start as produced, and enqueues a STREAM-START token carrying the
     * parser's encoding.  Returns 1 on success and 0 if the push or the
     * enqueue fails.
     *
     * NOTE(review): `token` is used below but its declaration is not visible
     * in this listing.
1297  */
1298 
1299 static int
1301 {
1302  yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
1304 
1305  /* Set the initial indentation. */
1306 
1307  parser->indent = -1;
1308 
1309  /* Initialize the simple key stack. */
1310 
1311  if (!PUSH(parser, parser->simple_keys, simple_key))
1312  return 0;
1313 
1314  /* A simple key is allowed at the beginning of the stream. */
1315 
1316  parser->simple_key_allowed = 1;
1317 
1318  /* We have started. */
1319 
1320  parser->stream_start_produced = 1;
1321 
1322  /* Create the STREAM-START token and append it to the queue. */
1323 
1324  STREAM_START_TOKEN_INIT(token, parser->encoding,
1325  parser->mark, parser->mark);
1326 
1327  if (!ENQUEUE(parser, parser->tokens, token))
1328  return 0;
1329 
1330  return 1;
1331 }
1332 
1333 /*
1334  * Produce the STREAM-END token and shut down the scanner.
1335  */
1336 
1337 static int
1339 {
1341 
1342  /* Force new line. */
1343 
1344  if (parser->mark.column != 0) {
1345  parser->mark.column = 0;
1346  parser->mark.line ++;
1347  }
1348 
1349  /* Reset the indentation level. */
1350 
1351  if (!yaml_parser_unroll_indent(parser, -1))
1352  return 0;
1353 
1354  /* Reset simple keys. */
1355 
1356  if (!yaml_parser_remove_simple_key(parser))
1357  return 0;
1358 
1359  parser->simple_key_allowed = 0;
1360 
1361  /* Create the STREAM-END token and append it to the queue. */
1362 
1363  STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
1364 
1365  if (!ENQUEUE(parser, parser->tokens, token))
1366  return 0;
1367 
1368  return 1;
1369 }
1370 
1371 /*
1372  * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1373  */
1374 
1375 static int
1377 {
1379 
1380  /* Reset the indentation level. */
1381 
1382  if (!yaml_parser_unroll_indent(parser, -1))
1383  return 0;
1384 
1385  /* Reset simple keys. */
1386 
1387  if (!yaml_parser_remove_simple_key(parser))
1388  return 0;
1389 
1390  parser->simple_key_allowed = 0;
1391 
1392  /* Create the VERSION-DIRECTIVE or TAG-DIRECTIVE token. */
1393 
1394  if (!yaml_parser_scan_directive(parser, &token))
1395  return 0;
1396 
1397  /* Append the token to the queue. */
1398 
1399  if (!ENQUEUE(parser, parser->tokens, token)) {
1400  yaml_token_delete(&token);
1401  return 0;
1402  }
1403 
1404  return 1;
1405 }
1406 
1407 /*
1408  * Produce the DOCUMENT-START or DOCUMENT-END token.
1409  */
1410 
1411 static int
1414 {
1415  yaml_mark_t start_mark, end_mark;
1417 
1418  /* Reset the indentation level. */
1419 
1420  if (!yaml_parser_unroll_indent(parser, -1))
1421  return 0;
1422 
1423  /* Reset simple keys. */
1424 
1425  if (!yaml_parser_remove_simple_key(parser))
1426  return 0;
1427 
1428  parser->simple_key_allowed = 0;
1429 
1430  /* Consume the token. */
1431 
1432  start_mark = parser->mark;
1433 
1434  SKIP(parser);
1435  SKIP(parser);
1436  SKIP(parser);
1437 
1438  end_mark = parser->mark;
1439 
1440  /* Create the DOCUMENT-START or DOCUMENT-END token. */
1441 
1442  TOKEN_INIT(token, type, start_mark, end_mark);
1443 
1444  /* Append the token to the queue. */
1445 
1446  if (!ENQUEUE(parser, parser->tokens, token))
1447  return 0;
1448 
1449  return 1;
1450 }
1451 
1452 /*
1453  * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1454  */
1455 
1456 static int
1459 {
1460  yaml_mark_t start_mark, end_mark;
1462 
1463  /* The indicators '[' and '{' may start a simple key. */
1464 
1465  if (!yaml_parser_save_simple_key(parser))
1466  return 0;
1467 
1468  /* Increase the flow level. */
1469 
1470  if (!yaml_parser_increase_flow_level(parser))
1471  return 0;
1472 
1473  /* A simple key may follow the indicators '[' and '{'. */
1474 
1475  parser->simple_key_allowed = 1;
1476 
1477  /* Consume the token. */
1478 
1479  start_mark = parser->mark;
1480  SKIP(parser);
1481  end_mark = parser->mark;
1482 
1483  /* Create the FLOW-SEQUENCE-START or FLOW-MAPPING-START token. */
1484 
1485  TOKEN_INIT(token, type, start_mark, end_mark);
1486 
1487  /* Append the token to the queue. */
1488 
1489  if (!ENQUEUE(parser, parser->tokens, token))
1490  return 0;
1491 
1492  return 1;
1493 }
1494 
1495 /*
1496  * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1497  */
1498 
1499 static int
1502 {
1503  yaml_mark_t start_mark, end_mark;
1505 
1506  /* Reset any potential simple key on the current flow level. */
1507 
1508  if (!yaml_parser_remove_simple_key(parser))
1509  return 0;
1510 
1511  /* Decrease the flow level. */
1512 
1513  if (!yaml_parser_decrease_flow_level(parser))
1514  return 0;
1515 
1516  /* No simple keys after the indicators ']' and '}'. */
1517 
1518  parser->simple_key_allowed = 0;
1519 
1520  /* Consume the token. */
1521 
1522  start_mark = parser->mark;
1523  SKIP(parser);
1524  end_mark = parser->mark;
1525 
1526  /* Create the FLOW-SEQUENCE-END or FLOW-MAPPING-END token. */
1527 
1528  TOKEN_INIT(token, type, start_mark, end_mark);
1529 
1530  /* Append the token to the queue. */
1531 
1532  if (!ENQUEUE(parser, parser->tokens, token))
1533  return 0;
1534 
1535  return 1;
1536 }
1537 
1538 /*
1539  * Produce the FLOW-ENTRY token.
1540  */
1541 
1542 static int
1544 {
1545  yaml_mark_t start_mark, end_mark;
1547 
1548  /* Reset any potential simple keys on the current flow level. */
1549 
1550  if (!yaml_parser_remove_simple_key(parser))
1551  return 0;
1552 
1553  /* Simple keys are allowed after ','. */
1554 
1555  parser->simple_key_allowed = 1;
1556 
1557  /* Consume the token. */
1558 
1559  start_mark = parser->mark;
1560  SKIP(parser);
1561  end_mark = parser->mark;
1562 
1563  /* Create the FLOW-ENTRY token and append it to the queue. */
1564 
1565  TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
1566 
1567  if (!ENQUEUE(parser, parser->tokens, token))
1568  return 0;
1569 
1570  return 1;
1571 }
1572 
1573 /*
1574  * Produce the BLOCK-ENTRY token.
1575  */
1576 
1577 static int
1579 {
1580  yaml_mark_t start_mark, end_mark;
1582 
1583  /* Check if the scanner is in the block context. */
1584 
1585  if (!parser->flow_level)
1586  {
1587  /* Check if we are allowed to start a new entry. */
1588 
1589  if (!parser->simple_key_allowed) {
1590  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1591  "block sequence entries are not allowed in this context");
1592  }
1593 
1594  /* Add the BLOCK-SEQUENCE-START token if needed. */
1595 
1596  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1598  return 0;
1599  }
1600  else
1601  {
1602  /*
1603  * It is an error for the '-' indicator to occur in the flow context,
1604  * but we let the Parser detect and report about it because the Parser
1605  * is able to point to the context.
1606  */
1607  }
1608 
1609  /* Reset any potential simple keys on the current flow level. */
1610 
1611  if (!yaml_parser_remove_simple_key(parser))
1612  return 0;
1613 
1614  /* Simple keys are allowed after '-'. */
1615 
1616  parser->simple_key_allowed = 1;
1617 
1618  /* Consume the token. */
1619 
1620  start_mark = parser->mark;
1621  SKIP(parser);
1622  end_mark = parser->mark;
1623 
1624  /* Create the BLOCK-ENTRY token and append it to the queue. */
1625 
1626  TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
1627 
1628  if (!ENQUEUE(parser, parser->tokens, token))
1629  return 0;
1630 
1631  return 1;
1632 }
1633 
1634 /*
1635  * Produce the KEY token.
1636  */
1637 
1638 static int
1640 {
1641  yaml_mark_t start_mark, end_mark;
1643 
1644  /* In the block context, additional checks are required. */
1645 
1646  if (!parser->flow_level)
1647  {
1648  /* Check if we are allowed to start a new key (not necessarily simple). */
1649 
1650  if (!parser->simple_key_allowed) {
1651  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1652  "mapping keys are not allowed in this context");
1653  }
1654 
1655  /* Add the BLOCK-MAPPING-START token if needed. */
1656 
1657  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1659  return 0;
1660  }
1661 
1662  /* Reset any potential simple keys on the current flow level. */
1663 
1664  if (!yaml_parser_remove_simple_key(parser))
1665  return 0;
1666 
1667  /* Simple keys are allowed after '?' in the block context. */
1668 
1669  parser->simple_key_allowed = (!parser->flow_level);
1670 
1671  /* Consume the token. */
1672 
1673  start_mark = parser->mark;
1674  SKIP(parser);
1675  end_mark = parser->mark;
1676 
1677  /* Create the KEY token and append it to the queue. */
1678 
1679  TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
1680 
1681  if (!ENQUEUE(parser, parser->tokens, token))
1682  return 0;
1683 
1684  return 1;
1685 }
1686 
1687 /*
1688  * Produce the VALUE token.
1689  */
1690 
1691 static int
1693 {
1694  yaml_mark_t start_mark, end_mark;
1696  yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1697 
1698  /* Have we found a simple key? */
1699 
1700  if (simple_key->possible)
1701  {
1702 
1703  /* Create the KEY token and insert it into the queue. */
1704 
1705  TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
1706 
1707  if (!QUEUE_INSERT(parser, parser->tokens,
1708  simple_key->token_number - parser->tokens_parsed, token))
1709  return 0;
1710 
1711  /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
1712 
1713  if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
1714  simple_key->token_number,
1715  YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1716  return 0;
1717 
1718  /* Remove the simple key. */
1719 
1720  simple_key->possible = 0;
1721 
1722  /* A simple key cannot follow another simple key. */
1723 
1724  parser->simple_key_allowed = 0;
1725  }
1726  else
1727  {
1728  /* The ':' indicator follows a complex key. */
1729 
1730  /* In the block context, extra checks are required. */
1731 
1732  if (!parser->flow_level)
1733  {
1734  /* Check if we are allowed to start a complex value. */
1735 
1736  if (!parser->simple_key_allowed) {
1737  return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1738  "mapping values are not allowed in this context");
1739  }
1740 
1741  /* Add the BLOCK-MAPPING-START token if needed. */
1742 
1743  if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1745  return 0;
1746  }
1747 
1748  /* Simple keys after ':' are allowed in the block context. */
1749 
1750  parser->simple_key_allowed = (!parser->flow_level);
1751  }
1752 
1753  /* Consume the token. */
1754 
1755  start_mark = parser->mark;
1756  SKIP(parser);
1757  end_mark = parser->mark;
1758 
1759  /* Create the VALUE token and append it to the queue. */
1760 
1761  TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
1762 
1763  if (!ENQUEUE(parser, parser->tokens, token))
1764  return 0;
1765 
1766  return 1;
1767 }
1768 
1769 /*
1770  * Produce the ALIAS or ANCHOR token.
1771  */
1772 
1773 static int
1775 {
1777 
1778  /* An anchor or an alias could be a simple key. */
1779 
1780  if (!yaml_parser_save_simple_key(parser))
1781  return 0;
1782 
1783  /* A simple key cannot follow an anchor or an alias. */
1784 
1785  parser->simple_key_allowed = 0;
1786 
1787  /* Create the ALIAS or ANCHOR token and append it to the queue. */
1788 
1789  if (!yaml_parser_scan_anchor(parser, &token, type))
1790  return 0;
1791 
1792  if (!ENQUEUE(parser, parser->tokens, token)) {
1793  yaml_token_delete(&token);
1794  return 0;
1795  }
1796  return 1;
1797 }
1798 
1799 /*
1800  * Produce the TAG token.
1801  */
1802 
1803 static int
1805 {
1807 
1808  /* A tag could be a simple key. */
1809 
1810  if (!yaml_parser_save_simple_key(parser))
1811  return 0;
1812 
1813  /* A simple key cannot follow a tag. */
1814 
1815  parser->simple_key_allowed = 0;
1816 
1817  /* Create the TAG token and append it to the queue. */
1818 
1819  if (!yaml_parser_scan_tag(parser, &token))
1820  return 0;
1821 
1822  if (!ENQUEUE(parser, parser->tokens, token)) {
1823  yaml_token_delete(&token);
1824  return 0;
1825  }
1826 
1827  return 1;
1828 }
1829 
1830 /*
1831  * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1832  */
1833 
1834 static int
1836 {
1838 
1839  /* Remove any potential simple keys. */
1840 
1841  if (!yaml_parser_remove_simple_key(parser))
1842  return 0;
1843 
1844  /* A simple key may follow a block scalar. */
1845 
1846  parser->simple_key_allowed = 1;
1847 
1848  /* Create the SCALAR token and append it to the queue. */
1849 
1850  if (!yaml_parser_scan_block_scalar(parser, &token, literal))
1851  return 0;
1852 
1853  if (!ENQUEUE(parser, parser->tokens, token)) {
1854  yaml_token_delete(&token);
1855  return 0;
1856  }
1857 
1858  return 1;
1859 }
1860 
1861 /*
1862  * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1863  */
1864 
1865 static int
1867 {
1869 
1870  /* A flow scalar could be a simple key. */
1871 
1872  if (!yaml_parser_save_simple_key(parser))
1873  return 0;
1874 
1875  /* A simple key cannot follow a flow scalar. */
1876 
1877  parser->simple_key_allowed = 0;
1878 
1879  /* Create the SCALAR token and append it to the queue. */
1880 
1881  if (!yaml_parser_scan_flow_scalar(parser, &token, single))
1882  return 0;
1883 
1884  if (!ENQUEUE(parser, parser->tokens, token)) {
1885  yaml_token_delete(&token);
1886  return 0;
1887  }
1888 
1889  return 1;
1890 }
1891 
1892 /*
1893  * Produce the SCALAR(...,plain) token.
1894  */
1895 
1896 static int
1898 {
1900 
1901  /* A plain scalar could be a simple key. */
1902 
1903  if (!yaml_parser_save_simple_key(parser))
1904  return 0;
1905 
1906  /* A simple key cannot follow a plain scalar. */
1907 
1908  parser->simple_key_allowed = 0;
1909 
1910  /* Create the SCALAR token and append it to the queue. */
1911 
1912  if (!yaml_parser_scan_plain_scalar(parser, &token))
1913  return 0;
1914 
1915  if (!ENQUEUE(parser, parser->tokens, token)) {
1916  yaml_token_delete(&token);
1917  return 0;
1918  }
1919 
1920  return 1;
1921 }
1922 
1923 /*
1924  * Eat whitespaces and comments until the next token is found.
1925  */
1926 
1927 static int
1929 {
1930  /* Loop until the next token is found. */
1931 
1932  while (1)
1933  {
1934  /* Allow the BOM mark to start a line. */
1935 
1936  if (!CACHE(parser, 1)) return 0;
1937 
1938  if (parser->mark.column == 0 && IS_BOM(parser->buffer))
1939  SKIP(parser);
1940 
1941  /*
1942  * Eat whitespaces.
1943  *
1944  * Tabs are allowed:
1945  *
1946  * - in the flow context;
1947  * - in the block context, but not at the beginning of the line or
1948  * after '-', '?', or ':' (complex value).
1949  */
1950 
1951  if (!CACHE(parser, 1)) return 0;
1952 
1953  while (CHECK(parser->buffer,' ') ||
1954  ((parser->flow_level || !parser->simple_key_allowed) &&
1955  CHECK(parser->buffer, '\t'))) {
1956  SKIP(parser);
1957  if (!CACHE(parser, 1)) return 0;
1958  }
1959 
1960  /* Eat a comment until a line break. */
1961 
1962  if (CHECK(parser->buffer, '#')) {
1963  while (!IS_BREAKZ(parser->buffer)) {
1964  SKIP(parser);
1965  if (!CACHE(parser, 1)) return 0;
1966  }
1967  }
1968 
1969  /* If it is a line break, eat it. */
1970 
1971  if (IS_BREAK(parser->buffer))
1972  {
1973  if (!CACHE(parser, 2)) return 0;
1974  SKIP_LINE(parser);
1975 
1976  /* In the block context, a new line may start a simple key. */
1977 
1978  if (!parser->flow_level) {
1979  parser->simple_key_allowed = 1;
1980  }
1981  }
1982  else
1983  {
1984  /* We have found a token. */
1985 
1986  break;
1987  }
1988  }
1989 
1990  return 1;
1991 }
1992 
1993 /*
1994  * Scan a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1995  *
1996  * Scope:
1997  * %YAML 1.1 # a comment \n
1998  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1999  * %TAG !yaml! tag:yaml.org,2002: \n
2000  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2001  */
2002 
2003 int
2005 {
2006  yaml_mark_t start_mark, end_mark;
2007  yaml_char_t *name = NULL;
2008  int major, minor;
2009  yaml_char_t *handle = NULL, *prefix = NULL;
2010 
2011  /* Eat '%'. */
2012 
2013  start_mark = parser->mark;
2014 
2015  SKIP(parser);
2016 
2017  /* Scan the directive name. */
2018 
2019  if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2020  goto error;
2021 
2022  /* Is it a YAML directive? */
2023 
2024  if (strcmp((char *)name, "YAML") == 0)
2025  {
2026  /* Scan the VERSION directive value. */
2027 
2028  if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2029  &major, &minor))
2030  goto error;
2031 
2032  end_mark = parser->mark;
2033 
2034  /* Create a VERSION-DIRECTIVE token. */
2035 
2036  VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
2037  start_mark, end_mark);
2038  }
2039 
2040  /* Is it a TAG directive? */
2041 
2042  else if (strcmp((char *)name, "TAG") == 0)
2043  {
2044  /* Scan the TAG directive value. */
2045 
2046  if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2047  &handle, &prefix))
2048  goto error;
2049 
2050  end_mark = parser->mark;
2051 
2052  /* Create a TAG-DIRECTIVE token. */
2053 
2054  TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
2055  start_mark, end_mark);
2056  }
2057 
2058  /* Unknown directive. */
2059 
2060  else
2061  {
2062  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2063  start_mark, "found uknown directive name");
2064  goto error;
2065  }
2066 
2067  /* Eat the rest of the line including any comments. */
2068 
2069  if (!CACHE(parser, 1)) goto error;
2070 
2071  while (IS_BLANK(parser->buffer)) {
2072  SKIP(parser);
2073  if (!CACHE(parser, 1)) goto error;
2074  }
2075 
2076  if (CHECK(parser->buffer, '#')) {
2077  while (!IS_BREAKZ(parser->buffer)) {
2078  SKIP(parser);
2079  if (!CACHE(parser, 1)) goto error;
2080  }
2081  }
2082 
2083  /* Check if we are at the end of the line. */
2084 
2085  if (!IS_BREAKZ(parser->buffer)) {
2086  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2087  start_mark, "did not find expected comment or line break");
2088  goto error;
2089  }
2090 
2091  /* Eat a line break. */
2092 
2093  if (IS_BREAK(parser->buffer)) {
2094  if (!CACHE(parser, 2)) goto error;
2095  SKIP_LINE(parser);
2096  }
2097 
2098  yaml_free(name);
2099 
2100  return 1;
2101 
2102 error:
2103  yaml_free(prefix);
2104  yaml_free(handle);
2105  yaml_free(name);
2106  return 0;
2107 }
2108 
2109 /*
2110  * Scan the directive name.
2111  *
2112  * Scope:
2113  * %YAML 1.1 # a comment \n
2114  * ^^^^
2115  * %TAG !yaml! tag:yaml.org,2002: \n
2116  * ^^^
2117  */
2118 
2119 static int
2121  yaml_mark_t start_mark, yaml_char_t **name)
2122 {
2123  yaml_string_t string = NULL_STRING;
2124 
2125  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2126 
2127  /* Consume the directive name. */
2128 
2129  if (!CACHE(parser, 1)) goto error;
2130 
2131  while (IS_ALPHA(parser->buffer))
2132  {
2133  if (!READ(parser, string)) goto error;
2134  if (!CACHE(parser, 1)) goto error;
2135  }
2136 
2137  /* Check if the name is empty. */
2138 
2139  if (string.start == string.pointer) {
2140  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2141  start_mark, "could not find expected directive name");
2142  goto error;
2143  }
2144 
2145  /* Check for a blank character after the name. */
2146 
2147  if (!IS_BLANKZ(parser->buffer)) {
2148  yaml_parser_set_scanner_error(parser, "while scanning a directive",
2149  start_mark, "found unexpected non-alphabetical character");
2150  goto error;
2151  }
2152 
2153  *name = string.start;
2154 
2155  return 1;
2156 
2157 error:
2158  STRING_DEL(parser, string);
2159  return 0;
2160 }
2161 
2162 /*
2163  * Scan the value of VERSION-DIRECTIVE.
2164  *
2165  * Scope:
2166  * %YAML 1.1 # a comment \n
2167  * ^^^^^^
2168  */
2169 
2170 static int
2172  yaml_mark_t start_mark, int *major, int *minor)
2173 {
2174  /* Eat whitespaces. */
2175 
2176  if (!CACHE(parser, 1)) return 0;
2177 
2178  while (IS_BLANK(parser->buffer)) {
2179  SKIP(parser);
2180  if (!CACHE(parser, 1)) return 0;
2181  }
2182 
2183  /* Consume the major version number. */
2184 
2185  if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2186  return 0;
2187 
2188  /* Eat '.'. */
2189 
2190  if (!CHECK(parser->buffer, '.')) {
2191  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2192  start_mark, "did not find expected digit or '.' character");
2193  }
2194 
2195  SKIP(parser);
2196 
2197  /* Consume the minor version number. */
2198 
2199  if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2200  return 0;
2201 
2202  return 1;
2203 }
2204 
2205 #define MAX_NUMBER_LENGTH 9
2206 
2207 /*
2208  * Scan the version number of VERSION-DIRECTIVE.
2209  *
2210  * Scope:
2211  * %YAML 1.1 # a comment \n
2212  * ^
2213  * %YAML 1.1 # a comment \n
2214  * ^
2215  */
2216 
2217 static int
2219  yaml_mark_t start_mark, int *number)
2220 {
2221  int value = 0;
2222  size_t length = 0;
2223 
2224  /* Repeat while the next character is digit. */
2225 
2226  if (!CACHE(parser, 1)) return 0;
2227 
2228  while (IS_DIGIT(parser->buffer))
2229  {
2230  /* Check if the number is too long. */
2231 
2232  if (++length > MAX_NUMBER_LENGTH) {
2233  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2234  start_mark, "found extremely long version number");
2235  }
2236 
2237  value = value*10 + AS_DIGIT(parser->buffer);
2238 
2239  SKIP(parser);
2240 
2241  if (!CACHE(parser, 1)) return 0;
2242  }
2243 
2244  /* Check if the number was present. */
2245 
2246  if (!length) {
2247  return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2248  start_mark, "did not find expected version number");
2249  }
2250 
2251  *number = value;
2252 
2253  return 1;
2254 }
2255 
2256 /*
2257  * Scan the value of a TAG-DIRECTIVE token.
2258  *
2259  * Scope:
2260  * %TAG !yaml! tag:yaml.org,2002: \n
2261  * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2262  */
2263 
2264 static int
2266  yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2267 {
2268  yaml_char_t *handle_value = NULL;
2269  yaml_char_t *prefix_value = NULL;
2270 
2271  /* Eat whitespaces. */
2272 
2273  if (!CACHE(parser, 1)) goto error;
2274 
2275  while (IS_BLANK(parser->buffer)) {
2276  SKIP(parser);
2277  if (!CACHE(parser, 1)) goto error;
2278  }
2279 
2280  /* Scan a handle. */
2281 
2282  if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2283  goto error;
2284 
2285  /* Expect a whitespace. */
2286 
2287  if (!CACHE(parser, 1)) goto error;
2288 
2289  if (!IS_BLANK(parser->buffer)) {
2290  yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2291  start_mark, "did not find expected whitespace");
2292  goto error;
2293  }
2294 
2295  /* Eat whitespaces. */
2296 
2297  while (IS_BLANK(parser->buffer)) {
2298  SKIP(parser);
2299  if (!CACHE(parser, 1)) goto error;
2300  }
2301 
2302  /* Scan a prefix. */
2303 
2304  if (!yaml_parser_scan_tag_uri(parser, 1, NULL, start_mark, &prefix_value))
2305  goto error;
2306 
2307  /* Expect a whitespace or line break. */
2308 
2309  if (!CACHE(parser, 1)) goto error;
2310 
2311  if (!IS_BLANKZ(parser->buffer)) {
2312  yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2313  start_mark, "did not find expected whitespace or line break");
2314  goto error;
2315  }
2316 
2317  *handle = handle_value;
2318  *prefix = prefix_value;
2319 
2320  return 1;
2321 
2322 error:
2323  yaml_free(handle_value);
2324  yaml_free(prefix_value);
2325  return 0;
2326 }
2327 
2328 static int
2331 {
2332  int length = 0;
2333  yaml_mark_t start_mark, end_mark;
2334  yaml_string_t string = NULL_STRING;
2335 
2336  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2337 
2338  /* Eat the indicator character. */
2339 
2340  start_mark = parser->mark;
2341 
2342  SKIP(parser);
2343 
2344  /* Consume the value. */
2345 
2346  if (!CACHE(parser, 1)) goto error;
2347 
2348  while (IS_ALPHA(parser->buffer)) {
2349  if (!READ(parser, string)) goto error;
2350  if (!CACHE(parser, 1)) goto error;
2351  length ++;
2352  }
2353 
2354  end_mark = parser->mark;
2355 
2356  /*
2357  * Check if length of the anchor is greater than 0 and it is followed by
2358  * a whitespace character or one of the indicators:
2359  *
2360  * '?', ':', ',', ']', '}', '%', '@', '`'.
2361  */
2362 
2363  if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
2364  || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
2365  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
2366  || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
2367  || CHECK(parser->buffer, '`'))) {
2369  "while scanning an anchor" : "while scanning an alias", start_mark,
2370  "did not find expected alphabetic or numeric character");
2371  goto error;
2372  }
2373 
2374  /* Create a token. */
2375 
2376  if (type == YAML_ANCHOR_TOKEN) {
2377  ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2378  }
2379  else {
2380  ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2381  }
2382 
2383  return 1;
2384 
2385 error:
2386  STRING_DEL(parser, string);
2387  return 0;
2388 }
2389 
2390 /*
2391  * Scan a TAG token.
2392  */
2393 
2394 static int
2396 {
2397  yaml_char_t *handle = NULL;
2398  yaml_char_t *suffix = NULL;
2399  yaml_mark_t start_mark, end_mark;
2400 
2401  start_mark = parser->mark;
2402 
2403  /* Check if the tag is in the canonical form. */
2404 
2405  if (!CACHE(parser, 2)) goto error;
2406 
2407  if (CHECK_AT(parser->buffer, '<', 1))
2408  {
2409  /* Set the handle to '' */
2410 
2411  handle = yaml_malloc(1);
2412  if (!handle) goto error;
2413  handle[0] = '\0';
2414 
2415  /* Eat '!<' */
2416 
2417  SKIP(parser);
2418  SKIP(parser);
2419 
2420  /* Consume the tag value. */
2421 
2422  if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2423  goto error;
2424 
2425  /* Check for '>' and eat it. */
2426 
2427  if (!CHECK(parser->buffer, '>')) {
2428  yaml_parser_set_scanner_error(parser, "while scanning a tag",
2429  start_mark, "did not find the expected '>'");
2430  goto error;
2431  }
2432 
2433  SKIP(parser);
2434  }
2435  else
2436  {
2437  /* The tag has either the '!suffix' or the '!handle!suffix' form. */
2438 
2439  /* First, try to scan a handle. */
2440 
2441  if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2442  goto error;
2443 
2444  /* Check if it is, indeed, handle. */
2445 
2446  if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2447  {
2448  /* Scan the suffix now. */
2449 
2450  if (!yaml_parser_scan_tag_uri(parser, 0, NULL, start_mark, &suffix))
2451  goto error;
2452  }
2453  else
2454  {
2455  /* It wasn't a handle after all. Scan the rest of the tag. */
2456 
2457  if (!yaml_parser_scan_tag_uri(parser, 0, handle, start_mark, &suffix))
2458  goto error;
2459 
2460  /* Set the handle to '!'. */
2461 
2462  yaml_free(handle);
2463  handle = yaml_malloc(2);
2464  if (!handle) goto error;
2465  handle[0] = '!';
2466  handle[1] = '\0';
2467 
2468  /*
2469  * A special case: the '!' tag. Set the handle to '' and the
2470  * suffix to '!'.
2471  */
2472 
2473  if (suffix[0] == '\0') {
2474  yaml_char_t *tmp = handle;
2475  handle = suffix;
2476  suffix = tmp;
2477  }
2478  }
2479  }
2480 
2481  /* Check the character which ends the tag. */
2482 
2483  if (!CACHE(parser, 1)) goto error;
2484 
2485  if (!IS_BLANKZ(parser->buffer)) {
2486  yaml_parser_set_scanner_error(parser, "while scanning a tag",
2487  start_mark, "did not find expected whitespace or line break");
2488  goto error;
2489  }
2490 
2491  end_mark = parser->mark;
2492 
2493  /* Create a token. */
2494 
2495  TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);
2496 
2497  return 1;
2498 
2499 error:
2500  yaml_free(handle);
2501  yaml_free(suffix);
2502  return 0;
2503 }
2504 
2505 /*
2506  * Scan a tag handle.
2507  */
2508 
2509 static int
2511  yaml_mark_t start_mark, yaml_char_t **handle)
2512 {
2513  yaml_string_t string = NULL_STRING;
2514 
2515  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2516 
2517  /* Check the initial '!' character. */
2518 
2519  if (!CACHE(parser, 1)) goto error;
2520 
2521  if (!CHECK(parser->buffer, '!')) {
2522  yaml_parser_set_scanner_error(parser, directive ?
2523  "while scanning a tag directive" : "while scanning a tag",
2524  start_mark, "did not find expected '!'");
2525  goto error;
2526  }
2527 
2528  /* Copy the '!' character. */
2529 
2530  if (!READ(parser, string)) goto error;
2531 
2532  /* Copy all subsequent alphabetical and numerical characters. */
2533 
2534  if (!CACHE(parser, 1)) goto error;
2535 
2536  while (IS_ALPHA(parser->buffer))
2537  {
2538  if (!READ(parser, string)) goto error;
2539  if (!CACHE(parser, 1)) goto error;
2540  }
2541 
2542  /* Check if the trailing character is '!' and copy it. */
2543 
2544  if (CHECK(parser->buffer, '!'))
2545  {
2546  if (!READ(parser, string)) goto error;
2547  }
2548  else
2549  {
2550  /*
2551  * It's either the '!' tag or not really a tag handle. If it's a %TAG
2552  * directive, it's an error. If it's a tag token, it must be a part of
2553  * URI.
2554  */
2555 
2556  if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
2557  yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
2558  start_mark, "did not find expected '!'");
2559  goto error;
2560  }
2561  }
2562 
2563  *handle = string.start;
2564 
2565  return 1;
2566 
2567 error:
2568  STRING_DEL(parser, string);
2569  return 0;
2570 }
2571 
2572 /*
2573  * Scan a tag URI.
2574  */
2575 
2576 static int
2578  yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
2579 {
2580  size_t length = head ? strlen((char *)head) : 0;
2581  yaml_string_t string = NULL_STRING;
2582 
2583  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2584 
2585  /* Resize the string to include the head. */
2586 
2587  while ((size_t)(string.end - string.start) <= length) {
2588  if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
2589  parser->error = YAML_MEMORY_ERROR;
2590  goto error;
2591  }
2592  }
2593 
2594  /*
2595  * Copy the head if needed.
2596  *
2597  * Note that we don't copy the leading '!' character.
2598  */
2599 
2600  if (length > 1) {
2601  memcpy(string.start, head+1, length-1);
2602  string.pointer += length-1;
2603  }
2604 
2605  /* Scan the tag. */
2606 
2607  if (!CACHE(parser, 1)) goto error;
2608 
2609  /*
2610  * The set of characters that may appear in URI is as follows:
2611  *
2612  * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2613  * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']',
2614  * '%'.
2615  */
2616 
2617  while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
2618  || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
2619  || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
2620  || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
2621  || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
2622  || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '.')
2623  || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
2624  || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
2625  || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
2626  || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
2627  || CHECK(parser->buffer, '%'))
2628  {
2629  /* Check if it is a URI-escape sequence. */
2630 
2631  if (CHECK(parser->buffer, '%')) {
2632  if (!STRING_EXTEND(parser, string))
2633  goto error;
2634 
2635  if (!yaml_parser_scan_uri_escapes(parser,
2636  directive, start_mark, &string)) goto error;
2637  }
2638  else {
2639  if (!READ(parser, string)) goto error;
2640  }
2641 
2642  length ++;
2643  if (!CACHE(parser, 1)) goto error;
2644  }
2645 
2646  /* Check if the tag is non-empty. */
2647 
2648  if (!length) {
2649  if (!STRING_EXTEND(parser, string))
2650  goto error;
2651 
2652  yaml_parser_set_scanner_error(parser, directive ?
2653  "while parsing a %TAG directive" : "while parsing a tag",
2654  start_mark, "did not find expected tag URI");
2655  goto error;
2656  }
2657 
2658  *uri = string.start;
2659 
2660  return 1;
2661 
2662 error:
2663  STRING_DEL(parser, string);
2664  return 0;
2665 }
2666 
2667 /*
2668  * Decode an URI-escape sequence corresponding to a single UTF-8 character.
2669  */
2670 
2671 static int
2673  yaml_mark_t start_mark, yaml_string_t *string)
2674 {
2675  int width = 0;
2676 
2677  /* Decode the required number of characters. */
2678 
2679  do {
2680 
2681  unsigned char octet = 0;
2682 
2683  /* Check for a URI-escaped octet. */
2684 
2685  if (!CACHE(parser, 3)) return 0;
2686 
2687  if (!(CHECK(parser->buffer, '%')
2688  && IS_HEX_AT(parser->buffer, 1)
2689  && IS_HEX_AT(parser->buffer, 2))) {
2690  return yaml_parser_set_scanner_error(parser, directive ?
2691  "while parsing a %TAG directive" : "while parsing a tag",
2692  start_mark, "did not find URI escaped octet");
2693  }
2694 
2695  /* Get the octet. */
2696 
2697  octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);
2698 
2699  /* If it is the leading octet, determine the length of the UTF-8 sequence. */
2700 
2701  if (!width)
2702  {
2703  width = (octet & 0x80) == 0x00 ? 1 :
2704  (octet & 0xE0) == 0xC0 ? 2 :
2705  (octet & 0xF0) == 0xE0 ? 3 :
2706  (octet & 0xF8) == 0xF0 ? 4 : 0;
2707  if (!width) {
2708  return yaml_parser_set_scanner_error(parser, directive ?
2709  "while parsing a %TAG directive" : "while parsing a tag",
2710  start_mark, "found an incorrect leading UTF-8 octet");
2711  }
2712  }
2713  else
2714  {
2715  /* Check if the trailing octet is correct. */
2716 
2717  if ((octet & 0xC0) != 0x80) {
2718  return yaml_parser_set_scanner_error(parser, directive ?
2719  "while parsing a %TAG directive" : "while parsing a tag",
2720  start_mark, "found an incorrect trailing UTF-8 octet");
2721  }
2722  }
2723 
2724  /* Copy the octet and move the pointers. */
2725 
2726  *(string->pointer++) = octet;
2727  SKIP(parser);
2728  SKIP(parser);
2729  SKIP(parser);
2730 
2731  } while (--width);
2732 
2733  return 1;
2734 }
2735 
2736 /*
2737  * Scan a block scalar.
2738  */
2739 
2740 static int
2742  int literal)
2743 {
2744  yaml_mark_t start_mark;
2745  yaml_mark_t end_mark;
2746  yaml_string_t string = NULL_STRING;
2747  yaml_string_t leading_break = NULL_STRING;
2748  yaml_string_t trailing_breaks = NULL_STRING;
2749  int chomping = 0;
2750  int increment = 0;
2751  int indent = 0;
2752  int leading_blank = 0;
2753  int trailing_blank = 0;
2754 
2755  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2756  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
2757  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
2758 
2759  /* Eat the indicator '|' or '>'. */
2760 
2761  start_mark = parser->mark;
2762 
2763  SKIP(parser);
2764 
2765  /* Scan the additional block scalar indicators. */
2766 
2767  if (!CACHE(parser, 1)) goto error;
2768 
2769  /* Check for a chomping indicator. */
2770 
2771  if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
2772  {
2773  /* Set the chomping method and eat the indicator. */
2774 
2775  chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2776 
2777  SKIP(parser);
2778 
2779  /* Check for an indentation indicator. */
2780 
2781  if (!CACHE(parser, 1)) goto error;
2782 
2783  if (IS_DIGIT(parser->buffer))
2784  {
2785  /* Check that the intendation is greater than 0. */
2786 
2787  if (CHECK(parser->buffer, '0')) {
2788  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2789  start_mark, "found an intendation indicator equal to 0");
2790  goto error;
2791  }
2792 
2793  /* Get the intendation level and eat the indicator. */
2794 
2795  increment = AS_DIGIT(parser->buffer);
2796 
2797  SKIP(parser);
2798  }
2799  }
2800 
2801  /* Do the same as above, but in the opposite order. */
2802 
2803  else if (IS_DIGIT(parser->buffer))
2804  {
2805  if (CHECK(parser->buffer, '0')) {
2806  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2807  start_mark, "found an intendation indicator equal to 0");
2808  goto error;
2809  }
2810 
2811  increment = AS_DIGIT(parser->buffer);
2812 
2813  SKIP(parser);
2814 
2815  if (!CACHE(parser, 1)) goto error;
2816 
2817  if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
2818  chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2819 
2820  SKIP(parser);
2821  }
2822  }
2823 
2824  /* Eat whitespaces and comments to the end of the line. */
2825 
2826  if (!CACHE(parser, 1)) goto error;
2827 
2828  while (IS_BLANK(parser->buffer)) {
2829  SKIP(parser);
2830  if (!CACHE(parser, 1)) goto error;
2831  }
2832 
2833  if (CHECK(parser->buffer, '#')) {
2834  while (!IS_BREAKZ(parser->buffer)) {
2835  SKIP(parser);
2836  if (!CACHE(parser, 1)) goto error;
2837  }
2838  }
2839 
2840  /* Check if we are at the end of the line. */
2841 
2842  if (!IS_BREAKZ(parser->buffer)) {
2843  yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2844  start_mark, "did not find expected comment or line break");
2845  goto error;
2846  }
2847 
2848  /* Eat a line break. */
2849 
2850  if (IS_BREAK(parser->buffer)) {
2851  if (!CACHE(parser, 2)) goto error;
2852  SKIP_LINE(parser);
2853  }
2854 
2855  end_mark = parser->mark;
2856 
2857  /* Set the intendation level if it was specified. */
2858 
2859  if (increment) {
2860  indent = parser->indent >= 0 ? parser->indent+increment : increment;
2861  }
2862 
2863  /* Scan the leading line breaks and determine the indentation level if needed. */
2864 
2865  if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
2866  start_mark, &end_mark)) goto error;
2867 
2868  /* Scan the block scalar content. */
2869 
2870  if (!CACHE(parser, 1)) goto error;
2871 
2872  while ((int)parser->mark.column == indent && !IS_Z(parser->buffer))
2873  {
2874  /*
2875  * We are at the beginning of a non-empty line.
2876  */
2877 
2878  /* Is it a trailing whitespace? */
2879 
2880  trailing_blank = IS_BLANK(parser->buffer);
2881 
2882  /* Check if we need to fold the leading line break. */
2883 
2884  if (!literal && (*leading_break.start == '\n')
2885  && !leading_blank && !trailing_blank)
2886  {
2887  /* Do we need to join the lines by space? */
2888 
2889  if (*trailing_breaks.start == '\0') {
2890  if (!STRING_EXTEND(parser, string)) goto error;
2891  *(string.pointer ++) = ' ';
2892  }
2893 
2894  CLEAR(parser, leading_break);
2895  }
2896  else {
2897  if (!JOIN(parser, string, leading_break)) goto error;
2898  CLEAR(parser, leading_break);
2899  }
2900 
2901  /* Append the remaining line breaks. */
2902 
2903  if (!JOIN(parser, string, trailing_breaks)) goto error;
2904  CLEAR(parser, trailing_breaks);
2905 
2906  /* Is it a leading whitespace? */
2907 
2908  leading_blank = IS_BLANK(parser->buffer);
2909 
2910  /* Consume the current line. */
2911 
2912  while (!IS_BREAKZ(parser->buffer)) {
2913  if (!READ(parser, string)) goto error;
2914  if (!CACHE(parser, 1)) goto error;
2915  }
2916 
2917  /* Consume the line break. */
2918 
2919  if (!CACHE(parser, 2)) goto error;
2920 
2921  if (!READ_LINE(parser, leading_break)) goto error;
2922 
2923  /* Eat the following intendation spaces and line breaks. */
2924 
2926  &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
2927  }
2928 
2929  /* Chomp the tail. */
2930 
2931  if (chomping != -1) {
2932  if (!JOIN(parser, string, leading_break)) goto error;
2933  }
2934  if (chomping == 1) {
2935  if (!JOIN(parser, string, trailing_breaks)) goto error;
2936  }
2937 
2938  /* Create a token. */
2939 
2940  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
2942  start_mark, end_mark);
2943 
2944  STRING_DEL(parser, leading_break);
2945  STRING_DEL(parser, trailing_breaks);
2946 
2947  return 1;
2948 
2949 error:
2950  STRING_DEL(parser, string);
2951  STRING_DEL(parser, leading_break);
2952  STRING_DEL(parser, trailing_breaks);
2953 
2954  return 0;
2955 }
2956 
2957 /*
2958  * Scan intendation spaces and line breaks for a block scalar. Determine the
2959  * intendation level if needed.
2960  */
2961 
2962 static int
2964  int *indent, yaml_string_t *breaks,
2965  yaml_mark_t start_mark, yaml_mark_t *end_mark)
2966 {
2967  int max_indent = 0;
2968 
2969  *end_mark = parser->mark;
2970 
2971  /* Eat the intendation spaces and line breaks. */
2972 
2973  while (1)
2974  {
2975  /* Eat the intendation spaces. */
2976 
2977  if (!CACHE(parser, 1)) return 0;
2978 
2979  while ((!*indent || (int)parser->mark.column < *indent)
2980  && IS_SPACE(parser->buffer)) {
2981  SKIP(parser);
2982  if (!CACHE(parser, 1)) return 0;
2983  }
2984 
2985  if ((int)parser->mark.column > max_indent)
2986  max_indent = (int)parser->mark.column;
2987 
2988  /* Check for a tab character messing the intendation. */
2989 
2990  if ((!*indent || (int)parser->mark.column < *indent)
2991  && IS_TAB(parser->buffer)) {
2992  return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2993  start_mark, "found a tab character where an intendation space is expected");
2994  }
2995 
2996  /* Have we found a non-empty line? */
2997 
2998  if (!IS_BREAK(parser->buffer)) break;
2999 
3000  /* Consume the line break. */
3001 
3002  if (!CACHE(parser, 2)) return 0;
3003  if (!READ_LINE(parser, *breaks)) return 0;
3004  *end_mark = parser->mark;
3005  }
3006 
3007  /* Determine the indentation level if needed. */
3008 
3009  if (!*indent) {
3010  *indent = max_indent;
3011  if (*indent < parser->indent + 1)
3012  *indent = parser->indent + 1;
3013  if (*indent < 1)
3014  *indent = 1;
3015  }
3016 
3017  return 1;
3018 }
3019 
3020 /*
3021  * Scan a quoted scalar.
3022  */
3023 
3024 static int
3026  int single)
3027 {
3028  yaml_mark_t start_mark;
3029  yaml_mark_t end_mark;
3030  yaml_string_t string = NULL_STRING;
3031  yaml_string_t leading_break = NULL_STRING;
3032  yaml_string_t trailing_breaks = NULL_STRING;
3033  yaml_string_t whitespaces = NULL_STRING;
3034  int leading_blanks;
3035 
3036  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3037  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3038  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3039  if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3040 
3041  /* Eat the left quote. */
3042 
3043  start_mark = parser->mark;
3044 
3045  SKIP(parser);
3046 
3047  /* Consume the content of the quoted scalar. */
3048 
3049  while (1)
3050  {
3051  /* Check that there are no document indicators at the beginning of the line. */
3052 
3053  if (!CACHE(parser, 4)) goto error;
3054 
3055  if (parser->mark.column == 0 &&
3056  ((CHECK_AT(parser->buffer, '-', 0) &&
3057  CHECK_AT(parser->buffer, '-', 1) &&
3058  CHECK_AT(parser->buffer, '-', 2)) ||
3059  (CHECK_AT(parser->buffer, '.', 0) &&
3060  CHECK_AT(parser->buffer, '.', 1) &&
3061  CHECK_AT(parser->buffer, '.', 2))) &&
3062  IS_BLANKZ_AT(parser->buffer, 3))
3063  {
3064  yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3065  start_mark, "found unexpected document indicator");
3066  goto error;
3067  }
3068 
3069  /* Check for EOF. */
3070 
3071  if (IS_Z(parser->buffer)) {
3072  yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3073  start_mark, "found unexpected end of stream");
3074  goto error;
3075  }
3076 
3077  /* Consume non-blank characters. */
3078 
3079  if (!CACHE(parser, 2)) goto error;
3080 
3081  leading_blanks = 0;
3082 
3083  while (!IS_BLANKZ(parser->buffer))
3084  {
3085  /* Check for an escaped single quote. */
3086 
3087  if (single && CHECK_AT(parser->buffer, '\'', 0)
3088  && CHECK_AT(parser->buffer, '\'', 1))
3089  {
3090  if (!STRING_EXTEND(parser, string)) goto error;
3091  *(string.pointer++) = '\'';
3092  SKIP(parser);
3093  SKIP(parser);
3094  }
3095 
3096  /* Check for the right quote. */
3097 
3098  else if (CHECK(parser->buffer, single ? '\'' : '"'))
3099  {
3100  break;
3101  }
3102 
3103  /* Check for an escaped line break. */
3104 
3105  else if (!single && CHECK(parser->buffer, '\\')
3106  && IS_BREAK_AT(parser->buffer, 1))
3107  {
3108  if (!CACHE(parser, 3)) goto error;
3109  SKIP(parser);
3110  SKIP_LINE(parser);
3111  leading_blanks = 1;
3112  break;
3113  }
3114 
3115  /* Check for an escape sequence. */
3116 
3117  else if (!single && CHECK(parser->buffer, '\\'))
3118  {
3119  size_t code_length = 0;
3120 
3121  if (!STRING_EXTEND(parser, string)) goto error;
3122 
3123  /* Check the escape character. */
3124 
3125  switch (parser->buffer.pointer[1])
3126  {
3127  case '0':
3128  *(string.pointer++) = '\0';
3129  break;
3130 
3131  case 'a':
3132  *(string.pointer++) = '\x07';
3133  break;
3134 
3135  case 'b':
3136  *(string.pointer++) = '\x08';
3137  break;
3138 
3139  case 't':
3140  case '\t':
3141  *(string.pointer++) = '\x09';
3142  break;
3143 
3144  case 'n':
3145  *(string.pointer++) = '\x0A';
3146  break;
3147 
3148  case 'v':
3149  *(string.pointer++) = '\x0B';
3150  break;
3151 
3152  case 'f':
3153  *(string.pointer++) = '\x0C';
3154  break;
3155 
3156  case 'r':
3157  *(string.pointer++) = '\x0D';
3158  break;
3159 
3160  case 'e':
3161  *(string.pointer++) = '\x1B';
3162  break;
3163 
3164  case ' ':
3165  *(string.pointer++) = '\x20';
3166  break;
3167 
3168  case '"':
3169  *(string.pointer++) = '"';
3170  break;
3171 
3172  case '\'':
3173  *(string.pointer++) = '\'';
3174  break;
3175 
3176  case '\\':
3177  *(string.pointer++) = '\\';
3178  break;
3179 
3180  case 'N': /* NEL (#x85) */
3181  *(string.pointer++) = '\xC2';
3182  *(string.pointer++) = '\x85';
3183  break;
3184 
3185  case '_': /* #xA0 */
3186  *(string.pointer++) = '\xC2';
3187  *(string.pointer++) = '\xA0';
3188  break;
3189 
3190  case 'L': /* LS (#x2028) */
3191  *(string.pointer++) = '\xE2';
3192  *(string.pointer++) = '\x80';
3193  *(string.pointer++) = '\xA8';
3194  break;
3195 
3196  case 'P': /* PS (#x2029) */
3197  *(string.pointer++) = '\xE2';
3198  *(string.pointer++) = '\x80';
3199  *(string.pointer++) = '\xA9';
3200  break;
3201 
3202  case 'x':
3203  code_length = 2;
3204  break;
3205 
3206  case 'u':
3207  code_length = 4;
3208  break;
3209 
3210  case 'U':
3211  code_length = 8;
3212  break;
3213 
3214  default:
3215  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3216  start_mark, "found unknown escape character");
3217  goto error;
3218  }
3219 
3220  SKIP(parser);
3221  SKIP(parser);
3222 
3223  /* Consume an arbitrary escape code. */
3224 
3225  if (code_length)
3226  {
3227  unsigned int value = 0;
3228  size_t k;
3229 
3230  /* Scan the character value. */
3231 
3232  if (!CACHE(parser, code_length)) goto error;
3233 
3234  for (k = 0; k < code_length; k ++) {
3235  if (!IS_HEX_AT(parser->buffer, k)) {
3236  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3237  start_mark, "did not find expected hexdecimal number");
3238  goto error;
3239  }
3240  value = (value << 4) + AS_HEX_AT(parser->buffer, k);
3241  }
3242 
3243  /* Check the value and write the character. */
3244 
3245  if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
3246  yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3247  start_mark, "found invalid Unicode character escape code");
3248  goto error;
3249  }
3250 
3251  if (value <= 0x7F) {
3252  *(string.pointer++) = value;
3253  }
3254  else if (value <= 0x7FF) {
3255  *(string.pointer++) = 0xC0 + (value >> 6);
3256  *(string.pointer++) = 0x80 + (value & 0x3F);
3257  }
3258  else if (value <= 0xFFFF) {
3259  *(string.pointer++) = 0xE0 + (value >> 12);
3260  *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3261  *(string.pointer++) = 0x80 + (value & 0x3F);
3262  }
3263  else {
3264  *(string.pointer++) = 0xF0 + (value >> 18);
3265  *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
3266  *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3267  *(string.pointer++) = 0x80 + (value & 0x3F);
3268  }
3269 
3270  /* Advance the pointer. */
3271 
3272  for (k = 0; k < code_length; k ++) {
3273  SKIP(parser);
3274  }
3275  }
3276  }
3277 
3278  else
3279  {
3280  /* It is a non-escaped non-blank character. */
3281 
3282  if (!READ(parser, string)) goto error;
3283  }
3284 
3285  if (!CACHE(parser, 2)) goto error;
3286  }
3287 
3288  /* Check if we are at the end of the scalar. */
3289 
3290  if (CHECK(parser->buffer, single ? '\'' : '"'))
3291  break;
3292 
3293  /* Consume blank characters. */
3294 
3295  if (!CACHE(parser, 1)) goto error;
3296 
3297  while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3298  {
3299  if (IS_BLANK(parser->buffer))
3300  {
3301  /* Consume a space or a tab character. */
3302 
3303  if (!leading_blanks) {
3304  if (!READ(parser, whitespaces)) goto error;
3305  }
3306  else {
3307  SKIP(parser);
3308  }
3309  }
3310  else
3311  {
3312  if (!CACHE(parser, 2)) goto error;
3313 
3314  /* Check if it is a first line break. */
3315 
3316  if (!leading_blanks)
3317  {
3318  CLEAR(parser, whitespaces);
3319  if (!READ_LINE(parser, leading_break)) goto error;
3320  leading_blanks = 1;
3321  }
3322  else
3323  {
3324  if (!READ_LINE(parser, trailing_breaks)) goto error;
3325  }
3326  }
3327  if (!CACHE(parser, 1)) goto error;
3328  }
3329 
3330  /* Join the whitespaces or fold line breaks. */
3331 
3332  if (leading_blanks)
3333  {
3334  /* Do we need to fold line breaks? */
3335 
3336  if (leading_break.start[0] == '\n') {
3337  if (trailing_breaks.start[0] == '\0') {
3338  if (!STRING_EXTEND(parser, string)) goto error;
3339  *(string.pointer++) = ' ';
3340  }
3341  else {
3342  if (!JOIN(parser, string, trailing_breaks)) goto error;
3343  CLEAR(parser, trailing_breaks);
3344  }
3345  CLEAR(parser, leading_break);
3346  }
3347  else {
3348  if (!JOIN(parser, string, leading_break)) goto error;
3349  if (!JOIN(parser, string, trailing_breaks)) goto error;
3350  CLEAR(parser, leading_break);
3351  CLEAR(parser, trailing_breaks);
3352  }
3353  }
3354  else
3355  {
3356  if (!JOIN(parser, string, whitespaces)) goto error;
3357  CLEAR(parser, whitespaces);
3358  }
3359  }
3360 
3361  /* Eat the right quote. */
3362 
3363  SKIP(parser);
3364 
3365  end_mark = parser->mark;
3366 
3367  /* Create a token. */
3368 
3369  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3371  start_mark, end_mark);
3372 
3373  STRING_DEL(parser, leading_break);
3374  STRING_DEL(parser, trailing_breaks);
3375  STRING_DEL(parser, whitespaces);
3376 
3377  return 1;
3378 
3379 error:
3380  STRING_DEL(parser, string);
3381  STRING_DEL(parser, leading_break);
3382  STRING_DEL(parser, trailing_breaks);
3383  STRING_DEL(parser, whitespaces);
3384 
3385  return 0;
3386 }
3387 
3388 /*
3389  * Scan a plain scalar.
3390  */
3391 
3392 static int
3394 {
3395  yaml_mark_t start_mark;
3396  yaml_mark_t end_mark;
3397  yaml_string_t string = NULL_STRING;
3398  yaml_string_t leading_break = NULL_STRING;
3399  yaml_string_t trailing_breaks = NULL_STRING;
3400  yaml_string_t whitespaces = NULL_STRING;
3401  int leading_blanks = 0;
3402  int indent = parser->indent+1;
3403 
3404  if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3405  if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3406  if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3407  if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3408 
3409  start_mark = end_mark = parser->mark;
3410 
3411  /* Consume the content of the plain scalar. */
3412 
3413  while (1)
3414  {
3415  /* Check for a document indicator. */
3416 
3417  if (!CACHE(parser, 4)) goto error;
3418 
3419  if (parser->mark.column == 0 &&
3420  ((CHECK_AT(parser->buffer, '-', 0) &&
3421  CHECK_AT(parser->buffer, '-', 1) &&
3422  CHECK_AT(parser->buffer, '-', 2)) ||
3423  (CHECK_AT(parser->buffer, '.', 0) &&
3424  CHECK_AT(parser->buffer, '.', 1) &&
3425  CHECK_AT(parser->buffer, '.', 2))) &&
3426  IS_BLANKZ_AT(parser->buffer, 3)) break;
3427 
3428  /* Check for a comment. */
3429 
3430  if (CHECK(parser->buffer, '#'))
3431  break;
3432 
3433  /* Consume non-blank characters. */
3434 
3435  while (!IS_BLANKZ(parser->buffer))
3436  {
3437  /* Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". */
3438 
3439  if (parser->flow_level
3440  && CHECK(parser->buffer, ':')
3441  && !IS_BLANKZ_AT(parser->buffer, 1)) {
3442  yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3443  start_mark, "found unexpected ':'");
3444  goto error;
3445  }
3446 
3447  /* Check for indicators that may end a plain scalar. */
3448 
3449  if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
3450  || (parser->flow_level &&
3451  (CHECK(parser->buffer, ',') || CHECK(parser->buffer, ':')
3452  || CHECK(parser->buffer, '?') || CHECK(parser->buffer, '[')
3453  || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
3454  || CHECK(parser->buffer, '}'))))
3455  break;
3456 
3457  /* Check if we need to join whitespaces and breaks. */
3458 
3459  if (leading_blanks || whitespaces.start != whitespaces.pointer)
3460  {
3461  if (leading_blanks)
3462  {
3463  /* Do we need to fold line breaks? */
3464 
3465  if (leading_break.start[0] == '\n') {
3466  if (trailing_breaks.start[0] == '\0') {
3467  if (!STRING_EXTEND(parser, string)) goto error;
3468  *(string.pointer++) = ' ';
3469  }
3470  else {
3471  if (!JOIN(parser, string, trailing_breaks)) goto error;
3472  CLEAR(parser, trailing_breaks);
3473  }
3474  CLEAR(parser, leading_break);
3475  }
3476  else {
3477  if (!JOIN(parser, string, leading_break)) goto error;
3478  if (!JOIN(parser, string, trailing_breaks)) goto error;
3479  CLEAR(parser, leading_break);
3480  CLEAR(parser, trailing_breaks);
3481  }
3482 
3483  leading_blanks = 0;
3484  }
3485  else
3486  {
3487  if (!JOIN(parser, string, whitespaces)) goto error;
3488  CLEAR(parser, whitespaces);
3489  }
3490  }
3491 
3492  /* Copy the character. */
3493 
3494  if (!READ(parser, string)) goto error;
3495 
3496  end_mark = parser->mark;
3497 
3498  if (!CACHE(parser, 2)) goto error;
3499  }
3500 
3501  /* Is it the end? */
3502 
3503  if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
3504  break;
3505 
3506  /* Consume blank characters. */
3507 
3508  if (!CACHE(parser, 1)) goto error;
3509 
3510  while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3511  {
3512  if (IS_BLANK(parser->buffer))
3513  {
3514  /* Check for tab character that abuse intendation. */
3515 
3516  if (leading_blanks && (int)parser->mark.column < indent
3517  && IS_TAB(parser->buffer)) {
3518  yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3519  start_mark, "found a tab character that violate intendation");
3520  goto error;
3521  }
3522 
3523  /* Consume a space or a tab character. */
3524 
3525  if (!leading_blanks) {
3526  if (!READ(parser, whitespaces)) goto error;
3527  }
3528  else {
3529  SKIP(parser);
3530  }
3531  }
3532  else
3533  {
3534  if (!CACHE(parser, 2)) goto error;
3535 
3536  /* Check if it is a first line break. */
3537 
3538  if (!leading_blanks)
3539  {
3540  CLEAR(parser, whitespaces);
3541  if (!READ_LINE(parser, leading_break)) goto error;
3542  leading_blanks = 1;
3543  }
3544  else
3545  {
3546  if (!READ_LINE(parser, trailing_breaks)) goto error;
3547  }
3548  }
3549  if (!CACHE(parser, 1)) goto error;
3550  }
3551 
3552  /* Check intendation level. */
3553 
3554  if (!parser->flow_level && (int)parser->mark.column < indent)
3555  break;
3556  }
3557 
3558  /* Create a token. */
3559 
3560  SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3561  YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);
3562 
3563  /* Note that we change the 'simple_key_allowed' flag. */
3564 
3565  if (leading_blanks) {
3566  parser->simple_key_allowed = 1;
3567  }
3568 
3569  STRING_DEL(parser, leading_break);
3570  STRING_DEL(parser, trailing_breaks);
3571  STRING_DEL(parser, whitespaces);
3572 
3573  return 1;
3574 
3575 error:
3576  STRING_DEL(parser, string);
3577  STRING_DEL(parser, leading_break);
3578  STRING_DEL(parser, trailing_breaks);
3579  STRING_DEL(parser, whitespaces);
3580 
3581  return 0;
3582 }
3583 
The double-quoted scalar style.
Definition: yaml.h:174
volatile VALUE tmp
Definition: tcltklib.c:10208
#define PUSH(x)
Definition: bigdecimal.c:64
VP_EXPORT int
Definition: bigdecimal.c:5071
A BLOCK-SEQUENCE-START token.
Definition: yaml.h:232
The pointer position.
Definition: yaml.h:145
static int yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
Definition: scanner.c:2004
static int yaml_parser_fetch_directive(yaml_parser_t *parser)
Definition: scanner.c:1376
size_t strlen(const char *)
yaml_token_t * tail
The tail of the tokens queue.
Definition: yaml.h:1200
int minor
Definition: tcltklib.c:110
struct yaml_parser_s::@38 buffer
The working buffer.
A FLOW-SEQUENCE-START token.
Definition: yaml.h:239
#define NULL_STRING
Definition: yaml_private.h:126
A VALUE token.
Definition: yaml.h:254
#define IS_BOM(string)
Definition: yaml_private.h:301
static int yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token, int single)
Definition: scanner.c:3025
Cannot allocate or reallocate a block of memory.
Definition: yaml.h:127
static int yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
Definition: scanner.c:2395
#define ALIAS_TOKEN_INIT(token, token_value, start_mark, end_mark)
Definition: yaml_private.h:516
static int yaml_parser_fetch_stream_start(yaml_parser_t *parser)
Definition: scanner.c:1300
A BLOCK-END token.
Definition: yaml.h:236
yaml_string_extend(yaml_char_t **start, yaml_char_t **pointer, yaml_char_t **end)
Definition: api.c:74
#define AS_DIGIT(string)
Definition: yaml_private.h:220
struct yaml_parser_s::@40 tokens
The tokens queue.
int stream_start_produced
Have we started to scan the input stream?
Definition: yaml.h:1183
static int yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
Definition: scanner.c:1835
static int yaml_parser_fetch_block_entry(yaml_parser_t *parser)
Definition: scanner.c:1578
yaml_encoding_t encoding
The input encoding.
Definition: yaml.h:1165
The parser structure.
Definition: yaml.h:1081
A BLOCK-SEQUENCE-END token.
Definition: yaml.h:234
#define SKIP(parser)
Definition: scanner.c:494
A FLOW-ENTRY token.
Definition: yaml.h:250
#define READ(parser, string)
Definition: scanner.c:518
static int yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1500
static int yaml_parser_scan_tag_directive_value(yaml_parser_t *parser, yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix)
Definition: scanner.c:2265
static int yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token, int literal)
Definition: scanner.c:2741
unsigned char yaml_char_t
The character type (UTF-8 octet).
Definition: yaml.h:78
const unsigned char * start
The string start pointer.
Definition: yaml.h:1123
const char * context
The error context.
Definition: yaml.h:1099
yaml_mark_t mark
The position mark.
Definition: yaml.h:1004
#define IS_BREAK_AT(string, offset)
Definition: yaml_private.h:332
yaml_char_t * pointer
Definition: yaml_private.h:114
static int yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
Definition: scanner.c:1866
#define READ_LINE(parser, string)
Definition: scanner.c:530
int indent
The current indentation level.
Definition: yaml.h:1220
#define IS_BLANK(string)
Definition: yaml_private.h:326
static int yaml_parser_scan_version_directive_value(yaml_parser_t *parser, yaml_mark_t start_mark, int *major, int *minor)
Definition: scanner.c:2171
#define TAG_DIRECTIVE_TOKEN_INIT(token, token_handle, token_prefix, start_mark, end_mark)
Definition: yaml_private.h:540
static int yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1457
yaml_char_t * pointer
The current position of the buffer.
Definition: yaml.h:1144
An ALIAS token.
Definition: yaml.h:257
yaml_mark_t mark
The mark of the current position.
Definition: yaml.h:1171
#define head
Definition: st.c:107
#define IS_HEX_AT(string, offset)
Definition: yaml_private.h:226
yaml_char_t * start
Definition: yaml_private.h:112
static int yaml_parser_remove_simple_key(yaml_parser_t *parser)
Definition: scanner.c:1141
#define DEQUEUE(context, queue)
Definition: yaml_private.h:484
A FLOW-SEQUENCE-END token.
Definition: yaml.h:241
#define INITIAL_STRING_SIZE
Definition: yaml_private.h:89
#define STRING_DEL(context, string)
Definition: yaml_private.h:144
#define IS_SPACE(string)
Definition: yaml_private.h:309
yaml_free(void *ptr)
Definition: api.c:51
The folded scalar style.
Definition: yaml.h:179
memset(y->frac+ix+1, 0,(y->Prec-(ix+1))*sizeof(BDIGIT))
#define AS_HEX_AT(string, offset)
Definition: yaml_private.h:240
int * top
The top of the stack.
Definition: yaml.h:1216
#define QUEUE_INSERT(context, queue, index, value)
Definition: yaml_private.h:487
static int yaml_parser_fetch_next_token(yaml_parser_t *parser)
Definition: scanner.c:860
int required
Is a simple key required?
Definition: yaml.h:998
int simple_key_allowed
May a simple key occur at the current position?
Definition: yaml.h:1223
#define CACHE(parser, length)
Definition: scanner.c:485
#define IS_TAB(string)
Definition: yaml_private.h:317
static int yaml_parser_fetch_document_indicator(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1412
static int yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive, yaml_mark_t start_mark, yaml_string_t *string)
Definition: scanner.c:2672
static int yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
Definition: scanner.c:1267
static int yaml_parser_fetch_stream_end(yaml_parser_t *parser)
Definition: scanner.c:1338
static int yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context, yaml_mark_t context_mark, const char *problem)
Definition: scanner.c:782
#define IS_BREAKZ(string)
Definition: yaml_private.h:358
#define CHECK(string, octet)
Definition: yaml_private.h:184
#define IS_ALPHA(string)
Definition: yaml_private.h:201
static int yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser, int *indent, yaml_string_t *breaks, yaml_mark_t start_mark, yaml_mark_t *end_mark)
Definition: scanner.c:2963
yaml_token_delete(yaml_token_t *token)
Free any memory allocated for a token object.
Definition: api.c:578
#define TOKEN_INIT(token, token_type, token_start_mark, token_end_mark)
Definition: yaml_private.h:503
static int yaml_parser_fetch_tag(yaml_parser_t *parser)
Definition: scanner.c:1804
struct yaml_parser_s::@41 indents
The indentation levels stack.
#define YAML_DECLARE(type)
The public API declaration.
Definition: yaml.h:38
ID token
Definition: ripper.c:16481
A FLOW-MAPPING-START token.
Definition: yaml.h:243
static int yaml_parser_decrease_flow_level(yaml_parser_t *parser)
Definition: scanner.c:1194
#define TAG_TOKEN_INIT(token, token_handle, token_suffix, start_mark, end_mark)
Definition: yaml_private.h:524
#define IS_BLANK_AT(string, offset)
Definition: yaml_private.h:323
yaml_error_type_t error
Error type.
Definition: yaml.h:1089
memcpy(buf+1, str, len)
#define IS_DIGIT(string)
Definition: yaml_private.h:211
static int yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive, yaml_mark_t start_mark, yaml_char_t **handle)
Definition: scanner.c:2510
#define STREAM_START_TOKEN_INIT(token, token_encoding, start_mark, end_mark)
Definition: yaml_private.h:509
static int yaml_parser_fetch_value(yaml_parser_t *parser)
Definition: scanner.c:1692
volatile VALUE value
Definition: tcltklib.c:9441
#define CHECK_AT(string, octet, offset)
Definition: yaml_private.h:177
#define const
Definition: strftime.c:102
size_t token_number
The number of the token.
Definition: yaml.h:1001
static int yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
Definition: scanner.c:1543
VP_EXPORT void
Definition: bigdecimal.c:5104
static int yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
Definition: scanner.c:3393
size_t length
Definition: tcltklib.c:4558
static int yaml_parser_increase_flow_level(yaml_parser_t *parser)
Definition: scanner.c:1168
int type
Definition: tcltklib.c:111
A STREAM-END token.
Definition: yaml.h:220
enum yaml_token_type_e yaml_token_type_t
Token types.
yaml_token_t * head
The head of the tokens queue.
Definition: yaml.h:1198
#define ENQUEUE(context, queue, value)
Definition: yaml_private.h:475
An ANCHOR token.
Definition: yaml.h:259
#define JOIN(context, string_a, string_b)
Definition: yaml_private.h:160
#define ANCHOR_TOKEN_INIT(token, token_value, start_mark, end_mark)
Definition: yaml_private.h:520
#define SCALAR_TOKEN_INIT(token, token_value, token_length, token_style, start_mark, end_mark)
Definition: yaml_private.h:529
yaml_mark_t context_mark
The context position.
Definition: yaml.h:1101
static int yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
Definition: scanner.c:1774
struct yaml_parser_s::@42 simple_keys
The stack of simple keys.
A FLOW-MAPPING-END token.
Definition: yaml.h:245
yaml_mark_t problem_mark
The problem position.
Definition: yaml.h:1097
size_t tokens_parsed
The number of tokens fetched from the queue.
Definition: yaml.h:1204
static int yaml_parser_save_simple_key(yaml_parser_t *parser)
Definition: scanner.c:1097
gz end
Definition: zlib.c:2270
#define STRING_EXTEND(context, string)
Definition: yaml_private.h:148
#define IS_BLANKZ_AT(string, offset)
Definition: yaml_private.h:373
A DOCUMENT-START token.
Definition: yaml.h:227
static int yaml_parser_scan_to_next_token(yaml_parser_t *parser)
Definition: scanner.c:1928
#define STRING_INIT(context, string, size)
Definition: yaml_private.h:135
#define STREAM_END_TOKEN_INIT(token, start_mark, end_mark)
Definition: yaml_private.h:513
if(RB_TYPE_P(r, T_FLOAT))
Definition: bigdecimal.c:1200
static int yaml_parser_scan_tag_uri(yaml_parser_t *parser, int directive, yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
Definition: scanner.c:2577
This structure holds information about a potential simple key.
Definition: yaml.h:993
#define MAX_NUMBER_LENGTH
Definition: scanner.c:2205
The plain scalar style.
Definition: yaml.h:169
The literal scalar style.
Definition: yaml.h:177
static int yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token, yaml_token_type_t type)
Definition: scanner.c:2329
size_t line
The position line.
Definition: yaml.h:150
#define POP(context, stack)
Definition: yaml_private.h:457
A DOCUMENT-END token.
Definition: yaml.h:229
#define IS_Z(string)
Definition: yaml_private.h:290
static VALUE mark(VALUE self)
Definition: psych_parser.c:523
static int yaml_parser_stale_simple_keys(yaml_parser_t *parser)
Definition: scanner.c:1056
Cannot scan the input stream.
Definition: yaml.h:132
static int yaml_parser_fetch_key(yaml_parser_t *parser)
Definition: scanner.c:1639
size_t index
The position index.
Definition: yaml.h:147
static int yaml_parser_scan_version_directive_number(yaml_parser_t *parser, yaml_mark_t start_mark, int *number)
Definition: scanner.c:2218
struct parser_params * parser
Definition: ripper.c:4437
#define IS_BLANKZ(string)
Definition: yaml_private.h:376
yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
Scan the input stream and produce the next token.
Definition: scanner.c:742
#define assert(condition)
Definition: ossl.h:45
yaml_malloc(size_t size)
Definition: api.c:31
const char * problem
Error description.
Definition: yaml.h:1091
int possible
Is a simple key possible?
Definition: yaml.h:995
A BLOCK-ENTRY token.
Definition: yaml.h:248
yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
Definition: scanner.c:800
int major
Definition: tcltklib.c:109
static int yaml_parser_scan_directive_name(yaml_parser_t *parser, yaml_mark_t start_mark, yaml_char_t **name)
Definition: scanner.c:2120
#define VERSION_DIRECTIVE_TOKEN_INIT(token, token_major, token_minor, start_mark, end_mark)
Definition: yaml_private.h:535
#define IS_BREAK(string)
Definition: yaml_private.h:344
size_t column
The position column.
Definition: yaml.h:153
#define NULL
Definition: _sdbm.c:103
const char * name
Definition: nkf.c:208
A KEY token.
Definition: yaml.h:252
int flow_level
The number of unclosed '[' and '{' indicators.
Definition: yaml.h:1189
static int yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column, ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
Definition: scanner.c:1212
#define SKIP_LINE(parser)
Definition: scanner.c:500
The token structure.
Definition: yaml.h:267
#define CLEAR(context, string)
Definition: yaml_private.h:156
static int yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
Definition: scanner.c:1897
The single-quoted scalar style.
Definition: yaml.h:172