JsonCpp project page JsonCpp home page

json_reader.cpp
Go to the documentation of this file.
1 #include <json/reader.h>
2 #include <json/value.h>
3 #include <utility>
4 #include <cstdio>
5 #include <cassert>
6 #include <cstring>
7 #include <iostream>
8 #include <stdexcept>
9 
10 #if _MSC_VER >= 1400 // VC++ 8.0
11 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated.
12 #endif
13 
14 namespace Json {
15 
16 // Implementation of class Features
17 // ////////////////////////////////
18 
20  : allowComments_( true )
21  , strictRoot_( false )
22 {
23 }
24 
25 
26 Features
28 {
29  return Features();
30 }
31 
32 
33 Features
35 {
36  Features features;
37  features.allowComments_ = false;
38  features.strictRoot_ = true;
39  return features;
40 }
41 
42 // Implementation of class Reader
43 // ////////////////////////////////
44 
45 
46 static inline bool
48 {
49  return c == c1 || c == c2 || c == c3 || c == c4;
50 }
51 
52 static inline bool
54 {
55  return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
56 }
57 
58 
59 static bool
61  Reader::Location end )
62 {
63  for ( ;begin < end; ++begin )
64  if ( *begin == '\n' || *begin == '\r' )
65  return true;
66  return false;
67 }
68 
// Encodes the code point cp as a UTF-8 byte sequence.
// Returns one to four bytes for cp <= 0x10FFFF and an empty string for any
// larger value. Surrogate values (0xD800-0xDFFF) are not rejected; they are
// encoded like any other three-byte code point.
static std::string codePointToUTF8(unsigned int cp)
{
   std::string result;

   // based on description from http://en.wikipedia.org/wiki/UTF-8

   if (cp <= 0x7f)
   {
      result.resize(1);
      result[0] = static_cast<char>(cp);
   }
   else if (cp <= 0x7FF)
   {
      result.resize(2);
      result[1] = static_cast<char>(0x80 | (0x3f & cp));
      result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
   }
   else if (cp <= 0xFFFF)
   {
      // Build each byte as an unsigned value and convert once, exactly like
      // the other branches, instead of OR-ing into an already narrowed char.
      result.resize(3);
      result[2] = static_cast<char>(0x80 | (0x3f & cp));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[0] = static_cast<char>(0xE0 | (0xf & (cp >> 12)));
   }
   else if (cp <= 0x10FFFF)
   {
      result.resize(4);
      result[3] = static_cast<char>(0x80 | (0x3f & cp));
      result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
      result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
   }

   return result;
}
104 
105 
106 // Class Reader
107 // //////////////////////////////////////////////////////////////////
108 
110  : features_( Features::all() )
111 {
112 }
113 
114 
115 Reader::Reader( const Features &features )
116  : features_( features )
117 {
118 }
119 
120 
121 bool
122 Reader::parse( const std::string &document,
123  Value &root,
124  bool collectComments )
125 {
126  document_ = document;
127  const char *begin = document_.c_str();
128  const char *end = begin + document_.length();
129  return parse( begin, end, root, collectComments );
130 }
131 
132 
133 bool
134 Reader::parse( std::istream& sin,
135  Value &root,
136  bool collectComments )
137 {
138  //std::istream_iterator<char> begin(sin);
139  //std::istream_iterator<char> end;
140  // Those would allow streamed input from a file, if parse() were a
141  // template function.
142 
143  // Since std::string is reference-counted, this at least does not
144  // create an extra copy.
145  std::string doc;
146  std::getline(sin, doc, (char)EOF);
147  return parse( doc, root, collectComments );
148 }
149 
150 bool
151 Reader::parse( const char *beginDoc, const char *endDoc,
152  Value &root,
153  bool collectComments )
154 {
155  if ( !features_.allowComments_ )
156  {
157  collectComments = false;
158  }
159 
160  begin_ = beginDoc;
161  end_ = endDoc;
162  collectComments_ = collectComments;
163  current_ = begin_;
164  lastValueEnd_ = 0;
165  lastValue_ = 0;
166  commentsBefore_ = "";
167  errors_.clear();
168  while ( !nodes_.empty() )
169  nodes_.pop();
170  nodes_.push( &root );
171 
172  bool successful = readValue();
173  Token token;
174  skipCommentTokens( token );
175  if ( collectComments_ && !commentsBefore_.empty() )
176  root.setComment( commentsBefore_, commentAfter );
177  if ( features_.strictRoot_ )
178  {
179  if ( !root.isArray() && !root.isObject() )
180  {
181  // Set error location to start of doc, ideally should be first token found in doc
182  token.type_ = tokenError;
183  token.start_ = beginDoc;
184  token.end_ = endDoc;
185  addError( "A valid JSON document must be either an array or an object value.",
186  token );
187  return false;
188  }
189  }
190  return successful;
191 }
192 
193 
194 bool
195 Reader::readValue()
196 {
197  Token token;
198  skipCommentTokens( token );
199  bool successful = true;
200 
201  if ( collectComments_ && !commentsBefore_.empty() )
202  {
203  currentValue().setComment( commentsBefore_, commentBefore );
204  commentsBefore_ = "";
205  }
206 
207 
208  switch ( token.type_ )
209  {
210  case tokenObjectBegin:
211  successful = readObject( token );
212  break;
213  case tokenArrayBegin:
214  successful = readArray( token );
215  break;
216  case tokenNumber:
217  successful = decodeNumber( token );
218  break;
219  case tokenString:
220  successful = decodeString( token );
221  break;
222  case tokenTrue:
223  currentValue() = true;
224  break;
225  case tokenFalse:
226  currentValue() = false;
227  break;
228  case tokenNull:
229  currentValue() = Value();
230  break;
231  default:
232  return addError( "Syntax error: value, object or array expected.", token );
233  }
234 
235  if ( collectComments_ )
236  {
237  lastValueEnd_ = current_;
238  lastValue_ = &currentValue();
239  }
240 
241  return successful;
242 }
243 
244 
245 void
246 Reader::skipCommentTokens( Token &token )
247 {
248  if ( features_.allowComments_ )
249  {
250  do
251  {
252  readToken( token );
253  }
254  while ( token.type_ == tokenComment );
255  }
256  else
257  {
258  readToken( token );
259  }
260 }
261 
262 
263 bool
264 Reader::expectToken( TokenType type, Token &token, const char *message )
265 {
266  readToken( token );
267  if ( token.type_ != type )
268  return addError( message, token );
269  return true;
270 }
271 
272 
273 bool
274 Reader::readToken( Token &token )
275 {
276  skipSpaces();
277  token.start_ = current_;
278  Char c = getNextChar();
279  bool ok = true;
280  switch ( c )
281  {
282  case '{':
283  token.type_ = tokenObjectBegin;
284  break;
285  case '}':
286  token.type_ = tokenObjectEnd;
287  break;
288  case '[':
289  token.type_ = tokenArrayBegin;
290  break;
291  case ']':
292  token.type_ = tokenArrayEnd;
293  break;
294  case '"':
295  token.type_ = tokenString;
296  ok = readString();
297  break;
298  case '/':
299  token.type_ = tokenComment;
300  ok = readComment();
301  break;
302  case '0':
303  case '1':
304  case '2':
305  case '3':
306  case '4':
307  case '5':
308  case '6':
309  case '7':
310  case '8':
311  case '9':
312  case '-':
313  token.type_ = tokenNumber;
314  readNumber();
315  break;
316  case 't':
317  token.type_ = tokenTrue;
318  ok = match( "rue", 3 );
319  break;
320  case 'f':
321  token.type_ = tokenFalse;
322  ok = match( "alse", 4 );
323  break;
324  case 'n':
325  token.type_ = tokenNull;
326  ok = match( "ull", 3 );
327  break;
328  case ',':
329  token.type_ = tokenArraySeparator;
330  break;
331  case ':':
332  token.type_ = tokenMemberSeparator;
333  break;
334  case 0:
335  token.type_ = tokenEndOfStream;
336  break;
337  default:
338  ok = false;
339  break;
340  }
341  if ( !ok )
342  token.type_ = tokenError;
343  token.end_ = current_;
344  return true;
345 }
346 
347 
348 void
349 Reader::skipSpaces()
350 {
351  while ( current_ != end_ )
352  {
353  Char c = *current_;
354  if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' )
355  ++current_;
356  else
357  break;
358  }
359 }
360 
361 
362 bool
363 Reader::match( Location pattern,
364  int patternLength )
365 {
366  if ( end_ - current_ < patternLength )
367  return false;
368  int index = patternLength;
369  while ( index-- )
370  if ( current_[index] != pattern[index] )
371  return false;
372  current_ += patternLength;
373  return true;
374 }
375 
376 
377 bool
378 Reader::readComment()
379 {
380  Location commentBegin = current_ - 1;
381  Char c = getNextChar();
382  bool successful = false;
383  if ( c == '*' )
384  successful = readCStyleComment();
385  else if ( c == '/' )
386  successful = readCppStyleComment();
387  if ( !successful )
388  return false;
389 
390  if ( collectComments_ )
391  {
392  CommentPlacement placement = commentBefore;
393  if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) )
394  {
395  if ( c != '*' || !containsNewLine( commentBegin, current_ ) )
396  placement = commentAfterOnSameLine;
397  }
398 
399  addComment( commentBegin, current_, placement );
400  }
401  return true;
402 }
403 
404 
405 void
406 Reader::addComment( Location begin,
407  Location end,
408  CommentPlacement placement )
409 {
410  assert( collectComments_ );
411  if ( placement == commentAfterOnSameLine )
412  {
413  assert( lastValue_ != 0 );
414  lastValue_->setComment( std::string( begin, end ), placement );
415  }
416  else
417  {
418  if ( !commentsBefore_.empty() )
419  commentsBefore_ += "\n";
420  commentsBefore_ += std::string( begin, end );
421  }
422 }
423 
424 
425 bool
426 Reader::readCStyleComment()
427 {
428  while ( current_ != end_ )
429  {
430  Char c = getNextChar();
431  if ( c == '*' && *current_ == '/' )
432  break;
433  }
434  return getNextChar() == '/';
435 }
436 
437 
438 bool
439 Reader::readCppStyleComment()
440 {
441  while ( current_ != end_ )
442  {
443  Char c = getNextChar();
444  if ( c == '\r' || c == '\n' )
445  break;
446  }
447  return true;
448 }
449 
450 
451 void
452 Reader::readNumber()
453 {
454  while ( current_ != end_ )
455  {
456  if ( !(*current_ >= '0' && *current_ <= '9') &&
457  !in( *current_, '.', 'e', 'E', '+', '-' ) )
458  break;
459  ++current_;
460  }
461 }
462 
463 bool
464 Reader::readString()
465 {
466  Char c = 0;
467  while ( current_ != end_ )
468  {
469  c = getNextChar();
470  if ( c == '\\' )
471  getNextChar();
472  else if ( c == '"' )
473  break;
474  }
475  return c == '"';
476 }
477 
478 
479 bool
480 Reader::readObject( Token &tokenStart )
481 {
482  Token tokenName;
483  std::string name;
484  currentValue() = Value( objectValue );
485  while ( readToken( tokenName ) )
486  {
487  bool initialTokenOk = true;
488  while ( tokenName.type_ == tokenComment && initialTokenOk )
489  initialTokenOk = readToken( tokenName );
490  if ( !initialTokenOk )
491  break;
492  if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object
493  return true;
494  if ( tokenName.type_ != tokenString )
495  break;
496 
497  name = "";
498  if ( !decodeString( tokenName, name ) )
499  return recoverFromError( tokenObjectEnd );
500 
501  Token colon;
502  if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator )
503  {
504  return addErrorAndRecover( "Missing ':' after object member name",
505  colon,
506  tokenObjectEnd );
507  }
508  Value &value = currentValue()[ name ];
509  nodes_.push( &value );
510  bool ok = readValue();
511  nodes_.pop();
512  if ( !ok ) // error already set
513  return recoverFromError( tokenObjectEnd );
514 
515  Token comma;
516  if ( !readToken( comma )
517  || ( comma.type_ != tokenObjectEnd &&
518  comma.type_ != tokenArraySeparator &&
519  comma.type_ != tokenComment ) )
520  {
521  return addErrorAndRecover( "Missing ',' or '}' in object declaration",
522  comma,
523  tokenObjectEnd );
524  }
525  bool finalizeTokenOk = true;
526  while ( comma.type_ == tokenComment &&
527  finalizeTokenOk )
528  finalizeTokenOk = readToken( comma );
529  if ( comma.type_ == tokenObjectEnd )
530  return true;
531  }
532  return addErrorAndRecover( "Missing '}' or object member name",
533  tokenName,
534  tokenObjectEnd );
535 }
536 
537 
538 bool
539 Reader::readArray( Token &tokenStart )
540 {
541  currentValue() = Value( arrayValue );
542  skipSpaces();
543  if ( *current_ == ']' ) // empty array
544  {
545  Token endArray;
546  readToken( endArray );
547  return true;
548  }
549  int index = 0;
550  while ( true )
551  {
552  Value &value = currentValue()[ index++ ];
553  nodes_.push( &value );
554  bool ok = readValue();
555  nodes_.pop();
556  if ( !ok ) // error already set
557  return recoverFromError( tokenArrayEnd );
558 
559  Token token;
560  // Accept Comment after last item in the array.
561  ok = readToken( token );
562  while ( token.type_ == tokenComment && ok )
563  {
564  ok = readToken( token );
565  }
566  bool badTokenType = ( token.type_ == tokenArraySeparator &&
567  token.type_ == tokenArrayEnd );
568  if ( !ok || badTokenType )
569  {
570  return addErrorAndRecover( "Missing ',' or ']' in array declaration",
571  token,
572  tokenArrayEnd );
573  }
574  if ( token.type_ == tokenArrayEnd )
575  break;
576  }
577  return true;
578 }
579 
580 
581 bool
582 Reader::decodeNumber( Token &token )
583 {
584  bool isDouble = false;
585  for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
586  {
587  isDouble = isDouble
588  || in( *inspect, '.', 'e', 'E', '+' )
589  || ( *inspect == '-' && inspect != token.start_ );
590  }
591  if ( isDouble )
592  return decodeDouble( token );
593  Location current = token.start_;
594  bool isNegative = *current == '-';
595  if ( isNegative )
596  ++current;
597  Value::UInt threshold = (isNegative ? Value::UInt(-Value::minInt)
598  : Value::maxUInt) / 10;
599  Value::UInt value = 0;
600  while ( current < token.end_ )
601  {
602  Char c = *current++;
603  if ( c < '0' || c > '9' )
604  return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
605  if ( value >= threshold )
606  return decodeDouble( token );
607  value = value * 10 + Value::UInt(c - '0');
608  }
609  if ( isNegative )
610  currentValue() = -Value::Int( value );
611  else if ( value <= Value::UInt(Value::maxInt) )
612  currentValue() = Value::Int( value );
613  else
614  currentValue() = value;
615  return true;
616 }
617 
618 
619 bool
620 Reader::decodeDouble( Token &token )
621 {
622  double value = 0;
623  const int bufferSize = 32;
624  int count;
625  int length = int(token.end_ - token.start_);
626  if ( length <= bufferSize )
627  {
628  Char buffer[bufferSize];
629  memcpy( buffer, token.start_, length );
630  buffer[length] = 0;
631  count = sscanf( buffer, "%lf", &value );
632  }
633  else
634  {
635  std::string buffer( token.start_, token.end_ );
636  count = sscanf( buffer.c_str(), "%lf", &value );
637  }
638 
639  if ( count != 1 )
640  return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
641  currentValue() = value;
642  return true;
643 }
644 
645 
646 bool
647 Reader::decodeString( Token &token )
648 {
649  std::string decoded;
650  if ( !decodeString( token, decoded ) )
651  return false;
652  currentValue() = decoded;
653  return true;
654 }
655 
656 
657 bool
658 Reader::decodeString( Token &token, std::string &decoded )
659 {
660  decoded.reserve( token.end_ - token.start_ - 2 );
661  Location current = token.start_ + 1; // skip '"'
662  Location end = token.end_ - 1; // do not include '"'
663  while ( current != end )
664  {
665  Char c = *current++;
666  if ( c == '"' )
667  break;
668  else if ( c == '\\' )
669  {
670  if ( current == end )
671  return addError( "Empty escape sequence in string", token, current );
672  Char escape = *current++;
673  switch ( escape )
674  {
675  case '"': decoded += '"'; break;
676  case '/': decoded += '/'; break;
677  case '\\': decoded += '\\'; break;
678  case 'b': decoded += '\b'; break;
679  case 'f': decoded += '\f'; break;
680  case 'n': decoded += '\n'; break;
681  case 'r': decoded += '\r'; break;
682  case 't': decoded += '\t'; break;
683  case 'u':
684  {
685  unsigned int unicode;
686  if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
687  return false;
688  decoded += codePointToUTF8(unicode);
689  }
690  break;
691  default:
692  return addError( "Bad escape sequence in string", token, current );
693  }
694  }
695  else
696  {
697  decoded += c;
698  }
699  }
700  return true;
701 }
702 
703 bool
704 Reader::decodeUnicodeCodePoint( Token &token,
705  Location &current,
706  Location end,
707  unsigned int &unicode )
708 {
709 
710  if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
711  return false;
712  if (unicode >= 0xD800 && unicode <= 0xDBFF)
713  {
714  // surrogate pairs
715  if (end - current < 6)
716  return addError( "additional six characters expected to parse unicode surrogate pair.", token, current );
717  unsigned int surrogatePair;
718  if (*(current++) == '\\' && *(current++)== 'u')
719  {
720  if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
721  {
722  unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
723  }
724  else
725  return false;
726  }
727  else
728  return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
729  }
730  return true;
731 }
732 
733 bool
734 Reader::decodeUnicodeEscapeSequence( Token &token,
735  Location &current,
736  Location end,
737  unsigned int &unicode )
738 {
739  if ( end - current < 4 )
740  return addError( "Bad unicode escape sequence in string: four digits expected.", token, current );
741  unicode = 0;
742  for ( int index =0; index < 4; ++index )
743  {
744  Char c = *current++;
745  unicode *= 16;
746  if ( c >= '0' && c <= '9' )
747  unicode += c - '0';
748  else if ( c >= 'a' && c <= 'f' )
749  unicode += c - 'a' + 10;
750  else if ( c >= 'A' && c <= 'F' )
751  unicode += c - 'A' + 10;
752  else
753  return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
754  }
755  return true;
756 }
757 
758 
759 bool
760 Reader::addError( const std::string &message,
761  Token &token,
762  Location extra )
763 {
764  ErrorInfo info;
765  info.token_ = token;
766  info.message_ = message;
767  info.extra_ = extra;
768  errors_.push_back( info );
769  return false;
770 }
771 
772 
773 bool
774 Reader::recoverFromError( TokenType skipUntilToken )
775 {
776  int errorCount = int(errors_.size());
777  Token skip;
778  while ( true )
779  {
780  if ( !readToken(skip) )
781  errors_.resize( errorCount ); // discard errors caused by recovery
782  if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream )
783  break;
784  }
785  errors_.resize( errorCount );
786  return false;
787 }
788 
789 
790 bool
791 Reader::addErrorAndRecover( const std::string &message,
792  Token &token,
793  TokenType skipUntilToken )
794 {
795  addError( message, token );
796  return recoverFromError( skipUntilToken );
797 }
798 
799 
800 Value &
801 Reader::currentValue()
802 {
803  return *(nodes_.top());
804 }
805 
806 
808 Reader::getNextChar()
809 {
810  if ( current_ == end_ )
811  return 0;
812  return *current_++;
813 }
814 
815 
816 void
817 Reader::getLocationLineAndColumn( Location location,
818  int &line,
819  int &column ) const
820 {
821  Location current = begin_;
822  Location lastLineStart = current;
823  line = 0;
824  while ( current < location && current != end_ )
825  {
826  Char c = *current++;
827  if ( c == '\r' )
828  {
829  if ( *current == '\n' )
830  ++current;
831  lastLineStart = current;
832  ++line;
833  }
834  else if ( c == '\n' )
835  {
836  lastLineStart = current;
837  ++line;
838  }
839  }
840  // column & line start at 1
841  column = int(location - lastLineStart) + 1;
842  ++line;
843 }
844 
845 
846 std::string
847 Reader::getLocationLineAndColumn( Location location ) const
848 {
849  int line, column;
850  getLocationLineAndColumn( location, line, column );
851  char buffer[18+16+16+1];
852  sprintf( buffer, "Line %d, Column %d", line, column );
853  return buffer;
854 }
855 
856 
857 std::string
859 {
860  std::string formattedMessage;
861  for ( Errors::const_iterator itError = errors_.begin();
862  itError != errors_.end();
863  ++itError )
864  {
865  const ErrorInfo &error = *itError;
866  formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
867  formattedMessage += " " + error.message_ + "\n";
868  if ( error.extra_ )
869  formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
870  }
871  return formattedMessage;
872 }
873 
874 
875 std::istream& operator>>( std::istream &sin, Value &root )
876 {
877  Json::Reader reader;
878  bool ok = reader.parse(sin, root, true);
879  //JSON_ASSERT( ok );
880  if (!ok) throw std::runtime_error(reader.getFormatedErrorMessages());
881  return sin;
882 }
883 
884 
885 } // namespace Json

SourceForge Logo hosts this site. Send comments to:
Json-cpp Developers