10 #if _MSC_VER >= 1400 // VC++ 8.0
11 #pragma warning( disable : 4996 ) // disable warning about strdup being deprecated.
20 : allowComments_( true )
21 , strictRoot_( false )
// Membership test: true when c equals any one of the four candidates c1..c4.
49 return c == c1 || c == c2 || c == c3 || c == c4;
// Five-candidate variant of the same test: true when c equals any of c1..c5.
55 return c == c1 || c == c2 || c == c3 || c == c4 || c == c5;
// Walk the range [begin, end) and test every character for a line break,
// i.e. either '\n' or '\r'.
63 for ( ;begin < end; ++begin )
64 if ( *begin ==
'\n' || *begin ==
'\r' )
// Single-byte form: the code point is cast to char and stored as the only byte.
78 result[0] =
static_cast<char>(cp);
// Two-byte form: the trailing byte is 10xxxxxx carrying the low 6 bits of cp,
// the leading byte is 110xxxxx carrying the next 5 bits.
83 result[1] =
static_cast<char>(0x80 | (0x3f & cp));
84 result[0] =
static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
// Code points up to 0xFFFF are handled by the branch that follows.
86 else if (cp <= 0xFFFF)
89 result[2] =
static_cast<char>(0x80 | (0x3f & cp));
90 result[1] = 0x80 |
static_cast<char>((0x3f & (cp >> 6)));
91 result[0] = 0xE0 |
static_cast<char>((0xf & (cp >> 12)));
// Code points up to 0x10FFFF use the four-byte form
// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. Bytes are filled from the last one
// backwards, each trailing byte taking the next 6 bits of cp and the leading
// byte taking the top 3 bits.
93 else if (cp <= 0x10FFFF)
96 result[3] =
static_cast<char>(0x80 | (0x3f & cp));
97 result[2] =
static_cast<char>(0x80 | (0x3f & (cp >> 6)));
98 result[1] =
static_cast<char>(0x80 | (0x3f & (cp >> 12)));
99 result[0] =
static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
116 : features_( features )
124 bool collectComments )
// Copy the text into document_ first, so that the begin/end pointers handed
// to the range-based parse() refer to document_ rather than to the caller's
// argument.
126 document_ = document;
127 const char *begin = document_.c_str();
128 const char *end = begin + document_.length();
129 return parse( begin, end, root, collectComments );
136 bool collectComments )
// Read the stream into doc with a single getline call that uses (char)EOF as
// the delimiter character.
// NOTE(review): an input byte equal to (char)EOF would be taken as the
// delimiter and end the read early -- confirm this is acceptable for callers.
146 std::getline(sin, doc, (
char)EOF);
// Hand the collected text to the three-argument parse().
147 return parse( doc, root, collectComments );
153 bool collectComments )
// Comment collection is forced off here (the guarding condition is not
// visible in this excerpt).
157 collectComments =
false;
// Per-parse state: remember the collection flag and clear pending comments.
162 collectComments_ = collectComments;
166 commentsBefore_ =
"";
// Loop until the node stack is empty (loop body not visible here), then make
// root the value that parsing writes into.
168 while ( !nodes_.empty() )
170 nodes_.push( &root );
// Parse the top-level value, then consume comment tokens that follow it.
172 bool successful = readValue();
174 skipCommentTokens( token );
175 if ( collectComments_ && !commentsBefore_.empty() )
// Error path: an error token anchored at the start of the document is used to
// report that the root must be an array or an object.
182 token.type_ = tokenError;
183 token.start_ = beginDoc;
185 addError(
"A valid JSON document must be either an array or an object value.",
// Obtain the token that starts the value, passing over comment tokens.
198 skipCommentTokens( token );
199 bool successful =
true;
// Comments buffered in commentsBefore_ are handled here while collection is
// enabled; the buffer is then reset to empty.
201 if ( collectComments_ && !commentsBefore_.empty() )
204 commentsBefore_ =
"";
// Dispatch on the token type: objects and arrays go to their readers, numbers
// and strings to their decoders, and the literal values are assigned to the
// current value directly.
208 switch ( token.type_ )
210 case tokenObjectBegin:
211 successful = readObject( token );
213 case tokenArrayBegin:
214 successful = readArray( token );
217 successful = decodeNumber( token );
220 successful = decodeString( token );
223 currentValue() =
true;
226 currentValue() =
false;
// A default-constructed Value is assigned here.
229 currentValue() = Value();
// Report a syntax error for a token that cannot start a value.
232 return addError(
"Syntax error: value, object or array expected.", token );
// While collecting comments, record the position at which this value ended.
235 if ( collectComments_ )
237 lastValueEnd_ = current_;
238 lastValue_ = ¤tValue();
246 Reader::skipCommentTokens( Token &token )
254 while ( token.type_ == tokenComment );
264 Reader::expectToken( TokenType type, Token &token,
const char *message )
// When the token's type differs from the expected one, record message as an
// error on that token and return addError's result.
267 if ( token.type_ != type )
268 return addError( message, token );
274 Reader::readToken( Token &token )
// The token begins at the current position; the character read next decides
// which token type is assigned below (the case labels are not visible in this
// excerpt).
277 token.start_ = current_;
278 Char c = getNextChar();
283 token.type_ = tokenObjectBegin;
286 token.type_ = tokenObjectEnd;
289 token.type_ = tokenArrayBegin;
292 token.type_ = tokenArrayEnd;
295 token.type_ = tokenString;
299 token.type_ = tokenComment;
313 token.type_ = tokenNumber;
// Keyword literals: match() is given only the characters after the first
// letter ("rue", "alse", "ull") together with their count.
317 token.type_ = tokenTrue;
318 ok = match(
"rue", 3 );
321 token.type_ = tokenFalse;
322 ok = match(
"alse", 4 );
325 token.type_ = tokenNull;
326 ok = match(
"ull", 3 );
329 token.type_ = tokenArraySeparator;
332 token.type_ = tokenMemberSeparator;
335 token.type_ = tokenEndOfStream;
// The token is marked as an error here (the condition is outside this
// excerpt); in every case it ends at the position scanning has reached.
342 token.type_ = tokenError;
343 token.end_ = current_;
// Scan while input remains, testing the character against the four
// whitespace characters: space, tab, '\r' and '\n'.
351 while ( current_ != end_ )
354 if ( c ==
' ' || c ==
'\t' || c ==
'\r' || c ==
'\n' )
363 Reader::match( Location pattern,
// Guard: fewer than patternLength characters remain in the input.
366 if ( end_ - current_ < patternLength )
// Compare the input at current_ with the pattern, character by character.
368 int index = patternLength;
370 if ( current_[index] != pattern[index] )
// Advance past the compared characters.
372 current_ += patternLength;
378 Reader::readComment()
// The comment is taken to start one position before current_ (presumably the
// already-consumed opening character -- confirm against readToken).
380 Location commentBegin = current_ - 1;
381 Char c = getNextChar();
382 bool successful =
false;
// Scanning is delegated to the C-style or the C++-style comment reader.
384 successful = readCStyleComment();
386 successful = readCppStyleComment();
// With collection enabled, the text from commentBegin to current_ is recorded
// together with a placement.
390 if ( collectComments_ )
// This test holds when a previous value end is recorded and no line break
// lies between that position and the start of the comment.
393 if ( lastValueEnd_ && !
containsNewLine( lastValueEnd_, commentBegin ) )
399 addComment( commentBegin, current_, placement );
406 Reader::addComment( Location begin,
// Must only be called while comments are being collected.
410 assert( collectComments_ );
// Attach path: the comment text is set on the value lastValue_ points to,
// which is asserted to be non-null.
413 assert( lastValue_ != 0 );
414 lastValue_->
setComment( std::string( begin, end ), placement );
// Buffering path: append the text to commentsBefore_, inserting a newline
// between entries when the buffer already holds something.
418 if ( !commentsBefore_.empty() )
419 commentsBefore_ +=
"\n";
420 commentsBefore_ += std::string( begin, end );
426 Reader::readCStyleComment()
428 while ( current_ != end_ )
430 Char c = getNextChar();
431 if ( c ==
'*' && *current_ ==
'/' )
// The comment is well formed only when the next character consumed is '/'.
434 return getNextChar() ==
'/';
439 Reader::readCppStyleComment()
// Consume characters while input remains, checking each one for a line break
// ('\r' or '\n').
441 while ( current_ != end_ )
443 Char c = getNextChar();
444 if ( c ==
'\r' || c ==
'\n' )
// Scan while input remains. The test below is true for a character that is
// neither a decimal digit nor one of '.', 'e', 'E', '+', '-'.
454 while ( current_ != end_ )
456 if ( !(*current_ >=
'0' && *current_ <=
'9') &&
457 !
in( *current_,
'.',
'e',
'E',
'+',
'-' ) )
467 while ( current_ != end_ )
480 Reader::readObject( Token &tokenStart )
// Each iteration starts by reading the token expected to hold a member name.
485 while ( readToken( tokenName ) )
// Pass over comment tokens that come before the member name.
487 bool initialTokenOk =
true;
488 while ( tokenName.type_ == tokenComment && initialTokenOk )
489 initialTokenOk = readToken( tokenName );
490 if ( !initialTokenOk )
// An object-end token is accepted at this point only while name is empty.
492 if ( tokenName.type_ == tokenObjectEnd && name.empty() )
// Anything other than a string token cannot be a member name.
494 if ( tokenName.type_ != tokenString )
498 if ( !decodeString( tokenName, name ) )
499 return recoverFromError( tokenObjectEnd );
// The name must be followed by the member separator token.
502 if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator )
504 return addErrorAndRecover(
"Missing ':' after object member name",
// Index the current value by name, make that slot the current value and
// parse the member's value into it.
508 Value &value = currentValue()[ name ];
509 nodes_.push( &value );
510 bool ok = readValue();
513 return recoverFromError( tokenObjectEnd );
// After a member only an object-end, an array-separator or a comment token
// is acceptable.
516 if ( !readToken( comma )
517 || ( comma.type_ != tokenObjectEnd &&
518 comma.type_ != tokenArraySeparator &&
519 comma.type_ != tokenComment ) )
521 return addErrorAndRecover(
"Missing ',' or '}' in object declaration",
// Pass over comment tokens that follow the member.
525 bool finalizeTokenOk =
true;
526 while ( comma.type_ == tokenComment &&
528 finalizeTokenOk = readToken( comma );
529 if ( comma.type_ == tokenObjectEnd )
// Error reported when neither a closing brace nor a member name was found.
532 return addErrorAndRecover(
"Missing '}' or object member name",
539 Reader::readArray( Token &tokenStart )
543 if ( *current_ ==
']' )
// Read the token into endArray.
546 readToken( endArray );
// Next element: index the current value at index (post-incremented), make
// that slot the current value and parse into it.
552 Value &value = currentValue()[ index++ ];
553 nodes_.push( &value );
554 bool ok = readValue();
557 return recoverFromError( tokenArrayEnd );
// Read the token that follows the element, passing over comment tokens.
561 ok = readToken( token );
562 while ( token.type_ == tokenComment && ok )
564 ok = readToken( token );
566 bool badTokenType = ( token.type_ == tokenArraySeparator &&
567 token.type_ == tokenArrayEnd );
// On a failed read or an unacceptable token, record the error and run error
// recovery.
568 if ( !ok || badTokenType )
570 return addErrorAndRecover(
"Missing ',' or ']' in array declaration",
// Check whether the token read closes the array.
574 if ( token.type_ == tokenArrayEnd )
582 Reader::decodeNumber( Token &token )
// Inspect every character of the token for signs of floating-point notation:
// '.', 'e', 'E', '+', or a '-' that is not the first character. Such tokens
// are handed to decodeDouble.
584 bool isDouble =
false;
585 for (
Location inspect = token.start_; inspect != token.end_; ++inspect )
588 ||
in( *inspect,
'.',
'e',
'E',
'+' )
589 || ( *inspect ==
'-' && inspect != token.start_ );
592 return decodeDouble( token );
// Integer path: note a leading minus sign, then accumulate the digits into an
// unsigned value. The threshold is a maximum divided by 10.
594 bool isNegative = *current ==
'-';
598 : Value::maxUInt) / 10;
599 Value::UInt value = 0;
600 while ( current < token.end_ )
// Any character outside '0'..'9' makes the token invalid.
603 if ( c < '0' || c >
'9' )
604 return addError(
"'" + std::string( token.start_, token.end_ ) +
"' is not a number.", token );
// Once the accumulated value reaches the threshold, the token is decoded as a
// double instead.
605 if ( value >= threshold )
606 return decodeDouble( token );
614 currentValue() = value;
620 Reader::decodeDouble( Token &token )
623 const int bufferSize = 32;
625 int length = int(token.end_ - token.start_);
626 if ( length <= bufferSize )
628 Char buffer[bufferSize];
// Short tokens: copy the token text into the local buffer and parse it with
// sscanf's "%lf" conversion.
629 memcpy( buffer, token.start_, length );
631 count = sscanf( buffer,
"%lf", &value );
// Other tokens: parse from a std::string copy of the token text instead.
635 std::string buffer( token.start_, token.end_ );
636 count = sscanf( buffer.c_str(),
"%lf", &value );
// Report the token text as not being a number; otherwise the parsed value
// becomes the current value.
640 return addError(
"'" + std::string( token.start_, token.end_ ) +
"' is not a number.", token );
641 currentValue() = value;
647 Reader::decodeString( Token &token )
650 if ( !decodeString( token, decoded ) )
652 currentValue() = decoded;
658 Reader::decodeString( Token &token, std::string &decoded )
// Reserve room for the token length minus two characters, and start decoding
// one character past the token start.
660 decoded.reserve( token.end_ - token.start_ - 2 );
661 Location current = token.start_ + 1;
663 while ( current != end )
// A backslash introduces an escape sequence and must be followed by at least
// one more character.
668 else if ( c ==
'\\' )
670 if ( current == end )
671 return addError(
"Empty escape sequence in string", token, current );
672 Char escape = *current++;
// Single-character escapes append the character they denote.
675 case '"': decoded +=
'"';
break;
676 case '/': decoded +=
'/';
break;
677 case '\\': decoded +=
'\\';
break;
678 case 'b': decoded +=
'\b';
break;
679 case 'f': decoded +=
'\f';
break;
680 case 'n': decoded +=
'\n';
break;
681 case 'r': decoded +=
'\r';
break;
682 case 't': decoded +=
'\t';
break;
// Unicode escape: the code point is decoded by decodeUnicodeCodePoint (the
// case label is not visible in this excerpt).
685 unsigned int unicode;
686 if ( !decodeUnicodeCodePoint( token, current, end, unicode ) )
// Error reported for an escape character that is not recognised.
692 return addError(
"Bad escape sequence in string", token, current );
704 Reader::decodeUnicodeCodePoint( Token &token,
707 unsigned int &unicode )
// Decode the first four-digit escape into unicode.
710 if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) )
// A value in 0xD800..0xDBFF is the first half of a surrogate pair, so a
// second escape must follow.
712 if (unicode >= 0xD800 && unicode <= 0xDBFF)
// The second escape needs six more characters of input.
715 if (end - current < 6)
716 return addError(
"additional six characters expected to parse unicode surrogate pair.", token, current );
717 unsigned int surrogatePair;
// Both characters are consumed while checking for the backslash and 'u'.
718 if (*(current++) ==
'\\' && *(current++)==
'u')
720 if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair ))
// Combine the halves: 0x10000 plus the low 10 bits of the first value shifted
// up by 10, plus the low 10 bits of the second.
722 unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
728 return addError(
"expecting another \\u token to begin the second half of a unicode surrogate pair", token, current );
734 Reader::decodeUnicodeEscapeSequence( Token &token,
737 unsigned int &unicode )
// Four characters of input are required for the escape's digits.
739 if ( end - current < 4 )
740 return addError(
"Bad unicode escape sequence in string: four digits expected.", token, current );
// Process the four characters in turn; each must be a hexadecimal digit in
// one of the ranges 0-9, a-f or A-F. Letters contribute their offset from
// 'a' or 'A' plus 10.
742 for (
int index =0; index < 4; ++index )
746 if ( c >=
'0' && c <=
'9' )
748 else if ( c >=
'a' && c <=
'f' )
749 unicode += c -
'a' + 10;
750 else if ( c >=
'A' && c <=
'F' )
751 unicode += c -
'A' + 10;
// Any other character is reported as an invalid digit.
753 return addError(
"Bad unicode escape sequence in string: hexadecimal digit expected.", token, current );
760 Reader::addError(
const std::string &message,
766 info.message_ = message;
768 errors_.push_back( info );
774 Reader::recoverFromError( TokenType skipUntilToken )
// Remember how many errors exist now; errors_ is resized back to this count
// below, which drops anything recorded while skipping tokens.
776 int errorCount = int(errors_.size());
780 if ( !readToken(skip) )
781 errors_.resize( errorCount );
// Skipping stops at the requested token type or at the end of the stream.
782 if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream )
785 errors_.resize( errorCount );
791 Reader::addErrorAndRecover(
const std::string &message,
793 TokenType skipUntilToken )
// Record the error on the token, then run recovery up to skipUntilToken and
// return its result.
795 addError( message, token );
796 return recoverFromError( skipUntilToken );
801 Reader::currentValue()
803 return *(nodes_.top());
808 Reader::getNextChar()
810 if ( current_ == end_ )
817 Reader::getLocationLineAndColumn( Location location,
// Scan forward until location (or the end of input) is reached, tracking
// where the line containing location begins.
824 while ( current < location && current != end_ )
// Look-ahead for '\n' (presumably after a '\r', to treat the pair as one line
// break -- the enclosing condition is not visible in this excerpt).
829 if ( *current ==
'\n' )
831 lastLineStart = current;
// A lone '\n' also starts a new line.
834 else if ( c ==
'\n' )
836 lastLineStart = current;
// The column is the offset from the start of the line, counted from 1.
841 column = int(location - lastLineStart) + 1;
847 Reader::getLocationLineAndColumn( Location location )
const
// Resolve the location to a line/column pair, then format the pair as
// "Line <line>, Column <column>" into a fixed-size local buffer.
850 getLocationLineAndColumn( location, line, column );
851 char buffer[18+16+16+1];
852 sprintf( buffer,
"Line %d, Column %d", line, column );
// Build the report by appending, for every recorded error, a "* <location>"
// line followed by an indented line holding the message.
860 std::string formattedMessage;
861 for ( Errors::const_iterator itError = errors_.begin();
862 itError != errors_.end();
865 const ErrorInfo &error = *itError;
866 formattedMessage +=
"* " + getLocationLineAndColumn( error.token_.start_ ) +
"\n";
867 formattedMessage +=
" " + error.message_ +
"\n";
// Additional line pointing at the error's extra location (the condition that
// guards it is not visible in this excerpt).
869 formattedMessage +=
"See " + getLocationLineAndColumn( error.extra_ ) +
" for detail.\n";
871 return formattedMessage;
// Parse the stream into root, passing true as the third argument
// (collectComments in the stream overload of parse).
878 bool ok = reader.
parse(sin, root,
true);