$extrastylesheet
JsonCpp project page Classes Namespace JsonCpp home page

src/lib_json/json_reader.cpp
Go to the documentation of this file.
00001 // Copyright 2007-2011 Baptiste Lepilleur and The JsonCpp Authors
00002 // Copyright (C) 2016 InfoTeCS JSC. All rights reserved.
00003 // Distributed under MIT license, or public domain if desired and
00004 // recognized in your jurisdiction.
00005 // See file LICENSE for detail or copy at http://jsoncpp.sourceforge.net/LICENSE
00006 
00007 #if !defined(JSON_IS_AMALGAMATION)
00008 #include <json/assertions.h>
00009 #include <json/reader.h>
00010 #include <json/value.h>
00011 #include "json_tool.h"
00012 #endif // if !defined(JSON_IS_AMALGAMATION)
#include <cassert>
#include <cstdio>
#include <cstring>
#include <istream>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <sstream>
#include <utility>
00022 
// Portability shims: pick the bounded formatting routine that this toolchain
// provides and expose it under the single name `snprintf` used further down.
#if defined(_MSC_VER)
#if !defined(WINCE) && defined(__STDC_SECURE_LIB__) && _MSC_VER >= 1500 // VC++ 9.0 and above 
#define snprintf sprintf_s
#elif _MSC_VER >= 1900 // VC++ 14.0 and above
#define snprintf std::snprintf
#else
#define snprintf _snprintf
#endif
#elif defined(__ANDROID__) || defined(__QNXNTO__)
// Self-referential define: the macro expands to itself, so the unqualified
// snprintf keeps being used on these platforms.
#define snprintf snprintf
#elif __cplusplus >= 201103L
#if !defined(__MINGW32__) && !defined(__CYGWIN__)
#define snprintf std::snprintf
#endif
#endif

// On QNX, route sscanf to the std:: qualified function.
#if defined(__QNXNTO__)
#define sscanf std::sscanf
#endif

#if defined(_MSC_VER) && _MSC_VER >= 1400 // VC++ 8.0
// Disable warning about strdup being deprecated.
#pragma warning(disable : 4996)
#endif
00047 
// Define JSONCPP_DEPRECATED_STACK_LIMIT as an appropriate integer at compile
// time to change the stack limit. When it is not supplied by the build, the
// limit defaults to 1000.
#if !defined(JSONCPP_DEPRECATED_STACK_LIMIT)
#define JSONCPP_DEPRECATED_STACK_LIMIT 1000
#endif

// Upper bound on the size of the node stack; Reader::readValue() throws once
// the stack grows beyond it.
static size_t const stackLimit_g = JSONCPP_DEPRECATED_STACK_LIMIT; // see readValue()
00054 
00055 namespace Json {
00056 
// Owning pointer to a CharReader: std::unique_ptr when the compiler (or the
// library version reported by _CPPLIB_VER) is recent enough, std::auto_ptr
// otherwise.
#if __cplusplus >= 201103L || (defined(_CPPLIB_VER) && _CPPLIB_VER >= 520)
typedef std::unique_ptr<CharReader> CharReaderPtr;
#else
typedef std::auto_ptr<CharReader>   CharReaderPtr;
#endif
00062 
00063 // Implementation of class Features
00064 // ////////////////////////////////
00065 
00066 Features::Features()
00067     : allowComments_(true), strictRoot_(false),
00068       allowDroppedNullPlaceholders_(false), allowNumericKeys_(false) {}
00069 
00070 Features Features::all() { return Features(); }
00071 
00072 Features Features::strictMode() {
00073   Features features;
00074   features.allowComments_ = false;
00075   features.strictRoot_ = true;
00076   features.allowDroppedNullPlaceholders_ = false;
00077   features.allowNumericKeys_ = false;
00078   return features;
00079 }
00080 
00081 // Implementation of class Reader
00082 // ////////////////////////////////
00083 
00084 bool Reader::containsNewLine(Reader::Location begin, Reader::Location end) {
00085   for (; begin < end; ++begin)
00086     if (*begin == '\n' || *begin == '\r')
00087       return true;
00088   return false;
00089 }
00090 
00091 // Class Reader
00092 // //////////////////////////////////////////////////////////////////
00093 
00094 Reader::Reader()
00095     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
00096       lastValue_(), commentsBefore_(), features_(Features::all()),
00097       collectComments_() {}
00098 
00099 Reader::Reader(const Features& features)
00100     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
00101       lastValue_(), commentsBefore_(), features_(features), collectComments_() {
00102 }
00103 
00104 bool
00105 Reader::parse(const std::string& document, Value& root, bool collectComments) {
00106   document_.assign(document.begin(), document.end());
00107   const char* begin = document_.c_str();
00108   const char* end = begin + document_.length();
00109   return parse(begin, end, root, collectComments);
00110 }
00111 
00112 bool Reader::parse(std::istream& sin, Value& root, bool collectComments) {
00113   // std::istream_iterator<char> begin(sin);
00114   // std::istream_iterator<char> end;
00115   // Those would allow streamed input from a file, if parse() were a
00116   // template function.
00117 
00118   // Since JSONCPP_STRING is reference-counted, this at least does not
00119   // create an extra copy.
00120   JSONCPP_STRING doc;
00121   std::getline(sin, doc, (char)EOF);
00122   return parse(doc.data(), doc.data() + doc.size(), root, collectComments);
00123 }
00124 
00125 bool Reader::parse(const char* beginDoc,
00126                    const char* endDoc,
00127                    Value& root,
00128                    bool collectComments) {
00129   if (!features_.allowComments_) {
00130     collectComments = false;
00131   }
00132 
00133   begin_ = beginDoc;
00134   end_ = endDoc;
00135   collectComments_ = collectComments;
00136   current_ = begin_;
00137   lastValueEnd_ = 0;
00138   lastValue_ = 0;
00139   commentsBefore_.clear();
00140   errors_.clear();
00141   while (!nodes_.empty())
00142     nodes_.pop();
00143   nodes_.push(&root);
00144 
00145   bool successful = readValue();
00146   Token token;
00147   skipCommentTokens(token);
00148   if (collectComments_ && !commentsBefore_.empty())
00149     root.setComment(commentsBefore_, commentAfter);
00150   if (features_.strictRoot_) {
00151     if (!root.isArray() && !root.isObject()) {
00152       // Set error location to start of doc, ideally should be first token found
00153       // in doc
00154       token.type_ = tokenError;
00155       token.start_ = beginDoc;
00156       token.end_ = endDoc;
00157       addError(
00158           "A valid JSON document must be either an array or an object value.",
00159           token);
00160       return false;
00161     }
00162   }
00163   return successful;
00164 }
00165 
00166 bool Reader::readValue() {
00167   // readValue() may call itself only if it calls readObject() or ReadArray().
00168   // These methods execute nodes_.push() just before and nodes_.pop)() just after calling readValue(). 
00169   // parse() executes one nodes_.push(), so > instead of >=.
00170   if (nodes_.size() > stackLimit_g) throwRuntimeError("Exceeded stackLimit in readValue().");
00171 
00172   Token token;
00173   skipCommentTokens(token);
00174   bool successful = true;
00175 
00176   if (collectComments_ && !commentsBefore_.empty()) {
00177     currentValue().setComment(commentsBefore_, commentBefore);
00178     commentsBefore_.clear();
00179   }
00180 
00181   switch (token.type_) {
00182   case tokenObjectBegin:
00183     successful = readObject(token);
00184     currentValue().setOffsetLimit(current_ - begin_);
00185     break;
00186   case tokenArrayBegin:
00187     successful = readArray(token);
00188     currentValue().setOffsetLimit(current_ - begin_);
00189     break;
00190   case tokenNumber:
00191     successful = decodeNumber(token);
00192     break;
00193   case tokenString:
00194     successful = decodeString(token);
00195     break;
00196   case tokenTrue:
00197     {
00198     Value v(true);
00199     currentValue().swapPayload(v);
00200     currentValue().setOffsetStart(token.start_ - begin_);
00201     currentValue().setOffsetLimit(token.end_ - begin_);
00202     }
00203     break;
00204   case tokenFalse:
00205     {
00206     Value v(false);
00207     currentValue().swapPayload(v);
00208     currentValue().setOffsetStart(token.start_ - begin_);
00209     currentValue().setOffsetLimit(token.end_ - begin_);
00210     }
00211     break;
00212   case tokenNull:
00213     {
00214     Value v;
00215     currentValue().swapPayload(v);
00216     currentValue().setOffsetStart(token.start_ - begin_);
00217     currentValue().setOffsetLimit(token.end_ - begin_);
00218     }
00219     break;
00220   case tokenArraySeparator:
00221   case tokenObjectEnd:
00222   case tokenArrayEnd:
00223     if (features_.allowDroppedNullPlaceholders_) {
00224       // "Un-read" the current token and mark the current value as a null
00225       // token.
00226       current_--;
00227       Value v;
00228       currentValue().swapPayload(v);
00229       currentValue().setOffsetStart(current_ - begin_ - 1);
00230       currentValue().setOffsetLimit(current_ - begin_);
00231       break;
00232     } // Else, fall through...
00233   default:
00234     currentValue().setOffsetStart(token.start_ - begin_);
00235     currentValue().setOffsetLimit(token.end_ - begin_);
00236     return addError("Syntax error: value, object or array expected.", token);
00237   }
00238 
00239   if (collectComments_) {
00240     lastValueEnd_ = current_;
00241     lastValue_ = &currentValue();
00242   }
00243 
00244   return successful;
00245 }
00246 
00247 void Reader::skipCommentTokens(Token& token) {
00248   if (features_.allowComments_) {
00249     do {
00250       readToken(token);
00251     } while (token.type_ == tokenComment);
00252   } else {
00253     readToken(token);
00254   }
00255 }
00256 
00257 bool Reader::readToken(Token& token) {
00258   skipSpaces();
00259   token.start_ = current_;
00260   Char c = getNextChar();
00261   bool ok = true;
00262   switch (c) {
00263   case '{':
00264     token.type_ = tokenObjectBegin;
00265     break;
00266   case '}':
00267     token.type_ = tokenObjectEnd;
00268     break;
00269   case '[':
00270     token.type_ = tokenArrayBegin;
00271     break;
00272   case ']':
00273     token.type_ = tokenArrayEnd;
00274     break;
00275   case '"':
00276     token.type_ = tokenString;
00277     ok = readString();
00278     break;
00279   case '/':
00280     token.type_ = tokenComment;
00281     ok = readComment();
00282     break;
00283   case '0':
00284   case '1':
00285   case '2':
00286   case '3':
00287   case '4':
00288   case '5':
00289   case '6':
00290   case '7':
00291   case '8':
00292   case '9':
00293   case '-':
00294     token.type_ = tokenNumber;
00295     readNumber();
00296     break;
00297   case 't':
00298     token.type_ = tokenTrue;
00299     ok = match("rue", 3);
00300     break;
00301   case 'f':
00302     token.type_ = tokenFalse;
00303     ok = match("alse", 4);
00304     break;
00305   case 'n':
00306     token.type_ = tokenNull;
00307     ok = match("ull", 3);
00308     break;
00309   case ',':
00310     token.type_ = tokenArraySeparator;
00311     break;
00312   case ':':
00313     token.type_ = tokenMemberSeparator;
00314     break;
00315   case 0:
00316     token.type_ = tokenEndOfStream;
00317     break;
00318   default:
00319     ok = false;
00320     break;
00321   }
00322   if (!ok)
00323     token.type_ = tokenError;
00324   token.end_ = current_;
00325   return true;
00326 }
00327 
00328 void Reader::skipSpaces() {
00329   while (current_ != end_) {
00330     Char c = *current_;
00331     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
00332       ++current_;
00333     else
00334       break;
00335   }
00336 }
00337 
00338 bool Reader::match(Location pattern, int patternLength) {
00339   if (end_ - current_ < patternLength)
00340     return false;
00341   int index = patternLength;
00342   while (index--)
00343     if (current_[index] != pattern[index])
00344       return false;
00345   current_ += patternLength;
00346   return true;
00347 }
00348 
00349 bool Reader::readComment() {
00350   Location commentBegin = current_ - 1;
00351   Char c = getNextChar();
00352   bool successful = false;
00353   if (c == '*')
00354     successful = readCStyleComment();
00355   else if (c == '/')
00356     successful = readCppStyleComment();
00357   if (!successful)
00358     return false;
00359 
00360   if (collectComments_) {
00361     CommentPlacement placement = commentBefore;
00362     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
00363       if (c != '*' || !containsNewLine(commentBegin, current_))
00364         placement = commentAfterOnSameLine;
00365     }
00366 
00367     addComment(commentBegin, current_, placement);
00368   }
00369   return true;
00370 }
00371 
00372 JSONCPP_STRING Reader::normalizeEOL(Reader::Location begin, Reader::Location end) {
00373   JSONCPP_STRING normalized;
00374   normalized.reserve(static_cast<size_t>(end - begin));
00375   Reader::Location current = begin;
00376   while (current != end) {
00377     char c = *current++;
00378     if (c == '\r') {
00379       if (current != end && *current == '\n')
00380          // convert dos EOL
00381          ++current;
00382       // convert Mac EOL
00383       normalized += '\n';
00384     } else {
00385       normalized += c;
00386     }
00387   }
00388   return normalized;
00389 }
00390 
00391 void
00392 Reader::addComment(Location begin, Location end, CommentPlacement placement) {
00393   assert(collectComments_);
00394   const JSONCPP_STRING& normalized = normalizeEOL(begin, end);
00395   if (placement == commentAfterOnSameLine) {
00396     assert(lastValue_ != 0);
00397     lastValue_->setComment(normalized, placement);
00398   } else {
00399     commentsBefore_ += normalized;
00400   }
00401 }
00402 
00403 bool Reader::readCStyleComment() {
00404   while ((current_ + 1) < end_) {
00405     Char c = getNextChar();
00406     if (c == '*' && *current_ == '/')
00407       break;
00408   }
00409   return getNextChar() == '/';
00410 }
00411 
00412 bool Reader::readCppStyleComment() {
00413   while (current_ != end_) {
00414     Char c = getNextChar();
00415     if (c == '\n')
00416       break;
00417     if (c == '\r') {
00418       // Consume DOS EOL. It will be normalized in addComment.
00419       if (current_ != end_ && *current_ == '\n')
00420         getNextChar();
00421       // Break on Moc OS 9 EOL.
00422       break;
00423     }
00424   }
00425   return true;
00426 }
00427 
00428 void Reader::readNumber() {
00429   const char *p = current_;
00430   char c = '0'; // stopgap for already consumed character
00431   // integral part
00432   while (c >= '0' && c <= '9')
00433     c = (current_ = p) < end_ ? *p++ : '\0';
00434   // fractional part
00435   if (c == '.') {
00436     c = (current_ = p) < end_ ? *p++ : '\0';
00437     while (c >= '0' && c <= '9')
00438       c = (current_ = p) < end_ ? *p++ : '\0';
00439   }
00440   // exponential part
00441   if (c == 'e' || c == 'E') {
00442     c = (current_ = p) < end_ ? *p++ : '\0';
00443     if (c == '+' || c == '-')
00444       c = (current_ = p) < end_ ? *p++ : '\0';
00445     while (c >= '0' && c <= '9')
00446       c = (current_ = p) < end_ ? *p++ : '\0';
00447   }
00448 }
00449 
00450 bool Reader::readString() {
00451   Char c = '\0';
00452   while (current_ != end_) {
00453     c = getNextChar();
00454     if (c == '\\')
00455       getNextChar();
00456     else if (c == '"')
00457       break;
00458   }
00459   return c == '"';
00460 }
00461 
00462 bool Reader::readObject(Token& tokenStart) {
00463   Token tokenName;
00464   JSONCPP_STRING name;
00465   Value init(objectValue);
00466   currentValue().swapPayload(init);
00467   currentValue().setOffsetStart(tokenStart.start_ - begin_);
00468   while (readToken(tokenName)) {
00469     bool initialTokenOk = true;
00470     while (tokenName.type_ == tokenComment && initialTokenOk)
00471       initialTokenOk = readToken(tokenName);
00472     if (!initialTokenOk)
00473       break;
00474     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
00475       return true;
00476     name.clear();
00477     if (tokenName.type_ == tokenString) {
00478       if (!decodeString(tokenName, name))
00479         return recoverFromError(tokenObjectEnd);
00480     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
00481       Value numberName;
00482       if (!decodeNumber(tokenName, numberName))
00483         return recoverFromError(tokenObjectEnd);
00484       name = JSONCPP_STRING(numberName.asCString());
00485     } else {
00486       break;
00487     }
00488 
00489     Token colon;
00490     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
00491       return addErrorAndRecover(
00492           "Missing ':' after object member name", colon, tokenObjectEnd);
00493     }
00494     Value& value = currentValue()[name];
00495     nodes_.push(&value);
00496     bool ok = readValue();
00497     nodes_.pop();
00498     if (!ok) // error already set
00499       return recoverFromError(tokenObjectEnd);
00500 
00501     Token comma;
00502     if (!readToken(comma) ||
00503         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
00504          comma.type_ != tokenComment)) {
00505       return addErrorAndRecover(
00506           "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
00507     }
00508     bool finalizeTokenOk = true;
00509     while (comma.type_ == tokenComment && finalizeTokenOk)
00510       finalizeTokenOk = readToken(comma);
00511     if (comma.type_ == tokenObjectEnd)
00512       return true;
00513   }
00514   return addErrorAndRecover(
00515       "Missing '}' or object member name", tokenName, tokenObjectEnd);
00516 }
00517 
00518 bool Reader::readArray(Token& tokenStart) {
00519   Value init(arrayValue);
00520   currentValue().swapPayload(init);
00521   currentValue().setOffsetStart(tokenStart.start_ - begin_);
00522   skipSpaces();
00523   if (current_ != end_ && *current_ == ']') // empty array
00524   {
00525     Token endArray;
00526     readToken(endArray);
00527     return true;
00528   }
00529   int index = 0;
00530   for (;;) {
00531     Value& value = currentValue()[index++];
00532     nodes_.push(&value);
00533     bool ok = readValue();
00534     nodes_.pop();
00535     if (!ok) // error already set
00536       return recoverFromError(tokenArrayEnd);
00537 
00538     Token token;
00539     // Accept Comment after last item in the array.
00540     ok = readToken(token);
00541     while (token.type_ == tokenComment && ok) {
00542       ok = readToken(token);
00543     }
00544     bool badTokenType =
00545         (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
00546     if (!ok || badTokenType) {
00547       return addErrorAndRecover(
00548           "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
00549     }
00550     if (token.type_ == tokenArrayEnd)
00551       break;
00552   }
00553   return true;
00554 }
00555 
00556 bool Reader::decodeNumber(Token& token) {
00557   Value decoded;
00558   if (!decodeNumber(token, decoded))
00559     return false;
00560   currentValue().swapPayload(decoded);
00561   currentValue().setOffsetStart(token.start_ - begin_);
00562   currentValue().setOffsetLimit(token.end_ - begin_);
00563   return true;
00564 }
00565 
00566 bool Reader::decodeNumber(Token& token, Value& decoded) {
00567   // Attempts to parse the number as an integer. If the number is
00568   // larger than the maximum supported value of an integer then
00569   // we decode the number as a double.
00570   Location current = token.start_;
00571   bool isNegative = *current == '-';
00572   if (isNegative)
00573     ++current;
00574   // TODO: Help the compiler do the div and mod at compile time or get rid of them.
00575   Value::LargestUInt maxIntegerValue =
00576       isNegative ? Value::LargestUInt(Value::maxLargestInt) + 1
00577                  : Value::maxLargestUInt;
00578   Value::LargestUInt threshold = maxIntegerValue / 10;
00579   Value::LargestUInt value = 0;
00580   while (current < token.end_) {
00581     Char c = *current++;
00582     if (c < '0' || c > '9')
00583       return decodeDouble(token, decoded);
00584     Value::UInt digit(static_cast<Value::UInt>(c - '0'));
00585     if (value >= threshold) {
00586       // We've hit or exceeded the max value divided by 10 (rounded down). If
00587       // a) we've only just touched the limit, b) this is the last digit, and
00588       // c) it's small enough to fit in that rounding delta, we're okay.
00589       // Otherwise treat this number as a double to avoid overflow.
00590       if (value > threshold || current != token.end_ ||
00591           digit > maxIntegerValue % 10) {
00592         return decodeDouble(token, decoded);
00593       }
00594     }
00595     value = value * 10 + digit;
00596   }
00597   if (isNegative && value == maxIntegerValue)
00598     decoded = Value::minLargestInt;
00599   else if (isNegative)
00600     decoded = -Value::LargestInt(value);
00601   else if (value <= Value::LargestUInt(Value::maxInt))
00602     decoded = Value::LargestInt(value);
00603   else
00604     decoded = value;
00605   return true;
00606 }
00607 
00608 bool Reader::decodeDouble(Token& token) {
00609   Value decoded;
00610   if (!decodeDouble(token, decoded))
00611     return false;
00612   currentValue().swapPayload(decoded);
00613   currentValue().setOffsetStart(token.start_ - begin_);
00614   currentValue().setOffsetLimit(token.end_ - begin_);
00615   return true;
00616 }
00617 
00618 bool Reader::decodeDouble(Token& token, Value& decoded) {
00619   double value = 0;
00620   JSONCPP_STRING buffer(token.start_, token.end_);
00621   JSONCPP_ISTRINGSTREAM is(buffer);
00622   if (!(is >> value))
00623     return addError("'" + JSONCPP_STRING(token.start_, token.end_) +
00624                         "' is not a number.",
00625                     token);
00626   decoded = value;
00627   return true;
00628 }
00629 
00630 bool Reader::decodeString(Token& token) {
00631   JSONCPP_STRING decoded_string;
00632   if (!decodeString(token, decoded_string))
00633     return false;
00634   Value decoded(decoded_string);
00635   currentValue().swapPayload(decoded);
00636   currentValue().setOffsetStart(token.start_ - begin_);
00637   currentValue().setOffsetLimit(token.end_ - begin_);
00638   return true;
00639 }
00640 
00641 bool Reader::decodeString(Token& token, JSONCPP_STRING& decoded) {
00642   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
00643   Location current = token.start_ + 1; // skip '"'
00644   Location end = token.end_ - 1;       // do not include '"'
00645   while (current != end) {
00646     Char c = *current++;
00647     if (c == '"')
00648       break;
00649     else if (c == '\\') {
00650       if (current == end)
00651         return addError("Empty escape sequence in string", token, current);
00652       Char escape = *current++;
00653       switch (escape) {
00654       case '"':
00655         decoded += '"';
00656         break;
00657       case '/':
00658         decoded += '/';
00659         break;
00660       case '\\':
00661         decoded += '\\';
00662         break;
00663       case 'b':
00664         decoded += '\b';
00665         break;
00666       case 'f':
00667         decoded += '\f';
00668         break;
00669       case 'n':
00670         decoded += '\n';
00671         break;
00672       case 'r':
00673         decoded += '\r';
00674         break;
00675       case 't':
00676         decoded += '\t';
00677         break;
00678       case 'u': {
00679         unsigned int unicode;
00680         if (!decodeUnicodeCodePoint(token, current, end, unicode))
00681           return false;
00682         decoded += codePointToUTF8(unicode);
00683       } break;
00684       default:
00685         return addError("Bad escape sequence in string", token, current);
00686       }
00687     } else {
00688       decoded += c;
00689     }
00690   }
00691   return true;
00692 }
00693 
00694 bool Reader::decodeUnicodeCodePoint(Token& token,
00695                                     Location& current,
00696                                     Location end,
00697                                     unsigned int& unicode) {
00698 
00699   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
00700     return false;
00701   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
00702     // surrogate pairs
00703     if (end - current < 6)
00704       return addError(
00705           "additional six characters expected to parse unicode surrogate pair.",
00706           token,
00707           current);
00708     unsigned int surrogatePair;
00709     if (*(current++) == '\\' && *(current++) == 'u') {
00710       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
00711         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
00712       } else
00713         return false;
00714     } else
00715       return addError("expecting another \\u token to begin the second half of "
00716                       "a unicode surrogate pair",
00717                       token,
00718                       current);
00719   }
00720   return true;
00721 }
00722 
00723 bool Reader::decodeUnicodeEscapeSequence(Token& token,
00724                                          Location& current,
00725                                          Location end,
00726                                          unsigned int& ret_unicode) {
00727   if (end - current < 4)
00728     return addError(
00729         "Bad unicode escape sequence in string: four digits expected.",
00730         token,
00731         current);
00732   int unicode = 0;
00733   for (int index = 0; index < 4; ++index) {
00734     Char c = *current++;
00735     unicode *= 16;
00736     if (c >= '0' && c <= '9')
00737       unicode += c - '0';
00738     else if (c >= 'a' && c <= 'f')
00739       unicode += c - 'a' + 10;
00740     else if (c >= 'A' && c <= 'F')
00741       unicode += c - 'A' + 10;
00742     else
00743       return addError(
00744           "Bad unicode escape sequence in string: hexadecimal digit expected.",
00745           token,
00746           current);
00747   }
00748   ret_unicode = static_cast<unsigned int>(unicode);
00749   return true;
00750 }
00751 
00752 bool
00753 Reader::addError(const JSONCPP_STRING& message, Token& token, Location extra) {
00754   ErrorInfo info;
00755   info.token_ = token;
00756   info.message_ = message;
00757   info.extra_ = extra;
00758   errors_.push_back(info);
00759   return false;
00760 }
00761 
00762 bool Reader::recoverFromError(TokenType skipUntilToken) {
00763   size_t const errorCount = errors_.size();
00764   Token skip;
00765   for (;;) {
00766     if (!readToken(skip))
00767       errors_.resize(errorCount); // discard errors caused by recovery
00768     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
00769       break;
00770   }
00771   errors_.resize(errorCount);
00772   return false;
00773 }
00774 
00775 bool Reader::addErrorAndRecover(const JSONCPP_STRING& message,
00776                                 Token& token,
00777                                 TokenType skipUntilToken) {
00778   addError(message, token);
00779   return recoverFromError(skipUntilToken);
00780 }
00781 
00782 Value& Reader::currentValue() { return *(nodes_.top()); }
00783 
00784 Reader::Char Reader::getNextChar() {
00785   if (current_ == end_)
00786     return 0;
00787   return *current_++;
00788 }
00789 
00790 void Reader::getLocationLineAndColumn(Location location,
00791                                       int& line,
00792                                       int& column) const {
00793   Location current = begin_;
00794   Location lastLineStart = current;
00795   line = 0;
00796   while (current < location && current != end_) {
00797     Char c = *current++;
00798     if (c == '\r') {
00799       if (*current == '\n')
00800         ++current;
00801       lastLineStart = current;
00802       ++line;
00803     } else if (c == '\n') {
00804       lastLineStart = current;
00805       ++line;
00806     }
00807   }
00808   // column & line start at 1
00809   column = int(location - lastLineStart) + 1;
00810   ++line;
00811 }
00812 
00813 JSONCPP_STRING Reader::getLocationLineAndColumn(Location location) const {
00814   int line, column;
00815   getLocationLineAndColumn(location, line, column);
00816   char buffer[18 + 16 + 16 + 1];
00817   snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
00818   return buffer;
00819 }
00820 
00821 // Deprecated. Preserved for backward compatibility
00822 JSONCPP_STRING Reader::getFormatedErrorMessages() const {
00823   return getFormattedErrorMessages();
00824 }
00825 
00826 JSONCPP_STRING Reader::getFormattedErrorMessages() const {
00827   JSONCPP_STRING formattedMessage;
00828   for (Errors::const_iterator itError = errors_.begin();
00829        itError != errors_.end();
00830        ++itError) {
00831     const ErrorInfo& error = *itError;
00832     formattedMessage +=
00833         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
00834     formattedMessage += "  " + error.message_ + "\n";
00835     if (error.extra_)
00836       formattedMessage +=
00837           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
00838   }
00839   return formattedMessage;
00840 }
00841 
00842 std::vector<Reader::StructuredError> Reader::getStructuredErrors() const {
00843   std::vector<Reader::StructuredError> allErrors;
00844   for (Errors::const_iterator itError = errors_.begin();
00845        itError != errors_.end();
00846        ++itError) {
00847     const ErrorInfo& error = *itError;
00848     Reader::StructuredError structured;
00849     structured.offset_start = error.token_.start_ - begin_;
00850     structured.offset_limit = error.token_.end_ - begin_;
00851     structured.message = error.message_;
00852     allErrors.push_back(structured);
00853   }
00854   return allErrors;
00855 }
00856 
00857 bool Reader::pushError(const Value& value, const JSONCPP_STRING& message) {
00858   ptrdiff_t const length = end_ - begin_;
00859   if(value.getOffsetStart() > length
00860     || value.getOffsetLimit() > length)
00861     return false;
00862   Token token;
00863   token.type_ = tokenError;
00864   token.start_ = begin_ + value.getOffsetStart();
00865   token.end_ = end_ + value.getOffsetLimit();
00866   ErrorInfo info;
00867   info.token_ = token;
00868   info.message_ = message;
00869   info.extra_ = 0;
00870   errors_.push_back(info);
00871   return true;
00872 }
00873 
00874 bool Reader::pushError(const Value& value, const JSONCPP_STRING& message, const Value& extra) {
00875   ptrdiff_t const length = end_ - begin_;
00876   if(value.getOffsetStart() > length
00877     || value.getOffsetLimit() > length
00878     || extra.getOffsetLimit() > length)
00879     return false;
00880   Token token;
00881   token.type_ = tokenError;
00882   token.start_ = begin_ + value.getOffsetStart();
00883   token.end_ = begin_ + value.getOffsetLimit();
00884   ErrorInfo info;
00885   info.token_ = token;
00886   info.message_ = message;
00887   info.extra_ = begin_ + extra.getOffsetStart();
00888   errors_.push_back(info);
00889   return true;
00890 }
00891 
00892 bool Reader::good() const {
00893   return !errors_.size();
00894 }
00895 
// Feature switches for OurReader. Modelled on Features: it carries the same
// four flags plus allowSingleQuotes_, failIfExtra_, rejectDupKeys_,
// allowSpecialFloats_ and stackLimit_. The class declares no constructor, so
// the members get their values from however the object is initialized.
class OurFeatures {
public:
  // Returns a feature set (defined out of line).
  static OurFeatures all();
  bool allowComments_;
  bool strictRoot_;
  bool allowDroppedNullPlaceholders_;
  bool allowNumericKeys_;
  bool allowSingleQuotes_;
  bool failIfExtra_;
  bool rejectDupKeys_;
  bool allowSpecialFloats_;
  int stackLimit_;
};  // OurFeatures
00910 
00911 // exact copy of Implementation of class Features
00912 // ////////////////////////////////
00913 
00914 OurFeatures OurFeatures::all() { return OurFeatures(); }
00915 
00916 // Implementation of class Reader
00917 // ////////////////////////////////
00918 
00919 // exact copy of Reader, renamed to OurReader
// JSON reader driven by an OurFeatures option set. parse() reads one document
// from a [beginDoc, endDoc) character range into a Value tree and records
// every problem it meets in an internal error list.
class OurReader {
public:
  typedef char Char;
  typedef const Char* Location;
  // One recorded error expressed as offsets relative to the document start.
  struct StructuredError {
    ptrdiff_t offset_start;
    ptrdiff_t offset_limit;
    JSONCPP_STRING message;
  };

  OurReader(OurFeatures const& features);
  // Parses [beginDoc, endDoc) into root. Comments are collected only when
  // collectComments is true and the feature set allows comments.
  bool parse(const char* beginDoc,
             const char* endDoc,
             Value& root,
             bool collectComments = true);
  // Recorded errors rendered as text with line/column information.
  JSONCPP_STRING getFormattedErrorMessages() const;
  // Recorded errors as offset ranges plus message.
  std::vector<StructuredError> getStructuredErrors() const;
  // Append a caller-supplied error located at value (and optionally pointing
  // at extra); both return false when the offsets fall outside the document.
  bool pushError(const Value& value, const JSONCPP_STRING& message);
  bool pushError(const Value& value, const JSONCPP_STRING& message, const Value& extra);
  // True while no error has been recorded.
  bool good() const;

private:
  // Declared but not implemented: the reader is not copyable.
  OurReader(OurReader const&);  // no impl
  void operator=(OurReader const&);  // no impl

  // Kinds of token produced by readToken().
  enum TokenType {
    tokenEndOfStream = 0,
    tokenObjectBegin,
    tokenObjectEnd,
    tokenArrayBegin,
    tokenArrayEnd,
    tokenString,
    tokenNumber,
    tokenTrue,
    tokenFalse,
    tokenNull,
    tokenNaN,
    tokenPosInf,
    tokenNegInf,
    tokenArraySeparator,
    tokenMemberSeparator,
    tokenComment,
    tokenError
  };

  // A token: its kind and the half-open range [start_, end_) it covers.
  class Token {
  public:
    TokenType type_;
    Location start_;
    Location end_;
  };

  // A recorded error: the offending token, the message, and an optional
  // second location (null when there is none).
  class ErrorInfo {
  public:
    Token token_;
    JSONCPP_STRING message_;
    Location extra_;
  };

  typedef std::deque<ErrorInfo> Errors;

  // --- tokenizer ---
  bool readToken(Token& token);
  void skipSpaces();
  bool match(Location pattern, int patternLength);
  bool readComment();
  bool readCStyleComment();
  bool readCppStyleComment();
  bool readString();
  bool readStringSingleQuote();
  bool readNumber(bool checkInf);
  // --- parser ---
  bool readValue();
  bool readObject(Token& token);
  bool readArray(Token& token);
  // --- token decoding; the single-argument overloads store the decoded
  // result into currentValue() ---
  bool decodeNumber(Token& token);
  bool decodeNumber(Token& token, Value& decoded);
  bool decodeString(Token& token);
  bool decodeString(Token& token, JSONCPP_STRING& decoded);
  bool decodeDouble(Token& token);
  bool decodeDouble(Token& token, Value& decoded);
  bool decodeUnicodeCodePoint(Token& token,
                              Location& current,
                              Location end,
                              unsigned int& unicode);
  bool decodeUnicodeEscapeSequence(Token& token,
                                   Location& current,
                                   Location end,
                                   unsigned int& unicode);
  // --- error recording and recovery; all three return false ---
  bool addError(const JSONCPP_STRING& message, Token& token, Location extra = 0);
  bool recoverFromError(TokenType skipUntilToken);
  bool addErrorAndRecover(const JSONCPP_STRING& message,
                          Token& token,
                          TokenType skipUntilToken);
  void skipUntilSpace();
  // The value on top of the node stack, i.e. the one currently being filled.
  Value& currentValue();
  // Next character of the document, or 0 once the end has been reached.
  Char getNextChar();
  void
  getLocationLineAndColumn(Location location, int& line, int& column) const;
  JSONCPP_STRING getLocationLineAndColumn(Location location) const;
  void addComment(Location begin, Location end, CommentPlacement placement);
  // Reads a token, skipping comment tokens when comments are allowed.
  void skipCommentTokens(Token& token);

  static JSONCPP_STRING normalizeEOL(Location begin, Location end);
  static bool containsNewLine(Location begin, Location end);

  typedef std::stack<Value*> Nodes;
  // Stack of values under construction; the top is the current value.
  Nodes nodes_;
  // Errors recorded so far, in order of occurrence.
  Errors errors_;
  JSONCPP_STRING document_;
  // Document range and the current read position within it.
  Location begin_;
  Location end_;
  Location current_;
  // End position of, and pointer to, the most recently read value; used to
  // decide whether a comment trails that value on the same line.
  Location lastValueEnd_;
  Value* lastValue_;
  // Comment text accumulated for attachment to the next value read.
  JSONCPP_STRING commentsBefore_;

  OurFeatures const features_;
  bool collectComments_;
};  // OurReader
01038 
// complete copy of Reader impl, for OurReader
01040 
01041 bool OurReader::containsNewLine(OurReader::Location begin, OurReader::Location end) {
01042   for (; begin < end; ++begin)
01043     if (*begin == '\n' || *begin == '\r')
01044       return true;
01045   return false;
01046 }
01047 
01048 OurReader::OurReader(OurFeatures const& features)
01049     : errors_(), document_(), begin_(), end_(), current_(), lastValueEnd_(),
01050       lastValue_(), commentsBefore_(),
01051       features_(features), collectComments_() {
01052 }
01053 
01054 bool OurReader::parse(const char* beginDoc,
01055                    const char* endDoc,
01056                    Value& root,
01057                    bool collectComments) {
01058   if (!features_.allowComments_) {
01059     collectComments = false;
01060   }
01061 
01062   begin_ = beginDoc;
01063   end_ = endDoc;
01064   collectComments_ = collectComments;
01065   current_ = begin_;
01066   lastValueEnd_ = 0;
01067   lastValue_ = 0;
01068   commentsBefore_.clear();
01069   errors_.clear();
01070   while (!nodes_.empty())
01071     nodes_.pop();
01072   nodes_.push(&root);
01073 
01074   bool successful = readValue();
01075   Token token;
01076   skipCommentTokens(token);
01077   if (features_.failIfExtra_) {
01078     if ((features_.strictRoot_ || token.type_ != tokenError) && token.type_ != tokenEndOfStream) {
01079       addError("Extra non-whitespace after JSON value.", token);
01080       return false;
01081     }
01082   }
01083   if (collectComments_ && !commentsBefore_.empty())
01084     root.setComment(commentsBefore_, commentAfter);
01085   if (features_.strictRoot_) {
01086     if (!root.isArray() && !root.isObject()) {
01087       // Set error location to start of doc, ideally should be first token found
01088       // in doc
01089       token.type_ = tokenError;
01090       token.start_ = beginDoc;
01091       token.end_ = endDoc;
01092       addError(
01093           "A valid JSON document must be either an array or an object value.",
01094           token);
01095       return false;
01096     }
01097   }
01098   return successful;
01099 }
01100 
01101 bool OurReader::readValue() {
01102   //  To preserve the old behaviour we cast size_t to int.
01103   if (static_cast<int>(nodes_.size()) > features_.stackLimit_) throwRuntimeError("Exceeded stackLimit in readValue().");
01104   Token token;
01105   skipCommentTokens(token);
01106   bool successful = true;
01107 
01108   if (collectComments_ && !commentsBefore_.empty()) {
01109     currentValue().setComment(commentsBefore_, commentBefore);
01110     commentsBefore_.clear();
01111   }
01112 
01113   switch (token.type_) {
01114   case tokenObjectBegin:
01115     successful = readObject(token);
01116     currentValue().setOffsetLimit(current_ - begin_);
01117     break;
01118   case tokenArrayBegin:
01119     successful = readArray(token);
01120     currentValue().setOffsetLimit(current_ - begin_);
01121     break;
01122   case tokenNumber:
01123     successful = decodeNumber(token);
01124     break;
01125   case tokenString:
01126     successful = decodeString(token);
01127     break;
01128   case tokenTrue:
01129     {
01130     Value v(true);
01131     currentValue().swapPayload(v);
01132     currentValue().setOffsetStart(token.start_ - begin_);
01133     currentValue().setOffsetLimit(token.end_ - begin_);
01134     }
01135     break;
01136   case tokenFalse:
01137     {
01138     Value v(false);
01139     currentValue().swapPayload(v);
01140     currentValue().setOffsetStart(token.start_ - begin_);
01141     currentValue().setOffsetLimit(token.end_ - begin_);
01142     }
01143     break;
01144   case tokenNull:
01145     {
01146     Value v;
01147     currentValue().swapPayload(v);
01148     currentValue().setOffsetStart(token.start_ - begin_);
01149     currentValue().setOffsetLimit(token.end_ - begin_);
01150     }
01151     break;
01152   case tokenNaN:
01153     {
01154     Value v(std::numeric_limits<double>::quiet_NaN());
01155     currentValue().swapPayload(v);
01156     currentValue().setOffsetStart(token.start_ - begin_);
01157     currentValue().setOffsetLimit(token.end_ - begin_);
01158     }
01159     break;
01160   case tokenPosInf:
01161     {
01162     Value v(std::numeric_limits<double>::infinity());
01163     currentValue().swapPayload(v);
01164     currentValue().setOffsetStart(token.start_ - begin_);
01165     currentValue().setOffsetLimit(token.end_ - begin_);
01166     }
01167     break;
01168   case tokenNegInf:
01169     {
01170     Value v(-std::numeric_limits<double>::infinity());
01171     currentValue().swapPayload(v);
01172     currentValue().setOffsetStart(token.start_ - begin_);
01173     currentValue().setOffsetLimit(token.end_ - begin_);
01174     }
01175     break;
01176   case tokenArraySeparator:
01177   case tokenObjectEnd:
01178   case tokenArrayEnd:
01179     if (features_.allowDroppedNullPlaceholders_) {
01180       // "Un-read" the current token and mark the current value as a null
01181       // token.
01182       current_--;
01183       Value v;
01184       currentValue().swapPayload(v);
01185       currentValue().setOffsetStart(current_ - begin_ - 1);
01186       currentValue().setOffsetLimit(current_ - begin_);
01187       break;
01188     } // else, fall through ...
01189   default:
01190     currentValue().setOffsetStart(token.start_ - begin_);
01191     currentValue().setOffsetLimit(token.end_ - begin_);
01192     return addError("Syntax error: value, object or array expected.", token);
01193   }
01194 
01195   if (collectComments_) {
01196     lastValueEnd_ = current_;
01197     lastValue_ = &currentValue();
01198   }
01199 
01200   return successful;
01201 }
01202 
01203 void OurReader::skipCommentTokens(Token& token) {
01204   if (features_.allowComments_) {
01205     do {
01206       readToken(token);
01207     } while (token.type_ == tokenComment);
01208   } else {
01209     readToken(token);
01210   }
01211 }
01212 
01213 bool OurReader::readToken(Token& token) {
01214   skipSpaces();
01215   token.start_ = current_;
01216   Char c = getNextChar();
01217   bool ok = true;
01218   switch (c) {
01219   case '{':
01220     token.type_ = tokenObjectBegin;
01221     break;
01222   case '}':
01223     token.type_ = tokenObjectEnd;
01224     break;
01225   case '[':
01226     token.type_ = tokenArrayBegin;
01227     break;
01228   case ']':
01229     token.type_ = tokenArrayEnd;
01230     break;
01231   case '"':
01232     token.type_ = tokenString;
01233     ok = readString();
01234     break;
01235   case '\'':
01236     if (features_.allowSingleQuotes_) {
01237     token.type_ = tokenString;
01238     ok = readStringSingleQuote();
01239     break;
01240     } // else fall through
01241   case '/':
01242     token.type_ = tokenComment;
01243     ok = readComment();
01244     break;
01245   case '0':
01246   case '1':
01247   case '2':
01248   case '3':
01249   case '4':
01250   case '5':
01251   case '6':
01252   case '7':
01253   case '8':
01254   case '9':
01255     token.type_ = tokenNumber;
01256     readNumber(false);
01257     break;
01258   case '-':
01259     if (readNumber(true)) {
01260       token.type_ = tokenNumber;
01261     } else {
01262       token.type_ = tokenNegInf;
01263       ok = features_.allowSpecialFloats_ && match("nfinity", 7);
01264     }
01265     break;
01266   case 't':
01267     token.type_ = tokenTrue;
01268     ok = match("rue", 3);
01269     break;
01270   case 'f':
01271     token.type_ = tokenFalse;
01272     ok = match("alse", 4);
01273     break;
01274   case 'n':
01275     token.type_ = tokenNull;
01276     ok = match("ull", 3);
01277     break;
01278   case 'N':
01279     if (features_.allowSpecialFloats_) {
01280       token.type_ = tokenNaN;
01281       ok = match("aN", 2);
01282     } else {
01283       ok = false;
01284     }
01285     break;
01286   case 'I':
01287     if (features_.allowSpecialFloats_) {
01288       token.type_ = tokenPosInf;
01289       ok = match("nfinity", 7);
01290     } else {
01291       ok = false;
01292     }
01293     break;
01294   case ',':
01295     token.type_ = tokenArraySeparator;
01296     break;
01297   case ':':
01298     token.type_ = tokenMemberSeparator;
01299     break;
01300   case 0:
01301     token.type_ = tokenEndOfStream;
01302     break;
01303   default:
01304     ok = false;
01305     break;
01306   }
01307   if (!ok)
01308     token.type_ = tokenError;
01309   token.end_ = current_;
01310   return true;
01311 }
01312 
01313 void OurReader::skipSpaces() {
01314   while (current_ != end_) {
01315     Char c = *current_;
01316     if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
01317       ++current_;
01318     else
01319       break;
01320   }
01321 }
01322 
01323 bool OurReader::match(Location pattern, int patternLength) {
01324   if (end_ - current_ < patternLength)
01325     return false;
01326   int index = patternLength;
01327   while (index--)
01328     if (current_[index] != pattern[index])
01329       return false;
01330   current_ += patternLength;
01331   return true;
01332 }
01333 
01334 bool OurReader::readComment() {
01335   Location commentBegin = current_ - 1;
01336   Char c = getNextChar();
01337   bool successful = false;
01338   if (c == '*')
01339     successful = readCStyleComment();
01340   else if (c == '/')
01341     successful = readCppStyleComment();
01342   if (!successful)
01343     return false;
01344 
01345   if (collectComments_) {
01346     CommentPlacement placement = commentBefore;
01347     if (lastValueEnd_ && !containsNewLine(lastValueEnd_, commentBegin)) {
01348       if (c != '*' || !containsNewLine(commentBegin, current_))
01349         placement = commentAfterOnSameLine;
01350     }
01351 
01352     addComment(commentBegin, current_, placement);
01353   }
01354   return true;
01355 }
01356 
01357 JSONCPP_STRING OurReader::normalizeEOL(OurReader::Location begin, OurReader::Location end) {
01358   JSONCPP_STRING normalized;
01359   normalized.reserve(static_cast<size_t>(end - begin));
01360   OurReader::Location current = begin;
01361   while (current != end) {
01362     char c = *current++;
01363     if (c == '\r') {
01364       if (current != end && *current == '\n')
01365          // convert dos EOL
01366          ++current;
01367       // convert Mac EOL
01368       normalized += '\n';
01369     } else {
01370       normalized += c;
01371     }
01372   }
01373   return normalized;
01374 }
01375 
01376 void
01377 OurReader::addComment(Location begin, Location end, CommentPlacement placement) {
01378   assert(collectComments_);
01379   const JSONCPP_STRING& normalized = normalizeEOL(begin, end);
01380   if (placement == commentAfterOnSameLine) {
01381     assert(lastValue_ != 0);
01382     lastValue_->setComment(normalized, placement);
01383   } else {
01384     commentsBefore_ += normalized;
01385   }
01386 }
01387 
01388 bool OurReader::readCStyleComment() {
01389   while ((current_ + 1) < end_) {
01390     Char c = getNextChar();
01391     if (c == '*' && *current_ == '/')
01392       break;
01393   }
01394   return getNextChar() == '/';
01395 }
01396 
01397 bool OurReader::readCppStyleComment() {
01398   while (current_ != end_) {
01399     Char c = getNextChar();
01400     if (c == '\n')
01401       break;
01402     if (c == '\r') {
01403       // Consume DOS EOL. It will be normalized in addComment.
01404       if (current_ != end_ && *current_ == '\n')
01405         getNextChar();
01406       // Break on Moc OS 9 EOL.
01407       break;
01408     }
01409   }
01410   return true;
01411 }
01412 
01413 bool OurReader::readNumber(bool checkInf) {
01414   const char *p = current_;
01415   if (checkInf && p != end_ && *p == 'I') {
01416     current_ = ++p;
01417     return false;
01418   }
01419   char c = '0'; // stopgap for already consumed character
01420   // integral part
01421   while (c >= '0' && c <= '9')
01422     c = (current_ = p) < end_ ? *p++ : '\0';
01423   // fractional part
01424   if (c == '.') {
01425     c = (current_ = p) < end_ ? *p++ : '\0';
01426     while (c >= '0' && c <= '9')
01427       c = (current_ = p) < end_ ? *p++ : '\0';
01428   }
01429   // exponential part
01430   if (c == 'e' || c == 'E') {
01431     c = (current_ = p) < end_ ? *p++ : '\0';
01432     if (c == '+' || c == '-')
01433       c = (current_ = p) < end_ ? *p++ : '\0';
01434     while (c >= '0' && c <= '9')
01435       c = (current_ = p) < end_ ? *p++ : '\0';
01436   }
01437   return true;
01438 }
01439 bool OurReader::readString() {
01440   Char c = 0;
01441   while (current_ != end_) {
01442     c = getNextChar();
01443     if (c == '\\')
01444       getNextChar();
01445     else if (c == '"')
01446       break;
01447   }
01448   return c == '"';
01449 }
01450 
01451 
01452 bool OurReader::readStringSingleQuote() {
01453   Char c = 0;
01454   while (current_ != end_) {
01455     c = getNextChar();
01456     if (c == '\\')
01457       getNextChar();
01458     else if (c == '\'')
01459       break;
01460   }
01461   return c == '\'';
01462 }
01463 
01464 bool OurReader::readObject(Token& tokenStart) {
01465   Token tokenName;
01466   JSONCPP_STRING name;
01467   Value init(objectValue);
01468   currentValue().swapPayload(init);
01469   currentValue().setOffsetStart(tokenStart.start_ - begin_);
01470   while (readToken(tokenName)) {
01471     bool initialTokenOk = true;
01472     while (tokenName.type_ == tokenComment && initialTokenOk)
01473       initialTokenOk = readToken(tokenName);
01474     if (!initialTokenOk)
01475       break;
01476     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
01477       return true;
01478     name.clear();
01479     if (tokenName.type_ == tokenString) {
01480       if (!decodeString(tokenName, name))
01481         return recoverFromError(tokenObjectEnd);
01482     } else if (tokenName.type_ == tokenNumber && features_.allowNumericKeys_) {
01483       Value numberName;
01484       if (!decodeNumber(tokenName, numberName))
01485         return recoverFromError(tokenObjectEnd);
01486       name = numberName.asString();
01487     } else {
01488       break;
01489     }
01490 
01491     Token colon;
01492     if (!readToken(colon) || colon.type_ != tokenMemberSeparator) {
01493       return addErrorAndRecover(
01494           "Missing ':' after object member name", colon, tokenObjectEnd);
01495     }
01496     if (name.length() >= (1U<<30)) throwRuntimeError("keylength >= 2^30");
01497     if (features_.rejectDupKeys_ && currentValue().isMember(name)) {
01498       JSONCPP_STRING msg = "Duplicate key: '" + name + "'";
01499       return addErrorAndRecover(
01500           msg, tokenName, tokenObjectEnd);
01501     }
01502     Value& value = currentValue()[name];
01503     nodes_.push(&value);
01504     bool ok = readValue();
01505     nodes_.pop();
01506     if (!ok) // error already set
01507       return recoverFromError(tokenObjectEnd);
01508 
01509     Token comma;
01510     if (!readToken(comma) ||
01511         (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
01512          comma.type_ != tokenComment)) {
01513       return addErrorAndRecover(
01514           "Missing ',' or '}' in object declaration", comma, tokenObjectEnd);
01515     }
01516     bool finalizeTokenOk = true;
01517     while (comma.type_ == tokenComment && finalizeTokenOk)
01518       finalizeTokenOk = readToken(comma);
01519     if (comma.type_ == tokenObjectEnd)
01520       return true;
01521   }
01522   return addErrorAndRecover(
01523       "Missing '}' or object member name", tokenName, tokenObjectEnd);
01524 }
01525 
01526 bool OurReader::readArray(Token& tokenStart) {
01527   Value init(arrayValue);
01528   currentValue().swapPayload(init);
01529   currentValue().setOffsetStart(tokenStart.start_ - begin_);
01530   skipSpaces();
01531   if (current_ != end_ && *current_ == ']') // empty array
01532   {
01533     Token endArray;
01534     readToken(endArray);
01535     return true;
01536   }
01537   int index = 0;
01538   for (;;) {
01539     Value& value = currentValue()[index++];
01540     nodes_.push(&value);
01541     bool ok = readValue();
01542     nodes_.pop();
01543     if (!ok) // error already set
01544       return recoverFromError(tokenArrayEnd);
01545 
01546     Token token;
01547     // Accept Comment after last item in the array.
01548     ok = readToken(token);
01549     while (token.type_ == tokenComment && ok) {
01550       ok = readToken(token);
01551     }
01552     bool badTokenType =
01553         (token.type_ != tokenArraySeparator && token.type_ != tokenArrayEnd);
01554     if (!ok || badTokenType) {
01555       return addErrorAndRecover(
01556           "Missing ',' or ']' in array declaration", token, tokenArrayEnd);
01557     }
01558     if (token.type_ == tokenArrayEnd)
01559       break;
01560   }
01561   return true;
01562 }
01563 
01564 bool OurReader::decodeNumber(Token& token) {
01565   Value decoded;
01566   if (!decodeNumber(token, decoded))
01567     return false;
01568   currentValue().swapPayload(decoded);
01569   currentValue().setOffsetStart(token.start_ - begin_);
01570   currentValue().setOffsetLimit(token.end_ - begin_);
01571   return true;
01572 }
01573 
01574 bool OurReader::decodeNumber(Token& token, Value& decoded) {
01575   // Attempts to parse the number as an integer. If the number is
01576   // larger than the maximum supported value of an integer then
01577   // we decode the number as a double.
01578   Location current = token.start_;
01579   bool isNegative = *current == '-';
01580   if (isNegative)
01581     ++current;
01582   // TODO: Help the compiler do the div and mod at compile time or get rid of them.
01583   Value::LargestUInt maxIntegerValue =
01584       isNegative ? Value::LargestUInt(-Value::minLargestInt)
01585                  : Value::maxLargestUInt;
01586   Value::LargestUInt threshold = maxIntegerValue / 10;
01587   Value::LargestUInt value = 0;
01588   while (current < token.end_) {
01589     Char c = *current++;
01590     if (c < '0' || c > '9')
01591       return decodeDouble(token, decoded);
01592     Value::UInt digit(static_cast<Value::UInt>(c - '0'));
01593     if (value >= threshold) {
01594       // We've hit or exceeded the max value divided by 10 (rounded down). If
01595       // a) we've only just touched the limit, b) this is the last digit, and
01596       // c) it's small enough to fit in that rounding delta, we're okay.
01597       // Otherwise treat this number as a double to avoid overflow.
01598       if (value > threshold || current != token.end_ ||
01599           digit > maxIntegerValue % 10) {
01600         return decodeDouble(token, decoded);
01601       }
01602     }
01603     value = value * 10 + digit;
01604   }
01605   if (isNegative)
01606     decoded = -Value::LargestInt(value);
01607   else if (value <= Value::LargestUInt(Value::maxInt))
01608     decoded = Value::LargestInt(value);
01609   else
01610     decoded = value;
01611   return true;
01612 }
01613 
01614 bool OurReader::decodeDouble(Token& token) {
01615   Value decoded;
01616   if (!decodeDouble(token, decoded))
01617     return false;
01618   currentValue().swapPayload(decoded);
01619   currentValue().setOffsetStart(token.start_ - begin_);
01620   currentValue().setOffsetLimit(token.end_ - begin_);
01621   return true;
01622 }
01623 
01624 bool OurReader::decodeDouble(Token& token, Value& decoded) {
01625   double value = 0;
01626   const int bufferSize = 32;
01627   int count;
01628   ptrdiff_t const length = token.end_ - token.start_;
01629 
01630   // Sanity check to avoid buffer overflow exploits.
01631   if (length < 0) {
01632     return addError("Unable to parse token length", token);
01633   }
01634   size_t const ulength = static_cast<size_t>(length);
01635 
01636   // Avoid using a string constant for the format control string given to
01637   // sscanf, as this can cause hard to debug crashes on OS X. See here for more
01638   // info:
01639   //
01640   //     http://developer.apple.com/library/mac/#DOCUMENTATION/DeveloperTools/gcc-4.0.1/gcc/Incompatibilities.html
01641   char format[] = "%lf";
01642 
01643   if (length <= bufferSize) {
01644     Char buffer[bufferSize + 1];
01645     memcpy(buffer, token.start_, ulength);
01646     buffer[length] = 0;
01647     fixNumericLocaleInput(buffer, buffer + length);
01648     count = sscanf(buffer, format, &value);
01649   } else {
01650     JSONCPP_STRING buffer(token.start_, token.end_);
01651     count = sscanf(buffer.c_str(), format, &value);
01652   }
01653 
01654   if (count != 1)
01655     return addError("'" + JSONCPP_STRING(token.start_, token.end_) +
01656                         "' is not a number.",
01657                     token);
01658   decoded = value;
01659   return true;
01660 }
01661 
01662 bool OurReader::decodeString(Token& token) {
01663   JSONCPP_STRING decoded_string;
01664   if (!decodeString(token, decoded_string))
01665     return false;
01666   Value decoded(decoded_string);
01667   currentValue().swapPayload(decoded);
01668   currentValue().setOffsetStart(token.start_ - begin_);
01669   currentValue().setOffsetLimit(token.end_ - begin_);
01670   return true;
01671 }
01672 
01673 bool OurReader::decodeString(Token& token, JSONCPP_STRING& decoded) {
01674   decoded.reserve(static_cast<size_t>(token.end_ - token.start_ - 2));
01675   Location current = token.start_ + 1; // skip '"'
01676   Location end = token.end_ - 1;       // do not include '"'
01677   while (current != end) {
01678     Char c = *current++;
01679     if (c == '"')
01680       break;
01681     else if (c == '\\') {
01682       if (current == end)
01683         return addError("Empty escape sequence in string", token, current);
01684       Char escape = *current++;
01685       switch (escape) {
01686       case '"':
01687         decoded += '"';
01688         break;
01689       case '/':
01690         decoded += '/';
01691         break;
01692       case '\\':
01693         decoded += '\\';
01694         break;
01695       case 'b':
01696         decoded += '\b';
01697         break;
01698       case 'f':
01699         decoded += '\f';
01700         break;
01701       case 'n':
01702         decoded += '\n';
01703         break;
01704       case 'r':
01705         decoded += '\r';
01706         break;
01707       case 't':
01708         decoded += '\t';
01709         break;
01710       case 'u': {
01711         unsigned int unicode;
01712         if (!decodeUnicodeCodePoint(token, current, end, unicode))
01713           return false;
01714         decoded += codePointToUTF8(unicode);
01715       } break;
01716       default:
01717         return addError("Bad escape sequence in string", token, current);
01718       }
01719     } else {
01720       decoded += c;
01721     }
01722   }
01723   return true;
01724 }
01725 
01726 bool OurReader::decodeUnicodeCodePoint(Token& token,
01727                                     Location& current,
01728                                     Location end,
01729                                     unsigned int& unicode) {
01730 
01731   if (!decodeUnicodeEscapeSequence(token, current, end, unicode))
01732     return false;
01733   if (unicode >= 0xD800 && unicode <= 0xDBFF) {
01734     // surrogate pairs
01735     if (end - current < 6)
01736       return addError(
01737           "additional six characters expected to parse unicode surrogate pair.",
01738           token,
01739           current);
01740     unsigned int surrogatePair;
01741     if (*(current++) == '\\' && *(current++) == 'u') {
01742       if (decodeUnicodeEscapeSequence(token, current, end, surrogatePair)) {
01743         unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF);
01744       } else
01745         return false;
01746     } else
01747       return addError("expecting another \\u token to begin the second half of "
01748                       "a unicode surrogate pair",
01749                       token,
01750                       current);
01751   }
01752   return true;
01753 }
01754 
01755 bool OurReader::decodeUnicodeEscapeSequence(Token& token,
01756                                          Location& current,
01757                                          Location end,
01758                                          unsigned int& ret_unicode) {
01759   if (end - current < 4)
01760     return addError(
01761         "Bad unicode escape sequence in string: four digits expected.",
01762         token,
01763         current);
01764   int unicode = 0;
01765   for (int index = 0; index < 4; ++index) {
01766     Char c = *current++;
01767     unicode *= 16;
01768     if (c >= '0' && c <= '9')
01769       unicode += c - '0';
01770     else if (c >= 'a' && c <= 'f')
01771       unicode += c - 'a' + 10;
01772     else if (c >= 'A' && c <= 'F')
01773       unicode += c - 'A' + 10;
01774     else
01775       return addError(
01776           "Bad unicode escape sequence in string: hexadecimal digit expected.",
01777           token,
01778           current);
01779   }
01780   ret_unicode = static_cast<unsigned int>(unicode);
01781   return true;
01782 }
01783 
01784 bool
01785 OurReader::addError(const JSONCPP_STRING& message, Token& token, Location extra) {
01786   ErrorInfo info;
01787   info.token_ = token;
01788   info.message_ = message;
01789   info.extra_ = extra;
01790   errors_.push_back(info);
01791   return false;
01792 }
01793 
01794 bool OurReader::recoverFromError(TokenType skipUntilToken) {
01795   size_t errorCount = errors_.size();
01796   Token skip;
01797   for (;;) {
01798     if (!readToken(skip))
01799       errors_.resize(errorCount); // discard errors caused by recovery
01800     if (skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream)
01801       break;
01802   }
01803   errors_.resize(errorCount);
01804   return false;
01805 }
01806 
01807 bool OurReader::addErrorAndRecover(const JSONCPP_STRING& message,
01808                                 Token& token,
01809                                 TokenType skipUntilToken) {
01810   addError(message, token);
01811   return recoverFromError(skipUntilToken);
01812 }
01813 
01814 Value& OurReader::currentValue() { return *(nodes_.top()); }
01815 
01816 OurReader::Char OurReader::getNextChar() {
01817   if (current_ == end_)
01818     return 0;
01819   return *current_++;
01820 }
01821 
01822 void OurReader::getLocationLineAndColumn(Location location,
01823                                       int& line,
01824                                       int& column) const {
01825   Location current = begin_;
01826   Location lastLineStart = current;
01827   line = 0;
01828   while (current < location && current != end_) {
01829     Char c = *current++;
01830     if (c == '\r') {
01831       if (*current == '\n')
01832         ++current;
01833       lastLineStart = current;
01834       ++line;
01835     } else if (c == '\n') {
01836       lastLineStart = current;
01837       ++line;
01838     }
01839   }
01840   // column & line start at 1
01841   column = int(location - lastLineStart) + 1;
01842   ++line;
01843 }
01844 
01845 JSONCPP_STRING OurReader::getLocationLineAndColumn(Location location) const {
01846   int line, column;
01847   getLocationLineAndColumn(location, line, column);
01848   char buffer[18 + 16 + 16 + 1];
01849   snprintf(buffer, sizeof(buffer), "Line %d, Column %d", line, column);
01850   return buffer;
01851 }
01852 
01853 JSONCPP_STRING OurReader::getFormattedErrorMessages() const {
01854   JSONCPP_STRING formattedMessage;
01855   for (Errors::const_iterator itError = errors_.begin();
01856        itError != errors_.end();
01857        ++itError) {
01858     const ErrorInfo& error = *itError;
01859     formattedMessage +=
01860         "* " + getLocationLineAndColumn(error.token_.start_) + "\n";
01861     formattedMessage += "  " + error.message_ + "\n";
01862     if (error.extra_)
01863       formattedMessage +=
01864           "See " + getLocationLineAndColumn(error.extra_) + " for detail.\n";
01865   }
01866   return formattedMessage;
01867 }
01868 
01869 std::vector<OurReader::StructuredError> OurReader::getStructuredErrors() const {
01870   std::vector<OurReader::StructuredError> allErrors;
01871   for (Errors::const_iterator itError = errors_.begin();
01872        itError != errors_.end();
01873        ++itError) {
01874     const ErrorInfo& error = *itError;
01875     OurReader::StructuredError structured;
01876     structured.offset_start = error.token_.start_ - begin_;
01877     structured.offset_limit = error.token_.end_ - begin_;
01878     structured.message = error.message_;
01879     allErrors.push_back(structured);
01880   }
01881   return allErrors;
01882 }
01883 
01884 bool OurReader::pushError(const Value& value, const JSONCPP_STRING& message) {
01885   ptrdiff_t length = end_ - begin_;
01886   if(value.getOffsetStart() > length
01887     || value.getOffsetLimit() > length)
01888     return false;
01889   Token token;
01890   token.type_ = tokenError;
01891   token.start_ = begin_ + value.getOffsetStart();
01892   token.end_ = end_ + value.getOffsetLimit();
01893   ErrorInfo info;
01894   info.token_ = token;
01895   info.message_ = message;
01896   info.extra_ = 0;
01897   errors_.push_back(info);
01898   return true;
01899 }
01900 
01901 bool OurReader::pushError(const Value& value, const JSONCPP_STRING& message, const Value& extra) {
01902   ptrdiff_t length = end_ - begin_;
01903   if(value.getOffsetStart() > length
01904     || value.getOffsetLimit() > length
01905     || extra.getOffsetLimit() > length)
01906     return false;
01907   Token token;
01908   token.type_ = tokenError;
01909   token.start_ = begin_ + value.getOffsetStart();
01910   token.end_ = begin_ + value.getOffsetLimit();
01911   ErrorInfo info;
01912   info.token_ = token;
01913   info.message_ = message;
01914   info.extra_ = begin_ + extra.getOffsetStart();
01915   errors_.push_back(info);
01916   return true;
01917 }
01918 
01919 bool OurReader::good() const {
01920   return !errors_.size();
01921 }
01922 
01923 
01924 class OurCharReader : public CharReader {
01925   bool const collectComments_;
01926   OurReader reader_;
01927 public:
01928   OurCharReader(
01929     bool collectComments,
01930     OurFeatures const& features)
01931   : collectComments_(collectComments)
01932   , reader_(features)
01933   {}
01934   bool parse(
01935       char const* beginDoc, char const* endDoc,
01936       Value* root, JSONCPP_STRING* errs) JSONCPP_OVERRIDE {
01937     bool ok = reader_.parse(beginDoc, endDoc, *root, collectComments_);
01938     if (errs) {
01939       *errs = reader_.getFormattedErrorMessages();
01940     }
01941     return ok;
01942   }
01943 };
01944 
01945 CharReaderBuilder::CharReaderBuilder()
01946 {
01947   setDefaults(&settings_);
01948 }
01949 CharReaderBuilder::~CharReaderBuilder()
01950 {}
01951 CharReader* CharReaderBuilder::newCharReader() const
01952 {
01953   bool collectComments = settings_["collectComments"].asBool();
01954   OurFeatures features = OurFeatures::all();
01955   features.allowComments_ = settings_["allowComments"].asBool();
01956   features.strictRoot_ = settings_["strictRoot"].asBool();
01957   features.allowDroppedNullPlaceholders_ = settings_["allowDroppedNullPlaceholders"].asBool();
01958   features.allowNumericKeys_ = settings_["allowNumericKeys"].asBool();
01959   features.allowSingleQuotes_ = settings_["allowSingleQuotes"].asBool();
01960   features.stackLimit_ = settings_["stackLimit"].asInt();
01961   features.failIfExtra_ = settings_["failIfExtra"].asBool();
01962   features.rejectDupKeys_ = settings_["rejectDupKeys"].asBool();
01963   features.allowSpecialFloats_ = settings_["allowSpecialFloats"].asBool();
01964   return new OurCharReader(collectComments, features);
01965 }
01966 static void getValidReaderKeys(std::set<JSONCPP_STRING>* valid_keys)
01967 {
01968   valid_keys->clear();
01969   valid_keys->insert("collectComments");
01970   valid_keys->insert("allowComments");
01971   valid_keys->insert("strictRoot");
01972   valid_keys->insert("allowDroppedNullPlaceholders");
01973   valid_keys->insert("allowNumericKeys");
01974   valid_keys->insert("allowSingleQuotes");
01975   valid_keys->insert("stackLimit");
01976   valid_keys->insert("failIfExtra");
01977   valid_keys->insert("rejectDupKeys");
01978   valid_keys->insert("allowSpecialFloats");
01979 }
01980 bool CharReaderBuilder::validate(Json::Value* invalid) const
01981 {
01982   Json::Value my_invalid;
01983   if (!invalid) invalid = &my_invalid;  // so we do not need to test for NULL
01984   Json::Value& inv = *invalid;
01985   std::set<JSONCPP_STRING> valid_keys;
01986   getValidReaderKeys(&valid_keys);
01987   Value::Members keys = settings_.getMemberNames();
01988   size_t n = keys.size();
01989   for (size_t i = 0; i < n; ++i) {
01990     JSONCPP_STRING const& key = keys[i];
01991     if (valid_keys.find(key) == valid_keys.end()) {
01992       inv[key] = settings_[key];
01993     }
01994   }
01995   return 0u == inv.size();
01996 }
01997 Value& CharReaderBuilder::operator[](JSONCPP_STRING key)
01998 {
01999   return settings_[key];
02000 }
02001 // static
02002 void CharReaderBuilder::strictMode(Json::Value* settings)
02003 {
02005   (*settings)["allowComments"] = false;
02006   (*settings)["strictRoot"] = true;
02007   (*settings)["allowDroppedNullPlaceholders"] = false;
02008   (*settings)["allowNumericKeys"] = false;
02009   (*settings)["allowSingleQuotes"] = false;
02010   (*settings)["stackLimit"] = 1000;
02011   (*settings)["failIfExtra"] = true;
02012   (*settings)["rejectDupKeys"] = true;
02013   (*settings)["allowSpecialFloats"] = false;
02015 }
02016 // static
02017 void CharReaderBuilder::setDefaults(Json::Value* settings)
02018 {
02020   (*settings)["collectComments"] = true;
02021   (*settings)["allowComments"] = true;
02022   (*settings)["strictRoot"] = false;
02023   (*settings)["allowDroppedNullPlaceholders"] = false;
02024   (*settings)["allowNumericKeys"] = false;
02025   (*settings)["allowSingleQuotes"] = false;
02026   (*settings)["stackLimit"] = 1000;
02027   (*settings)["failIfExtra"] = false;
02028   (*settings)["rejectDupKeys"] = false;
02029   (*settings)["allowSpecialFloats"] = false;
02031 }
02032 
02034 // global functions
02035 
02036 bool parseFromStream(
02037     CharReader::Factory const& fact, JSONCPP_ISTREAM& sin,
02038     Value* root, JSONCPP_STRING* errs)
02039 {
02040   JSONCPP_OSTRINGSTREAM ssin;
02041   ssin << sin.rdbuf();
02042   JSONCPP_STRING doc = ssin.str();
02043   char const* begin = doc.data();
02044   char const* end = begin + doc.size();
02045   // Note that we do not actually need a null-terminator.
02046   CharReaderPtr const reader(fact.newCharReader());
02047   return reader->parse(begin, end, root, errs);
02048 }
02049 
02050 JSONCPP_ISTREAM& operator>>(JSONCPP_ISTREAM& sin, Value& root) {
02051   CharReaderBuilder b;
02052   JSONCPP_STRING errs;
02053   bool ok = parseFromStream(b, sin, &root, &errs);
02054   if (!ok) {
02055     throwRuntimeError(errs);
02056   }
02057   return sin;
02058 }
02059 
02060 } // namespace Json