// rapidxml
00001 #ifndef RAPIDXML_HPP_INCLUDED 00002 #define RAPIDXML_HPP_INCLUDED 00003 00004 // Copyright (C) 2006, 2009 Marcin Kalicinski 00005 // Version 1.13 00006 // Revision $DateTime: 2009/05/13 01:46:17 $ 00008 00009 // If standard library is disabled, user must provide implementations of required functions and typedefs 00010 #if !defined(RAPIDXML_NO_STDLIB) 00011 #include <cstdlib> // For std::size_t 00012 #include <cassert> // For assert 00013 #include <new> // For placement new 00014 #endif 00015 00016 // On MSVC, disable "conditional expression is constant" warning (level 4). 00017 // This warning is almost impossible to avoid with certain types of templated code 00018 #ifdef _MSC_VER 00019 #pragma warning(push) 00020 #pragma warning(disable:4127) // Conditional expression is constant 00021 #endif 00022 00024 // RAPIDXML_PARSE_ERROR 00025 00026 #if defined(RAPIDXML_NO_EXCEPTIONS) 00027 00028 #define RAPIDXML_PARSE_ERROR(what, where) { parse_error_handler(what, where); assert(0); } 00029 00030 namespace rapidxml 00031 { 00048 void parse_error_handler(const char *what, void *where); 00049 } 00050 00051 #else 00052 00053 #include <exception> // For std::exception 00054 00055 #define RAPIDXML_PARSE_ERROR(what, where) throw parse_error(what, where) 00056 00057 namespace rapidxml 00058 { 00059 00071 class parse_error: public std::exception 00072 { 00073 00074 public: 00075 00077 parse_error(const char *what, void *where) 00078 : m_what(what) 00079 , m_where(where) 00080 { 00081 } 00082 00085 virtual const char *what() const throw() 00086 { 00087 return m_what; 00088 } 00089 00093 template<class Ch> 00094 Ch *where() const 00095 { 00096 return reinterpret_cast<Ch *>(m_where); 00097 } 00098 00099 private: 00100 00101 const char *m_what; 00102 void *m_where; 00103 00104 }; 00105 } 00106 00107 #endif 00108 00110 // Pool sizes 00111 00112 #ifndef RAPIDXML_STATIC_POOL_SIZE 00113 // Size of static memory block of memory_pool. 
00114 // Define RAPIDXML_STATIC_POOL_SIZE before including rapidxml.hpp if you want to override the default value. 00115 // No dynamic memory allocations are performed by memory_pool until static memory is exhausted. 00116 #define RAPIDXML_STATIC_POOL_SIZE (64 * 1024) 00117 #endif 00118 00119 #ifndef RAPIDXML_DYNAMIC_POOL_SIZE 00120 // Size of dynamic memory block of memory_pool. 00121 // Define RAPIDXML_DYNAMIC_POOL_SIZE before including rapidxml.hpp if you want to override the default value. 00122 // After the static block is exhausted, dynamic blocks with approximately this size are allocated by memory_pool. 00123 #define RAPIDXML_DYNAMIC_POOL_SIZE (64 * 1024) 00124 #endif 00125 00126 #ifndef RAPIDXML_ALIGNMENT 00127 // Memory allocation alignment. 00128 // Define RAPIDXML_ALIGNMENT before including rapidxml.hpp if you want to override the default value, which is the size of pointer. 00129 // All memory allocations for nodes, attributes and strings will be aligned to this value. 00130 // This must be a power of 2 and at least 1, otherwise memory_pool will not work. 
00131 #define RAPIDXML_ALIGNMENT sizeof(void *) 00132 #endif 00133 00134 namespace rapidxml 00135 { 00136 // Forward declarations 00137 template<class Ch> class xml_node; 00138 template<class Ch> class xml_attribute; 00139 template<class Ch> class xml_document; 00140 00143 enum node_type 00144 { 00145 node_document, 00146 node_element, 00147 node_data, 00148 node_cdata, 00149 node_comment, 00150 node_declaration, 00151 node_doctype, 00152 node_pi 00153 }; 00154 00156 // Parsing flags 00157 00163 const int parse_no_data_nodes = 0x1; 00164 00172 const int parse_no_element_values = 0x2; 00173 00179 const int parse_no_string_terminators = 0x4; 00180 00186 const int parse_no_entity_translation = 0x8; 00187 00193 const int parse_no_utf8 = 0x10; 00194 00200 const int parse_declaration_node = 0x20; 00201 00207 const int parse_comment_nodes = 0x40; 00208 00215 const int parse_doctype_node = 0x80; 00216 00222 const int parse_pi_nodes = 0x100; 00223 00230 const int parse_validate_closing_tags = 0x200; 00231 00238 const int parse_trim_whitespace = 0x400; 00239 00247 const int parse_normalize_whitespace = 0x800; 00248 00249 // Compound flags 00250 00259 const int parse_default = 0; 00260 00269 const int parse_non_destructive = parse_no_string_terminators | parse_no_entity_translation; 00270 00274 const int parse_fastest = parse_non_destructive | parse_no_data_nodes; 00275 00280 const int parse_full = parse_declaration_node | parse_comment_nodes | parse_doctype_node | parse_pi_nodes | parse_validate_closing_tags; 00281 00283 // Internals 00284 00286 namespace internal 00287 { 00288 00289 // Struct that contains lookup tables for the parser 00290 // It must be a template to allow correct linking (because it has static data members, which are defined in a header file). 
// Character classification / conversion tables used by the parser.
// Every table has 256 entries and is indexed by a character converted to unsigned char.
// Only declared here; the table contents are defined elsewhere in the header.
template<int Dummy>
struct lookup_tables
{
    static const unsigned char lookup_whitespace[256];              // Whitespace table
    static const unsigned char lookup_node_name[256];               // Node name table
    static const unsigned char lookup_text[256];                    // Text table
    static const unsigned char lookup_text_pure_no_ws[256];         // Text table
    static const unsigned char lookup_text_pure_with_ws[256];       // Text table
    static const unsigned char lookup_attribute_name[256];          // Attribute name table
    static const unsigned char lookup_attribute_data_1[256];        // Attribute data table with single quote
    static const unsigned char lookup_attribute_data_1_pure[256];   // Attribute data table with single quote
    static const unsigned char lookup_attribute_data_2[256];        // Attribute data table with double quotes
    static const unsigned char lookup_attribute_data_2_pure[256];   // Attribute data table with double quotes
    static const unsigned char lookup_digits[256];                  // Digits
    static const unsigned char lookup_upcase[256];                  // To uppercase conversion table for ASCII characters
};

// Find length of the string
// p must point to a zero-terminated string; returns the number of characters before the terminator.
template<class Ch>
inline std::size_t measure(const Ch *p)
{
    const Ch *tmp = p;
    while (*tmp)
        ++tmp;
    return static_cast<std::size_t>(tmp - p);
}

// Compare strings for equality
// Returns true when both ranges have the same length and every pair of characters matches.
// With case_sensitive == false, characters are compared after mapping through lookup_upcase.
// That table covers byte values only, so a character of a wider Ch that does not fit in an
// unsigned char matches nothing but itself; indexing the table with its truncated value would
// make unrelated characters (for example U+0141 and 'A') compare equal.
template<class Ch>
inline bool compare(const Ch *p1, std::size_t size1, const Ch *p2, std::size_t size2, bool case_sensitive)
{
    if (size1 != size2)
        return false;
    if (case_sensitive)
    {
        for (const Ch *end = p1 + size1; p1 < end; ++p1, ++p2)
            if (*p1 != *p2)
                return false;
    }
    else
    {
        for (const Ch *end = p1 + size1; p1 < end; ++p1, ++p2)
        {
            if (*p1 == *p2)
                continue;   // Identical characters always match
            const unsigned char byte1 = static_cast<unsigned char>(*p1);
            const unsigned char byte2 = static_cast<unsigned char>(*p2);
            // A character that does not survive the round trip through unsigned char lies outside the table
            if (static_cast<Ch>(byte1) != *p1 || static_cast<Ch>(byte2) != *p2)
                return false;
            if (lookup_tables<0>::lookup_upcase[byte1] != lookup_tables<0>::lookup_upcase[byte2])
                return false;
        }
    }
    return true;
}
00338 } 00340 00342 // Memory pool 00343 00378 template<class Ch = char> 00379 class memory_pool 00380 { 00381 00382 public: 00383 00385 typedef void *(alloc_func)(std::size_t); // Type of user-defined function used to allocate memory 00386 typedef void (free_func)(void *); // Type of user-defined function used to free memory 00388 00390 memory_pool() 00391 : m_alloc_func(0) 00392 , m_free_func(0) 00393 { 00394 init(); 00395 } 00396 00400 ~memory_pool() 00401 { 00402 clear(); 00403 } 00404 00415 xml_node<Ch> *allocate_node(node_type type, 00416 const Ch *name = 0, const Ch *value = 0, 00417 std::size_t name_size = 0, std::size_t value_size = 0) 00418 { 00419 void *memory = allocate_aligned(sizeof(xml_node<Ch>)); 00420 xml_node<Ch> *node = new(memory) xml_node<Ch>(type); 00421 if (name) 00422 { 00423 if (name_size > 0) 00424 node->name(name, name_size); 00425 else 00426 node->name(name); 00427 } 00428 if (value) 00429 { 00430 if (value_size > 0) 00431 node->value(value, value_size); 00432 else 00433 node->value(value); 00434 } 00435 return node; 00436 } 00437 00447 xml_attribute<Ch> *allocate_attribute(const Ch *name = 0, const Ch *value = 0, 00448 std::size_t name_size = 0, std::size_t value_size = 0) 00449 { 00450 void *memory = allocate_aligned(sizeof(xml_attribute<Ch>)); 00451 xml_attribute<Ch> *attribute = new(memory) xml_attribute<Ch>; 00452 if (name) 00453 { 00454 if (name_size > 0) 00455 attribute->name(name, name_size); 00456 else 00457 attribute->name(name); 00458 } 00459 if (value) 00460 { 00461 if (value_size > 0) 00462 attribute->value(value, value_size); 00463 else 00464 attribute->value(value); 00465 } 00466 return attribute; 00467 } 00468 00476 Ch *allocate_string(const Ch *source = 0, std::size_t size = 0) 00477 { 00478 assert(source || size); // Either source or size (or both) must be specified 00479 if (size == 0) 00480 size = internal::measure(source) + 1; 00481 Ch *result = static_cast<Ch *>(allocate_aligned(size * sizeof(Ch))); 00482 if 
(source) 00483 for (std::size_t i = 0; i < size; ++i) 00484 result[i] = source[i]; 00485 return result; 00486 } 00487 00497 xml_node<Ch> *clone_node(const xml_node<Ch> *source, xml_node<Ch> *result = 0) 00498 { 00499 // Prepare result node 00500 if (result) 00501 { 00502 result->remove_all_attributes(); 00503 result->remove_all_nodes(); 00504 result->type(source->type()); 00505 } 00506 else 00507 result = allocate_node(source->type()); 00508 00509 // Clone name and value 00510 result->name(source->name(), source->name_size()); 00511 result->value(source->value(), source->value_size()); 00512 00513 // Clone child nodes and attributes 00514 for (xml_node<Ch> *child = source->first_node(); child; child = child->next_sibling()) 00515 result->append_node(clone_node(child)); 00516 for (xml_attribute<Ch> *attr = source->first_attribute(); attr; attr = attr->next_attribute()) 00517 result->append_attribute(allocate_attribute(attr->name(), attr->value(), attr->name_size(), attr->value_size())); 00518 00519 return result; 00520 } 00521 00525 void clear() 00526 { 00527 while (m_begin != m_static_memory) 00528 { 00529 char *previous_begin = reinterpret_cast<header *>(align(m_begin))->previous_begin; 00530 if (m_free_func) 00531 m_free_func(m_begin); 00532 else 00533 delete[] m_begin; 00534 m_begin = previous_begin; 00535 } 00536 init(); 00537 } 00538 00552 void set_allocator(alloc_func *af, free_func *ff) 00553 { 00554 assert(m_begin == m_static_memory && m_ptr == align(m_begin)); // Verify that no memory is allocated yet 00555 m_alloc_func = af; 00556 m_free_func = ff; 00557 } 00558 00559 private: 00560 00561 struct header 00562 { 00563 char *previous_begin; 00564 }; 00565 00566 void init() 00567 { 00568 m_begin = m_static_memory; 00569 m_ptr = align(m_begin); 00570 m_end = m_static_memory + sizeof(m_static_memory); 00571 } 00572 00573 char *align(char *ptr) 00574 { 00575 std::size_t alignment = ((RAPIDXML_ALIGNMENT - (std::size_t(ptr) & (RAPIDXML_ALIGNMENT - 1))) & 
(RAPIDXML_ALIGNMENT - 1)); 00576 return ptr + alignment; 00577 } 00578 00579 char *allocate_raw(std::size_t size) 00580 { 00581 // Allocate 00582 void *memory; 00583 if (m_alloc_func) // Allocate memory using either user-specified allocation function or global operator new[] 00584 { 00585 memory = m_alloc_func(size); 00586 assert(memory); // Allocator is not allowed to return 0, on failure it must either throw, stop the program or use longjmp 00587 } 00588 else 00589 { 00590 memory = new char[size]; 00591 #ifdef RAPIDXML_NO_EXCEPTIONS 00592 if (!memory) // If exceptions are disabled, verify memory allocation, because new will not be able to throw bad_alloc 00593 RAPIDXML_PARSE_ERROR("out of memory", 0); 00594 #endif 00595 } 00596 return static_cast<char *>(memory); 00597 } 00598 00599 void *allocate_aligned(std::size_t size) 00600 { 00601 // Calculate aligned pointer 00602 char *result = align(m_ptr); 00603 00604 // If not enough memory left in current pool, allocate a new pool 00605 if (result + size > m_end) 00606 { 00607 // Calculate required pool size (may be bigger than RAPIDXML_DYNAMIC_POOL_SIZE) 00608 std::size_t pool_size = RAPIDXML_DYNAMIC_POOL_SIZE; 00609 if (pool_size < size) 00610 pool_size = size; 00611 00612 // Allocate 00613 std::size_t alloc_size = sizeof(header) + (2 * RAPIDXML_ALIGNMENT - 2) + pool_size; // 2 alignments required in worst case: one for header, one for actual allocation 00614 char *raw_memory = allocate_raw(alloc_size); 00615 00616 // Setup new pool in allocated memory 00617 char *pool = align(raw_memory); 00618 header *new_header = reinterpret_cast<header *>(pool); 00619 new_header->previous_begin = m_begin; 00620 m_begin = raw_memory; 00621 m_ptr = pool + sizeof(header); 00622 m_end = raw_memory + alloc_size; 00623 00624 // Calculate aligned pointer again using new pool 00625 result = align(m_ptr); 00626 } 00627 00628 // Update pool and return aligned pointer 00629 m_ptr = result + size; 00630 return result; 00631 } 00632 00633 
char *m_begin; // Start of raw memory making up current pool 00634 char *m_ptr; // First free byte in current pool 00635 char *m_end; // One past last available byte in current pool 00636 char m_static_memory[RAPIDXML_STATIC_POOL_SIZE]; // Static raw memory 00637 alloc_func *m_alloc_func; // Allocator function, or 0 if default is to be used 00638 free_func *m_free_func; // Free function, or 0 if default is to be used 00639 }; 00640 00642 // XML base 00643 00647 template<class Ch = char> 00648 class xml_base 00649 { 00650 00651 public: 00652 00654 // Construction & destruction 00655 00656 // Construct a base with empty name, value and parent 00657 xml_base() 00658 : m_name(0) 00659 , m_value(0) 00660 , m_parent(0) 00661 { 00662 } 00663 00665 // Node data access 00666 00673 Ch *name() const 00674 { 00675 return m_name ? m_name : nullstr(); 00676 } 00677 00681 std::size_t name_size() const 00682 { 00683 return m_name ? m_name_size : 0; 00684 } 00685 00692 Ch *value() const 00693 { 00694 return m_value ? m_value : nullstr(); 00695 } 00696 00700 std::size_t value_size() const 00701 { 00702 return m_value ? 
m_value_size : 0; 00703 } 00704 00706 // Node modification 00707 00721 void name(const Ch *name, std::size_t size) 00722 { 00723 m_name = const_cast<Ch *>(name); 00724 m_name_size = size; 00725 } 00726 00730 void name(const Ch *name) 00731 { 00732 this->name(name, internal::measure(name)); 00733 } 00734 00751 void value(const Ch *value, std::size_t size) 00752 { 00753 m_value = const_cast<Ch *>(value); 00754 m_value_size = size; 00755 } 00756 00760 void value(const Ch *value) 00761 { 00762 this->value(value, internal::measure(value)); 00763 } 00764 00766 // Related nodes access 00767 00770 xml_node<Ch> *parent() const 00771 { 00772 return m_parent; 00773 } 00774 00775 protected: 00776 00777 // Return empty string 00778 static Ch *nullstr() 00779 { 00780 static Ch zero = Ch('\0'); 00781 return &zero; 00782 } 00783 00784 Ch *m_name; // Name of node, or 0 if no name 00785 Ch *m_value; // Value of node, or 0 if no value 00786 std::size_t m_name_size; // Length of node name, or undefined of no name 00787 std::size_t m_value_size; // Length of node value, or undefined if no value 00788 xml_node<Ch> *m_parent; // Pointer to parent node, or 0 if none 00789 00790 }; 00791 00797 template<class Ch = char> 00798 class xml_attribute: public xml_base<Ch> 00799 { 00800 00801 friend class xml_node<Ch>; 00802 00803 public: 00804 00806 // Construction & destruction 00807 00810 xml_attribute() 00811 { 00812 } 00813 00815 // Related nodes access 00816 00819 xml_document<Ch> *document() const 00820 { 00821 if (xml_node<Ch> *node = this->parent()) 00822 { 00823 while (node->parent()) 00824 node = node->parent(); 00825 return node->type() == node_document ? 
static_cast<xml_document<Ch> *>(node) : 0; 00826 } 00827 else 00828 return 0; 00829 } 00830 00836 xml_attribute<Ch> *previous_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 00837 { 00838 if (name) 00839 { 00840 if (name_size == 0) 00841 name_size = internal::measure(name); 00842 for (xml_attribute<Ch> *attribute = m_prev_attribute; attribute; attribute = attribute->m_prev_attribute) 00843 if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) 00844 return attribute; 00845 return 0; 00846 } 00847 else 00848 return this->m_parent ? m_prev_attribute : 0; 00849 } 00850 00856 xml_attribute<Ch> *next_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 00857 { 00858 if (name) 00859 { 00860 if (name_size == 0) 00861 name_size = internal::measure(name); 00862 for (xml_attribute<Ch> *attribute = m_next_attribute; attribute; attribute = attribute->m_next_attribute) 00863 if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) 00864 return attribute; 00865 return 0; 00866 } 00867 else 00868 return this->m_parent ? 
m_next_attribute : 0; 00869 } 00870 00871 private: 00872 00873 xml_attribute<Ch> *m_prev_attribute; // Pointer to previous sibling of attribute, or 0 if none; only valid if parent is non-zero 00874 xml_attribute<Ch> *m_next_attribute; // Pointer to next sibling of attribute, or 0 if none; only valid if parent is non-zero 00875 00876 }; 00877 00879 // XML node 00880 00889 template<class Ch = char> 00890 class xml_node: public xml_base<Ch> 00891 { 00892 00893 public: 00894 00896 // Construction & destruction 00897 00901 xml_node(node_type type) 00902 : m_type(type) 00903 , m_first_node(0) 00904 , m_first_attribute(0) 00905 { 00906 } 00907 00909 // Node data access 00910 00913 node_type type() const 00914 { 00915 return m_type; 00916 } 00917 00919 // Related nodes access 00920 00923 xml_document<Ch> *document() const 00924 { 00925 xml_node<Ch> *node = const_cast<xml_node<Ch> *>(this); 00926 while (node->parent()) 00927 node = node->parent(); 00928 return node->type() == node_document ? static_cast<xml_document<Ch> *>(node) : 0; 00929 } 00930 00936 xml_node<Ch> *first_node(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 00937 { 00938 if (name) 00939 { 00940 if (name_size == 0) 00941 name_size = internal::measure(name); 00942 for (xml_node<Ch> *child = m_first_node; child; child = child->next_sibling()) 00943 if (internal::compare(child->name(), child->name_size(), name, name_size, case_sensitive)) 00944 return child; 00945 return 0; 00946 } 00947 else 00948 return m_first_node; 00949 } 00950 00958 xml_node<Ch> *last_node(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 00959 { 00960 assert(m_first_node); // Cannot query for last child if node has no children 00961 if (name) 00962 { 00963 if (name_size == 0) 00964 name_size = internal::measure(name); 00965 for (xml_node<Ch> *child = m_last_node; child; child = child->previous_sibling()) 00966 if (internal::compare(child->name(), child->name_size(), name, 
name_size, case_sensitive)) 00967 return child; 00968 return 0; 00969 } 00970 else 00971 return m_last_node; 00972 } 00973 00981 xml_node<Ch> *previous_sibling(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 00982 { 00983 assert(this->m_parent); // Cannot query for siblings if node has no parent 00984 if (name) 00985 { 00986 if (name_size == 0) 00987 name_size = internal::measure(name); 00988 for (xml_node<Ch> *sibling = m_prev_sibling; sibling; sibling = sibling->m_prev_sibling) 00989 if (internal::compare(sibling->name(), sibling->name_size(), name, name_size, case_sensitive)) 00990 return sibling; 00991 return 0; 00992 } 00993 else 00994 return m_prev_sibling; 00995 } 00996 01004 xml_node<Ch> *next_sibling(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 01005 { 01006 assert(this->m_parent); // Cannot query for siblings if node has no parent 01007 if (name) 01008 { 01009 if (name_size == 0) 01010 name_size = internal::measure(name); 01011 for (xml_node<Ch> *sibling = m_next_sibling; sibling; sibling = sibling->m_next_sibling) 01012 if (internal::compare(sibling->name(), sibling->name_size(), name, name_size, case_sensitive)) 01013 return sibling; 01014 return 0; 01015 } 01016 else 01017 return m_next_sibling; 01018 } 01019 01025 xml_attribute<Ch> *first_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 01026 { 01027 if (name) 01028 { 01029 if (name_size == 0) 01030 name_size = internal::measure(name); 01031 for (xml_attribute<Ch> *attribute = m_first_attribute; attribute; attribute = attribute->m_next_attribute) 01032 if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) 01033 return attribute; 01034 return 0; 01035 } 01036 else 01037 return m_first_attribute; 01038 } 01039 01045 xml_attribute<Ch> *last_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 01046 { 01047 if 
(name) 01048 { 01049 if (name_size == 0) 01050 name_size = internal::measure(name); 01051 for (xml_attribute<Ch> *attribute = m_last_attribute; attribute; attribute = attribute->m_prev_attribute) 01052 if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) 01053 return attribute; 01054 return 0; 01055 } 01056 else 01057 return m_first_attribute ? m_last_attribute : 0; 01058 } 01059 01061 // Node modification 01062 01065 void type(node_type type) 01066 { 01067 m_type = type; 01068 } 01069 01071 // Node manipulation 01072 01076 void prepend_node(xml_node<Ch> *child) 01077 { 01078 assert(child && !child->parent() && child->type() != node_document); 01079 if (first_node()) 01080 { 01081 child->m_next_sibling = m_first_node; 01082 m_first_node->m_prev_sibling = child; 01083 } 01084 else 01085 { 01086 child->m_next_sibling = 0; 01087 m_last_node = child; 01088 } 01089 m_first_node = child; 01090 child->m_parent = this; 01091 child->m_prev_sibling = 0; 01092 } 01093 01097 void append_node(xml_node<Ch> *child) 01098 { 01099 assert(child && !child->parent() && child->type() != node_document); 01100 if (first_node()) 01101 { 01102 child->m_prev_sibling = m_last_node; 01103 m_last_node->m_next_sibling = child; 01104 } 01105 else 01106 { 01107 child->m_prev_sibling = 0; 01108 m_first_node = child; 01109 } 01110 m_last_node = child; 01111 child->m_parent = this; 01112 child->m_next_sibling = 0; 01113 } 01114 01119 void insert_node(xml_node<Ch> *where, xml_node<Ch> *child) 01120 { 01121 assert(!where || where->parent() == this); 01122 assert(child && !child->parent() && child->type() != node_document); 01123 if (where == m_first_node) 01124 prepend_node(child); 01125 else if (where == 0) 01126 append_node(child); 01127 else 01128 { 01129 child->m_prev_sibling = where->m_prev_sibling; 01130 child->m_next_sibling = where; 01131 where->m_prev_sibling->m_next_sibling = child; 01132 where->m_prev_sibling = child; 01133 child->m_parent = 
this; 01134 } 01135 } 01136 01140 void remove_first_node() 01141 { 01142 assert(first_node()); 01143 xml_node<Ch> *child = m_first_node; 01144 m_first_node = child->m_next_sibling; 01145 if (child->m_next_sibling) 01146 child->m_next_sibling->m_prev_sibling = 0; 01147 else 01148 m_last_node = 0; 01149 child->m_parent = 0; 01150 } 01151 01155 void remove_last_node() 01156 { 01157 assert(first_node()); 01158 xml_node<Ch> *child = m_last_node; 01159 if (child->m_prev_sibling) 01160 { 01161 m_last_node = child->m_prev_sibling; 01162 child->m_prev_sibling->m_next_sibling = 0; 01163 } 01164 else 01165 m_first_node = 0; 01166 child->m_parent = 0; 01167 } 01168 01170 // \param where Pointer to child to be removed. 01171 void remove_node(xml_node<Ch> *where) 01172 { 01173 assert(where && where->parent() == this); 01174 assert(first_node()); 01175 if (where == m_first_node) 01176 remove_first_node(); 01177 else if (where == m_last_node) 01178 remove_last_node(); 01179 else 01180 { 01181 where->m_prev_sibling->m_next_sibling = where->m_next_sibling; 01182 where->m_next_sibling->m_prev_sibling = where->m_prev_sibling; 01183 where->m_parent = 0; 01184 } 01185 } 01186 01188 void remove_all_nodes() 01189 { 01190 for (xml_node<Ch> *node = first_node(); node; node = node->m_next_sibling) 01191 node->m_parent = 0; 01192 m_first_node = 0; 01193 } 01194 01197 void prepend_attribute(xml_attribute<Ch> *attribute) 01198 { 01199 assert(attribute && !attribute->parent()); 01200 if (first_attribute()) 01201 { 01202 attribute->m_next_attribute = m_first_attribute; 01203 m_first_attribute->m_prev_attribute = attribute; 01204 } 01205 else 01206 { 01207 attribute->m_next_attribute = 0; 01208 m_last_attribute = attribute; 01209 } 01210 m_first_attribute = attribute; 01211 attribute->m_parent = this; 01212 attribute->m_prev_attribute = 0; 01213 } 01214 01217 void append_attribute(xml_attribute<Ch> *attribute) 01218 { 01219 assert(attribute && !attribute->parent()); 01220 if (first_attribute()) 
01221 { 01222 attribute->m_prev_attribute = m_last_attribute; 01223 m_last_attribute->m_next_attribute = attribute; 01224 } 01225 else 01226 { 01227 attribute->m_prev_attribute = 0; 01228 m_first_attribute = attribute; 01229 } 01230 m_last_attribute = attribute; 01231 attribute->m_parent = this; 01232 attribute->m_next_attribute = 0; 01233 } 01234 01239 void insert_attribute(xml_attribute<Ch> *where, xml_attribute<Ch> *attribute) 01240 { 01241 assert(!where || where->parent() == this); 01242 assert(attribute && !attribute->parent()); 01243 if (where == m_first_attribute) 01244 prepend_attribute(attribute); 01245 else if (where == 0) 01246 append_attribute(attribute); 01247 else 01248 { 01249 attribute->m_prev_attribute = where->m_prev_attribute; 01250 attribute->m_next_attribute = where; 01251 where->m_prev_attribute->m_next_attribute = attribute; 01252 where->m_prev_attribute = attribute; 01253 attribute->m_parent = this; 01254 } 01255 } 01256 01260 void remove_first_attribute() 01261 { 01262 assert(first_attribute()); 01263 xml_attribute<Ch> *attribute = m_first_attribute; 01264 if (attribute->m_next_attribute) 01265 { 01266 attribute->m_next_attribute->m_prev_attribute = 0; 01267 } 01268 else 01269 m_last_attribute = 0; 01270 attribute->m_parent = 0; 01271 m_first_attribute = attribute->m_next_attribute; 01272 } 01273 01277 void remove_last_attribute() 01278 { 01279 assert(first_attribute()); 01280 xml_attribute<Ch> *attribute = m_last_attribute; 01281 if (attribute->m_prev_attribute) 01282 { 01283 attribute->m_prev_attribute->m_next_attribute = 0; 01284 m_last_attribute = attribute->m_prev_attribute; 01285 } 01286 else 01287 m_first_attribute = 0; 01288 attribute->m_parent = 0; 01289 } 01290 01293 void remove_attribute(xml_attribute<Ch> *where) 01294 { 01295 assert(first_attribute() && where->parent() == this); 01296 if (where == m_first_attribute) 01297 remove_first_attribute(); 01298 else if (where == m_last_attribute) 01299 remove_last_attribute(); 01300 
else 01301 { 01302 where->m_prev_attribute->m_next_attribute = where->m_next_attribute; 01303 where->m_next_attribute->m_prev_attribute = where->m_prev_attribute; 01304 where->m_parent = 0; 01305 } 01306 } 01307 01309 void remove_all_attributes() 01310 { 01311 for (xml_attribute<Ch> *attribute = first_attribute(); attribute; attribute = attribute->m_next_attribute) 01312 attribute->m_parent = 0; 01313 m_first_attribute = 0; 01314 } 01315 01316 private: 01317 01319 // Restrictions 01320 01321 // No copying 01322 xml_node(const xml_node &); 01323 void operator =(const xml_node &); 01324 01326 // Data members 01327 01328 // Note that some of the pointers below have UNDEFINED values if certain other pointers are 0. 01329 // This is required for maximum performance, as it allows the parser to omit initialization of 01330 // unneded/redundant values. 01331 // 01332 // The rules are as follows: 01333 // 1. first_node and first_attribute contain valid pointers, or 0 if node has no children/attributes respectively 01334 // 2. last_node and last_attribute are valid only if node has at least one child/attribute respectively, otherwise they contain garbage 01335 // 3. 
prev_sibling and next_sibling are valid only if node has a parent, otherwise they contain garbage 01336 01337 node_type m_type; // Type of node; always valid 01338 xml_node<Ch> *m_first_node; // Pointer to first child node, or 0 if none; always valid 01339 xml_node<Ch> *m_last_node; // Pointer to last child node, or 0 if none; this value is only valid if m_first_node is non-zero 01340 xml_attribute<Ch> *m_first_attribute; // Pointer to first attribute of node, or 0 if none; always valid 01341 xml_attribute<Ch> *m_last_attribute; // Pointer to last attribute of node, or 0 if none; this value is only valid if m_first_attribute is non-zero 01342 xml_node<Ch> *m_prev_sibling; // Pointer to previous sibling of node, or 0 if none; this value is only valid if m_parent is non-zero 01343 xml_node<Ch> *m_next_sibling; // Pointer to next sibling of node, or 0 if none; this value is only valid if m_parent is non-zero 01344 01345 }; 01346 01348 // XML document 01349 01357 template<class Ch = char> 01358 class xml_document: public xml_node<Ch>, public memory_pool<Ch> 01359 { 01360 01361 public: 01362 01364 xml_document() 01365 : xml_node<Ch>(node_document) 01366 { 01367 } 01368 01380 template<int Flags> 01381 void parse(Ch *text) 01382 { 01383 assert(text); 01384 01385 // Remove current contents 01386 this->remove_all_nodes(); 01387 this->remove_all_attributes(); 01388 01389 // Parse BOM, if any 01390 parse_bom<Flags>(text); 01391 01392 // Parse children 01393 while (1) 01394 { 01395 // Skip whitespace before node 01396 skip<whitespace_pred, Flags>(text); 01397 if (*text == 0) 01398 break; 01399 01400 // Parse and append new child 01401 if (*text == Ch('<')) 01402 { 01403 ++text; // Skip '<' 01404 if (xml_node<Ch> *node = parse_node<Flags>(text)) 01405 this->append_node(node); 01406 } 01407 else 01408 RAPIDXML_PARSE_ERROR("expected <", text); 01409 } 01410 01411 } 01412 01415 void clear() 01416 { 01417 this->remove_all_nodes(); 01418 this->remove_all_attributes(); 01419 
memory_pool<Ch>::clear(); 01420 } 01421 01422 private: 01423 01425 // Internal character utility functions 01426 01427 // Detect whitespace character 01428 struct whitespace_pred 01429 { 01430 static unsigned char test(Ch ch) 01431 { 01432 return internal::lookup_tables<0>::lookup_whitespace[static_cast<unsigned char>(ch)]; 01433 } 01434 }; 01435 01436 // Detect node name character 01437 struct node_name_pred 01438 { 01439 static unsigned char test(Ch ch) 01440 { 01441 return internal::lookup_tables<0>::lookup_node_name[static_cast<unsigned char>(ch)]; 01442 } 01443 }; 01444 01445 // Detect attribute name character 01446 struct attribute_name_pred 01447 { 01448 static unsigned char test(Ch ch) 01449 { 01450 return internal::lookup_tables<0>::lookup_attribute_name[static_cast<unsigned char>(ch)]; 01451 } 01452 }; 01453 01454 // Detect text character (PCDATA) 01455 struct text_pred 01456 { 01457 static unsigned char test(Ch ch) 01458 { 01459 return internal::lookup_tables<0>::lookup_text[static_cast<unsigned char>(ch)]; 01460 } 01461 }; 01462 01463 // Detect text character (PCDATA) that does not require processing 01464 struct text_pure_no_ws_pred 01465 { 01466 static unsigned char test(Ch ch) 01467 { 01468 return internal::lookup_tables<0>::lookup_text_pure_no_ws[static_cast<unsigned char>(ch)]; 01469 } 01470 }; 01471 01472 // Detect text character (PCDATA) that does not require processing 01473 struct text_pure_with_ws_pred 01474 { 01475 static unsigned char test(Ch ch) 01476 { 01477 return internal::lookup_tables<0>::lookup_text_pure_with_ws[static_cast<unsigned char>(ch)]; 01478 } 01479 }; 01480 01481 // Detect attribute value character 01482 template<Ch Quote> 01483 struct attribute_value_pred 01484 { 01485 static unsigned char test(Ch ch) 01486 { 01487 if (Quote == Ch('\'')) 01488 return internal::lookup_tables<0>::lookup_attribute_data_1[static_cast<unsigned char>(ch)]; 01489 if (Quote == Ch('\"')) 01490 return 
internal::lookup_tables<0>::lookup_attribute_data_2[static_cast<unsigned char>(ch)]; 01491 return 0; // Should never be executed, to avoid warnings on Comeau 01492 } 01493 }; 01494 01495 // Detect attribute value character 01496 template<Ch Quote> 01497 struct attribute_value_pure_pred 01498 { 01499 static unsigned char test(Ch ch) 01500 { 01501 if (Quote == Ch('\'')) 01502 return internal::lookup_tables<0>::lookup_attribute_data_1_pure[static_cast<unsigned char>(ch)]; 01503 if (Quote == Ch('\"')) 01504 return internal::lookup_tables<0>::lookup_attribute_data_2_pure[static_cast<unsigned char>(ch)]; 01505 return 0; // Should never be executed, to avoid warnings on Comeau 01506 } 01507 }; 01508 01509 // Insert coded character, using UTF8 or 8-bit ASCII 01510 template<int Flags> 01511 static void insert_coded_character(Ch *&text, unsigned long code) 01512 { 01513 if (Flags & parse_no_utf8) 01514 { 01515 // Insert 8-bit ASCII character 01516 // Todo: possibly verify that code is less than 256 and use replacement char otherwise? 
01517 text[0] = static_cast<unsigned char>(code); 01518 text += 1; 01519 } 01520 else 01521 { 01522 // Insert UTF8 sequence 01523 if (code < 0x80) // 1 byte sequence 01524 { 01525 text[0] = static_cast<unsigned char>(code); 01526 text += 1; 01527 } 01528 else if (code < 0x800) // 2 byte sequence 01529 { 01530 text[1] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 01531 text[0] = static_cast<unsigned char>(code | 0xC0); 01532 text += 2; 01533 } 01534 else if (code < 0x10000) // 3 byte sequence 01535 { 01536 text[2] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 01537 text[1] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 01538 text[0] = static_cast<unsigned char>(code | 0xE0); 01539 text += 3; 01540 } 01541 else if (code < 0x110000) // 4 byte sequence 01542 { 01543 text[3] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 01544 text[2] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 01545 text[1] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 01546 text[0] = static_cast<unsigned char>(code | 0xF0); 01547 text += 4; 01548 } 01549 else // Invalid, only codes up to 0x10FFFF are allowed in Unicode 01550 { 01551 RAPIDXML_PARSE_ERROR("invalid numeric character entity", text); 01552 } 01553 } 01554 } 01555 01556 // Skip characters until predicate evaluates to true 01557 template<class StopPred, int Flags> 01558 static void skip(Ch *&text) 01559 { 01560 Ch *tmp = text; 01561 while (StopPred::test(*tmp)) 01562 ++tmp; 01563 text = tmp; 01564 } 01565 01566 // Skip characters until predicate evaluates to true while doing the following: 01567 // - replacing XML character entity references with proper characters (' & " < > &#...;) 01568 // - condensing whitespace sequences to single space character 01569 template<class StopPred, class StopPredPure, int Flags> 01570 static Ch *skip_and_expand_character_refs(Ch *&text) 01571 { 01572 // If entity translation, whitespace condense and 
whitespace trimming is disabled, use plain skip 01573 if (Flags & parse_no_entity_translation && 01574 !(Flags & parse_normalize_whitespace) && 01575 !(Flags & parse_trim_whitespace)) 01576 { 01577 skip<StopPred, Flags>(text); 01578 return text; 01579 } 01580 01581 // Use simple skip until first modification is detected 01582 skip<StopPredPure, Flags>(text); 01583 01584 // Use translation skip 01585 Ch *src = text; 01586 Ch *dest = src; 01587 while (StopPred::test(*src)) 01588 { 01589 // If entity translation is enabled 01590 if (!(Flags & parse_no_entity_translation)) 01591 { 01592 // Test if replacement is needed 01593 if (src[0] == Ch('&')) 01594 { 01595 switch (src[1]) 01596 { 01597 01598 // & ' 01599 case Ch('a'): 01600 if (src[2] == Ch('m') && src[3] == Ch('p') && src[4] == Ch(';')) 01601 { 01602 *dest = Ch('&'); 01603 ++dest; 01604 src += 5; 01605 continue; 01606 } 01607 if (src[2] == Ch('p') && src[3] == Ch('o') && src[4] == Ch('s') && src[5] == Ch(';')) 01608 { 01609 *dest = Ch('\''); 01610 ++dest; 01611 src += 6; 01612 continue; 01613 } 01614 break; 01615 01616 // " 01617 case Ch('q'): 01618 if (src[2] == Ch('u') && src[3] == Ch('o') && src[4] == Ch('t') && src[5] == Ch(';')) 01619 { 01620 *dest = Ch('"'); 01621 ++dest; 01622 src += 6; 01623 continue; 01624 } 01625 break; 01626 01627 // > 01628 case Ch('g'): 01629 if (src[2] == Ch('t') && src[3] == Ch(';')) 01630 { 01631 *dest = Ch('>'); 01632 ++dest; 01633 src += 4; 01634 continue; 01635 } 01636 break; 01637 01638 // < 01639 case Ch('l'): 01640 if (src[2] == Ch('t') && src[3] == Ch(';')) 01641 { 01642 *dest = Ch('<'); 01643 ++dest; 01644 src += 4; 01645 continue; 01646 } 01647 break; 01648 01649 // &#...; - assumes ASCII 01650 case Ch('#'): 01651 if (src[2] == Ch('x')) 01652 { 01653 unsigned long code = 0; 01654 src += 3; // Skip &#x 01655 while (1) 01656 { 01657 unsigned char digit = internal::lookup_tables<0>::lookup_digits[static_cast<unsigned char>(*src)]; 01658 if (digit == 0xFF) 01659 break; 01660 
code = code * 16 + digit; 01661 ++src; 01662 } 01663 insert_coded_character<Flags>(dest, code); // Put character in output 01664 } 01665 else 01666 { 01667 unsigned long code = 0; 01668 src += 2; // Skip &# 01669 while (1) 01670 { 01671 unsigned char digit = internal::lookup_tables<0>::lookup_digits[static_cast<unsigned char>(*src)]; 01672 if (digit == 0xFF) 01673 break; 01674 code = code * 10 + digit; 01675 ++src; 01676 } 01677 insert_coded_character<Flags>(dest, code); // Put character in output 01678 } 01679 if (*src == Ch(';')) 01680 ++src; 01681 else 01682 RAPIDXML_PARSE_ERROR("expected ;", src); 01683 continue; 01684 01685 // Something else 01686 default: 01687 // Ignore, just copy '&' verbatim 01688 break; 01689 01690 } 01691 } 01692 } 01693 01694 // If whitespace condensing is enabled 01695 if (Flags & parse_normalize_whitespace) 01696 { 01697 // Test if condensing is needed 01698 if (whitespace_pred::test(*src)) 01699 { 01700 *dest = Ch(' '); ++dest; // Put single space in dest 01701 ++src; // Skip first whitespace char 01702 // Skip remaining whitespace chars 01703 while (whitespace_pred::test(*src)) 01704 ++src; 01705 continue; 01706 } 01707 } 01708 01709 // No replacement, only copy character 01710 *dest++ = *src++; 01711 01712 } 01713 01714 // Return new end 01715 text = src; 01716 return dest; 01717 01718 } 01719 01721 // Internal parsing functions 01722 01723 // Parse BOM, if any 01724 template<int Flags> 01725 void parse_bom(Ch *&text) 01726 { 01727 // UTF-8? 01728 if (static_cast<unsigned char>(text[0]) == 0xEF && 01729 static_cast<unsigned char>(text[1]) == 0xBB && 01730 static_cast<unsigned char>(text[2]) == 0xBF) 01731 { 01732 text += 3; // Skup utf-8 bom 01733 } 01734 } 01735 01736 // Parse XML declaration (<?xml...) 
01737 template<int Flags> 01738 xml_node<Ch> *parse_xml_declaration(Ch *&text) 01739 { 01740 // If parsing of declaration is disabled 01741 if (!(Flags & parse_declaration_node)) 01742 { 01743 // Skip until end of declaration 01744 while (text[0] != Ch('?') || text[1] != Ch('>')) 01745 { 01746 if (!text[0]) 01747 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 01748 ++text; 01749 } 01750 text += 2; // Skip '?>' 01751 return 0; 01752 } 01753 01754 // Create declaration 01755 xml_node<Ch> *declaration = this->allocate_node(node_declaration); 01756 01757 // Skip whitespace before attributes or ?> 01758 skip<whitespace_pred, Flags>(text); 01759 01760 // Parse declaration attributes 01761 parse_node_attributes<Flags>(text, declaration); 01762 01763 // Skip ?> 01764 if (text[0] != Ch('?') || text[1] != Ch('>')) 01765 RAPIDXML_PARSE_ERROR("expected ?>", text); 01766 text += 2; 01767 01768 return declaration; 01769 } 01770 01771 // Parse XML comment (<!--...) 01772 template<int Flags> 01773 xml_node<Ch> *parse_comment(Ch *&text) 01774 { 01775 // If parsing of comments is disabled 01776 if (!(Flags & parse_comment_nodes)) 01777 { 01778 // Skip until end of comment 01779 while (text[0] != Ch('-') || text[1] != Ch('-') || text[2] != Ch('>')) 01780 { 01781 if (!text[0]) 01782 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 01783 ++text; 01784 } 01785 text += 3; // Skip '-->' 01786 return 0; // Do not produce comment node 01787 } 01788 01789 // Remember value start 01790 Ch *value = text; 01791 01792 // Skip until end of comment 01793 while (text[0] != Ch('-') || text[1] != Ch('-') || text[2] != Ch('>')) 01794 { 01795 if (!text[0]) 01796 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 01797 ++text; 01798 } 01799 01800 // Create comment node 01801 xml_node<Ch> *comment = this->allocate_node(node_comment); 01802 comment->value(value, text - value); 01803 01804 // Place zero terminator after comment value 01805 if (!(Flags & parse_no_string_terminators)) 01806 
*text = Ch('\0'); 01807 01808 text += 3; // Skip '-->' 01809 return comment; 01810 } 01811 01812 // Parse DOCTYPE 01813 template<int Flags> 01814 xml_node<Ch> *parse_doctype(Ch *&text) 01815 { 01816 // Remember value start 01817 Ch *value = text; 01818 01819 // Skip to > 01820 while (*text != Ch('>')) 01821 { 01822 // Determine character type 01823 switch (*text) 01824 { 01825 01826 // If '[' encountered, scan for matching ending ']' using naive algorithm with depth 01827 // This works for all W3C test files except for 2 most wicked 01828 case Ch('['): 01829 { 01830 ++text; // Skip '[' 01831 int depth = 1; 01832 while (depth > 0) 01833 { 01834 switch (*text) 01835 { 01836 case Ch('['): ++depth; break; 01837 case Ch(']'): --depth; break; 01838 case 0: RAPIDXML_PARSE_ERROR("unexpected end of data", text); 01839 } 01840 ++text; 01841 } 01842 break; 01843 } 01844 01845 // Error on end of text 01846 case Ch('\0'): 01847 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 01848 01849 // Other character, skip it 01850 default: 01851 ++text; 01852 01853 } 01854 } 01855 01856 // If DOCTYPE nodes enabled 01857 if (Flags & parse_doctype_node) 01858 { 01859 // Create a new doctype node 01860 xml_node<Ch> *doctype = this->allocate_node(node_doctype); 01861 doctype->value(value, text - value); 01862 01863 // Place zero terminator after value 01864 if (!(Flags & parse_no_string_terminators)) 01865 *text = Ch('\0'); 01866 01867 text += 1; // skip '>' 01868 return doctype; 01869 } 01870 else 01871 { 01872 text += 1; // skip '>' 01873 return 0; 01874 } 01875 01876 } 01877 01878 // Parse PI 01879 template<int Flags> 01880 xml_node<Ch> *parse_pi(Ch *&text) 01881 { 01882 // If creation of PI nodes is enabled 01883 if (Flags & parse_pi_nodes) 01884 { 01885 // Create pi node 01886 xml_node<Ch> *pi = this->allocate_node(node_pi); 01887 01888 // Extract PI target name 01889 Ch *name = text; 01890 skip<node_name_pred, Flags>(text); 01891 if (text == name) 01892 
RAPIDXML_PARSE_ERROR("expected PI target", text); 01893 pi->name(name, text - name); 01894 01895 // Skip whitespace between pi target and pi 01896 skip<whitespace_pred, Flags>(text); 01897 01898 // Remember start of pi 01899 Ch *value = text; 01900 01901 // Skip to '?>' 01902 while (text[0] != Ch('?') || text[1] != Ch('>')) 01903 { 01904 if (*text == Ch('\0')) 01905 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 01906 ++text; 01907 } 01908 01909 // Set pi value (verbatim, no entity expansion or whitespace normalization) 01910 pi->value(value, text - value); 01911 01912 // Place zero terminator after name and value 01913 if (!(Flags & parse_no_string_terminators)) 01914 { 01915 pi->name()[pi->name_size()] = Ch('\0'); 01916 pi->value()[pi->value_size()] = Ch('\0'); 01917 } 01918 01919 text += 2; // Skip '?>' 01920 return pi; 01921 } 01922 else 01923 { 01924 // Skip to '?>' 01925 while (text[0] != Ch('?') || text[1] != Ch('>')) 01926 { 01927 if (*text == Ch('\0')) 01928 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 01929 ++text; 01930 } 01931 text += 2; // Skip '?>' 01932 return 0; 01933 } 01934 } 01935 01936 // Parse and append data 01937 // Return character that ends data. 
01938 // This is necessary because this character might have been overwritten by a terminating 0 01939 template<int Flags> 01940 Ch parse_and_append_data(xml_node<Ch> *node, Ch *&text, Ch *contents_start) 01941 { 01942 // Backup to contents start if whitespace trimming is disabled 01943 if (!(Flags & parse_trim_whitespace)) 01944 text = contents_start; 01945 01946 // Skip until end of data 01947 Ch *value = text, *end; 01948 if (Flags & parse_normalize_whitespace) 01949 end = skip_and_expand_character_refs<text_pred, text_pure_with_ws_pred, Flags>(text); 01950 else 01951 end = skip_and_expand_character_refs<text_pred, text_pure_no_ws_pred, Flags>(text); 01952 01953 // Trim trailing whitespace if flag is set; leading was already trimmed by whitespace skip after > 01954 if (Flags & parse_trim_whitespace) 01955 { 01956 if (Flags & parse_normalize_whitespace) 01957 { 01958 // Whitespace is already condensed to single space characters by skipping function, so just trim 1 char off the end 01959 if (*(end - 1) == Ch(' ')) 01960 --end; 01961 } 01962 else 01963 { 01964 // Backup until non-whitespace character is found 01965 while (whitespace_pred::test(*(end - 1))) 01966 --end; 01967 } 01968 } 01969 01970 // If characters are still left between end and value (this test is only necessary if normalization is enabled) 01971 // Create new data node 01972 if (!(Flags & parse_no_data_nodes)) 01973 { 01974 xml_node<Ch> *data = this->allocate_node(node_data); 01975 data->value(value, end - value); 01976 node->append_node(data); 01977 } 01978 01979 // Add data to parent node if no data exists yet 01980 if (!(Flags & parse_no_element_values)) 01981 if (*node->value() == Ch('\0')) 01982 node->value(value, end - value); 01983 01984 // Place zero terminator after value 01985 if (!(Flags & parse_no_string_terminators)) 01986 { 01987 Ch ch = *text; 01988 *end = Ch('\0'); 01989 return ch; // Return character that ends data; this is required because zero terminator overwritten it 01990 } 
01991 01992 // Return character that ends data 01993 return *text; 01994 } 01995 01996 // Parse CDATA 01997 template<int Flags> 01998 xml_node<Ch> *parse_cdata(Ch *&text) 01999 { 02000 // If CDATA is disabled 02001 if (Flags & parse_no_data_nodes) 02002 { 02003 // Skip until end of cdata 02004 while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) 02005 { 02006 if (!text[0]) 02007 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 02008 ++text; 02009 } 02010 text += 3; // Skip ]]> 02011 return 0; // Do not produce CDATA node 02012 } 02013 02014 // Skip until end of cdata 02015 Ch *value = text; 02016 while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) 02017 { 02018 if (!text[0]) 02019 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 02020 ++text; 02021 } 02022 02023 // Create new cdata node 02024 xml_node<Ch> *cdata = this->allocate_node(node_cdata); 02025 cdata->value(value, text - value); 02026 02027 // Place zero terminator after value 02028 if (!(Flags & parse_no_string_terminators)) 02029 *text = Ch('\0'); 02030 02031 text += 3; // Skip ]]> 02032 return cdata; 02033 } 02034 02035 // Parse element node 02036 template<int Flags> 02037 xml_node<Ch> *parse_element(Ch *&text) 02038 { 02039 // Create element node 02040 xml_node<Ch> *element = this->allocate_node(node_element); 02041 02042 // Extract element name 02043 Ch *name = text; 02044 skip<node_name_pred, Flags>(text); 02045 if (text == name) 02046 RAPIDXML_PARSE_ERROR("expected element name", text); 02047 element->name(name, text - name); 02048 02049 // Skip whitespace between element name and attributes or > 02050 skip<whitespace_pred, Flags>(text); 02051 02052 // Parse attributes, if any 02053 parse_node_attributes<Flags>(text, element); 02054 02055 // Determine ending type 02056 if (*text == Ch('>')) 02057 { 02058 ++text; 02059 parse_node_contents<Flags>(text, element); 02060 } 02061 else if (*text == Ch('/')) 02062 { 02063 ++text; 02064 if (*text != Ch('>')) 
02065 RAPIDXML_PARSE_ERROR("expected >", text); 02066 ++text; 02067 } 02068 else 02069 RAPIDXML_PARSE_ERROR("expected >", text); 02070 02071 // Place zero terminator after name 02072 if (!(Flags & parse_no_string_terminators)) 02073 element->name()[element->name_size()] = Ch('\0'); 02074 02075 // Return parsed element 02076 return element; 02077 } 02078 02079 // Determine node type, and parse it 02080 template<int Flags> 02081 xml_node<Ch> *parse_node(Ch *&text) 02082 { 02083 // Parse proper node type 02084 switch (text[0]) 02085 { 02086 02087 // <... 02088 default: 02089 // Parse and append element node 02090 return parse_element<Flags>(text); 02091 02092 // <?... 02093 case Ch('?'): 02094 ++text; // Skip ? 02095 if ((text[0] == Ch('x') || text[0] == Ch('X')) && 02096 (text[1] == Ch('m') || text[1] == Ch('M')) && 02097 (text[2] == Ch('l') || text[2] == Ch('L')) && 02098 whitespace_pred::test(text[3])) 02099 { 02100 // '<?xml ' - xml declaration 02101 text += 4; // Skip 'xml ' 02102 return parse_xml_declaration<Flags>(text); 02103 } 02104 else 02105 { 02106 // Parse PI 02107 return parse_pi<Flags>(text); 02108 } 02109 02110 // <!... 02111 case Ch('!'): 02112 02113 // Parse proper subset of <! 
node 02114 switch (text[1]) 02115 { 02116 02117 // <!- 02118 case Ch('-'): 02119 if (text[2] == Ch('-')) 02120 { 02121 // '<!--' - xml comment 02122 text += 3; // Skip '!--' 02123 return parse_comment<Flags>(text); 02124 } 02125 break; 02126 02127 // <![ 02128 case Ch('['): 02129 if (text[2] == Ch('C') && text[3] == Ch('D') && text[4] == Ch('A') && 02130 text[5] == Ch('T') && text[6] == Ch('A') && text[7] == Ch('[')) 02131 { 02132 // '<![CDATA[' - cdata 02133 text += 8; // Skip '![CDATA[' 02134 return parse_cdata<Flags>(text); 02135 } 02136 break; 02137 02138 // <!D 02139 case Ch('D'): 02140 if (text[2] == Ch('O') && text[3] == Ch('C') && text[4] == Ch('T') && 02141 text[5] == Ch('Y') && text[6] == Ch('P') && text[7] == Ch('E') && 02142 whitespace_pred::test(text[8])) 02143 { 02144 // '<!DOCTYPE ' - doctype 02145 text += 9; // skip '!DOCTYPE ' 02146 return parse_doctype<Flags>(text); 02147 } 02148 02149 } // switch 02150 02151 // Attempt to skip other, unrecognized node types starting with <! 02152 ++text; // Skip ! 02153 while (*text != Ch('>')) 02154 { 02155 if (*text == 0) 02156 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 02157 ++text; 02158 } 02159 ++text; // Skip '>' 02160 return 0; // No node recognized 02161 02162 } 02163 } 02164 02165 // Parse contents of the node - children, data etc. 02166 template<int Flags> 02167 void parse_node_contents(Ch *&text, xml_node<Ch> *node) 02168 { 02169 // For all children and text 02170 while (1) 02171 { 02172 // Skip whitespace between > and node contents 02173 Ch *contents_start = text; // Store start of node contents before whitespace is skipped 02174 skip<whitespace_pred, Flags>(text); 02175 Ch next_char = *text; 02176 02177 // After data nodes, instead of continuing the loop, control jumps here. 02178 // This is because zero termination inside parse_and_append_data() function 02179 // would wreak havoc with the above code. 02180 // Also, skipping whitespace after data nodes is unnecessary. 
02181 after_data_node: 02182 02183 // Determine what comes next: node closing, child node, data node, or 0? 02184 switch (next_char) 02185 { 02186 02187 // Node closing or child node 02188 case Ch('<'): 02189 if (text[1] == Ch('/')) 02190 { 02191 // Node closing 02192 text += 2; // Skip '</' 02193 if (Flags & parse_validate_closing_tags) 02194 { 02195 // Skip and validate closing tag name 02196 Ch *closing_name = text; 02197 skip<node_name_pred, Flags>(text); 02198 if (!internal::compare(node->name(), node->name_size(), closing_name, text - closing_name, true)) 02199 RAPIDXML_PARSE_ERROR("invalid closing tag name", text); 02200 } 02201 else 02202 { 02203 // No validation, just skip name 02204 skip<node_name_pred, Flags>(text); 02205 } 02206 // Skip remaining whitespace after node name 02207 skip<whitespace_pred, Flags>(text); 02208 if (*text != Ch('>')) 02209 RAPIDXML_PARSE_ERROR("expected >", text); 02210 ++text; // Skip '>' 02211 return; // Node closed, finished parsing contents 02212 } 02213 else 02214 { 02215 // Child node 02216 ++text; // Skip '<' 02217 if (xml_node<Ch> *child = parse_node<Flags>(text)) 02218 node->append_node(child); 02219 } 02220 break; 02221 02222 // End of data - error 02223 case Ch('\0'): 02224 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 02225 02226 // Data node 02227 default: 02228 next_char = parse_and_append_data<Flags>(node, text, contents_start); 02229 goto after_data_node; // Bypass regular processing after data nodes 02230 02231 } 02232 } 02233 } 02234 02235 // Parse XML attributes of the node 02236 template<int Flags> 02237 void parse_node_attributes(Ch *&text, xml_node<Ch> *node) 02238 { 02239 // For all attributes 02240 while (attribute_name_pred::test(*text)) 02241 { 02242 // Extract attribute name 02243 Ch *name = text; 02244 ++text; // Skip first character of attribute name 02245 skip<attribute_name_pred, Flags>(text); 02246 if (text == name) 02247 RAPIDXML_PARSE_ERROR("expected attribute name", name); 02248 02249 
// Create new attribute 02250 xml_attribute<Ch> *attribute = this->allocate_attribute(); 02251 attribute->name(name, text - name); 02252 node->append_attribute(attribute); 02253 02254 // Skip whitespace after attribute name 02255 skip<whitespace_pred, Flags>(text); 02256 02257 // Skip = 02258 if (*text != Ch('=')) 02259 RAPIDXML_PARSE_ERROR("expected =", text); 02260 ++text; 02261 02262 // Add terminating zero after name 02263 if (!(Flags & parse_no_string_terminators)) 02264 attribute->name()[attribute->name_size()] = 0; 02265 02266 // Skip whitespace after = 02267 skip<whitespace_pred, Flags>(text); 02268 02269 // Skip quote and remember if it was ' or " 02270 Ch quote = *text; 02271 if (quote != Ch('\'') && quote != Ch('"')) 02272 RAPIDXML_PARSE_ERROR("expected ' or \"", text); 02273 ++text; 02274 02275 // Extract attribute value and expand char refs in it 02276 Ch *value = text, *end; 02277 const int AttFlags = Flags & ~parse_normalize_whitespace; // No whitespace normalization in attributes 02278 if (quote == Ch('\'')) 02279 end = skip_and_expand_character_refs<attribute_value_pred<Ch('\'')>, attribute_value_pure_pred<Ch('\'')>, AttFlags>(text); 02280 else 02281 end = skip_and_expand_character_refs<attribute_value_pred<Ch('"')>, attribute_value_pure_pred<Ch('"')>, AttFlags>(text); 02282 02283 // Set attribute value 02284 attribute->value(value, end - value); 02285 02286 // Make sure that end quote is present 02287 if (*text != quote) 02288 RAPIDXML_PARSE_ERROR("expected ' or \"", text); 02289 ++text; // Skip quote 02290 02291 // Add terminating zero after value 02292 if (!(Flags & parse_no_string_terminators)) 02293 attribute->value()[attribute->value_size()] = 0; 02294 02295 // Skip whitespace after attribute value 02296 skip<whitespace_pred, Flags>(text); 02297 } 02298 } 02299 02300 }; 02301 02303 namespace internal 02304 { 02305 02306 // Whitespace (space \n \r \t) 02307 template<int Dummy> 02308 const unsigned char 
lookup_tables<Dummy>::lookup_whitespace[256] = 02309 { 02310 // 0 1 2 3 4 5 6 7 8 9 A B C D E F 02311 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, // 0 02312 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1 02313 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 2 02314 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3 02315 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4 02316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 5 02317 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6 02318 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 7 02319 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8 02320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9 02321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A 02322 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B 02323 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // C 02324 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // D 02325 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // E 02326 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // F 02327 }; 02328 02329 // Node name (anything but space \n \r \t / > ? 
// \0)
// Indexed by character code: rows are the high hex digit, columns the low
// hex digit. 1 marks a character that may appear in a node name.
template<int Dummy>
const unsigned char lookup_tables<Dummy>::lookup_node_name[256] =
{
  // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,  // 0
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,  // 2
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,  // 3
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
};

// Text (i.e.
// PCDATA) (anything but < \0)
// Indexed by character code: rows are the high hex digit, columns the low
// hex digit. 1 marks a character that is part of text data.
template<int Dummy>
const unsigned char lookup_tables<Dummy>::lookup_text[256] =
{
  // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,  // 3
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
};

// Text (i.e.
// PCDATA) that does not require processing when ws normalization is disabled
// (anything but < \0 &)
// Indexed by character code: rows are the high hex digit, columns the low
// hex digit. 1 marks a character that can be skipped without rewriting.
template<int Dummy>
const unsigned char lookup_tables<Dummy>::lookup_text_pure_no_ws[256] =
{
  // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
     1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,  // 3
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
};

// Text (i.e.
// PCDATA) that does not require processing when ws normalization is enabled
// (anything but < \0 & space \n \r \t)
// Indexed by character code: rows are the high hex digit, columns the low
// hex digit. 1 marks a character that can be skipped without rewriting.
template<int Dummy>
const unsigned char lookup_tables<Dummy>::lookup_text_pure_with_ws[256] =
{
  // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,  // 0
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
     0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,  // 3
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
};

// Attribute name (anything but space \n \r \t / < > = ? !
// \0)
// Indexed by character code: rows are the high hex digit, columns the low
// hex digit. 1 marks a character that may appear in an attribute name.
template<int Dummy>
const unsigned char lookup_tables<Dummy>::lookup_attribute_name[256] =
{
  // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,  // 0
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,  // 2
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,  // 3
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
};

// Attribute data with single quote (anything but ' \0)
template<int Dummy>
const unsigned char lookup_tables<Dummy>::lookup_attribute_data_1[256] =
{
  // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
     1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 3
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
};

// Attribute data with single quote that does not require processing (anything but ' \0 &)
template<int Dummy>
const unsigned char lookup_tables<Dummy>::lookup_attribute_data_1_pure[256] =
{
  // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
     1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 3
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
};

// Attribute data with double quote (anything but " \0)
template<int Dummy>
const unsigned char lookup_tables<Dummy>::lookup_attribute_data_2[256] =
{
  // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
     1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 3
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
};

// Attribute data with double quote that does not require processing (anything but " \0 &)
template<int Dummy>
const unsigned char lookup_tables<Dummy>::lookup_attribute_data_2_pure[256] =
{
  // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
     1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 3
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
};

// Digits (dec and hex, 255 denotes end of numeric character reference)
// '0'-'9' map to 0..9, 'A'-'F' and 'a'-'f' map to 10..15; every other
// character maps to 255. Hex letters therefore do NOT yield 255, which a
// caller parsing a decimal number has to account for.
template<int Dummy>
const unsigned char lookup_tables<Dummy>::lookup_digits[256] =
{
  // 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 0
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 1
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 2
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9,255,255,255,255,255,255,  // 3
    255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255,  // 4
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 5
    255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255,  // 6
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 7
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 8
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 9
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // A
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // B
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // C
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // D
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // E
    255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255   // F
};

// Upper case conversion
template<int Dummy>
const unsigned char lookup_tables<Dummy>::lookup_upcase[256] =
{
  // 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A   B   C   D   E   F
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,   // 0
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,   // 1
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,   // 2
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,   // 3
    64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   // 4
    80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,   // 5
    96, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, // 6 02573 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 123,124,125,126,127, // 7 02574 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, // 8 02575 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159, // 9 02576 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175, // A 02577 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191, // B 02578 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207, // C 02579 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223, // D 02580 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239, // E 02581 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255 // F 02582 }; 02583 } 02585 02586 } 02587 02588 // Undefine internal macros 02589 #undef RAPIDXML_PARSE_ERROR 02590 02591 // On MSVC, restore warnings state 02592 #ifdef _MSC_VER 02593 #pragma warning(pop) 02594 #endif 02595 02596 #endif