ICU 49.1.1
49.1.1
|
00001 /* 00002 ******************************************************************************* 00003 * Copyright (C) 2010-2012, International Business Machines 00004 * Corporation and others. All Rights Reserved. 00005 ******************************************************************************* 00006 * file name: stringtriebuilder.h 00007 * encoding: US-ASCII 00008 * tab size: 8 (not used) 00009 * indentation:4 00010 * 00011 * created on: 2010dec24 00012 * created by: Markus W. Scherer 00013 */ 00014 00015 #ifndef __STRINGTRIEBUILDER_H__ 00016 #define __STRINGTRIEBUILDER_H__ 00017 00018 #include "unicode/utypes.h" 00019 #include "unicode/uobject.h" 00020 00021 // Forward declaration. 00022 struct UHashtable; 00023 typedef struct UHashtable UHashtable; 00024 00029 enum UStringTrieBuildOption { 00034 USTRINGTRIE_BUILD_FAST, 00045 USTRINGTRIE_BUILD_SMALL 00046 }; 00047 00048 U_NAMESPACE_BEGIN 00049 00056 class U_COMMON_API StringTrieBuilder : public UObject { 00057 public: 00058 #ifndef U_HIDE_INTERNAL_API 00059 00060 static UBool hashNode(const void *node); 00062 static UBool equalNodes(const void *left, const void *right); 00063 #endif /* U_HIDE_INTERNAL_API */ 00064 00065 protected: 00066 // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API 00067 // or else the compiler will create a public default constructor. 00069 StringTrieBuilder(); 00071 virtual ~StringTrieBuilder(); 00072 00073 #ifndef U_HIDE_INTERNAL_API 00074 00075 void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode); 00077 void deleteCompactBuilder(); 00078 00080 void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode); 00081 00083 int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex); 00085 int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length); 00086 #endif /* U_HIDE_INTERNAL_API */ 00087 00088 class Node; 00089 00090 #ifndef U_HIDE_INTERNAL_API 00091 00092 Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode); 00094 Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, 00095 int32_t length, UErrorCode &errorCode); 00096 #endif /* U_HIDE_INTERNAL_API */ 00097 00099 virtual int32_t getElementStringLength(int32_t i) const = 0; 00101 virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0; 00103 virtual int32_t getElementValue(int32_t i) const = 0; 00104 00105 // Finds the first unit index after this one where 00106 // the first and last element have different units again. 00108 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0; 00109 00110 // Number of different units at unitIndex. 00112 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0; 00114 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0; 00116 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0; 00117 00119 virtual UBool matchNodesCanHaveValues() const = 0; 00120 00122 virtual int32_t getMaxBranchLinearSubNodeLength() const = 0; 00124 virtual int32_t getMinLinearMatch() const = 0; 00126 virtual int32_t getMaxLinearMatchLength() const = 0; 00127 00128 #ifndef U_HIDE_INTERNAL_API 00129 // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength). 00131 static const int32_t kMaxBranchLinearSubNodeLength=5; 00132 00133 // Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units. 00134 // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up. 00136 static const int32_t kMaxSplitBranchLevels=14; 00137 00148 Node *registerNode(Node *newNode, UErrorCode &errorCode); 00159 Node *registerFinalValue(int32_t value, UErrorCode &errorCode); 00160 00161 /* 00162 * C++ note: 00163 * registerNode() and registerFinalValue() take ownership of their input nodes, 00164 * and only return owned nodes. 00165 * If they see a failure UErrorCode, they will delete the input node. 00166 * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR. 00167 * If there is a failure, they return NULL. 00168 * 00169 * NULL Node pointers can be safely passed into other Nodes because 00170 * they call the static Node::hashCode() which checks for a NULL pointer first. 00171 * 00172 * Therefore, as long as builder functions register a new node, 00173 * they need to check for failures only before explicitly dereferencing 00174 * a Node pointer, or before setting a new UErrorCode. 00175 */ 00176 00177 // Hash set of nodes, maps from nodes to integer 1. 00179 UHashtable *nodes; 00180 00182 class Node : public UObject { 00183 public: 00184 Node(int32_t initialHash) : hash(initialHash), offset(0) {} 00185 inline int32_t hashCode() const { return hash; } 00186 // Handles node==NULL. 00187 static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); } 00188 // Base class operator==() compares the actual class types. 00189 virtual UBool operator==(const Node &other) const; 00190 inline UBool operator!=(const Node &other) const { return !operator==(other); } 00218 virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 00219 // write() must set the offset to a positive value. 00220 virtual void write(StringTrieBuilder &builder) = 0; 00221 // See markRightEdgesFirst. 00222 inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight, 00223 StringTrieBuilder &builder) { 00224 // Note: Edge numbers are negative, lastRight<=firstRight. 00225 // If offset>0 then this node and its sub-nodes have been written already 00226 // and we need not write them again. 00227 // If this node is part of the unwritten right branch edge, 00228 // then we wait until that is written. 00229 if(offset<0 && (offset<lastRight || firstRight<offset)) { 00230 write(builder); 00231 } 00232 } 00233 inline int32_t getOffset() const { return offset; } 00234 protected: 00235 int32_t hash; 00236 int32_t offset; 00237 private: 00238 // No ICU "poor man's RTTI" for this class nor its subclasses. 00239 virtual UClassID getDynamicClassID() const; 00240 }; 00241 00242 // This class should not be overridden because 00243 // registerFinalValue() compares a stack-allocated FinalValueNode 00244 // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes) 00245 // with the input node, and the 00246 // !Node::operator==(other) used inside FinalValueNode::operator==(other) 00247 // will be false if the typeid's are different. 00249 class FinalValueNode : public Node { 00250 public: 00251 FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {} 00252 virtual UBool operator==(const Node &other) const; 00253 virtual void write(StringTrieBuilder &builder); 00254 protected: 00255 int32_t value; 00256 }; 00257 00259 class ValueNode : public Node { 00260 public: 00261 ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {} 00262 virtual UBool operator==(const Node &other) const; 00263 void setValue(int32_t v) { 00264 hasValue=TRUE; 00265 value=v; 00266 hash=hash*37+v; 00267 } 00268 protected: 00269 UBool hasValue; 00270 int32_t value; 00271 }; 00272 00274 class IntermediateValueNode : public ValueNode { 00275 public: 00276 IntermediateValueNode(int32_t v, Node *nextNode) 00277 : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); } 00278 virtual UBool operator==(const Node &other) const; 00279 virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 00280 virtual void write(StringTrieBuilder &builder); 00281 protected: 00282 Node *next; 00283 }; 00284 00286 class LinearMatchNode : public ValueNode { 00287 public: 00288 LinearMatchNode(int32_t len, Node *nextNode) 00289 : ValueNode((0x333333*37+len)*37+hashCode(nextNode)), 00290 length(len), next(nextNode) {} 00291 virtual UBool operator==(const Node &other) const; 00292 virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 00293 protected: 00294 int32_t length; 00295 Node *next; 00296 }; 00297 00299 class BranchNode : public Node { 00300 public: 00301 BranchNode(int32_t initialHash) : Node(initialHash) {} 00302 protected: 00303 int32_t firstEdgeNumber; 00304 }; 00305 00307 class ListBranchNode : public BranchNode { 00308 public: 00309 ListBranchNode() : BranchNode(0x444444), length(0) {} 00310 virtual UBool operator==(const Node &other) const; 00311 virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 00312 virtual void write(StringTrieBuilder &builder); 00313 // Adds a unit with a final value. 00314 void add(int32_t c, int32_t value) { 00315 units[length]=(UChar)c; 00316 equal[length]=NULL; 00317 values[length]=value; 00318 ++length; 00319 hash=(hash*37+c)*37+value; 00320 } 00321 // Adds a unit which leads to another match node. 00322 void add(int32_t c, Node *node) { 00323 units[length]=(UChar)c; 00324 equal[length]=node; 00325 values[length]=0; 00326 ++length; 00327 hash=(hash*37+c)*37+hashCode(node); 00328 } 00329 protected: 00330 Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value". 00331 int32_t length; 00332 int32_t values[kMaxBranchLinearSubNodeLength]; 00333 UChar units[kMaxBranchLinearSubNodeLength]; 00334 }; 00335 00337 class SplitBranchNode : public BranchNode { 00338 public: 00339 SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) 00340 : BranchNode(((0x555555*37+middleUnit)*37+ 00341 hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)), 00342 unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {} 00343 virtual UBool operator==(const Node &other) const; 00344 virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 00345 virtual void write(StringTrieBuilder &builder); 00346 protected: 00347 UChar unit; 00348 Node *lessThan; 00349 Node *greaterOrEqual; 00350 }; 00351 00352 // Branch head node, for writing the actual node lead unit. 00354 class BranchHeadNode : public ValueNode { 00355 public: 00356 BranchHeadNode(int32_t len, Node *subNode) 00357 : ValueNode((0x666666*37+len)*37+hashCode(subNode)), 00358 length(len), next(subNode) {} 00359 virtual UBool operator==(const Node &other) const; 00360 virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 00361 virtual void write(StringTrieBuilder &builder); 00362 protected: 00363 int32_t length; 00364 Node *next; // A branch sub-node. 00365 }; 00366 #endif /* U_HIDE_INTERNAL_API */ 00367 00369 virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, 00370 Node *nextNode) const = 0; 00371 00373 virtual int32_t write(int32_t unit) = 0; 00375 virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0; 00377 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0; 00379 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0; 00381 virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0; 00382 00383 private: 00384 // No ICU "poor man's RTTI" for this class nor its subclasses. 00385 virtual UClassID getDynamicClassID() const; 00386 }; 00387 00388 U_NAMESPACE_END 00389 00390 #endif // __STRINGTRIEBUILDER_H__