Leptonica  1.83.1
Image processing and image analysis suite
hashmap.h
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 #ifndef LEPTONICA_HASHMAP_H
28 #define LEPTONICA_HASHMAP_H
29 
30 /*
31  * \file hashmap.h
32  *
33  * <pre>
34  * Contains the following structs:
35  * struct L_Hashmap
36  * struct L_Hashitem
37  *
38  * Goal:
39  * You have a set of objects (integers, strings, pts, whatever),
40  * and you want to store them in a data structure (L_Hashmap) that allows
41  * O(1) to find, insert and count the occurrences of such an object.
42  * The tool is a hash map. This is not ordered, unlike the O(log n)
43  * ordered map (L_Amap), which is implemented by an rbtree.
44  *
45  * In slightly more detail:
46  * Store the set of objects in an array, which in general can be
47  * held in a pointer array (L_Ptra). You need a hash function that
48  * will generate a unique uint64 key from each object. For our simple
49  * built-in arrays, such as float, double and Pta (points), these hash
50  * functions are in utils1.c. Then for each object in the array,
51  * you store the key and the index to the array of objects (the val)
52  * in a list of hashitems in the hash table, where the specific
53  * list is determined by the key (specifically, the mod of the key
54  * with the size of the hashtable).
55  *
56  * In yet more detail:
57  * (1) The design loosely follows the design of a hashmap in "The Practice
58  * of Programming by Brian Kernighan and Rob Pike, Addison Wesley, 1999.
59  * (2) The L_Hashmap contains a hashtable with a prime number of pointers
60  * to lists of hashitems. The lookup function takes a key and a value,
61  * which are both 64-bit unsigned integers. The key has been generated
62  * by hashing the input object in a way that avoids collisions between
63  * different objects. The value is an integer that identifies the
64  * object; typically it is the index into an array of objects.
65  * The hashtable size is a prime number, and an index into the table
66  * is made from the key by taking its mod with the hashtable size.
67  * The index points to a list of hashitems, which have all been hashed
68  * by the mod function into the same index in the table.
69  * Because the key is expected to be randomly distributed in uint64,
70  * the table indices should be uniformly distributed, resulting in
71  * approximately the same number of items being placed in each of
72  * these lists. The list of hashitems is traversed, comparing the
73  * input uint64 key in the lookup() function with the key stored in
74  * each hashitem. If a hashitem is found with a matching key,
75  * return a pointer to that hashitem. If not found and the op is
76  * L_HASH_CREATE, make a new hash item, add it to the list, and
77  * return a pointer to it.
78  * (3) The count field in the hashitem gives the number of times the
79  * key has been seen when storing key/value pairs.
80  * (4) The val field is the index into an array of the objects. When
81  * the hashmap is initially made, it is the index of the first item
82  * seen with its key.
83  * (5) For the hashmap to work efficiently, the lists must not become too
84  * long. Because in general you do not know the number of objects
85  * in advance, it is important to be able to dynamically resize
86  * the hashtable as it grows. The hashmap is initialized with
87  * room for some number of hashitems and the maximum average list
88  * size. These two numbers determine the size of the hashtable,
89  * which is constrained to be a prime number. As the hashtable grows,
90  * if the average occupancy exceeds the input %maxocc, the hashtable
91  * size is approximately doubled and the existing items are re-hashed
92  * into it, mod the new (prime number) table size.
93  * </pre>
94  */
95 
96 /*------------------------------------------------------------------------*
97  * Hash map structs *
98  *------------------------------------------------------------------------*/
100 struct L_Hashmap
101 {
102  l_int32 nitems;
103  l_int32 ntogo;
105  l_int32 maxocc;
106  struct L_Hashitem **hashtab;
107  l_int32 tabsize;
108 };
109 typedef struct L_Hashmap L_HASHMAP;
110 
115 {
116  l_uint64 key;
117  l_uint64 val;
118  l_int32 count;
119  struct L_Hashitem *next;
120 };
121 typedef struct L_Hashitem L_HASHITEM;
122 
123 
124 /*------------------------------------------------------------------------*
125  * Hashmap flags *
126  *------------------------------------------------------------------------*/
128 enum {
129  L_UNDEFINED = 0,
130  L_HMAP_CHECK = 1,
131  L_HMAP_CREATE = 2
132 };
133 
134 #endif /* LEPTONICA_HASHMAP_H */
l_uint64 val
Definition: hashmap.h:117
l_uint64 key
Definition: hashmap.h:116
l_int32 count
Definition: hashmap.h:118
struct L_Hashitem * next
Definition: hashmap.h:119
struct L_Hashitem ** hashtab
Definition: hashmap.h:106
l_int32 maxocc
Definition: hashmap.h:105
l_int32 tabsize
Definition: hashmap.h:107
l_int32 nitems
Definition: hashmap.h:102
l_int32 ntogo
Definition: hashmap.h:103