Leptonica  1.83.1
Image processing and image analysis suite
hashmap.c
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
27 /*
28  * \file hashmap.c
29  * <pre>
30  *
31  * Hashmap creation, destruction
32  * L_HASHMAP *l_hmapCreate()
33  * void l_hmapDestroy()
34  *
35  * Hashmap: Accessors and modifiers
36  * L_HASHITEM *l_hmapLookup()
37  * l_int32 l_hmapRehash()
38  *
39  * (1) See also hashmap.h for a brief description of the design.
40  * (2) In a typical use, a set of objects (in an array or associated
41  * with image pixels) is represented by a hashmap. A uint64 key is
42  * produced for each object. This integer is then hashed into an
43  * index in a hashtable, using the mod function with the table size
44  * which is a prime number. Each entry in the hash table is a list
45  * of hash items. In lookup, the appropriate list is traversed,
46  * looking for the object key found earlier.
47  * (3) Hash functions that map points, strings and float64 to uint64
48  * are given in utils1.c.
49  * (4) Use of the hashmap on points, strings and float64 data are
50  * given in ptafunc2.c, sarray2.c and dnafunc1.c.
51  * (5) Useful rule of thumb for hashing collisions:
52  * For a random hashing function (say, from strings to l_uint64),
53  * the probability of a collision increases as N^2 for N much
54  * less than 2^32. The quadratic behavior switches over to
55  * approaching 1.0 around 2^32, which is the square root of 2^64.
56  * So, for example, if you have 10^7 strings, the probability
57  * of a single collision using an l_uint64 key is on the order of
58  * (10^7/4x10^9)^2 ~ 10^-5.
59  * For a million strings, collisions are very rare (~10^-7 probability).
60  * </pre>
61  */
62 
63 #ifdef HAVE_CONFIG_H
64 #include <config_auto.h>
65 #endif /* HAVE_CONFIG_H */
66 
67 #include "allheaders.h"
68 
69  /* Limit on the hashtable size */
70 static const l_uint32 MaxTabsize = 50000000;
71  /* Default values for creating the hashmap. */
72 static const l_int32 DefaultInitNItems = 2000;
73 static const l_int32 DefaultMaxOcc = 2;
74 
75 /*--------------------------------------------------------------------------*
76  * Hashmap creation and destruction *
77  *--------------------------------------------------------------------------*/
101 L_HASHMAP *
102 l_hmapCreate(l_int32 ninit,
103  l_int32 maxocc)
104 {
105 l_uint32 size, tabsize;
106 L_HASHMAP *hmap;
107 
108  ninit = L_MAX(ninit, DefaultInitNItems);
109  if (maxocc <= 0) maxocc = DefaultMaxOcc;
110  if (maxocc > 5) {
111  L_WARNING("maxocc = %d; non-optimal value. Set to default = %d\n",
112  __func__, maxocc, DefaultMaxOcc);
113  maxocc = DefaultMaxOcc;
114  }
115  size = ninit / maxocc;
116  if (size > MaxTabsize) {
117  L_ERROR("ninit/maxocc = %d > MaxTabsize = %d\n", __func__,
118  size, MaxTabsize);
119  return NULL;
120  }
121 
122  hmap = (L_HASHMAP *)LEPT_CALLOC(1, sizeof(L_HASHMAP));
123  findNextLargerPrime(size, &tabsize); /* at least 101 */
124  if ((hmap->hashtab =
125  (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *))) == NULL) {
126  LEPT_FREE(hmap);
127  return (L_HASHMAP *)ERROR_PTR("hashtab not made", __func__, NULL);
128  }
129 
130  hmap->nitems = 0;
131  hmap->ntogo = ninit;
132  hmap->maxocc = maxocc;
133  hmap->tabsize = tabsize;
134  return hmap;
135 }
136 
137 
144 void
145 l_hmapDestroy(L_HASHMAP **phmap)
146 {
147 l_int32 i;
148 L_HASHITEM *hitem, *next;
149 L_HASHMAP *hmap;
150 
151  if (phmap == NULL) {
152  L_WARNING("ptr address is NULL!\n", __func__);
153  return;
154  }
155 
156  if ((hmap = *phmap) == NULL)
157  return;
158 
159  for (i = 0; i < hmap->tabsize; i++) {
160  for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) {
161  next = hitem->next;
162  LEPT_FREE(hitem);
163  }
164  }
165  LEPT_FREE(hmap->hashtab);
166  LEPT_FREE(hmap);
167  *phmap = NULL;
168 }
169 
170 
171 /*--------------------------------------------------------------------------*
172  * Hashmap: Accessors and modifiers *
173  *--------------------------------------------------------------------------*/
198 L_HASHITEM *
199 l_hmapLookup(L_HASHMAP *hmap,
200  l_uint64 key,
201  l_uint64 val,
202  l_int32 op)
203 {
204 l_uint32 index;
205 L_HASHITEM *hlist, *hitem;
206 
207  if (!hmap)
208  return (L_HASHITEM *)ERROR_PTR("hmap not defined", __func__, NULL);
209  if (op != L_HMAP_CHECK && op != L_HMAP_CREATE)
210  return (L_HASHITEM *)ERROR_PTR("invalid op", __func__, NULL);
211 
212  /* If found, return a ptr to the hitem (not a copy) */
213  index = key % hmap->tabsize; /* into hashtab */
214  hlist = hmap->hashtab[index]; /* head of the list */
215  for (hitem = hlist; hitem != NULL; hitem = hitem->next) {
216  if (key == hitem->key) {
217  if (op == L_HMAP_CREATE) hitem->count++;
218  return hitem;
219  }
220  }
221  if (op == L_HMAP_CHECK) return NULL;
222 
223  /* Not found and %op == L_HMAP_CREATE.
224  * Make a new hitem and add to the head of the list */
225  hitem = (L_HASHITEM *)LEPT_CALLOC(1, sizeof(L_HASHITEM));
226  hitem->key = key;
227  hitem->val = val;
228  hitem->count = 1;
229  hitem->next = hlist;
230  hmap->hashtab[index] = hitem;
231  hmap->nitems++;
232  hmap->ntogo--;
233 
234  /* If hmap is full based on average occupancy, rehash */
235  if (hmap->ntogo == 0)
236  l_hmapRehash(hmap);
237 
238  return hitem;
239 }
240 
241 
254 l_ok
255 l_hmapRehash(L_HASHMAP *hmap)
256 {
257 l_int32 i, index;
258 l_uint32 tabsize;
259 L_HASHITEM *hstore, *hitem, *next;
260 
261  if (!hmap)
262  return ERROR_INT("hmap not defined", __func__, 1);
263 
264  /* Put hash items in temporary storage in a single list,
265  * successively adding each to the list head. */
266  hstore = NULL; /* ptr to resulting list */
267  for (i = 0; i < hmap->tabsize; i++) {
268  for (hitem = hmap->hashtab[i]; hitem != NULL; hitem = next) {
269  next = hitem->next;
270  hitem->next = hstore;
271  hstore = hitem;
272  }
273  }
274 
275  /* Destroy the old hashtab and make a new one that is twice as big */
276  LEPT_FREE(hmap->hashtab);
277  findNextLargerPrime(2 * hmap->tabsize, &tabsize);
278  hmap->tabsize = tabsize;
279  hmap->hashtab = (L_HASHITEM **)LEPT_CALLOC(tabsize, sizeof(L_HASHITEM *));
280  if (hmap->hashtab == NULL) {
281  hmap->tabsize = 0;
282  return ERROR_INT("hashtab ptr array not made", __func__, 1);
283  }
284  hmap->ntogo = hmap->maxocc * tabsize - hmap->nitems;
285 
286  /* Populate with the stored hash items */
287  for (hitem = hstore; hitem != NULL; hitem = next) {
288  next = hitem->next;
289  index = hitem->key % tabsize; /* into new hashtab */
290  hitem->next = hmap->hashtab[index]; /* link to head of existing list */
291  hmap->hashtab[index] = hitem; /* put at head */
292  }
293 
294  return 0;
295 }
l_uint64 val
Definition: hashmap.h:117
l_uint64 key
Definition: hashmap.h:116
l_int32 count
Definition: hashmap.h:118
struct L_Hashitem * next
Definition: hashmap.h:119
struct L_Hashitem ** hashtab
Definition: hashmap.h:106
l_int32 maxocc
Definition: hashmap.h:105
l_int32 tabsize
Definition: hashmap.h:107
l_int32 nitems
Definition: hashmap.h:102
l_int32 ntogo
Definition: hashmap.h:103
l_ok findNextLargerPrime(l_int32 start, l_uint32 *pprime)
findNextLargerPrime()
Definition: utils1.c:843