Fri, 27 May 2022 17:40:27 +0200
#189 #199 implement and test map rehash
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include <string.h>
30 #include "cx/hash_map.h"
31 #include "cx/utils.h"
33 /**
34 * Computes a murmur hash-2.
35 *
36 * @param data the data to hash
37 * @param len the length of the data
38 * @return the murmur hash-2 of the data
39 */
40 static unsigned cx_hash_map_murmur(
41 unsigned char const *data,
42 size_t len
43 ) {
44 unsigned m = 0x5bd1e995;
45 unsigned r = 24;
46 unsigned h = 25 ^ len;
47 unsigned i = 0;
48 while (len >= 4) {
49 unsigned k = data[i + 0] & 0xFF;
50 k |= (data[i + 1] & 0xFF) << 8;
51 k |= (data[i + 2] & 0xFF) << 16;
52 k |= (data[i + 3] & 0xFF) << 24;
54 k *= m;
55 k ^= k >> r;
56 k *= m;
58 h *= m;
59 h ^= k;
61 i += 4;
62 len -= 4;
63 }
65 switch (len) {
66 case 3:
67 h ^= (data[i + 2] & 0xFF) << 16;
68 __attribute__((__fallthrough__));
69 case 2:
70 h ^= (data[i + 1] & 0xFF) << 8;
71 __attribute__((__fallthrough__));
72 case 1:
73 h ^= (data[i + 0] & 0xFF);
74 h *= m;
75 __attribute__((__fallthrough__));
76 default:
77 /* do nothing */;
78 }
80 h ^= h >> 13;
81 h *= m;
82 h ^= h >> 15;
84 return h;
85 }
87 static void cx_hash_map_clear(struct cx_map_s *map) {
88 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
89 cx_for_n(i, hash_map->bucket_count) {
90 struct cx_hash_map_element_s *elem = hash_map->buckets[i];
91 if (elem != NULL) {
92 do {
93 struct cx_hash_map_element_s *next = elem->next;
94 // free the key data
95 cxFree(map->allocator, elem->key.data);
96 // free the node
97 cxFree(map->allocator, elem);
98 // proceed
99 elem = next;
100 } while (elem != NULL);
102 // do not leave a dangling pointer
103 hash_map->buckets[i] = NULL;
104 }
105 }
106 map->size = 0;
107 }
109 static void cx_hash_map_destructor(struct cx_map_s *map) {
110 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
112 // free the buckets
113 cx_hash_map_clear(map);
114 cxFree(map->allocator, hash_map->buckets);
116 // free the map structure
117 cxFree(map->allocator, map);
118 }
120 static int cx_hash_map_put(
121 CxMap *map,
122 CxDataPtr key,
123 void *value
124 ) {
125 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
126 CxAllocator *allocator = map->allocator;
128 unsigned hash = cx_hash_map_murmur(key.data, key.len);
130 size_t slot = hash % hash_map->bucket_count;
131 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
132 struct cx_hash_map_element_s *prev = NULL;
134 while (elm != NULL && elm->key.hash < hash) {
135 prev = elm;
136 elm = elm->next;
137 }
139 if (elm == NULL || elm->key.hash != hash) {
140 struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s));
141 if (e == NULL) {
142 return -1;
143 }
145 // write the value
146 // TODO: depending on future map features, we may want to copy here
147 e->data = value;
149 // copy the key
150 void *kd = cxMalloc(allocator, key.len);
151 if (kd == NULL) {
152 return -1;
153 }
154 memcpy(kd, key.data, key.len);
155 e->key.data = kd;
156 e->key.len = key.len;
157 e->key.hash = hash;
159 // insert the element into the linked list
160 if (prev == NULL) {
161 hash_map->buckets[slot] = e;
162 } else {
163 prev->next = e;
164 }
165 e->next = elm;
167 // increase the size
168 map->size++;
169 } else {
170 // (elem != NULL && elem->key.hash == hash) - overwrite value of existing element
171 elm->data = value;
172 }
174 return 0;
175 }
177 static void cx_hash_map_unlink(
178 struct cx_hash_map_s *hash_map,
179 size_t slot,
180 struct cx_hash_map_element_s *prev,
181 struct cx_hash_map_element_s *elm
182 ) {
183 // unlink
184 if (prev == NULL) {
185 hash_map->buckets[slot] = elm->next;
186 } else {
187 prev->next = elm->next;
188 }
189 // free element
190 cxFree(hash_map->base.allocator, elm->key.data);
191 cxFree(hash_map->base.allocator, elm);
192 // decrease size
193 hash_map->base.size--;
194 }
196 /**
197 * Helper function to avoid code duplication.
198 *
199 * @param map the map
200 * @param key the key to look up
201 * @param remove flag indicating whether the looked up entry shall be removed
202 * @return the value corresponding to the key or \c NULL
203 */
204 static void *cx_hash_map_get_remove(
205 CxMap *map,
206 CxDataPtr key,
207 bool remove
208 ) {
209 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
211 unsigned hash = cx_hash_map_murmur(key.data, key.len);
213 size_t slot = hash % hash_map->bucket_count;
214 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
215 struct cx_hash_map_element_s *prev = NULL;
216 while (elm && elm->key.hash <= hash) {
217 if (elm->key.hash == hash && elm->key.len == key.len) {
218 if (memcmp(elm->key.data, key.data, key.len) == 0) {
219 void *data = elm->data;
220 if (remove) {
221 cx_hash_map_unlink(hash_map, slot, prev, elm);
222 }
223 return data;
224 }
225 }
226 prev = elm;
227 elm = prev->next;
228 }
230 return NULL;
231 }
233 static void *cx_hash_map_get(
234 CxMap const *map,
235 CxDataPtr key
236 ) {
237 // we can safely cast, because we know when remove=false, the map stays untouched
238 return cx_hash_map_get_remove((CxMap *) map, key, false);
239 }
241 static void *cx_hash_map_remove(
242 CxMap *map,
243 CxDataPtr key
244 ) {
245 return cx_hash_map_get_remove(map, key, true);
246 }
248 static void *cx_hash_map_iter_current_entry(CxIterator const *iter) {
249 // struct has to have a compatible signature
250 struct cx_map_entry_s *entry = (struct cx_map_entry_s *) &(iter->kv_data);
251 return entry;
252 }
254 static void *cx_hash_map_iter_current_key(CxIterator const *iter) {
255 struct cx_hash_map_element_s *elm = iter->elem_handle;
256 return &elm->key;
257 }
259 static void *cx_hash_map_iter_current_value(CxIterator const *iter) {
260 struct cx_hash_map_element_s *elm = iter->elem_handle;
261 // TODO: return a pointer to data if this map is storing copies
262 return elm->data;
263 }
265 static bool cx_hash_map_iter_valid(CxIterator const *iter) {
266 return iter->elem_handle != NULL;
267 }
269 static void cx_hash_map_iter_next(CxIterator *iter) {
270 struct cx_hash_map_s *map = iter->src_handle;
271 struct cx_hash_map_element_s *elm = iter->elem_handle;
273 // remove current element, if asked
274 if (iter->remove) {
275 // clear the flag
276 iter->remove = false;
278 // determine the next element
279 struct cx_hash_map_element_s *next = elm->next;
281 // search the previous element
282 struct cx_hash_map_element_s *prev = NULL;
283 if (map->buckets[iter->slot] != elm) {
284 prev = map->buckets[iter->slot];
285 while (prev->next != elm) {
286 prev = prev->next;
287 }
288 }
290 // unlink
291 cx_hash_map_unlink(map, iter->slot, prev, elm);
293 // advance
294 elm = next;
295 } else {
296 // just advance
297 elm = elm->next;
298 iter->index++;
299 }
301 // search the next bucket, if required
302 while (elm == NULL && ++iter->slot < map->bucket_count) {
303 elm = map->buckets[iter->slot];
304 }
306 // fill the struct with the next element
307 iter->elem_handle = elm;
308 if (elm == NULL) {
309 iter->kv_data.key = NULL;
310 iter->kv_data.value = NULL;
311 } else {
312 iter->kv_data.key = &elm->key;
313 // TODO: pointer to data if this map is storing copies
314 iter->kv_data.value = elm->data;
315 }
316 }
318 static CxIterator cx_hash_map_iterator(CxMap *map) {
319 CxIterator iter;
321 iter.src_handle = map;
322 iter.valid = cx_hash_map_iter_valid;
323 iter.next = cx_hash_map_iter_next;
324 iter.current = cx_hash_map_iter_current_entry;
326 iter.slot = 0;
327 iter.index = 0;
328 iter.remove = false;
330 if (map->size > 0) {
331 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
332 struct cx_hash_map_element_s *elm = hash_map->buckets[0];
333 for (; elm == NULL; iter.slot++) {
334 elm = hash_map->buckets[iter.slot];
335 }
336 iter.elem_handle = elm;
337 iter.kv_data.key = &elm->key;
338 // TODO: pointer to data if this map is storing copies
339 iter.kv_data.value = elm->data;
340 } else {
341 iter.elem_handle = NULL;
342 iter.kv_data.key = NULL;
343 iter.kv_data.value = NULL;
344 }
346 return iter;
347 }
349 static CxIterator cx_hash_map_iterator_keys(CxMap *map) {
350 CxIterator iter = cx_hash_map_iterator(map);
351 iter.current = cx_hash_map_iter_current_key;
352 return iter;
353 }
355 static CxIterator cx_hash_map_iterator_values(CxMap *map) {
356 CxIterator iter = cx_hash_map_iterator(map);
357 iter.current = cx_hash_map_iter_current_value;
358 return iter;
359 }
361 static cx_map_class cx_hash_map_class = {
362 cx_hash_map_destructor,
363 cx_hash_map_clear,
364 cx_hash_map_put,
365 cx_hash_map_get,
366 cx_hash_map_remove,
367 cx_hash_map_iterator,
368 cx_hash_map_iterator_keys,
369 cx_hash_map_iterator_values,
370 };
372 CxMap *cxHashMapCreate(
373 CxAllocator *allocator,
374 size_t buckets
375 ) {
376 if (buckets == 0) {
377 // implementation defined default
378 buckets = 16;
379 }
381 struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s));
382 if (map == NULL) return NULL;
384 // initialize hash map members
385 map->bucket_count = buckets;
386 map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *));
387 if (map->buckets == NULL) {
388 cxFree(allocator, map);
389 return NULL;
390 }
392 // initialize base members
393 map->base.cl = &cx_hash_map_class;
394 map->base.allocator = allocator;
395 map->base.size = 0;
397 return (CxMap *) map;
398 }
400 int cxMapRehash(CxMap *map) {
401 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
402 if (map->size > ((hash_map->bucket_count * 3) >> 2)) {
404 size_t new_bucket_count = (map->size * 5) >> 1;
405 struct cx_hash_map_element_s **new_buckets = cxCalloc(map->allocator,
406 new_bucket_count, sizeof(struct cx_hash_map_element_s *));
408 if (new_buckets == NULL) {
409 return 1;
410 }
412 // iterate through the elements and assign them to their new slots
413 cx_for_n(slot, hash_map->bucket_count) {
414 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
415 while (elm != NULL) {
416 struct cx_hash_map_element_s *next = elm->next;
417 size_t new_slot = elm->key.hash % new_bucket_count;
419 // find position where to insert
420 struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
421 struct cx_hash_map_element_s *bucket_prev = NULL;
422 while (bucket_next != NULL && bucket_next->key.hash < elm->key.hash) {
423 bucket_prev = bucket_next;
424 bucket_next = bucket_next->next;
425 }
427 // insert
428 if (bucket_prev == NULL) {
429 elm->next = new_buckets[new_slot];
430 new_buckets[new_slot] = elm;
431 } else {
432 bucket_prev->next = elm;
433 elm->next = bucket_next;
434 }
436 // advance
437 elm = next;
438 }
439 }
441 // assign result to the map
442 hash_map->bucket_count = new_bucket_count;
443 cxFree(map->allocator, hash_map->buckets);
444 hash_map->buckets = new_buckets;
445 }
446 return 0;
447 }