src/hash_map.c

Fri, 27 May 2022 14:02:27 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 27 May 2022 14:02:27 +0200
changeset 560
2d6a3e2dc8ff
parent 557
2aae1246b578
child 562
fd3368c20413
permissions
-rw-r--r--

fix wrong slot and index numbers

universe@549 1 /*
universe@549 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@549 3 *
universe@549 4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
universe@549 5 *
universe@549 6 * Redistribution and use in source and binary forms, with or without
universe@549 7 * modification, are permitted provided that the following conditions are met:
universe@549 8 *
universe@549 9 * 1. Redistributions of source code must retain the above copyright
universe@549 10 * notice, this list of conditions and the following disclaimer.
universe@549 11 *
universe@549 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@549 13 * notice, this list of conditions and the following disclaimer in the
universe@549 14 * documentation and/or other materials provided with the distribution.
universe@549 15 *
universe@549 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@549 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@549 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@549 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@549 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@549 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@549 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@549 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@549 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@549 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@549 26 * POSSIBILITY OF SUCH DAMAGE.
universe@549 27 */
universe@549 28
universe@549 29 #include <string.h>
universe@549 30 #include "cx/hash_map.h"
universe@550 31 #include "cx/utils.h"
universe@549 32
universe@549 33 /**
universe@549 34 * Computes a murmur hash-2.
universe@549 35 *
universe@549 36 * @param data the data to hash
universe@549 37 * @param len the length of the data
universe@549 38 * @return the murmur hash-2 of the data
universe@549 39 */
universe@549 40 static unsigned cx_hash_map_murmur(
universe@549 41 unsigned char const *data,
universe@549 42 size_t len
universe@549 43 ) {
universe@549 44 unsigned m = 0x5bd1e995;
universe@549 45 unsigned r = 24;
universe@549 46 unsigned h = 25 ^ len;
universe@549 47 unsigned i = 0;
universe@549 48 while (len >= 4) {
universe@549 49 unsigned k = data[i + 0] & 0xFF;
universe@549 50 k |= (data[i + 1] & 0xFF) << 8;
universe@549 51 k |= (data[i + 2] & 0xFF) << 16;
universe@549 52 k |= (data[i + 3] & 0xFF) << 24;
universe@549 53
universe@549 54 k *= m;
universe@549 55 k ^= k >> r;
universe@549 56 k *= m;
universe@549 57
universe@549 58 h *= m;
universe@549 59 h ^= k;
universe@549 60
universe@549 61 i += 4;
universe@549 62 len -= 4;
universe@549 63 }
universe@549 64
universe@549 65 switch (len) {
universe@549 66 case 3:
universe@549 67 h ^= (data[i + 2] & 0xFF) << 16;
universe@549 68 __attribute__((__fallthrough__));
universe@549 69 case 2:
universe@549 70 h ^= (data[i + 1] & 0xFF) << 8;
universe@549 71 __attribute__((__fallthrough__));
universe@549 72 case 1:
universe@549 73 h ^= (data[i + 0] & 0xFF);
universe@549 74 h *= m;
universe@549 75 __attribute__((__fallthrough__));
universe@549 76 default:
universe@549 77 /* do nothing */;
universe@549 78 }
universe@549 79
universe@549 80 h ^= h >> 13;
universe@549 81 h *= m;
universe@549 82 h ^= h >> 15;
universe@549 83
universe@549 84 return h;
universe@549 85 }
universe@549 86
universe@550 87 static void cx_hash_map_clear(struct cx_map_s *map) {
universe@550 88 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
universe@550 89 cx_for_n(i, hash_map->bucket_count) {
universe@550 90 struct cx_hash_map_element_s *elem = hash_map->buckets[i];
universe@550 91 if (elem != NULL) {
universe@550 92 do {
universe@550 93 struct cx_hash_map_element_s *next = elem->next;
universe@550 94 // free the key data
universe@550 95 cxFree(map->allocator, elem->key.data);
universe@550 96 // free the node
universe@550 97 cxFree(map->allocator, elem);
universe@550 98 // proceed
universe@550 99 elem = next;
universe@550 100 } while (elem != NULL);
universe@550 101
universe@550 102 // do not leave a dangling pointer
universe@550 103 hash_map->buckets[i] = NULL;
universe@550 104 }
universe@550 105 }
universe@550 106 map->size = 0;
universe@550 107 }
universe@550 108
universe@550 109 static void cx_hash_map_destructor(struct cx_map_s *map) {
universe@550 110 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
universe@550 111
universe@550 112 // free the buckets
universe@550 113 cx_hash_map_clear(map);
universe@550 114 cxFree(map->allocator, hash_map->buckets);
universe@550 115
universe@550 116 // free the map structure
universe@550 117 cxFree(map->allocator, map);
universe@550 118 }
universe@550 119
universe@550 120 static int cx_hash_map_put(
universe@550 121 CxMap *map,
universe@550 122 CxDataPtr key,
universe@550 123 void *value
universe@550 124 ) {
universe@550 125 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
universe@550 126 CxAllocator *allocator = map->allocator;
universe@550 127
universe@550 128 unsigned hash = cx_hash_map_murmur(key.data, key.len);
universe@550 129
universe@550 130 size_t slot = hash % hash_map->bucket_count;
universe@550 131 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
universe@550 132 struct cx_hash_map_element_s *prev = NULL;
universe@550 133
universe@550 134 while (elm != NULL && elm->key.hash < hash) {
universe@550 135 prev = elm;
universe@550 136 elm = elm->next;
universe@550 137 }
universe@550 138
universe@550 139 if (elm == NULL || elm->key.hash != hash) {
universe@550 140 struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s));
universe@550 141 if (e == NULL) {
universe@550 142 return -1;
universe@550 143 }
universe@550 144
universe@550 145 // write the value
universe@550 146 // TODO: depending on future map features, we may want to copy here
universe@550 147 e->data = value;
universe@550 148
universe@550 149 // copy the key
universe@550 150 void *kd = cxMalloc(allocator, key.len);
universe@550 151 if (kd == NULL) {
universe@550 152 return -1;
universe@550 153 }
universe@550 154 memcpy(kd, key.data, key.len);
universe@550 155 e->key.data = kd;
universe@550 156 e->key.len = key.len;
universe@550 157 e->key.hash = hash;
universe@550 158
universe@550 159 // insert the element into the linked list
universe@550 160 if (prev == NULL) {
universe@550 161 hash_map->buckets[slot] = e;
universe@550 162 } else {
universe@550 163 prev->next = e;
universe@550 164 }
universe@550 165 e->next = elm;
universe@550 166
universe@550 167 // increase the size
universe@550 168 map->size++;
universe@550 169 } else {
universe@550 170 // (elem != NULL && elem->key.hash == hash) - overwrite value of existing element
universe@550 171 elm->data = value;
universe@550 172 }
universe@550 173
universe@550 174 return 0;
universe@550 175 }
universe@549 176
universe@551 177 static void cx_hash_map_unlink(
universe@551 178 struct cx_hash_map_s *hash_map,
universe@551 179 size_t slot,
universe@551 180 struct cx_hash_map_element_s *prev,
universe@551 181 struct cx_hash_map_element_s *elm
universe@551 182 ) {
universe@551 183 // unlink
universe@551 184 if (prev == NULL) {
universe@551 185 hash_map->buckets[slot] = elm->next;
universe@551 186 } else {
universe@551 187 prev->next = elm->next;
universe@551 188 }
universe@551 189 // free element
universe@551 190 cxFree(hash_map->base.allocator, elm->key.data);
universe@551 191 cxFree(hash_map->base.allocator, elm);
universe@551 192 // decrease size
universe@551 193 hash_map->base.size--;
universe@551 194 }
universe@551 195
universe@549 196 /**
universe@550 197 * Helper function to avoid code duplication.
universe@549 198 *
universe@550 199 * @param map the map
universe@550 200 * @param key the key to look up
universe@550 201 * @param remove flag indicating whether the looked up entry shall be removed
universe@550 202 * @return the value corresponding to the key or \c NULL
universe@549 203 */
universe@550 204 static void *cx_hash_map_get_remove(
universe@550 205 CxMap *map,
universe@550 206 CxDataPtr key,
universe@550 207 bool remove
universe@550 208 ) {
universe@550 209 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
universe@550 210
universe@550 211 unsigned hash = cx_hash_map_murmur(key.data, key.len);
universe@550 212
universe@550 213 size_t slot = hash % hash_map->bucket_count;
universe@550 214 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
universe@550 215 struct cx_hash_map_element_s *prev = NULL;
universe@550 216 while (elm && elm->key.hash <= hash) {
universe@550 217 if (elm->key.hash == hash && elm->key.len == key.len) {
universe@550 218 if (memcmp(elm->key.data, key.data, key.len) == 0) {
universe@550 219 void *data = elm->data;
universe@550 220 if (remove) {
universe@551 221 cx_hash_map_unlink(hash_map, slot, prev, elm);
universe@550 222 }
universe@550 223 return data;
universe@550 224 }
universe@550 225 }
universe@550 226 prev = elm;
universe@550 227 elm = prev->next;
universe@550 228 }
universe@550 229
universe@550 230 return NULL;
universe@549 231 }
universe@550 232
universe@550 233 static void *cx_hash_map_get(
universe@550 234 CxMap const *map,
universe@550 235 CxDataPtr key
universe@550 236 ) {
universe@550 237 // we can safely cast, because we know when remove=false, the map stays untouched
universe@550 238 return cx_hash_map_get_remove((CxMap *) map, key, false);
universe@550 239 }
universe@550 240
universe@550 241 static void *cx_hash_map_remove(
universe@550 242 CxMap *map,
universe@550 243 CxDataPtr key
universe@550 244 ) {
universe@550 245 return cx_hash_map_get_remove(map, key, true);
universe@550 246 }
universe@550 247
universe@551 248 static void *cx_hash_map_iter_current_entry(CxIterator const *iter) {
universe@551 249 // struct has to have a compatible signature
universe@551 250 struct cx_map_entry_s *entry = (struct cx_map_entry_s *) &(iter->kv_data);
universe@551 251 return entry;
universe@551 252 }
universe@551 253
universe@551 254 static void *cx_hash_map_iter_current_key(CxIterator const *iter) {
universe@551 255 struct cx_hash_map_element_s *elm = iter->elem_handle;
universe@551 256 return &elm->key;
universe@551 257 }
universe@551 258
universe@551 259 static void *cx_hash_map_iter_current_value(CxIterator const *iter) {
universe@551 260 struct cx_hash_map_element_s *elm = iter->elem_handle;
universe@551 261 // TODO: return a pointer to data if this map is storing copies
universe@551 262 return elm->data;
universe@551 263 }
universe@551 264
universe@551 265 static bool cx_hash_map_iter_valid(CxIterator const *iter) {
universe@551 266 return iter->elem_handle != NULL;
universe@551 267 }
universe@551 268
universe@551 269 static void cx_hash_map_iter_next(CxIterator *iter) {
universe@551 270 struct cx_hash_map_s *map = iter->src_handle;
universe@551 271 struct cx_hash_map_element_s *elm = iter->elem_handle;
universe@551 272
universe@551 273 // remove current element, if asked
universe@551 274 if (iter->remove) {
universe@551 275 // clear the flag
universe@551 276 iter->remove = false;
universe@551 277
universe@551 278 // determine the next element
universe@551 279 struct cx_hash_map_element_s *next = elm->next;
universe@551 280
universe@551 281 // search the previous element
universe@551 282 struct cx_hash_map_element_s *prev = NULL;
universe@551 283 if (map->buckets[iter->slot] != elm) {
universe@551 284 prev = map->buckets[iter->slot];
universe@551 285 while (prev->next != elm) {
universe@551 286 prev = prev->next;
universe@551 287 }
universe@551 288 }
universe@551 289
universe@551 290 // unlink
universe@551 291 cx_hash_map_unlink(map, iter->slot, prev, elm);
universe@551 292
universe@551 293 // advance
universe@551 294 elm = next;
universe@551 295 } else {
universe@551 296 // just advance
universe@551 297 elm = elm->next;
universe@560 298 iter->index++;
universe@551 299 }
universe@551 300
universe@560 301 // search the next bucket, if required
universe@560 302 while (elm == NULL && ++iter->slot < map->bucket_count) {
universe@560 303 elm = map->buckets[iter->slot];
universe@551 304 }
universe@551 305
universe@560 306 // fill the struct with the next element
universe@551 307 iter->elem_handle = elm;
universe@551 308 if (elm == NULL) {
universe@551 309 iter->kv_data.key = NULL;
universe@551 310 iter->kv_data.value = NULL;
universe@551 311 } else {
universe@551 312 iter->kv_data.key = &elm->key;
universe@551 313 // TODO: pointer to data if this map is storing copies
universe@551 314 iter->kv_data.value = elm->data;
universe@551 315 }
universe@551 316 }
universe@551 317
universe@551 318 static CxIterator cx_hash_map_iterator(CxMap *map) {
universe@550 319 CxIterator iter;
universe@551 320
universe@551 321 iter.src_handle = map;
universe@551 322 iter.valid = cx_hash_map_iter_valid;
universe@551 323 iter.next = cx_hash_map_iter_next;
universe@551 324 iter.current = cx_hash_map_iter_current_entry;
universe@551 325
universe@551 326 iter.slot = 0;
universe@551 327 iter.index = 0;
universe@551 328 iter.remove = false;
universe@551 329
universe@551 330 if (map->size > 0) {
universe@551 331 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
universe@560 332 struct cx_hash_map_element_s *elm = hash_map->buckets[0];
universe@554 333 for (; elm == NULL; iter.slot++) {
universe@554 334 elm = hash_map->buckets[iter.slot];
universe@551 335 }
universe@554 336 iter.elem_handle = elm;
universe@554 337 iter.kv_data.key = &elm->key;
universe@554 338 // TODO: pointer to data if this map is storing copies
universe@554 339 iter.kv_data.value = elm->data;
universe@551 340 } else {
universe@551 341 iter.elem_handle = NULL;
universe@551 342 iter.kv_data.key = NULL;
universe@551 343 iter.kv_data.value = NULL;
universe@551 344 }
universe@551 345
universe@550 346 return iter;
universe@550 347 }
universe@550 348
universe@551 349 static CxIterator cx_hash_map_iterator_keys(CxMap *map) {
universe@551 350 CxIterator iter = cx_hash_map_iterator(map);
universe@551 351 iter.current = cx_hash_map_iter_current_key;
universe@550 352 return iter;
universe@550 353 }
universe@550 354
universe@551 355 static CxIterator cx_hash_map_iterator_values(CxMap *map) {
universe@551 356 CxIterator iter = cx_hash_map_iterator(map);
universe@551 357 iter.current = cx_hash_map_iter_current_value;
universe@550 358 return iter;
universe@550 359 }
universe@550 360
universe@550 361 static cx_map_class cx_hash_map_class = {
universe@550 362 cx_hash_map_destructor,
universe@550 363 cx_hash_map_clear,
universe@550 364 cx_hash_map_put,
universe@550 365 cx_hash_map_get,
universe@550 366 cx_hash_map_remove,
universe@550 367 cx_hash_map_iterator,
universe@550 368 cx_hash_map_iterator_keys,
universe@550 369 cx_hash_map_iterator_values,
universe@550 370 };
universe@550 371
universe@550 372 CxMap *cxHashMapCreate(
universe@550 373 CxAllocator *allocator,
universe@550 374 size_t buckets
universe@550 375 ) {
universe@550 376 if (buckets == 0) {
universe@550 377 // implementation defined default
universe@550 378 buckets = 16;
universe@550 379 }
universe@550 380
universe@550 381 struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s));
universe@550 382 if (map == NULL) return NULL;
universe@550 383
universe@550 384 // initialize hash map members
universe@550 385 map->bucket_count = buckets;
universe@550 386 map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *));
universe@550 387 if (map->buckets == NULL) {
universe@550 388 cxFree(allocator, map);
universe@550 389 return NULL;
universe@550 390 }
universe@550 391
universe@550 392 // initialize base members
universe@550 393 map->base.cl = &cx_hash_map_class;
universe@550 394 map->base.allocator = allocator;
universe@550 395 map->base.size = 0;
universe@550 396
universe@550 397 return (CxMap *) map;
universe@550 398 }

mercurial