src/hash_map.c

Sat, 26 Nov 2022 16:58:41 +0100

author
Mike Becker <universe@uap-core.de>
date
Sat, 26 Nov 2022 16:58:41 +0100
changeset 630
ac5e7f789048
parent 575
b05935945637
child 658
56c62780582e
permissions
-rw-r--r--

separate iterators and mutating iterators

Trade tons of code duplication for const-correctness.

universe@549 1 /*
universe@549 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@549 3 *
universe@549 4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
universe@549 5 *
universe@549 6 * Redistribution and use in source and binary forms, with or without
universe@549 7 * modification, are permitted provided that the following conditions are met:
universe@549 8 *
universe@549 9 * 1. Redistributions of source code must retain the above copyright
universe@549 10 * notice, this list of conditions and the following disclaimer.
universe@549 11 *
universe@549 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@549 13 * notice, this list of conditions and the following disclaimer in the
universe@549 14 * documentation and/or other materials provided with the distribution.
universe@549 15 *
universe@549 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@549 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@549 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@549 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@549 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@549 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@549 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@549 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@549 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@549 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@549 26 * POSSIBILITY OF SUCH DAMAGE.
universe@549 27 */
universe@549 28
universe@549 29 #include <string.h>
universe@549 30 #include "cx/hash_map.h"
universe@550 31 #include "cx/utils.h"
universe@549 32
universe@550 33 static void cx_hash_map_clear(struct cx_map_s *map) {
universe@550 34 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
universe@550 35 cx_for_n(i, hash_map->bucket_count) {
universe@550 36 struct cx_hash_map_element_s *elem = hash_map->buckets[i];
universe@550 37 if (elem != NULL) {
universe@550 38 do {
universe@550 39 struct cx_hash_map_element_s *next = elem->next;
universe@550 40 // free the key data
universe@563 41 cxFree(map->allocator, elem->key.data.obj);
universe@550 42 // free the node
universe@550 43 cxFree(map->allocator, elem);
universe@550 44 // proceed
universe@550 45 elem = next;
universe@550 46 } while (elem != NULL);
universe@550 47
universe@550 48 // do not leave a dangling pointer
universe@550 49 hash_map->buckets[i] = NULL;
universe@550 50 }
universe@550 51 }
universe@550 52 map->size = 0;
universe@550 53 }
universe@550 54
universe@550 55 static void cx_hash_map_destructor(struct cx_map_s *map) {
universe@550 56 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
universe@550 57
universe@550 58 // free the buckets
universe@550 59 cx_hash_map_clear(map);
universe@550 60 cxFree(map->allocator, hash_map->buckets);
universe@550 61
universe@550 62 // free the map structure
universe@550 63 cxFree(map->allocator, map);
universe@550 64 }
universe@550 65
universe@550 66 static int cx_hash_map_put(
universe@550 67 CxMap *map,
universe@563 68 CxHashKey key,
universe@550 69 void *value
universe@550 70 ) {
universe@550 71 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
universe@550 72 CxAllocator *allocator = map->allocator;
universe@550 73
universe@563 74 unsigned hash = key.hash;
universe@563 75 if (hash == 0) {
universe@563 76 cx_hash_murmur(&key);
universe@563 77 hash = key.hash;
universe@563 78 }
universe@550 79
universe@550 80 size_t slot = hash % hash_map->bucket_count;
universe@550 81 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
universe@550 82 struct cx_hash_map_element_s *prev = NULL;
universe@550 83
universe@550 84 while (elm != NULL && elm->key.hash < hash) {
universe@550 85 prev = elm;
universe@550 86 elm = elm->next;
universe@550 87 }
universe@550 88
universe@575 89 if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len &&
universe@575 90 memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) {
universe@574 91 // overwrite existing element
universe@574 92 elm->data = value;
universe@574 93 } else {
universe@575 94 // allocate new element
universe@550 95 struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s));
universe@550 96 if (e == NULL) {
universe@550 97 return -1;
universe@550 98 }
universe@550 99
universe@550 100 // write the value
universe@550 101 // TODO: depending on future map features, we may want to copy here
universe@550 102 e->data = value;
universe@550 103
universe@550 104 // copy the key
universe@550 105 void *kd = cxMalloc(allocator, key.len);
universe@550 106 if (kd == NULL) {
universe@550 107 return -1;
universe@550 108 }
universe@563 109 memcpy(kd, key.data.obj, key.len);
universe@563 110 e->key.data.obj = kd;
universe@550 111 e->key.len = key.len;
universe@550 112 e->key.hash = hash;
universe@550 113
universe@550 114 // insert the element into the linked list
universe@550 115 if (prev == NULL) {
universe@550 116 hash_map->buckets[slot] = e;
universe@550 117 } else {
universe@550 118 prev->next = e;
universe@550 119 }
universe@550 120 e->next = elm;
universe@550 121
universe@550 122 // increase the size
universe@550 123 map->size++;
universe@550 124 }
universe@550 125
universe@550 126 return 0;
universe@550 127 }
universe@549 128
universe@551 129 static void cx_hash_map_unlink(
universe@551 130 struct cx_hash_map_s *hash_map,
universe@551 131 size_t slot,
universe@551 132 struct cx_hash_map_element_s *prev,
universe@551 133 struct cx_hash_map_element_s *elm
universe@551 134 ) {
universe@551 135 // unlink
universe@551 136 if (prev == NULL) {
universe@551 137 hash_map->buckets[slot] = elm->next;
universe@551 138 } else {
universe@551 139 prev->next = elm->next;
universe@551 140 }
universe@551 141 // free element
universe@563 142 cxFree(hash_map->base.allocator, elm->key.data.obj);
universe@551 143 cxFree(hash_map->base.allocator, elm);
universe@551 144 // decrease size
universe@551 145 hash_map->base.size--;
universe@551 146 }
universe@551 147
universe@549 148 /**
universe@550 149 * Helper function to avoid code duplication.
universe@549 150 *
universe@550 151 * @param map the map
universe@550 152 * @param key the key to look up
universe@550 153 * @param remove flag indicating whether the looked up entry shall be removed
universe@550 154 * @return the value corresponding to the key or \c NULL
universe@549 155 */
universe@550 156 static void *cx_hash_map_get_remove(
universe@550 157 CxMap *map,
universe@563 158 CxHashKey key,
universe@550 159 bool remove
universe@550 160 ) {
universe@550 161 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
universe@550 162
universe@563 163 unsigned hash = key.hash;
universe@563 164 if (hash == 0) {
universe@563 165 cx_hash_murmur(&key);
universe@563 166 hash = key.hash;
universe@563 167 }
universe@550 168
universe@550 169 size_t slot = hash % hash_map->bucket_count;
universe@550 170 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
universe@550 171 struct cx_hash_map_element_s *prev = NULL;
universe@550 172 while (elm && elm->key.hash <= hash) {
universe@550 173 if (elm->key.hash == hash && elm->key.len == key.len) {
universe@563 174 if (memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) {
universe@550 175 void *data = elm->data;
universe@550 176 if (remove) {
universe@551 177 cx_hash_map_unlink(hash_map, slot, prev, elm);
universe@550 178 }
universe@550 179 return data;
universe@550 180 }
universe@550 181 }
universe@550 182 prev = elm;
universe@550 183 elm = prev->next;
universe@550 184 }
universe@550 185
universe@550 186 return NULL;
universe@549 187 }
universe@550 188
universe@550 189 static void *cx_hash_map_get(
universe@550 190 CxMap const *map,
universe@563 191 CxHashKey key
universe@550 192 ) {
universe@550 193 // we can safely cast, because we know when remove=false, the map stays untouched
universe@550 194 return cx_hash_map_get_remove((CxMap *) map, key, false);
universe@550 195 }
universe@550 196
universe@550 197 static void *cx_hash_map_remove(
universe@550 198 CxMap *map,
universe@563 199 CxHashKey key
universe@550 200 ) {
universe@550 201 return cx_hash_map_get_remove(map, key, true);
universe@550 202 }
universe@550 203
universe@630 204 static void *cx_hash_map_iter_current_entry(void const *it) {
universe@630 205 struct cx_iterator_s const *iter = it;
universe@551 206 // struct has to have a compatible signature
universe@573 207 return (struct cx_map_entry_s *) &(iter->kv_data);
universe@551 208 }
universe@551 209
universe@630 210 static void *cx_hash_map_iter_current_key(void const *it) {
universe@630 211 struct cx_iterator_s const *iter = it;
universe@551 212 struct cx_hash_map_element_s *elm = iter->elem_handle;
universe@551 213 return &elm->key;
universe@551 214 }
universe@551 215
universe@630 216 static void *cx_hash_map_iter_current_value(void const *it) {
universe@630 217 struct cx_iterator_s const *iter = it;
universe@551 218 struct cx_hash_map_element_s *elm = iter->elem_handle;
universe@551 219 // TODO: return a pointer to data if this map is storing copies
universe@551 220 return elm->data;
universe@551 221 }
universe@551 222
universe@630 223 static bool cx_hash_map_iter_valid(void const *it) {
universe@630 224 struct cx_iterator_s const *iter = it;
universe@551 225 return iter->elem_handle != NULL;
universe@551 226 }
universe@551 227
universe@630 228 static void cx_hash_map_iter_next(void *it) {
universe@630 229 struct cx_iterator_s *iter = it;
universe@551 230 struct cx_hash_map_element_s *elm = iter->elem_handle;
universe@551 231
universe@551 232 // remove current element, if asked
universe@630 233 if (iter->base.remove) {
universe@630 234 // obtain mutable pointer to the map
universe@630 235 struct cx_mut_iterator_s *miter = it;
universe@630 236 struct cx_hash_map_s *map = miter->src_handle;
universe@630 237
universe@551 238 // clear the flag
universe@630 239 iter->base.remove = false;
universe@551 240
universe@551 241 // determine the next element
universe@551 242 struct cx_hash_map_element_s *next = elm->next;
universe@551 243
universe@551 244 // search the previous element
universe@551 245 struct cx_hash_map_element_s *prev = NULL;
universe@551 246 if (map->buckets[iter->slot] != elm) {
universe@551 247 prev = map->buckets[iter->slot];
universe@551 248 while (prev->next != elm) {
universe@551 249 prev = prev->next;
universe@551 250 }
universe@551 251 }
universe@551 252
universe@551 253 // unlink
universe@551 254 cx_hash_map_unlink(map, iter->slot, prev, elm);
universe@551 255
universe@551 256 // advance
universe@551 257 elm = next;
universe@551 258 } else {
universe@551 259 // just advance
universe@551 260 elm = elm->next;
universe@560 261 iter->index++;
universe@551 262 }
universe@551 263
universe@560 264 // search the next bucket, if required
universe@630 265 struct cx_hash_map_s const *map = iter->src_handle;
universe@560 266 while (elm == NULL && ++iter->slot < map->bucket_count) {
universe@560 267 elm = map->buckets[iter->slot];
universe@551 268 }
universe@551 269
universe@560 270 // fill the struct with the next element
universe@551 271 iter->elem_handle = elm;
universe@551 272 if (elm == NULL) {
universe@551 273 iter->kv_data.key = NULL;
universe@551 274 iter->kv_data.value = NULL;
universe@551 275 } else {
universe@551 276 iter->kv_data.key = &elm->key;
universe@551 277 // TODO: pointer to data if this map is storing copies
universe@551 278 iter->kv_data.value = elm->data;
universe@551 279 }
universe@551 280 }
universe@551 281
universe@630 282 static bool cx_hash_map_iter_flag_rm(void *it) {
universe@630 283 struct cx_iterator_base_s *iter = it;
universe@630 284 if (iter->mutating) {
universe@630 285 iter->remove = true;
universe@630 286 return true;
universe@630 287 } else {
universe@630 288 return false;
universe@630 289 }
universe@630 290 }
universe@630 291
universe@630 292 static CxIterator cx_hash_map_iterator(CxMap const *map) {
universe@550 293 CxIterator iter;
universe@551 294
universe@551 295 iter.src_handle = map;
universe@630 296 iter.base.valid = cx_hash_map_iter_valid;
universe@630 297 iter.base.next = cx_hash_map_iter_next;
universe@630 298 iter.base.current = cx_hash_map_iter_current_entry;
universe@630 299 iter.base.flag_removal = cx_hash_map_iter_flag_rm;
universe@630 300 iter.base.remove = false;
universe@630 301 iter.base.mutating = false;
universe@551 302
universe@551 303 iter.slot = 0;
universe@551 304 iter.index = 0;
universe@551 305
universe@551 306 if (map->size > 0) {
universe@551 307 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
universe@560 308 struct cx_hash_map_element_s *elm = hash_map->buckets[0];
universe@554 309 for (; elm == NULL; iter.slot++) {
universe@554 310 elm = hash_map->buckets[iter.slot];
universe@551 311 }
universe@554 312 iter.elem_handle = elm;
universe@554 313 iter.kv_data.key = &elm->key;
universe@554 314 // TODO: pointer to data if this map is storing copies
universe@554 315 iter.kv_data.value = elm->data;
universe@551 316 } else {
universe@551 317 iter.elem_handle = NULL;
universe@551 318 iter.kv_data.key = NULL;
universe@551 319 iter.kv_data.value = NULL;
universe@551 320 }
universe@551 321
universe@550 322 return iter;
universe@550 323 }
universe@550 324
universe@630 325 static CxIterator cx_hash_map_iterator_keys(CxMap const *map) {
universe@551 326 CxIterator iter = cx_hash_map_iterator(map);
universe@630 327 iter.base.current = cx_hash_map_iter_current_key;
universe@550 328 return iter;
universe@550 329 }
universe@550 330
universe@630 331 static CxIterator cx_hash_map_iterator_values(CxMap const *map) {
universe@551 332 CxIterator iter = cx_hash_map_iterator(map);
universe@630 333 iter.base.current = cx_hash_map_iter_current_value;
universe@630 334 return iter;
universe@630 335 }
universe@630 336
universe@630 337 static CxMutIterator cx_hash_map_mut_iterator(CxMap *map) {
universe@630 338 CxIterator it = cx_hash_map_iterator(map);
universe@630 339 it.base.mutating = true;
universe@630 340
universe@630 341 // we know the iterators share the same memory layout
universe@630 342 CxMutIterator iter;
universe@630 343 memcpy(&iter, &it, sizeof(CxMutIterator));
universe@630 344 return iter;
universe@630 345 }
universe@630 346
universe@630 347 static CxMutIterator cx_hash_map_mut_iterator_keys(CxMap *map) {
universe@630 348 CxMutIterator iter = cx_hash_map_mut_iterator(map);
universe@630 349 iter.base.current = cx_hash_map_iter_current_key;
universe@630 350 return iter;
universe@630 351 }
universe@630 352
universe@630 353 static CxMutIterator cx_hash_map_mut_iterator_values(CxMap *map) {
universe@630 354 CxMutIterator iter = cx_hash_map_mut_iterator(map);
universe@630 355 iter.base.current = cx_hash_map_iter_current_value;
universe@550 356 return iter;
universe@550 357 }
universe@550 358
universe@550 359 static cx_map_class cx_hash_map_class = {
universe@550 360 cx_hash_map_destructor,
universe@550 361 cx_hash_map_clear,
universe@550 362 cx_hash_map_put,
universe@550 363 cx_hash_map_get,
universe@550 364 cx_hash_map_remove,
universe@550 365 cx_hash_map_iterator,
universe@550 366 cx_hash_map_iterator_keys,
universe@550 367 cx_hash_map_iterator_values,
universe@630 368 cx_hash_map_mut_iterator,
universe@630 369 cx_hash_map_mut_iterator_keys,
universe@630 370 cx_hash_map_mut_iterator_values,
universe@550 371 };
universe@550 372
universe@550 373 CxMap *cxHashMapCreate(
universe@550 374 CxAllocator *allocator,
universe@550 375 size_t buckets
universe@550 376 ) {
universe@550 377 if (buckets == 0) {
universe@550 378 // implementation defined default
universe@550 379 buckets = 16;
universe@550 380 }
universe@550 381
universe@550 382 struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s));
universe@550 383 if (map == NULL) return NULL;
universe@550 384
universe@550 385 // initialize hash map members
universe@550 386 map->bucket_count = buckets;
universe@550 387 map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *));
universe@550 388 if (map->buckets == NULL) {
universe@550 389 cxFree(allocator, map);
universe@550 390 return NULL;
universe@550 391 }
universe@550 392
universe@550 393 // initialize base members
universe@550 394 map->base.cl = &cx_hash_map_class;
universe@550 395 map->base.allocator = allocator;
universe@550 396 map->base.size = 0;
universe@550 397
universe@550 398 return (CxMap *) map;
universe@550 399 }
universe@562 400
universe@562 401 int cxMapRehash(CxMap *map) {
universe@562 402 struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
universe@562 403 if (map->size > ((hash_map->bucket_count * 3) >> 2)) {
universe@562 404
universe@562 405 size_t new_bucket_count = (map->size * 5) >> 1;
universe@562 406 struct cx_hash_map_element_s **new_buckets = cxCalloc(map->allocator,
universe@562 407 new_bucket_count, sizeof(struct cx_hash_map_element_s *));
universe@562 408
universe@562 409 if (new_buckets == NULL) {
universe@562 410 return 1;
universe@562 411 }
universe@562 412
universe@562 413 // iterate through the elements and assign them to their new slots
universe@562 414 cx_for_n(slot, hash_map->bucket_count) {
universe@562 415 struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
universe@562 416 while (elm != NULL) {
universe@562 417 struct cx_hash_map_element_s *next = elm->next;
universe@562 418 size_t new_slot = elm->key.hash % new_bucket_count;
universe@562 419
universe@562 420 // find position where to insert
universe@562 421 struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
universe@562 422 struct cx_hash_map_element_s *bucket_prev = NULL;
universe@562 423 while (bucket_next != NULL && bucket_next->key.hash < elm->key.hash) {
universe@562 424 bucket_prev = bucket_next;
universe@562 425 bucket_next = bucket_next->next;
universe@562 426 }
universe@562 427
universe@562 428 // insert
universe@562 429 if (bucket_prev == NULL) {
universe@562 430 elm->next = new_buckets[new_slot];
universe@562 431 new_buckets[new_slot] = elm;
universe@562 432 } else {
universe@562 433 bucket_prev->next = elm;
universe@562 434 elm->next = bucket_next;
universe@562 435 }
universe@562 436
universe@562 437 // advance
universe@562 438 elm = next;
universe@562 439 }
universe@562 440 }
universe@562 441
universe@562 442 // assign result to the map
universe@562 443 hash_map->bucket_count = new_bucket_count;
universe@562 444 cxFree(map->allocator, hash_map->buckets);
universe@562 445 hash_map->buckets = new_buckets;
universe@562 446 }
universe@562 447 return 0;
universe@562 448 }

mercurial