src/hash_map.c

Sun, 09 Apr 2023 19:03:58 +0200

author
Mike Becker <universe@uap-core.de>
date
Sun, 09 Apr 2023 19:03:58 +0200
changeset 677
b09aae58bba4
parent 669
dce9b8450656
child 685
2dd841e364af
permissions
-rw-r--r--

refactoring of collections to make use of destructors in map implementations

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include <string.h>
    30 #include "cx/hash_map.h"
    31 #include "cx/utils.h"
/**
 * A node in the singly linked list of one bucket.
 *
 * The key data referenced by the node is a separate allocation that is
 * released together with the node. The value (or, for maps that store
 * pointers, the pointer itself) is copied into the trailing flexible
 * array member.
 */
struct cx_hash_map_element_s {
    /** A pointer to the next element in the current bucket. */
    struct cx_hash_map_element_s *next;

    /** The corresponding key. */
    CxHashKey key;

    /** The value data. */
    char data[];
};
    44 static void cx_hash_map_clear(struct cx_map_s *map) {
    45     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    46     cx_for_n(i, hash_map->bucket_count) {
    47         struct cx_hash_map_element_s *elem = hash_map->buckets[i];
    48         if (elem != NULL) {
    49             do {
    50                 struct cx_hash_map_element_s *next = elem->next;
    51                 // free the key data
    52                 cxFree(map->allocator, elem->key.data.obj);
    53                 // free the node
    54                 cxFree(map->allocator, elem);
    55                 // proceed
    56                 elem = next;
    57             } while (elem != NULL);
    59             // do not leave a dangling pointer
    60             hash_map->buckets[i] = NULL;
    61         }
    62     }
    63     map->size = 0;
    64 }
    66 static void cx_hash_map_destructor(struct cx_map_s *map) {
    67     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    69     // free the buckets
    70     cx_hash_map_clear(map);
    71     cxFree(map->allocator, hash_map->buckets);
    73     // free the map structure
    74     cxFree(map->allocator, map);
    75 }
    77 static int cx_hash_map_put(
    78         CxMap *map,
    79         CxHashKey key,
    80         void *value
    81 ) {
    82     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    83     CxAllocator *allocator = map->allocator;
    85     unsigned hash = key.hash;
    86     if (hash == 0) {
    87         cx_hash_murmur(&key);
    88         hash = key.hash;
    89     }
    91     size_t slot = hash % hash_map->bucket_count;
    92     struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    93     struct cx_hash_map_element_s *prev = NULL;
    95     while (elm != NULL && elm->key.hash < hash) {
    96         prev = elm;
    97         elm = elm->next;
    98     }
   100     if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len &&
   101         memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) {
   102         // overwrite existing element
   103         if (map->store_pointers) {
   104             memcpy(elm->data, &value, sizeof(void *));
   105         } else {
   106             memcpy(elm->data, value, map->item_size);
   107         }
   108     } else {
   109         // allocate new element
   110         struct cx_hash_map_element_s *e = cxMalloc(
   111                 allocator,
   112                 sizeof(struct cx_hash_map_element_s) + map->item_size
   113         );
   114         if (e == NULL) {
   115             return -1;
   116         }
   118         // write the value
   119         if (map->store_pointers) {
   120             memcpy(e->data, &value, sizeof(void *));
   121         } else {
   122             memcpy(e->data, value, map->item_size);
   123         }
   125         // copy the key
   126         void *kd = cxMalloc(allocator, key.len);
   127         if (kd == NULL) {
   128             return -1;
   129         }
   130         memcpy(kd, key.data.obj, key.len);
   131         e->key.data.obj = kd;
   132         e->key.len = key.len;
   133         e->key.hash = hash;
   135         // insert the element into the linked list
   136         if (prev == NULL) {
   137             hash_map->buckets[slot] = e;
   138         } else {
   139             prev->next = e;
   140         }
   141         e->next = elm;
   143         // increase the size
   144         map->size++;
   145     }
   147     return 0;
   148 }
   150 static void cx_hash_map_unlink(
   151         struct cx_hash_map_s *hash_map,
   152         size_t slot,
   153         struct cx_hash_map_element_s *prev,
   154         struct cx_hash_map_element_s *elm
   155 ) {
   156     // unlink
   157     if (prev == NULL) {
   158         hash_map->buckets[slot] = elm->next;
   159     } else {
   160         prev->next = elm->next;
   161     }
   162     // free element
   163     cxFree(hash_map->base.allocator, elm->key.data.obj);
   164     cxFree(hash_map->base.allocator, elm);
   165     // decrease size
   166     hash_map->base.size--;
   167 }
   169 /**
   170  * Helper function to avoid code duplication.
   171  *
   172  * @param map the map
   173  * @param key the key to look up
   174  * @param remove flag indicating whether the looked up entry shall be removed
   175  * @return a pointer to the value corresponding to the key or \c NULL
   176  */
   177 static void *cx_hash_map_get_remove(
   178         CxMap *map,
   179         CxHashKey key,
   180         bool remove
   181 ) {
   182     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   184     unsigned hash = key.hash;
   185     if (hash == 0) {
   186         cx_hash_murmur(&key);
   187         hash = key.hash;
   188     }
   190     size_t slot = hash % hash_map->bucket_count;
   191     struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
   192     struct cx_hash_map_element_s *prev = NULL;
   193     while (elm && elm->key.hash <= hash) {
   194         if (elm->key.hash == hash && elm->key.len == key.len) {
   195             if (memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) {
   196                 void *data = NULL;
   197                 if (map->store_pointers) {
   198                     data = *(void **) elm->data;
   199                 } else if (!remove) {
   200                     data = elm->data;
   201                 }
   202                 if (remove) {
   203                     cx_hash_map_unlink(hash_map, slot, prev, elm);
   204                 }
   205                 return data;
   206             }
   207         }
   208         prev = elm;
   209         elm = prev->next;
   210     }
   212     return NULL;
   213 }
   215 static void *cx_hash_map_get(
   216         CxMap const *map,
   217         CxHashKey key
   218 ) {
   219     // we can safely cast, because we know when remove=false, the map stays untouched
   220     return cx_hash_map_get_remove((CxMap *) map, key, false);
   221 }
   223 static void *cx_hash_map_remove(
   224         CxMap *map,
   225         CxHashKey key
   226 ) {
   227     return cx_hash_map_get_remove(map, key, true);
   228 }
   230 static void *cx_hash_map_iter_current_entry(void const *it) {
   231     struct cx_iterator_s const *iter = it;
   232     // struct has to have a compatible signature
   233     return (struct cx_map_entry_s *) &(iter->kv_data);
   234 }
   236 static void *cx_hash_map_iter_current_key(void const *it) {
   237     struct cx_iterator_s const *iter = it;
   238     struct cx_hash_map_element_s *elm = iter->elem_handle;
   239     return &elm->key;
   240 }
   242 static void *cx_hash_map_iter_current_value(void const *it) {
   243     struct cx_iterator_s const *iter = it;
   244     struct cx_hash_map_s const *map = iter->src_handle;
   245     struct cx_hash_map_element_s *elm = iter->elem_handle;
   246     if (map->base.store_pointers) {
   247         return *(void **) elm->data;
   248     } else {
   249         return elm->data;
   250     }
   251 }
   253 static bool cx_hash_map_iter_valid(void const *it) {
   254     struct cx_iterator_s const *iter = it;
   255     return iter->elem_handle != NULL;
   256 }
   258 static void cx_hash_map_iter_next(void *it) {
   259     struct cx_iterator_s *iter = it;
   260     struct cx_hash_map_element_s *elm = iter->elem_handle;
   262     // remove current element, if asked
   263     if (iter->base.remove) {
   264         // obtain mutable pointer to the map
   265         struct cx_mut_iterator_s *miter = it;
   266         struct cx_hash_map_s *map = miter->src_handle;
   268         // clear the flag
   269         iter->base.remove = false;
   271         // determine the next element
   272         struct cx_hash_map_element_s *next = elm->next;
   274         // search the previous element
   275         struct cx_hash_map_element_s *prev = NULL;
   276         if (map->buckets[iter->slot] != elm) {
   277             prev = map->buckets[iter->slot];
   278             while (prev->next != elm) {
   279                 prev = prev->next;
   280             }
   281         }
   283         // unlink
   284         cx_hash_map_unlink(map, iter->slot, prev, elm);
   286         // advance
   287         elm = next;
   288     } else {
   289         // just advance
   290         elm = elm->next;
   291         iter->index++;
   292     }
   294     // search the next bucket, if required
   295     struct cx_hash_map_s const *map = iter->src_handle;
   296     while (elm == NULL && ++iter->slot < map->bucket_count) {
   297         elm = map->buckets[iter->slot];
   298     }
   300     // fill the struct with the next element
   301     iter->elem_handle = elm;
   302     if (elm == NULL) {
   303         iter->kv_data.key = NULL;
   304         iter->kv_data.value = NULL;
   305     } else {
   306         iter->kv_data.key = &elm->key;
   307         if (map->base.store_pointers) {
   308             iter->kv_data.value = *(void **) elm->data;
   309         } else {
   310             iter->kv_data.value = elm->data;
   311         }
   312     }
   313 }
   315 static bool cx_hash_map_iter_flag_rm(void *it) {
   316     struct cx_iterator_base_s *iter = it;
   317     if (iter->mutating) {
   318         iter->remove = true;
   319         return true;
   320     } else {
   321         return false;
   322     }
   323 }
   325 static CxIterator cx_hash_map_iterator(CxMap const *map) {
   326     CxIterator iter;
   328     iter.src_handle = map;
   329     iter.base.valid = cx_hash_map_iter_valid;
   330     iter.base.next = cx_hash_map_iter_next;
   331     iter.base.current = cx_hash_map_iter_current_entry;
   332     iter.base.flag_removal = cx_hash_map_iter_flag_rm;
   333     iter.base.remove = false;
   334     iter.base.mutating = false;
   336     iter.slot = 0;
   337     iter.index = 0;
   339     if (map->size > 0) {
   340         struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   341         struct cx_hash_map_element_s *elm = hash_map->buckets[0];
   342         while (elm == NULL) {
   343             elm = hash_map->buckets[++iter.slot];
   344         }
   345         iter.elem_handle = elm;
   346         iter.kv_data.key = &elm->key;
   347         if (map->store_pointers) {
   348             iter.kv_data.value = *(void **) elm->data;
   349         } else {
   350             iter.kv_data.value = elm->data;
   351         }
   352     } else {
   353         iter.elem_handle = NULL;
   354         iter.kv_data.key = NULL;
   355         iter.kv_data.value = NULL;
   356     }
   358     return iter;
   359 }
   361 static CxIterator cx_hash_map_iterator_keys(CxMap const *map) {
   362     CxIterator iter = cx_hash_map_iterator(map);
   363     iter.base.current = cx_hash_map_iter_current_key;
   364     return iter;
   365 }
   367 static CxIterator cx_hash_map_iterator_values(CxMap const *map) {
   368     CxIterator iter = cx_hash_map_iterator(map);
   369     iter.base.current = cx_hash_map_iter_current_value;
   370     return iter;
   371 }
   373 static CxMutIterator cx_hash_map_mut_iterator(CxMap *map) {
   374     CxIterator it = cx_hash_map_iterator(map);
   375     it.base.mutating = true;
   377     // we know the iterators share the same memory layout
   378     CxMutIterator iter;
   379     memcpy(&iter, &it, sizeof(CxMutIterator));
   380     return iter;
   381 }
   383 static CxMutIterator cx_hash_map_mut_iterator_keys(CxMap *map) {
   384     CxMutIterator iter = cx_hash_map_mut_iterator(map);
   385     iter.base.current = cx_hash_map_iter_current_key;
   386     return iter;
   387 }
   389 static CxMutIterator cx_hash_map_mut_iterator_values(CxMap *map) {
   390     CxMutIterator iter = cx_hash_map_mut_iterator(map);
   391     iter.base.current = cx_hash_map_iter_current_value;
   392     return iter;
   393 }
/**
 * The function table of the hash map implementation.
 *
 * cxHashMapCreate() installs a pointer to this table in the base structure
 * of every map it creates. The initializers are positional, so their order
 * has to follow the member order of cx_map_class (its declaration is not
 * part of this file - presumably it comes from the included cx headers).
 */
static cx_map_class cx_hash_map_class = {
        cx_hash_map_destructor,
        cx_hash_map_clear,
        cx_hash_map_put,
        cx_hash_map_get,
        cx_hash_map_remove,
        cx_hash_map_iterator,
        cx_hash_map_iterator_keys,
        cx_hash_map_iterator_values,
        cx_hash_map_mut_iterator,
        cx_hash_map_mut_iterator_keys,
        cx_hash_map_mut_iterator_values,
};
   409 CxMap *cxHashMapCreate(
   410         CxAllocator *allocator,
   411         size_t itemsize,
   412         size_t buckets
   413 ) {
   414     if (buckets == 0) {
   415         // implementation defined default
   416         buckets = 16;
   417     }
   419     struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s));
   420     if (map == NULL) return NULL;
   422     // initialize hash map members
   423     map->bucket_count = buckets;
   424     map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *));
   425     if (map->buckets == NULL) {
   426         cxFree(allocator, map);
   427         return NULL;
   428     }
   430     // initialize base members
   431     map->base.cl = &cx_hash_map_class;
   432     map->base.allocator = allocator;
   433     map->base.size = 0;
   435     if (itemsize > 0) {
   436         map->base.store_pointers = false;
   437         map->base.item_size = itemsize;
   438     } else {
   439         map->base.store_pointers = true;
   440         map->base.item_size = sizeof(void *);
   441     }
   443     return (CxMap *) map;
   444 }
   446 int cxMapRehash(CxMap *map) {
   447     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   448     if (map->size > ((hash_map->bucket_count * 3) >> 2)) {
   450         size_t new_bucket_count = (map->size * 5) >> 1;
   451         struct cx_hash_map_element_s **new_buckets = cxCalloc(map->allocator,
   452                                                               new_bucket_count, sizeof(struct cx_hash_map_element_s *));
   454         if (new_buckets == NULL) {
   455             return 1;
   456         }
   458         // iterate through the elements and assign them to their new slots
   459         cx_for_n(slot, hash_map->bucket_count) {
   460             struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
   461             while (elm != NULL) {
   462                 struct cx_hash_map_element_s *next = elm->next;
   463                 size_t new_slot = elm->key.hash % new_bucket_count;
   465                 // find position where to insert
   466                 struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
   467                 struct cx_hash_map_element_s *bucket_prev = NULL;
   468                 while (bucket_next != NULL && bucket_next->key.hash < elm->key.hash) {
   469                     bucket_prev = bucket_next;
   470                     bucket_next = bucket_next->next;
   471                 }
   473                 // insert
   474                 if (bucket_prev == NULL) {
   475                     elm->next = new_buckets[new_slot];
   476                     new_buckets[new_slot] = elm;
   477                 } else {
   478                     bucket_prev->next = elm;
   479                     elm->next = bucket_next;
   480                 }
   482                 // advance
   483                 elm = next;
   484             }
   485         }
   487         // assign result to the map
   488         hash_map->bucket_count = new_bucket_count;
   489         cxFree(map->allocator, hash_map->buckets);
   490         hash_map->buckets = new_buckets;
   491     }
   492     return 0;
   493 }

mercurial