src/hash_map.c

Sat, 26 Nov 2022 16:58:41 +0100

author
Mike Becker <universe@uap-core.de>
date
Sat, 26 Nov 2022 16:58:41 +0100
changeset 630
ac5e7f789048
parent 575
b05935945637
child 658
56c62780582e
permissions
-rw-r--r--

separate iterators and mutating iterators

Trade tons of code duplication for const-correctness.

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include <string.h>
    30 #include "cx/hash_map.h"
    31 #include "cx/utils.h"
    33 static void cx_hash_map_clear(struct cx_map_s *map) {
    34     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    35     cx_for_n(i, hash_map->bucket_count) {
    36         struct cx_hash_map_element_s *elem = hash_map->buckets[i];
    37         if (elem != NULL) {
    38             do {
    39                 struct cx_hash_map_element_s *next = elem->next;
    40                 // free the key data
    41                 cxFree(map->allocator, elem->key.data.obj);
    42                 // free the node
    43                 cxFree(map->allocator, elem);
    44                 // proceed
    45                 elem = next;
    46             } while (elem != NULL);
    48             // do not leave a dangling pointer
    49             hash_map->buckets[i] = NULL;
    50         }
    51     }
    52     map->size = 0;
    53 }
    55 static void cx_hash_map_destructor(struct cx_map_s *map) {
    56     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    58     // free the buckets
    59     cx_hash_map_clear(map);
    60     cxFree(map->allocator, hash_map->buckets);
    62     // free the map structure
    63     cxFree(map->allocator, map);
    64 }
    66 static int cx_hash_map_put(
    67         CxMap *map,
    68         CxHashKey key,
    69         void *value
    70 ) {
    71     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    72     CxAllocator *allocator = map->allocator;
    74     unsigned hash = key.hash;
    75     if (hash == 0) {
    76         cx_hash_murmur(&key);
    77         hash = key.hash;
    78     }
    80     size_t slot = hash % hash_map->bucket_count;
    81     struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    82     struct cx_hash_map_element_s *prev = NULL;
    84     while (elm != NULL && elm->key.hash < hash) {
    85         prev = elm;
    86         elm = elm->next;
    87     }
    89     if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len &&
    90         memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) {
    91         // overwrite existing element
    92         elm->data = value;
    93     } else {
    94         // allocate new element
    95         struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s));
    96         if (e == NULL) {
    97             return -1;
    98         }
   100         // write the value
   101         // TODO: depending on future map features, we may want to copy here
   102         e->data = value;
   104         // copy the key
   105         void *kd = cxMalloc(allocator, key.len);
   106         if (kd == NULL) {
   107             return -1;
   108         }
   109         memcpy(kd, key.data.obj, key.len);
   110         e->key.data.obj = kd;
   111         e->key.len = key.len;
   112         e->key.hash = hash;
   114         // insert the element into the linked list
   115         if (prev == NULL) {
   116             hash_map->buckets[slot] = e;
   117         } else {
   118             prev->next = e;
   119         }
   120         e->next = elm;
   122         // increase the size
   123         map->size++;
   124     }
   126     return 0;
   127 }
   129 static void cx_hash_map_unlink(
   130         struct cx_hash_map_s *hash_map,
   131         size_t slot,
   132         struct cx_hash_map_element_s *prev,
   133         struct cx_hash_map_element_s *elm
   134 ) {
   135     // unlink
   136     if (prev == NULL) {
   137         hash_map->buckets[slot] = elm->next;
   138     } else {
   139         prev->next = elm->next;
   140     }
   141     // free element
   142     cxFree(hash_map->base.allocator, elm->key.data.obj);
   143     cxFree(hash_map->base.allocator, elm);
   144     // decrease size
   145     hash_map->base.size--;
   146 }
   148 /**
   149  * Helper function to avoid code duplication.
   150  *
   151  * @param map the map
   152  * @param key the key to look up
   153  * @param remove flag indicating whether the looked up entry shall be removed
   154  * @return the value corresponding to the key or \c NULL
   155  */
   156 static void *cx_hash_map_get_remove(
   157         CxMap *map,
   158         CxHashKey key,
   159         bool remove
   160 ) {
   161     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   163     unsigned hash = key.hash;
   164     if (hash == 0) {
   165         cx_hash_murmur(&key);
   166         hash = key.hash;
   167     }
   169     size_t slot = hash % hash_map->bucket_count;
   170     struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
   171     struct cx_hash_map_element_s *prev = NULL;
   172     while (elm && elm->key.hash <= hash) {
   173         if (elm->key.hash == hash && elm->key.len == key.len) {
   174             if (memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) {
   175                 void *data = elm->data;
   176                 if (remove) {
   177                     cx_hash_map_unlink(hash_map, slot, prev, elm);
   178                 }
   179                 return data;
   180             }
   181         }
   182         prev = elm;
   183         elm = prev->next;
   184     }
   186     return NULL;
   187 }
   189 static void *cx_hash_map_get(
   190         CxMap const *map,
   191         CxHashKey key
   192 ) {
   193     // we can safely cast, because we know when remove=false, the map stays untouched
   194     return cx_hash_map_get_remove((CxMap *) map, key, false);
   195 }
   197 static void *cx_hash_map_remove(
   198         CxMap *map,
   199         CxHashKey key
   200 ) {
   201     return cx_hash_map_get_remove(map, key, true);
   202 }
   204 static void *cx_hash_map_iter_current_entry(void const *it) {
   205     struct cx_iterator_s const *iter = it;
   206     // struct has to have a compatible signature
   207     return (struct cx_map_entry_s *) &(iter->kv_data);
   208 }
   210 static void *cx_hash_map_iter_current_key(void const *it) {
   211     struct cx_iterator_s const *iter = it;
   212     struct cx_hash_map_element_s *elm = iter->elem_handle;
   213     return &elm->key;
   214 }
   216 static void *cx_hash_map_iter_current_value(void const *it) {
   217     struct cx_iterator_s const *iter = it;
   218     struct cx_hash_map_element_s *elm = iter->elem_handle;
   219     // TODO: return a pointer to data if this map is storing copies
   220     return elm->data;
   221 }
   223 static bool cx_hash_map_iter_valid(void const *it) {
   224     struct cx_iterator_s const *iter = it;
   225     return iter->elem_handle != NULL;
   226 }
   228 static void cx_hash_map_iter_next(void *it) {
   229     struct cx_iterator_s *iter = it;
   230     struct cx_hash_map_element_s *elm = iter->elem_handle;
   232     // remove current element, if asked
   233     if (iter->base.remove) {
   234         // obtain mutable pointer to the map
   235         struct cx_mut_iterator_s *miter = it;
   236         struct cx_hash_map_s *map = miter->src_handle;
   238         // clear the flag
   239         iter->base.remove = false;
   241         // determine the next element
   242         struct cx_hash_map_element_s *next = elm->next;
   244         // search the previous element
   245         struct cx_hash_map_element_s *prev = NULL;
   246         if (map->buckets[iter->slot] != elm) {
   247             prev = map->buckets[iter->slot];
   248             while (prev->next != elm) {
   249                 prev = prev->next;
   250             }
   251         }
   253         // unlink
   254         cx_hash_map_unlink(map, iter->slot, prev, elm);
   256         // advance
   257         elm = next;
   258     } else {
   259         // just advance
   260         elm = elm->next;
   261         iter->index++;
   262     }
   264     // search the next bucket, if required
   265     struct cx_hash_map_s const *map = iter->src_handle;
   266     while (elm == NULL && ++iter->slot < map->bucket_count) {
   267         elm = map->buckets[iter->slot];
   268     }
   270     // fill the struct with the next element
   271     iter->elem_handle = elm;
   272     if (elm == NULL) {
   273         iter->kv_data.key = NULL;
   274         iter->kv_data.value = NULL;
   275     } else {
   276         iter->kv_data.key = &elm->key;
   277         // TODO: pointer to data if this map is storing copies
   278         iter->kv_data.value = elm->data;
   279     }
   280 }
   282 static bool cx_hash_map_iter_flag_rm(void *it) {
   283     struct cx_iterator_base_s *iter = it;
   284     if (iter->mutating) {
   285         iter->remove = true;
   286         return true;
   287     } else {
   288         return false;
   289     }
   290 }
   292 static CxIterator cx_hash_map_iterator(CxMap const *map) {
   293     CxIterator iter;
   295     iter.src_handle = map;
   296     iter.base.valid = cx_hash_map_iter_valid;
   297     iter.base.next = cx_hash_map_iter_next;
   298     iter.base.current = cx_hash_map_iter_current_entry;
   299     iter.base.flag_removal = cx_hash_map_iter_flag_rm;
   300     iter.base.remove = false;
   301     iter.base.mutating = false;
   303     iter.slot = 0;
   304     iter.index = 0;
   306     if (map->size > 0) {
   307         struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   308         struct cx_hash_map_element_s *elm = hash_map->buckets[0];
   309         for (; elm == NULL; iter.slot++) {
   310             elm = hash_map->buckets[iter.slot];
   311         }
   312         iter.elem_handle = elm;
   313         iter.kv_data.key = &elm->key;
   314         // TODO: pointer to data if this map is storing copies
   315         iter.kv_data.value = elm->data;
   316     } else {
   317         iter.elem_handle = NULL;
   318         iter.kv_data.key = NULL;
   319         iter.kv_data.value = NULL;
   320     }
   322     return iter;
   323 }
   325 static CxIterator cx_hash_map_iterator_keys(CxMap const *map) {
   326     CxIterator iter = cx_hash_map_iterator(map);
   327     iter.base.current = cx_hash_map_iter_current_key;
   328     return iter;
   329 }
   331 static CxIterator cx_hash_map_iterator_values(CxMap const *map) {
   332     CxIterator iter = cx_hash_map_iterator(map);
   333     iter.base.current = cx_hash_map_iter_current_value;
   334     return iter;
   335 }
   337 static CxMutIterator cx_hash_map_mut_iterator(CxMap *map) {
   338     CxIterator it = cx_hash_map_iterator(map);
   339     it.base.mutating = true;
   341     // we know the iterators share the same memory layout
   342     CxMutIterator iter;
   343     memcpy(&iter, &it, sizeof(CxMutIterator));
   344     return iter;
   345 }
   347 static CxMutIterator cx_hash_map_mut_iterator_keys(CxMap *map) {
   348     CxMutIterator iter = cx_hash_map_mut_iterator(map);
   349     iter.base.current = cx_hash_map_iter_current_key;
   350     return iter;
   351 }
   353 static CxMutIterator cx_hash_map_mut_iterator_values(CxMap *map) {
   354     CxMutIterator iter = cx_hash_map_mut_iterator(map);
   355     iter.base.current = cx_hash_map_iter_current_value;
   356     return iter;
   357 }
   // Function table of the hash map implementation; cxHashMapCreate() stores
   // its address in the cl member of every map it creates.
   // NOTE(review): the initializer is positional, so the order of the entries
   // must match the member order of cx_map_class (declared outside this file,
   // presumably in the map header) - confirm whenever that struct changes.
   359 static cx_map_class cx_hash_map_class = {
   360         cx_hash_map_destructor,
   361         cx_hash_map_clear,
   362         cx_hash_map_put,
   363         cx_hash_map_get,
   364         cx_hash_map_remove,
   365         cx_hash_map_iterator,
   366         cx_hash_map_iterator_keys,
   367         cx_hash_map_iterator_values,
   368         cx_hash_map_mut_iterator,
   369         cx_hash_map_mut_iterator_keys,
   370         cx_hash_map_mut_iterator_values,
   371 };
   373 CxMap *cxHashMapCreate(
   374         CxAllocator *allocator,
   375         size_t buckets
   376 ) {
   377     if (buckets == 0) {
   378         // implementation defined default
   379         buckets = 16;
   380     }
   382     struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s));
   383     if (map == NULL) return NULL;
   385     // initialize hash map members
   386     map->bucket_count = buckets;
   387     map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *));
   388     if (map->buckets == NULL) {
   389         cxFree(allocator, map);
   390         return NULL;
   391     }
   393     // initialize base members
   394     map->base.cl = &cx_hash_map_class;
   395     map->base.allocator = allocator;
   396     map->base.size = 0;
   398     return (CxMap *) map;
   399 }
   401 int cxMapRehash(CxMap *map) {
   402     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   403     if (map->size > ((hash_map->bucket_count * 3) >> 2)) {
   405         size_t new_bucket_count = (map->size * 5) >> 1;
   406         struct cx_hash_map_element_s **new_buckets = cxCalloc(map->allocator,
   407                                                               new_bucket_count, sizeof(struct cx_hash_map_element_s *));
   409         if (new_buckets == NULL) {
   410             return 1;
   411         }
   413         // iterate through the elements and assign them to their new slots
   414         cx_for_n(slot, hash_map->bucket_count) {
   415             struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
   416             while (elm != NULL) {
   417                 struct cx_hash_map_element_s *next = elm->next;
   418                 size_t new_slot = elm->key.hash % new_bucket_count;
   420                 // find position where to insert
   421                 struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
   422                 struct cx_hash_map_element_s *bucket_prev = NULL;
   423                 while (bucket_next != NULL && bucket_next->key.hash < elm->key.hash) {
   424                     bucket_prev = bucket_next;
   425                     bucket_next = bucket_next->next;
   426                 }
   428                 // insert
   429                 if (bucket_prev == NULL) {
   430                     elm->next = new_buckets[new_slot];
   431                     new_buckets[new_slot] = elm;
   432                 } else {
   433                     bucket_prev->next = elm;
   434                     elm->next = bucket_next;
   435                 }
   437                 // advance
   438                 elm = next;
   439             }
   440         }
   442         // assign result to the map
   443         hash_map->bucket_count = new_bucket_count;
   444         cxFree(map->allocator, hash_map->buckets);
   445         hash_map->buckets = new_buckets;
   446     }
   447     return 0;
   448 }

mercurial