src/hash_map.c

Wed, 08 Jun 2022 21:33:31 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 08 Jun 2022 21:33:31 +0200
changeset 563
69a83fad8a35
parent 562
fd3368c20413
child 573
3f3a0d19db58
permissions
-rw-r--r--

improve hash key handling

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include <string.h>
    30 #include "cx/hash_map.h"
    31 #include "cx/utils.h"
    33 static void cx_hash_map_clear(struct cx_map_s *map) {
    34     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    35     cx_for_n(i, hash_map->bucket_count) {
    36         struct cx_hash_map_element_s *elem = hash_map->buckets[i];
    37         if (elem != NULL) {
    38             do {
    39                 struct cx_hash_map_element_s *next = elem->next;
    40                 // free the key data
    41                 cxFree(map->allocator, elem->key.data.obj);
    42                 // free the node
    43                 cxFree(map->allocator, elem);
    44                 // proceed
    45                 elem = next;
    46             } while (elem != NULL);
    48             // do not leave a dangling pointer
    49             hash_map->buckets[i] = NULL;
    50         }
    51     }
    52     map->size = 0;
    53 }
    55 static void cx_hash_map_destructor(struct cx_map_s *map) {
    56     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    58     // free the buckets
    59     cx_hash_map_clear(map);
    60     cxFree(map->allocator, hash_map->buckets);
    62     // free the map structure
    63     cxFree(map->allocator, map);
    64 }
    66 static int cx_hash_map_put(
    67         CxMap *map,
    68         CxHashKey key,
    69         void *value
    70 ) {
    71     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    72     CxAllocator *allocator = map->allocator;
    74     unsigned hash = key.hash;
    75     if (hash == 0) {
    76         cx_hash_murmur(&key);
    77         hash = key.hash;
    78     }
    80     size_t slot = hash % hash_map->bucket_count;
    81     struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    82     struct cx_hash_map_element_s *prev = NULL;
    84     while (elm != NULL && elm->key.hash < hash) {
    85         prev = elm;
    86         elm = elm->next;
    87     }
    89     if (elm == NULL || elm->key.hash != hash) {
    90         struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s));
    91         if (e == NULL) {
    92             return -1;
    93         }
    95         // write the value
    96         // TODO: depending on future map features, we may want to copy here
    97         e->data = value;
    99         // copy the key
   100         void *kd = cxMalloc(allocator, key.len);
   101         if (kd == NULL) {
   102             return -1;
   103         }
   104         memcpy(kd, key.data.obj, key.len);
   105         e->key.data.obj = kd;
   106         e->key.len = key.len;
   107         e->key.hash = hash;
   109         // insert the element into the linked list
   110         if (prev == NULL) {
   111             hash_map->buckets[slot] = e;
   112         } else {
   113             prev->next = e;
   114         }
   115         e->next = elm;
   117         // increase the size
   118         map->size++;
   119     } else {
   120         // (elem != NULL && elem->key.hash == hash) - overwrite value of existing element
   121         elm->data = value;
   122     }
   124     return 0;
   125 }
   127 static void cx_hash_map_unlink(
   128         struct cx_hash_map_s *hash_map,
   129         size_t slot,
   130         struct cx_hash_map_element_s *prev,
   131         struct cx_hash_map_element_s *elm
   132 ) {
   133     // unlink
   134     if (prev == NULL) {
   135         hash_map->buckets[slot] = elm->next;
   136     } else {
   137         prev->next = elm->next;
   138     }
   139     // free element
   140     cxFree(hash_map->base.allocator, elm->key.data.obj);
   141     cxFree(hash_map->base.allocator, elm);
   142     // decrease size
   143     hash_map->base.size--;
   144 }
   146 /**
   147  * Helper function to avoid code duplication.
   148  *
   149  * @param map the map
   150  * @param key the key to look up
   151  * @param remove flag indicating whether the looked up entry shall be removed
   152  * @return the value corresponding to the key or \c NULL
   153  */
   154 static void *cx_hash_map_get_remove(
   155         CxMap *map,
   156         CxHashKey key,
   157         bool remove
   158 ) {
   159     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   161     unsigned hash = key.hash;
   162     if (hash == 0) {
   163         cx_hash_murmur(&key);
   164         hash = key.hash;
   165     }
   167     size_t slot = hash % hash_map->bucket_count;
   168     struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
   169     struct cx_hash_map_element_s *prev = NULL;
   170     while (elm && elm->key.hash <= hash) {
   171         if (elm->key.hash == hash && elm->key.len == key.len) {
   172             if (memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) {
   173                 void *data = elm->data;
   174                 if (remove) {
   175                     cx_hash_map_unlink(hash_map, slot, prev, elm);
   176                 }
   177                 return data;
   178             }
   179         }
   180         prev = elm;
   181         elm = prev->next;
   182     }
   184     return NULL;
   185 }
   187 static void *cx_hash_map_get(
   188         CxMap const *map,
   189         CxHashKey key
   190 ) {
   191     // we can safely cast, because we know when remove=false, the map stays untouched
   192     return cx_hash_map_get_remove((CxMap *) map, key, false);
   193 }
   195 static void *cx_hash_map_remove(
   196         CxMap *map,
   197         CxHashKey key
   198 ) {
   199     return cx_hash_map_get_remove(map, key, true);
   200 }
   202 static void *cx_hash_map_iter_current_entry(CxIterator const *iter) {
   203     // struct has to have a compatible signature
   204     struct cx_map_entry_s *entry = (struct cx_map_entry_s *) &(iter->kv_data);
   205     return entry;
   206 }
   208 static void *cx_hash_map_iter_current_key(CxIterator const *iter) {
   209     struct cx_hash_map_element_s *elm = iter->elem_handle;
   210     return &elm->key;
   211 }
   213 static void *cx_hash_map_iter_current_value(CxIterator const *iter) {
   214     struct cx_hash_map_element_s *elm = iter->elem_handle;
   215     // TODO: return a pointer to data if this map is storing copies
   216     return elm->data;
   217 }
   219 static bool cx_hash_map_iter_valid(CxIterator const *iter) {
   220     return iter->elem_handle != NULL;
   221 }
   223 static void cx_hash_map_iter_next(CxIterator *iter) {
   224     struct cx_hash_map_s *map = iter->src_handle;
   225     struct cx_hash_map_element_s *elm = iter->elem_handle;
   227     // remove current element, if asked
   228     if (iter->remove) {
   229         // clear the flag
   230         iter->remove = false;
   232         // determine the next element
   233         struct cx_hash_map_element_s *next = elm->next;
   235         // search the previous element
   236         struct cx_hash_map_element_s *prev = NULL;
   237         if (map->buckets[iter->slot] != elm) {
   238             prev = map->buckets[iter->slot];
   239             while (prev->next != elm) {
   240                 prev = prev->next;
   241             }
   242         }
   244         // unlink
   245         cx_hash_map_unlink(map, iter->slot, prev, elm);
   247         // advance
   248         elm = next;
   249     } else {
   250         // just advance
   251         elm = elm->next;
   252         iter->index++;
   253     }
   255     // search the next bucket, if required
   256     while (elm == NULL && ++iter->slot < map->bucket_count) {
   257         elm = map->buckets[iter->slot];
   258     }
   260     // fill the struct with the next element
   261     iter->elem_handle = elm;
   262     if (elm == NULL) {
   263         iter->kv_data.key = NULL;
   264         iter->kv_data.value = NULL;
   265     } else {
   266         iter->kv_data.key = &elm->key;
   267         // TODO: pointer to data if this map is storing copies
   268         iter->kv_data.value = elm->data;
   269     }
   270 }
   272 static CxIterator cx_hash_map_iterator(CxMap *map) {
   273     CxIterator iter;
   275     iter.src_handle = map;
   276     iter.valid = cx_hash_map_iter_valid;
   277     iter.next = cx_hash_map_iter_next;
   278     iter.current = cx_hash_map_iter_current_entry;
   280     iter.slot = 0;
   281     iter.index = 0;
   282     iter.remove = false;
   284     if (map->size > 0) {
   285         struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   286         struct cx_hash_map_element_s *elm = hash_map->buckets[0];
   287         for (; elm == NULL; iter.slot++) {
   288             elm = hash_map->buckets[iter.slot];
   289         }
   290         iter.elem_handle = elm;
   291         iter.kv_data.key = &elm->key;
   292         // TODO: pointer to data if this map is storing copies
   293         iter.kv_data.value = elm->data;
   294     } else {
   295         iter.elem_handle = NULL;
   296         iter.kv_data.key = NULL;
   297         iter.kv_data.value = NULL;
   298     }
   300     return iter;
   301 }
   303 static CxIterator cx_hash_map_iterator_keys(CxMap *map) {
   304     CxIterator iter = cx_hash_map_iterator(map);
   305     iter.current = cx_hash_map_iter_current_key;
   306     return iter;
   307 }
   309 static CxIterator cx_hash_map_iterator_values(CxMap *map) {
   310     CxIterator iter = cx_hash_map_iterator(map);
   311     iter.current = cx_hash_map_iter_current_value;
   312     return iter;
   313 }
   315 static cx_map_class cx_hash_map_class = {
   316         cx_hash_map_destructor,
   317         cx_hash_map_clear,
   318         cx_hash_map_put,
   319         cx_hash_map_get,
   320         cx_hash_map_remove,
   321         cx_hash_map_iterator,
   322         cx_hash_map_iterator_keys,
   323         cx_hash_map_iterator_values,
   324 };
   326 CxMap *cxHashMapCreate(
   327         CxAllocator *allocator,
   328         size_t buckets
   329 ) {
   330     if (buckets == 0) {
   331         // implementation defined default
   332         buckets = 16;
   333     }
   335     struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s));
   336     if (map == NULL) return NULL;
   338     // initialize hash map members
   339     map->bucket_count = buckets;
   340     map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *));
   341     if (map->buckets == NULL) {
   342         cxFree(allocator, map);
   343         return NULL;
   344     }
   346     // initialize base members
   347     map->base.cl = &cx_hash_map_class;
   348     map->base.allocator = allocator;
   349     map->base.size = 0;
   351     return (CxMap *) map;
   352 }
   354 int cxMapRehash(CxMap *map) {
   355     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   356     if (map->size > ((hash_map->bucket_count * 3) >> 2)) {
   358         size_t new_bucket_count = (map->size * 5) >> 1;
   359         struct cx_hash_map_element_s **new_buckets = cxCalloc(map->allocator,
   360                                                               new_bucket_count, sizeof(struct cx_hash_map_element_s *));
   362         if (new_buckets == NULL) {
   363             return 1;
   364         }
   366         // iterate through the elements and assign them to their new slots
   367         cx_for_n(slot, hash_map->bucket_count) {
   368             struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
   369             while (elm != NULL) {
   370                 struct cx_hash_map_element_s *next = elm->next;
   371                 size_t new_slot = elm->key.hash % new_bucket_count;
   373                 // find position where to insert
   374                 struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
   375                 struct cx_hash_map_element_s *bucket_prev = NULL;
   376                 while (bucket_next != NULL && bucket_next->key.hash < elm->key.hash) {
   377                     bucket_prev = bucket_next;
   378                     bucket_next = bucket_next->next;
   379                 }
   381                 // insert
   382                 if (bucket_prev == NULL) {
   383                     elm->next = new_buckets[new_slot];
   384                     new_buckets[new_slot] = elm;
   385                 } else {
   386                     bucket_prev->next = elm;
   387                     elm->next = bucket_next;
   388                 }
   390                 // advance
   391                 elm = next;
   392             }
   393         }
   395         // assign result to the map
   396         hash_map->bucket_count = new_bucket_count;
   397         cxFree(map->allocator, hash_map->buckets);
   398         hash_map->buckets = new_buckets;
   399     }
   400     return 0;
   401 }

mercurial