src/hash_map.c

Fri, 27 May 2022 17:40:27 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 27 May 2022 17:40:27 +0200
changeset 562
fd3368c20413
parent 560
2d6a3e2dc8ff
child 563
69a83fad8a35
permissions
-rw-r--r--

#189 #199 implement and test map rehash

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include <string.h>
    30 #include "cx/hash_map.h"
    31 #include "cx/utils.h"
    33 /**
    34  * Computes a murmur hash-2.
    35  *
    36  * @param data the data to hash
    37  * @param len the length of the data
    38  * @return the murmur hash-2 of the data
    39  */
    40 static unsigned cx_hash_map_murmur(
    41         unsigned char const *data,
    42         size_t len
    43 ) {
    44     unsigned m = 0x5bd1e995;
    45     unsigned r = 24;
    46     unsigned h = 25 ^ len;
    47     unsigned i = 0;
    48     while (len >= 4) {
    49         unsigned k = data[i + 0] & 0xFF;
    50         k |= (data[i + 1] & 0xFF) << 8;
    51         k |= (data[i + 2] & 0xFF) << 16;
    52         k |= (data[i + 3] & 0xFF) << 24;
    54         k *= m;
    55         k ^= k >> r;
    56         k *= m;
    58         h *= m;
    59         h ^= k;
    61         i += 4;
    62         len -= 4;
    63     }
    65     switch (len) {
    66         case 3:
    67             h ^= (data[i + 2] & 0xFF) << 16;
    68                     __attribute__((__fallthrough__));
    69         case 2:
    70             h ^= (data[i + 1] & 0xFF) << 8;
    71                     __attribute__((__fallthrough__));
    72         case 1:
    73             h ^= (data[i + 0] & 0xFF);
    74             h *= m;
    75                     __attribute__((__fallthrough__));
    76         default:
    77             /* do nothing */;
    78     }
    80     h ^= h >> 13;
    81     h *= m;
    82     h ^= h >> 15;
    84     return h;
    85 }
    87 static void cx_hash_map_clear(struct cx_map_s *map) {
    88     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    89     cx_for_n(i, hash_map->bucket_count) {
    90         struct cx_hash_map_element_s *elem = hash_map->buckets[i];
    91         if (elem != NULL) {
    92             do {
    93                 struct cx_hash_map_element_s *next = elem->next;
    94                 // free the key data
    95                 cxFree(map->allocator, elem->key.data);
    96                 // free the node
    97                 cxFree(map->allocator, elem);
    98                 // proceed
    99                 elem = next;
   100             } while (elem != NULL);
   102             // do not leave a dangling pointer
   103             hash_map->buckets[i] = NULL;
   104         }
   105     }
   106     map->size = 0;
   107 }
   109 static void cx_hash_map_destructor(struct cx_map_s *map) {
   110     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   112     // free the buckets
   113     cx_hash_map_clear(map);
   114     cxFree(map->allocator, hash_map->buckets);
   116     // free the map structure
   117     cxFree(map->allocator, map);
   118 }
   120 static int cx_hash_map_put(
   121         CxMap *map,
   122         CxDataPtr key,
   123         void *value
   124 ) {
   125     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   126     CxAllocator *allocator = map->allocator;
   128     unsigned hash = cx_hash_map_murmur(key.data, key.len);
   130     size_t slot = hash % hash_map->bucket_count;
   131     struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
   132     struct cx_hash_map_element_s *prev = NULL;
   134     while (elm != NULL && elm->key.hash < hash) {
   135         prev = elm;
   136         elm = elm->next;
   137     }
   139     if (elm == NULL || elm->key.hash != hash) {
   140         struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s));
   141         if (e == NULL) {
   142             return -1;
   143         }
   145         // write the value
   146         // TODO: depending on future map features, we may want to copy here
   147         e->data = value;
   149         // copy the key
   150         void *kd = cxMalloc(allocator, key.len);
   151         if (kd == NULL) {
   152             return -1;
   153         }
   154         memcpy(kd, key.data, key.len);
   155         e->key.data = kd;
   156         e->key.len = key.len;
   157         e->key.hash = hash;
   159         // insert the element into the linked list
   160         if (prev == NULL) {
   161             hash_map->buckets[slot] = e;
   162         } else {
   163             prev->next = e;
   164         }
   165         e->next = elm;
   167         // increase the size
   168         map->size++;
   169     } else {
   170         // (elem != NULL && elem->key.hash == hash) - overwrite value of existing element
   171         elm->data = value;
   172     }
   174     return 0;
   175 }
   177 static void cx_hash_map_unlink(
   178         struct cx_hash_map_s *hash_map,
   179         size_t slot,
   180         struct cx_hash_map_element_s *prev,
   181         struct cx_hash_map_element_s *elm
   182 ) {
   183     // unlink
   184     if (prev == NULL) {
   185         hash_map->buckets[slot] = elm->next;
   186     } else {
   187         prev->next = elm->next;
   188     }
   189     // free element
   190     cxFree(hash_map->base.allocator, elm->key.data);
   191     cxFree(hash_map->base.allocator, elm);
   192     // decrease size
   193     hash_map->base.size--;
   194 }
   196 /**
   197  * Helper function to avoid code duplication.
   198  *
   199  * @param map the map
   200  * @param key the key to look up
   201  * @param remove flag indicating whether the looked up entry shall be removed
   202  * @return the value corresponding to the key or \c NULL
   203  */
   204 static void *cx_hash_map_get_remove(
   205         CxMap *map,
   206         CxDataPtr key,
   207         bool remove
   208 ) {
   209     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   211     unsigned hash = cx_hash_map_murmur(key.data, key.len);
   213     size_t slot = hash % hash_map->bucket_count;
   214     struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
   215     struct cx_hash_map_element_s *prev = NULL;
   216     while (elm && elm->key.hash <= hash) {
   217         if (elm->key.hash == hash && elm->key.len == key.len) {
   218             if (memcmp(elm->key.data, key.data, key.len) == 0) {
   219                 void *data = elm->data;
   220                 if (remove) {
   221                     cx_hash_map_unlink(hash_map, slot, prev, elm);
   222                 }
   223                 return data;
   224             }
   225         }
   226         prev = elm;
   227         elm = prev->next;
   228     }
   230     return NULL;
   231 }
   233 static void *cx_hash_map_get(
   234         CxMap const *map,
   235         CxDataPtr key
   236 ) {
   237     // we can safely cast, because we know when remove=false, the map stays untouched
   238     return cx_hash_map_get_remove((CxMap *) map, key, false);
   239 }
   241 static void *cx_hash_map_remove(
   242         CxMap *map,
   243         CxDataPtr key
   244 ) {
   245     return cx_hash_map_get_remove(map, key, true);
   246 }
   248 static void *cx_hash_map_iter_current_entry(CxIterator const *iter) {
   249     // struct has to have a compatible signature
   250     struct cx_map_entry_s *entry = (struct cx_map_entry_s *) &(iter->kv_data);
   251     return entry;
   252 }
   254 static void *cx_hash_map_iter_current_key(CxIterator const *iter) {
   255     struct cx_hash_map_element_s *elm = iter->elem_handle;
   256     return &elm->key;
   257 }
   259 static void *cx_hash_map_iter_current_value(CxIterator const *iter) {
   260     struct cx_hash_map_element_s *elm = iter->elem_handle;
   261     // TODO: return a pointer to data if this map is storing copies
   262     return elm->data;
   263 }
   265 static bool cx_hash_map_iter_valid(CxIterator const *iter) {
   266     return iter->elem_handle != NULL;
   267 }
   269 static void cx_hash_map_iter_next(CxIterator *iter) {
   270     struct cx_hash_map_s *map = iter->src_handle;
   271     struct cx_hash_map_element_s *elm = iter->elem_handle;
   273     // remove current element, if asked
   274     if (iter->remove) {
   275         // clear the flag
   276         iter->remove = false;
   278         // determine the next element
   279         struct cx_hash_map_element_s *next = elm->next;
   281         // search the previous element
   282         struct cx_hash_map_element_s *prev = NULL;
   283         if (map->buckets[iter->slot] != elm) {
   284             prev = map->buckets[iter->slot];
   285             while (prev->next != elm) {
   286                 prev = prev->next;
   287             }
   288         }
   290         // unlink
   291         cx_hash_map_unlink(map, iter->slot, prev, elm);
   293         // advance
   294         elm = next;
   295     } else {
   296         // just advance
   297         elm = elm->next;
   298         iter->index++;
   299     }
   301     // search the next bucket, if required
   302     while (elm == NULL && ++iter->slot < map->bucket_count) {
   303         elm = map->buckets[iter->slot];
   304     }
   306     // fill the struct with the next element
   307     iter->elem_handle = elm;
   308     if (elm == NULL) {
   309         iter->kv_data.key = NULL;
   310         iter->kv_data.value = NULL;
   311     } else {
   312         iter->kv_data.key = &elm->key;
   313         // TODO: pointer to data if this map is storing copies
   314         iter->kv_data.value = elm->data;
   315     }
   316 }
   318 static CxIterator cx_hash_map_iterator(CxMap *map) {
   319     CxIterator iter;
   321     iter.src_handle = map;
   322     iter.valid = cx_hash_map_iter_valid;
   323     iter.next = cx_hash_map_iter_next;
   324     iter.current = cx_hash_map_iter_current_entry;
   326     iter.slot = 0;
   327     iter.index = 0;
   328     iter.remove = false;
   330     if (map->size > 0) {
   331         struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   332         struct cx_hash_map_element_s *elm = hash_map->buckets[0];
   333         for (; elm == NULL; iter.slot++) {
   334             elm = hash_map->buckets[iter.slot];
   335         }
   336         iter.elem_handle = elm;
   337         iter.kv_data.key = &elm->key;
   338         // TODO: pointer to data if this map is storing copies
   339         iter.kv_data.value = elm->data;
   340     } else {
   341         iter.elem_handle = NULL;
   342         iter.kv_data.key = NULL;
   343         iter.kv_data.value = NULL;
   344     }
   346     return iter;
   347 }
   349 static CxIterator cx_hash_map_iterator_keys(CxMap *map) {
   350     CxIterator iter = cx_hash_map_iterator(map);
   351     iter.current = cx_hash_map_iter_current_key;
   352     return iter;
   353 }
   355 static CxIterator cx_hash_map_iterator_values(CxMap *map) {
   356     CxIterator iter = cx_hash_map_iterator(map);
   357     iter.current = cx_hash_map_iter_current_value;
   358     return iter;
   359 }
   361 static cx_map_class cx_hash_map_class = {
   362         cx_hash_map_destructor,
   363         cx_hash_map_clear,
   364         cx_hash_map_put,
   365         cx_hash_map_get,
   366         cx_hash_map_remove,
   367         cx_hash_map_iterator,
   368         cx_hash_map_iterator_keys,
   369         cx_hash_map_iterator_values,
   370 };
   372 CxMap *cxHashMapCreate(
   373         CxAllocator *allocator,
   374         size_t buckets
   375 ) {
   376     if (buckets == 0) {
   377         // implementation defined default
   378         buckets = 16;
   379     }
   381     struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s));
   382     if (map == NULL) return NULL;
   384     // initialize hash map members
   385     map->bucket_count = buckets;
   386     map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *));
   387     if (map->buckets == NULL) {
   388         cxFree(allocator, map);
   389         return NULL;
   390     }
   392     // initialize base members
   393     map->base.cl = &cx_hash_map_class;
   394     map->base.allocator = allocator;
   395     map->base.size = 0;
   397     return (CxMap *) map;
   398 }
   400 int cxMapRehash(CxMap *map) {
   401     struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   402     if (map->size > ((hash_map->bucket_count * 3) >> 2)) {
   404         size_t new_bucket_count = (map->size * 5) >> 1;
   405         struct cx_hash_map_element_s **new_buckets = cxCalloc(map->allocator,
   406                                                               new_bucket_count, sizeof(struct cx_hash_map_element_s *));
   408         if (new_buckets == NULL) {
   409             return 1;
   410         }
   412         // iterate through the elements and assign them to their new slots
   413         cx_for_n(slot, hash_map->bucket_count) {
   414             struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
   415             while (elm != NULL) {
   416                 struct cx_hash_map_element_s *next = elm->next;
   417                 size_t new_slot = elm->key.hash % new_bucket_count;
   419                 // find position where to insert
   420                 struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
   421                 struct cx_hash_map_element_s *bucket_prev = NULL;
   422                 while (bucket_next != NULL && bucket_next->key.hash < elm->key.hash) {
   423                     bucket_prev = bucket_next;
   424                     bucket_next = bucket_next->next;
   425                 }
   427                 // insert
   428                 if (bucket_prev == NULL) {
   429                     elm->next = new_buckets[new_slot];
   430                     new_buckets[new_slot] = elm;
   431                 } else {
   432                     bucket_prev->next = elm;
   433                     elm->next = bucket_next;
   434                 }
   436                 // advance
   437                 elm = next;
   438             }
   439         }
   441         // assign result to the map
   442         hash_map->bucket_count = new_bucket_count;
   443         cxFree(map->allocator, hash_map->buckets);
   444         hash_map->buckets = new_buckets;
   445     }
   446     return 0;
   447 }

mercurial