#189 #199 implement and test map rehash

Fri, 27 May 2022 17:40:27 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 27 May 2022 17:40:27 +0200
changeset 562
fd3368c20413
parent 561
bb17790af41e
child 563
69a83fad8a35

#189 #199 implement and test map rehash

src/cx/hash_map.h file | annotate | diff | comparison | revisions
src/hash_map.c file | annotate | diff | comparison | revisions
test/test_map.cpp file | annotate | diff | comparison | revisions
--- a/src/cx/hash_map.h	Fri May 27 14:14:55 2022 +0200
+++ b/src/cx/hash_map.h	Fri May 27 17:40:27 2022 +0200
@@ -108,41 +108,23 @@
 /**
  * Increases the number of buckets, if necessary.
  *
- * The load value is \c loadFactor*buckets. If the element count exceeds the load
- * value, the map needs to be rehashed. Otherwise, no action is performed and
+ * The load threshold is \c 0.75*buckets. If the element count exceeds the load
+ * threshold, the map will be rehashed. Otherwise, no action is performed and
  * this function simply returns 0.
  *
  * The rehashing process ensures, that the number of buckets is at least
- * \p bucketFactor times the number of stored items. So there is enough room for additional
+ * 2.5 times the element count. So there is enough room for additional
  * elements without the need of another soon rehashing.
  *
- * You can use this function after filling a map to dramatically increase access performance.
+ * You can use this function after filling a map to increase access performance.
  *
  * @note If the specified map is not a hash map, the behavior is undefined.
  *
  * @param map the map to rehash
- * @param loadFactor a percentage threshold for the load of the map
- * @param bucketFactor a factor for the number of buckets that shall be available after rehashing
  * @return zero on success, non-zero if a memory allocation error occurred
  */
 __attribute__((__nonnull__))
-int cxMapRehash2(
-        CxMap *map,
-        float loadFactor,
-        float bucketFactor
-);
-
-/**
- * Rehashes the map with load factor 0.75 and bucket factor 2.5.
- *
- * @param map the map to rehash
- * @return zero on success, non-zero if a memory allocation error occurred
- * @see cxMapRehash2()
- */
-__attribute__((__nonnull__))
-static inline int cxMapRehash(CxMap *map) {
-    return cxMapRehash2(map, 0.75f, 2.5f);
-}
+int cxMapRehash(CxMap *map);
 
 
 #ifdef __cplusplus
--- a/src/hash_map.c	Fri May 27 14:14:55 2022 +0200
+++ b/src/hash_map.c	Fri May 27 17:40:27 2022 +0200
@@ -396,3 +396,52 @@
 
     return (CxMap *) map;
 }
+
+int cxMapRehash(CxMap *map) {
+    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
+    if (map->size > ((hash_map->bucket_count * 3) >> 2)) {
+
+        size_t new_bucket_count = (map->size * 5) >> 1;
+        struct cx_hash_map_element_s **new_buckets = cxCalloc(map->allocator,
+                                                              new_bucket_count, sizeof(struct cx_hash_map_element_s *));
+
+        if (new_buckets == NULL) {
+            return 1;
+        }
+
+        // iterate through the elements and assign them to their new slots
+        cx_for_n(slot, hash_map->bucket_count) {
+            struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
+            while (elm != NULL) {
+                struct cx_hash_map_element_s *next = elm->next;
+                size_t new_slot = elm->key.hash % new_bucket_count;
+
+                // find position where to insert
+                struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot];
+                struct cx_hash_map_element_s *bucket_prev = NULL;
+                while (bucket_next != NULL && bucket_next->key.hash < elm->key.hash) {
+                    bucket_prev = bucket_next;
+                    bucket_next = bucket_next->next;
+                }
+
+                // insert
+                if (bucket_prev == NULL) {
+                    elm->next = new_buckets[new_slot];
+                    new_buckets[new_slot] = elm;
+                } else {
+                    bucket_prev->next = elm;
+                    elm->next = bucket_next;
+                }
+
+                // advance
+                elm = next;
+            }
+        }
+
+        // assign result to the map
+        hash_map->bucket_count = new_bucket_count;
+        cxFree(map->allocator, hash_map->buckets);
+        hash_map->buckets = new_buckets;
+    }
+    return 0;
+}
--- a/test/test_map.cpp	Fri May 27 14:14:55 2022 +0200
+++ b/test/test_map.cpp	Fri May 27 17:40:27 2022 +0200
@@ -200,3 +200,52 @@
     cxMapDestroy(map);
     EXPECT_TRUE(allocator.verify());
 }
+
+TEST(CxHashMap, RehashNotRequired) {
+    CxTestingAllocator allocator;
+    auto map = cxHashMapCreate(&allocator, 8);
+
+    cxMapPut(map, cxMapKeyStr("key 1"), (void *) "val 1");
+    cxMapPut(map, cxMapKeyStr("key 2"), (void *) "val 2");
+    cxMapPut(map, cxMapKeyStr("key 3"), (void *) "val 3");
+    cxMapPut(map, cxMapKeyStr("key 4"), (void *) "val 4");
+    cxMapPut(map, cxMapKeyStr("key 5"), (void *) "val 5");
+    cxMapPut(map, cxMapKeyStr("key 6"), (void *) "val 6");
+
+    // 6/8 does not exceed 0.75, therefore the function should not rehash
+    int result = cxMapRehash(map);
+    EXPECT_EQ(result, 0);
+    EXPECT_EQ(reinterpret_cast<struct cx_hash_map_s *>(map)->bucket_count, 8);
+
+    cxMapDestroy(map);
+    EXPECT_TRUE(allocator.verify());
+}
+
+TEST(CxHashMap, Rehash) {
+    CxTestingAllocator allocator;
+    auto map = cxHashMapCreate(&allocator, 8);
+
+    cxMapPut(map, cxMapKeyStr("key 1"), (void *) "val 1");
+    cxMapPut(map, cxMapKeyStr("key 2"), (void *) "val 2");
+    cxMapPut(map, cxMapKeyStr("key 3"), (void *) "val 3");
+    cxMapPut(map, cxMapKeyStr("key 4"), (void *) "val 4");
+    cxMapPut(map, cxMapKeyStr("key 5"), (void *) "val 5");
+    cxMapPut(map, cxMapKeyStr("key 6"), (void *) "val 6");
+    cxMapPut(map, cxMapKeyStr("key 7"), (void *) "val 7");
+
+    int result = cxMapRehash(map);
+    EXPECT_EQ(result, 0);
+    EXPECT_EQ(reinterpret_cast<struct cx_hash_map_s *>(map)->bucket_count, 17);
+    EXPECT_EQ(map->size, 7);
+
+    EXPECT_EQ(strcmp((char *) cxMapGet(map, cxMapKeyStr("key 1")), "val 1"), 0);
+    EXPECT_EQ(strcmp((char *) cxMapGet(map, cxMapKeyStr("key 2")), "val 2"), 0);
+    EXPECT_EQ(strcmp((char *) cxMapGet(map, cxMapKeyStr("key 3")), "val 3"), 0);
+    EXPECT_EQ(strcmp((char *) cxMapGet(map, cxMapKeyStr("key 4")), "val 4"), 0);
+    EXPECT_EQ(strcmp((char *) cxMapGet(map, cxMapKeyStr("key 5")), "val 5"), 0);
+    EXPECT_EQ(strcmp((char *) cxMapGet(map, cxMapKeyStr("key 6")), "val 6"), 0);
+    EXPECT_EQ(strcmp((char *) cxMapGet(map, cxMapKeyStr("key 7")), "val 7"), 0);
+
+    cxMapDestroy(map);
+    EXPECT_TRUE(allocator.verify());
+}
\ No newline at end of file

mercurial