Thu, 19 May 2022 14:30:20 +0200
#189 basic map implementation
src/cx/hash_map.h | file | annotate | diff | comparison | revisions | |
src/cx/map.h | file | annotate | diff | comparison | revisions | |
src/hash_map.c | file | annotate | diff | comparison | revisions |
1.1 --- a/src/cx/hash_map.h Wed May 18 16:26:32 2022 +0200 1.2 +++ b/src/cx/hash_map.h Thu May 19 14:30:20 2022 +0200 1.3 @@ -46,7 +46,11 @@ 1.4 /** Internal structure for a key within a hash map. */ 1.5 struct cx_hash_map_key_s { 1.6 /** The key data. */ 1.7 - CxDataPtr data; 1.8 + unsigned char *data; 1.9 + /** 1.10 + * The key data length. 1.11 + */ 1.12 + size_t len; 1.13 /** The hash value of the key data. */ 1.14 unsigned hash; 1.15 }; 1.16 @@ -63,14 +67,39 @@ 1.17 struct cx_hash_map_key_s key; 1.18 }; 1.19 1.20 +/** 1.21 + * Internal structure for a hash map. 1.22 + */ 1.23 +struct cx_hash_map_s { 1.24 + /** 1.25 + * Base structure for maps. 1.26 + */ 1.27 + struct cx_map_s base; 1.28 + /** 1.29 + * The buckets of this map, each containing a linked list of elements. 1.30 + */ 1.31 + struct cx_hash_map_element_s **buckets; 1.32 + /** 1.33 + * The number of buckets. 1.34 + */ 1.35 + size_t bucket_count; 1.36 +}; 1.37 + 1.38 1.39 /** 1.40 - * Creates a new hash map with the specified size using a UcxAllocator. 1.41 + * Creates a new hash map with the specified number of buckets. 1.42 + * 1.43 + * If \p buckets is zero, an implementation defined default will be used. 1.44 + * 1.45 + * @note Iterators provided by this hash map implementation do provide the remove operation, because 1.46 + * a remove never causes an immediate rehashing. The iterators are also position-aware in the sense 1.47 + * that the index is initialized with zero and incremented when the iterator advances. 1.48 * 1.49 * @param allocator the allocator to use 1.50 * @param buckets the initial number of buckets in this hash map 1.51 * @return a pointer to the new hash map 1.52 */ 1.53 +__attribute__((__nonnull__, __warn_unused_result__)) 1.54 CxMap *cxHashMapCreate( 1.55 CxAllocator *allocator, 1.56 size_t buckets
2.1 --- a/src/cx/map.h Wed May 18 16:26:32 2022 +0200 2.2 +++ b/src/cx/map.h Thu May 19 14:30:20 2022 +0200 2.3 @@ -88,7 +88,7 @@ 2.4 int (*put)( 2.5 CxMap *map, 2.6 CxDataPtr key, 2.7 - void const *value 2.8 + void *value 2.9 ); 2.10 2.11 /** 2.12 @@ -105,7 +105,7 @@ 2.13 */ 2.14 __attribute__((__nonnull__, __warn_unused_result__)) 2.15 void *(*remove)( 2.16 - CxMap const *map, 2.17 + CxMap *map, 2.18 CxDataPtr key 2.19 ); 2.20 2.21 @@ -177,7 +177,7 @@ 2.22 static inline int cxMapPut( 2.23 CxMap *map, 2.24 CxDataPtr key, 2.25 - void const *value 2.26 + void *value 2.27 ) { 2.28 return map->cl->put(map, key, value); 2.29 }
3.1 --- a/src/hash_map.c Wed May 18 16:26:32 2022 +0200 3.2 +++ b/src/hash_map.c Thu May 19 14:30:20 2022 +0200 3.3 @@ -26,8 +26,10 @@ 3.4 * POSSIBILITY OF SUCH DAMAGE. 3.5 */ 3.6 3.7 +#include <errno.h> 3.8 #include <string.h> 3.9 #include "cx/hash_map.h" 3.10 +#include "cx/utils.h" 3.11 3.12 /** 3.13 * Computes a murmur hash-2. 3.14 @@ -83,23 +85,211 @@ 3.15 return h; 3.16 } 3.17 3.18 +static void cx_hash_map_clear(struct cx_map_s *map) { 3.19 + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; 3.20 + cx_for_n(i, hash_map->bucket_count) { 3.21 + struct cx_hash_map_element_s *elem = hash_map->buckets[i]; 3.22 + if (elem != NULL) { 3.23 + do { 3.24 + struct cx_hash_map_element_s *next = elem->next; 3.25 + // free the key data 3.26 + cxFree(map->allocator, elem->key.data); 3.27 + // free the node 3.28 + cxFree(map->allocator, elem); 3.29 + // proceed 3.30 + elem = next; 3.31 + } while (elem != NULL); 3.32 + 3.33 + // do not leave a dangling pointer 3.34 + hash_map->buckets[i] = NULL; 3.35 + } 3.36 + } 3.37 + map->size = 0; 3.38 +} 3.39 + 3.40 +static void cx_hash_map_destructor(struct cx_map_s *map) { 3.41 + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; 3.42 + 3.43 + // free the buckets 3.44 + cx_hash_map_clear(map); 3.45 + cxFree(map->allocator, hash_map->buckets); 3.46 + 3.47 + // free the map structure 3.48 + cxFree(map->allocator, map); 3.49 +} 3.50 + 3.51 +static int cx_hash_map_put( 3.52 + CxMap *map, 3.53 + CxDataPtr key, 3.54 + void *value 3.55 +) { 3.56 + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; 3.57 + CxAllocator *allocator = map->allocator; 3.58 + 3.59 + unsigned hash = cx_hash_map_murmur(key.data, key.len); 3.60 + 3.61 + size_t slot = hash % hash_map->bucket_count; 3.62 + struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; 3.63 + struct cx_hash_map_element_s *prev = NULL; 3.64 + 3.65 + while (elm != NULL && elm->key.hash < hash) { 3.66 + prev = elm; 3.67 + elm = elm->next; 3.68 + } 3.69 + 3.70 + if (elm == NULL || elm->key.hash != hash) { 3.71 + struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s)); 3.72 + if (e == NULL) { 3.73 + return -1; 3.74 + } 3.75 + 3.76 + // write the value 3.77 + // TODO: depending on future map features, we may want to copy here 3.78 + e->data = value; 3.79 + 3.80 + // copy the key 3.81 + void *kd = cxMalloc(allocator, key.len); 3.82 + if (kd == NULL) { 3.83 + return -1; 3.84 + } 3.85 + memcpy(kd, key.data, key.len); 3.86 + e->key.data = kd; 3.87 + e->key.len = key.len; 3.88 + e->key.hash = hash; 3.89 + 3.90 + // insert the element into the linked list 3.91 + if (prev == NULL) { 3.92 + hash_map->buckets[slot] = e; 3.93 + } else { 3.94 + prev->next = e; 3.95 + } 3.96 + e->next = elm; 3.97 + 3.98 + // increase the size 3.99 + map->size++; 3.100 + } else { 3.101 + // (elem != NULL && elem->key.hash == hash) - overwrite value of existing element 3.102 + elm->data = value; 3.103 + } 3.104 + 3.105 + return 0; 3.106 +} 3.107 3.108 /** 3.109 - * Creates a hash map key based on the given data. 3.110 + * Helper function to avoid code duplication. 3.111 * 3.112 - * This function implicitly computes the hash. 3.113 - * 3.114 - * @attention The data will be copied to the key structure. 3.115 - * 3.116 - * @param data the data for the key 3.117 - * @return the computed key 3.118 + * @param map the map 3.119 + * @param key the key to look up 3.120 + * @param remove flag indicating whether the looked up entry shall be removed 3.121 + * @return the value corresponding to the key or \c NULL 3.122 */ 3.123 -static struct cx_hash_map_key_s cx_hash_map_key(CxDataPtr data) { 3.124 - unsigned char *target = malloc(data.len); 3.125 - memcpy(target, data.data, data.len); 3.126 - struct cx_hash_map_key_s key; 3.127 - key.data.data = target; 3.128 - key.data.len = data.len; 3.129 - key.hash = cx_hash_map_murmur(target, data.len); 3.130 - return key; 3.131 +static void *cx_hash_map_get_remove( 3.132 + CxMap *map, 3.133 + CxDataPtr key, 3.134 + bool remove 3.135 +) { 3.136 + struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; 3.137 + 3.138 + unsigned hash = cx_hash_map_murmur(key.data, key.len); 3.139 + 3.140 + size_t slot = hash % hash_map->bucket_count; 3.141 + struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; 3.142 + struct cx_hash_map_element_s *prev = NULL; 3.143 + while (elm && elm->key.hash <= hash) { 3.144 + if (elm->key.hash == hash && elm->key.len == key.len) { 3.145 + if (memcmp(elm->key.data, key.data, key.len) == 0) { 3.146 + void *data = elm->data; 3.147 + if (remove) { 3.148 + // unlink 3.149 + if (prev) { 3.150 + prev->next = elm->next; 3.151 + } else { 3.152 + hash_map->buckets[slot] = elm->next; 3.153 + } 3.154 + // free element 3.155 + cxFree(map->allocator, elm->key.data); 3.156 + cxFree(map->allocator, elm); 3.157 + // decrease size 3.158 + map->size--; 3.159 + } 3.160 + return data; 3.161 + } 3.162 + } 3.163 + prev = elm; 3.164 + elm = prev->next; 3.165 + } 3.166 + 3.167 + return NULL; 3.168 } 3.169 + 3.170 +static void *cx_hash_map_get( 3.171 + CxMap const *map, 3.172 + CxDataPtr key 3.173 +) { 3.174 + // we can safely cast, because we know when remove=false, the map stays untouched 3.175 + return cx_hash_map_get_remove((CxMap *) map, key, false); 3.176 +} 3.177 + 3.178 +static void *cx_hash_map_remove( 3.179 + CxMap *map, 3.180 + CxDataPtr key 3.181 +) { 3.182 + return cx_hash_map_get_remove(map, key, true); 3.183 +} 3.184 + 3.185 +static CxIterator cx_hash_map_iterator(CxMap const *map) { 3.186 + CxIterator iter; 3.187 + // TODO: initialize iter 3.188 + return iter; 3.189 +} 3.190 + 3.191 +static CxIterator cx_hash_map_iterator_keys(CxMap const *map) { 3.192 + CxIterator iter; 3.193 + // TODO: initialize iter 3.194 + return iter; 3.195 +} 3.196 + 3.197 +static CxIterator cx_hash_map_iterator_values(CxMap const *map) { 3.198 + CxIterator iter; 3.199 + // TODO: initialize iter 3.200 + return iter; 3.201 +} 3.202 + 3.203 +static cx_map_class cx_hash_map_class = { 3.204 + cx_hash_map_destructor, 3.205 + cx_hash_map_clear, 3.206 + cx_hash_map_put, 3.207 + cx_hash_map_get, 3.208 + cx_hash_map_remove, 3.209 + cx_hash_map_iterator, 3.210 + cx_hash_map_iterator_keys, 3.211 + cx_hash_map_iterator_values, 3.212 +}; 3.213 + 3.214 +CxMap *cxHashMapCreate( 3.215 + CxAllocator *allocator, 3.216 + size_t buckets 3.217 +) { 3.218 + if (buckets == 0) { 3.219 + // implementation defined default 3.220 + buckets = 16; 3.221 + } 3.222 + 3.223 + struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s)); 3.224 + if (map == NULL) return NULL; 3.225 + 3.226 + // initialize hash map members 3.227 + map->bucket_count = buckets; 3.228 + map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *)); 3.229 + if (map->buckets == NULL) { 3.230 + cxFree(allocator, map); 3.231 + return NULL; 3.232 + } 3.233 + 3.234 + // initialize base members 3.235 + map->base.cl = &cx_hash_map_class; 3.236 + map->base.allocator = allocator; 3.237 + map->base.size = 0; 3.238 + 3.239 + return (CxMap *) map; 3.240 +}