#189 basic map implementation

Thu, 19 May 2022 14:30:20 +0200

author
Mike Becker <universe@uap-core.de>
date
Thu, 19 May 2022 14:30:20 +0200
changeset 550
89b2a83728b1
parent 549
d7f0b5a9a985
child 551
2946e13c89a4

#189 basic map implementation

src/cx/hash_map.h file | annotate | diff | comparison | revisions
src/cx/map.h file | annotate | diff | comparison | revisions
src/hash_map.c file | annotate | diff | comparison | revisions
     1.1 --- a/src/cx/hash_map.h	Wed May 18 16:26:32 2022 +0200
     1.2 +++ b/src/cx/hash_map.h	Thu May 19 14:30:20 2022 +0200
     1.3 @@ -46,7 +46,11 @@
     1.4  /** Internal structure for a key within a hash map. */
     1.5  struct cx_hash_map_key_s {
     1.6      /** The key data. */
     1.7 -    CxDataPtr data;
     1.8 +    unsigned char *data;
     1.9 +    /**
    1.10 +     * The key data length.
    1.11 +     */
    1.12 +    size_t len;
    1.13      /** The hash value of the key data. */
    1.14      unsigned hash;
    1.15  };
    1.16 @@ -63,14 +67,39 @@
    1.17      struct cx_hash_map_key_s key;
    1.18  };
    1.19  
    1.20 +/**
    1.21 + * Internal structure for a hash map.
    1.22 + */
    1.23 +struct cx_hash_map_s {
    1.24 +    /**
    1.25 +     * Base structure for maps.
    1.26 +     */
    1.27 +    struct cx_map_s base;
    1.28 +    /**
    1.29 +     * The buckets of this map, each containing a linked list of elements.
    1.30 +     */
    1.31 +    struct cx_hash_map_element_s **buckets;
    1.32 +    /**
    1.33 +     * The number of buckets.
    1.34 +     */
    1.35 +    size_t bucket_count;
    1.36 +};
    1.37 +
    1.38  
    1.39  /**
    1.40 - * Creates a new hash map with the specified size using a UcxAllocator.
    1.41 + * Creates a new hash map with the specified number of buckets.
    1.42 + *
    1.43 + * If \p buckets is zero, an implementation defined default will be used.
    1.44 + *
    1.45 + * @note Iterators provided by this hash map implementation do provide the remove operation, because
    1.46 + * a remove never causes an immediate rehashing. The iterators are also position-aware in the sense
    1.47 + * that the index is initialized with zero and incremented when the iterator advances.
    1.48   *
    1.49   * @param allocator the allocator to use
    1.50   * @param buckets the initial number of buckets in this hash map
    1.51   * @return a pointer to the new hash map
    1.52   */
    1.53 +__attribute__((__nonnull__, __warn_unused_result__))
    1.54  CxMap *cxHashMapCreate(
    1.55          CxAllocator *allocator,
    1.56          size_t buckets
     2.1 --- a/src/cx/map.h	Wed May 18 16:26:32 2022 +0200
     2.2 +++ b/src/cx/map.h	Thu May 19 14:30:20 2022 +0200
     2.3 @@ -88,7 +88,7 @@
     2.4      int (*put)(
     2.5              CxMap *map,
     2.6              CxDataPtr key,
     2.7 -            void const *value
     2.8 +            void *value
     2.9      );
    2.10  
    2.11      /**
    2.12 @@ -105,7 +105,7 @@
    2.13       */
    2.14      __attribute__((__nonnull__, __warn_unused_result__))
    2.15      void *(*remove)(
    2.16 -            CxMap const *map,
    2.17 +            CxMap *map,
    2.18              CxDataPtr key
    2.19      );
    2.20  
    2.21 @@ -177,7 +177,7 @@
    2.22  static inline int cxMapPut(
    2.23          CxMap *map,
    2.24          CxDataPtr key,
    2.25 -        void const *value
    2.26 +        void *value
    2.27  ) {
    2.28      return map->cl->put(map, key, value);
    2.29  }
     3.1 --- a/src/hash_map.c	Wed May 18 16:26:32 2022 +0200
     3.2 +++ b/src/hash_map.c	Thu May 19 14:30:20 2022 +0200
     3.3 @@ -26,8 +26,10 @@
     3.4   * POSSIBILITY OF SUCH DAMAGE.
     3.5   */
     3.6  
     3.7 +#include <errno.h>
     3.8  #include <string.h>
     3.9  #include "cx/hash_map.h"
    3.10 +#include "cx/utils.h"
    3.11  
    3.12  /**
    3.13   * Computes a murmur hash-2.
    3.14 @@ -83,23 +85,211 @@
    3.15      return h;
    3.16  }
    3.17  
    3.18 +static void cx_hash_map_clear(struct cx_map_s *map) {
    3.19 +    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    3.20 +    cx_for_n(i, hash_map->bucket_count) {
    3.21 +        struct cx_hash_map_element_s *elem = hash_map->buckets[i];
    3.22 +        if (elem != NULL) {
    3.23 +            do {
    3.24 +                struct cx_hash_map_element_s *next = elem->next;
    3.25 +                // free the key data
    3.26 +                cxFree(map->allocator, elem->key.data);
    3.27 +                // free the node
    3.28 +                cxFree(map->allocator, elem);
    3.29 +                // proceed
    3.30 +                elem = next;
    3.31 +            } while (elem != NULL);
    3.32 +
    3.33 +            // do not leave a dangling pointer
    3.34 +            hash_map->buckets[i] = NULL;
    3.35 +        }
    3.36 +    }
    3.37 +    map->size = 0;
    3.38 +}
    3.39 +
    3.40 +static void cx_hash_map_destructor(struct cx_map_s *map) {
    3.41 +    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    3.42 +
    3.43 +    // free the buckets
    3.44 +    cx_hash_map_clear(map);
    3.45 +    cxFree(map->allocator, hash_map->buckets);
    3.46 +
    3.47 +    // free the map structure
    3.48 +    cxFree(map->allocator, map);
    3.49 +}
    3.50 +
    3.51 +static int cx_hash_map_put(
    3.52 +        CxMap *map,
    3.53 +        CxDataPtr key,
    3.54 +        void *value
    3.55 +) {
    3.56 +    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    3.57 +    CxAllocator *allocator = map->allocator;
    3.58 +
    3.59 +    unsigned hash = cx_hash_map_murmur(key.data, key.len);
    3.60 +
    3.61 +    size_t slot = hash % hash_map->bucket_count;
    3.62 +    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    3.63 +    struct cx_hash_map_element_s *prev = NULL;
    3.64 +
    3.65 +    while (elm != NULL && elm->key.hash < hash) {
    3.66 +        prev = elm;
    3.67 +        elm = elm->next;
    3.68 +    }
    3.69 +
    3.70 +    if (elm == NULL || elm->key.hash != hash) {
    3.71 +        struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s));
    3.72 +        if (e == NULL) {
    3.73 +            return -1;
    3.74 +        }
    3.75 +
    3.76 +        // write the value
    3.77 +        // TODO: depending on future map features, we may want to copy here
    3.78 +        e->data = value;
    3.79 +
    3.80 +        // copy the key
    3.81 +        void *kd = cxMalloc(allocator, key.len);
    3.82 +        if (kd == NULL) {
    3.83 +            return -1;
    3.84 +        }
    3.85 +        memcpy(kd, key.data, key.len);
    3.86 +        e->key.data = kd;
    3.87 +        e->key.len = key.len;
    3.88 +        e->key.hash = hash;
    3.89 +
    3.90 +        // insert the element into the linked list
    3.91 +        if (prev == NULL) {
    3.92 +            hash_map->buckets[slot] = e;
    3.93 +        } else {
    3.94 +            prev->next = e;
    3.95 +        }
    3.96 +        e->next = elm;
    3.97 +
    3.98 +        // increase the size
    3.99 +        map->size++;
   3.100 +    } else {
   3.101 +        // (elem != NULL && elem->key.hash == hash) - overwrite value of existing element
   3.102 +        elm->data = value;
   3.103 +    }
   3.104 +
   3.105 +    return 0;
   3.106 +}
   3.107  
   3.108  /**
   3.109 - * Creates a hash map key based on the given data.
   3.110 + * Helper function to avoid code duplication.
   3.111   *
   3.112 - * This function implicitly computes the hash.
   3.113 - *
   3.114 - * @attention The data will be copied to the key structure.
   3.115 - *
   3.116 - * @param data the data for the key
   3.117 - * @return the computed key
   3.118 + * @param map the map
   3.119 + * @param key the key to look up
   3.120 + * @param remove flag indicating whether the looked up entry shall be removed
   3.121 + * @return the value corresponding to the key or \c NULL
   3.122   */
   3.123 -static struct cx_hash_map_key_s cx_hash_map_key(CxDataPtr data) {
   3.124 -    unsigned char *target = malloc(data.len);
   3.125 -    memcpy(target, data.data, data.len);
   3.126 -    struct cx_hash_map_key_s key;
   3.127 -    key.data.data = target;
   3.128 -    key.data.len = data.len;
   3.129 -    key.hash = cx_hash_map_murmur(target, data.len);
   3.130 -    return key;
   3.131 +static void *cx_hash_map_get_remove(
   3.132 +        CxMap *map,
   3.133 +        CxDataPtr key,
   3.134 +        bool remove
   3.135 +) {
   3.136 +    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
   3.137 +
   3.138 +    unsigned hash = cx_hash_map_murmur(key.data, key.len);
   3.139 +
   3.140 +    size_t slot = hash % hash_map->bucket_count;
   3.141 +    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
   3.142 +    struct cx_hash_map_element_s *prev = NULL;
   3.143 +    while (elm && elm->key.hash <= hash) {
   3.144 +        if (elm->key.hash == hash && elm->key.len == key.len) {
   3.145 +            if (memcmp(elm->key.data, key.data, key.len) == 0) {
   3.146 +                void *data = elm->data;
   3.147 +                if (remove) {
   3.148 +                    // unlink
   3.149 +                    if (prev) {
   3.150 +                        prev->next = elm->next;
   3.151 +                    } else {
   3.152 +                        hash_map->buckets[slot] = elm->next;
   3.153 +                    }
   3.154 +                    // free element
   3.155 +                    cxFree(map->allocator, elm->key.data);
   3.156 +                    cxFree(map->allocator, elm);
   3.157 +                    // decrease size
   3.158 +                    map->size--;
   3.159 +                }
   3.160 +                return data;
   3.161 +            }
   3.162 +        }
   3.163 +        prev = elm;
   3.164 +        elm = prev->next;
   3.165 +    }
   3.166 +
   3.167 +    return NULL;
   3.168  }
   3.169 +
   3.170 +static void *cx_hash_map_get(
   3.171 +        CxMap const *map,
   3.172 +        CxDataPtr key
   3.173 +) {
   3.174 +    // we can safely cast, because we know when remove=false, the map stays untouched
   3.175 +    return cx_hash_map_get_remove((CxMap *) map, key, false);
   3.176 +}
   3.177 +
   3.178 +static void *cx_hash_map_remove(
   3.179 +        CxMap *map,
   3.180 +        CxDataPtr key
   3.181 +) {
   3.182 +    return cx_hash_map_get_remove(map, key, true);
   3.183 +}
   3.184 +
   3.185 +static CxIterator cx_hash_map_iterator(CxMap const *map) {
   3.186 +    CxIterator iter;
   3.187 +    // TODO: initialize iter
   3.188 +    return iter;
   3.189 +}
   3.190 +
   3.191 +static CxIterator cx_hash_map_iterator_keys(CxMap const *map) {
   3.192 +    CxIterator iter;
   3.193 +    // TODO: initialize iter
   3.194 +    return iter;
   3.195 +}
   3.196 +
   3.197 +static CxIterator cx_hash_map_iterator_values(CxMap const *map) {
   3.198 +    CxIterator iter;
   3.199 +    // TODO: initialize iter
   3.200 +    return iter;
   3.201 +}
   3.202 +
   3.203 +static cx_map_class cx_hash_map_class = {
   3.204 +        cx_hash_map_destructor,
   3.205 +        cx_hash_map_clear,
   3.206 +        cx_hash_map_put,
   3.207 +        cx_hash_map_get,
   3.208 +        cx_hash_map_remove,
   3.209 +        cx_hash_map_iterator,
   3.210 +        cx_hash_map_iterator_keys,
   3.211 +        cx_hash_map_iterator_values,
   3.212 +};
   3.213 +
   3.214 +CxMap *cxHashMapCreate(
   3.215 +        CxAllocator *allocator,
   3.216 +        size_t buckets
   3.217 +) {
   3.218 +    if (buckets == 0) {
   3.219 +        // implementation defined default
   3.220 +        buckets = 16;
   3.221 +    }
   3.222 +
   3.223 +    struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s));
   3.224 +    if (map == NULL) return NULL;
   3.225 +
   3.226 +    // initialize hash map members
   3.227 +    map->bucket_count = buckets;
   3.228 +    map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *));
   3.229 +    if (map->buckets == NULL) {
   3.230 +        cxFree(allocator, map);
   3.231 +        return NULL;
   3.232 +    }
   3.233 +
   3.234 +    // initialize base members
   3.235 +    map->base.cl = &cx_hash_map_class;
   3.236 +    map->base.allocator = allocator;
   3.237 +    map->base.size = 0;
   3.238 +
   3.239 +    return (CxMap *) map;
   3.240 +}

mercurial