#189 declare basic map functions

Wed, 18 May 2022 16:26:32 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 18 May 2022 16:26:32 +0200
changeset 549
d7f0b5a9a985
parent 548
459bca1cdf8d
child 550
89b2a83728b1

#189 declare basic map functions

src/CMakeLists.txt file | annotate | diff | comparison | revisions
src/cx/common.h file | annotate | diff | comparison | revisions
src/cx/hash_map.h file | annotate | diff | comparison | revisions
src/cx/map.h file | annotate | diff | comparison | revisions
src/hash_map.c file | annotate | diff | comparison | revisions
--- a/src/CMakeLists.txt	Mon May 16 19:25:19 2022 +0200
+++ b/src/CMakeLists.txt	Wed May 18 16:26:32 2022 +0200
@@ -5,8 +5,10 @@
         linked_list.c
         tree.c
         buffer.c
+        hash_map.c
 )
 set(headers
+        cx/common.h
         cx/utils.h
         cx/allocator.h
         cx/iterator.h
@@ -14,7 +16,9 @@
         cx/linked_list.h
         cx/tree.h
         cx/buffer.h
-)
+        cx/map.h
+        cx/hash_map.h
+        )
 
 add_library(ucx SHARED ${sources})
 add_library(ucx_static STATIC ${sources})
--- a/src/cx/common.h	Mon May 16 19:25:19 2022 +0200
+++ b/src/cx/common.h	Wed May 18 16:26:32 2022 +0200
@@ -94,6 +94,25 @@
 #include <stdbool.h>
 
 /**
+ * Structure that contains a pointer to arbitrary data.
+ */
+struct cx_data_ptr_s {
+    /**
+     * A pointer to the data.
+     */
+    unsigned const char *data;
+    /**
+     * The length of the data.
+     */
+    size_t len;
+};
+
+/**
+ * Type for a data pointer with length information.
+ */
+typedef struct cx_data_ptr_s CxDataPtr;
+
+/**
  * Function pointer compatible with fwrite-like functions.
  */
 typedef size_t (*cx_write_func)(
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cx/hash_map.h	Wed May 18 16:26:32 2022 +0200
@@ -0,0 +1,123 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * \file map.h
+ * \brief Interface for map implementations.
+ * \author Mike Becker
+ * \author Olaf Wintermann
+ * \version 3.0
+ * \copyright 2-Clause BSD License
+ */
+
+#ifndef UCX_HASH_MAP_H
+#define UCX_HASH_MAP_H
+
+#include "map.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Internal structure for a key within a hash map. */
+struct cx_hash_map_key_s {
+    /** The key data. */
+    CxDataPtr data;
+    /** The hash value of the key data. */
+    unsigned hash;
+};
+
+/** Internal structure for an element of a hash map. */
+struct cx_hash_map_element_s {
+    /** The value data. */
+    void *data;
+
+    /** A pointer to the next element in the current bucket. */
+    struct cx_hash_map_element_s *next;
+
+    /** The corresponding key. */
+    struct cx_hash_map_key_s key;
+};
+
+
+/**
+ * Creates a new hash map with the specified size using a UcxAllocator.
+ *
+ * @param allocator the allocator to use
+ * @param buckets the initial number of buckets in this hash map
+ * @return a pointer to the new hash map
+ */
+CxMap *cxHashMapCreate(
+        CxAllocator *allocator,
+        size_t buckets
+);
+
+/**
+ * Increases the number of buckets, if necessary.
+ *
+ * The load value is \c loadFactor*buckets. If the element count exceeds the load
+ * value, the map needs to be rehashed. Otherwise, no action is performed and
+ * this function simply returns 0.
+ *
+ * The rehashing process ensures, that the number of buckets is at least
+ * \p bucketFactor times the number of stored items. So there is enough room for additional
+ * elements without the need of another soon rehashing.
+ *
+ * You can use this function after filling a map to dramatically increase access performance.
+ *
+ * @note If the specified map is not a hash map, the behavior is undefined.
+ *
+ * @param map the map to rehash
+ * @param loadFactor a percentage threshold for the load of the map
+ * @param bucketFactor a factor for the number of buckets that shall be available after rehashing
+ * @return zero on success, non-zero if a memory allocation error occurred
+ */
+__attribute__((__nonnull__))
+int cxMapRehash2(
+        CxMap *map,
+        float loadFactor,
+        float bucketFactor
+);
+
+/**
+ * Rehashes the map with load factor 0.75 and bucket factor 2.5.
+ *
+ * @param map the map to rehash
+ * @return zero on success, non-zero if a memory allocation error occurred
+ * @see cxMapRehash2()
+ */
+__attribute__((__nonnull__))
+static inline int cxMapRehash(CxMap *map) {
+    return cxMapRehash2(map, 0.75f, 2.5f);
+}
+
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // UCX_HASH_MAP_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cx/map.h	Wed May 18 16:26:32 2022 +0200
@@ -0,0 +1,268 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * \file map.h
+ * \brief Interface for map implementations.
+ * \author Mike Becker
+ * \author Olaf Wintermann
+ * \version 3.0
+ * \copyright 2-Clause BSD License
+ */
+
+#ifndef UCX_MAP_H
+#define UCX_MAP_H
+
+#include "common.h"
+#include "allocator.h"
+#include "iterator.h"
+
+#ifdef    __cplusplus
+extern "C" {
+#endif
+
+/** Type for the UCX map. */
+typedef struct cx_map_s CxMap;
+
+/** Type for a map entry. */
+typedef struct cx_map_entry_s CxMapEntry;
+
+/** Type for map class definitions. */
+typedef struct cx_map_class_s cx_map_class;
+
+/** Structure for the UCX map. */
+struct cx_map_s {
+    /** The map class definition. */
+    cx_map_class *cl;
+    /** An allocator that is used for the map elements. */
+    CxAllocator *allocator;
+    /** The number of elements currently stored. */
+    size_t size;
+    // TODO: elemsize + a flag if values shall be copied to the map
+};
+
+/**
+ * The class definition for arbitrary maps.
+ */
+struct cx_map_class_s {
+    /**
+     * Deallocates the entire memory.
+     */
+    __attribute__((__nonnull__))
+    void (*destructor)(struct cx_map_s *map);
+
+    /**
+     * Removes all elements.
+     */
+    __attribute__((__nonnull__))
+    void (*clear)(struct cx_map_s *map);
+
+    /**
+     * Add or overwrite an element.
+     */
+    __attribute__((__nonnull__))
+    int (*put)(
+            CxMap *map,
+            CxDataPtr key,
+            void const *value
+    );
+
+    /**
+     * Returns an element.
+     */
+    __attribute__((__nonnull__, __warn_unused_result__))
+    void *(*get)(
+            CxMap const *map,
+            CxDataPtr key
+    );
+
+    /**
+     * Removes an element.
+     */
+    __attribute__((__nonnull__, __warn_unused_result__))
+    void *(*remove)(
+            CxMap const *map,
+            CxDataPtr key
+    );
+
+    /**
+     * Iterator over the key/value pairs.
+     */
+    __attribute__((__nonnull__, __warn_unused_result__))
+    CxIterator (*iterator)(CxMap const *map);
+
+    /**
+     * Iterator over the keys.
+     */
+    __attribute__((__nonnull__, __warn_unused_result__))
+    CxIterator (*iterator_keys)(CxMap const *map);
+
+    /**
+     * Iterator over the values.
+     */
+    __attribute__((__nonnull__, __warn_unused_result__))
+    CxIterator (*iterator_values)(CxMap const *map);
+};
+
+/**
+ * A map entry.
+ */
+struct cx_map_entry_s {
+    /**
+     * The key.
+     */
+    CxDataPtr key;
+    /**
+     * The value.
+     */
+    void const *value;
+};
+
+
+/**
+ * Deallocates the memory of the specified map.
+ *
+ * @param map the map to be destroyed
+ */
+__attribute__((__nonnull__))
+static inline void cxMapDestroy(CxMap *map) {
+    // TODO: likely to add auto-free feature for contents in the future
+    map->cl->destructor(map);
+}
+
+
+/**
+ * Clears a map by removing all elements.
+ *
+ * @param map the map to be cleared
+ */
+__attribute__((__nonnull__))
+static inline void cxMapClear(CxMap *map) {
+    map->cl->clear(map);
+}
+
+/**
+ * Puts a key/value-pair into the map.
+ *
+ * @param map the map
+ * @param key the key
+ * @param value the value
+ * @return 0 on success, non-zero value on failure
+ */
+__attribute__((__nonnull__))
+static inline int cxMapPut(
+        CxMap *map,
+        CxDataPtr key,
+        void const *value
+) {
+    return map->cl->put(map, key, value);
+}
+
+/**
+ * Retrieves a value by using a key.
+ *
+ * @param map the map
+ * @param key the key
+ * @return the value
+ */
+__attribute__((__nonnull__, __warn_unused_result__))
+void *cxMapGet(
+        CxMap const *map,
+        CxDataPtr key
+) {
+    return map->cl->get(map, key);
+}
+
+/**
+ * Removes a key/value-pair from the map by using the key.
+ *
+ * @param map the map
+ * @param key the key
+ * @return the removed value
+ */
+__attribute__((__nonnull__, __warn_unused_result__))
+void *cxMapRemove(
+        CxMap *map,
+        CxDataPtr key
+) {
+    return map->cl->remove(map, key);
+}
+
+// TODO: set-like map operations (union, intersect, difference)
+
+/**
+ * Creates a value iterator for a map.
+ *
+ * \note An iterator iterates over all elements successively. Therefore the order
+ * highly depends on the map implementation and may change arbitrarily when the contents change.
+ *
+ * @param map the map to create the iterator for
+ * @return an iterator for the currently stored values
+ */
+__attribute__((__nonnull__, __warn_unused_result__))
+static inline CxIterator cxMapIteratorValues(CxMap const *map) {
+    return map->cl->iterator_values(map);
+}
+
+/**
+ * Creates a key iterator for a map.
+ *
+ * \note An iterator iterates over all elements successively. Therefore the order
+ * highly depends on the map implementation and may change arbitrarily when the contents change.
+ *
+ * @param map the map to create the iterator for
+ * @return an iterator for the currently stored keys
+ */
+__attribute__((__nonnull__, __warn_unused_result__))
+static inline CxIterator cxMapIteratorKeys(CxMap const *map) {
+    return map->cl->iterator_keys(map);
+}
+
+/**
+ * Creates an iterator for a map.
+ *
+ * The values of the iterator are dynamically created key/value pairs of type CxMapEntry.
+ *
+ * \note An iterator iterates over all elements successively. Therefore the order
+ * highly depends on the map implementation and may change arbitrarily when the contents change.
+ *
+ * @param map the map to create the iterator for
+ * @return an iterator for the currently stored keys
+ * @see cxMapIteratorKeys()
+ * @see cxMapIteratorValues()
+ */
+__attribute__((__nonnull__, __warn_unused_result__))
+static inline CxIterator cxMapIterator(CxMap const *map) {
+    return map->cl->iterator(map);
+}
+
+
+#ifdef    __cplusplus
+}
+#endif
+
+#endif // UCX_MAP_H
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hash_map.c	Wed May 18 16:26:32 2022 +0200
@@ -0,0 +1,105 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include "cx/hash_map.h"
+
+/**
+ * Computes a murmur hash-2.
+ *
+ * @param data the data to hash
+ * @param len the length of the data
+ * @return the murmur hash-2 of the data
+ */
+static unsigned cx_hash_map_murmur(
+        unsigned char const *data,
+        size_t len
+) {
+    unsigned m = 0x5bd1e995;
+    unsigned r = 24;
+    unsigned h = 25 ^ len;
+    unsigned i = 0;
+    while (len >= 4) {
+        unsigned k = data[i + 0] & 0xFF;
+        k |= (data[i + 1] & 0xFF) << 8;
+        k |= (data[i + 2] & 0xFF) << 16;
+        k |= (data[i + 3] & 0xFF) << 24;
+
+        k *= m;
+        k ^= k >> r;
+        k *= m;
+
+        h *= m;
+        h ^= k;
+
+        i += 4;
+        len -= 4;
+    }
+
+    switch (len) {
+        case 3:
+            h ^= (data[i + 2] & 0xFF) << 16;
+                    __attribute__((__fallthrough__));
+        case 2:
+            h ^= (data[i + 1] & 0xFF) << 8;
+                    __attribute__((__fallthrough__));
+        case 1:
+            h ^= (data[i + 0] & 0xFF);
+            h *= m;
+                    __attribute__((__fallthrough__));
+        default:
+            /* do nothing */;
+    }
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+}
+
+
+/**
+ * Creates a hash map key based on the given data.
+ *
+ * This function implicitly computes the hash.
+ *
+ * @attention The data will be copied to the key structure.
+ *
+ * @param data the data for the key
+ * @return the computed key
+ */
+static struct cx_hash_map_key_s cx_hash_map_key(CxDataPtr data) {
+    unsigned char *target = malloc(data.len);
+    memcpy(target, data.data, data.len);
+    struct cx_hash_map_key_s key;
+    key.data.data = target;
+    key.data.len = data.len;
+    key.hash = cx_hash_map_murmur(target, data.len);
+    return key;
+}

mercurial