src/hash_map.c

Tue, 04 Oct 2022 19:25:07 +0200

author
Mike Becker <universe@uap-core.de>
date
Tue, 04 Oct 2022 19:25:07 +0200
changeset 591
7df0bcaecffa
parent 575
b05935945637
child 630
ac5e7f789048
permissions
-rw-r--r--

fix over-optimization of strstr

1. it's actually less performant to frequently read bytes
from an array instead of using the native word length
2. the SBO buffer should be local and not static to allow
multi-threading usage

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>
#include "cx/hash_map.h"
#include "cx/utils.h"

static void cx_hash_map_clear(struct cx_map_s *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    cx_for_n(i, hash_map->bucket_count) {
        struct cx_hash_map_element_s *elem = hash_map->buckets[i];
        if (elem != NULL) {
            do {
                struct cx_hash_map_element_s *next = elem->next;
                // free the key data
                cxFree(map->allocator, elem->key.data.obj);
                // free the node
                cxFree(map->allocator, elem);
                // proceed
                elem = next;
            } while (elem != NULL);

            // do not leave a dangling pointer
            hash_map->buckets[i] = NULL;
        }
    }
    map->size = 0;
}

/**
 * Destroys the map: releases all elements, the bucket array, and finally
 * the map structure itself.
 *
 * @param map the map to destroy
 */
static void cx_hash_map_destructor(struct cx_map_s *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;

    // release all elements first, then the (now empty) bucket array
    cx_hash_map_clear(map);
    cxFree(map->allocator, hash_map->buckets);

    // the map structure goes last, because the allocator is read from it
    cxFree(map->allocator, map);
}

/**
 * Stores a value under the specified key.
 *
 * If the key is already present, the stored value pointer is replaced.
 * Otherwise a new element with a private copy of the key data is inserted
 * into the bucket, keeping the chain sorted by ascending hash.
 *
 * @param map the map
 * @param key the key (the hash is computed here when key.hash is zero)
 * @param value the value pointer to store (the pointer itself is stored, not a copy)
 * @return zero on success, -1 when an allocation fails (the map is unchanged then)
 */
static int cx_hash_map_put(
        CxMap *map,
        CxHashKey key,
        void *value
) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
    CxAllocator *allocator = map->allocator;

    unsigned hash = key.hash;
    if (hash == 0) {
        cx_hash_murmur(&key);
        hash = key.hash;
    }

    size_t slot = hash % hash_map->bucket_count;
    struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
    struct cx_hash_map_element_s *prev = NULL;

    // chains are sorted by hash: skip everything with a smaller hash
    while (elm != NULL && elm->key.hash < hash) {
        prev = elm;
        elm = elm->next;
    }

    // different keys may share the same hash, so every element of the
    // equal-hash run must be compared before the key is treated as new
    while (elm != NULL && elm->key.hash == hash) {
        if (elm->key.len == key.len &&
            memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) {
            // overwrite existing element
            elm->data = value;
            return 0;
        }
        prev = elm;
        elm = elm->next;
    }

    // allocate new element
    struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s));
    if (e == NULL) {
        return -1;
    }

    // copy the key
    void *kd = cxMalloc(allocator, key.len);
    if (kd == NULL) {
        // do not leak the element when the key copy cannot be allocated
        cxFree(allocator, e);
        return -1;
    }
    memcpy(kd, key.data.obj, key.len);
    e->key.data.obj = kd;
    e->key.len = key.len;
    e->key.hash = hash;

    // write the value
    // TODO: depending on future map features, we may want to copy here
    e->data = value;

    // insert the element into the linked list (behind any equal-hash
    // elements, in front of the first element with a larger hash)
    if (prev == NULL) {
        hash_map->buckets[slot] = e;
    } else {
        prev->next = e;
    }
    e->next = elm;

    // increase the size
    map->size++;

    return 0;
}

/**
 * Detaches an element from its bucket chain and releases it.
 *
 * @param hash_map the hash map
 * @param slot index of the bucket containing the element
 * @param prev the predecessor of the element in the chain, or NULL when
 * the element is the head of the bucket
 * @param elm the element to remove
 */
static void cx_hash_map_unlink(
        struct cx_hash_map_s *hash_map,
        size_t slot,
        struct cx_hash_map_element_s *prev,
        struct cx_hash_map_element_s *elm
) {
    // the link pointing at elm is either the bucket head or prev->next
    struct cx_hash_map_element_s **link =
            (prev == NULL) ? &hash_map->buckets[slot] : &prev->next;
    *link = elm->next;

    // release the key data first, then the node that referenced it
    cxFree(hash_map->base.allocator, elm->key.data.obj);
    cxFree(hash_map->base.allocator, elm);

    // one element less
    hash_map->base.size--;
}

/**
 * Shared implementation of lookup and removal.
 *
 * Walks the bucket chain selected by the key hash. The walk stops early as
 * soon as an element with a larger hash is reached.
 *
 * @param map the map
 * @param key the key to look up (the hash is computed here when key.hash is zero)
 * @param remove flag indicating whether the looked up entry shall be removed
 * @return the value corresponding to the key or \c NULL
 */
static void *cx_hash_map_get_remove(
        CxMap *map,
        CxHashKey key,
        bool remove
) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;

    // a zero hash means "not computed yet"
    if (key.hash == 0) {
        cx_hash_murmur(&key);
    }
    unsigned const hash = key.hash;
    size_t const slot = hash % hash_map->bucket_count;

    struct cx_hash_map_element_s *before = NULL;
    for (struct cx_hash_map_element_s *cur = hash_map->buckets[slot];
         cur != NULL && cur->key.hash <= hash;
         before = cur, cur = cur->next) {
        // cheap checks first: hash and length must agree
        if (cur->key.hash != hash || cur->key.len != key.len) {
            continue;
        }
        if (memcmp(cur->key.data.obj, key.data.obj, key.len) != 0) {
            continue;
        }

        // read the value before the element is possibly freed
        void *value = cur->data;
        if (remove) {
            cx_hash_map_unlink(hash_map, slot, before, cur);
        }
        return value;
    }

    return NULL;
}

/**
 * Looks up the value stored under the specified key.
 *
 * @param map the map
 * @param key the key to look up
 * @return the value corresponding to the key or \c NULL
 */
static void *cx_hash_map_get(
        CxMap const *map,
        CxHashKey key
) {
    // dropping const is safe: with remove=false the helper never modifies the map
    CxMap *unconst_map = (CxMap *) map;
    return cx_hash_map_get_remove(unconst_map, key, false);
}

/**
 * Removes the entry stored under the specified key.
 *
 * @param map the map
 * @param key the key of the entry to remove
 * @return the value that was stored under the key or \c NULL
 */
static void *cx_hash_map_remove(
        CxMap *map,
        CxHashKey key
) {
    void *removed_value = cx_hash_map_get_remove(map, key, true);
    return removed_value;
}

/**
 * Yields the current key/value pair of an entry iterator.
 *
 * @param iter the iterator
 * @return pointer to the iterator's kv_data, viewed as a map entry
 */
static void *cx_hash_map_iter_current_entry(CxIterator const *iter) {
    // kv_data must be layout compatible with struct cx_map_entry_s
    struct cx_map_entry_s *entry = (struct cx_map_entry_s *) &(iter->kv_data);
    return entry;
}

/**
 * Yields the key of the element a key iterator currently points to.
 *
 * @param iter the iterator
 * @return pointer to the key stored inside the current element
 */
static void *cx_hash_map_iter_current_key(CxIterator const *iter) {
    return &((struct cx_hash_map_element_s *) iter->elem_handle)->key;
}

/**
 * Yields the value of the element a value iterator currently points to.
 *
 * @param iter the iterator
 * @return the value pointer stored in the current element
 */
static void *cx_hash_map_iter_current_value(CxIterator const *iter) {
    // TODO: return a pointer to data if this map is storing copies
    return ((struct cx_hash_map_element_s *) iter->elem_handle)->data;
}

/**
 * Tells whether the iterator currently points to an element.
 *
 * @param iter the iterator
 * @return true if an element is available, false otherwise
 */
static bool cx_hash_map_iter_valid(CxIterator const *iter) {
    bool const has_element = iter->elem_handle != NULL;
    return has_element;
}

/**
 * Advances the iterator to the next element.
 *
 * When the remove flag is set, the current element is unlinked and freed
 * first. The flag is cleared and the index is not incremented in that case.
 * Must only be called while the iterator points to an element.
 *
 * @param iter the iterator
 */
static void cx_hash_map_iter_next(CxIterator *iter) {
    struct cx_hash_map_s *hash_map = iter->src_handle;
    struct cx_hash_map_element_s *current = iter->elem_handle;

    // fetch the successor up front, the current element may get freed below
    struct cx_hash_map_element_s *successor = current->next;

    if (iter->remove) {
        // removal is a one-shot request
        iter->remove = false;

        // walk the chain to find the predecessor (NULL if current is the head)
        struct cx_hash_map_element_s *before = NULL;
        struct cx_hash_map_element_s *walker = hash_map->buckets[iter->slot];
        while (walker != current) {
            before = walker;
            walker = walker->next;
        }

        cx_hash_map_unlink(hash_map, iter->slot, before, current);
    } else {
        iter->index++;
    }
    current = successor;

    // chain exhausted: continue with the next non-empty bucket, if any
    while (current == NULL && ++iter->slot < hash_map->bucket_count) {
        current = hash_map->buckets[iter->slot];
    }

    // publish the new position
    iter->elem_handle = current;
    if (current != NULL) {
        iter->kv_data.key = &current->key;
        // TODO: pointer to data if this map is storing copies
        iter->kv_data.value = current->data;
    } else {
        iter->kv_data.key = NULL;
        iter->kv_data.value = NULL;
    }
}

/**
 * Creates an iterator over the key/value pairs of the map.
 *
 * The iterator starts at the first element of the first non-empty bucket.
 * For an empty map the returned iterator points to no element.
 *
 * @param map the map to iterate
 * @return the iterator
 */
static CxIterator cx_hash_map_iterator(CxMap *map) {
    CxIterator iter;

    iter.src_handle = map;
    iter.valid = cx_hash_map_iter_valid;
    iter.next = cx_hash_map_iter_next;
    iter.current = cx_hash_map_iter_current_entry;

    iter.slot = 0;
    iter.index = 0;
    iter.remove = false;

    struct cx_hash_map_element_s *elm = NULL;
    if (map->size > 0) {
        struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;
        // Find the first non-empty bucket. iter.slot must end up being the
        // index of exactly that bucket, because cx_hash_map_iter_next()
        // uses it to locate the chain on removal and to continue the scan.
        // The bound keeps the scan inside the bucket array in any case.
        for (; iter.slot < hash_map->bucket_count; iter.slot++) {
            elm = hash_map->buckets[iter.slot];
            if (elm != NULL) {
                break;
            }
        }
    }

    iter.elem_handle = elm;
    if (elm != NULL) {
        iter.kv_data.key = &elm->key;
        // TODO: pointer to data if this map is storing copies
        iter.kv_data.value = elm->data;
    } else {
        iter.kv_data.key = NULL;
        iter.kv_data.value = NULL;
    }

    return iter;
}

/**
 * Creates an iterator over the keys of the map.
 *
 * @param map the map to iterate
 * @return an iterator whose current element is the key
 */
static CxIterator cx_hash_map_iterator_keys(CxMap *map) {
    // same traversal as the entry iterator, only the accessor differs
    CxIterator key_iter = cx_hash_map_iterator(map);
    key_iter.current = cx_hash_map_iter_current_key;
    return key_iter;
}

/**
 * Creates an iterator over the values of the map.
 *
 * @param map the map to iterate
 * @return an iterator whose current element is the value
 */
static CxIterator cx_hash_map_iterator_values(CxMap *map) {
    // same traversal as the entry iterator, only the accessor differs
    CxIterator value_iter = cx_hash_map_iterator(map);
    value_iter.current = cx_hash_map_iter_current_value;
    return value_iter;
}

// Function table of the hash map implementation. cxHashMapCreate() stores
// the address of this table in the `cl` member of every map it creates.
// NOTE(review): the entries are positional, so their order presumably has to
// match the member order of cx_map_class (declared outside this file);
// confirm against the header before reordering or inserting entries.
static cx_map_class cx_hash_map_class = {
        cx_hash_map_destructor,
        cx_hash_map_clear,
        cx_hash_map_put,
        cx_hash_map_get,
        cx_hash_map_remove,
        cx_hash_map_iterator,
        cx_hash_map_iterator_keys,
        cx_hash_map_iterator_values,
};

/**
 * Creates a new, empty hash map.
 *
 * @param allocator the allocator used for the map, its bucket array, and
 * later for its elements
 * @param buckets the number of buckets, or zero to use the default of 16
 * @return the new map, or NULL when an allocation fails
 */
CxMap *cxHashMapCreate(
        CxAllocator *allocator,
        size_t buckets
) {
    // zero requests the implementation defined default
    size_t const bucket_count = (buckets == 0) ? 16 : buckets;

    struct cx_hash_map_s *hash_map = cxMalloc(allocator, sizeof *hash_map);
    if (hash_map == NULL) {
        return NULL;
    }

    // the bucket array must start zeroed: NULL marks an empty bucket
    struct cx_hash_map_element_s **table =
            cxCalloc(allocator, bucket_count, sizeof *table);
    if (table == NULL) {
        // roll back the first allocation
        cxFree(allocator, hash_map);
        return NULL;
    }

    // hash map specific members
    hash_map->bucket_count = bucket_count;
    hash_map->buckets = table;

    // members of the generic map base
    hash_map->base.cl = &cx_hash_map_class;
    hash_map->base.allocator = allocator;
    hash_map->base.size = 0;

    return (CxMap *) hash_map;
}

/**
 * Enlarges the bucket array when the map has become too crowded.
 *
 * Nothing happens while the element count is at most 3/4 of the bucket
 * count. Otherwise a new bucket array with 2.5 times the element count is
 * allocated and all existing elements are relinked into it (the elements
 * themselves are not reallocated). Each new chain is kept sorted by
 * ascending hash.
 *
 * @param map the map to rehash
 * @return zero on success (including when no rehash was necessary),
 * 1 when the new bucket array could not be allocated
 */
int cxMapRehash(CxMap *map) {
    struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map;

    // load is still acceptable, keep the current buckets
    if (map->size <= ((hash_map->bucket_count * 3) >> 2)) {
        return 0;
    }

    size_t new_bucket_count = (map->size * 5) >> 1;
    struct cx_hash_map_element_s **new_buckets = cxCalloc(
            map->allocator,
            new_bucket_count,
            sizeof(struct cx_hash_map_element_s *)
    );
    if (new_buckets == NULL) {
        return 1;
    }

    // move every element of every old chain into its new chain
    cx_for_n(slot, hash_map->bucket_count) {
        struct cx_hash_map_element_s *elm = hash_map->buckets[slot];
        while (elm != NULL) {
            // the next pointer is overwritten below, save it now
            struct cx_hash_map_element_s *next = elm->next;

            // find the link in front of the first element whose hash is
            // not smaller than the hash of the element being moved
            struct cx_hash_map_element_s **link =
                    &new_buckets[elm->key.hash % new_bucket_count];
            while (*link != NULL && (*link)->key.hash < elm->key.hash) {
                link = &(*link)->next;
            }

            // splice the element in at that position
            elm->next = *link;
            *link = elm;

            elm = next;
        }
    }

    // replace the old bucket array with the new one
    hash_map->bucket_count = new_bucket_count;
    cxFree(map->allocator, hash_map->buckets);
    hash_map->buckets = new_buckets;

    return 0;
}

mercurial