Tue, 04 Oct 2022 19:25:07 +0200
fix over-optimization of strstr
1. it's actually less performant to frequently read bytes
from an array instead of using the native word length
2. the SBO buffer should be local and not static to allow
multi-threading usage
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <string.h> #include "cx/hash_map.h" #include "cx/utils.h" static void cx_hash_map_clear(struct cx_map_s *map) { struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; cx_for_n(i, hash_map->bucket_count) { struct cx_hash_map_element_s *elem = hash_map->buckets[i]; if (elem != NULL) { do { struct cx_hash_map_element_s *next = elem->next; // free the key data cxFree(map->allocator, elem->key.data.obj); // free the node cxFree(map->allocator, elem); // proceed elem = next; } while (elem != NULL); // do not leave a dangling pointer hash_map->buckets[i] = NULL; } } map->size = 0; } static void cx_hash_map_destructor(struct cx_map_s *map) { struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; // free the buckets cx_hash_map_clear(map); cxFree(map->allocator, hash_map->buckets); // free the map structure cxFree(map->allocator, map); } static int cx_hash_map_put( CxMap *map, CxHashKey key, void *value ) { struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; CxAllocator *allocator = map->allocator; unsigned hash = key.hash; if (hash == 0) { cx_hash_murmur(&key); hash = key.hash; } size_t slot = hash % hash_map->bucket_count; struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; struct cx_hash_map_element_s *prev = NULL; while (elm != NULL && elm->key.hash < hash) { prev = elm; elm = elm->next; } if (elm != NULL && elm->key.hash == hash && elm->key.len == key.len && memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) { // overwrite existing element elm->data = value; } else { // allocate new element struct cx_hash_map_element_s *e = cxMalloc(allocator, sizeof(struct cx_hash_map_element_s)); if (e == NULL) { return -1; } // write the value // TODO: depending on future map features, we may want to copy here e->data = value; // copy the key void *kd = cxMalloc(allocator, key.len); if (kd == NULL) { return -1; } memcpy(kd, key.data.obj, key.len); e->key.data.obj = kd; e->key.len = key.len; e->key.hash = hash; // insert the element into the linked list if (prev == NULL) { hash_map->buckets[slot] = e; } else { prev->next = e; } e->next = elm; // increase the size map->size++; } return 0; } static void cx_hash_map_unlink( struct cx_hash_map_s *hash_map, size_t slot, struct cx_hash_map_element_s *prev, struct cx_hash_map_element_s *elm ) { // unlink if (prev == NULL) { hash_map->buckets[slot] = elm->next; } else { prev->next = elm->next; } // free element cxFree(hash_map->base.allocator, elm->key.data.obj); cxFree(hash_map->base.allocator, elm); // decrease size hash_map->base.size--; } /** * Helper function to avoid code duplication. * * @param map the map * @param key the key to look up * @param remove flag indicating whether the looked up entry shall be removed * @return the value corresponding to the key or \c NULL */ static void *cx_hash_map_get_remove( CxMap *map, CxHashKey key, bool remove ) { struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; unsigned hash = key.hash; if (hash == 0) { cx_hash_murmur(&key); hash = key.hash; } size_t slot = hash % hash_map->bucket_count; struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; struct cx_hash_map_element_s *prev = NULL; while (elm && elm->key.hash <= hash) { if (elm->key.hash == hash && elm->key.len == key.len) { if (memcmp(elm->key.data.obj, key.data.obj, key.len) == 0) { void *data = elm->data; if (remove) { cx_hash_map_unlink(hash_map, slot, prev, elm); } return data; } } prev = elm; elm = prev->next; } return NULL; } static void *cx_hash_map_get( CxMap const *map, CxHashKey key ) { // we can safely cast, because we know when remove=false, the map stays untouched return cx_hash_map_get_remove((CxMap *) map, key, false); } static void *cx_hash_map_remove( CxMap *map, CxHashKey key ) { return cx_hash_map_get_remove(map, key, true); } static void *cx_hash_map_iter_current_entry(CxIterator const *iter) { // struct has to have a compatible signature return (struct cx_map_entry_s *) &(iter->kv_data); } static void *cx_hash_map_iter_current_key(CxIterator const *iter) { struct cx_hash_map_element_s *elm = iter->elem_handle; return &elm->key; } static void *cx_hash_map_iter_current_value(CxIterator const *iter) { struct cx_hash_map_element_s *elm = iter->elem_handle; // TODO: return a pointer to data if this map is storing copies return elm->data; } static bool cx_hash_map_iter_valid(CxIterator const *iter) { return iter->elem_handle != NULL; } static void cx_hash_map_iter_next(CxIterator *iter) { struct cx_hash_map_s *map = iter->src_handle; struct cx_hash_map_element_s *elm = iter->elem_handle; // remove current element, if asked if (iter->remove) { // clear the flag iter->remove = false; // determine the next element struct cx_hash_map_element_s *next = elm->next; // search the previous element struct cx_hash_map_element_s *prev = NULL; if (map->buckets[iter->slot] != elm) { prev = map->buckets[iter->slot]; while (prev->next != elm) { prev = prev->next; } } // unlink cx_hash_map_unlink(map, iter->slot, prev, elm); // advance elm = next; } else { // just advance elm = elm->next; iter->index++; } // search the next bucket, if required while (elm == NULL && ++iter->slot < map->bucket_count) { elm = map->buckets[iter->slot]; } // fill the struct with the next element iter->elem_handle = elm; if (elm == NULL) { iter->kv_data.key = NULL; iter->kv_data.value = NULL; } else { iter->kv_data.key = &elm->key; // TODO: pointer to data if this map is storing copies iter->kv_data.value = elm->data; } } static CxIterator cx_hash_map_iterator(CxMap *map) { CxIterator iter; iter.src_handle = map; iter.valid = cx_hash_map_iter_valid; iter.next = cx_hash_map_iter_next; iter.current = cx_hash_map_iter_current_entry; iter.slot = 0; iter.index = 0; iter.remove = false; if (map->size > 0) { struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; struct cx_hash_map_element_s *elm = hash_map->buckets[0]; for (; elm == NULL; iter.slot++) { elm = hash_map->buckets[iter.slot]; } iter.elem_handle = elm; iter.kv_data.key = &elm->key; // TODO: pointer to data if this map is storing copies iter.kv_data.value = elm->data; } else { iter.elem_handle = NULL; iter.kv_data.key = NULL; iter.kv_data.value = NULL; } return iter; } static CxIterator cx_hash_map_iterator_keys(CxMap *map) { CxIterator iter = cx_hash_map_iterator(map); iter.current = cx_hash_map_iter_current_key; return iter; } static CxIterator cx_hash_map_iterator_values(CxMap *map) { CxIterator iter = cx_hash_map_iterator(map); iter.current = cx_hash_map_iter_current_value; return iter; } static cx_map_class cx_hash_map_class = { cx_hash_map_destructor, cx_hash_map_clear, cx_hash_map_put, cx_hash_map_get, cx_hash_map_remove, cx_hash_map_iterator, cx_hash_map_iterator_keys, cx_hash_map_iterator_values, }; CxMap *cxHashMapCreate( CxAllocator *allocator, size_t buckets ) { if (buckets == 0) { // implementation defined default buckets = 16; } struct cx_hash_map_s *map = cxMalloc(allocator, sizeof(struct cx_hash_map_s)); if (map == NULL) return NULL; // initialize hash map members map->bucket_count = buckets; map->buckets = cxCalloc(allocator, buckets, sizeof(struct cx_hash_map_element_s *)); if (map->buckets == NULL) { cxFree(allocator, map); return NULL; } // initialize base members map->base.cl = &cx_hash_map_class; map->base.allocator = allocator; map->base.size = 0; return (CxMap *) map; } int cxMapRehash(CxMap *map) { struct cx_hash_map_s *hash_map = (struct cx_hash_map_s *) map; if (map->size > ((hash_map->bucket_count * 3) >> 2)) { size_t new_bucket_count = (map->size * 5) >> 1; struct cx_hash_map_element_s **new_buckets = cxCalloc(map->allocator, new_bucket_count, sizeof(struct cx_hash_map_element_s *)); if (new_buckets == NULL) { return 1; } // iterate through the elements and assign them to their new slots cx_for_n(slot, hash_map->bucket_count) { struct cx_hash_map_element_s *elm = hash_map->buckets[slot]; while (elm != NULL) { struct cx_hash_map_element_s *next = elm->next; size_t new_slot = elm->key.hash % new_bucket_count; // find position where to insert struct cx_hash_map_element_s *bucket_next = new_buckets[new_slot]; struct cx_hash_map_element_s *bucket_prev = NULL; while (bucket_next != NULL && bucket_next->key.hash < elm->key.hash) { bucket_prev = bucket_next; bucket_next = bucket_next->next; } // insert if (bucket_prev == NULL) { elm->next = new_buckets[new_slot]; new_buckets[new_slot] = elm; } else { bucket_prev->next = elm; elm->next = bucket_next; } // advance elm = next; } } // assign result to the map hash_map->bucket_count = new_bucket_count; cxFree(map->allocator, hash_map->buckets); hash_map->buckets = new_buckets; } return 0; }