change hash functions

Sun, 06 Nov 2022 16:07:32 +0100

author
Mike Becker <universe@uap-core.de>
date
Sun, 06 Nov 2022 16:07:32 +0100
changeset 604
056e5f592d84
parent 603
c49104015a6b
child 605
be5a4902d405

change hash functions

1) for zero-terminated strings, the terminator is no longer included in the hash
2) for NULL there is now a special hash value different from the hash for empty data

src/cx/hash_key.h file | annotate | diff | comparison | revisions
src/hash_key.c file | annotate | diff | comparison | revisions
test/test_hash_key.cpp file | annotate | diff | comparison | revisions
test/test_map.cpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/cx/hash_key.h	Sun Nov 06 14:46:59 2022 +0100
     1.2 +++ b/src/cx/hash_key.h	Sun Nov 06 16:07:32 2022 +0100
     1.3 @@ -69,11 +69,13 @@
     1.4  typedef struct cx_hash_key_s CxHashKey;
     1.5  
     1.6  /**
     1.7 - * Computes a murmur hash-2.
     1.8 + * Computes a murmur2 32 bit hash.
     1.9   *
    1.10 - * You need to initialize data and len in the key struct.
    1.11 + * You need to initialize \c data and \c len in the key struct.
    1.12   * The hash is then directly written to that struct.
    1.13   *
    1.14 + * \note If \c data is \c NULL, the hash is defined as 1574210520.
    1.15 + *
    1.16   * @param key the key, the hash shall be computed for
    1.17   */
    1.18  void cx_hash_murmur(CxHashKey *key);
     2.1 --- a/src/hash_key.c	Sun Nov 06 14:46:59 2022 +0100
     2.2 +++ b/src/hash_key.c	Sun Nov 06 16:07:32 2022 +0100
     2.3 @@ -30,8 +30,13 @@
     2.4  #include <string.h>
     2.5  
     2.6  void cx_hash_murmur(CxHashKey *key) {
     2.7 +    unsigned char const *data = key->data.cbytes;
     2.8 +    if (data == NULL) {
     2.9 +        /* extension: special value for NULL */
    2.10 +        key->hash = 1574210520u;
    2.11 +        return;
    2.12 +    }
    2.13      size_t len = key->len;
    2.14 -    unsigned char const *data = key->data.cbytes;
    2.15  
    2.16      unsigned m = 0x5bd1e995;
    2.17      unsigned r = 24;
    2.18 @@ -79,7 +84,7 @@
    2.19  CxHashKey cx_hash_key_str(char const *str) {
    2.20      CxHashKey key;
    2.21      key.data.cstr = str;
    2.22 -    key.len = str == NULL ? 0 : (1 + strlen(str));
    2.23 +    key.len = str == NULL ? 0 : strlen(str);
    2.24      cx_hash_murmur(&key);
    2.25      return key;
    2.26  }
     3.1 --- a/test/test_hash_key.cpp	Sun Nov 06 14:46:59 2022 +0100
     3.2 +++ b/test/test_hash_key.cpp	Sun Nov 06 16:07:32 2022 +0100
     3.3 @@ -32,7 +32,7 @@
     3.4  
     3.5  TEST(cx_hash_key, functions) {
     3.6      auto str = "my key";
     3.7 -    auto len = 1 + strlen(str);
     3.8 +    auto len = strlen(str);
     3.9  
    3.10      auto str_key = cx_hash_key_str(str);
    3.11      auto bytes_key = cx_hash_key_bytes(
    3.12 @@ -47,5 +47,41 @@
    3.13      EXPECT_EQ(bytes_key.len, len);
    3.14      EXPECT_EQ(str_key.data.cstr, str);
    3.15      EXPECT_EQ(bytes_key.data.cbytes, reinterpret_cast<unsigned char const *>(str));
    3.16 -    EXPECT_EQ(bytes_key.data.obj, (void *) str);
    3.17 +    EXPECT_EQ(bytes_key.data.cobj, reinterpret_cast<void const *>(str));
    3.18  }
    3.19 +
    3.20 +TEST(cx_hash_key, empty_string) {
    3.21 +    auto str = "";
    3.22 +
    3.23 +    auto str_key = cx_hash_key_str(str);
    3.24 +    auto bytes_key = cx_hash_key_bytes(
    3.25 +            reinterpret_cast<unsigned char const *>(str), 0);
    3.26 +    auto obj_key = cx_hash_key(
    3.27 +            reinterpret_cast<void const *>(str), 0);
    3.28 +
    3.29 +    EXPECT_EQ(bytes_key.hash, 4152238450u);
    3.30 +    EXPECT_EQ(str_key.hash, 4152238450u);
    3.31 +    EXPECT_EQ(obj_key.hash, 4152238450u);
    3.32 +    EXPECT_EQ(str_key.len, 0);
    3.33 +    EXPECT_EQ(bytes_key.len, 0);
    3.34 +    EXPECT_EQ(bytes_key.len, 0);
    3.35 +    EXPECT_EQ(str_key.data.cstr, str);
    3.36 +    EXPECT_EQ(bytes_key.data.cbytes, reinterpret_cast<unsigned char const *>(str));
    3.37 +    EXPECT_EQ(bytes_key.data.cobj, reinterpret_cast<void const *>(str));
    3.38 +}
    3.39 +
    3.40 +TEST(cx_hash_key, null_ptr) {
    3.41 +    auto str_key = cx_hash_key_str(nullptr);
    3.42 +    auto bytes_key = cx_hash_key_bytes(nullptr, 0);
    3.43 +    auto obj_key = cx_hash_key(nullptr, 0);
    3.44 +
    3.45 +    EXPECT_EQ(bytes_key.hash, 1574210520u);
    3.46 +    EXPECT_EQ(str_key.hash, 1574210520u);
    3.47 +    EXPECT_EQ(obj_key.hash, 1574210520u);
    3.48 +    EXPECT_EQ(str_key.len, 0);
    3.49 +    EXPECT_EQ(bytes_key.len, 0);
    3.50 +    EXPECT_EQ(bytes_key.len, 0);
    3.51 +    EXPECT_EQ(str_key.data.cstr, nullptr);
    3.52 +    EXPECT_EQ(bytes_key.data.cbytes, nullptr);
    3.53 +    EXPECT_EQ(bytes_key.data.cobj, nullptr);
    3.54 +}
     4.1 --- a/test/test_map.cpp	Sun Nov 06 14:46:59 2022 +0100
     4.2 +++ b/test/test_map.cpp	Sun Nov 06 16:07:32 2022 +0100
     4.3 @@ -73,8 +73,7 @@
     4.4          auto keyiter = cxMapIteratorKeys(map);
     4.5          std::unordered_set<std::string> keys;
     4.6          cx_foreach(CxHashKey*, elem, keyiter) {
     4.7 -            // we use that our test keys contain NULL-terminated strings
     4.8 -            keys.insert(std::string(elem->data.cstr));
     4.9 +            keys.insert(std::string(elem->data.cstr, elem->len));
    4.10          }
    4.11          EXPECT_EQ(keyiter.index, map->size);
    4.12          ASSERT_EQ(keys.size(), map->size);
    4.13 @@ -103,7 +102,7 @@
    4.14          auto pairiter = cxMapIterator(map);
    4.15          std::unordered_map<std::string, std::string> pairs;
    4.16          cx_foreach(CxMapEntry*, entry, pairiter) {
    4.17 -            pairs[std::string(entry->key->data.cstr)] = std::string((char *) entry->value);
    4.18 +            pairs[std::string(entry->key->data.cstr, entry->key->len)] = std::string((char *) entry->value);
    4.19          }
    4.20          EXPECT_EQ(pairiter.index, map->size);
    4.21          ASSERT_EQ(pairs.size(), refmap.size());

mercurial