Sun, 06 Nov 2022 16:07:32 +0100
change hash functions
1) for zero-terminated strings, the terminator is no longer included in the hash
2) for NULL there is now a special hash value different from the hash for empty data
src/cx/hash_key.h | file | annotate | diff | comparison | revisions | |
src/hash_key.c | file | annotate | diff | comparison | revisions | |
test/test_hash_key.cpp | file | annotate | diff | comparison | revisions | |
test/test_map.cpp | file | annotate | diff | comparison | revisions |
1.1 --- a/src/cx/hash_key.h Sun Nov 06 14:46:59 2022 +0100 1.2 +++ b/src/cx/hash_key.h Sun Nov 06 16:07:32 2022 +0100 1.3 @@ -69,11 +69,13 @@ 1.4 typedef struct cx_hash_key_s CxHashKey; 1.5 1.6 /** 1.7 - * Computes a murmur hash-2. 1.8 + * Computes a murmur2 32 bit hash. 1.9 * 1.10 - * You need to initialize data and len in the key struct. 1.11 + * You need to initialize \c data and \c len in the key struct. 1.12 * The hash is then directly written to that struct. 1.13 * 1.14 + * \note If \c data is \c NULL, the hash is defined as 1574210520. 1.15 + * 1.16 * @param key the key, the hash shall be computed for 1.17 */ 1.18 void cx_hash_murmur(CxHashKey *key);
2.1 --- a/src/hash_key.c Sun Nov 06 14:46:59 2022 +0100 2.2 +++ b/src/hash_key.c Sun Nov 06 16:07:32 2022 +0100 2.3 @@ -30,8 +30,13 @@ 2.4 #include <string.h> 2.5 2.6 void cx_hash_murmur(CxHashKey *key) { 2.7 + unsigned char const *data = key->data.cbytes; 2.8 + if (data == NULL) { 2.9 + /* extension: special value for NULL */ 2.10 + key->hash = 1574210520u; 2.11 + return; 2.12 + } 2.13 size_t len = key->len; 2.14 - unsigned char const *data = key->data.cbytes; 2.15 2.16 unsigned m = 0x5bd1e995; 2.17 unsigned r = 24; 2.18 @@ -79,7 +84,7 @@ 2.19 CxHashKey cx_hash_key_str(char const *str) { 2.20 CxHashKey key; 2.21 key.data.cstr = str; 2.22 - key.len = str == NULL ? 0 : (1 + strlen(str)); 2.23 + key.len = str == NULL ? 0 : strlen(str); 2.24 cx_hash_murmur(&key); 2.25 return key; 2.26 }
3.1 --- a/test/test_hash_key.cpp Sun Nov 06 14:46:59 2022 +0100 3.2 +++ b/test/test_hash_key.cpp Sun Nov 06 16:07:32 2022 +0100 3.3 @@ -32,7 +32,7 @@ 3.4 3.5 TEST(cx_hash_key, functions) { 3.6 auto str = "my key"; 3.7 - auto len = 1 + strlen(str); 3.8 + auto len = strlen(str); 3.9 3.10 auto str_key = cx_hash_key_str(str); 3.11 auto bytes_key = cx_hash_key_bytes( 3.12 @@ -47,5 +47,41 @@ 3.13 EXPECT_EQ(bytes_key.len, len); 3.14 EXPECT_EQ(str_key.data.cstr, str); 3.15 EXPECT_EQ(bytes_key.data.cbytes, reinterpret_cast<unsigned char const *>(str)); 3.16 - EXPECT_EQ(bytes_key.data.obj, (void *) str); 3.17 + EXPECT_EQ(bytes_key.data.cobj, reinterpret_cast<void const *>(str)); 3.18 } 3.19 + 3.20 +TEST(cx_hash_key, empty_string) { 3.21 + auto str = ""; 3.22 + 3.23 + auto str_key = cx_hash_key_str(str); 3.24 + auto bytes_key = cx_hash_key_bytes( 3.25 + reinterpret_cast<unsigned char const *>(str), 0); 3.26 + auto obj_key = cx_hash_key( 3.27 + reinterpret_cast<void const *>(str), 0); 3.28 + 3.29 + EXPECT_EQ(bytes_key.hash, 4152238450u); 3.30 + EXPECT_EQ(str_key.hash, 4152238450u); 3.31 + EXPECT_EQ(obj_key.hash, 4152238450u); 3.32 + EXPECT_EQ(str_key.len, 0); 3.33 + EXPECT_EQ(bytes_key.len, 0); 3.34 + EXPECT_EQ(bytes_key.len, 0); 3.35 + EXPECT_EQ(str_key.data.cstr, str); 3.36 + EXPECT_EQ(bytes_key.data.cbytes, reinterpret_cast<unsigned char const *>(str)); 3.37 + EXPECT_EQ(bytes_key.data.cobj, reinterpret_cast<void const *>(str)); 3.38 +} 3.39 + 3.40 +TEST(cx_hash_key, null_ptr) { 3.41 + auto str_key = cx_hash_key_str(nullptr); 3.42 + auto bytes_key = cx_hash_key_bytes(nullptr, 0); 3.43 + auto obj_key = cx_hash_key(nullptr, 0); 3.44 + 3.45 + EXPECT_EQ(bytes_key.hash, 1574210520u); 3.46 + EXPECT_EQ(str_key.hash, 1574210520u); 3.47 + EXPECT_EQ(obj_key.hash, 1574210520u); 3.48 + EXPECT_EQ(str_key.len, 0); 3.49 + EXPECT_EQ(bytes_key.len, 0); 3.50 + EXPECT_EQ(bytes_key.len, 0); 3.51 + EXPECT_EQ(str_key.data.cstr, nullptr); 3.52 + EXPECT_EQ(bytes_key.data.cbytes, nullptr); 3.53 + EXPECT_EQ(bytes_key.data.cobj, nullptr); 3.54 +}
4.1 --- a/test/test_map.cpp Sun Nov 06 14:46:59 2022 +0100 4.2 +++ b/test/test_map.cpp Sun Nov 06 16:07:32 2022 +0100 4.3 @@ -73,8 +73,7 @@ 4.4 auto keyiter = cxMapIteratorKeys(map); 4.5 std::unordered_set<std::string> keys; 4.6 cx_foreach(CxHashKey*, elem, keyiter) { 4.7 - // we use that our test keys contain NULL-terminated strings 4.8 - keys.insert(std::string(elem->data.cstr)); 4.9 + keys.insert(std::string(elem->data.cstr, elem->len)); 4.10 } 4.11 EXPECT_EQ(keyiter.index, map->size); 4.12 ASSERT_EQ(keys.size(), map->size); 4.13 @@ -103,7 +102,7 @@ 4.14 auto pairiter = cxMapIterator(map); 4.15 std::unordered_map<std::string, std::string> pairs; 4.16 cx_foreach(CxMapEntry*, entry, pairiter) { 4.17 - pairs[std::string(entry->key->data.cstr)] = std::string((char *) entry->value); 4.18 + pairs[std::string(entry->key->data.cstr, entry->key->len)] = std::string((char *) entry->value); 4.19 } 4.20 EXPECT_EQ(pairiter.index, map->size); 4.21 ASSERT_EQ(pairs.size(), refmap.size());