Sun, 06 Nov 2022 16:07:32 +0100
change hash functions
1) for zero-terminated strings, the terminator is no longer included in the hash
2) for NULL there is now a special hash value different from the hash for empty data
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /** * \file string.h * \brief Strings that know their length. * \author Mike Becker * \author Olaf Wintermann * \version 3.0 * \copyright 2-Clause BSD License */ #ifndef UCX_STRING_H #define UCX_STRING_H #include "common.h" #include "allocator.h" /** * The UCX string structure. */ struct cx_mutstr_s { /** * A pointer to the string. * \note The string is not necessarily \c NULL terminated. * Always use the length. */ char *ptr; /** The length of the string */ size_t length; }; /** * A mutable string. */ typedef struct cx_mutstr_s cxmutstr; /** * The UCX string structure for immutable (constant) strings. */ struct cx_string_s { /** * A pointer to the immutable string. * \note The string is not necessarily \c NULL terminated. * Always use the length. */ char const *ptr; /** The length of the string */ size_t length; }; /** * An immutable string. */ typedef struct cx_string_s cxstring; /** * A literal initializer for an UCX string structure. * * The argument MUST be a string (const char*) \em literal. * * @param literal the string literal */ #define CX_STR(literal) {literal, sizeof(literal) - 1} #ifdef __cplusplus extern "C" { #endif /** * Wraps a mutable string that must be zero-terminated. * * The length is implicitly inferred by using a call to \c strlen(). * * \note the wrapped string will share the specified pointer to the string. * If you do want a copy, use cx_strdup() on the return value of this function. * * If you need to wrap a constant string, use cx_str(). * * @param cstring the string to wrap, must be zero-terminated * @return the wrapped string * * @see cx_mutstrn() */ __attribute__((__warn_unused_result__, __nonnull__)) cxmutstr cx_mutstr(char *cstring); /** * Wraps a string that does not need to be zero-terminated. * * The argument may be \c NULL if the length is zero. * * \note the wrapped string will share the specified pointer to the string. * If you do want a copy, use cx_strdup() on the return value of this function. * * If you need to wrap a constant string, use cx_strn(). * * @param cstring the string to wrap (or \c NULL, only if the length is zero) * @param length the length of the string * @return the wrapped string * * @see cx_mutstr() */ __attribute__((__warn_unused_result__)) cxmutstr cx_mutstrn( char *cstring, size_t length ); /** * Wraps a string that must be zero-terminated. * * The length is implicitly inferred by using a call to \c strlen(). * * \note the wrapped string will share the specified pointer to the string. * If you do want a copy, use cx_strdup() on the return value of this function. * * If you need to wrap a non-constant string, use cx_mutstr(). * * @param cstring the string to wrap, must be zero-terminated * @return the wrapped string * * @see cx_strn() */ __attribute__((__warn_unused_result__, __nonnull__)) cxstring cx_str(char const *cstring); /** * Wraps a string that does not need to be zero-terminated. * * The argument may be \c NULL if the length is zero. * * \note the wrapped string will share the specified pointer to the string. * If you do want a copy, use cx_strdup() on the return value of this function. * * If you need to wrap a non-constant string, use cx_mutstrn(). * * @param cstring the string to wrap (or \c NULL, only if the length is zero) * @param length the length of the string * @return the wrapped string * * @see cx_str() */ __attribute__((__warn_unused_result__)) cxstring cx_strn( char const *cstring, size_t length ); /** * Casts a mutable string to an immutable string. * * \note This is not seriously a cast. Instead you get a copy * of the struct with the desired pointer type. Both structs still * point to the same location, though! * * @param str the mutable string to cast * @return an immutable copy of the string pointer */ __attribute__((__warn_unused_result__)) cxstring cx_strcast(cxmutstr str); /** * Passes the pointer in this string to \c free(). * * The pointer in the struct is set to \c NULL and the length is set to zero. * * \note There is no implementation for cxstring, because it is unlikely that * you ever have a \c char \c const* you are really supposed to free. If you * encounter such situation, you should double-check your code. * * @param str the string to free */ __attribute__((__nonnull__)) void cx_strfree(cxmutstr *str); /** * Passes the pointer in this string to the allocators free function. * * The pointer in the struct is set to \c NULL and the length is set to zero. * * \note There is no implementation for cxstring, because it is unlikely that * you ever have a \c char \c const* you are really supposed to free. If you * encounter such situation, you should double-check your code. * * @param alloc the allocator * @param str the string to free */ __attribute__((__nonnull__)) void cx_strfree_a( CxAllocator *alloc, cxmutstr *str ); /** * Returns the accumulated length of all specified strings. * * \attention if the count argument is larger than the number of the * specified strings, the behavior is undefined. * * @param count the total number of specified strings * @param ... all strings * @return the accumulated length of all strings */ __attribute__((__warn_unused_result__)) size_t cx_strlen( size_t count, ... ); /** * Concatenates two or more strings. * * The resulting string will be allocated by the specified allocator. * So developers \em must pass the return value to cx_strfree() eventually. * * \note It is guaranteed that there is only one allocation. * It is also guaranteed that the returned string is zero-terminated. * * @param alloc the allocator to use * @param count the total number of strings to concatenate * @param ... all strings * @return the concatenated string */ __attribute__((__warn_unused_result__, __nonnull__)) cxmutstr cx_strcat_a( CxAllocator *alloc, size_t count, ... ); /** * Concatenates two or more strings. * * The resulting string will be allocated by standard \c malloc(). * So developers \em must pass the return value to cx_strfree() eventually. * * \note It is guaranteed that there is only one allocation. * It is also guaranteed that the returned string is zero-terminated. * * @param count the total number of strings to concatenate * @param ... all strings * @return the concatenated string */ #define cx_strcat(count, ...) \ cx_strcat_a(cxDefaultAllocator, count, __VA_ARGS__) /** * Returns a substring starting at the specified location. * * \attention the new string references the same memory area as the * input string and is usually \em not zero-terminated. * Use cx_strdup() to get a copy. * * @param string input string * @param start start location of the substring * @return a substring of \p string starting at \p start * * @see cx_strsubsl() * @see cx_strsubs_m() * @see cx_strsubsl_m() */ __attribute__((__warn_unused_result__)) cxstring cx_strsubs( cxstring string, size_t start ); /** * Returns a substring starting at the specified location. * * The returned string will be limited to \p length bytes or the number * of bytes available in \p string, whichever is smaller. * * \attention the new string references the same memory area as the * input string and is usually \em not zero-terminated. * Use cx_strdup() to get a copy. * * @param string input string * @param start start location of the substring * @param length the maximum length of the returned string * @return a substring of \p string starting at \p start * * @see cx_strsubs() * @see cx_strsubs_m() * @see cx_strsubsl_m() */ __attribute__((__warn_unused_result__)) cxstring cx_strsubsl( cxstring string, size_t start, size_t length ); /** * Returns a substring starting at the specified location. * * \attention the new string references the same memory area as the * input string and is usually \em not zero-terminated. * Use cx_strdup() to get a copy. * * @param string input string * @param start start location of the substring * @return a substring of \p string starting at \p start * * @see cx_strsubsl_m() * @see cx_strsubs() * @see cx_strsubsl() */ __attribute__((__warn_unused_result__)) cxmutstr cx_strsubs_m( cxmutstr string, size_t start ); /** * Returns a substring starting at the specified location. * * The returned string will be limited to \p length bytes or the number * of bytes available in \p string, whichever is smaller. * * \attention the new string references the same memory area as the * input string and is usually \em not zero-terminated. * Use cx_strdup() to get a copy. * * @param string input string * @param start start location of the substring * @param length the maximum length of the returned string * @return a substring of \p string starting at \p start * * @see cx_strsubs_m() * @see cx_strsubs() * @see cx_strsubsl() */ __attribute__((__warn_unused_result__)) cxmutstr cx_strsubsl_m( cxmutstr string, size_t start, size_t length ); /** * Returns a substring starting at the location of the first occurrence of the * specified character. * * If the string does not contain the character, an empty string is returned. * * @param string the string where to locate the character * @param chr the character to locate * @return a substring starting at the first location of \p chr * * @see cx_strchr_m() */ __attribute__((__warn_unused_result__)) cxstring cx_strchr( cxstring string, int chr ); /** * Returns a substring starting at the location of the first occurrence of the * specified character. * * If the string does not contain the character, an empty string is returned. * * @param string the string where to locate the character * @param chr the character to locate * @return a substring starting at the first location of \p chr * * @see cx_strchr() */ __attribute__((__warn_unused_result__)) cxmutstr cx_strchr_m( cxmutstr string, int chr ); /** * Returns a substring starting at the location of the last occurrence of the * specified character. * * If the string does not contain the character, an empty string is returned. * * @param string the string where to locate the character * @param chr the character to locate * @return a substring starting at the last location of \p chr * * @see cx_strrchr_m() */ __attribute__((__warn_unused_result__)) cxstring cx_strrchr( cxstring string, int chr ); /** * Returns a substring starting at the location of the last occurrence of the * specified character. * * If the string does not contain the character, an empty string is returned. * * @param string the string where to locate the character * @param chr the character to locate * @return a substring starting at the last location of \p chr * * @see cx_strrchr() */ __attribute__((__warn_unused_result__)) cxmutstr cx_strrchr_m( cxmutstr string, int chr ); /** * Returns a substring starting at the location of the first occurrence of the * specified string. * * If \p haystack does not contain \p needle, an empty string is returned. * * If \p needle is an empty string, the complete \p haystack is * returned. * * @param haystack the string to be scanned * @param needle string containing the sequence of characters to match * @return a substring starting at the first occurrence of * \p needle, or an empty string, if the sequence is not * contained * @see cx_strstr_m() */ __attribute__((__warn_unused_result__)) cxstring cx_strstr( cxstring haystack, cxstring needle ); /** * Returns a substring starting at the location of the first occurrence of the * specified string. * * If \p haystack does not contain \p needle, an empty string is returned. * * If \p needle is an empty string, the complete \p haystack is * returned. * * @param haystack the string to be scanned * @param needle string containing the sequence of characters to match * @return a substring starting at the first occurrence of * \p needle, or an empty string, if the sequence is not * contained * @see cx_strstr() */ __attribute__((__warn_unused_result__)) cxmutstr cx_strstr_m( cxmutstr haystack, cxstring needle ); /** * Splits a given string using a delimiter string. * * \note The resulting array contains strings that point to the source * \p string. Use cx_strdup() to get copies. * * @param string the string to split * @param delim the delimiter * @param limit the maximum number of split items * @param output a pre-allocated array of at least \p limit length * @return the actual number of split items */ __attribute__((__warn_unused_result__, __nonnull__)) size_t cx_strsplit( cxstring string, cxstring delim, size_t limit, cxstring *output ); /** * Splits a given string using a delimiter string. * * The array pointed to by \p output will be allocated by \p allocator. * * \note The resulting array contains strings that point to the source * \p string. Use cx_strdup() to get copies. * * \attention If allocation fails, the \c NULL pointer will be written to * \p output and the number returned will be zero. * * @param allocator the allocator to use for allocating the resulting array * @param string the string to split * @param delim the delimiter * @param limit the maximum number of split items * @param output a pointer where the address of the allocated array shall be * written to * @return the actual number of split items */ __attribute__((__warn_unused_result__, __nonnull__)) size_t cx_strsplit_a( CxAllocator *allocator, cxstring string, cxstring delim, size_t limit, cxstring **output ); /** * Splits a given string using a delimiter string. * * \note The resulting array contains strings that point to the source * \p string. Use cx_strdup() to get copies. * * @param string the string to split * @param delim the delimiter * @param limit the maximum number of split items * @param output a pre-allocated array of at least \p limit length * @return the actual number of split items */ __attribute__((__warn_unused_result__, __nonnull__)) size_t cx_strsplit_m( cxmutstr string, cxstring delim, size_t limit, cxmutstr *output ); /** * Splits a given string using a delimiter string. * * The array pointed to by \p output will be allocated by \p allocator. * * \note The resulting array contains strings that point to the source * \p string. Use cx_strdup() to get copies. * * \attention If allocation fails, the \c NULL pointer will be written to * \p output and the number returned will be zero. * * @param allocator the allocator to use for allocating the resulting array * @param string the string to split * @param delim the delimiter * @param limit the maximum number of split items * @param output a pointer where the address of the allocated array shall be * written to * @return the actual number of split items */ __attribute__((__warn_unused_result__, __nonnull__)) size_t cx_strsplit_ma( CxAllocator *allocator, cxmutstr string, cxstring delim, size_t limit, cxmutstr **output ); /** * Compares two strings. * * @param s1 the first string * @param s2 the second string * @return negative if \p s1 is smaller than \p s2, positive if \p s1 is larger * than \p s2, zero if both strings equal */ __attribute__((__warn_unused_result__)) int cx_strcmp( cxstring s1, cxstring s2 ); /** * Compares two strings ignoring case. * * @param s1 the first string * @param s2 the second string * @return negative if \p s1 is smaller than \p s2, positive if \p s1 is larger * than \p s2, zero if both strings equal ignoring case */ __attribute__((__warn_unused_result__)) int cx_strcasecmp( cxstring s1, cxstring s2 ); /** * Creates a duplicate of the specified string. * * The new string will contain a copy allocated by \p allocator. * * \note The returned string is guaranteed to be zero-terminated. * * @param allocator the allocator to use * @param string the string to duplicate * @return a duplicate of the string * @see cx_strdup() */ __attribute__((__warn_unused_result__, __nonnull__)) cxmutstr cx_strdup_a( CxAllocator *allocator, cxstring string ); /** * Creates a duplicate of the specified string. * * The new string will contain a copy allocated by standard * \c malloc(). So developers \em must pass the return value to cx_strfree(). * * \note The returned string is guaranteed to be zero-terminated. * * @param string the string to duplicate * @return a duplicate of the string * @see cx_strdup_a() */ #define cx_strdup(string) cx_strdup_a(cxDefaultAllocator, string) /** * Omits leading and trailing spaces. * * \note the returned string references the same memory, thus you * must \em not free the returned memory. * * @param string the string that shall be trimmed * @return the trimmed string */ __attribute__((__warn_unused_result__)) cxstring cx_strtrim(cxstring string); /** * Omits leading and trailing spaces. * * \note the returned string references the same memory, thus you * must \em not free the returned memory. * * @param string the string that shall be trimmed * @return the trimmed string */ __attribute__((__warn_unused_result__)) cxmutstr cx_strtrim_m(cxmutstr string); /** * Checks, if a string has a specific prefix. * * @param string the string to check * @param prefix the prefix the string should have * @return \c true, if and only if the string has the specified prefix, * \c false otherwise */ __attribute__((__warn_unused_result__)) bool cx_strprefix( cxstring string, cxstring prefix ); /** * Checks, if a string has a specific suffix. * * @param string the string to check * @param suffix the suffix the string should have * @return \c true, if and only if the string has the specified suffix, * \c false otherwise */ __attribute__((__warn_unused_result__)) bool cx_strsuffix( cxstring string, cxstring suffix ); /** * Checks, if a string has a specific prefix, ignoring the case. * * @param string the string to check * @param prefix the prefix the string should have * @return \c true, if and only if the string has the specified prefix, * \c false otherwise */ __attribute__((__warn_unused_result__)) bool cx_strcaseprefix( cxstring string, cxstring prefix ); /** * Checks, if a string has a specific suffix, ignoring the case. * * @param string the string to check * @param suffix the suffix the string should have * @return \c true, if and only if the string has the specified suffix, * \c false otherwise */ __attribute__((__warn_unused_result__)) bool cx_strcasesuffix( cxstring string, cxstring suffix ); /** * Converts the string to lower case. * * The change is made in-place. If you want a copy, use cx_strdup(), first. * * @param string the string to modify * @see cx_strdup() */ void cx_strlower(cxmutstr string); /** * Converts the string to upper case. * * The change is made in-place. If you want a copy, use cx_strdup(), first. * * @param string the string to modify * @see cx_strdup() */ void cx_strupper(cxmutstr string); /** * Replaces a pattern in a string with another string. * * The pattern is taken literally and is no regular expression. * Replaces at most \p replmax occurrences. * * The returned string will be allocated by \p allocator and is guaranteed * to be zero-terminated. * * If allocation fails, or the input string is empty, * the returned string will be empty. * * @param allocator the allocator to use * @param str the string where replacements should be applied * @param pattern the pattern to search for * @param replacement the replacement string * @param replmax maximum number of replacements * @return the resulting string after applying the replacements */ __attribute__((__warn_unused_result__, __nonnull__)) cxmutstr cx_strreplacen_a( CxAllocator *allocator, cxstring str, cxstring pattern, cxstring replacement, size_t replmax ); /** * Replaces a pattern in a string with another string. * * The pattern is taken literally and is no regular expression. * Replaces at most \p replmax occurrences. * * The returned string will be allocated by \c malloc() and is guaranteed * to be zero-terminated. * * If allocation fails, or the input string is empty, * the returned string will be empty. * * @param str the string where replacements should be applied * @param pattern the pattern to search for * @param replacement the replacement string * @param replmax maximum number of replacements * @return the resulting string after applying the replacements */ #define cx_strreplacen(str, pattern, replacement, replmax) \ cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, replmax) /** * Replaces a pattern in a string with another string. * * The pattern is taken literally and is no regular expression. * * The returned string will be allocated by \p allocator and is guaranteed * to be zero-terminated. * * If allocation fails, or the input string is empty, * the returned string will be empty. * * @param allocator the allocator to use * @param str the string where replacements should be applied * @param pattern the pattern to search for * @param replacement the replacement string * @return the resulting string after applying the replacements */ #define cx_strreplace_a(allocator, str, pattern, replacement) \ cx_strreplacen_a(allocator, str, pattern, replacement, SIZE_MAX) /** * Replaces a pattern in a string with another string. * * The pattern is taken literally and is no regular expression. * Replaces at most \p replmax occurrences. * * The returned string will be allocated by \c malloc() and is guaranteed * to be zero-terminated. * * If allocation fails, or the input string is empty, * the returned string will be empty. * * @param str the string where replacements should be applied * @param pattern the pattern to search for * @param replacement the replacement string * @return the resulting string after applying the replacements */ #define cx_strreplace(str, pattern, replacement) \ cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, SIZE_MAX) #ifdef __cplusplus } // extern "C" #endif #endif //UCX_STRING_H