src/cx/string.h

Tue, 04 Oct 2022 19:25:07 +0200

author
Mike Becker <universe@uap-core.de>
date
Tue, 04 Oct 2022 19:25:07 +0200
changeset 591
7df0bcaecffa
parent 589
c290f8fd979e
child 645
ec50abb285ad
permissions
-rw-r--r--

fix over-optimization of strstr

1. it's actually less performant to frequently read bytes
from an array instead of using the native word length
2. the SBO buffer should be local and not static to allow
multi-threading usage

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * \file string.h
 * \brief Strings that know their length.
 * \author Mike Becker
 * \author Olaf Wintermann
 * \version 3.0
 * \copyright 2-Clause BSD License
 */

#ifndef UCX_STRING_H
#define UCX_STRING_H

#include "common.h"
#include "allocator.h"

/**
 * The UCX string structure for mutable strings.
 *
 * Pairs a pointer to writable character data with an explicit length.
 */
struct cx_mutstr_s {
    /**
     * A pointer to the string.
     * \note The string is not necessarily zero-terminated.
     * Always use the length.
     */
    char *ptr;
    /** The length of the string in bytes. */
    size_t length;
};

/**
 * A mutable string.
 *
 * @see cxstring
 */
typedef struct cx_mutstr_s cxmutstr;

/**
 * The UCX string structure for immutable (constant) strings.
 *
 * Pairs a pointer to read-only character data with an explicit length.
 */
struct cx_string_s {
    /**
     * A pointer to the immutable string.
     * \note The string is not necessarily zero-terminated.
     * Always use the length.
     */
    char const *ptr;
    /** The length of the string in bytes. */
    size_t length;
};

/**
 * An immutable string.
 *
 * @see cxmutstr
 */
typedef struct cx_string_s cxstring;

/**
 * A literal initializer for an UCX string structure.
 *
 * Expands to a brace-enclosed initializer list consisting of the literal
 * and its length, computed as \c sizeof(literal)-1 (i.e. without the
 * terminating zero byte). It can therefore only be used where an
 * initializer is permitted, e.g. <code>cxstring s = CX_STR("hello");</code>
 *
 * The argument MUST be a string (const char*) \em literal.
 * When a pointer is passed instead, \c sizeof is applied to the pointer
 * and the resulting length is wrong.
 *
 * @param literal the string literal
 */
#define CX_STR(literal) {literal, sizeof(literal) - 1}

#ifdef __cplusplus
extern "C" {
#endif


/**
 * Wraps a mutable string that must be zero-terminated.
 *
 * The length is implicitly inferred by using a call to \c strlen().
 *
 * \note the wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a constant string, use cx_str().
 *
 * @param cstring the string to wrap, must be zero-terminated
 * @return the wrapped string
 *
 * @see cx_mutstrn()
 */
__attribute__((__warn_unused_result__, __nonnull__))
cxmutstr cx_mutstr(char *cstring);

/**
 * Wraps a string that does not need to be zero-terminated.
 *
 * The argument may be \c NULL if the length is zero.
 *
 * \note the wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a constant string, use cx_strn().
 *
 * @param cstring  the string to wrap (or \c NULL, only if the length is zero)
 * @param length   the length of the string
 * @return the wrapped string
 *
 * @see cx_mutstr()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_mutstrn(
        char *cstring,
        size_t length
);

/**
 * Wraps a string that must be zero-terminated.
 *
 * The length is implicitly inferred by using a call to \c strlen().
 *
 * \note the wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a non-constant string, use cx_mutstr().
 *
 * @param cstring the string to wrap, must be zero-terminated
 * @return the wrapped string
 *
 * @see cx_strn()
 */
__attribute__((__warn_unused_result__, __nonnull__))
cxstring cx_str(char const *cstring);


/**
 * Wraps a string that does not need to be zero-terminated.
 *
 * The argument may be \c NULL if the length is zero.
 *
 * \note the wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a non-constant string, use cx_mutstrn().
 *
 * @param cstring  the string to wrap (or \c NULL, only if the length is zero)
 * @param length   the length of the string
 * @return the wrapped string
 *
 * @see cx_str()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strn(
        char const *cstring,
        size_t length
);

/**
 * Casts a mutable string to an immutable string.
 *
 * \note This is not really a cast. Instead, you get a copy
 * of the struct with the desired pointer type. Both structs still
 * point to the same location, though!
 *
 * @param str the mutable string to cast
 * @return an immutable copy of the string pointer
 */
__attribute__((__warn_unused_result__))
cxstring cx_strcast(cxmutstr str);

/**
 * Passes the pointer in this string to \c free().
 *
 * The pointer in the struct is set to \c NULL and the length is set to zero.
 *
 * \note There is no implementation for cxstring, because it is unlikely that
 * you ever have a \c char \c const* you are really supposed to free. If you
 * encounter such situation, you should double-check your code.
 *
 * @param str the string to free
 */
__attribute__((__nonnull__))
void cx_strfree(cxmutstr *str);

/**
 * Passes the pointer in this string to the allocator's free function.
 *
 * The pointer in the struct is set to \c NULL and the length is set to zero.
 *
 * \note There is no implementation for cxstring, because it is unlikely that
 * you ever have a \c char \c const* you are really supposed to free. If you
 * encounter such a situation, you should double-check your code.
 *
 * @param alloc the allocator
 * @param str the string to free
 */
__attribute__((__nonnull__))
void cx_strfree_a(
        CxAllocator *alloc,
        cxmutstr *str
);

/**
 * Returns the accumulated length of all specified strings.
 *
 * \attention if the count argument is larger than the number of the
 * specified strings, the behavior is undefined.
 *
 * @param count    the total number of specified strings
 * @param ...      all strings
 * @return the accumulated length of all strings
 */
__attribute__((__warn_unused_result__))
size_t cx_strlen(
        size_t count,
        ...
);

/**
 * Concatenates two or more strings.
 *
 * The resulting string will be allocated by the specified allocator.
 * So developers \em must pass the return value to cx_strfree_a(), together
 * with the same allocator, eventually.
 *
 * \note It is guaranteed that there is only one allocation.
 * It is also guaranteed that the returned string is zero-terminated.
 *
 * @param alloc the allocator to use
 * @param count   the total number of strings to concatenate
 * @param ...     all strings
 * @return the concatenated string
 *
 * @see cx_strcat()
 * @see cx_strfree_a()
 */
__attribute__((__warn_unused_result__, __nonnull__))
cxmutstr cx_strcat_a(
        CxAllocator *alloc,
        size_t count,
        ...
);

/**
 * Concatenates two or more strings.
 *
 * The resulting string will be allocated by standard \c malloc().
 * So developers \em must pass the return value to cx_strfree() eventually.
 *
 * This macro expands to a call of cx_strcat_a() with \c cxDefaultAllocator.
 * At least one string must be specified after \p count, because the
 * variadic part of the macro must not be empty.
 *
 * \note It is guaranteed that there is only one allocation.
 * It is also guaranteed that the returned string is zero-terminated.
 *
 * @param count   the total number of strings to concatenate
 * @param ...     all strings
 * @return the concatenated string
 *
 * @see cx_strcat_a()
 */
#define cx_strcat(count, ...) \
cx_strcat_a(cxDefaultAllocator, count, __VA_ARGS__)

/**
 * Returns a substring starting at the specified location.
 *
 * \attention the new string references the same memory area as the
 * input string and is usually \em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @return a substring of \p string starting at \p start
 *
 * @see cx_strsubsl()
 * @see cx_strsubs_m()
 * @see cx_strsubsl_m()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strsubs(
        cxstring string,
        size_t start
);

/**
 * Returns a substring starting at the specified location.
 *
 * The returned string will be limited to \p length bytes or the number
 * of bytes available in \p string, whichever is smaller.
 *
 * \attention the new string references the same memory area as the
 * input string and is usually \em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @param length the maximum length of the returned string
 * @return a substring of \p string starting at \p start
 *
 * @see cx_strsubs()
 * @see cx_strsubs_m()
 * @see cx_strsubsl_m()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strsubsl(
        cxstring string,
        size_t start,
        size_t length
);

/**
 * Returns a substring starting at the specified location.
 *
 * \attention the new string references the same memory area as the
 * input string and is usually \em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @return a substring of \p string starting at \p start
 *
 * @see cx_strsubsl_m()
 * @see cx_strsubs()
 * @see cx_strsubsl()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strsubs_m(
        cxmutstr string,
        size_t start
);

/**
 * Returns a substring starting at the specified location.
 *
 * The returned string will be limited to \p length bytes or the number
 * of bytes available in \p string, whichever is smaller.
 *
 * \attention the new string references the same memory area as the
 * input string and is usually \em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @param length the maximum length of the returned string
 * @return a substring of \p string starting at \p start
 *
 * @see cx_strsubs_m()
 * @see cx_strsubs()
 * @see cx_strsubsl()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strsubsl_m(
        cxmutstr string,
        size_t start,
        size_t length
);

/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified character.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string where to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the first location of \p chr
 *
 * @see cx_strchr_m()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strchr(
        cxstring string,
        int chr
);

/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified character.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string where to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the first location of \p chr
 *
 * @see cx_strchr()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strchr_m(
        cxmutstr string,
        int chr
);

/**
 * Returns a substring starting at the location of the last occurrence of the
 * specified character.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string where to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the last location of \p chr
 *
 * @see cx_strrchr_m()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strrchr(
        cxstring string,
        int chr
);

/**
 * Returns a substring starting at the location of the last occurrence of the
 * specified character.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string where to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the last location of \p chr
 *
 * @see cx_strrchr()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strrchr_m(
        cxmutstr string,
        int chr
);

/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified string.
 *
 * If \p haystack does not contain \p needle, an empty string is returned.
 *
 * If \p needle is an empty string, the complete \p haystack is
 * returned.
 *
 * @param haystack the string to be scanned
 * @param needle  string containing the sequence of characters to match
 * @return       a substring starting at the first occurrence of
 *               \p needle, or an empty string, if the sequence is not
 *               contained
 * @see cx_strstr_m()
 */
__attribute__((__warn_unused_result__))
cxstring cx_strstr(
        cxstring haystack,
        cxstring needle
);

/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified string.
 *
 * If \p haystack does not contain \p needle, an empty string is returned.
 *
 * If \p needle is an empty string, the complete \p haystack is
 * returned.
 *
 * @param haystack the string to be scanned
 * @param needle  string containing the sequence of characters to match
 * @return       a substring starting at the first occurrence of
 *               \p needle, or an empty string, if the sequence is not
 *               contained
 * @see cx_strstr()
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strstr_m(
        cxmutstr haystack,
        cxstring needle
);

/**
 * Splits a given string using a delimiter string.
 *
 * \note The resulting array contains strings that point to the source
 * \p string. Use cx_strdup() to get copies.
 *
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit the maximum number of split items
 * @param output a pre-allocated array of at least \p limit length
 * @return the actual number of split items
 */
__attribute__((__warn_unused_result__, __nonnull__))
size_t cx_strsplit(
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring *output
);

/**
 * Splits a given string using a delimiter string.
 *
 * The array pointed to by \p output will be allocated by \p allocator.
 *
 * \note The resulting array contains strings that point to the source
 * \p string. Use cx_strdup() to get copies.
 *
 * \attention If allocation fails, the \c NULL pointer will be written to
 * \p output and the number returned will be zero.
 *
 * @param allocator the allocator to use for allocating the resulting array
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit the maximum number of split items
 * @param output a pointer where the address of the allocated array shall be
 * written to
 * @return the actual number of split items
 */
__attribute__((__warn_unused_result__, __nonnull__))
size_t cx_strsplit_a(
        CxAllocator *allocator,
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring **output
);


/**
 * Splits a given string using a delimiter string.
 *
 * \note The resulting array contains strings that point to the source
 * \p string. Use cx_strdup() to get copies.
 *
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit the maximum number of split items
 * @param output a pre-allocated array of at least \p limit length
 * @return the actual number of split items
 */
__attribute__((__warn_unused_result__, __nonnull__))
size_t cx_strsplit_m(
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr *output
);

/**
 * Splits a given string using a delimiter string.
 *
 * The array pointed to by \p output will be allocated by \p allocator.
 *
 * \note The resulting array contains strings that point to the source
 * \p string. Use cx_strdup() to get copies.
 *
 * \attention If allocation fails, the \c NULL pointer will be written to
 * \p output and the number returned will be zero.
 *
 * @param allocator the allocator to use for allocating the resulting array
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit the maximum number of split items
 * @param output a pointer where the address of the allocated array shall be
 * written to
 * @return the actual number of split items
 */
__attribute__((__warn_unused_result__, __nonnull__))
size_t cx_strsplit_ma(
        CxAllocator *allocator,
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr **output
);

/**
 * Compares two strings.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return negative if \p s1 is smaller than \p s2, positive if \p s1 is larger
 * than \p s2, zero if both strings equal
 */
__attribute__((__warn_unused_result__))
int cx_strcmp(
        cxstring s1,
        cxstring s2
);

/**
 * Compares two strings ignoring case.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return negative if \p s1 is smaller than \p s2, positive if \p s1 is larger
 * than \p s2, zero if both strings equal ignoring case
 */
__attribute__((__warn_unused_result__))
int cx_strcasecmp(
        cxstring s1,
        cxstring s2
);


/**
 * Creates a duplicate of the specified string.
 *
 * The new string will contain a copy allocated by \p allocator.
 *
 * \note The returned string is guaranteed to be zero-terminated.
 *
 * @param allocator the allocator to use
 * @param string the string to duplicate
 * @return a duplicate of the string
 * @see cx_strdup()
 */
__attribute__((__warn_unused_result__, __nonnull__))
cxmutstr cx_strdup_a(
        CxAllocator *allocator,
        cxstring string
);

/**
 * Creates a duplicate of the specified string.
 *
 * The new string will contain a copy allocated by standard
 * \c malloc(). So developers \em must pass the return value to cx_strfree().
 *
 * This macro expands to a call of cx_strdup_a() with \c cxDefaultAllocator.
 *
 * \note The returned string is guaranteed to be zero-terminated.
 *
 * @param string the string to duplicate
 * @return a duplicate of the string
 * @see cx_strdup_a()
 */
#define cx_strdup(string) cx_strdup_a(cxDefaultAllocator, string)

/**
 * Omits leading and trailing spaces.
 *
 * \note the returned string references the same memory, thus you
 * must \em not free the returned memory.
 *
 * @param string the string that shall be trimmed
 * @return the trimmed string
 */
__attribute__((__warn_unused_result__))
cxstring cx_strtrim(cxstring string);

/**
 * Omits leading and trailing spaces.
 *
 * \note the returned string references the same memory, thus you
 * must \em not free the returned memory.
 *
 * @param string the string that shall be trimmed
 * @return the trimmed string
 */
__attribute__((__warn_unused_result__))
cxmutstr cx_strtrim_m(cxmutstr string);

/**
 * Checks, if a string has a specific prefix.
 *
 * @param string the string to check
 * @param prefix the prefix the string should have
 * @return \c true, if and only if the string has the specified prefix,
 * \c false otherwise
 */
__attribute__((__warn_unused_result__))
bool cx_strprefix(
        cxstring string,
        cxstring prefix
);

/**
 * Checks, if a string has a specific suffix.
 *
 * @param string the string to check
 * @param suffix the suffix the string should have
 * @return \c true, if and only if the string has the specified suffix,
 * \c false otherwise
 */
__attribute__((__warn_unused_result__))
bool cx_strsuffix(
        cxstring string,
        cxstring suffix
);

/**
 * Checks, if a string has a specific prefix, ignoring the case.
 *
 * @param string the string to check
 * @param prefix the prefix the string should have
 * @return \c true, if and only if the string has the specified prefix,
 * \c false otherwise
 */
__attribute__((__warn_unused_result__))
bool cx_strcaseprefix(
        cxstring string,
        cxstring prefix
);

/**
 * Checks, if a string has a specific suffix, ignoring the case.
 *
 * @param string the string to check
 * @param suffix the suffix the string should have
 * @return \c true, if and only if the string has the specified suffix,
 * \c false otherwise
 */
__attribute__((__warn_unused_result__))
bool cx_strcasesuffix(
        cxstring string,
        cxstring suffix
);

/**
 * Converts the string to lower case.
 *
 * The change is made in-place. If you want a copy, use cx_strdup(), first.
 *
 * @param string the string to modify
 * @see cx_strdup()
 */
void cx_strlower(cxmutstr string);

/**
 * Converts the string to upper case.
 *
 * The change is made in-place. If you want a copy, use cx_strdup(), first.
 *
 * @param string the string to modify
 * @see cx_strdup()
 */
void cx_strupper(cxmutstr string);

/**
 * Replaces a pattern in a string with another string.
 *
 * The pattern is taken literally and is no regular expression.
 * Replaces at most \p replmax occurrences.
 *
 * The returned string will be allocated by \p allocator and is guaranteed
 * to be zero-terminated.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * @param allocator the allocator to use
 * @param str the string where replacements should be applied
 * @param pattern the pattern to search for
 * @param replacement the replacement string
 * @param replmax maximum number of replacements
 * @return the resulting string after applying the replacements
 */
__attribute__((__warn_unused_result__, __nonnull__))
cxmutstr cx_strreplacen_a(
        CxAllocator *allocator,
        cxstring str,
        cxstring pattern,
        cxstring replacement,
        size_t replmax
);

/**
 * Replaces a pattern in a string with another string.
 *
 * The pattern is taken literally and is no regular expression.
 * Replaces at most \p replmax occurrences.
 *
 * The returned string will be allocated by \c malloc() and is guaranteed
 * to be zero-terminated.
 *
 * This macro expands to a call of cx_strreplacen_a() with
 * \c cxDefaultAllocator.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * @param str the string where replacements should be applied
 * @param pattern the pattern to search for
 * @param replacement the replacement string
 * @param replmax maximum number of replacements
 * @return the resulting string after applying the replacements
 *
 * @see cx_strreplacen_a()
 */
#define cx_strreplacen(str, pattern, replacement, replmax) \
cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, replmax)

/**
 * Replaces a pattern in a string with another string.
 *
 * The pattern is taken literally and is no regular expression.
 * The number of replacements is not limited: this macro expands to a call
 * of cx_strreplacen_a() with \c SIZE_MAX as the maximum.
 *
 * NOTE(review): \c SIZE_MAX must be visible where this macro is expanded;
 * presumably it is provided via common.h - confirm.
 *
 * The returned string will be allocated by \p allocator and is guaranteed
 * to be zero-terminated.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * @param allocator the allocator to use
 * @param str the string where replacements should be applied
 * @param pattern the pattern to search for
 * @param replacement the replacement string
 * @return the resulting string after applying the replacements
 *
 * @see cx_strreplacen_a()
 */
#define cx_strreplace_a(allocator, str, pattern, replacement) \
cx_strreplacen_a(allocator, str, pattern, replacement, SIZE_MAX)

/**
 * Replaces a pattern in a string with another string.
 *
 * The pattern is taken literally and is no regular expression.
 * The number of replacements is not limited: this macro expands to a call
 * of cx_strreplacen_a() with \c cxDefaultAllocator and \c SIZE_MAX as the
 * maximum.
 *
 * The returned string will be allocated by \c malloc() and is guaranteed
 * to be zero-terminated.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * @param str the string where replacements should be applied
 * @param pattern the pattern to search for
 * @param replacement the replacement string
 * @return the resulting string after applying the replacements
 *
 * @see cx_strreplacen()
 * @see cx_strreplace_a()
 */
#define cx_strreplace(str, pattern, replacement) \
cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, SIZE_MAX)

#ifdef __cplusplus
} // extern "C"
#endif

#endif //UCX_STRING_H

mercurial