src/cx/string.h

Sat, 11 Jan 2025 12:56:54 +0100

author
Mike Becker <universe@uap-core.de>
date
Sat, 11 Jan 2025 12:56:54 +0100
changeset 1122
49ab92de9a13
parent 1107
9d77c7a99441
permissions
-rw-r--r--

add more escape sequences to unescape function

and change the name of token_start to more clearly express what it actually is
(the start of the currently parsed PART of the token)

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * @file string.h
 * @brief Strings that know their length.
 * @author Mike Becker
 * @author Olaf Wintermann
 * @copyright 2-Clause BSD License
 */

#ifndef UCX_STRING_H
#define UCX_STRING_H

#include "common.h"
#include "allocator.h"

/**
 * The maximum length of the "needle" in cx_strstr() that can use SBO.
 *
 * This header only declares the constant; the definition (and therefore the
 * actual value) lives outside of this header.
 *
 * @see cx_strstr()
 */
extern const unsigned cx_strstr_sbo_size;

/**
 * The UCX string structure for mutable strings.
 *
 * Pairs a character pointer with an explicit length.
 */
struct cx_mutstr_s {
    /**
     * A pointer to the string.
     * @note The string is not necessarily zero-terminated.
     * Always use the length.
     */
    char *ptr;
    /** The length of the string */
    size_t length;
};

/**
 * A mutable string.
 *
 * @see cxstring
 */
typedef struct cx_mutstr_s cxmutstr;

/**
 * The UCX string structure for immutable (constant) strings.
 *
 * Pairs a pointer to constant characters with an explicit length.
 */
struct cx_string_s {
    /**
     * A pointer to the immutable string.
     * @note The string is not necessarily zero-terminated.
     * Always use the length.
     */
    const char *ptr;
    /** The length of the string */
    size_t length;
};

/**
 * An immutable string.
 *
 * @see cxmutstr
 */
typedef struct cx_string_s cxstring;

/**
 * Context for string tokenizing.
 *
 * @see cx_strtok()
 * @see cx_strtok_next()
 */
struct cx_strtok_ctx_s {
    /**
     * The string to tokenize.
     */
    cxstring str;
    /**
     * The primary delimiter.
     */
    cxstring delim;
    /**
     * Optional array of more delimiters.
     *
     * @see cx_strtok_delim()
     */
    const cxstring *delim_more;
    /**
     * Length of the array containing more delimiters.
     */
    size_t delim_more_count;
    /**
     * Position of the currently active token in the source string.
     */
    size_t pos;
    /**
     * Position of the next delimiter in the source string.
     *
     * If the tokenizer has not yet returned a token, the content of this field
     * is undefined. If the tokenizer reached the end of the string, this field
     * contains the length of the source string.
     */
    size_t delim_pos;
    /**
     * The position of the next token in the source string.
     */
    size_t next_pos;
    /**
     * The number of already found tokens.
     */
    size_t found;
    /**
     * The maximum number of tokens that shall be returned.
     */
    size_t limit;
};

/**
 * A string tokenizing context.
 *
 * @see cx_strtok()
 * @see cx_strtok_m()
 */
typedef struct cx_strtok_ctx_s CxStrtokCtx;

#ifdef __cplusplus
extern "C" {

/**
 * A literal initializer for a UCX string structure (C++ variant).
 *
 * The argument MUST be a string (const char*) @em literal, because the
 * length is computed with @c sizeof and not with @c strlen().
 *
 * @param literal the string literal
 */
#define CX_STR(literal) cxstring{literal, sizeof(literal) - 1}

#else // __cplusplus

/**
 * A literal initializer for a UCX string structure (C variant).
 *
 * The argument MUST be a string (const char*) @em literal, because the
 * length is computed with @c sizeof and not with @c strlen().
 *
 * @param literal the string literal
 */
#define CX_STR(literal) (cxstring){literal, sizeof(literal) - 1}

#endif


/**
 * Wraps a mutable string that must be zero-terminated.
 *
 * The length is implicitly inferred by using a call to @c strlen().
 *
 * @note The wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a constant string, use cx_str().
 *
 * @param cstring the string to wrap, must be zero-terminated
 * @return the wrapped string
 *
 * @see cx_mutstrn()
 * @see cx_str()
 */
cx_attr_nonnull
cx_attr_nodiscard
cx_attr_cstr_arg(1)
cxmutstr cx_mutstr(char *cstring);

/**
 * Wraps a mutable string that does not need to be zero-terminated.
 *
 * The argument may be @c NULL if the length is zero.
 *
 * @note The wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a constant string, use cx_strn().
 *
 * @param cstring  the string to wrap (or @c NULL, only if the length is zero)
 * @param length   the length of the string
 * @return the wrapped string
 *
 * @see cx_mutstr()
 * @see cx_strn()
 */
cx_attr_nodiscard
cx_attr_access_rw(1, 2)
cxmutstr cx_mutstrn(
        char *cstring,
        size_t length
);

/**
 * Wraps a constant string that must be zero-terminated.
 *
 * The length is implicitly inferred by using a call to @c strlen().
 *
 * @note The wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a non-constant string, use cx_mutstr().
 *
 * @param cstring the string to wrap, must be zero-terminated
 * @return the wrapped string
 *
 * @see cx_strn()
 * @see cx_mutstr()
 */
cx_attr_nonnull
cx_attr_nodiscard
cx_attr_cstr_arg(1)
cxstring cx_str(const char *cstring);


/**
 * Wraps a constant string that does not need to be zero-terminated.
 *
 * The argument may be @c NULL if the length is zero.
 *
 * @note The wrapped string will share the specified pointer to the string.
 * If you do want a copy, use cx_strdup() on the return value of this function.
 *
 * If you need to wrap a non-constant string, use cx_mutstrn().
 *
 * @param cstring  the string to wrap (or @c NULL, only if the length is zero)
 * @param length   the length of the string
 * @return the wrapped string
 *
 * @see cx_str()
 * @see cx_mutstrn()
 */
cx_attr_nodiscard
cx_attr_access_r(1, 2)
cxstring cx_strn(
        const char *cstring,
        size_t length
);

#ifdef __cplusplus
} // extern "C"
/**
 * Casts a mutable string to an immutable string (C++ overload).
 *
 * The result wraps the same pointer and length via cx_strn(), so both
 * structs refer to the same character data.
 *
 * @param str the mutable string
 * @return an immutable string sharing the pointer of @p str
 */
cx_attr_nodiscard
static inline cxstring cx_strcast(cxmutstr str) {
    return cx_strn(str.ptr, str.length);
}
/**
 * Overload for strings that are already immutable (C++ overload).
 *
 * @param str the immutable string
 * @return @p str, unchanged
 */
cx_attr_nodiscard
static inline cxstring cx_strcast(cxstring str) {
    return str;
}
extern "C" {
#else
/**
 * Internal function, do not use.
 *
 * Selected by the cx_strcast() macro for mutable strings: copies pointer and
 * length into an immutable string struct. The character data is shared,
 * not duplicated.
 *
 * @param str the mutable string
 * @return an immutable string with the same pointer and length as @p str
 * @see cx_strcast()
 */
cx_attr_nodiscard
static inline cxstring cx_strcast_m(cxmutstr str) {
    cxstring view;
    view.ptr = str.ptr;
    view.length = str.length;
    return view;
}
/**
 * Internal function, do not use.
 *
 * Selected by the cx_strcast() macro for strings that are already immutable;
 * returns its argument as is.
 *
 * @param str the immutable string
 * @return @p str, unchanged
 * @see cx_strcast()
 */
cx_attr_nodiscard
static inline cxstring cx_strcast_c(cxstring str) {
    return str;
}

/**
 * Casts a mutable string to an immutable string.
 *
 * Does nothing for already immutable strings.
 *
 * The macro dispatches on the static type of @p str: a @c cxmutstr is passed
 * to cx_strcast_m() and a @c cxstring to cx_strcast_c().
 *
 * @note This is not really a cast. Instead, you get a copy
 * of the struct with the desired pointer type. Both structs still
 * point to the same location, though!
 *
 * @param str (@c cxstring or @c cxmutstr) the string to cast
 * @return (@c cxstring) an immutable copy of the string pointer
 */
#define cx_strcast(str) _Generic((str), \
        cxmutstr: cx_strcast_m, \
        cxstring: cx_strcast_c) \
        (str)
#endif

/**
 * Passes the pointer in this string to @c free().
 *
 * The pointer in the struct is set to @c NULL and the length is set to zero.
 *
 * @note There is no implementation for cxstring, because it is unlikely that
 * you ever have a <code>const char*</code> you are really supposed to free.
 * If you encounter such a situation, you should double-check your code.
 *
 * @param str the string to free
 * @see cx_strfree_a()
 */
void cx_strfree(cxmutstr *str);

/**
 * Passes the pointer in this string to the allocator's free function.
 *
 * The pointer in the struct is set to @c NULL and the length is set to zero.
 *
 * @note There is no implementation for cxstring, because it is unlikely that
 * you ever have a <code>const char*</code> you are really supposed to free.
 * If you encounter such a situation, you should double-check your code.
 *
 * @param alloc the allocator
 * @param str the string to free
 * @see cx_strfree()
 */
cx_attr_nonnull_arg(1)
void cx_strfree_a(
        const CxAllocator *alloc,
        cxmutstr *str
);

/**
 * Returns the accumulated length of all specified strings.
 *
 * If this sum overflows, @c errno is set to @c EOVERFLOW.
 *
 * @attention If the count argument is larger than the number of the
 * specified strings, the behavior is undefined.
 *
 * @param count    the total number of specified strings
 * @param ...      all strings
 * @return the accumulated length of all strings
 */
cx_attr_nodiscard
size_t cx_strlen(
        size_t count,
        ...
);

/**
 * Concatenates strings.
 *
 * The resulting string will be allocated by the specified allocator.
 * So developers @em must pass the return value to cx_strfree_a() eventually.
 *
 * If @p str already contains a string, the memory will be reallocated and
 * the other strings are appended. Otherwise, new memory is allocated.
 *
 * If memory allocation fails, the pointer in the returned string will
 * be @c NULL. Depending on the allocator, @c errno might be set.
 *
 * @note It is guaranteed that there is only one allocation for the
 * resulting string.
 * It is also guaranteed that the returned string is zero-terminated.
 *
 * @param alloc the allocator to use
 * @param str   the string the other strings shall be concatenated to
 * @param count the number of the other following strings to concatenate
 * @param ...   all other UCX strings
 * @return the concatenated string
 *
 * @see cx_strcat_a()
 * @see cx_strcat()
 * @see cx_strcat_m()
 */
cx_attr_nodiscard
cx_attr_nonnull
cxmutstr cx_strcat_ma(
        const CxAllocator *alloc,
        cxmutstr str,
        size_t count,
        ...
);

/**
 * Concatenates strings and returns a new string.
 *
 * The resulting string will be allocated by the specified allocator.
 * So developers @em must pass the return value to cx_strfree_a() eventually.
 *
 * If memory allocation fails, the pointer in the returned string will
 * be @c NULL. Depending on the allocator, @c errno might be set.
 *
 * This macro calls cx_strcat_ma() with an empty string
 * (@c NULL pointer, length zero) as the string to append to.
 *
 * @note It is guaranteed that there is only one allocation for the
 * resulting string.
 * It is also guaranteed that the returned string is zero-terminated.
 *
 * @param alloc (@c CxAllocator*) the allocator to use
 * @param count (@c size_t) the number of the other following strings to concatenate
 * @param ...   all other UCX strings
 * @return (@c cxmutstr) the concatenated string
 */
#define cx_strcat_a(alloc, count, ...) \
cx_strcat_ma(alloc, cx_mutstrn(NULL, 0), count, __VA_ARGS__)

/**
 * Concatenates strings and returns a new string.
 *
 * The resulting string will be allocated by standard @c malloc().
 * So developers @em must pass the return value to cx_strfree() eventually.
 *
 * If memory allocation fails, the pointer in the returned string will
 * be @c NULL and @c errno might be set.
 *
 * This macro calls cx_strcat_ma() with @c cxDefaultAllocator and an empty
 * string (@c NULL pointer, length zero) as the string to append to.
 *
 * @note It is guaranteed that there is only one allocation for the
 * resulting string.
 * It is also guaranteed that the returned string is zero-terminated.
 *
 * @param count (@c size_t) the number of the other following strings to concatenate
 * @param ... all other UCX strings
 * @return (@c cxmutstr) the concatenated string
 */
#define cx_strcat(count, ...) \
cx_strcat_ma(cxDefaultAllocator, cx_mutstrn(NULL, 0), count, __VA_ARGS__)

/**
 * Concatenates strings.
 *
 * The resulting string will be allocated by standard @c malloc().
 * So developers @em must pass the return value to cx_strfree() eventually.
 *
 * If @p str already contains a string, the memory will be reallocated and
 * the other strings are appended. Otherwise, new memory is allocated.
 *
 * If memory allocation fails, the pointer in the returned string will
 * be @c NULL and @c errno might be set.
 *
 * This macro calls cx_strcat_ma() with @c cxDefaultAllocator.
 *
 * @note It is guaranteed that there is only one allocation for the
 * resulting string.
 * It is also guaranteed that the returned string is zero-terminated.
 *
 * @param str (@c cxmutstr) the string the other strings shall be concatenated to
 * @param count (@c size_t) the number of the other following strings to concatenate
 * @param ... all other strings
 * @return (@c cxmutstr) the concatenated string
 */
#define cx_strcat_m(str, count, ...) \
cx_strcat_ma(cxDefaultAllocator, str, count, __VA_ARGS__)

/**
 * Returns a substring of an immutable string starting at the specified
 * location.
 *
 * @attention The new string references the same memory area as the
 * input string and is usually @em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @return a substring of @p string starting at @p start
 *
 * @see cx_strsubsl()
 * @see cx_strsubs_m()
 * @see cx_strsubsl_m()
 */
cx_attr_nodiscard
cxstring cx_strsubs(
        cxstring string,
        size_t start
);

/**
 * Returns a length-limited substring of an immutable string starting at the
 * specified location.
 *
 * The returned string will be limited to @p length bytes or the number
 * of bytes available in @p string, whichever is smaller.
 *
 * @attention The new string references the same memory area as the
 * input string and is usually @em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @param length the maximum length of the returned string
 * @return a substring of @p string starting at @p start
 *
 * @see cx_strsubs()
 * @see cx_strsubs_m()
 * @see cx_strsubsl_m()
 */
cx_attr_nodiscard
cxstring cx_strsubsl(
        cxstring string,
        size_t start,
        size_t length
);

/**
 * Returns a substring of a mutable string starting at the specified location.
 *
 * @attention The new string references the same memory area as the
 * input string and is usually @em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @return a substring of @p string starting at @p start
 *
 * @see cx_strsubsl_m()
 * @see cx_strsubs()
 * @see cx_strsubsl()
 */
cx_attr_nodiscard
cxmutstr cx_strsubs_m(
        cxmutstr string,
        size_t start
);

/**
 * Returns a length-limited substring of a mutable string starting at the
 * specified location.
 *
 * The returned string will be limited to @p length bytes or the number
 * of bytes available in @p string, whichever is smaller.
 *
 * @attention The new string references the same memory area as the
 * input string and is usually @em not zero-terminated.
 * Use cx_strdup() to get a copy.
 *
 * @param string input string
 * @param start  start location of the substring
 * @param length the maximum length of the returned string
 * @return a substring of @p string starting at @p start
 *
 * @see cx_strsubs_m()
 * @see cx_strsubs()
 * @see cx_strsubsl()
 */
cx_attr_nodiscard
cxmutstr cx_strsubsl_m(
        cxmutstr string,
        size_t start,
        size_t length
);

/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified character.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string in which to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the first location of @p chr
 *
 * @see cx_strchr_m()
 * @see cx_strrchr()
 */
cx_attr_nodiscard
cxstring cx_strchr(
        cxstring string,
        int chr
);

/**
 * Returns a substring of a mutable string starting at the location of the
 * first occurrence of the specified character.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string in which to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the first location of @p chr
 *
 * @see cx_strchr()
 * @see cx_strrchr_m()
 */
cx_attr_nodiscard
cxmutstr cx_strchr_m(
        cxmutstr string,
        int chr
);

/**
 * Returns a substring starting at the location of the last occurrence of the
 * specified character.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string in which to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the last location of @p chr
 *
 * @see cx_strrchr_m()
 * @see cx_strchr()
 */
cx_attr_nodiscard
cxstring cx_strrchr(
        cxstring string,
        int chr
);

/**
 * Returns a substring of a mutable string starting at the location of the
 * last occurrence of the specified character.
 *
 * If the string does not contain the character, an empty string is returned.
 *
 * @param string the string in which to locate the character
 * @param chr    the character to locate
 * @return       a substring starting at the last location of @p chr
 *
 * @see cx_strrchr()
 * @see cx_strchr_m()
 */
cx_attr_nodiscard
cxmutstr cx_strrchr_m(
        cxmutstr string,
        int chr
);

/**
 * Returns a substring starting at the location of the first occurrence of the
 * specified string.
 *
 * If @p haystack does not contain @p needle, an empty string is returned.
 *
 * If @p needle is an empty string, the complete @p haystack is
 * returned.
 *
 * @param haystack the string to be scanned
 * @param needle   the string containing the sequence of characters to match
 * @return         a substring starting at the first occurrence of
 *                 @p needle, or an empty string, if the sequence is not
 *                 contained
 * @see cx_strstr_m()
 * @see cx_strstr_sbo_size
 */
cx_attr_nodiscard
cxstring cx_strstr(
        cxstring haystack,
        cxstring needle
);

/**
 * Returns a substring of a mutable string starting at the location of the
 * first occurrence of the specified string.
 *
 * If @p haystack does not contain @p needle, an empty string is returned.
 *
 * If @p needle is an empty string, the complete @p haystack is
 * returned.
 *
 * @param haystack the string to be scanned
 * @param needle   the string containing the sequence of characters to match
 * @return         a substring starting at the first occurrence of
 *                 @p needle, or an empty string, if the sequence is not
 *                 contained
 * @see cx_strstr()
 */
cx_attr_nodiscard
cxmutstr cx_strstr_m(
        cxmutstr haystack,
        cxstring needle
);

/**
 * Splits a given string using a delimiter string.
 *
 * The split items are written to a caller-provided array.
 *
 * @note The resulting array contains strings that point to the source
 * @p string. Use cx_strdup() to get copies.
 *
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit  the maximum number of split items
 * @param output a pre-allocated array of at least @p limit length
 * @return the actual number of split items
 *
 * @see cx_strsplit_a()
 * @see cx_strsplit_m()
 */
cx_attr_nodiscard
cx_attr_nonnull
cx_attr_access_w(4, 3)
size_t cx_strsplit(
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring *output
);

/**
 * Splits a given string using a delimiter string.
 *
 * The array pointed to by @p output will be allocated by @p allocator.
 *
 * @note The resulting array contains strings that point to the source
 * @p string. Use cx_strdup() to get copies.
 *
 * @attention If allocation fails, the @c NULL pointer will be written to
 * @p output and the number returned will be zero.
 *
 * @param allocator the allocator to use for allocating the resulting array
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit  the maximum number of split items
 * @param output a pointer where the address of the allocated array shall be
 * written to
 * @return the actual number of split items
 *
 * @see cx_strsplit()
 * @see cx_strsplit_ma()
 */
cx_attr_nodiscard
cx_attr_nonnull
cx_attr_access_w(5)
size_t cx_strsplit_a(
        const CxAllocator *allocator,
        cxstring string,
        cxstring delim,
        size_t limit,
        cxstring **output
);


/**
 * Splits a given mutable string using a delimiter string.
 *
 * The split items are written to a caller-provided array.
 *
 * @note The resulting array contains strings that point to the source
 * @p string. Use cx_strdup() to get copies.
 *
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit  the maximum number of split items
 * @param output a pre-allocated array of at least @p limit length
 * @return the actual number of split items
 *
 * @see cx_strsplit()
 * @see cx_strsplit_ma()
 */
cx_attr_nodiscard
cx_attr_nonnull
cx_attr_access_w(4, 3)
size_t cx_strsplit_m(
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr *output
);

/**
 * Splits a given mutable string using a delimiter string.
 *
 * The array pointed to by @p output will be allocated by @p allocator.
 *
 * @note The resulting array contains strings that point to the source
 * @p string. Use cx_strdup() to get copies.
 *
 * @attention If allocation fails, the @c NULL pointer will be written to
 * @p output and the number returned will be zero.
 *
 * @param allocator the allocator to use for allocating the resulting array
 * @param string the string to split
 * @param delim  the delimiter
 * @param limit  the maximum number of split items
 * @param output a pointer where the address of the allocated array shall be
 * written to
 * @return the actual number of split items
 *
 * @see cx_strsplit_m()
 * @see cx_strsplit_a()
 */
cx_attr_nodiscard
cx_attr_nonnull
cx_attr_access_w(5)
size_t cx_strsplit_ma(
        const CxAllocator *allocator,
        cxmutstr string,
        cxstring delim,
        size_t limit,
        cxmutstr **output
);

/**
 * Compares two strings.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return negative if @p s1 is smaller than @p s2, positive if @p s1 is larger
 * than @p s2, zero if both strings are equal
 *
 * @see cx_strcasecmp()
 * @see cx_strcmp_p()
 */
cx_attr_nodiscard
int cx_strcmp(
        cxstring s1,
        cxstring s2
);

/**
 * Compares two strings ignoring case.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return negative if @p s1 is smaller than @p s2, positive if @p s1 is larger
 * than @p s2, zero if both strings are equal ignoring case
 *
 * @see cx_strcmp()
 * @see cx_strcasecmp_p()
 */
cx_attr_nodiscard
int cx_strcasecmp(
        cxstring s1,
        cxstring s2
);

/**
 * Compares two strings.
 *
 * This function has a compatible signature for the use as a cx_compare_func.
 *
 * NOTE(review): the arguments are untyped pointers; presumably they must
 * point to @c cxstring structures - confirm against the implementation.
 *
 * @param s1 pointer to the first string
 * @param s2 pointer to the second string
 * @return negative if @p s1 is smaller than @p s2, positive if @p s1 is larger
 * than @p s2, zero if both strings are equal
 *
 * @see cx_strcmp()
 */
cx_attr_nodiscard
cx_attr_nonnull
int cx_strcmp_p(
        const void *s1,
        const void *s2
);

/**
 * Compares two strings ignoring case.
 *
 * This function has a compatible signature for the use as a cx_compare_func.
 *
 * NOTE(review): the arguments are untyped pointers; presumably they must
 * point to @c cxstring structures - confirm against the implementation.
 *
 * @param s1 pointer to the first string
 * @param s2 pointer to the second string
 * @return negative if @p s1 is smaller than @p s2, positive if @p s1 is larger
 * than @p s2, zero if both strings are equal ignoring case
 *
 * @see cx_strcasecmp()
 */
cx_attr_nodiscard
cx_attr_nonnull
int cx_strcasecmp_p(
        const void *s1,
        const void *s2
);


/**
 * Creates a duplicate of the specified string.
 *
 * The new string will contain a copy allocated by @p allocator.
 *
 * @note The returned string is guaranteed to be zero-terminated.
 *
 * @param allocator the allocator to use
 * @param string the string to duplicate
 * @return a duplicate of the string
 * @see cx_strdup()
 * @see cx_strdup_ma()
 */
cx_attr_nodiscard
cx_attr_nonnull
cxmutstr cx_strdup_a(
        const CxAllocator *allocator,
        cxstring string
);

/**
 * Creates a duplicate of the specified string.
 *
 * The new string will contain a copy allocated by standard
 * @c malloc(). So developers @em must pass the return value to cx_strfree().
 *
 * This macro calls cx_strdup_a() with @c cxDefaultAllocator.
 *
 * @note The returned string is guaranteed to be zero-terminated.
 *
 * @param string (@c cxstring) the string to duplicate
 * @return (@c cxmutstr) a duplicate of the string
 * @see cx_strdup_a()
 * @see cx_strdup_m()
 */
#define cx_strdup(string) cx_strdup_a(cxDefaultAllocator, string)


/**
 * Creates a duplicate of the specified mutable string.
 *
 * The new string will contain a copy allocated by @p allocator.
 *
 * This macro passes @p string through cx_strcast() and calls cx_strdup_a().
 *
 * @note The returned string is guaranteed to be zero-terminated.
 *
 * @param allocator (@c CxAllocator*) the allocator to use
 * @param string (@c cxmutstr) the string to duplicate
 * @return (@c cxmutstr) a duplicate of the string
 * @see cx_strdup_m()
 * @see cx_strdup_a()
 */
#define cx_strdup_ma(allocator, string) cx_strdup_a(allocator, cx_strcast(string))

/**
 * Creates a duplicate of the specified mutable string.
 *
 * The new string will contain a copy allocated by standard
 * @c malloc(). So developers @em must pass the return value to cx_strfree().
 *
 * This macro passes @p string through cx_strcast() and calls cx_strdup_a()
 * with @c cxDefaultAllocator.
 *
 * @note The returned string is guaranteed to be zero-terminated.
 *
 * @param string (@c cxmutstr) the string to duplicate
 * @return (@c cxmutstr) a duplicate of the string
 * @see cx_strdup_ma()
 * @see cx_strdup()
 */
#define cx_strdup_m(string) cx_strdup_a(cxDefaultAllocator, cx_strcast(string))

/**
 * Omits leading and trailing spaces.
 *
 * @note The returned string references the same memory, thus you
 * must @em not free the returned memory.
 *
 * @param string the string that shall be trimmed
 * @return the trimmed string
 * @see cx_strtrim_m()
 */
cx_attr_nodiscard
cxstring cx_strtrim(cxstring string);

/**
 * Omits leading and trailing spaces of a mutable string.
 *
 * @note The returned string references the same memory, thus you
 * must @em not free the returned memory.
 *
 * @param string the string that shall be trimmed
 * @return the trimmed string
 * @see cx_strtrim()
 */
cx_attr_nodiscard
cxmutstr cx_strtrim_m(cxmutstr string);

/**
 * Checks if a string has a specific prefix.
 *
 * @param string the string to check
 * @param prefix the prefix the string should have
 * @return @c true, if and only if the string has the specified prefix,
 * @c false otherwise
 * @see cx_strcaseprefix()
 */
cx_attr_nodiscard
bool cx_strprefix(
        cxstring string,
        cxstring prefix
);

/**
 * Checks if a string has a specific suffix.
 *
 * @param string the string to check
 * @param suffix the suffix the string should have
 * @return @c true, if and only if the string has the specified suffix,
 * @c false otherwise
 * @see cx_strcasesuffix()
 */
cx_attr_nodiscard
bool cx_strsuffix(
        cxstring string,
        cxstring suffix
);

/**
 * Checks if a string has a specific prefix, ignoring the case.
 *
 * @param string the string to check
 * @param prefix the prefix the string should have
 * @return @c true, if and only if the string has the specified prefix
 * (ignoring the case), @c false otherwise
 * @see cx_strprefix()
 */
cx_attr_nodiscard
bool cx_strcaseprefix(
        cxstring string,
        cxstring prefix
);

/**
 * Checks if a string has a specific suffix, ignoring the case.
 *
 * @param string the string to check
 * @param suffix the suffix the string should have
 * @return @c true, if and only if the string has the specified suffix
 * (ignoring the case), @c false otherwise
 * @see cx_strsuffix()
 */
cx_attr_nodiscard
bool cx_strcasesuffix(
        cxstring string,
        cxstring suffix
);

/**
 * Converts the string to lower case.
 *
 * The change is made in-place. If you want a copy, use cx_strdup() first.
 *
 * @param string the string to modify
 * @see cx_strdup()
 * @see cx_strupper()
 */
void cx_strlower(cxmutstr string);

/**
 * Converts the string to upper case.
 *
 * The change is made in-place. If you want a copy, use cx_strdup() first.
 *
 * @param string the string to modify
 * @see cx_strdup()
 * @see cx_strlower()
 */
void cx_strupper(cxmutstr string);

/**
 * Replaces a pattern in a string with another string.
 *
 * The pattern is taken literally and is not a regular expression.
 * Replaces at most @p replmax occurrences.
 *
 * The returned string will be allocated by @p allocator and is guaranteed
 * to be zero-terminated.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * @param allocator the allocator to use
 * @param str the string where replacements should be applied
 * @param pattern the pattern to search for
 * @param replacement the replacement string
 * @param replmax maximum number of replacements
 * @return the resulting string after applying the replacements
 *
 * @see cx_strreplacen()
 * @see cx_strreplace_a()
 * @see cx_strreplace()
 */
cx_attr_nodiscard
cx_attr_nonnull
cxmutstr cx_strreplacen_a(
        const CxAllocator *allocator,
        cxstring str,
        cxstring pattern,
        cxstring replacement,
        size_t replmax
);

/**
 * Replaces a pattern in a string with another string.
 *
 * The pattern is taken literally and is not a regular expression.
 * Replaces at most @p replmax occurrences.
 *
 * The returned string will be allocated by @c malloc() and is guaranteed
 * to be zero-terminated.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * This macro calls cx_strreplacen_a() with @c cxDefaultAllocator.
 *
 * @param str (@c cxstring) the string where replacements should be applied
 * @param pattern (@c cxstring) the pattern to search for
 * @param replacement (@c cxstring) the replacement string
 * @param replmax (@c size_t) maximum number of replacements
 * @return (@c cxmutstr) the resulting string after applying the replacements
 */
#define cx_strreplacen(str, pattern, replacement, replmax) \
cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, replmax)

/**
 * Replaces a pattern in a string with another string.
 *
 * The pattern is taken literally and is not a regular expression.
 * No explicit limit is applied: this macro calls cx_strreplacen_a() with
 * @c SIZE_MAX as the maximum number of replacements.
 *
 * The returned string will be allocated by @p allocator and is guaranteed
 * to be zero-terminated.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * @param allocator (@c CxAllocator*) the allocator to use
 * @param str (@c cxstring) the string where replacements should be applied
 * @param pattern (@c cxstring) the pattern to search for
 * @param replacement (@c cxstring) the replacement string
 * @return (@c cxmutstr) the resulting string after applying the replacements
 */
#define cx_strreplace_a(allocator, str, pattern, replacement) \
cx_strreplacen_a(allocator, str, pattern, replacement, SIZE_MAX)

/**
 * Replaces a pattern in a string with another string.
 *
 * The pattern is taken literally and is not a regular expression.
 * No explicit limit is applied: this macro calls cx_strreplacen_a() with
 * @c cxDefaultAllocator and @c SIZE_MAX as the maximum number of
 * replacements.
 *
 * The returned string will be allocated by @c malloc() and is guaranteed
 * to be zero-terminated.
 *
 * If allocation fails, or the input string is empty,
 * the returned string will be empty.
 *
 * @param str (@c cxstring) the string where replacements should be applied
 * @param pattern (@c cxstring) the pattern to search for
 * @param replacement (@c cxstring) the replacement string
 * @return (@c cxmutstr) the resulting string after applying the replacements
 */
#define cx_strreplace(str, pattern, replacement) \
cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, SIZE_MAX)

/**
 * Creates a string tokenization context.
 *
 * @param str the string to tokenize
 * @param delim the delimiter (must not be empty)
 * @param limit the maximum number of tokens that shall be returned
 * @return a new string tokenization context
 *
 * @see cx_strtok_m()
 * @see cx_strtok_next()
 * @see cx_strtok_delim()
 */
cx_attr_nodiscard
CxStrtokCtx cx_strtok(
        cxstring str,
        cxstring delim,
        size_t limit
);

/**
 * Creates a string tokenization context for a mutable string.
 *
 * @param str the string to tokenize
 * @param delim the delimiter (must not be empty)
 * @param limit the maximum number of tokens that shall be returned
 * @return a new string tokenization context
 *
 * @see cx_strtok()
 * @see cx_strtok_next_m()
 * @see cx_strtok_delim()
 */
cx_attr_nodiscard
CxStrtokCtx cx_strtok_m(
        cxmutstr str,
        cxstring delim,
        size_t limit
);

/**
 * Returns the next token.
 *
 * The token will point to the source string.
 *
 * @param ctx the tokenization context
 * @param token a pointer to memory where the next token shall be stored
 * @return @c true if successful, @c false if the limit or the end of the
 * string has been reached
 *
 * @see cx_strtok()
 * @see cx_strtok_next_m()
 */
cx_attr_nonnull
cx_attr_nodiscard
cx_attr_access_w(2)
bool cx_strtok_next(
        CxStrtokCtx *ctx,
        cxstring *token
);

/**
 * Returns the next token of a mutable string.
 *
 * The token will point to the source string.
 * If the context was not initialized over a mutable string, modifying
 * the data of the returned token is undefined behavior.
 *
 * @param ctx the tokenization context
 * @param token a pointer to memory where the next token shall be stored
 * @return @c true if successful, @c false if the limit or the end of the
 * string has been reached
 * @see cx_strtok_m()
 * @see cx_strtok_next()
 */
cx_attr_nonnull
cx_attr_nodiscard
cx_attr_access_w(2)
bool cx_strtok_next_m(
        CxStrtokCtx *ctx,
        cxmutstr *token
);

/**
 * Defines an array of additional delimiters for the specified
 * tokenization context.
 *
 * @param ctx the tokenization context
 * @param delim array of additional delimiters
 * @param count number of elements in the array
 */
cx_attr_nonnull
cx_attr_access_r(2, 3)
void cx_strtok_delim(
        CxStrtokCtx *ctx,
        const cxstring *delim,
        size_t count
);

/* ------------------------------------------------------------------------- *
 *                string to number conversion functions                      *
 * ------------------------------------------------------------------------- */

/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtos_lc(cxstring str, short *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtoi_lc(cxstring str, int *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtol_lc(cxstring str, long *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtoll_lc(cxstring str, long long *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtoi8_lc(cxstring str, int8_t *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtoi16_lc(cxstring str, int16_t *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtoi32_lc(cxstring str, int32_t *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtoi64_lc(cxstring str, int64_t *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtoz_lc(cxstring str, ssize_t *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtous_lc(cxstring str, unsigned short *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtou_lc(cxstring str, unsigned int *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtoul_lc(cxstring str, unsigned long *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtoull_lc(cxstring str, unsigned long long *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtou8_lc(cxstring str, uint8_t *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtou16_lc(cxstring str, uint16_t *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtou32_lc(cxstring str, uint32_t *output, int base, const char *groupsep);
/**
 * @copydoc cx_strtouz_lc()
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtou64_lc(cxstring str, uint64_t *output, int base, const char *groupsep);

/**
 * Converts a string to a number.
 *
 * The function returns non-zero when conversion is not possible.
 * In that case the function sets errno to EINVAL when the reason is an invalid character or an unsupported base.
 * It sets errno to ERANGE when the target data type is too small to hold the value.
 *
 * The group separators are chosen by the caller via @p groupsep.
 * The convenience macros without the @c _lc suffix (e.g. cx_strtouz())
 * pass a comma as the only group separator.
 *
 * @param str the string to convert
 * @param output a pointer to the integer variable where the result shall be stored
 * @param base the numeric base: 2, 8, 10, or 16
 * @param groupsep each character in this string is treated as group separator and ignored during conversion
 * @retval zero success
 * @retval non-zero conversion was not possible
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtouz_lc(cxstring str, size_t *output, int base, const char *groupsep);

/**
 * Converts a string to a single precision floating point number.
 *
 * The function returns non-zero when conversion is not possible.
 * In that case the function sets errno to EINVAL when the reason is an invalid character.
 * It sets errno to ERANGE when the necessary representation would exceed the limits defined in libc's float.h.
 *
 * The decimal separator and the group separators are chosen by the caller
 * via @p decsep and @p groupsep.
 * The convenience macro cx_strtof() uses a dot as decimal separator
 * and a comma as group separator.
 *
 * @param str the string to convert
 * @param output a pointer to the float variable where the result shall be stored
 * @param decsep the decimal separator
 * @param groupsep each character in this string is treated as group separator and ignored during conversion
 * @retval zero success
 * @retval non-zero conversion was not possible
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtof_lc(cxstring str, float *output, char decsep, const char *groupsep);

/**
 * Converts a string to a double precision floating point number.
 *
 * The function returns non-zero when conversion is not possible.
 * In that case the function sets errno to EINVAL when the reason is an invalid character.
 * It sets errno to ERANGE when the necessary representation would exceed the limits defined in libc's float.h.
 *
 * The decimal separator and the group separators are chosen by the caller
 * via @p decsep and @p groupsep.
 * The convenience macro cx_strtod() uses a dot as decimal separator
 * and a comma as group separator.
 *
 * @param str the string to convert
 * @param output a pointer to the double variable where the result shall be stored
 * @param decsep the decimal separator
 * @param groupsep each character in this string is treated as group separator and ignored during conversion
 * @retval zero success
 * @retval non-zero conversion was not possible
 */
cx_attr_access_w(2) cx_attr_nonnull_arg(2)
int cx_strtod_lc(cxstring str, double *output, char decsep, const char *groupsep);

#ifndef CX_STR_IMPLEMENTATION
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtos_lc(str, output, base, groupsep) cx_strtos_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtoi_lc(str, output, base, groupsep) cx_strtoi_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtol_lc(str, output, base, groupsep) cx_strtol_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtoll_lc(str, output, base, groupsep) cx_strtoll_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtoi8_lc(str, output, base, groupsep) cx_strtoi8_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtoi16_lc(str, output, base, groupsep) cx_strtoi16_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtoi32_lc(str, output, base, groupsep) cx_strtoi32_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtoi64_lc(str, output, base, groupsep) cx_strtoi64_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtoz_lc(str, output, base, groupsep) cx_strtoz_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtous_lc(str, output, base, groupsep) cx_strtous_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtou_lc(str, output, base, groupsep) cx_strtou_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtoul_lc(str, output, base, groupsep) cx_strtoul_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtoull_lc(str, output, base, groupsep) cx_strtoull_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtou8_lc(str, output, base, groupsep) cx_strtou8_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtou16_lc(str, output, base, groupsep) cx_strtou16_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtou32_lc(str, output, base, groupsep) cx_strtou32_lc(cx_strcast(str), output, base, groupsep)
/**
 * @copydoc cx_strtouz_lc()
 */
#define cx_strtou64_lc(str, output, base, groupsep) cx_strtou64_lc(cx_strcast(str), output, base, groupsep)
/**
 * Converts a string to a number.
 *
 * This macro passes @p str through cx_strcast() and forwards all other
 * arguments unchanged to the function of the same name.
 *
 * The function returns non-zero when conversion is not possible.
 * In that case the function sets errno to EINVAL when the reason is an invalid character or an unsupported base.
 * It sets errno to ERANGE when the target data type is too small to hold the value.
 *
 * @param str the string to convert
 * @param output a pointer to the integer variable where the result shall be stored
 * @param base the numeric base: 2, 8, 10, or 16
 * @param groupsep each character in this string is treated as group separator and ignored during conversion
 * @retval zero success
 * @retval non-zero conversion was not possible
 */
#define cx_strtouz_lc(str, output, base, groupsep) cx_strtouz_lc(cx_strcast(str), output, base, groupsep)

/**
 * @copydoc cx_strtouz()
 */
#define cx_strtos(str, output, base) cx_strtos_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtoi(str, output, base) cx_strtoi_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtol(str, output, base) cx_strtol_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtoll(str, output, base) cx_strtoll_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtoi8(str, output, base) cx_strtoi8_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtoi16(str, output, base) cx_strtoi16_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtoi32(str, output, base) cx_strtoi32_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtoi64(str, output, base) cx_strtoi64_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtoz(str, output, base) cx_strtoz_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtous(str, output, base) cx_strtous_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtou(str, output, base) cx_strtou_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtoul(str, output, base) cx_strtoul_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtoull(str, output, base) cx_strtoull_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtou8(str, output, base) cx_strtou8_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtou16(str, output, base) cx_strtou16_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtou32(str, output, base) cx_strtou32_lc(str, output, base, ",")
/**
 * @copydoc cx_strtouz()
 */
#define cx_strtou64(str, output, base) cx_strtou64_lc(str, output, base, ",")
/**
 * Converts a string to a number.
 *
 * The function returns non-zero when conversion is not possible.
 * In that case the function sets errno to EINVAL when the reason is an invalid character or an unsupported base.
 * It sets errno to ERANGE when the target data type is too small to hold the value.
 *
 * The comma character is treated as group separator and ignored during parsing
 * (this macro passes @c "," to the @c _lc variant).
 * If you want to choose the set of group separators, use the @c _lc variant of this function (e.g. cx_strtouz_lc()).
 *
 * @param str the string to convert
 * @param output a pointer to the integer variable where the result shall be stored
 * @param base the numeric base: 2, 8, 10, or 16
 * @retval zero success
 * @retval non-zero conversion was not possible
 */
#define cx_strtouz(str, output, base) cx_strtouz_lc(str, output, base, ",")

/**
 * Converts a string to a single precision floating point number.
 *
 * This macro passes @p str through cx_strcast() and forwards all other
 * arguments unchanged to the function of the same name.
 *
 * The function returns non-zero when conversion is not possible.
 * In that case the function sets errno to EINVAL when the reason is an invalid character.
 * It sets errno to ERANGE when the necessary representation would exceed the limits defined in libc's float.h.
 *
 * The decimal separator and the group separators are chosen by the caller
 * via @p decsep and @p groupsep.
 * The convenience macro cx_strtof() uses a dot as decimal separator
 * and a comma as group separator.
 *
 * @param str the string to convert
 * @param output a pointer to the float variable where the result shall be stored
 * @param decsep the decimal separator
 * @param groupsep each character in this string is treated as group separator and ignored during conversion
 * @retval zero success
 * @retval non-zero conversion was not possible
 */
#define cx_strtof_lc(str, output, decsep, groupsep) cx_strtof_lc(cx_strcast(str), output, decsep, groupsep)
/**
 * Converts a string to a double precision floating point number.
 *
 * This macro passes @p str through cx_strcast() and forwards all other
 * arguments unchanged to the function of the same name.
 *
 * The function returns non-zero when conversion is not possible.
 * In that case the function sets errno to EINVAL when the reason is an invalid character.
 * It sets errno to ERANGE when the necessary representation would exceed the limits defined in libc's float.h.
 *
 * The decimal separator and the group separators are chosen by the caller
 * via @p decsep and @p groupsep.
 * The convenience macro cx_strtod() uses a dot as decimal separator
 * and a comma as group separator.
 *
 * @param str the string to convert
 * @param output a pointer to the double variable where the result shall be stored
 * @param decsep the decimal separator
 * @param groupsep each character in this string is treated as group separator and ignored during conversion
 * @retval zero success
 * @retval non-zero conversion was not possible
 */
#define cx_strtod_lc(str, output, decsep, groupsep) cx_strtod_lc(cx_strcast(str), output, decsep, groupsep)

/**
 * Converts a string to a single precision floating point number.
 *
 * The function returns non-zero when conversion is not possible.
 * In that case the function sets errno to EINVAL when the reason is an invalid character.
 * It sets errno to ERANGE when the necessary representation would exceed the limits defined in libc's float.h.
 *
 * The decimal separator is assumed to be a dot character.
 * The comma character is treated as group separator and ignored during parsing.
 * (This macro passes @c '.' and @c "," to cx_strtof_lc().)
 * If you want to choose a different format, use cx_strtof_lc().
 *
 * @param str the string to convert
 * @param output a pointer to the float variable where the result shall be stored
 * @retval zero success
 * @retval non-zero conversion was not possible
 */
#define cx_strtof(str, output) cx_strtof_lc(str, output, '.', ",")
/**
 * Converts a string to a double precision floating point number.
 *
 * The function returns non-zero when conversion is not possible.
 * In that case the function sets errno to EINVAL when the reason is an invalid character.
 * It sets errno to ERANGE when the necessary representation would exceed the limits defined in libc's float.h.
 *
 * The decimal separator is assumed to be a dot character.
 * The comma character is treated as group separator and ignored during parsing.
 * (This macro passes @c '.' and @c "," to cx_strtod_lc().)
 * If you want to choose a different format, use cx_strtod_lc().
 *
 * @param str the string to convert
 * @param output a pointer to the double variable where the result shall be stored
 * @retval zero success
 * @retval non-zero conversion was not possible
 */
#define cx_strtod(str, output) cx_strtod_lc(str, output, '.', ",")

#endif

#ifdef __cplusplus
} // extern "C"
#endif

#endif //UCX_STRING_H

mercurial