diff -r b7d1317b138e -r fae240d633fc src/ucx/string.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ucx/string.h Tue Oct 17 16:15:41 2017 +0200 @@ -0,0 +1,460 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2017 Olaf Wintermann. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/** + * Bounded string implementation. + * + * The UCX strings (sstr_t) provide an alternative to C strings. + * The main difference to C strings is, that sstr_t does not + * need to be NULL-terminated. Instead the length is stored + * within the structure. 
+ * + * When using sstr_t, developers must be fully aware of what type + * of string (NULL-terminated or not) they are using, when + * accessing the char* ptr directly. + * + * The UCX string module provides some common string functions, known from + * standard libc, working with sstr_t. + * + * @file string.h + * @author Mike Becker + * @author Olaf Wintermann + */ + +#ifndef UCX_STRING_H +#define UCX_STRING_H + +#include +#include +#include + +/** Shortcut for a sstr_t struct literal. */ +#define ST(s) { (char*)s, sizeof(s)-1 } + +/** Shortcut for the conversion of a C string to a sstr_t. */ +#define S(s) sstrn((char*)s, sizeof(s)-1) + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * The UCX string structure. + */ +typedef struct { + /** A reference to the string (not necessarily NULL + * -terminated) */ + char *ptr; + /** The length of the string */ + size_t length; +} sstr_t; + +/** + * Creates a new sstr_t based on a C string. + * + * The length is implicitly inferred by using a call to strlen(). + * + * Note: the sstr_t will hold a reference to the C string. If you + * do want a copy, use sstrdup() on the return value of this function. + * + * @param cstring the C string to wrap + * @return a new sstr_t containing the C string + * + * @see sstrn() + */ +sstr_t sstr(char *cstring); + +/** + * Creates a new sstr_t of the specified length based on a C string. + * + * Note: the sstr_t will hold a reference to the C string. If you + * do want a copy, use sstrdup() on the return value of this function. + * + * @param cstring the C string to wrap + * @param length the length of the string + * @return a new sstr_t containing the C string + * + * @see sstr() + * @see S() + */ +sstr_t sstrn(char *cstring, size_t length); + + +/** + * Returns the cumulated length of all specified strings. + * + * At least one string must be specified. + * + * Attention: if the count argument does not match the count of the + * specified strings, the behavior is undefined. 
+ * + * @param count the total number of specified strings (so at least 1) + * @param string the first string + * @param ... all other strings + * @return the cumulated length of all strings + */ +size_t sstrnlen(size_t count, sstr_t string, ...); + +/** + * Concatenates two or more strings. + * + * The resulting string will be allocated by standard malloc(). + * So developers MUST pass the sstr_t.ptr to free(). + * + * The sstr_t.ptr of the return value will always be NULL- + * terminated. + * + * @param count the total number of strings to concatenate + * @param s1 first string + * @param s2 second string + * @param ... all remaining strings + * @return the concatenated string + */ +sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...); + +/** + * Concatenates two or more strings using a UcxAllocator. + * + * See sstrcat() for details. + * + * @param a the allocator to use + * @param count the total number of strings to concatenate + * @param s1 first string + * @param s2 second string + * @param ... all remaining strings + * @return the concatenated string + */ +sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...); + + +/** + * Returns a substring starting at the specified location. + * + * Attention: the new string references the same memory area as the + * input string and will NOT be NULL-terminated. + * Use sstrdup() to get a copy. + * + * @param string input string + * @param start start location of the substring + * @return a substring of string starting at start + * + * @see sstrsubsl() + * @see sstrchr() + */ +sstr_t sstrsubs(sstr_t string, size_t start); + +/** + * Returns a substring with a maximum length starting at the specified location. + * + * Attention: the new string references the same memory area as the + * input string and will NOT be NULL-terminated. + * Use sstrdup() to get a copy. 
+ * + * @param string input string + * @param start start location of the substring + * @param length the maximum length of the substring + * @return a substring of string starting at start + * with a maximum length of length + * + * @see sstrsubs() + * @see sstrchr() + */ +sstr_t sstrsubsl(sstr_t string, size_t start, size_t length); + +/** + * Returns a substring starting at the location of the first occurrence of the + * specified character. + * + * If the string does not contain the character, an empty string is returned. + * + * @param string the string where to locate the character + * @param chr the character to locate + * @return a substring starting at the first location of chr + * + * @see sstrsubs() + */ +sstr_t sstrchr(sstr_t string, int chr); + +/** + * Returns a substring starting at the location of the last occurrence of the + * specified character. + * + * If the string does not contain the character, an empty string is returned. + * + * @param string the string where to locate the character + * @param chr the character to locate + * @return a substring starting at the last location of chr + * + * @see sstrsubs() + */ +sstr_t sstrrchr(sstr_t string, int chr); + +/** + * Returns a substring starting at the location of the first occurrence of the + * specified string. + * + * If the string does not contain the other string, an empty string is returned. + * + * If match is an empty string, the complete string is + * returned. + * + * @param string the string to be scanned + * @param match string containing the sequence of characters to match + * @return a substring starting at the first occurrence of + * match, or an empty string, if the sequence is not + * present in string + */ +sstr_t sstrstr(sstr_t string, sstr_t match); + +/** + * Splits a string into parts by using a delimiter string. + * + * This function will return NULL, if one of the following happens: + *
    + *
  • the string length is zero
  • + *
  • the delimiter length is zero
  • + *
  • the string equals the delimiter
  • + *
  • memory allocation fails
  • + *
+ * + * The integer referenced by count is used as input and determines + * the maximum size of the resulting array, i.e. the maximum count of splits to + * perform + 1. + * + * The integer referenced by count is also used as output and is + * set to + *
    + *
  • -2, on memory allocation errors
  • + *
  • -1, if either the string or the delimiter is an empty string
  • + *
  • 0, if the string equals the delimiter
  • + *
  • 1, if the string does not contain the delimiter
  • + *
  • the count of array items, otherwise
  • + *
+ * + * If the string starts with the delimiter, the first item of the resulting + * array will be an empty string. + * + * If the string ends with the delimiter and the maximum list size is not + * exceeded, the last array item will be an empty string. + * In case the list size would be exceeded, the last array item will be the + * remaining string after the last split, including the terminating + * delimiter. + * + * Attention: The array pointer AND all sstr_t.ptr of the array + * items must be manually passed to free(). Use sstrsplit_a() with + * an allocator for managed memory to avoid this. + * + * @param string the string to split + * @param delim the delimiter string + * @param count IN: the maximum size of the resulting array (0 = no limit), + * OUT: the actual size of the array + * @return a sstr_t array containing the split strings or + * NULL on error + * + * @see sstrsplit_a() + */ +sstr_t* sstrsplit(sstr_t string, sstr_t delim, ssize_t *count); + +/** + * Performs sstrsplit() using a UcxAllocator. + * + * Read the description of sstrsplit() for details. + * + * The memory for the sstr_t.ptr pointers of the array items and the memory for + * the sstr_t array itself are allocated by using the UcxAllocator.malloc() + * function. + * + * Note: the allocator is not used for memory that is freed within the + * same call of this function (locally scoped variables). + * + * @param allocator the UcxAllocator used for allocating memory + * @param string the string to split + * @param delim the delimiter string + * @param count IN: the maximum size of the resulting array (0 = no limit), + * OUT: the actual size of the array + * @return a sstr_t array containing the split strings or + * NULL on error + * + * @see sstrsplit() + */ +sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim, + ssize_t *count); + +/** + * Compares two UCX strings with standard memcmp(). + * + * At first it compares the sstr_t.length attribute of the two strings. 
The + * memcmp() function is called, if and only if the lengths match. + * + * @param s1 the first string + * @param s2 the second string + * @return -1, if the length of s1 is less than the length of s2 or 1, if the + * length of s1 is greater than the length of s2 or the result of + * memcmp() otherwise (i.e. 0 if the strings match) + */ +int sstrcmp(sstr_t s1, sstr_t s2); + +/** + * Compares two UCX strings ignoring the case. + * + * At first it compares the sstr_t.length attribute of the two strings. If and + * only if the lengths match, both strings are compared char by char ignoring + * the case. + * + * @param s1 the first string + * @param s2 the second string + * @return -1, if the length of s1 is less than the length of s2 or 1, if the + * length of s1 is greater than the length of s2 or the difference between the + * first two differing characters otherwise (i.e. 0 if the strings match and + * no characters differ) + */ +int sstrcasecmp(sstr_t s1, sstr_t s2); + +/** + * Creates a duplicate of the specified string. + * + * The new sstr_t will contain a copy allocated by standard + * malloc(). So developers MUST pass the sstr_t.ptr to + * free(). + * + * The sstr_t.ptr of the return value will always be NULL- + * terminated. + * + * @param string the string to duplicate + * @return a duplicate of the string + * @see sstrdup_a() + */ +sstr_t sstrdup(sstr_t string); + +/** + * Creates a duplicate of the specified string using a UcxAllocator. + * + * The new sstr_t will contain a copy allocated by the allocator's + * ucx_allocator_malloc function. So it is implementation dependent, whether the + * returned sstr_t.ptr pointer must be passed to the allocator's + * ucx_allocator_free function manually. + * + * The sstr_t.ptr of the return value will always be NULL- + * terminated. 
+ * + * @param allocator a valid instance of a UcxAllocator + * @param string the string to duplicate + * @return a duplicate of the string + * @see sstrdup() + */ +sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string); + +/** + * Omits leading and trailing spaces. + * + * This function returns a new sstr_t containing a trimmed version of the + * specified string. + * + * Note: the new sstr_t references the same memory, thus you + * MUST NOT pass the sstr_t.ptr of the return value to + * free(). It is also highly recommended to avoid assignments like + * mystr = sstrtrim(mystr); as you lose the reference to the + * source string. Assignments of this type are only permitted, if the + * sstr_t.ptr of the source string does not need to be freed or if another + * reference to the source string exists. + * + * @param string the string that shall be trimmed + * @return a new sstr_t containing the trimmed string + */ +sstr_t sstrtrim(sstr_t string); + +/** + * Checks, if a string has a specific prefix. + * @param string the string to check + * @param prefix the prefix the string should have + * @return 1, if and only if the string has the specified prefix, 0 otherwise + */ +int sstrprefix(sstr_t string, sstr_t prefix); + +/** + * Checks, if a string has a specific suffix. + * @param string the string to check + * @param suffix the suffix the string should have + * @return 1, if and only if the string has the specified suffix, 0 otherwise + */ +int sstrsuffix(sstr_t string, sstr_t suffix); + +/** + * Returns a lower case version of a string. + * + * This function creates a duplicate of the input string, first. See the + * documentation of sstrdup() for the implications. + * + * @param string the input string + * @return the resulting lower case string + * @see sstrdup() + */ +sstr_t sstrlower(sstr_t string); + +/** + * Returns a lower case version of a string. + * + * This function creates a duplicate of the input string, first. 
See the + * documentation of sstrdup_a() for the implications. + * + * @param allocator the allocator used for duplicating the string + * @param string the input string + * @return the resulting lower case string + * @see sstrdup_a() + */ +sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string); + +/** + * Returns an upper case version of a string. + * + * This function creates a duplicate of the input string, first. See the + * documentation of sstrdup() for the implications. + * + * @param string the input string + * @return the resulting upper case string + * @see sstrdup() + */ +sstr_t sstrupper(sstr_t string); + +/** + * Returns an upper case version of a string. + * + * This function creates a duplicate of the input string, first. See the + * documentation of sstrdup_a() for the implications. + * + * @param allocator the allocator used for duplicating the string + * @param string the input string + * @return the resulting upper case string + * @see sstrdup_a() + */ +sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string); + +#ifdef __cplusplus +} +#endif + +#endif /* UCX_STRING_H */