olaf@20: /*
universe@103:  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
olaf@20:  *
universe@259:  * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
universe@103:  *
universe@103:  * Redistribution and use in source and binary forms, with or without
universe@103:  * modification, are permitted provided that the following conditions are met:
universe@103:  *
universe@103:  *   1. Redistributions of source code must retain the above copyright
universe@103:  *      notice, this list of conditions and the following disclaimer.
universe@103:  *
universe@103:  *   2. Redistributions in binary form must reproduce the above copyright
universe@103:  *      notice, this list of conditions and the following disclaimer in the
universe@103:  *      documentation and/or other materials provided with the distribution.
universe@103:  *
universe@103:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@103:  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@103:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@103:  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@103:  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@103:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@103:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@103:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@103:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@103:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@103:  * POSSIBILITY OF SUCH DAMAGE.
olaf@20:  */
universe@116: /**
universe@116:  * Bounded string implementation.
universe@116:  * 
universe@116:  * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
universe@116:  * The main difference from C strings is that <code>sstr_t</code> does <b>not
universe@116:  * need to be NUL-terminated</b>. Instead, the length is stored
universe@116:  * within the structure.
universe@116:  * 
universe@116:  * When using <code>sstr_t</code>, developers must be fully aware of what type
universe@116:  * of string (NUL-terminated or not) they are using when
universe@116:  * accessing the <code>char* ptr</code> directly.
universe@116:  * 
universe@116:  * The UCX string module provides some common string functions, known from
universe@116:  * standard libc, working with <code>sstr_t</code>.
universe@116:  * 
universe@116:  * @file   string.h
universe@116:  * @author Mike Becker
universe@116:  * @author Olaf Wintermann
universe@116:  */
olaf@20: 
universe@116: #ifndef UCX_STRING_H
universe@116: #define	UCX_STRING_H
olaf@20: 
universe@259: #include "ucx.h"
universe@259: #include "allocator.h"
universe@38: #include <stddef.h>
universe@38: 
universe@116: /**
universe@116:  * Shortcut for a <code>sstr_t struct</code> literal (brace initializer).
universe@116:  *
universe@116:  * The length is computed as <code>sizeof(s)-1</code>, so <code>s</code> must
universe@116:  * be a string literal (or a <code>char</code> array), <b>not</b> a
universe@116:  * <code>char*</code> pointer. The argument is not parenthesized in the
universe@116:  * expansion; pass a plain literal or identifier only.
universe@116:  */
universe@116: #define ST(s) { (char*)s, sizeof(s)-1 }
universe@146: 
universe@116: /**
universe@116:  * Shortcut for the conversion of a C string literal to a <code>sstr_t</code>.
universe@116:  *
universe@116:  * Expands to a call of sstrn() with the length <code>sizeof(s)-1</code>.
universe@116:  * Therefore <code>s</code> must be a string literal (or a <code>char</code>
universe@116:  * array), <b>not</b> a <code>char*</code> pointer. The argument is not
universe@116:  * parenthesized in the expansion; pass a plain literal or identifier only.
universe@116:  */
universe@116: #define S(s) sstrn((char*)s, sizeof(s)-1)
olaf@20: 
universe@283: /**
universe@283:  * Expands a sstr_t to two printf arguments: the length (cast to
universe@283:  * <code>int</code>) followed by the pointer. This matches the PRIsstr
universe@283:  * format specifier.
universe@283:  *
universe@283:  * The argument <code>s</code> is evaluated twice.
universe@283:  */
universe@283: #define SFMT(s) (int) (s).length, (s).ptr
universe@283: 
universe@283: /**
universe@283:  * Format specifier for a sstr_t.
universe@283:  *
universe@283:  * The leading <code>%</code> is not part of this macro and the specifier
universe@283:  * consumes two arguments (precision and pointer) as supplied by SFMT().
universe@283:  * Example: <code>printf("%" PRIsstr "\n", SFMT(str));</code>
universe@283:  */
universe@283: #define PRIsstr ".*s"
universe@283: 
olaf@20: #ifdef	__cplusplus
olaf@20: extern "C" {
olaf@20: #endif
olaf@20: 
universe@116: /**
universe@116:  * The UCX string structure.
universe@116:  *
universe@116:  * Pairs a character pointer with an explicit length, so that the referenced
universe@116:  * characters do not need to be terminated.
universe@116:  */
universe@116: typedef struct {
universe@116:    /** A reference to the characters of the string (<b>not necessarily
universe@116:     * NUL-terminated</b>) */
olaf@20:     char   *ptr;
universe@116:     /** The length of the string (a terminator, if present, is not counted) */
olaf@20:     size_t length;
olaf@20: } sstr_t;
olaf@20: 
universe@116: /**
universe@116:  * Creates a new sstr_t based on a C string.
universe@116:  * 
universe@116:  * The length is inferred by a call to <code>strlen()</code>, so
universe@116:  * <code>cstring</code> must be NUL-terminated.
olaf@20:  *
universe@116:  * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
universe@116:  * want a copy, use sstrdup() on the return value of this function.
universe@116:  * 
universe@116:  * @param cstring the C string to wrap
universe@116:  * @return a new sstr_t referencing the C string
universe@116:  * 
universe@116:  * @see sstrn()
olaf@20:  */
universe@116: sstr_t sstr(char *cstring);
olaf@20: 
universe@116: /**
universe@116:  * Creates a new sstr_t of the specified length based on a C string.
olaf@20:  *
universe@116:  * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
universe@116:  * want a copy, use sstrdup() on the return value of this function.
universe@116:  * 
universe@116:  * @param cstring  the C string to wrap
universe@116:  * @param length   the length to store in the resulting sstr_t
universe@116:  * @return a new sstr_t referencing the C string
universe@116:  * 
universe@116:  * @see sstr()
universe@116:  * @see S()
olaf@20:  */
universe@116: sstr_t sstrn(char *cstring, size_t length);
olaf@20: 
olaf@20: 
universe@116: /**
universe@116:  * Returns the total length of all specified strings.
olaf@20:  *
universe@116:  * At least one string must be specified.
universe@116:  * 
universe@116:  * <b>Attention:</b> if the count argument does not match the number of
universe@116:  * strings actually passed, the behavior is undefined.
universe@116:  *
universe@116:  * @param count    the total number of specified strings (so at least 1)
universe@116:  * @param string   the first string
universe@116:  * @param ...      all other strings (each of type sstr_t)
universe@116:  * @return the sum of the lengths of all strings
olaf@20:  */
universe@116: size_t sstrnlen(size_t count, sstr_t string, ...);
olaf@20: 
universe@119: /**
olaf@183:  * Concatenates two or more strings.
olaf@183:  * 
olaf@183:  * The resulting string will be allocated by standard <code>malloc()</code>. 
olaf@183:  * So callers <b>MUST</b> pass the sstr_t.ptr of the return value to
olaf@183:  * <code>free()</code> when it is no longer needed.
olaf@183:  * 
olaf@183:  * The sstr_t.ptr of the return value will <i>always</i> be NUL-terminated.
olaf@180:  *
olaf@180:  * @param count   the total number of strings to concatenate
olaf@183:  * @param s1      first string
olaf@183:  * @param s2      second string
olaf@183:  * @param ...     all remaining strings (each of type sstr_t)
olaf@180:  * @return the concatenated string
olaf@183:  *
olaf@183:  * @see sstrcat_a()
olaf@180:  */
olaf@180: sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...);
olaf@183: 
olaf@183: /**
universe@225:  * Concatenates two or more strings using a UcxAllocator.
olaf@183:  * 
olaf@183:  * Works like sstrcat(), except that the memory for the result is obtained
olaf@183:  * from the specified allocator. See sstrcat() for details.
olaf@183:  *
olaf@183:  * @param a       the allocator to use
olaf@183:  * @param count   the total number of strings to concatenate
olaf@183:  * @param s1      first string
olaf@183:  * @param s2      second string
olaf@183:  * @param ...     all remaining strings (each of type sstr_t)
olaf@183:  * @return the concatenated string
olaf@183:  *
olaf@183:  * @see sstrcat()
olaf@183:  */
olaf@180: sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...);
olaf@180: 
olaf@180: 
olaf@180: /**
universe@119:  * Returns a substring starting at the specified location.
universe@119:  * 
universe@119:  * <b>Attention:</b> the new string references the same memory area as the
universe@119:  * input string and will <b>NOT</b> be NUL-terminated.
universe@119:  * Use sstrdup() to get a copy.
universe@119:  * 
universe@119:  * @param string input string
universe@119:  * @param start  start location of the substring
universe@119:  * @return a substring of <code>string</code> starting at <code>start</code>
universe@119:  * 
universe@119:  * @see sstrsubsl()
universe@119:  * @see sstrchr()
universe@119:  */
universe@119: sstr_t sstrsubs(sstr_t string, size_t start);
universe@119: 
universe@119: /**
universe@119:  * Returns a substring with a maximum length starting at the specified location.
universe@119:  * 
universe@119:  * <b>Attention:</b> the new string references the same memory area as the
universe@119:  * input string and will <b>NOT</b> be NUL-terminated.
universe@119:  * Use sstrdup() to get a copy.
universe@119:  * 
universe@119:  * @param string input string
universe@119:  * @param start  start location of the substring
universe@119:  * @param length the maximum length of the substring
universe@119:  * @return a substring of <code>string</code> starting at <code>start</code>
universe@119:  * with a maximum length of <code>length</code>
universe@119:  * 
universe@119:  * @see sstrsubs()
universe@119:  * @see sstrchr()
universe@119:  */
universe@119: sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
universe@119: 
universe@119: /**
universe@119:  * Returns a substring starting at the location of the first occurrence of the
universe@119:  * specified character.
universe@119:  * 
universe@119:  * If the string does not contain the character, an empty string is returned.
universe@119:  * 
universe@119:  * @param string the string in which to locate the character
universe@119:  * @param chr    the character to locate
universe@148:  * @return       a substring starting at the first location of <code>chr</code>,
universe@148:  *               or an empty string, if <code>chr</code> does not occur
universe@119:  * 
universe@119:  * @see sstrsubs()
universe@119:  */
universe@119: sstr_t sstrchr(sstr_t string, int chr);
universe@119: 
universe@119: /**
universe@148:  * Returns a substring starting at the location of the last occurrence of the
universe@148:  * specified character.
universe@148:  * 
universe@148:  * If the string does not contain the character, an empty string is returned.
universe@148:  * 
universe@148:  * @param string the string in which to locate the character
universe@148:  * @param chr    the character to locate
universe@148:  * @return       a substring starting at the last location of <code>chr</code>,
universe@148:  *               or an empty string, if <code>chr</code> does not occur
universe@148:  * 
universe@148:  * @see sstrsubs()
universe@148:  */
universe@148: sstr_t sstrrchr(sstr_t string, int chr);
universe@148: 
universe@148: /**
universe@214:  * Returns a substring starting at the location of the first occurrence of the
universe@214:  * specified string.
universe@214:  * 
universe@214:  * If <code>string</code> does not contain <code>match</code>, an empty string
universe@214:  * is returned.
universe@214:  * 
universe@214:  * If <code>match</code> is an empty string, the complete <code>string</code> is
universe@214:  * returned.
universe@214:  * 
universe@214:  * @param string the string to be scanned
universe@214:  * @param match  string containing the sequence of characters to match
universe@214:  * @return       a substring starting at the first occurrence of
universe@214:  *               <code>match</code>, or an empty string, if the sequence is not
universe@214:  *               present in <code>string</code>
universe@214:  */
universe@214: sstr_t sstrstr(sstr_t string, sstr_t match);
universe@214: 
universe@214: /**
universe@119:  * Splits a string into parts by using a delimiter string.
universe@119:  * 
universe@119:  * This function will return <code>NULL</code>, if one of the following happens:
universe@119:  * <ul>
universe@119:  *   <li>the string length is zero</li>
universe@119:  *   <li>the delimiter length is zero</li>
universe@119:  *   <li>the string equals the delimiter</li>
universe@119:  *   <li>memory allocation fails</li>
universe@119:  * </ul>
universe@119:  * 
universe@119:  * The integer referenced by <code>count</code> is used as input and determines
universe@160:  * the maximum size of the resulting array, i.e. the maximum count of splits to
universe@119:  * perform + 1.
universe@119:  * 
universe@119:  * The integer referenced by <code>count</code> is also used as output and is
universe@119:  * set to
universe@119:  * <ul>
universe@119:  *   <li>-2, on memory allocation errors</li>
universe@119:  *   <li>-1, if either the string or the delimiter is an empty string</li>
universe@119:  *   <li>0, if the string equals the delimiter</li>
universe@119:  *   <li>1, if the string does not contain the delimiter</li>
universe@160:  *   <li>the count of array items, otherwise</li>
universe@119:  * </ul>
universe@119:  * 
universe@119:  * If the string starts with the delimiter, the first item of the resulting
universe@160:  * array will be an empty string.
universe@119:  * 
universe@119:  * If the string ends with the delimiter and the maximum list size is not
universe@160:  * exceeded, the last array item will be an empty string.
universe@233:  * In case the list size would be exceeded, the last array item will be the
universe@233:  * remaining string after the last split, <i>including</i> the terminating
universe@233:  * delimiter.
universe@119:  * 
universe@160:  * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
universe@125:  * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
universe@119:  * an allocator that manages the memory, to avoid this.
universe@119:  *
universe@119:  * <b>Note:</b> <code>ssize_t</code> is a POSIX type that is not declared by
universe@119:  * <code>&lt;stddef.h&gt;</code>; this header presumably obtains it through
universe@119:  * ucx.h (to be confirmed).
olaf@20:  *
universe@119:  * @param string the string to split
universe@119:  * @param delim  the delimiter string
universe@160:  * @param count  IN: the maximum size of the resulting array (0 = no limit),
universe@160:  *               OUT: the actual size of the array or a negative error code
universe@160:  *               as listed above
universe@160:  * @return a sstr_t array containing the split strings or
universe@119:  *         <code>NULL</code> on error
universe@119:  * 
universe@125:  * @see sstrsplit_a()
olaf@20:  */
universe@173: sstr_t* sstrsplit(sstr_t string, sstr_t delim, ssize_t *count);
olaf@20: 
universe@119: /**
universe@225:  * Performs sstrsplit() using a UcxAllocator.
universe@119:  * 
universe@119:  * <i>Read the description of sstrsplit() for details.</i>
universe@119:  * 
universe@160:  * The memory for the sstr_t.ptr pointers of the array items and the memory for
universe@119:  * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
universe@119:  * function.
universe@119:  * 
universe@119:  * <b>Note:</b> the allocator is not used for memory that is freed within the
universe@119:  * same call of this function (locally scoped variables).
universe@119:  * 
universe@125:  * @param allocator the UcxAllocator used for allocating memory
universe@119:  * @param string the string to split
universe@119:  * @param delim  the delimiter string
universe@160:  * @param count  IN: the maximum size of the resulting array (0 = no limit),
universe@160:  *               OUT: the actual size of the array (see sstrsplit() for the
universe@160:  *               values reported on error)
universe@160:  * @return a sstr_t array containing the split strings or
universe@119:  *         <code>NULL</code> on error
universe@119:  * 
universe@119:  * @see sstrsplit()
olaf@20:  */
universe@125: sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim,
universe@173:         ssize_t *count);
olaf@20: 
universe@116: /**
universe@116:  * Compares two UCX strings with standard <code>memcmp()</code>.
universe@116:  * 
universe@116:  * At first it compares the sstr_t.length attribute of the two strings. The
universe@116:  * <code>memcmp()</code> function is called, if and only if the lengths match.
universe@116:  * Consequently, strings of different length are ordered by length only and
universe@116:  * not lexicographically.
universe@116:  * 
universe@116:  * @param s1 the first string
universe@116:  * @param s2 the second string
universe@116:  * @return -1, if the length of s1 is less than the length of s2; 1, if the
universe@116:  * length of s1 is greater than the length of s2; the result of
universe@116:  * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
universe@116:  */
olaf@68: int sstrcmp(sstr_t s1, sstr_t s2);
olaf@20: 
universe@116: /**
universe@149:  * Compares two UCX strings ignoring the case.
universe@149:  * 
universe@149:  * At first it compares the sstr_t.length attribute of the two strings. If and
universe@149:  * only if the lengths match, both strings are compared char by char ignoring
universe@149:  * the case. Consequently, strings of different length are ordered by length
universe@149:  * only.
universe@149:  * 
universe@149:  * @param s1 the first string
universe@149:  * @param s2 the second string
universe@149:  * @return -1, if the length of s1 is less than the length of s2; 1, if the
universe@149:  * length of s1 is greater than the length of s2; the difference between the
universe@149:  * first two differing characters otherwise (i.e. 0 if the strings match and
universe@149:  * no characters differ)
universe@149:  */
universe@149: int sstrcasecmp(sstr_t s1, sstr_t s2);
universe@149: 
universe@149: /**
universe@116:  * Creates a duplicate of the specified string.
universe@116:  * 
universe@116:  * The new sstr_t will contain a copy allocated by standard
universe@116:  * <code>malloc()</code>. So callers <b>MUST</b> pass the sstr_t.ptr of the
universe@116:  * return value to <code>free()</code> when it is no longer needed.
universe@116:  * 
universe@118:  * The sstr_t.ptr of the return value will <i>always</i> be NUL-terminated.
universe@118:  * 
universe@116:  * @param string the string to duplicate
universe@118:  * @return a duplicate of the string
universe@125:  * @see sstrdup_a()
universe@116:  */
universe@116: sstr_t sstrdup(sstr_t string);
olaf@20: 
universe@118: /**
universe@225:  * Creates a duplicate of the specified string using a UcxAllocator.
universe@118:  * 
universe@118:  * The new sstr_t will contain a copy allocated by the allocator's
universe@118:  * ucx_allocator_malloc function. So it is implementation dependent, whether
universe@118:  * the returned sstr_t.ptr pointer must be passed to the allocator's
universe@118:  * ucx_allocator_free function manually.
universe@118:  * 
universe@118:  * The sstr_t.ptr of the return value will <i>always</i> be NUL-terminated.
universe@118:  * 
universe@225:  * @param allocator a valid instance of a UcxAllocator
universe@118:  * @param string the string to duplicate
universe@118:  * @return a duplicate of the string
universe@119:  * @see sstrdup()
universe@118:  */
universe@125: sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string);
universe@118: 
universe@118: /**
universe@118:  * Omits leading and trailing spaces.
universe@118:  * 
universe@118:  * This function returns a new sstr_t containing a trimmed version of the
universe@118:  * specified string. No characters are copied.
universe@118:  * 
universe@118:  * <b>Note:</b> the new sstr_t references the same memory, thus you
universe@118:  * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
universe@118:  * <code>free()</code>. It is also highly recommended to avoid assignments like
universe@118:  * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
universe@118:  * source string. Assignments of this type are only acceptable, if the
universe@118:  * sstr_t.ptr of the source string does not need to be freed or if another
universe@118:  * reference to the source string exists.
universe@118:  * 
universe@118:  * @param string the string that shall be trimmed
universe@118:  * @return a new sstr_t referencing the trimmed part of the string
universe@118:  */
olaf@96: sstr_t sstrtrim(sstr_t string);
olaf@96: 
universe@146: /**
universe@146:  * Checks whether a string starts with a specific prefix.
universe@146:  *
universe@146:  * @param string the string to check
universe@146:  * @param prefix the prefix the string should have
universe@146:  * @return 1, if and only if the string has the specified prefix, 0 otherwise
universe@146:  * @see sstrsuffix()
universe@146:  */
universe@146: int sstrprefix(sstr_t string, sstr_t prefix);
universe@146: 
universe@146: /**
universe@146:  * Checks whether a string ends with a specific suffix.
universe@146:  *
universe@146:  * @param string the string to check
universe@146:  * @param suffix the suffix the string should have
universe@146:  * @return 1, if and only if the string has the specified suffix, 0 otherwise
universe@146:  * @see sstrprefix()
universe@146:  */
universe@146: int sstrsuffix(sstr_t string, sstr_t suffix);
universe@146: 
universe@210: /**
universe@210:  * Returns a lower case version of a string.
universe@210:  * 
universe@210:  * This function creates a duplicate of the input string first; the input
universe@210:  * string itself is passed by value. See the documentation of sstrdup() for
universe@210:  * the implications (in particular regarding memory ownership).
universe@210:  * 
universe@210:  * @param string the input string
universe@210:  * @return the resulting lower case string
universe@210:  * @see sstrdup()
universe@210:  * @see sstrlower_a()
universe@210:  */
universe@210: sstr_t sstrlower(sstr_t string);
universe@210: 
universe@210: /**
universe@210:  * Returns a lower case version of a string using a UcxAllocator.
universe@210:  * 
universe@210:  * This function creates a duplicate of the input string first. See the
universe@210:  * documentation of sstrdup_a() for the implications (in particular regarding
universe@210:  * memory ownership).
universe@210:  * 
universe@210:  * @param allocator the allocator used for duplicating the string
universe@210:  * @param string the input string
universe@210:  * @return the resulting lower case string
universe@210:  * @see sstrdup_a()
universe@210:  * @see sstrlower()
universe@210:  */
universe@210: sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string);
universe@210: 
universe@210: /**
universe@210:  * Returns an upper case version of a string.
universe@210:  * 
universe@210:  * This function creates a duplicate of the input string first; the input
universe@210:  * string itself is passed by value. See the documentation of sstrdup() for
universe@210:  * the implications (in particular regarding memory ownership).
universe@210:  * 
universe@210:  * @param string the input string
universe@210:  * @return the resulting upper case string
universe@210:  * @see sstrdup()
universe@210:  * @see sstrupper_a()
universe@210:  */
universe@210: sstr_t sstrupper(sstr_t string);
universe@210: 
universe@210: /**
universe@210:  * Returns an upper case version of a string using a UcxAllocator.
universe@210:  * 
universe@210:  * This function creates a duplicate of the input string first. See the
universe@210:  * documentation of sstrdup_a() for the implications (in particular regarding
universe@210:  * memory ownership).
universe@210:  * 
universe@210:  * @param allocator the allocator used for duplicating the string
universe@210:  * @param string the input string
universe@210:  * @return the resulting upper case string
universe@210:  * @see sstrdup_a()
universe@210:  * @see sstrupper()
universe@210:  */
universe@210: sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string);
universe@210: 
olaf@20: #ifdef	__cplusplus
olaf@20: }
olaf@20: #endif
olaf@20: 
universe@116: #endif	/* UCX_STRING_H */