src/ucx/string.h

Tue, 17 Oct 2017 16:15:41 +0200

author
Mike Becker <universe@uap-core.de>
date
Tue, 17 Oct 2017 16:15:41 +0200
changeset 251
fae240d633fc
parent 250
ucx/string.h@b7d1317b138e
child 259
2f5dea574a75
permissions
-rw-r--r--

changes source directory structure in preperation for autotools rollout

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2017 Olaf Wintermann. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    28 /**
    29  * Bounded string implementation.
    30  * 
    31  * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
    32  * The main difference from C strings is that <code>sstr_t</code> does <b>not
    33  * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
    34  * within the structure.
    35  * 
    36  * When using <code>sstr_t</code>, developers must be fully aware of what type
    37  * of string (<code>NULL</code>-terminated or not) they are using, when 
    38  * accessing the <code>char* ptr</code> directly.
    39  * 
    40  * The UCX string module provides some common string functions, known from
    41  * standard libc, working with <code>sstr_t</code>.
    42  * 
    43  * @file   string.h
    44  * @author Mike Becker
    45  * @author Olaf Wintermann
    46  */
    48 #ifndef UCX_STRING_H
    49 #define	UCX_STRING_H
    51 #include <ucx/ucx.h>
    52 #include <ucx/allocator.h>
    53 #include <stddef.h>
    55 /** Shortcut for a <code>sstr_t struct</code> literal; the length is <code>sizeof(s)-1</code>, so pass a string literal or char array, not a pointer. */
    56 #define ST(s) { (char*)(s), sizeof(s)-1 }
    58 /** Shortcut for the conversion of a C string to a <code>sstr_t</code> via sstrn(); the length is <code>sizeof(s)-1</code>, so pass a string literal or char array, not a pointer. */
    59 #define S(s) sstrn((char*)(s), sizeof(s)-1)
    61 #ifdef	__cplusplus
    62 extern "C" {
    63 #endif
    65 /**
    66  * The UCX string structure: a character pointer paired with an explicit length.
    67  */
    68 typedef struct {
    69    /** A reference to the string (<b>not necessarily <code>NULL</code>
    70     * -terminated</b>) */
    71     char   *ptr;
    72     /** The length of the string (number of <code>char</code>s) */
    73     size_t length;
    74 } sstr_t;
    76 /**
    77  * Creates a new sstr_t based on a C string.
    78  * 
    79  * The length is implicitly inferred by using a call to <code>strlen()</code>.
    80  *
    81  * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
    82  * do want a copy, use sstrdup() on the return value of this function.
    83  * 
    84  * @param cstring the C string to wrap
    85  * @return a new sstr_t containing the C string
    86  * 
    87  * @see sstrn()
    88  */
    89 sstr_t sstr(char *cstring);
    91 /**
    92  * Creates a new sstr_t of the specified length based on a C string.
    93  *
    94  * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
    95  * do want a copy, use sstrdup() on the return value of this function.
    96  * 
    97  * @param cstring  the C string to wrap
    98  * @param length   the length of the string
    99  * @return a new sstr_t containing the C string
   100  * 
   101  * @see sstr()
   102  * @see S()
   103  */
   104 sstr_t sstrn(char *cstring, size_t length);
   107 /**
   108  * Returns the cumulated length of all specified strings.
   109  *
   110  * At least one string must be specified.
   111  * 
   112  * <b>Attention:</b> if the count argument does not match the count of the
   113  * specified strings, the behavior is undefined.
   114  *
   115  * @param count    the total number of specified strings (so at least 1)
   116  * @param string   the first string
   117  * @param ...      all other strings
   118  * @return the cumulated length of all strings
   119  */
   120 size_t sstrnlen(size_t count, sstr_t string, ...);
   122 /**
   123  * Concatenates two or more strings.
   124  * 
   125  * The resulting string will be allocated by standard <code>malloc()</code>. 
   126  * So developers <b>MUST</b> pass the sstr_t.ptr to <code>free()</code>.
   127  * 
   128  * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
   129  * terminated.
   130  *
   131  * @param count   the total number of strings to concatenate
   132  * @param s1      first string
   133  * @param s2      second string
   134  * @param ...     all remaining strings
   135  * @return the concatenated string
   136  */
   137 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...);
   139 /**
   140  * Concatenates two or more strings using a UcxAllocator.
   141  * 
   142  * See sstrcat() for details.
   143  *
   144  * @param a       the allocator to use
   145  * @param count   the total number of strings to concatenate
   146  * @param s1      first string
   147  * @param s2      second string
   148  * @param ...     all remaining strings
   149  * @return the concatenated string
   150  */
   151 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...);
   154 /**
   155  * Returns a substring starting at the specified location.
   156  * 
   157  * <b>Attention:</b> the new string references the same memory area as the
   158  * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
   159  * Use sstrdup() to get a copy.
   160  * 
   161  * @param string input string
   162  * @param start  start location of the substring
   163  * @return a substring of <code>string</code> starting at <code>start</code>
   164  * 
   165  * @see sstrsubsl()
   166  * @see sstrchr()
   167  */
   168 sstr_t sstrsubs(sstr_t string, size_t start);
   170 /**
   171  * Returns a substring with a maximum length starting at the specified location.
   172  * 
   173  * <b>Attention:</b> the new string references the same memory area as the
   174  * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
   175  * Use sstrdup() to get a copy.
   176  * 
   177  * @param string input string
   178  * @param start  start location of the substring
   179  * @param length the maximum length of the substring
   180  * @return a substring of <code>string</code> starting at <code>start</code>
   181  * with a maximum length of <code>length</code>
   182  * 
   183  * @see sstrsubs()
   184  * @see sstrchr()
   185  */
   186 sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
   188 /**
   189  * Returns a substring starting at the location of the first occurrence of the
   190  * specified character.
   191  * 
   192  * If the string does not contain the character, an empty string is returned.
   193  * 
   194  * @param string the string where to locate the character
   195  * @param chr    the character to locate
   196  * @return       a substring starting at the first location of <code>chr</code>
   197  * 
   198  * @see sstrsubs()
   199  */
   200 sstr_t sstrchr(sstr_t string, int chr);
   202 /**
   203  * Returns a substring starting at the location of the last occurrence of the
   204  * specified character.
   205  * 
   206  * If the string does not contain the character, an empty string is returned.
   207  * 
   208  * @param string the string where to locate the character
   209  * @param chr    the character to locate
   210  * @return       a substring starting at the last location of <code>chr</code>
   211  * 
   212  * @see sstrsubs()
   213  */
   214 sstr_t sstrrchr(sstr_t string, int chr);
   216 /**
   217  * Returns a substring starting at the location of the first occurrence of the
   218  * specified string.
   219  * 
   220  * If the string does not contain the other string, an empty string is returned.
   221  * 
   222  * If <code>match</code> is an empty string, the complete <code>string</code> is
   223  * returned.
   224  * 
   225  * @param string the string to be scanned
   226  * @param match  string containing the sequence of characters to match
   227  * @return       a substring starting at the first occurrence of
   228  *               <code>match</code>, or an empty string, if the sequence is not
   229  *               present in <code>string</code>
   230  */
   231 sstr_t sstrstr(sstr_t string, sstr_t match);
   233 /**
   234  * Splits a string into parts by using a delimiter string.
   235  * 
   236  * This function will return <code>NULL</code>, if one of the following happens:
   237  * <ul>
   238  *   <li>the string length is zero</li>
   239  *   <li>the delimiter length is zero</li>
   240  *   <li>the string equals the delimiter</li>
   241  *   <li>memory allocation fails</li>
   242  * </ul>
   243  * 
   244  * The integer referenced by <code>count</code> is used as input and determines
   245  * the maximum size of the resulting array, i.e. the maximum count of splits to
   246  * perform + 1.
   247  * 
   248  * The integer referenced by <code>count</code> is also used as output and is
   249  * set to
   250  * <ul>
   251  *   <li>-2, on memory allocation errors</li>
   252  *   <li>-1, if either the string or the delimiter is an empty string</li>
   253  *   <li>0, if the string equals the delimiter</li>
   254  *   <li>1, if the string does not contain the delimiter</li>
   255  *   <li>the count of array items, otherwise</li>
   256  * </ul>
   257  * 
   258  * If the string starts with the delimiter, the first item of the resulting
   259  * array will be an empty string.
   260  * 
   261  * If the string ends with the delimiter and the maximum list size is not
   262  * exceeded, the last array item will be an empty string.
   263  * In case the list size would be exceeded, the last array item will be the
   264  * remaining string after the last split, <i>including</i> the terminating
   265  * delimiter.
   266  * 
   267  * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
   268  * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
   269  * an allocator for managed memory to avoid this.
   270  *
   271  * @param string the string to split
   272  * @param delim  the delimiter string
   273  * @param count  IN: the maximum size of the resulting array (0 = no limit),
   274  *               OUT: the actual size of the array
   275  * @return a sstr_t array containing the split strings or
   276  *         <code>NULL</code> on error
   277  * 
   278  * @see sstrsplit_a()
   279  */
   280 sstr_t* sstrsplit(sstr_t string, sstr_t delim, ssize_t *count);
   282 /**
   283  * Performs sstrsplit() using a UcxAllocator.
   284  * 
   285  * <i>Read the description of sstrsplit() for details.</i>
   286  * 
   287  * The memory for the sstr_t.ptr pointers of the array items and the memory for
   288  * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
   289  * function.
   290  * 
   291  * <b>Note:</b> the allocator is not used for memory that is freed within the
   292  * same call of this function (locally scoped variables).
   293  * 
   294  * @param allocator the UcxAllocator used for allocating memory
   295  * @param string the string to split
   296  * @param delim  the delimiter string
   297  * @param count  IN: the maximum size of the resulting array (0 = no limit),
   298  *               OUT: the actual size of the array
   299  * @return a sstr_t array containing the split strings or
   300  *         <code>NULL</code> on error
   301  * 
   302  * @see sstrsplit()
   303  */
   304 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim,
   305         ssize_t *count);
   307 /**
   308  * Compares two UCX strings with standard <code>memcmp()</code>.
   309  * 
   310  * At first it compares the sstr_t.length attribute of the two strings. The
   311  * <code>memcmp()</code> function is called, if and only if the lengths match.
   312  * 
   313  * @param s1 the first string
   314  * @param s2 the second string
   315  * @return -1, if the length of s1 is less than the length of s2 or 1, if the 
   316  * length of s1 is greater than the length of s2 or the result of
   317  * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
   318  */
   319 int sstrcmp(sstr_t s1, sstr_t s2);
   321 /**
   322  * Compares two UCX strings ignoring the case.
   323  * 
   324  * At first it compares the sstr_t.length attribute of the two strings. If and
   325  * only if the lengths match, both strings are compared char by char ignoring
   326  * the case.
   327  * 
   328  * @param s1 the first string
   329  * @param s2 the second string
   330  * @return -1, if the length of s1 is less than the length of s2 or 1, if the 
   331  * length of s1 is greater than the length of s2 or the difference between the
   332  * first two differing characters otherwise (i.e. 0 if the strings match and
   333  * no characters differ)
   334  */
   335 int sstrcasecmp(sstr_t s1, sstr_t s2);
   337 /**
   338  * Creates a duplicate of the specified string.
   339  * 
   340  * The new sstr_t will contain a copy allocated by standard
   341  * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to
   342  * <code>free()</code>.
   343  * 
   344  * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
   345  * terminated.
   346  * 
   347  * @param string the string to duplicate
   348  * @return a duplicate of the string
   349  * @see sstrdup_a()
   350  */
   351 sstr_t sstrdup(sstr_t string);
   353 /**
   354  * Creates a duplicate of the specified string using a UcxAllocator.
   355  * 
   356  * The new sstr_t will contain a copy allocated by the allocator's
   357  * ucx_allocator_malloc function. So it is implementation dependent, whether the
   358  * returned sstr_t.ptr pointer must be passed to the allocator's
   359  * ucx_allocator_free function manually.
   360  * 
   361  * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
   362  * terminated.
   363  * 
   364  * @param allocator a valid instance of a UcxAllocator
   365  * @param string the string to duplicate
   366  * @return a duplicate of the string
   367  * @see sstrdup()
   368  */
   369 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string);
   371 /**
   372  * Omits leading and trailing spaces.
   373  * 
   374  * This function returns a new sstr_t containing a trimmed version of the
   375  * specified string.
   376  * 
   377  * <b>Note:</b> the new sstr_t references the same memory, thus you
   378  * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
   379  * <code>free()</code>. It is also highly recommended to avoid assignments like
   380  * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
   381  * source string. Assignments of this type are only permitted, if the
   382  * sstr_t.ptr of the source string does not need to be freed or if another
   383  * reference to the source string exists.
   384  * 
   385  * @param string the string that shall be trimmed
   386  * @return a new sstr_t containing the trimmed string
   387  */
   388 sstr_t sstrtrim(sstr_t string);
   390 /**
   391  * Checks, if a string has a specific prefix.
   392  * @param string the string to check
   393  * @param prefix the prefix the string should have
   394  * @return 1, if and only if the string has the specified prefix, 0 otherwise
   395  */
   396 int sstrprefix(sstr_t string, sstr_t prefix);
   398 /**
   399  * Checks, if a string has a specific suffix.
   400  * @param string the string to check
   401  * @param suffix the suffix the string should have
   402  * @return 1, if and only if the string has the specified suffix, 0 otherwise
   403  */
   404 int sstrsuffix(sstr_t string, sstr_t suffix);
   406 /**
   407  * Returns a lower case version of a string.
   408  * 
   409  * This function creates a duplicate of the input string, first. See the
   410  * documentation of sstrdup() for the implications.
   411  * 
   412  * @param string the input string
   413  * @return the resulting lower case string
   414  * @see sstrdup()
   415  */
   416 sstr_t sstrlower(sstr_t string);
   418 /**
   419  * Returns a lower case version of a string using a UcxAllocator.
   420  * 
   421  * This function creates a duplicate of the input string, first. See the
   422  * documentation of sstrdup_a() for the implications.
   423  * 
   424  * @param allocator the allocator used for duplicating the string
   425  * @param string the input string
   426  * @return the resulting lower case string
   427  * @see sstrdup_a()
   428  */
   429 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string);
   431 /**
   432  * Returns an upper case version of a string.
   433  * 
   434  * This function creates a duplicate of the input string, first. See the
   435  * documentation of sstrdup() for the implications.
   436  * 
   437  * @param string the input string
   438  * @return the resulting upper case string
   439  * @see sstrdup()
   440  */
   441 sstr_t sstrupper(sstr_t string);
   443 /**
   444  * Returns an upper case version of a string using a UcxAllocator.
   445  * 
   446  * This function creates a duplicate of the input string, first. See the
   447  * documentation of sstrdup_a() for the implications.
   448  * 
   449  * @param allocator the allocator used for duplicating the string
   450  * @param string the input string
   451  * @return the resulting upper case string
   452  * @see sstrdup_a()
   453  */
   454 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string);
   456 #ifdef	__cplusplus
   457 }
   458 #endif
   460 #endif	/* UCX_STRING_H */

mercurial