src/ucx/string.h

Mon, 14 May 2018 17:56:03 +0200

author
Mike Becker <universe@uap-core.de>
date
Mon, 14 May 2018 17:56:03 +0200
changeset 306
90b6d69bb499
parent 283
c3b6ff227481
parent 300
d1f814633049
child 315
5b97de37aada
permissions
-rw-r--r--

merges constsstr branch

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    28 /**
    29  * Bounded string implementation.
    30  * 
    31  * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
    32  * The main difference to C strings is, that <code>sstr_t</code> does <b>not
    33  * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
    34  * within the structure.
    35  * 
    36  * When using <code>sstr_t</code>, developers must be fully aware of what type
    37  * of string (<code>NULL</code>-terminated or not) they are using, when 
    38  * accessing the <code>char* ptr</code> directly.
    39  * 
    40  * The UCX string module provides some common string functions, known from
    41  * standard libc, working with <code>sstr_t</code>.
    42  * 
    43  * @file   string.h
    44  * @author Mike Becker
    45  * @author Olaf Wintermann
    46  */
    48 #ifndef UCX_STRING_H
    49 #define	UCX_STRING_H
    51 #include "ucx.h"
    52 #include "allocator.h"
    53 #include <stddef.h>
    55 /** Shortcut for a <code>sstr_t struct</code> literal (brace initializer);
        * <code>s</code> must be a string literal or char array, because the
        * length is taken from <code>sizeof(s)-1</code>. */
    56 #define ST(s) { (char*)s, sizeof(s)-1 }
    58 /** Shortcut for the conversion of a C string to a <code>sstr_t</code>.
        * The length is <code>sizeof(s)-1</code>, so <code>s</code> must be a string
        * literal or char array; for a plain <code>char*</code> use sstr(). */
    59 #define S(s) sstrn((char*)s, sizeof(s)-1)
    61 /** Expands a sstr_t to printf arguments: the length cast to
        * <code>int</code>, followed by the pointer. */
    62 #define SFMT(s) (int) (s).length, (s).ptr
    64 /** Format specifier for a sstr_t, without the leading percent sign; it
        * consumes the two arguments produced by SFMT(). */
    65 #define PRIsstr ".*s"
    67 #ifdef	__cplusplus
    68 extern "C" {
    69 #endif
    70 /**
    71  * The UCX string structure.
    72  */
    73 typedef struct {
    74    /** A reference to the string (<b>not necessarily  <code>NULL</code>
    75     * -terminated</b>) */
    76     char   *ptr;
    77     /** The length of the string */
    78     size_t length;
    79 } sstr_t;
    81 typedef struct {
           /* Counterpart of sstr_t whose characters are const-qualified. */
           /* A reference to the string; not modifiable through this pointer.
            * NOTE(review): presumably not necessarily NULL-terminated, like
            * sstr_t.ptr - confirm. */
    82     const char *ptr;
           /* The length of the string */
    83     size_t     length;
    84 } scstr_t;
    86 #ifdef	__cplusplus
    87 }
    88 #endif
    91 #ifdef __cplusplus
    92 inline scstr_t s2scstr(sstr_t s) { /* C++ overload: view a sstr_t as scstr_t */
    93     scstr_t c;
    94     c.ptr = s.ptr;       /* same memory, now const-qualified; nothing is copied */
    95     c.length = s.length; /* copy the length (was erroneously assigned s.ptr) */
    96     return c;
    97 }
    98 inline scstr_t s2scstr(scstr_t c) { /* C++ overload: scstr_t is passed through */
    99     return c;
   100 }
   101 #define SCSTR s2scstr /* C++: overload resolution selects the conversion */
   102 #else
   104 scstr_t ucx_sc2sc(scstr_t c);
   105 scstr_t ucx_ss2sc(sstr_t str);
       /*
        * SCSTR(str) yields a scstr_t for an argument that is either a sstr_t or
        * a scstr_t. The mechanism depends on what the compiler offers:
        *   - C11:            _Generic selects ucx_ss2sc or ucx_sc2sc by type
        *   - GNU C / clang:  __builtin_choose_expr makes the same selection
        *   - Sun compiler:   a statement expression copies ptr and length
        *   - otherwise:      an unprototyped function receives the argument
        */
   106 #if __STDC_VERSION__ >= 201112L
   107 #define SCSTR(str) _Generic(str, sstr_t: ucx_ss2sc, scstr_t: ucx_sc2sc)(str)
   108 #elif defined(__GNUC__) || defined(__clang__)
       /* This branch is only reached before C11, where plain typeof is not a
        * keyword in the strict ISO modes (-std=c99); __typeof__ always is. */
   109 #define SCSTR(str) __builtin_choose_expr( \
   110         __builtin_types_compatible_p(__typeof__(str), sstr_t), \
   111         ucx_ss2sc, \
   112         ucx_sc2sc)(str)
   113 #elif defined(__sun)
   114 #define SCSTR(str) ({typeof(str) ucx_tmp_var_str = str; \
   115 	scstr_t ucx_tmp_var_c; \
   116 	ucx_tmp_var_c.ptr = ucx_tmp_var_str.ptr;\
   117 	ucx_tmp_var_c.length = ucx_tmp_var_str.length;\
   118 	ucx_tmp_var_c; })
   119 #else
       /* Declared without a prototype, so the argument type is not checked.
        * NOTE(review): presumably relies on sstr_t and scstr_t being passed
        * identically - confirm against the implementation. */
   120 scstr_t ucx_ss2c_s();
   121 #define SCSTR ucx_ss2c_s
   122 #endif /* C11 feature test */
   124 #endif /* C++ */
   126 #ifdef	__cplusplus
   127 extern "C" {
   128 #endif
   131 /**
   132  * Creates a new sstr_t based on a C string.
   133  * 
   134  * The length is implicitly inferred by using a call to <code>strlen()</code>.
   135  *
   136  * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
   137  * do want a copy, use sstrdup() on the return value of this function.
   138  * 
   139  * @param cstring the C string to wrap
   140  * @return a new sstr_t containing the C string
   141  * 
   142  * @see sstrn()
   143  */
   144 sstr_t sstr(char *cstring);
   146 /**
   147  * Creates a new sstr_t of the specified length based on a C string.
   148  *
   149  * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
   150  * do want a copy, use sstrdup() on the return value of this function.
   151  * 
   152  * @param cstring  the C string to wrap
   153  * @param length   the length of the string
   154  * @return a new sstr_t containing the C string
   155  * 
   156  * @see sstr()
   157  * @see S()
   158  */
   159 sstr_t sstrn(char *cstring, size_t length);
   162 scstr_t scstr(const char *cstring);
       /* scstr() / scstrn(): take a const C string and return a scstr_t.
        * NOTE(review): presumably the const counterparts of sstr() and sstrn(),
        * i.e. the result references cstring instead of copying it - confirm. */
   163 scstr_t scstrn(const char *cstring, size_t length);
   165 /**
   166  * Returns the cumulated length of all specified strings.
   167  *
   168  * At least one string must be specified.
   169  * 
   170  * <b>Attention:</b> if the count argument does not match the count of the
   171  * specified strings, the behavior is undefined.
   172  *
   173  * @param count    the total number of specified strings (so at least 1)
   174  * @param string   the first string
   175  * @param ...      all other strings
   176  * @return the cumulated length of all strings
   177  */
   178 size_t ucx_strnlen(size_t count, ...);
       /* sstrnlen() forwards count and all strings unchanged to ucx_strnlen();
        * at least one string must be given, as __VA_ARGS__ may not be empty. */
   180 #define sstrnlen(count, ...) ucx_strnlen(count, __VA_ARGS__)
   182 /**
   183  * Concatenates two or more strings.
   184  * 
   185  * The resulting string will be allocated by standard <code>malloc()</code>. 
   186  * So developers <b>MUST</b> pass the sstr_t.ptr to <code>free()</code>.
   187  * 
   188  * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
   189  * terminated.
   190  *
   191  * @param count   the total number of strings to concatenate
   192  * @param s1      first string
   193  * @param ...     all remaining strings
   194  * @return the concatenated string
   195  */
   196 sstr_t ucx_strcat(size_t count, scstr_t s1, ...);
       /* sstrcat() converts only s1 with SCSTR(); all remaining strings are
        * forwarded to ucx_strcat() exactly as written by the caller. */
   198 #define sstrcat(count, s1, ...) ucx_strcat(count, SCSTR(s1), __VA_ARGS__)
   200 /**
   201  * Concatenates two or more strings using a UcxAllocator.
   202  * 
   203  * See sstrcat() for details.
   204  *
   205  * @param a       the allocator to use
   206  * @param count   the total number of strings to concatenate
   207  * @param s1      first string
   208  * @param ...     all remaining strings
   209  * @return the concatenated string
   210  */
   211 sstr_t ucx_strcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...);
       /* sstrcat_a() forwards the allocator and count as is, converts s1 with
        * SCSTR() and passes all remaining strings through unchanged. The macro
        * previously lacked the allocator parameter, which shifted every
        * argument by one position. */
   213 #define sstrcat_a(a, count, s1, ...) ucx_strcat_a(a, count, SCSTR(s1), __VA_ARGS__)
   215 /**
   216  * Returns a substring starting at the specified location.
   217  * 
   218  * <b>Attention:</b> the new string references the same memory area as the
   219  * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
   220  * Use sstrdup() to get a copy.
   221  * 
   222  * @param string input string
   223  * @param start  start location of the substring
   224  * @return a substring of <code>string</code> starting at <code>start</code>
   225  * 
   226  * @see sstrsubsl()
   227  * @see sstrchr()
   228  */
   229 sstr_t sstrsubs(sstr_t string, size_t start);
   231 /**
   232  * Returns a substring with a maximum length starting at the specified location.
   233  * 
   234  * <b>Attention:</b> the new string references the same memory area as the
   235  * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
   236  * Use sstrdup() to get a copy.
   237  * 
   238  * @param string input string
   239  * @param start  start location of the substring
   240  * @param length the maximum length of the substring
   241  * @return a substring of <code>string</code> starting at <code>start</code>
   242  * with a maximum length of <code>length</code>
   243  * 
   244  * @see sstrsubs()
   245  * @see sstrchr()
   246  */
   247 sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
   249 scstr_t scstrsubs(scstr_t s, size_t start);
       /* scstrsubs() / scstrsubsl(): take and return scstr_t.
        * NOTE(review): presumably the const counterparts of sstrsubs() and
        * sstrsubsl(), returning a view into the same memory - confirm. */
   250 scstr_t scstrsubsl(scstr_t string, size_t start, size_t length);
   253 int ucx_strchr(const char *string, size_t length, int chr, size_t *pos);
       /* ucx_strchr() / ucx_strrchr(): search helpers working on a raw
        * pointer/length pair instead of a string structure.
        * NOTE(review): presumably they locate the first / last occurrence of
        * chr, store its index in *pos and signal success by the return value -
        * confirm against the implementation. */
   254 int ucx_strrchr(const char *string, size_t length, int chr, size_t *pos);
   256 /**
   257  * Returns a substring starting at the location of the first occurrence of the
   258  * specified character.
   259  * 
   260  * If the string does not contain the character, an empty string is returned.
   261  * 
   262  * @param string the string where to locate the character
   263  * @param chr    the character to locate
   264  * @return       a substring starting at the first location of <code>chr</code>
   265  * 
   266  * @see sstrsubs()
   267  */
   268 sstr_t sstrchr(sstr_t string, int chr);
   270 /**
   271  * Returns a substring starting at the location of the last occurrence of the
   272  * specified character.
   273  * 
   274  * If the string does not contain the character, an empty string is returned.
   275  * 
   276  * @param string the string where to locate the character
   277  * @param chr    the character to locate
   278  * @return       a substring starting at the last location of <code>chr</code>
   279  * 
   280  * @see sstrsubs()
   281  */
   282 sstr_t sstrrchr(sstr_t string, int chr);
   285 scstr_t scstrchr(scstr_t string, int chr);
       /* scstrchr() / scstrrchr(): take and return scstr_t.
        * NOTE(review): presumably the const counterparts of sstrchr() and
        * sstrrchr() - confirm. */
   286 scstr_t scstrrchr(scstr_t string, int chr);
   288 const char* ucx_strstr(
           /* Substring search helper working on raw pointer/length pairs.
            * NOTE(review): presumably returns the position of the first
            * occurrence of match within str and stores the length of the
            * remaining string in *newlen - confirm against the implementation. */
   289         const char *str,
   290         size_t length,
   291         const char *match,
   292         size_t matchlen,
   293         size_t *newlen);
   295 /**
   296  * Returns a substring starting at the location of the first occurrence of the
   297  * specified string.
   298  * 
   299  * If the string does not contain the other string, an empty string is returned.
   300  * 
   301  * If <code>match</code> is an empty string, the complete <code>string</code> is
   302  * returned.
   303  * 
   304  * @param string the string to be scanned
   305  * @param match  string containing the sequence of characters to match
   306  * @return       a substring starting at the first occurrence of
   307  *               <code>match</code>, or an empty string, if the sequence is not
   308  *               present in <code>string</code>
   309  */
   310 sstr_t ucx_sstrstr(sstr_t string, scstr_t match);
       /* sstrstr() converts only match with SCSTR(); string is forwarded as is
        * and therefore has to be a sstr_t. */
   311 #define sstrstr(string, match) ucx_sstrstr(string, SCSTR(match))
   313 scstr_t ucx_scstrstr(scstr_t string, scstr_t match);
       /* scstrstr() converts only match with SCSTR(); string is forwarded as is
        * and therefore has to be a scstr_t already.
        * NOTE(review): presumably the const counterpart of sstrstr() - confirm. */
   314 #define scstrstr(string, match) ucx_scstrstr(string, SCSTR(match))
   316 /**
   317  * Splits a string into parts by using a delimiter string.
   318  * 
   319  * This function will return <code>NULL</code>, if one of the following happens:
   320  * <ul>
   321  *   <li>the string length is zero</li>
   322  *   <li>the delimiter length is zero</li>
   323  *   <li>the string equals the delimiter</li>
   324  *   <li>memory allocation fails</li>
   325  * </ul>
   326  * 
   327  * The integer referenced by <code>count</code> is used as input and determines
   328  * the maximum size of the resulting array, i.e. the maximum count of splits to
   329  * perform + 1.
   330  * 
   331  * The integer referenced by <code>count</code> is also used as output and is
   332  * set to
   333  * <ul>
   334  *   <li>-2, on memory allocation errors</li>
   335  *   <li>-1, if either the string or the delimiter is an empty string</li>
   336  *   <li>0, if the string equals the delimiter</li>
   337  *   <li>1, if the string does not contain the delimiter</li>
   338  *   <li>the count of array items, otherwise</li>
   339  * </ul>
   340  * 
   341  * If the string starts with the delimiter, the first item of the resulting
   342  * array will be an empty string.
   343  * 
   344  * If the string ends with the delimiter and the maximum list size is not
   345  * exceeded, the last array item will be an empty string.
   346  * In case the list size would be exceeded, the last array item will be the
   347  * remaining string after the last split, <i>including</i> the terminating
   348  * delimiter.
   349  * 
   350  * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
   351  * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
   352  * an allocator that manages the memory, to avoid this.
   353  *
   354  * @param string the string to split
   355  * @param delim  the delimiter string
   356  * @param count  IN: the maximum size of the resulting array (0 = no limit),
   357  *               OUT: the actual size of the array
   358  * @return a sstr_t array containing the split strings or
   359  *         <code>NULL</code> on error
   360  * 
   361  * @see sstrsplit_a()
   362  */
   363 sstr_t* ucx_strsplit(scstr_t string, scstr_t delim, ssize_t *count);
       /* sstrsplit() converts s and delim with SCSTR() and forwards count
        * unchanged to ucx_strsplit(). */
   365 #define sstrsplit(s, delim, count) ucx_strsplit(SCSTR(s), SCSTR(delim), count)
   367 /**
   368  * Performing sstrsplit() using a UcxAllocator.
   369  * 
   370  * <i>Read the description of sstrsplit() for details.</i>
   371  * 
   372  * The memory for the sstr_t.ptr pointers of the array items and the memory for
   373  * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
   374  * function.
   375  * 
   376  * <b>Note:</b> the allocator is not used for memory that is freed within the
   377  * same call of this function (locally scoped variables).
   378  * 
   379  * @param allocator the UcxAllocator used for allocating memory
   380  * @param string the string to split
   381  * @param delim  the delimiter string
   382  * @param count  IN: the maximum size of the resulting array (0 = no limit),
   383  *               OUT: the actual size of the array
   384  * @return a sstr_t array containing the split strings or
   385  *         <code>NULL</code> on error
   386  * 
   387  * @see sstrsplit()
   388  */
   389 sstr_t* ucx_strsplit_a(UcxAllocator *allocator, scstr_t string, scstr_t delim,
   390         ssize_t *count);
       /* sstrsplit_a() forwards the allocator a and the count pointer c as is
        * and converts the string s and the delimiter d with SCSTR(). The closing
        * parenthesis of SCSTR(d) was misplaced, which handed two arguments to
        * SCSTR() and only three to ucx_strsplit_a(). */
   392 #define sstrsplit_a(a, s, d, c) ucx_strsplit_a(a, SCSTR(s), SCSTR(d), c)
   394 /**
   395  * Compares two UCX strings with standard <code>memcmp()</code>.
   396  * 
   397  * At first it compares the sstr_t.length attribute of the two strings. The
   398  * <code>memcmp()</code> function is called, if and only if the lengths match.
   399  * 
   400  * @param s1 the first string
   401  * @param s2 the second string
   402  * @return -1, if the length of s1 is less than the length of s2 or 1, if the 
   403  * length of s1 is greater than the length of s2 or the result of
   404  * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
   405  */
   406 int ucx_str_cmp(scstr_t s1, scstr_t s2);
       /* sstrcmp() converts both operands with SCSTR() before calling
        * ucx_str_cmp(). */
   408 #define sstrcmp(s1, s2) ucx_str_cmp(SCSTR(s1), SCSTR(s2))
   410 /**
   411  * Compares two UCX strings ignoring the case.
   412  * 
   413  * At first it compares the sstr_t.length attribute of the two strings. If and
   414  * only if the lengths match, both strings are compared char by char ignoring
   415  * the case.
   416  * 
   417  * @param s1 the first string
   418  * @param s2 the second string
   419  * @return -1, if the length of s1 is less than the length of s2 or 1, if the 
   420  * length of s1 is greater than the length of s2 or the difference between the
   421  * first two differing characters otherwise (i.e. 0 if the strings match and
   422  * no characters differ)
   423  */
   424 int ucx_str_casecmp(scstr_t s1, scstr_t s2);
       /* sstrcasecmp() converts both operands with SCSTR() before calling
        * ucx_str_casecmp(). */
   426 #define sstrcasecmp(s1, s2) ucx_str_casecmp(SCSTR(s1), SCSTR(s2))
   428 /**
   429  * Creates a duplicate of the specified string.
   430  * 
   431  * The new sstr_t will contain a copy allocated by standard
   432  * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to
   433  * <code>free()</code>.
   434  * 
   435  * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
   436  * terminated.
   437  * 
   438  * @param string the string to duplicate
   439  * @return a duplicate of the string
   440  * @see sstrdup_a()
   441  */
   442 sstr_t scstrdup(scstr_t string);
       /* sstrdup() converts its argument with SCSTR() and calls scstrdup(); the
        * result is a sstr_t regardless of the argument type. */
   444 #define sstrdup(s) scstrdup(SCSTR(s))
   446 /**
   447  * Creates a duplicate of the specified string using a UcxAllocator.
   448  * 
   449  * The new sstr_t will contain a copy allocated by the allocator's
   450  * ucx_allocator_malloc function. So it is implementation dependent, whether the
   451  * returned sstr_t.ptr pointer must be passed to the allocator's
   452  * ucx_allocator_free function manually.
   453  * 
   454  * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
   455  * terminated.
   456  * 
   457  * @param allocator a valid instance of a UcxAllocator
   458  * @param string the string to duplicate
   459  * @return a duplicate of the string
   460  * @see sstrdup()
   461  */
   462 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t string);
       /* sstrdup_a() forwards the allocator as is, converts s with SCSTR() and
        * calls scstrdup_a(). */
   464 #define sstrdup_a(allocator, s) scstrdup_a(allocator, SCSTR(s))
   467 size_t ucx_strtrim(const char *str, size_t length, size_t *newlen);
       /* ^ Trim helper working on a raw pointer/length pair.
        * NOTE(review): presumably returns the offset of the first character
        * that is kept and stores the trimmed length in *newlen - confirm
        * against the implementation. */
   469 /**
   470  * Omits leading and trailing spaces.
   471  * 
   472  * This function returns a new sstr_t containing a trimmed version of the
   473  * specified string.
   474  * 
   475  * <b>Note:</b> the new sstr_t references the same memory, thus you
   476  * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
   477  * <code>free()</code>. It is also highly recommended to avoid assignments like
   478  * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
   479  * source string. Assignments of this type are only permitted, if the
   480  * sstr_t.ptr of the source string does not need to be freed or if another
   481  * reference to the source string exists.
   482  * 
   483  * @param string the string that shall be trimmed
   484  * @return a new sstr_t containing the trimmed string
   485  */
   486 sstr_t sstrtrim(sstr_t string);
   488 scstr_t scstrtrim(scstr_t string);
       /* ^ Takes and returns a scstr_t.
        * NOTE(review): presumably the const counterpart of sstrtrim(), so the
        * result would reference the memory of the input string - confirm. */
   490 /**
   491  * Checks, if a string has a specific prefix.
   492  * @param string the string to check
   493  * @param prefix the prefix the string should have
   494  * @return 1, if and only if the string has the specified prefix, 0 otherwise
   495  */
   496 int ucx_strprefix(scstr_t string, scstr_t prefix);
       /* sstrprefix() converts both arguments with SCSTR() before calling
        * ucx_strprefix(). */
   498 #define sstrprefix(string, prefix) ucx_strprefix(SCSTR(string), SCSTR(prefix))
   500 /**
   501  * Checks, if a string has a specific suffix.
   502  * @param string the string to check
   503  * @param suffix the suffix the string should have
   504  * @return 1, if and only if the string has the specified suffix, 0 otherwise
   505  */
   506 int ucx_strsuffix(scstr_t string, scstr_t suffix);
       /* sstrsuffix() converts both arguments with SCSTR() before calling
        * ucx_strsuffix(). The second macro parameter is named suffix to match
        * the function it wraps. */
   508 #define sstrsuffix(string, suffix) ucx_strsuffix(SCSTR(string), SCSTR(suffix))
   510 /**
   511  * Returns a lower case version of a string.
   512  * 
   513  * This function creates a duplicate of the input string, first. See the
   514  * documentation of sstrdup() for the implications.
   515  * 
   516  * @param string the input string
   517  * @return the resulting lower case string
   518  * @see sstrdup()
   519  */
   520 sstr_t ucx_strlower(scstr_t string);
       /* sstrlower() converts its argument with SCSTR() before calling
        * ucx_strlower(). */
   522 #define sstrlower(string) ucx_strlower(SCSTR(string))
   524 /**
   525  * Returns a lower case version of a string.
   526  * 
   527  * This function creates a duplicate of the input string, first. See the
   528  * documentation of sstrdup_a() for the implications.
   529  * 
   530  * @param allocator the allocator used for duplicating the string
   531  * @param string the input string
   532  * @return the resulting lower case string
   533  * @see sstrdup_a()
   534  */
   535 sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string);
       /* sstrlower_a() forwards the allocator as is and converts string with
        * SCSTR() before calling ucx_strlower_a(). */
   537 #define sstrlower_a(allocator, string) ucx_strlower_a(allocator, SCSTR(string))
   539 /**
   540  * Returns an upper case version of a string.
   541  * 
   542  * This function creates a duplicate of the input string, first. See the
   543  * documentation of sstrdup() for the implications.
   544  * 
   545  * @param string the input string
   546  * @return the resulting upper case string
   547  * @see sstrdup()
   548  */
   549 sstr_t ucx_strupper(scstr_t string);
       /* sstrupper() converts its argument with SCSTR() before calling
        * ucx_strupper(). */
   551 #define sstrupper(string) ucx_strupper(SCSTR(string))
   553 /**
   554  * Returns an upper case version of a string.
   555  * 
   556  * This function creates a duplicate of the input string, first. See the
   557  * documentation of sstrdup_a() for the implications.
   558  * 
   559  * @param allocator the allocator used for duplicating the string
   560  * @param string the input string
   561  * @return the resulting upper case string
   562  * @see sstrdup_a()
   563  */
   564 sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string);
       /* sstrupper_a() forwards the allocator as is and converts string with
        * SCSTR() before calling ucx_strupper_a(), exactly like sstrlower_a().
        * The conversion was missing, so a sstr_t argument did not match the
        * scstr_t parameter. */
   566 #define sstrupper_a(allocator, string) ucx_strupper_a(allocator, SCSTR(string))
   568 #ifdef	__cplusplus
   569 }
   570 #endif
   572 #endif	/* UCX_STRING_H */

mercurial