src/ucx/string.h

changeset 251
fae240d633fc
parent 250
b7d1317b138e
child 259
2f5dea574a75
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/ucx/string.h	Tue Oct 17 16:15:41 2017 +0200
     1.3 @@ -0,0 +1,460 @@
     1.4 +/*
     1.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     1.6 + *
     1.7 + * Copyright 2017 Olaf Wintermann. All rights reserved.
     1.8 + *
     1.9 + * Redistribution and use in source and binary forms, with or without
    1.10 + * modification, are permitted provided that the following conditions are met:
    1.11 + *
    1.12 + *   1. Redistributions of source code must retain the above copyright
    1.13 + *      notice, this list of conditions and the following disclaimer.
    1.14 + *
    1.15 + *   2. Redistributions in binary form must reproduce the above copyright
    1.16 + *      notice, this list of conditions and the following disclaimer in the
    1.17 + *      documentation and/or other materials provided with the distribution.
    1.18 + *
    1.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    1.20 + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    1.21 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    1.22 + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    1.23 + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    1.24 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    1.25 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    1.26 + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    1.27 + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    1.28 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    1.29 + * POSSIBILITY OF SUCH DAMAGE.
    1.30 + */
    1.31 +/**
    1.32 + * Bounded string implementation.
    1.33 + * 
    1.34 + * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
    1.35 + * The main difference from C strings is that <code>sstr_t</code> does <b>not
    1.36 + * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
    1.37 + * within the structure.
    1.38 + * 
    1.39 + * When using <code>sstr_t</code>, developers must be fully aware of what type
    1.40 + * of string (<code>NULL</code>-terminated or not) they are using when
    1.41 + * accessing the <code>char* ptr</code> directly.
    1.42 + * 
    1.43 + * The UCX string module provides some common string functions, known from
    1.44 + * standard libc, working with <code>sstr_t</code>.
    1.45 + * 
    1.46 + * @file   string.h
    1.47 + * @author Mike Becker
    1.48 + * @author Olaf Wintermann
    1.49 + */
    1.50 +
    1.51 +#ifndef UCX_STRING_H
    1.52 +#define	UCX_STRING_H
    1.53 +
    1.54 +#include <ucx/ucx.h>
    1.55 +#include <ucx/allocator.h>
    1.56 +#include <stddef.h>
    1.57 +
    1.58 +/** Brace initializer for a <code>sstr_t</code>; s should be a string literal. */
    1.59 +#define ST(s) { (char*)s, sizeof(s)-1 }
    1.60 +
    1.61 +/** Wraps a string literal in a <code>sstr_t</code> via sstrn() and sizeof. */
    1.62 +#define S(s) sstrn((char*)s, sizeof(s)-1)
    1.63 +
    1.64 +#ifdef	__cplusplus
    1.65 +extern "C" {
    1.66 +#endif
    1.67 +
    1.68 +/**
    1.69 + * The UCX string structure: a character pointer plus an explicit length.
    1.70 + */
    1.71 +typedef struct {
    1.72 +    /** A reference to the string data (<b>not necessarily
    1.73 +     * <code>NULL</code>-terminated</b>) */
    1.74 +    char   *ptr;
    1.75 +    /** The length of the string, not counting any terminator */
    1.76 +    size_t length;
    1.77 +} sstr_t;
    1.78 +
    1.79 +/**
    1.80 + * Creates a new sstr_t based on a C string.
    1.81 + * 
    1.82 + * The length is implicitly inferred by using a call to <code>strlen()</code>.
    1.83 + *
    1.84 + * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
    1.85 + * do want a copy, use sstrdup() on the return value of this function.
    1.86 + * 
    1.87 + * @param cstring the C string to wrap
    1.88 + * @return a new sstr_t containing the C string
    1.89 + * 
    1.90 + * @see sstrn()
    1.91 + */
    1.92 +sstr_t sstr(char *cstring);
    1.93 +
    1.94 +/**
    1.95 + * Creates a new sstr_t of the specified length based on a C string.
    1.96 + *
    1.97 + * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
    1.98 + * do want a copy, use sstrdup() on the return value of this function.
    1.99 + * 
   1.100 + * @param cstring  the C string to wrap
   1.101 + * @param length   the length of the string
   1.102 + * @return a new sstr_t containing the C string
   1.103 + * 
   1.104 + * @see sstr()
   1.105 + * @see S()
   1.106 + */
   1.107 +sstr_t sstrn(char *cstring, size_t length);
   1.108 +
   1.109 +
   1.110 +/**
   1.111 + * Returns the cumulated length of all specified strings.
   1.112 + *
   1.113 + * At least one string must be specified.
   1.114 + * 
   1.115 + * <b>Attention:</b> if the count argument does not match the count of the
   1.116 + * specified strings, the behavior is undefined.
   1.117 + *
   1.118 + * @param count    the total number of specified strings (so at least 1)
   1.119 + * @param string   the first string
   1.120 + * @param ...      all other strings
   1.121 + * @return the cumulated length of all strings
   1.122 + */
   1.123 +size_t sstrnlen(size_t count, sstr_t string, ...);
   1.124 +
   1.125 +/**
   1.126 + * Concatenates two or more strings.
   1.127 + * 
   1.128 + * The resulting string will be allocated by standard <code>malloc()</code>. 
   1.129 + * So developers <b>MUST</b> pass the sstr_t.ptr to <code>free()</code>.
   1.130 + * 
   1.131 + * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
   1.132 + * terminated.
   1.133 + *
   1.134 + * @param count   the total number of strings to concatenate
   1.135 + * @param s1      first string
   1.136 + * @param s2      second string
   1.137 + * @param ...     all remaining strings
   1.138 + * @return the concatenated string
   1.139 + */
   1.140 +sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...);
   1.141 +
   1.142 +/**
   1.143 + * Concatenates two or more strings using a UcxAllocator.
   1.144 + * 
   1.145 + * See sstrcat() for details.
   1.146 + *
   1.147 + * @param a       the allocator to use
   1.148 + * @param count   the total number of strings to concatenate
   1.149 + * @param s1      first string
   1.150 + * @param s2      second string
   1.151 + * @param ...     all remaining strings
   1.152 + * @return the concatenated string
   1.153 + */
   1.154 +sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...);
   1.155 +
   1.156 +
   1.157 +/**
   1.158 + * Returns a substring starting at the specified location.
   1.159 + * 
   1.160 + * <b>Attention:</b> the new string references the same memory area as the
   1.161 + * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
   1.162 + * Use sstrdup() to get a copy.
   1.163 + * 
   1.164 + * @param string input string
   1.165 + * @param start  start location of the substring
   1.166 + * @return a substring of <code>string</code> starting at <code>start</code>
   1.167 + * 
   1.168 + * @see sstrsubsl()
   1.169 + * @see sstrchr()
   1.170 + */
   1.171 +sstr_t sstrsubs(sstr_t string, size_t start);
   1.172 +
   1.173 +/**
   1.174 + * Returns a substring with a maximum length starting at the specified location.
   1.175 + * 
   1.176 + * <b>Attention:</b> the new string references the same memory area as the
   1.177 + * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
   1.178 + * Use sstrdup() to get a copy.
   1.179 + * 
   1.180 + * @param string input string
   1.181 + * @param start  start location of the substring
   1.182 + * @param length the maximum length of the substring
   1.183 + * @return a substring of <code>string</code> starting at <code>start</code>
   1.184 + * with a maximum length of <code>length</code>
   1.185 + * 
   1.186 + * @see sstrsubs()
   1.187 + * @see sstrchr()
   1.188 + */
   1.189 +sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
   1.190 +
   1.191 +/**
   1.192 + * Returns a substring starting at the location of the first occurrence of the
   1.193 + * specified character.
   1.194 + * 
   1.195 + * If the string does not contain the character, an empty string is returned.
   1.196 + * 
   1.197 + * @param string the string where to locate the character
   1.198 + * @param chr    the character to locate
   1.199 + * @return       a substring starting at the first location of <code>chr</code>
   1.200 + * 
   1.201 + * @see sstrsubs()
   1.202 + */
   1.203 +sstr_t sstrchr(sstr_t string, int chr);
   1.204 +
   1.205 +/**
   1.206 + * Returns a substring starting at the location of the last occurrence of the
   1.207 + * specified character.
   1.208 + * 
   1.209 + * If the string does not contain the character, an empty string is returned.
   1.210 + * 
   1.211 + * @param string the string where to locate the character
   1.212 + * @param chr    the character to locate
   1.213 + * @return       a substring starting at the last location of <code>chr</code>
   1.214 + * 
   1.215 + * @see sstrsubs()
   1.216 + */
   1.217 +sstr_t sstrrchr(sstr_t string, int chr);
   1.218 +
   1.219 +/**
   1.220 + * Returns a substring starting at the location of the first occurrence of the
   1.221 + * specified string.
   1.222 + * 
   1.223 + * If the string does not contain the other string, an empty string is returned.
   1.224 + * 
   1.225 + * If <code>match</code> is an empty string, the complete <code>string</code> is
   1.226 + * returned.
   1.227 + * 
   1.228 + * @param string the string to be scanned
   1.229 + * @param match  string containing the sequence of characters to match
   1.230 + * @return       a substring starting at the first occurrence of
   1.231 + *               <code>match</code>, or an empty string, if the sequence is not
   1.232 + *               present in <code>string</code>
   1.233 + */
   1.234 +sstr_t sstrstr(sstr_t string, sstr_t match);
   1.235 +
   1.236 +/**
   1.237 + * Splits a string into parts by using a delimiter string.
   1.238 + * 
   1.239 + * This function will return <code>NULL</code>, if one of the following happens:
   1.240 + * <ul>
   1.241 + *   <li>the string length is zero</li>
   1.242 + *   <li>the delimiter length is zero</li>
   1.243 + *   <li>the string equals the delimiter</li>
   1.244 + *   <li>memory allocation fails</li>
   1.245 + * </ul>
   1.246 + * 
   1.247 + * The integer referenced by <code>count</code> is used as input and determines
   1.248 + * the maximum size of the resulting array, i.e. the maximum count of splits to
   1.249 + * perform + 1.
   1.250 + * 
   1.251 + * The integer referenced by <code>count</code> is also used as output and is
   1.252 + * set to
   1.253 + * <ul>
   1.254 + *   <li>-2, on memory allocation errors</li>
   1.255 + *   <li>-1, if either the string or the delimiter is an empty string</li>
   1.256 + *   <li>0, if the string equals the delimiter</li>
   1.257 + *   <li>1, if the string does not contain the delimiter</li>
   1.258 + *   <li>the count of array items, otherwise</li>
   1.259 + * </ul>
   1.260 + * 
   1.261 + * If the string starts with the delimiter, the first item of the resulting
   1.262 + * array will be an empty string.
   1.263 + * 
   1.264 + * If the string ends with the delimiter and the maximum list size is not
   1.265 + * exceeded, the last array item will be an empty string.
   1.266 + * In case the list size would be exceeded, the last array item will be the
   1.267 + * remaining string after the last split, <i>including</i> the terminating
   1.268 + * delimiter.
   1.269 + * 
   1.270 + * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
   1.271 + * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
   1.272 + * an allocator that manages the memory to avoid this.
   1.273 + *
   1.274 + * @param string the string to split
   1.275 + * @param delim  the delimiter string
   1.276 + * @param count  IN: the maximum size of the resulting array (0 = no limit),
   1.277 + *               OUT: the actual size of the array
   1.278 + * @return a sstr_t array containing the split strings or
   1.279 + *         <code>NULL</code> on error
   1.280 + * 
   1.281 + * @see sstrsplit_a()
   1.282 + */
   1.283 +sstr_t* sstrsplit(sstr_t string, sstr_t delim, ssize_t *count);
   1.284 +
   1.285 +/**
   1.286 + * Performs sstrsplit() using a UcxAllocator.
   1.287 + * 
   1.288 + * <i>Read the description of sstrsplit() for details.</i>
   1.289 + * 
   1.290 + * The memory for the sstr_t.ptr pointers of the array items and the memory for
   1.291 + * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
   1.292 + * function.
   1.293 + * 
   1.294 + * <b>Note:</b> the allocator is not used for memory that is freed within the
   1.295 + * same call of this function (locally scoped variables).
   1.296 + * 
   1.297 + * @param allocator the UcxAllocator used for allocating memory
   1.298 + * @param string the string to split
   1.299 + * @param delim  the delimiter string
   1.300 + * @param count  IN: the maximum size of the resulting array (0 = no limit),
   1.301 + *               OUT: the actual size of the array
   1.302 + * @return a sstr_t array containing the split strings or
   1.303 + *         <code>NULL</code> on error
   1.304 + * 
   1.305 + * @see sstrsplit()
   1.306 + */
   1.307 +sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim,
   1.308 +        ssize_t *count);
   1.309 +
   1.310 +/**
   1.311 + * Compares two UCX strings with standard <code>memcmp()</code>.
   1.312 + * 
   1.313 + * At first it compares the sstr_t.length attribute of the two strings. The
   1.314 + * <code>memcmp()</code> function is called, if and only if the lengths match.
   1.315 + * 
   1.316 + * @param s1 the first string
   1.317 + * @param s2 the second string
   1.318 + * @return -1, if the length of s1 is less than the length of s2 or 1, if the 
   1.319 + * length of s1 is greater than the length of s2 or the result of
   1.320 + * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
   1.321 + */
   1.322 +int sstrcmp(sstr_t s1, sstr_t s2);
   1.323 +
   1.324 +/**
   1.325 + * Compares two UCX strings ignoring the case.
   1.326 + * 
   1.327 + * At first it compares the sstr_t.length attribute of the two strings. If and
   1.328 + * only if the lengths match, both strings are compared char by char ignoring
   1.329 + * the case.
   1.330 + * 
   1.331 + * @param s1 the first string
   1.332 + * @param s2 the second string
   1.333 + * @return -1, if the length of s1 is less than the length of s2 or 1, if the 
   1.334 + * length of s1 is greater than the length of s2 or the difference between the
   1.335 + * first two differing characters otherwise (i.e. 0 if the strings match and
   1.336 + * no characters differ)
   1.337 + */
   1.338 +int sstrcasecmp(sstr_t s1, sstr_t s2);
   1.339 +
   1.340 +/**
   1.341 + * Creates a duplicate of the specified string.
   1.342 + * 
   1.343 + * The new sstr_t will contain a copy allocated by standard
   1.344 + * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to
   1.345 + * <code>free()</code>.
   1.346 + * 
   1.347 + * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
   1.348 + * terminated.
   1.349 + * 
   1.350 + * @param string the string to duplicate
   1.351 + * @return a duplicate of the string
   1.352 + * @see sstrdup_a()
   1.353 + */
   1.354 +sstr_t sstrdup(sstr_t string);
   1.355 +
   1.356 +/**
   1.357 + * Creates a duplicate of the specified string using a UcxAllocator.
   1.358 + * 
   1.359 + * The new sstr_t will contain a copy allocated by the allocator's
   1.360 + * ucx_allocator_malloc function. So it is implementation-dependent whether the
   1.361 + * returned sstr_t.ptr pointer must be passed to the allocator's
   1.362 + * ucx_allocator_free function manually.
   1.363 + * 
   1.364 + * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
   1.365 + * terminated.
   1.366 + * 
   1.367 + * @param allocator a valid instance of a UcxAllocator
   1.368 + * @param string the string to duplicate
   1.369 + * @return a duplicate of the string
   1.370 + * @see sstrdup()
   1.371 + */
   1.372 +sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string);
   1.373 +
   1.374 +/**
   1.375 + * Omits leading and trailing spaces.
   1.376 + * 
   1.377 + * This function returns a new sstr_t containing a trimmed version of the
   1.378 + * specified string.
   1.379 + * 
   1.380 + * <b>Note:</b> the new sstr_t references the same memory, thus you
   1.381 + * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
   1.382 + * <code>free()</code>. It is also highly recommended to avoid assignments like
   1.383 + * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
   1.384 + * source string. Assignments of this type are only permitted, if the
   1.385 + * sstr_t.ptr of the source string does not need to be freed or if another
   1.386 + * reference to the source string exists.
   1.387 + * 
   1.388 + * @param string the string that shall be trimmed
   1.389 + * @return a new sstr_t containing the trimmed string
   1.390 + */
   1.391 +sstr_t sstrtrim(sstr_t string);
   1.392 +
   1.393 +/**
   1.394 + * Checks, if a string has a specific prefix.
   1.395 + * @param string the string to check
   1.396 + * @param prefix the prefix the string should have
   1.397 + * @return 1, if and only if the string has the specified prefix, 0 otherwise
   1.398 + */
   1.399 +int sstrprefix(sstr_t string, sstr_t prefix);
   1.400 +
   1.401 +/**
   1.402 + * Checks, if a string has a specific suffix.
   1.403 + * @param string the string to check
   1.404 + * @param suffix the suffix the string should have
   1.405 + * @return 1, if and only if the string has the specified suffix, 0 otherwise
   1.406 + */
   1.407 +int sstrsuffix(sstr_t string, sstr_t suffix);
   1.408 +
   1.409 +/**
   1.410 + * Returns a lower case version of a string.
   1.411 + * 
   1.412 + * This function creates a duplicate of the input string, first. See the
   1.413 + * documentation of sstrdup() for the implications.
   1.414 + * 
   1.415 + * @param string the input string
   1.416 + * @return the resulting lower case string
   1.417 + * @see sstrdup()
   1.418 + */
   1.419 +sstr_t sstrlower(sstr_t string);
   1.420 +
   1.421 +/**
   1.422 + * Returns a lower case version of a string using a UcxAllocator.
   1.423 + * 
   1.424 + * This function creates a duplicate of the input string, first. See the
   1.425 + * documentation of sstrdup_a() for the implications.
   1.426 + * 
   1.427 + * @param allocator the allocator used for duplicating the string
   1.428 + * @param string the input string
   1.429 + * @return the resulting lower case string
   1.430 + * @see sstrdup_a()
   1.431 + */
   1.432 +sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string);
   1.433 +
   1.434 +/**
   1.435 + * Returns an upper case version of a string.
   1.436 + * 
   1.437 + * This function creates a duplicate of the input string, first. See the
   1.438 + * documentation of sstrdup() for the implications.
   1.439 + * 
   1.440 + * @param string the input string
   1.441 + * @return the resulting upper case string
   1.442 + * @see sstrdup()
   1.443 + */
   1.444 +sstr_t sstrupper(sstr_t string);
   1.445 +
   1.446 +/**
   1.447 + * Returns an upper case version of a string using a UcxAllocator.
   1.448 + * 
   1.449 + * This function creates a duplicate of the input string, first. See the
   1.450 + * documentation of sstrdup_a() for the implications.
   1.451 + * 
   1.452 + * @param allocator the allocator used for duplicating the string
   1.453 + * @param string the input string
   1.454 + * @return the resulting upper case string
   1.455 + * @see sstrdup_a()
   1.456 + */
   1.457 +sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string);
   1.458 +
   1.459 +#ifdef	__cplusplus
   1.460 +}
   1.461 +#endif
   1.462 +
   1.463 +#endif	/* UCX_STRING_H */

mercurial