1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/ucx/string.h Tue Oct 17 16:15:41 2017 +0200 1.3 @@ -0,0 +1,460 @@ 1.4 +/* 1.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 1.6 + * 1.7 + * Copyright 2017 Olaf Wintermann. All rights reserved. 1.8 + * 1.9 + * Redistribution and use in source and binary forms, with or without 1.10 + * modification, are permitted provided that the following conditions are met: 1.11 + * 1.12 + * 1. Redistributions of source code must retain the above copyright 1.13 + * notice, this list of conditions and the following disclaimer. 1.14 + * 1.15 + * 2. Redistributions in binary form must reproduce the above copyright 1.16 + * notice, this list of conditions and the following disclaimer in the 1.17 + * documentation and/or other materials provided with the distribution. 1.18 + * 1.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 1.20 + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1.21 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1.22 + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 1.23 + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 1.24 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 1.25 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 1.26 + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 1.27 + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 1.28 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 1.29 + * POSSIBILITY OF SUCH DAMAGE. 1.30 + */ 1.31 +/** 1.32 + * Bounded string implementation. 1.33 + * 1.34 + * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings. 1.35 + * The main difference to C strings is, that <code>sstr_t</code> does <b>not 1.36 + * need to be <code>NULL</code>-terminated</b>. Instead the length is stored 1.37 + * within the structure. 1.38 + * 1.39 + * When using <code>sstr_t</code>, developers must be full aware of what type 1.40 + * of string (<code>NULL</code>-terminated) or not) they are using, when 1.41 + * accessing the <code>char* ptr</code> directly. 1.42 + * 1.43 + * The UCX string module provides some common string functions, known from 1.44 + * standard libc, working with <code>sstr_t</code>. 1.45 + * 1.46 + * @file string.h 1.47 + * @author Mike Becker 1.48 + * @author Olaf Wintermann 1.49 + */ 1.50 + 1.51 +#ifndef UCX_STRING_H 1.52 +#define UCX_STRING_H 1.53 + 1.54 +#include <ucx/ucx.h> 1.55 +#include <ucx/allocator.h> 1.56 +#include <stddef.h> 1.57 + 1.58 +/** Shortcut for a <code>sstr_t struct</code> literal. */ 1.59 +#define ST(s) { (char*)s, sizeof(s)-1 } 1.60 + 1.61 +/** Shortcut for the conversion of a C string to a <code>sstr_t</code>. */ 1.62 +#define S(s) sstrn((char*)s, sizeof(s)-1) 1.63 + 1.64 +#ifdef __cplusplus 1.65 +extern "C" { 1.66 +#endif 1.67 + 1.68 +/** 1.69 + * The UCX string structure. 1.70 + */ 1.71 +typedef struct { 1.72 + /** A reference to the string (<b>not necessarily <code>NULL</code> 1.73 + * -terminated</b>) */ 1.74 + char *ptr; 1.75 + /** The length of the string */ 1.76 + size_t length; 1.77 +} sstr_t; 1.78 + 1.79 +/** 1.80 + * Creates a new sstr_t based on a C string. 1.81 + * 1.82 + * The length is implicitly inferred by using a call to <code>strlen()</code>. 1.83 + * 1.84 + * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you 1.85 + * do want a copy, use sstrdup() on the return value of this function. 1.86 + * 1.87 + * @param cstring the C string to wrap 1.88 + * @return a new sstr_t containing the C string 1.89 + * 1.90 + * @see sstrn() 1.91 + */ 1.92 +sstr_t sstr(char *cstring); 1.93 + 1.94 +/** 1.95 + * Creates a new sstr_t of the specified length based on a C string. 1.96 + * 1.97 + * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you 1.98 + * do want a copy, use sstrdup() on the return value of this function. 1.99 + * 1.100 + * @param cstring the C string to wrap 1.101 + * @param length the length of the string 1.102 + * @return a new sstr_t containing the C string 1.103 + * 1.104 + * @see sstr() 1.105 + * @see S() 1.106 + */ 1.107 +sstr_t sstrn(char *cstring, size_t length); 1.108 + 1.109 + 1.110 +/** 1.111 + * Returns the cumulated length of all specified strings. 1.112 + * 1.113 + * At least one string must be specified. 1.114 + * 1.115 + * <b>Attention:</b> if the count argument does not match the count of the 1.116 + * specified strings, the behavior is undefined. 1.117 + * 1.118 + * @param count the total number of specified strings (so at least 1) 1.119 + * @param string the first string 1.120 + * @param ... all other strings 1.121 + * @return the cumulated length of all strings 1.122 + */ 1.123 +size_t sstrnlen(size_t count, sstr_t string, ...); 1.124 + 1.125 +/** 1.126 + * Concatenates two or more strings. 1.127 + * 1.128 + * The resulting string will be allocated by standard <code>malloc()</code>. 1.129 + * So developers <b>MUST</b> pass the sstr_t.ptr to <code>free()</code>. 1.130 + * 1.131 + * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>- 1.132 + * terminated. 1.133 + * 1.134 + * @param count the total number of strings to concatenate 1.135 + * @param s1 first string 1.136 + * @param s2 second string 1.137 + * @param ... all remaining strings 1.138 + * @return the concatenated string 1.139 + */ 1.140 +sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...); 1.141 + 1.142 +/** 1.143 + * Concatenates two or more strings using a UcxAllocator. 1.144 + * 1.145 + * See sstrcat() for details. 1.146 + * 1.147 + * @param a the allocator to use 1.148 + * @param count the total number of strings to concatenate 1.149 + * @param s1 first string 1.150 + * @param s2 second string 1.151 + * @param ... all remaining strings 1.152 + * @return the concatenated string 1.153 + */ 1.154 +sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...); 1.155 + 1.156 + 1.157 +/** 1.158 + * Returns a substring starting at the specified location. 1.159 + * 1.160 + * <b>Attention:</b> the new string references the same memory area as the 1.161 + * input string and will <b>NOT</b> be <code>NULL</code>-terminated. 1.162 + * Use sstrdup() to get a copy. 1.163 + * 1.164 + * @param string input string 1.165 + * @param start start location of the substring 1.166 + * @return a substring of <code>string</code> starting at <code>start</code> 1.167 + * 1.168 + * @see sstrsubsl() 1.169 + * @see sstrchr() 1.170 + */ 1.171 +sstr_t sstrsubs(sstr_t string, size_t start); 1.172 + 1.173 +/** 1.174 + * Returns a substring with a maximum length starting at the specified location. 1.175 + * 1.176 + * <b>Attention:</b> the new string references the same memory area as the 1.177 + * input string and will <b>NOT</b> be <code>NULL</code>-terminated. 1.178 + * Use sstrdup() to get a copy. 1.179 + * 1.180 + * @param string input string 1.181 + * @param start start location of the substring 1.182 + * @param length the maximum length of the substring 1.183 + * @return a substring of <code>string</code> starting at <code>start</code> 1.184 + * with a maximum length of <code>length</code> 1.185 + * 1.186 + * @see sstrsubs() 1.187 + * @see sstrchr() 1.188 + */ 1.189 +sstr_t sstrsubsl(sstr_t string, size_t start, size_t length); 1.190 + 1.191 +/** 1.192 + * Returns a substring starting at the location of the first occurrence of the 1.193 + * specified character. 1.194 + * 1.195 + * If the string does not contain the character, an empty string is returned. 1.196 + * 1.197 + * @param string the string where to locate the character 1.198 + * @param chr the character to locate 1.199 + * @return a substring starting at the first location of <code>chr</code> 1.200 + * 1.201 + * @see sstrsubs() 1.202 + */ 1.203 +sstr_t sstrchr(sstr_t string, int chr); 1.204 + 1.205 +/** 1.206 + * Returns a substring starting at the location of the last occurrence of the 1.207 + * specified character. 1.208 + * 1.209 + * If the string does not contain the character, an empty string is returned. 1.210 + * 1.211 + * @param string the string where to locate the character 1.212 + * @param chr the character to locate 1.213 + * @return a substring starting at the last location of <code>chr</code> 1.214 + * 1.215 + * @see sstrsubs() 1.216 + */ 1.217 +sstr_t sstrrchr(sstr_t string, int chr); 1.218 + 1.219 +/** 1.220 + * Returns a substring starting at the location of the first occurrence of the 1.221 + * specified string. 1.222 + * 1.223 + * If the string does not contain the other string, an empty string is returned. 1.224 + * 1.225 + * If <code>match</code> is an empty string, the complete <code>string</code> is 1.226 + * returned. 1.227 + * 1.228 + * @param string the string to be scanned 1.229 + * @param match string containing the sequence of characters to match 1.230 + * @return a substring starting at the first occurrence of 1.231 + * <code>match</code>, or an empty string, if the sequence is not 1.232 + * present in <code>string</code> 1.233 + */ 1.234 +sstr_t sstrstr(sstr_t string, sstr_t match); 1.235 + 1.236 +/** 1.237 + * Splits a string into parts by using a delimiter string. 1.238 + * 1.239 + * This function will return <code>NULL</code>, if one of the following happens: 1.240 + * <ul> 1.241 + * <li>the string length is zero</li> 1.242 + * <li>the delimeter length is zero</li> 1.243 + * <li>the string equals the delimeter</li> 1.244 + * <li>memory allocation fails</li> 1.245 + * </ul> 1.246 + * 1.247 + * The integer referenced by <code>count</code> is used as input and determines 1.248 + * the maximum size of the resulting array, i.e. the maximum count of splits to 1.249 + * perform + 1. 1.250 + * 1.251 + * The integer referenced by <code>count</code> is also used as output and is 1.252 + * set to 1.253 + * <ul> 1.254 + * <li>-2, on memory allocation errors</li> 1.255 + * <li>-1, if either the string or the delimiter is an empty string</li> 1.256 + * <li>0, if the string equals the delimiter</li> 1.257 + * <li>1, if the string does not contain the delimiter</li> 1.258 + * <li>the count of array items, otherwise</li> 1.259 + * </ul> 1.260 + * 1.261 + * If the string starts with the delimiter, the first item of the resulting 1.262 + * array will be an empty string. 1.263 + * 1.264 + * If the string ends with the delimiter and the maximum list size is not 1.265 + * exceeded, the last array item will be an empty string. 1.266 + * In case the list size would be exceeded, the last array item will be the 1.267 + * remaining string after the last split, <i>including</i> the terminating 1.268 + * delimiter. 1.269 + * 1.270 + * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array 1.271 + * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with 1.272 + * an allocator to managed memory, to avoid this. 1.273 + * 1.274 + * @param string the string to split 1.275 + * @param delim the delimiter string 1.276 + * @param count IN: the maximum size of the resulting array (0 = no limit), 1.277 + * OUT: the actual size of the array 1.278 + * @return a sstr_t array containing the split strings or 1.279 + * <code>NULL</code> on error 1.280 + * 1.281 + * @see sstrsplit_a() 1.282 + */ 1.283 +sstr_t* sstrsplit(sstr_t string, sstr_t delim, ssize_t *count); 1.284 + 1.285 +/** 1.286 + * Performing sstrsplit() using a UcxAllocator. 1.287 + * 1.288 + * <i>Read the description of sstrsplit() for details.</i> 1.289 + * 1.290 + * The memory for the sstr_t.ptr pointers of the array items and the memory for 1.291 + * the sstr_t array itself are allocated by using the UcxAllocator.malloc() 1.292 + * function. 1.293 + * 1.294 + * <b>Note:</b> the allocator is not used for memory that is freed within the 1.295 + * same call of this function (locally scoped variables). 1.296 + * 1.297 + * @param allocator the UcxAllocator used for allocating memory 1.298 + * @param string the string to split 1.299 + * @param delim the delimiter string 1.300 + * @param count IN: the maximum size of the resulting array (0 = no limit), 1.301 + * OUT: the actual size of the array 1.302 + * @return a sstr_t array containing the split strings or 1.303 + * <code>NULL</code> on error 1.304 + * 1.305 + * @see sstrsplit() 1.306 + */ 1.307 +sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim, 1.308 + ssize_t *count); 1.309 + 1.310 +/** 1.311 + * Compares two UCX strings with standard <code>memcmp()</code>. 1.312 + * 1.313 + * At first it compares the sstr_t.length attribute of the two strings. The 1.314 + * <code>memcmp()</code> function is called, if and only if the lengths match. 1.315 + * 1.316 + * @param s1 the first string 1.317 + * @param s2 the second string 1.318 + * @return -1, if the length of s1 is less than the length of s2 or 1, if the 1.319 + * length of s1 is greater than the length of s2 or the result of 1.320 + * <code>memcmp()</code> otherwise (i.e. 0 if the strings match) 1.321 + */ 1.322 +int sstrcmp(sstr_t s1, sstr_t s2); 1.323 + 1.324 +/** 1.325 + * Compares two UCX strings ignoring the case. 1.326 + * 1.327 + * At first it compares the sstr_t.length attribute of the two strings. If and 1.328 + * only if the lengths match, both strings are compared char by char ignoring 1.329 + * the case. 1.330 + * 1.331 + * @param s1 the first string 1.332 + * @param s2 the second string 1.333 + * @return -1, if the length of s1 is less than the length of s2 or 1, if the 1.334 + * length of s1 is greater than the length of s2 or the difference between the 1.335 + * first two differing characters otherwise (i.e. 0 if the strings match and 1.336 + * no characters differ) 1.337 + */ 1.338 +int sstrcasecmp(sstr_t s1, sstr_t s2); 1.339 + 1.340 +/** 1.341 + * Creates a duplicate of the specified string. 1.342 + * 1.343 + * The new sstr_t will contain a copy allocated by standard 1.344 + * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to 1.345 + * <code>free()</code>. 1.346 + * 1.347 + * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>- 1.348 + * terminated. 1.349 + * 1.350 + * @param string the string to duplicate 1.351 + * @return a duplicate of the string 1.352 + * @see sstrdup_a() 1.353 + */ 1.354 +sstr_t sstrdup(sstr_t string); 1.355 + 1.356 +/** 1.357 + * Creates a duplicate of the specified string using a UcxAllocator. 1.358 + * 1.359 + * The new sstr_t will contain a copy allocated by the allocators 1.360 + * ucx_allocator_malloc function. So it is implementation depended, whether the 1.361 + * returned sstr_t.ptr pointer must be passed to the allocators 1.362 + * ucx_allocator_free function manually. 1.363 + * 1.364 + * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>- 1.365 + * terminated. 1.366 + * 1.367 + * @param allocator a valid instance of a UcxAllocator 1.368 + * @param string the string to duplicate 1.369 + * @return a duplicate of the string 1.370 + * @see sstrdup() 1.371 + */ 1.372 +sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string); 1.373 + 1.374 +/** 1.375 + * Omits leading and trailing spaces. 1.376 + * 1.377 + * This function returns a new sstr_t containing a trimmed version of the 1.378 + * specified string. 1.379 + * 1.380 + * <b>Note:</b> the new sstr_t references the same memory, thus you 1.381 + * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to 1.382 + * <code>free()</code>. It is also highly recommended to avoid assignments like 1.383 + * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the 1.384 + * source string. Assignments of this type are only permitted, if the 1.385 + * sstr_t.ptr of the source string does not need to be freed or if another 1.386 + * reference to the source string exists. 1.387 + * 1.388 + * @param string the string that shall be trimmed 1.389 + * @return a new sstr_t containing the trimmed string 1.390 + */ 1.391 +sstr_t sstrtrim(sstr_t string); 1.392 + 1.393 +/** 1.394 + * Checks, if a string has a specific prefix. 1.395 + * @param string the string to check 1.396 + * @param prefix the prefix the string should have 1.397 + * @return 1, if and only if the string has the specified prefix, 0 otherwise 1.398 + */ 1.399 +int sstrprefix(sstr_t string, sstr_t prefix); 1.400 + 1.401 +/** 1.402 + * Checks, if a string has a specific suffix. 1.403 + * @param string the string to check 1.404 + * @param suffix the suffix the string should have 1.405 + * @return 1, if and only if the string has the specified suffix, 0 otherwise 1.406 + */ 1.407 +int sstrsuffix(sstr_t string, sstr_t suffix); 1.408 + 1.409 +/** 1.410 + * Returns a lower case version of a string. 1.411 + * 1.412 + * This function creates a duplicate of the input string, first. See the 1.413 + * documentation of sstrdup() for the implications. 1.414 + * 1.415 + * @param string the input string 1.416 + * @return the resulting lower case string 1.417 + * @see sstrdup() 1.418 + */ 1.419 +sstr_t sstrlower(sstr_t string); 1.420 + 1.421 +/** 1.422 + * Returns a lower case version of a string. 1.423 + * 1.424 + * This function creates a duplicate of the input string, first. See the 1.425 + * documentation of sstrdup_a() for the implications. 1.426 + * 1.427 + * @param allocator the allocator used for duplicating the string 1.428 + * @param string the input string 1.429 + * @return the resulting lower case string 1.430 + * @see sstrdup_a() 1.431 + */ 1.432 +sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string); 1.433 + 1.434 +/** 1.435 + * Returns a upper case version of a string. 1.436 + * 1.437 + * This function creates a duplicate of the input string, first. See the 1.438 + * documentation of sstrdup() for the implications. 1.439 + * 1.440 + * @param string the input string 1.441 + * @return the resulting upper case string 1.442 + * @see sstrdup() 1.443 + */ 1.444 +sstr_t sstrupper(sstr_t string); 1.445 + 1.446 +/** 1.447 + * Returns a upper case version of a string. 1.448 + * 1.449 + * This function creates a duplicate of the input string, first. See the 1.450 + * documentation of sstrdup_a() for the implications. 1.451 + * 1.452 + * @param allocator the allocator used for duplicating the string 1.453 + * @param string the input string 1.454 + * @return the resulting upper case string 1.455 + * @see sstrdup_a() 1.456 + */ 1.457 +sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string); 1.458 + 1.459 +#ifdef __cplusplus 1.460 +} 1.461 +#endif 1.462 + 1.463 +#endif /* UCX_STRING_H */