src/ucx/string.h

Wed, 02 May 2018 21:45:52 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 02 May 2018 21:45:52 +0200
changeset 283
c3b6ff227481
parent 259
2f5dea574a75
child 306
90b6d69bb499
permissions
-rw-r--r--

adds more convenience macros for sstr

This commit also increases the UCX version number towards the next release.
- New product version: 1.1
- New library version: 2.0.1 - we are backwards, but not forward compatible.

olaf@20 1 /*
universe@103 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
olaf@20 3 *
universe@259 4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
universe@103 5 *
universe@103 6 * Redistribution and use in source and binary forms, with or without
universe@103 7 * modification, are permitted provided that the following conditions are met:
universe@103 8 *
universe@103 9 * 1. Redistributions of source code must retain the above copyright
universe@103 10 * notice, this list of conditions and the following disclaimer.
universe@103 11 *
universe@103 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@103 13 * notice, this list of conditions and the following disclaimer in the
universe@103 14 * documentation and/or other materials provided with the distribution.
universe@103 15 *
universe@103 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@103 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@103 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@103 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@103 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@103 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@103 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@103 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@103 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@103 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@103 26 * POSSIBILITY OF SUCH DAMAGE.
olaf@20 27 */
universe@116 28 /**
universe@116 29 * Bounded string implementation.
universe@116 30 *
universe@116 31 * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
universe@116 32 * The main difference from C strings is that <code>sstr_t</code> does <b>not
universe@116 33 * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
universe@116 34 * within the structure.
universe@116 35 *
universe@116 36 * When using <code>sstr_t</code>, developers must be fully aware of what type
universe@116 37 * of string (<code>NULL</code>-terminated or not) they are using when
universe@116 38 * accessing the <code>char* ptr</code> directly.
universe@116 39 *
universe@116 40 * The UCX string module provides some common string functions, known from
universe@116 41 * standard libc, working with <code>sstr_t</code>.
universe@116 42 *
universe@116 43 * @file string.h
universe@116 44 * @author Mike Becker
universe@116 45 * @author Olaf Wintermann
universe@116 46 */
olaf@20 47
universe@116 48 #ifndef UCX_STRING_H
universe@116 49 #define UCX_STRING_H
olaf@20 50
universe@259 51 #include "ucx.h"
universe@259 52 #include "allocator.h"
universe@38 53 #include <stddef.h>
universe@38 54
universe@116 55 /** Shortcut for a brace initializer of a <code>sstr_t struct</code>; <code>s</code> must be a char array (e.g. a string literal), because the length is taken as <code>sizeof(s)-1</code>. */
universe@116 56 #define ST(s) { (char*)s, sizeof(s)-1 }
universe@146 57
universe@116 58 /** Shortcut for the conversion of a C string literal to a <code>sstr_t</code> via sstrn(); the length is <code>sizeof(s)-1</code>, so a plain <code>char*</code> would yield a wrong length. */
universe@116 59 #define S(s) sstrn((char*)s, sizeof(s)-1)
olaf@20 60
universe@283 61 /** Expands a sstr_t to two printf arguments (the length cast to <code>int</code>, then the pointer) as consumed by <code>"%.*s"</code>; note that <code>s</code> is evaluated twice. */
universe@283 62 #define SFMT(s) (int) (s).length, (s).ptr
universe@283 63
universe@283 64 /** Format specifier for a sstr_t <b>without</b> the leading percent sign, e.g. <code>printf("%" PRIsstr "\n", SFMT(str))</code>. */
universe@283 65 #define PRIsstr ".*s"
universe@283 66
olaf@20 67 #ifdef __cplusplus
olaf@20 68 extern "C" {
olaf@20 69 #endif
olaf@20 70
universe@116 71 /**
universe@116 72 * The UCX string structure: a character pointer paired with an explicit length.
universe@116 73 */
universe@116 74 typedef struct {
universe@116 75 /** A reference to the string data (<b>not necessarily <code>NULL</code>
universe@116 76 * -terminated</b>, so do not rely on a terminator when reading it) */
olaf@20 77 char *ptr;
universe@116 78 /** The length of the string, stored here instead of being implied by a terminator */
olaf@20 79 size_t length;
olaf@20 80 } sstr_t;
olaf@20 81
universe@116 82 /**
universe@116 83 * Creates a new sstr_t based on a C string.
universe@116 84 *
universe@116 85 * The length is implicitly inferred by using a call to <code>strlen()</code>.
olaf@20 86 *
universe@116 87 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
universe@116 88 * do want a copy, use sstrdup() on the return value of this function.
universe@116 89 *
universe@116 90 * @param cstring the C string to wrap
universe@116 91 * @return a new sstr_t containing the C string
universe@116 92 *
universe@116 93 * @see sstrn()
olaf@20 94 */
universe@116 95 sstr_t sstr(char *cstring);
olaf@20 96
universe@116 97 /**
universe@116 98 * Creates a new sstr_t of the specified length based on a C string.
olaf@20 99 *
universe@116 100 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
universe@116 101 * do want a copy, use sstrdup() on the return value of this function.
universe@116 102 *
universe@116 103 * @param cstring the C string to wrap
universe@116 104 * @param length the length of the string
universe@116 105 * @return a new sstr_t containing the C string
universe@116 106 *
universe@116 107 * @see sstr()
universe@116 108 * @see S()
olaf@20 109 */
universe@116 110 sstr_t sstrn(char *cstring, size_t length);
olaf@20 111
olaf@20 112
universe@116 113 /**
universe@116 114 * Returns the cumulated length of all specified strings.
olaf@20 115 *
universe@116 116 * At least one string must be specified.
universe@116 117 *
universe@116 118 * <b>Attention:</b> if the count argument does not match the count of the
universe@116 119 * specified strings, the behavior is undefined.
universe@116 120 *
universe@116 121 * @param count the total number of specified strings (so at least 1)
universe@116 122 * @param string the first string
universe@116 123 * @param ... all other strings
universe@116 124 * @return the cumulated length of all strings
olaf@20 125 */
universe@116 126 size_t sstrnlen(size_t count, sstr_t string, ...);
olaf@20 127
universe@119 128 /**
olaf@183 129 * Concatenates two or more strings.
olaf@183 130 *
olaf@183 131 * The resulting string will be allocated by standard <code>malloc()</code>.
olaf@183 132 * So developers <b>MUST</b> pass the sstr_t.ptr to <code>free()</code>.
olaf@183 133 *
olaf@183 134 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
olaf@183 135 * terminated.
olaf@180 136 *
olaf@180 137 * @param count the total number of strings to concatenate
olaf@183 138 * @param s1 first string
olaf@183 139 * @param s2 second string
olaf@183 140 * @param ... all remaining strings
olaf@180 141 * @return the concatenated string
olaf@180 142 */
olaf@180 143 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...);
olaf@183 144
olaf@183 145 /**
universe@225 146 * Concatenates two or more strings using a UcxAllocator.
olaf@183 147 *
olaf@183 148 * See sstrcat() for details.
olaf@183 149 *
olaf@183 150 * @param a the allocator to use
olaf@183 151 * @param count the total number of strings to concatenate
olaf@183 152 * @param s1 first string
olaf@183 153 * @param s2 second string
olaf@183 154 * @param ... all remaining strings
olaf@183 155 * @return the concatenated string
olaf@183 156 */
olaf@180 157 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...);
olaf@180 158
olaf@180 159
olaf@180 160 /**
universe@119 161 * Returns a substring starting at the specified location.
universe@119 162 *
universe@119 163 * <b>Attention:</b> the new string references the same memory area as the
universe@119 164 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
universe@119 165 * Use sstrdup() to get a copy.
universe@119 166 *
universe@119 167 * @param string input string
universe@119 168 * @param start start location of the substring
universe@119 169 * @return a substring of <code>string</code> starting at <code>start</code>
universe@119 170 *
universe@119 171 * @see sstrsubsl()
universe@119 172 * @see sstrchr()
universe@119 173 */
universe@119 174 sstr_t sstrsubs(sstr_t string, size_t start);
universe@119 175
universe@119 176 /**
universe@119 177 * Returns a substring with a maximum length starting at the specified location.
universe@119 178 *
universe@119 179 * <b>Attention:</b> the new string references the same memory area as the
universe@119 180 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
universe@119 181 * Use sstrdup() to get a copy.
universe@119 182 *
universe@119 183 * @param string input string
universe@119 184 * @param start start location of the substring
universe@119 185 * @param length the maximum length of the substring
universe@119 186 * @return a substring of <code>string</code> starting at <code>start</code>
universe@119 187 * with a maximum length of <code>length</code>
universe@119 188 *
universe@119 189 * @see sstrsubs()
universe@119 190 * @see sstrchr()
universe@119 191 */
universe@119 192 sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
universe@119 193
universe@119 194 /**
universe@119 195 * Returns a substring starting at the location of the first occurrence of the
universe@119 196 * specified character.
universe@119 197 *
universe@119 198 * If the string does not contain the character, an empty string is returned.
universe@119 199 *
universe@119 200 * @param string the string where to locate the character
universe@119 201 * @param chr the character to locate
universe@148 202 * @return a substring starting at the first location of <code>chr</code>
universe@119 203 *
universe@119 204 * @see sstrsubs()
universe@119 205 */
universe@119 206 sstr_t sstrchr(sstr_t string, int chr);
universe@119 207
universe@119 208 /**
universe@148 209 * Returns a substring starting at the location of the last occurrence of the
universe@148 210 * specified character.
universe@148 211 *
universe@148 212 * If the string does not contain the character, an empty string is returned.
universe@148 213 *
universe@148 214 * @param string the string where to locate the character
universe@148 215 * @param chr the character to locate
universe@148 216 * @return a substring starting at the last location of <code>chr</code>
universe@148 217 *
universe@148 218 * @see sstrsubs()
universe@148 219 */
universe@148 220 sstr_t sstrrchr(sstr_t string, int chr);
universe@148 221
universe@148 222 /**
universe@214 223 * Returns a substring starting at the location of the first occurrence of the
universe@214 224 * specified string.
universe@214 225 *
universe@214 226 * If the string does not contain the other string, an empty string is returned.
universe@214 227 *
universe@214 228 * If <code>match</code> is an empty string, the complete <code>string</code> is
universe@214 229 * returned.
universe@214 230 *
universe@214 231 * @param string the string to be scanned
universe@214 232 * @param match string containing the sequence of characters to match
universe@214 233 * @return a substring starting at the first occurrence of
universe@214 234 * <code>match</code>, or an empty string, if the sequence is not
universe@214 235 * present in <code>string</code>
universe@214 236 */
universe@214 237 sstr_t sstrstr(sstr_t string, sstr_t match);
universe@214 238
universe@214 239 /**
universe@119 240 * Splits a string into parts by using a delimiter string.
universe@119 241 *
universe@119 242 * This function will return <code>NULL</code>, if one of the following happens:
universe@119 243 * <ul>
universe@119 244 * <li>the string length is zero</li>
universe@119 245 * <li>the delimiter length is zero</li>
universe@119 246 * <li>the string equals the delimiter</li>
universe@119 247 * <li>memory allocation fails</li>
universe@119 248 * </ul>
universe@119 249 *
universe@119 250 * The integer referenced by <code>count</code> is used as input and determines
universe@160 251 * the maximum size of the resulting array, i.e. the maximum count of splits to
universe@119 252 * perform + 1.
universe@119 253 *
universe@119 254 * The integer referenced by <code>count</code> is also used as output and is
universe@119 255 * set to
universe@119 256 * <ul>
universe@119 257 * <li>-2, on memory allocation errors</li>
universe@119 258 * <li>-1, if either the string or the delimiter is an empty string</li>
universe@119 259 * <li>0, if the string equals the delimiter</li>
universe@119 260 * <li>1, if the string does not contain the delimiter</li>
universe@160 261 * <li>the count of array items, otherwise</li>
universe@119 262 * </ul>
universe@119 263 *
universe@119 264 * If the string starts with the delimiter, the first item of the resulting
universe@160 265 * array will be an empty string.
universe@119 266 *
universe@119 267 * If the string ends with the delimiter and the maximum list size is not
universe@160 268 * exceeded, the last array item will be an empty string.
universe@233 269 * In case the list size would be exceeded, the last array item will be the
universe@233 270 * remaining string after the last split, <i>including</i> the terminating
universe@233 271 * delimiter.
universe@119 272 *
universe@160 273 * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
universe@125 274 * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
universe@119 275 * an allocator that manages the memory to avoid this.
olaf@20 276 *
universe@119 277 * @param string the string to split
universe@119 278 * @param delim the delimiter string
universe@160 279 * @param count IN: the maximum size of the resulting array (0 = no limit),
universe@160 280 * OUT: the actual size of the array
universe@160 281 * @return a sstr_t array containing the split strings or
universe@119 282 * <code>NULL</code> on error
universe@119 283 *
universe@125 284 * @see sstrsplit_a()
olaf@20 285 */
universe@173 286 sstr_t* sstrsplit(sstr_t string, sstr_t delim, ssize_t *count);
olaf@20 287
universe@119 288 /**
universe@225 289 * Performs sstrsplit() using a UcxAllocator.
universe@119 290 *
universe@119 291 * <i>Read the description of sstrsplit() for details.</i>
universe@119 292 *
universe@160 293 * The memory for the sstr_t.ptr pointers of the array items and the memory for
universe@119 294 * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
universe@119 295 * function.
universe@119 296 *
universe@119 297 * <b>Note:</b> the allocator is not used for memory that is freed within the
universe@119 298 * same call of this function (locally scoped variables).
universe@119 299 *
universe@125 300 * @param allocator the UcxAllocator used for allocating memory
universe@119 301 * @param string the string to split
universe@119 302 * @param delim the delimiter string
universe@160 303 * @param count IN: the maximum size of the resulting array (0 = no limit),
universe@160 304 * OUT: the actual size of the array
universe@160 305 * @return a sstr_t array containing the split strings or
universe@119 306 * <code>NULL</code> on error
universe@119 307 *
universe@119 308 * @see sstrsplit()
olaf@20 309 */
universe@125 310 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim,
universe@173 311 ssize_t *count);
olaf@20 312
universe@116 313 /**
universe@116 314 * Compares two UCX strings with standard <code>memcmp()</code>.
universe@116 315 *
universe@116 316 * At first it compares the sstr_t.length attribute of the two strings. The
universe@116 317 * <code>memcmp()</code> function is called, if and only if the lengths match.
universe@116 318 *
universe@116 319 * @param s1 the first string
universe@116 320 * @param s2 the second string
universe@116 321 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
universe@116 322 * length of s1 is greater than the length of s2 or the result of
universe@116 323 * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
universe@116 324 */
olaf@68 325 int sstrcmp(sstr_t s1, sstr_t s2);
olaf@20 326
universe@116 327 /**
universe@149 328 * Compares two UCX strings ignoring the case.
universe@149 329 *
universe@149 330 * At first it compares the sstr_t.length attribute of the two strings. If and
universe@149 331 * only if the lengths match, both strings are compared char by char ignoring
universe@149 332 * the case.
universe@149 333 *
universe@149 334 * @param s1 the first string
universe@149 335 * @param s2 the second string
universe@149 336 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
universe@149 337 * length of s1 is greater than the length of s2 or the difference between the
universe@149 338 * first two differing characters otherwise (i.e. 0 if the strings match and
universe@149 339 * no characters differ)
universe@149 340 */
universe@149 341 int sstrcasecmp(sstr_t s1, sstr_t s2);
universe@149 342
universe@149 343 /**
universe@116 344 * Creates a duplicate of the specified string.
universe@116 345 *
universe@116 346 * The new sstr_t will contain a copy allocated by standard
universe@116 347 * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to
universe@116 348 * <code>free()</code>.
universe@116 349 *
universe@118 350 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
universe@118 351 * terminated.
universe@118 352 *
universe@116 353 * @param string the string to duplicate
universe@118 354 * @return a duplicate of the string
universe@125 355 * @see sstrdup_a()
universe@116 356 */
universe@116 357 sstr_t sstrdup(sstr_t string);
olaf@20 358
universe@118 359 /**
universe@225 360 * Creates a duplicate of the specified string using a UcxAllocator.
universe@118 361 *
universe@118 362 * The new sstr_t will contain a copy allocated by the allocator's
universe@118 363 * ucx_allocator_malloc function. So it is implementation dependent whether the
universe@118 364 * returned sstr_t.ptr pointer must be passed to the allocator's
universe@118 365 * ucx_allocator_free function manually.
universe@118 366 *
universe@118 367 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
universe@118 368 * terminated.
universe@118 369 *
universe@225 370 * @param allocator a valid instance of a UcxAllocator
universe@118 371 * @param string the string to duplicate
universe@118 372 * @return a duplicate of the string
universe@119 373 * @see sstrdup()
universe@118 374 */
universe@125 375 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string);
universe@118 376
universe@118 377 /**
universe@118 378 * Omits leading and trailing spaces.
universe@118 379 *
universe@118 380 * This function returns a new sstr_t containing a trimmed version of the
universe@118 381 * specified string.
universe@118 382 *
universe@118 383 * <b>Note:</b> the new sstr_t references the same memory, thus you
universe@118 384 * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
universe@118 385 * <code>free()</code>. It is also highly recommended to avoid assignments like
universe@118 386 * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
universe@118 387 * source string. Assignments of this type are only permitted, if the
universe@118 388 * sstr_t.ptr of the source string does not need to be freed or if another
universe@118 389 * reference to the source string exists.
universe@118 390 *
universe@118 391 * @param string the string that shall be trimmed
universe@118 392 * @return a new sstr_t containing the trimmed string
universe@118 393 */
olaf@96 394 sstr_t sstrtrim(sstr_t string);
olaf@96 395
universe@146 396 /**
universe@146 397 * Checks, if a string has a specific prefix.
universe@146 398 * @param string the string to check
universe@146 399 * @param prefix the prefix the string should have
universe@146 400 * @return 1, if and only if the string has the specified prefix, 0 otherwise
universe@146 401 */
universe@146 402 int sstrprefix(sstr_t string, sstr_t prefix);
universe@146 403
universe@146 404 /**
universe@146 405 * Checks, if a string has a specific suffix.
universe@146 406 * @param string the string to check
universe@146 407 * @param suffix the suffix the string should have
universe@146 408 * @return 1, if and only if the string has the specified suffix, 0 otherwise
universe@146 409 */
universe@146 410 int sstrsuffix(sstr_t string, sstr_t suffix);
universe@146 411
universe@210 412 /**
universe@210 413 * Returns a lower case version of a string.
universe@210 414 *
universe@210 415 * This function creates a duplicate of the input string, first. See the
universe@210 416 * documentation of sstrdup() for the implications.
universe@210 417 *
universe@210 418 * @param string the input string
universe@210 419 * @return the resulting lower case string
universe@210 420 * @see sstrdup()
universe@210 421 */
universe@210 422 sstr_t sstrlower(sstr_t string);
universe@210 423
universe@210 424 /**
universe@210 425 * Returns a lower case version of a string using a UcxAllocator.
universe@210 426 *
universe@210 427 * This function creates a duplicate of the input string, first. See the
universe@210 428 * documentation of sstrdup_a() for the implications.
universe@210 429 *
universe@210 430 * @param allocator the allocator used for duplicating the string
universe@210 431 * @param string the input string
universe@210 432 * @return the resulting lower case string
universe@210 433 * @see sstrdup_a()
universe@210 434 */
universe@210 435 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string);
universe@210 436
universe@210 437 /**
universe@210 438 * Returns an upper case version of a string.
universe@210 439 *
universe@210 440 * This function creates a duplicate of the input string, first. See the
universe@210 441 * documentation of sstrdup() for the implications.
universe@210 442 *
universe@210 443 * @param string the input string
universe@210 444 * @return the resulting upper case string
universe@210 445 * @see sstrdup()
universe@210 446 */
universe@210 447 sstr_t sstrupper(sstr_t string);
universe@210 448
universe@210 449 /**
universe@210 450 * Returns an upper case version of a string using a UcxAllocator.
universe@210 451 *
universe@210 452 * This function creates a duplicate of the input string, first. See the
universe@210 453 * documentation of sstrdup_a() for the implications.
universe@210 454 *
universe@210 455 * @param allocator the allocator used for duplicating the string
universe@210 456 * @param string the input string
universe@210 457 * @return the resulting upper case string
universe@210 458 * @see sstrdup_a()
universe@210 459 */
universe@210 460 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string);
universe@210 461
olaf@20 462 #ifdef __cplusplus
olaf@20 463 }
olaf@20 464 #endif
olaf@20 465
universe@116 466 #endif /* UCX_STRING_H */

mercurial