src/ucx/string.h

Tue, 23 Aug 2016 13:49:38 +0200

author
Mike Becker <universe@uap-core.de>
date
Tue, 23 Aug 2016 13:49:38 +0200
changeset 39
ac35daceb24c
permissions
-rw-r--r--

adds UCX + changes how the input file is read (uses an consecutive memory area now)

universe@39 1 /*
universe@39 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@39 3 *
universe@39 4 * Copyright 2015 Olaf Wintermann. All rights reserved.
universe@39 5 *
universe@39 6 * Redistribution and use in source and binary forms, with or without
universe@39 7 * modification, are permitted provided that the following conditions are met:
universe@39 8 *
universe@39 9 * 1. Redistributions of source code must retain the above copyright
universe@39 10 * notice, this list of conditions and the following disclaimer.
universe@39 11 *
universe@39 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@39 13 * notice, this list of conditions and the following disclaimer in the
universe@39 14 * documentation and/or other materials provided with the distribution.
universe@39 15 *
universe@39 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@39 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@39 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@39 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@39 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@39 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@39 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@39 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@39 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@39 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@39 26 * POSSIBILITY OF SUCH DAMAGE.
universe@39 27 */
universe@39 28 /**
universe@39 29 * Bounded string implementation.
universe@39 30 *
universe@39 31 * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
universe@39 32 * The main difference from C strings is that <code>sstr_t</code> does <b>not
universe@39 33 * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
universe@39 34 * within the structure.
universe@39 35 *
universe@39 36 * When using <code>sstr_t</code>, developers must be fully aware of what type
universe@39 37 * of string (<code>NULL</code>-terminated or not) they are using when
universe@39 38 * accessing the <code>char* ptr</code> directly.
universe@39 39 *
universe@39 40 * The UCX string module provides some common string functions, known from
universe@39 41 * standard libc, working with <code>sstr_t</code>.
universe@39 42 *
universe@39 43 * @file string.h
universe@39 44 * @author Mike Becker
universe@39 45 * @author Olaf Wintermann
universe@39 46 */
universe@39 47
universe@39 48 #ifndef UCX_STRING_H
universe@39 49 #define UCX_STRING_H
universe@39 50
universe@39 51 #include "ucx.h"
universe@39 52 #include "allocator.h"
universe@39 53 #include <stddef.h>
universe@39 54
universe@39 55 /** Shortcut for a <code>sstr_t struct</code> literal; <code>s</code> must be a string literal or char array, because the length is computed as <code>sizeof(s)-1</code>. */
universe@39 56 #define ST(s) { (char*)s, sizeof(s)-1 }
universe@39 57
universe@39 58 /** Shortcut wrapping a string literal or char array in a <code>sstr_t</code> via sstrn(), with the length taken as <code>sizeof(s)-1</code>. */
universe@39 59 #define S(s) sstrn((char*)s, sizeof(s)-1)
universe@39 60
universe@39 61 #ifdef __cplusplus
universe@39 62 extern "C" {
universe@39 63 #endif
universe@39 64
universe@39 65 /**
universe@39 66 * The UCX string structure: a character pointer paired with an explicit length.
universe@39 67 */
universe@39 68 typedef struct {
universe@39 69 /** A reference to the string data
universe@39 70 * (<b>not necessarily <code>NULL</code>-terminated</b>) */
universe@39 71 char *ptr;
universe@39 72 /** The length of the string (a terminator, if present, is not counted) */
universe@39 73 size_t length;
universe@39 74 } sstr_t;
universe@39 75
universe@39 76 /**
universe@39 77 * Creates a new sstr_t based on a C string.
universe@39 78 *
universe@39 79 * The length is implicitly inferred by using a call to <code>strlen()</code>.
universe@39 80 *
universe@39 81 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
universe@39 82 * do want a copy, use sstrdup() on the return value of this function.
universe@39 83 *
universe@39 84 * @param cstring the C string to wrap
universe@39 85 * @return a new sstr_t containing the C string
universe@39 86 *
universe@39 87 * @see sstrn()
universe@39 88 */
universe@39 89 sstr_t sstr(char *cstring);
universe@39 90
universe@39 91 /**
universe@39 92 * Creates a new sstr_t of the specified length based on a C string.
universe@39 93 *
universe@39 94 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
universe@39 95 * do want a copy, use sstrdup() on the return value of this function.
universe@39 96 *
universe@39 97 * @param cstring the C string to wrap
universe@39 98 * @param length the length of the string
universe@39 99 * @return a new sstr_t containing the C string
universe@39 100 *
universe@39 101 * @see sstr()
universe@39 102 * @see S()
universe@39 103 */
universe@39 104 sstr_t sstrn(char *cstring, size_t length);
universe@39 105
universe@39 106
universe@39 107 /**
universe@39 108 * Returns the cumulated length of all specified strings.
universe@39 109 *
universe@39 110 * At least one string must be specified.
universe@39 111 *
universe@39 112 * <b>Attention:</b> if the count argument does not match the count of the
universe@39 113 * specified strings, the behavior is undefined.
universe@39 114 *
universe@39 115 * @param count the total number of specified strings (so at least 1)
universe@39 116 * @param string the first string
universe@39 117 * @param ... all other strings
universe@39 118 * @return the cumulated length of all strings
universe@39 119 */
universe@39 120 size_t sstrnlen(size_t count, sstr_t string, ...);
universe@39 121
universe@39 122 /**
universe@39 123 * Concatenates two or more strings.
universe@39 124 *
universe@39 125 * The resulting string will be allocated by standard <code>malloc()</code>.
universe@39 126 * So developers <b>MUST</b> pass the sstr_t.ptr to <code>free()</code>.
universe@39 127 *
universe@39 128 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
universe@39 129 * terminated.
universe@39 130 *
universe@39 131 * @param count the total number of strings to concatenate
universe@39 132 * @param s1 first string
universe@39 133 * @param s2 second string
universe@39 134 * @param ... all remaining strings
universe@39 135 * @return the concatenated string
universe@39 136 */
universe@39 137 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...);
universe@39 138
universe@39 139 /**
universe@39 140 * Concatenates two or more strings using a UcxAllocator.
universe@39 141 *
universe@39 142 * See sstrcat() for details.
universe@39 143 *
universe@39 144 * @param a the allocator to use
universe@39 145 * @param count the total number of strings to concatenate
universe@39 146 * @param s1 first string
universe@39 147 * @param s2 second string
universe@39 148 * @param ... all remaining strings
universe@39 149 * @return the concatenated string
universe@39 150 */
universe@39 151 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...);
universe@39 152
universe@39 153
universe@39 154 /**
universe@39 155 * Returns a substring starting at the specified location.
universe@39 156 *
universe@39 157 * <b>Attention:</b> the new string references the same memory area as the
universe@39 158 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
universe@39 159 * Use sstrdup() to get a copy.
universe@39 160 *
universe@39 161 * @param string input string
universe@39 162 * @param start start location of the substring
universe@39 163 * @return a substring of <code>string</code> starting at <code>start</code>
universe@39 164 *
universe@39 165 * @see sstrsubsl()
universe@39 166 * @see sstrchr()
universe@39 167 */
universe@39 168 sstr_t sstrsubs(sstr_t string, size_t start);
universe@39 169
universe@39 170 /**
universe@39 171 * Returns a substring with a maximum length starting at the specified location.
universe@39 172 *
universe@39 173 * <b>Attention:</b> the new string references the same memory area as the
universe@39 174 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
universe@39 175 * Use sstrdup() to get a copy.
universe@39 176 *
universe@39 177 * @param string input string
universe@39 178 * @param start start location of the substring
universe@39 179 * @param length the maximum length of the substring
universe@39 180 * @return a substring of <code>string</code> starting at <code>start</code>
universe@39 181 * with a maximum length of <code>length</code>
universe@39 182 *
universe@39 183 * @see sstrsubs()
universe@39 184 * @see sstrchr()
universe@39 185 */
universe@39 186 sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
universe@39 187
universe@39 188 /**
universe@39 189 * Returns a substring starting at the location of the first occurrence of the
universe@39 190 * specified character.
universe@39 191 *
universe@39 192 * If the string does not contain the character, an empty string is returned.
universe@39 193 *
universe@39 194 * @param string the string where to locate the character
universe@39 195 * @param chr the character to locate
universe@39 196 * @return a substring starting at the first location of <code>chr</code>
universe@39 197 *
universe@39 198 * @see sstrsubs()
universe@39 199 */
universe@39 200 sstr_t sstrchr(sstr_t string, int chr);
universe@39 201
universe@39 202 /**
universe@39 203 * Returns a substring starting at the location of the last occurrence of the
universe@39 204 * specified character.
universe@39 205 *
universe@39 206 * If the string does not contain the character, an empty string is returned.
universe@39 207 *
universe@39 208 * @param string the string where to locate the character
universe@39 209 * @param chr the character to locate
universe@39 210 * @return a substring starting at the last location of <code>chr</code>
universe@39 211 *
universe@39 212 * @see sstrsubs()
universe@39 213 */
universe@39 214 sstr_t sstrrchr(sstr_t string, int chr);
universe@39 215
universe@39 216 /**
universe@39 217 * Returns a substring starting at the location of the first occurrence of the
universe@39 218 * specified string.
universe@39 219 *
universe@39 220 * If the string does not contain the other string, an empty string is returned.
universe@39 221 *
universe@39 222 * If <code>match</code> is an empty string, the complete <code>string</code> is
universe@39 223 * returned.
universe@39 224 *
universe@39 225 * @param string the string to be scanned
universe@39 226 * @param match string containing the sequence of characters to match
universe@39 227 * @return a substring starting at the first occurrence of
universe@39 228 * <code>match</code>, or an empty string, if the sequence is not
universe@39 229 * present in <code>string</code>
universe@39 230 */
universe@39 231 sstr_t sstrstr(sstr_t string, sstr_t match);
universe@39 232
universe@39 233 /**
universe@39 234 * Splits a string into parts by using a delimiter string.
universe@39 235 *
universe@39 236 * This function will return <code>NULL</code>, if one of the following happens:
universe@39 237 * <ul>
universe@39 238 * <li>the string length is zero</li>
universe@39 239 * <li>the delimiter length is zero</li>
universe@39 240 * <li>the string equals the delimiter</li>
universe@39 241 * <li>memory allocation fails</li>
universe@39 242 * </ul>
universe@39 243 *
universe@39 244 * The integer referenced by <code>count</code> is used as input and determines
universe@39 245 * the maximum size of the resulting array, i.e. the maximum count of splits to
universe@39 246 * perform + 1.
universe@39 247 *
universe@39 248 * The integer referenced by <code>count</code> is also used as output and is
universe@39 249 * set to
universe@39 250 * <ul>
universe@39 251 * <li>-2, on memory allocation errors</li>
universe@39 252 * <li>-1, if either the string or the delimiter is an empty string</li>
universe@39 253 * <li>0, if the string equals the delimiter</li>
universe@39 254 * <li>1, if the string does not contain the delimiter</li>
universe@39 255 * <li>the count of array items, otherwise</li>
universe@39 256 * </ul>
universe@39 257 *
universe@39 258 * If the string starts with the delimiter, the first item of the resulting
universe@39 259 * array will be an empty string.
universe@39 260 *
universe@39 261 * If the string ends with the delimiter and the maximum list size is not
universe@39 262 * exceeded, the last array item will be an empty string.
universe@39 263 *
universe@39 264 * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
universe@39 265 * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
universe@39 266 * an allocator for managed memory to avoid this.
universe@39 267 *
universe@39 268 * @param string the string to split
universe@39 269 * @param delim the delimiter string
universe@39 270 * @param count IN: the maximum size of the resulting array (0 = no limit),
universe@39 271 * OUT: the actual size of the array
universe@39 272 * @return a sstr_t array containing the split strings or
universe@39 273 * <code>NULL</code> on error
universe@39 274 *
universe@39 275 * @see sstrsplit_a()
universe@39 276 */
universe@39 277 sstr_t* sstrsplit(sstr_t string, sstr_t delim, ssize_t *count);
universe@39 278
universe@39 279 /**
universe@39 280 * Performs sstrsplit() using a UcxAllocator.
universe@39 281 *
universe@39 282 * <i>Read the description of sstrsplit() for details.</i>
universe@39 283 *
universe@39 284 * The memory for the sstr_t.ptr pointers of the array items and the memory for
universe@39 285 * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
universe@39 286 * function.
universe@39 287 *
universe@39 288 * <b>Note:</b> the allocator is not used for memory that is freed within the
universe@39 289 * same call of this function (locally scoped variables).
universe@39 290 *
universe@39 291 * @param allocator the UcxAllocator used for allocating memory
universe@39 292 * @param string the string to split
universe@39 293 * @param delim the delimiter string
universe@39 294 * @param count IN: the maximum size of the resulting array (0 = no limit),
universe@39 295 * OUT: the actual size of the array
universe@39 296 * @return a sstr_t array containing the split strings or
universe@39 297 * <code>NULL</code> on error
universe@39 298 *
universe@39 299 * @see sstrsplit()
universe@39 300 */
universe@39 301 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim,
universe@39 302 ssize_t *count);
universe@39 303
universe@39 304 /**
universe@39 305 * Compares two UCX strings with standard <code>memcmp()</code>.
universe@39 306 *
universe@39 307 * At first it compares the sstr_t.length attribute of the two strings. The
universe@39 308 * <code>memcmp()</code> function is called, if and only if the lengths match.
universe@39 309 *
universe@39 310 * @param s1 the first string
universe@39 311 * @param s2 the second string
universe@39 312 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
universe@39 313 * length of s1 is greater than the length of s2 or the result of
universe@39 314 * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
universe@39 315 */
universe@39 316 int sstrcmp(sstr_t s1, sstr_t s2);
universe@39 317
universe@39 318 /**
universe@39 319 * Compares two UCX strings ignoring the case.
universe@39 320 *
universe@39 321 * At first it compares the sstr_t.length attribute of the two strings. If and
universe@39 322 * only if the lengths match, both strings are compared char by char ignoring
universe@39 323 * the case.
universe@39 324 *
universe@39 325 * @param s1 the first string
universe@39 326 * @param s2 the second string
universe@39 327 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
universe@39 328 * length of s1 is greater than the length of s2 or the difference between the
universe@39 329 * first two differing characters otherwise (i.e. 0 if the strings match and
universe@39 330 * no characters differ)
universe@39 331 */
universe@39 332 int sstrcasecmp(sstr_t s1, sstr_t s2);
universe@39 333
universe@39 334 /**
universe@39 335 * Creates a duplicate of the specified string.
universe@39 336 *
universe@39 337 * The new sstr_t will contain a copy allocated by standard
universe@39 338 * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to
universe@39 339 * <code>free()</code>.
universe@39 340 *
universe@39 341 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
universe@39 342 * terminated.
universe@39 343 *
universe@39 344 * @param string the string to duplicate
universe@39 345 * @return a duplicate of the string
universe@39 346 * @see sstrdup_a()
universe@39 347 */
universe@39 348 sstr_t sstrdup(sstr_t string);
universe@39 349
universe@39 350 /**
universe@39 351 * Creates a duplicate of the specified string using a UcxAllocator.
universe@39 352 *
universe@39 353 * The new sstr_t will contain a copy allocated by the allocator's
universe@39 354 * ucx_allocator_malloc function. So it is implementation-dependent whether the
universe@39 355 * returned sstr_t.ptr pointer must be passed to the allocator's
universe@39 356 * ucx_allocator_free function manually.
universe@39 357 *
universe@39 358 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
universe@39 359 * terminated.
universe@39 360 *
universe@39 361 * @param allocator a valid instance of an UcxAllocator
universe@39 362 * @param string the string to duplicate
universe@39 363 * @return a duplicate of the string
universe@39 364 * @see sstrdup()
universe@39 365 */
universe@39 366 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t string);
universe@39 367
universe@39 368 /**
universe@39 369 * Omits leading and trailing spaces.
universe@39 370 *
universe@39 371 * This function returns a new sstr_t containing a trimmed version of the
universe@39 372 * specified string.
universe@39 373 *
universe@39 374 * <b>Note:</b> the new sstr_t references the same memory, thus you
universe@39 375 * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
universe@39 376 * <code>free()</code>. It is also highly recommended to avoid assignments like
universe@39 377 * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
universe@39 378 * source string. Assignments of this type are only permitted, if the
universe@39 379 * sstr_t.ptr of the source string does not need to be freed or if another
universe@39 380 * reference to the source string exists.
universe@39 381 *
universe@39 382 * @param string the string that shall be trimmed
universe@39 383 * @return a new sstr_t containing the trimmed string
universe@39 384 */
universe@39 385 sstr_t sstrtrim(sstr_t string);
universe@39 386
universe@39 387 /**
universe@39 388 * Checks whether a string has a specific prefix.
universe@39 389 * @param string the string to check
universe@39 390 * @param prefix the prefix the string should have
universe@39 391 * @return 1, if and only if the string has the specified prefix, 0 otherwise
universe@39 392 */
universe@39 393 int sstrprefix(sstr_t string, sstr_t prefix);
universe@39 394
universe@39 395 /**
universe@39 396 * Checks whether a string has a specific suffix.
universe@39 397 * @param string the string to check
universe@39 398 * @param suffix the suffix the string should have
universe@39 399 * @return 1, if and only if the string has the specified suffix, 0 otherwise
universe@39 400 */
universe@39 401 int sstrsuffix(sstr_t string, sstr_t suffix);
universe@39 402
universe@39 403 /**
universe@39 404 * Returns a lower case version of a string.
universe@39 405 *
universe@39 406 * This function creates a duplicate of the input string, first. See the
universe@39 407 * documentation of sstrdup() for the implications.
universe@39 408 *
universe@39 409 * @param string the input string
universe@39 410 * @return the resulting lower case string
universe@39 411 * @see sstrdup()
universe@39 412 */
universe@39 413 sstr_t sstrlower(sstr_t string);
universe@39 414
universe@39 415 /**
universe@39 416 * Returns a lower case version of a string.
universe@39 417 *
universe@39 418 * This function creates a duplicate of the input string, first. See the
universe@39 419 * documentation of sstrdup_a() for the implications.
universe@39 420 *
universe@39 421 * @param allocator the allocator used for duplicating the string
universe@39 422 * @param string the input string
universe@39 423 * @return the resulting lower case string
universe@39 424 * @see sstrdup_a()
universe@39 425 */
universe@39 426 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string);
universe@39 427
universe@39 428 /**
universe@39 429 * Returns an upper case version of a string.
universe@39 430 *
universe@39 431 * This function creates a duplicate of the input string, first. See the
universe@39 432 * documentation of sstrdup() for the implications.
universe@39 433 *
universe@39 434 * @param string the input string
universe@39 435 * @return the resulting upper case string
universe@39 436 * @see sstrdup()
universe@39 437 */
universe@39 438 sstr_t sstrupper(sstr_t string);
universe@39 439
universe@39 440 /**
universe@39 441 * Returns an upper case version of a string.
universe@39 442 *
universe@39 443 * This function creates a duplicate of the input string, first. See the
universe@39 444 * documentation of sstrdup_a() for the implications.
universe@39 445 *
universe@39 446 * @param allocator the allocator used for duplicating the string
universe@39 447 * @param string the input string
universe@39 448 * @return the resulting upper case string
universe@39 449 * @see sstrdup_a()
universe@39 450 */
universe@39 451 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string);
universe@39 452
universe@39 453 #ifdef __cplusplus
universe@39 454 }
universe@39 455 #endif
universe@39 456
universe@39 457 #endif /* UCX_STRING_H */

mercurial