Mon, 14 May 2018 19:24:34 +0200
adjusts documentation of UCX string types, converters, and constructors
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28 /**
29 * Bounded string implementation.
30 *
31 * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
32 * The main difference to C strings is, that <code>sstr_t</code> does <b>not
33 * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
34 * within the structure.
35 *
36 * When using <code>sstr_t</code>, developers must be fully aware of what type
37 * of string (<code>NULL</code>-terminated or not) they are using, when
38 * accessing the <code>char* ptr</code> directly.
39 *
40 * The UCX string module provides some common string functions, known from
41 * standard libc, working with <code>sstr_t</code>.
42 *
43 * @file string.h
44 * @author Mike Becker
45 * @author Olaf Wintermann
46 */
48 #ifndef UCX_STRING_H
49 #define UCX_STRING_H
51 #include "ucx.h"
52 #include "allocator.h"
53 #include <stddef.h>
/**
 * Shortcut for a <code>sstr_t struct</code> literal (brace initializer).
 *
 * The length is computed with <code>sizeof(s)-1</code>, therefore
 * <code>s</code> must be a string literal or a <code>char</code> array.
 * For a plain pointer, <code>sizeof</code> would yield the size of the pointer
 * and not the length of the string.
 *
 * @param s a string literal or character array
 */
#define ST(s) { (char*)(s), sizeof(s)-1 }
/**
 * Shortcut for the conversion of a C string to a <code>sstr_t</code>.
 *
 * Expands to a call of sstrn() with the length <code>sizeof(s)-1</code>,
 * therefore <code>s</code> must be a string literal or a <code>char</code>
 * array. For a plain pointer, <code>sizeof</code> would yield the size of the
 * pointer and not the length of the string.
 *
 * @param s a string literal or character array
 */
#define S(s) sstrn((char*)(s), sizeof(s)-1)
/**
 * Expands a sstr_t or scstr_t to the two printf arguments consumed by
 * PRIsstr: the length, cast to <code>int</code>, followed by the pointer.
 *
 * @param s the string to expand (only the <code>length</code> and
 * <code>ptr</code> members are accessed)
 */
#define SFMT(s) (int)((s).length), (s).ptr

/**
 * Format specifier for a sstr_t or scstr_t.
 *
 * The specifier does not contain the leading percent sign and takes the
 * precision from the argument list, so it is meant to be combined with SFMT().
 */
#define PRIsstr ".*s"
67 #ifdef __cplusplus
68 extern "C" {
69 #endif
/**
 * The UCX string structure: a character pointer paired with an explicit
 * length.
 */
typedef struct {
    /**
     * Pointer to the string data
     * (<b>not necessarily <code>NULL</code>-terminated</b>).
     */
    char *ptr;
    /** Length of the string. */
    size_t length;
} sstr_t;
/**
 * The UCX string structure for immutable (constant) strings: a pointer to
 * constant characters paired with an explicit length.
 */
typedef struct {
    /**
     * Pointer to the immutable string data
     * (<b>not necessarily <code>NULL</code>-terminated</b>).
     */
    const char *ptr;
    /** Length of the string. */
    size_t length;
} scstr_t;
92 #ifdef __cplusplus
93 }
94 #endif
97 #ifdef __cplusplus
98 inline scstr_t s2scstr(sstr_t s) {
99 scstr_t c;
100 c.ptr = s.ptr;
101 c.length = s.ptr;
102 return c;
103 }
104 inline scstr_t s2scstr(scstr_t c) {
105 return c;
106 }
107 #define SCSTR s2scstr
108 #else
110 /**
111 * One of two type adjustment functions that return a scstr_t.
112 *
113 * Used internally to cast a UCX string to an immutable UCX string.
114 * This variant is used, when the string is already immutable and no operation
115 * needs to be performed.
116 *
117 * @param str some scstr_t
118 * @return the argument itself
119 */
120 scstr_t ucx_sc2sc(scstr_t str);
122 /**
123 * One of two type adjustment functions that return a scstr_t.
124 *
125 * Used internally to cast a UCX string to an immutable UCX string.
126 *
127 * @param str some sstr_t
128 * @return an immutable (scstr_t) version of the provided string.
129 */
130 scstr_t ucx_ss2sc(sstr_t str);
132 #if __STDC_VERSION__ >= 201112L
/**
 * Casts a UCX string to an immutable UCX string (scstr_t).
 *
 * C11 variant: a generic selection picks ucx_ss2sc() for a sstr_t argument
 * and ucx_sc2sc() for a scstr_t argument.
 *
 * @param str some UCX string
 * @return an immutable version of the provided string
 */
#define SCSTR(str) _Generic((str), \
        sstr_t: ucx_ss2sc, \
        scstr_t: ucx_sc2sc)(str)
140 #elif defined(__GNUC__) || defined(__clang__)
/**
 * Casts a UCX string to an immutable UCX string (scstr_t).
 *
 * GCC / clang variant for compilers without C11 generic selections:
 * ucx_ss2sc() is chosen at compile time when the argument type is compatible
 * with sstr_t, ucx_sc2sc() otherwise.
 *
 * The alternate keyword <code>__typeof__</code> is used on purpose, because
 * the plain <code>typeof</code> keyword is not recognized in strict ISO modes
 * such as <code>-std=c99</code>.
 *
 * @param str some UCX string
 * @return an immutable version of the provided string
 */
#define SCSTR(str) __builtin_choose_expr( \
        __builtin_types_compatible_p(__typeof__(str), sstr_t), \
        ucx_ss2sc, \
        ucx_sc2sc)(str)
152 #elif defined(__sun)
154 /**
155 * Casts a UCX string to an immutable UCX string (scstr_t).
156 * @param str some UCX string
157 * @return an immutable version of the provided string
158 */
159 #define SCSTR(str) ({typeof(str) ucx_tmp_var_str = str; \
160 scstr_t ucx_tmp_var_c; \
161 ucx_tmp_var_c.ptr = ucx_tmp_var_str.ptr;\
162 ucx_tmp_var_c.length = ucx_tmp_var_str.length;\
163 ucx_tmp_var_c; })
164 #else /* no generics and no builtins */
166 /**
167 * Casts a UCX string to an immutable UCX string (scstr_t).
168 *
169 * This internal function (ab)uses the C standard and expects one single
170 * argument which is then implicitly cast to scstr_t without a warning.
171 *
172 * @return an immutable version of the provided string
173 */
174 scstr_t ucx_ss2c_s();
176 /**
177 * Casts a UCX string to an immutable UCX string (scstr_t).
178 * @param str some UCX string
179 * @return an immutable version of the provided string
180 */
181 #define SCSTR(str) ucx_ss2c_s(str)
182 #endif /* C11 feature test */
184 #endif /* C++ */
186 #ifdef __cplusplus
187 extern "C" {
188 #endif
191 /**
192 * Creates a new sstr_t based on a C string.
193 *
194 * The length is implicitly inferred by using a call to <code>strlen()</code>.
195 *
196 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
197 * do want a copy, use sstrdup() on the return value of this function.
198 *
199 * If you need to wrap a constant string, use scstr().
200 *
201 * @param cstring the C string to wrap
202 * @return a new sstr_t containing the C string
203 *
204 * @see sstrn()
205 */
206 sstr_t sstr(char *cstring);
208 /**
209 * Creates a new sstr_t of the specified length based on a C string.
210 *
211 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
212 * do want a copy, use sstrdup() on the return value of this function.
213 *
214 * If you need to wrap a constant string, use scstrn().
215 *
216 * @param cstring the C string to wrap
217 * @param length the length of the string
218 * @return a new sstr_t containing the C string
219 *
220 * @see sstr()
221 * @see S()
222 */
223 sstr_t sstrn(char *cstring, size_t length);
225 /**
226 * Creates a new scstr_t based on a constant C string.
227 *
228 * The length is implicitly inferred by using a call to <code>strlen()</code>.
229 *
230 * <b>Note:</b> the scstr_t will hold a <i>reference</i> to the C string. If you
231 * do want a copy, use scstrdup() on the return value of this function.
232 *
233 * @param cstring the C string to wrap
234 * @return a new scstr_t containing the C string
235 *
236 * @see scstrn()
237 */
238 scstr_t scstr(const char *cstring);
241 /**
242 * Creates a new scstr_t of the specified length based on a constant C string.
243 *
244 * <b>Note:</b> the scstr_t will hold a <i>reference</i> to the C string. If you
245 * do want a copy, use scstrdup() on the return value of this function.
246 *
247 *
248 * @param cstring the C string to wrap
249 * @param length the length of the string
250 * @return a new scstr_t containing the C string
251 *
252 * @see scstr()
253 */
254 scstr_t scstrn(const char *cstring, size_t length);
256 /**
257 * Returns the cumulated length of all specified strings.
258 *
259 * At least one string must be specified.
260 *
261 * <b>Attention:</b> if the count argument does not match the count of the
262 * specified strings, the behavior is undefined.
263 *
264 * @param count the total number of specified strings (so at least 1)
265 * @param ... all strings whose lengths shall be summed up (their number
266 * must match <code>count</code>)
267 * @return the cumulated length of all strings
268 */
269 size_t ucx_strnlen(size_t count, ...);
271 #define sstrnlen(count, ...) ucx_strnlen(count, __VA_ARGS__)
273 /**
274 * Concatenates two or more strings.
275 *
276 * The resulting string will be allocated by standard <code>malloc()</code>.
277 * So developers <b>MUST</b> pass the sstr_t.ptr to <code>free()</code>.
278 *
279 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
280 * terminated.
281 *
282 * @param count the total number of strings to concatenate
283 * @param s1 first string
284 * @param ... all remaining strings
285 * @return the concatenated string
286 */
287 sstr_t ucx_strcat(size_t count, scstr_t s1, ...);
289 #define sstrcat(count, s1, ...) ucx_strcat(count, SCSTR(s1), __VA_ARGS__)
291 /**
292 * Concatenates two or more strings using a UcxAllocator.
293 *
294 * See sstrcat() for details.
295 *
296 * @param a the allocator to use
297 * @param count the total number of strings to concatenate
298 * @param s1 first string
299 * @param ... all remaining strings
300 * @return the concatenated string
301 */
302 sstr_t ucx_strcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...);
/**
 * Convenience macro for ucx_strcat_a() that passes the first string through
 * SCSTR().
 *
 * @param a the allocator to use
 * @param count the total number of strings to concatenate
 * @param s1 first string
 * @param ... all remaining strings
 * @return the concatenated string
 */
#define sstrcat_a(a, count, s1, ...) \
        ucx_strcat_a(a, count, SCSTR(s1), __VA_ARGS__)
306 /**
307 * Returns a substring starting at the specified location.
308 *
309 * <b>Attention:</b> the new string references the same memory area as the
310 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
311 * Use sstrdup() to get a copy.
312 *
313 * @param string input string
314 * @param start start location of the substring
315 * @return a substring of <code>string</code> starting at <code>start</code>
316 *
317 * @see sstrsubsl()
318 * @see sstrchr()
319 */
320 sstr_t sstrsubs(sstr_t string, size_t start);
322 /**
323 * Returns a substring with a maximum length starting at the specified location.
324 *
325 * <b>Attention:</b> the new string references the same memory area as the
326 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
327 * Use sstrdup() to get a copy.
328 *
329 * @param string input string
330 * @param start start location of the substring
331 * @param length the maximum length of the substring
332 * @return a substring of <code>string</code> starting at <code>start</code>
333 * with a maximum length of <code>length</code>
334 *
335 * @see sstrsubs()
336 * @see sstrchr()
337 */
338 sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
340 scstr_t scstrsubs(scstr_t s, size_t start);
341 scstr_t scstrsubsl(scstr_t string, size_t start, size_t length);
/**
 * Character search on a raw character buffer of the specified length.
 *
 * NOTE(review): this function is not documented in this header. Presumably
 * it looks for the first occurrence of <code>chr</code>, stores the location
 * in <code>pos</code> and signals via the return value whether the character
 * was found; confirm against the implementation.
 *
 * @param string the character buffer to scan
 * @param length the length of the buffer
 * @param chr the character to locate
 * @param pos presumably receives the position of the character
 * @return presumably non-zero, if the character has been found
 */
int ucx_strchr(const char *string, size_t length, int chr, size_t *pos);

/**
 * Reverse character search on a raw character buffer of the specified length.
 *
 * NOTE(review): this function is not documented in this header. Presumably
 * it behaves like ucx_strchr(), but locates the last occurrence of
 * <code>chr</code>; confirm against the implementation.
 *
 * @param string the character buffer to scan
 * @param length the length of the buffer
 * @param chr the character to locate
 * @param pos presumably receives the position of the character
 * @return presumably non-zero, if the character has been found
 */
int ucx_strrchr(const char *string, size_t length, int chr, size_t *pos);
347 /**
348 * Returns a substring starting at the location of the first occurrence of the
349 * specified character.
350 *
351 * If the string does not contain the character, an empty string is returned.
352 *
353 * @param string the string where to locate the character
354 * @param chr the character to locate
355 * @return a substring starting at the first location of <code>chr</code>
356 *
357 * @see sstrsubs()
358 */
359 sstr_t sstrchr(sstr_t string, int chr);
361 /**
362 * Returns a substring starting at the location of the last occurrence of the
363 * specified character.
364 *
365 * If the string does not contain the character, an empty string is returned.
366 *
367 * @param string the string where to locate the character
368 * @param chr the character to locate
369 * @return a substring starting at the last location of <code>chr</code>
370 *
371 * @see sstrsubs()
372 */
373 sstr_t sstrrchr(sstr_t string, int chr);
376 scstr_t scstrchr(scstr_t string, int chr);
377 scstr_t scstrrchr(scstr_t string, int chr);
/**
 * Substring search on raw character buffers of the specified lengths.
 *
 * NOTE(review): this function is not documented in this header. Presumably
 * it locates the first occurrence of <code>match</code> within
 * <code>str</code> and reports the length of the remaining string via
 * <code>newlen</code>; confirm against the implementation.
 *
 * @param str the character buffer to be scanned
 * @param length the length of <code>str</code>
 * @param match the sequence of characters to match
 * @param matchlen the length of <code>match</code>
 * @param newlen presumably receives the length of the returned string
 * @return presumably a pointer to the first occurrence of <code>match</code>
 */
const char* ucx_strstr(const char *str, size_t length,
        const char *match, size_t matchlen, size_t *newlen);
386 /**
387 * Returns a substring starting at the location of the first occurrence of the
388 * specified string.
389 *
390 * If the string does not contain the other string, an empty string is returned.
391 *
392 * If <code>match</code> is an empty string, the complete <code>string</code> is
393 * returned.
394 *
395 * @param string the string to be scanned
396 * @param match string containing the sequence of characters to match
397 * @return a substring starting at the first occurrence of
398 * <code>match</code>, or an empty string, if the sequence is not
399 * present in <code>string</code>
400 */
401 sstr_t ucx_sstrstr(sstr_t string, scstr_t match);
402 #define sstrstr(string, match) ucx_sstrstr(string, SCSTR(match))
404 scstr_t ucx_scstrstr(scstr_t string, scstr_t match);
405 #define scstrstr(string, match) ucx_scstrstr(string, SCSTR(match))
407 /**
408 * Splits a string into parts by using a delimiter string.
409 *
410 * This function will return <code>NULL</code>, if one of the following happens:
411 * <ul>
412 * <li>the string length is zero</li>
413 * <li>the delimiter length is zero</li>
414 * <li>the string equals the delimiter</li>
415 * <li>memory allocation fails</li>
416 * </ul>
417 *
418 * The integer referenced by <code>count</code> is used as input and determines
419 * the maximum size of the resulting array, i.e. the maximum count of splits to
420 * perform + 1.
421 *
422 * The integer referenced by <code>count</code> is also used as output and is
423 * set to
424 * <ul>
425 * <li>-2, on memory allocation errors</li>
426 * <li>-1, if either the string or the delimiter is an empty string</li>
427 * <li>0, if the string equals the delimiter</li>
428 * <li>1, if the string does not contain the delimiter</li>
429 * <li>the count of array items, otherwise</li>
430 * </ul>
431 *
432 * If the string starts with the delimiter, the first item of the resulting
433 * array will be an empty string.
434 *
435 * If the string ends with the delimiter and the maximum list size is not
436 * exceeded, the last array item will be an empty string.
437 * In case the list size would be exceeded, the last array item will be the
438 * remaining string after the last split, <i>including</i> the terminating
439 * delimiter.
440 *
441 * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
442 * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
443 * an allocator that manages the memory, to avoid this.
444 *
445 * @param string the string to split
446 * @param delim the delimiter string
447 * @param count IN: the maximum size of the resulting array (0 = no limit),
448 * OUT: the actual size of the array
449 * @return a sstr_t array containing the split strings or
450 * <code>NULL</code> on error
451 *
452 * @see sstrsplit_a()
453 */
454 sstr_t* ucx_strsplit(scstr_t string, scstr_t delim, ssize_t *count);
456 #define sstrsplit(s, delim, count) ucx_strsplit(SCSTR(s), SCSTR(delim), count)
458 /**
459 * Performing sstrsplit() using a UcxAllocator.
460 *
461 * <i>Read the description of sstrsplit() for details.</i>
462 *
463 * The memory for the sstr_t.ptr pointers of the array items and the memory for
464 * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
465 * function.
466 *
467 * <b>Note:</b> the allocator is not used for memory that is freed within the
468 * same call of this function (locally scoped variables).
469 *
470 * @param allocator the UcxAllocator used for allocating memory
471 * @param string the string to split
472 * @param delim the delimiter string
473 * @param count IN: the maximum size of the resulting array (0 = no limit),
474 * OUT: the actual size of the array
475 * @return a sstr_t array containing the split strings or
476 * <code>NULL</code> on error
477 *
478 * @see sstrsplit()
479 */
480 sstr_t* ucx_strsplit_a(UcxAllocator *allocator, scstr_t string, scstr_t delim,
481 ssize_t *count);
/**
 * Convenience macro for ucx_strsplit_a() that passes the string and the
 * delimiter through SCSTR().
 *
 * @param a the UcxAllocator used for allocating memory
 * @param s the string to split
 * @param d the delimiter string
 * @param c IN: the maximum size of the resulting array (0 = no limit),
 * OUT: the actual size of the array
 */
#define sstrsplit_a(a, s, d, c) \
        ucx_strsplit_a(a, SCSTR(s), SCSTR(d), c)
485 /**
486 * Compares two UCX strings with standard <code>memcmp()</code>.
487 *
488 * At first it compares the sstr_t.length attribute of the two strings. The
489 * <code>memcmp()</code> function is called, if and only if the lengths match.
490 *
491 * @param s1 the first string
492 * @param s2 the second string
493 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
494 * length of s1 is greater than the length of s2 or the result of
495 * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
496 */
497 int ucx_strcmp(scstr_t s1, scstr_t s2);
499 #define sstrcmp(s1, s2) ucx_strcmp(SCSTR(s1), SCSTR(s2))
501 /**
502 * Compares two UCX strings ignoring the case.
503 *
504 * At first it compares the sstr_t.length attribute of the two strings. If and
505 * only if the lengths match, both strings are compared char by char ignoring
506 * the case.
507 *
508 * @param s1 the first string
509 * @param s2 the second string
510 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
511 * length of s1 is greater than the length of s2 or the difference between the
512 * first two differing characters otherwise (i.e. 0 if the strings match and
513 * no characters differ)
514 */
515 int ucx_strcasecmp(scstr_t s1, scstr_t s2);
517 #define sstrcasecmp(s1, s2) ucx_strcasecmp(SCSTR(s1), SCSTR(s2))
519 /**
520 * Creates a duplicate of the specified string.
521 *
522 * The new sstr_t will contain a copy allocated by standard
523 * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to
524 * <code>free()</code>.
525 *
526 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
527 * terminated and mutable.
528 *
529 * @param string the string to duplicate
530 * @return a duplicate of the string
531 * @see sstrdup_a()
532 */
533 sstr_t scstrdup(scstr_t string);
535 #define sstrdup(s) scstrdup(SCSTR(s))
537 /**
538 * Creates a duplicate of the specified string using a UcxAllocator.
539 *
540 * The new sstr_t will contain a copy allocated by the allocator's
541 * ucx_allocator_malloc function. So it is implementation dependent, whether the
542 * returned sstr_t.ptr pointer must be passed to the allocator's
543 * ucx_allocator_free function manually.
544 *
545 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
546 * terminated.
547 *
548 * @param allocator a valid instance of a UcxAllocator
549 * @param string the string to duplicate
550 * @return a duplicate of the string
551 * @see sstrdup()
552 */
553 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t string);
555 #define sstrdup_a(allocator, s) scstrdup_a(allocator, SCSTR(s))
/**
 * Trim computation on a raw character buffer of the specified length.
 *
 * NOTE(review): this function is not documented in this header. Presumably
 * it determines the start offset and the new length of the trimmed string
 * for sstrtrim() and scstrtrim(); confirm against the implementation.
 *
 * @param str the character buffer that shall be trimmed
 * @param length the length of the buffer
 * @param newlen presumably receives the length of the trimmed string
 * @return presumably the offset of the first character of the trimmed string
 */
size_t ucx_strtrim(const char *str, size_t length, size_t *newlen);
560 /**
561 * Omits leading and trailing spaces.
562 *
563 * This function returns a new sstr_t containing a trimmed version of the
564 * specified string.
565 *
566 * <b>Note:</b> the new sstr_t references the same memory, thus you
567 * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
568 * <code>free()</code>. It is also highly recommended to avoid assignments like
569 * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
570 * source string. Assignments of this type are only permitted, if the
571 * sstr_t.ptr of the source string does not need to be freed or if another
572 * reference to the source string exists.
573 *
574 * @param string the string that shall be trimmed
575 * @return a new sstr_t containing the trimmed string
576 */
577 sstr_t sstrtrim(sstr_t string);
579 scstr_t scstrtrim(scstr_t string);
581 /**
582 * Checks, if a string has a specific prefix.
583 * @param string the string to check
584 * @param prefix the prefix the string should have
585 * @return 1, if and only if the string has the specified prefix, 0 otherwise
586 */
587 int ucx_strprefix(scstr_t string, scstr_t prefix);
589 #define sstrprefix(string, prefix) ucx_strprefix(SCSTR(string), SCSTR(prefix))
591 /**
592 * Checks, if a string has a specific suffix.
593 * @param string the string to check
594 * @param suffix the suffix the string should have
595 * @return 1, if and only if the string has the specified suffix, 0 otherwise
596 */
597 int ucx_strsuffix(scstr_t string, scstr_t suffix);
/**
 * Convenience macro for ucx_strsuffix() that passes both arguments through
 * SCSTR().
 *
 * @param string the string to check
 * @param suffix the suffix the string should have
 */
#define sstrsuffix(string, suffix) \
        ucx_strsuffix(SCSTR(string), SCSTR(suffix))
601 /**
602 * Returns a lower case version of a string.
603 *
604 * This function creates a duplicate of the input string, first. See the
605 * documentation of sstrdup() for the implications.
606 *
607 * @param string the input string
608 * @return the resulting lower case string
609 * @see sstrdup()
610 */
611 sstr_t ucx_strlower(scstr_t string);
613 #define sstrlower(string) ucx_strlower(SCSTR(string))
615 /**
616 * Returns a lower case version of a string.
617 *
618 * This function creates a duplicate of the input string, first. See the
619 * documentation of sstrdup_a() for the implications.
620 *
621 * @param allocator the allocator used for duplicating the string
622 * @param string the input string
623 * @return the resulting lower case string
624 * @see sstrdup_a()
625 */
626 sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string);
628 #define sstrlower_a(allocator, string) ucx_strlower_a(allocator, SCSTR(string))
630 /**
631 * Returns an upper case version of a string.
632 *
633 * This function creates a duplicate of the input string, first. See the
634 * documentation of sstrdup() for the implications.
635 *
636 * @param string the input string
637 * @return the resulting upper case string
638 * @see sstrdup()
639 */
640 sstr_t ucx_strupper(scstr_t string);
642 #define sstrupper(string) ucx_strupper(SCSTR(string))
644 /**
645 * Returns an upper case version of a string.
646 *
647 * This function creates a duplicate of the input string, first. See the
648 * documentation of sstrdup_a() for the implications.
649 *
650 * @param allocator the allocator used for duplicating the string
651 * @param string the input string
652 * @return the resulting upper case string
653 * @see sstrdup_a()
654 */
655 sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string);
/**
 * Convenience macro for ucx_strupper_a() that passes the string through
 * SCSTR(), like the other string macros of this header do.
 *
 * @param allocator the allocator used for duplicating the string
 * @param string the input string
 */
#define sstrupper_a(allocator, string) \
        ucx_strupper_a(allocator, SCSTR(string))
659 #ifdef __cplusplus
660 }
661 #endif
663 #endif /* UCX_STRING_H */