Mon, 14 May 2018 18:27:23 +0200
finally removes the underscore of ugliness from ucx_str_cmp() and ucx_str_casecmp()
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28 /**
29 * Bounded string implementation.
30 *
31 * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
32 * The main difference to C strings is, that <code>sstr_t</code> does <b>not
33 * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
34 * within the structure.
35 *
36 * When using <code>sstr_t</code>, developers must be fully aware of what type
37 * of string (<code>NULL</code>-terminated or not) they are using, when
38 * accessing the <code>char* ptr</code> directly.
39 *
40 * The UCX string module provides some common string functions, known from
41 * standard libc, working with <code>sstr_t</code>.
42 *
43 * @file string.h
44 * @author Mike Becker
45 * @author Olaf Wintermann
46 */
48 #ifndef UCX_STRING_H
49 #define UCX_STRING_H
51 #include "ucx.h"
52 #include "allocator.h"
53 #include <stddef.h>
/**
 * Shortcut for a <code>sstr_t struct</code> literal.
 *
 * Expands to a brace-enclosed initializer, so it can only be used where an
 * initializer is allowed. The length is computed with <code>sizeof</code>
 * minus one, therefore <code>s</code> should be a string literal or a char
 * array; the last byte (the terminator of a literal) is not counted.
 */
#define ST(s) { (char*)(s), sizeof(s)-1 }

/**
 * Shortcut for the conversion of a C string to a <code>sstr_t</code>.
 *
 * Calls sstrn() with a length computed by <code>sizeof</code> minus one,
 * therefore <code>s</code> should be a string literal or a char array and
 * not a plain <code>char*</code>.
 */
#define S(s) sstrn((char*)(s), sizeof(s)-1)
/**
 * Expands a sstr_t to the two printf arguments consumed by PRIsstr:
 * the length (cast to <code>int</code>) followed by the character pointer.
 */
#define SFMT(s) (int) ((s).length), ((s).ptr)

/**
 * Format specifier for a sstr_t, without the leading percent sign.
 *
 * It consumes the two arguments produced by SFMT().
 */
#define PRIsstr ".*s"
67 #ifdef __cplusplus
68 extern "C" {
69 #endif
/**
 * The UCX string structure.
 *
 * Pairs a character pointer with an explicit length. The member order
 * (pointer first, length second) is relied upon by the ST() initializer
 * macro.
 */
typedef struct {
   /** A reference to the string (<b>not necessarily <code>NULL</code>
    * -terminated</b>) */
    char *ptr;
    /** The length of the string */
    size_t length;
} sstr_t;
/**
 * The UCX string structure for constant strings.
 *
 * Has the same members as sstr_t, but the characters are referenced through
 * a pointer to <code>const char</code>.
 */
typedef struct {
    /** A reference to the constant string */
    const char *ptr;
    /** The length of the string */
    size_t length;
} scstr_t;
86 #ifdef __cplusplus
87 }
88 #endif
91 #ifdef __cplusplus
92 inline scstr_t s2scstr(sstr_t s) {
93 scstr_t c;
94 c.ptr = s.ptr;
95 c.length = s.ptr;
96 return c;
97 }
98 inline scstr_t s2scstr(scstr_t c) {
99 return c;
100 }
101 #define SCSTR s2scstr
102 #else
104 scstr_t ucx_sc2sc(scstr_t c);
105 scstr_t ucx_ss2sc(sstr_t str);
106 #if __STDC_VERSION__ >= 201112L
107 #define SCSTR(str) _Generic(str, sstr_t: ucx_ss2sc, scstr_t: ucx_sc2sc)(str)
108 #elif defined(__GNUC__) || defined(__clang__)
109 #define SCSTR(str) __builtin_choose_expr( \
110 __builtin_types_compatible_p(typeof(str), sstr_t), \
111 ucx_ss2sc, \
112 ucx_sc2sc)(str)
113 #elif defined(__sun)
114 #define SCSTR(str) ({typeof(str) ucx_tmp_var_str = str; \
115 scstr_t ucx_tmp_var_c; \
116 ucx_tmp_var_c.ptr = ucx_tmp_var_str.ptr;\
117 ucx_tmp_var_c.length = ucx_tmp_var_str.length;\
118 ucx_tmp_var_c; })
119 #else
120 scstr_t ucx_ss2c_s();
121 #define SCSTR ucx_ss2c_s
122 #endif /* C11 feature test */
124 #endif /* C++ */
126 #ifdef __cplusplus
127 extern "C" {
128 #endif
131 /**
132 * Creates a new sstr_t based on a C string.
133 *
134 * The length is implicitly inferred by using a call to <code>strlen()</code>.
135 *
136 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
137 * do want a copy, use sstrdup() on the return value of this function.
138 *
139 * @param cstring the C string to wrap
140 * @return a new sstr_t containing the C string
141 *
142 * @see sstrn()
143 */
144 sstr_t sstr(char *cstring);
146 /**
147 * Creates a new sstr_t of the specified length based on a C string.
148 *
149 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
150 * do want a copy, use sstrdup() on the return value of this function.
151 *
152 * @param cstring the C string to wrap
153 * @param length the length of the string
154 * @return a new sstr_t containing the C string
155 *
156 * @see sstr()
157 * @see S()
158 */
159 sstr_t sstrn(char *cstring, size_t length);
162 scstr_t scstr(const char *cstring);
163 scstr_t scstrn(const char *cstring, size_t length);
165 /**
166 * Returns the cumulated length of all specified strings.
167 *
168 * At least one string must be specified.
169 *
170 * <b>Attention:</b> if the count argument does not match the count of the
171 * specified strings, the behavior is undefined.
172 *
173 * @param count the total number of specified strings (so at least 1)
174 * @param ... all strings (the first one is mandatory)
176 * @return the cumulated length of all strings
177 */
178 size_t ucx_strnlen(size_t count, ...);
/**
 * Convenience macro that forwards all arguments unchanged to ucx_strnlen().
 */
#define sstrnlen(count, ...) \
    ucx_strnlen(count, __VA_ARGS__)
182 /**
183 * Concatenates two or more strings.
184 *
185 * The resulting string will be allocated by standard <code>malloc()</code>.
186 * So developers <b>MUST</b> pass the sstr_t.ptr to <code>free()</code>.
187 *
188 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
189 * terminated.
190 *
191 * @param count the total number of strings to concatenate
192 * @param s1 first string
193 * @param ... all remaining strings
194 * @return the concatenated string
195 */
196 sstr_t ucx_strcat(size_t count, scstr_t s1, ...);
/**
 * Front end for ucx_strcat().
 *
 * Only <code>s1</code> is converted with SCSTR(); the remaining strings are
 * forwarded exactly as given.
 */
#define sstrcat(count, s1, ...) \
    ucx_strcat(count, SCSTR(s1), __VA_ARGS__)
200 /**
201 * Concatenates two or more strings using a UcxAllocator.
202 *
203 * See sstrcat() for details.
204 *
205 * @param a the allocator to use
206 * @param count the total number of strings to concatenate
207 * @param s1 first string
208 * @param ... all remaining strings
209 * @return the concatenated string
210 */
211 sstr_t ucx_strcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...);
/**
 * Front end for ucx_strcat_a().
 *
 * The allocator is the first argument, matching the parameter order of
 * ucx_strcat_a(). Only <code>s1</code> is converted with SCSTR(); the
 * remaining strings are forwarded exactly as given.
 */
#define sstrcat_a(a, count, s1, ...) \
    ucx_strcat_a(a, count, SCSTR(s1), __VA_ARGS__)
215 /**
216 * Returns a substring starting at the specified location.
217 *
218 * <b>Attention:</b> the new string references the same memory area as the
219 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
220 * Use sstrdup() to get a copy.
221 *
222 * @param string input string
223 * @param start start location of the substring
224 * @return a substring of <code>string</code> starting at <code>start</code>
225 *
226 * @see sstrsubsl()
227 * @see sstrchr()
228 */
229 sstr_t sstrsubs(sstr_t string, size_t start);
231 /**
232 * Returns a substring with a maximum length starting at the specified location.
233 *
234 * <b>Attention:</b> the new string references the same memory area as the
235 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
236 * Use sstrdup() to get a copy.
237 *
238 * @param string input string
239 * @param start start location of the substring
240 * @param length the maximum length of the substring
241 * @return a substring of <code>string</code> starting at <code>start</code>
242 * with a maximum length of <code>length</code>
243 *
244 * @see sstrsubs()
245 * @see sstrchr()
246 */
247 sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
249 scstr_t scstrsubs(scstr_t s, size_t start);
250 scstr_t scstrsubsl(scstr_t string, size_t start, size_t length);
253 int ucx_strchr(const char *string, size_t length, int chr, size_t *pos);
254 int ucx_strrchr(const char *string, size_t length, int chr, size_t *pos);
256 /**
257 * Returns a substring starting at the location of the first occurrence of the
258 * specified character.
259 *
260 * If the string does not contain the character, an empty string is returned.
261 *
262 * @param string the string where to locate the character
263 * @param chr the character to locate
264 * @return a substring starting at the first location of <code>chr</code>
265 *
266 * @see sstrsubs()
267 */
268 sstr_t sstrchr(sstr_t string, int chr);
270 /**
271 * Returns a substring starting at the location of the last occurrence of the
272 * specified character.
273 *
274 * If the string does not contain the character, an empty string is returned.
275 *
276 * @param string the string where to locate the character
277 * @param chr the character to locate
278 * @return a substring starting at the last location of <code>chr</code>
279 *
280 * @see sstrsubs()
281 */
282 sstr_t sstrrchr(sstr_t string, int chr);
285 scstr_t scstrchr(scstr_t string, int chr);
286 scstr_t scstrrchr(scstr_t string, int chr);
288 const char* ucx_strstr(
289 const char *str,
290 size_t length,
291 const char *match,
292 size_t matchlen,
293 size_t *newlen);
295 /**
296 * Returns a substring starting at the location of the first occurrence of the
297 * specified string.
298 *
299 * If the string does not contain the other string, an empty string is returned.
300 *
301 * If <code>match</code> is an empty string, the complete <code>string</code> is
302 * returned.
303 *
304 * @param string the string to be scanned
305 * @param match string containing the sequence of characters to match
306 * @return a substring starting at the first occurrence of
307 * <code>match</code>, or an empty string, if the sequence is not
308 * present in <code>string</code>
309 */
310 sstr_t ucx_sstrstr(sstr_t string, scstr_t match);
/**
 * Front end for ucx_sstrstr(): <code>string</code> is passed as is,
 * <code>match</code> is converted with SCSTR().
 */
#define sstrstr(string, match) \
    ucx_sstrstr(string, SCSTR(match))
313 scstr_t ucx_scstrstr(scstr_t string, scstr_t match);
/**
 * Front end for ucx_scstrstr(): <code>string</code> is passed as is,
 * <code>match</code> is converted with SCSTR().
 */
#define scstrstr(string, match) \
    ucx_scstrstr(string, SCSTR(match))
316 /**
317 * Splits a string into parts by using a delimiter string.
318 *
319 * This function will return <code>NULL</code>, if one of the following happens:
320 * <ul>
321 * <li>the string length is zero</li>
322 * <li>the delimiter length is zero</li>
323 * <li>the string equals the delimiter</li>
324 * <li>memory allocation fails</li>
325 * </ul>
326 *
327 * The integer referenced by <code>count</code> is used as input and determines
328 * the maximum size of the resulting array, i.e. the maximum count of splits to
329 * perform + 1.
330 *
331 * The integer referenced by <code>count</code> is also used as output and is
332 * set to
333 * <ul>
334 * <li>-2, on memory allocation errors</li>
335 * <li>-1, if either the string or the delimiter is an empty string</li>
336 * <li>0, if the string equals the delimiter</li>
337 * <li>1, if the string does not contain the delimiter</li>
338 * <li>the count of array items, otherwise</li>
339 * </ul>
340 *
341 * If the string starts with the delimiter, the first item of the resulting
342 * array will be an empty string.
343 *
344 * If the string ends with the delimiter and the maximum list size is not
345 * exceeded, the last array item will be an empty string.
346 * In case the list size would be exceeded, the last array item will be the
347 * remaining string after the last split, <i>including</i> the terminating
348 * delimiter.
349 *
350 * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
351 * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
352 * an allocator that manages the memory to avoid this.
353 *
354 * @param string the string to split
355 * @param delim the delimiter string
356 * @param count IN: the maximum size of the resulting array (0 = no limit),
357 * OUT: the actual size of the array
358 * @return a sstr_t array containing the split strings or
359 * <code>NULL</code> on error
360 *
361 * @see sstrsplit_a()
362 */
363 sstr_t* ucx_strsplit(scstr_t string, scstr_t delim, ssize_t *count);
/**
 * Front end for ucx_strsplit(): the string and the delimiter are converted
 * with SCSTR(), <code>count</code> is forwarded unchanged.
 */
#define sstrsplit(s, delim, count) \
    ucx_strsplit(SCSTR(s), SCSTR(delim), count)
367 /**
368 * Performs sstrsplit() using a UcxAllocator.
369 *
370 * <i>Read the description of sstrsplit() for details.</i>
371 *
372 * The memory for the sstr_t.ptr pointers of the array items and the memory for
373 * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
374 * function.
375 *
376 * <b>Note:</b> the allocator is not used for memory that is freed within the
377 * same call of this function (locally scoped variables).
378 *
379 * @param allocator the UcxAllocator used for allocating memory
380 * @param string the string to split
381 * @param delim the delimiter string
382 * @param count IN: the maximum size of the resulting array (0 = no limit),
383 * OUT: the actual size of the array
384 * @return a sstr_t array containing the split strings or
385 * <code>NULL</code> on error
386 *
387 * @see sstrsplit()
388 */
389 sstr_t* ucx_strsplit_a(UcxAllocator *allocator, scstr_t string, scstr_t delim,
390 ssize_t *count);
/**
 * Front end for ucx_strsplit_a(): the allocator and the count pointer are
 * forwarded unchanged, the string and the delimiter are each converted with
 * SCSTR().
 */
#define sstrsplit_a(a, s, d, c) \
    ucx_strsplit_a(a, SCSTR(s), SCSTR(d), c)
394 /**
395 * Compares two UCX strings with standard <code>memcmp()</code>.
396 *
397 * At first it compares the sstr_t.length attribute of the two strings. The
398 * <code>memcmp()</code> function is called, if and only if the lengths match.
399 *
400 * @param s1 the first string
401 * @param s2 the second string
402 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
403 * length of s1 is greater than the length of s2 or the result of
404 * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
405 */
406 int ucx_strcmp(scstr_t s1, scstr_t s2);
/**
 * Front end for ucx_strcmp(): both arguments are converted with SCSTR().
 */
#define sstrcmp(s1, s2) \
    ucx_strcmp(SCSTR(s1), SCSTR(s2))
410 /**
411 * Compares two UCX strings ignoring the case.
412 *
413 * At first it compares the sstr_t.length attribute of the two strings. If and
414 * only if the lengths match, both strings are compared char by char ignoring
415 * the case.
416 *
417 * @param s1 the first string
418 * @param s2 the second string
419 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
420 * length of s1 is greater than the length of s2 or the difference between the
421 * first two differing characters otherwise (i.e. 0 if the strings match and
422 * no characters differ)
423 */
424 int ucx_strcasecmp(scstr_t s1, scstr_t s2);
/**
 * Front end for ucx_strcasecmp(): both arguments are converted with SCSTR().
 */
#define sstrcasecmp(s1, s2) \
    ucx_strcasecmp(SCSTR(s1), SCSTR(s2))
428 /**
429 * Creates a duplicate of the specified string.
430 *
431 * The new sstr_t will contain a copy allocated by standard
432 * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to
433 * <code>free()</code>.
434 *
435 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
436 * terminated.
437 *
438 * @param string the string to duplicate
439 * @return a duplicate of the string
440 * @see sstrdup_a()
441 */
442 sstr_t scstrdup(scstr_t string);
/**
 * Front end for scstrdup(): the argument is converted with SCSTR().
 */
#define sstrdup(s) \
    scstrdup(SCSTR(s))
446 /**
447 * Creates a duplicate of the specified string using a UcxAllocator.
448 *
449 * The new sstr_t will contain a copy allocated by the allocator's
450 * ucx_allocator_malloc function. So it is implementation dependent, whether the
451 * returned sstr_t.ptr pointer must be passed to the allocator's
452 * ucx_allocator_free function manually.
453 *
454 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
455 * terminated.
456 *
457 * @param allocator a valid instance of a UcxAllocator
458 * @param string the string to duplicate
459 * @return a duplicate of the string
460 * @see sstrdup()
461 */
462 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t string);
/**
 * Front end for scstrdup_a(): the allocator is forwarded unchanged, the
 * string is converted with SCSTR().
 */
#define sstrdup_a(allocator, s) \
    scstrdup_a(allocator, SCSTR(s))
467 size_t ucx_strtrim(const char *str, size_t length, size_t *newlen);
469 /**
470 * Omits leading and trailing spaces.
471 *
472 * This function returns a new sstr_t containing a trimmed version of the
473 * specified string.
474 *
475 * <b>Note:</b> the new sstr_t references the same memory, thus you
476 * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
477 * <code>free()</code>. It is also highly recommended to avoid assignments like
478 * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
479 * source string. Assignments of this type are only permitted, if the
480 * sstr_t.ptr of the source string does not need to be freed or if another
481 * reference to the source string exists.
482 *
483 * @param string the string that shall be trimmed
484 * @return a new sstr_t containing the trimmed string
485 */
486 sstr_t sstrtrim(sstr_t string);
488 scstr_t scstrtrim(scstr_t string);
490 /**
491 * Checks, if a string has a specific prefix.
492 * @param string the string to check
493 * @param prefix the prefix the string should have
494 * @return 1, if and only if the string has the specified prefix, 0 otherwise
495 */
496 int ucx_strprefix(scstr_t string, scstr_t prefix);
/**
 * Front end for ucx_strprefix(): both arguments are converted with SCSTR().
 */
#define sstrprefix(string, prefix) \
    ucx_strprefix(SCSTR(string), SCSTR(prefix))
500 /**
501 * Checks, if a string has a specific suffix.
502 * @param string the string to check
503 * @param suffix the suffix the string should have
504 * @return 1, if and only if the string has the specified suffix, 0 otherwise
505 */
506 int ucx_strsuffix(scstr_t string, scstr_t suffix);
/**
 * Front end for ucx_strsuffix(): both arguments are converted with SCSTR().
 */
#define sstrsuffix(string, suffix) \
    ucx_strsuffix(SCSTR(string), SCSTR(suffix))
510 /**
511 * Returns a lower case version of a string.
512 *
513 * This function creates a duplicate of the input string, first. See the
514 * documentation of sstrdup() for the implications.
515 *
516 * @param string the input string
517 * @return the resulting lower case string
518 * @see sstrdup()
519 */
520 sstr_t ucx_strlower(scstr_t string);
/**
 * Front end for ucx_strlower(): the argument is converted with SCSTR().
 */
#define sstrlower(string) \
    ucx_strlower(SCSTR(string))
524 /**
525 * Returns a lower case version of a string.
526 *
527 * This function creates a duplicate of the input string, first. See the
528 * documentation of sstrdup_a() for the implications.
529 *
530 * @param allocator the allocator used for duplicating the string
531 * @param string the input string
532 * @return the resulting lower case string
533 * @see sstrdup_a()
534 */
535 sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string);
/**
 * Front end for ucx_strlower_a(): the allocator is forwarded unchanged, the
 * string is converted with SCSTR().
 */
#define sstrlower_a(allocator, string) \
    ucx_strlower_a(allocator, SCSTR(string))
539 /**
540 * Returns an upper case version of a string.
541 *
542 * This function creates a duplicate of the input string, first. See the
543 * documentation of sstrdup() for the implications.
544 *
545 * @param string the input string
546 * @return the resulting upper case string
547 * @see sstrdup()
548 */
549 sstr_t ucx_strupper(scstr_t string);
/**
 * Front end for ucx_strupper(): the argument is converted with SCSTR().
 */
#define sstrupper(string) \
    ucx_strupper(SCSTR(string))
553 /**
554 * Returns an upper case version of a string.
555 *
556 * This function creates a duplicate of the input string, first. See the
557 * documentation of sstrdup_a() for the implications.
558 *
559 * @param allocator the allocator used for duplicating the string
560 * @param string the input string
561 * @return the resulting upper case string
562 * @see sstrdup_a()
563 */
564 sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string);
/**
 * Front end for ucx_strupper_a(): the allocator is forwarded unchanged, the
 * string is converted with SCSTR(), as sstrlower_a() does.
 */
#define sstrupper_a(allocator, string) \
    ucx_strupper_a(allocator, SCSTR(string))
568 #ifdef __cplusplus
569 }
570 #endif
572 #endif /* UCX_STRING_H */