Sun, 11 Mar 2018 13:43:07 +0100
adds scstr_t struct for const strings and adapts some string functions
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28 /**
29 * Bounded string implementation.
30 *
31 * The UCX strings (<code>sstr_t</code>) provide an alternative to C strings.
32 * The main difference to C strings is, that <code>sstr_t</code> does <b>not
33 * need to be <code>NULL</code>-terminated</b>. Instead the length is stored
34 * within the structure.
35 *
 * When using <code>sstr_t</code>, developers must be fully aware of what type
 * of string (<code>NULL</code>-terminated or not) they are using, when
 * accessing the <code>char* ptr</code> directly.
39 *
40 * The UCX string module provides some common string functions, known from
41 * standard libc, working with <code>sstr_t</code>.
42 *
43 * @file string.h
44 * @author Mike Becker
45 * @author Olaf Wintermann
46 */
48 #ifndef UCX_STRING_H
49 #define UCX_STRING_H
51 #include "ucx.h"
52 #include "allocator.h"
53 #include <stddef.h>
/**
 * Shortcut for a <code>sstr_t struct</code> literal.
 *
 * Expands to a brace-enclosed initializer, so it can only be used where an
 * initializer is allowed. The length is computed with <code>sizeof</code>,
 * therefore the argument must be a string literal or a char array and
 * <b>not</b> a pointer.
 */
#define ST(s) { (char*)(s), sizeof(s)-1 }
/**
 * Shortcut for the conversion of a C string to a <code>sstr_t</code>.
 *
 * The length passed to sstrn() is <code>sizeof(s)-1</code> and not the result
 * of <code>strlen()</code>, therefore the argument must be a string literal or
 * a char array and <b>not</b> a pointer.
 */
#define S(s) sstrn((char*)(s), sizeof(s)-1)
#ifdef __cplusplus
extern "C" {
#endif

/**
 * The UCX string structure.
 */
typedef struct {
    /** A reference to the string (<b>not necessarily <code>NULL</code>
     * -terminated</b>) */
    char *ptr;
    /** The length of the string */
    size_t length;
} sstr_t;

/**
 * The UCX string structure for constant strings.
 *
 * It has the same members as sstr_t, but the characters cannot be modified
 * through <code>ptr</code>.
 */
typedef struct {
    /** A reference to the constant string (<b>not necessarily
     * <code>NULL</code>-terminated</b>) */
    const char *ptr;
    /** The length of the string */
    size_t length;
} scstr_t;

#ifdef __cplusplus
}
#endif
84 #ifdef __cplusplus
85 inline scstr_t s2scstr(sstr_t s) {
86 scstr_t c;
87 c.ptr = s.ptr;
88 c.length = s.ptr;
89 return c;
90 }
91 inline scstr_t s2scstr(scstr_t c) {
92 return c;
93 }
94 #define SCSTR s2scstr
95 #else
97 scstr_t ucx_sc2sc(scstr_t c);
98 scstr_t ucx_ss2sc(sstr_t str);
99 #if __STDC_VERSION__ >= 201112L
100 #define SCSTR(str) _Generic(str, sstr_t: ucx_ss2sc, scstr_t: ucx_sc2sc)(str)
101 #elif defined(__GNUC__) || defined(__clang__)
102 #define SCSTR(str) __builtin_choose_expr( \
103 __builtin_types_compatible_p(typeof(str), sstr_t), \
104 ucx_ss2sc, \
105 ucx_sc2sc)(str)
106 #elif defined(__sun)
107 #define SCSTR(str) ({typeof(str) ucx_tmp_var_str = str; \
108 scstr_t ucx_tmp_var_c; \
109 ucx_tmp_var_c.ptr = ucx_tmp_var_str.ptr;\
110 ucx_tmp_var_c.length = ucx_tmp_var_str.length;\
111 ucx_tmp_var_c; })
112 #else
113 scstr_t ucx_ss2c_s();
114 #define SCSTR ucx_ss2c_s
115 #endif /* C11 feature test */
117 #endif /* C++ */
119 #ifdef __cplusplus
120 extern "C" {
121 #endif
124 /**
125 * Creates a new sstr_t based on a C string.
126 *
127 * The length is implicitly inferred by using a call to <code>strlen()</code>.
128 *
129 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
130 * do want a copy, use sstrdup() on the return value of this function.
131 *
132 * @param cstring the C string to wrap
133 * @return a new sstr_t containing the C string
134 *
135 * @see sstrn()
136 */
137 sstr_t sstr(char *cstring);
139 /**
140 * Creates a new sstr_t of the specified length based on a C string.
141 *
142 * <b>Note:</b> the sstr_t will hold a <i>reference</i> to the C string. If you
143 * do want a copy, use sstrdup() on the return value of this function.
144 *
145 * @param cstring the C string to wrap
146 * @param length the length of the string
147 * @return a new sstr_t containing the C string
148 *
149 * @see sstr()
150 * @see S()
151 */
152 sstr_t sstrn(char *cstring, size_t length);
155 scstr_t scstr(const char *cstring);
156 scstr_t scstrn(const char *cstring, size_t length);
158 /**
159 * Returns the cumulated length of all specified strings.
160 *
161 * At least one string must be specified.
162 *
163 * <b>Attention:</b> if the count argument does not match the count of the
164 * specified strings, the behavior is undefined.
165 *
166 * @param count the total number of specified strings (so at least 1)
167 * @param string the first string
168 * @param ... all other strings
169 * @return the cumulated length of all strings
170 */
171 size_t sstrnlen(size_t count, sstr_t string, ...);
173 /**
174 * Concatenates two or more strings.
175 *
176 * The resulting string will be allocated by standard <code>malloc()</code>.
177 * So developers <b>MUST</b> pass the sstr_t.ptr to <code>free()</code>.
178 *
179 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
180 * terminated.
181 *
182 * @param count the total number of strings to concatenate
183 * @param s1 first string
184 * @param s2 second string
185 * @param ... all remaining strings
186 * @return the concatenated string
187 */
188 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...);
190 /**
191 * Concatenates two or more strings using a UcxAllocator.
192 *
193 * See sstrcat() for details.
194 *
195 * @param a the allocator to use
196 * @param count the total number of strings to concatenate
197 * @param s1 first string
198 * @param s2 second string
199 * @param ... all remaining strings
200 * @return the concatenated string
201 */
202 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...);
205 /**
206 * Returns a substring starting at the specified location.
207 *
208 * <b>Attention:</b> the new string references the same memory area as the
209 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
210 * Use sstrdup() to get a copy.
211 *
212 * @param string input string
213 * @param start start location of the substring
214 * @return a substring of <code>string</code> starting at <code>start</code>
215 *
216 * @see sstrsubsl()
217 * @see sstrchr()
218 */
219 sstr_t sstrsubs(sstr_t string, size_t start);
221 /**
222 * Returns a substring with a maximum length starting at the specified location.
223 *
224 * <b>Attention:</b> the new string references the same memory area as the
225 * input string and will <b>NOT</b> be <code>NULL</code>-terminated.
226 * Use sstrdup() to get a copy.
227 *
228 * @param string input string
229 * @param start start location of the substring
230 * @param length the maximum length of the substring
231 * @return a substring of <code>string</code> starting at <code>start</code>
232 * with a maximum length of <code>length</code>
233 *
234 * @see sstrsubs()
235 * @see sstrchr()
236 */
237 sstr_t sstrsubsl(sstr_t string, size_t start, size_t length);
239 /**
240 * Returns a substring starting at the location of the first occurrence of the
241 * specified character.
242 *
243 * If the string does not contain the character, an empty string is returned.
244 *
245 * @param string the string where to locate the character
246 * @param chr the character to locate
247 * @return a substring starting at the first location of <code>chr</code>
248 *
249 * @see sstrsubs()
250 */
251 sstr_t sstrchr(sstr_t string, int chr);
253 /**
254 * Returns a substring starting at the location of the last occurrence of the
255 * specified character.
256 *
257 * If the string does not contain the character, an empty string is returned.
258 *
259 * @param string the string where to locate the character
260 * @param chr the character to locate
261 * @return a substring starting at the last location of <code>chr</code>
262 *
263 * @see sstrsubs()
264 */
265 sstr_t sstrrchr(sstr_t string, int chr);
267 /**
268 * Returns a substring starting at the location of the first occurrence of the
269 * specified string.
270 *
271 * If the string does not contain the other string, an empty string is returned.
272 *
273 * If <code>match</code> is an empty string, the complete <code>string</code> is
274 * returned.
275 *
276 * @param string the string to be scanned
277 * @param match string containing the sequence of characters to match
278 * @return a substring starting at the first occurrence of
279 * <code>match</code>, or an empty string, if the sequence is not
280 * present in <code>string</code>
281 */
282 sstr_t sstrstr(sstr_t string, sstr_t match);
284 /**
285 * Splits a string into parts by using a delimiter string.
286 *
287 * This function will return <code>NULL</code>, if one of the following happens:
288 * <ul>
289 * <li>the string length is zero</li>
290 * <li>the delimeter length is zero</li>
291 * <li>the string equals the delimeter</li>
292 * <li>memory allocation fails</li>
293 * </ul>
294 *
295 * The integer referenced by <code>count</code> is used as input and determines
296 * the maximum size of the resulting array, i.e. the maximum count of splits to
297 * perform + 1.
298 *
299 * The integer referenced by <code>count</code> is also used as output and is
300 * set to
301 * <ul>
302 * <li>-2, on memory allocation errors</li>
303 * <li>-1, if either the string or the delimiter is an empty string</li>
304 * <li>0, if the string equals the delimiter</li>
305 * <li>1, if the string does not contain the delimiter</li>
306 * <li>the count of array items, otherwise</li>
307 * </ul>
308 *
309 * If the string starts with the delimiter, the first item of the resulting
310 * array will be an empty string.
311 *
312 * If the string ends with the delimiter and the maximum list size is not
313 * exceeded, the last array item will be an empty string.
314 * In case the list size would be exceeded, the last array item will be the
315 * remaining string after the last split, <i>including</i> the terminating
316 * delimiter.
317 *
318 * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
319 * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with
320 * an allocator to managed memory, to avoid this.
321 *
322 * @param string the string to split
323 * @param delim the delimiter string
324 * @param count IN: the maximum size of the resulting array (0 = no limit),
325 * OUT: the actual size of the array
326 * @return a sstr_t array containing the split strings or
327 * <code>NULL</code> on error
328 *
329 * @see sstrsplit_a()
330 */
331 sstr_t* sstrsplit(sstr_t string, sstr_t delim, ssize_t *count);
333 /**
334 * Performing sstrsplit() using a UcxAllocator.
335 *
336 * <i>Read the description of sstrsplit() for details.</i>
337 *
338 * The memory for the sstr_t.ptr pointers of the array items and the memory for
339 * the sstr_t array itself are allocated by using the UcxAllocator.malloc()
340 * function.
341 *
342 * <b>Note:</b> the allocator is not used for memory that is freed within the
343 * same call of this function (locally scoped variables).
344 *
345 * @param allocator the UcxAllocator used for allocating memory
346 * @param string the string to split
347 * @param delim the delimiter string
348 * @param count IN: the maximum size of the resulting array (0 = no limit),
349 * OUT: the actual size of the array
350 * @return a sstr_t array containing the split strings or
351 * <code>NULL</code> on error
352 *
353 * @see sstrsplit()
354 */
355 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t string, sstr_t delim,
356 ssize_t *count);
358 /**
359 * Compares two UCX strings with standard <code>memcmp()</code>.
360 *
361 * At first it compares the sstr_t.length attribute of the two strings. The
362 * <code>memcmp()</code> function is called, if and only if the lengths match.
363 *
364 * @param s1 the first string
365 * @param s2 the second string
366 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
367 * length of s1 is greater than the length of s2 or the result of
368 * <code>memcmp()</code> otherwise (i.e. 0 if the strings match)
369 */
370 int sstrcmp(sstr_t s1, sstr_t s2);
372 /**
373 * Compares two UCX strings ignoring the case.
374 *
375 * At first it compares the sstr_t.length attribute of the two strings. If and
376 * only if the lengths match, both strings are compared char by char ignoring
377 * the case.
378 *
379 * @param s1 the first string
380 * @param s2 the second string
381 * @return -1, if the length of s1 is less than the length of s2 or 1, if the
382 * length of s1 is greater than the length of s2 or the difference between the
383 * first two differing characters otherwise (i.e. 0 if the strings match and
384 * no characters differ)
385 */
386 int sstrcasecmp(sstr_t s1, sstr_t s2);
388 /**
389 * Creates a duplicate of the specified string.
390 *
391 * The new sstr_t will contain a copy allocated by standard
392 * <code>malloc()</code>. So developers <b>MUST</b> pass the sstr_t.ptr to
393 * <code>free()</code>.
394 *
395 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
396 * terminated.
397 *
398 * @param string the string to duplicate
399 * @return a duplicate of the string
400 * @see sstrdup_a()
401 */
402 sstr_t scstrdup(scstr_t string);
404 #define sstrdup(s) scstrdup(SCSTR(s))
406 /**
407 * Creates a duplicate of the specified string using a UcxAllocator.
408 *
409 * The new sstr_t will contain a copy allocated by the allocators
410 * ucx_allocator_malloc function. So it is implementation depended, whether the
411 * returned sstr_t.ptr pointer must be passed to the allocators
412 * ucx_allocator_free function manually.
413 *
414 * The sstr_t.ptr of the return value will <i>always</i> be <code>NULL</code>-
415 * terminated.
416 *
417 * @param allocator a valid instance of a UcxAllocator
418 * @param string the string to duplicate
419 * @return a duplicate of the string
420 * @see sstrdup()
421 */
422 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t string);
424 #define sstrdup_a(allocator, s) scstrdup_a(allocator, SCSTR(s))
426 /**
427 * Omits leading and trailing spaces.
428 *
429 * This function returns a new sstr_t containing a trimmed version of the
430 * specified string.
431 *
432 * <b>Note:</b> the new sstr_t references the same memory, thus you
433 * <b>MUST NOT</b> pass the sstr_t.ptr of the return value to
434 * <code>free()</code>. It is also highly recommended to avoid assignments like
435 * <code>mystr = sstrtrim(mystr);</code> as you lose the reference to the
436 * source string. Assignments of this type are only permitted, if the
437 * sstr_t.ptr of the source string does not need to be freed or if another
438 * reference to the source string exists.
439 *
440 * @param string the string that shall be trimmed
441 * @return a new sstr_t containing the trimmed string
442 */
443 sstr_t sstrtrim(sstr_t string);
445 /**
446 * Checks, if a string has a specific prefix.
447 * @param string the string to check
448 * @param prefix the prefix the string should have
449 * @return 1, if and only if the string has the specified prefix, 0 otherwise
450 */
451 int ucx_strprefix(scstr_t string, scstr_t prefix);
453 #define sstrprefix(string, prefix) ucx_strprefix(SCSTR(string), SCSTR(prefix))
455 /**
456 * Checks, if a string has a specific suffix.
457 * @param string the string to check
458 * @param suffix the suffix the string should have
459 * @return 1, if and only if the string has the specified suffix, 0 otherwise
460 */
461 int ucx_strsuffix(scstr_t string, scstr_t suffix);
463 #define sstrsuffix(string, prefix) ucx_strsuffix(SCSTR(string), SCSTR(prefix))
465 /**
466 * Returns a lower case version of a string.
467 *
468 * This function creates a duplicate of the input string, first. See the
469 * documentation of sstrdup() for the implications.
470 *
471 * @param string the input string
472 * @return the resulting lower case string
473 * @see sstrdup()
474 */
475 sstr_t ucx_strlower(scstr_t string);
477 #define sstrlower(string) ucx_strlower(SCSTR(string))
479 /**
480 * Returns a lower case version of a string.
481 *
482 * This function creates a duplicate of the input string, first. See the
483 * documentation of sstrdup_a() for the implications.
484 *
485 * @param allocator the allocator used for duplicating the string
486 * @param string the input string
487 * @return the resulting lower case string
488 * @see sstrdup_a()
489 */
490 sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string);
492 #define sstrlower_a(allocator, string) ucx_strlower_a(allocator, SCSTR(string))
494 /**
495 * Returns a upper case version of a string.
496 *
497 * This function creates a duplicate of the input string, first. See the
498 * documentation of sstrdup() for the implications.
499 *
500 * @param string the input string
501 * @return the resulting upper case string
502 * @see sstrdup()
503 */
504 sstr_t ucx_strupper(scstr_t string);
506 #define sstrupper(string) ucx_strupper(SCSTR(string))
508 /**
509 * Returns a upper case version of a string.
510 *
511 * This function creates a duplicate of the input string, first. See the
512 * documentation of sstrdup_a() for the implications.
513 *
514 * @param allocator the allocator used for duplicating the string
515 * @param string the input string
516 * @return the resulting upper case string
517 * @see sstrdup_a()
518 */
519 sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string);
521 #define sstrupper_a(allocator, string) ucx_strupper_a(allocator, string)
523 #ifdef __cplusplus
524 }
525 #endif
527 #endif /* UCX_STRING_H */