src/cx/string.h

changeset 576
ba0c4ff6698e
child 577
26447d59a5ab
equal deleted inserted replaced
575:b05935945637 576:ba0c4ff6698e
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28 /**
29 * \file string.h
30 * \brief Strings that know their length.
31 * \author Mike Becker
32 * \author Olaf Wintermann
33 * \version 3.0
34 * \copyright 2-Clause BSD License
35 */
36
37 #ifndef UCX_STRING_H
38 #define UCX_STRING_H
39
40 #include "common.h"
41 #include "allocator.h"
42
/**
 * The UCX string structure for mutable strings.
 *
 * \c cx_mutstr_s is the struct tag (not an object), so the \c cxmutstr
 * typedef below names a complete type.
 */
struct cx_mutstr_s {
    /**
     * A pointer to the string.
     * \note The string is not necessarily zero-terminated.
     * Always use the length.
     */
    char *ptr;
    /** The length of the string */
    size_t length;
};

/**
 * A mutable string.
 */
typedef struct cx_mutstr_s cxmutstr;
61
/**
 * The UCX string structure for immutable (constant) strings.
 *
 * \c cx_string_s is the struct tag (not an object), so the \c cxstring
 * typedef below names a complete type.
 */
struct cx_string_s {
    /**
     * A pointer to the immutable string.
     * \note The string is not necessarily zero-terminated.
     * Always use the length.
     */
    char const *ptr;
    /** The length of the string */
    size_t length;
};

/**
 * An immutable string.
 */
typedef struct cx_string_s cxstring;
80
81 #ifdef __cplusplus
82 extern "C" {
83 #endif
84
85
86 /**
87 * Wraps a mutable string that must be zero-terminated.
88 *
89 * The length is implicitly inferred by using a call to \c strlen().
90 * As a special case, a \c NULL argument is treated like an empty string.
91 *
92 * \note the wrapped string will share the specified pointer to the string.
93 * If you do want a copy, use cx_strdup() on the return value of this function.
94 *
95 * If you need to wrap a constant string, use cx_str().
96 *
97 * @param cstring the string to wrap, must be zero-terminated (or \c NULL)
98 * @return the wrapped string
99 *
100 * @see cx_mutstrn()
101 */
102 __attribute__((__warn_unused_result__))
103 cxmutstr cx_mutstr(char *cstring);
104
105 /**
106 * Wraps a string that does not need to be zero-terminated.
107 *
108 * The argument may be \c NULL if the length is zero.
109 *
110 * \note the wrapped string will share the specified pointer to the string.
111 * If you do want a copy, use cx_strdup() on the return value of this function.
112 *
113 * If you need to wrap a constant string, use cx_strn().
114 *
115 * @param cstring the string to wrap (or \c NULL, if the length is zero)
116 * @param length the length of the string
117 * @return the wrapped string
118 *
119 * @see cx_mutstr()
120 */
121 __attribute__((__warn_unused_result__))
122 cxmutstr cx_mutstrn(
123 char *cstring,
124 size_t length
125 );
126
127 /**
128 * Wraps a string that must be zero-terminated.
129 *
130 * The length is implicitly inferred by using a call to \c strlen().
131 * As a special case, a \c NULL argument is treated like an empty string.
132 *
133 * \note the wrapped string will share the specified pointer to the string.
134 * If you do want a copy, use cx_strdup() on the return value of this function.
135 *
136 * If you need to wrap a non-constant string, use cx_mutstr().
137 *
138 * @param cstring the string to wrap, must be zero-terminated (or \c NULL)
139 * @return the wrapped string
140 *
141 * @see cx_strn()
142 */
143 __attribute__((__warn_unused_result__))
144 cxstring cx_str(char const *cstring);
145
146
147 /**
148 * Wraps a string that does not need to be zero-terminated.
149 *
150 * The argument may be \c NULL if the length is zero.
151 *
152 * \note the wrapped string will share the specified pointer to the string.
153 * If you do want a copy, use cx_strdup() on the return value of this function.
154 *
155 * If you need to wrap a non-constant string, use cx_mutstrn().
156 *
157 * @param cstring the string to wrap (or \c NULL, if the length is zero)
158 * @param length the length of the string
159 * @return the wrapped string
160 *
161 * @see cx_str()
162 */
163 __attribute__((__warn_unused_result__))
164 cxstring cx_strn(
165 char const *cstring,
166 size_t length
167 );
168
169 /**
170 * Casts a mutable string to an immutable string.
171 *
172 * \note This is not really a cast. Instead, you get a copy
173 * of the struct with the desired pointer type. Both structs still
174 * point to the same location, though!
175 *
176 * @param str the mutable string to cast
177 * @return an immutable copy of the string pointer
178 */
179 __attribute__((__warn_unused_result__))
180 cxstring cx_strcast(cxmutstr str);
181
182 /**
183 * Passes the pointer in this string to \c free().
184 *
185 * The pointer in the struct is set to \c NULL and the length is set to zero.
186 *
187 * \note There is no implementation for cxstring, because it is unlikely that
188 * you ever have a \c char \c const* you are really supposed to free. If you
189 * encounter such a situation, you should double-check your code.
190 *
191 * @param str the string to free
192 */
193 void cx_strfree(cxmutstr *str);
194
195 /**
196 * Returns the accumulated length of all specified strings.
197 *
198 * \attention if the count argument is larger than the number of the
199 * specified strings, the behavior is undefined.
200 *
201 * @param count the total number of specified strings
202 * @param ... all strings
203 * @return the accumulated length of all strings
204 */
205 __attribute__((__warn_unused_result__))
206 size_t cx_strlen(
207 size_t count,
208 ...
209 );
210
211 /**
212 * Concatenates two or more strings.
213 *
214 * The resulting string will be allocated by the specified allocator.
215 * It \em must eventually be freed via that allocator (cx_strfree() uses \c free()).
216 *
217 * \note It is guaranteed that there is only one allocation.
218 *
219 * @param alloc the allocator to use
220 * @param count the total number of strings to concatenate
221 * @param ... all strings
222 * @return the concatenated string
223 */
224 __attribute__((__warn_unused_result__, __nonnull__))
225 cxmutstr cx_strcat_a(
226 CxAllocator *alloc,
227 size_t count,
228 ...
229 );
230
/**
 * Concatenates two or more strings using the default allocator.
 *
 * This is a shorthand for cx_strcat_a() with \c cxDefaultAllocator.
 * The resulting string will be allocated by standard \c malloc().
 * So developers \em must pass the return value to cx_strfree() eventually.
 *
 * @param count the total number of strings to concatenate
 * @param ... all strings
 * @return the concatenated string
 */
#define cx_strcat(count, ...) \
    (cx_strcat_a(cxDefaultAllocator, count, __VA_ARGS__))
243
244 /**
245 * Returns a substring starting at the specified location.
246 *
247 * \attention the new string references the same memory area as the
248 * input string and is usually \em not zero-terminated.
249 * Use cx_strdup() to get a copy.
250 *
251 * @param string input string
252 * @param start start location of the substring
253 * @return a substring of \p string starting at \p start
254 *
255 * @see cx_strsubsl()
256 * @see cx_strsubs_m()
257 * @see cx_strsubsl_m()
258 */
259 __attribute__((__warn_unused_result__))
260 cxstring cx_strsubs(
261 cxstring string,
262 size_t start
263 );
264
265 /**
266 * Returns a substring starting at the specified location.
267 *
268 * The returned string will be limited to \p length bytes or the number
269 * of bytes available in \p string, whichever is smaller.
270 *
271 * \attention the new string references the same memory area as the
272 * input string and is usually \em not zero-terminated.
273 * Use cx_strdup() to get a copy.
274 *
275 * @param string input string
276 * @param start start location of the substring
277 * @param length the maximum length of the returned string
278 * @return a substring of \p string starting at \p start
279 *
280 * @see cx_strsubs()
281 * @see cx_strsubs_m()
282 * @see cx_strsubsl_m()
283 */
284 __attribute__((__warn_unused_result__))
285 cxstring cx_strsubsl(
286 cxstring string,
287 size_t start,
288 size_t length
289 );
290
291 /**
292 * Returns a substring starting at the specified location.
293 *
294 * \attention the new string references the same memory area as the
295 * input string and is usually \em not zero-terminated.
296 * Use cx_strdup() to get a copy.
297 *
298 * @param string input string
299 * @param start start location of the substring
300 * @return a substring of \p string starting at \p start
301 *
302 * @see cx_strsubsl_m()
303 * @see cx_strsubs()
304 * @see cx_strsubsl()
305 */
306 __attribute__((__warn_unused_result__))
307 cxmutstr cx_strsubs_m(
308 cxmutstr string,
309 size_t start
310 );
311
312 /**
313 * Returns a substring starting at the specified location.
314 *
315 * The returned string will be limited to \p length bytes or the number
316 * of bytes available in \p string, whichever is smaller.
317 *
318 * \attention the new string references the same memory area as the
319 * input string and is usually \em not zero-terminated.
320 * Use cx_strdup() to get a copy.
321 *
322 * @param string input string
323 * @param start start location of the substring
324 * @param length the maximum length of the returned string
325 * @return a substring of \p string starting at \p start
326 *
327 * @see cx_strsubs_m()
328 * @see cx_strsubs()
329 * @see cx_strsubsl()
330 */
331 __attribute__((__warn_unused_result__))
332 cxmutstr cx_strsubsl_m(
333 cxmutstr string,
334 size_t start,
335 size_t length
336 );
337
338 /**
339 * Returns a substring starting at the location of the first occurrence of the
340 * specified character.
341 *
342 * If the string does not contain the character, an empty string is returned.
343 *
344 * @param string the string where to locate the character
345 * @param chr the character to locate
346 * @return a substring starting at the first location of \p chr
347 *
348 * @see cx_strchr_m()
349 */
350 __attribute__((__warn_unused_result__))
351 cxstring cx_strchr(
352 cxstring string,
353 int chr
354 );
355
356 /**
357 * Returns a substring starting at the location of the first occurrence of the
358 * specified character.
359 *
360 * If the string does not contain the character, an empty string is returned.
361 *
362 * @param string the string where to locate the character
363 * @param chr the character to locate
364 * @return a substring starting at the first location of \p chr
365 *
366 * @see cx_strchr()
367 */
368 __attribute__((__warn_unused_result__))
369 cxmutstr cx_strchr_m(
370 cxmutstr string,
371 int chr
372 );
373
374 /**
375 * Returns a substring starting at the location of the last occurrence of the
376 * specified character.
377 *
378 * If the string does not contain the character, an empty string is returned.
379 *
380 * @param string the string where to locate the character
381 * @param chr the character to locate
382 * @return a substring starting at the last location of \p chr
383 *
384 * @see cx_strrchr_m()
385 */
386 __attribute__((__warn_unused_result__))
387 cxstring cx_strrchr(
388 cxstring string,
389 int chr
390 );
391
392 /**
393 * Returns a substring starting at the location of the last occurrence of the
394 * specified character.
395 *
396 * If the string does not contain the character, an empty string is returned.
397 *
398 * @param string the string where to locate the character
399 * @param chr the character to locate
400 * @return a substring starting at the last location of \p chr
401 *
402 * @see cx_strrchr()
403 */
404 __attribute__((__warn_unused_result__))
405 cxmutstr cx_strrchr_m(
406 cxmutstr string,
407 int chr
408 );
409
410 /**
411 * Returns a substring starting at the location of the first occurrence of the
412 * specified string.
413 *
414 * If \p haystack does not contain \p needle, an empty string is returned.
415 *
416 * If \p needle is an empty string, the complete \p haystack is
417 * returned.
418 *
419 * @param haystack the string to be scanned
420 * @param needle string containing the sequence of characters to match
421 * @return a substring starting at the first occurrence of
422 * \p needle, or an empty string, if the sequence is not
423 * contained
424 * @see cx_strstr_m()
425 */
426 __attribute__((__warn_unused_result__))
427 cxstring cx_strstr(
428 cxstring haystack,
429 cxstring needle
430 );
431
432 /**
433 * Returns a substring starting at the location of the first occurrence of the
434 * specified string.
435 *
436 * If \p haystack does not contain \p needle, an empty string is returned.
437 *
438 * If \p needle is an empty string, the complete \p haystack is
439 * returned.
440 *
441 * @param haystack the string to be scanned
442 * @param needle string containing the sequence of characters to match
443 * @return a substring starting at the first occurrence of
444 * \p needle, or an empty string, if the sequence is not
445 * contained
446 * @see cx_strstr()
447 */
448 __attribute__((__warn_unused_result__))
449 cxmutstr cx_strstr_m(
450 cxmutstr haystack,
451 cxstring needle
452 );
453
454 /**
455 * Splits a given string using a delimiter string.
456 *
457 * \note The resulting array contains strings that point to the source
458 * \p string. Use cx_strdup() to get copies.
459 *
460 * @param string the string to split
461 * @param delim the delimiter
462 * @param limit the maximum number of split items
463 * @param output a pre-allocated array of at least \p limit length
464 * @return the actual number of split items
465 */
466 __attribute__((__warn_unused_result__, __nonnull__))
467 size_t cx_strsplit(
468 cxstring string,
469 cxstring delim,
470 size_t limit,
471 cxstring *output
472 );
473
474 /**
475 * Splits a given string using a delimiter string.
476 *
477 * The array pointed to by \p output will be allocated by \p allocator.
478 *
479 * \note The resulting array contains strings that point to the source
480 * \p string. Use cx_strdup() to get copies.
481 *
482 * \attention If allocation fails, the \c NULL pointer will be written to
483 * \p output and the number returned will be zero.
484 *
485 * @param allocator the allocator to use for allocating the resulting array
486 * @param string the string to split
487 * @param delim the delimiter
488 * @param limit the maximum number of split items
489 * @param output a pointer where the address of the allocated array shall be
490 * written to
491 * @return the actual number of split items
492 */
493 __attribute__((__warn_unused_result__, __nonnull__))
494 size_t cx_strsplit_a(
495 CxAllocator *allocator,
496 cxstring string,
497 cxstring delim,
498 size_t limit,
499 cxstring **output
500 );
501
502
503 /**
504 * Splits a given string using a delimiter string.
505 *
506 * \note The resulting array contains strings that point to the source
507 * \p string. Use cx_strdup() to get copies.
508 *
509 * @param string the string to split
510 * @param delim the delimiter
511 * @param limit the maximum number of split items
512 * @param output a pre-allocated array of at least \p limit length
513 * @return the actual number of split items
514 */
515 __attribute__((__warn_unused_result__, __nonnull__))
516 size_t cx_strsplit_m(
517 cxmutstr string,
518 cxstring delim,
519 size_t limit,
520 cxmutstr *output
521 );
522
523 /**
524 * Splits a given string using a delimiter string.
525 *
526 * The array pointed to by \p output will be allocated by \p allocator.
527 *
528 * \note The resulting array contains strings that point to the source
529 * \p string. Use cx_strdup() to get copies.
530 *
531 * \attention If allocation fails, the \c NULL pointer will be written to
532 * \p output and the number returned will be zero.
533 *
534 * @param allocator the allocator to use for allocating the resulting array
535 * @param string the string to split
536 * @param delim the delimiter
537 * @param limit the maximum number of split items
538 * @param output a pointer where the address of the allocated array shall be
539 * written to
540 * @return the actual number of split items
541 */
542 __attribute__((__warn_unused_result__, __nonnull__))
543 size_t cx_strsplit_ma(
544 CxAllocator *allocator,
545 cxmutstr string,
546 cxstring delim,
547 size_t limit,
548 cxmutstr **output
549 );
550
551 /**
552 * Compares two strings.
553 *
554 * @param s1 the first string
555 * @param s2 the second string
556 * @return negative if \p s1 is smaller than \p s2, positive if \p s1 is larger
557 * than \p s2, zero if both strings are equal
558 */
559 __attribute__((__warn_unused_result__))
560 int cx_strcmp(
561 cxstring s1,
562 cxstring s2
563 );
564
565 /**
566 * Compares two strings ignoring case.
567 *
568 * @param s1 the first string
569 * @param s2 the second string
570 * @return negative if \p s1 is smaller than \p s2, positive if \p s1 is larger
571 * than \p s2, zero if both strings are equal ignoring case
572 */
573 __attribute__((__warn_unused_result__))
574 int cx_strcasecmp(
575 cxstring s1,
576 cxstring s2
577 );
578
579 /**
580 * Creates a duplicate of the specified string.
581 *
582 * The new string will contain a copy allocated by standard
583 * \c malloc(). So developers \em must pass the return value to cx_strfree().
584 *
585 * \note The returned string is guaranteed to be zero-terminated and can safely
586 * be passed to other APIs.
587 *
588 * @param string the string to duplicate
589 * @return a duplicate of the string
590 * @see cx_strdup_a()
591 */
592 __attribute__((__warn_unused_result__))
593 cxmutstr cx_strdup(cxstring string);
594
595 /**
596 * Creates a duplicate of the specified string.
597 *
598 * The new string will contain a copy allocated by \p allocator.
599 *
600 * \note The returned string is guaranteed to be zero-terminated and can safely
601 * be passed to other APIs.
602 *
603 * @param allocator the allocator to use
604 * @param string the string to duplicate
605 * @return a duplicate of the string
606 * @see cx_strdup()
607 */
608 __attribute__((__warn_unused_result__, __nonnull__))
609 cxmutstr cx_strdup_a(
610 CxAllocator *allocator,
611 cxstring string
612 );
613
614 /**
615 * Omits leading and trailing spaces.
616 *
617 * \note the returned string references the same memory, thus you
618 * must \em not free the returned memory.
619 *
620 * @param string the string that shall be trimmed
621 * @return the trimmed string
622 */
623 __attribute__((__warn_unused_result__))
624 cxstring cx_strtrim(cxstring string);
625
626 /**
627 * Omits leading and trailing spaces.
628 *
629 * \note the returned string references the same memory, thus you
630 * must \em not free the returned memory.
631 *
632 * @param string the string that shall be trimmed
633 * @return the trimmed string
634 */
635 __attribute__((__warn_unused_result__))
636 cxmutstr cx_strtrim_m(cxmutstr string);
637
638 /**
639 * Checks, if a string has a specific prefix.
640 *
641 * @param string the string to check
642 * @param prefix the prefix the string should have
643 * @return \c true, if and only if the string has the specified prefix,
644 * \c false otherwise
645 */
646 __attribute__((__warn_unused_result__))
647 bool cx_strprefix(
648 cxstring string,
649 cxstring prefix
650 );
651
652 /**
653 * Checks, if a string has a specific suffix.
654 *
655 * @param string the string to check
656 * @param suffix the suffix the string should have
657 * @return \c true, if and only if the string has the specified suffix,
658 * \c false otherwise
659 */
660 __attribute__((__warn_unused_result__))
661 int cx_strsuffix(
662 cxstring string,
663 cxstring suffix
664 );
665
666 /**
667 * Checks, if a string has a specific prefix, ignoring the case.
668 *
669 * @param string the string to check
670 * @param prefix the prefix the string should have
671 * @return \c true, if and only if the string has the specified prefix,
672 * \c false otherwise
673 */
674 __attribute__((__warn_unused_result__))
675 int cx_strcaseprefix(
676 cxstring string,
677 cxstring prefix
678 );
679
680 /**
681 * Checks, if a string has a specific suffix, ignoring the case.
682 *
683 * @param string the string to check
684 * @param suffix the suffix the string should have
685 * @return \c true, if and only if the string has the specified suffix,
686 * \c false otherwise
687 */
688 __attribute__((__warn_unused_result__))
689 int cx_strcasesuffix(
690 cxstring string,
691 cxstring suffix
692 );
693
694 /**
695 * Converts the string to lower case.
696 *
697 * The change is made in-place. If you want a copy, use cx_strdup(), first.
698 *
699 * @param string the string to modify
700 * @see cx_strdup()
701 */
702 void cx_strlower(cxmutstr string);
703
704 /**
705 * Converts the string to upper case.
706 *
707 * The change is made in-place. If you want a copy, use cx_strdup(), first.
708 *
709 * @param string the string to modify
710 * @see cx_strdup()
711 */
712 void cx_strupper(cxmutstr string);
713
714 /**
715 * Replaces a pattern in a string with another string.
716 *
717 * The pattern is taken literally and is not a regular expression.
718 * Replaces at most \p replmax occurrences.
719 *
720 * The returned string will be allocated by \c malloc() and \em must be passed
721 * to cx_strfree() eventually.
722 *
723 * If allocation fails, or the input string is empty,
724 * the returned string will point to \c NULL.
725 *
726 * @param str the string where replacements should be applied
727 * @param pattern the pattern to search for
728 * @param replacement the replacement string
729 * @param replmax maximum number of replacements
730 * @return the resulting string after applying the replacements
731 */
732 __attribute__((__warn_unused_result__))
733 cxmutstr cx_strreplace(
734 cxstring str,
735 cxstring pattern,
736 cxstring replacement,
737 size_t replmax
738 );
739
740 /**
741 * Replaces a pattern in a string with another string.
742 *
743 * The pattern is taken literally and is not a regular expression.
744 * Replaces at most \p replmax occurrences.
745 *
746 * The returned string will be allocated by \p allocator.
747 *
748 * If allocation fails, or the input string is empty,
749 * the returned string will point to \c NULL.
750 *
751 * @param allocator the allocator to use
752 * @param str the string where replacements should be applied
753 * @param pattern the pattern to search for
754 * @param replacement the replacement string
755 * @param replmax maximum number of replacements
756 * @return the resulting string after applying the replacements
757 */
758 __attribute__((__warn_unused_result__, __nonnull__))
759 cxmutstr cx_strreplace_a(
760 CxAllocator *allocator,
761 cxstring str,
762 cxstring pattern,
763 cxstring replacement,
764 size_t replmax
765 );
766
767 #ifdef __cplusplus
768 } // extern "C"
769 #endif
770
771 #endif //UCX_STRING_H

mercurial