Fri, 12 Apr 2024 21:48:12 +0200
improves interface of cx_sprintf() variants
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "cx/string.h"
30 #include "cx/utils.h"
32 #include <string.h>
33 #include <stdarg.h>
34 #include <ctype.h>
36 #ifndef _WIN32
38 #include <strings.h> // for strncasecmp()
40 #endif // _WIN32
42 cxmutstr cx_mutstr(char *cstring) {
43 return (cxmutstr) {cstring, strlen(cstring)};
44 }
46 cxmutstr cx_mutstrn(
47 char *cstring,
48 size_t length
49 ) {
50 return (cxmutstr) {cstring, length};
51 }
53 cxstring cx_str(const char *cstring) {
54 return (cxstring) {cstring, strlen(cstring)};
55 }
57 cxstring cx_strn(
58 const char *cstring,
59 size_t length
60 ) {
61 return (cxstring) {cstring, length};
62 }
64 cxstring cx_strcast(cxmutstr str) {
65 return (cxstring) {str.ptr, str.length};
66 }
68 void cx_strfree(cxmutstr *str) {
69 free(str->ptr);
70 str->ptr = NULL;
71 str->length = 0;
72 }
74 void cx_strfree_a(
75 CxAllocator const *alloc,
76 cxmutstr *str
77 ) {
78 cxFree(alloc, str->ptr);
79 str->ptr = NULL;
80 str->length = 0;
81 }
83 size_t cx_strlen(
84 size_t count,
85 ...
86 ) {
87 if (count == 0) return 0;
89 va_list ap;
90 va_start(ap, count);
91 size_t size = 0;
92 cx_for_n(i, count) {
93 cxstring str = va_arg(ap, cxstring);
94 size += str.length;
95 }
96 va_end(ap);
98 return size;
99 }
101 cxmutstr cx_strcat_ma(
102 CxAllocator const *alloc,
103 cxmutstr str,
104 size_t count,
105 ...
106 ) {
107 if (count == 0) return str;
109 cxstring *strings = calloc(count, sizeof(cxstring));
110 if (!strings) abort();
112 va_list ap;
113 va_start(ap, count);
115 // get all args and overall length
116 size_t slen = str.length;
117 cx_for_n(i, count) {
118 cxstring s = va_arg (ap, cxstring);
119 strings[i] = s;
120 slen += s.length;
121 }
122 va_end(ap);
124 // reallocate or create new string
125 if (str.ptr == NULL) {
126 str.ptr = cxMalloc(alloc, slen + 1);
127 } else {
128 str.ptr = cxRealloc(alloc, str.ptr, slen + 1);
129 }
130 if (str.ptr == NULL) abort();
132 // concatenate strings
133 size_t pos = str.length;
134 str.length = slen;
135 cx_for_n(i, count) {
136 cxstring s = strings[i];
137 memcpy(str.ptr + pos, s.ptr, s.length);
138 pos += s.length;
139 }
141 // terminate string
142 str.ptr[str.length] = '\0';
144 // free temporary array
145 free(strings);
147 return str;
148 }
150 cxstring cx_strsubs(
151 cxstring string,
152 size_t start
153 ) {
154 return cx_strsubsl(string, start, string.length - start);
155 }
157 cxmutstr cx_strsubs_m(
158 cxmutstr string,
159 size_t start
160 ) {
161 return cx_strsubsl_m(string, start, string.length - start);
162 }
164 cxstring cx_strsubsl(
165 cxstring string,
166 size_t start,
167 size_t length
168 ) {
169 if (start > string.length) {
170 return (cxstring) {NULL, 0};
171 }
173 size_t rem_len = string.length - start;
174 if (length > rem_len) {
175 length = rem_len;
176 }
178 return (cxstring) {string.ptr + start, length};
179 }
181 cxmutstr cx_strsubsl_m(
182 cxmutstr string,
183 size_t start,
184 size_t length
185 ) {
186 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
187 return (cxmutstr) {(char *) result.ptr, result.length};
188 }
190 cxstring cx_strchr(
191 cxstring string,
192 int chr
193 ) {
194 chr = 0xFF & chr;
195 // TODO: improve by comparing multiple bytes at once
196 cx_for_n(i, string.length) {
197 if (string.ptr[i] == chr) {
198 return cx_strsubs(string, i);
199 }
200 }
201 return (cxstring) {NULL, 0};
202 }
204 cxmutstr cx_strchr_m(
205 cxmutstr string,
206 int chr
207 ) {
208 cxstring result = cx_strchr(cx_strcast(string), chr);
209 return (cxmutstr) {(char *) result.ptr, result.length};
210 }
212 cxstring cx_strrchr(
213 cxstring string,
214 int chr
215 ) {
216 chr = 0xFF & chr;
217 size_t i = string.length;
218 while (i > 0) {
219 i--;
220 // TODO: improve by comparing multiple bytes at once
221 if (string.ptr[i] == chr) {
222 return cx_strsubs(string, i);
223 }
224 }
225 return (cxstring) {NULL, 0};
226 }
228 cxmutstr cx_strrchr_m(
229 cxmutstr string,
230 int chr
231 ) {
232 cxstring result = cx_strrchr(cx_strcast(string), chr);
233 return (cxmutstr) {(char *) result.ptr, result.length};
234 }
/*
 * Number of entries in the local prefix table of cx_strstr().
 * May be overridden at compile time; the effective value is exported
 * through cx_strstr_sbo_size.
 */
#if !defined(CX_STRSTR_SBO_SIZE)
#define CX_STRSTR_SBO_SIZE 512
#endif
const unsigned cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE;
241 cxstring cx_strstr(
242 cxstring haystack,
243 cxstring needle
244 ) {
245 if (needle.length == 0) {
246 return haystack;
247 }
249 // optimize for single-char needles
250 if (needle.length == 1) {
251 return cx_strchr(haystack, *needle.ptr);
252 }
254 /*
255 * IMPORTANT:
256 * Our prefix table contains the prefix length PLUS ONE
257 * this is our decision, because we want to use the full range of size_t.
258 * The original algorithm needs a (-1) at one single place,
259 * and we want to avoid that.
260 */
262 // local prefix table
263 size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
265 // check needle length and use appropriate prefix table
266 // if the pattern exceeds static prefix table, allocate on the heap
267 bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
268 register size_t *ptable = useheap ? calloc(needle.length + 1,
269 sizeof(size_t)) : s_prefix_table;
271 // keep counter in registers
272 register size_t i, j;
274 // fill prefix table
275 i = 0;
276 j = 0;
277 ptable[i] = j;
278 while (i < needle.length) {
279 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
280 j = ptable[j - 1];
281 }
282 i++;
283 j++;
284 ptable[i] = j;
285 }
287 // search
288 cxstring result = {NULL, 0};
289 i = 0;
290 j = 1;
291 while (i < haystack.length) {
292 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
293 j = ptable[j - 1];
294 }
295 i++;
296 j++;
297 if (j - 1 == needle.length) {
298 size_t start = i - needle.length;
299 result.ptr = haystack.ptr + start;
300 result.length = haystack.length - start;
301 break;
302 }
303 }
305 // if prefix table was allocated on the heap, free it
306 if (ptable != s_prefix_table) {
307 free(ptable);
308 }
310 return result;
311 }
313 cxmutstr cx_strstr_m(
314 cxmutstr haystack,
315 cxstring needle
316 ) {
317 cxstring result = cx_strstr(cx_strcast(haystack), needle);
318 return (cxmutstr) {(char *) result.ptr, result.length};
319 }
321 size_t cx_strsplit(
322 cxstring string,
323 cxstring delim,
324 size_t limit,
325 cxstring *output
326 ) {
327 // special case: output limit is zero
328 if (limit == 0) return 0;
330 // special case: delimiter is empty
331 if (delim.length == 0) {
332 output[0] = string;
333 return 1;
334 }
336 // special cases: delimiter is at least as large as the string
337 if (delim.length >= string.length) {
338 // exact match
339 if (cx_strcmp(string, delim) == 0) {
340 output[0] = cx_strn(string.ptr, 0);
341 output[1] = cx_strn(string.ptr + string.length, 0);
342 return 2;
343 } else {
344 // no match possible
345 output[0] = string;
346 return 1;
347 }
348 }
350 size_t n = 0;
351 cxstring curpos = string;
352 while (1) {
353 ++n;
354 cxstring match = cx_strstr(curpos, delim);
355 if (match.length > 0) {
356 // is the limit reached?
357 if (n < limit) {
358 // copy the current string to the array
359 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
360 output[n - 1] = item;
361 size_t processed = item.length + delim.length;
362 curpos.ptr += processed;
363 curpos.length -= processed;
364 } else {
365 // limit reached, copy the _full_ remaining string
366 output[n - 1] = curpos;
367 break;
368 }
369 } else {
370 // no more matches, copy last string
371 output[n - 1] = curpos;
372 break;
373 }
374 }
376 return n;
377 }
379 size_t cx_strsplit_a(
380 CxAllocator const *allocator,
381 cxstring string,
382 cxstring delim,
383 size_t limit,
384 cxstring **output
385 ) {
386 // find out how many splits we're going to make and allocate memory
387 size_t n = 0;
388 cxstring curpos = string;
389 while (1) {
390 ++n;
391 cxstring match = cx_strstr(curpos, delim);
392 if (match.length > 0) {
393 // is the limit reached?
394 if (n < limit) {
395 size_t processed = match.ptr - curpos.ptr + delim.length;
396 curpos.ptr += processed;
397 curpos.length -= processed;
398 } else {
399 // limit reached
400 break;
401 }
402 } else {
403 // no more matches
404 break;
405 }
406 }
407 *output = cxCalloc(allocator, n, sizeof(cxstring));
408 return cx_strsplit(string, delim, n, *output);
409 }
411 size_t cx_strsplit_m(
412 cxmutstr string,
413 cxstring delim,
414 size_t limit,
415 cxmutstr *output
416 ) {
417 return cx_strsplit(cx_strcast(string),
418 delim, limit, (cxstring *) output);
419 }
421 size_t cx_strsplit_ma(
422 CxAllocator const *allocator,
423 cxmutstr string,
424 cxstring delim,
425 size_t limit,
426 cxmutstr **output
427 ) {
428 return cx_strsplit_a(allocator, cx_strcast(string),
429 delim, limit, (cxstring **) output);
430 }
432 int cx_strcmp(
433 cxstring s1,
434 cxstring s2
435 ) {
436 if (s1.length == s2.length) {
437 return memcmp(s1.ptr, s2.ptr, s1.length);
438 } else if (s1.length > s2.length) {
439 return 1;
440 } else {
441 return -1;
442 }
443 }
445 int cx_strcasecmp(
446 cxstring s1,
447 cxstring s2
448 ) {
449 if (s1.length == s2.length) {
450 #ifdef _WIN32
451 return _strnicmp(s1.ptr, s2.ptr, s1.length);
452 #else
453 return strncasecmp(s1.ptr, s2.ptr, s1.length);
454 #endif
455 } else if (s1.length > s2.length) {
456 return 1;
457 } else {
458 return -1;
459 }
460 }
462 int cx_strcmp_p(
463 void const *s1,
464 void const *s2
465 ) {
466 cxstring const *left = s1;
467 cxstring const *right = s2;
468 return cx_strcmp(*left, *right);
469 }
471 int cx_strcasecmp_p(
472 void const *s1,
473 void const *s2
474 ) {
475 cxstring const *left = s1;
476 cxstring const *right = s2;
477 return cx_strcasecmp(*left, *right);
478 }
480 cxmutstr cx_strdup_a(
481 CxAllocator const *allocator,
482 cxstring string
483 ) {
484 cxmutstr result = {
485 cxMalloc(allocator, string.length + 1),
486 string.length
487 };
488 if (result.ptr == NULL) {
489 result.length = 0;
490 return result;
491 }
492 memcpy(result.ptr, string.ptr, string.length);
493 result.ptr[string.length] = '\0';
494 return result;
495 }
497 cxstring cx_strtrim(cxstring string) {
498 cxstring result = string;
499 // TODO: optimize by comparing multiple bytes at once
500 while (result.length > 0 && isspace(*result.ptr)) {
501 result.ptr++;
502 result.length--;
503 }
504 while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
505 result.length--;
506 }
507 return result;
508 }
510 cxmutstr cx_strtrim_m(cxmutstr string) {
511 cxstring result = cx_strtrim(cx_strcast(string));
512 return (cxmutstr) {(char *) result.ptr, result.length};
513 }
515 bool cx_strprefix(
516 cxstring string,
517 cxstring prefix
518 ) {
519 if (string.length < prefix.length) return false;
520 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
521 }
523 bool cx_strsuffix(
524 cxstring string,
525 cxstring suffix
526 ) {
527 if (string.length < suffix.length) return false;
528 return memcmp(string.ptr + string.length - suffix.length,
529 suffix.ptr, suffix.length) == 0;
530 }
532 bool cx_strcaseprefix(
533 cxstring string,
534 cxstring prefix
535 ) {
536 if (string.length < prefix.length) return false;
537 #ifdef _WIN32
538 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
539 #else
540 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
541 #endif
542 }
544 bool cx_strcasesuffix(
545 cxstring string,
546 cxstring suffix
547 ) {
548 if (string.length < suffix.length) return false;
549 #ifdef _WIN32
550 return _strnicmp(string.ptr+string.length-suffix.length,
551 suffix.ptr, suffix.length) == 0;
552 #else
553 return strncasecmp(string.ptr + string.length - suffix.length,
554 suffix.ptr, suffix.length) == 0;
555 #endif
556 }
558 void cx_strlower(cxmutstr string) {
559 cx_for_n(i, string.length) {
560 string.ptr[i] = (char) tolower(string.ptr[i]);
561 }
562 }
564 void cx_strupper(cxmutstr string) {
565 cx_for_n(i, string.length) {
566 string.ptr[i] = (char) toupper(string.ptr[i]);
567 }
568 }
/*
 * Upper bound for the number of match indices stored in a single index
 * buffer of cx_strreplacen_a(). May be overridden at compile time.
 */
#if !defined(CX_STRREPLACE_INDEX_BUFFER_SIZE)
#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
#endif

/*
 * One node of a singly linked chain of index buffers.
 */
struct cx_strreplace_ibuf {
    // array with the recorded match indices
    size_t *buf;
    // the following node in the chain, NULL for the last node
    struct cx_strreplace_ibuf *next;
    // number of indices currently stored in buf
    unsigned int len;
};
580 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
581 while (buf) {
582 struct cx_strreplace_ibuf *next = buf->next;
583 free(buf->buf);
584 free(buf);
585 buf = next;
586 }
587 }
589 cxmutstr cx_strreplacen_a(
590 CxAllocator const *allocator,
591 cxstring str,
592 cxstring pattern,
593 cxstring replacement,
594 size_t replmax
595 ) {
597 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
598 return cx_strdup_a(allocator, str);
600 // Compute expected buffer length
601 size_t ibufmax = str.length / pattern.length;
602 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
603 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
604 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
605 }
607 // Allocate first index buffer
608 struct cx_strreplace_ibuf *firstbuf, *curbuf;
609 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
610 if (!firstbuf) return cx_mutstrn(NULL, 0);
611 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
612 if (!firstbuf->buf) {
613 free(firstbuf);
614 return cx_mutstrn(NULL, 0);
615 }
617 // Search occurrences
618 cxstring searchstr = str;
619 size_t found = 0;
620 do {
621 cxstring match = cx_strstr(searchstr, pattern);
622 if (match.length > 0) {
623 // Allocate next buffer in chain, if required
624 if (curbuf->len == ibuflen) {
625 struct cx_strreplace_ibuf *nextbuf =
626 calloc(1, sizeof(struct cx_strreplace_ibuf));
627 if (!nextbuf) {
628 cx_strrepl_free_ibuf(firstbuf);
629 return cx_mutstrn(NULL, 0);
630 }
631 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
632 if (!nextbuf->buf) {
633 free(nextbuf);
634 cx_strrepl_free_ibuf(firstbuf);
635 return cx_mutstrn(NULL, 0);
636 }
637 curbuf->next = nextbuf;
638 curbuf = nextbuf;
639 }
641 // Record match index
642 found++;
643 size_t idx = match.ptr - str.ptr;
644 curbuf->buf[curbuf->len++] = idx;
645 searchstr.ptr = match.ptr + pattern.length;
646 searchstr.length = str.length - idx - pattern.length;
647 } else {
648 break;
649 }
650 } while (searchstr.length > 0 && found < replmax);
652 // Allocate result string
653 cxmutstr result;
654 {
655 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
656 size_t rcount = 0;
657 curbuf = firstbuf;
658 do {
659 rcount += curbuf->len;
660 curbuf = curbuf->next;
661 } while (curbuf);
662 result.length = str.length + rcount * adjlen;
663 result.ptr = cxMalloc(allocator, result.length + 1);
664 if (!result.ptr) {
665 cx_strrepl_free_ibuf(firstbuf);
666 return cx_mutstrn(NULL, 0);
667 }
668 }
670 // Build result string
671 curbuf = firstbuf;
672 size_t srcidx = 0;
673 char *destptr = result.ptr;
674 do {
675 for (size_t i = 0; i < curbuf->len; i++) {
676 // Copy source part up to next match
677 size_t idx = curbuf->buf[i];
678 size_t srclen = idx - srcidx;
679 if (srclen > 0) {
680 memcpy(destptr, str.ptr + srcidx, srclen);
681 destptr += srclen;
682 srcidx += srclen;
683 }
685 // Copy the replacement and skip the source pattern
686 srcidx += pattern.length;
687 memcpy(destptr, replacement.ptr, replacement.length);
688 destptr += replacement.length;
689 }
690 curbuf = curbuf->next;
691 } while (curbuf);
692 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
694 // Result is guaranteed to be zero-terminated
695 result.ptr[result.length] = '\0';
697 // Free index buffer
698 cx_strrepl_free_ibuf(firstbuf);
700 return result;
701 }
703 CxStrtokCtx cx_strtok(
704 cxstring str,
705 cxstring delim,
706 size_t limit
707 ) {
708 CxStrtokCtx ctx;
709 ctx.str = str;
710 ctx.delim = delim;
711 ctx.limit = limit;
712 ctx.pos = 0;
713 ctx.next_pos = 0;
714 ctx.delim_pos = 0;
715 ctx.found = 0;
716 ctx.delim_more = NULL;
717 ctx.delim_more_count = 0;
718 return ctx;
719 }
721 CxStrtokCtx cx_strtok_m(
722 cxmutstr str,
723 cxstring delim,
724 size_t limit
725 ) {
726 return cx_strtok(cx_strcast(str), delim, limit);
727 }
729 bool cx_strtok_next(
730 CxStrtokCtx *ctx,
731 cxstring *token
732 ) {
733 // abortion criteria
734 if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
735 return false;
736 }
738 // determine the search start
739 cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos);
741 // search the next delimiter
742 cxstring delim = cx_strstr(haystack, ctx->delim);
744 // if found, make delim capture exactly the delimiter
745 if (delim.length > 0) {
746 delim.length = ctx->delim.length;
747 }
749 // if more delimiters are specified, check them now
750 if (ctx->delim_more_count > 0) {
751 cx_for_n(i, ctx->delim_more_count) {
752 cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
753 if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) {
754 delim.ptr = d.ptr;
755 delim.length = ctx->delim_more[i].length;
756 }
757 }
758 }
760 // store the token information and adjust the context
761 ctx->found++;
762 ctx->pos = ctx->next_pos;
763 token->ptr = &ctx->str.ptr[ctx->pos];
764 ctx->delim_pos = delim.length == 0 ?
765 ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
766 token->length = ctx->delim_pos - ctx->pos;
767 ctx->next_pos = ctx->delim_pos + delim.length;
769 return true;
770 }
772 bool cx_strtok_next_m(
773 CxStrtokCtx *ctx,
774 cxmutstr *token
775 ) {
776 return cx_strtok_next(ctx, (cxstring *) token);
777 }
779 void cx_strtok_delim(
780 CxStrtokCtx *ctx,
781 cxstring const *delim,
782 size_t count
783 ) {
784 ctx->delim_more = delim;
785 ctx->delim_more_count = count;
786 }