Sun, 23 Oct 2022 13:32:16 +0200
enclose macro arguments in parenthesis
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "cx/string.h"
30 #include "cx/utils.h"
32 #include <string.h>
33 #include <stdarg.h>
34 #include <stdint.h>
35 #include <ctype.h>
37 #ifndef _WIN32
39 #include <strings.h> /* for strncasecmp() */
41 #endif /* _WIN32 */
43 cxmutstr cx_mutstr(char *cstring) {
44 return (cxmutstr) {cstring, strlen(cstring)};
45 }
47 cxmutstr cx_mutstrn(
48 char *cstring,
49 size_t length
50 ) {
51 return (cxmutstr) {cstring, length};
52 }
54 cxstring cx_str(const char *cstring) {
55 return (cxstring) {cstring, strlen(cstring)};
56 }
58 cxstring cx_strn(
59 const char *cstring,
60 size_t length
61 ) {
62 return (cxstring) {cstring, length};
63 }
65 cxstring cx_strcast(cxmutstr str) {
66 return (cxstring) {str.ptr, str.length};
67 }
69 void cx_strfree(cxmutstr *str) {
70 free(str->ptr);
71 str->ptr = NULL;
72 str->length = 0;
73 }
75 void cx_strfree_a(
76 CxAllocator *alloc,
77 cxmutstr *str
78 ) {
79 cxFree(alloc, str->ptr);
80 str->ptr = NULL;
81 str->length = 0;
82 }
84 size_t cx_strlen(
85 size_t count,
86 ...
87 ) {
88 if (count == 0) return 0;
90 va_list ap;
91 va_start(ap, count);
92 size_t size = 0;
93 cx_for_n(i, count) {
94 cxstring str = va_arg(ap, cxstring);
95 size += str.length;
96 }
97 va_end(ap);
99 return size;
100 }
102 cxmutstr cx_strcat_a(
103 CxAllocator *alloc,
104 size_t count,
105 ...
106 ) {
107 cxstring *strings = calloc(count, sizeof(cxstring));
108 if (!strings) abort();
110 va_list ap;
111 va_start(ap, count);
113 // get all args and overall length
114 size_t slen = 0;
115 cx_for_n(i, count) {
116 cxstring s = va_arg (ap, cxstring);
117 strings[i] = s;
118 slen += s.length;
119 }
121 // create new string
122 cxmutstr result;
123 result.ptr = cxMalloc(alloc, slen + 1);
124 result.length = slen;
125 if (result.ptr == NULL) abort();
127 // concatenate strings
128 size_t pos = 0;
129 cx_for_n(i, count) {
130 cxstring s = strings[i];
131 memcpy(result.ptr + pos, s.ptr, s.length);
132 pos += s.length;
133 }
135 // terminate string
136 result.ptr[result.length] = '\0';
138 // free temporary array
139 free(strings);
141 return result;
142 }
144 cxstring cx_strsubs(
145 cxstring string,
146 size_t start
147 ) {
148 return cx_strsubsl(string, start, string.length - start);
149 }
151 cxmutstr cx_strsubs_m(
152 cxmutstr string,
153 size_t start
154 ) {
155 return cx_strsubsl_m(string, start, string.length - start);
156 }
158 cxstring cx_strsubsl(
159 cxstring string,
160 size_t start,
161 size_t length
162 ) {
163 if (start > string.length) {
164 return (cxstring) {NULL, 0};
165 }
167 size_t rem_len = string.length - start;
168 if (length > rem_len) {
169 length = rem_len;
170 }
172 return (cxstring) {string.ptr + start, length};
173 }
175 cxmutstr cx_strsubsl_m(
176 cxmutstr string,
177 size_t start,
178 size_t length
179 ) {
180 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
181 return (cxmutstr) {(char *) result.ptr, result.length};
182 }
184 cxstring cx_strchr(
185 cxstring string,
186 int chr
187 ) {
188 chr = 0xFF & chr;
189 // TODO: improve by comparing multiple bytes at once
190 cx_for_n(i, string.length) {
191 if (string.ptr[i] == chr) {
192 return cx_strsubs(string, i);
193 }
194 }
195 return (cxstring) {NULL, 0};
196 }
198 cxmutstr cx_strchr_m(
199 cxmutstr string,
200 int chr
201 ) {
202 cxstring result = cx_strchr(cx_strcast(string), chr);
203 return (cxmutstr) {(char *) result.ptr, result.length};
204 }
206 cxstring cx_strrchr(
207 cxstring string,
208 int chr
209 ) {
210 chr = 0xFF & chr;
211 size_t i = string.length;
212 while (i > 0) {
213 i--;
214 // TODO: improve by comparing multiple bytes at once
215 if (string.ptr[i] == chr) {
216 return cx_strsubs(string, i);
217 }
218 }
219 return (cxstring) {NULL, 0};
220 }
222 cxmutstr cx_strrchr_m(
223 cxmutstr string,
224 int chr
225 ) {
226 cxstring result = cx_strrchr(cx_strcast(string), chr);
227 return (cxmutstr) {(char *) result.ptr, result.length};
228 }
230 #define STRSTR_SBO_BUFLEN 512
232 cxstring cx_strstr(
233 cxstring haystack,
234 cxstring needle
235 ) {
236 if (needle.length == 0) {
237 return haystack;
238 }
240 /* optimize for single-char needles */
241 if (needle.length == 1) {
242 return cx_strchr(haystack, *needle.ptr);
243 }
245 /*
246 * IMPORTANT:
247 * Our prefix table contains the prefix length PLUS ONE
248 * this is our decision, because we want to use the full range of size_t.
249 * The original algorithm needs a (-1) at one single place,
250 * and we want to avoid that.
251 */
253 /* local prefix table */
254 size_t s_prefix_table[STRSTR_SBO_BUFLEN];
256 /* check needle length and use appropriate prefix table */
257 /* if the pattern exceeds static prefix table, allocate on the heap */
258 bool useheap = needle.length >= STRSTR_SBO_BUFLEN;
259 register size_t *ptable = useheap ? calloc(needle.length + 1,
260 sizeof(size_t)) : s_prefix_table;
262 /* keep counter in registers */
263 register size_t i, j;
265 /* fill prefix table */
266 i = 0;
267 j = 0;
268 ptable[i] = j;
269 while (i < needle.length) {
270 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
271 j = ptable[j - 1];
272 }
273 i++;
274 j++;
275 ptable[i] = j;
276 }
278 /* search */
279 cxstring result = {NULL, 0};
280 i = 0;
281 j = 1;
282 while (i < haystack.length) {
283 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
284 j = ptable[j - 1];
285 }
286 i++;
287 j++;
288 if (j - 1 == needle.length) {
289 size_t start = i - needle.length;
290 result.ptr = haystack.ptr + start;
291 result.length = haystack.length - start;
292 break;
293 }
294 }
296 /* if prefix table was allocated on the heap, free it */
297 if (ptable != s_prefix_table) {
298 free(ptable);
299 }
301 return result;
302 }
304 cxmutstr cx_strstr_m(
305 cxmutstr haystack,
306 cxstring needle
307 ) {
308 cxstring result = cx_strstr(cx_strcast(haystack), needle);
309 return (cxmutstr) {(char *) result.ptr, result.length};
310 }
312 size_t cx_strsplit(
313 cxstring string,
314 cxstring delim,
315 size_t limit,
316 cxstring *output
317 ) {
318 /* special case: output limit is zero */
319 if (limit == 0) return 0;
321 /* special case: delimiter is empty */
322 if (delim.length == 0) {
323 output[0] = string;
324 return 1;
325 }
327 /* special cases: delimiter is at least as large as the string */
328 if (delim.length >= string.length) {
329 /* exact match */
330 if (cx_strcmp(string, delim) == 0) {
331 output[0] = cx_strn(string.ptr, 0);
332 output[1] = cx_strn(string.ptr + string.length, 0);
333 return 2;
334 } else /* no match possible */ {
335 output[0] = string;
336 return 1;
337 }
338 }
340 size_t n = 0;
341 cxstring curpos = string;
342 while (1) {
343 ++n;
344 cxstring match = cx_strstr(curpos, delim);
345 if (match.length > 0) {
346 /* is the limit reached? */
347 if (n < limit) {
348 /* copy the current string to the array */
349 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
350 output[n - 1] = item;
351 size_t processed = item.length + delim.length;
352 curpos.ptr += processed;
353 curpos.length -= processed;
354 } else {
355 /* limit reached, copy the _full_ remaining string */
356 output[n - 1] = curpos;
357 break;
358 }
359 } else {
360 /* no more matches, copy last string */
361 output[n - 1] = curpos;
362 break;
363 }
364 }
366 return n;
367 }
369 size_t cx_strsplit_a(
370 CxAllocator *allocator,
371 cxstring string,
372 cxstring delim,
373 size_t limit,
374 cxstring **output
375 ) {
376 /* find out how many splits we're going to make and allocate memory */
377 size_t n = 0;
378 cxstring curpos = string;
379 while (1) {
380 ++n;
381 cxstring match = cx_strstr(curpos, delim);
382 if (match.length > 0) {
383 /* is the limit reached? */
384 if (n < limit) {
385 size_t processed = match.ptr - curpos.ptr + delim.length;
386 curpos.ptr += processed;
387 curpos.length -= processed;
388 } else {
389 /* limit reached */
390 break;
391 }
392 } else {
393 /* no more matches */
394 break;
395 }
396 }
397 *output = cxCalloc(allocator, n, sizeof(cxstring));
398 return cx_strsplit(string, delim, n, *output);
399 }
401 size_t cx_strsplit_m(
402 cxmutstr string,
403 cxstring delim,
404 size_t limit,
405 cxmutstr *output
406 ) {
407 return cx_strsplit(cx_strcast(string),
408 delim, limit, (cxstring *) output);
409 }
411 size_t cx_strsplit_ma(
412 CxAllocator *allocator,
413 cxmutstr string,
414 cxstring delim,
415 size_t limit,
416 cxmutstr **output
417 ) {
418 return cx_strsplit_a(allocator, cx_strcast(string),
419 delim, limit, (cxstring **) output);
420 }
422 int cx_strcmp(
423 cxstring s1,
424 cxstring s2
425 ) {
426 if (s1.length == s2.length) {
427 return memcmp(s1.ptr, s2.ptr, s1.length);
428 } else if (s1.length > s2.length) {
429 return 1;
430 } else {
431 return -1;
432 }
433 }
435 int cx_strcasecmp(
436 cxstring s1,
437 cxstring s2
438 ) {
439 if (s1.length == s2.length) {
440 #ifdef _WIN32
441 return _strnicmp(s1.ptr, s2.ptr, s1.length);
442 #else
443 return strncasecmp(s1.ptr, s2.ptr, s1.length);
444 #endif
445 } else if (s1.length > s2.length) {
446 return 1;
447 } else {
448 return -1;
449 }
450 }
452 cxmutstr cx_strdup_a(
453 CxAllocator *allocator,
454 cxstring string
455 ) {
456 cxmutstr result = {
457 cxMalloc(allocator, string.length + 1),
458 string.length
459 };
460 if (result.ptr == NULL) {
461 result.length = 0;
462 return result;
463 }
464 memcpy(result.ptr, string.ptr, string.length);
465 result.ptr[string.length] = '\0';
466 return result;
467 }
469 cxstring cx_strtrim(cxstring string) {
470 cxstring result = string;
471 // TODO: optimize by comparing multiple bytes at once
472 while (result.length > 0 && isspace(*result.ptr)) {
473 result.ptr++;
474 result.length--;
475 }
476 while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
477 result.length--;
478 }
479 return result;
480 }
482 cxmutstr cx_strtrim_m(cxmutstr string) {
483 cxstring result = cx_strtrim(cx_strcast(string));
484 return (cxmutstr) {(char *) result.ptr, result.length};
485 }
487 bool cx_strprefix(
488 cxstring string,
489 cxstring prefix
490 ) {
491 if (string.length < prefix.length) return false;
492 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
493 }
495 bool cx_strsuffix(
496 cxstring string,
497 cxstring suffix
498 ) {
499 if (string.length < suffix.length) return false;
500 return memcmp(string.ptr + string.length - suffix.length,
501 suffix.ptr, suffix.length) == 0;
502 }
504 bool cx_strcaseprefix(
505 cxstring string,
506 cxstring prefix
507 ) {
508 if (string.length < prefix.length) return false;
509 #ifdef _WIN32
510 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
511 #else
512 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
513 #endif
514 }
516 bool cx_strcasesuffix(
517 cxstring string,
518 cxstring suffix
519 ) {
520 if (string.length < suffix.length) return false;
521 #ifdef _WIN32
522 return _strnicmp(string.ptr+string.length-suffix.length,
523 suffix.ptr, suffix.length) == 0;
524 #else
525 return strncasecmp(string.ptr + string.length - suffix.length,
526 suffix.ptr, suffix.length) == 0;
527 #endif
528 }
530 void cx_strlower(cxmutstr string) {
531 cx_for_n(i, string.length) {
532 string.ptr[i] = tolower(string.ptr[i]);
533 }
534 }
536 void cx_strupper(cxmutstr string) {
537 cx_for_n(i, string.length) {
538 string.ptr[i] = toupper(string.ptr[i]);
539 }
540 }
542 #define REPLACE_INDEX_BUFFER_MAX 100
544 struct cx_strreplace_ibuf {
545 size_t *buf;
546 struct cx_strreplace_ibuf *next;
547 unsigned int len;
548 };
550 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
551 while (buf) {
552 struct cx_strreplace_ibuf *next = buf->next;
553 free(buf->buf);
554 free(buf);
555 buf = next;
556 }
557 }
559 cxmutstr cx_strreplacen_a(
560 CxAllocator *allocator,
561 cxstring str,
562 cxstring pattern,
563 cxstring replacement,
564 size_t replmax
565 ) {
567 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
568 return cx_strdup_a(allocator, str);
570 /* Compute expected buffer length */
571 size_t ibufmax = str.length / pattern.length;
572 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
573 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
574 ibuflen = REPLACE_INDEX_BUFFER_MAX;
575 }
577 /* Allocate first index buffer */
578 struct cx_strreplace_ibuf *firstbuf, *curbuf;
579 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
580 if (!firstbuf) return cx_mutstrn(NULL, 0);
581 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
582 if (!firstbuf->buf) {
583 free(firstbuf);
584 return cx_mutstrn(NULL, 0);
585 }
587 /* Search occurrences */
588 cxstring searchstr = str;
589 size_t found = 0;
590 do {
591 cxstring match = cx_strstr(searchstr, pattern);
592 if (match.length > 0) {
593 /* Allocate next buffer in chain, if required */
594 if (curbuf->len == ibuflen) {
595 struct cx_strreplace_ibuf *nextbuf =
596 calloc(1, sizeof(struct cx_strreplace_ibuf));
597 if (!nextbuf) {
598 cx_strrepl_free_ibuf(firstbuf);
599 return cx_mutstrn(NULL, 0);
600 }
601 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
602 if (!nextbuf->buf) {
603 free(nextbuf);
604 cx_strrepl_free_ibuf(firstbuf);
605 return cx_mutstrn(NULL, 0);
606 }
607 curbuf->next = nextbuf;
608 curbuf = nextbuf;
609 }
611 /* Record match index */
612 found++;
613 size_t idx = match.ptr - str.ptr;
614 curbuf->buf[curbuf->len++] = idx;
615 searchstr.ptr = match.ptr + pattern.length;
616 searchstr.length = str.length - idx - pattern.length;
617 } else {
618 break;
619 }
620 } while (searchstr.length > 0 && found < replmax);
622 /* Allocate result string */
623 cxmutstr result;
624 {
625 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
626 size_t rcount = 0;
627 curbuf = firstbuf;
628 do {
629 rcount += curbuf->len;
630 curbuf = curbuf->next;
631 } while (curbuf);
632 result.length = str.length + rcount * adjlen;
633 result.ptr = cxMalloc(allocator, result.length + 1);
634 if (!result.ptr) {
635 cx_strrepl_free_ibuf(firstbuf);
636 return cx_mutstrn(NULL, 0);
637 }
638 }
640 /* Build result string */
641 curbuf = firstbuf;
642 size_t srcidx = 0;
643 char *destptr = result.ptr;
644 do {
645 for (size_t i = 0; i < curbuf->len; i++) {
646 /* Copy source part up to next match*/
647 size_t idx = curbuf->buf[i];
648 size_t srclen = idx - srcidx;
649 if (srclen > 0) {
650 memcpy(destptr, str.ptr + srcidx, srclen);
651 destptr += srclen;
652 srcidx += srclen;
653 }
655 /* Copy the replacement and skip the source pattern */
656 srcidx += pattern.length;
657 memcpy(destptr, replacement.ptr, replacement.length);
658 destptr += replacement.length;
659 }
660 curbuf = curbuf->next;
661 } while (curbuf);
662 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
664 /* Result is guaranteed to be zero-terminated */
665 result.ptr[result.length] = '\0';
667 /* Free index buffer */
668 cx_strrepl_free_ibuf(firstbuf);
670 return result;
671 }