2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
29 #include "cx/string.h"
38 #include <strings.h> // for strncasecmp()
42 cxmutstr cx_mutstr(char *cstring) {
43 return (cxmutstr) {cstring, strlen(cstring)};
50 return (cxmutstr) {cstring, length};
53 cxstring cx_str(const char *cstring) {
54 return (cxstring) {cstring, strlen(cstring)};
61 return (cxstring) {cstring, length};
64 cxstring cx_strcast(cxmutstr str) {
65 return (cxstring) {str.ptr, str.length};
68 void cx_strfree(cxmutstr *str) {
78 cxFree(alloc, str->ptr);
87 if (count == 0) return 0;
93 cxstring str = va_arg(ap, cxstring);
// Concatenates a variable number of cxstring arguments into one newly
// allocated, zero-terminated string.
// The arguments are first gathered in a temporary array, so the overall
// length is known before the result buffer is requested from the allocator.
// NOTE(review): both allocation failures below end in abort() rather than
// being reported to the caller.
101 cxmutstr cx_strcat_a(
// temporary array with one slot per argument string
106 cxstring *strings = calloc(count, sizeof(cxstring));
107 if (!strings) abort();
112 // get all args and overall length
115 cxstring s = va_arg (ap, cxstring);
// one extra byte is reserved for the terminating '\0' written further down
122 result.ptr = cxMalloc(alloc, slen + 1);
123 result.length = slen;
124 if (result.ptr == NULL) abort();
126 // concatenate strings
129 cxstring s = strings[i];
130 memcpy(result.ptr + pos, s.ptr, s.length);
// the terminator is not counted in result.length
135 result.ptr[result.length] = '\0';
137 // free temporary array
147 return cx_strsubsl(string, start, string.length - start);
150 cxmutstr cx_strsubs_m(
154 return cx_strsubsl_m(string, start, string.length - start);
157 cxstring cx_strsubsl(
162 if (start > string.length) {
163 return (cxstring) {NULL, 0};
166 size_t rem_len = string.length - start;
167 if (length > rem_len) {
171 return (cxstring) {string.ptr + start, length};
174 cxmutstr cx_strsubsl_m(
179 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
180 return (cxmutstr) {(char *) result.ptr, result.length};
188 // TODO: improve by comparing multiple bytes at once
189 cx_for_n(i, string.length) {
190 if (string.ptr[i] == chr) {
191 return cx_strsubs(string, i);
194 return (cxstring) {NULL, 0};
197 cxmutstr cx_strchr_m(
201 cxstring result = cx_strchr(cx_strcast(string), chr);
202 return (cxmutstr) {(char *) result.ptr, result.length};
210 size_t i = string.length;
213 // TODO: improve by comparing multiple bytes at once
214 if (string.ptr[i] == chr) {
215 return cx_strsubs(string, i);
218 return (cxstring) {NULL, 0};
221 cxmutstr cx_strrchr_m(
225 cxstring result = cx_strrchr(cx_strcast(string), chr);
226 return (cxmutstr) {(char *) result.ptr, result.length};
229 #ifndef CX_STRSTR_SBO_SIZE
230 #define CX_STRSTR_SBO_SIZE 512
237 if (needle.length == 0) {
241 // optimize for single-char needles
242 if (needle.length == 1) {
243 return cx_strchr(haystack, *needle.ptr);
248 * Our prefix table contains the prefix length PLUS ONE
249 * this is our decision, because we want to use the full range of size_t.
250 * The original algorithm needs a (-1) at one single place,
251 * and we want to avoid that.
254 // local prefix table
255 size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
257 // check needle length and use appropriate prefix table
258 // if the pattern exceeds static prefix table, allocate on the heap
259 bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
260 register size_t *ptable = useheap ? calloc(needle.length + 1,
261 sizeof(size_t)) : s_prefix_table;
263 // keep counter in registers
264 register size_t i, j;
270 while (i < needle.length) {
271 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
280 cxstring result = {NULL, 0};
283 while (i < haystack.length) {
284 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
289 if (j - 1 == needle.length) {
290 size_t start = i - needle.length;
291 result.ptr = haystack.ptr + start;
292 result.length = haystack.length - start;
297 // if prefix table was allocated on the heap, free it
298 if (ptable != s_prefix_table) {
305 cxmutstr cx_strstr_m(
309 cxstring result = cx_strstr(cx_strcast(haystack), needle);
310 return (cxmutstr) {(char *) result.ptr, result.length};
319 // special case: output limit is zero
320 if (limit == 0) return 0;
322 // special case: delimiter is empty
323 if (delim.length == 0) {
328 // special cases: delimiter is at least as large as the string
329 if (delim.length >= string.length) {
331 if (cx_strcmp(string, delim) == 0) {
332 output[0] = cx_strn(string.ptr, 0);
333 output[1] = cx_strn(string.ptr + string.length, 0);
343 cxstring curpos = string;
346 cxstring match = cx_strstr(curpos, delim);
347 if (match.length > 0) {
348 // is the limit reached?
350 // copy the current string to the array
351 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
352 output[n - 1] = item;
353 size_t processed = item.length + delim.length;
354 curpos.ptr += processed;
355 curpos.length -= processed;
357 // limit reached, copy the _full_ remaining string
358 output[n - 1] = curpos;
362 // no more matches, copy last string
363 output[n - 1] = curpos;
// Splits a string at each occurrence of a delimiter and stores the parts in
// an array that is obtained from the given allocator.
// Works in two passes: the first pass only counts the parts, the second pass
// is delegated to cx_strsplit(), which fills the freshly allocated array.
371 size_t cx_strsplit_a(
372 CxAllocator *allocator,
378 // find out how many splits we're going to make and allocate memory
380 cxstring curpos = string;
383 cxstring match = cx_strstr(curpos, delim);
384 if (match.length > 0) {
385 // is the limit reached?
// skip the part in front of the match and the delimiter itself
387 size_t processed = match.ptr - curpos.ptr + delim.length;
388 curpos.ptr += processed;
389 curpos.length -= processed;
// NOTE(review): the cxCalloc() result is handed to cx_strsplit() on the next
// line without a visible NULL check - confirm how allocation failure is
// meant to be handled here.
399 *output = cxCalloc(allocator, n, sizeof(cxstring));
400 return cx_strsplit(string, delim, n, *output);
403 size_t cx_strsplit_m(
409 return cx_strsplit(cx_strcast(string),
410 delim, limit, (cxstring *) output);
413 size_t cx_strsplit_ma(
414 CxAllocator *allocator,
420 return cx_strsplit_a(allocator, cx_strcast(string),
421 delim, limit, (cxstring **) output);
428 if (s1.length == s2.length) {
429 return memcmp(s1.ptr, s2.ptr, s1.length);
430 } else if (s1.length > s2.length) {
441 if (s1.length == s2.length) {
443 return _strnicmp(s1.ptr, s2.ptr, s1.length);
445 return strncasecmp(s1.ptr, s2.ptr, s1.length);
447 } else if (s1.length > s2.length) {
458 cxstring const *left = s1;
459 cxstring const *right = s2;
460 return cx_strcmp(*left, *right);
467 cxstring const *left = s1;
468 cxstring const *right = s2;
469 return cx_strcasecmp(*left, *right);
472 cxmutstr cx_strdup_a(
473 CxAllocator *allocator,
477 cxMalloc(allocator, string.length + 1),
480 if (result.ptr == NULL) {
484 memcpy(result.ptr, string.ptr, string.length);
485 result.ptr[string.length] = '\0';
489 cxstring cx_strtrim(cxstring string) {
490 cxstring result = string;
491 // TODO: optimize by comparing multiple bytes at once
492 while (result.length > 0 && isspace(*result.ptr)) {
496 while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
502 cxmutstr cx_strtrim_m(cxmutstr string) {
503 cxstring result = cx_strtrim(cx_strcast(string));
504 return (cxmutstr) {(char *) result.ptr, result.length};
511 if (string.length < prefix.length) return false;
512 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
519 if (string.length < suffix.length) return false;
520 return memcmp(string.ptr + string.length - suffix.length,
521 suffix.ptr, suffix.length) == 0;
524 bool cx_strcaseprefix(
528 if (string.length < prefix.length) return false;
530 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
532 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
536 bool cx_strcasesuffix(
540 if (string.length < suffix.length) return false;
542 return _strnicmp(string.ptr+string.length-suffix.length,
543 suffix.ptr, suffix.length) == 0;
545 return strncasecmp(string.ptr + string.length - suffix.length,
546 suffix.ptr, suffix.length) == 0;
550 void cx_strlower(cxmutstr string) {
551 cx_for_n(i, string.length) {
552 string.ptr[i] = (char) tolower(string.ptr[i]);
556 void cx_strupper(cxmutstr string) {
557 cx_for_n(i, string.length) {
558 string.ptr[i] = (char) toupper(string.ptr[i]);
562 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
563 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
566 struct cx_strreplace_ibuf {
568 struct cx_strreplace_ibuf *next;
572 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
574 struct cx_strreplace_ibuf *next = buf->next;
// Creates a new string in which occurrences of a pattern are substituted by a
// replacement, performing at most replmax substitutions.
// The positions of all matches are collected first in a chain of index
// buffers; afterwards the result is allocated in one piece and assembled.
// Every failing allocation visible here makes the function return
// cx_mutstrn(NULL, 0).
581 cxmutstr cx_strreplacen_a(
582 CxAllocator *allocator,
585 cxstring replacement,
// nothing can be replaced in these cases, so a plain copy is returned
589 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
590 return cx_strdup_a(allocator, str);
592 // Compute expected buffer length
// pattern.length is non-zero here because of the guard above
593 size_t ibufmax = str.length / pattern.length;
594 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
// a single buffer never holds more than CX_STRREPLACE_INDEX_BUFFER_SIZE
// indices; further matches go into additional buffers of the chain
595 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
596 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
599 // Allocate first index buffer
600 struct cx_strreplace_ibuf *firstbuf, *curbuf;
601 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
602 if (!firstbuf) return cx_mutstrn(NULL, 0);
603 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
604 if (!firstbuf->buf) {
606 return cx_mutstrn(NULL, 0);
609 // Search occurrences
610 cxstring searchstr = str;
613 cxstring match = cx_strstr(searchstr, pattern);
614 if (match.length > 0) {
615 // Allocate next buffer in chain, if required
616 if (curbuf->len == ibuflen) {
617 struct cx_strreplace_ibuf *nextbuf =
618 calloc(1, sizeof(struct cx_strreplace_ibuf));
620 cx_strrepl_free_ibuf(firstbuf);
621 return cx_mutstrn(NULL, 0);
623 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
626 cx_strrepl_free_ibuf(firstbuf);
627 return cx_mutstrn(NULL, 0);
629 curbuf->next = nextbuf;
633 // Record match index
// the index is relative to the start of str, not to searchstr
635 size_t idx = match.ptr - str.ptr;
636 curbuf->buf[curbuf->len++] = idx;
// continue the search directly behind the match that was just recorded
637 searchstr.ptr = match.ptr + pattern.length;
638 searchstr.length = str.length - idx - pattern.length;
642 } while (searchstr.length > 0 && found < replmax);
644 // Allocate result string
// length change per substitution; negative when the replacement is shorter
647 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
651 rcount += curbuf->len;
652 curbuf = curbuf->next;
// NOTE(review): rcount is declared outside the visible lines; if it is an
// unsigned type, a negative adjlen is converted to unsigned here and the
// sum only comes out right through wrap-around arithmetic - confirm.
654 result.length = str.length + rcount * adjlen;
655 result.ptr = cxMalloc(allocator, result.length + 1);
657 cx_strrepl_free_ibuf(firstbuf);
658 return cx_mutstrn(NULL, 0);
662 // Build result string
665 char *destptr = result.ptr;
667 for (size_t i = 0; i < curbuf->len; i++) {
668 // Copy source part up to next match
669 size_t idx = curbuf->buf[i];
670 size_t srclen = idx - srcidx;
672 memcpy(destptr, str.ptr + srcidx, srclen);
677 // Copy the replacement and skip the source pattern
678 srcidx += pattern.length;
679 memcpy(destptr, replacement.ptr, replacement.length);
680 destptr += replacement.length;
682 curbuf = curbuf->next;
// copy whatever remains of the source behind the last match
684 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
686 // Result is guaranteed to be zero-terminated
687 result.ptr[result.length] = '\0';
690 cx_strrepl_free_ibuf(firstbuf);
695 CxStrtokCtx cx_strtok(
708 ctx.delim_more = NULL;
709 ctx.delim_more_count = 0;
713 CxStrtokCtx cx_strtok_m(
718 return cx_strtok(cx_strcast(str), delim, limit);
726 if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
730 // determine the search start
731 cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos);
733 // search the next delimiter
734 cxstring delim = cx_strstr(haystack, ctx->delim);
736 // if found, make delim capture exactly the delimiter
737 if (delim.length > 0) {
738 delim.length = ctx->delim.length;
741 // if more delimiters are specified, check them now
742 if (ctx->delim_more_count > 0) {
743 cx_for_n(i, ctx->delim_more_count) {
744 cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
745 if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) {
747 delim.length = ctx->delim_more[i].length;
752 // store the token information and adjust the context
754 ctx->pos = ctx->next_pos;
755 token->ptr = &ctx->str.ptr[ctx->pos];
756 ctx->delim_pos = delim.length == 0 ?
757 ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
758 token->length = ctx->delim_pos - ctx->pos;
759 ctx->next_pos = ctx->delim_pos + delim.length;
764 bool cx_strtok_next_m(
768 return cx_strtok_next(ctx, (cxstring *) token);
771 void cx_strtok_delim(
773 cxstring const *delim,
776 ctx->delim_more = delim;
777 ctx->delim_more_count = count;