Sun, 20 Nov 2022 21:08:36 +0100
use //-style single line comments everywhere
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "cx/string.h"
30 #include "cx/utils.h"
32 #include <string.h>
33 #include <stdarg.h>
34 #include <ctype.h>
36 #ifndef _WIN32
38 #include <strings.h> // for strncasecmp()
40 #endif // _WIN32
42 cxmutstr cx_mutstr(char *cstring) {
43 return (cxmutstr) {cstring, strlen(cstring)};
44 }
46 cxmutstr cx_mutstrn(
47 char *cstring,
48 size_t length
49 ) {
50 return (cxmutstr) {cstring, length};
51 }
53 cxstring cx_str(const char *cstring) {
54 return (cxstring) {cstring, strlen(cstring)};
55 }
57 cxstring cx_strn(
58 const char *cstring,
59 size_t length
60 ) {
61 return (cxstring) {cstring, length};
62 }
64 cxstring cx_strcast(cxmutstr str) {
65 return (cxstring) {str.ptr, str.length};
66 }
68 void cx_strfree(cxmutstr *str) {
69 free(str->ptr);
70 str->ptr = NULL;
71 str->length = 0;
72 }
74 void cx_strfree_a(
75 CxAllocator *alloc,
76 cxmutstr *str
77 ) {
78 cxFree(alloc, str->ptr);
79 str->ptr = NULL;
80 str->length = 0;
81 }
83 size_t cx_strlen(
84 size_t count,
85 ...
86 ) {
87 if (count == 0) return 0;
89 va_list ap;
90 va_start(ap, count);
91 size_t size = 0;
92 cx_for_n(i, count) {
93 cxstring str = va_arg(ap, cxstring);
94 size += str.length;
95 }
96 va_end(ap);
98 return size;
99 }
101 cxmutstr cx_strcat_a(
102 CxAllocator *alloc,
103 size_t count,
104 ...
105 ) {
106 cxstring *strings = calloc(count, sizeof(cxstring));
107 if (!strings) abort();
109 va_list ap;
110 va_start(ap, count);
112 // get all args and overall length
113 size_t slen = 0;
114 cx_for_n(i, count) {
115 cxstring s = va_arg (ap, cxstring);
116 strings[i] = s;
117 slen += s.length;
118 }
120 // create new string
121 cxmutstr result;
122 result.ptr = cxMalloc(alloc, slen + 1);
123 result.length = slen;
124 if (result.ptr == NULL) abort();
126 // concatenate strings
127 size_t pos = 0;
128 cx_for_n(i, count) {
129 cxstring s = strings[i];
130 memcpy(result.ptr + pos, s.ptr, s.length);
131 pos += s.length;
132 }
134 // terminate string
135 result.ptr[result.length] = '\0';
137 // free temporary array
138 free(strings);
140 return result;
141 }
143 cxstring cx_strsubs(
144 cxstring string,
145 size_t start
146 ) {
147 return cx_strsubsl(string, start, string.length - start);
148 }
150 cxmutstr cx_strsubs_m(
151 cxmutstr string,
152 size_t start
153 ) {
154 return cx_strsubsl_m(string, start, string.length - start);
155 }
157 cxstring cx_strsubsl(
158 cxstring string,
159 size_t start,
160 size_t length
161 ) {
162 if (start > string.length) {
163 return (cxstring) {NULL, 0};
164 }
166 size_t rem_len = string.length - start;
167 if (length > rem_len) {
168 length = rem_len;
169 }
171 return (cxstring) {string.ptr + start, length};
172 }
174 cxmutstr cx_strsubsl_m(
175 cxmutstr string,
176 size_t start,
177 size_t length
178 ) {
179 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
180 return (cxmutstr) {(char *) result.ptr, result.length};
181 }
183 cxstring cx_strchr(
184 cxstring string,
185 int chr
186 ) {
187 chr = 0xFF & chr;
188 // TODO: improve by comparing multiple bytes at once
189 cx_for_n(i, string.length) {
190 if (string.ptr[i] == chr) {
191 return cx_strsubs(string, i);
192 }
193 }
194 return (cxstring) {NULL, 0};
195 }
197 cxmutstr cx_strchr_m(
198 cxmutstr string,
199 int chr
200 ) {
201 cxstring result = cx_strchr(cx_strcast(string), chr);
202 return (cxmutstr) {(char *) result.ptr, result.length};
203 }
205 cxstring cx_strrchr(
206 cxstring string,
207 int chr
208 ) {
209 chr = 0xFF & chr;
210 size_t i = string.length;
211 while (i > 0) {
212 i--;
213 // TODO: improve by comparing multiple bytes at once
214 if (string.ptr[i] == chr) {
215 return cx_strsubs(string, i);
216 }
217 }
218 return (cxstring) {NULL, 0};
219 }
221 cxmutstr cx_strrchr_m(
222 cxmutstr string,
223 int chr
224 ) {
225 cxstring result = cx_strrchr(cx_strcast(string), chr);
226 return (cxmutstr) {(char *) result.ptr, result.length};
227 }
229 #define STRSTR_SBO_BUFLEN 512
231 cxstring cx_strstr(
232 cxstring haystack,
233 cxstring needle
234 ) {
235 if (needle.length == 0) {
236 return haystack;
237 }
239 // optimize for single-char needles
240 if (needle.length == 1) {
241 return cx_strchr(haystack, *needle.ptr);
242 }
244 /*
245 * IMPORTANT:
246 * Our prefix table contains the prefix length PLUS ONE
247 * this is our decision, because we want to use the full range of size_t.
248 * The original algorithm needs a (-1) at one single place,
249 * and we want to avoid that.
250 */
252 // local prefix table
253 size_t s_prefix_table[STRSTR_SBO_BUFLEN];
255 // check needle length and use appropriate prefix table
256 // if the pattern exceeds static prefix table, allocate on the heap
257 bool useheap = needle.length >= STRSTR_SBO_BUFLEN;
258 register size_t *ptable = useheap ? calloc(needle.length + 1,
259 sizeof(size_t)) : s_prefix_table;
261 // keep counter in registers
262 register size_t i, j;
264 // fill prefix table
265 i = 0;
266 j = 0;
267 ptable[i] = j;
268 while (i < needle.length) {
269 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
270 j = ptable[j - 1];
271 }
272 i++;
273 j++;
274 ptable[i] = j;
275 }
277 // search
278 cxstring result = {NULL, 0};
279 i = 0;
280 j = 1;
281 while (i < haystack.length) {
282 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
283 j = ptable[j - 1];
284 }
285 i++;
286 j++;
287 if (j - 1 == needle.length) {
288 size_t start = i - needle.length;
289 result.ptr = haystack.ptr + start;
290 result.length = haystack.length - start;
291 break;
292 }
293 }
295 // if prefix table was allocated on the heap, free it
296 if (ptable != s_prefix_table) {
297 free(ptable);
298 }
300 return result;
301 }
303 cxmutstr cx_strstr_m(
304 cxmutstr haystack,
305 cxstring needle
306 ) {
307 cxstring result = cx_strstr(cx_strcast(haystack), needle);
308 return (cxmutstr) {(char *) result.ptr, result.length};
309 }
311 size_t cx_strsplit(
312 cxstring string,
313 cxstring delim,
314 size_t limit,
315 cxstring *output
316 ) {
317 // special case: output limit is zero
318 if (limit == 0) return 0;
320 // special case: delimiter is empty
321 if (delim.length == 0) {
322 output[0] = string;
323 return 1;
324 }
326 // special cases: delimiter is at least as large as the string
327 if (delim.length >= string.length) {
328 // exact match
329 if (cx_strcmp(string, delim) == 0) {
330 output[0] = cx_strn(string.ptr, 0);
331 output[1] = cx_strn(string.ptr + string.length, 0);
332 return 2;
333 } else {
334 // no match possible
335 output[0] = string;
336 return 1;
337 }
338 }
340 size_t n = 0;
341 cxstring curpos = string;
342 while (1) {
343 ++n;
344 cxstring match = cx_strstr(curpos, delim);
345 if (match.length > 0) {
346 // is the limit reached?
347 if (n < limit) {
348 // copy the current string to the array
349 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
350 output[n - 1] = item;
351 size_t processed = item.length + delim.length;
352 curpos.ptr += processed;
353 curpos.length -= processed;
354 } else {
355 // limit reached, copy the _full_ remaining string
356 output[n - 1] = curpos;
357 break;
358 }
359 } else {
360 // no more matches, copy last string
361 output[n - 1] = curpos;
362 break;
363 }
364 }
366 return n;
367 }
369 size_t cx_strsplit_a(
370 CxAllocator *allocator,
371 cxstring string,
372 cxstring delim,
373 size_t limit,
374 cxstring **output
375 ) {
376 // find out how many splits we're going to make and allocate memory
377 size_t n = 0;
378 cxstring curpos = string;
379 while (1) {
380 ++n;
381 cxstring match = cx_strstr(curpos, delim);
382 if (match.length > 0) {
383 // is the limit reached?
384 if (n < limit) {
385 size_t processed = match.ptr - curpos.ptr + delim.length;
386 curpos.ptr += processed;
387 curpos.length -= processed;
388 } else {
389 // limit reached
390 break;
391 }
392 } else {
393 // no more matches
394 break;
395 }
396 }
397 *output = cxCalloc(allocator, n, sizeof(cxstring));
398 return cx_strsplit(string, delim, n, *output);
399 }
401 size_t cx_strsplit_m(
402 cxmutstr string,
403 cxstring delim,
404 size_t limit,
405 cxmutstr *output
406 ) {
407 return cx_strsplit(cx_strcast(string),
408 delim, limit, (cxstring *) output);
409 }
411 size_t cx_strsplit_ma(
412 CxAllocator *allocator,
413 cxmutstr string,
414 cxstring delim,
415 size_t limit,
416 cxmutstr **output
417 ) {
418 return cx_strsplit_a(allocator, cx_strcast(string),
419 delim, limit, (cxstring **) output);
420 }
422 int cx_strcmp(
423 cxstring s1,
424 cxstring s2
425 ) {
426 if (s1.length == s2.length) {
427 return memcmp(s1.ptr, s2.ptr, s1.length);
428 } else if (s1.length > s2.length) {
429 return 1;
430 } else {
431 return -1;
432 }
433 }
435 int cx_strcasecmp(
436 cxstring s1,
437 cxstring s2
438 ) {
439 if (s1.length == s2.length) {
440 #ifdef _WIN32
441 return _strnicmp(s1.ptr, s2.ptr, s1.length);
442 #else
443 return strncasecmp(s1.ptr, s2.ptr, s1.length);
444 #endif
445 } else if (s1.length > s2.length) {
446 return 1;
447 } else {
448 return -1;
449 }
450 }
452 cxmutstr cx_strdup_a(
453 CxAllocator *allocator,
454 cxstring string
455 ) {
456 cxmutstr result = {
457 cxMalloc(allocator, string.length + 1),
458 string.length
459 };
460 if (result.ptr == NULL) {
461 result.length = 0;
462 return result;
463 }
464 memcpy(result.ptr, string.ptr, string.length);
465 result.ptr[string.length] = '\0';
466 return result;
467 }
469 cxstring cx_strtrim(cxstring string) {
470 cxstring result = string;
471 // TODO: optimize by comparing multiple bytes at once
472 while (result.length > 0 && isspace(*result.ptr)) {
473 result.ptr++;
474 result.length--;
475 }
476 while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
477 result.length--;
478 }
479 return result;
480 }
482 cxmutstr cx_strtrim_m(cxmutstr string) {
483 cxstring result = cx_strtrim(cx_strcast(string));
484 return (cxmutstr) {(char *) result.ptr, result.length};
485 }
487 bool cx_strprefix(
488 cxstring string,
489 cxstring prefix
490 ) {
491 if (string.length < prefix.length) return false;
492 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
493 }
495 bool cx_strsuffix(
496 cxstring string,
497 cxstring suffix
498 ) {
499 if (string.length < suffix.length) return false;
500 return memcmp(string.ptr + string.length - suffix.length,
501 suffix.ptr, suffix.length) == 0;
502 }
504 bool cx_strcaseprefix(
505 cxstring string,
506 cxstring prefix
507 ) {
508 if (string.length < prefix.length) return false;
509 #ifdef _WIN32
510 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
511 #else
512 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
513 #endif
514 }
516 bool cx_strcasesuffix(
517 cxstring string,
518 cxstring suffix
519 ) {
520 if (string.length < suffix.length) return false;
521 #ifdef _WIN32
522 return _strnicmp(string.ptr+string.length-suffix.length,
523 suffix.ptr, suffix.length) == 0;
524 #else
525 return strncasecmp(string.ptr + string.length - suffix.length,
526 suffix.ptr, suffix.length) == 0;
527 #endif
528 }
530 void cx_strlower(cxmutstr string) {
531 cx_for_n(i, string.length) {
532 string.ptr[i] = (char) tolower(string.ptr[i]);
533 }
534 }
536 void cx_strupper(cxmutstr string) {
537 cx_for_n(i, string.length) {
538 string.ptr[i] = (char) toupper(string.ptr[i]);
539 }
540 }
542 #define REPLACE_INDEX_BUFFER_MAX 100
544 struct cx_strreplace_ibuf {
545 size_t *buf;
546 struct cx_strreplace_ibuf *next;
547 unsigned int len;
548 };
550 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
551 while (buf) {
552 struct cx_strreplace_ibuf *next = buf->next;
553 free(buf->buf);
554 free(buf);
555 buf = next;
556 }
557 }
559 cxmutstr cx_strreplacen_a(
560 CxAllocator *allocator,
561 cxstring str,
562 cxstring pattern,
563 cxstring replacement,
564 size_t replmax
565 ) {
567 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
568 return cx_strdup_a(allocator, str);
570 // Compute expected buffer length
571 size_t ibufmax = str.length / pattern.length;
572 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
573 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
574 ibuflen = REPLACE_INDEX_BUFFER_MAX;
575 }
577 // Allocate first index buffer
578 struct cx_strreplace_ibuf *firstbuf, *curbuf;
579 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
580 if (!firstbuf) return cx_mutstrn(NULL, 0);
581 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
582 if (!firstbuf->buf) {
583 free(firstbuf);
584 return cx_mutstrn(NULL, 0);
585 }
587 // Search occurrences
588 cxstring searchstr = str;
589 size_t found = 0;
590 do {
591 cxstring match = cx_strstr(searchstr, pattern);
592 if (match.length > 0) {
593 // Allocate next buffer in chain, if required
594 if (curbuf->len == ibuflen) {
595 struct cx_strreplace_ibuf *nextbuf =
596 calloc(1, sizeof(struct cx_strreplace_ibuf));
597 if (!nextbuf) {
598 cx_strrepl_free_ibuf(firstbuf);
599 return cx_mutstrn(NULL, 0);
600 }
601 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
602 if (!nextbuf->buf) {
603 free(nextbuf);
604 cx_strrepl_free_ibuf(firstbuf);
605 return cx_mutstrn(NULL, 0);
606 }
607 curbuf->next = nextbuf;
608 curbuf = nextbuf;
609 }
611 // Record match index
612 found++;
613 size_t idx = match.ptr - str.ptr;
614 curbuf->buf[curbuf->len++] = idx;
615 searchstr.ptr = match.ptr + pattern.length;
616 searchstr.length = str.length - idx - pattern.length;
617 } else {
618 break;
619 }
620 } while (searchstr.length > 0 && found < replmax);
622 // Allocate result string
623 cxmutstr result;
624 {
625 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
626 size_t rcount = 0;
627 curbuf = firstbuf;
628 do {
629 rcount += curbuf->len;
630 curbuf = curbuf->next;
631 } while (curbuf);
632 result.length = str.length + rcount * adjlen;
633 result.ptr = cxMalloc(allocator, result.length + 1);
634 if (!result.ptr) {
635 cx_strrepl_free_ibuf(firstbuf);
636 return cx_mutstrn(NULL, 0);
637 }
638 }
640 // Build result string
641 curbuf = firstbuf;
642 size_t srcidx = 0;
643 char *destptr = result.ptr;
644 do {
645 for (size_t i = 0; i < curbuf->len; i++) {
646 // Copy source part up to next match
647 size_t idx = curbuf->buf[i];
648 size_t srclen = idx - srcidx;
649 if (srclen > 0) {
650 memcpy(destptr, str.ptr + srcidx, srclen);
651 destptr += srclen;
652 srcidx += srclen;
653 }
655 // Copy the replacement and skip the source pattern
656 srcidx += pattern.length;
657 memcpy(destptr, replacement.ptr, replacement.length);
658 destptr += replacement.length;
659 }
660 curbuf = curbuf->next;
661 } while (curbuf);
662 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
664 // Result is guaranteed to be zero-terminated
665 result.ptr[result.length] = '\0';
667 // Free index buffer
668 cx_strrepl_free_ibuf(firstbuf);
670 return result;
671 }