Tue, 04 Oct 2022 18:55:20 +0200
fix missing zero-termination in strreplace
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "cx/string.h"
30 #include "cx/utils.h"
32 #include <string.h>
33 #include <stdarg.h>
34 #include <stdint.h>
35 #include <ctype.h>
37 #ifndef _WIN32
39 #include <strings.h> /* for strncasecmp() */
41 #endif /* _WIN32 */
43 cxmutstr cx_mutstr(char *cstring) {
44 return (cxmutstr) {cstring, strlen(cstring)};
45 }
47 cxmutstr cx_mutstrn(
48 char *cstring,
49 size_t length
50 ) {
51 return (cxmutstr) {cstring, length};
52 }
54 cxstring cx_str(const char *cstring) {
55 return (cxstring) {cstring, strlen(cstring)};
56 }
58 cxstring cx_strn(
59 const char *cstring,
60 size_t length
61 ) {
62 return (cxstring) {cstring, length};
63 }
65 cxstring cx_strcast(cxmutstr str) {
66 return (cxstring) {str.ptr, str.length};
67 }
69 void cx_strfree(cxmutstr *str) {
70 free(str->ptr);
71 str->ptr = NULL;
72 str->length = 0;
73 }
75 void cx_strfree_a(
76 CxAllocator *alloc,
77 cxmutstr *str
78 ) {
79 cxFree(alloc, str->ptr);
80 str->ptr = NULL;
81 str->length = 0;
82 }
84 size_t cx_strlen(
85 size_t count,
86 ...
87 ) {
88 if (count == 0) return 0;
90 va_list ap;
91 va_start(ap, count);
92 size_t size = 0;
93 cx_for_n(i, count) {
94 cxstring str = va_arg(ap, cxstring);
95 size += str.length;
96 }
97 va_end(ap);
99 return size;
100 }
102 cxmutstr cx_strcat_a(
103 CxAllocator *alloc,
104 size_t count,
105 ...
106 ) {
107 cxstring *strings = calloc(count, sizeof(cxstring));
108 if (!strings) abort();
110 va_list ap;
111 va_start(ap, count);
113 // get all args and overall length
114 size_t slen = 0;
115 cx_for_n(i, count) {
116 cxstring s = va_arg (ap, cxstring);
117 strings[i] = s;
118 slen += s.length;
119 }
121 // create new string
122 cxmutstr result;
123 result.ptr = cxMalloc(alloc, slen + 1);
124 result.length = slen;
125 if (result.ptr == NULL) abort();
127 // concatenate strings
128 size_t pos = 0;
129 cx_for_n(i, count) {
130 cxstring s = strings[i];
131 memcpy(result.ptr + pos, s.ptr, s.length);
132 pos += s.length;
133 }
135 // terminate string
136 result.ptr[result.length] = '\0';
138 // free temporary array
139 free(strings);
141 return result;
142 }
144 cxstring cx_strsubs(
145 cxstring string,
146 size_t start
147 ) {
148 return cx_strsubsl(string, start, string.length - start);
149 }
151 cxmutstr cx_strsubs_m(
152 cxmutstr string,
153 size_t start
154 ) {
155 return cx_strsubsl_m(string, start, string.length - start);
156 }
158 cxstring cx_strsubsl(
159 cxstring string,
160 size_t start,
161 size_t length
162 ) {
163 if (start > string.length) {
164 return (cxstring) {NULL, 0};
165 }
167 size_t rem_len = string.length - start;
168 if (length > rem_len) {
169 length = rem_len;
170 }
172 return (cxstring) {string.ptr + start, length};
173 }
175 cxmutstr cx_strsubsl_m(
176 cxmutstr string,
177 size_t start,
178 size_t length
179 ) {
180 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
181 return (cxmutstr) {(char *) result.ptr, result.length};
182 }
184 cxstring cx_strchr(
185 cxstring string,
186 int chr
187 ) {
188 chr = 0xFF & chr;
189 // TODO: improve by comparing multiple bytes at once
190 cx_for_n(i, string.length) {
191 if (string.ptr[i] == chr) {
192 return cx_strsubs(string, i);
193 }
194 }
195 return (cxstring) {NULL, 0};
196 }
198 cxmutstr cx_strchr_m(
199 cxmutstr string,
200 int chr
201 ) {
202 cxstring result = cx_strchr(cx_strcast(string), chr);
203 return (cxmutstr) {(char *) result.ptr, result.length};
204 }
206 cxstring cx_strrchr(
207 cxstring string,
208 int chr
209 ) {
210 chr = 0xFF & chr;
211 size_t i = string.length;
212 while (i > 0) {
213 i--;
214 // TODO: improve by comparing multiple bytes at once
215 if (string.ptr[i] == chr) {
216 return cx_strsubs(string, i);
217 }
218 }
219 return (cxstring) {NULL, 0};
220 }
222 cxmutstr cx_strrchr_m(
223 cxmutstr string,
224 int chr
225 ) {
226 cxstring result = cx_strrchr(cx_strcast(string), chr);
227 return (cxmutstr) {(char *) result.ptr, result.length};
228 }
230 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
231 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
233 #define ptable_w(useheap, ptable, index, src) do {\
234 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
235 else ((size_t*)ptable)[index] = src;\
236 } while (0)
239 cxstring cx_strstr(
240 cxstring haystack,
241 cxstring needle
242 ) {
243 if (needle.length == 0) {
244 return haystack;
245 }
247 /* optimize for single-char needles */
248 if (needle.length == 1) {
249 return cx_strchr(haystack, *needle.ptr);
250 }
252 /*
253 * IMPORTANT:
254 * Our prefix table contains the prefix length PLUS ONE
255 * this is our decision, because we want to use the full range of size_t.
256 * The original algorithm needs a (-1) at one single place,
257 * and we want to avoid that.
258 */
260 /* static prefix table */
261 static uint8_t s_prefix_table[512];
263 /* check pattern length and use appropriate prefix table */
264 /* if the pattern exceeds static prefix table, allocate on the heap */
265 register int useheap = needle.length >= 512;
266 register void *ptable = useheap ? calloc(needle.length + 1,
267 sizeof(size_t)) : s_prefix_table;
269 /* keep counter in registers */
270 register size_t i, j;
272 /* fill prefix table */
273 i = 0;
274 j = 0;
275 ptable_w(useheap, ptable, i, j);
276 while (i < needle.length) {
277 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
278 ptable_r(j, useheap, ptable, j - 1);
279 }
280 i++;
281 j++;
282 ptable_w(useheap, ptable, i, j);
283 }
285 /* search */
286 cxstring result = {NULL, 0};
287 i = 0;
288 j = 1;
289 while (i < haystack.length) {
290 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
291 ptable_r(j, useheap, ptable, j - 1);
292 }
293 i++;
294 j++;
295 if (j - 1 == needle.length) {
296 size_t start = i - needle.length;
297 result.ptr = haystack.ptr + start;
298 result.length = haystack.length - start;
299 break;
300 }
301 }
303 /* if prefix table was allocated on the heap, free it */
304 if (ptable != s_prefix_table) {
305 free(ptable);
306 }
308 return result;
309 }
311 cxmutstr cx_strstr_m(
312 cxmutstr haystack,
313 cxstring needle
314 ) {
315 cxstring result = cx_strstr(cx_strcast(haystack), needle);
316 return (cxmutstr) {(char *) result.ptr, result.length};
317 }
319 size_t cx_strsplit(
320 cxstring string,
321 cxstring delim,
322 size_t limit,
323 cxstring *output
324 ) {
325 /* special case: output limit is zero */
326 if (limit == 0) return 0;
328 /* special case: delimiter is empty */
329 if (delim.length == 0) {
330 output[0] = string;
331 return 1;
332 }
334 /* special cases: delimiter is at least as large as the string */
335 if (delim.length >= string.length) {
336 /* exact match */
337 if (cx_strcmp(string, delim) == 0) {
338 output[0] = cx_strn(string.ptr, 0);
339 output[1] = cx_strn(string.ptr + string.length, 0);
340 return 2;
341 } else /* no match possible */ {
342 output[0] = string;
343 return 1;
344 }
345 }
347 size_t n = 0;
348 cxstring curpos = string;
349 while (1) {
350 ++n;
351 cxstring match = cx_strstr(curpos, delim);
352 if (match.length > 0) {
353 /* is the limit reached? */
354 if (n < limit) {
355 /* copy the current string to the array */
356 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
357 output[n - 1] = item;
358 size_t processed = item.length + delim.length;
359 curpos.ptr += processed;
360 curpos.length -= processed;
361 } else {
362 /* limit reached, copy the _full_ remaining string */
363 output[n - 1] = curpos;
364 break;
365 }
366 } else {
367 /* no more matches, copy last string */
368 output[n - 1] = curpos;
369 break;
370 }
371 }
373 return n;
374 }
376 size_t cx_strsplit_a(
377 CxAllocator *allocator,
378 cxstring string,
379 cxstring delim,
380 size_t limit,
381 cxstring **output
382 ) {
383 /* find out how many splits we're going to make and allocate memory */
384 size_t n = 0;
385 cxstring curpos = string;
386 while (1) {
387 ++n;
388 cxstring match = cx_strstr(curpos, delim);
389 if (match.length > 0) {
390 /* is the limit reached? */
391 if (n < limit) {
392 size_t processed = match.ptr - curpos.ptr + delim.length;
393 curpos.ptr += processed;
394 curpos.length -= processed;
395 } else {
396 /* limit reached */
397 break;
398 }
399 } else {
400 /* no more matches */
401 break;
402 }
403 }
404 *output = cxCalloc(allocator, n, sizeof(cxstring));
405 return cx_strsplit(string, delim, n, *output);
406 }
408 size_t cx_strsplit_m(
409 cxmutstr string,
410 cxstring delim,
411 size_t limit,
412 cxmutstr *output
413 ) {
414 return cx_strsplit(cx_strcast(string),
415 delim, limit, (cxstring *) output);
416 }
418 size_t cx_strsplit_ma(
419 CxAllocator *allocator,
420 cxmutstr string,
421 cxstring delim,
422 size_t limit,
423 cxmutstr **output
424 ) {
425 return cx_strsplit_a(allocator, cx_strcast(string),
426 delim, limit, (cxstring **) output);
427 }
429 int cx_strcmp(
430 cxstring s1,
431 cxstring s2
432 ) {
433 if (s1.length == s2.length) {
434 return memcmp(s1.ptr, s2.ptr, s1.length);
435 } else if (s1.length > s2.length) {
436 return 1;
437 } else {
438 return -1;
439 }
440 }
442 int cx_strcasecmp(
443 cxstring s1,
444 cxstring s2
445 ) {
446 if (s1.length == s2.length) {
447 #ifdef _WIN32
448 return _strnicmp(s1.ptr, s2.ptr, s1.length);
449 #else
450 return strncasecmp(s1.ptr, s2.ptr, s1.length);
451 #endif
452 } else if (s1.length > s2.length) {
453 return 1;
454 } else {
455 return -1;
456 }
457 }
459 cxmutstr cx_strdup_a(
460 CxAllocator *allocator,
461 cxstring string
462 ) {
463 cxmutstr result = {
464 cxMalloc(allocator, string.length + 1),
465 string.length
466 };
467 if (result.ptr == NULL) {
468 result.length = 0;
469 return result;
470 }
471 memcpy(result.ptr, string.ptr, string.length);
472 result.ptr[string.length] = '\0';
473 return result;
474 }
476 cxstring cx_strtrim(cxstring string) {
477 cxstring result = string;
478 // TODO: optimize by comparing multiple bytes at once
479 while (result.length > 0 && isspace(*result.ptr)) {
480 result.ptr++;
481 result.length--;
482 }
483 while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
484 result.length--;
485 }
486 return result;
487 }
489 cxmutstr cx_strtrim_m(cxmutstr string) {
490 cxstring result = cx_strtrim(cx_strcast(string));
491 return (cxmutstr) {(char *) result.ptr, result.length};
492 }
494 bool cx_strprefix(
495 cxstring string,
496 cxstring prefix
497 ) {
498 if (string.length < prefix.length) return false;
499 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
500 }
502 bool cx_strsuffix(
503 cxstring string,
504 cxstring suffix
505 ) {
506 if (string.length < suffix.length) return false;
507 return memcmp(string.ptr + string.length - suffix.length,
508 suffix.ptr, suffix.length) == 0;
509 }
511 bool cx_strcaseprefix(
512 cxstring string,
513 cxstring prefix
514 ) {
515 if (string.length < prefix.length) return false;
516 #ifdef _WIN32
517 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
518 #else
519 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
520 #endif
521 }
523 bool cx_strcasesuffix(
524 cxstring string,
525 cxstring suffix
526 ) {
527 if (string.length < suffix.length) return false;
528 #ifdef _WIN32
529 return _strnicmp(string.ptr+string.length-suffix.length,
530 suffix.ptr, suffix.length) == 0;
531 #else
532 return strncasecmp(string.ptr + string.length - suffix.length,
533 suffix.ptr, suffix.length) == 0;
534 #endif
535 }
537 void cx_strlower(cxmutstr string) {
538 cx_for_n(i, string.length) {
539 string.ptr[i] = tolower(string.ptr[i]);
540 }
541 }
543 void cx_strupper(cxmutstr string) {
544 cx_for_n(i, string.length) {
545 string.ptr[i] = toupper(string.ptr[i]);
546 }
547 }
549 #define REPLACE_INDEX_BUFFER_MAX 100
551 struct cx_strreplace_ibuf {
552 size_t *buf;
553 struct cx_strreplace_ibuf *next;
554 unsigned int len;
555 };
557 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
558 while (buf) {
559 struct cx_strreplace_ibuf *next = buf->next;
560 free(buf->buf);
561 free(buf);
562 buf = next;
563 }
564 }
566 cxmutstr cx_strreplacen_a(
567 CxAllocator *allocator,
568 cxstring str,
569 cxstring pattern,
570 cxstring replacement,
571 size_t replmax
572 ) {
574 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
575 return cx_strdup_a(allocator, str);
577 /* Compute expected buffer length */
578 size_t ibufmax = str.length / pattern.length;
579 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
580 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
581 ibuflen = REPLACE_INDEX_BUFFER_MAX;
582 }
584 /* Allocate first index buffer */
585 struct cx_strreplace_ibuf *firstbuf, *curbuf;
586 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
587 if (!firstbuf) return cx_mutstrn(NULL, 0);
588 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
589 if (!firstbuf->buf) {
590 free(firstbuf);
591 return cx_mutstrn(NULL, 0);
592 }
594 /* Search occurrences */
595 cxstring searchstr = str;
596 size_t found = 0;
597 do {
598 cxstring match = cx_strstr(searchstr, pattern);
599 if (match.length > 0) {
600 /* Allocate next buffer in chain, if required */
601 if (curbuf->len == ibuflen) {
602 struct cx_strreplace_ibuf *nextbuf =
603 calloc(1, sizeof(struct cx_strreplace_ibuf));
604 if (!nextbuf) {
605 cx_strrepl_free_ibuf(firstbuf);
606 return cx_mutstrn(NULL, 0);
607 }
608 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
609 if (!nextbuf->buf) {
610 free(nextbuf);
611 cx_strrepl_free_ibuf(firstbuf);
612 return cx_mutstrn(NULL, 0);
613 }
614 curbuf->next = nextbuf;
615 curbuf = nextbuf;
616 }
618 /* Record match index */
619 found++;
620 size_t idx = match.ptr - str.ptr;
621 curbuf->buf[curbuf->len++] = idx;
622 searchstr.ptr = match.ptr + pattern.length;
623 searchstr.length = str.length - idx - pattern.length;
624 } else {
625 break;
626 }
627 } while (searchstr.length > 0 && found < replmax);
629 /* Allocate result string */
630 cxmutstr result;
631 {
632 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
633 size_t rcount = 0;
634 curbuf = firstbuf;
635 do {
636 rcount += curbuf->len;
637 curbuf = curbuf->next;
638 } while (curbuf);
639 result.length = str.length + rcount * adjlen;
640 result.ptr = cxMalloc(allocator, result.length + 1);
641 if (!result.ptr) {
642 cx_strrepl_free_ibuf(firstbuf);
643 return cx_mutstrn(NULL, 0);
644 }
645 }
647 /* Build result string */
648 curbuf = firstbuf;
649 size_t srcidx = 0;
650 char *destptr = result.ptr;
651 do {
652 for (size_t i = 0; i < curbuf->len; i++) {
653 /* Copy source part up to next match*/
654 size_t idx = curbuf->buf[i];
655 size_t srclen = idx - srcidx;
656 if (srclen > 0) {
657 memcpy(destptr, str.ptr + srcidx, srclen);
658 destptr += srclen;
659 srcidx += srclen;
660 }
662 /* Copy the replacement and skip the source pattern */
663 srcidx += pattern.length;
664 memcpy(destptr, replacement.ptr, replacement.length);
665 destptr += replacement.length;
666 }
667 curbuf = curbuf->next;
668 } while (curbuf);
669 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
671 /* Result is guaranteed to be zero-terminated */
672 result.ptr[result.length] = '\0';
674 /* Free index buffer */
675 cx_strrepl_free_ibuf(firstbuf);
677 return result;
678 }