2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
29 #include "ucx/string.h"
31 #include "ucx/allocator.h"
40 #include <strings.h> /* for strncasecmp() */
/* Wraps a mutable C string in an sstr_t. The length is measured with
 * strlen(), so cstring has to be a valid NUL-terminated string (not NULL). */
43 sstr_t sstr(char *cstring) {
46 string.length = strlen(cstring);
/* Builds an sstr_t from a pointer and an explicit length. The length is stored
 * exactly as passed in; the buffer is not inspected by the visible code. */
50 sstr_t sstrn(char *cstring, size_t length) {
53 string.length = length;
/* Read-only counterpart of sstr(): wraps a const C string in an scstr_t and
 * measures its length with strlen(), so cstring must be NUL-terminated. */
57 scstr_t scstr(const char *cstring) {
60 string.length = strlen(cstring);
/* Read-only counterpart of sstrn(): builds an scstr_t from a const pointer
 * and a caller-supplied length, which is stored unchanged. */
64 scstr_t scstrn(const char *cstring, size_t length) {
67 string.length = length;
/* Walks n variadic scstr_t arguments and works with their lengths
 * (presumably accumulating them into `size` - the accumulation line is not
 * visible in this excerpt, confirm). */
72 size_t scstrnlen(size_t n, ...) {
80 for (size_t i = 0 ; i < n ; i++) {
81 scstr_t str = va_arg(ap, scstr_t);
/* overflow guard: true when size + str.length would exceed SIZE_MAX */
82 if(SIZE_MAX - str.length < size) {
/*
 * Shared worker behind scstrcat() and scstrcat_a().
 *
 * Takes s1 plus further scstr_t values from the va_list, stages them in a
 * temporary calloc()ed array, sums their lengths with overflow guards, and
 * copies them back to back into a buffer of slen + 1 bytes obtained from
 * allocator a. The byte at index str.length is set to '\0'.
 *
 * NOTE(review): the error branches and the release of the temporary array
 * are not visible in this excerpt.
 */
93 static sstr_t sstrvcat_a(
105 scstr_t s2 = va_arg (ap, scstr_t);
/* guard: s1.length + s2.length must stay within the range of size_t */
107 if(((size_t)-1) - s1.length < s2.length) {
/* scratch array holding every input string so they can be copied later */
111 scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
116 // get all args and overall length
119 size_t slen = s1.length + s2.length;
/* the first two strings are already accounted for, hence i starts at 2 */
121 for (size_t i=2;i<count;i++) {
122 scstr_t s = va_arg (ap, scstr_t);
/* guard: slen + s.length must stay within the range of size_t */
124 if(((size_t)-1) - s.length < slen) {
/* one extra byte for the terminating '\0' written below */
136 str.ptr = (char*) almalloc(a, slen + 1);
144 // concatenate strings
146 for (size_t i=0;i<count;i++) {
147 scstr_t s = strings[i];
148 memcpy(str.ptr + pos, s.ptr, s.length);
152 str.ptr[str.length] = '\0';
/* Variadic front end for concatenation: forwards count, s1 and the argument
 * list to sstrvcat_a() using the allocator from ucx_default_allocator(). */
159 sstr_t scstrcat(size_t count, scstr_t s1, ...) {
162 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap);
/* Variadic front end for concatenation with a caller-supplied allocator:
 * forwards a, count, s1 and the argument list to sstrvcat_a(). */
167 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) {
170 sstr_t s = sstrvcat_a(a, count, s1, ap);
/*
 * Range helper shared by the substring functions.
 * A start position beyond str_length is handled in its own branch; otherwise
 * a length reaching past the end is clamped to str_length - start.
 * The substring functions in this file treat a non-zero return value as
 * "the computed range may be used".
 * NOTE(review): the parameter list and the return statements are not visible
 * in this excerpt.
 */
175 static int ucx_substring(
185 if(start > str_length) {
/* start <= str_length holds here, so the subtraction cannot wrap */
189 if(length > str_length - start) {
190 length = str_length - start;
/* Returns the part of s from position start to its end by delegating to
 * sstrsubsl() with the remaining length s.length-start.
 * NOTE(review): for start > s.length the subtraction wraps (assuming an
 * unsigned length); ucx_substring() tests start against the string length
 * before it looks at the length argument. */
197 sstr_t sstrsubs(sstr_t s, size_t start) {
198 return sstrsubsl (s, start, s.length-start);
/* Returns at most length bytes of s beginning at start. The result starts
 * out as { NULL, 0 } and only receives a pointer into s (no bytes are copied)
 * when ucx_substring() reports a usable range. */
201 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
203 sstr_t ret = { NULL, 0 };
/* ucx_substring() writes the resulting length straight into ret.length */
204 if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
205 ret.ptr = s.ptr + pos;
/* Read-only variant of sstrsubs(): returns the part of string from position
 * start to its end by delegating to scstrsubsl() with the remaining length.
 * NOTE(review): for start > string.length the subtraction wraps (assuming an
 * unsigned length); ucx_substring() checks start first. */
210 scstr_t scstrsubs(scstr_t string, size_t start) {
211 return scstrsubsl(string, start, string.length-start);
/* Read-only variant of sstrsubsl(): returns at most length bytes of s
 * beginning at start. The result starts out as { NULL, 0 } and only receives
 * a pointer into s (no copy) when ucx_substring() reports a usable range. */
214 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) {
216 scstr_t ret = { NULL, 0 };
/* ucx_substring() writes the resulting length straight into ret.length */
217 if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
218 ret.ptr = s.ptr + pos;
/**
 * Finds the first occurrence of a byte in a length-delimited buffer.
 *
 * The comparison is done on unsigned char values (the semantics of memchr()),
 * so bytes >= 0x80 are found no matter whether the caller passes them as a
 * negative plain char or as a value in the range 0x80..0xFF. A direct
 * comparison of a plain char with the int argument never matched the latter
 * on platforms where char is signed.
 *
 * @param str    buffer to search; may be NULL only if length is 0
 * @param length number of bytes in str
 * @param chr    byte to look for (converted to unsigned char)
 * @param pos    receives the index of the first match; untouched on a miss
 * @return 1 if the byte was found, 0 otherwise
 */
static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) {
    /* memchr() must not be handed a NULL pointer, even for zero bytes */
    if (str == NULL || length == 0) {
        return 0;
    }

    const char *hit = memchr(str, chr, length);
    if (hit == NULL) {
        return 0;
    }

    *pos = (size_t) (hit - str);
    return 1;
}
/**
 * Finds the last occurrence of a byte in a length-delimited buffer.
 *
 * Both sides of the comparison are converted to unsigned char, so bytes
 * >= 0x80 are found no matter whether the caller passes them as a negative
 * plain char or as a value in the range 0x80..0xFF. A direct comparison of
 * a plain char with the int argument never matched the latter on platforms
 * where char is signed.
 *
 * @param str    buffer to search; not dereferenced when length is 0
 * @param length number of bytes in str
 * @param chr    byte to look for (converted to unsigned char)
 * @param pos    receives the index of the last match; untouched on a miss
 * @return 1 if the byte was found, 0 otherwise
 */
static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) {
    const unsigned char wanted = (unsigned char) chr;

    /* i is always one past the inspected index, which keeps the unsigned
     * counter from wrapping below zero */
    for (size_t i = length; i > 0; i--) {
        if ((unsigned char) str[i-1] == wanted) {
            *pos = i-1;
            return 1;
        }
    }

    return 0;
}
/* Searches s for the character c with ucx_strchr(). On a hit the tail of s
 * beginning at the reported position is returned (via sstrsubs(), so no bytes
 * are copied); otherwise the empty string sstrn(NULL, 0) is returned. */
246 sstr_t sstrchr(sstr_t s, int c) {
248 if(ucx_strchr(s.ptr, s.length, c, &pos)) {
249 return sstrsubs(s, pos);
251 return sstrn(NULL, 0);
/* Searches s for the character c with ucx_strrchr(). On a hit the tail of s
 * beginning at the reported position is returned (via sstrsubs(), so no bytes
 * are copied); otherwise the empty string sstrn(NULL, 0) is returned. */
254 sstr_t sstrrchr(sstr_t s, int c) {
256 if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
257 return sstrsubs(s, pos);
259 return sstrn(NULL, 0);
/* Read-only variant of sstrchr(): searches s for c with ucx_strchr() and
 * returns the tail of s from the reported position (via scstrsubs()), or the
 * empty string scstrn(NULL, 0) when c does not occur. */
262 scstr_t scstrchr(scstr_t s, int c) {
264 if(ucx_strchr(s.ptr, s.length, c, &pos)) {
265 return scstrsubs(s, pos);
267 return scstrn(NULL, 0);
/* Read-only variant of sstrrchr(): searches s for c with ucx_strrchr() and
 * returns the tail of s from the reported position (via scstrsubs()), or the
 * empty string scstrn(NULL, 0) when c does not occur. */
270 scstr_t scstrrchr(scstr_t s, int c) {
272 if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
273 return scstrsubs(s, pos);
275 return scstrn(NULL, 0);
/*
 * Prefix table accessors used by ucx_strstr().
 * The table is addressed as an array of size_t when useheap is non-zero and
 * as an array of uint8_t otherwise.
 *   ptable_r(dest, useheap, ptable, index)  reads entry index into dest
 *   ptable_w(useheap, ptable, index, src)   stores src in entry index
 * The macro arguments are not parenthesized, so only simple expressions
 * should be passed.
 */
278 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
279 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
281 #define ptable_w(useheap, ptable, index, src) do {\
282 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
283 else ((size_t*)ptable)[index] = src;\
/*
 * Substring search driven by a prefix table (the structure looks like a
 * Knuth-Morris-Pratt search - confirm against the lines not shown here).
 *
 * Patterns of up to 255 bytes use a function-local static table of 256
 * uint8_t entries; longer patterns use a calloc()ed table of matchlen+1
 * size_t entries that is released again before returning.
 * On a match, result points at the start of the match inside str and
 * resultlen holds the number of bytes from there to the end of str.
 *
 * NOTE(review): the static table is shared by all calls, so concurrent
 * searches from several threads would use the same storage - confirm the
 * intended thread-safety. A check of the calloc() result is not visible in
 * this excerpt either.
 */
287 static const char* ucx_strstr(
299 const char *result = NULL;
300 size_t resultlen = 0;
304 * our prefix table contains the prefix length PLUS ONE
305 * this is our decision, because we want to use the full range of size_t
306 * the original algorithm needs a (-1) at one single place
307 * and we want to avoid that
310 /* static prefix table */
311 static uint8_t s_prefix_table[256];
313 /* check pattern length and use appropriate prefix table */
314 /* if the pattern exceeds static prefix table, allocate on the heap */
315 register int useheap = matchlen > 255;
316 register void* ptable = useheap ?
317 calloc(matchlen+1, sizeof(size_t)): s_prefix_table;
319 /* keep counter in registers */
320 register size_t i, j;
322 /* fill prefix table */
324 ptable_w(useheap, ptable, i, j);
325 while (i < matchlen) {
/* fall back through the table until the pattern bytes agree again */
326 while (j >= 1 && match[j-1] != match[i]) {
327 ptable_r(j, useheap, ptable, j-1);
330 ptable_w(useheap, ptable, i, j);
/* search phase: same fallback, now comparing str against the pattern */
336 while (j >= 1 && str[i] != match[j-1]) {
337 ptable_r(j, useheap, ptable, j-1);
/* j is kept one higher than the matched length (see the note on the table) */
340 if (j-1 == matchlen) {
341 size_t start = i - matchlen;
342 result = str + start;
343 resultlen = length - start;
348 /* if prefix table was allocated on the heap, free it */
349 if (ptable != s_prefix_table) {
/* Locates match inside string with ucx_strstr(). On the path shown here the
 * result is a view into string (no copy) that begins at the reported
 * position and has the length reported through reslen.
 * NOTE(review): the handling of an empty match or a miss is not visible in
 * this excerpt. */
357 sstr_t scstrsstr(sstr_t string, scstr_t match) {
361 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
/* re-derive the pointer from string.ptr so the result keeps the non-const
 * pointer type of the input */
368 size_t pos = resstr - string.ptr;
369 result.ptr = string.ptr + pos;
370 result.length = reslen;
/* Read-only variant of scstrsstr(): locates match inside string with
 * ucx_strstr(). On the path shown here the result is a view into string
 * (no copy) beginning at the reported position with the length reported
 * through reslen.
 * NOTE(review): the handling of an empty match or a miss is not visible in
 * this excerpt. */
375 scstr_t scstrscstr(scstr_t string, scstr_t match) {
379 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
386 size_t pos = resstr - string.ptr;
387 result.ptr = string.ptr + pos;
388 result.length = reslen;
/* Convenience wrapper: calls scstrsplit_a() with the allocator returned by
 * ucx_default_allocator() and passes s, d and n through unchanged. */
396 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) {
397 return scstrsplit_a(ucx_default_allocator(), s, d, n);
/*
 * Splits s at occurrences of the delimiter d.
 *
 * Every piece is duplicated with sstrdup_a() and stored in an array that is
 * obtained from, and grown through, the given allocator. Single-byte
 * delimiters are located with a plain byte scan, longer ones with
 * scstrscstr(). When growing the array fails, the pieces stored so far and
 * the array itself are released with alfree().
 *
 * NOTE(review): n is presumably an in/out parameter (limit on input, piece
 * count or error code on output) - the lines that read and write it are not
 * visible in this excerpt, confirm against the header documentation.
 */
400 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
/* an empty string or an empty delimiter is treated as a special case */
401 if (s.length == 0 || d.length == 0) {
406 /* special cases: delimiter is at least as large as the string */
407 if (d.length >= s.length) {
409 if (sstrcmp(s, d) == 0) {
412 } else /* no match possible */ {
/* single-element result that holds a copy of the whole input */
414 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
416 *result = sstrdup_a(allocator, s);
426 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
433 /* optimize for one byte delimiters */
436 for (size_t i = 0 ; i < curpos.length ; i++) {
437 if (curpos.ptr[i] == *(d.ptr)) {
438 match.ptr = curpos.ptr + i;
444 match = scstrscstr(curpos, d);
446 if (match.length > 0) {
447 /* is this our last try? */
448 if (nmax == 0 || j < nmax) {
449 /* copy the current string to the array */
450 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
451 result[j-1] = sstrdup_a(allocator, item);
/* skip the copied piece together with the delimiter that ended it */
452 size_t processed = item.length + d.length;
453 curpos.ptr += processed;
454 curpos.length -= processed;
456 /* allocate memory for the next string */
461 sstr_t* reallocated = NULL;
/* ucx_szmul() is used as an overflow-checked multiplication; the realloc is
 * only attempted when it returns zero */
462 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
463 reallocated = (sstr_t*) alrealloc(
464 allocator, result, reallocsz);
467 result = reallocated;
/* failure path: release every piece stored so far, then the array */
469 for (ssize_t i = 0 ; i < j-1 ; i++) {
470 alfree(allocator, result[i].ptr);
472 alfree(allocator, result);
478 /* nmax reached, copy the _full_ remaining string */
479 result[j-1] = sstrdup_a(allocator, curpos);
483 /* no more matches, copy last string */
484 result[j-1] = sstrdup_a(allocator, curpos);
/* Compares two strings. Strings of equal length are compared byte-wise with
 * memcmp(); strings of different length are ordered by their length alone
 * (the values returned in those branches are not visible in this excerpt).
 * NOTE(review): for two empty strings memcmp() is called with length 0 and
 * whatever pointers the strings carry - confirm these are never NULL. */
496 int scstrcmp(scstr_t s1, scstr_t s2) {
497 if (s1.length == s2.length) {
498 return memcmp(s1.ptr, s2.ptr, s1.length);
499 } else if (s1.length > s2.length) {
/* Case-insensitive comparison. Strings of equal length are compared over
 * s1.length bytes with _strnicmp() or strncasecmp() (presumably selected by
 * a platform #ifdef that is not visible in this excerpt); strings of
 * different length are ordered by their length alone. */
506 int scstrcasecmp(scstr_t s1, scstr_t s2) {
507 if (s1.length == s2.length) {
509 return _strnicmp(s1.ptr, s2.ptr, s1.length);
511 return strncasecmp(s1.ptr, s2.ptr, s1.length);
513 } else if (s1.length > s2.length) {
/* Duplicates s using the allocator returned by ucx_default_allocator();
 * the work is delegated to sstrdup_a(). */
520 sstr_t scstrdup(scstr_t s) {
521 return sstrdup_a(ucx_default_allocator(), s);
/* Duplicates s into a buffer of s.length + 1 bytes obtained from allocator.
 * The copy has the same length as s and is followed by a terminating 0 byte.
 * The length is set to 0 on the alternative path (presumably the allocation
 * failure branch - the condition is not visible in this excerpt). */
524 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
/* one extra byte for the terminator written below */
526 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
528 newstring.length = s.length;
529 newstring.ptr[newstring.length] = 0;
531 memcpy(newstring.ptr, s.ptr, s.length);
533 newstring.length = 0;
/**
 * Computes the range of a buffer that remains after removing leading and
 * trailing whitespace, as classified by isspace().
 *
 * Nothing is modified or copied; the caller combines the returned offset
 * with its own pointer to form the trimmed string.
 *
 * @param s      buffer to inspect; not dereferenced when len is 0
 * @param len    number of bytes in s
 * @param newlen receives the length of the trimmed range
 * @return offset of the first non-whitespace byte relative to s
 *         (equal to len when the buffer consists of whitespace only)
 */
static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
    const char *newptr = s;
    size_t length = len;

    /* The <ctype.h> classifiers are only defined for values representable as
     * unsigned char (or EOF); a plain char can be negative for bytes >= 0x80,
     * hence the casts. */
    while (length > 0 && isspace((unsigned char) *newptr)) {
        newptr++;
        length--;
    }
    while (length > 0 && isspace((unsigned char) newptr[length-1])) {
        length--;
    }

    *newlen = length;
    return (size_t) (newptr - s);
}
/* Returns a view of string without surrounding whitespace. ucx_strtrim()
 * supplies the offset that is added to string.ptr and writes the trimmed
 * length into newstr.length; no bytes are copied. */
556 sstr_t sstrtrim(sstr_t string) {
558 newstr.ptr = string.ptr
559 + ucx_strtrim(string.ptr, string.length, &newstr.length);
/* Read-only variant of sstrtrim(): returns a view of string without
 * surrounding whitespace. ucx_strtrim() supplies the offset that is added to
 * string.ptr and writes the trimmed length into newstr.length. */
563 scstr_t scstrtrim(scstr_t string) {
565 newstr.ptr = string.ptr
566 + ucx_strtrim(string.ptr, string.length, &newstr.length);
/* Tests whether string begins with prefix (case-sensitive).
 * An empty string only matches an empty prefix. An empty prefix and a prefix
 * longer than string are decided by early branches (their return values are
 * not visible in this excerpt); otherwise the first prefix.length bytes are
 * compared with memcmp(). Returns non-zero on a match. */
570 int scstrprefix(scstr_t string, scstr_t prefix) {
571 if (string.length == 0) {
572 return prefix.length == 0;
574 if (prefix.length == 0) {
578 if (prefix.length > string.length) {
581 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
/* Tests whether string ends with suffix (case-sensitive).
 * An empty string only matches an empty suffix. An empty suffix and a suffix
 * longer than string are decided by early branches (their return values are
 * not visible in this excerpt); otherwise the last suffix.length bytes of
 * string are compared with memcmp(). Returns non-zero on a match. */
585 int scstrsuffix(scstr_t string, scstr_t suffix) {
586 if (string.length == 0) {
587 return suffix.length == 0;
589 if (suffix.length == 0) {
593 if (suffix.length > string.length) {
/* suffix.length <= string.length here, so the pointer stays inside string */
596 return memcmp(string.ptr+string.length-suffix.length,
597 suffix.ptr, suffix.length) == 0;
/* Case-insensitive variant of scstrprefix().
 * An empty string only matches an empty prefix. An empty prefix and a prefix
 * longer than string are decided by early branches (their return values are
 * not visible in this excerpt); otherwise the leading prefix.length bytes of
 * string are compared with scstrcasecmp(). Returns non-zero on a match. */
601 int scstrcaseprefix(scstr_t string, scstr_t prefix) {
602 if (string.length == 0) {
603 return prefix.length == 0;
605 if (prefix.length == 0) {
609 if (prefix.length > string.length) {
612 scstr_t subs = scstrsubsl(string, 0, prefix.length);
613 return scstrcasecmp(subs, prefix) == 0;
/* Case-insensitive variant of scstrsuffix().
 * An empty string only matches an empty suffix. An empty suffix and a suffix
 * longer than string are decided by early branches (their return values are
 * not visible in this excerpt); otherwise the trailing suffix.length bytes of
 * string are compared with scstrcasecmp(). Returns non-zero on a match. */
617 int scstrcasesuffix(scstr_t string, scstr_t suffix) {
618 if (string.length == 0) {
619 return suffix.length == 0;
621 if (suffix.length == 0) {
625 if (suffix.length > string.length) {
628 scstr_t subs = scstrsubs(string, string.length-suffix.length);
629 return scstrcasecmp(subs, suffix) == 0;
633 sstr_t scstrlower(scstr_t string) {
634 sstr_t ret = sstrdup(string);
635 for (size_t i = 0; i < ret.length ; i++) {
636 ret.ptr[i] = tolower(ret.ptr[i]);
641 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) {
642 sstr_t ret = sstrdup_a(allocator, string);
643 for (size_t i = 0; i < ret.length ; i++) {
644 ret.ptr[i] = tolower(ret.ptr[i]);
649 sstr_t scstrupper(scstr_t string) {
650 sstr_t ret = sstrdup(string);
651 for (size_t i = 0; i < ret.length ; i++) {
652 ret.ptr[i] = toupper(ret.ptr[i]);
657 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) {
658 sstr_t ret = sstrdup_a(allocator, string);
659 for (size_t i = 0; i < ret.length ; i++) {
660 ret.ptr[i] = toupper(ret.ptr[i]);
/* Upper bound for the number of entries a single index buffer is created
 * with; scstrreplacen_a() caps its per-buffer capacity at this value. */
665 #define REPLACE_INDEX_BUFFER_MAX 100
/* One node in a singly linked chain of index buffers. scstrreplacen_a()
 * records the positions of pattern matches in the buf member, which it
 * allocates as an array of size_t. */
667 struct scstrreplace_ibuf {
/* number of entries currently in use in this node */
669 unsigned int len; /* small indices */
/* following node of the chain; scstrreplacen_a() stops at a null pointer */
670 struct scstrreplace_ibuf* next;
/* Releases a chain of index buffers beginning at buf. The successor is read
 * from buf->next before the current node is dealt with (the statements that
 * free the node and its array are not visible in this excerpt). */
673 static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) {
675 struct scstrreplace_ibuf *next = buf->next;
/*
 * Builds a new string in which occurrences of pattern inside str are
 * replaced by replacement, stopping the search once replmax matches have
 * been recorded.
 *
 * The function works in three phases:
 *   1. search: match positions are recorded in a chain of index buffers
 *      (struct scstrreplace_ibuf) that live on the C heap,
 *   2. sizing: the result length is derived from the number of matches and
 *      the length difference between replacement and pattern,
 *   3. assembly: unchanged parts of str and copies of replacement are written
 *      alternately into the buffer obtained from allocator.
 * The index buffers are released on every visible exit path after the first
 * allocation. Allocation failures yield the empty string sstrn(NULL, 0).
 *
 * NOTE(review): the early-exit statement for an empty or oversized pattern
 * and for replmax == 0 is not visible in this excerpt.
 */
682 sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
683 scstr_t pattern, scstr_t replacement, size_t replmax) {
685 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
688 /* Compute expected buffer length */
/* at most str.length / pattern.length non-overlapping matches can exist */
689 size_t ibufmax = str.length / pattern.length;
690 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
691 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
692 ibuflen = REPLACE_INDEX_BUFFER_MAX;
695 /* Allocate first index buffer */
696 struct scstrreplace_ibuf *firstbuf, *curbuf;
697 firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf));
698 if (!firstbuf) return sstrn(NULL, 0);
699 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
700 if (!firstbuf->buf) {
702 return sstrn(NULL, 0);
705 /* Search occurrences */
706 scstr_t searchstr = str;
709 scstr_t match = scstrscstr(searchstr, pattern);
710 if (match.length > 0) {
711 /* Allocate next buffer in chain, if required */
712 if (curbuf->len == ibuflen) {
713 struct scstrreplace_ibuf *nextbuf =
714 calloc(1, sizeof(struct scstrreplace_ibuf));
716 scstrrepl_free_ibuf(firstbuf);
717 return sstrn(NULL, 0);
719 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
722 scstrrepl_free_ibuf(firstbuf);
723 return sstrn(NULL, 0);
725 curbuf->next = nextbuf;
729 /* Record match index */
/* positions are stored relative to the start of the original string */
731 size_t idx = match.ptr - str.ptr;
732 curbuf->buf[curbuf->len++] = idx;
/* continue searching directly behind the match that was just recorded */
733 searchstr.ptr = match.ptr + pattern.length;
734 searchstr.length = str.length - idx - pattern.length;
738 } while (searchstr.length > 0 && found < replmax);
740 /* Allocate result string */
/* length change per replacement; negative when the replacement is shorter */
743 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
747 rcount += curbuf->len;
748 curbuf = curbuf->next;
750 result.length = str.length + rcount * adjlen;
/* NOTE(review): exactly result.length bytes are requested, so the visible
 * code leaves no room for a terminating NUL byte - confirm that callers do
 * not expect the result to be NUL-terminated */
751 result.ptr = almalloc(allocator, result.length);
753 scstrrepl_free_ibuf(firstbuf);
754 return sstrn(NULL, 0);
758 /* Build result string */
761 char* destptr = result.ptr;
763 for (size_t i = 0; i < curbuf->len; i++) {
764 /* Copy source part up to next match*/
765 size_t idx = curbuf->buf[i];
766 size_t srclen = idx - srcidx;
768 memcpy(destptr, str.ptr+srcidx, srclen);
773 /* Copy the replacement and skip the source pattern */
774 srcidx += pattern.length;
775 memcpy(destptr, replacement.ptr, replacement.length);
776 destptr += replacement.length;
778 curbuf = curbuf->next;
/* copy whatever follows the last recorded match */
780 memcpy(destptr, str.ptr+srcidx, str.length-srcidx);
782 /* Free index buffer */
783 scstrrepl_free_ibuf(firstbuf);
/* Convenience wrapper: calls scstrreplacen_a() with the allocator returned
 * by ucx_default_allocator() and passes all other arguments through. */
788 sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
789 scstr_t replacement, size_t replmax) {
790 return scstrreplacen_a(ucx_default_allocator(),
791 str, pattern, replacement, replmax);
795 // type adjustment functions
/* Type adjustment from scstr_t to scstr_t (presumably an identity conversion
 * used by type-generic macros - the body is not visible in this excerpt,
 * confirm against ucx/string.h). */
796 scstr_t ucx_sc2sc(scstr_t str) {
/* Type adjustment from sstr_t to scstr_t: the length of str is carried over
 * into the read-only string cs (the pointer assignment is presumably done in
 * a line not visible in this excerpt). */
799 scstr_t ucx_ss2sc(sstr_t str) {
802 cs.length = str.length;
805 scstr_t ucx_ss2c_s(scstr_t c) {