universe@576: /* universe@576: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. universe@576: * universe@576: * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. universe@576: * universe@576: * Redistribution and use in source and binary forms, with or without universe@576: * modification, are permitted provided that the following conditions are met: universe@576: * universe@576: * 1. Redistributions of source code must retain the above copyright universe@576: * notice, this list of conditions and the following disclaimer. universe@576: * universe@576: * 2. Redistributions in binary form must reproduce the above copyright universe@576: * notice, this list of conditions and the following disclaimer in the universe@576: * documentation and/or other materials provided with the distribution. universe@576: * universe@576: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" universe@576: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE universe@576: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE universe@576: * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE universe@576: * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR universe@576: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF universe@576: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS universe@576: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN universe@576: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) universe@576: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE universe@576: * POSSIBILITY OF SUCH DAMAGE. universe@576: */ universe@576: universe@576: #include "cx/string.h" universe@579: #include "cx/utils.h" universe@579: universe@579: #include universe@579: #include universe@581: #include universe@581: universe@581: #ifndef _WIN32 universe@581: universe@581: #include /* for strncasecmp() */ universe@581: universe@581: #endif /* _WIN32 */ universe@579: universe@579: cxmutstr cx_mutstr(char *cstring) { universe@579: return (cxmutstr) {cstring, strlen(cstring)}; universe@579: } universe@579: universe@579: cxmutstr cx_mutstrn( universe@579: char *cstring, universe@579: size_t length universe@579: ) { universe@579: return (cxmutstr) {cstring, length}; universe@579: } universe@579: universe@579: cxstring cx_str(const char *cstring) { universe@579: return (cxstring) {cstring, strlen(cstring)}; universe@579: } universe@579: universe@579: cxstring cx_strn( universe@579: const char *cstring, universe@579: size_t length universe@579: ) { universe@579: return (cxstring) {cstring, length}; universe@579: } universe@579: universe@579: cxstring cx_strcast(cxmutstr str) { universe@579: return (cxstring) {str.ptr, str.length}; universe@579: } universe@579: universe@579: void cx_strfree(cxmutstr *str) { universe@579: free(str->ptr); universe@579: str->ptr = NULL; universe@579: str->length = 0; universe@579: } universe@579: universe@583: void cx_strfree_a( universe@583: CxAllocator *alloc, universe@583: cxmutstr *str universe@583: ) { universe@583: cxFree(alloc, str->ptr); universe@583: str->ptr = NULL; universe@583: str->length = 0; universe@583: } universe@583: universe@579: size_t cx_strlen( universe@579: size_t count, universe@579: ... universe@579: ) { universe@579: if (count == 0) return 0; universe@579: universe@579: va_list ap; universe@579: va_start(ap, count); universe@579: size_t size = 0; universe@579: cx_for_n(i, count) { universe@579: cxstring str = va_arg(ap, cxstring); universe@579: size += str.length; universe@579: } universe@579: va_end(ap); universe@579: universe@579: return size; universe@579: } universe@579: universe@579: cxmutstr cx_strcat_a( universe@579: CxAllocator *alloc, universe@579: size_t count, universe@579: ... universe@579: ) { universe@579: cxstring *strings = calloc(count, sizeof(cxstring)); universe@579: if (!strings) abort(); universe@579: universe@579: va_list ap; universe@579: va_start(ap, count); universe@579: universe@579: // get all args and overall length universe@579: size_t slen = 0; universe@579: cx_for_n(i, count) { universe@579: cxstring s = va_arg (ap, cxstring); universe@579: strings[i] = s; universe@579: slen += s.length; universe@579: } universe@579: universe@579: // create new string universe@579: cxmutstr result; universe@579: result.ptr = cxMalloc(alloc, slen + 1); universe@579: result.length = slen; universe@579: if (result.ptr == NULL) abort(); universe@579: universe@579: // concatenate strings universe@579: size_t pos = 0; universe@579: cx_for_n(i, count) { universe@579: cxstring s = strings[i]; universe@579: memcpy(result.ptr + pos, s.ptr, s.length); universe@579: pos += s.length; universe@579: } universe@579: universe@579: // terminate string universe@579: result.ptr[result.length] = '\0'; universe@579: universe@579: // free temporary array universe@579: free(strings); universe@579: universe@579: return result; universe@579: } universe@579: universe@580: cxstring cx_strsubs( universe@580: cxstring string, universe@580: size_t start universe@580: ) { universe@580: return cx_strsubsl(string, start, string.length - start); universe@580: } universe@579: universe@580: cxmutstr cx_strsubs_m( universe@580: cxmutstr string, universe@580: size_t start universe@580: ) { universe@580: return cx_strsubsl_m(string, start, string.length - start); universe@580: } universe@579: universe@580: cxstring cx_strsubsl( universe@580: cxstring string, universe@580: size_t start, universe@580: size_t length universe@580: ) { universe@580: if (start > string.length) { universe@580: return (cxstring) {NULL, 0}; universe@580: } universe@580: universe@580: size_t rem_len = string.length - start; universe@580: if (length > rem_len) { universe@580: length = rem_len; universe@580: } universe@580: universe@580: return (cxstring) {string.ptr + start, length}; universe@580: } universe@580: universe@580: cxmutstr cx_strsubsl_m( universe@580: cxmutstr string, universe@580: size_t start, universe@580: size_t length universe@580: ) { universe@580: cxstring result = cx_strsubsl(cx_strcast(string), start, length); universe@580: return (cxmutstr) {(char *) result.ptr, result.length}; universe@580: } universe@580: universe@580: cxstring cx_strchr( universe@580: cxstring string, universe@580: int chr universe@580: ) { universe@580: chr = 0xFF & chr; universe@580: // TODO: improve by comparing multiple bytes at once universe@580: cx_for_n(i, string.length) { universe@580: if (string.ptr[i] == chr) { universe@580: return cx_strsubs(string, i); universe@580: } universe@580: } universe@580: return (cxstring) {NULL, 0}; universe@580: } universe@580: universe@580: cxmutstr cx_strchr_m( universe@580: cxmutstr string, universe@580: int chr universe@580: ) { universe@580: cxstring result = cx_strchr(cx_strcast(string), chr); universe@580: return (cxmutstr) {(char *) result.ptr, result.length}; universe@580: } universe@580: universe@580: cxstring cx_strrchr( universe@580: cxstring string, universe@580: int chr universe@580: ) { universe@580: chr = 0xFF & chr; universe@580: size_t i = string.length; universe@580: while (i > 0) { universe@580: i--; universe@580: // TODO: improve by comparing multiple bytes at once universe@580: if (string.ptr[i] == chr) { universe@580: return cx_strsubs(string, i); universe@580: } universe@580: } universe@580: return (cxstring) {NULL, 0}; universe@580: } universe@580: universe@580: cxmutstr cx_strrchr_m( universe@580: cxmutstr string, universe@580: int chr universe@580: ) { universe@580: cxstring result = cx_strrchr(cx_strcast(string), chr); universe@580: return (cxmutstr) {(char *) result.ptr, result.length}; universe@580: } universe@580: universe@591: #define STRSTR_SBO_BUFLEN 512 universe@580: universe@580: cxstring cx_strstr( universe@580: cxstring haystack, universe@580: cxstring needle universe@580: ) { universe@580: if (needle.length == 0) { universe@580: return haystack; universe@580: } universe@580: universe@583: /* optimize for single-char needles */ universe@583: if (needle.length == 1) { universe@583: return cx_strchr(haystack, *needle.ptr); universe@583: } universe@583: universe@580: /* universe@580: * IMPORTANT: universe@580: * Our prefix table contains the prefix length PLUS ONE universe@580: * this is our decision, because we want to use the full range of size_t. universe@580: * The original algorithm needs a (-1) at one single place, universe@580: * and we want to avoid that. universe@580: */ universe@580: universe@591: /* local prefix table */ universe@591: size_t s_prefix_table[STRSTR_SBO_BUFLEN]; universe@580: universe@591: /* check needle length and use appropriate prefix table */ universe@580: /* if the pattern exceeds static prefix table, allocate on the heap */ universe@591: bool useheap = needle.length >= STRSTR_SBO_BUFLEN; universe@591: register size_t *ptable = useheap ? calloc(needle.length + 1, universe@591: sizeof(size_t)) : s_prefix_table; universe@580: universe@580: /* keep counter in registers */ universe@580: register size_t i, j; universe@580: universe@580: /* fill prefix table */ universe@580: i = 0; universe@580: j = 0; universe@591: ptable[i] = j; universe@580: while (i < needle.length) { universe@580: while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { universe@591: j = ptable[j - 1]; universe@580: } universe@580: i++; universe@580: j++; universe@591: ptable[i] = j; universe@580: } universe@580: universe@580: /* search */ universe@580: cxstring result = {NULL, 0}; universe@580: i = 0; universe@580: j = 1; universe@580: while (i < haystack.length) { universe@580: while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { universe@591: j = ptable[j - 1]; universe@580: } universe@580: i++; universe@580: j++; universe@580: if (j - 1 == needle.length) { universe@580: size_t start = i - needle.length; universe@580: result.ptr = haystack.ptr + start; universe@580: result.length = haystack.length - start; universe@580: break; universe@580: } universe@580: } universe@580: universe@580: /* if prefix table was allocated on the heap, free it */ universe@580: if (ptable != s_prefix_table) { universe@580: free(ptable); universe@580: } universe@580: universe@580: return result; universe@580: } universe@580: universe@580: cxmutstr cx_strstr_m( universe@580: cxmutstr haystack, universe@580: cxstring needle universe@580: ) { universe@580: cxstring result = cx_strstr(cx_strcast(haystack), needle); universe@580: return (cxmutstr) {(char *) result.ptr, result.length}; universe@580: } universe@580: universe@580: size_t cx_strsplit( universe@580: cxstring string, universe@580: cxstring delim, universe@580: size_t limit, universe@580: cxstring *output universe@580: ) { universe@583: /* special case: output limit is zero */ universe@583: if (limit == 0) return 0; universe@583: universe@583: /* special case: delimiter is empty */ universe@583: if (delim.length == 0) { universe@583: output[0] = string; universe@583: return 1; universe@583: } universe@583: universe@583: /* special cases: delimiter is at least as large as the string */ universe@583: if (delim.length >= string.length) { universe@583: /* exact match */ universe@583: if (cx_strcmp(string, delim) == 0) { universe@583: output[0] = cx_strn(string.ptr, 0); universe@583: output[1] = cx_strn(string.ptr + string.length, 0); universe@583: return 2; universe@583: } else /* no match possible */ { universe@583: output[0] = string; universe@583: return 1; universe@583: } universe@583: } universe@583: universe@583: size_t n = 0; universe@583: cxstring curpos = string; universe@583: while (1) { universe@583: ++n; universe@583: cxstring match = cx_strstr(curpos, delim); universe@583: if (match.length > 0) { universe@583: /* is the limit reached? */ universe@583: if (n < limit) { universe@583: /* copy the current string to the array */ universe@583: cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); universe@583: output[n - 1] = item; universe@583: size_t processed = item.length + delim.length; universe@583: curpos.ptr += processed; universe@583: curpos.length -= processed; universe@583: } else { universe@583: /* limit reached, copy the _full_ remaining string */ universe@583: output[n - 1] = curpos; universe@583: break; universe@583: } universe@583: } else { universe@583: /* no more matches, copy last string */ universe@583: output[n - 1] = curpos; universe@583: break; universe@583: } universe@583: } universe@583: universe@583: return n; universe@580: } universe@580: universe@580: size_t cx_strsplit_a( universe@580: CxAllocator *allocator, universe@580: cxstring string, universe@580: cxstring delim, universe@580: size_t limit, universe@580: cxstring **output universe@580: ) { universe@583: /* find out how many splits we're going to make and allocate memory */ universe@583: size_t n = 0; universe@583: cxstring curpos = string; universe@583: while (1) { universe@583: ++n; universe@583: cxstring match = cx_strstr(curpos, delim); universe@583: if (match.length > 0) { universe@583: /* is the limit reached? */ universe@583: if (n < limit) { universe@583: size_t processed = match.ptr - curpos.ptr + delim.length; universe@583: curpos.ptr += processed; universe@583: curpos.length -= processed; universe@583: } else { universe@583: /* limit reached */ universe@583: break; universe@583: } universe@583: } else { universe@583: /* no more matches */ universe@583: break; universe@583: } universe@583: } universe@583: *output = cxCalloc(allocator, n, sizeof(cxstring)); universe@583: return cx_strsplit(string, delim, n, *output); universe@580: } universe@580: universe@580: size_t cx_strsplit_m( universe@580: cxmutstr string, universe@580: cxstring delim, universe@580: size_t limit, universe@580: cxmutstr *output universe@580: ) { universe@580: return cx_strsplit(cx_strcast(string), universe@580: delim, limit, (cxstring *) output); universe@580: } universe@580: universe@580: size_t cx_strsplit_ma( universe@580: CxAllocator *allocator, universe@580: cxmutstr string, universe@580: cxstring delim, universe@580: size_t limit, universe@580: cxmutstr **output universe@580: ) { universe@580: return cx_strsplit_a(allocator, cx_strcast(string), universe@580: delim, limit, (cxstring **) output); universe@580: } universe@581: universe@583: int cx_strcmp( universe@583: cxstring s1, universe@583: cxstring s2 universe@583: ) { universe@581: if (s1.length == s2.length) { universe@581: return memcmp(s1.ptr, s2.ptr, s1.length); universe@581: } else if (s1.length > s2.length) { universe@581: return 1; universe@581: } else { universe@581: return -1; universe@581: } universe@581: } universe@581: universe@583: int cx_strcasecmp( universe@583: cxstring s1, universe@583: cxstring s2 universe@583: ) { universe@581: if (s1.length == s2.length) { universe@581: #ifdef _WIN32 universe@581: return _strnicmp(s1.ptr, s2.ptr, s1.length); universe@581: #else universe@581: return strncasecmp(s1.ptr, s2.ptr, s1.length); universe@581: #endif universe@581: } else if (s1.length > s2.length) { universe@581: return 1; universe@581: } else { universe@581: return -1; universe@581: } universe@581: } universe@581: universe@583: cxmutstr cx_strdup_a( universe@583: CxAllocator *allocator, universe@583: cxstring string universe@583: ) { universe@581: cxmutstr result = { universe@581: cxMalloc(allocator, string.length + 1), universe@581: string.length universe@581: }; universe@581: if (result.ptr == NULL) { universe@581: result.length = 0; universe@581: return result; universe@581: } universe@581: memcpy(result.ptr, string.ptr, string.length); universe@581: result.ptr[string.length] = '\0'; universe@581: return result; universe@581: } universe@581: universe@581: cxstring cx_strtrim(cxstring string) { universe@581: cxstring result = string; universe@581: // TODO: optimize by comparing multiple bytes at once universe@581: while (result.length > 0 && isspace(*result.ptr)) { universe@581: result.ptr++; universe@581: result.length--; universe@581: } universe@581: while (result.length > 0 && isspace(result.ptr[result.length - 1])) { universe@581: result.length--; universe@581: } universe@581: return result; universe@581: } universe@581: universe@581: cxmutstr cx_strtrim_m(cxmutstr string) { universe@581: cxstring result = cx_strtrim(cx_strcast(string)); universe@581: return (cxmutstr) {(char *) result.ptr, result.length}; universe@581: } universe@581: universe@583: bool cx_strprefix( universe@583: cxstring string, universe@583: cxstring prefix universe@583: ) { universe@581: if (string.length < prefix.length) return false; universe@581: return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; universe@581: } universe@581: universe@583: bool cx_strsuffix( universe@583: cxstring string, universe@583: cxstring suffix universe@583: ) { universe@581: if (string.length < suffix.length) return false; universe@581: return memcmp(string.ptr + string.length - suffix.length, universe@581: suffix.ptr, suffix.length) == 0; universe@581: } universe@581: universe@583: bool cx_strcaseprefix( universe@583: cxstring string, universe@583: cxstring prefix universe@583: ) { universe@581: if (string.length < prefix.length) return false; universe@581: #ifdef _WIN32 universe@581: return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; universe@581: #else universe@581: return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; universe@581: #endif universe@581: } universe@581: universe@583: bool cx_strcasesuffix( universe@583: cxstring string, universe@583: cxstring suffix universe@583: ) { universe@581: if (string.length < suffix.length) return false; universe@581: #ifdef _WIN32 universe@581: return _strnicmp(string.ptr+string.length-suffix.length, universe@581: suffix.ptr, suffix.length) == 0; universe@581: #else universe@581: return strncasecmp(string.ptr + string.length - suffix.length, universe@581: suffix.ptr, suffix.length) == 0; universe@581: #endif universe@581: } universe@582: universe@582: void cx_strlower(cxmutstr string) { universe@582: cx_for_n(i, string.length) { universe@593: string.ptr[i] = (char) tolower(string.ptr[i]); universe@582: } universe@582: } universe@582: universe@582: void cx_strupper(cxmutstr string) { universe@582: cx_for_n(i, string.length) { universe@593: string.ptr[i] = (char) toupper(string.ptr[i]); universe@582: } universe@582: } universe@583: universe@583: #define REPLACE_INDEX_BUFFER_MAX 100 universe@583: universe@583: struct cx_strreplace_ibuf { universe@583: size_t *buf; universe@583: struct cx_strreplace_ibuf *next; universe@590: unsigned int len; universe@583: }; universe@583: universe@583: static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { universe@583: while (buf) { universe@583: struct cx_strreplace_ibuf *next = buf->next; universe@583: free(buf->buf); universe@583: free(buf); universe@583: buf = next; universe@583: } universe@583: } universe@583: universe@583: cxmutstr cx_strreplacen_a( universe@583: CxAllocator *allocator, universe@583: cxstring str, universe@583: cxstring pattern, universe@583: cxstring replacement, universe@583: size_t replmax universe@583: ) { universe@583: universe@583: if (pattern.length == 0 || pattern.length > str.length || replmax == 0) universe@583: return cx_strdup_a(allocator, str); universe@583: universe@583: /* Compute expected buffer length */ universe@583: size_t ibufmax = str.length / pattern.length; universe@583: size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; universe@583: if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { universe@583: ibuflen = REPLACE_INDEX_BUFFER_MAX; universe@583: } universe@583: universe@583: /* Allocate first index buffer */ universe@583: struct cx_strreplace_ibuf *firstbuf, *curbuf; universe@583: firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); universe@583: if (!firstbuf) return cx_mutstrn(NULL, 0); universe@583: firstbuf->buf = calloc(ibuflen, sizeof(size_t)); universe@583: if (!firstbuf->buf) { universe@583: free(firstbuf); universe@583: return cx_mutstrn(NULL, 0); universe@583: } universe@583: universe@583: /* Search occurrences */ universe@583: cxstring searchstr = str; universe@583: size_t found = 0; universe@583: do { universe@583: cxstring match = cx_strstr(searchstr, pattern); universe@583: if (match.length > 0) { universe@583: /* Allocate next buffer in chain, if required */ universe@583: if (curbuf->len == ibuflen) { universe@583: struct cx_strreplace_ibuf *nextbuf = universe@583: calloc(1, sizeof(struct cx_strreplace_ibuf)); universe@583: if (!nextbuf) { universe@583: cx_strrepl_free_ibuf(firstbuf); universe@583: return cx_mutstrn(NULL, 0); universe@583: } universe@583: nextbuf->buf = calloc(ibuflen, sizeof(size_t)); universe@583: if (!nextbuf->buf) { universe@583: free(nextbuf); universe@583: cx_strrepl_free_ibuf(firstbuf); universe@583: return cx_mutstrn(NULL, 0); universe@583: } universe@583: curbuf->next = nextbuf; universe@583: curbuf = nextbuf; universe@583: } universe@583: universe@583: /* Record match index */ universe@583: found++; universe@583: size_t idx = match.ptr - str.ptr; universe@583: curbuf->buf[curbuf->len++] = idx; universe@583: searchstr.ptr = match.ptr + pattern.length; universe@583: searchstr.length = str.length - idx - pattern.length; universe@583: } else { universe@583: break; universe@583: } universe@583: } while (searchstr.length > 0 && found < replmax); universe@583: universe@583: /* Allocate result string */ universe@583: cxmutstr result; universe@583: { universe@583: ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; universe@583: size_t rcount = 0; universe@583: curbuf = firstbuf; universe@583: do { universe@583: rcount += curbuf->len; universe@583: curbuf = curbuf->next; universe@583: } while (curbuf); universe@583: result.length = str.length + rcount * adjlen; universe@590: result.ptr = cxMalloc(allocator, result.length + 1); universe@583: if (!result.ptr) { universe@583: cx_strrepl_free_ibuf(firstbuf); universe@583: return cx_mutstrn(NULL, 0); universe@583: } universe@583: } universe@583: universe@583: /* Build result string */ universe@583: curbuf = firstbuf; universe@583: size_t srcidx = 0; universe@583: char *destptr = result.ptr; universe@583: do { universe@583: for (size_t i = 0; i < curbuf->len; i++) { universe@583: /* Copy source part up to next match*/ universe@583: size_t idx = curbuf->buf[i]; universe@583: size_t srclen = idx - srcidx; universe@583: if (srclen > 0) { universe@583: memcpy(destptr, str.ptr + srcidx, srclen); universe@583: destptr += srclen; universe@583: srcidx += srclen; universe@583: } universe@583: universe@583: /* Copy the replacement and skip the source pattern */ universe@583: srcidx += pattern.length; universe@583: memcpy(destptr, replacement.ptr, replacement.length); universe@583: destptr += replacement.length; universe@583: } universe@583: curbuf = curbuf->next; universe@583: } while (curbuf); universe@583: memcpy(destptr, str.ptr + srcidx, str.length - srcidx); universe@583: universe@590: /* Result is guaranteed to be zero-terminated */ universe@590: result.ptr[result.length] = '\0'; universe@590: universe@583: /* Free index buffer */ universe@583: cx_strrepl_free_ibuf(firstbuf); universe@583: universe@583: return result; universe@583: } universe@583: universe@583: