diff -r 96fa7fa6af4f -r 0f3c9662f9b5 src/string.c
--- a/src/string.c	Sat Sep 03 15:11:23 2022 +0200
+++ b/src/string.c	Fri Sep 09 20:19:08 2022 +0200
@@ -72,6 +72,15 @@
     str->length = 0;
 }
 
+void cx_strfree_a(
+        CxAllocator *alloc,
+        cxmutstr *str
+) {
+    cxFree(alloc, str->ptr);
+    str->ptr = NULL;
+    str->length = 0;
+}
+
 size_t cx_strlen(
         size_t count,
         ...
@@ -235,6 +244,11 @@
         return haystack;
     }
 
+    /* optimize for single-char needles */
+    if (needle.length == 1) {
+        return cx_strchr(haystack, *needle.ptr);
+    }
+
     /*
      * IMPORTANT:
      * Our prefix table contains the prefix length PLUS ONE
@@ -308,8 +322,55 @@
         size_t limit,
         cxstring *output
 ) {
-    // TODO: implement
-    return 0;
+    /* special case: output limit is zero */
+    if (limit == 0) return 0;
+
+    /* special case: delimiter is empty */
+    if (delim.length == 0) {
+        output[0] = string;
+        return 1;
+    }
+
+    /* special cases: delimiter is at least as large as the string */
+    if (delim.length >= string.length) {
+        /* exact match, and the output has room for two items */
+        if (limit > 1 && cx_strcmp(string, delim) == 0) {
+            output[0] = cx_strn(string.ptr, 0);
+            output[1] = cx_strn(string.ptr + string.length, 0);
+            return 2;
+        } else /* no match possible, or limit is 1 */ {
+            output[0] = string;
+            return 1;
+        }
+    }
+
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        if (match.length > 0) {
+            /* is the limit reached? */
+            if (n < limit) {
+                /* copy the current string to the array */
+                cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
+                output[n - 1] = item;
+                size_t processed = item.length + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                /* limit reached, copy the _full_ remaining string */
+                output[n - 1] = curpos;
+                break;
+            }
+        } else {
+            /* no more matches, copy last string */
+            output[n - 1] = curpos;
+            break;
+        }
+    }
+
+    return n;
 }
 
 size_t cx_strsplit_a(
@@ -319,8 +380,31 @@
         size_t limit,
         cxstring **output
 ) {
-    // TODO: implement
-    return 0;
+    /* find out how many splits we're going to make and allocate memory */
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        /* an empty delimiter never splits: cx_strsplit() yields one item */
+        if (delim.length > 0 && match.length > 0) {
+            /* is the limit reached? */
+            if (n < limit) {
+                size_t processed = match.ptr - curpos.ptr + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                /* limit reached */
+                break;
+            }
+        } else {
+            /* no more matches */
+            break;
+        }
+    }
+    *output = cxCalloc(allocator, n, sizeof(cxstring));
+    if (*output == NULL) return 0;
+    return cx_strsplit(string, delim, n, *output);
 }
 
 size_t cx_strsplit_m(
@@ -344,7 +428,10 @@
                          delim, limit, (cxstring **) output);
 }
 
-int cx_strcmp(cxstring s1, cxstring s2) {
+int cx_strcmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
         return memcmp(s1.ptr, s2.ptr, s1.length);
     } else if (s1.length > s2.length) {
@@ -354,7 +441,10 @@
     }
 }
 
-int cx_strcasecmp(cxstring s1, cxstring s2) {
+int cx_strcasecmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
 #ifdef _WIN32
         return _strnicmp(s1.ptr, s2.ptr, s1.length);
@@ -368,7 +458,10 @@
     }
 }
 
-cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) {
+cxmutstr cx_strdup_a(
+        CxAllocator *allocator,
+        cxstring string
+) {
     cxmutstr result = {
             cxMalloc(allocator, string.length + 1),
             string.length
@@ -400,18 +493,27 @@
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 
-bool cx_strprefix(cxstring string, cxstring prefix) {
+bool cx_strprefix(
+        cxstring string,
+        cxstring prefix
+) {
     if (string.length < prefix.length) return false;
     return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
 }
 
-bool cx_strsuffix(cxstring string, cxstring suffix) {
+bool cx_strsuffix(
+        cxstring string,
+        cxstring suffix
+) {
     if (string.length < suffix.length) return false;
     return memcmp(string.ptr + string.length - suffix.length,
                   suffix.ptr, suffix.length) == 0;
 }
 
-bool cx_casestrprefix(cxstring string, cxstring prefix) {
+bool cx_strcaseprefix(
+        cxstring string,
+        cxstring prefix
+) {
     if (string.length < prefix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
@@ -420,7 +522,10 @@
 #endif
 }
 
-bool cx_casestrsuffix(cxstring string, cxstring suffix) {
+bool cx_strcasesuffix(
+        cxstring string,
+        cxstring suffix
+) {
     if (string.length < suffix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr+string.length-suffix.length,
@@ -442,3 +547,148 @@
         string.ptr[i] = toupper(string.ptr[i]);
     }
 }
+
+/* maximum number of match indices stored in one index buffer */
+#define REPLACE_INDEX_BUFFER_MAX 100
+
+/* node in a singly linked chain of buffers holding match offsets */
+struct cx_strreplace_ibuf {
+    size_t *buf;
+    unsigned int len; /* small indices */
+    struct cx_strreplace_ibuf *next;
+};
+
+/* releases a whole chain of index buffers, nodes and index arrays alike */
+static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
+    while (buf) {
+        struct cx_strreplace_ibuf *next = buf->next;
+        free(buf->buf);
+        free(buf);
+        buf = next;
+    }
+}
+
+/*
+ * Returns a copy of str, allocated with the given allocator, in which at
+ * most replmax occurrences of pattern are substituted by replacement.
+ * An empty pattern, a pattern longer than str, or replmax == 0 yields a
+ * plain cx_strdup_a() copy. If the index buffers or the result cannot be
+ * allocated, cx_mutstrn(NULL, 0) is returned.
+ */
+cxmutstr cx_strreplacen_a(
+        CxAllocator *allocator,
+        cxstring str,
+        cxstring pattern,
+        cxstring replacement,
+        size_t replmax
+) {
+
+    if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
+        return cx_strdup_a(allocator, str);
+
+    /* Compute expected buffer length */
+    size_t ibufmax = str.length / pattern.length;
+    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
+    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
+        ibuflen = REPLACE_INDEX_BUFFER_MAX;
+    }
+
+    /* Allocate first index buffer */
+    struct cx_strreplace_ibuf *firstbuf, *curbuf;
+    firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
+    if (!firstbuf) return cx_mutstrn(NULL, 0);
+    firstbuf->buf = calloc(ibuflen, sizeof(size_t));
+    if (!firstbuf->buf) {
+        free(firstbuf);
+        return cx_mutstrn(NULL, 0);
+    }
+
+    /* Search occurrences */
+    cxstring searchstr = str;
+    size_t found = 0;
+    do {
+        cxstring match = cx_strstr(searchstr, pattern);
+        if (match.length > 0) {
+            /* Allocate next buffer in chain, if required */
+            if (curbuf->len == ibuflen) {
+                struct cx_strreplace_ibuf *nextbuf =
+                        calloc(1, sizeof(struct cx_strreplace_ibuf));
+                if (!nextbuf) {
+                    cx_strrepl_free_ibuf(firstbuf);
+                    return cx_mutstrn(NULL, 0);
+                }
+                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
+                if (!nextbuf->buf) {
+                    free(nextbuf);
+                    cx_strrepl_free_ibuf(firstbuf);
+                    return cx_mutstrn(NULL, 0);
+                }
+                curbuf->next = nextbuf;
+                curbuf = nextbuf;
+            }
+
+            /* Record match index */
+            found++;
+            size_t idx = match.ptr - str.ptr;
+            curbuf->buf[curbuf->len++] = idx;
+            searchstr.ptr = match.ptr + pattern.length;
+            searchstr.length = str.length - idx - pattern.length;
+        } else {
+            break;
+        }
+    } while (searchstr.length > 0 && found < replmax);
+
+    /* Allocate result string */
+    cxmutstr result;
+    {
+        size_t rcount = 0;
+        curbuf = firstbuf;
+        do {
+            rcount += curbuf->len;
+            curbuf = curbuf->next;
+        } while (curbuf);
+        /* matches do not overlap, so the subtraction cannot wrap around */
+        result.length = str.length - rcount * pattern.length
+                        + rcount * replacement.length;
+        /* one extra byte for the zero terminator */
+        result.ptr = cxMalloc(allocator, result.length + 1);
+        if (!result.ptr) {
+            cx_strrepl_free_ibuf(firstbuf);
+            return cx_mutstrn(NULL, 0);
+        }
+    }
+
+    /* Build result string */
+    curbuf = firstbuf;
+    size_t srcidx = 0;
+    char *destptr = result.ptr;
+    do {
+        for (size_t i = 0; i < curbuf->len; i++) {
+            /* Copy source part up to next match */
+            size_t idx = curbuf->buf[i];
+            size_t srclen = idx - srcidx;
+            if (srclen > 0) {
+                memcpy(destptr, str.ptr + srcidx, srclen);
+                destptr += srclen;
+                srcidx += srclen;
+            }
+
+            /* Copy the replacement and skip the source pattern */
+            srcidx += pattern.length;
+            memcpy(destptr, replacement.ptr, replacement.length);
+            destptr += replacement.length;
+        }
+        curbuf = curbuf->next;
+    } while (curbuf);
+    memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
+
+    /* Terminate the result; cx_strdup_a() likewise reserves length + 1 bytes */
+    result.ptr[result.length] = '\0';
+
+    /* Free index buffer */
+    cx_strrepl_free_ibuf(firstbuf);
+
+    return result;
+}
+
+