# HG changeset patch # User Mike Becker # Date 1661980325 -7200 # Node ID aac47db8da0b98dc076eeb91e91f168dcc1e5e42 # Parent bbc46dcd5255f3f8aa1cd0a05056708a43231529 more implementations of string functions diff -r bbc46dcd5255 -r aac47db8da0b src/string.c --- a/src/string.c Tue Aug 30 19:56:07 2022 +0200 +++ b/src/string.c Wed Aug 31 23:12:05 2022 +0200 @@ -125,5 +125,214 @@ return result; } +cxstring cx_strsubs( + cxstring string, + size_t start +) { + return cx_strsubsl(string, start, string.length - start); +} +cxmutstr cx_strsubs_m( + cxmutstr string, + size_t start +) { + return cx_strsubsl_m(string, start, string.length - start); +} +cxstring cx_strsubsl( + cxstring string, + size_t start, + size_t length +) { + if (start > string.length) { + return (cxstring) {NULL, 0}; + } + + size_t rem_len = string.length - start; + if (length > rem_len) { + length = rem_len; + } + + return (cxstring) {string.ptr + start, length}; +} + +cxmutstr cx_strsubsl_m( + cxmutstr string, + size_t start, + size_t length +) { + cxstring result = cx_strsubsl(cx_strcast(string), start, length); + return (cxmutstr) {(char *) result.ptr, result.length}; +} + +cxstring cx_strchr( + cxstring string, + int chr +) { + chr = 0xFF & chr; + // TODO: improve by comparing multiple bytes at once + cx_for_n(i, string.length) { + if (string.ptr[i] == chr) { + return cx_strsubs(string, i); + } + } + return (cxstring) {NULL, 0}; +} + +cxmutstr cx_strchr_m( + cxmutstr string, + int chr +) { + cxstring result = cx_strchr(cx_strcast(string), chr); + return (cxmutstr) {(char *) result.ptr, result.length}; +} + +cxstring cx_strrchr( + cxstring string, + int chr +) { + chr = 0xFF & chr; + size_t i = string.length; + while (i > 0) { + i--; + // TODO: improve by comparing multiple bytes at once + if (string.ptr[i] == chr) { + return cx_strsubs(string, i); + } + } + return (cxstring) {NULL, 0}; +} + +cxmutstr cx_strrchr_m( + cxmutstr string, + int chr +) { + cxstring result = cx_strrchr(cx_strcast(string), chr); + return (cxmutstr) {(char *) result.ptr, result.length}; +} + +#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ + ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) + +#define ptable_w(useheap, ptable, index, src) do {\ + if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ + else ((size_t*)ptable)[index] = src;\ + } while (0) + + +cxstring cx_strstr( + cxstring haystack, + cxstring needle +) { + if (needle.length == 0) { + return haystack; + } + + /* + * IMPORTANT: + * Our prefix table contains the prefix length PLUS ONE + * this is our decision, because we want to use the full range of size_t. + * The original algorithm needs a (-1) at one single place, + * and we want to avoid that. + */ + + /* static prefix table */ + static uint8_t s_prefix_table[512]; + + /* check pattern length and use appropriate prefix table */ + /* if the pattern exceeds static prefix table, allocate on the heap */ + register int useheap = needle.length >= 512; + register void *ptable = useheap ? calloc(needle.length + 1, + sizeof(size_t)) : s_prefix_table; + + /* keep counter in registers */ + register size_t i, j; + + /* fill prefix table */ + i = 0; + j = 0; + ptable_w(useheap, ptable, i, j); + while (i < needle.length) { + while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { + ptable_r(j, useheap, ptable, j - 1); + } + i++; + j++; + ptable_w(useheap, ptable, i, j); + } + + /* search */ + cxstring result = {NULL, 0}; + i = 0; + j = 1; + while (i < haystack.length) { + while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { + ptable_r(j, useheap, ptable, j - 1); + } + i++; + j++; + if (j - 1 == needle.length) { + size_t start = i - needle.length; + result.ptr = haystack.ptr + start; + result.length = haystack.length - start; + break; + } + } + + /* if prefix table was allocated on the heap, free it */ + if (ptable != s_prefix_table) { + free(ptable); + } + + return result; +} + +cxmutstr cx_strstr_m( + cxmutstr haystack, + cxstring needle +) { + cxstring result = cx_strstr(cx_strcast(haystack), needle); + return (cxmutstr) {(char *) result.ptr, result.length}; +} + +size_t cx_strsplit( + cxstring string, + cxstring delim, + size_t limit, + cxstring *output +) { + // TODO: implement + return 0; +} + +size_t cx_strsplit_a( + CxAllocator *allocator, + cxstring string, + cxstring delim, + size_t limit, + cxstring **output +) { + // TODO: implement + return 0; +} + +size_t cx_strsplit_m( + cxmutstr string, + cxstring delim, + size_t limit, + cxmutstr *output +) { + return cx_strsplit(cx_strcast(string), + delim, limit, (cxstring *) output); +} + +size_t cx_strsplit_ma( + CxAllocator *allocator, + cxmutstr string, + cxstring delim, + size_t limit, + cxmutstr **output +) { + return cx_strsplit_a(allocator, cx_strcast(string), + delim, limit, (cxstring **) output); +}