Wed, 31 Aug 2022 23:12:05 +0200
more implementations of string functions
src/string.c | file | annotate | diff | comparison | revisions |
1.1 --- a/src/string.c Tue Aug 30 19:56:07 2022 +0200 1.2 +++ b/src/string.c Wed Aug 31 23:12:05 2022 +0200 1.3 @@ -125,5 +125,214 @@ 1.4 return result; 1.5 } 1.6 1.7 +cxstring cx_strsubs( 1.8 + cxstring string, 1.9 + size_t start 1.10 +) { 1.11 + return cx_strsubsl(string, start, string.length - start); 1.12 +} 1.13 1.14 +cxmutstr cx_strsubs_m( 1.15 + cxmutstr string, 1.16 + size_t start 1.17 +) { 1.18 + return cx_strsubsl_m(string, start, string.length - start); 1.19 +} 1.20 1.21 +cxstring cx_strsubsl( 1.22 + cxstring string, 1.23 + size_t start, 1.24 + size_t length 1.25 +) { 1.26 + if (start > string.length) { 1.27 + return (cxstring) {NULL, 0}; 1.28 + } 1.29 + 1.30 + size_t rem_len = string.length - start; 1.31 + if (length > rem_len) { 1.32 + length = rem_len; 1.33 + } 1.34 + 1.35 + return (cxstring) {string.ptr + start, length}; 1.36 +} 1.37 + 1.38 +cxmutstr cx_strsubsl_m( 1.39 + cxmutstr string, 1.40 + size_t start, 1.41 + size_t length 1.42 +) { 1.43 + cxstring result = cx_strsubsl(cx_strcast(string), start, length); 1.44 + return (cxmutstr) {(char *) result.ptr, result.length}; 1.45 +} 1.46 + 1.47 +cxstring cx_strchr( 1.48 + cxstring string, 1.49 + int chr 1.50 +) { 1.51 + chr = 0xFF & chr; 1.52 + // TODO: improve by comparing multiple bytes at once 1.53 + cx_for_n(i, string.length) { 1.54 + if (string.ptr[i] == chr) { 1.55 + return cx_strsubs(string, i); 1.56 + } 1.57 + } 1.58 + return (cxstring) {NULL, 0}; 1.59 +} 1.60 + 1.61 +cxmutstr cx_strchr_m( 1.62 + cxmutstr string, 1.63 + int chr 1.64 +) { 1.65 + cxstring result = cx_strchr(cx_strcast(string), chr); 1.66 + return (cxmutstr) {(char *) result.ptr, result.length}; 1.67 +} 1.68 + 1.69 +cxstring cx_strrchr( 1.70 + cxstring string, 1.71 + int chr 1.72 +) { 1.73 + chr = 0xFF & chr; 1.74 + size_t i = string.length; 1.75 + while (i > 0) { 1.76 + i--; 1.77 + // TODO: improve by comparing multiple bytes at once 1.78 + if (string.ptr[i] == chr) { 1.79 + return cx_strsubs(string, i); 1.80 + } 1.81 + } 1.82 + return (cxstring) {NULL, 0}; 1.83 +} 1.84 + 1.85 +cxmutstr cx_strrchr_m( 1.86 + cxmutstr string, 1.87 + int chr 1.88 +) { 1.89 + cxstring result = cx_strrchr(cx_strcast(string), chr); 1.90 + return (cxmutstr) {(char *) result.ptr, result.length}; 1.91 +} 1.92 + 1.93 +#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ 1.94 + ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) 1.95 + 1.96 +#define ptable_w(useheap, ptable, index, src) do {\ 1.97 + if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ 1.98 + else ((size_t*)ptable)[index] = src;\ 1.99 + } while (0) 1.100 + 1.101 + 1.102 +cxstring cx_strstr( 1.103 + cxstring haystack, 1.104 + cxstring needle 1.105 +) { 1.106 + if (needle.length == 0) { 1.107 + return haystack; 1.108 + } 1.109 + 1.110 + /* 1.111 + * IMPORTANT: 1.112 + * Our prefix table contains the prefix length PLUS ONE 1.113 + * this is our decision, because we want to use the full range of size_t. 1.114 + * The original algorithm needs a (-1) at one single place, 1.115 + * and we want to avoid that. 1.116 + */ 1.117 + 1.118 + /* static prefix table */ 1.119 + static uint8_t s_prefix_table[512]; 1.120 + 1.121 + /* check pattern length and use appropriate prefix table */ 1.122 + /* if the pattern exceeds static prefix table, allocate on the heap */ 1.123 + register int useheap = needle.length >= 512; 1.124 + register void *ptable = useheap ? calloc(needle.length + 1, 1.125 + sizeof(size_t)) : s_prefix_table; 1.126 + 1.127 + /* keep counter in registers */ 1.128 + register size_t i, j; 1.129 + 1.130 + /* fill prefix table */ 1.131 + i = 0; 1.132 + j = 0; 1.133 + ptable_w(useheap, ptable, i, j); 1.134 + while (i < needle.length) { 1.135 + while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { 1.136 + ptable_r(j, useheap, ptable, j - 1); 1.137 + } 1.138 + i++; 1.139 + j++; 1.140 + ptable_w(useheap, ptable, i, j); 1.141 + } 1.142 + 1.143 + /* search */ 1.144 + cxstring result = {NULL, 0}; 1.145 + i = 0; 1.146 + j = 1; 1.147 + while (i < haystack.length) { 1.148 + while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { 1.149 + ptable_r(j, useheap, ptable, j - 1); 1.150 + } 1.151 + i++; 1.152 + j++; 1.153 + if (j - 1 == needle.length) { 1.154 + size_t start = i - needle.length; 1.155 + result.ptr = haystack.ptr + start; 1.156 + result.length = haystack.length - start; 1.157 + break; 1.158 + } 1.159 + } 1.160 + 1.161 + /* if prefix table was allocated on the heap, free it */ 1.162 + if (ptable != s_prefix_table) { 1.163 + free(ptable); 1.164 + } 1.165 + 1.166 + return result; 1.167 +} 1.168 + 1.169 +cxmutstr cx_strstr_m( 1.170 + cxmutstr haystack, 1.171 + cxstring needle 1.172 +) { 1.173 + cxstring result = cx_strstr(cx_strcast(haystack), needle); 1.174 + return (cxmutstr) {(char *) result.ptr, result.length}; 1.175 +} 1.176 + 1.177 +size_t cx_strsplit( 1.178 + cxstring string, 1.179 + cxstring delim, 1.180 + size_t limit, 1.181 + cxstring *output 1.182 +) { 1.183 + // TODO: implement 1.184 + return 0; 1.185 +} 1.186 + 1.187 +size_t cx_strsplit_a( 1.188 + CxAllocator *allocator, 1.189 + cxstring string, 1.190 + cxstring delim, 1.191 + size_t limit, 1.192 + cxstring **output 1.193 +) { 1.194 + // TODO: implement 1.195 + return 0; 1.196 +} 1.197 + 1.198 +size_t cx_strsplit_m( 1.199 + cxmutstr string, 1.200 + cxstring delim, 1.201 + size_t limit, 1.202 + cxmutstr *output 1.203 +) { 1.204 + return cx_strsplit(cx_strcast(string), 1.205 + delim, limit, (cxstring *) output); 1.206 +} 1.207 + 1.208 +size_t cx_strsplit_ma( 1.209 + CxAllocator *allocator, 1.210 + cxmutstr string, 1.211 + cxstring delim, 1.212 + size_t limit, 1.213 + cxmutstr **output 1.214 +) { 1.215 + return cx_strsplit_a(allocator, cx_strcast(string), 1.216 + delim, limit, (cxstring **) output); 1.217 +}