Patch for src/string.c: adds cx_strfree_a(), implements cx_strsplit() and
cx_strsplit_a(), adds cx_strreplacen_a(), renames cx_casestr{prefix,suffix}
to cx_strcase{prefix,suffix} and reformats several signatures.

Review fixes folded into the added lines (hunk headers adjusted accordingly):
  * cx_strsplit: never write a second item when limit is 1.
  * cx_strsplit_a: check the allocation, do not iterate on an empty delimiter.
  * cx_strreplacen_a: no ssize_t, result is allocated with a terminator byte.

--- a/src/string.c	Sat Sep 03 15:11:23 2022 +0200
+++ b/src/string.c	Fri Sep 09 20:19:08 2022 +0200
@@ -72,6 +72,15 @@
     str->length = 0;
 }
 
+void cx_strfree_a(
+        CxAllocator *alloc,
+        cxmutstr *str
+) {
+    cxFree(alloc, str->ptr);
+    str->ptr = NULL;
+    str->length = 0;
+}
+
 size_t cx_strlen(
         size_t count,
         ...
@@ -235,6 +244,11 @@
         return haystack;
     }
 
+    /* optimize for single-char needles */
+    if (needle.length == 1) {
+        return cx_strchr(haystack, *needle.ptr);
+    }
+
     /*
      * IMPORTANT:
      * Our prefix table contains the prefix length PLUS ONE
@@ -308,8 +322,55 @@
         size_t limit,
         cxstring *output
 ) {
-    // TODO: implement
-    return 0;
+    /* special case: output limit is zero */
+    if (limit == 0) return 0;
+
+    /* special case: delimiter is empty */
+    if (delim.length == 0) {
+        output[0] = string;
+        return 1;
+    }
+
+    /* special cases: delimiter is at least as large as the string */
+    if (delim.length >= string.length) {
+        /* exact match yields two empty items, which needs limit >= 2 */
+        if (limit > 1 && cx_strcmp(string, delim) == 0) {
+            output[0] = cx_strn(string.ptr, 0);
+            output[1] = cx_strn(string.ptr + string.length, 0);
+            return 2;
+        } else /* no match possible, or no room for a second item */ {
+            output[0] = string;
+            return 1;
+        }
+    }
+
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        if (match.length > 0) {
+            /* is the limit reached? */
+            if (n < limit) {
+                /* copy the current string to the array */
+                cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
+                output[n - 1] = item;
+                size_t processed = item.length + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                /* limit reached, copy the _full_ remaining string */
+                output[n - 1] = curpos;
+                break;
+            }
+        } else {
+            /* no more matches, copy last string */
+            output[n - 1] = curpos;
+            break;
+        }
+    }
+
+    return n;
 }
 
 size_t cx_strsplit_a(
@@ -319,8 +380,30 @@
         size_t limit,
         cxstring **output
 ) {
-    // TODO: implement
-    return 0;
+    /* find out how many splits we're going to make and allocate memory */
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        if (delim.length > 0 && match.length > 0) {
+            /* is the limit reached? */
+            if (n < limit) {
+                size_t processed = match.ptr - curpos.ptr + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                /* limit reached */
+                break;
+            }
+        } else {
+            /* no more matches (an empty delimiter never splits) */
+            break;
+        }
+    }
+    *output = cxCalloc(allocator, n, sizeof(cxstring));
+    if (*output == NULL) return 0;
+    return cx_strsplit(string, delim, n, *output);
 }
 
 size_t cx_strsplit_m(
@@ -344,7 +427,10 @@
                          delim, limit, (cxstring **) output);
 }
 
-int cx_strcmp(cxstring s1, cxstring s2) {
+int cx_strcmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
         return memcmp(s1.ptr, s2.ptr, s1.length);
     } else if (s1.length > s2.length) {
@@ -354,7 +440,10 @@
     }
 }
 
-int cx_strcasecmp(cxstring s1, cxstring s2) {
+int cx_strcasecmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
 #ifdef _WIN32
         return _strnicmp(s1.ptr, s2.ptr, s1.length);
@@ -368,7 +457,10 @@
     }
 }
 
-cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) {
+cxmutstr cx_strdup_a(
+        CxAllocator *allocator,
+        cxstring string
+) {
     cxmutstr result = {
             cxMalloc(allocator, string.length + 1),
             string.length
@@ -400,18 +492,27 @@
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 
-bool cx_strprefix(cxstring string, cxstring prefix) {
+bool cx_strprefix(
+        cxstring string,
+        cxstring prefix
+) {
     if (string.length < prefix.length) return false;
     return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
 }
 
-bool cx_strsuffix(cxstring string, cxstring suffix) {
+bool cx_strsuffix(
+        cxstring string,
+        cxstring suffix
+) {
     if (string.length < suffix.length) return false;
     return memcmp(string.ptr + string.length - suffix.length,
                   suffix.ptr, suffix.length) == 0;
 }
 
-bool cx_casestrprefix(cxstring string, cxstring prefix) {
+bool cx_strcaseprefix(
+        cxstring string,
+        cxstring prefix
+) {
     if (string.length < prefix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
@@ -420,7 +521,10 @@
 #endif
 }
 
-bool cx_casestrsuffix(cxstring string, cxstring suffix) {
+bool cx_strcasesuffix(
+        cxstring string,
+        cxstring suffix
+) {
     if (string.length < suffix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr+string.length-suffix.length,
@@ -442,3 +546,135 @@
         string.ptr[i] = toupper(string.ptr[i]);
     }
 }
+
+#define REPLACE_INDEX_BUFFER_MAX 100
+
+struct cx_strreplace_ibuf {
+    size_t *buf;
+    unsigned int len; /* small indices */
+    struct cx_strreplace_ibuf *next;
+};
+
+static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
+    while (buf) {
+        struct cx_strreplace_ibuf *next = buf->next;
+        free(buf->buf);
+        free(buf);
+        buf = next;
+    }
+}
+
+cxmutstr cx_strreplacen_a(
+        CxAllocator *allocator,
+        cxstring str,
+        cxstring pattern,
+        cxstring replacement,
+        size_t replmax
+) {
+
+    if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
+        return cx_strdup_a(allocator, str);
+
+    /* Compute expected buffer length */
+    size_t ibufmax = str.length / pattern.length;
+    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
+    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
+        ibuflen = REPLACE_INDEX_BUFFER_MAX;
+    }
+
+    /* Allocate first index buffer */
+    struct cx_strreplace_ibuf *firstbuf, *curbuf;
+    firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
+    if (!firstbuf) return cx_mutstrn(NULL, 0);
+    firstbuf->buf = calloc(ibuflen, sizeof(size_t));
+    if (!firstbuf->buf) {
+        free(firstbuf);
+        return cx_mutstrn(NULL, 0);
+    }
+
+    /* Search occurrences */
+    cxstring searchstr = str;
+    size_t found = 0;
+    do {
+        cxstring match = cx_strstr(searchstr, pattern);
+        if (match.length > 0) {
+            /* Allocate next buffer in chain, if required */
+            if (curbuf->len == ibuflen) {
+                struct cx_strreplace_ibuf *nextbuf =
+                        calloc(1, sizeof(struct cx_strreplace_ibuf));
+                if (!nextbuf) {
+                    cx_strrepl_free_ibuf(firstbuf);
+                    return cx_mutstrn(NULL, 0);
+                }
+                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
+                if (!nextbuf->buf) {
+                    free(nextbuf);
+                    cx_strrepl_free_ibuf(firstbuf);
+                    return cx_mutstrn(NULL, 0);
+                }
+                curbuf->next = nextbuf;
+                curbuf = nextbuf;
+            }
+
+            /* Record match index */
+            found++;
+            size_t idx = match.ptr - str.ptr;
+            curbuf->buf[curbuf->len++] = idx;
+            searchstr.ptr = match.ptr + pattern.length;
+            searchstr.length = str.length - idx - pattern.length;
+        } else {
+            break;
+        }
+    } while (searchstr.length > 0 && found < replmax);
+
+    /* Allocate result string (one extra byte for the terminator) */
+    cxmutstr result;
+    {
+        size_t rcount = 0;
+        curbuf = firstbuf;
+        do {
+            rcount += curbuf->len;
+            curbuf = curbuf->next;
+        } while (curbuf);
+        /* matches do not overlap, so the subtraction cannot wrap */
+        result.length = str.length - rcount * pattern.length
+                        + rcount * replacement.length;
+        result.ptr = cxMalloc(allocator, result.length + 1);
+        if (!result.ptr) {
+            cx_strrepl_free_ibuf(firstbuf);
+            return cx_mutstrn(NULL, 0);
+        }
+    }
+
+    /* Build result string */
+    curbuf = firstbuf;
+    size_t srcidx = 0;
+    char *destptr = result.ptr;
+    do {
+        for (size_t i = 0; i < curbuf->len; i++) {
+            /* Copy source part up to next match */
+            size_t idx = curbuf->buf[i];
+            size_t srclen = idx - srcidx;
+            if (srclen > 0) {
+                memcpy(destptr, str.ptr + srcidx, srclen);
+                destptr += srclen;
+                srcidx += srclen;
+            }
+
+            /* Copy the replacement and skip the source pattern */
+            srcidx += pattern.length;
+            memcpy(destptr, replacement.ptr, replacement.length);
+            destptr += replacement.length;
+        }
+        curbuf = curbuf->next;
+    } while (curbuf);
+    memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
+    result.ptr[result.length] = '\0';
+
+    /* Free index buffer */
+    cx_strrepl_free_ibuf(firstbuf);
+
+    return result;
+}
+
+