Sun, 13 Nov 2022 13:22:03 +0100
proposal for a low level array copy
universe@576 | 1 | /* |
universe@576 | 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
universe@576 | 3 | * |
universe@576 | 4 | * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. |
universe@576 | 5 | * |
universe@576 | 6 | * Redistribution and use in source and binary forms, with or without |
universe@576 | 7 | * modification, are permitted provided that the following conditions are met: |
universe@576 | 8 | * |
universe@576 | 9 | * 1. Redistributions of source code must retain the above copyright |
universe@576 | 10 | * notice, this list of conditions and the following disclaimer. |
universe@576 | 11 | * |
universe@576 | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
universe@576 | 13 | * notice, this list of conditions and the following disclaimer in the |
universe@576 | 14 | * documentation and/or other materials provided with the distribution. |
universe@576 | 15 | * |
universe@576 | 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
universe@576 | 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
universe@576 | 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
universe@576 | 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
universe@576 | 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
universe@576 | 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
universe@576 | 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
universe@576 | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
universe@576 | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
universe@576 | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
universe@576 | 26 | * POSSIBILITY OF SUCH DAMAGE. |
universe@576 | 27 | */ |
universe@576 | 28 | |
universe@576 | 29 | #include "cx/string.h" |
universe@579 | 30 | #include "cx/utils.h" |
universe@579 | 31 | |
universe@579 | 32 | #include <string.h> |
universe@579 | 33 | #include <stdarg.h> |
universe@581 | 34 | #include <ctype.h> |
universe@581 | 35 | |
universe@581 | 36 | #ifndef _WIN32 |
universe@581 | 37 | |
universe@581 | 38 | #include <strings.h> /* for strncasecmp() */ |
universe@581 | 39 | |
universe@581 | 40 | #endif /* _WIN32 */ |
universe@579 | 41 | |
universe@579 | 42 | cxmutstr cx_mutstr(char *cstring) { |
universe@579 | 43 | return (cxmutstr) {cstring, strlen(cstring)}; |
universe@579 | 44 | } |
universe@579 | 45 | |
universe@579 | 46 | cxmutstr cx_mutstrn( |
universe@579 | 47 | char *cstring, |
universe@579 | 48 | size_t length |
universe@579 | 49 | ) { |
universe@579 | 50 | return (cxmutstr) {cstring, length}; |
universe@579 | 51 | } |
universe@579 | 52 | |
universe@579 | 53 | cxstring cx_str(const char *cstring) { |
universe@579 | 54 | return (cxstring) {cstring, strlen(cstring)}; |
universe@579 | 55 | } |
universe@579 | 56 | |
universe@579 | 57 | cxstring cx_strn( |
universe@579 | 58 | const char *cstring, |
universe@579 | 59 | size_t length |
universe@579 | 60 | ) { |
universe@579 | 61 | return (cxstring) {cstring, length}; |
universe@579 | 62 | } |
universe@579 | 63 | |
universe@579 | 64 | cxstring cx_strcast(cxmutstr str) { |
universe@579 | 65 | return (cxstring) {str.ptr, str.length}; |
universe@579 | 66 | } |
universe@579 | 67 | |
universe@579 | 68 | void cx_strfree(cxmutstr *str) { |
universe@579 | 69 | free(str->ptr); |
universe@579 | 70 | str->ptr = NULL; |
universe@579 | 71 | str->length = 0; |
universe@579 | 72 | } |
universe@579 | 73 | |
universe@583 | 74 | void cx_strfree_a( |
universe@583 | 75 | CxAllocator *alloc, |
universe@583 | 76 | cxmutstr *str |
universe@583 | 77 | ) { |
universe@583 | 78 | cxFree(alloc, str->ptr); |
universe@583 | 79 | str->ptr = NULL; |
universe@583 | 80 | str->length = 0; |
universe@583 | 81 | } |
universe@583 | 82 | |
universe@579 | 83 | size_t cx_strlen( |
universe@579 | 84 | size_t count, |
universe@579 | 85 | ... |
universe@579 | 86 | ) { |
universe@579 | 87 | if (count == 0) return 0; |
universe@579 | 88 | |
universe@579 | 89 | va_list ap; |
universe@579 | 90 | va_start(ap, count); |
universe@579 | 91 | size_t size = 0; |
universe@579 | 92 | cx_for_n(i, count) { |
universe@579 | 93 | cxstring str = va_arg(ap, cxstring); |
universe@579 | 94 | size += str.length; |
universe@579 | 95 | } |
universe@579 | 96 | va_end(ap); |
universe@579 | 97 | |
universe@579 | 98 | return size; |
universe@579 | 99 | } |
universe@579 | 100 | |
universe@579 | 101 | cxmutstr cx_strcat_a( |
universe@579 | 102 | CxAllocator *alloc, |
universe@579 | 103 | size_t count, |
universe@579 | 104 | ... |
universe@579 | 105 | ) { |
universe@579 | 106 | cxstring *strings = calloc(count, sizeof(cxstring)); |
universe@579 | 107 | if (!strings) abort(); |
universe@579 | 108 | |
universe@579 | 109 | va_list ap; |
universe@579 | 110 | va_start(ap, count); |
universe@579 | 111 | |
universe@579 | 112 | // get all args and overall length |
universe@579 | 113 | size_t slen = 0; |
universe@579 | 114 | cx_for_n(i, count) { |
universe@579 | 115 | cxstring s = va_arg (ap, cxstring); |
universe@579 | 116 | strings[i] = s; |
universe@579 | 117 | slen += s.length; |
universe@579 | 118 | } |
universe@579 | 119 | |
universe@579 | 120 | // create new string |
universe@579 | 121 | cxmutstr result; |
universe@579 | 122 | result.ptr = cxMalloc(alloc, slen + 1); |
universe@579 | 123 | result.length = slen; |
universe@579 | 124 | if (result.ptr == NULL) abort(); |
universe@579 | 125 | |
universe@579 | 126 | // concatenate strings |
universe@579 | 127 | size_t pos = 0; |
universe@579 | 128 | cx_for_n(i, count) { |
universe@579 | 129 | cxstring s = strings[i]; |
universe@579 | 130 | memcpy(result.ptr + pos, s.ptr, s.length); |
universe@579 | 131 | pos += s.length; |
universe@579 | 132 | } |
universe@579 | 133 | |
universe@579 | 134 | // terminate string |
universe@579 | 135 | result.ptr[result.length] = '\0'; |
universe@579 | 136 | |
universe@579 | 137 | // free temporary array |
universe@579 | 138 | free(strings); |
universe@579 | 139 | |
universe@579 | 140 | return result; |
universe@579 | 141 | } |
universe@579 | 142 | |
universe@580 | 143 | cxstring cx_strsubs( |
universe@580 | 144 | cxstring string, |
universe@580 | 145 | size_t start |
universe@580 | 146 | ) { |
universe@580 | 147 | return cx_strsubsl(string, start, string.length - start); |
universe@580 | 148 | } |
universe@579 | 149 | |
universe@580 | 150 | cxmutstr cx_strsubs_m( |
universe@580 | 151 | cxmutstr string, |
universe@580 | 152 | size_t start |
universe@580 | 153 | ) { |
universe@580 | 154 | return cx_strsubsl_m(string, start, string.length - start); |
universe@580 | 155 | } |
universe@579 | 156 | |
universe@580 | 157 | cxstring cx_strsubsl( |
universe@580 | 158 | cxstring string, |
universe@580 | 159 | size_t start, |
universe@580 | 160 | size_t length |
universe@580 | 161 | ) { |
universe@580 | 162 | if (start > string.length) { |
universe@580 | 163 | return (cxstring) {NULL, 0}; |
universe@580 | 164 | } |
universe@580 | 165 | |
universe@580 | 166 | size_t rem_len = string.length - start; |
universe@580 | 167 | if (length > rem_len) { |
universe@580 | 168 | length = rem_len; |
universe@580 | 169 | } |
universe@580 | 170 | |
universe@580 | 171 | return (cxstring) {string.ptr + start, length}; |
universe@580 | 172 | } |
universe@580 | 173 | |
universe@580 | 174 | cxmutstr cx_strsubsl_m( |
universe@580 | 175 | cxmutstr string, |
universe@580 | 176 | size_t start, |
universe@580 | 177 | size_t length |
universe@580 | 178 | ) { |
universe@580 | 179 | cxstring result = cx_strsubsl(cx_strcast(string), start, length); |
universe@580 | 180 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 181 | } |
universe@580 | 182 | |
universe@580 | 183 | cxstring cx_strchr( |
universe@580 | 184 | cxstring string, |
universe@580 | 185 | int chr |
universe@580 | 186 | ) { |
universe@580 | 187 | chr = 0xFF & chr; |
universe@580 | 188 | // TODO: improve by comparing multiple bytes at once |
universe@580 | 189 | cx_for_n(i, string.length) { |
universe@580 | 190 | if (string.ptr[i] == chr) { |
universe@580 | 191 | return cx_strsubs(string, i); |
universe@580 | 192 | } |
universe@580 | 193 | } |
universe@580 | 194 | return (cxstring) {NULL, 0}; |
universe@580 | 195 | } |
universe@580 | 196 | |
universe@580 | 197 | cxmutstr cx_strchr_m( |
universe@580 | 198 | cxmutstr string, |
universe@580 | 199 | int chr |
universe@580 | 200 | ) { |
universe@580 | 201 | cxstring result = cx_strchr(cx_strcast(string), chr); |
universe@580 | 202 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 203 | } |
universe@580 | 204 | |
universe@580 | 205 | cxstring cx_strrchr( |
universe@580 | 206 | cxstring string, |
universe@580 | 207 | int chr |
universe@580 | 208 | ) { |
universe@580 | 209 | chr = 0xFF & chr; |
universe@580 | 210 | size_t i = string.length; |
universe@580 | 211 | while (i > 0) { |
universe@580 | 212 | i--; |
universe@580 | 213 | // TODO: improve by comparing multiple bytes at once |
universe@580 | 214 | if (string.ptr[i] == chr) { |
universe@580 | 215 | return cx_strsubs(string, i); |
universe@580 | 216 | } |
universe@580 | 217 | } |
universe@580 | 218 | return (cxstring) {NULL, 0}; |
universe@580 | 219 | } |
universe@580 | 220 | |
universe@580 | 221 | cxmutstr cx_strrchr_m( |
universe@580 | 222 | cxmutstr string, |
universe@580 | 223 | int chr |
universe@580 | 224 | ) { |
universe@580 | 225 | cxstring result = cx_strrchr(cx_strcast(string), chr); |
universe@580 | 226 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 227 | } |
universe@580 | 228 | |
universe@591 | 229 | #define STRSTR_SBO_BUFLEN 512 |
universe@580 | 230 | |
universe@580 | 231 | cxstring cx_strstr( |
universe@580 | 232 | cxstring haystack, |
universe@580 | 233 | cxstring needle |
universe@580 | 234 | ) { |
universe@580 | 235 | if (needle.length == 0) { |
universe@580 | 236 | return haystack; |
universe@580 | 237 | } |
universe@580 | 238 | |
universe@583 | 239 | /* optimize for single-char needles */ |
universe@583 | 240 | if (needle.length == 1) { |
universe@583 | 241 | return cx_strchr(haystack, *needle.ptr); |
universe@583 | 242 | } |
universe@583 | 243 | |
universe@580 | 244 | /* |
universe@580 | 245 | * IMPORTANT: |
universe@580 | 246 | * Our prefix table contains the prefix length PLUS ONE |
universe@580 | 247 | * this is our decision, because we want to use the full range of size_t. |
universe@580 | 248 | * The original algorithm needs a (-1) at one single place, |
universe@580 | 249 | * and we want to avoid that. |
universe@580 | 250 | */ |
universe@580 | 251 | |
universe@591 | 252 | /* local prefix table */ |
universe@591 | 253 | size_t s_prefix_table[STRSTR_SBO_BUFLEN]; |
universe@580 | 254 | |
universe@591 | 255 | /* check needle length and use appropriate prefix table */ |
universe@580 | 256 | /* if the pattern exceeds static prefix table, allocate on the heap */ |
universe@591 | 257 | bool useheap = needle.length >= STRSTR_SBO_BUFLEN; |
universe@591 | 258 | register size_t *ptable = useheap ? calloc(needle.length + 1, |
universe@591 | 259 | sizeof(size_t)) : s_prefix_table; |
universe@580 | 260 | |
universe@580 | 261 | /* keep counter in registers */ |
universe@580 | 262 | register size_t i, j; |
universe@580 | 263 | |
universe@580 | 264 | /* fill prefix table */ |
universe@580 | 265 | i = 0; |
universe@580 | 266 | j = 0; |
universe@591 | 267 | ptable[i] = j; |
universe@580 | 268 | while (i < needle.length) { |
universe@580 | 269 | while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { |
universe@591 | 270 | j = ptable[j - 1]; |
universe@580 | 271 | } |
universe@580 | 272 | i++; |
universe@580 | 273 | j++; |
universe@591 | 274 | ptable[i] = j; |
universe@580 | 275 | } |
universe@580 | 276 | |
universe@580 | 277 | /* search */ |
universe@580 | 278 | cxstring result = {NULL, 0}; |
universe@580 | 279 | i = 0; |
universe@580 | 280 | j = 1; |
universe@580 | 281 | while (i < haystack.length) { |
universe@580 | 282 | while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { |
universe@591 | 283 | j = ptable[j - 1]; |
universe@580 | 284 | } |
universe@580 | 285 | i++; |
universe@580 | 286 | j++; |
universe@580 | 287 | if (j - 1 == needle.length) { |
universe@580 | 288 | size_t start = i - needle.length; |
universe@580 | 289 | result.ptr = haystack.ptr + start; |
universe@580 | 290 | result.length = haystack.length - start; |
universe@580 | 291 | break; |
universe@580 | 292 | } |
universe@580 | 293 | } |
universe@580 | 294 | |
universe@580 | 295 | /* if prefix table was allocated on the heap, free it */ |
universe@580 | 296 | if (ptable != s_prefix_table) { |
universe@580 | 297 | free(ptable); |
universe@580 | 298 | } |
universe@580 | 299 | |
universe@580 | 300 | return result; |
universe@580 | 301 | } |
universe@580 | 302 | |
universe@580 | 303 | cxmutstr cx_strstr_m( |
universe@580 | 304 | cxmutstr haystack, |
universe@580 | 305 | cxstring needle |
universe@580 | 306 | ) { |
universe@580 | 307 | cxstring result = cx_strstr(cx_strcast(haystack), needle); |
universe@580 | 308 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 309 | } |
universe@580 | 310 | |
universe@580 | 311 | size_t cx_strsplit( |
universe@580 | 312 | cxstring string, |
universe@580 | 313 | cxstring delim, |
universe@580 | 314 | size_t limit, |
universe@580 | 315 | cxstring *output |
universe@580 | 316 | ) { |
universe@583 | 317 | /* special case: output limit is zero */ |
universe@583 | 318 | if (limit == 0) return 0; |
universe@583 | 319 | |
universe@583 | 320 | /* special case: delimiter is empty */ |
universe@583 | 321 | if (delim.length == 0) { |
universe@583 | 322 | output[0] = string; |
universe@583 | 323 | return 1; |
universe@583 | 324 | } |
universe@583 | 325 | |
universe@583 | 326 | /* special cases: delimiter is at least as large as the string */ |
universe@583 | 327 | if (delim.length >= string.length) { |
universe@583 | 328 | /* exact match */ |
universe@583 | 329 | if (cx_strcmp(string, delim) == 0) { |
universe@583 | 330 | output[0] = cx_strn(string.ptr, 0); |
universe@583 | 331 | output[1] = cx_strn(string.ptr + string.length, 0); |
universe@583 | 332 | return 2; |
universe@583 | 333 | } else /* no match possible */ { |
universe@583 | 334 | output[0] = string; |
universe@583 | 335 | return 1; |
universe@583 | 336 | } |
universe@583 | 337 | } |
universe@583 | 338 | |
universe@583 | 339 | size_t n = 0; |
universe@583 | 340 | cxstring curpos = string; |
universe@583 | 341 | while (1) { |
universe@583 | 342 | ++n; |
universe@583 | 343 | cxstring match = cx_strstr(curpos, delim); |
universe@583 | 344 | if (match.length > 0) { |
universe@583 | 345 | /* is the limit reached? */ |
universe@583 | 346 | if (n < limit) { |
universe@583 | 347 | /* copy the current string to the array */ |
universe@583 | 348 | cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); |
universe@583 | 349 | output[n - 1] = item; |
universe@583 | 350 | size_t processed = item.length + delim.length; |
universe@583 | 351 | curpos.ptr += processed; |
universe@583 | 352 | curpos.length -= processed; |
universe@583 | 353 | } else { |
universe@583 | 354 | /* limit reached, copy the _full_ remaining string */ |
universe@583 | 355 | output[n - 1] = curpos; |
universe@583 | 356 | break; |
universe@583 | 357 | } |
universe@583 | 358 | } else { |
universe@583 | 359 | /* no more matches, copy last string */ |
universe@583 | 360 | output[n - 1] = curpos; |
universe@583 | 361 | break; |
universe@583 | 362 | } |
universe@583 | 363 | } |
universe@583 | 364 | |
universe@583 | 365 | return n; |
universe@580 | 366 | } |
universe@580 | 367 | |
universe@580 | 368 | size_t cx_strsplit_a( |
universe@580 | 369 | CxAllocator *allocator, |
universe@580 | 370 | cxstring string, |
universe@580 | 371 | cxstring delim, |
universe@580 | 372 | size_t limit, |
universe@580 | 373 | cxstring **output |
universe@580 | 374 | ) { |
universe@583 | 375 | /* find out how many splits we're going to make and allocate memory */ |
universe@583 | 376 | size_t n = 0; |
universe@583 | 377 | cxstring curpos = string; |
universe@583 | 378 | while (1) { |
universe@583 | 379 | ++n; |
universe@583 | 380 | cxstring match = cx_strstr(curpos, delim); |
universe@583 | 381 | if (match.length > 0) { |
universe@583 | 382 | /* is the limit reached? */ |
universe@583 | 383 | if (n < limit) { |
universe@583 | 384 | size_t processed = match.ptr - curpos.ptr + delim.length; |
universe@583 | 385 | curpos.ptr += processed; |
universe@583 | 386 | curpos.length -= processed; |
universe@583 | 387 | } else { |
universe@583 | 388 | /* limit reached */ |
universe@583 | 389 | break; |
universe@583 | 390 | } |
universe@583 | 391 | } else { |
universe@583 | 392 | /* no more matches */ |
universe@583 | 393 | break; |
universe@583 | 394 | } |
universe@583 | 395 | } |
universe@583 | 396 | *output = cxCalloc(allocator, n, sizeof(cxstring)); |
universe@583 | 397 | return cx_strsplit(string, delim, n, *output); |
universe@580 | 398 | } |
universe@580 | 399 | |
universe@580 | 400 | size_t cx_strsplit_m( |
universe@580 | 401 | cxmutstr string, |
universe@580 | 402 | cxstring delim, |
universe@580 | 403 | size_t limit, |
universe@580 | 404 | cxmutstr *output |
universe@580 | 405 | ) { |
universe@580 | 406 | return cx_strsplit(cx_strcast(string), |
universe@580 | 407 | delim, limit, (cxstring *) output); |
universe@580 | 408 | } |
universe@580 | 409 | |
universe@580 | 410 | size_t cx_strsplit_ma( |
universe@580 | 411 | CxAllocator *allocator, |
universe@580 | 412 | cxmutstr string, |
universe@580 | 413 | cxstring delim, |
universe@580 | 414 | size_t limit, |
universe@580 | 415 | cxmutstr **output |
universe@580 | 416 | ) { |
universe@580 | 417 | return cx_strsplit_a(allocator, cx_strcast(string), |
universe@580 | 418 | delim, limit, (cxstring **) output); |
universe@580 | 419 | } |
universe@581 | 420 | |
universe@583 | 421 | int cx_strcmp( |
universe@583 | 422 | cxstring s1, |
universe@583 | 423 | cxstring s2 |
universe@583 | 424 | ) { |
universe@581 | 425 | if (s1.length == s2.length) { |
universe@581 | 426 | return memcmp(s1.ptr, s2.ptr, s1.length); |
universe@581 | 427 | } else if (s1.length > s2.length) { |
universe@581 | 428 | return 1; |
universe@581 | 429 | } else { |
universe@581 | 430 | return -1; |
universe@581 | 431 | } |
universe@581 | 432 | } |
universe@581 | 433 | |
universe@583 | 434 | int cx_strcasecmp( |
universe@583 | 435 | cxstring s1, |
universe@583 | 436 | cxstring s2 |
universe@583 | 437 | ) { |
universe@581 | 438 | if (s1.length == s2.length) { |
universe@581 | 439 | #ifdef _WIN32 |
universe@581 | 440 | return _strnicmp(s1.ptr, s2.ptr, s1.length); |
universe@581 | 441 | #else |
universe@581 | 442 | return strncasecmp(s1.ptr, s2.ptr, s1.length); |
universe@581 | 443 | #endif |
universe@581 | 444 | } else if (s1.length > s2.length) { |
universe@581 | 445 | return 1; |
universe@581 | 446 | } else { |
universe@581 | 447 | return -1; |
universe@581 | 448 | } |
universe@581 | 449 | } |
universe@581 | 450 | |
universe@583 | 451 | cxmutstr cx_strdup_a( |
universe@583 | 452 | CxAllocator *allocator, |
universe@583 | 453 | cxstring string |
universe@583 | 454 | ) { |
universe@581 | 455 | cxmutstr result = { |
universe@581 | 456 | cxMalloc(allocator, string.length + 1), |
universe@581 | 457 | string.length |
universe@581 | 458 | }; |
universe@581 | 459 | if (result.ptr == NULL) { |
universe@581 | 460 | result.length = 0; |
universe@581 | 461 | return result; |
universe@581 | 462 | } |
universe@581 | 463 | memcpy(result.ptr, string.ptr, string.length); |
universe@581 | 464 | result.ptr[string.length] = '\0'; |
universe@581 | 465 | return result; |
universe@581 | 466 | } |
universe@581 | 467 | |
universe@581 | 468 | cxstring cx_strtrim(cxstring string) { |
universe@581 | 469 | cxstring result = string; |
universe@581 | 470 | // TODO: optimize by comparing multiple bytes at once |
universe@581 | 471 | while (result.length > 0 && isspace(*result.ptr)) { |
universe@581 | 472 | result.ptr++; |
universe@581 | 473 | result.length--; |
universe@581 | 474 | } |
universe@581 | 475 | while (result.length > 0 && isspace(result.ptr[result.length - 1])) { |
universe@581 | 476 | result.length--; |
universe@581 | 477 | } |
universe@581 | 478 | return result; |
universe@581 | 479 | } |
universe@581 | 480 | |
universe@581 | 481 | cxmutstr cx_strtrim_m(cxmutstr string) { |
universe@581 | 482 | cxstring result = cx_strtrim(cx_strcast(string)); |
universe@581 | 483 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@581 | 484 | } |
universe@581 | 485 | |
universe@583 | 486 | bool cx_strprefix( |
universe@583 | 487 | cxstring string, |
universe@583 | 488 | cxstring prefix |
universe@583 | 489 | ) { |
universe@581 | 490 | if (string.length < prefix.length) return false; |
universe@581 | 491 | return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
universe@581 | 492 | } |
universe@581 | 493 | |
universe@583 | 494 | bool cx_strsuffix( |
universe@583 | 495 | cxstring string, |
universe@583 | 496 | cxstring suffix |
universe@583 | 497 | ) { |
universe@581 | 498 | if (string.length < suffix.length) return false; |
universe@581 | 499 | return memcmp(string.ptr + string.length - suffix.length, |
universe@581 | 500 | suffix.ptr, suffix.length) == 0; |
universe@581 | 501 | } |
universe@581 | 502 | |
universe@583 | 503 | bool cx_strcaseprefix( |
universe@583 | 504 | cxstring string, |
universe@583 | 505 | cxstring prefix |
universe@583 | 506 | ) { |
universe@581 | 507 | if (string.length < prefix.length) return false; |
universe@581 | 508 | #ifdef _WIN32 |
universe@581 | 509 | return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; |
universe@581 | 510 | #else |
universe@581 | 511 | return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; |
universe@581 | 512 | #endif |
universe@581 | 513 | } |
universe@581 | 514 | |
universe@583 | 515 | bool cx_strcasesuffix( |
universe@583 | 516 | cxstring string, |
universe@583 | 517 | cxstring suffix |
universe@583 | 518 | ) { |
universe@581 | 519 | if (string.length < suffix.length) return false; |
universe@581 | 520 | #ifdef _WIN32 |
universe@581 | 521 | return _strnicmp(string.ptr+string.length-suffix.length, |
universe@581 | 522 | suffix.ptr, suffix.length) == 0; |
universe@581 | 523 | #else |
universe@581 | 524 | return strncasecmp(string.ptr + string.length - suffix.length, |
universe@581 | 525 | suffix.ptr, suffix.length) == 0; |
universe@581 | 526 | #endif |
universe@581 | 527 | } |
universe@582 | 528 | |
universe@582 | 529 | void cx_strlower(cxmutstr string) { |
universe@582 | 530 | cx_for_n(i, string.length) { |
universe@593 | 531 | string.ptr[i] = (char) tolower(string.ptr[i]); |
universe@582 | 532 | } |
universe@582 | 533 | } |
universe@582 | 534 | |
universe@582 | 535 | void cx_strupper(cxmutstr string) { |
universe@582 | 536 | cx_for_n(i, string.length) { |
universe@593 | 537 | string.ptr[i] = (char) toupper(string.ptr[i]); |
universe@582 | 538 | } |
universe@582 | 539 | } |
universe@583 | 540 | |
/* Upper bound for the number of match indices stored per index buffer. */
#define REPLACE_INDEX_BUFFER_MAX 100

/* One element in the chain of index buffers used by cx_strreplacen_a(). */
struct cx_strreplace_ibuf {
    /* heap-allocated array of match indices */
    size_t *buf;
    /* the following buffer in the chain, or NULL */
    struct cx_strreplace_ibuf *next;
    /* number of indices stored in buf so far */
    unsigned int len;
};

/**
 * Releases a complete chain of index buffers: for each element, first the
 * index array and then the element itself.
 *
 * @param buf the first element of the chain (may be NULL)
 */
static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
    struct cx_strreplace_ibuf *node = buf;
    while (node != NULL) {
        /* remember the successor before the node is gone */
        struct cx_strreplace_ibuf *following = node->next;
        free(node->buf);
        free(node);
        node = following;
    }
}
universe@583 | 557 | |
universe@583 | 558 | cxmutstr cx_strreplacen_a( |
universe@583 | 559 | CxAllocator *allocator, |
universe@583 | 560 | cxstring str, |
universe@583 | 561 | cxstring pattern, |
universe@583 | 562 | cxstring replacement, |
universe@583 | 563 | size_t replmax |
universe@583 | 564 | ) { |
universe@583 | 565 | |
universe@583 | 566 | if (pattern.length == 0 || pattern.length > str.length || replmax == 0) |
universe@583 | 567 | return cx_strdup_a(allocator, str); |
universe@583 | 568 | |
universe@583 | 569 | /* Compute expected buffer length */ |
universe@583 | 570 | size_t ibufmax = str.length / pattern.length; |
universe@583 | 571 | size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; |
universe@583 | 572 | if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { |
universe@583 | 573 | ibuflen = REPLACE_INDEX_BUFFER_MAX; |
universe@583 | 574 | } |
universe@583 | 575 | |
universe@583 | 576 | /* Allocate first index buffer */ |
universe@583 | 577 | struct cx_strreplace_ibuf *firstbuf, *curbuf; |
universe@583 | 578 | firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); |
universe@583 | 579 | if (!firstbuf) return cx_mutstrn(NULL, 0); |
universe@583 | 580 | firstbuf->buf = calloc(ibuflen, sizeof(size_t)); |
universe@583 | 581 | if (!firstbuf->buf) { |
universe@583 | 582 | free(firstbuf); |
universe@583 | 583 | return cx_mutstrn(NULL, 0); |
universe@583 | 584 | } |
universe@583 | 585 | |
universe@583 | 586 | /* Search occurrences */ |
universe@583 | 587 | cxstring searchstr = str; |
universe@583 | 588 | size_t found = 0; |
universe@583 | 589 | do { |
universe@583 | 590 | cxstring match = cx_strstr(searchstr, pattern); |
universe@583 | 591 | if (match.length > 0) { |
universe@583 | 592 | /* Allocate next buffer in chain, if required */ |
universe@583 | 593 | if (curbuf->len == ibuflen) { |
universe@583 | 594 | struct cx_strreplace_ibuf *nextbuf = |
universe@583 | 595 | calloc(1, sizeof(struct cx_strreplace_ibuf)); |
universe@583 | 596 | if (!nextbuf) { |
universe@583 | 597 | cx_strrepl_free_ibuf(firstbuf); |
universe@583 | 598 | return cx_mutstrn(NULL, 0); |
universe@583 | 599 | } |
universe@583 | 600 | nextbuf->buf = calloc(ibuflen, sizeof(size_t)); |
universe@583 | 601 | if (!nextbuf->buf) { |
universe@583 | 602 | free(nextbuf); |
universe@583 | 603 | cx_strrepl_free_ibuf(firstbuf); |
universe@583 | 604 | return cx_mutstrn(NULL, 0); |
universe@583 | 605 | } |
universe@583 | 606 | curbuf->next = nextbuf; |
universe@583 | 607 | curbuf = nextbuf; |
universe@583 | 608 | } |
universe@583 | 609 | |
universe@583 | 610 | /* Record match index */ |
universe@583 | 611 | found++; |
universe@583 | 612 | size_t idx = match.ptr - str.ptr; |
universe@583 | 613 | curbuf->buf[curbuf->len++] = idx; |
universe@583 | 614 | searchstr.ptr = match.ptr + pattern.length; |
universe@583 | 615 | searchstr.length = str.length - idx - pattern.length; |
universe@583 | 616 | } else { |
universe@583 | 617 | break; |
universe@583 | 618 | } |
universe@583 | 619 | } while (searchstr.length > 0 && found < replmax); |
universe@583 | 620 | |
universe@583 | 621 | /* Allocate result string */ |
universe@583 | 622 | cxmutstr result; |
universe@583 | 623 | { |
universe@583 | 624 | ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; |
universe@583 | 625 | size_t rcount = 0; |
universe@583 | 626 | curbuf = firstbuf; |
universe@583 | 627 | do { |
universe@583 | 628 | rcount += curbuf->len; |
universe@583 | 629 | curbuf = curbuf->next; |
universe@583 | 630 | } while (curbuf); |
universe@583 | 631 | result.length = str.length + rcount * adjlen; |
universe@590 | 632 | result.ptr = cxMalloc(allocator, result.length + 1); |
universe@583 | 633 | if (!result.ptr) { |
universe@583 | 634 | cx_strrepl_free_ibuf(firstbuf); |
universe@583 | 635 | return cx_mutstrn(NULL, 0); |
universe@583 | 636 | } |
universe@583 | 637 | } |
universe@583 | 638 | |
universe@583 | 639 | /* Build result string */ |
universe@583 | 640 | curbuf = firstbuf; |
universe@583 | 641 | size_t srcidx = 0; |
universe@583 | 642 | char *destptr = result.ptr; |
universe@583 | 643 | do { |
universe@583 | 644 | for (size_t i = 0; i < curbuf->len; i++) { |
universe@583 | 645 | /* Copy source part up to next match*/ |
universe@583 | 646 | size_t idx = curbuf->buf[i]; |
universe@583 | 647 | size_t srclen = idx - srcidx; |
universe@583 | 648 | if (srclen > 0) { |
universe@583 | 649 | memcpy(destptr, str.ptr + srcidx, srclen); |
universe@583 | 650 | destptr += srclen; |
universe@583 | 651 | srcidx += srclen; |
universe@583 | 652 | } |
universe@583 | 653 | |
universe@583 | 654 | /* Copy the replacement and skip the source pattern */ |
universe@583 | 655 | srcidx += pattern.length; |
universe@583 | 656 | memcpy(destptr, replacement.ptr, replacement.length); |
universe@583 | 657 | destptr += replacement.length; |
universe@583 | 658 | } |
universe@583 | 659 | curbuf = curbuf->next; |
universe@583 | 660 | } while (curbuf); |
universe@583 | 661 | memcpy(destptr, str.ptr + srcidx, str.length - srcidx); |
universe@583 | 662 | |
universe@590 | 663 | /* Result is guaranteed to be zero-terminated */ |
universe@590 | 664 | result.ptr[result.length] = '\0'; |
universe@590 | 665 | |
universe@583 | 666 | /* Free index buffer */ |
universe@583 | 667 | cx_strrepl_free_ibuf(firstbuf); |
universe@583 | 668 | |
universe@583 | 669 | return result; |
universe@583 | 670 | } |
universe@583 | 671 | |
universe@583 | 672 |