Mon, 18 Dec 2023 15:13:26 +0100
add cxBufferReset() - resolves #338
universe@576 | 1 | /* |
universe@576 | 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
universe@576 | 3 | * |
universe@576 | 4 | * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. |
universe@576 | 5 | * |
universe@576 | 6 | * Redistribution and use in source and binary forms, with or without |
universe@576 | 7 | * modification, are permitted provided that the following conditions are met: |
universe@576 | 8 | * |
universe@576 | 9 | * 1. Redistributions of source code must retain the above copyright |
universe@576 | 10 | * notice, this list of conditions and the following disclaimer. |
universe@576 | 11 | * |
universe@576 | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
universe@576 | 13 | * notice, this list of conditions and the following disclaimer in the |
universe@576 | 14 | * documentation and/or other materials provided with the distribution. |
universe@576 | 15 | * |
universe@576 | 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
universe@576 | 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
universe@576 | 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
universe@576 | 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
universe@576 | 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
universe@576 | 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
universe@576 | 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
universe@576 | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
universe@576 | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
universe@576 | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
universe@576 | 26 | * POSSIBILITY OF SUCH DAMAGE. |
universe@576 | 27 | */ |
universe@576 | 28 | |
universe@576 | 29 | #include "cx/string.h" |
universe@579 | 30 | #include "cx/utils.h" |
universe@579 | 31 | |
universe@579 | 32 | #include <string.h> |
universe@579 | 33 | #include <stdarg.h> |
universe@581 | 34 | #include <ctype.h> |
universe@581 | 35 | |
universe@581 | 36 | #ifndef _WIN32 |
universe@581 | 37 | |
universe@628 | 38 | #include <strings.h> // for strncasecmp() |
universe@581 | 39 | |
universe@628 | 40 | #endif // _WIN32 |
universe@579 | 41 | |
universe@579 | 42 | cxmutstr cx_mutstr(char *cstring) { |
universe@579 | 43 | return (cxmutstr) {cstring, strlen(cstring)}; |
universe@579 | 44 | } |
universe@579 | 45 | |
universe@579 | 46 | cxmutstr cx_mutstrn( |
universe@579 | 47 | char *cstring, |
universe@579 | 48 | size_t length |
universe@579 | 49 | ) { |
universe@579 | 50 | return (cxmutstr) {cstring, length}; |
universe@579 | 51 | } |
universe@579 | 52 | |
universe@579 | 53 | cxstring cx_str(const char *cstring) { |
universe@579 | 54 | return (cxstring) {cstring, strlen(cstring)}; |
universe@579 | 55 | } |
universe@579 | 56 | |
universe@579 | 57 | cxstring cx_strn( |
universe@579 | 58 | const char *cstring, |
universe@579 | 59 | size_t length |
universe@579 | 60 | ) { |
universe@579 | 61 | return (cxstring) {cstring, length}; |
universe@579 | 62 | } |
universe@579 | 63 | |
universe@579 | 64 | cxstring cx_strcast(cxmutstr str) { |
universe@579 | 65 | return (cxstring) {str.ptr, str.length}; |
universe@579 | 66 | } |
universe@579 | 67 | |
universe@579 | 68 | void cx_strfree(cxmutstr *str) { |
universe@579 | 69 | free(str->ptr); |
universe@579 | 70 | str->ptr = NULL; |
universe@579 | 71 | str->length = 0; |
universe@579 | 72 | } |
universe@579 | 73 | |
universe@583 | 74 | void cx_strfree_a( |
universe@693 | 75 | CxAllocator const *alloc, |
universe@583 | 76 | cxmutstr *str |
universe@583 | 77 | ) { |
universe@583 | 78 | cxFree(alloc, str->ptr); |
universe@583 | 79 | str->ptr = NULL; |
universe@583 | 80 | str->length = 0; |
universe@583 | 81 | } |
universe@583 | 82 | |
universe@579 | 83 | size_t cx_strlen( |
universe@579 | 84 | size_t count, |
universe@579 | 85 | ... |
universe@579 | 86 | ) { |
universe@579 | 87 | if (count == 0) return 0; |
universe@579 | 88 | |
universe@579 | 89 | va_list ap; |
universe@579 | 90 | va_start(ap, count); |
universe@579 | 91 | size_t size = 0; |
universe@579 | 92 | cx_for_n(i, count) { |
universe@579 | 93 | cxstring str = va_arg(ap, cxstring); |
universe@579 | 94 | size += str.length; |
universe@579 | 95 | } |
universe@579 | 96 | va_end(ap); |
universe@579 | 97 | |
universe@579 | 98 | return size; |
universe@579 | 99 | } |
universe@579 | 100 | |
universe@697 | 101 | cxmutstr cx_strcat_ma( |
universe@693 | 102 | CxAllocator const *alloc, |
universe@697 | 103 | cxmutstr str, |
universe@579 | 104 | size_t count, |
universe@579 | 105 | ... |
universe@579 | 106 | ) { |
universe@697 | 107 | if (count == 0) return str; |
universe@697 | 108 | |
universe@579 | 109 | cxstring *strings = calloc(count, sizeof(cxstring)); |
universe@579 | 110 | if (!strings) abort(); |
universe@579 | 111 | |
universe@579 | 112 | va_list ap; |
universe@579 | 113 | va_start(ap, count); |
universe@579 | 114 | |
universe@579 | 115 | // get all args and overall length |
universe@697 | 116 | size_t slen = str.length; |
universe@579 | 117 | cx_for_n(i, count) { |
universe@579 | 118 | cxstring s = va_arg (ap, cxstring); |
universe@579 | 119 | strings[i] = s; |
universe@579 | 120 | slen += s.length; |
universe@579 | 121 | } |
universe@697 | 122 | va_end(ap); |
universe@579 | 123 | |
universe@697 | 124 | // reallocate or create new string |
universe@697 | 125 | if (str.ptr == NULL) { |
universe@697 | 126 | str.ptr = cxMalloc(alloc, slen + 1); |
universe@697 | 127 | } else { |
universe@697 | 128 | str.ptr = cxRealloc(alloc, str.ptr, slen + 1); |
universe@697 | 129 | } |
universe@697 | 130 | if (str.ptr == NULL) abort(); |
universe@579 | 131 | |
universe@579 | 132 | // concatenate strings |
universe@697 | 133 | size_t pos = str.length; |
universe@697 | 134 | str.length = slen; |
universe@579 | 135 | cx_for_n(i, count) { |
universe@579 | 136 | cxstring s = strings[i]; |
universe@697 | 137 | memcpy(str.ptr + pos, s.ptr, s.length); |
universe@579 | 138 | pos += s.length; |
universe@579 | 139 | } |
universe@579 | 140 | |
universe@579 | 141 | // terminate string |
universe@697 | 142 | str.ptr[str.length] = '\0'; |
universe@579 | 143 | |
universe@579 | 144 | // free temporary array |
universe@579 | 145 | free(strings); |
universe@579 | 146 | |
universe@697 | 147 | return str; |
universe@579 | 148 | } |
universe@579 | 149 | |
universe@580 | 150 | cxstring cx_strsubs( |
universe@580 | 151 | cxstring string, |
universe@580 | 152 | size_t start |
universe@580 | 153 | ) { |
universe@580 | 154 | return cx_strsubsl(string, start, string.length - start); |
universe@580 | 155 | } |
universe@579 | 156 | |
universe@580 | 157 | cxmutstr cx_strsubs_m( |
universe@580 | 158 | cxmutstr string, |
universe@580 | 159 | size_t start |
universe@580 | 160 | ) { |
universe@580 | 161 | return cx_strsubsl_m(string, start, string.length - start); |
universe@580 | 162 | } |
universe@579 | 163 | |
universe@580 | 164 | cxstring cx_strsubsl( |
universe@580 | 165 | cxstring string, |
universe@580 | 166 | size_t start, |
universe@580 | 167 | size_t length |
universe@580 | 168 | ) { |
universe@580 | 169 | if (start > string.length) { |
universe@580 | 170 | return (cxstring) {NULL, 0}; |
universe@580 | 171 | } |
universe@580 | 172 | |
universe@580 | 173 | size_t rem_len = string.length - start; |
universe@580 | 174 | if (length > rem_len) { |
universe@580 | 175 | length = rem_len; |
universe@580 | 176 | } |
universe@580 | 177 | |
universe@580 | 178 | return (cxstring) {string.ptr + start, length}; |
universe@580 | 179 | } |
universe@580 | 180 | |
universe@580 | 181 | cxmutstr cx_strsubsl_m( |
universe@580 | 182 | cxmutstr string, |
universe@580 | 183 | size_t start, |
universe@580 | 184 | size_t length |
universe@580 | 185 | ) { |
universe@580 | 186 | cxstring result = cx_strsubsl(cx_strcast(string), start, length); |
universe@580 | 187 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 188 | } |
universe@580 | 189 | |
universe@580 | 190 | cxstring cx_strchr( |
universe@580 | 191 | cxstring string, |
universe@580 | 192 | int chr |
universe@580 | 193 | ) { |
universe@580 | 194 | chr = 0xFF & chr; |
universe@580 | 195 | // TODO: improve by comparing multiple bytes at once |
universe@580 | 196 | cx_for_n(i, string.length) { |
universe@580 | 197 | if (string.ptr[i] == chr) { |
universe@580 | 198 | return cx_strsubs(string, i); |
universe@580 | 199 | } |
universe@580 | 200 | } |
universe@580 | 201 | return (cxstring) {NULL, 0}; |
universe@580 | 202 | } |
universe@580 | 203 | |
universe@580 | 204 | cxmutstr cx_strchr_m( |
universe@580 | 205 | cxmutstr string, |
universe@580 | 206 | int chr |
universe@580 | 207 | ) { |
universe@580 | 208 | cxstring result = cx_strchr(cx_strcast(string), chr); |
universe@580 | 209 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 210 | } |
universe@580 | 211 | |
universe@580 | 212 | cxstring cx_strrchr( |
universe@580 | 213 | cxstring string, |
universe@580 | 214 | int chr |
universe@580 | 215 | ) { |
universe@580 | 216 | chr = 0xFF & chr; |
universe@580 | 217 | size_t i = string.length; |
universe@580 | 218 | while (i > 0) { |
universe@580 | 219 | i--; |
universe@580 | 220 | // TODO: improve by comparing multiple bytes at once |
universe@580 | 221 | if (string.ptr[i] == chr) { |
universe@580 | 222 | return cx_strsubs(string, i); |
universe@580 | 223 | } |
universe@580 | 224 | } |
universe@580 | 225 | return (cxstring) {NULL, 0}; |
universe@580 | 226 | } |
universe@580 | 227 | |
universe@580 | 228 | cxmutstr cx_strrchr_m( |
universe@580 | 229 | cxmutstr string, |
universe@580 | 230 | int chr |
universe@580 | 231 | ) { |
universe@580 | 232 | cxstring result = cx_strrchr(cx_strcast(string), chr); |
universe@580 | 233 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 234 | } |
universe@580 | 235 | |
universe@643 | 236 | #ifndef CX_STRSTR_SBO_SIZE |
universe@643 | 237 | #define CX_STRSTR_SBO_SIZE 512 |
universe@643 | 238 | #endif |
universe@580 | 239 | |
universe@580 | 240 | cxstring cx_strstr( |
universe@580 | 241 | cxstring haystack, |
universe@580 | 242 | cxstring needle |
universe@580 | 243 | ) { |
universe@580 | 244 | if (needle.length == 0) { |
universe@580 | 245 | return haystack; |
universe@580 | 246 | } |
universe@580 | 247 | |
universe@628 | 248 | // optimize for single-char needles |
universe@583 | 249 | if (needle.length == 1) { |
universe@583 | 250 | return cx_strchr(haystack, *needle.ptr); |
universe@583 | 251 | } |
universe@583 | 252 | |
universe@580 | 253 | /* |
universe@580 | 254 | * IMPORTANT: |
universe@580 | 255 | * Our prefix table contains the prefix length PLUS ONE |
universe@580 | 256 | * this is our decision, because we want to use the full range of size_t. |
universe@580 | 257 | * The original algorithm needs a (-1) at one single place, |
universe@580 | 258 | * and we want to avoid that. |
universe@580 | 259 | */ |
universe@580 | 260 | |
universe@628 | 261 | // local prefix table |
universe@643 | 262 | size_t s_prefix_table[CX_STRSTR_SBO_SIZE]; |
universe@580 | 263 | |
universe@628 | 264 | // check needle length and use appropriate prefix table |
universe@628 | 265 | // if the pattern exceeds static prefix table, allocate on the heap |
universe@643 | 266 | bool useheap = needle.length >= CX_STRSTR_SBO_SIZE; |
universe@591 | 267 | register size_t *ptable = useheap ? calloc(needle.length + 1, |
universe@591 | 268 | sizeof(size_t)) : s_prefix_table; |
universe@580 | 269 | |
universe@628 | 270 | // keep counter in registers |
universe@580 | 271 | register size_t i, j; |
universe@580 | 272 | |
universe@628 | 273 | // fill prefix table |
universe@580 | 274 | i = 0; |
universe@580 | 275 | j = 0; |
universe@591 | 276 | ptable[i] = j; |
universe@580 | 277 | while (i < needle.length) { |
universe@580 | 278 | while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { |
universe@591 | 279 | j = ptable[j - 1]; |
universe@580 | 280 | } |
universe@580 | 281 | i++; |
universe@580 | 282 | j++; |
universe@591 | 283 | ptable[i] = j; |
universe@580 | 284 | } |
universe@580 | 285 | |
universe@628 | 286 | // search |
universe@580 | 287 | cxstring result = {NULL, 0}; |
universe@580 | 288 | i = 0; |
universe@580 | 289 | j = 1; |
universe@580 | 290 | while (i < haystack.length) { |
universe@580 | 291 | while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { |
universe@591 | 292 | j = ptable[j - 1]; |
universe@580 | 293 | } |
universe@580 | 294 | i++; |
universe@580 | 295 | j++; |
universe@580 | 296 | if (j - 1 == needle.length) { |
universe@580 | 297 | size_t start = i - needle.length; |
universe@580 | 298 | result.ptr = haystack.ptr + start; |
universe@580 | 299 | result.length = haystack.length - start; |
universe@580 | 300 | break; |
universe@580 | 301 | } |
universe@580 | 302 | } |
universe@580 | 303 | |
universe@628 | 304 | // if prefix table was allocated on the heap, free it |
universe@580 | 305 | if (ptable != s_prefix_table) { |
universe@580 | 306 | free(ptable); |
universe@580 | 307 | } |
universe@580 | 308 | |
universe@580 | 309 | return result; |
universe@580 | 310 | } |
universe@580 | 311 | |
universe@580 | 312 | cxmutstr cx_strstr_m( |
universe@580 | 313 | cxmutstr haystack, |
universe@580 | 314 | cxstring needle |
universe@580 | 315 | ) { |
universe@580 | 316 | cxstring result = cx_strstr(cx_strcast(haystack), needle); |
universe@580 | 317 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 318 | } |
universe@580 | 319 | |
universe@580 | 320 | size_t cx_strsplit( |
universe@580 | 321 | cxstring string, |
universe@580 | 322 | cxstring delim, |
universe@580 | 323 | size_t limit, |
universe@580 | 324 | cxstring *output |
universe@580 | 325 | ) { |
universe@628 | 326 | // special case: output limit is zero |
universe@583 | 327 | if (limit == 0) return 0; |
universe@583 | 328 | |
universe@628 | 329 | // special case: delimiter is empty |
universe@583 | 330 | if (delim.length == 0) { |
universe@583 | 331 | output[0] = string; |
universe@583 | 332 | return 1; |
universe@583 | 333 | } |
universe@583 | 334 | |
universe@628 | 335 | // special cases: delimiter is at least as large as the string |
universe@583 | 336 | if (delim.length >= string.length) { |
universe@628 | 337 | // exact match |
universe@583 | 338 | if (cx_strcmp(string, delim) == 0) { |
universe@583 | 339 | output[0] = cx_strn(string.ptr, 0); |
universe@583 | 340 | output[1] = cx_strn(string.ptr + string.length, 0); |
universe@583 | 341 | return 2; |
universe@628 | 342 | } else { |
universe@628 | 343 | // no match possible |
universe@583 | 344 | output[0] = string; |
universe@583 | 345 | return 1; |
universe@583 | 346 | } |
universe@583 | 347 | } |
universe@583 | 348 | |
universe@583 | 349 | size_t n = 0; |
universe@583 | 350 | cxstring curpos = string; |
universe@583 | 351 | while (1) { |
universe@583 | 352 | ++n; |
universe@583 | 353 | cxstring match = cx_strstr(curpos, delim); |
universe@583 | 354 | if (match.length > 0) { |
universe@628 | 355 | // is the limit reached? |
universe@583 | 356 | if (n < limit) { |
universe@628 | 357 | // copy the current string to the array |
universe@583 | 358 | cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); |
universe@583 | 359 | output[n - 1] = item; |
universe@583 | 360 | size_t processed = item.length + delim.length; |
universe@583 | 361 | curpos.ptr += processed; |
universe@583 | 362 | curpos.length -= processed; |
universe@583 | 363 | } else { |
universe@628 | 364 | // limit reached, copy the _full_ remaining string |
universe@583 | 365 | output[n - 1] = curpos; |
universe@583 | 366 | break; |
universe@583 | 367 | } |
universe@583 | 368 | } else { |
universe@628 | 369 | // no more matches, copy last string |
universe@583 | 370 | output[n - 1] = curpos; |
universe@583 | 371 | break; |
universe@583 | 372 | } |
universe@583 | 373 | } |
universe@583 | 374 | |
universe@583 | 375 | return n; |
universe@580 | 376 | } |
universe@580 | 377 | |
universe@580 | 378 | size_t cx_strsplit_a( |
universe@693 | 379 | CxAllocator const *allocator, |
universe@580 | 380 | cxstring string, |
universe@580 | 381 | cxstring delim, |
universe@580 | 382 | size_t limit, |
universe@580 | 383 | cxstring **output |
universe@580 | 384 | ) { |
universe@628 | 385 | // find out how many splits we're going to make and allocate memory |
universe@583 | 386 | size_t n = 0; |
universe@583 | 387 | cxstring curpos = string; |
universe@583 | 388 | while (1) { |
universe@583 | 389 | ++n; |
universe@583 | 390 | cxstring match = cx_strstr(curpos, delim); |
universe@583 | 391 | if (match.length > 0) { |
universe@628 | 392 | // is the limit reached? |
universe@583 | 393 | if (n < limit) { |
universe@583 | 394 | size_t processed = match.ptr - curpos.ptr + delim.length; |
universe@583 | 395 | curpos.ptr += processed; |
universe@583 | 396 | curpos.length -= processed; |
universe@583 | 397 | } else { |
universe@628 | 398 | // limit reached |
universe@583 | 399 | break; |
universe@583 | 400 | } |
universe@583 | 401 | } else { |
universe@628 | 402 | // no more matches |
universe@583 | 403 | break; |
universe@583 | 404 | } |
universe@583 | 405 | } |
universe@583 | 406 | *output = cxCalloc(allocator, n, sizeof(cxstring)); |
universe@583 | 407 | return cx_strsplit(string, delim, n, *output); |
universe@580 | 408 | } |
universe@580 | 409 | |
universe@580 | 410 | size_t cx_strsplit_m( |
universe@580 | 411 | cxmutstr string, |
universe@580 | 412 | cxstring delim, |
universe@580 | 413 | size_t limit, |
universe@580 | 414 | cxmutstr *output |
universe@580 | 415 | ) { |
universe@580 | 416 | return cx_strsplit(cx_strcast(string), |
universe@580 | 417 | delim, limit, (cxstring *) output); |
universe@580 | 418 | } |
universe@580 | 419 | |
universe@580 | 420 | size_t cx_strsplit_ma( |
universe@693 | 421 | CxAllocator const *allocator, |
universe@580 | 422 | cxmutstr string, |
universe@580 | 423 | cxstring delim, |
universe@580 | 424 | size_t limit, |
universe@580 | 425 | cxmutstr **output |
universe@580 | 426 | ) { |
universe@580 | 427 | return cx_strsplit_a(allocator, cx_strcast(string), |
universe@580 | 428 | delim, limit, (cxstring **) output); |
universe@580 | 429 | } |
universe@581 | 430 | |
universe@583 | 431 | int cx_strcmp( |
universe@583 | 432 | cxstring s1, |
universe@583 | 433 | cxstring s2 |
universe@583 | 434 | ) { |
universe@581 | 435 | if (s1.length == s2.length) { |
universe@581 | 436 | return memcmp(s1.ptr, s2.ptr, s1.length); |
universe@581 | 437 | } else if (s1.length > s2.length) { |
universe@581 | 438 | return 1; |
universe@581 | 439 | } else { |
universe@581 | 440 | return -1; |
universe@581 | 441 | } |
universe@581 | 442 | } |
universe@581 | 443 | |
universe@583 | 444 | int cx_strcasecmp( |
universe@583 | 445 | cxstring s1, |
universe@583 | 446 | cxstring s2 |
universe@583 | 447 | ) { |
universe@581 | 448 | if (s1.length == s2.length) { |
universe@581 | 449 | #ifdef _WIN32 |
universe@581 | 450 | return _strnicmp(s1.ptr, s2.ptr, s1.length); |
universe@581 | 451 | #else |
universe@581 | 452 | return strncasecmp(s1.ptr, s2.ptr, s1.length); |
universe@581 | 453 | #endif |
universe@581 | 454 | } else if (s1.length > s2.length) { |
universe@581 | 455 | return 1; |
universe@581 | 456 | } else { |
universe@581 | 457 | return -1; |
universe@581 | 458 | } |
universe@581 | 459 | } |
universe@581 | 460 | |
universe@657 | 461 | int cx_strcmp_p( |
universe@657 | 462 | void const *s1, |
universe@657 | 463 | void const *s2 |
universe@657 | 464 | ) { |
universe@657 | 465 | cxstring const *left = s1; |
universe@657 | 466 | cxstring const *right = s2; |
universe@657 | 467 | return cx_strcmp(*left, *right); |
universe@657 | 468 | } |
universe@657 | 469 | |
universe@657 | 470 | int cx_strcasecmp_p( |
universe@657 | 471 | void const *s1, |
universe@657 | 472 | void const *s2 |
universe@657 | 473 | ) { |
universe@657 | 474 | cxstring const *left = s1; |
universe@657 | 475 | cxstring const *right = s2; |
universe@657 | 476 | return cx_strcasecmp(*left, *right); |
universe@657 | 477 | } |
universe@657 | 478 | |
universe@583 | 479 | cxmutstr cx_strdup_a( |
universe@693 | 480 | CxAllocator const *allocator, |
universe@583 | 481 | cxstring string |
universe@583 | 482 | ) { |
universe@581 | 483 | cxmutstr result = { |
universe@581 | 484 | cxMalloc(allocator, string.length + 1), |
universe@581 | 485 | string.length |
universe@581 | 486 | }; |
universe@581 | 487 | if (result.ptr == NULL) { |
universe@581 | 488 | result.length = 0; |
universe@581 | 489 | return result; |
universe@581 | 490 | } |
universe@581 | 491 | memcpy(result.ptr, string.ptr, string.length); |
universe@581 | 492 | result.ptr[string.length] = '\0'; |
universe@581 | 493 | return result; |
universe@581 | 494 | } |
universe@581 | 495 | |
universe@581 | 496 | cxstring cx_strtrim(cxstring string) { |
universe@581 | 497 | cxstring result = string; |
universe@581 | 498 | // TODO: optimize by comparing multiple bytes at once |
universe@581 | 499 | while (result.length > 0 && isspace(*result.ptr)) { |
universe@581 | 500 | result.ptr++; |
universe@581 | 501 | result.length--; |
universe@581 | 502 | } |
universe@581 | 503 | while (result.length > 0 && isspace(result.ptr[result.length - 1])) { |
universe@581 | 504 | result.length--; |
universe@581 | 505 | } |
universe@581 | 506 | return result; |
universe@581 | 507 | } |
universe@581 | 508 | |
universe@581 | 509 | cxmutstr cx_strtrim_m(cxmutstr string) { |
universe@581 | 510 | cxstring result = cx_strtrim(cx_strcast(string)); |
universe@581 | 511 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@581 | 512 | } |
universe@581 | 513 | |
universe@583 | 514 | bool cx_strprefix( |
universe@583 | 515 | cxstring string, |
universe@583 | 516 | cxstring prefix |
universe@583 | 517 | ) { |
universe@581 | 518 | if (string.length < prefix.length) return false; |
universe@581 | 519 | return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
universe@581 | 520 | } |
universe@581 | 521 | |
universe@583 | 522 | bool cx_strsuffix( |
universe@583 | 523 | cxstring string, |
universe@583 | 524 | cxstring suffix |
universe@583 | 525 | ) { |
universe@581 | 526 | if (string.length < suffix.length) return false; |
universe@581 | 527 | return memcmp(string.ptr + string.length - suffix.length, |
universe@581 | 528 | suffix.ptr, suffix.length) == 0; |
universe@581 | 529 | } |
universe@581 | 530 | |
universe@583 | 531 | bool cx_strcaseprefix( |
universe@583 | 532 | cxstring string, |
universe@583 | 533 | cxstring prefix |
universe@583 | 534 | ) { |
universe@581 | 535 | if (string.length < prefix.length) return false; |
universe@581 | 536 | #ifdef _WIN32 |
universe@581 | 537 | return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; |
universe@581 | 538 | #else |
universe@581 | 539 | return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; |
universe@581 | 540 | #endif |
universe@581 | 541 | } |
universe@581 | 542 | |
universe@583 | 543 | bool cx_strcasesuffix( |
universe@583 | 544 | cxstring string, |
universe@583 | 545 | cxstring suffix |
universe@583 | 546 | ) { |
universe@581 | 547 | if (string.length < suffix.length) return false; |
universe@581 | 548 | #ifdef _WIN32 |
universe@581 | 549 | return _strnicmp(string.ptr+string.length-suffix.length, |
universe@581 | 550 | suffix.ptr, suffix.length) == 0; |
universe@581 | 551 | #else |
universe@581 | 552 | return strncasecmp(string.ptr + string.length - suffix.length, |
universe@581 | 553 | suffix.ptr, suffix.length) == 0; |
universe@581 | 554 | #endif |
universe@581 | 555 | } |
universe@582 | 556 | |
universe@582 | 557 | void cx_strlower(cxmutstr string) { |
universe@582 | 558 | cx_for_n(i, string.length) { |
universe@593 | 559 | string.ptr[i] = (char) tolower(string.ptr[i]); |
universe@582 | 560 | } |
universe@582 | 561 | } |
universe@582 | 562 | |
universe@582 | 563 | void cx_strupper(cxmutstr string) { |
universe@582 | 564 | cx_for_n(i, string.length) { |
universe@593 | 565 | string.ptr[i] = (char) toupper(string.ptr[i]); |
universe@582 | 566 | } |
universe@582 | 567 | } |
universe@583 | 568 | |
universe@643 | 569 | #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE |
universe@643 | 570 | #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64 |
universe@643 | 571 | #endif |
universe@583 | 572 | |
universe@583 | 573 | struct cx_strreplace_ibuf { |
universe@583 | 574 | size_t *buf; |
universe@583 | 575 | struct cx_strreplace_ibuf *next; |
universe@590 | 576 | unsigned int len; |
universe@583 | 577 | }; |
universe@583 | 578 | |
universe@583 | 579 | static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) { |
universe@583 | 580 | while (buf) { |
universe@583 | 581 | struct cx_strreplace_ibuf *next = buf->next; |
universe@583 | 582 | free(buf->buf); |
universe@583 | 583 | free(buf); |
universe@583 | 584 | buf = next; |
universe@583 | 585 | } |
universe@583 | 586 | } |
universe@583 | 587 | |
universe@583 | 588 | cxmutstr cx_strreplacen_a( |
universe@693 | 589 | CxAllocator const *allocator, |
universe@583 | 590 | cxstring str, |
universe@583 | 591 | cxstring pattern, |
universe@583 | 592 | cxstring replacement, |
universe@583 | 593 | size_t replmax |
universe@583 | 594 | ) { |
universe@583 | 595 | |
universe@583 | 596 | if (pattern.length == 0 || pattern.length > str.length || replmax == 0) |
universe@583 | 597 | return cx_strdup_a(allocator, str); |
universe@583 | 598 | |
universe@628 | 599 | // Compute expected buffer length |
universe@583 | 600 | size_t ibufmax = str.length / pattern.length; |
universe@583 | 601 | size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; |
universe@643 | 602 | if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) { |
universe@643 | 603 | ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE; |
universe@583 | 604 | } |
universe@583 | 605 | |
universe@628 | 606 | // Allocate first index buffer |
universe@583 | 607 | struct cx_strreplace_ibuf *firstbuf, *curbuf; |
universe@583 | 608 | firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); |
universe@583 | 609 | if (!firstbuf) return cx_mutstrn(NULL, 0); |
universe@583 | 610 | firstbuf->buf = calloc(ibuflen, sizeof(size_t)); |
universe@583 | 611 | if (!firstbuf->buf) { |
universe@583 | 612 | free(firstbuf); |
universe@583 | 613 | return cx_mutstrn(NULL, 0); |
universe@583 | 614 | } |
universe@583 | 615 | |
universe@628 | 616 | // Search occurrences |
universe@583 | 617 | cxstring searchstr = str; |
universe@583 | 618 | size_t found = 0; |
universe@583 | 619 | do { |
universe@583 | 620 | cxstring match = cx_strstr(searchstr, pattern); |
universe@583 | 621 | if (match.length > 0) { |
universe@628 | 622 | // Allocate next buffer in chain, if required |
universe@583 | 623 | if (curbuf->len == ibuflen) { |
universe@583 | 624 | struct cx_strreplace_ibuf *nextbuf = |
universe@583 | 625 | calloc(1, sizeof(struct cx_strreplace_ibuf)); |
universe@583 | 626 | if (!nextbuf) { |
universe@583 | 627 | cx_strrepl_free_ibuf(firstbuf); |
universe@583 | 628 | return cx_mutstrn(NULL, 0); |
universe@583 | 629 | } |
universe@583 | 630 | nextbuf->buf = calloc(ibuflen, sizeof(size_t)); |
universe@583 | 631 | if (!nextbuf->buf) { |
universe@583 | 632 | free(nextbuf); |
universe@583 | 633 | cx_strrepl_free_ibuf(firstbuf); |
universe@583 | 634 | return cx_mutstrn(NULL, 0); |
universe@583 | 635 | } |
universe@583 | 636 | curbuf->next = nextbuf; |
universe@583 | 637 | curbuf = nextbuf; |
universe@583 | 638 | } |
universe@583 | 639 | |
universe@628 | 640 | // Record match index |
universe@583 | 641 | found++; |
universe@583 | 642 | size_t idx = match.ptr - str.ptr; |
universe@583 | 643 | curbuf->buf[curbuf->len++] = idx; |
universe@583 | 644 | searchstr.ptr = match.ptr + pattern.length; |
universe@583 | 645 | searchstr.length = str.length - idx - pattern.length; |
universe@583 | 646 | } else { |
universe@583 | 647 | break; |
universe@583 | 648 | } |
universe@583 | 649 | } while (searchstr.length > 0 && found < replmax); |
universe@583 | 650 | |
universe@628 | 651 | // Allocate result string |
universe@583 | 652 | cxmutstr result; |
universe@583 | 653 | { |
universe@583 | 654 | ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; |
universe@583 | 655 | size_t rcount = 0; |
universe@583 | 656 | curbuf = firstbuf; |
universe@583 | 657 | do { |
universe@583 | 658 | rcount += curbuf->len; |
universe@583 | 659 | curbuf = curbuf->next; |
universe@583 | 660 | } while (curbuf); |
universe@583 | 661 | result.length = str.length + rcount * adjlen; |
universe@590 | 662 | result.ptr = cxMalloc(allocator, result.length + 1); |
universe@583 | 663 | if (!result.ptr) { |
universe@583 | 664 | cx_strrepl_free_ibuf(firstbuf); |
universe@583 | 665 | return cx_mutstrn(NULL, 0); |
universe@583 | 666 | } |
universe@583 | 667 | } |
universe@583 | 668 | |
universe@628 | 669 | // Build result string |
universe@583 | 670 | curbuf = firstbuf; |
universe@583 | 671 | size_t srcidx = 0; |
universe@583 | 672 | char *destptr = result.ptr; |
universe@583 | 673 | do { |
universe@583 | 674 | for (size_t i = 0; i < curbuf->len; i++) { |
universe@628 | 675 | // Copy source part up to next match |
universe@583 | 676 | size_t idx = curbuf->buf[i]; |
universe@583 | 677 | size_t srclen = idx - srcidx; |
universe@583 | 678 | if (srclen > 0) { |
universe@583 | 679 | memcpy(destptr, str.ptr + srcidx, srclen); |
universe@583 | 680 | destptr += srclen; |
universe@583 | 681 | srcidx += srclen; |
universe@583 | 682 | } |
universe@583 | 683 | |
universe@628 | 684 | // Copy the replacement and skip the source pattern |
universe@583 | 685 | srcidx += pattern.length; |
universe@583 | 686 | memcpy(destptr, replacement.ptr, replacement.length); |
universe@583 | 687 | destptr += replacement.length; |
universe@583 | 688 | } |
universe@583 | 689 | curbuf = curbuf->next; |
universe@583 | 690 | } while (curbuf); |
universe@583 | 691 | memcpy(destptr, str.ptr + srcidx, str.length - srcidx); |
universe@583 | 692 | |
universe@628 | 693 | // Result is guaranteed to be zero-terminated |
universe@590 | 694 | result.ptr[result.length] = '\0'; |
universe@590 | 695 | |
universe@628 | 696 | // Free index buffer |
universe@583 | 697 | cx_strrepl_free_ibuf(firstbuf); |
universe@583 | 698 | |
universe@583 | 699 | return result; |
universe@583 | 700 | } |
universe@583 | 701 | |
universe@645 | 702 | CxStrtokCtx cx_strtok( |
universe@645 | 703 | cxstring str, |
universe@645 | 704 | cxstring delim, |
universe@645 | 705 | size_t limit |
universe@645 | 706 | ) { |
universe@645 | 707 | CxStrtokCtx ctx; |
universe@645 | 708 | ctx.str = str; |
universe@645 | 709 | ctx.delim = delim; |
universe@645 | 710 | ctx.limit = limit; |
universe@645 | 711 | ctx.pos = 0; |
universe@645 | 712 | ctx.next_pos = 0; |
universe@645 | 713 | ctx.delim_pos = 0; |
universe@645 | 714 | ctx.found = 0; |
universe@645 | 715 | ctx.delim_more = NULL; |
universe@645 | 716 | ctx.delim_more_count = 0; |
universe@645 | 717 | return ctx; |
universe@645 | 718 | } |
universe@583 | 719 | |
universe@645 | 720 | CxStrtokCtx cx_strtok_m( |
universe@645 | 721 | cxmutstr str, |
universe@645 | 722 | cxstring delim, |
universe@645 | 723 | size_t limit |
universe@645 | 724 | ) { |
universe@645 | 725 | return cx_strtok(cx_strcast(str), delim, limit); |
universe@645 | 726 | } |
universe@645 | 727 | |
universe@645 | 728 | bool cx_strtok_next( |
universe@645 | 729 | CxStrtokCtx *ctx, |
universe@645 | 730 | cxstring *token |
universe@645 | 731 | ) { |
universe@645 | 732 | // abortion criteria |
universe@645 | 733 | if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) { |
universe@645 | 734 | return false; |
universe@645 | 735 | } |
universe@645 | 736 | |
universe@645 | 737 | // determine the search start |
universe@645 | 738 | cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos); |
universe@645 | 739 | |
universe@645 | 740 | // search the next delimiter |
universe@645 | 741 | cxstring delim = cx_strstr(haystack, ctx->delim); |
universe@645 | 742 | |
universe@645 | 743 | // if found, make delim capture exactly the delimiter |
universe@645 | 744 | if (delim.length > 0) { |
universe@645 | 745 | delim.length = ctx->delim.length; |
universe@645 | 746 | } |
universe@645 | 747 | |
universe@645 | 748 | // if more delimiters are specified, check them now |
universe@645 | 749 | if (ctx->delim_more_count > 0) { |
universe@645 | 750 | cx_for_n(i, ctx->delim_more_count) { |
universe@645 | 751 | cxstring d = cx_strstr(haystack, ctx->delim_more[i]); |
universe@645 | 752 | if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) { |
universe@645 | 753 | delim.ptr = d.ptr; |
universe@645 | 754 | delim.length = ctx->delim_more[i].length; |
universe@645 | 755 | } |
universe@645 | 756 | } |
universe@645 | 757 | } |
universe@645 | 758 | |
universe@645 | 759 | // store the token information and adjust the context |
universe@645 | 760 | ctx->found++; |
universe@645 | 761 | ctx->pos = ctx->next_pos; |
universe@645 | 762 | token->ptr = &ctx->str.ptr[ctx->pos]; |
universe@645 | 763 | ctx->delim_pos = delim.length == 0 ? |
universe@645 | 764 | ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr); |
universe@645 | 765 | token->length = ctx->delim_pos - ctx->pos; |
universe@645 | 766 | ctx->next_pos = ctx->delim_pos + delim.length; |
universe@645 | 767 | |
universe@645 | 768 | return true; |
universe@645 | 769 | } |
universe@645 | 770 | |
universe@645 | 771 | bool cx_strtok_next_m( |
universe@645 | 772 | CxStrtokCtx *ctx, |
universe@645 | 773 | cxmutstr *token |
universe@645 | 774 | ) { |
universe@645 | 775 | return cx_strtok_next(ctx, (cxstring *) token); |
universe@645 | 776 | } |
universe@645 | 777 | |
universe@645 | 778 | void cx_strtok_delim( |
universe@645 | 779 | CxStrtokCtx *ctx, |
universe@645 | 780 | cxstring const *delim, |
universe@645 | 781 | size_t count |
universe@645 | 782 | ) { |
universe@645 | 783 | ctx->delim_more = delim; |
universe@645 | 784 | ctx->delim_more_count = count; |
universe@645 | 785 | } |