src/string.c

Fri, 12 Apr 2024 21:48:12 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 12 Apr 2024 21:48:12 +0200
changeset 849
edb9f875b7f9
parent 806
e06249e09f99
permissions
-rw-r--r--

improves interface of cx_sprintf() variants

universe@576 1 /*
universe@576 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@576 3 *
universe@576 4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
universe@576 5 *
universe@576 6 * Redistribution and use in source and binary forms, with or without
universe@576 7 * modification, are permitted provided that the following conditions are met:
universe@576 8 *
universe@576 9 * 1. Redistributions of source code must retain the above copyright
universe@576 10 * notice, this list of conditions and the following disclaimer.
universe@576 11 *
universe@576 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@576 13 * notice, this list of conditions and the following disclaimer in the
universe@576 14 * documentation and/or other materials provided with the distribution.
universe@576 15 *
universe@576 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@576 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@576 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@576 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@576 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@576 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@576 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@576 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@576 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@576 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@576 26 * POSSIBILITY OF SUCH DAMAGE.
universe@576 27 */
universe@576 28
universe@576 29 #include "cx/string.h"
universe@579 30 #include "cx/utils.h"
universe@579 31
universe@579 32 #include <string.h>
universe@579 33 #include <stdarg.h>
universe@581 34 #include <ctype.h>
universe@581 35
universe@581 36 #ifndef _WIN32
universe@581 37
universe@628 38 #include <strings.h> // for strncasecmp()
universe@581 39
universe@628 40 #endif // _WIN32
universe@579 41
universe@579 42 cxmutstr cx_mutstr(char *cstring) {
universe@579 43 return (cxmutstr) {cstring, strlen(cstring)};
universe@579 44 }
universe@579 45
universe@579 46 cxmutstr cx_mutstrn(
universe@579 47 char *cstring,
universe@579 48 size_t length
universe@579 49 ) {
universe@579 50 return (cxmutstr) {cstring, length};
universe@579 51 }
universe@579 52
universe@579 53 cxstring cx_str(const char *cstring) {
universe@579 54 return (cxstring) {cstring, strlen(cstring)};
universe@579 55 }
universe@579 56
universe@579 57 cxstring cx_strn(
universe@579 58 const char *cstring,
universe@579 59 size_t length
universe@579 60 ) {
universe@579 61 return (cxstring) {cstring, length};
universe@579 62 }
universe@579 63
universe@579 64 cxstring cx_strcast(cxmutstr str) {
universe@579 65 return (cxstring) {str.ptr, str.length};
universe@579 66 }
universe@579 67
universe@579 68 void cx_strfree(cxmutstr *str) {
universe@579 69 free(str->ptr);
universe@579 70 str->ptr = NULL;
universe@579 71 str->length = 0;
universe@579 72 }
universe@579 73
universe@583 74 void cx_strfree_a(
universe@693 75 CxAllocator const *alloc,
universe@583 76 cxmutstr *str
universe@583 77 ) {
universe@583 78 cxFree(alloc, str->ptr);
universe@583 79 str->ptr = NULL;
universe@583 80 str->length = 0;
universe@583 81 }
universe@583 82
universe@579 83 size_t cx_strlen(
universe@579 84 size_t count,
universe@579 85 ...
universe@579 86 ) {
universe@579 87 if (count == 0) return 0;
universe@579 88
universe@579 89 va_list ap;
universe@579 90 va_start(ap, count);
universe@579 91 size_t size = 0;
universe@579 92 cx_for_n(i, count) {
universe@579 93 cxstring str = va_arg(ap, cxstring);
universe@579 94 size += str.length;
universe@579 95 }
universe@579 96 va_end(ap);
universe@579 97
universe@579 98 return size;
universe@579 99 }
universe@579 100
universe@697 101 cxmutstr cx_strcat_ma(
universe@693 102 CxAllocator const *alloc,
universe@697 103 cxmutstr str,
universe@579 104 size_t count,
universe@579 105 ...
universe@579 106 ) {
universe@697 107 if (count == 0) return str;
universe@697 108
universe@579 109 cxstring *strings = calloc(count, sizeof(cxstring));
universe@579 110 if (!strings) abort();
universe@579 111
universe@579 112 va_list ap;
universe@579 113 va_start(ap, count);
universe@579 114
universe@579 115 // get all args and overall length
universe@697 116 size_t slen = str.length;
universe@579 117 cx_for_n(i, count) {
universe@579 118 cxstring s = va_arg (ap, cxstring);
universe@579 119 strings[i] = s;
universe@579 120 slen += s.length;
universe@579 121 }
universe@697 122 va_end(ap);
universe@579 123
universe@697 124 // reallocate or create new string
universe@697 125 if (str.ptr == NULL) {
universe@697 126 str.ptr = cxMalloc(alloc, slen + 1);
universe@697 127 } else {
universe@697 128 str.ptr = cxRealloc(alloc, str.ptr, slen + 1);
universe@697 129 }
universe@697 130 if (str.ptr == NULL) abort();
universe@579 131
universe@579 132 // concatenate strings
universe@697 133 size_t pos = str.length;
universe@697 134 str.length = slen;
universe@579 135 cx_for_n(i, count) {
universe@579 136 cxstring s = strings[i];
universe@697 137 memcpy(str.ptr + pos, s.ptr, s.length);
universe@579 138 pos += s.length;
universe@579 139 }
universe@579 140
universe@579 141 // terminate string
universe@697 142 str.ptr[str.length] = '\0';
universe@579 143
universe@579 144 // free temporary array
universe@579 145 free(strings);
universe@579 146
universe@697 147 return str;
universe@579 148 }
universe@579 149
universe@580 150 cxstring cx_strsubs(
universe@580 151 cxstring string,
universe@580 152 size_t start
universe@580 153 ) {
universe@580 154 return cx_strsubsl(string, start, string.length - start);
universe@580 155 }
universe@579 156
universe@580 157 cxmutstr cx_strsubs_m(
universe@580 158 cxmutstr string,
universe@580 159 size_t start
universe@580 160 ) {
universe@580 161 return cx_strsubsl_m(string, start, string.length - start);
universe@580 162 }
universe@579 163
universe@580 164 cxstring cx_strsubsl(
universe@580 165 cxstring string,
universe@580 166 size_t start,
universe@580 167 size_t length
universe@580 168 ) {
universe@580 169 if (start > string.length) {
universe@580 170 return (cxstring) {NULL, 0};
universe@580 171 }
universe@580 172
universe@580 173 size_t rem_len = string.length - start;
universe@580 174 if (length > rem_len) {
universe@580 175 length = rem_len;
universe@580 176 }
universe@580 177
universe@580 178 return (cxstring) {string.ptr + start, length};
universe@580 179 }
universe@580 180
universe@580 181 cxmutstr cx_strsubsl_m(
universe@580 182 cxmutstr string,
universe@580 183 size_t start,
universe@580 184 size_t length
universe@580 185 ) {
universe@580 186 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
universe@580 187 return (cxmutstr) {(char *) result.ptr, result.length};
universe@580 188 }
universe@580 189
universe@580 190 cxstring cx_strchr(
universe@580 191 cxstring string,
universe@580 192 int chr
universe@580 193 ) {
universe@580 194 chr = 0xFF & chr;
universe@580 195 // TODO: improve by comparing multiple bytes at once
universe@580 196 cx_for_n(i, string.length) {
universe@580 197 if (string.ptr[i] == chr) {
universe@580 198 return cx_strsubs(string, i);
universe@580 199 }
universe@580 200 }
universe@580 201 return (cxstring) {NULL, 0};
universe@580 202 }
universe@580 203
universe@580 204 cxmutstr cx_strchr_m(
universe@580 205 cxmutstr string,
universe@580 206 int chr
universe@580 207 ) {
universe@580 208 cxstring result = cx_strchr(cx_strcast(string), chr);
universe@580 209 return (cxmutstr) {(char *) result.ptr, result.length};
universe@580 210 }
universe@580 211
universe@580 212 cxstring cx_strrchr(
universe@580 213 cxstring string,
universe@580 214 int chr
universe@580 215 ) {
universe@580 216 chr = 0xFF & chr;
universe@580 217 size_t i = string.length;
universe@580 218 while (i > 0) {
universe@580 219 i--;
universe@580 220 // TODO: improve by comparing multiple bytes at once
universe@580 221 if (string.ptr[i] == chr) {
universe@580 222 return cx_strsubs(string, i);
universe@580 223 }
universe@580 224 }
universe@580 225 return (cxstring) {NULL, 0};
universe@580 226 }
universe@580 227
universe@580 228 cxmutstr cx_strrchr_m(
universe@580 229 cxmutstr string,
universe@580 230 int chr
universe@580 231 ) {
universe@580 232 cxstring result = cx_strrchr(cx_strcast(string), chr);
universe@580 233 return (cxmutstr) {(char *) result.ptr, result.length};
universe@580 234 }
universe@580 235
// Size of the stack-allocated prefix table used by cx_strstr().
// Needles of this length or longer use a heap-allocated table instead.
// May be overridden at compile time.
#if !defined(CX_STRSTR_SBO_SIZE)
#define CX_STRSTR_SBO_SIZE 512
#endif
const unsigned cx_strstr_sbo_size = CX_STRSTR_SBO_SIZE;
universe@580 240
universe@580 241 cxstring cx_strstr(
universe@580 242 cxstring haystack,
universe@580 243 cxstring needle
universe@580 244 ) {
universe@580 245 if (needle.length == 0) {
universe@580 246 return haystack;
universe@580 247 }
universe@580 248
universe@628 249 // optimize for single-char needles
universe@583 250 if (needle.length == 1) {
universe@583 251 return cx_strchr(haystack, *needle.ptr);
universe@583 252 }
universe@583 253
universe@580 254 /*
universe@580 255 * IMPORTANT:
universe@580 256 * Our prefix table contains the prefix length PLUS ONE
universe@580 257 * this is our decision, because we want to use the full range of size_t.
universe@580 258 * The original algorithm needs a (-1) at one single place,
universe@580 259 * and we want to avoid that.
universe@580 260 */
universe@580 261
universe@628 262 // local prefix table
universe@643 263 size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
universe@580 264
universe@628 265 // check needle length and use appropriate prefix table
universe@628 266 // if the pattern exceeds static prefix table, allocate on the heap
universe@643 267 bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
universe@591 268 register size_t *ptable = useheap ? calloc(needle.length + 1,
universe@591 269 sizeof(size_t)) : s_prefix_table;
universe@580 270
universe@628 271 // keep counter in registers
universe@580 272 register size_t i, j;
universe@580 273
universe@628 274 // fill prefix table
universe@580 275 i = 0;
universe@580 276 j = 0;
universe@591 277 ptable[i] = j;
universe@580 278 while (i < needle.length) {
universe@580 279 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
universe@591 280 j = ptable[j - 1];
universe@580 281 }
universe@580 282 i++;
universe@580 283 j++;
universe@591 284 ptable[i] = j;
universe@580 285 }
universe@580 286
universe@628 287 // search
universe@580 288 cxstring result = {NULL, 0};
universe@580 289 i = 0;
universe@580 290 j = 1;
universe@580 291 while (i < haystack.length) {
universe@580 292 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
universe@591 293 j = ptable[j - 1];
universe@580 294 }
universe@580 295 i++;
universe@580 296 j++;
universe@580 297 if (j - 1 == needle.length) {
universe@580 298 size_t start = i - needle.length;
universe@580 299 result.ptr = haystack.ptr + start;
universe@580 300 result.length = haystack.length - start;
universe@580 301 break;
universe@580 302 }
universe@580 303 }
universe@580 304
universe@628 305 // if prefix table was allocated on the heap, free it
universe@580 306 if (ptable != s_prefix_table) {
universe@580 307 free(ptable);
universe@580 308 }
universe@580 309
universe@580 310 return result;
universe@580 311 }
universe@580 312
universe@580 313 cxmutstr cx_strstr_m(
universe@580 314 cxmutstr haystack,
universe@580 315 cxstring needle
universe@580 316 ) {
universe@580 317 cxstring result = cx_strstr(cx_strcast(haystack), needle);
universe@580 318 return (cxmutstr) {(char *) result.ptr, result.length};
universe@580 319 }
universe@580 320
universe@580 321 size_t cx_strsplit(
universe@580 322 cxstring string,
universe@580 323 cxstring delim,
universe@580 324 size_t limit,
universe@580 325 cxstring *output
universe@580 326 ) {
universe@628 327 // special case: output limit is zero
universe@583 328 if (limit == 0) return 0;
universe@583 329
universe@628 330 // special case: delimiter is empty
universe@583 331 if (delim.length == 0) {
universe@583 332 output[0] = string;
universe@583 333 return 1;
universe@583 334 }
universe@583 335
universe@628 336 // special cases: delimiter is at least as large as the string
universe@583 337 if (delim.length >= string.length) {
universe@628 338 // exact match
universe@583 339 if (cx_strcmp(string, delim) == 0) {
universe@583 340 output[0] = cx_strn(string.ptr, 0);
universe@583 341 output[1] = cx_strn(string.ptr + string.length, 0);
universe@583 342 return 2;
universe@628 343 } else {
universe@628 344 // no match possible
universe@583 345 output[0] = string;
universe@583 346 return 1;
universe@583 347 }
universe@583 348 }
universe@583 349
universe@583 350 size_t n = 0;
universe@583 351 cxstring curpos = string;
universe@583 352 while (1) {
universe@583 353 ++n;
universe@583 354 cxstring match = cx_strstr(curpos, delim);
universe@583 355 if (match.length > 0) {
universe@628 356 // is the limit reached?
universe@583 357 if (n < limit) {
universe@628 358 // copy the current string to the array
universe@583 359 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
universe@583 360 output[n - 1] = item;
universe@583 361 size_t processed = item.length + delim.length;
universe@583 362 curpos.ptr += processed;
universe@583 363 curpos.length -= processed;
universe@583 364 } else {
universe@628 365 // limit reached, copy the _full_ remaining string
universe@583 366 output[n - 1] = curpos;
universe@583 367 break;
universe@583 368 }
universe@583 369 } else {
universe@628 370 // no more matches, copy last string
universe@583 371 output[n - 1] = curpos;
universe@583 372 break;
universe@583 373 }
universe@583 374 }
universe@583 375
universe@583 376 return n;
universe@580 377 }
universe@580 378
universe@580 379 size_t cx_strsplit_a(
universe@693 380 CxAllocator const *allocator,
universe@580 381 cxstring string,
universe@580 382 cxstring delim,
universe@580 383 size_t limit,
universe@580 384 cxstring **output
universe@580 385 ) {
universe@628 386 // find out how many splits we're going to make and allocate memory
universe@583 387 size_t n = 0;
universe@583 388 cxstring curpos = string;
universe@583 389 while (1) {
universe@583 390 ++n;
universe@583 391 cxstring match = cx_strstr(curpos, delim);
universe@583 392 if (match.length > 0) {
universe@628 393 // is the limit reached?
universe@583 394 if (n < limit) {
universe@583 395 size_t processed = match.ptr - curpos.ptr + delim.length;
universe@583 396 curpos.ptr += processed;
universe@583 397 curpos.length -= processed;
universe@583 398 } else {
universe@628 399 // limit reached
universe@583 400 break;
universe@583 401 }
universe@583 402 } else {
universe@628 403 // no more matches
universe@583 404 break;
universe@583 405 }
universe@583 406 }
universe@583 407 *output = cxCalloc(allocator, n, sizeof(cxstring));
universe@583 408 return cx_strsplit(string, delim, n, *output);
universe@580 409 }
universe@580 410
universe@580 411 size_t cx_strsplit_m(
universe@580 412 cxmutstr string,
universe@580 413 cxstring delim,
universe@580 414 size_t limit,
universe@580 415 cxmutstr *output
universe@580 416 ) {
universe@580 417 return cx_strsplit(cx_strcast(string),
universe@580 418 delim, limit, (cxstring *) output);
universe@580 419 }
universe@580 420
universe@580 421 size_t cx_strsplit_ma(
universe@693 422 CxAllocator const *allocator,
universe@580 423 cxmutstr string,
universe@580 424 cxstring delim,
universe@580 425 size_t limit,
universe@580 426 cxmutstr **output
universe@580 427 ) {
universe@580 428 return cx_strsplit_a(allocator, cx_strcast(string),
universe@580 429 delim, limit, (cxstring **) output);
universe@580 430 }
universe@581 431
universe@583 432 int cx_strcmp(
universe@583 433 cxstring s1,
universe@583 434 cxstring s2
universe@583 435 ) {
universe@581 436 if (s1.length == s2.length) {
universe@581 437 return memcmp(s1.ptr, s2.ptr, s1.length);
universe@581 438 } else if (s1.length > s2.length) {
universe@581 439 return 1;
universe@581 440 } else {
universe@581 441 return -1;
universe@581 442 }
universe@581 443 }
universe@581 444
universe@583 445 int cx_strcasecmp(
universe@583 446 cxstring s1,
universe@583 447 cxstring s2
universe@583 448 ) {
universe@581 449 if (s1.length == s2.length) {
universe@581 450 #ifdef _WIN32
universe@581 451 return _strnicmp(s1.ptr, s2.ptr, s1.length);
universe@581 452 #else
universe@581 453 return strncasecmp(s1.ptr, s2.ptr, s1.length);
universe@581 454 #endif
universe@581 455 } else if (s1.length > s2.length) {
universe@581 456 return 1;
universe@581 457 } else {
universe@581 458 return -1;
universe@581 459 }
universe@581 460 }
universe@581 461
universe@657 462 int cx_strcmp_p(
universe@657 463 void const *s1,
universe@657 464 void const *s2
universe@657 465 ) {
universe@657 466 cxstring const *left = s1;
universe@657 467 cxstring const *right = s2;
universe@657 468 return cx_strcmp(*left, *right);
universe@657 469 }
universe@657 470
universe@657 471 int cx_strcasecmp_p(
universe@657 472 void const *s1,
universe@657 473 void const *s2
universe@657 474 ) {
universe@657 475 cxstring const *left = s1;
universe@657 476 cxstring const *right = s2;
universe@657 477 return cx_strcasecmp(*left, *right);
universe@657 478 }
universe@657 479
universe@583 480 cxmutstr cx_strdup_a(
universe@693 481 CxAllocator const *allocator,
universe@583 482 cxstring string
universe@583 483 ) {
universe@581 484 cxmutstr result = {
universe@581 485 cxMalloc(allocator, string.length + 1),
universe@581 486 string.length
universe@581 487 };
universe@581 488 if (result.ptr == NULL) {
universe@581 489 result.length = 0;
universe@581 490 return result;
universe@581 491 }
universe@581 492 memcpy(result.ptr, string.ptr, string.length);
universe@581 493 result.ptr[string.length] = '\0';
universe@581 494 return result;
universe@581 495 }
universe@581 496
universe@581 497 cxstring cx_strtrim(cxstring string) {
universe@581 498 cxstring result = string;
universe@581 499 // TODO: optimize by comparing multiple bytes at once
universe@581 500 while (result.length > 0 && isspace(*result.ptr)) {
universe@581 501 result.ptr++;
universe@581 502 result.length--;
universe@581 503 }
universe@581 504 while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
universe@581 505 result.length--;
universe@581 506 }
universe@581 507 return result;
universe@581 508 }
universe@581 509
universe@581 510 cxmutstr cx_strtrim_m(cxmutstr string) {
universe@581 511 cxstring result = cx_strtrim(cx_strcast(string));
universe@581 512 return (cxmutstr) {(char *) result.ptr, result.length};
universe@581 513 }
universe@581 514
universe@583 515 bool cx_strprefix(
universe@583 516 cxstring string,
universe@583 517 cxstring prefix
universe@583 518 ) {
universe@581 519 if (string.length < prefix.length) return false;
universe@581 520 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
universe@581 521 }
universe@581 522
universe@583 523 bool cx_strsuffix(
universe@583 524 cxstring string,
universe@583 525 cxstring suffix
universe@583 526 ) {
universe@581 527 if (string.length < suffix.length) return false;
universe@581 528 return memcmp(string.ptr + string.length - suffix.length,
universe@581 529 suffix.ptr, suffix.length) == 0;
universe@581 530 }
universe@581 531
universe@583 532 bool cx_strcaseprefix(
universe@583 533 cxstring string,
universe@583 534 cxstring prefix
universe@583 535 ) {
universe@581 536 if (string.length < prefix.length) return false;
universe@581 537 #ifdef _WIN32
universe@581 538 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
universe@581 539 #else
universe@581 540 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
universe@581 541 #endif
universe@581 542 }
universe@581 543
universe@583 544 bool cx_strcasesuffix(
universe@583 545 cxstring string,
universe@583 546 cxstring suffix
universe@583 547 ) {
universe@581 548 if (string.length < suffix.length) return false;
universe@581 549 #ifdef _WIN32
universe@581 550 return _strnicmp(string.ptr+string.length-suffix.length,
universe@581 551 suffix.ptr, suffix.length) == 0;
universe@581 552 #else
universe@581 553 return strncasecmp(string.ptr + string.length - suffix.length,
universe@581 554 suffix.ptr, suffix.length) == 0;
universe@581 555 #endif
universe@581 556 }
universe@582 557
universe@582 558 void cx_strlower(cxmutstr string) {
universe@582 559 cx_for_n(i, string.length) {
universe@593 560 string.ptr[i] = (char) tolower(string.ptr[i]);
universe@582 561 }
universe@582 562 }
universe@582 563
universe@582 564 void cx_strupper(cxmutstr string) {
universe@582 565 cx_for_n(i, string.length) {
universe@593 566 string.ptr[i] = (char) toupper(string.ptr[i]);
universe@582 567 }
universe@582 568 }
universe@583 569
// Upper bound for the number of match indices stored in a single index
// buffer of cx_strreplacen_a(). May be overridden at compile time.
#if !defined(CX_STRREPLACE_INDEX_BUFFER_SIZE)
#define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
#endif

// One node in the chain of index buffers used by cx_strreplacen_a().
struct cx_strreplace_ibuf {
    // array of match positions
    size_t *buf;
    // the following node in the chain, or NULL
    struct cx_strreplace_ibuf *next;
    // number of positions currently stored in buf
    unsigned len;
};
universe@583 579
universe@583 580 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
universe@583 581 while (buf) {
universe@583 582 struct cx_strreplace_ibuf *next = buf->next;
universe@583 583 free(buf->buf);
universe@583 584 free(buf);
universe@583 585 buf = next;
universe@583 586 }
universe@583 587 }
universe@583 588
universe@583 589 cxmutstr cx_strreplacen_a(
universe@693 590 CxAllocator const *allocator,
universe@583 591 cxstring str,
universe@583 592 cxstring pattern,
universe@583 593 cxstring replacement,
universe@583 594 size_t replmax
universe@583 595 ) {
universe@583 596
universe@583 597 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
universe@583 598 return cx_strdup_a(allocator, str);
universe@583 599
universe@628 600 // Compute expected buffer length
universe@583 601 size_t ibufmax = str.length / pattern.length;
universe@583 602 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
universe@643 603 if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
universe@643 604 ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
universe@583 605 }
universe@583 606
universe@628 607 // Allocate first index buffer
universe@583 608 struct cx_strreplace_ibuf *firstbuf, *curbuf;
universe@583 609 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
universe@583 610 if (!firstbuf) return cx_mutstrn(NULL, 0);
universe@583 611 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
universe@583 612 if (!firstbuf->buf) {
universe@583 613 free(firstbuf);
universe@583 614 return cx_mutstrn(NULL, 0);
universe@583 615 }
universe@583 616
universe@628 617 // Search occurrences
universe@583 618 cxstring searchstr = str;
universe@583 619 size_t found = 0;
universe@583 620 do {
universe@583 621 cxstring match = cx_strstr(searchstr, pattern);
universe@583 622 if (match.length > 0) {
universe@628 623 // Allocate next buffer in chain, if required
universe@583 624 if (curbuf->len == ibuflen) {
universe@583 625 struct cx_strreplace_ibuf *nextbuf =
universe@583 626 calloc(1, sizeof(struct cx_strreplace_ibuf));
universe@583 627 if (!nextbuf) {
universe@583 628 cx_strrepl_free_ibuf(firstbuf);
universe@583 629 return cx_mutstrn(NULL, 0);
universe@583 630 }
universe@583 631 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
universe@583 632 if (!nextbuf->buf) {
universe@583 633 free(nextbuf);
universe@583 634 cx_strrepl_free_ibuf(firstbuf);
universe@583 635 return cx_mutstrn(NULL, 0);
universe@583 636 }
universe@583 637 curbuf->next = nextbuf;
universe@583 638 curbuf = nextbuf;
universe@583 639 }
universe@583 640
universe@628 641 // Record match index
universe@583 642 found++;
universe@583 643 size_t idx = match.ptr - str.ptr;
universe@583 644 curbuf->buf[curbuf->len++] = idx;
universe@583 645 searchstr.ptr = match.ptr + pattern.length;
universe@583 646 searchstr.length = str.length - idx - pattern.length;
universe@583 647 } else {
universe@583 648 break;
universe@583 649 }
universe@583 650 } while (searchstr.length > 0 && found < replmax);
universe@583 651
universe@628 652 // Allocate result string
universe@583 653 cxmutstr result;
universe@583 654 {
universe@583 655 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
universe@583 656 size_t rcount = 0;
universe@583 657 curbuf = firstbuf;
universe@583 658 do {
universe@583 659 rcount += curbuf->len;
universe@583 660 curbuf = curbuf->next;
universe@583 661 } while (curbuf);
universe@583 662 result.length = str.length + rcount * adjlen;
universe@590 663 result.ptr = cxMalloc(allocator, result.length + 1);
universe@583 664 if (!result.ptr) {
universe@583 665 cx_strrepl_free_ibuf(firstbuf);
universe@583 666 return cx_mutstrn(NULL, 0);
universe@583 667 }
universe@583 668 }
universe@583 669
universe@628 670 // Build result string
universe@583 671 curbuf = firstbuf;
universe@583 672 size_t srcidx = 0;
universe@583 673 char *destptr = result.ptr;
universe@583 674 do {
universe@583 675 for (size_t i = 0; i < curbuf->len; i++) {
universe@628 676 // Copy source part up to next match
universe@583 677 size_t idx = curbuf->buf[i];
universe@583 678 size_t srclen = idx - srcidx;
universe@583 679 if (srclen > 0) {
universe@583 680 memcpy(destptr, str.ptr + srcidx, srclen);
universe@583 681 destptr += srclen;
universe@583 682 srcidx += srclen;
universe@583 683 }
universe@583 684
universe@628 685 // Copy the replacement and skip the source pattern
universe@583 686 srcidx += pattern.length;
universe@583 687 memcpy(destptr, replacement.ptr, replacement.length);
universe@583 688 destptr += replacement.length;
universe@583 689 }
universe@583 690 curbuf = curbuf->next;
universe@583 691 } while (curbuf);
universe@583 692 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
universe@583 693
universe@628 694 // Result is guaranteed to be zero-terminated
universe@590 695 result.ptr[result.length] = '\0';
universe@590 696
universe@628 697 // Free index buffer
universe@583 698 cx_strrepl_free_ibuf(firstbuf);
universe@583 699
universe@583 700 return result;
universe@583 701 }
universe@583 702
universe@645 703 CxStrtokCtx cx_strtok(
universe@645 704 cxstring str,
universe@645 705 cxstring delim,
universe@645 706 size_t limit
universe@645 707 ) {
universe@645 708 CxStrtokCtx ctx;
universe@645 709 ctx.str = str;
universe@645 710 ctx.delim = delim;
universe@645 711 ctx.limit = limit;
universe@645 712 ctx.pos = 0;
universe@645 713 ctx.next_pos = 0;
universe@645 714 ctx.delim_pos = 0;
universe@645 715 ctx.found = 0;
universe@645 716 ctx.delim_more = NULL;
universe@645 717 ctx.delim_more_count = 0;
universe@645 718 return ctx;
universe@645 719 }
universe@583 720
universe@645 721 CxStrtokCtx cx_strtok_m(
universe@645 722 cxmutstr str,
universe@645 723 cxstring delim,
universe@645 724 size_t limit
universe@645 725 ) {
universe@645 726 return cx_strtok(cx_strcast(str), delim, limit);
universe@645 727 }
universe@645 728
universe@645 729 bool cx_strtok_next(
universe@645 730 CxStrtokCtx *ctx,
universe@645 731 cxstring *token
universe@645 732 ) {
universe@645 733 // abortion criteria
universe@645 734 if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
universe@645 735 return false;
universe@645 736 }
universe@645 737
universe@645 738 // determine the search start
universe@645 739 cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos);
universe@645 740
universe@645 741 // search the next delimiter
universe@645 742 cxstring delim = cx_strstr(haystack, ctx->delim);
universe@645 743
universe@645 744 // if found, make delim capture exactly the delimiter
universe@645 745 if (delim.length > 0) {
universe@645 746 delim.length = ctx->delim.length;
universe@645 747 }
universe@645 748
universe@645 749 // if more delimiters are specified, check them now
universe@645 750 if (ctx->delim_more_count > 0) {
universe@645 751 cx_for_n(i, ctx->delim_more_count) {
universe@645 752 cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
universe@645 753 if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) {
universe@645 754 delim.ptr = d.ptr;
universe@645 755 delim.length = ctx->delim_more[i].length;
universe@645 756 }
universe@645 757 }
universe@645 758 }
universe@645 759
universe@645 760 // store the token information and adjust the context
universe@645 761 ctx->found++;
universe@645 762 ctx->pos = ctx->next_pos;
universe@645 763 token->ptr = &ctx->str.ptr[ctx->pos];
universe@645 764 ctx->delim_pos = delim.length == 0 ?
universe@645 765 ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
universe@645 766 token->length = ctx->delim_pos - ctx->pos;
universe@645 767 ctx->next_pos = ctx->delim_pos + delim.length;
universe@645 768
universe@645 769 return true;
universe@645 770 }
universe@645 771
universe@645 772 bool cx_strtok_next_m(
universe@645 773 CxStrtokCtx *ctx,
universe@645 774 cxmutstr *token
universe@645 775 ) {
universe@645 776 return cx_strtok_next(ctx, (cxstring *) token);
universe@645 777 }
universe@645 778
universe@645 779 void cx_strtok_delim(
universe@645 780 CxStrtokCtx *ctx,
universe@645 781 cxstring const *delim,
universe@645 782 size_t count
universe@645 783 ) {
universe@645 784 ctx->delim_more = delim;
universe@645 785 ctx->delim_more_count = count;
universe@645 786 }

mercurial