src/string.c

changeset 583
0f3c9662f9b5
parent 582
96fa7fa6af4f
child 590
02a56701a5cb
equal deleted inserted replaced
582:96fa7fa6af4f 583:0f3c9662f9b5
70 free(str->ptr); 70 free(str->ptr);
71 str->ptr = NULL; 71 str->ptr = NULL;
72 str->length = 0; 72 str->length = 0;
73 } 73 }
74 74
75 void cx_strfree_a(
76 CxAllocator *alloc,
77 cxmutstr *str
78 ) {
79 cxFree(alloc, str->ptr);
80 str->ptr = NULL;
81 str->length = 0;
82 }
83
75 size_t cx_strlen( 84 size_t cx_strlen(
76 size_t count, 85 size_t count,
77 ... 86 ...
78 ) { 87 ) {
79 if (count == 0) return 0; 88 if (count == 0) return 0;
233 ) { 242 ) {
234 if (needle.length == 0) { 243 if (needle.length == 0) {
235 return haystack; 244 return haystack;
236 } 245 }
237 246
247 /* optimize for single-char needles */
248 if (needle.length == 1) {
249 return cx_strchr(haystack, *needle.ptr);
250 }
251
238 /* 252 /*
239 * IMPORTANT: 253 * IMPORTANT:
240 * Our prefix table contains the prefix length PLUS ONE 254 * Our prefix table contains the prefix length PLUS ONE
241 * this is our decision, because we want to use the full range of size_t. 255 * this is our decision, because we want to use the full range of size_t.
242 * The original algorithm needs a (-1) at one single place, 256 * The original algorithm needs a (-1) at one single place,
306 cxstring string, 320 cxstring string,
307 cxstring delim, 321 cxstring delim,
308 size_t limit, 322 size_t limit,
309 cxstring *output 323 cxstring *output
310 ) { 324 ) {
311 // TODO: implement 325 /* special case: output limit is zero */
312 return 0; 326 if (limit == 0) return 0;
327
328 /* special case: delimiter is empty */
329 if (delim.length == 0) {
330 output[0] = string;
331 return 1;
332 }
333
334 /* special cases: delimiter is at least as large as the string */
335 if (delim.length >= string.length) {
336 /* exact match */
337 if (cx_strcmp(string, delim) == 0) {
338 output[0] = cx_strn(string.ptr, 0);
339 output[1] = cx_strn(string.ptr + string.length, 0);
340 return 2;
341 } else /* no match possible */ {
342 output[0] = string;
343 return 1;
344 }
345 }
346
347 size_t n = 0;
348 cxstring curpos = string;
349 while (1) {
350 ++n;
351 cxstring match = cx_strstr(curpos, delim);
352 if (match.length > 0) {
353 /* is the limit reached? */
354 if (n < limit) {
355 /* copy the current string to the array */
356 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
357 output[n - 1] = item;
358 size_t processed = item.length + delim.length;
359 curpos.ptr += processed;
360 curpos.length -= processed;
361 } else {
362 /* limit reached, copy the _full_ remaining string */
363 output[n - 1] = curpos;
364 break;
365 }
366 } else {
367 /* no more matches, copy last string */
368 output[n - 1] = curpos;
369 break;
370 }
371 }
372
373 return n;
313 } 374 }
314 375
315 size_t cx_strsplit_a( 376 size_t cx_strsplit_a(
316 CxAllocator *allocator, 377 CxAllocator *allocator,
317 cxstring string, 378 cxstring string,
318 cxstring delim, 379 cxstring delim,
319 size_t limit, 380 size_t limit,
320 cxstring **output 381 cxstring **output
321 ) { 382 ) {
322 // TODO: implement 383 /* find out how many splits we're going to make and allocate memory */
323 return 0; 384 size_t n = 0;
385 cxstring curpos = string;
386 while (1) {
387 ++n;
388 cxstring match = cx_strstr(curpos, delim);
389 if (match.length > 0) {
390 /* is the limit reached? */
391 if (n < limit) {
392 size_t processed = match.ptr - curpos.ptr + delim.length;
393 curpos.ptr += processed;
394 curpos.length -= processed;
395 } else {
396 /* limit reached */
397 break;
398 }
399 } else {
400 /* no more matches */
401 break;
402 }
403 }
404 *output = cxCalloc(allocator, n, sizeof(cxstring));
405 return cx_strsplit(string, delim, n, *output);
324 } 406 }
325 407
326 size_t cx_strsplit_m( 408 size_t cx_strsplit_m(
327 cxmutstr string, 409 cxmutstr string,
328 cxstring delim, 410 cxstring delim,
342 ) { 424 ) {
343 return cx_strsplit_a(allocator, cx_strcast(string), 425 return cx_strsplit_a(allocator, cx_strcast(string),
344 delim, limit, (cxstring **) output); 426 delim, limit, (cxstring **) output);
345 } 427 }
346 428
347 int cx_strcmp(cxstring s1, cxstring s2) { 429 int cx_strcmp(
430 cxstring s1,
431 cxstring s2
432 ) {
348 if (s1.length == s2.length) { 433 if (s1.length == s2.length) {
349 return memcmp(s1.ptr, s2.ptr, s1.length); 434 return memcmp(s1.ptr, s2.ptr, s1.length);
350 } else if (s1.length > s2.length) { 435 } else if (s1.length > s2.length) {
351 return 1; 436 return 1;
352 } else { 437 } else {
353 return -1; 438 return -1;
354 } 439 }
355 } 440 }
356 441
357 int cx_strcasecmp(cxstring s1, cxstring s2) { 442 int cx_strcasecmp(
443 cxstring s1,
444 cxstring s2
445 ) {
358 if (s1.length == s2.length) { 446 if (s1.length == s2.length) {
359 #ifdef _WIN32 447 #ifdef _WIN32
360 return _strnicmp(s1.ptr, s2.ptr, s1.length); 448 return _strnicmp(s1.ptr, s2.ptr, s1.length);
361 #else 449 #else
362 return strncasecmp(s1.ptr, s2.ptr, s1.length); 450 return strncasecmp(s1.ptr, s2.ptr, s1.length);
366 } else { 454 } else {
367 return -1; 455 return -1;
368 } 456 }
369 } 457 }
370 458
371 cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) { 459 cxmutstr cx_strdup_a(
460 CxAllocator *allocator,
461 cxstring string
462 ) {
372 cxmutstr result = { 463 cxmutstr result = {
373 cxMalloc(allocator, string.length + 1), 464 cxMalloc(allocator, string.length + 1),
374 string.length 465 string.length
375 }; 466 };
376 if (result.ptr == NULL) { 467 if (result.ptr == NULL) {
398 cxmutstr cx_strtrim_m(cxmutstr string) { 489 cxmutstr cx_strtrim_m(cxmutstr string) {
399 cxstring result = cx_strtrim(cx_strcast(string)); 490 cxstring result = cx_strtrim(cx_strcast(string));
400 return (cxmutstr) {(char *) result.ptr, result.length}; 491 return (cxmutstr) {(char *) result.ptr, result.length};
401 } 492 }
402 493
403 bool cx_strprefix(cxstring string, cxstring prefix) { 494 bool cx_strprefix(
495 cxstring string,
496 cxstring prefix
497 ) {
404 if (string.length < prefix.length) return false; 498 if (string.length < prefix.length) return false;
405 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; 499 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
406 } 500 }
407 501
408 bool cx_strsuffix(cxstring string, cxstring suffix) { 502 bool cx_strsuffix(
503 cxstring string,
504 cxstring suffix
505 ) {
409 if (string.length < suffix.length) return false; 506 if (string.length < suffix.length) return false;
410 return memcmp(string.ptr + string.length - suffix.length, 507 return memcmp(string.ptr + string.length - suffix.length,
411 suffix.ptr, suffix.length) == 0; 508 suffix.ptr, suffix.length) == 0;
412 } 509 }
413 510
414 bool cx_casestrprefix(cxstring string, cxstring prefix) { 511 bool cx_strcaseprefix(
512 cxstring string,
513 cxstring prefix
514 ) {
415 if (string.length < prefix.length) return false; 515 if (string.length < prefix.length) return false;
416 #ifdef _WIN32 516 #ifdef _WIN32
417 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; 517 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
418 #else 518 #else
419 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; 519 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
420 #endif 520 #endif
421 } 521 }
422 522
423 bool cx_casestrsuffix(cxstring string, cxstring suffix) { 523 bool cx_strcasesuffix(
524 cxstring string,
525 cxstring suffix
526 ) {
424 if (string.length < suffix.length) return false; 527 if (string.length < suffix.length) return false;
425 #ifdef _WIN32 528 #ifdef _WIN32
426 return _strnicmp(string.ptr+string.length-suffix.length, 529 return _strnicmp(string.ptr+string.length-suffix.length,
427 suffix.ptr, suffix.length) == 0; 530 suffix.ptr, suffix.length) == 0;
428 #else 531 #else
440 void cx_strupper(cxmutstr string) { 543 void cx_strupper(cxmutstr string) {
441 cx_for_n(i, string.length) { 544 cx_for_n(i, string.length) {
442 string.ptr[i] = toupper(string.ptr[i]); 545 string.ptr[i] = toupper(string.ptr[i]);
443 } 546 }
444 } 547 }
548
549 #define REPLACE_INDEX_BUFFER_MAX 100
550
551 struct cx_strreplace_ibuf {
552 size_t *buf;
553 unsigned int len; /* small indices */
554 struct cx_strreplace_ibuf *next;
555 };
556
557 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
558 while (buf) {
559 struct cx_strreplace_ibuf *next = buf->next;
560 free(buf->buf);
561 free(buf);
562 buf = next;
563 }
564 }
565
566 cxmutstr cx_strreplacen_a(
567 CxAllocator *allocator,
568 cxstring str,
569 cxstring pattern,
570 cxstring replacement,
571 size_t replmax
572 ) {
573
574 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
575 return cx_strdup_a(allocator, str);
576
577 /* Compute expected buffer length */
578 size_t ibufmax = str.length / pattern.length;
579 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
580 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
581 ibuflen = REPLACE_INDEX_BUFFER_MAX;
582 }
583
584 /* Allocate first index buffer */
585 struct cx_strreplace_ibuf *firstbuf, *curbuf;
586 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
587 if (!firstbuf) return cx_mutstrn(NULL, 0);
588 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
589 if (!firstbuf->buf) {
590 free(firstbuf);
591 return cx_mutstrn(NULL, 0);
592 }
593
594 /* Search occurrences */
595 cxstring searchstr = str;
596 size_t found = 0;
597 do {
598 cxstring match = cx_strstr(searchstr, pattern);
599 if (match.length > 0) {
600 /* Allocate next buffer in chain, if required */
601 if (curbuf->len == ibuflen) {
602 struct cx_strreplace_ibuf *nextbuf =
603 calloc(1, sizeof(struct cx_strreplace_ibuf));
604 if (!nextbuf) {
605 cx_strrepl_free_ibuf(firstbuf);
606 return cx_mutstrn(NULL, 0);
607 }
608 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
609 if (!nextbuf->buf) {
610 free(nextbuf);
611 cx_strrepl_free_ibuf(firstbuf);
612 return cx_mutstrn(NULL, 0);
613 }
614 curbuf->next = nextbuf;
615 curbuf = nextbuf;
616 }
617
618 /* Record match index */
619 found++;
620 size_t idx = match.ptr - str.ptr;
621 curbuf->buf[curbuf->len++] = idx;
622 searchstr.ptr = match.ptr + pattern.length;
623 searchstr.length = str.length - idx - pattern.length;
624 } else {
625 break;
626 }
627 } while (searchstr.length > 0 && found < replmax);
628
629 /* Allocate result string */
630 cxmutstr result;
631 {
632 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
633 size_t rcount = 0;
634 curbuf = firstbuf;
635 do {
636 rcount += curbuf->len;
637 curbuf = curbuf->next;
638 } while (curbuf);
639 result.length = str.length + rcount * adjlen;
640 result.ptr = cxMalloc(allocator, result.length);
641 if (!result.ptr) {
642 cx_strrepl_free_ibuf(firstbuf);
643 return cx_mutstrn(NULL, 0);
644 }
645 }
646
647 /* Build result string */
648 curbuf = firstbuf;
649 size_t srcidx = 0;
650 char *destptr = result.ptr;
651 do {
652 for (size_t i = 0; i < curbuf->len; i++) {
653 /* Copy source part up to next match*/
654 size_t idx = curbuf->buf[i];
655 size_t srclen = idx - srcidx;
656 if (srclen > 0) {
657 memcpy(destptr, str.ptr + srcidx, srclen);
658 destptr += srclen;
659 srcidx += srclen;
660 }
661
662 /* Copy the replacement and skip the source pattern */
663 srcidx += pattern.length;
664 memcpy(destptr, replacement.ptr, replacement.length);
665 destptr += replacement.length;
666 }
667 curbuf = curbuf->next;
668 } while (curbuf);
669 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
670
671 /* Free index buffer */
672 cx_strrepl_free_ibuf(firstbuf);
673
674 return result;
675 }
676
677

mercurial