src/string.c

changeset 628
1e2be40f0cb5
parent 593
ea9b41b5ebbc
child 643
5700ba9154ab
equal deleted inserted replaced
627:cc8cbabd27cd 628:1e2be40f0cb5
33 #include <stdarg.h> 33 #include <stdarg.h>
34 #include <ctype.h> 34 #include <ctype.h>
35 35
36 #ifndef _WIN32 36 #ifndef _WIN32
37 37
38 #include <strings.h> /* for strncasecmp() */ 38 #include <strings.h> // for strncasecmp()
39 39
40 #endif /* _WIN32 */ 40 #endif // _WIN32
41 41
42 cxmutstr cx_mutstr(char *cstring) { 42 cxmutstr cx_mutstr(char *cstring) {
43 return (cxmutstr) {cstring, strlen(cstring)}; 43 return (cxmutstr) {cstring, strlen(cstring)};
44 } 44 }
45 45
234 ) { 234 ) {
235 if (needle.length == 0) { 235 if (needle.length == 0) {
236 return haystack; 236 return haystack;
237 } 237 }
238 238
239 /* optimize for single-char needles */ 239 // optimize for single-char needles
240 if (needle.length == 1) { 240 if (needle.length == 1) {
241 return cx_strchr(haystack, *needle.ptr); 241 return cx_strchr(haystack, *needle.ptr);
242 } 242 }
243 243
244 /* 244 /*
247 * this is our decision, because we want to use the full range of size_t. 247 * this is our decision, because we want to use the full range of size_t.
248 * The original algorithm needs a (-1) at one single place, 248 * The original algorithm needs a (-1) at one single place,
249 * and we want to avoid that. 249 * and we want to avoid that.
250 */ 250 */
251 251
252 /* local prefix table */ 252 // local prefix table
253 size_t s_prefix_table[STRSTR_SBO_BUFLEN]; 253 size_t s_prefix_table[STRSTR_SBO_BUFLEN];
254 254
255 /* check needle length and use appropriate prefix table */ 255 // check needle length and use appropriate prefix table
256 /* if the pattern exceeds static prefix table, allocate on the heap */ 256 // if the pattern exceeds static prefix table, allocate on the heap
257 bool useheap = needle.length >= STRSTR_SBO_BUFLEN; 257 bool useheap = needle.length >= STRSTR_SBO_BUFLEN;
258 register size_t *ptable = useheap ? calloc(needle.length + 1, 258 register size_t *ptable = useheap ? calloc(needle.length + 1,
259 sizeof(size_t)) : s_prefix_table; 259 sizeof(size_t)) : s_prefix_table;
260 260
261 /* keep counter in registers */ 261 // keep counter in registers
262 register size_t i, j; 262 register size_t i, j;
263 263
264 /* fill prefix table */ 264 // fill prefix table
265 i = 0; 265 i = 0;
266 j = 0; 266 j = 0;
267 ptable[i] = j; 267 ptable[i] = j;
268 while (i < needle.length) { 268 while (i < needle.length) {
269 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { 269 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
272 i++; 272 i++;
273 j++; 273 j++;
274 ptable[i] = j; 274 ptable[i] = j;
275 } 275 }
276 276
277 /* search */ 277 // search
278 cxstring result = {NULL, 0}; 278 cxstring result = {NULL, 0};
279 i = 0; 279 i = 0;
280 j = 1; 280 j = 1;
281 while (i < haystack.length) { 281 while (i < haystack.length) {
282 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { 282 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
290 result.length = haystack.length - start; 290 result.length = haystack.length - start;
291 break; 291 break;
292 } 292 }
293 } 293 }
294 294
295 /* if prefix table was allocated on the heap, free it */ 295 // if prefix table was allocated on the heap, free it
296 if (ptable != s_prefix_table) { 296 if (ptable != s_prefix_table) {
297 free(ptable); 297 free(ptable);
298 } 298 }
299 299
300 return result; 300 return result;
312 cxstring string, 312 cxstring string,
313 cxstring delim, 313 cxstring delim,
314 size_t limit, 314 size_t limit,
315 cxstring *output 315 cxstring *output
316 ) { 316 ) {
317 /* special case: output limit is zero */ 317 // special case: output limit is zero
318 if (limit == 0) return 0; 318 if (limit == 0) return 0;
319 319
320 /* special case: delimiter is empty */ 320 // special case: delimiter is empty
321 if (delim.length == 0) { 321 if (delim.length == 0) {
322 output[0] = string; 322 output[0] = string;
323 return 1; 323 return 1;
324 } 324 }
325 325
326 /* special cases: delimiter is at least as large as the string */ 326 // special cases: delimiter is at least as large as the string
327 if (delim.length >= string.length) { 327 if (delim.length >= string.length) {
328 /* exact match */ 328 // exact match
329 if (cx_strcmp(string, delim) == 0) { 329 if (cx_strcmp(string, delim) == 0) {
330 output[0] = cx_strn(string.ptr, 0); 330 output[0] = cx_strn(string.ptr, 0);
331 output[1] = cx_strn(string.ptr + string.length, 0); 331 output[1] = cx_strn(string.ptr + string.length, 0);
332 return 2; 332 return 2;
333 } else /* no match possible */ { 333 } else {
334 // no match possible
334 output[0] = string; 335 output[0] = string;
335 return 1; 336 return 1;
336 } 337 }
337 } 338 }
338 339
340 cxstring curpos = string; 341 cxstring curpos = string;
341 while (1) { 342 while (1) {
342 ++n; 343 ++n;
343 cxstring match = cx_strstr(curpos, delim); 344 cxstring match = cx_strstr(curpos, delim);
344 if (match.length > 0) { 345 if (match.length > 0) {
345 /* is the limit reached? */ 346 // is the limit reached?
346 if (n < limit) { 347 if (n < limit) {
347 /* copy the current string to the array */ 348 // copy the current string to the array
348 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); 349 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
349 output[n - 1] = item; 350 output[n - 1] = item;
350 size_t processed = item.length + delim.length; 351 size_t processed = item.length + delim.length;
351 curpos.ptr += processed; 352 curpos.ptr += processed;
352 curpos.length -= processed; 353 curpos.length -= processed;
353 } else { 354 } else {
354 /* limit reached, copy the _full_ remaining string */ 355 // limit reached, copy the _full_ remaining string
355 output[n - 1] = curpos; 356 output[n - 1] = curpos;
356 break; 357 break;
357 } 358 }
358 } else { 359 } else {
359 /* no more matches, copy last string */ 360 // no more matches, copy last string
360 output[n - 1] = curpos; 361 output[n - 1] = curpos;
361 break; 362 break;
362 } 363 }
363 } 364 }
364 365
370 cxstring string, 371 cxstring string,
371 cxstring delim, 372 cxstring delim,
372 size_t limit, 373 size_t limit,
373 cxstring **output 374 cxstring **output
374 ) { 375 ) {
375 /* find out how many splits we're going to make and allocate memory */ 376 // find out how many splits we're going to make and allocate memory
376 size_t n = 0; 377 size_t n = 0;
377 cxstring curpos = string; 378 cxstring curpos = string;
378 while (1) { 379 while (1) {
379 ++n; 380 ++n;
380 cxstring match = cx_strstr(curpos, delim); 381 cxstring match = cx_strstr(curpos, delim);
381 if (match.length > 0) { 382 if (match.length > 0) {
382 /* is the limit reached? */ 383 // is the limit reached?
383 if (n < limit) { 384 if (n < limit) {
384 size_t processed = match.ptr - curpos.ptr + delim.length; 385 size_t processed = match.ptr - curpos.ptr + delim.length;
385 curpos.ptr += processed; 386 curpos.ptr += processed;
386 curpos.length -= processed; 387 curpos.length -= processed;
387 } else { 388 } else {
388 /* limit reached */ 389 // limit reached
389 break; 390 break;
390 } 391 }
391 } else { 392 } else {
392 /* no more matches */ 393 // no more matches
393 break; 394 break;
394 } 395 }
395 } 396 }
396 *output = cxCalloc(allocator, n, sizeof(cxstring)); 397 *output = cxCalloc(allocator, n, sizeof(cxstring));
397 return cx_strsplit(string, delim, n, *output); 398 return cx_strsplit(string, delim, n, *output);
564 ) { 565 ) {
565 566
566 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) 567 if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
567 return cx_strdup_a(allocator, str); 568 return cx_strdup_a(allocator, str);
568 569
569 /* Compute expected buffer length */ 570 // Compute expected buffer length
570 size_t ibufmax = str.length / pattern.length; 571 size_t ibufmax = str.length / pattern.length;
571 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; 572 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
572 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { 573 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
573 ibuflen = REPLACE_INDEX_BUFFER_MAX; 574 ibuflen = REPLACE_INDEX_BUFFER_MAX;
574 } 575 }
575 576
576 /* Allocate first index buffer */ 577 // Allocate first index buffer
577 struct cx_strreplace_ibuf *firstbuf, *curbuf; 578 struct cx_strreplace_ibuf *firstbuf, *curbuf;
578 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); 579 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
579 if (!firstbuf) return cx_mutstrn(NULL, 0); 580 if (!firstbuf) return cx_mutstrn(NULL, 0);
580 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); 581 firstbuf->buf = calloc(ibuflen, sizeof(size_t));
581 if (!firstbuf->buf) { 582 if (!firstbuf->buf) {
582 free(firstbuf); 583 free(firstbuf);
583 return cx_mutstrn(NULL, 0); 584 return cx_mutstrn(NULL, 0);
584 } 585 }
585 586
586 /* Search occurrences */ 587 // Search occurrences
587 cxstring searchstr = str; 588 cxstring searchstr = str;
588 size_t found = 0; 589 size_t found = 0;
589 do { 590 do {
590 cxstring match = cx_strstr(searchstr, pattern); 591 cxstring match = cx_strstr(searchstr, pattern);
591 if (match.length > 0) { 592 if (match.length > 0) {
592 /* Allocate next buffer in chain, if required */ 593 // Allocate next buffer in chain, if required
593 if (curbuf->len == ibuflen) { 594 if (curbuf->len == ibuflen) {
594 struct cx_strreplace_ibuf *nextbuf = 595 struct cx_strreplace_ibuf *nextbuf =
595 calloc(1, sizeof(struct cx_strreplace_ibuf)); 596 calloc(1, sizeof(struct cx_strreplace_ibuf));
596 if (!nextbuf) { 597 if (!nextbuf) {
597 cx_strrepl_free_ibuf(firstbuf); 598 cx_strrepl_free_ibuf(firstbuf);
605 } 606 }
606 curbuf->next = nextbuf; 607 curbuf->next = nextbuf;
607 curbuf = nextbuf; 608 curbuf = nextbuf;
608 } 609 }
609 610
610 /* Record match index */ 611 // Record match index
611 found++; 612 found++;
612 size_t idx = match.ptr - str.ptr; 613 size_t idx = match.ptr - str.ptr;
613 curbuf->buf[curbuf->len++] = idx; 614 curbuf->buf[curbuf->len++] = idx;
614 searchstr.ptr = match.ptr + pattern.length; 615 searchstr.ptr = match.ptr + pattern.length;
615 searchstr.length = str.length - idx - pattern.length; 616 searchstr.length = str.length - idx - pattern.length;
616 } else { 617 } else {
617 break; 618 break;
618 } 619 }
619 } while (searchstr.length > 0 && found < replmax); 620 } while (searchstr.length > 0 && found < replmax);
620 621
621 /* Allocate result string */ 622 // Allocate result string
622 cxmutstr result; 623 cxmutstr result;
623 { 624 {
624 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; 625 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
625 size_t rcount = 0; 626 size_t rcount = 0;
626 curbuf = firstbuf; 627 curbuf = firstbuf;
634 cx_strrepl_free_ibuf(firstbuf); 635 cx_strrepl_free_ibuf(firstbuf);
635 return cx_mutstrn(NULL, 0); 636 return cx_mutstrn(NULL, 0);
636 } 637 }
637 } 638 }
638 639
639 /* Build result string */ 640 // Build result string
640 curbuf = firstbuf; 641 curbuf = firstbuf;
641 size_t srcidx = 0; 642 size_t srcidx = 0;
642 char *destptr = result.ptr; 643 char *destptr = result.ptr;
643 do { 644 do {
644 for (size_t i = 0; i < curbuf->len; i++) { 645 for (size_t i = 0; i < curbuf->len; i++) {
645 /* Copy source part up to next match*/ 646 // Copy source part up to next match
646 size_t idx = curbuf->buf[i]; 647 size_t idx = curbuf->buf[i];
647 size_t srclen = idx - srcidx; 648 size_t srclen = idx - srcidx;
648 if (srclen > 0) { 649 if (srclen > 0) {
649 memcpy(destptr, str.ptr + srcidx, srclen); 650 memcpy(destptr, str.ptr + srcidx, srclen);
650 destptr += srclen; 651 destptr += srclen;
651 srcidx += srclen; 652 srcidx += srclen;
652 } 653 }
653 654
654 /* Copy the replacement and skip the source pattern */ 655 // Copy the replacement and skip the source pattern
655 srcidx += pattern.length; 656 srcidx += pattern.length;
656 memcpy(destptr, replacement.ptr, replacement.length); 657 memcpy(destptr, replacement.ptr, replacement.length);
657 destptr += replacement.length; 658 destptr += replacement.length;
658 } 659 }
659 curbuf = curbuf->next; 660 curbuf = curbuf->next;
660 } while (curbuf); 661 } while (curbuf);
661 memcpy(destptr, str.ptr + srcidx, str.length - srcidx); 662 memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
662 663
663 /* Result is guaranteed to be zero-terminated */ 664 // Result is guaranteed to be zero-terminated
664 result.ptr[result.length] = '\0'; 665 result.ptr[result.length] = '\0';
665 666
666 /* Free index buffer */ 667 // Free index buffer
667 cx_strrepl_free_ibuf(firstbuf); 668 cx_strrepl_free_ibuf(firstbuf);
668 669
669 return result; 670 return result;
670 } 671 }
671 672

mercurial