247 * this is our decision, because we want to use the full range of size_t. |
247 * this is our decision, because we want to use the full range of size_t. |
248 * The original algorithm needs a (-1) at one single place, |
248 * The original algorithm needs a (-1) at one single place, |
249 * and we want to avoid that. |
249 * and we want to avoid that. |
250 */ |
250 */ |
251 |
251 |
252 /* local prefix table */ |
252 // local prefix table |
253 size_t s_prefix_table[STRSTR_SBO_BUFLEN]; |
253 size_t s_prefix_table[STRSTR_SBO_BUFLEN]; |
254 |
254 |
255 /* check needle length and use appropriate prefix table */ |
255 // check needle length and use appropriate prefix table |
256 /* if the pattern exceeds static prefix table, allocate on the heap */ |
256 // if the pattern exceeds static prefix table, allocate on the heap |
257 bool useheap = needle.length >= STRSTR_SBO_BUFLEN; |
257 bool useheap = needle.length >= STRSTR_SBO_BUFLEN; |
258 register size_t *ptable = useheap ? calloc(needle.length + 1, |
258 register size_t *ptable = useheap ? calloc(needle.length + 1, |
259 sizeof(size_t)) : s_prefix_table; |
259 sizeof(size_t)) : s_prefix_table; |
260 |
260 |
261 /* keep counter in registers */ |
261 // keep counter in registers |
262 register size_t i, j; |
262 register size_t i, j; |
263 |
263 |
264 /* fill prefix table */ |
264 // fill prefix table |
265 i = 0; |
265 i = 0; |
266 j = 0; |
266 j = 0; |
267 ptable[i] = j; |
267 ptable[i] = j; |
268 while (i < needle.length) { |
268 while (i < needle.length) { |
269 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { |
269 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { |
312 cxstring string, |
312 cxstring string, |
313 cxstring delim, |
313 cxstring delim, |
314 size_t limit, |
314 size_t limit, |
315 cxstring *output |
315 cxstring *output |
316 ) { |
316 ) { |
317 /* special case: output limit is zero */ |
317 // special case: output limit is zero |
318 if (limit == 0) return 0; |
318 if (limit == 0) return 0; |
319 |
319 |
320 /* special case: delimiter is empty */ |
320 // special case: delimiter is empty |
321 if (delim.length == 0) { |
321 if (delim.length == 0) { |
322 output[0] = string; |
322 output[0] = string; |
323 return 1; |
323 return 1; |
324 } |
324 } |
325 |
325 |
326 /* special cases: delimiter is at least as large as the string */ |
326 // special cases: delimiter is at least as large as the string |
327 if (delim.length >= string.length) { |
327 if (delim.length >= string.length) { |
328 /* exact match */ |
328 // exact match |
329 if (cx_strcmp(string, delim) == 0) { |
329 if (cx_strcmp(string, delim) == 0) { |
330 output[0] = cx_strn(string.ptr, 0); |
330 output[0] = cx_strn(string.ptr, 0); |
331 output[1] = cx_strn(string.ptr + string.length, 0); |
331 output[1] = cx_strn(string.ptr + string.length, 0); |
332 return 2; |
332 return 2; |
333 } else /* no match possible */ { |
333 } else { |
|
334 // no match possible |
334 output[0] = string; |
335 output[0] = string; |
335 return 1; |
336 return 1; |
336 } |
337 } |
337 } |
338 } |
338 |
339 |
340 cxstring curpos = string; |
341 cxstring curpos = string; |
341 while (1) { |
342 while (1) { |
342 ++n; |
343 ++n; |
343 cxstring match = cx_strstr(curpos, delim); |
344 cxstring match = cx_strstr(curpos, delim); |
344 if (match.length > 0) { |
345 if (match.length > 0) { |
345 /* is the limit reached? */ |
346 // is the limit reached? |
346 if (n < limit) { |
347 if (n < limit) { |
347 /* copy the current string to the array */ |
348 // copy the current string to the array |
348 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); |
349 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr); |
349 output[n - 1] = item; |
350 output[n - 1] = item; |
350 size_t processed = item.length + delim.length; |
351 size_t processed = item.length + delim.length; |
351 curpos.ptr += processed; |
352 curpos.ptr += processed; |
352 curpos.length -= processed; |
353 curpos.length -= processed; |
353 } else { |
354 } else { |
354 /* limit reached, copy the _full_ remaining string */ |
355 // limit reached, copy the _full_ remaining string |
355 output[n - 1] = curpos; |
356 output[n - 1] = curpos; |
356 break; |
357 break; |
357 } |
358 } |
358 } else { |
359 } else { |
359 /* no more matches, copy last string */ |
360 // no more matches, copy last string |
360 output[n - 1] = curpos; |
361 output[n - 1] = curpos; |
361 break; |
362 break; |
362 } |
363 } |
363 } |
364 } |
364 |
365 |
370 cxstring string, |
371 cxstring string, |
371 cxstring delim, |
372 cxstring delim, |
372 size_t limit, |
373 size_t limit, |
373 cxstring **output |
374 cxstring **output |
374 ) { |
375 ) { |
375 /* find out how many splits we're going to make and allocate memory */ |
376 // find out how many splits we're going to make and allocate memory |
376 size_t n = 0; |
377 size_t n = 0; |
377 cxstring curpos = string; |
378 cxstring curpos = string; |
378 while (1) { |
379 while (1) { |
379 ++n; |
380 ++n; |
380 cxstring match = cx_strstr(curpos, delim); |
381 cxstring match = cx_strstr(curpos, delim); |
381 if (match.length > 0) { |
382 if (match.length > 0) { |
382 /* is the limit reached? */ |
383 // is the limit reached? |
383 if (n < limit) { |
384 if (n < limit) { |
384 size_t processed = match.ptr - curpos.ptr + delim.length; |
385 size_t processed = match.ptr - curpos.ptr + delim.length; |
385 curpos.ptr += processed; |
386 curpos.ptr += processed; |
386 curpos.length -= processed; |
387 curpos.length -= processed; |
387 } else { |
388 } else { |
388 /* limit reached */ |
389 // limit reached |
389 break; |
390 break; |
390 } |
391 } |
391 } else { |
392 } else { |
392 /* no more matches */ |
393 // no more matches |
393 break; |
394 break; |
394 } |
395 } |
395 } |
396 } |
396 *output = cxCalloc(allocator, n, sizeof(cxstring)); |
397 *output = cxCalloc(allocator, n, sizeof(cxstring)); |
397 return cx_strsplit(string, delim, n, *output); |
398 return cx_strsplit(string, delim, n, *output); |
564 ) { |
565 ) { |
565 |
566 |
566 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) |
567 if (pattern.length == 0 || pattern.length > str.length || replmax == 0) |
567 return cx_strdup_a(allocator, str); |
568 return cx_strdup_a(allocator, str); |
568 |
569 |
569 /* Compute expected buffer length */ |
570 // Compute expected buffer length |
570 size_t ibufmax = str.length / pattern.length; |
571 size_t ibufmax = str.length / pattern.length; |
571 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; |
572 size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; |
572 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { |
573 if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { |
573 ibuflen = REPLACE_INDEX_BUFFER_MAX; |
574 ibuflen = REPLACE_INDEX_BUFFER_MAX; |
574 } |
575 } |
575 |
576 |
576 /* Allocate first index buffer */ |
577 // Allocate first index buffer |
577 struct cx_strreplace_ibuf *firstbuf, *curbuf; |
578 struct cx_strreplace_ibuf *firstbuf, *curbuf; |
578 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); |
579 firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf)); |
579 if (!firstbuf) return cx_mutstrn(NULL, 0); |
580 if (!firstbuf) return cx_mutstrn(NULL, 0); |
580 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); |
581 firstbuf->buf = calloc(ibuflen, sizeof(size_t)); |
581 if (!firstbuf->buf) { |
582 if (!firstbuf->buf) { |
582 free(firstbuf); |
583 free(firstbuf); |
583 return cx_mutstrn(NULL, 0); |
584 return cx_mutstrn(NULL, 0); |
584 } |
585 } |
585 |
586 |
586 /* Search occurrences */ |
587 // Search occurrences |
587 cxstring searchstr = str; |
588 cxstring searchstr = str; |
588 size_t found = 0; |
589 size_t found = 0; |
589 do { |
590 do { |
590 cxstring match = cx_strstr(searchstr, pattern); |
591 cxstring match = cx_strstr(searchstr, pattern); |
591 if (match.length > 0) { |
592 if (match.length > 0) { |
592 /* Allocate next buffer in chain, if required */ |
593 // Allocate next buffer in chain, if required |
593 if (curbuf->len == ibuflen) { |
594 if (curbuf->len == ibuflen) { |
594 struct cx_strreplace_ibuf *nextbuf = |
595 struct cx_strreplace_ibuf *nextbuf = |
595 calloc(1, sizeof(struct cx_strreplace_ibuf)); |
596 calloc(1, sizeof(struct cx_strreplace_ibuf)); |
596 if (!nextbuf) { |
597 if (!nextbuf) { |
597 cx_strrepl_free_ibuf(firstbuf); |
598 cx_strrepl_free_ibuf(firstbuf); |
605 } |
606 } |
606 curbuf->next = nextbuf; |
607 curbuf->next = nextbuf; |
607 curbuf = nextbuf; |
608 curbuf = nextbuf; |
608 } |
609 } |
609 |
610 |
610 /* Record match index */ |
611 // Record match index |
611 found++; |
612 found++; |
612 size_t idx = match.ptr - str.ptr; |
613 size_t idx = match.ptr - str.ptr; |
613 curbuf->buf[curbuf->len++] = idx; |
614 curbuf->buf[curbuf->len++] = idx; |
614 searchstr.ptr = match.ptr + pattern.length; |
615 searchstr.ptr = match.ptr + pattern.length; |
615 searchstr.length = str.length - idx - pattern.length; |
616 searchstr.length = str.length - idx - pattern.length; |
616 } else { |
617 } else { |
617 break; |
618 break; |
618 } |
619 } |
619 } while (searchstr.length > 0 && found < replmax); |
620 } while (searchstr.length > 0 && found < replmax); |
620 |
621 |
621 /* Allocate result string */ |
622 // Allocate result string |
622 cxmutstr result; |
623 cxmutstr result; |
623 { |
624 { |
624 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; |
625 ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; |
625 size_t rcount = 0; |
626 size_t rcount = 0; |
626 curbuf = firstbuf; |
627 curbuf = firstbuf; |
634 cx_strrepl_free_ibuf(firstbuf); |
635 cx_strrepl_free_ibuf(firstbuf); |
635 return cx_mutstrn(NULL, 0); |
636 return cx_mutstrn(NULL, 0); |
636 } |
637 } |
637 } |
638 } |
638 |
639 |
639 /* Build result string */ |
640 // Build result string |
640 curbuf = firstbuf; |
641 curbuf = firstbuf; |
641 size_t srcidx = 0; |
642 size_t srcidx = 0; |
642 char *destptr = result.ptr; |
643 char *destptr = result.ptr; |
643 do { |
644 do { |
644 for (size_t i = 0; i < curbuf->len; i++) { |
645 for (size_t i = 0; i < curbuf->len; i++) { |
645 /* Copy source part up to next match*/ |
646 // Copy source part up to next match |
646 size_t idx = curbuf->buf[i]; |
647 size_t idx = curbuf->buf[i]; |
647 size_t srclen = idx - srcidx; |
648 size_t srclen = idx - srcidx; |
648 if (srclen > 0) { |
649 if (srclen > 0) { |
649 memcpy(destptr, str.ptr + srcidx, srclen); |
650 memcpy(destptr, str.ptr + srcidx, srclen); |
650 destptr += srclen; |
651 destptr += srclen; |
651 srcidx += srclen; |
652 srcidx += srclen; |
652 } |
653 } |
653 |
654 |
654 /* Copy the replacement and skip the source pattern */ |
655 // Copy the replacement and skip the source pattern |
655 srcidx += pattern.length; |
656 srcidx += pattern.length; |
656 memcpy(destptr, replacement.ptr, replacement.length); |
657 memcpy(destptr, replacement.ptr, replacement.length); |
657 destptr += replacement.length; |
658 destptr += replacement.length; |
658 } |
659 } |
659 curbuf = curbuf->next; |
660 curbuf = curbuf->next; |
660 } while (curbuf); |
661 } while (curbuf); |
661 memcpy(destptr, str.ptr + srcidx, str.length - srcidx); |
662 memcpy(destptr, str.ptr + srcidx, str.length - srcidx); |
662 |
663 |
663 /* Result is guaranteed to be zero-terminated */ |
664 // Result is guaranteed to be zero-terminated |
664 result.ptr[result.length] = '\0'; |
665 result.ptr[result.length] = '\0'; |
665 |
666 |
666 /* Free index buffer */ |
667 // Free index buffer |
667 cx_strrepl_free_ibuf(firstbuf); |
668 cx_strrepl_free_ibuf(firstbuf); |
668 |
669 |
669 return result; |
670 return result; |
670 } |
671 } |
671 |
672 |