1.1 --- a/src/string.c Fri Dec 27 11:48:55 2019 +0100 1.2 +++ b/src/string.c Sun Dec 29 11:29:17 2019 +0100 1.3 @@ -662,6 +662,130 @@ 1.4 return ret; 1.5 } 1.6 1.7 +#define REPLACE_INDEX_BUFFER_MAX 100 1.8 + 1.9 +struct scstrreplace_ibuf { 1.10 + size_t* buf; 1.11 + unsigned int len; /* small indices */ 1.12 + struct scstrreplace_ibuf* next; 1.13 +}; 1.14 + 1.15 +static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) { 1.16 + while (buf) { 1.17 + struct scstrreplace_ibuf *next = buf->next; 1.18 + free(buf->buf); 1.19 + free(buf); 1.20 + buf = next; 1.21 + } 1.22 +} 1.23 + 1.24 +sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str, 1.25 + scstr_t pattern, scstr_t replacement, size_t replmax) { 1.26 + 1.27 + if (pattern.length == 0 || pattern.length > str.length) 1.28 + return sstrdup(str); 1.29 + 1.30 + /* Compute expected buffer length */ 1.31 + size_t ibufmax = str.length / pattern.length; 1.32 + size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; 1.33 + if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { 1.34 + ibuflen = REPLACE_INDEX_BUFFER_MAX; 1.35 + } 1.36 + 1.37 + /* Allocate first index buffer */ 1.38 + struct scstrreplace_ibuf *firstbuf, *curbuf; 1.39 + firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf)); 1.40 + if (!firstbuf) return sstrn(NULL, 0); 1.41 + firstbuf->buf = calloc(ibuflen, sizeof(size_t)); 1.42 + if (!firstbuf->buf) { 1.43 + free(firstbuf); 1.44 + return sstrn(NULL, 0); 1.45 + } 1.46 + 1.47 + /* Search occurrences */ 1.48 + scstr_t searchstr = str; 1.49 + size_t found = 0; 1.50 + do { 1.51 + scstr_t match = scstrscstr(searchstr, pattern); 1.52 + if (match.length > 0) { 1.53 + /* Allocate next buffer in chain, if required */ 1.54 + if (curbuf->len == ibuflen) { 1.55 + struct scstrreplace_ibuf *nextbuf = 1.56 + calloc(1, sizeof(struct scstrreplace_ibuf)); 1.57 + if (!nextbuf) return sstrn(NULL, 0); 1.58 + nextbuf->buf = calloc(ibuflen, sizeof(size_t)); 1.59 + if (!nextbuf->buf) { 1.60 + free(nextbuf); 1.61 + scstrrepl_free_ibuf(firstbuf); 1.62 + return sstrn(NULL, 0); 1.63 + } 1.64 + curbuf->next = nextbuf; 1.65 + curbuf = nextbuf; 1.66 + } 1.67 + 1.68 + /* Record match index */ 1.69 + found++; 1.70 + size_t idx = match.ptr - str.ptr; 1.71 + curbuf->buf[curbuf->len++] = idx; 1.72 + searchstr.ptr = match.ptr + pattern.length; 1.73 + searchstr.length = str.length - idx - pattern.length; 1.74 + } else { 1.75 + break; 1.76 + } 1.77 + } while (searchstr.length > 0 && found < replmax); 1.78 + 1.79 + /* Allocate result string */ 1.80 + sstr_t result; 1.81 + { 1.82 + ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; 1.83 + size_t rcount = 0; 1.84 + curbuf = firstbuf; 1.85 + do { 1.86 + rcount += curbuf->len; 1.87 + curbuf = curbuf->next; 1.88 + } while (curbuf); 1.89 + result.length = str.length + rcount * adjlen; 1.90 + result.ptr = almalloc(allocator, result.length); 1.91 + if (!result.ptr) { 1.92 + scstrrepl_free_ibuf(firstbuf); 1.93 + return sstrn(NULL, 0); 1.94 + } 1.95 + } 1.96 + 1.97 + /* Build result string */ 1.98 + curbuf = firstbuf; 1.99 + size_t srcidx = 0; 1.100 + char* destptr = result.ptr; 1.101 + do { 1.102 + for (size_t i = 0; i < curbuf->len; i++) { 1.103 + /* Copy source part up to next match*/ 1.104 + size_t idx = curbuf->buf[i]; 1.105 + size_t srclen = idx - srcidx; 1.106 + if (srclen > 0) { 1.107 + memcpy(destptr, str.ptr+srcidx, srclen); 1.108 + destptr += srclen; 1.109 + srcidx += srclen; 1.110 + } 1.111 + 1.112 + /* Copy the replacement and skip the source pattern */ 1.113 + srcidx += pattern.length; 1.114 + memcpy(destptr, replacement.ptr, replacement.length); 1.115 + destptr += replacement.length; 1.116 + } 1.117 + curbuf = curbuf->next; 1.118 + } while (curbuf); 1.119 + memcpy(destptr, str.ptr+srcidx, str.length-srcidx); 1.120 + 1.121 + return result; 1.122 +} 1.123 + 1.124 +sstr_t scstrreplacen(scstr_t str, scstr_t pattern, 1.125 + scstr_t replacement, size_t replmax) { 1.126 + return scstrreplacen_a(ucx_default_allocator(), 1.127 + str, pattern, replacement, replmax); 1.128 +} 1.129 + 1.130 + 1.131 // type adjustment functions 1.132 scstr_t ucx_sc2sc(scstr_t str) { 1.133 return str;