src/string.c

changeset 378
952c2df7e7ac
parent 364
5577d6c27a33
child 379
477404eb380e
     1.1 --- a/src/string.c	Fri Dec 27 11:48:55 2019 +0100
     1.2 +++ b/src/string.c	Sun Dec 29 11:29:17 2019 +0100
     1.3 @@ -662,6 +662,130 @@
     1.4      return ret;
     1.5  }
     1.6  
     1.7 +#define REPLACE_INDEX_BUFFER_MAX 100
     1.8 +
     1.9 +struct scstrreplace_ibuf {
    1.10 +    size_t* buf;
    1.11 +    unsigned int len; /* small indices */
    1.12 +    struct scstrreplace_ibuf* next;
    1.13 +};
    1.14 +
    1.15 +static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) {
    1.16 +    while (buf) {
    1.17 +        struct scstrreplace_ibuf *next = buf->next;
    1.18 +        free(buf->buf);
    1.19 +        free(buf);
    1.20 +        buf = next;
    1.21 +    }
    1.22 +}
    1.23 +
    1.24 +sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
    1.25 +                     scstr_t pattern, scstr_t replacement, size_t replmax) {
    1.26 +
    1.27 +    if (pattern.length == 0 || pattern.length > str.length)
    1.28 +        return sstrdup(str);
    1.29 +
    1.30 +    /* Compute expected buffer length */
    1.31 +    size_t ibufmax = str.length / pattern.length;
    1.32 +    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
    1.33 +    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
    1.34 +        ibuflen = REPLACE_INDEX_BUFFER_MAX;
    1.35 +    }
    1.36 +
    1.37 +    /* Allocate first index buffer */
    1.38 +    struct scstrreplace_ibuf *firstbuf, *curbuf;
    1.39 +    firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf));
    1.40 +    if (!firstbuf) return sstrn(NULL, 0);
    1.41 +    firstbuf->buf = calloc(ibuflen, sizeof(size_t));
    1.42 +    if (!firstbuf->buf) {
    1.43 +        free(firstbuf);
    1.44 +        return sstrn(NULL, 0);
    1.45 +    }
    1.46 +
    1.47 +    /* Search occurrences */
    1.48 +    scstr_t searchstr = str;
    1.49 +    size_t found = 0;
    1.50 +    do {
    1.51 +        scstr_t match = scstrscstr(searchstr, pattern);
    1.52 +        if (match.length > 0) {
    1.53 +            /* Allocate next buffer in chain, if required */
    1.54 +            if (curbuf->len == ibuflen) {
    1.55 +                struct scstrreplace_ibuf *nextbuf =
    1.56 +                        calloc(1, sizeof(struct scstrreplace_ibuf));
    1.57 +                if (!nextbuf) return sstrn(NULL, 0);
    1.58 +                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
    1.59 +                if (!nextbuf->buf) {
    1.60 +                    free(nextbuf);
    1.61 +                    scstrrepl_free_ibuf(firstbuf);
    1.62 +                    return sstrn(NULL, 0);
    1.63 +                }
    1.64 +                curbuf->next = nextbuf;
    1.65 +                curbuf = nextbuf;
    1.66 +            }
    1.67 +
    1.68 +            /* Record match index */
    1.69 +            found++;
    1.70 +            size_t idx = match.ptr - str.ptr;
    1.71 +            curbuf->buf[curbuf->len++] = idx;
    1.72 +            searchstr.ptr = match.ptr + pattern.length;
    1.73 +            searchstr.length = str.length - idx - pattern.length;
    1.74 +        } else {
    1.75 +            break;
    1.76 +        }
    1.77 +    } while (searchstr.length > 0 && found < replmax);
    1.78 +
    1.79 +    /* Allocate result string */
    1.80 +    sstr_t result;
    1.81 +    {
    1.82 +        ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
    1.83 +        size_t rcount = 0;
    1.84 +        curbuf = firstbuf;
    1.85 +        do {
    1.86 +            rcount += curbuf->len;
    1.87 +            curbuf = curbuf->next;
    1.88 +        } while (curbuf);
    1.89 +        result.length = str.length + rcount * adjlen;
    1.90 +        result.ptr = almalloc(allocator, result.length);
    1.91 +        if (!result.ptr) {
    1.92 +            scstrrepl_free_ibuf(firstbuf);
    1.93 +            return sstrn(NULL, 0);
    1.94 +        }
    1.95 +    }
    1.96 +
    1.97 +    /* Build result string */
    1.98 +    curbuf = firstbuf;
    1.99 +    size_t srcidx = 0;
   1.100 +    char* destptr = result.ptr;
   1.101 +    do {
   1.102 +        for (size_t i = 0; i < curbuf->len; i++) {
   1.103 +            /* Copy source part up to next match*/
   1.104 +            size_t idx = curbuf->buf[i];
   1.105 +            size_t srclen = idx - srcidx;
   1.106 +            if (srclen > 0) {
   1.107 +                memcpy(destptr, str.ptr+srcidx, srclen);
   1.108 +                destptr += srclen;
   1.109 +                srcidx += srclen;
   1.110 +            }
   1.111 +
   1.112 +            /* Copy the replacement and skip the source pattern */
   1.113 +            srcidx += pattern.length;
   1.114 +            memcpy(destptr, replacement.ptr, replacement.length);
   1.115 +            destptr += replacement.length;
   1.116 +        }
   1.117 +        curbuf = curbuf->next;
   1.118 +    } while (curbuf);
   1.119 +    memcpy(destptr, str.ptr+srcidx, str.length-srcidx);
   1.120 +
   1.121 +    return result;
   1.122 +}
   1.123 +
   1.124 +sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
   1.125 +        scstr_t replacement, size_t replmax) {
   1.126 +    return scstrreplacen_a(ucx_default_allocator(),
   1.127 +            str, pattern, replacement, replmax);
   1.128 +}
   1.129 +
   1.130 +
   1.131  // type adjustment functions
   1.132  scstr_t ucx_sc2sc(scstr_t str) {
   1.133      return str;

mercurial