adds string replace functions

Sun, 29 Dec 2019 11:29:17 +0100

author
Mike Becker <universe@uap-core.de>
date
Sun, 29 Dec 2019 11:29:17 +0100
changeset 378
952c2df7e7ac
parent 377
2099a3aff61e
child 379
477404eb380e

adds string replace functions

src/string.c file | annotate | diff | comparison | revisions
src/ucx/string.h file | annotate | diff | comparison | revisions
test/main.c file | annotate | diff | comparison | revisions
test/string_tests.c file | annotate | diff | comparison | revisions
test/string_tests.h file | annotate | diff | comparison | revisions
     1.1 --- a/src/string.c	Fri Dec 27 11:48:55 2019 +0100
     1.2 +++ b/src/string.c	Sun Dec 29 11:29:17 2019 +0100
     1.3 @@ -662,6 +662,130 @@
     1.4      return ret;
     1.5  }
     1.6  
     1.7 +#define REPLACE_INDEX_BUFFER_MAX 100
     1.8 +
     1.9 +struct scstrreplace_ibuf {
    1.10 +    size_t* buf;
    1.11 +    unsigned int len; /* number of indices stored in buf (stays small) */
    1.12 +    struct scstrreplace_ibuf* next;
    1.13 +};
    1.14 +
    1.15 +static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) {
    1.16 +    while (buf) {
    1.17 +        struct scstrreplace_ibuf *next = buf->next;
    1.18 +        free(buf->buf);
    1.19 +        free(buf);
    1.20 +        buf = next;
    1.21 +    }
    1.22 +}
    1.23 +
    1.24 +sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
    1.25 +                     scstr_t pattern, scstr_t replacement, size_t replmax) {
    1.26 +
    1.27 +    if (pattern.length == 0 || pattern.length > str.length)
    1.28 +        return sstrdup(str);
    1.29 +
    1.30 +    /* Compute expected buffer length */
    1.31 +    size_t ibufmax = str.length / pattern.length;
    1.32 +    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
    1.33 +    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
    1.34 +        ibuflen = REPLACE_INDEX_BUFFER_MAX;
    1.35 +    }
    1.36 +
    1.37 +    /* Allocate first index buffer */
    1.38 +    struct scstrreplace_ibuf *firstbuf, *curbuf;
    1.39 +    firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf));
    1.40 +    if (!firstbuf) return sstrn(NULL, 0);
    1.41 +    firstbuf->buf = calloc(ibuflen, sizeof(size_t));
    1.42 +    if (!firstbuf->buf) {
    1.43 +        free(firstbuf);
    1.44 +        return sstrn(NULL, 0);
    1.45 +    }
    1.46 +
    1.47 +    /* Search occurrences */
    1.48 +    scstr_t searchstr = str;
    1.49 +    size_t found = 0;
    1.50 +    do {
    1.51 +        scstr_t match = scstrscstr(searchstr, pattern);
    1.52 +        if (match.length > 0) {
    1.53 +            /* Allocate next buffer in chain, if required */
    1.54 +            if (curbuf->len == ibuflen) {
    1.55 +                struct scstrreplace_ibuf *nextbuf =
    1.56 +                        calloc(1, sizeof(struct scstrreplace_ibuf));
    1.57 +                if (!nextbuf) return sstrn(NULL, 0);
    1.58 +                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
    1.59 +                if (!nextbuf->buf) {
    1.60 +                    free(nextbuf);
    1.61 +                    scstrrepl_free_ibuf(firstbuf);
    1.62 +                    return sstrn(NULL, 0);
    1.63 +                }
    1.64 +                curbuf->next = nextbuf;
    1.65 +                curbuf = nextbuf;
    1.66 +            }
    1.67 +
    1.68 +            /* Record match index */
    1.69 +            found++;
    1.70 +            size_t idx = match.ptr - str.ptr;
    1.71 +            curbuf->buf[curbuf->len++] = idx;
    1.72 +            searchstr.ptr = match.ptr + pattern.length;
    1.73 +            searchstr.length = str.length - idx - pattern.length;
    1.74 +        } else {
    1.75 +            break;
    1.76 +        }
    1.77 +    } while (searchstr.length > 0 && found < replmax);
    1.78 +
    1.79 +    /* Allocate result string */
    1.80 +    sstr_t result;
    1.81 +    {
    1.82 +        ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
    1.83 +        size_t rcount = 0;
    1.84 +        curbuf = firstbuf;
    1.85 +        do {
    1.86 +            rcount += curbuf->len;
    1.87 +            curbuf = curbuf->next;
    1.88 +        } while (curbuf);
    1.89 +        result.length = str.length + rcount * adjlen;
    1.90 +        result.ptr = almalloc(allocator, result.length);
    1.91 +        if (!result.ptr) {
    1.92 +            scstrrepl_free_ibuf(firstbuf);
    1.93 +            return sstrn(NULL, 0);
    1.94 +        }
    1.95 +    }
    1.96 +
    1.97 +    /* Build result string */
    1.98 +    curbuf = firstbuf;
    1.99 +    size_t srcidx = 0;
   1.100 +    char* destptr = result.ptr;
   1.101 +    do {
   1.102 +        for (size_t i = 0; i < curbuf->len; i++) {
   1.103 +            /* Copy source part up to next match */
   1.104 +            size_t idx = curbuf->buf[i];
   1.105 +            size_t srclen = idx - srcidx;
   1.106 +            if (srclen > 0) {
   1.107 +                memcpy(destptr, str.ptr+srcidx, srclen);
   1.108 +                destptr += srclen;
   1.109 +                srcidx += srclen;
   1.110 +            }
   1.111 +
   1.112 +            /* Copy the replacement and skip the source pattern */
   1.113 +            srcidx += pattern.length;
   1.114 +            memcpy(destptr, replacement.ptr, replacement.length);
   1.115 +            destptr += replacement.length;
   1.116 +        }
   1.117 +        curbuf = curbuf->next;
   1.118 +    } while (curbuf);
   1.119 +    memcpy(destptr, str.ptr+srcidx, str.length-srcidx);
   1.120 +
   1.121 +    return result;
   1.122 +}
   1.123 +
   1.124 +sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
   1.125 +        scstr_t replacement, size_t replmax) {
   1.126 +    return scstrreplacen_a(ucx_default_allocator(),
   1.127 +            str, pattern, replacement, replmax);
   1.128 +}
   1.129 +
   1.130 +
   1.131  // type adjustment functions
   1.132  scstr_t ucx_sc2sc(scstr_t str) {
   1.133      return str;
     2.1 --- a/src/ucx/string.h	Fri Dec 27 11:48:55 2019 +0100
     2.2 +++ b/src/ucx/string.h	Sun Dec 29 11:29:17 2019 +0100
     2.3 @@ -1072,6 +1072,128 @@
     2.4   */
     2.5  #define sstrupper_a(allocator, string) scstrupper_a(allocator, string)
     2.6  
     2.7 +
     2.8 +/**
     2.9 + * Replaces a pattern in a string with another string.
    2.10 + *
    2.11 + * The pattern is taken literally and is not a regular expression.
    2.12 + * Replaces at most <code>replmax</code> occurrences.
    2.13 + *
    2.14 + * The resulting string is allocated by the specified allocator. I.e. it
    2.15 + * depends on the used allocator, whether the sstr_t.ptr must be freed
    2.16 + * manually.
    2.17 + *
    2.18 + * If allocation fails, the sstr_t.ptr of the return value is NULL.
    2.19 + *
    2.20 + * @param allocator the allocator to use
    2.21 + * @param str the string where replacements should be applied
    2.22 + * @param pattern the pattern to search for
    2.23 + * @param replacement the replacement string
    2.24 + * @param replmax maximum number of replacements
    2.25 + * @return the resulting string after applying the replacements
    2.26 + */
    2.27 +sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
    2.28 +        scstr_t pattern, scstr_t replacement, size_t replmax);
    2.29 +
    2.30 +/**
    2.31 + * Replaces a pattern in a string with another string.
    2.32 + *
    2.33 + * The pattern is taken literally and is not a regular expression.
    2.34 + * Replaces at most <code>replmax</code> occurrences.
    2.35 + *
    2.36 + * The sstr_t.ptr of the resulting string must be freed manually.
    2.37 + *
    2.38 + * If allocation fails, the sstr_t.ptr of the return value is NULL.
    2.39 + *
    2.40 + * @param str the string where replacements should be applied
    2.41 + * @param pattern the pattern to search for
    2.42 + * @param replacement the replacement string
    2.43 + * @param replmax maximum number of replacements
    2.44 + * @return the resulting string after applying the replacements
    2.45 + */
    2.46 +sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
    2.47 +        scstr_t replacement, size_t replmax);
    2.48 +
    2.49 +/**
    2.50 + * Replaces a pattern in a string with another string.
    2.51 + *
    2.52 + * The pattern is taken literally and is not a regular expression.
    2.53 + * Replaces at most <code>replmax</code> occurrences.
    2.54 + *
    2.55 + * The resulting string is allocated by the specified allocator. I.e. it
    2.56 + * depends on the used allocator, whether the sstr_t.ptr must be freed
    2.57 + * manually. If allocation fails, the sstr_t.ptr of the return value is NULL.
    2.58 + *
    2.59 + * @param allocator the allocator to use
    2.60 + * @param str the string where replacements should be applied
    2.61 + * @param pattern the pattern to search for
    2.62 + * @param replacement the replacement string
    2.63 + * @param replmax maximum number of replacements
    2.64 + * @return the resulting string after applying the replacements
    2.65 + */
    2.66 +#define sstrreplacen_a(allocator, str, pattern, replacement, replmax) \
    2.67 +        scstrreplacen_a(allocator, SCSTR(str), SCSTR(pattern), \
    2.68 +            SCSTR(replacement), replmax)
    2.69 +
    2.70 +/**
    2.71 + * Replaces a pattern in a string with another string.
    2.72 + *
    2.73 + * The pattern is taken literally and is not a regular expression.
    2.74 + * Replaces at most <code>replmax</code> occurrences.
    2.75 + *
    2.76 + * The sstr_t.ptr of the resulting string must be freed manually.
    2.77 + *
    2.78 + * If allocation fails, the sstr_t.ptr of the return value is NULL.
    2.79 + *
    2.80 + * @param str the string where replacements should be applied
    2.81 + * @param pattern the pattern to search for
    2.82 + * @param replacement the replacement string
    2.83 + * @param replmax maximum number of replacements
    2.84 + * @return the resulting string after applying the replacements
    2.85 + */
    2.86 +#define sstrreplacen(str, pattern, replacement, replmax) \
    2.87 +        scstrreplacen(SCSTR(str), SCSTR(pattern), SCSTR(replacement), replmax)
    2.88 +
    2.89 +/**
    2.90 + * Replaces a pattern in a string with another string.
    2.91 + *
    2.92 + * The pattern is taken literally and is not a regular expression.
    2.93 + * Replaces all occurrences of the pattern.
    2.94 + *
    2.95 + * The resulting string is allocated by the specified allocator. I.e. it
    2.96 + * depends on the used allocator, whether the sstr_t.ptr must be freed
    2.97 + * manually.
    2.98 + *
    2.99 + * If allocation fails, the sstr_t.ptr of the return value is NULL.
   2.100 + *
   2.101 + * @param allocator the allocator to use
   2.102 + * @param str the string where replacements should be applied
   2.103 + * @param pattern the pattern to search for
   2.104 + * @param replacement the replacement string
   2.105 + * @return the resulting string after applying the replacements
   2.106 + */
   2.107 +#define sstrreplace_a(allocator, str, pattern, replacement) \
   2.108 +        scstrreplacen_a(allocator, SCSTR(str), SCSTR(pattern), \
   2.109 +            SCSTR(replacement), SIZE_MAX)
   2.110 +
   2.111 +/**
   2.112 + * Replaces a pattern in a string with another string.
   2.113 + *
   2.114 + * The pattern is taken literally and is not a regular expression.
   2.115 + * Replaces all occurrences of the pattern.
   2.116 + *
   2.117 + * The sstr_t.ptr of the resulting string must be freed manually.
   2.118 + *
   2.119 + * If allocation fails, the sstr_t.ptr of the return value is NULL.
   2.120 + *
   2.121 + * @param str the string where replacements should be applied
   2.122 + * @param pattern the pattern to search for
   2.123 + * @param replacement the replacement string
   2.124 + * @return the resulting string after applying the replacements
   2.125 + */
   2.126 +#define sstrreplace(str, pattern, replacement) \
   2.127 +        scstrreplacen(SCSTR(str), SCSTR(pattern), SCSTR(replacement), SIZE_MAX)
   2.128 +
   2.129  #ifdef	__cplusplus
   2.130  }
   2.131  #endif
     3.1 --- a/test/main.c	Fri Dec 27 11:48:55 2019 +0100
     3.2 +++ b/test/main.c	Sun Dec 29 11:29:17 2019 +0100
     3.3 @@ -138,6 +138,7 @@
     3.4          ucx_test_register(suite, test_sstrtrim);
     3.5          ucx_test_register(suite, test_sstrprefixsuffix);
     3.6          ucx_test_register(suite, test_sstrcaseprefixsuffix);
     3.7 +        ucx_test_register(suite, test_sstrreplace);
     3.8          
     3.9          /* UcxLogger Tests */
    3.10          ucx_test_register(suite, test_ucx_logger_new);
     4.1 --- a/test/string_tests.c	Fri Dec 27 11:48:55 2019 +0100
     4.2 +++ b/test/string_tests.c	Sun Dec 29 11:29:17 2019 +0100
     4.3 @@ -476,3 +476,41 @@
     4.4      
     4.5      UCX_TEST_END
     4.6  }
     4.7 +
     4.8 +UCX_TEST(test_sstrreplace) {
     4.9 +
    4.10 +    sstr_t str = ST("test ababab string aba");
    4.11 +    sstr_t longstr = ST("xyaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaacd");
    4.12 +    sstr_t notrail = ST("test abab");
    4.13 +
    4.14 +    sstr_t repl = sstrreplace(str, SC("abab"), SC("muchlonger"));
    4.15 +    sstr_t expected = ST("test muchlongerab string aba");
    4.16 +
    4.17 +    sstr_t repln = sstrreplacen(str, SC("ab"), SC("c"), 2);
    4.18 +    sstr_t expectedn = ST("test ccab string aba");
    4.19 +
    4.20 +    sstr_t longrepl = sstrreplace(longstr, SC("a"), SC("z"));
    4.21 +    sstr_t longexpect = ST("xyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzcd");
    4.22 +
    4.23 +    sstr_t replnotrail = sstrreplace(notrail, SC("ab"), SC("z"));
    4.24 +    sstr_t notrailexpect = ST("test zz");
    4.25 +
    4.26 +    UCX_TEST_BEGIN
    4.27 +
    4.28 +    UCX_TEST_ASSERT(repl.ptr != str.ptr, "result string is not fresh");
    4.29 +    UCX_TEST_ASSERT(!sstrcmp(repl, expected), "incorrect replacement");
    4.30 +
    4.31 +    UCX_TEST_ASSERT(repln.ptr != str.ptr, "result string is not fresh");
    4.32 +    UCX_TEST_ASSERT(!sstrcmp(repln, expectedn), "incorrect replacement");
    4.33 +
    4.34 +    UCX_TEST_ASSERT(!sstrcmp(longrepl, longexpect),
    4.35 +            "incorrect handling of long strings");
    4.36 +
    4.37 +    UCX_TEST_ASSERT(!sstrcmp(replnotrail, notrailexpect),
    4.38 +            "no trail replacement fails");
    4.39 +
    4.40 +    UCX_TEST_END
    4.41 +
    4.42 +    free(repl.ptr);
    4.43 +    free(repln.ptr);
    4.44 +}
    4.45 \ No newline at end of file
     5.1 --- a/test/string_tests.h	Fri Dec 27 11:48:55 2019 +0100
     5.2 +++ b/test/string_tests.h	Sun Dec 29 11:29:17 2019 +0100
     5.3 @@ -48,6 +48,7 @@
     5.4  UCX_TEST(test_sstrtrim);
     5.5  UCX_TEST(test_sstrprefixsuffix);
     5.6  UCX_TEST(test_sstrcaseprefixsuffix);
     5.7 +UCX_TEST(test_sstrreplace);
     5.8  
     5.9  #ifdef	__cplusplus
    5.10  }

mercurial