adds string replace functions

Sun, 29 Dec 2019 11:29:17 +0100

author
Mike Becker <universe@uap-core.de>
date
Sun, 29 Dec 2019 11:29:17 +0100
changeset 378
952c2df7e7ac
parent 377
2099a3aff61e
child 379
477404eb380e

adds string replace functions

src/string.c file | annotate | diff | comparison | revisions
src/ucx/string.h file | annotate | diff | comparison | revisions
test/main.c file | annotate | diff | comparison | revisions
test/string_tests.c file | annotate | diff | comparison | revisions
test/string_tests.h file | annotate | diff | comparison | revisions
--- a/src/string.c	Fri Dec 27 11:48:55 2019 +0100
+++ b/src/string.c	Sun Dec 29 11:29:17 2019 +0100
@@ -662,6 +662,130 @@
     return ret;
 }
 
+/* Upper bound for the number of match indices held by one index buffer. */
+#define REPLACE_INDEX_BUFFER_MAX 100
+
+/*
+ * One node in a singly linked chain of index buffers.
+ * Each node stores offsets (relative to the start of the source string)
+ * at which the search pattern has been found.
+ */
+struct scstrreplace_ibuf {
+    size_t* buf; /* array of recorded match offsets */
+    unsigned int len; /* used entries in buf; stays small, as the capacity
+                         is capped by REPLACE_INDEX_BUFFER_MAX */
+    struct scstrreplace_ibuf* next; /* following node, NULL for the last */
+};
+
+/*
+ * Releases a complete chain of index buffers: for every node the index
+ * array is freed first, then the node itself. Passing NULL is a no-op.
+ */
+static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) {
+    for (struct scstrreplace_ibuf *following; buf != NULL; buf = following) {
+        /* remember the successor before the node is gone */
+        following = buf->next;
+        free(buf->buf);
+        free(buf);
+    }
+}
+
+/*
+ * Builds a copy of str in which up to replmax occurrences of pattern are
+ * substituted by replacement.
+ *
+ * The function works in three phases: it records the offsets of the matches
+ * in a chain of index buffers, allocates the result with its exact final
+ * length, and then assembles the result from the untouched parts of the
+ * source string and the replacement.
+ *
+ * The result is zero terminated; the terminator is not counted in
+ * result.length. If an allocation fails, the ptr of the returned string
+ * is NULL.
+ */
+sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
+                     scstr_t pattern, scstr_t replacement, size_t replmax) {
+
+    /*
+     * Nothing can be replaced: return a plain copy. The copy must come from
+     * the caller's allocator, and replmax == 0 must be handled here, because
+     * it would otherwise lead to zero-sized index buffers below.
+     */
+    if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
+        return sstrdup_a(allocator, str);
+
+    /* Compute expected buffer length (at least 1 due to the guard above) */
+    size_t ibufmax = str.length / pattern.length;
+    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
+    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
+        ibuflen = REPLACE_INDEX_BUFFER_MAX;
+    }
+
+    /* Allocate first index buffer */
+    struct scstrreplace_ibuf *firstbuf, *curbuf;
+    firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf));
+    if (!firstbuf) return sstrn(NULL, 0);
+    firstbuf->buf = calloc(ibuflen, sizeof(size_t));
+    if (!firstbuf->buf) {
+        free(firstbuf);
+        return sstrn(NULL, 0);
+    }
+
+    /* Search occurrences */
+    scstr_t searchstr = str;
+    size_t found = 0;
+    do {
+        scstr_t match = scstrscstr(searchstr, pattern);
+        if (match.length > 0) {
+            /* Allocate next buffer in chain, if required */
+            if (curbuf->len == ibuflen) {
+                struct scstrreplace_ibuf *nextbuf =
+                        calloc(1, sizeof(struct scstrreplace_ibuf));
+                if (!nextbuf) {
+                    /* do not leak the indices collected so far */
+                    scstrrepl_free_ibuf(firstbuf);
+                    return sstrn(NULL, 0);
+                }
+                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
+                if (!nextbuf->buf) {
+                    free(nextbuf);
+                    scstrrepl_free_ibuf(firstbuf);
+                    return sstrn(NULL, 0);
+                }
+                curbuf->next = nextbuf;
+                curbuf = nextbuf;
+            }
+
+            /* Record match index and continue behind the match */
+            found++;
+            size_t idx = match.ptr - str.ptr;
+            curbuf->buf[curbuf->len++] = idx;
+            searchstr.ptr = match.ptr + pattern.length;
+            searchstr.length = str.length - idx - pattern.length;
+        } else {
+            break;
+        }
+    } while (searchstr.length > 0 && found < replmax);
+
+    /*
+     * Allocate result string.
+     * The matches do not overlap, hence found * pattern.length never
+     * exceeds str.length and the subtraction cannot underflow.
+     * One extra byte is reserved for the terminator, which also avoids a
+     * zero-sized allocation when the result is empty.
+     */
+    sstr_t result;
+    result.length = str.length - found * pattern.length
+            + found * replacement.length;
+    result.ptr = almalloc(allocator, result.length + 1);
+    if (!result.ptr) {
+        scstrrepl_free_ibuf(firstbuf);
+        return sstrn(NULL, 0);
+    }
+
+    /* Build result string */
+    size_t srcidx = 0;
+    char *destptr = result.ptr;
+    for (curbuf = firstbuf; curbuf; curbuf = curbuf->next) {
+        for (size_t i = 0; i < curbuf->len; i++) {
+            /* Copy source part up to next match */
+            size_t idx = curbuf->buf[i];
+            size_t srclen = idx - srcidx;
+            if (srclen > 0) {
+                memcpy(destptr, str.ptr+srcidx, srclen);
+                destptr += srclen;
+                srcidx += srclen;
+            }
+
+            /* Copy the replacement and skip the source pattern */
+            srcidx += pattern.length;
+            if (replacement.length > 0) {
+                memcpy(destptr, replacement.ptr, replacement.length);
+                destptr += replacement.length;
+            }
+        }
+    }
+
+    /* Copy the remainder behind the last match and terminate the result */
+    memcpy(destptr, str.ptr+srcidx, str.length-srcidx);
+    result.ptr[result.length] = '\0';
+
+    /* The match indices are no longer needed */
+    scstrrepl_free_ibuf(firstbuf);
+
+    return result;
+}
+
+/*
+ * Convenience variant of scstrreplacen_a() which always allocates the
+ * result with the allocator returned by ucx_default_allocator().
+ */
+sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
+        scstr_t replacement, size_t replmax) {
+    UcxAllocator *allocator = ucx_default_allocator();
+    return scstrreplacen_a(allocator, str, pattern, replacement, replmax);
+}
+
+
 // type adjustment functions
 scstr_t ucx_sc2sc(scstr_t str) {
     return str;
--- a/src/ucx/string.h	Fri Dec 27 11:48:55 2019 +0100
+++ b/src/ucx/string.h	Sun Dec 29 11:29:17 2019 +0100
@@ -1072,6 +1072,128 @@
  */
 #define sstrupper_a(allocator, string) scstrupper_a(allocator, string)
 
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces at most <code>replmax</code> occurrences.
+ *
+ * If the pattern is empty or longer than the string, no replacement takes
+ * place and a copy of the string is returned.
+ *
+ * The resulting string is allocated by the specified allocator. I.e. it
+ * depends on the used allocator, whether the sstr_t.ptr must be freed
+ * manually.
+ *
+ * If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param allocator the allocator to use
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @param replmax maximum number of replacements
+ * @return the resulting string after applying the replacements
+ */
+sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str,
+        scstr_t pattern, scstr_t replacement, size_t replmax);
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces at most <code>replmax</code> occurrences.
+ *
+ * If the pattern is empty or longer than the string, no replacement takes
+ * place and a copy of the string is returned.
+ *
+ * The sstr_t.ptr of the resulting string must be freed manually.
+ *
+ * If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @param replmax maximum number of replacements
+ * @return the resulting string after applying the replacements
+ */
+sstr_t scstrreplacen(scstr_t str, scstr_t pattern,
+        scstr_t replacement, size_t replmax);
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces at most <code>replmax</code> occurrences.
+ *
+ * The resulting string is allocated by the specified allocator. I.e. it
+ * depends on the used allocator, whether the sstr_t.ptr must be freed
+ * manually.
+ *
+ * If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param allocator the allocator to use
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @param replmax maximum number of replacements
+ * @return the resulting string after applying the replacements
+ */
+#define sstrreplacen_a(allocator, str, pattern, replacement, replmax) \
+        scstrreplacen_a(allocator, SCSTR(str), SCSTR(pattern), \
+            SCSTR(replacement), replmax)
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces at most <code>replmax</code> occurrences.
+ *
+ * If the pattern is empty or longer than the string, no replacement takes
+ * place and a copy of the string is returned.
+ *
+ * The sstr_t.ptr of the resulting string must be freed manually.
+ *
+ * If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @param replmax maximum number of replacements
+ * @return the resulting string after applying the replacements
+ */
+#define sstrreplacen(str, pattern, replacement, replmax) \
+        scstrreplacen(SCSTR(str), SCSTR(pattern), SCSTR(replacement), replmax)
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * All occurrences are replaced; this macro takes no replacement limit and
+ * passes <code>SIZE_MAX</code> as the maximum number of replacements.
+ *
+ * The resulting string is allocated by the specified allocator. I.e. it
+ * depends on the used allocator, whether the sstr_t.ptr must be freed
+ * manually.
+ *
+ * If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param allocator the allocator to use
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @return the resulting string after applying the replacements
+ */
+#define sstrreplace_a(allocator, str, pattern, replacement) \
+        scstrreplacen_a(allocator, SCSTR(str), SCSTR(pattern), \
+            SCSTR(replacement), SIZE_MAX)
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * All occurrences are replaced; this macro takes no replacement limit and
+ * passes <code>SIZE_MAX</code> as the maximum number of replacements.
+ *
+ * The sstr_t.ptr of the resulting string must be freed manually.
+ *
+ * If allocation fails, the sstr_t.ptr of the return value is NULL.
+ *
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @return the resulting string after applying the replacements
+ */
+#define sstrreplace(str, pattern, replacement) \
+        scstrreplacen(SCSTR(str), SCSTR(pattern), SCSTR(replacement), SIZE_MAX)
+
 #ifdef	__cplusplus
 }
 #endif
--- a/test/main.c	Fri Dec 27 11:48:55 2019 +0100
+++ b/test/main.c	Sun Dec 29 11:29:17 2019 +0100
@@ -138,6 +138,7 @@
         ucx_test_register(suite, test_sstrtrim);
         ucx_test_register(suite, test_sstrprefixsuffix);
         ucx_test_register(suite, test_sstrcaseprefixsuffix);
+        ucx_test_register(suite, test_sstrreplace);
         
         /* UcxLogger Tests */
         ucx_test_register(suite, test_ucx_logger_new);
--- a/test/string_tests.c	Fri Dec 27 11:48:55 2019 +0100
+++ b/test/string_tests.c	Sun Dec 29 11:29:17 2019 +0100
@@ -476,3 +476,41 @@
     
     UCX_TEST_END
 }
+
+/*
+ * Tests sstrreplace() and sstrreplacen() with
+ *  - a replacement that is longer than the pattern,
+ *  - a limited number of replacements,
+ *  - more matches than fit into a single index buffer, and
+ *  - a match at the very end of the string (no trailing text).
+ */
+UCX_TEST(test_sstrreplace) {
+
+    sstr_t str = ST("test ababab string aba");
+    sstr_t longstr = ST("xyaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaacd");
+    sstr_t notrail = ST("test abab");
+
+    sstr_t repl = sstrreplace(str, SC("abab"), SC("muchlonger"));
+    sstr_t expected = ST("test muchlongerab string aba");
+
+    sstr_t repln = sstrreplacen(str, SC("ab"), SC("c"), 2);
+    sstr_t expectedn = ST("test ccab string aba");
+
+    sstr_t longrepl = sstrreplace(longstr, SC("a"), SC("z"));
+    sstr_t longexpect = ST("xyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzcd");
+
+    sstr_t replnotrail = sstrreplace(notrail, SC("ab"), SC("z"));
+    sstr_t notrailexpect = ST("test zz");
+
+    UCX_TEST_BEGIN
+
+    UCX_TEST_ASSERT(repl.ptr != str.ptr, "result string is not fresh");
+    UCX_TEST_ASSERT(!sstrcmp(repl, expected), "incorrect replacement");
+
+    UCX_TEST_ASSERT(repln.ptr != str.ptr, "result string is not fresh");
+    UCX_TEST_ASSERT(!sstrcmp(repln, expectedn), "incorrect replacement");
+
+    UCX_TEST_ASSERT(!sstrcmp(longrepl, longexpect),
+            "incorrect handling of long strings");
+
+    UCX_TEST_ASSERT(!sstrcmp(replnotrail, notrailexpect),
+            "no trail replacement fails");
+
+    UCX_TEST_END
+
+    /* every result string is owned by this test and must be released */
+    free(repl.ptr);
+    free(repln.ptr);
+    free(longrepl.ptr);
+    free(replnotrail.ptr);
+}
\ No newline at end of file
--- a/test/string_tests.h	Fri Dec 27 11:48:55 2019 +0100
+++ b/test/string_tests.h	Sun Dec 29 11:29:17 2019 +0100
@@ -48,6 +48,7 @@
 UCX_TEST(test_sstrtrim);
 UCX_TEST(test_sstrprefixsuffix);
 UCX_TEST(test_sstrcaseprefixsuffix);
+UCX_TEST(test_sstrreplace);
 
 #ifdef	__cplusplus
 }

mercurial