Sun, 29 Dec 2019 11:29:17 +0100
adds string replace functions
src/string.c | file | annotate | diff | comparison | revisions | |
src/ucx/string.h | file | annotate | diff | comparison | revisions | |
test/main.c | file | annotate | diff | comparison | revisions | |
test/string_tests.c | file | annotate | diff | comparison | revisions | |
test/string_tests.h | file | annotate | diff | comparison | revisions |
1.1 --- a/src/string.c Fri Dec 27 11:48:55 2019 +0100 1.2 +++ b/src/string.c Sun Dec 29 11:29:17 2019 +0100 1.3 @@ -662,6 +662,130 @@ 1.4 return ret; 1.5 } 1.6 1.7 +#define REPLACE_INDEX_BUFFER_MAX 100 1.8 + 1.9 +struct scstrreplace_ibuf { 1.10 + size_t* buf; 1.11 + unsigned int len; /* small indices */ 1.12 + struct scstrreplace_ibuf* next; 1.13 +}; 1.14 + 1.15 +static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) { 1.16 + while (buf) { 1.17 + struct scstrreplace_ibuf *next = buf->next; 1.18 + free(buf->buf); 1.19 + free(buf); 1.20 + buf = next; 1.21 + } 1.22 +} 1.23 + 1.24 +sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str, 1.25 + scstr_t pattern, scstr_t replacement, size_t replmax) { 1.26 + 1.27 + if (pattern.length == 0 || pattern.length > str.length) 1.28 + return sstrdup(str); 1.29 + 1.30 + /* Compute expected buffer length */ 1.31 + size_t ibufmax = str.length / pattern.length; 1.32 + size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; 1.33 + if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { 1.34 + ibuflen = REPLACE_INDEX_BUFFER_MAX; 1.35 + } 1.36 + 1.37 + /* Allocate first index buffer */ 1.38 + struct scstrreplace_ibuf *firstbuf, *curbuf; 1.39 + firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf)); 1.40 + if (!firstbuf) return sstrn(NULL, 0); 1.41 + firstbuf->buf = calloc(ibuflen, sizeof(size_t)); 1.42 + if (!firstbuf->buf) { 1.43 + free(firstbuf); 1.44 + return sstrn(NULL, 0); 1.45 + } 1.46 + 1.47 + /* Search occurrences */ 1.48 + scstr_t searchstr = str; 1.49 + size_t found = 0; 1.50 + do { 1.51 + scstr_t match = scstrscstr(searchstr, pattern); 1.52 + if (match.length > 0) { 1.53 + /* Allocate next buffer in chain, if required */ 1.54 + if (curbuf->len == ibuflen) { 1.55 + struct scstrreplace_ibuf *nextbuf = 1.56 + calloc(1, sizeof(struct scstrreplace_ibuf)); 1.57 + if (!nextbuf) return sstrn(NULL, 0); 1.58 + nextbuf->buf = calloc(ibuflen, sizeof(size_t)); 1.59 + if (!nextbuf->buf) { 1.60 + free(nextbuf); 1.61 + scstrrepl_free_ibuf(firstbuf); 1.62 + return sstrn(NULL, 0); 1.63 + } 1.64 + curbuf->next = nextbuf; 1.65 + curbuf = nextbuf; 1.66 + } 1.67 + 1.68 + /* Record match index */ 1.69 + found++; 1.70 + size_t idx = match.ptr - str.ptr; 1.71 + curbuf->buf[curbuf->len++] = idx; 1.72 + searchstr.ptr = match.ptr + pattern.length; 1.73 + searchstr.length = str.length - idx - pattern.length; 1.74 + } else { 1.75 + break; 1.76 + } 1.77 + } while (searchstr.length > 0 && found < replmax); 1.78 + 1.79 + /* Allocate result string */ 1.80 + sstr_t result; 1.81 + { 1.82 + ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; 1.83 + size_t rcount = 0; 1.84 + curbuf = firstbuf; 1.85 + do { 1.86 + rcount += curbuf->len; 1.87 + curbuf = curbuf->next; 1.88 + } while (curbuf); 1.89 + result.length = str.length + rcount * adjlen; 1.90 + result.ptr = almalloc(allocator, result.length); 1.91 + if (!result.ptr) { 1.92 + scstrrepl_free_ibuf(firstbuf); 1.93 + return sstrn(NULL, 0); 1.94 + } 1.95 + } 1.96 + 1.97 + /* Build result string */ 1.98 + curbuf = firstbuf; 1.99 + size_t srcidx = 0; 1.100 + char* destptr = result.ptr; 1.101 + do { 1.102 + for (size_t i = 0; i < curbuf->len; i++) { 1.103 + /* Copy source part up to next match*/ 1.104 + size_t idx = curbuf->buf[i]; 1.105 + size_t srclen = idx - srcidx; 1.106 + if (srclen > 0) { 1.107 + memcpy(destptr, str.ptr+srcidx, srclen); 1.108 + destptr += srclen; 1.109 + srcidx += srclen; 1.110 + } 1.111 + 1.112 + /* Copy the replacement and skip the source pattern */ 1.113 + srcidx += pattern.length; 1.114 + memcpy(destptr, replacement.ptr, replacement.length); 1.115 + destptr += replacement.length; 1.116 + } 1.117 + curbuf = curbuf->next; 1.118 + } while (curbuf); 1.119 + memcpy(destptr, str.ptr+srcidx, str.length-srcidx); 1.120 + 1.121 + return result; 1.122 +} 1.123 + 1.124 +sstr_t scstrreplacen(scstr_t str, scstr_t pattern, 1.125 + scstr_t replacement, size_t replmax) { 1.126 + return scstrreplacen_a(ucx_default_allocator(), 1.127 + str, pattern, replacement, replmax); 1.128 +} 1.129 + 1.130 + 1.131 // type adjustment functions 1.132 scstr_t ucx_sc2sc(scstr_t str) { 1.133 return str;
2.1 --- a/src/ucx/string.h Fri Dec 27 11:48:55 2019 +0100 2.2 +++ b/src/ucx/string.h Sun Dec 29 11:29:17 2019 +0100 2.3 @@ -1072,6 +1072,128 @@ 2.4 */ 2.5 #define sstrupper_a(allocator, string) scstrupper_a(allocator, string) 2.6 2.7 + 2.8 +/** 2.9 + * Replaces a pattern in a string with another string. 2.10 + * 2.11 + * The pattern is taken literally and is no regular expression. 2.12 + * Replaces at most <code>replmax</code> occurrences. 2.13 + * 2.14 + * The resulting string is allocated by the specified allocator. I.e. it 2.15 + * depends on the used allocator, whether the sstr_t.ptr must be freed 2.16 + * manually. 2.17 + * 2.18 + * If allocation fails, the sstr_t.ptr of the return value is NULL. 2.19 + * 2.20 + * @param allocator the allocator to use 2.21 + * @param str the string where replacements should be applied 2.22 + * @param pattern the pattern to search for 2.23 + * @param replacement the replacement string 2.24 + * @param replmax maximum number of replacements 2.25 + * @return the resulting string after applying the replacements 2.26 + */ 2.27 +sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str, 2.28 + scstr_t pattern, scstr_t replacement, size_t replmax); 2.29 + 2.30 +/** 2.31 + * Replaces a pattern in a string with another string. 2.32 + * 2.33 + * The pattern is taken literally and is no regular expression. 2.34 + * Replaces at most <code>replmax</code> occurrences. 2.35 + * 2.36 + * The sstr_t.ptr of the resulting string must be freed manually. 2.37 + * 2.38 + * If allocation fails, the sstr_t.ptr of the return value is NULL. 2.39 + * 2.40 + * @param str the string where replacements should be applied 2.41 + * @param pattern the pattern to search for 2.42 + * @param replacement the replacement string 2.43 + * @param replmax maximum number of replacements 2.44 + * @return the resulting string after applying the replacements 2.45 + */ 2.46 +sstr_t scstrreplacen(scstr_t str, scstr_t pattern, 2.47 + scstr_t replacement, size_t replmax); 2.48 + 2.49 +/** 2.50 + * Replaces a pattern in a string with another string. 2.51 + * 2.52 + * The pattern is taken literally and is no regular expression. 2.53 + * Replaces at most <code>replmax</code> occurrences. 2.54 + * 2.55 + * The resulting string is allocated by the specified allocator. I.e. it 2.56 + * depends on the used allocator, whether the sstr_t.ptr must be freed 2.57 + * manually. 2.58 + * 2.59 + * @param allocator the allocator to use 2.60 + * @param str the string where replacements should be applied 2.61 + * @param pattern the pattern to search for 2.62 + * @param replacement the replacement string 2.63 + * @param replmax maximum number of replacements 2.64 + * @return the resulting string after applying the replacements 2.65 + */ 2.66 +#define sstrreplacen_a(allocator, str, pattern, replacement, replmax) \ 2.67 + scstrreplacen_a(allocator, SCSTR(str), SCSTR(pattern), \ 2.68 + SCSTR(replacement), replmax) 2.69 + 2.70 +/** 2.71 + * Replaces a pattern in a string with another string. 2.72 + * 2.73 + * The pattern is taken literally and is no regular expression. 2.74 + * Replaces at most <code>replmax</code> occurrences. 2.75 + * 2.76 + * The sstr_t.ptr of the resulting string must be freed manually. 2.77 + * 2.78 + * If allocation fails, the sstr_t.ptr of the return value is NULL. 2.79 + * 2.80 + * @param str the string where replacements should be applied 2.81 + * @param pattern the pattern to search for 2.82 + * @param replacement the replacement string 2.83 + * @param replmax maximum number of replacements 2.84 + * @return the resulting string after applying the replacements 2.85 + */ 2.86 +#define sstrreplacen(str, pattern, replacement, replmax) \ 2.87 + scstrreplacen(SCSTR(str), SCSTR(pattern), SCSTR(replacement), replmax) 2.88 + 2.89 +/** 2.90 + * Replaces a pattern in a string with another string. 2.91 + * 2.92 + * The pattern is taken literally and is no regular expression. 2.93 + * Replaces at most <code>replmax</code> occurrences. 2.94 + * 2.95 + * The resulting string is allocated by the specified allocator. I.e. it 2.96 + * depends on the used allocator, whether the sstr_t.ptr must be freed 2.97 + * manually. 2.98 + * 2.99 + * If allocation fails, the sstr_t.ptr of the return value is NULL. 2.100 + * 2.101 + * @param allocator the allocator to use 2.102 + * @param str the string where replacements should be applied 2.103 + * @param pattern the pattern to search for 2.104 + * @param replacement the replacement string 2.105 + * @return the resulting string after applying the replacements 2.106 + */ 2.107 +#define sstrreplace_a(allocator, str, pattern, replacement) \ 2.108 + scstrreplacen_a(allocator, SCSTR(str), SCSTR(pattern), \ 2.109 + SCSTR(replacement), SIZE_MAX) 2.110 + 2.111 +/** 2.112 + * Replaces a pattern in a string with another string. 2.113 + * 2.114 + * The pattern is taken literally and is no regular expression. 2.115 + * Replaces at most <code>replmax</code> occurrences. 2.116 + * 2.117 + * The sstr_t.ptr of the resulting string must be freed manually. 2.118 + * 2.119 + * If allocation fails, the sstr_t.ptr of the return value is NULL. 2.120 + * 2.121 + * @param str the string where replacements should be applied 2.122 + * @param pattern the pattern to search for 2.123 + * @param replacement the replacement string 2.124 + * @return the resulting string after applying the replacements 2.125 + */ 2.126 +#define sstrreplace(str, pattern, replacement) \ 2.127 + scstrreplacen(SCSTR(str), SCSTR(pattern), SCSTR(replacement), SIZE_MAX) 2.128 + 2.129 #ifdef __cplusplus 2.130 } 2.131 #endif
3.1 --- a/test/main.c Fri Dec 27 11:48:55 2019 +0100 3.2 +++ b/test/main.c Sun Dec 29 11:29:17 2019 +0100 3.3 @@ -138,6 +138,7 @@ 3.4 ucx_test_register(suite, test_sstrtrim); 3.5 ucx_test_register(suite, test_sstrprefixsuffix); 3.6 ucx_test_register(suite, test_sstrcaseprefixsuffix); 3.7 + ucx_test_register(suite, test_sstrreplace); 3.8 3.9 /* UcxLogger Tests */ 3.10 ucx_test_register(suite, test_ucx_logger_new);
4.1 --- a/test/string_tests.c Fri Dec 27 11:48:55 2019 +0100 4.2 +++ b/test/string_tests.c Sun Dec 29 11:29:17 2019 +0100 4.3 @@ -476,3 +476,41 @@ 4.4 4.5 UCX_TEST_END 4.6 } 4.7 + 4.8 +UCX_TEST(test_sstrreplace) { 4.9 + 4.10 + sstr_t str = ST("test ababab string aba"); 4.11 + sstr_t longstr = ST("xyaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaacd"); 4.12 + sstr_t notrail = ST("test abab"); 4.13 + 4.14 + sstr_t repl = sstrreplace(str, SC("abab"), SC("muchlonger")); 4.15 + sstr_t expected = ST("test muchlongerab string aba"); 4.16 + 4.17 + sstr_t repln = sstrreplacen(str, SC("ab"), SC("c"), 2); 4.18 + sstr_t expectedn = ST("test ccab string aba"); 4.19 + 4.20 + sstr_t longrepl = sstrreplace(longstr, SC("a"), SC("z")); 4.21 + sstr_t longexpect = ST("xyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzcd"); 4.22 + 4.23 + sstr_t replnotrail = sstrreplace(notrail, SC("ab"), SC("z")); 4.24 + sstr_t notrailexpect = ST("test zz"); 4.25 + 4.26 + UCX_TEST_BEGIN 4.27 + 4.28 + UCX_TEST_ASSERT(repl.ptr != str.ptr, "result string is not fresh"); 4.29 + UCX_TEST_ASSERT(!sstrcmp(repl, expected), "incorrect replacement"); 4.30 + 4.31 + UCX_TEST_ASSERT(repln.ptr != str.ptr, "result string is not fresh"); 4.32 + UCX_TEST_ASSERT(!sstrcmp(repln, expectedn), "incorrect replacement"); 4.33 + 4.34 + UCX_TEST_ASSERT(!sstrcmp(longrepl, longexpect), 4.35 + "incorrect handling of long strings"); 4.36 + 4.37 + UCX_TEST_ASSERT(!sstrcmp(replnotrail, notrailexpect), 4.38 + "no trail replacement fails"); 4.39 + 4.40 + UCX_TEST_END 4.41 + 4.42 + free(repl.ptr); 4.43 + free(repln.ptr); 4.44 +} 4.45 \ No newline at end of file
5.1 --- a/test/string_tests.h Fri Dec 27 11:48:55 2019 +0100 5.2 +++ b/test/string_tests.h Sun Dec 29 11:29:17 2019 +0100 5.3 @@ -48,6 +48,7 @@ 5.4 UCX_TEST(test_sstrtrim); 5.5 UCX_TEST(test_sstrprefixsuffix); 5.6 UCX_TEST(test_sstrcaseprefixsuffix); 5.7 +UCX_TEST(test_sstrreplace); 5.8 5.9 #ifdef __cplusplus 5.10 }