# HG changeset patch
# User Mike Becker
# Date 1662747548 -7200
# Node ID 0f3c9662f9b522ce69ebd80929b3c2bea0b2c680
# Parent 96fa7fa6af4fa1f1ad8bdbbbda18ca7ebccf9aaa
add tests and missing implementations for strings

diff -r 96fa7fa6af4f -r 0f3c9662f9b5 src/cx/string.h
--- a/src/cx/string.h Sat Sep 03 15:11:23 2022 +0200
+++ b/src/cx/string.h Fri Sep 09 20:19:08 2022 +0200
@@ -78,6 +78,15 @@
  */
 typedef struct cx_string_s cxstring;
 
+/**
+ * A literal initializer for a UCX string structure.
+ *
+ * The argument MUST be a string (const char*) \em literal: the length is taken from sizeof.
+ *
+ * @param literal the string literal
+ */
+#define CX_STR(literal) {literal, sizeof(literal) - 1}
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -190,9 +199,28 @@
  *
  * @param str the string to free
  */
+__attribute__((__nonnull__))
 void cx_strfree(cxmutstr *str);
 
 /**
+ * Passes the pointer in this string to the allocator's free function.
+ *
+ * The pointer in the struct is set to \c NULL and the length is set to zero.
+ *
+ * \note There is no implementation for cxstring, because it is unlikely that
+ * you ever have a \c char \c const* you are really supposed to free. If you
+ * encounter such a situation, you should double-check your code.
+ *
+ * @param alloc the allocator
+ * @param str the string to free
+ */
+__attribute__((__nonnull__))
+void cx_strfree_a(
+        CxAllocator *alloc,
+        cxmutstr *str
+);
+
+/**
  * Returns the accumulated length of all specified strings.
  *
  * \attention if the count argument is larger than the number of the
@@ -720,7 +748,7 @@
  * The returned string will be allocated by \p allocator.
  *
  * If allocation fails, or the input string is empty,
- * the returned string will point to \c NULL.
+ * the returned string will be empty.
  *
  * @param allocator the allocator to use
  * @param str the string where replacements should be applied
@@ -730,7 +758,7 @@
  * @return the resulting string after applying the replacements
  */
 __attribute__((__warn_unused_result__, __nonnull__))
-cxmutstr cx_strreplace_a(
+cxmutstr cx_strreplacen_a(
         CxAllocator *allocator,
         cxstring str,
         cxstring pattern,
@@ -748,7 +776,7 @@
  * to cx_strfree() eventually.
  *
  * If allocation fails, or the input string is empty,
- * the returned string will point to \c NULL.
+ * the returned string will be empty.
  *
  * @param str the string where replacements should be applied
  * @param pattern the pattern to search for
@@ -756,8 +784,47 @@
  * @param replmax maximum number of replacements
  * @return the resulting string after applying the replacements
  */
-#define cx_strreplace(str, pattern, replacement, replmax) \
-cx_strreplace_a(cxDefaultAllocator, str, pattern, replacement, replmax)
+#define cx_strreplacen(str, pattern, replacement, replmax) \
+cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, replmax)
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ *
+ * The returned string will be allocated by \p allocator.
+ *
+ * If allocation fails, or the input string is empty,
+ * the returned string will be empty.
+ *
+ * @param allocator the allocator to use
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @return the resulting string after applying the replacements
+ */
+#define cx_strreplace_a(allocator, str, pattern, replacement) \
+cx_strreplacen_a(allocator, str, pattern, replacement, SIZE_MAX)
+
+/**
+ * Replaces a pattern in a string with another string.
+ *
+ * The pattern is taken literally and is no regular expression.
+ * Replaces all occurrences; no replacement limit applies (SIZE_MAX is passed as the maximum).
+ *
+ * The returned string will be allocated by \c malloc() and \em must be passed
+ * to cx_strfree() eventually.
+ *
+ * If allocation fails, or the input string is empty,
+ * the returned string will be empty.
+ *
+ * @param str the string where replacements should be applied
+ * @param pattern the pattern to search for
+ * @param replacement the replacement string
+ * @return the resulting string after applying the replacements
+ */
+#define cx_strreplace(str, pattern, replacement) \
+cx_strreplacen_a(cxDefaultAllocator, str, pattern, replacement, SIZE_MAX)
 
 #ifdef __cplusplus
 } // extern "C"
diff -r 96fa7fa6af4f -r 0f3c9662f9b5 src/string.c
--- a/src/string.c Sat Sep 03 15:11:23 2022 +0200
+++ b/src/string.c Fri Sep 09 20:19:08 2022 +0200
@@ -72,6 +72,15 @@
     str->length = 0;
 }
 
+void cx_strfree_a(
+        CxAllocator *alloc,
+        cxmutstr *str
+) {
+    cxFree(alloc, str->ptr);
+    str->ptr = NULL;
+    str->length = 0;
+}
+
 size_t cx_strlen(
         size_t count,
         ...
@@ -235,6 +244,11 @@
         return haystack;
     }
 
+    /* optimize for single-char needles */
+    if (needle.length == 1) {
+        return cx_strchr(haystack, *needle.ptr);
+    }
+
     /*
      * IMPORTANT:
      * Our prefix table contains the prefix length PLUS ONE
@@ -308,8 +322,55 @@
         size_t limit,
         cxstring *output
 ) {
-    // TODO: implement
-    return 0;
+    /* special case: output limit is zero, nothing may be written */
+    if (limit == 0) return 0;
+
+    /* special case: delimiter is empty, the string is never split */
+    if (delim.length == 0) {
+        output[0] = string;
+        return 1;
+    }
+
+    /* special cases: delimiter is at least as large as the string */
+    if (delim.length >= string.length) {
+        /* exact match - two (empty) items, but only if the limit has room for both */
+        if (limit > 1 && cx_strcmp(string, delim) == 0) {
+            output[0] = cx_strn(string.ptr, 0);
+            output[1] = cx_strn(string.ptr + string.length, 0);
+            return 2;
+        } else /* no match possible, or no room for a second item */ {
+            output[0] = string;
+            return 1;
+        }
+    }
+
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        if (match.length > 0) {
+            /* is the limit reached? */
+            if (n < limit) {
+                /* copy the current string to the array */
+                cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
+                output[n - 1] = item;
+                size_t processed = item.length + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                /* limit reached, copy the _full_ remaining string */
+                output[n - 1] = curpos;
+                break;
+            }
+        } else {
+            /* no more matches, copy last string */
+            output[n - 1] = curpos;
+            break;
+        }
+    }
+
+    return n;
 }
 
 size_t cx_strsplit_a(
@@ -319,8 +380,31 @@
         size_t limit,
         cxstring **output
 ) {
-    // TODO: implement
-    return 0;
+    /* find out how many splits we're going to make and allocate memory */
+    size_t n = 0;
+    cxstring curpos = string;
+    while (1) {
+        ++n;
+        cxstring match = cx_strstr(curpos, delim);
+        if (delim.length > 0 && match.length > 0) {
+            /* is the limit reached? */
+            if (n < limit) {
+                size_t processed = match.ptr - curpos.ptr + delim.length;
+                curpos.ptr += processed;
+                curpos.length -= processed;
+            } else {
+                /* limit reached */
+                break;
+            }
+        } else {
+            /* no more matches (an empty delimiter never splits, see cx_strsplit) */
+            break;
+        }
+    }
+    *output = cxCalloc(allocator, n, sizeof(cxstring));
+    /* allocation failed: report zero items instead of writing through NULL */
+    if (*output == NULL) return 0;
+    return cx_strsplit(string, delim, n, *output);
 }
 
 size_t cx_strsplit_m(
@@ -344,7 +428,10 @@
             delim, limit, (cxstring **) output);
 }
 
-int cx_strcmp(cxstring s1, cxstring s2) {
+int cx_strcmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
         return memcmp(s1.ptr, s2.ptr, s1.length);
     } else if (s1.length > s2.length) {
@@ -354,7 +441,10 @@
     }
 }
 
-int cx_strcasecmp(cxstring s1, cxstring s2) {
+int cx_strcasecmp(
+        cxstring s1,
+        cxstring s2
+) {
     if (s1.length == s2.length) {
 #ifdef _WIN32
         return _strnicmp(s1.ptr, s2.ptr, s1.length);
@@ -368,7 +458,10 @@
     }
 }
 
-cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) {
+cxmutstr cx_strdup_a(
+        CxAllocator *allocator,
+        cxstring string
+) {
     cxmutstr result = {
             cxMalloc(allocator, string.length + 1),
             string.length
@@ -400,18 +493,27 @@
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 
-bool cx_strprefix(cxstring string, cxstring prefix) {
+bool cx_strprefix(
+        cxstring string,
+        cxstring prefix
+) {
     if (string.length < prefix.length) return false;
     return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
 }
 
-bool cx_strsuffix(cxstring string, cxstring suffix) {
+bool cx_strsuffix(
+        cxstring string,
+        cxstring suffix
+) {
     if (string.length < suffix.length) return false;
     return memcmp(string.ptr + string.length - suffix.length,
                   suffix.ptr, suffix.length) == 0;
 }
 
-bool cx_casestrprefix(cxstring string, cxstring prefix) {
+bool cx_strcaseprefix(
+        cxstring string,
+        cxstring prefix
+) {
     if (string.length < prefix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
@@ -420,7 +522,10 @@
 #endif
 }
 
-bool cx_casestrsuffix(cxstring string, cxstring suffix) {
+bool cx_strcasesuffix(
+        cxstring string,
+        cxstring suffix
+) {
     if (string.length < suffix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr+string.length-suffix.length,
@@ -442,3 +547,135 @@
         string.ptr[i] = toupper(string.ptr[i]);
     }
 }
+
+#define REPLACE_INDEX_BUFFER_MAX 100
+
+struct cx_strreplace_ibuf {
+    size_t *buf;
+    unsigned int len; /* small indices */
+    struct cx_strreplace_ibuf *next;
+};
+
+static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
+    while (buf) {
+        struct cx_strreplace_ibuf *next = buf->next;
+        free(buf->buf);
+        free(buf);
+        buf = next;
+    }
+}
+
+cxmutstr cx_strreplacen_a(
+        CxAllocator *allocator,
+        cxstring str,
+        cxstring pattern,
+        cxstring replacement,
+        size_t replmax
+) {
+
+    if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
+        return cx_strdup_a(allocator, str);
+
+    /* Compute expected buffer length (at least 1 here, at most REPLACE_INDEX_BUFFER_MAX) */
+    size_t ibufmax = str.length / pattern.length;
+    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
+    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
+        ibuflen = REPLACE_INDEX_BUFFER_MAX;
+    }
+
+    /* Allocate first index buffer */
+    struct cx_strreplace_ibuf *firstbuf, *curbuf;
+    firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
+    if (!firstbuf) return cx_mutstrn(NULL, 0);
+    firstbuf->buf = calloc(ibuflen, sizeof(size_t));
+    if (!firstbuf->buf) {
+        free(firstbuf);
+        return cx_mutstrn(NULL, 0);
+    }
+
+    /* Search occurrences */
+    cxstring searchstr = str;
+    size_t found = 0;
+    do {
+        cxstring match = cx_strstr(searchstr, pattern);
+        if (match.length > 0) {
+            /* Allocate next buffer in chain, if required */
+            if (curbuf->len == ibuflen) {
+                struct cx_strreplace_ibuf *nextbuf =
+                        calloc(1, sizeof(struct cx_strreplace_ibuf));
+                if (!nextbuf) {
+                    cx_strrepl_free_ibuf(firstbuf);
+                    return cx_mutstrn(NULL, 0);
+                }
+                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
+                if (!nextbuf->buf) {
+                    free(nextbuf);
+                    cx_strrepl_free_ibuf(firstbuf);
+                    return cx_mutstrn(NULL, 0);
+                }
+                curbuf->next = nextbuf;
+                curbuf = nextbuf;
+            }
+
+            /* Record match index */
+            found++;
+            size_t idx = match.ptr - str.ptr;
+            curbuf->buf[curbuf->len++] = idx;
+            searchstr.ptr = match.ptr + pattern.length;
+            searchstr.length = str.length - idx - pattern.length;
+        } else {
+            break;
+        }
+    } while (searchstr.length > 0 && found < replmax);
+
+    /* Allocate result string */
+    cxmutstr result;
+    {
+        /* matches do not overlap, so the subtraction below cannot underflow */
+        size_t rcount = 0;
+        curbuf = firstbuf;
+        do {
+            rcount += curbuf->len;
+            curbuf = curbuf->next;
+        } while (curbuf);
+        result.length = str.length - rcount * pattern.length + rcount * replacement.length;
+        result.ptr = cxMalloc(allocator, result.length + 1);
+        if (!result.ptr) {
+            cx_strrepl_free_ibuf(firstbuf);
+            return cx_mutstrn(NULL, 0);
+        }
+    }
+
+    /* Build result string */
+    curbuf = firstbuf;
+    size_t srcidx = 0;
+    char *destptr = result.ptr;
+    do {
+        for (size_t i = 0; i < curbuf->len; i++) {
+            /* Copy source part up to next match */
+            size_t idx = curbuf->buf[i];
+            size_t srclen = idx - srcidx;
+            if (srclen > 0) {
+                memcpy(destptr, str.ptr + srcidx, srclen);
+                destptr += srclen;
+                srcidx += srclen;
+            }
+
+            /* Copy the replacement and skip the source pattern */
+            srcidx += pattern.length;
+            memcpy(destptr, replacement.ptr, replacement.length);
+            destptr += replacement.length;
+        }
+        curbuf = curbuf->next;
+    } while (curbuf);
+    memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
+    /* zero-terminate, matching the length + 1 allocation used by cx_strdup_a */
+    result.ptr[result.length] = '\0';
+
+    /* Free index buffer */
+    cx_strrepl_free_ibuf(firstbuf);
+
+    return result;
+}
+
+
diff -r 96fa7fa6af4f -r 0f3c9662f9b5 test/CMakeLists.txt
--- a/test/CMakeLists.txt Sat Sep 03 15:11:23 2022 +0200
+++ b/test/CMakeLists.txt Fri Sep 09 20:19:08 2022 +0200
@@ -15,6 +15,7 @@
 
 add_executable(ucxtest
         test_allocator.cpp
+        test_string.cpp
         test_buffer.cpp
         test_list.cpp
         test_tree.cpp
diff -r 96fa7fa6af4f -r 0f3c9662f9b5 test/test_string.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_string.cpp Fri Sep 09 20:19:08 2022 +0200
@@ -0,0 +1,531 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *   ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ *   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *   POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cx/string.h"
+#include "util_allocator.h"
+
+#include <gtest/gtest.h>
+
+TEST(String, construct) {
+    cxstring s1 = cx_str("1234");
+    cxstring s2 = cx_strn("abcd", 2);
+    cxmutstr s3 = cx_mutstr((char *) "1234");
+    cxmutstr s4 = cx_mutstrn((char *) "abcd", 2);
+
+    EXPECT_EQ(s1.length, 4);
+    EXPECT_EQ(s2.length, 2);
+    EXPECT_EQ(s3.length, 4);
+    EXPECT_EQ(s4.length, 2);
+}
+
+TEST(String, strfree) {
+    CxTestingAllocator alloc;
+    auto test = (char *) cxMalloc(&alloc, 16);
+    cxmutstr str = cx_mutstrn(test, 16);
+    ASSERT_EQ(str.ptr, test);
+    EXPECT_EQ(str.length, 16);
+    cx_strfree_a(&alloc, &str);
+    EXPECT_EQ(str.ptr, nullptr);
+    EXPECT_EQ(str.length, 0);
+    EXPECT_TRUE(alloc.verify());
+}
+
+TEST(String, strlen) {
+    cxstring s1 = CX_STR("1234");
+    cxstring s2 = CX_STR(".:.:.");
+    cxstring s3 = CX_STR("X");
+
+    size_t len0 = cx_strlen(0);
+    size_t len1 = cx_strlen(1, s1);
+    size_t len2 = cx_strlen(2, s1, s2);
+    size_t len3 = cx_strlen(3, s1, s2, s3);
+
+    EXPECT_EQ(len0, 0);
+    EXPECT_EQ(len1, 4);
+    EXPECT_EQ(len2, 9);
+    EXPECT_EQ(len3, 10);
+}
+
+
+TEST(String, strchr) {
+    cxstring str = CX_STR("I will find you - and I will kill you");
+
+    cxstring notfound = cx_strchr(str, 'x');
+    EXPECT_EQ(notfound.length, 0);
+
+    cxstring result = cx_strchr(str, 'w');
+    EXPECT_EQ(result.length, 35);
+    EXPECT_EQ(strcmp("will find you - and I will kill you", result.ptr), 0);
+}
+
+TEST(String, strrchr) {
+    cxstring str = CX_STR("I will find you - and I will kill you");
+
+    cxstring notfound = cx_strrchr(str, 'x');
+    EXPECT_EQ(notfound.length, 0);
+
+    cxstring result = cx_strrchr(str, 'w');
+    EXPECT_EQ(result.length, 13);
+    EXPECT_EQ(strcmp("will kill you", result.ptr), 0);
+}
+
+TEST(String, strstr) {
+    cxstring str = CX_STR("find the match in this string");
+    cxstring longstr = CX_STR(
+            "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkl"
+            "mnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwx"
+            "yzabcdeababababnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghij"
+            "klmnopqrstuvwxyzaababababababababrstuvwxyzabcdefghijklmnopqrstuv"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "wxyz1234567890");
+    cxstring longstrpattern = CX_STR(
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+    );
+    cxstring longstrresult = CX_STR(
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "abababababababababababababababababababababababababababababababab"
+            "wxyz1234567890"
+    );
+
+    cxstring notfound = cx_strstr(str, cx_str("no match"));
+    EXPECT_EQ(notfound.length, 0);
+
+    cxstring result = cx_strstr(str, cx_str("match"));
+    EXPECT_EQ(result.length, 20);
+    EXPECT_EQ(strcmp("match in this string", result.ptr), 0);
+
+    result = cx_strstr(str, cx_str(""));
+    EXPECT_EQ(result.length, str.length);
+    EXPECT_EQ(strcmp(str.ptr, result.ptr), 0);
+
+    result = cx_strstr(longstr, longstrpattern);
+    EXPECT_EQ(result.length, longstrresult.length);
+    EXPECT_EQ(strcmp(result.ptr, longstrresult.ptr), 0);
+}
+
+TEST(String, strcmp) {
+    cxstring str = CX_STR("compare this");
+
+    EXPECT_EQ(cx_strcmp(CX_STR(""), CX_STR("")), 0);
+    EXPECT_GT(cx_strcmp(str, CX_STR("")), 0);
+    EXPECT_EQ(cx_strcmp(str, CX_STR("compare this")), 0);
+    EXPECT_NE(cx_strcmp(str, CX_STR("Compare This")), 0);
+    EXPECT_LT(cx_strcmp(str, CX_STR("compare tool")), 0);
+    EXPECT_GT(cx_strcmp(str, CX_STR("compare shit")), 0);
+    EXPECT_LT(cx_strcmp(str, CX_STR("compare this not")), 0);
+    EXPECT_GT(cx_strcmp(str, CX_STR("compare")), 0);
+}
+
+TEST(String, strcasecmp) {
+    cxstring str = CX_STR("compare this");
+
+    EXPECT_EQ(cx_strcasecmp(CX_STR(""), CX_STR("")), 0);
+    EXPECT_GT(cx_strcasecmp(str, CX_STR("")), 0);
+    EXPECT_EQ(cx_strcasecmp(str, CX_STR("compare this")), 0);
+    EXPECT_EQ(cx_strcasecmp(str, CX_STR("Compare This")), 0);
+    EXPECT_LT(cx_strcasecmp(str, CX_STR("compare tool")), 0);
+    EXPECT_GT(cx_strcasecmp(str, CX_STR("compare shit")), 0);
+    EXPECT_LT(cx_strcasecmp(str, CX_STR("compare this not")), 0);
+    EXPECT_GT(cx_strcasecmp(str, CX_STR("compare")), 0);
+}
+
+
+TEST(String, strcat) {
+    cxstring s1 = CX_STR("12");
+    cxstring s2 = CX_STR("34");
+    cxstring s3 = CX_STR("56");
+    cxstring sn = {nullptr, 0};
+
+    CxTestingAllocator alloc;
+
+    cxmutstr t1 = cx_strcat_a(&alloc, 2, s1, s2);
+    EXPECT_EQ(cx_strcmp(cx_strcast(t1), CX_STR("1234")), 0);
+    cx_strfree_a(&alloc, &t1);
+
+    cxmutstr t2 = cx_strcat_a(&alloc, 3, s1, s2, s3);
+    EXPECT_EQ(cx_strcmp(cx_strcast(t2), CX_STR("123456")), 0);
+    cx_strfree_a(&alloc, &t2);
+
+    cxmutstr t3 = cx_strcat_a(&alloc, 6, s1, sn, s2, sn, s3, sn);
+    EXPECT_EQ(cx_strcmp(cx_strcast(t3), CX_STR("123456")), 0);
+    cx_strfree_a(&alloc, &t3);
+
+    cxmutstr t4 = cx_strcat_a(&alloc, 2, sn, sn);
+    EXPECT_EQ(cx_strcmp(cx_strcast(t4), CX_STR("")), 0);
+    cx_strfree_a(&alloc, &t4);
+
+    EXPECT_TRUE(alloc.verify());
+}
+
+TEST(String, strsplit) {
+
+    cxstring test = cx_str("this,is,a,csv,string");
+    size_t capa = 8;
+    cxstring list[8];
+    size_t n;
+
+    /* special case: empty delimiter */
+    n = cx_strsplit(test, cx_str(""), capa, list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+
+    /* no delimiter occurrence */
+    n = cx_strsplit(test, cx_str("z"), capa, list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+
+    /* partially matching delimiter */
+    n = cx_strsplit(test, cx_str("is,not"), capa, list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+
+    /* matching single-char delimiter */
+    n = cx_strsplit(test, cx_str(","), capa, list);
+    ASSERT_EQ(n, 5);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("is")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a")), 0);
+    EXPECT_EQ(cx_strcmp(list[3], cx_str("csv")), 0);
+    EXPECT_EQ(cx_strcmp(list[4], cx_str("string")), 0);
+
+    /* matching multi-char delimiter */
+    n = cx_strsplit(test, cx_str("is"), capa, list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str(",a,csv,string")), 0);
+
+    /* bounded list using single-char delimiter */
+    n = cx_strsplit(test, cx_str(","), 3, list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("is")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a,csv,string")), 0);
+
+    /* bounded list using multi-char delimiter */
+    n = cx_strsplit(test, cx_str("is"), 2, list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",is,a,csv,string")), 0);
+
+    /* start with delimiter */
+    n = cx_strsplit(test, cx_str("this"), capa, list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",is,a,csv,string")), 0);
+
+    /* end with delimiter */
+    n = cx_strsplit(test, cx_str("string"), capa, list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this,is,a,csv,")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+
+
+    /* end with delimiter exceed bound */
+    n = cx_strsplit(cx_str("a,b,c,"), cx_str(","), 3, list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("a")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("b")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("c,")), 0);
+
+    /* exact match */
+    n = cx_strsplit(test, cx_str("this,is,a,csv,string"), capa, list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+
+    /* string to be split is only substring */
+    n = cx_strsplit(test, cx_str("this,is,a,csv,string,with,extension"), capa, list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+
+    /* subsequent encounter of delimiter (the string between is empty) */
+    n = cx_strsplit(test, cx_str("is,"), capa, list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a,csv,string")), 0);
+}
+
+TEST(String, strsplit_a) {
+    CxTestingAllocator alloc;
+
+    cxstring test = cx_str("this,is,a,csv,string");
+    size_t capa = 8;
+    cxstring *list;
+    size_t n;
+
+    /* special case: empty delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str(""), capa, &list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+    cxFree(&alloc, list);
+
+    /* no delimiter occurrence */
+    n = cx_strsplit_a(&alloc, test, cx_str("z"), capa, &list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+    cxFree(&alloc, list);
+
+    /* partially matching delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str("is,not"), capa, &list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+    cxFree(&alloc, list);
+
+    /* matching single-char delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str(","), capa, &list);
+    ASSERT_EQ(n, 5);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("is")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a")), 0);
+    EXPECT_EQ(cx_strcmp(list[3], cx_str("csv")), 0);
+    EXPECT_EQ(cx_strcmp(list[4], cx_str("string")), 0);
+    cxFree(&alloc, list);
+
+    /* matching multi-char delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str("is"), capa, &list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str(",a,csv,string")), 0);
+    cxFree(&alloc, list);
+
+    /* bounded list using single-char delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str(","), 3, &list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("is")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a,csv,string")), 0);
+    cxFree(&alloc, list);
+
+    /* bounded list using multi-char delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str("is"), 2, &list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",is,a,csv,string")), 0);
+    cxFree(&alloc, list);
+
+    /* start with delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str("this"), capa, &list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str(",is,a,csv,string")), 0);
+    cxFree(&alloc, list);
+
+    /* end with delimiter */
+    n = cx_strsplit_a(&alloc, test, cx_str("string"), capa, &list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("this,is,a,csv,")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+    cxFree(&alloc, list);
+
+    /* end with delimiter exceed bound */
+    n = cx_strsplit_a(&alloc, cx_str("a,b,c,"), cx_str(","), 3, &list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("a")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("b")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("c,")), 0);
+    cxFree(&alloc, list);
+
+    /* exact match */
+    n = cx_strsplit_a(&alloc, test, cx_str("this,is,a,csv,string"), capa, &list);
+    ASSERT_EQ(n, 2);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+    cxFree(&alloc, list);
+
+    /* string to be split is only substring */
+    n = cx_strsplit_a(&alloc, test, cx_str("this,is,a,csv,string,with,extension"), capa, &list);
+    ASSERT_EQ(n, 1);
+    EXPECT_EQ(cx_strcmp(list[0], test), 0);
+    cxFree(&alloc, list);
+
+    /* subsequent encounter of delimiter (the string between is empty) */
+    n = cx_strsplit_a(&alloc, test, cx_str("is,"), capa, &list);
+    ASSERT_EQ(n, 3);
+    EXPECT_EQ(cx_strcmp(list[0], cx_str("th")), 0);
+    EXPECT_EQ(cx_strcmp(list[1], cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(list[2], cx_str("a,csv,string")), 0);
+    cxFree(&alloc, list);
+
+    EXPECT_TRUE(alloc.verify());
+}
+
+TEST(String, strtrim) {
+    cxstring t1 = cx_strtrim(cx_str(" ein test \t "));
+    cxstring t2 = cx_strtrim(cx_str("abc"));
+    cxstring t3 = cx_strtrim(cx_str(" 123"));
+    cxstring t4 = cx_strtrim(cx_str("xyz "));
+    cxstring t5 = cx_strtrim(cx_str(" "));
+    cxstring empty = cx_strtrim(cx_str(""));
+
+    EXPECT_EQ(cx_strcmp(t1, cx_str("ein test")), 0);
+    EXPECT_EQ(cx_strcmp(t2, cx_str("abc")), 0);
+    EXPECT_EQ(cx_strcmp(t3, cx_str("123")), 0);
+    EXPECT_EQ(cx_strcmp(t4, cx_str("xyz")), 0);
+    EXPECT_EQ(cx_strcmp(t5, cx_str("")), 0);
+    EXPECT_EQ(cx_strcmp(empty, cx_str("")), 0);
+}
+
+TEST(String, strprefix) {
+    cxstring str = CX_STR("test my prefix and my suffix");
+    cxstring empty = CX_STR("");
+    EXPECT_FALSE(cx_strprefix(empty, cx_str("pref")));
+    EXPECT_TRUE(cx_strprefix(str, empty));
+    EXPECT_TRUE(cx_strprefix(empty, empty));
+    EXPECT_TRUE(cx_strprefix(str, cx_str("test ")));
+    EXPECT_FALSE(cx_strprefix(str, cx_str("8-) fsck ")));
+}
+
+TEST(String, strsuffix) {
+    cxstring str = CX_STR("test my prefix and my suffix");
+    cxstring empty = CX_STR("");
+    EXPECT_FALSE(cx_strsuffix(empty, cx_str("suf")));
+    EXPECT_TRUE(cx_strsuffix(str, empty));
+    EXPECT_TRUE(cx_strsuffix(empty, empty));
+    EXPECT_TRUE(cx_strsuffix(str, cx_str("fix")));
+    EXPECT_FALSE(cx_strsuffix(str, cx_str("fox")));
+}
+
+TEST(String, strcaseprefix) {
+    cxstring str = CX_STR("test my prefix and my suffix");
+    cxstring empty = CX_STR("");
+    EXPECT_FALSE(cx_strcaseprefix(empty, cx_str("pREf")));
+    EXPECT_TRUE(cx_strcaseprefix(str, empty));
+    EXPECT_TRUE(cx_strcaseprefix(empty, empty));
+    EXPECT_TRUE(cx_strcaseprefix(str, cx_str("TEST ")));
+    EXPECT_FALSE(cx_strcaseprefix(str, cx_str("8-) fsck ")));
+}
+
+TEST(String, strcasesuffix) {
+    cxstring str = CX_STR("test my prefix and my suffix");
+    cxstring empty = CX_STR("");
+    EXPECT_FALSE(cx_strcasesuffix(empty, cx_str("sUf")));
+    EXPECT_TRUE(cx_strcasesuffix(str, empty));
+    EXPECT_TRUE(cx_strcasesuffix(empty, empty));
+    EXPECT_TRUE(cx_strcasesuffix(str, cx_str("FIX")));
+    EXPECT_FALSE(cx_strcasesuffix(str, cx_str("fox")));
+}
+
+TEST(String, strreplace) {
+    cxstring str = CX_STR("test ababab string aba");
+    cxstring longstr = CX_STR(
+            "xyaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaacd");
+    cxstring notrail = CX_STR("test abab");
+    cxstring empty = CX_STR("");
+    cxstring astr = CX_STR("aaaaaaaaaa");
+    cxstring csstr = CX_STR("test AB ab TEST xyz");
+
+    cxmutstr repl = cx_strreplace(str, cx_str("abab"), cx_str("muchlonger"));
+    cxstring expected = CX_STR("test muchlongerab string aba");
+
+    cxmutstr repln = cx_strreplacen(str, cx_str("ab"), cx_str("c"), 2);
+    cxstring expectedn = CX_STR("test ccab string aba");
+
+    cxmutstr longrepl = cx_strreplace(longstr, cx_str("a"), cx_str("z"));
+    cxstring longexpect = CX_STR(
+            "xyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzcd");
+
+    cxmutstr replnotrail = cx_strreplace(notrail, cx_str("ab"), cx_str("z"));
+    cxstring notrailexpect = CX_STR("test zz");
+
+    cxmutstr repleq = cx_strreplace(str, str, cx_str("hello"));
+    cxstring eqexpect = CX_STR("hello");
+
+    cxmutstr replempty1 = cx_strreplace(empty, cx_str("ab"), cx_str("c")); // expect: empty
+    cxmutstr replempty2 = cx_strreplace(str, cx_str("abab"), empty);
+    cxstring emptyexpect2 = CX_STR("test ab string aba");
+
+    cxmutstr replpre = cx_strreplace(str, cx_str("test "), cx_str("TEST "));
+    cxstring preexpected = CX_STR("TEST ababab string aba");
+
+    cxmutstr replan1 = cx_strreplacen(astr, cx_str("a"), cx_str("x"), 1);
+    cxstring an1expected = CX_STR("xaaaaaaaaa");
+
+    cxmutstr replan4 = cx_strreplacen(astr, cx_str("a"), cx_str("x"), 4);
+    cxstring an4expected = CX_STR("xxxxaaaaaa");
+
+    cxmutstr replan9 = cx_strreplacen(astr, cx_str("a"), cx_str("x"), 9);
+    cxstring an9expected = CX_STR("xxxxxxxxxa");
+
+    cxmutstr replan10 = cx_strreplacen(astr, cx_str("a"), cx_str("x"), 10);
+    cxstring an10expected = CX_STR("xxxxxxxxxx");
+
+    cxmutstr replcs1 = cx_strreplace(csstr, cx_str("AB"), cx_str("*"));
+    cxstring cs1expected = CX_STR("test * ab TEST xyz");
+
+    cxmutstr replcs2 = cx_strreplace(csstr, cx_str("test"), cx_str("TEST"));
+    cxstring cs2expected = CX_STR("TEST AB ab TEST xyz");
+
+
+    EXPECT_NE(repl.ptr, str.ptr);
+    EXPECT_EQ(cx_strcmp(cx_strcast(repl), expected), 0);
+    EXPECT_NE(repln.ptr, str.ptr);
+    EXPECT_EQ(cx_strcmp(cx_strcast(repln), expectedn), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(longrepl), longexpect), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replnotrail), notrailexpect), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(repleq), eqexpect), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replempty1), empty), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replempty2), emptyexpect2), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replpre), preexpected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replan1), an1expected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replan4), an4expected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replan9), an9expected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replan10), an10expected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replcs1), cs1expected), 0);
+    EXPECT_EQ(cx_strcmp(cx_strcast(replcs2), cs2expected), 0);
+
+    cx_strfree(&repl);
+    cx_strfree(&repln);
+    cx_strfree(&longrepl);
+    cx_strfree(&replnotrail);
+    cx_strfree(&repleq);
+    cx_strfree(&replempty1);
+    cx_strfree(&replempty2);
+    cx_strfree(&replpre);
+    cx_strfree(&replan1);
+    cx_strfree(&replan4);
+    cx_strfree(&replan9);
+    cx_strfree(&replan10);
+    cx_strfree(&replcs1);
+    cx_strfree(&replcs2);
+}