2012-10-02
added sstrsplit function
test/Makefile | file | annotate | diff | comparison | revisions | |
test/main.c | file | annotate | diff | comparison | revisions | |
test/string_tests.c | file | annotate | diff | comparison | revisions | |
test/string_tests.h | file | annotate | diff | comparison | revisions | |
ucx/string.c | file | annotate | diff | comparison | revisions | |
ucx/string.h | file | annotate | diff | comparison | revisions |
--- a/test/Makefile Tue Oct 02 11:18:47 2012 +0200 +++ b/test/Makefile Tue Oct 02 13:43:17 2012 +0200 @@ -28,7 +28,7 @@ include ../$(CONF).mk -SRC = main.c list_tests.c dlist_tests.c mpool_tests.c map_tests.c +SRC = main.c list_tests.c dlist_tests.c mpool_tests.c map_tests.c string_tests.c OBJ = $(SRC:%.c=../build/%.$(OBJ_EXT))
--- a/test/main.c Tue Oct 02 11:18:47 2012 +0200 +++ b/test/main.c Tue Oct 02 13:43:17 2012 +0200 @@ -35,7 +35,7 @@ #include "list_tests.h" #include "dlist_tests.h" - +#include "string_tests.h" #include "mpool_tests.h" #include "map_tests.h" @@ -148,6 +148,9 @@ ucx_test_register(suite, test_ucx_map_iterator); ucx_test_register(suite, test_ucx_map_iterator_chain); + /* sstring Tests */ + ucx_test_register(suite, test_sstrsplit); + ucx_test_run(suite, stdout); ucx_test_suite_free(suite);
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/string_tests.c Tue Oct 02 13:43:17 2012 +0200 @@ -0,0 +1,131 @@ +/* + * + */ + +#include "string_tests.h" + +UCX_TEST_IMPLEMENT(test_sstrsplit) { + + const char *original = "this,is,a,csv,string"; + sstr_t test = sstr("this,is,a,csv,string"); /* use copy of original here */ + size_t n; + sstr_t *list; + + UCX_TEST_BEGIN + + /* Nullpointer check */ + n = 0; + UCX_TEST_ASSERT(sstrsplit(test, ST(""), &n) == NULL, + "empty delimiter must return NULL"); + + /* no delimiter occurence (ndo) */ + n = 0; + list = sstrsplit(test, ST("z"), &n); + UCX_TEST_ASSERT(n == 1, "ndo, list length must be 1"); + UCX_TEST_ASSERT(strcmp(list[0].ptr, original) == 0, "ndo, " + "original string shall be returned as single list element"); + UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0, + "ndo, original has been modified"); + free(list); + + /* partially matching delimiter (pmd) */ + n = 0; + list = sstrsplit(test, ST("stringbuilder"), &n); + UCX_TEST_ASSERT(n == 1, "pmd, list length must be 1"); + UCX_TEST_ASSERT(strcmp(list[0].ptr, original) == 0, "pmd, " + "original string shall be returned as single list element"); + UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0, + "pmd, original has been modified"); + free(list); + + /* matching single-char delimiter (mscd) */ + n = 0; + list = sstrsplit(test, ST(","), &n); + UCX_TEST_ASSERT(n == 5, "mscd, list length must be 5"); + UCX_TEST_ASSERT(strcmp(list[0].ptr, "this") == 0, "mscd, item 0 mismatch"); + UCX_TEST_ASSERT(strcmp(list[1].ptr, "is") == 0, "mscd, item 1 mismatch"); + UCX_TEST_ASSERT(strcmp(list[2].ptr, "a") == 0, "mscd, item 2 mismatch"); + UCX_TEST_ASSERT(strcmp(list[3].ptr, "csv") == 0, "mscd, item 3 mismatch"); + UCX_TEST_ASSERT(strcmp(list[4].ptr, "string")==0, "mscd, item 4 mismatch"); + UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0, + "mscd, original has been modified"); + free(list); + + /* matching multi-char delimiter (mmcd) */ + n = 0; + list = sstrsplit(test, 
ST("is"), &n); + UCX_TEST_ASSERT(n == 3, "mscd, list length must be 3"); + UCX_TEST_ASSERT(strcmp(list[0].ptr, "th") == 0, "mmcd, item 0 mismatch"); + UCX_TEST_ASSERT(strcmp(list[1].ptr, ",") == 0, "mmcd, item 1 mismatch"); + UCX_TEST_ASSERT(strcmp(list[2].ptr, ",a,csv,string") == 0, + "mmcd, item 2 mismatch"); + UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0, + "mmcd, original has been modified"); + free(list); + + /* bounded list using single-char delimiter (blsc) */ + n = 3; + list = sstrsplit(test, ST(","), &n); + UCX_TEST_ASSERT(n == 3, "blsc, list length must be 3"); + UCX_TEST_ASSERT(strcmp(list[0].ptr, "this") == 0, "blsc, item 0 mismatch"); + UCX_TEST_ASSERT(strcmp(list[1].ptr, "is") == 0, "blsc, item 1 mismatch"); + UCX_TEST_ASSERT(strcmp(list[2].ptr, "a,csv,string") == 0, + "blsc, item 2 mismatch"); + UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0, + "blsc, original has been modified"); + free(list); + + /* bounded list using multi-char delimiter (blmc) */ + n = 2; + list = sstrsplit(test, ST("is"), &n); + UCX_TEST_ASSERT(n == 2, "blmc, list length must be 2"); + UCX_TEST_ASSERT(strcmp(list[0].ptr, "th") == 0, "blmc, item 0 mismatch"); + UCX_TEST_ASSERT(strcmp(list[1].ptr, ",is,a,csv,string") == 0, + "blmc, item 1 mismatch"); + UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0, + "blmc, original has been modified"); + free(list); + + /* start with delimiter (swd) */ + n = 0; + list = sstrsplit(test, ST("this"), &n); + UCX_TEST_ASSERT(n == 2, "swd, list length must be 2"); + UCX_TEST_ASSERT(list[0].length == 0, "swd, first item must be empty"); + UCX_TEST_ASSERT(strcmp(list[1].ptr, ",is,a,csv,string") == 0, + "swd, second item corrupt"); + UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0, + "swd, original has been modified"); + free(list); + + /* end with delimiter (ewd) */ + n = 0; + list = sstrsplit(test, ST("string"), &n); + UCX_TEST_ASSERT(n == 2, "ewd, list length must be 2"); + UCX_TEST_ASSERT(strcmp(list[0].ptr, "this,is,a,csv,") == 0, + "swd, first 
item corrupt"); + UCX_TEST_ASSERT(list[1].length == 0, "ewd, second item must be empty"); + UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0, + "ewd, original has been modified"); + free(list); + + /* exact match (exm) */ + n = 0; + list = sstrsplit(test, ST("this,is,a,csv,string"), &n); + UCX_TEST_ASSERT(n == 1, "exm, list length must be 1"); + UCX_TEST_ASSERT(list[0].length == 0, "exm, single item must be empty"); + UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0, + "exm, original has been modified"); + free(list); + + /* substring (subs) */ + n = 0; + list = sstrsplit(test, ST("this,is,a,csv,string,with,extension"), &n); + UCX_TEST_ASSERT(n == 1, "subs, list length must be 1"); + UCX_TEST_ASSERT(strcmp(list[0].ptr, original) == 0, + "subs, single item must be the original string"); + UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0, + "subs, original has been modified"); + free(list); + + UCX_TEST_END +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test/string_tests.h Tue Oct 02 13:43:17 2012 +0200 @@ -0,0 +1,22 @@ +/* + * Declarations of the unit tests for the ucx string functions. + */ + +#ifndef STRING_TESTS_H +#define STRING_TESTS_H + +#include "ucx/test.h" +#include "ucx/string.h" + +#ifdef __cplusplus +extern "C" { +#endif + +UCX_TEST_DECLARE(test_sstrsplit) + +#ifdef __cplusplus +} +#endif + +#endif /* STRING_TESTS_H */ +
--- a/ucx/string.c Tue Oct 02 11:18:47 2012 +0200 +++ b/ucx/string.c Tue Oct 02 13:43:17 2012 +0200 @@ -86,6 +86,62 @@ return new_sstr; } +sstr_t* sstrsplit(sstr_t s, sstr_t d, size_t *n) { + if (d.length == 0) { + return NULL; + } + + sstr_t* result; + size_t nmax = *n; + *n = 1; + + /* special case: exact match - no processing needed */ + if (s.length == d.length && strncmp(s.ptr, d.ptr, s.length) == 0) { + result = malloc(sizeof(sstr_t)); + result[0] = sstrn("", 0); + return result; + } + sstr_t sv = sstrdup(s); + + for (int i = 0 ; i < s.length ; i++) { + if (sv.ptr[i] == d.ptr[0]) { + _Bool match = 1; + for (int j = 1 ; j < d.length ; j++) { + if (j+i < s.length) { + match &= (sv.ptr[i+j] == d.ptr[j]); + } else { + match = 0; + break; + } + } + if (match) { + (*n)++; + for (int j = 0 ; j < d.length ; j++) { + sv.ptr[i+j] = 0; + } + i += d.length; + } + } + if ((*n) == nmax) break; + } + result = malloc(sizeof(sstr_t) * (*n)); + + char *pptr = sv.ptr; + for (int i = 0 ; i < *n ; i++) { + size_t l = strlen(pptr); + char* ptr = malloc(l + 1); + memcpy(ptr, pptr, l); + ptr[l] = 0; + + result[i] = sstrn(ptr, l); + pptr += l + d.length; + } + + free(sv.ptr); + + return result; +} + int sstrcmp(sstr_t s1, sstr_t s2) { return strncmp(s1.ptr, s2.ptr, s1.length>s2.length ? s2.length: s1.length); }
--- a/ucx/string.h Tue Oct 02 11:18:47 2012 +0200 +++ b/ucx/string.h Tue Oct 02 13:43:17 2012 +0200 @@ -68,6 +68,23 @@ */ sstr_t sstrsubsl (sstr_t s, size_t start, size_t end); +/* + * Splits s at each occurrence of d (s equal to d yields one empty string) + * + * s the string to split (not modified) + * d the delimiter string + * n in: the maximum size of the resulting list + * (0 means unbounded; else the last item is the unsplit rest) + * out: the actual size of the list will be stored here + * + * Hint: use this value to avoid dynamic reallocation of the result list + * + * Returns a list of the split strings (each is zero terminated) + * NOTE: this list needs to be freed manually after usage + * + * Returns NULL on error (e.g. empty delimiter); n is not changed in that case + */ +sstr_t* sstrsplit(sstr_t s, sstr_t d, size_t *n); int sstrcmp(sstr_t s1, sstr_t s2);