added sstrsplit function

2012-10-02

author
Mike Becker <universe@uap-core.de>
date
Tue, 02 Oct 2012 13:43:17 +0200 (2012-10-02)
changeset 39
bf8ab7bb74ff
parent 38
35f67a8ef875
child 40
583718dd4cf3

added sstrsplit function

test/Makefile file | annotate | diff | comparison | revisions
test/main.c file | annotate | diff | comparison | revisions
test/string_tests.c file | annotate | diff | comparison | revisions
test/string_tests.h file | annotate | diff | comparison | revisions
ucx/string.c file | annotate | diff | comparison | revisions
ucx/string.h file | annotate | diff | comparison | revisions
--- a/test/Makefile	Tue Oct 02 11:18:47 2012 +0200
+++ b/test/Makefile	Tue Oct 02 13:43:17 2012 +0200
@@ -28,7 +28,7 @@
 
 include ../$(CONF).mk
 
-SRC = main.c list_tests.c dlist_tests.c mpool_tests.c map_tests.c
+SRC = main.c list_tests.c dlist_tests.c mpool_tests.c map_tests.c string_tests.c
 
 OBJ = $(SRC:%.c=../build/%.$(OBJ_EXT))
 
--- a/test/main.c	Tue Oct 02 11:18:47 2012 +0200
+++ b/test/main.c	Tue Oct 02 13:43:17 2012 +0200
@@ -35,7 +35,7 @@
 
 #include "list_tests.h"
 #include "dlist_tests.h"
-
+#include "string_tests.h"
 #include "mpool_tests.h"
 #include "map_tests.h"
 
@@ -148,6 +148,9 @@
         ucx_test_register(suite, test_ucx_map_iterator);
         ucx_test_register(suite, test_ucx_map_iterator_chain);
         
+        /* sstring Tests */
+        ucx_test_register(suite, test_sstrsplit);
+
         ucx_test_run(suite, stdout);
         ucx_test_suite_free(suite);
         
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/string_tests.c	Tue Oct 02 13:43:17 2012 +0200
@@ -0,0 +1,131 @@
+/*
+ *
+ */
+
+#include "string_tests.h"
+
+UCX_TEST_IMPLEMENT(test_sstrsplit) {
+    /* all cases split the same CSV string, which must remain unmodified */
+    const char *original = "this,is,a,csv,string";
+    sstr_t test = sstr("this,is,a,csv,string"); /* use copy of original here */
+    size_t n;
+    sstr_t *list;
+
+    UCX_TEST_BEGIN
+
+    /* an empty delimiter is rejected with a NULL result */
+    n = 0;
+    UCX_TEST_ASSERT(sstrsplit(test, ST(""), &n) == NULL,
+            "empty delimiter must return NULL");
+
+    /* no delimiter occurrence (ndo) */
+    n = 0;
+    list = sstrsplit(test, ST("z"), &n);
+    UCX_TEST_ASSERT(n == 1, "ndo, list length must be 1");
+    UCX_TEST_ASSERT(strcmp(list[0].ptr, original) == 0, "ndo, "
+            "original string shall be returned as single list element");
+    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
+                "ndo, original has been modified");
+    free(list); /* NOTE(review): frees the list only, not the item strings */
+
+    /* partially matching delimiter (pmd) */
+    n = 0;
+    list = sstrsplit(test, ST("stringbuilder"), &n);
+    UCX_TEST_ASSERT(n == 1, "pmd, list length must be 1");
+    UCX_TEST_ASSERT(strcmp(list[0].ptr, original) == 0, "pmd, "
+            "original string shall be returned as single list element");
+    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
+                "pmd, original has been modified");
+    free(list);
+
+    /* matching single-char delimiter (mscd) */
+    n = 0;
+    list = sstrsplit(test, ST(","), &n);
+    UCX_TEST_ASSERT(n == 5, "mscd, list length must be 5");
+    UCX_TEST_ASSERT(strcmp(list[0].ptr, "this") == 0, "mscd, item 0 mismatch");
+    UCX_TEST_ASSERT(strcmp(list[1].ptr, "is") == 0, "mscd, item 1 mismatch");
+    UCX_TEST_ASSERT(strcmp(list[2].ptr, "a") == 0, "mscd, item 2 mismatch");
+    UCX_TEST_ASSERT(strcmp(list[3].ptr, "csv") == 0, "mscd, item 3 mismatch");
+    UCX_TEST_ASSERT(strcmp(list[4].ptr, "string")==0, "mscd, item 4 mismatch");
+    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
+            "mscd, original has been modified");
+    free(list);
+
+    /* matching multi-char delimiter (mmcd) */
+    n = 0;
+    list = sstrsplit(test, ST("is"), &n);
+    UCX_TEST_ASSERT(n == 3, "mmcd, list length must be 3");
+    UCX_TEST_ASSERT(strcmp(list[0].ptr, "th") == 0, "mmcd, item 0 mismatch");
+    UCX_TEST_ASSERT(strcmp(list[1].ptr, ",") == 0, "mmcd, item 1 mismatch");
+    UCX_TEST_ASSERT(strcmp(list[2].ptr, ",a,csv,string") == 0,
+            "mmcd, item 2 mismatch");
+    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
+            "mmcd, original has been modified");
+    free(list);
+
+    /* bounded list using single-char delimiter (blsc) */
+    n = 3;
+    list = sstrsplit(test, ST(","), &n);
+    UCX_TEST_ASSERT(n == 3, "blsc, list length must be 3");
+    UCX_TEST_ASSERT(strcmp(list[0].ptr, "this") == 0, "blsc, item 0 mismatch");
+    UCX_TEST_ASSERT(strcmp(list[1].ptr, "is") == 0, "blsc, item 1 mismatch");
+    UCX_TEST_ASSERT(strcmp(list[2].ptr, "a,csv,string") == 0,
+            "blsc, item 2 mismatch");
+    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
+            "blsc, original has been modified");
+    free(list);
+
+    /* bounded list using multi-char delimiter (blmc) */
+    n = 2;
+    list = sstrsplit(test, ST("is"), &n);
+    UCX_TEST_ASSERT(n == 2, "blmc, list length must be 2");
+    UCX_TEST_ASSERT(strcmp(list[0].ptr, "th") == 0, "blmc, item 0 mismatch");
+    UCX_TEST_ASSERT(strcmp(list[1].ptr, ",is,a,csv,string") == 0,
+            "blmc, item 1 mismatch");
+    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
+            "blmc, original has been modified");
+    free(list);
+
+    /* start with delimiter (swd) */
+    n = 0;
+    list = sstrsplit(test, ST("this"), &n);
+    UCX_TEST_ASSERT(n == 2, "swd, list length must be 2");
+    UCX_TEST_ASSERT(list[0].length == 0, "swd, first item must be empty");
+    UCX_TEST_ASSERT(strcmp(list[1].ptr, ",is,a,csv,string") == 0,
+            "swd, second item corrupt");
+    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
+            "swd, original has been modified");
+    free(list);
+
+    /* end with delimiter (ewd) */
+    n = 0;
+    list = sstrsplit(test, ST("string"), &n);
+    UCX_TEST_ASSERT(n == 2, "ewd, list length must be 2");
+    UCX_TEST_ASSERT(strcmp(list[0].ptr, "this,is,a,csv,") == 0,
+            "ewd, first item corrupt");
+    UCX_TEST_ASSERT(list[1].length == 0, "ewd, second item must be empty");
+    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
+            "ewd, original has been modified");
+    free(list);
+
+    /* exact match (exm) */
+    n = 0;
+    list = sstrsplit(test, ST("this,is,a,csv,string"), &n);
+    UCX_TEST_ASSERT(n == 1, "exm, list length must be 1");
+    UCX_TEST_ASSERT(list[0].length == 0, "exm, single item must be empty");
+    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
+            "exm, original has been modified");
+    free(list);
+
+    /* delimiter is longer than the string and starts with it (subs) */
+    n = 0;
+    list = sstrsplit(test, ST("this,is,a,csv,string,with,extension"), &n);
+    UCX_TEST_ASSERT(n == 1, "subs, list length must be 1");
+    UCX_TEST_ASSERT(strcmp(list[0].ptr, original) == 0,
+            "subs, single item must be the original string");
+    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
+            "subs, original has been modified");
+    free(list);
+
+    UCX_TEST_END
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/string_tests.h	Tue Oct 02 13:43:17 2012 +0200
@@ -0,0 +1,22 @@
+/* 
+ *
+ */
+
+#ifndef STRING_TESTS_H
+#define	STRING_TESTS_H
+
+#include "ucx/test.h"
+#include "ucx/string.h"
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+UCX_TEST_DECLARE(test_sstrsplit) /* implemented in string_tests.c */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* STRING_TESTS_H */
+
--- a/ucx/string.c	Tue Oct 02 11:18:47 2012 +0200
+++ b/ucx/string.c	Tue Oct 02 13:43:17 2012 +0200
@@ -86,6 +86,62 @@
     return new_sstr;
 }
 
+/* Splits s at each occurrence of d into at most *n items (0: unbounded) and
+ * stores the item count in *n. The items are zero-terminated heap copies.
+ * Returns NULL and leaves *n untouched if d is empty or memory runs out. */
+sstr_t* sstrsplit(sstr_t s, sstr_t d, size_t *n) {
+    if (d.length == 0) {
+        return NULL;
+    }
+
+    size_t nmax = *n;
+    size_t count = 1;
+    /* special case: exact match - the result is one single empty item */
+    if (s.length == d.length && memcmp(s.ptr, d.ptr, s.length) == 0) {
+        s.length = 0;
+    }
+
+    /* pass 1: count the items; a reached bound stops the search at once */
+    for (size_t i = 0 ; count != nmax && i + d.length <= s.length ; i++) {
+        if (memcmp(s.ptr + i, d.ptr, d.length) == 0) {
+            count++;
+            i += d.length - 1; /* the loop's i++ completes the skip over d */
+        }
+    }
+
+    sstr_t *result = calloc(count, sizeof(sstr_t));
+    if (result == NULL) {
+        return NULL;
+    }
+
+    /* pass 2: copy the items; all but the last one end at a delimiter
+     * that pass 1 has already found */
+    size_t start = 0;
+    for (size_t k = 0 ; k < count ; k++) {
+        size_t end = s.length;
+        if (k + 1 < count) {
+            end = start;
+            while (memcmp(s.ptr + end, d.ptr, d.length) != 0) {
+                end++;
+            }
+        }
+        char *item = malloc(end - start + 1);
+        if (item == NULL) {
+            while (k > 0) {
+                free(result[--k].ptr);
+            }
+            free(result);
+            return NULL;
+        }
+        memcpy(item, s.ptr + start, end - start);
+        item[end - start] = 0;
+        result[k] = sstrn(item, end - start);
+        start = end + d.length;
+    }
+    *n = count;
+    return result;
+}
+
 int sstrcmp(sstr_t s1, sstr_t s2) {
     return strncmp(s1.ptr, s2.ptr, s1.length>s2.length ? s2.length: s1.length);
 }
--- a/ucx/string.h	Tue Oct 02 11:18:47 2012 +0200
+++ b/ucx/string.h	Tue Oct 02 13:43:17 2012 +0200
@@ -68,6 +68,23 @@
  */
 sstr_t sstrsubsl (sstr_t s, size_t start, size_t end);
 
+/*
+ * splits s at each occurrence of the delimiter d
+ *
+ * s    the string to split
+ * d    the delimiter string, must not be empty
+ * n    in:  the maximum size of the resulting list,
+ *           a size of 0 indicates an unbounded list size
+ *      out: the actual size of the list
+ *
+ * When the bound is reached, the last item is the unsplit remainder of s.
+ * When s is equal to d, the list consists of a single empty string.
+ *
+ * Returns a list of the split strings as zero-terminated copies
+ * NOTE: this list needs to be freed manually after usage
+ *
+ * Returns NULL on error, e.g. if the delimiter is empty
+ */
+sstr_t* sstrsplit(sstr_t s, sstr_t d, size_t *n);
 
 int sstrcmp(sstr_t s1, sstr_t s2);
 

mercurial