added sstrsplit function

Tue, 02 Oct 2012 13:43:17 +0200

author
Mike Becker <universe@uap-core.de>
date
Tue, 02 Oct 2012 13:43:17 +0200
changeset 39
bf8ab7bb74ff
parent 38
35f67a8ef875
child 40
583718dd4cf3

added sstrsplit function

test/Makefile file | annotate | diff | comparison | revisions
test/main.c file | annotate | diff | comparison | revisions
test/string_tests.c file | annotate | diff | comparison | revisions
test/string_tests.h file | annotate | diff | comparison | revisions
ucx/string.c file | annotate | diff | comparison | revisions
ucx/string.h file | annotate | diff | comparison | revisions
     1.1 --- a/test/Makefile	Tue Oct 02 11:18:47 2012 +0200
     1.2 +++ b/test/Makefile	Tue Oct 02 13:43:17 2012 +0200
     1.3 @@ -28,7 +28,7 @@
     1.4  
     1.5  include ../$(CONF).mk
     1.6  
     1.7 -SRC = main.c list_tests.c dlist_tests.c mpool_tests.c map_tests.c
     1.8 +SRC = main.c list_tests.c dlist_tests.c mpool_tests.c map_tests.c string_tests.c
     1.9  
    1.10  OBJ = $(SRC:%.c=../build/%.$(OBJ_EXT))
    1.11  
     2.1 --- a/test/main.c	Tue Oct 02 11:18:47 2012 +0200
     2.2 +++ b/test/main.c	Tue Oct 02 13:43:17 2012 +0200
     2.3 @@ -35,7 +35,7 @@
     2.4  
     2.5  #include "list_tests.h"
     2.6  #include "dlist_tests.h"
     2.7 -
     2.8 +#include "string_tests.h"
     2.9  #include "mpool_tests.h"
    2.10  #include "map_tests.h"
    2.11  
    2.12 @@ -148,6 +148,9 @@
    2.13          ucx_test_register(suite, test_ucx_map_iterator);
    2.14          ucx_test_register(suite, test_ucx_map_iterator_chain);
    2.15          
    2.16 +        /* sstring Tests */
    2.17 +        ucx_test_register(suite, test_sstrsplit);
    2.18 +
    2.19          ucx_test_run(suite, stdout);
    2.20          ucx_test_suite_free(suite);
    2.21          
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/test/string_tests.c	Tue Oct 02 13:43:17 2012 +0200
     3.3 @@ -0,0 +1,131 @@
     3.4 +/*
     3.5 + *
     3.6 + */
     3.7 +
     3.8 +#include "string_tests.h"
     3.9 +
    3.10 +UCX_TEST_IMPLEMENT(test_sstrsplit) {
    3.11 +
    3.12 +    const char *original = "this,is,a,csv,string";
    3.13 +    sstr_t test = sstr("this,is,a,csv,string"); /* use copy of original here */
    3.14 +    size_t n;
    3.15 +    sstr_t *list;
    3.16 +
    3.17 +    UCX_TEST_BEGIN
    3.18 +
    3.19 +    /* empty delimiter check */
    3.20 +    n = 0;
    3.21 +    UCX_TEST_ASSERT(sstrsplit(test, ST(""), &n) == NULL,
    3.22 +            "empty delimiter must return NULL");
    3.23 +
    3.24 +    /* no delimiter occurrence (ndo) */
    3.25 +    n = 0;
    3.26 +    list = sstrsplit(test, ST("z"), &n);
    3.27 +    UCX_TEST_ASSERT(n == 1, "ndo, list length must be 1");
    3.28 +    UCX_TEST_ASSERT(strcmp(list[0].ptr, original) == 0, "ndo, "
    3.29 +            "original string shall be returned as single list element");
    3.30 +    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
    3.31 +                "ndo, original has been modified");
    3.32 +    free(list);
    3.33 +
    3.34 +    /* partially matching delimiter (pmd) */
    3.35 +    n = 0;
    3.36 +    list = sstrsplit(test, ST("stringbuilder"), &n);
    3.37 +    UCX_TEST_ASSERT(n == 1, "pmd, list length must be 1");
    3.38 +    UCX_TEST_ASSERT(strcmp(list[0].ptr, original) == 0, "pmd, "
    3.39 +            "original string shall be returned as single list element");
    3.40 +    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
    3.41 +                "pmd, original has been modified");
    3.42 +    free(list);
    3.43 +
    3.44 +    /* matching single-char delimiter (mscd) */
    3.45 +    n = 0;
    3.46 +    list = sstrsplit(test, ST(","), &n);
    3.47 +    UCX_TEST_ASSERT(n == 5, "mscd, list length must be 5");
    3.48 +    UCX_TEST_ASSERT(strcmp(list[0].ptr, "this") == 0, "mscd, item 0 mismatch");
    3.49 +    UCX_TEST_ASSERT(strcmp(list[1].ptr, "is") == 0, "mscd, item 1 mismatch");
    3.50 +    UCX_TEST_ASSERT(strcmp(list[2].ptr, "a") == 0, "mscd, item 2 mismatch");
    3.51 +    UCX_TEST_ASSERT(strcmp(list[3].ptr, "csv") == 0, "mscd, item 3 mismatch");
    3.52 +    UCX_TEST_ASSERT(strcmp(list[4].ptr, "string")==0, "mscd, item 4 mismatch");
    3.53 +    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
    3.54 +            "mscd, original has been modified");
    3.55 +    free(list);
    3.56 +
    3.57 +    /* matching multi-char delimiter (mmcd) */
    3.58 +    n = 0;
    3.59 +    list = sstrsplit(test, ST("is"), &n);
    3.60 +    UCX_TEST_ASSERT(n == 3, "mscd, list length must be 3");
    3.61 +    UCX_TEST_ASSERT(strcmp(list[0].ptr, "th") == 0, "mmcd, item 0 mismatch");
    3.62 +    UCX_TEST_ASSERT(strcmp(list[1].ptr, ",") == 0, "mmcd, item 1 mismatch");
    3.63 +    UCX_TEST_ASSERT(strcmp(list[2].ptr, ",a,csv,string") == 0,
    3.64 +            "mmcd, item 2 mismatch");
    3.65 +    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
    3.66 +            "mmcd, original has been modified");
    3.67 +    free(list);
    3.68 +
    3.69 +    /* bounded list using single-char delimiter (blsc) */
    3.70 +    n = 3;
    3.71 +    list = sstrsplit(test, ST(","), &n);
    3.72 +    UCX_TEST_ASSERT(n == 3, "blsc, list length must be 3");
    3.73 +    UCX_TEST_ASSERT(strcmp(list[0].ptr, "this") == 0, "blsc, item 0 mismatch");
    3.74 +    UCX_TEST_ASSERT(strcmp(list[1].ptr, "is") == 0, "blsc, item 1 mismatch");
    3.75 +    UCX_TEST_ASSERT(strcmp(list[2].ptr, "a,csv,string") == 0,
    3.76 +            "blsc, item 2 mismatch");
    3.77 +    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
    3.78 +            "blsc, original has been modified");
    3.79 +    free(list);
    3.80 +
    3.81 +    /* bounded list using multi-char delimiter (blmc) */
    3.82 +    n = 2;
    3.83 +    list = sstrsplit(test, ST("is"), &n);
    3.84 +    UCX_TEST_ASSERT(n == 2, "blmc, list length must be 2");
    3.85 +    UCX_TEST_ASSERT(strcmp(list[0].ptr, "th") == 0, "blmc, item 0 mismatch");
    3.86 +    UCX_TEST_ASSERT(strcmp(list[1].ptr, ",is,a,csv,string") == 0,
    3.87 +            "blmc, item 1 mismatch");
    3.88 +    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
    3.89 +            "blmc, original has been modified");
    3.90 +    free(list);
    3.91 +
    3.92 +    /* start with delimiter (swd) */
    3.93 +    n = 0;
    3.94 +    list = sstrsplit(test, ST("this"), &n);
    3.95 +    UCX_TEST_ASSERT(n == 2, "swd, list length must be 2");
    3.96 +    UCX_TEST_ASSERT(list[0].length == 0, "swd, first item must be empty");
    3.97 +    UCX_TEST_ASSERT(strcmp(list[1].ptr, ",is,a,csv,string") == 0,
    3.98 +            "swd, second item corrupt");
    3.99 +    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
   3.100 +            "swd, original has been modified");
   3.101 +    free(list);
   3.102 +
   3.103 +    /* end with delimiter (ewd) */
   3.104 +    n = 0;
   3.105 +    list = sstrsplit(test, ST("string"), &n);
   3.106 +    UCX_TEST_ASSERT(n == 2, "ewd, list length must be 2");
   3.107 +    UCX_TEST_ASSERT(strcmp(list[0].ptr, "this,is,a,csv,") == 0,
   3.108 +            "swd, first item corrupt");
   3.109 +    UCX_TEST_ASSERT(list[1].length == 0, "ewd, second item must be empty");
   3.110 +    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
   3.111 +            "ewd, original has been modified");
   3.112 +    free(list);
   3.113 +
   3.114 +    /* exact match (exm) */
   3.115 +    n = 0;
   3.116 +    list = sstrsplit(test, ST("this,is,a,csv,string"), &n);
   3.117 +    UCX_TEST_ASSERT(n == 1, "exm, list length must be 1");
   3.118 +    UCX_TEST_ASSERT(list[0].length == 0, "exm, single item must be empty");
   3.119 +    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
   3.120 +            "exm, original has been modified");
   3.121 +    free(list);
   3.122 +
   3.123 +    /* substring (subs) */
   3.124 +    n = 0;
   3.125 +    list = sstrsplit(test, ST("this,is,a,csv,string,with,extension"), &n);
   3.126 +    UCX_TEST_ASSERT(n == 1, "subs, list length must be 1");
   3.127 +    UCX_TEST_ASSERT(strcmp(list[0].ptr, original) == 0,
   3.128 +            "subs, single item must be the original string");
   3.129 +    UCX_TEST_ASSERT(strcmp(test.ptr, original) == 0,
   3.130 +            "subs, original has been modified");
   3.131 +    free(list);
   3.132 +
   3.133 +    UCX_TEST_END
   3.134 +}
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/test/string_tests.h	Tue Oct 02 13:43:17 2012 +0200
     4.3 @@ -0,0 +1,22 @@
     4.4 +/* 
     4.5 + *
     4.6 + */
     4.7 +
     4.8 +#ifndef STRING_TESTS_H
     4.9 +#define	STRING_TESTS_H
    4.10 +
    4.11 +#include "ucx/test.h"
    4.12 +#include "ucx/string.h"
    4.13 +
    4.14 +#ifdef	__cplusplus
    4.15 +extern "C" {
    4.16 +#endif
    4.17 +
    4.18 +UCX_TEST_DECLARE(test_sstrsplit)
    4.19 +
    4.20 +#ifdef	__cplusplus
    4.21 +}
    4.22 +#endif
    4.23 +
    4.24 +#endif	/* STRING_TESTS_H */
    4.25 +
     5.1 --- a/ucx/string.c	Tue Oct 02 11:18:47 2012 +0200
     5.2 +++ b/ucx/string.c	Tue Oct 02 13:43:17 2012 +0200
     5.3 @@ -86,6 +86,62 @@
     5.4      return new_sstr;
     5.5  }
     5.6  
     5.7 +sstr_t* sstrsplit(sstr_t s, sstr_t d, size_t *n) {
     5.8 +    if (d.length == 0) {
     5.9 +        return NULL;
    5.10 +    }
    5.11 +
    5.12 +    sstr_t* result;
    5.13 +    size_t nmax = *n;
    5.14 +    *n = 1;
    5.15 +
    5.16 +    /* special case: exact match - no processing needed */
    5.17 +    if (s.length == d.length && strncmp(s.ptr, d.ptr, s.length) == 0) {
    5.18 +        result = malloc(sizeof(sstr_t));
    5.19 +        result[0] = sstrn("", 0);
    5.20 +        return result;
    5.21 +    }
    5.22 +    sstr_t sv = sstrdup(s);
    5.23 +
    5.24 +    for (int i = 0 ; i < s.length ; i++) {
    5.25 +        if (sv.ptr[i] == d.ptr[0]) {
    5.26 +            _Bool match = 1;
    5.27 +            for (int j = 1 ; j < d.length ; j++) {
    5.28 +                if (j+i < s.length) {
    5.29 +                    match &= (sv.ptr[i+j] == d.ptr[j]);
    5.30 +                } else {
    5.31 +                    match = 0;
    5.32 +                    break;
    5.33 +                }
    5.34 +            }
    5.35 +            if (match) {
    5.36 +                (*n)++;
    5.37 +                for (int j = 0 ; j < d.length ; j++) {
    5.38 +                    sv.ptr[i+j] = 0;
    5.39 +                }
    5.40 +                i += d.length;
    5.41 +            }
    5.42 +        }
    5.43 +        if ((*n) == nmax) break;
    5.44 +    }
    5.45 +    result = malloc(sizeof(sstr_t) * (*n));
    5.46 +
    5.47 +    char *pptr = sv.ptr;
    5.48 +    for (int i = 0 ; i < *n ; i++) {
    5.49 +        size_t l = strlen(pptr);
    5.50 +        char* ptr = malloc(l + 1);
    5.51 +        memcpy(ptr, pptr, l);
    5.52 +        ptr[l] = 0;
    5.53 +
    5.54 +        result[i] = sstrn(ptr, l);
    5.55 +        pptr += l + d.length;
    5.56 +    }
    5.57 +
    5.58 +    free(sv.ptr);
    5.59 +
    5.60 +    return result;
    5.61 +}
    5.62 +
    5.63  int sstrcmp(sstr_t s1, sstr_t s2) {
    5.64      return strncmp(s1.ptr, s2.ptr, s1.length>s2.length ? s2.length: s1.length);
    5.65  }
     6.1 --- a/ucx/string.h	Tue Oct 02 11:18:47 2012 +0200
     6.2 +++ b/ucx/string.h	Tue Oct 02 13:43:17 2012 +0200
     6.3 @@ -68,6 +68,23 @@
     6.4   */
     6.5  sstr_t sstrsubsl (sstr_t s, size_t start, size_t end);
     6.6  
     6.7 +/*
     6.8 + * splits s at each occurrence of the delimiter d
     6.9 + *
    6.10 + * s    the string to split
    6.11 + * d    the delimiter string
    6.12 + * n    the maximum size of the resulting list
    6.13 + *      a size of 0 indicates an unbounded list size
    6.14 + *      the actual size of the list will be stored here
    6.15 + *
    6.16 + *      Hint: use this value to avoid dynamic reallocation of the result list
    6.17 + *
    6.18 + * Returns a list of the split strings
    6.19 + * NOTE: this list needs to be freed manually after usage
    6.20 + *
    6.21 + * Returns NULL on error (e.g. if the delimiter is empty)
    6.22 + */
    6.23 +sstr_t* sstrsplit(sstr_t s, sstr_t d, size_t *n);
    6.24  
    6.25  int sstrcmp(sstr_t s1, sstr_t s2);
    6.26  

mercurial