reimplementation of sstrsplit

Mon, 20 Feb 2017 16:57:09 +0100

author
Mike Becker <universe@uap-core.de>
date
Mon, 20 Feb 2017 16:57:09 +0100
changeset 233
bd58fdde142d
parent 232
5f2d650eade7
child 234
7a63b4986b5b

reimplementation of sstrsplit

test/string_tests.c file | annotate | diff | comparison | revisions
ucx/string.c file | annotate | diff | comparison | revisions
ucx/string.h file | annotate | diff | comparison | revisions
     1.1 --- a/test/string_tests.c	Mon Feb 20 16:04:14 2017 +0100
     1.2 +++ b/test/string_tests.c	Mon Feb 20 16:57:09 2017 +0100
     1.3 @@ -299,7 +299,7 @@
     1.4      UCX_TEST_ASSERT(n == 3, "ewdeb, list length must be 3");
     1.5      UCX_TEST_ASSERT(strcmp(list[0].ptr, "a") == 0, "ewdeb, fst item corrupt");
     1.6      UCX_TEST_ASSERT(strcmp(list[1].ptr, "b") == 0, "ewdeb, snd item corrupt");
     1.7 -    UCX_TEST_ASSERT(strcmp(list[2].ptr, "c") == 0, "ewdeb, trd item corrupt");
     1.8 +    UCX_TEST_ASSERT(strcmp(list[2].ptr, "c,") == 0, "ewdeb, trd item corrupt");
     1.9      for(int i=0;i<n;i++) {
    1.10          free(list[i].ptr);
    1.11      }
     2.1 --- a/ucx/string.c	Mon Feb 20 16:04:14 2017 +0100
     2.2 +++ b/ucx/string.c	Mon Feb 20 16:57:09 2017 +0100
     2.3 @@ -212,62 +212,58 @@
     2.4          } else /* no match possible */ {
     2.5              *n = 1;
     2.6              sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
     2.7 -            result->ptr = (char*) almalloc(allocator, 1+s.length);
     2.8 -            memcpy(result->ptr, s.ptr, s.length);
     2.9 -            result->ptr[s.length] = '\0';
    2.10 -            result->length = s.length;
    2.11 +            *result = sstrdup_a(allocator, s);
    2.12              return result;
    2.13          }
    2.14      }
    2.15      
    2.16 -    sstr_t* result;
    2.17      ssize_t nmax = *n;
    2.18 -    *n = 1;
    2.19 -    
    2.20 -    sstr_t sv = sstrdup(s);
    2.21 -    if (sv.length == 0) {
    2.22 -        *n = -2;
    2.23 -        return NULL;
    2.24 -    }
    2.25 -
    2.26 -    for (size_t i = 0 ; i < s.length ; i++) {
    2.27 -        sstr_t substr = sstrsubs(sv, i);
    2.28 -        if (sstrprefix(substr, d)) {
    2.29 -            (*n)++;
    2.30 -            for (size_t j = 0 ; j < d.length ; j++) {
    2.31 -                sv.ptr[i+j] = 0;
    2.32 -            }
    2.33 -            i += d.length - 1; // -1, because the loop will do a i++
    2.34 -        }
    2.35 -        if ((*n) == nmax) break;
    2.36 -    }
    2.37 -    result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)*(*n));
    2.38 +    sstr_t* result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
    2.39  
    2.40      if (result) {
    2.41 -        char *pptr = sv.ptr;
    2.42 -        for (ssize_t i = 0 ; i < *n ; i++) {
    2.43 -            size_t l = strlen(pptr);
    2.44 -            char* ptr = (char*) almalloc(allocator, l + 1);
    2.45 -            if (ptr) {
    2.46 -                memcpy(ptr, pptr, l);
    2.47 -                ptr[l] = '\0';
    2.48 +        sstr_t curpos = s;
    2.49 +        ssize_t j = 1;
    2.50 +        while (1) {
    2.51 +            sstr_t match = sstrstr(curpos, d);
    2.52 +            if (match.length > 0) {
    2.53 +                /* is this our last try? */
    2.54 +                if (nmax == 0 || j < nmax) {
    2.55 +                    /* copy the current string to the array */
    2.56 +                    sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
    2.57 +                    result[j-1] = sstrdup_a(allocator, item);
    2.58 +                    size_t processed = item.length + d.length;
    2.59 +                    curpos.ptr += processed;
    2.60 +                    curpos.length -= processed;
    2.61  
    2.62 -                result[i] = sstrn(ptr, l);
    2.63 -                pptr += l + d.length;
    2.64 +                    /* allocate memory for the next string */
    2.65 +                    j++;
    2.66 +                    sstr_t* reallocated = (sstr_t*)
    2.67 +                            alrealloc(allocator, result, j*sizeof(sstr_t));
    2.68 +                    if (reallocated) {
    2.69 +                        result = reallocated;
    2.70 +                    } else {
    2.71 +                        for (ssize_t i = 0 ; i < j-1 ; i++) {
    2.72 +                            alfree(allocator, result[i].ptr);
    2.73 +                        }
    2.74 +                        alfree(allocator, result);
    2.75 +                        *n = -2;
    2.76 +                        return NULL;
    2.77 +                    }
    2.78 +                } else {
    2.79 +                    /* nmax reached, copy the _full_ remaining string */
    2.80 +                    result[j-1] = sstrdup_a(allocator, curpos);
    2.81 +                    break;
    2.82 +                }
    2.83              } else {
    2.84 -                for (ssize_t j = i-1 ; j >= 0 ; j--) {
    2.85 -                    alfree(allocator, result[j].ptr);
    2.86 -                }
    2.87 -                alfree(allocator, result);
    2.88 -                *n = -2;
    2.89 +                /* no more matches, copy last string */
    2.90 +                result[j-1] = sstrdup_a(allocator, curpos);
    2.91                  break;
    2.92              }
    2.93          }
    2.94 +        *n = j;
    2.95      } else {
    2.96          *n = -2;
    2.97      }
    2.98 -    
    2.99 -    free(sv.ptr);
   2.100  
   2.101      return result;
   2.102  }
     3.1 --- a/ucx/string.h	Mon Feb 20 16:04:14 2017 +0100
     3.2 +++ b/ucx/string.h	Mon Feb 20 16:57:09 2017 +0100
     3.3 @@ -260,6 +260,9 @@
     3.4   * 
     3.5   * If the string ends with the delimiter and the maximum list size is not
     3.6   * exceeded, the last array item will be an empty string.
     3.7 + * If more delimiters exist than the maximum list size allows, the last array
     3.8 + * item will be the entire remaining string after the last split,
     3.9 + * <i>including</i> any delimiters it still contains.
    3.10   * 
    3.11   * <b>Attention:</b> The array pointer <b>AND</b> all sstr_t.ptr of the array
    3.12   * items must be manually passed to <code>free()</code>. Use sstrsplit_a() with

mercurial