ucx/string.c

Mon, 20 Feb 2017 17:28:58 +0100

author
Mike Becker <universe@uap-core.de>
date
Mon, 20 Feb 2017 17:28:58 +0100
changeset 235
7cf1e41833a2
parent 234
7a63b4986b5b
child 236
ffc6d0910342
permissions
-rw-r--r--

reduces amount of realloc calls in sstrsplit

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2016 Olaf Wintermann. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include <stdlib.h>
    30 #include <string.h>
    31 #include <stdarg.h>
    32 #include <ctype.h>
    34 #include "string.h"
    35 #include "allocator.h"
    37 sstr_t sstr(char *cstring) {
    38     sstr_t string;
    39     string.ptr = cstring;
    40     string.length = strlen(cstring);
    41     return string;
    42 }
    44 sstr_t sstrn(char *cstring, size_t length) {
    45     sstr_t string;
    46     string.ptr = cstring;
    47     string.length = length;
    48     return string;
    49 }
    51 size_t sstrnlen(size_t n, sstr_t s, ...) {
    52     va_list ap;
    53     size_t size = s.length;
    54     va_start(ap, s);
    56     for (size_t i = 1 ; i < n ; i++) {
    57         sstr_t str = va_arg(ap, sstr_t);
    58         size += str.length;
    59     }
    60     va_end(ap);
    62     return size;
    63 }
    65 static sstr_t sstrvcat_a(
    66         UcxAllocator *a,
    67         size_t count,
    68         sstr_t s1,
    69         sstr_t s2,
    70         va_list ap) {
    71     sstr_t str;
    72     str.ptr = NULL;
    73     str.length = 0;
    74     if(count < 2) {
    75         return str;
    76     }
    78     sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
    79     if(!strings) {
    80         return str;
    81     }
    83     // get all args and overall length
    84     strings[0] = s1;
    85     strings[1] = s2;
    86     size_t strlen = s1.length + s2.length;
    87     for (size_t i=2;i<count;i++) {
    88         sstr_t s = va_arg (ap, sstr_t);
    89         strings[i] = s;
    90         strlen += s.length;
    91     }
    93     // create new string
    94     str.ptr = (char*) almalloc(a, strlen + 1);
    95     str.length = strlen;
    96     if(!str.ptr) {
    97         free(strings);
    98         str.length = 0;
    99         return str;
   100     }
   102     // concatenate strings
   103     size_t pos = 0;
   104     for (size_t i=0;i<count;i++) {
   105         sstr_t s = strings[i];
   106         memcpy(str.ptr + pos, s.ptr, s.length);
   107         pos += s.length;
   108     }
   110     str.ptr[str.length] = '\0';
   112     free(strings);
   114     return str;
   115 }
   117 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
   118     va_list ap;
   119     va_start(ap, s2);
   120     sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
   121     va_end(ap);
   122     return s;
   123 }
   125 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
   126     va_list ap;
   127     va_start(ap, s2);
   128     sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
   129     va_end(ap);
   130     return s;
   131 }
   133 sstr_t sstrsubs(sstr_t s, size_t start) {
   134     return sstrsubsl (s, start, s.length-start);
   135 }
   137 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
   138     sstr_t new_sstr;
   139     if (start >= s.length) {
   140         new_sstr.ptr = NULL;
   141         new_sstr.length = 0;
   142     } else {
   143         if (length > s.length-start) {
   144             length = s.length-start;
   145         }
   146         new_sstr.ptr = &s.ptr[start];
   147         new_sstr.length = length;
   148     }
   149     return new_sstr;
   150 }
   152 sstr_t sstrchr(sstr_t s, int c) {
   153     for(size_t i=0;i<s.length;i++) {
   154         if(s.ptr[i] == c) {
   155             return sstrsubs(s, i);
   156         }
   157     }
   158     sstr_t n;
   159     n.ptr = NULL;
   160     n.length = 0;
   161     return n;
   162 }
   164 sstr_t sstrrchr(sstr_t s, int c) {
   165     if (s.length > 0) {
   166         for(size_t i=s.length;i>0;i--) {
   167             if(s.ptr[i-1] == c) {
   168                 return sstrsubs(s, i-1);
   169             }
   170         }
   171     }
   172     sstr_t n;
   173     n.ptr = NULL;
   174     n.length = 0;
   175     return n;
   176 }
   178 sstr_t sstrstr(sstr_t string, sstr_t match) {
   179     if (match.length == 0) {
   180         return string;
   181     }
   183     for (size_t i = 0 ; i < string.length ; i++) {
   184         sstr_t substr = sstrsubs(string, i);
   185         if (sstrprefix(substr, match)) {
   186             return substr;
   187         }
   188     }
   190     sstr_t emptystr;
   191     emptystr.length = 0;
   192     emptystr.ptr = NULL;
   193     return emptystr;
   194 }
   196 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
   197     return sstrsplit_a(ucx_default_allocator(), s, d, n);
   198 }
   200 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
   201     if (s.length == 0 || d.length == 0) {
   202         *n = -1;
   203         return NULL;
   204     }
   206     /* special cases: delimiter is at least as large as the string */
   207     if (d.length >= s.length) {
   208         /* exact match */
   209         if (sstrcmp(s, d) == 0) {
   210             *n = 0;
   211             return NULL;
   212         } else /* no match possible */ {
   213             *n = 1;
   214             sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
   215             *result = sstrdup_a(allocator, s);
   216             return result;
   217         }
   218     }
   220     ssize_t nmax = *n;
   221     size_t arrlen = 16;
   222     sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
   224     if (result) {
   225         sstr_t curpos = s;
   226         ssize_t j = 1;
   227         while (1) {
   228             sstr_t match;
   229             /* optimize for one byte delimiters */
   230             if (d.length == 1) {
   231                 match = curpos;
   232                 for (size_t i = 0 ; i < curpos.length ; i++) {
   233                     if (curpos.ptr[i] == *(d.ptr)) {
   234                         match.ptr = curpos.ptr + i;
   235                         break;
   236                     }
   237                     match.length--;
   238                 }
   239             } else {
   240                 match = sstrstr(curpos, d);
   241             }
   242             if (match.length > 0) {
   243                 /* is this our last try? */
   244                 if (nmax == 0 || j < nmax) {
   245                     /* copy the current string to the array */
   246                     sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
   247                     result[j-1] = sstrdup_a(allocator, item);
   248                     size_t processed = item.length + d.length;
   249                     curpos.ptr += processed;
   250                     curpos.length -= processed;
   252                     /* allocate memory for the next string */
   253                     j++;
   254                     if (j > arrlen) {
   255                         arrlen *= 2;
   256                         sstr_t* reallocated = (sstr_t*) alrealloc(
   257                                 allocator, result, arrlen*sizeof(sstr_t));
   258                         if (reallocated) {
   259                             result = reallocated;
   260                         } else {
   261                             for (ssize_t i = 0 ; i < j-1 ; i++) {
   262                                 alfree(allocator, result[i].ptr);
   263                             }
   264                             alfree(allocator, result);
   265                             *n = -2;
   266                             return NULL;
   267                         }
   268                     }
   269                 } else {
   270                     /* nmax reached, copy the _full_ remaining string */
   271                     result[j-1] = sstrdup_a(allocator, curpos);
   272                     break;
   273                 }
   274             } else {
   275                 /* no more matches, copy last string */
   276                 result[j-1] = sstrdup_a(allocator, curpos);
   277                 break;
   278             }
   279         }
   280         *n = j;
   281     } else {
   282         *n = -2;
   283     }
   285     return result;
   286 }
   288 int sstrcmp(sstr_t s1, sstr_t s2) {
   289     if (s1.length == s2.length) {
   290         return memcmp(s1.ptr, s2.ptr, s1.length);
   291     } else if (s1.length > s2.length) {
   292         return 1;
   293     } else {
   294         return -1;
   295     }
   296 }
   298 int sstrcasecmp(sstr_t s1, sstr_t s2) {
   299     if (s1.length == s2.length) {
   300 #ifdef _WIN32
   301         return _strnicmp(s1.ptr, s2.ptr, s1.length);
   302 #else
   303         return strncasecmp(s1.ptr, s2.ptr, s1.length);
   304 #endif
   305     } else if (s1.length > s2.length) {
   306         return 1;
   307     } else {
   308         return -1;
   309     }
   310 }
   312 sstr_t sstrdup(sstr_t s) {
   313     return sstrdup_a(ucx_default_allocator(), s);
   314 }
   316 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
   317     sstr_t newstring;
   318     newstring.ptr = (char*)almalloc(allocator, s.length + 1);
   319     if (newstring.ptr) {
   320         newstring.length = s.length;
   321         newstring.ptr[newstring.length] = 0;
   323         memcpy(newstring.ptr, s.ptr, s.length);
   324     } else {
   325         newstring.length = 0;
   326     }
   328     return newstring;
   329 }
   331 sstr_t sstrtrim(sstr_t string) {
   332     sstr_t newstr = string;
   334     while (newstr.length > 0 && isspace(*newstr.ptr)) {
   335         newstr.ptr++;
   336         newstr.length--;
   337     }
   338     while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
   339         newstr.length--;
   340     }
   342     return newstr;
   343 }
   345 int sstrprefix(sstr_t string, sstr_t prefix) {
   346     if (string.length == 0) {
   347         return prefix.length == 0;
   348     }
   349     if (prefix.length == 0) {
   350         return 1;
   351     }
   353     if (prefix.length > string.length) {
   354         return 0;
   355     } else {
   356         return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   357     }
   358 }
   360 int sstrsuffix(sstr_t string, sstr_t suffix) {
   361     if (string.length == 0) {
   362         return suffix.length == 0;
   363     }
   364     if (suffix.length == 0) {
   365         return 1;
   366     }
   368     if (suffix.length > string.length) {
   369         return 0;
   370     } else {
   371         return memcmp(string.ptr+string.length-suffix.length,
   372             suffix.ptr, suffix.length) == 0;
   373     }
   374 }
   376 sstr_t sstrlower(sstr_t string) {
   377     sstr_t ret = sstrdup(string);
   378     for (size_t i = 0; i < ret.length ; i++) {
   379         ret.ptr[i] = tolower(ret.ptr[i]);
   380     }
   381     return ret;
   382 }
   384 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
   385     sstr_t ret = sstrdup_a(allocator, string);
   386     for (size_t i = 0; i < ret.length ; i++) {
   387         ret.ptr[i] = tolower(ret.ptr[i]);
   388     }
   389     return ret;
   390 }
   392 sstr_t sstrupper(sstr_t string) {
   393     sstr_t ret = sstrdup(string);
   394     for (size_t i = 0; i < ret.length ; i++) {
   395         ret.ptr[i] = toupper(ret.ptr[i]);
   396     }
   397     return ret;
   398 }
   400 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
   401     sstr_t ret = sstrdup_a(allocator, string);
   402     for (size_t i = 0; i < ret.length ; i++) {
   403         ret.ptr[i] = toupper(ret.ptr[i]);
   404     }
   405     return ret;
   406 }

mercurial