src/string.c

Sun, 21 Jan 2018 14:10:59 +0100

author
Mike Becker <universe@uap-core.de>
date
Sun, 21 Jan 2018 14:10:59 +0100
changeset 273
9c1591b3c4a4
parent 272
2def28b65328
child 275
96f643d30ff1
permissions
-rw-r--r--

fixes return value for multiplication with zero in ucx_szmul

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include "ucx/string.h"
    31 #include "ucx/allocator.h"
    33 #include <stdlib.h>
    34 #include <string.h>
    35 #include <stdarg.h>
    36 #include <stdint.h>
    37 #include <ctype.h>
    39 sstr_t sstr(char *cstring) {
    40     sstr_t string;
    41     string.ptr = cstring;
    42     string.length = strlen(cstring);
    43     return string;
    44 }
    46 sstr_t sstrn(char *cstring, size_t length) {
    47     sstr_t string;
    48     string.ptr = cstring;
    49     string.length = length;
    50     return string;
    51 }
    53 size_t sstrnlen(size_t n, sstr_t s, ...) {
    54     va_list ap;
    55     size_t size = s.length;
    56     va_start(ap, s);
    58     for (size_t i = 1 ; i < n ; i++) {
    59         sstr_t str = va_arg(ap, sstr_t);
    60         if(((size_t)-1) - str.length < size) {
    61             size = 0;
    62             break;
    63         }
    64         size += str.length;
    65     }
    66     va_end(ap);
    68     return size;
    69 }
    71 static sstr_t sstrvcat_a(
    72         UcxAllocator *a,
    73         size_t count,
    74         sstr_t s1,
    75         sstr_t s2,
    76         va_list ap) {
    77     sstr_t str;
    78     str.ptr = NULL;
    79     str.length = 0;
    80     if(count < 2) {
    81         return str;
    82     }
    84     if(((size_t)-1) - s1.length < s2.length) {
    85         return str;
    86     }
    88     sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
    89     if(!strings) {
    90         return str;
    91     }
    93     // get all args and overall length
    94     strings[0] = s1;
    95     strings[1] = s2;
    96     size_t slen = s1.length + s2.length;
    97     int error = 0;
    98     for (size_t i=2;i<count;i++) {
    99         sstr_t s = va_arg (ap, sstr_t);
   100         strings[i] = s;
   101         if(((size_t)-1) - s.length < slen) {
   102             error = 1;
   103             break;
   104         }
   105         slen += s.length;
   106     }
   107     if(error) {
   108         free(strings);
   109         return str;
   110     }
   112     // create new string
   113     str.ptr = (char*) almalloc(a, slen + 1);
   114     str.length = slen;
   115     if(!str.ptr) {
   116         free(strings);
   117         str.length = 0;
   118         return str;
   119     }
   121     // concatenate strings
   122     size_t pos = 0;
   123     for (size_t i=0;i<count;i++) {
   124         sstr_t s = strings[i];
   125         memcpy(str.ptr + pos, s.ptr, s.length);
   126         pos += s.length;
   127     }
   129     str.ptr[str.length] = '\0';
   131     free(strings);
   133     return str;
   134 }
   136 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
   137     va_list ap;
   138     va_start(ap, s2);
   139     sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
   140     va_end(ap);
   141     return s;
   142 }
   144 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
   145     va_list ap;
   146     va_start(ap, s2);
   147     sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
   148     va_end(ap);
   149     return s;
   150 }
   152 sstr_t sstrsubs(sstr_t s, size_t start) {
   153     return sstrsubsl (s, start, s.length-start);
   154 }
   156 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
   157     sstr_t new_sstr;
   158     if (start >= s.length) {
   159         new_sstr.ptr = NULL;
   160         new_sstr.length = 0;
   161     } else {
   162         if (length > s.length-start) {
   163             length = s.length-start;
   164         }
   165         new_sstr.ptr = &s.ptr[start];
   166         new_sstr.length = length;
   167     }
   168     return new_sstr;
   169 }
   171 sstr_t sstrchr(sstr_t s, int c) {
   172     for(size_t i=0;i<s.length;i++) {
   173         if(s.ptr[i] == c) {
   174             return sstrsubs(s, i);
   175         }
   176     }
   177     sstr_t n;
   178     n.ptr = NULL;
   179     n.length = 0;
   180     return n;
   181 }
   183 sstr_t sstrrchr(sstr_t s, int c) {
   184     if (s.length > 0) {
   185         for(size_t i=s.length;i>0;i--) {
   186             if(s.ptr[i-1] == c) {
   187                 return sstrsubs(s, i-1);
   188             }
   189         }
   190     }
   191     sstr_t n;
   192     n.ptr = NULL;
   193     n.length = 0;
   194     return n;
   195 }
   197 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
   198     ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
   200 #define ptable_w(useheap, ptable, index, src) do {\
   201     if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
   202     else ((size_t*)ptable)[index] = src;\
   203     } while (0);
   205 sstr_t sstrstr(sstr_t string, sstr_t match) {
   206     if (match.length == 0) {
   207         return string;
   208     }
   210     /* prepare default return value in case of no match */
   211     sstr_t result = sstrn(NULL, 0);
   213     /*
   214      * IMPORTANT:
   215      * our prefix table contains the prefix length PLUS ONE
   216      * this is our decision, because we want to use the full range of size_t
   217      * the original algorithm needs a (-1) at one single place
   218      * and we want to avoid that
   219      */
   221     /* static prefix table */
   222     static uint8_t s_prefix_table[256];
   224     /* check pattern length and use appropriate prefix table */
   225     /* if the pattern exceeds static prefix table, allocate on the heap */
   226     register int useheap = match.length > 255;
   227     register void* ptable = useheap ?
   228         calloc(match.length+1, sizeof(size_t)): s_prefix_table;
   230     /* keep counter in registers */
   231     register size_t i, j;
   233     /* fill prefix table */
   234     i = 0; j = 0;
   235     ptable_w(useheap, ptable, i, j);
   236     while (i < match.length) {
   237         while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
   238             ptable_r(j, useheap, ptable, j-1);
   239         }
   240         i++; j++;
   241         ptable_w(useheap, ptable, i, j);
   242     }
   244     /* search */
   245     i = 0; j = 1;
   246     while (i < string.length) {
   247         while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
   248             ptable_r(j, useheap, ptable, j-1);
   249         }
   250         i++; j++;
   251         if (j-1 == match.length) {
   252             size_t start = i - match.length;
   253             result.ptr = string.ptr + start;
   254             result.length = string.length - start;
   255             break;
   256         }
   257     }
   259     /* if prefix table was allocated on the heap, free it */
   260     if (ptable != s_prefix_table) {
   261         free(ptable);
   262     }
   264     return result;
   265 }
   267 #undef ptable_r
   268 #undef ptable_w
   270 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
   271     return sstrsplit_a(ucx_default_allocator(), s, d, n);
   272 }
   274 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
   275     if (s.length == 0 || d.length == 0) {
   276         *n = -1;
   277         return NULL;
   278     }
   280     /* special cases: delimiter is at least as large as the string */
   281     if (d.length >= s.length) {
   282         /* exact match */
   283         if (sstrcmp(s, d) == 0) {
   284             *n = 0;
   285             return NULL;
   286         } else /* no match possible */ {
   287             *n = 1;
   288             sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
   289             if(result) {
   290                 *result = sstrdup_a(allocator, s);
   291             } else {
   292                 *n = -2;
   293             }
   294             return result;
   295         }
   296     }
   298     ssize_t nmax = *n;
   299     size_t arrlen = 16;
   300     sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
   302     if (result) {
   303         sstr_t curpos = s;
   304         ssize_t j = 1;
   305         while (1) {
   306             sstr_t match;
   307             /* optimize for one byte delimiters */
   308             if (d.length == 1) {
   309                 match = curpos;
   310                 for (size_t i = 0 ; i < curpos.length ; i++) {
   311                     if (curpos.ptr[i] == *(d.ptr)) {
   312                         match.ptr = curpos.ptr + i;
   313                         break;
   314                     }
   315                     match.length--;
   316                 }
   317             } else {
   318                 match = sstrstr(curpos, d);
   319             }
   320             if (match.length > 0) {
   321                 /* is this our last try? */
   322                 if (nmax == 0 || j < nmax) {
   323                     /* copy the current string to the array */
   324                     sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
   325                     result[j-1] = sstrdup_a(allocator, item);
   326                     size_t processed = item.length + d.length;
   327                     curpos.ptr += processed;
   328                     curpos.length -= processed;
   330                     /* allocate memory for the next string */
   331                     j++;
   332                     if (j > arrlen) {
   333                         arrlen *= 2;
   334                         size_t reallocsz;
   335                         sstr_t* reallocated = NULL;
   336                         if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
   337                             reallocated = (sstr_t*) alrealloc(
   338                                     allocator, result, reallocsz);
   339                         }
   340                         if (reallocated) {
   341                             result = reallocated;
   342                         } else {
   343                             for (ssize_t i = 0 ; i < j-1 ; i++) {
   344                                 alfree(allocator, result[i].ptr);
   345                             }
   346                             alfree(allocator, result);
   347                             *n = -2;
   348                             return NULL;
   349                         }
   350                     }
   351                 } else {
   352                     /* nmax reached, copy the _full_ remaining string */
   353                     result[j-1] = sstrdup_a(allocator, curpos);
   354                     break;
   355                 }
   356             } else {
   357                 /* no more matches, copy last string */
   358                 result[j-1] = sstrdup_a(allocator, curpos);
   359                 break;
   360             }
   361         }
   362         *n = j;
   363     } else {
   364         *n = -2;
   365     }
   367     return result;
   368 }
   370 int sstrcmp(sstr_t s1, sstr_t s2) {
   371     if (s1.length == s2.length) {
   372         return memcmp(s1.ptr, s2.ptr, s1.length);
   373     } else if (s1.length > s2.length) {
   374         return 1;
   375     } else {
   376         return -1;
   377     }
   378 }
   380 int sstrcasecmp(sstr_t s1, sstr_t s2) {
   381     if (s1.length == s2.length) {
   382 #ifdef _WIN32
   383         return _strnicmp(s1.ptr, s2.ptr, s1.length);
   384 #else
   385         return strncasecmp(s1.ptr, s2.ptr, s1.length);
   386 #endif
   387     } else if (s1.length > s2.length) {
   388         return 1;
   389     } else {
   390         return -1;
   391     }
   392 }
   394 sstr_t sstrdup(sstr_t s) {
   395     return sstrdup_a(ucx_default_allocator(), s);
   396 }
   398 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
   399     sstr_t newstring;
   400     newstring.ptr = (char*)almalloc(allocator, s.length + 1);
   401     if (newstring.ptr) {
   402         newstring.length = s.length;
   403         newstring.ptr[newstring.length] = 0;
   405         memcpy(newstring.ptr, s.ptr, s.length);
   406     } else {
   407         newstring.length = 0;
   408     }
   410     return newstring;
   411 }
   413 sstr_t sstrtrim(sstr_t string) {
   414     sstr_t newstr = string;
   416     while (newstr.length > 0 && isspace(*newstr.ptr)) {
   417         newstr.ptr++;
   418         newstr.length--;
   419     }
   420     while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
   421         newstr.length--;
   422     }
   424     return newstr;
   425 }
   427 int sstrprefix(sstr_t string, sstr_t prefix) {
   428     if (string.length == 0) {
   429         return prefix.length == 0;
   430     }
   431     if (prefix.length == 0) {
   432         return 1;
   433     }
   435     if (prefix.length > string.length) {
   436         return 0;
   437     } else {
   438         return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   439     }
   440 }
   442 int sstrsuffix(sstr_t string, sstr_t suffix) {
   443     if (string.length == 0) {
   444         return suffix.length == 0;
   445     }
   446     if (suffix.length == 0) {
   447         return 1;
   448     }
   450     if (suffix.length > string.length) {
   451         return 0;
   452     } else {
   453         return memcmp(string.ptr+string.length-suffix.length,
   454             suffix.ptr, suffix.length) == 0;
   455     }
   456 }
   458 sstr_t sstrlower(sstr_t string) {
   459     sstr_t ret = sstrdup(string);
   460     for (size_t i = 0; i < ret.length ; i++) {
   461         ret.ptr[i] = tolower(ret.ptr[i]);
   462     }
   463     return ret;
   464 }
   466 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
   467     sstr_t ret = sstrdup_a(allocator, string);
   468     for (size_t i = 0; i < ret.length ; i++) {
   469         ret.ptr[i] = tolower(ret.ptr[i]);
   470     }
   471     return ret;
   472 }
   474 sstr_t sstrupper(sstr_t string) {
   475     sstr_t ret = sstrdup(string);
   476     for (size_t i = 0; i < ret.length ; i++) {
   477         ret.ptr[i] = toupper(ret.ptr[i]);
   478     }
   479     return ret;
   480 }
   482 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
   483     sstr_t ret = sstrdup_a(allocator, string);
   484     for (size_t i = 0; i < ret.length ; i++) {
   485         ret.ptr[i] = toupper(ret.ptr[i]);
   486     }
   487     return ret;
   488 }

mercurial