src/string.c

Sun, 01 Apr 2018 09:51:01 +0200

author
Olaf Wintermann <olaf.wintermann@gmail.com>
date
Sun, 01 Apr 2018 09:51:01 +0200
branch
constsstr
changeset 276
f1b2146d4805
parent 275
96f643d30ff1
child 288
6af5798342e8
permissions
-rw-r--r--

adapts sstrtrim, sstrsplit, sstrcmp and sstrstr to new const string API

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include "ucx/string.h"
    31 #include "ucx/allocator.h"
    33 #include <stdlib.h>
    34 #include <string.h>
    35 #include <stdarg.h>
    36 #include <stdint.h>
    37 #include <ctype.h>
    39 sstr_t sstr(char *cstring) {
    40     sstr_t string;
    41     string.ptr = cstring;
    42     string.length = strlen(cstring);
    43     return string;
    44 }
    46 sstr_t sstrn(char *cstring, size_t length) {
    47     sstr_t string;
    48     string.ptr = cstring;
    49     string.length = length;
    50     return string;
    51 }
    53 scstr_t scstr(const char *cstring) {
    54     scstr_t string;
    55     string.ptr = cstring;
    56     string.length = strlen(cstring);
    57     return string;
    58 }
    60 scstr_t scstrn(const char *cstring, size_t length) {
    61     scstr_t string;
    62     string.ptr = cstring;
    63     string.length = length;
    64     return string;
    65 }
    68 size_t sstrnlen(size_t n, sstr_t s, ...) {
    69     va_list ap;
    70     size_t size = s.length;
    71     va_start(ap, s);
    73     for (size_t i = 1 ; i < n ; i++) {
    74         sstr_t str = va_arg(ap, sstr_t);
    75         if(((size_t)-1) - str.length < size) {
    76             size = 0;
    77             break;
    78         }
    79         size += str.length;
    80     }
    81     va_end(ap);
    83     return size;
    84 }
    86 static sstr_t sstrvcat_a(
    87         UcxAllocator *a,
    88         size_t count,
    89         sstr_t s1,
    90         sstr_t s2,
    91         va_list ap) {
    92     sstr_t str;
    93     str.ptr = NULL;
    94     str.length = 0;
    95     if(count < 2) {
    96         return str;
    97     }
    99     if(((size_t)-1) - s1.length < s2.length) {
   100         return str;
   101     }
   103     sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
   104     if(!strings) {
   105         return str;
   106     }
   108     // get all args and overall length
   109     strings[0] = s1;
   110     strings[1] = s2;
   111     size_t slen = s1.length + s2.length;
   112     int error = 0;
   113     for (size_t i=2;i<count;i++) {
   114         sstr_t s = va_arg (ap, sstr_t);
   115         strings[i] = s;
   116         if(((size_t)-1) - s.length < slen) {
   117             error = 1;
   118             break;
   119         }
   120         slen += s.length;
   121     }
   122     if(error) {
   123         free(strings);
   124         return str;
   125     }
   127     // create new string
   128     str.ptr = (char*) almalloc(a, slen + 1);
   129     str.length = slen;
   130     if(!str.ptr) {
   131         free(strings);
   132         str.length = 0;
   133         return str;
   134     }
   136     // concatenate strings
   137     size_t pos = 0;
   138     for (size_t i=0;i<count;i++) {
   139         sstr_t s = strings[i];
   140         memcpy(str.ptr + pos, s.ptr, s.length);
   141         pos += s.length;
   142     }
   144     str.ptr[str.length] = '\0';
   146     free(strings);
   148     return str;
   149 }
   151 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
   152     va_list ap;
   153     va_start(ap, s2);
   154     sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
   155     va_end(ap);
   156     return s;
   157 }
   159 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
   160     va_list ap;
   161     va_start(ap, s2);
   162     sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
   163     va_end(ap);
   164     return s;
   165 }
   167 sstr_t sstrsubs(sstr_t s, size_t start) {
   168     return sstrsubsl (s, start, s.length-start);
   169 }
   171 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
   172     sstr_t new_sstr;
   173     if (start >= s.length) {
   174         new_sstr.ptr = NULL;
   175         new_sstr.length = 0;
   176     } else {
   177         if (length > s.length-start) {
   178             length = s.length-start;
   179         }
   180         new_sstr.ptr = &s.ptr[start];
   181         new_sstr.length = length;
   182     }
   183     return new_sstr;
   184 }
   186 sstr_t sstrchr(sstr_t s, int c) {
   187     for(size_t i=0;i<s.length;i++) {
   188         if(s.ptr[i] == c) {
   189             return sstrsubs(s, i);
   190         }
   191     }
   192     sstr_t n;
   193     n.ptr = NULL;
   194     n.length = 0;
   195     return n;
   196 }
   198 sstr_t sstrrchr(sstr_t s, int c) {
   199     if (s.length > 0) {
   200         for(size_t i=s.length;i>0;i--) {
   201             if(s.ptr[i-1] == c) {
   202                 return sstrsubs(s, i-1);
   203             }
   204         }
   205     }
   206     sstr_t n;
   207     n.ptr = NULL;
   208     n.length = 0;
   209     return n;
   210 }
   212 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
   213     ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
   215 #define ptable_w(useheap, ptable, index, src) do {\
   216     if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
   217     else ((size_t*)ptable)[index] = src;\
   218     } while (0);
   221 const char* ucx_strstr(
   222         const char *str,
   223         size_t length,
   224         const char *match,
   225         size_t matchlen,
   226         size_t *newlen)
   227 {
   228     *newlen = length;
   229     if (matchlen == 0) {
   230         return str;
   231     }
   233     const char *result = NULL;
   234     size_t resultlen = 0;
   236     /*
   237      * IMPORTANT:
   238      * our prefix table contains the prefix length PLUS ONE
   239      * this is our decision, because we want to use the full range of size_t
   240      * the original algorithm needs a (-1) at one single place
   241      * and we want to avoid that
   242      */
   244     /* static prefix table */
   245     static uint8_t s_prefix_table[256];
   247     /* check pattern length and use appropriate prefix table */
   248     /* if the pattern exceeds static prefix table, allocate on the heap */
   249     register int useheap = matchlen > 255;
   250     register void* ptable = useheap ?
   251         calloc(matchlen+1, sizeof(size_t)): s_prefix_table;
   253     /* keep counter in registers */
   254     register size_t i, j;
   256     /* fill prefix table */
   257     i = 0; j = 0;
   258     ptable_w(useheap, ptable, i, j);
   259     while (i < matchlen) {
   260         while (j >= 1 && match[j-1] != match[i]) {
   261             ptable_r(j, useheap, ptable, j-1);
   262         }
   263         i++; j++;
   264         ptable_w(useheap, ptable, i, j);
   265     }
   267     /* search */
   268     i = 0; j = 1;
   269     while (i < length) {
   270         while (j >= 1 && str[i] != match[j-1]) {
   271             ptable_r(j, useheap, ptable, j-1);
   272         }
   273         i++; j++;
   274         if (j-1 == matchlen) {
   275             size_t start = i - matchlen;
   276             result = str + start;
   277             resultlen = length - start;
   278             break;
   279         }
   280     }
   282     /* if prefix table was allocated on the heap, free it */
   283     if (ptable != s_prefix_table) {
   284         free(ptable);
   285     }
   287     *newlen = resultlen;
   288     return result;
   289 }
   291 sstr_t ucx_sstrstr(sstr_t string, scstr_t match) {
   292     sstr_t result;
   294     size_t reslen;
   295     const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
   296     if(!resstr) {
   297         result.ptr = NULL;
   298         result.length = 0;
   299         return result;
   300     }
   302     size_t pos = resstr - string.ptr;
   303     result.ptr = string.ptr + pos;
   304     result.length = reslen;
   306     return result;
   307 }
   309 scstr_t ucx_scstrstr(scstr_t string, scstr_t match) {
   310     scstr_t result;
   312     size_t reslen;
   313     const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
   314     if(!resstr) {
   315         result.ptr = NULL;
   316         result.length = 0;
   317         return result;
   318     }
   320     size_t pos = resstr - string.ptr;
   321     result.ptr = string.ptr + pos;
   322     result.length = reslen;
   324     return result;
   325 }
   327 #undef ptable_r
   328 #undef ptable_w
   330 sstr_t* ucx_strsplit(scstr_t s, scstr_t d, ssize_t *n) {
   331     return ucx_strsplit_a(ucx_default_allocator(), s, d, n);
   332 }
   334 sstr_t* ucx_strsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
   335     if (s.length == 0 || d.length == 0) {
   336         *n = -1;
   337         return NULL;
   338     }
   340     /* special cases: delimiter is at least as large as the string */
   341     if (d.length >= s.length) {
   342         /* exact match */
   343         if (sstrcmp(s, d) == 0) {
   344             *n = 0;
   345             return NULL;
   346         } else /* no match possible */ {
   347             *n = 1;
   348             sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
   349             if(result) {
   350                 *result = sstrdup_a(allocator, s);
   351             } else {
   352                 *n = -2;
   353             }
   354             return result;
   355         }
   356     }
   358     ssize_t nmax = *n;
   359     size_t arrlen = 16;
   360     sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
   362     if (result) {
   363         scstr_t curpos = s;
   364         ssize_t j = 1;
   365         while (1) {
   366             scstr_t match;
   367             /* optimize for one byte delimiters */
   368             if (d.length == 1) {
   369                 match = curpos;
   370                 for (size_t i = 0 ; i < curpos.length ; i++) {
   371                     if (curpos.ptr[i] == *(d.ptr)) {
   372                         match.ptr = curpos.ptr + i;
   373                         break;
   374                     }
   375                     match.length--;
   376                 }
   377             } else {
   378                 match = scstrstr(curpos, d);
   379             }
   380             if (match.length > 0) {
   381                 /* is this our last try? */
   382                 if (nmax == 0 || j < nmax) {
   383                     /* copy the current string to the array */
   384                     scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
   385                     result[j-1] = sstrdup_a(allocator, item);
   386                     size_t processed = item.length + d.length;
   387                     curpos.ptr += processed;
   388                     curpos.length -= processed;
   390                     /* allocate memory for the next string */
   391                     j++;
   392                     if (j > arrlen) {
   393                         arrlen *= 2;
   394                         size_t reallocsz;
   395                         sstr_t* reallocated = NULL;
   396                         if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
   397                             reallocated = (sstr_t*) alrealloc(
   398                                     allocator, result, reallocsz);
   399                         }
   400                         if (reallocated) {
   401                             result = reallocated;
   402                         } else {
   403                             for (ssize_t i = 0 ; i < j-1 ; i++) {
   404                                 alfree(allocator, result[i].ptr);
   405                             }
   406                             alfree(allocator, result);
   407                             *n = -2;
   408                             return NULL;
   409                         }
   410                     }
   411                 } else {
   412                     /* nmax reached, copy the _full_ remaining string */
   413                     result[j-1] = sstrdup_a(allocator, curpos);
   414                     break;
   415                 }
   416             } else {
   417                 /* no more matches, copy last string */
   418                 result[j-1] = sstrdup_a(allocator, curpos);
   419                 break;
   420             }
   421         }
   422         *n = j;
   423     } else {
   424         *n = -2;
   425     }
   427     return result;
   428 }
   430 int ucx_str_cmp(scstr_t s1, scstr_t s2) {
   431     if (s1.length == s2.length) {
   432         return memcmp(s1.ptr, s2.ptr, s1.length);
   433     } else if (s1.length > s2.length) {
   434         return 1;
   435     } else {
   436         return -1;
   437     }
   438 }
   440 int ucx_str_casecmp(scstr_t s1, scstr_t s2) {
   441     if (s1.length == s2.length) {
   442 #ifdef _WIN32
   443         return _strnicmp(s1.ptr, s2.ptr, s1.length);
   444 #else
   445         return strncasecmp(s1.ptr, s2.ptr, s1.length);
   446 #endif
   447     } else if (s1.length > s2.length) {
   448         return 1;
   449     } else {
   450         return -1;
   451     }
   452 }
   454 sstr_t scstrdup(scstr_t s) {
   455     return sstrdup_a(ucx_default_allocator(), s);
   456 }
   458 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
   459     sstr_t newstring;
   460     newstring.ptr = (char*)almalloc(allocator, s.length + 1);
   461     if (newstring.ptr) {
   462         newstring.length = s.length;
   463         newstring.ptr[newstring.length] = 0;
   465         memcpy(newstring.ptr, s.ptr, s.length);
   466     } else {
   467         newstring.length = 0;
   468     }
   470     return newstring;
   471 }
   474 size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
   475     const char *newptr = s;
   476     size_t length = len;
   478     while(length > 0 && isspace(*newptr)) {
   479         newptr++;
   480         length--;
   481     }
   482     while(length > 0 && isspace(newptr[length-1])) {
   483         length--;
   484     }
   486     *newlen = length;
   487     return newptr - s;
   488 }
   490 sstr_t sstrtrim(sstr_t string) {
   491     sstr_t newstr;
   492     newstr.ptr = string.ptr
   493                  + ucx_strtrim(string.ptr, string.length, &newstr.length);
   494     return newstr;
   495 }
   497 scstr_t scstrtrim(scstr_t string) {
   498     scstr_t newstr;
   499     newstr.ptr = string.ptr
   500                  + ucx_strtrim(string.ptr, string.length, &newstr.length);
   501     return newstr;
   502 }
   504 int ucx_strprefix(scstr_t string, scstr_t prefix) {
   505     if (string.length == 0) {
   506         return prefix.length == 0;
   507     }
   508     if (prefix.length == 0) {
   509         return 1;
   510     }
   512     if (prefix.length > string.length) {
   513         return 0;
   514     } else {
   515         return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   516     }
   517 }
   519 int ucx_strsuffix(scstr_t string, scstr_t suffix) {
   520     if (string.length == 0) {
   521         return suffix.length == 0;
   522     }
   523     if (suffix.length == 0) {
   524         return 1;
   525     }
   527     if (suffix.length > string.length) {
   528         return 0;
   529     } else {
   530         return memcmp(string.ptr+string.length-suffix.length,
   531             suffix.ptr, suffix.length) == 0;
   532     }
   533 }
   535 sstr_t ucx_strlower(scstr_t string) {
   536     sstr_t ret = sstrdup(string);
   537     for (size_t i = 0; i < ret.length ; i++) {
   538         ret.ptr[i] = tolower(ret.ptr[i]);
   539     }
   540     return ret;
   541 }
   543 sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string) {
   544     sstr_t ret = sstrdup_a(allocator, string);
   545     for (size_t i = 0; i < ret.length ; i++) {
   546         ret.ptr[i] = tolower(ret.ptr[i]);
   547     }
   548     return ret;
   549 }
   551 sstr_t ucx_strupper(scstr_t string) {
   552     sstr_t ret = sstrdup(string);
   553     for (size_t i = 0; i < ret.length ; i++) {
   554         ret.ptr[i] = toupper(ret.ptr[i]);
   555     }
   556     return ret;
   557 }
   559 sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string) {
   560     sstr_t ret = sstrdup_a(allocator, string);
   561     for (size_t i = 0; i < ret.length ; i++) {
   562         ret.ptr[i] = toupper(ret.ptr[i]);
   563     }
   564     return ret;
   565 }
   567 // private string conversion functions
   568 scstr_t ucx_sc2sc(scstr_t c) {
   569     return c;
   570 }
   571 scstr_t ucx_ss2sc(sstr_t str) {
   572     scstr_t cs;
   573     cs.ptr = str.ptr;
   574     cs.length = str.length;
   575     return cs;
   576 }
   577 scstr_t ucx_ss2c_s(scstr_t c) {
   578     return c;
   579 }

mercurial