src/string.c

Sat, 05 Oct 2019 17:07:16 +0200

author
Mike Becker <universe@uap-core.de>
date
Sat, 05 Oct 2019 17:07:16 +0200
changeset 361
8ee9e23adbd2
parent 319
0380e438a7ce
child 363
8175ba2b3bcb
permissions
-rw-r--r--

adds missing include for strncasecmp() to avoid an implicit declaration

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include "ucx/string.h"
    31 #include "ucx/allocator.h"
    33 #include <stdlib.h>
    34 #include <string.h>
    35 #include <stdarg.h>
    36 #include <stdint.h>
    37 #include <ctype.h>
    39 #ifndef _WIN32
    40 #include <strings.h> /* for strncasecmp() */
    41 #endif /* _WIN32 */
    43 sstr_t sstr(char *cstring) {
    44     sstr_t string;
    45     string.ptr = cstring;
    46     string.length = strlen(cstring);
    47     return string;
    48 }
    50 sstr_t sstrn(char *cstring, size_t length) {
    51     sstr_t string;
    52     string.ptr = cstring;
    53     string.length = length;
    54     return string;
    55 }
    57 scstr_t scstr(const char *cstring) {
    58     scstr_t string;
    59     string.ptr = cstring;
    60     string.length = strlen(cstring);
    61     return string;
    62 }
    64 scstr_t scstrn(const char *cstring, size_t length) {
    65     scstr_t string;
    66     string.ptr = cstring;
    67     string.length = length;
    68     return string;
    69 }
    72 size_t scstrnlen(size_t n, ...) {
    73     va_list ap;
    74     va_start(ap, n);
    76     size_t size = 0;
    78     for (size_t i = 0 ; i < n ; i++) {
    79         scstr_t str = va_arg(ap, scstr_t);
    80         if(SIZE_MAX - str.length < size) {
    81             size = SIZE_MAX;
    82             break;
    83         }
    84         size += str.length;
    85     }
    86     va_end(ap);
    88     return size;
    89 }
    91 static sstr_t sstrvcat_a(
    92         UcxAllocator *a,
    93         size_t count,
    94         scstr_t s1,
    95         va_list ap) {
    96     sstr_t str;
    97     str.ptr = NULL;
    98     str.length = 0;
    99     if(count < 2) {
   100         return str;
   101     }
   103     scstr_t s2 = va_arg (ap, scstr_t);
   105     if(((size_t)-1) - s1.length < s2.length) {
   106         return str;
   107     }
   109     scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
   110     if(!strings) {
   111         return str;
   112     }
   114     // get all args and overall length
   115     strings[0] = s1;
   116     strings[1] = s2;
   117     size_t slen = s1.length + s2.length;
   118     int error = 0;
   119     for (size_t i=2;i<count;i++) {
   120         scstr_t s = va_arg (ap, scstr_t);
   121         strings[i] = s;
   122         if(((size_t)-1) - s.length < slen) {
   123             error = 1;
   124             break;
   125         }
   126         slen += s.length;
   127     }
   128     if(error) {
   129         free(strings);
   130         return str;
   131     }
   133     // create new string
   134     str.ptr = (char*) almalloc(a, slen + 1);
   135     str.length = slen;
   136     if(!str.ptr) {
   137         free(strings);
   138         str.length = 0;
   139         return str;
   140     }
   142     // concatenate strings
   143     size_t pos = 0;
   144     for (size_t i=0;i<count;i++) {
   145         scstr_t s = strings[i];
   146         memcpy(str.ptr + pos, s.ptr, s.length);
   147         pos += s.length;
   148     }
   150     str.ptr[str.length] = '\0';
   152     free(strings);
   154     return str;
   155 }
   157 sstr_t scstrcat(size_t count, scstr_t s1, ...) {
   158     va_list ap;
   159     va_start(ap, s1);
   160     sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap);
   161     va_end(ap);
   162     return s;
   163 }
   165 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) {
   166     va_list ap;
   167     va_start(ap, s1);
   168     sstr_t s = sstrvcat_a(a, count, s1, ap);
   169     va_end(ap);
   170     return s;
   171 }
   173 static int ucx_substring(
   174         size_t str_length,
   175         size_t start,
   176         size_t length,
   177         size_t *newlen,
   178         size_t *newpos)
   179 {
   180     *newlen = 0;
   181     *newpos = 0;
   183     if(start > str_length) {
   184         return 0;
   185     }
   187     if(length > str_length - start) {
   188         length = str_length - start;
   189     }
   190     *newlen = length;
   191     *newpos = start;
   192     return 1;
   193 }
   195 sstr_t sstrsubs(sstr_t s, size_t start) {
   196     return sstrsubsl (s, start, s.length-start);
   197 }
   199 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
   200     size_t pos;
   201     sstr_t ret = { NULL, 0 };
   202     if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
   203         ret.ptr = s.ptr + pos;
   204     }
   205     return ret;
   206 }
   208 scstr_t scstrsubs(scstr_t string, size_t start) {
   209     return scstrsubsl(string, start, string.length-start);
   210 }
   212 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) {
   213     size_t pos;
   214     scstr_t ret = { NULL, 0 };
   215     if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
   216         ret.ptr = s.ptr + pos;
   217     }
   218     return ret;
   219 }
   222 static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) {
   223     for(size_t i=0;i<length;i++) {
   224         if(str[i] == chr) {
   225             *pos = i;
   226             return 1;
   227         }
   228     }
   229     return 0;
   230 }
   232 static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) {
   233     if(length > 0) {
   234         for(size_t i=length ; i>0 ; i--) {
   235             if(str[i-1] == chr) {
   236                 *pos = i-1;
   237                 return 1;
   238             }
   239         }
   240     }
   241     return 0;
   242 }
   244 sstr_t sstrchr(sstr_t s, int c) {
   245     size_t pos = 0;
   246     if(ucx_strchr(s.ptr, s.length, c, &pos)) {
   247         return sstrsubs(s, pos);
   248     }
   249     return sstrn(NULL, 0);
   250 }
   252 sstr_t sstrrchr(sstr_t s, int c) {
   253     size_t pos = 0;
   254     if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
   255         return sstrsubs(s, pos);
   256     }
   257     return sstrn(NULL, 0);
   258 }
   260 scstr_t scstrchr(scstr_t s, int c) {
   261     size_t pos = 0;
   262     if(ucx_strchr(s.ptr, s.length, c, &pos)) {
   263         return scstrsubs(s, pos);
   264     }
   265     return scstrn(NULL, 0);
   266 }
   268 scstr_t scstrrchr(scstr_t s, int c) {
   269     size_t pos = 0;
   270     if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
   271         return scstrsubs(s, pos);
   272     }
   273     return scstrn(NULL, 0);
   274 }
   276 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
   277     ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
   279 #define ptable_w(useheap, ptable, index, src) do {\
   280     if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
   281     else ((size_t*)ptable)[index] = src;\
   282     } while (0);
   285 static const char* ucx_strstr(
   286         const char *str,
   287         size_t length,
   288         const char *match,
   289         size_t matchlen,
   290         size_t *newlen)
   291 {
   292     *newlen = length;
   293     if (matchlen == 0) {
   294         return str;
   295     }
   297     const char *result = NULL;
   298     size_t resultlen = 0;
   300     /*
   301      * IMPORTANT:
   302      * our prefix table contains the prefix length PLUS ONE
   303      * this is our decision, because we want to use the full range of size_t
   304      * the original algorithm needs a (-1) at one single place
   305      * and we want to avoid that
   306      */
   308     /* static prefix table */
   309     static uint8_t s_prefix_table[256];
   311     /* check pattern length and use appropriate prefix table */
   312     /* if the pattern exceeds static prefix table, allocate on the heap */
   313     register int useheap = matchlen > 255;
   314     register void* ptable = useheap ?
   315         calloc(matchlen+1, sizeof(size_t)): s_prefix_table;
   317     /* keep counter in registers */
   318     register size_t i, j;
   320     /* fill prefix table */
   321     i = 0; j = 0;
   322     ptable_w(useheap, ptable, i, j);
   323     while (i < matchlen) {
   324         while (j >= 1 && match[j-1] != match[i]) {
   325             ptable_r(j, useheap, ptable, j-1);
   326         }
   327         i++; j++;
   328         ptable_w(useheap, ptable, i, j);
   329     }
   331     /* search */
   332     i = 0; j = 1;
   333     while (i < length) {
   334         while (j >= 1 && str[i] != match[j-1]) {
   335             ptable_r(j, useheap, ptable, j-1);
   336         }
   337         i++; j++;
   338         if (j-1 == matchlen) {
   339             size_t start = i - matchlen;
   340             result = str + start;
   341             resultlen = length - start;
   342             break;
   343         }
   344     }
   346     /* if prefix table was allocated on the heap, free it */
   347     if (ptable != s_prefix_table) {
   348         free(ptable);
   349     }
   351     *newlen = resultlen;
   352     return result;
   353 }
   355 sstr_t scstrsstr(sstr_t string, scstr_t match) {
   356     sstr_t result;
   358     size_t reslen;
   359     const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
   360     if(!resstr) {
   361         result.ptr = NULL;
   362         result.length = 0;
   363         return result;
   364     }
   366     size_t pos = resstr - string.ptr;
   367     result.ptr = string.ptr + pos;
   368     result.length = reslen;
   370     return result;
   371 }
   373 scstr_t scstrscstr(scstr_t string, scstr_t match) {
   374     scstr_t result;
   376     size_t reslen;
   377     const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
   378     if(!resstr) {
   379         result.ptr = NULL;
   380         result.length = 0;
   381         return result;
   382     }
   384     size_t pos = resstr - string.ptr;
   385     result.ptr = string.ptr + pos;
   386     result.length = reslen;
   388     return result;
   389 }
   391 #undef ptable_r
   392 #undef ptable_w
   394 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) {
   395     return scstrsplit_a(ucx_default_allocator(), s, d, n);
   396 }
   398 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
   399     if (s.length == 0 || d.length == 0) {
   400         *n = -1;
   401         return NULL;
   402     }
   404     /* special cases: delimiter is at least as large as the string */
   405     if (d.length >= s.length) {
   406         /* exact match */
   407         if (sstrcmp(s, d) == 0) {
   408             *n = 0;
   409             return NULL;
   410         } else /* no match possible */ {
   411             *n = 1;
   412             sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
   413             if(result) {
   414                 *result = sstrdup_a(allocator, s);
   415             } else {
   416                 *n = -2;
   417             }
   418             return result;
   419         }
   420     }
   422     ssize_t nmax = *n;
   423     size_t arrlen = 16;
   424     sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
   426     if (result) {
   427         scstr_t curpos = s;
   428         ssize_t j = 1;
   429         while (1) {
   430             scstr_t match;
   431             /* optimize for one byte delimiters */
   432             if (d.length == 1) {
   433                 match = curpos;
   434                 for (size_t i = 0 ; i < curpos.length ; i++) {
   435                     if (curpos.ptr[i] == *(d.ptr)) {
   436                         match.ptr = curpos.ptr + i;
   437                         break;
   438                     }
   439                     match.length--;
   440                 }
   441             } else {
   442                 match = scstrscstr(curpos, d);
   443             }
   444             if (match.length > 0) {
   445                 /* is this our last try? */
   446                 if (nmax == 0 || j < nmax) {
   447                     /* copy the current string to the array */
   448                     scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
   449                     result[j-1] = sstrdup_a(allocator, item);
   450                     size_t processed = item.length + d.length;
   451                     curpos.ptr += processed;
   452                     curpos.length -= processed;
   454                     /* allocate memory for the next string */
   455                     j++;
   456                     if (j > arrlen) {
   457                         arrlen *= 2;
   458                         size_t reallocsz;
   459                         sstr_t* reallocated = NULL;
   460                         if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
   461                             reallocated = (sstr_t*) alrealloc(
   462                                     allocator, result, reallocsz);
   463                         }
   464                         if (reallocated) {
   465                             result = reallocated;
   466                         } else {
   467                             for (ssize_t i = 0 ; i < j-1 ; i++) {
   468                                 alfree(allocator, result[i].ptr);
   469                             }
   470                             alfree(allocator, result);
   471                             *n = -2;
   472                             return NULL;
   473                         }
   474                     }
   475                 } else {
   476                     /* nmax reached, copy the _full_ remaining string */
   477                     result[j-1] = sstrdup_a(allocator, curpos);
   478                     break;
   479                 }
   480             } else {
   481                 /* no more matches, copy last string */
   482                 result[j-1] = sstrdup_a(allocator, curpos);
   483                 break;
   484             }
   485         }
   486         *n = j;
   487     } else {
   488         *n = -2;
   489     }
   491     return result;
   492 }
   494 int scstrcmp(scstr_t s1, scstr_t s2) {
   495     if (s1.length == s2.length) {
   496         return memcmp(s1.ptr, s2.ptr, s1.length);
   497     } else if (s1.length > s2.length) {
   498         return 1;
   499     } else {
   500         return -1;
   501     }
   502 }
   504 int scstrcasecmp(scstr_t s1, scstr_t s2) {
   505     if (s1.length == s2.length) {
   506 #ifdef _WIN32
   507         return _strnicmp(s1.ptr, s2.ptr, s1.length);
   508 #else
   509         return strncasecmp(s1.ptr, s2.ptr, s1.length);
   510 #endif
   511     } else if (s1.length > s2.length) {
   512         return 1;
   513     } else {
   514         return -1;
   515     }
   516 }
   518 sstr_t scstrdup(scstr_t s) {
   519     return sstrdup_a(ucx_default_allocator(), s);
   520 }
   522 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
   523     sstr_t newstring;
   524     newstring.ptr = (char*)almalloc(allocator, s.length + 1);
   525     if (newstring.ptr) {
   526         newstring.length = s.length;
   527         newstring.ptr[newstring.length] = 0;
   529         memcpy(newstring.ptr, s.ptr, s.length);
   530     } else {
   531         newstring.length = 0;
   532     }
   534     return newstring;
   535 }
   538 static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
   539     const char *newptr = s;
   540     size_t length = len;
   542     while(length > 0 && isspace(*newptr)) {
   543         newptr++;
   544         length--;
   545     }
   546     while(length > 0 && isspace(newptr[length-1])) {
   547         length--;
   548     }
   550     *newlen = length;
   551     return newptr - s;
   552 }
   554 sstr_t sstrtrim(sstr_t string) {
   555     sstr_t newstr;
   556     newstr.ptr = string.ptr
   557                  + ucx_strtrim(string.ptr, string.length, &newstr.length);
   558     return newstr;
   559 }
   561 scstr_t scstrtrim(scstr_t string) {
   562     scstr_t newstr;
   563     newstr.ptr = string.ptr
   564                  + ucx_strtrim(string.ptr, string.length, &newstr.length);
   565     return newstr;
   566 }
   568 int scstrprefix(scstr_t string, scstr_t prefix) {
   569     if (string.length == 0) {
   570         return prefix.length == 0;
   571     }
   572     if (prefix.length == 0) {
   573         return 1;
   574     }
   576     if (prefix.length > string.length) {
   577         return 0;
   578     } else {
   579         return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   580     }
   581 }
   583 int scstrsuffix(scstr_t string, scstr_t suffix) {
   584     if (string.length == 0) {
   585         return suffix.length == 0;
   586     }
   587     if (suffix.length == 0) {
   588         return 1;
   589     }
   591     if (suffix.length > string.length) {
   592         return 0;
   593     } else {
   594         return memcmp(string.ptr+string.length-suffix.length,
   595             suffix.ptr, suffix.length) == 0;
   596     }
   597 }
   599 sstr_t scstrlower(scstr_t string) {
   600     sstr_t ret = sstrdup(string);
   601     for (size_t i = 0; i < ret.length ; i++) {
   602         ret.ptr[i] = tolower(ret.ptr[i]);
   603     }
   604     return ret;
   605 }
   607 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) {
   608     sstr_t ret = sstrdup_a(allocator, string);
   609     for (size_t i = 0; i < ret.length ; i++) {
   610         ret.ptr[i] = tolower(ret.ptr[i]);
   611     }
   612     return ret;
   613 }
   615 sstr_t scstrupper(scstr_t string) {
   616     sstr_t ret = sstrdup(string);
   617     for (size_t i = 0; i < ret.length ; i++) {
   618         ret.ptr[i] = toupper(ret.ptr[i]);
   619     }
   620     return ret;
   621 }
   623 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) {
   624     sstr_t ret = sstrdup_a(allocator, string);
   625     for (size_t i = 0; i < ret.length ; i++) {
   626         ret.ptr[i] = toupper(ret.ptr[i]);
   627     }
   628     return ret;
   629 }
   631 // type adjustment functions
   632 scstr_t ucx_sc2sc(scstr_t str) {
   633     return str;
   634 }
   635 scstr_t ucx_ss2sc(sstr_t str) {
   636     scstr_t cs;
   637     cs.ptr = str.ptr;
   638     cs.length = str.length;
   639     return cs;
   640 }
   641 scstr_t ucx_ss2c_s(scstr_t c) {
   642     return c;
   643 }

mercurial