src/string.c

Wed, 16 May 2018 14:02:59 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 16 May 2018 14:02:59 +0200
changeset 318
348fd9cb7b14
parent 317
ebae0e434898
child 319
0380e438a7ce
permissions
-rw-r--r--

adds remaining documentation for the scstr functions

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include "ucx/string.h"
    31 #include "ucx/allocator.h"
    33 #include <stdlib.h>
    34 #include <string.h>
    35 #include <stdarg.h>
    36 #include <stdint.h>
    37 #include <ctype.h>
    39 sstr_t sstr(char *cstring) {
    40     sstr_t string;
    41     string.ptr = cstring;
    42     string.length = strlen(cstring);
    43     return string;
    44 }
    46 sstr_t sstrn(char *cstring, size_t length) {
    47     sstr_t string;
    48     string.ptr = cstring;
    49     string.length = length;
    50     return string;
    51 }
    53 scstr_t scstr(const char *cstring) {
    54     scstr_t string;
    55     string.ptr = cstring;
    56     string.length = strlen(cstring);
    57     return string;
    58 }
    60 scstr_t scstrn(const char *cstring, size_t length) {
    61     scstr_t string;
    62     string.ptr = cstring;
    63     string.length = length;
    64     return string;
    65 }
    68 size_t ucx_strnlen(size_t n, ...) {
    69     va_list ap;
    70     va_start(ap, n);
    72     size_t size = 0;
    74     for (size_t i = 0 ; i < n ; i++) {
    75         scstr_t str = va_arg(ap, scstr_t);
    76         if(SIZE_MAX - str.length < size) {
    77             size = SIZE_MAX;
    78             break;
    79         }
    80         size += str.length;
    81     }
    82     va_end(ap);
    84     return size;
    85 }
    87 static sstr_t sstrvcat_a(
    88         UcxAllocator *a,
    89         size_t count,
    90         scstr_t s1,
    91         va_list ap) {
    92     sstr_t str;
    93     str.ptr = NULL;
    94     str.length = 0;
    95     if(count < 2) {
    96         return str;
    97     }
    99     scstr_t s2 = va_arg (ap, scstr_t);
   101     if(((size_t)-1) - s1.length < s2.length) {
   102         return str;
   103     }
   105     scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
   106     if(!strings) {
   107         return str;
   108     }
   110     // get all args and overall length
   111     strings[0] = s1;
   112     strings[1] = s2;
   113     size_t slen = s1.length + s2.length;
   114     int error = 0;
   115     for (size_t i=2;i<count;i++) {
   116         scstr_t s = va_arg (ap, scstr_t);
   117         strings[i] = s;
   118         if(((size_t)-1) - s.length < slen) {
   119             error = 1;
   120             break;
   121         }
   122         slen += s.length;
   123     }
   124     if(error) {
   125         free(strings);
   126         return str;
   127     }
   129     // create new string
   130     str.ptr = (char*) almalloc(a, slen + 1);
   131     str.length = slen;
   132     if(!str.ptr) {
   133         free(strings);
   134         str.length = 0;
   135         return str;
   136     }
   138     // concatenate strings
   139     size_t pos = 0;
   140     for (size_t i=0;i<count;i++) {
   141         scstr_t s = strings[i];
   142         memcpy(str.ptr + pos, s.ptr, s.length);
   143         pos += s.length;
   144     }
   146     str.ptr[str.length] = '\0';
   148     free(strings);
   150     return str;
   151 }
   153 sstr_t ucx_strcat(size_t count, scstr_t s1, ...) {
   154     va_list ap;
   155     va_start(ap, s1);
   156     sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap);
   157     va_end(ap);
   158     return s;
   159 }
   161 sstr_t ucx_strcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) {
   162     va_list ap;
   163     va_start(ap, s1);
   164     sstr_t s = sstrvcat_a(a, count, s1, ap);
   165     va_end(ap);
   166     return s;
   167 }
   /**
    * Computes position and length of a substring, clamped to the string.
    *
    * @param str_length length of the string the substring is taken from
    * @param start      requested start offset
    * @param length     requested substring length
    * @param newlen     receives the clamped length (0 on failure)
    * @param newpos     receives the start offset (0 on failure)
    * @return 1 if start lies within the string (start == str_length is
    *         accepted and yields an empty substring), 0 otherwise
    */
   static int ucx_substring(
           size_t str_length,
           size_t start,
           size_t length,
           size_t *newlen,
           size_t *newpos)
   {
       if (start > str_length) {
           *newlen = 0;
           *newpos = 0;
           return 0;
       }

       /* never hand out more characters than remain after start */
       size_t available = str_length - start;
       *newlen = length > available ? available : length;
       *newpos = start;
       return 1;
   }
   191 sstr_t sstrsubs(sstr_t s, size_t start) {
   192     return sstrsubsl (s, start, s.length-start);
   193 }
   195 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
   196     size_t pos;
   197     sstr_t ret = { NULL, 0 };
   198     if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
   199         ret.ptr = s.ptr + pos;
   200     }
   201     return ret;
   202 }
   204 scstr_t scstrsubs(scstr_t string, size_t start) {
   205     return scstrsubsl(string, start, string.length-start);
   206 }
   208 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) {
   209     size_t pos;
   210     scstr_t ret = { NULL, 0 };
   211     if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
   212         ret.ptr = s.ptr + pos;
   213     }
   214     return ret;
   215 }
   /**
    * Finds the first occurrence of a character in a length-delimited buffer.
    *
    * The character is compared as an unsigned char (memchr() semantics), so
    * byte values above 127 are found regardless of whether plain char is
    * signed on the platform.
    *
    * @param str    buffer to search
    * @param length number of characters in str
    * @param chr    character to look for
    * @param pos    receives the index of the first match; untouched when
    *               nothing is found
    * @return 1 if the character was found, 0 otherwise
    */
   static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) {
       /* do not hand a possibly NULL pointer of an empty string to memchr */
       if (length == 0) {
           return 0;
       }

       const char *hit = memchr(str, chr, length);
       if (!hit) {
           return 0;
       }
       *pos = (size_t) (hit - str);
       return 1;
   }
   /**
    * Finds the last occurrence of a character in a length-delimited buffer.
    *
    * Both the searched character and the buffer contents are compared as
    * unsigned char, so byte values above 127 are found regardless of whether
    * plain char is signed on the platform.
    *
    * @param str    buffer to search
    * @param length number of characters in str
    * @param chr    character to look for
    * @param pos    receives the index of the last match; untouched when
    *               nothing is found
    * @return 1 if the character was found, 0 otherwise
    */
   static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) {
       const unsigned char wanted = (unsigned char) chr;

       /* i is one past the inspected index so the loop can stop at zero */
       for (size_t i = length ; i > 0 ; i--) {
           if ((unsigned char) str[i-1] == wanted) {
               *pos = i-1;
               return 1;
           }
       }
       return 0;
   }
   240 sstr_t sstrchr(sstr_t s, int c) {
   241     size_t pos = 0;
   242     if(ucx_strchr(s.ptr, s.length, c, &pos)) {
   243         return sstrsubs(s, pos);
   244     }
   245     return sstrn(NULL, 0);
   246 }
   248 sstr_t sstrrchr(sstr_t s, int c) {
   249     size_t pos = 0;
   250     if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
   251         return sstrsubs(s, pos);
   252     }
   253     return sstrn(NULL, 0);
   254 }
   256 scstr_t scstrchr(scstr_t s, int c) {
   257     size_t pos = 0;
   258     if(ucx_strchr(s.ptr, s.length, c, &pos)) {
   259         return scstrsubs(s, pos);
   260     }
   261     return scstrn(NULL, 0);
   262 }
   264 scstr_t scstrrchr(scstr_t s, int c) {
   265     size_t pos = 0;
   266     if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
   267         return scstrsubs(s, pos);
   268     }
   269     return scstrn(NULL, 0);
   270 }
   /*
    * Reads entry `index` of the prefix table into dest.
    * The table consists of size_t entries when useheap is nonzero and of
    * uint8_t entries otherwise.
    */
   #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
       ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])

   /*
    * Writes src to entry `index` of the prefix table (same layout rule as
    * for ptable_r). Expands to a single statement without trailing semicolon.
    */
   #define ptable_w(useheap, ptable, index, src) do {\
       if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
       else ((size_t*)ptable)[index] = src;\
       } while (0)

   /**
    * Searches a length-delimited buffer for the first occurrence of a
    * pattern, using a prefix table (Knuth-Morris-Pratt style search).
    *
    * Patterns of up to 255 characters use a small table on the stack, longer
    * patterns a heap-allocated table. If that allocation fails, the function
    * falls back to a plain scan, so the result does not depend on available
    * memory.
    *
    * @param str      buffer to search
    * @param length   number of characters in str
    * @param match    pattern to look for
    * @param matchlen number of characters in match
    * @param newlen   receives the number of characters from the match to the
    *                 end of str; length if matchlen is 0; 0 if not found
    * @return pointer to the first match within str, str itself if matchlen
    *         is 0, or NULL if the pattern does not occur
    */
   static const char* ucx_strstr(
           const char *str,
           size_t length,
           const char *match,
           size_t matchlen,
           size_t *newlen)
   {
       *newlen = length;
       if (matchlen == 0) {
           return str;
       }

       /* from here on "not found" means NULL with a remaining length of 0 */
       *newlen = 0;

       /* a pattern longer than the text cannot match; skip all the setup */
       if (matchlen > length) {
           return NULL;
       }

       const char *result = NULL;
       size_t resultlen = 0;

       /*
        * IMPORTANT:
        * our prefix table contains the prefix length PLUS ONE
        * this is our decision, because we want to use the full range of size_t
        * the original algorithm needs a (-1) at one single place
        * and we want to avoid that
        */

       /*
        * small prefix table, local to this call so that concurrent searches
        * cannot overwrite each other's table; every entry is written by the
        * fill loop before it is read
        */
       uint8_t small_table[256];

       /* check pattern length and use appropriate prefix table */
       /* if the pattern exceeds the small prefix table, allocate on the heap */
       int useheap = matchlen > 255;
       void *ptable = useheap ?
           calloc(matchlen+1, sizeof(size_t)) : (void*) small_table;

       if (!ptable) {
           /* no memory for the table: plain scan, same result but slower */
           for (size_t k = 0 ; k <= length - matchlen ; k++) {
               if (memcmp(str + k, match, matchlen) == 0) {
                   *newlen = length - k;
                   return str + k;
               }
           }
           return NULL;
       }

       size_t i, j;

       /* fill prefix table */
       i = 0; j = 0;
       ptable_w(useheap, ptable, i, j);
       while (i < matchlen) {
           while (j >= 1 && match[j-1] != match[i]) {
               ptable_r(j, useheap, ptable, j-1);
           }
           i++; j++;
           ptable_w(useheap, ptable, i, j);
       }

       /* search */
       i = 0; j = 1;
       while (i < length) {
           while (j >= 1 && str[i] != match[j-1]) {
               ptable_r(j, useheap, ptable, j-1);
           }
           i++; j++;
           if (j-1 == matchlen) {
               size_t start = i - matchlen;
               result = str + start;
               resultlen = length - start;
               break;
           }
       }

       /* if prefix table was allocated on the heap, free it */
       if (useheap) {
           free(ptable);
       }

       *newlen = resultlen;
       return result;
   }
   351 sstr_t ucx_sstrstr(sstr_t string, scstr_t match) {
   352     sstr_t result;
   354     size_t reslen;
   355     const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
   356     if(!resstr) {
   357         result.ptr = NULL;
   358         result.length = 0;
   359         return result;
   360     }
   362     size_t pos = resstr - string.ptr;
   363     result.ptr = string.ptr + pos;
   364     result.length = reslen;
   366     return result;
   367 }
   369 scstr_t ucx_scstrstr(scstr_t string, scstr_t match) {
   370     scstr_t result;
   372     size_t reslen;
   373     const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
   374     if(!resstr) {
   375         result.ptr = NULL;
   376         result.length = 0;
   377         return result;
   378     }
   380     size_t pos = resstr - string.ptr;
   381     result.ptr = string.ptr + pos;
   382     result.length = reslen;
   384     return result;
   385 }
   387 #undef ptable_r
   388 #undef ptable_w
   390 sstr_t* ucx_strsplit(scstr_t s, scstr_t d, ssize_t *n) {
   391     return ucx_strsplit_a(ucx_default_allocator(), s, d, n);
   392 }
   394 sstr_t* ucx_strsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
   395     if (s.length == 0 || d.length == 0) {
   396         *n = -1;
   397         return NULL;
   398     }
   400     /* special cases: delimiter is at least as large as the string */
   401     if (d.length >= s.length) {
   402         /* exact match */
   403         if (sstrcmp(s, d) == 0) {
   404             *n = 0;
   405             return NULL;
   406         } else /* no match possible */ {
   407             *n = 1;
   408             sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
   409             if(result) {
   410                 *result = sstrdup_a(allocator, s);
   411             } else {
   412                 *n = -2;
   413             }
   414             return result;
   415         }
   416     }
   418     ssize_t nmax = *n;
   419     size_t arrlen = 16;
   420     sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
   422     if (result) {
   423         scstr_t curpos = s;
   424         ssize_t j = 1;
   425         while (1) {
   426             scstr_t match;
   427             /* optimize for one byte delimiters */
   428             if (d.length == 1) {
   429                 match = curpos;
   430                 for (size_t i = 0 ; i < curpos.length ; i++) {
   431                     if (curpos.ptr[i] == *(d.ptr)) {
   432                         match.ptr = curpos.ptr + i;
   433                         break;
   434                     }
   435                     match.length--;
   436                 }
   437             } else {
   438                 match = scstrstr(curpos, d);
   439             }
   440             if (match.length > 0) {
   441                 /* is this our last try? */
   442                 if (nmax == 0 || j < nmax) {
   443                     /* copy the current string to the array */
   444                     scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
   445                     result[j-1] = sstrdup_a(allocator, item);
   446                     size_t processed = item.length + d.length;
   447                     curpos.ptr += processed;
   448                     curpos.length -= processed;
   450                     /* allocate memory for the next string */
   451                     j++;
   452                     if (j > arrlen) {
   453                         arrlen *= 2;
   454                         size_t reallocsz;
   455                         sstr_t* reallocated = NULL;
   456                         if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
   457                             reallocated = (sstr_t*) alrealloc(
   458                                     allocator, result, reallocsz);
   459                         }
   460                         if (reallocated) {
   461                             result = reallocated;
   462                         } else {
   463                             for (ssize_t i = 0 ; i < j-1 ; i++) {
   464                                 alfree(allocator, result[i].ptr);
   465                             }
   466                             alfree(allocator, result);
   467                             *n = -2;
   468                             return NULL;
   469                         }
   470                     }
   471                 } else {
   472                     /* nmax reached, copy the _full_ remaining string */
   473                     result[j-1] = sstrdup_a(allocator, curpos);
   474                     break;
   475                 }
   476             } else {
   477                 /* no more matches, copy last string */
   478                 result[j-1] = sstrdup_a(allocator, curpos);
   479                 break;
   480             }
   481         }
   482         *n = j;
   483     } else {
   484         *n = -2;
   485     }
   487     return result;
   488 }
   490 int ucx_strcmp(scstr_t s1, scstr_t s2) {
   491     if (s1.length == s2.length) {
   492         return memcmp(s1.ptr, s2.ptr, s1.length);
   493     } else if (s1.length > s2.length) {
   494         return 1;
   495     } else {
   496         return -1;
   497     }
   498 }
   500 int ucx_strcasecmp(scstr_t s1, scstr_t s2) {
   501     if (s1.length == s2.length) {
   502 #ifdef _WIN32
   503         return _strnicmp(s1.ptr, s2.ptr, s1.length);
   504 #else
   505         return strncasecmp(s1.ptr, s2.ptr, s1.length);
   506 #endif
   507     } else if (s1.length > s2.length) {
   508         return 1;
   509     } else {
   510         return -1;
   511     }
   512 }
   514 sstr_t ucx_strdup(scstr_t s) {
   515     return sstrdup_a(ucx_default_allocator(), s);
   516 }
   518 sstr_t ucx_strdup_a(UcxAllocator *allocator, scstr_t s) {
   519     sstr_t newstring;
   520     newstring.ptr = (char*)almalloc(allocator, s.length + 1);
   521     if (newstring.ptr) {
   522         newstring.length = s.length;
   523         newstring.ptr[newstring.length] = 0;
   525         memcpy(newstring.ptr, s.ptr, s.length);
   526     } else {
   527         newstring.length = 0;
   528     }
   530     return newstring;
   531 }
   /**
    * Determines the part of a buffer that remains after removing leading and
    * trailing whitespace, as classified by isspace().
    *
    * Characters are converted to unsigned char before classification,
    * because passing a negative char value to isspace() is undefined.
    *
    * @param s      the buffer to inspect (not modified)
    * @param len    number of characters in s
    * @param newlen receives the length of the trimmed part
    * @return the number of leading whitespace characters, i.e. the offset of
    *         the trimmed part within s
    */
   static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
       const char *newptr = s;
       size_t length = len;

       /* skip leading whitespace */
       while(length > 0 && isspace((unsigned char) *newptr)) {
           newptr++;
           length--;
       }
       /* cut off trailing whitespace */
       while(length > 0 && isspace((unsigned char) newptr[length-1])) {
           length--;
       }

       *newlen = length;
       return (size_t) (newptr - s);
   }
   550 sstr_t sstrtrim(sstr_t string) {
   551     sstr_t newstr;
   552     newstr.ptr = string.ptr
   553                  + ucx_strtrim(string.ptr, string.length, &newstr.length);
   554     return newstr;
   555 }
   557 scstr_t scstrtrim(scstr_t string) {
   558     scstr_t newstr;
   559     newstr.ptr = string.ptr
   560                  + ucx_strtrim(string.ptr, string.length, &newstr.length);
   561     return newstr;
   562 }
   564 int ucx_strprefix(scstr_t string, scstr_t prefix) {
   565     if (string.length == 0) {
   566         return prefix.length == 0;
   567     }
   568     if (prefix.length == 0) {
   569         return 1;
   570     }
   572     if (prefix.length > string.length) {
   573         return 0;
   574     } else {
   575         return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   576     }
   577 }
   579 int ucx_strsuffix(scstr_t string, scstr_t suffix) {
   580     if (string.length == 0) {
   581         return suffix.length == 0;
   582     }
   583     if (suffix.length == 0) {
   584         return 1;
   585     }
   587     if (suffix.length > string.length) {
   588         return 0;
   589     } else {
   590         return memcmp(string.ptr+string.length-suffix.length,
   591             suffix.ptr, suffix.length) == 0;
   592     }
   593 }
   595 sstr_t ucx_strlower(scstr_t string) {
   596     sstr_t ret = sstrdup(string);
   597     for (size_t i = 0; i < ret.length ; i++) {
   598         ret.ptr[i] = tolower(ret.ptr[i]);
   599     }
   600     return ret;
   601 }
   603 sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string) {
   604     sstr_t ret = sstrdup_a(allocator, string);
   605     for (size_t i = 0; i < ret.length ; i++) {
   606         ret.ptr[i] = tolower(ret.ptr[i]);
   607     }
   608     return ret;
   609 }
   611 sstr_t ucx_strupper(scstr_t string) {
   612     sstr_t ret = sstrdup(string);
   613     for (size_t i = 0; i < ret.length ; i++) {
   614         ret.ptr[i] = toupper(ret.ptr[i]);
   615     }
   616     return ret;
   617 }
   619 sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string) {
   620     sstr_t ret = sstrdup_a(allocator, string);
   621     for (size_t i = 0; i < ret.length ; i++) {
   622         ret.ptr[i] = toupper(ret.ptr[i]);
   623     }
   624     return ret;
   625 }
   627 // type adjustment functions
   628 scstr_t ucx_sc2sc(scstr_t str) {
   629     return str;
   630 }
   631 scstr_t ucx_ss2sc(sstr_t str) {
   632     scstr_t cs;
   633     cs.ptr = str.ptr;
   634     cs.length = str.length;
   635     return cs;
   636 }
   637 scstr_t ucx_ss2c_s(scstr_t c) {
   638     return c;
   639 }

mercurial