src/string.c

Sat, 28 Oct 2017 15:59:16 +0200

author
Mike Becker <universe@uap-core.de>
date
Sat, 28 Oct 2017 15:59:16 +0200
changeset 260
a6184aff5108
parent 259
2f5dea574a75
child 270
3d80d425543b
permissions
-rw-r--r--

rename LICENSE to COPYING to be distributed by autoconf

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include "ucx/string.h"
    31 #include "ucx/allocator.h"
    33 #include <stdlib.h>
    34 #include <string.h>
    35 #include <stdarg.h>
    36 #include <stdint.h>
    37 #include <ctype.h>
    39 sstr_t sstr(char *cstring) {
    40     sstr_t string;
    41     string.ptr = cstring;
    42     string.length = strlen(cstring);
    43     return string;
    44 }
    46 sstr_t sstrn(char *cstring, size_t length) {
    47     sstr_t string;
    48     string.ptr = cstring;
    49     string.length = length;
    50     return string;
    51 }
    53 size_t sstrnlen(size_t n, sstr_t s, ...) {
    54     va_list ap;
    55     size_t size = s.length;
    56     va_start(ap, s);
    58     for (size_t i = 1 ; i < n ; i++) {
    59         sstr_t str = va_arg(ap, sstr_t);
    60         size += str.length;
    61     }
    62     va_end(ap);
    64     return size;
    65 }
    67 static sstr_t sstrvcat_a(
    68         UcxAllocator *a,
    69         size_t count,
    70         sstr_t s1,
    71         sstr_t s2,
    72         va_list ap) {
    73     sstr_t str;
    74     str.ptr = NULL;
    75     str.length = 0;
    76     if(count < 2) {
    77         return str;
    78     }
    80     sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
    81     if(!strings) {
    82         return str;
    83     }
    85     // get all args and overall length
    86     strings[0] = s1;
    87     strings[1] = s2;
    88     size_t strlen = s1.length + s2.length;
    89     for (size_t i=2;i<count;i++) {
    90         sstr_t s = va_arg (ap, sstr_t);
    91         strings[i] = s;
    92         strlen += s.length;
    93     }
    95     // create new string
    96     str.ptr = (char*) almalloc(a, strlen + 1);
    97     str.length = strlen;
    98     if(!str.ptr) {
    99         free(strings);
   100         str.length = 0;
   101         return str;
   102     }
   104     // concatenate strings
   105     size_t pos = 0;
   106     for (size_t i=0;i<count;i++) {
   107         sstr_t s = strings[i];
   108         memcpy(str.ptr + pos, s.ptr, s.length);
   109         pos += s.length;
   110     }
   112     str.ptr[str.length] = '\0';
   114     free(strings);
   116     return str;
   117 }
   119 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
   120     va_list ap;
   121     va_start(ap, s2);
   122     sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
   123     va_end(ap);
   124     return s;
   125 }
   127 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
   128     va_list ap;
   129     va_start(ap, s2);
   130     sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
   131     va_end(ap);
   132     return s;
   133 }
   135 sstr_t sstrsubs(sstr_t s, size_t start) {
   136     return sstrsubsl (s, start, s.length-start);
   137 }
   139 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
   140     sstr_t new_sstr;
   141     if (start >= s.length) {
   142         new_sstr.ptr = NULL;
   143         new_sstr.length = 0;
   144     } else {
   145         if (length > s.length-start) {
   146             length = s.length-start;
   147         }
   148         new_sstr.ptr = &s.ptr[start];
   149         new_sstr.length = length;
   150     }
   151     return new_sstr;
   152 }
   154 sstr_t sstrchr(sstr_t s, int c) {
   155     for(size_t i=0;i<s.length;i++) {
   156         if(s.ptr[i] == c) {
   157             return sstrsubs(s, i);
   158         }
   159     }
   160     sstr_t n;
   161     n.ptr = NULL;
   162     n.length = 0;
   163     return n;
   164 }
   166 sstr_t sstrrchr(sstr_t s, int c) {
   167     if (s.length > 0) {
   168         for(size_t i=s.length;i>0;i--) {
   169             if(s.ptr[i-1] == c) {
   170                 return sstrsubs(s, i-1);
   171             }
   172         }
   173     }
   174     sstr_t n;
   175     n.ptr = NULL;
   176     n.length = 0;
   177     return n;
   178 }
   180 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
   181     ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
   183 #define ptable_w(useheap, ptable, index, src) do {\
   184     if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
   185     else ((size_t*)ptable)[index] = src;\
   186     } while (0);
   188 sstr_t sstrstr(sstr_t string, sstr_t match) {
   189     if (match.length == 0) {
   190         return string;
   191     }
   193     /* prepare default return value in case of no match */
   194     sstr_t result = sstrn(NULL, 0);
   196     /*
   197      * IMPORTANT:
   198      * our prefix table contains the prefix length PLUS ONE
   199      * this is our decision, because we want to use the full range of size_t
   200      * the original algorithm needs a (-1) at one single place
   201      * and we want to avoid that
   202      */
   204     /* static prefix table */
   205     static uint8_t s_prefix_table[256];
   207     /* check pattern length and use appropriate prefix table */
   208     /* if the pattern exceeds static prefix table, allocate on the heap */
   209     register int useheap = match.length > 255;
   210     register void* ptable = useheap ?
   211         calloc(match.length+1, sizeof(size_t)): s_prefix_table;
   213     /* keep counter in registers */
   214     register size_t i, j;
   216     /* fill prefix table */
   217     i = 0; j = 0;
   218     ptable_w(useheap, ptable, i, j);
   219     while (i < match.length) {
   220         while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
   221             ptable_r(j, useheap, ptable, j-1);
   222         }
   223         i++; j++;
   224         ptable_w(useheap, ptable, i, j);
   225     }
   227     /* search */
   228     i = 0; j = 1;
   229     while (i < string.length) {
   230         while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
   231             ptable_r(j, useheap, ptable, j-1);
   232         }
   233         i++; j++;
   234         if (j-1 == match.length) {
   235             size_t start = i - match.length;
   236             result.ptr = string.ptr + start;
   237             result.length = string.length - start;
   238             break;
   239         }
   240     }
   242     /* if prefix table was allocated on the heap, free it */
   243     if (ptable != s_prefix_table) {
   244         free(ptable);
   245     }
   247     return result;
   248 }
   250 #undef ptable_r
   251 #undef ptable_w
   253 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
   254     return sstrsplit_a(ucx_default_allocator(), s, d, n);
   255 }
   257 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
   258     if (s.length == 0 || d.length == 0) {
   259         *n = -1;
   260         return NULL;
   261     }
   263     /* special cases: delimiter is at least as large as the string */
   264     if (d.length >= s.length) {
   265         /* exact match */
   266         if (sstrcmp(s, d) == 0) {
   267             *n = 0;
   268             return NULL;
   269         } else /* no match possible */ {
   270             *n = 1;
   271             sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
   272             *result = sstrdup_a(allocator, s);
   273             return result;
   274         }
   275     }
   277     ssize_t nmax = *n;
   278     size_t arrlen = 16;
   279     sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
   281     if (result) {
   282         sstr_t curpos = s;
   283         ssize_t j = 1;
   284         while (1) {
   285             sstr_t match;
   286             /* optimize for one byte delimiters */
   287             if (d.length == 1) {
   288                 match = curpos;
   289                 for (size_t i = 0 ; i < curpos.length ; i++) {
   290                     if (curpos.ptr[i] == *(d.ptr)) {
   291                         match.ptr = curpos.ptr + i;
   292                         break;
   293                     }
   294                     match.length--;
   295                 }
   296             } else {
   297                 match = sstrstr(curpos, d);
   298             }
   299             if (match.length > 0) {
   300                 /* is this our last try? */
   301                 if (nmax == 0 || j < nmax) {
   302                     /* copy the current string to the array */
   303                     sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
   304                     result[j-1] = sstrdup_a(allocator, item);
   305                     size_t processed = item.length + d.length;
   306                     curpos.ptr += processed;
   307                     curpos.length -= processed;
   309                     /* allocate memory for the next string */
   310                     j++;
   311                     if (j > arrlen) {
   312                         arrlen *= 2;
   313                         sstr_t* reallocated = (sstr_t*) alrealloc(
   314                                 allocator, result, arrlen*sizeof(sstr_t));
   315                         if (reallocated) {
   316                             result = reallocated;
   317                         } else {
   318                             for (ssize_t i = 0 ; i < j-1 ; i++) {
   319                                 alfree(allocator, result[i].ptr);
   320                             }
   321                             alfree(allocator, result);
   322                             *n = -2;
   323                             return NULL;
   324                         }
   325                     }
   326                 } else {
   327                     /* nmax reached, copy the _full_ remaining string */
   328                     result[j-1] = sstrdup_a(allocator, curpos);
   329                     break;
   330                 }
   331             } else {
   332                 /* no more matches, copy last string */
   333                 result[j-1] = sstrdup_a(allocator, curpos);
   334                 break;
   335             }
   336         }
   337         *n = j;
   338     } else {
   339         *n = -2;
   340     }
   342     return result;
   343 }
   345 int sstrcmp(sstr_t s1, sstr_t s2) {
   346     if (s1.length == s2.length) {
   347         return memcmp(s1.ptr, s2.ptr, s1.length);
   348     } else if (s1.length > s2.length) {
   349         return 1;
   350     } else {
   351         return -1;
   352     }
   353 }
   355 int sstrcasecmp(sstr_t s1, sstr_t s2) {
   356     if (s1.length == s2.length) {
   357 #ifdef _WIN32
   358         return _strnicmp(s1.ptr, s2.ptr, s1.length);
   359 #else
   360         return strncasecmp(s1.ptr, s2.ptr, s1.length);
   361 #endif
   362     } else if (s1.length > s2.length) {
   363         return 1;
   364     } else {
   365         return -1;
   366     }
   367 }
   369 sstr_t sstrdup(sstr_t s) {
   370     return sstrdup_a(ucx_default_allocator(), s);
   371 }
   373 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
   374     sstr_t newstring;
   375     newstring.ptr = (char*)almalloc(allocator, s.length + 1);
   376     if (newstring.ptr) {
   377         newstring.length = s.length;
   378         newstring.ptr[newstring.length] = 0;
   380         memcpy(newstring.ptr, s.ptr, s.length);
   381     } else {
   382         newstring.length = 0;
   383     }
   385     return newstring;
   386 }
   388 sstr_t sstrtrim(sstr_t string) {
   389     sstr_t newstr = string;
   391     while (newstr.length > 0 && isspace(*newstr.ptr)) {
   392         newstr.ptr++;
   393         newstr.length--;
   394     }
   395     while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
   396         newstr.length--;
   397     }
   399     return newstr;
   400 }
   402 int sstrprefix(sstr_t string, sstr_t prefix) {
   403     if (string.length == 0) {
   404         return prefix.length == 0;
   405     }
   406     if (prefix.length == 0) {
   407         return 1;
   408     }
   410     if (prefix.length > string.length) {
   411         return 0;
   412     } else {
   413         return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   414     }
   415 }
   417 int sstrsuffix(sstr_t string, sstr_t suffix) {
   418     if (string.length == 0) {
   419         return suffix.length == 0;
   420     }
   421     if (suffix.length == 0) {
   422         return 1;
   423     }
   425     if (suffix.length > string.length) {
   426         return 0;
   427     } else {
   428         return memcmp(string.ptr+string.length-suffix.length,
   429             suffix.ptr, suffix.length) == 0;
   430     }
   431 }
   433 sstr_t sstrlower(sstr_t string) {
   434     sstr_t ret = sstrdup(string);
   435     for (size_t i = 0; i < ret.length ; i++) {
   436         ret.ptr[i] = tolower(ret.ptr[i]);
   437     }
   438     return ret;
   439 }
   441 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
   442     sstr_t ret = sstrdup_a(allocator, string);
   443     for (size_t i = 0; i < ret.length ; i++) {
   444         ret.ptr[i] = tolower(ret.ptr[i]);
   445     }
   446     return ret;
   447 }
   449 sstr_t sstrupper(sstr_t string) {
   450     sstr_t ret = sstrdup(string);
   451     for (size_t i = 0; i < ret.length ; i++) {
   452         ret.ptr[i] = toupper(ret.ptr[i]);
   453     }
   454     return ret;
   455 }
   457 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
   458     sstr_t ret = sstrdup_a(allocator, string);
   459     for (size_t i = 0; i < ret.length ; i++) {
   460         ret.ptr[i] = toupper(ret.ptr[i]);
   461     }
   462     return ret;
   463 }

mercurial