src/string.c

Wed, 16 Nov 2022 22:27:46 +0100

author
Mike Becker <universe@uap-core.de>
date
Wed, 16 Nov 2022 22:27:46 +0100
changeset 610
de5d3ee6435f
parent 593
ea9b41b5ebbc
child 628
1e2be40f0cb5
permissions
-rw-r--r--

#219 array list: implement add and at

Add uses the low level cx_array_copy function which is
now also implemented, but not tested by individual unit
tests.

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include "cx/string.h"
    30 #include "cx/utils.h"
    32 #include <string.h>
    33 #include <stdarg.h>
    34 #include <ctype.h>
    36 #ifndef _WIN32
    38 #include <strings.h> /* for strncasecmp() */
    40 #endif /* _WIN32 */
    42 cxmutstr cx_mutstr(char *cstring) {
    43     return (cxmutstr) {cstring, strlen(cstring)};
    44 }
    46 cxmutstr cx_mutstrn(
    47         char *cstring,
    48         size_t length
    49 ) {
    50     return (cxmutstr) {cstring, length};
    51 }
    53 cxstring cx_str(const char *cstring) {
    54     return (cxstring) {cstring, strlen(cstring)};
    55 }
    57 cxstring cx_strn(
    58         const char *cstring,
    59         size_t length
    60 ) {
    61     return (cxstring) {cstring, length};
    62 }
    64 cxstring cx_strcast(cxmutstr str) {
    65     return (cxstring) {str.ptr, str.length};
    66 }
    68 void cx_strfree(cxmutstr *str) {
    69     free(str->ptr);
    70     str->ptr = NULL;
    71     str->length = 0;
    72 }
    74 void cx_strfree_a(
    75         CxAllocator *alloc,
    76         cxmutstr *str
    77 ) {
    78     cxFree(alloc, str->ptr);
    79     str->ptr = NULL;
    80     str->length = 0;
    81 }
    83 size_t cx_strlen(
    84         size_t count,
    85         ...
    86 ) {
    87     if (count == 0) return 0;
    89     va_list ap;
    90     va_start(ap, count);
    91     size_t size = 0;
    92     cx_for_n(i, count) {
    93         cxstring str = va_arg(ap, cxstring);
    94         size += str.length;
    95     }
    96     va_end(ap);
    98     return size;
    99 }
   101 cxmutstr cx_strcat_a(
   102         CxAllocator *alloc,
   103         size_t count,
   104         ...
   105 ) {
   106     cxstring *strings = calloc(count, sizeof(cxstring));
   107     if (!strings) abort();
   109     va_list ap;
   110     va_start(ap, count);
   112     // get all args and overall length
   113     size_t slen = 0;
   114     cx_for_n(i, count) {
   115         cxstring s = va_arg (ap, cxstring);
   116         strings[i] = s;
   117         slen += s.length;
   118     }
   120     // create new string
   121     cxmutstr result;
   122     result.ptr = cxMalloc(alloc, slen + 1);
   123     result.length = slen;
   124     if (result.ptr == NULL) abort();
   126     // concatenate strings
   127     size_t pos = 0;
   128     cx_for_n(i, count) {
   129         cxstring s = strings[i];
   130         memcpy(result.ptr + pos, s.ptr, s.length);
   131         pos += s.length;
   132     }
   134     // terminate string
   135     result.ptr[result.length] = '\0';
   137     // free temporary array
   138     free(strings);
   140     return result;
   141 }
   143 cxstring cx_strsubs(
   144         cxstring string,
   145         size_t start
   146 ) {
   147     return cx_strsubsl(string, start, string.length - start);
   148 }
   150 cxmutstr cx_strsubs_m(
   151         cxmutstr string,
   152         size_t start
   153 ) {
   154     return cx_strsubsl_m(string, start, string.length - start);
   155 }
   157 cxstring cx_strsubsl(
   158         cxstring string,
   159         size_t start,
   160         size_t length
   161 ) {
   162     if (start > string.length) {
   163         return (cxstring) {NULL, 0};
   164     }
   166     size_t rem_len = string.length - start;
   167     if (length > rem_len) {
   168         length = rem_len;
   169     }
   171     return (cxstring) {string.ptr + start, length};
   172 }
   174 cxmutstr cx_strsubsl_m(
   175         cxmutstr string,
   176         size_t start,
   177         size_t length
   178 ) {
   179     cxstring result = cx_strsubsl(cx_strcast(string), start, length);
   180     return (cxmutstr) {(char *) result.ptr, result.length};
   181 }
   183 cxstring cx_strchr(
   184         cxstring string,
   185         int chr
   186 ) {
   187     chr = 0xFF & chr;
   188     // TODO: improve by comparing multiple bytes at once
   189     cx_for_n(i, string.length) {
   190         if (string.ptr[i] == chr) {
   191             return cx_strsubs(string, i);
   192         }
   193     }
   194     return (cxstring) {NULL, 0};
   195 }
   197 cxmutstr cx_strchr_m(
   198         cxmutstr string,
   199         int chr
   200 ) {
   201     cxstring result = cx_strchr(cx_strcast(string), chr);
   202     return (cxmutstr) {(char *) result.ptr, result.length};
   203 }
   205 cxstring cx_strrchr(
   206         cxstring string,
   207         int chr
   208 ) {
   209     chr = 0xFF & chr;
   210     size_t i = string.length;
   211     while (i > 0) {
   212         i--;
   213         // TODO: improve by comparing multiple bytes at once
   214         if (string.ptr[i] == chr) {
   215             return cx_strsubs(string, i);
   216         }
   217     }
   218     return (cxstring) {NULL, 0};
   219 }
   221 cxmutstr cx_strrchr_m(
   222         cxmutstr string,
   223         int chr
   224 ) {
   225     cxstring result = cx_strrchr(cx_strcast(string), chr);
   226     return (cxmutstr) {(char *) result.ptr, result.length};
   227 }
   229 #define STRSTR_SBO_BUFLEN 512
   231 cxstring cx_strstr(
   232         cxstring haystack,
   233         cxstring needle
   234 ) {
   235     if (needle.length == 0) {
   236         return haystack;
   237     }
   239     /* optimize for single-char needles */
   240     if (needle.length == 1) {
   241         return cx_strchr(haystack, *needle.ptr);
   242     }
   244     /*
   245      * IMPORTANT:
   246      * Our prefix table contains the prefix length PLUS ONE
   247      * this is our decision, because we want to use the full range of size_t.
   248      * The original algorithm needs a (-1) at one single place,
   249      * and we want to avoid that.
   250      */
   252     /* local prefix table */
   253     size_t s_prefix_table[STRSTR_SBO_BUFLEN];
   255     /* check needle length and use appropriate prefix table */
   256     /* if the pattern exceeds static prefix table, allocate on the heap */
   257     bool useheap = needle.length >= STRSTR_SBO_BUFLEN;
   258     register size_t *ptable = useheap ? calloc(needle.length + 1,
   259                                                sizeof(size_t)) : s_prefix_table;
   261     /* keep counter in registers */
   262     register size_t i, j;
   264     /* fill prefix table */
   265     i = 0;
   266     j = 0;
   267     ptable[i] = j;
   268     while (i < needle.length) {
   269         while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
   270             j = ptable[j - 1];
   271         }
   272         i++;
   273         j++;
   274         ptable[i] = j;
   275     }
   277     /* search */
   278     cxstring result = {NULL, 0};
   279     i = 0;
   280     j = 1;
   281     while (i < haystack.length) {
   282         while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
   283             j = ptable[j - 1];
   284         }
   285         i++;
   286         j++;
   287         if (j - 1 == needle.length) {
   288             size_t start = i - needle.length;
   289             result.ptr = haystack.ptr + start;
   290             result.length = haystack.length - start;
   291             break;
   292         }
   293     }
   295     /* if prefix table was allocated on the heap, free it */
   296     if (ptable != s_prefix_table) {
   297         free(ptable);
   298     }
   300     return result;
   301 }
   303 cxmutstr cx_strstr_m(
   304         cxmutstr haystack,
   305         cxstring needle
   306 ) {
   307     cxstring result = cx_strstr(cx_strcast(haystack), needle);
   308     return (cxmutstr) {(char *) result.ptr, result.length};
   309 }
   311 size_t cx_strsplit(
   312         cxstring string,
   313         cxstring delim,
   314         size_t limit,
   315         cxstring *output
   316 ) {
   317     /* special case: output limit is zero */
   318     if (limit == 0) return 0;
   320     /* special case: delimiter is empty */
   321     if (delim.length == 0) {
   322         output[0] = string;
   323         return 1;
   324     }
   326     /* special cases: delimiter is at least as large as the string */
   327     if (delim.length >= string.length) {
   328         /* exact match */
   329         if (cx_strcmp(string, delim) == 0) {
   330             output[0] = cx_strn(string.ptr, 0);
   331             output[1] = cx_strn(string.ptr + string.length, 0);
   332             return 2;
   333         } else /* no match possible */ {
   334             output[0] = string;
   335             return 1;
   336         }
   337     }
   339     size_t n = 0;
   340     cxstring curpos = string;
   341     while (1) {
   342         ++n;
   343         cxstring match = cx_strstr(curpos, delim);
   344         if (match.length > 0) {
   345             /* is the limit reached? */
   346             if (n < limit) {
   347                 /* copy the current string to the array */
   348                 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
   349                 output[n - 1] = item;
   350                 size_t processed = item.length + delim.length;
   351                 curpos.ptr += processed;
   352                 curpos.length -= processed;
   353             } else {
   354                 /* limit reached, copy the _full_ remaining string */
   355                 output[n - 1] = curpos;
   356                 break;
   357             }
   358         } else {
   359             /* no more matches, copy last string */
   360             output[n - 1] = curpos;
   361             break;
   362         }
   363     }
   365     return n;
   366 }
   368 size_t cx_strsplit_a(
   369         CxAllocator *allocator,
   370         cxstring string,
   371         cxstring delim,
   372         size_t limit,
   373         cxstring **output
   374 ) {
   375     /* find out how many splits we're going to make and allocate memory */
   376     size_t n = 0;
   377     cxstring curpos = string;
   378     while (1) {
   379         ++n;
   380         cxstring match = cx_strstr(curpos, delim);
   381         if (match.length > 0) {
   382             /* is the limit reached? */
   383             if (n < limit) {
   384                 size_t processed = match.ptr - curpos.ptr + delim.length;
   385                 curpos.ptr += processed;
   386                 curpos.length -= processed;
   387             } else {
   388                 /* limit reached */
   389                 break;
   390             }
   391         } else {
   392             /* no more matches */
   393             break;
   394         }
   395     }
   396     *output = cxCalloc(allocator, n, sizeof(cxstring));
   397     return cx_strsplit(string, delim, n, *output);
   398 }
   400 size_t cx_strsplit_m(
   401         cxmutstr string,
   402         cxstring delim,
   403         size_t limit,
   404         cxmutstr *output
   405 ) {
   406     return cx_strsplit(cx_strcast(string),
   407                        delim, limit, (cxstring *) output);
   408 }
   410 size_t cx_strsplit_ma(
   411         CxAllocator *allocator,
   412         cxmutstr string,
   413         cxstring delim,
   414         size_t limit,
   415         cxmutstr **output
   416 ) {
   417     return cx_strsplit_a(allocator, cx_strcast(string),
   418                          delim, limit, (cxstring **) output);
   419 }
   421 int cx_strcmp(
   422         cxstring s1,
   423         cxstring s2
   424 ) {
   425     if (s1.length == s2.length) {
   426         return memcmp(s1.ptr, s2.ptr, s1.length);
   427     } else if (s1.length > s2.length) {
   428         return 1;
   429     } else {
   430         return -1;
   431     }
   432 }
   434 int cx_strcasecmp(
   435         cxstring s1,
   436         cxstring s2
   437 ) {
   438     if (s1.length == s2.length) {
   439 #ifdef _WIN32
   440         return _strnicmp(s1.ptr, s2.ptr, s1.length);
   441 #else
   442         return strncasecmp(s1.ptr, s2.ptr, s1.length);
   443 #endif
   444     } else if (s1.length > s2.length) {
   445         return 1;
   446     } else {
   447         return -1;
   448     }
   449 }
   451 cxmutstr cx_strdup_a(
   452         CxAllocator *allocator,
   453         cxstring string
   454 ) {
   455     cxmutstr result = {
   456             cxMalloc(allocator, string.length + 1),
   457             string.length
   458     };
   459     if (result.ptr == NULL) {
   460         result.length = 0;
   461         return result;
   462     }
   463     memcpy(result.ptr, string.ptr, string.length);
   464     result.ptr[string.length] = '\0';
   465     return result;
   466 }
   468 cxstring cx_strtrim(cxstring string) {
   469     cxstring result = string;
   470     // TODO: optimize by comparing multiple bytes at once
   471     while (result.length > 0 && isspace(*result.ptr)) {
   472         result.ptr++;
   473         result.length--;
   474     }
   475     while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
   476         result.length--;
   477     }
   478     return result;
   479 }
   481 cxmutstr cx_strtrim_m(cxmutstr string) {
   482     cxstring result = cx_strtrim(cx_strcast(string));
   483     return (cxmutstr) {(char *) result.ptr, result.length};
   484 }
   486 bool cx_strprefix(
   487         cxstring string,
   488         cxstring prefix
   489 ) {
   490     if (string.length < prefix.length) return false;
   491     return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   492 }
   494 bool cx_strsuffix(
   495         cxstring string,
   496         cxstring suffix
   497 ) {
   498     if (string.length < suffix.length) return false;
   499     return memcmp(string.ptr + string.length - suffix.length,
   500                   suffix.ptr, suffix.length) == 0;
   501 }
   503 bool cx_strcaseprefix(
   504         cxstring string,
   505         cxstring prefix
   506 ) {
   507     if (string.length < prefix.length) return false;
   508 #ifdef _WIN32
   509     return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
   510 #else
   511     return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
   512 #endif
   513 }
   515 bool cx_strcasesuffix(
   516         cxstring string,
   517         cxstring suffix
   518 ) {
   519     if (string.length < suffix.length) return false;
   520 #ifdef _WIN32
   521     return _strnicmp(string.ptr+string.length-suffix.length,
   522                   suffix.ptr, suffix.length) == 0;
   523 #else
   524     return strncasecmp(string.ptr + string.length - suffix.length,
   525                        suffix.ptr, suffix.length) == 0;
   526 #endif
   527 }
   529 void cx_strlower(cxmutstr string) {
   530     cx_for_n(i, string.length) {
   531         string.ptr[i] = (char) tolower(string.ptr[i]);
   532     }
   533 }
   535 void cx_strupper(cxmutstr string) {
   536     cx_for_n(i, string.length) {
   537         string.ptr[i] = (char) toupper(string.ptr[i]);
   538     }
   539 }
   541 #define REPLACE_INDEX_BUFFER_MAX 100
   543 struct cx_strreplace_ibuf {
   544     size_t *buf;
   545     struct cx_strreplace_ibuf *next;
   546     unsigned int len;
   547 };
   549 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
   550     while (buf) {
   551         struct cx_strreplace_ibuf *next = buf->next;
   552         free(buf->buf);
   553         free(buf);
   554         buf = next;
   555     }
   556 }
   558 cxmutstr cx_strreplacen_a(
   559         CxAllocator *allocator,
   560         cxstring str,
   561         cxstring pattern,
   562         cxstring replacement,
   563         size_t replmax
   564 ) {
   566     if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
   567         return cx_strdup_a(allocator, str);
   569     /* Compute expected buffer length */
   570     size_t ibufmax = str.length / pattern.length;
   571     size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
   572     if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
   573         ibuflen = REPLACE_INDEX_BUFFER_MAX;
   574     }
   576     /* Allocate first index buffer */
   577     struct cx_strreplace_ibuf *firstbuf, *curbuf;
   578     firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
   579     if (!firstbuf) return cx_mutstrn(NULL, 0);
   580     firstbuf->buf = calloc(ibuflen, sizeof(size_t));
   581     if (!firstbuf->buf) {
   582         free(firstbuf);
   583         return cx_mutstrn(NULL, 0);
   584     }
   586     /* Search occurrences */
   587     cxstring searchstr = str;
   588     size_t found = 0;
   589     do {
   590         cxstring match = cx_strstr(searchstr, pattern);
   591         if (match.length > 0) {
   592             /* Allocate next buffer in chain, if required */
   593             if (curbuf->len == ibuflen) {
   594                 struct cx_strreplace_ibuf *nextbuf =
   595                         calloc(1, sizeof(struct cx_strreplace_ibuf));
   596                 if (!nextbuf) {
   597                     cx_strrepl_free_ibuf(firstbuf);
   598                     return cx_mutstrn(NULL, 0);
   599                 }
   600                 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
   601                 if (!nextbuf->buf) {
   602                     free(nextbuf);
   603                     cx_strrepl_free_ibuf(firstbuf);
   604                     return cx_mutstrn(NULL, 0);
   605                 }
   606                 curbuf->next = nextbuf;
   607                 curbuf = nextbuf;
   608             }
   610             /* Record match index */
   611             found++;
   612             size_t idx = match.ptr - str.ptr;
   613             curbuf->buf[curbuf->len++] = idx;
   614             searchstr.ptr = match.ptr + pattern.length;
   615             searchstr.length = str.length - idx - pattern.length;
   616         } else {
   617             break;
   618         }
   619     } while (searchstr.length > 0 && found < replmax);
   621     /* Allocate result string */
   622     cxmutstr result;
   623     {
   624         ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
   625         size_t rcount = 0;
   626         curbuf = firstbuf;
   627         do {
   628             rcount += curbuf->len;
   629             curbuf = curbuf->next;
   630         } while (curbuf);
   631         result.length = str.length + rcount * adjlen;
   632         result.ptr = cxMalloc(allocator, result.length + 1);
   633         if (!result.ptr) {
   634             cx_strrepl_free_ibuf(firstbuf);
   635             return cx_mutstrn(NULL, 0);
   636         }
   637     }
   639     /* Build result string */
   640     curbuf = firstbuf;
   641     size_t srcidx = 0;
   642     char *destptr = result.ptr;
   643     do {
   644         for (size_t i = 0; i < curbuf->len; i++) {
   645             /* Copy source part up to next match*/
   646             size_t idx = curbuf->buf[i];
   647             size_t srclen = idx - srcidx;
   648             if (srclen > 0) {
   649                 memcpy(destptr, str.ptr + srcidx, srclen);
   650                 destptr += srclen;
   651                 srcidx += srclen;
   652             }
   654             /* Copy the replacement and skip the source pattern */
   655             srcidx += pattern.length;
   656             memcpy(destptr, replacement.ptr, replacement.length);
   657             destptr += replacement.length;
   658         }
   659         curbuf = curbuf->next;
   660     } while (curbuf);
   661     memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
   663     /* Result is guaranteed to be zero-terminated */
   664     result.ptr[result.length] = '\0';
   666     /* Free index buffer */
   667     cx_strrepl_free_ibuf(firstbuf);
   669     return result;
   670 }

mercurial