src/string.c

Sun, 23 Oct 2022 13:32:16 +0200

author
Mike Becker <universe@uap-core.de>
date
Sun, 23 Oct 2022 13:32:16 +0200
changeset 592
bb69ef3ad1f3
parent 591
7df0bcaecffa
child 593
ea9b41b5ebbc
permissions
-rw-r--r--

enclose macro arguments in parenthesis

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include "cx/string.h"
    30 #include "cx/utils.h"
    32 #include <string.h>
    33 #include <stdarg.h>
    34 #include <stdint.h>
    35 #include <ctype.h>
    37 #ifndef _WIN32
    39 #include <strings.h> /* for strncasecmp() */
    41 #endif /* _WIN32 */
    43 cxmutstr cx_mutstr(char *cstring) {
    44     return (cxmutstr) {cstring, strlen(cstring)};
    45 }
    47 cxmutstr cx_mutstrn(
    48         char *cstring,
    49         size_t length
    50 ) {
    51     return (cxmutstr) {cstring, length};
    52 }
    54 cxstring cx_str(const char *cstring) {
    55     return (cxstring) {cstring, strlen(cstring)};
    56 }
    58 cxstring cx_strn(
    59         const char *cstring,
    60         size_t length
    61 ) {
    62     return (cxstring) {cstring, length};
    63 }
    65 cxstring cx_strcast(cxmutstr str) {
    66     return (cxstring) {str.ptr, str.length};
    67 }
    69 void cx_strfree(cxmutstr *str) {
    70     free(str->ptr);
    71     str->ptr = NULL;
    72     str->length = 0;
    73 }
    75 void cx_strfree_a(
    76         CxAllocator *alloc,
    77         cxmutstr *str
    78 ) {
    79     cxFree(alloc, str->ptr);
    80     str->ptr = NULL;
    81     str->length = 0;
    82 }
    84 size_t cx_strlen(
    85         size_t count,
    86         ...
    87 ) {
    88     if (count == 0) return 0;
    90     va_list ap;
    91     va_start(ap, count);
    92     size_t size = 0;
    93     cx_for_n(i, count) {
    94         cxstring str = va_arg(ap, cxstring);
    95         size += str.length;
    96     }
    97     va_end(ap);
    99     return size;
   100 }
   102 cxmutstr cx_strcat_a(
   103         CxAllocator *alloc,
   104         size_t count,
   105         ...
   106 ) {
   107     cxstring *strings = calloc(count, sizeof(cxstring));
   108     if (!strings) abort();
   110     va_list ap;
   111     va_start(ap, count);
   113     // get all args and overall length
   114     size_t slen = 0;
   115     cx_for_n(i, count) {
   116         cxstring s = va_arg (ap, cxstring);
   117         strings[i] = s;
   118         slen += s.length;
   119     }
   121     // create new string
   122     cxmutstr result;
   123     result.ptr = cxMalloc(alloc, slen + 1);
   124     result.length = slen;
   125     if (result.ptr == NULL) abort();
   127     // concatenate strings
   128     size_t pos = 0;
   129     cx_for_n(i, count) {
   130         cxstring s = strings[i];
   131         memcpy(result.ptr + pos, s.ptr, s.length);
   132         pos += s.length;
   133     }
   135     // terminate string
   136     result.ptr[result.length] = '\0';
   138     // free temporary array
   139     free(strings);
   141     return result;
   142 }
   144 cxstring cx_strsubs(
   145         cxstring string,
   146         size_t start
   147 ) {
   148     return cx_strsubsl(string, start, string.length - start);
   149 }
   151 cxmutstr cx_strsubs_m(
   152         cxmutstr string,
   153         size_t start
   154 ) {
   155     return cx_strsubsl_m(string, start, string.length - start);
   156 }
   158 cxstring cx_strsubsl(
   159         cxstring string,
   160         size_t start,
   161         size_t length
   162 ) {
   163     if (start > string.length) {
   164         return (cxstring) {NULL, 0};
   165     }
   167     size_t rem_len = string.length - start;
   168     if (length > rem_len) {
   169         length = rem_len;
   170     }
   172     return (cxstring) {string.ptr + start, length};
   173 }
   175 cxmutstr cx_strsubsl_m(
   176         cxmutstr string,
   177         size_t start,
   178         size_t length
   179 ) {
   180     cxstring result = cx_strsubsl(cx_strcast(string), start, length);
   181     return (cxmutstr) {(char *) result.ptr, result.length};
   182 }
   184 cxstring cx_strchr(
   185         cxstring string,
   186         int chr
   187 ) {
   188     chr = 0xFF & chr;
   189     // TODO: improve by comparing multiple bytes at once
   190     cx_for_n(i, string.length) {
   191         if (string.ptr[i] == chr) {
   192             return cx_strsubs(string, i);
   193         }
   194     }
   195     return (cxstring) {NULL, 0};
   196 }
   198 cxmutstr cx_strchr_m(
   199         cxmutstr string,
   200         int chr
   201 ) {
   202     cxstring result = cx_strchr(cx_strcast(string), chr);
   203     return (cxmutstr) {(char *) result.ptr, result.length};
   204 }
   206 cxstring cx_strrchr(
   207         cxstring string,
   208         int chr
   209 ) {
   210     chr = 0xFF & chr;
   211     size_t i = string.length;
   212     while (i > 0) {
   213         i--;
   214         // TODO: improve by comparing multiple bytes at once
   215         if (string.ptr[i] == chr) {
   216             return cx_strsubs(string, i);
   217         }
   218     }
   219     return (cxstring) {NULL, 0};
   220 }
   222 cxmutstr cx_strrchr_m(
   223         cxmutstr string,
   224         int chr
   225 ) {
   226     cxstring result = cx_strrchr(cx_strcast(string), chr);
   227     return (cxmutstr) {(char *) result.ptr, result.length};
   228 }
   230 #define STRSTR_SBO_BUFLEN 512
   232 cxstring cx_strstr(
   233         cxstring haystack,
   234         cxstring needle
   235 ) {
   236     if (needle.length == 0) {
   237         return haystack;
   238     }
   240     /* optimize for single-char needles */
   241     if (needle.length == 1) {
   242         return cx_strchr(haystack, *needle.ptr);
   243     }
   245     /*
   246      * IMPORTANT:
   247      * Our prefix table contains the prefix length PLUS ONE
   248      * this is our decision, because we want to use the full range of size_t.
   249      * The original algorithm needs a (-1) at one single place,
   250      * and we want to avoid that.
   251      */
   253     /* local prefix table */
   254     size_t s_prefix_table[STRSTR_SBO_BUFLEN];
   256     /* check needle length and use appropriate prefix table */
   257     /* if the pattern exceeds static prefix table, allocate on the heap */
   258     bool useheap = needle.length >= STRSTR_SBO_BUFLEN;
   259     register size_t *ptable = useheap ? calloc(needle.length + 1,
   260                                                sizeof(size_t)) : s_prefix_table;
   262     /* keep counter in registers */
   263     register size_t i, j;
   265     /* fill prefix table */
   266     i = 0;
   267     j = 0;
   268     ptable[i] = j;
   269     while (i < needle.length) {
   270         while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
   271             j = ptable[j - 1];
   272         }
   273         i++;
   274         j++;
   275         ptable[i] = j;
   276     }
   278     /* search */
   279     cxstring result = {NULL, 0};
   280     i = 0;
   281     j = 1;
   282     while (i < haystack.length) {
   283         while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
   284             j = ptable[j - 1];
   285         }
   286         i++;
   287         j++;
   288         if (j - 1 == needle.length) {
   289             size_t start = i - needle.length;
   290             result.ptr = haystack.ptr + start;
   291             result.length = haystack.length - start;
   292             break;
   293         }
   294     }
   296     /* if prefix table was allocated on the heap, free it */
   297     if (ptable != s_prefix_table) {
   298         free(ptable);
   299     }
   301     return result;
   302 }
   304 cxmutstr cx_strstr_m(
   305         cxmutstr haystack,
   306         cxstring needle
   307 ) {
   308     cxstring result = cx_strstr(cx_strcast(haystack), needle);
   309     return (cxmutstr) {(char *) result.ptr, result.length};
   310 }
   312 size_t cx_strsplit(
   313         cxstring string,
   314         cxstring delim,
   315         size_t limit,
   316         cxstring *output
   317 ) {
   318     /* special case: output limit is zero */
   319     if (limit == 0) return 0;
   321     /* special case: delimiter is empty */
   322     if (delim.length == 0) {
   323         output[0] = string;
   324         return 1;
   325     }
   327     /* special cases: delimiter is at least as large as the string */
   328     if (delim.length >= string.length) {
   329         /* exact match */
   330         if (cx_strcmp(string, delim) == 0) {
   331             output[0] = cx_strn(string.ptr, 0);
   332             output[1] = cx_strn(string.ptr + string.length, 0);
   333             return 2;
   334         } else /* no match possible */ {
   335             output[0] = string;
   336             return 1;
   337         }
   338     }
   340     size_t n = 0;
   341     cxstring curpos = string;
   342     while (1) {
   343         ++n;
   344         cxstring match = cx_strstr(curpos, delim);
   345         if (match.length > 0) {
   346             /* is the limit reached? */
   347             if (n < limit) {
   348                 /* copy the current string to the array */
   349                 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
   350                 output[n - 1] = item;
   351                 size_t processed = item.length + delim.length;
   352                 curpos.ptr += processed;
   353                 curpos.length -= processed;
   354             } else {
   355                 /* limit reached, copy the _full_ remaining string */
   356                 output[n - 1] = curpos;
   357                 break;
   358             }
   359         } else {
   360             /* no more matches, copy last string */
   361             output[n - 1] = curpos;
   362             break;
   363         }
   364     }
   366     return n;
   367 }
   369 size_t cx_strsplit_a(
   370         CxAllocator *allocator,
   371         cxstring string,
   372         cxstring delim,
   373         size_t limit,
   374         cxstring **output
   375 ) {
   376     /* find out how many splits we're going to make and allocate memory */
   377     size_t n = 0;
   378     cxstring curpos = string;
   379     while (1) {
   380         ++n;
   381         cxstring match = cx_strstr(curpos, delim);
   382         if (match.length > 0) {
   383             /* is the limit reached? */
   384             if (n < limit) {
   385                 size_t processed = match.ptr - curpos.ptr + delim.length;
   386                 curpos.ptr += processed;
   387                 curpos.length -= processed;
   388             } else {
   389                 /* limit reached */
   390                 break;
   391             }
   392         } else {
   393             /* no more matches */
   394             break;
   395         }
   396     }
   397     *output = cxCalloc(allocator, n, sizeof(cxstring));
   398     return cx_strsplit(string, delim, n, *output);
   399 }
   401 size_t cx_strsplit_m(
   402         cxmutstr string,
   403         cxstring delim,
   404         size_t limit,
   405         cxmutstr *output
   406 ) {
   407     return cx_strsplit(cx_strcast(string),
   408                        delim, limit, (cxstring *) output);
   409 }
   411 size_t cx_strsplit_ma(
   412         CxAllocator *allocator,
   413         cxmutstr string,
   414         cxstring delim,
   415         size_t limit,
   416         cxmutstr **output
   417 ) {
   418     return cx_strsplit_a(allocator, cx_strcast(string),
   419                          delim, limit, (cxstring **) output);
   420 }
   422 int cx_strcmp(
   423         cxstring s1,
   424         cxstring s2
   425 ) {
   426     if (s1.length == s2.length) {
   427         return memcmp(s1.ptr, s2.ptr, s1.length);
   428     } else if (s1.length > s2.length) {
   429         return 1;
   430     } else {
   431         return -1;
   432     }
   433 }
   435 int cx_strcasecmp(
   436         cxstring s1,
   437         cxstring s2
   438 ) {
   439     if (s1.length == s2.length) {
   440 #ifdef _WIN32
   441         return _strnicmp(s1.ptr, s2.ptr, s1.length);
   442 #else
   443         return strncasecmp(s1.ptr, s2.ptr, s1.length);
   444 #endif
   445     } else if (s1.length > s2.length) {
   446         return 1;
   447     } else {
   448         return -1;
   449     }
   450 }
   452 cxmutstr cx_strdup_a(
   453         CxAllocator *allocator,
   454         cxstring string
   455 ) {
   456     cxmutstr result = {
   457             cxMalloc(allocator, string.length + 1),
   458             string.length
   459     };
   460     if (result.ptr == NULL) {
   461         result.length = 0;
   462         return result;
   463     }
   464     memcpy(result.ptr, string.ptr, string.length);
   465     result.ptr[string.length] = '\0';
   466     return result;
   467 }
   469 cxstring cx_strtrim(cxstring string) {
   470     cxstring result = string;
   471     // TODO: optimize by comparing multiple bytes at once
   472     while (result.length > 0 && isspace(*result.ptr)) {
   473         result.ptr++;
   474         result.length--;
   475     }
   476     while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
   477         result.length--;
   478     }
   479     return result;
   480 }
   482 cxmutstr cx_strtrim_m(cxmutstr string) {
   483     cxstring result = cx_strtrim(cx_strcast(string));
   484     return (cxmutstr) {(char *) result.ptr, result.length};
   485 }
   487 bool cx_strprefix(
   488         cxstring string,
   489         cxstring prefix
   490 ) {
   491     if (string.length < prefix.length) return false;
   492     return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   493 }
   495 bool cx_strsuffix(
   496         cxstring string,
   497         cxstring suffix
   498 ) {
   499     if (string.length < suffix.length) return false;
   500     return memcmp(string.ptr + string.length - suffix.length,
   501                   suffix.ptr, suffix.length) == 0;
   502 }
   504 bool cx_strcaseprefix(
   505         cxstring string,
   506         cxstring prefix
   507 ) {
   508     if (string.length < prefix.length) return false;
   509 #ifdef _WIN32
   510     return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
   511 #else
   512     return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
   513 #endif
   514 }
   516 bool cx_strcasesuffix(
   517         cxstring string,
   518         cxstring suffix
   519 ) {
   520     if (string.length < suffix.length) return false;
   521 #ifdef _WIN32
   522     return _strnicmp(string.ptr+string.length-suffix.length,
   523                   suffix.ptr, suffix.length) == 0;
   524 #else
   525     return strncasecmp(string.ptr + string.length - suffix.length,
   526                        suffix.ptr, suffix.length) == 0;
   527 #endif
   528 }
   530 void cx_strlower(cxmutstr string) {
   531     cx_for_n(i, string.length) {
   532         string.ptr[i] = tolower(string.ptr[i]);
   533     }
   534 }
   536 void cx_strupper(cxmutstr string) {
   537     cx_for_n(i, string.length) {
   538         string.ptr[i] = toupper(string.ptr[i]);
   539     }
   540 }
   542 #define REPLACE_INDEX_BUFFER_MAX 100
   544 struct cx_strreplace_ibuf {
   545     size_t *buf;
   546     struct cx_strreplace_ibuf *next;
   547     unsigned int len;
   548 };
   550 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
   551     while (buf) {
   552         struct cx_strreplace_ibuf *next = buf->next;
   553         free(buf->buf);
   554         free(buf);
   555         buf = next;
   556     }
   557 }
   559 cxmutstr cx_strreplacen_a(
   560         CxAllocator *allocator,
   561         cxstring str,
   562         cxstring pattern,
   563         cxstring replacement,
   564         size_t replmax
   565 ) {
   567     if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
   568         return cx_strdup_a(allocator, str);
   570     /* Compute expected buffer length */
   571     size_t ibufmax = str.length / pattern.length;
   572     size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
   573     if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
   574         ibuflen = REPLACE_INDEX_BUFFER_MAX;
   575     }
   577     /* Allocate first index buffer */
   578     struct cx_strreplace_ibuf *firstbuf, *curbuf;
   579     firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
   580     if (!firstbuf) return cx_mutstrn(NULL, 0);
   581     firstbuf->buf = calloc(ibuflen, sizeof(size_t));
   582     if (!firstbuf->buf) {
   583         free(firstbuf);
   584         return cx_mutstrn(NULL, 0);
   585     }
   587     /* Search occurrences */
   588     cxstring searchstr = str;
   589     size_t found = 0;
   590     do {
   591         cxstring match = cx_strstr(searchstr, pattern);
   592         if (match.length > 0) {
   593             /* Allocate next buffer in chain, if required */
   594             if (curbuf->len == ibuflen) {
   595                 struct cx_strreplace_ibuf *nextbuf =
   596                         calloc(1, sizeof(struct cx_strreplace_ibuf));
   597                 if (!nextbuf) {
   598                     cx_strrepl_free_ibuf(firstbuf);
   599                     return cx_mutstrn(NULL, 0);
   600                 }
   601                 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
   602                 if (!nextbuf->buf) {
   603                     free(nextbuf);
   604                     cx_strrepl_free_ibuf(firstbuf);
   605                     return cx_mutstrn(NULL, 0);
   606                 }
   607                 curbuf->next = nextbuf;
   608                 curbuf = nextbuf;
   609             }
   611             /* Record match index */
   612             found++;
   613             size_t idx = match.ptr - str.ptr;
   614             curbuf->buf[curbuf->len++] = idx;
   615             searchstr.ptr = match.ptr + pattern.length;
   616             searchstr.length = str.length - idx - pattern.length;
   617         } else {
   618             break;
   619         }
   620     } while (searchstr.length > 0 && found < replmax);
   622     /* Allocate result string */
   623     cxmutstr result;
   624     {
   625         ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
   626         size_t rcount = 0;
   627         curbuf = firstbuf;
   628         do {
   629             rcount += curbuf->len;
   630             curbuf = curbuf->next;
   631         } while (curbuf);
   632         result.length = str.length + rcount * adjlen;
   633         result.ptr = cxMalloc(allocator, result.length + 1);
   634         if (!result.ptr) {
   635             cx_strrepl_free_ibuf(firstbuf);
   636             return cx_mutstrn(NULL, 0);
   637         }
   638     }
   640     /* Build result string */
   641     curbuf = firstbuf;
   642     size_t srcidx = 0;
   643     char *destptr = result.ptr;
   644     do {
   645         for (size_t i = 0; i < curbuf->len; i++) {
   646             /* Copy source part up to next match*/
   647             size_t idx = curbuf->buf[i];
   648             size_t srclen = idx - srcidx;
   649             if (srclen > 0) {
   650                 memcpy(destptr, str.ptr + srcidx, srclen);
   651                 destptr += srclen;
   652                 srcidx += srclen;
   653             }
   655             /* Copy the replacement and skip the source pattern */
   656             srcidx += pattern.length;
   657             memcpy(destptr, replacement.ptr, replacement.length);
   658             destptr += replacement.length;
   659         }
   660         curbuf = curbuf->next;
   661     } while (curbuf);
   662     memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
   664     /* Result is guaranteed to be zero-terminated */
   665     result.ptr[result.length] = '\0';
   667     /* Free index buffer */
   668     cx_strrepl_free_ibuf(firstbuf);
   670     return result;
   671 }

mercurial