ucx: src/string.c@2dd409ed056f (annotated)

src/string.c@2dd409ed056f (annotated)

src/string.c

Sun, 21 May 2023 16:22:09 +0200

author: Mike Becker <universe@uap-core.de>
date: Sun, 21 May 2023 16:22:09 +0200
changeset 710: 2dd409ed056f
parent 697: ebdce4bf262b
child 806: e06249e09f99
permissions: -rw-r--r--

fix const-ness of non-mutating iterator creation for maps

 /*
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
  *
  * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  *   1. Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *
  *   2. Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
  *      documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 #include "cx/string.h"
 #include "cx/utils.h"
 #include <string.h>
 #include <stdarg.h>
 #include <ctype.h>
 #ifndef _WIN32
 #include <strings.h> // for strncasecmp()
 #endif // _WIN32
 cxmutstr cx_mutstr(char *cstring) {
     return (cxmutstr) {cstring, strlen(cstring)};
 }
 cxmutstr cx_mutstrn(
         char *cstring,
         size_t length
 ) {
     return (cxmutstr) {cstring, length};
 }
 cxstring cx_str(const char *cstring) {
     return (cxstring) {cstring, strlen(cstring)};
 }
 cxstring cx_strn(
         const char *cstring,
         size_t length
 ) {
     return (cxstring) {cstring, length};
 }
 cxstring cx_strcast(cxmutstr str) {
     return (cxstring) {str.ptr, str.length};
 }
 void cx_strfree(cxmutstr *str) {
     free(str->ptr);
     str->ptr = NULL;
     str->length = 0;
 }
 void cx_strfree_a(
         CxAllocator const *alloc,
         cxmutstr *str
 ) {
     cxFree(alloc, str->ptr);
     str->ptr = NULL;
     str->length = 0;
 }
 size_t cx_strlen(
         size_t count,
         ...
 ) {
     if (count == 0) return 0;
     va_list ap;
     va_start(ap, count);
     size_t size = 0;
     cx_for_n(i, count) {
         cxstring str = va_arg(ap, cxstring);
         size += str.length;
     }
     va_end(ap);
     return size;
 }
 cxmutstr cx_strcat_ma(
         CxAllocator const *alloc,
         cxmutstr str,
         size_t count,
         ...
 ) {
     if (count == 0) return str;
     cxstring *strings = calloc(count, sizeof(cxstring));
     if (!strings) abort();
     va_list ap;
     va_start(ap, count);
     // get all args and overall length
     size_t slen = str.length;
     cx_for_n(i, count) {
         cxstring s = va_arg (ap, cxstring);
         strings[i] = s;
         slen += s.length;
     }
     va_end(ap);
     // reallocate or create new string
     if (str.ptr == NULL) {
         str.ptr = cxMalloc(alloc, slen + 1);
     } else {
         str.ptr = cxRealloc(alloc, str.ptr, slen + 1);
     }
     if (str.ptr == NULL) abort();
     // concatenate strings
     size_t pos = str.length;
     str.length = slen;
     cx_for_n(i, count) {
         cxstring s = strings[i];
         memcpy(str.ptr + pos, s.ptr, s.length);
         pos += s.length;
     }
     // terminate string
     str.ptr[str.length] = '\0';
     // free temporary array
     free(strings);
     return str;
 }
 cxstring cx_strsubs(
         cxstring string,
         size_t start
 ) {
     return cx_strsubsl(string, start, string.length - start);
 }
 cxmutstr cx_strsubs_m(
         cxmutstr string,
         size_t start
 ) {
     return cx_strsubsl_m(string, start, string.length - start);
 }
 cxstring cx_strsubsl(
         cxstring string,
         size_t start,
         size_t length
 ) {
     if (start > string.length) {
         return (cxstring) {NULL, 0};
     }
     size_t rem_len = string.length - start;
     if (length > rem_len) {
         length = rem_len;
     }
     return (cxstring) {string.ptr + start, length};
 }
 cxmutstr cx_strsubsl_m(
         cxmutstr string,
         size_t start,
         size_t length
 ) {
     cxstring result = cx_strsubsl(cx_strcast(string), start, length);
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 cxstring cx_strchr(
         cxstring string,
         int chr
 ) {
     chr = 0xFF & chr;
     // TODO: improve by comparing multiple bytes at once
     cx_for_n(i, string.length) {
         if (string.ptr[i] == chr) {
             return cx_strsubs(string, i);
         }
     }
     return (cxstring) {NULL, 0};
 }
 cxmutstr cx_strchr_m(
         cxmutstr string,
         int chr
 ) {
     cxstring result = cx_strchr(cx_strcast(string), chr);
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 cxstring cx_strrchr(
         cxstring string,
         int chr
 ) {
     chr = 0xFF & chr;
     size_t i = string.length;
     while (i > 0) {
         i--;
         // TODO: improve by comparing multiple bytes at once
         if (string.ptr[i] == chr) {
             return cx_strsubs(string, i);
         }
     }
     return (cxstring) {NULL, 0};
 }
 cxmutstr cx_strrchr_m(
         cxmutstr string,
         int chr
 ) {
     cxstring result = cx_strrchr(cx_strcast(string), chr);
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 #ifndef CX_STRSTR_SBO_SIZE
 #define CX_STRSTR_SBO_SIZE 512
 #endif
 cxstring cx_strstr(
         cxstring haystack,
         cxstring needle
 ) {
     if (needle.length == 0) {
         return haystack;
     }
     // optimize for single-char needles
     if (needle.length == 1) {
         return cx_strchr(haystack, *needle.ptr);
     }
     /*
      * IMPORTANT:
      * Our prefix table contains the prefix length PLUS ONE
      * this is our decision, because we want to use the full range of size_t.
      * The original algorithm needs a (-1) at one single place,
      * and we want to avoid that.
      */
     // local prefix table
     size_t s_prefix_table[CX_STRSTR_SBO_SIZE];
     // check needle length and use appropriate prefix table
     // if the pattern exceeds static prefix table, allocate on the heap
     bool useheap = needle.length >= CX_STRSTR_SBO_SIZE;
     register size_t *ptable = useheap ? calloc(needle.length + 1,
                                                sizeof(size_t)) : s_prefix_table;
     // keep counter in registers
     register size_t i, j;
     // fill prefix table
     i = 0;
     j = 0;
     ptable[i] = j;
     while (i < needle.length) {
         while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
             j = ptable[j - 1];
         }
         i++;
         j++;
         ptable[i] = j;
     }
     // search
     cxstring result = {NULL, 0};
     i = 0;
     j = 1;
     while (i < haystack.length) {
         while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
             j = ptable[j - 1];
         }
         i++;
         j++;
         if (j - 1 == needle.length) {
             size_t start = i - needle.length;
             result.ptr = haystack.ptr + start;
             result.length = haystack.length - start;
             break;
         }
     }
     // if prefix table was allocated on the heap, free it
     if (ptable != s_prefix_table) {
         free(ptable);
     }
     return result;
 }
 cxmutstr cx_strstr_m(
         cxmutstr haystack,
         cxstring needle
 ) {
     cxstring result = cx_strstr(cx_strcast(haystack), needle);
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 size_t cx_strsplit(
         cxstring string,
         cxstring delim,
         size_t limit,
         cxstring *output
 ) {
     // special case: output limit is zero
     if (limit == 0) return 0;
     // special case: delimiter is empty
     if (delim.length == 0) {
         output[0] = string;
         return 1;
     }
     // special cases: delimiter is at least as large as the string
     if (delim.length >= string.length) {
         // exact match
         if (cx_strcmp(string, delim) == 0) {
             output[0] = cx_strn(string.ptr, 0);
             output[1] = cx_strn(string.ptr + string.length, 0);
             return 2;
         } else {
             // no match possible
             output[0] = string;
             return 1;
         }
     }
     size_t n = 0;
     cxstring curpos = string;
     while (1) {
         ++n;
         cxstring match = cx_strstr(curpos, delim);
         if (match.length > 0) {
             // is the limit reached?
             if (n < limit) {
                 // copy the current string to the array
                 cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
                 output[n - 1] = item;
                 size_t processed = item.length + delim.length;
                 curpos.ptr += processed;
                 curpos.length -= processed;
             } else {
                 // limit reached, copy the _full_ remaining string
                 output[n - 1] = curpos;
                 break;
             }
         } else {
             // no more matches, copy last string
             output[n - 1] = curpos;
             break;
         }
     }
     return n;
 }
 size_t cx_strsplit_a(
         CxAllocator const *allocator,
         cxstring string,
         cxstring delim,
         size_t limit,
         cxstring **output
 ) {
     // find out how many splits we're going to make and allocate memory
     size_t n = 0;
     cxstring curpos = string;
     while (1) {
         ++n;
         cxstring match = cx_strstr(curpos, delim);
         if (match.length > 0) {
             // is the limit reached?
             if (n < limit) {
                 size_t processed = match.ptr - curpos.ptr + delim.length;
                 curpos.ptr += processed;
                 curpos.length -= processed;
             } else {
                 // limit reached
                 break;
             }
         } else {
             // no more matches
             break;
         }
     }
     *output = cxCalloc(allocator, n, sizeof(cxstring));
     return cx_strsplit(string, delim, n, *output);
 }
 size_t cx_strsplit_m(
         cxmutstr string,
         cxstring delim,
         size_t limit,
         cxmutstr *output
 ) {
     return cx_strsplit(cx_strcast(string),
                        delim, limit, (cxstring *) output);
 }
 size_t cx_strsplit_ma(
         CxAllocator const *allocator,
         cxmutstr string,
         cxstring delim,
         size_t limit,
         cxmutstr **output
 ) {
     return cx_strsplit_a(allocator, cx_strcast(string),
                          delim, limit, (cxstring **) output);
 }
 int cx_strcmp(
         cxstring s1,
         cxstring s2
 ) {
     if (s1.length == s2.length) {
         return memcmp(s1.ptr, s2.ptr, s1.length);
     } else if (s1.length > s2.length) {
         return 1;
     } else {
         return -1;
     }
 }
 int cx_strcasecmp(
         cxstring s1,
         cxstring s2
 ) {
     if (s1.length == s2.length) {
 #ifdef _WIN32
         return _strnicmp(s1.ptr, s2.ptr, s1.length);
 #else
         return strncasecmp(s1.ptr, s2.ptr, s1.length);
 #endif
     } else if (s1.length > s2.length) {
         return 1;
     } else {
         return -1;
     }
 }
 int cx_strcmp_p(
         void const *s1,
         void const *s2
 ) {
     cxstring const *left = s1;
     cxstring const *right = s2;
     return cx_strcmp(*left, *right);
 }
 int cx_strcasecmp_p(
         void const *s1,
         void const *s2
 ) {
     cxstring const *left = s1;
     cxstring const *right = s2;
     return cx_strcasecmp(*left, *right);
 }
 cxmutstr cx_strdup_a(
         CxAllocator const *allocator,
         cxstring string
 ) {
     cxmutstr result = {
             cxMalloc(allocator, string.length + 1),
             string.length
     };
     if (result.ptr == NULL) {
         result.length = 0;
         return result;
     }
     memcpy(result.ptr, string.ptr, string.length);
     result.ptr[string.length] = '\0';
     return result;
 }
 cxstring cx_strtrim(cxstring string) {
     cxstring result = string;
     // TODO: optimize by comparing multiple bytes at once
     while (result.length > 0 && isspace(*result.ptr)) {
         result.ptr++;
         result.length--;
     }
     while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
         result.length--;
     }
     return result;
 }
 cxmutstr cx_strtrim_m(cxmutstr string) {
     cxstring result = cx_strtrim(cx_strcast(string));
     return (cxmutstr) {(char *) result.ptr, result.length};
 }
 bool cx_strprefix(
         cxstring string,
         cxstring prefix
 ) {
     if (string.length < prefix.length) return false;
     return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
 }
 bool cx_strsuffix(
         cxstring string,
         cxstring suffix
 ) {
     if (string.length < suffix.length) return false;
     return memcmp(string.ptr + string.length - suffix.length,
                   suffix.ptr, suffix.length) == 0;
 }
 bool cx_strcaseprefix(
         cxstring string,
         cxstring prefix
 ) {
     if (string.length < prefix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
 #else
     return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
 #endif
 }
 bool cx_strcasesuffix(
         cxstring string,
         cxstring suffix
 ) {
     if (string.length < suffix.length) return false;
 #ifdef _WIN32
     return _strnicmp(string.ptr+string.length-suffix.length,
                   suffix.ptr, suffix.length) == 0;
 #else
     return strncasecmp(string.ptr + string.length - suffix.length,
                        suffix.ptr, suffix.length) == 0;
 #endif
 }
 void cx_strlower(cxmutstr string) {
     cx_for_n(i, string.length) {
         string.ptr[i] = (char) tolower(string.ptr[i]);
     }
 }
 void cx_strupper(cxmutstr string) {
     cx_for_n(i, string.length) {
         string.ptr[i] = (char) toupper(string.ptr[i]);
     }
 }
 #ifndef CX_STRREPLACE_INDEX_BUFFER_SIZE
 #define CX_STRREPLACE_INDEX_BUFFER_SIZE 64
 #endif
 struct cx_strreplace_ibuf {
     size_t *buf;
     struct cx_strreplace_ibuf *next;
     unsigned int len;
 };
 static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
     while (buf) {
         struct cx_strreplace_ibuf *next = buf->next;
         free(buf->buf);
         free(buf);
         buf = next;
     }
 }
 cxmutstr cx_strreplacen_a(
         CxAllocator const *allocator,
         cxstring str,
         cxstring pattern,
         cxstring replacement,
         size_t replmax
 ) {
     if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
         return cx_strdup_a(allocator, str);
     // Compute expected buffer length
     size_t ibufmax = str.length / pattern.length;
     size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
     if (ibuflen > CX_STRREPLACE_INDEX_BUFFER_SIZE) {
         ibuflen = CX_STRREPLACE_INDEX_BUFFER_SIZE;
     }
     // Allocate first index buffer
     struct cx_strreplace_ibuf *firstbuf, *curbuf;
     firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
     if (!firstbuf) return cx_mutstrn(NULL, 0);
     firstbuf->buf = calloc(ibuflen, sizeof(size_t));
     if (!firstbuf->buf) {
         free(firstbuf);
         return cx_mutstrn(NULL, 0);
     }
     // Search occurrences
     cxstring searchstr = str;
     size_t found = 0;
     do {
         cxstring match = cx_strstr(searchstr, pattern);
         if (match.length > 0) {
             // Allocate next buffer in chain, if required
             if (curbuf->len == ibuflen) {
                 struct cx_strreplace_ibuf *nextbuf =
                         calloc(1, sizeof(struct cx_strreplace_ibuf));
                 if (!nextbuf) {
                     cx_strrepl_free_ibuf(firstbuf);
                     return cx_mutstrn(NULL, 0);
                 }
                 nextbuf->buf = calloc(ibuflen, sizeof(size_t));
                 if (!nextbuf->buf) {
                     free(nextbuf);
                     cx_strrepl_free_ibuf(firstbuf);
                     return cx_mutstrn(NULL, 0);
                 }
                 curbuf->next = nextbuf;
                 curbuf = nextbuf;
             }
             // Record match index
             found++;
             size_t idx = match.ptr - str.ptr;
             curbuf->buf[curbuf->len++] = idx;
             searchstr.ptr = match.ptr + pattern.length;
             searchstr.length = str.length - idx - pattern.length;
         } else {
             break;
         }
     } while (searchstr.length > 0 && found < replmax);
     // Allocate result string
     cxmutstr result;
     {
         ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
         size_t rcount = 0;
         curbuf = firstbuf;
         do {
             rcount += curbuf->len;
             curbuf = curbuf->next;
         } while (curbuf);
         result.length = str.length + rcount * adjlen;
         result.ptr = cxMalloc(allocator, result.length + 1);
         if (!result.ptr) {
             cx_strrepl_free_ibuf(firstbuf);
             return cx_mutstrn(NULL, 0);
         }
     }
     // Build result string
     curbuf = firstbuf;
     size_t srcidx = 0;
     char *destptr = result.ptr;
     do {
         for (size_t i = 0; i < curbuf->len; i++) {
             // Copy source part up to next match
             size_t idx = curbuf->buf[i];
             size_t srclen = idx - srcidx;
             if (srclen > 0) {
                 memcpy(destptr, str.ptr + srcidx, srclen);
                 destptr += srclen;
                 srcidx += srclen;
             }
             // Copy the replacement and skip the source pattern
             srcidx += pattern.length;
             memcpy(destptr, replacement.ptr, replacement.length);
             destptr += replacement.length;
         }
         curbuf = curbuf->next;
     } while (curbuf);
     memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
     // Result is guaranteed to be zero-terminated
     result.ptr[result.length] = '\0';
     // Free index buffer
     cx_strrepl_free_ibuf(firstbuf);
     return result;
 }
 CxStrtokCtx cx_strtok(
         cxstring str,
         cxstring delim,
         size_t limit
 ) {
     CxStrtokCtx ctx;
     ctx.str = str;
     ctx.delim = delim;
     ctx.limit = limit;
     ctx.pos = 0;
     ctx.next_pos = 0;
     ctx.delim_pos = 0;
     ctx.found = 0;
     ctx.delim_more = NULL;
     ctx.delim_more_count = 0;
     return ctx;
 }
 CxStrtokCtx cx_strtok_m(
         cxmutstr str,
         cxstring delim,
         size_t limit
 ) {
     return cx_strtok(cx_strcast(str), delim, limit);
 }
 bool cx_strtok_next(
         CxStrtokCtx *ctx,
         cxstring *token
 ) {
     // abortion criteria
     if (ctx->found >= ctx->limit || ctx->delim_pos >= ctx->str.length) {
         return false;
     }
     // determine the search start
     cxstring haystack = cx_strsubs(ctx->str, ctx->next_pos);
     // search the next delimiter
     cxstring delim = cx_strstr(haystack, ctx->delim);
     // if found, make delim capture exactly the delimiter
     if (delim.length > 0) {
         delim.length = ctx->delim.length;
     }
     // if more delimiters are specified, check them now
     if (ctx->delim_more_count > 0) {
         cx_for_n(i, ctx->delim_more_count) {
             cxstring d = cx_strstr(haystack, ctx->delim_more[i]);
             if (d.length > 0 && (delim.length == 0 || d.ptr < delim.ptr)) {
                 delim.ptr = d.ptr;
                 delim.length = ctx->delim_more[i].length;
             }
         }
     }
     // store the token information and adjust the context
     ctx->found++;
     ctx->pos = ctx->next_pos;
     token->ptr = &ctx->str.ptr[ctx->pos];
     ctx->delim_pos = delim.length == 0 ?
                      ctx->str.length : (size_t) (delim.ptr - ctx->str.ptr);
     token->length = ctx->delim_pos - ctx->pos;
     ctx->next_pos = ctx->delim_pos + delim.length;
     return true;
 }
 bool cx_strtok_next_m(
         CxStrtokCtx *ctx,
         cxmutstr *token
 ) {
     return cx_strtok_next(ctx, (cxstring *) token);
 }
 void cx_strtok_delim(
         CxStrtokCtx *ctx,
         cxstring const *delim,
         size_t count
 ) {
     ctx->delim_more = delim;
     ctx->delim_more_count = count;
 }

Mercurial > hg > ucx / annotate

src/string.c@2dd409ed056f (annotated)

src/string.c