src/string.c

Wed, 31 Aug 2022 23:12:05 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 31 Aug 2022 23:12:05 +0200
changeset 580
aac47db8da0b
parent 579
bbc46dcd5255
child 581
c067394737ca
permissions
-rw-r--r--

more implementations of string functions

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include "cx/string.h"
    30 #include "cx/utils.h"
    32 #include <string.h>
    33 #include <stdarg.h>
    34 #include <stdint.h>
    36 cxmutstr cx_mutstr(char *cstring) {
    37     return (cxmutstr) {cstring, strlen(cstring)};
    38 }
    40 cxmutstr cx_mutstrn(
    41         char *cstring,
    42         size_t length
    43 ) {
    44     return (cxmutstr) {cstring, length};
    45 }
    47 cxstring cx_str(const char *cstring) {
    48     return (cxstring) {cstring, strlen(cstring)};
    49 }
    51 cxstring cx_strn(
    52         const char *cstring,
    53         size_t length
    54 ) {
    55     return (cxstring) {cstring, length};
    56 }
    58 cxstring cx_strcast(cxmutstr str) {
    59     return (cxstring) {str.ptr, str.length};
    60 }
    62 void cx_strfree(cxmutstr *str) {
    63     free(str->ptr);
    64     str->ptr = NULL;
    65     str->length = 0;
    66 }
    68 size_t cx_strlen(
    69         size_t count,
    70         ...
    71 ) {
    72     if (count == 0) return 0;
    74     va_list ap;
    75     va_start(ap, count);
    76     size_t size = 0;
    77     cx_for_n(i, count) {
    78         cxstring str = va_arg(ap, cxstring);
    79         size += str.length;
    80     }
    81     va_end(ap);
    83     return size;
    84 }
    86 cxmutstr cx_strcat_a(
    87         CxAllocator *alloc,
    88         size_t count,
    89         ...
    90 ) {
    91     cxstring *strings = calloc(count, sizeof(cxstring));
    92     if (!strings) abort();
    94     va_list ap;
    95     va_start(ap, count);
    97     // get all args and overall length
    98     size_t slen = 0;
    99     cx_for_n(i, count) {
   100         cxstring s = va_arg (ap, cxstring);
   101         strings[i] = s;
   102         slen += s.length;
   103     }
   105     // create new string
   106     cxmutstr result;
   107     result.ptr = cxMalloc(alloc, slen + 1);
   108     result.length = slen;
   109     if (result.ptr == NULL) abort();
   111     // concatenate strings
   112     size_t pos = 0;
   113     cx_for_n(i, count) {
   114         cxstring s = strings[i];
   115         memcpy(result.ptr + pos, s.ptr, s.length);
   116         pos += s.length;
   117     }
   119     // terminate string
   120     result.ptr[result.length] = '\0';
   122     // free temporary array
   123     free(strings);
   125     return result;
   126 }
   128 cxstring cx_strsubs(
   129         cxstring string,
   130         size_t start
   131 ) {
   132     return cx_strsubsl(string, start, string.length - start);
   133 }
   135 cxmutstr cx_strsubs_m(
   136         cxmutstr string,
   137         size_t start
   138 ) {
   139     return cx_strsubsl_m(string, start, string.length - start);
   140 }
   142 cxstring cx_strsubsl(
   143         cxstring string,
   144         size_t start,
   145         size_t length
   146 ) {
   147     if (start > string.length) {
   148         return (cxstring) {NULL, 0};
   149     }
   151     size_t rem_len = string.length - start;
   152     if (length > rem_len) {
   153         length = rem_len;
   154     }
   156     return (cxstring) {string.ptr + start, length};
   157 }
   159 cxmutstr cx_strsubsl_m(
   160         cxmutstr string,
   161         size_t start,
   162         size_t length
   163 ) {
   164     cxstring result = cx_strsubsl(cx_strcast(string), start, length);
   165     return (cxmutstr) {(char *) result.ptr, result.length};
   166 }
   168 cxstring cx_strchr(
   169         cxstring string,
   170         int chr
   171 ) {
   172     chr = 0xFF & chr;
   173     // TODO: improve by comparing multiple bytes at once
   174     cx_for_n(i, string.length) {
   175         if (string.ptr[i] == chr) {
   176             return cx_strsubs(string, i);
   177         }
   178     }
   179     return (cxstring) {NULL, 0};
   180 }
   182 cxmutstr cx_strchr_m(
   183         cxmutstr string,
   184         int chr
   185 ) {
   186     cxstring result = cx_strchr(cx_strcast(string), chr);
   187     return (cxmutstr) {(char *) result.ptr, result.length};
   188 }
   190 cxstring cx_strrchr(
   191         cxstring string,
   192         int chr
   193 ) {
   194     chr = 0xFF & chr;
   195     size_t i = string.length;
   196     while (i > 0) {
   197         i--;
   198         // TODO: improve by comparing multiple bytes at once
   199         if (string.ptr[i] == chr) {
   200             return cx_strsubs(string, i);
   201         }
   202     }
   203     return (cxstring) {NULL, 0};
   204 }
   206 cxmutstr cx_strrchr_m(
   207         cxmutstr string,
   208         int chr
   209 ) {
   210     cxstring result = cx_strrchr(cx_strcast(string), chr);
   211     return (cxmutstr) {(char *) result.ptr, result.length};
   212 }
   214 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
   215     ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
   217 #define ptable_w(useheap, ptable, index, src) do {\
   218     if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
   219     else ((size_t*)ptable)[index] = src;\
   220     } while (0)
   223 cxstring cx_strstr(
   224         cxstring haystack,
   225         cxstring needle
   226 ) {
   227     if (needle.length == 0) {
   228         return haystack;
   229     }
   231     /*
   232      * IMPORTANT:
   233      * Our prefix table contains the prefix length PLUS ONE
   234      * this is our decision, because we want to use the full range of size_t.
   235      * The original algorithm needs a (-1) at one single place,
   236      * and we want to avoid that.
   237      */
   239     /* static prefix table */
   240     static uint8_t s_prefix_table[512];
   242     /* check pattern length and use appropriate prefix table */
   243     /* if the pattern exceeds static prefix table, allocate on the heap */
   244     register int useheap = needle.length >= 512;
   245     register void *ptable = useheap ? calloc(needle.length + 1,
   246                                              sizeof(size_t)) : s_prefix_table;
   248     /* keep counter in registers */
   249     register size_t i, j;
   251     /* fill prefix table */
   252     i = 0;
   253     j = 0;
   254     ptable_w(useheap, ptable, i, j);
   255     while (i < needle.length) {
   256         while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
   257             ptable_r(j, useheap, ptable, j - 1);
   258         }
   259         i++;
   260         j++;
   261         ptable_w(useheap, ptable, i, j);
   262     }
   264     /* search */
   265     cxstring result = {NULL, 0};
   266     i = 0;
   267     j = 1;
   268     while (i < haystack.length) {
   269         while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
   270             ptable_r(j, useheap, ptable, j - 1);
   271         }
   272         i++;
   273         j++;
   274         if (j - 1 == needle.length) {
   275             size_t start = i - needle.length;
   276             result.ptr = haystack.ptr + start;
   277             result.length = haystack.length - start;
   278             break;
   279         }
   280     }
   282     /* if prefix table was allocated on the heap, free it */
   283     if (ptable != s_prefix_table) {
   284         free(ptable);
   285     }
   287     return result;
   288 }
   290 cxmutstr cx_strstr_m(
   291         cxmutstr haystack,
   292         cxstring needle
   293 ) {
   294     cxstring result = cx_strstr(cx_strcast(haystack), needle);
   295     return (cxmutstr) {(char *) result.ptr, result.length};
   296 }
   298 size_t cx_strsplit(
   299         cxstring string,
   300         cxstring delim,
   301         size_t limit,
   302         cxstring *output
   303 ) {
   304     // TODO: implement
   305     return 0;
   306 }
   308 size_t cx_strsplit_a(
   309         CxAllocator *allocator,
   310         cxstring string,
   311         cxstring delim,
   312         size_t limit,
   313         cxstring **output
   314 ) {
   315     // TODO: implement
   316     return 0;
   317 }
   319 size_t cx_strsplit_m(
   320         cxmutstr string,
   321         cxstring delim,
   322         size_t limit,
   323         cxmutstr *output
   324 ) {
   325     return cx_strsplit(cx_strcast(string),
   326                        delim, limit, (cxstring *) output);
   327 }
   329 size_t cx_strsplit_ma(
   330         CxAllocator *allocator,
   331         cxmutstr string,
   332         cxstring delim,
   333         size_t limit,
   334         cxmutstr **output
   335 ) {
   336     return cx_strsplit_a(allocator, cx_strcast(string),
   337                          delim, limit, (cxstring **) output);
   338 }

mercurial