src/string.c

Sat, 03 Sep 2022 15:11:23 +0200

author
Mike Becker <universe@uap-core.de>
date
Sat, 03 Sep 2022 15:11:23 +0200
changeset 582
96fa7fa6af4f
parent 581
c067394737ca
child 583
0f3c9662f9b5
permissions
-rw-r--r--

implement strupper and strlower

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  */
    29 #include "cx/string.h"
    30 #include "cx/utils.h"
    32 #include <string.h>
    33 #include <stdarg.h>
    34 #include <stdint.h>
    35 #include <ctype.h>
    37 #ifndef _WIN32
    39 #include <strings.h> /* for strncasecmp() */
    41 #endif /* _WIN32 */
    43 cxmutstr cx_mutstr(char *cstring) {
    44     return (cxmutstr) {cstring, strlen(cstring)};
    45 }
    47 cxmutstr cx_mutstrn(
    48         char *cstring,
    49         size_t length
    50 ) {
    51     return (cxmutstr) {cstring, length};
    52 }
    54 cxstring cx_str(const char *cstring) {
    55     return (cxstring) {cstring, strlen(cstring)};
    56 }
    58 cxstring cx_strn(
    59         const char *cstring,
    60         size_t length
    61 ) {
    62     return (cxstring) {cstring, length};
    63 }
    65 cxstring cx_strcast(cxmutstr str) {
    66     return (cxstring) {str.ptr, str.length};
    67 }
    69 void cx_strfree(cxmutstr *str) {
    70     free(str->ptr);
    71     str->ptr = NULL;
    72     str->length = 0;
    73 }
    75 size_t cx_strlen(
    76         size_t count,
    77         ...
    78 ) {
    79     if (count == 0) return 0;
    81     va_list ap;
    82     va_start(ap, count);
    83     size_t size = 0;
    84     cx_for_n(i, count) {
    85         cxstring str = va_arg(ap, cxstring);
    86         size += str.length;
    87     }
    88     va_end(ap);
    90     return size;
    91 }
    93 cxmutstr cx_strcat_a(
    94         CxAllocator *alloc,
    95         size_t count,
    96         ...
    97 ) {
    98     cxstring *strings = calloc(count, sizeof(cxstring));
    99     if (!strings) abort();
   101     va_list ap;
   102     va_start(ap, count);
   104     // get all args and overall length
   105     size_t slen = 0;
   106     cx_for_n(i, count) {
   107         cxstring s = va_arg (ap, cxstring);
   108         strings[i] = s;
   109         slen += s.length;
   110     }
   112     // create new string
   113     cxmutstr result;
   114     result.ptr = cxMalloc(alloc, slen + 1);
   115     result.length = slen;
   116     if (result.ptr == NULL) abort();
   118     // concatenate strings
   119     size_t pos = 0;
   120     cx_for_n(i, count) {
   121         cxstring s = strings[i];
   122         memcpy(result.ptr + pos, s.ptr, s.length);
   123         pos += s.length;
   124     }
   126     // terminate string
   127     result.ptr[result.length] = '\0';
   129     // free temporary array
   130     free(strings);
   132     return result;
   133 }
   135 cxstring cx_strsubs(
   136         cxstring string,
   137         size_t start
   138 ) {
   139     return cx_strsubsl(string, start, string.length - start);
   140 }
   142 cxmutstr cx_strsubs_m(
   143         cxmutstr string,
   144         size_t start
   145 ) {
   146     return cx_strsubsl_m(string, start, string.length - start);
   147 }
   149 cxstring cx_strsubsl(
   150         cxstring string,
   151         size_t start,
   152         size_t length
   153 ) {
   154     if (start > string.length) {
   155         return (cxstring) {NULL, 0};
   156     }
   158     size_t rem_len = string.length - start;
   159     if (length > rem_len) {
   160         length = rem_len;
   161     }
   163     return (cxstring) {string.ptr + start, length};
   164 }
   166 cxmutstr cx_strsubsl_m(
   167         cxmutstr string,
   168         size_t start,
   169         size_t length
   170 ) {
   171     cxstring result = cx_strsubsl(cx_strcast(string), start, length);
   172     return (cxmutstr) {(char *) result.ptr, result.length};
   173 }
   175 cxstring cx_strchr(
   176         cxstring string,
   177         int chr
   178 ) {
   179     chr = 0xFF & chr;
   180     // TODO: improve by comparing multiple bytes at once
   181     cx_for_n(i, string.length) {
   182         if (string.ptr[i] == chr) {
   183             return cx_strsubs(string, i);
   184         }
   185     }
   186     return (cxstring) {NULL, 0};
   187 }
   189 cxmutstr cx_strchr_m(
   190         cxmutstr string,
   191         int chr
   192 ) {
   193     cxstring result = cx_strchr(cx_strcast(string), chr);
   194     return (cxmutstr) {(char *) result.ptr, result.length};
   195 }
   197 cxstring cx_strrchr(
   198         cxstring string,
   199         int chr
   200 ) {
   201     chr = 0xFF & chr;
   202     size_t i = string.length;
   203     while (i > 0) {
   204         i--;
   205         // TODO: improve by comparing multiple bytes at once
   206         if (string.ptr[i] == chr) {
   207             return cx_strsubs(string, i);
   208         }
   209     }
   210     return (cxstring) {NULL, 0};
   211 }
   213 cxmutstr cx_strrchr_m(
   214         cxmutstr string,
   215         int chr
   216 ) {
   217     cxstring result = cx_strrchr(cx_strcast(string), chr);
   218     return (cxmutstr) {(char *) result.ptr, result.length};
   219 }
   221 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
   222     ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
   224 #define ptable_w(useheap, ptable, index, src) do {\
   225     if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
   226     else ((size_t*)ptable)[index] = src;\
   227     } while (0)
   230 cxstring cx_strstr(
   231         cxstring haystack,
   232         cxstring needle
   233 ) {
   234     if (needle.length == 0) {
   235         return haystack;
   236     }
   238     /*
   239      * IMPORTANT:
   240      * Our prefix table contains the prefix length PLUS ONE
   241      * this is our decision, because we want to use the full range of size_t.
   242      * The original algorithm needs a (-1) at one single place,
   243      * and we want to avoid that.
   244      */
   246     /* static prefix table */
   247     static uint8_t s_prefix_table[512];
   249     /* check pattern length and use appropriate prefix table */
   250     /* if the pattern exceeds static prefix table, allocate on the heap */
   251     register int useheap = needle.length >= 512;
   252     register void *ptable = useheap ? calloc(needle.length + 1,
   253                                              sizeof(size_t)) : s_prefix_table;
   255     /* keep counter in registers */
   256     register size_t i, j;
   258     /* fill prefix table */
   259     i = 0;
   260     j = 0;
   261     ptable_w(useheap, ptable, i, j);
   262     while (i < needle.length) {
   263         while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
   264             ptable_r(j, useheap, ptable, j - 1);
   265         }
   266         i++;
   267         j++;
   268         ptable_w(useheap, ptable, i, j);
   269     }
   271     /* search */
   272     cxstring result = {NULL, 0};
   273     i = 0;
   274     j = 1;
   275     while (i < haystack.length) {
   276         while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
   277             ptable_r(j, useheap, ptable, j - 1);
   278         }
   279         i++;
   280         j++;
   281         if (j - 1 == needle.length) {
   282             size_t start = i - needle.length;
   283             result.ptr = haystack.ptr + start;
   284             result.length = haystack.length - start;
   285             break;
   286         }
   287     }
   289     /* if prefix table was allocated on the heap, free it */
   290     if (ptable != s_prefix_table) {
   291         free(ptable);
   292     }
   294     return result;
   295 }
   297 cxmutstr cx_strstr_m(
   298         cxmutstr haystack,
   299         cxstring needle
   300 ) {
   301     cxstring result = cx_strstr(cx_strcast(haystack), needle);
   302     return (cxmutstr) {(char *) result.ptr, result.length};
   303 }
   305 size_t cx_strsplit(
   306         cxstring string,
   307         cxstring delim,
   308         size_t limit,
   309         cxstring *output
   310 ) {
   311     // TODO: implement
   312     return 0;
   313 }
   315 size_t cx_strsplit_a(
   316         CxAllocator *allocator,
   317         cxstring string,
   318         cxstring delim,
   319         size_t limit,
   320         cxstring **output
   321 ) {
   322     // TODO: implement
   323     return 0;
   324 }
   326 size_t cx_strsplit_m(
   327         cxmutstr string,
   328         cxstring delim,
   329         size_t limit,
   330         cxmutstr *output
   331 ) {
   332     return cx_strsplit(cx_strcast(string),
   333                        delim, limit, (cxstring *) output);
   334 }
   336 size_t cx_strsplit_ma(
   337         CxAllocator *allocator,
   338         cxmutstr string,
   339         cxstring delim,
   340         size_t limit,
   341         cxmutstr **output
   342 ) {
   343     return cx_strsplit_a(allocator, cx_strcast(string),
   344                          delim, limit, (cxstring **) output);
   345 }
   347 int cx_strcmp(cxstring s1, cxstring s2) {
   348     if (s1.length == s2.length) {
   349         return memcmp(s1.ptr, s2.ptr, s1.length);
   350     } else if (s1.length > s2.length) {
   351         return 1;
   352     } else {
   353         return -1;
   354     }
   355 }
   357 int cx_strcasecmp(cxstring s1, cxstring s2) {
   358     if (s1.length == s2.length) {
   359 #ifdef _WIN32
   360         return _strnicmp(s1.ptr, s2.ptr, s1.length);
   361 #else
   362         return strncasecmp(s1.ptr, s2.ptr, s1.length);
   363 #endif
   364     } else if (s1.length > s2.length) {
   365         return 1;
   366     } else {
   367         return -1;
   368     }
   369 }
   371 cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) {
   372     cxmutstr result = {
   373             cxMalloc(allocator, string.length + 1),
   374             string.length
   375     };
   376     if (result.ptr == NULL) {
   377         result.length = 0;
   378         return result;
   379     }
   380     memcpy(result.ptr, string.ptr, string.length);
   381     result.ptr[string.length] = '\0';
   382     return result;
   383 }
   385 cxstring cx_strtrim(cxstring string) {
   386     cxstring result = string;
   387     // TODO: optimize by comparing multiple bytes at once
   388     while (result.length > 0 && isspace(*result.ptr)) {
   389         result.ptr++;
   390         result.length--;
   391     }
   392     while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
   393         result.length--;
   394     }
   395     return result;
   396 }
   398 cxmutstr cx_strtrim_m(cxmutstr string) {
   399     cxstring result = cx_strtrim(cx_strcast(string));
   400     return (cxmutstr) {(char *) result.ptr, result.length};
   401 }
   403 bool cx_strprefix(cxstring string, cxstring prefix) {
   404     if (string.length < prefix.length) return false;
   405     return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   406 }
   408 bool cx_strsuffix(cxstring string, cxstring suffix) {
   409     if (string.length < suffix.length) return false;
   410     return memcmp(string.ptr + string.length - suffix.length,
   411                   suffix.ptr, suffix.length) == 0;
   412 }
   414 bool cx_casestrprefix(cxstring string, cxstring prefix) {
   415     if (string.length < prefix.length) return false;
   416 #ifdef _WIN32
   417     return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
   418 #else
   419     return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
   420 #endif
   421 }
   423 bool cx_casestrsuffix(cxstring string, cxstring suffix) {
   424     if (string.length < suffix.length) return false;
   425 #ifdef _WIN32
   426     return _strnicmp(string.ptr+string.length-suffix.length,
   427                   suffix.ptr, suffix.length) == 0;
   428 #else
   429     return strncasecmp(string.ptr + string.length - suffix.length,
   430                        suffix.ptr, suffix.length) == 0;
   431 #endif
   432 }
   434 void cx_strlower(cxmutstr string) {
   435     cx_for_n(i, string.length) {
   436         string.ptr[i] = tolower(string.ptr[i]);
   437     }
   438 }
   440 void cx_strupper(cxmutstr string) {
   441     cx_for_n(i, string.length) {
   442         string.ptr[i] = toupper(string.ptr[i]);
   443     }
   444 }

mercurial