more implementations of string functions

Wed, 31 Aug 2022 23:12:05 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 31 Aug 2022 23:12:05 +0200
changeset 580
aac47db8da0b
parent 579
bbc46dcd5255
child 581
c067394737ca

more implementations of string functions

src/string.c file | annotate | diff | comparison | revisions
     1.1 --- a/src/string.c	Tue Aug 30 19:56:07 2022 +0200
     1.2 +++ b/src/string.c	Wed Aug 31 23:12:05 2022 +0200
     1.3 @@ -125,5 +125,214 @@
     1.4      return result;
     1.5  }
     1.6  
     1.7 +cxstring cx_strsubs(
     1.8 +        cxstring string,
     1.9 +        size_t start
    1.10 +) {
    1.11 +    return cx_strsubsl(string, start, string.length - start);
    1.12 +}
    1.13  
    1.14 +cxmutstr cx_strsubs_m(
    1.15 +        cxmutstr string,
    1.16 +        size_t start
    1.17 +) {
    1.18 +    return cx_strsubsl_m(string, start, string.length - start);
    1.19 +}
    1.20  
    1.21 +cxstring cx_strsubsl(
    1.22 +        cxstring string,
    1.23 +        size_t start,
    1.24 +        size_t length
    1.25 +) {
    1.26 +    if (start > string.length) {
    1.27 +        return (cxstring) {NULL, 0};
    1.28 +    }
    1.29 +
    1.30 +    size_t rem_len = string.length - start;
    1.31 +    if (length > rem_len) {
    1.32 +        length = rem_len;
    1.33 +    }
    1.34 +
    1.35 +    return (cxstring) {string.ptr + start, length};
    1.36 +}
    1.37 +
    1.38 +cxmutstr cx_strsubsl_m(
    1.39 +        cxmutstr string,
    1.40 +        size_t start,
    1.41 +        size_t length
    1.42 +) {
    1.43 +    cxstring result = cx_strsubsl(cx_strcast(string), start, length);
    1.44 +    return (cxmutstr) {(char *) result.ptr, result.length};
    1.45 +}
    1.46 +
    1.47 +cxstring cx_strchr(
    1.48 +        cxstring string,
    1.49 +        int chr
    1.50 +) {
    1.51 +    chr = 0xFF & chr;
    1.52 +    // TODO: improve by comparing multiple bytes at once
    1.53 +    cx_for_n(i, string.length) {
    1.54 +        if (string.ptr[i] == chr) {
    1.55 +            return cx_strsubs(string, i);
    1.56 +        }
    1.57 +    }
    1.58 +    return (cxstring) {NULL, 0};
    1.59 +}
    1.60 +
    1.61 +cxmutstr cx_strchr_m(
    1.62 +        cxmutstr string,
    1.63 +        int chr
    1.64 +) {
    1.65 +    cxstring result = cx_strchr(cx_strcast(string), chr);
    1.66 +    return (cxmutstr) {(char *) result.ptr, result.length};
    1.67 +}
    1.68 +
    1.69 +cxstring cx_strrchr(
    1.70 +        cxstring string,
    1.71 +        int chr
    1.72 +) {
    1.73 +    chr = 0xFF & chr;
    1.74 +    size_t i = string.length;
    1.75 +    while (i > 0) {
    1.76 +        i--;
    1.77 +        // TODO: improve by comparing multiple bytes at once
    1.78 +        if (string.ptr[i] == chr) {
    1.79 +            return cx_strsubs(string, i);
    1.80 +        }
    1.81 +    }
    1.82 +    return (cxstring) {NULL, 0};
    1.83 +}
    1.84 +
    1.85 +cxmutstr cx_strrchr_m(
    1.86 +        cxmutstr string,
    1.87 +        int chr
    1.88 +) {
    1.89 +    cxstring result = cx_strrchr(cx_strcast(string), chr);
    1.90 +    return (cxmutstr) {(char *) result.ptr, result.length};
    1.91 +}
    1.92 +
    1.93 +#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
    1.94 +    ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
    1.95 +
    1.96 +#define ptable_w(useheap, ptable, index, src) do {\
    1.97 +    if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
    1.98 +    else ((size_t*)ptable)[index] = src;\
    1.99 +    } while (0)
   1.100 +
   1.101 +
   1.102 +cxstring cx_strstr(
   1.103 +        cxstring haystack,
   1.104 +        cxstring needle
   1.105 +) {
   1.106 +    if (needle.length == 0) {
   1.107 +        return haystack;
   1.108 +    }
   1.109 +
   1.110 +    /*
   1.111 +     * IMPORTANT:
   1.112 +     * Our prefix table contains the prefix length PLUS ONE
   1.113 +     * this is our decision, because we want to use the full range of size_t.
   1.114 +     * The original algorithm needs a (-1) at one single place,
   1.115 +     * and we want to avoid that.
   1.116 +     */
   1.117 +
   1.118 +    /* static prefix table */
   1.119 +    static uint8_t s_prefix_table[512];
   1.120 +
   1.121 +    /* check pattern length and use appropriate prefix table */
   1.122 +    /* if the pattern exceeds static prefix table, allocate on the heap */
   1.123 +    register int useheap = needle.length >= 512;
   1.124 +    register void *ptable = useheap ? calloc(needle.length + 1,
   1.125 +                                             sizeof(size_t)) : s_prefix_table;
   1.126 +
   1.127 +    /* keep counter in registers */
   1.128 +    register size_t i, j;
   1.129 +
   1.130 +    /* fill prefix table */
   1.131 +    i = 0;
   1.132 +    j = 0;
   1.133 +    ptable_w(useheap, ptable, i, j);
   1.134 +    while (i < needle.length) {
   1.135 +        while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
   1.136 +            ptable_r(j, useheap, ptable, j - 1);
   1.137 +        }
   1.138 +        i++;
   1.139 +        j++;
   1.140 +        ptable_w(useheap, ptable, i, j);
   1.141 +    }
   1.142 +
   1.143 +    /* search */
   1.144 +    cxstring result = {NULL, 0};
   1.145 +    i = 0;
   1.146 +    j = 1;
   1.147 +    while (i < haystack.length) {
   1.148 +        while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
   1.149 +            ptable_r(j, useheap, ptable, j - 1);
   1.150 +        }
   1.151 +        i++;
   1.152 +        j++;
   1.153 +        if (j - 1 == needle.length) {
   1.154 +            size_t start = i - needle.length;
   1.155 +            result.ptr = haystack.ptr + start;
   1.156 +            result.length = haystack.length - start;
   1.157 +            break;
   1.158 +        }
   1.159 +    }
   1.160 +
   1.161 +    /* if prefix table was allocated on the heap, free it */
   1.162 +    if (ptable != s_prefix_table) {
   1.163 +        free(ptable);
   1.164 +    }
   1.165 +
   1.166 +    return result;
   1.167 +}
   1.168 +
   1.169 +cxmutstr cx_strstr_m(
   1.170 +        cxmutstr haystack,
   1.171 +        cxstring needle
   1.172 +) {
   1.173 +    cxstring result = cx_strstr(cx_strcast(haystack), needle);
   1.174 +    return (cxmutstr) {(char *) result.ptr, result.length};
   1.175 +}
   1.176 +
   1.177 +size_t cx_strsplit(
   1.178 +        cxstring string,
   1.179 +        cxstring delim,
   1.180 +        size_t limit,
   1.181 +        cxstring *output
   1.182 +) {
   1.183 +    // TODO: implement
   1.184 +    return 0;
   1.185 +}
   1.186 +
   1.187 +size_t cx_strsplit_a(
   1.188 +        CxAllocator *allocator,
   1.189 +        cxstring string,
   1.190 +        cxstring delim,
   1.191 +        size_t limit,
   1.192 +        cxstring **output
   1.193 +) {
   1.194 +    // TODO: implement
   1.195 +    return 0;
   1.196 +}
   1.197 +
   1.198 +size_t cx_strsplit_m(
   1.199 +        cxmutstr string,
   1.200 +        cxstring delim,
   1.201 +        size_t limit,
   1.202 +        cxmutstr *output
   1.203 +) {
   1.204 +    return cx_strsplit(cx_strcast(string),
   1.205 +                       delim, limit, (cxstring *) output);
   1.206 +}
   1.207 +
   1.208 +size_t cx_strsplit_ma(
   1.209 +        CxAllocator *allocator,
   1.210 +        cxmutstr string,
   1.211 +        cxstring delim,
   1.212 +        size_t limit,
   1.213 +        cxmutstr **output
   1.214 +) {
   1.215 +    return cx_strsplit_a(allocator, cx_strcast(string),
   1.216 +                         delim, limit, (cxstring **) output);
   1.217 +}

mercurial