src/string.c

changeset 583
0f3c9662f9b5
parent 582
96fa7fa6af4f
child 590
02a56701a5cb
     1.1 --- a/src/string.c	Sat Sep 03 15:11:23 2022 +0200
     1.2 +++ b/src/string.c	Fri Sep 09 20:19:08 2022 +0200
     1.3 @@ -72,6 +72,15 @@
     1.4      str->length = 0;
     1.5  }
     1.6  
     1.7 +void cx_strfree_a(
     1.8 +        CxAllocator *alloc,
     1.9 +        cxmutstr *str
    1.10 +) {
    1.11 +    cxFree(alloc, str->ptr);
    1.12 +    str->ptr = NULL;
    1.13 +    str->length = 0;
    1.14 +}
    1.15 +
    1.16  size_t cx_strlen(
    1.17          size_t count,
    1.18          ...
    1.19 @@ -235,6 +244,11 @@
    1.20          return haystack;
    1.21      }
    1.22  
    1.23 +    /* optimize for single-char needles */
    1.24 +    if (needle.length == 1) {
    1.25 +        return cx_strchr(haystack, *needle.ptr);
    1.26 +    }
    1.27 +
    1.28      /*
    1.29       * IMPORTANT:
    1.30       * Our prefix table contains the prefix length PLUS ONE
    1.31 @@ -308,8 +322,55 @@
    1.32          size_t limit,
    1.33          cxstring *output
    1.34  ) {
    1.35 -    // TODO: implement
    1.36 -    return 0;
    1.37 +    /* special case: output limit is zero */
    1.38 +    if (limit == 0) return 0;
    1.39 +
    1.40 +    /* special case: delimiter is empty */
    1.41 +    if (delim.length == 0) {
    1.42 +        output[0] = string;
    1.43 +        return 1;
    1.44 +    }
    1.45 +
    1.46 +    /* special cases: delimiter is at least as large as the string */
    1.47 +    if (delim.length >= string.length) {
    1.48 +        /* exact match */
    1.49 +        if (cx_strcmp(string, delim) == 0) {
    1.50 +            output[0] = cx_strn(string.ptr, 0);
    1.51 +            output[1] = cx_strn(string.ptr + string.length, 0);
    1.52 +            return 2;
    1.53 +        } else /* no match possible */ {
    1.54 +            output[0] = string;
    1.55 +            return 1;
    1.56 +        }
    1.57 +    }
    1.58 +
    1.59 +    size_t n = 0;
    1.60 +    cxstring curpos = string;
    1.61 +    while (1) {
    1.62 +        ++n;
    1.63 +        cxstring match = cx_strstr(curpos, delim);
    1.64 +        if (match.length > 0) {
    1.65 +            /* is the limit reached? */
    1.66 +            if (n < limit) {
    1.67 +                /* copy the current string to the array */
    1.68 +                cxstring item = cx_strn(curpos.ptr, match.ptr - curpos.ptr);
    1.69 +                output[n - 1] = item;
    1.70 +                size_t processed = item.length + delim.length;
    1.71 +                curpos.ptr += processed;
    1.72 +                curpos.length -= processed;
    1.73 +            } else {
    1.74 +                /* limit reached, copy the _full_ remaining string */
    1.75 +                output[n - 1] = curpos;
    1.76 +                break;
    1.77 +            }
    1.78 +        } else {
    1.79 +            /* no more matches, copy last string */
    1.80 +            output[n - 1] = curpos;
    1.81 +            break;
    1.82 +        }
    1.83 +    }
    1.84 +
    1.85 +    return n;
    1.86  }
    1.87  
    1.88  size_t cx_strsplit_a(
    1.89 @@ -319,8 +380,29 @@
    1.90          size_t limit,
    1.91          cxstring **output
    1.92  ) {
    1.93 -    // TODO: implement
    1.94 -    return 0;
    1.95 +    /* find out how many splits we're going to make and allocate memory */
    1.96 +    size_t n = 0;
    1.97 +    cxstring curpos = string;
    1.98 +    while (1) {
    1.99 +        ++n;
   1.100 +        cxstring match = cx_strstr(curpos, delim);
   1.101 +        if (match.length > 0) {
   1.102 +            /* is the limit reached? */
   1.103 +            if (n < limit) {
   1.104 +                size_t processed = match.ptr - curpos.ptr + delim.length;
   1.105 +                curpos.ptr += processed;
   1.106 +                curpos.length -= processed;
   1.107 +            } else {
   1.108 +                /* limit reached */
   1.109 +                break;
   1.110 +            }
   1.111 +        } else {
   1.112 +            /* no more matches */
   1.113 +            break;
   1.114 +        }
   1.115 +    }
   1.116 +    *output = cxCalloc(allocator, n, sizeof(cxstring));
   1.117 +    return cx_strsplit(string, delim, n, *output);
   1.118  }
   1.119  
   1.120  size_t cx_strsplit_m(
   1.121 @@ -344,7 +426,10 @@
   1.122                           delim, limit, (cxstring **) output);
   1.123  }
   1.124  
   1.125 -int cx_strcmp(cxstring s1, cxstring s2) {
   1.126 +int cx_strcmp(
   1.127 +        cxstring s1,
   1.128 +        cxstring s2
   1.129 +) {
   1.130      if (s1.length == s2.length) {
   1.131          return memcmp(s1.ptr, s2.ptr, s1.length);
   1.132      } else if (s1.length > s2.length) {
   1.133 @@ -354,7 +439,10 @@
   1.134      }
   1.135  }
   1.136  
   1.137 -int cx_strcasecmp(cxstring s1, cxstring s2) {
   1.138 +int cx_strcasecmp(
   1.139 +        cxstring s1,
   1.140 +        cxstring s2
   1.141 +) {
   1.142      if (s1.length == s2.length) {
   1.143  #ifdef _WIN32
   1.144          return _strnicmp(s1.ptr, s2.ptr, s1.length);
   1.145 @@ -368,7 +456,10 @@
   1.146      }
   1.147  }
   1.148  
   1.149 -cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) {
   1.150 +cxmutstr cx_strdup_a(
   1.151 +        CxAllocator *allocator,
   1.152 +        cxstring string
   1.153 +) {
   1.154      cxmutstr result = {
   1.155              cxMalloc(allocator, string.length + 1),
   1.156              string.length
   1.157 @@ -400,18 +491,27 @@
   1.158      return (cxmutstr) {(char *) result.ptr, result.length};
   1.159  }
   1.160  
   1.161 -bool cx_strprefix(cxstring string, cxstring prefix) {
   1.162 +bool cx_strprefix(
   1.163 +        cxstring string,
   1.164 +        cxstring prefix
   1.165 +) {
   1.166      if (string.length < prefix.length) return false;
   1.167      return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   1.168  }
   1.169  
   1.170 -bool cx_strsuffix(cxstring string, cxstring suffix) {
   1.171 +bool cx_strsuffix(
   1.172 +        cxstring string,
   1.173 +        cxstring suffix
   1.174 +) {
   1.175      if (string.length < suffix.length) return false;
   1.176      return memcmp(string.ptr + string.length - suffix.length,
   1.177                    suffix.ptr, suffix.length) == 0;
   1.178  }
   1.179  
   1.180 -bool cx_casestrprefix(cxstring string, cxstring prefix) {
   1.181 +bool cx_strcaseprefix(
   1.182 +        cxstring string,
   1.183 +        cxstring prefix
   1.184 +) {
   1.185      if (string.length < prefix.length) return false;
   1.186  #ifdef _WIN32
   1.187      return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
   1.188 @@ -420,7 +520,10 @@
   1.189  #endif
   1.190  }
   1.191  
   1.192 -bool cx_casestrsuffix(cxstring string, cxstring suffix) {
   1.193 +bool cx_strcasesuffix(
   1.194 +        cxstring string,
   1.195 +        cxstring suffix
   1.196 +) {
   1.197      if (string.length < suffix.length) return false;
   1.198  #ifdef _WIN32
   1.199      return _strnicmp(string.ptr+string.length-suffix.length,
   1.200 @@ -442,3 +545,133 @@
   1.201          string.ptr[i] = toupper(string.ptr[i]);
   1.202      }
   1.203  }
   1.204 +
   /* Upper bound for the number of match indices stored in one index buffer. */
   #define REPLACE_INDEX_BUFFER_MAX 100

   /*
    * One node of the singly linked chain of index buffers that
    * cx_strreplacen_a() uses to remember where the pattern was found.
    */
   struct cx_strreplace_ibuf {
       size_t *buf;                     /* heap array of match offsets */
       unsigned int len;                /* number of used entries in buf (small indices) */
       struct cx_strreplace_ibuf *next; /* following node, NULL at the end of the chain */
   };

   /*
    * Frees a whole chain of index buffers: for every node the offset array is
    * released first and the node itself afterwards. A NULL argument is a no-op.
    */
   static void cx_strrepl_free_ibuf(struct cx_strreplace_ibuf *buf) {
       struct cx_strreplace_ibuf *following;
       for (struct cx_strreplace_ibuf *node = buf; node != NULL; node = following) {
           /* remember the successor before the node is released */
           following = node->next;
           free(node->buf);
           free(node);
       }
   }
   1.221 +
   1.222 +cxmutstr cx_strreplacen_a(
   1.223 +        CxAllocator *allocator,
   1.224 +        cxstring str,
   1.225 +        cxstring pattern,
   1.226 +        cxstring replacement,
   1.227 +        size_t replmax
   1.228 +) {
   1.229 +
   1.230 +    if (pattern.length == 0 || pattern.length > str.length || replmax == 0)
   1.231 +        return cx_strdup_a(allocator, str);
   1.232 +
   1.233 +    /* Compute expected buffer length */
   1.234 +    size_t ibufmax = str.length / pattern.length;
   1.235 +    size_t ibuflen = replmax < ibufmax ? replmax : ibufmax;
   1.236 +    if (ibuflen > REPLACE_INDEX_BUFFER_MAX) {
   1.237 +        ibuflen = REPLACE_INDEX_BUFFER_MAX;
   1.238 +    }
   1.239 +
   1.240 +    /* Allocate first index buffer */
   1.241 +    struct cx_strreplace_ibuf *firstbuf, *curbuf;
   1.242 +    firstbuf = curbuf = calloc(1, sizeof(struct cx_strreplace_ibuf));
   1.243 +    if (!firstbuf) return cx_mutstrn(NULL, 0);
   1.244 +    firstbuf->buf = calloc(ibuflen, sizeof(size_t));
   1.245 +    if (!firstbuf->buf) {
   1.246 +        free(firstbuf);
   1.247 +        return cx_mutstrn(NULL, 0);
   1.248 +    }
   1.249 +
   1.250 +    /* Search occurrences */
   1.251 +    cxstring searchstr = str;
   1.252 +    size_t found = 0;
   1.253 +    do {
   1.254 +        cxstring match = cx_strstr(searchstr, pattern);
   1.255 +        if (match.length > 0) {
   1.256 +            /* Allocate next buffer in chain, if required */
   1.257 +            if (curbuf->len == ibuflen) {
   1.258 +                struct cx_strreplace_ibuf *nextbuf =
   1.259 +                        calloc(1, sizeof(struct cx_strreplace_ibuf));
   1.260 +                if (!nextbuf) {
   1.261 +                    cx_strrepl_free_ibuf(firstbuf);
   1.262 +                    return cx_mutstrn(NULL, 0);
   1.263 +                }
   1.264 +                nextbuf->buf = calloc(ibuflen, sizeof(size_t));
   1.265 +                if (!nextbuf->buf) {
   1.266 +                    free(nextbuf);
   1.267 +                    cx_strrepl_free_ibuf(firstbuf);
   1.268 +                    return cx_mutstrn(NULL, 0);
   1.269 +                }
   1.270 +                curbuf->next = nextbuf;
   1.271 +                curbuf = nextbuf;
   1.272 +            }
   1.273 +
   1.274 +            /* Record match index */
   1.275 +            found++;
   1.276 +            size_t idx = match.ptr - str.ptr;
   1.277 +            curbuf->buf[curbuf->len++] = idx;
   1.278 +            searchstr.ptr = match.ptr + pattern.length;
   1.279 +            searchstr.length = str.length - idx - pattern.length;
   1.280 +        } else {
   1.281 +            break;
   1.282 +        }
   1.283 +    } while (searchstr.length > 0 && found < replmax);
   1.284 +
   1.285 +    /* Allocate result string */
   1.286 +    cxmutstr result;
   1.287 +    {
   1.288 +        ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length;
   1.289 +        size_t rcount = 0;
   1.290 +        curbuf = firstbuf;
   1.291 +        do {
   1.292 +            rcount += curbuf->len;
   1.293 +            curbuf = curbuf->next;
   1.294 +        } while (curbuf);
   1.295 +        result.length = str.length + rcount * adjlen;
   1.296 +        result.ptr = cxMalloc(allocator, result.length);
   1.297 +        if (!result.ptr) {
   1.298 +            cx_strrepl_free_ibuf(firstbuf);
   1.299 +            return cx_mutstrn(NULL, 0);
   1.300 +        }
   1.301 +    }
   1.302 +
   1.303 +    /* Build result string */
   1.304 +    curbuf = firstbuf;
   1.305 +    size_t srcidx = 0;
   1.306 +    char *destptr = result.ptr;
   1.307 +    do {
   1.308 +        for (size_t i = 0; i < curbuf->len; i++) {
   1.309 +            /* Copy source part up to next match*/
   1.310 +            size_t idx = curbuf->buf[i];
   1.311 +            size_t srclen = idx - srcidx;
   1.312 +            if (srclen > 0) {
   1.313 +                memcpy(destptr, str.ptr + srcidx, srclen);
   1.314 +                destptr += srclen;
   1.315 +                srcidx += srclen;
   1.316 +            }
   1.317 +
   1.318 +            /* Copy the replacement and skip the source pattern */
   1.319 +            srcidx += pattern.length;
   1.320 +            memcpy(destptr, replacement.ptr, replacement.length);
   1.321 +            destptr += replacement.length;
   1.322 +        }
   1.323 +        curbuf = curbuf->next;
   1.324 +    } while (curbuf);
   1.325 +    memcpy(destptr, str.ptr + srcidx, str.length - srcidx);
   1.326 +
   1.327 +    /* Free index buffer */
   1.328 +    cx_strrepl_free_ibuf(firstbuf);
   1.329 +
   1.330 +    return result;
   1.331 +}
   1.332 +
   1.333 +

mercurial