src/string.c

changeset 251
fae240d633fc
parent 250
b7d1317b138e
child 259
2f5dea574a75
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/string.c	Tue Oct 17 16:15:41 2017 +0200
     1.3 @@ -0,0 +1,463 @@
     1.4 +/*
     1.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     1.6 + *
     1.7 + * Copyright 2017 Olaf Wintermann. All rights reserved.
     1.8 + *
     1.9 + * Redistribution and use in source and binary forms, with or without
    1.10 + * modification, are permitted provided that the following conditions are met:
    1.11 + *
    1.12 + *   1. Redistributions of source code must retain the above copyright
    1.13 + *      notice, this list of conditions and the following disclaimer.
    1.14 + *
    1.15 + *   2. Redistributions in binary form must reproduce the above copyright
    1.16 + *      notice, this list of conditions and the following disclaimer in the
    1.17 + *      documentation and/or other materials provided with the distribution.
    1.18 + *
    1.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    1.20 + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    1.21 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    1.22 + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    1.23 + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    1.24 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    1.25 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    1.26 + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    1.27 + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    1.28 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    1.29 + * POSSIBILITY OF SUCH DAMAGE.
    1.30 + */
    1.31 +
    1.32 +#include "ucx/string.h"
    1.33 +
    1.34 +#include "ucx/allocator.h"
    1.35 +
    1.36 +#include <stdlib.h>
    1.37 +#include <string.h>
    1.38 +#include <stdarg.h>
    1.39 +#include <stdint.h>
    1.40 +#include <ctype.h>
    1.41 +
    1.42 +sstr_t sstr(char *cstring) {
    1.43 +    sstr_t string;
    1.44 +    string.ptr = cstring;
    1.45 +    string.length = strlen(cstring);
    1.46 +    return string;
    1.47 +}
    1.48 +
    1.49 +sstr_t sstrn(char *cstring, size_t length) {
    1.50 +    sstr_t string;
    1.51 +    string.ptr = cstring;
    1.52 +    string.length = length;
    1.53 +    return string;
    1.54 +}
    1.55 +
    1.56 +size_t sstrnlen(size_t n, sstr_t s, ...) {
    1.57 +    va_list ap;
    1.58 +    size_t size = s.length;
    1.59 +    va_start(ap, s);
    1.60 +
    1.61 +    for (size_t i = 1 ; i < n ; i++) {
    1.62 +        sstr_t str = va_arg(ap, sstr_t);
    1.63 +        size += str.length;
    1.64 +    }
    1.65 +    va_end(ap);
    1.66 +
    1.67 +    return size;
    1.68 +}
    1.69 +
    1.70 +static sstr_t sstrvcat_a(
    1.71 +        UcxAllocator *a,
    1.72 +        size_t count,
    1.73 +        sstr_t s1,
    1.74 +        sstr_t s2,
    1.75 +        va_list ap) {
    1.76 +    sstr_t str;
    1.77 +    str.ptr = NULL;
    1.78 +    str.length = 0;
    1.79 +    if(count < 2) {
    1.80 +        return str;
    1.81 +    }
    1.82 +    
    1.83 +    sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
    1.84 +    if(!strings) {
    1.85 +        return str;
    1.86 +    }
    1.87 +    
    1.88 +    // get all args and overall length
    1.89 +    strings[0] = s1;
    1.90 +    strings[1] = s2;
    1.91 +    size_t strlen = s1.length + s2.length;
    1.92 +    for (size_t i=2;i<count;i++) {
    1.93 +        sstr_t s = va_arg (ap, sstr_t);
    1.94 +        strings[i] = s;
    1.95 +        strlen += s.length;
    1.96 +    }
    1.97 +    
    1.98 +    // create new string
    1.99 +    str.ptr = (char*) almalloc(a, strlen + 1);
   1.100 +    str.length = strlen;
   1.101 +    if(!str.ptr) {
   1.102 +        free(strings);
   1.103 +        str.length = 0;
   1.104 +        return str;
   1.105 +    }
   1.106 +    
   1.107 +    // concatenate strings
   1.108 +    size_t pos = 0;
   1.109 +    for (size_t i=0;i<count;i++) {
   1.110 +        sstr_t s = strings[i];
   1.111 +        memcpy(str.ptr + pos, s.ptr, s.length);
   1.112 +        pos += s.length;
   1.113 +    }
   1.114 +    
   1.115 +    str.ptr[str.length] = '\0';
   1.116 +    
   1.117 +    free(strings);
   1.118 +    
   1.119 +    return str;
   1.120 +}
   1.121 +
   1.122 +sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
   1.123 +    va_list ap;
   1.124 +    va_start(ap, s2);
   1.125 +    sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
   1.126 +    va_end(ap);
   1.127 +    return s;
   1.128 +}
   1.129 +
   1.130 +sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
   1.131 +    va_list ap;
   1.132 +    va_start(ap, s2);
   1.133 +    sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
   1.134 +    va_end(ap);
   1.135 +    return s;
   1.136 +}
   1.137 +
   1.138 +sstr_t sstrsubs(sstr_t s, size_t start) {
   1.139 +    return sstrsubsl (s, start, s.length-start);
   1.140 +}
   1.141 +
   1.142 +sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
   1.143 +    sstr_t new_sstr;
   1.144 +    if (start >= s.length) {
   1.145 +        new_sstr.ptr = NULL;
   1.146 +        new_sstr.length = 0;
   1.147 +    } else {
   1.148 +        if (length > s.length-start) {
   1.149 +            length = s.length-start;
   1.150 +        }
   1.151 +        new_sstr.ptr = &s.ptr[start];
   1.152 +        new_sstr.length = length;
   1.153 +    }
   1.154 +    return new_sstr;
   1.155 +}
   1.156 +
   1.157 +sstr_t sstrchr(sstr_t s, int c) {
   1.158 +    for(size_t i=0;i<s.length;i++) {
   1.159 +        if(s.ptr[i] == c) {
   1.160 +            return sstrsubs(s, i);
   1.161 +        }
   1.162 +    }
   1.163 +    sstr_t n;
   1.164 +    n.ptr = NULL;
   1.165 +    n.length = 0;
   1.166 +    return n;
   1.167 +}
   1.168 +
   1.169 +sstr_t sstrrchr(sstr_t s, int c) {
   1.170 +    if (s.length > 0) {
   1.171 +        for(size_t i=s.length;i>0;i--) {
   1.172 +            if(s.ptr[i-1] == c) {
   1.173 +                return sstrsubs(s, i-1);
   1.174 +            }
   1.175 +        }
   1.176 +    }
   1.177 +    sstr_t n;
   1.178 +    n.ptr = NULL;
   1.179 +    n.length = 0;
   1.180 +    return n;
   1.181 +}
   1.182 +
   1.183 +#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
   1.184 +    ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
   1.185 +
   1.186 +#define ptable_w(useheap, ptable, index, src) do {\
   1.187 +    if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
   1.188 +    else ((size_t*)ptable)[index] = src;\
   1.189 +    } while (0);
   1.190 +
   1.191 +sstr_t sstrstr(sstr_t string, sstr_t match) {
   1.192 +    if (match.length == 0) {
   1.193 +        return string;
   1.194 +    }
   1.195 +    
   1.196 +    /* prepare default return value in case of no match */
   1.197 +    sstr_t result = sstrn(NULL, 0);
   1.198 +    
   1.199 +    /*
   1.200 +     * IMPORTANT:
   1.201 +     * our prefix table contains the prefix length PLUS ONE
   1.202 +     * this is our decision, because we want to use the full range of size_t
   1.203 +     * the original algorithm needs a (-1) at one single place
   1.204 +     * and we want to avoid that
   1.205 +     */
   1.206 +    
   1.207 +    /* static prefix table */
   1.208 +    static uint8_t s_prefix_table[256];
   1.209 +    
   1.210 +    /* check pattern length and use appropriate prefix table */
   1.211 +    /* if the pattern exceeds static prefix table, allocate on the heap */
   1.212 +    register int useheap = match.length > 255;
   1.213 +    register void* ptable = useheap ?
   1.214 +        calloc(match.length+1, sizeof(size_t)): s_prefix_table;
   1.215 +    
   1.216 +    /* keep counter in registers */
   1.217 +    register size_t i, j;
   1.218 +    
   1.219 +    /* fill prefix table */
   1.220 +    i = 0; j = 0;
   1.221 +    ptable_w(useheap, ptable, i, j);
   1.222 +    while (i < match.length) {
   1.223 +        while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
   1.224 +            ptable_r(j, useheap, ptable, j-1);
   1.225 +        }
   1.226 +        i++; j++;
   1.227 +        ptable_w(useheap, ptable, i, j);
   1.228 +    }
   1.229 +
   1.230 +    /* search */
   1.231 +    i = 0; j = 1;
   1.232 +    while (i < string.length) {
   1.233 +        while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
   1.234 +            ptable_r(j, useheap, ptable, j-1);
   1.235 +        }
   1.236 +        i++; j++;
   1.237 +        if (j-1 == match.length) {
   1.238 +            size_t start = i - match.length;
   1.239 +            result.ptr = string.ptr + start;
   1.240 +            result.length = string.length - start;
   1.241 +            break;
   1.242 +        }
   1.243 +    }
   1.244 +
   1.245 +    /* if prefix table was allocated on the heap, free it */
   1.246 +    if (ptable != s_prefix_table) {
   1.247 +        free(ptable);
   1.248 +    }
   1.249 +    
   1.250 +    return result;
   1.251 +}
   1.252 +
   1.253 +#undef ptable_r
   1.254 +#undef ptable_w
   1.255 +
   1.256 +sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
   1.257 +    return sstrsplit_a(ucx_default_allocator(), s, d, n);
   1.258 +}
   1.259 +
   1.260 +sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
   1.261 +    if (s.length == 0 || d.length == 0) {
   1.262 +        *n = -1;
   1.263 +        return NULL;
   1.264 +    }
   1.265 +    
   1.266 +    /* special cases: delimiter is at least as large as the string */
   1.267 +    if (d.length >= s.length) {
   1.268 +        /* exact match */
   1.269 +        if (sstrcmp(s, d) == 0) {
   1.270 +            *n = 0;
   1.271 +            return NULL;
   1.272 +        } else /* no match possible */ {
   1.273 +            *n = 1;
   1.274 +            sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
   1.275 +            *result = sstrdup_a(allocator, s);
   1.276 +            return result;
   1.277 +        }
   1.278 +    }
   1.279 +    
   1.280 +    ssize_t nmax = *n;
   1.281 +    size_t arrlen = 16;
   1.282 +    sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
   1.283 +
   1.284 +    if (result) {
   1.285 +        sstr_t curpos = s;
   1.286 +        ssize_t j = 1;
   1.287 +        while (1) {
   1.288 +            sstr_t match;
   1.289 +            /* optimize for one byte delimiters */
   1.290 +            if (d.length == 1) {
   1.291 +                match = curpos;
   1.292 +                for (size_t i = 0 ; i < curpos.length ; i++) {
   1.293 +                    if (curpos.ptr[i] == *(d.ptr)) {
   1.294 +                        match.ptr = curpos.ptr + i;
   1.295 +                        break;
   1.296 +                    }
   1.297 +                    match.length--;
   1.298 +                }
   1.299 +            } else {
   1.300 +                match = sstrstr(curpos, d);
   1.301 +            }
   1.302 +            if (match.length > 0) {
   1.303 +                /* is this our last try? */
   1.304 +                if (nmax == 0 || j < nmax) {
   1.305 +                    /* copy the current string to the array */
   1.306 +                    sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
   1.307 +                    result[j-1] = sstrdup_a(allocator, item);
   1.308 +                    size_t processed = item.length + d.length;
   1.309 +                    curpos.ptr += processed;
   1.310 +                    curpos.length -= processed;
   1.311 +
   1.312 +                    /* allocate memory for the next string */
   1.313 +                    j++;
   1.314 +                    if (j > arrlen) {
   1.315 +                        arrlen *= 2;
   1.316 +                        sstr_t* reallocated = (sstr_t*) alrealloc(
   1.317 +                                allocator, result, arrlen*sizeof(sstr_t));
   1.318 +                        if (reallocated) {
   1.319 +                            result = reallocated;
   1.320 +                        } else {
   1.321 +                            for (ssize_t i = 0 ; i < j-1 ; i++) {
   1.322 +                                alfree(allocator, result[i].ptr);
   1.323 +                            }
   1.324 +                            alfree(allocator, result);
   1.325 +                            *n = -2;
   1.326 +                            return NULL;
   1.327 +                        }
   1.328 +                    }
   1.329 +                } else {
   1.330 +                    /* nmax reached, copy the _full_ remaining string */
   1.331 +                    result[j-1] = sstrdup_a(allocator, curpos);
   1.332 +                    break;
   1.333 +                }
   1.334 +            } else {
   1.335 +                /* no more matches, copy last string */
   1.336 +                result[j-1] = sstrdup_a(allocator, curpos);
   1.337 +                break;
   1.338 +            }
   1.339 +        }
   1.340 +        *n = j;
   1.341 +    } else {
   1.342 +        *n = -2;
   1.343 +    }
   1.344 +
   1.345 +    return result;
   1.346 +}
   1.347 +
   1.348 +int sstrcmp(sstr_t s1, sstr_t s2) {
   1.349 +    if (s1.length == s2.length) {
   1.350 +        return memcmp(s1.ptr, s2.ptr, s1.length);
   1.351 +    } else if (s1.length > s2.length) {
   1.352 +        return 1;
   1.353 +    } else {
   1.354 +        return -1;
   1.355 +    }
   1.356 +}
   1.357 +
   1.358 +int sstrcasecmp(sstr_t s1, sstr_t s2) {
   1.359 +    if (s1.length == s2.length) {
   1.360 +#ifdef _WIN32
   1.361 +        return _strnicmp(s1.ptr, s2.ptr, s1.length);
   1.362 +#else
   1.363 +        return strncasecmp(s1.ptr, s2.ptr, s1.length);
   1.364 +#endif
   1.365 +    } else if (s1.length > s2.length) {
   1.366 +        return 1;
   1.367 +    } else {
   1.368 +        return -1;
   1.369 +    }
   1.370 +}
   1.371 +
   1.372 +sstr_t sstrdup(sstr_t s) {
   1.373 +    return sstrdup_a(ucx_default_allocator(), s);
   1.374 +}
   1.375 +
   1.376 +sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
   1.377 +    sstr_t newstring;
   1.378 +    newstring.ptr = (char*)almalloc(allocator, s.length + 1);
   1.379 +    if (newstring.ptr) {
   1.380 +        newstring.length = s.length;
   1.381 +        newstring.ptr[newstring.length] = 0;
   1.382 +        
   1.383 +        memcpy(newstring.ptr, s.ptr, s.length);
   1.384 +    } else {
   1.385 +        newstring.length = 0;
   1.386 +    }
   1.387 +    
   1.388 +    return newstring;
   1.389 +}
   1.390 +
   1.391 +sstr_t sstrtrim(sstr_t string) {
   1.392 +    sstr_t newstr = string;
   1.393 +    
   1.394 +    while (newstr.length > 0 && isspace(*newstr.ptr)) {
   1.395 +        newstr.ptr++;
   1.396 +        newstr.length--;
   1.397 +    }
   1.398 +    while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
   1.399 +        newstr.length--;
   1.400 +    }
   1.401 +    
   1.402 +    return newstr;
   1.403 +}
   1.404 +
   1.405 +int sstrprefix(sstr_t string, sstr_t prefix) {
   1.406 +    if (string.length == 0) {
   1.407 +        return prefix.length == 0;
   1.408 +    }
   1.409 +    if (prefix.length == 0) {
   1.410 +        return 1;
   1.411 +    }
   1.412 +    
   1.413 +    if (prefix.length > string.length) {
   1.414 +        return 0;
   1.415 +    } else {
   1.416 +        return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   1.417 +    }
   1.418 +}
   1.419 +
   1.420 +int sstrsuffix(sstr_t string, sstr_t suffix) {
   1.421 +    if (string.length == 0) {
   1.422 +        return suffix.length == 0;
   1.423 +    }
   1.424 +    if (suffix.length == 0) {
   1.425 +        return 1;
   1.426 +    }
   1.427 +    
   1.428 +    if (suffix.length > string.length) {
   1.429 +        return 0;
   1.430 +    } else {
   1.431 +        return memcmp(string.ptr+string.length-suffix.length,
   1.432 +            suffix.ptr, suffix.length) == 0;
   1.433 +    }
   1.434 +}
   1.435 +
   1.436 +sstr_t sstrlower(sstr_t string) {
   1.437 +    sstr_t ret = sstrdup(string);
   1.438 +    for (size_t i = 0; i < ret.length ; i++) {
   1.439 +        ret.ptr[i] = tolower(ret.ptr[i]);
   1.440 +    }
   1.441 +    return ret;
   1.442 +}
   1.443 +
   1.444 +sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
   1.445 +    sstr_t ret = sstrdup_a(allocator, string);
   1.446 +    for (size_t i = 0; i < ret.length ; i++) {
   1.447 +        ret.ptr[i] = tolower(ret.ptr[i]);
   1.448 +    }
   1.449 +    return ret;
   1.450 +}
   1.451 +
   1.452 +sstr_t sstrupper(sstr_t string) {
   1.453 +    sstr_t ret = sstrdup(string);
   1.454 +    for (size_t i = 0; i < ret.length ; i++) {
   1.455 +        ret.ptr[i] = toupper(ret.ptr[i]);
   1.456 +    }
   1.457 +    return ret;
   1.458 +}
   1.459 +
   1.460 +sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
   1.461 +    sstr_t ret = sstrdup_a(allocator, string);
   1.462 +    for (size_t i = 0; i < ret.length ; i++) {
   1.463 +        ret.ptr[i] = toupper(ret.ptr[i]);
   1.464 +    }
   1.465 +    return ret;
   1.466 +}

mercurial