src/ucx/string.c

changeset 39
ac35daceb24c
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/ucx/string.c	Tue Aug 23 13:49:38 2016 +0200
     1.3 @@ -0,0 +1,381 @@
     1.4 +/*
     1.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     1.6 + *
     1.7 + * Copyright 2015 Olaf Wintermann. All rights reserved.
     1.8 + *
     1.9 + * Redistribution and use in source and binary forms, with or without
    1.10 + * modification, are permitted provided that the following conditions are met:
    1.11 + *
    1.12 + *   1. Redistributions of source code must retain the above copyright
    1.13 + *      notice, this list of conditions and the following disclaimer.
    1.14 + *
    1.15 + *   2. Redistributions in binary form must reproduce the above copyright
    1.16 + *      notice, this list of conditions and the following disclaimer in the
    1.17 + *      documentation and/or other materials provided with the distribution.
    1.18 + *
    1.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    1.20 + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    1.21 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    1.22 + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    1.23 + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    1.24 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    1.25 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    1.26 + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    1.27 + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    1.28 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    1.29 + * POSSIBILITY OF SUCH DAMAGE.
    1.30 + */
    1.31 +
    1.32 +#include <stdlib.h>
    1.33 +#include <string.h>
    1.34 +#include <stdarg.h>
    1.35 +#include <ctype.h>
    1.36 +
    1.37 +#include "string.h"
    1.38 +#include "allocator.h"
    1.39 +
    1.40 +sstr_t sstr(char *cstring) {
    1.41 +    sstr_t string;
    1.42 +    string.ptr = cstring;
    1.43 +    string.length = strlen(cstring);
    1.44 +    return string;
    1.45 +}
    1.46 +
    1.47 +sstr_t sstrn(char *cstring, size_t length) {
    1.48 +    sstr_t string;
    1.49 +    string.ptr = cstring;
    1.50 +    string.length = length;
    1.51 +    return string;
    1.52 +}
    1.53 +
    1.54 +size_t sstrnlen(size_t n, sstr_t s, ...) {
    1.55 +    va_list ap;
    1.56 +    size_t size = s.length;
    1.57 +    va_start(ap, s);
    1.58 +
    1.59 +    for (size_t i = 1 ; i < n ; i++) {
    1.60 +        sstr_t str = va_arg(ap, sstr_t);
    1.61 +        size += str.length;
    1.62 +    }
    1.63 +    va_end(ap);
    1.64 +
    1.65 +    return size;
    1.66 +}
    1.67 +
    1.68 +static sstr_t sstrvcat_a(
    1.69 +        UcxAllocator *a,
    1.70 +        size_t count,
    1.71 +        sstr_t s1,
    1.72 +        sstr_t s2,
    1.73 +        va_list ap) {
    1.74 +    sstr_t str;
    1.75 +    str.ptr = NULL;
    1.76 +    str.length = 0;
    1.77 +    if(count < 2) {
    1.78 +        return str;
    1.79 +    }
    1.80 +    
    1.81 +    sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
    1.82 +    if(!strings) {
    1.83 +        return str;
    1.84 +    }
    1.85 +    
    1.86 +    // get all args and overall length
    1.87 +    strings[0] = s1;
    1.88 +    strings[1] = s2;
    1.89 +    size_t strlen = s1.length + s2.length;
    1.90 +    for (size_t i=2;i<count;i++) {
    1.91 +        sstr_t s = va_arg (ap, sstr_t);
    1.92 +        strings[i] = s;
    1.93 +        strlen += s.length;
    1.94 +    }
    1.95 +    
    1.96 +    // create new string
    1.97 +    str.ptr = (char*) almalloc(a, strlen + 1);
    1.98 +    str.length = strlen;
    1.99 +    if(!str.ptr) {
   1.100 +        free(strings);
   1.101 +        str.length = 0;
   1.102 +        return str;
   1.103 +    }
   1.104 +    
   1.105 +    // concatenate strings
   1.106 +    size_t pos = 0;
   1.107 +    for (size_t i=0;i<count;i++) {
   1.108 +        sstr_t s = strings[i];
   1.109 +        memcpy(str.ptr + pos, s.ptr, s.length);
   1.110 +        pos += s.length;
   1.111 +    }
   1.112 +    
   1.113 +    str.ptr[str.length] = '\0';
   1.114 +    
   1.115 +    free(strings);
   1.116 +    
   1.117 +    return str;
   1.118 +}
   1.119 +
   1.120 +sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
   1.121 +    va_list ap;
   1.122 +    va_start(ap, s2);
   1.123 +    sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
   1.124 +    va_end(ap);
   1.125 +    return s;
   1.126 +}
   1.127 +
   1.128 +sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
   1.129 +    va_list ap;
   1.130 +    va_start(ap, s2);
   1.131 +    sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
   1.132 +    va_end(ap);
   1.133 +    return s;
   1.134 +}
   1.135 +
   1.136 +sstr_t sstrsubs(sstr_t s, size_t start) {
   1.137 +    return sstrsubsl (s, start, s.length-start);
   1.138 +}
   1.139 +
   1.140 +sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
   1.141 +    sstr_t new_sstr;
   1.142 +    if (start >= s.length) {
   1.143 +        new_sstr.ptr = NULL;
   1.144 +        new_sstr.length = 0;
   1.145 +    } else {
   1.146 +        if (length > s.length-start) {
   1.147 +            length = s.length-start;
   1.148 +        }
   1.149 +        new_sstr.ptr = &s.ptr[start];
   1.150 +        new_sstr.length = length;
   1.151 +    }
   1.152 +    return new_sstr;
   1.153 +}
   1.154 +
   1.155 +sstr_t sstrchr(sstr_t s, int c) {
   1.156 +    for(size_t i=0;i<s.length;i++) {
   1.157 +        if(s.ptr[i] == c) {
   1.158 +            return sstrsubs(s, i);
   1.159 +        }
   1.160 +    }
   1.161 +    sstr_t n;
   1.162 +    n.ptr = NULL;
   1.163 +    n.length = 0;
   1.164 +    return n;
   1.165 +}
   1.166 +
   1.167 +sstr_t sstrrchr(sstr_t s, int c) {
   1.168 +    if (s.length > 0) {
   1.169 +        for(size_t i=s.length;i>0;i--) {
   1.170 +            if(s.ptr[i-1] == c) {
   1.171 +                return sstrsubs(s, i-1);
   1.172 +            }
   1.173 +        }
   1.174 +    }
   1.175 +    sstr_t n;
   1.176 +    n.ptr = NULL;
   1.177 +    n.length = 0;
   1.178 +    return n;
   1.179 +}
   1.180 +
   1.181 +sstr_t sstrstr(sstr_t string, sstr_t match) {
   1.182 +    if (match.length == 0) {
   1.183 +        return string;
   1.184 +    }
   1.185 +    
   1.186 +    for (size_t i = 0 ; i < string.length ; i++) {
   1.187 +        sstr_t substr = sstrsubs(string, i);
   1.188 +        if (sstrprefix(substr, match)) {
   1.189 +            return substr;
   1.190 +        }
   1.191 +    }
   1.192 +    
   1.193 +    sstr_t emptystr;
   1.194 +    emptystr.length = 0;
   1.195 +    emptystr.ptr = NULL;
   1.196 +    return emptystr;
   1.197 +}
   1.198 +
   1.199 +sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
   1.200 +    return sstrsplit_a(ucx_default_allocator(), s, d, n);
   1.201 +}
   1.202 +
   1.203 +sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
   1.204 +    if (s.length == 0 || d.length == 0) {
   1.205 +        *n = -1;
   1.206 +        return NULL;
   1.207 +    }
   1.208 +
   1.209 +    sstr_t* result;
   1.210 +    ssize_t nmax = *n;
   1.211 +    *n = 1;
   1.212 +
   1.213 +    /* special case: exact match - no processing needed */
   1.214 +    if (sstrcmp(s, d) == 0) {
   1.215 +        *n = 0;
   1.216 +        return NULL;
   1.217 +    }
   1.218 +    sstr_t sv = sstrdup(s);
   1.219 +    if (sv.length == 0) {
   1.220 +        *n = -2;
   1.221 +        return NULL;
   1.222 +    }
   1.223 +
   1.224 +    for (size_t i = 0 ; i < s.length ; i++) {
   1.225 +        sstr_t substr = sstrsubs(sv, i);
   1.226 +        if (sstrprefix(substr, d)) {
   1.227 +            (*n)++;
   1.228 +            for (size_t j = 0 ; j < d.length ; j++) {
   1.229 +                sv.ptr[i+j] = 0;
   1.230 +            }
   1.231 +            i += d.length - 1; // -1, because the loop will do a i++
   1.232 +        }
   1.233 +        if ((*n) == nmax) break;
   1.234 +    }
   1.235 +    result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)*(*n));
   1.236 +
   1.237 +    if (result) {
   1.238 +        char *pptr = sv.ptr;
   1.239 +        for (ssize_t i = 0 ; i < *n ; i++) {
   1.240 +            size_t l = strlen(pptr);
   1.241 +            char* ptr = (char*) almalloc(allocator, l + 1);
   1.242 +            if (ptr) {
   1.243 +                memcpy(ptr, pptr, l);
   1.244 +                ptr[l] = 0;
   1.245 +
   1.246 +                result[i] = sstrn(ptr, l);
   1.247 +                pptr += l + d.length;
   1.248 +            } else {
   1.249 +                for (ssize_t j = i-1 ; j >= 0 ; j--) {
   1.250 +                    alfree(allocator, result[j].ptr);
   1.251 +                }
   1.252 +                alfree(allocator, result);
   1.253 +                *n = -2;
   1.254 +                break;
   1.255 +            }
   1.256 +        }
   1.257 +    } else {
   1.258 +        *n = -2;
   1.259 +    }
   1.260 +    
   1.261 +    free(sv.ptr);
   1.262 +
   1.263 +    return result;
   1.264 +}
   1.265 +
   1.266 +int sstrcmp(sstr_t s1, sstr_t s2) {
   1.267 +    if (s1.length == s2.length) {
   1.268 +        return memcmp(s1.ptr, s2.ptr, s1.length);
   1.269 +    } else if (s1.length > s2.length) {
   1.270 +        return 1;
   1.271 +    } else {
   1.272 +        return -1;
   1.273 +    }
   1.274 +}
   1.275 +
   1.276 +int sstrcasecmp(sstr_t s1, sstr_t s2) {
   1.277 +    if (s1.length == s2.length) {
   1.278 +#ifdef _WIN32
   1.279 +        return _strnicmp(s1.ptr, s2.ptr, s1.length);
   1.280 +#else
   1.281 +        return strncasecmp(s1.ptr, s2.ptr, s1.length);
   1.282 +#endif
   1.283 +    } else if (s1.length > s2.length) {
   1.284 +        return 1;
   1.285 +    } else {
   1.286 +        return -1;
   1.287 +    }
   1.288 +}
   1.289 +
   1.290 +sstr_t sstrdup(sstr_t s) {
   1.291 +    return sstrdup_a(ucx_default_allocator(), s);
   1.292 +}
   1.293 +
   1.294 +sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
   1.295 +    sstr_t newstring;
   1.296 +    newstring.ptr = (char*)almalloc(allocator, s.length + 1);
   1.297 +    if (newstring.ptr) {
   1.298 +        newstring.length = s.length;
   1.299 +        newstring.ptr[newstring.length] = 0;
   1.300 +        
   1.301 +        memcpy(newstring.ptr, s.ptr, s.length);
   1.302 +    } else {
   1.303 +        newstring.length = 0;
   1.304 +    }
   1.305 +    
   1.306 +    return newstring;
   1.307 +}
   1.308 +
   1.309 +sstr_t sstrtrim(sstr_t string) {
   1.310 +    sstr_t newstr = string;
   1.311 +    
   1.312 +    while (newstr.length > 0 && isspace(*newstr.ptr)) {
   1.313 +        newstr.ptr++;
   1.314 +        newstr.length--;
   1.315 +    }
   1.316 +    while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
   1.317 +        newstr.length--;
   1.318 +    }
   1.319 +    
   1.320 +    return newstr;
   1.321 +}
   1.322 +
   1.323 +int sstrprefix(sstr_t string, sstr_t prefix) {
   1.324 +    if (string.length == 0) {
   1.325 +        return prefix.length == 0;
   1.326 +    }
   1.327 +    if (prefix.length == 0) {
   1.328 +        return 1;
   1.329 +    }
   1.330 +    
   1.331 +    if (prefix.length > string.length) {
   1.332 +        return 0;
   1.333 +    } else {
   1.334 +        return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
   1.335 +    }
   1.336 +}
   1.337 +
   1.338 +int sstrsuffix(sstr_t string, sstr_t suffix) {
   1.339 +    if (string.length == 0) {
   1.340 +        return suffix.length == 0;
   1.341 +    }
   1.342 +    if (suffix.length == 0) {
   1.343 +        return 1;
   1.344 +    }
   1.345 +    
   1.346 +    if (suffix.length > string.length) {
   1.347 +        return 0;
   1.348 +    } else {
   1.349 +        return memcmp(string.ptr+string.length-suffix.length,
   1.350 +            suffix.ptr, suffix.length) == 0;
   1.351 +    }
   1.352 +}
   1.353 +
   1.354 +sstr_t sstrlower(sstr_t string) {
   1.355 +    sstr_t ret = sstrdup(string);
   1.356 +    for (size_t i = 0; i < ret.length ; i++) {
   1.357 +        ret.ptr[i] = tolower(ret.ptr[i]);
   1.358 +    }
   1.359 +    return ret;
   1.360 +}
   1.361 +
   1.362 +sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
   1.363 +    sstr_t ret = sstrdup_a(allocator, string);
   1.364 +    for (size_t i = 0; i < ret.length ; i++) {
   1.365 +        ret.ptr[i] = tolower(ret.ptr[i]);
   1.366 +    }
   1.367 +    return ret;
   1.368 +}
   1.369 +
   1.370 +sstr_t sstrupper(sstr_t string) {
   1.371 +    sstr_t ret = sstrdup(string);
   1.372 +    for (size_t i = 0; i < ret.length ; i++) {
   1.373 +        ret.ptr[i] = toupper(ret.ptr[i]);
   1.374 +    }
   1.375 +    return ret;
   1.376 +}
   1.377 +
   1.378 +sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
   1.379 +    sstr_t ret = sstrdup_a(allocator, string);
   1.380 +    for (size_t i = 0; i < ret.length ; i++) {
   1.381 +        ret.ptr[i] = toupper(ret.ptr[i]);
   1.382 +    }
   1.383 +    return ret;
   1.384 +}

mercurial