1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/string.c Tue Oct 17 16:15:41 2017 +0200 1.3 @@ -0,0 +1,463 @@ 1.4 +/* 1.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 1.6 + * 1.7 + * Copyright 2017 Olaf Wintermann. All rights reserved. 1.8 + * 1.9 + * Redistribution and use in source and binary forms, with or without 1.10 + * modification, are permitted provided that the following conditions are met: 1.11 + * 1.12 + * 1. Redistributions of source code must retain the above copyright 1.13 + * notice, this list of conditions and the following disclaimer. 1.14 + * 1.15 + * 2. Redistributions in binary form must reproduce the above copyright 1.16 + * notice, this list of conditions and the following disclaimer in the 1.17 + * documentation and/or other materials provided with the distribution. 1.18 + * 1.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 1.20 + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1.21 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1.22 + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 1.23 + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 1.24 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 1.25 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 1.26 + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 1.27 + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 1.28 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 1.29 + * POSSIBILITY OF SUCH DAMAGE. 1.30 + */ 1.31 + 1.32 +#include "ucx/string.h" 1.33 + 1.34 +#include "ucx/allocator.h" 1.35 + 1.36 +#include <stdlib.h> 1.37 +#include <string.h> 1.38 +#include <stdarg.h> 1.39 +#include <stdint.h> 1.40 +#include <ctype.h> 1.41 + 1.42 +sstr_t sstr(char *cstring) { 1.43 + sstr_t string; 1.44 + string.ptr = cstring; 1.45 + string.length = strlen(cstring); 1.46 + return string; 1.47 +} 1.48 + 1.49 +sstr_t sstrn(char *cstring, size_t length) { 1.50 + sstr_t string; 1.51 + string.ptr = cstring; 1.52 + string.length = length; 1.53 + return string; 1.54 +} 1.55 + 1.56 +size_t sstrnlen(size_t n, sstr_t s, ...) { 1.57 + va_list ap; 1.58 + size_t size = s.length; 1.59 + va_start(ap, s); 1.60 + 1.61 + for (size_t i = 1 ; i < n ; i++) { 1.62 + sstr_t str = va_arg(ap, sstr_t); 1.63 + size += str.length; 1.64 + } 1.65 + va_end(ap); 1.66 + 1.67 + return size; 1.68 +} 1.69 + 1.70 +static sstr_t sstrvcat_a( 1.71 + UcxAllocator *a, 1.72 + size_t count, 1.73 + sstr_t s1, 1.74 + sstr_t s2, 1.75 + va_list ap) { 1.76 + sstr_t str; 1.77 + str.ptr = NULL; 1.78 + str.length = 0; 1.79 + if(count < 2) { 1.80 + return str; 1.81 + } 1.82 + 1.83 + sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t)); 1.84 + if(!strings) { 1.85 + return str; 1.86 + } 1.87 + 1.88 + // get all args and overall length 1.89 + strings[0] = s1; 1.90 + strings[1] = s2; 1.91 + size_t strlen = s1.length + s2.length; 1.92 + for (size_t i=2;i<count;i++) { 1.93 + sstr_t s = va_arg (ap, sstr_t); 1.94 + strings[i] = s; 1.95 + strlen += s.length; 1.96 + } 1.97 + 1.98 + // create new string 1.99 + str.ptr = (char*) almalloc(a, strlen + 1); 1.100 + str.length = strlen; 1.101 + if(!str.ptr) { 1.102 + free(strings); 1.103 + str.length = 0; 1.104 + return str; 1.105 + } 1.106 + 1.107 + // concatenate strings 1.108 + size_t pos = 0; 1.109 + for (size_t i=0;i<count;i++) { 1.110 + sstr_t s = strings[i]; 1.111 + memcpy(str.ptr + pos, s.ptr, s.length); 1.112 + pos += s.length; 1.113 + } 1.114 + 1.115 + str.ptr[str.length] = '\0'; 1.116 + 1.117 + free(strings); 1.118 + 1.119 + return str; 1.120 +} 1.121 + 1.122 +sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) { 1.123 + va_list ap; 1.124 + va_start(ap, s2); 1.125 + sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap); 1.126 + va_end(ap); 1.127 + return s; 1.128 +} 1.129 + 1.130 +sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) { 1.131 + va_list ap; 1.132 + va_start(ap, s2); 1.133 + sstr_t s = sstrvcat_a(a, count, s1, s2, ap); 1.134 + va_end(ap); 1.135 + return s; 1.136 +} 1.137 + 1.138 +sstr_t sstrsubs(sstr_t s, size_t start) { 1.139 + return sstrsubsl (s, start, s.length-start); 1.140 +} 1.141 + 1.142 +sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { 1.143 + sstr_t new_sstr; 1.144 + if (start >= s.length) { 1.145 + new_sstr.ptr = NULL; 1.146 + new_sstr.length = 0; 1.147 + } else { 1.148 + if (length > s.length-start) { 1.149 + length = s.length-start; 1.150 + } 1.151 + new_sstr.ptr = &s.ptr[start]; 1.152 + new_sstr.length = length; 1.153 + } 1.154 + return new_sstr; 1.155 +} 1.156 + 1.157 +sstr_t sstrchr(sstr_t s, int c) { 1.158 + for(size_t i=0;i<s.length;i++) { 1.159 + if(s.ptr[i] == c) { 1.160 + return sstrsubs(s, i); 1.161 + } 1.162 + } 1.163 + sstr_t n; 1.164 + n.ptr = NULL; 1.165 + n.length = 0; 1.166 + return n; 1.167 +} 1.168 + 1.169 +sstr_t sstrrchr(sstr_t s, int c) { 1.170 + if (s.length > 0) { 1.171 + for(size_t i=s.length;i>0;i--) { 1.172 + if(s.ptr[i-1] == c) { 1.173 + return sstrsubs(s, i-1); 1.174 + } 1.175 + } 1.176 + } 1.177 + sstr_t n; 1.178 + n.ptr = NULL; 1.179 + n.length = 0; 1.180 + return n; 1.181 +} 1.182 + 1.183 +#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ 1.184 + ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) 1.185 + 1.186 +#define ptable_w(useheap, ptable, index, src) do {\ 1.187 + if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ 1.188 + else ((size_t*)ptable)[index] = src;\ 1.189 + } while (0); 1.190 + 1.191 +sstr_t sstrstr(sstr_t string, sstr_t match) { 1.192 + if (match.length == 0) { 1.193 + return string; 1.194 + } 1.195 + 1.196 + /* prepare default return value in case of no match */ 1.197 + sstr_t result = sstrn(NULL, 0); 1.198 + 1.199 + /* 1.200 + * IMPORTANT: 1.201 + * our prefix table contains the prefix length PLUS ONE 1.202 + * this is our decision, because we want to use the full range of size_t 1.203 + * the original algorithm needs a (-1) at one single place 1.204 + * and we want to avoid that 1.205 + */ 1.206 + 1.207 + /* static prefix table */ 1.208 + static uint8_t s_prefix_table[256]; 1.209 + 1.210 + /* check pattern length and use appropriate prefix table */ 1.211 + /* if the pattern exceeds static prefix table, allocate on the heap */ 1.212 + register int useheap = match.length > 255; 1.213 + register void* ptable = useheap ? 1.214 + calloc(match.length+1, sizeof(size_t)): s_prefix_table; 1.215 + 1.216 + /* keep counter in registers */ 1.217 + register size_t i, j; 1.218 + 1.219 + /* fill prefix table */ 1.220 + i = 0; j = 0; 1.221 + ptable_w(useheap, ptable, i, j); 1.222 + while (i < match.length) { 1.223 + while (j >= 1 && match.ptr[j-1] != match.ptr[i]) { 1.224 + ptable_r(j, useheap, ptable, j-1); 1.225 + } 1.226 + i++; j++; 1.227 + ptable_w(useheap, ptable, i, j); 1.228 + } 1.229 + 1.230 + /* search */ 1.231 + i = 0; j = 1; 1.232 + while (i < string.length) { 1.233 + while (j >= 1 && string.ptr[i] != match.ptr[j-1]) { 1.234 + ptable_r(j, useheap, ptable, j-1); 1.235 + } 1.236 + i++; j++; 1.237 + if (j-1 == match.length) { 1.238 + size_t start = i - match.length; 1.239 + result.ptr = string.ptr + start; 1.240 + result.length = string.length - start; 1.241 + break; 1.242 + } 1.243 + } 1.244 + 1.245 + /* if prefix table was allocated on the heap, free it */ 1.246 + if (ptable != s_prefix_table) { 1.247 + free(ptable); 1.248 + } 1.249 + 1.250 + return result; 1.251 +} 1.252 + 1.253 +#undef ptable_r 1.254 +#undef ptable_w 1.255 + 1.256 +sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { 1.257 + return sstrsplit_a(ucx_default_allocator(), s, d, n); 1.258 +} 1.259 + 1.260 +sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { 1.261 + if (s.length == 0 || d.length == 0) { 1.262 + *n = -1; 1.263 + return NULL; 1.264 + } 1.265 + 1.266 + /* special cases: delimiter is at least as large as the string */ 1.267 + if (d.length >= s.length) { 1.268 + /* exact match */ 1.269 + if (sstrcmp(s, d) == 0) { 1.270 + *n = 0; 1.271 + return NULL; 1.272 + } else /* no match possible */ { 1.273 + *n = 1; 1.274 + sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); 1.275 + *result = sstrdup_a(allocator, s); 1.276 + return result; 1.277 + } 1.278 + } 1.279 + 1.280 + ssize_t nmax = *n; 1.281 + size_t arrlen = 16; 1.282 + sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t)); 1.283 + 1.284 + if (result) { 1.285 + sstr_t curpos = s; 1.286 + ssize_t j = 1; 1.287 + while (1) { 1.288 + sstr_t match; 1.289 + /* optimize for one byte delimiters */ 1.290 + if (d.length == 1) { 1.291 + match = curpos; 1.292 + for (size_t i = 0 ; i < curpos.length ; i++) { 1.293 + if (curpos.ptr[i] == *(d.ptr)) { 1.294 + match.ptr = curpos.ptr + i; 1.295 + break; 1.296 + } 1.297 + match.length--; 1.298 + } 1.299 + } else { 1.300 + match = sstrstr(curpos, d); 1.301 + } 1.302 + if (match.length > 0) { 1.303 + /* is this our last try? */ 1.304 + if (nmax == 0 || j < nmax) { 1.305 + /* copy the current string to the array */ 1.306 + sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr); 1.307 + result[j-1] = sstrdup_a(allocator, item); 1.308 + size_t processed = item.length + d.length; 1.309 + curpos.ptr += processed; 1.310 + curpos.length -= processed; 1.311 + 1.312 + /* allocate memory for the next string */ 1.313 + j++; 1.314 + if (j > arrlen) { 1.315 + arrlen *= 2; 1.316 + sstr_t* reallocated = (sstr_t*) alrealloc( 1.317 + allocator, result, arrlen*sizeof(sstr_t)); 1.318 + if (reallocated) { 1.319 + result = reallocated; 1.320 + } else { 1.321 + for (ssize_t i = 0 ; i < j-1 ; i++) { 1.322 + alfree(allocator, result[i].ptr); 1.323 + } 1.324 + alfree(allocator, result); 1.325 + *n = -2; 1.326 + return NULL; 1.327 + } 1.328 + } 1.329 + } else { 1.330 + /* nmax reached, copy the _full_ remaining string */ 1.331 + result[j-1] = sstrdup_a(allocator, curpos); 1.332 + break; 1.333 + } 1.334 + } else { 1.335 + /* no more matches, copy last string */ 1.336 + result[j-1] = sstrdup_a(allocator, curpos); 1.337 + break; 1.338 + } 1.339 + } 1.340 + *n = j; 1.341 + } else { 1.342 + *n = -2; 1.343 + } 1.344 + 1.345 + return result; 1.346 +} 1.347 + 1.348 +int sstrcmp(sstr_t s1, sstr_t s2) { 1.349 + if (s1.length == s2.length) { 1.350 + return memcmp(s1.ptr, s2.ptr, s1.length); 1.351 + } else if (s1.length > s2.length) { 1.352 + return 1; 1.353 + } else { 1.354 + return -1; 1.355 + } 1.356 +} 1.357 + 1.358 +int sstrcasecmp(sstr_t s1, sstr_t s2) { 1.359 + if (s1.length == s2.length) { 1.360 +#ifdef _WIN32 1.361 + return _strnicmp(s1.ptr, s2.ptr, s1.length); 1.362 +#else 1.363 + return strncasecmp(s1.ptr, s2.ptr, s1.length); 1.364 +#endif 1.365 + } else if (s1.length > s2.length) { 1.366 + return 1; 1.367 + } else { 1.368 + return -1; 1.369 + } 1.370 +} 1.371 + 1.372 +sstr_t sstrdup(sstr_t s) { 1.373 + return sstrdup_a(ucx_default_allocator(), s); 1.374 +} 1.375 + 1.376 +sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) { 1.377 + sstr_t newstring; 1.378 + newstring.ptr = (char*)almalloc(allocator, s.length + 1); 1.379 + if (newstring.ptr) { 1.380 + newstring.length = s.length; 1.381 + newstring.ptr[newstring.length] = 0; 1.382 + 1.383 + memcpy(newstring.ptr, s.ptr, s.length); 1.384 + } else { 1.385 + newstring.length = 0; 1.386 + } 1.387 + 1.388 + return newstring; 1.389 +} 1.390 + 1.391 +sstr_t sstrtrim(sstr_t string) { 1.392 + sstr_t newstr = string; 1.393 + 1.394 + while (newstr.length > 0 && isspace(*newstr.ptr)) { 1.395 + newstr.ptr++; 1.396 + newstr.length--; 1.397 + } 1.398 + while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) { 1.399 + newstr.length--; 1.400 + } 1.401 + 1.402 + return newstr; 1.403 +} 1.404 + 1.405 +int sstrprefix(sstr_t string, sstr_t prefix) { 1.406 + if (string.length == 0) { 1.407 + return prefix.length == 0; 1.408 + } 1.409 + if (prefix.length == 0) { 1.410 + return 1; 1.411 + } 1.412 + 1.413 + if (prefix.length > string.length) { 1.414 + return 0; 1.415 + } else { 1.416 + return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; 1.417 + } 1.418 +} 1.419 + 1.420 +int sstrsuffix(sstr_t string, sstr_t suffix) { 1.421 + if (string.length == 0) { 1.422 + return suffix.length == 0; 1.423 + } 1.424 + if (suffix.length == 0) { 1.425 + return 1; 1.426 + } 1.427 + 1.428 + if (suffix.length > string.length) { 1.429 + return 0; 1.430 + } else { 1.431 + return memcmp(string.ptr+string.length-suffix.length, 1.432 + suffix.ptr, suffix.length) == 0; 1.433 + } 1.434 +} 1.435 + 1.436 +sstr_t sstrlower(sstr_t string) { 1.437 + sstr_t ret = sstrdup(string); 1.438 + for (size_t i = 0; i < ret.length ; i++) { 1.439 + ret.ptr[i] = tolower(ret.ptr[i]); 1.440 + } 1.441 + return ret; 1.442 +} 1.443 + 1.444 +sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) { 1.445 + sstr_t ret = sstrdup_a(allocator, string); 1.446 + for (size_t i = 0; i < ret.length ; i++) { 1.447 + ret.ptr[i] = tolower(ret.ptr[i]); 1.448 + } 1.449 + return ret; 1.450 +} 1.451 + 1.452 +sstr_t sstrupper(sstr_t string) { 1.453 + sstr_t ret = sstrdup(string); 1.454 + for (size_t i = 0; i < ret.length ; i++) { 1.455 + ret.ptr[i] = toupper(ret.ptr[i]); 1.456 + } 1.457 + return ret; 1.458 +} 1.459 + 1.460 +sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) { 1.461 + sstr_t ret = sstrdup_a(allocator, string); 1.462 + for (size_t i = 0; i < ret.length ; i++) { 1.463 + ret.ptr[i] = toupper(ret.ptr[i]); 1.464 + } 1.465 + return ret; 1.466 +}