src/string.c

Sun, 11 Mar 2018 13:43:07 +0100

author
Olaf Wintermann <olaf.wintermann@gmail.com>
date
Sun, 11 Mar 2018 13:43:07 +0100
branch
constsstr
changeset 275
96f643d30ff1
parent 272
2def28b65328
child 276
f1b2146d4805
permissions
-rw-r--r--

adds scstr_t struct for const strings and adapts some string functions

olaf@20 1 /*
universe@103 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
olaf@20 3 *
universe@259 4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
universe@103 5 *
universe@103 6 * Redistribution and use in source and binary forms, with or without
universe@103 7 * modification, are permitted provided that the following conditions are met:
universe@103 8 *
universe@103 9 * 1. Redistributions of source code must retain the above copyright
universe@103 10 * notice, this list of conditions and the following disclaimer.
universe@103 11 *
universe@103 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@103 13 * notice, this list of conditions and the following disclaimer in the
universe@103 14 * documentation and/or other materials provided with the distribution.
universe@103 15 *
universe@103 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@103 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@103 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@103 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@103 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@103 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@103 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@103 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@103 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@103 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@103 26 * POSSIBILITY OF SUCH DAMAGE.
olaf@20 27 */
olaf@20 28
universe@251 29 #include "ucx/string.h"
universe@251 30
universe@251 31 #include "ucx/allocator.h"
universe@251 32
olaf@20 33 #include <stdlib.h>
universe@69 34 #include <string.h>
olaf@20 35 #include <stdarg.h>
universe@236 36 #include <stdint.h>
universe@189 37 #include <ctype.h>
olaf@20 38
universe@116 39 sstr_t sstr(char *cstring) {
olaf@20 40 sstr_t string;
universe@116 41 string.ptr = cstring;
universe@116 42 string.length = strlen(cstring);
olaf@20 43 return string;
olaf@20 44 }
olaf@20 45
universe@116 46 sstr_t sstrn(char *cstring, size_t length) {
olaf@20 47 sstr_t string;
universe@116 48 string.ptr = cstring;
universe@116 49 string.length = length;
olaf@20 50 return string;
olaf@20 51 }
olaf@20 52
olaf@275 53 scstr_t scstr(const char *cstring) {
olaf@275 54 scstr_t string;
olaf@275 55 string.ptr = cstring;
olaf@275 56 string.length = strlen(cstring);
olaf@275 57 return string;
olaf@275 58 }
olaf@275 59
olaf@275 60 scstr_t scstrn(const char *cstring, size_t length) {
olaf@275 61 scstr_t string;
olaf@275 62 string.ptr = cstring;
olaf@275 63 string.length = length;
olaf@275 64 return string;
olaf@275 65 }
olaf@275 66
olaf@275 67
olaf@68 68 size_t sstrnlen(size_t n, sstr_t s, ...) {
olaf@20 69 va_list ap;
olaf@20 70 size_t size = s.length;
olaf@20 71 va_start(ap, s);
olaf@20 72
universe@116 73 for (size_t i = 1 ; i < n ; i++) {
olaf@20 74 sstr_t str = va_arg(ap, sstr_t);
olaf@272 75 if(((size_t)-1) - str.length < size) {
olaf@272 76 size = 0;
olaf@272 77 break;
olaf@272 78 }
olaf@20 79 size += str.length;
olaf@20 80 }
universe@24 81 va_end(ap);
olaf@20 82
olaf@20 83 return size;
olaf@20 84 }
olaf@20 85
olaf@180 86 static sstr_t sstrvcat_a(
olaf@180 87 UcxAllocator *a,
olaf@180 88 size_t count,
olaf@180 89 sstr_t s1,
olaf@180 90 sstr_t s2,
olaf@180 91 va_list ap) {
olaf@180 92 sstr_t str;
olaf@180 93 str.ptr = NULL;
olaf@180 94 str.length = 0;
olaf@180 95 if(count < 2) {
olaf@180 96 return str;
olaf@180 97 }
olaf@180 98
olaf@272 99 if(((size_t)-1) - s1.length < s2.length) {
olaf@272 100 return str;
olaf@272 101 }
olaf@272 102
universe@185 103 sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
olaf@180 104 if(!strings) {
olaf@180 105 return str;
olaf@180 106 }
olaf@180 107
olaf@180 108 // get all args and overall length
olaf@180 109 strings[0] = s1;
olaf@180 110 strings[1] = s2;
olaf@272 111 size_t slen = s1.length + s2.length;
olaf@272 112 int error = 0;
olaf@180 113 for (size_t i=2;i<count;i++) {
olaf@180 114 sstr_t s = va_arg (ap, sstr_t);
olaf@180 115 strings[i] = s;
olaf@272 116 if(((size_t)-1) - s.length < slen) {
olaf@272 117 error = 1;
olaf@272 118 break;
olaf@272 119 }
olaf@272 120 slen += s.length;
olaf@272 121 }
olaf@272 122 if(error) {
olaf@272 123 free(strings);
olaf@272 124 return str;
olaf@180 125 }
olaf@180 126
olaf@180 127 // create new string
olaf@272 128 str.ptr = (char*) almalloc(a, slen + 1);
olaf@272 129 str.length = slen;
olaf@180 130 if(!str.ptr) {
olaf@180 131 free(strings);
olaf@180 132 str.length = 0;
olaf@180 133 return str;
olaf@180 134 }
olaf@180 135
olaf@180 136 // concatenate strings
olaf@180 137 size_t pos = 0;
olaf@180 138 for (size_t i=0;i<count;i++) {
olaf@180 139 sstr_t s = strings[i];
olaf@180 140 memcpy(str.ptr + pos, s.ptr, s.length);
olaf@180 141 pos += s.length;
olaf@180 142 }
olaf@180 143
olaf@180 144 str.ptr[str.length] = '\0';
olaf@180 145
olaf@180 146 free(strings);
olaf@180 147
olaf@180 148 return str;
olaf@180 149 }
olaf@180 150
olaf@180 151 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
olaf@180 152 va_list ap;
olaf@180 153 va_start(ap, s2);
olaf@180 154 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
olaf@180 155 va_end(ap);
olaf@180 156 return s;
olaf@180 157 }
olaf@180 158
olaf@180 159 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
olaf@180 160 va_list ap;
olaf@180 161 va_start(ap, s2);
olaf@180 162 sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
olaf@180 163 va_end(ap);
olaf@180 164 return s;
olaf@180 165 }
olaf@180 166
olaf@68 167 sstr_t sstrsubs(sstr_t s, size_t start) {
olaf@20 168 return sstrsubsl (s, start, s.length-start);
olaf@20 169 }
olaf@20 170
olaf@68 171 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
olaf@20 172 sstr_t new_sstr;
olaf@104 173 if (start >= s.length) {
universe@173 174 new_sstr.ptr = NULL;
universe@173 175 new_sstr.length = 0;
universe@173 176 } else {
universe@173 177 if (length > s.length-start) {
universe@173 178 length = s.length-start;
universe@173 179 }
universe@173 180 new_sstr.ptr = &s.ptr[start];
universe@173 181 new_sstr.length = length;
olaf@20 182 }
olaf@20 183 return new_sstr;
olaf@20 184 }
olaf@20 185
olaf@108 186 sstr_t sstrchr(sstr_t s, int c) {
olaf@108 187 for(size_t i=0;i<s.length;i++) {
olaf@108 188 if(s.ptr[i] == c) {
olaf@108 189 return sstrsubs(s, i);
olaf@108 190 }
olaf@108 191 }
olaf@108 192 sstr_t n;
olaf@108 193 n.ptr = NULL;
olaf@108 194 n.length = 0;
olaf@108 195 return n;
olaf@108 196 }
olaf@108 197
universe@148 198 sstr_t sstrrchr(sstr_t s, int c) {
universe@148 199 if (s.length > 0) {
universe@152 200 for(size_t i=s.length;i>0;i--) {
universe@152 201 if(s.ptr[i-1] == c) {
universe@152 202 return sstrsubs(s, i-1);
universe@148 203 }
universe@148 204 }
universe@148 205 }
universe@148 206 sstr_t n;
universe@148 207 n.ptr = NULL;
universe@148 208 n.length = 0;
universe@148 209 return n;
universe@148 210 }
universe@148 211
universe@237 212 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
universe@237 213 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
universe@236 214
universe@237 215 #define ptable_w(useheap, ptable, index, src) do {\
universe@237 216 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
universe@237 217 else ((size_t*)ptable)[index] = src;\
universe@237 218 } while (0);
universe@236 219
universe@214 220 sstr_t sstrstr(sstr_t string, sstr_t match) {
universe@214 221 if (match.length == 0) {
universe@214 222 return string;
universe@214 223 }
universe@214 224
universe@236 225 /* prepare default return value in case of no match */
universe@236 226 sstr_t result = sstrn(NULL, 0);
universe@236 227
universe@236 228 /*
universe@236 229 * IMPORTANT:
universe@236 230 * our prefix table contains the prefix length PLUS ONE
universe@236 231 * this is our decision, because we want to use the full range of size_t
universe@236 232 * the original algorithm needs a (-1) at one single place
universe@236 233 * and we want to avoid that
universe@236 234 */
universe@236 235
universe@236 236 /* static prefix table */
universe@236 237 static uint8_t s_prefix_table[256];
universe@236 238
universe@236 239 /* check pattern length and use appropriate prefix table */
universe@237 240 /* if the pattern exceeds static prefix table, allocate on the heap */
universe@237 241 register int useheap = match.length > 255;
universe@237 242 register void* ptable = useheap ?
universe@237 243 calloc(match.length+1, sizeof(size_t)): s_prefix_table;
universe@236 244
universe@236 245 /* keep counter in registers */
universe@236 246 register size_t i, j;
universe@236 247
universe@236 248 /* fill prefix table */
universe@236 249 i = 0; j = 0;
universe@237 250 ptable_w(useheap, ptable, i, j);
universe@236 251 while (i < match.length) {
universe@236 252 while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
universe@238 253 ptable_r(j, useheap, ptable, j-1);
universe@236 254 }
universe@236 255 i++; j++;
universe@237 256 ptable_w(useheap, ptable, i, j);
universe@236 257 }
universe@236 258
universe@236 259 /* search */
universe@236 260 i = 0; j = 1;
universe@236 261 while (i < string.length) {
universe@236 262 while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
universe@237 263 ptable_r(j, useheap, ptable, j-1);
universe@236 264 }
universe@236 265 i++; j++;
universe@236 266 if (j-1 == match.length) {
universe@236 267 size_t start = i - match.length;
universe@236 268 result.ptr = string.ptr + start;
universe@236 269 result.length = string.length - start;
universe@236 270 break;
universe@214 271 }
universe@214 272 }
universe@236 273
universe@236 274 /* if prefix table was allocated on the heap, free it */
universe@236 275 if (ptable != s_prefix_table) {
universe@236 276 free(ptable);
universe@236 277 }
universe@214 278
universe@236 279 return result;
universe@214 280 }
universe@214 281
universe@237 282 #undef ptable_r
universe@237 283 #undef ptable_w
universe@237 284
universe@173 285 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
universe@125 286 return sstrsplit_a(ucx_default_allocator(), s, d, n);
universe@119 287 }
universe@119 288
universe@173 289 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
universe@119 290 if (s.length == 0 || d.length == 0) {
universe@119 291 *n = -1;
universe@39 292 return NULL;
universe@39 293 }
universe@231 294
universe@231 295 /* special cases: delimiter is at least as large as the string */
universe@231 296 if (d.length >= s.length) {
universe@231 297 /* exact match */
universe@231 298 if (sstrcmp(s, d) == 0) {
universe@231 299 *n = 0;
universe@231 300 return NULL;
universe@231 301 } else /* no match possible */ {
universe@231 302 *n = 1;
universe@231 303 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
olaf@270 304 if(result) {
olaf@270 305 *result = sstrdup_a(allocator, s);
olaf@270 306 } else {
olaf@270 307 *n = -2;
olaf@270 308 }
universe@231 309 return result;
universe@231 310 }
universe@231 311 }
universe@231 312
universe@173 313 ssize_t nmax = *n;
universe@235 314 size_t arrlen = 16;
olaf@270 315 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
universe@39 316
universe@119 317 if (result) {
universe@233 318 sstr_t curpos = s;
universe@233 319 ssize_t j = 1;
universe@233 320 while (1) {
universe@234 321 sstr_t match;
universe@234 322 /* optimize for one byte delimiters */
universe@234 323 if (d.length == 1) {
universe@234 324 match = curpos;
universe@234 325 for (size_t i = 0 ; i < curpos.length ; i++) {
universe@234 326 if (curpos.ptr[i] == *(d.ptr)) {
universe@234 327 match.ptr = curpos.ptr + i;
universe@234 328 break;
universe@234 329 }
universe@234 330 match.length--;
universe@234 331 }
universe@234 332 } else {
universe@234 333 match = sstrstr(curpos, d);
universe@234 334 }
universe@233 335 if (match.length > 0) {
universe@233 336 /* is this our last try? */
universe@233 337 if (nmax == 0 || j < nmax) {
universe@233 338 /* copy the current string to the array */
universe@233 339 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
universe@233 340 result[j-1] = sstrdup_a(allocator, item);
universe@233 341 size_t processed = item.length + d.length;
universe@233 342 curpos.ptr += processed;
universe@233 343 curpos.length -= processed;
universe@39 344
universe@233 345 /* allocate memory for the next string */
universe@233 346 j++;
universe@235 347 if (j > arrlen) {
universe@235 348 arrlen *= 2;
olaf@270 349 size_t reallocsz;
olaf@270 350 sstr_t* reallocated = NULL;
olaf@270 351 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
olaf@270 352 reallocated = (sstr_t*) alrealloc(
olaf@270 353 allocator, result, reallocsz);
olaf@270 354 }
universe@235 355 if (reallocated) {
universe@235 356 result = reallocated;
universe@235 357 } else {
universe@235 358 for (ssize_t i = 0 ; i < j-1 ; i++) {
universe@235 359 alfree(allocator, result[i].ptr);
universe@235 360 }
universe@235 361 alfree(allocator, result);
universe@235 362 *n = -2;
universe@235 363 return NULL;
universe@233 364 }
universe@233 365 }
universe@233 366 } else {
universe@233 367 /* nmax reached, copy the _full_ remaining string */
universe@233 368 result[j-1] = sstrdup_a(allocator, curpos);
universe@233 369 break;
universe@233 370 }
universe@173 371 } else {
universe@233 372 /* no more matches, copy last string */
universe@233 373 result[j-1] = sstrdup_a(allocator, curpos);
universe@173 374 break;
universe@173 375 }
universe@119 376 }
universe@233 377 *n = j;
universe@119 378 } else {
universe@119 379 *n = -2;
universe@39 380 }
universe@39 381
universe@39 382 return result;
universe@39 383 }
universe@39 384
olaf@68 385 int sstrcmp(sstr_t s1, sstr_t s2) {
universe@116 386 if (s1.length == s2.length) {
universe@116 387 return memcmp(s1.ptr, s2.ptr, s1.length);
universe@116 388 } else if (s1.length > s2.length) {
universe@116 389 return 1;
universe@116 390 } else {
universe@116 391 return -1;
universe@116 392 }
olaf@20 393 }
olaf@20 394
universe@149 395 int sstrcasecmp(sstr_t s1, sstr_t s2) {
universe@149 396 if (s1.length == s2.length) {
universe@149 397 #ifdef _WIN32
universe@149 398 return _strnicmp(s1.ptr, s2.ptr, s1.length);
universe@149 399 #else
universe@149 400 return strncasecmp(s1.ptr, s2.ptr, s1.length);
universe@149 401 #endif
universe@149 402 } else if (s1.length > s2.length) {
universe@149 403 return 1;
universe@149 404 } else {
universe@149 405 return -1;
universe@149 406 }
universe@149 407 }
universe@149 408
olaf@275 409 sstr_t scstrdup(scstr_t s) {
universe@125 410 return sstrdup_a(ucx_default_allocator(), s);
olaf@109 411 }
olaf@20 412
olaf@275 413 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
olaf@109 414 sstr_t newstring;
universe@173 415 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
olaf@109 416 if (newstring.ptr) {
olaf@109 417 newstring.length = s.length;
olaf@109 418 newstring.ptr[newstring.length] = 0;
olaf@109 419
olaf@109 420 memcpy(newstring.ptr, s.ptr, s.length);
olaf@109 421 } else {
olaf@109 422 newstring.length = 0;
olaf@109 423 }
olaf@109 424
olaf@20 425 return newstring;
olaf@20 426 }
olaf@96 427
olaf@96 428 sstr_t sstrtrim(sstr_t string) {
olaf@96 429 sstr_t newstr = string;
universe@189 430
universe@189 431 while (newstr.length > 0 && isspace(*newstr.ptr)) {
universe@189 432 newstr.ptr++;
universe@189 433 newstr.length--;
universe@98 434 }
universe@189 435 while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
universe@189 436 newstr.length--;
olaf@96 437 }
olaf@96 438
olaf@96 439 return newstr;
olaf@96 440 }
universe@146 441
olaf@275 442 int ucx_strprefix(scstr_t string, scstr_t prefix) {
universe@146 443 if (string.length == 0) {
universe@146 444 return prefix.length == 0;
universe@146 445 }
universe@146 446 if (prefix.length == 0) {
universe@146 447 return 1;
universe@146 448 }
universe@146 449
universe@146 450 if (prefix.length > string.length) {
universe@146 451 return 0;
universe@146 452 } else {
universe@146 453 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
universe@146 454 }
universe@146 455 }
universe@146 456
olaf@275 457 int ucx_strsuffix(scstr_t string, scstr_t suffix) {
universe@146 458 if (string.length == 0) {
universe@146 459 return suffix.length == 0;
universe@146 460 }
universe@146 461 if (suffix.length == 0) {
universe@146 462 return 1;
universe@146 463 }
universe@146 464
universe@146 465 if (suffix.length > string.length) {
universe@146 466 return 0;
universe@146 467 } else {
universe@146 468 return memcmp(string.ptr+string.length-suffix.length,
universe@146 469 suffix.ptr, suffix.length) == 0;
universe@146 470 }
universe@146 471 }
universe@210 472
olaf@275 473 sstr_t ucx_strlower(scstr_t string) {
universe@210 474 sstr_t ret = sstrdup(string);
universe@210 475 for (size_t i = 0; i < ret.length ; i++) {
universe@210 476 ret.ptr[i] = tolower(ret.ptr[i]);
universe@210 477 }
universe@210 478 return ret;
universe@210 479 }
universe@210 480
olaf@275 481 sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string) {
universe@210 482 sstr_t ret = sstrdup_a(allocator, string);
universe@210 483 for (size_t i = 0; i < ret.length ; i++) {
universe@210 484 ret.ptr[i] = tolower(ret.ptr[i]);
universe@210 485 }
universe@210 486 return ret;
universe@210 487 }
universe@210 488
olaf@275 489 sstr_t ucx_strupper(scstr_t string) {
universe@210 490 sstr_t ret = sstrdup(string);
universe@210 491 for (size_t i = 0; i < ret.length ; i++) {
universe@210 492 ret.ptr[i] = toupper(ret.ptr[i]);
universe@210 493 }
universe@210 494 return ret;
universe@210 495 }
universe@210 496
olaf@275 497 sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string) {
universe@210 498 sstr_t ret = sstrdup_a(allocator, string);
universe@210 499 for (size_t i = 0; i < ret.length ; i++) {
universe@210 500 ret.ptr[i] = toupper(ret.ptr[i]);
universe@210 501 }
universe@210 502 return ret;
universe@210 503 }
olaf@275 504
olaf@275 505 // private string conversion functions
olaf@275 506 scstr_t ucx_sc2sc(scstr_t c) {
olaf@275 507 return c;
olaf@275 508 }
olaf@275 509 scstr_t ucx_ss2sc(sstr_t str) {
olaf@275 510 scstr_t cs;
olaf@275 511 cs.ptr = str.ptr;
olaf@275 512 cs.length = str.length;
olaf@275 513 return cs;
olaf@275 514 }
olaf@275 515 scstr_t ucx_ss2c_s(scstr_t c) {
olaf@275 516 return c;
olaf@275 517 }

mercurial