src/string.c

Sun, 21 Jan 2018 10:57:32 +0100

author
Olaf Wintermann <olaf.wintermann@gmail.com>
date
Sun, 21 Jan 2018 10:57:32 +0100
changeset 272
2def28b65328
parent 270
3d80d425543b
child 275
96f643d30ff1
permissions
-rw-r--r--

adds integer overflow checks to sstrlen and sstrcat

olaf@20 1 /*
universe@103 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
olaf@20 3 *
universe@259 4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
universe@103 5 *
universe@103 6 * Redistribution and use in source and binary forms, with or without
universe@103 7 * modification, are permitted provided that the following conditions are met:
universe@103 8 *
universe@103 9 * 1. Redistributions of source code must retain the above copyright
universe@103 10 * notice, this list of conditions and the following disclaimer.
universe@103 11 *
universe@103 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@103 13 * notice, this list of conditions and the following disclaimer in the
universe@103 14 * documentation and/or other materials provided with the distribution.
universe@103 15 *
universe@103 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@103 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@103 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@103 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@103 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@103 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@103 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@103 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@103 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@103 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@103 26 * POSSIBILITY OF SUCH DAMAGE.
olaf@20 27 */
olaf@20 28
universe@251 29 #include "ucx/string.h"
universe@251 30
universe@251 31 #include "ucx/allocator.h"
universe@251 32
olaf@20 33 #include <stdlib.h>
universe@69 34 #include <string.h>
olaf@20 35 #include <stdarg.h>
universe@236 36 #include <stdint.h>
universe@189 37 #include <ctype.h>
olaf@20 38
universe@116 39 sstr_t sstr(char *cstring) {
olaf@20 40 sstr_t string;
universe@116 41 string.ptr = cstring;
universe@116 42 string.length = strlen(cstring);
olaf@20 43 return string;
olaf@20 44 }
olaf@20 45
universe@116 46 sstr_t sstrn(char *cstring, size_t length) {
olaf@20 47 sstr_t string;
universe@116 48 string.ptr = cstring;
universe@116 49 string.length = length;
olaf@20 50 return string;
olaf@20 51 }
olaf@20 52
olaf@68 53 size_t sstrnlen(size_t n, sstr_t s, ...) {
olaf@20 54 va_list ap;
olaf@20 55 size_t size = s.length;
olaf@20 56 va_start(ap, s);
olaf@20 57
universe@116 58 for (size_t i = 1 ; i < n ; i++) {
olaf@20 59 sstr_t str = va_arg(ap, sstr_t);
olaf@272 60 if(((size_t)-1) - str.length < size) {
olaf@272 61 size = 0;
olaf@272 62 break;
olaf@272 63 }
olaf@20 64 size += str.length;
olaf@20 65 }
universe@24 66 va_end(ap);
olaf@20 67
olaf@20 68 return size;
olaf@20 69 }
olaf@20 70
olaf@180 71 static sstr_t sstrvcat_a(
olaf@180 72 UcxAllocator *a,
olaf@180 73 size_t count,
olaf@180 74 sstr_t s1,
olaf@180 75 sstr_t s2,
olaf@180 76 va_list ap) {
olaf@180 77 sstr_t str;
olaf@180 78 str.ptr = NULL;
olaf@180 79 str.length = 0;
olaf@180 80 if(count < 2) {
olaf@180 81 return str;
olaf@180 82 }
olaf@180 83
olaf@272 84 if(((size_t)-1) - s1.length < s2.length) {
olaf@272 85 return str;
olaf@272 86 }
olaf@272 87
universe@185 88 sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
olaf@180 89 if(!strings) {
olaf@180 90 return str;
olaf@180 91 }
olaf@180 92
olaf@180 93 // get all args and overall length
olaf@180 94 strings[0] = s1;
olaf@180 95 strings[1] = s2;
olaf@272 96 size_t slen = s1.length + s2.length;
olaf@272 97 int error = 0;
olaf@180 98 for (size_t i=2;i<count;i++) {
olaf@180 99 sstr_t s = va_arg (ap, sstr_t);
olaf@180 100 strings[i] = s;
olaf@272 101 if(((size_t)-1) - s.length < slen) {
olaf@272 102 error = 1;
olaf@272 103 break;
olaf@272 104 }
olaf@272 105 slen += s.length;
olaf@272 106 }
olaf@272 107 if(error) {
olaf@272 108 free(strings);
olaf@272 109 return str;
olaf@180 110 }
olaf@180 111
olaf@180 112 // create new string
olaf@272 113 str.ptr = (char*) almalloc(a, slen + 1);
olaf@272 114 str.length = slen;
olaf@180 115 if(!str.ptr) {
olaf@180 116 free(strings);
olaf@180 117 str.length = 0;
olaf@180 118 return str;
olaf@180 119 }
olaf@180 120
olaf@180 121 // concatenate strings
olaf@180 122 size_t pos = 0;
olaf@180 123 for (size_t i=0;i<count;i++) {
olaf@180 124 sstr_t s = strings[i];
olaf@180 125 memcpy(str.ptr + pos, s.ptr, s.length);
olaf@180 126 pos += s.length;
olaf@180 127 }
olaf@180 128
olaf@180 129 str.ptr[str.length] = '\0';
olaf@180 130
olaf@180 131 free(strings);
olaf@180 132
olaf@180 133 return str;
olaf@180 134 }
olaf@180 135
olaf@180 136 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
olaf@180 137 va_list ap;
olaf@180 138 va_start(ap, s2);
olaf@180 139 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
olaf@180 140 va_end(ap);
olaf@180 141 return s;
olaf@180 142 }
olaf@180 143
olaf@180 144 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
olaf@180 145 va_list ap;
olaf@180 146 va_start(ap, s2);
olaf@180 147 sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
olaf@180 148 va_end(ap);
olaf@180 149 return s;
olaf@180 150 }
olaf@180 151
olaf@68 152 sstr_t sstrsubs(sstr_t s, size_t start) {
olaf@20 153 return sstrsubsl (s, start, s.length-start);
olaf@20 154 }
olaf@20 155
olaf@68 156 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
olaf@20 157 sstr_t new_sstr;
olaf@104 158 if (start >= s.length) {
universe@173 159 new_sstr.ptr = NULL;
universe@173 160 new_sstr.length = 0;
universe@173 161 } else {
universe@173 162 if (length > s.length-start) {
universe@173 163 length = s.length-start;
universe@173 164 }
universe@173 165 new_sstr.ptr = &s.ptr[start];
universe@173 166 new_sstr.length = length;
olaf@20 167 }
olaf@20 168 return new_sstr;
olaf@20 169 }
olaf@20 170
olaf@108 171 sstr_t sstrchr(sstr_t s, int c) {
olaf@108 172 for(size_t i=0;i<s.length;i++) {
olaf@108 173 if(s.ptr[i] == c) {
olaf@108 174 return sstrsubs(s, i);
olaf@108 175 }
olaf@108 176 }
olaf@108 177 sstr_t n;
olaf@108 178 n.ptr = NULL;
olaf@108 179 n.length = 0;
olaf@108 180 return n;
olaf@108 181 }
olaf@108 182
universe@148 183 sstr_t sstrrchr(sstr_t s, int c) {
universe@148 184 if (s.length > 0) {
universe@152 185 for(size_t i=s.length;i>0;i--) {
universe@152 186 if(s.ptr[i-1] == c) {
universe@152 187 return sstrsubs(s, i-1);
universe@148 188 }
universe@148 189 }
universe@148 190 }
universe@148 191 sstr_t n;
universe@148 192 n.ptr = NULL;
universe@148 193 n.length = 0;
universe@148 194 return n;
universe@148 195 }
universe@148 196
universe@237 197 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
universe@237 198 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
universe@236 199
universe@237 200 #define ptable_w(useheap, ptable, index, src) do {\
universe@237 201 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
universe@237 202 else ((size_t*)ptable)[index] = src;\
universe@237 203 } while (0);
universe@236 204
universe@214 205 sstr_t sstrstr(sstr_t string, sstr_t match) {
universe@214 206 if (match.length == 0) {
universe@214 207 return string;
universe@214 208 }
universe@214 209
universe@236 210 /* prepare default return value in case of no match */
universe@236 211 sstr_t result = sstrn(NULL, 0);
universe@236 212
universe@236 213 /*
universe@236 214 * IMPORTANT:
universe@236 215 * our prefix table contains the prefix length PLUS ONE
universe@236 216 * this is our decision, because we want to use the full range of size_t
universe@236 217 * the original algorithm needs a (-1) at one single place
universe@236 218 * and we want to avoid that
universe@236 219 */
universe@236 220
universe@236 221 /* static prefix table */
universe@236 222 static uint8_t s_prefix_table[256];
universe@236 223
universe@236 224 /* check pattern length and use appropriate prefix table */
universe@237 225 /* if the pattern exceeds static prefix table, allocate on the heap */
universe@237 226 register int useheap = match.length > 255;
universe@237 227 register void* ptable = useheap ?
universe@237 228 calloc(match.length+1, sizeof(size_t)): s_prefix_table;
universe@236 229
universe@236 230 /* keep counter in registers */
universe@236 231 register size_t i, j;
universe@236 232
universe@236 233 /* fill prefix table */
universe@236 234 i = 0; j = 0;
universe@237 235 ptable_w(useheap, ptable, i, j);
universe@236 236 while (i < match.length) {
universe@236 237 while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
universe@238 238 ptable_r(j, useheap, ptable, j-1);
universe@236 239 }
universe@236 240 i++; j++;
universe@237 241 ptable_w(useheap, ptable, i, j);
universe@236 242 }
universe@236 243
universe@236 244 /* search */
universe@236 245 i = 0; j = 1;
universe@236 246 while (i < string.length) {
universe@236 247 while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
universe@237 248 ptable_r(j, useheap, ptable, j-1);
universe@236 249 }
universe@236 250 i++; j++;
universe@236 251 if (j-1 == match.length) {
universe@236 252 size_t start = i - match.length;
universe@236 253 result.ptr = string.ptr + start;
universe@236 254 result.length = string.length - start;
universe@236 255 break;
universe@214 256 }
universe@214 257 }
universe@236 258
universe@236 259 /* if prefix table was allocated on the heap, free it */
universe@236 260 if (ptable != s_prefix_table) {
universe@236 261 free(ptable);
universe@236 262 }
universe@214 263
universe@236 264 return result;
universe@214 265 }
universe@214 266
universe@237 267 #undef ptable_r
universe@237 268 #undef ptable_w
universe@237 269
universe@173 270 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
universe@125 271 return sstrsplit_a(ucx_default_allocator(), s, d, n);
universe@119 272 }
universe@119 273
universe@173 274 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
universe@119 275 if (s.length == 0 || d.length == 0) {
universe@119 276 *n = -1;
universe@39 277 return NULL;
universe@39 278 }
universe@231 279
universe@231 280 /* special cases: delimiter is at least as large as the string */
universe@231 281 if (d.length >= s.length) {
universe@231 282 /* exact match */
universe@231 283 if (sstrcmp(s, d) == 0) {
universe@231 284 *n = 0;
universe@231 285 return NULL;
universe@231 286 } else /* no match possible */ {
universe@231 287 *n = 1;
universe@231 288 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
olaf@270 289 if(result) {
olaf@270 290 *result = sstrdup_a(allocator, s);
olaf@270 291 } else {
olaf@270 292 *n = -2;
olaf@270 293 }
universe@231 294 return result;
universe@231 295 }
universe@231 296 }
universe@231 297
universe@173 298 ssize_t nmax = *n;
universe@235 299 size_t arrlen = 16;
olaf@270 300 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
universe@39 301
universe@119 302 if (result) {
universe@233 303 sstr_t curpos = s;
universe@233 304 ssize_t j = 1;
universe@233 305 while (1) {
universe@234 306 sstr_t match;
universe@234 307 /* optimize for one byte delimiters */
universe@234 308 if (d.length == 1) {
universe@234 309 match = curpos;
universe@234 310 for (size_t i = 0 ; i < curpos.length ; i++) {
universe@234 311 if (curpos.ptr[i] == *(d.ptr)) {
universe@234 312 match.ptr = curpos.ptr + i;
universe@234 313 break;
universe@234 314 }
universe@234 315 match.length--;
universe@234 316 }
universe@234 317 } else {
universe@234 318 match = sstrstr(curpos, d);
universe@234 319 }
universe@233 320 if (match.length > 0) {
universe@233 321 /* is this our last try? */
universe@233 322 if (nmax == 0 || j < nmax) {
universe@233 323 /* copy the current string to the array */
universe@233 324 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
universe@233 325 result[j-1] = sstrdup_a(allocator, item);
universe@233 326 size_t processed = item.length + d.length;
universe@233 327 curpos.ptr += processed;
universe@233 328 curpos.length -= processed;
universe@39 329
universe@233 330 /* allocate memory for the next string */
universe@233 331 j++;
universe@235 332 if (j > arrlen) {
universe@235 333 arrlen *= 2;
olaf@270 334 size_t reallocsz;
olaf@270 335 sstr_t* reallocated = NULL;
olaf@270 336 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
olaf@270 337 reallocated = (sstr_t*) alrealloc(
olaf@270 338 allocator, result, reallocsz);
olaf@270 339 }
universe@235 340 if (reallocated) {
universe@235 341 result = reallocated;
universe@235 342 } else {
universe@235 343 for (ssize_t i = 0 ; i < j-1 ; i++) {
universe@235 344 alfree(allocator, result[i].ptr);
universe@235 345 }
universe@235 346 alfree(allocator, result);
universe@235 347 *n = -2;
universe@235 348 return NULL;
universe@233 349 }
universe@233 350 }
universe@233 351 } else {
universe@233 352 /* nmax reached, copy the _full_ remaining string */
universe@233 353 result[j-1] = sstrdup_a(allocator, curpos);
universe@233 354 break;
universe@233 355 }
universe@173 356 } else {
universe@233 357 /* no more matches, copy last string */
universe@233 358 result[j-1] = sstrdup_a(allocator, curpos);
universe@173 359 break;
universe@173 360 }
universe@119 361 }
universe@233 362 *n = j;
universe@119 363 } else {
universe@119 364 *n = -2;
universe@39 365 }
universe@39 366
universe@39 367 return result;
universe@39 368 }
universe@39 369
olaf@68 370 int sstrcmp(sstr_t s1, sstr_t s2) {
universe@116 371 if (s1.length == s2.length) {
universe@116 372 return memcmp(s1.ptr, s2.ptr, s1.length);
universe@116 373 } else if (s1.length > s2.length) {
universe@116 374 return 1;
universe@116 375 } else {
universe@116 376 return -1;
universe@116 377 }
olaf@20 378 }
olaf@20 379
universe@149 380 int sstrcasecmp(sstr_t s1, sstr_t s2) {
universe@149 381 if (s1.length == s2.length) {
universe@149 382 #ifdef _WIN32
universe@149 383 return _strnicmp(s1.ptr, s2.ptr, s1.length);
universe@149 384 #else
universe@149 385 return strncasecmp(s1.ptr, s2.ptr, s1.length);
universe@149 386 #endif
universe@149 387 } else if (s1.length > s2.length) {
universe@149 388 return 1;
universe@149 389 } else {
universe@149 390 return -1;
universe@149 391 }
universe@149 392 }
universe@149 393
olaf@68 394 sstr_t sstrdup(sstr_t s) {
universe@125 395 return sstrdup_a(ucx_default_allocator(), s);
olaf@109 396 }
olaf@20 397
universe@125 398 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
olaf@109 399 sstr_t newstring;
universe@173 400 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
olaf@109 401 if (newstring.ptr) {
olaf@109 402 newstring.length = s.length;
olaf@109 403 newstring.ptr[newstring.length] = 0;
olaf@109 404
olaf@109 405 memcpy(newstring.ptr, s.ptr, s.length);
olaf@109 406 } else {
olaf@109 407 newstring.length = 0;
olaf@109 408 }
olaf@109 409
olaf@20 410 return newstring;
olaf@20 411 }
olaf@96 412
olaf@96 413 sstr_t sstrtrim(sstr_t string) {
olaf@96 414 sstr_t newstr = string;
universe@189 415
universe@189 416 while (newstr.length > 0 && isspace(*newstr.ptr)) {
universe@189 417 newstr.ptr++;
universe@189 418 newstr.length--;
universe@98 419 }
universe@189 420 while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
universe@189 421 newstr.length--;
olaf@96 422 }
olaf@96 423
olaf@96 424 return newstr;
olaf@96 425 }
universe@146 426
universe@146 427 int sstrprefix(sstr_t string, sstr_t prefix) {
universe@146 428 if (string.length == 0) {
universe@146 429 return prefix.length == 0;
universe@146 430 }
universe@146 431 if (prefix.length == 0) {
universe@146 432 return 1;
universe@146 433 }
universe@146 434
universe@146 435 if (prefix.length > string.length) {
universe@146 436 return 0;
universe@146 437 } else {
universe@146 438 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
universe@146 439 }
universe@146 440 }
universe@146 441
universe@146 442 int sstrsuffix(sstr_t string, sstr_t suffix) {
universe@146 443 if (string.length == 0) {
universe@146 444 return suffix.length == 0;
universe@146 445 }
universe@146 446 if (suffix.length == 0) {
universe@146 447 return 1;
universe@146 448 }
universe@146 449
universe@146 450 if (suffix.length > string.length) {
universe@146 451 return 0;
universe@146 452 } else {
universe@146 453 return memcmp(string.ptr+string.length-suffix.length,
universe@146 454 suffix.ptr, suffix.length) == 0;
universe@146 455 }
universe@146 456 }
universe@210 457
universe@210 458 sstr_t sstrlower(sstr_t string) {
universe@210 459 sstr_t ret = sstrdup(string);
universe@210 460 for (size_t i = 0; i < ret.length ; i++) {
universe@210 461 ret.ptr[i] = tolower(ret.ptr[i]);
universe@210 462 }
universe@210 463 return ret;
universe@210 464 }
universe@210 465
universe@210 466 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
universe@210 467 sstr_t ret = sstrdup_a(allocator, string);
universe@210 468 for (size_t i = 0; i < ret.length ; i++) {
universe@210 469 ret.ptr[i] = tolower(ret.ptr[i]);
universe@210 470 }
universe@210 471 return ret;
universe@210 472 }
universe@210 473
universe@210 474 sstr_t sstrupper(sstr_t string) {
universe@210 475 sstr_t ret = sstrdup(string);
universe@210 476 for (size_t i = 0; i < ret.length ; i++) {
universe@210 477 ret.ptr[i] = toupper(ret.ptr[i]);
universe@210 478 }
universe@210 479 return ret;
universe@210 480 }
universe@210 481
universe@210 482 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
universe@210 483 sstr_t ret = sstrdup_a(allocator, string);
universe@210 484 for (size_t i = 0; i < ret.length ; i++) {
universe@210 485 ret.ptr[i] = toupper(ret.ptr[i]);
universe@210 486 }
universe@210 487 return ret;
universe@210 488 }

mercurial