ucx/string.c

Mon, 20 Feb 2017 17:28:58 +0100

author
Mike Becker <universe@uap-core.de>
date
Mon, 20 Feb 2017 17:28:58 +0100
changeset 235
7cf1e41833a2
parent 234
7a63b4986b5b
child 236
ffc6d0910342
permissions
-rw-r--r--

reduces amount of realloc calls in sstrsplit

olaf@20 1 /*
universe@103 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
olaf@20 3 *
universe@225 4 * Copyright 2016 Olaf Wintermann. All rights reserved.
universe@103 5 *
universe@103 6 * Redistribution and use in source and binary forms, with or without
universe@103 7 * modification, are permitted provided that the following conditions are met:
universe@103 8 *
universe@103 9 * 1. Redistributions of source code must retain the above copyright
universe@103 10 * notice, this list of conditions and the following disclaimer.
universe@103 11 *
universe@103 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@103 13 * notice, this list of conditions and the following disclaimer in the
universe@103 14 * documentation and/or other materials provided with the distribution.
universe@103 15 *
universe@103 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@103 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@103 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@103 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@103 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@103 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@103 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@103 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@103 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@103 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@103 26 * POSSIBILITY OF SUCH DAMAGE.
olaf@20 27 */
olaf@20 28
olaf@20 29 #include <stdlib.h>
universe@69 30 #include <string.h>
olaf@20 31 #include <stdarg.h>
universe@189 32 #include <ctype.h>
olaf@20 33
olaf@20 34 #include "string.h"
olaf@109 35 #include "allocator.h"
olaf@20 36
universe@116 37 sstr_t sstr(char *cstring) {
olaf@20 38 sstr_t string;
universe@116 39 string.ptr = cstring;
universe@116 40 string.length = strlen(cstring);
olaf@20 41 return string;
olaf@20 42 }
olaf@20 43
universe@116 44 sstr_t sstrn(char *cstring, size_t length) {
olaf@20 45 sstr_t string;
universe@116 46 string.ptr = cstring;
universe@116 47 string.length = length;
olaf@20 48 return string;
olaf@20 49 }
olaf@20 50
olaf@68 51 size_t sstrnlen(size_t n, sstr_t s, ...) {
olaf@20 52 va_list ap;
olaf@20 53 size_t size = s.length;
olaf@20 54 va_start(ap, s);
olaf@20 55
universe@116 56 for (size_t i = 1 ; i < n ; i++) {
olaf@20 57 sstr_t str = va_arg(ap, sstr_t);
olaf@20 58 size += str.length;
olaf@20 59 }
universe@24 60 va_end(ap);
olaf@20 61
olaf@20 62 return size;
olaf@20 63 }
olaf@20 64
olaf@180 65 static sstr_t sstrvcat_a(
olaf@180 66 UcxAllocator *a,
olaf@180 67 size_t count,
olaf@180 68 sstr_t s1,
olaf@180 69 sstr_t s2,
olaf@180 70 va_list ap) {
olaf@180 71 sstr_t str;
olaf@180 72 str.ptr = NULL;
olaf@180 73 str.length = 0;
olaf@180 74 if(count < 2) {
olaf@180 75 return str;
olaf@180 76 }
olaf@180 77
universe@185 78 sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
olaf@180 79 if(!strings) {
olaf@180 80 return str;
olaf@180 81 }
olaf@180 82
olaf@180 83 // get all args and overall length
olaf@180 84 strings[0] = s1;
olaf@180 85 strings[1] = s2;
olaf@180 86 size_t strlen = s1.length + s2.length;
olaf@180 87 for (size_t i=2;i<count;i++) {
olaf@180 88 sstr_t s = va_arg (ap, sstr_t);
olaf@180 89 strings[i] = s;
olaf@180 90 strlen += s.length;
olaf@180 91 }
olaf@180 92
olaf@180 93 // create new string
universe@185 94 str.ptr = (char*) almalloc(a, strlen + 1);
olaf@180 95 str.length = strlen;
olaf@180 96 if(!str.ptr) {
olaf@180 97 free(strings);
olaf@180 98 str.length = 0;
olaf@180 99 return str;
olaf@180 100 }
olaf@180 101
olaf@180 102 // concatenate strings
olaf@180 103 size_t pos = 0;
olaf@180 104 for (size_t i=0;i<count;i++) {
olaf@180 105 sstr_t s = strings[i];
olaf@180 106 memcpy(str.ptr + pos, s.ptr, s.length);
olaf@180 107 pos += s.length;
olaf@180 108 }
olaf@180 109
olaf@180 110 str.ptr[str.length] = '\0';
olaf@180 111
olaf@180 112 free(strings);
olaf@180 113
olaf@180 114 return str;
olaf@180 115 }
olaf@180 116
olaf@180 117 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
olaf@180 118 va_list ap;
olaf@180 119 va_start(ap, s2);
olaf@180 120 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
olaf@180 121 va_end(ap);
olaf@180 122 return s;
olaf@180 123 }
olaf@180 124
olaf@180 125 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
olaf@180 126 va_list ap;
olaf@180 127 va_start(ap, s2);
olaf@180 128 sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
olaf@180 129 va_end(ap);
olaf@180 130 return s;
olaf@180 131 }
olaf@180 132
olaf@68 133 sstr_t sstrsubs(sstr_t s, size_t start) {
olaf@20 134 return sstrsubsl (s, start, s.length-start);
olaf@20 135 }
olaf@20 136
olaf@68 137 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
olaf@20 138 sstr_t new_sstr;
olaf@104 139 if (start >= s.length) {
universe@173 140 new_sstr.ptr = NULL;
universe@173 141 new_sstr.length = 0;
universe@173 142 } else {
universe@173 143 if (length > s.length-start) {
universe@173 144 length = s.length-start;
universe@173 145 }
universe@173 146 new_sstr.ptr = &s.ptr[start];
universe@173 147 new_sstr.length = length;
olaf@20 148 }
olaf@20 149 return new_sstr;
olaf@20 150 }
olaf@20 151
olaf@108 152 sstr_t sstrchr(sstr_t s, int c) {
olaf@108 153 for(size_t i=0;i<s.length;i++) {
olaf@108 154 if(s.ptr[i] == c) {
olaf@108 155 return sstrsubs(s, i);
olaf@108 156 }
olaf@108 157 }
olaf@108 158 sstr_t n;
olaf@108 159 n.ptr = NULL;
olaf@108 160 n.length = 0;
olaf@108 161 return n;
olaf@108 162 }
olaf@108 163
universe@148 164 sstr_t sstrrchr(sstr_t s, int c) {
universe@148 165 if (s.length > 0) {
universe@152 166 for(size_t i=s.length;i>0;i--) {
universe@152 167 if(s.ptr[i-1] == c) {
universe@152 168 return sstrsubs(s, i-1);
universe@148 169 }
universe@148 170 }
universe@148 171 }
universe@148 172 sstr_t n;
universe@148 173 n.ptr = NULL;
universe@148 174 n.length = 0;
universe@148 175 return n;
universe@148 176 }
universe@148 177
universe@214 178 sstr_t sstrstr(sstr_t string, sstr_t match) {
universe@214 179 if (match.length == 0) {
universe@214 180 return string;
universe@214 181 }
universe@214 182
universe@214 183 for (size_t i = 0 ; i < string.length ; i++) {
universe@214 184 sstr_t substr = sstrsubs(string, i);
universe@214 185 if (sstrprefix(substr, match)) {
universe@214 186 return substr;
universe@214 187 }
universe@214 188 }
universe@214 189
universe@214 190 sstr_t emptystr;
universe@214 191 emptystr.length = 0;
universe@214 192 emptystr.ptr = NULL;
universe@214 193 return emptystr;
universe@214 194 }
universe@214 195
universe@173 196 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
universe@125 197 return sstrsplit_a(ucx_default_allocator(), s, d, n);
universe@119 198 }
universe@119 199
universe@173 200 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
universe@119 201 if (s.length == 0 || d.length == 0) {
universe@119 202 *n = -1;
universe@39 203 return NULL;
universe@39 204 }
universe@231 205
universe@231 206 /* special cases: delimiter is at least as large as the string */
universe@231 207 if (d.length >= s.length) {
universe@231 208 /* exact match */
universe@231 209 if (sstrcmp(s, d) == 0) {
universe@231 210 *n = 0;
universe@231 211 return NULL;
universe@231 212 } else /* no match possible */ {
universe@231 213 *n = 1;
universe@231 214 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
universe@233 215 *result = sstrdup_a(allocator, s);
universe@231 216 return result;
universe@231 217 }
universe@231 218 }
universe@231 219
universe@173 220 ssize_t nmax = *n;
universe@235 221 size_t arrlen = 16;
universe@235 222 sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
universe@39 223
universe@119 224 if (result) {
universe@233 225 sstr_t curpos = s;
universe@233 226 ssize_t j = 1;
universe@233 227 while (1) {
universe@234 228 sstr_t match;
universe@234 229 /* optimize for one byte delimiters */
universe@234 230 if (d.length == 1) {
universe@234 231 match = curpos;
universe@234 232 for (size_t i = 0 ; i < curpos.length ; i++) {
universe@234 233 if (curpos.ptr[i] == *(d.ptr)) {
universe@234 234 match.ptr = curpos.ptr + i;
universe@234 235 break;
universe@234 236 }
universe@234 237 match.length--;
universe@234 238 }
universe@234 239 } else {
universe@234 240 match = sstrstr(curpos, d);
universe@234 241 }
universe@233 242 if (match.length > 0) {
universe@233 243 /* is this our last try? */
universe@233 244 if (nmax == 0 || j < nmax) {
universe@233 245 /* copy the current string to the array */
universe@233 246 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
universe@233 247 result[j-1] = sstrdup_a(allocator, item);
universe@233 248 size_t processed = item.length + d.length;
universe@233 249 curpos.ptr += processed;
universe@233 250 curpos.length -= processed;
universe@39 251
universe@233 252 /* allocate memory for the next string */
universe@233 253 j++;
universe@235 254 if (j > arrlen) {
universe@235 255 arrlen *= 2;
universe@235 256 sstr_t* reallocated = (sstr_t*) alrealloc(
universe@235 257 allocator, result, arrlen*sizeof(sstr_t));
universe@235 258 if (reallocated) {
universe@235 259 result = reallocated;
universe@235 260 } else {
universe@235 261 for (ssize_t i = 0 ; i < j-1 ; i++) {
universe@235 262 alfree(allocator, result[i].ptr);
universe@235 263 }
universe@235 264 alfree(allocator, result);
universe@235 265 *n = -2;
universe@235 266 return NULL;
universe@233 267 }
universe@233 268 }
universe@233 269 } else {
universe@233 270 /* nmax reached, copy the _full_ remaining string */
universe@233 271 result[j-1] = sstrdup_a(allocator, curpos);
universe@233 272 break;
universe@233 273 }
universe@173 274 } else {
universe@233 275 /* no more matches, copy last string */
universe@233 276 result[j-1] = sstrdup_a(allocator, curpos);
universe@173 277 break;
universe@173 278 }
universe@119 279 }
universe@233 280 *n = j;
universe@119 281 } else {
universe@119 282 *n = -2;
universe@39 283 }
universe@39 284
universe@39 285 return result;
universe@39 286 }
universe@39 287
olaf@68 288 int sstrcmp(sstr_t s1, sstr_t s2) {
universe@116 289 if (s1.length == s2.length) {
universe@116 290 return memcmp(s1.ptr, s2.ptr, s1.length);
universe@116 291 } else if (s1.length > s2.length) {
universe@116 292 return 1;
universe@116 293 } else {
universe@116 294 return -1;
universe@116 295 }
olaf@20 296 }
olaf@20 297
universe@149 298 int sstrcasecmp(sstr_t s1, sstr_t s2) {
universe@149 299 if (s1.length == s2.length) {
universe@149 300 #ifdef _WIN32
universe@149 301 return _strnicmp(s1.ptr, s2.ptr, s1.length);
universe@149 302 #else
universe@149 303 return strncasecmp(s1.ptr, s2.ptr, s1.length);
universe@149 304 #endif
universe@149 305 } else if (s1.length > s2.length) {
universe@149 306 return 1;
universe@149 307 } else {
universe@149 308 return -1;
universe@149 309 }
universe@149 310 }
universe@149 311
olaf@68 312 sstr_t sstrdup(sstr_t s) {
universe@125 313 return sstrdup_a(ucx_default_allocator(), s);
olaf@109 314 }
olaf@20 315
universe@125 316 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
olaf@109 317 sstr_t newstring;
universe@173 318 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
olaf@109 319 if (newstring.ptr) {
olaf@109 320 newstring.length = s.length;
olaf@109 321 newstring.ptr[newstring.length] = 0;
olaf@109 322
olaf@109 323 memcpy(newstring.ptr, s.ptr, s.length);
olaf@109 324 } else {
olaf@109 325 newstring.length = 0;
olaf@109 326 }
olaf@109 327
olaf@20 328 return newstring;
olaf@20 329 }
olaf@96 330
olaf@96 331 sstr_t sstrtrim(sstr_t string) {
olaf@96 332 sstr_t newstr = string;
universe@189 333
universe@189 334 while (newstr.length > 0 && isspace(*newstr.ptr)) {
universe@189 335 newstr.ptr++;
universe@189 336 newstr.length--;
universe@98 337 }
universe@189 338 while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
universe@189 339 newstr.length--;
olaf@96 340 }
olaf@96 341
olaf@96 342 return newstr;
olaf@96 343 }
universe@146 344
universe@146 345 int sstrprefix(sstr_t string, sstr_t prefix) {
universe@146 346 if (string.length == 0) {
universe@146 347 return prefix.length == 0;
universe@146 348 }
universe@146 349 if (prefix.length == 0) {
universe@146 350 return 1;
universe@146 351 }
universe@146 352
universe@146 353 if (prefix.length > string.length) {
universe@146 354 return 0;
universe@146 355 } else {
universe@146 356 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
universe@146 357 }
universe@146 358 }
universe@146 359
universe@146 360 int sstrsuffix(sstr_t string, sstr_t suffix) {
universe@146 361 if (string.length == 0) {
universe@146 362 return suffix.length == 0;
universe@146 363 }
universe@146 364 if (suffix.length == 0) {
universe@146 365 return 1;
universe@146 366 }
universe@146 367
universe@146 368 if (suffix.length > string.length) {
universe@146 369 return 0;
universe@146 370 } else {
universe@146 371 return memcmp(string.ptr+string.length-suffix.length,
universe@146 372 suffix.ptr, suffix.length) == 0;
universe@146 373 }
universe@146 374 }
universe@210 375
universe@210 376 sstr_t sstrlower(sstr_t string) {
universe@210 377 sstr_t ret = sstrdup(string);
universe@210 378 for (size_t i = 0; i < ret.length ; i++) {
universe@210 379 ret.ptr[i] = tolower(ret.ptr[i]);
universe@210 380 }
universe@210 381 return ret;
universe@210 382 }
universe@210 383
universe@210 384 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
universe@210 385 sstr_t ret = sstrdup_a(allocator, string);
universe@210 386 for (size_t i = 0; i < ret.length ; i++) {
universe@210 387 ret.ptr[i] = tolower(ret.ptr[i]);
universe@210 388 }
universe@210 389 return ret;
universe@210 390 }
universe@210 391
universe@210 392 sstr_t sstrupper(sstr_t string) {
universe@210 393 sstr_t ret = sstrdup(string);
universe@210 394 for (size_t i = 0; i < ret.length ; i++) {
universe@210 395 ret.ptr[i] = toupper(ret.ptr[i]);
universe@210 396 }
universe@210 397 return ret;
universe@210 398 }
universe@210 399
universe@210 400 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
universe@210 401 sstr_t ret = sstrdup_a(allocator, string);
universe@210 402 for (size_t i = 0; i < ret.length ; i++) {
universe@210 403 ret.ptr[i] = toupper(ret.ptr[i]);
universe@210 404 }
universe@210 405 return ret;
universe@210 406 }

mercurial