Mon, 20 Feb 2017 17:28:58 +0100
reduces amount of realloc calls in sstrsplit
olaf@20 | 1 | /* |
universe@103 | 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
olaf@20 | 3 | * |
universe@225 | 4 | * Copyright 2016 Olaf Wintermann. All rights reserved. |
universe@103 | 5 | * |
universe@103 | 6 | * Redistribution and use in source and binary forms, with or without |
universe@103 | 7 | * modification, are permitted provided that the following conditions are met: |
universe@103 | 8 | * |
universe@103 | 9 | * 1. Redistributions of source code must retain the above copyright |
universe@103 | 10 | * notice, this list of conditions and the following disclaimer. |
universe@103 | 11 | * |
universe@103 | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
universe@103 | 13 | * notice, this list of conditions and the following disclaimer in the |
universe@103 | 14 | * documentation and/or other materials provided with the distribution. |
universe@103 | 15 | * |
universe@103 | 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
universe@103 | 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
universe@103 | 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
universe@103 | 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
universe@103 | 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
universe@103 | 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
universe@103 | 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
universe@103 | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
universe@103 | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
universe@103 | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
universe@103 | 26 | * POSSIBILITY OF SUCH DAMAGE. |
olaf@20 | 27 | */ |
olaf@20 | 28 | |
olaf@20 | 29 | #include <stdlib.h> |
universe@69 | 30 | #include <string.h> |
olaf@20 | 31 | #include <stdarg.h> |
universe@189 | 32 | #include <ctype.h> |
olaf@20 | 33 | |
olaf@20 | 34 | #include "string.h" |
olaf@109 | 35 | #include "allocator.h" |
olaf@20 | 36 | |
universe@116 | 37 | sstr_t sstr(char *cstring) { |
olaf@20 | 38 | sstr_t string; |
universe@116 | 39 | string.ptr = cstring; |
universe@116 | 40 | string.length = strlen(cstring); |
olaf@20 | 41 | return string; |
olaf@20 | 42 | } |
olaf@20 | 43 | |
universe@116 | 44 | sstr_t sstrn(char *cstring, size_t length) { |
olaf@20 | 45 | sstr_t string; |
universe@116 | 46 | string.ptr = cstring; |
universe@116 | 47 | string.length = length; |
olaf@20 | 48 | return string; |
olaf@20 | 49 | } |
olaf@20 | 50 | |
olaf@68 | 51 | size_t sstrnlen(size_t n, sstr_t s, ...) { |
olaf@20 | 52 | va_list ap; |
olaf@20 | 53 | size_t size = s.length; |
olaf@20 | 54 | va_start(ap, s); |
olaf@20 | 55 | |
universe@116 | 56 | for (size_t i = 1 ; i < n ; i++) { |
olaf@20 | 57 | sstr_t str = va_arg(ap, sstr_t); |
olaf@20 | 58 | size += str.length; |
olaf@20 | 59 | } |
universe@24 | 60 | va_end(ap); |
olaf@20 | 61 | |
olaf@20 | 62 | return size; |
olaf@20 | 63 | } |
olaf@20 | 64 | |
olaf@180 | 65 | static sstr_t sstrvcat_a( |
olaf@180 | 66 | UcxAllocator *a, |
olaf@180 | 67 | size_t count, |
olaf@180 | 68 | sstr_t s1, |
olaf@180 | 69 | sstr_t s2, |
olaf@180 | 70 | va_list ap) { |
olaf@180 | 71 | sstr_t str; |
olaf@180 | 72 | str.ptr = NULL; |
olaf@180 | 73 | str.length = 0; |
olaf@180 | 74 | if(count < 2) { |
olaf@180 | 75 | return str; |
olaf@180 | 76 | } |
olaf@180 | 77 | |
universe@185 | 78 | sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t)); |
olaf@180 | 79 | if(!strings) { |
olaf@180 | 80 | return str; |
olaf@180 | 81 | } |
olaf@180 | 82 | |
olaf@180 | 83 | // get all args and overall length |
olaf@180 | 84 | strings[0] = s1; |
olaf@180 | 85 | strings[1] = s2; |
olaf@180 | 86 | size_t strlen = s1.length + s2.length; |
olaf@180 | 87 | for (size_t i=2;i<count;i++) { |
olaf@180 | 88 | sstr_t s = va_arg (ap, sstr_t); |
olaf@180 | 89 | strings[i] = s; |
olaf@180 | 90 | strlen += s.length; |
olaf@180 | 91 | } |
olaf@180 | 92 | |
olaf@180 | 93 | // create new string |
universe@185 | 94 | str.ptr = (char*) almalloc(a, strlen + 1); |
olaf@180 | 95 | str.length = strlen; |
olaf@180 | 96 | if(!str.ptr) { |
olaf@180 | 97 | free(strings); |
olaf@180 | 98 | str.length = 0; |
olaf@180 | 99 | return str; |
olaf@180 | 100 | } |
olaf@180 | 101 | |
olaf@180 | 102 | // concatenate strings |
olaf@180 | 103 | size_t pos = 0; |
olaf@180 | 104 | for (size_t i=0;i<count;i++) { |
olaf@180 | 105 | sstr_t s = strings[i]; |
olaf@180 | 106 | memcpy(str.ptr + pos, s.ptr, s.length); |
olaf@180 | 107 | pos += s.length; |
olaf@180 | 108 | } |
olaf@180 | 109 | |
olaf@180 | 110 | str.ptr[str.length] = '\0'; |
olaf@180 | 111 | |
olaf@180 | 112 | free(strings); |
olaf@180 | 113 | |
olaf@180 | 114 | return str; |
olaf@180 | 115 | } |
olaf@180 | 116 | |
olaf@180 | 117 | sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) { |
olaf@180 | 118 | va_list ap; |
olaf@180 | 119 | va_start(ap, s2); |
olaf@180 | 120 | sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap); |
olaf@180 | 121 | va_end(ap); |
olaf@180 | 122 | return s; |
olaf@180 | 123 | } |
olaf@180 | 124 | |
olaf@180 | 125 | sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) { |
olaf@180 | 126 | va_list ap; |
olaf@180 | 127 | va_start(ap, s2); |
olaf@180 | 128 | sstr_t s = sstrvcat_a(a, count, s1, s2, ap); |
olaf@180 | 129 | va_end(ap); |
olaf@180 | 130 | return s; |
olaf@180 | 131 | } |
olaf@180 | 132 | |
olaf@68 | 133 | sstr_t sstrsubs(sstr_t s, size_t start) { |
olaf@20 | 134 | return sstrsubsl (s, start, s.length-start); |
olaf@20 | 135 | } |
olaf@20 | 136 | |
olaf@68 | 137 | sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { |
olaf@20 | 138 | sstr_t new_sstr; |
olaf@104 | 139 | if (start >= s.length) { |
universe@173 | 140 | new_sstr.ptr = NULL; |
universe@173 | 141 | new_sstr.length = 0; |
universe@173 | 142 | } else { |
universe@173 | 143 | if (length > s.length-start) { |
universe@173 | 144 | length = s.length-start; |
universe@173 | 145 | } |
universe@173 | 146 | new_sstr.ptr = &s.ptr[start]; |
universe@173 | 147 | new_sstr.length = length; |
olaf@20 | 148 | } |
olaf@20 | 149 | return new_sstr; |
olaf@20 | 150 | } |
olaf@20 | 151 | |
olaf@108 | 152 | sstr_t sstrchr(sstr_t s, int c) { |
olaf@108 | 153 | for(size_t i=0;i<s.length;i++) { |
olaf@108 | 154 | if(s.ptr[i] == c) { |
olaf@108 | 155 | return sstrsubs(s, i); |
olaf@108 | 156 | } |
olaf@108 | 157 | } |
olaf@108 | 158 | sstr_t n; |
olaf@108 | 159 | n.ptr = NULL; |
olaf@108 | 160 | n.length = 0; |
olaf@108 | 161 | return n; |
olaf@108 | 162 | } |
olaf@108 | 163 | |
universe@148 | 164 | sstr_t sstrrchr(sstr_t s, int c) { |
universe@148 | 165 | if (s.length > 0) { |
universe@152 | 166 | for(size_t i=s.length;i>0;i--) { |
universe@152 | 167 | if(s.ptr[i-1] == c) { |
universe@152 | 168 | return sstrsubs(s, i-1); |
universe@148 | 169 | } |
universe@148 | 170 | } |
universe@148 | 171 | } |
universe@148 | 172 | sstr_t n; |
universe@148 | 173 | n.ptr = NULL; |
universe@148 | 174 | n.length = 0; |
universe@148 | 175 | return n; |
universe@148 | 176 | } |
universe@148 | 177 | |
universe@214 | 178 | sstr_t sstrstr(sstr_t string, sstr_t match) { |
universe@214 | 179 | if (match.length == 0) { |
universe@214 | 180 | return string; |
universe@214 | 181 | } |
universe@214 | 182 | |
universe@214 | 183 | for (size_t i = 0 ; i < string.length ; i++) { |
universe@214 | 184 | sstr_t substr = sstrsubs(string, i); |
universe@214 | 185 | if (sstrprefix(substr, match)) { |
universe@214 | 186 | return substr; |
universe@214 | 187 | } |
universe@214 | 188 | } |
universe@214 | 189 | |
universe@214 | 190 | sstr_t emptystr; |
universe@214 | 191 | emptystr.length = 0; |
universe@214 | 192 | emptystr.ptr = NULL; |
universe@214 | 193 | return emptystr; |
universe@214 | 194 | } |
universe@214 | 195 | |
universe@173 | 196 | sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { |
universe@125 | 197 | return sstrsplit_a(ucx_default_allocator(), s, d, n); |
universe@119 | 198 | } |
universe@119 | 199 | |
universe@173 | 200 | sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { |
universe@119 | 201 | if (s.length == 0 || d.length == 0) { |
universe@119 | 202 | *n = -1; |
universe@39 | 203 | return NULL; |
universe@39 | 204 | } |
universe@231 | 205 | |
universe@231 | 206 | /* special cases: delimiter is at least as large as the string */ |
universe@231 | 207 | if (d.length >= s.length) { |
universe@231 | 208 | /* exact match */ |
universe@231 | 209 | if (sstrcmp(s, d) == 0) { |
universe@231 | 210 | *n = 0; |
universe@231 | 211 | return NULL; |
universe@231 | 212 | } else /* no match possible */ { |
universe@231 | 213 | *n = 1; |
universe@231 | 214 | sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); |
universe@233 | 215 | *result = sstrdup_a(allocator, s); |
universe@231 | 216 | return result; |
universe@231 | 217 | } |
universe@231 | 218 | } |
universe@231 | 219 | |
universe@173 | 220 | ssize_t nmax = *n; |
universe@235 | 221 | size_t arrlen = 16; |
universe@235 | 222 | sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t)); |
universe@39 | 223 | |
universe@119 | 224 | if (result) { |
universe@233 | 225 | sstr_t curpos = s; |
universe@233 | 226 | ssize_t j = 1; |
universe@233 | 227 | while (1) { |
universe@234 | 228 | sstr_t match; |
universe@234 | 229 | /* optimize for one byte delimiters */ |
universe@234 | 230 | if (d.length == 1) { |
universe@234 | 231 | match = curpos; |
universe@234 | 232 | for (size_t i = 0 ; i < curpos.length ; i++) { |
universe@234 | 233 | if (curpos.ptr[i] == *(d.ptr)) { |
universe@234 | 234 | match.ptr = curpos.ptr + i; |
universe@234 | 235 | break; |
universe@234 | 236 | } |
universe@234 | 237 | match.length--; |
universe@234 | 238 | } |
universe@234 | 239 | } else { |
universe@234 | 240 | match = sstrstr(curpos, d); |
universe@234 | 241 | } |
universe@233 | 242 | if (match.length > 0) { |
universe@233 | 243 | /* is this our last try? */ |
universe@233 | 244 | if (nmax == 0 || j < nmax) { |
universe@233 | 245 | /* copy the current string to the array */ |
universe@233 | 246 | sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr); |
universe@233 | 247 | result[j-1] = sstrdup_a(allocator, item); |
universe@233 | 248 | size_t processed = item.length + d.length; |
universe@233 | 249 | curpos.ptr += processed; |
universe@233 | 250 | curpos.length -= processed; |
universe@39 | 251 | |
universe@233 | 252 | /* allocate memory for the next string */ |
universe@233 | 253 | j++; |
universe@235 | 254 | if (j > arrlen) { |
universe@235 | 255 | arrlen *= 2; |
universe@235 | 256 | sstr_t* reallocated = (sstr_t*) alrealloc( |
universe@235 | 257 | allocator, result, arrlen*sizeof(sstr_t)); |
universe@235 | 258 | if (reallocated) { |
universe@235 | 259 | result = reallocated; |
universe@235 | 260 | } else { |
universe@235 | 261 | for (ssize_t i = 0 ; i < j-1 ; i++) { |
universe@235 | 262 | alfree(allocator, result[i].ptr); |
universe@235 | 263 | } |
universe@235 | 264 | alfree(allocator, result); |
universe@235 | 265 | *n = -2; |
universe@235 | 266 | return NULL; |
universe@233 | 267 | } |
universe@233 | 268 | } |
universe@233 | 269 | } else { |
universe@233 | 270 | /* nmax reached, copy the _full_ remaining string */ |
universe@233 | 271 | result[j-1] = sstrdup_a(allocator, curpos); |
universe@233 | 272 | break; |
universe@233 | 273 | } |
universe@173 | 274 | } else { |
universe@233 | 275 | /* no more matches, copy last string */ |
universe@233 | 276 | result[j-1] = sstrdup_a(allocator, curpos); |
universe@173 | 277 | break; |
universe@173 | 278 | } |
universe@119 | 279 | } |
universe@233 | 280 | *n = j; |
universe@119 | 281 | } else { |
universe@119 | 282 | *n = -2; |
universe@39 | 283 | } |
universe@39 | 284 | |
universe@39 | 285 | return result; |
universe@39 | 286 | } |
universe@39 | 287 | |
olaf@68 | 288 | int sstrcmp(sstr_t s1, sstr_t s2) { |
universe@116 | 289 | if (s1.length == s2.length) { |
universe@116 | 290 | return memcmp(s1.ptr, s2.ptr, s1.length); |
universe@116 | 291 | } else if (s1.length > s2.length) { |
universe@116 | 292 | return 1; |
universe@116 | 293 | } else { |
universe@116 | 294 | return -1; |
universe@116 | 295 | } |
olaf@20 | 296 | } |
olaf@20 | 297 | |
universe@149 | 298 | int sstrcasecmp(sstr_t s1, sstr_t s2) { |
universe@149 | 299 | if (s1.length == s2.length) { |
universe@149 | 300 | #ifdef _WIN32 |
universe@149 | 301 | return _strnicmp(s1.ptr, s2.ptr, s1.length); |
universe@149 | 302 | #else |
universe@149 | 303 | return strncasecmp(s1.ptr, s2.ptr, s1.length); |
universe@149 | 304 | #endif |
universe@149 | 305 | } else if (s1.length > s2.length) { |
universe@149 | 306 | return 1; |
universe@149 | 307 | } else { |
universe@149 | 308 | return -1; |
universe@149 | 309 | } |
universe@149 | 310 | } |
universe@149 | 311 | |
olaf@68 | 312 | sstr_t sstrdup(sstr_t s) { |
universe@125 | 313 | return sstrdup_a(ucx_default_allocator(), s); |
olaf@109 | 314 | } |
olaf@20 | 315 | |
universe@125 | 316 | sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) { |
olaf@109 | 317 | sstr_t newstring; |
universe@173 | 318 | newstring.ptr = (char*)almalloc(allocator, s.length + 1); |
olaf@109 | 319 | if (newstring.ptr) { |
olaf@109 | 320 | newstring.length = s.length; |
olaf@109 | 321 | newstring.ptr[newstring.length] = 0; |
olaf@109 | 322 | |
olaf@109 | 323 | memcpy(newstring.ptr, s.ptr, s.length); |
olaf@109 | 324 | } else { |
olaf@109 | 325 | newstring.length = 0; |
olaf@109 | 326 | } |
olaf@109 | 327 | |
olaf@20 | 328 | return newstring; |
olaf@20 | 329 | } |
olaf@96 | 330 | |
olaf@96 | 331 | sstr_t sstrtrim(sstr_t string) { |
olaf@96 | 332 | sstr_t newstr = string; |
universe@189 | 333 | |
universe@189 | 334 | while (newstr.length > 0 && isspace(*newstr.ptr)) { |
universe@189 | 335 | newstr.ptr++; |
universe@189 | 336 | newstr.length--; |
universe@98 | 337 | } |
universe@189 | 338 | while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) { |
universe@189 | 339 | newstr.length--; |
olaf@96 | 340 | } |
olaf@96 | 341 | |
olaf@96 | 342 | return newstr; |
olaf@96 | 343 | } |
universe@146 | 344 | |
universe@146 | 345 | int sstrprefix(sstr_t string, sstr_t prefix) { |
universe@146 | 346 | if (string.length == 0) { |
universe@146 | 347 | return prefix.length == 0; |
universe@146 | 348 | } |
universe@146 | 349 | if (prefix.length == 0) { |
universe@146 | 350 | return 1; |
universe@146 | 351 | } |
universe@146 | 352 | |
universe@146 | 353 | if (prefix.length > string.length) { |
universe@146 | 354 | return 0; |
universe@146 | 355 | } else { |
universe@146 | 356 | return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
universe@146 | 357 | } |
universe@146 | 358 | } |
universe@146 | 359 | |
universe@146 | 360 | int sstrsuffix(sstr_t string, sstr_t suffix) { |
universe@146 | 361 | if (string.length == 0) { |
universe@146 | 362 | return suffix.length == 0; |
universe@146 | 363 | } |
universe@146 | 364 | if (suffix.length == 0) { |
universe@146 | 365 | return 1; |
universe@146 | 366 | } |
universe@146 | 367 | |
universe@146 | 368 | if (suffix.length > string.length) { |
universe@146 | 369 | return 0; |
universe@146 | 370 | } else { |
universe@146 | 371 | return memcmp(string.ptr+string.length-suffix.length, |
universe@146 | 372 | suffix.ptr, suffix.length) == 0; |
universe@146 | 373 | } |
universe@146 | 374 | } |
universe@210 | 375 | |
universe@210 | 376 | sstr_t sstrlower(sstr_t string) { |
universe@210 | 377 | sstr_t ret = sstrdup(string); |
universe@210 | 378 | for (size_t i = 0; i < ret.length ; i++) { |
universe@210 | 379 | ret.ptr[i] = tolower(ret.ptr[i]); |
universe@210 | 380 | } |
universe@210 | 381 | return ret; |
universe@210 | 382 | } |
universe@210 | 383 | |
universe@210 | 384 | sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) { |
universe@210 | 385 | sstr_t ret = sstrdup_a(allocator, string); |
universe@210 | 386 | for (size_t i = 0; i < ret.length ; i++) { |
universe@210 | 387 | ret.ptr[i] = tolower(ret.ptr[i]); |
universe@210 | 388 | } |
universe@210 | 389 | return ret; |
universe@210 | 390 | } |
universe@210 | 391 | |
universe@210 | 392 | sstr_t sstrupper(sstr_t string) { |
universe@210 | 393 | sstr_t ret = sstrdup(string); |
universe@210 | 394 | for (size_t i = 0; i < ret.length ; i++) { |
universe@210 | 395 | ret.ptr[i] = toupper(ret.ptr[i]); |
universe@210 | 396 | } |
universe@210 | 397 | return ret; |
universe@210 | 398 | } |
universe@210 | 399 | |
universe@210 | 400 | sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) { |
universe@210 | 401 | sstr_t ret = sstrdup_a(allocator, string); |
universe@210 | 402 | for (size_t i = 0; i < ret.length ; i++) { |
universe@210 | 403 | ret.ptr[i] = toupper(ret.ptr[i]); |
universe@210 | 404 | } |
universe@210 | 405 | return ret; |
universe@210 | 406 | } |