Sat, 03 Sep 2022 14:56:07 +0200
implement more string functions
universe@576 | 1 | /* |
universe@576 | 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
universe@576 | 3 | * |
universe@576 | 4 | * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved. |
universe@576 | 5 | * |
universe@576 | 6 | * Redistribution and use in source and binary forms, with or without |
universe@576 | 7 | * modification, are permitted provided that the following conditions are met: |
universe@576 | 8 | * |
universe@576 | 9 | * 1. Redistributions of source code must retain the above copyright |
universe@576 | 10 | * notice, this list of conditions and the following disclaimer. |
universe@576 | 11 | * |
universe@576 | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
universe@576 | 13 | * notice, this list of conditions and the following disclaimer in the |
universe@576 | 14 | * documentation and/or other materials provided with the distribution. |
universe@576 | 15 | * |
universe@576 | 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
universe@576 | 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
universe@576 | 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
universe@576 | 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
universe@576 | 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
universe@576 | 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
universe@576 | 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
universe@576 | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
universe@576 | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
universe@576 | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
universe@576 | 26 | * POSSIBILITY OF SUCH DAMAGE. |
universe@576 | 27 | */ |
universe@576 | 28 | |
universe@576 | 29 | #include "cx/string.h" |
universe@579 | 30 | #include "cx/utils.h" |
universe@579 | 31 | |
universe@579 | 32 | #include <string.h> |
universe@579 | 33 | #include <stdarg.h> |
universe@579 | 34 | #include <stdint.h> |
universe@581 | 35 | #include <ctype.h> |
universe@581 | 36 | |
universe@581 | 37 | #ifndef _WIN32 |
universe@581 | 38 | |
universe@581 | 39 | #include <strings.h> /* for strncasecmp() */ |
universe@581 | 40 | |
universe@581 | 41 | #endif /* _WIN32 */ |
universe@579 | 42 | |
universe@579 | 43 | cxmutstr cx_mutstr(char *cstring) { |
universe@579 | 44 | return (cxmutstr) {cstring, strlen(cstring)}; |
universe@579 | 45 | } |
universe@579 | 46 | |
universe@579 | 47 | cxmutstr cx_mutstrn( |
universe@579 | 48 | char *cstring, |
universe@579 | 49 | size_t length |
universe@579 | 50 | ) { |
universe@579 | 51 | return (cxmutstr) {cstring, length}; |
universe@579 | 52 | } |
universe@579 | 53 | |
universe@579 | 54 | cxstring cx_str(const char *cstring) { |
universe@579 | 55 | return (cxstring) {cstring, strlen(cstring)}; |
universe@579 | 56 | } |
universe@579 | 57 | |
universe@579 | 58 | cxstring cx_strn( |
universe@579 | 59 | const char *cstring, |
universe@579 | 60 | size_t length |
universe@579 | 61 | ) { |
universe@579 | 62 | return (cxstring) {cstring, length}; |
universe@579 | 63 | } |
universe@579 | 64 | |
universe@579 | 65 | cxstring cx_strcast(cxmutstr str) { |
universe@579 | 66 | return (cxstring) {str.ptr, str.length}; |
universe@579 | 67 | } |
universe@579 | 68 | |
universe@579 | 69 | void cx_strfree(cxmutstr *str) { |
universe@579 | 70 | free(str->ptr); |
universe@579 | 71 | str->ptr = NULL; |
universe@579 | 72 | str->length = 0; |
universe@579 | 73 | } |
universe@579 | 74 | |
universe@579 | 75 | size_t cx_strlen( |
universe@579 | 76 | size_t count, |
universe@579 | 77 | ... |
universe@579 | 78 | ) { |
universe@579 | 79 | if (count == 0) return 0; |
universe@579 | 80 | |
universe@579 | 81 | va_list ap; |
universe@579 | 82 | va_start(ap, count); |
universe@579 | 83 | size_t size = 0; |
universe@579 | 84 | cx_for_n(i, count) { |
universe@579 | 85 | cxstring str = va_arg(ap, cxstring); |
universe@579 | 86 | size += str.length; |
universe@579 | 87 | } |
universe@579 | 88 | va_end(ap); |
universe@579 | 89 | |
universe@579 | 90 | return size; |
universe@579 | 91 | } |
universe@579 | 92 | |
universe@579 | 93 | cxmutstr cx_strcat_a( |
universe@579 | 94 | CxAllocator *alloc, |
universe@579 | 95 | size_t count, |
universe@579 | 96 | ... |
universe@579 | 97 | ) { |
universe@579 | 98 | cxstring *strings = calloc(count, sizeof(cxstring)); |
universe@579 | 99 | if (!strings) abort(); |
universe@579 | 100 | |
universe@579 | 101 | va_list ap; |
universe@579 | 102 | va_start(ap, count); |
universe@579 | 103 | |
universe@579 | 104 | // get all args and overall length |
universe@579 | 105 | size_t slen = 0; |
universe@579 | 106 | cx_for_n(i, count) { |
universe@579 | 107 | cxstring s = va_arg (ap, cxstring); |
universe@579 | 108 | strings[i] = s; |
universe@579 | 109 | slen += s.length; |
universe@579 | 110 | } |
universe@579 | 111 | |
universe@579 | 112 | // create new string |
universe@579 | 113 | cxmutstr result; |
universe@579 | 114 | result.ptr = cxMalloc(alloc, slen + 1); |
universe@579 | 115 | result.length = slen; |
universe@579 | 116 | if (result.ptr == NULL) abort(); |
universe@579 | 117 | |
universe@579 | 118 | // concatenate strings |
universe@579 | 119 | size_t pos = 0; |
universe@579 | 120 | cx_for_n(i, count) { |
universe@579 | 121 | cxstring s = strings[i]; |
universe@579 | 122 | memcpy(result.ptr + pos, s.ptr, s.length); |
universe@579 | 123 | pos += s.length; |
universe@579 | 124 | } |
universe@579 | 125 | |
universe@579 | 126 | // terminate string |
universe@579 | 127 | result.ptr[result.length] = '\0'; |
universe@579 | 128 | |
universe@579 | 129 | // free temporary array |
universe@579 | 130 | free(strings); |
universe@579 | 131 | |
universe@579 | 132 | return result; |
universe@579 | 133 | } |
universe@579 | 134 | |
universe@580 | 135 | cxstring cx_strsubs( |
universe@580 | 136 | cxstring string, |
universe@580 | 137 | size_t start |
universe@580 | 138 | ) { |
universe@580 | 139 | return cx_strsubsl(string, start, string.length - start); |
universe@580 | 140 | } |
universe@579 | 141 | |
universe@580 | 142 | cxmutstr cx_strsubs_m( |
universe@580 | 143 | cxmutstr string, |
universe@580 | 144 | size_t start |
universe@580 | 145 | ) { |
universe@580 | 146 | return cx_strsubsl_m(string, start, string.length - start); |
universe@580 | 147 | } |
universe@579 | 148 | |
universe@580 | 149 | cxstring cx_strsubsl( |
universe@580 | 150 | cxstring string, |
universe@580 | 151 | size_t start, |
universe@580 | 152 | size_t length |
universe@580 | 153 | ) { |
universe@580 | 154 | if (start > string.length) { |
universe@580 | 155 | return (cxstring) {NULL, 0}; |
universe@580 | 156 | } |
universe@580 | 157 | |
universe@580 | 158 | size_t rem_len = string.length - start; |
universe@580 | 159 | if (length > rem_len) { |
universe@580 | 160 | length = rem_len; |
universe@580 | 161 | } |
universe@580 | 162 | |
universe@580 | 163 | return (cxstring) {string.ptr + start, length}; |
universe@580 | 164 | } |
universe@580 | 165 | |
universe@580 | 166 | cxmutstr cx_strsubsl_m( |
universe@580 | 167 | cxmutstr string, |
universe@580 | 168 | size_t start, |
universe@580 | 169 | size_t length |
universe@580 | 170 | ) { |
universe@580 | 171 | cxstring result = cx_strsubsl(cx_strcast(string), start, length); |
universe@580 | 172 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 173 | } |
universe@580 | 174 | |
universe@580 | 175 | cxstring cx_strchr( |
universe@580 | 176 | cxstring string, |
universe@580 | 177 | int chr |
universe@580 | 178 | ) { |
universe@580 | 179 | chr = 0xFF & chr; |
universe@580 | 180 | // TODO: improve by comparing multiple bytes at once |
universe@580 | 181 | cx_for_n(i, string.length) { |
universe@580 | 182 | if (string.ptr[i] == chr) { |
universe@580 | 183 | return cx_strsubs(string, i); |
universe@580 | 184 | } |
universe@580 | 185 | } |
universe@580 | 186 | return (cxstring) {NULL, 0}; |
universe@580 | 187 | } |
universe@580 | 188 | |
universe@580 | 189 | cxmutstr cx_strchr_m( |
universe@580 | 190 | cxmutstr string, |
universe@580 | 191 | int chr |
universe@580 | 192 | ) { |
universe@580 | 193 | cxstring result = cx_strchr(cx_strcast(string), chr); |
universe@580 | 194 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 195 | } |
universe@580 | 196 | |
universe@580 | 197 | cxstring cx_strrchr( |
universe@580 | 198 | cxstring string, |
universe@580 | 199 | int chr |
universe@580 | 200 | ) { |
universe@580 | 201 | chr = 0xFF & chr; |
universe@580 | 202 | size_t i = string.length; |
universe@580 | 203 | while (i > 0) { |
universe@580 | 204 | i--; |
universe@580 | 205 | // TODO: improve by comparing multiple bytes at once |
universe@580 | 206 | if (string.ptr[i] == chr) { |
universe@580 | 207 | return cx_strsubs(string, i); |
universe@580 | 208 | } |
universe@580 | 209 | } |
universe@580 | 210 | return (cxstring) {NULL, 0}; |
universe@580 | 211 | } |
universe@580 | 212 | |
universe@580 | 213 | cxmutstr cx_strrchr_m( |
universe@580 | 214 | cxmutstr string, |
universe@580 | 215 | int chr |
universe@580 | 216 | ) { |
universe@580 | 217 | cxstring result = cx_strrchr(cx_strcast(string), chr); |
universe@580 | 218 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 219 | } |
universe@580 | 220 | |
/*
 * Accessors for a prefix table that is either an array of uint8_t
 * (useheap is zero) or an array of size_t (useheap is non-zero).
 *
 * ptable_r reads the entry at `index` into `dest`,
 * ptable_w stores `src` at `index` (truncated to uint8_t for the
 * small table).
 *
 * All arguments are parenthesized, so arbitrary expressions may be passed.
 */
#define ptable_r(dest, useheap, ptable, index) ((dest) = (useheap) ? \
    ((size_t*)(ptable))[index] : (size_t) ((uint8_t*)(ptable))[index])

#define ptable_w(useheap, ptable, index, src) do {\
    if (!(useheap)) ((uint8_t*)(ptable))[index] = (uint8_t) (src);\
    else ((size_t*)(ptable))[index] = (src);\
    } while (0)
universe@580 | 228 | |
universe@580 | 229 | |
universe@580 | 230 | cxstring cx_strstr( |
universe@580 | 231 | cxstring haystack, |
universe@580 | 232 | cxstring needle |
universe@580 | 233 | ) { |
universe@580 | 234 | if (needle.length == 0) { |
universe@580 | 235 | return haystack; |
universe@580 | 236 | } |
universe@580 | 237 | |
universe@580 | 238 | /* |
universe@580 | 239 | * IMPORTANT: |
universe@580 | 240 | * Our prefix table contains the prefix length PLUS ONE |
universe@580 | 241 | * this is our decision, because we want to use the full range of size_t. |
universe@580 | 242 | * The original algorithm needs a (-1) at one single place, |
universe@580 | 243 | * and we want to avoid that. |
universe@580 | 244 | */ |
universe@580 | 245 | |
universe@580 | 246 | /* static prefix table */ |
universe@580 | 247 | static uint8_t s_prefix_table[512]; |
universe@580 | 248 | |
universe@580 | 249 | /* check pattern length and use appropriate prefix table */ |
universe@580 | 250 | /* if the pattern exceeds static prefix table, allocate on the heap */ |
universe@580 | 251 | register int useheap = needle.length >= 512; |
universe@580 | 252 | register void *ptable = useheap ? calloc(needle.length + 1, |
universe@580 | 253 | sizeof(size_t)) : s_prefix_table; |
universe@580 | 254 | |
universe@580 | 255 | /* keep counter in registers */ |
universe@580 | 256 | register size_t i, j; |
universe@580 | 257 | |
universe@580 | 258 | /* fill prefix table */ |
universe@580 | 259 | i = 0; |
universe@580 | 260 | j = 0; |
universe@580 | 261 | ptable_w(useheap, ptable, i, j); |
universe@580 | 262 | while (i < needle.length) { |
universe@580 | 263 | while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) { |
universe@580 | 264 | ptable_r(j, useheap, ptable, j - 1); |
universe@580 | 265 | } |
universe@580 | 266 | i++; |
universe@580 | 267 | j++; |
universe@580 | 268 | ptable_w(useheap, ptable, i, j); |
universe@580 | 269 | } |
universe@580 | 270 | |
universe@580 | 271 | /* search */ |
universe@580 | 272 | cxstring result = {NULL, 0}; |
universe@580 | 273 | i = 0; |
universe@580 | 274 | j = 1; |
universe@580 | 275 | while (i < haystack.length) { |
universe@580 | 276 | while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) { |
universe@580 | 277 | ptable_r(j, useheap, ptable, j - 1); |
universe@580 | 278 | } |
universe@580 | 279 | i++; |
universe@580 | 280 | j++; |
universe@580 | 281 | if (j - 1 == needle.length) { |
universe@580 | 282 | size_t start = i - needle.length; |
universe@580 | 283 | result.ptr = haystack.ptr + start; |
universe@580 | 284 | result.length = haystack.length - start; |
universe@580 | 285 | break; |
universe@580 | 286 | } |
universe@580 | 287 | } |
universe@580 | 288 | |
universe@580 | 289 | /* if prefix table was allocated on the heap, free it */ |
universe@580 | 290 | if (ptable != s_prefix_table) { |
universe@580 | 291 | free(ptable); |
universe@580 | 292 | } |
universe@580 | 293 | |
universe@580 | 294 | return result; |
universe@580 | 295 | } |
universe@580 | 296 | |
universe@580 | 297 | cxmutstr cx_strstr_m( |
universe@580 | 298 | cxmutstr haystack, |
universe@580 | 299 | cxstring needle |
universe@580 | 300 | ) { |
universe@580 | 301 | cxstring result = cx_strstr(cx_strcast(haystack), needle); |
universe@580 | 302 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@580 | 303 | } |
universe@580 | 304 | |
universe@580 | 305 | size_t cx_strsplit( |
universe@580 | 306 | cxstring string, |
universe@580 | 307 | cxstring delim, |
universe@580 | 308 | size_t limit, |
universe@580 | 309 | cxstring *output |
universe@580 | 310 | ) { |
universe@580 | 311 | // TODO: implement |
universe@580 | 312 | return 0; |
universe@580 | 313 | } |
universe@580 | 314 | |
universe@580 | 315 | size_t cx_strsplit_a( |
universe@580 | 316 | CxAllocator *allocator, |
universe@580 | 317 | cxstring string, |
universe@580 | 318 | cxstring delim, |
universe@580 | 319 | size_t limit, |
universe@580 | 320 | cxstring **output |
universe@580 | 321 | ) { |
universe@580 | 322 | // TODO: implement |
universe@580 | 323 | return 0; |
universe@580 | 324 | } |
universe@580 | 325 | |
universe@580 | 326 | size_t cx_strsplit_m( |
universe@580 | 327 | cxmutstr string, |
universe@580 | 328 | cxstring delim, |
universe@580 | 329 | size_t limit, |
universe@580 | 330 | cxmutstr *output |
universe@580 | 331 | ) { |
universe@580 | 332 | return cx_strsplit(cx_strcast(string), |
universe@580 | 333 | delim, limit, (cxstring *) output); |
universe@580 | 334 | } |
universe@580 | 335 | |
universe@580 | 336 | size_t cx_strsplit_ma( |
universe@580 | 337 | CxAllocator *allocator, |
universe@580 | 338 | cxmutstr string, |
universe@580 | 339 | cxstring delim, |
universe@580 | 340 | size_t limit, |
universe@580 | 341 | cxmutstr **output |
universe@580 | 342 | ) { |
universe@580 | 343 | return cx_strsplit_a(allocator, cx_strcast(string), |
universe@580 | 344 | delim, limit, (cxstring **) output); |
universe@580 | 345 | } |
universe@581 | 346 | |
universe@581 | 347 | int cx_strcmp(cxstring s1, cxstring s2) { |
universe@581 | 348 | if (s1.length == s2.length) { |
universe@581 | 349 | return memcmp(s1.ptr, s2.ptr, s1.length); |
universe@581 | 350 | } else if (s1.length > s2.length) { |
universe@581 | 351 | return 1; |
universe@581 | 352 | } else { |
universe@581 | 353 | return -1; |
universe@581 | 354 | } |
universe@581 | 355 | } |
universe@581 | 356 | |
universe@581 | 357 | int cx_strcasecmp(cxstring s1, cxstring s2) { |
universe@581 | 358 | if (s1.length == s2.length) { |
universe@581 | 359 | #ifdef _WIN32 |
universe@581 | 360 | return _strnicmp(s1.ptr, s2.ptr, s1.length); |
universe@581 | 361 | #else |
universe@581 | 362 | return strncasecmp(s1.ptr, s2.ptr, s1.length); |
universe@581 | 363 | #endif |
universe@581 | 364 | } else if (s1.length > s2.length) { |
universe@581 | 365 | return 1; |
universe@581 | 366 | } else { |
universe@581 | 367 | return -1; |
universe@581 | 368 | } |
universe@581 | 369 | } |
universe@581 | 370 | |
universe@581 | 371 | cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) { |
universe@581 | 372 | cxmutstr result = { |
universe@581 | 373 | cxMalloc(allocator, string.length + 1), |
universe@581 | 374 | string.length |
universe@581 | 375 | }; |
universe@581 | 376 | if (result.ptr == NULL) { |
universe@581 | 377 | result.length = 0; |
universe@581 | 378 | return result; |
universe@581 | 379 | } |
universe@581 | 380 | memcpy(result.ptr, string.ptr, string.length); |
universe@581 | 381 | result.ptr[string.length] = '\0'; |
universe@581 | 382 | return result; |
universe@581 | 383 | } |
universe@581 | 384 | |
universe@581 | 385 | cxstring cx_strtrim(cxstring string) { |
universe@581 | 386 | cxstring result = string; |
universe@581 | 387 | // TODO: optimize by comparing multiple bytes at once |
universe@581 | 388 | while (result.length > 0 && isspace(*result.ptr)) { |
universe@581 | 389 | result.ptr++; |
universe@581 | 390 | result.length--; |
universe@581 | 391 | } |
universe@581 | 392 | while (result.length > 0 && isspace(result.ptr[result.length - 1])) { |
universe@581 | 393 | result.length--; |
universe@581 | 394 | } |
universe@581 | 395 | return result; |
universe@581 | 396 | } |
universe@581 | 397 | |
universe@581 | 398 | cxmutstr cx_strtrim_m(cxmutstr string) { |
universe@581 | 399 | cxstring result = cx_strtrim(cx_strcast(string)); |
universe@581 | 400 | return (cxmutstr) {(char *) result.ptr, result.length}; |
universe@581 | 401 | } |
universe@581 | 402 | |
universe@581 | 403 | bool cx_strprefix(cxstring string, cxstring prefix) { |
universe@581 | 404 | if (string.length < prefix.length) return false; |
universe@581 | 405 | return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
universe@581 | 406 | } |
universe@581 | 407 | |
universe@581 | 408 | bool cx_strsuffix(cxstring string, cxstring suffix) { |
universe@581 | 409 | if (string.length < suffix.length) return false; |
universe@581 | 410 | return memcmp(string.ptr + string.length - suffix.length, |
universe@581 | 411 | suffix.ptr, suffix.length) == 0; |
universe@581 | 412 | } |
universe@581 | 413 | |
universe@581 | 414 | bool cx_casestrprefix(cxstring string, cxstring prefix) { |
universe@581 | 415 | if (string.length < prefix.length) return false; |
universe@581 | 416 | #ifdef _WIN32 |
universe@581 | 417 | return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0; |
universe@581 | 418 | #else |
universe@581 | 419 | return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0; |
universe@581 | 420 | #endif |
universe@581 | 421 | } |
universe@581 | 422 | |
universe@581 | 423 | bool cx_casestrsuffix(cxstring string, cxstring suffix) { |
universe@581 | 424 | if (string.length < suffix.length) return false; |
universe@581 | 425 | #ifdef _WIN32 |
universe@581 | 426 | return _strnicmp(string.ptr+string.length-suffix.length, |
universe@581 | 427 | suffix.ptr, suffix.length) == 0; |
universe@581 | 428 | #else |
universe@581 | 429 | return strncasecmp(string.ptr + string.length - suffix.length, |
universe@581 | 430 | suffix.ptr, suffix.length) == 0; |
universe@581 | 431 | #endif |
universe@581 | 432 | } |