src/string.c

Sat, 03 Sep 2022 14:56:07 +0200

author
Mike Becker <universe@uap-core.de>
date
Sat, 03 Sep 2022 14:56:07 +0200
changeset 581
c067394737ca
parent 580
aac47db8da0b
child 582
96fa7fa6af4f
permissions
-rw-r--r--

implement more string functions

universe@576 1 /*
universe@576 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@576 3 *
universe@576 4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
universe@576 5 *
universe@576 6 * Redistribution and use in source and binary forms, with or without
universe@576 7 * modification, are permitted provided that the following conditions are met:
universe@576 8 *
universe@576 9 * 1. Redistributions of source code must retain the above copyright
universe@576 10 * notice, this list of conditions and the following disclaimer.
universe@576 11 *
universe@576 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@576 13 * notice, this list of conditions and the following disclaimer in the
universe@576 14 * documentation and/or other materials provided with the distribution.
universe@576 15 *
universe@576 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@576 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@576 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@576 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@576 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@576 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@576 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@576 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@576 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@576 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@576 26 * POSSIBILITY OF SUCH DAMAGE.
universe@576 27 */
universe@576 28
universe@576 29 #include "cx/string.h"
universe@579 30 #include "cx/utils.h"
universe@579 31
universe@579 32 #include <string.h>
universe@579 33 #include <stdarg.h>
universe@579 34 #include <stdint.h>
universe@581 35 #include <ctype.h>
universe@581 36
universe@581 37 #ifndef _WIN32
universe@581 38
universe@581 39 #include <strings.h> /* for strncasecmp() */
universe@581 40
universe@581 41 #endif /* _WIN32 */
universe@579 42
universe@579 43 cxmutstr cx_mutstr(char *cstring) {
universe@579 44 return (cxmutstr) {cstring, strlen(cstring)};
universe@579 45 }
universe@579 46
universe@579 47 cxmutstr cx_mutstrn(
universe@579 48 char *cstring,
universe@579 49 size_t length
universe@579 50 ) {
universe@579 51 return (cxmutstr) {cstring, length};
universe@579 52 }
universe@579 53
universe@579 54 cxstring cx_str(const char *cstring) {
universe@579 55 return (cxstring) {cstring, strlen(cstring)};
universe@579 56 }
universe@579 57
universe@579 58 cxstring cx_strn(
universe@579 59 const char *cstring,
universe@579 60 size_t length
universe@579 61 ) {
universe@579 62 return (cxstring) {cstring, length};
universe@579 63 }
universe@579 64
universe@579 65 cxstring cx_strcast(cxmutstr str) {
universe@579 66 return (cxstring) {str.ptr, str.length};
universe@579 67 }
universe@579 68
universe@579 69 void cx_strfree(cxmutstr *str) {
universe@579 70 free(str->ptr);
universe@579 71 str->ptr = NULL;
universe@579 72 str->length = 0;
universe@579 73 }
universe@579 74
universe@579 75 size_t cx_strlen(
universe@579 76 size_t count,
universe@579 77 ...
universe@579 78 ) {
universe@579 79 if (count == 0) return 0;
universe@579 80
universe@579 81 va_list ap;
universe@579 82 va_start(ap, count);
universe@579 83 size_t size = 0;
universe@579 84 cx_for_n(i, count) {
universe@579 85 cxstring str = va_arg(ap, cxstring);
universe@579 86 size += str.length;
universe@579 87 }
universe@579 88 va_end(ap);
universe@579 89
universe@579 90 return size;
universe@579 91 }
universe@579 92
universe@579 93 cxmutstr cx_strcat_a(
universe@579 94 CxAllocator *alloc,
universe@579 95 size_t count,
universe@579 96 ...
universe@579 97 ) {
universe@579 98 cxstring *strings = calloc(count, sizeof(cxstring));
universe@579 99 if (!strings) abort();
universe@579 100
universe@579 101 va_list ap;
universe@579 102 va_start(ap, count);
universe@579 103
universe@579 104 // get all args and overall length
universe@579 105 size_t slen = 0;
universe@579 106 cx_for_n(i, count) {
universe@579 107 cxstring s = va_arg (ap, cxstring);
universe@579 108 strings[i] = s;
universe@579 109 slen += s.length;
universe@579 110 }
universe@579 111
universe@579 112 // create new string
universe@579 113 cxmutstr result;
universe@579 114 result.ptr = cxMalloc(alloc, slen + 1);
universe@579 115 result.length = slen;
universe@579 116 if (result.ptr == NULL) abort();
universe@579 117
universe@579 118 // concatenate strings
universe@579 119 size_t pos = 0;
universe@579 120 cx_for_n(i, count) {
universe@579 121 cxstring s = strings[i];
universe@579 122 memcpy(result.ptr + pos, s.ptr, s.length);
universe@579 123 pos += s.length;
universe@579 124 }
universe@579 125
universe@579 126 // terminate string
universe@579 127 result.ptr[result.length] = '\0';
universe@579 128
universe@579 129 // free temporary array
universe@579 130 free(strings);
universe@579 131
universe@579 132 return result;
universe@579 133 }
universe@579 134
universe@580 135 cxstring cx_strsubs(
universe@580 136 cxstring string,
universe@580 137 size_t start
universe@580 138 ) {
universe@580 139 return cx_strsubsl(string, start, string.length - start);
universe@580 140 }
universe@579 141
universe@580 142 cxmutstr cx_strsubs_m(
universe@580 143 cxmutstr string,
universe@580 144 size_t start
universe@580 145 ) {
universe@580 146 return cx_strsubsl_m(string, start, string.length - start);
universe@580 147 }
universe@579 148
universe@580 149 cxstring cx_strsubsl(
universe@580 150 cxstring string,
universe@580 151 size_t start,
universe@580 152 size_t length
universe@580 153 ) {
universe@580 154 if (start > string.length) {
universe@580 155 return (cxstring) {NULL, 0};
universe@580 156 }
universe@580 157
universe@580 158 size_t rem_len = string.length - start;
universe@580 159 if (length > rem_len) {
universe@580 160 length = rem_len;
universe@580 161 }
universe@580 162
universe@580 163 return (cxstring) {string.ptr + start, length};
universe@580 164 }
universe@580 165
universe@580 166 cxmutstr cx_strsubsl_m(
universe@580 167 cxmutstr string,
universe@580 168 size_t start,
universe@580 169 size_t length
universe@580 170 ) {
universe@580 171 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
universe@580 172 return (cxmutstr) {(char *) result.ptr, result.length};
universe@580 173 }
universe@580 174
universe@580 175 cxstring cx_strchr(
universe@580 176 cxstring string,
universe@580 177 int chr
universe@580 178 ) {
universe@580 179 chr = 0xFF & chr;
universe@580 180 // TODO: improve by comparing multiple bytes at once
universe@580 181 cx_for_n(i, string.length) {
universe@580 182 if (string.ptr[i] == chr) {
universe@580 183 return cx_strsubs(string, i);
universe@580 184 }
universe@580 185 }
universe@580 186 return (cxstring) {NULL, 0};
universe@580 187 }
universe@580 188
universe@580 189 cxmutstr cx_strchr_m(
universe@580 190 cxmutstr string,
universe@580 191 int chr
universe@580 192 ) {
universe@580 193 cxstring result = cx_strchr(cx_strcast(string), chr);
universe@580 194 return (cxmutstr) {(char *) result.ptr, result.length};
universe@580 195 }
universe@580 196
universe@580 197 cxstring cx_strrchr(
universe@580 198 cxstring string,
universe@580 199 int chr
universe@580 200 ) {
universe@580 201 chr = 0xFF & chr;
universe@580 202 size_t i = string.length;
universe@580 203 while (i > 0) {
universe@580 204 i--;
universe@580 205 // TODO: improve by comparing multiple bytes at once
universe@580 206 if (string.ptr[i] == chr) {
universe@580 207 return cx_strsubs(string, i);
universe@580 208 }
universe@580 209 }
universe@580 210 return (cxstring) {NULL, 0};
universe@580 211 }
universe@580 212
universe@580 213 cxmutstr cx_strrchr_m(
universe@580 214 cxmutstr string,
universe@580 215 int chr
universe@580 216 ) {
universe@580 217 cxstring result = cx_strrchr(cx_strcast(string), chr);
universe@580 218 return (cxmutstr) {(char *) result.ptr, result.length};
universe@580 219 }
universe@580 220
universe@580 221 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
universe@580 222 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
universe@580 223
universe@580 224 #define ptable_w(useheap, ptable, index, src) do {\
universe@580 225 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
universe@580 226 else ((size_t*)ptable)[index] = src;\
universe@580 227 } while (0)
universe@580 228
universe@580 229
universe@580 230 cxstring cx_strstr(
universe@580 231 cxstring haystack,
universe@580 232 cxstring needle
universe@580 233 ) {
universe@580 234 if (needle.length == 0) {
universe@580 235 return haystack;
universe@580 236 }
universe@580 237
universe@580 238 /*
universe@580 239 * IMPORTANT:
universe@580 240 * Our prefix table contains the prefix length PLUS ONE
universe@580 241 * this is our decision, because we want to use the full range of size_t.
universe@580 242 * The original algorithm needs a (-1) at one single place,
universe@580 243 * and we want to avoid that.
universe@580 244 */
universe@580 245
universe@580 246 /* static prefix table */
universe@580 247 static uint8_t s_prefix_table[512];
universe@580 248
universe@580 249 /* check pattern length and use appropriate prefix table */
universe@580 250 /* if the pattern exceeds static prefix table, allocate on the heap */
universe@580 251 register int useheap = needle.length >= 512;
universe@580 252 register void *ptable = useheap ? calloc(needle.length + 1,
universe@580 253 sizeof(size_t)) : s_prefix_table;
universe@580 254
universe@580 255 /* keep counter in registers */
universe@580 256 register size_t i, j;
universe@580 257
universe@580 258 /* fill prefix table */
universe@580 259 i = 0;
universe@580 260 j = 0;
universe@580 261 ptable_w(useheap, ptable, i, j);
universe@580 262 while (i < needle.length) {
universe@580 263 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
universe@580 264 ptable_r(j, useheap, ptable, j - 1);
universe@580 265 }
universe@580 266 i++;
universe@580 267 j++;
universe@580 268 ptable_w(useheap, ptable, i, j);
universe@580 269 }
universe@580 270
universe@580 271 /* search */
universe@580 272 cxstring result = {NULL, 0};
universe@580 273 i = 0;
universe@580 274 j = 1;
universe@580 275 while (i < haystack.length) {
universe@580 276 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
universe@580 277 ptable_r(j, useheap, ptable, j - 1);
universe@580 278 }
universe@580 279 i++;
universe@580 280 j++;
universe@580 281 if (j - 1 == needle.length) {
universe@580 282 size_t start = i - needle.length;
universe@580 283 result.ptr = haystack.ptr + start;
universe@580 284 result.length = haystack.length - start;
universe@580 285 break;
universe@580 286 }
universe@580 287 }
universe@580 288
universe@580 289 /* if prefix table was allocated on the heap, free it */
universe@580 290 if (ptable != s_prefix_table) {
universe@580 291 free(ptable);
universe@580 292 }
universe@580 293
universe@580 294 return result;
universe@580 295 }
universe@580 296
universe@580 297 cxmutstr cx_strstr_m(
universe@580 298 cxmutstr haystack,
universe@580 299 cxstring needle
universe@580 300 ) {
universe@580 301 cxstring result = cx_strstr(cx_strcast(haystack), needle);
universe@580 302 return (cxmutstr) {(char *) result.ptr, result.length};
universe@580 303 }
universe@580 304
universe@580 305 size_t cx_strsplit(
universe@580 306 cxstring string,
universe@580 307 cxstring delim,
universe@580 308 size_t limit,
universe@580 309 cxstring *output
universe@580 310 ) {
universe@580 311 // TODO: implement
universe@580 312 return 0;
universe@580 313 }
universe@580 314
universe@580 315 size_t cx_strsplit_a(
universe@580 316 CxAllocator *allocator,
universe@580 317 cxstring string,
universe@580 318 cxstring delim,
universe@580 319 size_t limit,
universe@580 320 cxstring **output
universe@580 321 ) {
universe@580 322 // TODO: implement
universe@580 323 return 0;
universe@580 324 }
universe@580 325
universe@580 326 size_t cx_strsplit_m(
universe@580 327 cxmutstr string,
universe@580 328 cxstring delim,
universe@580 329 size_t limit,
universe@580 330 cxmutstr *output
universe@580 331 ) {
universe@580 332 return cx_strsplit(cx_strcast(string),
universe@580 333 delim, limit, (cxstring *) output);
universe@580 334 }
universe@580 335
universe@580 336 size_t cx_strsplit_ma(
universe@580 337 CxAllocator *allocator,
universe@580 338 cxmutstr string,
universe@580 339 cxstring delim,
universe@580 340 size_t limit,
universe@580 341 cxmutstr **output
universe@580 342 ) {
universe@580 343 return cx_strsplit_a(allocator, cx_strcast(string),
universe@580 344 delim, limit, (cxstring **) output);
universe@580 345 }
universe@581 346
universe@581 347 int cx_strcmp(cxstring s1, cxstring s2) {
universe@581 348 if (s1.length == s2.length) {
universe@581 349 return memcmp(s1.ptr, s2.ptr, s1.length);
universe@581 350 } else if (s1.length > s2.length) {
universe@581 351 return 1;
universe@581 352 } else {
universe@581 353 return -1;
universe@581 354 }
universe@581 355 }
universe@581 356
universe@581 357 int cx_strcasecmp(cxstring s1, cxstring s2) {
universe@581 358 if (s1.length == s2.length) {
universe@581 359 #ifdef _WIN32
universe@581 360 return _strnicmp(s1.ptr, s2.ptr, s1.length);
universe@581 361 #else
universe@581 362 return strncasecmp(s1.ptr, s2.ptr, s1.length);
universe@581 363 #endif
universe@581 364 } else if (s1.length > s2.length) {
universe@581 365 return 1;
universe@581 366 } else {
universe@581 367 return -1;
universe@581 368 }
universe@581 369 }
universe@581 370
universe@581 371 cxmutstr cx_strdup_a(CxAllocator *allocator, cxstring string) {
universe@581 372 cxmutstr result = {
universe@581 373 cxMalloc(allocator, string.length + 1),
universe@581 374 string.length
universe@581 375 };
universe@581 376 if (result.ptr == NULL) {
universe@581 377 result.length = 0;
universe@581 378 return result;
universe@581 379 }
universe@581 380 memcpy(result.ptr, string.ptr, string.length);
universe@581 381 result.ptr[string.length] = '\0';
universe@581 382 return result;
universe@581 383 }
universe@581 384
universe@581 385 cxstring cx_strtrim(cxstring string) {
universe@581 386 cxstring result = string;
universe@581 387 // TODO: optimize by comparing multiple bytes at once
universe@581 388 while (result.length > 0 && isspace(*result.ptr)) {
universe@581 389 result.ptr++;
universe@581 390 result.length--;
universe@581 391 }
universe@581 392 while (result.length > 0 && isspace(result.ptr[result.length - 1])) {
universe@581 393 result.length--;
universe@581 394 }
universe@581 395 return result;
universe@581 396 }
universe@581 397
universe@581 398 cxmutstr cx_strtrim_m(cxmutstr string) {
universe@581 399 cxstring result = cx_strtrim(cx_strcast(string));
universe@581 400 return (cxmutstr) {(char *) result.ptr, result.length};
universe@581 401 }
universe@581 402
universe@581 403 bool cx_strprefix(cxstring string, cxstring prefix) {
universe@581 404 if (string.length < prefix.length) return false;
universe@581 405 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
universe@581 406 }
universe@581 407
universe@581 408 bool cx_strsuffix(cxstring string, cxstring suffix) {
universe@581 409 if (string.length < suffix.length) return false;
universe@581 410 return memcmp(string.ptr + string.length - suffix.length,
universe@581 411 suffix.ptr, suffix.length) == 0;
universe@581 412 }
universe@581 413
universe@581 414 bool cx_casestrprefix(cxstring string, cxstring prefix) {
universe@581 415 if (string.length < prefix.length) return false;
universe@581 416 #ifdef _WIN32
universe@581 417 return _strnicmp(string.ptr, prefix.ptr, prefix.length) == 0;
universe@581 418 #else
universe@581 419 return strncasecmp(string.ptr, prefix.ptr, prefix.length) == 0;
universe@581 420 #endif
universe@581 421 }
universe@581 422
universe@581 423 bool cx_casestrsuffix(cxstring string, cxstring suffix) {
universe@581 424 if (string.length < suffix.length) return false;
universe@581 425 #ifdef _WIN32
universe@581 426 return _strnicmp(string.ptr+string.length-suffix.length,
universe@581 427 suffix.ptr, suffix.length) == 0;
universe@581 428 #else
universe@581 429 return strncasecmp(string.ptr + string.length - suffix.length,
universe@581 430 suffix.ptr, suffix.length) == 0;
universe@581 431 #endif
universe@581 432 }

mercurial