Fri, 20 Dec 2019 14:29:41 +0100
adds proper cmake build targets
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "ucx/string.h"
31 #include "ucx/allocator.h"
33 #include <stdlib.h>
34 #include <string.h>
35 #include <stdarg.h>
36 #include <stdint.h>
37 #include <ctype.h>
39 #ifndef _WIN32
40 #include <strings.h> /* for strncasecmp() */
41 #endif /* _WIN32 */
43 sstr_t sstr(char *cstring) {
44 sstr_t string;
45 string.ptr = cstring;
46 string.length = strlen(cstring);
47 return string;
48 }
50 sstr_t sstrn(char *cstring, size_t length) {
51 sstr_t string;
52 string.ptr = cstring;
53 string.length = length;
54 return string;
55 }
57 scstr_t scstr(const char *cstring) {
58 scstr_t string;
59 string.ptr = cstring;
60 string.length = strlen(cstring);
61 return string;
62 }
64 scstr_t scstrn(const char *cstring, size_t length) {
65 scstr_t string;
66 string.ptr = cstring;
67 string.length = length;
68 return string;
69 }
72 size_t scstrnlen(size_t n, ...) {
73 if (n == 0) return 0;
75 va_list ap;
76 va_start(ap, n);
78 size_t size = 0;
80 for (size_t i = 0 ; i < n ; i++) {
81 scstr_t str = va_arg(ap, scstr_t);
82 if(SIZE_MAX - str.length < size) {
83 size = SIZE_MAX;
84 break;
85 }
86 size += str.length;
87 }
88 va_end(ap);
90 return size;
91 }
93 static sstr_t sstrvcat_a(
94 UcxAllocator *a,
95 size_t count,
96 scstr_t s1,
97 va_list ap) {
98 sstr_t str;
99 str.ptr = NULL;
100 str.length = 0;
101 if(count < 2) {
102 return str;
103 }
105 scstr_t s2 = va_arg (ap, scstr_t);
107 if(((size_t)-1) - s1.length < s2.length) {
108 return str;
109 }
111 scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
112 if(!strings) {
113 return str;
114 }
116 // get all args and overall length
117 strings[0] = s1;
118 strings[1] = s2;
119 size_t slen = s1.length + s2.length;
120 int error = 0;
121 for (size_t i=2;i<count;i++) {
122 scstr_t s = va_arg (ap, scstr_t);
123 strings[i] = s;
124 if(((size_t)-1) - s.length < slen) {
125 error = 1;
126 break;
127 }
128 slen += s.length;
129 }
130 if(error) {
131 free(strings);
132 return str;
133 }
135 // create new string
136 str.ptr = (char*) almalloc(a, slen + 1);
137 str.length = slen;
138 if(!str.ptr) {
139 free(strings);
140 str.length = 0;
141 return str;
142 }
144 // concatenate strings
145 size_t pos = 0;
146 for (size_t i=0;i<count;i++) {
147 scstr_t s = strings[i];
148 memcpy(str.ptr + pos, s.ptr, s.length);
149 pos += s.length;
150 }
152 str.ptr[str.length] = '\0';
154 free(strings);
156 return str;
157 }
159 sstr_t scstrcat(size_t count, scstr_t s1, ...) {
160 va_list ap;
161 va_start(ap, s1);
162 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap);
163 va_end(ap);
164 return s;
165 }
167 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) {
168 va_list ap;
169 va_start(ap, s1);
170 sstr_t s = sstrvcat_a(a, count, s1, ap);
171 va_end(ap);
172 return s;
173 }
/*
 * Computes position and length of a substring, clamped to the string bounds.
 *
 * str_length  length of the source string
 * start       requested start offset
 * length      requested substring length
 * newlen      receives the clamped length (0 on failure)
 * newpos      receives the start offset (0 on failure)
 *
 * Returns 1 on success and 0 if start lies beyond the end of the string.
 */
static int ucx_substring(
        size_t str_length,
        size_t start,
        size_t length,
        size_t *newlen,
        size_t *newpos)
{
    if (start > str_length) {
        *newlen = 0;
        *newpos = 0;
        return 0;
    }

    /* never hand out more characters than remain after start */
    size_t available = str_length - start;
    *newlen = (length > available) ? available : length;
    *newpos = start;
    return 1;
}
197 sstr_t sstrsubs(sstr_t s, size_t start) {
198 return sstrsubsl (s, start, s.length-start);
199 }
201 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
202 size_t pos;
203 sstr_t ret = { NULL, 0 };
204 if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
205 ret.ptr = s.ptr + pos;
206 }
207 return ret;
208 }
210 scstr_t scstrsubs(scstr_t string, size_t start) {
211 return scstrsubsl(string, start, string.length-start);
212 }
214 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) {
215 size_t pos;
216 scstr_t ret = { NULL, 0 };
217 if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
218 ret.ptr = s.ptr + pos;
219 }
220 return ret;
221 }
/*
 * Finds the first occurrence of a character in a length-delimited string.
 *
 * str     the characters to search
 * length  number of characters in str
 * chr     the character to look for; it is converted to char before the
 *         comparison (like strchr() does), so values passed as unsigned
 *         char (e.g. 0xE4) are also found when plain char is signed
 * pos     receives the index of the match; left untouched if none is found
 *
 * Returns 1 if the character was found and 0 otherwise.
 */
static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) {
    const char needle = (char) chr;
    for (size_t i = 0; i < length; i++) {
        if (str[i] == needle) {
            *pos = i;
            return 1;
        }
    }
    return 0;
}
/*
 * Finds the last occurrence of a character in a length-delimited string.
 *
 * str     the characters to search
 * length  number of characters in str
 * chr     the character to look for; it is converted to char before the
 *         comparison (like strrchr() does), so values passed as unsigned
 *         char (e.g. 0xE4) are also found when plain char is signed
 * pos     receives the index of the match; left untouched if none is found
 *
 * Returns 1 if the character was found and 0 otherwise.
 */
static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) {
    const char needle = (char) chr;
    /* i is one past the inspected index, which avoids unsigned underflow */
    for (size_t i = length; i > 0; i--) {
        if (str[i-1] == needle) {
            *pos = i-1;
            return 1;
        }
    }
    return 0;
}
246 sstr_t sstrchr(sstr_t s, int c) {
247 size_t pos = 0;
248 if(ucx_strchr(s.ptr, s.length, c, &pos)) {
249 return sstrsubs(s, pos);
250 }
251 return sstrn(NULL, 0);
252 }
254 sstr_t sstrrchr(sstr_t s, int c) {
255 size_t pos = 0;
256 if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
257 return sstrsubs(s, pos);
258 }
259 return sstrn(NULL, 0);
260 }
262 scstr_t scstrchr(scstr_t s, int c) {
263 size_t pos = 0;
264 if(ucx_strchr(s.ptr, s.length, c, &pos)) {
265 return scstrsubs(s, pos);
266 }
267 return scstrn(NULL, 0);
268 }
270 scstr_t scstrrchr(scstr_t s, int c) {
271 size_t pos = 0;
272 if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
273 return scstrsubs(s, pos);
274 }
275 return scstrn(NULL, 0);
276 }
/*
 * Accessors for the prefix table used by ucx_strstr().
 *
 * The table is either an array of uint8_t (useheap == 0) or an array of
 * size_t (useheap != 0). ptable_r reads entry index into dest, ptable_w
 * stores src into entry index.
 */
#define ptable_r(dest, useheap, ptable, index) ((dest) = (useheap) ? \
    ((size_t*)(ptable))[index] : (size_t) ((uint8_t*)(ptable))[index])

#define ptable_w(useheap, ptable, index, src) do {\
    if (!(useheap)) ((uint8_t*)(ptable))[index] = (uint8_t) (src);\
    else ((size_t*)(ptable))[index] = (src);\
    } while (0)


/*
 * Searches for the first occurrence of match within str.
 *
 * str       the characters to search in
 * length    number of characters in str
 * match     the pattern to search for
 * matchlen  number of characters in match
 * newlen    receives the number of characters from the start of the match
 *           to the end of str; 0 if there is no match
 *
 * Returns a pointer to the first match, str itself (with *newlen == length)
 * if the pattern is empty, or NULL if the pattern does not occur.
 *
 * The search uses a prefix table (Knuth-Morris-Pratt style). If the table
 * for a long pattern cannot be allocated, a plain memcmp() scan is used
 * instead, so the result does not depend on the allocation succeeding.
 */
static const char* ucx_strstr(
        const char *str,
        size_t length,
        const char *match,
        size_t matchlen,
        size_t *newlen)
{
    /* an empty pattern matches at the very beginning */
    if (matchlen == 0) {
        *newlen = length;
        return str;
    }

    *newlen = 0;

    /* a pattern longer than the string can never match */
    if (matchlen > length) {
        return NULL;
    }

    /*
     * IMPORTANT:
     * our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t
     * the original algorithm needs a (-1) at one single place
     * and we want to avoid that
     */

    /* per-call table for short patterns, so concurrent calls do not clash */
    uint8_t local_prefix_table[256];

    /* check pattern length and use appropriate prefix table */
    /* if the pattern exceeds the local prefix table, allocate on the heap */
    int useheap = matchlen > 255;
    void *ptable = local_prefix_table;
    if (useheap) {
        ptable = calloc(matchlen + 1, sizeof(size_t));
        if (!ptable) {
            /* no memory for the table: fall back to a naive scan */
            for (size_t k = 0; k <= length - matchlen; k++) {
                if (memcmp(str + k, match, matchlen) == 0) {
                    *newlen = length - k;
                    return str + k;
                }
            }
            return NULL;
        }
    }

    size_t i, j;

    /* fill prefix table */
    i = 0; j = 0;
    ptable_w(useheap, ptable, i, j);
    while (i < matchlen) {
        while (j >= 1 && match[j-1] != match[i]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        ptable_w(useheap, ptable, i, j);
    }

    /* search */
    const char *result = NULL;
    i = 0; j = 1;
    while (i < length) {
        while (j >= 1 && str[i] != match[j-1]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        if (j-1 == matchlen) {
            size_t start = i - matchlen;
            result = str + start;
            *newlen = length - start;
            break;
        }
    }

    /* if prefix table was allocated on the heap, free it */
    if (useheap) {
        free(ptable);
    }

    return result;
}
357 sstr_t scstrsstr(sstr_t string, scstr_t match) {
358 sstr_t result;
360 size_t reslen;
361 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
362 if(!resstr) {
363 result.ptr = NULL;
364 result.length = 0;
365 return result;
366 }
368 size_t pos = resstr - string.ptr;
369 result.ptr = string.ptr + pos;
370 result.length = reslen;
372 return result;
373 }
375 scstr_t scstrscstr(scstr_t string, scstr_t match) {
376 scstr_t result;
378 size_t reslen;
379 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
380 if(!resstr) {
381 result.ptr = NULL;
382 result.length = 0;
383 return result;
384 }
386 size_t pos = resstr - string.ptr;
387 result.ptr = string.ptr + pos;
388 result.length = reslen;
390 return result;
391 }
393 #undef ptable_r
394 #undef ptable_w
396 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) {
397 return scstrsplit_a(ucx_default_allocator(), s, d, n);
398 }
400 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
401 if (s.length == 0 || d.length == 0) {
402 *n = -1;
403 return NULL;
404 }
406 /* special cases: delimiter is at least as large as the string */
407 if (d.length >= s.length) {
408 /* exact match */
409 if (sstrcmp(s, d) == 0) {
410 *n = 0;
411 return NULL;
412 } else /* no match possible */ {
413 *n = 1;
414 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
415 if(result) {
416 *result = sstrdup_a(allocator, s);
417 } else {
418 *n = -2;
419 }
420 return result;
421 }
422 }
424 ssize_t nmax = *n;
425 size_t arrlen = 16;
426 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
428 if (result) {
429 scstr_t curpos = s;
430 ssize_t j = 1;
431 while (1) {
432 scstr_t match;
433 /* optimize for one byte delimiters */
434 if (d.length == 1) {
435 match = curpos;
436 for (size_t i = 0 ; i < curpos.length ; i++) {
437 if (curpos.ptr[i] == *(d.ptr)) {
438 match.ptr = curpos.ptr + i;
439 break;
440 }
441 match.length--;
442 }
443 } else {
444 match = scstrscstr(curpos, d);
445 }
446 if (match.length > 0) {
447 /* is this our last try? */
448 if (nmax == 0 || j < nmax) {
449 /* copy the current string to the array */
450 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
451 result[j-1] = sstrdup_a(allocator, item);
452 size_t processed = item.length + d.length;
453 curpos.ptr += processed;
454 curpos.length -= processed;
456 /* allocate memory for the next string */
457 j++;
458 if (j > arrlen) {
459 arrlen *= 2;
460 size_t reallocsz;
461 sstr_t* reallocated = NULL;
462 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
463 reallocated = (sstr_t*) alrealloc(
464 allocator, result, reallocsz);
465 }
466 if (reallocated) {
467 result = reallocated;
468 } else {
469 for (ssize_t i = 0 ; i < j-1 ; i++) {
470 alfree(allocator, result[i].ptr);
471 }
472 alfree(allocator, result);
473 *n = -2;
474 return NULL;
475 }
476 }
477 } else {
478 /* nmax reached, copy the _full_ remaining string */
479 result[j-1] = sstrdup_a(allocator, curpos);
480 break;
481 }
482 } else {
483 /* no more matches, copy last string */
484 result[j-1] = sstrdup_a(allocator, curpos);
485 break;
486 }
487 }
488 *n = j;
489 } else {
490 *n = -2;
491 }
493 return result;
494 }
496 int scstrcmp(scstr_t s1, scstr_t s2) {
497 if (s1.length == s2.length) {
498 return memcmp(s1.ptr, s2.ptr, s1.length);
499 } else if (s1.length > s2.length) {
500 return 1;
501 } else {
502 return -1;
503 }
504 }
506 int scstrcasecmp(scstr_t s1, scstr_t s2) {
507 if (s1.length == s2.length) {
508 #ifdef _WIN32
509 return _strnicmp(s1.ptr, s2.ptr, s1.length);
510 #else
511 return strncasecmp(s1.ptr, s2.ptr, s1.length);
512 #endif
513 } else if (s1.length > s2.length) {
514 return 1;
515 } else {
516 return -1;
517 }
518 }
520 sstr_t scstrdup(scstr_t s) {
521 return sstrdup_a(ucx_default_allocator(), s);
522 }
524 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
525 sstr_t newstring;
526 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
527 if (newstring.ptr) {
528 newstring.length = s.length;
529 newstring.ptr[newstring.length] = 0;
531 memcpy(newstring.ptr, s.ptr, s.length);
532 } else {
533 newstring.length = 0;
534 }
536 return newstring;
537 }
/*
 * Determines the part of a string that remains after removing leading and
 * trailing whitespace (as classified by isspace()).
 *
 * s       the characters to examine
 * len     number of characters in s
 * newlen  receives the length of the trimmed string
 *
 * Returns the number of leading characters to skip, i.e. the offset of
 * the trimmed string within s.
 */
static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
    const char *newptr = s;
    size_t length = len;

    /*
     * the <ctype.h> functions require a value representable as
     * unsigned char; passing a negative plain char is undefined behavior
     */
    while (length > 0 && isspace((unsigned char) *newptr)) {
        newptr++;
        length--;
    }
    while (length > 0 && isspace((unsigned char) newptr[length-1])) {
        length--;
    }

    *newlen = length;
    return (size_t) (newptr - s);
}
556 sstr_t sstrtrim(sstr_t string) {
557 sstr_t newstr;
558 newstr.ptr = string.ptr
559 + ucx_strtrim(string.ptr, string.length, &newstr.length);
560 return newstr;
561 }
563 scstr_t scstrtrim(scstr_t string) {
564 scstr_t newstr;
565 newstr.ptr = string.ptr
566 + ucx_strtrim(string.ptr, string.length, &newstr.length);
567 return newstr;
568 }
570 int scstrprefix(scstr_t string, scstr_t prefix) {
571 if (string.length == 0) {
572 return prefix.length == 0;
573 }
574 if (prefix.length == 0) {
575 return 1;
576 }
578 if (prefix.length > string.length) {
579 return 0;
580 } else {
581 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
582 }
583 }
585 int scstrsuffix(scstr_t string, scstr_t suffix) {
586 if (string.length == 0) {
587 return suffix.length == 0;
588 }
589 if (suffix.length == 0) {
590 return 1;
591 }
593 if (suffix.length > string.length) {
594 return 0;
595 } else {
596 return memcmp(string.ptr+string.length-suffix.length,
597 suffix.ptr, suffix.length) == 0;
598 }
599 }
601 int scstrcaseprefix(scstr_t string, scstr_t prefix) {
602 if (string.length == 0) {
603 return prefix.length == 0;
604 }
605 if (prefix.length == 0) {
606 return 1;
607 }
609 if (prefix.length > string.length) {
610 return 0;
611 } else {
612 scstr_t subs = scstrsubsl(string, 0, prefix.length);
613 return scstrcasecmp(subs, prefix) == 0;
614 }
615 }
617 int scstrcasesuffix(scstr_t string, scstr_t suffix) {
618 if (string.length == 0) {
619 return suffix.length == 0;
620 }
621 if (suffix.length == 0) {
622 return 1;
623 }
625 if (suffix.length > string.length) {
626 return 0;
627 } else {
628 scstr_t subs = scstrsubs(string, string.length-suffix.length);
629 return scstrcasecmp(subs, suffix) == 0;
630 }
631 }
633 sstr_t scstrlower(scstr_t string) {
634 sstr_t ret = sstrdup(string);
635 for (size_t i = 0; i < ret.length ; i++) {
636 ret.ptr[i] = tolower(ret.ptr[i]);
637 }
638 return ret;
639 }
641 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) {
642 sstr_t ret = sstrdup_a(allocator, string);
643 for (size_t i = 0; i < ret.length ; i++) {
644 ret.ptr[i] = tolower(ret.ptr[i]);
645 }
646 return ret;
647 }
649 sstr_t scstrupper(scstr_t string) {
650 sstr_t ret = sstrdup(string);
651 for (size_t i = 0; i < ret.length ; i++) {
652 ret.ptr[i] = toupper(ret.ptr[i]);
653 }
654 return ret;
655 }
657 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) {
658 sstr_t ret = sstrdup_a(allocator, string);
659 for (size_t i = 0; i < ret.length ; i++) {
660 ret.ptr[i] = toupper(ret.ptr[i]);
661 }
662 return ret;
663 }
665 // type adjustment functions
666 scstr_t ucx_sc2sc(scstr_t str) {
667 return str;
668 }
669 scstr_t ucx_ss2sc(sstr_t str) {
670 scstr_t cs;
671 cs.ptr = str.ptr;
672 cs.length = str.length;
673 return cs;
674 }
675 scstr_t ucx_ss2c_s(scstr_t c) {
676 return c;
677 }