Sat, 05 Oct 2019 17:07:16 +0200
adds missing include for strncasecmp() to avoid an implicit declaration
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "ucx/string.h"
31 #include "ucx/allocator.h"
33 #include <stdlib.h>
34 #include <string.h>
35 #include <stdarg.h>
36 #include <stdint.h>
37 #include <ctype.h>
39 #ifndef _WIN32
40 #include <strings.h> /* for strncasecmp() */
41 #endif /* _WIN32 */
43 sstr_t sstr(char *cstring) {
44 sstr_t string;
45 string.ptr = cstring;
46 string.length = strlen(cstring);
47 return string;
48 }
50 sstr_t sstrn(char *cstring, size_t length) {
51 sstr_t string;
52 string.ptr = cstring;
53 string.length = length;
54 return string;
55 }
57 scstr_t scstr(const char *cstring) {
58 scstr_t string;
59 string.ptr = cstring;
60 string.length = strlen(cstring);
61 return string;
62 }
64 scstr_t scstrn(const char *cstring, size_t length) {
65 scstr_t string;
66 string.ptr = cstring;
67 string.length = length;
68 return string;
69 }
72 size_t scstrnlen(size_t n, ...) {
73 va_list ap;
74 va_start(ap, n);
76 size_t size = 0;
78 for (size_t i = 0 ; i < n ; i++) {
79 scstr_t str = va_arg(ap, scstr_t);
80 if(SIZE_MAX - str.length < size) {
81 size = SIZE_MAX;
82 break;
83 }
84 size += str.length;
85 }
86 va_end(ap);
88 return size;
89 }
91 static sstr_t sstrvcat_a(
92 UcxAllocator *a,
93 size_t count,
94 scstr_t s1,
95 va_list ap) {
96 sstr_t str;
97 str.ptr = NULL;
98 str.length = 0;
99 if(count < 2) {
100 return str;
101 }
103 scstr_t s2 = va_arg (ap, scstr_t);
105 if(((size_t)-1) - s1.length < s2.length) {
106 return str;
107 }
109 scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
110 if(!strings) {
111 return str;
112 }
114 // get all args and overall length
115 strings[0] = s1;
116 strings[1] = s2;
117 size_t slen = s1.length + s2.length;
118 int error = 0;
119 for (size_t i=2;i<count;i++) {
120 scstr_t s = va_arg (ap, scstr_t);
121 strings[i] = s;
122 if(((size_t)-1) - s.length < slen) {
123 error = 1;
124 break;
125 }
126 slen += s.length;
127 }
128 if(error) {
129 free(strings);
130 return str;
131 }
133 // create new string
134 str.ptr = (char*) almalloc(a, slen + 1);
135 str.length = slen;
136 if(!str.ptr) {
137 free(strings);
138 str.length = 0;
139 return str;
140 }
142 // concatenate strings
143 size_t pos = 0;
144 for (size_t i=0;i<count;i++) {
145 scstr_t s = strings[i];
146 memcpy(str.ptr + pos, s.ptr, s.length);
147 pos += s.length;
148 }
150 str.ptr[str.length] = '\0';
152 free(strings);
154 return str;
155 }
157 sstr_t scstrcat(size_t count, scstr_t s1, ...) {
158 va_list ap;
159 va_start(ap, s1);
160 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap);
161 va_end(ap);
162 return s;
163 }
165 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) {
166 va_list ap;
167 va_start(ap, s1);
168 sstr_t s = sstrvcat_a(a, count, s1, ap);
169 va_end(ap);
170 return s;
171 }
/*
 * Computes the bounds of a substring inside a string of str_length bytes.
 *
 * If start lies beyond the end of the string, both outputs are set to zero
 * and 0 is returned. Otherwise *newpos receives start, *newlen receives
 * length clamped to the bytes available after start, and 1 is returned.
 */
static int ucx_substring(
        size_t str_length,
        size_t start,
        size_t length,
        size_t *newlen,
        size_t *newpos)
{
    if (start > str_length) {
        *newlen = 0;
        *newpos = 0;
        return 0;
    }

    size_t available = str_length - start;
    *newpos = start;
    *newlen = length < available ? length : available;
    return 1;
}
195 sstr_t sstrsubs(sstr_t s, size_t start) {
196 return sstrsubsl (s, start, s.length-start);
197 }
199 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
200 size_t pos;
201 sstr_t ret = { NULL, 0 };
202 if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
203 ret.ptr = s.ptr + pos;
204 }
205 return ret;
206 }
208 scstr_t scstrsubs(scstr_t string, size_t start) {
209 return scstrsubsl(string, start, string.length-start);
210 }
212 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) {
213 size_t pos;
214 scstr_t ret = { NULL, 0 };
215 if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
216 ret.ptr = s.ptr + pos;
217 }
218 return ret;
219 }
/*
 * Finds the first occurrence of the byte chr within the first length bytes
 * of str.
 *
 * chr is converted to unsigned char before comparing (this is what memchr()
 * does), so bytes >= 0x80 are found regardless of whether the caller passes
 * them as a value in 0..255 or as a negative plain char.
 *
 * Returns 1 and stores the index in *pos on success; returns 0 and leaves
 * *pos untouched when the byte does not occur.
 */
static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) {
    /* an empty string may come with a NULL pointer - never hand it to memchr */
    if (length == 0) {
        return 0;
    }

    const char *hit = (const char*) memchr(str, chr, length);
    if (!hit) {
        return 0;
    }
    *pos = (size_t) (hit - str);
    return 1;
}
/*
 * Finds the last occurrence of the byte chr within the first length bytes
 * of str.
 *
 * Both the searched bytes and chr are compared as unsigned char, so bytes
 * >= 0x80 are found regardless of whether the caller passes them as a value
 * in 0..255 or as a negative plain char.
 *
 * Returns 1 and stores the index in *pos on success; returns 0 and leaves
 * *pos untouched when the byte does not occur.
 */
static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) {
    const unsigned char wanted = (unsigned char) chr;

    /* i is one past the inspected index so the loop can stop at zero */
    for (size_t i = length; i > 0; i--) {
        if ((unsigned char) str[i-1] == wanted) {
            *pos = i-1;
            return 1;
        }
    }
    return 0;
}
244 sstr_t sstrchr(sstr_t s, int c) {
245 size_t pos = 0;
246 if(ucx_strchr(s.ptr, s.length, c, &pos)) {
247 return sstrsubs(s, pos);
248 }
249 return sstrn(NULL, 0);
250 }
252 sstr_t sstrrchr(sstr_t s, int c) {
253 size_t pos = 0;
254 if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
255 return sstrsubs(s, pos);
256 }
257 return sstrn(NULL, 0);
258 }
260 scstr_t scstrchr(scstr_t s, int c) {
261 size_t pos = 0;
262 if(ucx_strchr(s.ptr, s.length, c, &pos)) {
263 return scstrsubs(s, pos);
264 }
265 return scstrn(NULL, 0);
266 }
268 scstr_t scstrrchr(scstr_t s, int c) {
269 size_t pos = 0;
270 if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
271 return scstrsubs(s, pos);
272 }
273 return scstrn(NULL, 0);
274 }
/*
 * Prefix table accessors.
 * Short patterns use a table of uint8_t entries, long patterns a table of
 * size_t entries; useheap selects which element type ptable points to.
 */

/* reads table entry index into dest, widened to size_t */
#define ptable_r(dest, useheap, ptable, index) ((dest) = (useheap) ? \
    ((size_t*)(ptable))[index] : (size_t) ((uint8_t*)(ptable))[index])

/* stores src into table entry index (no trailing semicolon: caller adds it) */
#define ptable_w(useheap, ptable, index, src) do {\
    if (!(useheap)) ((uint8_t*)(ptable))[index] = (uint8_t) (src);\
    else ((size_t*)(ptable))[index] = (src);\
    } while (0)

/*
 * Searches the first occurrence of match (matchlen bytes) in str
 * (length bytes) using a prefix table.
 *
 * Returns a pointer to the first occurrence and stores the number of bytes
 * from there to the end of str in *newlen. An empty pattern matches at the
 * start (returns str, *newlen == length). If there is no occurrence, NULL is
 * returned and *newlen is set to 0.
 *
 * Patterns of up to 255 bytes use a table on the stack; longer patterns
 * allocate the table on the heap. Should that allocation fail, the search is
 * still carried out with a plain byte-wise comparison.
 */
static const char* ucx_strstr(
        const char *str,
        size_t length,
        const char *match,
        size_t matchlen,
        size_t *newlen)
{
    *newlen = length;
    if (matchlen == 0) {
        return str;
    }

    const char *result = NULL;
    size_t resultlen = 0;

    /* a pattern longer than the text can never match: skip the table */
    if (matchlen > length) {
        *newlen = 0;
        return NULL;
    }

    /*
     * IMPORTANT:
     * our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t
     * the original algorithm needs a (-1) at one single place
     * and we want to avoid that
     */

    /* per-call table for short patterns; every entry is written before read */
    uint8_t small_table[256];

    /* check pattern length and use appropriate prefix table */
    /* if the pattern exceeds the small table, allocate on the heap */
    int useheap = matchlen > 255;
    void *ptable = small_table;
    if (useheap) {
        ptable = calloc(matchlen + 1, sizeof(size_t));
        if (!ptable) {
            /* no memory for the table: fall back to a naive scan */
            for (size_t start = 0; start <= length - matchlen; start++) {
                if (memcmp(str + start, match, matchlen) == 0) {
                    *newlen = length - start;
                    return str + start;
                }
            }
            *newlen = 0;
            return NULL;
        }
    }

    size_t i, j;

    /* fill prefix table */
    i = 0; j = 0;
    ptable_w(useheap, ptable, i, j);
    while (i < matchlen) {
        while (j >= 1 && match[j-1] != match[i]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        ptable_w(useheap, ptable, i, j);
    }

    /* search */
    i = 0; j = 1;
    while (i < length) {
        while (j >= 1 && str[i] != match[j-1]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        if (j-1 == matchlen) {
            size_t start = i - matchlen;
            result = str + start;
            resultlen = length - start;
            break;
        }
    }

    /* if prefix table was allocated on the heap, free it */
    if (useheap) {
        free(ptable);
    }

    *newlen = resultlen;
    return result;
}
355 sstr_t scstrsstr(sstr_t string, scstr_t match) {
356 sstr_t result;
358 size_t reslen;
359 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
360 if(!resstr) {
361 result.ptr = NULL;
362 result.length = 0;
363 return result;
364 }
366 size_t pos = resstr - string.ptr;
367 result.ptr = string.ptr + pos;
368 result.length = reslen;
370 return result;
371 }
373 scstr_t scstrscstr(scstr_t string, scstr_t match) {
374 scstr_t result;
376 size_t reslen;
377 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
378 if(!resstr) {
379 result.ptr = NULL;
380 result.length = 0;
381 return result;
382 }
384 size_t pos = resstr - string.ptr;
385 result.ptr = string.ptr + pos;
386 result.length = reslen;
388 return result;
389 }
391 #undef ptable_r
392 #undef ptable_w
394 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) {
395 return scstrsplit_a(ucx_default_allocator(), s, d, n);
396 }
398 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
399 if (s.length == 0 || d.length == 0) {
400 *n = -1;
401 return NULL;
402 }
404 /* special cases: delimiter is at least as large as the string */
405 if (d.length >= s.length) {
406 /* exact match */
407 if (sstrcmp(s, d) == 0) {
408 *n = 0;
409 return NULL;
410 } else /* no match possible */ {
411 *n = 1;
412 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
413 if(result) {
414 *result = sstrdup_a(allocator, s);
415 } else {
416 *n = -2;
417 }
418 return result;
419 }
420 }
422 ssize_t nmax = *n;
423 size_t arrlen = 16;
424 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
426 if (result) {
427 scstr_t curpos = s;
428 ssize_t j = 1;
429 while (1) {
430 scstr_t match;
431 /* optimize for one byte delimiters */
432 if (d.length == 1) {
433 match = curpos;
434 for (size_t i = 0 ; i < curpos.length ; i++) {
435 if (curpos.ptr[i] == *(d.ptr)) {
436 match.ptr = curpos.ptr + i;
437 break;
438 }
439 match.length--;
440 }
441 } else {
442 match = scstrscstr(curpos, d);
443 }
444 if (match.length > 0) {
445 /* is this our last try? */
446 if (nmax == 0 || j < nmax) {
447 /* copy the current string to the array */
448 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
449 result[j-1] = sstrdup_a(allocator, item);
450 size_t processed = item.length + d.length;
451 curpos.ptr += processed;
452 curpos.length -= processed;
454 /* allocate memory for the next string */
455 j++;
456 if (j > arrlen) {
457 arrlen *= 2;
458 size_t reallocsz;
459 sstr_t* reallocated = NULL;
460 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
461 reallocated = (sstr_t*) alrealloc(
462 allocator, result, reallocsz);
463 }
464 if (reallocated) {
465 result = reallocated;
466 } else {
467 for (ssize_t i = 0 ; i < j-1 ; i++) {
468 alfree(allocator, result[i].ptr);
469 }
470 alfree(allocator, result);
471 *n = -2;
472 return NULL;
473 }
474 }
475 } else {
476 /* nmax reached, copy the _full_ remaining string */
477 result[j-1] = sstrdup_a(allocator, curpos);
478 break;
479 }
480 } else {
481 /* no more matches, copy last string */
482 result[j-1] = sstrdup_a(allocator, curpos);
483 break;
484 }
485 }
486 *n = j;
487 } else {
488 *n = -2;
489 }
491 return result;
492 }
494 int scstrcmp(scstr_t s1, scstr_t s2) {
495 if (s1.length == s2.length) {
496 return memcmp(s1.ptr, s2.ptr, s1.length);
497 } else if (s1.length > s2.length) {
498 return 1;
499 } else {
500 return -1;
501 }
502 }
504 int scstrcasecmp(scstr_t s1, scstr_t s2) {
505 if (s1.length == s2.length) {
506 #ifdef _WIN32
507 return _strnicmp(s1.ptr, s2.ptr, s1.length);
508 #else
509 return strncasecmp(s1.ptr, s2.ptr, s1.length);
510 #endif
511 } else if (s1.length > s2.length) {
512 return 1;
513 } else {
514 return -1;
515 }
516 }
518 sstr_t scstrdup(scstr_t s) {
519 return sstrdup_a(ucx_default_allocator(), s);
520 }
522 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
523 sstr_t newstring;
524 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
525 if (newstring.ptr) {
526 newstring.length = s.length;
527 newstring.ptr[newstring.length] = 0;
529 memcpy(newstring.ptr, s.ptr, s.length);
530 } else {
531 newstring.length = 0;
532 }
534 return newstring;
535 }
/*
 * Determines the bounds of s (len bytes) without leading and trailing
 * whitespace as classified by isspace().
 *
 * Returns the number of leading bytes to skip and stores the length of the
 * trimmed string in *newlen.
 *
 * Each byte is converted to unsigned char before it is passed to isspace(),
 * because passing a negative plain char is undefined behavior.
 */
static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
    const char *newptr = s;
    size_t length = len;

    while (length > 0 && isspace((unsigned char) *newptr)) {
        newptr++;
        length--;
    }
    while (length > 0 && isspace((unsigned char) newptr[length-1])) {
        length--;
    }

    *newlen = length;
    return (size_t) (newptr - s);
}
554 sstr_t sstrtrim(sstr_t string) {
555 sstr_t newstr;
556 newstr.ptr = string.ptr
557 + ucx_strtrim(string.ptr, string.length, &newstr.length);
558 return newstr;
559 }
561 scstr_t scstrtrim(scstr_t string) {
562 scstr_t newstr;
563 newstr.ptr = string.ptr
564 + ucx_strtrim(string.ptr, string.length, &newstr.length);
565 return newstr;
566 }
568 int scstrprefix(scstr_t string, scstr_t prefix) {
569 if (string.length == 0) {
570 return prefix.length == 0;
571 }
572 if (prefix.length == 0) {
573 return 1;
574 }
576 if (prefix.length > string.length) {
577 return 0;
578 } else {
579 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
580 }
581 }
583 int scstrsuffix(scstr_t string, scstr_t suffix) {
584 if (string.length == 0) {
585 return suffix.length == 0;
586 }
587 if (suffix.length == 0) {
588 return 1;
589 }
591 if (suffix.length > string.length) {
592 return 0;
593 } else {
594 return memcmp(string.ptr+string.length-suffix.length,
595 suffix.ptr, suffix.length) == 0;
596 }
597 }
599 sstr_t scstrlower(scstr_t string) {
600 sstr_t ret = sstrdup(string);
601 for (size_t i = 0; i < ret.length ; i++) {
602 ret.ptr[i] = tolower(ret.ptr[i]);
603 }
604 return ret;
605 }
607 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) {
608 sstr_t ret = sstrdup_a(allocator, string);
609 for (size_t i = 0; i < ret.length ; i++) {
610 ret.ptr[i] = tolower(ret.ptr[i]);
611 }
612 return ret;
613 }
615 sstr_t scstrupper(scstr_t string) {
616 sstr_t ret = sstrdup(string);
617 for (size_t i = 0; i < ret.length ; i++) {
618 ret.ptr[i] = toupper(ret.ptr[i]);
619 }
620 return ret;
621 }
623 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) {
624 sstr_t ret = sstrdup_a(allocator, string);
625 for (size_t i = 0; i < ret.length ; i++) {
626 ret.ptr[i] = toupper(ret.ptr[i]);
627 }
628 return ret;
629 }
631 // type adjustment functions
632 scstr_t ucx_sc2sc(scstr_t str) {
633 return str;
634 }
635 scstr_t ucx_ss2sc(sstr_t str) {
636 scstr_t cs;
637 cs.ptr = str.ptr;
638 cs.length = str.length;
639 return cs;
640 }
641 scstr_t ucx_ss2c_s(scstr_t c) {
642 return c;
643 }