Wed, 16 May 2018 19:33:31 +0200
Tags finalization of the scstr_t integration.
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "ucx/string.h"
31 #include "ucx/allocator.h"
33 #include <stdlib.h>
34 #include <string.h>
35 #include <stdarg.h>
36 #include <stdint.h>
37 #include <ctype.h>
39 sstr_t sstr(char *cstring) {
40 sstr_t string;
41 string.ptr = cstring;
42 string.length = strlen(cstring);
43 return string;
44 }
46 sstr_t sstrn(char *cstring, size_t length) {
47 sstr_t string;
48 string.ptr = cstring;
49 string.length = length;
50 return string;
51 }
53 scstr_t scstr(const char *cstring) {
54 scstr_t string;
55 string.ptr = cstring;
56 string.length = strlen(cstring);
57 return string;
58 }
60 scstr_t scstrn(const char *cstring, size_t length) {
61 scstr_t string;
62 string.ptr = cstring;
63 string.length = length;
64 return string;
65 }
68 size_t scstrnlen(size_t n, ...) {
69 va_list ap;
70 va_start(ap, n);
72 size_t size = 0;
74 for (size_t i = 0 ; i < n ; i++) {
75 scstr_t str = va_arg(ap, scstr_t);
76 if(SIZE_MAX - str.length < size) {
77 size = SIZE_MAX;
78 break;
79 }
80 size += str.length;
81 }
82 va_end(ap);
84 return size;
85 }
87 static sstr_t sstrvcat_a(
88 UcxAllocator *a,
89 size_t count,
90 scstr_t s1,
91 va_list ap) {
92 sstr_t str;
93 str.ptr = NULL;
94 str.length = 0;
95 if(count < 2) {
96 return str;
97 }
99 scstr_t s2 = va_arg (ap, scstr_t);
101 if(((size_t)-1) - s1.length < s2.length) {
102 return str;
103 }
105 scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
106 if(!strings) {
107 return str;
108 }
110 // get all args and overall length
111 strings[0] = s1;
112 strings[1] = s2;
113 size_t slen = s1.length + s2.length;
114 int error = 0;
115 for (size_t i=2;i<count;i++) {
116 scstr_t s = va_arg (ap, scstr_t);
117 strings[i] = s;
118 if(((size_t)-1) - s.length < slen) {
119 error = 1;
120 break;
121 }
122 slen += s.length;
123 }
124 if(error) {
125 free(strings);
126 return str;
127 }
129 // create new string
130 str.ptr = (char*) almalloc(a, slen + 1);
131 str.length = slen;
132 if(!str.ptr) {
133 free(strings);
134 str.length = 0;
135 return str;
136 }
138 // concatenate strings
139 size_t pos = 0;
140 for (size_t i=0;i<count;i++) {
141 scstr_t s = strings[i];
142 memcpy(str.ptr + pos, s.ptr, s.length);
143 pos += s.length;
144 }
146 str.ptr[str.length] = '\0';
148 free(strings);
150 return str;
151 }
153 sstr_t scstrcat(size_t count, scstr_t s1, ...) {
154 va_list ap;
155 va_start(ap, s1);
156 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap);
157 va_end(ap);
158 return s;
159 }
161 sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) {
162 va_list ap;
163 va_start(ap, s1);
164 sstr_t s = sstrvcat_a(a, count, s1, ap);
165 va_end(ap);
166 return s;
167 }
/*
 * Computes the bounds of a substring.
 *
 * str_length  length of the source string
 * start       requested start offset
 * length      requested substring length
 * newlen      receives the resulting length (clamped to what is available)
 * newpos      receives the resulting start offset
 *
 * Returns 1 on success. Returns 0, with both outputs set to zero, when
 * start lies beyond the end of the source string.
 */
static int ucx_substring(
        size_t str_length,
        size_t start,
        size_t length,
        size_t *newlen,
        size_t *newpos)
{
    if (start > str_length) {
        *newlen = 0;
        *newpos = 0;
        return 0;
    }

    size_t available = str_length - start;
    *newlen = length > available ? available : length;
    *newpos = start;
    return 1;
}
191 sstr_t sstrsubs(sstr_t s, size_t start) {
192 return sstrsubsl (s, start, s.length-start);
193 }
195 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
196 size_t pos;
197 sstr_t ret = { NULL, 0 };
198 if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
199 ret.ptr = s.ptr + pos;
200 }
201 return ret;
202 }
204 scstr_t scstrsubs(scstr_t string, size_t start) {
205 return scstrsubsl(string, start, string.length-start);
206 }
208 scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) {
209 size_t pos;
210 scstr_t ret = { NULL, 0 };
211 if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
212 ret.ptr = s.ptr + pos;
213 }
214 return ret;
215 }
/*
 * Finds the first occurrence of a byte within the first length bytes of str.
 *
 * chr is converted to unsigned char before comparing (as memchr() does), so
 * bytes >= 0x80 are found regardless of whether plain char is signed.
 *
 * Returns 1 and stores the index in *pos when the byte is found; returns 0
 * and leaves *pos untouched otherwise.
 */
static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) {
    if (length == 0) {
        /* never hand a possibly NULL pointer to memchr */
        return 0;
    }

    const char *hit = (const char*) memchr(str, chr, length);
    if (hit == NULL) {
        return 0;
    }
    *pos = (size_t) (hit - str);
    return 1;
}
/*
 * Finds the last occurrence of a byte within the first length bytes of str.
 *
 * Both operands are compared as unsigned char, so bytes >= 0x80 are found
 * regardless of whether plain char is signed.
 *
 * Returns 1 and stores the index in *pos when the byte is found; returns 0
 * and leaves *pos untouched otherwise.
 */
static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) {
    const unsigned char needle = (unsigned char) chr;

    /* i is one past the inspected index, so the loop ends cleanly at zero */
    for (size_t i = length; i > 0; i--) {
        if ((unsigned char) str[i-1] == needle) {
            *pos = i-1;
            return 1;
        }
    }
    return 0;
}
240 sstr_t sstrchr(sstr_t s, int c) {
241 size_t pos = 0;
242 if(ucx_strchr(s.ptr, s.length, c, &pos)) {
243 return sstrsubs(s, pos);
244 }
245 return sstrn(NULL, 0);
246 }
248 sstr_t sstrrchr(sstr_t s, int c) {
249 size_t pos = 0;
250 if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
251 return sstrsubs(s, pos);
252 }
253 return sstrn(NULL, 0);
254 }
256 scstr_t scstrchr(scstr_t s, int c) {
257 size_t pos = 0;
258 if(ucx_strchr(s.ptr, s.length, c, &pos)) {
259 return scstrsubs(s, pos);
260 }
261 return scstrn(NULL, 0);
262 }
264 scstr_t scstrrchr(scstr_t s, int c) {
265 size_t pos = 0;
266 if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
267 return scstrsubs(s, pos);
268 }
269 return scstrn(NULL, 0);
270 }
/*
 * Prefix table accessors.
 * The table holds uint8_t entries when it lives in the small local buffer
 * and size_t entries when it was allocated on the heap (useheap != 0).
 */
#define ptable_r(dest, useheap, ptable, index) ((dest) = (useheap) ? \
    ((size_t*)(ptable))[index] : (size_t) ((uint8_t*)(ptable))[index])

#define ptable_w(useheap, ptable, index, src) do {\
    if (!(useheap)) ((uint8_t*)(ptable))[index] = (uint8_t) (src);\
    else ((size_t*)(ptable))[index] = (src);\
    } while (0)

/*
 * Searches the first occurrence of match within str.
 *
 * str       text to search in
 * length    number of bytes in str
 * match     pattern to search for
 * matchlen  number of bytes in match
 * newlen    receives the number of bytes from the hit to the end of str
 *
 * Returns a pointer to the first occurrence, or NULL (with *newlen == 0)
 * when there is none. An empty pattern matches at the start: str is
 * returned and *newlen is set to length.
 *
 * Patterns of up to 255 bytes use a prefix table on the stack, longer ones
 * a heap allocated table. If that allocation fails, a plain byte-wise
 * search is performed instead, so the result is still correct.
 */
static const char* ucx_strstr(
        const char *str,
        size_t length,
        const char *match,
        size_t matchlen,
        size_t *newlen)
{
    *newlen = length;
    if (matchlen == 0) {
        return str;
    }

    const char *result = NULL;
    size_t resultlen = 0;

    /* a pattern longer than the text cannot occur; skip all table work */
    if (matchlen > length) {
        *newlen = 0;
        return NULL;
    }

    /*
     * IMPORTANT:
     * our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t
     * the original algorithm needs a (-1) at one single place
     * and we want to avoid that
     */

    /* local prefix table: every call has its own, no shared state */
    uint8_t s_prefix_table[256];

    /* check pattern length and use appropriate prefix table */
    /* if the pattern exceeds the local prefix table, allocate on the heap */
    int useheap = matchlen > 255;
    void *ptable = useheap ?
        calloc(matchlen+1, sizeof(size_t)) : (void*) s_prefix_table;

    if (ptable == NULL) {
        /* out of memory: fall back to a search that needs no table */
        for (size_t k = 0; k <= length - matchlen; k++) {
            if (memcmp(str + k, match, matchlen) == 0) {
                result = str + k;
                resultlen = length - k;
                break;
            }
        }
        *newlen = resultlen;
        return result;
    }

    size_t i, j;

    /* fill prefix table */
    i = 0; j = 0;
    ptable_w(useheap, ptable, i, j);
    while (i < matchlen) {
        while (j >= 1 && match[j-1] != match[i]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        ptable_w(useheap, ptable, i, j);
    }

    /* search */
    i = 0; j = 1;
    while (i < length) {
        while (j >= 1 && str[i] != match[j-1]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        if (j-1 == matchlen) {
            size_t start = i - matchlen;
            result = str + start;
            resultlen = length - start;
            break;
        }
    }

    /* if prefix table was allocated on the heap, free it */
    if (useheap) {
        free(ptable);
    }

    *newlen = resultlen;
    return result;
}
351 sstr_t scstrsstr(sstr_t string, scstr_t match) {
352 sstr_t result;
354 size_t reslen;
355 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
356 if(!resstr) {
357 result.ptr = NULL;
358 result.length = 0;
359 return result;
360 }
362 size_t pos = resstr - string.ptr;
363 result.ptr = string.ptr + pos;
364 result.length = reslen;
366 return result;
367 }
369 scstr_t scstrscstr(scstr_t string, scstr_t match) {
370 scstr_t result;
372 size_t reslen;
373 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
374 if(!resstr) {
375 result.ptr = NULL;
376 result.length = 0;
377 return result;
378 }
380 size_t pos = resstr - string.ptr;
381 result.ptr = string.ptr + pos;
382 result.length = reslen;
384 return result;
385 }
387 #undef ptable_r
388 #undef ptable_w
390 sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) {
391 return scstrsplit_a(ucx_default_allocator(), s, d, n);
392 }
394 sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
395 if (s.length == 0 || d.length == 0) {
396 *n = -1;
397 return NULL;
398 }
400 /* special cases: delimiter is at least as large as the string */
401 if (d.length >= s.length) {
402 /* exact match */
403 if (sstrcmp(s, d) == 0) {
404 *n = 0;
405 return NULL;
406 } else /* no match possible */ {
407 *n = 1;
408 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
409 if(result) {
410 *result = sstrdup_a(allocator, s);
411 } else {
412 *n = -2;
413 }
414 return result;
415 }
416 }
418 ssize_t nmax = *n;
419 size_t arrlen = 16;
420 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
422 if (result) {
423 scstr_t curpos = s;
424 ssize_t j = 1;
425 while (1) {
426 scstr_t match;
427 /* optimize for one byte delimiters */
428 if (d.length == 1) {
429 match = curpos;
430 for (size_t i = 0 ; i < curpos.length ; i++) {
431 if (curpos.ptr[i] == *(d.ptr)) {
432 match.ptr = curpos.ptr + i;
433 break;
434 }
435 match.length--;
436 }
437 } else {
438 match = scstrscstr(curpos, d);
439 }
440 if (match.length > 0) {
441 /* is this our last try? */
442 if (nmax == 0 || j < nmax) {
443 /* copy the current string to the array */
444 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
445 result[j-1] = sstrdup_a(allocator, item);
446 size_t processed = item.length + d.length;
447 curpos.ptr += processed;
448 curpos.length -= processed;
450 /* allocate memory for the next string */
451 j++;
452 if (j > arrlen) {
453 arrlen *= 2;
454 size_t reallocsz;
455 sstr_t* reallocated = NULL;
456 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
457 reallocated = (sstr_t*) alrealloc(
458 allocator, result, reallocsz);
459 }
460 if (reallocated) {
461 result = reallocated;
462 } else {
463 for (ssize_t i = 0 ; i < j-1 ; i++) {
464 alfree(allocator, result[i].ptr);
465 }
466 alfree(allocator, result);
467 *n = -2;
468 return NULL;
469 }
470 }
471 } else {
472 /* nmax reached, copy the _full_ remaining string */
473 result[j-1] = sstrdup_a(allocator, curpos);
474 break;
475 }
476 } else {
477 /* no more matches, copy last string */
478 result[j-1] = sstrdup_a(allocator, curpos);
479 break;
480 }
481 }
482 *n = j;
483 } else {
484 *n = -2;
485 }
487 return result;
488 }
490 int scstrcmp(scstr_t s1, scstr_t s2) {
491 if (s1.length == s2.length) {
492 return memcmp(s1.ptr, s2.ptr, s1.length);
493 } else if (s1.length > s2.length) {
494 return 1;
495 } else {
496 return -1;
497 }
498 }
500 int scstrcasecmp(scstr_t s1, scstr_t s2) {
501 if (s1.length == s2.length) {
502 #ifdef _WIN32
503 return _strnicmp(s1.ptr, s2.ptr, s1.length);
504 #else
505 return strncasecmp(s1.ptr, s2.ptr, s1.length);
506 #endif
507 } else if (s1.length > s2.length) {
508 return 1;
509 } else {
510 return -1;
511 }
512 }
514 sstr_t scstrdup(scstr_t s) {
515 return sstrdup_a(ucx_default_allocator(), s);
516 }
518 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
519 sstr_t newstring;
520 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
521 if (newstring.ptr) {
522 newstring.length = s.length;
523 newstring.ptr[newstring.length] = 0;
525 memcpy(newstring.ptr, s.ptr, s.length);
526 } else {
527 newstring.length = 0;
528 }
530 return newstring;
531 }
/*
 * Determines the bounds of s after removing leading and trailing whitespace
 * (as classified by isspace()).
 *
 * s       text to examine
 * len     number of bytes in s
 * newlen  receives the length of the trimmed text
 *
 * Returns the number of leading bytes that were skipped.
 */
static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
    size_t skipped = 0;
    size_t length = len;

    /* isspace() is only defined for unsigned char values (and EOF) */
    while (length > 0 && isspace((unsigned char) s[skipped])) {
        skipped++;
        length--;
    }
    while (length > 0 && isspace((unsigned char) s[skipped + length - 1])) {
        length--;
    }

    *newlen = length;
    return skipped;
}
550 sstr_t sstrtrim(sstr_t string) {
551 sstr_t newstr;
552 newstr.ptr = string.ptr
553 + ucx_strtrim(string.ptr, string.length, &newstr.length);
554 return newstr;
555 }
557 scstr_t scstrtrim(scstr_t string) {
558 scstr_t newstr;
559 newstr.ptr = string.ptr
560 + ucx_strtrim(string.ptr, string.length, &newstr.length);
561 return newstr;
562 }
564 int scstrprefix(scstr_t string, scstr_t prefix) {
565 if (string.length == 0) {
566 return prefix.length == 0;
567 }
568 if (prefix.length == 0) {
569 return 1;
570 }
572 if (prefix.length > string.length) {
573 return 0;
574 } else {
575 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
576 }
577 }
579 int scstrsuffix(scstr_t string, scstr_t suffix) {
580 if (string.length == 0) {
581 return suffix.length == 0;
582 }
583 if (suffix.length == 0) {
584 return 1;
585 }
587 if (suffix.length > string.length) {
588 return 0;
589 } else {
590 return memcmp(string.ptr+string.length-suffix.length,
591 suffix.ptr, suffix.length) == 0;
592 }
593 }
595 sstr_t scstrlower(scstr_t string) {
596 sstr_t ret = sstrdup(string);
597 for (size_t i = 0; i < ret.length ; i++) {
598 ret.ptr[i] = tolower(ret.ptr[i]);
599 }
600 return ret;
601 }
603 sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) {
604 sstr_t ret = sstrdup_a(allocator, string);
605 for (size_t i = 0; i < ret.length ; i++) {
606 ret.ptr[i] = tolower(ret.ptr[i]);
607 }
608 return ret;
609 }
611 sstr_t scstrupper(scstr_t string) {
612 sstr_t ret = sstrdup(string);
613 for (size_t i = 0; i < ret.length ; i++) {
614 ret.ptr[i] = toupper(ret.ptr[i]);
615 }
616 return ret;
617 }
619 sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) {
620 sstr_t ret = sstrdup_a(allocator, string);
621 for (size_t i = 0; i < ret.length ; i++) {
622 ret.ptr[i] = toupper(ret.ptr[i]);
623 }
624 return ret;
625 }
627 // type adjustment functions
628 scstr_t ucx_sc2sc(scstr_t str) {
629 return str;
630 }
631 scstr_t ucx_ss2sc(sstr_t str) {
632 scstr_t cs;
633 cs.ptr = str.ptr;
634 cs.length = str.length;
635 return cs;
636 }
637 scstr_t ucx_ss2c_s(scstr_t c) {
638 return c;
639 }