Sun, 01 Apr 2018 09:51:01 +0200
adapts sstrtrim, sstrsplit, sstrcmp and sstrstr to new const string API
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "ucx/string.h"
31 #include "ucx/allocator.h"
33 #include <stdlib.h>
34 #include <string.h>
35 #include <stdarg.h>
36 #include <stdint.h>
37 #include <ctype.h>
39 sstr_t sstr(char *cstring) {
40 sstr_t string;
41 string.ptr = cstring;
42 string.length = strlen(cstring);
43 return string;
44 }
46 sstr_t sstrn(char *cstring, size_t length) {
47 sstr_t string;
48 string.ptr = cstring;
49 string.length = length;
50 return string;
51 }
53 scstr_t scstr(const char *cstring) {
54 scstr_t string;
55 string.ptr = cstring;
56 string.length = strlen(cstring);
57 return string;
58 }
60 scstr_t scstrn(const char *cstring, size_t length) {
61 scstr_t string;
62 string.ptr = cstring;
63 string.length = length;
64 return string;
65 }
68 size_t sstrnlen(size_t n, sstr_t s, ...) {
69 va_list ap;
70 size_t size = s.length;
71 va_start(ap, s);
73 for (size_t i = 1 ; i < n ; i++) {
74 sstr_t str = va_arg(ap, sstr_t);
75 if(((size_t)-1) - str.length < size) {
76 size = 0;
77 break;
78 }
79 size += str.length;
80 }
81 va_end(ap);
83 return size;
84 }
86 static sstr_t sstrvcat_a(
87 UcxAllocator *a,
88 size_t count,
89 sstr_t s1,
90 sstr_t s2,
91 va_list ap) {
92 sstr_t str;
93 str.ptr = NULL;
94 str.length = 0;
95 if(count < 2) {
96 return str;
97 }
99 if(((size_t)-1) - s1.length < s2.length) {
100 return str;
101 }
103 sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
104 if(!strings) {
105 return str;
106 }
108 // get all args and overall length
109 strings[0] = s1;
110 strings[1] = s2;
111 size_t slen = s1.length + s2.length;
112 int error = 0;
113 for (size_t i=2;i<count;i++) {
114 sstr_t s = va_arg (ap, sstr_t);
115 strings[i] = s;
116 if(((size_t)-1) - s.length < slen) {
117 error = 1;
118 break;
119 }
120 slen += s.length;
121 }
122 if(error) {
123 free(strings);
124 return str;
125 }
127 // create new string
128 str.ptr = (char*) almalloc(a, slen + 1);
129 str.length = slen;
130 if(!str.ptr) {
131 free(strings);
132 str.length = 0;
133 return str;
134 }
136 // concatenate strings
137 size_t pos = 0;
138 for (size_t i=0;i<count;i++) {
139 sstr_t s = strings[i];
140 memcpy(str.ptr + pos, s.ptr, s.length);
141 pos += s.length;
142 }
144 str.ptr[str.length] = '\0';
146 free(strings);
148 return str;
149 }
151 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
152 va_list ap;
153 va_start(ap, s2);
154 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
155 va_end(ap);
156 return s;
157 }
159 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
160 va_list ap;
161 va_start(ap, s2);
162 sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
163 va_end(ap);
164 return s;
165 }
167 sstr_t sstrsubs(sstr_t s, size_t start) {
168 return sstrsubsl (s, start, s.length-start);
169 }
171 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
172 sstr_t new_sstr;
173 if (start >= s.length) {
174 new_sstr.ptr = NULL;
175 new_sstr.length = 0;
176 } else {
177 if (length > s.length-start) {
178 length = s.length-start;
179 }
180 new_sstr.ptr = &s.ptr[start];
181 new_sstr.length = length;
182 }
183 return new_sstr;
184 }
186 sstr_t sstrchr(sstr_t s, int c) {
187 for(size_t i=0;i<s.length;i++) {
188 if(s.ptr[i] == c) {
189 return sstrsubs(s, i);
190 }
191 }
192 sstr_t n;
193 n.ptr = NULL;
194 n.length = 0;
195 return n;
196 }
198 sstr_t sstrrchr(sstr_t s, int c) {
199 if (s.length > 0) {
200 for(size_t i=s.length;i>0;i--) {
201 if(s.ptr[i-1] == c) {
202 return sstrsubs(s, i-1);
203 }
204 }
205 }
206 sstr_t n;
207 n.ptr = NULL;
208 n.length = 0;
209 return n;
210 }
/* Reads prefix table entry index into dest (size_t entries if useheap, uint8_t entries otherwise). */
#define ptable_r(dest, useheap, ptable, index) ((dest) = (useheap) ? \
    ((size_t*)(ptable))[index] : (size_t) ((uint8_t*)(ptable))[index])

/* Writes src to prefix table entry index (size_t entries if useheap, uint8_t entries otherwise). */
#define ptable_w(useheap, ptable, index, src) do {\
    if (!(useheap)) ((uint8_t*)(ptable))[index] = (uint8_t) (src);\
    else ((size_t*)(ptable))[index] = (src);\
    } while (0)

/*
 * Searches the first occurrence of match (matchlen bytes) in str (length
 * bytes) with a Knuth-Morris-Pratt style prefix table.
 *
 * Returns a pointer to the first occurrence and stores the number of bytes
 * from there to the end of str in *newlen. If matchlen is zero, str is
 * returned and *newlen is set to length. If there is no occurrence, NULL is
 * returned and *newlen is set to zero.
 *
 * Patterns of up to 255 bytes use a table on the stack. Longer patterns use
 * a heap table; if that allocation fails, a plain byte-wise search is used
 * instead, so the result does not depend on the allocation succeeding.
 */
const char* ucx_strstr(
        const char *str,
        size_t length,
        const char *match,
        size_t matchlen,
        size_t *newlen)
{
    *newlen = length;
    if (matchlen == 0) {
        return str;
    }

    const char *result = NULL;
    size_t resultlen = 0;

    /* a pattern longer than the string cannot occur in it */
    if (matchlen > length) {
        *newlen = 0;
        return NULL;
    }

    /*
     * IMPORTANT:
     * our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t
     * the original algorithm needs a (-1) at one single place
     * and we want to avoid that
     */

    /* per-call prefix table, so concurrent calls do not share state */
    uint8_t local_prefix_table[256];

    /* check pattern length and use appropriate prefix table */
    /* if the pattern exceeds the local prefix table, allocate on the heap */
    int useheap = matchlen > 255;
    void *ptable = local_prefix_table;
    if (useheap) {
        /* matchlen + 1 must not wrap around */
        ptable = matchlen < SIZE_MAX ?
                calloc(matchlen + 1, sizeof(size_t)) : NULL;
    }

    if (!ptable) {
        /* no memory for the table: fall back to a byte-wise search */
        for (size_t start = 0 ; start <= length - matchlen ; start++) {
            if (memcmp(str + start, match, matchlen) == 0) {
                result = str + start;
                resultlen = length - start;
                break;
            }
        }
        *newlen = resultlen;
        return result;
    }

    size_t i, j;

    /* fill prefix table */
    i = 0; j = 0;
    ptable_w(useheap, ptable, i, j);
    while (i < matchlen) {
        while (j >= 1 && match[j-1] != match[i]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        ptable_w(useheap, ptable, i, j);
    }

    /* search */
    i = 0; j = 1;
    while (i < length) {
        while (j >= 1 && str[i] != match[j-1]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        if (j-1 == matchlen) {
            size_t start = i - matchlen;
            result = str + start;
            resultlen = length - start;
            break;
        }
    }

    /* if prefix table was allocated on the heap, free it */
    if (useheap) {
        free(ptable);
    }

    *newlen = resultlen;
    return result;
}
291 sstr_t ucx_sstrstr(sstr_t string, scstr_t match) {
292 sstr_t result;
294 size_t reslen;
295 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
296 if(!resstr) {
297 result.ptr = NULL;
298 result.length = 0;
299 return result;
300 }
302 size_t pos = resstr - string.ptr;
303 result.ptr = string.ptr + pos;
304 result.length = reslen;
306 return result;
307 }
309 scstr_t ucx_scstrstr(scstr_t string, scstr_t match) {
310 scstr_t result;
312 size_t reslen;
313 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
314 if(!resstr) {
315 result.ptr = NULL;
316 result.length = 0;
317 return result;
318 }
320 size_t pos = resstr - string.ptr;
321 result.ptr = string.ptr + pos;
322 result.length = reslen;
324 return result;
325 }
327 #undef ptable_r
328 #undef ptable_w
330 sstr_t* ucx_strsplit(scstr_t s, scstr_t d, ssize_t *n) {
331 return ucx_strsplit_a(ucx_default_allocator(), s, d, n);
332 }
334 sstr_t* ucx_strsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
335 if (s.length == 0 || d.length == 0) {
336 *n = -1;
337 return NULL;
338 }
340 /* special cases: delimiter is at least as large as the string */
341 if (d.length >= s.length) {
342 /* exact match */
343 if (sstrcmp(s, d) == 0) {
344 *n = 0;
345 return NULL;
346 } else /* no match possible */ {
347 *n = 1;
348 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
349 if(result) {
350 *result = sstrdup_a(allocator, s);
351 } else {
352 *n = -2;
353 }
354 return result;
355 }
356 }
358 ssize_t nmax = *n;
359 size_t arrlen = 16;
360 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
362 if (result) {
363 scstr_t curpos = s;
364 ssize_t j = 1;
365 while (1) {
366 scstr_t match;
367 /* optimize for one byte delimiters */
368 if (d.length == 1) {
369 match = curpos;
370 for (size_t i = 0 ; i < curpos.length ; i++) {
371 if (curpos.ptr[i] == *(d.ptr)) {
372 match.ptr = curpos.ptr + i;
373 break;
374 }
375 match.length--;
376 }
377 } else {
378 match = scstrstr(curpos, d);
379 }
380 if (match.length > 0) {
381 /* is this our last try? */
382 if (nmax == 0 || j < nmax) {
383 /* copy the current string to the array */
384 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
385 result[j-1] = sstrdup_a(allocator, item);
386 size_t processed = item.length + d.length;
387 curpos.ptr += processed;
388 curpos.length -= processed;
390 /* allocate memory for the next string */
391 j++;
392 if (j > arrlen) {
393 arrlen *= 2;
394 size_t reallocsz;
395 sstr_t* reallocated = NULL;
396 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
397 reallocated = (sstr_t*) alrealloc(
398 allocator, result, reallocsz);
399 }
400 if (reallocated) {
401 result = reallocated;
402 } else {
403 for (ssize_t i = 0 ; i < j-1 ; i++) {
404 alfree(allocator, result[i].ptr);
405 }
406 alfree(allocator, result);
407 *n = -2;
408 return NULL;
409 }
410 }
411 } else {
412 /* nmax reached, copy the _full_ remaining string */
413 result[j-1] = sstrdup_a(allocator, curpos);
414 break;
415 }
416 } else {
417 /* no more matches, copy last string */
418 result[j-1] = sstrdup_a(allocator, curpos);
419 break;
420 }
421 }
422 *n = j;
423 } else {
424 *n = -2;
425 }
427 return result;
428 }
430 int ucx_str_cmp(scstr_t s1, scstr_t s2) {
431 if (s1.length == s2.length) {
432 return memcmp(s1.ptr, s2.ptr, s1.length);
433 } else if (s1.length > s2.length) {
434 return 1;
435 } else {
436 return -1;
437 }
438 }
440 int ucx_str_casecmp(scstr_t s1, scstr_t s2) {
441 if (s1.length == s2.length) {
442 #ifdef _WIN32
443 return _strnicmp(s1.ptr, s2.ptr, s1.length);
444 #else
445 return strncasecmp(s1.ptr, s2.ptr, s1.length);
446 #endif
447 } else if (s1.length > s2.length) {
448 return 1;
449 } else {
450 return -1;
451 }
452 }
454 sstr_t scstrdup(scstr_t s) {
455 return sstrdup_a(ucx_default_allocator(), s);
456 }
458 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
459 sstr_t newstring;
460 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
461 if (newstring.ptr) {
462 newstring.length = s.length;
463 newstring.ptr[newstring.length] = 0;
465 memcpy(newstring.ptr, s.ptr, s.length);
466 } else {
467 newstring.length = 0;
468 }
470 return newstring;
471 }
/*
 * Determines the part of the buffer s (len bytes) that remains after
 * removing leading and trailing whitespace as classified by isspace().
 *
 * Returns the number of leading bytes to skip and stores the length of the
 * trimmed part in *newlen. The buffer itself is not modified.
 */
size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
    size_t begin = 0;
    size_t end = len;

    /*
     * The <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behavior.
     */
    while (begin < end && isspace((unsigned char) s[begin])) {
        begin++;
    }
    while (end > begin && isspace((unsigned char) s[end-1])) {
        end--;
    }

    *newlen = end - begin;
    return begin;
}
490 sstr_t sstrtrim(sstr_t string) {
491 sstr_t newstr;
492 newstr.ptr = string.ptr
493 + ucx_strtrim(string.ptr, string.length, &newstr.length);
494 return newstr;
495 }
497 scstr_t scstrtrim(scstr_t string) {
498 scstr_t newstr;
499 newstr.ptr = string.ptr
500 + ucx_strtrim(string.ptr, string.length, &newstr.length);
501 return newstr;
502 }
504 int ucx_strprefix(scstr_t string, scstr_t prefix) {
505 if (string.length == 0) {
506 return prefix.length == 0;
507 }
508 if (prefix.length == 0) {
509 return 1;
510 }
512 if (prefix.length > string.length) {
513 return 0;
514 } else {
515 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
516 }
517 }
519 int ucx_strsuffix(scstr_t string, scstr_t suffix) {
520 if (string.length == 0) {
521 return suffix.length == 0;
522 }
523 if (suffix.length == 0) {
524 return 1;
525 }
527 if (suffix.length > string.length) {
528 return 0;
529 } else {
530 return memcmp(string.ptr+string.length-suffix.length,
531 suffix.ptr, suffix.length) == 0;
532 }
533 }
535 sstr_t ucx_strlower(scstr_t string) {
536 sstr_t ret = sstrdup(string);
537 for (size_t i = 0; i < ret.length ; i++) {
538 ret.ptr[i] = tolower(ret.ptr[i]);
539 }
540 return ret;
541 }
543 sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string) {
544 sstr_t ret = sstrdup_a(allocator, string);
545 for (size_t i = 0; i < ret.length ; i++) {
546 ret.ptr[i] = tolower(ret.ptr[i]);
547 }
548 return ret;
549 }
551 sstr_t ucx_strupper(scstr_t string) {
552 sstr_t ret = sstrdup(string);
553 for (size_t i = 0; i < ret.length ; i++) {
554 ret.ptr[i] = toupper(ret.ptr[i]);
555 }
556 return ret;
557 }
559 sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string) {
560 sstr_t ret = sstrdup_a(allocator, string);
561 for (size_t i = 0; i < ret.length ; i++) {
562 ret.ptr[i] = toupper(ret.ptr[i]);
563 }
564 return ret;
565 }
567 // private string conversion functions
568 scstr_t ucx_sc2sc(scstr_t c) {
569 return c;
570 }
571 scstr_t ucx_ss2sc(sstr_t str) {
572 scstr_t cs;
573 cs.ptr = str.ptr;
574 cs.length = str.length;
575 return cs;
576 }
577 scstr_t ucx_ss2c_s(scstr_t c) {
578 return c;
579 }