Tue, 08 May 2018 12:49:56 +0200
makes sstrcat and sstrnlen scstr_t compatible
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "ucx/string.h"
31 #include "ucx/allocator.h"
33 #include <stdlib.h>
34 #include <string.h>
35 #include <stdarg.h>
36 #include <stdint.h>
37 #include <ctype.h>
39 sstr_t sstr(char *cstring) {
40 sstr_t string;
41 string.ptr = cstring;
42 string.length = strlen(cstring);
43 return string;
44 }
46 sstr_t sstrn(char *cstring, size_t length) {
47 sstr_t string;
48 string.ptr = cstring;
49 string.length = length;
50 return string;
51 }
53 scstr_t scstr(const char *cstring) {
54 scstr_t string;
55 string.ptr = cstring;
56 string.length = strlen(cstring);
57 return string;
58 }
60 scstr_t scstrn(const char *cstring, size_t length) {
61 scstr_t string;
62 string.ptr = cstring;
63 string.length = length;
64 return string;
65 }
68 size_t ucx_strnlen(size_t n, ...) {
69 va_list ap;
70 va_start(ap, n);
72 size_t size = 0;
74 for (size_t i = 0 ; i < n ; i++) {
75 scstr_t str = va_arg(ap, scstr_t);
76 if(((size_t)-1) - str.length < size) {
77 size = 0;
78 break;
79 }
80 size += str.length;
81 }
82 va_end(ap);
84 return size;
85 }
87 static sstr_t sstrvcat_a(
88 UcxAllocator *a,
89 size_t count,
90 scstr_t s1,
91 va_list ap) {
92 sstr_t str;
93 str.ptr = NULL;
94 str.length = 0;
95 if(count < 2) {
96 return str;
97 }
99 scstr_t s2 = va_arg (ap, scstr_t);
101 if(((size_t)-1) - s1.length < s2.length) {
102 return str;
103 }
105 scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
106 if(!strings) {
107 return str;
108 }
110 // get all args and overall length
111 strings[0] = s1;
112 strings[1] = s2;
113 size_t slen = s1.length + s2.length;
114 int error = 0;
115 for (size_t i=2;i<count;i++) {
116 scstr_t s = va_arg (ap, scstr_t);
117 strings[i] = s;
118 if(((size_t)-1) - s.length < slen) {
119 error = 1;
120 break;
121 }
122 slen += s.length;
123 }
124 if(error) {
125 free(strings);
126 return str;
127 }
129 // create new string
130 str.ptr = (char*) almalloc(a, slen + 1);
131 str.length = slen;
132 if(!str.ptr) {
133 free(strings);
134 str.length = 0;
135 return str;
136 }
138 // concatenate strings
139 size_t pos = 0;
140 for (size_t i=0;i<count;i++) {
141 scstr_t s = strings[i];
142 memcpy(str.ptr + pos, s.ptr, s.length);
143 pos += s.length;
144 }
146 str.ptr[str.length] = '\0';
148 free(strings);
150 return str;
151 }
153 sstr_t ucx_strcat(size_t count, scstr_t s1, ...) {
154 va_list ap;
155 va_start(ap, s1);
156 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap);
157 va_end(ap);
158 return s;
159 }
161 sstr_t ucx_strcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) {
162 va_list ap;
163 va_start(ap, s1);
164 sstr_t s = sstrvcat_a(a, count, s1, ap);
165 va_end(ap);
166 return s;
167 }
169 sstr_t sstrsubs(sstr_t s, size_t start) {
170 return sstrsubsl (s, start, s.length-start);
171 }
173 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
174 sstr_t new_sstr;
175 if (start >= s.length) {
176 new_sstr.ptr = NULL;
177 new_sstr.length = 0;
178 } else {
179 if (length > s.length-start) {
180 length = s.length-start;
181 }
182 new_sstr.ptr = &s.ptr[start];
183 new_sstr.length = length;
184 }
185 return new_sstr;
186 }
188 sstr_t sstrchr(sstr_t s, int c) {
189 for(size_t i=0;i<s.length;i++) {
190 if(s.ptr[i] == c) {
191 return sstrsubs(s, i);
192 }
193 }
194 sstr_t n;
195 n.ptr = NULL;
196 n.length = 0;
197 return n;
198 }
200 sstr_t sstrrchr(sstr_t s, int c) {
201 if (s.length > 0) {
202 for(size_t i=s.length;i>0;i--) {
203 if(s.ptr[i-1] == c) {
204 return sstrsubs(s, i-1);
205 }
206 }
207 }
208 sstr_t n;
209 n.ptr = NULL;
210 n.length = 0;
211 return n;
212 }
/*
 * Prefix table accessors: the table holds uint8_t entries for short patterns
 * and size_t entries (heap allocated) for long ones.
 */
#define ptable_r(dest, useheap, ptable, index) ((dest) = (useheap) ? \
    ((size_t*)(ptable))[index] : (size_t) ((uint8_t*)(ptable))[index])

#define ptable_w(useheap, ptable, index, src) do {\
    if (!(useheap)) ((uint8_t*)(ptable))[index] = (uint8_t) (src);\
    else ((size_t*)(ptable))[index] = (src);\
    } while (0)

/**
 * Searches the first occurrence of a byte sequence within another one.
 *
 * The search uses a prefix table (Knuth-Morris-Pratt style). Patterns of up
 * to 255 bytes use a table on the stack; longer patterns use a heap table.
 * If that heap table cannot be allocated, a plain byte-wise search is used
 * instead, so the result does not depend on the allocation.
 *
 * @param str      the text to search in
 * @param length   length of the text
 * @param match    the pattern to search for
 * @param matchlen length of the pattern
 * @param newlen   receives the number of bytes from the start of the match
 *                 to the end of the text; the full length if the pattern is
 *                 empty, and 0 if there is no match
 * @return pointer to the first match, str itself if the pattern is empty,
 *         or NULL if the pattern does not occur
 */
const char* ucx_strstr(
        const char *str,
        size_t length,
        const char *match,
        size_t matchlen,
        size_t *newlen)
{
    *newlen = length;
    if (matchlen == 0) {
        return str;
    }

    /* a pattern that is longer than the text can never match */
    if (matchlen > length) {
        *newlen = 0;
        return NULL;
    }

    const char *result = NULL;
    size_t resultlen = 0;

    /*
     * IMPORTANT:
     * our prefix table contains the prefix length PLUS ONE
     * this is our decision, because we want to use the full range of size_t
     * the original algorithm needs a (-1) at one single place
     * and we want to avoid that
     */

    /* per-call prefix table for short patterns (keeps the function reentrant) */
    uint8_t small_table[256];

    /* check pattern length and use appropriate prefix table */
    /* if the pattern exceeds the small prefix table, allocate on the heap */
    int useheap = matchlen > 255;
    void *ptable = small_table;
    if (useheap) {
        ptable = calloc(matchlen + 1, sizeof(size_t));
        if (!ptable) {
            /* no memory for the table: fall back to a byte-wise search */
            for (size_t start = 0 ; start <= length - matchlen ; start++) {
                if (memcmp(str + start, match, matchlen) == 0) {
                    *newlen = length - start;
                    return str + start;
                }
            }
            *newlen = 0;
            return NULL;
        }
    }

    size_t i, j;

    /* fill prefix table */
    i = 0; j = 0;
    ptable_w(useheap, ptable, i, j);
    while (i < matchlen) {
        while (j >= 1 && match[j-1] != match[i]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        ptable_w(useheap, ptable, i, j);
    }

    /* search */
    i = 0; j = 1;
    while (i < length) {
        while (j >= 1 && str[i] != match[j-1]) {
            ptable_r(j, useheap, ptable, j-1);
        }
        i++; j++;
        if (j-1 == matchlen) {
            size_t start = i - matchlen;
            result = str + start;
            resultlen = length - start;
            break;
        }
    }

    /* if prefix table was allocated on the heap, free it */
    if (useheap) {
        free(ptable);
    }

    *newlen = resultlen;
    return result;
}
293 sstr_t ucx_sstrstr(sstr_t string, scstr_t match) {
294 sstr_t result;
296 size_t reslen;
297 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
298 if(!resstr) {
299 result.ptr = NULL;
300 result.length = 0;
301 return result;
302 }
304 size_t pos = resstr - string.ptr;
305 result.ptr = string.ptr + pos;
306 result.length = reslen;
308 return result;
309 }
311 scstr_t ucx_scstrstr(scstr_t string, scstr_t match) {
312 scstr_t result;
314 size_t reslen;
315 const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
316 if(!resstr) {
317 result.ptr = NULL;
318 result.length = 0;
319 return result;
320 }
322 size_t pos = resstr - string.ptr;
323 result.ptr = string.ptr + pos;
324 result.length = reslen;
326 return result;
327 }
329 #undef ptable_r
330 #undef ptable_w
332 sstr_t* ucx_strsplit(scstr_t s, scstr_t d, ssize_t *n) {
333 return ucx_strsplit_a(ucx_default_allocator(), s, d, n);
334 }
336 sstr_t* ucx_strsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
337 if (s.length == 0 || d.length == 0) {
338 *n = -1;
339 return NULL;
340 }
342 /* special cases: delimiter is at least as large as the string */
343 if (d.length >= s.length) {
344 /* exact match */
345 if (sstrcmp(s, d) == 0) {
346 *n = 0;
347 return NULL;
348 } else /* no match possible */ {
349 *n = 1;
350 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
351 if(result) {
352 *result = sstrdup_a(allocator, s);
353 } else {
354 *n = -2;
355 }
356 return result;
357 }
358 }
360 ssize_t nmax = *n;
361 size_t arrlen = 16;
362 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
364 if (result) {
365 scstr_t curpos = s;
366 ssize_t j = 1;
367 while (1) {
368 scstr_t match;
369 /* optimize for one byte delimiters */
370 if (d.length == 1) {
371 match = curpos;
372 for (size_t i = 0 ; i < curpos.length ; i++) {
373 if (curpos.ptr[i] == *(d.ptr)) {
374 match.ptr = curpos.ptr + i;
375 break;
376 }
377 match.length--;
378 }
379 } else {
380 match = scstrstr(curpos, d);
381 }
382 if (match.length > 0) {
383 /* is this our last try? */
384 if (nmax == 0 || j < nmax) {
385 /* copy the current string to the array */
386 scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
387 result[j-1] = sstrdup_a(allocator, item);
388 size_t processed = item.length + d.length;
389 curpos.ptr += processed;
390 curpos.length -= processed;
392 /* allocate memory for the next string */
393 j++;
394 if (j > arrlen) {
395 arrlen *= 2;
396 size_t reallocsz;
397 sstr_t* reallocated = NULL;
398 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
399 reallocated = (sstr_t*) alrealloc(
400 allocator, result, reallocsz);
401 }
402 if (reallocated) {
403 result = reallocated;
404 } else {
405 for (ssize_t i = 0 ; i < j-1 ; i++) {
406 alfree(allocator, result[i].ptr);
407 }
408 alfree(allocator, result);
409 *n = -2;
410 return NULL;
411 }
412 }
413 } else {
414 /* nmax reached, copy the _full_ remaining string */
415 result[j-1] = sstrdup_a(allocator, curpos);
416 break;
417 }
418 } else {
419 /* no more matches, copy last string */
420 result[j-1] = sstrdup_a(allocator, curpos);
421 break;
422 }
423 }
424 *n = j;
425 } else {
426 *n = -2;
427 }
429 return result;
430 }
432 int ucx_str_cmp(scstr_t s1, scstr_t s2) {
433 if (s1.length == s2.length) {
434 return memcmp(s1.ptr, s2.ptr, s1.length);
435 } else if (s1.length > s2.length) {
436 return 1;
437 } else {
438 return -1;
439 }
440 }
442 int ucx_str_casecmp(scstr_t s1, scstr_t s2) {
443 if (s1.length == s2.length) {
444 #ifdef _WIN32
445 return _strnicmp(s1.ptr, s2.ptr, s1.length);
446 #else
447 return strncasecmp(s1.ptr, s2.ptr, s1.length);
448 #endif
449 } else if (s1.length > s2.length) {
450 return 1;
451 } else {
452 return -1;
453 }
454 }
456 sstr_t scstrdup(scstr_t s) {
457 return sstrdup_a(ucx_default_allocator(), s);
458 }
460 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
461 sstr_t newstring;
462 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
463 if (newstring.ptr) {
464 newstring.length = s.length;
465 newstring.ptr[newstring.length] = 0;
467 memcpy(newstring.ptr, s.ptr, s.length);
468 } else {
469 newstring.length = 0;
470 }
472 return newstring;
473 }
/**
 * Determines the part of a character sequence that remains after removing
 * leading and trailing white space (as classified by isspace()).
 *
 * The characters are converted to unsigned char before classification, as
 * isspace() is only defined for such values and EOF.
 *
 * @param s      the character sequence
 * @param len    number of characters in s
 * @param newlen receives the length of the trimmed sequence
 * @return the number of leading characters to skip
 */
size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
    const char *newptr = s;
    size_t length = len;

    while (length > 0 && isspace((unsigned char) *newptr)) {
        newptr++;
        length--;
    }
    while (length > 0 && isspace((unsigned char) newptr[length-1])) {
        length--;
    }

    *newlen = length;
    return (size_t) (newptr - s);
}
492 sstr_t sstrtrim(sstr_t string) {
493 sstr_t newstr;
494 newstr.ptr = string.ptr
495 + ucx_strtrim(string.ptr, string.length, &newstr.length);
496 return newstr;
497 }
499 scstr_t scstrtrim(scstr_t string) {
500 scstr_t newstr;
501 newstr.ptr = string.ptr
502 + ucx_strtrim(string.ptr, string.length, &newstr.length);
503 return newstr;
504 }
506 int ucx_strprefix(scstr_t string, scstr_t prefix) {
507 if (string.length == 0) {
508 return prefix.length == 0;
509 }
510 if (prefix.length == 0) {
511 return 1;
512 }
514 if (prefix.length > string.length) {
515 return 0;
516 } else {
517 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
518 }
519 }
521 int ucx_strsuffix(scstr_t string, scstr_t suffix) {
522 if (string.length == 0) {
523 return suffix.length == 0;
524 }
525 if (suffix.length == 0) {
526 return 1;
527 }
529 if (suffix.length > string.length) {
530 return 0;
531 } else {
532 return memcmp(string.ptr+string.length-suffix.length,
533 suffix.ptr, suffix.length) == 0;
534 }
535 }
537 sstr_t ucx_strlower(scstr_t string) {
538 sstr_t ret = sstrdup(string);
539 for (size_t i = 0; i < ret.length ; i++) {
540 ret.ptr[i] = tolower(ret.ptr[i]);
541 }
542 return ret;
543 }
545 sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string) {
546 sstr_t ret = sstrdup_a(allocator, string);
547 for (size_t i = 0; i < ret.length ; i++) {
548 ret.ptr[i] = tolower(ret.ptr[i]);
549 }
550 return ret;
551 }
553 sstr_t ucx_strupper(scstr_t string) {
554 sstr_t ret = sstrdup(string);
555 for (size_t i = 0; i < ret.length ; i++) {
556 ret.ptr[i] = toupper(ret.ptr[i]);
557 }
558 return ret;
559 }
561 sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string) {
562 sstr_t ret = sstrdup_a(allocator, string);
563 for (size_t i = 0; i < ret.length ; i++) {
564 ret.ptr[i] = toupper(ret.ptr[i]);
565 }
566 return ret;
567 }
569 // private string conversion functions
570 scstr_t ucx_sc2sc(scstr_t c) {
571 return c;
572 }
573 scstr_t ucx_ss2sc(sstr_t str) {
574 scstr_t cs;
575 cs.ptr = str.ptr;
576 cs.length = str.length;
577 return cs;
578 }
579 scstr_t ucx_ss2c_s(scstr_t c) {
580 return c;
581 }