Sun, 11 Mar 2018 13:43:07 +0100
adds scstr_t struct for const strings and adapts some string functions
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "ucx/string.h"
31 #include "ucx/allocator.h"
33 #include <stdlib.h>
34 #include <string.h>
35 #include <stdarg.h>
36 #include <stdint.h>
37 #include <ctype.h>
39 sstr_t sstr(char *cstring) {
40 sstr_t string;
41 string.ptr = cstring;
42 string.length = strlen(cstring);
43 return string;
44 }
46 sstr_t sstrn(char *cstring, size_t length) {
47 sstr_t string;
48 string.ptr = cstring;
49 string.length = length;
50 return string;
51 }
53 scstr_t scstr(const char *cstring) {
54 scstr_t string;
55 string.ptr = cstring;
56 string.length = strlen(cstring);
57 return string;
58 }
60 scstr_t scstrn(const char *cstring, size_t length) {
61 scstr_t string;
62 string.ptr = cstring;
63 string.length = length;
64 return string;
65 }
68 size_t sstrnlen(size_t n, sstr_t s, ...) {
69 va_list ap;
70 size_t size = s.length;
71 va_start(ap, s);
73 for (size_t i = 1 ; i < n ; i++) {
74 sstr_t str = va_arg(ap, sstr_t);
75 if(((size_t)-1) - str.length < size) {
76 size = 0;
77 break;
78 }
79 size += str.length;
80 }
81 va_end(ap);
83 return size;
84 }
86 static sstr_t sstrvcat_a(
87 UcxAllocator *a,
88 size_t count,
89 sstr_t s1,
90 sstr_t s2,
91 va_list ap) {
92 sstr_t str;
93 str.ptr = NULL;
94 str.length = 0;
95 if(count < 2) {
96 return str;
97 }
99 if(((size_t)-1) - s1.length < s2.length) {
100 return str;
101 }
103 sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
104 if(!strings) {
105 return str;
106 }
108 // get all args and overall length
109 strings[0] = s1;
110 strings[1] = s2;
111 size_t slen = s1.length + s2.length;
112 int error = 0;
113 for (size_t i=2;i<count;i++) {
114 sstr_t s = va_arg (ap, sstr_t);
115 strings[i] = s;
116 if(((size_t)-1) - s.length < slen) {
117 error = 1;
118 break;
119 }
120 slen += s.length;
121 }
122 if(error) {
123 free(strings);
124 return str;
125 }
127 // create new string
128 str.ptr = (char*) almalloc(a, slen + 1);
129 str.length = slen;
130 if(!str.ptr) {
131 free(strings);
132 str.length = 0;
133 return str;
134 }
136 // concatenate strings
137 size_t pos = 0;
138 for (size_t i=0;i<count;i++) {
139 sstr_t s = strings[i];
140 memcpy(str.ptr + pos, s.ptr, s.length);
141 pos += s.length;
142 }
144 str.ptr[str.length] = '\0';
146 free(strings);
148 return str;
149 }
151 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
152 va_list ap;
153 va_start(ap, s2);
154 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
155 va_end(ap);
156 return s;
157 }
159 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
160 va_list ap;
161 va_start(ap, s2);
162 sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
163 va_end(ap);
164 return s;
165 }
167 sstr_t sstrsubs(sstr_t s, size_t start) {
168 return sstrsubsl (s, start, s.length-start);
169 }
171 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
172 sstr_t new_sstr;
173 if (start >= s.length) {
174 new_sstr.ptr = NULL;
175 new_sstr.length = 0;
176 } else {
177 if (length > s.length-start) {
178 length = s.length-start;
179 }
180 new_sstr.ptr = &s.ptr[start];
181 new_sstr.length = length;
182 }
183 return new_sstr;
184 }
186 sstr_t sstrchr(sstr_t s, int c) {
187 for(size_t i=0;i<s.length;i++) {
188 if(s.ptr[i] == c) {
189 return sstrsubs(s, i);
190 }
191 }
192 sstr_t n;
193 n.ptr = NULL;
194 n.length = 0;
195 return n;
196 }
198 sstr_t sstrrchr(sstr_t s, int c) {
199 if (s.length > 0) {
200 for(size_t i=s.length;i>0;i--) {
201 if(s.ptr[i-1] == c) {
202 return sstrsubs(s, i-1);
203 }
204 }
205 }
206 sstr_t n;
207 n.ptr = NULL;
208 n.length = 0;
209 return n;
210 }
212 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
213 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
215 #define ptable_w(useheap, ptable, index, src) do {\
216 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
217 else ((size_t*)ptable)[index] = src;\
218 } while (0);
220 sstr_t sstrstr(sstr_t string, sstr_t match) {
221 if (match.length == 0) {
222 return string;
223 }
225 /* prepare default return value in case of no match */
226 sstr_t result = sstrn(NULL, 0);
228 /*
229 * IMPORTANT:
230 * our prefix table contains the prefix length PLUS ONE
231 * this is our decision, because we want to use the full range of size_t
232 * the original algorithm needs a (-1) at one single place
233 * and we want to avoid that
234 */
236 /* static prefix table */
237 static uint8_t s_prefix_table[256];
239 /* check pattern length and use appropriate prefix table */
240 /* if the pattern exceeds static prefix table, allocate on the heap */
241 register int useheap = match.length > 255;
242 register void* ptable = useheap ?
243 calloc(match.length+1, sizeof(size_t)): s_prefix_table;
245 /* keep counter in registers */
246 register size_t i, j;
248 /* fill prefix table */
249 i = 0; j = 0;
250 ptable_w(useheap, ptable, i, j);
251 while (i < match.length) {
252 while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
253 ptable_r(j, useheap, ptable, j-1);
254 }
255 i++; j++;
256 ptable_w(useheap, ptable, i, j);
257 }
259 /* search */
260 i = 0; j = 1;
261 while (i < string.length) {
262 while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
263 ptable_r(j, useheap, ptable, j-1);
264 }
265 i++; j++;
266 if (j-1 == match.length) {
267 size_t start = i - match.length;
268 result.ptr = string.ptr + start;
269 result.length = string.length - start;
270 break;
271 }
272 }
274 /* if prefix table was allocated on the heap, free it */
275 if (ptable != s_prefix_table) {
276 free(ptable);
277 }
279 return result;
280 }
282 #undef ptable_r
283 #undef ptable_w
285 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
286 return sstrsplit_a(ucx_default_allocator(), s, d, n);
287 }
289 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
290 if (s.length == 0 || d.length == 0) {
291 *n = -1;
292 return NULL;
293 }
295 /* special cases: delimiter is at least as large as the string */
296 if (d.length >= s.length) {
297 /* exact match */
298 if (sstrcmp(s, d) == 0) {
299 *n = 0;
300 return NULL;
301 } else /* no match possible */ {
302 *n = 1;
303 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
304 if(result) {
305 *result = sstrdup_a(allocator, s);
306 } else {
307 *n = -2;
308 }
309 return result;
310 }
311 }
313 ssize_t nmax = *n;
314 size_t arrlen = 16;
315 sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
317 if (result) {
318 sstr_t curpos = s;
319 ssize_t j = 1;
320 while (1) {
321 sstr_t match;
322 /* optimize for one byte delimiters */
323 if (d.length == 1) {
324 match = curpos;
325 for (size_t i = 0 ; i < curpos.length ; i++) {
326 if (curpos.ptr[i] == *(d.ptr)) {
327 match.ptr = curpos.ptr + i;
328 break;
329 }
330 match.length--;
331 }
332 } else {
333 match = sstrstr(curpos, d);
334 }
335 if (match.length > 0) {
336 /* is this our last try? */
337 if (nmax == 0 || j < nmax) {
338 /* copy the current string to the array */
339 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
340 result[j-1] = sstrdup_a(allocator, item);
341 size_t processed = item.length + d.length;
342 curpos.ptr += processed;
343 curpos.length -= processed;
345 /* allocate memory for the next string */
346 j++;
347 if (j > arrlen) {
348 arrlen *= 2;
349 size_t reallocsz;
350 sstr_t* reallocated = NULL;
351 if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) {
352 reallocated = (sstr_t*) alrealloc(
353 allocator, result, reallocsz);
354 }
355 if (reallocated) {
356 result = reallocated;
357 } else {
358 for (ssize_t i = 0 ; i < j-1 ; i++) {
359 alfree(allocator, result[i].ptr);
360 }
361 alfree(allocator, result);
362 *n = -2;
363 return NULL;
364 }
365 }
366 } else {
367 /* nmax reached, copy the _full_ remaining string */
368 result[j-1] = sstrdup_a(allocator, curpos);
369 break;
370 }
371 } else {
372 /* no more matches, copy last string */
373 result[j-1] = sstrdup_a(allocator, curpos);
374 break;
375 }
376 }
377 *n = j;
378 } else {
379 *n = -2;
380 }
382 return result;
383 }
385 int sstrcmp(sstr_t s1, sstr_t s2) {
386 if (s1.length == s2.length) {
387 return memcmp(s1.ptr, s2.ptr, s1.length);
388 } else if (s1.length > s2.length) {
389 return 1;
390 } else {
391 return -1;
392 }
393 }
395 int sstrcasecmp(sstr_t s1, sstr_t s2) {
396 if (s1.length == s2.length) {
397 #ifdef _WIN32
398 return _strnicmp(s1.ptr, s2.ptr, s1.length);
399 #else
400 return strncasecmp(s1.ptr, s2.ptr, s1.length);
401 #endif
402 } else if (s1.length > s2.length) {
403 return 1;
404 } else {
405 return -1;
406 }
407 }
409 sstr_t scstrdup(scstr_t s) {
410 return sstrdup_a(ucx_default_allocator(), s);
411 }
413 sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
414 sstr_t newstring;
415 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
416 if (newstring.ptr) {
417 newstring.length = s.length;
418 newstring.ptr[newstring.length] = 0;
420 memcpy(newstring.ptr, s.ptr, s.length);
421 } else {
422 newstring.length = 0;
423 }
425 return newstring;
426 }
428 sstr_t sstrtrim(sstr_t string) {
429 sstr_t newstr = string;
431 while (newstr.length > 0 && isspace(*newstr.ptr)) {
432 newstr.ptr++;
433 newstr.length--;
434 }
435 while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
436 newstr.length--;
437 }
439 return newstr;
440 }
442 int ucx_strprefix(scstr_t string, scstr_t prefix) {
443 if (string.length == 0) {
444 return prefix.length == 0;
445 }
446 if (prefix.length == 0) {
447 return 1;
448 }
450 if (prefix.length > string.length) {
451 return 0;
452 } else {
453 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
454 }
455 }
457 int ucx_strsuffix(scstr_t string, scstr_t suffix) {
458 if (string.length == 0) {
459 return suffix.length == 0;
460 }
461 if (suffix.length == 0) {
462 return 1;
463 }
465 if (suffix.length > string.length) {
466 return 0;
467 } else {
468 return memcmp(string.ptr+string.length-suffix.length,
469 suffix.ptr, suffix.length) == 0;
470 }
471 }
473 sstr_t ucx_strlower(scstr_t string) {
474 sstr_t ret = sstrdup(string);
475 for (size_t i = 0; i < ret.length ; i++) {
476 ret.ptr[i] = tolower(ret.ptr[i]);
477 }
478 return ret;
479 }
481 sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string) {
482 sstr_t ret = sstrdup_a(allocator, string);
483 for (size_t i = 0; i < ret.length ; i++) {
484 ret.ptr[i] = tolower(ret.ptr[i]);
485 }
486 return ret;
487 }
489 sstr_t ucx_strupper(scstr_t string) {
490 sstr_t ret = sstrdup(string);
491 for (size_t i = 0; i < ret.length ; i++) {
492 ret.ptr[i] = toupper(ret.ptr[i]);
493 }
494 return ret;
495 }
497 sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string) {
498 sstr_t ret = sstrdup_a(allocator, string);
499 for (size_t i = 0; i < ret.length ; i++) {
500 ret.ptr[i] = toupper(ret.ptr[i]);
501 }
502 return ret;
503 }
505 // private string conversion functions
506 scstr_t ucx_sc2sc(scstr_t c) {
507 return c;
508 }
509 scstr_t ucx_ss2sc(sstr_t str) {
510 scstr_t cs;
511 cs.ptr = str.ptr;
512 cs.length = str.length;
513 return cs;
514 }
515 scstr_t ucx_ss2c_s(scstr_t c) {
516 return c;
517 }