|
1 /* |
|
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
|
3 * |
|
4 * Copyright 2017 Olaf Wintermann. All rights reserved. |
|
5 * |
|
6 * Redistribution and use in source and binary forms, with or without |
|
7 * modification, are permitted provided that the following conditions are met: |
|
8 * |
|
9 * 1. Redistributions of source code must retain the above copyright |
|
10 * notice, this list of conditions and the following disclaimer. |
|
11 * |
|
12 * 2. Redistributions in binary form must reproduce the above copyright |
|
13 * notice, this list of conditions and the following disclaimer in the |
|
14 * documentation and/or other materials provided with the distribution. |
|
15 * |
|
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
|
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
26 * POSSIBILITY OF SUCH DAMAGE. |
|
27 */ |
|
28 |
|
29 #include "ucx/string.h" |
|
30 |
|
31 #include "ucx/allocator.h" |
|
32 |
|
33 #include <stdlib.h> |
|
34 #include <string.h> |
|
35 #include <stdarg.h> |
|
36 #include <stdint.h> |
|
37 #include <ctype.h> |
|
38 |
|
39 sstr_t sstr(char *cstring) { |
|
40 sstr_t string; |
|
41 string.ptr = cstring; |
|
42 string.length = strlen(cstring); |
|
43 return string; |
|
44 } |
|
45 |
|
46 sstr_t sstrn(char *cstring, size_t length) { |
|
47 sstr_t string; |
|
48 string.ptr = cstring; |
|
49 string.length = length; |
|
50 return string; |
|
51 } |
|
52 |
|
53 size_t sstrnlen(size_t n, sstr_t s, ...) { |
|
54 va_list ap; |
|
55 size_t size = s.length; |
|
56 va_start(ap, s); |
|
57 |
|
58 for (size_t i = 1 ; i < n ; i++) { |
|
59 sstr_t str = va_arg(ap, sstr_t); |
|
60 size += str.length; |
|
61 } |
|
62 va_end(ap); |
|
63 |
|
64 return size; |
|
65 } |
|
66 |
|
67 static sstr_t sstrvcat_a( |
|
68 UcxAllocator *a, |
|
69 size_t count, |
|
70 sstr_t s1, |
|
71 sstr_t s2, |
|
72 va_list ap) { |
|
73 sstr_t str; |
|
74 str.ptr = NULL; |
|
75 str.length = 0; |
|
76 if(count < 2) { |
|
77 return str; |
|
78 } |
|
79 |
|
80 sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t)); |
|
81 if(!strings) { |
|
82 return str; |
|
83 } |
|
84 |
|
85 // get all args and overall length |
|
86 strings[0] = s1; |
|
87 strings[1] = s2; |
|
88 size_t strlen = s1.length + s2.length; |
|
89 for (size_t i=2;i<count;i++) { |
|
90 sstr_t s = va_arg (ap, sstr_t); |
|
91 strings[i] = s; |
|
92 strlen += s.length; |
|
93 } |
|
94 |
|
95 // create new string |
|
96 str.ptr = (char*) almalloc(a, strlen + 1); |
|
97 str.length = strlen; |
|
98 if(!str.ptr) { |
|
99 free(strings); |
|
100 str.length = 0; |
|
101 return str; |
|
102 } |
|
103 |
|
104 // concatenate strings |
|
105 size_t pos = 0; |
|
106 for (size_t i=0;i<count;i++) { |
|
107 sstr_t s = strings[i]; |
|
108 memcpy(str.ptr + pos, s.ptr, s.length); |
|
109 pos += s.length; |
|
110 } |
|
111 |
|
112 str.ptr[str.length] = '\0'; |
|
113 |
|
114 free(strings); |
|
115 |
|
116 return str; |
|
117 } |
|
118 |
|
119 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) { |
|
120 va_list ap; |
|
121 va_start(ap, s2); |
|
122 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap); |
|
123 va_end(ap); |
|
124 return s; |
|
125 } |
|
126 |
|
127 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) { |
|
128 va_list ap; |
|
129 va_start(ap, s2); |
|
130 sstr_t s = sstrvcat_a(a, count, s1, s2, ap); |
|
131 va_end(ap); |
|
132 return s; |
|
133 } |
|
134 |
|
135 sstr_t sstrsubs(sstr_t s, size_t start) { |
|
136 return sstrsubsl (s, start, s.length-start); |
|
137 } |
|
138 |
|
139 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { |
|
140 sstr_t new_sstr; |
|
141 if (start >= s.length) { |
|
142 new_sstr.ptr = NULL; |
|
143 new_sstr.length = 0; |
|
144 } else { |
|
145 if (length > s.length-start) { |
|
146 length = s.length-start; |
|
147 } |
|
148 new_sstr.ptr = &s.ptr[start]; |
|
149 new_sstr.length = length; |
|
150 } |
|
151 return new_sstr; |
|
152 } |
|
153 |
|
154 sstr_t sstrchr(sstr_t s, int c) { |
|
155 for(size_t i=0;i<s.length;i++) { |
|
156 if(s.ptr[i] == c) { |
|
157 return sstrsubs(s, i); |
|
158 } |
|
159 } |
|
160 sstr_t n; |
|
161 n.ptr = NULL; |
|
162 n.length = 0; |
|
163 return n; |
|
164 } |
|
165 |
|
166 sstr_t sstrrchr(sstr_t s, int c) { |
|
167 if (s.length > 0) { |
|
168 for(size_t i=s.length;i>0;i--) { |
|
169 if(s.ptr[i-1] == c) { |
|
170 return sstrsubs(s, i-1); |
|
171 } |
|
172 } |
|
173 } |
|
174 sstr_t n; |
|
175 n.ptr = NULL; |
|
176 n.length = 0; |
|
177 return n; |
|
178 } |
|
179 |
|
180 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ |
|
181 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) |
|
182 |
|
183 #define ptable_w(useheap, ptable, index, src) do {\ |
|
184 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ |
|
185 else ((size_t*)ptable)[index] = src;\ |
|
186 } while (0); |
|
187 |
|
188 sstr_t sstrstr(sstr_t string, sstr_t match) { |
|
189 if (match.length == 0) { |
|
190 return string; |
|
191 } |
|
192 |
|
193 /* prepare default return value in case of no match */ |
|
194 sstr_t result = sstrn(NULL, 0); |
|
195 |
|
196 /* |
|
197 * IMPORTANT: |
|
198 * our prefix table contains the prefix length PLUS ONE |
|
199 * this is our decision, because we want to use the full range of size_t |
|
200 * the original algorithm needs a (-1) at one single place |
|
201 * and we want to avoid that |
|
202 */ |
|
203 |
|
204 /* static prefix table */ |
|
205 static uint8_t s_prefix_table[256]; |
|
206 |
|
207 /* check pattern length and use appropriate prefix table */ |
|
208 /* if the pattern exceeds static prefix table, allocate on the heap */ |
|
209 register int useheap = match.length > 255; |
|
210 register void* ptable = useheap ? |
|
211 calloc(match.length+1, sizeof(size_t)): s_prefix_table; |
|
212 |
|
213 /* keep counter in registers */ |
|
214 register size_t i, j; |
|
215 |
|
216 /* fill prefix table */ |
|
217 i = 0; j = 0; |
|
218 ptable_w(useheap, ptable, i, j); |
|
219 while (i < match.length) { |
|
220 while (j >= 1 && match.ptr[j-1] != match.ptr[i]) { |
|
221 ptable_r(j, useheap, ptable, j-1); |
|
222 } |
|
223 i++; j++; |
|
224 ptable_w(useheap, ptable, i, j); |
|
225 } |
|
226 |
|
227 /* search */ |
|
228 i = 0; j = 1; |
|
229 while (i < string.length) { |
|
230 while (j >= 1 && string.ptr[i] != match.ptr[j-1]) { |
|
231 ptable_r(j, useheap, ptable, j-1); |
|
232 } |
|
233 i++; j++; |
|
234 if (j-1 == match.length) { |
|
235 size_t start = i - match.length; |
|
236 result.ptr = string.ptr + start; |
|
237 result.length = string.length - start; |
|
238 break; |
|
239 } |
|
240 } |
|
241 |
|
242 /* if prefix table was allocated on the heap, free it */ |
|
243 if (ptable != s_prefix_table) { |
|
244 free(ptable); |
|
245 } |
|
246 |
|
247 return result; |
|
248 } |
|
249 |
|
250 #undef ptable_r |
|
251 #undef ptable_w |
|
252 |
|
253 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) { |
|
254 return sstrsplit_a(ucx_default_allocator(), s, d, n); |
|
255 } |
|
256 |
|
257 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) { |
|
258 if (s.length == 0 || d.length == 0) { |
|
259 *n = -1; |
|
260 return NULL; |
|
261 } |
|
262 |
|
263 /* special cases: delimiter is at least as large as the string */ |
|
264 if (d.length >= s.length) { |
|
265 /* exact match */ |
|
266 if (sstrcmp(s, d) == 0) { |
|
267 *n = 0; |
|
268 return NULL; |
|
269 } else /* no match possible */ { |
|
270 *n = 1; |
|
271 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); |
|
272 *result = sstrdup_a(allocator, s); |
|
273 return result; |
|
274 } |
|
275 } |
|
276 |
|
277 ssize_t nmax = *n; |
|
278 size_t arrlen = 16; |
|
279 sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t)); |
|
280 |
|
281 if (result) { |
|
282 sstr_t curpos = s; |
|
283 ssize_t j = 1; |
|
284 while (1) { |
|
285 sstr_t match; |
|
286 /* optimize for one byte delimiters */ |
|
287 if (d.length == 1) { |
|
288 match = curpos; |
|
289 for (size_t i = 0 ; i < curpos.length ; i++) { |
|
290 if (curpos.ptr[i] == *(d.ptr)) { |
|
291 match.ptr = curpos.ptr + i; |
|
292 break; |
|
293 } |
|
294 match.length--; |
|
295 } |
|
296 } else { |
|
297 match = sstrstr(curpos, d); |
|
298 } |
|
299 if (match.length > 0) { |
|
300 /* is this our last try? */ |
|
301 if (nmax == 0 || j < nmax) { |
|
302 /* copy the current string to the array */ |
|
303 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr); |
|
304 result[j-1] = sstrdup_a(allocator, item); |
|
305 size_t processed = item.length + d.length; |
|
306 curpos.ptr += processed; |
|
307 curpos.length -= processed; |
|
308 |
|
309 /* allocate memory for the next string */ |
|
310 j++; |
|
311 if (j > arrlen) { |
|
312 arrlen *= 2; |
|
313 sstr_t* reallocated = (sstr_t*) alrealloc( |
|
314 allocator, result, arrlen*sizeof(sstr_t)); |
|
315 if (reallocated) { |
|
316 result = reallocated; |
|
317 } else { |
|
318 for (ssize_t i = 0 ; i < j-1 ; i++) { |
|
319 alfree(allocator, result[i].ptr); |
|
320 } |
|
321 alfree(allocator, result); |
|
322 *n = -2; |
|
323 return NULL; |
|
324 } |
|
325 } |
|
326 } else { |
|
327 /* nmax reached, copy the _full_ remaining string */ |
|
328 result[j-1] = sstrdup_a(allocator, curpos); |
|
329 break; |
|
330 } |
|
331 } else { |
|
332 /* no more matches, copy last string */ |
|
333 result[j-1] = sstrdup_a(allocator, curpos); |
|
334 break; |
|
335 } |
|
336 } |
|
337 *n = j; |
|
338 } else { |
|
339 *n = -2; |
|
340 } |
|
341 |
|
342 return result; |
|
343 } |
|
344 |
|
345 int sstrcmp(sstr_t s1, sstr_t s2) { |
|
346 if (s1.length == s2.length) { |
|
347 return memcmp(s1.ptr, s2.ptr, s1.length); |
|
348 } else if (s1.length > s2.length) { |
|
349 return 1; |
|
350 } else { |
|
351 return -1; |
|
352 } |
|
353 } |
|
354 |
|
355 int sstrcasecmp(sstr_t s1, sstr_t s2) { |
|
356 if (s1.length == s2.length) { |
|
357 #ifdef _WIN32 |
|
358 return _strnicmp(s1.ptr, s2.ptr, s1.length); |
|
359 #else |
|
360 return strncasecmp(s1.ptr, s2.ptr, s1.length); |
|
361 #endif |
|
362 } else if (s1.length > s2.length) { |
|
363 return 1; |
|
364 } else { |
|
365 return -1; |
|
366 } |
|
367 } |
|
368 |
|
369 sstr_t sstrdup(sstr_t s) { |
|
370 return sstrdup_a(ucx_default_allocator(), s); |
|
371 } |
|
372 |
|
373 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) { |
|
374 sstr_t newstring; |
|
375 newstring.ptr = (char*)almalloc(allocator, s.length + 1); |
|
376 if (newstring.ptr) { |
|
377 newstring.length = s.length; |
|
378 newstring.ptr[newstring.length] = 0; |
|
379 |
|
380 memcpy(newstring.ptr, s.ptr, s.length); |
|
381 } else { |
|
382 newstring.length = 0; |
|
383 } |
|
384 |
|
385 return newstring; |
|
386 } |
|
387 |
|
388 sstr_t sstrtrim(sstr_t string) { |
|
389 sstr_t newstr = string; |
|
390 |
|
391 while (newstr.length > 0 && isspace(*newstr.ptr)) { |
|
392 newstr.ptr++; |
|
393 newstr.length--; |
|
394 } |
|
395 while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) { |
|
396 newstr.length--; |
|
397 } |
|
398 |
|
399 return newstr; |
|
400 } |
|
401 |
|
402 int sstrprefix(sstr_t string, sstr_t prefix) { |
|
403 if (string.length == 0) { |
|
404 return prefix.length == 0; |
|
405 } |
|
406 if (prefix.length == 0) { |
|
407 return 1; |
|
408 } |
|
409 |
|
410 if (prefix.length > string.length) { |
|
411 return 0; |
|
412 } else { |
|
413 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; |
|
414 } |
|
415 } |
|
416 |
|
417 int sstrsuffix(sstr_t string, sstr_t suffix) { |
|
418 if (string.length == 0) { |
|
419 return suffix.length == 0; |
|
420 } |
|
421 if (suffix.length == 0) { |
|
422 return 1; |
|
423 } |
|
424 |
|
425 if (suffix.length > string.length) { |
|
426 return 0; |
|
427 } else { |
|
428 return memcmp(string.ptr+string.length-suffix.length, |
|
429 suffix.ptr, suffix.length) == 0; |
|
430 } |
|
431 } |
|
432 |
|
433 sstr_t sstrlower(sstr_t string) { |
|
434 sstr_t ret = sstrdup(string); |
|
435 for (size_t i = 0; i < ret.length ; i++) { |
|
436 ret.ptr[i] = tolower(ret.ptr[i]); |
|
437 } |
|
438 return ret; |
|
439 } |
|
440 |
|
441 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) { |
|
442 sstr_t ret = sstrdup_a(allocator, string); |
|
443 for (size_t i = 0; i < ret.length ; i++) { |
|
444 ret.ptr[i] = tolower(ret.ptr[i]); |
|
445 } |
|
446 return ret; |
|
447 } |
|
448 |
|
449 sstr_t sstrupper(sstr_t string) { |
|
450 sstr_t ret = sstrdup(string); |
|
451 for (size_t i = 0; i < ret.length ; i++) { |
|
452 ret.ptr[i] = toupper(ret.ptr[i]); |
|
453 } |
|
454 return ret; |
|
455 } |
|
456 |
|
457 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) { |
|
458 sstr_t ret = sstrdup_a(allocator, string); |
|
459 for (size_t i = 0; i < ret.length ; i++) { |
|
460 ret.ptr[i] = toupper(ret.ptr[i]); |
|
461 } |
|
462 return ret; |
|
463 } |