src/string.c

changeset 251
fae240d633fc
parent 250
b7d1317b138e
child 259
2f5dea574a75
equal deleted inserted replaced
250:b7d1317b138e 251:fae240d633fc
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2017 Olaf Wintermann. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "ucx/string.h"
30
31 #include "ucx/allocator.h"
32
33 #include <stdlib.h>
34 #include <string.h>
35 #include <stdarg.h>
36 #include <stdint.h>
37 #include <ctype.h>
38
39 sstr_t sstr(char *cstring) {
40 sstr_t string;
41 string.ptr = cstring;
42 string.length = strlen(cstring);
43 return string;
44 }
45
46 sstr_t sstrn(char *cstring, size_t length) {
47 sstr_t string;
48 string.ptr = cstring;
49 string.length = length;
50 return string;
51 }
52
53 size_t sstrnlen(size_t n, sstr_t s, ...) {
54 va_list ap;
55 size_t size = s.length;
56 va_start(ap, s);
57
58 for (size_t i = 1 ; i < n ; i++) {
59 sstr_t str = va_arg(ap, sstr_t);
60 size += str.length;
61 }
62 va_end(ap);
63
64 return size;
65 }
66
67 static sstr_t sstrvcat_a(
68 UcxAllocator *a,
69 size_t count,
70 sstr_t s1,
71 sstr_t s2,
72 va_list ap) {
73 sstr_t str;
74 str.ptr = NULL;
75 str.length = 0;
76 if(count < 2) {
77 return str;
78 }
79
80 sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
81 if(!strings) {
82 return str;
83 }
84
85 // get all args and overall length
86 strings[0] = s1;
87 strings[1] = s2;
88 size_t strlen = s1.length + s2.length;
89 for (size_t i=2;i<count;i++) {
90 sstr_t s = va_arg (ap, sstr_t);
91 strings[i] = s;
92 strlen += s.length;
93 }
94
95 // create new string
96 str.ptr = (char*) almalloc(a, strlen + 1);
97 str.length = strlen;
98 if(!str.ptr) {
99 free(strings);
100 str.length = 0;
101 return str;
102 }
103
104 // concatenate strings
105 size_t pos = 0;
106 for (size_t i=0;i<count;i++) {
107 sstr_t s = strings[i];
108 memcpy(str.ptr + pos, s.ptr, s.length);
109 pos += s.length;
110 }
111
112 str.ptr[str.length] = '\0';
113
114 free(strings);
115
116 return str;
117 }
118
119 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
120 va_list ap;
121 va_start(ap, s2);
122 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
123 va_end(ap);
124 return s;
125 }
126
127 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
128 va_list ap;
129 va_start(ap, s2);
130 sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
131 va_end(ap);
132 return s;
133 }
134
135 sstr_t sstrsubs(sstr_t s, size_t start) {
136 return sstrsubsl (s, start, s.length-start);
137 }
138
139 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
140 sstr_t new_sstr;
141 if (start >= s.length) {
142 new_sstr.ptr = NULL;
143 new_sstr.length = 0;
144 } else {
145 if (length > s.length-start) {
146 length = s.length-start;
147 }
148 new_sstr.ptr = &s.ptr[start];
149 new_sstr.length = length;
150 }
151 return new_sstr;
152 }
153
154 sstr_t sstrchr(sstr_t s, int c) {
155 for(size_t i=0;i<s.length;i++) {
156 if(s.ptr[i] == c) {
157 return sstrsubs(s, i);
158 }
159 }
160 sstr_t n;
161 n.ptr = NULL;
162 n.length = 0;
163 return n;
164 }
165
166 sstr_t sstrrchr(sstr_t s, int c) {
167 if (s.length > 0) {
168 for(size_t i=s.length;i>0;i--) {
169 if(s.ptr[i-1] == c) {
170 return sstrsubs(s, i-1);
171 }
172 }
173 }
174 sstr_t n;
175 n.ptr = NULL;
176 n.length = 0;
177 return n;
178 }
179
180 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
181 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
182
183 #define ptable_w(useheap, ptable, index, src) do {\
184 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
185 else ((size_t*)ptable)[index] = src;\
186 } while (0);
187
188 sstr_t sstrstr(sstr_t string, sstr_t match) {
189 if (match.length == 0) {
190 return string;
191 }
192
193 /* prepare default return value in case of no match */
194 sstr_t result = sstrn(NULL, 0);
195
196 /*
197 * IMPORTANT:
198 * our prefix table contains the prefix length PLUS ONE
199 * this is our decision, because we want to use the full range of size_t
200 * the original algorithm needs a (-1) at one single place
201 * and we want to avoid that
202 */
203
204 /* static prefix table */
205 static uint8_t s_prefix_table[256];
206
207 /* check pattern length and use appropriate prefix table */
208 /* if the pattern exceeds static prefix table, allocate on the heap */
209 register int useheap = match.length > 255;
210 register void* ptable = useheap ?
211 calloc(match.length+1, sizeof(size_t)): s_prefix_table;
212
213 /* keep counter in registers */
214 register size_t i, j;
215
216 /* fill prefix table */
217 i = 0; j = 0;
218 ptable_w(useheap, ptable, i, j);
219 while (i < match.length) {
220 while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
221 ptable_r(j, useheap, ptable, j-1);
222 }
223 i++; j++;
224 ptable_w(useheap, ptable, i, j);
225 }
226
227 /* search */
228 i = 0; j = 1;
229 while (i < string.length) {
230 while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
231 ptable_r(j, useheap, ptable, j-1);
232 }
233 i++; j++;
234 if (j-1 == match.length) {
235 size_t start = i - match.length;
236 result.ptr = string.ptr + start;
237 result.length = string.length - start;
238 break;
239 }
240 }
241
242 /* if prefix table was allocated on the heap, free it */
243 if (ptable != s_prefix_table) {
244 free(ptable);
245 }
246
247 return result;
248 }
249
250 #undef ptable_r
251 #undef ptable_w
252
253 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
254 return sstrsplit_a(ucx_default_allocator(), s, d, n);
255 }
256
257 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
258 if (s.length == 0 || d.length == 0) {
259 *n = -1;
260 return NULL;
261 }
262
263 /* special cases: delimiter is at least as large as the string */
264 if (d.length >= s.length) {
265 /* exact match */
266 if (sstrcmp(s, d) == 0) {
267 *n = 0;
268 return NULL;
269 } else /* no match possible */ {
270 *n = 1;
271 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
272 *result = sstrdup_a(allocator, s);
273 return result;
274 }
275 }
276
277 ssize_t nmax = *n;
278 size_t arrlen = 16;
279 sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
280
281 if (result) {
282 sstr_t curpos = s;
283 ssize_t j = 1;
284 while (1) {
285 sstr_t match;
286 /* optimize for one byte delimiters */
287 if (d.length == 1) {
288 match = curpos;
289 for (size_t i = 0 ; i < curpos.length ; i++) {
290 if (curpos.ptr[i] == *(d.ptr)) {
291 match.ptr = curpos.ptr + i;
292 break;
293 }
294 match.length--;
295 }
296 } else {
297 match = sstrstr(curpos, d);
298 }
299 if (match.length > 0) {
300 /* is this our last try? */
301 if (nmax == 0 || j < nmax) {
302 /* copy the current string to the array */
303 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
304 result[j-1] = sstrdup_a(allocator, item);
305 size_t processed = item.length + d.length;
306 curpos.ptr += processed;
307 curpos.length -= processed;
308
309 /* allocate memory for the next string */
310 j++;
311 if (j > arrlen) {
312 arrlen *= 2;
313 sstr_t* reallocated = (sstr_t*) alrealloc(
314 allocator, result, arrlen*sizeof(sstr_t));
315 if (reallocated) {
316 result = reallocated;
317 } else {
318 for (ssize_t i = 0 ; i < j-1 ; i++) {
319 alfree(allocator, result[i].ptr);
320 }
321 alfree(allocator, result);
322 *n = -2;
323 return NULL;
324 }
325 }
326 } else {
327 /* nmax reached, copy the _full_ remaining string */
328 result[j-1] = sstrdup_a(allocator, curpos);
329 break;
330 }
331 } else {
332 /* no more matches, copy last string */
333 result[j-1] = sstrdup_a(allocator, curpos);
334 break;
335 }
336 }
337 *n = j;
338 } else {
339 *n = -2;
340 }
341
342 return result;
343 }
344
345 int sstrcmp(sstr_t s1, sstr_t s2) {
346 if (s1.length == s2.length) {
347 return memcmp(s1.ptr, s2.ptr, s1.length);
348 } else if (s1.length > s2.length) {
349 return 1;
350 } else {
351 return -1;
352 }
353 }
354
355 int sstrcasecmp(sstr_t s1, sstr_t s2) {
356 if (s1.length == s2.length) {
357 #ifdef _WIN32
358 return _strnicmp(s1.ptr, s2.ptr, s1.length);
359 #else
360 return strncasecmp(s1.ptr, s2.ptr, s1.length);
361 #endif
362 } else if (s1.length > s2.length) {
363 return 1;
364 } else {
365 return -1;
366 }
367 }
368
369 sstr_t sstrdup(sstr_t s) {
370 return sstrdup_a(ucx_default_allocator(), s);
371 }
372
373 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
374 sstr_t newstring;
375 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
376 if (newstring.ptr) {
377 newstring.length = s.length;
378 newstring.ptr[newstring.length] = 0;
379
380 memcpy(newstring.ptr, s.ptr, s.length);
381 } else {
382 newstring.length = 0;
383 }
384
385 return newstring;
386 }
387
388 sstr_t sstrtrim(sstr_t string) {
389 sstr_t newstr = string;
390
391 while (newstr.length > 0 && isspace(*newstr.ptr)) {
392 newstr.ptr++;
393 newstr.length--;
394 }
395 while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
396 newstr.length--;
397 }
398
399 return newstr;
400 }
401
402 int sstrprefix(sstr_t string, sstr_t prefix) {
403 if (string.length == 0) {
404 return prefix.length == 0;
405 }
406 if (prefix.length == 0) {
407 return 1;
408 }
409
410 if (prefix.length > string.length) {
411 return 0;
412 } else {
413 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
414 }
415 }
416
417 int sstrsuffix(sstr_t string, sstr_t suffix) {
418 if (string.length == 0) {
419 return suffix.length == 0;
420 }
421 if (suffix.length == 0) {
422 return 1;
423 }
424
425 if (suffix.length > string.length) {
426 return 0;
427 } else {
428 return memcmp(string.ptr+string.length-suffix.length,
429 suffix.ptr, suffix.length) == 0;
430 }
431 }
432
433 sstr_t sstrlower(sstr_t string) {
434 sstr_t ret = sstrdup(string);
435 for (size_t i = 0; i < ret.length ; i++) {
436 ret.ptr[i] = tolower(ret.ptr[i]);
437 }
438 return ret;
439 }
440
441 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
442 sstr_t ret = sstrdup_a(allocator, string);
443 for (size_t i = 0; i < ret.length ; i++) {
444 ret.ptr[i] = tolower(ret.ptr[i]);
445 }
446 return ret;
447 }
448
449 sstr_t sstrupper(sstr_t string) {
450 sstr_t ret = sstrdup(string);
451 for (size_t i = 0; i < ret.length ; i++) {
452 ret.ptr[i] = toupper(ret.ptr[i]);
453 }
454 return ret;
455 }
456
457 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
458 sstr_t ret = sstrdup_a(allocator, string);
459 for (size_t i = 0; i < ret.length ; i++) {
460 ret.ptr[i] = toupper(ret.ptr[i]);
461 }
462 return ret;
463 }

mercurial