Thu, 23 Feb 2017 14:30:12 +0100
improves sstrstr function by using KMP string search algorithm
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2016 Olaf Wintermann. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdarg.h>
32 #include <stdint.h>
33 #include <ctype.h>
35 #include "string.h"
36 #include "allocator.h"
38 sstr_t sstr(char *cstring) {
39 sstr_t string;
40 string.ptr = cstring;
41 string.length = strlen(cstring);
42 return string;
43 }
45 sstr_t sstrn(char *cstring, size_t length) {
46 sstr_t string;
47 string.ptr = cstring;
48 string.length = length;
49 return string;
50 }
52 size_t sstrnlen(size_t n, sstr_t s, ...) {
53 va_list ap;
54 size_t size = s.length;
55 va_start(ap, s);
57 for (size_t i = 1 ; i < n ; i++) {
58 sstr_t str = va_arg(ap, sstr_t);
59 size += str.length;
60 }
61 va_end(ap);
63 return size;
64 }
66 static sstr_t sstrvcat_a(
67 UcxAllocator *a,
68 size_t count,
69 sstr_t s1,
70 sstr_t s2,
71 va_list ap) {
72 sstr_t str;
73 str.ptr = NULL;
74 str.length = 0;
75 if(count < 2) {
76 return str;
77 }
79 sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
80 if(!strings) {
81 return str;
82 }
84 // get all args and overall length
85 strings[0] = s1;
86 strings[1] = s2;
87 size_t strlen = s1.length + s2.length;
88 for (size_t i=2;i<count;i++) {
89 sstr_t s = va_arg (ap, sstr_t);
90 strings[i] = s;
91 strlen += s.length;
92 }
94 // create new string
95 str.ptr = (char*) almalloc(a, strlen + 1);
96 str.length = strlen;
97 if(!str.ptr) {
98 free(strings);
99 str.length = 0;
100 return str;
101 }
103 // concatenate strings
104 size_t pos = 0;
105 for (size_t i=0;i<count;i++) {
106 sstr_t s = strings[i];
107 memcpy(str.ptr + pos, s.ptr, s.length);
108 pos += s.length;
109 }
111 str.ptr[str.length] = '\0';
113 free(strings);
115 return str;
116 }
118 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
119 va_list ap;
120 va_start(ap, s2);
121 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
122 va_end(ap);
123 return s;
124 }
126 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
127 va_list ap;
128 va_start(ap, s2);
129 sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
130 va_end(ap);
131 return s;
132 }
134 sstr_t sstrsubs(sstr_t s, size_t start) {
135 return sstrsubsl (s, start, s.length-start);
136 }
138 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
139 sstr_t new_sstr;
140 if (start >= s.length) {
141 new_sstr.ptr = NULL;
142 new_sstr.length = 0;
143 } else {
144 if (length > s.length-start) {
145 length = s.length-start;
146 }
147 new_sstr.ptr = &s.ptr[start];
148 new_sstr.length = length;
149 }
150 return new_sstr;
151 }
153 sstr_t sstrchr(sstr_t s, int c) {
154 for(size_t i=0;i<s.length;i++) {
155 if(s.ptr[i] == c) {
156 return sstrsubs(s, i);
157 }
158 }
159 sstr_t n;
160 n.ptr = NULL;
161 n.length = 0;
162 return n;
163 }
165 sstr_t sstrrchr(sstr_t s, int c) {
166 if (s.length > 0) {
167 for(size_t i=s.length;i>0;i--) {
168 if(s.ptr[i-1] == c) {
169 return sstrsubs(s, i-1);
170 }
171 }
172 }
173 sstr_t n;
174 n.ptr = NULL;
175 n.length = 0;
176 return n;
177 }
179 typedef size_t(*prefix_table_rfun)(void*,size_t);
180 typedef void(*prefix_table_wfun)(void*,size_t,size_t);
182 static size_t s_prefix_table_r(void* table, size_t index) {
183 return (size_t) ((uint8_t*)table)[index];
184 }
185 static void s_prefix_table_w(void* table, size_t index, size_t value) {
186 ((uint8_t*)table)[index] = (uint8_t) value;
187 }
188 static size_t h_prefix_table_r(void* table, size_t index) {
189 return ((size_t*)table)[index];
190 }
191 static void h_prefix_table_w(void* table, size_t index, size_t value) {
192 ((size_t*)table)[index] = value;
193 }
195 sstr_t sstrstr(sstr_t string, sstr_t match) {
196 if (match.length == 0) {
197 return string;
198 }
200 /* prepare default return value in case of no match */
201 sstr_t result = sstrn(NULL, 0);
203 /*
204 * IMPORTANT:
205 * our prefix table contains the prefix length PLUS ONE
206 * this is our decision, because we want to use the full range of size_t
207 * the original algorithm needs a (-1) at one single place
208 * and we want to avoid that
209 */
211 /* static prefix table */
212 static uint8_t s_prefix_table[256];
214 /* check pattern length and use appropriate prefix table */
215 register void* ptable;
216 prefix_table_rfun ptable_r;
217 prefix_table_wfun ptable_w;
218 if (match.length > 255) {
219 /* pattern exceeds static prefix table, allocate on the heap */
220 ptable = calloc(match.length+1, sizeof(size_t));
221 ptable_r = h_prefix_table_r;
222 ptable_w = h_prefix_table_w;
223 } else {
224 ptable = s_prefix_table;
225 ptable_r = s_prefix_table_r;
226 ptable_w = s_prefix_table_w;
227 }
229 /* keep counter in registers */
230 register size_t i, j;
232 /* fill prefix table */
233 i = 0; j = 0;
234 ptable_w(ptable, i, j);
235 while (i < match.length) {
236 while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
237 j = ptable_r(ptable, j-i);
238 }
239 i++; j++;
240 ptable_w(ptable, i, j);
241 }
243 /* search */
244 i = 0; j = 1;
245 while (i < string.length) {
246 while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
247 j = ptable_r(ptable, j-1);
248 }
249 i++; j++;
250 if (j-1 == match.length) {
251 size_t start = i - match.length;
252 result.ptr = string.ptr + start;
253 result.length = string.length - start;
254 break;
255 }
256 }
258 /* if prefix table was allocated on the heap, free it */
259 if (ptable != s_prefix_table) {
260 free(ptable);
261 }
263 return result;
264 }
266 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
267 return sstrsplit_a(ucx_default_allocator(), s, d, n);
268 }
270 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
271 if (s.length == 0 || d.length == 0) {
272 *n = -1;
273 return NULL;
274 }
276 /* special cases: delimiter is at least as large as the string */
277 if (d.length >= s.length) {
278 /* exact match */
279 if (sstrcmp(s, d) == 0) {
280 *n = 0;
281 return NULL;
282 } else /* no match possible */ {
283 *n = 1;
284 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
285 *result = sstrdup_a(allocator, s);
286 return result;
287 }
288 }
290 ssize_t nmax = *n;
291 size_t arrlen = 16;
292 sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
294 if (result) {
295 sstr_t curpos = s;
296 ssize_t j = 1;
297 while (1) {
298 sstr_t match;
299 /* optimize for one byte delimiters */
300 if (d.length == 1) {
301 match = curpos;
302 for (size_t i = 0 ; i < curpos.length ; i++) {
303 if (curpos.ptr[i] == *(d.ptr)) {
304 match.ptr = curpos.ptr + i;
305 break;
306 }
307 match.length--;
308 }
309 } else {
310 match = sstrstr(curpos, d);
311 }
312 if (match.length > 0) {
313 /* is this our last try? */
314 if (nmax == 0 || j < nmax) {
315 /* copy the current string to the array */
316 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
317 result[j-1] = sstrdup_a(allocator, item);
318 size_t processed = item.length + d.length;
319 curpos.ptr += processed;
320 curpos.length -= processed;
322 /* allocate memory for the next string */
323 j++;
324 if (j > arrlen) {
325 arrlen *= 2;
326 sstr_t* reallocated = (sstr_t*) alrealloc(
327 allocator, result, arrlen*sizeof(sstr_t));
328 if (reallocated) {
329 result = reallocated;
330 } else {
331 for (ssize_t i = 0 ; i < j-1 ; i++) {
332 alfree(allocator, result[i].ptr);
333 }
334 alfree(allocator, result);
335 *n = -2;
336 return NULL;
337 }
338 }
339 } else {
340 /* nmax reached, copy the _full_ remaining string */
341 result[j-1] = sstrdup_a(allocator, curpos);
342 break;
343 }
344 } else {
345 /* no more matches, copy last string */
346 result[j-1] = sstrdup_a(allocator, curpos);
347 break;
348 }
349 }
350 *n = j;
351 } else {
352 *n = -2;
353 }
355 return result;
356 }
358 int sstrcmp(sstr_t s1, sstr_t s2) {
359 if (s1.length == s2.length) {
360 return memcmp(s1.ptr, s2.ptr, s1.length);
361 } else if (s1.length > s2.length) {
362 return 1;
363 } else {
364 return -1;
365 }
366 }
368 int sstrcasecmp(sstr_t s1, sstr_t s2) {
369 if (s1.length == s2.length) {
370 #ifdef _WIN32
371 return _strnicmp(s1.ptr, s2.ptr, s1.length);
372 #else
373 return strncasecmp(s1.ptr, s2.ptr, s1.length);
374 #endif
375 } else if (s1.length > s2.length) {
376 return 1;
377 } else {
378 return -1;
379 }
380 }
382 sstr_t sstrdup(sstr_t s) {
383 return sstrdup_a(ucx_default_allocator(), s);
384 }
386 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
387 sstr_t newstring;
388 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
389 if (newstring.ptr) {
390 newstring.length = s.length;
391 newstring.ptr[newstring.length] = 0;
393 memcpy(newstring.ptr, s.ptr, s.length);
394 } else {
395 newstring.length = 0;
396 }
398 return newstring;
399 }
401 sstr_t sstrtrim(sstr_t string) {
402 sstr_t newstr = string;
404 while (newstr.length > 0 && isspace(*newstr.ptr)) {
405 newstr.ptr++;
406 newstr.length--;
407 }
408 while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
409 newstr.length--;
410 }
412 return newstr;
413 }
415 int sstrprefix(sstr_t string, sstr_t prefix) {
416 if (string.length == 0) {
417 return prefix.length == 0;
418 }
419 if (prefix.length == 0) {
420 return 1;
421 }
423 if (prefix.length > string.length) {
424 return 0;
425 } else {
426 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
427 }
428 }
430 int sstrsuffix(sstr_t string, sstr_t suffix) {
431 if (string.length == 0) {
432 return suffix.length == 0;
433 }
434 if (suffix.length == 0) {
435 return 1;
436 }
438 if (suffix.length > string.length) {
439 return 0;
440 } else {
441 return memcmp(string.ptr+string.length-suffix.length,
442 suffix.ptr, suffix.length) == 0;
443 }
444 }
446 sstr_t sstrlower(sstr_t string) {
447 sstr_t ret = sstrdup(string);
448 for (size_t i = 0; i < ret.length ; i++) {
449 ret.ptr[i] = tolower(ret.ptr[i]);
450 }
451 return ret;
452 }
454 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
455 sstr_t ret = sstrdup_a(allocator, string);
456 for (size_t i = 0; i < ret.length ; i++) {
457 ret.ptr[i] = tolower(ret.ptr[i]);
458 }
459 return ret;
460 }
462 sstr_t sstrupper(sstr_t string) {
463 sstr_t ret = sstrdup(string);
464 for (size_t i = 0; i < ret.length ; i++) {
465 ret.ptr[i] = toupper(ret.ptr[i]);
466 }
467 return ret;
468 }
470 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
471 sstr_t ret = sstrdup_a(allocator, string);
472 for (size_t i = 0; i < ret.length ; i++) {
473 ret.ptr[i] = toupper(ret.ptr[i]);
474 }
475 return ret;
476 }