Mon, 20 Feb 2017 17:28:58 +0100
reduces the number of realloc calls in sstrsplit
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2016 Olaf Wintermann. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdarg.h>
32 #include <ctype.h>
34 #include "string.h"
35 #include "allocator.h"
37 sstr_t sstr(char *cstring) {
38 sstr_t string;
39 string.ptr = cstring;
40 string.length = strlen(cstring);
41 return string;
42 }
44 sstr_t sstrn(char *cstring, size_t length) {
45 sstr_t string;
46 string.ptr = cstring;
47 string.length = length;
48 return string;
49 }
51 size_t sstrnlen(size_t n, sstr_t s, ...) {
52 va_list ap;
53 size_t size = s.length;
54 va_start(ap, s);
56 for (size_t i = 1 ; i < n ; i++) {
57 sstr_t str = va_arg(ap, sstr_t);
58 size += str.length;
59 }
60 va_end(ap);
62 return size;
63 }
65 static sstr_t sstrvcat_a(
66 UcxAllocator *a,
67 size_t count,
68 sstr_t s1,
69 sstr_t s2,
70 va_list ap) {
71 sstr_t str;
72 str.ptr = NULL;
73 str.length = 0;
74 if(count < 2) {
75 return str;
76 }
78 sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
79 if(!strings) {
80 return str;
81 }
83 // get all args and overall length
84 strings[0] = s1;
85 strings[1] = s2;
86 size_t strlen = s1.length + s2.length;
87 for (size_t i=2;i<count;i++) {
88 sstr_t s = va_arg (ap, sstr_t);
89 strings[i] = s;
90 strlen += s.length;
91 }
93 // create new string
94 str.ptr = (char*) almalloc(a, strlen + 1);
95 str.length = strlen;
96 if(!str.ptr) {
97 free(strings);
98 str.length = 0;
99 return str;
100 }
102 // concatenate strings
103 size_t pos = 0;
104 for (size_t i=0;i<count;i++) {
105 sstr_t s = strings[i];
106 memcpy(str.ptr + pos, s.ptr, s.length);
107 pos += s.length;
108 }
110 str.ptr[str.length] = '\0';
112 free(strings);
114 return str;
115 }
117 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
118 va_list ap;
119 va_start(ap, s2);
120 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
121 va_end(ap);
122 return s;
123 }
125 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
126 va_list ap;
127 va_start(ap, s2);
128 sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
129 va_end(ap);
130 return s;
131 }
133 sstr_t sstrsubs(sstr_t s, size_t start) {
134 return sstrsubsl (s, start, s.length-start);
135 }
137 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
138 sstr_t new_sstr;
139 if (start >= s.length) {
140 new_sstr.ptr = NULL;
141 new_sstr.length = 0;
142 } else {
143 if (length > s.length-start) {
144 length = s.length-start;
145 }
146 new_sstr.ptr = &s.ptr[start];
147 new_sstr.length = length;
148 }
149 return new_sstr;
150 }
152 sstr_t sstrchr(sstr_t s, int c) {
153 for(size_t i=0;i<s.length;i++) {
154 if(s.ptr[i] == c) {
155 return sstrsubs(s, i);
156 }
157 }
158 sstr_t n;
159 n.ptr = NULL;
160 n.length = 0;
161 return n;
162 }
164 sstr_t sstrrchr(sstr_t s, int c) {
165 if (s.length > 0) {
166 for(size_t i=s.length;i>0;i--) {
167 if(s.ptr[i-1] == c) {
168 return sstrsubs(s, i-1);
169 }
170 }
171 }
172 sstr_t n;
173 n.ptr = NULL;
174 n.length = 0;
175 return n;
176 }
178 sstr_t sstrstr(sstr_t string, sstr_t match) {
179 if (match.length == 0) {
180 return string;
181 }
183 for (size_t i = 0 ; i < string.length ; i++) {
184 sstr_t substr = sstrsubs(string, i);
185 if (sstrprefix(substr, match)) {
186 return substr;
187 }
188 }
190 sstr_t emptystr;
191 emptystr.length = 0;
192 emptystr.ptr = NULL;
193 return emptystr;
194 }
196 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
197 return sstrsplit_a(ucx_default_allocator(), s, d, n);
198 }
200 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
201 if (s.length == 0 || d.length == 0) {
202 *n = -1;
203 return NULL;
204 }
206 /* special cases: delimiter is at least as large as the string */
207 if (d.length >= s.length) {
208 /* exact match */
209 if (sstrcmp(s, d) == 0) {
210 *n = 0;
211 return NULL;
212 } else /* no match possible */ {
213 *n = 1;
214 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
215 *result = sstrdup_a(allocator, s);
216 return result;
217 }
218 }
220 ssize_t nmax = *n;
221 size_t arrlen = 16;
222 sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
224 if (result) {
225 sstr_t curpos = s;
226 ssize_t j = 1;
227 while (1) {
228 sstr_t match;
229 /* optimize for one byte delimiters */
230 if (d.length == 1) {
231 match = curpos;
232 for (size_t i = 0 ; i < curpos.length ; i++) {
233 if (curpos.ptr[i] == *(d.ptr)) {
234 match.ptr = curpos.ptr + i;
235 break;
236 }
237 match.length--;
238 }
239 } else {
240 match = sstrstr(curpos, d);
241 }
242 if (match.length > 0) {
243 /* is this our last try? */
244 if (nmax == 0 || j < nmax) {
245 /* copy the current string to the array */
246 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
247 result[j-1] = sstrdup_a(allocator, item);
248 size_t processed = item.length + d.length;
249 curpos.ptr += processed;
250 curpos.length -= processed;
252 /* allocate memory for the next string */
253 j++;
254 if (j > arrlen) {
255 arrlen *= 2;
256 sstr_t* reallocated = (sstr_t*) alrealloc(
257 allocator, result, arrlen*sizeof(sstr_t));
258 if (reallocated) {
259 result = reallocated;
260 } else {
261 for (ssize_t i = 0 ; i < j-1 ; i++) {
262 alfree(allocator, result[i].ptr);
263 }
264 alfree(allocator, result);
265 *n = -2;
266 return NULL;
267 }
268 }
269 } else {
270 /* nmax reached, copy the _full_ remaining string */
271 result[j-1] = sstrdup_a(allocator, curpos);
272 break;
273 }
274 } else {
275 /* no more matches, copy last string */
276 result[j-1] = sstrdup_a(allocator, curpos);
277 break;
278 }
279 }
280 *n = j;
281 } else {
282 *n = -2;
283 }
285 return result;
286 }
288 int sstrcmp(sstr_t s1, sstr_t s2) {
289 if (s1.length == s2.length) {
290 return memcmp(s1.ptr, s2.ptr, s1.length);
291 } else if (s1.length > s2.length) {
292 return 1;
293 } else {
294 return -1;
295 }
296 }
298 int sstrcasecmp(sstr_t s1, sstr_t s2) {
299 if (s1.length == s2.length) {
300 #ifdef _WIN32
301 return _strnicmp(s1.ptr, s2.ptr, s1.length);
302 #else
303 return strncasecmp(s1.ptr, s2.ptr, s1.length);
304 #endif
305 } else if (s1.length > s2.length) {
306 return 1;
307 } else {
308 return -1;
309 }
310 }
312 sstr_t sstrdup(sstr_t s) {
313 return sstrdup_a(ucx_default_allocator(), s);
314 }
316 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
317 sstr_t newstring;
318 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
319 if (newstring.ptr) {
320 newstring.length = s.length;
321 newstring.ptr[newstring.length] = 0;
323 memcpy(newstring.ptr, s.ptr, s.length);
324 } else {
325 newstring.length = 0;
326 }
328 return newstring;
329 }
331 sstr_t sstrtrim(sstr_t string) {
332 sstr_t newstr = string;
334 while (newstr.length > 0 && isspace(*newstr.ptr)) {
335 newstr.ptr++;
336 newstr.length--;
337 }
338 while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
339 newstr.length--;
340 }
342 return newstr;
343 }
345 int sstrprefix(sstr_t string, sstr_t prefix) {
346 if (string.length == 0) {
347 return prefix.length == 0;
348 }
349 if (prefix.length == 0) {
350 return 1;
351 }
353 if (prefix.length > string.length) {
354 return 0;
355 } else {
356 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
357 }
358 }
360 int sstrsuffix(sstr_t string, sstr_t suffix) {
361 if (string.length == 0) {
362 return suffix.length == 0;
363 }
364 if (suffix.length == 0) {
365 return 1;
366 }
368 if (suffix.length > string.length) {
369 return 0;
370 } else {
371 return memcmp(string.ptr+string.length-suffix.length,
372 suffix.ptr, suffix.length) == 0;
373 }
374 }
376 sstr_t sstrlower(sstr_t string) {
377 sstr_t ret = sstrdup(string);
378 for (size_t i = 0; i < ret.length ; i++) {
379 ret.ptr[i] = tolower(ret.ptr[i]);
380 }
381 return ret;
382 }
384 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
385 sstr_t ret = sstrdup_a(allocator, string);
386 for (size_t i = 0; i < ret.length ; i++) {
387 ret.ptr[i] = tolower(ret.ptr[i]);
388 }
389 return ret;
390 }
392 sstr_t sstrupper(sstr_t string) {
393 sstr_t ret = sstrdup(string);
394 for (size_t i = 0; i < ret.length ; i++) {
395 ret.ptr[i] = toupper(ret.ptr[i]);
396 }
397 return ret;
398 }
400 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
401 sstr_t ret = sstrdup_a(allocator, string);
402 for (size_t i = 0; i < ret.length ; i++) {
403 ret.ptr[i] = toupper(ret.ptr[i]);
404 }
405 return ret;
406 }