Mon, 20 Feb 2017 17:12:14 +0100
adds an optimization for one-byte delimiters to sstrsplit (will take effect after planned reimplementation of sstrstr)
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2016 Olaf Wintermann. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdarg.h>
32 #include <ctype.h>
34 #include "string.h"
35 #include "allocator.h"
37 sstr_t sstr(char *cstring) {
38 sstr_t string;
39 string.ptr = cstring;
40 string.length = strlen(cstring);
41 return string;
42 }
44 sstr_t sstrn(char *cstring, size_t length) {
45 sstr_t string;
46 string.ptr = cstring;
47 string.length = length;
48 return string;
49 }
51 size_t sstrnlen(size_t n, sstr_t s, ...) {
52 va_list ap;
53 size_t size = s.length;
54 va_start(ap, s);
56 for (size_t i = 1 ; i < n ; i++) {
57 sstr_t str = va_arg(ap, sstr_t);
58 size += str.length;
59 }
60 va_end(ap);
62 return size;
63 }
65 static sstr_t sstrvcat_a(
66 UcxAllocator *a,
67 size_t count,
68 sstr_t s1,
69 sstr_t s2,
70 va_list ap) {
71 sstr_t str;
72 str.ptr = NULL;
73 str.length = 0;
74 if(count < 2) {
75 return str;
76 }
78 sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
79 if(!strings) {
80 return str;
81 }
83 // get all args and overall length
84 strings[0] = s1;
85 strings[1] = s2;
86 size_t strlen = s1.length + s2.length;
87 for (size_t i=2;i<count;i++) {
88 sstr_t s = va_arg (ap, sstr_t);
89 strings[i] = s;
90 strlen += s.length;
91 }
93 // create new string
94 str.ptr = (char*) almalloc(a, strlen + 1);
95 str.length = strlen;
96 if(!str.ptr) {
97 free(strings);
98 str.length = 0;
99 return str;
100 }
102 // concatenate strings
103 size_t pos = 0;
104 for (size_t i=0;i<count;i++) {
105 sstr_t s = strings[i];
106 memcpy(str.ptr + pos, s.ptr, s.length);
107 pos += s.length;
108 }
110 str.ptr[str.length] = '\0';
112 free(strings);
114 return str;
115 }
117 sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
118 va_list ap;
119 va_start(ap, s2);
120 sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
121 va_end(ap);
122 return s;
123 }
125 sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
126 va_list ap;
127 va_start(ap, s2);
128 sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
129 va_end(ap);
130 return s;
131 }
133 sstr_t sstrsubs(sstr_t s, size_t start) {
134 return sstrsubsl (s, start, s.length-start);
135 }
137 sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
138 sstr_t new_sstr;
139 if (start >= s.length) {
140 new_sstr.ptr = NULL;
141 new_sstr.length = 0;
142 } else {
143 if (length > s.length-start) {
144 length = s.length-start;
145 }
146 new_sstr.ptr = &s.ptr[start];
147 new_sstr.length = length;
148 }
149 return new_sstr;
150 }
152 sstr_t sstrchr(sstr_t s, int c) {
153 for(size_t i=0;i<s.length;i++) {
154 if(s.ptr[i] == c) {
155 return sstrsubs(s, i);
156 }
157 }
158 sstr_t n;
159 n.ptr = NULL;
160 n.length = 0;
161 return n;
162 }
164 sstr_t sstrrchr(sstr_t s, int c) {
165 if (s.length > 0) {
166 for(size_t i=s.length;i>0;i--) {
167 if(s.ptr[i-1] == c) {
168 return sstrsubs(s, i-1);
169 }
170 }
171 }
172 sstr_t n;
173 n.ptr = NULL;
174 n.length = 0;
175 return n;
176 }
178 sstr_t sstrstr(sstr_t string, sstr_t match) {
179 if (match.length == 0) {
180 return string;
181 }
183 for (size_t i = 0 ; i < string.length ; i++) {
184 sstr_t substr = sstrsubs(string, i);
185 if (sstrprefix(substr, match)) {
186 return substr;
187 }
188 }
190 sstr_t emptystr;
191 emptystr.length = 0;
192 emptystr.ptr = NULL;
193 return emptystr;
194 }
196 sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
197 return sstrsplit_a(ucx_default_allocator(), s, d, n);
198 }
200 sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
201 if (s.length == 0 || d.length == 0) {
202 *n = -1;
203 return NULL;
204 }
206 /* special cases: delimiter is at least as large as the string */
207 if (d.length >= s.length) {
208 /* exact match */
209 if (sstrcmp(s, d) == 0) {
210 *n = 0;
211 return NULL;
212 } else /* no match possible */ {
213 *n = 1;
214 sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
215 *result = sstrdup_a(allocator, s);
216 return result;
217 }
218 }
220 ssize_t nmax = *n;
221 sstr_t* result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
223 if (result) {
224 sstr_t curpos = s;
225 ssize_t j = 1;
226 while (1) {
227 sstr_t match;
228 /* optimize for one byte delimiters */
229 if (d.length == 1) {
230 match = curpos;
231 for (size_t i = 0 ; i < curpos.length ; i++) {
232 if (curpos.ptr[i] == *(d.ptr)) {
233 match.ptr = curpos.ptr + i;
234 break;
235 }
236 match.length--;
237 }
238 } else {
239 match = sstrstr(curpos, d);
240 }
241 if (match.length > 0) {
242 /* is this our last try? */
243 if (nmax == 0 || j < nmax) {
244 /* copy the current string to the array */
245 sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
246 result[j-1] = sstrdup_a(allocator, item);
247 size_t processed = item.length + d.length;
248 curpos.ptr += processed;
249 curpos.length -= processed;
251 /* allocate memory for the next string */
252 j++;
253 sstr_t* reallocated = (sstr_t*)
254 alrealloc(allocator, result, j*sizeof(sstr_t));
255 if (reallocated) {
256 result = reallocated;
257 } else {
258 for (ssize_t i = 0 ; i < j-1 ; i++) {
259 alfree(allocator, result[i].ptr);
260 }
261 alfree(allocator, result);
262 *n = -2;
263 return NULL;
264 }
265 } else {
266 /* nmax reached, copy the _full_ remaining string */
267 result[j-1] = sstrdup_a(allocator, curpos);
268 break;
269 }
270 } else {
271 /* no more matches, copy last string */
272 result[j-1] = sstrdup_a(allocator, curpos);
273 break;
274 }
275 }
276 *n = j;
277 } else {
278 *n = -2;
279 }
281 return result;
282 }
284 int sstrcmp(sstr_t s1, sstr_t s2) {
285 if (s1.length == s2.length) {
286 return memcmp(s1.ptr, s2.ptr, s1.length);
287 } else if (s1.length > s2.length) {
288 return 1;
289 } else {
290 return -1;
291 }
292 }
294 int sstrcasecmp(sstr_t s1, sstr_t s2) {
295 if (s1.length == s2.length) {
296 #ifdef _WIN32
297 return _strnicmp(s1.ptr, s2.ptr, s1.length);
298 #else
299 return strncasecmp(s1.ptr, s2.ptr, s1.length);
300 #endif
301 } else if (s1.length > s2.length) {
302 return 1;
303 } else {
304 return -1;
305 }
306 }
308 sstr_t sstrdup(sstr_t s) {
309 return sstrdup_a(ucx_default_allocator(), s);
310 }
312 sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
313 sstr_t newstring;
314 newstring.ptr = (char*)almalloc(allocator, s.length + 1);
315 if (newstring.ptr) {
316 newstring.length = s.length;
317 newstring.ptr[newstring.length] = 0;
319 memcpy(newstring.ptr, s.ptr, s.length);
320 } else {
321 newstring.length = 0;
322 }
324 return newstring;
325 }
327 sstr_t sstrtrim(sstr_t string) {
328 sstr_t newstr = string;
330 while (newstr.length > 0 && isspace(*newstr.ptr)) {
331 newstr.ptr++;
332 newstr.length--;
333 }
334 while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
335 newstr.length--;
336 }
338 return newstr;
339 }
341 int sstrprefix(sstr_t string, sstr_t prefix) {
342 if (string.length == 0) {
343 return prefix.length == 0;
344 }
345 if (prefix.length == 0) {
346 return 1;
347 }
349 if (prefix.length > string.length) {
350 return 0;
351 } else {
352 return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
353 }
354 }
356 int sstrsuffix(sstr_t string, sstr_t suffix) {
357 if (string.length == 0) {
358 return suffix.length == 0;
359 }
360 if (suffix.length == 0) {
361 return 1;
362 }
364 if (suffix.length > string.length) {
365 return 0;
366 } else {
367 return memcmp(string.ptr+string.length-suffix.length,
368 suffix.ptr, suffix.length) == 0;
369 }
370 }
372 sstr_t sstrlower(sstr_t string) {
373 sstr_t ret = sstrdup(string);
374 for (size_t i = 0; i < ret.length ; i++) {
375 ret.ptr[i] = tolower(ret.ptr[i]);
376 }
377 return ret;
378 }
380 sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
381 sstr_t ret = sstrdup_a(allocator, string);
382 for (size_t i = 0; i < ret.length ; i++) {
383 ret.ptr[i] = tolower(ret.ptr[i]);
384 }
385 return ret;
386 }
388 sstr_t sstrupper(sstr_t string) {
389 sstr_t ret = sstrdup(string);
390 for (size_t i = 0; i < ret.length ; i++) {
391 ret.ptr[i] = toupper(ret.ptr[i]);
392 }
393 return ret;
394 }
396 sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
397 sstr_t ret = sstrdup_a(allocator, string);
398 for (size_t i = 0; i < ret.length ; i++) {
399 ret.ptr[i] = toupper(ret.ptr[i]);
400 }
401 return ret;
402 }