Wed, 31 Aug 2022 23:12:05 +0200
more implementations of string functions
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2021 Mike Becker, Olaf Wintermann All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include "cx/string.h"
30 #include "cx/utils.h"
32 #include <string.h>
33 #include <stdarg.h>
34 #include <stdint.h>
36 cxmutstr cx_mutstr(char *cstring) {
37 return (cxmutstr) {cstring, strlen(cstring)};
38 }
40 cxmutstr cx_mutstrn(
41 char *cstring,
42 size_t length
43 ) {
44 return (cxmutstr) {cstring, length};
45 }
47 cxstring cx_str(const char *cstring) {
48 return (cxstring) {cstring, strlen(cstring)};
49 }
51 cxstring cx_strn(
52 const char *cstring,
53 size_t length
54 ) {
55 return (cxstring) {cstring, length};
56 }
58 cxstring cx_strcast(cxmutstr str) {
59 return (cxstring) {str.ptr, str.length};
60 }
62 void cx_strfree(cxmutstr *str) {
63 free(str->ptr);
64 str->ptr = NULL;
65 str->length = 0;
66 }
68 size_t cx_strlen(
69 size_t count,
70 ...
71 ) {
72 if (count == 0) return 0;
74 va_list ap;
75 va_start(ap, count);
76 size_t size = 0;
77 cx_for_n(i, count) {
78 cxstring str = va_arg(ap, cxstring);
79 size += str.length;
80 }
81 va_end(ap);
83 return size;
84 }
86 cxmutstr cx_strcat_a(
87 CxAllocator *alloc,
88 size_t count,
89 ...
90 ) {
91 cxstring *strings = calloc(count, sizeof(cxstring));
92 if (!strings) abort();
94 va_list ap;
95 va_start(ap, count);
97 // get all args and overall length
98 size_t slen = 0;
99 cx_for_n(i, count) {
100 cxstring s = va_arg (ap, cxstring);
101 strings[i] = s;
102 slen += s.length;
103 }
105 // create new string
106 cxmutstr result;
107 result.ptr = cxMalloc(alloc, slen + 1);
108 result.length = slen;
109 if (result.ptr == NULL) abort();
111 // concatenate strings
112 size_t pos = 0;
113 cx_for_n(i, count) {
114 cxstring s = strings[i];
115 memcpy(result.ptr + pos, s.ptr, s.length);
116 pos += s.length;
117 }
119 // terminate string
120 result.ptr[result.length] = '\0';
122 // free temporary array
123 free(strings);
125 return result;
126 }
128 cxstring cx_strsubs(
129 cxstring string,
130 size_t start
131 ) {
132 return cx_strsubsl(string, start, string.length - start);
133 }
135 cxmutstr cx_strsubs_m(
136 cxmutstr string,
137 size_t start
138 ) {
139 return cx_strsubsl_m(string, start, string.length - start);
140 }
142 cxstring cx_strsubsl(
143 cxstring string,
144 size_t start,
145 size_t length
146 ) {
147 if (start > string.length) {
148 return (cxstring) {NULL, 0};
149 }
151 size_t rem_len = string.length - start;
152 if (length > rem_len) {
153 length = rem_len;
154 }
156 return (cxstring) {string.ptr + start, length};
157 }
159 cxmutstr cx_strsubsl_m(
160 cxmutstr string,
161 size_t start,
162 size_t length
163 ) {
164 cxstring result = cx_strsubsl(cx_strcast(string), start, length);
165 return (cxmutstr) {(char *) result.ptr, result.length};
166 }
168 cxstring cx_strchr(
169 cxstring string,
170 int chr
171 ) {
172 chr = 0xFF & chr;
173 // TODO: improve by comparing multiple bytes at once
174 cx_for_n(i, string.length) {
175 if (string.ptr[i] == chr) {
176 return cx_strsubs(string, i);
177 }
178 }
179 return (cxstring) {NULL, 0};
180 }
182 cxmutstr cx_strchr_m(
183 cxmutstr string,
184 int chr
185 ) {
186 cxstring result = cx_strchr(cx_strcast(string), chr);
187 return (cxmutstr) {(char *) result.ptr, result.length};
188 }
190 cxstring cx_strrchr(
191 cxstring string,
192 int chr
193 ) {
194 chr = 0xFF & chr;
195 size_t i = string.length;
196 while (i > 0) {
197 i--;
198 // TODO: improve by comparing multiple bytes at once
199 if (string.ptr[i] == chr) {
200 return cx_strsubs(string, i);
201 }
202 }
203 return (cxstring) {NULL, 0};
204 }
206 cxmutstr cx_strrchr_m(
207 cxmutstr string,
208 int chr
209 ) {
210 cxstring result = cx_strrchr(cx_strcast(string), chr);
211 return (cxmutstr) {(char *) result.ptr, result.length};
212 }
214 #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
215 ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
217 #define ptable_w(useheap, ptable, index, src) do {\
218 if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
219 else ((size_t*)ptable)[index] = src;\
220 } while (0)
223 cxstring cx_strstr(
224 cxstring haystack,
225 cxstring needle
226 ) {
227 if (needle.length == 0) {
228 return haystack;
229 }
231 /*
232 * IMPORTANT:
233 * Our prefix table contains the prefix length PLUS ONE
234 * this is our decision, because we want to use the full range of size_t.
235 * The original algorithm needs a (-1) at one single place,
236 * and we want to avoid that.
237 */
239 /* static prefix table */
240 static uint8_t s_prefix_table[512];
242 /* check pattern length and use appropriate prefix table */
243 /* if the pattern exceeds static prefix table, allocate on the heap */
244 register int useheap = needle.length >= 512;
245 register void *ptable = useheap ? calloc(needle.length + 1,
246 sizeof(size_t)) : s_prefix_table;
248 /* keep counter in registers */
249 register size_t i, j;
251 /* fill prefix table */
252 i = 0;
253 j = 0;
254 ptable_w(useheap, ptable, i, j);
255 while (i < needle.length) {
256 while (j >= 1 && needle.ptr[j - 1] != needle.ptr[i]) {
257 ptable_r(j, useheap, ptable, j - 1);
258 }
259 i++;
260 j++;
261 ptable_w(useheap, ptable, i, j);
262 }
264 /* search */
265 cxstring result = {NULL, 0};
266 i = 0;
267 j = 1;
268 while (i < haystack.length) {
269 while (j >= 1 && haystack.ptr[i] != needle.ptr[j - 1]) {
270 ptable_r(j, useheap, ptable, j - 1);
271 }
272 i++;
273 j++;
274 if (j - 1 == needle.length) {
275 size_t start = i - needle.length;
276 result.ptr = haystack.ptr + start;
277 result.length = haystack.length - start;
278 break;
279 }
280 }
282 /* if prefix table was allocated on the heap, free it */
283 if (ptable != s_prefix_table) {
284 free(ptable);
285 }
287 return result;
288 }
290 cxmutstr cx_strstr_m(
291 cxmutstr haystack,
292 cxstring needle
293 ) {
294 cxstring result = cx_strstr(cx_strcast(haystack), needle);
295 return (cxmutstr) {(char *) result.ptr, result.length};
296 }
298 size_t cx_strsplit(
299 cxstring string,
300 cxstring delim,
301 size_t limit,
302 cxstring *output
303 ) {
304 // TODO: implement
305 return 0;
306 }
308 size_t cx_strsplit_a(
309 CxAllocator *allocator,
310 cxstring string,
311 cxstring delim,
312 size_t limit,
313 cxstring **output
314 ) {
315 // TODO: implement
316 return 0;
317 }
319 size_t cx_strsplit_m(
320 cxmutstr string,
321 cxstring delim,
322 size_t limit,
323 cxmutstr *output
324 ) {
325 return cx_strsplit(cx_strcast(string),
326 delim, limit, (cxstring *) output);
327 }
329 size_t cx_strsplit_ma(
330 CxAllocator *allocator,
331 cxmutstr string,
332 cxstring delim,
333 size_t limit,
334 cxmutstr **output
335 ) {
336 return cx_strsplit_a(allocator, cx_strcast(string),
337 delim, limit, (cxstring **) output);
338 }