Sun, 29 Dec 2019 16:04:34 +0100
adds more sstrreplace tests
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "ucx/string.h" #include "ucx/allocator.h" #include <stdlib.h> #include <string.h> #include <stdarg.h> #include <stdint.h> #include <ctype.h> #ifndef _WIN32 #include <strings.h> /* for strncasecmp() */ #endif /* _WIN32 */ sstr_t sstr(char *cstring) { sstr_t string; string.ptr = cstring; string.length = strlen(cstring); return string; } sstr_t sstrn(char *cstring, size_t length) { sstr_t string; string.ptr = cstring; string.length = length; return string; } scstr_t scstr(const char *cstring) { scstr_t string; string.ptr = cstring; string.length = strlen(cstring); return string; } scstr_t scstrn(const char *cstring, size_t length) { scstr_t string; string.ptr = cstring; string.length = length; return string; } size_t scstrnlen(size_t n, ...) { if (n == 0) return 0; va_list ap; va_start(ap, n); size_t size = 0; for (size_t i = 0 ; i < n ; i++) { scstr_t str = va_arg(ap, scstr_t); if(SIZE_MAX - str.length < size) { size = SIZE_MAX; break; } size += str.length; } va_end(ap); return size; } static sstr_t sstrvcat_a( UcxAllocator *a, size_t count, scstr_t s1, va_list ap) { sstr_t str; str.ptr = NULL; str.length = 0; if(count < 2) { return str; } scstr_t s2 = va_arg (ap, scstr_t); if(((size_t)-1) - s1.length < s2.length) { return str; } scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t)); if(!strings) { return str; } // get all args and overall length strings[0] = s1; strings[1] = s2; size_t slen = s1.length + s2.length; int error = 0; for (size_t i=2;i<count;i++) { scstr_t s = va_arg (ap, scstr_t); strings[i] = s; if(((size_t)-1) - s.length < slen) { error = 1; break; } slen += s.length; } if(error) { free(strings); return str; } // create new string str.ptr = (char*) almalloc(a, slen + 1); str.length = slen; if(!str.ptr) { free(strings); str.length = 0; return str; } // concatenate strings size_t pos = 0; for (size_t i=0;i<count;i++) { scstr_t s = strings[i]; memcpy(str.ptr + pos, s.ptr, s.length); pos += s.length; } str.ptr[str.length] = '\0'; free(strings); return str; } sstr_t scstrcat(size_t count, scstr_t s1, ...) { va_list ap; va_start(ap, s1); sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap); va_end(ap); return s; } sstr_t scstrcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) { va_list ap; va_start(ap, s1); sstr_t s = sstrvcat_a(a, count, s1, ap); va_end(ap); return s; } static int ucx_substring( size_t str_length, size_t start, size_t length, size_t *newlen, size_t *newpos) { *newlen = 0; *newpos = 0; if(start > str_length) { return 0; } if(length > str_length - start) { length = str_length - start; } *newlen = length; *newpos = start; return 1; } sstr_t sstrsubs(sstr_t s, size_t start) { return sstrsubsl (s, start, s.length-start); } sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { size_t pos; sstr_t ret = { NULL, 0 }; if(ucx_substring(s.length, start, length, &ret.length, &pos)) { ret.ptr = s.ptr + pos; } return ret; } scstr_t scstrsubs(scstr_t string, size_t start) { return scstrsubsl(string, start, string.length-start); } scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { size_t pos; scstr_t ret = { NULL, 0 }; if(ucx_substring(s.length, start, length, &ret.length, &pos)) { ret.ptr = s.ptr + pos; } return ret; } static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { for(size_t i=0;i<length;i++) { if(str[i] == chr) { *pos = i; return 1; } } return 0; } static int ucx_strrchr(const char *str, size_t length, int chr, size_t *pos) { if(length > 0) { for(size_t i=length ; i>0 ; i--) { if(str[i-1] == chr) { *pos = i-1; return 1; } } } return 0; } sstr_t sstrchr(sstr_t s, int c) { size_t pos = 0; if(ucx_strchr(s.ptr, s.length, c, &pos)) { return sstrsubs(s, pos); } return sstrn(NULL, 0); } sstr_t sstrrchr(sstr_t s, int c) { size_t pos = 0; if(ucx_strrchr(s.ptr, s.length, c, &pos)) { return sstrsubs(s, pos); } return sstrn(NULL, 0); } scstr_t scstrchr(scstr_t s, int c) { size_t pos = 0; if(ucx_strchr(s.ptr, s.length, c, &pos)) { return scstrsubs(s, pos); } return scstrn(NULL, 0); } scstr_t scstrrchr(scstr_t s, int c) { size_t pos = 0; if(ucx_strrchr(s.ptr, s.length, c, &pos)) { return scstrsubs(s, pos); } return scstrn(NULL, 0); } #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) #define ptable_w(useheap, ptable, index, src) do {\ if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ else ((size_t*)ptable)[index] = src;\ } while (0); static const char* ucx_strstr( const char *str, size_t length, const char *match, size_t matchlen, size_t *newlen) { *newlen = length; if (matchlen == 0) { return str; } const char *result = NULL; size_t resultlen = 0; /* * IMPORTANT: * our prefix table contains the prefix length PLUS ONE * this is our decision, because we want to use the full range of size_t * the original algorithm needs a (-1) at one single place * and we want to avoid that */ /* static prefix table */ static uint8_t s_prefix_table[256]; /* check pattern length and use appropriate prefix table */ /* if the pattern exceeds static prefix table, allocate on the heap */ register int useheap = matchlen > 255; register void* ptable = useheap ? calloc(matchlen+1, sizeof(size_t)): s_prefix_table; /* keep counter in registers */ register size_t i, j; /* fill prefix table */ i = 0; j = 0; ptable_w(useheap, ptable, i, j); while (i < matchlen) { while (j >= 1 && match[j-1] != match[i]) { ptable_r(j, useheap, ptable, j-1); } i++; j++; ptable_w(useheap, ptable, i, j); } /* search */ i = 0; j = 1; while (i < length) { while (j >= 1 && str[i] != match[j-1]) { ptable_r(j, useheap, ptable, j-1); } i++; j++; if (j-1 == matchlen) { size_t start = i - matchlen; result = str + start; resultlen = length - start; break; } } /* if prefix table was allocated on the heap, free it */ if (ptable != s_prefix_table) { free(ptable); } *newlen = resultlen; return result; } sstr_t scstrsstr(sstr_t string, scstr_t match) { sstr_t result; size_t reslen; const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); if(!resstr) { result.ptr = NULL; result.length = 0; return result; } size_t pos = resstr - string.ptr; result.ptr = string.ptr + pos; result.length = reslen; return result; } scstr_t scstrscstr(scstr_t string, scstr_t match) { scstr_t result; size_t reslen; const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); if(!resstr) { result.ptr = NULL; result.length = 0; return result; } size_t pos = resstr - string.ptr; result.ptr = string.ptr + pos; result.length = reslen; return result; } #undef ptable_r #undef ptable_w sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { return scstrsplit_a(ucx_default_allocator(), s, d, n); } sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { if (s.length == 0 || d.length == 0) { *n = -1; return NULL; } /* special cases: delimiter is at least as large as the string */ if (d.length >= s.length) { /* exact match */ if (sstrcmp(s, d) == 0) { *n = 0; return NULL; } else /* no match possible */ { *n = 1; sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); if(result) { *result = sstrdup_a(allocator, s); } else { *n = -2; } return result; } } ssize_t nmax = *n; size_t arrlen = 16; sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t)); if (result) { scstr_t curpos = s; ssize_t j = 1; while (1) { scstr_t match; /* optimize for one byte delimiters */ if (d.length == 1) { match = curpos; for (size_t i = 0 ; i < curpos.length ; i++) { if (curpos.ptr[i] == *(d.ptr)) { match.ptr = curpos.ptr + i; break; } match.length--; } } else { match = scstrscstr(curpos, d); } if (match.length > 0) { /* is this our last try? */ if (nmax == 0 || j < nmax) { /* copy the current string to the array */ scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr); result[j-1] = sstrdup_a(allocator, item); size_t processed = item.length + d.length; curpos.ptr += processed; curpos.length -= processed; /* allocate memory for the next string */ j++; if (j > arrlen) { arrlen *= 2; size_t reallocsz; sstr_t* reallocated = NULL; if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) { reallocated = (sstr_t*) alrealloc( allocator, result, reallocsz); } if (reallocated) { result = reallocated; } else { for (ssize_t i = 0 ; i < j-1 ; i++) { alfree(allocator, result[i].ptr); } alfree(allocator, result); *n = -2; return NULL; } } } else { /* nmax reached, copy the _full_ remaining string */ result[j-1] = sstrdup_a(allocator, curpos); break; } } else { /* no more matches, copy last string */ result[j-1] = sstrdup_a(allocator, curpos); break; } } *n = j; } else { *n = -2; } return result; } int scstrcmp(scstr_t s1, scstr_t s2) { if (s1.length == s2.length) { return memcmp(s1.ptr, s2.ptr, s1.length); } else if (s1.length > s2.length) { return 1; } else { return -1; } } int scstrcasecmp(scstr_t s1, scstr_t s2) { if (s1.length == s2.length) { #ifdef _WIN32 return _strnicmp(s1.ptr, s2.ptr, s1.length); #else return strncasecmp(s1.ptr, s2.ptr, s1.length); #endif } else if (s1.length > s2.length) { return 1; } else { return -1; } } sstr_t scstrdup(scstr_t s) { return sstrdup_a(ucx_default_allocator(), s); } sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { sstr_t newstring; newstring.ptr = (char*)almalloc(allocator, s.length + 1); if (newstring.ptr) { newstring.length = s.length; newstring.ptr[newstring.length] = 0; memcpy(newstring.ptr, s.ptr, s.length); } else { newstring.length = 0; } return newstring; } static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { const char *newptr = s; size_t length = len; while(length > 0 && isspace(*newptr)) { newptr++; length--; } while(length > 0 && isspace(newptr[length-1])) { length--; } *newlen = length; return newptr - s; } sstr_t sstrtrim(sstr_t string) { sstr_t newstr; newstr.ptr = string.ptr + ucx_strtrim(string.ptr, string.length, &newstr.length); return newstr; } scstr_t scstrtrim(scstr_t string) { scstr_t newstr; newstr.ptr = string.ptr + ucx_strtrim(string.ptr, string.length, &newstr.length); return newstr; } int scstrprefix(scstr_t string, scstr_t prefix) { if (string.length == 0) { return prefix.length == 0; } if (prefix.length == 0) { return 1; } if (prefix.length > string.length) { return 0; } else { return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; } } int scstrsuffix(scstr_t string, scstr_t suffix) { if (string.length == 0) { return suffix.length == 0; } if (suffix.length == 0) { return 1; } if (suffix.length > string.length) { return 0; } else { return memcmp(string.ptr+string.length-suffix.length, suffix.ptr, suffix.length) == 0; } } int scstrcaseprefix(scstr_t string, scstr_t prefix) { if (string.length == 0) { return prefix.length == 0; } if (prefix.length == 0) { return 1; } if (prefix.length > string.length) { return 0; } else { scstr_t subs = scstrsubsl(string, 0, prefix.length); return scstrcasecmp(subs, prefix) == 0; } } int scstrcasesuffix(scstr_t string, scstr_t suffix) { if (string.length == 0) { return suffix.length == 0; } if (suffix.length == 0) { return 1; } if (suffix.length > string.length) { return 0; } else { scstr_t subs = scstrsubs(string, string.length-suffix.length); return scstrcasecmp(subs, suffix) == 0; } } sstr_t scstrlower(scstr_t string) { sstr_t ret = sstrdup(string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = tolower(ret.ptr[i]); } return ret; } sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) { sstr_t ret = sstrdup_a(allocator, string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = tolower(ret.ptr[i]); } return ret; } sstr_t scstrupper(scstr_t string) { sstr_t ret = sstrdup(string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = toupper(ret.ptr[i]); } return ret; } sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) { sstr_t ret = sstrdup_a(allocator, string); for (size_t i = 0; i < ret.length ; i++) { ret.ptr[i] = toupper(ret.ptr[i]); } return ret; } #define REPLACE_INDEX_BUFFER_MAX 100 struct scstrreplace_ibuf { size_t* buf; unsigned int len; /* small indices */ struct scstrreplace_ibuf* next; }; static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) { while (buf) { struct scstrreplace_ibuf *next = buf->next; free(buf->buf); free(buf); buf = next; } } sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str, scstr_t pattern, scstr_t replacement, size_t replmax) { if (pattern.length == 0 || pattern.length > str.length) return sstrdup(str); /* Compute expected buffer length */ size_t ibufmax = str.length / pattern.length; size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { ibuflen = REPLACE_INDEX_BUFFER_MAX; } /* Allocate first index buffer */ struct scstrreplace_ibuf *firstbuf, *curbuf; firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf)); if (!firstbuf) return sstrn(NULL, 0); firstbuf->buf = calloc(ibuflen, sizeof(size_t)); if (!firstbuf->buf) { free(firstbuf); return sstrn(NULL, 0); } /* Search occurrences */ scstr_t searchstr = str; size_t found = 0; do { scstr_t match = scstrscstr(searchstr, pattern); if (match.length > 0) { /* Allocate next buffer in chain, if required */ if (curbuf->len == ibuflen) { struct scstrreplace_ibuf *nextbuf = calloc(1, sizeof(struct scstrreplace_ibuf)); if (!nextbuf) { scstrrepl_free_ibuf(firstbuf); return sstrn(NULL, 0); } nextbuf->buf = calloc(ibuflen, sizeof(size_t)); if (!nextbuf->buf) { free(nextbuf); scstrrepl_free_ibuf(firstbuf); return sstrn(NULL, 0); } curbuf->next = nextbuf; curbuf = nextbuf; } /* Record match index */ found++; size_t idx = match.ptr - str.ptr; curbuf->buf[curbuf->len++] = idx; searchstr.ptr = match.ptr + pattern.length; searchstr.length = str.length - idx - pattern.length; } else { break; } } while (searchstr.length > 0 && found < replmax); /* Allocate result string */ sstr_t result; { ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; size_t rcount = 0; curbuf = firstbuf; do { rcount += curbuf->len; curbuf = curbuf->next; } while (curbuf); result.length = str.length + rcount * adjlen; result.ptr = almalloc(allocator, result.length); if (!result.ptr) { scstrrepl_free_ibuf(firstbuf); return sstrn(NULL, 0); } } /* Build result string */ curbuf = firstbuf; size_t srcidx = 0; char* destptr = result.ptr; do { for (size_t i = 0; i < curbuf->len; i++) { /* Copy source part up to next match*/ size_t idx = curbuf->buf[i]; size_t srclen = idx - srcidx; if (srclen > 0) { memcpy(destptr, str.ptr+srcidx, srclen); destptr += srclen; srcidx += srclen; } /* Copy the replacement and skip the source pattern */ srcidx += pattern.length; memcpy(destptr, replacement.ptr, replacement.length); destptr += replacement.length; } curbuf = curbuf->next; } while (curbuf); memcpy(destptr, str.ptr+srcidx, str.length-srcidx); /* Free index buffer */ scstrrepl_free_ibuf(firstbuf); return result; } sstr_t scstrreplacen(scstr_t str, scstr_t pattern, scstr_t replacement, size_t replmax) { return scstrreplacen_a(ucx_default_allocator(), str, pattern, replacement, replmax); } // type adjustment functions scstr_t ucx_sc2sc(scstr_t str) { return str; } scstr_t ucx_ss2sc(sstr_t str) { scstr_t cs; cs.ptr = str.ptr; cs.length = str.length; return cs; } scstr_t ucx_ss2c_s(scstr_t c) { return c; }