diff -r 92e482410453 -r d345541018fa src/string.c --- a/src/string.c Mon Dec 30 09:54:10 2019 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,807 +0,0 @@ -/* - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. - * - * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "ucx/string.h" - -#include "ucx/allocator.h" - -#include -#include -#include -#include -#include - -#ifndef _WIN32 -#include /* for strncasecmp() */ -#endif /* _WIN32 */ - -sstr_t sstr(char *cstring) { - sstr_t string; - string.ptr = cstring; - string.length = strlen(cstring); - return string; -} - -sstr_t sstrn(char *cstring, size_t length) { - sstr_t string; - string.ptr = cstring; - string.length = length; - return string; -} - -scstr_t scstr(const char *cstring) { - scstr_t string; - string.ptr = cstring; - string.length = strlen(cstring); - return string; -} - -scstr_t scstrn(const char *cstring, size_t length) { - scstr_t string; - string.ptr = cstring; - string.length = length; - return string; -} - - -size_t scstrnlen(size_t n, ...) { - if (n == 0) return 0; - - va_list ap; - va_start(ap, n); - - size_t size = 0; - - for (size_t i = 0 ; i < n ; i++) { - scstr_t str = va_arg(ap, scstr_t); - if(SIZE_MAX - str.length < size) { - size = SIZE_MAX; - break; - } - size += str.length; - } - va_end(ap); - - return size; -} - -static sstr_t sstrvcat_a( - UcxAllocator *a, - size_t count, - scstr_t s1, - va_list ap) { - sstr_t str; - str.ptr = NULL; - str.length = 0; - if(count < 2) { - return str; - } - - scstr_t s2 = va_arg (ap, scstr_t); - - if(((size_t)-1) - s1.length < s2.length) { - return str; - } - - scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t)); - if(!strings) { - return str; - } - - // get all args and overall length - strings[0] = s1; - strings[1] = s2; - size_t slen = s1.length + s2.length; - int error = 0; - for (size_t i=2;i str_length) { - return 0; - } - - if(length > str_length - start) { - length = str_length - start; - } - *newlen = length; - *newpos = start; - return 1; -} - -sstr_t sstrsubs(sstr_t s, size_t start) { - return sstrsubsl (s, start, s.length-start); -} - -sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { - size_t pos; - sstr_t ret = { NULL, 0 }; - if(ucx_substring(s.length, start, length, &ret.length, &pos)) { - ret.ptr = s.ptr + pos; - } - return ret; -} - -scstr_t scstrsubs(scstr_t string, size_t start) { - return scstrsubsl(string, start, string.length-start); -} - -scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { - size_t pos; - scstr_t ret = { NULL, 0 }; - if(ucx_substring(s.length, start, length, &ret.length, &pos)) { - ret.ptr = s.ptr + pos; - } - return ret; -} - - -static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { - for(size_t i=0;i 0) { - for(size_t i=length ; i>0 ; i--) { - if(str[i-1] == chr) { - *pos = i-1; - return 1; - } - } - } - return 0; -} - -sstr_t sstrchr(sstr_t s, int c) { - size_t pos = 0; - if(ucx_strchr(s.ptr, s.length, c, &pos)) { - return sstrsubs(s, pos); - } - return sstrn(NULL, 0); -} - -sstr_t sstrrchr(sstr_t s, int c) { - size_t pos = 0; - if(ucx_strrchr(s.ptr, s.length, c, &pos)) { - return sstrsubs(s, pos); - } - return sstrn(NULL, 0); -} - -scstr_t scstrchr(scstr_t s, int c) { - size_t pos = 0; - if(ucx_strchr(s.ptr, s.length, c, &pos)) { - return scstrsubs(s, pos); - } - return scstrn(NULL, 0); -} - -scstr_t scstrrchr(scstr_t s, int c) { - size_t pos = 0; - if(ucx_strrchr(s.ptr, s.length, c, &pos)) { - return scstrsubs(s, pos); - } - return scstrn(NULL, 0); -} - -#define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ - ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) - -#define ptable_w(useheap, ptable, index, src) do {\ - if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ - else ((size_t*)ptable)[index] = src;\ - } while (0); - - -static const char* ucx_strstr( - const char *str, - size_t length, - const char *match, - size_t matchlen, - size_t *newlen) -{ - *newlen = length; - if (matchlen == 0) { - return str; - } - - const char *result = NULL; - size_t resultlen = 0; - - /* - * IMPORTANT: - * our prefix table contains the prefix length PLUS ONE - * this is our decision, because we want to use the full range of size_t - * the original algorithm needs a (-1) at one single place - * and we want to avoid that - */ - - /* static prefix table */ - static uint8_t s_prefix_table[256]; - - /* check pattern length and use appropriate prefix table */ - /* if the pattern exceeds static prefix table, allocate on the heap */ - register int useheap = matchlen > 255; - register void* ptable = useheap ? - calloc(matchlen+1, sizeof(size_t)): s_prefix_table; - - /* keep counter in registers */ - register size_t i, j; - - /* fill prefix table */ - i = 0; j = 0; - ptable_w(useheap, ptable, i, j); - while (i < matchlen) { - while (j >= 1 && match[j-1] != match[i]) { - ptable_r(j, useheap, ptable, j-1); - } - i++; j++; - ptable_w(useheap, ptable, i, j); - } - - /* search */ - i = 0; j = 1; - while (i < length) { - while (j >= 1 && str[i] != match[j-1]) { - ptable_r(j, useheap, ptable, j-1); - } - i++; j++; - if (j-1 == matchlen) { - size_t start = i - matchlen; - result = str + start; - resultlen = length - start; - break; - } - } - - /* if prefix table was allocated on the heap, free it */ - if (ptable != s_prefix_table) { - free(ptable); - } - - *newlen = resultlen; - return result; -} - -sstr_t scstrsstr(sstr_t string, scstr_t match) { - sstr_t result; - - size_t reslen; - const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); - if(!resstr) { - result.ptr = NULL; - result.length = 0; - return result; - } - - size_t pos = resstr - string.ptr; - result.ptr = string.ptr + pos; - result.length = reslen; - - return result; -} - -scstr_t scstrscstr(scstr_t string, scstr_t match) { - scstr_t result; - - size_t reslen; - const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); - if(!resstr) { - result.ptr = NULL; - result.length = 0; - return result; - } - - size_t pos = resstr - string.ptr; - result.ptr = string.ptr + pos; - result.length = reslen; - - return result; -} - -#undef ptable_r -#undef ptable_w - -sstr_t* scstrsplit(scstr_t s, scstr_t d, ssize_t *n) { - return scstrsplit_a(ucx_default_allocator(), s, d, n); -} - -sstr_t* scstrsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { - if (s.length == 0 || d.length == 0) { - *n = -1; - return NULL; - } - - /* special cases: delimiter is at least as large as the string */ - if (d.length >= s.length) { - /* exact match */ - if (sstrcmp(s, d) == 0) { - *n = 0; - return NULL; - } else /* no match possible */ { - *n = 1; - sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); - if(result) { - *result = sstrdup_a(allocator, s); - } else { - *n = -2; - } - return result; - } - } - - ssize_t nmax = *n; - size_t arrlen = 16; - sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t)); - - if (result) { - scstr_t curpos = s; - ssize_t j = 1; - while (1) { - scstr_t match; - /* optimize for one byte delimiters */ - if (d.length == 1) { - match = curpos; - for (size_t i = 0 ; i < curpos.length ; i++) { - if (curpos.ptr[i] == *(d.ptr)) { - match.ptr = curpos.ptr + i; - break; - } - match.length--; - } - } else { - match = scstrscstr(curpos, d); - } - if (match.length > 0) { - /* is this our last try? */ - if (nmax == 0 || j < nmax) { - /* copy the current string to the array */ - scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr); - result[j-1] = sstrdup_a(allocator, item); - size_t processed = item.length + d.length; - curpos.ptr += processed; - curpos.length -= processed; - - /* allocate memory for the next string */ - j++; - if (j > arrlen) { - arrlen *= 2; - size_t reallocsz; - sstr_t* reallocated = NULL; - if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) { - reallocated = (sstr_t*) alrealloc( - allocator, result, reallocsz); - } - if (reallocated) { - result = reallocated; - } else { - for (ssize_t i = 0 ; i < j-1 ; i++) { - alfree(allocator, result[i].ptr); - } - alfree(allocator, result); - *n = -2; - return NULL; - } - } - } else { - /* nmax reached, copy the _full_ remaining string */ - result[j-1] = sstrdup_a(allocator, curpos); - break; - } - } else { - /* no more matches, copy last string */ - result[j-1] = sstrdup_a(allocator, curpos); - break; - } - } - *n = j; - } else { - *n = -2; - } - - return result; -} - -int scstrcmp(scstr_t s1, scstr_t s2) { - if (s1.length == s2.length) { - return memcmp(s1.ptr, s2.ptr, s1.length); - } else if (s1.length > s2.length) { - return 1; - } else { - return -1; - } -} - -int scstrcasecmp(scstr_t s1, scstr_t s2) { - if (s1.length == s2.length) { -#ifdef _WIN32 - return _strnicmp(s1.ptr, s2.ptr, s1.length); -#else - return strncasecmp(s1.ptr, s2.ptr, s1.length); -#endif - } else if (s1.length > s2.length) { - return 1; - } else { - return -1; - } -} - -sstr_t scstrdup(scstr_t s) { - return sstrdup_a(ucx_default_allocator(), s); -} - -sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) { - sstr_t newstring; - newstring.ptr = (char*)almalloc(allocator, s.length + 1); - if (newstring.ptr) { - newstring.length = s.length; - newstring.ptr[newstring.length] = 0; - - memcpy(newstring.ptr, s.ptr, s.length); - } else { - newstring.length = 0; - } - - return newstring; -} - - -static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { - const char *newptr = s; - size_t length = len; - - while(length > 0 && isspace(*newptr)) { - newptr++; - length--; - } - while(length > 0 && isspace(newptr[length-1])) { - length--; - } - - *newlen = length; - return newptr - s; -} - -sstr_t sstrtrim(sstr_t string) { - sstr_t newstr; - newstr.ptr = string.ptr - + ucx_strtrim(string.ptr, string.length, &newstr.length); - return newstr; -} - -scstr_t scstrtrim(scstr_t string) { - scstr_t newstr; - newstr.ptr = string.ptr - + ucx_strtrim(string.ptr, string.length, &newstr.length); - return newstr; -} - -int scstrprefix(scstr_t string, scstr_t prefix) { - if (string.length == 0) { - return prefix.length == 0; - } - if (prefix.length == 0) { - return 1; - } - - if (prefix.length > string.length) { - return 0; - } else { - return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; - } -} - -int scstrsuffix(scstr_t string, scstr_t suffix) { - if (string.length == 0) { - return suffix.length == 0; - } - if (suffix.length == 0) { - return 1; - } - - if (suffix.length > string.length) { - return 0; - } else { - return memcmp(string.ptr+string.length-suffix.length, - suffix.ptr, suffix.length) == 0; - } -} - -int scstrcaseprefix(scstr_t string, scstr_t prefix) { - if (string.length == 0) { - return prefix.length == 0; - } - if (prefix.length == 0) { - return 1; - } - - if (prefix.length > string.length) { - return 0; - } else { - scstr_t subs = scstrsubsl(string, 0, prefix.length); - return scstrcasecmp(subs, prefix) == 0; - } -} - -int scstrcasesuffix(scstr_t string, scstr_t suffix) { - if (string.length == 0) { - return suffix.length == 0; - } - if (suffix.length == 0) { - return 1; - } - - if (suffix.length > string.length) { - return 0; - } else { - scstr_t subs = scstrsubs(string, string.length-suffix.length); - return scstrcasecmp(subs, suffix) == 0; - } -} - -sstr_t scstrlower(scstr_t string) { - sstr_t ret = sstrdup(string); - for (size_t i = 0; i < ret.length ; i++) { - ret.ptr[i] = tolower(ret.ptr[i]); - } - return ret; -} - -sstr_t scstrlower_a(UcxAllocator *allocator, scstr_t string) { - sstr_t ret = sstrdup_a(allocator, string); - for (size_t i = 0; i < ret.length ; i++) { - ret.ptr[i] = tolower(ret.ptr[i]); - } - return ret; -} - -sstr_t scstrupper(scstr_t string) { - sstr_t ret = sstrdup(string); - for (size_t i = 0; i < ret.length ; i++) { - ret.ptr[i] = toupper(ret.ptr[i]); - } - return ret; -} - -sstr_t scstrupper_a(UcxAllocator *allocator, scstr_t string) { - sstr_t ret = sstrdup_a(allocator, string); - for (size_t i = 0; i < ret.length ; i++) { - ret.ptr[i] = toupper(ret.ptr[i]); - } - return ret; -} - -#define REPLACE_INDEX_BUFFER_MAX 100 - -struct scstrreplace_ibuf { - size_t* buf; - unsigned int len; /* small indices */ - struct scstrreplace_ibuf* next; -}; - -static void scstrrepl_free_ibuf(struct scstrreplace_ibuf *buf) { - while (buf) { - struct scstrreplace_ibuf *next = buf->next; - free(buf->buf); - free(buf); - buf = next; - } -} - -sstr_t scstrreplacen_a(UcxAllocator *allocator, scstr_t str, - scstr_t pattern, scstr_t replacement, size_t replmax) { - - if (pattern.length == 0 || pattern.length > str.length || replmax == 0) - return sstrdup(str); - - /* Compute expected buffer length */ - size_t ibufmax = str.length / pattern.length; - size_t ibuflen = replmax < ibufmax ? replmax : ibufmax; - if (ibuflen > REPLACE_INDEX_BUFFER_MAX) { - ibuflen = REPLACE_INDEX_BUFFER_MAX; - } - - /* Allocate first index buffer */ - struct scstrreplace_ibuf *firstbuf, *curbuf; - firstbuf = curbuf = calloc(1, sizeof(struct scstrreplace_ibuf)); - if (!firstbuf) return sstrn(NULL, 0); - firstbuf->buf = calloc(ibuflen, sizeof(size_t)); - if (!firstbuf->buf) { - free(firstbuf); - return sstrn(NULL, 0); - } - - /* Search occurrences */ - scstr_t searchstr = str; - size_t found = 0; - do { - scstr_t match = scstrscstr(searchstr, pattern); - if (match.length > 0) { - /* Allocate next buffer in chain, if required */ - if (curbuf->len == ibuflen) { - struct scstrreplace_ibuf *nextbuf = - calloc(1, sizeof(struct scstrreplace_ibuf)); - if (!nextbuf) { - scstrrepl_free_ibuf(firstbuf); - return sstrn(NULL, 0); - } - nextbuf->buf = calloc(ibuflen, sizeof(size_t)); - if (!nextbuf->buf) { - free(nextbuf); - scstrrepl_free_ibuf(firstbuf); - return sstrn(NULL, 0); - } - curbuf->next = nextbuf; - curbuf = nextbuf; - } - - /* Record match index */ - found++; - size_t idx = match.ptr - str.ptr; - curbuf->buf[curbuf->len++] = idx; - searchstr.ptr = match.ptr + pattern.length; - searchstr.length = str.length - idx - pattern.length; - } else { - break; - } - } while (searchstr.length > 0 && found < replmax); - - /* Allocate result string */ - sstr_t result; - { - ssize_t adjlen = (ssize_t) replacement.length - (ssize_t) pattern.length; - size_t rcount = 0; - curbuf = firstbuf; - do { - rcount += curbuf->len; - curbuf = curbuf->next; - } while (curbuf); - result.length = str.length + rcount * adjlen; - result.ptr = almalloc(allocator, result.length); - if (!result.ptr) { - scstrrepl_free_ibuf(firstbuf); - return sstrn(NULL, 0); - } - } - - /* Build result string */ - curbuf = firstbuf; - size_t srcidx = 0; - char* destptr = result.ptr; - do { - for (size_t i = 0; i < curbuf->len; i++) { - /* Copy source part up to next match*/ - size_t idx = curbuf->buf[i]; - size_t srclen = idx - srcidx; - if (srclen > 0) { - memcpy(destptr, str.ptr+srcidx, srclen); - destptr += srclen; - srcidx += srclen; - } - - /* Copy the replacement and skip the source pattern */ - srcidx += pattern.length; - memcpy(destptr, replacement.ptr, replacement.length); - destptr += replacement.length; - } - curbuf = curbuf->next; - } while (curbuf); - memcpy(destptr, str.ptr+srcidx, str.length-srcidx); - - /* Free index buffer */ - scstrrepl_free_ibuf(firstbuf); - - return result; -} - -sstr_t scstrreplacen(scstr_t str, scstr_t pattern, - scstr_t replacement, size_t replmax) { - return scstrreplacen_a(ucx_default_allocator(), - str, pattern, replacement, replmax); -} - - -// type adjustment functions -scstr_t ucx_sc2sc(scstr_t str) { - return str; -} -scstr_t ucx_ss2sc(sstr_t str) { - scstr_t cs; - cs.ptr = str.ptr; - cs.length = str.length; - return cs; -} -scstr_t ucx_ss2c_s(scstr_t c) { - return c; -}