olaf@20: /* universe@103: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. olaf@20: * universe@259: * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved. universe@103: * universe@103: * Redistribution and use in source and binary forms, with or without universe@103: * modification, are permitted provided that the following conditions are met: universe@103: * universe@103: * 1. Redistributions of source code must retain the above copyright universe@103: * notice, this list of conditions and the following disclaimer. universe@103: * universe@103: * 2. Redistributions in binary form must reproduce the above copyright universe@103: * notice, this list of conditions and the following disclaimer in the universe@103: * documentation and/or other materials provided with the distribution. universe@103: * universe@103: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" universe@103: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE universe@103: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE universe@103: * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE universe@103: * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR universe@103: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF universe@103: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS universe@103: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN universe@103: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) universe@103: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE universe@103: * POSSIBILITY OF SUCH DAMAGE. olaf@20: */ olaf@20: universe@251: #include "ucx/string.h" universe@251: universe@251: #include "ucx/allocator.h" universe@251: olaf@20: #include universe@69: #include olaf@20: #include universe@236: #include universe@189: #include olaf@20: universe@116: sstr_t sstr(char *cstring) { olaf@20: sstr_t string; universe@116: string.ptr = cstring; universe@116: string.length = strlen(cstring); olaf@20: return string; olaf@20: } olaf@20: universe@116: sstr_t sstrn(char *cstring, size_t length) { olaf@20: sstr_t string; universe@116: string.ptr = cstring; universe@116: string.length = length; olaf@20: return string; olaf@20: } olaf@20: olaf@275: scstr_t scstr(const char *cstring) { olaf@275: scstr_t string; olaf@275: string.ptr = cstring; olaf@275: string.length = strlen(cstring); olaf@275: return string; olaf@275: } olaf@275: olaf@275: scstr_t scstrn(const char *cstring, size_t length) { olaf@275: scstr_t string; olaf@275: string.ptr = cstring; olaf@275: string.length = length; olaf@275: return string; olaf@275: } olaf@275: olaf@275: olaf@288: size_t ucx_strnlen(size_t n, ...) { olaf@20: va_list ap; olaf@288: va_start(ap, n); olaf@288: olaf@288: size_t size = 0; olaf@20: olaf@288: for (size_t i = 0 ; i < n ; i++) { olaf@288: scstr_t str = va_arg(ap, scstr_t); universe@317: if(SIZE_MAX - str.length < size) { universe@317: size = SIZE_MAX; olaf@272: break; olaf@272: } olaf@20: size += str.length; olaf@20: } universe@24: va_end(ap); olaf@20: olaf@20: return size; olaf@20: } olaf@20: olaf@180: static sstr_t sstrvcat_a( olaf@180: UcxAllocator *a, olaf@180: size_t count, olaf@288: scstr_t s1, olaf@180: va_list ap) { olaf@180: sstr_t str; olaf@180: str.ptr = NULL; olaf@180: str.length = 0; olaf@180: if(count < 2) { olaf@180: return str; olaf@180: } olaf@180: olaf@288: scstr_t s2 = va_arg (ap, scstr_t); olaf@288: olaf@272: if(((size_t)-1) - s1.length < s2.length) { olaf@272: return str; olaf@272: } olaf@272: olaf@288: scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t)); olaf@180: if(!strings) { olaf@180: return str; olaf@180: } olaf@180: olaf@180: // get all args and overall length olaf@180: strings[0] = s1; olaf@180: strings[1] = s2; olaf@272: size_t slen = s1.length + s2.length; olaf@272: int error = 0; olaf@180: for (size_t i=2;i str_length) { olaf@300: return 0; olaf@300: } olaf@300: olaf@300: if(length > str_length - start) { olaf@300: length = str_length - start; olaf@300: } olaf@300: *newlen = length; olaf@300: *newpos = start; olaf@300: return 1; olaf@300: } olaf@300: olaf@68: sstr_t sstrsubs(sstr_t s, size_t start) { olaf@20: return sstrsubsl (s, start, s.length-start); olaf@20: } olaf@20: olaf@68: sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) { olaf@300: size_t pos; olaf@300: sstr_t ret = { NULL, 0 }; olaf@300: if(ucx_substring(s.length, start, length, &ret.length, &pos)) { olaf@300: ret.ptr = s.ptr + pos; olaf@300: } olaf@300: return ret; olaf@300: } olaf@300: universe@318: scstr_t scstrsubs(scstr_t string, size_t start) { universe@318: return scstrsubsl(string, start, string.length-start); olaf@300: } olaf@300: olaf@300: scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) { olaf@300: size_t pos; olaf@300: scstr_t ret = { NULL, 0 }; olaf@300: if(ucx_substring(s.length, start, length, &ret.length, &pos)) { olaf@300: ret.ptr = s.ptr + pos; olaf@300: } olaf@300: return ret; olaf@300: } olaf@300: olaf@300: universe@318: static int ucx_strchr(const char *str, size_t length, int chr, size_t *pos) { olaf@300: for(size_t i=0;i 0) { universe@306: for(size_t i=length ; i>0 ; i--) { universe@318: if(str[i-1] == chr) { universe@306: *pos = i-1; olaf@300: return 1; olaf@300: } olaf@300: } olaf@300: } olaf@300: return 0; olaf@20: } olaf@20: olaf@108: sstr_t sstrchr(sstr_t s, int c) { olaf@300: size_t pos = 0; olaf@300: if(ucx_strchr(s.ptr, s.length, c, &pos)) { olaf@300: return sstrsubs(s, pos); olaf@108: } olaf@300: return sstrn(NULL, 0); olaf@108: } olaf@108: universe@148: sstr_t sstrrchr(sstr_t s, int c) { olaf@300: size_t pos = 0; olaf@300: if(ucx_strrchr(s.ptr, s.length, c, &pos)) { olaf@300: return sstrsubs(s, pos); universe@148: } olaf@300: return sstrn(NULL, 0); olaf@300: } olaf@300: olaf@300: scstr_t scstrchr(scstr_t s, int c) { olaf@300: size_t pos = 0; olaf@300: if(ucx_strchr(s.ptr, s.length, c, &pos)) { olaf@300: return scstrsubs(s, pos); olaf@300: } olaf@300: return scstrn(NULL, 0); olaf@300: } olaf@300: olaf@300: scstr_t scstrrchr(scstr_t s, int c) { olaf@300: size_t pos = 0; olaf@300: if(ucx_strrchr(s.ptr, s.length, c, &pos)) { olaf@300: return scstrsubs(s, pos); olaf@300: } olaf@300: return scstrn(NULL, 0); universe@148: } universe@148: universe@237: #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \ universe@237: ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index]) universe@236: universe@237: #define ptable_w(useheap, ptable, index, src) do {\ universe@237: if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\ universe@237: else ((size_t*)ptable)[index] = src;\ universe@237: } while (0); universe@236: olaf@276: universe@318: static const char* ucx_strstr( olaf@276: const char *str, olaf@276: size_t length, olaf@276: const char *match, olaf@276: size_t matchlen, olaf@276: size_t *newlen) olaf@276: { olaf@276: *newlen = length; olaf@276: if (matchlen == 0) { olaf@276: return str; universe@214: } universe@214: olaf@276: const char *result = NULL; olaf@276: size_t resultlen = 0; universe@236: universe@236: /* universe@236: * IMPORTANT: universe@236: * our prefix table contains the prefix length PLUS ONE universe@236: * this is our decision, because we want to use the full range of size_t universe@236: * the original algorithm needs a (-1) at one single place universe@236: * and we want to avoid that universe@236: */ universe@236: universe@236: /* static prefix table */ universe@236: static uint8_t s_prefix_table[256]; universe@236: universe@236: /* check pattern length and use appropriate prefix table */ universe@237: /* if the pattern exceeds static prefix table, allocate on the heap */ olaf@276: register int useheap = matchlen > 255; universe@237: register void* ptable = useheap ? olaf@276: calloc(matchlen+1, sizeof(size_t)): s_prefix_table; universe@236: universe@236: /* keep counter in registers */ universe@236: register size_t i, j; universe@236: universe@236: /* fill prefix table */ universe@236: i = 0; j = 0; universe@237: ptable_w(useheap, ptable, i, j); olaf@276: while (i < matchlen) { olaf@276: while (j >= 1 && match[j-1] != match[i]) { universe@238: ptable_r(j, useheap, ptable, j-1); universe@236: } universe@236: i++; j++; universe@237: ptable_w(useheap, ptable, i, j); universe@236: } universe@236: universe@236: /* search */ universe@236: i = 0; j = 1; olaf@276: while (i < length) { olaf@276: while (j >= 1 && str[i] != match[j-1]) { universe@237: ptable_r(j, useheap, ptable, j-1); universe@236: } universe@236: i++; j++; olaf@276: if (j-1 == matchlen) { olaf@276: size_t start = i - matchlen; olaf@276: result = str + start; olaf@276: resultlen = length - start; universe@236: break; universe@214: } universe@214: } universe@236: universe@236: /* if prefix table was allocated on the heap, free it */ universe@236: if (ptable != s_prefix_table) { universe@236: free(ptable); universe@236: } universe@214: olaf@276: *newlen = resultlen; olaf@276: return result; olaf@276: } olaf@276: olaf@276: sstr_t ucx_sstrstr(sstr_t string, scstr_t match) { olaf@276: sstr_t result; olaf@276: olaf@276: size_t reslen; olaf@276: const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); olaf@276: if(!resstr) { olaf@276: result.ptr = NULL; olaf@276: result.length = 0; olaf@276: return result; olaf@276: } olaf@276: olaf@276: size_t pos = resstr - string.ptr; olaf@276: result.ptr = string.ptr + pos; olaf@276: result.length = reslen; olaf@276: olaf@276: return result; olaf@276: } olaf@276: olaf@276: scstr_t ucx_scstrstr(scstr_t string, scstr_t match) { olaf@276: scstr_t result; olaf@276: olaf@276: size_t reslen; olaf@276: const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen); olaf@276: if(!resstr) { olaf@276: result.ptr = NULL; olaf@276: result.length = 0; olaf@276: return result; olaf@276: } olaf@276: olaf@276: size_t pos = resstr - string.ptr; olaf@276: result.ptr = string.ptr + pos; olaf@276: result.length = reslen; olaf@276: universe@236: return result; universe@214: } universe@214: universe@237: #undef ptable_r universe@237: #undef ptable_w universe@237: olaf@276: sstr_t* ucx_strsplit(scstr_t s, scstr_t d, ssize_t *n) { olaf@276: return ucx_strsplit_a(ucx_default_allocator(), s, d, n); universe@119: } universe@119: olaf@276: sstr_t* ucx_strsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) { universe@119: if (s.length == 0 || d.length == 0) { universe@119: *n = -1; universe@39: return NULL; universe@39: } universe@231: universe@231: /* special cases: delimiter is at least as large as the string */ universe@231: if (d.length >= s.length) { universe@231: /* exact match */ universe@231: if (sstrcmp(s, d) == 0) { universe@231: *n = 0; universe@231: return NULL; universe@231: } else /* no match possible */ { universe@231: *n = 1; universe@231: sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t)); olaf@270: if(result) { olaf@270: *result = sstrdup_a(allocator, s); olaf@270: } else { olaf@270: *n = -2; olaf@270: } universe@231: return result; universe@231: } universe@231: } universe@231: universe@173: ssize_t nmax = *n; universe@235: size_t arrlen = 16; olaf@270: sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t)); universe@39: universe@119: if (result) { olaf@276: scstr_t curpos = s; universe@233: ssize_t j = 1; universe@233: while (1) { olaf@276: scstr_t match; universe@234: /* optimize for one byte delimiters */ universe@234: if (d.length == 1) { universe@234: match = curpos; universe@234: for (size_t i = 0 ; i < curpos.length ; i++) { universe@234: if (curpos.ptr[i] == *(d.ptr)) { universe@234: match.ptr = curpos.ptr + i; universe@234: break; universe@234: } universe@234: match.length--; universe@234: } universe@234: } else { olaf@276: match = scstrstr(curpos, d); universe@234: } universe@233: if (match.length > 0) { universe@233: /* is this our last try? */ universe@233: if (nmax == 0 || j < nmax) { universe@233: /* copy the current string to the array */ olaf@276: scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr); universe@233: result[j-1] = sstrdup_a(allocator, item); universe@233: size_t processed = item.length + d.length; universe@233: curpos.ptr += processed; universe@233: curpos.length -= processed; universe@39: universe@233: /* allocate memory for the next string */ universe@233: j++; universe@235: if (j > arrlen) { universe@235: arrlen *= 2; olaf@270: size_t reallocsz; olaf@270: sstr_t* reallocated = NULL; olaf@270: if(!ucx_szmul(arrlen, sizeof(sstr_t), &reallocsz)) { olaf@270: reallocated = (sstr_t*) alrealloc( olaf@270: allocator, result, reallocsz); olaf@270: } universe@235: if (reallocated) { universe@235: result = reallocated; universe@235: } else { universe@235: for (ssize_t i = 0 ; i < j-1 ; i++) { universe@235: alfree(allocator, result[i].ptr); universe@235: } universe@235: alfree(allocator, result); universe@235: *n = -2; universe@235: return NULL; universe@233: } universe@233: } universe@233: } else { universe@233: /* nmax reached, copy the _full_ remaining string */ universe@233: result[j-1] = sstrdup_a(allocator, curpos); universe@233: break; universe@233: } universe@173: } else { universe@233: /* no more matches, copy last string */ universe@233: result[j-1] = sstrdup_a(allocator, curpos); universe@173: break; universe@173: } universe@119: } universe@233: *n = j; universe@119: } else { universe@119: *n = -2; universe@39: } universe@39: universe@39: return result; universe@39: } universe@39: universe@315: int ucx_strcmp(scstr_t s1, scstr_t s2) { universe@116: if (s1.length == s2.length) { universe@116: return memcmp(s1.ptr, s2.ptr, s1.length); universe@116: } else if (s1.length > s2.length) { universe@116: return 1; universe@116: } else { universe@116: return -1; universe@116: } olaf@20: } olaf@20: universe@315: int ucx_strcasecmp(scstr_t s1, scstr_t s2) { universe@149: if (s1.length == s2.length) { universe@149: #ifdef _WIN32 universe@149: return _strnicmp(s1.ptr, s2.ptr, s1.length); universe@149: #else universe@149: return strncasecmp(s1.ptr, s2.ptr, s1.length); universe@149: #endif universe@149: } else if (s1.length > s2.length) { universe@149: return 1; universe@149: } else { universe@149: return -1; universe@149: } universe@149: } universe@149: universe@318: sstr_t ucx_strdup(scstr_t s) { universe@125: return sstrdup_a(ucx_default_allocator(), s); olaf@109: } olaf@20: universe@318: sstr_t ucx_strdup_a(UcxAllocator *allocator, scstr_t s) { olaf@109: sstr_t newstring; universe@173: newstring.ptr = (char*)almalloc(allocator, s.length + 1); olaf@109: if (newstring.ptr) { olaf@109: newstring.length = s.length; olaf@109: newstring.ptr[newstring.length] = 0; olaf@109: olaf@109: memcpy(newstring.ptr, s.ptr, s.length); olaf@109: } else { olaf@109: newstring.length = 0; olaf@109: } olaf@109: olaf@20: return newstring; olaf@20: } olaf@96: olaf@276: universe@318: static size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) { olaf@276: const char *newptr = s; olaf@276: size_t length = len; universe@189: olaf@276: while(length > 0 && isspace(*newptr)) { olaf@276: newptr++; olaf@276: length--; universe@98: } olaf@276: while(length > 0 && isspace(newptr[length-1])) { olaf@276: length--; olaf@96: } olaf@96: olaf@276: *newlen = length; olaf@276: return newptr - s; olaf@276: } olaf@276: olaf@276: sstr_t sstrtrim(sstr_t string) { olaf@276: sstr_t newstr; olaf@276: newstr.ptr = string.ptr olaf@276: + ucx_strtrim(string.ptr, string.length, &newstr.length); olaf@276: return newstr; olaf@276: } olaf@276: olaf@276: scstr_t scstrtrim(scstr_t string) { olaf@276: scstr_t newstr; olaf@276: newstr.ptr = string.ptr olaf@276: + ucx_strtrim(string.ptr, string.length, &newstr.length); olaf@96: return newstr; olaf@96: } universe@146: olaf@275: int ucx_strprefix(scstr_t string, scstr_t prefix) { universe@146: if (string.length == 0) { universe@146: return prefix.length == 0; universe@146: } universe@146: if (prefix.length == 0) { universe@146: return 1; universe@146: } universe@146: universe@146: if (prefix.length > string.length) { universe@146: return 0; universe@146: } else { universe@146: return memcmp(string.ptr, prefix.ptr, prefix.length) == 0; universe@146: } universe@146: } universe@146: olaf@275: int ucx_strsuffix(scstr_t string, scstr_t suffix) { universe@146: if (string.length == 0) { universe@146: return suffix.length == 0; universe@146: } universe@146: if (suffix.length == 0) { universe@146: return 1; universe@146: } universe@146: universe@146: if (suffix.length > string.length) { universe@146: return 0; universe@146: } else { universe@146: return memcmp(string.ptr+string.length-suffix.length, universe@146: suffix.ptr, suffix.length) == 0; universe@146: } universe@146: } universe@210: olaf@275: sstr_t ucx_strlower(scstr_t string) { universe@210: sstr_t ret = sstrdup(string); universe@210: for (size_t i = 0; i < ret.length ; i++) { universe@210: ret.ptr[i] = tolower(ret.ptr[i]); universe@210: } universe@210: return ret; universe@210: } universe@210: olaf@275: sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string) { universe@210: sstr_t ret = sstrdup_a(allocator, string); universe@210: for (size_t i = 0; i < ret.length ; i++) { universe@210: ret.ptr[i] = tolower(ret.ptr[i]); universe@210: } universe@210: return ret; universe@210: } universe@210: olaf@275: sstr_t ucx_strupper(scstr_t string) { universe@210: sstr_t ret = sstrdup(string); universe@210: for (size_t i = 0; i < ret.length ; i++) { universe@210: ret.ptr[i] = toupper(ret.ptr[i]); universe@210: } universe@210: return ret; universe@210: } universe@210: olaf@275: sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string) { universe@210: sstr_t ret = sstrdup_a(allocator, string); universe@210: for (size_t i = 0; i < ret.length ; i++) { universe@210: ret.ptr[i] = toupper(ret.ptr[i]); universe@210: } universe@210: return ret; universe@210: } olaf@275: universe@316: // type adjustment functions universe@316: scstr_t ucx_sc2sc(scstr_t str) { universe@316: return str; olaf@275: } olaf@275: scstr_t ucx_ss2sc(sstr_t str) { olaf@275: scstr_t cs; olaf@275: cs.ptr = str.ptr; olaf@275: cs.length = str.length; olaf@275: return cs; olaf@275: } olaf@275: scstr_t ucx_ss2c_s(scstr_t c) { olaf@275: return c; olaf@275: }