olaf@20: /*
universe@103:  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
olaf@20:  *
universe@259:  * Copyright 2017 Mike Becker, Olaf Wintermann All rights reserved.
universe@103:  *
universe@103:  * Redistribution and use in source and binary forms, with or without
universe@103:  * modification, are permitted provided that the following conditions are met:
universe@103:  *
universe@103:  *   1. Redistributions of source code must retain the above copyright
universe@103:  *      notice, this list of conditions and the following disclaimer.
universe@103:  *
universe@103:  *   2. Redistributions in binary form must reproduce the above copyright
universe@103:  *      notice, this list of conditions and the following disclaimer in the
universe@103:  *      documentation and/or other materials provided with the distribution.
universe@103:  *
universe@103:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@103:  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@103:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@103:  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@103:  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@103:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@103:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@103:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@103:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@103:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@103:  * POSSIBILITY OF SUCH DAMAGE.
olaf@20:  */
olaf@20: 
universe@251: #include "ucx/string.h"
universe@251: 
universe@251: #include "ucx/allocator.h"
universe@251: 
olaf@20: #include <stdlib.h>
universe@69: #include <string.h>
olaf@20: #include <stdarg.h>
universe@236: #include <stdint.h>
universe@189: #include <ctype.h>
olaf@20: 
universe@116: sstr_t sstr(char *cstring) {
olaf@20:     sstr_t string;
universe@116:     string.ptr = cstring;
universe@116:     string.length = strlen(cstring);
olaf@20:     return string;
olaf@20: }
olaf@20: 
universe@116: sstr_t sstrn(char *cstring, size_t length) {
olaf@20:     sstr_t string;
universe@116:     string.ptr = cstring;
universe@116:     string.length = length;
olaf@20:     return string;
olaf@20: }
olaf@20: 
olaf@68: size_t sstrnlen(size_t n, sstr_t s, ...) {
olaf@20:     va_list ap;
olaf@20:     size_t size = s.length;
olaf@20:     va_start(ap, s);
olaf@20: 
universe@116:     for (size_t i = 1 ; i < n ; i++) {
olaf@20:         sstr_t str = va_arg(ap, sstr_t);
olaf@20:         size += str.length;
olaf@20:     }
universe@24:     va_end(ap);
olaf@20: 
olaf@20:     return size;
olaf@20: }
olaf@20: 
olaf@180: static sstr_t sstrvcat_a(
olaf@180:         UcxAllocator *a,
olaf@180:         size_t count,
olaf@180:         sstr_t s1,
olaf@180:         sstr_t s2,
olaf@180:         va_list ap) {
olaf@180:     sstr_t str;
olaf@180:     str.ptr = NULL;
olaf@180:     str.length = 0;
olaf@180:     if(count < 2) {
olaf@180:         return str;
olaf@180:     }
olaf@180:     
universe@185:     sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
olaf@180:     if(!strings) {
olaf@180:         return str;
olaf@180:     }
olaf@180:     
olaf@180:     // get all args and overall length
olaf@180:     strings[0] = s1;
olaf@180:     strings[1] = s2;
olaf@180:     size_t strlen = s1.length + s2.length;
olaf@180:     for (size_t i=2;i<count;i++) {
olaf@180:         sstr_t s = va_arg (ap, sstr_t);
olaf@180:         strings[i] = s;
olaf@180:         strlen += s.length;
olaf@180:     }
olaf@180:     
olaf@180:     // create new string
universe@185:     str.ptr = (char*) almalloc(a, strlen + 1);
olaf@180:     str.length = strlen;
olaf@180:     if(!str.ptr) {
olaf@180:         free(strings);
olaf@180:         str.length = 0;
olaf@180:         return str;
olaf@180:     }
olaf@180:     
olaf@180:     // concatenate strings
olaf@180:     size_t pos = 0;
olaf@180:     for (size_t i=0;i<count;i++) {
olaf@180:         sstr_t s = strings[i];
olaf@180:         memcpy(str.ptr + pos, s.ptr, s.length);
olaf@180:         pos += s.length;
olaf@180:     }
olaf@180:     
olaf@180:     str.ptr[str.length] = '\0';
olaf@180:     
olaf@180:     free(strings);
olaf@180:     
olaf@180:     return str;
olaf@180: }
olaf@180: 
olaf@180: sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
olaf@180:     va_list ap;
olaf@180:     va_start(ap, s2);
olaf@180:     sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
olaf@180:     va_end(ap);
olaf@180:     return s;
olaf@180: }
olaf@180: 
olaf@180: sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
olaf@180:     va_list ap;
olaf@180:     va_start(ap, s2);
olaf@180:     sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
olaf@180:     va_end(ap);
olaf@180:     return s;
olaf@180: }
olaf@180: 
olaf@68: sstr_t sstrsubs(sstr_t s, size_t start) {
olaf@20:     return sstrsubsl (s, start, s.length-start);
olaf@20: }
olaf@20: 
olaf@68: sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
olaf@20:     sstr_t new_sstr;
olaf@104:     if (start >= s.length) {
universe@173:         new_sstr.ptr = NULL;
universe@173:         new_sstr.length = 0;
universe@173:     } else {
universe@173:         if (length > s.length-start) {
universe@173:             length = s.length-start;
universe@173:         }
universe@173:         new_sstr.ptr = &s.ptr[start];
universe@173:         new_sstr.length = length;
olaf@20:     }
olaf@20:     return new_sstr;
olaf@20: }
olaf@20: 
olaf@108: sstr_t sstrchr(sstr_t s, int c) {
olaf@108:     for(size_t i=0;i<s.length;i++) {
olaf@108:         if(s.ptr[i] == c) {
olaf@108:             return sstrsubs(s, i);
olaf@108:         }
olaf@108:     }
olaf@108:     sstr_t n;
olaf@108:     n.ptr = NULL;
olaf@108:     n.length = 0;
olaf@108:     return n;
olaf@108: }
olaf@108: 
universe@148: sstr_t sstrrchr(sstr_t s, int c) {
universe@148:     if (s.length > 0) {
universe@152:         for(size_t i=s.length;i>0;i--) {
universe@152:             if(s.ptr[i-1] == c) {
universe@152:                 return sstrsubs(s, i-1);
universe@148:             }
universe@148:         }
universe@148:     }
universe@148:     sstr_t n;
universe@148:     n.ptr = NULL;
universe@148:     n.length = 0;
universe@148:     return n;
universe@148: }
universe@148: 
universe@237: #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
universe@237:     ((size_t*)ptable)[index] : (size_t) ((uint8_t*)ptable)[index])
universe@236: 
universe@237: #define ptable_w(useheap, ptable, index, src) do {\
universe@237:     if (!useheap) ((uint8_t*)ptable)[index] = (uint8_t) src;\
universe@237:     else ((size_t*)ptable)[index] = src;\
universe@237:     } while (0);
universe@236: 
universe@214: sstr_t sstrstr(sstr_t string, sstr_t match) {
universe@214:     if (match.length == 0) {
universe@214:         return string;
universe@214:     }
universe@214:     
universe@236:     /* prepare default return value in case of no match */
universe@236:     sstr_t result = sstrn(NULL, 0);
universe@236:     
universe@236:     /*
universe@236:      * IMPORTANT:
universe@236:      * our prefix table contains the prefix length PLUS ONE
universe@236:      * this is our decision, because we want to use the full range of size_t
universe@236:      * the original algorithm needs a (-1) at one single place
universe@236:      * and we want to avoid that
universe@236:      */
universe@236:     
universe@236:     /* static prefix table */
universe@236:     static uint8_t s_prefix_table[256];
universe@236:     
universe@236:     /* check pattern length and use appropriate prefix table */
universe@237:     /* if the pattern exceeds static prefix table, allocate on the heap */
universe@237:     register int useheap = match.length > 255;
universe@237:     register void* ptable = useheap ?
universe@237:         calloc(match.length+1, sizeof(size_t)): s_prefix_table;
universe@236:     
universe@236:     /* keep counter in registers */
universe@236:     register size_t i, j;
universe@236:     
universe@236:     /* fill prefix table */
universe@236:     i = 0; j = 0;
universe@237:     ptable_w(useheap, ptable, i, j);
universe@236:     while (i < match.length) {
universe@236:         while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
universe@238:             ptable_r(j, useheap, ptable, j-1);
universe@236:         }
universe@236:         i++; j++;
universe@237:         ptable_w(useheap, ptable, i, j);
universe@236:     }
universe@236: 
universe@236:     /* search */
universe@236:     i = 0; j = 1;
universe@236:     while (i < string.length) {
universe@236:         while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
universe@237:             ptable_r(j, useheap, ptable, j-1);
universe@236:         }
universe@236:         i++; j++;
universe@236:         if (j-1 == match.length) {
universe@236:             size_t start = i - match.length;
universe@236:             result.ptr = string.ptr + start;
universe@236:             result.length = string.length - start;
universe@236:             break;
universe@214:         }
universe@214:     }
universe@236: 
universe@236:     /* if prefix table was allocated on the heap, free it */
universe@236:     if (ptable != s_prefix_table) {
universe@236:         free(ptable);
universe@236:     }
universe@214:     
universe@236:     return result;
universe@214: }
universe@214: 
universe@237: #undef ptable_r
universe@237: #undef ptable_w
universe@237: 
universe@173: sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
universe@125:     return sstrsplit_a(ucx_default_allocator(), s, d, n);
universe@119: }
universe@119: 
universe@173: sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
universe@119:     if (s.length == 0 || d.length == 0) {
universe@119:         *n = -1;
universe@39:         return NULL;
universe@39:     }
universe@231:     
universe@231:     /* special cases: delimiter is at least as large as the string */
universe@231:     if (d.length >= s.length) {
universe@231:         /* exact match */
universe@231:         if (sstrcmp(s, d) == 0) {
universe@231:             *n = 0;
universe@231:             return NULL;
universe@231:         } else /* no match possible */ {
universe@231:             *n = 1;
universe@231:             sstr_t *result = (sstr_t*) almalloc(allocator, sizeof(sstr_t));
universe@233:             *result = sstrdup_a(allocator, s);
universe@231:             return result;
universe@231:         }
universe@231:     }
universe@231:     
universe@173:     ssize_t nmax = *n;
universe@235:     size_t arrlen = 16;
universe@235:     sstr_t* result = (sstr_t*) almalloc(allocator, arrlen*sizeof(sstr_t));
universe@39: 
universe@119:     if (result) {
universe@233:         sstr_t curpos = s;
universe@233:         ssize_t j = 1;
universe@233:         while (1) {
universe@234:             sstr_t match;
universe@234:             /* optimize for one byte delimiters */
universe@234:             if (d.length == 1) {
universe@234:                 match = curpos;
universe@234:                 for (size_t i = 0 ; i < curpos.length ; i++) {
universe@234:                     if (curpos.ptr[i] == *(d.ptr)) {
universe@234:                         match.ptr = curpos.ptr + i;
universe@234:                         break;
universe@234:                     }
universe@234:                     match.length--;
universe@234:                 }
universe@234:             } else {
universe@234:                 match = sstrstr(curpos, d);
universe@234:             }
universe@233:             if (match.length > 0) {
universe@233:                 /* is this our last try? */
universe@233:                 if (nmax == 0 || j < nmax) {
universe@233:                     /* copy the current string to the array */
universe@233:                     sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
universe@233:                     result[j-1] = sstrdup_a(allocator, item);
universe@233:                     size_t processed = item.length + d.length;
universe@233:                     curpos.ptr += processed;
universe@233:                     curpos.length -= processed;
universe@39: 
universe@233:                     /* allocate memory for the next string */
universe@233:                     j++;
universe@235:                     if (j > arrlen) {
universe@235:                         arrlen *= 2;
universe@235:                         sstr_t* reallocated = (sstr_t*) alrealloc(
universe@235:                                 allocator, result, arrlen*sizeof(sstr_t));
universe@235:                         if (reallocated) {
universe@235:                             result = reallocated;
universe@235:                         } else {
universe@235:                             for (ssize_t i = 0 ; i < j-1 ; i++) {
universe@235:                                 alfree(allocator, result[i].ptr);
universe@235:                             }
universe@235:                             alfree(allocator, result);
universe@235:                             *n = -2;
universe@235:                             return NULL;
universe@233:                         }
universe@233:                     }
universe@233:                 } else {
universe@233:                     /* nmax reached, copy the _full_ remaining string */
universe@233:                     result[j-1] = sstrdup_a(allocator, curpos);
universe@233:                     break;
universe@233:                 }
universe@173:             } else {
universe@233:                 /* no more matches, copy last string */
universe@233:                 result[j-1] = sstrdup_a(allocator, curpos);
universe@173:                 break;
universe@173:             }
universe@119:         }
universe@233:         *n = j;
universe@119:     } else {
universe@119:         *n = -2;
universe@39:     }
universe@39: 
universe@39:     return result;
universe@39: }
universe@39: 
olaf@68: int sstrcmp(sstr_t s1, sstr_t s2) {
universe@116:     if (s1.length == s2.length) {
universe@116:         return memcmp(s1.ptr, s2.ptr, s1.length);
universe@116:     } else if (s1.length > s2.length) {
universe@116:         return 1;
universe@116:     } else {
universe@116:         return -1;
universe@116:     }
olaf@20: }
olaf@20: 
universe@149: int sstrcasecmp(sstr_t s1, sstr_t s2) {
universe@149:     if (s1.length == s2.length) {
universe@149: #ifdef _WIN32
universe@149:         return _strnicmp(s1.ptr, s2.ptr, s1.length);
universe@149: #else
universe@149:         return strncasecmp(s1.ptr, s2.ptr, s1.length);
universe@149: #endif
universe@149:     } else if (s1.length > s2.length) {
universe@149:         return 1;
universe@149:     } else {
universe@149:         return -1;
universe@149:     }
universe@149: }
universe@149: 
olaf@68: sstr_t sstrdup(sstr_t s) {
universe@125:     return sstrdup_a(ucx_default_allocator(), s);
olaf@109: }
olaf@20: 
universe@125: sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
olaf@109:     sstr_t newstring;
universe@173:     newstring.ptr = (char*)almalloc(allocator, s.length + 1);
olaf@109:     if (newstring.ptr) {
olaf@109:         newstring.length = s.length;
olaf@109:         newstring.ptr[newstring.length] = 0;
olaf@109:         
olaf@109:         memcpy(newstring.ptr, s.ptr, s.length);
olaf@109:     } else {
olaf@109:         newstring.length = 0;
olaf@109:     }
olaf@109:     
olaf@20:     return newstring;
olaf@20: }
olaf@96: 
olaf@96: sstr_t sstrtrim(sstr_t string) {
olaf@96:     sstr_t newstr = string;
universe@189:     
universe@189:     while (newstr.length > 0 && isspace(*newstr.ptr)) {
universe@189:         newstr.ptr++;
universe@189:         newstr.length--;
universe@98:     }
universe@189:     while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
universe@189:         newstr.length--;
olaf@96:     }
olaf@96:     
olaf@96:     return newstr;
olaf@96: }
universe@146: 
universe@146: int sstrprefix(sstr_t string, sstr_t prefix) {
universe@146:     if (string.length == 0) {
universe@146:         return prefix.length == 0;
universe@146:     }
universe@146:     if (prefix.length == 0) {
universe@146:         return 1;
universe@146:     }
universe@146:     
universe@146:     if (prefix.length > string.length) {
universe@146:         return 0;
universe@146:     } else {
universe@146:         return memcmp(string.ptr, prefix.ptr, prefix.length) == 0;
universe@146:     }
universe@146: }
universe@146: 
universe@146: int sstrsuffix(sstr_t string, sstr_t suffix) {
universe@146:     if (string.length == 0) {
universe@146:         return suffix.length == 0;
universe@146:     }
universe@146:     if (suffix.length == 0) {
universe@146:         return 1;
universe@146:     }
universe@146:     
universe@146:     if (suffix.length > string.length) {
universe@146:         return 0;
universe@146:     } else {
universe@146:         return memcmp(string.ptr+string.length-suffix.length,
universe@146:             suffix.ptr, suffix.length) == 0;
universe@146:     }
universe@146: }
universe@210: 
universe@210: sstr_t sstrlower(sstr_t string) {
universe@210:     sstr_t ret = sstrdup(string);
universe@210:     for (size_t i = 0; i < ret.length ; i++) {
universe@210:         ret.ptr[i] = tolower(ret.ptr[i]);
universe@210:     }
universe@210:     return ret;
universe@210: }
universe@210: 
universe@210: sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
universe@210:     sstr_t ret = sstrdup_a(allocator, string);
universe@210:     for (size_t i = 0; i < ret.length ; i++) {
universe@210:         ret.ptr[i] = tolower(ret.ptr[i]);
universe@210:     }
universe@210:     return ret;
universe@210: }
universe@210: 
universe@210: sstr_t sstrupper(sstr_t string) {
universe@210:     sstr_t ret = sstrdup(string);
universe@210:     for (size_t i = 0; i < ret.length ; i++) {
universe@210:         ret.ptr[i] = toupper(ret.ptr[i]);
universe@210:     }
universe@210:     return ret;
universe@210: }
universe@210: 
universe@210: sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
universe@210:     sstr_t ret = sstrdup_a(allocator, string);
universe@210:     for (size_t i = 0; i < ret.length ; i++) {
universe@210:         ret.ptr[i] = toupper(ret.ptr[i]);
universe@210:     }
universe@210:     return ret;
universe@210: }