src/string.c

changeset 306
90b6d69bb499
parent 300
d1f814633049
child 315
5b97de37aada
     1.1 --- a/src/string.c	Mon May 14 15:58:51 2018 +0200
     1.2 +++ b/src/string.c	Mon May 14 17:56:03 2018 +0200
     1.3 @@ -50,13 +50,29 @@
     1.4      return string;
     1.5  }
     1.6  
     1.7 -size_t sstrnlen(size_t n, sstr_t s, ...) {
     1.8 +scstr_t scstr(const char *cstring) {
     1.9 +    scstr_t string;
    1.10 +    string.ptr = cstring;
    1.11 +    string.length = strlen(cstring);
    1.12 +    return string;
    1.13 +}
    1.14 +
    1.15 +scstr_t scstrn(const char *cstring, size_t length) {
    1.16 +    scstr_t string;
    1.17 +    string.ptr = cstring;
    1.18 +    string.length = length;
    1.19 +    return string;
    1.20 +}
    1.21 +
    1.22 +
    1.23 +size_t ucx_strnlen(size_t n, ...) {
    1.24      va_list ap;
    1.25 -    size_t size = s.length;
    1.26 -    va_start(ap, s);
    1.27 +    va_start(ap, n);
    1.28 +    
    1.29 +    size_t size = 0;
    1.30  
    1.31 -    for (size_t i = 1 ; i < n ; i++) {
    1.32 -        sstr_t str = va_arg(ap, sstr_t);
    1.33 +    for (size_t i = 0 ; i < n ; i++) {
    1.34 +        scstr_t str = va_arg(ap, scstr_t);
    1.35          if(((size_t)-1) - str.length < size) {
    1.36              size = 0;
    1.37              break;
    1.38 @@ -71,8 +87,7 @@
    1.39  static sstr_t sstrvcat_a(
    1.40          UcxAllocator *a,
    1.41          size_t count,
    1.42 -        sstr_t s1,
    1.43 -        sstr_t s2,
    1.44 +        scstr_t s1,
    1.45          va_list ap) {
    1.46      sstr_t str;
    1.47      str.ptr = NULL;
    1.48 @@ -81,11 +96,13 @@
    1.49          return str;
    1.50      }
    1.51      
    1.52 +    scstr_t s2 = va_arg (ap, scstr_t);
    1.53 +    
    1.54      if(((size_t)-1) - s1.length < s2.length) {
    1.55          return str;
    1.56      }
    1.57      
    1.58 -    sstr_t *strings = (sstr_t*) calloc(count, sizeof(sstr_t));
    1.59 +    scstr_t *strings = (scstr_t*) calloc(count, sizeof(scstr_t));
    1.60      if(!strings) {
    1.61          return str;
    1.62      }
    1.63 @@ -96,7 +113,7 @@
    1.64      size_t slen = s1.length + s2.length;
    1.65      int error = 0;
    1.66      for (size_t i=2;i<count;i++) {
    1.67 -        sstr_t s = va_arg (ap, sstr_t);
    1.68 +        scstr_t s = va_arg (ap, scstr_t);
    1.69          strings[i] = s;
    1.70          if(((size_t)-1) - s.length < slen) {
    1.71              error = 1;
    1.72 @@ -121,7 +138,7 @@
    1.73      // concatenate strings
    1.74      size_t pos = 0;
    1.75      for (size_t i=0;i<count;i++) {
    1.76 -        sstr_t s = strings[i];
    1.77 +        scstr_t s = strings[i];
    1.78          memcpy(str.ptr + pos, s.ptr, s.length);
    1.79          pos += s.length;
    1.80      }
    1.81 @@ -133,65 +150,123 @@
    1.82      return str;
    1.83  }
    1.84  
    1.85 -sstr_t sstrcat(size_t count, sstr_t s1, sstr_t s2, ...) {
    1.86 +sstr_t ucx_strcat(size_t count, scstr_t s1, ...) {
    1.87      va_list ap;
    1.88 -    va_start(ap, s2);
    1.89 -    sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, s2, ap);
    1.90 +    va_start(ap, s1);
    1.91 +    sstr_t s = sstrvcat_a(ucx_default_allocator(), count, s1, ap);
    1.92      va_end(ap);
    1.93      return s;
    1.94  }
    1.95  
    1.96 -sstr_t sstrcat_a(UcxAllocator *a, size_t count, sstr_t s1, sstr_t s2, ...) {
    1.97 +sstr_t ucx_strcat_a(UcxAllocator *a, size_t count, scstr_t s1, ...) {
    1.98      va_list ap;
    1.99 -    va_start(ap, s2);
   1.100 -    sstr_t s = sstrvcat_a(a, count, s1, s2, ap);
   1.101 +    va_start(ap, s1);
   1.102 +    sstr_t s = sstrvcat_a(a, count, s1, ap);
   1.103      va_end(ap);
   1.104      return s;
   1.105  }
   1.106  
   1.107 +static int ucx_substring(
   1.108 +        size_t str_length,
   1.109 +        size_t start,
   1.110 +        size_t length,
   1.111 +        size_t *newlen,
   1.112 +        size_t *newpos)
   1.113 +{
   1.114 +    *newlen = 0;
   1.115 +    *newpos = 0;
   1.116 +    
   1.117 +    if(start > str_length) {
   1.118 +        return 0;
   1.119 +    }
   1.120 +    
   1.121 +    if(length > str_length - start) {
   1.122 +        length = str_length - start;
   1.123 +    }
   1.124 +    *newlen = length;
   1.125 +    *newpos = start;
   1.126 +    return 1;
   1.127 +}
   1.128 +
   1.129  sstr_t sstrsubs(sstr_t s, size_t start) {
   1.130      return sstrsubsl (s, start, s.length-start);
   1.131  }
   1.132  
   1.133  sstr_t sstrsubsl(sstr_t s, size_t start, size_t length) {
   1.134 -    sstr_t new_sstr;
   1.135 -    if (start >= s.length) {
   1.136 -        new_sstr.ptr = NULL;
   1.137 -        new_sstr.length = 0;
   1.138 -    } else {
   1.139 -        if (length > s.length-start) {
   1.140 -            length = s.length-start;
   1.141 +    size_t pos;
   1.142 +    sstr_t ret = { NULL, 0 };
   1.143 +    if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
   1.144 +        ret.ptr = s.ptr + pos;
   1.145 +    }
   1.146 +    return ret;
   1.147 +}
   1.148 +
   1.149 +scstr_t scstrsubs(scstr_t s, size_t start) {
   1.150 +    return scstrsubsl (s, start, s.length-start);
   1.151 +}
   1.152 +
   1.153 +scstr_t scstrsubsl(scstr_t s, size_t start, size_t length) {
   1.154 +    size_t pos;
   1.155 +    scstr_t ret = { NULL, 0 };
   1.156 +    if(ucx_substring(s.length, start, length, &ret.length, &pos)) {
   1.157 +        ret.ptr = s.ptr + pos;
   1.158 +    }
   1.159 +    return ret;
   1.160 +}
   1.161 +
   1.162 +
   1.163 +int ucx_strchr(const char *string, size_t length, int chr, size_t *pos) {
   1.164 +    for(size_t i=0;i<length;i++) {
   1.165 +        if(string[i] == chr) {
   1.166 +            *pos = i;
   1.167 +            return 1;
   1.168          }
   1.169 -        new_sstr.ptr = &s.ptr[start];
   1.170 -        new_sstr.length = length;
   1.171      }
   1.172 -    return new_sstr;
   1.173 +    return 0;
   1.174 +}
   1.175 +
   1.176 +int ucx_strrchr(const char *string, size_t length, int chr, size_t *pos) {
   1.177 +    if(length > 0) {
   1.178 +        for(size_t i=length ; i>0 ; i--) {
   1.179 +            if(string[i-1] == chr) {
   1.180 +                *pos = i-1;
   1.181 +                return 1;
   1.182 +            }
   1.183 +        }
   1.184 +    }
   1.185 +    return 0;
   1.186  }
   1.187  
   1.188  sstr_t sstrchr(sstr_t s, int c) {
   1.189 -    for(size_t i=0;i<s.length;i++) {
   1.190 -        if(s.ptr[i] == c) {
   1.191 -            return sstrsubs(s, i);
   1.192 -        }
   1.193 +    size_t pos = 0;
   1.194 +    if(ucx_strchr(s.ptr, s.length, c, &pos)) {
   1.195 +        return sstrsubs(s, pos);
   1.196      }
   1.197 -    sstr_t n;
   1.198 -    n.ptr = NULL;
   1.199 -    n.length = 0;
   1.200 -    return n;
   1.201 +    return sstrn(NULL, 0);
   1.202  }
   1.203  
   1.204  sstr_t sstrrchr(sstr_t s, int c) {
   1.205 -    if (s.length > 0) {
   1.206 -        for(size_t i=s.length;i>0;i--) {
   1.207 -            if(s.ptr[i-1] == c) {
   1.208 -                return sstrsubs(s, i-1);
   1.209 -            }
   1.210 -        }
   1.211 +    size_t pos = 0;
   1.212 +    if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
   1.213 +        return sstrsubs(s, pos);
   1.214      }
   1.215 -    sstr_t n;
   1.216 -    n.ptr = NULL;
   1.217 -    n.length = 0;
   1.218 -    return n;
   1.219 +    return sstrn(NULL, 0);
   1.220 +}
   1.221 +
   1.222 +scstr_t scstrchr(scstr_t s, int c) {
   1.223 +    size_t pos = 0;
   1.224 +    if(ucx_strchr(s.ptr, s.length, c, &pos)) {
   1.225 +        return scstrsubs(s, pos);
   1.226 +    }
   1.227 +    return scstrn(NULL, 0);
   1.228 +}
   1.229 +
   1.230 +scstr_t scstrrchr(scstr_t s, int c) {
   1.231 +    size_t pos = 0;
   1.232 +    if(ucx_strrchr(s.ptr, s.length, c, &pos)) {
   1.233 +        return scstrsubs(s, pos);
   1.234 +    }
   1.235 +    return scstrn(NULL, 0);
   1.236  }
   1.237  
   1.238  #define ptable_r(dest, useheap, ptable, index) (dest = useheap ? \
   1.239 @@ -202,13 +277,21 @@
   1.240      else ((size_t*)ptable)[index] = src;\
   1.241      } while (0);
   1.242  
   1.243 -sstr_t sstrstr(sstr_t string, sstr_t match) {
   1.244 -    if (match.length == 0) {
   1.245 -        return string;
   1.246 +
   1.247 +const char* ucx_strstr(
   1.248 +        const char *str,
   1.249 +        size_t length,
   1.250 +        const char *match,
   1.251 +        size_t matchlen,
   1.252 +        size_t *newlen)
   1.253 +{
   1.254 +    *newlen = length;
   1.255 +    if (matchlen == 0) {
   1.256 +        return str;
   1.257      }
   1.258      
   1.259 -    /* prepare default return value in case of no match */
   1.260 -    sstr_t result = sstrn(NULL, 0);
   1.261 +    const char *result = NULL;
   1.262 +    size_t resultlen = 0;
   1.263      
   1.264      /*
   1.265       * IMPORTANT:
   1.266 @@ -223,9 +306,9 @@
   1.267      
   1.268      /* check pattern length and use appropriate prefix table */
   1.269      /* if the pattern exceeds static prefix table, allocate on the heap */
   1.270 -    register int useheap = match.length > 255;
   1.271 +    register int useheap = matchlen > 255;
   1.272      register void* ptable = useheap ?
   1.273 -        calloc(match.length+1, sizeof(size_t)): s_prefix_table;
   1.274 +        calloc(matchlen+1, sizeof(size_t)): s_prefix_table;
   1.275      
   1.276      /* keep counter in registers */
   1.277      register size_t i, j;
   1.278 @@ -233,8 +316,8 @@
   1.279      /* fill prefix table */
   1.280      i = 0; j = 0;
   1.281      ptable_w(useheap, ptable, i, j);
   1.282 -    while (i < match.length) {
   1.283 -        while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
   1.284 +    while (i < matchlen) {
   1.285 +        while (j >= 1 && match[j-1] != match[i]) {
   1.286              ptable_r(j, useheap, ptable, j-1);
   1.287          }
   1.288          i++; j++;
   1.289 @@ -243,15 +326,15 @@
   1.290  
   1.291      /* search */
   1.292      i = 0; j = 1;
   1.293 -    while (i < string.length) {
   1.294 -        while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
   1.295 +    while (i < length) {
   1.296 +        while (j >= 1 && str[i] != match[j-1]) {
   1.297              ptable_r(j, useheap, ptable, j-1);
   1.298          }
   1.299          i++; j++;
   1.300 -        if (j-1 == match.length) {
   1.301 -            size_t start = i - match.length;
   1.302 -            result.ptr = string.ptr + start;
   1.303 -            result.length = string.length - start;
   1.304 +        if (j-1 == matchlen) {
   1.305 +            size_t start = i - matchlen;
   1.306 +            result = str + start;
   1.307 +            resultlen = length - start;
   1.308              break;
   1.309          }
   1.310      }
   1.311 @@ -261,17 +344,54 @@
   1.312          free(ptable);
   1.313      }
   1.314      
   1.315 +    *newlen = resultlen;
   1.316 +    return result;
   1.317 +}
   1.318 +
   1.319 +sstr_t ucx_sstrstr(sstr_t string, scstr_t match) {
   1.320 +    sstr_t result;
   1.321 +    
   1.322 +    size_t reslen;
   1.323 +    const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
   1.324 +    if(!resstr) {
   1.325 +        result.ptr = NULL;
   1.326 +        result.length = 0;
   1.327 +        return result;
   1.328 +    }
   1.329 +    
   1.330 +    size_t pos = resstr - string.ptr;
   1.331 +    result.ptr = string.ptr + pos;
   1.332 +    result.length = reslen;
   1.333 +    
   1.334 +    return result;
   1.335 +}
   1.336 +
   1.337 +scstr_t ucx_scstrstr(scstr_t string, scstr_t match) {
   1.338 +    scstr_t result;
   1.339 +    
   1.340 +    size_t reslen;
   1.341 +    const char *resstr = ucx_strstr(string.ptr, string.length, match.ptr, match.length, &reslen);
   1.342 +    if(!resstr) {
   1.343 +        result.ptr = NULL;
   1.344 +        result.length = 0;
   1.345 +        return result;
   1.346 +    }
   1.347 +    
   1.348 +    size_t pos = resstr - string.ptr;
   1.349 +    result.ptr = string.ptr + pos;
   1.350 +    result.length = reslen;
   1.351 +    
   1.352      return result;
   1.353  }
   1.354  
   1.355  #undef ptable_r
   1.356  #undef ptable_w
   1.357  
   1.358 -sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {
   1.359 -    return sstrsplit_a(ucx_default_allocator(), s, d, n);
   1.360 +sstr_t* ucx_strsplit(scstr_t s, scstr_t d, ssize_t *n) {
   1.361 +    return ucx_strsplit_a(ucx_default_allocator(), s, d, n);
   1.362  }
   1.363  
   1.364 -sstr_t* sstrsplit_a(UcxAllocator *allocator, sstr_t s, sstr_t d, ssize_t *n) {
   1.365 +sstr_t* ucx_strsplit_a(UcxAllocator *allocator, scstr_t s, scstr_t d, ssize_t *n) {
   1.366      if (s.length == 0 || d.length == 0) {
   1.367          *n = -1;
   1.368          return NULL;
   1.369 @@ -300,10 +420,10 @@
   1.370      sstr_t* result = (sstr_t*) alcalloc(allocator, arrlen, sizeof(sstr_t));
   1.371  
   1.372      if (result) {
   1.373 -        sstr_t curpos = s;
   1.374 +        scstr_t curpos = s;
   1.375          ssize_t j = 1;
   1.376          while (1) {
   1.377 -            sstr_t match;
   1.378 +            scstr_t match;
   1.379              /* optimize for one byte delimiters */
   1.380              if (d.length == 1) {
   1.381                  match = curpos;
   1.382 @@ -315,13 +435,13 @@
   1.383                      match.length--;
   1.384                  }
   1.385              } else {
   1.386 -                match = sstrstr(curpos, d);
   1.387 +                match = scstrstr(curpos, d);
   1.388              }
   1.389              if (match.length > 0) {
   1.390                  /* is this our last try? */
   1.391                  if (nmax == 0 || j < nmax) {
   1.392                      /* copy the current string to the array */
   1.393 -                    sstr_t item = sstrn(curpos.ptr, match.ptr - curpos.ptr);
   1.394 +                    scstr_t item = scstrn(curpos.ptr, match.ptr - curpos.ptr);
   1.395                      result[j-1] = sstrdup_a(allocator, item);
   1.396                      size_t processed = item.length + d.length;
   1.397                      curpos.ptr += processed;
   1.398 @@ -367,7 +487,7 @@
   1.399      return result;
   1.400  }
   1.401  
   1.402 -int sstrcmp(sstr_t s1, sstr_t s2) {
   1.403 +int ucx_str_cmp(scstr_t s1, scstr_t s2) {
   1.404      if (s1.length == s2.length) {
   1.405          return memcmp(s1.ptr, s2.ptr, s1.length);
   1.406      } else if (s1.length > s2.length) {
   1.407 @@ -377,7 +497,7 @@
   1.408      }
   1.409  }
   1.410  
   1.411 -int sstrcasecmp(sstr_t s1, sstr_t s2) {
   1.412 +int ucx_str_casecmp(scstr_t s1, scstr_t s2) {
   1.413      if (s1.length == s2.length) {
   1.414  #ifdef _WIN32
   1.415          return _strnicmp(s1.ptr, s2.ptr, s1.length);
   1.416 @@ -391,11 +511,11 @@
   1.417      }
   1.418  }
   1.419  
   1.420 -sstr_t sstrdup(sstr_t s) {
   1.421 +sstr_t scstrdup(scstr_t s) {
   1.422      return sstrdup_a(ucx_default_allocator(), s);
   1.423  }
   1.424  
   1.425 -sstr_t sstrdup_a(UcxAllocator *allocator, sstr_t s) {
   1.426 +sstr_t scstrdup_a(UcxAllocator *allocator, scstr_t s) {
   1.427      sstr_t newstring;
   1.428      newstring.ptr = (char*)almalloc(allocator, s.length + 1);
   1.429      if (newstring.ptr) {
   1.430 @@ -410,21 +530,38 @@
   1.431      return newstring;
   1.432  }
   1.433  
   1.434 -sstr_t sstrtrim(sstr_t string) {
   1.435 -    sstr_t newstr = string;
   1.436 +
   1.437 +size_t ucx_strtrim(const char *s, size_t len, size_t *newlen) {
   1.438 +    const char *newptr = s;
   1.439 +    size_t length = len;
   1.440      
   1.441 -    while (newstr.length > 0 && isspace(*newstr.ptr)) {
   1.442 -        newstr.ptr++;
   1.443 -        newstr.length--;
   1.444 +    while(length > 0 && isspace(*newptr)) {
   1.445 +        newptr++;
   1.446 +        length--;
   1.447      }
   1.448 -    while (newstr.length > 0 && isspace(newstr.ptr[newstr.length-1])) {
   1.449 -        newstr.length--;
   1.450 +    while(length > 0 && isspace(newptr[length-1])) {
   1.451 +        length--;
   1.452      }
   1.453      
   1.454 +    *newlen = length;
   1.455 +    return newptr - s;
   1.456 +}
   1.457 +
   1.458 +sstr_t sstrtrim(sstr_t string) {
   1.459 +    sstr_t newstr;
   1.460 +    newstr.ptr = string.ptr
   1.461 +                 + ucx_strtrim(string.ptr, string.length, &newstr.length);
   1.462      return newstr;
   1.463  }
   1.464  
   1.465 -int sstrprefix(sstr_t string, sstr_t prefix) {
   1.466 +scstr_t scstrtrim(scstr_t string) {
   1.467 +    scstr_t newstr;
   1.468 +    newstr.ptr = string.ptr
   1.469 +                 + ucx_strtrim(string.ptr, string.length, &newstr.length);
   1.470 +    return newstr;
   1.471 +}
   1.472 +
   1.473 +int ucx_strprefix(scstr_t string, scstr_t prefix) {
   1.474      if (string.length == 0) {
   1.475          return prefix.length == 0;
   1.476      }
   1.477 @@ -439,7 +576,7 @@
   1.478      }
   1.479  }
   1.480  
   1.481 -int sstrsuffix(sstr_t string, sstr_t suffix) {
   1.482 +int ucx_strsuffix(scstr_t string, scstr_t suffix) {
   1.483      if (string.length == 0) {
   1.484          return suffix.length == 0;
   1.485      }
   1.486 @@ -455,7 +592,7 @@
   1.487      }
   1.488  }
   1.489  
   1.490 -sstr_t sstrlower(sstr_t string) {
   1.491 +sstr_t ucx_strlower(scstr_t string) {
   1.492      sstr_t ret = sstrdup(string);
   1.493      for (size_t i = 0; i < ret.length ; i++) {
   1.494          ret.ptr[i] = tolower(ret.ptr[i]);
   1.495 @@ -463,7 +600,7 @@
   1.496      return ret;
   1.497  }
   1.498  
   1.499 -sstr_t sstrlower_a(UcxAllocator *allocator, sstr_t string) {
   1.500 +sstr_t ucx_strlower_a(UcxAllocator *allocator, scstr_t string) {
   1.501      sstr_t ret = sstrdup_a(allocator, string);
   1.502      for (size_t i = 0; i < ret.length ; i++) {
   1.503          ret.ptr[i] = tolower(ret.ptr[i]);
   1.504 @@ -471,7 +608,7 @@
   1.505      return ret;
   1.506  }
   1.507  
   1.508 -sstr_t sstrupper(sstr_t string) {
   1.509 +sstr_t ucx_strupper(scstr_t string) {
   1.510      sstr_t ret = sstrdup(string);
   1.511      for (size_t i = 0; i < ret.length ; i++) {
   1.512          ret.ptr[i] = toupper(ret.ptr[i]);
   1.513 @@ -479,10 +616,24 @@
   1.514      return ret;
   1.515  }
   1.516  
   1.517 -sstr_t sstrupper_a(UcxAllocator *allocator, sstr_t string) {
   1.518 +sstr_t ucx_strupper_a(UcxAllocator *allocator, scstr_t string) {
   1.519      sstr_t ret = sstrdup_a(allocator, string);
   1.520      for (size_t i = 0; i < ret.length ; i++) {
   1.521          ret.ptr[i] = toupper(ret.ptr[i]);
   1.522      }
   1.523      return ret;
   1.524  }
   1.525 +
   1.526 +// private string conversion functions
   1.527 +scstr_t ucx_sc2sc(scstr_t c) {
   1.528 +    return c;
   1.529 +}
   1.530 +scstr_t ucx_ss2sc(sstr_t str) {
   1.531 +    scstr_t cs;
   1.532 +    cs.ptr = str.ptr;
   1.533 +    cs.length = str.length;
   1.534 +    return cs;
   1.535 +}
   1.536 +scstr_t ucx_ss2c_s(scstr_t c) {
   1.537 +    return c;
   1.538 +}

mercurial