ucx/string.c

changeset 236
ffc6d0910342
parent 235
7cf1e41833a2
child 237
5ba9de6361ff
     1.1 --- a/ucx/string.c	Mon Feb 20 17:28:58 2017 +0100
     1.2 +++ b/ucx/string.c	Thu Feb 23 14:30:12 2017 +0100
     1.3 @@ -29,6 +29,7 @@
     1.4  #include <stdlib.h>
     1.5  #include <string.h>
     1.6  #include <stdarg.h>
     1.7 +#include <stdint.h>
     1.8  #include <ctype.h>
     1.9  
    1.10  #include "string.h"
    1.11 @@ -175,22 +176,91 @@
    1.12      return n;
    1.13  }
    1.14  
    1.15 +typedef size_t(*prefix_table_rfun)(void*,size_t);
    1.16 +typedef void(*prefix_table_wfun)(void*,size_t,size_t);
    1.17 +
    1.18 +static size_t s_prefix_table_r(void* table, size_t index) {
    1.19 +    return (size_t) ((uint8_t*)table)[index];
    1.20 +}
    1.21 +static void s_prefix_table_w(void* table, size_t index, size_t value) {
    1.22 +    ((uint8_t*)table)[index] = (uint8_t) value;
    1.23 +}
    1.24 +static size_t h_prefix_table_r(void* table, size_t index) {
    1.25 +    return ((size_t*)table)[index];
    1.26 +}
    1.27 +static void h_prefix_table_w(void* table, size_t index, size_t value) {
    1.28 +    ((size_t*)table)[index] = value;
    1.29 +}
    1.30 +
    1.31  sstr_t sstrstr(sstr_t string, sstr_t match) {
    1.32      if (match.length == 0) {
    1.33          return string;
    1.34      }
    1.35      
    1.36 -    for (size_t i = 0 ; i < string.length ; i++) {
    1.37 -        sstr_t substr = sstrsubs(string, i);
    1.38 -        if (sstrprefix(substr, match)) {
    1.39 -            return substr;
    1.40 +    /* prepare default return value in case of no match */
    1.41 +    sstr_t result = sstrn(NULL, 0);
    1.42 +    
    1.43 +    /*
    1.44 +     * IMPORTANT:
    1.45 +     * our prefix table contains the prefix length PLUS ONE
    1.46 +     * this is our decision, because we want to use the full range of size_t
    1.47 +     * the original algorithm needs a (-1) at one single place
    1.48 +     * and we want to avoid that
    1.49 +     */
    1.50 +    
    1.51 +    /* static prefix table */
    1.52 +    static uint8_t s_prefix_table[256];
    1.53 +    
    1.54 +    /* check pattern length and use appropriate prefix table */
    1.55 +    register void* ptable;
    1.56 +    prefix_table_rfun ptable_r;
    1.57 +    prefix_table_wfun ptable_w;
    1.58 +    if (match.length > 255) {
    1.59 +        /* pattern exceeds static prefix table, allocate on the heap */
    1.60 +        ptable = calloc(match.length+1, sizeof(size_t));
    1.61 +        ptable_r = h_prefix_table_r;
    1.62 +        ptable_w = h_prefix_table_w;
    1.63 +    } else {
    1.64 +        ptable = s_prefix_table;
    1.65 +        ptable_r = s_prefix_table_r;
    1.66 +        ptable_w = s_prefix_table_w;
    1.67 +    }
    1.68 +    
    1.69 +    /* keep counter in registers */
    1.70 +    register size_t i, j;
    1.71 +    
    1.72 +    /* fill prefix table */
    1.73 +    i = 0; j = 0;
    1.74 +    ptable_w(ptable, i, j);
    1.75 +    while (i < match.length) {
    1.76 +        while (j >= 1 && match.ptr[j-1] != match.ptr[i]) {
    1.77 +            j = ptable_r(ptable, j-i);
    1.78 +        }
    1.79 +        i++; j++;
    1.80 +        ptable_w(ptable, i, j);
    1.81 +    }
    1.82 +
    1.83 +    /* search */
    1.84 +    i = 0; j = 1;
    1.85 +    while (i < string.length) {
    1.86 +        while (j >= 1 && string.ptr[i] != match.ptr[j-1]) {
    1.87 +            j = ptable_r(ptable, j-1);
    1.88 +        }
    1.89 +        i++; j++;
    1.90 +        if (j-1 == match.length) {
    1.91 +            size_t start = i - match.length;
    1.92 +            result.ptr = string.ptr + start;
    1.93 +            result.length = string.length - start;
    1.94 +            break;
    1.95          }
    1.96      }
    1.97 +
    1.98 +    /* if prefix table was allocated on the heap, free it */
    1.99 +    if (ptable != s_prefix_table) {
   1.100 +        free(ptable);
   1.101 +    }
   1.102      
   1.103 -    sstr_t emptystr;
   1.104 -    emptystr.length = 0;
   1.105 -    emptystr.ptr = NULL;
   1.106 -    return emptystr;
   1.107 +    return result;
   1.108  }
   1.109  
   1.110  sstr_t* sstrsplit(sstr_t s, sstr_t d, ssize_t *n) {

mercurial