bfile_heuristics.c

changeset 22
4508da679ffb
parent 21
91e0890464b0
child 23
778388400f7b
     1.1 --- a/bfile_heuristics.c	Thu Oct 20 15:21:53 2011 +0200
     1.2 +++ b/bfile_heuristics.c	Thu Oct 20 17:29:23 2011 +0200
     1.3 @@ -6,22 +6,49 @@
     1.4   */
     1.5  
     1.6  #include "bfile_heuristics.h"
     1.7 +#include <ctype.h>
     1.8  
     1.9  bfile_heuristics_t *new_bfile_heuristics_t() {
    1.10    bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t));
    1.11    ret->level = BFILE_MEDIUM_ACCURACY;
    1.12 -  /* TODO: check why this fails */
    1.13 -  /* ret->ccount = calloc(256, sizeof(int)); */
    1.14 +  bfile_reset(ret);
    1.15    return ret;
    1.16  }
    1.17  
    1.18  void destroy_bfile_heuristics_t(bfile_heuristics_t *def) {
    1.19 -  free(def->ccount);
    1.20    free(def);
    1.21  }
    1.22  
    1.23 +void bfile_reset(bfile_heuristics_t *def) {
    1.24 +  def->bcount = 0;
    1.25 +  def->tcount = 0;
    1.26 +}
    1.27 +
    1.28  bool bfile_check(bfile_heuristics_t *def, int next_char) {
    1.29    bool ret = false;
    1.30 +  if (def->level != BFILE_IGNORE) {
    1.31 +    def->tcount++;
    1.32 +    if (!isprint(next_char) && !isspace(next_char)) {
    1.33 +      def->bcount++;
    1.34 +    }
    1.35 +
    1.36 +    switch (def->level) {
    1.37 +    case BFILE_LOW_ACCURACY:
    1.38 +      if (def->tcount > 15 || next_char == EOF) {
    1.39 +        ret = (1.0*def->bcount)/def->tcount > 0.32;
    1.40 +      }
    1.41 +      break;
    1.42 +    case BFILE_HIGH_ACCURACY:
    1.43 +      if (def->tcount > 500 || next_char == EOF) {
    1.44 +        ret = (1.0*def->bcount)/def->tcount > 0.1;
    1.45 +      }
    1.46 +      break;
    1.47 +    default: /* BFILE_MEDIUM_ACCURACY */
    1.48 +      if (def->tcount > 100 || next_char == EOF) {
    1.49 +        ret = (1.0*def->bcount)/def->tcount > 0.1;
    1.50 +      }
    1.51 +    }
    1.52 +  }
    1.53  
    1.54    return ret;
    1.55  }

mercurial