src/bfile_heuristics.c

changeset 34
fa9bda32de17
parent 23
778388400f7b
child 36
a7ff583e153f
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/bfile_heuristics.c	Fri Dec 28 15:44:28 2012 +0100
     1.3 @@ -0,0 +1,81 @@
     1.4 +/*
     1.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 
     1.6 + * Copyright 2011 Mike Becker. All rights reserved.
     1.7 + * 
     1.8 + * Redistribution and use in source and binary forms, with or without
     1.9 + * modification, are permitted provided that the following conditions are met:
    1.10 + * 
    1.11 + * 1. Redistributions of source code must retain the above copyright
    1.12 + * notice, this list of conditions and the following disclaimer.
    1.13 + * 
    1.14 + * 2. Redistributions in binary form must reproduce the above copyright
    1.15 + * notice, this list of conditions and the following disclaimer in the
    1.16 + * documentation and/or other materials provided with the distribution.
    1.17 + * 
    1.18 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    1.19 + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    1.20 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    1.21 + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    1.22 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    1.23 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    1.24 + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    1.25 + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    1.26 + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    1.27 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
    1.28 + *
    1.29 + * bfile_heuristics.c
    1.30 + *
    1.31 + *  Created on: 20.10.2011
    1.32 + *      Author: Mike
    1.33 + */
    1.34 +
    1.35 +#include "bfile_heuristics.h"
    1.36 +#include <ctype.h>
    1.37 +
    1.38 +bfile_heuristics_t *new_bfile_heuristics_t() {
    1.39 +  bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t));
    1.40 +  ret->level = BFILE_MEDIUM_ACCURACY;
    1.41 +  bfile_reset(ret);
    1.42 +  return ret;
    1.43 +}
    1.44 +
    1.45 +void destroy_bfile_heuristics_t(bfile_heuristics_t *def) {
    1.46 +  free(def);
    1.47 +}
    1.48 +
    1.49 +void bfile_reset(bfile_heuristics_t *def) {
    1.50 +  def->bcount = 0;
    1.51 +  def->tcount = 0;
    1.52 +}
    1.53 +
    1.54 +bool bfile_check(bfile_heuristics_t *def, int next_char) {
    1.55 +  bool ret = false;
    1.56 +  if (def->level != BFILE_IGNORE) {
    1.57 +    def->tcount++;
    1.58 +    if (!isprint(next_char) && !isspace(next_char)) {
    1.59 +      def->bcount++;
    1.60 +    }
    1.61 +
    1.62 +    if (def->tcount > 1) { /* empty files are text files */
    1.63 +      switch (def->level) {
    1.64 +      case BFILE_LOW_ACCURACY:
    1.65 +        if (def->tcount > 15 || next_char == EOF) {
    1.66 +          ret = (1.0*def->bcount)/def->tcount > 0.32;
    1.67 +        }
    1.68 +        break;
    1.69 +      case BFILE_HIGH_ACCURACY:
    1.70 +        if (def->tcount > 500 || next_char == EOF) {
    1.71 +          ret = (1.0*def->bcount)/def->tcount > 0.1;
    1.72 +        }
    1.73 +        break;
    1.74 +      default: /* BFILE_MEDIUM_ACCURACY */
    1.75 +        if (def->tcount > 100 || next_char == EOF) {
    1.76 +          ret = (1.0*def->bcount)/def->tcount > 0.1;
    1.77 +        }
    1.78 +        break;
    1.79 +      }
    1.80 +    }
    1.81 +  }
    1.82 +
    1.83 +  return ret;
    1.84 +}

mercurial