1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/bfile_heuristics.c Fri Dec 28 15:44:28 2012 +0100 1.3 @@ -0,0 +1,81 @@ 1.4 +/* 1.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 1.6 + * Copyright 2011 Mike Becker. All rights reserved. 1.7 + * 1.8 + * Redistribution and use in source and binary forms, with or without 1.9 + * modification, are permitted provided that the following conditions are met: 1.10 + * 1.11 + * 1. Redistributions of source code must retain the above copyright 1.12 + * notice, this list of conditions and the following disclaimer. 1.13 + * 1.14 + * 2. Redistributions in binary form must reproduce the above copyright 1.15 + * notice, this list of conditions and the following disclaimer in the 1.16 + * documentation and/or other materials provided with the distribution. 1.17 + * 1.18 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 1.19 + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1.20 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 1.21 + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 1.22 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1.23 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 1.24 + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 1.25 + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 1.26 + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 1.27 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1.28 + * 1.29 + * bfile_heuristics.c 1.30 + * 1.31 + * Created on: 20.10.2011 1.32 + * Author: Mike 1.33 + */ 1.34 + 1.35 +#include "bfile_heuristics.h" 1.36 +#include <ctype.h> 1.37 + 1.38 +bfile_heuristics_t *new_bfile_heuristics_t() { 1.39 + bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t)); 1.40 + ret->level = BFILE_MEDIUM_ACCURACY; 1.41 + bfile_reset(ret); 1.42 + return ret; 1.43 +} 1.44 + 1.45 +void destroy_bfile_heuristics_t(bfile_heuristics_t *def) { 1.46 + free(def); 1.47 +} 1.48 + 1.49 +void bfile_reset(bfile_heuristics_t *def) { 1.50 + def->bcount = 0; 1.51 + def->tcount = 0; 1.52 +} 1.53 + 1.54 +bool bfile_check(bfile_heuristics_t *def, int next_char) { 1.55 + bool ret = false; 1.56 + if (def->level != BFILE_IGNORE) { 1.57 + def->tcount++; 1.58 + if (!isprint(next_char) && !isspace(next_char)) { 1.59 + def->bcount++; 1.60 + } 1.61 + 1.62 + if (def->tcount > 1) { /* empty files are text files */ 1.63 + switch (def->level) { 1.64 + case BFILE_LOW_ACCURACY: 1.65 + if (def->tcount > 15 || next_char == EOF) { 1.66 + ret = (1.0*def->bcount)/def->tcount > 0.32; 1.67 + } 1.68 + break; 1.69 + case BFILE_HIGH_ACCURACY: 1.70 + if (def->tcount > 500 || next_char == EOF) { 1.71 + ret = (1.0*def->bcount)/def->tcount > 0.1; 1.72 + } 1.73 + break; 1.74 + default: /* BFILE_MEDIUM_ACCURACY */ 1.75 + if (def->tcount > 100 || next_char == EOF) { 1.76 + ret = (1.0*def->bcount)/def->tcount > 0.1; 1.77 + } 1.78 + break; 1.79 + } 1.80 + } 1.81 + } 1.82 + 1.83 + return ret; 1.84 +}