--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/bfile_heuristics.c Fri Dec 28 15:44:28 2012 +0100 @@ -0,0 +1,81 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * Copyright 2011 Mike Becker. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * bfile_heuristics.c + * + * Created on: 20.10.2011 + * Author: Mike + */ + +#include "bfile_heuristics.h" +#include <ctype.h> + +bfile_heuristics_t *new_bfile_heuristics_t() { + bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t)); + ret->level = BFILE_MEDIUM_ACCURACY; + bfile_reset(ret); + return ret; +} + +void destroy_bfile_heuristics_t(bfile_heuristics_t *def) { + free(def); +} + +void bfile_reset(bfile_heuristics_t *def) { + def->bcount = 0; + def->tcount = 0; +} + +bool bfile_check(bfile_heuristics_t *def, int next_char) { + bool ret = false; + if (def->level != BFILE_IGNORE) { + def->tcount++; + if (!isprint(next_char) && !isspace(next_char)) { + def->bcount++; + } + + if (def->tcount > 1) { /* empty files are text files */ + switch (def->level) { + case BFILE_LOW_ACCURACY: + if (def->tcount > 15 || next_char == EOF) { + ret = (1.0*def->bcount)/def->tcount > 0.32; + } + break; + case BFILE_HIGH_ACCURACY: + if (def->tcount > 500 || next_char == EOF) { + ret = (1.0*def->bcount)/def->tcount > 0.1; + } + break; + default: /* BFILE_MEDIUM_ACCURACY */ + if (def->tcount > 100 || next_char == EOF) { + ret = (1.0*def->bcount)/def->tcount > 0.1; + } + break; + } + } + } + + return ret; +}