Fri, 03 Jun 2022 20:05:15 +0200
new feature: count non-whitespace characters
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 * Copyright 2018 Mike Becker. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "bfile_heuristics.h"
#include <ctype.h>

/**
 * Allocates a heuristics state preset to BFILE_MEDIUM_ACCURACY with both
 * counters cleared.
 *
 * @return the new state, or NULL if the allocation failed; release it with
 *         destroy_bfile_heuristics_t()
 */
bfile_heuristics_t *new_bfile_heuristics_t(void) {
    bfile_heuristics_t *ret = malloc(sizeof *ret);
    if (ret == NULL) {
        /* out of memory: report it instead of writing through NULL */
        return NULL;
    }
    ret->level = BFILE_MEDIUM_ACCURACY;
    bfile_reset(ret);
    return ret;
}

/**
 * Releases a heuristics state.
 *
 * @param def the state to free; NULL is accepted because free(NULL) is a no-op
 */
void destroy_bfile_heuristics_t(bfile_heuristics_t *def) {
    free(def);
}

/**
 * Clears both counters so the state can be used for the next input.
 * The configured accuracy level is left untouched.
 *
 * @param def the state to reset
 */
void bfile_reset(bfile_heuristics_t *def) {
    def->bcount = 0;
    def->tcount = 0;
}

/**
 * Feeds one character into the heuristic and reports whether the input seen
 * so far looks binary.
 *
 * Every character other than EOF increments the total count; characters
 * that are neither printable nor whitespace additionally increment the
 * "binary" count. A verdict is produced once more than a level dependent
 * number of characters has been seen, or when EOF is passed:
 *
 *   BFILE_LOW_ACCURACY     more than 15 characters,  ratio above 0.32
 *   BFILE_HIGH_ACCURACY    more than 500 characters, ratio above 0.1
 *   any other level        more than 100 characters, ratio above 0.1
 *   (BFILE_MEDIUM_ACCURACY)
 *
 * With BFILE_IGNORE nothing is counted and the result is always false.
 *
 * @param def       the heuristics state to update
 * @param next_char the next character of the input, or EOF at end of input
 * @return true if the ratio of binary characters exceeds the threshold of
 *         the configured level, false otherwise (including empty input)
 */
bool bfile_check(bfile_heuristics_t *def, int next_char) {
    if (def->level == BFILE_IGNORE) {
        return false;
    }

    const bool at_eof = (next_char == EOF);
    if (!at_eof) {
        /*
         * EOF is an end marker, not file content. Counting it (as a
         * non-printable character) would push short text files over the
         * threshold, so only real characters are counted.
         * The conversion to unsigned char keeps the <ctype.h> calls defined
         * even if the caller passed a sign-extended char.
         */
        const unsigned char ch = (unsigned char) next_char;
        def->tcount++;
        if (!isprint(ch) && !isspace(ch)) {
            def->bcount++;
        }
    }

    if (def->tcount == 0) {
        /* empty files are text files (also avoids dividing by zero) */
        return false;
    }

    const double ratio = (1.0 * def->bcount) / def->tcount;
    switch (def->level) {
    case BFILE_LOW_ACCURACY:
        return (def->tcount > 15 || at_eof) && ratio > 0.32;
    case BFILE_HIGH_ACCURACY:
        return (def->tcount > 500 || at_eof) && ratio > 0.1;
    default: /* BFILE_MEDIUM_ACCURACY */
        return (def->tcount > 100 || at_eof) && ratio > 0.1;
    }
}