src/bfile_heuristics.c

Fri, 03 Jun 2022 20:05:15 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 03 Jun 2022 20:05:15 +0200
changeset 66
be2084398c37
parent 57
68018eac46c3
permissions
-rw-r--r--

new feature: count non-whitespace characters

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 
 * Copyright 2018 Mike Becker. All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "bfile_heuristics.h"
#include <ctype.h>

/*
 * Allocates a heuristics object, sets its level to BFILE_MEDIUM_ACCURACY
 * and resets its counters.
 *
 * Returns the new object, or a null pointer if the allocation failed.
 * The object is released with destroy_bfile_heuristics_t().
 */
bfile_heuristics_t *new_bfile_heuristics_t(void) {
  bfile_heuristics_t *ret = malloc(sizeof *ret);
  if (ret) {
    /* only touch the object when malloc actually delivered memory */
    ret->level = BFILE_MEDIUM_ACCURACY;
    bfile_reset(ret);
  }
  return ret;
}

/*
 * Releases the memory of a heuristics object by handing it to free().
 *
 * def - the object to release; a null pointer is accepted, because free()
 *       does nothing in that case
 */
void destroy_bfile_heuristics_t(bfile_heuristics_t *def) {
  free(def);
}

/*
 * Clears both counters of the heuristics object (tcount and bcount).
 * The configured level is left untouched.
 *
 * def - the heuristics object to reset; must not be a null pointer
 */
void bfile_reset(bfile_heuristics_t *def) {
  def->tcount = def->bcount = 0;
}

/*
 * Feeds the next character to the heuristics and reports whether the data
 * seen so far looks like binary content.
 *
 * def       - heuristics state; tcount and bcount are updated in place
 * next_char - the character to examine, or EOF to signal the end of input
 *             (presumably the result of fgetc() - verify against callers)
 *
 * A character counts as binary when it is neither printable nor whitespace.
 * A verdict is only computed once more than a level specific number of
 * characters has been seen (15 / 100 / 500) or when EOF is passed.
 *
 * Returns true if the share of binary characters exceeds the threshold of
 * the configured level, false otherwise. Always returns false when the
 * level is BFILE_IGNORE; the counters are not modified in that case.
 */
bool bfile_check(bfile_heuristics_t *def, int next_char) {
  if (def->level == BFILE_IGNORE) {
    return false;
  }

  const bool at_eof = next_char == EOF;
  if (!at_eof) {
    /*
     * EOF is an end marker and not part of the file content. Counting it
     * (it is neither printable nor whitespace) would make every very short
     * text file look binary.
     */
    const unsigned char c = (unsigned char) next_char;
    def->tcount++;
    if (!isprint(c) && !isspace(c)) {
      def->bcount++;
    }
  }

  if (def->tcount == 0) {
    return false; /* empty files are text files */
  }

  const double ratio = (1.0 * def->bcount) / def->tcount;
  switch (def->level) {
  case BFILE_LOW_ACCURACY:
    return (at_eof || def->tcount > 15) && ratio > 0.32;
  case BFILE_HIGH_ACCURACY:
    return (at_eof || def->tcount > 500) && ratio > 0.1;
  default: /* BFILE_MEDIUM_ACCURACY */
    return (at_eof || def->tcount > 100) && ratio > 0.1;
  }
}

mercurial