completed binary file heuristics

Thu, 20 Oct 2011 17:29:23 +0200

author
Mike Becker <universe@uap-core.de>
date
Thu, 20 Oct 2011 17:29:23 +0200
changeset 22
4508da679ffb
parent 21
91e0890464b0
child 23
778388400f7b

completed binary file heuristics

bfile_heuristics.c file | annotate | diff | comparison | revisions
bfile_heuristics.h file | annotate | diff | comparison | revisions
cline.c file | annotate | diff | comparison | revisions
cline.h file | annotate | diff | comparison | revisions
scanner.c file | annotate | diff | comparison | revisions
     1.1 --- a/bfile_heuristics.c	Thu Oct 20 15:21:53 2011 +0200
     1.2 +++ b/bfile_heuristics.c	Thu Oct 20 17:29:23 2011 +0200
     1.3 @@ -6,22 +6,49 @@
     1.4   */
     1.5  
     1.6  #include "bfile_heuristics.h"
     1.7 +#include <ctype.h>
     1.8  
     1.9  bfile_heuristics_t *new_bfile_heuristics_t() {
    1.10    bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t));
    1.11    ret->level = BFILE_MEDIUM_ACCURACY;
    1.12 -  /* TODO: check why this fails */
    1.13 -  /* ret->ccount = calloc(256, sizeof(int)); */
    1.14 +  bfile_reset(ret);
    1.15    return ret;
    1.16  }
    1.17  
    1.18  void destroy_bfile_heuristics_t(bfile_heuristics_t *def) {
    1.19 -  free(def->ccount);
    1.20    free(def);
    1.21  }
    1.22  
    1.23 +void bfile_reset(bfile_heuristics_t *def) {
    1.24 +  def->bcount = 0;
    1.25 +  def->tcount = 0;
    1.26 +}
    1.27 +
    1.28  bool bfile_check(bfile_heuristics_t *def, int next_char) {
    1.29    bool ret = false;
    1.30 +  if (def->level != BFILE_IGNORE) {
    1.31 +    def->tcount++;
    1.32 +    if (!isprint(next_char) && !isspace(next_char)) {
    1.33 +      def->bcount++;
    1.34 +    }
    1.35 +
    1.36 +    switch (def->level) {
    1.37 +    case BFILE_LOW_ACCURACY:
    1.38 +      if (def->tcount > 15 || next_char == EOF) {
    1.39 +        ret = (1.0*def->bcount)/def->tcount > 0.32;
    1.40 +      }
    1.41 +      break;
    1.42 +    case BFILE_HIGH_ACCURACY:
    1.43 +      if (def->tcount > 500 || next_char == EOF) {
    1.44 +        ret = (1.0*def->bcount)/def->tcount > 0.1;
    1.45 +      }
    1.46 +      break;
    1.47 +    default: /* BFILE_MEDIUM_ACCURACY */
    1.48 +      if (def->tcount > 100 || next_char == EOF) {
    1.49 +        ret = (1.0*def->bcount)/def->tcount > 0.1;
    1.50 +      }
    1.51 +    }
    1.52 +  }
    1.53  
    1.54    return ret;
    1.55  }
     2.1 --- a/bfile_heuristics.h	Thu Oct 20 15:21:53 2011 +0200
     2.2 +++ b/bfile_heuristics.h	Thu Oct 20 17:29:23 2011 +0200
     2.3 @@ -17,7 +17,8 @@
     2.4  
     2.5  typedef struct {
     2.6    int level;
     2.7 -  int *ccount;
     2.8 +  int bcount; /* 'binary' character count */
     2.9 +  int tcount; /* total count */
    2.10  } bfile_heuristics_t;
    2.11  
    2.12  #ifdef _cplusplus
    2.13 @@ -26,6 +27,7 @@
    2.14  
    2.15  bfile_heuristics_t *new_bfile_heuristics_t();
    2.16  void destroy_bfile_heuristics_t(bfile_heuristics_t *def);
    2.17 +void bfile_reset(bfile_heuristics_t *def);
    2.18  bool bfile_check(bfile_heuristics_t *def, int next_char);
    2.19  
    2.20  #ifdef _cplusplus
     3.1 --- a/cline.c	Thu Oct 20 15:21:53 2011 +0200
     3.2 +++ b/cline.c	Thu Oct 20 17:29:23 2011 +0200
     3.3 @@ -54,14 +54,14 @@
     3.4  
     3.5  int main(int argc, char** argv) {
     3.6  
     3.7 -  // Settings
     3.8 +  /* Settings */
     3.9    settings_t *settings = new_settings_t();
    3.10    if (settings == NULL) {
    3.11      fprintf(stderr, "Memory allocation failed.\n");
    3.12      return 1;
    3.13    }
    3.14  
    3.15 -  // Get arguments
    3.16 +  /* Get arguments */
    3.17    char* directory = "./";
    3.18    char* suffix = " ";
    3.19    int checked = 0;
    3.20 @@ -70,7 +70,7 @@
    3.21  
    3.22      int argflags = checkArgument(argv[t], "hsSrRmvVb");
    3.23  
    3.24 -    // s, S
    3.25 +    /* s, S */
    3.26      if ((argflags & 6) > 0) {
    3.27        if (registerArgument(&checked, 6)) {
    3.28          return exit_with_help(settings, 1);
    3.29 @@ -82,36 +82,36 @@
    3.30        }
    3.31        suffix = argv[t]; 
    3.32      }
    3.33 -    // h
    3.34 +    /* h */
    3.35      if ((argflags & 1) > 0 || strcmp(argv[t], "--help") == 0) {
    3.36        return exit_with_help(settings, 0);
    3.37      }
    3.38 -    // r, R
    3.39 +    /* r, R */
    3.40      if ((argflags & 24) > 0) {
    3.41        if (registerArgument(&checked, 24)) {
    3.42          return exit_with_help(settings, 1);
    3.43        }
    3.44        settings->recursive = true;
    3.45      }
    3.46 -    // m
    3.47 +    /* m */
    3.48      if ((argflags & 32) > 0) {
    3.49        if (registerArgument(&checked, 32)) {
    3.50          return exit_with_help(settings, 1);
    3.51        }
    3.52        settings->matchesOnly = true;
    3.53      }
    3.54 -    // v
    3.55 +    /* v */
    3.56      if ((argflags & 64) > 0 || strcmp(argv[t], "--version") == 0) {
    3.57        return exit_with_version(settings);
    3.58      }
    3.59 -    // V
    3.60 +    /* V */
    3.61      if ((argflags & 128) > 0) {
    3.62        if (registerArgument(&checked, 128)) {
    3.63          return exit_with_help(settings, 1);
    3.64        }
    3.65        settings->verbose = false;
    3.66      }
    3.67 -    // b
    3.68 +    /* b */
    3.69      if ((argflags & 256) > 0) {
    3.70        if (registerArgument(&checked, 256)) {
    3.71          return exit_with_help(settings, 1);
    3.72 @@ -132,7 +132,7 @@
    3.73          return exit_with_help(settings, 1);
    3.74        }
    3.75      }
    3.76 -    // Path
    3.77 +    /* Path */
    3.78      if (argflags == 0) {
    3.79        if (registerArgument(&checked, 1024)) {
    3.80          return exit_with_help(settings, 1);
    3.81 @@ -141,19 +141,19 @@
    3.82      }
    3.83    }
    3.84  
    3.85 -  // Configure output
    3.86 +  /* Configure output */
    3.87    if (!settings->verbose) {
    3.88      close_stdout();
    3.89    }
    3.90  
    3.91 -  // Find tokens
    3.92 +  /* Find tokens */
    3.93    char* finder = strtok(suffix, ",");
    3.94    while (finder != NULL) {
    3.95      add_string(settings->suffixList, finder);
    3.96      finder = strtok(NULL, ",");
    3.97    }
    3.98  
    3.99 -  // Open directory
   3.100 +  /* Open directory */
   3.101    DIR *dir = opendir(directory);
   3.102    if (dir == NULL) {
   3.103      perror("Operation failed");
   3.104 @@ -161,12 +161,12 @@
   3.105      return 1;
   3.106    }
   3.107  
   3.108 -  // Scan directory
   3.109 +  /* Scan directory */
   3.110    int lines = scanDirectory(dir, 0, directory, settings);
   3.111    closedir(dir);
   3.112    destroy_settings_t(settings);
   3.113  
   3.114 -  // Print double line and line count
   3.115 +  /* Print double line and line count */
   3.116    for (int t = 0 ; t < 79 ; t++) {
   3.117      printf("=");
   3.118    }
     4.1 --- a/cline.h	Thu Oct 20 15:21:53 2011 +0200
     4.2 +++ b/cline.h	Thu Oct 20 17:29:23 2011 +0200
     4.3 @@ -8,7 +8,7 @@
     4.4  #ifndef CLINE_H_
     4.5  #define CLINE_H_
     4.6  
     4.7 -const char* VERSION=""; // will be replaced by makefile
     4.8 +const char* VERSION=""; /* will be replaced by makefile */
     4.9  
    4.10  #include "stdinc.h"
    4.11  #include "settings.h"
     5.1 --- a/scanner.c	Thu Oct 20 15:21:53 2011 +0200
     5.2 +++ b/scanner.c	Thu Oct 20 17:29:23 2011 +0200
     5.3 @@ -20,7 +20,7 @@
     5.4  
     5.5    while ((entry = readdir(dir)) != NULL) {
     5.6      if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
     5.7 -      // Print occurrence
     5.8 +      /* Print occurrence */
     5.9        char entryname[strlen(entry->d_name)+spaces];
    5.10        for (int t = 0 ; t < spaces ; t++) {
    5.11          entryname[t]=' ';
    5.12 @@ -33,7 +33,7 @@
    5.13        strncat(filename, &settings->fileSeparator, 1);
    5.14        strcat(filename, entry->d_name);
    5.15  
    5.16 -      // Check for subdirectory
    5.17 +      /* Check for subdirectory */
    5.18        if ((subdir = opendir(filename)) != NULL) {
    5.19          printf("%-60s\n", entryname);
    5.20          if (settings->recursive) {
    5.21 @@ -43,9 +43,10 @@
    5.22          continue;
    5.23        }
    5.24  
    5.25 -      // Count lines
    5.26 +      /* Count lines */
    5.27        lines = 0;
    5.28        bfile = false;
    5.29 +      bfile_reset(settings->bfileHeuristics);
    5.30        if (testSuffix(filename, settings)) {
    5.31          FILE *file = fopen(filename, "r");
    5.32          if (file == NULL) {
    5.33 @@ -64,16 +65,18 @@
    5.34          } while (!bfile && a != EOF);
    5.35          fclose(file);
    5.36  
    5.37 -        // Print line count
    5.38 +        /* Print and sum line count */
    5.39          if (bfile) {
    5.40 -          printf("%-60s%19s\n", entryname, "binary");
    5.41 +          if (!settings->matchesOnly) {
    5.42 +            printf("%-60s%19s\n", entryname, "binary");
    5.43 +          }
    5.44          } else {
    5.45 +          lineSum += lines;
    5.46            printf("%-60s%13d lines\n", entryname, lines);
    5.47          }
    5.48 -        lineSum += lines;
    5.49        } else {
    5.50          if (!settings->matchesOnly) {
    5.51 -          // Print hint
    5.52 +          /* Print hint */
    5.53            printf("%-60s%19s\n", entryname, "no match");
    5.54          }
    5.55        }

mercurial