completed binary file heuristics

Thu, 20 Oct 2011 17:29:23 +0200

author
Mike Becker <universe@uap-core.de>
date
Thu, 20 Oct 2011 17:29:23 +0200
changeset 22
4508da679ffb
parent 21
91e0890464b0
child 23
778388400f7b

completed binary file heuristics

bfile_heuristics.c file | annotate | diff | comparison | revisions
bfile_heuristics.h file | annotate | diff | comparison | revisions
cline.c file | annotate | diff | comparison | revisions
cline.h file | annotate | diff | comparison | revisions
scanner.c file | annotate | diff | comparison | revisions
--- a/bfile_heuristics.c	Thu Oct 20 15:21:53 2011 +0200
+++ b/bfile_heuristics.c	Thu Oct 20 17:29:23 2011 +0200
@@ -6,22 +6,49 @@
  */
 
 #include "bfile_heuristics.h"
+#include <ctype.h>
 
 bfile_heuristics_t *new_bfile_heuristics_t() {
   bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t));
   ret->level = BFILE_MEDIUM_ACCURACY;
-  /* TODO: check why this fails */
-  /* ret->ccount = calloc(256, sizeof(int)); */
+  bfile_reset(ret);
   return ret;
 }
 
 void destroy_bfile_heuristics_t(bfile_heuristics_t *def) {
-  free(def->ccount);
   free(def);
 }
 
+void bfile_reset(bfile_heuristics_t *def) {
+  def->bcount = 0;
+  def->tcount = 0;
+}
+
 bool bfile_check(bfile_heuristics_t *def, int next_char) {
   bool ret = false;
+  if (def->level != BFILE_IGNORE) {
+    def->tcount++;
+    if (!isprint(next_char) && !isspace(next_char)) {
+      def->bcount++;
+    }
+
+    switch (def->level) {
+    case BFILE_LOW_ACCURACY:
+      if (def->tcount > 15 || next_char == EOF) {
+        ret = (1.0*def->bcount)/def->tcount > 0.32;
+      }
+      break;
+    case BFILE_HIGH_ACCURACY:
+      if (def->tcount > 500 || next_char == EOF) {
+        ret = (1.0*def->bcount)/def->tcount > 0.1;
+      }
+      break;
+    default: /* BFILE_MEDIUM_ACCURACY */
+      if (def->tcount > 100 || next_char == EOF) {
+        ret = (1.0*def->bcount)/def->tcount > 0.1;
+      }
+    }
+  }
 
   return ret;
 }
--- a/bfile_heuristics.h	Thu Oct 20 15:21:53 2011 +0200
+++ b/bfile_heuristics.h	Thu Oct 20 17:29:23 2011 +0200
@@ -17,7 +17,8 @@
 
 typedef struct {
   int level;
-  int *ccount;
+  int bcount; /* 'binary' character count */
+  int tcount; /* total count */
 } bfile_heuristics_t;
 
 #ifdef _cplusplus
@@ -26,6 +27,7 @@
 
 bfile_heuristics_t *new_bfile_heuristics_t();
 void destroy_bfile_heuristics_t(bfile_heuristics_t *def);
+void bfile_reset(bfile_heuristics_t *def);
 bool bfile_check(bfile_heuristics_t *def, int next_char);
 
 #ifdef _cplusplus
--- a/cline.c	Thu Oct 20 15:21:53 2011 +0200
+++ b/cline.c	Thu Oct 20 17:29:23 2011 +0200
@@ -54,14 +54,14 @@
 
 int main(int argc, char** argv) {
 
-  // Settings
+  /* Settings */
   settings_t *settings = new_settings_t();
   if (settings == NULL) {
     fprintf(stderr, "Memory allocation failed.\n");
     return 1;
   }
 
-  // Get arguments
+  /* Get arguments */
   char* directory = "./";
   char* suffix = " ";
   int checked = 0;
@@ -70,7 +70,7 @@
 
     int argflags = checkArgument(argv[t], "hsSrRmvVb");
 
-    // s, S
+    /* s, S */
     if ((argflags & 6) > 0) {
       if (registerArgument(&checked, 6)) {
         return exit_with_help(settings, 1);
@@ -82,36 +82,36 @@
       }
       suffix = argv[t]; 
     }
-    // h
+    /* h */
     if ((argflags & 1) > 0 || strcmp(argv[t], "--help") == 0) {
       return exit_with_help(settings, 0);
     }
-    // r, R
+    /* r, R */
     if ((argflags & 24) > 0) {
       if (registerArgument(&checked, 24)) {
         return exit_with_help(settings, 1);
       }
       settings->recursive = true;
     }
-    // m
+    /* m */
     if ((argflags & 32) > 0) {
       if (registerArgument(&checked, 32)) {
         return exit_with_help(settings, 1);
       }
       settings->matchesOnly = true;
     }
-    // v
+    /* v */
     if ((argflags & 64) > 0 || strcmp(argv[t], "--version") == 0) {
       return exit_with_version(settings);
     }
-    // V
+    /* V */
     if ((argflags & 128) > 0) {
       if (registerArgument(&checked, 128)) {
         return exit_with_help(settings, 1);
       }
       settings->verbose = false;
     }
-    // b
+    /* b */
     if ((argflags & 256) > 0) {
       if (registerArgument(&checked, 256)) {
         return exit_with_help(settings, 1);
@@ -132,7 +132,7 @@
         return exit_with_help(settings, 1);
       }
     }
-    // Path
+    /* Path */
     if (argflags == 0) {
       if (registerArgument(&checked, 1024)) {
         return exit_with_help(settings, 1);
@@ -141,19 +141,19 @@
     }
   }
 
-  // Configure output
+  /* Configure output */
   if (!settings->verbose) {
     close_stdout();
   }
 
-  // Find tokens
+  /* Find tokens */
   char* finder = strtok(suffix, ",");
   while (finder != NULL) {
     add_string(settings->suffixList, finder);
     finder = strtok(NULL, ",");
   }
 
-  // Open directory
+  /* Open directory */
   DIR *dir = opendir(directory);
   if (dir == NULL) {
     perror("Operation failed");
@@ -161,12 +161,12 @@
     return 1;
   }
 
-  // Scan directory
+  /* Scan directory */
   int lines = scanDirectory(dir, 0, directory, settings);
   closedir(dir);
   destroy_settings_t(settings);
 
-  // Print double line and line count
+  /* Print double line and line count */
   for (int t = 0 ; t < 79 ; t++) {
     printf("=");
   }
--- a/cline.h	Thu Oct 20 15:21:53 2011 +0200
+++ b/cline.h	Thu Oct 20 17:29:23 2011 +0200
@@ -8,7 +8,7 @@
 #ifndef CLINE_H_
 #define CLINE_H_
 
-const char* VERSION=""; // will be replaced by makefile
+const char* VERSION=""; /* will be replaced by makefile */
 
 #include "stdinc.h"
 #include "settings.h"
--- a/scanner.c	Thu Oct 20 15:21:53 2011 +0200
+++ b/scanner.c	Thu Oct 20 17:29:23 2011 +0200
@@ -20,7 +20,7 @@
 
   while ((entry = readdir(dir)) != NULL) {
     if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
-      // Print occurence
+      /* Print occurence */
       char entryname[strlen(entry->d_name)+spaces];
       for (int t = 0 ; t < spaces ; t++) {
         entryname[t]=' ';
@@ -33,7 +33,7 @@
       strncat(filename, &settings->fileSeparator, 1);
       strcat(filename, entry->d_name);
 
-      // Check for subdirectory
+      /* Check for subdirectory */
       if ((subdir = opendir(filename)) != NULL) {
         printf("%-60s\n", entryname);
         if (settings->recursive) {
@@ -43,9 +43,10 @@
         continue;
       }
 
-      // Count lines
+      /* Count lines */
       lines = 0;
       bfile = false;
+      bfile_reset(settings->bfileHeuristics);
       if (testSuffix(filename, settings)) {
         FILE *file = fopen(filename, "r");
         if (file == NULL) {
@@ -64,16 +65,18 @@
         } while (!bfile && a != EOF);
         fclose(file);
 
-        // Print line count
+        /* Print and sum line count */
         if (bfile) {
-          printf("%-60s%19s\n", entryname, "binary");
+          if (!settings->matchesOnly) {
+            printf("%-60s%19s\n", entryname, "binary");
+          }
         } else {
+          lineSum += lines;
           printf("%-60s%13d lines\n", entryname, lines);
         }
-        lineSum += lines;
       } else {
         if (!settings->matchesOnly) {
-          // Print hint
+          /* Print hint */
           printf("%-60s%19s\n", entryname, "no match");
         }
       }

mercurial