implemented bfile heuristics option + TODO: implement algorithm

Thu, 20 Oct 2011 15:21:53 +0200

author
Mike Becker <universe@uap-core.de>
date
Thu, 20 Oct 2011 15:21:53 +0200
changeset 21
91e0890464b0
parent 20
43725438ac50
child 22
4508da679ffb

implemented bfile heuristics option + TODO: implement algorithm

bfile_heuristics.c file | annotate | diff | comparison | revisions
bfile_heuristics.h file | annotate | diff | comparison | revisions
cline.c file | annotate | diff | comparison | revisions
scanner.c file | annotate | diff | comparison | revisions
settings.c file | annotate | diff | comparison | revisions
settings.h file | annotate | diff | comparison | revisions
--- a/bfile_heuristics.c	Thu Oct 20 14:13:56 2011 +0200
+++ b/bfile_heuristics.c	Thu Oct 20 15:21:53 2011 +0200
@@ -5,18 +5,23 @@
  *      Author: Mike
  */
 
-
 #include "bfile_heuristics.h"
 
-bfile_heuristics *new_bfile_heuristics(int level) {
-   bfile_heuristics *ret = malloc(sizeof(bfile_heuristics));
-   ret->level = level;
-   memset(ret->ccount, 0, sizeof(int)*256);
-   return ret;
+bfile_heuristics_t *new_bfile_heuristics_t() { /* medium accuracy, no counters */
+  bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t));
+  ret->level = BFILE_MEDIUM_ACCURACY; /* default; cline's -b option overrides */
+  /* No counter buffer yet (TODO: check why calloc(256, sizeof(int)) fails). */
+  ret->ccount = NULL; /* destroy_bfile_heuristics_t() passes this to free() */
+  return ret;
 }
 
-bool bfile_check(bfile_heuristics *def, int next_char) {
-   bool ret = false;
-   
-   return ret;
+void destroy_bfile_heuristics_t(bfile_heuristics_t *def) { /* def: non-NULL */
+  free(def->ccount); /* ccount must be NULL or a heap pointer at this point */
+  free(def); /* def itself is dangling for the caller afterwards */
 }
+
+bool bfile_check(bfile_heuristics_t *def, int next_char) {
+  /* Stub: the detection algorithm is still to be written, so neither
+   * argument is inspected and nothing is ever reported as binary. */
+  return false;
+}
--- a/bfile_heuristics.h	Thu Oct 20 14:13:56 2011 +0200
+++ b/bfile_heuristics.h	Thu Oct 20 15:21:53 2011 +0200
@@ -9,22 +9,27 @@
 #define BFILE_HEURISTICS_H_
 
 #include "stdinc.h"
-#include "settings.h"
+
+#define BFILE_IGNORE           0x00
+#define BFILE_LOW_ACCURACY     0x01
+#define BFILE_MEDIUM_ACCURACY  0x02
+#define BFILE_HIGH_ACCURACY    0x04
+
+typedef struct {
+  int level;   /* accuracy setting; holds one of the BFILE_* constants above */
+  int *ccount; /* heap buffer freed by destroy_bfile_heuristics_t(); use is TODO */
+} bfile_heuristics_t;
 
 #ifdef _cplusplus
 extern "C" {
 #endif
 
-typedef struct {
-   int level;
-   int ccount[256];
-} bfile_heuristics;
-
-bfile_heuristics *new_bfile_heuristics(int level);
-bool bfile_check(bfile_heuristics *def, int next_char);
+bfile_heuristics_t *new_bfile_heuristics_t();
+void destroy_bfile_heuristics_t(bfile_heuristics_t *def);
+bool bfile_check(bfile_heuristics_t *def, int next_char);
 
 #ifdef _cplusplus
 }
 #endif
 
-#endif /* BFILE_HEURISTICS_H_ */
\ No newline at end of file
+#endif /* BFILE_HEURISTICS_H_ */
--- a/cline.c	Thu Oct 20 14:13:56 2011 +0200
+++ b/cline.c	Thu Oct 20 15:21:53 2011 +0200
@@ -14,11 +14,13 @@
 void printHelpText() {
   const char* helpText = 
     "\nUsage:"
-    "\n      cline [-hrm][-s suffix][<directory>]"
-    "\n      cline [-hrm][-S suffix][<directory>]"
+    "\n      cline [-hrmvV][-s suffix][-b level][<directory>]"
+    "\n      cline [-hrmvV][-S suffix][-b level][<directory>]"
     "\n\nCounts the line terminator characters (\\n) within all"
     " files in the specified\ndirectory."
     "\n\nOptions:"
+    "\n  -b <level>          - binary file heuristics level (default medium)"
+    "\n                        One of: ignore low medium high"
     "\n  -h, --help          - this help text"
     "\n  -m                  - print information about matching files only"
     "\n  -s <suffixes>       - only count files with these suffixes (separated"
@@ -66,7 +68,7 @@
 
   for (int t = 1 ; t < argc ; t++) {
 
-    int argflags = checkArgument(argv[t], "hsSrRmvV");
+    int argflags = checkArgument(argv[t], "hsSrRmvVb");
 
     // s, S
     if ((argflags & 6) > 0) {
@@ -109,6 +111,27 @@
       }
       settings->verbose = false;
     }
+    // b
+    if ((argflags & 256) > 0) {
+      if (registerArgument(&checked, 256)) {
+        return exit_with_help(settings, 1);
+      }
+      t++;
+      if (t >= argc) {
+        return exit_with_help(settings, 1);
+      }
+      if (stricmp(argv[t], "ignore") == 0) {
+        settings->bfileHeuristics->level = BFILE_IGNORE;
+      } else if (stricmp(argv[t], "low") == 0) {
+        settings->bfileHeuristics->level = BFILE_LOW_ACCURACY;
+      } else if (stricmp(argv[t], "medium") == 0) {
+        settings->bfileHeuristics->level = BFILE_MEDIUM_ACCURACY;
+      } else if (stricmp(argv[t], "high") == 0) {
+        settings->bfileHeuristics->level = BFILE_HIGH_ACCURACY;
+      } else {
+        return exit_with_help(settings, 1);
+      }
+    }
     // Path
     if (argflags == 0) {
       if (registerArgument(&checked, 1024)) {
@@ -137,27 +160,17 @@
     destroy_settings_t(settings);
     return 1;
   }
-  
+
   // Scan directory
   int lines = scanDirectory(dir, 0, directory, settings);
   closedir(dir);
   destroy_settings_t(settings);
 
   // Print double line and line count
-#ifdef _WIN32
-    const int columns = 79;
-#else
-    const int columns = 80;
-#endif /* _WIN32 */
-
-  for (int t = 0 ; t < columns ; t++) {
+  for (int t = 0 ; t < 79 ; t++) {
     printf("=");
   }
-#ifdef _WIN32
-    printf("\n%73d lines\n", lines);
-#else
-    printf("\n%74d lines\n", lines);
-#endif /* _WIN32 */
+  printf("\n%73d lines\n", lines);
 
   if (!settings->verbose) {
     reopen_stdout();
--- a/scanner.c	Thu Oct 20 14:13:56 2011 +0200
+++ b/scanner.c	Thu Oct 20 15:21:53 2011 +0200
@@ -8,6 +8,7 @@
 
 #include "scanner.h"
 #include "suffix_fnc.h"
+#include "bfile_heuristics.h"
 
 int scanDirectory(DIR *dir, const int spaces,
                   char* currdir, settings_t* settings) {
@@ -15,6 +16,7 @@
   struct dirent *entry;
   int lines, a;
   int lineSum = 0;
+  bool bfile;
 
   while ((entry = readdir(dir)) != NULL) {
     if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
@@ -43,6 +45,7 @@
 
       // Count lines
       lines = 0;
+      bfile = false;
       if (testSuffix(filename, settings)) {
         FILE *file = fopen(filename, "r");
         if (file == NULL) {
@@ -53,28 +56,25 @@
         do {
           a = fgetc(file);
 
+          bfile = bfile_check(settings->bfileHeuristics, a);
+
           if (a == 10) {
             lines++;
           }
-        } while (a != EOF);
+        } while (!bfile && a != EOF);
         fclose(file);
 
         // Print line count
-        #ifdef _WIN32
+        if (bfile) {
+          printf("%-60s%19s\n", entryname, "binary");
+        } else {
           printf("%-60s%13d lines\n", entryname, lines);
-        #else
-          printf("%-60s%14d lines\n", entryname, lines);
-        #endif /* _WIN32 */
-
+        }
         lineSum += lines;
       } else {
         if (!settings->matchesOnly) {
           // Print hint
-          #ifdef _WIN32
-            printf("%-60s%19s\n", entryname, "no match");
-          #else
-            printf("%-60s%20s\n", entryname, "no match");
-          #endif /* _WIN32 */
+          printf("%-60s%19s\n", entryname, "no match");
         }
       }
     }
--- a/settings.c	Thu Oct 20 14:13:56 2011 +0200
+++ b/settings.c	Thu Oct 20 15:21:53 2011 +0200
@@ -20,6 +20,7 @@
     settings->matchesOnly        = false;
     settings->suffixList         = new_string_list_t();
     settings->verbose            = true;
+    settings->bfileHeuristics    = new_bfile_heuristics_t();
   }
 
   return settings;
@@ -27,5 +28,6 @@
 
 void destroy_settings_t(settings_t* settings) {
   destroy_string_list_t(settings->suffixList);
+  destroy_bfile_heuristics_t(settings->bfileHeuristics);
   free(settings);
 }
--- a/settings.h	Thu Oct 20 14:13:56 2011 +0200
+++ b/settings.h	Thu Oct 20 15:21:53 2011 +0200
@@ -10,10 +10,12 @@
 
 #include "stdinc.h"
 #include "string_list.h"
+#include "bfile_heuristics.h"
 
 typedef struct _settings {
+  string_list_t* suffixList;
+  bfile_heuristics_t* bfileHeuristics;
   char fileSeparator;
-  string_list_t* suffixList;
   bool recursive;
   bool includeSuffixes;
   bool matchesOnly;

mercurial