implemented bfile heuristics option + TODO: implement algorithm

Thu, 20 Oct 2011 15:21:53 +0200

author
Mike Becker <universe@uap-core.de>
date
Thu, 20 Oct 2011 15:21:53 +0200
changeset 21
91e0890464b0
parent 20
43725438ac50
child 22
4508da679ffb

implemented bfile heuristics option + TODO: implement algorithm

bfile_heuristics.c file | annotate | diff | comparison | revisions
bfile_heuristics.h file | annotate | diff | comparison | revisions
cline.c file | annotate | diff | comparison | revisions
scanner.c file | annotate | diff | comparison | revisions
settings.c file | annotate | diff | comparison | revisions
settings.h file | annotate | diff | comparison | revisions
     1.1 --- a/bfile_heuristics.c	Thu Oct 20 14:13:56 2011 +0200
     1.2 +++ b/bfile_heuristics.c	Thu Oct 20 15:21:53 2011 +0200
     1.3 @@ -5,18 +5,23 @@
     1.4   *      Author: Mike
     1.5   */
     1.6  
     1.7 -
     1.8  #include "bfile_heuristics.h"
     1.9  
    1.10 -bfile_heuristics *new_bfile_heuristics(int level) {
    1.11 -   bfile_heuristics *ret = malloc(sizeof(bfile_heuristics));
    1.12 -   ret->level = level;
    1.13 -   memset(ret->ccount, 0, sizeof(int)*256);
    1.14 -   return ret;
    1.15 +bfile_heuristics_t *new_bfile_heuristics_t() {
    1.16 +  bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t));
    1.17 +  ret->level = BFILE_MEDIUM_ACCURACY;
    1.18 +  /* TODO: check why this fails */
    1.19 +  /* ret->ccount = calloc(256, sizeof(int)); */
    1.20 +  return ret;
    1.21  }
    1.22  
    1.23 -bool bfile_check(bfile_heuristics *def, int next_char) {
    1.24 -   bool ret = false;
    1.25 -   
    1.26 -   return ret;
    1.27 +void destroy_bfile_heuristics_t(bfile_heuristics_t *def) {
    1.28 +  free(def->ccount);
    1.29 +  free(def);
    1.30  }
    1.31 +
    1.32 +bool bfile_check(bfile_heuristics_t *def, int next_char) {
    1.33 +  bool ret = false;
    1.34 +
    1.35 +  return ret;
    1.36 +}
     2.1 --- a/bfile_heuristics.h	Thu Oct 20 14:13:56 2011 +0200
     2.2 +++ b/bfile_heuristics.h	Thu Oct 20 15:21:53 2011 +0200
     2.3 @@ -9,22 +9,27 @@
     2.4  #define BFILE_HEURISTICS_H_
     2.5  
     2.6  #include "stdinc.h"
     2.7 -#include "settings.h"
     2.8 +
     2.9 +#define BFILE_IGNORE           0x00
    2.10 +#define BFILE_LOW_ACCURACY     0x01
    2.11 +#define BFILE_MEDIUM_ACCURACY  0x02
    2.12 +#define BFILE_HIGH_ACCURACY    0x04
    2.13 +
    2.14 +typedef struct {
    2.15 +  int level;
    2.16 +  int *ccount;
    2.17 +} bfile_heuristics_t;
    2.18  
    2.19  #ifdef _cplusplus
    2.20  extern "C" {
    2.21  #endif
    2.22  
    2.23 -typedef struct {
    2.24 -   int level;
    2.25 -   int ccount[256];
    2.26 -} bfile_heuristics;
    2.27 -
    2.28 -bfile_heuristics *new_bfile_heuristics(int level);
    2.29 -bool bfile_check(bfile_heuristics *def, int next_char);
    2.30 +bfile_heuristics_t *new_bfile_heuristics_t();
    2.31 +void destroy_bfile_heuristics_t(bfile_heuristics_t *def);
    2.32 +bool bfile_check(bfile_heuristics_t *def, int next_char);
    2.33  
    2.34  #ifdef _cplusplus
    2.35  }
    2.36  #endif
    2.37  
    2.38 -#endif /* BFILE_HEURISTICS_H_ */
    2.39 \ No newline at end of file
    2.40 +#endif /* BFILE_HEURISTICS_H_ */
     3.1 --- a/cline.c	Thu Oct 20 14:13:56 2011 +0200
     3.2 +++ b/cline.c	Thu Oct 20 15:21:53 2011 +0200
     3.3 @@ -14,11 +14,13 @@
     3.4  void printHelpText() {
     3.5    const char* helpText = 
     3.6      "\nUsage:"
     3.7 -    "\n      cline [-hrm][-s suffix][<directory>]"
     3.8 -    "\n      cline [-hrm][-S suffix][<directory>]"
     3.9 +    "\n      cline [-hrmvV][-s suffix][-b level][<directory>]"
    3.10 +    "\n      cline [-hrmvV][-S suffix][-b level][<directory>]"
    3.11      "\n\nCounts the line terminator characters (\\n) within all"
    3.12      " files in the specified\ndirectory."
    3.13      "\n\nOptions:"
    3.14 +    "\n  -b <level>          - binary file heuristics level (default medium)"
    3.15 +    "\n                        One of: ignore low medium high"
    3.16      "\n  -h, --help          - this help text"
    3.17      "\n  -m                  - print information about matching files only"
    3.18      "\n  -s <suffixes>       - only count files with these suffixes (separated"
    3.19 @@ -66,7 +68,7 @@
    3.20  
    3.21    for (int t = 1 ; t < argc ; t++) {
    3.22  
    3.23 -    int argflags = checkArgument(argv[t], "hsSrRmvV");
    3.24 +    int argflags = checkArgument(argv[t], "hsSrRmvVb");
    3.25  
    3.26      // s, S
    3.27      if ((argflags & 6) > 0) {
    3.28 @@ -109,6 +111,27 @@
    3.29        }
    3.30        settings->verbose = false;
    3.31      }
    3.32 +    // b
    3.33 +    if ((argflags & 256) > 0) {
    3.34 +      if (registerArgument(&checked, 256)) {
    3.35 +        return exit_with_help(settings, 1);
    3.36 +      }
    3.37 +      t++;
    3.38 +      if (t >= argc) {
    3.39 +        return exit_with_help(settings, 1);
    3.40 +      }
    3.41 +      if (stricmp(argv[t], "ignore") == 0) {
    3.42 +        settings->bfileHeuristics->level = BFILE_IGNORE;
    3.43 +      } else if (stricmp(argv[t], "low") == 0) {
    3.44 +        settings->bfileHeuristics->level = BFILE_LOW_ACCURACY;
    3.45 +      } else if (stricmp(argv[t], "medium") == 0) {
    3.46 +        settings->bfileHeuristics->level = BFILE_MEDIUM_ACCURACY;
    3.47 +      } else if (stricmp(argv[t], "high") == 0) {
    3.48 +        settings->bfileHeuristics->level = BFILE_HIGH_ACCURACY;
    3.49 +      } else {
    3.50 +        return exit_with_help(settings, 1);
    3.51 +      }
    3.52 +    }
    3.53      // Path
    3.54      if (argflags == 0) {
    3.55        if (registerArgument(&checked, 1024)) {
    3.56 @@ -137,27 +160,17 @@
    3.57      destroy_settings_t(settings);
    3.58      return 1;
    3.59    }
    3.60 -  
    3.61 +
    3.62    // Scan directory
    3.63    int lines = scanDirectory(dir, 0, directory, settings);
    3.64    closedir(dir);
    3.65    destroy_settings_t(settings);
    3.66  
    3.67    // Print double line and line count
    3.68 -#ifdef _WIN32
    3.69 -    const int columns = 79;
    3.70 -#else
    3.71 -    const int columns = 80;
    3.72 -#endif /* _WIN32 */
    3.73 -
    3.74 -  for (int t = 0 ; t < columns ; t++) {
    3.75 +  for (int t = 0 ; t < 79 ; t++) {
    3.76      printf("=");
    3.77    }
    3.78 -#ifdef _WIN32
    3.79 -    printf("\n%73d lines\n", lines);
    3.80 -#else
    3.81 -    printf("\n%74d lines\n", lines);
    3.82 -#endif /* _WIN32 */
    3.83 +  printf("\n%73d lines\n", lines);
    3.84  
    3.85    if (!settings->verbose) {
    3.86      reopen_stdout();
     4.1 --- a/scanner.c	Thu Oct 20 14:13:56 2011 +0200
     4.2 +++ b/scanner.c	Thu Oct 20 15:21:53 2011 +0200
     4.3 @@ -8,6 +8,7 @@
     4.4  
     4.5  #include "scanner.h"
     4.6  #include "suffix_fnc.h"
     4.7 +#include "bfile_heuristics.h"
     4.8  
     4.9  int scanDirectory(DIR *dir, const int spaces,
    4.10                    char* currdir, settings_t* settings) {
    4.11 @@ -15,6 +16,7 @@
    4.12    struct dirent *entry;
    4.13    int lines, a;
    4.14    int lineSum = 0;
    4.15 +  bool bfile;
    4.16  
    4.17    while ((entry = readdir(dir)) != NULL) {
    4.18      if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
    4.19 @@ -43,6 +45,7 @@
    4.20  
    4.21        // Count lines
    4.22        lines = 0;
    4.23 +      bfile = false;
    4.24        if (testSuffix(filename, settings)) {
    4.25          FILE *file = fopen(filename, "r");
    4.26          if (file == NULL) {
    4.27 @@ -53,28 +56,25 @@
    4.28          do {
    4.29            a = fgetc(file);
    4.30  
    4.31 +          bfile = bfile_check(settings->bfileHeuristics, a);
    4.32 +
    4.33            if (a == 10) {
    4.34              lines++;
    4.35            }
    4.36 -        } while (a != EOF);
    4.37 +        } while (!bfile && a != EOF);
    4.38          fclose(file);
    4.39  
    4.40          // Print line count
    4.41 -        #ifdef _WIN32
    4.42 +        if (bfile) {
    4.43 +          printf("%-60s%19s\n", entryname, "binary");
    4.44 +        } else {
    4.45            printf("%-60s%13d lines\n", entryname, lines);
    4.46 -        #else
    4.47 -          printf("%-60s%14d lines\n", entryname, lines);
    4.48 -        #endif /* _WIN32 */
    4.49 -
    4.50 +        }
    4.51          lineSum += lines;
    4.52        } else {
    4.53          if (!settings->matchesOnly) {
    4.54            // Print hint
    4.55 -          #ifdef _WIN32
    4.56 -            printf("%-60s%19s\n", entryname, "no match");
    4.57 -          #else
    4.58 -            printf("%-60s%20s\n", entryname, "no match");
    4.59 -          #endif /* _WIN32 */
    4.60 +          printf("%-60s%19s\n", entryname, "no match");
    4.61          }
    4.62        }
    4.63      }
     5.1 --- a/settings.c	Thu Oct 20 14:13:56 2011 +0200
     5.2 +++ b/settings.c	Thu Oct 20 15:21:53 2011 +0200
     5.3 @@ -20,6 +20,7 @@
     5.4      settings->matchesOnly        = false;
     5.5      settings->suffixList         = new_string_list_t();
     5.6      settings->verbose            = true;
     5.7 +    settings->bfileHeuristics    = new_bfile_heuristics_t();
     5.8    }
     5.9  
    5.10    return settings;
    5.11 @@ -27,5 +28,6 @@
    5.12  
    5.13  void destroy_settings_t(settings_t* settings) {
    5.14    destroy_string_list_t(settings->suffixList);
    5.15 +  destroy_bfile_heuristics_t(settings->bfileHeuristics);
    5.16    free(settings);
    5.17  }
     6.1 --- a/settings.h	Thu Oct 20 14:13:56 2011 +0200
     6.2 +++ b/settings.h	Thu Oct 20 15:21:53 2011 +0200
     6.3 @@ -10,10 +10,12 @@
     6.4  
     6.5  #include "stdinc.h"
     6.6  #include "string_list.h"
     6.7 +#include "bfile_heuristics.h"
     6.8  
     6.9  typedef struct _settings {
    6.10 +  string_list_t* suffixList;
    6.11 +  bfile_heuristics_t* bfileHeuristics;
    6.12    char fileSeparator;
    6.13 -  string_list_t* suffixList;
    6.14    bool recursive;
    6.15    bool includeSuffixes;
    6.16    bool matchesOnly;

mercurial