Thu, 20 Oct 2011 17:29:23 +0200
completed binary file heuristics
bfile_heuristics.c | file | annotate | diff | comparison | revisions | |
bfile_heuristics.h | file | annotate | diff | comparison | revisions | |
cline.c | file | annotate | diff | comparison | revisions | |
cline.h | file | annotate | diff | comparison | revisions | |
scanner.c | file | annotate | diff | comparison | revisions |
--- a/bfile_heuristics.c Thu Oct 20 15:21:53 2011 +0200 +++ b/bfile_heuristics.c Thu Oct 20 17:29:23 2011 +0200 @@ -6,22 +6,49 @@ */ #include "bfile_heuristics.h" +#include <ctype.h> bfile_heuristics_t *new_bfile_heuristics_t() { bfile_heuristics_t *ret = malloc(sizeof(bfile_heuristics_t)); ret->level = BFILE_MEDIUM_ACCURACY; - /* TODO: check why this fails */ - /* ret->ccount = calloc(256, sizeof(int)); */ + bfile_reset(ret); return ret; } void destroy_bfile_heuristics_t(bfile_heuristics_t *def) { - free(def->ccount); free(def); } +void bfile_reset(bfile_heuristics_t *def) { + def->bcount = 0; + def->tcount = 0; +} + bool bfile_check(bfile_heuristics_t *def, int next_char) { bool ret = false; + if (def->level != BFILE_IGNORE) { + def->tcount++; + if (!isprint(next_char) && !isspace(next_char)) { + def->bcount++; + } + + switch (def->level) { + case BFILE_LOW_ACCURACY: + if (def->tcount > 15 || next_char == EOF) { + ret = (1.0*def->bcount)/def->tcount > 0.32; + } + break; + case BFILE_HIGH_ACCURACY: + if (def->tcount > 500 || next_char == EOF) { + ret = (1.0*def->bcount)/def->tcount > 0.1; + } + break; + default: /* BFILE_MEDIUM_ACCURACY */ + if (def->tcount > 100 || next_char == EOF) { + ret = (1.0*def->bcount)/def->tcount > 0.1; + } + } + } return ret; }
--- a/bfile_heuristics.h Thu Oct 20 15:21:53 2011 +0200 +++ b/bfile_heuristics.h Thu Oct 20 17:29:23 2011 +0200 @@ -17,7 +17,8 @@ typedef struct { int level; - int *ccount; + int bcount; /* 'binary' character count */ + int tcount; /* total count */ } bfile_heuristics_t; #ifdef _cplusplus @@ -26,6 +27,7 @@ bfile_heuristics_t *new_bfile_heuristics_t(); void destroy_bfile_heuristics_t(bfile_heuristics_t *def); +void bfile_reset(bfile_heuristics_t *def); bool bfile_check(bfile_heuristics_t *def, int next_char); #ifdef _cplusplus
--- a/cline.c Thu Oct 20 15:21:53 2011 +0200 +++ b/cline.c Thu Oct 20 17:29:23 2011 +0200 @@ -54,14 +54,14 @@ int main(int argc, char** argv) { - // Settings + /* Settings */ settings_t *settings = new_settings_t(); if (settings == NULL) { fprintf(stderr, "Memory allocation failed.\n"); return 1; } - // Get arguments + /* Get arguments */ char* directory = "./"; char* suffix = " "; int checked = 0; @@ -70,7 +70,7 @@ int argflags = checkArgument(argv[t], "hsSrRmvVb"); - // s, S + /* s, S */ if ((argflags & 6) > 0) { if (registerArgument(&checked, 6)) { return exit_with_help(settings, 1); @@ -82,36 +82,36 @@ } suffix = argv[t]; } - // h + /* h */ if ((argflags & 1) > 0 || strcmp(argv[t], "--help") == 0) { return exit_with_help(settings, 0); } - // r, R + /* r, R */ if ((argflags & 24) > 0) { if (registerArgument(&checked, 24)) { return exit_with_help(settings, 1); } settings->recursive = true; } - // m + /* m */ if ((argflags & 32) > 0) { if (registerArgument(&checked, 32)) { return exit_with_help(settings, 1); } settings->matchesOnly = true; } - // v + /* v */ if ((argflags & 64) > 0 || strcmp(argv[t], "--version") == 0) { return exit_with_version(settings); } - // V + /* V */ if ((argflags & 128) > 0) { if (registerArgument(&checked, 128)) { return exit_with_help(settings, 1); } settings->verbose = false; } - // b + /* b */ if ((argflags & 256) > 0) { if (registerArgument(&checked, 256)) { return exit_with_help(settings, 1); @@ -132,7 +132,7 @@ return exit_with_help(settings, 1); } } - // Path + /* Path */ if (argflags == 0) { if (registerArgument(&checked, 1024)) { return exit_with_help(settings, 1); @@ -141,19 +141,19 @@ } } - // Configure output + /* Configure output */ if (!settings->verbose) { close_stdout(); } - // Find tokens + /* Find tokens */ char* finder = strtok(suffix, ","); while (finder != NULL) { add_string(settings->suffixList, finder); finder = strtok(NULL, ","); } - // Open directory + /* Open directory */ DIR *dir = opendir(directory); if (dir == NULL) { perror("Operation failed"); @@ -161,12 +161,12 @@ return 1; } - // Scan directory + /* Scan directory */ int lines = scanDirectory(dir, 0, directory, settings); closedir(dir); destroy_settings_t(settings); - // Print double line and line count + /* Print double line and line count */ for (int t = 0 ; t < 79 ; t++) { printf("="); }
--- a/cline.h Thu Oct 20 15:21:53 2011 +0200 +++ b/cline.h Thu Oct 20 17:29:23 2011 +0200 @@ -8,7 +8,7 @@ #ifndef CLINE_H_ #define CLINE_H_ -const char* VERSION=""; // will be replaced by makefile +const char* VERSION=""; /* will be replaced by makefile */ #include "stdinc.h" #include "settings.h"
--- a/scanner.c Thu Oct 20 15:21:53 2011 +0200 +++ b/scanner.c Thu Oct 20 17:29:23 2011 +0200 @@ -20,7 +20,7 @@ while ((entry = readdir(dir)) != NULL) { if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) { - // Print occurence + /* Print occurence */ char entryname[strlen(entry->d_name)+spaces]; for (int t = 0 ; t < spaces ; t++) { entryname[t]=' '; @@ -33,7 +33,7 @@ strncat(filename, &settings->fileSeparator, 1); strcat(filename, entry->d_name); - // Check for subdirectory + /* Check for subdirectory */ if ((subdir = opendir(filename)) != NULL) { printf("%-60s\n", entryname); if (settings->recursive) { @@ -43,9 +43,10 @@ continue; } - // Count lines + /* Count lines */ lines = 0; bfile = false; + bfile_reset(settings->bfileHeuristics); if (testSuffix(filename, settings)) { FILE *file = fopen(filename, "r"); if (file == NULL) { @@ -64,16 +65,18 @@ } while (!bfile && a != EOF); fclose(file); - // Print line count + /* Print and sum line count */ if (bfile) { - printf("%-60s%19s\n", entryname, "binary"); + if (!settings->matchesOnly) { + printf("%-60s%19s\n", entryname, "binary"); + } } else { + lineSum += lines; printf("%-60s%13d lines\n", entryname, lines); } - lineSum += lines; } else { if (!settings->matchesOnly) { - // Print hint + /* Print hint */ printf("%-60s%19s\n", entryname, "no match"); } }