src/scanner.c

Fri, 03 Jun 2022 20:05:15 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 03 Jun 2022 20:05:15 +0200
changeset 66
be2084398c37
parent 61
9c8d768f0244
permissions
-rw-r--r--

new feature: count non-whitespace characters

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 
     3  * Copyright 2018 Mike Becker. All rights reserved.
     4  * 
     5  * Redistribution and use in source and binary forms, with or without
     6  * modification, are permitted provided that the following conditions are met:
     7  * 
     8  * 1. Redistributions of source code must retain the above copyright
     9  * notice, this list of conditions and the following disclaimer.
    10  * 
    11  * 2. Redistributions in binary form must reproduce the above copyright
    12  * notice, this list of conditions and the following disclaimer in the
    13  * documentation and/or other materials provided with the distribution.
    14  * 
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    22  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    23  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    25  */
    28 #include "scanner.h"
    29 #include "bfile_heuristics.h"
    30 #include "regex_parser.h"
    31 #include <sys/stat.h>
    32 #include <ctype.h>
    34 typedef struct filelist filelist_t;
    36 struct filelist {
    37   char *displayname;
    38   unsigned displayname_len;
    39   char *filename;
    40   char *ext;
    41   unsigned st_mode;
    42   filelist_t *next;
    43 };
    45 static bool testSuffix(char* filename, string_list_t* list) {
    46   bool ret = false;
    47   size_t tokenlen, fnamelen = strlen(filename);
    48   for (size_t t = 0 ; t < list->count ; t++) {
    49     tokenlen = strlen(list->items[t]);
    50     if (fnamelen >= tokenlen && tokenlen > 0) {
    51       if (strncmp(filename+fnamelen-tokenlen,
    52                   list->items[t], tokenlen) == 0) {
    53         ret = true;
    54         break;
    55       }
    56     }
    57   }
    58   return ret;
    59 }
    61 static void addResultPerExtension(scanresult_ext_t* result,
    62                                   char* ext, unsigned value) {
    63   if (!result) return;
    65   if (!ext) ext = "w/o";
    67   for (unsigned i = 0 ; i < result->count ; i++) {
    68     if (strcasecmp(result->extensions[i], ext) == 0) {
    69       result->result[i] += value;
    70       return;
    71     }
    72   }
    74   if (result->count == result->capacity) {
    75     unsigned newcap = result->capacity+8;
    76     char** extarr = realloc(result->extensions, newcap*sizeof(char*));
    77     unsigned* resultarr = realloc(result->result, newcap*sizeof(unsigned));
    78     if (!extarr || !resultarr) {
    79       fprintf(stderr, "Memory allocation error.\n");
    80       abort();
    81     }
    82     result->extensions = extarr;
    83     result->result = resultarr;
    84     result->capacity = newcap;
    85   }
    87   result->extensions[result->count] = strdup(ext);
    88   result->result[result->count] = value;
    89   result->count++;
    90 }
    92 scanresult_t* new_scanresult_t(settings_t* settings) {
    93   scanresult_t* result = calloc(1, sizeof(scanresult_t));
    94   if (settings->individual_sums) {
    95     result->ext = calloc(1, sizeof(scanresult_ext_t));
    96   }
    97   return result;
    98 }
   100 void destroy_scanresult_t(scanresult_t* result) {
   101   if (result->ext) {
   102     if (result->ext->count > 0) {
   103       for (unsigned i = 0 ; i < result->ext->count ; i++) {
   104         free(result->ext->extensions[i]);
   105       }
   106       free(result->ext->extensions);
   107       free(result->ext->result);
   108     }
   109     free(result->ext);
   110   }
   111   free(result);
   112 }
   114 static filelist_t *buildFileList(scanner_t scanner, settings_t* settings) {
   116   filelist_t* list = NULL;
   117   DIR *dirf;
   118   struct dirent *entry;
   119   struct stat statbuf;
   121   if ((dirf = opendir(scanner.dir)) == NULL) {
   122     fprintf(stderr, "%s - ", scanner.dir);
   123     perror("Directory access failed");
   124     return 0;
   125   }
   127   while ((entry = readdir(dirf)) != NULL) {
   128     if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
   130       /* Create new filelist entry */
   131       filelist_t *newentry = (filelist_t*) malloc(sizeof(filelist_t));
   132       newentry->next = NULL;
   134       newentry->displayname_len = strlen(entry->d_name);
   135       newentry->displayname = (char*) malloc(newentry->displayname_len+1);
   136       memcpy(newentry->displayname, entry->d_name, newentry->displayname_len);
   137       newentry->displayname[newentry->displayname_len] = 0;
   139       newentry->st_mode = 0;
   141       /* Construct absolute pathname string */
   142       size_t dirnamelen = strlen(scanner.dir);
   143       char *filename = (char*) malloc(2+dirnamelen+newentry->displayname_len);
   144       memcpy(filename, scanner.dir, dirnamelen);
   145       filename[dirnamelen] = settings->fileSeparator;
   146       memcpy(filename+dirnamelen+1, entry->d_name, newentry->displayname_len);
   147       filename[1+dirnamelen+newentry->displayname_len] = 0;
   148       newentry->filename = filename;
   150       /* Obtain file extension */
   151       newentry->ext = strrchr(newentry->displayname, '.');
   153       /* Check for subdirectory */
   154       if (stat(filename, &statbuf) == 0) {
   155         newentry->st_mode = statbuf.st_mode;
   156       } else {
   157         perror("  Error in stat call");
   158         continue;
   159       }
   161       if (list) {
   162         /* create fake root to have a pointer on the true root */
   163         filelist_t root;
   164         root.next = list;
   165         filelist_t *parent = &root;
   166         while (parent->next &&
   167             (strcasecmp(parent->next->displayname, newentry->displayname) < 0 ||
   168               (!S_ISDIR(newentry->st_mode) && S_ISDIR(parent->next->st_mode))
   169             ) &&
   170             (!S_ISDIR(newentry->st_mode) || S_ISDIR(parent->next->st_mode))
   171             ) {
   172           parent = parent->next;
   173         }
   174         newentry->next = parent->next;
   175         parent->next = newentry;
   176         list = root.next;
   177       } else {
   178         list = newentry;
   179       }
   180     }
   181   }
   183   closedir(dirf);
   185   return list;
   186 }
   188 void scanDirectory(scanner_t scanner, settings_t* settings,
   189     string_list_t* output, scanresult_t* result) {
   191   result->result = 0;
   192   bool bfile;
   193   char *outbuf;
   194   const char *result_type = settings->count_chars ? "chars" : "lines";
   196   filelist_t *filelist = buildFileList(scanner, settings);
   198   while (filelist != NULL) {
   200     /* Scan subdirectories */
   201     if (!S_ISREG(filelist->st_mode)) {
   202       if (settings->recursive && S_ISDIR(filelist->st_mode)) {
   203         string_list_t *recoutput = new_string_list_t();
   204         scanresult_t recresult;
   205         recresult.ext = result->ext;
   206         scanDirectory(
   207             (scanner_t) {filelist->filename, scanner.spaces+1},
   208             settings, recoutput, &recresult);
   209         result->result += recresult.result;
   210         if (!settings->matchesOnly || recoutput->count > 0) {
   211           outbuf = (char*) malloc(81);
   212           snprintf(outbuf, 81, "%*s/%*s%13u %s\n",
   213               filelist->displayname_len+scanner.spaces, filelist->displayname,
   214               60-filelist->displayname_len-scanner.spaces-1, "",
   215               recresult.result, result_type);
   216           add_string(output, outbuf);
   217           for (unsigned i = 0 ; i < recoutput->count ; i++) {
   218             add_string(output, recoutput->items[i]);
   219           }
   220         }
   221         destroy_string_list_t(recoutput);
   222       } else {
   223         outbuf = (char*) malloc(81);
   224         snprintf(outbuf, 81, "%*s\n",
   225                  filelist->displayname_len+scanner.spaces,
   226                  filelist->displayname);
   227         add_string(output, outbuf);
   228       }
   229     } else {
   230       if ((settings->includeSuffixes->count == 0
   231         || testSuffix(filelist->displayname, settings->includeSuffixes))
   232         && !testSuffix(filelist->displayname, settings->excludeSuffixes)) {
   234         /* Count */
   235         unsigned res_value = 0;
   236         bfile = false;
   237         bfile_reset(settings->bfileHeuristics);
   238         regex_parser_reset(settings->regex);
   239         char line_buffer[MAX_LINELENGTH];
   240         unsigned line_buffer_pos = 0;
   242         FILE *file = fopen(filelist->filename, "r");
   243         if (file == NULL) {
   244           outbuf = (char*) malloc(81);
   245           snprintf(outbuf, 81, "%*s",
   246                    filelist->displayname_len+scanner.spaces,
   247                    filelist->displayname);
   248           add_string(output, outbuf);
   249           perror("  File acces failed");
   250         } else {
   251           int a;
   252           do {
   253             a = fgetc(file);
   255             bfile = bfile_check(settings->bfileHeuristics, a);
   257             /* ignore carriage return completely */
   258             if (a == 13) continue;
   260             if (a == 10 || a == EOF) {
   261               line_buffer[line_buffer_pos] = 0;
   262               if (regex_parser_do(settings->regex, line_buffer) == 0) {
   263                 /* Subtract excluded lines/chars when matching has finished */
   264                 if (!regex_parser_matching(settings->regex)) {
   265                   res_value -= settings->regex->matched_counted;
   266                 }
   267               }
   269               if (settings->count_chars) {
   270                 for (size_t i = 0 ; i < line_buffer_pos ; i++) {
   271                   if (!isspace(line_buffer[i])) res_value++;
   272                 }
   273               } else {
   274                 res_value++;
   275               }
   276               line_buffer_pos = 0;
   277             } else {
   278               if (line_buffer_pos < MAX_LINELENGTH) {
   279                 line_buffer[line_buffer_pos] = (char) a;
   280                 line_buffer_pos++;
   281               } else {
   282                 line_buffer[line_buffer_pos - 1] = 0;
   283                 settings->confusing_lnlen = true;
   284               }
   285             }
   286           } while (!bfile && a != EOF);
   287           fclose(file);
   289           /* Print and sum line count */
   290           if (bfile) {
   291             if (!settings->matchesOnly) {
   292               outbuf = (char*) malloc(81);
   293               snprintf(outbuf, 81,
   294                   "%*s%*s%19s\n", filelist->displayname_len+scanner.spaces,
   295                   filelist->displayname,
   296                   60-filelist->displayname_len-scanner.spaces, "", "binary");
   297               add_string(output, outbuf);
   298             }
   299           } else {
   300             addResultPerExtension(result->ext, filelist->ext, res_value);
   301             result->result += res_value;
   302             outbuf = (char*) malloc(81);
   303             snprintf(outbuf, 81, "%*s%*s%13u %s\n",
   304                      filelist->displayname_len+scanner.spaces,
   305                      filelist->displayname,
   306                      60-filelist->displayname_len-scanner.spaces,
   307                      "",
   308                      res_value,
   309                      result_type
   310             );
   311             add_string(output, outbuf);
   312           }
   313         }
   314       } else {
   315         if (!settings->matchesOnly) {
   316           /* Print hint */
   317           outbuf = (char*) malloc(81);
   318           snprintf(outbuf, 81, "%*s%*s%19s\n",
   319               filelist->displayname_len+scanner.spaces, filelist->displayname,
   320               60-filelist->displayname_len-scanner.spaces, "", "no match");
   321           add_string(output, outbuf);
   322         }
   323       }
   324     }
   326     free(filelist->filename);
   327     free(filelist->displayname);
   328     filelist_t *freethis = filelist;
   329     filelist = filelist->next;
   330     free(freethis);
   331   }
   332 }

mercurial