src/scanner.c

Fri, 03 Jun 2022 20:05:15 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 03 Jun 2022 20:05:15 +0200
changeset 66
be2084398c37
parent 61
9c8d768f0244
permissions
-rw-r--r--

new feature: count non-whitespace characters

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 
 * Copyright 2018 Mike Becker. All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */


#include "scanner.h"
#include "bfile_heuristics.h"
#include "regex_parser.h"
#include <sys/stat.h>
#include <ctype.h>

typedef struct filelist filelist_t;

/*
 * Node of the singly linked list of directory entries that is built by
 * buildFileList() and walked (and freed node by node) by scanDirectory().
 */
struct filelist {
  /* heap-allocated copy of the entry name (dirent d_name), no path */
  char *displayname;
  /* strlen() of displayname */
  unsigned displayname_len;
  /* heap-allocated path: scanned directory + file separator + entry name */
  char *filename;
  /*
   * Result of strrchr(displayname, '.'): points INTO displayname at the
   * last dot, or is NULL when the name contains no dot.
   * It is not a separate allocation and must not be freed.
   */
  char *ext;
  /* st_mode as reported by stat() for filename */
  unsigned st_mode;
  /* next entry in the list or NULL at the end */
  filelist_t *next;
};

/*
 * Checks whether a file name ends with one of the suffixes in a list.
 *
 * The comparison is case-sensitive. Empty suffixes in the list never match.
 *
 * filename: the name to test
 * list:     the suffixes to test against
 *
 * Returns true if at least one non-empty suffix matches the end of
 * filename, false otherwise (also for an empty list).
 */
static bool testSuffix(char* filename, string_list_t* list) {
  const size_t namelen = strlen(filename);

  for (size_t i = 0 ; i < list->count ; i++) {
    const size_t suffixlen = strlen(list->items[i]);

    /* an empty suffix never matches, a longer-than-name suffix cannot */
    if (suffixlen == 0 || suffixlen > namelen) {
      continue;
    }

    const char *tail = filename + (namelen - suffixlen);
    if (strncmp(tail, list->items[i], suffixlen) == 0) {
      return true;
    }
  }

  return false;
}

/*
 * Adds a value to the sum that is kept for a file extension.
 *
 * Extensions are compared case-insensitively. When the extension is not
 * yet known, a new slot is appended; the arrays grow in steps of eight
 * slots. The extension string is copied, the caller keeps ownership of ext.
 *
 * result: the per-extension table; when NULL the function does nothing
 * ext:    the extension string as supplied by the caller (scanDirectory
 *         passes the part of the file name starting at the last dot);
 *         NULL is accounted under the pseudo extension "w/o"
 * value:  the amount to add
 *
 * Any allocation failure prints a message to stderr and aborts.
 */
static void addResultPerExtension(scanresult_ext_t* result,
                                  char* ext, unsigned value) {
  if (!result) return;
  
  if (!ext) ext = "w/o";
  
  /* extension already known: just add to its sum */
  for (unsigned i = 0 ; i < result->count ; i++) {
    if (strcasecmp(result->extensions[i], ext) == 0) {
      result->result[i] += value;
      return;
    }
  }
  
  /* grow both arrays together so that they always have equal capacity */
  if (result->count == result->capacity) {
    unsigned newcap = result->capacity+8;
    char** extarr = realloc(result->extensions, newcap*sizeof(char*));
    unsigned* resultarr = realloc(result->result, newcap*sizeof(unsigned));
    if (!extarr || !resultarr) {
      fprintf(stderr, "Memory allocation error.\n");
      abort();
    }
    result->extensions = extarr;
    result->result = resultarr;
    result->capacity = newcap;
  }
  
  /*
   * A NULL entry must never enter the table, because the lookup loop
   * above passes every entry to strcasecmp().
   */
  char *extcopy = strdup(ext);
  if (!extcopy) {
    fprintf(stderr, "Memory allocation error.\n");
    abort();
  }
  
  result->extensions[result->count] = extcopy;
  result->result[result->count] = value;
  result->count++;
}

/*
 * Allocates a zero-initialized scan result.
 *
 * The per-extension table (result->ext) is only allocated when
 * settings->individual_sums is set; otherwise it stays NULL.
 *
 * settings: the program settings, must not be NULL
 *
 * Returns the new result, never NULL: an allocation failure prints a
 * message to stderr and aborts, like the other allocations in this file.
 * Release the result with destroy_scanresult_t().
 */
scanresult_t* new_scanresult_t(settings_t* settings) {
  scanresult_t* result = calloc(1, sizeof(scanresult_t));
  if (!result) {
    fprintf(stderr, "Memory allocation error.\n");
    abort();
  }
  if (settings->individual_sums) {
    result->ext = calloc(1, sizeof(scanresult_ext_t));
    if (!result->ext) {
      /* do not silently continue without the requested sums */
      fprintf(stderr, "Memory allocation error.\n");
      abort();
    }
  }
  return result;
}

/*
 * Releases a scan result including its per-extension table, if any.
 *
 * result: the result to destroy; NULL is accepted and ignored,
 *         like free() does
 */
void destroy_scanresult_t(scanresult_t* result) {
  if (!result) return;

  if (result->ext) {
    /* the arrays are only touched when at least one slot is in use */
    if (result->ext->count > 0) {
      for (unsigned i = 0 ; i < result->ext->count ; i++) {
        free(result->ext->extensions[i]);
      }
      free(result->ext->extensions);
      free(result->ext->result);
    }
    free(result->ext);
  }
  free(result);
}

/*
 * Reads the directory scanner.dir and returns its entries as a sorted,
 * singly linked list.
 *
 * The entries "." and ".." are skipped. Entries for which stat() fails are
 * reported with perror() and skipped. The list is ordered with directories
 * first, then all other entries; within each group the names are sorted
 * case-insensitively.
 *
 * scanner:  scanner.dir is the directory to read
 * settings: settings->fileSeparator is placed between directory and name
 *
 * Returns the head of the list or NULL when the directory is empty or
 * cannot be opened (the latter is reported on stderr). Every node and its
 * displayname and filename strings are heap allocated and owned by the
 * caller. An allocation failure prints a message to stderr and aborts.
 */
static filelist_t *buildFileList(scanner_t scanner, settings_t* settings) {

  filelist_t* list = NULL;
  DIR *dirf;
  struct dirent *entry;
  struct stat statbuf;
  
  if ((dirf = opendir(scanner.dir)) == NULL) {
    fprintf(stderr, "%s - ", scanner.dir);
    perror("Directory access failed");
    return NULL;
  }

  const size_t dirnamelen = strlen(scanner.dir);

  while ((entry = readdir(dirf)) != NULL) {
    if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
      continue;
    }

    const size_t namelen = strlen(entry->d_name);

    /* Construct pathname string: directory, separator, name, terminator */
    char *filename = malloc(dirnamelen + namelen + 2);
    if (!filename) {
      fprintf(stderr, "Memory allocation error.\n");
      abort();
    }
    memcpy(filename, scanner.dir, dirnamelen);
    filename[dirnamelen] = settings->fileSeparator;
    memcpy(filename + dirnamelen + 1, entry->d_name, namelen);
    filename[dirnamelen + 1 + namelen] = 0;

    /*
     * Query the file type before anything else is allocated, so that a
     * failing stat() only has the path to release.
     */
    if (stat(filename, &statbuf) != 0) {
      perror("  Error in stat call");
      free(filename);
      continue;
    }

    /* Create new filelist entry */
    filelist_t *newentry = malloc(sizeof *newentry);
    char *displayname = malloc(namelen + 1);
    if (!newentry || !displayname) {
      fprintf(stderr, "Memory allocation error.\n");
      abort();
    }
    memcpy(displayname, entry->d_name, namelen);
    displayname[namelen] = 0;

    newentry->displayname = displayname;
    newentry->displayname_len = (unsigned) namelen;
    newentry->filename = filename;
    /* the extension points into displayname, NULL if there is no dot */
    newentry->ext = strrchr(displayname, '.');
    newentry->st_mode = statbuf.st_mode;

    /*
     * Sorted insert. 'link' addresses the pointer that has to be redirected
     * to the new entry (initially the list head), which also covers the
     * empty list. An existing entry is passed over when
     *  - the new entry is a directory: the existing entry is a directory
     *    with a smaller name,
     *  - the new entry is not a directory: the existing entry is a
     *    directory or has a smaller name.
     */
    filelist_t **link = &list;
    while (*link &&
        (strcasecmp((*link)->displayname, newentry->displayname) < 0 ||
          (!S_ISDIR(newentry->st_mode) && S_ISDIR((*link)->st_mode))
        ) &&
        (!S_ISDIR(newentry->st_mode) || S_ISDIR((*link)->st_mode))
        ) {
      link = &(*link)->next;
    }
    newentry->next = *link;
    *link = newentry;
  }
  
  closedir(dirf);
  
  return list;
}

/* Size of one formatted output line buffer including the terminator. */
enum { OUTPUT_LINE_SIZE = 81 };

/*
 * Allocates the buffer for one formatted output line.
 * Never returns NULL: an allocation failure prints a message to stderr
 * and aborts.
 */
static char *allocOutputLine(void) {
  char *line = malloc(OUTPUT_LINE_SIZE);
  if (!line) {
    fprintf(stderr, "Memory allocation error.\n");
    abort();
  }
  return line;
}

/*
 * Scans the directory scanner.dir and counts lines or, when
 * settings->count_chars is set, non-whitespace characters of its files.
 *
 * For every directory entry one formatted line is appended to output
 * (unless suppressed by settings->matchesOnly). Subdirectories are scanned
 * recursively when settings->recursive is set; their lines are indented
 * by one additional column and appended after the line of the directory.
 * Regular files are only counted when they match the include suffixes (or
 * there are none) and do not match the exclude suffixes. Files detected as
 * binary by the bfile heuristics are not counted.
 *
 * scanner:  scanner.dir is the directory to scan, scanner.spaces the
 *           indentation of the output lines
 * settings: the program settings; confusing_lnlen is set when a line did
 *           not fit into the line buffer
 * output:   receives the heap-allocated output lines via add_string()
 * result:   result->result is reset to zero and then receives the sum for
 *           this directory including scanned subdirectories; result->ext
 *           (may be NULL) is shared with the recursive calls
 */
void scanDirectory(scanner_t scanner, settings_t* settings,
    string_list_t* output, scanresult_t* result) {

  result->result = 0;
  const char *result_type = settings->count_chars ? "chars" : "lines";

  filelist_t *filelist = buildFileList(scanner, settings);

  while (filelist != NULL) {
    char *outbuf;

    /*
     * Field widths for the '*' of the format strings, which requires
     * arguments of type int: the name is right-aligned behind the
     * indentation and padded up to column 60.
     */
    const int namewidth = (int) (filelist->displayname_len + scanner.spaces);
    const int padwidth = 60 - namewidth;

    /* Scan subdirectories */
    if (!S_ISREG(filelist->st_mode)) {
      if (settings->recursive && S_ISDIR(filelist->st_mode)) {
        string_list_t *recoutput = new_string_list_t();
        scanresult_t recresult;
        recresult.ext = result->ext;
        scanDirectory(
            (scanner_t) {filelist->filename, scanner.spaces+1},
            settings, recoutput, &recresult);
        result->result += recresult.result;
        if (!settings->matchesOnly || recoutput->count > 0) {
          outbuf = allocOutputLine();
          /* one column less padding because of the '/' after the name */
          snprintf(outbuf, OUTPUT_LINE_SIZE, "%*s/%*s%13u %s\n",
              namewidth, filelist->displayname,
              padwidth-1, "",
              recresult.result, result_type);
          add_string(output, outbuf);
          for (unsigned i = 0 ; i < recoutput->count ; i++) {
            add_string(output, recoutput->items[i]);
          }
        }
        destroy_string_list_t(recoutput);
      } else {
        /* neither a regular file nor a directory to descend: name only */
        outbuf = allocOutputLine();
        snprintf(outbuf, OUTPUT_LINE_SIZE, "%*s\n",
                 namewidth, filelist->displayname);
        add_string(output, outbuf);
      }
    } else {
      if ((settings->includeSuffixes->count == 0
        || testSuffix(filelist->displayname, settings->includeSuffixes))
        && !testSuffix(filelist->displayname, settings->excludeSuffixes)) {

        /* Count */
        unsigned res_value = 0;
        bool bfile = false;
        bfile_reset(settings->bfileHeuristics);
        regex_parser_reset(settings->regex);
        char line_buffer[MAX_LINELENGTH];
        unsigned line_buffer_pos = 0;

        FILE *file = fopen(filelist->filename, "r");
        if (file == NULL) {
          outbuf = allocOutputLine();
          snprintf(outbuf, OUTPUT_LINE_SIZE, "%*s",
                   namewidth, filelist->displayname);
          add_string(output, outbuf);
          perror("  File acces failed");
        } else {
          int a;
          do {
            a = fgetc(file);

            bfile = bfile_check(settings->bfileHeuristics, a);

            /* ignore carriage return completely */
            if (a == 13) continue;

            /* EOF terminates the last line like a line feed does */
            if (a == 10 || a == EOF) {
              line_buffer[line_buffer_pos] = 0;
              if (regex_parser_do(settings->regex, line_buffer) == 0) {
                /* Subtract excluded lines/chars when matching has finished */
                if (!regex_parser_matching(settings->regex)) {
                  res_value -= settings->regex->matched_counted;
                }
              }

              if (settings->count_chars) {
                for (size_t i = 0 ; i < line_buffer_pos ; i++) {
                  /*
                   * isspace() is only defined for unsigned char values
                   * and EOF, plain char may be negative
                   */
                  if (!isspace((unsigned char) line_buffer[i])) res_value++;
                }
              } else {
                res_value++;
              }
              line_buffer_pos = 0;
            } else {
              /*
               * Always keep the last byte of the buffer free for the
               * terminator that is written at the end of the line.
               * Characters that do not fit are dropped.
               */
              if (line_buffer_pos + 1 < MAX_LINELENGTH) {
                line_buffer[line_buffer_pos] = (char) a;
                line_buffer_pos++;
              } else {
                settings->confusing_lnlen = true;
              }
            }
          } while (!bfile && a != EOF);
          fclose(file);

          /* Print and sum line count */
          if (bfile) {
            if (!settings->matchesOnly) {
              outbuf = allocOutputLine();
              snprintf(outbuf, OUTPUT_LINE_SIZE,
                  "%*s%*s%19s\n", namewidth,
                  filelist->displayname,
                  padwidth, "", "binary");
              add_string(output, outbuf);
            }
          } else {
            addResultPerExtension(result->ext, filelist->ext, res_value);
            result->result += res_value;
            outbuf = allocOutputLine();
            snprintf(outbuf, OUTPUT_LINE_SIZE, "%*s%*s%13u %s\n",
                     namewidth,
                     filelist->displayname,
                     padwidth,
                     "",
                     res_value,
                     result_type
            );
            add_string(output, outbuf);
          }
        }
      } else {
        if (!settings->matchesOnly) {
          /* Print hint */
          outbuf = allocOutputLine();
          snprintf(outbuf, OUTPUT_LINE_SIZE, "%*s%*s%19s\n",
              namewidth, filelist->displayname,
              padwidth, "", "no match");
          add_string(output, outbuf);
        }
      }
    }
    
    /* the extension pointer refers to displayname and is not freed */
    free(filelist->filename);
    free(filelist->displayname);
    filelist_t *freethis = filelist;
    filelist = filelist->next;
    free(freethis);
  }
}

mercurial