src/scanner.c

Tue, 21 May 2013 13:19:37 +0200

author
Mike Becker <universe@uap-core.de>
date
Tue, 21 May 2013 13:19:37 +0200
changeset 41
c2e73e175341
parent 40
5938a9b74e8e
child 42
0402b9b41b0a
permissions
-rw-r--r--

replaced direct scanning of directories with a two-step approach (first: create filename list, second: scan)

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 
     3  * Copyright 2013 Mike Becker. All rights reserved.
     4  * 
     5  * Redistribution and use in source and binary forms, with or without
     6  * modification, are permitted provided that the following conditions are met:
     7  * 
     8  * 1. Redistributions of source code must retain the above copyright
     9  * notice, this list of conditions and the following disclaimer.
    10  * 
    11  * 2. Redistributions in binary form must reproduce the above copyright
    12  * notice, this list of conditions and the following disclaimer in the
    13  * documentation and/or other materials provided with the distribution.
    14  * 
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    22  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    23  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
    25  *
    26  * scanner.c
    27  *
    28  *  Created on: 23.05.2011
    29  *      Author: Mike
    30  */
    33 #include "scanner.h"
    34 #include "suffix_fnc.h"
    35 #include "bfile_heuristics.h"
    36 #include "regex_parser.h"
    37 #include <sys/stat.h>
    39 typedef struct filelist filelist_t;
    41 struct filelist {
    42   char *displayname;
    43   int displayname_len;
    44   char *filename;
    45   int st_mode;
    46   filelist_t *next;
    47 };
    49 filelist_t *buildFileList(scanner_t scanner, settings_t* settings,
    50     filelist_t* list) {
    52   DIR *dirf;
    53   struct dirent *entry;
    54   struct stat statbuf;
    55   filelist_t *listentry = list;
    57   if ((dirf = opendir(scanner.dir)) == NULL) {
    58     printf("%s", scanner.dir);
    59     perror("  Directory access failed");
    60     return 0;
    61   }
    63   while ((entry = readdir(dirf)) != NULL) {
    64     if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
    66       /* Create new filelist entry */
    67       filelist_t *newentry = (filelist_t*) malloc(sizeof(filelist_t));
    68       // TODO: don't just append - create a sorted list!
    69       if (listentry) {
    70         listentry->next = newentry;
    71       }
    72       listentry = newentry;
    73       if (!list) {
    74         list = listentry;
    75       }
    77       listentry->next = NULL;
    79       listentry->displayname_len = strlen(entry->d_name);
    80       listentry->displayname = (char*) malloc(listentry->displayname_len+1);
    81       memcpy(listentry->displayname, entry->d_name, listentry->displayname_len);
    82       listentry->displayname[listentry->displayname_len] = 0;
    84       listentry->st_mode = 0;
    86       /* Construct absolute pathname string */
    87       size_t dirnamelen = strlen(scanner.dir);
    88       char *filename = (char*) malloc(2+dirnamelen+listentry->displayname_len);
    89       memcpy(filename, scanner.dir, dirnamelen);
    90       filename[dirnamelen] = settings->fileSeparator;
    91       memcpy(filename+dirnamelen+1, entry->d_name, listentry->displayname_len);
    92       filename[1+dirnamelen+listentry->displayname_len] = 0;
    93       listentry->filename = filename;
    95       /* Check for subdirectory */
    96       if (stat(filename, &statbuf) == 0) {
    97         listentry->st_mode = statbuf.st_mode;
    98       } else {
    99         perror("  Error in stat call");
   100         continue;
   101       }
   102     }
   103   }
   105   closedir(dirf);
   107   return list;
   108 }
   110 int scanDirectory(scanner_t scanner, settings_t* settings) {
   112   int lines, a;
   113   int lineSum = 0;
   114   bool bfile;
   116   filelist_t *filelist = buildFileList(scanner, settings, NULL);
   118   while (filelist != NULL) {
   120     /* Scan subdirectories */
   121     if (!(filelist->st_mode & S_IFREG)) {
   122       printf("%*s\n", filelist->displayname_len+scanner.spaces,
   123           filelist->displayname);
   124       if (settings->recursive && (filelist->st_mode & S_IFDIR)) {
   125         scanDirectory((scanner_t) {filelist->filename, scanner.spaces+1},
   126             settings);
   127       }
   128     } else {
   129       if ((settings->includeSuffixes->count == 0
   130         || testSuffix(filelist->displayname, settings->includeSuffixes))
   131         && !testSuffix(filelist->displayname, settings->excludeSuffixes)) {
   133         /* Count lines */
   134         lines = 0;
   135         bfile = false;
   136         bfile_reset(settings->bfileHeuristics);
   137         char line_buffer[REGEX_MAX_LINELENGTH];
   138         int line_buffer_offset = 0;
   140         FILE *file = fopen(filelist->filename, "r");
   141         if (file == NULL) {
   142           printf("%*s", filelist->displayname_len+scanner.spaces,
   143               filelist->displayname);
   144           perror("  File acces failed");
   145         } else {
   146           do {
   147             a = fgetc(file);
   149             bfile = bfile_check(settings->bfileHeuristics, a);
   151             if (a == 10 || a == EOF) {
   152               line_buffer[line_buffer_offset] = 0;
   153               if (regex_parser_do(settings->regex, line_buffer) == 0) {
   154                 /* Only subtract lines when matching has finished */
   155                 if (!regex_parser_matching(settings->regex)) {
   156                   lines -= settings->regex->matched_lines;
   157                 }
   158               }
   160               line_buffer_offset = 0;
   161               lines++;
   162             } else {
   163               if (line_buffer_offset < REGEX_MAX_LINELENGTH) {
   164                 line_buffer[line_buffer_offset] = a;
   165                 line_buffer_offset++;
   166               } else {
   167                 line_buffer[line_buffer_offset-1] = 0;
   168                 settings->confusing_lnlen = true;
   169               }
   170             }
   171           } while (!bfile && a != EOF);
   172           fclose(file);
   174           /* Print and sum line count */
   175           if (bfile) {
   176             if (!settings->matchesOnly) {
   177               printf("%*s%*s%19s\n", filelist->displayname_len+scanner.spaces,
   178                   filelist->displayname,
   179                   60-filelist->displayname_len-scanner.spaces, "", "binary");
   180             }
   181           } else {
   182             lineSum += lines;
   183             printf("%*s%*s%13d lines\n",
   184                 filelist->displayname_len+scanner.spaces, filelist->displayname,
   185                 60-filelist->displayname_len-scanner.spaces, "", lines);
   186           }
   187         }
   188       } else {
   189         if (!settings->matchesOnly) {
   190           /* Print hint */
   191           printf("%*s%*s%19s\n",
   192               filelist->displayname_len+scanner.spaces, filelist->displayname,
   193               60-filelist->displayname_len-scanner.spaces, "", "no match");
   194         }
   195       }
   196     }
   198     free(filelist->filename);
   199     free(filelist->displayname);
   200     filelist_t *freethis = filelist;
   201     filelist = filelist->next;
   202     free(freethis);
   203   }
   205   return lineSum;
   206 }

mercurial