src/scanner.c

Fri, 03 Jun 2022 20:05:15 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 03 Jun 2022 20:05:15 +0200
changeset 66
be2084398c37
parent 61
9c8d768f0244
permissions
-rw-r--r--

new feature: count non-whitespace characters

universe@10 1 /*
universe@34 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@57 3 * Copyright 2018 Mike Becker. All rights reserved.
universe@34 4 *
universe@34 5 * Redistribution and use in source and binary forms, with or without
universe@34 6 * modification, are permitted provided that the following conditions are met:
universe@34 7 *
universe@34 8 * 1. Redistributions of source code must retain the above copyright
universe@34 9 * notice, this list of conditions and the following disclaimer.
universe@34 10 *
universe@34 11 * 2. Redistributions in binary form must reproduce the above copyright
universe@34 12 * notice, this list of conditions and the following disclaimer in the
universe@34 13 * documentation and/or other materials provided with the distribution.
universe@34 14 *
universe@34 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@34 16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@34 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
universe@34 18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
universe@34 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
universe@34 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
universe@34 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
universe@34 22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
universe@34 23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
universe@57 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
universe@10 25 */
universe@1 26
universe@1 27
universe@10 28 #include "scanner.h"
universe@21 29 #include "bfile_heuristics.h"
universe@27 30 #include "regex_parser.h"
universe@23 31 #include <sys/stat.h>
universe@66 32 #include <ctype.h>
universe@3 33
/*
 * One node of the singly linked list of directory entries
 * that buildFileList() assembles.
 */
typedef struct filelist {
    /* name of the entry as reported by readdir() */
    char *displayname;
    /* length of displayname, terminator not included */
    unsigned displayname_len;
    /* directory name, file separator and entry name joined together */
    char *filename;
    /* position of the last '.' inside displayname, NULL if there is none */
    char *ext;
    /* st_mode obtained from stat() */
    unsigned st_mode;
    /* following node, NULL for the last node */
    struct filelist *next;
} filelist_t;
universe@41 44
universe@61 45 static bool testSuffix(char* filename, string_list_t* list) {
universe@61 46 bool ret = false;
universe@66 47 size_t tokenlen, fnamelen = strlen(filename);
universe@66 48 for (size_t t = 0 ; t < list->count ; t++) {
universe@61 49 tokenlen = strlen(list->items[t]);
universe@61 50 if (fnamelen >= tokenlen && tokenlen > 0) {
universe@61 51 if (strncmp(filename+fnamelen-tokenlen,
universe@61 52 list->items[t], tokenlen) == 0) {
universe@61 53 ret = true;
universe@61 54 break;
universe@61 55 }
universe@61 56 }
universe@61 57 }
universe@61 58 return ret;
universe@61 59 }
universe@61 60
universe@66 61 static void addResultPerExtension(scanresult_ext_t* result,
universe@66 62 char* ext, unsigned value) {
universe@61 63 if (!result) return;
universe@61 64
universe@61 65 if (!ext) ext = "w/o";
universe@61 66
universe@66 67 for (unsigned i = 0 ; i < result->count ; i++) {
universe@61 68 if (strcasecmp(result->extensions[i], ext) == 0) {
universe@66 69 result->result[i] += value;
universe@61 70 return;
universe@61 71 }
universe@61 72 }
universe@61 73
universe@61 74 if (result->count == result->capacity) {
universe@66 75 unsigned newcap = result->capacity+8;
universe@61 76 char** extarr = realloc(result->extensions, newcap*sizeof(char*));
universe@66 77 unsigned* resultarr = realloc(result->result, newcap*sizeof(unsigned));
universe@66 78 if (!extarr || !resultarr) {
universe@61 79 fprintf(stderr, "Memory allocation error.\n");
universe@61 80 abort();
universe@61 81 }
universe@61 82 result->extensions = extarr;
universe@66 83 result->result = resultarr;
universe@61 84 result->capacity = newcap;
universe@61 85 }
universe@61 86
universe@61 87 result->extensions[result->count] = strdup(ext);
universe@66 88 result->result[result->count] = value;
universe@61 89 result->count++;
universe@61 90 }
universe@61 91
universe@61 92 scanresult_t* new_scanresult_t(settings_t* settings) {
universe@61 93 scanresult_t* result = calloc(1, sizeof(scanresult_t));
universe@61 94 if (settings->individual_sums) {
universe@61 95 result->ext = calloc(1, sizeof(scanresult_ext_t));
universe@61 96 }
universe@61 97 return result;
universe@61 98 }
universe@61 99
universe@61 100 void destroy_scanresult_t(scanresult_t* result) {
universe@61 101 if (result->ext) {
universe@61 102 if (result->ext->count > 0) {
universe@66 103 for (unsigned i = 0 ; i < result->ext->count ; i++) {
universe@61 104 free(result->ext->extensions[i]);
universe@61 105 }
universe@61 106 free(result->ext->extensions);
universe@66 107 free(result->ext->result);
universe@61 108 }
universe@61 109 free(result->ext);
universe@61 110 }
universe@61 111 free(result);
universe@61 112 }
universe@61 113
universe@66 114 static filelist_t *buildFileList(scanner_t scanner, settings_t* settings) {
universe@61 115
universe@66 116 filelist_t* list = NULL;
universe@23 117 DIR *dirf;
universe@3 118 struct dirent *entry;
universe@23 119 struct stat statbuf;
universe@41 120
universe@23 121 if ((dirf = opendir(scanner.dir)) == NULL) {
universe@58 122 fprintf(stderr, "%s - ", scanner.dir);
universe@58 123 perror("Directory access failed");
universe@23 124 return 0;
universe@23 125 }
universe@23 126
universe@23 127 while ((entry = readdir(dirf)) != NULL) {
universe@3 128 if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
universe@41 129
universe@41 130 /* Create new filelist entry */
universe@41 131 filelist_t *newentry = (filelist_t*) malloc(sizeof(filelist_t));
universe@42 132 newentry->next = NULL;
universe@41 133
universe@42 134 newentry->displayname_len = strlen(entry->d_name);
universe@42 135 newentry->displayname = (char*) malloc(newentry->displayname_len+1);
universe@42 136 memcpy(newentry->displayname, entry->d_name, newentry->displayname_len);
universe@42 137 newentry->displayname[newentry->displayname_len] = 0;
universe@41 138
universe@42 139 newentry->st_mode = 0;
universe@41 140
universe@40 141 /* Construct absolute pathname string */
universe@41 142 size_t dirnamelen = strlen(scanner.dir);
universe@42 143 char *filename = (char*) malloc(2+dirnamelen+newentry->displayname_len);
universe@41 144 memcpy(filename, scanner.dir, dirnamelen);
universe@41 145 filename[dirnamelen] = settings->fileSeparator;
universe@42 146 memcpy(filename+dirnamelen+1, entry->d_name, newentry->displayname_len);
universe@42 147 filename[1+dirnamelen+newentry->displayname_len] = 0;
universe@42 148 newentry->filename = filename;
universe@61 149
universe@61 150 /* Obtain file extension */
universe@61 151 newentry->ext = strrchr(newentry->displayname, '.');
universe@14 152
universe@22 153 /* Check for subdirectory */
universe@23 154 if (stat(filename, &statbuf) == 0) {
universe@42 155 newentry->st_mode = statbuf.st_mode;
universe@23 156 } else {
universe@23 157 perror(" Error in stat call");
universe@3 158 continue;
universe@3 159 }
universe@42 160
universe@42 161 if (list) {
universe@66 162 /* create fake root to have a pointer on the true root */
universe@42 163 filelist_t root;
universe@42 164 root.next = list;
universe@42 165 filelist_t *parent = &root;
universe@42 166 while (parent->next &&
universe@42 167 (strcasecmp(parent->next->displayname, newentry->displayname) < 0 ||
universe@42 168 (!S_ISDIR(newentry->st_mode) && S_ISDIR(parent->next->st_mode))
universe@42 169 ) &&
universe@42 170 (!S_ISDIR(newentry->st_mode) || S_ISDIR(parent->next->st_mode))
universe@42 171 ) {
universe@42 172 parent = parent->next;
universe@42 173 }
universe@42 174 newentry->next = parent->next;
universe@42 175 parent->next = newentry;
universe@42 176 list = root.next;
universe@42 177 } else {
universe@42 178 list = newentry;
universe@42 179 }
universe@41 180 }
universe@41 181 }
universe@41 182
universe@41 183 closedir(dirf);
universe@41 184
universe@41 185 return list;
universe@41 186 }
universe@3 187
universe@60 188 void scanDirectory(scanner_t scanner, settings_t* settings,
universe@60 189 string_list_t* output, scanresult_t* result) {
universe@41 190
universe@66 191 result->result = 0;
universe@41 192 bool bfile;
universe@44 193 char *outbuf;
universe@66 194 const char *result_type = settings->count_chars ? "chars" : "lines";
universe@41 195
universe@66 196 filelist_t *filelist = buildFileList(scanner, settings);
universe@41 197
universe@41 198 while (filelist != NULL) {
universe@41 199
universe@41 200 /* Scan subdirectories */
universe@42 201 if (!S_ISREG(filelist->st_mode)) {
universe@44 202 if (settings->recursive && S_ISDIR(filelist->st_mode)) {
universe@44 203 string_list_t *recoutput = new_string_list_t();
universe@60 204 scanresult_t recresult;
universe@61 205 recresult.ext = result->ext;
universe@60 206 scanDirectory(
universe@44 207 (scanner_t) {filelist->filename, scanner.spaces+1},
universe@60 208 settings, recoutput, &recresult);
universe@66 209 result->result += recresult.result;
universe@44 210 if (!settings->matchesOnly || recoutput->count > 0) {
universe@44 211 outbuf = (char*) malloc(81);
universe@66 212 snprintf(outbuf, 81, "%*s/%*s%13u %s\n",
universe@44 213 filelist->displayname_len+scanner.spaces, filelist->displayname,
universe@60 214 60-filelist->displayname_len-scanner.spaces-1, "",
universe@66 215 recresult.result, result_type);
universe@44 216 add_string(output, outbuf);
universe@66 217 for (unsigned i = 0 ; i < recoutput->count ; i++) {
universe@44 218 add_string(output, recoutput->items[i]);
universe@44 219 }
universe@44 220 }
universe@44 221 destroy_string_list_t(recoutput);
universe@44 222 } else {
universe@44 223 outbuf = (char*) malloc(81);
universe@66 224 snprintf(outbuf, 81, "%*s\n",
universe@66 225 filelist->displayname_len+scanner.spaces,
universe@66 226 filelist->displayname);
universe@44 227 add_string(output, outbuf);
universe@41 228 }
universe@41 229 } else {
universe@30 230 if ((settings->includeSuffixes->count == 0
universe@41 231 || testSuffix(filelist->displayname, settings->includeSuffixes))
universe@41 232 && !testSuffix(filelist->displayname, settings->excludeSuffixes)) {
universe@41 233
universe@66 234 /* Count */
universe@66 235 unsigned res_value = 0;
universe@25 236 bfile = false;
universe@25 237 bfile_reset(settings->bfileHeuristics);
universe@54 238 regex_parser_reset(settings->regex);
universe@66 239 char line_buffer[MAX_LINELENGTH];
universe@66 240 unsigned line_buffer_pos = 0;
universe@25 241
universe@41 242 FILE *file = fopen(filelist->filename, "r");
universe@3 243 if (file == NULL) {
universe@44 244 outbuf = (char*) malloc(81);
universe@66 245 snprintf(outbuf, 81, "%*s",
universe@66 246 filelist->displayname_len+scanner.spaces,
universe@66 247 filelist->displayname);
universe@44 248 add_string(output, outbuf);
universe@3 249 perror(" File acces failed");
universe@41 250 } else {
universe@66 251 int a;
universe@41 252 do {
universe@41 253 a = fgetc(file);
universe@3 254
universe@41 255 bfile = bfile_check(settings->bfileHeuristics, a);
universe@3 256
universe@66 257 /* ignore carriage return completely */
universe@66 258 if (a == 13) continue;
universe@66 259
universe@41 260 if (a == 10 || a == EOF) {
universe@66 261 line_buffer[line_buffer_pos] = 0;
universe@41 262 if (regex_parser_do(settings->regex, line_buffer) == 0) {
universe@66 263 /* Subtract excluded lines/chars when matching has finished */
universe@41 264 if (!regex_parser_matching(settings->regex)) {
universe@66 265 res_value -= settings->regex->matched_counted;
universe@41 266 }
universe@41 267 }
universe@21 268
universe@66 269 if (settings->count_chars) {
universe@66 270 for (size_t i = 0 ; i < line_buffer_pos ; i++) {
universe@66 271 if (!isspace(line_buffer[i])) res_value++;
universe@66 272 }
universe@66 273 } else {
universe@66 274 res_value++;
universe@66 275 }
universe@66 276 line_buffer_pos = 0;
universe@41 277 } else {
universe@66 278 if (line_buffer_pos < MAX_LINELENGTH) {
universe@66 279 line_buffer[line_buffer_pos] = (char) a;
universe@66 280 line_buffer_pos++;
universe@41 281 } else {
universe@66 282 line_buffer[line_buffer_pos - 1] = 0;
universe@41 283 settings->confusing_lnlen = true;
universe@28 284 }
universe@28 285 }
universe@41 286 } while (!bfile && a != EOF);
universe@41 287 fclose(file);
universe@25 288
universe@41 289 /* Print and sum line count */
universe@41 290 if (bfile) {
universe@41 291 if (!settings->matchesOnly) {
universe@44 292 outbuf = (char*) malloc(81);
universe@44 293 snprintf(outbuf, 81,
universe@44 294 "%*s%*s%19s\n", filelist->displayname_len+scanner.spaces,
universe@41 295 filelist->displayname,
universe@41 296 60-filelist->displayname_len-scanner.spaces, "", "binary");
universe@44 297 add_string(output, outbuf);
universe@41 298 }
universe@25 299 } else {
universe@66 300 addResultPerExtension(result->ext, filelist->ext, res_value);
universe@66 301 result->result += res_value;
universe@44 302 outbuf = (char*) malloc(81);
universe@66 303 snprintf(outbuf, 81, "%*s%*s%13u %s\n",
universe@66 304 filelist->displayname_len+scanner.spaces,
universe@66 305 filelist->displayname,
universe@66 306 60-filelist->displayname_len-scanner.spaces,
universe@66 307 "",
universe@66 308 res_value,
universe@66 309 result_type
universe@66 310 );
universe@44 311 add_string(output, outbuf);
universe@3 312 }
universe@21 313 }
universe@16 314 } else {
universe@3 315 if (!settings->matchesOnly) {
universe@22 316 /* Print hint */
universe@44 317 outbuf = (char*) malloc(81);
universe@44 318 snprintf(outbuf, 81, "%*s%*s%19s\n",
universe@41 319 filelist->displayname_len+scanner.spaces, filelist->displayname,
universe@41 320 60-filelist->displayname_len-scanner.spaces, "", "no match");
universe@44 321 add_string(output, outbuf);
universe@3 322 }
universe@3 323 }
universe@3 324 }
universe@41 325
universe@41 326 free(filelist->filename);
universe@41 327 free(filelist->displayname);
universe@41 328 filelist_t *freethis = filelist;
universe@41 329 filelist = filelist->next;
universe@41 330 free(freethis);
universe@3 331 }
universe@3 332 }

mercurial