Fri, 03 Jun 2022 20:05:15 +0200
new feature: count non-whitespace characters
universe@10 | 1 | /* |
universe@34 | 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
universe@57 | 3 | * Copyright 2018 Mike Becker. All rights reserved. |
universe@34 | 4 | * |
universe@34 | 5 | * Redistribution and use in source and binary forms, with or without |
universe@34 | 6 | * modification, are permitted provided that the following conditions are met: |
universe@34 | 7 | * |
universe@34 | 8 | * 1. Redistributions of source code must retain the above copyright |
universe@34 | 9 | * notice, this list of conditions and the following disclaimer. |
universe@34 | 10 | * |
universe@34 | 11 | * 2. Redistributions in binary form must reproduce the above copyright |
universe@34 | 12 | * notice, this list of conditions and the following disclaimer in the |
universe@34 | 13 | * documentation and/or other materials provided with the distribution. |
universe@34 | 14 | * |
universe@34 | 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
universe@34 | 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
universe@34 | 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
universe@34 | 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
universe@34 | 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
universe@34 | 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
universe@34 | 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
universe@34 | 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
universe@34 | 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
universe@57 | 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
universe@10 | 25 | */ |
universe@1 | 26 | |
universe@1 | 27 | |
universe@10 | 28 | #include "scanner.h" |
universe@21 | 29 | #include "bfile_heuristics.h" |
universe@27 | 30 | #include "regex_parser.h" |
universe@23 | 31 | #include <sys/stat.h> |
universe@66 | 32 | #include <ctype.h> |
universe@3 | 33 | |
/*
 * Node of a singly linked list describing one directory entry.
 */
typedef struct filelist {
    /* entry name without the directory part (heap allocated) */
    char *displayname;
    /* length of displayname, not counting the terminating zero */
    unsigned displayname_len;
    /* directory, file separator and entry name joined (heap allocated) */
    char *filename;
    /* points into displayname at its last '.', or NULL if there is none */
    char *ext;
    /* st_mode reported by stat() for filename */
    unsigned st_mode;
    /* following node, or NULL at the end of the list */
    struct filelist *next;
} filelist_t;
universe@41 | 44 | |
universe@61 | 45 | static bool testSuffix(char* filename, string_list_t* list) { |
universe@61 | 46 | bool ret = false; |
universe@66 | 47 | size_t tokenlen, fnamelen = strlen(filename); |
universe@66 | 48 | for (size_t t = 0 ; t < list->count ; t++) { |
universe@61 | 49 | tokenlen = strlen(list->items[t]); |
universe@61 | 50 | if (fnamelen >= tokenlen && tokenlen > 0) { |
universe@61 | 51 | if (strncmp(filename+fnamelen-tokenlen, |
universe@61 | 52 | list->items[t], tokenlen) == 0) { |
universe@61 | 53 | ret = true; |
universe@61 | 54 | break; |
universe@61 | 55 | } |
universe@61 | 56 | } |
universe@61 | 57 | } |
universe@61 | 58 | return ret; |
universe@61 | 59 | } |
universe@61 | 60 | |
universe@66 | 61 | static void addResultPerExtension(scanresult_ext_t* result, |
universe@66 | 62 | char* ext, unsigned value) { |
universe@61 | 63 | if (!result) return; |
universe@61 | 64 | |
universe@61 | 65 | if (!ext) ext = "w/o"; |
universe@61 | 66 | |
universe@66 | 67 | for (unsigned i = 0 ; i < result->count ; i++) { |
universe@61 | 68 | if (strcasecmp(result->extensions[i], ext) == 0) { |
universe@66 | 69 | result->result[i] += value; |
universe@61 | 70 | return; |
universe@61 | 71 | } |
universe@61 | 72 | } |
universe@61 | 73 | |
universe@61 | 74 | if (result->count == result->capacity) { |
universe@66 | 75 | unsigned newcap = result->capacity+8; |
universe@61 | 76 | char** extarr = realloc(result->extensions, newcap*sizeof(char*)); |
universe@66 | 77 | unsigned* resultarr = realloc(result->result, newcap*sizeof(unsigned)); |
universe@66 | 78 | if (!extarr || !resultarr) { |
universe@61 | 79 | fprintf(stderr, "Memory allocation error.\n"); |
universe@61 | 80 | abort(); |
universe@61 | 81 | } |
universe@61 | 82 | result->extensions = extarr; |
universe@66 | 83 | result->result = resultarr; |
universe@61 | 84 | result->capacity = newcap; |
universe@61 | 85 | } |
universe@61 | 86 | |
universe@61 | 87 | result->extensions[result->count] = strdup(ext); |
universe@66 | 88 | result->result[result->count] = value; |
universe@61 | 89 | result->count++; |
universe@61 | 90 | } |
universe@61 | 91 | |
universe@61 | 92 | scanresult_t* new_scanresult_t(settings_t* settings) { |
universe@61 | 93 | scanresult_t* result = calloc(1, sizeof(scanresult_t)); |
universe@61 | 94 | if (settings->individual_sums) { |
universe@61 | 95 | result->ext = calloc(1, sizeof(scanresult_ext_t)); |
universe@61 | 96 | } |
universe@61 | 97 | return result; |
universe@61 | 98 | } |
universe@61 | 99 | |
universe@61 | 100 | void destroy_scanresult_t(scanresult_t* result) { |
universe@61 | 101 | if (result->ext) { |
universe@61 | 102 | if (result->ext->count > 0) { |
universe@66 | 103 | for (unsigned i = 0 ; i < result->ext->count ; i++) { |
universe@61 | 104 | free(result->ext->extensions[i]); |
universe@61 | 105 | } |
universe@61 | 106 | free(result->ext->extensions); |
universe@66 | 107 | free(result->ext->result); |
universe@61 | 108 | } |
universe@61 | 109 | free(result->ext); |
universe@61 | 110 | } |
universe@61 | 111 | free(result); |
universe@61 | 112 | } |
universe@61 | 113 | |
universe@66 | 114 | static filelist_t *buildFileList(scanner_t scanner, settings_t* settings) { |
universe@61 | 115 | |
universe@66 | 116 | filelist_t* list = NULL; |
universe@23 | 117 | DIR *dirf; |
universe@3 | 118 | struct dirent *entry; |
universe@23 | 119 | struct stat statbuf; |
universe@41 | 120 | |
universe@23 | 121 | if ((dirf = opendir(scanner.dir)) == NULL) { |
universe@58 | 122 | fprintf(stderr, "%s - ", scanner.dir); |
universe@58 | 123 | perror("Directory access failed"); |
universe@23 | 124 | return 0; |
universe@23 | 125 | } |
universe@23 | 126 | |
universe@23 | 127 | while ((entry = readdir(dirf)) != NULL) { |
universe@3 | 128 | if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) { |
universe@41 | 129 | |
universe@41 | 130 | /* Create new filelist entry */ |
universe@41 | 131 | filelist_t *newentry = (filelist_t*) malloc(sizeof(filelist_t)); |
universe@42 | 132 | newentry->next = NULL; |
universe@41 | 133 | |
universe@42 | 134 | newentry->displayname_len = strlen(entry->d_name); |
universe@42 | 135 | newentry->displayname = (char*) malloc(newentry->displayname_len+1); |
universe@42 | 136 | memcpy(newentry->displayname, entry->d_name, newentry->displayname_len); |
universe@42 | 137 | newentry->displayname[newentry->displayname_len] = 0; |
universe@41 | 138 | |
universe@42 | 139 | newentry->st_mode = 0; |
universe@41 | 140 | |
universe@40 | 141 | /* Construct absolute pathname string */ |
universe@41 | 142 | size_t dirnamelen = strlen(scanner.dir); |
universe@42 | 143 | char *filename = (char*) malloc(2+dirnamelen+newentry->displayname_len); |
universe@41 | 144 | memcpy(filename, scanner.dir, dirnamelen); |
universe@41 | 145 | filename[dirnamelen] = settings->fileSeparator; |
universe@42 | 146 | memcpy(filename+dirnamelen+1, entry->d_name, newentry->displayname_len); |
universe@42 | 147 | filename[1+dirnamelen+newentry->displayname_len] = 0; |
universe@42 | 148 | newentry->filename = filename; |
universe@61 | 149 | |
universe@61 | 150 | /* Obtain file extension */ |
universe@61 | 151 | newentry->ext = strrchr(newentry->displayname, '.'); |
universe@14 | 152 | |
universe@22 | 153 | /* Check for subdirectory */ |
universe@23 | 154 | if (stat(filename, &statbuf) == 0) { |
universe@42 | 155 | newentry->st_mode = statbuf.st_mode; |
universe@23 | 156 | } else { |
universe@23 | 157 | perror(" Error in stat call"); |
universe@3 | 158 | continue; |
universe@3 | 159 | } |
universe@42 | 160 | |
universe@42 | 161 | if (list) { |
universe@66 | 162 | /* create fake root to have a pointer on the true root */ |
universe@42 | 163 | filelist_t root; |
universe@42 | 164 | root.next = list; |
universe@42 | 165 | filelist_t *parent = &root; |
universe@42 | 166 | while (parent->next && |
universe@42 | 167 | (strcasecmp(parent->next->displayname, newentry->displayname) < 0 || |
universe@42 | 168 | (!S_ISDIR(newentry->st_mode) && S_ISDIR(parent->next->st_mode)) |
universe@42 | 169 | ) && |
universe@42 | 170 | (!S_ISDIR(newentry->st_mode) || S_ISDIR(parent->next->st_mode)) |
universe@42 | 171 | ) { |
universe@42 | 172 | parent = parent->next; |
universe@42 | 173 | } |
universe@42 | 174 | newentry->next = parent->next; |
universe@42 | 175 | parent->next = newentry; |
universe@42 | 176 | list = root.next; |
universe@42 | 177 | } else { |
universe@42 | 178 | list = newentry; |
universe@42 | 179 | } |
universe@41 | 180 | } |
universe@41 | 181 | } |
universe@41 | 182 | |
universe@41 | 183 | closedir(dirf); |
universe@41 | 184 | |
universe@41 | 185 | return list; |
universe@41 | 186 | } |
universe@3 | 187 | |
universe@60 | 188 | void scanDirectory(scanner_t scanner, settings_t* settings, |
universe@60 | 189 | string_list_t* output, scanresult_t* result) { |
universe@41 | 190 | |
universe@66 | 191 | result->result = 0; |
universe@41 | 192 | bool bfile; |
universe@44 | 193 | char *outbuf; |
universe@66 | 194 | const char *result_type = settings->count_chars ? "chars" : "lines"; |
universe@41 | 195 | |
universe@66 | 196 | filelist_t *filelist = buildFileList(scanner, settings); |
universe@41 | 197 | |
universe@41 | 198 | while (filelist != NULL) { |
universe@41 | 199 | |
universe@41 | 200 | /* Scan subdirectories */ |
universe@42 | 201 | if (!S_ISREG(filelist->st_mode)) { |
universe@44 | 202 | if (settings->recursive && S_ISDIR(filelist->st_mode)) { |
universe@44 | 203 | string_list_t *recoutput = new_string_list_t(); |
universe@60 | 204 | scanresult_t recresult; |
universe@61 | 205 | recresult.ext = result->ext; |
universe@60 | 206 | scanDirectory( |
universe@44 | 207 | (scanner_t) {filelist->filename, scanner.spaces+1}, |
universe@60 | 208 | settings, recoutput, &recresult); |
universe@66 | 209 | result->result += recresult.result; |
universe@44 | 210 | if (!settings->matchesOnly || recoutput->count > 0) { |
universe@44 | 211 | outbuf = (char*) malloc(81); |
universe@66 | 212 | snprintf(outbuf, 81, "%*s/%*s%13u %s\n", |
universe@44 | 213 | filelist->displayname_len+scanner.spaces, filelist->displayname, |
universe@60 | 214 | 60-filelist->displayname_len-scanner.spaces-1, "", |
universe@66 | 215 | recresult.result, result_type); |
universe@44 | 216 | add_string(output, outbuf); |
universe@66 | 217 | for (unsigned i = 0 ; i < recoutput->count ; i++) { |
universe@44 | 218 | add_string(output, recoutput->items[i]); |
universe@44 | 219 | } |
universe@44 | 220 | } |
universe@44 | 221 | destroy_string_list_t(recoutput); |
universe@44 | 222 | } else { |
universe@44 | 223 | outbuf = (char*) malloc(81); |
universe@66 | 224 | snprintf(outbuf, 81, "%*s\n", |
universe@66 | 225 | filelist->displayname_len+scanner.spaces, |
universe@66 | 226 | filelist->displayname); |
universe@44 | 227 | add_string(output, outbuf); |
universe@41 | 228 | } |
universe@41 | 229 | } else { |
universe@30 | 230 | if ((settings->includeSuffixes->count == 0 |
universe@41 | 231 | || testSuffix(filelist->displayname, settings->includeSuffixes)) |
universe@41 | 232 | && !testSuffix(filelist->displayname, settings->excludeSuffixes)) { |
universe@41 | 233 | |
universe@66 | 234 | /* Count */ |
universe@66 | 235 | unsigned res_value = 0; |
universe@25 | 236 | bfile = false; |
universe@25 | 237 | bfile_reset(settings->bfileHeuristics); |
universe@54 | 238 | regex_parser_reset(settings->regex); |
universe@66 | 239 | char line_buffer[MAX_LINELENGTH]; |
universe@66 | 240 | unsigned line_buffer_pos = 0; |
universe@25 | 241 | |
universe@41 | 242 | FILE *file = fopen(filelist->filename, "r"); |
universe@3 | 243 | if (file == NULL) { |
universe@44 | 244 | outbuf = (char*) malloc(81); |
universe@66 | 245 | snprintf(outbuf, 81, "%*s", |
universe@66 | 246 | filelist->displayname_len+scanner.spaces, |
universe@66 | 247 | filelist->displayname); |
universe@44 | 248 | add_string(output, outbuf); |
universe@3 | 249 | perror(" File acces failed"); |
universe@41 | 250 | } else { |
universe@66 | 251 | int a; |
universe@41 | 252 | do { |
universe@41 | 253 | a = fgetc(file); |
universe@3 | 254 | |
universe@41 | 255 | bfile = bfile_check(settings->bfileHeuristics, a); |
universe@3 | 256 | |
universe@66 | 257 | /* ignore carriage return completely */ |
universe@66 | 258 | if (a == 13) continue; |
universe@66 | 259 | |
universe@41 | 260 | if (a == 10 || a == EOF) { |
universe@66 | 261 | line_buffer[line_buffer_pos] = 0; |
universe@41 | 262 | if (regex_parser_do(settings->regex, line_buffer) == 0) { |
universe@66 | 263 | /* Subtract excluded lines/chars when matching has finished */ |
universe@41 | 264 | if (!regex_parser_matching(settings->regex)) { |
universe@66 | 265 | res_value -= settings->regex->matched_counted; |
universe@41 | 266 | } |
universe@41 | 267 | } |
universe@21 | 268 | |
universe@66 | 269 | if (settings->count_chars) { |
universe@66 | 270 | for (size_t i = 0 ; i < line_buffer_pos ; i++) { |
universe@66 | 271 | if (!isspace(line_buffer[i])) res_value++; |
universe@66 | 272 | } |
universe@66 | 273 | } else { |
universe@66 | 274 | res_value++; |
universe@66 | 275 | } |
universe@66 | 276 | line_buffer_pos = 0; |
universe@41 | 277 | } else { |
universe@66 | 278 | if (line_buffer_pos < MAX_LINELENGTH) { |
universe@66 | 279 | line_buffer[line_buffer_pos] = (char) a; |
universe@66 | 280 | line_buffer_pos++; |
universe@41 | 281 | } else { |
universe@66 | 282 | line_buffer[line_buffer_pos - 1] = 0; |
universe@41 | 283 | settings->confusing_lnlen = true; |
universe@28 | 284 | } |
universe@28 | 285 | } |
universe@41 | 286 | } while (!bfile && a != EOF); |
universe@41 | 287 | fclose(file); |
universe@25 | 288 | |
universe@41 | 289 | /* Print and sum line count */ |
universe@41 | 290 | if (bfile) { |
universe@41 | 291 | if (!settings->matchesOnly) { |
universe@44 | 292 | outbuf = (char*) malloc(81); |
universe@44 | 293 | snprintf(outbuf, 81, |
universe@44 | 294 | "%*s%*s%19s\n", filelist->displayname_len+scanner.spaces, |
universe@41 | 295 | filelist->displayname, |
universe@41 | 296 | 60-filelist->displayname_len-scanner.spaces, "", "binary"); |
universe@44 | 297 | add_string(output, outbuf); |
universe@41 | 298 | } |
universe@25 | 299 | } else { |
universe@66 | 300 | addResultPerExtension(result->ext, filelist->ext, res_value); |
universe@66 | 301 | result->result += res_value; |
universe@44 | 302 | outbuf = (char*) malloc(81); |
universe@66 | 303 | snprintf(outbuf, 81, "%*s%*s%13u %s\n", |
universe@66 | 304 | filelist->displayname_len+scanner.spaces, |
universe@66 | 305 | filelist->displayname, |
universe@66 | 306 | 60-filelist->displayname_len-scanner.spaces, |
universe@66 | 307 | "", |
universe@66 | 308 | res_value, |
universe@66 | 309 | result_type |
universe@66 | 310 | ); |
universe@44 | 311 | add_string(output, outbuf); |
universe@3 | 312 | } |
universe@21 | 313 | } |
universe@16 | 314 | } else { |
universe@3 | 315 | if (!settings->matchesOnly) { |
universe@22 | 316 | /* Print hint */ |
universe@44 | 317 | outbuf = (char*) malloc(81); |
universe@44 | 318 | snprintf(outbuf, 81, "%*s%*s%19s\n", |
universe@41 | 319 | filelist->displayname_len+scanner.spaces, filelist->displayname, |
universe@41 | 320 | 60-filelist->displayname_len-scanner.spaces, "", "no match"); |
universe@44 | 321 | add_string(output, outbuf); |
universe@3 | 322 | } |
universe@3 | 323 | } |
universe@3 | 324 | } |
universe@41 | 325 | |
universe@41 | 326 | free(filelist->filename); |
universe@41 | 327 | free(filelist->displayname); |
universe@41 | 328 | filelist_t *freethis = filelist; |
universe@41 | 329 | filelist = filelist->next; |
universe@41 | 330 | free(freethis); |
universe@3 | 331 | } |
universe@3 | 332 | } |