Fri, 03 Jun 2022 20:05:15 +0200
new feature: count non-whitespace characters
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 * Copyright 2018 Mike Becker. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
28 #include "scanner.h"
29 #include "bfile_heuristics.h"
30 #include "regex_parser.h"
31 #include <sys/stat.h>
32 #include <ctype.h>
/*
 * One node of the singly linked list of directory entries produced by
 * buildFileList().
 */
typedef struct filelist {
    /* copy of the entry name reported by readdir() */
    char *displayname;
    /* length of displayname, excluding the terminator */
    unsigned displayname_len;
    /* scanned directory, file separator and entry name joined together */
    char *filename;
    /* points at the last '.' inside displayname, NULL if there is none */
    char *ext;
    /* st_mode as reported by stat() for filename */
    unsigned st_mode;
    /* following node, NULL at the end of the list */
    struct filelist *next;
} filelist_t;
45 static bool testSuffix(char* filename, string_list_t* list) {
46 bool ret = false;
47 size_t tokenlen, fnamelen = strlen(filename);
48 for (size_t t = 0 ; t < list->count ; t++) {
49 tokenlen = strlen(list->items[t]);
50 if (fnamelen >= tokenlen && tokenlen > 0) {
51 if (strncmp(filename+fnamelen-tokenlen,
52 list->items[t], tokenlen) == 0) {
53 ret = true;
54 break;
55 }
56 }
57 }
58 return ret;
59 }
61 static void addResultPerExtension(scanresult_ext_t* result,
62 char* ext, unsigned value) {
63 if (!result) return;
65 if (!ext) ext = "w/o";
67 for (unsigned i = 0 ; i < result->count ; i++) {
68 if (strcasecmp(result->extensions[i], ext) == 0) {
69 result->result[i] += value;
70 return;
71 }
72 }
74 if (result->count == result->capacity) {
75 unsigned newcap = result->capacity+8;
76 char** extarr = realloc(result->extensions, newcap*sizeof(char*));
77 unsigned* resultarr = realloc(result->result, newcap*sizeof(unsigned));
78 if (!extarr || !resultarr) {
79 fprintf(stderr, "Memory allocation error.\n");
80 abort();
81 }
82 result->extensions = extarr;
83 result->result = resultarr;
84 result->capacity = newcap;
85 }
87 result->extensions[result->count] = strdup(ext);
88 result->result[result->count] = value;
89 result->count++;
90 }
92 scanresult_t* new_scanresult_t(settings_t* settings) {
93 scanresult_t* result = calloc(1, sizeof(scanresult_t));
94 if (settings->individual_sums) {
95 result->ext = calloc(1, sizeof(scanresult_ext_t));
96 }
97 return result;
98 }
100 void destroy_scanresult_t(scanresult_t* result) {
101 if (result->ext) {
102 if (result->ext->count > 0) {
103 for (unsigned i = 0 ; i < result->ext->count ; i++) {
104 free(result->ext->extensions[i]);
105 }
106 free(result->ext->extensions);
107 free(result->ext->result);
108 }
109 free(result->ext);
110 }
111 free(result);
112 }
114 static filelist_t *buildFileList(scanner_t scanner, settings_t* settings) {
116 filelist_t* list = NULL;
117 DIR *dirf;
118 struct dirent *entry;
119 struct stat statbuf;
121 if ((dirf = opendir(scanner.dir)) == NULL) {
122 fprintf(stderr, "%s - ", scanner.dir);
123 perror("Directory access failed");
124 return 0;
125 }
127 while ((entry = readdir(dirf)) != NULL) {
128 if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
130 /* Create new filelist entry */
131 filelist_t *newentry = (filelist_t*) malloc(sizeof(filelist_t));
132 newentry->next = NULL;
134 newentry->displayname_len = strlen(entry->d_name);
135 newentry->displayname = (char*) malloc(newentry->displayname_len+1);
136 memcpy(newentry->displayname, entry->d_name, newentry->displayname_len);
137 newentry->displayname[newentry->displayname_len] = 0;
139 newentry->st_mode = 0;
141 /* Construct absolute pathname string */
142 size_t dirnamelen = strlen(scanner.dir);
143 char *filename = (char*) malloc(2+dirnamelen+newentry->displayname_len);
144 memcpy(filename, scanner.dir, dirnamelen);
145 filename[dirnamelen] = settings->fileSeparator;
146 memcpy(filename+dirnamelen+1, entry->d_name, newentry->displayname_len);
147 filename[1+dirnamelen+newentry->displayname_len] = 0;
148 newentry->filename = filename;
150 /* Obtain file extension */
151 newentry->ext = strrchr(newentry->displayname, '.');
153 /* Check for subdirectory */
154 if (stat(filename, &statbuf) == 0) {
155 newentry->st_mode = statbuf.st_mode;
156 } else {
157 perror(" Error in stat call");
158 continue;
159 }
161 if (list) {
162 /* create fake root to have a pointer on the true root */
163 filelist_t root;
164 root.next = list;
165 filelist_t *parent = &root;
166 while (parent->next &&
167 (strcasecmp(parent->next->displayname, newentry->displayname) < 0 ||
168 (!S_ISDIR(newentry->st_mode) && S_ISDIR(parent->next->st_mode))
169 ) &&
170 (!S_ISDIR(newentry->st_mode) || S_ISDIR(parent->next->st_mode))
171 ) {
172 parent = parent->next;
173 }
174 newentry->next = parent->next;
175 parent->next = newentry;
176 list = root.next;
177 } else {
178 list = newentry;
179 }
180 }
181 }
183 closedir(dirf);
185 return list;
186 }
188 void scanDirectory(scanner_t scanner, settings_t* settings,
189 string_list_t* output, scanresult_t* result) {
191 result->result = 0;
192 bool bfile;
193 char *outbuf;
194 const char *result_type = settings->count_chars ? "chars" : "lines";
196 filelist_t *filelist = buildFileList(scanner, settings);
198 while (filelist != NULL) {
200 /* Scan subdirectories */
201 if (!S_ISREG(filelist->st_mode)) {
202 if (settings->recursive && S_ISDIR(filelist->st_mode)) {
203 string_list_t *recoutput = new_string_list_t();
204 scanresult_t recresult;
205 recresult.ext = result->ext;
206 scanDirectory(
207 (scanner_t) {filelist->filename, scanner.spaces+1},
208 settings, recoutput, &recresult);
209 result->result += recresult.result;
210 if (!settings->matchesOnly || recoutput->count > 0) {
211 outbuf = (char*) malloc(81);
212 snprintf(outbuf, 81, "%*s/%*s%13u %s\n",
213 filelist->displayname_len+scanner.spaces, filelist->displayname,
214 60-filelist->displayname_len-scanner.spaces-1, "",
215 recresult.result, result_type);
216 add_string(output, outbuf);
217 for (unsigned i = 0 ; i < recoutput->count ; i++) {
218 add_string(output, recoutput->items[i]);
219 }
220 }
221 destroy_string_list_t(recoutput);
222 } else {
223 outbuf = (char*) malloc(81);
224 snprintf(outbuf, 81, "%*s\n",
225 filelist->displayname_len+scanner.spaces,
226 filelist->displayname);
227 add_string(output, outbuf);
228 }
229 } else {
230 if ((settings->includeSuffixes->count == 0
231 || testSuffix(filelist->displayname, settings->includeSuffixes))
232 && !testSuffix(filelist->displayname, settings->excludeSuffixes)) {
234 /* Count */
235 unsigned res_value = 0;
236 bfile = false;
237 bfile_reset(settings->bfileHeuristics);
238 regex_parser_reset(settings->regex);
239 char line_buffer[MAX_LINELENGTH];
240 unsigned line_buffer_pos = 0;
242 FILE *file = fopen(filelist->filename, "r");
243 if (file == NULL) {
244 outbuf = (char*) malloc(81);
245 snprintf(outbuf, 81, "%*s",
246 filelist->displayname_len+scanner.spaces,
247 filelist->displayname);
248 add_string(output, outbuf);
249 perror(" File acces failed");
250 } else {
251 int a;
252 do {
253 a = fgetc(file);
255 bfile = bfile_check(settings->bfileHeuristics, a);
257 /* ignore carriage return completely */
258 if (a == 13) continue;
260 if (a == 10 || a == EOF) {
261 line_buffer[line_buffer_pos] = 0;
262 if (regex_parser_do(settings->regex, line_buffer) == 0) {
263 /* Subtract excluded lines/chars when matching has finished */
264 if (!regex_parser_matching(settings->regex)) {
265 res_value -= settings->regex->matched_counted;
266 }
267 }
269 if (settings->count_chars) {
270 for (size_t i = 0 ; i < line_buffer_pos ; i++) {
271 if (!isspace(line_buffer[i])) res_value++;
272 }
273 } else {
274 res_value++;
275 }
276 line_buffer_pos = 0;
277 } else {
278 if (line_buffer_pos < MAX_LINELENGTH) {
279 line_buffer[line_buffer_pos] = (char) a;
280 line_buffer_pos++;
281 } else {
282 line_buffer[line_buffer_pos - 1] = 0;
283 settings->confusing_lnlen = true;
284 }
285 }
286 } while (!bfile && a != EOF);
287 fclose(file);
289 /* Print and sum line count */
290 if (bfile) {
291 if (!settings->matchesOnly) {
292 outbuf = (char*) malloc(81);
293 snprintf(outbuf, 81,
294 "%*s%*s%19s\n", filelist->displayname_len+scanner.spaces,
295 filelist->displayname,
296 60-filelist->displayname_len-scanner.spaces, "", "binary");
297 add_string(output, outbuf);
298 }
299 } else {
300 addResultPerExtension(result->ext, filelist->ext, res_value);
301 result->result += res_value;
302 outbuf = (char*) malloc(81);
303 snprintf(outbuf, 81, "%*s%*s%13u %s\n",
304 filelist->displayname_len+scanner.spaces,
305 filelist->displayname,
306 60-filelist->displayname_len-scanner.spaces,
307 "",
308 res_value,
309 result_type
310 );
311 add_string(output, outbuf);
312 }
313 }
314 } else {
315 if (!settings->matchesOnly) {
316 /* Print hint */
317 outbuf = (char*) malloc(81);
318 snprintf(outbuf, 81, "%*s%*s%19s\n",
319 filelist->displayname_len+scanner.spaces, filelist->displayname,
320 60-filelist->displayname_len-scanner.spaces, "", "no match");
321 add_string(output, outbuf);
322 }
323 }
324 }
326 free(filelist->filename);
327 free(filelist->displayname);
328 filelist_t *freethis = filelist;
329 filelist = filelist->next;
330 free(freethis);
331 }
332 }