Fri, 03 Jun 2022 20:05:15 +0200
new feature: count non-whitespace characters
universe@10 | 1 | /* |
universe@34 | 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
universe@57 | 3 | * Copyright 2018 Mike Becker. All rights reserved. |
universe@34 | 4 | * |
universe@34 | 5 | * Redistribution and use in source and binary forms, with or without |
universe@34 | 6 | * modification, are permitted provided that the following conditions are met: |
universe@34 | 7 | * |
universe@34 | 8 | * 1. Redistributions of source code must retain the above copyright |
universe@34 | 9 | * notice, this list of conditions and the following disclaimer. |
universe@34 | 10 | * |
universe@34 | 11 | * 2. Redistributions in binary form must reproduce the above copyright |
universe@34 | 12 | * notice, this list of conditions and the following disclaimer in the |
universe@34 | 13 | * documentation and/or other materials provided with the distribution. |
universe@34 | 14 | * |
universe@34 | 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
universe@34 | 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
universe@34 | 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
universe@34 | 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
universe@34 | 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
universe@34 | 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
universe@34 | 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
universe@34 | 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
universe@34 | 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
universe@57 | 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
universe@10 | 25 | */ |
universe@10 | 26 | |
universe@3 | 27 | #include "cline.h" |
universe@10 | 28 | #include "scanner.h" |
universe@10 | 29 | #include "settings.h" |
universe@12 | 30 | #include "arguments.h" |
universe@27 | 31 | #include "regex_parser.h" |
universe@0 | 32 | |
/*
 * Writes the usage text to stdout: synopsis, option list, shortcuts and a
 * few usage examples. The text contains no printf conversion specifiers,
 * so it is emitted verbatim.
 */
void printHelpText() {
  static const char help_text[] =
    "\nUsage:"
    "\n cline [Options] [Directories...]"
    "\n\nCounts the line terminator characters (\\n) within all"
    " files in the specified\ndirectories."
    "\n\nOptions:"
    "\n -b <level> - binary file heuristics level (default medium)"
    "\n One of: ignore low medium high"
    "\n -c - Count non-whitespace characters instead of lines"
    "\n -E <pattern> - Excludes any line matching the <pattern>"
    "\n -e <start> <end> - Excludes lines between <start> and <end>"
    "\n You may use these options multiple times"
    "\n -h, --help - this help text"
    "\n -i - print out individual sums per file extension"
    "\n (cannot be used together with -V)"
    "\n -m - print information about matching files only"
    "\n -s <suffixes> - only count files with these suffixes (separated"
    "\n by commas)"
    "\n -S <suffixes> - count any file except those with these suffixes"
    "\n (separated by commas)"
    "\n -r, -R - includes subdirectories"
    "\n -v, --version - print out version information"
    "\n -V - turn verbose output off, print the result only"
    "\n\nShortcuts:"
    "\n --exclude-cstyle-comments : -E '\\s*//' -e '\\s*/\\*' '\\*/\\s*'"
    "\n --exclude-blank-lines : -E '^\\s*$'"
    "\n\n"
    "The default call without any options is:"
    "\n cline ./\n\n"
    "So each file in the working directory is counted. If you want to count C"
    "\nsource code in your working directory and its subdirectories, type:"
    "\n cline -rs .c\n"
    "\nIf you want to exclude comment lines, you may use the -e/-E option."
    "\nAfter a line matches the regex pattern <start>, this and any following"
    "\nline is not counted unless a line matches the <end> pattern. A line is"
    "\nstill counted when it does not start or end with the respective pattern."
    "\nPlease note, that cline does not trim the lines before matching against"
    "\nthe pattern."
    "\n\nExample (C without comments):"
    "\n cline -s .c,.h --exclude-cstyle-comments"
    "\n";

  fputs(help_text, stdout);
}
universe@1 | 76 | |
universe@14 | 77 | int exit_with_version(settings_t* settings) { |
universe@48 | 78 | printf("cline - Version: " VERSION "\n"); |
universe@14 | 79 | destroy_settings_t(settings); |
universe@14 | 80 | return 0; |
universe@14 | 81 | } |
universe@14 | 82 | |
universe@12 | 83 | int exit_with_help(settings_t* settings, int code) { |
universe@50 | 84 | printf("cline - Version: " VERSION "\n"); |
universe@12 | 85 | printHelpText(); |
universe@8 | 86 | destroy_settings_t(settings); |
universe@8 | 87 | return code; |
universe@8 | 88 | } |
universe@8 | 89 | |
universe@1 | 90 | int main(int argc, char** argv) { |
universe@0 | 91 | |
universe@22 | 92 | /* Settings */ |
universe@3 | 93 | settings_t *settings = new_settings_t(); |
universe@5 | 94 | if (settings == NULL) { |
universe@5 | 95 | fprintf(stderr, "Memory allocation failed.\n"); |
universe@5 | 96 | return 1; |
universe@5 | 97 | } |
universe@3 | 98 | |
universe@22 | 99 | /* Get arguments */ |
universe@33 | 100 | string_list_t *directories = new_string_list_t(); |
universe@33 | 101 | if (directories == NULL) { |
universe@33 | 102 | fprintf(stderr, "Memory allocation failed.\n"); |
universe@33 | 103 | return 1; |
universe@33 | 104 | } |
universe@30 | 105 | char* includeSuffix = NULL; |
universe@30 | 106 | char* excludeSuffix = NULL; |
universe@8 | 107 | int checked = 0; |
universe@0 | 108 | |
universe@1 | 109 | for (int t = 1 ; t < argc ; t++) { |
universe@1 | 110 | |
universe@66 | 111 | int argflags = checkArgument(argv[t], "hsSrRmvVbeEic"); |
universe@30 | 112 | int paropt = 0; |
universe@1 | 113 | |
universe@59 | 114 | /* h */ |
universe@59 | 115 | if ((argflags & 1) > 0 || strcmp(argv[t], "--help") == 0) { |
universe@59 | 116 | return exit_with_help(settings, 0); |
universe@59 | 117 | } |
universe@30 | 118 | /* s */ |
universe@30 | 119 | if ((argflags & 2) > 0) { |
universe@30 | 120 | if (!checkParamOpt(&paropt) || registerArgument(&checked, 2)) { |
universe@12 | 121 | return exit_with_help(settings, 1); |
universe@0 | 122 | } |
universe@1 | 123 | t++; |
universe@1 | 124 | if (t >= argc) { |
universe@12 | 125 | return exit_with_help(settings, 1); |
universe@1 | 126 | } |
universe@30 | 127 | includeSuffix = argv[t]; |
universe@30 | 128 | } |
universe@30 | 129 | /* S */ |
universe@30 | 130 | if ((argflags & 4) > 0) { |
universe@30 | 131 | if (!checkParamOpt(&paropt) || registerArgument(&checked, 4)) { |
universe@30 | 132 | return exit_with_help(settings, 1); |
universe@30 | 133 | } |
universe@30 | 134 | t++; |
universe@30 | 135 | if (t >= argc) { |
universe@30 | 136 | return exit_with_help(settings, 1); |
universe@30 | 137 | } |
universe@30 | 138 | excludeSuffix = argv[t]; |
universe@0 | 139 | } |
universe@22 | 140 | /* r, R */ |
universe@1 | 141 | if ((argflags & 24) > 0) { |
universe@8 | 142 | if (registerArgument(&checked, 24)) { |
universe@12 | 143 | return exit_with_help(settings, 1); |
universe@0 | 144 | } |
universe@3 | 145 | settings->recursive = true; |
universe@0 | 146 | } |
universe@22 | 147 | /* m */ |
universe@1 | 148 | if ((argflags & 32) > 0) { |
universe@8 | 149 | if (registerArgument(&checked, 32)) { |
universe@12 | 150 | return exit_with_help(settings, 1); |
universe@0 | 151 | } |
universe@3 | 152 | settings->matchesOnly = true; |
universe@0 | 153 | } |
universe@22 | 154 | /* v */ |
universe@14 | 155 | if ((argflags & 64) > 0 || strcmp(argv[t], "--version") == 0) { |
universe@14 | 156 | return exit_with_version(settings); |
universe@14 | 157 | } |
universe@22 | 158 | /* V */ |
universe@16 | 159 | if ((argflags & 128) > 0) { |
universe@16 | 160 | if (registerArgument(&checked, 128)) { |
universe@16 | 161 | return exit_with_help(settings, 1); |
universe@16 | 162 | } |
universe@16 | 163 | settings->verbose = false; |
universe@16 | 164 | } |
universe@22 | 165 | /* b */ |
universe@21 | 166 | if ((argflags & 256) > 0) { |
universe@30 | 167 | if (!checkParamOpt(&paropt) || registerArgument(&checked, 256)) { |
universe@21 | 168 | return exit_with_help(settings, 1); |
universe@21 | 169 | } |
universe@21 | 170 | t++; |
universe@21 | 171 | if (t >= argc) { |
universe@21 | 172 | return exit_with_help(settings, 1); |
universe@21 | 173 | } |
universe@24 | 174 | if (strcasecmp(argv[t], "ignore") == 0) { |
universe@21 | 175 | settings->bfileHeuristics->level = BFILE_IGNORE; |
universe@24 | 176 | } else if (strcasecmp(argv[t], "low") == 0) { |
universe@21 | 177 | settings->bfileHeuristics->level = BFILE_LOW_ACCURACY; |
universe@24 | 178 | } else if (strcasecmp(argv[t], "medium") == 0) { |
universe@21 | 179 | settings->bfileHeuristics->level = BFILE_MEDIUM_ACCURACY; |
universe@24 | 180 | } else if (strcasecmp(argv[t], "high") == 0) { |
universe@21 | 181 | settings->bfileHeuristics->level = BFILE_HIGH_ACCURACY; |
universe@21 | 182 | } else { |
universe@21 | 183 | return exit_with_help(settings, 1); |
universe@21 | 184 | } |
universe@21 | 185 | } |
universe@28 | 186 | /* e */ |
universe@27 | 187 | if ((argflags & 512) > 0) { |
universe@30 | 188 | if (!checkParamOpt(&paropt) || t + 2 >= argc) { |
universe@27 | 189 | return exit_with_help(settings, 1); |
universe@27 | 190 | } |
universe@27 | 191 | t++; add_string(settings->regex->pattern_list, argv[t]); |
universe@27 | 192 | t++; add_string(settings->regex->pattern_list, argv[t]); |
universe@27 | 193 | } |
universe@28 | 194 | /* E */ |
universe@28 | 195 | if ((argflags & 1024) > 0) { |
universe@28 | 196 | t++; |
universe@30 | 197 | if (!checkParamOpt(&paropt) || t >= argc) { |
universe@28 | 198 | return exit_with_help(settings, 1); |
universe@28 | 199 | } |
universe@28 | 200 | add_string(settings->regex->pattern_list, argv[t]); |
universe@28 | 201 | add_string(settings->regex->pattern_list, "$"); |
universe@28 | 202 | } |
universe@61 | 203 | /* i */ |
universe@60 | 204 | if ((argflags & 2048) > 0) { |
universe@66 | 205 | /* cannot be used together with -V */ |
universe@61 | 206 | if (registerArgument(&checked, 128)) { |
universe@61 | 207 | return exit_with_help(settings, 1); |
universe@61 | 208 | } |
universe@61 | 209 | settings->individual_sums = true; |
universe@60 | 210 | } |
universe@66 | 211 | if ((argflags & 4096) > 0) { |
universe@66 | 212 | if (registerArgument(&checked, 4096)) { |
universe@66 | 213 | return exit_with_help(settings, 1); |
universe@66 | 214 | } |
universe@66 | 215 | settings->count_chars = true; |
universe@66 | 216 | settings->regex->count_chars = true; |
universe@66 | 217 | } |
universe@1 | 218 | if (argflags == 0) { |
universe@31 | 219 | /* SHORTCUTS */ |
universe@31 | 220 | if (strcmp(argv[t], "--exclude-cstyle-comments") == 0) { |
universe@31 | 221 | add_string(settings->regex->pattern_list, "\\s*//"); |
universe@31 | 222 | add_string(settings->regex->pattern_list, "$"); |
universe@31 | 223 | add_string(settings->regex->pattern_list, "\\s*/\\*"); |
universe@31 | 224 | add_string(settings->regex->pattern_list, "\\*/\\s*"); |
universe@57 | 225 | } else if (strcmp(argv[t], "--exclude-blank-lines") == 0) { |
universe@57 | 226 | add_string(settings->regex->pattern_list, "^\\s*$"); |
universe@57 | 227 | add_string(settings->regex->pattern_list, "$"); |
universe@31 | 228 | } |
universe@31 | 229 | /* Path */ |
universe@33 | 230 | else { |
universe@33 | 231 | add_string(directories, argv[t]); |
universe@0 | 232 | } |
universe@0 | 233 | } |
universe@0 | 234 | } |
universe@0 | 235 | |
universe@22 | 236 | /* Find tokens */ |
universe@30 | 237 | parseCSL(includeSuffix, settings->includeSuffixes); |
universe@30 | 238 | parseCSL(excludeSuffix, settings->excludeSuffixes); |
universe@0 | 239 | |
universe@33 | 240 | /* Scan directories */ |
universe@28 | 241 | if (regex_compile_all(settings->regex)) { |
universe@61 | 242 | scanresult_t* result = new_scanresult_t(settings); |
universe@44 | 243 | /* Don't waste memory when only the total sum is needed */ |
universe@44 | 244 | string_list_t *output = settings->verbose ? new_string_list_t() : NULL; |
universe@44 | 245 | char *outbuf; |
universe@66 | 246 | const char* result_type = settings->count_chars ? "chars" : "lines"; |
universe@44 | 247 | |
universe@66 | 248 | unsigned total = 0; |
universe@33 | 249 | if (directories->count == 0) { |
universe@33 | 250 | add_string(directories, "."); |
universe@33 | 251 | } |
universe@66 | 252 | for (unsigned t = 0 ; t < directories->count ; t++) { |
universe@60 | 253 | scanDirectory((scanner_t){directories->items[t], 0}, settings, |
universe@61 | 254 | output, result); |
universe@66 | 255 | total += result->result; |
universe@44 | 256 | if (directories->count > 1 ) { |
universe@44 | 257 | outbuf = (char*) malloc(81); |
universe@44 | 258 | memset(outbuf, '-', 79); |
universe@44 | 259 | outbuf[79] = '\n'; |
universe@44 | 260 | outbuf[80] = 0; |
universe@44 | 261 | add_string(output, outbuf); |
universe@44 | 262 | outbuf = (char*) malloc(81); |
universe@66 | 263 | snprintf(outbuf, 81, "%-63s%10u %s\n", directories->items[t], |
universe@66 | 264 | result->result, result_type); |
universe@44 | 265 | add_string(output, outbuf); |
universe@44 | 266 | outbuf = (char*) malloc(81); |
universe@44 | 267 | memset(outbuf, '-', 79); |
universe@44 | 268 | outbuf[79] = '\n'; |
universe@44 | 269 | outbuf[80] = 0; |
universe@44 | 270 | add_string(output, outbuf); |
universe@33 | 271 | } |
universe@33 | 272 | } |
universe@33 | 273 | destroy_string_list_t(directories); |
universe@0 | 274 | |
universe@44 | 275 | /* Print result */ |
universe@44 | 276 | if (settings->verbose) { |
universe@44 | 277 | for (int i = 0 ; i < output->count ; i++) { |
universe@44 | 278 | printf("%s", output->items[i]); |
universe@44 | 279 | free(output->items[i]); |
universe@44 | 280 | } |
universe@44 | 281 | |
universe@61 | 282 | if (result->ext) { |
universe@61 | 283 | if (result->ext->count > 0) { |
universe@66 | 284 | for (unsigned t = 0 ; t < 79 ; t++) { |
universe@61 | 285 | printf("="); |
universe@61 | 286 | } |
universe@61 | 287 | printf("\nIndividual sums:\n"); |
universe@66 | 288 | for (unsigned t = 0 ; t < result->ext->count ; t++) { |
universe@66 | 289 | printf(" %-62s%10u %s\n", |
universe@66 | 290 | result->ext->extensions[t], |
universe@66 | 291 | result->ext->result[t], |
universe@66 | 292 | result_type); |
universe@61 | 293 | } |
universe@61 | 294 | } |
universe@61 | 295 | } |
universe@61 | 296 | |
universe@66 | 297 | for (unsigned t = 0 ; t < 79 ; t++) { |
universe@44 | 298 | printf("="); |
universe@44 | 299 | } |
universe@66 | 300 | printf("\n%73d %s\n", total, result_type); |
universe@44 | 301 | |
universe@44 | 302 | if (settings->confusing_lnlen && |
universe@44 | 303 | settings->regex->pattern_list->count > 0) { |
universe@44 | 304 | |
universe@44 | 305 | printf("\nSome files contain too long lines.\n" |
universe@66 | 306 | "The parser currently supports a maximum line length of %u." |
universe@66 | 307 | "\nThe result might be wrong.\n", MAX_LINELENGTH); |
universe@44 | 308 | } |
universe@44 | 309 | } else { |
universe@66 | 310 | printf("%u", total); |
universe@28 | 311 | } |
universe@61 | 312 | destroy_scanresult_t(result); |
universe@44 | 313 | destroy_string_list_t(output); |
universe@33 | 314 | destroy_settings_t(settings); |
universe@16 | 315 | } |
universe@16 | 316 | |
universe@16 | 317 | fflush(stdout); |
universe@28 | 318 | fflush(stderr); |
universe@0 | 319 | return 0; |
universe@0 | 320 | } |