new feature: count non-whitespace characters v1.4

Fri, 03 Jun 2022 20:05:15 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 03 Jun 2022 20:05:15 +0200
changeset 66
be2084398c37
parent 65
49fa681f3a7e
child 67
767fe7a007f7

new feature: count non-whitespace characters

prebuild.sh file | annotate | diff | comparison | revisions
src/arguments.c file | annotate | diff | comparison | revisions
src/cline.c file | annotate | diff | comparison | revisions
src/regex_parser.c file | annotate | diff | comparison | revisions
src/regex_parser.h file | annotate | diff | comparison | revisions
src/scanner.c file | annotate | diff | comparison | revisions
src/scanner.h file | annotate | diff | comparison | revisions
src/settings.c file | annotate | diff | comparison | revisions
src/settings.h file | annotate | diff | comparison | revisions
src/string_list.h file | annotate | diff | comparison | revisions
test/Makefile.am file | annotate | diff | comparison | revisions
test/codeonly.sh file | annotate | diff | comparison | revisions
test/codeonly_chars.sh file | annotate | diff | comparison | revisions
test/nocomments.sh file | annotate | diff | comparison | revisions
test/testfile.c file | annotate | diff | comparison | revisions
test/total.sh file | annotate | diff | comparison | revisions
test/total_chars.sh file | annotate | diff | comparison | revisions
     1.1 --- a/src/arguments.c	Fri Jun 03 18:13:46 2022 +0200
     1.2 +++ b/src/arguments.c	Fri Jun 03 20:05:15 2022 +0200
     1.3 @@ -27,12 +27,12 @@
     1.4  #include "arguments.h"
     1.5  
     1.6  int checkArgument(const char* arg, const char* expected) {
     1.7 -  int len = strlen(expected);
     1.8 +  size_t len = strlen(expected);
     1.9    int ret = 0;
    1.10  
    1.11    if (arg[0] == '-') {
    1.12      if (arg[1] != '-') {
    1.13 -      for (int t = 0 ; t < len ; t++) {
    1.14 +      for (size_t t = 0 ; t < len ; t++) {
    1.15          ret |= (strchr(arg, expected[t])?1:0) << t;
    1.16        }
    1.17      }
     2.1 --- a/src/cline.c	Fri Jun 03 18:13:46 2022 +0200
     2.2 +++ b/src/cline.c	Fri Jun 03 20:05:15 2022 +0200
     2.3 @@ -39,6 +39,7 @@
     2.4      "\n\nOptions:"
     2.5      "\n  -b <level>          - binary file heuristics level (default medium)"
     2.6      "\n                        One of: ignore low medium high"
     2.7 +    "\n  -c                  - Count non-whitespace characters instead of lines"
     2.8      "\n  -E <pattern>        - Excludes any line matching the <pattern>"
     2.9      "\n  -e <start> <end>    - Excludes lines between <start> and <end>"
    2.10      "\n                        You may use these options multiple times"
    2.11 @@ -63,11 +64,11 @@
    2.12      "\nsource code in your working directory and its subdirectories, type:"
    2.13      "\n  cline -rs .c\n"
    2.14      "\nIf you want to exclude comment lines, you may use the -e/-E option."
    2.15 -    "\nAfter a line matches the regex pattern <start> any following line is"
    2.16 -    "\nnot counted unless a line matches the <end> pattern. A line is still "
    2.17 -    "\ncounted when it does not start or end with the respective patterns."
    2.18 -    "\nPlease note, that cline does not remove whitespace characters as this"
    2.19 -    "\nmight not be reasonable in some cases."
    2.20 +    "\nAfter a line matches the regex pattern <start>, this and any following"
    2.21 +    "\nline is not counted unless a line matches the <end> pattern. A line is"
    2.22 +    "\nstill counted when it does not start or end with the respective pattern."
    2.23 +    "\nPlease note, that cline does not trim the lines before matching against"
    2.24 +    "\nthe pattern."
    2.25      "\n\nExample (C without comments):"
    2.26      "\n  cline -s .c,.h --exclude-cstyle-comments"
    2.27      "\n");
    2.28 @@ -107,7 +108,7 @@
    2.29  
    2.30    for (int t = 1 ; t < argc ; t++) {
    2.31  
    2.32 -    int argflags = checkArgument(argv[t], "hsSrRmvVbeEi");
    2.33 +    int argflags = checkArgument(argv[t], "hsSrRmvVbeEic");
    2.34      int paropt = 0;
    2.35  
    2.36      /* h */
    2.37 @@ -201,12 +202,19 @@
    2.38      }
    2.39      /* i */
    2.40      if ((argflags & 2048) > 0) {
    2.41 -      // cannot be used together with -V
    2.42 +      /* cannot be used together with -V */
    2.43        if (registerArgument(&checked, 128)) {
    2.44          return exit_with_help(settings, 1);
    2.45        }
    2.46        settings->individual_sums = true;
    2.47      }
    2.48 +    if ((argflags & 4096) > 0) {
    2.49 +        if (registerArgument(&checked, 4096)) {
    2.50 +            return exit_with_help(settings, 1);
    2.51 +        }
    2.52 +        settings->count_chars = true;
    2.53 +        settings->regex->count_chars = true;
    2.54 +    }
    2.55      if (argflags == 0) {
    2.56        /* SHORTCUTS */
    2.57        if (strcmp(argv[t], "--exclude-cstyle-comments") == 0) {
    2.58 @@ -235,15 +243,16 @@
    2.59      /* Don't waste memory when only the total sum is needed */
    2.60      string_list_t *output = settings->verbose ? new_string_list_t() : NULL;
    2.61      char *outbuf;
    2.62 +    const char* result_type = settings->count_chars ? "chars" : "lines";
    2.63      
    2.64 -    int total = 0;
    2.65 +    unsigned total = 0;
    2.66      if (directories->count == 0) {
    2.67          add_string(directories, ".");
    2.68      }
    2.69 -    for (int t = 0 ; t < directories->count ; t++) {
    2.70 +    for (unsigned t = 0 ; t < directories->count ; t++) {
    2.71        scanDirectory((scanner_t){directories->items[t], 0}, settings,
    2.72            output, result);
    2.73 -      total += result->lines;
    2.74 +      total += result->result;
    2.75        if (directories->count > 1 ) {
    2.76          outbuf = (char*) malloc(81);
    2.77          memset(outbuf, '-', 79);
    2.78 @@ -251,8 +260,8 @@
    2.79          outbuf[80] = 0;
    2.80          add_string(output, outbuf);
    2.81          outbuf = (char*) malloc(81);
    2.82 -        snprintf(outbuf, 81, "%-63s%10d lines\n", directories->items[t],
    2.83 -                result->lines);
    2.84 +        snprintf(outbuf, 81, "%-63s%10u %s\n", directories->items[t],
    2.85 +                result->result, result_type);
    2.86          add_string(output, outbuf);
    2.87          outbuf = (char*) malloc(81);
    2.88          memset(outbuf, '-', 79);
    2.89 @@ -272,32 +281,33 @@
    2.90        
    2.91        if (result->ext) {
    2.92          if (result->ext->count > 0) {
    2.93 -          for (int t = 0 ; t < 79 ; t++) {
    2.94 +          for (unsigned t = 0 ; t < 79 ; t++) {
    2.95              printf("=");
    2.96            }
    2.97            printf("\nIndividual sums:\n");
    2.98 -          for (int t = 0 ; t < result->ext->count ; t++) {
    2.99 -            printf(" %-62s%10d lines\n",
   2.100 -                    result->ext->extensions[t],
   2.101 -                    result->ext->lines[t]);
   2.102 +          for (unsigned t = 0 ; t < result->ext->count ; t++) {
   2.103 +            printf(" %-62s%10u %s\n",
   2.104 +                   result->ext->extensions[t],
   2.105 +                   result->ext->result[t],
   2.106 +                   result_type);
   2.107            }
   2.108          }
   2.109        }
   2.110        
   2.111 -      for (int t = 0 ; t < 79 ; t++) {
   2.112 +      for (unsigned t = 0 ; t < 79 ; t++) {
   2.113          printf("=");
   2.114        }
   2.115 -      printf("\n%73d lines\n", total);
   2.116 +      printf("\n%73u %s\n", total, result_type);
   2.117  
   2.118        if (settings->confusing_lnlen &&
   2.119            settings->regex->pattern_list->count > 0) {
   2.120  
   2.121          printf("\nSome files contain too long lines.\n"
   2.122 -          "The regex parser currently supports a maximum line length of %d."
   2.123 -          "\nThe result might be wrong.\n", REGEX_MAX_LINELENGTH);
   2.124 +          "The parser currently supports a maximum line length of %u."
   2.125 +          "\nThe result might be wrong.\n", MAX_LINELENGTH);
   2.126        }
   2.127      } else {
   2.128 -      printf("%d", total);
   2.129 +      printf("%u", total);
   2.130      }
   2.131      destroy_scanresult_t(result);
   2.132      destroy_string_list_t(output);
     3.1 --- a/src/regex_parser.c	Fri Jun 03 18:13:46 2022 +0200
     3.2 +++ b/src/regex_parser.c	Fri Jun 03 20:05:15 2022 +0200
     3.3 @@ -25,26 +25,28 @@
     3.4   */
     3.5  
     3.6  #include "regex_parser.h"
     3.7 +#include <ctype.h>
     3.8  
     3.9  regex_parser_t* new_regex_parser_t() {
    3.10    regex_parser_t* ret = malloc(sizeof(regex_parser_t));
    3.11    if (ret != NULL) {
    3.12      ret->pattern_list = new_string_list_t();
    3.13 -    ret->matched_lines = 0;
    3.14 +    ret->matched_counted = 0;
    3.15      ret->pattern_match = 0;
    3.16      ret->compiled_patterns = NULL;
    3.17      ret->compiled_pattern_count = 0;
    3.18 +    ret->count_chars = false;
    3.19    }
    3.20    return ret;
    3.21  }
    3.22  
    3.23  void regex_parser_reset(regex_parser_t* parser) {
    3.24 -  parser->pattern_match = parser->matched_lines = 0;
    3.25 +  parser->pattern_match = parser->matched_counted = 0;
    3.26  }
    3.27  
    3.28  void regex_destcomppats(regex_parser_t* parser) {
    3.29    if (parser->compiled_patterns != NULL) {
    3.30 -    for (int i = 0 ; i < parser->compiled_pattern_count ; i++) {
    3.31 +    for (unsigned i = 0 ; i < parser->compiled_pattern_count ; i++) {
    3.32        if (parser->compiled_patterns[i] != NULL) {
    3.33          free(parser->compiled_patterns[i]);
    3.34        }
    3.35 @@ -65,13 +67,27 @@
    3.36    return parser->pattern_match > 0;
    3.37  }
    3.38  
    3.39 +static unsigned regex_parser_count_chars(const char* input,
    3.40 +                                         unsigned start, unsigned end) {
    3.41 +  unsigned ret = 0;
    3.42 +  for (unsigned i = start ; i < end ; i++) {
    3.43 +    ret += isspace((unsigned char) input[i]) ? 0 : 1;
    3.44 +  }
    3.45 +  return ret;
    3.46 +}
    3.47 +
    3.48  int regex_parser_do(regex_parser_t* parser, char* input) {
    3.49    int err = REG_NOMATCH;
    3.50    if (parser->compiled_pattern_count > 0) {
    3.51      regmatch_t match;
    3.52  
    3.53      if (regex_parser_matching(parser)) {
    3.54 -      parser->matched_lines++;
    3.55 +      if (parser->count_chars) {
    3.56 +        parser->matched_counted +=
    3.57 +            regex_parser_count_chars(input, 0, strlen(input));
    3.58 +      } else {
    3.59 +        parser->matched_counted++;
    3.60 +      }
    3.61  
    3.62        err = regexec(parser->compiled_patterns[parser->pattern_match],
    3.63            input, 1, &match, 0);
    3.64 @@ -80,25 +96,42 @@
    3.65        }
    3.66        if (err == 0) {
    3.67          parser->pattern_match = 0;
    3.68 -        /* do not match line, if it does not end with the pattern */
    3.69 -        if (match.rm_eo < strlen(input)) {
    3.70 -          parser->matched_lines--;
    3.71 +        size_t input_len = strlen(input);
    3.72 +        if (match.rm_eo < input_len) {
    3.73 +          if (parser->count_chars) {
    3.74 +            /* do not exclude chars that occur after pattern end */
    3.75 +            parser->matched_counted -=
    3.76 +                regex_parser_count_chars(input, match.rm_eo, input_len);
    3.77 +          } else {
    3.78 +            /* do not exclude line, if it does not end with the pattern */
    3.79 +            parser->matched_counted--;
    3.80 +          }
    3.81          }
    3.82        }
    3.83      } else {
    3.84 -      for (int i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
    3.85 +      for (unsigned i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
    3.86          err = regexec(parser->compiled_patterns[i], input, 1, &match, 0);
    3.87          if (err > 0 && err != REG_NOMATCH) {
    3.88            fprintf(stderr, "Regex-Error: 0x%08x", err);
    3.89          }
    3.90          if (err == 0) {
    3.91 +          /* a start pattern matches, start counting */
    3.92 +          parser->matched_counted = 0;
    3.93 +          /* Check, if end pattern is also in this line */
    3.94            parser->pattern_match = i+1;
    3.95 -          parser->matched_lines = 0;
    3.96 -          /* Check, if end pattern is also in this line */
    3.97            regex_parser_do(parser, input);
    3.98 -          /* do not match line, if it does not start with the pattern */
    3.99 -          if (match.rm_so > 0 && parser->matched_lines > 0) {
   3.100 -            parser->matched_lines--;
   3.101 +          /* If something was found, determine what exactly to exclude */
   3.102 +          if (parser->matched_counted > 0) {
   3.103 +            if (parser->count_chars) {
   3.104 +              /* do not exclude the characters before the pattern */
   3.105 +              parser->matched_counted -=
   3.106 +                  regex_parser_count_chars(input, 0, match.rm_so);
   3.107 +            } else {
   3.108 +              /* do not match line, if it does not start with the pattern */
   3.109 +              if (match.rm_so > 0) {
   3.110 +                parser->matched_counted--;
   3.111 +              }
   3.112 +            }
   3.113            }
   3.114            break;
   3.115          }
   3.116 @@ -117,7 +150,7 @@
   3.117      parser->compiled_pattern_count = pcount;
   3.118  
   3.119      regex_t* re;
   3.120 -    for (int i = 0 ; i < pcount ; i++) {
   3.121 +    for (unsigned i = 0 ; i < pcount ; i++) {
   3.122        re = malloc(sizeof(regex_t));
   3.123        if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) {
   3.124          parser->compiled_patterns[i] = re;
     4.1 --- a/src/regex_parser.h	Fri Jun 03 18:13:46 2022 +0200
     4.2 +++ b/src/regex_parser.h	Fri Jun 03 20:05:15 2022 +0200
     4.3 @@ -27,7 +27,7 @@
     4.4  #ifndef REGEX_PARSER_H_
     4.5  #define REGEX_PARSER_H_
     4.6  
     4.7 -#define REGEX_MAX_LINELENGTH           2048
     4.8 +#define MAX_LINELENGTH           4096u
     4.9  
    4.10  #include <sys/types.h>
    4.11  #include <stdbool.h>
    4.12 @@ -40,7 +40,8 @@
    4.13    size_t compiled_pattern_count;
    4.14    unsigned int pattern_match; /* save position of end pattern to match -
    4.15                                   NULL when a start pattern shall match first */
    4.16 -  unsigned int matched_lines;
    4.17 +  unsigned int matched_counted;
    4.18 +  bool count_chars;
    4.19  } regex_parser_t;
    4.20  
    4.21  #ifdef _cplusplus
     5.1 --- a/src/scanner.c	Fri Jun 03 18:13:46 2022 +0200
     5.2 +++ b/src/scanner.c	Fri Jun 03 20:05:15 2022 +0200
     5.3 @@ -29,22 +29,23 @@
     5.4  #include "bfile_heuristics.h"
     5.5  #include "regex_parser.h"
     5.6  #include <sys/stat.h>
     5.7 +#include <ctype.h>
     5.8  
     5.9  typedef struct filelist filelist_t;
    5.10  
    5.11  struct filelist {
    5.12    char *displayname;
    5.13 -  int displayname_len;
    5.14 +  unsigned displayname_len;
    5.15    char *filename;
    5.16    char *ext;
    5.17 -  int st_mode;
    5.18 +  unsigned st_mode;
    5.19    filelist_t *next;
    5.20  };
    5.21  
    5.22  static bool testSuffix(char* filename, string_list_t* list) {
    5.23    bool ret = false;
    5.24 -  int tokenlen, fnamelen = strlen(filename);
    5.25 -  for (int t = 0 ; t < list->count ; t++) {
    5.26 +  size_t tokenlen, fnamelen = strlen(filename);
    5.27 +  for (size_t t = 0 ; t < list->count ; t++) {
    5.28      tokenlen = strlen(list->items[t]);
    5.29      if (fnamelen >= tokenlen && tokenlen > 0) {
    5.30        if (strncmp(filename+fnamelen-tokenlen,
    5.31 @@ -57,34 +58,34 @@
    5.32    return ret;
    5.33  }
    5.34  
    5.35 -static void addLinesPerExtension(scanresult_ext_t* result,
    5.36 -        char* ext, int lines) {
    5.37 +static void addResultPerExtension(scanresult_ext_t* result,
    5.38 +                                  char* ext, unsigned value) {
    5.39    if (!result) return;
    5.40    
    5.41    if (!ext) ext = "w/o";
    5.42    
    5.43 -  for (int i = 0 ; i < result->count ; i++) {
    5.44 +  for (unsigned i = 0 ; i < result->count ; i++) {
    5.45      if (strcasecmp(result->extensions[i], ext) == 0) {
    5.46 -      result->lines[i] += lines;
    5.47 +      result->result[i] += value;
    5.48        return;
    5.49      }
    5.50    }
    5.51    
    5.52    if (result->count == result->capacity) {
    5.53 -    int newcap = result->capacity+8;
    5.54 +    unsigned newcap = result->capacity+8;
    5.55      char** extarr = realloc(result->extensions, newcap*sizeof(char*));
    5.56 -    int* linesarr = realloc(result->lines, newcap*sizeof(int));
    5.57 -    if (!extarr || !linesarr) {
    5.58 +    unsigned* resultarr = realloc(result->result, newcap*sizeof(unsigned));
    5.59 +    if (!extarr || !resultarr) {
    5.60        fprintf(stderr, "Memory allocation error.\n");
    5.61        abort();
    5.62      }
    5.63      result->extensions = extarr;
    5.64 -    result->lines = linesarr;
    5.65 +    result->result = resultarr;
    5.66      result->capacity = newcap;
    5.67    }
    5.68    
    5.69    result->extensions[result->count] = strdup(ext);
    5.70 -  result->lines[result->count] = lines;
    5.71 +  result->result[result->count] = value;
    5.72    result->count++;
    5.73  }
    5.74  
    5.75 @@ -99,21 +100,20 @@
    5.76  void destroy_scanresult_t(scanresult_t* result) {
    5.77    if (result->ext) {
    5.78      if (result->ext->count > 0) {
    5.79 -      for (int i = 0 ; i < result->ext->count ; i++) {
    5.80 +      for (unsigned i = 0 ; i < result->ext->count ; i++) {
    5.81          free(result->ext->extensions[i]);
    5.82        }
    5.83        free(result->ext->extensions);
    5.84 -      free(result->ext->lines);
    5.85 +      free(result->ext->result);
    5.86      }
    5.87      free(result->ext);
    5.88    }
    5.89    free(result);
    5.90  }
    5.91  
    5.92 +static filelist_t *buildFileList(scanner_t scanner, settings_t* settings) {
    5.93  
    5.94 -static filelist_t *buildFileList(scanner_t scanner, settings_t* settings,
    5.95 -    filelist_t* list) {
    5.96 -  
    5.97 +  filelist_t* list = NULL;
    5.98    DIR *dirf;
    5.99    struct dirent *entry;
   5.100    struct stat statbuf;
   5.101 @@ -159,7 +159,7 @@
   5.102        }
   5.103        
   5.104        if (list) {
   5.105 -        // create fake root to have a pointer on the true root
   5.106 +        /* create fake root to have a pointer on the true root */
   5.107          filelist_t root;
   5.108          root.next = list;
   5.109          filelist_t *parent = &root;
   5.110 @@ -188,12 +188,12 @@
   5.111  void scanDirectory(scanner_t scanner, settings_t* settings,
   5.112      string_list_t* output, scanresult_t* result) {
   5.113  
   5.114 -  result->lines = 0;
   5.115 -  int a;
   5.116 +  result->result = 0;
   5.117    bool bfile;
   5.118    char *outbuf;
   5.119 +  const char *result_type = settings->count_chars ? "chars" : "lines";
   5.120  
   5.121 -  filelist_t *filelist = buildFileList(scanner, settings, NULL);
   5.122 +  filelist_t *filelist = buildFileList(scanner, settings);
   5.123  
   5.124    while (filelist != NULL) {
   5.125  
   5.126 @@ -206,23 +206,24 @@
   5.127          scanDirectory(
   5.128              (scanner_t) {filelist->filename, scanner.spaces+1},
   5.129              settings, recoutput, &recresult);
   5.130 -        result->lines += recresult.lines;
   5.131 +        result->result += recresult.result;
   5.132          if (!settings->matchesOnly || recoutput->count > 0) {
   5.133            outbuf = (char*) malloc(81);
   5.134 -          snprintf(outbuf, 81, "%*s/%*s%13d lines\n",
   5.135 +          snprintf(outbuf, 81, "%*s/%*s%13u %s\n",
   5.136                filelist->displayname_len+scanner.spaces, filelist->displayname,
   5.137                60-filelist->displayname_len-scanner.spaces-1, "",
   5.138 -              recresult.lines);
   5.139 +              recresult.result, result_type);
   5.140            add_string(output, outbuf);
   5.141 -          for (int i = 0 ; i < recoutput->count ; i++) {
   5.142 +          for (unsigned i = 0 ; i < recoutput->count ; i++) {
   5.143              add_string(output, recoutput->items[i]);
   5.144            }
   5.145          }
   5.146          destroy_string_list_t(recoutput);
   5.147        } else {
   5.148          outbuf = (char*) malloc(81);
   5.149 -        snprintf(outbuf, 81, "%*s\n", filelist->displayname_len+scanner.spaces,
   5.150 -          filelist->displayname);
   5.151 +        snprintf(outbuf, 81, "%*s\n",
   5.152 +                 filelist->displayname_len+scanner.spaces,
   5.153 +                 filelist->displayname);
   5.154          add_string(output, outbuf);
   5.155        }
   5.156      } else {
   5.157 @@ -230,44 +231,55 @@
   5.158          || testSuffix(filelist->displayname, settings->includeSuffixes))
   5.159          && !testSuffix(filelist->displayname, settings->excludeSuffixes)) {
   5.160  
   5.161 -        /* Count lines */
   5.162 -        int lines = 0;
   5.163 +        /* Count */
   5.164 +        unsigned res_value = 0;
   5.165          bfile = false;
   5.166          bfile_reset(settings->bfileHeuristics);
   5.167          regex_parser_reset(settings->regex);
   5.168 -        char line_buffer[REGEX_MAX_LINELENGTH];
   5.169 -        int line_buffer_offset = 0;
   5.170 +        char line_buffer[MAX_LINELENGTH];
   5.171 +        unsigned line_buffer_pos = 0;
   5.172  
   5.173          FILE *file = fopen(filelist->filename, "r");
   5.174          if (file == NULL) {
   5.175            outbuf = (char*) malloc(81);
   5.176 -          snprintf(outbuf, 81, "%*s", filelist->displayname_len+scanner.spaces,
   5.177 -              filelist->displayname);
   5.178 +          snprintf(outbuf, 81, "%*s",
   5.179 +                   filelist->displayname_len+scanner.spaces,
   5.180 +                   filelist->displayname);
   5.181            add_string(output, outbuf);
   5.182            perror("  File acces failed");
   5.183          } else {
   5.184 +          int a;
   5.185            do {
   5.186              a = fgetc(file);
   5.187  
   5.188              bfile = bfile_check(settings->bfileHeuristics, a);
   5.189  
   5.190 +            /* ignore carriage return completely */
   5.191 +            if (a == 13) continue;
   5.192 +
   5.193              if (a == 10 || a == EOF) {
   5.194 -              line_buffer[line_buffer_offset] = 0;
   5.195 +              line_buffer[line_buffer_pos] = 0;
   5.196                if (regex_parser_do(settings->regex, line_buffer) == 0) {
   5.197 -                /* Only subtract lines when matching has finished */
   5.198 +                /* Subtract excluded lines/chars when matching has finished */
   5.199                  if (!regex_parser_matching(settings->regex)) {
   5.200 -                  lines -= settings->regex->matched_lines;
   5.201 +                  res_value -= settings->regex->matched_counted;
   5.202                  }
   5.203                }
   5.204  
   5.205 -              line_buffer_offset = 0;
   5.206 -              lines++;
   5.207 +              if (settings->count_chars) {
   5.208 +                for (size_t i = 0 ; i < line_buffer_pos ; i++) {
   5.209 +                  if (!isspace((unsigned char) line_buffer[i])) res_value++;
   5.210 +                }
   5.211 +              } else {
   5.212 +                res_value++;
   5.213 +              }
   5.214 +              line_buffer_pos = 0;
   5.215              } else {
   5.216 -              if (line_buffer_offset < REGEX_MAX_LINELENGTH) {
   5.217 -                line_buffer[line_buffer_offset] = a;
   5.218 -                line_buffer_offset++;
   5.219 +              if (line_buffer_pos < MAX_LINELENGTH - 1) {
   5.220 +                line_buffer[line_buffer_pos] = (char) a;
   5.221 +                line_buffer_pos++;
   5.222                } else {
   5.223 -                line_buffer[line_buffer_offset-1] = 0;
   5.224 +                line_buffer[line_buffer_pos - 1] = 0;
   5.225                  settings->confusing_lnlen = true;
   5.226                }
   5.227              }
   5.228 @@ -285,12 +297,17 @@
   5.229                add_string(output, outbuf);
   5.230              }
   5.231            } else {
   5.232 -            addLinesPerExtension(result->ext, filelist->ext, lines);
   5.233 -            result->lines += lines;
   5.234 +            addResultPerExtension(result->ext, filelist->ext, res_value);
   5.235 +            result->result += res_value;
   5.236              outbuf = (char*) malloc(81);
   5.237 -            snprintf(outbuf, 81, "%*s%*s%13d lines\n",
   5.238 -                filelist->displayname_len+scanner.spaces, filelist->displayname,
   5.239 -                60-filelist->displayname_len-scanner.spaces, "", lines);
   5.240 +            snprintf(outbuf, 81, "%*s%*s%13u %s\n",
   5.241 +                     filelist->displayname_len+scanner.spaces,
   5.242 +                     filelist->displayname,
   5.243 +                     60-filelist->displayname_len-scanner.spaces,
   5.244 +                     "",
   5.245 +                     res_value,
   5.246 +                     result_type
   5.247 +            );
   5.248              add_string(output, outbuf);
   5.249            }
   5.250          }
     6.1 --- a/src/scanner.h	Fri Jun 03 18:13:46 2022 +0200
     6.2 +++ b/src/scanner.h	Fri Jun 03 20:05:15 2022 +0200
     6.3 @@ -33,18 +33,18 @@
     6.4  
     6.5  typedef struct {
     6.6    char *dir;
     6.7 -  int spaces;
     6.8 +  unsigned spaces;
     6.9  } scanner_t;
    6.10  
    6.11  typedef struct {
    6.12 -  int count;
    6.13 -  int capacity;
    6.14 +  unsigned count;
    6.15 +  unsigned capacity;
    6.16    char** extensions;
    6.17 -  int* lines; 
    6.18 +  unsigned* result;
    6.19  } scanresult_ext_t;
    6.20  
    6.21  typedef struct {
    6.22 -  int lines;
    6.23 +  unsigned result;
    6.24    scanresult_ext_t* ext;
    6.25  } scanresult_t;
    6.26  
     7.1 --- a/src/settings.c	Fri Jun 03 18:13:46 2022 +0200
     7.2 +++ b/src/settings.c	Fri Jun 03 20:05:15 2022 +0200
     7.3 @@ -42,7 +42,8 @@
     7.4      settings->bfileHeuristics    = new_bfile_heuristics_t();
     7.5      settings->confusing_lnlen    = false;
     7.6      settings->regex              = new_regex_parser_t();
     7.7 -    settings->individual_sums           = false;
     7.8 +    settings->individual_sums    = false;
     7.9 +    settings->count_chars        = false;
    7.10    }
    7.11  
    7.12    return settings;
     8.1 --- a/src/settings.h	Fri Jun 03 18:13:46 2022 +0200
     8.2 +++ b/src/settings.h	Fri Jun 03 20:05:15 2022 +0200
     8.3 @@ -32,7 +32,7 @@
     8.4  #include "bfile_heuristics.h"
     8.5  #include "regex_parser.h"
     8.6  
     8.7 -typedef struct _settings {
     8.8 +typedef struct settings_s {
     8.9    string_list_t* includeSuffixes;
    8.10    string_list_t* excludeSuffixes;
    8.11    regex_parser_t* regex;
    8.12 @@ -43,6 +43,7 @@
    8.13    bool verbose;
    8.14    bool confusing_lnlen; /* this flag is set by the scanner */
    8.15    bool individual_sums;
    8.16 +  bool count_chars;
    8.17  } settings_t;
    8.18  
    8.19  #ifdef _cplusplus
     9.1 --- a/src/string_list.h	Fri Jun 03 18:13:46 2022 +0200
     9.2 +++ b/src/string_list.h	Fri Jun 03 20:05:15 2022 +0200
     9.3 @@ -29,7 +29,7 @@
     9.4  
     9.5  #include "stdinc.h"
     9.6  
     9.7 -typedef struct _string_list {
     9.8 +typedef struct string_list_s {
     9.9    size_t count;
    9.10    char** items;
    9.11  } string_list_t;
    10.1 --- a/test/Makefile.am	Fri Jun 03 18:13:46 2022 +0200
    10.2 +++ b/test/Makefile.am	Fri Jun 03 20:05:15 2022 +0200
    10.3 @@ -22,5 +22,5 @@
    10.4  # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
    10.5  
    10.6  TESTS = $(check_SCRIPTS)
    10.7 -check_SCRIPTS = total.sh nocomments.sh codeonly.sh
    10.8 +check_SCRIPTS = total.sh total_chars.sh nocomments.sh codeonly.sh codeonly_chars.sh
    10.9  EXTRA_DIST = $(check_SCRIPTS) testfile.c
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/test/codeonly_chars.sh	Fri Jun 03 20:05:15 2022 +0200
    11.3 @@ -0,0 +1,44 @@
    11.4 +#!/bin/sh
    11.5 +#
    11.6 +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
    11.7 +#
    11.8 +# Copyright 2018 Mike Becker. All rights reserved.
    11.9 +#
   11.10 +# Redistribution and use in source and binary forms, with or without
   11.11 +# modification, are permitted provided that the following conditions are met:
   11.12 +#
   11.13 +#   1. Redistributions of source code must retain the above copyright
   11.14 +#      notice, this list of conditions and the following disclaimer.
   11.15 +#
   11.16 +#   2. Redistributions in binary form must reproduce the above copyright
   11.17 +#      notice, this list of conditions and the following disclaimer in the
   11.18 +#      documentation and/or other materials provided with the distribution.
   11.19 +#
   11.20 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   11.21 +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   11.22 +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   11.23 +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
   11.24 +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   11.25 +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   11.26 +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   11.27 +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   11.28 +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   11.29 +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   11.30 +# POSSIBILITY OF SUCH DAMAGE.
   11.31 +#
   11.32 +
   11.33 +echo -n "Verifying correctness of code only character count: "
   11.34 +
   11.35 +clineprg="$(pwd)/../src/cline"
   11.36 +
   11.37 +cd $srcdir
   11.38 +count=`$clineprg -Vcs testfile.c --exclude-cstyle-comments`
   11.39 +expected=50
   11.40 +
   11.41 +if [ "$count" -eq "$expected" ]; then
   11.42 +    echo "OK."
   11.43 +    exit 0;
   11.44 +else
   11.45 +    echo "FAIL! $count is not $expected"
   11.46 +    exit 1;
   11.47 +fi
    12.1 --- a/test/testfile.c	Fri Jun 03 18:13:46 2022 +0200
    12.2 +++ b/test/testfile.c	Fri Jun 03 20:05:15 2022 +0200
    12.3 @@ -4,7 +4,7 @@
    12.4  // And add other stuff below.
    12.5  
    12.6  
    12.7 -int main(int argc, char* argv) { // comments behind code don't matter
    12.8 +int main(int argc, char** argv) { // comments behind code don't matter
    12.9      
   12.10      int zero /* in block comments we are genius enough to
   12.11                * detect, if there is still some code in the line
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/test/total_chars.sh	Fri Jun 03 20:05:15 2022 +0200
    13.3 @@ -0,0 +1,44 @@
    13.4 +#!/bin/sh
    13.5 +#
    13.6 +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
    13.7 +#
    13.8 +# Copyright 2018 Mike Becker. All rights reserved.
    13.9 +#
   13.10 +# Redistribution and use in source and binary forms, with or without
   13.11 +# modification, are permitted provided that the following conditions are met:
   13.12 +#
   13.13 +#   1. Redistributions of source code must retain the above copyright
   13.14 +#      notice, this list of conditions and the following disclaimer.
   13.15 +#
   13.16 +#   2. Redistributions in binary form must reproduce the above copyright
   13.17 +#      notice, this list of conditions and the following disclaimer in the
   13.18 +#      documentation and/or other materials provided with the distribution.
   13.19 +#
   13.20 +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   13.21 +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   13.22 +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   13.23 +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
   13.24 +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   13.25 +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   13.26 +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   13.27 +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   13.28 +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   13.29 +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   13.30 +# POSSIBILITY OF SUCH DAMAGE.
   13.31 +#
   13.32 +
   13.33 +echo -n "Verifying correctness of total character count: "
   13.34 +
   13.35 +clineprg="$(pwd)/../src/cline"
   13.36 +
   13.37 +cd $srcdir
   13.38 +count=`$clineprg -Vcs testfile.c`
   13.39 +expected=334
   13.40 +
   13.41 +if [ "$count" -eq "$expected" ]; then
   13.42 +    echo "OK."
   13.43 +    exit 0;
   13.44 +else
   13.45 +    echo "FAIL! $count is not $expected"
   13.46 +    exit 1;
   13.47 +fi

mercurial