added regexp_parser struct and compile function

Thu, 26 Jan 2012 15:55:52 +0100

author
Mike Becker <universe@uap-core.de>
date
Thu, 26 Jan 2012 15:55:52 +0100
changeset 27
95a958e3de88
parent 26
853a1181884b
child 28
72a98cbcb9f1

added regexp_parser struct and compile function

bfile_heuristics.h file | annotate | diff | comparison | revisions
cline.c file | annotate | diff | comparison | revisions
regex_parser.c file | annotate | diff | comparison | revisions
regex_parser.h file | annotate | diff | comparison | revisions
scanner.c file | annotate | diff | comparison | revisions
settings.c file | annotate | diff | comparison | revisions
settings.h file | annotate | diff | comparison | revisions
string_list.h file | annotate | diff | comparison | revisions
     1.1 --- a/bfile_heuristics.h	Thu Dec 01 17:06:27 2011 +0100
     1.2 +++ b/bfile_heuristics.h	Thu Jan 26 15:55:52 2012 +0100
     1.3 @@ -16,9 +16,9 @@
     1.4  #define BFILE_HIGH_ACCURACY    0x04
     1.5  
     1.6  typedef struct {
     1.7 -  int level;
     1.8 -  int bcount; /* 'binary' character count */
     1.9 -  int tcount; /* total count */
    1.10 +  unsigned int level;
    1.11 +  unsigned int bcount; /* 'binary' character count */
    1.12 +  unsigned int tcount; /* total count */
    1.13  } bfile_heuristics_t;
    1.14  
    1.15  #ifdef _cplusplus
     2.1 --- a/cline.c	Thu Dec 01 17:06:27 2011 +0100
     2.2 +++ b/cline.c	Thu Jan 26 15:55:52 2012 +0100
     2.3 @@ -10,17 +10,20 @@
     2.4  #include "settings.h"
     2.5  #include "arguments.h"
     2.6  #include "stream.h"
     2.7 +#include "regex_parser.h"
     2.8  
     2.9  void printHelpText() {
    2.10    const char* helpText = 
    2.11      "\nUsage:"
    2.12 -    "\n      cline [-hrmvV][-s suffix][-b level][<directory>]"
    2.13 -    "\n      cline [-hrmvV][-S suffix][-b level][<directory>]"
    2.14 +    "\n      cline [Options] [Directory]"
    2.15 +    "\n      cline [Options] [Directory]"
    2.16      "\n\nCounts the line terminator characters (\\n) within all"
    2.17      " files in the specified\ndirectory."
    2.18      "\n\nOptions:"
    2.19      "\n  -b <level>          - binary file heuristics level (default medium)"
    2.20      "\n                        One of: ignore low medium high"
    2.21 +    "\n  -e <start> <end>    - Excludes lines between <start> and <end>"
    2.22 +    "\n                        You may use this option multiple times"
    2.23      "\n  -h, --help          - this help text"
    2.24      "\n  -m                  - print information about matching files only"
    2.25      "\n  -s <suffixes>       - only count files with these suffixes (separated"
    2.26 @@ -35,7 +38,10 @@
    2.27      "\n  cline ./\n"
    2.28      "So each file in the working directory is counted. If you want to count C"
    2.29      "\nsource code in your working directory and its subdirectories, type:"
    2.30 -    "\n  cline -rs .c\n";
    2.31 +    "\n  cline -rs .c\n"
    2.32 +    "\nIf you want to exclude comment lines, you may use the -e option."
    2.33 +    "\nAfter a line matches the regex pattern <start> any following line is"
    2.34 +    "\nnot counted unless a line matches the <end> pattern.";
    2.35      
    2.36    printf(helpText);
    2.37  }
    2.38 @@ -68,7 +74,7 @@
    2.39  
    2.40    for (int t = 1 ; t < argc ; t++) {
    2.41  
    2.42 -    int argflags = checkArgument(argv[t], "hsSrRmvVb");
    2.43 +    int argflags = checkArgument(argv[t], "hsSrRmvVbe");
    2.44  
    2.45      /* s, S */
    2.46      if ((argflags & 6) > 0) {
    2.47 @@ -132,6 +138,13 @@
    2.48          return exit_with_help(settings, 1);
    2.49        }
    2.50      }
    2.51 +    if ((argflags & 512) > 0) {
    2.52 +      if (t + 2 >= argc) {
    2.53 +        return exit_with_help(settings, 1);
    2.54 +      }
    2.55 +      t++; add_string(settings->regex->pattern_list, argv[t]);
    2.56 +      t++; add_string(settings->regex->pattern_list, argv[t]);
    2.57 +    }
    2.58      /* Path */
    2.59      if (argflags == 0) {
    2.60        if (registerArgument(&checked, 1024)) {
    2.61 @@ -154,6 +167,7 @@
    2.62    }
    2.63  
    2.64    /* Scan directory */
    2.65 +  regex_compile_all(settings->regex);
    2.66    int lines = scanDirectory((scanner_t){directory, 0}, settings);
    2.67    destroy_settings_t(settings);
    2.68  
    2.69 @@ -163,11 +177,10 @@
    2.70    }
    2.71    printf("\n%73d lines\n", lines);
    2.72  
    2.73 -  if (settings->confusing_lnlen) {
    2.74 -    /* TODO: display this only when the regexp parser is used */
    2.75 +  if (settings->confusing_lnlen && settings->regex->pattern_list->count > 0) {
    2.76      printf("\nSome files contain too long lines.\n"
    2.77 -      "The regexp parser currently supports a maximum line length of 2048."
    2.78 -      "\nThe result might be wrong.\n");
    2.79 +      "The regex parser currently supports a maximum line length of %d."
    2.80 +      "\nThe result might be wrong.\n", REGEX_MAX_LINELENGTH);
    2.81    }
    2.82  
    2.83    if (!settings->verbose) {
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/regex_parser.c	Thu Jan 26 15:55:52 2012 +0100
     3.3 @@ -0,0 +1,50 @@
     3.4 +/*
     3.5 + * regex_parser.c
     3.6 + *
     3.7 + *  Created on: 26.01.2012
     3.8 + *      Author: fox3049
     3.9 + */
    3.10 +
    3.11 +#include "regex_parser.h"
    3.12 +
    3.13 +regex_parser_t* new_regex_parser_t() {
    3.14 +  regex_parser_t* ret = malloc(sizeof(regex_parser_t));
    3.15 +  if (ret != NULL) {
    3.16 +    ret->pattern_list = new_string_list_t();
    3.17 +    ret->matched_lines = 0;
    3.18 +    ret->pattern_match = 0;
    3.19 +    ret->compiled_patterns = NULL;
    3.20 +  }
    3.21 +  return ret;
    3.22 +}
    3.23 +
    3.24 +void destroy_regex_parser_t(regex_parser_t* parser) {
    3.25 +  destroy_string_list_t(parser->pattern_list);
    3.26 +  free(parser);
    3.27 +}
    3.28 +
    3.29 +bool regex_parser_matching(regex_parser_t* parser) {
    3.30 +  return parser->pattern_match > 0;
    3.31 +}
    3.32 +
    3.33 +void regex_compile_all(regex_parser_t* parser) {
    3.34 +  size_t pcount = parser->pattern_list->count;
    3.35 +  if (pcount > 0) {
    3.36 +    if (parser->compiled_patterns != NULL) {
    3.37 +      free(parser->compiled_patterns);
    3.38 +    }
    3.39 +    parser->compiled_patterns = calloc(pcount, sizeof(regex_t));
    3.40 +
    3.41 +    regex_t* re = malloc(sizeof(regex_t));
    3.42 +    for (int i = 0 ; i < pcount ; i++) {
    3.43 +      if (regcomp(re, parser->pattern_list->items[i],
    3.44 +          REG_EXTENDED|REG_NOSUB) == 0) {
    3.45 +        parser->compiled_patterns[i] = re;
    3.46 +      } else {
    3.47 +        fprintf(stderr, "Cannot compile: %s\n",
    3.48 +            (parser->pattern_list->items[i]));
    3.49 +        parser->compiled_patterns[i] = NULL;
    3.50 +      }
    3.51 +    }
    3.52 +  }
    3.53 +}
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/regex_parser.h	Thu Jan 26 15:55:52 2012 +0100
     4.3 @@ -0,0 +1,40 @@
     4.4 +/*
     4.5 + * regex_parser.h
     4.6 + *
     4.7 + *  Created on: 26.01.2012
     4.8 + *      Author: Mike
     4.9 + */
    4.10 +
    4.11 +#ifndef REGEX_PARSER_H_
    4.12 +#define REGEX_PARSER_H_
    4.13 +
    4.14 +#define REGEX_MAX_LINELENGTH           2048
    4.15 +
    4.16 +#include <sys/types.h>
    4.17 +#include <stdbool.h>
    4.18 +#include <regex.h>
    4.19 +#include "string_list.h"
    4.20 +
    4.21 +typedef struct {
    4.22 +  string_list_t* pattern_list; /* even entries: start ; odd entries: end */
    4.23 +  regex_t** compiled_patterns;
    4.24 +  unsigned int pattern_match; /* save position of end pattern to match -
    4.25 +                                 0 when a start pattern shall match first */
    4.26 +  unsigned int matched_lines;
    4.27 +} regex_parser_t;
    4.28 +
    4.29 +#ifdef _cplusplus
    4.30 +extern "C" {
    4.31 +#endif
    4.32 +
    4.33 +regex_parser_t* new_regex_parser_t();
    4.34 +void destroy_regex_parser_t(regex_parser_t*);
    4.35 +
    4.36 +bool regex_parser_matching(regex_parser_t*);
    4.37 +void regex_compile_all(regex_parser_t*);
    4.38 +
    4.39 +#ifdef _cplusplus
    4.40 +}
    4.41 +#endif
    4.42 +
    4.43 +#endif /* REGEX_PARSER_H_ */
     5.1 --- a/scanner.c	Thu Dec 01 17:06:27 2011 +0100
     5.2 +++ b/scanner.c	Thu Jan 26 15:55:52 2012 +0100
     5.3 @@ -9,6 +9,7 @@
     5.4  #include "scanner.h"
     5.5  #include "suffix_fnc.h"
     5.6  #include "bfile_heuristics.h"
     5.7 +#include "regex_parser.h"
     5.8  #include <sys/stat.h>
     5.9  
    5.10  int scanDirectory(scanner_t scanner, settings_t* settings) {
    5.11 @@ -61,7 +62,7 @@
    5.12          lines = 0;
    5.13          bfile = false;
    5.14          bfile_reset(settings->bfileHeuristics);
    5.15 -        char line_buffer[2048];
    5.16 +        char line_buffer[REGEX_MAX_LINELENGTH];
    5.17          int line_buffer_offset = 0;
    5.18  
    5.19          FILE *file = fopen(filename, "r");
    5.20 @@ -83,7 +84,7 @@
    5.21              line_buffer_offset = 0;
    5.22              lines++;
    5.23            } else {
    5.24 -            if (line_buffer_offset < 2048) {
    5.25 +            if (line_buffer_offset < REGEX_MAX_LINELENGTH) {
    5.26                line_buffer[line_buffer_offset] = a;
    5.27                line_buffer_offset++;
    5.28              } else {
     6.1 --- a/settings.c	Thu Dec 01 17:06:27 2011 +0100
     6.2 +++ b/settings.c	Thu Jan 26 15:55:52 2012 +0100
     6.3 @@ -21,13 +21,15 @@
     6.4      settings->suffixList         = new_string_list_t();
     6.5      settings->verbose            = true;
     6.6      settings->bfileHeuristics    = new_bfile_heuristics_t();
     6.7 -    settings->confusing_lnlen   = false;
     6.8 +    settings->confusing_lnlen    = false;
     6.9 +    settings->regex              = new_regex_parser_t();
    6.10    }
    6.11  
    6.12    return settings;
    6.13  }
    6.14  
    6.15  void destroy_settings_t(settings_t* settings) {
    6.16 +  destroy_regex_parser_t(settings->regex);
    6.17    destroy_string_list_t(settings->suffixList);
    6.18    destroy_bfile_heuristics_t(settings->bfileHeuristics);
    6.19    free(settings);
     7.1 --- a/settings.h	Thu Dec 01 17:06:27 2011 +0100
     7.2 +++ b/settings.h	Thu Jan 26 15:55:52 2012 +0100
     7.3 @@ -11,9 +11,11 @@
     7.4  #include "stdinc.h"
     7.5  #include "string_list.h"
     7.6  #include "bfile_heuristics.h"
     7.7 +#include "regex_parser.h"
     7.8  
     7.9  typedef struct _settings {
    7.10    string_list_t* suffixList;
    7.11 +  regex_parser_t* regex;
    7.12    bfile_heuristics_t* bfileHeuristics;
    7.13    char fileSeparator;
    7.14    bool recursive;
     8.1 --- a/string_list.h	Thu Dec 01 17:06:27 2011 +0100
     8.2 +++ b/string_list.h	Thu Jan 26 15:55:52 2012 +0100
     8.3 @@ -11,7 +11,7 @@
     8.4  #include "stdinc.h"
     8.5  
     8.6  typedef struct _string_list {
     8.7 -  int count;
     8.8 +  size_t count;
     8.9    char** items;
    8.10  } string_list_t;
    8.11  

mercurial