# HG changeset patch
# User Mike Becker
# Date 1327589752 -3600
# Node ID 95a958e3de88df539130be6da3a423c90066d419
# Parent  853a1181884baa4bfddcb3da4b9b6a0820824231
added regexp_parser struct and compile function

diff -r 853a1181884b -r 95a958e3de88 bfile_heuristics.h
--- a/bfile_heuristics.h	Thu Dec 01 17:06:27 2011 +0100
+++ b/bfile_heuristics.h	Thu Jan 26 15:55:52 2012 +0100
@@ -16,9 +16,9 @@
 #define BFILE_HIGH_ACCURACY 0x04
 
 typedef struct {
-  int level;
-  int bcount; /* 'binary' character count */
-  int tcount; /* total count */
+  unsigned int level;
+  unsigned int bcount; /* 'binary' character count */
+  unsigned int tcount; /* total count */
 } bfile_heuristics_t;
 
 #ifdef _cplusplus
diff -r 853a1181884b -r 95a958e3de88 cline.c
--- a/cline.c	Thu Dec 01 17:06:27 2011 +0100
+++ b/cline.c	Thu Jan 26 15:55:52 2012 +0100
@@ -10,17 +10,20 @@
 #include "settings.h"
 #include "arguments.h"
 #include "stream.h"
+#include "regex_parser.h"
 
 void printHelpText() {
   const char* helpText =
     "\nUsage:"
-    "\n cline [-hrmvV][-s suffix][-b level][]"
-    "\n cline [-hrmvV][-S suffix][-b level][]"
+    "\n cline [Options] [Directory]"
+    "\n cline [Options] [Directory]"
     "\n\nCounts the line terminator characters (\\n) within all"
     " files in the specified\ndirectory."
     "\n\nOptions:"
     "\n -b - binary file heuristics level (default medium)"
     "\n One of: ignore low medium high"
+    "\n -e <start> <end> - Excludes lines between <start> and <end>"
+    "\n You may use this option multiple times"
     "\n -h, --help - this help text"
     "\n -m - print information about matching files only"
     "\n -s - only count files with these suffixes (separated"
@@ -35,7 +38,10 @@
     "\n cline ./\n"
     "So each file in the working directory is counted. If you want to count C"
     "\nsource code in your working directory and its subdirectories, type:"
-    "\n cline -rs .c\n";
+    "\n cline -rs .c\n"
+    "\nIf you want to exclude comment lines, you may use the -e option."
+    "\nAfter a line matches the regex pattern <start> any following line is"
+    "\nnot counted unless a line matches the <end> pattern.";
 
   printf(helpText);
 }
@@ -68,7 +74,7 @@
 
   for (int t = 1 ; t < argc ; t++) {
 
-    int argflags = checkArgument(argv[t], "hsSrRmvVb");
+    int argflags = checkArgument(argv[t], "hsSrRmvVbe");
 
     /* s, S */
     if ((argflags & 6) > 0) {
@@ -132,6 +138,13 @@
         return exit_with_help(settings, 1);
       }
     }
+    if ((argflags & 512) > 0) {
+      if (t + 2 >= argc) {
+        return exit_with_help(settings, 1);
+      }
+      t++; add_string(settings->regex->pattern_list, argv[t]);
+      t++; add_string(settings->regex->pattern_list, argv[t]);
+    }
     /* Path */
     if (argflags == 0) {
       if (registerArgument(&checked, 1024)) {
@@ -154,6 +167,10 @@
   }
 
   /* Scan directory */
+  regex_compile_all(settings->regex);
   int lines = scanDirectory((scanner_t){directory, 0}, settings);
+  /* evaluate now: destroy_settings_t() below also frees settings->regex */
+  bool lnlen_warning = settings->confusing_lnlen
+      && settings->regex->pattern_list->count > 0;
   destroy_settings_t(settings);
 
@@ -163,11 +180,10 @@
   }
 
   printf("\n%73d lines\n", lines);
-  if (settings->confusing_lnlen) {
-    /* TODO: display this only when the regexp parser is used */
+  if (lnlen_warning) {
     printf("\nSome files contain too long lines.\n"
-      "The regexp parser currently supports a maximum line length of 2048."
-      "\nThe result might be wrong.\n");
+      "The regex parser currently supports a maximum line length of %d."
+      "\nThe result might be wrong.\n", REGEX_MAX_LINELENGTH);
   }
 
   if (!settings->verbose) {
diff -r 853a1181884b -r 95a958e3de88 regex_parser.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regex_parser.c	Thu Jan 26 15:55:52 2012 +0100
@@ -0,0 +1,92 @@
+/*
+ * regex_parser.c
+ *
+ *  Created on: 26.01.2012
+ *      Author: fox3049
+ */
+
+#include "regex_parser.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/*
+ * Frees every compiled pattern and the pointer array that holds them, then
+ * resets both fields so that the parser can be compiled again.
+ */
+static void free_compiled_patterns(regex_parser_t* parser) {
+  if (parser->compiled_patterns != NULL) {
+    for (size_t i = 0 ; i < parser->compiled_count ; i++) {
+      if (parser->compiled_patterns[i] != NULL) {
+        regfree(parser->compiled_patterns[i]);
+        free(parser->compiled_patterns[i]);
+      }
+    }
+    free(parser->compiled_patterns);
+    parser->compiled_patterns = NULL;
+  }
+  parser->compiled_count = 0;
+}
+
+/* Creates a parser with a new pattern list; returns NULL if malloc fails. */
+regex_parser_t* new_regex_parser_t() {
+  regex_parser_t* ret = malloc(sizeof(regex_parser_t));
+  if (ret != NULL) {
+    ret->pattern_list = new_string_list_t();
+    ret->matched_lines = 0;
+    ret->pattern_match = 0;
+    ret->compiled_patterns = NULL;
+    ret->compiled_count = 0;
+  }
+  return ret;
+}
+
+/* Releases the parser, its pattern list and all compiled patterns. */
+void destroy_regex_parser_t(regex_parser_t* parser) {
+  if (parser == NULL) {
+    return;
+  }
+  free_compiled_patterns(parser);
+  destroy_string_list_t(parser->pattern_list);
+  free(parser);
+}
+
+/* Returns true while pattern_match is non-zero. */
+bool regex_parser_matching(regex_parser_t* parser) {
+  return parser->pattern_match > 0;
+}
+
+/*
+ * Compiles every entry of pattern_list into compiled_patterns.
+ * Each pattern gets its own regex_t; a slot stays NULL (and a message is
+ * written to stderr) when its pattern cannot be compiled.
+ */
+void regex_compile_all(regex_parser_t* parser) {
+  /* drop the result of any previous run, including the regex_t objects */
+  free_compiled_patterns(parser);
+
+  size_t pcount = parser->pattern_list->count;
+  if (pcount == 0) {
+    return;
+  }
+
+  /* array of pointers, not of regex_t - calloc leaves every slot NULL */
+  parser->compiled_patterns = calloc(pcount, sizeof(regex_t*));
+  if (parser->compiled_patterns == NULL) {
+    fprintf(stderr, "Cannot allocate memory for %zu patterns\n", pcount);
+    return;
+  }
+  parser->compiled_count = pcount;
+
+  for (size_t i = 0 ; i < pcount ; i++) {
+    regex_t* re = malloc(sizeof(regex_t));
+    if (re != NULL && regcomp(re, parser->pattern_list->items[i],
+        REG_EXTENDED|REG_NOSUB) == 0) {
+      parser->compiled_patterns[i] = re;
+    } else {
+      fprintf(stderr, "Cannot compile: %s\n",
+          (parser->pattern_list->items[i]));
+      free(re); /* regcomp failed, so there is nothing to regfree */
+    }
+  }
+}
diff -r 853a1181884b -r 95a958e3de88 regex_parser.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/regex_parser.h	Thu Jan 26 15:55:52 2012 +0100
@@ -0,0 +1,41 @@
+/*
+ * regex_parser.h
+ *
+ *  Created on: 26.01.2012
+ *      Author: Mike
+ */
+
+#ifndef REGEX_PARSER_H_
+#define REGEX_PARSER_H_
+
+#define REGEX_MAX_LINELENGTH 2048
+
+#include <sys/types.h>
+#include <stdbool.h>
+#include <regex.h>
+#include "string_list.h"
+
+typedef struct {
+  string_list_t* pattern_list; /* even entries: start ; odd entries: end */
+  regex_t** compiled_patterns; /* one entry per pattern, NULL if invalid */
+  size_t compiled_count; /* number of slots in compiled_patterns */
+  unsigned int pattern_match; /* save position of end pattern to match -
+                                 0 when a start pattern shall match first */
+  unsigned int matched_lines;
+} regex_parser_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+regex_parser_t* new_regex_parser_t();
+void destroy_regex_parser_t(regex_parser_t*);
+
+bool regex_parser_matching(regex_parser_t*);
+void regex_compile_all(regex_parser_t*);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* REGEX_PARSER_H_ */
diff -r 853a1181884b -r 95a958e3de88 scanner.c
--- a/scanner.c	Thu Dec 01 17:06:27 2011 +0100
+++ b/scanner.c	Thu Jan 26 15:55:52 2012 +0100
@@ -9,6 +9,7 @@
 #include "scanner.h"
 #include "suffix_fnc.h"
 #include "bfile_heuristics.h"
+#include "regex_parser.h"
 #include
 
 int scanDirectory(scanner_t scanner, settings_t* settings) {
@@ -61,7 +62,7 @@
       lines = 0;
       bfile = false;
       bfile_reset(settings->bfileHeuristics);
-      char line_buffer[2048];
+      char line_buffer[REGEX_MAX_LINELENGTH];
       int line_buffer_offset = 0;
       FILE *file = fopen(filename, "r");
 
@@ -83,7 +84,7 @@
             line_buffer_offset = 0;
             lines++;
           } else {
-            if (line_buffer_offset < 2048) {
+            if (line_buffer_offset < REGEX_MAX_LINELENGTH) {
               line_buffer[line_buffer_offset] = a;
               line_buffer_offset++;
             } else {
diff -r 853a1181884b -r 95a958e3de88 settings.c
--- a/settings.c	Thu Dec 01 17:06:27 2011 +0100
+++ b/settings.c	Thu Jan 26 15:55:52 2012 +0100
@@ -21,13 +21,15 @@
     settings->suffixList = new_string_list_t();
     settings->verbose = true;
     settings->bfileHeuristics = new_bfile_heuristics_t();
-	settings->confusing_lnlen = false;
+    settings->confusing_lnlen = false;
+    settings->regex = new_regex_parser_t();
   }
 
   return settings;
 }
 
 void destroy_settings_t(settings_t* settings) {
+  destroy_regex_parser_t(settings->regex);
   destroy_string_list_t(settings->suffixList);
   destroy_bfile_heuristics_t(settings->bfileHeuristics);
   free(settings);
diff -r 853a1181884b -r 95a958e3de88 settings.h
--- a/settings.h	Thu Dec 01 17:06:27 2011 +0100
+++ b/settings.h	Thu Jan 26 15:55:52 2012 +0100
@@ -11,9 +11,11 @@
 #include "stdinc.h"
 #include "string_list.h"
 #include "bfile_heuristics.h"
+#include "regex_parser.h"
 
 typedef struct _settings {
   string_list_t* suffixList;
+  regex_parser_t* regex;
   bfile_heuristics_t* bfileHeuristics;
   char fileSeparator;
   bool recursive;
diff -r 853a1181884b -r 95a958e3de88 string_list.h
--- a/string_list.h	Thu Dec 01 17:06:27 2011 +0100
+++ b/string_list.h	Thu Jan 26 15:55:52 2012 +0100
@@ -11,7 +11,7 @@
 #include "stdinc.h"
 
 typedef struct _string_list {
-  int count;
+  size_t count;
   char** items;
 } string_list_t;
 