--- a/src/regex_parser.c Fri Jun 03 18:13:46 2022 +0200 +++ b/src/regex_parser.c Fri Jun 03 20:05:15 2022 +0200 @@ -25,26 +25,28 @@ */ #include "regex_parser.h" +#include <ctype.h> regex_parser_t* new_regex_parser_t() { regex_parser_t* ret = malloc(sizeof(regex_parser_t)); if (ret != NULL) { ret->pattern_list = new_string_list_t(); - ret->matched_lines = 0; + ret->matched_counted = 0; ret->pattern_match = 0; ret->compiled_patterns = NULL; ret->compiled_pattern_count = 0; + ret->count_chars = false; } return ret; } void regex_parser_reset(regex_parser_t* parser) { - parser->pattern_match = parser->matched_lines = 0; + parser->pattern_match = parser->matched_counted = 0; } void regex_destcomppats(regex_parser_t* parser) { if (parser->compiled_patterns != NULL) { - for (int i = 0 ; i < parser->compiled_pattern_count ; i++) { + for (unsigned i = 0 ; i < parser->compiled_pattern_count ; i++) { if (parser->compiled_patterns[i] != NULL) { free(parser->compiled_patterns[i]); } @@ -65,13 +67,27 @@ return parser->pattern_match > 0; } +static unsigned regex_parser_count_chars(const char* input, + unsigned start, unsigned end) { + unsigned ret = 0; + for (unsigned i = start ; i < end ; i++) { + ret += isspace(input[i]) ? 0 : 1; + } + return ret; +} + int regex_parser_do(regex_parser_t* parser, char* input) { int err = REG_NOMATCH; if (parser->compiled_pattern_count > 0) { regmatch_t match; if (regex_parser_matching(parser)) { - parser->matched_lines++; + if (parser->count_chars) { + parser->matched_counted += + regex_parser_count_chars(input, 0, strlen(input)); + } else { + parser->matched_counted++; + } err = regexec(parser->compiled_patterns[parser->pattern_match], input, 1, &match, 0); @@ -80,25 +96,42 @@ } if (err == 0) { parser->pattern_match = 0; - /* do not match line, if it does not end with the pattern */ - if (match.rm_eo < strlen(input)) { - parser->matched_lines--; + size_t input_len = strlen(input); + if (match.rm_eo < input_len) { + if (parser->count_chars) { + /* do not exclude chars that occur after pattern end */ + parser->matched_counted -= + regex_parser_count_chars(input, match.rm_eo, input_len); + } else { + /* do not exclude line, if it does not end with the pattern */ + parser->matched_counted--; + } } } } else { - for (int i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) { + for (unsigned i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) { err = regexec(parser->compiled_patterns[i], input, 1, &match, 0); if (err > 0 && err != REG_NOMATCH) { fprintf(stderr, "Regex-Error: 0x%08x", err); } if (err == 0) { + /* a start pattern matches, start counting */ + parser->matched_counted = 0; + /* Check, if end pattern is also in this line */ parser->pattern_match = i+1; - parser->matched_lines = 0; - /* Check, if end pattern is also in this line */ regex_parser_do(parser, input); - /* do not match line, if it does not start with the pattern */ - if (match.rm_so > 0 && parser->matched_lines > 0) { - parser->matched_lines--; + /* If something was found, determine what exactly to exclude */ + if (parser->matched_counted > 0) { + if (parser->count_chars) { + /* do not exclude the characters before the pattern */ + parser->matched_counted -= + regex_parser_count_chars(input, 0, match.rm_so); + } else { + /* do not match line, if it does not start with the pattern */ + if (match.rm_so > 0) { + parser->matched_counted--; + } + } } break; } @@ -117,7 +150,7 @@ parser->compiled_pattern_count = pcount; regex_t* re; - for (int i = 0 ; i < pcount ; i++) { + for (unsigned i = 0 ; i < pcount ; i++) { re = malloc(sizeof(regex_t)); if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) { parser->compiled_patterns[i] = re;