src/regex_parser.c

changeset 66
be2084398c37
parent 57
68018eac46c3
     1.1 --- a/src/regex_parser.c	Fri Jun 03 18:13:46 2022 +0200
     1.2 +++ b/src/regex_parser.c	Fri Jun 03 20:05:15 2022 +0200
     1.3 @@ -25,26 +25,28 @@
     1.4   */
     1.5  
     1.6  #include "regex_parser.h"
     1.7 +#include <ctype.h>
     1.8  
     1.9  regex_parser_t* new_regex_parser_t() {
    1.10    regex_parser_t* ret = malloc(sizeof(regex_parser_t));
    1.11    if (ret != NULL) {
    1.12      ret->pattern_list = new_string_list_t();
    1.13 -    ret->matched_lines = 0;
    1.14 +    ret->matched_counted = 0;
    1.15      ret->pattern_match = 0;
    1.16      ret->compiled_patterns = NULL;
    1.17      ret->compiled_pattern_count = 0;
    1.18 +    ret->count_chars = false;
    1.19    }
    1.20    return ret;
    1.21  }
    1.22  
    1.23  void regex_parser_reset(regex_parser_t* parser) {
    1.24 -  parser->pattern_match = parser->matched_lines = 0;
    1.25 +  parser->pattern_match = parser->matched_counted = 0;
    1.26  }
    1.27  
    1.28  void regex_destcomppats(regex_parser_t* parser) {
    1.29    if (parser->compiled_patterns != NULL) {
    1.30 -    for (int i = 0 ; i < parser->compiled_pattern_count ; i++) {
    1.31 +    for (unsigned i = 0 ; i < parser->compiled_pattern_count ; i++) {
    1.32        if (parser->compiled_patterns[i] != NULL) {
    1.33          free(parser->compiled_patterns[i]);
    1.34        }
    1.35 @@ -65,13 +67,27 @@
    1.36    return parser->pattern_match > 0;
    1.37  }
    1.38  
    1.39 +static unsigned regex_parser_count_chars(const char* input,
    1.40 +                                         unsigned start, unsigned end) {
    1.41 +  unsigned ret = 0;
    1.42 +  for (unsigned i = start ; i < end ; i++) {
    1.43 +    ret += isspace(input[i]) ? 0 : 1;
    1.44 +  }
    1.45 +  return ret;
    1.46 +}
    1.47 +
    1.48  int regex_parser_do(regex_parser_t* parser, char* input) {
    1.49    int err = REG_NOMATCH;
    1.50    if (parser->compiled_pattern_count > 0) {
    1.51      regmatch_t match;
    1.52  
    1.53      if (regex_parser_matching(parser)) {
    1.54 -      parser->matched_lines++;
    1.55 +      if (parser->count_chars) {
    1.56 +        parser->matched_counted +=
    1.57 +            regex_parser_count_chars(input, 0, strlen(input));
    1.58 +      } else {
    1.59 +        parser->matched_counted++;
    1.60 +      }
    1.61  
    1.62        err = regexec(parser->compiled_patterns[parser->pattern_match],
    1.63            input, 1, &match, 0);
    1.64 @@ -80,25 +96,42 @@
    1.65        }
    1.66        if (err == 0) {
    1.67          parser->pattern_match = 0;
    1.68 -        /* do not match line, if it does not end with the pattern */
    1.69 -        if (match.rm_eo < strlen(input)) {
    1.70 -          parser->matched_lines--;
    1.71 +        size_t input_len = strlen(input);
    1.72 +        if (match.rm_eo < input_len) {
    1.73 +          if (parser->count_chars) {
    1.74 +            /* do not exclude chars that occur after pattern end */
    1.75 +            parser->matched_counted -=
    1.76 +                regex_parser_count_chars(input, match.rm_eo, input_len);
    1.77 +          } else {
    1.78 +            /* do not exclude line, if it does not end with the pattern */
    1.79 +            parser->matched_counted--;
    1.80 +          }
    1.81          }
    1.82        }
    1.83      } else {
    1.84 -      for (int i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
    1.85 +      for (unsigned i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
    1.86          err = regexec(parser->compiled_patterns[i], input, 1, &match, 0);
    1.87          if (err > 0 && err != REG_NOMATCH) {
    1.88            fprintf(stderr, "Regex-Error: 0x%08x", err);
    1.89          }
    1.90          if (err == 0) {
    1.91 +          /* a start pattern matches, start counting */
    1.92 +          parser->matched_counted = 0;
    1.93 +          /* Check, if end pattern is also in this line */
    1.94            parser->pattern_match = i+1;
    1.95 -          parser->matched_lines = 0;
    1.96 -          /* Check, if end pattern is also in this line */
    1.97            regex_parser_do(parser, input);
    1.98 -          /* do not match line, if it does not start with the pattern */
    1.99 -          if (match.rm_so > 0 && parser->matched_lines > 0) {
   1.100 -            parser->matched_lines--;
   1.101 +          /* If something was found, determine what exactly to exclude */
   1.102 +          if (parser->matched_counted > 0) {
   1.103 +            if (parser->count_chars) {
   1.104 +              /* do not exclude the characters before the pattern */
   1.105 +              parser->matched_counted -=
   1.106 +                  regex_parser_count_chars(input, 0, match.rm_so);
   1.107 +            } else {
   1.108 +              /* do not match line, if it does not start with the pattern */
   1.109 +              if (match.rm_so > 0) {
   1.110 +                parser->matched_counted--;
   1.111 +              }
   1.112 +            }
   1.113            }
   1.114            break;
   1.115          }
   1.116 @@ -117,7 +150,7 @@
   1.117      parser->compiled_pattern_count = pcount;
   1.118  
   1.119      regex_t* re;
   1.120 -    for (int i = 0 ; i < pcount ; i++) {
   1.121 +    for (unsigned i = 0 ; i < pcount ; i++) {
   1.122        re = malloc(sizeof(regex_t));
   1.123        if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) {
   1.124          parser->compiled_patterns[i] = re;

mercurial