src/regex_parser.c

changeset 66
be2084398c37
parent 57
68018eac46c3
--- a/src/regex_parser.c	Fri Jun 03 18:13:46 2022 +0200
+++ b/src/regex_parser.c	Fri Jun 03 20:05:15 2022 +0200
@@ -25,26 +25,28 @@
  */
 
 #include "regex_parser.h"
+#include <ctype.h>
 
 regex_parser_t* new_regex_parser_t() {
   regex_parser_t* ret = malloc(sizeof(regex_parser_t));
   if (ret != NULL) {
     ret->pattern_list = new_string_list_t();
-    ret->matched_lines = 0;
+    ret->matched_counted = 0; /* counts lines or chars, see count_chars */
     ret->pattern_match = 0;
     ret->compiled_patterns = NULL;
     ret->compiled_pattern_count = 0;
+    ret->count_chars = false; /* default: count once per input, not per char */
   }
   return ret;
 }
 
 void regex_parser_reset(regex_parser_t* parser) {
-  parser->pattern_match = parser->matched_lines = 0;
+  parser->pattern_match = parser->matched_counted = 0; /* zero both fields */
 }
 
 void regex_destcomppats(regex_parser_t* parser) {
   if (parser->compiled_patterns != NULL) {
-    for (int i = 0 ; i < parser->compiled_pattern_count ; i++) {
+    for (unsigned i = 0 ; i < parser->compiled_pattern_count ; i++) {
       if (parser->compiled_patterns[i] != NULL) {
         free(parser->compiled_patterns[i]);
       }
@@ -65,13 +67,27 @@
   return parser->pattern_match > 0;
 }
 
+static unsigned regex_parser_count_chars(const char* input,
+                                         unsigned start, unsigned end) {
+  unsigned ret = 0; /* non-whitespace chars found in input[start, end) */
+  for (unsigned i = start ; i < end ; i++) {
+    ret += isspace((unsigned char) input[i]) ? 0 : 1; /* negative char = UB */
+  }
+  return ret; /* 0 when start >= end; caller keeps end within the string */
+}
+
 int regex_parser_do(regex_parser_t* parser, char* input) {
   int err = REG_NOMATCH;
   if (parser->compiled_pattern_count > 0) {
     regmatch_t match;
 
     if (regex_parser_matching(parser)) {
-      parser->matched_lines++;
+      if (parser->count_chars) {
+        parser->matched_counted +=
+            regex_parser_count_chars(input, 0, strlen(input));
+      } else {
+        parser->matched_counted++;
+      }
 
       err = regexec(parser->compiled_patterns[parser->pattern_match],
           input, 1, &match, 0);
@@ -80,25 +96,42 @@
       }
       if (err == 0) {
         parser->pattern_match = 0;
-        /* do not match line, if it does not end with the pattern */
-        if (match.rm_eo < strlen(input)) {
-          parser->matched_lines--;
+        size_t input_len = strlen(input);
+        if (match.rm_eo < input_len) {
+          if (parser->count_chars) {
+            /* do not exclude chars that occur after pattern end */
+            parser->matched_counted -=
+                regex_parser_count_chars(input, match.rm_eo, input_len);
+          } else {
+            /* do not exclude line, if it does not end with the pattern */
+            parser->matched_counted--;
+          }
         }
       }
     } else {
-      for (int i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
+      for (unsigned i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
         err = regexec(parser->compiled_patterns[i], input, 1, &match, 0);
         if (err > 0 && err != REG_NOMATCH) {
           fprintf(stderr, "Regex-Error: 0x%08x", err);
         }
         if (err == 0) {
+          /* a start pattern matches, start counting */
+          parser->matched_counted = 0;
+          /* Check, if end pattern is also in this line */
           parser->pattern_match = i+1;
-          parser->matched_lines = 0;
-          /* Check, if end pattern is also in this line */
           regex_parser_do(parser, input);
-          /* do not match line, if it does not start with the pattern */
-          if (match.rm_so > 0 && parser->matched_lines > 0) {
-            parser->matched_lines--;
+          /* If something was found, determine what exactly to exclude */
+          if (parser->matched_counted > 0) {
+            if (parser->count_chars) {
+              /* do not exclude the characters before the pattern */
+              parser->matched_counted -=
+                  regex_parser_count_chars(input, 0, match.rm_so);
+            } else {
+              /* do not match line, if it does not start with the pattern */
+              if (match.rm_so > 0) {
+                parser->matched_counted--;
+              }
+            }
           }
           break;
         }
@@ -117,7 +150,7 @@
     parser->compiled_pattern_count = pcount;
 
     regex_t* re;
-    for (int i = 0 ; i < pcount ; i++) {
+    for (unsigned i = 0 ; i < pcount ; i++) {
       re = malloc(sizeof(regex_t));
       if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) {
         parser->compiled_patterns[i] = re;

mercurial