src/regex_parser.c

Fri, 03 Jun 2022 20:05:15 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 03 Jun 2022 20:05:15 +0200
changeset 66
be2084398c37
parent 57
68018eac46c3
permissions
-rw-r--r--

new feature: count non-whitespace characters

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 
     3  * Copyright 2018 Mike Becker. All rights reserved.
     4  * 
     5  * Redistribution and use in source and binary forms, with or without
     6  * modification, are permitted provided that the following conditions are met:
     7  * 
     8  * 1. Redistributions of source code must retain the above copyright
     9  * notice, this list of conditions and the following disclaimer.
    10  * 
    11  * 2. Redistributions in binary form must reproduce the above copyright
    12  * notice, this list of conditions and the following disclaimer in the
    13  * documentation and/or other materials provided with the distribution.
    14  * 
    15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    16  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
    19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    22  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
    23  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    25  */
    27 #include "regex_parser.h"
    28 #include <ctype.h>
    30 regex_parser_t* new_regex_parser_t() {
    31   regex_parser_t* ret = malloc(sizeof(regex_parser_t));
    32   if (ret != NULL) {
    33     ret->pattern_list = new_string_list_t();
    34     ret->matched_counted = 0;
    35     ret->pattern_match = 0;
    36     ret->compiled_patterns = NULL;
    37     ret->compiled_pattern_count = 0;
    38     ret->count_chars = false;
    39   }
    40   return ret;
    41 }
    43 void regex_parser_reset(regex_parser_t* parser) {
    44   parser->pattern_match = parser->matched_counted = 0;
    45 }
    47 void regex_destcomppats(regex_parser_t* parser) {
    48   if (parser->compiled_patterns != NULL) {
    49     for (unsigned i = 0 ; i < parser->compiled_pattern_count ; i++) {
    50       if (parser->compiled_patterns[i] != NULL) {
    51         free(parser->compiled_patterns[i]);
    52       }
    53     }
    54     free(parser->compiled_patterns);
    55     parser->compiled_patterns = NULL;
    56     parser->compiled_pattern_count = 0;
    57   }
    58 }
    60 void destroy_regex_parser_t(regex_parser_t* parser) {
    61   regex_destcomppats(parser);
    62   destroy_string_list_t(parser->pattern_list);
    63   free(parser);
    64 }
    66 bool regex_parser_matching(regex_parser_t* parser) {
    67   return parser->pattern_match > 0;
    68 }
    70 static unsigned regex_parser_count_chars(const char* input,
    71                                          unsigned start, unsigned end) {
    72   unsigned ret = 0;
    73   for (unsigned i = start ; i < end ; i++) {
    74     ret += isspace(input[i]) ? 0 : 1;
    75   }
    76   return ret;
    77 }
    79 int regex_parser_do(regex_parser_t* parser, char* input) {
    80   int err = REG_NOMATCH;
    81   if (parser->compiled_pattern_count > 0) {
    82     regmatch_t match;
    84     if (regex_parser_matching(parser)) {
    85       if (parser->count_chars) {
    86         parser->matched_counted +=
    87             regex_parser_count_chars(input, 0, strlen(input));
    88       } else {
    89         parser->matched_counted++;
    90       }
    92       err = regexec(parser->compiled_patterns[parser->pattern_match],
    93           input, 1, &match, 0);
    94       if (err > 0 && err != REG_NOMATCH) {
    95         fprintf(stderr, "Regex-Error: 0x%08x", err);
    96       }
    97       if (err == 0) {
    98         parser->pattern_match = 0;
    99         size_t input_len = strlen(input);
   100         if (match.rm_eo < input_len) {
   101           if (parser->count_chars) {
   102             /* do not exclude chars that occur after pattern end */
   103             parser->matched_counted -=
   104                 regex_parser_count_chars(input, match.rm_eo, input_len);
   105           } else {
   106             /* do not exclude line, if it does not end with the pattern */
   107             parser->matched_counted--;
   108           }
   109         }
   110       }
   111     } else {
   112       for (unsigned i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
   113         err = regexec(parser->compiled_patterns[i], input, 1, &match, 0);
   114         if (err > 0 && err != REG_NOMATCH) {
   115           fprintf(stderr, "Regex-Error: 0x%08x", err);
   116         }
   117         if (err == 0) {
   118           /* a start pattern matches, start counting */
   119           parser->matched_counted = 0;
   120           /* Check, if end pattern is also in this line */
   121           parser->pattern_match = i+1;
   122           regex_parser_do(parser, input);
   123           /* If something was found, determine what exactly to exclude */
   124           if (parser->matched_counted > 0) {
   125             if (parser->count_chars) {
   126               /* do not exclude the characters before the pattern */
   127               parser->matched_counted -=
   128                   regex_parser_count_chars(input, 0, match.rm_so);
   129             } else {
   130               /* do not match line, if it does not start with the pattern */
   131               if (match.rm_so > 0) {
   132                 parser->matched_counted--;
   133               }
   134             }
   135           }
   136           break;
   137         }
   138       }
   139     }
   140   }
   141   return err;
   142 }
   144 bool regex_compile_all(regex_parser_t* parser) {
   145   bool success = true;
   146   size_t pcount = parser->pattern_list->count;
   147   if (pcount > 0) {
   148     regex_destcomppats(parser);
   149     parser->compiled_patterns = calloc(pcount, sizeof(regex_t));
   150     parser->compiled_pattern_count = pcount;
   152     regex_t* re;
   153     for (unsigned i = 0 ; i < pcount ; i++) {
   154       re = malloc(sizeof(regex_t));
   155       if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) {
   156         parser->compiled_patterns[i] = re;
   157       } else {
   158         fprintf(stderr, "Cannot compile pattern: %s\n",
   159             (parser->pattern_list->items[i]));
   160         parser->compiled_patterns[i] = NULL;
   161         success = false;
   162       }
   163     }
   164   }
   165   return success;
   166 }

mercurial