Fri, 03 Jun 2022 20:05:15 +0200
new feature: count non-whitespace characters
universe@27 | 1 | /* |
universe@34 | 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
universe@57 | 3 | * Copyright 2018 Mike Becker. All rights reserved. |
universe@34 | 4 | * |
universe@34 | 5 | * Redistribution and use in source and binary forms, with or without |
universe@34 | 6 | * modification, are permitted provided that the following conditions are met: |
universe@34 | 7 | * |
universe@34 | 8 | * 1. Redistributions of source code must retain the above copyright |
universe@34 | 9 | * notice, this list of conditions and the following disclaimer. |
universe@34 | 10 | * |
universe@34 | 11 | * 2. Redistributions in binary form must reproduce the above copyright |
universe@34 | 12 | * notice, this list of conditions and the following disclaimer in the |
universe@34 | 13 | * documentation and/or other materials provided with the distribution. |
universe@34 | 14 | * |
universe@34 | 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
universe@34 | 16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
universe@34 | 17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
universe@34 | 18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
universe@34 | 19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
universe@34 | 20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
universe@34 | 21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
universe@34 | 22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
universe@34 | 23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
universe@57 | 24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
universe@27 | 25 | */ |
universe@27 | 26 | |
universe@27 | 27 | #include "regex_parser.h" |
universe@66 | 28 | #include <ctype.h> |
universe@27 | 29 | |
universe@27 | 30 | regex_parser_t* new_regex_parser_t() { |
universe@27 | 31 | regex_parser_t* ret = malloc(sizeof(regex_parser_t)); |
universe@27 | 32 | if (ret != NULL) { |
universe@27 | 33 | ret->pattern_list = new_string_list_t(); |
universe@66 | 34 | ret->matched_counted = 0; |
universe@27 | 35 | ret->pattern_match = 0; |
universe@27 | 36 | ret->compiled_patterns = NULL; |
universe@28 | 37 | ret->compiled_pattern_count = 0; |
universe@66 | 38 | ret->count_chars = false; |
universe@27 | 39 | } |
universe@27 | 40 | return ret; |
universe@27 | 41 | } |
universe@27 | 42 | |
universe@54 | 43 | void regex_parser_reset(regex_parser_t* parser) { |
universe@66 | 44 | parser->pattern_match = parser->matched_counted = 0; |
universe@54 | 45 | } |
universe@54 | 46 | |
universe@28 | 47 | void regex_destcomppats(regex_parser_t* parser) { |
universe@28 | 48 | if (parser->compiled_patterns != NULL) { |
universe@66 | 49 | for (unsigned i = 0 ; i < parser->compiled_pattern_count ; i++) { |
universe@28 | 50 | if (parser->compiled_patterns[i] != NULL) { |
universe@28 | 51 | free(parser->compiled_patterns[i]); |
universe@28 | 52 | } |
universe@28 | 53 | } |
universe@28 | 54 | free(parser->compiled_patterns); |
universe@28 | 55 | parser->compiled_patterns = NULL; |
universe@28 | 56 | parser->compiled_pattern_count = 0; |
universe@28 | 57 | } |
universe@28 | 58 | } |
universe@28 | 59 | |
universe@27 | 60 | void destroy_regex_parser_t(regex_parser_t* parser) { |
universe@28 | 61 | regex_destcomppats(parser); |
universe@27 | 62 | destroy_string_list_t(parser->pattern_list); |
universe@27 | 63 | free(parser); |
universe@27 | 64 | } |
universe@27 | 65 | |
universe@27 | 66 | bool regex_parser_matching(regex_parser_t* parser) { |
universe@27 | 67 | return parser->pattern_match > 0; |
universe@27 | 68 | } |
universe@27 | 69 | |
/*
 * Counts the non-whitespace characters of input in the index range
 * [start, end).
 *
 * input  the string to inspect; must be readable up to index end - 1
 * start  index of the first character to inspect
 * end    index one past the last character to inspect
 *
 * Returns the number of characters in the range for which isspace() is
 * false; zero if start >= end.
 */
static unsigned regex_parser_count_chars(const char* input,
        unsigned start, unsigned end) {
    unsigned count = 0;
    for (unsigned i = start ; i < end ; i++) {
        /*
         * isspace() requires a value representable as unsigned char (or
         * EOF); a plain char may be negative for non-ASCII bytes.
         */
        if (!isspace((unsigned char) input[i])) {
            count++;
        }
    }
    return count;
}
universe@66 | 78 | |
universe@28 | 79 | int regex_parser_do(regex_parser_t* parser, char* input) { |
universe@28 | 80 | int err = REG_NOMATCH; |
universe@28 | 81 | if (parser->compiled_pattern_count > 0) { |
universe@28 | 82 | regmatch_t match; |
universe@27 | 83 | |
universe@28 | 84 | if (regex_parser_matching(parser)) { |
universe@66 | 85 | if (parser->count_chars) { |
universe@66 | 86 | parser->matched_counted += |
universe@66 | 87 | regex_parser_count_chars(input, 0, strlen(input)); |
universe@66 | 88 | } else { |
universe@66 | 89 | parser->matched_counted++; |
universe@66 | 90 | } |
universe@28 | 91 | |
universe@28 | 92 | err = regexec(parser->compiled_patterns[parser->pattern_match], |
universe@28 | 93 | input, 1, &match, 0); |
universe@28 | 94 | if (err > 0 && err != REG_NOMATCH) { |
universe@28 | 95 | fprintf(stderr, "Regex-Error: 0x%08x", err); |
universe@28 | 96 | } |
universe@28 | 97 | if (err == 0) { |
universe@28 | 98 | parser->pattern_match = 0; |
universe@66 | 99 | size_t input_len = strlen(input); |
universe@66 | 100 | if (match.rm_eo < input_len) { |
universe@66 | 101 | if (parser->count_chars) { |
universe@66 | 102 | /* do not exclude chars that occur after pattern end */ |
universe@66 | 103 | parser->matched_counted -= |
universe@66 | 104 | regex_parser_count_chars(input, match.rm_eo, input_len); |
universe@66 | 105 | } else { |
universe@66 | 106 | /* do not exclude line, if it does not end with the pattern */ |
universe@66 | 107 | parser->matched_counted--; |
universe@66 | 108 | } |
universe@28 | 109 | } |
universe@28 | 110 | } |
universe@28 | 111 | } else { |
universe@66 | 112 | for (unsigned i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) { |
universe@28 | 113 | err = regexec(parser->compiled_patterns[i], input, 1, &match, 0); |
universe@28 | 114 | if (err > 0 && err != REG_NOMATCH) { |
universe@28 | 115 | fprintf(stderr, "Regex-Error: 0x%08x", err); |
universe@28 | 116 | } |
universe@28 | 117 | if (err == 0) { |
universe@66 | 118 | /* a start pattern matches, start counting */ |
universe@66 | 119 | parser->matched_counted = 0; |
universe@66 | 120 | /* Check, if end pattern is also in this line */ |
universe@28 | 121 | parser->pattern_match = i+1; |
universe@28 | 122 | regex_parser_do(parser, input); |
universe@66 | 123 | /* If something was found, determine what exactly to exclude */ |
universe@66 | 124 | if (parser->matched_counted > 0) { |
universe@66 | 125 | if (parser->count_chars) { |
universe@66 | 126 | /* do not exclude the characters before the pattern */ |
universe@66 | 127 | parser->matched_counted -= |
universe@66 | 128 | regex_parser_count_chars(input, 0, match.rm_so); |
universe@66 | 129 | } else { |
universe@66 | 130 | /* do not match line, if it does not start with the pattern */ |
universe@66 | 131 | if (match.rm_so > 0) { |
universe@66 | 132 | parser->matched_counted--; |
universe@66 | 133 | } |
universe@66 | 134 | } |
universe@28 | 135 | } |
universe@28 | 136 | break; |
universe@28 | 137 | } |
universe@27 | 138 | } |
universe@27 | 139 | } |
universe@27 | 140 | } |
universe@28 | 141 | return err; |
universe@27 | 142 | } |
universe@28 | 143 | |
universe@28 | 144 | bool regex_compile_all(regex_parser_t* parser) { |
universe@28 | 145 | bool success = true; |
universe@28 | 146 | size_t pcount = parser->pattern_list->count; |
universe@28 | 147 | if (pcount > 0) { |
universe@28 | 148 | regex_destcomppats(parser); |
universe@28 | 149 | parser->compiled_patterns = calloc(pcount, sizeof(regex_t)); |
universe@28 | 150 | parser->compiled_pattern_count = pcount; |
universe@28 | 151 | |
universe@28 | 152 | regex_t* re; |
universe@66 | 153 | for (unsigned i = 0 ; i < pcount ; i++) { |
universe@28 | 154 | re = malloc(sizeof(regex_t)); |
universe@28 | 155 | if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) { |
universe@28 | 156 | parser->compiled_patterns[i] = re; |
universe@28 | 157 | } else { |
universe@28 | 158 | fprintf(stderr, "Cannot compile pattern: %s\n", |
universe@28 | 159 | (parser->pattern_list->items[i])); |
universe@28 | 160 | parser->compiled_patterns[i] = NULL; |
universe@28 | 161 | success = false; |
universe@28 | 162 | } |
universe@28 | 163 | } |
universe@28 | 164 | } |
universe@28 | 165 | return success; |
universe@28 | 166 | } |