src/regex_parser.c

Fri, 03 Jun 2022 20:05:15 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 03 Jun 2022 20:05:15 +0200
changeset 66
be2084398c37
parent 57
68018eac46c3
permissions
-rw-r--r--

new feature: count non-whitespace characters

universe@27 1 /*
universe@34 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@57 3 * Copyright 2018 Mike Becker. All rights reserved.
universe@34 4 *
universe@34 5 * Redistribution and use in source and binary forms, with or without
universe@34 6 * modification, are permitted provided that the following conditions are met:
universe@34 7 *
universe@34 8 * 1. Redistributions of source code must retain the above copyright
universe@34 9 * notice, this list of conditions and the following disclaimer.
universe@34 10 *
universe@34 11 * 2. Redistributions in binary form must reproduce the above copyright
universe@34 12 * notice, this list of conditions and the following disclaimer in the
universe@34 13 * documentation and/or other materials provided with the distribution.
universe@34 14 *
universe@34 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@34 16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@34 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
universe@34 18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
universe@34 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
universe@34 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
universe@34 21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
universe@34 22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
universe@34 23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
universe@57 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
universe@27 25 */
universe@27 26
universe@27 27 #include "regex_parser.h"
universe@66 28 #include <ctype.h>
universe@27 29
universe@27 30 regex_parser_t* new_regex_parser_t() {
universe@27 31 regex_parser_t* ret = malloc(sizeof(regex_parser_t));
universe@27 32 if (ret != NULL) {
universe@27 33 ret->pattern_list = new_string_list_t();
universe@66 34 ret->matched_counted = 0;
universe@27 35 ret->pattern_match = 0;
universe@27 36 ret->compiled_patterns = NULL;
universe@28 37 ret->compiled_pattern_count = 0;
universe@66 38 ret->count_chars = false;
universe@27 39 }
universe@27 40 return ret;
universe@27 41 }
universe@27 42
universe@54 43 void regex_parser_reset(regex_parser_t* parser) {
universe@66 44 parser->pattern_match = parser->matched_counted = 0;
universe@54 45 }
universe@54 46
universe@28 47 void regex_destcomppats(regex_parser_t* parser) {
universe@28 48 if (parser->compiled_patterns != NULL) {
universe@66 49 for (unsigned i = 0 ; i < parser->compiled_pattern_count ; i++) {
universe@28 50 if (parser->compiled_patterns[i] != NULL) {
universe@28 51 free(parser->compiled_patterns[i]);
universe@28 52 }
universe@28 53 }
universe@28 54 free(parser->compiled_patterns);
universe@28 55 parser->compiled_patterns = NULL;
universe@28 56 parser->compiled_pattern_count = 0;
universe@28 57 }
universe@28 58 }
universe@28 59
universe@27 60 void destroy_regex_parser_t(regex_parser_t* parser) {
universe@28 61 regex_destcomppats(parser);
universe@27 62 destroy_string_list_t(parser->pattern_list);
universe@27 63 free(parser);
universe@27 64 }
universe@27 65
universe@27 66 bool regex_parser_matching(regex_parser_t* parser) {
universe@27 67 return parser->pattern_match > 0;
universe@27 68 }
universe@27 69
/*
 * Counts the non-whitespace characters in input[start] .. input[end - 1].
 *
 * Whitespace is whatever isspace() reports. Each character is converted to
 * unsigned char first, because passing a negative char value (e.g. a byte
 * of a multi-byte sequence on platforms with signed char) to isspace() is
 * undefined behaviour.
 *
 * Returns 0 if start >= end.
 */
static unsigned regex_parser_count_chars(const char* input,
        unsigned start, unsigned end) {
    unsigned ret = 0;
    for (unsigned i = start ; i < end ; i++) {
        if (!isspace((unsigned char) input[i])) {
            ret++;
        }
    }
    return ret;
}
universe@66 78
universe@28 79 int regex_parser_do(regex_parser_t* parser, char* input) {
universe@28 80 int err = REG_NOMATCH;
universe@28 81 if (parser->compiled_pattern_count > 0) {
universe@28 82 regmatch_t match;
universe@27 83
universe@28 84 if (regex_parser_matching(parser)) {
universe@66 85 if (parser->count_chars) {
universe@66 86 parser->matched_counted +=
universe@66 87 regex_parser_count_chars(input, 0, strlen(input));
universe@66 88 } else {
universe@66 89 parser->matched_counted++;
universe@66 90 }
universe@28 91
universe@28 92 err = regexec(parser->compiled_patterns[parser->pattern_match],
universe@28 93 input, 1, &match, 0);
universe@28 94 if (err > 0 && err != REG_NOMATCH) {
universe@28 95 fprintf(stderr, "Regex-Error: 0x%08x", err);
universe@28 96 }
universe@28 97 if (err == 0) {
universe@28 98 parser->pattern_match = 0;
universe@66 99 size_t input_len = strlen(input);
universe@66 100 if (match.rm_eo < input_len) {
universe@66 101 if (parser->count_chars) {
universe@66 102 /* do not exclude chars that occur after pattern end */
universe@66 103 parser->matched_counted -=
universe@66 104 regex_parser_count_chars(input, match.rm_eo, input_len);
universe@66 105 } else {
universe@66 106 /* do not exclude line, if it does not end with the pattern */
universe@66 107 parser->matched_counted--;
universe@66 108 }
universe@28 109 }
universe@28 110 }
universe@28 111 } else {
universe@66 112 for (unsigned i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
universe@28 113 err = regexec(parser->compiled_patterns[i], input, 1, &match, 0);
universe@28 114 if (err > 0 && err != REG_NOMATCH) {
universe@28 115 fprintf(stderr, "Regex-Error: 0x%08x", err);
universe@28 116 }
universe@28 117 if (err == 0) {
universe@66 118 /* a start pattern matches, start counting */
universe@66 119 parser->matched_counted = 0;
universe@66 120 /* Check, if end pattern is also in this line */
universe@28 121 parser->pattern_match = i+1;
universe@28 122 regex_parser_do(parser, input);
universe@66 123 /* If something was found, determine what exactly to exclude */
universe@66 124 if (parser->matched_counted > 0) {
universe@66 125 if (parser->count_chars) {
universe@66 126 /* do not exclude the characters before the pattern */
universe@66 127 parser->matched_counted -=
universe@66 128 regex_parser_count_chars(input, 0, match.rm_so);
universe@66 129 } else {
universe@66 130 /* do not match line, if it does not start with the pattern */
universe@66 131 if (match.rm_so > 0) {
universe@66 132 parser->matched_counted--;
universe@66 133 }
universe@66 134 }
universe@28 135 }
universe@28 136 break;
universe@28 137 }
universe@27 138 }
universe@27 139 }
universe@27 140 }
universe@28 141 return err;
universe@27 142 }
universe@28 143
universe@28 144 bool regex_compile_all(regex_parser_t* parser) {
universe@28 145 bool success = true;
universe@28 146 size_t pcount = parser->pattern_list->count;
universe@28 147 if (pcount > 0) {
universe@28 148 regex_destcomppats(parser);
universe@28 149 parser->compiled_patterns = calloc(pcount, sizeof(regex_t));
universe@28 150 parser->compiled_pattern_count = pcount;
universe@28 151
universe@28 152 regex_t* re;
universe@66 153 for (unsigned i = 0 ; i < pcount ; i++) {
universe@28 154 re = malloc(sizeof(regex_t));
universe@28 155 if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) {
universe@28 156 parser->compiled_patterns[i] = re;
universe@28 157 } else {
universe@28 158 fprintf(stderr, "Cannot compile pattern: %s\n",
universe@28 159 (parser->pattern_list->items[i]));
universe@28 160 parser->compiled_patterns[i] = NULL;
universe@28 161 success = false;
universe@28 162 }
universe@28 163 }
universe@28 164 }
universe@28 165 return success;
universe@28 166 }

mercurial