Fri, 03 Jun 2022 20:05:15 +0200
new feature: count non-whitespace characters
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 * Copyright 2018 Mike Becker. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
27 #include "regex_parser.h"
28 #include <ctype.h>
30 regex_parser_t* new_regex_parser_t() {
31 regex_parser_t* ret = malloc(sizeof(regex_parser_t));
32 if (ret != NULL) {
33 ret->pattern_list = new_string_list_t();
34 ret->matched_counted = 0;
35 ret->pattern_match = 0;
36 ret->compiled_patterns = NULL;
37 ret->compiled_pattern_count = 0;
38 ret->count_chars = false;
39 }
40 return ret;
41 }
43 void regex_parser_reset(regex_parser_t* parser) {
44 parser->pattern_match = parser->matched_counted = 0;
45 }
47 void regex_destcomppats(regex_parser_t* parser) {
48 if (parser->compiled_patterns != NULL) {
49 for (unsigned i = 0 ; i < parser->compiled_pattern_count ; i++) {
50 if (parser->compiled_patterns[i] != NULL) {
51 free(parser->compiled_patterns[i]);
52 }
53 }
54 free(parser->compiled_patterns);
55 parser->compiled_patterns = NULL;
56 parser->compiled_pattern_count = 0;
57 }
58 }
60 void destroy_regex_parser_t(regex_parser_t* parser) {
61 regex_destcomppats(parser);
62 destroy_string_list_t(parser->pattern_list);
63 free(parser);
64 }
66 bool regex_parser_matching(regex_parser_t* parser) {
67 return parser->pattern_match > 0;
68 }
/*
 * Counts the non-whitespace characters in input[start, end).
 *
 * input  the character buffer to inspect
 * start  index of the first character to inspect
 * end    index one past the last character to inspect
 *
 * Returns the number of characters in the range for which isspace()
 * is false; zero if the range is empty.
 */
static unsigned regex_parser_count_chars(const char* input,
        unsigned start, unsigned end) {
    unsigned ret = 0;
    for (unsigned i = start ; i < end ; i++) {
        /* isspace() is only defined for values representable as
         * unsigned char (or EOF); a plain char may be negative. */
        if (!isspace((unsigned char) input[i])) {
            ret++;
        }
    }
    return ret;
}
79 int regex_parser_do(regex_parser_t* parser, char* input) {
80 int err = REG_NOMATCH;
81 if (parser->compiled_pattern_count > 0) {
82 regmatch_t match;
84 if (regex_parser_matching(parser)) {
85 if (parser->count_chars) {
86 parser->matched_counted +=
87 regex_parser_count_chars(input, 0, strlen(input));
88 } else {
89 parser->matched_counted++;
90 }
92 err = regexec(parser->compiled_patterns[parser->pattern_match],
93 input, 1, &match, 0);
94 if (err > 0 && err != REG_NOMATCH) {
95 fprintf(stderr, "Regex-Error: 0x%08x", err);
96 }
97 if (err == 0) {
98 parser->pattern_match = 0;
99 size_t input_len = strlen(input);
100 if (match.rm_eo < input_len) {
101 if (parser->count_chars) {
102 /* do not exclude chars that occur after pattern end */
103 parser->matched_counted -=
104 regex_parser_count_chars(input, match.rm_eo, input_len);
105 } else {
106 /* do not exclude line, if it does not end with the pattern */
107 parser->matched_counted--;
108 }
109 }
110 }
111 } else {
112 for (unsigned i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
113 err = regexec(parser->compiled_patterns[i], input, 1, &match, 0);
114 if (err > 0 && err != REG_NOMATCH) {
115 fprintf(stderr, "Regex-Error: 0x%08x", err);
116 }
117 if (err == 0) {
118 /* a start pattern matches, start counting */
119 parser->matched_counted = 0;
120 /* Check, if end pattern is also in this line */
121 parser->pattern_match = i+1;
122 regex_parser_do(parser, input);
123 /* If something was found, determine what exactly to exclude */
124 if (parser->matched_counted > 0) {
125 if (parser->count_chars) {
126 /* do not exclude the characters before the pattern */
127 parser->matched_counted -=
128 regex_parser_count_chars(input, 0, match.rm_so);
129 } else {
130 /* do not match line, if it does not start with the pattern */
131 if (match.rm_so > 0) {
132 parser->matched_counted--;
133 }
134 }
135 }
136 break;
137 }
138 }
139 }
140 }
141 return err;
142 }
144 bool regex_compile_all(regex_parser_t* parser) {
145 bool success = true;
146 size_t pcount = parser->pattern_list->count;
147 if (pcount > 0) {
148 regex_destcomppats(parser);
149 parser->compiled_patterns = calloc(pcount, sizeof(regex_t));
150 parser->compiled_pattern_count = pcount;
152 regex_t* re;
153 for (unsigned i = 0 ; i < pcount ; i++) {
154 re = malloc(sizeof(regex_t));
155 if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) {
156 parser->compiled_patterns[i] = re;
157 } else {
158 fprintf(stderr, "Cannot compile pattern: %s\n",
159 (parser->pattern_list->items[i]));
160 parser->compiled_patterns[i] = NULL;
161 success = false;
162 }
163 }
164 }
165 return success;
166 }