Sun, 10 Nov 2024 14:06:03 +0100
improve the dist Makefile target
27
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
1 | /* |
34
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
57
68018eac46c3
adds simple tiny test suite and updates license headers
Mike Becker <universe@uap-core.de>
parents:
54
diff
changeset
|
3 | * Copyright 2018 Mike Becker. All rights reserved. |
34
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
4 | * |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
5 | * Redistribution and use in source and binary forms, with or without |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
6 | * modification, are permitted provided that the following conditions are met: |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
7 | * |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
8 | * 1. Redistributions of source code must retain the above copyright |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
9 | * notice, this list of conditions and the following disclaimer. |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
10 | * |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
11 | * 2. Redistributions in binary form must reproduce the above copyright |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
12 | * notice, this list of conditions and the following disclaimer in the |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
13 | * documentation and/or other materials provided with the distribution. |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
14 | * |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
16 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
19 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
20 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
21 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
22 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
fa9bda32de17
moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents:
29
diff
changeset
|
23 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
57
68018eac46c3
adds simple tiny test suite and updates license headers
Mike Becker <universe@uap-core.de>
parents:
54
diff
changeset
|
24 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
25 | */ |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
26 | |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
27 | #include "regex_parser.h" |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
28 | #include <ctype.h> |
27
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
29 | |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
30 | regex_parser_t* new_regex_parser_t() { |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
31 | regex_parser_t* ret = malloc(sizeof(regex_parser_t)); |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
32 | if (ret != NULL) { |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
33 | ret->pattern_list = new_string_list_t(); |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
34 | ret->matched_counted = 0; |
27
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
35 | ret->pattern_match = 0; |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
36 | ret->compiled_patterns = NULL; |
28 | 37 | ret->compiled_pattern_count = 0; |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
38 | ret->count_chars = false; |
27
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
39 | } |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
40 | return ret; |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
41 | } |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
42 | |
54
76d46533b9a9
regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match
Mike Becker <universe@uap-core.de>
parents:
48
diff
changeset
|
43 | void regex_parser_reset(regex_parser_t* parser) { |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
44 | parser->pattern_match = parser->matched_counted = 0; |
54
76d46533b9a9
regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match
Mike Becker <universe@uap-core.de>
parents:
48
diff
changeset
|
45 | } |
76d46533b9a9
regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match
Mike Becker <universe@uap-core.de>
parents:
48
diff
changeset
|
46 | |
28 | 47 | void regex_destcomppats(regex_parser_t* parser) { |
48 | if (parser->compiled_patterns != NULL) { | |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
49 | for (unsigned i = 0 ; i < parser->compiled_pattern_count ; i++) { |
28 | 50 | if (parser->compiled_patterns[i] != NULL) { |
51 | free(parser->compiled_patterns[i]); | |
52 | } | |
53 | } | |
54 | free(parser->compiled_patterns); | |
55 | parser->compiled_patterns = NULL; | |
56 | parser->compiled_pattern_count = 0; | |
57 | } | |
58 | } | |
59 | ||
27
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
60 | void destroy_regex_parser_t(regex_parser_t* parser) { |
28 | 61 | regex_destcomppats(parser); |
27
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
62 | destroy_string_list_t(parser->pattern_list); |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
63 | free(parser); |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
64 | } |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
65 | |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
66 | bool regex_parser_matching(regex_parser_t* parser) { |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
67 | return parser->pattern_match > 0; |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
68 | } |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
69 | |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
/*
 * Counts the non-whitespace characters of input in the half-open index
 * range [start, end).
 *
 * input  the character buffer to inspect; must be readable up to end - 1
 * start  index of the first character to inspect
 * end    index one past the last character to inspect
 *
 * Returns the number of characters for which isspace() is false; zero when
 * start >= end.
 */
static unsigned regex_parser_count_chars(const char* input,
    unsigned start, unsigned end) {
  unsigned ret = 0;
  for (unsigned i = start ; i < end ; i++) {
    /*
     * The <ctype.h> functions require a value representable as unsigned
     * char (or EOF); a plain char may be negative for bytes >= 0x80.
     */
    if (!isspace((unsigned char) input[i])) {
      ret++;
    }
  }
  return ret;
}
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
78 | |
28 | 79 | int regex_parser_do(regex_parser_t* parser, char* input) { |
80 | int err = REG_NOMATCH; | |
81 | if (parser->compiled_pattern_count > 0) { | |
82 | regmatch_t match; | |
83 | ||
84 | if (regex_parser_matching(parser)) { | |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
85 | if (parser->count_chars) { |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
86 | parser->matched_counted += |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
87 | regex_parser_count_chars(input, 0, strlen(input)); |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
88 | } else { |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
89 | parser->matched_counted++; |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
90 | } |
27
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
91 | |
28 | 92 | err = regexec(parser->compiled_patterns[parser->pattern_match], |
93 | input, 1, &match, 0); | |
94 | if (err > 0 && err != REG_NOMATCH) { | |
95 | fprintf(stderr, "Regex-Error: 0x%08x", err); | |
96 | } | |
97 | if (err == 0) { | |
98 | parser->pattern_match = 0; | |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
99 | size_t input_len = strlen(input); |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
100 | if (match.rm_eo < input_len) { |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
101 | if (parser->count_chars) { |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
102 | /* do not exclude chars that occur after pattern end */ |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
103 | parser->matched_counted -= |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
104 | regex_parser_count_chars(input, match.rm_eo, input_len); |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
105 | } else { |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
106 | /* do not exclude line, if it does not end with the pattern */ |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
107 | parser->matched_counted--; |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
108 | } |
28 | 109 | } |
110 | } | |
111 | } else { | |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
112 | for (unsigned i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) { |
28 | 113 | err = regexec(parser->compiled_patterns[i], input, 1, &match, 0); |
114 | if (err > 0 && err != REG_NOMATCH) { | |
115 | fprintf(stderr, "Regex-Error: 0x%08x", err); | |
116 | } | |
117 | if (err == 0) { | |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
118 | /* a start pattern matches, start counting */ |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
119 | parser->matched_counted = 0; |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
120 | /* Check, if end pattern is also in this line */ |
28 | 121 | parser->pattern_match = i+1; |
122 | regex_parser_do(parser, input); | |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
123 | /* If something was found, determine what exactly to exclude */ |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
124 | if (parser->matched_counted > 0) { |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
125 | if (parser->count_chars) { |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
126 | /* do not exclude the characters before the pattern */ |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
127 | parser->matched_counted -= |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
128 | regex_parser_count_chars(input, 0, match.rm_so); |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
129 | } else { |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
130 | /* do not match line, if it does not start with the pattern */ |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
131 | if (match.rm_so > 0) { |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
132 | parser->matched_counted--; |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
133 | } |
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
134 | } |
28 | 135 | } |
136 | break; | |
137 | } | |
27
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
138 | } |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
139 | } |
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
140 | } |
28 | 141 | return err; |
27
95a958e3de88
added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff
changeset
|
142 | } |
28 | 143 | |
144 | bool regex_compile_all(regex_parser_t* parser) { | |
145 | bool success = true; | |
146 | size_t pcount = parser->pattern_list->count; | |
147 | if (pcount > 0) { | |
148 | regex_destcomppats(parser); | |
149 | parser->compiled_patterns = calloc(pcount, sizeof(regex_t)); | |
150 | parser->compiled_pattern_count = pcount; | |
151 | ||
152 | regex_t* re; | |
66
be2084398c37
new feature: count non-whitespace characters
Mike Becker <universe@uap-core.de>
parents:
57
diff
changeset
|
153 | for (unsigned i = 0 ; i < pcount ; i++) { |
28 | 154 | re = malloc(sizeof(regex_t)); |
155 | if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) { | |
156 | parser->compiled_patterns[i] = re; | |
157 | } else { | |
158 | fprintf(stderr, "Cannot compile pattern: %s\n", | |
159 | (parser->pattern_list->items[i])); | |
160 | parser->compiled_patterns[i] = NULL; | |
161 | success = false; | |
162 | } | |
163 | } | |
164 | } | |
165 | return success; | |
166 | } |