src/regex_parser.c

Mon, 19 Mar 2018 16:36:14 +0100

author
Mike Becker <universe@uap-core.de>
date
Mon, 19 Mar 2018 16:36:14 +0100
changeset 54
76d46533b9a9
parent 48
0d2c13c24fd0
child 57
68018eac46c3
permissions
-rw-r--r--

regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match

27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
1 /*
34
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
48
0d2c13c24fd0 adds remaining autoconf files
Mike Becker <universe@uap-core.de>
parents: 36
diff changeset
3 * Copyright 2017 Mike Becker. All rights reserved.
34
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
4 *
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
5 * Redistribution and use in source and binary forms, with or without
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
6 * modification, are permitted provided that the following conditions are met:
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
7 *
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
8 * 1. Redistributions of source code must retain the above copyright
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
9 * notice, this list of conditions and the following disclaimer.
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
10 *
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
11 * 2. Redistributions in binary form must reproduce the above copyright
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
12 * notice, this list of conditions and the following disclaimer in the
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
13 * documentation and/or other materials provided with the distribution.
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
14 *
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
21 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
22 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fa9bda32de17 moved src files to src subdirectory and added licence text
Mike Becker <universe@uap-core.de>
parents: 29
diff changeset
25 *
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
26 * regex_parser.c
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
27 *
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
28 * Created on: 26.01.2012
29
fa625066ae52 fixed author note
Mike Becker <universe@uap-core.de>
parents: 28
diff changeset
29 * Author: Mike
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
30 */
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
31
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
32 #include "regex_parser.h"
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
33
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
34 regex_parser_t* new_regex_parser_t() {
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
35 regex_parser_t* ret = malloc(sizeof(regex_parser_t));
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
36 if (ret != NULL) {
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
37 ret->pattern_list = new_string_list_t();
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
38 ret->matched_lines = 0;
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
39 ret->pattern_match = 0;
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
40 ret->compiled_patterns = NULL;
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
41 ret->compiled_pattern_count = 0;
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
42 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
43 return ret;
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
44 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
45
54
76d46533b9a9 regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match
Mike Becker <universe@uap-core.de>
parents: 48
diff changeset
46 void regex_parser_reset(regex_parser_t* parser) {
76d46533b9a9 regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match
Mike Becker <universe@uap-core.de>
parents: 48
diff changeset
47 parser->pattern_match = parser->matched_lines = 0;
76d46533b9a9 regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match
Mike Becker <universe@uap-core.de>
parents: 48
diff changeset
48 }
76d46533b9a9 regex parser was not properly reset before each file, sometimes resulting in wrong line counts, when the previous scanned file ended with a match
Mike Becker <universe@uap-core.de>
parents: 48
diff changeset
49
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
50 void regex_destcomppats(regex_parser_t* parser) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
51 if (parser->compiled_patterns != NULL) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
52 for (int i = 0 ; i < parser->compiled_pattern_count ; i++) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
53 if (parser->compiled_patterns[i] != NULL) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
54 free(parser->compiled_patterns[i]);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
55 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
56 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
57 free(parser->compiled_patterns);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
58 parser->compiled_patterns = NULL;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
59 parser->compiled_pattern_count = 0;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
60 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
61 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
62
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
63 void destroy_regex_parser_t(regex_parser_t* parser) {
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
64 regex_destcomppats(parser);
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
65 destroy_string_list_t(parser->pattern_list);
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
66 free(parser);
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
67 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
68
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
69 bool regex_parser_matching(regex_parser_t* parser) {
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
70 return parser->pattern_match > 0;
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
71 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
72
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
73 int regex_parser_do(regex_parser_t* parser, char* input) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
74 int err = REG_NOMATCH;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
75 if (parser->compiled_pattern_count > 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
76 regmatch_t match;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
77
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
78 if (regex_parser_matching(parser)) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
79 parser->matched_lines++;
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
80
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
81 err = regexec(parser->compiled_patterns[parser->pattern_match],
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
82 input, 1, &match, 0);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
83 if (err > 0 && err != REG_NOMATCH) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
84 fprintf(stderr, "Regex-Error: 0x%08x", err);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
85 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
86 if (err == 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
87 parser->pattern_match = 0;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
88 /* do not match line, if it does not end with the pattern */
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
89 if (match.rm_eo < strlen(input)) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
90 parser->matched_lines--;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
91 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
92 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
93 } else {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
94 for (int i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
95 err = regexec(parser->compiled_patterns[i], input, 1, &match, 0);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
96 if (err > 0 && err != REG_NOMATCH) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
97 fprintf(stderr, "Regex-Error: 0x%08x", err);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
98 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
99 if (err == 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
100 parser->pattern_match = i+1;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
101 parser->matched_lines = 0;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
102 /* Check, if end pattern is also in this line */
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
103 regex_parser_do(parser, input);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
104 /* do not match line, if it does not start with the pattern */
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
105 if (match.rm_so > 0 && parser->matched_lines > 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
106 parser->matched_lines--;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
107 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
108 break;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
109 }
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
110 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
111 }
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
112 }
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
113 return err;
27
95a958e3de88 added regexp_parser struct and compile function
Mike Becker <universe@uap-core.de>
parents:
diff changeset
114 }
28
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
115
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
116 bool regex_compile_all(regex_parser_t* parser) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
117 bool success = true;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
118 size_t pcount = parser->pattern_list->count;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
119 if (pcount > 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
120 regex_destcomppats(parser);
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
121 parser->compiled_patterns = calloc(pcount, sizeof(regex_t));
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
122 parser->compiled_pattern_count = pcount;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
123
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
124 regex_t* re;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
125 for (int i = 0 ; i < pcount ; i++) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
126 re = malloc(sizeof(regex_t));
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
127 if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
128 parser->compiled_patterns[i] = re;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
129 } else {
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
130 fprintf(stderr, "Cannot compile pattern: %s\n",
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
131 (parser->pattern_list->items[i]));
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
132 parser->compiled_patterns[i] = NULL;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
133 success = false;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
134 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
135 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
136 }
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
137 return success;
72a98cbcb9f1 added regex parser
Mike Becker <universe@uap-core.de>
parents: 27
diff changeset
138 }

mercurial