src/regex_parser.c

changeset 66
be2084398c37
parent 57
68018eac46c3
equal deleted inserted replaced
65:49fa681f3a7e 66:be2084398c37
23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */ 25 */
26 26
27 #include "regex_parser.h" 27 #include "regex_parser.h"
28 #include <ctype.h>
28 29
29 regex_parser_t* new_regex_parser_t() { 30 regex_parser_t* new_regex_parser_t() {
30 regex_parser_t* ret = malloc(sizeof(regex_parser_t)); 31 regex_parser_t* ret = malloc(sizeof(regex_parser_t));
31 if (ret != NULL) { 32 if (ret != NULL) {
32 ret->pattern_list = new_string_list_t(); 33 ret->pattern_list = new_string_list_t();
33 ret->matched_lines = 0; 34 ret->matched_counted = 0;
34 ret->pattern_match = 0; 35 ret->pattern_match = 0;
35 ret->compiled_patterns = NULL; 36 ret->compiled_patterns = NULL;
36 ret->compiled_pattern_count = 0; 37 ret->compiled_pattern_count = 0;
38 ret->count_chars = false;
37 } 39 }
38 return ret; 40 return ret;
39 } 41 }
40 42
41 void regex_parser_reset(regex_parser_t* parser) { 43 void regex_parser_reset(regex_parser_t* parser) {
42 parser->pattern_match = parser->matched_lines = 0; 44 parser->pattern_match = parser->matched_counted = 0;
43 } 45 }
44 46
45 void regex_destcomppats(regex_parser_t* parser) { 47 void regex_destcomppats(regex_parser_t* parser) {
46 if (parser->compiled_patterns != NULL) { 48 if (parser->compiled_patterns != NULL) {
47 for (int i = 0 ; i < parser->compiled_pattern_count ; i++) { 49 for (unsigned i = 0 ; i < parser->compiled_pattern_count ; i++) {
48 if (parser->compiled_patterns[i] != NULL) { 50 if (parser->compiled_patterns[i] != NULL) {
49 free(parser->compiled_patterns[i]); 51 free(parser->compiled_patterns[i]);
50 } 52 }
51 } 53 }
52 free(parser->compiled_patterns); 54 free(parser->compiled_patterns);
63 65
64 bool regex_parser_matching(regex_parser_t* parser) { 66 bool regex_parser_matching(regex_parser_t* parser) {
65 return parser->pattern_match > 0; 67 return parser->pattern_match > 0;
66 } 68 }
67 69
70 static unsigned regex_parser_count_chars(const char* input,
71 unsigned start, unsigned end) {
72 unsigned ret = 0;
73 for (unsigned i = start ; i < end ; i++) {
74 ret += isspace(input[i]) ? 0 : 1;
75 }
76 return ret;
77 }
78
68 int regex_parser_do(regex_parser_t* parser, char* input) { 79 int regex_parser_do(regex_parser_t* parser, char* input) {
69 int err = REG_NOMATCH; 80 int err = REG_NOMATCH;
70 if (parser->compiled_pattern_count > 0) { 81 if (parser->compiled_pattern_count > 0) {
71 regmatch_t match; 82 regmatch_t match;
72 83
73 if (regex_parser_matching(parser)) { 84 if (regex_parser_matching(parser)) {
74 parser->matched_lines++; 85 if (parser->count_chars) {
86 parser->matched_counted +=
87 regex_parser_count_chars(input, 0, strlen(input));
88 } else {
89 parser->matched_counted++;
90 }
75 91
76 err = regexec(parser->compiled_patterns[parser->pattern_match], 92 err = regexec(parser->compiled_patterns[parser->pattern_match],
77 input, 1, &match, 0); 93 input, 1, &match, 0);
78 if (err > 0 && err != REG_NOMATCH) { 94 if (err > 0 && err != REG_NOMATCH) {
79 fprintf(stderr, "Regex-Error: 0x%08x", err); 95 fprintf(stderr, "Regex-Error: 0x%08x", err);
80 } 96 }
81 if (err == 0) { 97 if (err == 0) {
82 parser->pattern_match = 0; 98 parser->pattern_match = 0;
83 /* do not match line, if it does not end with the pattern */ 99 size_t input_len = strlen(input);
84 if (match.rm_eo < strlen(input)) { 100 if (match.rm_eo < input_len) {
85 parser->matched_lines--; 101 if (parser->count_chars) {
102 /* do not exclude chars that occur after pattern end */
103 parser->matched_counted -=
104 regex_parser_count_chars(input, match.rm_eo, input_len);
105 } else {
106 /* do not exclude line, if it does not end with the pattern */
107 parser->matched_counted--;
108 }
86 } 109 }
87 } 110 }
88 } else { 111 } else {
89 for (int i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) { 112 for (unsigned i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) {
90 err = regexec(parser->compiled_patterns[i], input, 1, &match, 0); 113 err = regexec(parser->compiled_patterns[i], input, 1, &match, 0);
91 if (err > 0 && err != REG_NOMATCH) { 114 if (err > 0 && err != REG_NOMATCH) {
92 fprintf(stderr, "Regex-Error: 0x%08x", err); 115 fprintf(stderr, "Regex-Error: 0x%08x", err);
93 } 116 }
94 if (err == 0) { 117 if (err == 0) {
118 /* a start pattern matches, start counting */
119 parser->matched_counted = 0;
120 /* Check, if end pattern is also in this line */
95 parser->pattern_match = i+1; 121 parser->pattern_match = i+1;
96 parser->matched_lines = 0;
97 /* Check, if end pattern is also in this line */
98 regex_parser_do(parser, input); 122 regex_parser_do(parser, input);
99 /* do not match line, if it does not start with the pattern */ 123 /* If something was found, determine what exactly to exclude */
100 if (match.rm_so > 0 && parser->matched_lines > 0) { 124 if (parser->matched_counted > 0) {
101 parser->matched_lines--; 125 if (parser->count_chars) {
126 /* do not exclude the characters before the pattern */
127 parser->matched_counted -=
128 regex_parser_count_chars(input, 0, match.rm_so);
129 } else {
130 /* do not match line, if it does not start with the pattern */
131 if (match.rm_so > 0) {
132 parser->matched_counted--;
133 }
134 }
102 } 135 }
103 break; 136 break;
104 } 137 }
105 } 138 }
106 } 139 }
115 regex_destcomppats(parser); 148 regex_destcomppats(parser);
116 parser->compiled_patterns = calloc(pcount, sizeof(regex_t)); 149 parser->compiled_patterns = calloc(pcount, sizeof(regex_t));
117 parser->compiled_pattern_count = pcount; 150 parser->compiled_pattern_count = pcount;
118 151
119 regex_t* re; 152 regex_t* re;
120 for (int i = 0 ; i < pcount ; i++) { 153 for (unsigned i = 0 ; i < pcount ; i++) {
121 re = malloc(sizeof(regex_t)); 154 re = malloc(sizeof(regex_t));
122 if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) { 155 if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) {
123 parser->compiled_patterns[i] = re; 156 parser->compiled_patterns[i] = re;
124 } else { 157 } else {
125 fprintf(stderr, "Cannot compile pattern: %s\n", 158 fprintf(stderr, "Cannot compile pattern: %s\n",

mercurial