universe@27: /* universe@34: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. universe@57: * Copyright 2018 Mike Becker. All rights reserved. universe@34: * universe@34: * Redistribution and use in source and binary forms, with or without universe@34: * modification, are permitted provided that the following conditions are met: universe@34: * universe@34: * 1. Redistributions of source code must retain the above copyright universe@34: * notice, this list of conditions and the following disclaimer. universe@34: * universe@34: * 2. Redistributions in binary form must reproduce the above copyright universe@34: * notice, this list of conditions and the following disclaimer in the universe@34: * documentation and/or other materials provided with the distribution. universe@34: * universe@34: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" universe@34: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE universe@34: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE universe@34: * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE universe@34: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL universe@34: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR universe@34: * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER universe@34: * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, universe@34: * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE universe@57: * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. universe@27: */ universe@27: universe@27: #include "regex_parser.h" universe@66: #include universe@27: universe@27: regex_parser_t* new_regex_parser_t() { universe@27: regex_parser_t* ret = malloc(sizeof(regex_parser_t)); universe@27: if (ret != NULL) { universe@27: ret->pattern_list = new_string_list_t(); universe@66: ret->matched_counted = 0; universe@27: ret->pattern_match = 0; universe@27: ret->compiled_patterns = NULL; universe@28: ret->compiled_pattern_count = 0; universe@66: ret->count_chars = false; universe@27: } universe@27: return ret; universe@27: } universe@27: universe@54: void regex_parser_reset(regex_parser_t* parser) { universe@66: parser->pattern_match = parser->matched_counted = 0; universe@54: } universe@54: universe@28: void regex_destcomppats(regex_parser_t* parser) { universe@28: if (parser->compiled_patterns != NULL) { universe@66: for (unsigned i = 0 ; i < parser->compiled_pattern_count ; i++) { universe@28: if (parser->compiled_patterns[i] != NULL) { universe@28: free(parser->compiled_patterns[i]); universe@28: } universe@28: } universe@28: free(parser->compiled_patterns); universe@28: parser->compiled_patterns = NULL; universe@28: parser->compiled_pattern_count = 0; universe@28: } universe@28: } universe@28: universe@27: void destroy_regex_parser_t(regex_parser_t* parser) { universe@28: regex_destcomppats(parser); universe@27: destroy_string_list_t(parser->pattern_list); universe@27: free(parser); universe@27: } universe@27: universe@27: bool regex_parser_matching(regex_parser_t* parser) { universe@27: return parser->pattern_match > 0; universe@27: } universe@27: universe@66: static unsigned regex_parser_count_chars(const char* input, universe@66: unsigned start, unsigned end) { universe@66: unsigned ret = 0; universe@66: for (unsigned i = start ; i < end ; i++) { universe@66: ret += isspace(input[i]) ? 0 : 1; universe@66: } universe@66: return ret; universe@66: } universe@66: universe@28: int regex_parser_do(regex_parser_t* parser, char* input) { universe@28: int err = REG_NOMATCH; universe@28: if (parser->compiled_pattern_count > 0) { universe@28: regmatch_t match; universe@27: universe@28: if (regex_parser_matching(parser)) { universe@66: if (parser->count_chars) { universe@66: parser->matched_counted += universe@66: regex_parser_count_chars(input, 0, strlen(input)); universe@66: } else { universe@66: parser->matched_counted++; universe@66: } universe@28: universe@28: err = regexec(parser->compiled_patterns[parser->pattern_match], universe@28: input, 1, &match, 0); universe@28: if (err > 0 && err != REG_NOMATCH) { universe@28: fprintf(stderr, "Regex-Error: 0x%08x", err); universe@28: } universe@28: if (err == 0) { universe@28: parser->pattern_match = 0; universe@66: size_t input_len = strlen(input); universe@66: if (match.rm_eo < input_len) { universe@66: if (parser->count_chars) { universe@66: /* do not exclude chars that occur after pattern end */ universe@66: parser->matched_counted -= universe@66: regex_parser_count_chars(input, match.rm_eo, input_len); universe@66: } else { universe@66: /* do not exclude line, if it does not end with the pattern */ universe@66: parser->matched_counted--; universe@66: } universe@28: } universe@28: } universe@28: } else { universe@66: for (unsigned i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) { universe@28: err = regexec(parser->compiled_patterns[i], input, 1, &match, 0); universe@28: if (err > 0 && err != REG_NOMATCH) { universe@28: fprintf(stderr, "Regex-Error: 0x%08x", err); universe@28: } universe@28: if (err == 0) { universe@66: /* a start pattern matches, start counting */ universe@66: parser->matched_counted = 0; universe@66: /* Check, if end pattern is also in this line */ universe@28: parser->pattern_match = i+1; universe@28: regex_parser_do(parser, input); universe@66: /* If something was found, determine what exactly to exclude */ universe@66: if (parser->matched_counted > 0) { universe@66: if (parser->count_chars) { universe@66: /* do not exclude the characters before the pattern */ universe@66: parser->matched_counted -= universe@66: regex_parser_count_chars(input, 0, match.rm_so); universe@66: } else { universe@66: /* do not match line, if it does not start with the pattern */ universe@66: if (match.rm_so > 0) { universe@66: parser->matched_counted--; universe@66: } universe@66: } universe@28: } universe@28: break; universe@28: } universe@27: } universe@27: } universe@27: } universe@28: return err; universe@27: } universe@28: universe@28: bool regex_compile_all(regex_parser_t* parser) { universe@28: bool success = true; universe@28: size_t pcount = parser->pattern_list->count; universe@28: if (pcount > 0) { universe@28: regex_destcomppats(parser); universe@28: parser->compiled_patterns = calloc(pcount, sizeof(regex_t)); universe@28: parser->compiled_pattern_count = pcount; universe@28: universe@28: regex_t* re; universe@66: for (unsigned i = 0 ; i < pcount ; i++) { universe@28: re = malloc(sizeof(regex_t)); universe@28: if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) { universe@28: parser->compiled_patterns[i] = re; universe@28: } else { universe@28: fprintf(stderr, "Cannot compile pattern: %s\n", universe@28: (parser->pattern_list->items[i])); universe@28: parser->compiled_patterns[i] = NULL; universe@28: success = false; universe@28: } universe@28: } universe@28: } universe@28: return success; universe@28: }