# HG changeset patch # User Mike Becker # Date 1328188655 -3600 # Node ID 72a98cbcb9f1b0761e1ad2ef6bfb48b8e60d6ec2 # Parent 95a958e3de88df539130be6da3a423c90066d419 added regex parser diff -r 95a958e3de88 -r 72a98cbcb9f1 .cproject --- a/.cproject Thu Jan 26 15:55:52 2012 +0100 +++ b/.cproject Thu Feb 02 14:17:35 2012 +0100 @@ -20,7 +20,7 @@ - + @@ -44,6 +44,47 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r 95a958e3de88 -r 72a98cbcb9f1 .project --- a/.project Thu Jan 26 15:55:52 2012 +0100 +++ b/.project Thu Feb 02 14:17:35 2012 +0100 @@ -51,7 +51,7 @@ org.eclipse.cdt.make.core.fullBuildTarget - all + all CONF=mingw org.eclipse.cdt.make.core.stopOnError diff -r 95a958e3de88 -r 72a98cbcb9f1 Makefile --- a/Makefile Thu Jan 26 15:55:52 2012 +0100 +++ b/Makefile Thu Feb 02 14:17:35 2012 +0100 @@ -1,8 +1,43 @@ -CC = gcc -CARG = -Wall -std=gnu99 -O -BUILDDIR = build/ -OBJ = $(shell ls | grep '\.c' | sed 's/^\([^.]*\)\.c$$/${BUILDDIR:/=\/}\1.o/g' | tr '\n' ' ') -BIN = ${BUILDDIR}cline +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. +# +# Copyright 2011 Mike Becker. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# available configurations: +# gcc (default) (will use libc regex implicitly) +# mingw (will use libregex as static lib) +# +# debug configurations: +# gcc-debug +# mingw-debug +# + +#ifndef CONF +CONF = gcc +#endif + +include ${CONF}.mk .PHONY: setup run-compile teardown @@ -13,11 +48,11 @@ -${MAKE} compile compile: ${OBJ} - ${CC} -o ${BIN} ${OBJ} + ${LD} -o ${BIN} ${OBJ} ${LDFLAGS} setup: mkdir -p ${BUILDDIR} - rm -f build/cline.o + rm -f ${BUILDDIR}cline.o mv cline.h cline.src cat cline.src | sed "s/VERSION.*/VERSION=\"$(shell hg identify -n) ($(shell hg identify -i))\";/g" > cline.h @@ -26,8 +61,8 @@ mv cline.src cline.h ${BUILDDIR}%.o: %.c - ${CC} ${CARG} -c -o ${BUILDDIR}$*.o $< + ${CC} ${CFLAGS} -c -o ${BUILDDIR}$*.o $< clean: - rm -f build/* + rm -f ${BUILDDIR}*.o \ No newline at end of file diff -r 95a958e3de88 -r 72a98cbcb9f1 cline.c --- a/cline.c Thu Jan 26 15:55:52 2012 +0100 +++ b/cline.c Thu Feb 02 14:17:35 2012 +0100 @@ -22,8 +22,9 @@ "\n\nOptions:" "\n -b - binary file heuristics level (default medium)" "\n One of: ignore low medium high" + "\n -E - Excludes any line matching the " "\n -e - Excludes lines between and " - "\n You may use this option multiple times" + "\n You may use these options multiple times" "\n -h, --help - this help text" "\n -m - print information about matching files only" "\n -s - only count files with these suffixes (separated" @@ -35,13 +36,18 @@ "\n -V - turn verbose output off, print the result only" "\n\n" "The default call without any options is:" - "\n cline ./\n" + "\n cline ./\n\n" "So each file in the working directory is counted. If you want to count C" "\nsource code in your working directory and its subdirectories, type:" "\n cline -rs .c\n" - "\nIf you want to exclude comment lines, you may use the -e option." + "\nIf you want to exclude comment lines, you may use the -e/-E option." "\nAfter a line matches the regex pattern any following line is" - "\nnot counted unless a line matches the pattern."; + "\nnot counted unless a line matches the pattern. A line is still " + "\ncounted when it does not start or end with the respective patterns." + "\nPlease note, that cline does not remove whitespace characters as this" + "\nmight not be reasonable in some cases." + "\n\nExample (C comments):" + "\n cline -s .c,.h -E \"\\s*//\" -e \"\\s*/\\*\" \"\\*/\\s*\""; printf(helpText); } @@ -74,7 +80,7 @@ for (int t = 1 ; t < argc ; t++) { - int argflags = checkArgument(argv[t], "hsSrRmvVbe"); + int argflags = checkArgument(argv[t], "hsSrRmvVbeE"); /* s, S */ if ((argflags & 6) > 0) { @@ -138,6 +144,7 @@ return exit_with_help(settings, 1); } } + /* e */ if ((argflags & 512) > 0) { if (t + 2 >= argc) { return exit_with_help(settings, 1); @@ -145,6 +152,15 @@ t++; add_string(settings->regex->pattern_list, argv[t]); t++; add_string(settings->regex->pattern_list, argv[t]); } + /* E */ + if ((argflags & 1024) > 0) { + t++; + if (t >= argc) { + return exit_with_help(settings, 1); + } + add_string(settings->regex->pattern_list, argv[t]); + add_string(settings->regex->pattern_list, "$"); + } /* Path */ if (argflags == 0) { if (registerArgument(&checked, 1024)) { @@ -167,27 +183,29 @@ } /* Scan directory */ - regex_compile_all(settings->regex); - int lines = scanDirectory((scanner_t){directory, 0}, settings); - destroy_settings_t(settings); + if (regex_compile_all(settings->regex)) { + int lines = scanDirectory((scanner_t){directory, 0}, settings); + destroy_settings_t(settings); - /* Print double line and line count */ - for (int t = 0 ; t < 79 ; t++) { - printf("="); - } - printf("\n%73d lines\n", lines); + /* Print double line and line count */ + for (int t = 0 ; t < 79 ; t++) { + printf("="); + } + printf("\n%73d lines\n", lines); - if (settings->confusing_lnlen && settings->regex->pattern_list->count > 0) { - printf("\nSome files contain too long lines.\n" - "The regex parser currently supports a maximum line length of %d." - "\nThe result might be wrong.\n", REGEX_MAX_LINELENGTH); - } + if (settings->confusing_lnlen && settings->regex->pattern_list->count > 0) { + printf("\nSome files contain too long lines.\n" + "The regex parser currently supports a maximum line length of %d." + "\nThe result might be wrong.\n", REGEX_MAX_LINELENGTH); + } - if (!settings->verbose) { - reopen_stdout(); - printf("%d", lines); + if (!settings->verbose) { + reopen_stdout(); + printf("%d", lines); + } } fflush(stdout); + fflush(stderr); return 0; } diff -r 95a958e3de88 -r 72a98cbcb9f1 gcc-debug.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gcc-debug.mk Thu Feb 02 14:17:35 2012 +0100 @@ -0,0 +1,34 @@ +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. +# +# Copyright 2011 Mike Becker. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +CC = gcc +LD = gcc +CFLAGS = -Wall -std=gnu99 -O0 -ggdb +LDFLAGS = +BUILDDIR = build/ +OBJ = $(shell ls | grep '\.c' | sed 's/^\([^.]*\)\.c$$/${BUILDDIR:/=\/}\1.o/g' | tr '\n' ' ') +BIN = ${BUILDDIR}cline diff -r 95a958e3de88 -r 72a98cbcb9f1 gcc.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gcc.mk Thu Feb 02 14:17:35 2012 +0100 @@ -0,0 +1,34 @@ +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. +# +# Copyright 2011 Mike Becker. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +CC = gcc +LD = gcc +CFLAGS = -Wall -std=gnu99 -O +LDFLAGS = +BUILDDIR = build/ +OBJ = $(shell ls | grep '\.c' | sed 's/^\([^.]*\)\.c$$/${BUILDDIR:/=\/}\1.o/g' | tr '\n' ' ') +BIN = ${BUILDDIR}cline diff -r 95a958e3de88 -r 72a98cbcb9f1 mingw-debug.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mingw-debug.mk Thu Feb 02 14:17:35 2012 +0100 @@ -0,0 +1,34 @@ +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. +# +# Copyright 2011 Mike Becker. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +CC = gcc +LD = gcc +CFLAGS = -Wall -std=gnu99 -O0 -g +LDFLAGS = -static -lregex +BUILDDIR = build/ +OBJ = $(shell ls | grep '\.c' | sed 's/^\([^.]*\)\.c$$/${BUILDDIR:/=\/}\1.o/g' | tr '\n' ' ') +BIN = ${BUILDDIR}cline diff -r 95a958e3de88 -r 72a98cbcb9f1 mingw.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mingw.mk Thu Feb 02 14:17:35 2012 +0100 @@ -0,0 +1,34 @@ +# +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. +# +# Copyright 2011 Mike Becker. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +CC = gcc +LD = gcc +CFLAGS = -Wall -std=gnu99 -O +LDFLAGS = -static -lregex +BUILDDIR = build/ +OBJ = $(shell ls | grep '\.c' | sed 's/^\([^.]*\)\.c$$/${BUILDDIR:/=\/}\1.o/g' | tr '\n' ' ') +BIN = ${BUILDDIR}cline diff -r 95a958e3de88 -r 72a98cbcb9f1 regex_parser.c --- a/regex_parser.c Thu Jan 26 15:55:52 2012 +0100 +++ b/regex_parser.c Thu Feb 02 14:17:35 2012 +0100 @@ -14,11 +14,26 @@ ret->matched_lines = 0; ret->pattern_match = 0; ret->compiled_patterns = NULL; + ret->compiled_pattern_count = 0; } return ret; } +void regex_destcomppats(regex_parser_t* parser) { + if (parser->compiled_patterns != NULL) { + for (int i = 0 ; i < parser->compiled_pattern_count ; i++) { + if (parser->compiled_patterns[i] != NULL) { + free(parser->compiled_patterns[i]); + } + } + free(parser->compiled_patterns); + parser->compiled_patterns = NULL; + parser->compiled_pattern_count = 0; + } +} + void destroy_regex_parser_t(regex_parser_t* parser) { + regex_destcomppats(parser); destroy_string_list_t(parser->pattern_list); free(parser); } @@ -27,24 +42,69 @@ return parser->pattern_match > 0; } -void regex_compile_all(regex_parser_t* parser) { - size_t pcount = parser->pattern_list->count; - if (pcount > 0) { - if (parser->compiled_patterns != NULL) { - free(parser->compiled_patterns); - } - parser->compiled_patterns = calloc(pcount, sizeof(regex_t)); +int regex_parser_do(regex_parser_t* parser, char* input) { + int err = REG_NOMATCH; + if (parser->compiled_pattern_count > 0) { + regmatch_t match; - regex_t* re = malloc(sizeof(regex_t)); - for (int i = 0 ; i < pcount ; i++) { - if (regcomp(re, parser->pattern_list->items[i], - REG_EXTENDED|REG_NOSUB) == 0) { - parser->compiled_patterns[i] = re; - } else { - fprintf(stderr, "Cannot compile: %s\n", - (parser->pattern_list->items[i])); - parser->compiled_patterns[i] = NULL; + if (regex_parser_matching(parser)) { + parser->matched_lines++; + + err = regexec(parser->compiled_patterns[parser->pattern_match], + input, 1, &match, 0); + if (err > 0 && err != REG_NOMATCH) { + fprintf(stderr, "Regex-Error: 0x%08x", err); + } + if (err == 0) { + parser->pattern_match = 0; + /* do not match line, if it does not end with the pattern */ + if (match.rm_eo < strlen(input)) { + parser->matched_lines--; + } + } + } else { + for (int i = 0 ; i < parser->compiled_pattern_count - 1 ; i += 2) { + err = regexec(parser->compiled_patterns[i], input, 1, &match, 0); + if (err > 0 && err != REG_NOMATCH) { + fprintf(stderr, "Regex-Error: 0x%08x", err); + } + if (err == 0) { + parser->pattern_match = i+1; + parser->matched_lines = 0; + /* Check, if end pattern is also in this line */ + regex_parser_do(parser, input); + /* do not match line, if it does not start with the pattern */ + if (match.rm_so > 0 && parser->matched_lines > 0) { + parser->matched_lines--; + } + break; + } } } } + return err; } + +bool regex_compile_all(regex_parser_t* parser) { + bool success = true; + size_t pcount = parser->pattern_list->count; + if (pcount > 0) { + regex_destcomppats(parser); + parser->compiled_patterns = calloc(pcount, sizeof(regex_t)); + parser->compiled_pattern_count = pcount; + + regex_t* re; + for (int i = 0 ; i < pcount ; i++) { + re = malloc(sizeof(regex_t)); + if (regcomp(re, parser->pattern_list->items[i], REG_EXTENDED) == 0) { + parser->compiled_patterns[i] = re; + } else { + fprintf(stderr, "Cannot compile pattern: %s\n", + (parser->pattern_list->items[i])); + parser->compiled_patterns[i] = NULL; + success = false; + } + } + } + return success; +} diff -r 95a958e3de88 -r 72a98cbcb9f1 regex_parser.h --- a/regex_parser.h Thu Jan 26 15:55:52 2012 +0100 +++ b/regex_parser.h Thu Feb 02 14:17:35 2012 +0100 @@ -18,6 +18,7 @@ typedef struct { string_list_t* pattern_list; /* even entries: start ; odd entries: end */ regex_t** compiled_patterns; + size_t compiled_pattern_count; unsigned int pattern_match; /* save position of end pattern to match - NULL when a start pattern shall match first */ unsigned int matched_lines; @@ -31,7 +32,8 @@ void destroy_regex_parser_t(regex_parser_t*); bool regex_parser_matching(regex_parser_t*); -void regex_compile_all(regex_parser_t*); +bool regex_compile_all(regex_parser_t*); +int regex_parser_do(regex_parser_t*, char*); #ifdef _cplusplus } diff -r 95a958e3de88 -r 72a98cbcb9f1 scanner.c --- a/scanner.c Thu Jan 26 15:55:52 2012 +0100 +++ b/scanner.c Thu Feb 02 14:17:35 2012 +0100 @@ -77,9 +77,14 @@ bfile = bfile_check(settings->bfileHeuristics, a); - if (a == 10) { + if (a == 10 || a == EOF) { line_buffer[line_buffer_offset] = 0; - /* TODO: do regex parsing */ + if (regex_parser_do(settings->regex, line_buffer) == 0) { + /* Only subtract lines when matching has finished */ + if (!regex_parser_matching(settings->regex)) { + lines -= settings->regex->matched_lines; + } + } line_buffer_offset = 0; lines++;