src/cline.c

Fri, 03 Jun 2022 20:05:15 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 03 Jun 2022 20:05:15 +0200
changeset 66
be2084398c37
parent 62
7f5f9f43d0c0
permissions
-rw-r--r--

new feature: count non-whitespace characters

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 
 * Copyright 2018 Mike Becker. All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "cline.h"
#include "scanner.h"
#include "settings.h"
#include "arguments.h"
#include "regex_parser.h"

/*
 * Writes the usage description (synopsis, option list, shortcuts and
 * examples) to standard output.
 */
void printHelpText() {
  /* The text contains no conversion specifications, so it is emitted as-is. */
  static const char help_text[] =
    "\nUsage:"
    "\n      cline [Options] [Directories...]"
    "\n\nCounts the line terminator characters (\\n) within all"
    " files in the specified\ndirectories."
    "\n\nOptions:"
    "\n  -b <level>          - binary file heuristics level (default medium)"
    "\n                        One of: ignore low medium high"
    "\n  -c                  - Count non-whitespace characters instead of lines"
    "\n  -E <pattern>        - Excludes any line matching the <pattern>"
    "\n  -e <start> <end>    - Excludes lines between <start> and <end>"
    "\n                        You may use these options multiple times"
    "\n  -h, --help          - this help text"
    "\n  -i                  - print out individual sums per file extension"
    "\n                        (cannot be used together with -V)"
    "\n  -m                  - print information about matching files only"
    "\n  -s <suffixes>       - only count files with these suffixes (separated"
    "\n                        by commas)"
    "\n  -S <suffixes>       - count any file except those with these suffixes"
    "\n                        (separated by commas)"
    "\n  -r, -R              - includes subdirectories"
    "\n  -v, --version       - print out version information"
    "\n  -V                  - turn verbose output off, print the result only"
    "\n\nShortcuts:"
    "\n  --exclude-cstyle-comments : -E '\\s*//' -e '\\s*/\\*' '\\*/\\s*'"
    "\n  --exclude-blank-lines     : -E '^\\s*$'"
    "\n\n"
    "The default call without any options is:"
    "\n  cline ./\n\n"
    "So each file in the working directory is counted. If you want to count C"
    "\nsource code in your working directory and its subdirectories, type:"
    "\n  cline -rs .c\n"
    "\nIf you want to exclude comment lines, you may use the -e/-E option."
    "\nAfter a line matches the regex pattern <start>, this and any following"
    "\nline is not counted unless a line matches the <end> pattern. A line is"
    "\nstill counted when it does not start or end with the respective pattern."
    "\nPlease note, that cline does not trim the lines before matching against"
    "\nthe pattern."
    "\n\nExample (C without comments):"
    "\n  cline -s .c,.h --exclude-cstyle-comments"
    "\n";

  fputs(help_text, stdout);
}

/*
 * Prints the version banner, destroys the settings object and hands back
 * the exit status for a successful run.
 *
 * settings - the settings object to destroy before leaving
 *
 * Returns zero in every case.
 */
int exit_with_version(settings_t* settings) {
  const int status = 0;

  printf("cline - Version: " VERSION "\n");
  destroy_settings_t(settings);

  return status;
}

/*
 * Prints the version banner followed by the help text, destroys the
 * settings object and hands the requested exit status back to the caller.
 *
 * settings - the settings object to destroy before leaving
 * code     - the exit status to pass through
 *
 * Returns code unchanged.
 */
int exit_with_help(settings_t* settings, int code) {
  const int status = code;

  /* Banner first, usage afterwards. */
  printf("cline - Version: " VERSION "\n");
  printHelpText();

  destroy_settings_t(settings);

  return status;
}

int main(int argc, char** argv) {

  /* Settings */
  settings_t *settings = new_settings_t();
  if (settings == NULL) {
    fprintf(stderr, "Memory allocation failed.\n");
    return 1;
  }

  /* Get arguments */
  string_list_t *directories = new_string_list_t();
  if (directories == NULL) {
    fprintf(stderr, "Memory allocation failed.\n");
    return 1;
  }
  char* includeSuffix = NULL;
  char* excludeSuffix = NULL;
  int checked = 0;

  for (int t = 1 ; t < argc ; t++) {

    int argflags = checkArgument(argv[t], "hsSrRmvVbeEic");
    int paropt = 0;

    /* h */
    if ((argflags & 1) > 0 || strcmp(argv[t], "--help") == 0) {
      return exit_with_help(settings, 0);
    }
    /* s */
    if ((argflags & 2) > 0) {
      if (!checkParamOpt(&paropt) || registerArgument(&checked, 2)) {
        return exit_with_help(settings, 1);
      }
      t++;
      if (t >= argc) {
        return exit_with_help(settings, 1);
      }
      includeSuffix = argv[t];
    }
    /* S */
    if ((argflags & 4) > 0) {
      if (!checkParamOpt(&paropt) || registerArgument(&checked, 4)) {
        return exit_with_help(settings, 1);
      }
      t++;
      if (t >= argc) {
        return exit_with_help(settings, 1);
      }
      excludeSuffix = argv[t];
    }
    /* r, R */
    if ((argflags & 24) > 0) {
      if (registerArgument(&checked, 24)) {
        return exit_with_help(settings, 1);
      }
      settings->recursive = true;
    }
    /* m */
    if ((argflags & 32) > 0) {
      if (registerArgument(&checked, 32)) {
        return exit_with_help(settings, 1);
      }
      settings->matchesOnly = true;
    }
    /* v */
    if ((argflags & 64) > 0 || strcmp(argv[t], "--version") == 0) {
      return exit_with_version(settings);
    }
    /* V */
    if ((argflags & 128) > 0) {
      if (registerArgument(&checked, 128)) {
        return exit_with_help(settings, 1);
      }
      settings->verbose = false;
    }
    /* b */
    if ((argflags & 256) > 0) {
      if (!checkParamOpt(&paropt) || registerArgument(&checked, 256)) {
        return exit_with_help(settings, 1);
      }
      t++;
      if (t >= argc) {
        return exit_with_help(settings, 1);
      }
      if (strcasecmp(argv[t], "ignore") == 0) {
        settings->bfileHeuristics->level = BFILE_IGNORE;
      } else if (strcasecmp(argv[t], "low") == 0) {
        settings->bfileHeuristics->level = BFILE_LOW_ACCURACY;
      } else if (strcasecmp(argv[t], "medium") == 0) {
        settings->bfileHeuristics->level = BFILE_MEDIUM_ACCURACY;
      } else if (strcasecmp(argv[t], "high") == 0) {
        settings->bfileHeuristics->level = BFILE_HIGH_ACCURACY;
      } else {
        return exit_with_help(settings, 1);
      }
    }
    /* e */
    if ((argflags & 512) > 0) {
      if (!checkParamOpt(&paropt) || t + 2 >= argc) {
        return exit_with_help(settings, 1);
      }
      t++; add_string(settings->regex->pattern_list, argv[t]);
      t++; add_string(settings->regex->pattern_list, argv[t]);
    }
    /* E */
    if ((argflags & 1024) > 0) {
      t++;
      if (!checkParamOpt(&paropt) || t >= argc) {
        return exit_with_help(settings, 1);
      }
      add_string(settings->regex->pattern_list, argv[t]);
      add_string(settings->regex->pattern_list, "$");
    }
    /* i */
    if ((argflags & 2048) > 0) {
      /* cannot be used together with -V */
      if (registerArgument(&checked, 128)) {
        return exit_with_help(settings, 1);
      }
      settings->individual_sums = true;
    }
    if ((argflags & 4096) > 0) {
        if (registerArgument(&checked, 4096)) {
            return exit_with_help(settings, 1);
        }
        settings->count_chars = true;
        settings->regex->count_chars = true;
    }
    if (argflags == 0) {
      /* SHORTCUTS */
      if (strcmp(argv[t], "--exclude-cstyle-comments") == 0) {
        add_string(settings->regex->pattern_list, "\\s*//");
        add_string(settings->regex->pattern_list, "$");
        add_string(settings->regex->pattern_list, "\\s*/\\*");
        add_string(settings->regex->pattern_list, "\\*/\\s*");
      } else if (strcmp(argv[t], "--exclude-blank-lines") == 0) {
        add_string(settings->regex->pattern_list, "^\\s*$");
        add_string(settings->regex->pattern_list, "$");
      }
      /* Path */
      else {
        add_string(directories, argv[t]);
      }
    }
  }

  /* Find tokens */
  parseCSL(includeSuffix, settings->includeSuffixes);
  parseCSL(excludeSuffix, settings->excludeSuffixes);

  /* Scan directories */
  if (regex_compile_all(settings->regex)) {
    scanresult_t* result = new_scanresult_t(settings);
    /* Don't waste memory when only the total sum is needed */
    string_list_t *output = settings->verbose ? new_string_list_t() : NULL;
    char *outbuf;
    const char* result_type = settings->count_chars ? "chars" : "lines";
    
    unsigned total = 0;
    if (directories->count == 0) {
        add_string(directories, ".");
    }
    for (unsigned t = 0 ; t < directories->count ; t++) {
      scanDirectory((scanner_t){directories->items[t], 0}, settings,
          output, result);
      total += result->result;
      if (directories->count > 1 ) {
        outbuf = (char*) malloc(81);
        memset(outbuf, '-', 79);
        outbuf[79] = '\n';
        outbuf[80] = 0;
        add_string(output, outbuf);
        outbuf = (char*) malloc(81);
        snprintf(outbuf, 81, "%-63s%10u %s\n", directories->items[t],
                result->result, result_type);
        add_string(output, outbuf);
        outbuf = (char*) malloc(81);
        memset(outbuf, '-', 79);
        outbuf[79] = '\n';
        outbuf[80] = 0;
        add_string(output, outbuf);
      }
    }
    destroy_string_list_t(directories);

    /* Print result */
    if (settings->verbose) {
      for (int i = 0 ; i < output->count ; i++) {
        printf("%s", output->items[i]);
        free(output->items[i]);
      }
      
      if (result->ext) {
        if (result->ext->count > 0) {
          for (unsigned t = 0 ; t < 79 ; t++) {
            printf("=");
          }
          printf("\nIndividual sums:\n");
          for (unsigned t = 0 ; t < result->ext->count ; t++) {
            printf(" %-62s%10u %s\n",
                   result->ext->extensions[t],
                   result->ext->result[t],
                   result_type);
          }
        }
      }
      
      for (unsigned t = 0 ; t < 79 ; t++) {
        printf("=");
      }
      printf("\n%73d %s\n", total, result_type);

      if (settings->confusing_lnlen &&
          settings->regex->pattern_list->count > 0) {

        printf("\nSome files contain too long lines.\n"
          "The parser currently supports a maximum line length of %u."
          "\nThe result might be wrong.\n", MAX_LINELENGTH);
      }
    } else {
      printf("%u", total);
    }
    destroy_scanresult_t(result);
    destroy_string_list_t(output);
    destroy_settings_t(settings);
  }

  fflush(stdout);
  fflush(stderr);
  return 0;
}

mercurial