src/c2html.c

Wed, 10 Jul 2013 17:57:03 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 10 Jul 2013 17:57:03 +0200
changeset 17
7ea86024aef0
parent 16
fa0bcd0444eb
child 18
5085b57e3fd6
permissions
-rw-r--r--

implemented java highlighting

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2013 Mike Becker. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <ctype.h>

#define INPUTBUF_SIZE 2048
#define WORDBUF_SIZE 16

/*
 * Keywords highlighted in C sources. The list is terminated by a NULL
 * entry so it can be walked without knowing its length.
 */
const char* ckeywords[] = {
  "auto", "break", "case", "char",
  "const", "continue", "default", "do",
  "double", "else", "enum", "extern",
  "float", "for", "goto", "if",
  "int", "long", "register", "return",
  "short", "signed", "sizeof", "static",
  "struct", "switch", "typedef", "union",
  "unsigned", "void", "volatile", "while",
  NULL
};

/*
 * Keywords highlighted in Java sources (selected with the -j option).
 * The list is terminated by a NULL entry so it can be walked without
 * knowing its length.
 */
const char* jkeywords[] = {
  "abstract", "continue", "for", "new", "switch",
  "assert", "default", "goto", "package", "synchronized",
  "boolean", "do", "if", "private", "this",
  "break", "double", "implements", "protected", "throw",
  "byte", "else", "import", "public", "throws",
  "case", "enum", "instanceof", "return", "transient",
  "catch", "extends", "int", "short", "try",
  "char", "final", "interface", "static", "void",
  "class", "finally", "long", "strictfp", "volatile",
  "const", "float", "native", "super", "while",
  NULL
};

/*
 * Tests whether a word follows the C convention of naming types with a
 * "_t" suffix (size_t, uint8_t, ...).
 *
 * word: the characters of the word (does not need to be NUL-terminated)
 * len:  the number of characters in word
 *
 * Returns 1 if the last two characters are "_t", 0 otherwise.
 */
int isctype(char *word, size_t len) {
  /* A word shorter than the suffix cannot match. Without this guard
   * len-2 wraps around and the check reads memory in front of word. */
  if (len < 2) {
    return 0;
  }
  return (word[len-2] == '_' && word[len-1] == 't');
}

/*
 * Tests whether a word is a C preprocessor directive, i.e. whether its
 * first character is '#'.
 *
 * Returns 1 for a directive, 0 otherwise.
 */
int iscdirective(char *word) {
  const char first = *word;
  return first == '#';
}

/*
 * Tests whether a word is treated as a Java type name: every word that
 * starts with an upper case letter is.
 *
 * word: the characters of the word; word[0] must be readable
 * len:  the number of characters in word (not needed by this check, the
 *       parameter exists to match the istype callback signature)
 *
 * Returns non-zero if the first character is an upper case letter,
 * 0 otherwise.
 */
int isjtype(char *word, size_t len) {
  (void) len;
  /* <ctype.h> functions are only defined for unsigned char values and EOF;
   * a plain char may be negative */
  return isupper((unsigned char) word[0]);
}

/*
 * Tests whether a word is highlighted as a directive in Java sources,
 * i.e. whether its first character is '@'.
 *
 * Returns 1 if it is, 0 otherwise.
 */
int isjdirective(char *word) {
  const char first = *word;
  return first == '@';
}

/*
 * Language specific hooks that parseline() uses to classify the words of
 * a source line.
 */
typedef struct {
  /* NULL-terminated list of keywords, searched by iskeyword() */
  const char** keywords;
  /* called with a word and its length; non-zero means "type name" */
  int(*istype)(char*,size_t);
  /* called with a word; non-zero means "directive" */
  int(*isdirective)(char*);
} highlighter_t;

/*
 * Program settings collected from the command line in main().
 */
typedef struct {
  /* output file name; NULL means that stdout is used */
  char* outfilename;
  /* name of the source file to read */
  char* infilename;
  /* non-zero: run each line through parseline(); zero: print lines as-is */
  int highlight;
} settings_t;

/*
 * The lines of an input file, created by inputfilebuffer(), filled by
 * addline() and released by freeinputfilebuffer().
 */
typedef struct {
  /* number of lines currently stored */
  size_t count;
  /* number of line slots the buffer was created for; doubled by addline() */
  size_t capacity;
  /* largest width that has been passed to addline() so far */
  size_t maxlinewidth;
  /* heap allocated, NUL-terminated copies of the lines */
  char** lines;
} inputfile_t;

/*
 * Creates an empty line buffer.
 *
 * capacity: the number of lines the buffer can initially hold; a value
 *           of zero is raised to one, because the buffer grows by
 *           doubling and could never grow from zero
 *
 * Returns the new buffer, or NULL if the memory could not be allocated.
 * The buffer must be released with freeinputfilebuffer().
 */
inputfile_t *inputfilebuffer(size_t capacity) {
  inputfile_t *inputfile = malloc(sizeof *inputfile);
  if (!inputfile) {
    return NULL;
  }

  if (capacity == 0) {
    capacity = 1;
  }

  inputfile->lines = malloc(capacity * sizeof *inputfile->lines);
  if (!inputfile->lines) {
    free(inputfile);
    return NULL;
  }
  inputfile->capacity = capacity;
  inputfile->count = 0;
  inputfile->maxlinewidth = 0;
  
  return inputfile;
}

/*
 * Appends a copy of a line to the buffer and updates maxlinewidth.
 *
 * inputfile: the buffer to append to
 * line:      the characters of the line (does not need to be
 *            NUL-terminated, only the first width characters are read)
 * width:     the number of characters to copy
 *
 * The stored copy is NUL-terminated. The line array is doubled when it is
 * full. As this function has no way to report an error, the program is
 * terminated with a message if memory runs out.
 */
void addline(inputfile_t *inputfile, char* line, size_t width) {
  char *l = malloc(width+1);
  if (!l) {
    fprintf(stderr, "c2html: out of memory\n");
    exit(EXIT_FAILURE);
  }
  memcpy(l, line, width);
  l[width] = 0;

  if (inputfile->count >= inputfile->capacity) {
    size_t newcapacity = inputfile->capacity ? inputfile->capacity << 1 : 1;
    /* realloc takes a size in bytes, not a number of elements */
    char **newlines = realloc(inputfile->lines,
        newcapacity * sizeof *newlines);
    if (!newlines) {
      free(l);
      fprintf(stderr, "c2html: out of memory\n");
      exit(EXIT_FAILURE);
    }
    inputfile->lines = newlines;
    inputfile->capacity = newcapacity;
  }

  inputfile->lines[inputfile->count] = l;
  inputfile->maxlinewidth =
          width > inputfile->maxlinewidth ? width : inputfile->maxlinewidth;
  inputfile->count++;
}

/*
 * Releases a line buffer together with all lines stored in it.
 *
 * inputfile: the buffer to release; NULL is accepted and ignored
 */
void freeinputfilebuffer(inputfile_t *inputfile) {
  if (!inputfile) {
    return;
  }
  /* count is a size_t, so the index must be one as well */
  for (size_t i = 0 ; i < inputfile->count ; i++) {
    free(inputfile->lines[i]);
  }
  free(inputfile->lines);
  free(inputfile);
}

/*
 * Reads a file and splits it into lines.
 *
 * filename: path of the file to read
 *
 * Every stored line ends with '\n'. If the last line of the file has no
 * trailing newline, one is appended, so that the line is not lost and all
 * lines have the same shape.
 *
 * Returns the line buffer (to be released with freeinputfilebuffer()), or
 * NULL if the file cannot be opened, reading fails or memory runs out.
 */
inputfile_t *readinput(char *filename) {

  int fd = open(filename, O_RDONLY);
  if (fd == -1) return NULL;
  
  inputfile_t *inputfile = inputfilebuffer(512);
  if (!inputfile) {
    close(fd);
    return NULL;
  }
  
  char buf[INPUTBUF_SIZE];
  ssize_t r;
  
  size_t maxlinewidth = 256;
  char *line = malloc(maxlinewidth);
  if (!line) goto fail;
  size_t col = 0;
  
  while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
    for (size_t i = 0 ; i < (size_t) r ; i++) {
      /* keep some spare room, so that a newline and the terminator
       * always fit behind the current column */
      if (col >= maxlinewidth-4) {
        size_t newwidth = maxlinewidth << 1;
        char *newline = realloc(line, newwidth);
        if (!newline) goto fail;
        line = newline;
        maxlinewidth = newwidth;
      }

      if (buf[i] == '\n') {
        line[col++] = '\n';
        line[col] = 0;
        addline(inputfile, line, col);        
        col = 0;
      } else {
        line[col++] = buf[i];
      }
    }
  }
  if (r < 0) goto fail;

  /* last line without a trailing newline */
  if (col > 0) {
    line[col++] = '\n';
    line[col] = 0;
    addline(inputfile, line, col);
  }
  
  free(line);
  
  close(fd);
  
  return inputfile;

fail:
  free(line);
  freeinputfilebuffer(inputfile);
  close(fd);
  return NULL;
}

/*
 * Writes a character to the destination buffer and replaces characters
 * that have a special meaning in HTML by their entities.
 *
 * dest: the destination buffer; the caller must make sure that at least
 *       five bytes are available at position dp
 * dp:   the position in dest to write to
 * c:    the character to write
 *
 * '<', '>' and '&' are written as "&lt;", "&gt;" and "&amp;". The
 * ampersand must be escaped, too - otherwise source text like "&lt;"
 * would be displayed as "<" by the browser.
 *
 * Returns the position behind the written text. No terminator is written.
 */
size_t writeescapedchar(char *dest, size_t dp, char c) {
  switch (c) {
    case '>':
      dest[dp++] = '&'; dest[dp++] = 'g';
      dest[dp++] = 't'; dest[dp++] = ';';
      break;
    case '<':
      dest[dp++] = '&'; dest[dp++] = 'l';
      dest[dp++] = 't'; dest[dp++] = ';';
      break;
    case '&':
      dest[dp++] = '&'; dest[dp++] = 'a';
      dest[dp++] = 'm'; dest[dp++] = 'p';
      dest[dp++] = ';';
      break;
    default:
      dest[dp++] = c;
      break;
  }
  
  return dp;
}

/*
 * Looks a word up in a keyword list.
 *
 * word:     the word to look for
 * keywords: the list to search, terminated by a NULL entry
 *
 * At most WORDBUF_SIZE characters take part in each comparison.
 *
 * Returns 1 if the word is contained in the list, 0 otherwise.
 */
int iskeyword(char *word, const char** keywords) {
  const char **candidate = keywords;
  while (*candidate != NULL) {
    if (!strncmp(*candidate, word, WORDBUF_SIZE)) {
      return 1;
    }
    candidate++;
  }
  return 0;
}

/*
 * Tests whether a word consists of upper case letters and underscores
 * only (the usual spelling of macro constants).
 *
 * word: the characters of the word (does not need to be NUL-terminated)
 * wp:   the number of characters in word
 *
 * Returns 1 if no other character occurs (this includes the empty word),
 * 0 otherwise. Digits count as other characters.
 */
int iscapsonly(char *word, size_t wp) {
  for (size_t i = 0 ; i < wp ; i++) {
    /* <ctype.h> functions are only defined for unsigned char values and
     * EOF; a plain char may be negative */
    if (!isupper((unsigned char) word[i]) && word[i] != '_') {
      return 0;
    }
  }
  return 1;
}

/*
 * Appends a NUL-terminated string (without its terminator) to the
 * destination buffer at position dp and returns the position behind it.
 */
static size_t appendstr(char *dest, size_t dp, const char *str) {
  size_t len = strlen(str);
  memcpy(&(dest[dp]), str, len);
  return dp + len;
}

/*
 * Converts one line of source code to HTML with highlighting markup.
 *
 * src:         the NUL-terminated source line
 * dest:        the buffer that receives the NUL-terminated result; the
 *              caller must provide enough room for the markup
 * highlighter: the language specific keyword list and word classifiers
 *
 * Comments, strings, keywords, types, directives, upper case constants and
 * the file names of #include directives are wrapped in span/anchor
 * elements with c2html-* classes. All text is passed through
 * writeescapedchar().
 *
 * The information whether a multi-line comment is open is kept in a static
 * variable between calls. Therefore the lines of a file must be passed in
 * order, and the function cannot be used for two files at the same time.
 *
 * NOTE(review): a word that is directly followed by a comment or a string
 * (no separator in between) is still pending when the comment or string
 * is written; it is emitted afterwards or not at all.
 */
void parseline(char *src, char *dest, highlighter_t *highlighter) {
  size_t sp = 0, dp = 0;
  /* indent - the newline is left to the main loop, so that an open
   * comment span is closed on blank lines, too */
  while (src[sp] != '\n' && isspace((unsigned char) src[sp])) {
    dest[dp++] = src[sp++];
  }
  
  static char word[WORDBUF_SIZE];
  static char includefile[FILENAME_MAX];
  
  memset(word, 0, WORDBUF_SIZE);
  size_t wp = 0, ifp = 0;
  int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
  static int iscommentml;
  int isescaping = 0;
  /* the quote character that opened the current string */
  char quotechar = 0;
  
  /* continue a multi-line comment that has been opened on a previous line */
  if (iscommentml) {
    iscomment = 1;
    dp = appendstr(dest, dp, "<span class=\"c2html-comment\">");
  }

  for (char c = src[sp] ; c ; c=src[++sp]) {
    /* comments */
    if (c == '/') {
      if (iscommentml && sp > 0 && src[sp-1] == '*') {
        iscomment = 0;
        iscommentml = 0;
        dp = appendstr(dest, dp, "/</span>");
        continue;
      } else if (!iscomment && !isstring
          && (src[sp+1] == '/' || src[sp+1] == '*')) {
        /* comment markers within strings (e.g. URLs) do not count */
        iscomment = 1;
        iscommentml = (src[sp+1] == '*');
        dp = appendstr(dest, dp, "<span class=\"c2html-comment\">");
      }
    }
    
    if (iscomment) {
      /* the span is closed at the end of each line and reopened by the
       * next call, if the comment continues */
      if (c == '\n') {
        dp = appendstr(dest, dp, "</span>");
      }
      dp = writeescapedchar(dest, dp, c);
    } else if (isinclude) {
      if (c == '<') {
        dp = appendstr(dest, dp, "<span class=\"c2html-stdinclude\">");
        dp = writeescapedchar(dest, dp, c);
      } else if (c == '\"') {
        if (parseinclude) {
          /* closing quote: finish the href attribute and use the quoted
           * file name as link text */
          dest[dp++] = '\"';
          dest[dp++] = '>';
          memcpy(&(dest[dp]), includefile, ifp);
          dp += ifp;
          
          dp = writeescapedchar(dest, dp, c);
          dp = appendstr(dest, dp, "</a>");
          parseinclude = 0;
        } else {
          dp = appendstr(dest, dp, "<a class=\"c2html-userinclude\" href=");
          dp = writeescapedchar(dest, dp, c);
          ifp = 0;
          includefile[ifp++] = '\"';
          parseinclude = 1;
        }
      } else if (c == '>') {
        dp = writeescapedchar(dest, dp, c);
        dp = appendstr(dest, dp, "</span>");
      } else {
        /* the line may be longer than the file name buffer */
        if (parseinclude && ifp < sizeof(includefile)) {
          includefile[ifp++] = c;
        }
        dp = writeescapedchar(dest, dp, c);
      }
    } else {
      /* strings - a string is closed by the quote character that opened
       * it only, the other one is part of the string */
      if (!isescaping && (c == '\'' || c == '\"')
          && (!isstring || c == quotechar)) {
        isstring ^= 1;
        if (isstring) {
          quotechar = c;
          dp = appendstr(dest, dp, "<span class=\"c2html-string\">");
          dp = writeescapedchar(dest, dp, c);
        } else {
          dp = writeescapedchar(dest, dp, c);
          dp = appendstr(dest, dp, "</span>");
        }
      } else {
        if (isstring) {
          dp = writeescapedchar(dest, dp, c);
        } else if (!isalnum((unsigned char) c)
            && c!='_' && c!='#' && c!='.' && c!='@') {
          /* interpret word int_t
           * A word that fills the buffer exactly has not been written
           * yet and must be handled here as well. Longer words have
           * already been written by the "read word" branch. */
          if (wp > 0 && wp <= WORDBUF_SIZE) {
            int closespan = 1;
            if (iskeyword(word, highlighter->keywords)) {
              dp = appendstr(dest, dp, "<span class=\"c2html-keyword\">");
            } else if (highlighter->istype(word, wp)) {
              dp = appendstr(dest, dp, "<span class=\"c2html-type\">");
            } else if (highlighter->isdirective(word)) {
              isinclude = !strncmp("#include", word, WORDBUF_SIZE);
              dp = appendstr(dest, dp, "<span class=\"c2html-directive\">");
            } else if (iscapsonly(word, wp)) {
              dp = appendstr(dest, dp, "<span class=\"c2html-macroconst\">");
            } else {
              closespan = 0;
            }
            for (size_t i = 0 ; i < wp ; i++) {
              dp = writeescapedchar(dest, dp, word[i]);
            }
            if (closespan) {
              dp = appendstr(dest, dp, "</span>");
            }
          }
          memset(word, 0, WORDBUF_SIZE);
          wp = 0;
          dp = writeescapedchar(dest, dp, c);
        } else {
          /* read word */
          if (wp < WORDBUF_SIZE) {
            word[wp++] = c;
          } else if (wp == WORDBUF_SIZE) {
            /* the word does not fit into the buffer: write what has been
             * collected and pass the remaining characters through */
            for (size_t i = 0 ; i < WORDBUF_SIZE ; i++) {
              dp = writeescapedchar(dest, dp, word[i]);
            }
            wp++;
            dp = writeescapedchar(dest, dp, c);
          } else {
            dp = writeescapedchar(dest, dp, c);
          }
        }
      }

      /* a backslash escapes the next character, unless it is escaped itself */
      isescaping = !isescaping & (c == '\\');
    }
  }
  dest[dp] = 0;
}

/*
 * Prints the usage information to stdout.
 */
void printhelp() {
  static const char usage[] =
      "Formats source code using HTML.\n\nUsage:\n"
      "  c2html [Options] FILE\n\n"
      " Options:\n"
      "  -h                    Prints this help message\n"
      "  -j                    Highlight Java instead of C source code\n"
      "  -o <output>           Output file (if not specified, stdout is used)\n"
      "  -p                    Disable highlighting (plain text)\n"
      "\n";

  fputs(usage, stdout);
}

/*
 * Computes the number of decimal digits that are needed to print a line
 * number.
 *
 * lnc: the largest line number (the line count)
 *
 * Returns the number of digits of lnc, at least 1.
 */
int lnint(size_t lnc) {
  int w = 1;
  /* divide the count instead of growing a power of ten: this also gives
   * the correct width for 10, 100, ... and cannot overflow */
  while (lnc >= 10) {
    lnc /= 10;
    w++;
  }
  return w;
}

/*
 * Program entry point: parses the command line, reads the input file and
 * writes it as a <pre> block with line numbers, either highlighted or as
 * plain text.
 *
 * Returns 0 on success (and after -h). Returns 1 if the command line is
 * invalid, the input file cannot be read, the output file cannot be
 * written or memory runs out.
 *
 * NOTE(review): with -p the lines are written as they are, i.e. without
 * escaping HTML special characters.
 */
int main(int argc, char** argv) {
  
  settings_t settings;
  settings.outfilename = NULL;
  settings.infilename = NULL;
  settings.highlight = 1;
  
  highlighter_t highlighter;
  highlighter.isdirective = iscdirective;
  highlighter.istype = isctype;
  highlighter.keywords = ckeywords;
  
  /* getopt returns an int; with a char the comparison with -1 never
   * succeeds on platforms where char is unsigned */
  int optc;
  while ((optc = getopt(argc, argv, "hjo:p")) != -1) {
    switch (optc) {
      case 'o':
        /* "-" selects stdout explicitly */
        if (!(optarg[0] == '-' && optarg[1] == 0)) {
          settings.outfilename = optarg;
        }
        break;
      case 'j':
        highlighter.isdirective = isjdirective;
        highlighter.istype = isjtype;
        highlighter.keywords = jkeywords;
        break;
      case 'p':
        settings.highlight = 0;
        break;
      case 'h':
        printhelp();
        return 0;
      default:
        return 1;
    }
  }

  /* exactly one input file is expected */
  if (optind != argc-1) {
    printhelp();
    return 1;
  }

  settings.infilename = argv[optind];

  inputfile_t *inputfile = readinput(settings.infilename);
  if (!inputfile) {
    fprintf(stderr, "c2html: cannot read input file %s\n",
        settings.infilename);
    return 1;
  }

  char *hlbuf = NULL;
  if (settings.highlight) {
    /* room for the markup; the +1 avoids a zero sized request when the
     * input file is empty */
    hlbuf = malloc((inputfile->maxlinewidth + 1) * 64);
    if (!hlbuf) {
      fprintf(stderr, "c2html: out of memory\n");
      freeinputfilebuffer(inputfile);
      return 1;
    }
  }

  FILE *fout = stdout;
  if (settings.outfilename) {
    fout = fopen(settings.outfilename, "w");
    if (!fout) {
      perror(settings.outfilename);
      free(hlbuf);
      freeinputfilebuffer(inputfile);
      return 1;
    }
  }

  int status = 0;

  fprintf(fout, "<pre>\n");
  int lnw = lnint(inputfile->count);
  for (size_t i = 0 ; i < inputfile->count ; i++) {
    const char *text;
    if (settings.highlight) {
      parseline(inputfile->lines[i], hlbuf, &highlighter);
      text = hlbuf;
    } else {
      text = inputfile->lines[i];
    }
    fprintf(fout, "<span class=\"c2html-lineno\">%*zu:</span> %s",
        lnw, i+1, text);
  }
  fprintf(fout, "</pre>\n");

  free(hlbuf);

  /* closing flushes the buffered output, this is where write errors
   * show up */
  if (fout != stdout && fclose(fout) != 0) {
    perror(settings.outfilename);
    status = 1;
  }

  freeinputfilebuffer(inputfile);

  return status;
}

mercurial