Wed, 10 Jul 2013 17:57:03 +0200
Implemented Java highlighting
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2013 Mike Becker. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <fcntl.h> #include <unistd.h> #include <ctype.h> #define INPUTBUF_SIZE 2048 #define WORDBUF_SIZE 16 const char* ckeywords[] = { "auto", "break", "case", "char", "const", "continue", "default", "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while", NULL }; const char* jkeywords[] = { "abstract", "continue", "for", "new", "switch", "assert", "default", "goto", "package", "synchronized", "boolean", "do", "if", "private", "this", "break", "double", "implements", "protected", "throw", "byte", "else", "import", "public", "throws", "case", "enum", "instanceof", "return", "transient", "catch", "extends", "int", "short", "try", "char", "final", "interface", "static", "void", "class", "finally", "long", "strictfp", "volatile", "const", "float", "native", "super", "while", NULL }; int isctype(char *word, size_t len) { return (word[len-2] == '_' && word[len-1] == 't'); } int iscdirective(char *word) { return (word[0] == '#'); } int isjtype(char *word, size_t len) { return isupper(word[0]); } int isjdirective(char *word) { return word[0] == '@'; } typedef struct { const char** keywords; int(*istype)(char*,size_t); int(*isdirective)(char*); } highlighter_t; typedef struct { char* outfilename; char* infilename; int highlight; } settings_t; typedef struct { size_t count; size_t capacity; size_t maxlinewidth; char** lines; } inputfile_t; inputfile_t *inputfilebuffer(size_t capacity) { inputfile_t *inputfile = (inputfile_t*) malloc(sizeof(inputfile_t)); inputfile->lines = (char**) malloc(capacity * sizeof(char*)); inputfile->capacity = capacity; inputfile->count = 0; inputfile->maxlinewidth = 0; return inputfile; } void addline(inputfile_t *inputfile, char* line, size_t width) { char *l = (char*) malloc(width+1); memcpy(l, line, width); 
l[width] = 0; if (inputfile->count >= inputfile->capacity) { inputfile->capacity <<= 1; inputfile->lines = realloc(inputfile->lines, inputfile->capacity); } inputfile->lines[inputfile->count] = l; inputfile->maxlinewidth = width > inputfile->maxlinewidth ? width : inputfile->maxlinewidth; inputfile->count++; } void freeinputfilebuffer(inputfile_t *inputfile) { for (int i = 0 ; i < inputfile->count ; i++) { free(inputfile->lines[i]); } free(inputfile->lines); free(inputfile); } inputfile_t *readinput(char *filename) { int fd = open(filename, O_RDONLY); if (fd == -1) return NULL; inputfile_t *inputfile = inputfilebuffer(512); char buf[INPUTBUF_SIZE]; ssize_t r; size_t maxlinewidth = 256; char *line = (char*) malloc(maxlinewidth); size_t col = 0; while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) { for (size_t i = 0 ; i < r ; i++) { if (col >= maxlinewidth-4) { maxlinewidth <<= 1; line = realloc(line, maxlinewidth); } if (buf[i] == '\n') { line[col++] = '\n'; line[col] = 0; addline(inputfile, line, col); col = 0; } else { line[col++] = buf[i]; } } } free(line); close(fd); return inputfile; } size_t writeescapedchar(char *dest, size_t dp, char c) { if (c == '>') { dest[dp++] = '&'; dest[dp++] = 'g'; dest[dp++] = 't'; dest[dp++] = ';'; } else if (c == '<') { dest[dp++] = '&'; dest[dp++] = 'l'; dest[dp++] = 't'; dest[dp++] = ';'; } else { dest[dp++] = c; } return dp; } int iskeyword(char *word, const char** keywords) { for (int i = 0 ; keywords[i] ; i++) { if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) { return 1; } } return 0; } int iscapsonly(char *word, size_t wp) { for (size_t i = 0 ; i < wp ; i++) { if (!isupper(word[i]) && word[i] != '_') { return 0; } } return 1; } void parseline(char *src, char *dest, highlighter_t *highlighter) { size_t sp = 0, dp = 0; /* indent */ while (isspace(src[sp])) { dest[dp++] = src[sp++]; } static char word[WORDBUF_SIZE]; static char includefile[FILENAME_MAX]; memset(word, 0, WORDBUF_SIZE); size_t wp = 0, ifp = 0; int isstring = 0, 
iscomment = 0, isinclude = 0, parseinclude = 0; static int iscommentml; int isescaping = 0; if (iscommentml) { iscomment = 1; memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29); dp += 29; } for (char c = src[sp] ; c ; c=src[++sp]) { /* comments */ if (c == '/') { if (iscommentml && sp > 0 && src[sp-1] == '*') { iscomment = 0; iscommentml = 0; memcpy(&(dest[dp]), "/</span>", 8); dp += 8; continue; } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { iscomment = 1; iscommentml = (src[sp+1] == '*'); memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29); dp += 29; } } if (iscomment) { if (c == '\n') { memcpy(&(dest[dp]), "</span>", 7); dp += 7; } dp = writeescapedchar(dest, dp, c); } else if (isinclude) { if (c == '<') { memcpy(&(dest[dp]), "<span class=\"c2html-stdinclude\">", 32); dp += 32; dp = writeescapedchar(dest, dp, c); } else if (c == '\"') { if (parseinclude) { dest[dp++] = '\"'; dest[dp++] = '>'; memcpy(&(dest[dp]), includefile, ifp); dp += ifp; dp = writeescapedchar(dest, dp, c); memcpy(&(dest[dp]), "</a>", 4); dp += 4; parseinclude = 0; } else { memcpy(&(dest[dp]), "<a class=\"c2html-userinclude\" href=", 35); dp += 35; dp = writeescapedchar(dest, dp, c); ifp = 0; includefile[ifp++] = '\"'; parseinclude = 1; } } else if (c == '>') { dp = writeescapedchar(dest, dp, c); memcpy(&(dest[dp]), "</span>", 7); dp += 7; } else { if (parseinclude) { includefile[ifp++] = c; } dp = writeescapedchar(dest, dp, c); } } else { /* strings */ if (!isescaping && (c == '\'' || c == '\"')) { isstring ^= 1; if (isstring) { memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28); dp += 28; dp = writeescapedchar(dest, dp, c); } else { dp = writeescapedchar(dest, dp, c); memcpy(&(dest[dp]), "</span>", 7); dp += 7; } } else { if (isstring) { dp = writeescapedchar(dest, dp, c); } else if (!isalnum(c) && c!='_' && c!='#' && c!='.' 
&& c!='@') { /* interpret word int_t */ if (wp > 0 && wp < WORDBUF_SIZE) { int closespan = 1; if (iskeyword(word, highlighter->keywords)) { memcpy(&(dest[dp]), "<span class=\"c2html-keyword\">", 29); dp += 29; } else if (highlighter->istype(word, wp)) { memcpy(&(dest[dp]), "<span class=\"c2html-type\">", 26); dp += 26; } else if (highlighter->isdirective(word)) { isinclude = !strncmp("#include", word, WORDBUF_SIZE); memcpy(&(dest[dp]), "<span class=\"c2html-directive\">", 31); dp += 31; } else if (iscapsonly(word, wp)) { memcpy(&(dest[dp]), "<span class=\"c2html-macroconst\">", 32); dp += 32; } else { closespan = 0; } for (int i = 0 ; i < wp ; i++) { dp = writeescapedchar(dest, dp, word[i]); } if (closespan) { memcpy(&(dest[dp]), "</span>", 7); dp += 7; } } memset(word, 0, WORDBUF_SIZE); wp = 0; dp = writeescapedchar(dest, dp, c); } else { /* read word */ if (wp < WORDBUF_SIZE) { word[wp++] = c; } else if (wp == WORDBUF_SIZE) { for (int i = 0 ; i < WORDBUF_SIZE ; i++) { dp = writeescapedchar(dest, dp, word[i]); } wp++; dp = writeescapedchar(dest, dp, c); } else { dp = writeescapedchar(dest, dp, c); } } } isescaping = !isescaping & (c == '\\'); } } dest[dp] = 0; } void printhelp() { printf("Formats source code using HTML.\n\nUsage:\n" " c2html [Options] FILE\n\n" " Options:\n" " -h Prints this help message\n" " -j Highlight Java instead of C source code\n" " -o <output> Output file (if not specified, stdout is used)\n" " -p Disable highlighting (plain text)\n" "\n"); } int lnint(size_t lnc) { int w = 1, p = 1; while ((p*=10) < lnc) w++; return w; } int main(int argc, char** argv) { settings_t settings; settings.outfilename = NULL; settings.highlight = 1; highlighter_t highlighter; highlighter.isdirective = iscdirective; highlighter.istype = isctype; highlighter.keywords = ckeywords; char optc; while ((optc = getopt(argc, argv, "hjo:p")) != -1) { switch (optc) { case 'o': if (!(optarg[0] == '-' && optarg[1] == 0)) { settings.outfilename = optarg; } break; case 'j': 
highlighter.isdirective = isjdirective; highlighter.istype = isjtype; highlighter.keywords = jkeywords; break; case 'p': settings.highlight = 0; break; case 'h': printhelp(); return 0; default: return 1; } } if (optind != argc-1) { printhelp(); return 1; } else { settings.infilename = argv[optind]; inputfile_t *inputfile = readinput(settings.infilename); if (inputfile) { FILE *fout; char *line; if (settings.highlight) { line = (char*) malloc(inputfile->maxlinewidth*64); } else { line = NULL; } if (settings.outfilename) { fout = fopen(settings.outfilename, "w"); } else { fout = stdout; } fprintf(fout, "<pre>\n"); int lnw = lnint(inputfile->count); for (int i = 0 ; i < inputfile->count ; i++) { if (settings.highlight) { parseline(inputfile->lines[i], line, &highlighter); } else { line = inputfile->lines[i]; } fprintf(fout, "<span class=\"c2html-lineno\">%*d:</span> %s", lnw, i+1, line); } if (settings.highlight) { free(line); } fprintf(fout, "</pre>\n"); if (fout != stdout) { fclose(fout); } freeinputfilebuffer(inputfile); } return 0; } }