Fri, 21 Jun 2013 12:27:45 +0200
comments
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2013 Mike Becker. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <ctype.h>

/* Size of the chunk buffer used when reading the input file. */
#define INPUTBUF_SIZE 2048
/* Upper bound (including the terminator) for words compared to keywords. */
#define WORDBUF_SIZE 16

/* True when the first len characters of word end in "_t" (e.g. size_t). */
#define istype(word, len) \
    ((len) >= 2 && (word)[(len) - 2] == '_' && (word)[(len) - 1] == 't')
/* True when word starts like a preprocessor directive. */
#define isdirective(word) ((word)[0] == '#')

/* Markup emitted around highlighted source fragments. */
#define SPAN_COMMENT   "<span class=\"c2html-comment\">"
#define SPAN_STRING    "<span class=\"c2html-string\">"
#define SPAN_KEYWORD   "<span class=\"c2html-keyword\">"
#define SPAN_TYPE      "<span class=\"c2html-type\">"
#define SPAN_DIRECTIVE "<span class=\"c2html-directive\">"
#define SPAN_CLOSE     "</span>"

/* NULL-terminated list of the words highlighted as keywords. */
const char* keywords[] = {
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    "long", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    "while", NULL
};

/* The lines of one input file. */
typedef struct {
    size_t count;         /* number of lines stored in lines[] */
    size_t capacity;      /* number of slots allocated for lines[] */
    size_t maxlinewidth;  /* width of the widest stored line */
    char** lines;         /* NUL-terminated copies of the lines */
} inputfile_t;

/*
 * Allocates or resizes memory like realloc(), but never returns NULL:
 * when the allocation fails an error is printed and the program exits.
 */
static void *xrealloc(void *ptr, size_t size) {
    /* request at least one byte so a NULL result always means failure */
    void *mem = realloc(ptr, size > 0 ? size : 1);
    if (mem == NULL) {
        perror("c2html");
        exit(EXIT_FAILURE);
    }
    return mem;
}

/*
 * Creates an empty line buffer with room for capacity lines.
 * The result must be released with freeinputfilebuffer().
 */
inputfile_t *inputfilebuffer(size_t capacity) {
    inputfile_t *inputfile = xrealloc(NULL, sizeof *inputfile);
    inputfile->lines = xrealloc(NULL, capacity * sizeof *inputfile->lines);
    inputfile->capacity = capacity;
    inputfile->count = 0;
    inputfile->maxlinewidth = 0;
    return inputfile;
}

/*
 * Appends a copy of the first width bytes of line to inputfile and updates
 * maxlinewidth. The stored copy is NUL-terminated; line itself need not be.
 */
void addline(inputfile_t *inputfile, char* line, size_t width) {
    char *copy = xrealloc(NULL, width + 1);
    memcpy(copy, line, width);
    copy[width] = 0;

    if (inputfile->count >= inputfile->capacity) {
        /* double the table; the size is in bytes, hence the sizeof factor */
        size_t newcap = inputfile->capacity > 0 ? inputfile->capacity * 2 : 1;
        inputfile->lines = xrealloc(inputfile->lines,
                                    newcap * sizeof *inputfile->lines);
        inputfile->capacity = newcap;
    }

    inputfile->lines[inputfile->count++] = copy;
    if (width > inputfile->maxlinewidth) {
        inputfile->maxlinewidth = width;
    }
}

/* Releases a line buffer and all lines stored in it. NULL is accepted. */
void freeinputfilebuffer(inputfile_t *inputfile) {
    if (inputfile == NULL) {
        return;
    }
    for (size_t i = 0 ; i < inputfile->count ; i++) {
        free(inputfile->lines[i]);
    }
    free(inputfile->lines);
    free(inputfile);
}

/*
 * Reads the file filename and splits it into lines.
 *
 * Every stored line ends with '\n'; a final line that lacks the newline in
 * the file gets one appended so that no input is lost.
 *
 * Returns the line buffer (release it with freeinputfilebuffer()), or NULL
 * with errno set when the file cannot be opened or read.
 */
inputfile_t *readinput(char *filename) {
    int fd = open(filename, O_RDONLY);
    if (fd == -1) {
        return NULL;
    }

    inputfile_t *inputfile = inputfilebuffer(512);
    size_t linecap = 256;
    char *line = xrealloc(NULL, linecap);
    size_t col = 0;
    char buf[INPUTBUF_SIZE];
    ssize_t r;

    while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
        for (ssize_t i = 0 ; i < r ; i++) {
            /* keep room for this byte plus a newline appended at EOF */
            if (col + 2 > linecap) {
                linecap *= 2;
                line = xrealloc(line, linecap);
            }
            line[col++] = buf[i];
            if (buf[i] == '\n') {
                addline(inputfile, line, col);
                col = 0;
            }
        }
    }

    if (r < 0) {
        /* report the read error, not one caused by the cleanup */
        int err = errno;
        close(fd);
        free(line);
        freeinputfilebuffer(inputfile);
        errno = err;
        return NULL;
    }
    close(fd);

    if (col > 0) {
        line[col++] = '\n';
        addline(inputfile, line, col);
    }
    free(line);
    return inputfile;
}

/* Copies the string str (without its terminator) to dest[dp...]; returns
 * the new write position. */
static size_t writestr(char *dest, size_t dp, const char *str) {
    size_t len = strlen(str);
    memcpy(dest + dp, str, len);
    return dp + len;
}

/*
 * Writes c to dest[dp...], replacing the HTML special characters '<', '>'
 * and '&' by their entities. Returns the new write position.
 * No terminator is written.
 */
size_t writeescapedchar(char *dest, size_t dp, char c) {
    switch (c) {
    case '<':
        return writestr(dest, dp, "&lt;");
    case '>':
        return writestr(dest, dp, "&gt;");
    case '&':
        return writestr(dest, dp, "&amp;");
    default:
        dest[dp++] = c;
        return dp;
    }
}

/*
 * Returns 1 if word is one of the entries of keywords[], 0 otherwise.
 * At most WORDBUF_SIZE characters of word are examined.
 */
int iskeyword(char *word) {
    for (int i = 0 ; keywords[i] ; i++) {
        if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Writes the len characters at word to dest[dp...], wrapped in a keyword,
 * type or directive span when the word classifies as one (in that order).
 * Returns the new write position.
 */
static size_t writeword(char *dest, size_t dp, const char *word, size_t len) {
    const char *span = NULL;

    if (len < WORDBUF_SIZE) {
        /* iskeyword() needs a terminated string */
        char key[WORDBUF_SIZE];
        memcpy(key, word, len);
        key[len] = 0;
        if (iskeyword(key)) {
            span = SPAN_KEYWORD;
        }
    }
    if (span == NULL) {
        if (istype(word, len)) {
            span = SPAN_TYPE;
        } else if (isdirective(word)) {
            span = SPAN_DIRECTIVE;
        }
    }

    if (span != NULL) {
        dp = writestr(dest, dp, span);
    }
    for (size_t i = 0 ; i < len ; i++) {
        dp = writeescapedchar(dest, dp, word[i]);
    }
    if (span != NULL) {
        dp = writestr(dest, dp, SPAN_CLOSE);
    }
    return dp;
}

/* Emits the pending word (if any) via writeword() and resets *len to 0. */
static size_t flushword(char *dest, size_t dp, const char *word, size_t *len) {
    if (*len > 0) {
        dp = writeword(dest, dp, word, *len);
        *len = 0;
    }
    return dp;
}

/*
 * Converts one source line to HTML with highlighting spans.
 *
 * src  - NUL-terminated source line, normally ending in '\n'
 * dest - receives the NUL-terminated markup. It must be large enough for
 *        the result; 64 bytes per character of a non-empty src (what main
 *        reserves) are sufficient, since every character costs at most
 *        5 bytes and at most one span pair (38 bytes) is opened per
 *        character.
 *
 * Comments, string/character literals, keywords, *_t type names and
 * preprocessor directives are wrapped in spans. A span never extends past
 * the end of the line: spans still open at '\n' are closed there, and a
 * block comment is re-opened on the next call. For that purpose the
 * function remembers in a static variable whether a block comment is
 * open, so lines must be passed in file order.
 */
void parseline(char *src, char *dest) {
    static int iscommentml = 0; /* inside a block comment, kept across calls */

    size_t sp = 0, dp = 0;

    /* indent; the newline itself is handled by the main loop */
    while (src[sp] != '\n' && isspace((unsigned char) src[sp])) {
        dest[dp++] = src[sp++];
    }

    int spanopen = 0;       /* a comment or string span is open in dest */
    int islinecomment = 0;  /* inside a line comment */
    char quote = 0;         /* opening quote while inside a literal */
    int isescaping = 0;     /* previous character was an unescaped '\\' */
    size_t wordstart = 0;   /* index of the pending word in src */
    size_t wordlen = 0;     /* length of the pending word, 0 if none */
    size_t closefrom = 1;   /* first index where '/' may end a comment */

    for ( ; src[sp] ; sp++) {
        char c = src[sp];

        if (c == '\n') {
            /* end of line: finish the word and everything still open */
            dp = flushword(dest, dp, src + wordstart, &wordlen);
            if (spanopen) {
                dp = writestr(dest, dp, SPAN_CLOSE);
                spanopen = 0;
            }
            islinecomment = 0;
            quote = 0;
            isescaping = 0;
            dest[dp++] = c;
            continue;
        }

        /* comments */
        if (iscommentml || islinecomment) {
            if (!spanopen) {
                /* block comment continued from a previous line */
                dp = writestr(dest, dp, SPAN_COMMENT);
                spanopen = 1;
            }
            dp = writeescapedchar(dest, dp, c);
            if (iscommentml && c == '/'
                    && sp >= closefrom && src[sp - 1] == '*') {
                dp = writestr(dest, dp, SPAN_CLOSE);
                spanopen = 0;
                iscommentml = 0;
            }
            continue;
        }

        /* strings */
        if (quote) {
            dp = writeescapedchar(dest, dp, c);
            if (!isescaping && c == quote) {
                dp = writestr(dest, dp, SPAN_CLOSE);
                spanopen = 0;
                quote = 0;
            }
            isescaping = !isescaping && c == '\\';
            continue;
        }

        if (c == '/' && (src[sp + 1] == '/' || src[sp + 1] == '*')) {
            dp = flushword(dest, dp, src + wordstart, &wordlen);
            dp = writestr(dest, dp, SPAN_COMMENT);
            spanopen = 1;
            if (src[sp + 1] == '*') {
                iscommentml = 1;
                /* the '*' of the opener must not serve as the closer */
                closefrom = sp + 3;
            } else {
                islinecomment = 1;
            }
            dp = writeescapedchar(dest, dp, c);
            isescaping = 0;
            continue;
        }

        if (!isescaping && (c == '\'' || c == '\"')) {
            dp = flushword(dest, dp, src + wordstart, &wordlen);
            dp = writestr(dest, dp, SPAN_STRING);
            spanopen = 1;
            quote = c;
            dp = writeescapedchar(dest, dp, c);
            continue;
        }

        if (isalnum((unsigned char) c) || c == '_' || c == '#') {
            /* read word */
            if (wordlen == 0) {
                wordstart = sp;
            }
            wordlen++;
        } else {
            /* interpret word */
            dp = flushword(dest, dp, src + wordstart, &wordlen);
            dp = writeescapedchar(dest, dp, c);
        }
        isescaping = !isescaping && c == '\\';
    }

    /* line without trailing newline */
    dp = flushword(dest, dp, src + wordstart, &wordlen);
    if (spanopen) {
        dp = writestr(dest, dp, SPAN_CLOSE);
    }
    dest[dp] = 0;
}

/* Prints the usage message to standard output. */
void printhelp(void) {
    printf("Formats source code using HTML.\n\nUsage:\n"
        "  c2html [FILE...]"
        "\n");
}

/* Returns the number of decimal digits needed to print lnc. */
int lnint(size_t lnc) {
    int w = 1;
    while (lnc >= 10) {
        lnc /= 10;
        w++;
    }
    return w;
}

/*
 * Program entry point: formats the file named by the first argument and
 * writes the HTML to standard output.
 *
 * Define C2HTML_NO_MAIN to compile the functions above without the entry
 * point, e.g. to link them into a test harness.
 */
#ifndef C2HTML_NO_MAIN
int main(int argc, char** argv) {
    if (argc < 2) {
        printhelp();
        return 0;
    }

    inputfile_t *inputfile = readinput(argv[1]);
    if (inputfile == NULL) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    printf("<pre>\n");
    /* see parseline() for the size requirement */
    char *line = xrealloc(NULL, inputfile->maxlinewidth * 64);
    int lnw = lnint(inputfile->count);
    for (size_t i = 0 ; i < inputfile->count ; i++) {
        parseline(inputfile->lines[i], line);
        printf("<span class=\"c2html-lineno\">%*zu:</span> %s",
                lnw, i + 1, line);
    }
    free(line);
    printf("</pre>\n");

    freeinputfilebuffer(inputfile);
    return 0;
}
#endif /* C2HTML_NO_MAIN */