src/c2html.c

Fri, 30 Aug 2013 11:23:44 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 30 Aug 2013 11:23:44 +0200
changeset 20
ebbf0776c1bc
parent 19
2e812df2b231
child 21
537aec525835
permissions
-rw-r--r--

replaced function static variables with struct members

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2013 Mike Becker. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  *
    28  */
    30 #include <stdio.h>
    31 #include <stdlib.h>
    32 #include <string.h>
    33 #include <fcntl.h>
    34 #include <unistd.h>
    35 #include <ctype.h>
    37 #define INPUTBUF_SIZE 2048
    38 #define WORDBUF_SIZE 64
    40 const char* ckeywords[] = {
    41     "auto", "break", "case", "char", "const", "continue", "default", "do",
    42     "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    43     "long", "register", "return", "short", "signed", "sizeof", "static",
    44     "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    45     "while", NULL
    46 };
    48 const char* jkeywords[] = {
    49     "abstract", "continue", "for", "new", "switch", "assert", "default", "goto",
    50     "package", "synchronized", "boolean", "do", "if", "private", "this",
    51     "break", "double", "implements", "protected", "throw", "byte", "else",
    52     "import", "public", "throws", "case", "enum", "instanceof", "return",
    53     "transient", "catch", "extends", "int", "short", "try", "char", "final",
    54     "interface", "static", "void", "class", "finally", "long", "strictfp",
    55     "volatile", "const", "float", "native", "super", "while", NULL
    56 };
    58 #define iswordcharacter(c) (isalnum(c) || c=='_' || c=='#' || c=='@')
    60 int isctype(char *word, size_t len) {
    61     return (word[len-2] == '_' && word[len-1] == 't');
    62 }
    64 int iscdirective(char *word) {
    65     return (word[0] == '#');
    66 }
    68 int isjtype(char *word, size_t len) {
    69     return isupper(word[0]);
    70 }
    72 int isjdirective(char *word) {
    73     return word[0] == '@';
    74 }
    76 typedef struct _highlighter_t highlighter_t;
    78 struct _highlighter_t {
    79     const char** keywords;
    80     int(*istype)(char*,size_t);
    81     int(*isdirective)(char*);
    82     void(*parser)(char*,char*,highlighter_t*);
    83     int iscommentml;
    84     char word[WORDBUF_SIZE];
    85     char includefile[FILENAME_MAX];
    86 };
    88 typedef struct {
    89     char* outfilename;
    90     char* infilename;
    91     int highlight;
    92 } settings_t;
    94 typedef struct {
    95     size_t count;
    96     size_t capacity;
    97     size_t maxlinewidth;
    98     char** lines;
    99 } inputfile_t;
   101 inputfile_t *inputfilebuffer(size_t capacity) {
   102     inputfile_t *inputfile = (inputfile_t*) malloc(sizeof(inputfile_t));
   103     inputfile->lines = (char**) malloc(capacity * sizeof(char*));
   104     inputfile->capacity = capacity;
   105     inputfile->count = 0;
   106     inputfile->maxlinewidth = 0;
   108     return inputfile;
   109 }
   111 void addline(inputfile_t *inputfile, char* line, size_t width) {
   112     char *l = (char*) malloc(width+1);
   113     memcpy(l, line, width);
   114     l[width] = 0;
   115     if (inputfile->count >= inputfile->capacity) {
   116         inputfile->capacity <<= 1;
   117         inputfile->lines = realloc(inputfile->lines, inputfile->capacity);
   118     }
   119     inputfile->lines[inputfile->count] = l;
   120     inputfile->maxlinewidth =
   121         width > inputfile->maxlinewidth ? width : inputfile->maxlinewidth;
   122     inputfile->count++;
   123 }
   125 void freeinputfilebuffer(inputfile_t *inputfile) {
   126     for (int i = 0 ; i < inputfile->count ; i++) {
   127         free(inputfile->lines[i]);
   128     }
   129     free(inputfile->lines);
   130     free(inputfile);
   131 }
   133 inputfile_t *readinput(char *filename) {
   135     int fd = open(filename, O_RDONLY);
   136     if (fd == -1) return NULL;
   138     inputfile_t *inputfile = inputfilebuffer(512);
   140     char buf[INPUTBUF_SIZE];
   141     ssize_t r;
   143     size_t maxlinewidth = 256;
   144     char *line = (char*) malloc(maxlinewidth);
   145     size_t col = 0;
   147     while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
   148         for (size_t i = 0 ; i < r ; i++) {
   149             if (col >= maxlinewidth-4) {
   150                 maxlinewidth <<= 1;
   151                 line = realloc(line, maxlinewidth);
   152             }
   154             if (buf[i] == '\n') {
   155                 line[col++] = '\n';
   156                 line[col] = 0;
   157                 addline(inputfile, line, col);
   158                 col = 0;
   159             } else {
   160                 line[col++] = buf[i];
   161             }
   162         }
   163     }
   165     free(line);
   167     close(fd);
   169     return inputfile;
   170 }
   172 size_t writeescapedchar(char *dest, size_t dp, char c) {
   173     if (c == '>') {
   174         dest[dp++] = '&'; dest[dp++] = 'g'; dest[dp++] = 't'; dest[dp++] = ';';
   175     } else if (c == '<') {
   176         dest[dp++] = '&'; dest[dp++] = 'l'; dest[dp++] = 't'; dest[dp++] = ';';
   177     } else {
   178         dest[dp++] = c;
   179     }
   181     return dp;
   182 }
   184 int iskeyword(char *word, const char** keywords) {
   185     for (int i = 0 ; keywords[i] ; i++) {
   186         if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) {
   187             return 1;
   188         }
   189     }
   190     return 0;
   191 }
   193 int iscapsonly(char *word, size_t wp) {
   194     for (size_t i = 0 ; i < wp ; i++) {
   195         if (!isupper(word[i]) && word[i] != '_') {
   196             return 0;
   197         }
   198     }
   199     return 1;
   200 }
   202 void parseline(char *src, char *dest, highlighter_t *hltr) {
   203     hltr->parser(src, dest, hltr);
   204 }
   206 void cjparseline(char *src, char *dest, highlighter_t *hltr) {
   207     size_t sp = 0, dp = 0;
   208     /* indent */
   209     while (isspace(src[sp])) {
   210         dest[dp++] = src[sp++];
   211     }
   213     memset(hltr->word, 0, WORDBUF_SIZE);
   214     size_t wp = 0, ifp = 0;
   215     int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
   216     int isescaping = 0;
   218     if (hltr->iscommentml) {
   219         iscomment = 1;
   220         memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
   221         dp += 29;
   222     }
   224     for (char c = src[sp] ; c ; c=src[++sp]) {
   225         /* comments */
   226         if (c == '/') {
   227             if (hltr->iscommentml && sp > 0 && src[sp-1] == '*') {
   228                 iscomment = 0;
   229                 hltr->iscommentml = 0;
   230                 memcpy(&(dest[dp]), "/</span>", 8);
   231                 dp += 8;
   232                 continue;
   233             } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   234                 iscomment = 1;
   235                 hltr->iscommentml = (src[sp+1] == '*');
   236                 memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
   237                 dp += 29;
   238             }
   239         }
   241         if (iscomment) {
   242             if (c == '\n') {
   243                 memcpy(&(dest[dp]), "</span>", 7);
   244                 dp += 7;
   245             }
   246             dp = writeescapedchar(dest, dp, c);
   247         } else if (isinclude) {
   248             if (c == '<') {
   249                 memcpy(&(dest[dp]), "<span class=\"c2html-stdinclude\">", 32);
   250                 dp += 32;
   251                 dp = writeescapedchar(dest, dp, c);
   252             } else if (c == '\"') {
   253                 if (parseinclude) {
   254                     dest[dp++] = '\"';
   255                     dest[dp++] = '>';
   256                     memcpy(&(dest[dp]), hltr->includefile, ifp);
   257                     dp += ifp;
   259                     dp = writeescapedchar(dest, dp, c);
   260                     memcpy(&(dest[dp]), "</a>", 4);
   261                     dp += 4;
   262                     parseinclude = 0;
   263                 } else {
   264                     memcpy(&(dest[dp]),
   265                         "<a class=\"c2html-userinclude\" href=", 35);
   266                     dp += 35;
   267                     dp = writeescapedchar(dest, dp, c);
   268                     ifp = 0;
   269                     hltr->includefile[ifp++] = '\"';
   270                     parseinclude = 1;
   271                 }
   272             } else if (c == '>') {
   273                 dp = writeescapedchar(dest, dp, c);
   274                 memcpy(&(dest[dp]), "</span>", 7);
   275                 dp += 7;
   276             } else {
   277                 if (parseinclude) {
   278                     hltr->includefile[ifp++] = c;
   279                 }
   280                 dp = writeescapedchar(dest, dp, c);
   281             }
   282         } else {
   283             /* strings */
   284             if (!isescaping && (c == '\'' || c == '\"')) {
   285                 isstring ^= 1;
   286                 if (isstring) {
   287                     memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28);
   288                     dp += 28;
   289                     dp = writeescapedchar(dest, dp, c);
   290                 } else {
   291                     dp = writeescapedchar(dest, dp, c);
   292                     memcpy(&(dest[dp]), "</span>", 7);
   293                     dp += 7;
   294                 }
   295             } else {
   296                 if (isstring) {
   297                     dp = writeescapedchar(dest, dp, c);
   298                 } else if (!iswordcharacter(c)) {
   299                     /* interpret word int_t */
   300                     if (wp > 0 && wp < WORDBUF_SIZE) {
   301                         int closespan = 1;
   302                         if (iskeyword(hltr->word, hltr->keywords)) {
   303                             memcpy(&(dest[dp]),
   304                                 "<span class=\"c2html-keyword\">", 29);
   305                             dp += 29;
   306                         } else if (hltr->istype(hltr->word, wp)) {
   307                             memcpy(&(dest[dp]),
   308                                 "<span class=\"c2html-type\">", 26);
   309                             dp += 26;
   310                         } else if (hltr->isdirective(hltr->word)) {
   311                             isinclude = !strncmp(
   312                                 "#include", hltr->word, WORDBUF_SIZE);
   313                             memcpy(&(dest[dp]),
   314                                 "<span class=\"c2html-directive\">", 31);
   315                             dp += 31;
   316                         } else if (iscapsonly(hltr->word, wp)) {
   317                             memcpy(&(dest[dp]),
   318                                 "<span class=\"c2html-macroconst\">", 32);
   319                             dp += 32;
   320                         } else {
   321                             closespan = 0;
   322                         }
   323                         for (int i = 0 ; i < wp ; i++) {
   324                             dp = writeescapedchar(dest, dp, hltr->word[i]);
   325                         }
   326                         if (closespan) {
   327                             memcpy(&(dest[dp]), "</span>", 7);
   328                             dp += 7;
   329                         }
   330                     }
   331                     memset(hltr->word, 0, WORDBUF_SIZE);
   332                     wp = 0;
   333                     dp = writeescapedchar(dest, dp, c);
   334                 } else {
   335                     /* read word */
   336                     if (wp < WORDBUF_SIZE) {
   337                         hltr->word[wp++] = c;
   338                     } else if (wp == WORDBUF_SIZE) {
   339                         for (int i = 0 ; i < WORDBUF_SIZE ; i++) {
   340                             dp = writeescapedchar(dest, dp, hltr->word[i]);
   341                         }
   342                         wp++;
   343                         dp = writeescapedchar(dest, dp, c);
   344                     } else {
   345                         dp = writeescapedchar(dest, dp, c);
   346                     }
   347                 }
   348             }
   350             isescaping = !isescaping & (c == '\\');
   351         }
   352     }
   353     dest[dp] = 0;
   354 }
   356 void printhelp() {
   357     printf("Formats source code using HTML.\n\nUsage:\n"
   358         "  c2html [Options] FILE\n\n"
   359         " Options:\n"
   360         "  -h                    Prints this help message\n"
   361         "  -j                    Highlight Java instead of C source code\n"
   362         "  -o <output>           Output file (stdout, if not specified)\n"
   363         "  -p                    Disable highlighting (plain text)\n"
   364         "\n");
   367 }
   369 int lnint(size_t lnc) {
   370     int w = 1, p = 1;
   371     while ((p*=10) < lnc) w++;
   372     return w;
   373 }
   375 int main(int argc, char** argv) {
   376     settings_t settings;
   377     settings.outfilename = NULL;
   378     settings.highlight = 1;
   380     highlighter_t highlighter;
   381     memset(&highlighter, 0, sizeof(highlighter));
   382     highlighter.isdirective = iscdirective;
   383     highlighter.istype = isctype;
   384     highlighter.keywords = ckeywords;
   385     highlighter.parser = cjparseline;
   387     char optc;
   388     while ((optc = getopt(argc, argv, "hjo:p")) != -1) {
   389         switch (optc) {
   390             case 'o':
   391                 if (!(optarg[0] == '-' && optarg[1] == 0)) {
   392                     settings.outfilename = optarg;
   393                 }
   394                 break;
   395             case 'j':
   396                 highlighter.isdirective = isjdirective;
   397                 highlighter.istype = isjtype;
   398                 highlighter.keywords = jkeywords;
   399                 break;
   400             case 'p':
   401                 settings.highlight = 0;
   402                 break;
   403             case 'h':
   404                 printhelp();
   405                 return 0;
   406             default:
   407                 return 1;
   408         }
   409     }
   411     if (optind != argc-1) {
   412         printhelp();
   413         return 1;
   414     } else {
   415         settings.infilename = argv[optind];
   417         inputfile_t *inputfile = readinput(settings.infilename);
   418         if (inputfile) {
   419             FILE *fout;
   420             char *line;
   421             if (settings.highlight) {
   422                 line = (char*) malloc(inputfile->maxlinewidth*64);
   423             } else {
   424                 line = NULL;
   425             }
   426             if (settings.outfilename) {
   427                 fout = fopen(settings.outfilename, "w");
   428             } else {
   429                 fout = stdout;
   430             }
   431             fprintf(fout, "<pre>\n");
   432             int lnw = lnint(inputfile->count);
   433             for (int i = 0 ; i < inputfile->count ; i++) {
   434                 if (settings.highlight) {
   435                     parseline(inputfile->lines[i], line, &highlighter);
   436                 } else {
   437                     line = inputfile->lines[i];
   438                 }
   439                 fprintf(fout, "<span class=\"c2html-lineno\">%*d:</span> %s",
   440                     lnw, i+1, line);
   441             }
   442             if (settings.highlight) {
   443                 free(line);
   444             }
   445             fprintf(fout, "</pre>\n");
   447             if (fout != stdout) {
   448                 fclose(fout);
   449             }
   451             freeinputfilebuffer(inputfile);
   452         }
   454         return 0;
   455     }
   456 }

mercurial