src/c2html.c

Wed, 10 Jul 2013 18:12:13 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 10 Jul 2013 18:12:13 +0200
changeset 18
5085b57e3fd6
parent 17
7ea86024aef0
child 19
2e812df2b231
permissions
-rw-r--r--

fixed highlighting for java

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2013 Mike Becker. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  *
    28  */
    30 #include <stdio.h>
    31 #include <stdlib.h>
    32 #include <string.h>
    33 #include <fcntl.h>
    34 #include <unistd.h>
    35 #include <ctype.h>
/* Number of bytes requested from the input file per read() call. */
#define INPUTBUF_SIZE 2048
/* Size of the buffer in which parseline() collects a single word. */
#define WORDBUF_SIZE 64
    40 const char* ckeywords[] = {
    41   "auto", "break", "case", "char", "const", "continue", "default", "do", 
    42   "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", 
    43   "long", "register", "return", "short", "signed", "sizeof", "static",
    44   "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    45   "while", NULL
    46 };
    48 const char* jkeywords[] = {
    49   "abstract", "continue", "for", "new", "switch", "assert", "default", "goto",
    50   "package", "synchronized", "boolean", "do", "if", "private", "this",
    51   "break", "double", "implements", "protected", "throw", "byte", "else",
    52   "import", "public", "throws", "case", "enum", "instanceof", "return",
    53   "transient", "catch", "extends", "int", "short", "try", "char", "final",
    54   "interface", "static", "void", "class", "finally", "long", "strictfp",
    55   "volatile", "const", "float", "native", "super", "while", NULL
    56 };
    58 #define iswordcharacter(c) (isalnum(c) || c=='_' || c=='#' || c=='@')
    60 int isctype(char *word, size_t len) {
    61   return (word[len-2] == '_' && word[len-1] == 't');
    62 }
    64 int iscdirective(char *word) {
    65   return (word[0] == '#');
    66 }
    68 int isjtype(char *word, size_t len) {
    69   return isupper(word[0]);
    70 }
    72 int isjdirective(char *word) {
    73   return word[0] == '@';
    74 }
/*
 * Language specific rules used by parseline() to classify words.
 */
typedef struct {
  /* NULL-terminated list of the language's keywords */
  const char** keywords;
  /* returns non-zero if the word (with the given length) names a type */
  int(*istype)(char*,size_t);
  /* returns non-zero if the word is a directive */
  int(*isdirective)(char*);
} highlighter_t;
/*
 * Program settings derived from the command line.
 */
typedef struct {
  /* output file name; NULL means that stdout is used */
  char* outfilename;
  /* name of the source file to format */
  char* infilename;
  /* non-zero: highlight the source; zero: plain text (option -p) */
  int highlight;
} settings_t;
    88 typedef struct {
    89   size_t count;
    90   size_t capacity;
    91   size_t maxlinewidth;
    92   char** lines;
    93 } inputfile_t;
    95 inputfile_t *inputfilebuffer(size_t capacity) {
    96   inputfile_t *inputfile = (inputfile_t*) malloc(sizeof(inputfile_t));
    97   inputfile->lines = (char**) malloc(capacity * sizeof(char*));
    98   inputfile->capacity = capacity;
    99   inputfile->count = 0;
   100   inputfile->maxlinewidth = 0;
   102   return inputfile;
   103 }
   105 void addline(inputfile_t *inputfile, char* line, size_t width) {
   106   char *l = (char*) malloc(width+1);
   107   memcpy(l, line, width);
   108   l[width] = 0;
   109   if (inputfile->count >= inputfile->capacity) {
   110     inputfile->capacity <<= 1;
   111     inputfile->lines = realloc(inputfile->lines, inputfile->capacity);
   112   }
   113   inputfile->lines[inputfile->count] = l;
   114   inputfile->maxlinewidth =
   115           width > inputfile->maxlinewidth ? width : inputfile->maxlinewidth;
   116   inputfile->count++;
   117 }
   119 void freeinputfilebuffer(inputfile_t *inputfile) {
   120   for (int i = 0 ; i < inputfile->count ; i++) {
   121     free(inputfile->lines[i]);
   122   }
   123   free(inputfile->lines);
   124   free(inputfile);
   125 }
   127 inputfile_t *readinput(char *filename) {
   129   int fd = open(filename, O_RDONLY);
   130   if (fd == -1) return NULL;
   132   inputfile_t *inputfile = inputfilebuffer(512);
   134   char buf[INPUTBUF_SIZE];
   135   ssize_t r;
   137   size_t maxlinewidth = 256;
   138   char *line = (char*) malloc(maxlinewidth);
   139   size_t col = 0;
   141   while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
   142     for (size_t i = 0 ; i < r ; i++) {
   143       if (col >= maxlinewidth-4) {
   144         maxlinewidth <<= 1;
   145         line = realloc(line, maxlinewidth);
   146       }
   148       if (buf[i] == '\n') {
   149         line[col++] = '\n';
   150         line[col] = 0;
   151         addline(inputfile, line, col);        
   152         col = 0;
   153       } else {
   154         line[col++] = buf[i];
   155       }
   156     }
   157   }
   159   free(line);
   161   close(fd);
   163   return inputfile;
   164 }
   166 size_t writeescapedchar(char *dest, size_t dp, char c) {
   167   if (c == '>') {
   168     dest[dp++] = '&'; dest[dp++] = 'g';
   169     dest[dp++] = 't'; dest[dp++] = ';';
   170   } else if (c == '<') {
   171     dest[dp++] = '&'; dest[dp++] = 'l';
   172     dest[dp++] = 't'; dest[dp++] = ';';
   173   } else {
   174     dest[dp++] = c;
   175   }
   177   return dp;
   178 }
   180 int iskeyword(char *word, const char** keywords) {
   181   for (int i = 0 ; keywords[i] ; i++) {
   182     if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) {
   183       return 1;
   184     }
   185   }
   186   return 0;
   187 }
   189 int iscapsonly(char *word, size_t wp) {
   190   for (size_t i = 0 ; i < wp ; i++) {
   191     if (!isupper(word[i]) && word[i] != '_') {
   192       return 0;
   193     }
   194   }
   195   return 1;
   196 }
   198 void parseline(char *src, char *dest, highlighter_t *highlighter) {
   199   size_t sp = 0, dp = 0;
   200   /* indent */
   201   while (isspace(src[sp])) {
   202     dest[dp++] = src[sp++];
   203   }
   205   static char word[WORDBUF_SIZE];
   206   static char includefile[FILENAME_MAX];
   208   memset(word, 0, WORDBUF_SIZE);
   209   size_t wp = 0, ifp = 0;
   210   int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
   211   static int iscommentml;
   212   int isescaping = 0;
   214   if (iscommentml) {
   215     iscomment = 1;
   216     memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
   217     dp += 29;
   218   }
   220   for (char c = src[sp] ; c ; c=src[++sp]) {
   221     /* comments */
   222     if (c == '/') {
   223       if (iscommentml && sp > 0 && src[sp-1] == '*') {
   224         iscomment = 0;
   225         iscommentml = 0;
   226         memcpy(&(dest[dp]), "/</span>", 8);
   227         dp += 8;
   228         continue;
   229       } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   230         iscomment = 1;
   231         iscommentml = (src[sp+1] == '*');
   232         memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
   233         dp += 29;
   234       }
   235     }
   237     if (iscomment) {
   238       if (c == '\n') {
   239         memcpy(&(dest[dp]), "</span>", 7);
   240         dp += 7;
   241       }
   242       dp = writeescapedchar(dest, dp, c);
   243     } else if (isinclude) {
   244       if (c == '<') {
   245         memcpy(&(dest[dp]), "<span class=\"c2html-stdinclude\">", 32);
   246         dp += 32;
   247         dp = writeescapedchar(dest, dp, c);
   248       } else if (c == '\"') {
   249         if (parseinclude) {
   250           dest[dp++] = '\"';
   251           dest[dp++] = '>';
   252           memcpy(&(dest[dp]), includefile, ifp);
   253           dp += ifp;
   255           dp = writeescapedchar(dest, dp, c);
   256           memcpy(&(dest[dp]), "</a>", 4);
   257           dp += 4;
   258           parseinclude = 0;
   259         } else {
   260           memcpy(&(dest[dp]), "<a class=\"c2html-userinclude\" href=", 35);
   261           dp += 35;
   262           dp = writeescapedchar(dest, dp, c);
   263           ifp = 0;
   264           includefile[ifp++] = '\"';
   265           parseinclude = 1;
   266         }
   267       } else if (c == '>') {
   268         dp = writeescapedchar(dest, dp, c);
   269         memcpy(&(dest[dp]), "</span>", 7);
   270         dp += 7;
   271       } else {
   272         if (parseinclude) {
   273           includefile[ifp++] = c;
   274         }
   275         dp = writeescapedchar(dest, dp, c);
   276       }
   277     } else {
   278       /* strings */
   279       if (!isescaping && (c == '\'' || c == '\"')) {
   280         isstring ^= 1;
   281         if (isstring) {
   282           memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28);
   283           dp += 28;
   284           dp = writeescapedchar(dest, dp, c);
   285         } else {
   286           dp = writeescapedchar(dest, dp, c);
   287           memcpy(&(dest[dp]), "</span>", 7);
   288           dp += 7;
   289         }
   290       } else {
   291         if (isstring) {
   292           dp = writeescapedchar(dest, dp, c);
   293         } else if (!iswordcharacter(c)) {
   294           /* interpret word int_t */
   295           if (wp > 0 && wp < WORDBUF_SIZE) {
   296             int closespan = 1;
   297             if (iskeyword(word, highlighter->keywords)) {
   298               memcpy(&(dest[dp]), "<span class=\"c2html-keyword\">", 29);
   299               dp += 29;
   300             } else if (highlighter->istype(word, wp)) {
   301               memcpy(&(dest[dp]), "<span class=\"c2html-type\">", 26);
   302               dp += 26;
   303             } else if (highlighter->isdirective(word)) {
   304               isinclude = !strncmp("#include", word, WORDBUF_SIZE);
   305               memcpy(&(dest[dp]), "<span class=\"c2html-directive\">", 31);
   306               dp += 31;
   307             } else if (iscapsonly(word, wp)) {
   308               memcpy(&(dest[dp]), "<span class=\"c2html-macroconst\">", 32);
   309               dp += 32;
   310             } else {
   311               closespan = 0;
   312             }
   313             for (int i = 0 ; i < wp ; i++) {
   314               dp = writeescapedchar(dest, dp, word[i]);
   315             }
   316             if (closespan) {
   317               memcpy(&(dest[dp]), "</span>", 7);
   318               dp += 7;
   319             }
   320           }
   321           memset(word, 0, WORDBUF_SIZE);
   322           wp = 0;
   323           dp = writeescapedchar(dest, dp, c);
   324         } else {
   325           /* read word */
   326           if (wp < WORDBUF_SIZE) {
   327             word[wp++] = c;
   328           } else if (wp == WORDBUF_SIZE) {
   329             for (int i = 0 ; i < WORDBUF_SIZE ; i++) {
   330               dp = writeescapedchar(dest, dp, word[i]);
   331             }
   332             wp++;
   333             dp = writeescapedchar(dest, dp, c);
   334           } else {
   335             dp = writeescapedchar(dest, dp, c);
   336           }
   337         }
   338       }
   340       isescaping = !isescaping & (c == '\\');
   341     }
   342   }
   343   dest[dp] = 0;
   344 }
   346 void printhelp() {
   347   printf("Formats source code using HTML.\n\nUsage:\n"
   348       "  c2html [Options] FILE\n\n"
   349       " Options:\n"
   350       "  -h                    Prints this help message\n"
   351       "  -j                    Highlight Java instead of C source code\n"
   352       "  -o <output>           Output file (if not specified, stdout is used)\n"
   353       "  -p                    Disable highlighting (plain text)\n"
   354       "\n");
   357 }
   359 int lnint(size_t lnc) {
   360   int w = 1, p = 1;
   361   while ((p*=10) < lnc) w++;
   362   return w;
   363 }
   365 int main(int argc, char** argv) {
   367   settings_t settings;
   368   settings.outfilename = NULL;
   369   settings.highlight = 1;
   371   highlighter_t highlighter;
   372   highlighter.isdirective = iscdirective;
   373   highlighter.istype = isctype;
   374   highlighter.keywords = ckeywords;
   376   char optc;
   377   while ((optc = getopt(argc, argv, "hjo:p")) != -1) {
   378     switch (optc) {
   379       case 'o':
   380         if (!(optarg[0] == '-' && optarg[1] == 0)) {
   381           settings.outfilename = optarg;
   382         }
   383         break;
   384       case 'j':
   385         highlighter.isdirective = isjdirective;
   386         highlighter.istype = isjtype;
   387         highlighter.keywords = jkeywords;
   388         break;
   389       case 'p':
   390         settings.highlight = 0;
   391         break;
   392       case 'h':
   393         printhelp();
   394         return 0;
   395       default:
   396         return 1;
   397     }
   398   }
   400   if (optind != argc-1) {
   401     printhelp();
   402     return 1;
   403   } else {
   404     settings.infilename = argv[optind];
   406     inputfile_t *inputfile = readinput(settings.infilename);
   407     if (inputfile) {
   408       FILE *fout;
   409       char *line;
   410       if (settings.highlight) {
   411         line = (char*) malloc(inputfile->maxlinewidth*64);
   412       } else {
   413         line = NULL;
   414       }
   415       if (settings.outfilename) {
   416         fout = fopen(settings.outfilename, "w");
   417       } else {
   418         fout = stdout;
   419       }
   420       fprintf(fout, "<pre>\n");
   421       int lnw = lnint(inputfile->count);
   422       for (int i = 0 ; i < inputfile->count ; i++) {
   423         if (settings.highlight) {
   424           parseline(inputfile->lines[i], line, &highlighter);
   425         } else {
   426           line = inputfile->lines[i];
   427         }
   428         fprintf(fout, "<span class=\"c2html-lineno\">%*d:</span> %s",
   429             lnw, i+1, line);
   430       }
   431       if (settings.highlight) {
   432         free(line);
   433       }
   434       fprintf(fout, "</pre>\n");
   436       if (fout != stdout) {
   437         fclose(fout);
   438       }
   440       freeinputfilebuffer(inputfile);
   441     }
   443     return 0;
   444   }
   445 }

mercurial