src/c2html.c

Wed, 10 Jul 2013 17:57:03 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 10 Jul 2013 17:57:03 +0200
changeset 17
7ea86024aef0
parent 16
fa0bcd0444eb
child 18
5085b57e3fd6
permissions
-rw-r--r--

implemented java highlighting

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2013 Mike Becker. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  *
    28  */
    30 #include <stdio.h>
    31 #include <stdlib.h>
    32 #include <string.h>
    33 #include <fcntl.h>
    34 #include <unistd.h>
    35 #include <ctype.h>
    37 #define INPUTBUF_SIZE 2048
    38 #define WORDBUF_SIZE 16
    40 const char* ckeywords[] = {
    41   "auto", "break", "case", "char", "const", "continue", "default", "do", 
    42   "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", 
    43   "long", "register", "return", "short", "signed", "sizeof", "static",
    44   "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    45   "while", NULL
    46 };
    48 const char* jkeywords[] = {
    49   "abstract", "continue", "for", "new", "switch", "assert", "default", "goto",
    50   "package", "synchronized", "boolean", "do", "if", "private", "this",
    51   "break", "double", "implements", "protected", "throw", "byte", "else",
    52   "import", "public", "throws", "case", "enum", "instanceof", "return",
    53   "transient", "catch", "extends", "int", "short", "try", "char", "final",
    54   "interface", "static", "void", "class", "finally", "long", "strictfp",
    55   "volatile", "const", "float", "native", "super", "while", NULL
    56 };
    58 int isctype(char *word, size_t len) {
    59   return (word[len-2] == '_' && word[len-1] == 't');
    60 }
    62 int iscdirective(char *word) {
    63   return (word[0] == '#');
    64 }
    66 int isjtype(char *word, size_t len) {
    67   return isupper(word[0]);
    68 }
    70 int isjdirective(char *word) {
    71   return word[0] == '@';
    72 }
    74 typedef struct {
    75   const char** keywords;
    76   int(*istype)(char*,size_t);
    77   int(*isdirective)(char*);
    78 } highlighter_t;
    80 typedef struct {
    81   char* outfilename;
    82   char* infilename;
    83   int highlight;
    84 } settings_t;
    86 typedef struct {
    87   size_t count;
    88   size_t capacity;
    89   size_t maxlinewidth;
    90   char** lines;
    91 } inputfile_t;
    93 inputfile_t *inputfilebuffer(size_t capacity) {
    94   inputfile_t *inputfile = (inputfile_t*) malloc(sizeof(inputfile_t));
    95   inputfile->lines = (char**) malloc(capacity * sizeof(char*));
    96   inputfile->capacity = capacity;
    97   inputfile->count = 0;
    98   inputfile->maxlinewidth = 0;
   100   return inputfile;
   101 }
   103 void addline(inputfile_t *inputfile, char* line, size_t width) {
   104   char *l = (char*) malloc(width+1);
   105   memcpy(l, line, width);
   106   l[width] = 0;
   107   if (inputfile->count >= inputfile->capacity) {
   108     inputfile->capacity <<= 1;
   109     inputfile->lines = realloc(inputfile->lines, inputfile->capacity);
   110   }
   111   inputfile->lines[inputfile->count] = l;
   112   inputfile->maxlinewidth =
   113           width > inputfile->maxlinewidth ? width : inputfile->maxlinewidth;
   114   inputfile->count++;
   115 }
   117 void freeinputfilebuffer(inputfile_t *inputfile) {
   118   for (int i = 0 ; i < inputfile->count ; i++) {
   119     free(inputfile->lines[i]);
   120   }
   121   free(inputfile->lines);
   122   free(inputfile);
   123 }
   125 inputfile_t *readinput(char *filename) {
   127   int fd = open(filename, O_RDONLY);
   128   if (fd == -1) return NULL;
   130   inputfile_t *inputfile = inputfilebuffer(512);
   132   char buf[INPUTBUF_SIZE];
   133   ssize_t r;
   135   size_t maxlinewidth = 256;
   136   char *line = (char*) malloc(maxlinewidth);
   137   size_t col = 0;
   139   while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
   140     for (size_t i = 0 ; i < r ; i++) {
   141       if (col >= maxlinewidth-4) {
   142         maxlinewidth <<= 1;
   143         line = realloc(line, maxlinewidth);
   144       }
   146       if (buf[i] == '\n') {
   147         line[col++] = '\n';
   148         line[col] = 0;
   149         addline(inputfile, line, col);        
   150         col = 0;
   151       } else {
   152         line[col++] = buf[i];
   153       }
   154     }
   155   }
   157   free(line);
   159   close(fd);
   161   return inputfile;
   162 }
   164 size_t writeescapedchar(char *dest, size_t dp, char c) {
   165   if (c == '>') {
   166     dest[dp++] = '&'; dest[dp++] = 'g';
   167     dest[dp++] = 't'; dest[dp++] = ';';
   168   } else if (c == '<') {
   169     dest[dp++] = '&'; dest[dp++] = 'l';
   170     dest[dp++] = 't'; dest[dp++] = ';';
   171   } else {
   172     dest[dp++] = c;
   173   }
   175   return dp;
   176 }
   178 int iskeyword(char *word, const char** keywords) {
   179   for (int i = 0 ; keywords[i] ; i++) {
   180     if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) {
   181       return 1;
   182     }
   183   }
   184   return 0;
   185 }
   187 int iscapsonly(char *word, size_t wp) {
   188   for (size_t i = 0 ; i < wp ; i++) {
   189     if (!isupper(word[i]) && word[i] != '_') {
   190       return 0;
   191     }
   192   }
   193   return 1;
   194 }
   196 void parseline(char *src, char *dest, highlighter_t *highlighter) {
   197   size_t sp = 0, dp = 0;
   198   /* indent */
   199   while (isspace(src[sp])) {
   200     dest[dp++] = src[sp++];
   201   }
   203   static char word[WORDBUF_SIZE];
   204   static char includefile[FILENAME_MAX];
   206   memset(word, 0, WORDBUF_SIZE);
   207   size_t wp = 0, ifp = 0;
   208   int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
   209   static int iscommentml;
   210   int isescaping = 0;
   212   if (iscommentml) {
   213     iscomment = 1;
   214     memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
   215     dp += 29;
   216   }
   218   for (char c = src[sp] ; c ; c=src[++sp]) {
   219     /* comments */
   220     if (c == '/') {
   221       if (iscommentml && sp > 0 && src[sp-1] == '*') {
   222         iscomment = 0;
   223         iscommentml = 0;
   224         memcpy(&(dest[dp]), "/</span>", 8);
   225         dp += 8;
   226         continue;
   227       } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   228         iscomment = 1;
   229         iscommentml = (src[sp+1] == '*');
   230         memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
   231         dp += 29;
   232       }
   233     }
   235     if (iscomment) {
   236       if (c == '\n') {
   237         memcpy(&(dest[dp]), "</span>", 7);
   238         dp += 7;
   239       }
   240       dp = writeescapedchar(dest, dp, c);
   241     } else if (isinclude) {
   242       if (c == '<') {
   243         memcpy(&(dest[dp]), "<span class=\"c2html-stdinclude\">", 32);
   244         dp += 32;
   245         dp = writeescapedchar(dest, dp, c);
   246       } else if (c == '\"') {
   247         if (parseinclude) {
   248           dest[dp++] = '\"';
   249           dest[dp++] = '>';
   250           memcpy(&(dest[dp]), includefile, ifp);
   251           dp += ifp;
   253           dp = writeescapedchar(dest, dp, c);
   254           memcpy(&(dest[dp]), "</a>", 4);
   255           dp += 4;
   256           parseinclude = 0;
   257         } else {
   258           memcpy(&(dest[dp]), "<a class=\"c2html-userinclude\" href=", 35);
   259           dp += 35;
   260           dp = writeescapedchar(dest, dp, c);
   261           ifp = 0;
   262           includefile[ifp++] = '\"';
   263           parseinclude = 1;
   264         }
   265       } else if (c == '>') {
   266         dp = writeescapedchar(dest, dp, c);
   267         memcpy(&(dest[dp]), "</span>", 7);
   268         dp += 7;
   269       } else {
   270         if (parseinclude) {
   271           includefile[ifp++] = c;
   272         }
   273         dp = writeescapedchar(dest, dp, c);
   274       }
   275     } else {
   276       /* strings */
   277       if (!isescaping && (c == '\'' || c == '\"')) {
   278         isstring ^= 1;
   279         if (isstring) {
   280           memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28);
   281           dp += 28;
   282           dp = writeescapedchar(dest, dp, c);
   283         } else {
   284           dp = writeescapedchar(dest, dp, c);
   285           memcpy(&(dest[dp]), "</span>", 7);
   286           dp += 7;
   287         }
   288       } else {
   289         if (isstring) {
   290           dp = writeescapedchar(dest, dp, c);
   291         } else if (!isalnum(c) && c!='_' && c!='#' && c!='.' && c!='@') {
   292           /* interpret word int_t */
   293           if (wp > 0 && wp < WORDBUF_SIZE) {
   294             int closespan = 1;
   295             if (iskeyword(word, highlighter->keywords)) {
   296               memcpy(&(dest[dp]), "<span class=\"c2html-keyword\">", 29);
   297               dp += 29;
   298             } else if (highlighter->istype(word, wp)) {
   299               memcpy(&(dest[dp]), "<span class=\"c2html-type\">", 26);
   300               dp += 26;
   301             } else if (highlighter->isdirective(word)) {
   302               isinclude = !strncmp("#include", word, WORDBUF_SIZE);
   303               memcpy(&(dest[dp]), "<span class=\"c2html-directive\">", 31);
   304               dp += 31;
   305             } else if (iscapsonly(word, wp)) {
   306               memcpy(&(dest[dp]), "<span class=\"c2html-macroconst\">", 32);
   307               dp += 32;
   308             } else {
   309               closespan = 0;
   310             }
   311             for (int i = 0 ; i < wp ; i++) {
   312               dp = writeescapedchar(dest, dp, word[i]);
   313             }
   314             if (closespan) {
   315               memcpy(&(dest[dp]), "</span>", 7);
   316               dp += 7;
   317             }
   318           }
   319           memset(word, 0, WORDBUF_SIZE);
   320           wp = 0;
   321           dp = writeescapedchar(dest, dp, c);
   322         } else {
   323           /* read word */
   324           if (wp < WORDBUF_SIZE) {
   325             word[wp++] = c;
   326           } else if (wp == WORDBUF_SIZE) {
   327             for (int i = 0 ; i < WORDBUF_SIZE ; i++) {
   328               dp = writeescapedchar(dest, dp, word[i]);
   329             }
   330             wp++;
   331             dp = writeescapedchar(dest, dp, c);
   332           } else {
   333             dp = writeescapedchar(dest, dp, c);
   334           }
   335         }
   336       }
   338       isescaping = !isescaping & (c == '\\');
   339     }
   340   }
   341   dest[dp] = 0;
   342 }
   344 void printhelp() {
   345   printf("Formats source code using HTML.\n\nUsage:\n"
   346       "  c2html [Options] FILE\n\n"
   347       " Options:\n"
   348       "  -h                    Prints this help message\n"
   349       "  -j                    Highlight Java instead of C source code\n"
   350       "  -o <output>           Output file (if not specified, stdout is used)\n"
   351       "  -p                    Disable highlighting (plain text)\n"
   352       "\n");
   355 }
   357 int lnint(size_t lnc) {
   358   int w = 1, p = 1;
   359   while ((p*=10) < lnc) w++;
   360   return w;
   361 }
   363 int main(int argc, char** argv) {
   365   settings_t settings;
   366   settings.outfilename = NULL;
   367   settings.highlight = 1;
   369   highlighter_t highlighter;
   370   highlighter.isdirective = iscdirective;
   371   highlighter.istype = isctype;
   372   highlighter.keywords = ckeywords;
   374   char optc;
   375   while ((optc = getopt(argc, argv, "hjo:p")) != -1) {
   376     switch (optc) {
   377       case 'o':
   378         if (!(optarg[0] == '-' && optarg[1] == 0)) {
   379           settings.outfilename = optarg;
   380         }
   381         break;
   382       case 'j':
   383         highlighter.isdirective = isjdirective;
   384         highlighter.istype = isjtype;
   385         highlighter.keywords = jkeywords;
   386         break;
   387       case 'p':
   388         settings.highlight = 0;
   389         break;
   390       case 'h':
   391         printhelp();
   392         return 0;
   393       default:
   394         return 1;
   395     }
   396   }
   398   if (optind != argc-1) {
   399     printhelp();
   400     return 1;
   401   } else {
   402     settings.infilename = argv[optind];
   404     inputfile_t *inputfile = readinput(settings.infilename);
   405     if (inputfile) {
   406       FILE *fout;
   407       char *line;
   408       if (settings.highlight) {
   409         line = (char*) malloc(inputfile->maxlinewidth*64);
   410       } else {
   411         line = NULL;
   412       }
   413       if (settings.outfilename) {
   414         fout = fopen(settings.outfilename, "w");
   415       } else {
   416         fout = stdout;
   417       }
   418       fprintf(fout, "<pre>\n");
   419       int lnw = lnint(inputfile->count);
   420       for (int i = 0 ; i < inputfile->count ; i++) {
   421         if (settings.highlight) {
   422           parseline(inputfile->lines[i], line, &highlighter);
   423         } else {
   424           line = inputfile->lines[i];
   425         }
   426         fprintf(fout, "<span class=\"c2html-lineno\">%*d:</span> %s",
   427             lnw, i+1, line);
   428       }
   429       if (settings.highlight) {
   430         free(line);
   431       }
   432       fprintf(fout, "</pre>\n");
   434       if (fout != stdout) {
   435         fclose(fout);
   436       }
   438       freeinputfilebuffer(inputfile);
   439     }
   441     return 0;
   442   }
   443 }

mercurial