src/c2html.c

Wed, 10 Jul 2013 16:31:16 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 10 Jul 2013 16:31:16 +0200
changeset 16
fa0bcd0444eb
parent 15
398a7589297f
child 17
7ea86024aef0
permissions
-rw-r--r--

prepared java highlighting

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2013 Mike Becker. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  *
    28  */
    30 #include <stdio.h>
    31 #include <stdlib.h>
    32 #include <string.h>
    33 #include <fcntl.h>
    34 #include <unistd.h>
    35 #include <ctype.h>
    37 #define INPUTBUF_SIZE 2048
    38 #define WORDBUF_SIZE 16
/*
 * Keywords that are highlighted as C keywords.
 * The list is terminated by a NULL entry so that it can be iterated
 * without a separate length (see iskeyword()).
 */
const char* ckeywords[] = {
  "auto", "break", "case", "char", "const", "continue", "default", "do", 
  "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", 
  "long", "register", "return", "short", "signed", "sizeof", "static",
  "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
  "while", NULL
};
    48 int istype(char *word, size_t len) {
    49   return (word[len-2] == '_' && word[len-1] == 't');
    50 }
    52 int isdirective(char *word) {
    53   return (word[0] == '#');
    54 }
    56 int notypes(char *word, size_t len) {
    57   return 0;
    58 }
    60 int nodirectives(char *word) {
    61   return 0;
    62 }
/*
 * Language specific hooks used by parseline() to classify a word.
 */
typedef struct {
  /* NULL-terminated list of keywords (see iskeyword()) */
  const char** keywords;
  /* returns non-zero if the word of the given length is a type name */
  int(*istype)(char*,size_t);
  /* returns non-zero if the word is a preprocessor-style directive */
  int(*isdirective)(char*);
} highlighter_t;
/*
 * Program settings collected from the command line in main().
 */
typedef struct {
  /* output file name; NULL means that stdout is used */
  char* outfilename;
  /* name of the file that shall be formatted */
  char* infilename;
  /* non-zero: highlight the source, zero: emit the lines as plain text */
  int highlight;
} settings_t;
    76 typedef struct {
    77   size_t count;
    78   size_t capacity;
    79   size_t maxlinewidth;
    80   char** lines;
    81 } inputfile_t;
    83 inputfile_t *inputfilebuffer(size_t capacity) {
    84   inputfile_t *inputfile = (inputfile_t*) malloc(sizeof(inputfile_t));
    85   inputfile->lines = (char**) malloc(capacity * sizeof(char*));
    86   inputfile->capacity = capacity;
    87   inputfile->count = 0;
    88   inputfile->maxlinewidth = 0;
    90   return inputfile;
    91 }
    93 void addline(inputfile_t *inputfile, char* line, size_t width) {
    94   char *l = (char*) malloc(width+1);
    95   memcpy(l, line, width);
    96   l[width] = 0;
    97   if (inputfile->count >= inputfile->capacity) {
    98     inputfile->capacity <<= 1;
    99     inputfile->lines = realloc(inputfile->lines, inputfile->capacity);
   100   }
   101   inputfile->lines[inputfile->count] = l;
   102   inputfile->maxlinewidth =
   103           width > inputfile->maxlinewidth ? width : inputfile->maxlinewidth;
   104   inputfile->count++;
   105 }
   107 void freeinputfilebuffer(inputfile_t *inputfile) {
   108   for (int i = 0 ; i < inputfile->count ; i++) {
   109     free(inputfile->lines[i]);
   110   }
   111   free(inputfile->lines);
   112   free(inputfile);
   113 }
   115 inputfile_t *readinput(char *filename) {
   117   int fd = open(filename, O_RDONLY);
   118   if (fd == -1) return NULL;
   120   inputfile_t *inputfile = inputfilebuffer(512);
   122   char buf[INPUTBUF_SIZE];
   123   ssize_t r;
   125   size_t maxlinewidth = 256;
   126   char *line = (char*) malloc(maxlinewidth);
   127   size_t col = 0;
   129   while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
   130     for (size_t i = 0 ; i < r ; i++) {
   131       if (col >= maxlinewidth-4) {
   132         maxlinewidth <<= 1;
   133         line = realloc(line, maxlinewidth);
   134       }
   136       if (buf[i] == '\n') {
   137         line[col++] = '\n';
   138         line[col] = 0;
   139         addline(inputfile, line, col);        
   140         col = 0;
   141       } else {
   142         line[col++] = buf[i];
   143       }
   144     }
   145   }
   147   free(line);
   149   close(fd);
   151   return inputfile;
   152 }
   154 size_t writeescapedchar(char *dest, size_t dp, char c) {
   155   if (c == '>') {
   156     dest[dp++] = '&'; dest[dp++] = 'g';
   157     dest[dp++] = 't'; dest[dp++] = ';';
   158   } else if (c == '<') {
   159     dest[dp++] = '&'; dest[dp++] = 'l';
   160     dest[dp++] = 't'; dest[dp++] = ';';
   161   } else {
   162     dest[dp++] = c;
   163   }
   165   return dp;
   166 }
   168 int iskeyword(char *word, const char** keywords) {
   169   for (int i = 0 ; keywords[i] ; i++) {
   170     if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) {
   171       return 1;
   172     }
   173   }
   174   return 0;
   175 }
   177 int iscapsonly(char *word, size_t wp) {
   178   for (size_t i = 0 ; i < wp ; i++) {
   179     if (!isupper(word[i]) && word[i] != '_') {
   180       return 0;
   181     }
   182   }
   183   return 1;
   184 }
   186 void parseline(char *src, char *dest, highlighter_t *highlighter) {
   187   size_t sp = 0, dp = 0;
   188   /* indent */
   189   while (isspace(src[sp])) {
   190     dest[dp++] = src[sp++];
   191   }
   193   static char word[WORDBUF_SIZE];
   194   static char includefile[FILENAME_MAX];
   196   memset(word, 0, WORDBUF_SIZE);
   197   size_t wp = 0, ifp = 0;
   198   int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
   199   static int iscommentml;
   200   int isescaping = 0;
   202   if (iscommentml) {
   203     iscomment = 1;
   204     memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
   205     dp += 29;
   206   }
   208   for (char c = src[sp] ; c ; c=src[++sp]) {
   209     /* comments */
   210     if (c == '/') {
   211       if (iscommentml && sp > 0 && src[sp-1] == '*') {
   212         iscomment = 0;
   213         iscommentml = 0;
   214         memcpy(&(dest[dp]), "/</span>", 8);
   215         dp += 8;
   216         continue;
   217       } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   218         iscomment = 1;
   219         iscommentml = (src[sp+1] == '*');
   220         memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
   221         dp += 29;
   222       }
   223     }
   225     if (iscomment) {
   226       if (c == '\n') {
   227         memcpy(&(dest[dp]), "</span>", 7);
   228         dp += 7;
   229       }
   230       dp = writeescapedchar(dest, dp, c);
   231     } else if (isinclude) {
   232       if (c == '<') {
   233         memcpy(&(dest[dp]), "<span class=\"c2html-stdinclude\">", 32);
   234         dp += 32;
   235         dp = writeescapedchar(dest, dp, c);
   236       } else if (c == '\"') {
   237         if (parseinclude) {
   238           dest[dp++] = '\"';
   239           dest[dp++] = '>';
   240           memcpy(&(dest[dp]), includefile, ifp);
   241           dp += ifp;
   243           dp = writeescapedchar(dest, dp, c);
   244           memcpy(&(dest[dp]), "</a>", 4);
   245           dp += 4;
   246           parseinclude = 0;
   247         } else {
   248           memcpy(&(dest[dp]), "<a class=\"c2html-userinclude\" href=", 35);
   249           dp += 35;
   250           dp = writeescapedchar(dest, dp, c);
   251           ifp = 0;
   252           includefile[ifp++] = '\"';
   253           parseinclude = 1;
   254         }
   255       } else if (c == '>') {
   256         dp = writeescapedchar(dest, dp, c);
   257         memcpy(&(dest[dp]), "</span>", 7);
   258         dp += 7;
   259       } else {
   260         if (parseinclude) {
   261           includefile[ifp++] = c;
   262         }
   263         dp = writeescapedchar(dest, dp, c);
   264       }
   265     } else {
   266       /* strings */
   267       if (!isescaping && (c == '\'' || c == '\"')) {
   268         isstring ^= 1;
   269         if (isstring) {
   270           memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28);
   271           dp += 28;
   272           dp = writeescapedchar(dest, dp, c);
   273         } else {
   274           dp = writeescapedchar(dest, dp, c);
   275           memcpy(&(dest[dp]), "</span>", 7);
   276           dp += 7;
   277         }
   278       } else {
   279         if (isstring) {
   280           dp = writeescapedchar(dest, dp, c);
   281         } else if (!isalnum(c) && c != '_' && c != '#' && c != '.') {
   282           /* interpret word int_t */
   283           if (wp > 0 && wp < WORDBUF_SIZE) {
   284             int closespan = 1;
   285             if (iskeyword(word, highlighter->keywords)) {
   286               memcpy(&(dest[dp]), "<span class=\"c2html-keyword\">", 29);
   287               dp += 29;
   288             } else if (highlighter->istype(word, wp)) {
   289               memcpy(&(dest[dp]), "<span class=\"c2html-type\">", 26);
   290               dp += 26;
   291             } else if (highlighter->isdirective(word)) {
   292               isinclude = !strncmp("#include", word, WORDBUF_SIZE);
   293               memcpy(&(dest[dp]), "<span class=\"c2html-directive\">", 31);
   294               dp += 31;
   295             } else if (iscapsonly(word, wp)) {
   296               memcpy(&(dest[dp]), "<span class=\"c2html-macroconst\">", 32);
   297               dp += 32;
   298             } else {
   299               closespan = 0;
   300             }
   301             for (int i = 0 ; i < wp ; i++) {
   302               dp = writeescapedchar(dest, dp, word[i]);
   303             }
   304             if (closespan) {
   305               memcpy(&(dest[dp]), "</span>", 7);
   306               dp += 7;
   307             }
   308           }
   309           memset(word, 0, WORDBUF_SIZE);
   310           wp = 0;
   311           dp = writeescapedchar(dest, dp, c);
   312         } else {
   313           /* read word */
   314           if (wp < WORDBUF_SIZE) {
   315             word[wp++] = c;
   316           } else if (wp == WORDBUF_SIZE) {
   317             for (int i = 0 ; i < WORDBUF_SIZE ; i++) {
   318               dp = writeescapedchar(dest, dp, word[i]);
   319             }
   320             wp++;
   321             dp = writeescapedchar(dest, dp, c);
   322           } else {
   323             dp = writeescapedchar(dest, dp, c);
   324           }
   325         }
   326       }
   328       isescaping = !isescaping & (c == '\\');
   329     }
   330   }
   331   dest[dp] = 0;
   332 }
   334 void printhelp() {
   335   printf("Formats source code using HTML.\n\nUsage:\n"
   336       "  c2html [Options] FILE\n\n"
   337       " Options:\n"
   338       "  -h                    Prints this help message\n"
   339       "  -o <output>           Output file (if not specified, stdout is used)\n"
   340       "  -p                    Disable highlighting (plain text)\n"
   341       "\n");
   344 }
   346 int lnint(size_t lnc) {
   347   int w = 1, p = 1;
   348   while ((p*=10) < lnc) w++;
   349   return w;
   350 }
   352 int main(int argc, char** argv) {
   354   settings_t settings;
   355   settings.outfilename = NULL;
   356   settings.highlight = 1;
   358   highlighter_t highlighter;
   359   highlighter.isdirective = isdirective;
   360   highlighter.istype = istype;
   361   highlighter.keywords = ckeywords;
   363   char optc;
   364   while ((optc = getopt(argc, argv, "ho:p")) != -1) {
   365     switch (optc) {
   366       case 'o':
   367         if (!(optarg[0] == '-' && optarg[1] == 0)) {
   368           settings.outfilename = optarg;
   369         }
   370         break;
   371       case 'p':
   372         settings.highlight = 0;
   373         break;
   374       case 'h':
   375         printhelp();
   376         return 0;
   377       default:
   378         return 1;
   379     }
   380   }
   382   if (optind != argc-1) {
   383     printhelp();
   384     return 1;
   385   } else {
   386     settings.infilename = argv[optind];
   388     inputfile_t *inputfile = readinput(settings.infilename);
   389     if (inputfile) {
   390       FILE *fout;
   391       char *line;
   392       if (settings.highlight) {
   393         line = (char*) malloc(inputfile->maxlinewidth*64);
   394       } else {
   395         line = NULL;
   396       }
   397       if (settings.outfilename) {
   398         fout = fopen(settings.outfilename, "w");
   399       } else {
   400         fout = stdout;
   401       }
   402       fprintf(fout, "<pre>\n");
   403       int lnw = lnint(inputfile->count);
   404       for (int i = 0 ; i < inputfile->count ; i++) {
   405         if (settings.highlight) {
   406           parseline(inputfile->lines[i], line, &highlighter);
   407         } else {
   408           line = inputfile->lines[i];
   409         }
   410         fprintf(fout, "<span class=\"c2html-lineno\">%*d:</span> %s",
   411             lnw, i+1, line);
   412       }
   413       if (settings.highlight) {
   414         free(line);
   415       }
   416       fprintf(fout, "</pre>\n");
   418       if (fout != stdout) {
   419         fclose(fout);
   420       }
   422       freeinputfilebuffer(inputfile);
   423     }
   425     return 0;
   426   }
   427 }

mercurial