c2html: src/c2html.c@6b1fba10c4cb (annotated)

src/c2html.c@6b1fba10c4cb (annotated)

src/c2html.c

Fri, 21 Jun 2013 12:49:46 +0200

author: Mike Becker <universe@uap-core.de>
date: Fri, 21 Jun 2013 12:49:46 +0200
changeset 9: 6b1fba10c4cb
parent 8: 417cd3b29f97
child 10: 925172e535a9
permissions: -rw-r--r--

long words are correctly separated by non alpha numeric chars, underscore or hash + support for macro constants

 /*
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
  *
  * Copyright 2013 Mike Becker. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  *   1. Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *
  *   2. Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
  *      documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  *
  */
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <ctype.h>
 /* Size in bytes of the chunk buffer used when reading the input file. */
 #define INPUTBUF_SIZE 2048
 /* Number of characters that are buffered for a word before it is classified. */
 #define WORDBUF_SIZE 16
 /*
  * True if the first len characters of word end with "_t".
  * Words shorter than two characters never match; the length guard prevents
  * reading in front of the word buffer.
  */
 #define istype(word, len) \
     ((len) >= 2 && (word)[(len)-2] == '_' && (word)[(len)-1] == 't')
 /* True if word starts with a hash, i.e. looks like a preprocessor directive. */
 #define isdirective(word) ((word)[0] == '#')
 /* NULL-terminated table of the words that are highlighted as keywords. */
 const char *keywords[] = {
   "auto",
   "break",
   "case", "char", "const", "continue",
   "default", "do", "double",
   "else", "enum", "extern",
   "float", "for",
   "goto",
   "if", "int",
   "long",
   "register", "return",
   "short", "signed", "sizeof", "static", "struct", "switch",
   "typedef",
   "union", "unsigned",
   "void", "volatile",
   "while",
   NULL
 };
 /* Growable list of the lines of one input file. */
 typedef struct {
   size_t count;         /* number of lines currently stored in lines */
   size_t capacity;      /* number of slots the lines array was sized for */
   size_t maxlinewidth;  /* largest width passed to addline() so far */
   char** lines;         /* heap allocated, NUL-terminated copies of the lines */
 } inputfile_t;
 /*
  * Allocates an empty line buffer with room for capacity lines.
  *
  * A capacity of zero is raised to one, because the buffer grows by doubling
  * and could never grow from zero.
  *
  * Returns the new buffer, or NULL if memory could not be allocated. The
  * buffer is released with freeinputfilebuffer().
  */
 inputfile_t *inputfilebuffer(size_t capacity) {
   if (capacity == 0) {
     capacity = 1;
   }

   inputfile_t *inputfile = malloc(sizeof *inputfile);
   if (inputfile == NULL) {
     return NULL;
   }

   /* calloc rejects a capacity whose byte size would overflow */
   inputfile->lines = calloc(capacity, sizeof *inputfile->lines);
   if (inputfile->lines == NULL) {
     free(inputfile);
     return NULL;
   }

   inputfile->capacity = capacity;
   inputfile->count = 0;
   inputfile->maxlinewidth = 0;
   return inputfile;
 }
 /*
  * Appends a copy of the first width bytes of line to inputfile.
  *
  * The copy is NUL-terminated. The lines array is doubled when it is full and
  * maxlinewidth is raised to width if width is larger.
  *
  * The function cannot report errors to the caller; when memory is exhausted
  * it prints a message to stderr and terminates the program.
  */
 void addline(inputfile_t *inputfile, char* line, size_t width) {
   if (inputfile->count >= inputfile->capacity) {
     size_t newcap = inputfile->capacity ? inputfile->capacity * 2 : 1;
     /* guard against overflow of the capacity and of the byte count */
     if (newcap <= inputfile->capacity
         || newcap > (size_t) -1 / sizeof *inputfile->lines) {
       fputs("c2html: out of memory\n", stderr);
       exit(EXIT_FAILURE);
     }
     /* the size is given in bytes, so scale the slot count by the slot size */
     char **grown = realloc(inputfile->lines,
                            newcap * sizeof *inputfile->lines);
     if (grown == NULL) {
       fputs("c2html: out of memory\n", stderr);
       exit(EXIT_FAILURE);
     }
     inputfile->lines = grown;
     inputfile->capacity = newcap;
   }

   char *l = malloc(width + 1);
   if (l == NULL) {
     fputs("c2html: out of memory\n", stderr);
     exit(EXIT_FAILURE);
   }
   memcpy(l, line, width);
   l[width] = 0;

   inputfile->lines[inputfile->count] = l;
   inputfile->count++;
   if (width > inputfile->maxlinewidth) {
     inputfile->maxlinewidth = width;
   }
 }
 /*
  * Releases all stored lines, the lines array and the buffer itself.
  * Passing NULL is allowed and does nothing, like free().
  */
 void freeinputfilebuffer(inputfile_t *inputfile) {
   if (inputfile == NULL) {
     return;
   }
   /* count is a size_t; an int index could not reach every line */
   for (size_t i = 0 ; i < inputfile->count ; i++) {
     free(inputfile->lines[i]);
   }
   free(inputfile->lines);
   free(inputfile);
 }
 /*
  * Reads the file filename and splits it into lines.
  *
  * Every stored line ends with a newline character. A final line that is not
  * terminated in the file is stored as well and gets a newline appended.
  *
  * Returns the line buffer, which the caller releases with
  * freeinputfilebuffer(), or NULL if the file cannot be opened or read or if
  * memory is exhausted.
  */
 inputfile_t *readinput(char *filename) {
   int fd = open(filename, O_RDONLY);
   if (fd == -1) return NULL;

   char buf[INPUTBUF_SIZE];
   ssize_t r;
   size_t col = 0;
   size_t maxlinewidth = 256;
   char *line = malloc(maxlinewidth);
   inputfile_t *inputfile = inputfilebuffer(512);
   if (line == NULL || inputfile == NULL) {
     goto fail;
   }

   while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
     for (size_t i = 0 ; i < (size_t) r ; i++) {
       /* keep headroom for the character, a newline and the terminator */
       if (col >= maxlinewidth-4) {
         char *grown = realloc(line, maxlinewidth * 2);
         if (grown == NULL) {
           goto fail;
         }
         line = grown;
         maxlinewidth *= 2;
       }
       if (buf[i] == '\n') {
         line[col++] = '\n';
         line[col] = 0;
         addline(inputfile, line, col);
         col = 0;
       } else {
         line[col++] = buf[i];
       }
     }
   }
   if (r < 0) {
     goto fail;
   }

   /* the file did not end with a newline: keep the pending last line */
   if (col > 0) {
     line[col++] = '\n';
     line[col] = 0;
     addline(inputfile, line, col);
   }

   free(line);
   close(fd);
   return inputfile;

 fail:
   free(line);
   if (inputfile != NULL) {
     freeinputfilebuffer(inputfile);
   }
   close(fd);
   return NULL;
 }
 /*
  * Writes the character c to dest at position dp, replacing the characters
  * that have a special meaning in HTML text by their entities:
  * '&' becomes "&amp;", '<' becomes "&lt;" and '>' becomes "&gt;".
  *
  * No terminator is written and dest is not bounds checked; the caller must
  * provide room for up to five bytes at dest[dp].
  *
  * Returns the position following the written characters.
  */
 size_t writeescapedchar(char *dest, size_t dp, char c) {
   const char *entity;
   switch (c) {
     case '&': entity = "&amp;"; break;
     case '<': entity = "&lt;";  break;
     case '>': entity = "&gt;";  break;
     default:
       dest[dp] = c;
       return dp + 1;
   }
   size_t len = strlen(entity);
   memcpy(&(dest[dp]), entity, len);
   return dp + len;
 }
 /*
  * Looks word up in the keywords table.
  * At most WORDBUF_SIZE characters are compared.
  * Returns 1 if word is a keyword and 0 otherwise.
  */
 int iskeyword(char *word) {
   const char **entry = keywords;
   while (*entry != NULL) {
     if (!strncmp(*entry, word, WORDBUF_SIZE)) {
       return 1;
     }
     entry++;
   }
   return 0;
 }
 /*
  * Checks whether the first wp characters of word consist only of upper case
  * letters and underscores.
  * Returns 1 if so (also for wp == 0) and 0 otherwise.
  */
 int iscapsonly(char *word, size_t wp) {
   for (size_t i = 0 ; i < wp ; i++) {
     /* ctype functions are only defined for unsigned char values and EOF */
     if (!isupper((unsigned char) word[i]) && word[i] != '_') {
       return 0;
     }
   }
   return 1;
 }
 /*
  * Appends the NUL-terminated string str (without its terminator) to dest at
  * position dp and returns the position following the copied characters.
  */
 static size_t writestring(char *dest, size_t dp, const char *str) {
   size_t len = strlen(str);
   memcpy(&(dest[dp]), str, len);
   return dp + len;
 }

 /*
  * Writes the wp buffered characters of word to dest at position dp. The word
  * is wrapped in a highlighting span when it is a keyword, ends with "_t",
  * starts with a hash or consists of capitals and underscores only.
  *
  * Nothing is written for an empty word or for a word that outgrew the word
  * buffer (wp > WORDBUF_SIZE), because parseline() has already written that
  * one unformatted. Returns the new write position.
  */
 static size_t flushword(char *dest, size_t dp, char *word, size_t wp) {
   if (wp == 0 || wp > WORDBUF_SIZE) {
     return dp;
   }

   const char *span = NULL;
   if (iskeyword(word)) {
     span = "<span class=\"c2html-keyword\">";
   } else if (wp >= 2 && istype(word, wp)) {
     /* the length check keeps istype from looking in front of the buffer */
     span = "<span class=\"c2html-type\">";
   } else if (isdirective(word)) {
     span = "<span class=\"c2html-directive\">";
   } else if (iscapsonly(word, wp)) {
     span = "<span class=\"c2html-macroconst\">";
   }

   if (span != NULL) {
     dp = writestring(dest, dp, span);
   }
   for (size_t i = 0 ; i < wp ; i++) {
     dp = writeescapedchar(dest, dp, word[i]);
   }
   if (span != NULL) {
     dp = writestring(dest, dp, "</span>");
   }
   return dp;
 }

 /*
  * Converts one line of source code to HTML with highlighting spans.
  *
  * src  - the NUL-terminated line, normally ending with a newline
  * dest - receives the NUL-terminated HTML; it is not bounds checked, so the
  *        caller must provide a buffer that is large enough
  *
  * Comments, string and character literals, keywords, *_t types,
  * preprocessor directives and all-caps macro constants are wrapped in
  * <span> elements. Every span is closed before the end of the line.
  *
  * Whether the line starts inside a multi-line comment is remembered in a
  * static variable, so the lines of a file must be passed in order.
  */
 void parseline(char *src, char *dest) {
   size_t sp = 0, dp = 0;

   /*
    * indent: copied verbatim. The newline is left to the main loop so that a
    * blank line inside a multi-line comment still closes its span.
    */
   while (src[sp] != '\n' && isspace((unsigned char) src[sp])) {
     dest[dp++] = src[sp++];
   }

   /* one extra byte keeps a completely filled word NUL-terminated */
   char word[WORDBUF_SIZE + 1];
   memset(word, 0, sizeof(word));
   size_t wp = 0;

   char quote = 0;               /* quote character of the open literal or 0 */
   int isescaping = 0;           /* previous character was an active backslash */
   int iscomment = 0;
   static int iscommentml;       /* a multi-line comment spans to the next call */
   size_t mlopen = (size_t) -1;  /* index of the '*' that opened a comment */

   if (iscommentml) {
     iscomment = 1;
     dp = writestring(dest, dp, "<span class=\"c2html-comment\">");
   }

   for (char c = src[sp] ; c ; c = src[++sp]) {
     /* comments (a slash inside a string or char literal is plain text) */
     if (c == '/' && !quote) {
       if (iscommentml) {
         /* the '*' of the opening sequence must not close the comment */
         if (sp > 0 && src[sp-1] == '*' && sp-1 != mlopen) {
           iscomment = 0;
           iscommentml = 0;
           dp = writestring(dest, dp, "/</span>");
           continue;
         }
       } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
         /* a word directly in front of the comment must not get lost */
         dp = flushword(dest, dp, word, wp);
         memset(word, 0, sizeof(word));
         wp = 0;
         iscomment = 1;
         iscommentml = (src[sp+1] == '*');
         if (iscommentml) {
           mlopen = sp + 1;
         }
         dp = writestring(dest, dp, "<span class=\"c2html-comment\">");
       }
     }

     if (iscomment) {
       if (c == '\n') {
         dp = writestring(dest, dp, "</span>");
       }
       dp = writeescapedchar(dest, dp, c);
       continue;
     }

     if (quote) {
       /* inside a string or character literal */
       if (c == '\n') {
         /* unterminated literal: do not let the span leak into the next line */
         dp = writestring(dest, dp, "</span>");
         quote = 0;
         dp = writeescapedchar(dest, dp, c);
       } else {
         dp = writeescapedchar(dest, dp, c);
         /* only the quote character that opened the literal can close it */
         if (c == quote && !isescaping) {
           dp = writestring(dest, dp, "</span>");
           quote = 0;
         }
       }
     } else if (!isescaping && (c == '\'' || c == '\"')) {
       /* start of a literal: write the pending word in front of it */
       dp = flushword(dest, dp, word, wp);
       memset(word, 0, sizeof(word));
       wp = 0;
       dp = writestring(dest, dp, "<span class=\"c2html-string\">");
       dp = writeescapedchar(dest, dp, c);
       quote = c;
     } else if (!isalnum((unsigned char) c) && c != '_' && c != '#') {
       /* interpret word int_t */
       dp = flushword(dest, dp, word, wp);
       memset(word, 0, sizeof(word));
       wp = 0;
       dp = writeescapedchar(dest, dp, c);
     } else {
       /* read word */
       if (wp < WORDBUF_SIZE) {
         word[wp++] = c;
       } else {
         if (wp == WORDBUF_SIZE) {
           /* the word is too long to be classified: write it unformatted */
           for (size_t i = 0 ; i < WORDBUF_SIZE ; i++) {
             dp = writeescapedchar(dest, dp, word[i]);
           }
           wp++;
         }
         dp = writeescapedchar(dest, dp, c);
       }
     }
     isescaping = !isescaping && (c == '\\');
   }

   /* a line without trailing newline: write what is pending, close the span */
   dp = flushword(dest, dp, word, wp);
   if (quote || (iscomment && (sp == 0 || src[sp-1] != '\n'))) {
     dp = writestring(dest, dp, "</span>");
   }
   dest[dp] = 0;
 }
 /* Prints a short description and the usage line to standard output. */
 void printhelp() {
   static const char usage[] =
       "Formats source code using HTML.\n\nUsage:\n"
       "  c2html [FILE...]"
       "\n";
   fputs(usage, stdout);
 }
 /*
  * Returns the number of decimal digits needed to print lnc.
  * The result is at least 1; it is used as the width of the line number
  * column.
  */
 int lnint(size_t lnc) {
   int w = 1;
   /* dividing the argument cannot overflow, unlike growing a power of ten */
   while (lnc >= 10) {
     lnc /= 10;
     w++;
   }
   return w;
 }
 /*
  * Converts the file named by the first argument to HTML and prints the
  * result, one numbered line per source line inside a <pre> element, to
  * standard output. Without an argument the help text is printed.
  *
  * Returns 0 on success and EXIT_FAILURE if the file cannot be read or memory
  * is exhausted.
  */
 int main(int argc, char** argv) {
   if (argc < 2) {
     printhelp();
     return 0;
   }

   inputfile_t *inputfile = readinput(argv[1]);
   if (inputfile == NULL) {
     fprintf(stderr, "c2html: cannot read input file: %s\n", argv[1]);
     return EXIT_FAILURE;
   }

   /*
    * parseline() does not check bounds. The factor 64 is taken over from the
    * original sizing and is presumed to cover the span markup and entities;
    * the +1 avoids a zero sized allocation for an empty file.
    */
   char *line = malloc((inputfile->maxlinewidth + 1) * 64);
   if (line == NULL) {
     fputs("c2html: out of memory\n", stderr);
     freeinputfilebuffer(inputfile);
     return EXIT_FAILURE;
   }

   printf("<pre>\n");
   int lnw = lnint(inputfile->count);
   for (size_t i = 0 ; i < inputfile->count ; i++) {
     parseline(inputfile->lines[i], line);
     printf("<span class=\"c2html-lineno\">%*zu:</span> %s",
         lnw, i+1, line);
   }
   printf("</pre>\n");

   free(line);
   freeinputfilebuffer(inputfile);
   return 0;
 }

Mercurial > hg > c2html / annotate

src/c2html.c@6b1fba10c4cb (annotated)

src/c2html.c