/* src/c2html.c */

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2013 Mike Becker. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <ctype.h>

/* Number of bytes requested from read() per call. */
#define INPUTBUF_SIZE 2048
/* Size of the buffer used to collect a single word while highlighting. */
#define WORDBUF_SIZE 16

/*
 * True if the first len characters of word end in "_t" (e.g. size_t).
 * Words shorter than two characters never match; the length guard keeps the
 * macro from indexing in front of the buffer.
 */
#define istype(word, len) \
  ((len) >= 2 && (word)[(len)-2] == '_' && (word)[(len)-1] == 't')
/* True if word starts with '#', i.e. looks like a preprocessor directive. */
#define isdirective(word) ((word)[0] == '#')

/*
 * The C keywords that get highlighted, in alphabetical order.
 * The list is terminated by a NULL entry so it can be walked without a length.
 */
const char* keywords[] = {
  "auto",
  "break",
  "case", "char", "const", "continue",
  "default", "do", "double",
  "else", "enum", "extern",
  "float", "for",
  "goto",
  "if", "int",
  "long",
  "register", "return",
  "short", "signed", "sizeof", "static", "struct", "switch",
  "typedef",
  "union", "unsigned",
  "void", "volatile",
  "while",
  NULL
};


/* In-memory copy of an input file, stored line by line. */
typedef struct {
  size_t count;        /* number of lines currently stored in lines */
  size_t capacity;     /* number of line slots the lines array was sized for */
  size_t maxlinewidth; /* largest width passed to addline() so far */
  char** lines;        /* heap array of heap-allocated, NUL-terminated lines */
} inputfile_t;

/*
 * Allocates an empty line buffer with room for capacity line pointers.
 *
 * A capacity of 0 is raised to 1, because the buffer grows by doubling and
 * could otherwise never grow.
 *
 * Returns the new buffer, which the caller releases with
 * freeinputfilebuffer(), or NULL if the memory cannot be allocated.
 */
inputfile_t *inputfilebuffer(size_t capacity) {
  if (capacity == 0) {
    capacity = 1;
  }
  /* refuse capacities whose size in bytes does not fit into a size_t */
  if (capacity > (size_t) -1 / sizeof(char*)) {
    return NULL;
  }

  inputfile_t *inputfile = malloc(sizeof *inputfile);
  if (inputfile == NULL) {
    return NULL;
  }

  inputfile->lines = malloc(capacity * sizeof *inputfile->lines);
  if (inputfile->lines == NULL) {
    free(inputfile);
    return NULL;
  }
  inputfile->capacity = capacity;
  inputfile->count = 0;
  inputfile->maxlinewidth = 0;

  return inputfile;
}

/*
 * Appends a copy of the first width bytes of line to inputfile.
 *
 * The copy is NUL-terminated and owned by the buffer. The pointer array is
 * doubled when it is full, and maxlinewidth is raised if width exceeds it.
 *
 * The function cannot report failure to its caller, so running out of memory
 * prints a message to stderr and terminates the program instead of writing
 * through a NULL pointer.
 */
void addline(inputfile_t *inputfile, char* line, size_t width) {
  if (inputfile->count >= inputfile->capacity) {
    size_t newcap = inputfile->capacity ? inputfile->capacity * 2 : 1;
    char **grown = NULL;

    /* the size handed to realloc is in bytes, not in array elements */
    if (newcap > inputfile->capacity
        && newcap <= (size_t) -1 / sizeof *grown) {
      grown = realloc(inputfile->lines, newcap * sizeof *grown);
    }
    if (grown == NULL) {
      fprintf(stderr, "c2html: out of memory\n");
      exit(EXIT_FAILURE);
    }
    inputfile->lines = grown;
    inputfile->capacity = newcap;
  }

  char *l = malloc(width + 1);
  if (l == NULL) {
    fprintf(stderr, "c2html: out of memory\n");
    exit(EXIT_FAILURE);
  }
  memcpy(l, line, width);
  l[width] = 0;

  inputfile->lines[inputfile->count++] = l;
  if (width > inputfile->maxlinewidth) {
    inputfile->maxlinewidth = width;
  }
}

/*
 * Releases a line buffer: every stored line, the pointer array and the
 * buffer structure itself. Passing NULL is a no-op, like free(NULL).
 */
void freeinputfilebuffer(inputfile_t *inputfile) {
  if (inputfile == NULL) {
    return;
  }
  /* size_t index: count is a size_t and may exceed the range of int */
  for (size_t i = 0 ; i < inputfile->count ; i++) {
    free(inputfile->lines[i]);
  }
  free(inputfile->lines);
  free(inputfile);
}

/*
 * Reads the file named filename and splits it into lines.
 *
 * Every stored line keeps its terminating '\n'. A final line that is not
 * terminated by a newline is stored as well (without a '\n').
 *
 * Returns a buffer the caller releases with freeinputfilebuffer(), or NULL
 * if the file cannot be opened, reading fails, or memory runs out.
 */
inputfile_t *readinput(char *filename) {

  int fd = open(filename, O_RDONLY);
  if (fd == -1) return NULL;

  char buf[INPUTBUF_SIZE];
  ssize_t r = 0;
  size_t maxlinewidth = 256;
  size_t col = 0;
  int failed = 0;

  char *line = malloc(maxlinewidth);
  inputfile_t *inputfile = inputfilebuffer(512);
  if (line == NULL || inputfile == NULL) {
    failed = 1;
  }

  while (!failed && (r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
    for (ssize_t i = 0 ; i < r ; i++) {
      /* make room for the next character before storing it */
      if (col >= maxlinewidth) {
        char *grown = NULL;
        if (maxlinewidth <= (size_t) -1 / 2) {
          grown = realloc(line, maxlinewidth * 2);
        }
        if (grown == NULL) {
          failed = 1;
          break;
        }
        line = grown;
        maxlinewidth *= 2;
      }

      line[col++] = buf[i];
      if (buf[i] == '\n') {
        addline(inputfile, line, col);
        col = 0;
      }
    }
  }
  if (r < 0) {
    /* read() reported an error; do not hand out a partial file */
    failed = 1;
  }

  /* the last line of the file may lack a trailing newline */
  if (!failed && col > 0) {
    addline(inputfile, line, col);
  }

  free(line);
  close(fd);

  if (failed) {
    if (inputfile != NULL) {
      freeinputfilebuffer(inputfile);
    }
    return NULL;
  }

  return inputfile;
}

/*
 * Writes the character c to dest at offset dp, replacing the characters
 * that are special in HTML text ('<', '>' and '&') by their entities.
 *
 * The ampersand must be escaped too; otherwise source text such as "&lt;"
 * would be rendered as '<' by a browser.
 *
 * Returns the offset just behind the written characters. At most five bytes
 * are written and no NUL terminator is appended.
 */
size_t writeescapedchar(char *dest, size_t dp, char c) {
  const char *entity;

  switch (c) {
    case '>': entity = "&gt;"; break;
    case '<': entity = "&lt;"; break;
    case '&': entity = "&amp;"; break;
    default:
      dest[dp++] = c;
      return dp;
  }

  while (*entity) {
    dest[dp++] = *entity++;
  }

  return dp;
}

/*
 * Looks word up in the NULL-terminated keywords table.
 * At most WORDBUF_SIZE characters are compared.
 * Returns 1 if word matches a keyword and 0 otherwise.
 */
int iskeyword(char *word) {
  const char **entry = keywords;

  while (*entry != NULL) {
    if (!strncmp(*entry, word, WORDBUF_SIZE)) {
      return 1;
    }
    entry++;
  }
  return 0;
}

/* Appends the NUL-terminated text to dest at offset dp; returns the new offset. */
static size_t parseline_puts(char *dest, size_t dp, const char *text) {
  size_t len = strlen(text);
  memcpy(dest + dp, text, len);
  return dp + len;
}

/*
 * Writes the collected word (wp characters, NUL-terminated) to dest at
 * offset dp. Keywords, names ending in "_t" and words starting with '#' are
 * wrapped in the matching span. Returns the new offset; an empty word writes
 * nothing.
 */
static size_t parseline_putword(char *dest, size_t dp, char *word, size_t wp) {
  const char *open = NULL;

  if (wp == 0) {
    return dp;
  }

  if (iskeyword(word)) {
    open = "<span class=\"c2html-keyword\">";
  } else if (wp >= 2 && istype(word, wp)) {
    open = "<span class=\"c2html-type\">";
  } else if (isdirective(word)) {
    open = "<span class=\"c2html-directive\">";
  }

  if (open) {
    dp = parseline_puts(dest, dp, open);
  }
  for (size_t i = 0 ; i < wp ; i++) {
    dp = writeescapedchar(dest, dp, word[i]);
  }
  if (open) {
    dp = parseline_puts(dest, dp, "</span>");
  }
  return dp;
}

/*
 * Converts one line of C source (src, NUL-terminated, normally ending in
 * '\n') into highlighted HTML and stores the NUL-terminated result in dest.
 *
 * Comments, string/character literals, keywords, "_t" type names and
 * preprocessor directives are wrapped in <span> elements; '<', '>' and '&'
 * are written through writeescapedchar(). Only words shorter than
 * WORDBUF_SIZE characters are classified, longer ones are copied as is.
 *
 * Whether the line starts inside a block comment is remembered in a static
 * variable between calls, so the lines of a file must be passed in order.
 *
 * dest must be large enough for the expanded line; the function does not
 * know the size of dest (main() reserves 64 bytes per source character).
 */
void parseline(char *src, char *dest) {
  static int iscommentml;  /* inside a block comment; survives across calls */
  size_t sp = 0, dp = 0;
  char word[WORDBUF_SIZE];
  size_t wp = 0;           /* number of characters collected in word */
  int overlong = 0;        /* current word outgrew the buffer, copy it as is */
  int iscomment = 0;       /* inside any comment on this line */
  int spanopen = 0;        /* the comment span is currently open */
  size_t mlbody = 0;       /* index of the first character behind slash-star */
  char quote = 0;          /* quote character of the open literal, or 0 */
  int isescaping = 0;      /* previous literal character was a backslash */

  memset(word, 0, WORDBUF_SIZE);

  /*
   * indent - the newline is left to the main loop, so that an empty line
   * inside a block comment still gets its span closed
   */
  while (src[sp] != '\n' && isspace((unsigned char) src[sp])) {
    dest[dp++] = src[sp++];
  }

  if (iscommentml) {
    iscomment = 1;
    spanopen = 1;
    dp = parseline_puts(dest, dp, "<span class=\"c2html-comment\">");
  }

  for (char c = src[sp] ; c ; c = src[++sp]) {
    if (iscomment) {
      /* comments */
      if (c == '\n') {
        if (spanopen) {
          dp = parseline_puts(dest, dp, "</span>");
          spanopen = 0;
        }
        if (!iscommentml) {
          iscomment = 0;
        }
      }
      dp = writeescapedchar(dest, dp, c);

      /*
       * A block comment ends at star-slash. The star must belong to the
       * comment body, so that the opener directly followed by a slash is
       * not mistaken for a complete comment.
       */
      if (iscommentml && c == '/' && sp > mlbody && src[sp-1] == '*') {
        if (spanopen) {
          dp = parseline_puts(dest, dp, "</span>");
          spanopen = 0;
        }
        iscomment = 0;
        iscommentml = 0;
      }
    } else if (quote) {
      /* strings */
      if (c == '\n') {
        /* unterminated literal: never leave the span open */
        dp = parseline_puts(dest, dp, "</span>");
        quote = 0;
        isescaping = 0;
        dp = writeescapedchar(dest, dp, c);
      } else {
        dp = writeescapedchar(dest, dp, c);
        /* only the quote character that opened the literal closes it */
        if (!isescaping && c == quote) {
          dp = parseline_puts(dest, dp, "</span>");
          quote = 0;
        }
        isescaping = !isescaping && c == '\\';
      }
    } else if (isalnum((unsigned char) c) || c == '_' || c == '#') {
      /* read word */
      if (overlong) {
        dp = writeescapedchar(dest, dp, c);
      } else if (wp < WORDBUF_SIZE - 1) {
        word[wp++] = c;
      } else {
        /* too long to be classified: emit what was collected so far */
        for (size_t i = 0 ; i < wp ; i++) {
          dp = writeescapedchar(dest, dp, word[i]);
        }
        memset(word, 0, WORDBUF_SIZE);
        wp = 0;
        overlong = 1;
        dp = writeescapedchar(dest, dp, c);
      }
    } else {
      /* every other character ends the pending word */
      dp = parseline_putword(dest, dp, word, wp);
      memset(word, 0, WORDBUF_SIZE);
      wp = 0;
      overlong = 0;

      if (c == '/' && (src[sp+1] == '/' || src[sp+1] == '*')) {
        iscomment = 1;
        iscommentml = (src[sp+1] == '*');
        mlbody = sp + 2;
        spanopen = 1;
        dp = parseline_puts(dest, dp, "<span class=\"c2html-comment\">");
        dp = writeescapedchar(dest, dp, c);
      } else if (c == '\'' || c == '\"') {
        quote = c;
        isescaping = 0;
        dp = parseline_puts(dest, dp, "<span class=\"c2html-string\">");
        dp = writeescapedchar(dest, dp, c);
      } else {
        dp = writeescapedchar(dest, dp, c);
      }
    }
  }

  /* a line without trailing newline may end with pending output */
  dp = parseline_putword(dest, dp, word, wp);
  if (quote || spanopen) {
    dp = parseline_puts(dest, dp, "</span>");
  }
  dest[dp] = 0;
}

/* Prints a short description and the usage line to standard output. */
void printhelp() {
  fputs("Formats source code using HTML.\n"
        "\n"
        "Usage:\n"
        "  c2html [FILE...]\n",
        stdout);
}

/*
 * Returns the smallest width w >= 1 for which 10^w >= lnc, i.e. the number
 * of decimal digits needed to print every number below lnc.
 *
 * The value is reduced by rounded-up division instead of multiplying a
 * power of ten, so no intermediate result can overflow for large lnc.
 */
int lnint(size_t lnc) {
  int w = 1;
  while (lnc > 10) {
    lnc = lnc / 10 + (lnc % 10 != 0);
    w++;
  }
  return w;
}

/*
 * Program entry point.
 *
 * Without arguments the help text is printed. Otherwise the file named by
 * the first argument is read and written to standard output as a <pre>
 * block, one highlighted line per source line, each prefixed with its
 * line number (starting at 1).
 *
 * Returns 0 on success and EXIT_FAILURE if the file cannot be read or
 * memory runs out.
 */
int main(int argc, char** argv) {

  if (argc == 1) {
    printhelp();
    return 0;
  }

  inputfile_t *inputfile = readinput(argv[1]);
  if (inputfile == NULL) {
    fprintf(stderr, "c2html: cannot read '%s'\n", argv[1]);
    return EXIT_FAILURE;
  }

  /*
   * parseline() cannot check the size of its output buffer; reserve 64
   * bytes per source character plus some slack, which also avoids a
   * zero-sized allocation for an empty file
   */
  char *line = malloc(inputfile->maxlinewidth * 64 + 64);
  if (line == NULL) {
    fprintf(stderr, "c2html: out of memory\n");
    freeinputfilebuffer(inputfile);
    return EXIT_FAILURE;
  }

  printf("<pre>\n");
  /* lnint(n) is wide enough for numbers below n; the largest one is count */
  int lnw = lnint(inputfile->count + 1);
  for (size_t i = 0 ; i < inputfile->count ; i++) {
    parseline(inputfile->lines[i], line);
    printf("<span class=\"c2html-lineno\">%*zu:</span> %s",
        lnw, i + 1, line);
  }
  printf("</pre>\n");

  free(line);
  freeinputfilebuffer(inputfile);

  return 0;
}