Fri, 21 Jun 2013 12:49:46 +0200
long words are correctly separated by characters that are not alphanumeric, underscore or hash + support for macro constants
universe@1 | 1 | /* |
universe@1 | 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
universe@1 | 3 | * |
universe@1 | 4 | * Copyright 2013 Mike Becker. All rights reserved. |
universe@1 | 5 | * |
universe@1 | 6 | * Redistribution and use in source and binary forms, with or without |
universe@1 | 7 | * modification, are permitted provided that the following conditions are met: |
universe@1 | 8 | * |
universe@1 | 9 | * 1. Redistributions of source code must retain the above copyright |
universe@1 | 10 | * notice, this list of conditions and the following disclaimer. |
universe@1 | 11 | * |
universe@1 | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
universe@1 | 13 | * notice, this list of conditions and the following disclaimer in the |
universe@1 | 14 | * documentation and/or other materials provided with the distribution. |
universe@1 | 15 | * |
universe@1 | 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
universe@1 | 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
universe@1 | 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
universe@1 | 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
universe@1 | 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
universe@1 | 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
universe@1 | 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
universe@1 | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
universe@1 | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
universe@1 | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
universe@1 | 26 | * POSSIBILITY OF SUCH DAMAGE. |
universe@1 | 27 | * |
universe@1 | 28 | */ |
universe@1 | 29 | |
universe@1 | 30 | #include <stdio.h> |
universe@1 | 31 | #include <stdlib.h> |
universe@1 | 32 | #include <string.h> |
universe@1 | 33 | #include <fcntl.h> |
universe@1 | 34 | #include <unistd.h> |
universe@4 | 35 | #include <ctype.h> |
universe@4 | 36 | |
/* Number of bytes requested from read() per call while loading the input. */
#define INPUTBUF_SIZE 2048
/* Longest word (in characters) that is still examined for highlighting. */
#define WORDBUF_SIZE 16

/*
 * Non-zero if the first len characters of word end in "_t" (e.g. size_t).
 * Words shorter than two characters never match, so nothing in front of
 * the buffer is read. Both arguments are evaluated more than once.
 */
#define istype(word, len) \
    ((len) >= 2 && (word)[(len)-2] == '_' && (word)[(len)-1] == 't')

/* Non-zero if word starts with '#', i.e. looks like a preprocessor directive. */
#define isdirective(word) ((word)[0] == '#')
universe@7 | 42 | |
/*
 * NULL-terminated table of the words that are highlighted as keywords.
 * Contains the C89 keywords followed by the keywords added in C99.
 */
const char* keywords[] = {
    /* C89 */
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    "long", "register", "return", "short", "signed", "sizeof", "static", "struct",
    "switch", "typedef", "union", "unsigned", "void", "volatile", "while",
    /* C99 */
    "inline", "restrict", "_Bool", "_Complex", "_Imaginary",
    NULL
};
universe@4 | 49 | |
universe@4 | 50 | |
/*
 * Growable buffer holding the lines of one input file.
 */
typedef struct {
    size_t count;        /* number of lines currently stored */
    size_t capacity;     /* number of slots allocated in lines */
    size_t maxlinewidth; /* largest width passed to addline() so far */
    char** lines;        /* NUL-terminated heap copies of the lines */
} inputfile_t;

/*
 * Creates an empty line buffer with room for capacity lines.
 * A capacity of 0 is raised to 1 so that the buffer can always grow.
 *
 * Returns the new buffer, or NULL if memory could not be allocated.
 * The buffer must be released with freeinputfilebuffer().
 */
inputfile_t *inputfilebuffer(size_t capacity) {
    if (capacity == 0) {
        capacity = 1;
    }

    inputfile_t *inputfile = malloc(sizeof *inputfile);
    if (inputfile == NULL) {
        return NULL;
    }

    /* calloc also rejects a capacity whose byte size would overflow */
    inputfile->lines = calloc(capacity, sizeof *inputfile->lines);
    if (inputfile->lines == NULL) {
        free(inputfile);
        return NULL;
    }
    inputfile->capacity = capacity;
    inputfile->count = 0;
    inputfile->maxlinewidth = 0;

    return inputfile;
}

/*
 * Appends a copy of the first width bytes of line to the buffer and
 * updates maxlinewidth. The stored copy is NUL-terminated.
 *
 * The function cannot report errors to its caller, therefore running out
 * of memory terminates the program with an error message.
 */
void addline(inputfile_t *inputfile, char* line, size_t width) {
    char *copy = malloc(width + 1);
    if (copy == NULL) {
        perror("addline");
        exit(EXIT_FAILURE);
    }
    memcpy(copy, line, width);
    copy[width] = 0;

    if (inputfile->count >= inputfile->capacity) {
        size_t newcapacity = inputfile->capacity ? inputfile->capacity * 2 : 1;
        char **grown = NULL;
        /* guard the doubling and the byte count against wrap-around */
        if (newcapacity > inputfile->capacity
                && newcapacity <= (size_t) -1 / sizeof *inputfile->lines) {
            /* the size is given in bytes, not in number of slots */
            grown = realloc(inputfile->lines,
                    newcapacity * sizeof *inputfile->lines);
        }
        if (grown == NULL) {
            free(copy);
            fputs("addline: cannot grow line buffer\n", stderr);
            exit(EXIT_FAILURE);
        }
        inputfile->lines = grown;
        inputfile->capacity = newcapacity;
    }

    inputfile->lines[inputfile->count] = copy;
    if (width > inputfile->maxlinewidth) {
        inputfile->maxlinewidth = width;
    }
    inputfile->count++;
}

/*
 * Releases the buffer together with all lines stored in it.
 * Passing NULL is allowed and does nothing.
 */
void freeinputfilebuffer(inputfile_t *inputfile) {
    if (inputfile == NULL) {
        return;
    }
    for (size_t i = 0 ; i < inputfile->count ; i++) {
        free(inputfile->lines[i]);
    }
    free(inputfile->lines);
    free(inputfile);
}
universe@1 | 89 | |
universe@1 | 90 | inputfile_t *readinput(char *filename) { |
universe@1 | 91 | |
universe@1 | 92 | int fd = open(filename, O_RDONLY); |
universe@1 | 93 | if (fd == -1) return NULL; |
universe@1 | 94 | |
universe@1 | 95 | inputfile_t *inputfile = inputfilebuffer(512); |
universe@1 | 96 | |
universe@4 | 97 | char buf[INPUTBUF_SIZE]; |
universe@1 | 98 | ssize_t r; |
universe@1 | 99 | |
universe@4 | 100 | size_t maxlinewidth = 256; |
universe@1 | 101 | char *line = (char*) malloc(maxlinewidth); |
universe@1 | 102 | size_t col = 0; |
universe@1 | 103 | |
universe@4 | 104 | while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) { |
universe@1 | 105 | for (size_t i = 0 ; i < r ; i++) { |
universe@1 | 106 | if (col >= maxlinewidth-4) { |
universe@1 | 107 | maxlinewidth <<= 1; |
universe@1 | 108 | line = realloc(line, maxlinewidth); |
universe@1 | 109 | } |
universe@1 | 110 | |
universe@1 | 111 | if (buf[i] == '\n') { |
universe@5 | 112 | line[col++] = '\n'; |
universe@1 | 113 | line[col] = 0; |
universe@1 | 114 | addline(inputfile, line, col); |
universe@1 | 115 | col = 0; |
universe@1 | 116 | } else { |
universe@1 | 117 | line[col++] = buf[i]; |
universe@1 | 118 | } |
universe@1 | 119 | } |
universe@1 | 120 | } |
universe@1 | 121 | |
universe@1 | 122 | free(line); |
universe@1 | 123 | |
universe@1 | 124 | close(fd); |
universe@1 | 125 | |
universe@1 | 126 | return inputfile; |
universe@1 | 127 | } |
universe@1 | 128 | |
/*
 * Writes the character c to dest at offset dp, replacing the characters
 * that have a special meaning in HTML ('<', '>' and '&') with their
 * entities. No terminator is written.
 *
 * Returns the offset behind the last byte written. Up to five bytes are
 * written, so dest must provide that much room at dp.
 */
size_t writeescapedchar(char *dest, size_t dp, char c) {
    const char *entity;

    switch (c) {
    case '>':
        entity = "&gt;";
        break;
    case '<':
        entity = "&lt;";
        break;
    case '&':
        /* a raw ampersand would start an entity in the generated HTML */
        entity = "&amp;";
        break;
    default:
        dest[dp] = c;
        return dp + 1;
    }

    size_t len = strlen(entity);
    memcpy(dest + dp, entity, len);
    return dp + len;
}
universe@5 | 142 | |
universe@5 | 143 | int iskeyword(char *word) { |
universe@5 | 144 | for (int i = 0 ; keywords[i] ; i++) { |
universe@5 | 145 | if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) { |
universe@5 | 146 | return 1; |
universe@5 | 147 | } |
universe@5 | 148 | } |
universe@5 | 149 | return 0; |
universe@5 | 150 | } |
universe@5 | 151 | |
/*
 * Checks whether the first wp characters of word consist of upper case
 * letters and underscores only, which is how macro constants are
 * recognized.
 *
 * Returns 1 if all wp characters match (also when wp is 0), 0 otherwise.
 */
int iscapsonly(char *word, size_t wp) {
    for (size_t i = 0 ; i < wp ; i++) {
        /* the ctype functions are only defined for unsigned char values */
        unsigned char c = (unsigned char) word[i];
        if (!isupper(c) && c != '_') {
            return 0;
        }
    }
    return 1;
}
universe@9 | 160 | |
universe@4 | 161 | void parseline(char *src, char *dest) { |
universe@4 | 162 | size_t sp = 0, dp = 0; |
universe@4 | 163 | /* indent */ |
universe@4 | 164 | while (isspace(src[sp])) { |
universe@4 | 165 | dest[dp++] = src[sp++]; |
universe@4 | 166 | } |
universe@5 | 167 | char word[WORDBUF_SIZE]; |
universe@5 | 168 | memset(word, 0, WORDBUF_SIZE); |
universe@5 | 169 | size_t wp = 0; |
universe@7 | 170 | int isstring = 0, iscomment = 0; |
universe@8 | 171 | static int iscommentml; |
universe@7 | 172 | int isescaping = 0; |
universe@8 | 173 | |
universe@8 | 174 | if (iscommentml) { |
universe@8 | 175 | iscomment = 1; |
universe@8 | 176 | memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29); |
universe@8 | 177 | dp += 29; |
universe@8 | 178 | } |
universe@9 | 179 | |
universe@4 | 180 | for (char c = src[sp] ; c ; c=src[++sp]) { |
universe@8 | 181 | /* comments */ |
universe@8 | 182 | if (c == '/') { |
universe@8 | 183 | if (iscommentml && sp > 0 && src[sp-1] == '*') { |
universe@8 | 184 | iscomment = 0; |
universe@8 | 185 | iscommentml = 0; |
universe@8 | 186 | memcpy(&(dest[dp]), "/</span>", 8); |
universe@8 | 187 | dp += 8; |
universe@8 | 188 | continue; |
universe@8 | 189 | } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { |
universe@8 | 190 | iscomment = 1; |
universe@8 | 191 | iscommentml = (src[sp+1] == '*'); |
universe@8 | 192 | memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29); |
universe@8 | 193 | dp += 29; |
universe@8 | 194 | } |
universe@8 | 195 | } |
universe@8 | 196 | |
universe@8 | 197 | if (iscomment) { |
universe@8 | 198 | if (c == '\n') { |
universe@7 | 199 | memcpy(&(dest[dp]), "</span>", 7); |
universe@7 | 200 | dp += 7; |
universe@7 | 201 | } |
universe@8 | 202 | dp = writeescapedchar(dest, dp, c); |
universe@7 | 203 | } else { |
universe@8 | 204 | /* strings */ |
universe@8 | 205 | if (!isescaping && (c == '\'' || c == '\"')) { |
universe@8 | 206 | isstring ^= 1; |
universe@8 | 207 | if (isstring) { |
universe@8 | 208 | memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28); |
universe@8 | 209 | dp += 28; |
universe@7 | 210 | dp = writeescapedchar(dest, dp, c); |
universe@7 | 211 | } else { |
universe@7 | 212 | dp = writeescapedchar(dest, dp, c); |
universe@8 | 213 | memcpy(&(dest[dp]), "</span>", 7); |
universe@8 | 214 | dp += 7; |
universe@8 | 215 | } |
universe@8 | 216 | } else { |
universe@8 | 217 | if (isstring) { |
universe@8 | 218 | dp = writeescapedchar(dest, dp, c); |
universe@8 | 219 | } else if (!isalnum(c) && c != '_' && c != '#') { |
universe@8 | 220 | /* interpret word int_t */ |
universe@8 | 221 | if (wp > 0 && wp < WORDBUF_SIZE) { |
universe@8 | 222 | int closespan = 1; |
universe@8 | 223 | if (iskeyword(word)) { |
universe@8 | 224 | memcpy(&(dest[dp]), "<span class=\"c2html-keyword\">", 29); |
universe@8 | 225 | dp += 29; |
universe@8 | 226 | } else if (istype(word, wp)) { |
universe@8 | 227 | memcpy(&(dest[dp]), "<span class=\"c2html-type\">", 26); |
universe@8 | 228 | dp += 26; |
universe@8 | 229 | } else if (isdirective(word)) { |
universe@8 | 230 | memcpy(&(dest[dp]), "<span class=\"c2html-directive\">", 31); |
universe@8 | 231 | dp += 31; |
universe@9 | 232 | } else if (iscapsonly(word, wp)) { |
universe@9 | 233 | memcpy(&(dest[dp]), "<span class=\"c2html-macroconst\">", 32); |
universe@9 | 234 | dp += 32; |
universe@8 | 235 | } else { |
universe@8 | 236 | closespan = 0; |
universe@8 | 237 | } |
universe@8 | 238 | for (int i = 0 ; i < wp ; i++) { |
universe@8 | 239 | dp = writeescapedchar(dest, dp, word[i]); |
universe@8 | 240 | } |
universe@8 | 241 | if (closespan) { |
universe@8 | 242 | memcpy(&(dest[dp]), "</span>", 7); |
universe@8 | 243 | dp += 7; |
universe@8 | 244 | } |
universe@8 | 245 | } |
universe@9 | 246 | memset(word, 0, WORDBUF_SIZE); |
universe@9 | 247 | wp = 0; |
universe@8 | 248 | dp = writeescapedchar(dest, dp, c); |
universe@8 | 249 | } else { |
universe@8 | 250 | /* read word */ |
universe@8 | 251 | if (wp < WORDBUF_SIZE) { |
universe@8 | 252 | word[wp++] = c; |
universe@8 | 253 | } else if (wp == WORDBUF_SIZE) { |
universe@8 | 254 | for (int i = 0 ; i < WORDBUF_SIZE ; i++) { |
universe@8 | 255 | dp = writeescapedchar(dest, dp, word[i]); |
universe@8 | 256 | } |
universe@8 | 257 | wp++; |
universe@8 | 258 | dp = writeescapedchar(dest, dp, c); |
universe@8 | 259 | } else { |
universe@8 | 260 | dp = writeescapedchar(dest, dp, c); |
universe@8 | 261 | } |
universe@7 | 262 | } |
universe@5 | 263 | } |
universe@8 | 264 | |
universe@8 | 265 | isescaping = !isescaping & (c == '\\'); |
universe@4 | 266 | } |
universe@4 | 267 | } |
universe@4 | 268 | dest[dp] = 0; |
universe@4 | 269 | } |
universe@4 | 270 | |
/*
 * Prints a short description of the program and its usage to stdout.
 */
void printhelp(void) {
    printf("Formats source code using HTML.\n\nUsage:\n"
        " c2html [FILE...]"
        "\n");
}
universe@1 | 278 | |
/*
 * Returns the number of decimal digits needed to print lnc,
 * which is used as the column width of the line numbers.
 * The result is 1 for lnc == 0.
 */
int lnint(size_t lnc) {
    int w = 1;
    /* dividing the value instead of growing a power of ten cannot overflow */
    while (lnc >= 10) {
        lnc /= 10;
        w++;
    }
    return w;
}
universe@1 | 284 | |
universe@1 | 285 | int main(int argc, char** argv) { |
universe@1 | 286 | |
universe@1 | 287 | if (argc == 1) { |
universe@1 | 288 | printhelp(); |
universe@1 | 289 | return 0; |
universe@1 | 290 | } else { |
universe@1 | 291 | |
universe@1 | 292 | inputfile_t *inputfile = readinput(argv[1]); |
universe@1 | 293 | if (inputfile) { |
universe@1 | 294 | printf("<pre>\n"); |
universe@4 | 295 | char *line = (char*) malloc(inputfile->maxlinewidth*64); |
universe@4 | 296 | int lnw = lnint(inputfile->count); |
universe@1 | 297 | for (int i = 0 ; i < inputfile->count ; i++) { |
universe@4 | 298 | parseline(inputfile->lines[i], line); |
universe@5 | 299 | printf("<span class=\"c2html-lineno\">%*d:</span> %s", |
universe@9 | 300 | lnw, i+1, line); |
universe@1 | 301 | } |
universe@4 | 302 | free(line); |
universe@1 | 303 | printf("</pre>\n"); |
universe@1 | 304 | freeinputfilebuffer(inputfile); |
universe@1 | 305 | } |
universe@1 | 306 | |
universe@1 | 307 | return 0; |
universe@1 | 308 | } |
universe@1 | 309 | } |
universe@1 | 310 |