src/c2html.c

Wed, 10 Jul 2013 16:31:16 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 10 Jul 2013 16:31:16 +0200
changeset 16
fa0bcd0444eb
parent 15
398a7589297f
child 17
7ea86024aef0
permissions
-rw-r--r--

prepared java highlighting

universe@1 1 /*
universe@1 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@1 3 *
universe@1 4 * Copyright 2013 Mike Becker. All rights reserved.
universe@1 5 *
universe@1 6 * Redistribution and use in source and binary forms, with or without
universe@1 7 * modification, are permitted provided that the following conditions are met:
universe@1 8 *
universe@1 9 * 1. Redistributions of source code must retain the above copyright
universe@1 10 * notice, this list of conditions and the following disclaimer.
universe@1 11 *
universe@1 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@1 13 * notice, this list of conditions and the following disclaimer in the
universe@1 14 * documentation and/or other materials provided with the distribution.
universe@1 15 *
universe@1 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@1 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@1 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@1 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@1 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@1 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@1 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@1 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@1 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@1 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@1 26 * POSSIBILITY OF SUCH DAMAGE.
universe@1 27 *
universe@1 28 */
universe@1 29
universe@1 30 #include <stdio.h>
universe@1 31 #include <stdlib.h>
universe@1 32 #include <string.h>
universe@1 33 #include <fcntl.h>
universe@1 34 #include <unistd.h>
universe@4 35 #include <ctype.h>
universe@4 36
universe@4 37 #define INPUTBUF_SIZE 2048
universe@5 38 #define WORDBUF_SIZE 16
universe@5 39
/*
 * NULL-terminated table of the C keywords that get highlighted.
 * iskeyword() walks the table until it reaches the NULL sentinel.
 */
const char *ckeywords[] = {
    "auto",     "break",    "case",     "char",
    "const",    "continue", "default",  "do",
    "double",   "else",     "enum",     "extern",
    "float",    "for",      "goto",     "if",
    "int",      "long",     "register", "return",
    "short",    "signed",   "sizeof",   "static",
    "struct",   "switch",   "typedef",  "union",
    "unsigned", "void",     "volatile", "while",
    NULL
};
universe@4 47
/*
 * Tells whether a word looks like a C type name, i.e. ends in "_t".
 *
 * word: the characters of the word (only the first len bytes are examined)
 * len:  number of characters in word
 *
 * Returns 1 if the last two characters are "_t", otherwise 0.
 * Words shorter than two characters can never match; they are rejected
 * up front so that word[len-2] is never evaluated with a wrapped index.
 */
int istype(char *word, size_t len) {
    if (len < 2) {
        return 0;
    }
    return (word[len-2] == '_' && word[len-1] == 't');
}
universe@16 51
/*
 * Tells whether a word is a preprocessor directive.
 * Returns 1 when the first character of word is '#', otherwise 0.
 */
int isdirective(char *word) {
    if (*word == '#') {
        return 1;
    }
    return 0;
}
universe@16 55
/*
 * Type classifier that never matches.
 * It has the same signature as istype() so it can fill the istype slot of a
 * highlighter_t; both arguments are ignored and the result is always 0.
 */
int notypes(char *word, size_t len) {
    (void) word;
    (void) len;
    return 0;
}
universe@16 59
/*
 * Directive classifier that never matches.
 * It has the same signature as isdirective() so it can fill the isdirective
 * slot of a highlighter_t; the argument is ignored and the result is always 0.
 */
int nodirectives(char *word) {
    (void) word;
    return 0;
}
universe@16 63
/*
 * Language-specific hooks consulted by parseline() when classifying a word.
 */
typedef struct {
    /* NULL-terminated list of keywords */
    const char **keywords;
    /* returns non-zero if the word of the given length is a type name */
    int (*istype)(char *word, size_t len);
    /* returns non-zero if the word is a preprocessor directive */
    int (*isdirective)(char *word);
} highlighter_t;
universe@16 69
/*
 * Program settings filled in from the command line by main().
 */
typedef struct {
    /* output file name; NULL means write to stdout */
    char *outfilename;
    /* name of the source file to format */
    char *infilename;
    /* non-zero: highlight the source, zero: emit it as plain text */
    int highlight;
} settings_t;
universe@4 75
/*
 * In-memory copy of an input file, stored line by line.
 */
typedef struct {
    /* number of lines currently stored */
    size_t count;
    /* number of slots available in lines */
    size_t capacity;
    /* width of the widest line added so far */
    size_t maxlinewidth;
    /* the stored lines, each a separately allocated NUL-terminated string */
    char **lines;
} inputfile_t;
universe@1 82
universe@1 83 inputfile_t *inputfilebuffer(size_t capacity) {
universe@1 84 inputfile_t *inputfile = (inputfile_t*) malloc(sizeof(inputfile_t));
universe@1 85 inputfile->lines = (char**) malloc(capacity * sizeof(char*));
universe@1 86 inputfile->capacity = capacity;
universe@1 87 inputfile->count = 0;
universe@4 88 inputfile->maxlinewidth = 0;
universe@1 89
universe@1 90 return inputfile;
universe@0 91 }
universe@0 92
universe@1 93 void addline(inputfile_t *inputfile, char* line, size_t width) {
universe@1 94 char *l = (char*) malloc(width+1);
universe@1 95 memcpy(l, line, width);
universe@1 96 l[width] = 0;
universe@1 97 if (inputfile->count >= inputfile->capacity) {
universe@1 98 inputfile->capacity <<= 1;
universe@1 99 inputfile->lines = realloc(inputfile->lines, inputfile->capacity);
universe@1 100 }
universe@1 101 inputfile->lines[inputfile->count] = l;
universe@4 102 inputfile->maxlinewidth =
universe@4 103 width > inputfile->maxlinewidth ? width : inputfile->maxlinewidth;
universe@1 104 inputfile->count++;
universe@1 105 }
universe@1 106
universe@1 107 void freeinputfilebuffer(inputfile_t *inputfile) {
universe@1 108 for (int i = 0 ; i < inputfile->count ; i++) {
universe@1 109 free(inputfile->lines[i]);
universe@1 110 }
universe@1 111 free(inputfile->lines);
universe@1 112 free(inputfile);
universe@1 113 }
universe@1 114
universe@1 115 inputfile_t *readinput(char *filename) {
universe@1 116
universe@1 117 int fd = open(filename, O_RDONLY);
universe@1 118 if (fd == -1) return NULL;
universe@1 119
universe@1 120 inputfile_t *inputfile = inputfilebuffer(512);
universe@1 121
universe@4 122 char buf[INPUTBUF_SIZE];
universe@1 123 ssize_t r;
universe@1 124
universe@4 125 size_t maxlinewidth = 256;
universe@1 126 char *line = (char*) malloc(maxlinewidth);
universe@1 127 size_t col = 0;
universe@1 128
universe@4 129 while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
universe@1 130 for (size_t i = 0 ; i < r ; i++) {
universe@1 131 if (col >= maxlinewidth-4) {
universe@1 132 maxlinewidth <<= 1;
universe@1 133 line = realloc(line, maxlinewidth);
universe@1 134 }
universe@1 135
universe@1 136 if (buf[i] == '\n') {
universe@5 137 line[col++] = '\n';
universe@1 138 line[col] = 0;
universe@1 139 addline(inputfile, line, col);
universe@1 140 col = 0;
universe@1 141 } else {
universe@1 142 line[col++] = buf[i];
universe@1 143 }
universe@1 144 }
universe@1 145 }
universe@1 146
universe@1 147 free(line);
universe@1 148
universe@1 149 close(fd);
universe@1 150
universe@1 151 return inputfile;
universe@1 152 }
universe@1 153
/*
 * Writes one character to dest at offset dp, HTML-escaped where necessary.
 *
 * '&', '<' and '>' are written as "&amp;", "&lt;" and "&gt;"; every other
 * character is copied unchanged. Quotes are deliberately not escaped,
 * because parseline() uses this function to write the quotes that delimit
 * an href attribute value.
 *
 * dest: output buffer; the caller must provide room for up to 5 bytes at dp
 * dp:   offset in dest to write at
 * c:    the character to write
 *
 * Returns the offset just past the written bytes. No NUL terminator is
 * written.
 */
size_t writeescapedchar(char *dest, size_t dp, char c) {
    const char *entity;

    switch (c) {
    case '&':
        entity = "&amp;";
        break;
    case '<':
        entity = "&lt;";
        break;
    case '>':
        entity = "&gt;";
        break;
    default:
        dest[dp++] = c;
        return dp;
    }

    while (*entity) {
        dest[dp++] = *entity++;
    }

    return dp;
}
universe@5 167
universe@16 168 int iskeyword(char *word, const char** keywords) {
universe@5 169 for (int i = 0 ; keywords[i] ; i++) {
universe@5 170 if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) {
universe@5 171 return 1;
universe@5 172 }
universe@5 173 }
universe@5 174 return 0;
universe@5 175 }
universe@5 176
/*
 * Tells whether a word consists only of uppercase letters and underscores.
 *
 * word: the characters of the word
 * wp:   number of characters to examine
 *
 * Returns 1 if all wp characters are uppercase letters or '_' (this
 * includes wp == 0), otherwise 0. Digits make the result 0.
 */
int iscapsonly(char *word, size_t wp) {
    for (size_t i = 0 ; i < wp ; i++) {
        /* ctype functions require a value representable as unsigned char */
        if (!isupper((unsigned char) word[i]) && word[i] != '_') {
            return 0;
        }
    }
    return 1;
}
universe@9 185
universe@16 186 void parseline(char *src, char *dest, highlighter_t *highlighter) {
universe@4 187 size_t sp = 0, dp = 0;
universe@4 188 /* indent */
universe@4 189 while (isspace(src[sp])) {
universe@4 190 dest[dp++] = src[sp++];
universe@4 191 }
universe@10 192
universe@10 193 static char word[WORDBUF_SIZE];
universe@10 194 static char includefile[FILENAME_MAX];
universe@10 195
universe@5 196 memset(word, 0, WORDBUF_SIZE);
universe@10 197 size_t wp = 0, ifp = 0;
universe@10 198 int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
universe@8 199 static int iscommentml;
universe@7 200 int isescaping = 0;
universe@8 201
universe@8 202 if (iscommentml) {
universe@8 203 iscomment = 1;
universe@8 204 memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
universe@8 205 dp += 29;
universe@8 206 }
universe@9 207
universe@4 208 for (char c = src[sp] ; c ; c=src[++sp]) {
universe@8 209 /* comments */
universe@8 210 if (c == '/') {
universe@8 211 if (iscommentml && sp > 0 && src[sp-1] == '*') {
universe@8 212 iscomment = 0;
universe@8 213 iscommentml = 0;
universe@8 214 memcpy(&(dest[dp]), "/</span>", 8);
universe@8 215 dp += 8;
universe@8 216 continue;
universe@8 217 } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
universe@8 218 iscomment = 1;
universe@8 219 iscommentml = (src[sp+1] == '*');
universe@8 220 memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
universe@8 221 dp += 29;
universe@8 222 }
universe@8 223 }
universe@8 224
universe@8 225 if (iscomment) {
universe@8 226 if (c == '\n') {
universe@7 227 memcpy(&(dest[dp]), "</span>", 7);
universe@7 228 dp += 7;
universe@7 229 }
universe@8 230 dp = writeescapedchar(dest, dp, c);
universe@10 231 } else if (isinclude) {
universe@10 232 if (c == '<') {
universe@10 233 memcpy(&(dest[dp]), "<span class=\"c2html-stdinclude\">", 32);
universe@10 234 dp += 32;
universe@10 235 dp = writeescapedchar(dest, dp, c);
universe@10 236 } else if (c == '\"') {
universe@10 237 if (parseinclude) {
universe@10 238 dest[dp++] = '\"';
universe@10 239 dest[dp++] = '>';
universe@10 240 memcpy(&(dest[dp]), includefile, ifp);
universe@10 241 dp += ifp;
universe@10 242
universe@10 243 dp = writeescapedchar(dest, dp, c);
universe@10 244 memcpy(&(dest[dp]), "</a>", 4);
universe@10 245 dp += 4;
universe@10 246 parseinclude = 0;
universe@10 247 } else {
universe@10 248 memcpy(&(dest[dp]), "<a class=\"c2html-userinclude\" href=", 35);
universe@10 249 dp += 35;
universe@10 250 dp = writeescapedchar(dest, dp, c);
universe@10 251 ifp = 0;
universe@10 252 includefile[ifp++] = '\"';
universe@10 253 parseinclude = 1;
universe@10 254 }
universe@10 255 } else if (c == '>') {
universe@10 256 dp = writeescapedchar(dest, dp, c);
universe@10 257 memcpy(&(dest[dp]), "</span>", 7);
universe@10 258 dp += 7;
universe@10 259 } else {
universe@10 260 if (parseinclude) {
universe@10 261 includefile[ifp++] = c;
universe@10 262 }
universe@10 263 dp = writeescapedchar(dest, dp, c);
universe@10 264 }
universe@7 265 } else {
universe@8 266 /* strings */
universe@8 267 if (!isescaping && (c == '\'' || c == '\"')) {
universe@8 268 isstring ^= 1;
universe@8 269 if (isstring) {
universe@8 270 memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28);
universe@8 271 dp += 28;
universe@7 272 dp = writeescapedchar(dest, dp, c);
universe@7 273 } else {
universe@7 274 dp = writeescapedchar(dest, dp, c);
universe@8 275 memcpy(&(dest[dp]), "</span>", 7);
universe@8 276 dp += 7;
universe@8 277 }
universe@8 278 } else {
universe@8 279 if (isstring) {
universe@8 280 dp = writeescapedchar(dest, dp, c);
universe@10 281 } else if (!isalnum(c) && c != '_' && c != '#' && c != '.') {
universe@8 282 /* interpret word int_t */
universe@8 283 if (wp > 0 && wp < WORDBUF_SIZE) {
universe@8 284 int closespan = 1;
universe@16 285 if (iskeyword(word, highlighter->keywords)) {
universe@8 286 memcpy(&(dest[dp]), "<span class=\"c2html-keyword\">", 29);
universe@8 287 dp += 29;
universe@16 288 } else if (highlighter->istype(word, wp)) {
universe@8 289 memcpy(&(dest[dp]), "<span class=\"c2html-type\">", 26);
universe@8 290 dp += 26;
universe@16 291 } else if (highlighter->isdirective(word)) {
universe@10 292 isinclude = !strncmp("#include", word, WORDBUF_SIZE);
universe@8 293 memcpy(&(dest[dp]), "<span class=\"c2html-directive\">", 31);
universe@8 294 dp += 31;
universe@9 295 } else if (iscapsonly(word, wp)) {
universe@9 296 memcpy(&(dest[dp]), "<span class=\"c2html-macroconst\">", 32);
universe@9 297 dp += 32;
universe@8 298 } else {
universe@8 299 closespan = 0;
universe@8 300 }
universe@8 301 for (int i = 0 ; i < wp ; i++) {
universe@8 302 dp = writeescapedchar(dest, dp, word[i]);
universe@8 303 }
universe@8 304 if (closespan) {
universe@8 305 memcpy(&(dest[dp]), "</span>", 7);
universe@8 306 dp += 7;
universe@8 307 }
universe@8 308 }
universe@9 309 memset(word, 0, WORDBUF_SIZE);
universe@9 310 wp = 0;
universe@8 311 dp = writeescapedchar(dest, dp, c);
universe@8 312 } else {
universe@8 313 /* read word */
universe@8 314 if (wp < WORDBUF_SIZE) {
universe@8 315 word[wp++] = c;
universe@8 316 } else if (wp == WORDBUF_SIZE) {
universe@8 317 for (int i = 0 ; i < WORDBUF_SIZE ; i++) {
universe@8 318 dp = writeescapedchar(dest, dp, word[i]);
universe@8 319 }
universe@8 320 wp++;
universe@8 321 dp = writeescapedchar(dest, dp, c);
universe@8 322 } else {
universe@8 323 dp = writeescapedchar(dest, dp, c);
universe@8 324 }
universe@7 325 }
universe@5 326 }
universe@8 327
universe@8 328 isescaping = !isescaping & (c == '\\');
universe@4 329 }
universe@4 330 }
universe@4 331 dest[dp] = 0;
universe@4 332 }
universe@4 333
/*
 * Prints the usage text to stdout.
 */
void printhelp() {
    static const char usage[] =
        "Formats source code using HTML.\n\nUsage:\n"
        " c2html [Options] FILE\n\n"
        " Options:\n"
        " -h Prints this help message\n"
        " -o <output> Output file (if not specified, stdout is used)\n"
        " -p Disable highlighting (plain text)\n"
        "\n";

    fputs(usage, stdout);
}
universe@1 345
/*
 * Returns the number of decimal digits needed to print lnc.
 * main() uses it as the field width of the line number column.
 * The result is 1 for 0.
 */
int lnint(size_t lnc) {
    int w = 1;
    /* dividing the argument down cannot overflow, and exact powers of ten
     * get their full width (10 -> 2, 100 -> 3) */
    while (lnc >= 10) {
        lnc /= 10;
        w++;
    }
    return w;
}
universe@1 351
universe@1 352 int main(int argc, char** argv) {
universe@1 353
universe@11 354 settings_t settings;
universe@11 355 settings.outfilename = NULL;
universe@12 356 settings.highlight = 1;
universe@11 357
universe@16 358 highlighter_t highlighter;
universe@16 359 highlighter.isdirective = isdirective;
universe@16 360 highlighter.istype = istype;
universe@16 361 highlighter.keywords = ckeywords;
universe@16 362
universe@11 363 char optc;
universe@12 364 while ((optc = getopt(argc, argv, "ho:p")) != -1) {
universe@11 365 switch (optc) {
universe@11 366 case 'o':
universe@11 367 if (!(optarg[0] == '-' && optarg[1] == 0)) {
universe@11 368 settings.outfilename = optarg;
universe@11 369 }
universe@11 370 break;
universe@12 371 case 'p':
universe@12 372 settings.highlight = 0;
universe@12 373 break;
universe@11 374 case 'h':
universe@11 375 printhelp();
universe@11 376 return 0;
universe@11 377 default:
universe@11 378 return 1;
universe@11 379 }
universe@11 380 }
universe@11 381
universe@11 382 if (optind != argc-1) {
universe@1 383 printhelp();
universe@11 384 return 1;
universe@1 385 } else {
universe@11 386 settings.infilename = argv[optind];
universe@1 387
universe@11 388 inputfile_t *inputfile = readinput(settings.infilename);
universe@1 389 if (inputfile) {
universe@11 390 FILE *fout;
universe@15 391 char *line;
universe@15 392 if (settings.highlight) {
universe@15 393 line = (char*) malloc(inputfile->maxlinewidth*64);
universe@15 394 } else {
universe@15 395 line = NULL;
universe@15 396 }
universe@11 397 if (settings.outfilename) {
universe@11 398 fout = fopen(settings.outfilename, "w");
universe@11 399 } else {
universe@11 400 fout = stdout;
universe@11 401 }
universe@11 402 fprintf(fout, "<pre>\n");
universe@4 403 int lnw = lnint(inputfile->count);
universe@1 404 for (int i = 0 ; i < inputfile->count ; i++) {
universe@12 405 if (settings.highlight) {
universe@16 406 parseline(inputfile->lines[i], line, &highlighter);
universe@12 407 } else {
universe@12 408 line = inputfile->lines[i];
universe@12 409 }
universe@11 410 fprintf(fout, "<span class=\"c2html-lineno\">%*d:</span> %s",
universe@9 411 lnw, i+1, line);
universe@1 412 }
universe@15 413 if (settings.highlight) {
universe@15 414 free(line);
universe@15 415 }
universe@11 416 fprintf(fout, "</pre>\n");
universe@11 417
universe@11 418 if (fout != stdout) {
universe@11 419 fclose(fout);
universe@11 420 }
universe@11 421
universe@1 422 freeinputfilebuffer(inputfile);
universe@1 423 }
universe@1 424
universe@1 425 return 0;
universe@1 426 }
universe@1 427 }
universe@1 428

mercurial