Wed, 10 Jul 2013 16:31:16 +0200
prepared java highlighting
universe@1 | 1 | /* |
universe@1 | 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
universe@1 | 3 | * |
universe@1 | 4 | * Copyright 2013 Mike Becker. All rights reserved. |
universe@1 | 5 | * |
universe@1 | 6 | * Redistribution and use in source and binary forms, with or without |
universe@1 | 7 | * modification, are permitted provided that the following conditions are met: |
universe@1 | 8 | * |
universe@1 | 9 | * 1. Redistributions of source code must retain the above copyright |
universe@1 | 10 | * notice, this list of conditions and the following disclaimer. |
universe@1 | 11 | * |
universe@1 | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
universe@1 | 13 | * notice, this list of conditions and the following disclaimer in the |
universe@1 | 14 | * documentation and/or other materials provided with the distribution. |
universe@1 | 15 | * |
universe@1 | 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
universe@1 | 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
universe@1 | 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
universe@1 | 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
universe@1 | 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
universe@1 | 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
universe@1 | 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
universe@1 | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
universe@1 | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
universe@1 | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
universe@1 | 26 | * POSSIBILITY OF SUCH DAMAGE. |
universe@1 | 27 | * |
universe@1 | 28 | */ |
universe@1 | 29 | |
universe@1 | 30 | #include <stdio.h> |
universe@1 | 31 | #include <stdlib.h> |
universe@1 | 32 | #include <string.h> |
universe@1 | 33 | #include <fcntl.h> |
universe@1 | 34 | #include <unistd.h> |
universe@4 | 35 | #include <ctype.h> |
universe@4 | 36 | |
universe@4 | 37 | #define INPUTBUF_SIZE 2048 |
universe@5 | 38 | #define WORDBUF_SIZE 16 |
universe@5 | 39 | |
/*
 * NULL-terminated table of the C keywords that receive keyword highlighting.
 * The terminating NULL is what iskeyword() relies on to find the end.
 */
const char* ckeywords[] = {
    "auto", "break", "case", "char",
    "const", "continue", "default", "do",
    "double", "else", "enum", "extern",
    "float", "for", "goto", "if",
    "int", "long", "register", "return",
    "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union",
    "unsigned", "void", "volatile", "while",
    NULL
};
universe@4 | 47 | |
/*
 * Tells whether a word looks like a type name, i.e. ends with "_t".
 *
 * word: the characters of the word (only the first len are inspected)
 * len:  number of characters in word
 *
 * Returns 1 if the last two characters are "_t", 0 otherwise. Words shorter
 * than two characters can never match; checking this first also keeps the
 * index computation from wrapping around for len < 2.
 */
int istype(char *word, size_t len) {
    if (len < 2) {
        return 0;
    }
    return (word[len-2] == '_' && word[len-1] == 't');
}
universe@16 | 51 | |
/*
 * Tells whether a word is a preprocessor directive.
 * Returns 1 if the word starts with '#', 0 otherwise.
 */
int isdirective(char *word) {
    if (*word == '#') {
        return 1;
    }
    return 0;
}
universe@16 | 55 | |
/*
 * Type predicate that never matches: it ignores its arguments and
 * always returns 0. It has the same signature as istype().
 */
int notypes(char *word, size_t len) {
    (void) word;
    (void) len;
    return 0;
}
universe@16 | 59 | |
/*
 * Directive predicate that never matches: it ignores its argument and
 * always returns 0. It has the same signature as isdirective().
 */
int nodirectives(char *word) {
    (void) word;
    return 0;
}
universe@16 | 63 | |
/*
 * Bundle of the language specific parts used while highlighting a line.
 */
typedef struct {
    /* NULL-terminated list of keywords */
    const char** keywords;
    /* predicate called with a word and its length; nonzero means "type" */
    int (*istype)(char*, size_t);
    /* predicate called with a word; nonzero means "directive" */
    int (*isdirective)(char*);
} highlighter_t;
universe@16 | 69 | |
/*
 * Program settings collected from the command line.
 */
typedef struct {
    /* output file name; NULL selects stdout */
    char* outfilename;
    /* name of the file to format */
    char* infilename;
    /* nonzero: highlight the source, zero: plain text */
    int highlight;
} settings_t;
universe@4 | 75 | |
/*
 * Growable list of the lines of an input file.
 */
typedef struct {
    /* number of lines currently stored */
    size_t count;
    /* number of slots allocated in lines */
    size_t capacity;
    /* width of the widest line added so far */
    size_t maxlinewidth;
    /* the stored lines, each one separately allocated */
    char** lines;
} inputfile_t;
universe@1 | 82 | |
universe@1 | 83 | inputfile_t *inputfilebuffer(size_t capacity) { |
universe@1 | 84 | inputfile_t *inputfile = (inputfile_t*) malloc(sizeof(inputfile_t)); |
universe@1 | 85 | inputfile->lines = (char**) malloc(capacity * sizeof(char*)); |
universe@1 | 86 | inputfile->capacity = capacity; |
universe@1 | 87 | inputfile->count = 0; |
universe@4 | 88 | inputfile->maxlinewidth = 0; |
universe@1 | 89 | |
universe@1 | 90 | return inputfile; |
universe@0 | 91 | } |
universe@0 | 92 | |
universe@1 | 93 | void addline(inputfile_t *inputfile, char* line, size_t width) { |
universe@1 | 94 | char *l = (char*) malloc(width+1); |
universe@1 | 95 | memcpy(l, line, width); |
universe@1 | 96 | l[width] = 0; |
universe@1 | 97 | if (inputfile->count >= inputfile->capacity) { |
universe@1 | 98 | inputfile->capacity <<= 1; |
universe@1 | 99 | inputfile->lines = realloc(inputfile->lines, inputfile->capacity); |
universe@1 | 100 | } |
universe@1 | 101 | inputfile->lines[inputfile->count] = l; |
universe@4 | 102 | inputfile->maxlinewidth = |
universe@4 | 103 | width > inputfile->maxlinewidth ? width : inputfile->maxlinewidth; |
universe@1 | 104 | inputfile->count++; |
universe@1 | 105 | } |
universe@1 | 106 | |
universe@1 | 107 | void freeinputfilebuffer(inputfile_t *inputfile) { |
universe@1 | 108 | for (int i = 0 ; i < inputfile->count ; i++) { |
universe@1 | 109 | free(inputfile->lines[i]); |
universe@1 | 110 | } |
universe@1 | 111 | free(inputfile->lines); |
universe@1 | 112 | free(inputfile); |
universe@1 | 113 | } |
universe@1 | 114 | |
universe@1 | 115 | inputfile_t *readinput(char *filename) { |
universe@1 | 116 | |
universe@1 | 117 | int fd = open(filename, O_RDONLY); |
universe@1 | 118 | if (fd == -1) return NULL; |
universe@1 | 119 | |
universe@1 | 120 | inputfile_t *inputfile = inputfilebuffer(512); |
universe@1 | 121 | |
universe@4 | 122 | char buf[INPUTBUF_SIZE]; |
universe@1 | 123 | ssize_t r; |
universe@1 | 124 | |
universe@4 | 125 | size_t maxlinewidth = 256; |
universe@1 | 126 | char *line = (char*) malloc(maxlinewidth); |
universe@1 | 127 | size_t col = 0; |
universe@1 | 128 | |
universe@4 | 129 | while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) { |
universe@1 | 130 | for (size_t i = 0 ; i < r ; i++) { |
universe@1 | 131 | if (col >= maxlinewidth-4) { |
universe@1 | 132 | maxlinewidth <<= 1; |
universe@1 | 133 | line = realloc(line, maxlinewidth); |
universe@1 | 134 | } |
universe@1 | 135 | |
universe@1 | 136 | if (buf[i] == '\n') { |
universe@5 | 137 | line[col++] = '\n'; |
universe@1 | 138 | line[col] = 0; |
universe@1 | 139 | addline(inputfile, line, col); |
universe@1 | 140 | col = 0; |
universe@1 | 141 | } else { |
universe@1 | 142 | line[col++] = buf[i]; |
universe@1 | 143 | } |
universe@1 | 144 | } |
universe@1 | 145 | } |
universe@1 | 146 | |
universe@1 | 147 | free(line); |
universe@1 | 148 | |
universe@1 | 149 | close(fd); |
universe@1 | 150 | |
universe@1 | 151 | return inputfile; |
universe@1 | 152 | } |
universe@1 | 153 | |
/*
 * Writes one character to dest at position dp, replacing the characters
 * that are special in HTML by their entities.
 *
 * dest: output buffer
 * dp:   position in dest to write to
 * c:    character to write
 *
 * '<', '>' and '&' become "&lt;", "&gt;" and "&amp;"; every other character
 * is written unchanged. No terminator is written. The caller must provide
 * room for up to five bytes at dest[dp].
 *
 * Returns the position following the written bytes.
 */
size_t writeescapedchar(char *dest, size_t dp, char c) {
    const char *entity;

    switch (c) {
        case '>':
            entity = "&gt;";
            break;
        case '<':
            entity = "&lt;";
            break;
        case '&':
            /* without this, source text such as "&lt;" would be shown as "<" */
            entity = "&amp;";
            break;
        default:
            dest[dp++] = c;
            return dp;
    }

    size_t len = strlen(entity);
    memcpy(&(dest[dp]), entity, len);
    return dp + len;
}
universe@5 | 167 | |
universe@16 | 168 | int iskeyword(char *word, const char** keywords) { |
universe@5 | 169 | for (int i = 0 ; keywords[i] ; i++) { |
universe@5 | 170 | if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) { |
universe@5 | 171 | return 1; |
universe@5 | 172 | } |
universe@5 | 173 | } |
universe@5 | 174 | return 0; |
universe@5 | 175 | } |
universe@5 | 176 | |
/*
 * Tells whether a word consists of upper case letters and underscores only.
 *
 * word: the characters of the word
 * wp:   number of characters to inspect
 *
 * Returns 1 if every inspected character is an upper case letter or '_'
 * (this includes the empty word), 0 otherwise.
 */
int iscapsonly(char *word, size_t wp) {
    for (size_t i = 0 ; i < wp ; i++) {
        /* the ctype functions are only defined for unsigned char values */
        if (!isupper((unsigned char) word[i]) && word[i] != '_') {
            return 0;
        }
    }
    return 1;
}
universe@9 | 185 | |
universe@16 | 186 | void parseline(char *src, char *dest, highlighter_t *highlighter) { |
universe@4 | 187 | size_t sp = 0, dp = 0; |
universe@4 | 188 | /* indent */ |
universe@4 | 189 | while (isspace(src[sp])) { |
universe@4 | 190 | dest[dp++] = src[sp++]; |
universe@4 | 191 | } |
universe@10 | 192 | |
universe@10 | 193 | static char word[WORDBUF_SIZE]; |
universe@10 | 194 | static char includefile[FILENAME_MAX]; |
universe@10 | 195 | |
universe@5 | 196 | memset(word, 0, WORDBUF_SIZE); |
universe@10 | 197 | size_t wp = 0, ifp = 0; |
universe@10 | 198 | int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0; |
universe@8 | 199 | static int iscommentml; |
universe@7 | 200 | int isescaping = 0; |
universe@8 | 201 | |
universe@8 | 202 | if (iscommentml) { |
universe@8 | 203 | iscomment = 1; |
universe@8 | 204 | memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29); |
universe@8 | 205 | dp += 29; |
universe@8 | 206 | } |
universe@9 | 207 | |
universe@4 | 208 | for (char c = src[sp] ; c ; c=src[++sp]) { |
universe@8 | 209 | /* comments */ |
universe@8 | 210 | if (c == '/') { |
universe@8 | 211 | if (iscommentml && sp > 0 && src[sp-1] == '*') { |
universe@8 | 212 | iscomment = 0; |
universe@8 | 213 | iscommentml = 0; |
universe@8 | 214 | memcpy(&(dest[dp]), "/</span>", 8); |
universe@8 | 215 | dp += 8; |
universe@8 | 216 | continue; |
universe@8 | 217 | } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { |
universe@8 | 218 | iscomment = 1; |
universe@8 | 219 | iscommentml = (src[sp+1] == '*'); |
universe@8 | 220 | memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29); |
universe@8 | 221 | dp += 29; |
universe@8 | 222 | } |
universe@8 | 223 | } |
universe@8 | 224 | |
universe@8 | 225 | if (iscomment) { |
universe@8 | 226 | if (c == '\n') { |
universe@7 | 227 | memcpy(&(dest[dp]), "</span>", 7); |
universe@7 | 228 | dp += 7; |
universe@7 | 229 | } |
universe@8 | 230 | dp = writeescapedchar(dest, dp, c); |
universe@10 | 231 | } else if (isinclude) { |
universe@10 | 232 | if (c == '<') { |
universe@10 | 233 | memcpy(&(dest[dp]), "<span class=\"c2html-stdinclude\">", 32); |
universe@10 | 234 | dp += 32; |
universe@10 | 235 | dp = writeescapedchar(dest, dp, c); |
universe@10 | 236 | } else if (c == '\"') { |
universe@10 | 237 | if (parseinclude) { |
universe@10 | 238 | dest[dp++] = '\"'; |
universe@10 | 239 | dest[dp++] = '>'; |
universe@10 | 240 | memcpy(&(dest[dp]), includefile, ifp); |
universe@10 | 241 | dp += ifp; |
universe@10 | 242 | |
universe@10 | 243 | dp = writeescapedchar(dest, dp, c); |
universe@10 | 244 | memcpy(&(dest[dp]), "</a>", 4); |
universe@10 | 245 | dp += 4; |
universe@10 | 246 | parseinclude = 0; |
universe@10 | 247 | } else { |
universe@10 | 248 | memcpy(&(dest[dp]), "<a class=\"c2html-userinclude\" href=", 35); |
universe@10 | 249 | dp += 35; |
universe@10 | 250 | dp = writeescapedchar(dest, dp, c); |
universe@10 | 251 | ifp = 0; |
universe@10 | 252 | includefile[ifp++] = '\"'; |
universe@10 | 253 | parseinclude = 1; |
universe@10 | 254 | } |
universe@10 | 255 | } else if (c == '>') { |
universe@10 | 256 | dp = writeescapedchar(dest, dp, c); |
universe@10 | 257 | memcpy(&(dest[dp]), "</span>", 7); |
universe@10 | 258 | dp += 7; |
universe@10 | 259 | } else { |
universe@10 | 260 | if (parseinclude) { |
universe@10 | 261 | includefile[ifp++] = c; |
universe@10 | 262 | } |
universe@10 | 263 | dp = writeescapedchar(dest, dp, c); |
universe@10 | 264 | } |
universe@7 | 265 | } else { |
universe@8 | 266 | /* strings */ |
universe@8 | 267 | if (!isescaping && (c == '\'' || c == '\"')) { |
universe@8 | 268 | isstring ^= 1; |
universe@8 | 269 | if (isstring) { |
universe@8 | 270 | memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28); |
universe@8 | 271 | dp += 28; |
universe@7 | 272 | dp = writeescapedchar(dest, dp, c); |
universe@7 | 273 | } else { |
universe@7 | 274 | dp = writeescapedchar(dest, dp, c); |
universe@8 | 275 | memcpy(&(dest[dp]), "</span>", 7); |
universe@8 | 276 | dp += 7; |
universe@8 | 277 | } |
universe@8 | 278 | } else { |
universe@8 | 279 | if (isstring) { |
universe@8 | 280 | dp = writeescapedchar(dest, dp, c); |
universe@10 | 281 | } else if (!isalnum(c) && c != '_' && c != '#' && c != '.') { |
universe@8 | 282 | /* interpret word int_t */ |
universe@8 | 283 | if (wp > 0 && wp < WORDBUF_SIZE) { |
universe@8 | 284 | int closespan = 1; |
universe@16 | 285 | if (iskeyword(word, highlighter->keywords)) { |
universe@8 | 286 | memcpy(&(dest[dp]), "<span class=\"c2html-keyword\">", 29); |
universe@8 | 287 | dp += 29; |
universe@16 | 288 | } else if (highlighter->istype(word, wp)) { |
universe@8 | 289 | memcpy(&(dest[dp]), "<span class=\"c2html-type\">", 26); |
universe@8 | 290 | dp += 26; |
universe@16 | 291 | } else if (highlighter->isdirective(word)) { |
universe@10 | 292 | isinclude = !strncmp("#include", word, WORDBUF_SIZE); |
universe@8 | 293 | memcpy(&(dest[dp]), "<span class=\"c2html-directive\">", 31); |
universe@8 | 294 | dp += 31; |
universe@9 | 295 | } else if (iscapsonly(word, wp)) { |
universe@9 | 296 | memcpy(&(dest[dp]), "<span class=\"c2html-macroconst\">", 32); |
universe@9 | 297 | dp += 32; |
universe@8 | 298 | } else { |
universe@8 | 299 | closespan = 0; |
universe@8 | 300 | } |
universe@8 | 301 | for (int i = 0 ; i < wp ; i++) { |
universe@8 | 302 | dp = writeescapedchar(dest, dp, word[i]); |
universe@8 | 303 | } |
universe@8 | 304 | if (closespan) { |
universe@8 | 305 | memcpy(&(dest[dp]), "</span>", 7); |
universe@8 | 306 | dp += 7; |
universe@8 | 307 | } |
universe@8 | 308 | } |
universe@9 | 309 | memset(word, 0, WORDBUF_SIZE); |
universe@9 | 310 | wp = 0; |
universe@8 | 311 | dp = writeescapedchar(dest, dp, c); |
universe@8 | 312 | } else { |
universe@8 | 313 | /* read word */ |
universe@8 | 314 | if (wp < WORDBUF_SIZE) { |
universe@8 | 315 | word[wp++] = c; |
universe@8 | 316 | } else if (wp == WORDBUF_SIZE) { |
universe@8 | 317 | for (int i = 0 ; i < WORDBUF_SIZE ; i++) { |
universe@8 | 318 | dp = writeescapedchar(dest, dp, word[i]); |
universe@8 | 319 | } |
universe@8 | 320 | wp++; |
universe@8 | 321 | dp = writeescapedchar(dest, dp, c); |
universe@8 | 322 | } else { |
universe@8 | 323 | dp = writeescapedchar(dest, dp, c); |
universe@8 | 324 | } |
universe@7 | 325 | } |
universe@5 | 326 | } |
universe@8 | 327 | |
universe@8 | 328 | isescaping = !isescaping & (c == '\\'); |
universe@4 | 329 | } |
universe@4 | 330 | } |
universe@4 | 331 | dest[dp] = 0; |
universe@4 | 332 | } |
universe@4 | 333 | |
/*
 * Writes the usage text to stdout.
 */
void printhelp() {
    static const char helptext[] =
            "Formats source code using HTML.\n\nUsage:\n"
            " c2html [Options] FILE\n\n"
            " Options:\n"
            " -h Prints this help message\n"
            " -o <output> Output file (if not specified, stdout is used)\n"
            " -p Disable highlighting (plain text)\n"
            "\n";

    fputs(helptext, stdout);
}
universe@1 | 345 | |
/*
 * Computes the number of decimal digits needed to print a line count.
 *
 * lnc: the number of lines
 *
 * Returns the number of digits of lnc, which is 1 for 0. Counting is done
 * by dividing lnc, so exact powers of ten get their full width (10 needs
 * two digits) and no intermediate value can overflow.
 */
int lnint(size_t lnc) {
    int w = 1;
    while (lnc >= 10) {
        lnc /= 10;
        w++;
    }
    return w;
}
universe@1 | 351 | |
universe@1 | 352 | int main(int argc, char** argv) { |
universe@1 | 353 | |
universe@11 | 354 | settings_t settings; |
universe@11 | 355 | settings.outfilename = NULL; |
universe@12 | 356 | settings.highlight = 1; |
universe@11 | 357 | |
universe@16 | 358 | highlighter_t highlighter; |
universe@16 | 359 | highlighter.isdirective = isdirective; |
universe@16 | 360 | highlighter.istype = istype; |
universe@16 | 361 | highlighter.keywords = ckeywords; |
universe@16 | 362 | |
universe@11 | 363 | char optc; |
universe@12 | 364 | while ((optc = getopt(argc, argv, "ho:p")) != -1) { |
universe@11 | 365 | switch (optc) { |
universe@11 | 366 | case 'o': |
universe@11 | 367 | if (!(optarg[0] == '-' && optarg[1] == 0)) { |
universe@11 | 368 | settings.outfilename = optarg; |
universe@11 | 369 | } |
universe@11 | 370 | break; |
universe@12 | 371 | case 'p': |
universe@12 | 372 | settings.highlight = 0; |
universe@12 | 373 | break; |
universe@11 | 374 | case 'h': |
universe@11 | 375 | printhelp(); |
universe@11 | 376 | return 0; |
universe@11 | 377 | default: |
universe@11 | 378 | return 1; |
universe@11 | 379 | } |
universe@11 | 380 | } |
universe@11 | 381 | |
universe@11 | 382 | if (optind != argc-1) { |
universe@1 | 383 | printhelp(); |
universe@11 | 384 | return 1; |
universe@1 | 385 | } else { |
universe@11 | 386 | settings.infilename = argv[optind]; |
universe@1 | 387 | |
universe@11 | 388 | inputfile_t *inputfile = readinput(settings.infilename); |
universe@1 | 389 | if (inputfile) { |
universe@11 | 390 | FILE *fout; |
universe@15 | 391 | char *line; |
universe@15 | 392 | if (settings.highlight) { |
universe@15 | 393 | line = (char*) malloc(inputfile->maxlinewidth*64); |
universe@15 | 394 | } else { |
universe@15 | 395 | line = NULL; |
universe@15 | 396 | } |
universe@11 | 397 | if (settings.outfilename) { |
universe@11 | 398 | fout = fopen(settings.outfilename, "w"); |
universe@11 | 399 | } else { |
universe@11 | 400 | fout = stdout; |
universe@11 | 401 | } |
universe@11 | 402 | fprintf(fout, "<pre>\n"); |
universe@4 | 403 | int lnw = lnint(inputfile->count); |
universe@1 | 404 | for (int i = 0 ; i < inputfile->count ; i++) { |
universe@12 | 405 | if (settings.highlight) { |
universe@16 | 406 | parseline(inputfile->lines[i], line, &highlighter); |
universe@12 | 407 | } else { |
universe@12 | 408 | line = inputfile->lines[i]; |
universe@12 | 409 | } |
universe@11 | 410 | fprintf(fout, "<span class=\"c2html-lineno\">%*d:</span> %s", |
universe@9 | 411 | lnw, i+1, line); |
universe@1 | 412 | } |
universe@15 | 413 | if (settings.highlight) { |
universe@15 | 414 | free(line); |
universe@15 | 415 | } |
universe@11 | 416 | fprintf(fout, "</pre>\n"); |
universe@11 | 417 | |
universe@11 | 418 | if (fout != stdout) { |
universe@11 | 419 | fclose(fout); |
universe@11 | 420 | } |
universe@11 | 421 | |
universe@1 | 422 | freeinputfilebuffer(inputfile); |
universe@1 | 423 | } |
universe@1 | 424 | |
universe@1 | 425 | return 0; |
universe@1 | 426 | } |
universe@1 | 427 | } |
universe@1 | 428 |