Wed, 10 Jul 2013 18:12:13 +0200
fixed highlighting for java
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2013 Mike Becker. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 *
28 */
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <fcntl.h>
34 #include <unistd.h>
35 #include <ctype.h>
/* Buffer dimensions used by the file reader and the line parser. */
enum {
    INPUTBUF_SIZE = 2048, /* bytes requested from read() per call */
    WORDBUF_SIZE = 64     /* size of the buffer that collects one word */
};
/*
 * NULL-terminated table of the words highlighted as keywords in C sources.
 * Contains the 32 keywords of C89 followed by the keywords added in C99.
 */
const char* ckeywords[] = {
    /* C89 */
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    "long", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    "while",
    /* C99 */
    "inline", "restrict", "_Bool", "_Complex", "_Imaginary",
    NULL
};
/*
 * NULL-terminated table of the words highlighted as keywords when Java
 * highlighting is selected. Five entries per row; the order of the entries
 * is irrelevant for lookups, which scan the whole table.
 */
const char* jkeywords[] = {
    "abstract", "continue",   "for",        "new",       "switch",
    "assert",   "default",    "goto",       "package",   "synchronized",
    "boolean",  "do",         "if",         "private",   "this",
    "break",    "double",     "implements", "protected", "throw",
    "byte",     "else",       "import",     "public",    "throws",
    "case",     "enum",       "instanceof", "return",    "transient",
    "catch",    "extends",    "int",        "short",     "try",
    "char",     "final",      "interface",  "static",    "void",
    "class",    "finally",    "long",       "strictfp",  "volatile",
    "const",    "float",      "native",     "super",     "while",
    NULL
};
/*
 * Evaluates to non-zero if c can be part of a word: an alphanumeric
 * character, '_', '#' (C directives) or '@' (Java annotations).
 *
 * The argument is converted to unsigned char before it reaches isalnum(),
 * because passing a negative char value to a <ctype.h> function is undefined.
 * NOTE: c is evaluated more than once, so it must not have side effects.
 */
#define iswordcharacter(c) \
    (isalnum((unsigned char) (c)) || (c) == '_' || (c) == '#' || (c) == '@')
/*
 * Tells whether a word looks like a C type name, i.e. ends in "_t".
 *
 * word - the word to inspect (len readable bytes)
 * len  - number of characters in word
 *
 * Returns 1 if the last two characters are "_t", 0 otherwise. Words shorter
 * than two characters are never types; without this guard len-2 would wrap
 * around and index far outside the buffer.
 */
int isctype(char *word, size_t len) {
    if (len < 2) {
        return 0;
    }
    return (word[len-2] == '_' && word[len-1] == 't');
}
/*
 * Tells whether a word is a C preprocessor directive.
 * Returns 1 if the word begins with '#', 0 otherwise.
 */
int iscdirective(char *word) {
    const char first = *word;
    return first == '#' ? 1 : 0;
}
/*
 * Tells whether a word looks like a Java type name, i.e. starts with an
 * upper case letter.
 *
 * word - the word to inspect (at least one readable byte)
 * len  - unused; present so the function matches the istype callback type
 *
 * Returns 1 if the first character is an upper case letter, 0 otherwise.
 */
int isjtype(char *word, size_t len) {
    (void) len;
    /* <ctype.h> functions are undefined for negative char values */
    return isupper((unsigned char) word[0]) != 0;
}
/*
 * Tells whether a word is treated as a directive in Java sources.
 * Returns 1 if the word begins with '@', 0 otherwise.
 */
int isjdirective(char *word) {
    const char first = *word;
    return first == '@' ? 1 : 0;
}
/*
 * Language specific parts of the highlighter: a keyword table and two
 * predicates that classify a word.
 */
typedef struct {
    /* NULL-terminated list of keywords */
    const char **keywords;
    /* returns non-zero if the word of the given length is a type name */
    int (*istype)(char *word, size_t len);
    /* returns non-zero if the word is a directive */
    int (*isdirective)(char *word);
} highlighter_t;
/* Program settings collected from the command line. */
typedef struct {
    /* output file name; NULL selects stdout */
    char *outfilename;
    /* name of the source file to format */
    char *infilename;
    /* non-zero: highlight the source, zero: emit plain text */
    int highlight;
} settings_t;
/*
 * Growable buffer that holds the lines of an input file.
 */
typedef struct {
    size_t count;        /* number of lines stored in lines */
    size_t capacity;     /* number of slots allocated for lines */
    size_t maxlinewidth; /* largest width passed to addline() so far */
    char **lines;        /* heap allocated, NUL-terminated copies of the lines */
} inputfile_t;

/*
 * Creates an empty line buffer with room for capacity lines.
 *
 * A capacity of zero is raised to one, because the buffer grows by doubling
 * and could never grow from zero.
 *
 * Returns the new buffer, or NULL if the memory could not be allocated.
 * The buffer must be released with freeinputfilebuffer().
 */
inputfile_t *inputfilebuffer(size_t capacity) {
    if (capacity == 0) {
        capacity = 1;
    }
    if (capacity > (size_t) -1 / sizeof(char*)) {
        return NULL; /* byte count would overflow */
    }

    inputfile_t *inputfile = malloc(sizeof *inputfile);
    if (!inputfile) {
        return NULL;
    }
    inputfile->lines = malloc(capacity * sizeof *inputfile->lines);
    if (!inputfile->lines) {
        free(inputfile);
        return NULL;
    }
    inputfile->capacity = capacity;
    inputfile->count = 0;
    inputfile->maxlinewidth = 0;

    return inputfile;
}

/*
 * Appends a copy of the first width bytes of line to the buffer. The copy
 * is NUL-terminated; maxlinewidth is updated when width exceeds it.
 *
 * A NULL buffer is ignored. As the function cannot report errors, running
 * out of memory prints a message to stderr and terminates the program.
 */
void addline(inputfile_t *inputfile, char* line, size_t width) {
    if (!inputfile) {
        return;
    }

    char *copy = malloc(width+1);
    if (!copy) {
        fputs("c2html: out of memory\n", stderr);
        exit(EXIT_FAILURE);
    }
    memcpy(copy, line, width);
    copy[width] = 0;

    if (inputfile->count >= inputfile->capacity) {
        size_t newcapacity = inputfile->capacity ? inputfile->capacity * 2 : 1;
        char **grown = NULL;
        if (newcapacity > inputfile->capacity
                && newcapacity <= (size_t) -1 / sizeof *grown) {
            /* the size is a byte count: slots times the size of a slot */
            grown = realloc(inputfile->lines, newcapacity * sizeof *grown);
        }
        if (!grown) {
            free(copy);
            fputs("c2html: out of memory\n", stderr);
            exit(EXIT_FAILURE);
        }
        inputfile->lines = grown;
        inputfile->capacity = newcapacity;
    }

    inputfile->lines[inputfile->count] = copy;
    if (width > inputfile->maxlinewidth) {
        inputfile->maxlinewidth = width;
    }
    inputfile->count++;
}

/*
 * Releases a line buffer together with all lines stored in it.
 * Passing NULL is allowed and does nothing.
 */
void freeinputfilebuffer(inputfile_t *inputfile) {
    if (!inputfile) {
        return;
    }
    for (size_t i = 0 ; i < inputfile->count ; i++) {
        free(inputfile->lines[i]);
    }
    free(inputfile->lines);
    free(inputfile);
}
127 inputfile_t *readinput(char *filename) {
129 int fd = open(filename, O_RDONLY);
130 if (fd == -1) return NULL;
132 inputfile_t *inputfile = inputfilebuffer(512);
134 char buf[INPUTBUF_SIZE];
135 ssize_t r;
137 size_t maxlinewidth = 256;
138 char *line = (char*) malloc(maxlinewidth);
139 size_t col = 0;
141 while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
142 for (size_t i = 0 ; i < r ; i++) {
143 if (col >= maxlinewidth-4) {
144 maxlinewidth <<= 1;
145 line = realloc(line, maxlinewidth);
146 }
148 if (buf[i] == '\n') {
149 line[col++] = '\n';
150 line[col] = 0;
151 addline(inputfile, line, col);
152 col = 0;
153 } else {
154 line[col++] = buf[i];
155 }
156 }
157 }
159 free(line);
161 close(fd);
163 return inputfile;
164 }
/*
 * Writes one character to dest at offset dp, replacing the characters that
 * are special in HTML text by their entities: '<' by "&lt;", '>' by "&gt;"
 * and '&' by "&amp;". Without escaping '&', source text such as "&lt;" would
 * be rendered as '<' by a browser.
 *
 * dest - destination buffer; needs room for up to 5 bytes at offset dp
 * dp   - offset at which to write
 * c    - the character to write
 *
 * Returns the offset behind the written bytes. No terminator is written.
 */
size_t writeescapedchar(char *dest, size_t dp, char c) {
    const char *entity;

    switch (c) {
    case '>': entity = "&gt;"; break;
    case '<': entity = "&lt;"; break;
    case '&': entity = "&amp;"; break;
    default:
        dest[dp++] = c;
        return dp;
    }

    while (*entity) {
        dest[dp++] = *entity++;
    }
    return dp;
}
180 int iskeyword(char *word, const char** keywords) {
181 for (int i = 0 ; keywords[i] ; i++) {
182 if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) {
183 return 1;
184 }
185 }
186 return 0;
187 }
/*
 * Tells whether a word consists of upper case letters and underscores only.
 *
 * word - the word to inspect (wp readable bytes)
 * wp   - number of characters in word
 *
 * Returns 1 if every character is an upper case letter or '_' (also for an
 * empty word), 0 otherwise. Digits do not count as capitals.
 */
int iscapsonly(char *word, size_t wp) {
    for (size_t i = 0 ; i < wp ; i++) {
        /* <ctype.h> functions are undefined for negative char values */
        if (!isupper((unsigned char) word[i]) && word[i] != '_') {
            return 0;
        }
    }
    return 1;
}
198 void parseline(char *src, char *dest, highlighter_t *highlighter) {
199 size_t sp = 0, dp = 0;
200 /* indent */
201 while (isspace(src[sp])) {
202 dest[dp++] = src[sp++];
203 }
205 static char word[WORDBUF_SIZE];
206 static char includefile[FILENAME_MAX];
208 memset(word, 0, WORDBUF_SIZE);
209 size_t wp = 0, ifp = 0;
210 int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
211 static int iscommentml;
212 int isescaping = 0;
214 if (iscommentml) {
215 iscomment = 1;
216 memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
217 dp += 29;
218 }
220 for (char c = src[sp] ; c ; c=src[++sp]) {
221 /* comments */
222 if (c == '/') {
223 if (iscommentml && sp > 0 && src[sp-1] == '*') {
224 iscomment = 0;
225 iscommentml = 0;
226 memcpy(&(dest[dp]), "/</span>", 8);
227 dp += 8;
228 continue;
229 } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
230 iscomment = 1;
231 iscommentml = (src[sp+1] == '*');
232 memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
233 dp += 29;
234 }
235 }
237 if (iscomment) {
238 if (c == '\n') {
239 memcpy(&(dest[dp]), "</span>", 7);
240 dp += 7;
241 }
242 dp = writeescapedchar(dest, dp, c);
243 } else if (isinclude) {
244 if (c == '<') {
245 memcpy(&(dest[dp]), "<span class=\"c2html-stdinclude\">", 32);
246 dp += 32;
247 dp = writeescapedchar(dest, dp, c);
248 } else if (c == '\"') {
249 if (parseinclude) {
250 dest[dp++] = '\"';
251 dest[dp++] = '>';
252 memcpy(&(dest[dp]), includefile, ifp);
253 dp += ifp;
255 dp = writeescapedchar(dest, dp, c);
256 memcpy(&(dest[dp]), "</a>", 4);
257 dp += 4;
258 parseinclude = 0;
259 } else {
260 memcpy(&(dest[dp]), "<a class=\"c2html-userinclude\" href=", 35);
261 dp += 35;
262 dp = writeescapedchar(dest, dp, c);
263 ifp = 0;
264 includefile[ifp++] = '\"';
265 parseinclude = 1;
266 }
267 } else if (c == '>') {
268 dp = writeescapedchar(dest, dp, c);
269 memcpy(&(dest[dp]), "</span>", 7);
270 dp += 7;
271 } else {
272 if (parseinclude) {
273 includefile[ifp++] = c;
274 }
275 dp = writeescapedchar(dest, dp, c);
276 }
277 } else {
278 /* strings */
279 if (!isescaping && (c == '\'' || c == '\"')) {
280 isstring ^= 1;
281 if (isstring) {
282 memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28);
283 dp += 28;
284 dp = writeescapedchar(dest, dp, c);
285 } else {
286 dp = writeescapedchar(dest, dp, c);
287 memcpy(&(dest[dp]), "</span>", 7);
288 dp += 7;
289 }
290 } else {
291 if (isstring) {
292 dp = writeescapedchar(dest, dp, c);
293 } else if (!iswordcharacter(c)) {
294 /* interpret word int_t */
295 if (wp > 0 && wp < WORDBUF_SIZE) {
296 int closespan = 1;
297 if (iskeyword(word, highlighter->keywords)) {
298 memcpy(&(dest[dp]), "<span class=\"c2html-keyword\">", 29);
299 dp += 29;
300 } else if (highlighter->istype(word, wp)) {
301 memcpy(&(dest[dp]), "<span class=\"c2html-type\">", 26);
302 dp += 26;
303 } else if (highlighter->isdirective(word)) {
304 isinclude = !strncmp("#include", word, WORDBUF_SIZE);
305 memcpy(&(dest[dp]), "<span class=\"c2html-directive\">", 31);
306 dp += 31;
307 } else if (iscapsonly(word, wp)) {
308 memcpy(&(dest[dp]), "<span class=\"c2html-macroconst\">", 32);
309 dp += 32;
310 } else {
311 closespan = 0;
312 }
313 for (int i = 0 ; i < wp ; i++) {
314 dp = writeescapedchar(dest, dp, word[i]);
315 }
316 if (closespan) {
317 memcpy(&(dest[dp]), "</span>", 7);
318 dp += 7;
319 }
320 }
321 memset(word, 0, WORDBUF_SIZE);
322 wp = 0;
323 dp = writeescapedchar(dest, dp, c);
324 } else {
325 /* read word */
326 if (wp < WORDBUF_SIZE) {
327 word[wp++] = c;
328 } else if (wp == WORDBUF_SIZE) {
329 for (int i = 0 ; i < WORDBUF_SIZE ; i++) {
330 dp = writeescapedchar(dest, dp, word[i]);
331 }
332 wp++;
333 dp = writeescapedchar(dest, dp, c);
334 } else {
335 dp = writeescapedchar(dest, dp, c);
336 }
337 }
338 }
340 isescaping = !isescaping & (c == '\\');
341 }
342 }
343 dest[dp] = 0;
344 }
/* Prints the usage message to stdout. */
void printhelp() {
    static const char usage[] =
        "Formats source code using HTML.\n\nUsage:\n"
        " c2html [Options] FILE\n\n"
        " Options:\n"
        " -h Prints this help message\n"
        " -j Highlight Java instead of C source code\n"
        " -o <output> Output file (if not specified, stdout is used)\n"
        " -p Disable highlighting (plain text)\n"
        "\n";

    fputs(usage, stdout);
}
/*
 * Computes the number of decimal digits needed to print a line count.
 *
 * lnc - the highest line number that will be printed
 *
 * Returns the number of digits of lnc, which is at least 1. Counting by
 * division keeps exact powers of ten correct (10 needs two digits) and
 * cannot overflow.
 */
int lnint(size_t lnc) {
    int w = 1;
    while (lnc >= 10) {
        lnc /= 10;
        w++;
    }
    return w;
}
365 int main(int argc, char** argv) {
367 settings_t settings;
368 settings.outfilename = NULL;
369 settings.highlight = 1;
371 highlighter_t highlighter;
372 highlighter.isdirective = iscdirective;
373 highlighter.istype = isctype;
374 highlighter.keywords = ckeywords;
376 char optc;
377 while ((optc = getopt(argc, argv, "hjo:p")) != -1) {
378 switch (optc) {
379 case 'o':
380 if (!(optarg[0] == '-' && optarg[1] == 0)) {
381 settings.outfilename = optarg;
382 }
383 break;
384 case 'j':
385 highlighter.isdirective = isjdirective;
386 highlighter.istype = isjtype;
387 highlighter.keywords = jkeywords;
388 break;
389 case 'p':
390 settings.highlight = 0;
391 break;
392 case 'h':
393 printhelp();
394 return 0;
395 default:
396 return 1;
397 }
398 }
400 if (optind != argc-1) {
401 printhelp();
402 return 1;
403 } else {
404 settings.infilename = argv[optind];
406 inputfile_t *inputfile = readinput(settings.infilename);
407 if (inputfile) {
408 FILE *fout;
409 char *line;
410 if (settings.highlight) {
411 line = (char*) malloc(inputfile->maxlinewidth*64);
412 } else {
413 line = NULL;
414 }
415 if (settings.outfilename) {
416 fout = fopen(settings.outfilename, "w");
417 } else {
418 fout = stdout;
419 }
420 fprintf(fout, "<pre>\n");
421 int lnw = lnint(inputfile->count);
422 for (int i = 0 ; i < inputfile->count ; i++) {
423 if (settings.highlight) {
424 parseline(inputfile->lines[i], line, &highlighter);
425 } else {
426 line = inputfile->lines[i];
427 }
428 fprintf(fout, "<span class=\"c2html-lineno\">%*d:</span> %s",
429 lnw, i+1, line);
430 }
431 if (settings.highlight) {
432 free(line);
433 }
434 fprintf(fout, "</pre>\n");
436 if (fout != stdout) {
437 fclose(fout);
438 }
440 freeinputfilebuffer(inputfile);
441 }
443 return 0;
444 }
445 }