Wed, 10 Jul 2013 17:57:03 +0200
Implemented Java highlighting
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2013 Mike Becker. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 *
28 */
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <fcntl.h>
34 #include <unistd.h>
35 #include <ctype.h>
/* Size in bytes of the chunk buffer that readinput passes to read(). */
#define INPUTBUF_SIZE 2048
/* Size of the word buffer used by parseline; also the comparison bound that
 * iskeyword passes to strncmp. */
#define WORDBUF_SIZE 16
/* NULL-terminated table of the C keywords that get keyword highlighting. */
const char *ckeywords[] = {
    "auto",     "break",    "case",     "char",
    "const",    "continue", "default",  "do",
    "double",   "else",     "enum",     "extern",
    "float",    "for",      "goto",     "if",
    "int",      "long",     "register", "return",
    "short",    "signed",   "sizeof",   "static",
    "struct",   "switch",   "typedef",  "union",
    "unsigned", "void",     "volatile", "while",
    NULL
};
/* NULL-terminated table of the Java keywords that get keyword highlighting. */
const char *jkeywords[] = {
    "abstract",   "continue",   "for",        "new",        "switch",
    "assert",     "default",    "goto",       "package",    "synchronized",
    "boolean",    "do",         "if",         "private",    "this",
    "break",      "double",     "implements", "protected",  "throw",
    "byte",       "else",       "import",     "public",     "throws",
    "case",       "enum",       "instanceof", "return",     "transient",
    "catch",      "extends",    "int",        "short",      "try",
    "char",       "final",      "interface",  "static",     "void",
    "class",      "finally",    "long",       "strictfp",   "volatile",
    "const",      "float",      "native",     "super",      "while",
    NULL
};
/*
 * Returns non-zero if the word of length len ends in "_t" (e.g. size_t).
 *
 * Words shorter than two characters can never carry the suffix; rejecting
 * them up front also keeps len-2 from wrapping around and reading in front
 * of the buffer.
 */
int isctype(char *word, size_t len) {
    if (len < 2) {
        return 0;
    }
    return (word[len-2] == '_' && word[len-1] == 't');
}
/* Returns non-zero if word begins with '#', i.e. looks like a C preprocessor
 * directive. */
int iscdirective(char *word) {
    return *word == '#';
}
/*
 * Returns non-zero if word starts with an upper case letter, which is how
 * Java type names are recognised here. len is unused; it is only present so
 * the function matches the istype callback signature of highlighter_t.
 */
int isjtype(char *word, size_t len) {
    (void) len;
    /* <ctype.h> functions require a value representable as unsigned char;
     * passing a negative plain char is undefined behaviour. */
    return isupper((unsigned char) word[0]) != 0;
}
/* Returns non-zero if word begins with '@' (treated as the Java counterpart
 * of a directive, e.g. an annotation). */
int isjdirective(char *word) {
    return (*word == '@');
}
/* Language specific hooks used by parseline to classify a word. */
typedef struct {
    /* NULL-terminated keyword table */
    const char **keywords;
    /* returns non-zero if the word of the given length names a type */
    int (*istype)(char *word, size_t len);
    /* returns non-zero if the word is a directive */
    int (*isdirective)(char *word);
} highlighter_t;
/* Settings collected from the command line in main. */
typedef struct {
    char *outfilename;  /* output file name, NULL means stdout */
    char *infilename;   /* input file name */
    int highlight;      /* non-zero: highlight, zero: plain text */
} settings_t;
/* Growable buffer holding the lines of the input file. */
typedef struct {
    size_t count;         /* number of lines stored */
    size_t capacity;      /* number of slots allocated in lines */
    size_t maxlinewidth;  /* width of the widest line added so far */
    char **lines;         /* the stored lines, each a separate allocation */
} inputfile_t;
93 inputfile_t *inputfilebuffer(size_t capacity) {
94 inputfile_t *inputfile = (inputfile_t*) malloc(sizeof(inputfile_t));
95 inputfile->lines = (char**) malloc(capacity * sizeof(char*));
96 inputfile->capacity = capacity;
97 inputfile->count = 0;
98 inputfile->maxlinewidth = 0;
100 return inputfile;
101 }
103 void addline(inputfile_t *inputfile, char* line, size_t width) {
104 char *l = (char*) malloc(width+1);
105 memcpy(l, line, width);
106 l[width] = 0;
107 if (inputfile->count >= inputfile->capacity) {
108 inputfile->capacity <<= 1;
109 inputfile->lines = realloc(inputfile->lines, inputfile->capacity);
110 }
111 inputfile->lines[inputfile->count] = l;
112 inputfile->maxlinewidth =
113 width > inputfile->maxlinewidth ? width : inputfile->maxlinewidth;
114 inputfile->count++;
115 }
117 void freeinputfilebuffer(inputfile_t *inputfile) {
118 for (int i = 0 ; i < inputfile->count ; i++) {
119 free(inputfile->lines[i]);
120 }
121 free(inputfile->lines);
122 free(inputfile);
123 }
125 inputfile_t *readinput(char *filename) {
127 int fd = open(filename, O_RDONLY);
128 if (fd == -1) return NULL;
130 inputfile_t *inputfile = inputfilebuffer(512);
132 char buf[INPUTBUF_SIZE];
133 ssize_t r;
135 size_t maxlinewidth = 256;
136 char *line = (char*) malloc(maxlinewidth);
137 size_t col = 0;
139 while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
140 for (size_t i = 0 ; i < r ; i++) {
141 if (col >= maxlinewidth-4) {
142 maxlinewidth <<= 1;
143 line = realloc(line, maxlinewidth);
144 }
146 if (buf[i] == '\n') {
147 line[col++] = '\n';
148 line[col] = 0;
149 addline(inputfile, line, col);
150 col = 0;
151 } else {
152 line[col++] = buf[i];
153 }
154 }
155 }
157 free(line);
159 close(fd);
161 return inputfile;
162 }
/*
 * Writes c to dest at offset dp, replacing the characters that are special
 * in HTML text by their entities: '<' by "&lt;", '>' by "&gt;" and '&' by
 * "&amp;". Without the '&' replacement, source text such as "&lt;" would be
 * rendered as "<" by a browser.
 *
 * No terminator is written. The caller must provide room for up to five
 * bytes at dest[dp].
 *
 * Returns the offset just past the bytes written.
 */
size_t writeescapedchar(char *dest, size_t dp, char c) {
    const char *entity;

    switch (c) {
    case '<':
        entity = "&lt;";
        break;
    case '>':
        entity = "&gt;";
        break;
    case '&':
        entity = "&amp;";
        break;
    default:
        dest[dp] = c;
        return dp + 1;
    }

    size_t len = strlen(entity);
    memcpy(&(dest[dp]), entity, len);
    return dp + len;
}
178 int iskeyword(char *word, const char** keywords) {
179 for (int i = 0 ; keywords[i] ; i++) {
180 if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) {
181 return 1;
182 }
183 }
184 return 0;
185 }
/*
 * Returns 1 if the first wp characters of word consist only of upper case
 * letters and underscores (the shape of a macro constant), 0 otherwise.
 * An empty word (wp == 0) yields 1.
 */
int iscapsonly(char *word, size_t wp) {
    for (size_t i = 0 ; i < wp ; i++) {
        /* <ctype.h> functions require a value representable as unsigned
         * char; passing a negative plain char is undefined behaviour */
        if (!isupper((unsigned char) word[i]) && word[i] != '_') {
            return 0;
        }
    }
    return 1;
}
196 void parseline(char *src, char *dest, highlighter_t *highlighter) {
197 size_t sp = 0, dp = 0;
198 /* indent */
199 while (isspace(src[sp])) {
200 dest[dp++] = src[sp++];
201 }
203 static char word[WORDBUF_SIZE];
204 static char includefile[FILENAME_MAX];
206 memset(word, 0, WORDBUF_SIZE);
207 size_t wp = 0, ifp = 0;
208 int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
209 static int iscommentml;
210 int isescaping = 0;
212 if (iscommentml) {
213 iscomment = 1;
214 memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
215 dp += 29;
216 }
218 for (char c = src[sp] ; c ; c=src[++sp]) {
219 /* comments */
220 if (c == '/') {
221 if (iscommentml && sp > 0 && src[sp-1] == '*') {
222 iscomment = 0;
223 iscommentml = 0;
224 memcpy(&(dest[dp]), "/</span>", 8);
225 dp += 8;
226 continue;
227 } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
228 iscomment = 1;
229 iscommentml = (src[sp+1] == '*');
230 memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
231 dp += 29;
232 }
233 }
235 if (iscomment) {
236 if (c == '\n') {
237 memcpy(&(dest[dp]), "</span>", 7);
238 dp += 7;
239 }
240 dp = writeescapedchar(dest, dp, c);
241 } else if (isinclude) {
242 if (c == '<') {
243 memcpy(&(dest[dp]), "<span class=\"c2html-stdinclude\">", 32);
244 dp += 32;
245 dp = writeescapedchar(dest, dp, c);
246 } else if (c == '\"') {
247 if (parseinclude) {
248 dest[dp++] = '\"';
249 dest[dp++] = '>';
250 memcpy(&(dest[dp]), includefile, ifp);
251 dp += ifp;
253 dp = writeescapedchar(dest, dp, c);
254 memcpy(&(dest[dp]), "</a>", 4);
255 dp += 4;
256 parseinclude = 0;
257 } else {
258 memcpy(&(dest[dp]), "<a class=\"c2html-userinclude\" href=", 35);
259 dp += 35;
260 dp = writeescapedchar(dest, dp, c);
261 ifp = 0;
262 includefile[ifp++] = '\"';
263 parseinclude = 1;
264 }
265 } else if (c == '>') {
266 dp = writeescapedchar(dest, dp, c);
267 memcpy(&(dest[dp]), "</span>", 7);
268 dp += 7;
269 } else {
270 if (parseinclude) {
271 includefile[ifp++] = c;
272 }
273 dp = writeescapedchar(dest, dp, c);
274 }
275 } else {
276 /* strings */
277 if (!isescaping && (c == '\'' || c == '\"')) {
278 isstring ^= 1;
279 if (isstring) {
280 memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28);
281 dp += 28;
282 dp = writeescapedchar(dest, dp, c);
283 } else {
284 dp = writeescapedchar(dest, dp, c);
285 memcpy(&(dest[dp]), "</span>", 7);
286 dp += 7;
287 }
288 } else {
289 if (isstring) {
290 dp = writeescapedchar(dest, dp, c);
291 } else if (!isalnum(c) && c!='_' && c!='#' && c!='.' && c!='@') {
292 /* interpret word int_t */
293 if (wp > 0 && wp < WORDBUF_SIZE) {
294 int closespan = 1;
295 if (iskeyword(word, highlighter->keywords)) {
296 memcpy(&(dest[dp]), "<span class=\"c2html-keyword\">", 29);
297 dp += 29;
298 } else if (highlighter->istype(word, wp)) {
299 memcpy(&(dest[dp]), "<span class=\"c2html-type\">", 26);
300 dp += 26;
301 } else if (highlighter->isdirective(word)) {
302 isinclude = !strncmp("#include", word, WORDBUF_SIZE);
303 memcpy(&(dest[dp]), "<span class=\"c2html-directive\">", 31);
304 dp += 31;
305 } else if (iscapsonly(word, wp)) {
306 memcpy(&(dest[dp]), "<span class=\"c2html-macroconst\">", 32);
307 dp += 32;
308 } else {
309 closespan = 0;
310 }
311 for (int i = 0 ; i < wp ; i++) {
312 dp = writeescapedchar(dest, dp, word[i]);
313 }
314 if (closespan) {
315 memcpy(&(dest[dp]), "</span>", 7);
316 dp += 7;
317 }
318 }
319 memset(word, 0, WORDBUF_SIZE);
320 wp = 0;
321 dp = writeescapedchar(dest, dp, c);
322 } else {
323 /* read word */
324 if (wp < WORDBUF_SIZE) {
325 word[wp++] = c;
326 } else if (wp == WORDBUF_SIZE) {
327 for (int i = 0 ; i < WORDBUF_SIZE ; i++) {
328 dp = writeescapedchar(dest, dp, word[i]);
329 }
330 wp++;
331 dp = writeescapedchar(dest, dp, c);
332 } else {
333 dp = writeescapedchar(dest, dp, c);
334 }
335 }
336 }
338 isescaping = !isescaping & (c == '\\');
339 }
340 }
341 dest[dp] = 0;
342 }
/* Writes the usage text to stdout. */
void printhelp() {
    static const char helptext[] =
        "Formats source code using HTML.\n\nUsage:\n"
        " c2html [Options] FILE\n\n"
        " Options:\n"
        " -h Prints this help message\n"
        " -j Highlight Java instead of C source code\n"
        " -o <output> Output file (if not specified, stdout is used)\n"
        " -p Disable highlighting (plain text)\n"
        "\n";

    fputs(helptext, stdout);
}
/*
 * Returns the number of decimal digits needed to print lnc; this is the
 * column width main uses for line numbers. Zero needs one digit.
 *
 * The digits are counted by dividing lnc itself, so exact powers of ten get
 * the right width (10 -> 2, 100 -> 3) and no intermediate value can
 * overflow.
 */
int lnint(size_t lnc) {
    int w = 1;
    while (lnc >= 10) {
        lnc /= 10;
        w++;
    }
    return w;
}
363 int main(int argc, char** argv) {
365 settings_t settings;
366 settings.outfilename = NULL;
367 settings.highlight = 1;
369 highlighter_t highlighter;
370 highlighter.isdirective = iscdirective;
371 highlighter.istype = isctype;
372 highlighter.keywords = ckeywords;
374 char optc;
375 while ((optc = getopt(argc, argv, "hjo:p")) != -1) {
376 switch (optc) {
377 case 'o':
378 if (!(optarg[0] == '-' && optarg[1] == 0)) {
379 settings.outfilename = optarg;
380 }
381 break;
382 case 'j':
383 highlighter.isdirective = isjdirective;
384 highlighter.istype = isjtype;
385 highlighter.keywords = jkeywords;
386 break;
387 case 'p':
388 settings.highlight = 0;
389 break;
390 case 'h':
391 printhelp();
392 return 0;
393 default:
394 return 1;
395 }
396 }
398 if (optind != argc-1) {
399 printhelp();
400 return 1;
401 } else {
402 settings.infilename = argv[optind];
404 inputfile_t *inputfile = readinput(settings.infilename);
405 if (inputfile) {
406 FILE *fout;
407 char *line;
408 if (settings.highlight) {
409 line = (char*) malloc(inputfile->maxlinewidth*64);
410 } else {
411 line = NULL;
412 }
413 if (settings.outfilename) {
414 fout = fopen(settings.outfilename, "w");
415 } else {
416 fout = stdout;
417 }
418 fprintf(fout, "<pre>\n");
419 int lnw = lnint(inputfile->count);
420 for (int i = 0 ; i < inputfile->count ; i++) {
421 if (settings.highlight) {
422 parseline(inputfile->lines[i], line, &highlighter);
423 } else {
424 line = inputfile->lines[i];
425 }
426 fprintf(fout, "<span class=\"c2html-lineno\">%*d:</span> %s",
427 lnw, i+1, line);
428 }
429 if (settings.highlight) {
430 free(line);
431 }
432 fprintf(fout, "</pre>\n");
434 if (fout != stdout) {
435 fclose(fout);
436 }
438 freeinputfilebuffer(inputfile);
439 }
441 return 0;
442 }
443 }