Wed, 10 Jul 2013 16:31:16 +0200
prepared java highlighting
1 /*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright 2013 Mike Becker. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 *
28 */
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <fcntl.h>
34 #include <unistd.h>
35 #include <ctype.h>
/* Number of bytes requested from read() per call in readinput(). */
#define INPUTBUF_SIZE 2048

/* Number of characters parseline() buffers for a single word. */
#define WORDBUF_SIZE 16
/*
 * NULL-terminated list of C keywords. main() installs it as the keyword
 * table of the highlighter that is handed to parseline().
 */
const char *ckeywords[] = {
    "auto", "break", "case", "char",
    "const", "continue", "default", "do",
    "double", "else", "enum", "extern",
    "float", "for", "goto", "if",
    "int", "long", "register", "return",
    "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union",
    "unsigned", "void", "volatile", "while",
    NULL
};
/*
 * Reports whether a word looks like a type name, i.e. ends in "_t".
 *
 * word: the characters of the word (only the first len are examined)
 * len:  number of characters in word
 *
 * Returns 1 if the last two characters are '_' and 't', otherwise 0.
 * Words shorter than two characters never match; the length is tested first
 * so that no byte in front of the buffer is read.
 */
int istype(char *word, size_t len) {
    if (len < 2) {
        return 0;
    }
    return (word[len-2] == '_' && word[len-1] == 't');
}
/*
 * Tells whether a word is a preprocessor directive.
 *
 * Returns 1 when the first character of word is '#', otherwise 0.
 */
int isdirective(char *word) {
    if (*word == '#') {
        return 1;
    }
    return 0;
}
/*
 * Type predicate that never matches: both arguments are ignored and the
 * result is always 0. The signature is the one of the istype member of
 * highlighter_t.
 */
int notypes(char *word, size_t len) {
    (void) word;
    (void) len;
    return 0;
}
/*
 * Directive predicate that never matches: the argument is ignored and the
 * result is always 0. The signature is the one of the isdirective member of
 * highlighter_t.
 */
int nodirectives(char *word) {
    (void) word;
    return 0;
}
/*
 * Language specific hooks consulted by parseline() when it classifies a word.
 */
typedef struct {
    /* NULL-terminated keyword list, searched with iskeyword() */
    const char **keywords;
    /* returns nonzero if the word of the given length is a type name */
    int (*istype)(char *word, size_t len);
    /* returns nonzero if the word is a preprocessor directive */
    int (*isdirective)(char *word);
} highlighter_t;
/*
 * Command line settings collected by main().
 */
typedef struct {
    /* output file name; main() writes to stdout while this is NULL */
    char *outfilename;
    /* name of the source file to format */
    char *infilename;
    /* nonzero: run parseline() on every line; zero: plain output */
    int highlight;
} settings_t;
/*
 * Growable list of the lines of an input file.
 */
typedef struct {
    /* number of lines currently stored in lines */
    size_t count;
    /* number of slots allocated for lines */
    size_t capacity;
    /* largest width that has been passed to addline() */
    size_t maxlinewidth;
    /* the stored lines; every entry is a separately allocated string */
    char **lines;
} inputfile_t;
83 inputfile_t *inputfilebuffer(size_t capacity) {
84 inputfile_t *inputfile = (inputfile_t*) malloc(sizeof(inputfile_t));
85 inputfile->lines = (char**) malloc(capacity * sizeof(char*));
86 inputfile->capacity = capacity;
87 inputfile->count = 0;
88 inputfile->maxlinewidth = 0;
90 return inputfile;
91 }
93 void addline(inputfile_t *inputfile, char* line, size_t width) {
94 char *l = (char*) malloc(width+1);
95 memcpy(l, line, width);
96 l[width] = 0;
97 if (inputfile->count >= inputfile->capacity) {
98 inputfile->capacity <<= 1;
99 inputfile->lines = realloc(inputfile->lines, inputfile->capacity);
100 }
101 inputfile->lines[inputfile->count] = l;
102 inputfile->maxlinewidth =
103 width > inputfile->maxlinewidth ? width : inputfile->maxlinewidth;
104 inputfile->count++;
105 }
107 void freeinputfilebuffer(inputfile_t *inputfile) {
108 for (int i = 0 ; i < inputfile->count ; i++) {
109 free(inputfile->lines[i]);
110 }
111 free(inputfile->lines);
112 free(inputfile);
113 }
115 inputfile_t *readinput(char *filename) {
117 int fd = open(filename, O_RDONLY);
118 if (fd == -1) return NULL;
120 inputfile_t *inputfile = inputfilebuffer(512);
122 char buf[INPUTBUF_SIZE];
123 ssize_t r;
125 size_t maxlinewidth = 256;
126 char *line = (char*) malloc(maxlinewidth);
127 size_t col = 0;
129 while ((r = read(fd, buf, INPUTBUF_SIZE)) > 0) {
130 for (size_t i = 0 ; i < r ; i++) {
131 if (col >= maxlinewidth-4) {
132 maxlinewidth <<= 1;
133 line = realloc(line, maxlinewidth);
134 }
136 if (buf[i] == '\n') {
137 line[col++] = '\n';
138 line[col] = 0;
139 addline(inputfile, line, col);
140 col = 0;
141 } else {
142 line[col++] = buf[i];
143 }
144 }
145 }
147 free(line);
149 close(fd);
151 return inputfile;
152 }
/*
 * Writes one character to dest, replacing characters that are special in
 * HTML text by their entity.
 *
 * dest: output buffer; the caller must provide room for up to five bytes
 * dp:   position in dest at which to write
 * c:    character to write
 *
 * '<', '>' and '&' become "&lt;", "&gt;" and "&amp;"; every other character
 * is copied unchanged. Quotes are intentionally left alone because
 * parseline() uses this function to write the quotes of an href attribute.
 *
 * Returns the position behind the written bytes. No terminator is written.
 */
size_t writeescapedchar(char *dest, size_t dp, char c) {
    const char *entity;

    switch (c) {
        case '>':
            entity = "&gt;";
            break;
        case '<':
            entity = "&lt;";
            break;
        case '&':
            entity = "&amp;";
            break;
        default:
            dest[dp++] = c;
            return dp;
    }

    size_t len = strlen(entity);
    memcpy(dest + dp, entity, len);
    return dp + len;
}
168 int iskeyword(char *word, const char** keywords) {
169 for (int i = 0 ; keywords[i] ; i++) {
170 if (strncmp(keywords[i], word, WORDBUF_SIZE) == 0) {
171 return 1;
172 }
173 }
174 return 0;
175 }
/*
 * Checks whether a word consists of upper case letters and underscores only.
 *
 * word: characters of the word
 * wp:   number of characters to examine
 *
 * Returns 1 if every examined character is an upper case letter or '_'
 * (this includes wp == 0), otherwise 0.
 */
int iscapsonly(char *word, size_t wp) {
    for (size_t i = 0 ; i < wp ; i++) {
        /* isupper() is only defined for unsigned char values and EOF */
        if (!isupper((unsigned char) word[i]) && word[i] != '_') {
            return 0;
        }
    }
    return 1;
}
186 void parseline(char *src, char *dest, highlighter_t *highlighter) {
187 size_t sp = 0, dp = 0;
188 /* indent */
189 while (isspace(src[sp])) {
190 dest[dp++] = src[sp++];
191 }
193 static char word[WORDBUF_SIZE];
194 static char includefile[FILENAME_MAX];
196 memset(word, 0, WORDBUF_SIZE);
197 size_t wp = 0, ifp = 0;
198 int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
199 static int iscommentml;
200 int isescaping = 0;
202 if (iscommentml) {
203 iscomment = 1;
204 memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
205 dp += 29;
206 }
208 for (char c = src[sp] ; c ; c=src[++sp]) {
209 /* comments */
210 if (c == '/') {
211 if (iscommentml && sp > 0 && src[sp-1] == '*') {
212 iscomment = 0;
213 iscommentml = 0;
214 memcpy(&(dest[dp]), "/</span>", 8);
215 dp += 8;
216 continue;
217 } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
218 iscomment = 1;
219 iscommentml = (src[sp+1] == '*');
220 memcpy(&(dest[dp]), "<span class=\"c2html-comment\">", 29);
221 dp += 29;
222 }
223 }
225 if (iscomment) {
226 if (c == '\n') {
227 memcpy(&(dest[dp]), "</span>", 7);
228 dp += 7;
229 }
230 dp = writeescapedchar(dest, dp, c);
231 } else if (isinclude) {
232 if (c == '<') {
233 memcpy(&(dest[dp]), "<span class=\"c2html-stdinclude\">", 32);
234 dp += 32;
235 dp = writeescapedchar(dest, dp, c);
236 } else if (c == '\"') {
237 if (parseinclude) {
238 dest[dp++] = '\"';
239 dest[dp++] = '>';
240 memcpy(&(dest[dp]), includefile, ifp);
241 dp += ifp;
243 dp = writeescapedchar(dest, dp, c);
244 memcpy(&(dest[dp]), "</a>", 4);
245 dp += 4;
246 parseinclude = 0;
247 } else {
248 memcpy(&(dest[dp]), "<a class=\"c2html-userinclude\" href=", 35);
249 dp += 35;
250 dp = writeescapedchar(dest, dp, c);
251 ifp = 0;
252 includefile[ifp++] = '\"';
253 parseinclude = 1;
254 }
255 } else if (c == '>') {
256 dp = writeescapedchar(dest, dp, c);
257 memcpy(&(dest[dp]), "</span>", 7);
258 dp += 7;
259 } else {
260 if (parseinclude) {
261 includefile[ifp++] = c;
262 }
263 dp = writeescapedchar(dest, dp, c);
264 }
265 } else {
266 /* strings */
267 if (!isescaping && (c == '\'' || c == '\"')) {
268 isstring ^= 1;
269 if (isstring) {
270 memcpy(&(dest[dp]), "<span class=\"c2html-string\">", 28);
271 dp += 28;
272 dp = writeescapedchar(dest, dp, c);
273 } else {
274 dp = writeescapedchar(dest, dp, c);
275 memcpy(&(dest[dp]), "</span>", 7);
276 dp += 7;
277 }
278 } else {
279 if (isstring) {
280 dp = writeescapedchar(dest, dp, c);
281 } else if (!isalnum(c) && c != '_' && c != '#' && c != '.') {
282 /* interpret word int_t */
283 if (wp > 0 && wp < WORDBUF_SIZE) {
284 int closespan = 1;
285 if (iskeyword(word, highlighter->keywords)) {
286 memcpy(&(dest[dp]), "<span class=\"c2html-keyword\">", 29);
287 dp += 29;
288 } else if (highlighter->istype(word, wp)) {
289 memcpy(&(dest[dp]), "<span class=\"c2html-type\">", 26);
290 dp += 26;
291 } else if (highlighter->isdirective(word)) {
292 isinclude = !strncmp("#include", word, WORDBUF_SIZE);
293 memcpy(&(dest[dp]), "<span class=\"c2html-directive\">", 31);
294 dp += 31;
295 } else if (iscapsonly(word, wp)) {
296 memcpy(&(dest[dp]), "<span class=\"c2html-macroconst\">", 32);
297 dp += 32;
298 } else {
299 closespan = 0;
300 }
301 for (int i = 0 ; i < wp ; i++) {
302 dp = writeescapedchar(dest, dp, word[i]);
303 }
304 if (closespan) {
305 memcpy(&(dest[dp]), "</span>", 7);
306 dp += 7;
307 }
308 }
309 memset(word, 0, WORDBUF_SIZE);
310 wp = 0;
311 dp = writeescapedchar(dest, dp, c);
312 } else {
313 /* read word */
314 if (wp < WORDBUF_SIZE) {
315 word[wp++] = c;
316 } else if (wp == WORDBUF_SIZE) {
317 for (int i = 0 ; i < WORDBUF_SIZE ; i++) {
318 dp = writeescapedchar(dest, dp, word[i]);
319 }
320 wp++;
321 dp = writeescapedchar(dest, dp, c);
322 } else {
323 dp = writeescapedchar(dest, dp, c);
324 }
325 }
326 }
328 isescaping = !isescaping & (c == '\\');
329 }
330 }
331 dest[dp] = 0;
332 }
/*
 * Prints the usage text of the program to stdout.
 */
void printhelp() {
    static const char helptext[] =
        "Formats source code using HTML.\n\nUsage:\n"
        " c2html [Options] FILE\n\n"
        " Options:\n"
        " -h Prints this help message\n"
        " -o <output> Output file (if not specified, stdout is used)\n"
        " -p Disable highlighting (plain text)\n"
        "\n";

    fputs(helptext, stdout);
}
/*
 * Computes the number of decimal digits needed to print a line count.
 *
 * lnc: the number to measure
 *
 * Returns the digit count; it is 1 for 0 to 9, 2 for 10 to 99 and so on.
 * The digits are counted by division, so exact powers of ten are measured
 * correctly and no intermediate value can overflow.
 */
int lnint(size_t lnc) {
    int w = 1;
    while (lnc >= 10) {
        lnc /= 10;
        w++;
    }
    return w;
}
352 int main(int argc, char** argv) {
354 settings_t settings;
355 settings.outfilename = NULL;
356 settings.highlight = 1;
358 highlighter_t highlighter;
359 highlighter.isdirective = isdirective;
360 highlighter.istype = istype;
361 highlighter.keywords = ckeywords;
363 char optc;
364 while ((optc = getopt(argc, argv, "ho:p")) != -1) {
365 switch (optc) {
366 case 'o':
367 if (!(optarg[0] == '-' && optarg[1] == 0)) {
368 settings.outfilename = optarg;
369 }
370 break;
371 case 'p':
372 settings.highlight = 0;
373 break;
374 case 'h':
375 printhelp();
376 return 0;
377 default:
378 return 1;
379 }
380 }
382 if (optind != argc-1) {
383 printhelp();
384 return 1;
385 } else {
386 settings.infilename = argv[optind];
388 inputfile_t *inputfile = readinput(settings.infilename);
389 if (inputfile) {
390 FILE *fout;
391 char *line;
392 if (settings.highlight) {
393 line = (char*) malloc(inputfile->maxlinewidth*64);
394 } else {
395 line = NULL;
396 }
397 if (settings.outfilename) {
398 fout = fopen(settings.outfilename, "w");
399 } else {
400 fout = stdout;
401 }
402 fprintf(fout, "<pre>\n");
403 int lnw = lnint(inputfile->count);
404 for (int i = 0 ; i < inputfile->count ; i++) {
405 if (settings.highlight) {
406 parseline(inputfile->lines[i], line, &highlighter);
407 } else {
408 line = inputfile->lines[i];
409 }
410 fprintf(fout, "<span class=\"c2html-lineno\">%*d:</span> %s",
411 lnw, i+1, line);
412 }
413 if (settings.highlight) {
414 free(line);
415 }
416 fprintf(fout, "</pre>\n");
418 if (fout != stdout) {
419 fclose(fout);
420 }
422 freeinputfilebuffer(inputfile);
423 }
425 return 0;
426 }
427 }