1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/highlighter.c Fri Aug 26 13:49:19 2016 +0200 1.3 @@ -0,0 +1,356 @@ 1.4 +/* 1.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 1.6 + * 1.7 + * Copyright 2016 Mike Becker. All rights reserved. 1.8 + * 1.9 + * Redistribution and use in source and binary forms, with or without 1.10 + * modification, are permitted provided that the following conditions are met: 1.11 + * 1.12 + * 1. Redistributions of source code must retain the above copyright 1.13 + * notice, this list of conditions and the following disclaimer. 1.14 + * 1.15 + * 2. Redistributions in binary form must reproduce the above copyright 1.16 + * notice, this list of conditions and the following disclaimer in the 1.17 + * documentation and/or other materials provided with the distribution. 1.18 + * 1.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 1.20 + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1.21 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1.22 + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 1.23 + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 1.24 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 1.25 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 1.26 + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 1.27 + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 1.28 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 1.29 + * POSSIBILITY OF SUCH DAMAGE. 1.30 + * 1.31 + */ 1.32 + 1.33 +#include "highlighter.h" 1.34 + 1.35 +HighlighterData* new_highlighter_data() { 1.36 + HighlighterData* hd = malloc(sizeof(HighlighterData)); 1.37 + if (hd == NULL) { 1.38 + return NULL; 1.39 + } else { 1.40 + hd->multiline_comment = 0; 1.41 + hd->primary_buffer = ucx_buffer_new(NULL, 256, UCX_BUFFER_AUTOEXTEND); 1.42 + hd->secondary_buffer = ucx_buffer_new(NULL, 32, UCX_BUFFER_AUTOEXTEND); 1.43 + return hd; 1.44 + } 1.45 +} 1.46 + 1.47 +void free_highlighter_data(HighlighterData *hd) { 1.48 + ucx_buffer_free(hd->primary_buffer); 1.49 + ucx_buffer_free(hd->secondary_buffer); 1.50 + free(hd); 1.51 +} 1.52 + 1.53 +static void put_htmlescaped(UcxBuffer *dest, char c) { 1.54 + if (c == '>') { 1.55 + ucx_buffer_puts(dest, ">"); 1.56 + } else if (c == '<') { 1.57 + ucx_buffer_puts(dest, "<"); 1.58 + } else { 1.59 + ucx_buffer_putc(dest, c); 1.60 + } 1.61 +} 1.62 + 1.63 +static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) { 1.64 + for (int i = 0 ; i < s.length ; i++) { 1.65 + put_htmlescaped(dest, s.ptr[i]); 1.66 + } 1.67 +} 1.68 + 1.69 +static int check_keyword(sstr_t word, const char** keywords) { 1.70 + for (int i = 0 ; keywords[i] ; i++) { 1.71 + if (sstrcmp(word, sstr((char*)keywords[i])) == 0) { 1.72 + return 1; 1.73 + } 1.74 + } 1.75 + return 0; 1.76 +} 1.77 + 1.78 +static int check_capsonly(sstr_t word) { 1.79 + for (size_t i = 0 ; i < word.length ; i++) { 1.80 + if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i]) 1.81 + && word.ptr[i] != '_') { 1.82 + return 0; 1.83 + } 1.84 + } 1.85 + return 1; 1.86 +} 1.87 + 1.88 +/* Plaintext Highlighter */ 1.89 + 1.90 +void plain_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) { 1.91 + while (*src && *src != '\n') { 1.92 + put_htmlescaped(dest, *src); 1.93 + src++; 1.94 + } 1.95 + ucx_buffer_putc(dest, '\n'); 1.96 +} 1.97 + 1.98 +/* C Highlighter */ 1.99 + 1.100 +static const char* ckeywords[] = { 1.101 + "auto", "break", "case", "char", "const", "continue", "default", "do", 1.102 + "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", 1.103 + "long", "register", "return", "short", "signed", "sizeof", "static", 1.104 + "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", 1.105 + "while", NULL 1.106 +}; 1.107 + 1.108 +void c_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) { 1.109 + /* reset buffers without clearing them */ 1.110 + hd->primary_buffer->size = hd->primary_buffer->pos = 0; 1.111 + hd->secondary_buffer->size = hd->secondary_buffer->pos = 0; 1.112 + 1.113 + /* alias the buffers for better handling */ 1.114 + UcxBuffer *wbuf = hd->primary_buffer; 1.115 + UcxBuffer *ifilebuf = hd->secondary_buffer; 1.116 + 1.117 + /* local information */ 1.118 + size_t sp = (size_t)-1; 1.119 + int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0; 1.120 + char quote = '\0'; 1.121 + int isescaping = 0; 1.122 + 1.123 + /* continue a multi line comment highlighting */ 1.124 + if (hd->multiline_comment) { 1.125 + iscomment = 1; 1.126 + ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); 1.127 + } 1.128 + 1.129 + char c; 1.130 + do { 1.131 + c = src[++sp]; 1.132 + if (!c) break; 1.133 + 1.134 + /* comments */ 1.135 + if (!isstring && c == '/') { 1.136 + if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') { 1.137 + iscomment = 0; 1.138 + hd->multiline_comment = 0; 1.139 + ucx_buffer_puts(dest, "/</span>"); 1.140 + continue; 1.141 + } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { 1.142 + iscomment = 1; 1.143 + hd->multiline_comment = (src[sp+1] == '*'); 1.144 + ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); 1.145 + } 1.146 + } 1.147 + 1.148 + if (iscomment) { 1.149 + if (c == '\n') { 1.150 + ucx_buffer_puts(dest, "</span>\n"); 1.151 + } else { 1.152 + put_htmlescaped(dest, c); 1.153 + } 1.154 + } else if (isinclude) { 1.155 + if (c == '<') { 1.156 + ucx_buffer_puts(dest, 1.157 + "<span class=\"c2html-stdinclude\"><"); 1.158 + } else if (c == '\"') { 1.159 + if (parseinclude) { 1.160 + ucx_buffer_puts(dest, "\">"); 1.161 + ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest); 1.162 + ucx_buffer_puts(dest, "\"</a>"); 1.163 + parseinclude = 0; 1.164 + } else { 1.165 + ucx_buffer_puts(dest, 1.166 + "<a class=\"c2html-userinclude\" href=\""); 1.167 + ucx_buffer_putc(ifilebuf, '\"'); 1.168 + parseinclude = 1; 1.169 + } 1.170 + } else if (c == '>') { 1.171 + ucx_buffer_puts(dest, "></span>"); 1.172 + } else { 1.173 + if (parseinclude) { 1.174 + ucx_buffer_putc(ifilebuf, c); 1.175 + } 1.176 + put_htmlescaped(dest, c); 1.177 + } 1.178 + } else { 1.179 + /* strings */ 1.180 + if (!isescaping && (c == '\'' || c == '\"')) { 1.181 + if (isstring) { 1.182 + put_htmlescaped(dest, c); 1.183 + if (c == quote) { 1.184 + isstring = 0; 1.185 + ucx_buffer_puts(dest, "</span>"); 1.186 + } else { 1.187 + put_htmlescaped(dest, c); 1.188 + } 1.189 + } else { 1.190 + isstring = 1; 1.191 + quote = c; 1.192 + ucx_buffer_puts(dest, "<span class=\"c2html-string\">"); 1.193 + put_htmlescaped(dest, c); 1.194 + } 1.195 + } else { 1.196 + if (isstring) { 1.197 + put_htmlescaped(dest, c); 1.198 + } else if (!isalnum(c) && c!='_' && c!='#') { 1.199 + /* write buffered word, if any */ 1.200 + if (wbuf->size > 0) { 1.201 + sstr_t word = sstrn(wbuf->space, wbuf->size); 1.202 + int closespan = 1; 1.203 + sstr_t typesuffix = ST("_t"); 1.204 + if (check_keyword(word, ckeywords)) { 1.205 + ucx_buffer_puts(dest, 1.206 + "<span class=\"c2html-keyword\">"); 1.207 + } else if (sstrsuffix(word, typesuffix)) { 1.208 + ucx_buffer_puts(dest, 1.209 + "<span class=\"c2html-type\">"); 1.210 + } else if (word.ptr[0] == '#') { 1.211 + isinclude = !sstrcmp(word, S("#include")); 1.212 + ucx_buffer_puts(dest, 1.213 + "<span class=\"c2html-directive\">"); 1.214 + } else if (check_capsonly(word)) { 1.215 + ucx_buffer_puts(dest, 1.216 + "<span class=\"c2html-macroconst\">"); 1.217 + } else { 1.218 + closespan = 0; 1.219 + } 1.220 + put_htmlescapedstr(dest, word); 1.221 + if (closespan) { 1.222 + ucx_buffer_puts(dest, "</span>"); 1.223 + } 1.224 + } 1.225 + wbuf->pos = wbuf->size = 0; /* reset word buffer */ 1.226 + 1.227 + /* write current character */ 1.228 + put_htmlescaped(dest, c); 1.229 + } else { 1.230 + /* buffer the current word */ 1.231 + ucx_buffer_putc(wbuf, c); 1.232 + } 1.233 + } 1.234 + 1.235 + isescaping = !isescaping & (c == '\\'); 1.236 + } 1.237 + } while (c != '\n'); 1.238 +} 1.239 + 1.240 +/* Java Highlighter */ 1.241 + 1.242 +static const char* jkeywords[] = { 1.243 + "abstract", "continue", "for", "new", "switch", "assert", "default", "goto", 1.244 + "package", "synchronized", "boolean", "do", "if", "private", "this", 1.245 + "break", "double", "implements", "protected", "throw", "byte", "else", 1.246 + "import", "public", "throws", "case", "enum", "instanceof", "return", 1.247 + "transient", "catch", "extends", "int", "short", "try", "char", "final", 1.248 + "interface", "static", "void", "class", "finally", "long", "strictfp", 1.249 + "volatile", "const", "float", "native", "super", "while", NULL 1.250 +}; 1.251 + 1.252 +void java_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) { 1.253 + /* reset buffers without clearing them */ 1.254 + hd->primary_buffer->size = hd->primary_buffer->pos = 0; 1.255 + hd->secondary_buffer->size = hd->secondary_buffer->pos = 0; 1.256 + 1.257 + /* alias the buffers for better handling */ 1.258 + UcxBuffer *wbuf = hd->primary_buffer; 1.259 + 1.260 + /* local information */ 1.261 + size_t sp = (size_t)-1; 1.262 + int isstring = 0, iscomment = 0, isimport = 0; 1.263 + char quote = '\0'; 1.264 + int isescaping = 0; 1.265 + 1.266 + if (hd->multiline_comment) { 1.267 + iscomment = 1; 1.268 + ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); 1.269 + } 1.270 + 1.271 + char c; 1.272 + do { 1.273 + c = src[++sp]; 1.274 + if (!c) break; 1.275 + 1.276 + /* comments */ 1.277 + if (!isstring && c == '/') { 1.278 + if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') { 1.279 + iscomment = 0; 1.280 + hd->multiline_comment = 0; 1.281 + ucx_buffer_puts(dest, "/</span>"); 1.282 + continue; 1.283 + } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { 1.284 + iscomment = 1; 1.285 + hd->multiline_comment = (src[sp+1] == '*'); 1.286 + ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); 1.287 + } 1.288 + } 1.289 + 1.290 + if (iscomment) { 1.291 + if (c == '\n') { 1.292 + ucx_buffer_puts(dest, "</span>\n"); 1.293 + } else { 1.294 + put_htmlescaped(dest, c); 1.295 + } 1.296 + } else if (isimport) { 1.297 + /* TODO: local imports */ 1.298 + } else { 1.299 + /* strings */ 1.300 + if (!isescaping && (c == '\'' || c == '\"')) { 1.301 + if (isstring) { 1.302 + put_htmlescaped(dest, c); 1.303 + if (c == quote) { 1.304 + isstring = 0; 1.305 + ucx_buffer_puts(dest, "</span>"); 1.306 + } else { 1.307 + put_htmlescaped(dest, c); 1.308 + } 1.309 + } else { 1.310 + isstring = 1; 1.311 + quote = c; 1.312 + ucx_buffer_puts(dest, 1.313 + "<span class=\"c2html-string\">"); 1.314 + put_htmlescaped(dest, c); 1.315 + } 1.316 + } else { 1.317 + if (isstring) { 1.318 + put_htmlescaped(dest, c); 1.319 + } else if (!isalnum(c) && c!='_' && c!='@') { 1.320 + /* write buffered word, if any */ 1.321 + if (wbuf->size > 0) { 1.322 + sstr_t word = sstrn(wbuf->space, wbuf->size); 1.323 + int closespan = 1; 1.324 + if (check_keyword(word, jkeywords)) { 1.325 + ucx_buffer_puts(dest, 1.326 + "<span class=\"c2html-keyword\">"); 1.327 + } else if (isupper(word.ptr[0])) { 1.328 + ucx_buffer_puts(dest, 1.329 + "<span class=\"c2html-type\">"); 1.330 + } else if (word.ptr[0] == '@') { 1.331 + ucx_buffer_puts(dest, 1.332 + "<span class=\"c2html-directive\">"); 1.333 + } else if (check_capsonly(word)) { 1.334 + ucx_buffer_puts(dest, 1.335 + "<span class=\"c2html-macroconst\">"); 1.336 + } else { 1.337 + closespan = 0; 1.338 + } 1.339 + put_htmlescapedstr(dest, word); 1.340 + 1.341 + if (closespan) { 1.342 + ucx_buffer_puts(dest, "</span>"); 1.343 + } 1.344 + } 1.345 + wbuf->pos = wbuf->size = 0; /* reset buffer */ 1.346 + 1.347 + /* write current character */ 1.348 + put_htmlescaped(dest, c); 1.349 + } else { 1.350 + /* buffer the current word */ 1.351 + ucx_buffer_putc(wbuf, c); 1.352 + } 1.353 + } 1.354 + 1.355 + isescaping = !isescaping & (c == '\\'); 1.356 + } 1.357 + } while (c != '\n'); 1.358 +} 1.359 +