Wed, 31 Aug 2016 12:58:48 +0200
highlighter can now handle files which do not end with a blank line
universe@21 | 1 | /* |
universe@21 | 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
universe@21 | 3 | * |
universe@35 | 4 | * Copyright 2016 Mike Becker. All rights reserved. |
universe@21 | 5 | * |
universe@21 | 6 | * Redistribution and use in source and binary forms, with or without |
universe@21 | 7 | * modification, are permitted provided that the following conditions are met: |
universe@21 | 8 | * |
universe@21 | 9 | * 1. Redistributions of source code must retain the above copyright |
universe@21 | 10 | * notice, this list of conditions and the following disclaimer. |
universe@21 | 11 | * |
universe@21 | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
universe@21 | 13 | * notice, this list of conditions and the following disclaimer in the |
universe@21 | 14 | * documentation and/or other materials provided with the distribution. |
universe@21 | 15 | * |
universe@21 | 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
universe@21 | 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
universe@21 | 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
universe@21 | 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
universe@21 | 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
universe@21 | 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
universe@21 | 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
universe@21 | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
universe@21 | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
universe@21 | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
universe@21 | 26 | * POSSIBILITY OF SUCH DAMAGE. |
universe@21 | 27 | * |
universe@21 | 28 | */ |
universe@21 | 29 | |
universe@52 | 30 | #include "highlighter.h" |
universe@21 | 31 | |
universe@51 | 32 | HighlighterData* new_highlighter_data() { |
universe@51 | 33 | HighlighterData* hd = malloc(sizeof(HighlighterData)); |
universe@51 | 34 | if (hd == NULL) { |
universe@51 | 35 | return NULL; |
universe@51 | 36 | } else { |
universe@51 | 37 | hd->multiline_comment = 0; |
universe@51 | 38 | hd->primary_buffer = ucx_buffer_new(NULL, 256, UCX_BUFFER_AUTOEXTEND); |
universe@51 | 39 | hd->secondary_buffer = ucx_buffer_new(NULL, 32, UCX_BUFFER_AUTOEXTEND); |
universe@51 | 40 | return hd; |
universe@51 | 41 | } |
universe@51 | 42 | } |
universe@51 | 43 | |
universe@51 | 44 | void free_highlighter_data(HighlighterData *hd) { |
universe@51 | 45 | ucx_buffer_free(hd->primary_buffer); |
universe@51 | 46 | ucx_buffer_free(hd->secondary_buffer); |
universe@51 | 47 | free(hd); |
universe@51 | 48 | } |
universe@51 | 49 | |
universe@52 | 50 | static void put_htmlescaped(UcxBuffer *dest, char c) { |
universe@21 | 51 | if (c == '>') { |
universe@48 | 52 | ucx_buffer_puts(dest, ">"); |
universe@21 | 53 | } else if (c == '<') { |
universe@48 | 54 | ucx_buffer_puts(dest, "<"); |
universe@54 | 55 | } else if (c) { |
universe@48 | 56 | ucx_buffer_putc(dest, c); |
universe@21 | 57 | } |
universe@48 | 58 | } |
universe@21 | 59 | |
universe@52 | 60 | static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) { |
universe@48 | 61 | for (int i = 0 ; i < s.length ; i++) { |
universe@48 | 62 | put_htmlescaped(dest, s.ptr[i]); |
universe@48 | 63 | } |
universe@21 | 64 | } |
universe@21 | 65 | |
universe@52 | 66 | static int check_keyword(sstr_t word, const char** keywords) { |
universe@21 | 67 | for (int i = 0 ; keywords[i] ; i++) { |
universe@47 | 68 | if (sstrcmp(word, sstr((char*)keywords[i])) == 0) { |
universe@21 | 69 | return 1; |
universe@21 | 70 | } |
universe@21 | 71 | } |
universe@21 | 72 | return 0; |
universe@21 | 73 | } |
universe@21 | 74 | |
universe@52 | 75 | static int check_capsonly(sstr_t word) { |
universe@47 | 76 | for (size_t i = 0 ; i < word.length ; i++) { |
universe@47 | 77 | if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i]) |
universe@47 | 78 | && word.ptr[i] != '_') { |
universe@21 | 79 | return 0; |
universe@21 | 80 | } |
universe@21 | 81 | } |
universe@21 | 82 | return 1; |
universe@21 | 83 | } |
universe@52 | 84 | |
universe@52 | 85 | /* Plaintext Highlighter */ |
universe@52 | 86 | |
universe@52 | 87 | void plain_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) { |
universe@52 | 88 | while (*src && *src != '\n') { |
universe@52 | 89 | put_htmlescaped(dest, *src); |
universe@52 | 90 | src++; |
universe@52 | 91 | } |
universe@52 | 92 | ucx_buffer_putc(dest, '\n'); |
universe@52 | 93 | } |
universe@52 | 94 | |
universe@52 | 95 | /* C Highlighter */ |
universe@52 | 96 | |
/*
 * Keywords recognized by the C highlighter, terminated by a NULL entry.
 * Contains the C89 keywords followed by the keywords added in C99.
 */
static const char* ckeywords[] = {
    /* C89 */
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    "long", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    "while",
    /* C99 */
    "inline", "restrict", "_Bool", "_Complex", "_Imaginary",
    NULL
};
universe@52 | 104 | |
universe@52 | 105 | void c_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) { |
universe@52 | 106 | /* reset buffers without clearing them */ |
universe@52 | 107 | hd->primary_buffer->size = hd->primary_buffer->pos = 0; |
universe@52 | 108 | hd->secondary_buffer->size = hd->secondary_buffer->pos = 0; |
universe@52 | 109 | |
universe@52 | 110 | /* alias the buffers for better handling */ |
universe@52 | 111 | UcxBuffer *wbuf = hd->primary_buffer; |
universe@52 | 112 | UcxBuffer *ifilebuf = hd->secondary_buffer; |
universe@52 | 113 | |
universe@52 | 114 | /* local information */ |
universe@52 | 115 | size_t sp = (size_t)-1; |
universe@52 | 116 | int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0; |
universe@52 | 117 | char quote = '\0'; |
universe@52 | 118 | int isescaping = 0; |
universe@52 | 119 | |
universe@52 | 120 | /* continue a multi line comment highlighting */ |
universe@52 | 121 | if (hd->multiline_comment) { |
universe@52 | 122 | iscomment = 1; |
universe@52 | 123 | ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); |
universe@52 | 124 | } |
universe@52 | 125 | |
universe@52 | 126 | char c; |
universe@52 | 127 | do { |
universe@52 | 128 | c = src[++sp]; |
universe@52 | 129 | |
universe@52 | 130 | /* comments */ |
universe@52 | 131 | if (!isstring && c == '/') { |
universe@52 | 132 | if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') { |
universe@52 | 133 | iscomment = 0; |
universe@52 | 134 | hd->multiline_comment = 0; |
universe@52 | 135 | ucx_buffer_puts(dest, "/</span>"); |
universe@52 | 136 | continue; |
universe@52 | 137 | } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { |
universe@52 | 138 | iscomment = 1; |
universe@52 | 139 | hd->multiline_comment = (src[sp+1] == '*'); |
universe@52 | 140 | ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); |
universe@52 | 141 | } |
universe@52 | 142 | } |
universe@52 | 143 | |
universe@52 | 144 | if (iscomment) { |
universe@52 | 145 | if (c == '\n') { |
universe@52 | 146 | ucx_buffer_puts(dest, "</span>\n"); |
universe@52 | 147 | } else { |
universe@52 | 148 | put_htmlescaped(dest, c); |
universe@52 | 149 | } |
universe@52 | 150 | } else if (isinclude) { |
universe@52 | 151 | if (c == '<') { |
universe@52 | 152 | ucx_buffer_puts(dest, |
universe@52 | 153 | "<span class=\"c2html-stdinclude\"><"); |
universe@52 | 154 | } else if (c == '\"') { |
universe@52 | 155 | if (parseinclude) { |
universe@52 | 156 | ucx_buffer_puts(dest, "\">"); |
universe@52 | 157 | ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest); |
universe@52 | 158 | ucx_buffer_puts(dest, "\"</a>"); |
universe@52 | 159 | parseinclude = 0; |
universe@52 | 160 | } else { |
universe@52 | 161 | ucx_buffer_puts(dest, |
universe@52 | 162 | "<a class=\"c2html-userinclude\" href=\""); |
universe@52 | 163 | ucx_buffer_putc(ifilebuf, '\"'); |
universe@52 | 164 | parseinclude = 1; |
universe@52 | 165 | } |
universe@52 | 166 | } else if (c == '>') { |
universe@52 | 167 | ucx_buffer_puts(dest, "></span>"); |
universe@52 | 168 | } else { |
universe@52 | 169 | if (parseinclude) { |
universe@52 | 170 | ucx_buffer_putc(ifilebuf, c); |
universe@52 | 171 | } |
universe@52 | 172 | put_htmlescaped(dest, c); |
universe@52 | 173 | } |
universe@52 | 174 | } else { |
universe@52 | 175 | /* strings */ |
universe@52 | 176 | if (!isescaping && (c == '\'' || c == '\"')) { |
universe@52 | 177 | if (isstring) { |
universe@52 | 178 | put_htmlescaped(dest, c); |
universe@52 | 179 | if (c == quote) { |
universe@52 | 180 | isstring = 0; |
universe@52 | 181 | ucx_buffer_puts(dest, "</span>"); |
universe@52 | 182 | } else { |
universe@52 | 183 | put_htmlescaped(dest, c); |
universe@52 | 184 | } |
universe@52 | 185 | } else { |
universe@52 | 186 | isstring = 1; |
universe@52 | 187 | quote = c; |
universe@52 | 188 | ucx_buffer_puts(dest, "<span class=\"c2html-string\">"); |
universe@52 | 189 | put_htmlescaped(dest, c); |
universe@52 | 190 | } |
universe@52 | 191 | } else { |
universe@52 | 192 | if (isstring) { |
universe@52 | 193 | put_htmlescaped(dest, c); |
universe@54 | 194 | } else if (isalnum(c) || c == '_' || c == '#') { |
universe@54 | 195 | /* buffer the current word */ |
universe@54 | 196 | ucx_buffer_putc(wbuf, c); |
universe@54 | 197 | } else { |
universe@52 | 198 | /* write buffered word, if any */ |
universe@52 | 199 | if (wbuf->size > 0) { |
universe@52 | 200 | sstr_t word = sstrn(wbuf->space, wbuf->size); |
universe@52 | 201 | int closespan = 1; |
universe@52 | 202 | sstr_t typesuffix = ST("_t"); |
universe@52 | 203 | if (check_keyword(word, ckeywords)) { |
universe@52 | 204 | ucx_buffer_puts(dest, |
universe@52 | 205 | "<span class=\"c2html-keyword\">"); |
universe@52 | 206 | } else if (sstrsuffix(word, typesuffix)) { |
universe@52 | 207 | ucx_buffer_puts(dest, |
universe@52 | 208 | "<span class=\"c2html-type\">"); |
universe@52 | 209 | } else if (word.ptr[0] == '#') { |
universe@52 | 210 | isinclude = !sstrcmp(word, S("#include")); |
universe@52 | 211 | ucx_buffer_puts(dest, |
universe@52 | 212 | "<span class=\"c2html-directive\">"); |
universe@52 | 213 | } else if (check_capsonly(word)) { |
universe@52 | 214 | ucx_buffer_puts(dest, |
universe@52 | 215 | "<span class=\"c2html-macroconst\">"); |
universe@52 | 216 | } else { |
universe@52 | 217 | closespan = 0; |
universe@52 | 218 | } |
universe@52 | 219 | put_htmlescapedstr(dest, word); |
universe@52 | 220 | if (closespan) { |
universe@52 | 221 | ucx_buffer_puts(dest, "</span>"); |
universe@52 | 222 | } |
universe@52 | 223 | } |
universe@52 | 224 | wbuf->pos = wbuf->size = 0; /* reset word buffer */ |
universe@52 | 225 | |
universe@52 | 226 | /* write current character */ |
universe@52 | 227 | put_htmlescaped(dest, c); |
universe@52 | 228 | } |
universe@52 | 229 | } |
universe@52 | 230 | |
universe@52 | 231 | isescaping = !isescaping & (c == '\\'); |
universe@52 | 232 | } |
universe@54 | 233 | } while (c && c != '\n'); |
universe@52 | 234 | } |
universe@52 | 235 | |
universe@52 | 236 | /* Java Highlighter */ |
universe@52 | 237 | |
/*
 * Keywords recognized by the Java highlighter in alphabetical order,
 * terminated by a NULL entry.
 */
static const char* jkeywords[] = {
    "abstract", "assert",
    "boolean", "break", "byte",
    "case", "catch", "char", "class", "const", "continue",
    "default", "do", "double",
    "else", "enum", "extends",
    "final", "finally", "float", "for",
    "goto",
    "if", "implements", "import", "instanceof", "int", "interface",
    "long",
    "native", "new",
    "package", "private", "protected", "public",
    "return",
    "short", "static", "strictfp", "super", "switch", "synchronized",
    "this", "throw", "throws", "transient", "try",
    "void", "volatile",
    "while",
    NULL
};
universe@52 | 247 | |
universe@52 | 248 | void java_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) { |
universe@52 | 249 | /* reset buffers without clearing them */ |
universe@52 | 250 | hd->primary_buffer->size = hd->primary_buffer->pos = 0; |
universe@52 | 251 | hd->secondary_buffer->size = hd->secondary_buffer->pos = 0; |
universe@52 | 252 | |
universe@52 | 253 | /* alias the buffers for better handling */ |
universe@52 | 254 | UcxBuffer *wbuf = hd->primary_buffer; |
universe@52 | 255 | |
universe@52 | 256 | /* local information */ |
universe@52 | 257 | size_t sp = (size_t)-1; |
universe@52 | 258 | int isstring = 0, iscomment = 0, isimport = 0; |
universe@52 | 259 | char quote = '\0'; |
universe@52 | 260 | int isescaping = 0; |
universe@52 | 261 | |
universe@52 | 262 | if (hd->multiline_comment) { |
universe@52 | 263 | iscomment = 1; |
universe@52 | 264 | ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); |
universe@52 | 265 | } |
universe@52 | 266 | |
universe@52 | 267 | char c; |
universe@52 | 268 | do { |
universe@52 | 269 | c = src[++sp]; |
universe@52 | 270 | |
universe@52 | 271 | /* comments */ |
universe@52 | 272 | if (!isstring && c == '/') { |
universe@52 | 273 | if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') { |
universe@52 | 274 | iscomment = 0; |
universe@52 | 275 | hd->multiline_comment = 0; |
universe@52 | 276 | ucx_buffer_puts(dest, "/</span>"); |
universe@52 | 277 | continue; |
universe@52 | 278 | } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { |
universe@52 | 279 | iscomment = 1; |
universe@52 | 280 | hd->multiline_comment = (src[sp+1] == '*'); |
universe@52 | 281 | ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); |
universe@52 | 282 | } |
universe@52 | 283 | } |
universe@52 | 284 | |
universe@52 | 285 | if (iscomment) { |
universe@52 | 286 | if (c == '\n') { |
universe@52 | 287 | ucx_buffer_puts(dest, "</span>\n"); |
universe@52 | 288 | } else { |
universe@52 | 289 | put_htmlescaped(dest, c); |
universe@52 | 290 | } |
universe@52 | 291 | } else if (isimport) { |
universe@52 | 292 | /* TODO: local imports */ |
universe@52 | 293 | } else { |
universe@52 | 294 | /* strings */ |
universe@52 | 295 | if (!isescaping && (c == '\'' || c == '\"')) { |
universe@52 | 296 | if (isstring) { |
universe@52 | 297 | put_htmlescaped(dest, c); |
universe@52 | 298 | if (c == quote) { |
universe@52 | 299 | isstring = 0; |
universe@52 | 300 | ucx_buffer_puts(dest, "</span>"); |
universe@52 | 301 | } else { |
universe@52 | 302 | put_htmlescaped(dest, c); |
universe@52 | 303 | } |
universe@52 | 304 | } else { |
universe@52 | 305 | isstring = 1; |
universe@52 | 306 | quote = c; |
universe@52 | 307 | ucx_buffer_puts(dest, |
universe@52 | 308 | "<span class=\"c2html-string\">"); |
universe@52 | 309 | put_htmlescaped(dest, c); |
universe@52 | 310 | } |
universe@52 | 311 | } else { |
universe@52 | 312 | if (isstring) { |
universe@52 | 313 | put_htmlescaped(dest, c); |
universe@54 | 314 | } else if (isalnum(c) || c == '_' || c == '@') { |
universe@54 | 315 | /* buffer the current word */ |
universe@54 | 316 | ucx_buffer_putc(wbuf, c); |
universe@54 | 317 | } else { |
universe@52 | 318 | /* write buffered word, if any */ |
universe@52 | 319 | if (wbuf->size > 0) { |
universe@52 | 320 | sstr_t word = sstrn(wbuf->space, wbuf->size); |
universe@52 | 321 | int closespan = 1; |
universe@52 | 322 | if (check_keyword(word, jkeywords)) { |
universe@52 | 323 | ucx_buffer_puts(dest, |
universe@52 | 324 | "<span class=\"c2html-keyword\">"); |
universe@52 | 325 | } else if (isupper(word.ptr[0])) { |
universe@52 | 326 | ucx_buffer_puts(dest, |
universe@52 | 327 | "<span class=\"c2html-type\">"); |
universe@52 | 328 | } else if (word.ptr[0] == '@') { |
universe@52 | 329 | ucx_buffer_puts(dest, |
universe@52 | 330 | "<span class=\"c2html-directive\">"); |
universe@52 | 331 | } else if (check_capsonly(word)) { |
universe@52 | 332 | ucx_buffer_puts(dest, |
universe@52 | 333 | "<span class=\"c2html-macroconst\">"); |
universe@52 | 334 | } else { |
universe@52 | 335 | closespan = 0; |
universe@52 | 336 | } |
universe@52 | 337 | put_htmlescapedstr(dest, word); |
universe@52 | 338 | |
universe@52 | 339 | if (closespan) { |
universe@52 | 340 | ucx_buffer_puts(dest, "</span>"); |
universe@52 | 341 | } |
universe@52 | 342 | } |
universe@52 | 343 | wbuf->pos = wbuf->size = 0; /* reset buffer */ |
universe@52 | 344 | |
universe@52 | 345 | /* write current character */ |
universe@52 | 346 | put_htmlescaped(dest, c); |
universe@52 | 347 | } |
universe@52 | 348 | } |
universe@52 | 349 | |
universe@52 | 350 | isescaping = !isescaping & (c == '\\'); |
universe@52 | 351 | } |
universe@54 | 352 | } while (c && c != '\n'); |
universe@52 | 353 | } |
universe@52 | 354 |