src/highlighter.c

Fri, 26 Aug 2016 13:49:19 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 26 Aug 2016 13:49:19 +0200
changeset 52
33ded421c512
parent 51
src/codegens.c@f25ba6fd7a08
child 53
5e47a26a16f0
permissions
-rw-r--r--

merges all highlighter functions into one highlighter module

universe@21 1 /*
universe@21 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@21 3 *
universe@35 4 * Copyright 2016 Mike Becker. All rights reserved.
universe@21 5 *
universe@21 6 * Redistribution and use in source and binary forms, with or without
universe@21 7 * modification, are permitted provided that the following conditions are met:
universe@21 8 *
universe@21 9 * 1. Redistributions of source code must retain the above copyright
universe@21 10 * notice, this list of conditions and the following disclaimer.
universe@21 11 *
universe@21 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@21 13 * notice, this list of conditions and the following disclaimer in the
universe@21 14 * documentation and/or other materials provided with the distribution.
universe@21 15 *
universe@21 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@21 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@21 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@21 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@21 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@21 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@21 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@21 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@21 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@21 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@21 26 * POSSIBILITY OF SUCH DAMAGE.
universe@21 27 *
universe@21 28 */
universe@21 29
universe@52 30 #include "highlighter.h"
universe@21 31
universe@51 32 HighlighterData* new_highlighter_data() {
universe@51 33 HighlighterData* hd = malloc(sizeof(HighlighterData));
universe@51 34 if (hd == NULL) {
universe@51 35 return NULL;
universe@51 36 } else {
universe@51 37 hd->multiline_comment = 0;
universe@51 38 hd->primary_buffer = ucx_buffer_new(NULL, 256, UCX_BUFFER_AUTOEXTEND);
universe@51 39 hd->secondary_buffer = ucx_buffer_new(NULL, 32, UCX_BUFFER_AUTOEXTEND);
universe@51 40 return hd;
universe@51 41 }
universe@51 42 }
universe@51 43
universe@51 44 void free_highlighter_data(HighlighterData *hd) {
universe@51 45 ucx_buffer_free(hd->primary_buffer);
universe@51 46 ucx_buffer_free(hd->secondary_buffer);
universe@51 47 free(hd);
universe@51 48 }
universe@51 49
universe@52 50 static void put_htmlescaped(UcxBuffer *dest, char c) {
universe@21 51 if (c == '>') {
universe@48 52 ucx_buffer_puts(dest, "&gt;");
universe@21 53 } else if (c == '<') {
universe@48 54 ucx_buffer_puts(dest, "&lt;");
universe@21 55 } else {
universe@48 56 ucx_buffer_putc(dest, c);
universe@21 57 }
universe@48 58 }
universe@21 59
universe@52 60 static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) {
universe@48 61 for (int i = 0 ; i < s.length ; i++) {
universe@48 62 put_htmlescaped(dest, s.ptr[i]);
universe@48 63 }
universe@21 64 }
universe@21 65
universe@52 66 static int check_keyword(sstr_t word, const char** keywords) {
universe@21 67 for (int i = 0 ; keywords[i] ; i++) {
universe@47 68 if (sstrcmp(word, sstr((char*)keywords[i])) == 0) {
universe@21 69 return 1;
universe@21 70 }
universe@21 71 }
universe@21 72 return 0;
universe@21 73 }
universe@21 74
universe@52 75 static int check_capsonly(sstr_t word) {
universe@47 76 for (size_t i = 0 ; i < word.length ; i++) {
universe@47 77 if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i])
universe@47 78 && word.ptr[i] != '_') {
universe@21 79 return 0;
universe@21 80 }
universe@21 81 }
universe@21 82 return 1;
universe@21 83 }
universe@52 84
universe@52 85 /* Plaintext Highlighter */
universe@52 86
universe@52 87 void plain_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
universe@52 88 while (*src && *src != '\n') {
universe@52 89 put_htmlescaped(dest, *src);
universe@52 90 src++;
universe@52 91 }
universe@52 92 ucx_buffer_putc(dest, '\n');
universe@52 93 }
universe@52 94
universe@52 95 /* C Highlighter */
universe@52 96
/*
 * NULL-terminated table of the words c_highlighter() marks as keywords:
 * the C89 keywords, followed by the keywords added in C99 and C11.
 */
static const char* ckeywords[] = {
    /* C89 */
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    "long", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    "while",
    /* C99 */
    "inline", "restrict", "_Bool", "_Complex", "_Imaginary",
    /* C11 */
    "_Alignas", "_Alignof", "_Atomic", "_Generic", "_Noreturn",
    "_Static_assert", "_Thread_local",
    NULL
};
universe@52 104
universe@52 105 void c_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
universe@52 106 /* reset buffers without clearing them */
universe@52 107 hd->primary_buffer->size = hd->primary_buffer->pos = 0;
universe@52 108 hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
universe@52 109
universe@52 110 /* alias the buffers for better handling */
universe@52 111 UcxBuffer *wbuf = hd->primary_buffer;
universe@52 112 UcxBuffer *ifilebuf = hd->secondary_buffer;
universe@52 113
universe@52 114 /* local information */
universe@52 115 size_t sp = (size_t)-1;
universe@52 116 int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
universe@52 117 char quote = '\0';
universe@52 118 int isescaping = 0;
universe@52 119
universe@52 120 /* continue a multi line comment highlighting */
universe@52 121 if (hd->multiline_comment) {
universe@52 122 iscomment = 1;
universe@52 123 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 124 }
universe@52 125
universe@52 126 char c;
universe@52 127 do {
universe@52 128 c = src[++sp];
universe@52 129 if (!c) break;
universe@52 130
universe@52 131 /* comments */
universe@52 132 if (!isstring && c == '/') {
universe@52 133 if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
universe@52 134 iscomment = 0;
universe@52 135 hd->multiline_comment = 0;
universe@52 136 ucx_buffer_puts(dest, "/</span>");
universe@52 137 continue;
universe@52 138 } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
universe@52 139 iscomment = 1;
universe@52 140 hd->multiline_comment = (src[sp+1] == '*');
universe@52 141 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 142 }
universe@52 143 }
universe@52 144
universe@52 145 if (iscomment) {
universe@52 146 if (c == '\n') {
universe@52 147 ucx_buffer_puts(dest, "</span>\n");
universe@52 148 } else {
universe@52 149 put_htmlescaped(dest, c);
universe@52 150 }
universe@52 151 } else if (isinclude) {
universe@52 152 if (c == '<') {
universe@52 153 ucx_buffer_puts(dest,
universe@52 154 "<span class=\"c2html-stdinclude\">&lt;");
universe@52 155 } else if (c == '\"') {
universe@52 156 if (parseinclude) {
universe@52 157 ucx_buffer_puts(dest, "\">");
universe@52 158 ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest);
universe@52 159 ucx_buffer_puts(dest, "\"</a>");
universe@52 160 parseinclude = 0;
universe@52 161 } else {
universe@52 162 ucx_buffer_puts(dest,
universe@52 163 "<a class=\"c2html-userinclude\" href=\"");
universe@52 164 ucx_buffer_putc(ifilebuf, '\"');
universe@52 165 parseinclude = 1;
universe@52 166 }
universe@52 167 } else if (c == '>') {
universe@52 168 ucx_buffer_puts(dest, "&gt;</span>");
universe@52 169 } else {
universe@52 170 if (parseinclude) {
universe@52 171 ucx_buffer_putc(ifilebuf, c);
universe@52 172 }
universe@52 173 put_htmlescaped(dest, c);
universe@52 174 }
universe@52 175 } else {
universe@52 176 /* strings */
universe@52 177 if (!isescaping && (c == '\'' || c == '\"')) {
universe@52 178 if (isstring) {
universe@52 179 put_htmlescaped(dest, c);
universe@52 180 if (c == quote) {
universe@52 181 isstring = 0;
universe@52 182 ucx_buffer_puts(dest, "</span>");
universe@52 183 } else {
universe@52 184 put_htmlescaped(dest, c);
universe@52 185 }
universe@52 186 } else {
universe@52 187 isstring = 1;
universe@52 188 quote = c;
universe@52 189 ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
universe@52 190 put_htmlescaped(dest, c);
universe@52 191 }
universe@52 192 } else {
universe@52 193 if (isstring) {
universe@52 194 put_htmlescaped(dest, c);
universe@52 195 } else if (!isalnum(c) && c!='_' && c!='#') {
universe@52 196 /* write buffered word, if any */
universe@52 197 if (wbuf->size > 0) {
universe@52 198 sstr_t word = sstrn(wbuf->space, wbuf->size);
universe@52 199 int closespan = 1;
universe@52 200 sstr_t typesuffix = ST("_t");
universe@52 201 if (check_keyword(word, ckeywords)) {
universe@52 202 ucx_buffer_puts(dest,
universe@52 203 "<span class=\"c2html-keyword\">");
universe@52 204 } else if (sstrsuffix(word, typesuffix)) {
universe@52 205 ucx_buffer_puts(dest,
universe@52 206 "<span class=\"c2html-type\">");
universe@52 207 } else if (word.ptr[0] == '#') {
universe@52 208 isinclude = !sstrcmp(word, S("#include"));
universe@52 209 ucx_buffer_puts(dest,
universe@52 210 "<span class=\"c2html-directive\">");
universe@52 211 } else if (check_capsonly(word)) {
universe@52 212 ucx_buffer_puts(dest,
universe@52 213 "<span class=\"c2html-macroconst\">");
universe@52 214 } else {
universe@52 215 closespan = 0;
universe@52 216 }
universe@52 217 put_htmlescapedstr(dest, word);
universe@52 218 if (closespan) {
universe@52 219 ucx_buffer_puts(dest, "</span>");
universe@52 220 }
universe@52 221 }
universe@52 222 wbuf->pos = wbuf->size = 0; /* reset word buffer */
universe@52 223
universe@52 224 /* write current character */
universe@52 225 put_htmlescaped(dest, c);
universe@52 226 } else {
universe@52 227 /* buffer the current word */
universe@52 228 ucx_buffer_putc(wbuf, c);
universe@52 229 }
universe@52 230 }
universe@52 231
universe@52 232 isescaping = !isescaping & (c == '\\');
universe@52 233 }
universe@52 234 } while (c != '\n');
universe@52 235 }
universe@52 236
universe@52 237 /* Java Highlighter */
universe@52 238
/*
 * NULL-terminated table of the words java_highlighter() marks as keywords,
 * in alphabetical order.
 */
static const char* jkeywords[] = {
    "abstract", "assert", "boolean", "break", "byte", "case", "catch",
    "char", "class", "const", "continue", "default", "do", "double", "else",
    "enum", "extends", "final", "finally", "float", "for", "goto", "if",
    "implements", "import", "instanceof", "int", "interface", "long",
    "native", "new", "package", "private", "protected", "public", "return",
    "short", "static", "strictfp", "super", "switch", "synchronized", "this",
    "throw", "throws", "transient", "try", "void", "volatile", "while",
    NULL
};
universe@52 248
universe@52 249 void java_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
universe@52 250 /* reset buffers without clearing them */
universe@52 251 hd->primary_buffer->size = hd->primary_buffer->pos = 0;
universe@52 252 hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
universe@52 253
universe@52 254 /* alias the buffers for better handling */
universe@52 255 UcxBuffer *wbuf = hd->primary_buffer;
universe@52 256
universe@52 257 /* local information */
universe@52 258 size_t sp = (size_t)-1;
universe@52 259 int isstring = 0, iscomment = 0, isimport = 0;
universe@52 260 char quote = '\0';
universe@52 261 int isescaping = 0;
universe@52 262
universe@52 263 if (hd->multiline_comment) {
universe@52 264 iscomment = 1;
universe@52 265 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 266 }
universe@52 267
universe@52 268 char c;
universe@52 269 do {
universe@52 270 c = src[++sp];
universe@52 271 if (!c) break;
universe@52 272
universe@52 273 /* comments */
universe@52 274 if (!isstring && c == '/') {
universe@52 275 if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
universe@52 276 iscomment = 0;
universe@52 277 hd->multiline_comment = 0;
universe@52 278 ucx_buffer_puts(dest, "/</span>");
universe@52 279 continue;
universe@52 280 } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
universe@52 281 iscomment = 1;
universe@52 282 hd->multiline_comment = (src[sp+1] == '*');
universe@52 283 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 284 }
universe@52 285 }
universe@52 286
universe@52 287 if (iscomment) {
universe@52 288 if (c == '\n') {
universe@52 289 ucx_buffer_puts(dest, "</span>\n");
universe@52 290 } else {
universe@52 291 put_htmlescaped(dest, c);
universe@52 292 }
universe@52 293 } else if (isimport) {
universe@52 294 /* TODO: local imports */
universe@52 295 } else {
universe@52 296 /* strings */
universe@52 297 if (!isescaping && (c == '\'' || c == '\"')) {
universe@52 298 if (isstring) {
universe@52 299 put_htmlescaped(dest, c);
universe@52 300 if (c == quote) {
universe@52 301 isstring = 0;
universe@52 302 ucx_buffer_puts(dest, "</span>");
universe@52 303 } else {
universe@52 304 put_htmlescaped(dest, c);
universe@52 305 }
universe@52 306 } else {
universe@52 307 isstring = 1;
universe@52 308 quote = c;
universe@52 309 ucx_buffer_puts(dest,
universe@52 310 "<span class=\"c2html-string\">");
universe@52 311 put_htmlescaped(dest, c);
universe@52 312 }
universe@52 313 } else {
universe@52 314 if (isstring) {
universe@52 315 put_htmlescaped(dest, c);
universe@52 316 } else if (!isalnum(c) && c!='_' && c!='@') {
universe@52 317 /* write buffered word, if any */
universe@52 318 if (wbuf->size > 0) {
universe@52 319 sstr_t word = sstrn(wbuf->space, wbuf->size);
universe@52 320 int closespan = 1;
universe@52 321 if (check_keyword(word, jkeywords)) {
universe@52 322 ucx_buffer_puts(dest,
universe@52 323 "<span class=\"c2html-keyword\">");
universe@52 324 } else if (isupper(word.ptr[0])) {
universe@52 325 ucx_buffer_puts(dest,
universe@52 326 "<span class=\"c2html-type\">");
universe@52 327 } else if (word.ptr[0] == '@') {
universe@52 328 ucx_buffer_puts(dest,
universe@52 329 "<span class=\"c2html-directive\">");
universe@52 330 } else if (check_capsonly(word)) {
universe@52 331 ucx_buffer_puts(dest,
universe@52 332 "<span class=\"c2html-macroconst\">");
universe@52 333 } else {
universe@52 334 closespan = 0;
universe@52 335 }
universe@52 336 put_htmlescapedstr(dest, word);
universe@52 337
universe@52 338 if (closespan) {
universe@52 339 ucx_buffer_puts(dest, "</span>");
universe@52 340 }
universe@52 341 }
universe@52 342 wbuf->pos = wbuf->size = 0; /* reset buffer */
universe@52 343
universe@52 344 /* write current character */
universe@52 345 put_htmlescaped(dest, c);
universe@52 346 } else {
universe@52 347 /* buffer the current word */
universe@52 348 ucx_buffer_putc(wbuf, c);
universe@52 349 }
universe@52 350 }
universe@52 351
universe@52 352 isescaping = !isescaping & (c == '\\');
universe@52 353 }
universe@52 354 } while (c != '\n');
universe@52 355 }
universe@52 356

mercurial