--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/highlighter.c Fri Aug 26 13:49:19 2016 +0200 @@ -0,0 +1,356 @@ +/* + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. + * + * Copyright 2016 Mike Becker. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "highlighter.h" + +HighlighterData* new_highlighter_data() { + HighlighterData* hd = malloc(sizeof(HighlighterData)); + if (hd == NULL) { + return NULL; + } else { + hd->multiline_comment = 0; + hd->primary_buffer = ucx_buffer_new(NULL, 256, UCX_BUFFER_AUTOEXTEND); + hd->secondary_buffer = ucx_buffer_new(NULL, 32, UCX_BUFFER_AUTOEXTEND); + return hd; + } +} + +void free_highlighter_data(HighlighterData *hd) { + ucx_buffer_free(hd->primary_buffer); + ucx_buffer_free(hd->secondary_buffer); + free(hd); +} + +static void put_htmlescaped(UcxBuffer *dest, char c) { + if (c == '>') { + ucx_buffer_puts(dest, ">"); + } else if (c == '<') { + ucx_buffer_puts(dest, "<"); + } else { + ucx_buffer_putc(dest, c); + } +} + +static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) { + for (int i = 0 ; i < s.length ; i++) { + put_htmlescaped(dest, s.ptr[i]); + } +} + +static int check_keyword(sstr_t word, const char** keywords) { + for (int i = 0 ; keywords[i] ; i++) { + if (sstrcmp(word, sstr((char*)keywords[i])) == 0) { + return 1; + } + } + return 0; +} + +static int check_capsonly(sstr_t word) { + for (size_t i = 0 ; i < word.length ; i++) { + if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i]) + && word.ptr[i] != '_') { + return 0; + } + } + return 1; +} + +/* Plaintext Highlighter */ + +void plain_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) { + while (*src && *src != '\n') { + put_htmlescaped(dest, *src); + src++; + } + ucx_buffer_putc(dest, '\n'); +} + +/* C Highlighter */ + +static const char* ckeywords[] = { + "auto", "break", "case", "char", "const", "continue", "default", "do", + "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", + "long", "register", "return", "short", "signed", "sizeof", "static", + "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", + "while", NULL +}; + +void c_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) { + /* reset buffers without clearing them */ + hd->primary_buffer->size = hd->primary_buffer->pos = 0; + hd->secondary_buffer->size = hd->secondary_buffer->pos = 0; + + /* alias the buffers for better handling */ + UcxBuffer *wbuf = hd->primary_buffer; + UcxBuffer *ifilebuf = hd->secondary_buffer; + + /* local information */ + size_t sp = (size_t)-1; + int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0; + char quote = '\0'; + int isescaping = 0; + + /* continue a multi line comment highlighting */ + if (hd->multiline_comment) { + iscomment = 1; + ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); + } + + char c; + do { + c = src[++sp]; + if (!c) break; + + /* comments */ + if (!isstring && c == '/') { + if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') { + iscomment = 0; + hd->multiline_comment = 0; + ucx_buffer_puts(dest, "/</span>"); + continue; + } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { + iscomment = 1; + hd->multiline_comment = (src[sp+1] == '*'); + ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); + } + } + + if (iscomment) { + if (c == '\n') { + ucx_buffer_puts(dest, "</span>\n"); + } else { + put_htmlescaped(dest, c); + } + } else if (isinclude) { + if (c == '<') { + ucx_buffer_puts(dest, + "<span class=\"c2html-stdinclude\"><"); + } else if (c == '\"') { + if (parseinclude) { + ucx_buffer_puts(dest, "\">"); + ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest); + ucx_buffer_puts(dest, "\"</a>"); + parseinclude = 0; + } else { + ucx_buffer_puts(dest, + "<a class=\"c2html-userinclude\" href=\""); + ucx_buffer_putc(ifilebuf, '\"'); + parseinclude = 1; + } + } else if (c == '>') { + ucx_buffer_puts(dest, "></span>"); + } else { + if (parseinclude) { + ucx_buffer_putc(ifilebuf, c); + } + put_htmlescaped(dest, c); + } + } else { + /* strings */ + if (!isescaping && (c == '\'' || c == '\"')) { + if (isstring) { + put_htmlescaped(dest, c); + if (c == quote) { + isstring = 0; + ucx_buffer_puts(dest, "</span>"); + } else { + put_htmlescaped(dest, c); + } + } else { + isstring = 1; + quote = c; + ucx_buffer_puts(dest, "<span class=\"c2html-string\">"); + put_htmlescaped(dest, c); + } + } else { + if (isstring) { + put_htmlescaped(dest, c); + } else if (!isalnum(c) && c!='_' && c!='#') { + /* write buffered word, if any */ + if (wbuf->size > 0) { + sstr_t word = sstrn(wbuf->space, wbuf->size); + int closespan = 1; + sstr_t typesuffix = ST("_t"); + if (check_keyword(word, ckeywords)) { + ucx_buffer_puts(dest, + "<span class=\"c2html-keyword\">"); + } else if (sstrsuffix(word, typesuffix)) { + ucx_buffer_puts(dest, + "<span class=\"c2html-type\">"); + } else if (word.ptr[0] == '#') { + isinclude = !sstrcmp(word, S("#include")); + ucx_buffer_puts(dest, + "<span class=\"c2html-directive\">"); + } else if (check_capsonly(word)) { + ucx_buffer_puts(dest, + "<span class=\"c2html-macroconst\">"); + } else { + closespan = 0; + } + put_htmlescapedstr(dest, word); + if (closespan) { + ucx_buffer_puts(dest, "</span>"); + } + } + wbuf->pos = wbuf->size = 0; /* reset word buffer */ + + /* write current character */ + put_htmlescaped(dest, c); + } else { + /* buffer the current word */ + ucx_buffer_putc(wbuf, c); + } + } + + isescaping = !isescaping & (c == '\\'); + } + } while (c != '\n'); +} + +/* Java Highlighter */ + +static const char* jkeywords[] = { + "abstract", "continue", "for", "new", "switch", "assert", "default", "goto", + "package", "synchronized", "boolean", "do", "if", "private", "this", + "break", "double", "implements", "protected", "throw", "byte", "else", + "import", "public", "throws", "case", "enum", "instanceof", "return", + "transient", "catch", "extends", "int", "short", "try", "char", "final", + "interface", "static", "void", "class", "finally", "long", "strictfp", + "volatile", "const", "float", "native", "super", "while", NULL +}; + +void java_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) { + /* reset buffers without clearing them */ + hd->primary_buffer->size = hd->primary_buffer->pos = 0; + hd->secondary_buffer->size = hd->secondary_buffer->pos = 0; + + /* alias the buffers for better handling */ + UcxBuffer *wbuf = hd->primary_buffer; + + /* local information */ + size_t sp = (size_t)-1; + int isstring = 0, iscomment = 0, isimport = 0; + char quote = '\0'; + int isescaping = 0; + + if (hd->multiline_comment) { + iscomment = 1; + ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); + } + + char c; + do { + c = src[++sp]; + if (!c) break; + + /* comments */ + if (!isstring && c == '/') { + if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') { + iscomment = 0; + hd->multiline_comment = 0; + ucx_buffer_puts(dest, "/</span>"); + continue; + } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { + iscomment = 1; + hd->multiline_comment = (src[sp+1] == '*'); + ucx_buffer_puts(dest, "<span class=\"c2html-comment\">"); + } + } + + if (iscomment) { + if (c == '\n') { + ucx_buffer_puts(dest, "</span>\n"); + } else { + put_htmlescaped(dest, c); + } + } else if (isimport) { + /* TODO: local imports */ + } else { + /* strings */ + if (!isescaping && (c == '\'' || c == '\"')) { + if (isstring) { + put_htmlescaped(dest, c); + if (c == quote) { + isstring = 0; + ucx_buffer_puts(dest, "</span>"); + } else { + put_htmlescaped(dest, c); + } + } else { + isstring = 1; + quote = c; + ucx_buffer_puts(dest, + "<span class=\"c2html-string\">"); + put_htmlescaped(dest, c); + } + } else { + if (isstring) { + put_htmlescaped(dest, c); + } else if (!isalnum(c) && c!='_' && c!='@') { + /* write buffered word, if any */ + if (wbuf->size > 0) { + sstr_t word = sstrn(wbuf->space, wbuf->size); + int closespan = 1; + if (check_keyword(word, jkeywords)) { + ucx_buffer_puts(dest, + "<span class=\"c2html-keyword\">"); + } else if (isupper(word.ptr[0])) { + ucx_buffer_puts(dest, + "<span class=\"c2html-type\">"); + } else if (word.ptr[0] == '@') { + ucx_buffer_puts(dest, + "<span class=\"c2html-directive\">"); + } else if (check_capsonly(word)) { + ucx_buffer_puts(dest, + "<span class=\"c2html-macroconst\">"); + } else { + closespan = 0; + } + put_htmlescapedstr(dest, word); + + if (closespan) { + ucx_buffer_puts(dest, "</span>"); + } + } + wbuf->pos = wbuf->size = 0; /* reset buffer */ + + /* write current character */ + put_htmlescaped(dest, c); + } else { + /* buffer the current word */ + ucx_buffer_putc(wbuf, c); + } + } + + isescaping = !isescaping & (c == '\\'); + } + } while (c != '\n'); +} +