Wed, 31 Aug 2016 12:58:48 +0200
highlighter can now handle files which do not end with a blank line
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2016 Mike Becker. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "highlighter.h"

/*
 * Allocates a HighlighterData object together with its two work buffers
 * (256 and 32 bytes initial capacity, both auto-extending).
 *
 * Returns the new object, or NULL if the object or one of its buffers could
 * not be allocated (a NULL result of ucx_buffer_new is treated as failure).
 * A successfully created object must be released with
 * free_highlighter_data().
 */
HighlighterData* new_highlighter_data() {
    HighlighterData *hd = malloc(sizeof *hd);
    if (hd == NULL) {
        return NULL;
    }

    hd->multiline_comment = 0;
    hd->primary_buffer = ucx_buffer_new(NULL, 256, UCX_BUFFER_AUTOEXTEND);
    hd->secondary_buffer = ucx_buffer_new(NULL, 32, UCX_BUFFER_AUTOEXTEND);

    /* do not hand out a half-constructed object */
    if (hd->primary_buffer == NULL || hd->secondary_buffer == NULL) {
        if (hd->primary_buffer != NULL) {
            ucx_buffer_free(hd->primary_buffer);
        }
        if (hd->secondary_buffer != NULL) {
            ucx_buffer_free(hd->secondary_buffer);
        }
        free(hd);
        return NULL;
    }

    return hd;
}

/*
 * Releases a HighlighterData object and both of its buffers.
 * Passing NULL is a no-op.
 */
void free_highlighter_data(HighlighterData *hd) {
    if (hd == NULL) {
        return;
    }
    if (hd->primary_buffer != NULL) {
        ucx_buffer_free(hd->primary_buffer);
    }
    if (hd->secondary_buffer != NULL) {
        ucx_buffer_free(hd->secondary_buffer);
    }
    free(hd);
}

/*
 * Appends one character to dest, replacing the HTML meta characters
 * '<', '>' and '&' by their entities. A NUL character is silently dropped.
 */
static void put_htmlescaped(UcxBuffer *dest, char c) {
    if (c == '>') {
        ucx_buffer_puts(dest, "&gt;");
    } else if (c == '<') {
        ucx_buffer_puts(dest, "&lt;");
    } else if (c == '&') {
        ucx_buffer_puts(dest, "&amp;");
    } else if (c) {
        ucx_buffer_putc(dest, c);
    }
}

/* Appends every character of s to dest via put_htmlescaped(). */
static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) {
    for (size_t i = 0 ; i < s.length ; i++) {
        put_htmlescaped(dest, s.ptr[i]);
    }
}

/*
 * Returns 1 if word equals one of the entries of the NULL terminated
 * keywords array, 0 otherwise.
 */
static int check_keyword(sstr_t word, const char** keywords) {
    for (size_t i = 0 ; keywords[i] ; i++) {
        if (sstrcmp(word, sstr((char*)keywords[i])) == 0) {
            return 1;
        }
    }
    return 0;
}

/*
 * Returns 1 if word consists only of upper case letters, digits and
 * underscores (this includes the empty word), 0 otherwise.
 */
static int check_capsonly(sstr_t word) {
    for (size_t i = 0 ; i < word.length ; i++) {
        /* <ctype.h> functions require a value representable as
         * unsigned char; plain char may be negative */
        unsigned char uc = (unsigned char) word.ptr[i];
        if (!isupper(uc) && !isdigit(uc) && uc != '_') {
            return 0;
        }
    }
    return 1;
}

/* Plaintext Highlighter */

/*
 * Copies one line of src (up to, but excluding, the first '\n' or the
 * terminating NUL) HTML escaped to dest and always appends a '\n'.
 * The highlighter data is not used.
 */
void plain_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
    (void) hd; /* unused, present for a uniform highlighter signature */
    while (*src && *src != '\n') {
        put_htmlescaped(dest, *src);
        src++;
    }
    ucx_buffer_putc(dest, '\n');
}

/* C Highlighter */

static const char* ckeywords[] = {
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    "long", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    "while", NULL
};

/*
 * Writes the word collected in wbuf (if any) to dest, wrapped in the span
 * matching its classification, and resets wbuf afterwards.
 *
 * When the word is a preprocessor directive, *isinclude is set to 1 for
 * "#include" and to 0 for every other directive; otherwise it is untouched.
 */
static void c_flush_word(UcxBuffer *wbuf, UcxBuffer *dest, int *isinclude) {
    if (wbuf->size > 0) {
        sstr_t word = sstrn(wbuf->space, wbuf->size);
        int closespan = 1;
        sstr_t typesuffix = ST("_t");
        if (check_keyword(word, ckeywords)) {
            ucx_buffer_puts(dest, "<span class=\"c2html-keyword\">");
        } else if (sstrsuffix(word, typesuffix)) {
            ucx_buffer_puts(dest, "<span class=\"c2html-type\">");
        } else if (word.ptr[0] == '#') {
            *isinclude = !sstrcmp(word, S("#include"));
            ucx_buffer_puts(dest, "<span class=\"c2html-directive\">");
        } else if (check_capsonly(word)) {
            ucx_buffer_puts(dest, "<span class=\"c2html-macroconst\">");
        } else {
            closespan = 0;
        }
        put_htmlescapedstr(dest, word);
        if (closespan) {
            ucx_buffer_puts(dest, "</span>");
        }
    }
    wbuf->pos = wbuf->size = 0; /* reset word buffer */
}

/*
 * Highlights one line of C source code.
 *
 * Reads src up to and including the first '\n' or up to the terminating NUL
 * and appends the HTML markup to dest. Whether the line starts or ends inside
 * a multi line comment is carried between calls in hd->multiline_comment.
 * Both buffers of hd are used as scratch space and are reset on entry.
 */
void c_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
    /* reset buffers without clearing them */
    hd->primary_buffer->size = hd->primary_buffer->pos = 0;
    hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;

    /* alias the buffers for better handling */
    UcxBuffer *wbuf = hd->primary_buffer;
    UcxBuffer *ifilebuf = hd->secondary_buffer;

    /* local information */
    size_t sp = (size_t)-1;
    int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
    char quote = '\0';
    int isescaping = 0;

    /* continue a multi line comment highlighting */
    if (hd->multiline_comment) {
        iscomment = 1;
        ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
    }

    char c;
    do {
        c = src[++sp];

        int iswordchar = isalnum((unsigned char) c) || c == '_' || c == '#';

        /* A word ends at the first non-word character. Flush it before
         * anything else is written, so that a directly following comment,
         * string or include file name cannot overtake (or swallow) it. */
        if (!iswordchar) {
            c_flush_word(wbuf, dest, &isinclude);
        }

        /* comments */
        if (!isstring && c == '/') {
            /* NOTE(review): the sequence slash-star-slash is treated as a
             * complete comment, because only the previous character is
             * inspected here */
            if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
                iscomment = 0;
                hd->multiline_comment = 0;
                ucx_buffer_puts(dest, "/</span>");
                continue;
            } else if (!iscomment
                    && (src[sp+1] == '/' || src[sp+1] == '*')) {
                iscomment = 1;
                hd->multiline_comment = (src[sp+1] == '*');
                ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
            }
        }

        if (iscomment) {
            if (c == '\n') {
                ucx_buffer_puts(dest, "</span>\n");
            } else if (c == '\0') {
                /* input without trailing newline: still close the span */
                ucx_buffer_puts(dest, "</span>");
            } else {
                put_htmlescaped(dest, c);
            }
        } else if (isinclude) {
            if (c == '<') {
                ucx_buffer_puts(dest,
                        "<span class=\"c2html-stdinclude\">&lt;");
            } else if (c == '\"') {
                if (parseinclude) {
                    /* close the href attribute, then write the link text
                     * that was collected in ifilebuf */
                    ucx_buffer_puts(dest, "\">");
                    put_htmlescapedstr(dest,
                            sstrn(ifilebuf->space, ifilebuf->size));
                    ucx_buffer_puts(dest, "\"</a>");
                    parseinclude = 0;
                } else {
                    ucx_buffer_puts(dest,
                            "<a class=\"c2html-userinclude\" href=\"");
                    ucx_buffer_putc(ifilebuf, '\"');
                    parseinclude = 1;
                }
            } else if (c == '>') {
                ucx_buffer_puts(dest, "&gt;</span>");
            } else {
                if (parseinclude) {
                    ucx_buffer_putc(ifilebuf, c);
                }
                put_htmlescaped(dest, c);
            }
        } else {
            /* strings */
            if (!isescaping && (c == '\'' || c == '\"')) {
                if (isstring) {
                    /* write the quote exactly once; only the quote that
                     * opened the literal terminates it */
                    put_htmlescaped(dest, c);
                    if (c == quote) {
                        isstring = 0;
                        ucx_buffer_puts(dest, "</span>");
                    }
                } else {
                    isstring = 1;
                    quote = c;
                    ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
                    put_htmlescaped(dest, c);
                }
            } else if (isstring) {
                put_htmlescaped(dest, c);
            } else if (iswordchar) {
                /* buffer the current word */
                ucx_buffer_putc(wbuf, c);
            } else {
                /* write current character (word was flushed above) */
                put_htmlescaped(dest, c);
            }

            /* a backslash escapes the next character unless it is
             * escaped itself */
            isescaping = !isescaping && (c == '\\');
        }
    } while (c && c != '\n');
}

/* Java Highlighter */

static const char* jkeywords[] = {
    "abstract", "continue", "for", "new", "switch", "assert", "default",
    "goto", "package", "synchronized", "boolean", "do", "if", "private",
    "this", "break", "double", "implements", "protected", "throw", "byte",
    "else", "import", "public", "throws", "case", "enum", "instanceof",
    "return", "transient", "catch", "extends", "int", "short", "try", "char",
    "final", "interface", "static", "void", "class", "finally", "long",
    "strictfp", "volatile", "const", "float", "native", "super", "while", NULL
};

/*
 * Writes the word collected in wbuf (if any) to dest, wrapped in the span
 * matching its classification, and resets wbuf afterwards.
 */
static void java_flush_word(UcxBuffer *wbuf, UcxBuffer *dest) {
    if (wbuf->size > 0) {
        sstr_t word = sstrn(wbuf->space, wbuf->size);
        int closespan = 1;
        if (check_keyword(word, jkeywords)) {
            ucx_buffer_puts(dest, "<span class=\"c2html-keyword\">");
        } else if (isupper((unsigned char) word.ptr[0])) {
            ucx_buffer_puts(dest, "<span class=\"c2html-type\">");
        } else if (word.ptr[0] == '@') {
            ucx_buffer_puts(dest, "<span class=\"c2html-directive\">");
        } else if (check_capsonly(word)) {
            /* NOTE(review): only reached by words starting with a digit or
             * an underscore, since words starting with an upper case letter
             * are already classified as type above - confirm intent */
            ucx_buffer_puts(dest, "<span class=\"c2html-macroconst\">");
        } else {
            closespan = 0;
        }
        put_htmlescapedstr(dest, word);
        if (closespan) {
            ucx_buffer_puts(dest, "</span>");
        }
    }
    wbuf->pos = wbuf->size = 0; /* reset buffer */
}

/*
 * Highlights one line of Java source code.
 *
 * Reads src up to and including the first '\n' or up to the terminating NUL
 * and appends the HTML markup to dest. Whether the line starts or ends inside
 * a multi line comment is carried between calls in hd->multiline_comment.
 * Both buffers of hd are reset on entry; only the primary buffer is used.
 */
void java_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
    /* reset buffers without clearing them */
    hd->primary_buffer->size = hd->primary_buffer->pos = 0;
    hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;

    /* alias the buffers for better handling */
    UcxBuffer *wbuf = hd->primary_buffer;

    /* local information */
    size_t sp = (size_t)-1;
    int isstring = 0, iscomment = 0;
    char quote = '\0';
    int isescaping = 0;

    /* continue a multi line comment highlighting */
    if (hd->multiline_comment) {
        iscomment = 1;
        ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
    }

    char c;
    do {
        c = src[++sp];

        int iswordchar = isalnum((unsigned char) c) || c == '_' || c == '@';

        /* A word ends at the first non-word character. Flush it before
         * anything else is written, so that a directly following comment
         * or string cannot overtake (or swallow) it. */
        if (!iswordchar) {
            java_flush_word(wbuf, dest);
        }

        /* comments */
        if (!isstring && c == '/') {
            if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
                iscomment = 0;
                hd->multiline_comment = 0;
                ucx_buffer_puts(dest, "/</span>");
                continue;
            } else if (!iscomment
                    && (src[sp+1] == '/' || src[sp+1] == '*')) {
                iscomment = 1;
                hd->multiline_comment = (src[sp+1] == '*');
                ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
            }
        }

        if (iscomment) {
            if (c == '\n') {
                ucx_buffer_puts(dest, "</span>\n");
            } else if (c == '\0') {
                /* input without trailing newline: still close the span */
                ucx_buffer_puts(dest, "</span>");
            } else {
                put_htmlescaped(dest, c);
            }
        } else {
            /* TODO: local imports */

            /* strings */
            if (!isescaping && (c == '\'' || c == '\"')) {
                if (isstring) {
                    /* write the quote exactly once; only the quote that
                     * opened the literal terminates it */
                    put_htmlescaped(dest, c);
                    if (c == quote) {
                        isstring = 0;
                        ucx_buffer_puts(dest, "</span>");
                    }
                } else {
                    isstring = 1;
                    quote = c;
                    ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
                    put_htmlescaped(dest, c);
                }
            } else if (isstring) {
                put_htmlescaped(dest, c);
            } else if (iswordchar) {
                /* buffer the current word */
                ucx_buffer_putc(wbuf, c);
            } else {
                /* write current character (word was flushed above) */
                put_htmlescaped(dest, c);
            }

            /* a backslash escapes the next character unless it is
             * escaped itself */
            isescaping = !isescaping && (c == '\\');
        }
    } while (c && c != '\n');
}