universe@21: /*
universe@21:  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@21:  *
universe@35:  * Copyright 2016 Mike Becker. All rights reserved.
universe@21:  *
universe@21:  * Redistribution and use in source and binary forms, with or without
universe@21:  * modification, are permitted provided that the following conditions are met:
universe@21:  *
universe@21:  *   1. Redistributions of source code must retain the above copyright
universe@21:  *      notice, this list of conditions and the following disclaimer.
universe@21:  *
universe@21:  *   2. Redistributions in binary form must reproduce the above copyright
universe@21:  *      notice, this list of conditions and the following disclaimer in the
universe@21:  *      documentation and/or other materials provided with the distribution.
universe@21:  *
universe@21:  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@21:  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@21:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@21:  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@21:  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@21:  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@21:  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@21:  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@21:  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@21:  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@21:  * POSSIBILITY OF SUCH DAMAGE.
universe@21:  *
universe@21:  */
universe@21: 
universe@52: #include "highlighter.h"
universe@21: 
universe@51: HighlighterData* new_highlighter_data() {
universe@51:     HighlighterData* hd = malloc(sizeof(HighlighterData));
universe@51:     if (hd == NULL) {
universe@51:         return NULL;
universe@51:     } else {
universe@51:         hd->multiline_comment = 0;
universe@51:         hd->primary_buffer = ucx_buffer_new(NULL, 256, UCX_BUFFER_AUTOEXTEND);
universe@51:         hd->secondary_buffer = ucx_buffer_new(NULL, 32, UCX_BUFFER_AUTOEXTEND);
universe@51:         return hd;
universe@51:     }
universe@51: }
universe@51: 
universe@51: void free_highlighter_data(HighlighterData *hd) {
universe@51:     ucx_buffer_free(hd->primary_buffer);
universe@51:     ucx_buffer_free(hd->secondary_buffer);
universe@51:     free(hd);
universe@51: }
universe@51: 
universe@52: static void put_htmlescaped(UcxBuffer *dest, char c) {
universe@21:     if (c == '>') {
universe@48:         ucx_buffer_puts(dest, "&gt;");
universe@21:     } else if (c == '<') {
universe@48:         ucx_buffer_puts(dest, "&lt;");
universe@21:     } else {
universe@48:         ucx_buffer_putc(dest, c);
universe@21:     }
universe@48: }
universe@21: 
universe@52: static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) {
universe@48:     for (int i = 0 ; i < s.length ; i++) {
universe@48:         put_htmlescaped(dest, s.ptr[i]);
universe@48:     }
universe@21: }
universe@21: 
universe@52: static int check_keyword(sstr_t word, const char** keywords) {
universe@21:     for (int i = 0 ; keywords[i] ; i++) {
universe@47:         if (sstrcmp(word, sstr((char*)keywords[i])) == 0) {
universe@21:             return 1;
universe@21:         }
universe@21:     }
universe@21:     return 0;
universe@21: }
universe@21: 
universe@52: static int check_capsonly(sstr_t word) {
universe@47:     for (size_t i = 0 ; i < word.length ; i++) {
universe@47:         if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i])
universe@47:                 && word.ptr[i] != '_') {
universe@21:             return 0;
universe@21:         }
universe@21:     }
universe@21:     return 1;
universe@21: }
universe@52: 
universe@52: /* Plaintext Highlighter */
universe@52: 
universe@52: void plain_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
universe@52:     while (*src && *src != '\n') {
universe@52:         put_htmlescaped(dest, *src);
universe@52:         src++;
universe@52:     }
universe@52:     ucx_buffer_putc(dest, '\n');
universe@52: }
universe@52: 
universe@52: /* C Highlighter */
universe@52: 
/*
 * NULL-terminated list of the C keywords highlighted by c_highlighter().
 * Contains the C89 keywords plus the keywords added by C99.
 */
static const char* ckeywords[] = {
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if",
    "inline", "int", "long", "register", "restrict", "return", "short",
    "signed", "sizeof", "static", "struct", "switch", "typedef", "union",
    "unsigned", "void", "volatile", "while",
    "_Bool", "_Complex", "_Imaginary", NULL
};
universe@52: 
universe@52: void c_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
universe@52:     /* reset buffers without clearing them */
universe@52:     hd->primary_buffer->size = hd->primary_buffer->pos = 0;
universe@52:     hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
universe@52:     
universe@52:     /* alias the buffers for better handling */
universe@52:     UcxBuffer *wbuf = hd->primary_buffer;
universe@52:     UcxBuffer *ifilebuf = hd->secondary_buffer;
universe@52:     
universe@52:     /* local information */
universe@52:     size_t sp = (size_t)-1;
universe@52:     int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
universe@52:     char quote = '\0';
universe@52:     int isescaping = 0;
universe@52:     
universe@52:     /* continue a multi line comment highlighting */
universe@52:     if (hd->multiline_comment) {
universe@52:         iscomment = 1;
universe@52:         ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52:     }
universe@52: 
universe@52:     char c;
universe@52:     do {
universe@52:         c = src[++sp];
universe@52:         if (!c) break;
universe@52:         
universe@52:         /* comments */
universe@52:         if (!isstring && c == '/') {
universe@52:             if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
universe@52:                 iscomment = 0;
universe@52:                 hd->multiline_comment = 0;
universe@52:                 ucx_buffer_puts(dest, "/</span>");
universe@52:                 continue;
universe@52:             } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
universe@52:                 iscomment = 1;
universe@52:                 hd->multiline_comment = (src[sp+1] == '*');
universe@52:                 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52:             }
universe@52:         }
universe@52: 
universe@52:         if (iscomment) {
universe@52:             if (c == '\n') {
universe@52:                 ucx_buffer_puts(dest, "</span>\n");
universe@52:             } else {
universe@52:                 put_htmlescaped(dest, c);
universe@52:             }
universe@52:         } else if (isinclude) {
universe@52:             if (c == '<') {
universe@52:                 ucx_buffer_puts(dest,
universe@52:                         "<span class=\"c2html-stdinclude\">&lt;");
universe@52:             } else if (c == '\"') {
universe@52:                 if (parseinclude) {
universe@52:                     ucx_buffer_puts(dest, "\">");
universe@52:                     ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest);
universe@52:                     ucx_buffer_puts(dest, "\"</a>");
universe@52:                     parseinclude = 0;
universe@52:                 } else {
universe@52:                     ucx_buffer_puts(dest,
universe@52:                             "<a class=\"c2html-userinclude\" href=\"");
universe@52:                     ucx_buffer_putc(ifilebuf, '\"');
universe@52:                     parseinclude = 1;
universe@52:                 }
universe@52:             } else if (c == '>') {
universe@52:                 ucx_buffer_puts(dest,  "&gt;</span>");
universe@52:             } else {
universe@52:                 if (parseinclude) {
universe@52:                     ucx_buffer_putc(ifilebuf, c);
universe@52:                 }
universe@52:                 put_htmlescaped(dest, c);
universe@52:             }
universe@52:         } else {
universe@52:             /* strings */
universe@52:             if (!isescaping && (c == '\'' || c == '\"')) {
universe@52:                 if (isstring) {
universe@52:                     put_htmlescaped(dest, c);
universe@52:                     if (c == quote) {
universe@52:                         isstring = 0;
universe@52:                         ucx_buffer_puts(dest, "</span>");
universe@52:                     } else {
universe@52:                         put_htmlescaped(dest, c);
universe@52:                     }
universe@52:                 } else {
universe@52:                     isstring = 1;
universe@52:                     quote = c;
universe@52:                     ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
universe@52:                     put_htmlescaped(dest, c);
universe@52:                 }
universe@52:             } else {
universe@52:                 if (isstring) {
universe@52:                     put_htmlescaped(dest, c);
universe@52:                 } else if (!isalnum(c) && c!='_' && c!='#') {
universe@52:                     /* write buffered word, if any */
universe@52:                     if (wbuf->size > 0) {
universe@52:                         sstr_t word = sstrn(wbuf->space, wbuf->size);
universe@52:                         int closespan = 1;
universe@52:                         sstr_t typesuffix = ST("_t");
universe@52:                         if (check_keyword(word, ckeywords)) {
universe@52:                             ucx_buffer_puts(dest,
universe@52:                                     "<span class=\"c2html-keyword\">");
universe@52:                         } else if (sstrsuffix(word, typesuffix)) {
universe@52:                             ucx_buffer_puts(dest,
universe@52:                                 "<span class=\"c2html-type\">");
universe@52:                         } else if (word.ptr[0] == '#') {
universe@52:                             isinclude = !sstrcmp(word, S("#include"));
universe@52:                             ucx_buffer_puts(dest,
universe@52:                                 "<span class=\"c2html-directive\">");
universe@52:                         } else if (check_capsonly(word)) {
universe@52:                             ucx_buffer_puts(dest,
universe@52:                                 "<span class=\"c2html-macroconst\">");
universe@52:                         } else {
universe@52:                             closespan = 0;
universe@52:                         }
universe@52:                         put_htmlescapedstr(dest, word);
universe@52:                         if (closespan) {
universe@52:                             ucx_buffer_puts(dest, "</span>");
universe@52:                         }
universe@52:                     }
universe@52:                     wbuf->pos = wbuf->size = 0; /* reset word buffer */
universe@52:                     
universe@52:                     /* write current character */
universe@52:                     put_htmlescaped(dest, c);
universe@52:                 } else {
universe@52:                     /* buffer the current word */
universe@52:                     ucx_buffer_putc(wbuf, c);
universe@52:                 }
universe@52:             }
universe@52: 
universe@52:             isescaping = !isescaping & (c == '\\');
universe@52:         }
universe@52:     } while (c != '\n');
universe@52: }
universe@52: 
universe@52: /* Java Highlighter */
universe@52: 
/*
 * NULL-terminated list of the Java keywords highlighted by
 * java_highlighter(), five entries per row.
 */
static const char* jkeywords[] = {
    "abstract",  "continue",   "for",        "new",       "switch",
    "assert",    "default",    "goto",       "package",   "synchronized",
    "boolean",   "do",         "if",         "private",   "this",
    "break",     "double",     "implements", "protected", "throw",
    "byte",      "else",       "import",     "public",    "throws",
    "case",      "enum",       "instanceof", "return",    "transient",
    "catch",     "extends",    "int",        "short",     "try",
    "char",      "final",      "interface",  "static",    "void",
    "class",     "finally",    "long",       "strictfp",  "volatile",
    "const",     "float",      "native",     "super",     "while",
    NULL
};
universe@52: 
universe@52: void java_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
universe@52:     /* reset buffers without clearing them */
universe@52:     hd->primary_buffer->size = hd->primary_buffer->pos = 0;
universe@52:     hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
universe@52: 
universe@52:     /* alias the buffers for better handling */
universe@52:     UcxBuffer *wbuf = hd->primary_buffer;
universe@52:     
universe@52:     /* local information */
universe@52:     size_t sp = (size_t)-1;
universe@52:     int isstring = 0, iscomment = 0, isimport = 0;
universe@52:     char quote = '\0';
universe@52:     int isescaping = 0;
universe@52: 
universe@52:     if (hd->multiline_comment) {
universe@52:         iscomment = 1;
universe@52:         ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52:     }
universe@52: 
universe@52:     char c;
universe@52:     do {
universe@52:         c = src[++sp];
universe@52:         if (!c) break;
universe@52:         
universe@52:         /* comments */
universe@52:         if (!isstring && c == '/') {
universe@52:             if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
universe@52:                 iscomment = 0;
universe@52:                 hd->multiline_comment = 0;
universe@52:                 ucx_buffer_puts(dest, "/</span>");
universe@52:                 continue;
universe@52:             } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
universe@52:                 iscomment = 1;
universe@52:                 hd->multiline_comment = (src[sp+1] == '*');
universe@52:                 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52:             }
universe@52:         }
universe@52: 
universe@52:         if (iscomment) {
universe@52:             if (c == '\n') {
universe@52:                 ucx_buffer_puts(dest, "</span>\n");
universe@52:             } else {
universe@52:                 put_htmlescaped(dest, c);
universe@52:             }
universe@52:         } else if (isimport) {
universe@52:             /* TODO: local imports */
universe@52:         } else {
universe@52:             /* strings */
universe@52:             if (!isescaping && (c == '\'' || c == '\"')) {
universe@52:                 if (isstring) {
universe@52:                     put_htmlescaped(dest, c);
universe@52:                     if (c == quote) {
universe@52:                         isstring = 0;
universe@52:                         ucx_buffer_puts(dest, "</span>");
universe@52:                     } else {
universe@52:                         put_htmlescaped(dest, c);
universe@52:                     }
universe@52:                 } else {
universe@52:                     isstring = 1;
universe@52:                     quote = c;
universe@52:                     ucx_buffer_puts(dest,
universe@52:                         "<span class=\"c2html-string\">");
universe@52:                     put_htmlescaped(dest, c);
universe@52:                 }
universe@52:             } else {
universe@52:                 if (isstring) {
universe@52:                     put_htmlescaped(dest, c);
universe@52:                 } else if (!isalnum(c) && c!='_' && c!='@') {
universe@52:                     /* write buffered word, if any */
universe@52:                     if (wbuf->size > 0) {
universe@52:                         sstr_t word = sstrn(wbuf->space, wbuf->size);
universe@52:                         int closespan = 1;
universe@52:                         if (check_keyword(word, jkeywords)) {
universe@52:                             ucx_buffer_puts(dest,
universe@52:                                 "<span class=\"c2html-keyword\">");
universe@52:                         } else if (isupper(word.ptr[0])) {
universe@52:                             ucx_buffer_puts(dest,
universe@52:                                 "<span class=\"c2html-type\">");
universe@52:                         } else if (word.ptr[0] == '@') {
universe@52:                             ucx_buffer_puts(dest,
universe@52:                                 "<span class=\"c2html-directive\">");
universe@52:                         } else if (check_capsonly(word)) {
universe@52:                             ucx_buffer_puts(dest,
universe@52:                                 "<span class=\"c2html-macroconst\">");
universe@52:                         } else {
universe@52:                             closespan = 0;
universe@52:                         }
universe@52:                         put_htmlescapedstr(dest, word);
universe@52:                         
universe@52:                         if (closespan) {
universe@52:                             ucx_buffer_puts(dest, "</span>");
universe@52:                         }
universe@52:                     }
universe@52:                     wbuf->pos = wbuf->size = 0; /* reset buffer */
universe@52:                     
universe@52:                     /* write current character */
universe@52:                     put_htmlescaped(dest, c);
universe@52:                 } else {
universe@52:                     /* buffer the current word */
universe@52:                     ucx_buffer_putc(wbuf, c);
universe@52:                 }
universe@52:             }
universe@52: 
universe@52:             isescaping = !isescaping & (c == '\\');
universe@52:         }
universe@52:     } while (c != '\n');
universe@52: }
universe@52: