src/highlighter.c

Thu, 11 Jul 2024 20:05:21 +0200

author
Mike Becker <universe@uap-core.de>
date
Thu, 11 Jul 2024 20:05:21 +0200
changeset 80
0f1c55d2a90a
parent 76
44c7423d6ce2
permissions
-rw-r--r--

hotfix: ampersand not escaped in html output

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2016 Mike Becker. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "highlighter.h"

#include <string.h>
#include <ctype.h>

#include <cx/string.h>
#include <cx/printf.h>

/**
 * Appends one character to the destination buffer, substituting the HTML
 * entities for '<', '>' and '&'.
 *
 * A NUL character produces no output at all.
 *
 * @param dest the buffer that receives the output
 * @param c the character to write
 */
static void put_htmlescaped(CxBuffer *dest, char c) {
    switch (c) {
        case '\0':
            /* the terminator is never copied to the output */
            return;
        case '<':
            cxBufferPutString(dest, "&lt;");
            return;
        case '>':
            cxBufferPutString(dest, "&gt;");
            return;
        case '&':
            cxBufferPutString(dest, "&amp;");
            return;
        default:
            cxBufferPut(dest, c);
            return;
    }
}

/**
 * Appends a string to the destination buffer, HTML-escaping every character
 * with put_htmlescaped().
 *
 * @param dest the buffer that receives the output
 * @param s the string to write (exactly s.length characters are processed)
 */
static void put_htmlescapedstr(CxBuffer *dest, cxstring s) {
    /* index with size_t: the length is unsigned, an int index would be
     * compared across signedness and could overflow on huge strings */
    for (size_t i = 0 ; i < s.length ; i++) {
        put_htmlescaped(dest, s.ptr[i]);
    }
}

/**
 * Tests whether a word is contained in a keyword list.
 *
 * @param word the word to look up
 * @param keywords a NULL-terminated array of C strings
 * @return 1 if the word equals one of the list entries, 0 otherwise
 */
static int check_keyword(cxstring word, const char** keywords) {
    /* walk the list until the terminating NULL entry is reached */
    for (const char **entry = keywords ; *entry ; entry++) {
        if (cx_strcmp(word, cx_str(*entry)) == 0) {
            return 1;
        }
    }
    return 0;
}

/**
 * Tests whether a word consists solely of upper case letters, digits, and
 * underscores.
 *
 * @param word the word to inspect
 * @return 1 if every character matches (also for an empty word), 0 otherwise
 */
static int check_capsonly(cxstring word) {
    for (size_t i = 0 ; i < word.length ; i++) {
        /* the <ctype.h> functions require a value representable as
         * unsigned char; a plain char may be negative for non-ASCII bytes */
        unsigned char ch = (unsigned char) word.ptr[i];
        if (!isupper(ch) && !isdigit(ch) && ch != '_') {
            return 0;
        }
    }
    return 1;
}

/* Plaintext Highlighter */

/**
 * Writes one line of plain text to the destination buffer without applying
 * any highlighting.
 *
 * Characters are copied HTML-escaped up to (excluding) the first newline or
 * the terminating NUL. Carriage returns are dropped and a single newline is
 * always appended.
 *
 * @param src the source line
 * @param dest the buffer that receives the output
 * @param hd highlighter data (not used by this highlighter)
 */
void c2html_plain_highlighter(char const *src, CxBuffer *dest,
        c2html_highlighter_data *hd) {
    for (char const *cursor = src ;
            *cursor != '\0' && *cursor != '\n' ; cursor++) {
        if (*cursor == '\r') {
            continue;
        }
        put_htmlescaped(dest, *cursor);
    }
    cxBufferPut(dest, '\n');
}

/* C Highlighter */

/*
 * NULL-terminated list of the words that c2html_c_highlighter() renders
 * with the "keyword" class.
 */
static const char *ckeywords[] = {
    "auto", "break", "case", "char",
    "const", "continue", "default", "do",
    "double", "else", "enum", "extern",
    "float", "for", "goto", "if",
    "int", "long", "register", "return",
    "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union",
    "unsigned", "void", "volatile", "while",
    NULL
};

/**
 * Highlights one line of C source code.
 *
 * Characters are read from src up to and including the first newline or the
 * terminating NUL, and the resulting HTML is appended to dest. Carriage
 * returns are skipped.
 *
 * State that must survive until the next line is kept in hd:
 *   - multiline_comment: set while a block comment is still open
 *   - continue_highlight: class of the span that was open when the line
 *     ended with a line continuation (backslash, optional whitespace,
 *     newline)
 *   - continuation_info: bit field stored together with continue_highlight;
 *     0x1 = in include, 0x2 = in string, 0x4 = in comment,
 *     0x10 = string opened with a single quote,
 *     0x20 = string opened with a double quote
 * The primary buffer of hd collects the current word, the secondary buffer
 * collects the file name of a user include. Both are reset on entry.
 *
 * @param src the source line
 * @param dest the buffer that receives the HTML output
 * @param hd highlighter data carrying the state between lines
 */
void c2html_c_highlighter(char const *src, CxBuffer *dest,
        c2html_highlighter_data *hd) {
    /* reset buffers without clearing them */
    hd->primary_buffer.size = hd->primary_buffer.pos = 0;
    hd->secondary_buffer.size = hd->secondary_buffer.pos = 0;

    /* alias the buffers for better handling */
    CxBuffer *wbuf = &hd->primary_buffer;
    CxBuffer *ifilebuf = &hd->secondary_buffer;

    /* local information */
    size_t sp = (size_t)-1;

    int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
    char quote = '\0';
    int isescaping = 0;

    int continuation_enabled = 0;
    const char* current_highlight = NULL;

    /* define convenience macros (wrapped so they act as one statement) */
#define start_span(cl) \
        do { \
            current_highlight = (cl); \
            cx_bprintf(dest, "<span class=\"c2html-%s\">", \
                    current_highlight); \
        } while (0)
#define stop_span \
        do { \
            current_highlight = NULL; \
            cxBufferPutString(dest, "</span>"); \
        } while (0)

    /* continue a multi line comment highlighting */
    if (hd->multiline_comment) {
        iscomment = 1;
        start_span("comment");
    }

    /* continue highlighting in case of line continuation */
    if (hd->continue_highlight) {
        /* a continued line inside a block comment already got its span
         * above - do not open a second one that would never be closed */
        if (current_highlight == NULL) {
            start_span(hd->continue_highlight);
        }
        isinclude = hd->continuation_info & 0x1;
        isstring = (hd->continuation_info & 0x2) >> 1;
        iscomment = (hd->continuation_info & 0x4) >> 2;
        if (hd->continuation_info & 0x10) {
            quote = '\'';
        } else if (hd->continuation_info & 0x20) {
            quote = '\"';
        }
        hd->continue_highlight = NULL;
        hd->continuation_info = 0;
    }

    char c;
    do {
        c = src[++sp];
        if (c == '\r') continue;

        /* line continuation */
        if (c == '\\') {
            /* currently do not support continuations in user includes */
            // TODO: also support user includes
            if (!parseinclude) {
                continuation_enabled = 1;
            }
        } else if (continuation_enabled) {
            /* <ctype.h> functions must not receive negative values */
            if (!isspace((unsigned char) c)) {
                continuation_enabled = 0;
            } else if (c == '\n') {
                cxBufferPut(dest, '\n');
                /* remember the state for the next line */
                hd->continue_highlight = current_highlight;
                hd->continuation_info =
                        isinclude         |
                        (isstring << 1)   |
                        (iscomment << 2);
                if (quote == '\'') {
                    hd->continuation_info |= 0x10;
                } else if (quote == '\"') {
                    hd->continuation_info |= 0x20;
                }
                /* only close a span that is actually open */
                if (current_highlight != NULL) {
                    stop_span;
                }
                continue;
            }
        }

        /* comments */
        if (!isstring && c == '/') {
            if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
                /* end of a block comment */
                iscomment = 0;
                hd->multiline_comment = 0;
                cxBufferPut(dest, '/');
                stop_span;
                continue;
            } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
                /* start of a line comment or a block comment */
                iscomment = 1;
                hd->multiline_comment = (src[sp+1] == '*');
                start_span("comment");
            }
        }

        if (iscomment) {
            if (c == '\n') {
                /* the span is closed per line and reopened on the next */
                stop_span;
                cxBufferPut(dest, '\n');
            } else {
                put_htmlescaped(dest, c);
            }
        } else if (isinclude) {
            if (c == '<') {
                start_span("stdinclude");
                cxBufferPutString(dest, "&lt;");
            } else if (c == '\"') {
                if (parseinclude) {
                    /* closing quote: finish the href attribute and use the
                     * collected file name (HTML-escaped) as the link text */
                    cxBufferPutString(dest, "\">");
                    put_htmlescapedstr(dest,
                            cx_strn(ifilebuf->space, ifilebuf->size));
                    cxBufferPutString(dest, "\"</a>");
                    parseinclude = 0;
                } else {
                    /* opening quote: the following characters are written
                     * into the href attribute and collected in ifilebuf */
                    cxBufferPutString(dest,
                            "<a class=\"c2html-userinclude\" href=\"");
                    cxBufferPut(ifilebuf, '\"');
                    parseinclude = 1;
                }
            } else if (c == '>') {
                cxBufferPutString(dest,  "&gt;");
                stop_span;
            } else {
                if (parseinclude) {
                    cxBufferPut(ifilebuf, c);
                }
                put_htmlescaped(dest, c);
            }
        } else {
            /* strings */
            if (!isescaping && (c == '\'' || c == '\"')) {
                if (isstring) {
                    /* a quote inside a string is written exactly once;
                     * only the matching quote terminates the string */
                    put_htmlescaped(dest, c);
                    if (c == quote) {
                        isstring = 0;
                        stop_span;
                    }
                } else {
                    isstring = 1;
                    quote = c;
                    start_span("string");
                    put_htmlescaped(dest, c);
                }
            } else {
                if (isstring) {
                    put_htmlescaped(dest, c);
                } else if (isalnum((unsigned char) c)
                        || c == '_' || c == '#') {
                    /* buffer the current word */
                    cxBufferPut(wbuf, c);
                } else {
                    /* write buffered word, if any */
                    if (wbuf->size > 0) {
                        cxstring word = cx_strn(wbuf->space, wbuf->size);
                        int closespan = 1;
                        cxstring typesuffix = CX_STR("_t");
                        if (check_keyword(word, ckeywords)) {
                            start_span("keyword");
                        } else if (cx_strsuffix(word, typesuffix)) {
                            start_span("type");
                        } else if (word.ptr[0] == '#') {
                            isinclude = !cx_strcmp(word, CX_STR("#include"));
                            start_span("directive");
                        } else if (check_capsonly(word)) {
                            start_span("macroconst");
                        } else {
                            closespan = 0;
                        }
                        put_htmlescapedstr(dest, word);
                        if (closespan) {
                            stop_span;
                        }
                    }
                    wbuf->pos = wbuf->size = 0; /* reset word buffer */

                    /* write current character */
                    put_htmlescaped(dest, c);
                }
            }

            /* a backslash escapes the next character unless it is
             * escaped itself */
            isescaping = !isescaping && (c == '\\');
        }
    } while (c && c != '\n');

#undef start_span
#undef stop_span
}

/* Java Highlighter */

/*
 * NULL-terminated list of the words that c2html_java_highlighter() renders
 * with the "keyword" class.
 */
static const char *jkeywords[] = {
    "abstract", "continue", "for", "new", "switch",
    "assert", "default", "goto", "package", "synchronized",
    "boolean", "do", "if", "private", "this",
    "break", "double", "implements", "protected", "throw",
    "byte", "else", "import", "public", "throws",
    "case", "enum", "instanceof", "return", "transient",
    "catch", "extends", "int", "short", "try",
    "char", "final", "interface", "static", "void",
    "class", "finally", "long", "strictfp", "volatile",
    "const", "float", "native", "super", "while",
    NULL
};

/**
 * Highlights one line of Java source code.
 *
 * Characters are read from src up to and including the first newline or the
 * terminating NUL, and the resulting HTML is appended to dest. Carriage
 * returns are skipped.
 *
 * The multiline_comment flag of hd carries an open block comment over to
 * the next line. The primary buffer of hd collects the current word; both
 * buffers of hd are reset on entry.
 *
 * @param src the source line
 * @param dest the buffer that receives the HTML output
 * @param hd highlighter data carrying the state between lines
 */
void c2html_java_highlighter(char const *src, CxBuffer *dest,
        c2html_highlighter_data *hd) {
    /* reset buffers without clearing them */
    hd->primary_buffer.size = hd->primary_buffer.pos = 0;
    hd->secondary_buffer.size = hd->secondary_buffer.pos = 0;

    /* alias the buffers for better handling */
    CxBuffer *wbuf = &hd->primary_buffer;

    /* local information */
    size_t sp = (size_t)-1;
    int isstring = 0, iscomment = 0, isimport = 0;
    char quote = '\0';
    int isescaping = 0;

    /* continue a multi line comment highlighting */
    if (hd->multiline_comment) {
        iscomment = 1;
        cxBufferPutString(dest, "<span class=\"c2html-comment\">");
    }

    char c;
    do {
        c = src[++sp];
        if (c == '\r') continue;

        /* comments */
        if (!isstring && c == '/') {
            if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
                /* end of a block comment */
                iscomment = 0;
                hd->multiline_comment = 0;
                cxBufferPutString(dest, "/</span>");
                continue;
            } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
                /* start of a line comment or a block comment */
                iscomment = 1;
                hd->multiline_comment = (src[sp+1] == '*');
                cxBufferPutString(dest, "<span class=\"c2html-comment\">");
            }
        }

        if (iscomment) {
            if (c == '\n') {
                /* the span is closed per line and reopened on the next */
                cxBufferPutString(dest, "</span>\n");
            } else {
                put_htmlescaped(dest, c);
            }
        } else if (isimport) {
            /* TODO: local imports */
        } else {
            /* strings */
            if (!isescaping && (c == '\'' || c == '\"')) {
                if (isstring) {
                    /* a quote inside a string is written exactly once;
                     * only the matching quote terminates the string */
                    put_htmlescaped(dest, c);
                    if (c == quote) {
                        isstring = 0;
                        cxBufferPutString(dest, "</span>");
                    }
                } else {
                    isstring = 1;
                    quote = c;
                    cxBufferPutString(dest,
                        "<span class=\"c2html-string\">");
                    put_htmlescaped(dest, c);
                }
            } else {
                if (isstring) {
                    put_htmlescaped(dest, c);
                } else if (isalnum((unsigned char) c)
                        || c == '_' || c == '@') {
                    /* buffer the current word */
                    cxBufferPut(wbuf, c);
                } else {
                    /* write buffered word, if any */
                    if (wbuf->size > 0) {
                        cxstring word = cx_strn(wbuf->space, wbuf->size);
                        int closespan = 1;
                        /* NOTE(review): the upper case test precedes the
                         * caps-only test, so all-caps names that start with
                         * a letter are classified as type - confirm that
                         * this order is intended */
                        if (check_keyword(word, jkeywords)) {
                            cxBufferPutString(dest,
                                "<span class=\"c2html-keyword\">");
                        } else if (isupper((unsigned char) word.ptr[0])) {
                            cxBufferPutString(dest,
                                "<span class=\"c2html-type\">");
                        } else if (word.ptr[0] == '@') {
                            cxBufferPutString(dest,
                                "<span class=\"c2html-directive\">");
                        } else if (check_capsonly(word)) {
                            cxBufferPutString(dest,
                                "<span class=\"c2html-macroconst\">");
                        } else {
                            closespan = 0;
                        }
                        put_htmlescapedstr(dest, word);

                        if (closespan) {
                            cxBufferPutString(dest, "</span>");
                        }
                    }
                    wbuf->pos = wbuf->size = 0; /* reset buffer */

                    /* write current character */
                    put_htmlescaped(dest, c);
                }
            }

            /* a backslash escapes the next character unless it is
             * escaped itself */
            isescaping = !isescaping && (c == '\\');
        }
    } while (c && c != '\n');
}

mercurial