src/highlighter.c

changeset 52
33ded421c512
parent 51
f25ba6fd7a08
child 53
5e47a26a16f0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/highlighter.c	Fri Aug 26 13:49:19 2016 +0200
@@ -0,0 +1,356 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2016 Mike Becker. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "highlighter.h"
+
+/*
+ * Allocates a HighlighterData object together with its two work buffers.
+ *
+ * The multi-line comment flag starts cleared. The primary buffer is created
+ * with an initial capacity of 256 bytes and the secondary buffer with
+ * 32 bytes, both with the UCX_BUFFER_AUTOEXTEND flag.
+ *
+ * Returns the new object, or NULL if any of the allocations failed. The
+ * result is meant to be released with free_highlighter_data().
+ */
+HighlighterData* new_highlighter_data() {
+    HighlighterData* hd = malloc(sizeof(HighlighterData));
+    if (hd == NULL) {
+        return NULL;
+    }
+
+    hd->multiline_comment = 0;
+    hd->primary_buffer = ucx_buffer_new(NULL, 256, UCX_BUFFER_AUTOEXTEND);
+    hd->secondary_buffer = ucx_buffer_new(NULL, 32, UCX_BUFFER_AUTOEXTEND);
+
+    if (hd->primary_buffer == NULL || hd->secondary_buffer == NULL) {
+        /* never hand out a half-initialized object: the highlighters
+         * dereference both buffers unconditionally */
+        if (hd->primary_buffer != NULL) {
+            ucx_buffer_free(hd->primary_buffer);
+        }
+        if (hd->secondary_buffer != NULL) {
+            ucx_buffer_free(hd->secondary_buffer);
+        }
+        free(hd);
+        return NULL;
+    }
+
+    return hd;
+}
+
+/*
+ * Releases a HighlighterData object and the two buffers it owns.
+ *
+ * Like free(), this accepts NULL and then does nothing. Buffer members that
+ * are NULL are skipped, so an object whose buffers could not be allocated
+ * can be released safely as well.
+ */
+void free_highlighter_data(HighlighterData *hd) {
+    if (hd == NULL) {
+        return;
+    }
+    if (hd->primary_buffer != NULL) {
+        ucx_buffer_free(hd->primary_buffer);
+    }
+    if (hd->secondary_buffer != NULL) {
+        ucx_buffer_free(hd->secondary_buffer);
+    }
+    free(hd);
+}
+
+/*
+ * Appends a single character to dest, replacing the characters that are
+ * special in HTML by their character references:
+ * '>' becomes "&gt;", '<' becomes "&lt;" and '&' becomes "&amp;".
+ * Every other character is appended unchanged.
+ *
+ * The ampersand must be escaped as well; otherwise source text that
+ * already looks like a character reference (e.g. "&lt;") would be decoded
+ * by the browser instead of being displayed literally.
+ */
+static void put_htmlescaped(UcxBuffer *dest, char c) {
+    if (c == '>') {
+        ucx_buffer_puts(dest, "&gt;");
+    } else if (c == '<') {
+        ucx_buffer_puts(dest, "&lt;");
+    } else if (c == '&') {
+        ucx_buffer_puts(dest, "&amp;");
+    } else {
+        ucx_buffer_putc(dest, c);
+    }
+}
+
+/*
+ * Appends all s.length characters of s to dest, passing each of them
+ * through put_htmlescaped().
+ */
+static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) {
+    /* size_t index: avoids comparing a signed int with the string length */
+    for (size_t i = 0 ; i < s.length ; i++) {
+        put_htmlescaped(dest, s.ptr[i]);
+    }
+}
+
+/*
+ * Tells whether word is contained in a keyword list.
+ *
+ * keywords must be terminated by a NULL entry. Returns 1 as soon as an
+ * entry compares equal to word (sstrcmp() yields 0), and 0 if the end of
+ * the list is reached without a match.
+ */
+static int check_keyword(sstr_t word, const char** keywords) {
+    const char **entry = keywords;
+    while (*entry != NULL) {
+        sstr_t candidate = sstr((char*)*entry);
+        if (!sstrcmp(word, candidate)) {
+            return 1;
+        }
+        entry++;
+    }
+    return 0;
+}
+
+/*
+ * Tells whether word consists only of upper case letters, digits and
+ * underscores.
+ *
+ * Returns 1 if every character qualifies (this includes the empty word)
+ * and 0 at the first character that does not.
+ */
+static int check_capsonly(sstr_t word) {
+    for (size_t i = 0 ; i < word.length ; i++) {
+        /* the <ctype.h> functions require a value representable as
+         * unsigned char; a plain char may be negative */
+        unsigned char uc = (unsigned char) word.ptr[i];
+        if (!isupper(uc) && !isdigit(uc) && uc != '_') {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* Plaintext Highlighter */
+
+/*
+ * Highlighter for plain text: copies one line without adding any markup.
+ *
+ * Every character of src up to (but excluding) the first line break or the
+ * terminating zero byte is appended to dest through put_htmlescaped().
+ * Afterwards a single '\n' is appended in any case. The hd argument is not
+ * used by this highlighter.
+ */
+void plain_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
+    for (char *cur = src ; *cur != '\0' && *cur != '\n' ; cur++) {
+        put_htmlescaped(dest, *cur);
+    }
+    ucx_buffer_putc(dest, '\n');
+}
+
+/* C Highlighter
+ *
+ * ckeywords is the NULL-terminated word list that c_highlighter() hands to
+ * check_keyword(); a word found in it is wrapped in a span of the class
+ * c2html-keyword. The list holds C keywords such as "int" or "while";
+ * keywords added by C99 (e.g. "inline", "restrict") are not listed.
+ */
+
+static const char* ckeywords[] = {
+    "auto", "break", "case", "char", "const", "continue", "default", "do",
+    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
+    "long", "register", "return", "short", "signed", "sizeof", "static",
+    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
+    "while", NULL
+};
+
+/*
+ * Writes the word collected in wbuf to dest and empties wbuf.
+ *
+ * The word is wrapped in a span according to the first rule that matches:
+ * it is listed in ckeywords (c2html-keyword), it ends with "_t"
+ * (c2html-type), it starts with '#' (c2html-directive), or it consists
+ * only of upper case letters, digits and underscores (c2html-macroconst).
+ * Any other word is written without a span. For a word starting with '#',
+ * *isinclude is set to 1 if the word is "#include" and to 0 otherwise.
+ * Nothing is written when wbuf is empty.
+ */
+static void c_flush_word(UcxBuffer *dest, UcxBuffer *wbuf, int *isinclude) {
+    if (wbuf->size > 0) {
+        sstr_t word = sstrn(wbuf->space, wbuf->size);
+        int closespan = 1;
+        sstr_t typesuffix = ST("_t");
+        if (check_keyword(word, ckeywords)) {
+            ucx_buffer_puts(dest, "<span class=\"c2html-keyword\">");
+        } else if (sstrsuffix(word, typesuffix)) {
+            ucx_buffer_puts(dest, "<span class=\"c2html-type\">");
+        } else if (word.ptr[0] == '#') {
+            *isinclude = !sstrcmp(word, S("#include"));
+            ucx_buffer_puts(dest, "<span class=\"c2html-directive\">");
+        } else if (check_capsonly(word)) {
+            ucx_buffer_puts(dest, "<span class=\"c2html-macroconst\">");
+        } else {
+            closespan = 0;
+        }
+        put_htmlescapedstr(dest, word);
+        if (closespan) {
+            ucx_buffer_puts(dest, "</span>");
+        }
+    }
+    wbuf->pos = wbuf->size = 0; /* reset word buffer */
+}
+
+/*
+ * Highlights one line of C source code as HTML.
+ *
+ * Processes src up to and including the first '\n', or up to the
+ * terminating zero byte if there is no line break, and appends the result
+ * to dest. Comments, string and character literals, keywords, type names,
+ * preprocessor directives, upper case constants and include file names are
+ * wrapped in elements with c2html-* classes; all text is HTML-escaped.
+ *
+ * hd->multiline_comment carries the "inside a block comment" state from
+ * one call to the next. Both buffers of hd are reset and used as scratch
+ * space: the primary buffer collects the current word, the secondary
+ * buffer the name of a user include file. All other state is local, so
+ * every span opened for a comment or a string is closed again at the end
+ * of the line.
+ */
+void c_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
+    /* reset buffers without clearing them */
+    hd->primary_buffer->size = hd->primary_buffer->pos = 0;
+    hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
+
+    /* alias the buffers for better handling */
+    UcxBuffer *wbuf = hd->primary_buffer;
+    UcxBuffer *ifilebuf = hd->secondary_buffer;
+
+    /* local information */
+    size_t sp = (size_t)-1;
+    size_t minclose = 0; /* first index at which a comment may be closed */
+    int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
+    char quote = '\0';
+    int isescaping = 0;
+
+    /* continue a multi line comment highlighting */
+    if (hd->multiline_comment) {
+        iscomment = 1;
+        ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
+    }
+
+    char c;
+    do {
+        c = src[++sp];
+        if (!c) break;
+
+        /* comments */
+        if (!isstring && c == '/') {
+            if (hd->multiline_comment && sp > 0 && sp >= minclose
+                    && src[sp-1] == '*') {
+                iscomment = 0;
+                hd->multiline_comment = 0;
+                ucx_buffer_puts(dest, "/</span>");
+                continue;
+            } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
+                /* a word directly in front of the comment comes first */
+                c_flush_word(dest, wbuf, &isinclude);
+                iscomment = 1;
+                hd->multiline_comment = (src[sp+1] == '*');
+                /* the '*' of the opening sequence must not be taken for
+                 * the '*' of a closing one, i.e. "/ * /" without the
+                 * blanks is not a complete comment */
+                minclose = sp + 3;
+                ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
+            }
+        }
+
+        /* a quote ends a pending word, too; flushing before the state
+         * dispatch keeps the output in source order and lets a quote that
+         * directly follows #include be treated as an include file name */
+        if (!iscomment && !isinclude && !isstring
+                && (c == '\'' || c == '\"')) {
+            c_flush_word(dest, wbuf, &isinclude);
+        }
+
+        if (iscomment) {
+            if (c == '\n') {
+                ucx_buffer_puts(dest, "</span>\n");
+            } else {
+                put_htmlescaped(dest, c);
+            }
+        } else if (isinclude) {
+            if (c == '<') {
+                ucx_buffer_puts(dest,
+                        "<span class=\"c2html-stdinclude\">&lt;");
+            } else if (c == '\"') {
+                if (parseinclude) {
+                    /* closing quote: finish the href attribute and use the
+                     * buffered name (with its opening quote) as link text */
+                    ucx_buffer_puts(dest, "\">");
+                    ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest);
+                    ucx_buffer_puts(dest, "\"</a>");
+                    parseinclude = 0;
+                } else {
+                    ucx_buffer_puts(dest,
+                            "<a class=\"c2html-userinclude\" href=\"");
+                    ucx_buffer_putc(ifilebuf, '\"');
+                    parseinclude = 1;
+                }
+            } else if (c == '>') {
+                ucx_buffer_puts(dest,  "&gt;</span>");
+            } else {
+                if (parseinclude) {
+                    ucx_buffer_putc(ifilebuf, c);
+                }
+                put_htmlescaped(dest, c);
+            }
+        } else {
+            /* strings */
+            if (!isescaping && (c == '\'' || c == '\"')) {
+                if (isstring) {
+                    /* the other kind of quote is ordinary string content
+                     * and must be written exactly once */
+                    put_htmlescaped(dest, c);
+                    if (c == quote) {
+                        isstring = 0;
+                        ucx_buffer_puts(dest, "</span>");
+                    }
+                } else {
+                    isstring = 1;
+                    quote = c;
+                    ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
+                    put_htmlescaped(dest, c);
+                }
+            } else {
+                if (isstring) {
+                    if (c == '\n') {
+                        /* the string state is not carried over to the next
+                         * call, so the span has to end with the line */
+                        isstring = 0;
+                        ucx_buffer_puts(dest, "</span>\n");
+                    } else {
+                        put_htmlescaped(dest, c);
+                    }
+                } else if (!isalnum((unsigned char)c) && c!='_' && c!='#') {
+                    /* write buffered word, if any */
+                    c_flush_word(dest, wbuf, &isinclude);
+
+                    /* write current character */
+                    put_htmlescaped(dest, c);
+                } else {
+                    /* buffer the current word */
+                    ucx_buffer_putc(wbuf, c);
+                }
+            }
+
+            isescaping = !isescaping && (c == '\\');
+        }
+    } while (c != '\n');
+
+    if (c == '\0') {
+        /* the input ended without a line break: do not lose the last word
+         * and do not leave a comment or string span open */
+        c_flush_word(dest, wbuf, &isinclude);
+        if (iscomment || isstring) {
+            ucx_buffer_puts(dest, "</span>");
+        }
+    }
+}
+
+/* Java Highlighter
+ *
+ * jkeywords is the NULL-terminated word list that java_highlighter() hands
+ * to check_keyword(); a word found in it is wrapped in a span of the class
+ * c2html-keyword. The entries are not sorted; check_keyword() scans the
+ * list from the start until it reaches the NULL entry.
+ */
+
+static const char* jkeywords[] = {
+    "abstract", "continue", "for", "new", "switch", "assert", "default", "goto",
+    "package", "synchronized", "boolean", "do", "if", "private", "this",
+    "break", "double", "implements", "protected", "throw", "byte", "else",
+    "import", "public", "throws", "case", "enum", "instanceof", "return",
+    "transient", "catch", "extends", "int", "short", "try", "char", "final",
+    "interface", "static", "void", "class", "finally", "long", "strictfp",
+    "volatile", "const", "float", "native", "super", "while", NULL
+};
+
+/*
+ * Writes the word collected in wbuf to dest and empties wbuf.
+ *
+ * The word is wrapped in a span according to the first rule that matches:
+ * it is listed in jkeywords (c2html-keyword), it starts with an upper case
+ * letter (c2html-type), it starts with '@' (c2html-directive), or it
+ * consists only of upper case letters, digits and underscores
+ * (c2html-macroconst). Any other word is written without a span. Nothing
+ * is written when wbuf is empty.
+ *
+ * NOTE(review): an all upper case name like MAX_VALUE starts with an upper
+ * case letter and therefore already matches the type rule; the macroconst
+ * rule is only reached by words starting with a digit or an underscore.
+ * The original order of the rules is kept - confirm whether it is intended.
+ */
+static void java_flush_word(UcxBuffer *dest, UcxBuffer *wbuf) {
+    if (wbuf->size > 0) {
+        sstr_t word = sstrn(wbuf->space, wbuf->size);
+        int closespan = 1;
+        if (check_keyword(word, jkeywords)) {
+            ucx_buffer_puts(dest, "<span class=\"c2html-keyword\">");
+        } else if (isupper((unsigned char)word.ptr[0])) {
+            ucx_buffer_puts(dest, "<span class=\"c2html-type\">");
+        } else if (word.ptr[0] == '@') {
+            ucx_buffer_puts(dest, "<span class=\"c2html-directive\">");
+        } else if (check_capsonly(word)) {
+            ucx_buffer_puts(dest, "<span class=\"c2html-macroconst\">");
+        } else {
+            closespan = 0;
+        }
+        put_htmlescapedstr(dest, word);
+        if (closespan) {
+            ucx_buffer_puts(dest, "</span>");
+        }
+    }
+    wbuf->pos = wbuf->size = 0; /* reset buffer */
+}
+
+/*
+ * Highlights one line of Java source code as HTML.
+ *
+ * Processes src up to and including the first '\n', or up to the
+ * terminating zero byte if there is no line break, and appends the result
+ * to dest. Comments, string and character literals, keywords, type names,
+ * annotations and upper case constants are wrapped in span elements with
+ * c2html-* classes; all text is HTML-escaped.
+ *
+ * hd->multiline_comment carries the "inside a block comment" state from
+ * one call to the next. Both buffers of hd are reset; the primary buffer
+ * collects the current word. All other state is local, so every span
+ * opened for a comment or a string is closed again at the end of the line.
+ *
+ * TODO: local imports (import statements are not treated specially yet)
+ */
+void java_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
+    /* reset buffers without clearing them */
+    hd->primary_buffer->size = hd->primary_buffer->pos = 0;
+    hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
+
+    /* alias the buffers for better handling */
+    UcxBuffer *wbuf = hd->primary_buffer;
+
+    /* local information */
+    size_t sp = (size_t)-1;
+    size_t minclose = 0; /* first index at which a comment may be closed */
+    int isstring = 0, iscomment = 0;
+    char quote = '\0';
+    int isescaping = 0;
+
+    /* continue a multi line comment highlighting */
+    if (hd->multiline_comment) {
+        iscomment = 1;
+        ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
+    }
+
+    char c;
+    do {
+        c = src[++sp];
+        if (!c) break;
+
+        /* comments */
+        if (!isstring && c == '/') {
+            if (hd->multiline_comment && sp > 0 && sp >= minclose
+                    && src[sp-1] == '*') {
+                iscomment = 0;
+                hd->multiline_comment = 0;
+                ucx_buffer_puts(dest, "/</span>");
+                continue;
+            } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
+                /* a word directly in front of the comment comes first */
+                java_flush_word(dest, wbuf);
+                iscomment = 1;
+                hd->multiline_comment = (src[sp+1] == '*');
+                /* the '*' of the opening sequence must not be taken for
+                 * the '*' of a closing one, i.e. "/ * /" without the
+                 * blanks is not a complete comment */
+                minclose = sp + 3;
+                ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
+            }
+        }
+
+        if (iscomment) {
+            if (c == '\n') {
+                ucx_buffer_puts(dest, "</span>\n");
+            } else {
+                put_htmlescaped(dest, c);
+            }
+        } else {
+            /* strings */
+            if (!isescaping && (c == '\'' || c == '\"')) {
+                if (isstring) {
+                    /* the other kind of quote is ordinary string content
+                     * and must be written exactly once */
+                    put_htmlescaped(dest, c);
+                    if (c == quote) {
+                        isstring = 0;
+                        ucx_buffer_puts(dest, "</span>");
+                    }
+                } else {
+                    /* a quote ends a pending word, too */
+                    java_flush_word(dest, wbuf);
+                    isstring = 1;
+                    quote = c;
+                    ucx_buffer_puts(dest,
+                        "<span class=\"c2html-string\">");
+                    put_htmlescaped(dest, c);
+                }
+            } else {
+                if (isstring) {
+                    if (c == '\n') {
+                        /* the string state is not carried over to the next
+                         * call, so the span has to end with the line */
+                        isstring = 0;
+                        ucx_buffer_puts(dest, "</span>\n");
+                    } else {
+                        put_htmlescaped(dest, c);
+                    }
+                } else if (!isalnum((unsigned char)c) && c!='_' && c!='@') {
+                    /* write buffered word, if any */
+                    java_flush_word(dest, wbuf);
+
+                    /* write current character */
+                    put_htmlescaped(dest, c);
+                } else {
+                    /* buffer the current word */
+                    ucx_buffer_putc(wbuf, c);
+                }
+            }
+
+            isescaping = !isescaping && (c == '\\');
+        }
+    } while (c != '\n');
+
+    if (c == '\0') {
+        /* the input ended without a line break: do not lose the last word
+         * and do not leave a comment or string span open */
+        java_flush_word(dest, wbuf);
+        if (iscomment || isstring) {
+            ucx_buffer_puts(dest, "</span>");
+        }
+    }
+}
+

mercurial