src/highlighter.c

changeset 52
33ded421c512
parent 51
f25ba6fd7a08
child 53
5e47a26a16f0
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/highlighter.c	Fri Aug 26 13:49:19 2016 +0200
     1.3 @@ -0,0 +1,356 @@
     1.4 +/*
     1.5 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     1.6 + *
     1.7 + * Copyright 2016 Mike Becker. All rights reserved.
     1.8 + *
     1.9 + * Redistribution and use in source and binary forms, with or without
    1.10 + * modification, are permitted provided that the following conditions are met:
    1.11 + *
    1.12 + *   1. Redistributions of source code must retain the above copyright
    1.13 + *      notice, this list of conditions and the following disclaimer.
    1.14 + *
    1.15 + *   2. Redistributions in binary form must reproduce the above copyright
    1.16 + *      notice, this list of conditions and the following disclaimer in the
    1.17 + *      documentation and/or other materials provided with the distribution.
    1.18 + *
    1.19 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    1.20 + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    1.21 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    1.22 + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    1.23 + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    1.24 + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    1.25 + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    1.26 + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    1.27 + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    1.28 + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    1.29 + * POSSIBILITY OF SUCH DAMAGE.
    1.30 + *
    1.31 + */
    1.32 +
    1.33 +#include "highlighter.h"
    1.34 +
    1.35 +HighlighterData* new_highlighter_data() {
    1.36 +    HighlighterData* hd = malloc(sizeof(HighlighterData));
    1.37 +    if (hd == NULL) {
    1.38 +        return NULL;
    1.39 +    } else {
    1.40 +        hd->multiline_comment = 0;
    1.41 +        hd->primary_buffer = ucx_buffer_new(NULL, 256, UCX_BUFFER_AUTOEXTEND);
    1.42 +        hd->secondary_buffer = ucx_buffer_new(NULL, 32, UCX_BUFFER_AUTOEXTEND);
    1.43 +        return hd;
    1.44 +    }
    1.45 +}
    1.46 +
    1.47 +void free_highlighter_data(HighlighterData *hd) {
    1.48 +    ucx_buffer_free(hd->primary_buffer);
    1.49 +    ucx_buffer_free(hd->secondary_buffer);
    1.50 +    free(hd);
    1.51 +}
    1.52 +
    1.53 +static void put_htmlescaped(UcxBuffer *dest, char c) {
    1.54 +    if (c == '>') {
    1.55 +        ucx_buffer_puts(dest, ">");
    1.56 +    } else if (c == '<') {
    1.57 +        ucx_buffer_puts(dest, "&lt;");
    1.58 +    } else {
    1.59 +        ucx_buffer_putc(dest, c);
    1.60 +    }
    1.61 +}
    1.62 +
    1.63 +static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) {
    1.64 +    for (int i = 0 ; i < s.length ; i++) {
    1.65 +        put_htmlescaped(dest, s.ptr[i]);
    1.66 +    }
    1.67 +}
    1.68 +
    1.69 +static int check_keyword(sstr_t word, const char** keywords) {
    1.70 +    for (int i = 0 ; keywords[i] ; i++) {
    1.71 +        if (sstrcmp(word, sstr((char*)keywords[i])) == 0) {
    1.72 +            return 1;
    1.73 +        }
    1.74 +    }
    1.75 +    return 0;
    1.76 +}
    1.77 +
    1.78 +static int check_capsonly(sstr_t word) {
    1.79 +    for (size_t i = 0 ; i < word.length ; i++) {
    1.80 +        if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i])
    1.81 +                && word.ptr[i] != '_') {
    1.82 +            return 0;
    1.83 +        }
    1.84 +    }
    1.85 +    return 1;
    1.86 +}
    1.87 +
    1.88 +/* Plaintext Highlighter */
    1.89 +
    1.90 +void plain_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
    1.91 +    while (*src && *src != '\n') {
    1.92 +        put_htmlescaped(dest, *src);
    1.93 +        src++;
    1.94 +    }
    1.95 +    ucx_buffer_putc(dest, '\n');
    1.96 +}
    1.97 +
    1.98 +/* C Highlighter */
    1.99 +
   1.100 +static const char* ckeywords[] = {
   1.101 +    "auto", "break", "case", "char", "const", "continue", "default", "do",
   1.102 +    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
   1.103 +    "long", "register", "return", "short", "signed", "sizeof", "static",
   1.104 +    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
   1.105 +    "while", NULL
   1.106 +};
   1.107 +
   1.108 +void c_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
   1.109 +    /* reset buffers without clearing them */
   1.110 +    hd->primary_buffer->size = hd->primary_buffer->pos = 0;
   1.111 +    hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
   1.112 +    
   1.113 +    /* alias the buffers for better handling */
   1.114 +    UcxBuffer *wbuf = hd->primary_buffer;
   1.115 +    UcxBuffer *ifilebuf = hd->secondary_buffer;
   1.116 +    
   1.117 +    /* local information */
   1.118 +    size_t sp = (size_t)-1;
   1.119 +    int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
   1.120 +    char quote = '\0';
   1.121 +    int isescaping = 0;
   1.122 +    
   1.123 +    /* continue a multi line comment highlighting */
   1.124 +    if (hd->multiline_comment) {
   1.125 +        iscomment = 1;
   1.126 +        ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   1.127 +    }
   1.128 +
   1.129 +    char c;
   1.130 +    do {
   1.131 +        c = src[++sp];
   1.132 +        if (!c) break;
   1.133 +        
   1.134 +        /* comments */
   1.135 +        if (!isstring && c == '/') {
   1.136 +            if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
   1.137 +                iscomment = 0;
   1.138 +                hd->multiline_comment = 0;
   1.139 +                ucx_buffer_puts(dest, "/</span>");
   1.140 +                continue;
   1.141 +            } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   1.142 +                iscomment = 1;
   1.143 +                hd->multiline_comment = (src[sp+1] == '*');
   1.144 +                ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   1.145 +            }
   1.146 +        }
   1.147 +
   1.148 +        if (iscomment) {
   1.149 +            if (c == '\n') {
   1.150 +                ucx_buffer_puts(dest, "</span>\n");
   1.151 +            } else {
   1.152 +                put_htmlescaped(dest, c);
   1.153 +            }
   1.154 +        } else if (isinclude) {
   1.155 +            if (c == '<') {
   1.156 +                ucx_buffer_puts(dest,
   1.157 +                        "<span class=\"c2html-stdinclude\">&lt;");
   1.158 +            } else if (c == '\"') {
   1.159 +                if (parseinclude) {
   1.160 +                    ucx_buffer_puts(dest, "\">");
   1.161 +                    ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest);
   1.162 +                    ucx_buffer_puts(dest, "\"</a>");
   1.163 +                    parseinclude = 0;
   1.164 +                } else {
   1.165 +                    ucx_buffer_puts(dest,
   1.166 +                            "<a class=\"c2html-userinclude\" href=\"");
   1.167 +                    ucx_buffer_putc(ifilebuf, '\"');
   1.168 +                    parseinclude = 1;
   1.169 +                }
   1.170 +            } else if (c == '>') {
   1.171 +                ucx_buffer_puts(dest,  "&gt;</span>");
   1.172 +            } else {
   1.173 +                if (parseinclude) {
   1.174 +                    ucx_buffer_putc(ifilebuf, c);
   1.175 +                }
   1.176 +                put_htmlescaped(dest, c);
   1.177 +            }
   1.178 +        } else {
   1.179 +            /* strings */
   1.180 +            if (!isescaping && (c == '\'' || c == '\"')) {
   1.181 +                if (isstring) {
   1.182 +                    put_htmlescaped(dest, c);
   1.183 +                    if (c == quote) {
   1.184 +                        isstring = 0;
   1.185 +                        ucx_buffer_puts(dest, "</span>");
   1.186 +                    } else {
   1.187 +                        put_htmlescaped(dest, c);
   1.188 +                    }
   1.189 +                } else {
   1.190 +                    isstring = 1;
   1.191 +                    quote = c;
   1.192 +                    ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
   1.193 +                    put_htmlescaped(dest, c);
   1.194 +                }
   1.195 +            } else {
   1.196 +                if (isstring) {
   1.197 +                    put_htmlescaped(dest, c);
   1.198 +                } else if (!isalnum(c) && c!='_' && c!='#') {
   1.199 +                    /* write buffered word, if any */
   1.200 +                    if (wbuf->size > 0) {
   1.201 +                        sstr_t word = sstrn(wbuf->space, wbuf->size);
   1.202 +                        int closespan = 1;
   1.203 +                        sstr_t typesuffix = ST("_t");
   1.204 +                        if (check_keyword(word, ckeywords)) {
   1.205 +                            ucx_buffer_puts(dest,
   1.206 +                                    "<span class=\"c2html-keyword\">");
   1.207 +                        } else if (sstrsuffix(word, typesuffix)) {
   1.208 +                            ucx_buffer_puts(dest,
   1.209 +                                "<span class=\"c2html-type\">");
   1.210 +                        } else if (word.ptr[0] == '#') {
   1.211 +                            isinclude = !sstrcmp(word, S("#include"));
   1.212 +                            ucx_buffer_puts(dest,
   1.213 +                                "<span class=\"c2html-directive\">");
   1.214 +                        } else if (check_capsonly(word)) {
   1.215 +                            ucx_buffer_puts(dest,
   1.216 +                                "<span class=\"c2html-macroconst\">");
   1.217 +                        } else {
   1.218 +                            closespan = 0;
   1.219 +                        }
   1.220 +                        put_htmlescapedstr(dest, word);
   1.221 +                        if (closespan) {
   1.222 +                            ucx_buffer_puts(dest, "</span>");
   1.223 +                        }
   1.224 +                    }
   1.225 +                    wbuf->pos = wbuf->size = 0; /* reset word buffer */
   1.226 +                    
   1.227 +                    /* write current character */
   1.228 +                    put_htmlescaped(dest, c);
   1.229 +                } else {
   1.230 +                    /* buffer the current word */
   1.231 +                    ucx_buffer_putc(wbuf, c);
   1.232 +                }
   1.233 +            }
   1.234 +
   1.235 +            isescaping = !isescaping & (c == '\\');
   1.236 +        }
   1.237 +    } while (c != '\n');
   1.238 +}
   1.239 +
   1.240 +/* Java Highlighter */
   1.241 +
   1.242 +static const char* jkeywords[] = {
   1.243 +    "abstract", "continue", "for", "new", "switch", "assert", "default", "goto",
   1.244 +    "package", "synchronized", "boolean", "do", "if", "private", "this",
   1.245 +    "break", "double", "implements", "protected", "throw", "byte", "else",
   1.246 +    "import", "public", "throws", "case", "enum", "instanceof", "return",
   1.247 +    "transient", "catch", "extends", "int", "short", "try", "char", "final",
   1.248 +    "interface", "static", "void", "class", "finally", "long", "strictfp",
   1.249 +    "volatile", "const", "float", "native", "super", "while", NULL
   1.250 +};
   1.251 +
   1.252 +void java_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
   1.253 +    /* reset buffers without clearing them */
   1.254 +    hd->primary_buffer->size = hd->primary_buffer->pos = 0;
   1.255 +    hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
   1.256 +
   1.257 +    /* alias the buffers for better handling */
   1.258 +    UcxBuffer *wbuf = hd->primary_buffer;
   1.259 +    
   1.260 +    /* local information */
   1.261 +    size_t sp = (size_t)-1;
   1.262 +    int isstring = 0, iscomment = 0, isimport = 0;
   1.263 +    char quote = '\0';
   1.264 +    int isescaping = 0;
   1.265 +
   1.266 +    if (hd->multiline_comment) {
   1.267 +        iscomment = 1;
   1.268 +        ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   1.269 +    }
   1.270 +
   1.271 +    char c;
   1.272 +    do {
   1.273 +        c = src[++sp];
   1.274 +        if (!c) break;
   1.275 +        
   1.276 +        /* comments */
   1.277 +        if (!isstring && c == '/') {
   1.278 +            if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
   1.279 +                iscomment = 0;
   1.280 +                hd->multiline_comment = 0;
   1.281 +                ucx_buffer_puts(dest, "/</span>");
   1.282 +                continue;
   1.283 +            } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   1.284 +                iscomment = 1;
   1.285 +                hd->multiline_comment = (src[sp+1] == '*');
   1.286 +                ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   1.287 +            }
   1.288 +        }
   1.289 +
   1.290 +        if (iscomment) {
   1.291 +            if (c == '\n') {
   1.292 +                ucx_buffer_puts(dest, "</span>\n");
   1.293 +            } else {
   1.294 +                put_htmlescaped(dest, c);
   1.295 +            }
   1.296 +        } else if (isimport) {
   1.297 +            /* TODO: local imports */
   1.298 +        } else {
   1.299 +            /* strings */
   1.300 +            if (!isescaping && (c == '\'' || c == '\"')) {
   1.301 +                if (isstring) {
   1.302 +                    put_htmlescaped(dest, c);
   1.303 +                    if (c == quote) {
   1.304 +                        isstring = 0;
   1.305 +                        ucx_buffer_puts(dest, "</span>");
   1.306 +                    } else {
   1.307 +                        put_htmlescaped(dest, c);
   1.308 +                    }
   1.309 +                } else {
   1.310 +                    isstring = 1;
   1.311 +                    quote = c;
   1.312 +                    ucx_buffer_puts(dest,
   1.313 +                        "<span class=\"c2html-string\">");
   1.314 +                    put_htmlescaped(dest, c);
   1.315 +                }
   1.316 +            } else {
   1.317 +                if (isstring) {
   1.318 +                    put_htmlescaped(dest, c);
   1.319 +                } else if (!isalnum(c) && c!='_' && c!='@') {
   1.320 +                    /* write buffered word, if any */
   1.321 +                    if (wbuf->size > 0) {
   1.322 +                        sstr_t word = sstrn(wbuf->space, wbuf->size);
   1.323 +                        int closespan = 1;
   1.324 +                        if (check_keyword(word, jkeywords)) {
   1.325 +                            ucx_buffer_puts(dest,
   1.326 +                                "<span class=\"c2html-keyword\">");
   1.327 +                        } else if (isupper(word.ptr[0])) {
   1.328 +                            ucx_buffer_puts(dest,
   1.329 +                                "<span class=\"c2html-type\">");
   1.330 +                        } else if (word.ptr[0] == '@') {
   1.331 +                            ucx_buffer_puts(dest,
   1.332 +                                "<span class=\"c2html-directive\">");
   1.333 +                        } else if (check_capsonly(word)) {
   1.334 +                            ucx_buffer_puts(dest,
   1.335 +                                "<span class=\"c2html-macroconst\">");
   1.336 +                        } else {
   1.337 +                            closespan = 0;
   1.338 +                        }
   1.339 +                        put_htmlescapedstr(dest, word);
   1.340 +                        
   1.341 +                        if (closespan) {
   1.342 +                            ucx_buffer_puts(dest, "</span>");
   1.343 +                        }
   1.344 +                    }
   1.345 +                    wbuf->pos = wbuf->size = 0; /* reset buffer */
   1.346 +                    
   1.347 +                    /* write current character */
   1.348 +                    put_htmlescaped(dest, c);
   1.349 +                } else {
   1.350 +                    /* buffer the current word */
   1.351 +                    ucx_buffer_putc(wbuf, c);
   1.352 +                }
   1.353 +            }
   1.354 +
   1.355 +            isescaping = !isescaping & (c == '\\');
   1.356 +        }
   1.357 +    } while (c != '\n');
   1.358 +}
   1.359 +

mercurial