src/highlighter.c

Wed, 31 Aug 2016 14:41:56 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 31 Aug 2016 14:41:56 +0200
changeset 55
bf54085ce341
parent 54
b3f24e23bc25
child 57
eba880c1705c
permissions
-rw-r--r--

adds appropriate public API

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2016 Mike Becker. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  *
    28  */
    30 #include "highlighter.h"
    32 #include <stdlib.h>
    33 #include <stdio.h>
    34 #include <string.h>
    35 #include <ctype.h>
    36 #include "ucx/string.h"
    37 #include "ucx/utils.h"
    39 static void put_htmlescaped(UcxBuffer *dest, char c) {
    40     if (c == '>') {
    41         ucx_buffer_puts(dest, "&gt;");
    42     } else if (c == '<') {
    43         ucx_buffer_puts(dest, "&lt;");
    44     } else if (c) {
    45         ucx_buffer_putc(dest, c);
    46     }
    47 }
    49 static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) {
    50     for (int i = 0 ; i < s.length ; i++) {
    51         put_htmlescaped(dest, s.ptr[i]);
    52     }
    53 }
    55 static int check_keyword(sstr_t word, const char** keywords) {
    56     for (int i = 0 ; keywords[i] ; i++) {
    57         if (sstrcmp(word, sstr((char*)keywords[i])) == 0) {
    58             return 1;
    59         }
    60     }
    61     return 0;
    62 }
    64 static int check_capsonly(sstr_t word) {
    65     for (size_t i = 0 ; i < word.length ; i++) {
    66         if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i])
    67                 && word.ptr[i] != '_') {
    68             return 0;
    69         }
    70     }
    71     return 1;
    72 }
    74 /* Plaintext Highlighter */
    76 void c2html_plain_highlighter(char *src, UcxBuffer *dest, c2html_highlighter_data *hd) {
    77     while (*src && *src != '\n') {
    78         put_htmlescaped(dest, *src);
    79         src++;
    80     }
    81     ucx_buffer_putc(dest, '\n');
    82 }
    84 /* C Highlighter */
    86 static const char* ckeywords[] = {
    87     "auto", "break", "case", "char", "const", "continue", "default", "do",
    88     "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    89     "long", "register", "return", "short", "signed", "sizeof", "static",
    90     "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    91     "while", NULL
    92 };
    94 void c2html_c_highlighter(char *src, UcxBuffer *dest, c2html_highlighter_data *hd) {
    95     /* reset buffers without clearing them */
    96     hd->primary_buffer->size = hd->primary_buffer->pos = 0;
    97     hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
    99     /* alias the buffers for better handling */
   100     UcxBuffer *wbuf = hd->primary_buffer;
   101     UcxBuffer *ifilebuf = hd->secondary_buffer;
   103     /* local information */
   104     size_t sp = (size_t)-1;
   105     int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
   106     char quote = '\0';
   107     int isescaping = 0;
   109     /* continue a multi line comment highlighting */
   110     if (hd->multiline_comment) {
   111         iscomment = 1;
   112         ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   113     }
   115     char c;
   116     do {
   117         c = src[++sp];
   119         /* comments */
   120         if (!isstring && c == '/') {
   121             if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
   122                 iscomment = 0;
   123                 hd->multiline_comment = 0;
   124                 ucx_buffer_puts(dest, "/</span>");
   125                 continue;
   126             } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   127                 iscomment = 1;
   128                 hd->multiline_comment = (src[sp+1] == '*');
   129                 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   130             }
   131         }
   133         if (iscomment) {
   134             if (c == '\n') {
   135                 ucx_buffer_puts(dest, "</span>\n");
   136             } else {
   137                 put_htmlescaped(dest, c);
   138             }
   139         } else if (isinclude) {
   140             if (c == '<') {
   141                 ucx_buffer_puts(dest,
   142                         "<span class=\"c2html-stdinclude\">&lt;");
   143             } else if (c == '\"') {
   144                 if (parseinclude) {
   145                     ucx_buffer_puts(dest, "\">");
   146                     ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest);
   147                     ucx_buffer_puts(dest, "\"</a>");
   148                     parseinclude = 0;
   149                 } else {
   150                     ucx_buffer_puts(dest,
   151                             "<a class=\"c2html-userinclude\" href=\"");
   152                     ucx_buffer_putc(ifilebuf, '\"');
   153                     parseinclude = 1;
   154                 }
   155             } else if (c == '>') {
   156                 ucx_buffer_puts(dest,  "&gt;</span>");
   157             } else {
   158                 if (parseinclude) {
   159                     ucx_buffer_putc(ifilebuf, c);
   160                 }
   161                 put_htmlescaped(dest, c);
   162             }
   163         } else {
   164             /* strings */
   165             if (!isescaping && (c == '\'' || c == '\"')) {
   166                 if (isstring) {
   167                     put_htmlescaped(dest, c);
   168                     if (c == quote) {
   169                         isstring = 0;
   170                         ucx_buffer_puts(dest, "</span>");
   171                     } else {
   172                         put_htmlescaped(dest, c);
   173                     }
   174                 } else {
   175                     isstring = 1;
   176                     quote = c;
   177                     ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
   178                     put_htmlescaped(dest, c);
   179                 }
   180             } else {
   181                 if (isstring) {
   182                     put_htmlescaped(dest, c);
   183                 } else if (isalnum(c) ||  c == '_' || c == '#') {
   184                     /* buffer the current word */
   185                     ucx_buffer_putc(wbuf, c);
   186                 } else {
   187                     /* write buffered word, if any */
   188                     if (wbuf->size > 0) {
   189                         sstr_t word = sstrn(wbuf->space, wbuf->size);
   190                         int closespan = 1;
   191                         sstr_t typesuffix = ST("_t");
   192                         if (check_keyword(word, ckeywords)) {
   193                             ucx_buffer_puts(dest,
   194                                     "<span class=\"c2html-keyword\">");
   195                         } else if (sstrsuffix(word, typesuffix)) {
   196                             ucx_buffer_puts(dest,
   197                                 "<span class=\"c2html-type\">");
   198                         } else if (word.ptr[0] == '#') {
   199                             isinclude = !sstrcmp(word, S("#include"));
   200                             ucx_buffer_puts(dest,
   201                                 "<span class=\"c2html-directive\">");
   202                         } else if (check_capsonly(word)) {
   203                             ucx_buffer_puts(dest,
   204                                 "<span class=\"c2html-macroconst\">");
   205                         } else {
   206                             closespan = 0;
   207                         }
   208                         put_htmlescapedstr(dest, word);
   209                         if (closespan) {
   210                             ucx_buffer_puts(dest, "</span>");
   211                         }
   212                     }
   213                     wbuf->pos = wbuf->size = 0; /* reset word buffer */
   215                     /* write current character */
   216                     put_htmlescaped(dest, c);
   217                 }
   218             }
   220             isescaping = !isescaping & (c == '\\');
   221         }
   222     } while (c && c != '\n');
   223 }
   225 /* Java Highlighter */
   227 static const char* jkeywords[] = {
   228     "abstract", "continue", "for", "new", "switch", "assert", "default", "goto",
   229     "package", "synchronized", "boolean", "do", "if", "private", "this",
   230     "break", "double", "implements", "protected", "throw", "byte", "else",
   231     "import", "public", "throws", "case", "enum", "instanceof", "return",
   232     "transient", "catch", "extends", "int", "short", "try", "char", "final",
   233     "interface", "static", "void", "class", "finally", "long", "strictfp",
   234     "volatile", "const", "float", "native", "super", "while", NULL
   235 };
   237 void c2html_java_highlighter(char *src, UcxBuffer *dest, c2html_highlighter_data *hd) {
   238     /* reset buffers without clearing them */
   239     hd->primary_buffer->size = hd->primary_buffer->pos = 0;
   240     hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
   242     /* alias the buffers for better handling */
   243     UcxBuffer *wbuf = hd->primary_buffer;
   245     /* local information */
   246     size_t sp = (size_t)-1;
   247     int isstring = 0, iscomment = 0, isimport = 0;
   248     char quote = '\0';
   249     int isescaping = 0;
   251     if (hd->multiline_comment) {
   252         iscomment = 1;
   253         ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   254     }
   256     char c;
   257     do {
   258         c = src[++sp];
   260         /* comments */
   261         if (!isstring && c == '/') {
   262             if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
   263                 iscomment = 0;
   264                 hd->multiline_comment = 0;
   265                 ucx_buffer_puts(dest, "/</span>");
   266                 continue;
   267             } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   268                 iscomment = 1;
   269                 hd->multiline_comment = (src[sp+1] == '*');
   270                 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   271             }
   272         }
   274         if (iscomment) {
   275             if (c == '\n') {
   276                 ucx_buffer_puts(dest, "</span>\n");
   277             } else {
   278                 put_htmlescaped(dest, c);
   279             }
   280         } else if (isimport) {
   281             /* TODO: local imports */
   282         } else {
   283             /* strings */
   284             if (!isescaping && (c == '\'' || c == '\"')) {
   285                 if (isstring) {
   286                     put_htmlescaped(dest, c);
   287                     if (c == quote) {
   288                         isstring = 0;
   289                         ucx_buffer_puts(dest, "</span>");
   290                     } else {
   291                         put_htmlescaped(dest, c);
   292                     }
   293                 } else {
   294                     isstring = 1;
   295                     quote = c;
   296                     ucx_buffer_puts(dest,
   297                         "<span class=\"c2html-string\">");
   298                     put_htmlescaped(dest, c);
   299                 }
   300             } else {
   301                 if (isstring) {
   302                     put_htmlescaped(dest, c);
   303                 } else if (isalnum(c) || c == '_' || c == '@') {
   304                     /* buffer the current word */
   305                     ucx_buffer_putc(wbuf, c);
   306                 } else {
   307                     /* write buffered word, if any */
   308                     if (wbuf->size > 0) {
   309                         sstr_t word = sstrn(wbuf->space, wbuf->size);
   310                         int closespan = 1;
   311                         if (check_keyword(word, jkeywords)) {
   312                             ucx_buffer_puts(dest,
   313                                 "<span class=\"c2html-keyword\">");
   314                         } else if (isupper(word.ptr[0])) {
   315                             ucx_buffer_puts(dest,
   316                                 "<span class=\"c2html-type\">");
   317                         } else if (word.ptr[0] == '@') {
   318                             ucx_buffer_puts(dest,
   319                                 "<span class=\"c2html-directive\">");
   320                         } else if (check_capsonly(word)) {
   321                             ucx_buffer_puts(dest,
   322                                 "<span class=\"c2html-macroconst\">");
   323                         } else {
   324                             closespan = 0;
   325                         }
   326                         put_htmlescapedstr(dest, word);
   328                         if (closespan) {
   329                             ucx_buffer_puts(dest, "</span>");
   330                         }
   331                     }
   332                     wbuf->pos = wbuf->size = 0; /* reset buffer */
   334                     /* write current character */
   335                     put_htmlescaped(dest, c);
   336                 }
   337             }
   339             isescaping = !isescaping & (c == '\\');
   340         }
   341     } while (c && c != '\n');
   342 }

mercurial