src/highlighter.c

Fri, 26 Aug 2016 13:49:19 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 26 Aug 2016 13:49:19 +0200
changeset 52
33ded421c512
parent 51
src/codegens.c@f25ba6fd7a08
child 53
5e47a26a16f0
permissions
-rw-r--r--

merges all highlighter functions into one highlighter module

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2016 Mike Becker. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  *
    28  */
    30 #include "highlighter.h"
    32 HighlighterData* new_highlighter_data() {
    33     HighlighterData* hd = malloc(sizeof(HighlighterData));
    34     if (hd == NULL) {
    35         return NULL;
    36     } else {
    37         hd->multiline_comment = 0;
    38         hd->primary_buffer = ucx_buffer_new(NULL, 256, UCX_BUFFER_AUTOEXTEND);
    39         hd->secondary_buffer = ucx_buffer_new(NULL, 32, UCX_BUFFER_AUTOEXTEND);
    40         return hd;
    41     }
    42 }
    44 void free_highlighter_data(HighlighterData *hd) {
    45     ucx_buffer_free(hd->primary_buffer);
    46     ucx_buffer_free(hd->secondary_buffer);
    47     free(hd);
    48 }
    50 static void put_htmlescaped(UcxBuffer *dest, char c) {
    51     if (c == '>') {
    52         ucx_buffer_puts(dest, "&gt;");
    53     } else if (c == '<') {
    54         ucx_buffer_puts(dest, "&lt;");
    55     } else {
    56         ucx_buffer_putc(dest, c);
    57     }
    58 }
    60 static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) {
    61     for (int i = 0 ; i < s.length ; i++) {
    62         put_htmlescaped(dest, s.ptr[i]);
    63     }
    64 }
    66 static int check_keyword(sstr_t word, const char** keywords) {
    67     for (int i = 0 ; keywords[i] ; i++) {
    68         if (sstrcmp(word, sstr((char*)keywords[i])) == 0) {
    69             return 1;
    70         }
    71     }
    72     return 0;
    73 }
    75 static int check_capsonly(sstr_t word) {
    76     for (size_t i = 0 ; i < word.length ; i++) {
    77         if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i])
    78                 && word.ptr[i] != '_') {
    79             return 0;
    80         }
    81     }
    82     return 1;
    83 }
    85 /* Plaintext Highlighter */
    87 void plain_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
    88     while (*src && *src != '\n') {
    89         put_htmlescaped(dest, *src);
    90         src++;
    91     }
    92     ucx_buffer_putc(dest, '\n');
    93 }
    95 /* C Highlighter */
    97 static const char* ckeywords[] = {
    98     "auto", "break", "case", "char", "const", "continue", "default", "do",
    99     "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
   100     "long", "register", "return", "short", "signed", "sizeof", "static",
   101     "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
   102     "while", NULL
   103 };
   105 void c_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
   106     /* reset buffers without clearing them */
   107     hd->primary_buffer->size = hd->primary_buffer->pos = 0;
   108     hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
   110     /* alias the buffers for better handling */
   111     UcxBuffer *wbuf = hd->primary_buffer;
   112     UcxBuffer *ifilebuf = hd->secondary_buffer;
   114     /* local information */
   115     size_t sp = (size_t)-1;
   116     int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
   117     char quote = '\0';
   118     int isescaping = 0;
   120     /* continue a multi line comment highlighting */
   121     if (hd->multiline_comment) {
   122         iscomment = 1;
   123         ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   124     }
   126     char c;
   127     do {
   128         c = src[++sp];
   129         if (!c) break;
   131         /* comments */
   132         if (!isstring && c == '/') {
   133             if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
   134                 iscomment = 0;
   135                 hd->multiline_comment = 0;
   136                 ucx_buffer_puts(dest, "/</span>");
   137                 continue;
   138             } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   139                 iscomment = 1;
   140                 hd->multiline_comment = (src[sp+1] == '*');
   141                 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   142             }
   143         }
   145         if (iscomment) {
   146             if (c == '\n') {
   147                 ucx_buffer_puts(dest, "</span>\n");
   148             } else {
   149                 put_htmlescaped(dest, c);
   150             }
   151         } else if (isinclude) {
   152             if (c == '<') {
   153                 ucx_buffer_puts(dest,
   154                         "<span class=\"c2html-stdinclude\">&lt;");
   155             } else if (c == '\"') {
   156                 if (parseinclude) {
   157                     ucx_buffer_puts(dest, "\">");
   158                     ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest);
   159                     ucx_buffer_puts(dest, "\"</a>");
   160                     parseinclude = 0;
   161                 } else {
   162                     ucx_buffer_puts(dest,
   163                             "<a class=\"c2html-userinclude\" href=\"");
   164                     ucx_buffer_putc(ifilebuf, '\"');
   165                     parseinclude = 1;
   166                 }
   167             } else if (c == '>') {
   168                 ucx_buffer_puts(dest,  "&gt;</span>");
   169             } else {
   170                 if (parseinclude) {
   171                     ucx_buffer_putc(ifilebuf, c);
   172                 }
   173                 put_htmlescaped(dest, c);
   174             }
   175         } else {
   176             /* strings */
   177             if (!isescaping && (c == '\'' || c == '\"')) {
   178                 if (isstring) {
   179                     put_htmlescaped(dest, c);
   180                     if (c == quote) {
   181                         isstring = 0;
   182                         ucx_buffer_puts(dest, "</span>");
   183                     } else {
   184                         put_htmlescaped(dest, c);
   185                     }
   186                 } else {
   187                     isstring = 1;
   188                     quote = c;
   189                     ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
   190                     put_htmlescaped(dest, c);
   191                 }
   192             } else {
   193                 if (isstring) {
   194                     put_htmlescaped(dest, c);
   195                 } else if (!isalnum(c) && c!='_' && c!='#') {
   196                     /* write buffered word, if any */
   197                     if (wbuf->size > 0) {
   198                         sstr_t word = sstrn(wbuf->space, wbuf->size);
   199                         int closespan = 1;
   200                         sstr_t typesuffix = ST("_t");
   201                         if (check_keyword(word, ckeywords)) {
   202                             ucx_buffer_puts(dest,
   203                                     "<span class=\"c2html-keyword\">");
   204                         } else if (sstrsuffix(word, typesuffix)) {
   205                             ucx_buffer_puts(dest,
   206                                 "<span class=\"c2html-type\">");
   207                         } else if (word.ptr[0] == '#') {
   208                             isinclude = !sstrcmp(word, S("#include"));
   209                             ucx_buffer_puts(dest,
   210                                 "<span class=\"c2html-directive\">");
   211                         } else if (check_capsonly(word)) {
   212                             ucx_buffer_puts(dest,
   213                                 "<span class=\"c2html-macroconst\">");
   214                         } else {
   215                             closespan = 0;
   216                         }
   217                         put_htmlescapedstr(dest, word);
   218                         if (closespan) {
   219                             ucx_buffer_puts(dest, "</span>");
   220                         }
   221                     }
   222                     wbuf->pos = wbuf->size = 0; /* reset word buffer */
   224                     /* write current character */
   225                     put_htmlescaped(dest, c);
   226                 } else {
   227                     /* buffer the current word */
   228                     ucx_buffer_putc(wbuf, c);
   229                 }
   230             }
   232             isescaping = !isescaping & (c == '\\');
   233         }
   234     } while (c != '\n');
   235 }
   237 /* Java Highlighter */
   239 static const char* jkeywords[] = {
   240     "abstract", "continue", "for", "new", "switch", "assert", "default", "goto",
   241     "package", "synchronized", "boolean", "do", "if", "private", "this",
   242     "break", "double", "implements", "protected", "throw", "byte", "else",
   243     "import", "public", "throws", "case", "enum", "instanceof", "return",
   244     "transient", "catch", "extends", "int", "short", "try", "char", "final",
   245     "interface", "static", "void", "class", "finally", "long", "strictfp",
   246     "volatile", "const", "float", "native", "super", "while", NULL
   247 };
   249 void java_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
   250     /* reset buffers without clearing them */
   251     hd->primary_buffer->size = hd->primary_buffer->pos = 0;
   252     hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
   254     /* alias the buffers for better handling */
   255     UcxBuffer *wbuf = hd->primary_buffer;
   257     /* local information */
   258     size_t sp = (size_t)-1;
   259     int isstring = 0, iscomment = 0, isimport = 0;
   260     char quote = '\0';
   261     int isescaping = 0;
   263     if (hd->multiline_comment) {
   264         iscomment = 1;
   265         ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   266     }
   268     char c;
   269     do {
   270         c = src[++sp];
   271         if (!c) break;
   273         /* comments */
   274         if (!isstring && c == '/') {
   275             if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
   276                 iscomment = 0;
   277                 hd->multiline_comment = 0;
   278                 ucx_buffer_puts(dest, "/</span>");
   279                 continue;
   280             } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   281                 iscomment = 1;
   282                 hd->multiline_comment = (src[sp+1] == '*');
   283                 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   284             }
   285         }
   287         if (iscomment) {
   288             if (c == '\n') {
   289                 ucx_buffer_puts(dest, "</span>\n");
   290             } else {
   291                 put_htmlescaped(dest, c);
   292             }
   293         } else if (isimport) {
   294             /* TODO: local imports */
   295         } else {
   296             /* strings */
   297             if (!isescaping && (c == '\'' || c == '\"')) {
   298                 if (isstring) {
   299                     put_htmlescaped(dest, c);
   300                     if (c == quote) {
   301                         isstring = 0;
   302                         ucx_buffer_puts(dest, "</span>");
   303                     } else {
   304                         put_htmlescaped(dest, c);
   305                     }
   306                 } else {
   307                     isstring = 1;
   308                     quote = c;
   309                     ucx_buffer_puts(dest,
   310                         "<span class=\"c2html-string\">");
   311                     put_htmlescaped(dest, c);
   312                 }
   313             } else {
   314                 if (isstring) {
   315                     put_htmlescaped(dest, c);
   316                 } else if (!isalnum(c) && c!='_' && c!='@') {
   317                     /* write buffered word, if any */
   318                     if (wbuf->size > 0) {
   319                         sstr_t word = sstrn(wbuf->space, wbuf->size);
   320                         int closespan = 1;
   321                         if (check_keyword(word, jkeywords)) {
   322                             ucx_buffer_puts(dest,
   323                                 "<span class=\"c2html-keyword\">");
   324                         } else if (isupper(word.ptr[0])) {
   325                             ucx_buffer_puts(dest,
   326                                 "<span class=\"c2html-type\">");
   327                         } else if (word.ptr[0] == '@') {
   328                             ucx_buffer_puts(dest,
   329                                 "<span class=\"c2html-directive\">");
   330                         } else if (check_capsonly(word)) {
   331                             ucx_buffer_puts(dest,
   332                                 "<span class=\"c2html-macroconst\">");
   333                         } else {
   334                             closespan = 0;
   335                         }
   336                         put_htmlescapedstr(dest, word);
   338                         if (closespan) {
   339                             ucx_buffer_puts(dest, "</span>");
   340                         }
   341                     }
   342                     wbuf->pos = wbuf->size = 0; /* reset buffer */
   344                     /* write current character */
   345                     put_htmlescaped(dest, c);
   346                 } else {
   347                     /* buffer the current word */
   348                     ucx_buffer_putc(wbuf, c);
   349                 }
   350             }
   352             isescaping = !isescaping & (c == '\\');
   353         }
   354     } while (c != '\n');
   355 }

mercurial