src/highlighter.c

Fri, 26 Aug 2016 14:15:29 +0200

author
Mike Becker <universe@uap-core.de>
date
Fri, 26 Aug 2016 14:15:29 +0200
changeset 53
5e47a26a16f0
parent 52
33ded421c512
child 54
b3f24e23bc25
permissions
-rw-r--r--

adds appendfile() function to main module + adds TODOs for source files which do not terminate with a blank line

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2016 Mike Becker. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  *
    28  */
    30 #include "highlighter.h"
    32 HighlighterData* new_highlighter_data() {
    33     HighlighterData* hd = malloc(sizeof(HighlighterData));
    34     if (hd == NULL) {
    35         return NULL;
    36     } else {
    37         hd->multiline_comment = 0;
    38         hd->primary_buffer = ucx_buffer_new(NULL, 256, UCX_BUFFER_AUTOEXTEND);
    39         hd->secondary_buffer = ucx_buffer_new(NULL, 32, UCX_BUFFER_AUTOEXTEND);
    40         return hd;
    41     }
    42 }
    44 void free_highlighter_data(HighlighterData *hd) {
    45     ucx_buffer_free(hd->primary_buffer);
    46     ucx_buffer_free(hd->secondary_buffer);
    47     free(hd);
    48 }
    50 static void put_htmlescaped(UcxBuffer *dest, char c) {
    51     if (c == '>') {
    52         ucx_buffer_puts(dest, "&gt;");
    53     } else if (c == '<') {
    54         ucx_buffer_puts(dest, "&lt;");
    55     } else {
    56         ucx_buffer_putc(dest, c);
    57     }
    58 }
    60 static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) {
    61     for (int i = 0 ; i < s.length ; i++) {
    62         put_htmlescaped(dest, s.ptr[i]);
    63     }
    64 }
    66 static int check_keyword(sstr_t word, const char** keywords) {
    67     for (int i = 0 ; keywords[i] ; i++) {
    68         if (sstrcmp(word, sstr((char*)keywords[i])) == 0) {
    69             return 1;
    70         }
    71     }
    72     return 0;
    73 }
    75 static int check_capsonly(sstr_t word) {
    76     for (size_t i = 0 ; i < word.length ; i++) {
    77         if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i])
    78                 && word.ptr[i] != '_') {
    79             return 0;
    80         }
    81     }
    82     return 1;
    83 }
    85 /* Plaintext Highlighter */
    87 void plain_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
    88     while (*src && *src != '\n') {
    89         put_htmlescaped(dest, *src);
    90         src++;
    91     }
    92     ucx_buffer_putc(dest, '\n');
    93 }
    95 /* C Highlighter */
    97 static const char* ckeywords[] = {
    98     "auto", "break", "case", "char", "const", "continue", "default", "do",
    99     "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
   100     "long", "register", "return", "short", "signed", "sizeof", "static",
   101     "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
   102     "while", NULL
   103 };
   105 void c_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
   106     /* reset buffers without clearing them */
   107     hd->primary_buffer->size = hd->primary_buffer->pos = 0;
   108     hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
   110     /* alias the buffers for better handling */
   111     UcxBuffer *wbuf = hd->primary_buffer;
   112     UcxBuffer *ifilebuf = hd->secondary_buffer;
   114     /* local information */
   115     size_t sp = (size_t)-1;
   116     int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
   117     char quote = '\0';
   118     int isescaping = 0;
   120     /* continue a multi line comment highlighting */
   121     if (hd->multiline_comment) {
   122         iscomment = 1;
   123         ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   124     }
   126     char c;
   127     do {
   128         c = src[++sp];
   129         if (!c) {
   130             /* TODO: might cause problems if code file does not end with NL */
   131             break;
   132         }
   134         /* comments */
   135         if (!isstring && c == '/') {
   136             if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
   137                 iscomment = 0;
   138                 hd->multiline_comment = 0;
   139                 ucx_buffer_puts(dest, "/</span>");
   140                 continue;
   141             } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   142                 iscomment = 1;
   143                 hd->multiline_comment = (src[sp+1] == '*');
   144                 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   145             }
   146         }
   148         if (iscomment) {
   149             if (c == '\n') {
   150                 ucx_buffer_puts(dest, "</span>\n");
   151             } else {
   152                 put_htmlescaped(dest, c);
   153             }
   154         } else if (isinclude) {
   155             if (c == '<') {
   156                 ucx_buffer_puts(dest,
   157                         "<span class=\"c2html-stdinclude\">&lt;");
   158             } else if (c == '\"') {
   159                 if (parseinclude) {
   160                     ucx_buffer_puts(dest, "\">");
   161                     ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest);
   162                     ucx_buffer_puts(dest, "\"</a>");
   163                     parseinclude = 0;
   164                 } else {
   165                     ucx_buffer_puts(dest,
   166                             "<a class=\"c2html-userinclude\" href=\"");
   167                     ucx_buffer_putc(ifilebuf, '\"');
   168                     parseinclude = 1;
   169                 }
   170             } else if (c == '>') {
   171                 ucx_buffer_puts(dest,  "&gt;</span>");
   172             } else {
   173                 if (parseinclude) {
   174                     ucx_buffer_putc(ifilebuf, c);
   175                 }
   176                 put_htmlescaped(dest, c);
   177             }
   178         } else {
   179             /* strings */
   180             if (!isescaping && (c == '\'' || c == '\"')) {
   181                 if (isstring) {
   182                     put_htmlescaped(dest, c);
   183                     if (c == quote) {
   184                         isstring = 0;
   185                         ucx_buffer_puts(dest, "</span>");
   186                     } else {
   187                         put_htmlescaped(dest, c);
   188                     }
   189                 } else {
   190                     isstring = 1;
   191                     quote = c;
   192                     ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
   193                     put_htmlescaped(dest, c);
   194                 }
   195             } else {
   196                 if (isstring) {
   197                     put_htmlescaped(dest, c);
   198                 } else if (!isalnum(c) && c!='_' && c!='#') {
   199                     /* write buffered word, if any */
   200                     if (wbuf->size > 0) {
   201                         sstr_t word = sstrn(wbuf->space, wbuf->size);
   202                         int closespan = 1;
   203                         sstr_t typesuffix = ST("_t");
   204                         if (check_keyword(word, ckeywords)) {
   205                             ucx_buffer_puts(dest,
   206                                     "<span class=\"c2html-keyword\">");
   207                         } else if (sstrsuffix(word, typesuffix)) {
   208                             ucx_buffer_puts(dest,
   209                                 "<span class=\"c2html-type\">");
   210                         } else if (word.ptr[0] == '#') {
   211                             isinclude = !sstrcmp(word, S("#include"));
   212                             ucx_buffer_puts(dest,
   213                                 "<span class=\"c2html-directive\">");
   214                         } else if (check_capsonly(word)) {
   215                             ucx_buffer_puts(dest,
   216                                 "<span class=\"c2html-macroconst\">");
   217                         } else {
   218                             closespan = 0;
   219                         }
   220                         put_htmlescapedstr(dest, word);
   221                         if (closespan) {
   222                             ucx_buffer_puts(dest, "</span>");
   223                         }
   224                     }
   225                     wbuf->pos = wbuf->size = 0; /* reset word buffer */
   227                     /* write current character */
   228                     put_htmlescaped(dest, c);
   229                 } else {
   230                     /* buffer the current word */
   231                     ucx_buffer_putc(wbuf, c);
   232                 }
   233             }
   235             isescaping = !isescaping & (c == '\\');
   236         }
   237     } while (c != '\n');
   238 }
   240 /* Java Highlighter */
   242 static const char* jkeywords[] = {
   243     "abstract", "continue", "for", "new", "switch", "assert", "default", "goto",
   244     "package", "synchronized", "boolean", "do", "if", "private", "this",
   245     "break", "double", "implements", "protected", "throw", "byte", "else",
   246     "import", "public", "throws", "case", "enum", "instanceof", "return",
   247     "transient", "catch", "extends", "int", "short", "try", "char", "final",
   248     "interface", "static", "void", "class", "finally", "long", "strictfp",
   249     "volatile", "const", "float", "native", "super", "while", NULL
   250 };
   252 void java_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
   253     /* reset buffers without clearing them */
   254     hd->primary_buffer->size = hd->primary_buffer->pos = 0;
   255     hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
   257     /* alias the buffers for better handling */
   258     UcxBuffer *wbuf = hd->primary_buffer;
   260     /* local information */
   261     size_t sp = (size_t)-1;
   262     int isstring = 0, iscomment = 0, isimport = 0;
   263     char quote = '\0';
   264     int isescaping = 0;
   266     if (hd->multiline_comment) {
   267         iscomment = 1;
   268         ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   269     }
   271     char c;
   272     do {
   273         c = src[++sp];
   274         if (!c) {
   275             /* TODO: might cause problems if code file does not end with NL */
   276             break;
   277         }
   279         /* comments */
   280         if (!isstring && c == '/') {
   281             if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
   282                 iscomment = 0;
   283                 hd->multiline_comment = 0;
   284                 ucx_buffer_puts(dest, "/</span>");
   285                 continue;
   286             } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   287                 iscomment = 1;
   288                 hd->multiline_comment = (src[sp+1] == '*');
   289                 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   290             }
   291         }
   293         if (iscomment) {
   294             if (c == '\n') {
   295                 ucx_buffer_puts(dest, "</span>\n");
   296             } else {
   297                 put_htmlescaped(dest, c);
   298             }
   299         } else if (isimport) {
   300             /* TODO: local imports */
   301         } else {
   302             /* strings */
   303             if (!isescaping && (c == '\'' || c == '\"')) {
   304                 if (isstring) {
   305                     put_htmlescaped(dest, c);
   306                     if (c == quote) {
   307                         isstring = 0;
   308                         ucx_buffer_puts(dest, "</span>");
   309                     } else {
   310                         put_htmlescaped(dest, c);
   311                     }
   312                 } else {
   313                     isstring = 1;
   314                     quote = c;
   315                     ucx_buffer_puts(dest,
   316                         "<span class=\"c2html-string\">");
   317                     put_htmlescaped(dest, c);
   318                 }
   319             } else {
   320                 if (isstring) {
   321                     put_htmlescaped(dest, c);
   322                 } else if (!isalnum(c) && c!='_' && c!='@') {
   323                     /* write buffered word, if any */
   324                     if (wbuf->size > 0) {
   325                         sstr_t word = sstrn(wbuf->space, wbuf->size);
   326                         int closespan = 1;
   327                         if (check_keyword(word, jkeywords)) {
   328                             ucx_buffer_puts(dest,
   329                                 "<span class=\"c2html-keyword\">");
   330                         } else if (isupper(word.ptr[0])) {
   331                             ucx_buffer_puts(dest,
   332                                 "<span class=\"c2html-type\">");
   333                         } else if (word.ptr[0] == '@') {
   334                             ucx_buffer_puts(dest,
   335                                 "<span class=\"c2html-directive\">");
   336                         } else if (check_capsonly(word)) {
   337                             ucx_buffer_puts(dest,
   338                                 "<span class=\"c2html-macroconst\">");
   339                         } else {
   340                             closespan = 0;
   341                         }
   342                         put_htmlescapedstr(dest, word);
   344                         if (closespan) {
   345                             ucx_buffer_puts(dest, "</span>");
   346                         }
   347                     }
   348                     wbuf->pos = wbuf->size = 0; /* reset buffer */
   350                     /* write current character */
   351                     put_htmlescaped(dest, c);
   352                 } else {
   353                     /* buffer the current word */
   354                     ucx_buffer_putc(wbuf, c);
   355                 }
   356             }
   358             isescaping = !isescaping & (c == '\\');
   359         }
   360     } while (c != '\n');
   361 }

mercurial