src/highlighter.c

Wed, 31 Aug 2016 12:58:48 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 31 Aug 2016 12:58:48 +0200
changeset 54
b3f24e23bc25
parent 53
5e47a26a16f0
child 55
bf54085ce341
permissions
-rw-r--r--

highlighter can now handle files which do not end with a blank line

     1 /*
     2  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
     3  *
     4  * Copyright 2016 Mike Becker. All rights reserved.
     5  *
     6  * Redistribution and use in source and binary forms, with or without
     7  * modification, are permitted provided that the following conditions are met:
     8  *
     9  *   1. Redistributions of source code must retain the above copyright
    10  *      notice, this list of conditions and the following disclaimer.
    11  *
    12  *   2. Redistributions in binary form must reproduce the above copyright
    13  *      notice, this list of conditions and the following disclaimer in the
    14  *      documentation and/or other materials provided with the distribution.
    15  *
    16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
    20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    26  * POSSIBILITY OF SUCH DAMAGE.
    27  *
    28  */
    30 #include "highlighter.h"
    32 HighlighterData* new_highlighter_data() {
    33     HighlighterData* hd = malloc(sizeof(HighlighterData));
    34     if (hd == NULL) {
    35         return NULL;
    36     } else {
    37         hd->multiline_comment = 0;
    38         hd->primary_buffer = ucx_buffer_new(NULL, 256, UCX_BUFFER_AUTOEXTEND);
    39         hd->secondary_buffer = ucx_buffer_new(NULL, 32, UCX_BUFFER_AUTOEXTEND);
    40         return hd;
    41     }
    42 }
    44 void free_highlighter_data(HighlighterData *hd) {
    45     ucx_buffer_free(hd->primary_buffer);
    46     ucx_buffer_free(hd->secondary_buffer);
    47     free(hd);
    48 }
    50 static void put_htmlescaped(UcxBuffer *dest, char c) {
    51     if (c == '>') {
    52         ucx_buffer_puts(dest, "&gt;");
    53     } else if (c == '<') {
    54         ucx_buffer_puts(dest, "&lt;");
    55     } else if (c) {
    56         ucx_buffer_putc(dest, c);
    57     }
    58 }
    60 static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) {
    61     for (int i = 0 ; i < s.length ; i++) {
    62         put_htmlescaped(dest, s.ptr[i]);
    63     }
    64 }
    66 static int check_keyword(sstr_t word, const char** keywords) {
    67     for (int i = 0 ; keywords[i] ; i++) {
    68         if (sstrcmp(word, sstr((char*)keywords[i])) == 0) {
    69             return 1;
    70         }
    71     }
    72     return 0;
    73 }
    75 static int check_capsonly(sstr_t word) {
    76     for (size_t i = 0 ; i < word.length ; i++) {
    77         if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i])
    78                 && word.ptr[i] != '_') {
    79             return 0;
    80         }
    81     }
    82     return 1;
    83 }
    85 /* Plaintext Highlighter */
    87 void plain_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
    88     while (*src && *src != '\n') {
    89         put_htmlescaped(dest, *src);
    90         src++;
    91     }
    92     ucx_buffer_putc(dest, '\n');
    93 }
    95 /* C Highlighter */
    97 static const char* ckeywords[] = {
    98     "auto", "break", "case", "char", "const", "continue", "default", "do",
    99     "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
   100     "long", "register", "return", "short", "signed", "sizeof", "static",
   101     "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
   102     "while", NULL
   103 };
   105 void c_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
   106     /* reset buffers without clearing them */
   107     hd->primary_buffer->size = hd->primary_buffer->pos = 0;
   108     hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
   110     /* alias the buffers for better handling */
   111     UcxBuffer *wbuf = hd->primary_buffer;
   112     UcxBuffer *ifilebuf = hd->secondary_buffer;
   114     /* local information */
   115     size_t sp = (size_t)-1;
   116     int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
   117     char quote = '\0';
   118     int isescaping = 0;
   120     /* continue a multi line comment highlighting */
   121     if (hd->multiline_comment) {
   122         iscomment = 1;
   123         ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   124     }
   126     char c;
   127     do {
   128         c = src[++sp];
   130         /* comments */
   131         if (!isstring && c == '/') {
   132             if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
   133                 iscomment = 0;
   134                 hd->multiline_comment = 0;
   135                 ucx_buffer_puts(dest, "/</span>");
   136                 continue;
   137             } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   138                 iscomment = 1;
   139                 hd->multiline_comment = (src[sp+1] == '*');
   140                 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   141             }
   142         }
   144         if (iscomment) {
   145             if (c == '\n') {
   146                 ucx_buffer_puts(dest, "</span>\n");
   147             } else {
   148                 put_htmlescaped(dest, c);
   149             }
   150         } else if (isinclude) {
   151             if (c == '<') {
   152                 ucx_buffer_puts(dest,
   153                         "<span class=\"c2html-stdinclude\">&lt;");
   154             } else if (c == '\"') {
   155                 if (parseinclude) {
   156                     ucx_buffer_puts(dest, "\">");
   157                     ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest);
   158                     ucx_buffer_puts(dest, "\"</a>");
   159                     parseinclude = 0;
   160                 } else {
   161                     ucx_buffer_puts(dest,
   162                             "<a class=\"c2html-userinclude\" href=\"");
   163                     ucx_buffer_putc(ifilebuf, '\"');
   164                     parseinclude = 1;
   165                 }
   166             } else if (c == '>') {
   167                 ucx_buffer_puts(dest,  "&gt;</span>");
   168             } else {
   169                 if (parseinclude) {
   170                     ucx_buffer_putc(ifilebuf, c);
   171                 }
   172                 put_htmlescaped(dest, c);
   173             }
   174         } else {
   175             /* strings */
   176             if (!isescaping && (c == '\'' || c == '\"')) {
   177                 if (isstring) {
   178                     put_htmlescaped(dest, c);
   179                     if (c == quote) {
   180                         isstring = 0;
   181                         ucx_buffer_puts(dest, "</span>");
   182                     } else {
   183                         put_htmlescaped(dest, c);
   184                     }
   185                 } else {
   186                     isstring = 1;
   187                     quote = c;
   188                     ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
   189                     put_htmlescaped(dest, c);
   190                 }
   191             } else {
   192                 if (isstring) {
   193                     put_htmlescaped(dest, c);
   194                 } else if (isalnum(c) ||  c == '_' || c == '#') {
   195                     /* buffer the current word */
   196                     ucx_buffer_putc(wbuf, c);
   197                 } else {
   198                     /* write buffered word, if any */
   199                     if (wbuf->size > 0) {
   200                         sstr_t word = sstrn(wbuf->space, wbuf->size);
   201                         int closespan = 1;
   202                         sstr_t typesuffix = ST("_t");
   203                         if (check_keyword(word, ckeywords)) {
   204                             ucx_buffer_puts(dest,
   205                                     "<span class=\"c2html-keyword\">");
   206                         } else if (sstrsuffix(word, typesuffix)) {
   207                             ucx_buffer_puts(dest,
   208                                 "<span class=\"c2html-type\">");
   209                         } else if (word.ptr[0] == '#') {
   210                             isinclude = !sstrcmp(word, S("#include"));
   211                             ucx_buffer_puts(dest,
   212                                 "<span class=\"c2html-directive\">");
   213                         } else if (check_capsonly(word)) {
   214                             ucx_buffer_puts(dest,
   215                                 "<span class=\"c2html-macroconst\">");
   216                         } else {
   217                             closespan = 0;
   218                         }
   219                         put_htmlescapedstr(dest, word);
   220                         if (closespan) {
   221                             ucx_buffer_puts(dest, "</span>");
   222                         }
   223                     }
   224                     wbuf->pos = wbuf->size = 0; /* reset word buffer */
   226                     /* write current character */
   227                     put_htmlescaped(dest, c);
   228                 }
   229             }
   231             isescaping = !isescaping & (c == '\\');
   232         }
   233     } while (c && c != '\n');
   234 }
   236 /* Java Highlighter */
   238 static const char* jkeywords[] = {
   239     "abstract", "continue", "for", "new", "switch", "assert", "default", "goto",
   240     "package", "synchronized", "boolean", "do", "if", "private", "this",
   241     "break", "double", "implements", "protected", "throw", "byte", "else",
   242     "import", "public", "throws", "case", "enum", "instanceof", "return",
   243     "transient", "catch", "extends", "int", "short", "try", "char", "final",
   244     "interface", "static", "void", "class", "finally", "long", "strictfp",
   245     "volatile", "const", "float", "native", "super", "while", NULL
   246 };
   248 void java_highlighter(char *src, UcxBuffer *dest, HighlighterData *hd) {
   249     /* reset buffers without clearing them */
   250     hd->primary_buffer->size = hd->primary_buffer->pos = 0;
   251     hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
   253     /* alias the buffers for better handling */
   254     UcxBuffer *wbuf = hd->primary_buffer;
   256     /* local information */
   257     size_t sp = (size_t)-1;
   258     int isstring = 0, iscomment = 0, isimport = 0;
   259     char quote = '\0';
   260     int isescaping = 0;
   262     if (hd->multiline_comment) {
   263         iscomment = 1;
   264         ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   265     }
   267     char c;
   268     do {
   269         c = src[++sp];
   271         /* comments */
   272         if (!isstring && c == '/') {
   273             if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
   274                 iscomment = 0;
   275                 hd->multiline_comment = 0;
   276                 ucx_buffer_puts(dest, "/</span>");
   277                 continue;
   278             } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
   279                 iscomment = 1;
   280                 hd->multiline_comment = (src[sp+1] == '*');
   281                 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
   282             }
   283         }
   285         if (iscomment) {
   286             if (c == '\n') {
   287                 ucx_buffer_puts(dest, "</span>\n");
   288             } else {
   289                 put_htmlescaped(dest, c);
   290             }
   291         } else if (isimport) {
   292             /* TODO: local imports */
   293         } else {
   294             /* strings */
   295             if (!isescaping && (c == '\'' || c == '\"')) {
   296                 if (isstring) {
   297                     put_htmlescaped(dest, c);
   298                     if (c == quote) {
   299                         isstring = 0;
   300                         ucx_buffer_puts(dest, "</span>");
   301                     } else {
   302                         put_htmlescaped(dest, c);
   303                     }
   304                 } else {
   305                     isstring = 1;
   306                     quote = c;
   307                     ucx_buffer_puts(dest,
   308                         "<span class=\"c2html-string\">");
   309                     put_htmlescaped(dest, c);
   310                 }
   311             } else {
   312                 if (isstring) {
   313                     put_htmlescaped(dest, c);
   314                 } else if (isalnum(c) || c == '_' || c == '@') {
   315                     /* buffer the current word */
   316                     ucx_buffer_putc(wbuf, c);
   317                 } else {
   318                     /* write buffered word, if any */
   319                     if (wbuf->size > 0) {
   320                         sstr_t word = sstrn(wbuf->space, wbuf->size);
   321                         int closespan = 1;
   322                         if (check_keyword(word, jkeywords)) {
   323                             ucx_buffer_puts(dest,
   324                                 "<span class=\"c2html-keyword\">");
   325                         } else if (isupper(word.ptr[0])) {
   326                             ucx_buffer_puts(dest,
   327                                 "<span class=\"c2html-type\">");
   328                         } else if (word.ptr[0] == '@') {
   329                             ucx_buffer_puts(dest,
   330                                 "<span class=\"c2html-directive\">");
   331                         } else if (check_capsonly(word)) {
   332                             ucx_buffer_puts(dest,
   333                                 "<span class=\"c2html-macroconst\">");
   334                         } else {
   335                             closespan = 0;
   336                         }
   337                         put_htmlescapedstr(dest, word);
   339                         if (closespan) {
   340                             ucx_buffer_puts(dest, "</span>");
   341                         }
   342                     }
   343                     wbuf->pos = wbuf->size = 0; /* reset buffer */
   345                     /* write current character */
   346                     put_htmlescaped(dest, c);
   347                 }
   348             }
   350             isescaping = !isescaping & (c == '\\');
   351         }
   352     } while (c && c != '\n');
   353 }

mercurial