src/highlighter.c

Wed, 31 Aug 2016 14:41:56 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 31 Aug 2016 14:41:56 +0200
changeset 55
bf54085ce341
parent 54
b3f24e23bc25
child 57
eba880c1705c
permissions
-rw-r--r--

adds appropriate public API

universe@21 1 /*
universe@21 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@21 3 *
universe@35 4 * Copyright 2016 Mike Becker. All rights reserved.
universe@21 5 *
universe@21 6 * Redistribution and use in source and binary forms, with or without
universe@21 7 * modification, are permitted provided that the following conditions are met:
universe@21 8 *
universe@21 9 * 1. Redistributions of source code must retain the above copyright
universe@21 10 * notice, this list of conditions and the following disclaimer.
universe@21 11 *
universe@21 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@21 13 * notice, this list of conditions and the following disclaimer in the
universe@21 14 * documentation and/or other materials provided with the distribution.
universe@21 15 *
universe@21 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@21 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@21 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@21 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@21 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@21 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@21 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@21 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@21 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@21 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@21 26 * POSSIBILITY OF SUCH DAMAGE.
universe@21 27 *
universe@21 28 */
universe@21 29
universe@52 30 #include "highlighter.h"
universe@21 31
universe@55 32 #include <stdlib.h>
universe@55 33 #include <stdio.h>
universe@55 34 #include <string.h>
universe@55 35 #include <ctype.h>
universe@55 36 #include "ucx/string.h"
universe@55 37 #include "ucx/utils.h"
universe@51 38
universe@52 39 static void put_htmlescaped(UcxBuffer *dest, char c) {
universe@21 40 if (c == '>') {
universe@48 41 ucx_buffer_puts(dest, "&gt;");
universe@21 42 } else if (c == '<') {
universe@48 43 ucx_buffer_puts(dest, "&lt;");
universe@54 44 } else if (c) {
universe@48 45 ucx_buffer_putc(dest, c);
universe@21 46 }
universe@48 47 }
universe@21 48
universe@52 49 static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) {
universe@48 50 for (int i = 0 ; i < s.length ; i++) {
universe@48 51 put_htmlescaped(dest, s.ptr[i]);
universe@48 52 }
universe@21 53 }
universe@21 54
universe@52 55 static int check_keyword(sstr_t word, const char** keywords) {
universe@21 56 for (int i = 0 ; keywords[i] ; i++) {
universe@47 57 if (sstrcmp(word, sstr((char*)keywords[i])) == 0) {
universe@21 58 return 1;
universe@21 59 }
universe@21 60 }
universe@21 61 return 0;
universe@21 62 }
universe@21 63
universe@52 64 static int check_capsonly(sstr_t word) {
universe@47 65 for (size_t i = 0 ; i < word.length ; i++) {
universe@47 66 if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i])
universe@47 67 && word.ptr[i] != '_') {
universe@21 68 return 0;
universe@21 69 }
universe@21 70 }
universe@21 71 return 1;
universe@21 72 }
universe@52 73
universe@52 74 /* Plaintext Highlighter */
universe@52 75
universe@55 76 void c2html_plain_highlighter(char *src, UcxBuffer *dest, c2html_highlighter_data *hd) {
universe@52 77 while (*src && *src != '\n') {
universe@52 78 put_htmlescaped(dest, *src);
universe@52 79 src++;
universe@52 80 }
universe@52 81 ucx_buffer_putc(dest, '\n');
universe@52 82 }
universe@52 83
universe@52 84 /* C Highlighter */
universe@52 85
/*
 * Words that the C highlighter marks up as keywords.
 * The list is terminated by a NULL entry, as check_keyword() expects.
 */
static const char *ckeywords[] = {
    "auto", "break", "case", "char",
    "const", "continue", "default", "do",
    "double", "else", "enum", "extern",
    "float", "for", "goto", "if",
    "int", "long", "register", "return",
    "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union",
    "unsigned", "void", "volatile", "while",
    NULL
};
universe@52 93
universe@55 94 void c2html_c_highlighter(char *src, UcxBuffer *dest, c2html_highlighter_data *hd) {
universe@52 95 /* reset buffers without clearing them */
universe@52 96 hd->primary_buffer->size = hd->primary_buffer->pos = 0;
universe@52 97 hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
universe@52 98
universe@52 99 /* alias the buffers for better handling */
universe@52 100 UcxBuffer *wbuf = hd->primary_buffer;
universe@52 101 UcxBuffer *ifilebuf = hd->secondary_buffer;
universe@52 102
universe@52 103 /* local information */
universe@52 104 size_t sp = (size_t)-1;
universe@52 105 int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
universe@52 106 char quote = '\0';
universe@52 107 int isescaping = 0;
universe@52 108
universe@52 109 /* continue a multi line comment highlighting */
universe@52 110 if (hd->multiline_comment) {
universe@52 111 iscomment = 1;
universe@52 112 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 113 }
universe@52 114
universe@52 115 char c;
universe@52 116 do {
universe@52 117 c = src[++sp];
universe@52 118
universe@52 119 /* comments */
universe@52 120 if (!isstring && c == '/') {
universe@52 121 if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
universe@52 122 iscomment = 0;
universe@52 123 hd->multiline_comment = 0;
universe@52 124 ucx_buffer_puts(dest, "/</span>");
universe@52 125 continue;
universe@52 126 } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
universe@52 127 iscomment = 1;
universe@52 128 hd->multiline_comment = (src[sp+1] == '*');
universe@52 129 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 130 }
universe@52 131 }
universe@52 132
universe@52 133 if (iscomment) {
universe@52 134 if (c == '\n') {
universe@52 135 ucx_buffer_puts(dest, "</span>\n");
universe@52 136 } else {
universe@52 137 put_htmlescaped(dest, c);
universe@52 138 }
universe@52 139 } else if (isinclude) {
universe@52 140 if (c == '<') {
universe@52 141 ucx_buffer_puts(dest,
universe@52 142 "<span class=\"c2html-stdinclude\">&lt;");
universe@52 143 } else if (c == '\"') {
universe@52 144 if (parseinclude) {
universe@52 145 ucx_buffer_puts(dest, "\">");
universe@52 146 ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest);
universe@52 147 ucx_buffer_puts(dest, "\"</a>");
universe@52 148 parseinclude = 0;
universe@52 149 } else {
universe@52 150 ucx_buffer_puts(dest,
universe@52 151 "<a class=\"c2html-userinclude\" href=\"");
universe@52 152 ucx_buffer_putc(ifilebuf, '\"');
universe@52 153 parseinclude = 1;
universe@52 154 }
universe@52 155 } else if (c == '>') {
universe@52 156 ucx_buffer_puts(dest, "&gt;</span>");
universe@52 157 } else {
universe@52 158 if (parseinclude) {
universe@52 159 ucx_buffer_putc(ifilebuf, c);
universe@52 160 }
universe@52 161 put_htmlescaped(dest, c);
universe@52 162 }
universe@52 163 } else {
universe@52 164 /* strings */
universe@52 165 if (!isescaping && (c == '\'' || c == '\"')) {
universe@52 166 if (isstring) {
universe@52 167 put_htmlescaped(dest, c);
universe@52 168 if (c == quote) {
universe@52 169 isstring = 0;
universe@52 170 ucx_buffer_puts(dest, "</span>");
universe@52 171 } else {
universe@52 172 put_htmlescaped(dest, c);
universe@52 173 }
universe@52 174 } else {
universe@52 175 isstring = 1;
universe@52 176 quote = c;
universe@52 177 ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
universe@52 178 put_htmlescaped(dest, c);
universe@52 179 }
universe@52 180 } else {
universe@52 181 if (isstring) {
universe@52 182 put_htmlescaped(dest, c);
universe@54 183 } else if (isalnum(c) || c == '_' || c == '#') {
universe@54 184 /* buffer the current word */
universe@54 185 ucx_buffer_putc(wbuf, c);
universe@54 186 } else {
universe@52 187 /* write buffered word, if any */
universe@52 188 if (wbuf->size > 0) {
universe@52 189 sstr_t word = sstrn(wbuf->space, wbuf->size);
universe@52 190 int closespan = 1;
universe@52 191 sstr_t typesuffix = ST("_t");
universe@52 192 if (check_keyword(word, ckeywords)) {
universe@52 193 ucx_buffer_puts(dest,
universe@52 194 "<span class=\"c2html-keyword\">");
universe@52 195 } else if (sstrsuffix(word, typesuffix)) {
universe@52 196 ucx_buffer_puts(dest,
universe@52 197 "<span class=\"c2html-type\">");
universe@52 198 } else if (word.ptr[0] == '#') {
universe@52 199 isinclude = !sstrcmp(word, S("#include"));
universe@52 200 ucx_buffer_puts(dest,
universe@52 201 "<span class=\"c2html-directive\">");
universe@52 202 } else if (check_capsonly(word)) {
universe@52 203 ucx_buffer_puts(dest,
universe@52 204 "<span class=\"c2html-macroconst\">");
universe@52 205 } else {
universe@52 206 closespan = 0;
universe@52 207 }
universe@52 208 put_htmlescapedstr(dest, word);
universe@52 209 if (closespan) {
universe@52 210 ucx_buffer_puts(dest, "</span>");
universe@52 211 }
universe@52 212 }
universe@52 213 wbuf->pos = wbuf->size = 0; /* reset word buffer */
universe@52 214
universe@52 215 /* write current character */
universe@52 216 put_htmlescaped(dest, c);
universe@52 217 }
universe@52 218 }
universe@52 219
universe@52 220 isescaping = !isescaping & (c == '\\');
universe@52 221 }
universe@54 222 } while (c && c != '\n');
universe@52 223 }
universe@52 224
universe@52 225 /* Java Highlighter */
universe@52 226
/*
 * Words that the Java highlighter marks up as keywords.
 * The list is terminated by a NULL entry, as check_keyword() expects.
 */
static const char *jkeywords[] = {
    "abstract", "continue", "for", "new", "switch",
    "assert", "default", "goto", "package", "synchronized",
    "boolean", "do", "if", "private", "this",
    "break", "double", "implements", "protected", "throw",
    "byte", "else", "import", "public", "throws",
    "case", "enum", "instanceof", "return", "transient",
    "catch", "extends", "int", "short", "try",
    "char", "final", "interface", "static", "void",
    "class", "finally", "long", "strictfp", "volatile",
    "const", "float", "native", "super", "while",
    NULL
};
universe@52 236
universe@55 237 void c2html_java_highlighter(char *src, UcxBuffer *dest, c2html_highlighter_data *hd) {
universe@52 238 /* reset buffers without clearing them */
universe@52 239 hd->primary_buffer->size = hd->primary_buffer->pos = 0;
universe@52 240 hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
universe@52 241
universe@52 242 /* alias the buffers for better handling */
universe@52 243 UcxBuffer *wbuf = hd->primary_buffer;
universe@52 244
universe@52 245 /* local information */
universe@52 246 size_t sp = (size_t)-1;
universe@52 247 int isstring = 0, iscomment = 0, isimport = 0;
universe@52 248 char quote = '\0';
universe@52 249 int isescaping = 0;
universe@52 250
universe@52 251 if (hd->multiline_comment) {
universe@52 252 iscomment = 1;
universe@52 253 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 254 }
universe@52 255
universe@52 256 char c;
universe@52 257 do {
universe@52 258 c = src[++sp];
universe@52 259
universe@52 260 /* comments */
universe@52 261 if (!isstring && c == '/') {
universe@52 262 if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
universe@52 263 iscomment = 0;
universe@52 264 hd->multiline_comment = 0;
universe@52 265 ucx_buffer_puts(dest, "/</span>");
universe@52 266 continue;
universe@52 267 } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
universe@52 268 iscomment = 1;
universe@52 269 hd->multiline_comment = (src[sp+1] == '*');
universe@52 270 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 271 }
universe@52 272 }
universe@52 273
universe@52 274 if (iscomment) {
universe@52 275 if (c == '\n') {
universe@52 276 ucx_buffer_puts(dest, "</span>\n");
universe@52 277 } else {
universe@52 278 put_htmlescaped(dest, c);
universe@52 279 }
universe@52 280 } else if (isimport) {
universe@52 281 /* TODO: local imports */
universe@52 282 } else {
universe@52 283 /* strings */
universe@52 284 if (!isescaping && (c == '\'' || c == '\"')) {
universe@52 285 if (isstring) {
universe@52 286 put_htmlescaped(dest, c);
universe@52 287 if (c == quote) {
universe@52 288 isstring = 0;
universe@52 289 ucx_buffer_puts(dest, "</span>");
universe@52 290 } else {
universe@52 291 put_htmlescaped(dest, c);
universe@52 292 }
universe@52 293 } else {
universe@52 294 isstring = 1;
universe@52 295 quote = c;
universe@52 296 ucx_buffer_puts(dest,
universe@52 297 "<span class=\"c2html-string\">");
universe@52 298 put_htmlescaped(dest, c);
universe@52 299 }
universe@52 300 } else {
universe@52 301 if (isstring) {
universe@52 302 put_htmlescaped(dest, c);
universe@54 303 } else if (isalnum(c) || c == '_' || c == '@') {
universe@54 304 /* buffer the current word */
universe@54 305 ucx_buffer_putc(wbuf, c);
universe@54 306 } else {
universe@52 307 /* write buffered word, if any */
universe@52 308 if (wbuf->size > 0) {
universe@52 309 sstr_t word = sstrn(wbuf->space, wbuf->size);
universe@52 310 int closespan = 1;
universe@52 311 if (check_keyword(word, jkeywords)) {
universe@52 312 ucx_buffer_puts(dest,
universe@52 313 "<span class=\"c2html-keyword\">");
universe@52 314 } else if (isupper(word.ptr[0])) {
universe@52 315 ucx_buffer_puts(dest,
universe@52 316 "<span class=\"c2html-type\">");
universe@52 317 } else if (word.ptr[0] == '@') {
universe@52 318 ucx_buffer_puts(dest,
universe@52 319 "<span class=\"c2html-directive\">");
universe@52 320 } else if (check_capsonly(word)) {
universe@52 321 ucx_buffer_puts(dest,
universe@52 322 "<span class=\"c2html-macroconst\">");
universe@52 323 } else {
universe@52 324 closespan = 0;
universe@52 325 }
universe@52 326 put_htmlescapedstr(dest, word);
universe@52 327
universe@52 328 if (closespan) {
universe@52 329 ucx_buffer_puts(dest, "</span>");
universe@52 330 }
universe@52 331 }
universe@52 332 wbuf->pos = wbuf->size = 0; /* reset buffer */
universe@52 333
universe@52 334 /* write current character */
universe@52 335 put_htmlescaped(dest, c);
universe@52 336 }
universe@52 337 }
universe@52 338
universe@52 339 isescaping = !isescaping & (c == '\\');
universe@52 340 }
universe@54 341 } while (c && c != '\n');
universe@52 342 }
universe@52 343

mercurial