src/highlighter.c

Wed, 31 Aug 2016 16:20:58 +0200

author
Mike Becker <universe@uap-core.de>
date
Wed, 31 Aug 2016 16:20:58 +0200
changeset 57
eba880c1705c
parent 55
bf54085ce341
child 61
47a5fc33590a
child 66
1b12cf799fee
permissions
-rw-r--r--

improves API and adds functions for strings

universe@21 1 /*
universe@21 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
universe@21 3 *
universe@35 4 * Copyright 2016 Mike Becker. All rights reserved.
universe@21 5 *
universe@21 6 * Redistribution and use in source and binary forms, with or without
universe@21 7 * modification, are permitted provided that the following conditions are met:
universe@21 8 *
universe@21 9 * 1. Redistributions of source code must retain the above copyright
universe@21 10 * notice, this list of conditions and the following disclaimer.
universe@21 11 *
universe@21 12 * 2. Redistributions in binary form must reproduce the above copyright
universe@21 13 * notice, this list of conditions and the following disclaimer in the
universe@21 14 * documentation and/or other materials provided with the distribution.
universe@21 15 *
universe@21 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
universe@21 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
universe@21 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
universe@21 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
universe@21 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
universe@21 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
universe@21 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
universe@21 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
universe@21 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
universe@21 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
universe@21 26 * POSSIBILITY OF SUCH DAMAGE.
universe@21 27 *
universe@21 28 */
universe@21 29
universe@52 30 #include "highlighter.h"
universe@21 31
universe@55 32 #include <stdlib.h>
universe@55 33 #include <stdio.h>
universe@55 34 #include <string.h>
universe@55 35 #include <ctype.h>
universe@55 36 #include "ucx/string.h"
universe@55 37 #include "ucx/utils.h"
universe@51 38
universe@52 39 static void put_htmlescaped(UcxBuffer *dest, char c) {
universe@21 40 if (c == '>') {
universe@48 41 ucx_buffer_puts(dest, "&gt;");
universe@21 42 } else if (c == '<') {
universe@48 43 ucx_buffer_puts(dest, "&lt;");
universe@54 44 } else if (c) {
universe@48 45 ucx_buffer_putc(dest, c);
universe@21 46 }
universe@48 47 }
universe@21 48
universe@52 49 static void put_htmlescapedstr(UcxBuffer *dest, sstr_t s) {
universe@48 50 for (int i = 0 ; i < s.length ; i++) {
universe@48 51 put_htmlescaped(dest, s.ptr[i]);
universe@48 52 }
universe@21 53 }
universe@21 54
universe@52 55 static int check_keyword(sstr_t word, const char** keywords) {
universe@21 56 for (int i = 0 ; keywords[i] ; i++) {
universe@47 57 if (sstrcmp(word, sstr((char*)keywords[i])) == 0) {
universe@21 58 return 1;
universe@21 59 }
universe@21 60 }
universe@21 61 return 0;
universe@21 62 }
universe@21 63
universe@52 64 static int check_capsonly(sstr_t word) {
universe@47 65 for (size_t i = 0 ; i < word.length ; i++) {
universe@47 66 if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i])
universe@47 67 && word.ptr[i] != '_') {
universe@21 68 return 0;
universe@21 69 }
universe@21 70 }
universe@21 71 return 1;
universe@21 72 }
universe@52 73
universe@52 74 /* Plaintext Highlighter */
universe@52 75
universe@57 76 void c2html_plain_highlighter(char *src, UcxBuffer *dest,
universe@57 77 c2html_highlighter_data *hd) {
universe@52 78 while (*src && *src != '\n') {
universe@57 79 if (*src != '\r') {
universe@57 80 put_htmlescaped(dest, *src);
universe@57 81 }
universe@52 82 src++;
universe@52 83 }
universe@52 84 ucx_buffer_putc(dest, '\n');
universe@52 85 }
universe@52 86
universe@52 87 /* C Highlighter */
universe@52 88
/*
 * Words the C highlighter marks as keywords: the C89 keywords followed by
 * the keywords that were added with C99. A null pointer terminates the list.
 */
static const char* ckeywords[] = {
    /* C89 */
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if", "int",
    "long", "register", "return", "short", "signed", "sizeof", "static",
    "struct", "switch", "typedef", "union", "unsigned", "void", "volatile",
    "while",
    /* C99 */
    "inline", "restrict", "_Bool", "_Complex", "_Imaginary",
    NULL
};
universe@52 96
universe@57 97 void c2html_c_highlighter(char *src, UcxBuffer *dest,
universe@57 98 c2html_highlighter_data *hd) {
universe@52 99 /* reset buffers without clearing them */
universe@52 100 hd->primary_buffer->size = hd->primary_buffer->pos = 0;
universe@52 101 hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
universe@52 102
universe@52 103 /* alias the buffers for better handling */
universe@52 104 UcxBuffer *wbuf = hd->primary_buffer;
universe@52 105 UcxBuffer *ifilebuf = hd->secondary_buffer;
universe@52 106
universe@52 107 /* local information */
universe@52 108 size_t sp = (size_t)-1;
universe@52 109 int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0;
universe@52 110 char quote = '\0';
universe@52 111 int isescaping = 0;
universe@52 112
universe@52 113 /* continue a multi line comment highlighting */
universe@52 114 if (hd->multiline_comment) {
universe@52 115 iscomment = 1;
universe@52 116 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 117 }
universe@52 118
universe@52 119 char c;
universe@52 120 do {
universe@52 121 c = src[++sp];
universe@57 122 if (c == '\r') continue;
universe@52 123
universe@52 124 /* comments */
universe@52 125 if (!isstring && c == '/') {
universe@52 126 if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
universe@52 127 iscomment = 0;
universe@52 128 hd->multiline_comment = 0;
universe@52 129 ucx_buffer_puts(dest, "/</span>");
universe@52 130 continue;
universe@52 131 } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
universe@52 132 iscomment = 1;
universe@52 133 hd->multiline_comment = (src[sp+1] == '*');
universe@52 134 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 135 }
universe@52 136 }
universe@52 137
universe@52 138 if (iscomment) {
universe@52 139 if (c == '\n') {
universe@52 140 ucx_buffer_puts(dest, "</span>\n");
universe@52 141 } else {
universe@52 142 put_htmlescaped(dest, c);
universe@52 143 }
universe@52 144 } else if (isinclude) {
universe@52 145 if (c == '<') {
universe@52 146 ucx_buffer_puts(dest,
universe@52 147 "<span class=\"c2html-stdinclude\">&lt;");
universe@52 148 } else if (c == '\"') {
universe@52 149 if (parseinclude) {
universe@52 150 ucx_buffer_puts(dest, "\">");
universe@52 151 ucx_buffer_write(ifilebuf->space, 1, ifilebuf->size, dest);
universe@52 152 ucx_buffer_puts(dest, "\"</a>");
universe@52 153 parseinclude = 0;
universe@52 154 } else {
universe@52 155 ucx_buffer_puts(dest,
universe@52 156 "<a class=\"c2html-userinclude\" href=\"");
universe@52 157 ucx_buffer_putc(ifilebuf, '\"');
universe@52 158 parseinclude = 1;
universe@52 159 }
universe@52 160 } else if (c == '>') {
universe@52 161 ucx_buffer_puts(dest, "&gt;</span>");
universe@52 162 } else {
universe@52 163 if (parseinclude) {
universe@52 164 ucx_buffer_putc(ifilebuf, c);
universe@52 165 }
universe@52 166 put_htmlescaped(dest, c);
universe@52 167 }
universe@52 168 } else {
universe@52 169 /* strings */
universe@52 170 if (!isescaping && (c == '\'' || c == '\"')) {
universe@52 171 if (isstring) {
universe@52 172 put_htmlescaped(dest, c);
universe@52 173 if (c == quote) {
universe@52 174 isstring = 0;
universe@52 175 ucx_buffer_puts(dest, "</span>");
universe@52 176 } else {
universe@52 177 put_htmlescaped(dest, c);
universe@52 178 }
universe@52 179 } else {
universe@52 180 isstring = 1;
universe@52 181 quote = c;
universe@52 182 ucx_buffer_puts(dest, "<span class=\"c2html-string\">");
universe@52 183 put_htmlescaped(dest, c);
universe@52 184 }
universe@52 185 } else {
universe@52 186 if (isstring) {
universe@52 187 put_htmlescaped(dest, c);
universe@54 188 } else if (isalnum(c) || c == '_' || c == '#') {
universe@54 189 /* buffer the current word */
universe@54 190 ucx_buffer_putc(wbuf, c);
universe@54 191 } else {
universe@52 192 /* write buffered word, if any */
universe@52 193 if (wbuf->size > 0) {
universe@52 194 sstr_t word = sstrn(wbuf->space, wbuf->size);
universe@52 195 int closespan = 1;
universe@52 196 sstr_t typesuffix = ST("_t");
universe@52 197 if (check_keyword(word, ckeywords)) {
universe@52 198 ucx_buffer_puts(dest,
universe@52 199 "<span class=\"c2html-keyword\">");
universe@52 200 } else if (sstrsuffix(word, typesuffix)) {
universe@52 201 ucx_buffer_puts(dest,
universe@52 202 "<span class=\"c2html-type\">");
universe@52 203 } else if (word.ptr[0] == '#') {
universe@52 204 isinclude = !sstrcmp(word, S("#include"));
universe@52 205 ucx_buffer_puts(dest,
universe@52 206 "<span class=\"c2html-directive\">");
universe@52 207 } else if (check_capsonly(word)) {
universe@52 208 ucx_buffer_puts(dest,
universe@52 209 "<span class=\"c2html-macroconst\">");
universe@52 210 } else {
universe@52 211 closespan = 0;
universe@52 212 }
universe@52 213 put_htmlescapedstr(dest, word);
universe@52 214 if (closespan) {
universe@52 215 ucx_buffer_puts(dest, "</span>");
universe@52 216 }
universe@52 217 }
universe@52 218 wbuf->pos = wbuf->size = 0; /* reset word buffer */
universe@52 219
universe@52 220 /* write current character */
universe@52 221 put_htmlescaped(dest, c);
universe@52 222 }
universe@52 223 }
universe@52 224
universe@52 225 isescaping = !isescaping & (c == '\\');
universe@52 226 }
universe@54 227 } while (c && c != '\n');
universe@52 228 }
universe@52 229
universe@52 230 /* Java Highlighter */
universe@52 231
/*
 * Words the Java highlighter marks as keywords, in alphabetical order.
 * A null pointer terminates the list.
 */
static const char* jkeywords[] = {
    "abstract", "assert", "boolean", "break", "byte", "case", "catch",
    "char", "class", "const", "continue", "default", "do", "double",
    "else", "enum", "extends", "final", "finally", "float", "for", "goto",
    "if", "implements", "import", "instanceof", "int", "interface", "long",
    "native", "new", "package", "private", "protected", "public", "return",
    "short", "static", "strictfp", "super", "switch", "synchronized",
    "this", "throw", "throws", "transient", "try", "void", "volatile",
    "while",
    NULL
};
universe@52 241
universe@57 242 void c2html_java_highlighter(char *src, UcxBuffer *dest,
universe@57 243 c2html_highlighter_data *hd) {
universe@52 244 /* reset buffers without clearing them */
universe@52 245 hd->primary_buffer->size = hd->primary_buffer->pos = 0;
universe@52 246 hd->secondary_buffer->size = hd->secondary_buffer->pos = 0;
universe@52 247
universe@52 248 /* alias the buffers for better handling */
universe@52 249 UcxBuffer *wbuf = hd->primary_buffer;
universe@52 250
universe@52 251 /* local information */
universe@52 252 size_t sp = (size_t)-1;
universe@52 253 int isstring = 0, iscomment = 0, isimport = 0;
universe@52 254 char quote = '\0';
universe@52 255 int isescaping = 0;
universe@52 256
universe@52 257 if (hd->multiline_comment) {
universe@52 258 iscomment = 1;
universe@52 259 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 260 }
universe@52 261
universe@52 262 char c;
universe@52 263 do {
universe@52 264 c = src[++sp];
universe@57 265 if (c == '\r') continue;
universe@52 266
universe@52 267 /* comments */
universe@52 268 if (!isstring && c == '/') {
universe@52 269 if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') {
universe@52 270 iscomment = 0;
universe@52 271 hd->multiline_comment = 0;
universe@52 272 ucx_buffer_puts(dest, "/</span>");
universe@52 273 continue;
universe@52 274 } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) {
universe@52 275 iscomment = 1;
universe@52 276 hd->multiline_comment = (src[sp+1] == '*');
universe@52 277 ucx_buffer_puts(dest, "<span class=\"c2html-comment\">");
universe@52 278 }
universe@52 279 }
universe@52 280
universe@52 281 if (iscomment) {
universe@52 282 if (c == '\n') {
universe@52 283 ucx_buffer_puts(dest, "</span>\n");
universe@52 284 } else {
universe@52 285 put_htmlescaped(dest, c);
universe@52 286 }
universe@52 287 } else if (isimport) {
universe@52 288 /* TODO: local imports */
universe@52 289 } else {
universe@52 290 /* strings */
universe@52 291 if (!isescaping && (c == '\'' || c == '\"')) {
universe@52 292 if (isstring) {
universe@52 293 put_htmlescaped(dest, c);
universe@52 294 if (c == quote) {
universe@52 295 isstring = 0;
universe@52 296 ucx_buffer_puts(dest, "</span>");
universe@52 297 } else {
universe@52 298 put_htmlescaped(dest, c);
universe@52 299 }
universe@52 300 } else {
universe@52 301 isstring = 1;
universe@52 302 quote = c;
universe@52 303 ucx_buffer_puts(dest,
universe@52 304 "<span class=\"c2html-string\">");
universe@52 305 put_htmlescaped(dest, c);
universe@52 306 }
universe@52 307 } else {
universe@52 308 if (isstring) {
universe@52 309 put_htmlescaped(dest, c);
universe@54 310 } else if (isalnum(c) || c == '_' || c == '@') {
universe@54 311 /* buffer the current word */
universe@54 312 ucx_buffer_putc(wbuf, c);
universe@54 313 } else {
universe@52 314 /* write buffered word, if any */
universe@52 315 if (wbuf->size > 0) {
universe@52 316 sstr_t word = sstrn(wbuf->space, wbuf->size);
universe@52 317 int closespan = 1;
universe@52 318 if (check_keyword(word, jkeywords)) {
universe@52 319 ucx_buffer_puts(dest,
universe@52 320 "<span class=\"c2html-keyword\">");
universe@52 321 } else if (isupper(word.ptr[0])) {
universe@52 322 ucx_buffer_puts(dest,
universe@52 323 "<span class=\"c2html-type\">");
universe@52 324 } else if (word.ptr[0] == '@') {
universe@52 325 ucx_buffer_puts(dest,
universe@52 326 "<span class=\"c2html-directive\">");
universe@52 327 } else if (check_capsonly(word)) {
universe@52 328 ucx_buffer_puts(dest,
universe@52 329 "<span class=\"c2html-macroconst\">");
universe@52 330 } else {
universe@52 331 closespan = 0;
universe@52 332 }
universe@52 333 put_htmlescapedstr(dest, word);
universe@52 334
universe@52 335 if (closespan) {
universe@52 336 ucx_buffer_puts(dest, "</span>");
universe@52 337 }
universe@52 338 }
universe@52 339 wbuf->pos = wbuf->size = 0; /* reset buffer */
universe@52 340
universe@52 341 /* write current character */
universe@52 342 put_htmlescaped(dest, c);
universe@52 343 }
universe@52 344 }
universe@52 345
universe@52 346 isescaping = !isescaping & (c == '\\');
universe@52 347 }
universe@54 348 } while (c && c != '\n');
universe@52 349 }
universe@52 350

mercurial