Mon, 24 Apr 2023 21:01:41 +0200
merge upstream changes
universe@21 | 1 | /* |
universe@21 | 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. |
universe@21 | 3 | * |
universe@35 | 4 | * Copyright 2016 Mike Becker. All rights reserved. |
universe@21 | 5 | * |
universe@21 | 6 | * Redistribution and use in source and binary forms, with or without |
universe@21 | 7 | * modification, are permitted provided that the following conditions are met: |
universe@21 | 8 | * |
universe@21 | 9 | * 1. Redistributions of source code must retain the above copyright |
universe@21 | 10 | * notice, this list of conditions and the following disclaimer. |
universe@21 | 11 | * |
universe@21 | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
universe@21 | 13 | * notice, this list of conditions and the following disclaimer in the |
universe@21 | 14 | * documentation and/or other materials provided with the distribution. |
universe@21 | 15 | * |
universe@21 | 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
universe@21 | 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
universe@21 | 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
universe@21 | 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
universe@21 | 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
universe@21 | 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
universe@21 | 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
universe@21 | 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
universe@21 | 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
universe@21 | 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
universe@21 | 26 | * POSSIBILITY OF SUCH DAMAGE. |
universe@21 | 27 | * |
universe@21 | 28 | */ |
universe@21 | 29 | |
universe@52 | 30 | #include "highlighter.h" |
universe@21 | 31 | |
universe@55 | 32 | #include <stdlib.h> |
universe@55 | 33 | #include <stdio.h> |
universe@55 | 34 | #include <string.h> |
universe@55 | 35 | #include <ctype.h> |
universe@51 | 36 | |
universe@66 | 37 | #include <cx/string.h> |
universe@66 | 38 | |
universe@66 | 39 | static void put_htmlescaped(CxBuffer *dest, char c) { |
universe@21 | 40 | if (c == '>') { |
universe@66 | 41 | cxBufferPutString(dest, ">"); |
universe@21 | 42 | } else if (c == '<') { |
universe@66 | 43 | cxBufferPutString(dest, "<"); |
universe@54 | 44 | } else if (c) { |
universe@66 | 45 | cxBufferPut(dest, c); |
universe@21 | 46 | } |
universe@48 | 47 | } |
universe@21 | 48 | |
universe@66 | 49 | static void put_htmlescapedstr(CxBuffer *dest, cxstring s) { |
universe@48 | 50 | for (int i = 0 ; i < s.length ; i++) { |
universe@48 | 51 | put_htmlescaped(dest, s.ptr[i]); |
universe@48 | 52 | } |
universe@21 | 53 | } |
universe@21 | 54 | |
universe@66 | 55 | static int check_keyword(cxstring word, const char** keywords) { |
universe@21 | 56 | for (int i = 0 ; keywords[i] ; i++) { |
universe@66 | 57 | if (cx_strcmp(word, cx_str(keywords[i])) == 0) { |
universe@21 | 58 | return 1; |
universe@21 | 59 | } |
universe@21 | 60 | } |
universe@21 | 61 | return 0; |
universe@21 | 62 | } |
universe@21 | 63 | |
universe@66 | 64 | static int check_capsonly(cxstring word) { |
universe@47 | 65 | for (size_t i = 0 ; i < word.length ; i++) { |
universe@47 | 66 | if (!isupper(word.ptr[i]) && !isdigit(word.ptr[i]) |
universe@47 | 67 | && word.ptr[i] != '_') { |
universe@21 | 68 | return 0; |
universe@21 | 69 | } |
universe@21 | 70 | } |
universe@21 | 71 | return 1; |
universe@21 | 72 | } |
universe@52 | 73 | |
universe@52 | 74 | /* Plaintext Highlighter */ |
universe@52 | 75 | |
universe@66 | 76 | void c2html_plain_highlighter(char const *src, CxBuffer *dest, |
universe@57 | 77 | c2html_highlighter_data *hd) { |
universe@52 | 78 | while (*src && *src != '\n') { |
universe@57 | 79 | if (*src != '\r') { |
universe@57 | 80 | put_htmlescaped(dest, *src); |
universe@57 | 81 | } |
universe@52 | 82 | src++; |
universe@52 | 83 | } |
universe@66 | 84 | cxBufferPut(dest, '\n'); |
universe@52 | 85 | } |
universe@52 | 86 | |
universe@52 | 87 | /* C Highlighter */ |
universe@52 | 88 | |
/* NULL terminated list of the words highlighted as C keywords. */
static const char* ckeywords[] = {
    /* types, type specifiers and qualifiers */
    "char", "double", "float", "int", "long", "short", "signed", "unsigned",
    "void", "enum", "struct", "union", "const", "volatile",
    /* storage classes */
    "auto", "extern", "register", "static", "typedef",
    /* statements and control flow */
    "break", "case", "continue", "default", "do", "else", "for", "goto",
    "if", "return", "switch", "while",
    /* operators */
    "sizeof",
    NULL
};
universe@52 | 96 | |
universe@66 | 97 | void c2html_c_highlighter(char const *src, CxBuffer *dest, |
universe@57 | 98 | c2html_highlighter_data *hd) { |
universe@52 | 99 | /* reset buffers without clearing them */ |
universe@66 | 100 | hd->primary_buffer.size = hd->primary_buffer.pos = 0; |
universe@66 | 101 | hd->secondary_buffer.size = hd->secondary_buffer.pos = 0; |
universe@52 | 102 | |
universe@52 | 103 | /* alias the buffers for better handling */ |
universe@66 | 104 | CxBuffer *wbuf = &hd->primary_buffer; |
universe@66 | 105 | CxBuffer *ifilebuf = &hd->secondary_buffer; |
universe@52 | 106 | |
universe@52 | 107 | /* local information */ |
universe@52 | 108 | size_t sp = (size_t)-1; |
universe@52 | 109 | int isstring = 0, iscomment = 0, isinclude = 0, parseinclude = 0; |
universe@52 | 110 | char quote = '\0'; |
universe@52 | 111 | int isescaping = 0; |
universe@52 | 112 | |
universe@52 | 113 | /* continue a multi line comment highlighting */ |
universe@52 | 114 | if (hd->multiline_comment) { |
universe@52 | 115 | iscomment = 1; |
universe@66 | 116 | cxBufferPutString(dest, "<span class=\"c2html-comment\">"); |
universe@52 | 117 | } |
universe@52 | 118 | |
universe@52 | 119 | char c; |
universe@52 | 120 | do { |
universe@52 | 121 | c = src[++sp]; |
universe@57 | 122 | if (c == '\r') continue; |
universe@52 | 123 | |
universe@52 | 124 | /* comments */ |
universe@52 | 125 | if (!isstring && c == '/') { |
universe@52 | 126 | if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') { |
universe@52 | 127 | iscomment = 0; |
universe@52 | 128 | hd->multiline_comment = 0; |
universe@66 | 129 | cxBufferPutString(dest, "/</span>"); |
universe@52 | 130 | continue; |
universe@52 | 131 | } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { |
universe@52 | 132 | iscomment = 1; |
universe@52 | 133 | hd->multiline_comment = (src[sp+1] == '*'); |
universe@66 | 134 | cxBufferPutString(dest, "<span class=\"c2html-comment\">"); |
universe@52 | 135 | } |
universe@52 | 136 | } |
universe@52 | 137 | |
universe@52 | 138 | if (iscomment) { |
universe@52 | 139 | if (c == '\n') { |
universe@66 | 140 | cxBufferPutString(dest, "</span>\n"); |
universe@52 | 141 | } else { |
universe@52 | 142 | put_htmlescaped(dest, c); |
universe@52 | 143 | } |
universe@52 | 144 | } else if (isinclude) { |
universe@52 | 145 | if (c == '<') { |
universe@66 | 146 | cxBufferPutString(dest, |
universe@52 | 147 | "<span class=\"c2html-stdinclude\"><"); |
universe@52 | 148 | } else if (c == '\"') { |
universe@52 | 149 | if (parseinclude) { |
universe@66 | 150 | cxBufferPutString(dest, "\">"); |
universe@66 | 151 | cxBufferWrite(ifilebuf->space, 1, ifilebuf->size, dest); |
universe@66 | 152 | cxBufferPutString(dest, "\"</a>"); |
universe@52 | 153 | parseinclude = 0; |
universe@52 | 154 | } else { |
universe@66 | 155 | cxBufferPutString(dest, |
universe@52 | 156 | "<a class=\"c2html-userinclude\" href=\""); |
universe@66 | 157 | cxBufferPut(ifilebuf, '\"'); |
universe@52 | 158 | parseinclude = 1; |
universe@52 | 159 | } |
universe@52 | 160 | } else if (c == '>') { |
universe@66 | 161 | cxBufferPutString(dest, "></span>"); |
universe@52 | 162 | } else { |
universe@52 | 163 | if (parseinclude) { |
universe@66 | 164 | cxBufferPut(ifilebuf, c); |
universe@52 | 165 | } |
universe@52 | 166 | put_htmlescaped(dest, c); |
universe@52 | 167 | } |
universe@52 | 168 | } else { |
universe@52 | 169 | /* strings */ |
universe@52 | 170 | if (!isescaping && (c == '\'' || c == '\"')) { |
universe@52 | 171 | if (isstring) { |
universe@52 | 172 | put_htmlescaped(dest, c); |
universe@52 | 173 | if (c == quote) { |
universe@52 | 174 | isstring = 0; |
universe@66 | 175 | cxBufferPutString(dest, "</span>"); |
universe@52 | 176 | } else { |
universe@52 | 177 | put_htmlescaped(dest, c); |
universe@52 | 178 | } |
universe@52 | 179 | } else { |
universe@52 | 180 | isstring = 1; |
universe@52 | 181 | quote = c; |
universe@66 | 182 | cxBufferPutString(dest, "<span class=\"c2html-string\">"); |
universe@52 | 183 | put_htmlescaped(dest, c); |
universe@52 | 184 | } |
universe@52 | 185 | } else { |
universe@52 | 186 | if (isstring) { |
universe@52 | 187 | put_htmlescaped(dest, c); |
universe@54 | 188 | } else if (isalnum(c) || c == '_' || c == '#') { |
universe@54 | 189 | /* buffer the current word */ |
universe@66 | 190 | cxBufferPut(wbuf, c); |
universe@54 | 191 | } else { |
universe@52 | 192 | /* write buffered word, if any */ |
universe@52 | 193 | if (wbuf->size > 0) { |
universe@66 | 194 | cxstring word = cx_strn(wbuf->space, wbuf->size); |
universe@52 | 195 | int closespan = 1; |
universe@66 | 196 | cxstring typesuffix = CX_STR("_t"); |
universe@52 | 197 | if (check_keyword(word, ckeywords)) { |
universe@66 | 198 | cxBufferPutString(dest, |
universe@52 | 199 | "<span class=\"c2html-keyword\">"); |
universe@66 | 200 | } else if (cx_strsuffix(word, typesuffix)) { |
universe@66 | 201 | cxBufferPutString(dest, |
universe@52 | 202 | "<span class=\"c2html-type\">"); |
universe@52 | 203 | } else if (word.ptr[0] == '#') { |
universe@66 | 204 | isinclude = !cx_strcmp(word, CX_STR("#include")); |
universe@66 | 205 | cxBufferPutString(dest, |
universe@52 | 206 | "<span class=\"c2html-directive\">"); |
universe@52 | 207 | } else if (check_capsonly(word)) { |
universe@66 | 208 | cxBufferPutString(dest, |
universe@52 | 209 | "<span class=\"c2html-macroconst\">"); |
universe@52 | 210 | } else { |
universe@52 | 211 | closespan = 0; |
universe@52 | 212 | } |
universe@52 | 213 | put_htmlescapedstr(dest, word); |
universe@52 | 214 | if (closespan) { |
universe@66 | 215 | cxBufferPutString(dest, "</span>"); |
universe@52 | 216 | } |
universe@52 | 217 | } |
universe@52 | 218 | wbuf->pos = wbuf->size = 0; /* reset word buffer */ |
universe@52 | 219 | |
universe@52 | 220 | /* write current character */ |
universe@52 | 221 | put_htmlescaped(dest, c); |
universe@52 | 222 | } |
universe@52 | 223 | } |
universe@52 | 224 | |
universe@52 | 225 | isescaping = !isescaping & (c == '\\'); |
universe@52 | 226 | } |
universe@54 | 227 | } while (c && c != '\n'); |
universe@52 | 228 | } |
universe@52 | 229 | |
universe@52 | 230 | /* Java Highlighter */ |
universe@52 | 231 | |
/* NULL terminated list of the words highlighted as Java keywords. */
static const char* jkeywords[] = {
    "abstract", "assert",
    "boolean", "break", "byte",
    "case", "catch", "char", "class", "const", "continue",
    "default", "do", "double",
    "else", "enum", "extends",
    "final", "finally", "float", "for",
    "goto",
    "if", "implements", "import", "instanceof", "int", "interface",
    "long",
    "native", "new",
    "package", "private", "protected", "public",
    "return",
    "short", "static", "strictfp", "super", "switch", "synchronized",
    "this", "throw", "throws", "transient", "try",
    "void", "volatile",
    "while",
    NULL
};
universe@52 | 241 | |
universe@66 | 242 | void c2html_java_highlighter(char const *src, CxBuffer *dest, |
universe@57 | 243 | c2html_highlighter_data *hd) { |
universe@52 | 244 | /* reset buffers without clearing them */ |
universe@66 | 245 | hd->primary_buffer.size = hd->primary_buffer.pos = 0; |
universe@66 | 246 | hd->secondary_buffer.size = hd->secondary_buffer.pos = 0; |
universe@52 | 247 | |
universe@52 | 248 | /* alias the buffers for better handling */ |
universe@66 | 249 | CxBuffer *wbuf = &hd->primary_buffer; |
universe@52 | 250 | |
universe@52 | 251 | /* local information */ |
universe@52 | 252 | size_t sp = (size_t)-1; |
universe@52 | 253 | int isstring = 0, iscomment = 0, isimport = 0; |
universe@52 | 254 | char quote = '\0'; |
universe@52 | 255 | int isescaping = 0; |
universe@52 | 256 | |
universe@52 | 257 | if (hd->multiline_comment) { |
universe@52 | 258 | iscomment = 1; |
universe@66 | 259 | cxBufferPutString(dest, "<span class=\"c2html-comment\">"); |
universe@52 | 260 | } |
universe@52 | 261 | |
universe@52 | 262 | char c; |
universe@52 | 263 | do { |
universe@52 | 264 | c = src[++sp]; |
universe@57 | 265 | if (c == '\r') continue; |
universe@52 | 266 | |
universe@52 | 267 | /* comments */ |
universe@52 | 268 | if (!isstring && c == '/') { |
universe@52 | 269 | if (hd->multiline_comment && sp > 0 && src[sp-1] == '*') { |
universe@52 | 270 | iscomment = 0; |
universe@52 | 271 | hd->multiline_comment = 0; |
universe@66 | 272 | cxBufferPutString(dest, "/</span>"); |
universe@52 | 273 | continue; |
universe@52 | 274 | } else if (!iscomment && (src[sp+1] == '/' || src[sp+1] == '*')) { |
universe@52 | 275 | iscomment = 1; |
universe@52 | 276 | hd->multiline_comment = (src[sp+1] == '*'); |
universe@66 | 277 | cxBufferPutString(dest, "<span class=\"c2html-comment\">"); |
universe@52 | 278 | } |
universe@52 | 279 | } |
universe@52 | 280 | |
universe@52 | 281 | if (iscomment) { |
universe@52 | 282 | if (c == '\n') { |
universe@66 | 283 | cxBufferPutString(dest, "</span>\n"); |
universe@52 | 284 | } else { |
universe@52 | 285 | put_htmlescaped(dest, c); |
universe@52 | 286 | } |
universe@52 | 287 | } else if (isimport) { |
universe@52 | 288 | /* TODO: local imports */ |
universe@52 | 289 | } else { |
universe@52 | 290 | /* strings */ |
universe@52 | 291 | if (!isescaping && (c == '\'' || c == '\"')) { |
universe@52 | 292 | if (isstring) { |
universe@52 | 293 | put_htmlescaped(dest, c); |
universe@52 | 294 | if (c == quote) { |
universe@52 | 295 | isstring = 0; |
universe@66 | 296 | cxBufferPutString(dest, "</span>"); |
universe@52 | 297 | } else { |
universe@52 | 298 | put_htmlescaped(dest, c); |
universe@52 | 299 | } |
universe@52 | 300 | } else { |
universe@52 | 301 | isstring = 1; |
universe@52 | 302 | quote = c; |
universe@66 | 303 | cxBufferPutString(dest, |
universe@52 | 304 | "<span class=\"c2html-string\">"); |
universe@52 | 305 | put_htmlescaped(dest, c); |
universe@52 | 306 | } |
universe@52 | 307 | } else { |
universe@52 | 308 | if (isstring) { |
universe@52 | 309 | put_htmlescaped(dest, c); |
universe@54 | 310 | } else if (isalnum(c) || c == '_' || c == '@') { |
universe@54 | 311 | /* buffer the current word */ |
universe@66 | 312 | cxBufferPut(wbuf, c); |
universe@54 | 313 | } else { |
universe@52 | 314 | /* write buffered word, if any */ |
universe@52 | 315 | if (wbuf->size > 0) { |
universe@66 | 316 | cxstring word = cx_strn(wbuf->space, wbuf->size); |
universe@52 | 317 | int closespan = 1; |
universe@52 | 318 | if (check_keyword(word, jkeywords)) { |
universe@66 | 319 | cxBufferPutString(dest, |
universe@52 | 320 | "<span class=\"c2html-keyword\">"); |
universe@52 | 321 | } else if (isupper(word.ptr[0])) { |
universe@66 | 322 | cxBufferPutString(dest, |
universe@52 | 323 | "<span class=\"c2html-type\">"); |
universe@52 | 324 | } else if (word.ptr[0] == '@') { |
universe@66 | 325 | cxBufferPutString(dest, |
universe@52 | 326 | "<span class=\"c2html-directive\">"); |
universe@52 | 327 | } else if (check_capsonly(word)) { |
universe@66 | 328 | cxBufferPutString(dest, |
universe@52 | 329 | "<span class=\"c2html-macroconst\">"); |
universe@52 | 330 | } else { |
universe@52 | 331 | closespan = 0; |
universe@52 | 332 | } |
universe@52 | 333 | put_htmlescapedstr(dest, word); |
universe@52 | 334 | |
universe@52 | 335 | if (closespan) { |
universe@66 | 336 | cxBufferPutString(dest, "</span>"); |
universe@52 | 337 | } |
universe@52 | 338 | } |
universe@52 | 339 | wbuf->pos = wbuf->size = 0; /* reset buffer */ |
universe@52 | 340 | |
universe@52 | 341 | /* write current character */ |
universe@52 | 342 | put_htmlescaped(dest, c); |
universe@52 | 343 | } |
universe@52 | 344 | } |
universe@52 | 345 | |
universe@52 | 346 | isescaping = !isescaping & (c == '\\'); |
universe@52 | 347 | } |
universe@54 | 348 | } while (c && c != '\n'); |
universe@52 | 349 | } |
universe@52 | 350 |