src/highlighter.c

changeset 91
2c8514b3891b
parent 90
98adda6171d1
child 92
fe4dfb4d074d
equal deleted inserted replaced
90:98adda6171d1 91:2c8514b3891b
73 } 73 }
74 } 74 }
75 return 1; 75 return 1;
76 } 76 }
77 77
/**
 * Checks whether the text at the beginning of @p str looks like a
 * numeric literal.
 *
 * The check is deliberately an over-approximation: it accepts everything
 * that is a valid literal, but also some strings that are not.
 * Recognized are an optional sign, the prefixes 0x/0X and 0b/0B, the
 * digit grouping character ('), one decimal point, one exponent
 * (e/E, or p/P for hexadecimal literals) with an optional sign, and the
 * integer or floating point suffixes listed below.
 * Scanning stops at the string terminator or at a line break.
 *
 * @param str the zero-terminated text to examine
 * @return the number of characters that belong to the literal (including
 * sign, prefix, and suffix), or zero if the text is not a number
 */
static size_t check_number(const char *str) {
    /* this function is not precise, but a good over-approximation */
    size_t i = 0;
    if (str[0] == '+' || str[0] == '-') {
        i++;
    }
    const bool hex = str[i] == '0' && (str[i + 1] == 'x' || str[i + 1] == 'X');
    const bool bin = str[i] == '0' && (str[i + 1] == 'b' || str[i + 1] == 'B');
    if (hex || bin) {
        i += 2;
    }
    bool flt = false;
    bool exponent = false;
    bool dot = false;
    bool digit_seen = false;
    if (str[i] == '.') {
        dot = true;
        flt = true;
        i++;
    }
    /* hexadecimal floats use p/P, because e/E are digits there */
    const char exp_char_low = hex ? 'p' : 'e';
    const char exp_char_up = hex ? 'P' : 'E';
    while (str[i] != '\0' && str[i] != '\n') {
        /* ignore grouping char */
        if (str[i] == '\'') {
            i++;
            continue;
        }
        /* binary is always integer, nothing else allowed */
        if (bin) {
            if (str[i] != '0' && str[i] != '1') {
                break;
            }
            i++;
            digit_seen = true;
            continue;
        }
        /* detect decimal and exponent separators */
        if ((!dot && str[i] == '.') ||
            (!exponent && digit_seen &&
                (str[i] == exp_char_low || str[i] == exp_char_up))) {
            if (str[i] == '.') {
                dot = true;
            } else {
                exponent = true;
                /* a sign may directly follow */
                if (str[i + 1] == '+' || str[i + 1] == '-') {
                    i++;
                }
            }
            flt = true;
            i++;
            continue;
        }
        /* check for allowed digits */
        if ((str[i] >= '0' && str[i] <= '9') || (hex && (
                (str[i] >= 'a' && str[i] <= 'f')
                || (str[i] >= 'A' && str[i] <= 'F')))) {
            digit_seen = true;
            i++;
        } else {
            break;
        }
    }
    /* have we seen at least one digit? */
    if (!digit_seen) {
        return 0;
    }

    /* check if we are already done (over-approximation) */
    /* the cast is required: passing a negative char to isalpha is undefined */
    if (!isalpha((unsigned char) str[i])) {
        return i;
    }

    /* check suffixes (must check with decreasing length) */
    static const char *const flt_suffixes[] = {
        "f128", "bf16", "F128", "BF16",
        "f16", "f32", "f64", "F16", "F32", "F64",
        "df", "DF", "dd", "DD", "dl", "DL",
        "d", "D", "f", "l", "F", "L",
    };
    static const char *const int_suffixes[] = {
        "ull", "ULL",
        "ul", "UL", "ll", "LL", "wb", "WB",
        "u", "U", "l", "L",
    };
    /* derive the lengths from the tables, so they cannot get out of sync */
    const char *const *allowed_suffixes = flt ? flt_suffixes : int_suffixes;
    const size_t allowed_suffixes_len = flt
        ? sizeof(flt_suffixes) / sizeof(flt_suffixes[0])
        : sizeof(int_suffixes) / sizeof(int_suffixes[0]);
    for (size_t j = 0; j < allowed_suffixes_len; j++) {
        const size_t suffix_len = strlen(allowed_suffixes[j]);
        /* strncmp stops at the terminator of str, so a suffix that is
         * longer than the remaining text cannot cause an out-of-bounds
         * read (which memcmp would be allowed to do) */
        if (strncmp(str + i, allowed_suffixes[j], suffix_len) == 0) {
            return i + suffix_len;
        }
    }
    /* no suffix matched */
    return 0;
}
178
78 /* Plaintext Highlighter */ 179 /* Plaintext Highlighter */
79 180
80 void c2html_plain_highlighter(char const *src, CxBuffer *dest, 181 void c2html_plain_highlighter(char const *src, CxBuffer *dest,
81 c2html_highlighter_data *hd) { 182 c2html_highlighter_data *hd) {
82 while (*src && *src != '\n') { 183 while (*src && *src != '\n') {
244 put_htmlescaped(dest, c); 345 put_htmlescaped(dest, c);
245 } 346 }
246 } else { 347 } else {
247 if (isstring) { 348 if (isstring) {
248 put_htmlescaped(dest, c); 349 put_htmlescaped(dest, c);
350 } else if (wbuf->size == 0 &&
351 (isdigit(c) || c == '+' || c == '-' || c == '.')
352 ) {
353 /* might be a number */
354 size_t numlen = check_number(src+sp);
355 if (numlen > 0) {
356 start_span("number");
357 put_htmlescapedstr(dest, cx_strn(src+sp, numlen));
358 stop_span;
359 sp += numlen - 1;
360 c = src[sp];
361 continue;
362 } else {
363 /* start a new buffered word */
364 cxBufferPut(wbuf, c);
365 }
249 } else if (isalnum(c) || c == '_' || c == '#') { 366 } else if (isalnum(c) || c == '_' || c == '#') {
250 /* buffer the current word */ 367 /* buffer the current word */
251 cxBufferPut(wbuf, c); 368 cxBufferPut(wbuf, c);
252 } else { 369 } else {
253 /* write buffered word, if any */ 370 /* write buffered word, if any */
269 } 386 }
270 put_htmlescapedstr(dest, word); 387 put_htmlescapedstr(dest, word);
271 if (closespan) { 388 if (closespan) {
272 stop_span; 389 stop_span;
273 } 390 }
274 } 391
275 wbuf->pos = wbuf->size = 0; /* reset word buffer */ 392 /* reset word buffer */
393 wbuf->pos = wbuf->size = 0;
394
395 /* re-test current char */
396 c = src[--sp];
397 continue;
398 }
276 399
277 /* write current character */ 400 /* write current character */
278 put_htmlescaped(dest, c); 401 put_htmlescaped(dest, c);
279 } 402 }
280 } 403 }
365 put_htmlescaped(dest, c); 488 put_htmlescaped(dest, c);
366 } 489 }
367 } else { 490 } else {
368 if (isstring) { 491 if (isstring) {
369 put_htmlescaped(dest, c); 492 put_htmlescaped(dest, c);
493 } else if (wbuf->size == 0 &&
494 (isdigit(c) || c == '+' || c == '-' || c == '.')
495 ) {
496 /* might be a number */
497 size_t numlen = check_number(src+sp);
498 if (numlen > 0) {
499 cxBufferPutString(dest,
500 "<span class=\"c2html-number\">");
501 put_htmlescapedstr(dest, cx_strn(src+sp, numlen));
502 cxBufferPutString(dest, "</span>");
503 sp += numlen - 1;
504 c = src[sp];
505 continue;
506 } else {
507 /* start a new buffered word */
508 cxBufferPut(wbuf, c);
509 }
370 } else if (isalnum(c) || c == '_' || c == '@') { 510 } else if (isalnum(c) || c == '_' || c == '@') {
371 /* buffer the current word */ 511 /* buffer the current word */
372 cxBufferPut(wbuf, c); 512 cxBufferPut(wbuf, c);
373 } else { 513 } else {
374 /* write buffered word, if any */ 514 /* write buffered word, if any */
393 put_htmlescapedstr(dest, word); 533 put_htmlescapedstr(dest, word);
394 534
395 if (closespan) { 535 if (closespan) {
396 cxBufferPutString(dest, "</span>"); 536 cxBufferPutString(dest, "</span>");
397 } 537 }
398 } 538
399 wbuf->pos = wbuf->size = 0; /* reset buffer */ 539 /* reset word buffer */
540 wbuf->pos = wbuf->size = 0;
541
542 /* re-test current char */
543 c = src[--sp];
544 continue;
545 }
400 546
401 /* write current character */ 547 /* write current character */
402 put_htmlescaped(dest, c); 548 put_htmlescaped(dest, c);
403 } 549 }
404 } 550 }

mercurial