/*
 * Reconstructed from a patch against src/json.c whose line breaks were lost.
 * The first hunk (@@ -353,6 +353,24 @@) inserts the helper below between the
 * function ending in `return CX_JSON_INCOMPLETE_DATA; }` and unescape_string().
 */

/**
 * Encodes a single Unicode code point as UTF-8.
 *
 * The bytes are written to the start of output_buf; no terminating NUL is
 * appended.
 *
 * NOTE: code points in the surrogate range (0xD800..0xDFFF) are not filtered
 * and are encoded like any other three-byte value. A caller that decodes
 * \uXXXX escapes has to combine surrogate pairs into one code point before
 * calling this function if well-formed UTF-8 is required.
 *
 * @param codepoint   the code point to encode
 * @param output_buf  destination buffer with room for at least four bytes
 * @return the number of bytes written (1 to 4), or zero when the code point
 *         is greater than 0x10FFFF and nothing was written
 */
static int codepoint_to_utf8(uint32_t codepoint, char *output_buf) {
    if (codepoint <= 0x7F) {
        // 0xxxxxxx
        output_buf[0] = (char)codepoint;
        return 1;
    }
    if (codepoint <= 0x7FF) {
        // 110xxxxx 10xxxxxx
        output_buf[0] = (char)(0xC0 | ((codepoint >> 6) & 0x1F));
        output_buf[1] = (char)(0x80 | (codepoint & 0x3F));
        return 2;
    }
    if (codepoint <= 0xFFFF) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        output_buf[0] = (char)(0xE0 | ((codepoint >> 12) & 0x0F));
        output_buf[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
        output_buf[2] = (char)(0x80 | (codepoint & 0x3F));
        return 3;
    }
    if (codepoint <= 0x10FFFF) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        output_buf[0] = (char)(0xF0 | ((codepoint >> 18) & 0x07));
        output_buf[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
        output_buf[2] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
        output_buf[3] = (char)(0x80 | (codepoint & 0x3F));
        return 4;
    }

    // beyond the Unicode code space
    return 0;
}

/*
 * Second hunk of the same patch (@@ -380,8 +398,27 @@). It modifies
 * unescape_string(), of which only this fragment is visible, so it is kept
 * as-is (line breaks and indentation reconstructed) instead of being rewritten.
 *
 * NOTE(review), based only on the fragment below:
 *   - the inner `for(int i=0;...)` shadows the outer index `i` that is used
 *     by `i+4 < str.length` and `i += 4`;
 *   - the comment says "expect", presumably "except" is meant;
 *   - when fewer than four characters follow, when the hex conversion fails,
 *     or when codepoint_to_utf8() returns 0, `c` is still 'u' and is what the
 *     final `result.ptr[result.length++] = c;` appends - TODO confirm that
 *     this fallback is intended.
 *
 *              c = '\f';
 *          } else if (c == 'b') {
 *              c = '\b';
 * +        } else if (c == 'u') {
 * +            if (i+4 < str.length) {
 * +                cxstring codepoint_str = { str.ptr + i + 1, 4};
 * +                uint32_t codepoint;
 * +                if(!cx_strtou32_lc_(codepoint_str, &codepoint, 16, "")) {
 * +                    char utf8buf[4];
 * +                    int utf8len = codepoint_to_utf8(codepoint, utf8buf);
 * +                    if(utf8len > 0) {
 * +                        // add all bytes from utf8buf expect the last char
 * +                        // to the result
 * +                        utf8len--;
 * +                        c = utf8buf[utf8len];
 * +                        for(int i=0;i<utf8len;i++) {
 * +                            result.ptr[result.length++] = utf8buf[i];
 * +                        }
 * +                    }
 * +                    i += 4;
 * +                }
 * +            }
 *          }
 * -        // TODO: support \uXXXX escape sequences
 * +        // TODO: discuss the behavior for unrecognized escape sequences
 *          // most parsers throw an error here
 *          result.ptr[result.length++] = c;
 */