diff -r 7fd2672199d7 -r 49ab92de9a13 src/json.c --- a/src/json.c Sat Jan 11 12:33:10 2025 +0100 +++ b/src/json.c Sat Jan 11 12:56:54 2025 +0100 @@ -252,7 +252,7 @@ // current token type and start index CxJsonTokenType ttype = json->uncompleted.tokentype; - size_t token_start = json->buffer.pos; + size_t token_part_start = json->buffer.pos; for (size_t i = json->buffer.pos; i < json->buffer.size; i++) { char c = json->buffer.space[i]; @@ -266,7 +266,7 @@ } else if (ctype == CX_JSON_TOKEN_STRING) { // begin string ttype = CX_JSON_TOKEN_STRING; - token_start = i; + token_part_start = i; } else if (ctype != CX_JSON_NO_TOKEN) { // single-char token json->buffer.pos = i + 1; @@ -274,12 +274,12 @@ return CX_JSON_NO_ERROR; } else { ttype = CX_JSON_TOKEN_LITERAL; // number or literal - token_start = i; + token_part_start = i; } } else { // finish token if (ctype != CX_JSON_NO_TOKEN) { - *result = token_create(json, false, token_start, i); + *result = token_create(json, false, token_part_start, i); if (result->tokentype == CX_JSON_NO_TOKEN) { return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE } @@ -296,7 +296,7 @@ json->tokenizer_escape = false; } else { if (c == '"') { - *result = token_create(json, true, token_start, i + 1); + *result = token_create(json, true, token_part_start, i + 1); if (result->tokentype == CX_JSON_NO_TOKEN) { return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE } @@ -311,13 +311,13 @@ if (ttype != CX_JSON_NO_TOKEN) { // uncompleted token - size_t uncompleted_len = json->buffer.size - token_start; + size_t uncompleted_len = json->buffer.size - token_part_start; if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { // current token is uncompleted // save current token content CxJsonToken uncompleted = { ttype, true, - cx_strdup(cx_strn(json->buffer.space + token_start, uncompleted_len)) + cx_strdup(cx_strn(json->buffer.space + token_part_start, uncompleted_len)) }; if (uncompleted.content.ptr == NULL) { return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE @@ -328,7 +328,7 @@ // combine the uncompleted token with the current token assert(json->uncompleted.allocated); cxmutstr str = cx_strcat_m(json->uncompleted.content, 1, - cx_strn(json->buffer.space + token_start, uncompleted_len)); + cx_strn(json->buffer.space + token_part_start, uncompleted_len)); if (str.ptr == NULL) { return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE } @@ -342,8 +342,8 @@ } static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { - // TODO: support more escape sequences - // TODO: to be consistent with escape_string() we might want to expect that the enclosing quotes were already removed + // note: this function expects that str contains the enclosing quotes! + cxmutstr result; result.length = 0; result.ptr = cxMalloc(a, str.length - 1); @@ -358,7 +358,20 @@ c = '\n'; } else if (c == 't') { c = '\t'; + } else if (c == 'r') { + c = '\r'; + } else if (c == '\\') { + c = '\\'; + } else if (c == '/') { + c = '/'; // always unescape, we don't need settings here + } else if (c == 'f') { + c = '\f'; + } else if (c == 'b') { + c = '\b'; } + // TODO: support \uXXXX escape sequences + // TODO: discuss the behavior for unrecognized escape sequences + // most parsers throw an error here result.ptr[result.length++] = c; } else { if (c == '\\') { @@ -374,6 +387,8 @@ } static cxmutstr escape_string(cxmutstr str) { + // note: this function produces the string without enclosing quotes + // the reason is that we don't want to allocate memory just for that CxBuffer buf = {0}; bool all_printable = true;