Fri, 17 Jan 2025 17:41:29 +0100
fix wrong accidental unicode escaping
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "cx/json.h"

#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <stdio.h>
#include <inttypes.h>

/*
 * RFC 8259
 * https://tools.ietf.org/html/rfc8259
 */

// shared sentinel that is returned whenever a lookup yields no value
static CxJsonValue cx_json_value_nothing = {.type = CX_JSON_NOTHING};

/**
 * Compares two object members by their names.
 * Used as comparator for the binary searches on the member array.
 */
static int json_cmp_objvalue(const void *l, const void *r) {
    const CxJsonObjValue *left = l;
    const CxJsonObjValue *right = r;
    return cx_strcmp(cx_strcast(left->name), cx_strcast(right->name));
}

/**
 * Looks up the member with the specified name via binary search.
 *
 * @return a pointer to the member or NULL when no such member exists
 */
static CxJsonObjValue *json_find_objvalue(const CxJsonValue *obj, cxstring name) {
    assert(obj->type == CX_JSON_OBJECT);
    CxJsonObjValue kv_dummy;
    kv_dummy.name = cx_mutstrn((char*) name.ptr, name.length);
    size_t index = cx_array_binary_search(
            obj->value.object.values,
            obj->value.object.values_size,
            sizeof(CxJsonObjValue),
            &kv_dummy,
            json_cmp_objvalue
    );
    if (index == obj->value.object.values_size) {
        return NULL;
    } else {
        return &obj->value.object.values[index];
    }
}

/**
 * Adds a member to an object, or replaces an existing member of that name.
 *
 * The values array is kept ordered according to json_cmp_objvalue(), while
 * the indices array records the position of each member in insertion order.
 *
 * @return zero on success, non-zero when a reallocation failed
 */
static int json_add_objvalue(CxJsonValue *objv, CxJsonObjValue member) {
    assert(objv->type == CX_JSON_OBJECT);
    const CxAllocator * const al = objv->allocator;
    CxJsonObject *obj = &(objv->value.object);

    // determine the index where we need to insert the new member
    size_t index = cx_array_binary_search_sup(
        obj->values,
        obj->values_size,
        sizeof(CxJsonObjValue),
        &member,
        json_cmp_objvalue
    );

    // is the name already present?
    if (index < obj->values_size
        && 0 == json_cmp_objvalue(&member, &obj->values[index])) {
        // free the original value
        cx_strfree_a(al, &obj->values[index].name);
        cxJsonValueFree(obj->values[index].value);
        // replace the item
        obj->values[index] = member;

        // nothing more to do
        return 0;
    }

    // determine the old capacity and reserve for one more element
    CxArrayReallocator arealloc = cx_array_reallocator(al, NULL);
    size_t oldcap = obj->values_capacity;
    if (cx_array_simple_reserve_a(&arealloc, obj->values, 1)) return 1;

    // check the new capacity, if we need to realloc the index array
    size_t newcap = obj->values_capacity;
    if (newcap > oldcap) {
        if (cxReallocateArray(al, &obj->indices, newcap, sizeof(size_t))) {
            return 1;
        }
    }

    // check if append or insert
    if (index < obj->values_size) {
        // move the other elements
        memmove(
            &obj->values[index+1],
            &obj->values[index],
            (obj->values_size - index) * sizeof(CxJsonObjValue)
        );
        // increase indices for the moved elements
        for (size_t i = 0; i < obj->values_size ; i++) {
            if (obj->indices[i] >= index) {
                obj->indices[i]++;
            }
        }
    }

    // insert the element and set the index
    obj->values[index] = member;
    obj->indices[obj->values_size] = index;
    obj->values_size++;

    return 0;
}

/**
 * Releases the content of a token, if the token owns it.
 */
static void token_destroy(CxJsonToken *token) {
    if (token->allocated) {
        cx_strfree(&token->content);
    }
}

/**
 * Checks if content[pos..length) is a valid exponent:
 * an optional sign followed by at least one digit.
 *
 * @return non-zero if the exponent is valid, zero otherwise
 */
static int num_isexp(const char *content, size_t length, size_t pos) {
    if (pos >= length) {
        return 0;
    }

    int ok = 0;
    for (size_t i = pos; i < length; i++) {
        char c = content[i];
        // ctype functions require a value representable as unsigned char
        if (isdigit((unsigned char) c)) {
            ok = 1;
        } else if (i == pos) {
            if (!(c == '+' || c == '-')) {
                return 0;
            }
        } else {
            return 0;
        }
    }

    return ok;
}

/**
 * Classifies a non-literal token as integer or number.
 *
 * @return CX_JSON_TOKEN_INTEGER, CX_JSON_TOKEN_NUMBER,
 * or CX_JSON_TOKEN_ERROR when the content is not numeric
 */
static CxJsonTokenType token_numbertype(const char *content, size_t length) {
    if (length == 0) return CX_JSON_TOKEN_ERROR;

    if (content[0] != '-' && !isdigit((unsigned char) content[0])) {
        return CX_JSON_TOKEN_ERROR;
    }

    CxJsonTokenType type = CX_JSON_TOKEN_INTEGER;
    for (size_t i = 1; i < length; i++) {
        if (content[i] == '.') {
            if (type == CX_JSON_TOKEN_NUMBER) {
                return CX_JSON_TOKEN_ERROR; // more than one decimal separator
            }
            type = CX_JSON_TOKEN_NUMBER;
        } else if (content[i] == 'e' || content[i] == 'E') {
            return num_isexp(content, length, i + 1)
                ? CX_JSON_TOKEN_NUMBER : CX_JSON_TOKEN_ERROR;
        } else if (!isdigit((unsigned char) content[i])) {
            // char is not a digit, decimal separator or exponent sep
            return CX_JSON_TOKEN_ERROR;
        }
    }

    return type;
}

/**
 * Creates a token from the buffer range [start, end).
 *
 * If an uncompleted token has been saved earlier, the range is appended
 * to the saved content and the resulting token owns its content.
 *
 * @return the token; the type is CX_JSON_NO_TOKEN when the concatenation
 * failed, and CX_JSON_TOKEN_ERROR when the content is neither a literal
 * nor a number
 */
static CxJsonToken token_create(CxJson *json, bool isstring, size_t start, size_t end) {
    cxmutstr str = cx_mutstrn(json->buffer.space + start, end - start);
    bool allocated = false;
    if (json->uncompleted.tokentype != CX_JSON_NO_TOKEN) {
        allocated = true;
        str = cx_strcat_m(json->uncompleted.content, 1, str);
        if (str.ptr == NULL) { // LCOV_EXCL_START
            return (CxJsonToken){CX_JSON_NO_TOKEN, false, {NULL, 0}};
        } // LCOV_EXCL_STOP
    }
    json->uncompleted = (CxJsonToken){0};
    CxJsonTokenType ttype;
    if (isstring) {
        ttype = CX_JSON_TOKEN_STRING;
    } else {
        cxstring s = cx_strcast(str);
        if (!cx_strcmp(s, CX_STR("true")) || !cx_strcmp(s, CX_STR("false"))
            || !cx_strcmp(s, CX_STR("null"))) {
            ttype = CX_JSON_TOKEN_LITERAL;
        } else {
            ttype = token_numbertype(str.ptr, str.length);
        }
    }
    if (ttype == CX_JSON_TOKEN_ERROR) {
        if (allocated) {
            cx_strfree(&str);
        }
        return (CxJsonToken){CX_JSON_TOKEN_ERROR, false, {NULL, 0}};
    }
    return (CxJsonToken){ttype, allocated, str};
}

/**
 * Maps a single character to the token type it starts or represents.
 *
 * @return the token type, CX_JSON_TOKEN_SPACE for white space, or
 * CX_JSON_NO_TOKEN for every other character
 */
static CxJsonTokenType char2ttype(char c) {
    switch (c) {
        case '[': {
            return CX_JSON_TOKEN_BEGIN_ARRAY;
        }
        case '{': {
            return CX_JSON_TOKEN_BEGIN_OBJECT;
        }
        case ']': {
            return CX_JSON_TOKEN_END_ARRAY;
        }
        case '}': {
            return CX_JSON_TOKEN_END_OBJECT;
        }
        case ':': {
            return CX_JSON_TOKEN_NAME_SEPARATOR;
        }
        case ',': {
            return CX_JSON_TOKEN_VALUE_SEPARATOR;
        }
        case '"': {
            return CX_JSON_TOKEN_STRING;
        }
        default: {
            // ctype functions require a value representable as unsigned char
            if (isspace((unsigned char) c)) {
                return CX_JSON_TOKEN_SPACE;
            }
        }
    }
    return CX_JSON_NO_TOKEN;
}

/**
 * Reads the next token from the buffer.
 *
 * When the buffer ends within a token, the partial content is saved in
 * json->uncompleted and CX_JSON_INCOMPLETE_DATA is returned.
 */
static enum cx_json_status token_parse_next(CxJson *json, CxJsonToken *result) {
    // check if there is data in the buffer
    if (cxBufferEof(&json->buffer)) {
        return json->uncompleted.tokentype == CX_JSON_NO_TOKEN ?
            CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA;
    }

    // current token type and start index
    CxJsonTokenType ttype = json->uncompleted.tokentype;
    size_t token_part_start = json->buffer.pos;

    for (size_t i = json->buffer.pos; i < json->buffer.size; i++) {
        char c = json->buffer.space[i];
        if (ttype != CX_JSON_TOKEN_STRING) {
            // currently non-string token
            CxJsonTokenType ctype = char2ttype(c);
            if (ttype == CX_JSON_NO_TOKEN) {
                // start of new token?
                if (ctype == CX_JSON_TOKEN_SPACE) {
                    json->buffer.pos++;
                    continue;
                } else if (ctype == CX_JSON_TOKEN_STRING) {
                    // begin string
                    ttype = CX_JSON_TOKEN_STRING;
                    token_part_start = i;
                } else if (ctype != CX_JSON_NO_TOKEN) {
                    // single-char token
                    json->buffer.pos = i + 1;
                    *result = (CxJsonToken){ctype, false, {NULL, 0}};
                    return CX_JSON_NO_ERROR;
                } else {
                    ttype = CX_JSON_TOKEN_LITERAL; // number or literal
                    token_part_start = i;
                }
            } else {
                // finish token
                if (ctype != CX_JSON_NO_TOKEN) {
                    *result = token_create(json, false, token_part_start, i);
                    if (result->tokentype == CX_JSON_NO_TOKEN) {
                        return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
                    }
                    if (result->tokentype == CX_JSON_TOKEN_ERROR) {
                        return CX_JSON_FORMAT_ERROR_NUMBER;
                    }
                    json->buffer.pos = i;
                    return CX_JSON_NO_ERROR;
                }
            }
        } else {
            // currently inside a string
            if (json->tokenizer_escape) {
                json->tokenizer_escape = false;
            } else {
                if (c == '"') {
                    *result = token_create(json, true, token_part_start, i + 1);
                    if (result->tokentype == CX_JSON_NO_TOKEN) {
                        return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
                    }
                    json->buffer.pos = i + 1;
                    return CX_JSON_NO_ERROR;
                } else if (c == '\\') {
                    json->tokenizer_escape = true;
                }
            }
        }
    }

    if (ttype != CX_JSON_NO_TOKEN) {
        // uncompleted token
        size_t uncompleted_len = json->buffer.size - token_part_start;
        if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
            // current token is uncompleted
            // save current token content
            CxJsonToken uncompleted = {
                ttype, true,
                cx_strdup(cx_strn(json->buffer.space + token_part_start, uncompleted_len))
            };
            if (uncompleted.content.ptr == NULL) {
                return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
            }
            json->uncompleted = uncompleted;
        } else {
            // previously we also had an uncompleted token
            // combine the uncompleted token with the current token
            assert(json->uncompleted.allocated);
            cxmutstr str = cx_strcat_m(json->uncompleted.content, 1,
                cx_strn(json->buffer.space + token_part_start, uncompleted_len));
            if (str.ptr == NULL) {
                return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
            }
            json->uncompleted.content = str;
        }
        // advance the buffer position - we saved the stuff in the uncompleted token
        json->buffer.pos += uncompleted_len;
    }

    return CX_JSON_INCOMPLETE_DATA;
}

/**
 * Creates an unescaped, zero-terminated copy of a quoted string.
 *
 * @return the unescaped string; the pointer is NULL when allocation failed
 */
static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) {
    // note: this function expects that str contains the enclosing quotes!

    cxmutstr result;
    result.length = 0;
    result.ptr = cxMalloc(a, str.length - 1);
    if (result.ptr == NULL) return result; // LCOV_EXCL_LINE

    bool u = false;
    for (size_t i = 1; i < str.length - 1; i++) {
        char c = str.ptr[i];
        if (u) {
            u = false;
            if (c == 'n') {
                c = '\n';
            } else if (c == 't') {
                c = '\t';
            } else if (c == 'r') {
                c = '\r';
            } else if (c == '\\') {
                c = '\\';
            } else if (c == '/') {
                c = '/'; // always unescape, we don't need settings here
            } else if (c == 'f') {
                c = '\f';
            } else if (c == 'b') {
                c = '\b';
            }
            // TODO: support \uXXXX escape sequences
            // TODO: discuss the behavior for unrecognized escape sequences
            //       most parsers throw an error here
            result.ptr[result.length++] = c;
        } else {
            if (c == '\\') {
                u = true;
            } else {
                result.ptr[result.length++] = c;
            }
        }
    }
    result.ptr[result.length] = 0;

    return result;
}

/**
 * Escapes a string for JSON output.
 *
 * When nothing needs to be escaped, the input string is returned as is.
 * Otherwise, a new string is allocated, which the caller must free.
 *
 * @return the escaped string; the pointer is NULL when the initial
 * allocation of the new string failed
 */
static cxmutstr escape_string(cxmutstr str, bool escape_slash) {
    // note: this function produces the string without enclosing quotes
    // the reason is that we don't want to allocate memory just for that
    CxBuffer buf = {0};

    bool all_printable = true;
    for (size_t i = 0; i < str.length; i++) {
        unsigned char c = str.ptr[i];
        bool escape = c < 0x20 || c == '\\' || c == '"'
            || (escape_slash && c == '/');

        if (all_printable && escape) {
            // first character to escape: start the copy with what we have
            size_t capa = str.length + 32;
            char *space = malloc(capa);
            if (space == NULL) return cx_mutstrn(NULL, 0);
            cxBufferInit(&buf, space, capa, NULL, CX_BUFFER_AUTO_EXTEND);
            cxBufferWrite(str.ptr, 1, i, &buf);
            all_printable = false;
        }
        if (escape) {
            cxBufferPut(&buf, '\\');
            if (c == '\"') {
                cxBufferPut(&buf, '\"');
            } else if (c == '\n') {
                cxBufferPut(&buf, 'n');
            } else if (c == '\t') {
                cxBufferPut(&buf, 't');
            } else if (c == '\r') {
                cxBufferPut(&buf, 'r');
            } else if (c == '\\') {
                cxBufferPut(&buf, '\\');
            } else if (c == '/') {
                cxBufferPut(&buf, '/');
            } else if (c == '\f') {
                cxBufferPut(&buf, 'f');
            } else if (c == '\b') {
                cxBufferPut(&buf, 'b');
            } else {
                char code[6];
                snprintf(code, sizeof(code), "u%04x", (unsigned int) c);
                cxBufferPutString(&buf, code);
            }
        } else if (!all_printable) {
            cxBufferPut(&buf, c);
        }
    }
    if (!all_printable) {
        str = cx_mutstrn(buf.space, buf.size);
    }
    cxBufferDestroy(&buf);
    return str;
}

/**
 * Creates a new value and attaches it to the value currently on top of the
 * value stack, if any. Arrays and objects are pushed onto that stack.
 *
 * @return the new value or NULL on allocation failure
 */
static CxJsonValue* json_create_value(CxJson *json, CxJsonValueType type) {
    CxJsonValue *v = cxCalloc(json->allocator, 1, sizeof(CxJsonValue));
    if (v == NULL) return NULL; // LCOV_EXCL_LINE

    // initialize the value
    v->type = type;
    v->allocator = json->allocator;
    if (type == CX_JSON_ARRAY) {
        cx_array_initialize_a(json->allocator, v->value.array.array, 16);
        if (v->value.array.array == NULL) goto create_json_value_exit_error; // LCOV_EXCL_LINE
    } else if (type == CX_JSON_OBJECT) {
        cx_array_initialize_a(json->allocator, v->value.object.values, 16);
        v->value.object.indices = cxCalloc(json->allocator, 16, sizeof(size_t));
        if (v->value.object.values == NULL ||
            v->value.object.indices == NULL)
            goto create_json_value_exit_error; // LCOV_EXCL_LINE
    }

    // add the new value to a possible parent
    if (json->vbuf_size > 0) {
        CxJsonValue *parent = json->vbuf[json->vbuf_size - 1];
        assert(parent != NULL);
        if (parent->type == CX_JSON_ARRAY) {
            CxArrayReallocator value_realloc = cx_array_reallocator(json->allocator, NULL);
            if (cx_array_simple_add_a(&value_realloc, parent->value.array.array, v)) {
                goto create_json_value_exit_error; // LCOV_EXCL_LINE
            }
        } else if (parent->type == CX_JSON_OBJECT) {
            // the member was already created after parsing the name
            assert(json->uncompleted_member.name.ptr != NULL);
            json->uncompleted_member.value = v;
            if (json_add_objvalue(parent, json->uncompleted_member)) {
                goto create_json_value_exit_error; // LCOV_EXCL_LINE
            }
            json->uncompleted_member.name = (cxmutstr) {NULL, 0};
        } else {
            assert(false); // LCOV_EXCL_LINE
        }
    }

    // add the new value to the stack, if it is an array or object
    if (type == CX_JSON_ARRAY || type == CX_JSON_OBJECT) {
        CxArrayReallocator vbuf_realloc = cx_array_reallocator(NULL, json->vbuf_internal);
        if (cx_array_simple_add_a(&vbuf_realloc, json->vbuf, v)) {
            goto create_json_value_exit_error; // LCOV_EXCL_LINE
        }
    }

    // if currently no value is parsed, this is now the value of interest
    if (json->parsed == NULL) {
        json->parsed = v;
    }

    return v;
    // LCOV_EXCL_START
create_json_value_exit_error:
    cxJsonValueFree(v);
    return NULL;
    // LCOV_EXCL_STOP
}

#define JP_STATE_VALUE_BEGIN         0
#define JP_STATE_VALUE_END          10
#define JP_STATE_VALUE_BEGIN_OBJ     1
#define JP_STATE_OBJ_SEP_OR_CLOSE   11
#define JP_STATE_VALUE_BEGIN_AR      2
#define JP_STATE_ARRAY_SEP_OR_CLOSE 12
#define JP_STATE_OBJ_NAME_OR_CLOSE   5
#define JP_STATE_OBJ_NAME            6
#define JP_STATE_OBJ_COLON           7

void cxJsonInit(CxJson *json, const CxAllocator *allocator) {
    if (allocator == NULL) {
        allocator = cxDefaultAllocator;
    }

    memset(json, 0, sizeof(CxJson));
    json->allocator = allocator;

    // both stacks start on their internal storage
    json->states = json->states_internal;
    json->states_capacity = cx_nmemb(json->states_internal);
    json->states[0] = JP_STATE_VALUE_BEGIN;
    json->states_size = 1;

    json->vbuf = json->vbuf_internal;
    json->vbuf_capacity = cx_nmemb(json->vbuf_internal);
}

void cxJsonDestroy(CxJson *json) {
    cxBufferDestroy(&json->buffer);
    // the stacks only need to be freed when they outgrew the internal storage
    if (json->states != json->states_internal) {
        free(json->states);
    }
    if (json->vbuf != json->vbuf_internal) {
        free(json->vbuf);
    }
    cxJsonValueFree(json->parsed);
    json->parsed = NULL;
    if (json->uncompleted_member.name.ptr != NULL) {
        cx_strfree_a(json->allocator, &json->uncompleted_member.name);
        json->uncompleted_member = (CxJsonObjValue){{NULL, 0}, NULL};
    }
}

int cxJsonFilln(CxJson *json, const char *buf, size_t size) {
    if (cxBufferEof(&json->buffer)) {
        // reinitialize the buffer
        cxBufferDestroy(&json->buffer);
        cxBufferInit(&json->buffer, (char*) buf, size,
            NULL, CX_BUFFER_AUTO_EXTEND | CX_BUFFER_COPY_ON_WRITE);
        json->buffer.size = size;
        return 0;
    } else {
        return size != cxBufferAppend(buf, 1, size, &json->buffer);
    }
}

static void json_add_state(CxJson *json, int state) {
    // we have guaranteed the necessary space with cx_array_simple_reserve()
    // therefore, we can safely add the state in the simplest way possible
    json->states[json->states_size++] = state;
}

// destroys the current token before returning from json_parse()
// note: expands to two statements, only use it within a braced block
#define return_rec(code) \
    token_destroy(&token); \
    return code

/**
 * Consumes exactly one token and advances the state machine.
 */
static enum cx_json_status json_parse(CxJson *json) {
    // Reserve a pointer for a possibly read value
    CxJsonValue *vbuf = NULL;

    // grab the next token
    CxJsonToken token;
    {
        enum cx_json_status ret = token_parse_next(json, &token);
        if (ret != CX_JSON_NO_ERROR) {
            return ret;
        }
    }

    // pop the current state
    assert(json->states_size > 0);
    int state = json->states[--json->states_size];

    // guarantee that at least two more states fit on the stack
    CxArrayReallocator state_realloc = cx_array_reallocator(NULL, json->states_internal);
    if (cx_array_simple_reserve_a(&state_realloc, json->states, 2)) {
        // the token may own its content, so it must not be leaked here
        return_rec(CX_JSON_BUFFER_ALLOC_FAILED); // LCOV_EXCL_LINE
    }

    //  0 JP_STATE_VALUE_BEGIN          value begin
    // 10 JP_STATE_VALUE_END            expect value end
    //  1 JP_STATE_VALUE_BEGIN_OBJ      value begin (inside object)
    // 11 JP_STATE_OBJ_SEP_OR_CLOSE     object, expect separator, objclose
    //  2 JP_STATE_VALUE_BEGIN_AR       value begin (inside array)
    // 12 JP_STATE_ARRAY_SEP_OR_CLOSE   array, expect separator or arrayclose
    //  5 JP_STATE_OBJ_NAME_OR_CLOSE    object, expect name or objclose
    //  6 JP_STATE_OBJ_NAME             object, expect name
    //  7 JP_STATE_OBJ_COLON            object, expect ':'

    if (state < 3) {
        // push expected end state to the stack
        json_add_state(json, 10 + state);
        switch (token.tokentype) {
            case CX_JSON_TOKEN_BEGIN_ARRAY: {
                if (json_create_value(json, CX_JSON_ARRAY) == NULL) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                json_add_state(json, JP_STATE_VALUE_BEGIN_AR);
                return_rec(CX_JSON_NO_ERROR);
            }
            case CX_JSON_TOKEN_BEGIN_OBJECT: {
                if (json_create_value(json, CX_JSON_OBJECT) == NULL) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                json_add_state(json, JP_STATE_OBJ_NAME_OR_CLOSE);
                return_rec(CX_JSON_NO_ERROR);
            }
            case CX_JSON_TOKEN_STRING: {
                if ((vbuf = json_create_value(json, CX_JSON_STRING)) == NULL) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                cxmutstr str = unescape_string(json->allocator, token.content);
                if (str.ptr == NULL) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                vbuf->value.string = str;
                return_rec(CX_JSON_NO_ERROR);
            }
            case CX_JSON_TOKEN_INTEGER:
            case CX_JSON_TOKEN_NUMBER: {
                int type = token.tokentype == CX_JSON_TOKEN_INTEGER ?
                    CX_JSON_INTEGER : CX_JSON_NUMBER;
                if (NULL == (vbuf = json_create_value(json, type))) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                if (type == CX_JSON_INTEGER) {
                    if (cx_strtoi64(token.content, &vbuf->value.integer, 10)) {
                        return_rec(CX_JSON_FORMAT_ERROR_NUMBER);
                    }
                } else {
                    if (cx_strtod(token.content, &vbuf->value.number)) {
                        return_rec(CX_JSON_FORMAT_ERROR_NUMBER);
                    }
                }
                return_rec(CX_JSON_NO_ERROR);
            }
            case CX_JSON_TOKEN_LITERAL: {
                if ((vbuf = json_create_value(json, CX_JSON_LITERAL)) == NULL) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                if (0 == cx_strcmp(cx_strcast(token.content), cx_str("true"))) {
                    vbuf->value.literal = CX_JSON_TRUE;
                } else if (0 == cx_strcmp(cx_strcast(token.content), cx_str("false"))) {
                    vbuf->value.literal = CX_JSON_FALSE;
                } else {
                    vbuf->value.literal = CX_JSON_NULL;
                }
                return_rec(CX_JSON_NO_ERROR);
            }
            default: {
                return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
            }
        }
    } else if (state == JP_STATE_ARRAY_SEP_OR_CLOSE) {
        // expect ',' or ']'
        if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
            json_add_state(json, JP_STATE_VALUE_BEGIN_AR);
            return_rec(CX_JSON_NO_ERROR);
        } else if (token.tokentype == CX_JSON_TOKEN_END_ARRAY) {
            // discard the array from the value buffer
            json->vbuf_size--;
            return_rec(CX_JSON_NO_ERROR);
        } else {
            return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
        }
    } else if (state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) {
        if (state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
            // discard the obj from the value buffer
            json->vbuf_size--;
            return_rec(CX_JSON_NO_ERROR);
        } else {
            // expect string
            if (token.tokentype != CX_JSON_TOKEN_STRING) {
                return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
            }

            // add new entry
            cxmutstr name = unescape_string(json->allocator, token.content);
            if (name.ptr == NULL) {
                return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
            }
            assert(json->uncompleted_member.name.ptr == NULL);
            json->uncompleted_member.name = name;
            assert(json->vbuf_size > 0);

            // next state
            json_add_state(json, JP_STATE_OBJ_COLON);
            return_rec(CX_JSON_NO_ERROR);
        }
    } else if (state == JP_STATE_OBJ_COLON) {
        // expect ':'
        if (token.tokentype != CX_JSON_TOKEN_NAME_SEPARATOR) {
            return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
        }
        // next state
        json_add_state(json, JP_STATE_VALUE_BEGIN_OBJ);
        return_rec(CX_JSON_NO_ERROR);
    } else if (state == JP_STATE_OBJ_SEP_OR_CLOSE) {
        // expect ',' or '}'
        if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
            json_add_state(json, JP_STATE_OBJ_NAME);
            return_rec(CX_JSON_NO_ERROR);
        } else if (token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
            // discard the obj from the value buffer
            json->vbuf_size--;
            return_rec(CX_JSON_NO_ERROR);
        } else {
            return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
        }
    } else {
        // should be unreachable
        assert(false);
        return_rec(-1);
    }
}

CxJsonStatus cxJsonNext(CxJson *json, CxJsonValue **value) {
    // check if buffer has been filled
    if (json->buffer.space == NULL) {
        return CX_JSON_NULL_DATA;
    }

    // initialize output value
    *value = &cx_json_value_nothing;

    // parse data
    CxJsonStatus result;
    do {
        result = json_parse(json);
        if (result == CX_JSON_NO_ERROR && json->states_size == 1) {
            // final state reached
            assert(json->states[0] == JP_STATE_VALUE_END);
            assert(json->vbuf_size == 0);

            // write output value
            *value = json->parsed;
            json->parsed = NULL;

            // re-initialize state machine
            json->states[0] = JP_STATE_VALUE_BEGIN;

            return CX_JSON_NO_ERROR;
        }
    } while (result == CX_JSON_NO_ERROR);

    // the parser might think there is no data
    // but when we did not reach the final state,
    // we know that there must be more to come
    if (result == CX_JSON_NO_DATA && json->states_size > 1) {
        return CX_JSON_INCOMPLETE_DATA;
    }

    return result;
}

void cxJsonValueFree(CxJsonValue *value) {
    // the "nothing" value is a static sentinel and must never be freed
    if (value == NULL || value->type == CX_JSON_NOTHING) return;
    switch (value->type) {
        case CX_JSON_OBJECT: {
            CxJsonObject obj = value->value.object;
            for (size_t i = 0; i < obj.values_size; i++) {
                cxJsonValueFree(obj.values[i].value);
                cx_strfree_a(value->allocator, &obj.values[i].name);
            }
            cxFree(value->allocator, obj.values);
            cxFree(value->allocator, obj.indices);
            break;
        }
        case CX_JSON_ARRAY: {
            CxJsonArray array = value->value.array;
            for (size_t i = 0; i < array.array_size; i++) {
                cxJsonValueFree(array.array[i]);
            }
            cxFree(value->allocator, array.array);
            break;
        }
        case CX_JSON_STRING: {
            cxFree(value->allocator, value->value.string.ptr);
            break;
        }
        default: {
            break;
        }
    }
    cxFree(value->allocator, value);
}

CxJsonValue* cxJsonCreateObj(const CxAllocator* allocator) {
    if (allocator == NULL) allocator = cxDefaultAllocator;
    CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
    if (v == NULL) return NULL;
    v->allocator = allocator;
    v->type = CX_JSON_OBJECT;
    cx_array_initialize_a(allocator, v->value.object.values, 16);
    if (v->value.object.values == NULL) { // LCOV_EXCL_START
        cxFree(allocator, v);
        return NULL;
        // LCOV_EXCL_STOP
    }
    v->value.object.indices = cxCalloc(allocator, 16, sizeof(size_t));
    if (v->value.object.indices == NULL) { // LCOV_EXCL_START
        cxFree(allocator, v->value.object.values);
        cxFree(allocator, v);
        return NULL;
        // LCOV_EXCL_STOP
    }
    return v;
}

CxJsonValue* cxJsonCreateArr(const CxAllocator* allocator) {
    if (allocator == NULL) allocator = cxDefaultAllocator;
    CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
    if (v == NULL) return NULL;
    v->allocator = allocator;
    v->type = CX_JSON_ARRAY;
    cx_array_initialize_a(allocator, v->value.array.array, 16);
    if (v->value.array.array == NULL) {
        cxFree(allocator, v);
        return NULL;
    }
    return v;
}

CxJsonValue* cxJsonCreateNumber(const CxAllocator* allocator, double num) {
    if (allocator == NULL) allocator = cxDefaultAllocator;
    CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
    if (v == NULL) return NULL;
    v->allocator = allocator;
    v->type = CX_JSON_NUMBER;
    v->value.number = num;
    return v;
}

CxJsonValue* cxJsonCreateInteger(const CxAllocator* allocator, int64_t num) {
    if (allocator == NULL) allocator = cxDefaultAllocator;
    CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
    if (v == NULL) return NULL;
    v->allocator = allocator;
    v->type = CX_JSON_INTEGER;
    v->value.integer = num;
    return v;
}

CxJsonValue* cxJsonCreateString(const CxAllocator* allocator, const char* str) {
    return cxJsonCreateCxString(allocator, cx_str(str));
}

CxJsonValue* cxJsonCreateCxString(const CxAllocator* allocator, cxstring str) {
    if (allocator == NULL) allocator = cxDefaultAllocator;
    CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
    if (v == NULL) return NULL;
    v->allocator = allocator;
    v->type = CX_JSON_STRING;
    cxmutstr s = cx_strdup_a(allocator, str);
    if (s.ptr == NULL) {
        cxFree(allocator, v);
        return NULL;
    }
    v->value.string = s;
    return v;
}

CxJsonValue* cxJsonCreateLiteral(const CxAllocator* allocator, CxJsonLiteral lit) {
    if (allocator == NULL) allocator = cxDefaultAllocator;
    CxJsonValue* v = cxMalloc(allocator, sizeof(CxJsonValue));
    if (v == NULL) return NULL;
    v->allocator = allocator;
    v->type = CX_JSON_LITERAL;
    v->value.literal = lit;
    return v;
}

// LCOV_EXCL_START
// never called as long as malloc() does not return NULL
// frees the values up to the first NULL entry and then the temporary array
static void json_arr_free_temp(CxJsonValue** values, size_t count) {
    for (size_t i = 0; i < count; i++) {
        if (values[i] == NULL) break;
        cxJsonValueFree(values[i]);
    }
    free(values);
}
// LCOV_EXCL_STOP

int cxJsonArrAddNumbers(CxJsonValue* arr, const double* num, size_t count) {
    CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
    if (values == NULL) return -1;
    for (size_t i = 0; i < count; i++) {
        values[i] = cxJsonCreateNumber(arr->allocator, num[i]);
        if (values[i] == NULL) {
            json_arr_free_temp(values, count);
            return -1;
        }
    }
    int ret = cxJsonArrAddValues(arr, values, count);
    if (ret != 0) { // LCOV_EXCL_START
        // the array did not take ownership, so the new values must be freed
        json_arr_free_temp(values, count);
        return ret;
    } // LCOV_EXCL_STOP
    free(values);
    return ret;
}

int cxJsonArrAddIntegers(CxJsonValue* arr, const int64_t* num, size_t count) {
    CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
    if (values == NULL) return -1;
    for (size_t i = 0; i < count; i++) {
        values[i] = cxJsonCreateInteger(arr->allocator, num[i]);
        if (values[i] == NULL) {
            json_arr_free_temp(values, count);
            return -1;
        }
    }
    int ret = cxJsonArrAddValues(arr, values, count);
    if (ret != 0) { // LCOV_EXCL_START
        // the array did not take ownership, so the new values must be freed
        json_arr_free_temp(values, count);
        return ret;
    } // LCOV_EXCL_STOP
    free(values);
    return ret;
}

int cxJsonArrAddStrings(CxJsonValue* arr, const char* const* str, size_t count) {
    CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
    if (values == NULL) return -1;
    for (size_t i = 0; i < count; i++) {
        values[i] = cxJsonCreateString(arr->allocator, str[i]);
        if (values[i] == NULL) {
            json_arr_free_temp(values, count);
            return -1;
        }
    }
    int ret = cxJsonArrAddValues(arr, values, count);
    if (ret != 0) { // LCOV_EXCL_START
        // the array did not take ownership, so the new values must be freed
        json_arr_free_temp(values, count);
        return ret;
    } // LCOV_EXCL_STOP
    free(values);
    return ret;
}

int cxJsonArrAddCxStrings(CxJsonValue* arr, const cxstring* str, size_t count) {
    CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
    if (values == NULL) return -1;
    for (size_t i = 0; i < count; i++) {
        values[i] = cxJsonCreateCxString(arr->allocator, str[i]);
        if (values[i] == NULL) {
            json_arr_free_temp(values, count);
            return -1;
        }
    }
    int ret = cxJsonArrAddValues(arr, values, count);
    if (ret != 0) { // LCOV_EXCL_START
        // the array did not take ownership, so the new values must be freed
        json_arr_free_temp(values, count);
        return ret;
    } // LCOV_EXCL_STOP
    free(values);
    return ret;
}

int cxJsonArrAddLiterals(CxJsonValue* arr, const CxJsonLiteral* lit, size_t count) {
    CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
    if (values == NULL) return -1;
    for (size_t i = 0; i < count; i++) {
        values[i] = cxJsonCreateLiteral(arr->allocator, lit[i]);
        if (values[i] == NULL) {
            json_arr_free_temp(values, count);
            return -1;
        }
    }
    int ret = cxJsonArrAddValues(arr, values, count);
    if (ret != 0) { // LCOV_EXCL_START
        // the array did not take ownership, so the new values must be freed
        json_arr_free_temp(values, count);
        return ret;
    } // LCOV_EXCL_STOP
    free(values);
    return ret;
}

int cxJsonArrAddValues(CxJsonValue* arr, CxJsonValue* const* val, size_t count) {
    CxArrayReallocator value_realloc = cx_array_reallocator(arr->allocator, NULL);
    assert(arr->type == CX_JSON_ARRAY);
    return cx_array_simple_copy_a(&value_realloc,
            arr->value.array.array,
            arr->value.array.array_size,
            val, count
    );
}

int cxJsonObjPut(CxJsonValue* obj, cxstring name, CxJsonValue* child) {
    // the object stores its own copy of the name
    cxmutstr k = cx_strdup_a(obj->allocator, name);
    if (k.ptr == NULL) return -1;
    CxJsonObjValue kv = {k, child};
    if (json_add_objvalue(obj, kv)) {
        cx_strfree_a(obj->allocator, &k);
        return 1;
    } else {
        return 0;
    }
}

CxJsonValue* cxJsonObjPutObj(CxJsonValue* obj, cxstring name) {
    CxJsonValue* v = cxJsonCreateObj(obj->allocator);
    if (v == NULL) return NULL;
    if (cxJsonObjPut(obj, name, v)) {
        cxJsonValueFree(v);
        return NULL;
    }
    return v;
}

CxJsonValue* cxJsonObjPutArr(CxJsonValue* obj, cxstring name) {
    CxJsonValue* v = cxJsonCreateArr(obj->allocator);
    if (v == NULL) return NULL;
    if (cxJsonObjPut(obj, name, v)) {
        cxJsonValueFree(v);
        return NULL;
    }
    return v;
}

CxJsonValue* cxJsonObjPutNumber(CxJsonValue* obj, cxstring name, double num) {
    CxJsonValue* v = cxJsonCreateNumber(obj->allocator, num);
    if (v == NULL) return NULL;
    if (cxJsonObjPut(obj, name, v)) {
        cxJsonValueFree(v);
        return NULL;
    }
    return v;
}

CxJsonValue* cxJsonObjPutInteger(CxJsonValue* obj, cxstring name, int64_t num) {
    CxJsonValue* v = cxJsonCreateInteger(obj->allocator, num);
    if (v == NULL) return NULL;
    if (cxJsonObjPut(obj, name, v)) {
        cxJsonValueFree(v);
        return NULL;
    }
    return v;
}

CxJsonValue* cxJsonObjPutString(CxJsonValue* obj, cxstring name, const char* str) {
    CxJsonValue* v = cxJsonCreateString(obj->allocator, str);
    if (v == NULL) return NULL;
    if (cxJsonObjPut(obj, name, v)) {
        cxJsonValueFree(v);
        return NULL;
    }
    return v;
}

CxJsonValue* cxJsonObjPutCxString(CxJsonValue* obj, cxstring name, cxstring str) {
    CxJsonValue* v = cxJsonCreateCxString(obj->allocator, str);
    if (v == NULL) return NULL;
    if (cxJsonObjPut(obj, name, v)) {
        cxJsonValueFree(v);
        return NULL;
    }
    return v;
}

CxJsonValue* cxJsonObjPutLiteral(CxJsonValue* obj, cxstring name, CxJsonLiteral lit) {
    CxJsonValue* v = cxJsonCreateLiteral(obj->allocator, lit);
    if (v == NULL) return NULL;
    if (cxJsonObjPut(obj, name, v)) {
        cxJsonValueFree(v);
        return NULL;
    }
    return v;
}

CxJsonValue *cxJsonArrGet(const CxJsonValue *value, size_t index) {
    if (index >= value->value.array.array_size) {
        return &cx_json_value_nothing;
    }
    return value->value.array.array[index];
}

CxIterator cxJsonArrIter(const CxJsonValue *value) {
    return cxIteratorPtr(
        value->value.array.array,
        value->value.array.array_size
    );
}

CxIterator cxJsonObjIter(const CxJsonValue *value) {
    return cxIterator(
        value->value.object.values,
        sizeof(CxJsonObjValue),
        value->value.object.values_size
    );
}

CxJsonValue *cx_json_obj_get_cxstr(const CxJsonValue *value, cxstring name) {
    CxJsonObjValue *member = json_find_objvalue(value, name);
    if (member == NULL) {
        return &cx_json_value_nothing;
    } else {
        return member->value;
    }
}

CxJsonWriter cxJsonWriterCompact(void) {
    return (CxJsonWriter) {
        false,
        true,
        6,
        false,
        4,
        false
    };
}

CxJsonWriter cxJsonWriterPretty(bool use_spaces) {
    return (CxJsonWriter) {
        true,
        true,
        6,
        use_spaces,
        4,
        false
    };
}

/**
 * Writes the indentation for the specified depth.
 *
 * @return zero on success, non-zero when a write was incomplete
 */
static int cx_json_writer_indent(
        void *target,
        cx_write_func wfunc,
        const CxJsonWriter *settings,
        unsigned int depth
) {
    if (depth == 0) return 0;

    // both strings must provide 32 chars, because we write in chunks of 32
    static const char indent_spaces[] =
        "        " "        " "        " "        ";
    static const char indent_tabs[] =
        "\t\t\t\t\t\t\t\t" "\t\t\t\t\t\t\t\t"
        "\t\t\t\t\t\t\t\t" "\t\t\t\t\t\t\t\t";

    // determine the width and characters to use
    const char* indent; // for 32 prepared chars
    size_t width = depth;
    if (settings->indent_space) {
        if (settings->indent == 0) return 0;
        width *= settings->indent;
        indent = indent_spaces;
    } else {
        indent = indent_tabs;
    }

    // calculate the number of write calls and write
    size_t full = width / 32;
    size_t remaining = width % 32;
    for (size_t i = 0; i < full; i++) {
        if (32 != wfunc(indent, 1, 32, target)) return 1;
    }
    if (remaining != wfunc(indent, 1, remaining, target)) return 1;

    return 0;
}

/**
 * Recursively writes a value.
 *
 * @return zero on success, non-zero when fewer bytes were written
 * than expected or when escaping a string failed
 */
int cx_json_write_rec(
        void *target,
        const CxJsonValue *value,
        cx_write_func wfunc,
        const CxJsonWriter *settings,
        unsigned int depth
) {
    // keep track of written items
    // the idea is to reduce the number of jumps for error checking
    size_t actual = 0, expected = 0;

    // small buffer for number to string conversions
    char numbuf[40];

    // recursively write the values
    switch (value->type) {
        case CX_JSON_OBJECT: {
            const char *begin_obj = "{\n";
            if (settings->pretty) {
                actual += wfunc(begin_obj, 1, 2, target);
                expected += 2;
            } else {
                actual += wfunc(begin_obj, 1, 1, target);
                expected++;
            }
            depth++;
            size_t elem_count = value->value.object.values_size;
            for (size_t look_idx = 0; look_idx < elem_count; look_idx++) {
                // get the member either via index array or directly
                size_t elem_idx = settings->sort_members
                                      ? look_idx
                                      : value->value.object.indices[look_idx];
                CxJsonObjValue *member = &value->value.object.values[elem_idx];

                // possible indentation
                if (settings->pretty) {
                    if (cx_json_writer_indent(target, wfunc, settings, depth)) {
                        return 1; // LCOV_EXCL_LINE
                    }
                }

                // the name
                cxmutstr name = escape_string(member->name, settings->escape_slash);
                if (name.ptr == NULL && member->name.length > 0) {
                    return 1; // LCOV_EXCL_LINE
                }
                // the expectation must be based on the escaped length,
                // and cx_strfree() might reset the length, so we save it
                size_t name_len = name.length;
                actual += wfunc("\"", 1, 1, target);
                actual += wfunc(name.ptr, 1, name_len, target);
                if (name.ptr != member->name.ptr) {
                    cx_strfree(&name);
                }
                actual += wfunc("\"", 1, 1, target);
                const char *obj_name_sep = ": ";
                if (settings->pretty) {
                    actual += wfunc(obj_name_sep, 1, 2, target);
                    expected += 4 + name_len;
                } else {
                    actual += wfunc(obj_name_sep, 1, 1, target);
                    expected += 3 + name_len;
                }

                // the value
                if (cx_json_write_rec(target, member->value, wfunc, settings, depth)) return 1;

                // end of object-value
                if (look_idx < elem_count - 1) {
                    const char *obj_value_sep = ",\n";
                    if (settings->pretty) {
                        actual += wfunc(obj_value_sep, 1, 2, target);
                        expected += 2;
                    } else {
                        actual += wfunc(obj_value_sep, 1, 1, target);
                        expected++;
                    }
                } else {
                    if (settings->pretty) {
                        actual += wfunc("\n", 1, 1, target);
                        expected ++;
                    }
                }
            }
            depth--;
            if (settings->pretty) {
                if (cx_json_writer_indent(target, wfunc, settings, depth)) return 1;
            }
            actual += wfunc("}", 1, 1, target);
            expected++;
            break;
        }
        case CX_JSON_ARRAY: {
            actual += wfunc("[", 1, 1, target);
            expected++;
            CxIterator iter = cxJsonArrIter(value);
            cx_foreach(CxJsonValue*, element, iter) {
                if (cx_json_write_rec(
                        target, element,
                        wfunc, settings, depth)
                ) return 1;

                if (iter.index < iter.elem_count - 1) {
                    const char *arr_value_sep = ", ";
                    if (settings->pretty) {
                        actual += wfunc(arr_value_sep, 1, 2, target);
                        expected += 2;
                    } else {
                        actual += wfunc(arr_value_sep, 1, 1, target);
                        expected++;
                    }
                }
            }
            actual += wfunc("]", 1, 1, target);
            expected++;
            break;
        }
        case CX_JSON_STRING: {
            cxmutstr str = escape_string(value->value.string, settings->escape_slash);
            if (str.ptr == NULL && value->value.string.length > 0) {
                return 1; // LCOV_EXCL_LINE
            }
            // the expectation must be based on the escaped length,
            // and cx_strfree() might reset the length, so we save it
            size_t str_len = str.length;
            actual += wfunc("\"", 1, 1, target);
            actual += wfunc(str.ptr, 1, str_len, target);
            if (str.ptr != value->value.string.ptr) {
                cx_strfree(&str);
            }
            actual += wfunc("\"", 1, 1, target);
            expected += 2 + str_len;
            break;
        }
        case CX_JSON_NUMBER: {
            int precision = settings->frac_max_digits;
            // because of the way how %g is defined, we need to
            // double the precision and truncate ourselves
            precision = 1 + (precision > 15 ? 30 : 2 * precision);
            snprintf(numbuf, 40, "%.*g", precision, value->value.number);
            char *dot, *exp;
            unsigned char max_digits;
            // find the decimal separator and hope that it's one of . or ,
            dot = strchr(numbuf, '.');
            if (dot == NULL) {
                dot = strchr(numbuf, ',');
            }
            if (dot == NULL) {
                // no decimal separator found
                // output everything until a possible exponent
                max_digits = 30;
                dot = numbuf;
            } else {
                // found a decimal separator
                // output everything until the separator
                // and set max digits to what the settings say
                size_t len = dot - numbuf;
                actual += wfunc(numbuf, 1, len, target);
                expected += len;
                max_digits = settings->frac_max_digits;
                if (max_digits > 15) {
                    max_digits = 15;
                }
                // locale independent separator
                if (max_digits > 0) {
                    actual += wfunc(".", 1, 1, target);
                    expected++;
                }
                dot++;
            }
            // find the exponent
            exp = strchr(dot, 'e');
            if (exp == NULL) {
                // no exponent - output the rest
                if (max_digits > 0) {
                    size_t len = strlen(dot);
                    if (len > max_digits) {
                        len = max_digits;
                    }
                    actual += wfunc(dot, 1, len, target);
                    expected += len;
                }
            } else {
                // exponent found - truncate the frac digits
                // and then output the rest
                if (max_digits > 0) {
                    // dot points to the first digit to write and exp to
                    // the 'e', so all digits in between must be considered
                    size_t len = exp - dot;
                    if (len > max_digits) {
                        len = max_digits;
                    }
                    actual += wfunc(dot, 1, len, target);
                    expected += len;
                }
                actual += wfunc("e", 1, 1, target);
                expected++;
                exp++;
                size_t len = strlen(exp);
                actual += wfunc(exp, 1, len, target);
                expected += len;
            }
            break;
        }
        case CX_JSON_INTEGER: {
            snprintf(numbuf, 32, "%" PRIi64, value->value.integer);
            size_t len = strlen(numbuf);
            actual += wfunc(numbuf, 1, len, target);
            expected += len;
            break;
        }
        case CX_JSON_LITERAL: {
            if (value->value.literal == CX_JSON_TRUE) {
                actual += wfunc("true", 1, 4, target);
                expected += 4;
            } else if (value->value.literal == CX_JSON_FALSE) {
                actual += wfunc("false", 1, 5, target);
                expected += 5;
            } else {
                actual += wfunc("null", 1, 4, target);
                expected += 4;
            }
            break;
        }
        case CX_JSON_NOTHING: {
            // deliberately supported as an empty string!
            // users might want to just write the result
            // of a get operation without testing the value
            // and therefore this should not blow up
            break;
        }
        default: assert(false); // LCOV_EXCL_LINE
    }

    return expected != actual;
}

int cxJsonWrite(
    void *target,
    const CxJsonValue *value,
    cx_write_func wfunc,
    const CxJsonWriter *settings
) {
    assert(target != NULL);
    assert(value != NULL);
    assert(wfunc != NULL);

    // fall back to the compact writer when no settings are specified
    CxJsonWriter writer_default = cxJsonWriterCompact();
    if (settings == NULL) {
        settings = &writer_default;
    }
    return cx_json_write_rec(target, value, wfunc, settings, 0);
}