src/json.c

Fri, 10 Jan 2025 23:11:08 +0100

author
Mike Becker <universe@uap-core.de>
date
Fri, 10 Jan 2025 23:11:08 +0100
changeset 1119
ff4d7e76f85a
parent 1117
54df904472b0
child 1121
7fd2672199d7
permissions
-rw-r--r--

implement string escape - resolves #526

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "cx/json.h"
#include "cx/compare.h"

#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <stdio.h>
#include <errno.h>
#include <inttypes.h>

/*
 * RFC 8259
 * https://tools.ietf.org/html/rfc8259
 */

// Shared placeholder value of type CX_JSON_NOTHING.
// cxJsonNext() points its output at this object before parsing starts, and
// cxJsonValueFree() returns early for values of this type.
static CxJsonValue cx_json_value_nothing = {.type = CX_JSON_NOTHING};

static int json_cmp_objvalue(const void *l, const void *r) {
    const CxJsonObjValue *left = l;
    const CxJsonObjValue *right = r;
    return cx_strcmp(cx_strcast(left->name), cx_strcast(right->name));
}

/**
 * Looks up the member with the given name in a JSON object.
 *
 * The lookup is a binary search over the object's member array with
 * json_cmp_objvalue() as comparator.
 *
 * @param obj a value of type CX_JSON_OBJECT
 * @param name the member name to search for
 * @return a pointer to the member, or NULL when the search reports
 * an index equal to the number of members
 */
static CxJsonObjValue *json_find_objvalue(const CxJsonValue *obj, cxstring name) {
    assert(obj->type == CX_JSON_OBJECT);

    // wrap the name into a key element the comparator can work with
    CxJsonObjValue key;
    key.name = cx_mutstrn((char*) name.ptr, name.length);

    const size_t pos = cx_array_binary_search(
            obj->value.object.values,
            obj->value.object.values_size,
            sizeof(CxJsonObjValue),
            &key,
            json_cmp_objvalue
    );

    if (pos != obj->value.object.values_size) {
        return &obj->value.object.values[pos];
    }
    return NULL;
}

/**
 * Adds a member to a JSON object, or replaces the member with the same name.
 *
 * The member array is kept ordered according to json_cmp_objvalue(). For
 * every member added, the indices array records the position of that member
 * in the ordered array; those positions are shifted when a later insert
 * moves the member.
 *
 * @param objv a value of type CX_JSON_OBJECT
 * @param member the member to store (copied by value)
 * @return zero on success, one if reserving memory failed
 */
static int json_add_objvalue(CxJsonValue *objv, CxJsonObjValue member) {
    assert(objv->type == CX_JSON_OBJECT);
    CxJsonObject *const o = &(objv->value.object);
    const CxAllocator *const alloc = objv->allocator;

    // find the position at which the member belongs
    const size_t pos = cx_array_binary_search_sup(
        o->values,
        o->values_size,
        sizeof(CxJsonObjValue),
        &member, json_cmp_objvalue
    );

    // same name already stored? then release the old entry and overwrite it
    if (pos < o->values_size && json_cmp_objvalue(&member, &o->values[pos]) == 0) {
        CxJsonObjValue *existing = &o->values[pos];
        cx_strfree_a(alloc, &existing->name);
        cxJsonValueFree(existing->value);
        *existing = member;
        return 0;
    }

    // make room for one more member and remember the capacity before that
    const size_t cap_before = o->values_capacity;
    CxArrayReallocator arealloc = cx_array_reallocator(alloc, NULL);
    if (cx_array_simple_reserve_a(&arealloc, o->values, 1)) return 1;

    // the index array must grow together with the member array
    if (o->values_capacity > cap_before) {
        if (cxReallocateArray(alloc, &o->indices, o->values_capacity, sizeof(size_t))) {
            return 1;
        }
    }

    // inserting in the middle: shift the tail and fix the recorded positions
    if (pos < o->values_size) {
        memmove(
            &o->values[pos + 1],
            &o->values[pos],
            (o->values_size - pos) * sizeof(CxJsonObjValue)
        );
        for (size_t k = 0; k < o->values_size; k++) {
            if (o->indices[k] >= pos) {
                o->indices[k]++;
            }
        }
    }

    // store the member and record its position
    o->values[pos] = member;
    o->indices[o->values_size] = pos;
    o->values_size++;

    return 0;
}

/**
 * Releases the content of a token, but only if the token owns it
 * (i.e. its allocated flag is set).
 *
 * @param token the token to clean up
 */
static void token_destroy(CxJsonToken *token) {
    if (!token->allocated) {
        return;
    }
    cx_strfree(&token->content);
}

/**
 * Checks whether the tail of a number token is a valid exponent.
 *
 * A valid exponent is an optional sign ('+' or '-') at position pos,
 * followed by nothing but decimal digits, with at least one digit.
 *
 * @param content the characters of the token
 * @param length the total length of the token
 * @param pos the index of the first character after the 'e' / 'E'
 * @return one if content[pos..length) is a valid exponent, zero otherwise
 */
static int num_isexp(const char *content, size_t length, size_t pos) {
    if (pos >= length) {
        return 0;
    }

    int ok = 0;
    for (size_t i = pos; i < length; i++) {
        // <ctype.h> functions require a value representable as unsigned char;
        // passing a negative plain char is undefined behavior
        const unsigned char c = (unsigned char) content[i];
        if (isdigit(c)) {
            ok = 1;
        } else if (i == pos) {
            // only the very first character may be a sign
            if (!(c == '+' || c == '-')) {
                return 0;
            }
        } else {
            return 0;
        }
    }

    return ok;
}

/**
 * Classifies a non-string, non-literal token as integer or number.
 *
 * The token must start with '-' or a digit. A single '.' turns the type into
 * CX_JSON_TOKEN_NUMBER; an 'e' / 'E' hands the rest over to num_isexp().
 * Note that a lone "-" passes this check and is classified as integer.
 *
 * @param content the characters of the token
 * @param length the length of the token
 * @return CX_JSON_TOKEN_INTEGER, CX_JSON_TOKEN_NUMBER,
 * or CX_JSON_TOKEN_ERROR when the token contains an unexpected character
 */
static CxJsonTokenType token_numbertype(const char *content, size_t length) {
    if (length == 0) return CX_JSON_TOKEN_ERROR;

    // casts to unsigned char: <ctype.h> functions are undefined for negative values
    if (content[0] != '-' && !isdigit((unsigned char) content[0])) {
        return CX_JSON_TOKEN_ERROR;
    }

    CxJsonTokenType type = CX_JSON_TOKEN_INTEGER;
    for (size_t i = 1; i < length; i++) {
        const char c = content[i];
        if (c == '.') {
            if (type == CX_JSON_TOKEN_NUMBER) {
                return CX_JSON_TOKEN_ERROR; // more than one decimal separator
            }
            type = CX_JSON_TOKEN_NUMBER;
        } else if (c == 'e' || c == 'E') {
            // everything after the exponent marker is validated in one go
            return num_isexp(content, length, i + 1) ? CX_JSON_TOKEN_NUMBER : CX_JSON_TOKEN_ERROR;
        } else if (!isdigit((unsigned char) c)) {
            return CX_JSON_TOKEN_ERROR; // char is not a digit, decimal separator or exponent sep
        }
    }

    return type;
}

/**
 * Creates a token from a range of the input buffer.
 *
 * If an uncompleted token is pending, the buffer range is appended to its
 * content and the result is marked as allocated; the pending token is
 * cleared afterwards. Non-string tokens are classified as literal
 * ("true", "false", "null") or via token_numbertype().
 *
 * @param json the parser state
 * @param isstring true if the range is known to be a string token
 * @param start index of the first character in the buffer
 * @param end index one past the last character in the buffer
 * @return the token; its type is CX_JSON_NO_TOKEN if concatenation failed and
 * CX_JSON_TOKEN_ERROR if the text is neither a literal nor a number
 */
static CxJsonToken token_create(CxJson *json, bool isstring, size_t start, size_t end) {
    cxmutstr str = cx_mutstrn(json->buffer.space + start, end - start);
    bool allocated = false;
    if (json->uncompleted.tokentype != CX_JSON_NO_TOKEN) {
        allocated = true;
        // pass the appended part as cxstring, matching the other
        // cx_strcat_m() call in token_parse_next()
        str = cx_strcat_m(json->uncompleted.content, 1, cx_strcast(str));
        if (str.ptr == NULL) { // LCOV_EXCL_START
            return (CxJsonToken){CX_JSON_NO_TOKEN, false, {NULL, 0}};
        } // LCOV_EXCL_STOP
    }
    // the pending content (if any) now belongs to the new token
    json->uncompleted = (CxJsonToken){0};
    CxJsonTokenType ttype;
    if (isstring) {
        ttype = CX_JSON_TOKEN_STRING;
    } else {
        cxstring s = cx_strcast(str);
        if (!cx_strcmp(s, CX_STR("true")) || !cx_strcmp(s, CX_STR("false"))
            || !cx_strcmp(s, CX_STR("null"))) {
            ttype = CX_JSON_TOKEN_LITERAL;
        } else {
            ttype = token_numbertype(str.ptr, str.length);
        }
    }
    if (ttype == CX_JSON_TOKEN_ERROR) {
        // an error token carries no content, so release what was allocated
        if (allocated) {
            cx_strfree(&str);
        }
        return (CxJsonToken){CX_JSON_TOKEN_ERROR, false, {NULL, 0}};
    }
    return (CxJsonToken){ttype, allocated, str};
}

/**
 * Maps a single character to the token type it starts or represents.
 *
 * @param c the character to inspect
 * @return the structural token type for brackets, braces, ':' and ',',
 * CX_JSON_TOKEN_STRING for a double quote, CX_JSON_TOKEN_SPACE for
 * whitespace as reported by isspace(), and CX_JSON_NO_TOKEN otherwise
 */
static CxJsonTokenType char2ttype(char c) {
    switch (c) {
        case '[': {
            return CX_JSON_TOKEN_BEGIN_ARRAY;
        }
        case '{': {
            return CX_JSON_TOKEN_BEGIN_OBJECT;
        }
        case ']': {
            return CX_JSON_TOKEN_END_ARRAY;
        }
        case '}': {
            return CX_JSON_TOKEN_END_OBJECT;
        }
        case ':': {
            return CX_JSON_TOKEN_NAME_SEPARATOR;
        }
        case ',': {
            return CX_JSON_TOKEN_VALUE_SEPARATOR;
        }
        case '"': {
            return CX_JSON_TOKEN_STRING;
        }
        default: {
            // isspace() is undefined for negative values, which a plain
            // char holds for bytes >= 0x80 on platforms with signed char
            if (isspace((unsigned char) c)) {
                return CX_JSON_TOKEN_SPACE;
            }
        }
    }
    return CX_JSON_NO_TOKEN;
}

/**
 * Extracts the next token from the parser's input buffer.
 *
 * Scanning starts at the current buffer position. Whitespace between tokens
 * is skipped, structural characters yield single-character tokens, a string
 * runs up to the next unescaped double quote, and any other run of characters
 * is passed to token_create() to be classified as literal or number.
 *
 * When the buffer ends in the middle of a token, the partial text is copied
 * into (or appended to) json->uncompleted so that a later call can continue.
 *
 * @param json the parser state
 * @param result receives the token when CX_JSON_NO_ERROR is returned
 * @return CX_JSON_NO_ERROR when a token was produced,
 * CX_JSON_NO_DATA or CX_JSON_INCOMPLETE_DATA when the buffer is exhausted,
 * CX_JSON_FORMAT_ERROR_NUMBER when token_create() reports an error token,
 * CX_JSON_BUFFER_ALLOC_FAILED when memory for the token text is unavailable
 */
static enum cx_json_status token_parse_next(CxJson *json, CxJsonToken *result) {
    // check if there is data in the buffer
    if (cxBufferEof(&json->buffer)) {
        return json->uncompleted.tokentype == CX_JSON_NO_TOKEN ?
            CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA;
    }

    // current token type and start index
    // (a pending uncompleted token continues at the current position)
    CxJsonTokenType ttype = json->uncompleted.tokentype;
    size_t token_start = json->buffer.pos;

    for (size_t i = json->buffer.pos; i < json->buffer.size; i++) {
        char c = json->buffer.space[i];
        if (ttype != CX_JSON_TOKEN_STRING) {
            // currently non-string token
            CxJsonTokenType ctype = char2ttype(c); // start of new token?
            if (ttype == CX_JSON_NO_TOKEN) {
                if (ctype == CX_JSON_TOKEN_SPACE) {
                    // whitespace between tokens is consumed immediately
                    json->buffer.pos++;
                    continue;
                } else if (ctype == CX_JSON_TOKEN_STRING) {
                    // begin string
                    ttype = CX_JSON_TOKEN_STRING;
                    token_start = i;
                } else if (ctype != CX_JSON_NO_TOKEN) {
                    // single-char token
                    json->buffer.pos = i + 1;
                    *result = (CxJsonToken){ctype, false, {NULL, 0}};
                    return CX_JSON_NO_ERROR;
                } else {
                    ttype = CX_JSON_TOKEN_LITERAL; // number or literal
                    token_start = i;
                }
            } else {
                // finish token
                if (ctype != CX_JSON_NO_TOKEN) {
                    *result = token_create(json, false, token_start, i);
                    if (result->tokentype == CX_JSON_NO_TOKEN) {
                        return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
                    }
                    if (result->tokentype == CX_JSON_TOKEN_ERROR) {
                        return CX_JSON_FORMAT_ERROR_NUMBER;
                    }
                    // the terminating character is not consumed,
                    // it is examined again by the next call
                    json->buffer.pos = i;
                    return CX_JSON_NO_ERROR;
                }
            }
        } else {
            // currently inside a string
            if (json->tokenizer_escape) {
                // the character after a backslash never ends the string
                json->tokenizer_escape = false;
            } else {
                if (c == '"') {
                    // the token includes both enclosing quotes
                    *result = token_create(json, true, token_start, i + 1);
                    if (result->tokentype == CX_JSON_NO_TOKEN) {
                        return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
                    }
                    json->buffer.pos = i + 1;
                    return CX_JSON_NO_ERROR;
                } else if (c == '\\') {
                    json->tokenizer_escape = true;
                }
            }
        }
    }

    if (ttype != CX_JSON_NO_TOKEN) {
        // uncompleted token
        size_t uncompleted_len = json->buffer.size - token_start;
        if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
            // current token is uncompleted
            // save current token content
            CxJsonToken uncompleted = {
                ttype, true,
                cx_strdup(cx_strn(json->buffer.space + token_start, uncompleted_len))
            };
            if (uncompleted.content.ptr == NULL) {
                return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
            }
            json->uncompleted = uncompleted;
        } else {
            // previously we also had an uncompleted token
            // combine the uncompleted token with the current token
            assert(json->uncompleted.allocated);
            cxmutstr str = cx_strcat_m(json->uncompleted.content, 1,
                cx_strn(json->buffer.space + token_start, uncompleted_len));
            if (str.ptr == NULL) {
                return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
            }
            json->uncompleted.content = str;
        }
        // advance the buffer position - we saved the stuff in the uncompleted token
        json->buffer.pos += uncompleted_len;
    }

    // the buffer ran out before a token could be completed
    return CX_JSON_INCOMPLETE_DATA;
}

/**
 * Creates an unescaped copy of a string token.
 *
 * The token is expected to still carry its enclosing double quotes; they are
 * dropped. The two-character escapes \n, \t, \r, \b and \f are replaced by
 * the control characters they denote (the same set escape_string() emits).
 * For every other escaped character the backslash is removed and the
 * character itself is copied, which covers \", \\ and \/.
 *
 * @param a the allocator for the result
 * @param str the raw token including the enclosing quotes
 * @return the unescaped, zero-terminated string;
 * its ptr is NULL if the allocation failed
 */
static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) {
    // TODO: support \uXXXX escape sequences
    // TODO: to be consistent with escape_string() we might want to expect that the enclosing quotes were already removed
    cxmutstr result;
    result.length = 0;
    // the result is at most length - 2 characters plus the terminator
    result.ptr = cxMalloc(a, str.length - 1);
    if (result.ptr == NULL) return result; // LCOV_EXCL_LINE

    bool u = false; // true when the previous character was a backslash
    for (size_t i = 1; i < str.length - 1; i++) {
        char c = str.ptr[i];
        if (u) {
            u = false;
            switch (c) {
                case 'n': c = '\n'; break;
                case 't': c = '\t'; break;
                case 'r': c = '\r'; break;
                case 'b': c = '\b'; break;
                case 'f': c = '\f'; break;
                default: break; // the character stands for itself
            }
            result.ptr[result.length++] = c;
        } else {
            if (c == '\\') {
                u = true;
            } else {
                result.ptr[result.length++] = c;
            }
        }
    }
    result.ptr[result.length] = 0;

    return result;
}

/**
 * Escapes a string for output as a JSON string (without enclosing quotes).
 *
 * Escaped are the double quote, the backslash, the slash, the control
 * characters below 0x20 and DEL (0x7f). Bytes of 0x80 and above are copied
 * unchanged, so that multi-byte UTF-8 sequences stay intact instead of being
 * split into one \u00XX escape per byte.
 *
 * A new buffer is only allocated (with malloc()) once the first character
 * that needs escaping is found.
 *
 * NOTE(review): the returned string still points into the buffer's space
 * after cxBufferDestroy(); this relies on the buffer not releasing its
 * contents with the flags used here - confirm.
 *
 * @param str the string to escape
 * @return \p str itself if nothing had to be escaped, otherwise the escaped
 * copy; a string with NULL ptr if the initial allocation failed
 */
static cxmutstr escape_string(cxmutstr str) {
    CxBuffer buf = {0};

    bool all_printable = true;
    for (size_t i = 0; i < str.length; i++) {
        // work on the unsigned byte value so that bytes >= 0x80 never
        // become negative
        const unsigned char c = (unsigned char) str.ptr[i];
        bool escape = c < 0x20 || c == 0x7f
            || c == '\\'
            || c == '"'
            // TODO: make escaping slash optional
            || c == '/';

        if (all_printable && escape) {
            // first character that needs escaping:
            // start the copy with everything seen so far
            size_t capa = str.length + 32;
            char *space = malloc(capa);
            if (space == NULL) return cx_mutstrn(NULL, 0);
            cxBufferInit(&buf, space, capa, NULL, CX_BUFFER_AUTO_EXTEND);
            cxBufferWrite(str.ptr, 1, i, &buf);
            all_printable = false;
        }
        if (escape) {
            cxBufferPut(&buf, '\\');
            switch (c) {
                case '\"': cxBufferPut(&buf, '\"'); break;
                case '\n': cxBufferPut(&buf, 'n'); break;
                case '\t': cxBufferPut(&buf, 't'); break;
                case '\r': cxBufferPut(&buf, 'r'); break;
                case '\\': cxBufferPut(&buf, '\\'); break;
                case '/': cxBufferPut(&buf, '/'); break;
                case '\f': cxBufferPut(&buf, 'f'); break;
                case '\b': cxBufferPut(&buf, 'b'); break;
                default: {
                    // remaining control characters get the generic form
                    char code[6];
                    snprintf(code, sizeof(code), "u%04x", (unsigned int) c);
                    cxBufferPutString(&buf, code);
                    break;
                }
            }
        } else if (!all_printable) {
            cxBufferPut(&buf, c);
        }
    }
    if (!all_printable) {
        str = cx_mutstrn(buf.space, buf.size);
    }
    cxBufferDestroy(&buf);
    return str;
}

/**
 * Allocates a new JSON value and links it into the value being parsed.
 *
 * Arrays and objects get initial storage for 16 elements. If a container is
 * open (the top entry of json->vbuf), the new value is appended to that array
 * or stored as the value of the pending object member. New arrays and objects
 * are pushed onto json->vbuf themselves. If json->parsed is NULL, the new
 * value becomes json->parsed.
 *
 * @param json the parser state
 * @param type the type of the value to create
 * @return the new value, or NULL if an allocation failed
 */
static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) {
    CxJsonValue *v = cxCalloc(json->allocator, 1, sizeof(CxJsonValue));
    if (v == NULL) return NULL; // LCOV_EXCL_LINE

    // initialize the value
    v->type = type;
    v->allocator = json->allocator;
    if (type == CX_JSON_ARRAY) {
        cx_array_initialize_a(json->allocator, v->value.array.array, 16);
        if (v->value.array.array == NULL) goto create_json_value_exit_error; // LCOV_EXCL_LINE
    } else if (type == CX_JSON_OBJECT) {
        // objects need the member array and the index array of equal capacity
        cx_array_initialize_a(json->allocator, v->value.object.values, 16);
        v->value.object.indices = cxCalloc(json->allocator, 16, sizeof(size_t));
        if (v->value.object.values == NULL ||
            v->value.object.indices == NULL)
            goto create_json_value_exit_error; // LCOV_EXCL_LINE
    }

    // add the new value to a possible parent
    if (json->vbuf_size > 0) {
        CxJsonValue *parent = json->vbuf[json->vbuf_size - 1];
        assert(parent != NULL);
        if (parent->type == CX_JSON_ARRAY) {
            CxArrayReallocator value_realloc = cx_array_reallocator(json->allocator, NULL);
            if (cx_array_simple_add_a(&value_realloc, parent->value.array.array, v)) {
                goto create_json_value_exit_error; // LCOV_EXCL_LINE
            }
        } else if (parent->type == CX_JSON_OBJECT) {
            // the member was already created after parsing the name
            assert(json->uncompleted_member.name.ptr != NULL);
            json->uncompleted_member.value = v;
            if (json_add_objvalue(parent, json->uncompleted_member))  {
                goto create_json_value_exit_error; // LCOV_EXCL_LINE
            }
            // the name is now owned by the object member
            json->uncompleted_member.name = (cxmutstr) {NULL, 0};
        } else {
            assert(false); // LCOV_EXCL_LINE
        }
    }

    // add the new value to the stack, if it is an array or object
    if (type == CX_JSON_ARRAY || type == CX_JSON_OBJECT) {
        CxArrayReallocator vbuf_realloc = cx_array_reallocator(NULL, json->vbuf_internal);
        if (cx_array_simple_add_a(&vbuf_realloc, json->vbuf, v)) {
            goto create_json_value_exit_error; // LCOV_EXCL_LINE
        }
    }

    // if currently no value is parsed, this is now the value of interest
    if (json->parsed == NULL) {
        json->parsed = v;
    }

    return v;
    // LCOV_EXCL_START
create_json_value_exit_error:
    // NOTE(review): when the jump comes from the vbuf push above, v has
    // already been stored in its parent, which then keeps a pointer to the
    // freed value - confirm whether this path needs to unlink v first
    cxJsonValueFree(v);
    return NULL;
    // LCOV_EXCL_STOP
}

// States of the parser's state machine (kept on the json->states stack).
// The three "value begin" states are the ones below 3; json_parse() pushes
// (10 + state) as the state to continue with once the value has been read,
// which is why each begin state is paired with the value ten above it.
#define JP_STATE_VALUE_BEGIN         0
#define JP_STATE_VALUE_END          10
#define JP_STATE_VALUE_BEGIN_OBJ     1
#define JP_STATE_OBJ_SEP_OR_CLOSE   11
#define JP_STATE_VALUE_BEGIN_AR      2
#define JP_STATE_ARRAY_SEP_OR_CLOSE 12
#define JP_STATE_OBJ_NAME_OR_CLOSE   5
#define JP_STATE_OBJ_NAME            6
#define JP_STATE_OBJ_COLON           7

void cxJsonInit(CxJson *json, const CxAllocator *allocator) {
    // start from an all-zero parser structure
    memset(json, 0, sizeof(CxJson));

    // fall back to the default allocator when none is given
    json->allocator = allocator != NULL ? allocator : cxDefaultAllocator;

    // the value stack initially lives in the embedded array
    json->vbuf_capacity = cx_nmemb(json->vbuf_internal);
    json->vbuf = json->vbuf_internal;

    // so does the state stack, which starts out expecting a value
    json->states_capacity = cx_nmemb(json->states_internal);
    json->states = json->states_internal;
    json->states_size = 1;
    json->states[0] = JP_STATE_VALUE_BEGIN;
}

void cxJsonDestroy(CxJson *json) {
    cxBufferDestroy(&json->buffer);

    // the stacks are only heap memory once they outgrew the embedded arrays
    if (json->states != json->states_internal) {
        free(json->states);
    }
    if (json->vbuf != json->vbuf_internal) {
        free(json->vbuf);
    }

    // a value that was parsed (partially) but never handed out
    cxJsonValueFree(json->parsed);
    json->parsed = NULL;

    // a token that was cut off at the end of the buffer owns a copy
    // of its text, which would leak if the parser is destroyed now
    token_destroy(&json->uncompleted);
    json->uncompleted = (CxJsonToken){0};

    // an object member whose name was read, but not yet its value
    if (json->uncompleted_member.name.ptr != NULL) {
        cx_strfree_a(json->allocator, &json->uncompleted_member.name);
        json->uncompleted_member = (CxJsonObjValue){{NULL, 0}, NULL};
    }
}

int cxJsonFilln(CxJson *json, const char *buf, size_t size) {
    if (!cxBufferEof(&json->buffer)) {
        // unread data is left: append the new data behind it
        // and report non-zero when not everything could be appended
        return cxBufferAppend(buf, 1, size, &json->buffer) != size;
    }

    // everything was consumed: set the buffer up anew on top of the
    // memory passed by the caller (flagged as copy-on-write)
    cxBufferDestroy(&json->buffer);
    cxBufferInit(&json->buffer, (char*) buf, size,
        NULL, CX_BUFFER_AUTO_EXTEND | CX_BUFFER_COPY_ON_WRITE);
    json->buffer.size = size;
    return 0;
}

/**
 * Pushes a state onto the parser's state stack.
 *
 * No capacity check happens here: the caller must already have reserved
 * the space (json_parse() does that with cx_array_simple_reserve_a()).
 *
 * @param json the parser state
 * @param state the state to push
 */
static void json_add_state(CxJson *json, int state) {
    json->states[json->states_size] = state;
    json->states_size++;
}

// Destroys the local variable `token` and then returns `code`.
// Written as ONE return statement (comma operator) so that the macro
// also behaves correctly as the body of an unbraced if/else.
#define return_rec(code) \
    return (token_destroy(&token), (code))

/**
 * Performs one step of the parser: reads one token and advances the
 * state machine accordingly.
 *
 * The current state is popped from json->states, the token is checked against
 * what that state allows, and the follow-up state(s) are pushed. Values are
 * created with create_json_value(), which also links them to their parent.
 * The token is destroyed before returning.
 *
 * @param json the parser state
 * @return CX_JSON_NO_ERROR if the token was accepted, the status of
 * token_parse_next() if no token could be read, or an error status when an
 * allocation failed or the token is not allowed in the current state
 */
static enum cx_json_status json_parse(CxJson *json) {
    // Reserve a pointer for a possibly read value
    CxJsonValue *vbuf = NULL;

    // grab the next token
    CxJsonToken token;
    {
        enum cx_json_status ret = token_parse_next(json, &token);
        if (ret != CX_JSON_NO_ERROR) {
            return ret;
        }
    }

    // pop the current state
    assert(json->states_size > 0);
    int state = json->states[--json->states_size];

    // guarantee that at least two more states fit on the stack
    CxArrayReallocator state_realloc = cx_array_reallocator(NULL, json->states_internal);
    if (cx_array_simple_reserve_a(&state_realloc, json->states, 2)) {
        // the token was already read, so it must be released here as well
        return_rec(CX_JSON_BUFFER_ALLOC_FAILED); // LCOV_EXCL_LINE
    }


    //  0 JP_STATE_VALUE_BEGIN          value begin
    // 10 JP_STATE_VALUE_END            expect value end
    //  1 JP_STATE_VALUE_BEGIN_OBJ      value begin (inside object)
    // 11 JP_STATE_OBJ_SEP_OR_CLOSE     object, expect separator, objclose
    //  2 JP_STATE_VALUE_BEGIN_AR       value begin (inside array)
    // 12 JP_STATE_ARRAY_SEP_OR_CLOSE   array, expect separator or arrayclose
    //  5 JP_STATE_OBJ_NAME_OR_CLOSE    object, expect name or objclose
    //  6 JP_STATE_OBJ_NAME             object, expect name
    //  7 JP_STATE_OBJ_COLON            object, expect ':'

    if (state < 3) {
        // push expected end state to the stack
        json_add_state(json, 10 + state);
        switch (token.tokentype) {
            case CX_JSON_TOKEN_BEGIN_ARRAY: {
                if (create_json_value(json, CX_JSON_ARRAY) == NULL) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                json_add_state(json, JP_STATE_VALUE_BEGIN_AR);
                return_rec(CX_JSON_NO_ERROR);
            }
            case CX_JSON_TOKEN_END_ARRAY: {
                // an array may be closed right after it was opened ("[]"),
                // but not directly after a value separator ("[1,]");
                // the two cases differ in whether the array has elements
                if (state == JP_STATE_VALUE_BEGIN_AR) {
                    assert(json->vbuf_size > 0);
                    const CxJsonValue *open_array = json->vbuf[json->vbuf_size - 1];
                    if (open_array->value.array.array_size == 0) {
                        // remove the end state that was pushed above
                        json->states_size--;
                        // discard the array from the value buffer
                        json->vbuf_size--;
                        return_rec(CX_JSON_NO_ERROR);
                    }
                }
                return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
            }
            case CX_JSON_TOKEN_BEGIN_OBJECT: {
                if (create_json_value(json, CX_JSON_OBJECT) == NULL) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                json_add_state(json, JP_STATE_OBJ_NAME_OR_CLOSE);
                return_rec(CX_JSON_NO_ERROR);
            }
            case CX_JSON_TOKEN_STRING: {
                if ((vbuf = create_json_value(json, CX_JSON_STRING)) == NULL) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                cxmutstr str = unescape_string(json->allocator, token.content);
                if (str.ptr == NULL) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                vbuf->value.string = str;
                return_rec(CX_JSON_NO_ERROR);
            }
            case CX_JSON_TOKEN_INTEGER:
            case CX_JSON_TOKEN_NUMBER: {
                int type = token.tokentype == CX_JSON_TOKEN_INTEGER ? CX_JSON_INTEGER : CX_JSON_NUMBER;
                if (NULL == (vbuf = create_json_value(json, type))) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                if (type == CX_JSON_INTEGER) {
                    if (cx_strtoi64(token.content, &vbuf->value.integer, 10)) {
                        return_rec(CX_JSON_FORMAT_ERROR_NUMBER);
                    }
                } else {
                    if (cx_strtod(token.content, &vbuf->value.number)) {
                        return_rec(CX_JSON_FORMAT_ERROR_NUMBER);
                    }
                }
                return_rec(CX_JSON_NO_ERROR);
            }
            case CX_JSON_TOKEN_LITERAL: {
                if ((vbuf = create_json_value(json, CX_JSON_LITERAL)) == NULL) {
                    return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
                }
                // the tokenizer only produces literal tokens for
                // "true", "false" and "null"
                if (0 == cx_strcmp(cx_strcast(token.content), cx_str("true"))) {
                    vbuf->value.literal = CX_JSON_TRUE;
                } else if (0 == cx_strcmp(cx_strcast(token.content), cx_str("false"))) {
                    vbuf->value.literal = CX_JSON_FALSE;
                } else {
                    vbuf->value.literal = CX_JSON_NULL;
                }
                return_rec(CX_JSON_NO_ERROR);
            }
            default: {
                return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
            }
        }
    } else if (state == JP_STATE_ARRAY_SEP_OR_CLOSE) {
        // expect ',' or ']'
        if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
            json_add_state(json, JP_STATE_VALUE_BEGIN_AR);
            return_rec(CX_JSON_NO_ERROR);
        } else if (token.tokentype == CX_JSON_TOKEN_END_ARRAY) {
            // discard the array from the value buffer
            json->vbuf_size--;
            return_rec(CX_JSON_NO_ERROR);
        } else {
            return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
        }
    } else if (state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) {
        if (state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
            // discard the obj from the value buffer
            json->vbuf_size--;
            return_rec(CX_JSON_NO_ERROR);
        } else {
            // expect string
            if (token.tokentype != CX_JSON_TOKEN_STRING) {
                return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
            }

            // add new entry
            cxmutstr name = unescape_string(json->allocator, token.content);
            if (name.ptr == NULL) {
                return_rec(CX_JSON_VALUE_ALLOC_FAILED); // LCOV_EXCL_LINE
            }
            // the name is parked until the value of the member is created
            assert(json->uncompleted_member.name.ptr == NULL);
            json->uncompleted_member.name = name;
            assert(json->vbuf_size > 0);

            // next state
            json_add_state(json, JP_STATE_OBJ_COLON);
            return_rec(CX_JSON_NO_ERROR);
        }
    } else if (state == JP_STATE_OBJ_COLON) {
        // expect ':'
        if (token.tokentype != CX_JSON_TOKEN_NAME_SEPARATOR) {
            return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
        }
        // next state
        json_add_state(json, JP_STATE_VALUE_BEGIN_OBJ);
        return_rec(CX_JSON_NO_ERROR);
    } else if (state == JP_STATE_OBJ_SEP_OR_CLOSE) {
        // expect ',' or '}'
        if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
            json_add_state(json, JP_STATE_OBJ_NAME);
            return_rec(CX_JSON_NO_ERROR);
        } else if (token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
            // discard the obj from the value buffer
            json->vbuf_size--;
            return_rec(CX_JSON_NO_ERROR);
        } else {
            return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
        }
    } else {
        // should be unreachable
        assert(false);
        return_rec(-1);
    }
}

CxJsonStatus cxJsonNext(CxJson *json, CxJsonValue **value) {
    // a parser that was never filled has nothing to work on
    if (json->buffer.space == NULL) {
        return CX_JSON_NULL_DATA;
    }

    // unless a complete value is read, the caller gets the "nothing" value
    *value = &cx_json_value_nothing;

    // feed tokens to the state machine until a value is complete
    // or the parser reports anything other than success
    CxJsonStatus status;
    for (;;) {
        status = json_parse(json);
        if (status != CX_JSON_NO_ERROR) {
            break;
        }
        if (json->states_size != 1) {
            // still inside a value
            continue;
        }

        // only the end state of the top-level value is left
        assert(json->states[0] == JP_STATE_VALUE_END);
        assert(json->vbuf_size == 0);

        // hand the value over to the caller
        *value = json->parsed;
        json->parsed = NULL;

        // get ready for the next top-level value
        json->states[0] = JP_STATE_VALUE_BEGIN;

        return CX_JSON_NO_ERROR;
    }

    // running out of data in the middle of a value
    // means that the value is incomplete
    if (status == CX_JSON_NO_DATA && json->states_size > 1) {
        status = CX_JSON_INCOMPLETE_DATA;
    }

    return status;
}

void cxJsonValueFree(CxJsonValue *value) {
    // NULL and values of type "nothing" are ignored
    if (value == NULL) return;
    if (value->type == CX_JSON_NOTHING) return;

    if (value->type == CX_JSON_OBJECT) {
        // free every member (value and name), then both arrays
        CxJsonObject *obj = &value->value.object;
        for (size_t i = 0; i < obj->values_size; i++) {
            CxJsonObjValue *member = &obj->values[i];
            cxJsonValueFree(member->value);
            cx_strfree_a(value->allocator, &member->name);
        }
        cxFree(value->allocator, obj->values);
        cxFree(value->allocator, obj->indices);
    } else if (value->type == CX_JSON_ARRAY) {
        // free every element, then the element array
        CxJsonArray *arr = &value->value.array;
        for (size_t i = 0; i < arr->array_size; i++) {
            cxJsonValueFree(arr->array[i]);
        }
        cxFree(value->allocator, arr->array);
    } else if (value->type == CX_JSON_STRING) {
        cxFree(value->allocator, value->value.string.ptr);
    }

    // all other types own nothing besides the value itself
    cxFree(value->allocator, value);
}

CxJsonValue* cxJsonCreateObj(const CxAllocator* allocator) {
    // NULL selects the default allocator
    const CxAllocator *al = allocator != NULL ? allocator : cxDefaultAllocator;

    CxJsonValue *obj = cxMalloc(al, sizeof(CxJsonValue));
    if (obj == NULL) return NULL;
    obj->allocator = al;
    obj->type = CX_JSON_OBJECT;

    // member array with room for 16 members
    cx_array_initialize_a(al, obj->value.object.values, 16);
    if (obj->value.object.values == NULL) goto create_obj_fail_value; // LCOV_EXCL_LINE

    // index array of the same capacity
    obj->value.object.indices = cxCalloc(al, 16, sizeof(size_t));
    if (obj->value.object.indices == NULL) goto create_obj_fail_members; // LCOV_EXCL_LINE

    return obj;

    // LCOV_EXCL_START
create_obj_fail_members:
    cxFree(al, obj->value.object.values);
create_obj_fail_value:
    cxFree(al, obj);
    return NULL;
    // LCOV_EXCL_STOP
}

CxJsonValue* cxJsonCreateArr(const CxAllocator* allocator) {
    // NULL selects the default allocator
    const CxAllocator *al = allocator != NULL ? allocator : cxDefaultAllocator;

    CxJsonValue *arr = cxMalloc(al, sizeof(CxJsonValue));
    if (arr == NULL) {
        return NULL;
    }
    arr->type = CX_JSON_ARRAY;
    arr->allocator = al;

    // element array with room for 16 elements
    cx_array_initialize_a(al, arr->value.array.array, 16);
    if (arr->value.array.array != NULL) {
        return arr;
    }

    // without the element array the value is useless
    cxFree(al, arr);
    return NULL;
}

CxJsonValue* cxJsonCreateNumber(const CxAllocator* allocator, double num) {
    // NULL selects the default allocator
    const CxAllocator *al = allocator != NULL ? allocator : cxDefaultAllocator;

    CxJsonValue *number = cxMalloc(al, sizeof(CxJsonValue));
    if (number != NULL) {
        number->type = CX_JSON_NUMBER;
        number->allocator = al;
        number->value.number = num;
    }
    return number;
}

CxJsonValue* cxJsonCreateInteger(const CxAllocator* allocator, int64_t num) {
    // NULL selects the default allocator
    const CxAllocator *al = allocator != NULL ? allocator : cxDefaultAllocator;

    CxJsonValue *integer = cxMalloc(al, sizeof(CxJsonValue));
    if (integer != NULL) {
        integer->type = CX_JSON_INTEGER;
        integer->allocator = al;
        integer->value.integer = num;
    }
    return integer;
}

CxJsonValue* cxJsonCreateString(const CxAllocator* allocator, const char* str) {
    return cxJsonCreateCxString(allocator, cx_str(str));
}

CxJsonValue* cxJsonCreateCxString(const CxAllocator* allocator, cxstring str) {
    // NULL selects the default allocator
    const CxAllocator *al = allocator != NULL ? allocator : cxDefaultAllocator;

    CxJsonValue *string = cxMalloc(al, sizeof(CxJsonValue));
    if (string == NULL) {
        return NULL;
    }
    string->type = CX_JSON_STRING;
    string->allocator = al;

    // the value keeps its own copy of the string
    cxmutstr copy = cx_strdup_a(al, str);
    if (copy.ptr == NULL) {
        cxFree(al, string);
        return NULL;
    }
    string->value.string = copy;
    return string;
}

CxJsonValue* cxJsonCreateLiteral(const CxAllocator* allocator, CxJsonLiteral lit) {
    // NULL selects the default allocator
    const CxAllocator *al = allocator != NULL ? allocator : cxDefaultAllocator;

    CxJsonValue *literal = cxMalloc(al, sizeof(CxJsonValue));
    if (literal != NULL) {
        literal->type = CX_JSON_LITERAL;
        literal->allocator = al;
        literal->value.literal = lit;
    }
    return literal;
}

// LCOV_EXCL_START
// never called as long as malloc() does not return NULL
/**
 * Cleans up a partially filled temporary array of values: frees the values
 * up to the first NULL entry (or up to count) and then the array itself.
 *
 * @param values the temporary array, allocated with calloc()
 * @param count the number of slots in the array
 */
static void cx_json_arr_free_temp(CxJsonValue** values, size_t count) {
    size_t i = 0;
    while (i < count && values[i] != NULL) {
        cxJsonValueFree(values[i]);
        i++;
    }
    free(values);
}
// LCOV_EXCL_STOP

int cxJsonArrAddNumbers(CxJsonValue* arr, const double* num, size_t count) {
    // request at least one slot: calloc() with a count of zero may return
    // NULL, which must not be reported as an allocation failure
    CxJsonValue** values = calloc(count > 0 ? count : 1, sizeof(CxJsonValue*));
    if (values == NULL) return -1;

    // create one value per number; on failure release what was created so far
    for (size_t i = 0; i < count; i++) {
        values[i] = cxJsonCreateNumber(arr->allocator, num[i]);
        if (values[i] == NULL) { cx_json_arr_free_temp(values, count); return -1; }
    }

    // only the temporary pointer array is released here,
    // regardless of the result of the add
    int ret = cxJsonArrAddValues(arr, values, count);
    free(values);
    return ret;
}

// Creates one JSON integer per entry of num and appends all of them to arr.
//
// The values are created with the allocator of arr.
// Returns zero on success, -1 when a temporary or a value cannot be
// allocated, and otherwise the non-zero result of cxJsonArrAddValues().
// On any failure the values created by this function are released again.
int cxJsonArrAddIntegers(CxJsonValue* arr, const int64_t* num, size_t count) {
    // nothing to add - also avoids misreading a NULL result
    // of calloc(0, ...) as an allocation failure
    if (count == 0) return 0;

    CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
    if (values == NULL) return -1;
    for (size_t i = 0; i < count; i++) {
        values[i] = cxJsonCreateInteger(arr->allocator, num[i]);
        if (values[i] == NULL) { cx_json_arr_free_temp(values, count); return -1; }
    }
    int ret = cxJsonArrAddValues(arr, values, count);
    if (ret != 0) {
        // assumes a failed cxJsonArrAddValues() leaves arr untouched, so
        // the freshly created values are still owned here - release them
        cx_json_arr_free_temp(values, count);
        return ret;
    }
    free(values);
    return 0;
}

// Creates one JSON string per C string in str and appends all of them to arr.
//
// The values are created with the allocator of arr.
// Returns zero on success, -1 when a temporary or a value cannot be
// allocated, and otherwise the non-zero result of cxJsonArrAddValues().
// On any failure the values created by this function are released again.
int cxJsonArrAddStrings(CxJsonValue* arr, const char* const* str, size_t count) {
    // nothing to add - also avoids misreading a NULL result
    // of calloc(0, ...) as an allocation failure
    if (count == 0) return 0;

    CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
    if (values == NULL) return -1;
    for (size_t i = 0; i < count; i++) {
        values[i] = cxJsonCreateString(arr->allocator, str[i]);
        if (values[i] == NULL) { cx_json_arr_free_temp(values, count); return -1; }
    }
    int ret = cxJsonArrAddValues(arr, values, count);
    if (ret != 0) {
        // assumes a failed cxJsonArrAddValues() leaves arr untouched, so
        // the freshly created values are still owned here - release them
        cx_json_arr_free_temp(values, count);
        return ret;
    }
    free(values);
    return 0;
}

// Creates one JSON string per cxstring in str and appends all of them to arr.
//
// The values are created with the allocator of arr.
// Returns zero on success, -1 when a temporary or a value cannot be
// allocated, and otherwise the non-zero result of cxJsonArrAddValues().
// On any failure the values created by this function are released again.
int cxJsonArrAddCxStrings(CxJsonValue* arr, const cxstring* str, size_t count) {
    // nothing to add - also avoids misreading a NULL result
    // of calloc(0, ...) as an allocation failure
    if (count == 0) return 0;

    CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
    if (values == NULL) return -1;
    for (size_t i = 0; i < count; i++) {
        values[i] = cxJsonCreateCxString(arr->allocator, str[i]);
        if (values[i] == NULL) { cx_json_arr_free_temp(values, count); return -1; }
    }
    int ret = cxJsonArrAddValues(arr, values, count);
    if (ret != 0) {
        // assumes a failed cxJsonArrAddValues() leaves arr untouched, so
        // the freshly created values are still owned here - release them
        cx_json_arr_free_temp(values, count);
        return ret;
    }
    free(values);
    return 0;
}

// Creates one JSON literal per entry of lit and appends all of them to arr.
//
// The values are created with the allocator of arr.
// Returns zero on success, -1 when a temporary or a value cannot be
// allocated, and otherwise the non-zero result of cxJsonArrAddValues().
// On any failure the values created by this function are released again.
int cxJsonArrAddLiterals(CxJsonValue* arr, const CxJsonLiteral* lit, size_t count) {
    // nothing to add - also avoids misreading a NULL result
    // of calloc(0, ...) as an allocation failure
    if (count == 0) return 0;

    CxJsonValue** values = calloc(count, sizeof(CxJsonValue*));
    if (values == NULL) return -1;
    for (size_t i = 0; i < count; i++) {
        values[i] = cxJsonCreateLiteral(arr->allocator, lit[i]);
        if (values[i] == NULL) { cx_json_arr_free_temp(values, count); return -1; }
    }
    int ret = cxJsonArrAddValues(arr, values, count);
    if (ret != 0) {
        // assumes a failed cxJsonArrAddValues() leaves arr untouched, so
        // the freshly created values are still owned here - release them
        cx_json_arr_free_temp(values, count);
        return ret;
    }
    free(values);
    return 0;
}

// Appends count value pointers from val to the end of the JSON array arr.
// Storage is grown through a reallocator bound to the allocator of arr.
// Returns the result of cx_array_simple_copy_a().
int cxJsonArrAddValues(CxJsonValue* arr, CxJsonValue* const* val, size_t count) {
    CxArrayReallocator arr_realloc = cx_array_reallocator(arr->allocator, NULL);
    assert(arr->type == CX_JSON_ARRAY);
    // writing at index array_size appends behind the last element
    int status = cx_array_simple_copy_a(
            &arr_realloc,
            arr->value.array.array,
            arr->value.array.array_size,
            val, count
    );
    return status;
}

// Adds child to the JSON object obj under a duplicate of name.
// Returns 0 on success, -1 when the name cannot be duplicated and
// 1 when json_add_objvalue() fails (the duplicated name is freed then).
int cxJsonObjPut(CxJsonValue* obj, cxstring name, CxJsonValue* child) {
    cxmutstr key = cx_strdup_a(obj->allocator, name);
    if (key.ptr == NULL) {
        return -1;
    }

    CxJsonObjValue entry = {key, child};
    if (!json_add_objvalue(obj, entry)) {
        return 0;
    }

    // the entry was not stored - the key copy is still ours
    cx_strfree_a(obj->allocator, &key);
    return 1;
}

// Creates a new JSON object with the allocator of obj and puts it
// into obj under name.
// Returns the new value, or NULL when creating or inserting it fails
// (a value that could not be inserted is freed).
CxJsonValue* cxJsonObjPutObj(CxJsonValue* obj, cxstring name) {
    CxJsonValue* child = cxJsonCreateObj(obj->allocator);
    if (child != NULL && cxJsonObjPut(obj, name, child)) {
        cxJsonValueFree(child);
        child = NULL;
    }
    return child;
}

// Creates a new JSON array with the allocator of obj and puts it
// into obj under name.
// Returns the new value, or NULL when creating or inserting it fails
// (a value that could not be inserted is freed).
CxJsonValue* cxJsonObjPutArr(CxJsonValue* obj, cxstring name) {
    CxJsonValue* child = cxJsonCreateArr(obj->allocator);
    if (child != NULL && cxJsonObjPut(obj, name, child)) {
        cxJsonValueFree(child);
        child = NULL;
    }
    return child;
}

// Creates a JSON number for num with the allocator of obj and puts it
// into obj under name.
// Returns the new value, or NULL when creating or inserting it fails
// (a value that could not be inserted is freed).
CxJsonValue* cxJsonObjPutNumber(CxJsonValue* obj, cxstring name, double num) {
    CxJsonValue* child = cxJsonCreateNumber(obj->allocator, num);
    if (child != NULL && cxJsonObjPut(obj, name, child)) {
        cxJsonValueFree(child);
        child = NULL;
    }
    return child;
}

// Creates a JSON integer for num with the allocator of obj and puts it
// into obj under name.
// Returns the new value, or NULL when creating or inserting it fails
// (a value that could not be inserted is freed).
CxJsonValue* cxJsonObjPutInteger(CxJsonValue* obj, cxstring name, int64_t num) {
    CxJsonValue* child = cxJsonCreateInteger(obj->allocator, num);
    if (child != NULL && cxJsonObjPut(obj, name, child)) {
        cxJsonValueFree(child);
        child = NULL;
    }
    return child;
}

// Creates a JSON string from the C string str with the allocator of obj
// and puts it into obj under name.
// Returns the new value, or NULL when creating or inserting it fails
// (a value that could not be inserted is freed).
CxJsonValue* cxJsonObjPutString(CxJsonValue* obj, cxstring name, const char* str) {
    CxJsonValue* child = cxJsonCreateString(obj->allocator, str);
    if (child != NULL && cxJsonObjPut(obj, name, child)) {
        cxJsonValueFree(child);
        child = NULL;
    }
    return child;
}

// Creates a JSON string from the cxstring str with the allocator of obj
// and puts it into obj under name.
// Returns the new value, or NULL when creating or inserting it fails
// (a value that could not be inserted is freed).
CxJsonValue* cxJsonObjPutCxString(CxJsonValue* obj, cxstring name, cxstring str) {
    CxJsonValue* child = cxJsonCreateCxString(obj->allocator, str);
    if (child != NULL && cxJsonObjPut(obj, name, child)) {
        cxJsonValueFree(child);
        child = NULL;
    }
    return child;
}

// Creates a JSON literal for lit with the allocator of obj and puts it
// into obj under name.
// Returns the new value, or NULL when creating or inserting it fails
// (a value that could not be inserted is freed).
CxJsonValue* cxJsonObjPutLiteral(CxJsonValue* obj, cxstring name, CxJsonLiteral lit) {
    CxJsonValue* child = cxJsonCreateLiteral(obj->allocator, lit);
    if (child != NULL && cxJsonObjPut(obj, name, child)) {
        cxJsonValueFree(child);
        child = NULL;
    }
    return child;
}

// Returns the element at index of the JSON array value.
// An index outside the array yields the shared "nothing" value
// instead of NULL.
CxJsonValue *cxJsonArrGet(const CxJsonValue *value, size_t index) {
    if (index < value->value.array.array_size) {
        return value->value.array.array[index];
    }
    return &cx_json_value_nothing;
}

// Returns a pointer iterator (cxIteratorPtr) over the elements
// of the JSON array value.
CxIterator cxJsonArrIter(const CxJsonValue *value) {
    CxIterator iter = cxIteratorPtr(
        value->value.array.array,
        value->value.array.array_size
    );
    return iter;
}

// Returns an iterator over the CxJsonObjValue entries stored
// in the values array of the JSON object value.
CxIterator cxJsonObjIter(const CxJsonValue *value) {
    CxIterator iter = cxIterator(
        value->value.object.values,
        sizeof(CxJsonObjValue),
        value->value.object.values_size
    );
    return iter;
}

// Looks up the member called name in the JSON object value.
// Returns the member's value, or the shared "nothing" value
// when json_find_objvalue() finds no such member.
CxJsonValue *cx_json_obj_get_cxstr(const CxJsonValue *value, cxstring name) {
    CxJsonObjValue *found = json_find_objvalue(value, name);
    return found != NULL ? found->value : &cx_json_value_nothing;
}

// Writer settings returned by cxJsonWriterCompact() and used by
// cxJsonWrite() when the caller passes NULL settings.
// NOTE(review): the initializer is positional; the field names in the
// comments below are inferred from how the settings are read in this
// file - confirm against the declaration of CxJsonWriter.
static const CxJsonWriter cx_json_writer_default = {
    false, // presumably pretty
    true,  // presumably sort_members
    6,     // presumably frac_max_digits
    false, // presumably indent_space
    4      // presumably indent
};

// Returns a copy of the default writer settings.
CxJsonWriter cxJsonWriterCompact(void) {
    CxJsonWriter compact = cx_json_writer_default;
    return compact;
}

// Returns writer settings that differ from the compact ones in the
// first field (true instead of false) and carry use_spaces in the
// fourth field.
CxJsonWriter cxJsonWriterPretty(bool use_spaces) {
    CxJsonWriter pretty = {
        true,
        true,
        6,
        use_spaces,
        4
    };
    return pretty;
}

// Writes the indentation for the given nesting depth.
//
// With indent_space set, depth * indent spaces are written (nothing
// when indent is zero); otherwise depth tab characters are written.
// Returns 0 on success and 1 as soon as wfunc reports a short write.
static int cx_json_writer_indent(
    void *target,
    cx_write_func wfunc,
    const CxJsonWriter *settings,
    unsigned int depth
) {
    if (depth == 0) return 0;

    // pick a block of 32 prepared characters and the total width
    const char *chunk;
    size_t pending = depth;
    if (!settings->indent_space) {
        chunk = "\t\t\t\t\t\t\t\t" "\t\t\t\t\t\t\t\t"
                "\t\t\t\t\t\t\t\t" "\t\t\t\t\t\t\t\t";
    } else {
        if (settings->indent == 0) return 0;
        pending *= settings->indent;
        chunk = "        " "        "
                "        " "        ";
    }

    // emit full blocks first ...
    while (pending >= 32) {
        if (wfunc(chunk, 1, 32, target) != 32) return 1;
        pending -= 32;
    }
    // ... then whatever is left (possibly zero characters)
    if (wfunc(chunk, 1, pending, target) != pending) return 1;

    return 0;
}


// Recursively serializes a JSON value.
//
// target   - opaque destination, handed to wfunc as last argument
// value    - the value to serialize
// wfunc    - write function; invoked as wfunc(buffer, 1, count, target)
//            and expected to return count when everything was written
// settings - writer settings (pretty, sort_members, frac_max_digits,
//            indent_space and indent are read)
// depth    - nesting depth used for indentation in pretty mode
//
// Returns zero on success and non-zero when fewer bytes than requested
// were written or when writing a nested value failed.
int cx_json_write_rec(
    void *target,
    const CxJsonValue *value,
    cx_write_func wfunc,
    const CxJsonWriter *settings,
    unsigned int depth
) {
    // keep track of written items
    // the idea is to reduce the number of jumps for error checking:
    // the two counters are only compared once at the very end
    size_t actual = 0, expected = 0;

    // small buffer for number to string conversions
    char numbuf[40];

    // recursively write the values
    switch (value->type) {
        case CX_JSON_OBJECT: {
            // compact mode only writes the brace, pretty mode also the newline
            const char *begin_obj = "{\n";
            if (settings->pretty) {
                actual += wfunc(begin_obj, 1, 2, target);
                expected += 2;
            } else {
                actual += wfunc(begin_obj, 1, 1, target);
                expected++;
            }
            depth++;
            size_t elem_count = value->value.object.values_size;
            for (size_t look_idx = 0; look_idx < elem_count; look_idx++) {
                // get the member either directly from the values array
                // or via the index array when sorting is not requested
                size_t elem_idx = settings->sort_members
                                      ? look_idx
                                      : value->value.object.indices[look_idx];
                CxJsonObjValue *member = &value->value.object.values[elem_idx];

                // possible indentation
                if (settings->pretty) {
                    if (cx_json_writer_indent(target, wfunc, settings, depth)) {
                        return 1; // LCOV_EXCL_LINE
                    }
                }

                // the name
                actual += wfunc("\"", 1, 1, target);
                cxmutstr name = escape_string(member->name);
                actual += wfunc(name.ptr, 1, name.length, target);
                // account for the bytes that were really requested, i.e. the
                // escaped length - it may exceed the length of the raw name
                expected += name.length;
                if (name.ptr != member->name.ptr) {
                    // escaping produced a separate string that must be freed
                    cx_strfree(&name);
                }
                actual += wfunc("\"", 1, 1, target);
                const char *obj_name_sep = ": ";
                if (settings->pretty) {
                    actual += wfunc(obj_name_sep, 1, 2, target);
                    expected += 4; // two quotes, colon, space
                } else {
                    actual += wfunc(obj_name_sep, 1, 1, target);
                    expected += 3; // two quotes, colon
                }

                // the value
                if (cx_json_write_rec(target, member->value, wfunc, settings, depth)) return 1;

                // end of object-value
                if (look_idx < elem_count - 1) {
                    const char *obj_value_sep = ",\n";
                    if (settings->pretty) {
                        actual += wfunc(obj_value_sep, 1, 2, target);
                        expected += 2;
                    } else {
                        actual += wfunc(obj_value_sep, 1, 1, target);
                        expected++;
                    }
                } else {
                    if (settings->pretty) {
                        actual += wfunc("\n", 1, 1, target);
                        expected++;
                    }
                }
            }
            depth--;
            if (settings->pretty) {
                if (cx_json_writer_indent(target, wfunc, settings, depth)) return 1;
            }
            actual += wfunc("}", 1, 1, target);
            expected++;
            break;
        }
        case CX_JSON_ARRAY: {
            actual += wfunc("[", 1, 1, target);
            expected++;
            CxIterator iter = cxJsonArrIter(value);
            cx_foreach(CxJsonValue*, element, iter) {
                if (cx_json_write_rec(
                        target, element,
                        wfunc, settings, depth)
                ) return 1;

                // separator after every element except the last one
                if (iter.index < iter.elem_count - 1) {
                    const char *arr_value_sep = ", ";
                    if (settings->pretty) {
                        actual += wfunc(arr_value_sep, 1, 2, target);
                        expected += 2;
                    } else {
                        actual += wfunc(arr_value_sep, 1, 1, target);
                        expected++;
                    }
                }
            }
            actual += wfunc("]", 1, 1, target);
            expected++;
            break;
        }
        case CX_JSON_STRING: {
            actual += wfunc("\"", 1, 1, target);
            cxmutstr str = escape_string(value->value.string);
            actual += wfunc(str.ptr, 1, str.length, target);
            // two quotes plus the escaped (not the raw) string length
            expected += 2 + str.length;
            if (str.ptr != value->value.string.ptr) {
                // escaping produced a separate string that must be freed
                cx_strfree(&str);
            }
            actual += wfunc("\"", 1, 1, target);
            break;
        }
        case CX_JSON_NUMBER: {
            int precision = settings->frac_max_digits;
            // because of the way how %g is defined, we need to
            // double the precision and truncate ourselves
            precision = 1 + (precision > 15 ? 30 : 2 * precision);
            snprintf(numbuf, sizeof(numbuf), "%.*g", precision, value->value.number);
            char *dot, *exp;
            unsigned char max_digits;
            // find the decimal separator and hope that it's one of . or ,
            dot = strchr(numbuf, '.');
            if (dot == NULL) {
                dot = strchr(numbuf, ',');
            }
            if (dot == NULL) {
                // no decimal separator found
                // output everything until a possible exponent
                max_digits = 30;
                dot = numbuf;
            } else {
                // found a decimal separator
                // output everything until the separator
                // and set max digits to what the settings say
                size_t len = dot - numbuf;
                actual += wfunc(numbuf, 1, len, target);
                expected += len;
                max_digits = settings->frac_max_digits;
                if (max_digits > 15) {
                    max_digits = 15;
                }
                // locale independent separator
                if (max_digits > 0) {
                    actual += wfunc(".", 1, 1, target);
                    expected++;
                }
                // continue with the first digit behind the separator
                dot++;
            }
            // find the exponent
            exp = strchr(dot, 'e');
            if (exp == NULL) {
                // no exponent - output the rest
                if (max_digits > 0) {
                    size_t len = strlen(dot);
                    if (len > max_digits) {
                        len = max_digits;
                    }
                    actual += wfunc(dot, 1, len, target);
                    expected += len;
                }
            } else {
                // exponent found - truncate the frac digits
                // and then output the rest
                if (max_digits > 0) {
                    // dot already points at the first digit to write, so
                    // all characters in [dot, exp) belong to the mantissa
                    size_t len = (size_t) (exp - dot);
                    if (len > max_digits) {
                        len = max_digits;
                    }
                    actual += wfunc(dot, 1, len, target);
                    expected += len;
                }
                actual += wfunc("e", 1, 1, target);
                expected++;
                exp++;
                size_t len = strlen(exp);
                actual += wfunc(exp, 1, len, target);
                expected += len;
            }
            break;
        }
        case CX_JSON_INTEGER: {
            snprintf(numbuf, sizeof(numbuf), "%" PRIi64, value->value.integer);
            size_t len = strlen(numbuf);
            actual += wfunc(numbuf, 1, len, target);
            expected += len;
            break;
        }
        case CX_JSON_LITERAL: {
            if (value->value.literal == CX_JSON_TRUE) {
                actual += wfunc("true", 1, 4, target);
                expected += 4;
            } else if (value->value.literal == CX_JSON_FALSE) {
                actual += wfunc("false", 1, 5, target);
                expected += 5;
            } else {
                actual += wfunc("null", 1, 4, target);
                expected += 4;
            }
            break;
        }
        case CX_JSON_NOTHING: {
            // deliberately supported as an empty string!
            // users might want to just write the result
            // of a get operation without testing the value
            // and therefore this should not blow up
            break;
        }
        default: assert(false); // LCOV_EXCL_LINE
    }

    // any short write shows up as a difference between the counters
    return expected != actual;
}

// Serializes value through wfunc into target, starting at depth zero.
// NULL settings select the default (compact) writer settings.
// target, value and wfunc must not be NULL (checked with assertions).
// Returns the result of cx_json_write_rec().
int cxJsonWrite(
    void *target,
    const CxJsonValue *value,
    cx_write_func wfunc,
    const CxJsonWriter *settings
) {
    assert(target != NULL);
    assert(value != NULL);
    assert(wfunc != NULL);

    const CxJsonWriter *effective =
            settings != NULL ? settings : &cx_json_writer_default;
    return cx_json_write_rec(target, value, wfunc, effective, 0);
}

mercurial