Sat, 07 Dec 2024 23:59:54 +0100
change cx_strcat variants to allow handling of ENOMEM
/* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <string.h> #include <ctype.h> #include <assert.h> #include "cx/json.h" /* * RFC 8259 * https://tools.ietf.org/html/rfc8259 */ #define PARSER_READVALUE_ALLOC 32 static CxJsonValue cx_json_value_nothing = {.type = CX_JSON_NOTHING}; static void token_destroy(CxJsonToken *token) { if (token->alloc > 0) { free((char*) token->content); } } static int token_append(CxJsonToken *token, const char *buf, size_t len) { if (len == 0) { return 0; } size_t newlen = token->length + len; if (token->alloc < newlen) { char *newbuf = realloc( token->alloc == 0 ? NULL : (char *) token->content, newlen); if (!newbuf) { return 1; } token->content = newbuf; token->alloc = newlen; } memcpy((char *) token->content + token->length, buf, len); token->length = newlen; return 0; } static CxJsonToken token_create(CxJson *json, size_t start, size_t end) { CxJsonToken token = {0}; size_t len = end - start; if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { token.content = json->buffer + start; token.length = len; } else { if (token_append(&json->uncompleted, json->buffer + start, len)) { // TODO: this does certainly not lead to correct error handling return (CxJsonToken){0}; } token = json->uncompleted; } json->uncompleted = (CxJsonToken){0}; return token; } static int token_isliteral(const char *content, size_t length) { if (length == 4) { if (!memcmp(content, "true", 4)) { return 1; } else if (!memcmp(content, "null", 4)) { return 1; } } else if (length == 5 && !memcmp(content, "false", 5)) { return 1; } return 0; } static int num_isexp(const char *content, size_t length, size_t pos) { if (pos >= length) { return 0; } int ok = 0; for (size_t i = pos; i < length; i++) { char c = content[i]; if (isdigit(c)) { ok = 1; } else if (i == pos) { if (!(c == '+' || c == '-')) { return 0; } } else { return 0; } } return ok; } static CxJsonTokenType token_numbertype(const char *content, size_t length) { if (length == 0) return CX_JSON_TOKEN_ERROR; if (content[0] != '-' && !isdigit(content[0])) { return CX_JSON_TOKEN_ERROR; } CxJsonTokenType type = CX_JSON_TOKEN_INTEGER; for (size_t i = 1; i < length; i++) { if (content[i] == '.') { if (type == CX_JSON_TOKEN_NUMBER) { return CX_JSON_TOKEN_ERROR; // more than one decimal separator } type = CX_JSON_TOKEN_NUMBER; } else if (content[i] == 'e' || content[i] == 'E') { return num_isexp(content, length, i + 1) ? CX_JSON_TOKEN_NUMBER : CX_JSON_TOKEN_ERROR; } else if (!isdigit(content[i])) { return CX_JSON_TOKEN_ERROR; // char is not a digit, decimal separator or exponent sep } } return type; } static CxJsonTokenType char2ttype(char c) { switch (c) { case '[': { return CX_JSON_TOKEN_BEGIN_ARRAY; } case '{': { return CX_JSON_TOKEN_BEGIN_OBJECT; } case ']': { return CX_JSON_TOKEN_END_ARRAY; } case '}': { return CX_JSON_TOKEN_END_OBJECT; } case ':': { return CX_JSON_TOKEN_NAME_SEPARATOR; } case ',': { return CX_JSON_TOKEN_VALUE_SEPARATOR; } case '"': { return CX_JSON_TOKEN_STRING; } default: { if (isspace(c)) { return CX_JSON_TOKEN_SPACE; } } } return CX_JSON_NO_TOKEN; } static CxJsonToken token_parse_next(CxJson *json) { // current token type and start index CxJsonTokenType ttype = json->uncompleted.tokentype; size_t token_start = json->pos; for (size_t i = json->pos; i < json->size; i++) { char c = json->buffer[i]; if (ttype != CX_JSON_TOKEN_STRING) { // currently non-string token CxJsonTokenType ctype = char2ttype(c); // start of new token? if (ttype == CX_JSON_NO_TOKEN) { if (ctype == CX_JSON_TOKEN_SPACE) { continue; } else if (ctype == CX_JSON_TOKEN_STRING) { // begin string ttype = CX_JSON_TOKEN_STRING; token_start = i; } else if (ctype != CX_JSON_NO_TOKEN) { // single-char token json->pos = i + 1; CxJsonToken token = {ctype, NULL, 0, 0}; return token; } else { ttype = CX_JSON_TOKEN_LITERAL; // number or literal token_start = i; } } else { // finish token if (ctype != CX_JSON_NO_TOKEN) { CxJsonToken ret = token_create(json, token_start, i); if (token_isliteral(ret.content, ret.length)) { ret.tokentype = CX_JSON_TOKEN_LITERAL; } else { ret.tokentype = token_numbertype(ret.content, ret.length); } json->pos = i; return ret; } } } else { // currently inside a string if (json->tokenizer_escape) { json->tokenizer_escape = false; } else { if (c == '"') { CxJsonToken ret = token_create(json, token_start, i + 1); ret.tokentype = CX_JSON_TOKEN_STRING; json->pos = i + 1; return ret; } else if (c == '\\') { json->tokenizer_escape = true; } } } } if (ttype != CX_JSON_NO_TOKEN) { // uncompleted token size_t uncompeted_len = json->size - token_start; if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { // current token is uncompleted // save current token content in p->uncompleted CxJsonToken uncompleted; uncompleted.tokentype = ttype; uncompleted.length = uncompeted_len; uncompleted.alloc = uncompeted_len + 16; char *tmp = malloc(uncompleted.alloc); if (tmp) { memcpy(tmp, json->buffer + token_start, uncompeted_len); uncompleted.content = tmp; json->uncompleted = uncompleted; } else { json->error = 1; } } else { // previously we also had an uncompleted token // combine the uncompleted token with the current token if (token_append(&json->uncompleted, json->buffer + token_start, uncompeted_len)) { json->error = 1; } } } CxJsonToken ret = {CX_JSON_NO_TOKEN, NULL, 0, 0}; return ret; } static cxmutstr unescape_string(const CxAllocator *a, const char *str, size_t len) { // TODO: support more escape sequences // we know that the unescaped string will be shorter by at least 2 chars cxmutstr result; result.length = 0; result.ptr = cxMalloc(a, len - 1); if (result.ptr == NULL) { return result; } bool u = false; for (size_t i = 1; i < len - 1; i++) { char c = str[i]; if (u) { u = false; if (c == 'n') { c = '\n'; } else if (c == 't') { c = '\t'; } result.ptr[result.length++] = c; } else { if (c == '\\') { u = true; } else { result.ptr[result.length++] = c; } } } result.ptr[result.length] = 0; return result; } static int parse_number(const char *str, size_t len, void *value, bool asint) { char *endptr = NULL; char buf[32]; if (len > 30) { return 1; } // TODO: if we can guarantee that we are working on a copied string already, we can avoid this memcpy memcpy(buf, str, len); buf[len] = 0; if (asint) { long long v = strtoll(buf, &endptr, 10); *((int64_t*)value) = (int64_t) v; } else { // TODO: proper JSON spec number parser double v = strtod(buf, &endptr); *((double*)value) = v; } return (endptr != &buf[len]); } static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) { CxJsonValue *v = cxMalloc(json->allocator, sizeof(CxJsonValue)); if (v == NULL) { return NULL; } // initialize the value if (type == CX_JSON_ARRAY) { cx_array_initialize_a(json->allocator, v->value.array.array, 16); if (v->value.array.array == NULL) { cxFree(json->allocator, v); return NULL; } } else if (type == CX_JSON_OBJECT) { cx_array_initialize_a(json->allocator, v->value.object.values, 16); if (v->value.object.values == NULL) { cxFree(json->allocator, v); return NULL; } } else { memset(v, 0, sizeof(CxJsonValue)); } v->type = type; v->allocator = json->allocator; // add the new value to a possible parent CxArrayReallocator value_realloc = cx_array_reallocator(json->allocator, NULL); if (json->vbuf_size > 0) { CxJsonValue *parent = json->vbuf[json->vbuf_size - 1]; if (parent->type == CX_JSON_ARRAY) { cx_array_simple_add_a(&value_realloc, parent->value.array.array, v); } else if (parent->type == CX_JSON_OBJECT) { assert(parent->value.object.values_size > 0); assert(parent->value.object.values[parent->value.object.values_size - 1].value == NULL); parent->value.object.values[parent->value.object.values_size - 1].value = v; } else { assert(false); } } // add the new value to the stack, if it is an array or object if (type == CX_JSON_ARRAY || type == CX_JSON_OBJECT) { CxArrayReallocator vbuf_realloc = cx_array_reallocator(NULL, json->vbuf_internal); if (cx_array_simple_add_a(&vbuf_realloc, json->vbuf, v)) { cxFree(json->allocator, v); return NULL; } } // if currently no value is parsed, this is now the value of interest if (json->parsed == NULL) { json->parsed = v; } return v; } static int json_obj_add_entry(CxJson *json, char *name) { CxJsonObjValue kv = {name, NULL}; assert(json->vbuf_size > 0); CxJsonValue *parent = json->vbuf[json->vbuf_size - 1]; assert(parent != NULL); assert(parent->type == CX_JSON_OBJECT); CxArrayReallocator value_realloc = cx_array_reallocator(json->allocator, NULL); return cx_array_simple_add_a(&value_realloc, parent->value.object.values, kv); } #define JP_STATE_VALUE_BEGIN 0 #define JP_STATE_VALUE_END 10 #define JP_STATE_VALUE_BEGIN_OBJ 1 #define JP_STATE_OBJ_SEP_OR_CLOSE 11 #define JP_STATE_VALUE_BEGIN_AR 2 #define JP_STATE_ARRAY_SEP_OR_CLOSE 12 #define JP_STATE_OBJ_NAME_OR_CLOSE 5 #define JP_STATE_OBJ_NAME 6 #define JP_STATE_OBJ_COLON 7 void cxJsonInit(CxJson *json, const CxAllocator *allocator) { if (allocator == NULL) { allocator = cxDefaultAllocator; } memset(json, 0, sizeof(CxJson)); json->allocator = allocator; json->states = json->states_internal; json->states_capacity = cx_nmemb(json->states_internal); json->states[0] = JP_STATE_VALUE_BEGIN; json->states_size = 1; json->vbuf = json->vbuf_internal; json->vbuf_capacity = cx_nmemb(json->vbuf_internal); } void cxJsonDestroy(CxJson *json) { if (json->states != json->states_internal) { free(json->states); } if (json->vbuf != json->vbuf_internal) { free(json->vbuf); } cxJsonValueFree(json->parsed); json->parsed = NULL; } int cxJsonFilln(CxJson *json, const char *buf, size_t size) { // TODO: implement rescue buffer like in CxProperties to allow subsequent fills json->buffer = buf; json->size = size; json->pos = 0; return 0; } static void json_add_state(CxJson *json, int state) { // we have guaranteed the necessary space with cx_array_simple_reserve() // therefore, we can safely add the state in the simplest way possible json->states[json->states_size++] = state; } #define return_rec(code) \ token_destroy(&token); \ return code static int json_parse(CxJson *json) { // Reserve a pointer for a possibly read value CxJsonValue *vbuf = NULL; // grab the next token CxJsonToken token = token_parse_next(json); if (token.tokentype == CX_JSON_NO_TOKEN) { // nothing found, wait for more data return 0; } // pop the current state assert(json->states_size > 0); int state = json->states[--json->states_size]; // guarantee that at least two more states fit on the stack CxArrayReallocator state_realloc = cx_array_reallocator(NULL, json->states_internal); if (cx_array_simple_reserve_a(&state_realloc, json->states, 2)) { return -1; } // 0 JP_STATE_VALUE_BEGIN value begin // 10 JP_STATE_VALUE_END expect value end // 1 JP_STATE_VALUE_BEGIN_OBJ value begin (inside object) // 11 JP_STATE_OBJ_SEP_OR_CLOSE object, expect separator, objclose // 2 JP_STATE_VALUE_BEGIN_AR value begin (inside array) // 12 JP_STATE_ARRAY_SEP_OR_CLOSE array, expect separator or arrayclose // 5 JP_STATE_OBJ_NAME_OR_CLOSE object, expect name or objclose // 6 JP_STATE_OBJ_NAME object, expect name // 7 JP_STATE_OBJ_COLON object, expect ':' if (state < 3) { // push expected end state to the stack json_add_state(json, 10 + state); switch (token.tokentype) { case CX_JSON_TOKEN_BEGIN_ARRAY: { if (create_json_value(json, CX_JSON_ARRAY) == NULL) { // TODO: error code - no memory return_rec(-1); } json_add_state(json, JP_STATE_VALUE_BEGIN_AR); return_rec(1); } case CX_JSON_TOKEN_BEGIN_OBJECT: { if (create_json_value(json, CX_JSON_OBJECT) == NULL) { // TODO: error code - no memory return_rec(-1); } json_add_state(json, JP_STATE_OBJ_NAME_OR_CLOSE); return_rec(1); } case CX_JSON_TOKEN_STRING: { if ((vbuf = create_json_value(json, CX_JSON_STRING)) == NULL) { // TODO: error code - no memory return_rec(-1); } cxmutstr str = unescape_string(json->allocator, token.content, token.length); if (str.ptr == NULL) { // TODO: error code - no memory return_rec(-1); } vbuf->value.string = str; return_rec(1); } case CX_JSON_TOKEN_INTEGER: case CX_JSON_TOKEN_NUMBER: { int type = token.tokentype == CX_JSON_TOKEN_INTEGER ? CX_JSON_INTEGER : CX_JSON_NUMBER; if (NULL == (vbuf = create_json_value(json, type))) { // TODO: error code - no memory return_rec(-1); } if (parse_number(token.content, token.length, &vbuf->value,type == CX_JSON_INTEGER)) { // TODO: error code - format error return_rec(-1); } return_rec(1); } case CX_JSON_TOKEN_LITERAL: { if ((vbuf = create_json_value(json, CX_JSON_LITERAL)) == NULL) { // TODO: error code - no memory return_rec(-1); } const char *l = token.content; size_t token_len = token.length; if (token_len == 4 && !memcmp(l, "true", 4)) { vbuf->value.literal = CX_JSON_TRUE; } else if (token_len == 5 && !memcmp(l, "false", 5)) { vbuf->value.literal = CX_JSON_FALSE; } else { vbuf->value.literal = CX_JSON_NULL; } return_rec(1); } default: { // TODO: error code - unexpected token return_rec(-1); } } } else if (state == JP_STATE_ARRAY_SEP_OR_CLOSE) { // expect ',' or ']' if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) { json_add_state(json, JP_STATE_VALUE_BEGIN_AR); return_rec(1); } else if (token.tokentype == CX_JSON_TOKEN_END_ARRAY) { // discard the array from the value buffer json->vbuf_size--; return_rec(1); } else { // TODO: error code - unexpected token return_rec(-1); } } else if (state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) { if (state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == CX_JSON_TOKEN_END_OBJECT) { // discard the obj from the value buffer json->vbuf_size--; return_rec(1); } else { // expect string if (token.tokentype != CX_JSON_TOKEN_STRING) { // TODO: error code - unexpected token return_rec(-1); } // add new entry cxmutstr name = unescape_string(json->allocator, token.content, token.length); if (name.ptr == NULL) { // TODO: error code - no mem return_rec(-1); } json_obj_add_entry(json, name.ptr); // next state json_add_state(json, JP_STATE_OBJ_COLON); return_rec(1); } } else if (state == JP_STATE_OBJ_COLON) { // expect ':' if (token.tokentype != CX_JSON_TOKEN_NAME_SEPARATOR) { // TODO: error code - unexpected token return_rec(-1); } // next state json_add_state(json, JP_STATE_VALUE_BEGIN_OBJ); return_rec(1); } else if (state == JP_STATE_OBJ_SEP_OR_CLOSE) { // expect ',' or '}' if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) { json_add_state(json, JP_STATE_OBJ_NAME); return_rec(1); } else if (token.tokentype == CX_JSON_TOKEN_END_OBJECT) { // discard the obj from the value buffer json->vbuf_size--; return_rec(1); } else { // TODO: error code - unexpected token return_rec(-1); } } else { // should be unreachable assert(false); return_rec(-1); } } int cxJsonNext(CxJson *json, CxJsonValue **value) { // TODO: replace int with a status enum like in CxProperties // initialize output value *value = &cx_json_value_nothing; // parse data int result; do { result = json_parse(json); if (result == 1 && json->states_size == 1) { // final state reached assert(json->states[0] == JP_STATE_VALUE_END); assert(json->vbuf_size == 0); // write output value *value = json->parsed; json->parsed = NULL; // re-initialize state machine json->states[0] = JP_STATE_VALUE_BEGIN; return 1; } } while (result == 1); return result; } void cxJsonValueFree(CxJsonValue *value) { if (value == NULL || value == &cx_json_value_nothing) return; switch (value->type) { case CX_JSON_OBJECT: { CxJsonObject obj = value->value.object; for (size_t i = 0; i < obj.values_size; i++) { cxJsonValueFree(obj.values[i].value); cxFree(value->allocator, obj.values[i].name); } cxFree(value->allocator, obj.values); break; } case CX_JSON_ARRAY: { CxJsonArray array = value->value.array; for (size_t i = 0; i < array.array_size; i++) { cxJsonValueFree(array.array[i]); } cxFree(value->allocator, array.array); break; } case CX_JSON_STRING: { cxFree(value->allocator, value->value.string.ptr); break; } default: { break; } } cxFree(value->allocator, value); } CxJsonValue *cxJsonArrGet(CxJsonValue *value, size_t index) { if (index >= value->value.array.array_size) { return &cx_json_value_nothing; } return value->value.array.array[index]; } CxJsonValue *cxJsonObjGet(CxJsonValue *value, const char *name) { CxJsonObject *obj = &(value->value.object); // TODO: think about sorting the object so that we can use binary search here for (size_t i = 0; i < obj->values_size; i++) { // TODO: we might want to store names as cxmutstr if (0 == strcmp(name, obj->values[i].name)) { return obj->values[i].value; } } return &cx_json_value_nothing; }