--- a/src/json.c Sat Dec 07 23:59:54 2024 +0100 +++ b/src/json.c Sun Dec 08 00:13:38 2024 +0100 @@ -42,49 +42,11 @@ static CxJsonValue cx_json_value_nothing = {.type = CX_JSON_NOTHING}; static void token_destroy(CxJsonToken *token) { - if (token->alloc > 0) { - free((char*) token->content); + if (token->allocated) { + cx_strfree(&token->content); } } -static int token_append(CxJsonToken *token, const char *buf, size_t len) { - if (len == 0) { - return 0; - } - - size_t newlen = token->length + len; - if (token->alloc < newlen) { - char *newbuf = realloc( - token->alloc == 0 ? NULL : (char *) token->content, - newlen); - if (!newbuf) { - return 1; - } - token->content = newbuf; - token->alloc = newlen; - } - - memcpy((char *) token->content + token->length, buf, len); - token->length = newlen; - return 0; -} - -static CxJsonToken token_create(CxJson *json, size_t start, size_t end) { - CxJsonToken token = {0}; - size_t len = end - start; - if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { - token.content = json->buffer + start; - token.length = len; - } else { - if (token_append(&json->uncompleted, json->buffer + start, len)) { - // TODO: this does certainly not lead to correct error handling - return (CxJsonToken){0}; - } - token = json->uncompleted; - } - json->uncompleted = (CxJsonToken){0}; - return token; -} static int token_isliteral(const char *content, size_t length) { if (length == 4) { @@ -145,6 +107,36 @@ return type; } +static CxJsonToken token_create(CxJson *json, bool isstring, size_t start, size_t end) { + cxmutstr str = cx_mutstrn((char*)json->buffer + start, end - start); + bool allocated = false; + if (json->uncompleted.tokentype != CX_JSON_NO_TOKEN) { + allocated = true; + str = cx_strcat_m(json->uncompleted.content, 1, str); + if (str.ptr == NULL) { + return (CxJsonToken){CX_JSON_NO_TOKEN, false, 0, 0}; + } + } + json->uncompleted = (CxJsonToken){0}; + CxJsonTokenType ttype; + if (isstring) { + ttype = CX_JSON_TOKEN_STRING; + } else { + if (token_isliteral(str.ptr, str.length)) { + ttype = CX_JSON_TOKEN_LITERAL; + } else { + ttype = token_numbertype(str.ptr, str.length); + } + } + if (ttype == CX_JSON_TOKEN_ERROR) { + if (allocated) { + cx_strfree(&str); + } + return (CxJsonToken){CX_JSON_TOKEN_ERROR, false, 0, 0}; + } + return (CxJsonToken){ttype, allocated, str}; +} + static CxJsonTokenType char2ttype(char c) { switch (c) { case '[': { @@ -177,7 +169,18 @@ return CX_JSON_NO_TOKEN; } -static CxJsonToken token_parse_next(CxJson *json) { +static enum cx_json_status token_parse_next(CxJson *json, CxJsonToken *result) { + // check if there is data in the buffer + if (json->pos >= json->size) { + return json->uncompleted.tokentype == CX_JSON_NO_TOKEN ? + CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA; + } + + // sanity check + if (json->buffer == NULL) { + return CX_JSON_NULL_INPUT; + } + // current token type and start index CxJsonTokenType ttype = json->uncompleted.tokentype; size_t token_start = json->pos; @@ -186,9 +189,7 @@ char c = json->buffer[i]; if (ttype != CX_JSON_TOKEN_STRING) { // currently non-string token - CxJsonTokenType ctype = char2ttype(c); // start of new token? - if (ttype == CX_JSON_NO_TOKEN) { if (ctype == CX_JSON_TOKEN_SPACE) { continue; @@ -199,8 +200,8 @@ } else if (ctype != CX_JSON_NO_TOKEN) { // single-char token json->pos = i + 1; - CxJsonToken token = {ctype, NULL, 0, 0}; - return token; + *result = (CxJsonToken){ctype, NULL, 0, 0}; + return CX_JSON_NO_ERROR; } else { ttype = CX_JSON_TOKEN_LITERAL; // number or literal token_start = i; @@ -208,14 +209,15 @@ } else { // finish token if (ctype != CX_JSON_NO_TOKEN) { - CxJsonToken ret = token_create(json, token_start, i); - if (token_isliteral(ret.content, ret.length)) { - ret.tokentype = CX_JSON_TOKEN_LITERAL; - } else { - ret.tokentype = token_numbertype(ret.content, ret.length); + *result = token_create(json, false, token_start, i); + if (result->tokentype == CX_JSON_NO_TOKEN) { + return CX_JSON_BUFFER_ALLOC_FAILED; + } + if (result->tokentype == CX_JSON_TOKEN_ERROR) { + return CX_JSON_FORMAT_ERROR_NUMBER; } json->pos = i; - return ret; + return CX_JSON_NO_ERROR; } } } else { @@ -224,10 +226,12 @@ json->tokenizer_escape = false; } else { if (c == '"') { - CxJsonToken ret = token_create(json, token_start, i + 1); - ret.tokentype = CX_JSON_TOKEN_STRING; + *result = token_create(json, true, token_start, i + 1); + if (result->tokentype == CX_JSON_NO_TOKEN) { + return CX_JSON_BUFFER_ALLOC_FAILED; + } json->pos = i + 1; - return ret; + return CX_JSON_NO_ERROR; } else if (c == '\\') { json->tokenizer_escape = true; } @@ -240,45 +244,44 @@ size_t uncompeted_len = json->size - token_start; if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { // current token is uncompleted - // save current token content in p->uncompleted - CxJsonToken uncompleted; - uncompleted.tokentype = ttype; - uncompleted.length = uncompeted_len; - uncompleted.alloc = uncompeted_len + 16; - char *tmp = malloc(uncompleted.alloc); - if (tmp) { - memcpy(tmp, json->buffer + token_start, uncompeted_len); - uncompleted.content = tmp; - json->uncompleted = uncompleted; - } else { - json->error = 1; + // save current token content + CxJsonToken uncompleted = { + ttype, true, + cx_strdup(cx_strn(json->buffer + token_start, uncompeted_len)) + }; + if (uncompleted.content.ptr == NULL) { + return CX_JSON_BUFFER_ALLOC_FAILED; } + json->uncompleted = uncompleted; } else { // previously we also had an uncompleted token // combine the uncompleted token with the current token - if (token_append(&json->uncompleted, json->buffer + token_start, uncompeted_len)) { - json->error = 1; + assert(json->uncompleted.allocated); + cxmutstr str = cx_strcat_m(json->uncompleted.content, 1, + cx_strn(json->buffer + token_start, uncompeted_len)); + if (str.ptr == NULL) { + return CX_JSON_BUFFER_ALLOC_FAILED; } + json->uncompleted.content = str; } } - CxJsonToken ret = {CX_JSON_NO_TOKEN, NULL, 0, 0}; - return ret; + return CX_JSON_INCOMPLETE_DATA; } -static cxmutstr unescape_string(const CxAllocator *a, const char *str, size_t len) { +static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { // TODO: support more escape sequences // we know that the unescaped string will be shorter by at least 2 chars cxmutstr result; result.length = 0; - result.ptr = cxMalloc(a, len - 1); + result.ptr = cxMalloc(a, str.length - 1); if (result.ptr == NULL) { return result; } bool u = false; - for (size_t i = 1; i < len - 1; i++) { - char c = str[i]; + for (size_t i = 1; i < str.length - 1; i++) { + char c = str.ptr[i]; if (u) { u = false; if (c == 'n') { @@ -300,15 +303,15 @@ return result; } -static int parse_number(const char *str, size_t len, void *value, bool asint) { +static int parse_number(cxmutstr str, void *value, bool asint) { char *endptr = NULL; char buf[32]; - if (len > 30) { + if (str.length > 30) { return 1; } // TODO: if we can guarantee that we are working on a copied string already, we can avoid this memcpy - memcpy(buf, str, len); - buf[len] = 0; + memcpy(buf, str.ptr, str.length); + buf[str.length] = 0; if (asint) { long long v = strtoll(buf, &endptr, 10); @@ -319,7 +322,7 @@ *((double*)value) = v; } - return (endptr != &buf[len]); + return (endptr != &buf[str.length]); } static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) { @@ -445,15 +448,17 @@ token_destroy(&token); \ return code -static int json_parse(CxJson *json) { +static enum cx_json_status json_parse(CxJson *json) { // Reserve a pointer for a possibly read value CxJsonValue *vbuf = NULL; // grab the next token - CxJsonToken token = token_parse_next(json); - if (token.tokentype == CX_JSON_NO_TOKEN) { - // nothing found, wait for more data - return 0; + CxJsonToken token; + { + enum cx_json_status ret = token_parse_next(json, &token); + if (ret != CX_JSON_NO_ERROR) { + return ret; + } } // pop the current state @@ -463,7 +468,7 @@ // guarantee that at least two more states fit on the stack CxArrayReallocator state_realloc = cx_array_reallocator(NULL, json->states_internal); if (cx_array_simple_reserve_a(&state_realloc, json->states, 2)) { - return -1; + return CX_JSON_BUFFER_ALLOC_FAILED; } @@ -483,125 +488,110 @@ switch (token.tokentype) { case CX_JSON_TOKEN_BEGIN_ARRAY: { if (create_json_value(json, CX_JSON_ARRAY) == NULL) { - // TODO: error code - no memory - return_rec(-1); + return_rec(CX_JSON_VALUE_ALLOC_FAILED); } json_add_state(json, JP_STATE_VALUE_BEGIN_AR); - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } case CX_JSON_TOKEN_BEGIN_OBJECT: { if (create_json_value(json, CX_JSON_OBJECT) == NULL) { - // TODO: error code - no memory - return_rec(-1); + return_rec(CX_JSON_VALUE_ALLOC_FAILED); } json_add_state(json, JP_STATE_OBJ_NAME_OR_CLOSE); - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } case CX_JSON_TOKEN_STRING: { if ((vbuf = create_json_value(json, CX_JSON_STRING)) == NULL) { - // TODO: error code - no memory - return_rec(-1); + return_rec(CX_JSON_VALUE_ALLOC_FAILED); } - cxmutstr str = unescape_string(json->allocator, token.content, token.length); + cxmutstr str = unescape_string(json->allocator, token.content); if (str.ptr == NULL) { - // TODO: error code - no memory - return_rec(-1); + return_rec(CX_JSON_VALUE_ALLOC_FAILED); } vbuf->value.string = str; - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } case CX_JSON_TOKEN_INTEGER: case CX_JSON_TOKEN_NUMBER: { int type = token.tokentype == CX_JSON_TOKEN_INTEGER ? CX_JSON_INTEGER : CX_JSON_NUMBER; if (NULL == (vbuf = create_json_value(json, type))) { - // TODO: error code - no memory - return_rec(-1); + return_rec(CX_JSON_VALUE_ALLOC_FAILED); } - if (parse_number(token.content, token.length, &vbuf->value,type == CX_JSON_INTEGER)) { - // TODO: error code - format error - return_rec(-1); + if (parse_number(token.content, &vbuf->value,type == CX_JSON_INTEGER)) { + return_rec(CX_JSON_FORMAT_ERROR_NUMBER); } - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } case CX_JSON_TOKEN_LITERAL: { if ((vbuf = create_json_value(json, CX_JSON_LITERAL)) == NULL) { - // TODO: error code - no memory - return_rec(-1); + return_rec(CX_JSON_VALUE_ALLOC_FAILED); } - const char *l = token.content; - size_t token_len = token.length; - if (token_len == 4 && !memcmp(l, "true", 4)) { + if (0 == cx_strcmp(cx_strcast(token.content), cx_str("true"))) { vbuf->value.literal = CX_JSON_TRUE; - } else if (token_len == 5 && !memcmp(l, "false", 5)) { + } else if (0 == cx_strcmp(cx_strcast(token.content), cx_str("false"))) { vbuf->value.literal = CX_JSON_FALSE; } else { vbuf->value.literal = CX_JSON_NULL; } - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } default: { - // TODO: error code - unexpected token - return_rec(-1); + return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN); } } } else if (state == JP_STATE_ARRAY_SEP_OR_CLOSE) { // expect ',' or ']' if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) { json_add_state(json, JP_STATE_VALUE_BEGIN_AR); - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } else if (token.tokentype == CX_JSON_TOKEN_END_ARRAY) { // discard the array from the value buffer json->vbuf_size--; - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } else { - // TODO: error code - unexpected token - return_rec(-1); + return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN); } } else if (state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) { if (state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == CX_JSON_TOKEN_END_OBJECT) { // discard the obj from the value buffer json->vbuf_size--; - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } else { // expect string if (token.tokentype != CX_JSON_TOKEN_STRING) { - // TODO: error code - unexpected token - return_rec(-1); + return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN); } // add new entry - cxmutstr name = unescape_string(json->allocator, token.content, token.length); + cxmutstr name = unescape_string(json->allocator, token.content); if (name.ptr == NULL) { - // TODO: error code - no mem - return_rec(-1); + return_rec(CX_JSON_VALUE_ALLOC_FAILED); } json_obj_add_entry(json, name.ptr); // next state json_add_state(json, JP_STATE_OBJ_COLON); - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } } else if (state == JP_STATE_OBJ_COLON) { // expect ':' if (token.tokentype != CX_JSON_TOKEN_NAME_SEPARATOR) { - // TODO: error code - unexpected token - return_rec(-1); + return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN); } // next state json_add_state(json, JP_STATE_VALUE_BEGIN_OBJ); - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } else if (state == JP_STATE_OBJ_SEP_OR_CLOSE) { // expect ',' or '}' if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) { json_add_state(json, JP_STATE_OBJ_NAME); - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } else if (token.tokentype == CX_JSON_TOKEN_END_OBJECT) { // discard the obj from the value buffer json->vbuf_size--; - return_rec(1); + return_rec(CX_JSON_NO_ERROR); } else { - // TODO: error code - unexpected token - return_rec(-1); + return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN); } } else { // should be unreachable @@ -610,17 +600,15 @@ } } -int cxJsonNext(CxJson *json, CxJsonValue **value) { - // TODO: replace int with a status enum like in CxProperties - +CxJsonStatus cxJsonNext(CxJson *json, CxJsonValue **value) { // initialize output value *value = &cx_json_value_nothing; // parse data - int result; + CxJsonStatus result; do { result = json_parse(json); - if (result == 1 && json->states_size == 1) { + if (result == CX_JSON_NO_ERROR && json->states_size == 1) { // final state reached assert(json->states[0] == JP_STATE_VALUE_END); assert(json->vbuf_size == 0); @@ -632,9 +620,16 @@ // re-initialize state machine json->states[0] = JP_STATE_VALUE_BEGIN; - return 1; + return CX_JSON_NO_ERROR; } - } while (result == 1); + } while (result == CX_JSON_NO_ERROR); + + // the parser might think there is no data + // but when we did not reach the final state, + // we know that there must be more to come + if (result == CX_JSON_NO_DATA && json->states_size > 1) { + return CX_JSON_INCOMPLETE_DATA; + } return result; }