3 weeks ago
avoid copying the filled data in the json parser when possible - fixes #530
src/cx/json.h | file | annotate | diff | comparison | revisions | |
src/json.c | file | annotate | diff | comparison | revisions | |
tests/test_json.c | file | annotate | diff | comparison | revisions |
--- a/src/cx/json.h Fri Dec 27 12:23:14 2024 +0100 +++ b/src/cx/json.h Fri Dec 27 13:01:31 2024 +0100 @@ -398,6 +398,10 @@ */ CX_JSON_OK, /** + * The input buffer has never been filled. + */ + CX_JSON_NULL_DATA, + /** * Allocating memory for the internal buffer failed. */ CX_JSON_BUFFER_ALLOC_FAILED, @@ -455,9 +459,15 @@ } /** - * Adds more data to the input buffer. + * Fills the input buffer. * - * The data will be copied. + * @remark The JSON interface tries to avoid copying the input data. + * When you use this function and cxJsonNext() interleaving, + * no copies are performed. However, you must not free the + * pointer to the data in that case. When you invoke the fill + * function more than once before calling cxJsonNext(), + * the additional data is appended - inevitably leading to + * an allocation of a new buffer and copying the previous contents. * * @param json the json interface * @param buf the source buffer @@ -500,12 +510,18 @@ extern "C" { #else // __cplusplus /** - * Adds more data to the input buffer. + * Fills the input buffer. * - * The data will be copied. + * @remark The JSON interface tries to avoid copying the input data. + * When you use this function and cxJsonNext() interleaving, + * no copies are performed. However, you must not free the + * pointer to the data in that case. When you invoke the fill + * function more than once before calling cxJsonNext(), + * the additional data is appended - inevitably leading to + * an allocation of a new buffer and copying the previous contents. * * @param json the json interface - * @param str the string to add to the buffer + * @param buf the source string * @return zero on success, non-zero on internal allocation error * @see cxJsonFilln() */
--- a/src/json.c Fri Dec 27 12:23:14 2024 +0100 +++ b/src/json.c Fri Dec 27 13:01:31 2024 +0100 @@ -289,35 +289,6 @@ return result; } -static int parse_number(cxmutstr str, void *value, bool asint) { - char *endptr = NULL; - if (str.length > 30) { - return 1; - } - // the buffer guarantees that we are working on a copied string - char c = str.ptr[str.length]; - str.ptr[str.length] = 0; - - if (asint) { - errno = 0; - long long v = strtoll(str.ptr, &endptr, 10); - if (errno == ERANGE) { - return 1; - } - *((int64_t*)value) = (int64_t) v; - } else { - // TODO: proper JSON spec number parser - // TODO: also return an error when loss of precision is high - double v = strtod(str.ptr, &endptr); - *((double*)value) = v; - } - - // recover from the hack - str.ptr[str.length] = c; - - return endptr != &str.ptr[str.length]; -} - static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) { CxJsonValue *v = cxMalloc(json->allocator, sizeof(CxJsonValue)); if (v == NULL) { @@ -400,8 +371,6 @@ json->vbuf = json->vbuf_internal; json->vbuf_capacity = cx_nmemb(json->vbuf_internal); - - cxBufferInit(&json->buffer, NULL, 256, NULL, CX_BUFFER_AUTO_EXTEND); } void cxJsonDestroy(CxJson *json) { @@ -417,16 +386,16 @@ } int cxJsonFilln(CxJson *json, const char *buf, size_t size) { - // we use the UCX buffer to write the data - // but reset the position immediately to enable parsing - size_t old_pos = json->buffer.pos; - cxBufferSeek(&json->buffer, 0, SEEK_END); - size_t written = cxBufferWrite(buf, 1, size, &json->buffer); - if (0 == cxBufferTerminate(&json->buffer)) { - written++; + if (cxBufferEof(&json->buffer)) { + // reinitialize the buffer + cxBufferDestroy(&json->buffer); + cxBufferInit(&json->buffer, (char*) buf, size, + NULL, CX_BUFFER_AUTO_EXTEND | CX_BUFFER_COPY_ON_WRITE); + json->buffer.size = size; + return 0; + } else { + return size != cxBufferAppend(buf, 1, size, &json->buffer); } - json->buffer.pos = old_pos; - return written != size + 1; } static void json_add_state(CxJson *json, int state) { @@ -508,8 +477,14 @@ if (NULL == (vbuf = create_json_value(json, type))) { return_rec(CX_JSON_VALUE_ALLOC_FAILED); } - if (parse_number(token.content, &vbuf->value,type == CX_JSON_INTEGER)) { - return_rec(CX_JSON_FORMAT_ERROR_NUMBER); + if (type == CX_JSON_INTEGER) { + if (cx_strtoi64(token.content, &vbuf->value.integer, 10)) { + return_rec(CX_JSON_FORMAT_ERROR_NUMBER); + } + } else { + if (cx_strtod(token.content, &vbuf->value.number)) { + return_rec(CX_JSON_FORMAT_ERROR_NUMBER); + } } return_rec(CX_JSON_NO_ERROR); } @@ -600,6 +575,11 @@ } CxJsonStatus cxJsonNext(CxJson *json, CxJsonValue **value) { + // check if buffer has been filled + if (json->buffer.space == NULL) { + return CX_JSON_NULL_DATA; + } + // initialize output value *value = &cx_json_value_nothing; @@ -607,7 +587,6 @@ CxJsonStatus result; do { result = json_parse(json); - cxBufferShiftLeft(&json->buffer, json->buffer.pos); if (result == CX_JSON_NO_ERROR && json->states_size == 1) { // final state reached assert(json->states[0] == JP_STATE_VALUE_END);
--- a/tests/test_json.c Fri Dec 27 12:23:14 2024 +0100 +++ b/tests/test_json.c Fri Dec 27 13:01:31 2024 +0100 @@ -376,6 +376,14 @@ CX_TEST_ASSERT(cxJsonAsInteger(v) == 12300); CX_TEST_ASSERT(cxJsonAsDouble(v) == 12300.0); cxJsonValueFree(v); + + cxJsonFill(&json, "18446744073709551615.0123456789 "); + result = cxJsonNext(&json, &v); + CX_TEST_ASSERT(result == CX_JSON_NO_ERROR); + CX_TEST_ASSERT(cxJsonIsNumber(v)); + // be as precise as possible + CX_TEST_ASSERT(cxJsonAsDouble(v) == 1.8446744073709552e+19); + cxJsonValueFree(v); } cxJsonDestroy(&json); } @@ -394,15 +402,6 @@ CX_TEST_ASSERT(v->type == CX_JSON_NOTHING); cxJsonReset(&json); -#if 0 // TODO: discuss if it is intended that this produces -47 as valid value - cxJsonFill(&json, "-47,11e2 "); - result = cxJsonNext(&json, &v); - CX_TEST_ASSERTM(result == CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN, - "decimal separator cannot be a comma"); - CX_TEST_ASSERT(v->type == CX_JSON_NOTHING); - cxJsonReset(&json); -#endif - cxJsonFill(&json, "0.815e-3.0 "); result = cxJsonNext(&json, &v); CX_TEST_ASSERTM(result == CX_JSON_FORMAT_ERROR_NUMBER, @@ -451,13 +450,6 @@ "30 digit int does not fit into 64-bit int"); CX_TEST_ASSERT(v->type == CX_JSON_NOTHING); cxJsonReset(&json); - - cxJsonFill(&json, "18446744073709551615.0123456789 "); - result = cxJsonNext(&json, &v); - CX_TEST_ASSERTM(result == CX_JSON_FORMAT_ERROR_NUMBER, - "numbers with more than 30 characters are unsupported in general"); - CX_TEST_ASSERT(v->type == CX_JSON_NOTHING); - cxJsonReset(&json); } cxJsonDestroy(&json); }