3 months ago
avoid state buffer allocation for JSON with reasonable nesting depth
src/cx/json.h | file | annotate | diff | comparison | revisions | |
src/json.c | file | annotate | diff | comparison | revisions | |
tests/test_json.c | file | annotate | diff | comparison | revisions | |
tests/ucxtest.c | file | annotate | diff | comparison | revisions |
--- a/src/cx/json.h Tue Oct 22 22:42:48 2024 +0200 +++ b/src/cx/json.h Tue Oct 22 23:10:31 2024 +0200 @@ -121,8 +121,9 @@ int tokenizer_escape; int *states; - int nstates; - int states_alloc; + unsigned nstates; + unsigned states_alloc; + int states_internal[8]; CxJsonToken reader_token; CxJsonReaderType reader_type; @@ -135,12 +136,12 @@ double value_double; CxJsonValue **readvalue_stack; - int readvalue_nelm; - int readvalue_alloc; + unsigned readvalue_nelm; + unsigned readvalue_alloc; CxJsonValue *read_value; int readvalue_initialized; - int reader_array_alloc; + unsigned reader_array_alloc; int error; };
--- a/src/json.c Tue Oct 22 22:42:48 2024 +0200 +++ b/src/json.c Tue Oct 22 23:10:31 2024 +0200 @@ -38,6 +38,7 @@ */ #define PARSER_STATES_ALLOC 32 +#define PARSER_READVALUE_ALLOC 32 static CxJsonValue cx_json_value_nothing = {.type = CX_JSON_NOTHING}; @@ -339,11 +340,21 @@ } static int add_state(CxJson *p, int state) { + // TODO: this is such a common pattern, try to reuse code here if (p->nstates >= p->states_alloc) { - p->states_alloc += PARSER_STATES_ALLOC; - if (cx_reallocate(&p->states, p->states_alloc * sizeof(int))) { - return 1; + unsigned newcap = PARSER_STATES_ALLOC; + if (p->states == p->states_internal) { + void *mem = malloc(newcap * sizeof(int)); + if (mem == NULL) return 1; + memcpy(mem, p->states, p->nstates * sizeof(int)); + p->states = mem; + } else { + newcap += p->states_alloc; + if (cx_reallocate(&p->states, newcap * sizeof(int))) { + return 1; + } } + p->states_alloc = newcap; } p->states[++p->nstates] = state; return 0; @@ -545,9 +556,9 @@ /* -------------------- read value functions -------------------- */ static int setup_read_value(CxJson *p) { - p->readvalue_alloc = PARSER_STATES_ALLOC; + p->readvalue_alloc = PARSER_READVALUE_ALLOC; p->readvalue_nelm = 0; - p->readvalue_stack = calloc(PARSER_STATES_ALLOC, sizeof(CxJsonValue *)); + p->readvalue_stack = calloc(p->readvalue_alloc, sizeof(CxJsonValue *)); if (!p->readvalue_stack) return -1; p->read_value = NULL; @@ -659,15 +670,16 @@ void cxJsonInit(CxJson *json) { memset(json, 0, sizeof(CxJson)); - // TODO: do not allocate states right away - json->states_alloc = PARSER_STATES_ALLOC; - json->states = calloc(PARSER_STATES_ALLOC, sizeof(int)); + json->states = json->states_internal; + json->states_alloc = cx_nmemb(json->states_internal); // TODO: find better way to configure the initial allocation size for arrays and objects json->reader_array_alloc = 8; } void cxJsonDestroy(CxJson *p) { - free(p->states); + if (p->states != p->states_internal) { + free(p->states); + } free(p->readvalue_stack); cxJsonValueFree(p->read_value); free(p->value_name); @@ -684,7 +696,7 @@ int cxJsonNext(CxJson *p, CxJsonValue **value) { // TODO: replace int with a status enum like in CxProperties - *value = NULL; + *value = NULL; // TODO: maybe better initialize with NOTHING? if (!p->readvalue_stack) { if (setup_read_value(p)) return -1; }
--- a/tests/test_json.c Tue Oct 22 22:42:48 2024 +0200 +++ b/tests/test_json.c Tue Oct 22 23:10:31 2024 +0200 @@ -30,6 +30,17 @@ #include "cx/json.h" +CX_TEST(test_json_init_default) { + CxJson json; + CX_TEST_DO { + cxJsonInit(&json); + CX_TEST_ASSERT(json.states == json.states_internal); + CX_TEST_ASSERT(json.nstates == 0); + CX_TEST_ASSERT(json.states_alloc == 8); + CX_TEST_ASSERT(json.reader_array_alloc == 8); + } +} + CX_TEST(test_json_simple_object) { cxstring text = cx_str( "{\n" @@ -184,12 +195,59 @@ } } +CX_TEST(test_json_large_nesting_depth) { + CxJson json; + CxJsonValue *d1; + cxstring text = cx_str("{\"test\": [{},{\"foo\": [[{\"bar\":[4, 2, [null, {\"key\": 47}]]}]]}]}"); + CX_TEST_DO { + cxJsonInit(&json); + cxJsonFill(&json, text.ptr, text.length); + cxJsonNext(&json, &d1); + + CX_TEST_ASSERT(d1 != NULL); + CX_TEST_ASSERT(cxJsonIsObject(d1)); + CxJsonValue *d2 = cxJsonObjGet(d1, "test"); + CX_TEST_ASSERT(cxJsonIsArray(d2)); + CX_TEST_ASSERT(cxJsonArrSize(d2) == 2); + CxJsonValue *d3 = cxJsonArrGet(d2, 1); + CX_TEST_ASSERT(cxJsonIsObject(d3)); + CxJsonValue *d4 = cxJsonObjGet(d3, "foo"); + CX_TEST_ASSERT(cxJsonIsArray(d4)); + CX_TEST_ASSERT(cxJsonArrSize(d4) == 1); + CxJsonValue *d5 = cxJsonArrGet(d4, 0); + CX_TEST_ASSERT(cxJsonIsArray(d5)); + CX_TEST_ASSERT(cxJsonArrSize(d5) == 1); + CxJsonValue *d6 = cxJsonArrGet(d5, 0); + CX_TEST_ASSERT(cxJsonIsObject(d6)); + CxJsonValue *d7 = cxJsonObjGet(d6, "bar"); + CX_TEST_ASSERT(cxJsonIsArray(d7)); + CX_TEST_ASSERT(cxJsonArrSize(d7) == 3); + CxJsonValue *d8 = cxJsonArrGet(d7, 2); + CX_TEST_ASSERT(cxJsonIsArray(d8)); + CX_TEST_ASSERT(cxJsonArrSize(d8) == 2); + CxJsonValue *d9a = cxJsonArrGet(d8, 0); + CX_TEST_ASSERT(cxJsonIsNull(d9a)); + CxJsonValue *d9b = cxJsonArrGet(d8, 1); + CX_TEST_ASSERT(cxJsonIsObject(d9b)); + CxJsonValue *d10 = cxJsonObjGet(d9b, "key"); + CX_TEST_ASSERT(cxJsonIsInteger(d10)); + CX_TEST_ASSERT(cxJsonAsInteger(d10) == 47); + + CX_TEST_ASSERT(json.states != json.states_internal); + CX_TEST_ASSERT(json.states_alloc > cx_nmemb(json.states_internal)); + + cxJsonDestroy(&json); + } +} + CxTestSuite *cx_test_suite_json(void) { CxTestSuite *suite = cx_test_suite_new("json"); + cx_test_register(suite, test_json_init_default); cx_test_register(suite, test_json_simple_object); cx_test_register(suite, test_json_object_incomplete_token); cx_test_register(suite, test_json_object_error); + cx_test_register(suite, test_json_large_nesting_depth); return suite; }
--- a/tests/ucxtest.c Tue Oct 22 22:42:48 2024 +0200 +++ b/tests/ucxtest.c Tue Oct 22 23:10:31 2024 +0200 @@ -26,6 +26,7 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include "cx/common.h" #include "cx/test.h" CxTestSuite *cx_test_suite_testing_allocator(void); @@ -51,8 +52,8 @@ #define run_tests(suite) cx_test_run_stdout(suite); success += (suite)->success; failure += (suite)->failure #define execute_test_suites(...) unsigned success = 0, failure = 0; CxTestSuite* test_suites[] = {__VA_ARGS__}; \ - for (size_t i = 0; i < sizeof(test_suites)/sizeof(void*) ; i++) {run_tests(test_suites[i]);} (void)0 -#define free_test_suites for (size_t i = 0 ; i < sizeof(test_suites)/sizeof(void*) ; i++) {cx_test_suite_free(test_suites[i]);} (void)0 + for (size_t i = 0; i < cx_nmemb(test_suites) ; i++) {run_tests(test_suites[i]);} (void)0 +#define free_test_suites for (size_t i = 0 ; i < cx_nmemb(test_suites) ; i++) {cx_test_suite_free(test_suites[i]);} (void)0 int main(void) { printf("UCX Tests\n---------\n");