avoid state buffer allocation for JSON with reasonable nesting depth

3 months ago

author
Mike Becker <universe@uap-core.de>
date
Tue, 22 Oct 2024 23:10:31 +0200 (3 months ago)
changeset 946
b428424c0214
parent 945
84a5fab8a47c
child 947
2a47b4a5c216

avoid state buffer allocation for JSON with reasonable nesting depth

src/cx/json.h file | annotate | diff | comparison | revisions
src/json.c file | annotate | diff | comparison | revisions
tests/test_json.c file | annotate | diff | comparison | revisions
tests/ucxtest.c file | annotate | diff | comparison | revisions
--- a/src/cx/json.h	Tue Oct 22 22:42:48 2024 +0200
+++ b/src/cx/json.h	Tue Oct 22 23:10:31 2024 +0200
@@ -121,8 +121,9 @@
     int tokenizer_escape;
 
     int *states;
-    int nstates;
-    int states_alloc;
+    unsigned nstates;
+    unsigned states_alloc;
+    int states_internal[8];
 
     CxJsonToken reader_token;
     CxJsonReaderType reader_type;
@@ -135,12 +136,12 @@
     double value_double;
 
     CxJsonValue **readvalue_stack;
-    int readvalue_nelm;
-    int readvalue_alloc;
+    unsigned readvalue_nelm;
+    unsigned readvalue_alloc;
     CxJsonValue *read_value;
     int readvalue_initialized;
 
-    int reader_array_alloc;
+    unsigned reader_array_alloc;
 
     int error;
 };
--- a/src/json.c	Tue Oct 22 22:42:48 2024 +0200
+++ b/src/json.c	Tue Oct 22 23:10:31 2024 +0200
@@ -38,6 +38,7 @@
  */
 
 #define PARSER_STATES_ALLOC 32
+#define PARSER_READVALUE_ALLOC 32
 
 static CxJsonValue cx_json_value_nothing = {.type = CX_JSON_NOTHING};
 
@@ -339,11 +340,21 @@
 }
 
 static int add_state(CxJson *p, int state) {
+    // TODO: this is such a common pattern, try to reuse code here
     if (p->nstates >= p->states_alloc) {
-        p->states_alloc += PARSER_STATES_ALLOC;
-        if (cx_reallocate(&p->states, p->states_alloc * sizeof(int))) {
-            return 1;
+        unsigned newcap = PARSER_STATES_ALLOC;
+        if (p->states == p->states_internal) {
+            void *mem = malloc(newcap * sizeof(int));
+            if (mem == NULL) return 1;
+            memcpy(mem, p->states, p->nstates * sizeof(int));
+            p->states = mem;
+        } else {
+            newcap += p->states_alloc;
+            if (cx_reallocate(&p->states, newcap * sizeof(int))) {
+                return 1;
+            }
         }
+        p->states_alloc = newcap;
     }
     p->states[++p->nstates] = state;
     return 0;
@@ -545,9 +556,9 @@
 /* -------------------- read value functions -------------------- */
 
 static int setup_read_value(CxJson *p) {
-    p->readvalue_alloc = PARSER_STATES_ALLOC;
+    p->readvalue_alloc = PARSER_READVALUE_ALLOC;
     p->readvalue_nelm = 0;
-    p->readvalue_stack = calloc(PARSER_STATES_ALLOC, sizeof(CxJsonValue *));
+    p->readvalue_stack = calloc(p->readvalue_alloc, sizeof(CxJsonValue *));
     if (!p->readvalue_stack) return -1;
 
     p->read_value = NULL;
@@ -659,15 +670,16 @@
 
 void cxJsonInit(CxJson *json) {
     memset(json, 0, sizeof(CxJson));
-    // TODO: do not allocate states right away
-    json->states_alloc = PARSER_STATES_ALLOC;
-    json->states = calloc(PARSER_STATES_ALLOC, sizeof(int));
+    json->states = json->states_internal;
+    json->states_alloc = cx_nmemb(json->states_internal);
     // TODO: find better way to configure the initial allocation size for arrays and objects
     json->reader_array_alloc = 8;
 }
 
 void cxJsonDestroy(CxJson *p) {
-    free(p->states);
+    if (p->states != p->states_internal) {
+        free(p->states);
+    }
     free(p->readvalue_stack);
     cxJsonValueFree(p->read_value);
     free(p->value_name);
@@ -684,7 +696,7 @@
 int cxJsonNext(CxJson *p, CxJsonValue **value) {
     // TODO: replace int with a status enum like in CxProperties
 
-    *value = NULL;
+    *value = NULL; // TODO: maybe better initialize with NOTHING?
     if (!p->readvalue_stack) {
         if (setup_read_value(p)) return -1;
     }
--- a/tests/test_json.c	Tue Oct 22 22:42:48 2024 +0200
+++ b/tests/test_json.c	Tue Oct 22 23:10:31 2024 +0200
@@ -30,6 +30,17 @@
 
 #include "cx/json.h"
 
+CX_TEST(test_json_init_default) {
+    CxJson json;
+    CX_TEST_DO {
+        cxJsonInit(&json);
+        CX_TEST_ASSERT(json.states == json.states_internal);
+        CX_TEST_ASSERT(json.nstates == 0);
+        CX_TEST_ASSERT(json.states_alloc == 8);
+        CX_TEST_ASSERT(json.reader_array_alloc == 8);
+    }
+}
+
 CX_TEST(test_json_simple_object) {
     cxstring text = cx_str(
             "{\n"
@@ -184,12 +195,59 @@
     }
 }
 
+CX_TEST(test_json_large_nesting_depth) {
+    CxJson json;
+    CxJsonValue *d1;
+    cxstring text = cx_str("{\"test\": [{},{\"foo\": [[{\"bar\":[4, 2, [null, {\"key\": 47}]]}]]}]}");
+    CX_TEST_DO {
+        cxJsonInit(&json);
+        cxJsonFill(&json, text.ptr, text.length);
+        cxJsonNext(&json, &d1);
+
+        CX_TEST_ASSERT(d1 != NULL);
+        CX_TEST_ASSERT(cxJsonIsObject(d1));
+        CxJsonValue *d2 = cxJsonObjGet(d1, "test");
+        CX_TEST_ASSERT(cxJsonIsArray(d2));
+        CX_TEST_ASSERT(cxJsonArrSize(d2) == 2);
+        CxJsonValue *d3 = cxJsonArrGet(d2, 1);
+        CX_TEST_ASSERT(cxJsonIsObject(d3));
+        CxJsonValue *d4 = cxJsonObjGet(d3, "foo");
+        CX_TEST_ASSERT(cxJsonIsArray(d4));
+        CX_TEST_ASSERT(cxJsonArrSize(d4) == 1);
+        CxJsonValue *d5 = cxJsonArrGet(d4, 0);
+        CX_TEST_ASSERT(cxJsonIsArray(d5));
+        CX_TEST_ASSERT(cxJsonArrSize(d5) == 1);
+        CxJsonValue *d6 = cxJsonArrGet(d5, 0);
+        CX_TEST_ASSERT(cxJsonIsObject(d6));
+        CxJsonValue *d7 = cxJsonObjGet(d6, "bar");
+        CX_TEST_ASSERT(cxJsonIsArray(d7));
+        CX_TEST_ASSERT(cxJsonArrSize(d7) == 3);
+        CxJsonValue *d8 = cxJsonArrGet(d7, 2);
+        CX_TEST_ASSERT(cxJsonIsArray(d8));
+        CX_TEST_ASSERT(cxJsonArrSize(d8) == 2);
+        CxJsonValue *d9a = cxJsonArrGet(d8, 0);
+        CX_TEST_ASSERT(cxJsonIsNull(d9a));
+        CxJsonValue *d9b = cxJsonArrGet(d8, 1);
+        CX_TEST_ASSERT(cxJsonIsObject(d9b));
+        CxJsonValue *d10 = cxJsonObjGet(d9b, "key");
+        CX_TEST_ASSERT(cxJsonIsInteger(d10));
+        CX_TEST_ASSERT(cxJsonAsInteger(d10) == 47);
+
+        CX_TEST_ASSERT(json.states != json.states_internal);
+        CX_TEST_ASSERT(json.states_alloc > cx_nmemb(json.states_internal));
+
+        cxJsonDestroy(&json);
+    }
+}
+
 CxTestSuite *cx_test_suite_json(void) {
     CxTestSuite *suite = cx_test_suite_new("json");
 
+    cx_test_register(suite, test_json_init_default);
     cx_test_register(suite, test_json_simple_object);
     cx_test_register(suite, test_json_object_incomplete_token);
     cx_test_register(suite, test_json_object_error);
+    cx_test_register(suite, test_json_large_nesting_depth);
 
     return suite;
 }
--- a/tests/ucxtest.c	Tue Oct 22 22:42:48 2024 +0200
+++ b/tests/ucxtest.c	Tue Oct 22 23:10:31 2024 +0200
@@ -26,6 +26,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "cx/common.h"
 #include "cx/test.h"
 
 CxTestSuite *cx_test_suite_testing_allocator(void);
@@ -51,8 +52,8 @@
 
 #define run_tests(suite) cx_test_run_stdout(suite); success += (suite)->success; failure += (suite)->failure
 #define execute_test_suites(...) unsigned success = 0, failure = 0; CxTestSuite* test_suites[] = {__VA_ARGS__}; \
-    for (size_t i = 0; i < sizeof(test_suites)/sizeof(void*) ; i++) {run_tests(test_suites[i]);} (void)0
-#define free_test_suites for (size_t i = 0 ; i < sizeof(test_suites)/sizeof(void*) ; i++) {cx_test_suite_free(test_suites[i]);} (void)0
+    for (size_t i = 0; i < cx_nmemb(test_suites) ; i++) {run_tests(test_suites[i]);} (void)0
+#define free_test_suites for (size_t i = 0 ; i < cx_nmemb(test_suites) ; i++) {cx_test_suite_free(test_suites[i]);} (void)0
 
 int main(void) {
     printf("UCX Tests\n---------\n");

mercurial