simplify how the json parser works

Thu, 05 Dec 2024 01:54:12 +0100

author
Mike Becker <universe@uap-core.de>
date
Thu, 05 Dec 2024 01:54:12 +0100
changeset 1000
1aecddf7e209
parent 999
84fc42b04d3b
child 1001
5c9ec5a0a4ef

simplify how the json parser works

relates to #431 and fixes several errors related to issue #475

src/Makefile file | annotate | diff | comparison | revisions
src/cx/json.h file | annotate | diff | comparison | revisions
src/json.c file | annotate | diff | comparison | revisions
tests/Makefile file | annotate | diff | comparison | revisions
tests/test_json.c file | annotate | diff | comparison | revisions
--- a/src/Makefile	Thu Dec 05 01:51:47 2024 +0100
+++ b/src/Makefile	Thu Dec 05 01:54:12 2024 +0100
@@ -99,8 +99,8 @@
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS)  -c $<
 
-$(build_dir)/json$(OBJ_EXT): json.c cx/json.h cx/common.h cx/string.h \
- cx/allocator.h cx/array_list.h cx/list.h cx/collection.h cx/iterator.h \
+$(build_dir)/json$(OBJ_EXT): json.c cx/json.h cx/common.h cx/allocator.h \
+ cx/string.h cx/array_list.h cx/list.h cx/collection.h cx/iterator.h \
  cx/compare.h
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS)  -c $<
--- a/src/cx/json.h	Thu Dec 05 01:51:47 2024 +0100
+++ b/src/cx/json.h	Thu Dec 05 01:54:12 2024 +0100
@@ -77,20 +77,8 @@
     CX_JSON_FALSE
 };
 
-enum cx_json_reader_type {
-    CX_JSON_READER_OBJECT_BEGIN,
-    CX_JSON_READER_OBJECT_END,
-    CX_JSON_READER_ARRAY_BEGIN,
-    CX_JSON_READER_ARRAY_END,
-    CX_JSON_READER_STRING,
-    CX_JSON_READER_INTEGER,
-    CX_JSON_READER_NUMBER,
-    CX_JSON_READER_LITERAL
-};
-
 typedef enum cx_json_token_type CxJsonTokenType;
 typedef enum cx_json_value_type CxJsonValueType;
-typedef enum cx_json_reader_type CxJsonReaderType;
 
 typedef struct cx_json_s CxJson;
 typedef struct cx_json_token_s CxJsonToken;
@@ -106,47 +94,6 @@
 
 typedef struct cx_json_obj_value_s CxJsonObjValue;
 
-struct cx_json_token_s {
-    CxJsonTokenType tokentype;
-    const char *content;
-    size_t length;
-    size_t alloc;
-};
-
-struct cx_json_s {
-    const CxAllocator *allocator;
-    const char *buffer;
-    size_t size;
-    size_t pos;
-
-    CxJsonToken uncompleted;
-    int tokenizer_escape;
-
-    int *states;
-    size_t nstates;
-    size_t states_alloc;
-    int states_internal[8];
-
-    CxJsonToken reader_token;
-    CxJsonReaderType reader_type;
-    int value_ready;
-    char *value_name;
-    size_t value_name_len;
-    char *value_str;
-    size_t value_str_len;
-    int64_t value_int;
-    double value_double;
-
-    CxJsonValue **readvalue_stack;
-    unsigned readvalue_nelm;
-    unsigned readvalue_alloc;
-    CxJsonValue *read_value;
-    int readvalue_initialized;
-
-    unsigned reader_array_alloc;
-
-    int error;
-};
 
 struct cx_json_array_s {
     CX_ARRAY_DECLARE(CxJsonValue*, array);
@@ -174,9 +121,54 @@
     } value;
 };
 
+struct cx_json_token_s {
+    CxJsonTokenType tokentype;
+    const char *content;
+    size_t length;
+    size_t alloc;
+};
 
-cx_attr_nonnull_arg(2)
-void cxJsonInit(const CxAllocator *allocator, CxJson *json);
+struct cx_json_s {
+    const CxAllocator *allocator;
+    const char *buffer;
+    size_t size;
+    size_t pos;
+
+    CxJsonToken uncompleted;
+
+    /**
+     * A pointer to an intermediate state of the currently parsed value.
+     *
+     * Never access this value manually.
+     */
+    CxJsonValue *parsed;
+
+    /**
+     * State stack.
+     */
+    CX_ARRAY_DECLARE_SIZED(int, states, unsigned);
+
+    /**
+     * Value buffer stack.
+     */
+    CX_ARRAY_DECLARE_SIZED(CxJsonValue*, vbuf, unsigned);
+
+    /**
+     * Internally reserved memory for the state stack.
+     */
+    int states_internal[8];
+
+    /**
+     * Internally reserved memory for the value buffer stack.
+     */
+    CxJsonValue* vbuf_internal[8];
+
+    int error; // TODO: set by the tokenizer on allocation failure, but not evaluated yet
+    bool tokenizer_escape; // TODO: check if it can be replaced with look-behind
+};
+
+cx_attr_nonnull_arg(1)
+void cxJsonInit(CxJson *json, const CxAllocator *allocator);
 
 cx_attr_nonnull
 void cxJsonDestroy(CxJson *json);
@@ -250,7 +242,6 @@
 
 void cxJsonValueFree(CxJsonValue *value);
 
-// TODO: if the CxJsonValue was a returned value, we could reference cxJsonValueFree() as deallocator
 cx_attr_nonnull
 int cxJsonNext(CxJson *json, CxJsonValue **value);
 
--- a/src/json.c	Thu Dec 05 01:51:47 2024 +0100
+++ b/src/json.c	Thu Dec 05 01:54:12 2024 +0100
@@ -28,6 +28,7 @@
 
 #include <string.h>
 #include <ctype.h>
+#include <assert.h>
 
 #include "cx/json.h"
 
@@ -40,6 +41,11 @@
 
 static CxJsonValue cx_json_value_nothing = {.type = CX_JSON_NOTHING};
 
+static void token_destroy(CxJsonToken *token) {
+    if (token->alloc > 0) {
+        free((char*) token->content);
+    }
+}
 
 static int token_append(CxJsonToken *token, const char *buf, size_t len) {
     if (len == 0) {
@@ -63,22 +69,20 @@
     return 0;
 }
 
-static CxJsonToken get_content(CxJson *p, size_t start, size_t end) {
+static CxJsonToken token_create(CxJson *json, size_t start, size_t end) {
     CxJsonToken token = {0};
-    size_t part2 = end - start;
-    if (p->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
-        token.content = p->buffer + start;
-        token.length = part2;
-    } else if (part2 == 0) {
-        token = p->uncompleted;
+    size_t len = end - start;
+    if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
+        token.content = json->buffer + start;
+        token.length = len;
     } else {
-        if (token_append(&p->uncompleted, p->buffer + start, end - start)) {
+        if (token_append(&json->uncompleted, json->buffer + start, len)) {
             // TODO: this does certainly not lead to correct error handling
             return (CxJsonToken){0};
         }
-        token = p->uncompleted;
+        token = json->uncompleted;
     }
-    p->uncompleted = (CxJsonToken){0};
+    json->uncompleted = (CxJsonToken){0};
     return token;
 }
 
@@ -141,17 +145,6 @@
     return type;
 }
 
-static CxJsonToken get_token(CxJson *p, size_t start, size_t end) {
-    CxJsonToken token = get_content(p, start, end);
-    if (token_isliteral(token.content, token.length)) {
-        token.tokentype = CX_JSON_TOKEN_LITERAL;
-    } else {
-        token.tokentype = token_numbertype(token.content, token.length);
-    }
-    p->pos = end;
-    return token;
-}
-
 static CxJsonTokenType char2ttype(char c) {
     switch (c) {
         case '[': {
@@ -184,13 +177,13 @@
     return CX_JSON_NO_TOKEN;
 }
 
-static CxJsonToken json_parser_next_token(CxJson *p) {
+static CxJsonToken token_parse_next(CxJson *json) {
     // current token type and start index
-    CxJsonTokenType ttype = p->uncompleted.tokentype;
-    size_t token_start = p->pos;
+    CxJsonTokenType ttype = json->uncompleted.tokentype;
+    size_t token_start = json->pos;
 
-    for (size_t i = p->pos; i < p->size; i++) {
-        char c = p->buffer[i];
+    for (size_t i = json->pos; i < json->size; i++) {
+        char c = json->buffer[i];
         if (ttype != CX_JSON_TOKEN_STRING) {
             // currently non-string token
 
@@ -205,7 +198,7 @@
                     token_start = i;
                 } else if (ctype != CX_JSON_NO_TOKEN) {
                     // single-char token
-                    p->pos = i + 1;
+                    json->pos = i + 1;
                     CxJsonToken token = {ctype, NULL, 0, 0};
                     return token;
                 } else {
@@ -215,30 +208,37 @@
             } else {
                 // finish token
                 if (ctype != CX_JSON_NO_TOKEN) {
-                    return get_token(p, token_start, i);
+                    CxJsonToken ret = token_create(json, token_start, i);
+                    if (token_isliteral(ret.content, ret.length)) {
+                        ret.tokentype = CX_JSON_TOKEN_LITERAL;
+                    } else {
+                        ret.tokentype = token_numbertype(ret.content, ret.length);
+                    }
+                    json->pos = i;
+                    return ret;
                 }
             }
         } else {
             // currently inside a string
-            if (!p->tokenizer_escape) {
+            if (json->tokenizer_escape) {
+                json->tokenizer_escape = false;
+            } else {
                 if (c == '"') {
-                    CxJsonToken ret = get_content(p, token_start, i + 1);
+                    CxJsonToken ret = token_create(json, token_start, i + 1);
                     ret.tokentype = CX_JSON_TOKEN_STRING;
-                    p->pos = i + 1;
+                    json->pos = i + 1;
                     return ret;
                 } else if (c == '\\') {
-                    p->tokenizer_escape = 1;
+                    json->tokenizer_escape = true;
                 }
-            } else {
-                p->tokenizer_escape = 0;
             }
         }
     }
 
     if (ttype != CX_JSON_NO_TOKEN) {
         // uncompleted token
-        size_t uncompeted_len = p->size - token_start;
-        if (p->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
+        size_t uncompeted_len = json->size - token_start;
+        if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
             // current token is uncompleted
             // save current token content in p->uncompleted
             CxJsonToken uncompleted;
@@ -247,17 +247,17 @@
             uncompleted.alloc = uncompeted_len + 16;
             char *tmp = malloc(uncompleted.alloc);
             if (tmp) {
-                memcpy(tmp, p->buffer + token_start, uncompeted_len);
+                memcpy(tmp, json->buffer + token_start, uncompeted_len);
                 uncompleted.content = tmp;
-                p->uncompleted = uncompleted;
+                json->uncompleted = uncompleted;
             } else {
-                p->error = 1;
+                json->error = 1;
             }
         } else {
             // previously we also had an uncompleted token
             // combine the uncompleted token with the current token
-            if (token_append(&p->uncompleted, p->buffer + token_start, uncompeted_len)) {
-                p->error = 1;
+            if (token_append(&json->uncompleted, json->buffer + token_start, uncompeted_len)) {
+                json->error = 1;
             }
         }
     }
@@ -273,7 +273,6 @@
     result.length = 0;
     result.ptr = cxMalloc(a, len - 1);
     if (result.ptr == NULL) {
-        // TODO: check if this actually leads to correct error handling
         return result;
     }
 
@@ -323,401 +322,321 @@
     return (endptr != &buf[len]);
 }
 
-static int add_state(CxJson *p, int state) {
-    CxArrayReallocator alloc = cx_array_reallocator(NULL, p->states_internal);
-    size_t size = p->nstates + 1;
-    size_t capacity = p->states_alloc;
-    // TODO: fix that nstates does not denote the size of the array
-    // TODO: replace with a 16 bit (or maybe even 8 bit) version of cx_array_add()
-    int result = cx_array_add(
-            &p->states,
-            &size,
-            &capacity,
-            sizeof(int),
-            &state,
-            &alloc
-    );
-    if (result == 0) {
-        p->nstates = size - 1;
-        p->states_alloc = capacity;
-    }
-    return result;
-}
-
-static void end_elm(CxJson *p, CxJsonReaderType type) {
-    p->reader_type = type;
-    p->nstates--;
-}
-
-#define JP_STATE_VALUE_BEGIN        0
-#define JP_STATE_VALUE_BEGIN_OBJ    1
-#define JP_STATE_VALUE_BEGIN_AR     2
-#define JP_STATE_ARRAY_SEP_OR_CLOSE 3
-#define JP_STATE_OBJ_NAME_OR_CLOSE  4
-#define JP_STATE_OBJ_NAME           5
-#define JP_STATE_OBJ_COLON          6
-#define JP_STATE_OBJ_SEP_OR_CLOSE   7
-
-static int next_state_after_value(int current) {
-    switch (current) {
-        default:
-            return -1;
-            // after value JSON complete, expect nothing
-        case JP_STATE_VALUE_BEGIN:
-            return -1;
-            // after obj value, expect ',' or '}'
-        case JP_STATE_VALUE_BEGIN_OBJ:
-            return JP_STATE_OBJ_SEP_OR_CLOSE;
-            // after array value, expect ',' or ']'
-        case JP_STATE_VALUE_BEGIN_AR:
-            return JP_STATE_ARRAY_SEP_OR_CLOSE;
-    }
-}
-
-static void clear_valuename(CxJson *p) {
-    free(p->value_name);
-    p->value_name = NULL;
-    p->value_name_len = 0;
-}
-
-static void clear_values(CxJson *p) {
-    free(p->value_str);
-    p->value_str = NULL;
-    p->value_str_len = 0;
-    p->value_int = 0;
-    p->value_double = 0;
-}
-
-static int json_read(CxJson *p) {
-    int state = p->states[p->nstates];
-    clear_values(p);
-    CxJsonToken token = json_parser_next_token(p);
-    p->reader_token = token;
-
-    p->value_ready = 0;
-
-    if (token.tokentype == CX_JSON_NO_TOKEN) {
-        return 0;
-    }
-
-    int ret = 1;
-
-    // 0 JP_STATE_VALUE_BEGIN          value begin
-    // 1 JP_STATE_VALUE_BEGIN_OBJ      value begin (inside object)
-    // 2 JP_STATE_VALUE_BEGIN_AR       value begin (inside array)
-    // 3 JP_STATE_ARRAY_SEP_OR_CLOSE   array, expect separator or arrayclose
-    // 4 JP_STATE_OBJ_NAME_OR_CLOSE    object, expect name or objclose
-    // 5 JP_STATE_OBJ_NAME             object, expect name
-    // 6 JP_STATE_OBJ_COLON            object, expect ':'
-    // 7 JP_STATE_OBJ_SEP_OR_CLOSE     object, expect separator, objclose
-
-    if (state == JP_STATE_VALUE_BEGIN_AR || state == JP_STATE_OBJ_SEP_OR_CLOSE) {
-        clear_valuename(p);
+static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) {
+    CxJsonValue *v = cxMalloc(json->allocator, sizeof(CxJsonValue));
+    if (v == NULL) {
+        return NULL;
     }
 
-    if (state < 3) {
-        // expect value
-        p->states[p->nstates] = next_state_after_value(state);
-        p->value_ready = 1;
-        switch (token.tokentype) {
-            case CX_JSON_TOKEN_BEGIN_ARRAY: {
-                p->reader_type = CX_JSON_READER_ARRAY_BEGIN;
-                ret = add_state(p, JP_STATE_VALUE_BEGIN_AR) ? -1 : 1;
-                break;
-            }
-            case CX_JSON_TOKEN_BEGIN_OBJECT: {
-                p->reader_type = CX_JSON_READER_OBJECT_BEGIN;
-                ret = add_state(p, JP_STATE_OBJ_NAME_OR_CLOSE) ? -1 : 1;
-                break;
-            }
-            case CX_JSON_TOKEN_END_ARRAY: {
-                p->value_ready = 0;
-                end_elm(p, CX_JSON_READER_ARRAY_END);
-                break;
-            }
-            case CX_JSON_TOKEN_STRING: {
-                p->reader_type = CX_JSON_READER_STRING;
-                cxmutstr str = unescape_string(p->allocator, token.content, token.length);
-                if (str.ptr) {
-                    p->value_str = str.ptr;
-                    p->value_str_len = str.length;
-                } else {
-                    ret = -1;
-                }
-                break;
-            }
-            case CX_JSON_TOKEN_INTEGER: {
-                p->reader_type = CX_JSON_READER_INTEGER;
-                if (parse_number(token.content, token.length,
-                                 &p->value_int, true)) {
-                    ret = -1;
-                }
-                break;
-            }
-            case CX_JSON_TOKEN_NUMBER: {
-                p->reader_type = CX_JSON_READER_NUMBER;
-                if (parse_number(token.content, token.length,
-                                 &p->value_double, false)) {
-                    ret = -1;
-                }
-                break;
-            }
-            case CX_JSON_TOKEN_LITERAL: {
-                p->reader_type = CX_JSON_READER_LITERAL;
-                break;
-            }
-            default: ret = -1;  
+    // initialize the value
+    if (type == CX_JSON_ARRAY) {
+        cx_array_initialize_a(json->allocator, v->value.array.array, 16);
+        if (v->value.array.array == NULL) {
+            cxFree(json->allocator, v);
+            return NULL;
+        }
+    } else if (type == CX_JSON_OBJECT) {
+        cx_array_initialize_a(json->allocator, v->value.object.values, 16);
+        if (v->value.object.values == NULL) {
+            cxFree(json->allocator, v);
+            return NULL;
         }
-    } else if (state == JP_STATE_ARRAY_SEP_OR_CLOSE) {
-        // expect ',' or ']'
-        if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
-            p->states[p->nstates] = JP_STATE_VALUE_BEGIN_AR;
-            ret = json_read(p);
-        } else if (token.tokentype == CX_JSON_TOKEN_END_ARRAY) {
-            end_elm(p, CX_JSON_READER_ARRAY_END);
-        } else {
-            ret = -1;
-        }
-    } else if (state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) {
-        if (state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
-            clear_valuename(p);
-            end_elm(p, CX_JSON_READER_OBJECT_END);
-        } else {
-            // expect string
-            if (token.tokentype != CX_JSON_TOKEN_STRING) return -1;
+    } else {
+        memset(v, 0, sizeof(CxJsonValue));
+    }
+    v->type = type;
+    v->allocator = json->allocator;
 
-            if (p->value_name) free(p->value_name);
-            cxmutstr valname = unescape_string(p->allocator, token.content, token.length);
-            p->value_name = valname.ptr;
-            p->value_name_len = valname.length;
-
-            // next state
-            p->states[p->nstates] = JP_STATE_OBJ_COLON;
-            ret = json_read(p);
-        }
-    } else if (state == JP_STATE_OBJ_COLON) {
-        // expect ':'
-        if (token.tokentype != CX_JSON_TOKEN_NAME_SEPARATOR) return -1;
-        // next state
-        p->states[p->nstates] = JP_STATE_VALUE_BEGIN_OBJ;
-        ret = json_read(p);
-    } else if (state == JP_STATE_OBJ_SEP_OR_CLOSE) {
-        // expect ',' or '}'
-        if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
-            p->states[p->nstates] = JP_STATE_OBJ_NAME;
-            ret = json_read(p);
-        } else if (token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
-            end_elm(p, CX_JSON_READER_OBJECT_END);
+    // add the new value to a possible parent
+    CxArrayReallocator value_realloc = cx_array_reallocator(json->allocator, NULL);
+    if (json->vbuf_size > 0) {
+        CxJsonValue *parent = json->vbuf[json->vbuf_size - 1];
+        if (parent->type == CX_JSON_ARRAY) {
+            cx_array_simple_add_a(&value_realloc, parent->value.array.array, v);
+        } else if (parent->type == CX_JSON_OBJECT) {
+            assert(parent->value.object.values_size > 0);
+            assert(parent->value.object.values[parent->value.object.values_size - 1].value == NULL);
+            parent->value.object.values[parent->value.object.values_size - 1].value = v;
         } else {
-            ret = -1;
+            assert(false);
         }
     }
-    
-    if (token.alloc > 0) {
-        free((char*)token.content);
+
+    // add the new value to the stack, if it is an array or object
+    if (type == CX_JSON_ARRAY || type == CX_JSON_OBJECT) {
+        CxArrayReallocator vbuf_realloc = cx_array_reallocator(NULL, json->vbuf_internal);
+        if (cx_array_simple_add_a(&vbuf_realloc, json->vbuf, v)) {
+            cxFree(json->allocator, v);
+            return NULL;
+        }
     }
 
-    return ret;
-}
-
-static CxJsonLiteral json_reader_literal(CxJson *p) {
-    const char *l = p->reader_token.content;
-    size_t token_len = p->reader_token.length;
-    if (token_len == 4 && !memcmp(l, "true", 4)) {
-        return CX_JSON_TRUE;
-    } else if (token_len == 5 && !memcmp(l, "false", 5)) {
-        return CX_JSON_FALSE;
+    // if currently no value is parsed, this is now the value of interest
+    if (json->parsed == NULL) {
+        json->parsed = v;
     }
-    return CX_JSON_NULL;
-}
 
-/* -------------------- read value functions -------------------- */
-
-static int setup_read_value(CxJson *p) {
-    p->readvalue_alloc = PARSER_READVALUE_ALLOC;
-    p->readvalue_nelm = 0;
-    p->readvalue_stack = calloc(p->readvalue_alloc, sizeof(CxJsonValue *));
-    if (!p->readvalue_stack) return -1;
-
-    p->read_value = NULL;
-    p->readvalue_stack[0] = NULL;
-
-    return 0;
+    return v;
 }
 
-static int add_to_parent(CxJson *p, CxJsonValue *parent, CxJsonValue *v) {
-    if (!parent) {
-        return -1; // shouldn't happen but who knows
-    }
-    
-    CxArrayReallocator reallocator = cx_array_reallocator(p->allocator, NULL);
-    if (parent->type == CX_JSON_OBJECT) {
-        if (!p->value_name || p->value_name_len == 0) {
-            return -1;
-        }
-        char *valuename = p->value_name;
-        p->value_name = NULL;
-
-        CxJsonObjValue newvalue;
-        newvalue.name = valuename;
-        newvalue.value = v;
-
-        return cx_array_add(
-                &parent->value.object.values,
-                &parent->value.object.values_size,
-                &parent->value.object.values_capacity, 
-                sizeof(CxJsonObjValue),
-                &newvalue,
-                &reallocator);
-    } else if (parent->type == CX_JSON_ARRAY) {
-        return cx_array_add(
-                &parent->value.array.array,
-                &parent->value.array.array_size,
-                &parent->value.array.array_capacity, 
-                sizeof(CxJsonValue*),
-                &v,
-                &reallocator);
-    } else {
-        return -1; // should also never happen
-    }
+static int json_obj_add_entry(CxJson *json, char *name) {
+    CxJsonObjValue kv = {name, NULL};
+    assert(json->vbuf_size > 0);
+    CxJsonValue *parent = json->vbuf[json->vbuf_size - 1];
+    assert(parent != NULL);
+    assert(parent->type == CX_JSON_OBJECT);
+    CxArrayReallocator value_realloc = cx_array_reallocator(json->allocator, NULL);
+    return cx_array_simple_add_a(&value_realloc, parent->value.object.values, kv);
 }
 
+#define JP_STATE_VALUE_BEGIN         0
+#define JP_STATE_VALUE_END          10
+#define JP_STATE_VALUE_BEGIN_OBJ     1
+#define JP_STATE_OBJ_SEP_OR_CLOSE   11
+#define JP_STATE_VALUE_BEGIN_AR      2
+#define JP_STATE_ARRAY_SEP_OR_CLOSE 12
+#define JP_STATE_OBJ_NAME_OR_CLOSE   5
+#define JP_STATE_OBJ_NAME            6
+#define JP_STATE_OBJ_COLON           7
 
-static int readvaluestack_add(CxJson *p, CxJsonValue *v) {
-    if (p->readvalue_nelm == p->readvalue_alloc) {
-        p->readvalue_alloc *= 2;
-        if (cx_reallocate(&p->readvalue_stack, sizeof(CxJsonValue *) * p->readvalue_alloc)) {
-            return -1;
-        }
-    }
-    p->readvalue_stack[p->readvalue_nelm++] = v;
-    return 0;
-}
-
-void cxJsonInit(const CxAllocator *allocator, CxJson *json) {
+void cxJsonInit(CxJson *json, const CxAllocator *allocator) {
     if (allocator == NULL) {
         allocator = cxDefaultAllocator;
     }
     
     memset(json, 0, sizeof(CxJson));
     json->allocator = allocator;
+
     json->states = json->states_internal;
-    json->states_alloc = cx_nmemb(json->states_internal);
-    // TODO: find better way to configure the initial allocation size for arrays and objects
-    json->reader_array_alloc = 8;
+    json->states_capacity = cx_nmemb(json->states_internal);
+    json->states[0] = JP_STATE_VALUE_BEGIN;
+    json->states_size = 1;
+
+    json->vbuf = json->vbuf_internal;
+    json->vbuf_capacity = cx_nmemb(json->vbuf_internal);
 }
 
-void cxJsonDestroy(CxJson *p) {
-    if (p->states != p->states_internal) {
-        free(p->states);
+void cxJsonDestroy(CxJson *json) {
+    if (json->states != json->states_internal) {
+        free(json->states);
     }
-    free(p->readvalue_stack);
-    cxJsonValueFree(p->read_value);
-    free(p->value_name);
-    free(p->value_str);
+    if (json->vbuf != json->vbuf_internal) {
+        free(json->vbuf);
+    }
+    cxJsonValueFree(json->parsed);
+    json->parsed = NULL;
 }
 
-int cxJsonFilln(CxJson *p, const char *buf, size_t size) {
+int cxJsonFilln(CxJson *json, const char *buf, size_t size) {
     // TODO: implement rescue buffer like in CxProperties to allow subsequent fills
-    p->buffer = buf;
-    p->size = size;
-    p->pos = 0;
+    json->buffer = buf;
+    json->size = size;
+    json->pos = 0;
     return 0;
 }
 
-int cxJsonNext(CxJson *p, CxJsonValue **value) {
-    // TODO: replace int with a status enum like in CxProperties
+static void json_add_state(CxJson *json, int state) {
+    // we have guaranteed the necessary space with cx_array_simple_reserve_a()
+    // therefore, we can safely add the state in the simplest way possible
+    json->states[json->states_size++] = state;
+}
+
+#define return_rec(code) \
+    token_destroy(&token); \
+    return code
+
+static int json_parse(CxJson *json) {
+    // Reserve a pointer for a possibly read value
+    CxJsonValue *vbuf = NULL;
 
-    *value = NULL; // TODO: maybe better initialize with NOTHING?
-    if (!p->readvalue_stack) {
-        if (setup_read_value(p)) return -1;
+    // grab the next token
+    CxJsonToken token = token_parse_next(json);
+    if (token.tokentype == CX_JSON_NO_TOKEN) {
+        // nothing found, wait for more data
+        return 0;
+    }
+
+    // pop the current state
+    assert(json->states_size > 0);
+    int state = json->states[--json->states_size];
+
+    // guarantee that at least two more states fit on the stack
+    CxArrayReallocator state_realloc = cx_array_reallocator(NULL, json->states_internal);
+    if (cx_array_simple_reserve_a(&state_realloc, json->states, 2)) {
+        return -1;
     }
 
-    while (p->readvalue_nelm > 0 || !p->read_value) {
-        if (p->value_ready) {
-            // value available without another read
-            CxJsonValue *v = cxCalloc(p->allocator, 1, sizeof(CxJsonValue));
-            if (!v) return -1;
-            v->allocator = p->allocator;
+
+    //  0 JP_STATE_VALUE_BEGIN          value begin
+    // 10 JP_STATE_VALUE_END            expect value end
+    //  1 JP_STATE_VALUE_BEGIN_OBJ      value begin (inside object)
+    // 11 JP_STATE_OBJ_SEP_OR_CLOSE     object, expect separator, objclose
+    //  2 JP_STATE_VALUE_BEGIN_AR       value begin (inside array)
+    // 12 JP_STATE_ARRAY_SEP_OR_CLOSE   array, expect separator or arrayclose
+    //  5 JP_STATE_OBJ_NAME_OR_CLOSE    object, expect name or objclose
+    //  6 JP_STATE_OBJ_NAME             object, expect name
+    //  7 JP_STATE_OBJ_COLON            object, expect ':'
 
-            if (p->readvalue_nelm > 0) {
-                if (add_to_parent(p, p->readvalue_stack[p->readvalue_nelm - 1], v)) {
-                    free(v);
-                    return -1;
+    if (state < 3) {
+        // push expected end state to the stack
+        json_add_state(json, 10 + state);
+        switch (token.tokentype) {
+            case CX_JSON_TOKEN_BEGIN_ARRAY: {
+                if (create_json_value(json, CX_JSON_ARRAY) == NULL) {
+                    // TODO: error code - no memory
+                    return_rec(-1);
+                }
+                json_add_state(json, JP_STATE_VALUE_BEGIN_AR);
+                return_rec(1);
+            }
+            case CX_JSON_TOKEN_BEGIN_OBJECT: {
+                if (create_json_value(json, CX_JSON_OBJECT) == NULL) {
+                    // TODO: error code - no memory
+                    return_rec(-1);
+                }
+                json_add_state(json, JP_STATE_OBJ_NAME_OR_CLOSE);
+                return_rec(1);
+            }
+            case CX_JSON_TOKEN_STRING: {
+                if ((vbuf = create_json_value(json, CX_JSON_STRING)) == NULL) {
+                    // TODO: error code - no memory
+                    return_rec(-1);
+                }
+                cxmutstr str = unescape_string(json->allocator, token.content, token.length);
+                if (str.ptr == NULL) {
+                    // TODO: error code - no memory
+                    return_rec(-1);
+                }
+                vbuf->value.string = str;
+                return_rec(1);
+            }
+            case CX_JSON_TOKEN_INTEGER:
+            case CX_JSON_TOKEN_NUMBER: {
+                int type = token.tokentype == CX_JSON_TOKEN_INTEGER ? CX_JSON_INTEGER : CX_JSON_NUMBER;
+                if (NULL == (vbuf = create_json_value(json, type))) {
+                    // TODO: error code - no memory
+                    return_rec(-1);
+                }
+                if (parse_number(token.content, token.length, &vbuf->value,type == CX_JSON_INTEGER)) {
+                    // TODO: error code - format error
+                    return_rec(-1);
                 }
-            } else {
-                // set this value as root
-                p->read_value = v;
+                return_rec(1);
+            }
+            case CX_JSON_TOKEN_LITERAL: {
+                if ((vbuf = create_json_value(json, CX_JSON_LITERAL)) == NULL) {
+                    // TODO: error code - no memory
+                    return_rec(-1);
+                }
+                const char *l = token.content;
+                size_t token_len = token.length;
+                if (token_len == 4 && !memcmp(l, "true", 4)) {
+                    vbuf->value.literal = CX_JSON_TRUE;
+                } else if (token_len == 5 && !memcmp(l, "false", 5)) {
+                    vbuf->value.literal = CX_JSON_FALSE;
+                } else {
+                    vbuf->value.literal = CX_JSON_NULL;
+                }
+                return_rec(1);
+            }
+            default: {
+                // TODO: error code - unexpected token
+                return_rec(-1);
+            }
+        }
+    } else if (state == JP_STATE_ARRAY_SEP_OR_CLOSE) {
+        // expect ',' or ']'
+        if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
+            json_add_state(json, JP_STATE_VALUE_BEGIN_AR);
+            return_rec(1);
+        } else if (token.tokentype == CX_JSON_TOKEN_END_ARRAY) {
+            // discard the array from the value buffer
+            json->vbuf_size--;
+            return_rec(1);
+        } else {
+            // TODO: error code - unexpected token
+            return_rec(-1);
+        }
+    } else if (state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) {
+        if (state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
+            // discard the obj from the value buffer
+            json->vbuf_size--;
+            return_rec(1);
+        } else {
+            // expect string
+            if (token.tokentype != CX_JSON_TOKEN_STRING) {
+                // TODO: error code - unexpected token
+                return_rec(-1);
             }
 
-            switch (p->reader_type) {
-                case CX_JSON_READER_OBJECT_BEGIN: {
-                    v->type = CX_JSON_OBJECT;
-                    if (readvaluestack_add(p, v)) {
-                        return -1;
-                    }
-                    break;
-                }
-                case CX_JSON_READER_OBJECT_END:
-                    return -1; // should not happen
-                case CX_JSON_READER_ARRAY_BEGIN: {
-                    v->type = CX_JSON_ARRAY;
-                    if (readvaluestack_add(p, v)) {
-                        return -1;
-                    }
-                    break;
-                }
-                case CX_JSON_READER_ARRAY_END:
-                    return -1; // should not happen
-                case CX_JSON_READER_STRING: {
-                    v->type = CX_JSON_STRING;
-                    if (p->value_str) {
-                        v->value.string.ptr = p->value_str;
-                        v->value.string.length = p->value_str_len;
-                        p->value_str = NULL;
-                    }
-                    break;
-                }
-                case CX_JSON_READER_INTEGER: {
-                    v->type = CX_JSON_INTEGER;
-                    v->value.integer = p->value_int;
-                    break;
-                }
-                case CX_JSON_READER_NUMBER: {
-                    v->type = CX_JSON_NUMBER;
-                    v->value.number = p->value_double;
-                    break;
-                }
-                case CX_JSON_READER_LITERAL: {
-                    v->type = CX_JSON_LITERAL;
-                    v->value.literal = json_reader_literal(p);
-                    break;
-                }
+            // add new entry
+            cxmutstr name = unescape_string(json->allocator, token.content, token.length);
+            if (name.ptr == NULL) {
+                // TODO: error code - no mem
+                return_rec(-1);
             }
-        } else if (p->readvalue_initialized) {
-            CxJsonReaderType rt = p->reader_type;
-            if (rt == CX_JSON_READER_OBJECT_END || rt == CX_JSON_READER_ARRAY_END) {
-                p->readvalue_nelm--;
-            }
-            // else: p->value_ready is 1, this will be handled in the next run
-        }
+            json_obj_add_entry(json, name.ptr);
 
-        if (p->readvalue_nelm > 0 || !p->read_value) {
-            int r = json_read(p);
-            if (r != 1) {
-                p->readvalue_initialized = 0;
-                return r;
-            }
-            p->readvalue_initialized = 1;
+            // next state
+            json_add_state(json, JP_STATE_OBJ_COLON);
+            return_rec(1);
+        }
+    } else if (state == JP_STATE_OBJ_COLON) {
+        // expect ':'
+        if (token.tokentype != CX_JSON_TOKEN_NAME_SEPARATOR) {
+            // TODO: error code - unexpected token
+            return_rec(-1);
+        }
+        // next state
+        json_add_state(json, JP_STATE_VALUE_BEGIN_OBJ);
+        return_rec(1);
+    } else if (state == JP_STATE_OBJ_SEP_OR_CLOSE) {
+        // expect ',' or '}'
+        if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
+            json_add_state(json, JP_STATE_OBJ_NAME);
+            return_rec(1);
+        } else if (token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
+            // discard the obj from the value buffer
+            json->vbuf_size--;
+            return_rec(1);
+        } else {
+            // TODO: error code - unexpected token
+            return_rec(-1);
         }
+    } else {
+        // should be unreachable
+        assert(false);
+        return_rec(-1);
     }
+}
+
+int cxJsonNext(CxJson *json, CxJsonValue **value) {
+    // TODO: replace int with a status enum like in CxProperties
+
+    // initialize output value
+    *value = &cx_json_value_nothing;
 
-    *value = p->read_value;
-    p->readvalue_initialized = 0;
-    p->read_value = NULL;
-    p->value_ready = 0;
+    // parse data
+    int result;
+    do {
+        result = json_parse(json);
+        if (result == 1 && json->states_size == 1) {
+            // final state reached
+            assert(json->states[0] == JP_STATE_VALUE_END);
+            assert(json->vbuf_size == 0);
 
-    return 1;
+            // write output value
+            *value = json->parsed;
+            json->parsed = NULL;
+
+            // re-initialize state machine
+            json->states[0] = JP_STATE_VALUE_BEGIN;
+
+            return 1;
+        }
+    } while (result == 1);
+
+    return result;
 }
 
 void cxJsonValueFree(CxJsonValue *value) {
--- a/tests/Makefile	Thu Dec 05 01:51:47 2024 +0100
+++ b/tests/Makefile	Thu Dec 05 01:54:12 2024 +0100
@@ -87,10 +87,11 @@
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS) -I../src -c $<
 
-$(TEST_DIR)/test_json$(OBJ_EXT): test_json.c ../src/cx/test.h \
- ../src/cx/common.h ../src/cx/json.h ../src/cx/string.h \
- ../src/cx/allocator.h ../src/cx/array_list.h ../src/cx/list.h \
- ../src/cx/collection.h ../src/cx/iterator.h ../src/cx/compare.h
+$(TEST_DIR)/test_json$(OBJ_EXT): test_json.c util_allocator.h \
+ ../src/cx/allocator.h ../src/cx/common.h ../src/cx/test.h \
+ ../src/cx/json.h ../src/cx/allocator.h ../src/cx/string.h \
+ ../src/cx/array_list.h ../src/cx/list.h ../src/cx/collection.h \
+ ../src/cx/iterator.h ../src/cx/compare.h ../src/cx/mempool.h
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS) -I../src -c $<
 
--- a/tests/test_json.c	Thu Dec 05 01:51:47 2024 +0100
+++ b/tests/test_json.c	Thu Dec 05 01:54:12 2024 +0100
@@ -26,6 +26,7 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include "util_allocator.h"
 #include "cx/test.h"
 
 #include "cx/json.h"
@@ -34,11 +35,13 @@
 CX_TEST(test_json_init_default) {
     CxJson json;
     CX_TEST_DO {
-        cxJsonInit(NULL, &json);
+        cxJsonInit(&json, NULL);
         CX_TEST_ASSERT(json.states == json.states_internal);
-        CX_TEST_ASSERT(json.nstates == 0);
-        CX_TEST_ASSERT(json.states_alloc == 8);
-        CX_TEST_ASSERT(json.reader_array_alloc == 8);
+        CX_TEST_ASSERT(json.states_size == 1);
+        CX_TEST_ASSERT(json.states_capacity >= 8);
+        CX_TEST_ASSERT(json.vbuf == json.vbuf_internal);
+        CX_TEST_ASSERT(json.vbuf_size == 0);
+        CX_TEST_ASSERT(json.vbuf_capacity >= 8);
     }
 }
 
@@ -59,7 +62,7 @@
         int result;
 
         CxJson json;
-        cxJsonInit(NULL, &json);
+        cxJsonInit(&json, NULL);
         cxJsonFill(&json, text);
 
         // parse the big fat object
@@ -126,7 +129,7 @@
         int result;
 
         CxJson json;
-        cxJsonInit(NULL, &json);
+        cxJsonInit(&json, NULL);
         CxJsonValue *obj;
         
         size_t part = 0;
@@ -191,12 +194,12 @@
         CxJsonValue *obj = NULL;
         
         for(int i=0;i<5;i++) {
-            cxJsonInit(NULL, &json);
+            cxJsonInit(&json, NULL);
             cxJsonFill(&json, tests[i]);
             result = cxJsonNext(&json, &obj);
 
             CX_TEST_ASSERT(result == -1);
-            CX_TEST_ASSERT(obj == NULL);
+            CX_TEST_ASSERT(obj != NULL && obj->type == CX_JSON_NOTHING);
             cxJsonDestroy(&json);
         }
     }
@@ -207,7 +210,7 @@
     CxJsonValue *d1;
     cxstring text = cx_str("{\"test\": [{},{\"foo\": [[{\"bar\":[4, 2, [null, {\"key\": 47}]]}]]}]}");
     CX_TEST_DO {
-        cxJsonInit(NULL, &json);
+        cxJsonInit(&json, NULL);
         cxJsonFill(&json, text);
         cxJsonNext(&json, &d1);
 
@@ -241,7 +244,7 @@
         CX_TEST_ASSERT(cxJsonAsInteger(d10) == 47);
 
         CX_TEST_ASSERT(json.states != json.states_internal);
-        CX_TEST_ASSERT(json.states_alloc > cx_nmemb(json.states_internal));
+        CX_TEST_ASSERT(json.states_capacity > cx_nmemb(json.states_internal));
         
         cxJsonValueFree(d1);
         cxJsonDestroy(&json);
@@ -250,7 +253,7 @@
 
 CX_TEST(test_json_number) {
     CxJson json;
-    cxJsonInit(NULL, &json);
+    cxJsonInit(&json, NULL);
     CX_TEST_DO {
         // TODO: find a better way to terminate values that are not arrays/objects
         CxJsonValue *v;
@@ -273,7 +276,7 @@
 
 CX_TEST(test_json_multiple_values) {
     CxJson json;
-    cxJsonInit(NULL, &json);
+    cxJsonInit(&json, NULL);
     CX_TEST_DO {
         CxJsonValue *v;
         int result;
@@ -334,10 +337,10 @@
 }
 
 CX_TEST(test_json_allocator) {
-    CxMempool *mp = cxMempoolCreate(64, NULL);
-    CxJson json;
-    cxJsonInit(mp->allocator, &json);
-    
+    CxTestingAllocator talloc;
+    cx_testing_allocator_init(&talloc);
+    CxAllocator *allocator = &talloc.base;
+
     cxstring text = cx_str(
             "{\n"
             "\t\"message\":\"success\",\n"
@@ -346,22 +349,52 @@
     );
 
     CX_TEST_DO {
-        int result;
-
         CxJson json;
-        cxJsonInit(mp->allocator, &json);
+        cxJsonInit(&json, allocator);
         cxJsonFill(&json, text);
         
         CxJsonValue *obj;
-        result = cxJsonNext(&json, &obj);
+        int result = cxJsonNext(&json, &obj);
         CX_TEST_ASSERT(result == 1);
-        CX_TEST_ASSERT(obj->allocator == mp->allocator);
+        CX_TEST_ASSERT(obj->allocator == allocator);
         
         // this recursively frees everything 
         cxJsonValueFree(obj);
         cxJsonDestroy(&json);
-        cxMempoolFree(mp);
+
+        CX_TEST_ASSERT(cx_testing_allocator_verify(&talloc));
     }
+    cx_testing_allocator_destroy(&talloc);
+}
+
+CX_TEST(test_json_allocator_parse_error) {
+    CxTestingAllocator talloc;
+    cx_testing_allocator_init(&talloc);
+    CxAllocator *allocator = &talloc.base;
+
+    cxstring text = cx_str(
+            "{\n"
+            "\t\"message\":\"success\"\n" // <-- missing comma
+            "\t\"data\":[\"value1\",{\"x\":123, \"y\":523 }]\n"
+            "}"
+    );
+
+    CX_TEST_DO {
+        CxJson json;
+        cxJsonInit(&json, allocator);
+        cxJsonFill(&json, text);
+
+        CxJsonValue *obj = NULL;
+        int result = cxJsonNext(&json, &obj);
+        CX_TEST_ASSERT(result == -1);
+        CX_TEST_ASSERT(obj != NULL && obj->type == CX_JSON_NOTHING);
+
+        // clean up any leftover memory
+        cxJsonDestroy(&json);
+
+        CX_TEST_ASSERT(cx_testing_allocator_verify(&talloc));
+    }
+    cx_testing_allocator_destroy(&talloc);
 }
 
 CxTestSuite *cx_test_suite_json(void) {
@@ -375,6 +408,7 @@
     cx_test_register(suite, test_json_number);
     cx_test_register(suite, test_json_multiple_values);
     cx_test_register(suite, test_json_allocator);
+    cx_test_register(suite, test_json_allocator_parse_error);
     
     return suite;
 }

mercurial