Mercurial > hg > ucx / changeset

--- a/src/Makefile	Sat Oct 19 16:28:15 2024 +0200
+++ b/src/Makefile	Sat Oct 19 17:25:11 2024 +0200
@@ -25,7 +25,7 @@

 SRC = allocator.c array_list.c buffer.c compare.c hash_key.c hash_map.c \
   iterator.c linked_list.c list.c map.c mempool.c printf.c string.c tree.c \
-  utils.c properties.c
+  utils.c properties.c json.c

 OBJ_EXT=.o
 OBJ=$(SRC:%.c=$(build_dir)/%$(OBJ_EXT))
@@ -99,6 +99,11 @@
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS) -c $<

+$(build_dir)/json$(OBJ_EXT): json.c cx/json.h cx/common.h cx/string.h \
+ cx/allocator.h
+	@echo "Compiling $<"
+	$(CC) -o $@ $(CFLAGS) -c $<
+
 $(build_dir)/linked_list$(OBJ_EXT): linked_list.c cx/linked_list.h \
  cx/common.h cx/list.h cx/collection.h cx/allocator.h cx/iterator.h \
  cx/compare.h cx/utils.h cx/compare.h
@@ -126,8 +131,8 @@
 	$(CC) -o $@ $(CFLAGS) -c $<

 $(build_dir)/properties$(OBJ_EXT): properties.c cx/properties.h \
- cx/common.h cx/string.h cx/allocator.h cx/array_list.h cx/list.h \
- cx/collection.h cx/iterator.h cx/compare.h
+ cx/common.h cx/string.h cx/allocator.h cx/map.h cx/collection.h \
+ cx/iterator.h cx/compare.h cx/hash_key.h cx/array_list.h cx/list.h
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS) -c $<
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cx/json.h	Sat Oct 19 17:25:11 2024 +0200
@@ -0,0 +1,299 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * \file json.h
+ * \brief Interface for parsing data from JSON files.
+ * \author Mike Becker
+ * \author Olaf Wintermann
+ * \copyright 2-Clause BSD License
+ */
+
+#ifndef UCX_JSON_H
+#define UCX_JSON_H
+
+#include "common.h"
+#include "string.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Token categories produced by the tokenizer.
+ */
+enum cx_json_token_type {
+    // no (complete) token available
+    CX_JSON_NO_TOKEN,
+    // text that is neither a literal nor a well-formed number
+    CX_JSON_TOKEN_ERROR,
+    // '['
+    CX_JSON_TOKEN_BEGIN_ARRAY,
+    // '{'
+    CX_JSON_TOKEN_BEGIN_OBJECT,
+    // ']'
+    CX_JSON_TOKEN_END_ARRAY,
+    // '}'
+    CX_JSON_TOKEN_END_OBJECT,
+    // ':'
+    CX_JSON_TOKEN_NAME_SEPARATOR,
+    // ','
+    CX_JSON_TOKEN_VALUE_SEPARATOR,
+    // text enclosed in double quotes (quotes are part of the token content)
+    CX_JSON_TOKEN_STRING,
+    // number without decimal separator and without exponent
+    CX_JSON_TOKEN_INTEGER,
+    // number with decimal separator and/or exponent
+    CX_JSON_TOKEN_NUMBER,
+    // one of true, false, null
+    CX_JSON_TOKEN_LITERAL,
+    // whitespace character
+    CX_JSON_TOKEN_SPACE
+};
+
+/**
+ * Discriminator for the union stored in a CxJsonValue.
+ */
+enum cx_json_value_type {
+    CX_JSON_NOTHING, // this allows us to always return non-NULL values
+    CX_JSON_OBJECT,
+    CX_JSON_ARRAY,
+    CX_JSON_STRING,
+    CX_JSON_INTEGER, // TODO: the spec does not know integer types
+    CX_JSON_NUMBER,
+    CX_JSON_LITERAL
+};
+
+/**
+ * The three JSON literals.
+ */
+enum cx_json_literal_type {
+    CX_JSON_NULL,
+    CX_JSON_TRUE,
+    CX_JSON_FALSE
+};
+
+/**
+ * Kind of the element most recently reported by the low-level reader.
+ */
+enum cx_json_reader_type {
+    CX_JSON_READER_OBJECT_BEGIN,
+    CX_JSON_READER_OBJECT_END,
+    CX_JSON_READER_ARRAY_BEGIN,
+    CX_JSON_READER_ARRAY_END,
+    CX_JSON_READER_STRING,
+    CX_JSON_READER_INTEGER,
+    CX_JSON_READER_NUMBER,
+    CX_JSON_READER_LITERAL
+};
+
+typedef enum cx_json_token_type CxJsonTokenType;
+typedef enum cx_json_value_type CxJsonValueType;
+typedef enum cx_json_literal_type CxJsonLiteralType;
+typedef enum cx_json_reader_type CxJsonReaderType;
+
+typedef struct cx_json_s CxJson;
+typedef struct cx_json_token_s CxJsonToken;
+
+typedef struct cx_json_value_s CxJsonValue;
+
+typedef struct cx_json_array_s CxJsonArray;
+typedef struct cx_json_object_s CxJsonObject;
+typedef struct cx_mutstr_s CxJsonString;
+typedef struct cx_json_integer_s CxJsonInteger;
+typedef struct cx_json_number_s CxJsonNumber;
+typedef struct cx_json_literal_s CxJsonLiteral;
+
+typedef struct cx_json_obj_value_s CxJsonObjValue;
+
+/**
+ * A token produced by the tokenizer.
+ */
+struct cx_json_token_s {
+    // category of this token
+    CxJsonTokenType tokentype;
+    // token text (not zero-terminated); points into the input buffer
+    // or to a heap buffer, see alloc
+    const char *content;
+    // number of bytes in content
+    size_t length;
+    // capacity of the heap buffer behind content; zero when content
+    // is not a heap buffer owned by the token
+    size_t alloc;
+};
+
+/**
+ * Parser state.
+ *
+ * Initialize with cxJsonInit() and release with cxJsonDestroy().
+ */
+struct cx_json_s {
+    // input most recently supplied via cxJsonFill() (not copied)
+    const char *buffer;
+    // number of bytes in buffer
+    size_t size;
+    // index of the next unread byte in buffer
+    size_t pos;
+
+    // token that started in a previous buffer but was not finished there
+    CxJsonToken uncompleted;
+    // non-zero when the previous character inside a string was a backslash
+    int tokenizer_escape;
+
+    // stack of parser states; states[nstates] is the current state
+    int *states;
+    // index of the top of the states stack
+    int nstates;
+    // number of ints allocated for states
+    int states_alloc;
+
+    // the token most recently read by the reader
+    CxJsonToken reader_token;
+    // kind of the element most recently reported by the reader
+    CxJsonReaderType reader_type;
+    // non-zero when the reader has a value (or begin of a container) ready
+    int value_ready;
+    // unescaped name of the current object member (heap allocated) or NULL
+    char *value_name;
+    size_t value_name_len;
+    // unescaped string value (heap allocated) or NULL
+    char *value_str;
+    size_t value_str_len;
+    // most recently parsed numeric value as integer
+    int64_t value_int;
+    // most recently parsed numeric value as double
+    double value_double;
+
+    // stack of the currently open containers while building a value tree
+    CxJsonValue **readvalue_stack;
+    // number of elements on readvalue_stack
+    int readvalue_nelm;
+    // capacity of readvalue_stack
+    int readvalue_alloc;
+    // root of the value tree currently being built
+    CxJsonValue *read_value;
+    int readvalue_initialized;
+
+    // initial capacity for newly created arrays and objects
+    int reader_array_alloc;
+
+    // set to non-zero when an allocation in the tokenizer failed
+    int error;
+};
+
+/**
+ * Payload of a JSON array value.
+ */
+struct cx_json_array_s {
+    // the elements
+    CxJsonValue **array;
+    // capacity of array (in elements)
+    size_t alloc;
+    // number of elements in use
+    size_t size;
+};
+
+/**
+ * Payload of a JSON object value.
+ */
+struct cx_json_object_s {
+    // the members in insertion order
+    CxJsonObjValue *values;
+    // capacity of values (in members)
+    size_t alloc;
+    // number of members in use
+    size_t size;
+};
+
+/**
+ * A single member (name/value pair) of a JSON object.
+ */
+struct cx_json_obj_value_s {
+    // member name (heap allocated, zero-terminated)
+    char *name;
+    // member value
+    CxJsonValue *value;
+};
+
+// TODO: remove single member structs
+
+/**
+ * Payload of a JSON value of type CX_JSON_INTEGER.
+ */
+struct cx_json_integer_s {
+    int64_t value;
+};
+
+/**
+ * Payload of a JSON value of type CX_JSON_NUMBER.
+ */
+struct cx_json_number_s {
+    double value;
+};
+
+/**
+ * Payload of a JSON value of type CX_JSON_LITERAL.
+ */
+struct cx_json_literal_s {
+    CxJsonLiteralType literal;
+};
+
+/**
+ * A JSON value: a type tag plus the payload for that type.
+ *
+ * Only the union member that corresponds to \c type holds valid data.
+ */
+struct cx_json_value_s {
+    CxJsonValueType type;
+    union {
+        CxJsonArray array;
+        CxJsonObject object;
+        CxJsonString string;
+        CxJsonInteger integer;
+        CxJsonNumber number;
+        CxJsonLiteral literal;
+    } value;
+};
+
+// TODO: add support for CxAllocator
+
+/**
+ * Initializes the parser structure.
+ *
+ * All members are zeroed and the initial state stack is allocated.
+ *
+ * @param json the parser to initialize
+ */
+__attribute__((__nonnull__))
+void cxJsonInit(CxJson *json);
+
+/**
+ * Releases the memory owned by the parser structure.
+ *
+ * The structure itself is not freed.
+ *
+ * @param json the parser to destroy
+ */
+__attribute__((__nonnull__))
+void cxJsonDestroy(CxJson *json);
+
+/**
+ * Supplies the next chunk of input.
+ *
+ * The data is not copied; the parser only stores the pointer and starts
+ * reading at the beginning of the buffer. The buffer must therefore stay
+ * valid while the parser reads from it.
+ *
+ * @param json the parser
+ * @param buf the input data
+ * @param len the number of bytes in \p buf
+ */
+__attribute__((__nonnull__))
+void cxJsonFill(CxJson *json, const char *buf, size_t len);
+
+// TODO: discuss if it is intentional that cxJsonNext() will usually parse an entire file in one go
+/**
+ * Reads the next value from the input.
+ *
+ * \p value is set to NULL at the start of every call.
+ * NOTE(review): the complete return value contract must be taken from the
+ * implementation; a negative value is returned on errors.
+ *
+ * @param json the parser
+ * @param value receives the parsed value
+ * @return a status code
+ */
+__attribute__((__nonnull__))
+int cxJsonNext(CxJson *json, CxJsonValue **value);
+
+/**
+ * Frees a value obtained from cxJsonNext().
+ *
+ * NOTE(review): presumably recursive and NULL-tolerant (the function is not
+ * declared nonnull) - confirm against the implementation.
+ *
+ * @param value the value to free
+ */
+void cxJsonValueFree(CxJsonValue *value);
+
+/**
+ * Checks if the value is a JSON object.
+ *
+ * @param value the value to inspect
+ * @return true iff the type of \p value is CX_JSON_OBJECT
+ */
+__attribute__((__nonnull__))
+static inline bool cxJsonIsObject(CxJsonValue *value) {
+    const CxJsonValueType kind = value->type;
+    return CX_JSON_OBJECT == kind;
+}
+
+/**
+ * Checks if the value is a JSON array.
+ *
+ * @param value the value to inspect
+ * @return true iff the type of \p value is CX_JSON_ARRAY
+ */
+__attribute__((__nonnull__))
+static inline bool cxJsonIsArray(CxJsonValue *value) {
+    const CxJsonValueType kind = value->type;
+    return CX_JSON_ARRAY == kind;
+}
+
+/**
+ * Checks if the value is a JSON string.
+ *
+ * @param value the value to inspect
+ * @return true iff the type of \p value is CX_JSON_STRING
+ */
+__attribute__((__nonnull__))
+static inline bool cxJsonIsString(CxJsonValue *value) {
+    const CxJsonValueType kind = value->type;
+    return CX_JSON_STRING == kind;
+}
+
+/**
+ * Checks if the value is a non-integer JSON number.
+ *
+ * Values of type CX_JSON_INTEGER are NOT reported as numbers by this
+ * function.
+ *
+ * @param value the value to inspect
+ * @return true iff the type of \p value is CX_JSON_NUMBER
+ */
+__attribute__((__nonnull__))
+static inline bool cxJsonIsNumber(CxJsonValue *value) {
+    // TODO: this is not good, because an integer is also a number
+    const CxJsonValueType kind = value->type;
+    return CX_JSON_NUMBER == kind;
+}
+
+/**
+ * Checks if the value is an integer.
+ *
+ * @param value the value to inspect
+ * @return true iff the type of \p value is CX_JSON_INTEGER
+ */
+__attribute__((__nonnull__))
+static inline bool cxJsonIsInteger(CxJsonValue *value) {
+    const CxJsonValueType kind = value->type;
+    return CX_JSON_INTEGER == kind;
+}
+
+/**
+ * Checks if the value is one of the literals true, false, or null.
+ *
+ * @param value the value to inspect
+ * @return true iff the type of \p value is CX_JSON_LITERAL
+ */
+__attribute__((__nonnull__))
+static inline bool cxJsonIsLiteral(CxJsonValue *value) {
+    const CxJsonValueType kind = value->type;
+    return CX_JSON_LITERAL == kind;
+}
+
+/**
+ * Checks if the value is a boolean literal.
+ *
+ * @param value the value to inspect
+ * @return true iff \p value is a literal other than null
+ */
+__attribute__((__nonnull__))
+static inline bool cxJsonIsBool(CxJsonValue *value) {
+    if (!cxJsonIsLiteral(value)) {
+        return false;
+    }
+    return value->value.literal.literal != CX_JSON_NULL;
+}
+
+/**
+ * Checks if the value is the literal true.
+ *
+ * @param value the value to inspect
+ * @return true iff \p value is the literal true
+ */
+__attribute__((__nonnull__))
+static inline bool cxJsonIsTrue(CxJsonValue *value) {
+    if (!cxJsonIsLiteral(value)) {
+        return false;
+    }
+    return value->value.literal.literal == CX_JSON_TRUE;
+}
+
+/**
+ * Checks if the value is the literal false.
+ *
+ * @param value the value to inspect
+ * @return true iff \p value is the literal false
+ */
+__attribute__((__nonnull__))
+static inline bool cxJsonIsFalse(CxJsonValue *value) {
+    if (!cxJsonIsLiteral(value)) {
+        return false;
+    }
+    return value->value.literal.literal == CX_JSON_FALSE;
+}
+
+/**
+ * Checks if the value is the literal null.
+ *
+ * @param value the value to inspect
+ * @return true iff \p value is the literal null
+ */
+__attribute__((__nonnull__))
+static inline bool cxJsonIsNull(CxJsonValue *value) {
+    if (!cxJsonIsLiteral(value)) {
+        return false;
+    }
+    return value->value.literal.literal == CX_JSON_NULL;
+}
+
+/**
+ * Returns the string payload of the value.
+ *
+ * The type of \p value is not checked; the result is only meaningful
+ * when cxJsonIsString() is true.
+ *
+ * @param value the value
+ * @return the string stored in \p value
+ */
+__attribute__((__nonnull__))
+static inline cxmutstr cxJsonAsString(CxJsonValue *value) {
+    // TODO: do we need a separate method to return this directly as cxstring?
+    cxmutstr text = value->value.string;
+    return text;
+}
+
+/**
+ * Returns the numeric payload of the value as double.
+ *
+ * Values of type CX_JSON_INTEGER are converted to double. For any other
+ * type than CX_JSON_NUMBER or CX_JSON_INTEGER the result is not meaningful.
+ *
+ * @param value the value
+ * @return the number stored in \p value
+ */
+__attribute__((__nonnull__))
+static inline double cxJsonAsDouble(CxJsonValue *value) {
+    if (value->type == CX_JSON_INTEGER) {
+        // integers live in a different union member, so convert explicitly
+        return (double) value->value.integer.value;
+    }
+    return value->value.number.value;
+}
+
+/**
+ * Returns the numeric payload of the value as 64-bit integer.
+ *
+ * Values of type CX_JSON_NUMBER are converted by truncation; the caller
+ * must make sure that the number fits into an int64_t. For any other type
+ * than CX_JSON_INTEGER or CX_JSON_NUMBER the result is not meaningful.
+ *
+ * @param value the value
+ * @return the integer stored in \p value
+ */
+__attribute__((__nonnull__))
+static inline int64_t cxJsonAsInteger(CxJsonValue *value) {
+    if (value->type == CX_JSON_NUMBER) {
+        // floating point numbers live in a different union member
+        return (int64_t) value->value.number.value;
+    }
+    return value->value.integer.value;
+}
+
+/**
+ * Returns the boolean payload of the value.
+ *
+ * The type of \p value is not checked; the result is only meaningful
+ * when cxJsonIsLiteral() is true.
+ *
+ * @param value the value
+ * @return true iff the stored literal is CX_JSON_TRUE
+ */
+__attribute__((__nonnull__))
+static inline bool cxJsonAsBool(CxJsonValue *value) {
+    const CxJsonLiteralType lit = value->value.literal.literal;
+    return CX_JSON_TRUE == lit;
+}
+
+/**
+ * Returns the number of elements in an array value.
+ *
+ * The type of \p value is not checked; the result is only meaningful
+ * when cxJsonIsArray() is true.
+ *
+ * @param value the array value
+ * @return the number of elements
+ */
+__attribute__((__nonnull__))
+static inline size_t cxJsonArrSize(CxJsonValue *value) {
+    const size_t count = value->value.array.size;
+    return count;
+}
+
+/**
+ * Returns the array element at the specified index.
+ *
+ * The function is declared to never return NULL.
+ * NOTE(review): presumably a value of type CX_JSON_NOTHING is returned
+ * when the index is out of bounds - confirm against the implementation.
+ *
+ * @param value the array value
+ * @param index the index of the element
+ * @return the element
+ */
+__attribute__((__nonnull__, __returns_nonnull__))
+CxJsonValue *cxJsonArrGet(CxJsonValue *value, size_t index);
+
+// TODO: add cxJsonArrIter()
+
+// TODO: implement cxJsonObjGet as a _Generic with support for cxstring
+/**
+ * Returns the object member with the specified name.
+ *
+ * The function is declared to never return NULL.
+ * NOTE(review): presumably a value of type CX_JSON_NOTHING is returned
+ * when no such member exists - confirm against the implementation.
+ *
+ * @param value the object value
+ * @param name the zero-terminated member name
+ * @return the member value
+ */
+__attribute__((__nonnull__, __returns_nonnull__))
+CxJsonValue *cxJsonObjGet(CxJsonValue *value, const char* name);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* UCX_JSON_H */
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/json.c	Sat Oct 19 17:25:11 2024 +0200
@@ -0,0 +1,825 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "cx/json.h"
+#include "cx/allocator.h"
+
+/*
+ * RFC 8259
+ * https://tools.ietf.org/html/rfc8259
+ */
+
+#define PARSER_STATES_ALLOC 32
+
+// Shared value of type CX_JSON_NOTHING.
+// NOTE(review): presumably returned by the getters instead of NULL - confirm.
+static CxJsonValue cx_json_value_nothing = {CX_JSON_NOTHING, {0}};
+
+
+/**
+ * Appends bytes to the content of a token.
+ *
+ * The content buffer is grown to exactly the required size when necessary.
+ * A token with alloc == 0 is treated as not owning a heap buffer, so a
+ * fresh buffer is requested for it.
+ *
+ * @param token the token to extend
+ * @param buf the bytes to append
+ * @param len the number of bytes to append
+ * @return zero on success, non-zero when the allocation failed
+ */
+static int token_append(CxJsonToken *token, const char *buf, size_t len) {
+    if (len > 0) {
+        const size_t required = token->length + len;
+        if (required > token->alloc) {
+            char *owned = token->alloc != 0 ? (char *) token->content : NULL;
+            char *grown = realloc(owned, required);
+            if (grown == NULL) {
+                return 1;
+            }
+            token->content = grown;
+            token->alloc = required;
+        }
+
+        memcpy((char *) token->content + token->length, buf, len);
+        token->length = required;
+    }
+    return 0;
+}
+
+/**
+ * Assembles the content of a token that ends in the current buffer.
+ *
+ * Without a pending uncompleted token the result points directly into the
+ * input buffer. Otherwise the bytes [start, end) are appended to the pending
+ * token and that token (with its heap buffer) is returned.
+ * The pending token of the parser is reset on success.
+ *
+ * @param p the parser
+ * @param start index of the first byte in the current buffer
+ * @param end index one past the last byte in the current buffer
+ * @return the token content; a zeroed token when the allocation failed
+ */
+static CxJsonToken get_content(CxJson *p, size_t start, size_t end) {
+    const size_t span = end - start;
+    CxJsonToken result = {0};
+    if (p->uncompleted.tokentype != CX_JSON_NO_TOKEN) {
+        if (span > 0 && token_append(&p->uncompleted, p->buffer + start, span)) {
+            // TODO: this does certainly not lead to correct error handling
+            return (CxJsonToken){0};
+        }
+        result = p->uncompleted;
+    } else {
+        result.content = p->buffer + start;
+        result.length = span;
+    }
+    p->uncompleted = (CxJsonToken){0};
+    return result;
+}
+
+/**
+ * Checks if the text is exactly one of the literals true, false, or null.
+ *
+ * @param content the text (not necessarily zero-terminated)
+ * @param length the number of bytes in \p content
+ * @return 1 if the text is a literal, 0 otherwise
+ */
+static int token_isliteral(const char *content, size_t length) {
+    switch (length) {
+        case 4:
+            return memcmp(content, "true", 4) == 0
+                   || memcmp(content, "null", 4) == 0;
+        case 5:
+            return memcmp(content, "false", 5) == 0;
+        default:
+            return 0;
+    }
+}
+
+/**
+ * Checks if the text starting at \p pos is a valid exponent part, i.e. an
+ * optional sign followed by at least one digit and nothing else.
+ *
+ * @param content the text of the number token
+ * @param length the number of bytes in \p content
+ * @param pos index of the first byte after the 'e' / 'E'
+ * @return 1 if the exponent is well-formed, 0 otherwise
+ */
+static int num_isexp(const char *content, size_t length, size_t pos) {
+    if (pos >= length) {
+        return 0;
+    }
+
+    int has_digit = 0;
+    for (size_t i = pos; i < length; i++) {
+        // <ctype.h> functions require a value representable as unsigned char
+        const unsigned char c = (unsigned char) content[i];
+        if (isdigit(c)) {
+            has_digit = 1;
+        } else if (i == pos && (c == '+' || c == '-')) {
+            // a sign is only allowed as the very first character
+            continue;
+        } else {
+            return 0;
+        }
+    }
+
+    return has_digit;
+}
+
+/**
+ * Classifies a token that is expected to be a number.
+ *
+ * @param content the text of the token
+ * @param length the number of bytes in \p content
+ * @return CX_JSON_TOKEN_INTEGER when the text contains neither a decimal
+ * separator nor an exponent, CX_JSON_TOKEN_NUMBER when it contains one of
+ * them, CX_JSON_TOKEN_ERROR when the text is not a number
+ */
+static CxJsonTokenType token_numbertype(const char *content, size_t length) {
+    if (length == 0) return CX_JSON_TOKEN_ERROR;
+
+    // <ctype.h> functions require a value representable as unsigned char
+    if (content[0] != '-' && !isdigit((unsigned char) content[0])) {
+        return CX_JSON_TOKEN_ERROR;
+    }
+
+    CxJsonTokenType type = CX_JSON_TOKEN_INTEGER;
+    for (size_t i = 1; i < length; i++) {
+        const char c = content[i];
+        if (c == '.') {
+            if (type == CX_JSON_TOKEN_NUMBER) {
+                return CX_JSON_TOKEN_ERROR; // more than one decimal separator
+            }
+            type = CX_JSON_TOKEN_NUMBER;
+        } else if (c == 'e' || c == 'E') {
+            // everything after the exponent separator is validated at once
+            return num_isexp(content, length, i + 1) ? CX_JSON_TOKEN_NUMBER : CX_JSON_TOKEN_ERROR;
+        } else if (!isdigit((unsigned char) c)) {
+            return CX_JSON_TOKEN_ERROR; // char is not a digit, decimal separator or exponent sep
+        }
+    }
+
+    return type;
+}
+
+/**
+ * Completes a literal or number token that ends at \p end.
+ *
+ * The token type is determined from the content and the read position of
+ * the parser is moved to \p end.
+ *
+ * @param p the parser
+ * @param start index of the first byte in the current buffer
+ * @param end index one past the last byte in the current buffer
+ * @return the classified token
+ */
+static CxJsonToken get_token(CxJson *p, size_t start, size_t end) {
+    CxJsonToken result = get_content(p, start, end);
+    result.tokentype = token_isliteral(result.content, result.length)
+                       ? CX_JSON_TOKEN_LITERAL
+                       : token_numbertype(result.content, result.length);
+    p->pos = end;
+    return result;
+}
+
+/**
+ * Maps a character to the token type it starts or terminates.
+ *
+ * @param c the character
+ * @return the token type of the structural character, CX_JSON_TOKEN_STRING
+ * for a double quote, CX_JSON_TOKEN_SPACE for whitespace, and
+ * CX_JSON_NO_TOKEN for any other character
+ */
+static CxJsonTokenType char2ttype(char c) {
+    switch (c) {
+        case '[':
+            return CX_JSON_TOKEN_BEGIN_ARRAY;
+        case '{':
+            return CX_JSON_TOKEN_BEGIN_OBJECT;
+        case ']':
+            return CX_JSON_TOKEN_END_ARRAY;
+        case '}':
+            return CX_JSON_TOKEN_END_OBJECT;
+        case ':':
+            return CX_JSON_TOKEN_NAME_SEPARATOR;
+        case ',':
+            return CX_JSON_TOKEN_VALUE_SEPARATOR;
+        case '"':
+            return CX_JSON_TOKEN_STRING;
+        default:
+            break;
+    }
+    // <ctype.h> functions require a value representable as unsigned char,
+    // a plain (possibly negative) char must not be passed directly
+    if (isspace((unsigned char) c)) {
+        return CX_JSON_TOKEN_SPACE;
+    }
+    return CX_JSON_NO_TOKEN;
+}
+
+/**
+ * Reads the next token from the current input buffer.
+ *
+ * Single-character tokens are returned without content. String tokens
+ * include the enclosing quotes. When the buffer ends in the middle of a
+ * token, the partial content is saved in p->uncompleted and a token of
+ * type CX_JSON_NO_TOKEN is returned; the scan resumes with that partial
+ * token on the next call.
+ *
+ * NOTE(review): p->pos is not advanced when the buffer is exhausted, so
+ * this presumably relies on cxJsonFill() being called before the next
+ * read - confirm against the callers.
+ * NOTE(review): tokens assembled from p->uncompleted carry a heap buffer
+ * (alloc > 0); nothing visible here releases it - confirm ownership.
+ *
+ * @param p the parser
+ * @return the next token, or a token of type CX_JSON_NO_TOKEN when no
+ * complete token is available (p->error is set when an allocation failed)
+ */
+static CxJsonToken json_parser_next_token(CxJson *p) {
+    // current token type and start index
+    CxJsonTokenType ttype = p->uncompleted.tokentype;
+    size_t token_start = p->pos;
+
+    for (size_t i = p->pos; i < p->size; i++) {
+        char c = p->buffer[i];
+        if (ttype != CX_JSON_TOKEN_STRING) {
+            // currently non-string token
+
+            CxJsonTokenType ctype = char2ttype(c); // start of new token?
+
+            if (ttype == CX_JSON_NO_TOKEN) {
+                if (ctype == CX_JSON_TOKEN_SPACE) {
+                    continue;
+                } else if (ctype == CX_JSON_TOKEN_STRING) {
+                    // begin string
+                    ttype = CX_JSON_TOKEN_STRING;
+                    token_start = i;
+                } else if (ctype != CX_JSON_NO_TOKEN) {
+                    // single-char token
+                    p->pos = i + 1;
+                    CxJsonToken token = {ctype, NULL, 0, 0};
+                    return token;
+                } else {
+                    ttype = CX_JSON_TOKEN_LITERAL; // number or literal
+                    token_start = i;
+                }
+            } else {
+                // finish token
+                // (any structural character, quote, or whitespace ends it;
+                // the terminating character itself is not consumed)
+                if (ctype != CX_JSON_NO_TOKEN) {
+                    return get_token(p, token_start, i);
+                }
+            }
+        } else {
+            // currently inside a string
+            if (!p->tokenizer_escape) {
+                if (c == '"') {
+                    // closing quote is part of the token content
+                    CxJsonToken ret = get_content(p, token_start, i + 1);
+                    ret.tokentype = CX_JSON_TOKEN_STRING;
+                    p->pos = i + 1;
+                    return ret;
+                } else if (c == '\\') {
+                    p->tokenizer_escape = 1;
+                }
+            } else {
+                // the character after a backslash never terminates the string
+                p->tokenizer_escape = 0;
+            }
+        }
+    }
+
+    if (ttype != CX_JSON_NO_TOKEN) {
+        // uncompleted token
+        size_t uncompeted_len = p->size - token_start;
+        if (p->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
+            // current token is uncompleted
+            // save current token content in p->uncompleted
+            CxJsonToken uncompleted;
+            uncompleted.tokentype = ttype;
+            uncompleted.length = uncompeted_len;
+            // reserve some extra bytes for the remainder of the token
+            uncompleted.alloc = uncompeted_len + 16;
+            char *tmp = malloc(uncompleted.alloc);
+            if (tmp) {
+                memcpy(tmp, p->buffer + token_start, uncompeted_len);
+                uncompleted.content = tmp;
+                p->uncompleted = uncompleted;
+            } else {
+                p->error = 1;
+            }
+        } else {
+            // previously we also had an uncompleted token
+            // combine the uncompleted token with the current token
+            if (token_append(&p->uncompleted, p->buffer + token_start, uncompeted_len)) {
+                p->error = 1;
+            }
+        }
+    }
+
+    CxJsonToken ret = {CX_JSON_NO_TOKEN, NULL, 0, 0};
+    return ret;
+}
+
+/**
+ * Creates an unescaped, zero-terminated copy of a string token.
+ *
+ * The token is expected to include the enclosing quotes, which are not
+ * copied. Only the escape sequences \\n and \\t are translated; for every
+ * other escaped character the backslash is dropped and the character is
+ * copied verbatim.
+ *
+ * @param str the token content including the quotes
+ * @param len the length of the token content
+ * @return the unescaped string (heap allocated); the pointer is NULL when
+ * the token is shorter than two bytes or the allocation failed
+ */
+static cxmutstr unescape_string(const char *str, size_t len) {
+    // TODO: support more escape sequences
+    cxmutstr result;
+    result.ptr = NULL;
+    result.length = 0;
+
+    // a string token consists of at least the two quotes;
+    // anything shorter would make the size computations below wrap around
+    if (len < 2) {
+        return result;
+    }
+
+    // we know that the unescaped string will be shorter by at least 2 chars
+    result.ptr = malloc(len - 1);
+    if (result.ptr == NULL) {
+        // TODO: check if this actually leads to correct error handling
+        return result;
+    }
+
+    bool escaped = false;
+    for (size_t i = 1; i < len - 1; i++) {
+        char c = str[i];
+        if (escaped) {
+            escaped = false;
+            if (c == 'n') {
+                c = '\n';
+            } else if (c == 't') {
+                c = '\t';
+            }
+            result.ptr[result.length++] = c;
+        } else if (c == '\\') {
+            escaped = true;
+        } else {
+            result.ptr[result.length++] = c;
+        }
+    }
+    result.ptr[result.length] = 0;
+
+    return result;
+}
+
+/**
+ * Parses a decimal integer from a text that is not zero-terminated.
+ *
+ * @param str the text
+ * @param len the number of bytes in \p str (at most 30)
+ * @param value receives the parsed integer on success
+ * @return zero on success, non-zero when the text is empty, too long,
+ * not entirely a number, or out of range
+ */
+static int parse_integer(const char *str, size_t len, int64_t *value) {
+    char buf[32];
+    if (len == 0 || len > 30) {
+        return 1;
+    }
+    memcpy(buf, str, len);
+    buf[len] = 0;
+
+    // strtoll() reports overflow only via errno, the result is clamped
+    char *endptr = NULL;
+    errno = 0;
+    long long v = strtoll(buf, &endptr, 10);
+    if (errno == ERANGE || endptr != &buf[len]) {
+        return 1;
+    }
+    *value = (int64_t) v;
+
+    return 0;
+}
+
+/**
+ * Parses a floating point number from a text that is not zero-terminated.
+ *
+ * @param str the text
+ * @param len the number of bytes in \p str (at most 30)
+ * @param value receives the parsed number on success
+ * @return zero on success, non-zero when the text is too long or is not
+ * entirely consumed by strtod()
+ */
+static int parse_number(const char *str, size_t len, double *value) {
+    char text[32];
+    if (len > 30) {
+        return 1;
+    }
+    memcpy(text, str, len);
+    text[len] = 0;
+
+    char *stop = NULL;
+    const double parsed = strtod(text, &stop);
+    if (stop == text + len) {
+        *value = parsed;
+        return 0;
+    }
+    return 1;
+}
+
+/**
+ * Pushes a new state onto the state stack of the parser.
+ *
+ * @param p the parser
+ * @param state the state to push
+ * @return zero on success, non-zero when the stack could not be grown
+ */
+static int add_state(CxJson *p, int state) {
+    // states[nstates] is the current top of the stack, so the new state
+    // is written to index nstates + 1, which must be within the allocation
+    if (p->nstates + 1 >= p->states_alloc) {
+        const int new_alloc = p->states_alloc + PARSER_STATES_ALLOC;
+        if (cx_reallocate(&p->states, new_alloc * sizeof(int))) {
+            return 1;
+        }
+        // only record the new capacity after the reallocation succeeded
+        p->states_alloc = new_alloc;
+    }
+    p->states[++p->nstates] = state;
+    return 0;
+}
+
+/**
+ * Reports the end of an array or object and pops the state stack.
+ *
+ * When there is nothing to pop, the stack is left untouched and the error
+ * flag of the parser is set instead of letting the index become negative.
+ *
+ * @param p the parser
+ * @param type the reader type to report (array end or object end)
+ */
+static void end_elm(CxJson *p, CxJsonReaderType type) {
+    p->reader_type = type;
+    if (p->nstates > 0) {
+        p->nstates--;
+    } else {
+        // closing bracket without an open container
+        p->error = 1;
+    }
+}
+
+#define JP_STATE_VALUE_BEGIN        0
+#define JP_STATE_VALUE_BEGIN_OBJ    1
+#define JP_STATE_VALUE_BEGIN_AR     2
+#define JP_STATE_ARRAY_SEP_OR_CLOSE 3
+#define JP_STATE_OBJ_NAME_OR_CLOSE  4
+#define JP_STATE_OBJ_NAME           5
+#define JP_STATE_OBJ_COLON          6
+#define JP_STATE_OBJ_SEP_OR_CLOSE   7
+
+/**
+ * Determines the state that follows after a value has been read.
+ *
+ * @param current the state in which the value was expected
+ * @return the follow-up state, or -1 when nothing more is expected
+ */
+static int next_state_after_value(int current) {
+    if (current == JP_STATE_VALUE_BEGIN_OBJ) {
+        // after obj value, expect ',' or '}'
+        return JP_STATE_OBJ_SEP_OR_CLOSE;
+    }
+    if (current == JP_STATE_VALUE_BEGIN_AR) {
+        // after array value, expect ',' or ']'
+        return JP_STATE_ARRAY_SEP_OR_CLOSE;
+    }
+    // after a top-level value (or in any other state) expect nothing
+    return -1;
+}
+
+/**
+ * Releases the pending member name of the parser.
+ *
+ * @param p the parser
+ */
+static void clear_valuename(CxJson *p) {
+    char *name = p->value_name;
+    p->value_name_len = 0;
+    p->value_name = NULL;
+    free(name);
+}
+
+/**
+ * Releases the pending string value and resets the numeric values.
+ *
+ * @param p the parser
+ */
+static void clear_values(CxJson *p) {
+    char *text = p->value_str;
+    p->value_str = NULL;
+    p->value_str_len = 0;
+    free(text);
+
+    p->value_double = 0;
+    p->value_int = 0;
+}
+
+/**
+ * Reads the next element from the input and advances the state machine.
+ *
+ * Separators and member names are consumed internally (by recursion), so
+ * that a successful call always stops at a value, the begin of a container,
+ * or the end of a container. The result is reported via p->reader_type,
+ * p->value_ready and the p->value_* members.
+ *
+ * NOTE(review): tokens that were assembled from p->uncompleted own a heap
+ * buffer (alloc > 0) which is not released here - confirm ownership.
+ *
+ * @param p the parser
+ * @return 1 when an element was read, 0 when no complete token is available,
+ * -1 on syntax errors or allocation failures
+ */
+static int json_read(CxJson *p) {
+    if (p->states == NULL) {
+        // state stack was never allocated
+        return -1;
+    }
+
+    int state = p->states[p->nstates];
+    clear_values(p);
+    CxJsonToken token = json_parser_next_token(p);
+    p->reader_token = token;
+
+    p->value_ready = 0;
+
+    if (token.tokentype == CX_JSON_NO_TOKEN) {
+        return 0;
+    }
+
+    // 0 JP_STATE_VALUE_BEGIN          value begin
+    // 1 JP_STATE_VALUE_BEGIN_OBJ      value begin (inside object)
+    // 2 JP_STATE_VALUE_BEGIN_AR       value begin (inside array)
+    // 3 JP_STATE_ARRAY_SEP_OR_CLOSE   array, expect separator or arrayclose
+    // 4 JP_STATE_OBJ_NAME_OR_CLOSE    object, expect name or objclose
+    // 5 JP_STATE_OBJ_NAME             object, expect name
+    // 6 JP_STATE_OBJ_COLON            object, expect ':'
+    // 7 JP_STATE_OBJ_SEP_OR_CLOSE     object, expect separator, objclose
+
+    if (state == JP_STATE_VALUE_BEGIN_AR || state == JP_STATE_OBJ_SEP_OR_CLOSE) {
+        clear_valuename(p);
+    }
+
+    // note: this also covers the state -1 that is set after a complete
+    // top-level value, i.e. another top-level value may follow
+    if (state < JP_STATE_ARRAY_SEP_OR_CLOSE) {
+        // expect value
+        p->states[p->nstates] = next_state_after_value(state);
+        p->value_ready = 1;
+        switch (token.tokentype) {
+            case CX_JSON_TOKEN_BEGIN_ARRAY: {
+                p->reader_type = CX_JSON_READER_ARRAY_BEGIN;
+                if (add_state(p, JP_STATE_VALUE_BEGIN_AR)) return -1;
+                return 1;
+            }
+            case CX_JSON_TOKEN_BEGIN_OBJECT: {
+                p->reader_type = CX_JSON_READER_OBJECT_BEGIN;
+                if (add_state(p, JP_STATE_OBJ_NAME_OR_CLOSE)) return -1;
+                return 1;
+            }
+            case CX_JSON_TOKEN_END_ARRAY: {
+                // ']' in a value position is only valid directly inside
+                // an array (empty array or after a separator); everywhere
+                // else there is no array that could be closed
+                if (state != JP_STATE_VALUE_BEGIN_AR) {
+                    return -1;
+                }
+                p->value_ready = 0;
+                end_elm(p, CX_JSON_READER_ARRAY_END);
+                break;
+            }
+            case CX_JSON_TOKEN_END_OBJECT: {
+                // '}' is never valid where a value is expected; closing an
+                // object is handled by the object states below
+                return -1;
+            }
+            case CX_JSON_TOKEN_STRING: {
+                p->reader_type = CX_JSON_READER_STRING;
+                cxmutstr str = unescape_string(token.content, token.length);
+                if (!str.ptr) {
+                    return -1;
+                }
+                p->value_str = str.ptr;
+                p->value_str_len = str.length;
+                break;
+            }
+            case CX_JSON_TOKEN_INTEGER: {
+                p->reader_type = CX_JSON_READER_INTEGER;
+                int64_t value;
+                if (parse_integer(token.content, token.length, &value)) {
+                    return -1;
+                }
+                p->value_int = value;
+                p->value_double = (double) value;
+                break;
+            }
+            case CX_JSON_TOKEN_NUMBER: {
+                p->reader_type = CX_JSON_READER_NUMBER;
+                double value;
+                if (parse_number(token.content, token.length, &value)) {
+                    return -1;
+                }
+                p->value_double = value;
+                p->value_int = (int64_t) value;
+                break;
+            }
+            case CX_JSON_TOKEN_LITERAL: {
+                p->reader_type = CX_JSON_READER_LITERAL;
+                break;
+            }
+            default:
+                return -1;
+        }
+    } else if (state == JP_STATE_ARRAY_SEP_OR_CLOSE) {
+        // expect ',' or ']'
+        if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
+            p->states[p->nstates] = JP_STATE_VALUE_BEGIN_AR;
+            return json_read(p);
+        } else if (token.tokentype == CX_JSON_TOKEN_END_ARRAY) {
+            end_elm(p, CX_JSON_READER_ARRAY_END);
+        } else {
+            return -1;
+        }
+    } else if (state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) {
+        if (state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
+            clear_valuename(p);
+            end_elm(p, CX_JSON_READER_OBJECT_END);
+        } else {
+            // expect string
+            if (token.tokentype != CX_JSON_TOKEN_STRING) return -1;
+
+            cxmutstr valname = unescape_string(token.content, token.length);
+            if (!valname.ptr) {
+                // allocation failed, keep the parser state consistent
+                return -1;
+            }
+            free(p->value_name);
+            p->value_name = valname.ptr;
+            p->value_name_len = valname.length;
+
+            // next state
+            p->states[p->nstates] = JP_STATE_OBJ_COLON;
+            return json_read(p);
+        }
+    } else if (state == JP_STATE_OBJ_COLON) {
+        // expect ':'
+        if (token.tokentype != CX_JSON_TOKEN_NAME_SEPARATOR) return -1;
+        // next state
+        p->states[p->nstates] = JP_STATE_VALUE_BEGIN_OBJ;
+        return json_read(p);
+    } else if (state == JP_STATE_OBJ_SEP_OR_CLOSE) {
+        // expect ',' or '}'
+        if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
+            p->states[p->nstates] = JP_STATE_OBJ_NAME;
+            return json_read(p);
+        } else if (token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
+            end_elm(p, CX_JSON_READER_OBJECT_END);
+        } else {
+            return -1;
+        }
+    }
+
+    return 1;
+}
+
+/**
+ * Determines which literal the most recently read token represents.
+ *
+ * @param p the parser
+ * @return CX_JSON_TRUE or CX_JSON_FALSE when the token text matches,
+ * CX_JSON_NULL for anything else
+ */
+static CxJsonLiteralType json_reader_literal(CxJson *p) {
+    const size_t n = p->reader_token.length;
+    const char *text = p->reader_token.content;
+    if (n == 4 && memcmp(text, "true", 4) == 0) {
+        return CX_JSON_TRUE;
+    }
+    if (n == 5 && memcmp(text, "false", 5) == 0) {
+        return CX_JSON_FALSE;
+    }
+    return CX_JSON_NULL;
+}
+
+/* -------------------- read value functions -------------------- */
+
+/**
+ * Allocates the stack of open containers used while building values.
+ *
+ * @param p the parser
+ * @return zero on success, -1 when the allocation failed
+ */
+static int setup_read_value(CxJson *p) {
+    p->readvalue_alloc = PARSER_STATES_ALLOC;
+    p->readvalue_nelm = 0;
+    p->readvalue_stack = calloc(PARSER_STATES_ALLOC, sizeof(CxJsonValue *));
+    if (p->readvalue_stack == NULL) {
+        return -1;
+    }
+
+    p->readvalue_stack[0] = NULL;
+    p->read_value = NULL;
+
+    return 0;
+}
+
+/**
+ * Allocates the initial member storage of an object value.
+ *
+ * @param p the parser (supplies the initial capacity)
+ * @param v the object value
+ * @return zero on success, -1 when the allocation failed
+ */
+static int obj_init_values(CxJson *p, CxJsonValue *v) {
+    CxJsonObjValue *members = calloc(p->reader_array_alloc, sizeof(CxJsonObjValue));
+    v->value.object.values = members;
+    if (members == NULL) {
+        return -1;
+    }
+    v->value.object.size = 0;
+    v->value.object.alloc = p->reader_array_alloc;
+
+    return 0;
+}
+
+/**
+ * Appends a member to an object value, growing the storage when needed.
+ *
+ * @param p the parser (supplies the initial capacity)
+ * @param parent the object value
+ * @param v the member to append
+ * @return zero on success, -1 when an allocation failed
+ */
+static int obj_add_value(CxJson *p, CxJsonValue *parent, CxJsonObjValue v) {
+    if (parent->value.object.values == NULL && obj_init_values(p, parent)) {
+        return -1;
+    }
+
+    if (parent->value.object.alloc == parent->value.object.size) {
+        // double the capacity
+        parent->value.object.alloc *= 2;
+        if (cx_reallocate(&parent->value.object.values,
+                          sizeof(CxJsonObjValue) * parent->value.object.alloc)) {
+            return -1;
+        }
+    }
+
+    const size_t slot = parent->value.object.size++;
+    parent->value.object.values[slot] = v;
+
+    return 0;
+}
+
+/**
+ * Allocates the initial element storage of an array value.
+ *
+ * @param p the parser (supplies the initial capacity)
+ * @param v the array value
+ * @return zero on success, -1 when the allocation failed
+ */
+static int array_init(CxJson *p, CxJsonValue *v) {
+    CxJsonValue **elements = calloc(p->reader_array_alloc, sizeof(CxJsonValue *));
+    v->value.array.array = elements;
+    if (elements == NULL) {
+        return -1;
+    }
+    v->value.array.size = 0;
+    v->value.array.alloc = p->reader_array_alloc;
+
+    return 0;
+}
+
+/**
+ * Appends an element to an array value, growing the storage when needed.
+ *
+ * @param p the parser (supplies the initial capacity)
+ * @param parent the array value
+ * @param v the element to append
+ * @return zero on success, -1 when an allocation failed
+ */
+static int array_add_value(CxJson *p, CxJsonValue *parent, CxJsonValue *v) {
+    if (!parent->value.array.array) {
+        if (array_init(p, parent)) {
+            return -1;
+        }
+    }
+
+    if (parent->value.array.size == parent->value.array.alloc) {
+        const size_t new_alloc = parent->value.array.alloc * 2;
+        // cx_reallocate() needs the ADDRESS of the pointer to update it
+        if (cx_reallocate(&parent->value.array.array,
+                          sizeof(CxJsonValue *) * new_alloc)) {
+            return -1;
+        }
+        // only record the new capacity after the reallocation succeeded
+        parent->value.array.alloc = new_alloc;
+    }
+
+    parent->value.array.array[parent->value.array.size++] = v;
+
+    return 0;
+}
+
+/**
+ * Adds a value to its parent container.
+ *
+ * For objects the pending member name of the parser is used as the name
+ * of the new member. Ownership of the name passes to the object only when
+ * the member was added successfully; otherwise the name stays with the
+ * parser. An empty name is accepted since "" is a valid JSON member name.
+ *
+ * @param p the parser
+ * @param parent the container (object or array)
+ * @param v the value to add
+ * @return zero on success, -1 on failure
+ */
+static int add_to_parent(CxJson *p, CxJsonValue *parent, CxJsonValue *v) {
+    if (!parent) {
+        return -1; // shouldn't happen but who knows
+    }
+
+    if (parent->type == CX_JSON_OBJECT) {
+        if (!p->value_name) {
+            // a member without a preceding name
+            return -1;
+        }
+
+        CxJsonObjValue newvalue;
+        newvalue.name = p->value_name;
+        newvalue.value = v;
+
+        if (obj_add_value(p, parent, newvalue)) {
+            return -1;
+        }
+        // the object owns the name now
+        p->value_name = NULL;
+        return 0;
+    } else if (parent->type == CX_JSON_ARRAY) {
+        return array_add_value(p, parent, v);
+    }
+
+    return -1; // should also never happen
+}
+
+
+/**
+ * Pushes a container onto the stack of open containers.
+ *
+ * @param p the parser
+ * @param v the container value
+ * @return zero on success, -1 when the stack could not be grown
+ */
+static int readvaluestack_add(CxJson *p, CxJsonValue *v) {
+    if (p->readvalue_alloc == p->readvalue_nelm) {
+        // stack is full, double the capacity
+        p->readvalue_alloc *= 2;
+        if (cx_reallocate(&p->readvalue_stack, sizeof(CxJsonValue *) * p->readvalue_alloc)) {
+            return -1;
+        }
+    }
+    const int top = p->readvalue_nelm++;
+    p->readvalue_stack[top] = v;
+    return 0;
+}
+
+void cxJsonInit(CxJson *json) {
+    memset(json, 0, sizeof(CxJson));
+    // TODO: do not allocate states right away
+    json->states = calloc(PARSER_STATES_ALLOC, sizeof(int));
+    if (json->states) {
+        json->states_alloc = PARSER_STATES_ALLOC;
+    } else {
+        // the recorded capacity must match the actual allocation;
+        // flag the failure since this function cannot return a status
+        json->states_alloc = 0;
+        json->error = 1;
+    }
+    // TODO: find better way to configure the initial allocation size for arrays and objects
+    json->reader_array_alloc = 8;
+}
+
+void cxJsonDestroy(CxJson *p) {
+    free(p->states);
+    p->states = NULL;
+    free(p->readvalue_stack);
+    p->readvalue_stack = NULL;
+
+    // a token that was not completed owns a heap buffer (alloc > 0)
+    if (p->uncompleted.alloc > 0) {
+        free((void *) p->uncompleted.content);
+    }
+    p->uncompleted = (CxJsonToken){0};
+
+    // pending name / string that have not been handed over to a value
+    free(p->value_name);
+    p->value_name = NULL;
+    p->value_name_len = 0;
+    free(p->value_str);
+    p->value_str = NULL;
+    p->value_str_len = 0;
+}
+
+void cxJsonFill(CxJson *p, const char *buf, size_t size) {
+    // TODO: implement rescue buffer like in CxProperties to allow subsequent fills
+    // the data is not copied, reading restarts at the beginning of buf
+    p->pos = 0;
+    p->size = size;
+    p->buffer = buf;
+}
+
+/**
+ * Reads the next complete JSON value from the data supplied to the parser.
+ *
+ * Nested objects and arrays are assembled with the help of a stack of open
+ * containers. Success is only reported once the root value exists and no
+ * container is left open.
+ *
+ * @param p the parser state
+ * @param value receives the root of the parsed value on success and is set
+ * to NULL in every other case
+ * @return 1 when a complete value was stored in value, -1 when an allocation
+ * failed or the reader reported an unexpected type, otherwise the result of
+ * json_read() is passed through unchanged
+ */
+int cxJsonNext(CxJson *p, CxJsonValue **value) {
+    // TODO: replace int with a status enum like in CxProperties
+
+    *value = NULL;
+    if (!p->readvalue_stack) {
+        if (setup_read_value(p)) return -1;
+    }
+
+    // continue until the root exists and every container has been closed
+    while (p->readvalue_nelm > 0 || !p->read_value) {
+        if (p->value_ready) {
+            // value available without another read
+            CxJsonValue *v = calloc(1, sizeof(CxJsonValue));
+            if (!v) return -1;
+
+            if (p->readvalue_nelm > 0) {
+                if (add_to_parent(p, p->readvalue_stack[p->readvalue_nelm - 1], v)) {
+                    // the value was not attached, so nobody else would release it
+                    // NOTE(review): assumes add_to_parent() keeps no reference
+                    // to v when it fails - confirm
+                    free(v);
+                    return -1;
+                }
+            } else {
+                // set this value as root
+                p->read_value = v;
+            }
+
+            switch (p->reader_type) {
+                case CX_JSON_READER_OBJECT_BEGIN: {
+                    v->type = CX_JSON_OBJECT;
+                    // the object stays open until its end is read
+                    if (readvaluestack_add(p, v)) {
+                        return -1;
+                    }
+                    break;
+                }
+                case CX_JSON_READER_OBJECT_END:
+                    return -1; // should not happen
+                case CX_JSON_READER_ARRAY_BEGIN: {
+                    v->type = CX_JSON_ARRAY;
+                    // the array stays open until its end is read
+                    if (readvaluestack_add(p, v)) {
+                        return -1;
+                    }
+                    break;
+                }
+                case CX_JSON_READER_ARRAY_END:
+                    return -1; // should not happen
+                case CX_JSON_READER_STRING: {
+                    v->type = CX_JSON_STRING;
+                    if (p->value_str) {
+                        // the string buffer moves from the parser to the value
+                        v->value.string.ptr = p->value_str;
+                        v->value.string.length = p->value_str_len;
+                        p->value_str = NULL;
+                    }
+                    break;
+                }
+                case CX_JSON_READER_INTEGER: {
+                    v->type = CX_JSON_INTEGER;
+                    v->value.integer.value = p->value_int;
+                    break;
+                }
+                case CX_JSON_READER_NUMBER: {
+                    v->type = CX_JSON_NUMBER;
+                    v->value.number.value = p->value_double;
+                    break;
+                }
+                case CX_JSON_READER_LITERAL: {
+                    v->type = CX_JSON_LITERAL;
+                    v->value.literal.literal = json_reader_literal(p);
+                    break;
+                }
+            }
+        } else if (p->readvalue_initialized) {
+            CxJsonReaderType rt = p->reader_type;
+            if (rt == CX_JSON_READER_OBJECT_END || rt == CX_JSON_READER_ARRAY_END) {
+                // the innermost open container is complete
+                p->readvalue_nelm--;
+            }
+            // else: p->value_ready is 1, this will be handled in the next run
+        }
+
+        // only read more input while the value is still incomplete
+        if (p->readvalue_nelm > 0 || !p->read_value) {
+            int r = json_read(p);
+            if (r != 1) {
+                p->readvalue_initialized = 0;
+                return r;
+            }
+            p->readvalue_initialized = 1;
+        }
+    }
+
+    // hand the finished root to the caller and forget about it
+    *value = p->read_value;
+    p->readvalue_initialized = 0;
+    p->read_value = NULL;
+
+    return 1;
+}
+
+/**
+ * Releases a JSON value together with everything stored below it.
+ *
+ * Objects free their member names and member values, arrays free their
+ * elements, strings free their character data. NULL and the shared
+ * "nothing" value are ignored.
+ *
+ * @param value the value to free
+ */
+void cxJsonValueFree(CxJsonValue *value) {
+    if (value == NULL) return;
+    if (value == &cx_json_value_nothing) return;
+
+    // TODO: discuss if we should keep freeing the stuff recursively
+    if (value->type == CX_JSON_OBJECT) {
+        CxJsonObject *obj = &value->value.object;
+        for (size_t m = 0; m < obj->size; m++) {
+            cxJsonValueFree(obj->values[m].value);
+            free(obj->values[m].name);
+        }
+        free(obj->values);
+    } else if (value->type == CX_JSON_ARRAY) {
+        CxJsonArray *arr = &value->value.array;
+        for (size_t e = 0; e < arr->size; e++) {
+            cxJsonValueFree(arr->array[e]);
+        }
+        free(arr->array);
+    } else if (value->type == CX_JSON_STRING) {
+        free(value->value.string.ptr);
+    }
+    // all remaining types carry no extra memory
+    free(value);
+}
+
+/**
+ * Returns the element of a JSON array at the given index.
+ *
+ * Besides an out-of-range index, a NULL value or a value that is not an array
+ * also yields the shared "nothing" value. This keeps chained lookups safe,
+ * because the members of the value union are never read with the wrong type.
+ *
+ * @param value the JSON array
+ * @param index the index of the requested element
+ * @return the element, or a pointer to cx_json_value_nothing
+ */
+CxJsonValue *cxJsonArrGet(CxJsonValue *value, size_t index) {
+    if (value == NULL || value->type != CX_JSON_ARRAY) {
+        return &cx_json_value_nothing;
+    }
+    if (index >= value->value.array.size) {
+        return &cx_json_value_nothing;
+    }
+    return value->value.array.array[index];
+}
+
+/**
+ * Looks up a member of a JSON object by its name.
+ *
+ * The members are searched linearly and the first member with an equal name
+ * wins. A NULL value, a value that is not an object and an unknown name all
+ * yield the shared "nothing" value, so the members of the value union are
+ * never read with the wrong type.
+ *
+ * @param value the JSON object
+ * @param name the zero-terminated name of the requested member
+ * @return the member value, or a pointer to cx_json_value_nothing
+ */
+CxJsonValue *cxJsonObjGet(CxJsonValue *value, const char *name) {
+    if (value == NULL || value->type != CX_JSON_OBJECT) {
+        return &cx_json_value_nothing;
+    }
+    CxJsonObject *obj = &(value->value.object);
+    // TODO: think about sorting the object so that we can use binary search here
+    for (size_t i = 0; i < obj->size; i++) {
+        // TODO: we might want to store names as cxmutstr
+        if (0 == strcmp(name, obj->values[i].name)) {
+            return obj->values[i].value;
+        }
+    }
+    return &cx_json_value_nothing;
+}
--- a/tests/Makefile	Sat Oct 19 16:28:15 2024 +0200
+++ b/tests/Makefile	Sat Oct 19 17:25:11 2024 +0200
@@ -30,7 +30,7 @@
 SRC = util_allocator.c test_utils.c test_hash_key.c test_allocator.c \
 	test_compare.c test_string.c test_buffer.c test_iterator.c \
 	test_list.c test_tree.c test_hash_map.c test_properties.c \
-	test_printf.c test_mempool.c ucxtest.c
+	test_printf.c test_mempool.c test_json.c ucxtest.c

 OBJ_EXT=.o
 OBJ=$(SRC:%.c=$(TEST_DIR)/%$(OBJ_EXT))
@@ -83,6 +83,12 @@
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS) -c $<

+$(TEST_DIR)/test_json$(OBJ_EXT): test_json.c \
+ ../src/cx/test.h ../src/cx/json.h ../src/cx/common.h \
+ ../src/cx/string.h ../src/cx/allocator.h
+	@echo "Compiling $<"
+	$(CC) -o $@ $(CFLAGS) -c $<
+
 $(TEST_DIR)/test_list$(OBJ_EXT): test_list.c ../src/cx/test.h \
  util_allocator.h ../src/cx/allocator.h ../src/cx/common.h \
  ../src/cx/compare.h ../src/cx/utils.h ../src/cx/array_list.h \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_json.c	Sat Oct 19 17:25:11 2024 +0200
@@ -0,0 +1,105 @@
+/*
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
+ *
+ * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "cx/test.h"
+
+#include "cx/json.h"
+
+CX_TEST(test_json_simple_object) {
+    // one document with a string, a nested object, an integer and a literal
+    cxstring input = cx_str(
+            "{\n"
+            "\t\"message\":\"success\",\n"
+            "\t\"position\":{\n"
+            "\t\t\"longitude\":-94.7099,\n"
+            "\t\t\"latitude\":51.5539\n"
+            "\t},\n"
+            "\t\"timestamp\":1729348561,\n"
+            "\t\"alive\":true\n"
+            "}"
+    );
+
+    CX_TEST_DO {
+        CxJson parser;
+        cxJsonInit(&parser);
+        cxJsonFill(&parser, input.ptr, input.length);
+
+        // the whole document has to come back from a single call
+        CxJsonValue *root;
+        int status = cxJsonNext(&parser, &root);
+        CX_TEST_ASSERT(status == 1);
+        CX_TEST_ASSERT(cxJsonIsObject(root));
+
+        // string member
+        CxJsonValue *msg = cxJsonObjGet(root, "message");
+        CX_TEST_ASSERT(cxJsonIsString(msg));
+        CX_TEST_ASSERT(0 == cx_strcmp(cx_strcast(cxJsonAsString(msg)), cx_str("success")));
+
+        // nested object with two floating point members
+        CxJsonValue *pos = cxJsonObjGet(root, "position");
+        CX_TEST_ASSERT(cxJsonIsObject(pos));
+        CxJsonValue *lon = cxJsonObjGet(pos, "longitude");
+        CX_TEST_ASSERT(cxJsonIsNumber(lon));
+        CX_TEST_ASSERT(cxJsonAsDouble(lon) == -94.7099);
+        CxJsonValue *lat = cxJsonObjGet(pos, "latitude");
+        CX_TEST_ASSERT(cxJsonIsNumber(lat));
+        CX_TEST_ASSERT(cxJsonAsDouble(lat) == 51.5539);
+
+        // integer member
+        CxJsonValue *ts = cxJsonObjGet(root, "timestamp");
+        CX_TEST_ASSERT(cxJsonIsInteger(ts));
+        CX_TEST_ASSERT(cxJsonAsInteger(ts) == 1729348561);
+
+        // boolean literal
+        CxJsonValue *flag = cxJsonObjGet(root, "alive");
+        CX_TEST_ASSERT(cxJsonIsBool(flag));
+        CX_TEST_ASSERT(cxJsonIsTrue(flag));
+        CX_TEST_ASSERT(!cxJsonIsFalse(flag));
+        CX_TEST_ASSERT(cxJsonAsBool(flag));
+
+        // releasing the root releases all members as well
+        cxJsonValueFree(root);
+
+        // the input held exactly one value, so nothing more can be read
+        status = cxJsonNext(&parser, &root);
+        CX_TEST_ASSERT(status == 0);
+
+        cxJsonDestroy(&parser);
+    }
+}
+
+// Creates the test suite named "json" and registers its test cases.
+CxTestSuite *cx_test_suite_json(void) {
+    CxTestSuite *json_suite = cx_test_suite_new("json");
+    cx_test_register(json_suite, test_json_simple_object);
+    return json_suite;
+}
+
--- a/tests/ucxtest.c	Sat Oct 19 16:28:15 2024 +0200
+++ b/tests/ucxtest.c	Sat Oct 19 17:25:11 2024 +0200
@@ -47,6 +47,7 @@
 CxTestSuite *cx_test_suite_mempool(void);
 CxTestSuite *cx_test_suite_hash_map(void);
 CxTestSuite *cx_test_suite_properties(void);
+CxTestSuite *cx_test_suite_json(void);

 #define run_tests(suite) cx_test_run_stdout(suite); success += (suite)->success; failure += (suite)->failure
 #define execute_test_suites(...) unsigned success = 0, failure = 0; CxTestSuite* test_suites[] = {__VA_ARGS__}; \
@@ -75,7 +76,8 @@
             cx_test_suite_tree_high_level(),
             cx_test_suite_mempool(),
             cx_test_suite_hash_map(),
-            cx_test_suite_properties()
+            cx_test_suite_properties(),
+            cx_test_suite_json()
     );
     printf("=== OVERALL RESULT ===\n");
     printf("  Total:   %u\n  Success: %u\n  Failure: %u\n",
src/Makefile		file \| annotate \| diff \| comparison \| revisions
src/cx/json.h		file \| annotate \| diff \| comparison \| revisions
src/json.c		file \| annotate \| diff \| comparison \| revisions
tests/Makefile		file \| annotate \| diff \| comparison \| revisions
tests/test_json.c		file \| annotate \| diff \| comparison \| revisions
tests/ucxtest.c		file \| annotate \| diff \| comparison \| revisions