src/json.c

changeset 1002
1483c47063a8
parent 1000
1aecddf7e209
child 1007
81b2986d2b04
--- a/src/json.c	Sat Dec 07 23:59:54 2024 +0100
+++ b/src/json.c	Sun Dec 08 00:13:38 2024 +0100
@@ -42,49 +42,11 @@
 static CxJsonValue cx_json_value_nothing = {.type = CX_JSON_NOTHING};
 
 static void token_destroy(CxJsonToken *token) {
-    if (token->alloc > 0) {
-        free((char*) token->content);
+    if (token->allocated) { // content is freed only when the token is flagged as allocated
+        cx_strfree(&token->content);
     }
 }
 
-static int token_append(CxJsonToken *token, const char *buf, size_t len) {
-    if (len == 0) {
-        return 0;
-    }
-
-    size_t newlen = token->length + len;
-    if (token->alloc < newlen) {
-        char *newbuf = realloc(
-                token->alloc == 0 ? NULL : (char *) token->content,
-                newlen);
-        if (!newbuf) {
-            return 1;
-        }
-        token->content = newbuf;
-        token->alloc = newlen;
-    }
-
-    memcpy((char *) token->content + token->length, buf, len);
-    token->length = newlen;
-    return 0;
-}
-
-static CxJsonToken token_create(CxJson *json, size_t start, size_t end) {
-    CxJsonToken token = {0};
-    size_t len = end - start;
-    if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
-        token.content = json->buffer + start;
-        token.length = len;
-    } else {
-        if (token_append(&json->uncompleted, json->buffer + start, len)) {
-            // TODO: this does certainly not lead to correct error handling
-            return (CxJsonToken){0};
-        }
-        token = json->uncompleted;
-    }
-    json->uncompleted = (CxJsonToken){0};
-    return token;
-}
 
 static int token_isliteral(const char *content, size_t length) {
     if (length == 4) {
@@ -145,6 +107,36 @@
     return type;
 }
 
+static CxJsonToken token_create(CxJson *json, bool isstring, size_t start, size_t end) { // builds a token from buffer[start, end), prefixed by any pending uncompleted content
+    cxmutstr str = cx_mutstrn((char*)json->buffer + start, end - start); // slice of the input buffer covering [start, end)
+    bool allocated = false;
+    if (json->uncompleted.tokentype != CX_JSON_NO_TOKEN) { // an earlier call left a partial token behind
+        allocated = true;
+        str = cx_strcat_m(json->uncompleted.content, 1, str); // join the saved partial content with this slice
+        if (str.ptr == NULL) {
+            return (CxJsonToken){CX_JSON_NO_TOKEN, false, 0, 0}; // token_parse_next maps NO_TOKEN to CX_JSON_BUFFER_ALLOC_FAILED
+        }
+    }
+    json->uncompleted = (CxJsonToken){0}; // clear the pending-token slot
+    CxJsonTokenType ttype;
+    if (isstring) {
+        ttype = CX_JSON_TOKEN_STRING;
+    } else { // non-string content is classified as a literal or as a number
+        if (token_isliteral(str.ptr, str.length)) {
+            ttype = CX_JSON_TOKEN_LITERAL;
+        } else {
+            ttype = token_numbertype(str.ptr, str.length);
+        }
+    }
+    if (ttype == CX_JSON_TOKEN_ERROR) { // token_parse_next reports this as CX_JSON_FORMAT_ERROR_NUMBER
+        if (allocated) {
+            cx_strfree(&str); // the joined string is not handed to the caller, so release it here
+        }
+        return (CxJsonToken){CX_JSON_TOKEN_ERROR, false, 0, 0};
+    }
+    return (CxJsonToken){ttype, allocated, str}; // 'allocated' is the flag token_destroy checks before freeing content
+}
+
 static CxJsonTokenType char2ttype(char c) {
     switch (c) {
         case '[': {
@@ -177,7 +169,18 @@
     return CX_JSON_NO_TOKEN;
 }
 
-static CxJsonToken token_parse_next(CxJson *json) {
+static enum cx_json_status token_parse_next(CxJson *json, CxJsonToken *result) {
+    // check if there is data in the buffer
+    if (json->pos >= json->size) {
+        return json->uncompleted.tokentype == CX_JSON_NO_TOKEN ?
+            CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA;
+    }
+
+    // sanity check
+    if (json->buffer == NULL) {
+        return CX_JSON_NULL_INPUT;
+    }
+
     // current token type and start index
     CxJsonTokenType ttype = json->uncompleted.tokentype;
     size_t token_start = json->pos;
@@ -186,9 +189,7 @@
         char c = json->buffer[i];
         if (ttype != CX_JSON_TOKEN_STRING) {
             // currently non-string token
-
             CxJsonTokenType ctype = char2ttype(c); // start of new token?
-
             if (ttype == CX_JSON_NO_TOKEN) {
                 if (ctype == CX_JSON_TOKEN_SPACE) {
                     continue;
@@ -199,8 +200,8 @@
                 } else if (ctype != CX_JSON_NO_TOKEN) {
                     // single-char token
                     json->pos = i + 1;
-                    CxJsonToken token = {ctype, NULL, 0, 0};
-                    return token;
+                    *result = (CxJsonToken){ctype, NULL, 0, 0};
+                    return CX_JSON_NO_ERROR;
                 } else {
                     ttype = CX_JSON_TOKEN_LITERAL; // number or literal
                     token_start = i;
@@ -208,14 +209,15 @@
             } else {
                 // finish token
                 if (ctype != CX_JSON_NO_TOKEN) {
-                    CxJsonToken ret = token_create(json, token_start, i);
-                    if (token_isliteral(ret.content, ret.length)) {
-                        ret.tokentype = CX_JSON_TOKEN_LITERAL;
-                    } else {
-                        ret.tokentype = token_numbertype(ret.content, ret.length);
+                    *result = token_create(json, false, token_start, i);
+                    if (result->tokentype == CX_JSON_NO_TOKEN) {
+                        return CX_JSON_BUFFER_ALLOC_FAILED;
+                    }
+                    if (result->tokentype == CX_JSON_TOKEN_ERROR) {
+                        return CX_JSON_FORMAT_ERROR_NUMBER;
                     }
                     json->pos = i;
-                    return ret;
+                    return CX_JSON_NO_ERROR;
                 }
             }
         } else {
@@ -224,10 +226,12 @@
                 json->tokenizer_escape = false;
             } else {
                 if (c == '"') {
-                    CxJsonToken ret = token_create(json, token_start, i + 1);
-                    ret.tokentype = CX_JSON_TOKEN_STRING;
+                    *result = token_create(json, true, token_start, i + 1);
+                    if (result->tokentype == CX_JSON_NO_TOKEN) {
+                        return CX_JSON_BUFFER_ALLOC_FAILED;
+                    }
                     json->pos = i + 1;
-                    return ret;
+                    return CX_JSON_NO_ERROR;
                 } else if (c == '\\') {
                     json->tokenizer_escape = true;
                 }
@@ -240,45 +244,44 @@
         size_t uncompeted_len = json->size - token_start;
         if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
             // current token is uncompleted
-            // save current token content in p->uncompleted
-            CxJsonToken uncompleted;
-            uncompleted.tokentype = ttype;
-            uncompleted.length = uncompeted_len;
-            uncompleted.alloc = uncompeted_len + 16;
-            char *tmp = malloc(uncompleted.alloc);
-            if (tmp) {
-                memcpy(tmp, json->buffer + token_start, uncompeted_len);
-                uncompleted.content = tmp;
-                json->uncompleted = uncompleted;
-            } else {
-                json->error = 1;
+            // save current token content
+            CxJsonToken uncompleted = {
+                ttype, true,
+                cx_strdup(cx_strn(json->buffer + token_start, uncompeted_len))
+            };
+            if (uncompleted.content.ptr == NULL) {
+                return CX_JSON_BUFFER_ALLOC_FAILED;
             }
+            json->uncompleted = uncompleted;
         } else {
             // previously we also had an uncompleted token
             // combine the uncompleted token with the current token
-            if (token_append(&json->uncompleted, json->buffer + token_start, uncompeted_len)) {
-                json->error = 1;
+            assert(json->uncompleted.allocated);
+            cxmutstr str = cx_strcat_m(json->uncompleted.content, 1,
+                cx_strn(json->buffer + token_start, uncompeted_len));
+            if (str.ptr == NULL) {
+                return CX_JSON_BUFFER_ALLOC_FAILED;
             }
+            json->uncompleted.content = str;
         }
     }
 
-    CxJsonToken ret = {CX_JSON_NO_TOKEN, NULL, 0, 0};
-    return ret;
+    return CX_JSON_INCOMPLETE_DATA;
 }
 
-static cxmutstr unescape_string(const CxAllocator *a, const char *str, size_t len) {
+static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) {
     // TODO: support more escape sequences
     // we know that the unescaped string will be shorter by at least 2 chars
     cxmutstr result;
     result.length = 0;
-    result.ptr = cxMalloc(a, len - 1);
+    result.ptr = cxMalloc(a, str.length - 1);
     if (result.ptr == NULL) {
         return result;
     }
 
     bool u = false;
-    for (size_t i = 1; i < len - 1; i++) {
-        char c = str[i];
+    for (size_t i = 1; i < str.length - 1; i++) {
+        char c = str.ptr[i];
         if (u) {
             u = false;
             if (c == 'n') {
@@ -300,15 +303,15 @@
     return result;
 }
 
-static int parse_number(const char *str, size_t len, void *value, bool asint) {
+static int parse_number(cxmutstr str, void *value, bool asint) {
     char *endptr = NULL;
     char buf[32];
-    if (len > 30) {
+    if (str.length > 30) {
         return 1;
     }
     // TODO: if we can guarantee that we are working on a copied string already, we can avoid this memcpy
-    memcpy(buf, str, len);
-    buf[len] = 0;
+    memcpy(buf, str.ptr, str.length);
+    buf[str.length] = 0;
 
     if (asint) {
         long long v = strtoll(buf, &endptr, 10);
@@ -319,7 +322,7 @@
         *((double*)value) = v;
     }
 
-    return (endptr != &buf[len]);
+    return (endptr != &buf[str.length]);
 }
 
 static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) {
@@ -445,15 +448,17 @@
     token_destroy(&token); \
     return code
 
-static int json_parse(CxJson *json) {
+static enum cx_json_status json_parse(CxJson *json) {
     // Reserve a pointer for a possibly read value
     CxJsonValue *vbuf = NULL;
 
     // grab the next token
-    CxJsonToken token = token_parse_next(json);
-    if (token.tokentype == CX_JSON_NO_TOKEN) {
-        // nothing found, wait for more data
-        return 0;
+    CxJsonToken token;
+    {
+        enum cx_json_status ret = token_parse_next(json, &token);
+        if (ret != CX_JSON_NO_ERROR) {
+            return ret;
+        }
     }
 
     // pop the current state
@@ -463,7 +468,7 @@
     // guarantee that at least two more states fit on the stack
     CxArrayReallocator state_realloc = cx_array_reallocator(NULL, json->states_internal);
     if (cx_array_simple_reserve_a(&state_realloc, json->states, 2)) {
-        return -1;
+        return CX_JSON_BUFFER_ALLOC_FAILED;
     }
 
 
@@ -483,125 +488,110 @@
         switch (token.tokentype) {
             case CX_JSON_TOKEN_BEGIN_ARRAY: {
                 if (create_json_value(json, CX_JSON_ARRAY) == NULL) {
-                    // TODO: error code - no memory
-                    return_rec(-1);
+                    return_rec(CX_JSON_VALUE_ALLOC_FAILED);
                 }
                 json_add_state(json, JP_STATE_VALUE_BEGIN_AR);
-                return_rec(1);
+                return_rec(CX_JSON_NO_ERROR);
             }
             case CX_JSON_TOKEN_BEGIN_OBJECT: {
                 if (create_json_value(json, CX_JSON_OBJECT) == NULL) {
-                    // TODO: error code - no memory
-                    return_rec(-1);
+                    return_rec(CX_JSON_VALUE_ALLOC_FAILED);
                 }
                 json_add_state(json, JP_STATE_OBJ_NAME_OR_CLOSE);
-                return_rec(1);
+                return_rec(CX_JSON_NO_ERROR);
             }
             case CX_JSON_TOKEN_STRING: {
                 if ((vbuf = create_json_value(json, CX_JSON_STRING)) == NULL) {
-                    // TODO: error code - no memory
-                    return_rec(-1);
+                    return_rec(CX_JSON_VALUE_ALLOC_FAILED);
                 }
-                cxmutstr str = unescape_string(json->allocator, token.content, token.length);
+                cxmutstr str = unescape_string(json->allocator, token.content);
                 if (str.ptr == NULL) {
-                    // TODO: error code - no memory
-                    return_rec(-1);
+                    return_rec(CX_JSON_VALUE_ALLOC_FAILED);
                 }
                 vbuf->value.string = str;
-                return_rec(1);
+                return_rec(CX_JSON_NO_ERROR);
             }
             case CX_JSON_TOKEN_INTEGER:
             case CX_JSON_TOKEN_NUMBER: {
                 int type = token.tokentype == CX_JSON_TOKEN_INTEGER ? CX_JSON_INTEGER : CX_JSON_NUMBER;
                 if (NULL == (vbuf = create_json_value(json, type))) {
-                    // TODO: error code - no memory
-                    return_rec(-1);
+                    return_rec(CX_JSON_VALUE_ALLOC_FAILED);
                 }
-                if (parse_number(token.content, token.length, &vbuf->value,type == CX_JSON_INTEGER)) {
-                    // TODO: error code - format error
-                    return_rec(-1);
+                if (parse_number(token.content, &vbuf->value,type == CX_JSON_INTEGER)) {
+                    return_rec(CX_JSON_FORMAT_ERROR_NUMBER);
                 }
-                return_rec(1);
+                return_rec(CX_JSON_NO_ERROR);
             }
             case CX_JSON_TOKEN_LITERAL: {
                 if ((vbuf = create_json_value(json, CX_JSON_LITERAL)) == NULL) {
-                    // TODO: error code - no memory
-                    return_rec(-1);
+                    return_rec(CX_JSON_VALUE_ALLOC_FAILED);
                 }
-                const char *l = token.content;
-                size_t token_len = token.length;
-                if (token_len == 4 && !memcmp(l, "true", 4)) {
+                if (0 == cx_strcmp(cx_strcast(token.content), cx_str("true"))) {
                     vbuf->value.literal = CX_JSON_TRUE;
-                } else if (token_len == 5 && !memcmp(l, "false", 5)) {
+                } else if (0 == cx_strcmp(cx_strcast(token.content), cx_str("false"))) {
                     vbuf->value.literal = CX_JSON_FALSE;
                 } else {
                     vbuf->value.literal = CX_JSON_NULL;
                 }
-                return_rec(1);
+                return_rec(CX_JSON_NO_ERROR);
             }
             default: {
-                // TODO: error code - unexpected token
-                return_rec(-1);
+                return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
             }
         }
     } else if (state == JP_STATE_ARRAY_SEP_OR_CLOSE) {
         // expect ',' or ']'
         if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
             json_add_state(json, JP_STATE_VALUE_BEGIN_AR);
-            return_rec(1);
+            return_rec(CX_JSON_NO_ERROR);
         } else if (token.tokentype == CX_JSON_TOKEN_END_ARRAY) {
             // discard the array from the value buffer
             json->vbuf_size--;
-            return_rec(1);
+            return_rec(CX_JSON_NO_ERROR);
         } else {
-            // TODO: error code - unexpected token
-            return_rec(-1);
+            return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
         }
     } else if (state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) {
         if (state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
             // discard the obj from the value buffer
             json->vbuf_size--;
-            return_rec(1);
+            return_rec(CX_JSON_NO_ERROR);
         } else {
             // expect string
             if (token.tokentype != CX_JSON_TOKEN_STRING) {
-                // TODO: error code - unexpected token
-                return_rec(-1);
+                return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
             }
 
             // add new entry
-            cxmutstr name = unescape_string(json->allocator, token.content, token.length);
+            cxmutstr name = unescape_string(json->allocator, token.content);
             if (name.ptr == NULL) {
-                // TODO: error code - no mem
-                return_rec(-1);
+                return_rec(CX_JSON_VALUE_ALLOC_FAILED);
             }
             json_obj_add_entry(json, name.ptr);
 
             // next state
             json_add_state(json, JP_STATE_OBJ_COLON);
-            return_rec(1);
+            return_rec(CX_JSON_NO_ERROR);
         }
     } else if (state == JP_STATE_OBJ_COLON) {
         // expect ':'
         if (token.tokentype != CX_JSON_TOKEN_NAME_SEPARATOR) {
-            // TODO: error code - unexpected token
-            return_rec(-1);
+            return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
         }
         // next state
         json_add_state(json, JP_STATE_VALUE_BEGIN_OBJ);
-        return_rec(1);
+        return_rec(CX_JSON_NO_ERROR);
     } else if (state == JP_STATE_OBJ_SEP_OR_CLOSE) {
         // expect ',' or '}'
         if (token.tokentype == CX_JSON_TOKEN_VALUE_SEPARATOR) {
             json_add_state(json, JP_STATE_OBJ_NAME);
-            return_rec(1);
+            return_rec(CX_JSON_NO_ERROR);
         } else if (token.tokentype == CX_JSON_TOKEN_END_OBJECT) {
             // discard the obj from the value buffer
             json->vbuf_size--;
-            return_rec(1);
+            return_rec(CX_JSON_NO_ERROR);
         } else {
-            // TODO: error code - unexpected token
-            return_rec(-1);
+            return_rec(CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN);
         }
     } else {
         // should be unreachable
@@ -610,17 +600,15 @@
     }
 }
 
-int cxJsonNext(CxJson *json, CxJsonValue **value) {
-    // TODO: replace int with a status enum like in CxProperties
-
+CxJsonStatus cxJsonNext(CxJson *json, CxJsonValue **value) {
     // initialize output value
     *value = &cx_json_value_nothing;
 
     // parse data
-    int result;
+    CxJsonStatus result;
     do {
         result = json_parse(json);
-        if (result == 1 && json->states_size == 1) {
+        if (result == CX_JSON_NO_ERROR && json->states_size == 1) {
             // final state reached
             assert(json->states[0] == JP_STATE_VALUE_END);
             assert(json->vbuf_size == 0);
@@ -632,9 +620,16 @@
             // re-initialize state machine
             json->states[0] = JP_STATE_VALUE_BEGIN;
 
-            return 1;
+            return CX_JSON_NO_ERROR;
         }
-    } while (result == 1);
+    } while (result == CX_JSON_NO_ERROR);
+
+    // the parser might think there is no data
+    // but when we did not reach the final state,
+    // we know that there must be more to come
+    if (result == CX_JSON_NO_DATA && json->states_size > 1) {
+        return CX_JSON_INCOMPLETE_DATA;
+    }
 
     return result;
 }

mercurial