fix that cxBufferSeek() cannot move pos to the (past-the-)end position, i.e. pos == size - fixes #523

6 weeks ago

author
Mike Becker <universe@uap-core.de>
date
Tue, 10 Dec 2024 00:19:45 +0100 (6 weeks ago)
changeset 1007
81b2986d2b04
parent 1006
8ee818fa29f7
child 1008
3b69f025f083

fix that cxBufferSeek() cannot move pos to the (past-the-)end position, i.e. pos == size - fixes #523

src/Makefile file | annotate | diff | comparison | revisions
src/buffer.c file | annotate | diff | comparison | revisions
src/cx/json.h file | annotate | diff | comparison | revisions
src/json.c file | annotate | diff | comparison | revisions
tests/Makefile file | annotate | diff | comparison | revisions
tests/test_buffer.c file | annotate | diff | comparison | revisions
tests/test_json.c file | annotate | diff | comparison | revisions
--- a/src/Makefile	Tue Dec 10 00:09:55 2024 +0100
+++ b/src/Makefile	Tue Dec 10 00:19:45 2024 +0100
@@ -100,8 +100,8 @@
 	$(CC) -o $@ $(CFLAGS)  -c $<
 
 $(build_dir)/json$(OBJ_EXT): json.c cx/json.h cx/common.h cx/allocator.h \
- cx/string.h cx/array_list.h cx/list.h cx/collection.h cx/iterator.h \
- cx/compare.h
+ cx/string.h cx/buffer.h cx/array_list.h cx/list.h cx/collection.h \
+ cx/iterator.h cx/compare.h
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS)  -c $<
 
--- a/src/buffer.c	Tue Dec 10 00:09:55 2024 +0100
+++ b/src/buffer.c	Tue Dec 10 00:19:45 2024 +0100
@@ -123,7 +123,7 @@
         return -1;
     }
 
-    if (npos >= buffer->size) {
+    if (npos > buffer->size) {
         return -1;
     } else {
         buffer->pos = npos;
--- a/src/cx/json.h	Tue Dec 10 00:09:55 2024 +0100
+++ b/src/cx/json.h	Tue Dec 10 00:19:45 2024 +0100
@@ -39,6 +39,7 @@
 #include "common.h"
 #include "allocator.h"
 #include "string.h"
+#include "buffer.h"
 #include "array_list.h"
 
 #ifdef __cplusplus
@@ -129,9 +130,7 @@
 
 struct cx_json_s {
     const CxAllocator *allocator;
-    const char *buffer;
-    size_t size;
-    size_t pos;
+    CxBuffer buffer;
 
     CxJsonToken uncompleted;
 
@@ -194,10 +193,6 @@
      */
     CX_JSON_OK,
     /**
-     * Input buffer is \c NULL.
-     */
-    CX_JSON_NULL_INPUT,
-    /**
      * Allocating memory for the internal buffer failed.
      */
     CX_JSON_BUFFER_ALLOC_FAILED,
--- a/src/json.c	Tue Dec 10 00:09:55 2024 +0100
+++ b/src/json.c	Tue Dec 10 00:19:45 2024 +0100
@@ -29,6 +29,7 @@
 #include <string.h>
 #include <ctype.h>
 #include <assert.h>
+#include <stdio.h>
 
 #include "cx/json.h"
 
@@ -108,13 +109,13 @@
 }
 
 static CxJsonToken token_create(CxJson *json, bool isstring, size_t start, size_t end) {
-    cxmutstr str = cx_mutstrn((char*)json->buffer + start, end - start);
+    cxmutstr str = cx_mutstrn(json->buffer.space + start, end - start);
     bool allocated = false;
     if (json->uncompleted.tokentype != CX_JSON_NO_TOKEN) {
         allocated = true;
         str = cx_strcat_m(json->uncompleted.content, 1, str);
         if (str.ptr == NULL) {
-            return (CxJsonToken){CX_JSON_NO_TOKEN, false, 0, 0};
+            return (CxJsonToken){CX_JSON_NO_TOKEN, false, {0, 0}};
         }
     }
     json->uncompleted = (CxJsonToken){0};
@@ -132,7 +133,7 @@
         if (allocated) {
             cx_strfree(&str);
         }
-        return (CxJsonToken){CX_JSON_TOKEN_ERROR, false, 0, 0};
+        return (CxJsonToken){CX_JSON_TOKEN_ERROR, false, {0, 0}};
     }
     return (CxJsonToken){ttype, allocated, str};
 }
@@ -171,22 +172,17 @@
 
 static enum cx_json_status token_parse_next(CxJson *json, CxJsonToken *result) {
     // check if there is data in the buffer
-    if (json->pos >= json->size) {
+    if (cxBufferEof(&json->buffer)) {
         return json->uncompleted.tokentype == CX_JSON_NO_TOKEN ?
             CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA;
     }
 
-    // sanity check
-    if (json->buffer == NULL) {
-        return CX_JSON_NULL_INPUT;
-    }
-
     // current token type and start index
     CxJsonTokenType ttype = json->uncompleted.tokentype;
-    size_t token_start = json->pos;
+    size_t token_start = json->buffer.pos;
 
-    for (size_t i = json->pos; i < json->size; i++) {
-        char c = json->buffer[i];
+    for (size_t i = json->buffer.pos; i < json->buffer.size; i++) {
+        char c = json->buffer.space[i];
         if (ttype != CX_JSON_TOKEN_STRING) {
             // currently non-string token
             CxJsonTokenType ctype = char2ttype(c); // start of new token?
@@ -199,8 +195,8 @@
                     token_start = i;
                 } else if (ctype != CX_JSON_NO_TOKEN) {
                     // single-char token
-                    json->pos = i + 1;
-                    *result = (CxJsonToken){ctype, NULL, 0, 0};
+                    json->buffer.pos = i + 1;
+                    *result = (CxJsonToken){ctype, NULL, {0, 0}};
                     return CX_JSON_NO_ERROR;
                 } else {
                     ttype = CX_JSON_TOKEN_LITERAL; // number or literal
@@ -216,7 +212,7 @@
                     if (result->tokentype == CX_JSON_TOKEN_ERROR) {
                         return CX_JSON_FORMAT_ERROR_NUMBER;
                     }
-                    json->pos = i;
+                    json->buffer.pos = i;
                     return CX_JSON_NO_ERROR;
                 }
             }
@@ -230,7 +226,7 @@
                     if (result->tokentype == CX_JSON_NO_TOKEN) {
                         return CX_JSON_BUFFER_ALLOC_FAILED;
                     }
-                    json->pos = i + 1;
+                    json->buffer.pos = i + 1;
                     return CX_JSON_NO_ERROR;
                 } else if (c == '\\') {
                     json->tokenizer_escape = true;
@@ -241,13 +237,13 @@
 
     if (ttype != CX_JSON_NO_TOKEN) {
         // uncompleted token
-        size_t uncompeted_len = json->size - token_start;
+        size_t uncompeted_len = json->buffer.size - token_start;
         if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
             // current token is uncompleted
             // save current token content
             CxJsonToken uncompleted = {
                 ttype, true,
-                cx_strdup(cx_strn(json->buffer + token_start, uncompeted_len))
+                cx_strdup(cx_strn(json->buffer.space + token_start, uncompeted_len))
             };
             if (uncompleted.content.ptr == NULL) {
                 return CX_JSON_BUFFER_ALLOC_FAILED;
@@ -258,7 +254,7 @@
             // combine the uncompleted token with the current token
             assert(json->uncompleted.allocated);
             cxmutstr str = cx_strcat_m(json->uncompleted.content, 1,
-                cx_strn(json->buffer + token_start, uncompeted_len));
+                cx_strn(json->buffer.space + token_start, uncompeted_len));
             if (str.ptr == NULL) {
                 return CX_JSON_BUFFER_ALLOC_FAILED;
             }
@@ -305,24 +301,26 @@
 
 static int parse_number(cxmutstr str, void *value, bool asint) {
     char *endptr = NULL;
-    char buf[32];
     if (str.length > 30) {
         return 1;
     }
-    // TODO: if we can guarantee that we are working on a copied string already, we can avoid this memcpy
-    memcpy(buf, str.ptr, str.length);
-    buf[str.length] = 0;
+    // the buffer guarantees that we are working on a copied string
+    char c = str.ptr[str.length];
+    str.ptr[str.length] = 0;
 
     if (asint) {
-        long long v = strtoll(buf, &endptr, 10);
+        long long v = strtoll(str.ptr, &endptr, 10);
         *((int64_t*)value) = (int64_t) v;
     } else {
         // TODO: proper JSON spec number parser
-        double v = strtod(buf, &endptr);
+        double v = strtod(str.ptr, &endptr);
         *((double*)value) = v;
     }
 
-    return (endptr != &buf[str.length]);
+    // recover from the hack
+    str.ptr[str.length] = c;
+
+    return endptr != &str.ptr[str.length];
 }
 
 static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) {
@@ -417,9 +415,12 @@
 
     json->vbuf = json->vbuf_internal;
     json->vbuf_capacity = cx_nmemb(json->vbuf_internal);
+
+    cxBufferInit(&json->buffer, NULL, 256, NULL, CX_BUFFER_AUTO_EXTEND);
 }
 
 void cxJsonDestroy(CxJson *json) {
+    cxBufferDestroy(&json->buffer);
     if (json->states != json->states_internal) {
         free(json->states);
     }
@@ -431,11 +432,16 @@
 }
 
 int cxJsonFilln(CxJson *json, const char *buf, size_t size) {
-    // TODO: implement rescue buffer like in CxProperties to allow subsequent fills
-    json->buffer = buf;
-    json->size = size;
-    json->pos = 0;
-    return 0;
+    // we use the UCX buffer to write the data
+    // but reset the position immediately to enable parsing
+    size_t old_pos = json->buffer.pos;
+    cxBufferSeek(&json->buffer, 0, SEEK_END);
+    size_t written = cxBufferWrite(buf, 1, size, &json->buffer);
+    if (0 == cxBufferTerminate(&json->buffer)) {
+        written++;
+    }
+    json->buffer.pos = old_pos;
+    return written != size + 1;
 }
 
 static void json_add_state(CxJson *json, int state) {
--- a/tests/Makefile	Tue Dec 10 00:09:55 2024 +0100
+++ b/tests/Makefile	Tue Dec 10 00:19:45 2024 +0100
@@ -90,8 +90,9 @@
 $(TEST_DIR)/test_json$(OBJ_EXT): test_json.c util_allocator.h \
  ../src/cx/allocator.h ../src/cx/common.h ../src/cx/test.h \
  ../src/cx/json.h ../src/cx/allocator.h ../src/cx/string.h \
- ../src/cx/array_list.h ../src/cx/list.h ../src/cx/collection.h \
- ../src/cx/iterator.h ../src/cx/compare.h ../src/cx/mempool.h
+ ../src/cx/buffer.h ../src/cx/array_list.h ../src/cx/list.h \
+ ../src/cx/collection.h ../src/cx/iterator.h ../src/cx/compare.h \
+ ../src/cx/mempool.h
 	@echo "Compiling $<"
 	$(CC) -o $@ $(CFLAGS) -I../src -c $<
 
--- a/tests/test_buffer.c	Tue Dec 10 00:09:55 2024 +0100
+++ b/tests/test_buffer.c	Tue Dec 10 00:19:45 2024 +0100
@@ -263,7 +263,7 @@
     buf.size = 6;
     buf.pos = 3;
     CX_TEST_DO {
-        int result = cxBufferSeek(&buf, 6, SEEK_SET);
+        int result = cxBufferSeek(&buf, 7, SEEK_SET);
         CX_TEST_ASSERT(result != 0);
         CX_TEST_ASSERT(buf.pos == 3);
     }
@@ -315,7 +315,7 @@
     buf.size = 6;
     buf.pos = 3;
     CX_TEST_DO {
-        int result = cxBufferSeek(&buf, 3, SEEK_CUR);
+        int result = cxBufferSeek(&buf, 4, SEEK_CUR);
         CX_TEST_ASSERT(result != 0);
         CX_TEST_ASSERT(buf.pos == 3);
     }
@@ -342,9 +342,8 @@
     buf.pos = 3;
     CX_TEST_DO {
         int result = cxBufferSeek(&buf, 0, SEEK_END);
-        // the (past-the-)end position is always invalid
-        CX_TEST_ASSERT(result != 0);
-        CX_TEST_ASSERT(buf.pos == 3);
+        CX_TEST_ASSERT(result == 0);
+        CX_TEST_ASSERT(buf.pos == 6);
     }
     cxBufferDestroy(&buf);
 }
--- a/tests/test_json.c	Tue Dec 10 00:09:55 2024 +0100
+++ b/tests/test_json.c	Tue Dec 10 00:19:45 2024 +0100
@@ -142,7 +142,7 @@
         cxJsonFill(&json, parts[nparts - 1]);
         result = cxJsonNext(&json, &obj);
         CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
-        CX_TEST_ASSERT(obj);
+        CX_TEST_ASSERT(cxJsonIsObject(obj));
         
         CxJsonValue *message = cxJsonObjGet(obj, "message");
         CX_TEST_ASSERT(cxJsonIsString(message));
@@ -166,6 +166,43 @@
     }
 }
 
+CX_TEST(test_json_subsequent_fill) {
+    cxstring text = cx_str(
+            "{\"message\":\"success\"  ,     \"__timestamp\":1729348561}");
+
+    cxstring part1 = cx_strsubsl(text, 0, 25);
+    cxstring part2 = cx_strsubs(text, 25);
+
+    CX_TEST_DO {
+        CxJson json;
+        cxJsonInit(&json, NULL);
+        CxJsonValue *obj;
+
+        cxJsonFill(&json, part1);
+        cxJsonFill(&json, part2);
+        CxJsonStatus result = cxJsonNext(&json, &obj);
+        CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
+        CX_TEST_ASSERT(cxJsonIsObject(obj));
+
+        CxJsonValue *message = cxJsonObjGet(obj, "message");
+        CX_TEST_ASSERT(cxJsonIsString(message));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(message),
+                cx_str("success"))
+        );
+        CxJsonValue *timestamp = cxJsonObjGet(obj, "__timestamp");
+        CX_TEST_ASSERT(message->type == CX_JSON_STRING);
+        CX_TEST_ASSERT(cxJsonIsInteger(timestamp));
+        CX_TEST_ASSERT(cxJsonAsInteger(timestamp) == 1729348561);
+
+        cxJsonValueFree(obj);
+        result = cxJsonNext(&json, &obj);
+        CX_TEST_ASSERT(result == CX_JSON_NO_DATA);
+
+        cxJsonDestroy(&json);
+    }
+}
+
 CX_TEST(test_json_object_error) {
     cxstring text0 = cx_str(
             "{\n"
@@ -408,6 +445,7 @@
     cx_test_register(suite, test_json_simple_object);
     cx_test_register(suite, test_json_object_incomplete_token);
     cx_test_register(suite, test_json_object_error);
+    cx_test_register(suite, test_json_subsequent_fill);
     cx_test_register(suite, test_json_large_nesting_depth);
     cx_test_register(suite, test_json_number);
     cx_test_register(suite, test_json_multiple_values);

mercurial