implement string escape - resolves #526

Fri, 10 Jan 2025 23:11:08 +0100

author
Mike Becker <universe@uap-core.de>
date
Fri, 10 Jan 2025 23:11:08 +0100
changeset 1119
ff4d7e76f85a
parent 1118
9fa87f9882ba
child 1120
608bdcc8f9ad

implement string escape - resolves #526

src/json.c file | annotate | diff | comparison | revisions
tests/test_json.c file | annotate | diff | comparison | revisions
--- a/src/json.c	Fri Jan 10 15:03:58 2025 +0100
+++ b/src/json.c	Fri Jan 10 23:11:08 2025 +0100
@@ -345,7 +345,7 @@
 
 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) {
     // TODO: support more escape sequences
-    // we know that the unescaped string will be shorter by at least 2 chars
+    // TODO: to be consistent with escape_string() we might want to expect that the enclosing quotes were already removed
     cxmutstr result;
     result.length = 0;
     result.ptr = cxMalloc(a, str.length - 1);
@@ -375,6 +375,60 @@
     return result;
 }
 
+/**
+ * Escapes str for JSON output: '"', '\\', '/' and every byte isprint() rejects.
+ * Returns str unchanged if nothing needs escaping, otherwise a malloc()'d copy
+ * the caller must free; the result has a NULL ptr when malloc() fails.
+ */
+static cxmutstr escape_string(cxmutstr str) {
+    CxBuffer buf = {0};
+
+    bool all_printable = true;
+    for (size_t i = 0; i < str.length; i++) {
+        // ctype functions require an unsigned char value (plain char may be negative)
+        const unsigned char c = (unsigned char) str.ptr[i];
+        bool escape = !isprint(c)
+            || c == '\\'
+            || c == '"'
+            // TODO: make escaping slash optional
+            || c == '/';
+
+        if (all_printable && escape) {
+            // first escape found: allocate the buffer and copy the clean prefix
+            size_t capa = str.length + 32;
+            char *space = malloc(capa);
+            if (space == NULL) return cx_mutstrn(NULL, 0);
+            cxBufferInit(&buf, space, capa, NULL, CX_BUFFER_AUTO_EXTEND);
+            cxBufferWrite(str.ptr, 1, i, &buf);
+            all_printable = false;
+        }
+        if (escape) {
+            cxBufferPut(&buf, '\\');
+            char code[6]; // 'u' + four hex digits + terminator
+            switch (c) {
+                case '\"': cxBufferPut(&buf, '\"'); break;
+                case '\n': cxBufferPut(&buf, 'n'); break;
+                case '\t': cxBufferPut(&buf, 't'); break;
+                case '\r': cxBufferPut(&buf, 'r'); break;
+                case '\\': cxBufferPut(&buf, '\\'); break;
+                case '/': cxBufferPut(&buf, '/'); break;
+                case '\f': cxBufferPut(&buf, 'f'); break;
+                case '\b': cxBufferPut(&buf, 'b'); break;
+                default:
+                    snprintf(code, sizeof(code), "u%04x", (unsigned int) c);
+                    cxBufferPutString(&buf, code);
+            }
+        } else if (!all_printable) {
+            cxBufferPut(&buf, str.ptr[i]);
+        }
+    }
+    if (!all_printable) {
+        str = cx_mutstrn(buf.space, buf.size);
+    }
+    cxBufferDestroy(&buf);
+    return str;
+}
+
 static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) {
     CxJsonValue *v = cxCalloc(json->allocator, 1, sizeof(CxJsonValue));
     if (v == NULL) return NULL; // LCOV_EXCL_LINE
@@ -1084,9 +1138,11 @@
 
                 // the name
                 actual += wfunc("\"", 1, 1, target);
-                // TODO: escape the string
-                actual += wfunc(member->name.ptr, 1,
-                    member->name.length, target);
+                cxmutstr name = escape_string(member->name);
+                actual += wfunc(name.ptr, 1, name.length, target);
+                if (name.ptr != member->name.ptr) {
+                    cx_strfree(&name);
+                }
                 actual += wfunc("\"", 1, 1, target);
                 const char *obj_name_sep = ": ";
                 if (settings->pretty) {
@@ -1152,9 +1208,11 @@
         }
         case CX_JSON_STRING: {
             actual += wfunc("\"", 1, 1, target);
-            // TODO: escape the string
-            actual += wfunc(value->value.string.ptr, 1,
-                value->value.string.length, target);
+            cxmutstr str = escape_string(value->value.string);
+            actual += wfunc(str.ptr, 1, str.length, target);
+            if (str.ptr != value->value.string.ptr) {
+                cx_strfree(&str);
+            }
             actual += wfunc("\"", 1, 1, target);
             expected += 2 + value->value.string.length;
             break;
--- a/tests/test_json.c	Fri Jan 10 15:03:58 2025 +0100
+++ b/tests/test_json.c	Fri Jan 10 23:11:08 2025 +0100
@@ -894,7 +894,7 @@
     cxBufferInit(&buf, NULL, 32, NULL, 0);
     CX_TEST_DO {
         // test default settings (6 digits)
-        cxJsonWrite(&buf,num, cxBufferWriteFunc, &writer);
+        cxJsonWrite(&buf, num, cxBufferWriteFunc, &writer);
         CX_TEST_ASSERT(0 == cx_strcmp(cx_strn(buf.space, buf.size), CX_STR("3.141592")));
 
         // test too many digits
@@ -939,6 +939,50 @@
     cxJsonValueFree(num);
 }
 
+CX_TEST(test_json_write_string_escape) {
+    /** Writes one string value in compact mode and checks every escape in the output.
+     * According to RFC-8259 we have to test the following characters:
+     *    "    quotation mark
+     *    \    reverse solidus
+     *    /    solidus
+     *    b    backspace
+     *    f    form feed
+     *    n    line feed
+     *    r    carriage return
+     *    t    tab
+     * All other control characters must be escaped as uXXXX - in our example the bell character.
+     * The 'ö' is expected byte-wise as u00c3 u00b6 (its UTF-8 bytes) - NOTE(review): confirm this is intended.
+     */
+    CxJsonValue* str = cxJsonCreateString(NULL,
+        "hello\twörld\r\nthis/is\\a \"string\"\b in \a string\f");
+    CxJsonWriter writer = cxJsonWriterCompact();
+    CxBuffer buf;
+    cxBufferInit(&buf, NULL, 128, NULL, 0);
+    CX_TEST_DO {
+        cxJsonWrite(&buf, str, cxBufferWriteFunc, &writer);
+        CX_TEST_ASSERT(0 == cx_strcmp(cx_strn(buf.space, buf.size),
+            CX_STR("\"hello\\tw\\u00c3\\u00b6rld\\r\\nthis\\/is\\\\a \\\"string\\\"\\b in \\u0007 string\\f\"")));
+    }
+    cxBufferDestroy(&buf);
+    cxJsonValueFree(str);
+}
+
+CX_TEST(test_json_write_name_escape) { // an object member name must be written escaped
+    CxJsonValue* obj = cxJsonCreateObj(NULL);
+    cxJsonObjPutLiteral(obj, // name mixes tab, CR/LF, slash, backslash, quotes, backspace, bell, form feed and 'ö'
+        CX_STR("hello\twörld\r\nthis/is\\a \"string\"\b in \a string\f"), CX_JSON_TRUE);
+    CxJsonWriter writer = cxJsonWriterCompact();
+    CxBuffer buf;
+    cxBufferInit(&buf, NULL, 128, NULL, 0);
+    CX_TEST_DO {
+        cxJsonWrite(&buf, obj, cxBufferWriteFunc, &writer); // compact writer: no whitespace expected around the name
+        CX_TEST_ASSERT(0 == cx_strcmp(cx_strn(buf.space, buf.size), // exact match against the escaped object text
+            CX_STR("{\"hello\\tw\\u00c3\\u00b6rld\\r\\nthis\\/is\\\\a \\\"string\\\"\\b in \\u0007 string\\f\":true}")));
+    }
+    cxBufferDestroy(&buf);
+    cxJsonValueFree(obj);
+}
+
 CxTestSuite *cx_test_suite_json(void) {
     CxTestSuite *suite = cx_test_suite_new("json");
 
@@ -962,6 +1006,8 @@
     cx_test_register(suite, test_json_write_pretty_default_tabs);
     cx_test_register(suite, test_json_write_pretty_preserve_order);
     cx_test_register(suite, test_json_write_frac_max_digits);
+    cx_test_register(suite, test_json_write_string_escape);
+    cx_test_register(suite, test_json_write_name_escape);
     
     return suite;
 }

mercurial