# Patch for tests/test_json.c: extends the JSON string-escape tests.
#
# Hunk 1 (test_json_escaped_strings, input text):
#   - The removed "object" member line had a stray `}"` after the closing
#     quote of its value; the replacement ends the member with a comma.
#   - Adds a "ctrl-chars" member whose value uses the JSON escapes
#     \\ \r \n \f \/ and \b.
#
# Hunk 2:
#   - test_json_escaped_strings now also asserts that "ctrl-chars" decodes
#     to backslash, "foo", CR, LF, "bar", FF, "*ring/ring*", BS.
#   - New test_json_escaped_unicode_strings: parses one object whose members
#     use \uXXXX escapes (ASCII letters, U+00DF in both hex cases, raw UTF-8
#     mixed with escapes, U+03A3/U+29B0, and surrogate pairs) and compares
#     each decoded value with the expected string literal.
#   - New test_json_escaped_unicode_malformed: feeds six standalone string
#     values and expects CX_JSON_NO_ERROR for each one. The expected results
#     keep the escape text verbatim for: too few hex digits, a lone high
#     surrogate, a lone low surrogate, two high surrogates, and a high
#     surrogate followed by a too-short escape. For "\u00E456" the test
#     expects the first four digits to be decoded, followed by "56".
#
# Hunk 3: registers the two new tests in the suite, directly after
#   test_json_escaped_strings.
#
# NOTE(review): the line structure below was restored from a whitespace-
# collapsed copy of the patch; indentation inside the records is
# conventional, not guaranteed to match the target file byte-for-byte.
--- a/tests/test_json.c Sat Jan 25 15:22:01 2025 +0100
+++ b/tests/test_json.c Tue Jan 28 18:31:17 2025 +0100
@@ -120,7 +120,8 @@
 CX_TEST(test_json_escaped_strings) {
     cxstring text = cx_str(
             "{\n"
-            "\t\"object\":\"{\\n\\t\\\"object\\\":null\\n}\"}\"\n"
+            "\t\"object\":\"{\\n\\t\\\"object\\\":null\\n}\",\n"
+            "\t\"ctrl-chars\":\"\\\\foo\\r\\nbar\\f*ring\\/ring*\\b\"\n"
             "}"
     );
 
@@ -138,6 +139,152 @@
                 cxJsonAsCxString(object),
                 CX_STR("{\n\t\"object\":null\n}"))
         );
+        CxJsonValue *ctrl = cxJsonObjGet(obj, "ctrl-chars");
+        CX_TEST_ASSERT(cxJsonIsString(ctrl));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(ctrl),
+                CX_STR("\\foo\r\nbar\f*ring/ring*\b"))
+        );
+        cxJsonValueFree(obj);
+    }
+    cxJsonDestroy(&json);
+}
+
+CX_TEST(test_json_escaped_unicode_strings) {
+    cxstring text = cx_str(
+            "{\n"
+            "\"ascii\":\"\\u0041\\u0053\\u0043\\u0049\\u0049\",\n"
+            "\"unicode\":\"\\u00df\\u00DF\",\n"
+            "\"mixed\":\"mixed ä ö \\u00e4 \\u00f6\",\n"
+            "\"wide\":\"\\u03a3\\u29b0\",\n"
+            "\"surrogatepair1\":\"\\ud83e\\udff5\",\n"
+            "\"surrogatepair2\":\"test\\ud83e\\udff1AA\"\n,"
+            "\"mixed2\":\"123\\u03a3\\ud83e\\udfc5\\u00df\""
+            "}"
+    );
+
+    CxJson json;
+    cxJsonInit(&json, NULL);
+    CX_TEST_DO {
+        cxJsonFill(&json, text);
+        CxJsonValue *obj;
+        CxJsonStatus result = cxJsonNext(&json, &obj);
+        CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
+        CX_TEST_ASSERT(cxJsonIsObject(obj));
+
+        CxJsonValue *ascii = cxJsonObjGet(obj, "ascii");
+        CX_TEST_ASSERT(cxJsonIsString(ascii));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(ascii),
+                CX_STR("ASCII"))
+        );
+
+        CxJsonValue *unicode = cxJsonObjGet(obj, "unicode");
+        CX_TEST_ASSERT(cxJsonIsString(unicode));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(unicode),
+                CX_STR("ßß"))
+        );
+
+        CxJsonValue *mixed = cxJsonObjGet(obj, "mixed");
+        CX_TEST_ASSERT(cxJsonIsString(mixed));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(mixed),
+                CX_STR("mixed ä ö ä ö"))
+        );
+
+        CxJsonValue *wide = cxJsonObjGet(obj, "wide");
+        CX_TEST_ASSERT(cxJsonIsString(wide));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(wide),
+                CX_STR("\u03a3\u29b0"))
+        );
+
+        CxJsonValue *surrogatepair1 = cxJsonObjGet(obj, "surrogatepair1");
+        CX_TEST_ASSERT(cxJsonIsString(surrogatepair1));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(surrogatepair1),
+                CX_STR("\xf0\x9f\xaf\xb5"))
+        );
+
+        CxJsonValue *surrogatepair2 = cxJsonObjGet(obj, "surrogatepair2");
+        CX_TEST_ASSERT(cxJsonIsString(surrogatepair2));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(surrogatepair2),
+                CX_STR("test\xf0\x9f\xaf\xb1" "AA"))
+        );
+
+        CxJsonValue *mixed2 = cxJsonObjGet(obj, "mixed2");
+        CX_TEST_ASSERT(cxJsonIsString(mixed2));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(mixed2),
+                CX_STR("123\u03a3\xf0\x9f\xaf\x85ß"))
+        );
+
+        cxJsonValueFree(obj);
+    }
+    cxJsonDestroy(&json);
+}
+
+CX_TEST(test_json_escaped_unicode_malformed) {
+    CxJson json;
+    cxJsonInit(&json, NULL);
+    CxJsonValue *obj;
+    CxJsonStatus result;
+    CX_TEST_DO {
+        cxJsonFill(&json, "\"too few \\u123 digits\"");
+        result = cxJsonNext(&json, &obj);
+        CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
+        CX_TEST_ASSERT(cxJsonIsString(obj));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(obj),
+                CX_STR("too few \\u123 digits")
+        ));
+        cxJsonValueFree(obj);
+        cxJsonFill(&json, "\"too many \\u00E456 digits\"");
+        result = cxJsonNext(&json, &obj);
+        CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
+        CX_TEST_ASSERT(cxJsonIsString(obj));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(obj),
+                CX_STR("too many ä56 digits")
+        ));
+        cxJsonValueFree(obj);
+        cxJsonFill(&json, "\"only high \\uD800 surrogate\"");
+        result = cxJsonNext(&json, &obj);
+        CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
+        CX_TEST_ASSERT(cxJsonIsString(obj));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(obj),
+                CX_STR("only high \\uD800 surrogate")
+        ));
+        cxJsonValueFree(obj);
+        cxJsonFill(&json, "\"only low \\uDC00 surrogate\"");
+        result = cxJsonNext(&json, &obj);
+        CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
+        CX_TEST_ASSERT(cxJsonIsString(obj));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(obj),
+                CX_STR("only low \\uDC00 surrogate")
+        ));
+        cxJsonValueFree(obj);
+        cxJsonFill(&json, "\"two high \\uD800\\uD800 surrogates\"");
+        result = cxJsonNext(&json, &obj);
+        CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
+        CX_TEST_ASSERT(cxJsonIsString(obj));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(obj),
+                CX_STR("two high \\uD800\\uD800 surrogates")
+        ));
+        cxJsonValueFree(obj);
+        cxJsonFill(&json, "\"high plus bullshit \\uD800\\u567 foo\"");
+        result = cxJsonNext(&json, &obj);
+        CX_TEST_ASSERT(result == CX_JSON_NO_ERROR);
+        CX_TEST_ASSERT(cxJsonIsString(obj));
+        CX_TEST_ASSERT(0 == cx_strcmp(
+                cxJsonAsCxString(obj),
+                CX_STR("high plus bullshit \\uD800\\u567 foo")
+        ));
         cxJsonValueFree(obj);
     }
     cxJsonDestroy(&json);
@@ -1042,6 +1189,8 @@
     cx_test_register(suite, test_json_init_default);
     cx_test_register(suite, test_json_simple_object);
     cx_test_register(suite, test_json_escaped_strings);
+    cx_test_register(suite, test_json_escaped_unicode_strings);
+    cx_test_register(suite, test_json_escaped_unicode_malformed);
     cx_test_register(suite, test_json_escaped_end_of_string);
     cx_test_register(suite, test_json_object_incomplete_token);
     cx_test_register(suite, test_json_token_wrongly_completed);