src/json.c

changeset 1149
df5665de7344
parent 1139
7dfa5bcf39ee
child 1150
7b0bd5e76b5d
--- a/src/json.c	Wed Jan 22 21:02:46 2025 +0100
+++ b/src/json.c	Sat Jan 25 16:13:28 2025 +0100
@@ -353,6 +353,24 @@
     return CX_JSON_INCOMPLETE_DATA;
 }
 
+/**
+ * Encodes a Unicode code point as UTF-8.
+ *
+ * The buffer must have room for at least four bytes; no terminator is written.
+ * Code points in the surrogate range (0xD800-0xDFFF) are not treated
+ * specially and are encoded as three bytes like any other value up to 0xFFFF.
+ *
+ * @param codepoint the code point to encode
+ * @param output_buf the buffer receiving the encoded bytes
+ * @return the number of bytes written (1 to 4), or zero when the
+ * code point is above 0x10FFFF and therefore cannot be encoded
+ */
+static int codepoint_to_utf8(uint32_t codepoint, char *output_buf) {
+    if (codepoint <= 0x7F) {
+        // 7 bits: plain ASCII, single byte
+        output_buf[0] = (char)codepoint;
+        return 1;
+    } else if (codepoint <= 0x7FF) {
+        // 11 bits: 110xxxxx 10xxxxxx
+        output_buf[0] = (char)(0xC0 | ((codepoint >> 6) & 0x1F));
+        output_buf[1] = (char)(0x80 | (codepoint & 0x3F));
+        return 2;
+    } else if (codepoint <= 0xFFFF) {
+        // 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
+        output_buf[0] = (char)(0xE0 | ((codepoint >> 12) & 0x0F));
+        output_buf[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
+        output_buf[2] = (char)(0x80 | (codepoint & 0x3F));
+        return 3;
+    } else if (codepoint <= 0x10FFFF) {
+        // 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        output_buf[0] = (char)(0xF0 | ((codepoint >> 18) & 0x07));
+        output_buf[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
+        output_buf[2] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
+        output_buf[3] = (char)(0x80 | (codepoint & 0x3F));
+        return 4;
+    }
+
+    // outside the Unicode range: nothing has been written
+    return 0;
+}
+
 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) {
     // note: this function expects that str contains the enclosing quotes!
 
@@ -380,8 +398,27 @@
                 c = '\f';
             } else if (c == 'b') {
                 c = '\b';
+            } else if (c == 'u') {
+                if (i+4 < str.length) {
+                    cxstring codepoint_str = { str.ptr + i + 1, 4};
+                    uint32_t codepoint;
+                    if(!cx_strtou32_lc_(codepoint_str, &codepoint, 16, "")) {
+                        char utf8buf[4];
+                        int utf8len = codepoint_to_utf8(codepoint, utf8buf);
+                        if(utf8len > 0) {
+                            // append all bytes from utf8buf except the last one;
+                            // the last byte is kept in c and appended below
+                            utf8len--;
+                            c = utf8buf[utf8len];
+                            for(int i=0;i<utf8len;i++) {
+                                result.ptr[result.length++] = utf8buf[i];
+                            }
+                        }
+                        i += 4;
+                    }
+                }
             }
-            // TODO: support \uXXXX escape sequences
+            
             // TODO: discuss the behavior for unrecognized escape sequences
             //       most parsers throw an error here
             result.ptr[result.length++] = c;

mercurial