src/json.c

changeset 1007
81b2986d2b04
parent 1002
1483c47063a8
child 1008
3b69f025f083
equal deleted inserted replaced
1006:8ee818fa29f7 1007:81b2986d2b04
27 */ 27 */
28 28
29 #include <string.h> 29 #include <string.h>
30 #include <ctype.h> 30 #include <ctype.h>
31 #include <assert.h> 31 #include <assert.h>
32 #include <stdio.h>
32 33
33 #include "cx/json.h" 34 #include "cx/json.h"
34 35
35 /* 36 /*
36 * RFC 8259 37 * RFC 8259
106 107
107 return type; 108 return type;
108 } 109 }
109 110
110 static CxJsonToken token_create(CxJson *json, bool isstring, size_t start, size_t end) { 111 static CxJsonToken token_create(CxJson *json, bool isstring, size_t start, size_t end) {
111 cxmutstr str = cx_mutstrn((char*)json->buffer + start, end - start); 112 cxmutstr str = cx_mutstrn(json->buffer.space + start, end - start);
112 bool allocated = false; 113 bool allocated = false;
113 if (json->uncompleted.tokentype != CX_JSON_NO_TOKEN) { 114 if (json->uncompleted.tokentype != CX_JSON_NO_TOKEN) {
114 allocated = true; 115 allocated = true;
115 str = cx_strcat_m(json->uncompleted.content, 1, str); 116 str = cx_strcat_m(json->uncompleted.content, 1, str);
116 if (str.ptr == NULL) { 117 if (str.ptr == NULL) {
117 return (CxJsonToken){CX_JSON_NO_TOKEN, false, 0, 0}; 118 return (CxJsonToken){CX_JSON_NO_TOKEN, false, {0, 0}};
118 } 119 }
119 } 120 }
120 json->uncompleted = (CxJsonToken){0}; 121 json->uncompleted = (CxJsonToken){0};
121 CxJsonTokenType ttype; 122 CxJsonTokenType ttype;
122 if (isstring) { 123 if (isstring) {
130 } 131 }
131 if (ttype == CX_JSON_TOKEN_ERROR) { 132 if (ttype == CX_JSON_TOKEN_ERROR) {
132 if (allocated) { 133 if (allocated) {
133 cx_strfree(&str); 134 cx_strfree(&str);
134 } 135 }
135 return (CxJsonToken){CX_JSON_TOKEN_ERROR, false, 0, 0}; 136 return (CxJsonToken){CX_JSON_TOKEN_ERROR, false, {0, 0}};
136 } 137 }
137 return (CxJsonToken){ttype, allocated, str}; 138 return (CxJsonToken){ttype, allocated, str};
138 } 139 }
139 140
140 static CxJsonTokenType char2ttype(char c) { 141 static CxJsonTokenType char2ttype(char c) {
169 return CX_JSON_NO_TOKEN; 170 return CX_JSON_NO_TOKEN;
170 } 171 }
171 172
172 static enum cx_json_status token_parse_next(CxJson *json, CxJsonToken *result) { 173 static enum cx_json_status token_parse_next(CxJson *json, CxJsonToken *result) {
173 // check if there is data in the buffer 174 // check if there is data in the buffer
174 if (json->pos >= json->size) { 175 if (cxBufferEof(&json->buffer)) {
175 return json->uncompleted.tokentype == CX_JSON_NO_TOKEN ? 176 return json->uncompleted.tokentype == CX_JSON_NO_TOKEN ?
176 CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA; 177 CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA;
177 } 178 }
178 179
179 // sanity check
180 if (json->buffer == NULL) {
181 return CX_JSON_NULL_INPUT;
182 }
183
184 // current token type and start index 180 // current token type and start index
185 CxJsonTokenType ttype = json->uncompleted.tokentype; 181 CxJsonTokenType ttype = json->uncompleted.tokentype;
186 size_t token_start = json->pos; 182 size_t token_start = json->buffer.pos;
187 183
188 for (size_t i = json->pos; i < json->size; i++) { 184 for (size_t i = json->buffer.pos; i < json->buffer.size; i++) {
189 char c = json->buffer[i]; 185 char c = json->buffer.space[i];
190 if (ttype != CX_JSON_TOKEN_STRING) { 186 if (ttype != CX_JSON_TOKEN_STRING) {
191 // currently non-string token 187 // currently non-string token
192 CxJsonTokenType ctype = char2ttype(c); // start of new token? 188 CxJsonTokenType ctype = char2ttype(c); // start of new token?
193 if (ttype == CX_JSON_NO_TOKEN) { 189 if (ttype == CX_JSON_NO_TOKEN) {
194 if (ctype == CX_JSON_TOKEN_SPACE) { 190 if (ctype == CX_JSON_TOKEN_SPACE) {
197 // begin string 193 // begin string
198 ttype = CX_JSON_TOKEN_STRING; 194 ttype = CX_JSON_TOKEN_STRING;
199 token_start = i; 195 token_start = i;
200 } else if (ctype != CX_JSON_NO_TOKEN) { 196 } else if (ctype != CX_JSON_NO_TOKEN) {
201 // single-char token 197 // single-char token
202 json->pos = i + 1; 198 json->buffer.pos = i + 1;
203 *result = (CxJsonToken){ctype, NULL, 0, 0}; 199 *result = (CxJsonToken){ctype, NULL, {0, 0}};
204 return CX_JSON_NO_ERROR; 200 return CX_JSON_NO_ERROR;
205 } else { 201 } else {
206 ttype = CX_JSON_TOKEN_LITERAL; // number or literal 202 ttype = CX_JSON_TOKEN_LITERAL; // number or literal
207 token_start = i; 203 token_start = i;
208 } 204 }
214 return CX_JSON_BUFFER_ALLOC_FAILED; 210 return CX_JSON_BUFFER_ALLOC_FAILED;
215 } 211 }
216 if (result->tokentype == CX_JSON_TOKEN_ERROR) { 212 if (result->tokentype == CX_JSON_TOKEN_ERROR) {
217 return CX_JSON_FORMAT_ERROR_NUMBER; 213 return CX_JSON_FORMAT_ERROR_NUMBER;
218 } 214 }
219 json->pos = i; 215 json->buffer.pos = i;
220 return CX_JSON_NO_ERROR; 216 return CX_JSON_NO_ERROR;
221 } 217 }
222 } 218 }
223 } else { 219 } else {
224 // currently inside a string 220 // currently inside a string
228 if (c == '"') { 224 if (c == '"') {
229 *result = token_create(json, true, token_start, i + 1); 225 *result = token_create(json, true, token_start, i + 1);
230 if (result->tokentype == CX_JSON_NO_TOKEN) { 226 if (result->tokentype == CX_JSON_NO_TOKEN) {
231 return CX_JSON_BUFFER_ALLOC_FAILED; 227 return CX_JSON_BUFFER_ALLOC_FAILED;
232 } 228 }
233 json->pos = i + 1; 229 json->buffer.pos = i + 1;
234 return CX_JSON_NO_ERROR; 230 return CX_JSON_NO_ERROR;
235 } else if (c == '\\') { 231 } else if (c == '\\') {
236 json->tokenizer_escape = true; 232 json->tokenizer_escape = true;
237 } 233 }
238 } 234 }
239 } 235 }
240 } 236 }
241 237
242 if (ttype != CX_JSON_NO_TOKEN) { 238 if (ttype != CX_JSON_NO_TOKEN) {
243 // uncompleted token 239 // uncompleted token
244 size_t uncompeted_len = json->size - token_start; 240 size_t uncompeted_len = json->buffer.size - token_start;
245 if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { 241 if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
246 // current token is uncompleted 242 // current token is uncompleted
247 // save current token content 243 // save current token content
248 CxJsonToken uncompleted = { 244 CxJsonToken uncompleted = {
249 ttype, true, 245 ttype, true,
250 cx_strdup(cx_strn(json->buffer + token_start, uncompeted_len)) 246 cx_strdup(cx_strn(json->buffer.space + token_start, uncompeted_len))
251 }; 247 };
252 if (uncompleted.content.ptr == NULL) { 248 if (uncompleted.content.ptr == NULL) {
253 return CX_JSON_BUFFER_ALLOC_FAILED; 249 return CX_JSON_BUFFER_ALLOC_FAILED;
254 } 250 }
255 json->uncompleted = uncompleted; 251 json->uncompleted = uncompleted;
256 } else { 252 } else {
257 // previously we also had an uncompleted token 253 // previously we also had an uncompleted token
258 // combine the uncompleted token with the current token 254 // combine the uncompleted token with the current token
259 assert(json->uncompleted.allocated); 255 assert(json->uncompleted.allocated);
260 cxmutstr str = cx_strcat_m(json->uncompleted.content, 1, 256 cxmutstr str = cx_strcat_m(json->uncompleted.content, 1,
261 cx_strn(json->buffer + token_start, uncompeted_len)); 257 cx_strn(json->buffer.space + token_start, uncompeted_len));
262 if (str.ptr == NULL) { 258 if (str.ptr == NULL) {
263 return CX_JSON_BUFFER_ALLOC_FAILED; 259 return CX_JSON_BUFFER_ALLOC_FAILED;
264 } 260 }
265 json->uncompleted.content = str; 261 json->uncompleted.content = str;
266 } 262 }
303 return result; 299 return result;
304 } 300 }
305 301
306 static int parse_number(cxmutstr str, void *value, bool asint) { 302 static int parse_number(cxmutstr str, void *value, bool asint) {
307 char *endptr = NULL; 303 char *endptr = NULL;
308 char buf[32];
309 if (str.length > 30) { 304 if (str.length > 30) {
310 return 1; 305 return 1;
311 } 306 }
312 // TODO: if we can guarantee that we are working on a copied string already, we can avoid this memcpy 307 // the buffer guarantees that we are working on a copied string
313 memcpy(buf, str.ptr, str.length); 308 char c = str.ptr[str.length];
314 buf[str.length] = 0; 309 str.ptr[str.length] = 0;
315 310
316 if (asint) { 311 if (asint) {
317 long long v = strtoll(buf, &endptr, 10); 312 long long v = strtoll(str.ptr, &endptr, 10);
318 *((int64_t*)value) = (int64_t) v; 313 *((int64_t*)value) = (int64_t) v;
319 } else { 314 } else {
320 // TODO: proper JSON spec number parser 315 // TODO: proper JSON spec number parser
321 double v = strtod(buf, &endptr); 316 double v = strtod(str.ptr, &endptr);
322 *((double*)value) = v; 317 *((double*)value) = v;
323 } 318 }
324 319
325 return (endptr != &buf[str.length]); 320 // recover from the hack
321 str.ptr[str.length] = c;
322
323 return endptr != &str.ptr[str.length];
326 } 324 }
327 325
328 static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) { 326 static CxJsonValue* create_json_value(CxJson *json, CxJsonValueType type) {
329 CxJsonValue *v = cxMalloc(json->allocator, sizeof(CxJsonValue)); 327 CxJsonValue *v = cxMalloc(json->allocator, sizeof(CxJsonValue));
330 if (v == NULL) { 328 if (v == NULL) {
415 json->states[0] = JP_STATE_VALUE_BEGIN; 413 json->states[0] = JP_STATE_VALUE_BEGIN;
416 json->states_size = 1; 414 json->states_size = 1;
417 415
418 json->vbuf = json->vbuf_internal; 416 json->vbuf = json->vbuf_internal;
419 json->vbuf_capacity = cx_nmemb(json->vbuf_internal); 417 json->vbuf_capacity = cx_nmemb(json->vbuf_internal);
418
419 cxBufferInit(&json->buffer, NULL, 256, NULL, CX_BUFFER_AUTO_EXTEND);
420 } 420 }
421 421
422 void cxJsonDestroy(CxJson *json) { 422 void cxJsonDestroy(CxJson *json) {
423 cxBufferDestroy(&json->buffer);
423 if (json->states != json->states_internal) { 424 if (json->states != json->states_internal) {
424 free(json->states); 425 free(json->states);
425 } 426 }
426 if (json->vbuf != json->vbuf_internal) { 427 if (json->vbuf != json->vbuf_internal) {
427 free(json->vbuf); 428 free(json->vbuf);
429 cxJsonValueFree(json->parsed); 430 cxJsonValueFree(json->parsed);
430 json->parsed = NULL; 431 json->parsed = NULL;
431 } 432 }
432 433
433 int cxJsonFilln(CxJson *json, const char *buf, size_t size) { 434 int cxJsonFilln(CxJson *json, const char *buf, size_t size) {
434 // TODO: implement rescue buffer like in CxProperties to allow subsequent fills 435 // we use the UCX buffer to write the data
435 json->buffer = buf; 436 // but reset the position immediately to enable parsing
436 json->size = size; 437 size_t old_pos = json->buffer.pos;
437 json->pos = 0; 438 cxBufferSeek(&json->buffer, 0, SEEK_END);
438 return 0; 439 size_t written = cxBufferWrite(buf, 1, size, &json->buffer);
440 if (0 == cxBufferTerminate(&json->buffer)) {
441 written++;
442 }
443 json->buffer.pos = old_pos;
444 return written != size + 1;
439 } 445 }
440 446
441 static void json_add_state(CxJson *json, int state) { 447 static void json_add_state(CxJson *json, int state) {
442 // we have guaranteed the necessary space with cx_array_simple_reserve() 448 // we have guaranteed the necessary space with cx_array_simple_reserve()
443 // therefore, we can safely add the state in the simplest way possible 449 // therefore, we can safely add the state in the simplest way possible

mercurial