src/json.c

changeset 1122
49ab92de9a13
parent 1121
7fd2672199d7
equal deleted inserted replaced
1121:7fd2672199d7 1122:49ab92de9a13
250 CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA; 250 CX_JSON_NO_DATA : CX_JSON_INCOMPLETE_DATA;
251 } 251 }
252 252
253 // current token type and start index 253 // current token type and start index
254 CxJsonTokenType ttype = json->uncompleted.tokentype; 254 CxJsonTokenType ttype = json->uncompleted.tokentype;
255 size_t token_start = json->buffer.pos; 255 size_t token_part_start = json->buffer.pos;
256 256
257 for (size_t i = json->buffer.pos; i < json->buffer.size; i++) { 257 for (size_t i = json->buffer.pos; i < json->buffer.size; i++) {
258 char c = json->buffer.space[i]; 258 char c = json->buffer.space[i];
259 if (ttype != CX_JSON_TOKEN_STRING) { 259 if (ttype != CX_JSON_TOKEN_STRING) {
260 // currently non-string token 260 // currently non-string token
264 json->buffer.pos++; 264 json->buffer.pos++;
265 continue; 265 continue;
266 } else if (ctype == CX_JSON_TOKEN_STRING) { 266 } else if (ctype == CX_JSON_TOKEN_STRING) {
267 // begin string 267 // begin string
268 ttype = CX_JSON_TOKEN_STRING; 268 ttype = CX_JSON_TOKEN_STRING;
269 token_start = i; 269 token_part_start = i;
270 } else if (ctype != CX_JSON_NO_TOKEN) { 270 } else if (ctype != CX_JSON_NO_TOKEN) {
271 // single-char token 271 // single-char token
272 json->buffer.pos = i + 1; 272 json->buffer.pos = i + 1;
273 *result = (CxJsonToken){ctype, false, {NULL, 0}}; 273 *result = (CxJsonToken){ctype, false, {NULL, 0}};
274 return CX_JSON_NO_ERROR; 274 return CX_JSON_NO_ERROR;
275 } else { 275 } else {
276 ttype = CX_JSON_TOKEN_LITERAL; // number or literal 276 ttype = CX_JSON_TOKEN_LITERAL; // number or literal
277 token_start = i; 277 token_part_start = i;
278 } 278 }
279 } else { 279 } else {
280 // finish token 280 // finish token
281 if (ctype != CX_JSON_NO_TOKEN) { 281 if (ctype != CX_JSON_NO_TOKEN) {
282 *result = token_create(json, false, token_start, i); 282 *result = token_create(json, false, token_part_start, i);
283 if (result->tokentype == CX_JSON_NO_TOKEN) { 283 if (result->tokentype == CX_JSON_NO_TOKEN) {
284 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE 284 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
285 } 285 }
286 if (result->tokentype == CX_JSON_TOKEN_ERROR) { 286 if (result->tokentype == CX_JSON_TOKEN_ERROR) {
287 return CX_JSON_FORMAT_ERROR_NUMBER; 287 return CX_JSON_FORMAT_ERROR_NUMBER;
294 // currently inside a string 294 // currently inside a string
295 if (json->tokenizer_escape) { 295 if (json->tokenizer_escape) {
296 json->tokenizer_escape = false; 296 json->tokenizer_escape = false;
297 } else { 297 } else {
298 if (c == '"') { 298 if (c == '"') {
299 *result = token_create(json, true, token_start, i + 1); 299 *result = token_create(json, true, token_part_start, i + 1);
300 if (result->tokentype == CX_JSON_NO_TOKEN) { 300 if (result->tokentype == CX_JSON_NO_TOKEN) {
301 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE 301 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
302 } 302 }
303 json->buffer.pos = i + 1; 303 json->buffer.pos = i + 1;
304 return CX_JSON_NO_ERROR; 304 return CX_JSON_NO_ERROR;
309 } 309 }
310 } 310 }
311 311
312 if (ttype != CX_JSON_NO_TOKEN) { 312 if (ttype != CX_JSON_NO_TOKEN) {
313 // uncompleted token 313 // uncompleted token
314 size_t uncompleted_len = json->buffer.size - token_start; 314 size_t uncompleted_len = json->buffer.size - token_part_start;
315 if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) { 315 if (json->uncompleted.tokentype == CX_JSON_NO_TOKEN) {
316 // current token is uncompleted 316 // current token is uncompleted
317 // save current token content 317 // save current token content
318 CxJsonToken uncompleted = { 318 CxJsonToken uncompleted = {
319 ttype, true, 319 ttype, true,
320 cx_strdup(cx_strn(json->buffer.space + token_start, uncompleted_len)) 320 cx_strdup(cx_strn(json->buffer.space + token_part_start, uncompleted_len))
321 }; 321 };
322 if (uncompleted.content.ptr == NULL) { 322 if (uncompleted.content.ptr == NULL) {
323 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE 323 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
324 } 324 }
325 json->uncompleted = uncompleted; 325 json->uncompleted = uncompleted;
326 } else { 326 } else {
327 // previously we also had an uncompleted token 327 // previously we also had an uncompleted token
328 // combine the uncompleted token with the current token 328 // combine the uncompleted token with the current token
329 assert(json->uncompleted.allocated); 329 assert(json->uncompleted.allocated);
330 cxmutstr str = cx_strcat_m(json->uncompleted.content, 1, 330 cxmutstr str = cx_strcat_m(json->uncompleted.content, 1,
331 cx_strn(json->buffer.space + token_start, uncompleted_len)); 331 cx_strn(json->buffer.space + token_part_start, uncompleted_len));
332 if (str.ptr == NULL) { 332 if (str.ptr == NULL) {
333 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE 333 return CX_JSON_BUFFER_ALLOC_FAILED; // LCOV_EXCL_LINE
334 } 334 }
335 json->uncompleted.content = str; 335 json->uncompleted.content = str;
336 } 336 }
340 340
341 return CX_JSON_INCOMPLETE_DATA; 341 return CX_JSON_INCOMPLETE_DATA;
342 } 342 }
343 343
344 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) { 344 static cxmutstr unescape_string(const CxAllocator *a, cxmutstr str) {
345 // TODO: support more escape sequences 345 // note: this function expects that str contains the enclosing quotes!
346 // TODO: to be consistent with escape_string() we might want to expect that the enclosing quotes were already removed 346
347 cxmutstr result; 347 cxmutstr result;
348 result.length = 0; 348 result.length = 0;
349 result.ptr = cxMalloc(a, str.length - 1); 349 result.ptr = cxMalloc(a, str.length - 1);
350 if (result.ptr == NULL) return result; // LCOV_EXCL_LINE 350 if (result.ptr == NULL) return result; // LCOV_EXCL_LINE
351 351
356 u = false; 356 u = false;
357 if (c == 'n') { 357 if (c == 'n') {
358 c = '\n'; 358 c = '\n';
359 } else if (c == 't') { 359 } else if (c == 't') {
360 c = '\t'; 360 c = '\t';
361 } 361 } else if (c == 'r') {
362 c = '\r';
363 } else if (c == '\\') {
364 c = '\\';
365 } else if (c == '/') {
366 c = '/'; // always unescape, we don't need settings here
367 } else if (c == 'f') {
368 c = '\f';
369 } else if (c == 'b') {
370 c = '\b';
371 }
372 // TODO: support \uXXXX escape sequences
373 // TODO: discuss the behavior for unrecognized escape sequences
374 // most parsers throw an error here
362 result.ptr[result.length++] = c; 375 result.ptr[result.length++] = c;
363 } else { 376 } else {
364 if (c == '\\') { 377 if (c == '\\') {
365 u = true; 378 u = true;
366 } else { 379 } else {
372 385
373 return result; 386 return result;
374 } 387 }
375 388
376 static cxmutstr escape_string(cxmutstr str) { 389 static cxmutstr escape_string(cxmutstr str) {
390 // note: this function produces the string without enclosing quotes
391 // the reason is that we don't want to allocate memory just for that
377 CxBuffer buf = {0}; 392 CxBuffer buf = {0};
378 393
379 bool all_printable = true; 394 bool all_printable = true;
380 for (size_t i = 0; i < str.length; i++) { 395 for (size_t i = 0; i < str.length; i++) {
381 bool escape = !isprint(str.ptr[i]) 396 bool escape = !isprint(str.ptr[i])

mercurial