2 * Copyright 2022 Olaf Wintermann
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
31 * https://tools.ietf.org/html/rfc8259
// Initial number of slots in the parser state stack; add_state() grows the
// stack by this amount and setup_read_value() uses it as the initial
// capacity of the read-value stack.
34 #define PARSER_STATES_ALLOC 32
/*
 * Creates a new parser: a zero-initialized JSONParser with a state stack of
 * PARSER_STATES_ALLOC zeroed ints.
 * Only part of the body is visible in this excerpt; the allocation-failure
 * handling and the return statement are not shown.
 */
36 JSONParser* json_parser_new(void) {
37 JSONParser *parser = calloc(1, sizeof(JSONParser));
42 parser->states_alloc = PARSER_STATES_ALLOC;
43 parser->states = calloc(PARSER_STATES_ALLOC, sizeof(int));
// initial capacity used by obj_init_values() and array_init() for new containers
49 parser->reader_array_alloc = 8;
// Body not visible in this excerpt. Presumably stores buf/size as the input
// that json_parser_next_token() scans through p->buffer and p->size -- confirm.
54 void json_parser_fill(JSONParser *p, const char *buf, size_t size) {
// Empty token (type JSON_NO_TOKEN, no content); get_content() uses it to
// initialize its result and to reset p->uncompleted.
61 static JSONToken nulltoken = { JSON_NO_TOKEN, NULL, 0, 0 };
/*
 * Appends len bytes from buf to token->content and updates token->length.
 * The buffer is grown with realloc when token->alloc is smaller than the
 * required length.
 * Callers test the result with if(token_append(...)); the return statements
 * are not visible in this excerpt (presumably non-zero means failure).
 */
63 int token_append(JSONToken *token, const char *buf, size_t len) {
68 size_t newlen = token->length + len;
69 if(token->alloc < newlen) {
70 char *newbuf = realloc(
// alloc == 0: pass NULL so realloc allocates a fresh block instead of
// resizing content (presumably content is not an owned buffer then)
71 token->alloc == 0 ? NULL : (char*)token->content,
76 token->content = newbuf;
// NOTE(review): the size passed to realloc is not visible here; confirm it
// matches the capacity recorded below
77 token->alloc = newlen;
// content is declared const in JSONToken, hence the cast for writing
80 memcpy((char*)token->content+token->length, buf, len);
81 token->length = newlen;
/*
 * Builds the token content for the buffer range [start, end).
 *  - no pending partial token: content points directly into p->buffer
 *  - pending partial token and empty range: the pending token is used as is
 *  - otherwise: the range is appended to the pending token, which is then used
 * p->uncompleted is reset to nulltoken on line 99; which of the paths above
 * reach that line is not visible in this excerpt.
 * The token type is left for the caller to set.
 */
85 JSONToken get_content(JSONParser *p, size_t start, size_t end) {
86 JSONToken token = nulltoken;
// number of bytes of the token that are located in the current buffer
87 size_t part2 = end - start;
88 if(p->uncompleted.tokentype == JSON_NO_TOKEN) {
89 token.content = p->buffer + start;
91 } else if(part2 == 0) {
92 token = p->uncompleted;
94 if(token_append(&p->uncompleted, p->buffer+start, end - start)) {
97 token = p->uncompleted;
99 p->uncompleted = nulltoken;
/*
 * Tests whether content is one of the JSON literals "true", "null" or
 * "false". The result values are not visible in this excerpt; get_token()
 * treats a non-zero result as "is a literal".
 * NOTE(review): the 4-byte comparisons need a length == 4 guard; it is not
 * visible in this excerpt -- confirm it exists on the omitted line.
 */
103 int token_isliteral(const char *content, size_t length) {
105 if(!memcmp(content, "true", 4)) {
107 } else if(!memcmp(content, "null", 4)) {
110 } else if(length == 5 && !memcmp(content, "false", 5)) {
/*
 * Validates the exponent part of a number, i.e. content[pos..length).
 * token_numbertype() calls it with pos pointing just behind 'e'/'E' and
 * treats a non-zero result as a valid exponent.
 * Most of the body is not visible in this excerpt.
 */
116 static int num_isexp(const char *content, size_t length, size_t pos) {
122 for(size_t i=pos;i<length;i++) {
// first exponent character: anything other than a sign is handled by the
// (not visible) statement inside the inner if
126 } else if(i == pos) {
127 if(!(c == '+' || c == '-')) {
/*
 * Classifies a non-literal token as JSON_TOKEN_INTEGER, JSON_TOKEN_NUMBER
 * (contains a decimal separator or an exponent) or JSON_TOKEN_ERROR.
 *
 * content does not need to be NUL-terminated; only the first length bytes
 * are inspected.
 *
 * Bytes are cast to unsigned char before being passed to isdigit(): calling
 * a <ctype.h> function with a negative char value (any byte >= 0x80 on
 * platforms where char is signed) is undefined behavior.
 */
138 JSONTokenType token_numbertype(const char *content, size_t length) {
139 if(length == 0) return JSON_TOKEN_ERROR;
// the first character must be a minus sign or a digit
141 if(content[0] != '-' && !isdigit((unsigned char)content[0])) {
142 return JSON_TOKEN_ERROR;
// assume integer until a '.' or an exponent is found
145 JSONTokenType type = JSON_TOKEN_INTEGER;
146 for(size_t i=1;i<length;i++) {
147 if(content[i] == '.') {
148 if(type == JSON_TOKEN_NUMBER) {
149 return JSON_TOKEN_ERROR; // more than one decimal separator
151 type = JSON_TOKEN_NUMBER;
152 } else if(content[i] == 'e' || content[i] == 'E') {
// everything behind the exponent marker is validated by num_isexp()
153 return num_isexp(content, length, i+1) ? JSON_TOKEN_NUMBER : JSON_TOKEN_ERROR;
154 } else if(!isdigit((unsigned char)content[i])) {
155 return JSON_TOKEN_ERROR; // char is not a digit, decimal separator or exponent sep
/*
 * Builds the token for the buffer range [start, end) via get_content() and
 * sets its type: JSON_TOKEN_LITERAL when token_isliteral() matches,
 * otherwise whatever token_numbertype() reports (integer, number or error).
 */
162 JSONToken get_token(JSONParser *p, size_t start, size_t end) {
163 JSONToken token = get_content(p, start, end);
164 if(token_isliteral(token.content, token.length)) {
165 token.tokentype = JSON_TOKEN_LITERAL;
167 token.tokentype = token_numbertype(token.content, token.length);
/*
 * Maps a single input character to the token type it introduces.
 * The case labels are not visible in this excerpt; only the returned types
 * are. JSON_NO_TOKEN is the last return (presumably the default for
 * characters that do not start a structural, string or whitespace token).
 */
173 static JSONTokenType char2ttype(char c) {
176 return JSON_TOKEN_BEGIN_ARRAY;
179 return JSON_TOKEN_BEGIN_OBJECT;
182 return JSON_TOKEN_END_ARRAY;
185 return JSON_TOKEN_END_OBJECT;
188 return JSON_TOKEN_NAME_SEPARATOR;
191 return JSON_TOKEN_VALUE_SEPARATOR;
194 return JSON_TOKEN_STRING;
198 return JSON_TOKEN_SPACE;
202 return JSON_NO_TOKEN;
/*
 * Scans p->buffer from p->pos up to p->size and produces the next token.
 *
 * A token that is cut off by the end of the buffer is saved in
 * p->uncompleted; because ttype is initialized from
 * p->uncompleted.tokentype, a later call continues that token.
 * String scanning keeps its escape state in p->tokenizer_escape, so it also
 * survives across calls.
 *
 * Several statements (position updates, returns) are not visible in this
 * excerpt.
 */
205 JSONToken json_parser_next_token(JSONParser *p) {
206 // current token type and start index
207 JSONTokenType ttype = p->uncompleted.tokentype;
208 size_t token_start = p->pos;
210 for(size_t i=p->pos;i<p->size;i++) {
211 char c = p->buffer[i];
212 if(ttype != JSON_TOKEN_STRING) {
213 // currently non-string token
215 JSONTokenType ctype = char2ttype(c); // start of new token?
217 if(ttype == JSON_NO_TOKEN) {
218 if(ctype == JSON_TOKEN_SPACE) {
220 } else if(ctype == JSON_TOKEN_STRING) {
222 ttype = JSON_TOKEN_STRING;
224 } else if(ctype != JSON_NO_TOKEN) {
// single-character token: it carries only a type, no content
227 JSONToken token = { ctype, NULL, 0, 0};
230 ttype = JSON_TOKEN_LITERAL; // number or literal
// a character that starts another token ends the current literal/number;
// the range ends at i, so that character is not part of the token
235 if(ctype != JSON_NO_TOKEN) {
236 return get_token(p, token_start, i);
240 // currently inside a string
241 if(!p->tokenizer_escape) {
// end index i+1: the current character is part of the string token
243 JSONToken ret = get_content(p, token_start, i+1);
244 ret.tokentype = JSON_TOKEN_STRING;
247 } else if(c == '\\') {
248 p->tokenizer_escape = 1;
251 p->tokenizer_escape = 0;
// buffer exhausted while a token is still open
256 if(ttype != JSON_NO_TOKEN) {
258 size_t uncompeted_len = p->size - token_start;
259 if(p->uncompleted.tokentype == JSON_NO_TOKEN) {
260 // current token is uncompleted
261 // save current token content in p->uncompleted
262 JSONToken uncompleted;
263 uncompleted.tokentype = ttype;
264 uncompleted.length = uncompeted_len;
// allocate 16 bytes more than currently needed
265 uncompleted.alloc = uncompeted_len + 16;
266 char *tmp = malloc(uncompleted.alloc);
268 memcpy(tmp, p->buffer+token_start, uncompeted_len);
269 uncompleted.content = tmp;
270 p->uncompleted = uncompleted;
275 // previously we also had an uncompleted token
276 // combine the uncompleted token with the current token
277 if(token_append(&p->uncompleted, p->buffer+token_start, uncompeted_len)) {
// token of type JSON_NO_TOKEN; json_read() checks for this type right after
// calling this function
283 JSONToken ret = { JSON_NO_TOKEN, NULL, 0, 0};
/*
 * Allocates a JSONValue of type JSON_STRING that holds a NUL-terminated
 * heap copy of the token content (token.length bytes).
 * Error handling, the assignment to *value and the return statements are
 * not visible in this excerpt.
 * NOTE(review): v->type is assigned twice (lines 293 and 304); one of the
 * assignments is redundant.
 */
287 static int create_string(JSONToken token, JSONValue **value) {
288 JSONValue *v = malloc(sizeof(JSONValue));
293 v->type = JSON_STRING;
// +1 for the terminating NUL byte
295 char *str = malloc(token.length+1);
301 memcpy(str, token.content, token.length);
302 str[token.length] = 0;
304 v->type = JSON_STRING;
305 v->value.string.string = str;
306 v->value.string.length = token.length;
// Result type of unescape_string(). The members are not visible in this
// excerpt; json_read() reads .ptr and .length from it.
311 typedef struct json_ustr {
/*
 * Creates an unescaped copy of a string token in a newly allocated buffer
 * of len+1 bytes.
 * The loop covers indices 1 .. len-2, i.e. it skips the first and the last
 * byte of str (presumably the surrounding quotes, which the tokenizer
 * includes in string tokens -- confirm).
 * Most of the body is not visible in this excerpt.
 * NOTE(review): i is an int compared against the size_t expression len-1;
 * for len == 0 the bound wraps around to SIZE_MAX.
 */
315 static json_ustr unescape_string(const char *str, size_t len) {
316 char *newstr = malloc(len+1);
326 for(int i=1;i<len-1;i++) {
338 } else if(c == 't') {
/*
 * Parses the len bytes at str as a base-10 integer.
 * The input is copied into a local buffer buf (declaration and termination
 * are not visible in this excerpt) because str is not NUL-terminated.
 * Input that strtoll did not consume completely is rejected via the endptr
 * comparison. json_read() treats a non-zero result as failure.
 * NOTE(review): no errno/ERANGE check is visible; out-of-range values may
 * be clamped silently by strtoll -- confirm.
 */
352 static int parse_integer(const char *str, size_t len, int64_t *value) {
358 memcpy(buf, str, len);
361 long long v = strtoll(buf, &endptr, 10);
362 if(endptr != &buf[len]) {
/*
 * Parses the len bytes at str as a floating point number.
 * The input is copied into a local buffer buf (declaration and termination
 * are not visible in this excerpt) because str is not NUL-terminated.
 * Input that strtod did not consume completely is rejected via the endptr
 * comparison. json_read() treats a non-zero result as failure.
 */
370 static int parse_number(const char *str, size_t len, double *value) {
376 memcpy(buf, str, len);
379 double v = strtod(buf, &endptr);
380 if(endptr != &buf[len]) {
/*
 * Pushes a new state onto the parser state stack.
 *
 * p->nstates is the index of the current top entry (json_read() reads
 * p->states[p->nstates]), so the push below writes to index nstates + 1.
 * The stack therefore has to grow as soon as nstates + 1 would reach
 * states_alloc. Checking nstates alone allowed a write one element past the
 * end of the allocation when nstates == states_alloc - 1.
 *
 * Callers treat a non-zero return value as failure; the return statements
 * are not visible in this excerpt.
 *
 * NOTE(review): the realloc result is assigned directly to p->states, so the
 * old block is leaked if realloc fails.
 */
388 static int add_state(JSONParser *p, int state) {
389 if(p->nstates + 1 >= p->states_alloc) {
390 p->states_alloc += PARSER_STATES_ALLOC;
391 p->states = realloc(p->states, p->states_alloc * sizeof(int));
396 p->states[++p->nstates] = state;
// Called by json_read() when an array or object is closed: records the
// reader event type. The rest of the body is not visible in this excerpt
// (presumably it also pops the state stack -- confirm).
400 static void end_elm(JSONParser *p, JSONReaderType type) {
401 p->reader_type = type;
/*
 * Reader states stored on the parser state stack (p->states). Each state
 * names what json_read() expects next:
 *   VALUE_BEGIN*        a value (top level / inside object / inside array)
 *   ARRAY_SEP_OR_CLOSE  separator or array close
 *   OBJ_NAME_OR_CLOSE   member name or object close
 *   OBJ_NAME            member name
 *   OBJ_COLON           ':'
 *   OBJ_SEP_OR_CLOSE    separator or object close
 */
405 #define JP_STATE_VALUE_BEGIN 0
406 #define JP_STATE_VALUE_BEGIN_OBJ 1
407 #define JP_STATE_VALUE_BEGIN_AR 2
408 #define JP_STATE_ARRAY_SEP_OR_CLOSE 3
409 #define JP_STATE_OBJ_NAME_OR_CLOSE 4
410 #define JP_STATE_OBJ_NAME 5
411 #define JP_STATE_OBJ_COLON 6
412 #define JP_STATE_OBJ_SEP_OR_CLOSE 7
/*
 * Returns the state that follows a value read in state current:
 * -1 at top level, otherwise the separator-or-close state of the enclosing
 * object or array. The result for other states is not visible in this
 * excerpt.
 */
414 static int next_state_after_value(int current) {
417 // after value JSON complete, expect nothing
418 case JP_STATE_VALUE_BEGIN: return -1;
419 // after obj value, expect ',' or '}'
420 case JP_STATE_VALUE_BEGIN_OBJ: return JP_STATE_OBJ_SEP_OR_CLOSE;
421 // after array value, expect ',' or ']'
422 case JP_STATE_VALUE_BEGIN_AR: return JP_STATE_ARRAY_SEP_OR_CLOSE;
// Frees the stored member name and resets the name pointer and length.
426 static void clear_valuename(JSONParser *p) {
// free(NULL) is a no-op, so the guard is not strictly required
427 if(p->value_name) free(p->value_name);
428 p->value_name = NULL;
429 p->value_name_len = 0;
// Frees the stored string value and resets its length. Further statements
// are not visible in this excerpt.
432 static void clear_values(JSONParser *p) {
433 if(p->value_str) free(p->value_str);
435 p->value_str_len = 0;
/*
 * Reads the next token and advances the reader state machine by one step.
 *
 * The current state is the top of the state stack, p->states[p->nstates].
 * Depending on state and token type the function sets p->reader_type and
 * the value fields (value_str, value_int, value_double, value_name),
 * replaces the top state, or pushes/pops a state for nested arrays and
 * objects.
 *
 * Returns -1 for unexpected tokens and failed allocations (visible return
 * statements). The remaining return values are set on lines that are not
 * visible in this excerpt.
 *
 * States are referred to by their JP_STATE_* names throughout; the raw
 * numbers 1 and 7 that were used in two places are replaced by
 * JP_STATE_VALUE_BEGIN_OBJ and JP_STATE_OBJ_SEP_OR_CLOSE.
 */
440 int json_read(JSONParser *p) {
441 int state = p->states[p->nstates];
443 JSONToken token = json_parser_next_token(p);
444 p->reader_token = token;
// JSON_NO_TOKEN: the tokenizer could not deliver a complete token
449 if(token.tokentype == JSON_NO_TOKEN) {
455 // 0 JP_STATE_VALUE_BEGIN value begin
456 // 1 JP_STATE_VALUE_BEGIN_OBJ value begin (inside object)
457 // 2 JP_STATE_VALUE_BEGIN_AR value begin (inside array)
458 // 3 JP_STATE_ARRAY_SEP_OR_CLOSE array, expect separator or arrayclose
459 // 4 JP_STATE_OBJ_NAME_OR_CLOSE object, expect name or objclose
460 // 5 JP_STATE_OBJ_NAME object, expect name
461 // 6 JP_STATE_OBJ_COLON object, expect ':'
462 // 7 JP_STATE_OBJ_SEP_OR_CLOSE object, expect separator, objclose
464 if(state == JP_STATE_VALUE_BEGIN_AR || state == JP_STATE_OBJ_SEP_OR_CLOSE) {
// a value is expected: set the follow-up state first, nested containers
// push their own state on top of it
470 p->states[p->nstates] = next_state_after_value(state);
472 switch(token.tokentype) {
473 case JSON_TOKEN_BEGIN_ARRAY: {
474 p->reader_type = JSON_READER_ARRAY_BEGIN;
475 if(add_state(p, JP_STATE_VALUE_BEGIN_AR)) return -1;
477 //return json_read(p);
479 case JSON_TOKEN_BEGIN_OBJECT: {
480 p->reader_type = JSON_READER_OBJECT_BEGIN;
481 if(add_state(p, JP_STATE_OBJ_NAME_OR_CLOSE)) return -1;
483 //return json_read(p);
485 case JSON_TOKEN_END_ARRAY: {
487 end_elm(p, JSON_READER_ARRAY_END);
490 case JSON_TOKEN_END_OBJECT: {
492 end_elm(p, JSON_READER_OBJECT_END);
495 case JSON_TOKEN_STRING: {
496 p->reader_type = JSON_READER_STRING;
497 json_ustr str = unescape_string(token.content, token.length);
499 p->value_str = str.ptr;
500 p->value_str_len = str.length;
506 case JSON_TOKEN_INTEGER: {
507 p->reader_type = JSON_READER_INTEGER;
509 if(parse_integer(token.content, token.length, &value)) {
// integers are also made available as double
512 p->value_int = value;
513 p->value_double = (double)value;
516 case JSON_TOKEN_NUMBER: {
517 p->reader_type = JSON_READER_NUMBER;
519 if(parse_number(token.content, token.length, &value)) {
522 p->value_double = value;
// NOTE(review): converting a double outside the int64_t range is undefined
// behavior; no range check is visible here
523 p->value_int = (int64_t)value;
526 case JSON_TOKEN_LITERAL: {
527 p->reader_type = JSON_READER_LITERAL;
532 } else if(state == JP_STATE_ARRAY_SEP_OR_CLOSE) {
534 if(token.tokentype == JSON_TOKEN_VALUE_SEPARATOR) {
535 p->states[p->nstates] = JP_STATE_VALUE_BEGIN_AR;
537 } else if(token.tokentype == JSON_TOKEN_END_ARRAY) {
538 end_elm(p, JSON_READER_ARRAY_END);
542 } else if(state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) {
// closing the object is only accepted in JP_STATE_OBJ_NAME_OR_CLOSE, not in
// JP_STATE_OBJ_NAME (the state entered after a value separator)
543 if(state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == JSON_TOKEN_END_OBJECT) {
545 end_elm(p, JSON_READER_OBJECT_END);
548 if(token.tokentype != JSON_TOKEN_STRING) return -1;
// replace a previously stored member name
550 if(p->value_name) free(p->value_name);
551 json_ustr valname = unescape_string(token.content, token.length);
552 p->value_name = valname.ptr;
553 p->value_name_len = valname.length;
556 p->states[p->nstates] = JP_STATE_OBJ_COLON;
559 } else if(state == JP_STATE_OBJ_COLON) {
561 if(token.tokentype != JSON_TOKEN_NAME_SEPARATOR) return -1;
// after ':' the member value is expected
563 p->states[p->nstates] = JP_STATE_VALUE_BEGIN_OBJ;
565 } else if(state == JP_STATE_OBJ_SEP_OR_CLOSE) {
566 // expect ',' or '}'
567 if(token.tokentype == JSON_TOKEN_VALUE_SEPARATOR) {
568 p->states[p->nstates] = JP_STATE_OBJ_NAME;
570 } else if(token.tokentype == JSON_TOKEN_END_OBJECT) {
571 end_elm(p, JSON_READER_OBJECT_END);
// Returns the type of the element most recently produced by json_read().
581 JSONReaderType json_reader_type(JSONParser *p) {
582 return p->reader_type;
/*
 * Returns the stored object member name (p->value_name, may be NULL).
 * If opt_len is not NULL, the name length is stored in *opt_len.
 * The returned pointer refers to parser-owned memory.
 */
585 const char* json_reader_name(JSONParser *p, size_t *opt_len) {
586 if(opt_len) *opt_len = p->value_name_len;
587 return p->value_name;
/*
 * Returns the current string value; if opt_len is not NULL, the stored
 * string length is written to *opt_len.
 * The return statements are not visible in this excerpt; tokens that are
 * not of type JSON_TOKEN_STRING are handled by a separate branch.
 * NOTE(review): *opt_len is written before the token type is checked, so it
 * is also set when the current token is not a string.
 */
590 const char* json_reader_string(JSONParser *p, size_t *opt_len) {
591 if(opt_len) *opt_len = p->value_str_len;
593 if(p->reader_token.tokentype != JSON_TOKEN_STRING) {
// Body not visible in this excerpt; presumably returns p->value_int, which
// json_read() sets for integer and number tokens -- confirm.
600 int64_t json_reader_int(JSONParser *p) {
// Returns p->value_double, which json_read() sets for integer and number
// tokens.
604 double json_reader_double(JSONParser *p) {
605 return p->value_double;
// Returns non-zero if the current token is a literal of length 4 with the
// content "null". The result for all other tokens is not visible in this
// excerpt (presumably 0).
608 int json_reader_isnull(JSONParser *p) {
609 if(p->reader_token.tokentype == JSON_TOKEN_LITERAL && p->reader_token.length == 4) {
610 return !memcmp(p->reader_token.content, "null", 4);
/*
 * Maps the content of the current token to a JSONLiteralType by comparing
 * it with "true" and "false". The returned enumerators are on lines that
 * are not visible in this excerpt.
 *
 * Token content is not NUL-terminated (it can point directly into the input
 * buffer), so exactly token_len bytes are compared. Comparing 5 bytes
 * against "true" would read one byte past the token and only match if that
 * byte happened to be 0.
 */
615 JSONLiteralType json_reader_literal(JSONParser *p) {
616 const char *l = p->reader_token.content;
617 size_t token_len = p->reader_token.length;
618 if(token_len == 4 && !memcmp(l, "true", 4)) {
620 } else if(token_len == 5 && !memcmp(l, "false", 5)) {
// Returns 1 if json_reader_literal() reports JSON_TRUE for the current
// token, otherwise 0.
626 int json_reader_bool(JSONParser *p) {
627 JSONLiteralType lt = json_reader_literal(p);
628 return lt == JSON_TRUE ? 1 : 0;
632 /* -------------------- read value functions -------------------- */
// Allocates a JSONValue and zero-fills it. The allocation check and the
// return statement are not visible in this excerpt; p is not used on the
// visible lines.
634 static JSONValue* init_value(JSONParser *p) {
635 JSONValue *value = malloc(sizeof(JSONValue));
639 memset(value, 0, sizeof(JSONValue));
/*
 * Initializes the state used by json_read_value(): an empty stack of
 * JSONValue pointers with PARSER_STATES_ALLOC slots and no root value.
 * Returns -1 if the stack cannot be allocated; the success return is not
 * visible in this excerpt.
 */
643 static int setup_read_value(JSONParser *p) {
644 p->readvalue_alloc = PARSER_STATES_ALLOC;
645 p->readvalue_nelm = 0;
646 p->readvalue_stack = calloc(PARSER_STATES_ALLOC, sizeof(JSONValue*));
647 if(!p->readvalue_stack) return -1;
649 p->read_value = NULL;
650 p->readvalue_stack[0] = NULL;
/*
 * Allocates the (zeroed) member array of object value v with
 * p->reader_array_alloc slots and sets size to 0.
 * The return statements are not visible in this excerpt; obj_add_value()
 * treats a non-zero result as failure.
 */
655 static int obj_init_values(JSONParser *p, JSONValue *v) {
// note: calloc arguments are (element size, count) here; the total is the same
656 v->value.object.values = calloc(sizeof(JSONObjValue), p->reader_array_alloc);
657 if(!v->value.object.values) {
660 v->value.object.alloc = p->reader_array_alloc;
661 v->value.object.size = 0;
/*
 * Appends member v (copied by value) to the object value parent.
 * The member array is created on first use and doubled in capacity when
 * full. The return statements are not visible in this excerpt.
 * NOTE(review): the realloc result is assigned directly to
 * parent->value.object.values; on failure the old array (and the members it
 * holds) is no longer reachable.
 */
666 static int obj_add_value(JSONParser *p, JSONValue *parent, JSONObjValue v) {
667 if(!parent->value.object.values) {
668 if(obj_init_values(p, parent)) {
673 if(parent->value.object.size == parent->value.object.alloc) {
674 parent->value.object.alloc *= 2;
675 parent->value.object.values = realloc(parent->value.object.values, sizeof(JSONObjValue) * parent->value.object.alloc);
676 if(!parent->value.object.values) {
681 parent->value.object.values[parent->value.object.size++] = v;
/*
 * Allocates the (zeroed) element array of array value v with
 * p->reader_array_alloc slots and sets size to 0.
 * The return statements are not visible in this excerpt; array_add_value()
 * treats a non-zero result as failure.
 */
686 static int array_init(JSONParser *p, JSONValue *v) {
// note: calloc arguments are (element size, count) here; the total is the same
687 v->value.array.array = calloc(sizeof(JSONValue*), p->reader_array_alloc);
688 if(!v->value.array.array) {
691 v->value.array.alloc = p->reader_array_alloc;
692 v->value.array.size = 0;
/*
 * Appends the value pointer v to the array value parent.
 * The element array is created on first use and doubled in capacity when
 * full. The return statements are not visible in this excerpt.
 * NOTE(review): the realloc result is assigned directly to
 * parent->value.array.array; on failure the old array is no longer
 * reachable.
 */
697 static int array_add_value(JSONParser *p, JSONValue *parent, JSONValue *v) {
698 if(!parent->value.array.array) {
699 if(array_init(p, parent)) {
704 if(parent->value.array.size == parent->value.array.alloc) {
705 parent->value.array.alloc *= 2;
706 parent->value.array.array = realloc(parent->value.array.array, sizeof(JSONValue*) * parent->value.array.alloc);
707 if(!parent->value.array.array) {
712 parent->value.array.array[parent->value.array.size++] = v;
/*
 * Attaches value v to its container parent.
 *  - JSON_OBJECT parent: v is added as a member named by p->value_name
 *  - JSON_ARRAY parent:  v is appended as an element
 *  - any other parent type yields -1
 * Several lines (guards, member value assignment, return) are not visible
 * in this excerpt.
 */
717 static int add_to_parent(JSONParser *p, JSONValue *parent, JSONValue *v) {
719 return -1; // shouldn't happen but who knows
723 if(parent->type == JSON_OBJECT) {
// an object member needs a non-empty name
724 if(!p->value_name || p->value_name_len == 0) {
// ownership of the name string moves from the parser to the new member
// (json_value_free() frees member names)
727 char *valuename = p->value_name;
728 p->value_name = NULL;
730 JSONObjValue newvalue;
731 newvalue.name = valuename;
734 ret = obj_add_value(p, parent, newvalue);
735 } else if(parent->type == JSON_ARRAY) {
736 ret = array_add_value(p, parent, v);
738 ret = -1; // should also never happen
/*
 * Pushes v onto the stack of currently open containers used by
 * json_read_value(); the stack capacity is doubled when it is full.
 * The return statements are not visible in this excerpt; json_read_value()
 * treats a non-zero result as failure.
 * NOTE(review): the realloc result is assigned directly to
 * p->readvalue_stack; on failure the old stack is no longer reachable.
 */
745 static int readvaluestack_add(JSONParser *p, JSONValue *v) {
746 if(p->readvalue_nelm == p->readvalue_alloc) {
747 p->readvalue_alloc *= 2;
748 p->readvalue_stack = realloc(p->readvalue_stack, sizeof(JSONValue*) * p->readvalue_alloc);
749 if(!p->readvalue_stack) {
753 p->readvalue_stack[p->readvalue_nelm++] = v;
/*
 * Builds a complete JSONValue tree from the reader events produced by
 * json_read() and hands it to the caller through *value.
 *
 * Open containers are kept in p->readvalue_stack; every new value is
 * attached to the container on top of that stack, or becomes the root
 * (p->read_value) when the stack is empty. The loop runs while containers
 * are open or no root value exists yet.
 *
 * When the tree is handed over, the parser's reference (p->read_value) is
 * cleared. Many lines, including all return statements other than the
 * visible "return -1" ones, are not visible in this excerpt.
 */
757 int json_read_value(JSONParser *p, JSONValue **value) {
// lazy initialization on first call
759 if(!p->readvalue_stack) {
760 if(setup_read_value(p)) return -1;
763 while(p->readvalue_nelm > 0 || !p->read_value) {
764 //JSONValue *s = p->readvalue_stack[p->readvalue_nelm];
766 // value available without another read
767 JSONValue *v = init_value(p);
770 if(p->readvalue_nelm > 0) {
// attach to the innermost open container
771 if(add_to_parent(p, p->readvalue_stack[p->readvalue_nelm-1], v)) {
775 // set this value as root
779 switch(p->reader_type) {
780 case JSON_READER_OBJECT_BEGIN: {
781 v->type = JSON_OBJECT;
// the new object becomes the parent for subsequent values
782 if(readvaluestack_add(p, v)) {
787 case JSON_READER_OBJECT_END: return -1; // should not happen
788 case JSON_READER_ARRAY_BEGIN: {
789 v->type = JSON_ARRAY;
// the new array becomes the parent for subsequent values
790 if(readvaluestack_add(p, v)) {
795 case JSON_READER_ARRAY_END: return -1; // should not happen
796 case JSON_READER_STRING: {
797 v->type = JSON_STRING;
// the value takes the parser's string pointer (json_value_free() frees it);
// how the parser's own reference is handled is not visible here
799 v->value.string.string = p->value_str;
800 v->value.string.length = p->value_str_len;
805 case JSON_READER_INTEGER: {
806 v->type = JSON_INTEGER;
807 v->value.integer.value = json_reader_int(p);
810 case JSON_READER_NUMBER: {
811 v->type = JSON_NUMBER;
812 v->value.number.value = json_reader_double(p);
815 case JSON_READER_LITERAL: {
816 v->type = JSON_LITERAL;
817 v->value.literal.literal = json_reader_literal(p);
821 } else if(p->readvalue_initialized) {
822 JSONReaderType rt = p->reader_type;
823 if(rt == JSON_READER_OBJECT_END || rt == JSON_READER_ARRAY_END) {
826 // else: p->value_ready is 1, this will be handled in the next run
// more input is needed as long as a container is open or there is no root
829 if(p->readvalue_nelm > 0 || !p->read_value) {
830 int r = json_read(p);
832 p->readvalue_initialized = 0;
835 p->readvalue_initialized = 1;
// hand the finished tree to the caller and reset for the next value
839 *value = p->read_value;
840 p->readvalue_initialized = 0;
841 p->read_value = NULL;
// Linear search for the member called name (compared with strcmp); returns
// the value of the first match. The result for "not found" is not visible
// in this excerpt (presumably NULL).
846 JSONValue* json_obj_get(JSONObject *obj, const char *name) {
847 for(size_t i=0;i<obj->size;i++) {
848 if(!strcmp(obj->values[i].name, name)) {
849 return obj->values[i].value;
// Returns the element at index i, or NULL if i is out of range.
855 JSONValue* json_array_get(JSONArray *array, size_t i) {
856 if(i >= array->size) return NULL;
857 return array->array[i];
/*
 * Releases the contents of a value tree: object members (value and name),
 * array elements and string data are freed recursively.
 * The case labels and the statements that free the containers and the
 * value itself are not visible in this excerpt.
 * value must not be NULL (it is dereferenced immediately).
 */
860 void json_value_free(JSONValue *value) {
861 switch(value->type) {
863 JSONObject obj = value->value.object;
864 for(size_t i=0;i<obj.size;i++) {
865 json_value_free(obj.values[i].value);
866 free(obj.values[i].name);
872 JSONArray array = value->value.array;
873 for(size_t i=0;i<array.size;i++) {
874 json_value_free(array.array[i]);
880 free(value->value.string.string);
// Compares a JSON string value with the NUL-terminated string str;
// shorthand for json_strncmp() with strlen(str) as length.
890 int json_strcmp(JSONValue *jsstr, const char *str) {
891 return json_strncmp(jsstr, str, strlen(str));
/*
 * Compares a JSON string value with the slen bytes at str.
 *  - different lengths: 1 if the JSON string is longer, -1 if it is
 *    shorter; the contents are not compared in this case
 *  - equal lengths: result of memcmp over slen bytes (0 means equal)
 * Values that are not of type JSON_STRING are handled by an early branch
 * whose result is not visible in this excerpt.
 */
894 int json_strncmp(JSONValue *jsstr, const char *str, size_t slen) {
895 if(jsstr->type != JSON_STRING) {
898 size_t jsstrlen = jsstr->value.string.length;
900 if(jsstrlen != slen) {
901 return jsstrlen > slen ? 1 : -1;
904 return memcmp(jsstr->value.string.string, str, slen);