2 * Copyright 2022 Olaf Wintermann
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
31 * https://tools.ietf.org/html/rfc8259
34 #define PARSER_STATES_ALLOC 32
36 JSONParser* json_parser_new(void) {
37 JSONParser *parser = calloc(1, sizeof(JSONParser));
42 parser->states_alloc = PARSER_STATES_ALLOC;
43 parser->states = calloc(PARSER_STATES_ALLOC, sizeof(int));
49 parser->reader_array_alloc = 8;
54 void json_parser_fill(JSONParser *p, const char *buf, size_t size) {
61 static JSONToken nulltoken = { JSON_NO_TOKEN, NULL, 0, 0 };
63 int token_append(JSONToken *token, const char *buf, size_t len) {
68 size_t newlen = token->length + len;
69 if(token->alloc < newlen) {
70 char *newbuf = realloc(
71 token->alloc == 0 ? NULL : (char*)token->content,
76 token->content = newbuf;
77 token->alloc = newlen;
80 memcpy((char*)token->content+token->length, buf, len);
81 token->length = newlen;
85 JSONToken get_content(JSONParser *p, size_t start, size_t end) {
86 JSONToken token = nulltoken;
87 size_t part2 = end - start;
88 if(p->uncompleted.tokentype == JSON_NO_TOKEN) {
89 token.content = p->buffer + start;
91 } else if(part2 == 0) {
92 token = p->uncompleted;
94 if(token_append(&p->uncompleted, p->buffer+start, end - start)) {
97 token = p->uncompleted;
99 p->uncompleted = nulltoken;
/*
 * Tests whether a token is one of the JSON literals "true", "null" or
 * "false".
 *
 * content does not have to be zero-terminated; only the first length bytes
 * are inspected, and a comparison is only done when length matches the
 * length of the literal.
 *
 * Returns 1 if the token is a literal, 0 otherwise.
 */
int token_isliteral(const char *content, size_t length) {
    switch(length) {
        case 4: {
            return !memcmp(content, "true", 4) || !memcmp(content, "null", 4);
        }
        case 5: {
            return !memcmp(content, "false", 5);
        }
        default: {
            return 0;
        }
    }
}
/*
 * Checks the exponent part of a number token.
 *
 * Starting at pos, the content must consist of an optional '+' or '-' sign
 * followed by at least one decimal digit, and nothing else.
 *
 * content: the number token
 * length:  total length of content
 * pos:     index of the first character after the exponent separator
 *
 * Returns 1 if the exponent is well-formed, 0 otherwise.
 */
static int num_isexp(const char *content, size_t length, size_t pos) {
    if(pos >= length) {
        return 0; // nothing follows the exponent separator
    }

    int has_digit = 0;
    for(size_t i=pos;i<length;i++) {
        // passing a negative char value to isdigit() is undefined behavior
        unsigned char c = (unsigned char)content[i];
        if(isdigit(c)) {
            has_digit = 1;
        } else if(i == pos) {
            // only the first character may be a sign
            if(!(c == '+' || c == '-')) {
                return 0;
            }
        } else {
            return 0;
        }
    }

    return has_digit;
}
138 JSONTokenType token_numbertype(const char *content, size_t length) {
139 if(length == 0) return JSON_TOKEN_ERROR;
141 if(content[0] != '-' && !isdigit(content[0])) {
142 return JSON_TOKEN_ERROR;
145 JSONTokenType type = JSON_TOKEN_INTEGER;
146 for(size_t i=1;i<length;i++) {
147 if(content[i] == '.') {
148 if(type == JSON_TOKEN_NUMBER) {
149 return JSON_TOKEN_ERROR; // more than one decimal separator
151 type = JSON_TOKEN_NUMBER;
152 } else if(content[i] == 'e' || content[i] == 'E') {
153 return num_isexp(content, length, i+1) ? JSON_TOKEN_NUMBER : JSON_TOKEN_ERROR;
154 } else if(!isdigit(content[i])) {
155 return JSON_TOKEN_ERROR; // char is not a diget, decimal separator or exponent sep
162 JSONToken get_token(JSONParser *p, size_t start, size_t end) {
163 JSONToken token = get_content(p, start, end);
164 if(token_isliteral(token.content, token.length)) {
165 token.tokentype = JSON_TOKEN_LITERAL;
167 token.tokentype = token_numbertype(token.content, token.length);
173 static JSONTokenType char2ttype(char c) {
176 return JSON_TOKEN_BEGIN_ARRAY;
179 return JSON_TOKEN_BEGIN_OBJECT;
182 return JSON_TOKEN_END_ARRAY;
185 return JSON_TOKEN_END_OBJECT;
188 return JSON_TOKEN_NAME_SEPARATOR;
191 return JSON_TOKEN_VALUE_SEPARATOR;
194 return JSON_TOKEN_STRING;
198 return JSON_TOKEN_SPACE;
202 return JSON_NO_TOKEN;
205 JSONToken json_parser_next_token(JSONParser *p) {
206 // current token type and start index
207 JSONTokenType ttype = p->uncompleted.tokentype;
208 size_t token_start = p->pos;
210 for(size_t i=p->pos;i<p->size;i++) {
211 char c = p->buffer[i];
212 if(ttype != JSON_TOKEN_STRING) {
213 // currently non-string token
215 JSONTokenType ctype = char2ttype(c); // start of new token?
217 if(ttype == JSON_NO_TOKEN) {
218 if(ctype == JSON_TOKEN_SPACE) {
220 } else if(ctype == JSON_TOKEN_STRING) {
222 ttype = JSON_TOKEN_STRING;
224 } else if(ctype != JSON_NO_TOKEN) {
227 JSONToken token = { ctype, NULL, 0, 0};
230 ttype = JSON_TOKEN_LITERAL; // number or literal
235 if(ctype != JSON_NO_TOKEN) {
236 return get_token(p, token_start, i);
240 // currently inside a string
241 if(!p->tokenizer_escape) {
243 JSONToken ret = get_content(p, token_start, i+1);
244 ret.tokentype = JSON_TOKEN_STRING;
247 } else if(c == '\\') {
248 p->tokenizer_escape = 1;
251 p->tokenizer_escape = 0;
256 if(ttype != JSON_NO_TOKEN) {
258 size_t uncompeted_len = p->size - token_start;
259 if(p->uncompleted.tokentype == JSON_NO_TOKEN) {
260 // current token is uncompleted
261 // save current token content in p->uncompleted
262 JSONToken uncompleted;
263 uncompleted.tokentype = ttype;
264 uncompleted.length = uncompeted_len;
265 uncompleted.alloc = uncompeted_len + 16;
266 char *tmp = malloc(uncompleted.alloc);
268 memcpy(tmp, p->buffer+token_start, uncompeted_len);
269 uncompleted.content = tmp;
270 p->uncompleted = uncompleted;
275 // previously we also had an uncompleted token
276 // combine the uncompleted token with the current token
277 if(token_append(&p->uncompleted, p->buffer+token_start, uncompeted_len)) {
283 JSONToken ret = { JSON_NO_TOKEN, NULL, 0, 0};
287 static int create_string(JSONToken token, JSONValue **value) {
288 JSONValue *v = malloc(sizeof(JSONValue));
293 v->type = JSON_STRING;
295 char *str = malloc(token.length+1);
301 memcpy(str, token.content, token.length);
302 str[token.length] = 0;
304 v->type = JSON_STRING;
305 v->value.string.string = str;
306 v->value.string.length = token.length;
311 typedef struct json_ustr {
315 static json_ustr unescape_string(const char *str, size_t len) {
316 char *newstr = malloc(len+1);
326 for(int i=1;i<len-1;i++) {
338 } else if(c == 't') {
352 static int parse_integer(const char *str, size_t len, int64_t *value) {
358 memcpy(buf, str, len);
361 long long v = strtoll(buf, &endptr, 10);
362 if(endptr != &buf[len]) {
370 static int parse_number(const char *str, size_t len, double *value) {
376 memcpy(buf, str, len);
379 double v = strtod(buf, &endptr);
380 if(endptr != &buf[len]) {
388 static int add_state(JSONParser *p, int state) {
389 if(p->nstates >= p->states_alloc) {
390 p->states_alloc += PARSER_STATES_ALLOC;
391 p->states = realloc(p->states, p->states_alloc * sizeof(int));
396 p->states[++p->nstates] = state;
400 static void end_elm(JSONParser *p, JSONReaderType type) {
401 p->reader_type = type;
/*
 * Reader states. The top element of the parser's state stack holds one of
 * these values; it determines which token json_read() expects next.
 */
enum {
    JP_STATE_VALUE_BEGIN        = 0, /* value begin */
    JP_STATE_VALUE_BEGIN_OBJ    = 1, /* value begin (inside object) */
    JP_STATE_VALUE_BEGIN_AR     = 2, /* value begin (inside array) */
    JP_STATE_ARRAY_SEP_OR_CLOSE = 3, /* array, expect separator or arrayclose */
    JP_STATE_OBJ_NAME_OR_CLOSE  = 4, /* object, expect name or objclose */
    JP_STATE_OBJ_NAME           = 5, /* object, expect name */
    JP_STATE_OBJ_COLON          = 6, /* object, expect ':' */
    JP_STATE_OBJ_SEP_OR_CLOSE   = 7  /* object, expect separator or objclose */
};
414 static int next_state_after_value(int current) {
417 // after value JSON complete, expect nothing
418 case JP_STATE_VALUE_BEGIN: return -1;
419 // after obj value, expect ',' or '}'
420 case JP_STATE_VALUE_BEGIN_OBJ: return JP_STATE_OBJ_SEP_OR_CLOSE;
421 // after array value, expect ',' or ']'
422 case JP_STATE_VALUE_BEGIN_AR: return JP_STATE_ARRAY_SEP_OR_CLOSE;
426 static void clear_valuename(JSONParser *p) {
427 if(p->value_name) free(p->value_name);
428 p->value_name = NULL;
429 p->value_name_len = 0;
432 static void clear_values(JSONParser *p) {
433 if(p->value_str) free(p->value_str);
435 p->value_str_len = 0;
440 int json_read(JSONParser *p) {
441 int state = p->states[p->nstates];
443 JSONToken token = json_parser_next_token(p);
444 p->reader_token = token;
449 if(token.tokentype == JSON_NO_TOKEN) {
455 // 0 JP_STATE_VALUE_BEGIN value begin
456 // 1 JP_STATE_VALUE_BEGIN_OBJ value begin (inside object)
457 // 2 JP_STATE_VALUE_BEGIN_AR value begin (inside array)
458 // 3 JP_STATE_ARRAY_SEP_OR_CLOSE array, expect separator or arrayclose
459 // 4 JP_STATE_OBJ_NAME_OR_CLOSE object, expect name or objclose
460 // 5 JP_STATE_OBJ_NAME object, expect name
461 // 6 JP_STATE_OBJ_COLON object, expect ':'
462 // 7 JP_STATE_OBJ_SEP_OR_CLOSE object, expect separator, objclose
464 if(state == JP_STATE_VALUE_BEGIN_AR || state == JP_STATE_OBJ_SEP_OR_CLOSE) {
470 p->states[p->nstates] = next_state_after_value(state);
472 switch(token.tokentype) {
473 case JSON_TOKEN_BEGIN_ARRAY: {
474 p->reader_type = JSON_READER_ARRAY_BEGIN;
475 if(add_state(p, JP_STATE_VALUE_BEGIN_AR)) return -1;
477 //return json_read(p);
479 case JSON_TOKEN_BEGIN_OBJECT: {
480 p->reader_type = JSON_READER_OBJECT_BEGIN;
481 if(add_state(p, JP_STATE_OBJ_NAME_OR_CLOSE)) return -1;
483 //return json_read(p);
485 case JSON_TOKEN_END_ARRAY: {
487 end_elm(p, JSON_READER_ARRAY_END);
490 case JSON_TOKEN_END_OBJECT: {
492 end_elm(p, JSON_READER_OBJECT_END);
495 case JSON_TOKEN_STRING: {
496 p->reader_type = JSON_READER_STRING;
497 json_ustr str = unescape_string(token.content, token.length);
499 p->value_str = str.ptr;
500 p->value_str_len = str.length;
506 case JSON_TOKEN_INTEGER: {
507 p->reader_type = JSON_READER_INTEGER;
509 if(parse_integer(token.content, token.length, &value)) {
512 p->value_int = value;
513 p->value_double = (double)value;
516 case JSON_TOKEN_NUMBER: {
517 p->reader_type = JSON_READER_NUMBER;
519 if(parse_number(token.content, token.length, &value)) {
522 p->value_double = value;
523 p->value_int = (int64_t)value;
526 case JSON_TOKEN_LITERAL: {
527 p->reader_type = JSON_READER_LITERAL;
532 } else if(state == JP_STATE_ARRAY_SEP_OR_CLOSE) {
534 if(token.tokentype == JSON_TOKEN_VALUE_SEPARATOR) {
535 p->states[p->nstates] = JP_STATE_VALUE_BEGIN_AR;
537 } else if(token.tokentype == JSON_TOKEN_END_ARRAY) {
538 end_elm(p, JSON_READER_ARRAY_END);
542 } else if(state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) {
543 if(state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == JSON_TOKEN_END_OBJECT) {
545 end_elm(p, JSON_READER_OBJECT_END);
548 if(token.tokentype != JSON_TOKEN_STRING) return -1;
550 if(p->value_name) free(p->value_name);
551 json_ustr valname = unescape_string(token.content, token.length);
552 p->value_name = valname.ptr;
553 p->value_name_len = valname.length;
556 p->states[p->nstates] = JP_STATE_OBJ_COLON;
559 } else if(state == JP_STATE_OBJ_COLON) {
561 if(token.tokentype != JSON_TOKEN_NAME_SEPARATOR) return -1;
563 p->states[p->nstates] = 1;
565 } else if(state == 7) {
566 // expect ',' or '}]'
567 if(token.tokentype == JSON_TOKEN_VALUE_SEPARATOR) {
568 p->states[p->nstates] = JP_STATE_OBJ_NAME;
570 } else if(token.tokentype == JSON_TOKEN_END_OBJECT) {
571 end_elm(p, JSON_READER_OBJECT_END);
581 JSONReaderType json_reader_type(JSONParser *p) {
582 return p->reader_type;
585 const char* json_reader_name(JSONParser *p, size_t *opt_len) {
586 if(opt_len) *opt_len = p->value_name_len;
587 return p->value_name;
590 const char* json_reader_string(JSONParser *p, size_t *opt_len) {
591 if(opt_len) *opt_len = p->value_str_len;
593 if(p->reader_token.tokentype != JSON_TOKEN_STRING) {
600 int64_t json_reader_int(JSONParser *p) {
604 double json_reader_double(JSONParser *p) {
605 return p->value_double;
608 int json_reader_isnull(JSONParser *p) {
609 if(p->reader_token.tokentype == JSON_TOKEN_LITERAL && p->reader_token.length == 4) {
610 return !memcmp(p->reader_token.content, "null", 4);
615 JSONLiteralType json_reader_literal(JSONParser *p) {
616 const char *l = p->reader_token.content;
617 if(!strcmp(l, "true")) {
619 } else if(!strcmp(l, "false")) {
625 int json_reader_bool(JSONParser *p) {
626 JSONLiteralType lt = json_reader_literal(p);
627 return lt == JSON_TRUE ? 1 : 0;
631 /* -------------------- read value functions -------------------- */
633 static JSONValue* init_value(JSONParser *p) {
634 JSONValue *value = malloc(sizeof(JSONValue));
638 memset(value, 0, sizeof(JSONValue));
642 static int setup_read_value(JSONParser *p) {
643 p->readvalue_alloc = PARSER_STATES_ALLOC;
644 p->readvalue_nelm = 0;
645 p->readvalue_stack = calloc(PARSER_STATES_ALLOC, sizeof(JSONValue*));
646 if(!p->readvalue_stack) return -1;
648 p->read_value = NULL;
649 p->readvalue_stack[0] = NULL;
654 static int obj_init_values(JSONParser *p, JSONValue *v) {
655 v->value.object.values = calloc(sizeof(JSONObjValue), p->reader_array_alloc);
656 if(!v->value.object.values) {
659 v->value.object.alloc = p->reader_array_alloc;
660 v->value.object.size = 0;
665 static int obj_add_value(JSONParser *p, JSONValue *parent, JSONObjValue v) {
666 if(!parent->value.object.values) {
667 if(obj_init_values(p, parent)) {
672 if(parent->value.object.size == parent->value.object.alloc) {
673 parent->value.object.alloc *= 2;
674 parent->value.object.values = realloc(parent->value.object.values, sizeof(JSONObjValue) * parent->value.object.alloc);
675 if(!parent->value.object.values) {
680 parent->value.object.values[parent->value.object.size++] = v;
685 static int array_init(JSONParser *p, JSONValue *v) {
686 v->value.array.array = calloc(sizeof(JSONValue*), p->reader_array_alloc);
687 if(!v->value.array.array) {
690 v->value.array.alloc = p->reader_array_alloc;
691 v->value.array.size = 0;
696 static int array_add_value(JSONParser *p, JSONValue *parent, JSONValue *v) {
697 if(!parent->value.array.array) {
698 if(array_init(p, parent)) {
703 if(parent->value.array.size == parent->value.array.alloc) {
704 parent->value.array.alloc *= 2;
705 parent->value.array.array = realloc(parent->value.array.array, sizeof(JSONValue*) * parent->value.array.alloc);
706 if(!parent->value.array.array) {
711 parent->value.array.array[parent->value.array.size++] = v;
716 static int add_to_parent(JSONParser *p, JSONValue *parent, JSONValue *v) {
718 return -1; // shouldn't happen but who knows
722 if(parent->type == JSON_OBJECT) {
723 if(!p->value_name || p->value_name_len == 0) {
726 char *valuename = p->value_name;
727 p->value_name = NULL;
729 JSONObjValue newvalue;
730 newvalue.name = valuename;
733 ret = obj_add_value(p, parent, newvalue);
734 } else if(parent->type == JSON_ARRAY) {
735 ret = array_add_value(p, parent, v);
737 ret = -1; // should also never happen
744 static int readvaluestack_add(JSONParser *p, JSONValue *v) {
745 if(p->readvalue_nelm == p->readvalue_alloc) {
746 p->readvalue_alloc *= 2;
747 p->readvalue_stack = realloc(p->readvalue_stack, sizeof(JSONValue*) * p->readvalue_alloc);
748 if(!p->readvalue_stack) {
752 p->readvalue_stack[p->readvalue_nelm++] = v;
756 int json_read_value(JSONParser *p, JSONValue **value) {
758 if(!p->readvalue_stack) {
759 if(setup_read_value(p)) return -1;
762 while(p->readvalue_nelm > 0 || !p->read_value) {
763 //JSONValue *s = p->readvalue_stack[p->readvalue_nelm];
765 // value available without another read
766 JSONValue *v = init_value(p);
769 if(p->readvalue_nelm > 0) {
770 if(add_to_parent(p, p->readvalue_stack[p->readvalue_nelm-1], v)) {
774 // set this value as root
778 switch(p->reader_type) {
779 case JSON_READER_OBJECT_BEGIN: {
780 v->type = JSON_OBJECT;
781 if(readvaluestack_add(p, v)) {
786 case JSON_READER_OBJECT_END: return -1; // should not happen
787 case JSON_READER_ARRAY_BEGIN: {
788 v->type = JSON_ARRAY;
789 if(readvaluestack_add(p, v)) {
794 case JSON_READER_ARRAY_END: return -1; // should not happen
795 case JSON_READER_STRING: {
796 v->type = JSON_STRING;
798 v->value.string.string = p->value_str;
799 v->value.string.length = p->value_str_len;
804 case JSON_READER_INTEGER: {
805 v->type = JSON_INTEGER;
806 v->value.integer.value = json_reader_int(p);
809 case JSON_READER_NUMBER: {
810 v->type = JSON_NUMBER;
811 v->value.number.value = json_reader_double(p);
814 case JSON_READER_LITERAL: {
815 v->type = JSON_LITERAL;
816 v->value.literal.literal = json_reader_literal(p);
820 } else if(p->readvalue_initialized) {
821 JSONReaderType rt = p->reader_type;
822 if(rt == JSON_READER_OBJECT_END || rt == JSON_READER_ARRAY_END) {
825 // else: p->value_ready is 1, this will be handled in the next run
828 if(p->readvalue_nelm > 0 || !p->read_value) {
829 int r = json_read(p);
831 p->readvalue_initialized = 0;
834 p->readvalue_initialized = 1;
838 *value = p->read_value;
839 p->readvalue_initialized = 0;
840 p->read_value = NULL;
845 JSONValue* json_obj_get(JSONObject *obj, const char *name) {
846 for(size_t i=0;i<obj->size;i++) {
847 if(!strcmp(obj->values[i].name, name)) {
848 return obj->values[i].value;
854 JSONValue* json_array_get(JSONArray *array, size_t i) {
855 if(i >= array->size) return NULL;
856 return array->array[i];
859 void json_value_free(JSONValue *value) {
860 switch(value->type) {
862 JSONObject obj = value->value.object;
863 for(size_t i=0;i<obj.size;i++) {
864 json_value_free(obj.values[i].value);
865 free(obj.values[i].name);
871 JSONArray array = value->value.array;
872 for(size_t i=0;i<array.size;i++) {
873 json_value_free(array.array[i]);
879 free(value->value.string.string);
889 int json_strcmp(JSONValue *jsstr, const char *str) {
890 return json_strncmp(jsstr, str, strlen(str));
893 int json_strncmp(JSONValue *jsstr, const char *str, size_t slen) {
894 if(jsstr->type != JSON_STRING) {
897 size_t jsstrlen = jsstr->value.string.length;
899 if(jsstrlen != slen) {
900 return jsstrlen > slen ? 1 : -1;
903 return memcmp(jsstr->value.string.string, str, slen);