2 * Copyright 2022 Olaf Wintermann
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
31 * https://tools.ietf.org/html/rfc8259
34 #define PARSER_STATES_ALLOC 32
36 JSONParser* json_parser_new(void) {
37 JSONParser *parser = calloc(1, sizeof(JSONParser));
42 parser->states_alloc = PARSER_STATES_ALLOC;
43 parser->states = calloc(PARSER_STATES_ALLOC, sizeof(int));
49 parser->reader_array_alloc = 8;
54 void json_parser_free(JSONParser *p) {
55 if(p->states) free(p->states);
56 if(p->readvalue_stack) free(p->readvalue_stack);
61 void json_parser_fill(JSONParser *p, const char *buf, size_t size) {
68 static JSONToken nulltoken = { JSON_NO_TOKEN, NULL, 0, 0 };
70 int token_append(JSONToken *token, const char *buf, size_t len) {
75 size_t newlen = token->length + len;
76 if(token->alloc < newlen) {
77 char *newbuf = realloc(
78 token->alloc == 0 ? NULL : (char*)token->content,
83 token->content = newbuf;
84 token->alloc = newlen;
87 memcpy((char*)token->content+token->length, buf, len);
88 token->length = newlen;
92 JSONToken get_content(JSONParser *p, size_t start, size_t end) {
93 JSONToken token = nulltoken;
94 size_t part2 = end - start;
95 if(p->uncompleted.tokentype == JSON_NO_TOKEN) {
96 token.content = p->buffer + start;
98 } else if(part2 == 0) {
99 token = p->uncompleted;
101 if(token_append(&p->uncompleted, p->buffer+start, end - start)) {
104 token = p->uncompleted;
106 p->uncompleted = nulltoken;
110 int token_isliteral(const char *content, size_t length) {
112 if(!memcmp(content, "true", 4)) {
114 } else if(!memcmp(content, "null", 4)) {
117 } else if(length == 5 && !memcmp(content, "false", 5)) {
123 static int num_isexp(const char *content, size_t length, size_t pos) {
129 for(size_t i=pos;i<length;i++) {
133 } else if(i == pos) {
134 if(!(c == '+' || c == '-')) {
145 JSONTokenType token_numbertype(const char *content, size_t length) {
146 if(length == 0) return JSON_TOKEN_ERROR;
148 if(content[0] != '-' && !isdigit(content[0])) {
149 return JSON_TOKEN_ERROR;
152 JSONTokenType type = JSON_TOKEN_INTEGER;
153 for(size_t i=1;i<length;i++) {
154 if(content[i] == '.') {
155 if(type == JSON_TOKEN_NUMBER) {
156 return JSON_TOKEN_ERROR; // more than one decimal separator
158 type = JSON_TOKEN_NUMBER;
159 } else if(content[i] == 'e' || content[i] == 'E') {
160 return num_isexp(content, length, i+1) ? JSON_TOKEN_NUMBER : JSON_TOKEN_ERROR;
161 } else if(!isdigit(content[i])) {
162 return JSON_TOKEN_ERROR; // char is not a diget, decimal separator or exponent sep
169 JSONToken get_token(JSONParser *p, size_t start, size_t end) {
170 JSONToken token = get_content(p, start, end);
171 if(token_isliteral(token.content, token.length)) {
172 token.tokentype = JSON_TOKEN_LITERAL;
174 token.tokentype = token_numbertype(token.content, token.length);
180 static JSONTokenType char2ttype(char c) {
183 return JSON_TOKEN_BEGIN_ARRAY;
186 return JSON_TOKEN_BEGIN_OBJECT;
189 return JSON_TOKEN_END_ARRAY;
192 return JSON_TOKEN_END_OBJECT;
195 return JSON_TOKEN_NAME_SEPARATOR;
198 return JSON_TOKEN_VALUE_SEPARATOR;
201 return JSON_TOKEN_STRING;
205 return JSON_TOKEN_SPACE;
209 return JSON_NO_TOKEN;
212 JSONToken json_parser_next_token(JSONParser *p) {
213 // current token type and start index
214 JSONTokenType ttype = p->uncompleted.tokentype;
215 size_t token_start = p->pos;
217 for(size_t i=p->pos;i<p->size;i++) {
218 char c = p->buffer[i];
219 if(ttype != JSON_TOKEN_STRING) {
220 // currently non-string token
222 JSONTokenType ctype = char2ttype(c); // start of new token?
224 if(ttype == JSON_NO_TOKEN) {
225 if(ctype == JSON_TOKEN_SPACE) {
227 } else if(ctype == JSON_TOKEN_STRING) {
229 ttype = JSON_TOKEN_STRING;
231 } else if(ctype != JSON_NO_TOKEN) {
234 JSONToken token = { ctype, NULL, 0, 0};
237 ttype = JSON_TOKEN_LITERAL; // number or literal
242 if(ctype != JSON_NO_TOKEN) {
243 return get_token(p, token_start, i);
247 // currently inside a string
248 if(!p->tokenizer_escape) {
250 JSONToken ret = get_content(p, token_start, i+1);
251 ret.tokentype = JSON_TOKEN_STRING;
254 } else if(c == '\\') {
255 p->tokenizer_escape = 1;
258 p->tokenizer_escape = 0;
263 if(ttype != JSON_NO_TOKEN) {
265 size_t uncompeted_len = p->size - token_start;
266 if(p->uncompleted.tokentype == JSON_NO_TOKEN) {
267 // current token is uncompleted
268 // save current token content in p->uncompleted
269 JSONToken uncompleted;
270 uncompleted.tokentype = ttype;
271 uncompleted.length = uncompeted_len;
272 uncompleted.alloc = uncompeted_len + 16;
273 char *tmp = malloc(uncompleted.alloc);
275 memcpy(tmp, p->buffer+token_start, uncompeted_len);
276 uncompleted.content = tmp;
277 p->uncompleted = uncompleted;
282 // previously we also had an uncompleted token
283 // combine the uncompleted token with the current token
284 if(token_append(&p->uncompleted, p->buffer+token_start, uncompeted_len)) {
290 JSONToken ret = { JSON_NO_TOKEN, NULL, 0, 0};
294 static int create_string(JSONToken token, JSONValue **value) {
295 JSONValue *v = malloc(sizeof(JSONValue));
300 v->type = JSON_STRING;
302 char *str = malloc(token.length+1);
308 memcpy(str, token.content, token.length);
309 str[token.length] = 0;
311 v->type = JSON_STRING;
312 v->value.string.string = str;
313 v->value.string.length = token.length;
318 typedef struct json_ustr {
322 static json_ustr unescape_string(const char *str, size_t len) {
323 char *newstr = malloc(len+1);
333 for(int i=1;i<len-1;i++) {
345 } else if(c == 't') {
359 static int parse_integer(const char *str, size_t len, int64_t *value) {
365 memcpy(buf, str, len);
368 long long v = strtoll(buf, &endptr, 10);
369 if(endptr != &buf[len]) {
377 static int parse_number(const char *str, size_t len, double *value) {
383 memcpy(buf, str, len);
386 double v = strtod(buf, &endptr);
387 if(endptr != &buf[len]) {
395 static int add_state(JSONParser *p, int state) {
396 if(p->nstates >= p->states_alloc) {
397 p->states_alloc += PARSER_STATES_ALLOC;
398 p->states = realloc(p->states, p->states_alloc * sizeof(int));
403 p->states[++p->nstates] = state;
407 static void end_elm(JSONParser *p, JSONReaderType type) {
408 p->reader_type = type;
412 #define JP_STATE_VALUE_BEGIN 0
413 #define JP_STATE_VALUE_BEGIN_OBJ 1
414 #define JP_STATE_VALUE_BEGIN_AR 2
415 #define JP_STATE_ARRAY_SEP_OR_CLOSE 3
416 #define JP_STATE_OBJ_NAME_OR_CLOSE 4
417 #define JP_STATE_OBJ_NAME 5
418 #define JP_STATE_OBJ_COLON 6
419 #define JP_STATE_OBJ_SEP_OR_CLOSE 7
421 static int next_state_after_value(int current) {
424 // after a top-level value the JSON text is complete, expect nothing more
425 case JP_STATE_VALUE_BEGIN: return -1;
426 // after obj value, expect ',' or '}'
427 case JP_STATE_VALUE_BEGIN_OBJ: return JP_STATE_OBJ_SEP_OR_CLOSE;
428 // after array value, expect ',' or ']'
429 case JP_STATE_VALUE_BEGIN_AR: return JP_STATE_ARRAY_SEP_OR_CLOSE;
433 static void clear_valuename(JSONParser *p) {
434 if(p->value_name) free(p->value_name);
435 p->value_name = NULL;
436 p->value_name_len = 0;
439 static void clear_values(JSONParser *p) {
440 if(p->value_str) free(p->value_str);
442 p->value_str_len = 0;
447 int json_read(JSONParser *p) {
448 int state = p->states[p->nstates];
450 JSONToken token = json_parser_next_token(p);
451 p->reader_token = token;
456 if(token.tokentype == JSON_NO_TOKEN) {
462 // 0 JP_STATE_VALUE_BEGIN value begin
463 // 1 JP_STATE_VALUE_BEGIN_OBJ value begin (inside object)
464 // 2 JP_STATE_VALUE_BEGIN_AR value begin (inside array)
465 // 3 JP_STATE_ARRAY_SEP_OR_CLOSE array, expect separator or arrayclose
466 // 4 JP_STATE_OBJ_NAME_OR_CLOSE object, expect name or objclose
467 // 5 JP_STATE_OBJ_NAME object, expect name
468 // 6 JP_STATE_OBJ_COLON object, expect ':'
469 // 7 JP_STATE_OBJ_SEP_OR_CLOSE object, expect separator, objclose
471 if(state == JP_STATE_VALUE_BEGIN_AR || state == JP_STATE_OBJ_SEP_OR_CLOSE) {
477 p->states[p->nstates] = next_state_after_value(state);
479 switch(token.tokentype) {
480 case JSON_TOKEN_BEGIN_ARRAY: {
481 p->reader_type = JSON_READER_ARRAY_BEGIN;
482 if(add_state(p, JP_STATE_VALUE_BEGIN_AR)) return -1;
484 //return json_read(p);
486 case JSON_TOKEN_BEGIN_OBJECT: {
487 p->reader_type = JSON_READER_OBJECT_BEGIN;
488 if(add_state(p, JP_STATE_OBJ_NAME_OR_CLOSE)) return -1;
490 //return json_read(p);
492 case JSON_TOKEN_END_ARRAY: {
494 end_elm(p, JSON_READER_ARRAY_END);
497 case JSON_TOKEN_END_OBJECT: {
499 end_elm(p, JSON_READER_OBJECT_END);
502 case JSON_TOKEN_STRING: {
503 p->reader_type = JSON_READER_STRING;
504 json_ustr str = unescape_string(token.content, token.length);
506 p->value_str = str.ptr;
507 p->value_str_len = str.length;
513 case JSON_TOKEN_INTEGER: {
514 p->reader_type = JSON_READER_INTEGER;
516 if(parse_integer(token.content, token.length, &value)) {
519 p->value_int = value;
520 p->value_double = (double)value;
523 case JSON_TOKEN_NUMBER: {
524 p->reader_type = JSON_READER_NUMBER;
526 if(parse_number(token.content, token.length, &value)) {
529 p->value_double = value;
530 p->value_int = (int64_t)value;
533 case JSON_TOKEN_LITERAL: {
534 p->reader_type = JSON_READER_LITERAL;
539 } else if(state == JP_STATE_ARRAY_SEP_OR_CLOSE) {
541 if(token.tokentype == JSON_TOKEN_VALUE_SEPARATOR) {
542 p->states[p->nstates] = JP_STATE_VALUE_BEGIN_AR;
544 } else if(token.tokentype == JSON_TOKEN_END_ARRAY) {
545 end_elm(p, JSON_READER_ARRAY_END);
549 } else if(state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) {
550 if(state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == JSON_TOKEN_END_OBJECT) {
552 end_elm(p, JSON_READER_OBJECT_END);
555 if(token.tokentype != JSON_TOKEN_STRING) return -1;
557 if(p->value_name) free(p->value_name);
558 json_ustr valname = unescape_string(token.content, token.length);
559 p->value_name = valname.ptr;
560 p->value_name_len = valname.length;
563 p->states[p->nstates] = JP_STATE_OBJ_COLON;
566 } else if(state == JP_STATE_OBJ_COLON) {
568 if(token.tokentype != JSON_TOKEN_NAME_SEPARATOR) return -1;
570 p->states[p->nstates] = 1;
572 } else if(state == 7) {
573 // expect ',' or '}'
574 if(token.tokentype == JSON_TOKEN_VALUE_SEPARATOR) {
575 p->states[p->nstates] = JP_STATE_OBJ_NAME;
577 } else if(token.tokentype == JSON_TOKEN_END_OBJECT) {
578 end_elm(p, JSON_READER_OBJECT_END);
588 JSONReaderType json_reader_type(JSONParser *p) {
589 return p->reader_type;
592 const char* json_reader_name(JSONParser *p, size_t *opt_len) {
593 if(opt_len) *opt_len = p->value_name_len;
594 return p->value_name;
597 const char* json_reader_string(JSONParser *p, size_t *opt_len) {
598 if(opt_len) *opt_len = p->value_str_len;
600 if(p->reader_token.tokentype != JSON_TOKEN_STRING) {
607 int64_t json_reader_int(JSONParser *p) {
611 double json_reader_double(JSONParser *p) {
612 return p->value_double;
615 int json_reader_isnull(JSONParser *p) {
616 if(p->reader_token.tokentype == JSON_TOKEN_LITERAL && p->reader_token.length == 4) {
617 return !memcmp(p->reader_token.content, "null", 4);
622 JSONLiteralType json_reader_literal(JSONParser *p) {
623 const char *l = p->reader_token.content;
624 size_t token_len = p->reader_token.length;
625 if(token_len == 4 && !memcmp(l, "true", 4)) {
627 } else if(token_len == 5 && !memcmp(l, "false", 5)) {
633 int json_reader_bool(JSONParser *p) {
634 JSONLiteralType lt = json_reader_literal(p);
635 return lt == JSON_TRUE ? 1 : 0;
639 /* -------------------- read value functions -------------------- */
641 static JSONValue* init_value(JSONParser *p) {
642 JSONValue *value = malloc(sizeof(JSONValue));
646 memset(value, 0, sizeof(JSONValue));
650 static int setup_read_value(JSONParser *p) {
651 p->readvalue_alloc = PARSER_STATES_ALLOC;
652 p->readvalue_nelm = 0;
653 p->readvalue_stack = calloc(PARSER_STATES_ALLOC, sizeof(JSONValue*));
654 if(!p->readvalue_stack) return -1;
656 p->read_value = NULL;
657 p->readvalue_stack[0] = NULL;
662 static int obj_init_values(JSONParser *p, JSONValue *v) {
663 v->value.object.values = calloc(sizeof(JSONObjValue), p->reader_array_alloc);
664 if(!v->value.object.values) {
667 v->value.object.alloc = p->reader_array_alloc;
668 v->value.object.size = 0;
673 static int obj_add_value(JSONParser *p, JSONValue *parent, JSONObjValue v) {
674 if(!parent->value.object.values) {
675 if(obj_init_values(p, parent)) {
680 if(parent->value.object.size == parent->value.object.alloc) {
681 parent->value.object.alloc *= 2;
682 parent->value.object.values = realloc(parent->value.object.values, sizeof(JSONObjValue) * parent->value.object.alloc);
683 if(!parent->value.object.values) {
688 parent->value.object.values[parent->value.object.size++] = v;
693 static int array_init(JSONParser *p, JSONValue *v) {
694 v->value.array.array = calloc(sizeof(JSONValue*), p->reader_array_alloc);
695 if(!v->value.array.array) {
698 v->value.array.alloc = p->reader_array_alloc;
699 v->value.array.size = 0;
704 static int array_add_value(JSONParser *p, JSONValue *parent, JSONValue *v) {
705 if(!parent->value.array.array) {
706 if(array_init(p, parent)) {
711 if(parent->value.array.size == parent->value.array.alloc) {
712 parent->value.array.alloc *= 2;
713 parent->value.array.array = realloc(parent->value.array.array, sizeof(JSONValue*) * parent->value.array.alloc);
714 if(!parent->value.array.array) {
719 parent->value.array.array[parent->value.array.size++] = v;
724 static int add_to_parent(JSONParser *p, JSONValue *parent, JSONValue *v) {
726 return -1; // shouldn't happen but who knows
730 if(parent->type == JSON_OBJECT) {
731 if(!p->value_name || p->value_name_len == 0) {
734 char *valuename = p->value_name;
735 p->value_name = NULL;
737 JSONObjValue newvalue;
738 newvalue.name = valuename;
741 ret = obj_add_value(p, parent, newvalue);
742 } else if(parent->type == JSON_ARRAY) {
743 ret = array_add_value(p, parent, v);
745 ret = -1; // should also never happen
752 static int readvaluestack_add(JSONParser *p, JSONValue *v) {
753 if(p->readvalue_nelm == p->readvalue_alloc) {
754 p->readvalue_alloc *= 2;
755 JSONValue **new_stack = realloc(p->readvalue_stack, sizeof(JSONValue*) * p->readvalue_alloc);
759 p->readvalue_stack = new_stack;
761 p->readvalue_stack[p->readvalue_nelm++] = v;
765 int json_read_value(JSONParser *p, JSONValue **value) {
767 if(!p->readvalue_stack) {
768 if(setup_read_value(p)) return -1;
771 while(p->readvalue_nelm > 0 || !p->read_value) {
772 //JSONValue *s = p->readvalue_stack[p->readvalue_nelm];
774 // value available without another read
775 JSONValue *v = init_value(p);
778 if(p->readvalue_nelm > 0) {
779 if(add_to_parent(p, p->readvalue_stack[p->readvalue_nelm-1], v)) {
783 // set this value as root
787 switch(p->reader_type) {
788 case JSON_READER_OBJECT_BEGIN: {
789 v->type = JSON_OBJECT;
790 if(readvaluestack_add(p, v)) {
795 case JSON_READER_OBJECT_END: return -1; // should not happen
796 case JSON_READER_ARRAY_BEGIN: {
797 v->type = JSON_ARRAY;
798 if(readvaluestack_add(p, v)) {
803 case JSON_READER_ARRAY_END: return -1; // should not happen
804 case JSON_READER_STRING: {
805 v->type = JSON_STRING;
807 v->value.string.string = p->value_str;
808 v->value.string.length = p->value_str_len;
813 case JSON_READER_INTEGER: {
814 v->type = JSON_INTEGER;
815 v->value.integer.value = json_reader_int(p);
818 case JSON_READER_NUMBER: {
819 v->type = JSON_NUMBER;
820 v->value.number.value = json_reader_double(p);
823 case JSON_READER_LITERAL: {
824 v->type = JSON_LITERAL;
825 v->value.literal.literal = json_reader_literal(p);
829 } else if(p->readvalue_initialized) {
830 JSONReaderType rt = p->reader_type;
831 if(rt == JSON_READER_OBJECT_END || rt == JSON_READER_ARRAY_END) {
834 // else: p->value_ready is 1, this will be handled in the next run
837 if(p->readvalue_nelm > 0 || !p->read_value) {
838 int r = json_read(p);
840 p->readvalue_initialized = 0;
843 p->readvalue_initialized = 1;
847 *value = p->read_value;
848 p->readvalue_initialized = 0;
849 p->read_value = NULL;
854 JSONValue* json_obj_get(JSONObject *obj, const char *name) {
855 for(size_t i=0;i<obj->size;i++) {
856 if(!strcmp(obj->values[i].name, name)) {
857 return obj->values[i].value;
863 JSONValue* json_array_get(JSONArray *array, size_t i) {
864 if(i >= array->size) return NULL;
865 return array->array[i];
868 void json_value_free(JSONValue *value) {
869 switch(value->type) {
871 JSONObject obj = value->value.object;
872 for(size_t i=0;i<obj.size;i++) {
873 json_value_free(obj.values[i].value);
874 free(obj.values[i].name);
880 JSONArray array = value->value.array;
881 for(size_t i=0;i<array.size;i++) {
882 json_value_free(array.array[i]);
888 free(value->value.string.string);
898 int json_strcmp(JSONValue *jsstr, const char *str) {
899 return json_strncmp(jsstr, str, strlen(str));
902 int json_strncmp(JSONValue *jsstr, const char *str, size_t slen) {
903 if(jsstr->type != JSON_STRING) {
906 size_t jsstrlen = jsstr->value.string.length;
908 if(jsstrlen != slen) {
909 return jsstrlen > slen ? 1 : -1;
912 return memcmp(jsstr->value.string.string, str, slen);