/* * Copyright 2022 Olaf Wintermann * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include #include #include #include "json.h" /* * RFC 8259 * https://tools.ietf.org/html/rfc8259 */ #define PARSER_STATES_ALLOC 32 JSONParser* json_parser_new(void) { JSONParser *parser = calloc(1, sizeof(JSONParser)); if(!parser) { return NULL; } parser->states_alloc = PARSER_STATES_ALLOC; parser->states = calloc(PARSER_STATES_ALLOC, sizeof(int)); if(!parser->states) { free(parser); return NULL; } parser->reader_array_alloc = 8; return parser; } void json_parser_free(JSONParser *p) { if(p->states) free(p->states); if(p->readvalue_stack) free(p->readvalue_stack); free(p); } void json_parser_fill(JSONParser *p, const char *buf, size_t size) { p->buffer = buf; p->size = size; p->pos = 0; } static JSONToken nulltoken = { JSON_NO_TOKEN, NULL, 0, 0 }; int token_append(JSONToken *token, const char *buf, size_t len) { if(len == 0) { return 0; } size_t newlen = token->length + len; if(token->alloc < newlen) { char *newbuf = realloc( token->alloc == 0 ? NULL : (char*)token->content, newlen); if(!newbuf) { return 1; } token->content = newbuf; token->alloc = newlen; } memcpy((char*)token->content+token->length, buf, len); token->length = newlen; return 0; } JSONToken get_content(JSONParser *p, size_t start, size_t end) { JSONToken token = nulltoken; size_t part2 = end - start; if(p->uncompleted.tokentype == JSON_NO_TOKEN) { token.content = p->buffer + start; token.length = part2; } else if(part2 == 0) { token = p->uncompleted; } else { if(token_append(&p->uncompleted, p->buffer+start, end - start)) { return nulltoken; } token = p->uncompleted; } p->uncompleted = nulltoken; return token; } int token_isliteral(const char *content, size_t length) { if(length == 4) { if(!memcmp(content, "true", 4)) { return 1; } else if(!memcmp(content, "null", 4)) { return 1; } } else if(length == 5 && !memcmp(content, "false", 5)) { return 1; } return 0; } static int num_isexp(const char *content, size_t length, size_t pos) { if(pos >= length) { return 0; } int ok = 0; for(size_t i=pos;ipos = end; return token; } static JSONTokenType char2ttype(char c) { switch(c) { case '[': { return JSON_TOKEN_BEGIN_ARRAY; } case '{': { return JSON_TOKEN_BEGIN_OBJECT; } case ']': { return JSON_TOKEN_END_ARRAY; } case '}': { return JSON_TOKEN_END_OBJECT; } case ':': { return JSON_TOKEN_NAME_SEPARATOR; } case ',': { return JSON_TOKEN_VALUE_SEPARATOR; } case '"': { return JSON_TOKEN_STRING; } default: { if(isspace(c)) { return JSON_TOKEN_SPACE; } } } return JSON_NO_TOKEN; } JSONToken json_parser_next_token(JSONParser *p) { // current token type and start index JSONTokenType ttype = p->uncompleted.tokentype; size_t token_start = p->pos; for(size_t i=p->pos;isize;i++) { char c = p->buffer[i]; if(ttype != JSON_TOKEN_STRING) { // currently non-string token JSONTokenType ctype = char2ttype(c); // start of new token? if(ttype == JSON_NO_TOKEN) { if(ctype == JSON_TOKEN_SPACE) { continue; } else if(ctype == JSON_TOKEN_STRING) { // begin string ttype = JSON_TOKEN_STRING; token_start = i; } else if(ctype != JSON_NO_TOKEN) { // single-char token p->pos = i + 1; JSONToken token = { ctype, NULL, 0, 0}; return token; } else { ttype = JSON_TOKEN_LITERAL; // number or literal token_start = i; } } else { // finish token if(ctype != JSON_NO_TOKEN) { return get_token(p, token_start, i); } } } else { // currently inside a string if(!p->tokenizer_escape) { if(c == '"') { JSONToken ret = get_content(p, token_start, i+1); ret.tokentype = JSON_TOKEN_STRING; p->pos = i+1; return ret; } else if(c == '\\') { p->tokenizer_escape = 1; } } else { p->tokenizer_escape = 0; } } } if(ttype != JSON_NO_TOKEN) { // uncompleted token size_t uncompeted_len = p->size - token_start; if(p->uncompleted.tokentype == JSON_NO_TOKEN) { // current token is uncompleted // save current token content in p->uncompleted JSONToken uncompleted; uncompleted.tokentype = ttype; uncompleted.length = uncompeted_len; uncompleted.alloc = uncompeted_len + 16; char *tmp = malloc(uncompleted.alloc); if(tmp) { memcpy(tmp, p->buffer+token_start, uncompeted_len); uncompleted.content = tmp; p->uncompleted = uncompleted; } else { p->error = 1; } } else { // previously we also had an uncompleted token // combine the uncompleted token with the current token if(token_append(&p->uncompleted, p->buffer+token_start, uncompeted_len)) { p->error = 1; } } } JSONToken ret = { JSON_NO_TOKEN, NULL, 0, 0}; return ret; } static int create_string(JSONToken token, JSONValue **value) { JSONValue *v = malloc(sizeof(JSONValue)); if(!v) { *value = NULL; return -1; } v->type = JSON_STRING; char *str = malloc(token.length+1); if(!str) { free(v); *value = NULL; return -1; } memcpy(str, token.content, token.length); str[token.length] = 0; v->type = JSON_STRING; v->value.string.string = str; v->value.string.length = token.length; *value = v; return 0; } typedef struct json_ustr { char *ptr; size_t length; } json_ustr; static json_ustr unescape_string(const char *str, size_t len) { char *newstr = malloc(len+1); if(!newstr) { json_ustr r; r.ptr = NULL; r.length = 0; return r; } int j = 0; int u = 0; for(int i=1;i 30) { return 1; } memcpy(buf, str, len); buf[len] = 0; long long v = strtoll(buf, &endptr, 10); if(endptr != &buf[len]) { return 1; } *value = (int64_t)v; return 0; } static int parse_number(const char *str, size_t len, double *value) { char *endptr = NULL; char buf[32]; if(len > 30) { return 1; } memcpy(buf, str, len); buf[len] = 0; double v = strtod(buf, &endptr); if(endptr != &buf[len]) { return 1; } *value = v; return 0; } static int add_state(JSONParser *p, int state) { if(p->nstates >= p->states_alloc) { p->states_alloc += PARSER_STATES_ALLOC; p->states = realloc(p->states, p->states_alloc * sizeof(int)); if(!p->states) { return 1; } } p->states[++p->nstates] = state; return 0; } static void end_elm(JSONParser *p, JSONReaderType type) { p->reader_type = type; p->nstates--; } #define JP_STATE_VALUE_BEGIN 0 #define JP_STATE_VALUE_BEGIN_OBJ 1 #define JP_STATE_VALUE_BEGIN_AR 2 #define JP_STATE_ARRAY_SEP_OR_CLOSE 3 #define JP_STATE_OBJ_NAME_OR_CLOSE 4 #define JP_STATE_OBJ_NAME 5 #define JP_STATE_OBJ_COLON 6 #define JP_STATE_OBJ_SEP_OR_CLOSE 7 static int next_state_after_value(int current) { switch(current) { default: return -1; // after value JSON complete, expect nothing case JP_STATE_VALUE_BEGIN: return -1; // after obj value, expect ',' or '}' case JP_STATE_VALUE_BEGIN_OBJ: return JP_STATE_OBJ_SEP_OR_CLOSE; // after array value, expect ',' or ']' case JP_STATE_VALUE_BEGIN_AR: return JP_STATE_ARRAY_SEP_OR_CLOSE; } } static void clear_valuename(JSONParser *p) { if(p->value_name) free(p->value_name); p->value_name = NULL; p->value_name_len = 0; } static void clear_values(JSONParser *p) { if(p->value_str) free(p->value_str); p->value_str = NULL; p->value_str_len = 0; p->value_int = 0; p->value_double = 0; } int json_read(JSONParser *p) { int state = p->states[p->nstates]; clear_values(p); JSONToken token = json_parser_next_token(p); p->reader_token = token; p->value_ready = 0; if(token.tokentype == JSON_NO_TOKEN) { return 0; } int ret = 1; // 0 JP_STATE_VALUE_BEGIN value begin // 1 JP_STATE_VALUE_BEGIN_OBJ value begin (inside object) // 2 JP_STATE_VALUE_BEGIN_AR value begin (inside array) // 3 JP_STATE_ARRAY_SEP_OR_CLOSE array, expect separator or arrayclose // 4 JP_STATE_OBJ_NAME_OR_CLOSE object, expect name or objclose // 5 JP_STATE_OBJ_NAME object, expect name // 6 JP_STATE_OBJ_COLON object, expect ':' // 7 JP_STATE_OBJ_SEP_OR_CLOSE object, expect separator, objclose if(state == JP_STATE_VALUE_BEGIN_AR || state == JP_STATE_OBJ_SEP_OR_CLOSE) { clear_valuename(p); } if(state < 3) { // expect value p->states[p->nstates] = next_state_after_value(state); p->value_ready = 1; switch(token.tokentype) { case JSON_TOKEN_BEGIN_ARRAY: { p->reader_type = JSON_READER_ARRAY_BEGIN; if(add_state(p, JP_STATE_VALUE_BEGIN_AR)) return -1; return 1; //return json_read(p); } case JSON_TOKEN_BEGIN_OBJECT: { p->reader_type = JSON_READER_OBJECT_BEGIN; if(add_state(p, JP_STATE_OBJ_NAME_OR_CLOSE)) return -1; return 1; //return json_read(p); } case JSON_TOKEN_END_ARRAY: { p->value_ready = 0; end_elm(p, JSON_READER_ARRAY_END); break; } case JSON_TOKEN_END_OBJECT: { p->value_ready = 0; end_elm(p, JSON_READER_OBJECT_END); break; } case JSON_TOKEN_STRING: { p->reader_type = JSON_READER_STRING; json_ustr str = unescape_string(token.content, token.length); if(str.ptr) { p->value_str = str.ptr; p->value_str_len = str.length; } else { return -1; } break; } case JSON_TOKEN_INTEGER: { p->reader_type = JSON_READER_INTEGER; int64_t value; if(parse_integer(token.content, token.length, &value)) { return -1; } p->value_int = value; p->value_double = (double)value; break; } case JSON_TOKEN_NUMBER: { p->reader_type = JSON_READER_NUMBER; double value; if(parse_number(token.content, token.length, &value)) { return -1; } p->value_double = value; p->value_int = (int64_t)value; break; } case JSON_TOKEN_LITERAL: { p->reader_type = JSON_READER_LITERAL; break; } default: return -1; } } else if(state == JP_STATE_ARRAY_SEP_OR_CLOSE) { // expect ',' or ']' if(token.tokentype == JSON_TOKEN_VALUE_SEPARATOR) { p->states[p->nstates] = JP_STATE_VALUE_BEGIN_AR; return json_read(p); } else if(token.tokentype == JSON_TOKEN_END_ARRAY) { end_elm(p, JSON_READER_ARRAY_END); } else { return -1; } } else if(state == JP_STATE_OBJ_NAME_OR_CLOSE || state == JP_STATE_OBJ_NAME) { if(state == JP_STATE_OBJ_NAME_OR_CLOSE && token.tokentype == JSON_TOKEN_END_OBJECT) { clear_valuename(p); end_elm(p, JSON_READER_OBJECT_END); } else { // expect string if(token.tokentype != JSON_TOKEN_STRING) return -1; if(p->value_name) free(p->value_name); json_ustr valname = unescape_string(token.content, token.length); p->value_name = valname.ptr; p->value_name_len = valname.length; // next state p->states[p->nstates] = JP_STATE_OBJ_COLON; return json_read(p); } } else if(state == JP_STATE_OBJ_COLON) { // expect ':' if(token.tokentype != JSON_TOKEN_NAME_SEPARATOR) return -1; // next state p->states[p->nstates] = 1; return json_read(p); } else if(state == 7) { // expect ',' or '}]' if(token.tokentype == JSON_TOKEN_VALUE_SEPARATOR) { p->states[p->nstates] = JP_STATE_OBJ_NAME; return json_read(p); } else if(token.tokentype == JSON_TOKEN_END_OBJECT) { end_elm(p, JSON_READER_OBJECT_END); } else { return -1; } } return ret; } JSONReaderType json_reader_type(JSONParser *p) { return p->reader_type; } const char* json_reader_name(JSONParser *p, size_t *opt_len) { if(opt_len) *opt_len = p->value_name_len; return p->value_name; } const char* json_reader_string(JSONParser *p, size_t *opt_len) { if(opt_len) *opt_len = p->value_str_len; if(p->reader_token.tokentype != JSON_TOKEN_STRING) { return NULL; } return p->value_str; } int64_t json_reader_int(JSONParser *p) { return p->value_int; } double json_reader_double(JSONParser *p) { return p->value_double; } int json_reader_isnull(JSONParser *p) { if(p->reader_token.tokentype == JSON_TOKEN_LITERAL && p->reader_token.length == 4) { return !memcmp(p->reader_token.content, "null", 4); } return 0; } JSONLiteralType json_reader_literal(JSONParser *p) { const char *l = p->reader_token.content; size_t token_len = p->reader_token.length; if(token_len == 4 && !memcmp(l, "true", 4)) { return JSON_TRUE; } else if(token_len == 5 && !memcmp(l, "false", 5)) { return JSON_FALSE; } return JSON_NULL; } int json_reader_bool(JSONParser *p) { JSONLiteralType lt = json_reader_literal(p); return lt == JSON_TRUE ? 1 : 0; } /* -------------------- read value functions -------------------- */ static JSONValue* init_value(JSONParser *p) { JSONValue *value = malloc(sizeof(JSONValue)); if(!value) { return NULL; } memset(value, 0, sizeof(JSONValue)); return value; } static int setup_read_value(JSONParser *p) { p->readvalue_alloc = PARSER_STATES_ALLOC; p->readvalue_nelm = 0; p->readvalue_stack = calloc(PARSER_STATES_ALLOC, sizeof(JSONValue*)); if(!p->readvalue_stack) return -1; p->read_value = NULL; p->readvalue_stack[0] = NULL; return 0; } static int obj_init_values(JSONParser *p, JSONValue *v) { v->value.object.values = calloc(sizeof(JSONObjValue), p->reader_array_alloc); if(!v->value.object.values) { return -1; } v->value.object.alloc = p->reader_array_alloc; v->value.object.size = 0; return 0; } static int obj_add_value(JSONParser *p, JSONValue *parent, JSONObjValue v) { if(!parent->value.object.values) { if(obj_init_values(p, parent)) { return -1; } } if(parent->value.object.size == parent->value.object.alloc) { parent->value.object.alloc *= 2; parent->value.object.values = realloc(parent->value.object.values, sizeof(JSONObjValue) * parent->value.object.alloc); if(!parent->value.object.values) { return -1; } } parent->value.object.values[parent->value.object.size++] = v; return 0; } static int array_init(JSONParser *p, JSONValue *v) { v->value.array.array = calloc(sizeof(JSONValue*), p->reader_array_alloc); if(!v->value.array.array) { return -1; } v->value.array.alloc = p->reader_array_alloc; v->value.array.size = 0; return 0; } static int array_add_value(JSONParser *p, JSONValue *parent, JSONValue *v) { if(!parent->value.array.array) { if(array_init(p, parent)) { return -1; } } if(parent->value.array.size == parent->value.array.alloc) { parent->value.array.alloc *= 2; parent->value.array.array = realloc(parent->value.array.array, sizeof(JSONValue*) * parent->value.array.alloc); if(!parent->value.array.array) { return -1; } } parent->value.array.array[parent->value.array.size++] = v; return 0; } static int add_to_parent(JSONParser *p, JSONValue *parent, JSONValue *v) { if(!parent) { return -1; // shouldn't happen but who knows } int ret = 0; if(parent->type == JSON_OBJECT) { if(!p->value_name || p->value_name_len == 0) { return -1; } char *valuename = p->value_name; p->value_name = NULL; JSONObjValue newvalue; newvalue.name = valuename; newvalue.value = v; ret = obj_add_value(p, parent, newvalue); } else if(parent->type == JSON_ARRAY) { ret = array_add_value(p, parent, v); } else { ret = -1; // should also never happen } return ret; } static int readvaluestack_add(JSONParser *p, JSONValue *v) { if(p->readvalue_nelm == p->readvalue_alloc) { p->readvalue_alloc *= 2; JSONValue **new_stack = realloc(p->readvalue_stack, sizeof(JSONValue*) * p->readvalue_alloc); if(!new_stack) { return -1; } p->readvalue_stack = new_stack; } p->readvalue_stack[p->readvalue_nelm++] = v; return 0; } int json_read_value(JSONParser *p, JSONValue **value) { *value = NULL; if(!p->readvalue_stack) { if(setup_read_value(p)) return -1; } while(p->readvalue_nelm > 0 || !p->read_value) { //JSONValue *s = p->readvalue_stack[p->readvalue_nelm]; if(p->value_ready) { // value available without another read JSONValue *v = init_value(p); if(!v) return -1; if(p->readvalue_nelm > 0) { if(add_to_parent(p, p->readvalue_stack[p->readvalue_nelm-1], v)) { return -1; } } else { // set this value as root p->read_value = v; } switch(p->reader_type) { case JSON_READER_OBJECT_BEGIN: { v->type = JSON_OBJECT; if(readvaluestack_add(p, v)) { return -1; } break; } case JSON_READER_OBJECT_END: return -1; // should not happen case JSON_READER_ARRAY_BEGIN: { v->type = JSON_ARRAY; if(readvaluestack_add(p, v)) { return -1; } break; } case JSON_READER_ARRAY_END: return -1; // should not happen case JSON_READER_STRING: { v->type = JSON_STRING; if(p->value_str) { v->value.string.string = p->value_str; v->value.string.length = p->value_str_len; p->value_str = NULL; } break; } case JSON_READER_INTEGER: { v->type = JSON_INTEGER; v->value.integer.value = json_reader_int(p); break; } case JSON_READER_NUMBER: { v->type = JSON_NUMBER; v->value.number.value = json_reader_double(p); break; } case JSON_READER_LITERAL: { v->type = JSON_LITERAL; v->value.literal.literal = json_reader_literal(p); break; } } } else if(p->readvalue_initialized) { JSONReaderType rt = p->reader_type; if(rt == JSON_READER_OBJECT_END || rt == JSON_READER_ARRAY_END) { p->readvalue_nelm--; } // else: p->value_ready is 1, this will be handled in the next run } if(p->readvalue_nelm > 0 || !p->read_value) { int r = json_read(p); if(r != 1) { p->readvalue_initialized = 0; return r; } p->readvalue_initialized = 1; } } *value = p->read_value; p->readvalue_initialized = 0; p->read_value = NULL; return 1; } JSONValue* json_obj_get(JSONObject *obj, const char *name) { for(size_t i=0;isize;i++) { if(!strcmp(obj->values[i].name, name)) { return obj->values[i].value; } } return NULL; } JSONValue* json_array_get(JSONArray *array, size_t i) { if(i >= array->size) return NULL; return array->array[i]; } void json_value_free(JSONValue *value) { switch(value->type) { case JSON_OBJECT: { JSONObject obj = value->value.object; for(size_t i=0;ivalue.array; for(size_t i=0;ivalue.string.string); break; } default: { break; } } free(value); } int json_strcmp(JSONValue *jsstr, const char *str) { return json_strncmp(jsstr, str, strlen(str)); } int json_strncmp(JSONValue *jsstr, const char *str, size_t slen) { if(jsstr->type != JSON_STRING) { return -1; } size_t jsstrlen = jsstr->value.string.length; if(jsstrlen != slen) { return jsstrlen > slen ? 1 : -1; } return memcmp(jsstr->value.string.string, str, slen); }