src/cx/json.h

Sat, 21 Dec 2024 21:03:28 +0100

author
Mike Becker <universe@uap-core.de>
date
Sat, 21 Dec 2024 21:03:28 +0100
changeset 1040
1ecf4dbbc60c
parent 1037
83620ba72cc1
child 1042
c17f11830a2d
permissions
-rw-r--r--

add some more overflow treatment and make sure to set errno properly

resolves #469

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * \file json.h
 * \brief Interface for parsing data from JSON files.
 * \author Mike Becker
 * \author Olaf Wintermann
 * \copyright 2-Clause BSD License
 */

#ifndef UCX_JSON_H
#define UCX_JSON_H

#include "common.h"
#include "allocator.h"
#include "string.h"
#include "buffer.h"
#include "array_list.h"

#ifdef __cplusplus
extern "C" {
#endif


/**
 * The type of the parsed token.
 *
 * Stored in the \c tokentype member of #CxJsonToken.
 */
enum cx_json_token_type {
    /**
     * No complete token parsed, yet.
     */
    CX_JSON_NO_TOKEN,
    /**
     * The presumed token contains syntactical errors.
     */
    CX_JSON_TOKEN_ERROR,
    /**
     * A "begin of array" '[' token.
     */
    CX_JSON_TOKEN_BEGIN_ARRAY,
    /**
     * A "begin of object" '{' token.
     */
    CX_JSON_TOKEN_BEGIN_OBJECT,
    /**
     * An "end of array" ']' token.
     */
    CX_JSON_TOKEN_END_ARRAY,
    /**
     * An "end of object" '}' token.
     */
    CX_JSON_TOKEN_END_OBJECT,
    /**
     * A colon ':' token separating names and values.
     */
    CX_JSON_TOKEN_NAME_SEPARATOR,
    /**
     * A comma ',' token separating object entries or array elements.
     */
    CX_JSON_TOKEN_VALUE_SEPARATOR,
    /**
     * A string token.
     */
    CX_JSON_TOKEN_STRING,
    /**
     * A number token that can be represented as integer.
     */
    CX_JSON_TOKEN_INTEGER,
    /**
     * A number token that cannot be represented as integer.
     */
    CX_JSON_TOKEN_NUMBER,
    /**
     * A literal token.
     */
    CX_JSON_TOKEN_LITERAL,
    /**
     * A white-space token.
     */
    CX_JSON_TOKEN_SPACE
};

/**
 * The type of some JSON value.
 *
 * Stored in the \c type member of #CxJsonValue, where it specifies
 * which member of the \c value union is meaningful.
 */
enum cx_json_value_type {
    /**
     * Reserved.
     *
     * Values of this type carry no data. For example, cxJsonArrGet()
     * is documented to return a value of this type for an index that
     * is out of bounds.
     */
    CX_JSON_NOTHING, // this allows us to always return non-NULL values
    /**
     * A JSON object.
     */
    CX_JSON_OBJECT,
    /**
     * A JSON array.
     */
    CX_JSON_ARRAY,
    /**
     * A string.
     */
    CX_JSON_STRING,
    /**
     * A number that contains an integer.
     */
    CX_JSON_INTEGER,
    /**
     * A number, not necessarily an integer.
     */
    CX_JSON_NUMBER,
    /**
     * A literal (true, false, null).
     */
    CX_JSON_LITERAL
};

/**
 * JSON literal types.
 *
 * Stored in the \c literal member of the value union when the
 * type of a value is #CX_JSON_LITERAL.
 */
enum cx_json_literal {
    /**
     * The \c null literal.
     */
    CX_JSON_NULL,
    /**
     * The \c true literal.
     */
    CX_JSON_TRUE,
    /**
     * The \c false literal.
     */
    CX_JSON_FALSE
};

/**
 * Type alias for the token type enum.
 */
typedef enum cx_json_token_type CxJsonTokenType;
/**
 * Type alias for the value type enum.
 */
typedef enum cx_json_value_type CxJsonValueType;

/**
 * Type alias for the JSON parser interface.
 */
typedef struct cx_json_s CxJson;

/**
 * Type alias for the token struct.
 */
typedef struct cx_json_token_s CxJsonToken;

/**
 * Type alias for the JSON value struct.
 */
typedef struct cx_json_value_s CxJsonValue;

/**
 * Type alias for the JSON array struct.
 */
typedef struct cx_json_array_s CxJsonArray;
/**
 * Type alias for the JSON object struct.
 */
typedef struct cx_json_object_s CxJsonObject;
/**
 * Type alias for a JSON string.
 *
 * JSON strings are represented by the mutable UCX string structure.
 */
typedef struct cx_mutstr_s CxJsonString;
/**
 * Type alias for a number that can be represented as 64-bit signed integer.
 */
typedef int64_t CxJsonInteger;
/**
 * Type alias for a number that is not an integer.
 */
typedef double CxJsonNumber;
/**
 * Type alias for a JSON literal.
 */
typedef enum cx_json_literal CxJsonLiteral;

/**
 * Type alias for a key/value pair in a JSON object.
 */
typedef struct cx_json_obj_value_s CxJsonObjValue;

/**
 * JSON array structure.
 *
 * Embedded in the value union of #CxJsonValue for values of
 * type #CX_JSON_ARRAY.
 */
struct cx_json_array_s {
    /**
     * The array data.
     *
     * The elements are pointers to JSON values. The number of elements
     * is read from the \c array_size member by cxJsonArrSize().
     */
    CX_ARRAY_DECLARE(CxJsonValue*, array);
};

/**
 * JSON object structure.
 *
 * Embedded in the value union of #CxJsonValue for values of
 * type #CX_JSON_OBJECT.
 */
struct cx_json_object_s {
    /**
     * The key/value entries.
     *
     * The entries are stored by value as #CxJsonObjValue structures.
     */
    CX_ARRAY_DECLARE(CxJsonObjValue, values);
};

/**
 * Structure for a key/value entry in a JSON object.
 *
 * This is the element type of the \c values array in #CxJsonObject.
 */
struct cx_json_obj_value_s {
    /**
     * The key (or name in JSON terminology) of the value.
     */
    cxmutstr name;
    /**
     * The value.
     *
     * Held as a pointer, not embedded in the entry.
     */
    CxJsonValue *value;
};

/**
 * Structure for a JSON value.
 *
 * A tagged union: the \c type member tells which member of
 * the \c value union holds the data.
 */
struct cx_json_value_s {
    /**
     * The allocator with which the value was allocated.
     *
     * Required for recursively deallocating memory of objects and arrays.
     */
    const CxAllocator *allocator;
    /**
     * The type of this value.
     *
     * Specifies how the \c value union shall be resolved.
     */
    CxJsonValueType type;
    /**
     * The value data.
     *
     * Only the member that corresponds to \c type is meaningful.
     */
    union {
        /**
         * The array data if type is #CX_JSON_ARRAY.
         */
        CxJsonArray array;
        /**
         * The object data if type is #CX_JSON_OBJECT.
         */
        CxJsonObject object;
        /**
         * The string data if type is #CX_JSON_STRING.
         */
        CxJsonString string;
        /**
         * The integer if type is #CX_JSON_INTEGER.
         */
        CxJsonInteger integer;
        /**
         * The number if type is #CX_JSON_NUMBER.
         */
        CxJsonNumber number;
        /**
         * The literal type if type is #CX_JSON_LITERAL.
         */
        CxJsonLiteral literal;
    } value;
};

/**
 * Structure for a parsed token.
 *
 * The parser interface also uses this structure to remember
 * the prefix of an uncompleted token (see #CxJson).
 */
struct cx_json_token_s {
    /**
     * The token type.
     */
    CxJsonTokenType tokentype;
    /**
     * True, iff the \c content must be passed to cx_strfree().
     */
    bool allocated;
    /**
     * The token text, if any.
     *
     * This is not necessarily set when the token type already
     * uniquely identifies the content.
     */
    cxmutstr content;
};

/**
 * The JSON parser interface.
 *
 * Initialize with cxJsonInit() and release with cxJsonDestroy().
 * Apart from that, the members are meant to be used by the
 * implementation only.
 */
struct cx_json_s {
    /**
     * The allocator used for produced JSON values.
     */
    const CxAllocator *allocator;
    /**
     * The input buffer.
     *
     * Data is added with cxJsonFilln() or cxJsonFill().
     */
    CxBuffer buffer;

    /**
     * Used internally.
     *
     * Remembers the prefix of the last uncompleted token.
     */
    CxJsonToken uncompleted;

    /**
     * A pointer to an intermediate state of the currently parsed value.
     *
     * Never access this value manually.
     */
    CxJsonValue *parsed;

    /**
     * State stack.
     *
     * The size and capacity of this array are of type \c unsigned.
     */
    CX_ARRAY_DECLARE_SIZED(int, states, unsigned);

    /**
     * Value buffer stack.
     *
     * The size and capacity of this array are of type \c unsigned.
     */
    CX_ARRAY_DECLARE_SIZED(CxJsonValue*, vbuf, unsigned);

    /**
     * Internally reserved memory for the state stack.
     *
     * Provides space for eight states.
     */
    int states_internal[8];

    /**
     * Internally reserved memory for the value buffer stack.
     *
     * Provides space for eight value pointers.
     */
    CxJsonValue* vbuf_internal[8];

    /**
     * Used internally.
     */
    bool tokenizer_escape; // TODO: check if it can be replaced with look-behind
};

/**
 * Status codes for the json interface.
 *
 * The enumerators are ordered such that all "good" status codes
 * come before #CX_JSON_OK and all error codes come after it.
 */
enum cx_json_status {
    /**
     * Everything is fine.
     */
    CX_JSON_NO_ERROR,
    /**
     * The input buffer does not contain more data.
     */
    CX_JSON_NO_DATA,
    /**
     * The input ends unexpectedly.
     *
     * Refill the buffer with cxJsonFill() to complete the json data.
     */
    CX_JSON_INCOMPLETE_DATA,
    /**
     * Not used as a status and never returned by any function.
     *
     * You can use this enumerator to check for all "good" status results
     * by checking if the status is less than \c CX_JSON_OK.
     *
     * A "good" status means, that you can refill data and continue parsing.
     */
    CX_JSON_OK,
    /**
     * Allocating memory for the internal buffer failed.
     */
    CX_JSON_BUFFER_ALLOC_FAILED,
    /**
     * Allocating memory for a json value failed.
     */
    CX_JSON_VALUE_ALLOC_FAILED,
    /**
     * A number value is incorrectly formatted.
     */
    CX_JSON_FORMAT_ERROR_NUMBER,
    /**
     * The tokenizer found something unexpected.
     */
    CX_JSON_FORMAT_ERROR_UNEXPECTED_TOKEN
};

/**
 * Typedef for the json status enum.
 *
 * This is the return type of cxJsonNext().
 */
typedef enum cx_json_status CxJsonStatus;

/**
 * Initializes the json interface.
 *
 * Only \p json is declared as non-null argument.
 * NOTE(review): presumably a \c NULL allocator selects a default
 * allocator - confirm against the implementation.
 *
 * @param json the json interface
 * @param allocator the allocator that shall be used for the produced values
 * @see cxJsonDestroy()
 */
cx_attr_nonnull_arg(1)
void cxJsonInit(CxJson *json, const CxAllocator *allocator);

/**
 * Destroys the json interface.
 *
 * To continue using the same structure afterwards, initialize it
 * again with cxJsonInit(), or use cxJsonReset() which does both.
 *
 * @param json the json interface
 * @see cxJsonInit()
 * @see cxJsonReset()
 */
cx_attr_nonnull
void cxJsonDestroy(CxJson *json);

/**
 * Brings the json interface back into a freshly initialized state.
 *
 * The interface is destroyed and then initialized again with the
 * allocator it was using before. You might want to use this to
 * reset the parser after encountering a syntax error.
 *
 * @param json the json interface
 * @see cxJsonDestroy()
 * @see cxJsonInit()
 */
cx_attr_nonnull
static inline void cxJsonReset(CxJson *json) {
    // remember the allocator first, because the interface is destroyed next
    const CxAllocator *const previous_allocator = json->allocator;

    cxJsonDestroy(json);
    cxJsonInit(json, previous_allocator);
}

/**
 * Adds more data to the input buffer.
 *
 * The data will be copied.
 *
 * The source buffer is declared as read-only with a length
 * of \p len bytes.
 *
 * @param json the json interface
 * @param buf the source buffer
 * @param len the length of the source buffer
 * @return zero on success, non-zero on internal allocation error
 * @see cxJsonFill()
 */
cx_attr_nonnull
cx_attr_access_r(2, 3)
int cxJsonFilln(CxJson *json, const char *buf, size_t len);

#ifdef __cplusplus
} // extern "C"

/**
 * Adds the contents of a UCX string to the input buffer.
 *
 * The data will be copied.
 *
 * @param json the json interface
 * @param str the string to add to the buffer
 * @return zero on success, non-zero on internal allocation error
 * @see cxJsonFilln()
 */
cx_attr_nonnull
static inline int cxJsonFill(
        CxJson *json,
        cxstring str
) {
    const char *data = str.ptr;
    const size_t length = str.length;
    return cxJsonFilln(json, data, length);
}

/**
 * Adds the contents of a mutable UCX string to the input buffer.
 *
 * The data will be copied.
 *
 * @param json the json interface
 * @param str the string to add to the buffer
 * @return zero on success, non-zero on internal allocation error
 * @see cxJsonFilln()
 */
cx_attr_nonnull
static inline int cxJsonFill(
        CxJson *json,
        cxmutstr str
) {
    const char *data = str.ptr;
    const size_t length = str.length;
    return cxJsonFilln(json, data, length);
}

/**
 * Adds the contents of a zero-terminated C string to the input buffer.
 *
 * The data will be copied. The length is determined with strlen().
 *
 * @param json the json interface
 * @param str the string to add to the buffer
 * @return zero on success, non-zero on internal allocation error
 * @see cxJsonFilln()
 */
cx_attr_nonnull
cx_attr_cstr_arg(2)
static inline int cxJsonFill(
        CxJson *json,
        const char *str
) {
    const size_t length = strlen(str);
    return cxJsonFilln(json, str, length);
}

extern "C" {
#else // __cplusplus
/**
 * Adds more data to the input buffer.
 *
 * The data will be copied.
 *
 * This is a type-generic macro that accepts \c cxstring, \c cxmutstr,
 * \c char*, and \c const \c char* for the \p str argument and
 * dispatches to the matching \c cx_json_fill_* function.
 *
 * @param json the json interface
 * @param str the string to add to the buffer
 * @return zero on success, non-zero on internal allocation error
 * @see cxJsonFilln()
 */
#define cxJsonFill(json, str) _Generic((str), \
    cxstring: cx_json_fill_cxstr,             \
    cxmutstr: cx_json_fill_mutstr,            \
    char*: cx_json_fill_str,                  \
    const char*: cx_json_fill_str)            \
    (json, str)

/**
 * Implementation of cxJsonFill() for \c cxstring arguments.
 *
 * Forwards the pointer and the length of the string to cxJsonFilln().
 *
 * @param json the json interface
 * @param str the string to add to the buffer
 * @return zero on success, non-zero on internal allocation error
 * @see cxJsonFilln()
 */
cx_attr_nonnull
static inline int cx_json_fill_cxstr(
        CxJson *json,
        cxstring str
) {
    const char *data = str.ptr;
    const size_t length = str.length;
    return cxJsonFilln(json, data, length);
}

/**
 * Implementation of cxJsonFill() for \c cxmutstr arguments.
 *
 * Forwards the pointer and the length of the string to cxJsonFilln().
 *
 * @param json the json interface
 * @param str the string to add to the buffer
 * @return zero on success, non-zero on internal allocation error
 * @see cxJsonFilln()
 */
cx_attr_nonnull
static inline int cx_json_fill_mutstr(
        CxJson *json,
        cxmutstr str
) {
    const char *data = str.ptr;
    const size_t length = str.length;
    return cxJsonFilln(json, data, length);
}

/**
 * Implementation of cxJsonFill() for zero-terminated C strings.
 *
 * The length of the string is determined with strlen().
 *
 * @param json the json interface
 * @param str the string to add to the buffer
 * @return zero on success, non-zero on internal allocation error
 * @see cxJsonFilln()
 */
cx_attr_nonnull
cx_attr_cstr_arg(2)
static inline int cx_json_fill_str(
        CxJson *json,
        const char *str
) {
    const size_t length = strlen(str);
    return cxJsonFilln(json, str, length);
}
#endif

/**
 * Recursively deallocates the memory of a JSON value.
 *
 * \remark The type of each deallocated value will be changed
 * to #CX_JSON_NOTHING and values of such type will be skipped
 * by the de-allocation. That means, this function protects
 * you from double-frees when you are accidentally freeing
 * a nested value and then the parent value (or vice versa).
 *
 * NOTE(review): unlike most functions in this header, this one is
 * not declared with a non-null attribute; whether \c NULL is
 * accepted is not visible from here - confirm against the
 * implementation.
 *
 * @param value the value
 */
void cxJsonValueFree(CxJsonValue *value);

/**
 * Tries to obtain the next JSON value.
 *
 * The status codes are described by #CxJsonStatus. A status less
 * than #CX_JSON_OK means that you can refill data and continue parsing.
 *
 * NOTE(review): presumably a value obtained from this function must
 * eventually be released with cxJsonValueFree() - confirm against
 * the implementation.
 *
 * @param json the json interface
 * @param value a pointer where the next value shall be stored
 * @return a status code
 * @see cxJsonFill()
 */
cx_attr_nonnull
cx_attr_access_w(2)
CxJsonStatus cxJsonNext(CxJson *json, CxJsonValue **value);

/**
 * Checks if the specified value is a JSON object.
 *
 * @param value a pointer to the value
 * @return true if the value is a JSON object, false otherwise
 */
cx_attr_nonnull
static inline bool cxJsonIsObject(const CxJsonValue *value) {
    return value->type == CX_JSON_OBJECT;
}

/**
 * Tests whether the specified value holds a JSON array.
 *
 * @param value a pointer to the value
 * @return true if the type of the value is #CX_JSON_ARRAY, false otherwise
 */
cx_attr_nonnull
static inline bool cxJsonIsArray(const CxJsonValue *value) {
    const bool is_array = (CX_JSON_ARRAY == value->type);
    return is_array;
}

/**
 * Checks if the specified value is a string.
 *
 * @param value a pointer to the value
 * @return true if the value is a string, false otherwise
 */
cx_attr_nonnull
static inline bool cxJsonIsString(const CxJsonValue *value) {
    return value->type == CX_JSON_STRING;
}

/**
 * Checks if the specified value is a JSON number.
 *
 * This function will return true for both floating point and
 * integer numbers.
 *
 * @param value a pointer to the value
 * @return true if the value is a JSON number, false otherwise
 * @see cxJsonIsInteger()
 */
cx_attr_nonnull
static inline bool cxJsonIsNumber(const CxJsonValue *value) {
    return value->type == CX_JSON_NUMBER || value->type == CX_JSON_INTEGER;
}

/**
 * Tests whether the specified value holds an integer number.
 *
 * @param value a pointer to the value
 * @return true if the type of the value is #CX_JSON_INTEGER, false otherwise
 * @see cxJsonIsNumber()
 */
cx_attr_nonnull
static inline bool cxJsonIsInteger(const CxJsonValue *value) {
    const bool is_integer = (CX_JSON_INTEGER == value->type);
    return is_integer;
}

/**
 * Checks if the specified value is a JSON literal.
 *
 * JSON literals are \c true, \c false, and \c null.
 *
 * @param value a pointer to the value
 * @return true if the value is a JSON literal, false otherwise
 * @see cxJsonIsTrue()
 * @see cxJsonIsFalse()
 * @see cxJsonIsNull()
 */
cx_attr_nonnull
static inline bool cxJsonIsLiteral(const CxJsonValue *value) {
    return value->type == CX_JSON_LITERAL;
}

/**
 * Tests whether the specified value holds a Boolean literal.
 *
 * A Boolean literal is every JSON literal except \c null.
 *
 * @param value a pointer to the value
 * @return true if the value is either \c true or \c false, false otherwise
 * @see cxJsonIsTrue()
 * @see cxJsonIsFalse()
 */
cx_attr_nonnull
static inline bool cxJsonIsBool(const CxJsonValue *value) {
    // the literal member must only be inspected for literal values
    if (!cxJsonIsLiteral(value)) {
        return false;
    }
    return value->value.literal != CX_JSON_NULL;
}

/**
 * Tests whether the specified value holds the \c true literal.
 *
 * \remark This is not the same as testing \c !cxJsonIsFalse(v),
 * because a value that is not a Boolean literal is neither
 * \c true nor \c false.
 *
 * @param value a pointer to the value
 * @return true if the value is \c true, false otherwise
 * @see cxJsonIsBool()
 * @see cxJsonIsFalse()
 */
cx_attr_nonnull
static inline bool cxJsonIsTrue(const CxJsonValue *value) {
    // the literal member must only be inspected for literal values
    if (!cxJsonIsLiteral(value)) {
        return false;
    }
    return value->value.literal == CX_JSON_TRUE;
}

/**
 * Tests whether the specified value holds the \c false literal.
 *
 * \remark This is not the same as testing \c !cxJsonIsTrue(v),
 * because a value that is not a Boolean literal is neither
 * \c true nor \c false.
 *
 * @param value a pointer to the value
 * @return true if the value is \c false, false otherwise
 * @see cxJsonIsBool()
 * @see cxJsonIsTrue()
 */
cx_attr_nonnull
static inline bool cxJsonIsFalse(const CxJsonValue *value) {
    // the literal member must only be inspected for literal values
    if (!cxJsonIsLiteral(value)) {
        return false;
    }
    return value->value.literal == CX_JSON_FALSE;
}

/**
 * Tests whether the specified value holds the \c null literal.
 *
 * @param value a pointer to the value
 * @return true if the value is \c null, false otherwise
 * @see cxJsonIsLiteral()
 */
cx_attr_nonnull
static inline bool cxJsonIsNull(const CxJsonValue *value) {
    // the literal member must only be inspected for literal values
    if (!cxJsonIsLiteral(value)) {
        return false;
    }
    return value->value.literal == CX_JSON_NULL;
}

/**
 * Returns the string of a JSON value as C string.
 *
 * The returned pointer is the \c ptr member of the string that is
 * stored in the value. If the \p value is not a string, the
 * behavior is undefined.
 *
 * @param value the JSON value
 * @return the value represented as C string
 * @see cxJsonIsString()
 */
cx_attr_nonnull
cx_attr_returns_nonnull
static inline char *cxJsonAsString(const CxJsonValue *value) {
    char *text = value->value.string.ptr;
    return text;
}

/**
 * Obtains a UCX string from the given JSON value.
 *
 * If the \p value is not a string, the behavior is undefined.
 *
 * @param value the JSON value
 * @return the value represented as UCX string
 * @see cxJsonIsString()
 */
cx_attr_nonnull
static inline cxstring cxJsonAsCxString(const CxJsonValue *value) {
    return cx_strcast(value->value.string);
}

/**
 * Returns the string of a JSON value as mutable UCX string.
 *
 * The string structure stored in the value is returned by value.
 * If the \p value is not a string, the behavior is undefined.
 *
 * @param value the JSON value
 * @return the value represented as mutable UCX string
 * @see cxJsonIsString()
 */
cx_attr_nonnull
static inline cxmutstr cxJsonAsCxMutStr(const CxJsonValue *value) {
    cxmutstr text = value->value.string;
    return text;
}

/**
 * Returns the number of a JSON value as double-precision floating point.
 *
 * Integer numbers are converted to \c double.
 * If the \p value is not a JSON number, the behavior is undefined.
 *
 * @param value the JSON value
 * @return the value represented as double
 * @see cxJsonIsNumber()
 */
cx_attr_nonnull
static inline double cxJsonAsDouble(const CxJsonValue *value) {
    const bool holds_integer = (value->type == CX_JSON_INTEGER);
    return holds_integer
           ? (double) value->value.integer
           : value->value.number;
}

/**
 * Obtains a 64-bit signed integer from the given JSON value.
 *
 * If the \p value is not a JSON number, the behavior is undefined.
 * If it is a JSON number, but not an integer, the value will be
 * converted to an integer, possibly losing precision.
 *
 * Numbers that are not representable as 64-bit signed integer
 * are saturated: values that are too large yield \c INT64_MAX,
 * values that are too small yield \c INT64_MIN, and NaN yields zero.
 *
 * @param value the JSON value
 * @return the value represented as 64-bit signed integer
 * @see cxJsonIsNumber()
 * @see cxJsonIsInteger()
 */
cx_attr_nonnull
static inline int64_t cxJsonAsInteger(const CxJsonValue *value) {
    if (value->type == CX_JSON_INTEGER) {
        return value->value.integer;
    }

    const double number = value->value.number;

    // Converting NaN or an out-of-range double to an integer type is
    // undefined behavior in C, so these cases need well-defined results.
    if (number != number) {
        // only NaN compares unequal to itself
        return 0;
    }
    // 2^63 is exactly representable as double, whereas INT64_MAX is not
    if (number >= 9223372036854775808.0) {
        return INT64_MAX;
    }
    if (number <= -9223372036854775808.0) {
        return INT64_MIN;
    }
    return (int64_t) number;
}

/**
 * Obtains a Boolean value from the given JSON value.
 *
 * If the \p value is not a JSON literal, the behavior is undefined.
 * The \c null literal is interpreted as \c false.
 *
 * @param value the JSON value
 * @return the value represented as double
 * @see cxJsonIsLiteral()
 */
cx_attr_nonnull
static inline bool cxJsonAsBool(const CxJsonValue *value) {
    return value->value.literal == CX_JSON_TRUE;
}

/**
 * Returns the number of elements in a JSON array.
 *
 * If the \p value is not a JSON array, the behavior is undefined.
 *
 * @param value the JSON value
 * @return the size of the array
 * @see cxJsonIsArray()
 */
cx_attr_nonnull
static inline size_t cxJsonArrSize(const CxJsonValue *value) {
    const size_t element_count = value->value.array.array_size;
    return element_count;
}

/**
 * Returns an element from a JSON array.
 *
 * If the \p value is not a JSON array, the behavior is undefined.
 *
 * This function guarantees to return a value. If the index is
 * out of bounds, the returned value will be of type
 * #CX_JSON_NOTHING, but never \c NULL.
 *
 * @param value the JSON value
 * @param index the index in the array
 * @return the value at the specified index
 * @see cxJsonIsArray()
 * @see cxJsonArrSize()
 */
cx_attr_nonnull
cx_attr_returns_nonnull
CxJsonValue *cxJsonArrGet(const CxJsonValue *value, size_t index);

/**
 * Returns an iterator over the JSON array elements.
 *
 * The iterator yields values of type \c CxJsonValue* .
 *
 * If the \p value is not a JSON array, the behavior is undefined.
 *
 * The result is marked as "nodiscard", i.e. it is not supposed
 * to be ignored by the caller.
 *
 * @param value the JSON value
 * @return an iterator over the array elements
 * @see cxJsonIsArray()
 */
cx_attr_nonnull
cx_attr_nodiscard
CxIterator cxJsonArrIter(const CxJsonValue *value);

/**
 * @copydoc cxJsonObjGet()
 *
 * This is the variant for \c cxstring keys. The other variants
 * of cxJsonObjGet() convert their key and delegate to this function.
 */
cx_attr_nonnull
cx_attr_returns_nonnull
CxJsonValue *cx_json_obj_get_cxstr(const CxJsonValue *value, cxstring name);

#ifdef __cplusplus
} // extern "C"

CxJsonValue *cxJsonObjGet(const CxJsonValue *value, cxstring name) {
    return cx_json_obj_get_cxstr(value, name);
}

CxJsonValue *cxJsonObjGet(const CxJsonValue *value, cxmutstr name) {
    return cx_json_obj_get_cxstr(value, cx_strcast(name));
}

CxJsonValue *cxJsonObjGet(const CxJsonValue *value, const char *name) {
    return cx_json_obj_get_cxstr(value, cx_str(name));
}

extern "C" {
#else
/**
 * Returns a value corresponding to a key in a JSON object.
 *
 * If the \p value is not a JSON object, the behavior is undefined.
 *
 * This function guarantees to return a JSON value. If the
 * object does not contain \p name, the returned JSON value
 * will be of type #CX_JSON_NOTHING, but never \c NULL.
 *
 * This is a type-generic macro that accepts \c cxstring, \c cxmutstr,
 * \c char*, and \c const \c char* for the \p name argument and
 * dispatches to the matching \c cx_json_obj_get_* function.
 *
 * @param value the JSON object
 * @param name the key to look up
 * @return the value corresponding to the key
 * @see cxJsonIsObject()
 */
#define cxJsonObjGet(value, name) _Generic((name), \
        cxstring: cx_json_obj_get_cxstr,           \
        cxmutstr: cx_json_obj_get_mutstr,          \
        char*: cx_json_obj_get_str,                \
        const char*: cx_json_obj_get_str)          \
        (value, name)

/**
 * Implementation of cxJsonObjGet() for \c cxmutstr keys.
 *
 * The key is converted with cx_strcast() and then looked up
 * with cx_json_obj_get_cxstr().
 *
 * @param value the JSON object
 * @param name the key to look up
 * @return the value corresponding to the key
 * @see cxJsonIsObject()
 */
cx_attr_nonnull
cx_attr_returns_nonnull
static inline CxJsonValue *cx_json_obj_get_mutstr(const CxJsonValue *value, cxmutstr name) {
    const cxstring key = cx_strcast(name);
    return cx_json_obj_get_cxstr(value, key);
}

/**
 * Implementation of cxJsonObjGet() for zero-terminated C strings.
 *
 * The key is converted with cx_str() and then looked up
 * with cx_json_obj_get_cxstr().
 *
 * @param value the JSON object
 * @param name the key to look up
 * @return the value corresponding to the key
 * @see cxJsonIsObject()
 */
cx_attr_nonnull
cx_attr_returns_nonnull
cx_attr_cstr_arg(2)
static inline CxJsonValue *cx_json_obj_get_str(const CxJsonValue *value, const char *name) {
    const cxstring key = cx_str(name);
    return cx_json_obj_get_cxstr(value, key);
}
#endif

#ifdef __cplusplus
}
#endif

#endif /* UCX_JSON_H */

mercurial