src/cx/properties.h

Fri, 27 Dec 2024 13:01:31 +0100

author
Mike Becker <universe@uap-core.de>
date
Fri, 27 Dec 2024 13:01:31 +0100
changeset 1060
0a7c1bb2372d
parent 1059
154eb64ce746
permissions
-rw-r--r--

avoid copying the filled data in the json parser when possible - fixes #530

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * \file properties.h
 * \brief Interface for parsing data from properties files.
 * \author Mike Becker
 * \author Olaf Wintermann
 * \copyright 2-Clause BSD License
 */

#ifndef UCX_PROPERTIES
#define UCX_PROPERTIES

#include "common.h"
#include "string.h"
#include "map.h"
#include "buffer.h"

#include <stdio.h>
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Configures the expected characters for the properties parser.
 *
 * @see cx_properties_config_default
 */
struct cx_properties_config_s {
    /**
     * The key/value delimiter that shall be used.
     * This is '=' by default.
     */
    char delimiter;

    /*
     * TODO: line continuation in properties (not implemented, yet)
     *
     * Planned member:
     *     char continuation;
     * The character that, when appearing at the end of a line, continues
     * that line. The intended default is a backslash.
     *
     * This is intentionally a plain comment and not a documentation comment,
     * so that the text cannot get attached to the next member.
     */

    /**
     * The first comment character.
     * This is '#' by default.
     */
    char comment1;

    /**
     * The second comment character.
     * This is not set by default.
     */
    char comment2;

    /**
     * The third comment character.
     * This is not set by default.
     */
    char comment3;
};

/**
 * Typedef for the properties config.
 */
typedef struct cx_properties_config_s CxPropertiesConfig;

/**
 * Default properties configuration.
 *
 * According to the member documentation of #cx_properties_config_s,
 * the defaults are '=' as delimiter and '#' as the only comment character.
 *
 * @see cxPropertiesInitDefault()
 */
extern const CxPropertiesConfig cx_properties_config_default;

/**
 * Status codes for the properties interface.
 *
 * All enumerators that are less than #CX_PROPERTIES_OK are "good" results,
 * the enumerators following #CX_PROPERTIES_OK describe failures.
 */
enum cx_properties_status {
    /**
     * Everything is fine.
     *
     * This is the first enumerator and therefore has the value zero.
     */
    CX_PROPERTIES_NO_ERROR,
    /**
     * The input buffer does not contain more data.
     */
    CX_PROPERTIES_NO_DATA,
    /**
     * The input ends unexpectedly.
     *
     * This either happens when the last line does not terminate with a line
     * break, or when the input ends with a parsed key but no value.
     */
    CX_PROPERTIES_INCOMPLETE_DATA,
    /**
     * Not used as a status and never returned by any function.
     *
     * You can use this enumerator to check for all "good" status results
     * by checking if the status is less than \c CX_PROPERTIES_OK.
     *
     * A "good" status means that you can refill data and continue parsing.
     */
    CX_PROPERTIES_OK,
    /**
     * Input buffer is \c NULL.
     */
    CX_PROPERTIES_NULL_INPUT,
    /**
     * The line contains a delimiter, but no key.
     */
    CX_PROPERTIES_INVALID_EMPTY_KEY,
    /**
     * The line contains data, but no delimiter.
     */
    CX_PROPERTIES_INVALID_MISSING_DELIMITER,
    /**
     * More internal buffer was needed, but could not be allocated.
     */
    CX_PROPERTIES_BUFFER_ALLOC_FAILED,
    /**
     * Initializing the properties source failed.
     *
     * @see cx_properties_read_init_func
     */
    CX_PROPERTIES_READ_INIT_FAILED,
    /**
     * Reading from a properties source failed.
     *
     * @see cx_properties_read_func
     */
    CX_PROPERTIES_READ_FAILED,
    /**
     * Sinking a k/v-pair failed.
     *
     * @see cx_properties_sink_func
     */
    CX_PROPERTIES_SINK_FAILED,
};

/**
 * Typedef for the properties status enum.
 */
typedef enum cx_properties_status CxPropertiesStatus;

/**
 * Interface for working with properties data.
 *
 * Initialize with cxPropertiesInit() and release with cxPropertiesDestroy().
 */
struct cx_properties_s {
    /**
     * The configuration.
     *
     * This is the configuration that is re-used by cxPropertiesReset().
     */
    CxPropertiesConfig config;

    /**
     * The text input buffer.
     *
     * @see cxPropertiesFilln()
     */
    CxBuffer input;

    /**
     * Internal buffer.
     *
     * According to the documentation of cxPropertiesNext(), incomplete
     * lines are stored in an internal buffer.
     *
     * @see cxPropertiesUseStack()
     */
    CxBuffer buffer;
};

/**
 * Typedef for the properties interface.
 */
typedef struct cx_properties_s CxProperties;


/**
 * Typedef for a properties sink.
 */
typedef struct cx_properties_sink_s CxPropertiesSink;

/**
 * A function that consumes a k/v-pair in a sink.
 *
 * The sink could be e.g. a map and the sink function would be calling
 * a map function to store the k/v-pair.
 *
 * A failure of this function is what #CX_PROPERTIES_SINK_FAILED
 * stands for.
 *
 * NOTE(review): the key and value presumably point into a buffer of the
 * properties interface (cf. the attention remark on cxPropertiesNext()),
 * so an implementation should copy whatever it wants to keep - confirm.
 *
 * @param prop the properties interface that wants to sink a k/v-pair
 * @param sink the sink
 * @param key the key
 * @param value the value
 * @return zero on success, non-zero when sinking the k/v-pair failed
 */
cx_attr_nonnull
typedef int(*cx_properties_sink_func)(
        CxProperties *prop,
        CxPropertiesSink *sink,
        cxstring key,
        cxstring value
);

/**
 * Defines a sink for k/v-pairs.
 *
 * @see cxPropertiesMapSink()
 */
struct cx_properties_sink_s {
    /**
     * The sink object.
     *
     * For example a map.
     */
    void *sink;
    /**
     * Optional custom data.
     *
     * For the sink created by cxPropertiesMapSink() this may be a custom
     * allocator.
     */
    void *data;
    /**
     * A function for consuming k/v-pairs into the sink.
     */
    cx_properties_sink_func sink_func;
};


/**
 * Typedef for a properties source.
 */
typedef struct cx_properties_source_s CxPropertiesSource;

/**
 * A function that reads data from a source.
 *
 * When the source is depleted, implementations SHALL provide an empty
 * string in the \p target and return zero.
 * A non-zero return value is only permitted in case of an error
 * (this is what #CX_PROPERTIES_READ_FAILED stands for).
 *
 * The meaning of the optional parameters (presumably the optional members
 * of the source structure) is implementation-dependent.
 *
 * @param prop the properties interface that wants to read from the source
 * @param src the source
 * @param target a string buffer where the read data shall be stored
 * @return zero on success, non-zero when reading data failed
 */
cx_attr_nonnull
typedef int(*cx_properties_read_func)(
        CxProperties *prop,
        CxPropertiesSource *src,
        cxstring *target
);

/**
 * A function that may initialize additional memory for the source.
 *
 * A failure of this function is what #CX_PROPERTIES_READ_INIT_FAILED
 * stands for.
 *
 * @param prop the properties interface that wants to read from the source
 * @param src the source
 * @return zero when initialization was successful, non-zero otherwise
 * @see cx_properties_read_clean_func
 */
cx_attr_nonnull
typedef int(*cx_properties_read_init_func)(
        CxProperties *prop,
        CxPropertiesSource *src
);

/**
 * A function that cleans memory initialized by the read_init_func.
 *
 * This function cannot report errors, because it does not return a value.
 *
 * @param prop the properties interface that wants to read from the source
 * @param src the source
 * @see cx_properties_read_init_func
 */
cx_attr_nonnull
typedef void(*cx_properties_read_clean_func)(
        CxProperties *prop,
        CxPropertiesSource *src
);

/**
 * Defines a properties source.
 *
 * Only the source object and the read function are not marked as optional.
 *
 * @see cxPropertiesStringSource()
 * @see cxPropertiesCstrnSource()
 * @see cxPropertiesCstrSource()
 * @see cxPropertiesFileSource()
 */
struct cx_properties_source_s {
    /**
     * The source object.
     *
     * For example a file stream or a string.
     */
    void *src;
    /**
     * Optional additional data pointer.
     *
     * The meaning depends on the implementation of the source.
     */
    void *data_ptr;
    /**
     * Optional size information.
     *
     * The meaning depends on the implementation of the source.
     */
    size_t data_size;
    /**
     * A function that reads data from the source.
     */
    cx_properties_read_func read_func;
    /**
     * Optional function that may prepare the source for reading data.
     */
    cx_properties_read_init_func read_init_func;
    /**
     * Optional function that cleans additional memory allocated by the
     * read_init_func.
     */
    cx_properties_read_clean_func read_clean_func;
};

/**
 * Initialize a properties interface.
 *
 * The configuration is passed by value.
 * Every initialized interface should eventually be passed to
 * cxPropertiesDestroy().
 *
 * @param prop the properties interface
 * @param config the properties configuration
 * @see cxPropertiesInitDefault()
 * @see cxPropertiesDestroy()
 */
cx_attr_nonnull
void cxPropertiesInit(CxProperties *prop, CxPropertiesConfig config);

/**
 * Destroys the properties interface.
 *
 * \note Even when you are certain that you did not use the interface in a
 * way that caused a memory allocation, you should call this function anyway.
 * Future versions of the library might add features that need additional
 * memory, and you really don't want to search the entire code for places
 * where you might need to add a call to this function.
 *
 * @param prop the properties interface
 * @see cxPropertiesInit()
 */
cx_attr_nonnull
void cxPropertiesDestroy(CxProperties *prop);

/**
 * Resets the properties interface to a freshly initialized state.
 *
 * The currently active configuration is preserved: it is copied out of
 * \p prop, then \p prop is destroyed with cxPropertiesDestroy() and
 * initialized again with cxPropertiesInit() using that copy.
 *
 * You might want to use this to reset the parser after
 * encountering a syntax error.
 *
 * @param prop the properties interface
 */
cx_attr_nonnull
static inline void cxPropertiesReset(CxProperties *prop) {
    // take the copy first, it is needed again after the destroy
    const CxPropertiesConfig preserved = prop->config;

    cxPropertiesDestroy(prop);
    cxPropertiesInit(prop, preserved);
}

/**
 * Initialize a properties parser with the default configuration.
 *
 * This is a shorthand for calling cxPropertiesInit() with
 * #cx_properties_config_default as configuration.
 *
 * @param prop the properties interface
 * @see cxPropertiesInit()
 */
#define cxPropertiesInitDefault(prop) \
    cxPropertiesInit(prop, cx_properties_config_default)

/**
 * Fills the input buffer with data.
 *
 * After calling this function, you can parse the data by calling
 * cxPropertiesNext().
 *
 * @remark The properties interface tries to avoid allocations.
 * When you call this function and cxPropertiesNext() alternately
 * (fill, parse, fill, parse, ...), no allocations are performed.
 * However, you must not free the memory \p buf points to in that case.
 * When you invoke the fill function more than once before calling
 * cxPropertiesNext(), the additional data is appended - inevitably
 * leading to an allocation of a new buffer and copying the previous
 * contents.
 *
 * @param prop the properties interface
 * @param buf a pointer to the data
 * @param len the length of the data
 * @return non-zero when a memory allocation was necessary but failed
 * @see cxPropertiesFill()
 */
cx_attr_nonnull
cx_attr_access_r(2, 3)
int cxPropertiesFilln(
        CxProperties *prop,
        const char *buf,
        size_t len
);

#ifdef __cplusplus
} // extern "C"
/**
 * Fills the input buffer with the contents of a UCX string.
 *
 * Forwards the pointer and the length of \p str to cxPropertiesFilln().
 *
 * @param prop the properties interface
 * @param str the text to fill in
 * @return non-zero when a memory allocation was necessary but failed
 * @see cxPropertiesFilln()
 */
cx_attr_nonnull
static inline int cxPropertiesFill(
        CxProperties *prop,
        cxstring str
) {
    const char *data = str.ptr;
    size_t len = str.length;
    return cxPropertiesFilln(prop, data, len);
}

/**
 * Fills the input buffer with the contents of a mutable UCX string.
 *
 * Forwards the pointer and the length of \p str to cxPropertiesFilln().
 *
 * @param prop the properties interface
 * @param str the text to fill in
 * @return non-zero when a memory allocation was necessary but failed
 * @see cxPropertiesFilln()
 */
cx_attr_nonnull
static inline int cxPropertiesFill(
        CxProperties *prop,
        cxmutstr str
) {
    const char *data = str.ptr;
    size_t len = str.length;
    return cxPropertiesFilln(prop, data, len);
}

/**
 * Fills the input buffer with the contents of a C string.
 *
 * The length is determined with strlen() and then forwarded together
 * with \p str to cxPropertiesFilln().
 *
 * @param prop the properties interface
 * @param str the text to fill in
 * @return non-zero when a memory allocation was necessary but failed
 * @see cxPropertiesFilln()
 */
cx_attr_nonnull
cx_attr_cstr_arg(2)
static inline int cxPropertiesFill(
        CxProperties *prop,
        const char *str
) {
    size_t len = strlen(str);
    return cxPropertiesFilln(prop, str, len);
}

extern "C" {
#else // __cplusplus
/**
 * Fills the input buffer with data.
 *
 * After calling this function, you can parse the data by calling
 * cxPropertiesNext().
 *
 * The text may be specified as \c cxstring, \c cxmutstr, \c char*,
 * or \c const \c char*. The matching function is selected at compile time.
 *
 * @attention The properties interface tries to avoid allocations.
 * When you call this function and cxPropertiesNext() alternately
 * (fill, parse, fill, parse, ...), no allocations are performed.
 * However, you must not free the memory of the text in that case.
 * When you invoke the fill function more than once before calling
 * cxPropertiesNext(), the additional data is appended - inevitably
 * leading to an allocation of a new buffer and copying the previous
 * contents.
 *
 * @param prop the properties interface
 * @param str the text to fill in
 * @return non-zero when a memory allocation was necessary but failed
 * @see cxPropertiesFilln()
 */
#define cxPropertiesFill(prop, str) _Generic((str), \
    cxstring: cx_properties_fill_cxstr,             \
    cxmutstr: cx_properties_fill_mutstr,            \
    char*: cx_properties_fill_str,                  \
    const char*: cx_properties_fill_str)            \
    (prop, str)

/**
 * Fills the input buffer with the contents of a UCX string.
 *
 * This is the implementation selected by cxPropertiesFill() for an
 * argument of type \c cxstring. It forwards the pointer and the length
 * of \p str to cxPropertiesFilln().
 *
 * @param prop the properties interface
 * @param str the text to fill in
 * @return non-zero when a memory allocation was necessary but failed
 * @see cxPropertiesFilln()
 */
cx_attr_nonnull
static inline int cx_properties_fill_cxstr(
        CxProperties *prop,
        cxstring str
) {
    const char *data = str.ptr;
    size_t len = str.length;
    return cxPropertiesFilln(prop, data, len);
}

/**
 * Fills the input buffer with the contents of a mutable UCX string.
 *
 * This is the implementation selected by cxPropertiesFill() for an
 * argument of type \c cxmutstr. It forwards the pointer and the length
 * of \p str to cxPropertiesFilln().
 *
 * @param prop the properties interface
 * @param str the text to fill in
 * @return non-zero when a memory allocation was necessary but failed
 * @see cxPropertiesFilln()
 */
cx_attr_nonnull
static inline int cx_properties_fill_mutstr(
        CxProperties *prop,
        cxmutstr str
) {
    const char *data = str.ptr;
    size_t len = str.length;
    return cxPropertiesFilln(prop, data, len);
}

/**
 * Fills the input buffer with the contents of a C string.
 *
 * This is the implementation selected by cxPropertiesFill() for an
 * argument of type \c char* or \c const \c char*. The length is
 * determined with strlen() and then forwarded together with \p str
 * to cxPropertiesFilln().
 *
 * @param prop the properties interface
 * @param str the text to fill in
 * @return non-zero when a memory allocation was necessary but failed
 * @see cxPropertiesFilln()
 */
cx_attr_nonnull
cx_attr_cstr_arg(2)
static inline int cx_properties_fill_str(
        CxProperties *prop,
        const char *str
) {
    size_t len = strlen(str);
    return cxPropertiesFilln(prop, str, len);
}
#endif

/**
 * Specifies stack memory that shall be used as internal buffer.
 *
 * Use this after initialization with cxPropertiesInit() when you want to
 * avoid heap allocations for the internal buffer (see the remark in the
 * documentation of cxPropertiesNext()).
 *
 * NOTE(review): the memory presumably must stay valid for as long as the
 * properties interface is in use - confirm against the implementation.
 *
 * @param prop the properties interface
 * @param buf a pointer to stack memory
 * @param capacity the capacity of the stack memory
 */
cx_attr_nonnull
void cxPropertiesUseStack(
        CxProperties *prop,
        char *buf,
        size_t capacity
);

/**
 * Retrieves the next key/value-pair.
 *
 * This function returns zero (#CX_PROPERTIES_NO_ERROR) as long as there are
 * key/value-pairs found.
 * If no more key/value-pairs are found, #CX_PROPERTIES_NO_DATA is returned.
 *
 * When an incomplete line is encountered, #CX_PROPERTIES_INCOMPLETE_DATA is
 * returned, and you can add more data with #cxPropertiesFill().
 *
 * \remark The incomplete line will be stored in an internal buffer, which is
 * allocated on the heap, by default. If you want to avoid allocations,
 * you can specify sufficient space with cxPropertiesUseStack() after
 * initialization with cxPropertiesInit().
 *
 * \attention The returned strings will point into a buffer that might not be
 * available later. It is strongly recommended to copy the strings for further
 * use.
 *
 * @param prop the properties interface
 * @param key a pointer to the cxstring that shall contain the property name
 * @param value a pointer to the cxstring that shall contain the property value
 * @return the status code, see #cx_properties_status
 * @see cxPropertiesFill()
 */
cx_attr_nonnull
cx_attr_nodiscard
CxPropertiesStatus cxPropertiesNext(
        CxProperties *prop,
        cxstring *key,
        cxstring *value
);

/**
 * Creates a properties sink for a UCX map.
 *
 * The values stored in the map will be pointers to strings allocated
 * by #cx_strdup_a().
 * The default stdlib allocator will be used, unless you specify a custom
 * allocator in the optional \c data member of the returned sink.
 *
 * @param map the map that shall consume the k/v-pairs.
 * @return the sink
 * @see cxPropertiesLoad()
 */
cx_attr_nonnull
cx_attr_nodiscard
CxPropertiesSink cxPropertiesMapSink(CxMap *map);

/**
 * Creates a properties source based on a UCX string.
 *
 * NOTE(review): the source presumably refers to the data of \p str
 * without copying it, so the data should stay valid until loading
 * is finished - confirm against the implementation.
 *
 * @param str the string
 * @return the properties source
 * @see cxPropertiesLoad()
 */
cx_attr_nodiscard
CxPropertiesSource cxPropertiesStringSource(cxstring str);

/**
 * Creates a properties source based on a C string with the specified length.
 *
 * Since the length is specified explicitly, use cxPropertiesCstrSource()
 * when you want the length to be determined with strlen() instead.
 *
 * @param str the string
 * @param len the length
 * @return the properties source
 * @see cxPropertiesLoad()
 * @see cxPropertiesCstrSource()
 */
cx_attr_nonnull
cx_attr_nodiscard
cx_attr_access_r(1, 2)
CxPropertiesSource cxPropertiesCstrnSource(const char *str, size_t len);

/**
 * Creates a properties source based on a C string.
 *
 * The length will be determined with strlen(), so the string MUST be
 * zero-terminated.
 * Use cxPropertiesCstrnSource() when you want to specify the length.
 *
 * @param str the string
 * @return the properties source
 * @see cxPropertiesLoad()
 * @see cxPropertiesCstrnSource()
 */
cx_attr_nonnull
cx_attr_nodiscard
cx_attr_cstr_arg(1)
CxPropertiesSource cxPropertiesCstrSource(const char *str);

/**
 * Creates a properties source based on a FILE.
 *
 * NOTE(review): this function presumably does not take ownership of the
 * file, i.e. the caller remains responsible for closing it - confirm
 * against the implementation.
 *
 * @param file the file
 * @param chunk_size how many bytes may be read in one operation
 *
 * @return the properties source
 * @see cxPropertiesLoad()
 */
cx_attr_nonnull
cx_attr_nodiscard
cx_attr_access_r(1)
CxPropertiesSource cxPropertiesFileSource(FILE *file, size_t chunk_size);


/**
 * Loads properties data from a source and transfers it to a sink.
 *
 * This function tries to read as much data from the source as possible.
 * When the source was completely consumed and at least one k/v-pair was
 * found, the return value will be #CX_PROPERTIES_NO_ERROR.
 * When the source was consumed but no k/v-pairs were found, the return value
 * will be #CX_PROPERTIES_NO_DATA.
 * The other result codes apply, according to their description.
 *
 * The sink and the source are passed by value.
 *
 * @param prop the properties interface
 * @param sink the sink
 * @param source the source
 * @return the status of the last operation
 * @see cxPropertiesMapSink()
 * @see cxPropertiesStringSource()
 * @see cxPropertiesFileSource()
 */
cx_attr_nonnull
CxPropertiesStatus cxPropertiesLoad(
        CxProperties *prop,
        CxPropertiesSink sink,
        CxPropertiesSource source
);

#ifdef __cplusplus
} // extern "C"
#endif

#endif // UCX_PROPERTIES

mercurial