src/properties.c

Fri, 20 Dec 2024 15:00:31 +0100

author
Mike Becker <universe@uap-core.de>
date
Fri, 20 Dec 2024 15:00:31 +0100
changeset 1031
8a90552bba29
parent 985
68754c7de906
permissions
-rw-r--r--

rework of properties parser - fixes #529 and resolves #458

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "cx/properties.h"

#include <assert.h>

// Default parser configuration.
// NOTE(review): the initializer is positional; the per-value remarks below
// assume the member order of CxPropertiesConfig - confirm against the
// declaration in cx/properties.h.
const CxPropertiesConfig cx_properties_config_default = {
        '=',    // presumably the key/value delimiter
        //'\\', // disabled entry (presumably reserved for an escape character)
        '#',    // presumably the first comment character
        '\0',   // presumably the second comment character
        '\0'    // presumably the third comment character
};

/**
 * Puts a properties parser into its initial state.
 *
 * The whole structure is zeroed first, afterwards the supplied
 * configuration is stored in it.
 *
 * @param prop   the parser object to initialize
 * @param config the configuration the parser shall use
 */
void cxPropertiesInit(CxProperties *prop, CxPropertiesConfig config) {
    // wipe everything (including both buffers) before storing the config
    memset(prop, 0, sizeof *prop);
    prop->config = config;
}

/**
 * Destroys both buffers owned by the parser: the input buffer first,
 * then the internal (rescue) buffer.
 *
 * @param prop the parser object
 */
void cxPropertiesDestroy(CxProperties *prop) {
    CxBuffer *input = &prop->input;
    CxBuffer *rescue = &prop->buffer;

    cxBufferDestroy(input);
    cxBufferDestroy(rescue);
}

/**
 * Supplies the parser with more input data.
 *
 * When the current input buffer still contains unread data, the new data
 * is appended to it. Otherwise the input buffer is re-created on top of
 * the memory at @p buf (initialized with the flags
 * CX_BUFFER_COPY_ON_WRITE | CX_BUFFER_AUTO_EXTEND).
 *
 * @param prop the parser object
 * @param buf  pointer to the data
 * @param len  number of bytes available at @p buf
 * @return zero on success, -1 when fewer than @p len bytes could be appended
 */
int cxPropertiesFilln(
        CxProperties *prop,
        const char *buf,
        size_t len
) {
    CxBuffer *in = &prop->input;

    if (!cxBufferEof(in)) {
        // unread data is pending - the new data must go behind it
        return cxBufferAppend(buf, 1, len, in) < len ? -1 : 0;
    }

    // everything was consumed: drop a possibly existing buffer ...
    cxBufferDestroy(in);
    // ... and set up a fresh one that refers to the caller's memory
    cxBufferInit(in, (void*) buf, len,
        NULL, CX_BUFFER_COPY_ON_WRITE | CX_BUFFER_AUTO_EXTEND);
    in->size = len;
    return 0;
}

/**
 * Lets the parser use caller-provided memory for its internal (rescue)
 * buffer.
 *
 * The buffer is initialized with the flag CX_BUFFER_COPY_ON_EXTEND.
 *
 * @param prop     the parser object
 * @param buf      the memory to use
 * @param capacity the size of the memory at @p buf in bytes
 */
void cxPropertiesUseStack(CxProperties *prop, char *buf, size_t capacity) {
    CxBuffer *rescue = &prop->buffer;
    cxBufferInit(rescue, buf, capacity, NULL, CX_BUFFER_COPY_ON_EXTEND);
}

/**
 * Retrieves the next key/value pair from the data the parser was filled with.
 *
 * A line ends with '\n'. Everything from the first comment character up to
 * the end of the line is ignored. Lines that are blank after removing the
 * comment are skipped. The key is the text in front of the first delimiter
 * of the line, the value is the text between that delimiter and the end of
 * the line (or the start of the comment). Both are trimmed.
 *
 * The strings stored in @p key and @p value are not copied. They point
 * either into the input buffer or into the internal rescue buffer.
 *
 * When the available data ends in the middle of a line, the remainder is
 * copied to the rescue buffer (which is created with an initial capacity of
 * 256 bytes, when none was provided), the input buffer is reset, and
 * CX_PROPERTIES_INCOMPLETE_DATA is returned so that the caller can fill in
 * more data.
 *
 * When an invalid line is reported, the read position is not advanced.
 *
 * @param prop  the parser object
 * @param key   receives the key on success
 * @param value receives the value on success
 * @return CX_PROPERTIES_NO_ERROR when a pair was stored in @p key / @p value,
 * CX_PROPERTIES_NO_DATA when all data has been consumed,
 * CX_PROPERTIES_INCOMPLETE_DATA when the data ends within a line,
 * CX_PROPERTIES_NULL_INPUT when the input buffer has no memory attached,
 * CX_PROPERTIES_INVALID_EMPTY_KEY when nothing but whitespace precedes the
 * delimiter,
 * CX_PROPERTIES_INVALID_MISSING_DELIMITER when a non-blank line contains no
 * delimiter,
 * CX_PROPERTIES_BUFFER_ALLOC_FAILED when data could not be appended to the
 * rescue buffer
 */
CxPropertiesStatus cxPropertiesNext(
        CxProperties *prop,
        cxstring *key,
        cxstring *value
) {
    // check if we have a text buffer
    if (prop->input.space == NULL) {
        return CX_PROPERTIES_NULL_INPUT;
    }

    // a pointer to the buffer we want to read from
    CxBuffer *current_buffer = &prop->input;

    // check if we have rescued data
    if (!cxBufferEof(&prop->buffer)) {
        // check if we can now get a complete line
        cxstring input = cx_strn(prop->input.space + prop->input.pos,
            prop->input.size - prop->input.pos);
        cxstring nl = cx_strchr(input, '\n');
        if (nl.length > 0) {
            // we add as much data to the rescue buffer as we need
            // to complete the line
            size_t len_until_nl = (size_t)(nl.ptr - input.ptr) + 1;

            if (cxBufferAppend(input.ptr, 1,
                len_until_nl, &prop->buffer) < len_until_nl) {
                return CX_PROPERTIES_BUFFER_ALLOC_FAILED;
            }

            // advance the position in the input buffer
            prop->input.pos += len_until_nl;

            // we now want to read from the rescue buffer
            current_buffer = &prop->buffer;
        } else {
            // still not enough data, copy input buffer to internal buffer
            if (cxBufferAppend(input.ptr, 1,
                input.length, &prop->buffer) < input.length) {
                return CX_PROPERTIES_BUFFER_ALLOC_FAILED;
            }
            // reset the input buffer (make way for a re-fill)
            cxBufferReset(&prop->input);
            return CX_PROPERTIES_INCOMPLETE_DATA;
        }
    }

    char comment1 = prop->config.comment1;
    char comment2 = prop->config.comment2;
    char comment3 = prop->config.comment3;
    char delimiter = prop->config.delimiter;

    // get one line and parse it
    while (!cxBufferEof(current_buffer)) {
        const char *buf = current_buffer->space + current_buffer->pos;
        size_t len = current_buffer->size - current_buffer->pos;

        /*
         * First we check if we have at least one line. We also get indices of
         * delimiter and comment chars.
         *
         * Index zero is a valid position for both characters, therefore
         * separate flags record whether they were found. Using the index
         * itself as "not found" marker would let a later occurrence
         * overwrite a hit in the first column.
         */
        size_t delimiter_index = 0;
        size_t comment_index = 0;
        bool has_delimiter = false;
        bool has_comment = false;

        size_t i = 0;
        char c = 0;
        for (; i < len; i++) {
            c = buf[i];
            if (c == comment1 || c == comment2 || c == comment3) {
                // only the first comment char of the line is relevant
                if (!has_comment) {
                    comment_index = i;
                    has_comment = true;
                }
            } else if (c == delimiter) {
                // only the first delimiter outside of a comment is relevant
                if (!has_delimiter && !has_comment) {
                    delimiter_index = i;
                    has_delimiter = true;
                }
            } else if (c == '\n') {
                break;
            }
        }

        if (c != '\n') {
            // we don't have enough data for a line, use the rescue buffer
            assert(current_buffer != &prop->buffer);
            // make sure that the rescue buffer does not already contain something
            assert(cxBufferEof(&prop->buffer));
            if (prop->buffer.space == NULL) {
                // initialize a rescue buffer, if the user did not provide one
                cxBufferInit(&prop->buffer, NULL, 256, NULL, CX_BUFFER_AUTO_EXTEND);
            } else {
                // from a previous rescue there might be already read data
                // reset the buffer to avoid unnecessary buffer extension
                cxBufferReset(&prop->buffer);
            }
            if (cxBufferAppend(buf, 1, len, &prop->buffer) < len) {
                return CX_PROPERTIES_BUFFER_ALLOC_FAILED;
            }
            // reset the input buffer (make way for a re-fill)
            cxBufferReset(&prop->input);
            return CX_PROPERTIES_INCOMPLETE_DATA;
        }

        // the relevant part of the line ends where the comment starts
        cxstring line = has_comment ?
                        cx_strn(buf, comment_index) :
                        cx_strn(buf, i);
        // check line
        if (!has_delimiter) {
            // if line is not blank, the delimiter is missing
            line = cx_strtrim(line);
            if (line.length > 0) {
                return CX_PROPERTIES_INVALID_MISSING_DELIMITER;
            } else {
                // skip blank line
                // if it was the rescue buffer, return to the original buffer
                if (current_buffer == &prop->buffer) {
                    // assert that the rescue buffer really does not contain more data
                    assert(current_buffer->pos + i + 1 == current_buffer->size);
                    // reset the rescue buffer, but don't destroy it!
                    cxBufferReset(&prop->buffer);
                    // continue with the input buffer
                    current_buffer = &prop->input;
                } else {
                    // if it was the input buffer already, just advance the position
                    current_buffer->pos += i + 1;
                }
                continue;
            }
        } else {
            // the delimiter is located within the line, because it was
            // found in front of a possible comment
            cxstring k = cx_strn(buf, delimiter_index);
            cxstring val = cx_strn(
                    buf + delimiter_index + 1,
                    line.length - delimiter_index - 1);
            k = cx_strtrim(k);
            val = cx_strtrim(val);
            if (k.length > 0) {
                *key = k;
                *value = val;
                // consume the line including the line break
                current_buffer->pos += i + 1;
                assert(current_buffer->pos <= current_buffer->size);
                return CX_PROPERTIES_NO_ERROR;
            } else {
                return CX_PROPERTIES_INVALID_EMPTY_KEY;
            }
        }
        // unreachable - either we returned or skipped a blank line
        assert(false);
    }

    // when we come to this point, all data must have been read
    assert(cxBufferEof(&prop->buffer));
    assert(cxBufferEof(&prop->input));

    return CX_PROPERTIES_NO_DATA;
}

/**
 * Sink function that stores a key/value pair in a map.
 *
 * The value is duplicated with the allocator found in the data pointer of
 * the sink, and the copy is put into the map found in the sink pointer.
 * When the map rejects the entry, the copy is freed again.
 *
 * @param prop  the parser object (unused)
 * @param sink  the sink description
 * @param key   the key of the pair
 * @param value the value of the pair
 * @return zero on success, non-zero when the value could not be duplicated
 * or the map did not accept the entry
 */
static int cx_properties_sink_map(
        cx_attr_unused CxProperties *prop,
        CxPropertiesSink *sink,
        cxstring key,
        cxstring value
) {
    CxMap *map = sink->sink;
    CxAllocator *alloc = sink->data;
    cxmutstr v = cx_strdup_a(alloc, value);
    // never store a null value when the duplication failed
    // NOTE(review): assumes cx_strdup_a reports failure with a NULL ptr - confirm
    if (v.ptr == NULL) return 1;
    int r = cx_map_put_cxstr(map, key, v.ptr);
    if (r != 0) cx_strfree_a(alloc, &v);
    return r;
}

/**
 * Creates a sink description that stores the key/value pairs in a map.
 *
 * The values are duplicated with cxDefaultAllocator.
 *
 * @param map the target map
 * @return the sink description
 */
CxPropertiesSink cxPropertiesMapSink(CxMap *map) {
    CxPropertiesSink map_sink;
    map_sink.sink_func = cx_properties_sink_map;
    map_sink.data = cxDefaultAllocator;
    map_sink.sink = map;
    return map_sink;
}

/**
 * Read function for sources that consist of a single string.
 *
 * Provides the entire string, unless the input buffer of the parser
 * already refers to the memory of the string. In that case a length of
 * zero is reported and the pointer in @p target is left untouched.
 *
 * @param prop   the parser object
 * @param src    the source description
 * @param target receives the data
 * @return always zero
 */
static int cx_properties_read_string(
        CxProperties *prop,
        CxPropertiesSource *src,
        cxstring *target
) {
    if (prop->input.space != src->src) {
        // the string was not handed to the parser yet - provide all of it
        target->ptr = src->src;
        target->length = src->data_size;
        return 0;
    }

    // the parser already works on the string, there is nothing more to add
    target->length = 0;
    return 0;
}

/**
 * Read function for file sources.
 *
 * Reads up to data_size bytes from the stream into the memory at data_ptr
 * and reports that memory, together with the number of bytes actually read,
 * in @p target.
 *
 * @param prop   the parser object (unused)
 * @param src    the source description
 * @param target receives the data
 * @return the result of ferror() for the stream
 */
static int cx_properties_read_file(
        cx_attr_unused CxProperties *prop,
        CxPropertiesSource *src,
        cxstring *target
) {
    FILE *stream = src->src;
    size_t bytes_read = fread(src->data_ptr, 1, src->data_size, stream);
    target->ptr = src->data_ptr;
    target->length = bytes_read;
    return ferror(stream);
}

/**
 * Prepares a file source for reading by allocating data_size bytes
 * of memory for the chunks.
 *
 * @param prop the parser object (unused)
 * @param src  the source description
 * @return zero on success, 1 when the allocation failed
 */
static int cx_properties_read_init_file(
        cx_attr_unused CxProperties *prop,
        CxPropertiesSource *src
) {
    void *chunk = malloc(src->data_size);
    src->data_ptr = chunk;
    return chunk == NULL ? 1 : 0;
}

/**
 * Releases the memory found in the data_ptr of a file source.
 *
 * @param prop the parser object (unused)
 * @param src  the source description
 */
static void cx_properties_read_clean_file(
        cx_attr_unused CxProperties *prop,
        CxPropertiesSource *src
) {
    void *chunk = src->data_ptr;
    free(chunk);
}

/**
 * Creates a source description for a string.
 *
 * The string is referenced by the source and not copied.
 *
 * @param str the string
 * @return the source description
 */
CxPropertiesSource cxPropertiesStringSource(cxstring str) {
    CxPropertiesSource string_source;

    // callbacks: only reading is needed, no setup or cleanup
    string_source.read_func = cx_properties_read_string;
    string_source.read_init_func = NULL;
    string_source.read_clean_func = NULL;

    // the data: the string itself and its length
    string_source.src = (void*) str.ptr;
    string_source.data_ptr = NULL;
    string_source.data_size = str.length;

    return string_source;
}

/**
 * Creates a source description for a character array of known length.
 *
 * The array is referenced by the source and not copied.
 *
 * @param str pointer to the characters
 * @param len number of characters
 * @return the source description
 */
CxPropertiesSource cxPropertiesCstrnSource(const char *str, size_t len) {
    CxPropertiesSource string_source;

    // callbacks: only reading is needed, no setup or cleanup
    string_source.read_func = cx_properties_read_string;
    string_source.read_init_func = NULL;
    string_source.read_clean_func = NULL;

    // the data: the characters and their count
    string_source.src = (void*) str;
    string_source.data_ptr = NULL;
    string_source.data_size = len;

    return string_source;
}

/**
 * Creates a source description for a zero-terminated string.
 *
 * The length is determined with strlen(). The string is referenced by the
 * source and not copied.
 *
 * @param str the zero-terminated string
 * @return the source description
 */
CxPropertiesSource cxPropertiesCstrSource(const char *str) {
    // identical to the variant with explicit length, once the length is known
    return cxPropertiesCstrnSource(str, strlen(str));
}

/**
 * Creates a source description for a file stream that is read in chunks.
 *
 * The memory for the chunks is allocated by the init function and
 * released by the clean function of the source.
 *
 * @param file       the stream to read from
 * @param chunk_size the maximum number of bytes requested per read
 * @return the source description
 */
CxPropertiesSource cxPropertiesFileSource(FILE *file, size_t chunk_size) {
    CxPropertiesSource file_source;

    // callbacks: allocate the chunk memory, read chunks, free the memory
    file_source.read_init_func = cx_properties_read_init_file;
    file_source.read_func = cx_properties_read_file;
    file_source.read_clean_func = cx_properties_read_clean_file;

    // the data: the stream and the chunk size (memory is allocated later)
    file_source.src = file;
    file_source.data_ptr = NULL;
    file_source.data_size = chunk_size;

    return file_source;
}

/**
 * Reads all data from a source, parses it, and passes every key/value pair
 * to a sink.
 *
 * The source is read repeatedly until it either reports an error or
 * provides no more data. Each portion is handed to the parser, and the
 * pairs are extracted until the parser stops reporting
 * CX_PROPERTIES_NO_ERROR. The clean function of the source (if any) is
 * invoked before returning, except when the init function failed.
 *
 * @param prop   the parser object
 * @param sink   the sink description (a sink function is mandatory)
 * @param source the source description (a read function is mandatory)
 * @return CX_PROPERTIES_READ_INIT_FAILED when the init function of the source
 * reported an error,
 * CX_PROPERTIES_READ_FAILED when the read function reported an error,
 * CX_PROPERTIES_SINK_FAILED when the sink function reported an error,
 * any parser status greater than CX_PROPERTIES_OK,
 * CX_PROPERTIES_NO_DATA when the source was exhausted without finding a pair,
 * CX_PROPERTIES_NO_ERROR when the source was exhausted and at least one pair
 * was found
 */
CxPropertiesStatus cxPropertiesLoad(
        CxProperties *prop,
        CxPropertiesSink sink,
        CxPropertiesSource source
) {
    assert(source.read_func != NULL);
    assert(sink.sink_func != NULL);

    // give the source the chance to prepare itself
    if (source.read_init_func != NULL
            && source.read_init_func(prop, &source) != 0) {
        return CX_PROPERTIES_READ_INIT_FAILED;
    }

    CxPropertiesStatus status;
    bool found = false;
    for (;;) {
        // fetch the next portion of data
        cxstring input;
        if (source.read_func(prop, &source, &input) != 0) {
            status = CX_PROPERTIES_READ_FAILED;
            break;
        }

        // the source is exhausted
        if (input.length == 0) {
            status = found ? CX_PROPERTIES_NO_ERROR : CX_PROPERTIES_NO_DATA;
            break;
        }

        // hand the data to the parser ...
        cxPropertiesFill(prop, input);

        // ... and forward all pairs that can be extracted from it
        CxPropertiesStatus kv_status;
        for (;;) {
            cxstring key, value;
            kv_status = cxPropertiesNext(prop, &key, &value);
            if (kv_status != CX_PROPERTIES_NO_ERROR) {
                break;
            }
            found = true;
            if (sink.sink_func(prop, &sink, key, value) != 0) {
                kv_status = CX_PROPERTIES_SINK_FAILED;
                break;
            }
        }

        // everything above CX_PROPERTIES_OK terminates the transfer
        if (kv_status > CX_PROPERTIES_OK) {
            status = kv_status;
            break;
        }
    }

    // give the source the chance to release its resources
    if (source.read_clean_func != NULL) {
        source.read_clean_func(prop, &source);
    }

    return status;
}

mercurial