Tue, 20 Aug 2024 12:39:35 +0200
cx_tree_add_iter() - optimize check for empty trees
relates to #390
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2024 Mike Becker, Olaf Wintermann All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/**
 * \file tree.h
 * \brief Interface for tree implementations.
 * \author Mike Becker
 * \author Olaf Wintermann
 * \copyright 2-Clause BSD License
 */

#ifndef UCX_TREE_H
#define UCX_TREE_H

#include "common.h"
#include "iterator.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * A depth-first tree iterator.
 *
 * This iterator is not position-aware in a strict sense, as it does not assume
 * a particular order of elements in the tree. However, the iterator keeps track
 * of the number of nodes it has passed in a counter variable.
* Each node, regardless of the number of passes, is counted only once.
 *
 * @note Objects that are pointed to by an iterator are mutable through that
 * iterator. However, if the underlying data structure is mutated by other
 * means than this iterator (e.g. elements added or removed), the iterator
 * becomes invalid (regardless of what cxIteratorValid() returns).
 *
 * @see CxIterator
 */
typedef struct cx_tree_iterator_s {
    /**
     * Base members.
     */
    CX_ITERATOR_BASE;
    /**
     * Indicates whether the subtree below the current node shall be skipped.
     */
    bool skip;
    /**
     * Set to true, when the iterator shall visit a node again
     * when all its children have been processed.
     */
    bool visit_on_exit;
    /**
     * True, if this iterator is currently leaving the node.
     */
    bool exiting;
    /**
     * Offset in the node struct for the children linked list.
     */
    ptrdiff_t loc_children;
    /**
     * Offset in the node struct for the next pointer.
     */
    ptrdiff_t loc_next;
    /**
     * The total number of distinct nodes that have been passed so far.
     */
    size_t counter;
    /**
     * The currently observed node.
     *
     * This is the same as what cxIteratorCurrent() would return.
     */
    void *node;
    /**
     * Stores a copy of the next pointer of the visited node.
     * Allows freeing a node on exit without corrupting the iteration.
     */
    void *node_next;
    /**
     * Internal stack.
     * Will be automatically freed once the iterator becomes invalid.
     *
     * If you want to discard the iterator before, you need to manually
     * call cxTreeIteratorDispose().
     */
    void **stack;
    /**
     * Internal capacity of the stack.
     */
    size_t stack_capacity;
    /*
     * The two members below share the same storage (anonymous union),
     * i.e. they are two names for one and the same value.
     */
    union {
        /**
         * Internal stack size.
         */
        size_t stack_size;
        /**
         * The current depth in the tree.
         */
        size_t depth;
    };
} CxTreeIterator;

/**
 * An element in a visitor queue.
 */
struct cx_tree_visitor_queue_s {
    /**
     * The tree node to visit.
     */
    void *node;
    /**
     * The depth of the node.
     */
    size_t depth;
    /**
     * The next element in the queue or \c NULL.
     */
    struct cx_tree_visitor_queue_s *next;
};

/**
 * A breadth-first tree iterator.
*
 * This iterator needs to maintain a visitor queue that will be automatically
 * freed once the iterator becomes invalid.
 * If you want to discard the iterator before, you MUST manually call
 * cxTreeVisitorDispose().
 *
 * This iterator is not position-aware in a strict sense, as it does not assume
 * a particular order of elements in the tree. However, the iterator keeps track
 * of the number of nodes it has passed in a counter variable.
 * Each node, regardless of the number of passes, is counted only once.
 *
 * @note Objects that are pointed to by an iterator are mutable through that
 * iterator. However, if the underlying data structure is mutated by other
 * means than this iterator (e.g. elements added or removed), the iterator
 * becomes invalid (regardless of what cxIteratorValid() returns).
 *
 * @see CxIterator
 */
typedef struct cx_tree_visitor_s {
    /**
     * Base members.
     */
    CX_ITERATOR_BASE;
    /**
     * Indicates whether the subtree below the current node shall be skipped.
     */
    bool skip;
    /**
     * Offset in the node struct for the children linked list.
     */
    ptrdiff_t loc_children;
    /**
     * Offset in the node struct for the next pointer.
     */
    ptrdiff_t loc_next;
    /**
     * The total number of distinct nodes that have been passed so far.
     */
    size_t counter;
    /**
     * The currently observed node.
     *
     * This is the same as what cxIteratorCurrent() would return.
     */
    void *node;
    /**
     * The current depth in the tree.
     */
    size_t depth;
    /**
     * The next element in the visitor queue.
     */
    struct cx_tree_visitor_queue_s *queue_next;
    /**
     * The last element in the visitor queue.
     */
    struct cx_tree_visitor_queue_s *queue_last;
} CxTreeVisitor;

/**
 * Releases internal memory of the given tree iterator.
 *
 * The stack pointer is reset to \c NULL afterwards, so disposing the same
 * iterator more than once is harmless.
 *
 * @param iter the iterator
 */
__attribute__((__nonnull__))
static inline void cxTreeIteratorDispose(CxTreeIterator *iter) {
    free(iter->stack);
    iter->stack = NULL;
}

/**
 * Releases internal memory of the given tree visitor.
* @param visitor the visitor */ __attribute__((__nonnull__)) static inline void cxTreeVisitorDispose(CxTreeVisitor *visitor) { struct cx_tree_visitor_queue_s *q = visitor->queue_next; while (q != NULL) { struct cx_tree_visitor_queue_s *next = q->next; free(q); q = next; } } /** * Advises the iterator to skip the subtree below the current node and * also continues the current loop. * * @param iterator the iterator */ #define cxTreeIteratorContinue(iterator) (iterator).skip = true; continue /** * Advises the visitor to skip the subtree below the current node and * also continues the current loop. * * @param visitor the visitor */ #define cxTreeVisitorContinue(visitor) cxTreeIteratorContinue(visitor) /** * Links a node to a (new) parent. * * If the node has already a parent, it is unlinked, first. * If the parent has children already, the node is \em appended to the list * of all currently existing children. * * @param parent the parent node * @param node the node that shall be linked * @param loc_parent offset in the node struct for the parent pointer * @param loc_children offset in the node struct for the children linked list * @param loc_last_child optional offset in the node struct for the pointer to * the last child in the linked list (negative if there is no such pointer) * @param loc_prev offset in the node struct for the prev pointer * @param loc_next offset in the node struct for the next pointer * @see cx_tree_unlink() */ __attribute__((__nonnull__)) void cx_tree_link( void *restrict parent, void *restrict node, ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child, ptrdiff_t loc_prev, ptrdiff_t loc_next ); /** * Unlinks a node from its parent. * * If the node has no parent, this function does nothing. 
*
 * @param node the node that shall be unlinked from its parent
 * @param loc_parent offset in the node struct for the parent pointer
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_last_child optional offset in the node struct for the pointer to
 * the last child in the linked list (negative if there is no such pointer)
 * @param loc_prev offset in the node struct for the prev pointer
 * @param loc_next offset in the node struct for the next pointer
 * @see cx_tree_link()
 */
__attribute__((__nonnull__))
void cx_tree_unlink(
        void *node,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
);

/**
 * Function pointer for a search function.
 *
 * A function of this kind shall check if the specified \p node
 * contains the given \p data or if one of the children might contain
 * the data.
 *
 * The function should use the returned integer to indicate how close the
 * match is, where a negative number means that it does not match at all.
 *
 * For example, if a tree stores file path information, a node that is
 * describing a parent directory of a filename that is searched, shall
 * return a positive number to indicate that a child node might contain the
 * searched item. On the other hand, if the node denotes a path that is not a
 * prefix of the searched filename, the function would return -1 to indicate
 * that the search does not need to be continued in that branch.
 *
 * @param node the node that is currently investigated
 * @param data the data that is searched for
 *
 * @return 0 if the node contains the data,
 * positive if one of the children might contain the data,
 * negative if neither the node, nor the children contains the data
 */
typedef int (*cx_tree_search_func)(void const *node, void const *data);

/**
 * Searches for data in a tree.
*
 * When the data cannot be found exactly, the search function might return a
 * closest result which might be a good starting point for adding a new node
 * to the tree.
 *
 * Depending on the tree structure it is not necessarily guaranteed that the
 * "closest" match is uniquely defined. This function will search for a node
 * with the best match according to the \p sfunc (meaning: the return value of
 * \p sfunc which is closest to zero). If that is also ambiguous, an arbitrary
 * node matching the criteria is returned.
 *
 * @param root the root node
 * @param data the data to search for
 * @param sfunc the search function
 * @param result where the result shall be stored
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_next offset in the node struct for the next pointer
 * @return zero if the node was found exactly, positive if a node was found that
 * could contain the node (but doesn't right now), negative if the tree does not
 * contain any node that might be related to the searched data
 */
__attribute__((__nonnull__))
int cx_tree_search(
        void const *root,
        void const *data,
        cx_tree_search_func sfunc,
        void **result,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
);

/**
 * Creates a depth-first iterator for a tree with the specified root node.
 *
 * @note A tree iterator needs to maintain a stack of visited nodes, which is
 * allocated using stdlib malloc().
 * When the iterator becomes invalid, this memory is automatically released.
 * However, if you wish to cancel the iteration before the iterator becomes
 * invalid by itself, you MUST call cxTreeIteratorDispose() manually to release
 * the memory.
 *
 * @remark The returned iterator does not support cxIteratorFlagRemoval().
*
 * @param root the root node
 * @param visit_on_exit set to true, when the iterator shall visit a node again
 * after processing all children
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_next offset in the node struct for the next pointer
 * @return the new tree iterator
 * @see cxTreeIteratorDispose()
 */
__attribute__((__nonnull__))
CxTreeIterator cx_tree_iterator(
        void *root,
        bool visit_on_exit,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
);

/**
 * Creates a breadth-first iterator for a tree with the specified root node.
 *
 * @note A tree visitor needs to maintain a queue of to be visited nodes, which
 * is allocated using stdlib malloc().
 * When the visitor becomes invalid, this memory is automatically released.
 * However, if you wish to cancel the iteration before the visitor becomes
 * invalid by itself, you MUST call cxTreeVisitorDispose() manually to release
 * the memory.
 *
 * @remark The returned iterator does not support cxIteratorFlagRemoval().
 *
 * @param root the root node
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_next offset in the node struct for the next pointer
 * @return the new tree visitor
 * @see cxTreeVisitorDispose()
 */
__attribute__((__nonnull__))
CxTreeVisitor cx_tree_visitor(
        void *root,
        ptrdiff_t loc_children,
        ptrdiff_t loc_next
);

/**
 * Describes a function that creates a tree node from the specified data.
 * The first argument points to the data the node shall contain and
 * the second, optional, argument points to an existing node that already
 * contains the data.
 * The third argument may be used for additional data (e.g. an allocator).
 * Functions of this type shall either return a new pointer to a newly
 * created node, a pointer to the existing node, or \c NULL when allocation
 * fails.
* Returning a pointer to the existing node means that the function decides
 * not to create a new node for the data and that the caller shall continue to
 * use the existing node.
 *
 * \note the function may leave the node pointers in the struct uninitialized.
 * The caller is responsible to set them according to where the node will be
 * added to the tree.
 */
typedef void *(*cx_tree_node_create_func)(void const *, void const *, void *);

/**
 * The local search depth for a new subtree when adding multiple elements.
 * The default value is 3.
 * This variable is used by #cx_tree_add_array() and #cx_tree_add_iter() to
 * implement optimized insertion of multiple elements into a tree.
 */
extern unsigned int cx_tree_add_look_around_depth;

/**
 * Adds multiple elements efficiently to a tree.
 *
 * This function returns the number of elements successfully obtained from the
 * iterator, which is not necessarily the number of new nodes created (depending
 * on the implementation of \p cfunc).
 *
 * Once an element cannot be added to the tree, this function returns, leaving
 * the iterator in a valid state pointing to the element that could not be
 * added.
 *
 * The advantage of this function compared to multiple invocations of
 * #cx_tree_add() is that the search for the insert locations is not always
 * started from the root node.
 * Instead, the function checks #cx_tree_add_look_around_depth many parent nodes
 * of the current insert location before starting from the root node again.
 * When the variable is set to zero, only the last found location is checked
 * again.
 *
 * Refer to the documentation of #cx_tree_add() for more details.
*
 * @param iter a pointer to an arbitrary iterator
 * @param sfunc a search function
 * @param cfunc a node creation function
 * @param cdata optional additional data
 * @param root the location where a pointer to the root node is stored
 * @param loc_parent offset in the node struct for the parent pointer
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_last_child optional offset in the node struct for the pointer to
 * the last child in the linked list (negative if there is no such pointer)
 * @param loc_prev offset in the node struct for the prev pointer
 * @param loc_next offset in the node struct for the next pointer
 * @return the number of elements obtained from the iterator
 * @see cx_tree_add()
 */
__attribute__((__nonnull__(1, 2, 3, 5)))
size_t cx_tree_add_iter(
        struct cx_iterator_base_s *iter,
        cx_tree_search_func sfunc,
        cx_tree_node_create_func cfunc,
        void *cdata,
        void **root,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
);

/**
 * Adds multiple elements efficiently to a tree.
 *
 * This function returns the number of elements successfully processed which
 * is not necessarily the number of new nodes created (depending on the
 * implementation of \p cfunc).
 *
 * Once an element cannot be added to the tree, this function returns.
 * That is, when this function returns \c n, the first \c n elements of
 * \p src are definitely in the tree.
 *
 * The advantage of this function compared to multiple invocations of
 * #cx_tree_add() is that the search for the insert locations is not always
 * started from the root node.
 * Instead, the function checks #cx_tree_add_look_around_depth many parent nodes
 * of the current insert location before starting from the root node again.
 * When the variable is set to zero, only the last found location is checked
 * again.
 *
 * Refer to the documentation of #cx_tree_add() for more details.
*
 * @param src a pointer to the source data array
 * @param num the number of elements in the \p src array
 * @param elem_size the size of each element in the \p src array
 * @param sfunc a search function
 * @param cfunc a node creation function
 * @param cdata optional additional data
 * @param root the location where a pointer to the root node is stored
 * @param loc_parent offset in the node struct for the parent pointer
 * @param loc_children offset in the node struct for the children linked list
 * @param loc_last_child optional offset in the node struct for the pointer to
 * the last child in the linked list (negative if there is no such pointer)
 * @param loc_prev offset in the node struct for the prev pointer
 * @param loc_next offset in the node struct for the next pointer
 * @return the number of array elements successfully processed
 * @see cx_tree_add()
 */
__attribute__((__nonnull__(1, 4, 5, 7)))
size_t cx_tree_add_array(
        void const *src,
        size_t num,
        size_t elem_size,
        cx_tree_search_func sfunc,
        cx_tree_node_create_func cfunc,
        void *cdata,
        void **root,
        ptrdiff_t loc_parent,
        ptrdiff_t loc_children,
        ptrdiff_t loc_last_child,
        ptrdiff_t loc_prev,
        ptrdiff_t loc_next
);

/**
 * Adds data to a tree.
 *
 * An adequate location where to add the new tree node is searched with the
 * specified \p sfunc.
 *
 * When a location is found, the \p cfunc will be invoked with \p cdata and,
 * in case \p sfunc returned a direct match, the already found node.
 *
 * If \p cfunc returns a new node pointer, it will be linked into the tree.
 * When \p sfunc returned a positive integer, the new node will be linked as a
 * child. When \p sfunc returned zero and the found node has a parent, the new
 * node will be added as sibling - otherwise, the new node will be the new root.
 * When \p sfunc returned a negative value, the new node will always be the
 * new root.
 *
 * If \p cfunc returns an existing node found by \p sfunc, this function just
 * returns the found node without modifying the tree.
* * This function may return \c NULL when \p cfunc tries to allocate a new node * but fails to do so. * * The \p root argument shall point to a location where the pointer to the root * node is stored. The pointer to the root node may be \c NULL in which case * this function will instantly create a new node and write the location to * \p root. * * Multiple elements can be added more efficiently with * #cx_tree_add_array() or #cx_tree_add_iter(). * * @param src a pointer to the data * @param sfunc a search function * @param cfunc a node creation function * @param cdata optional additional data * @param root the location where a pointer to the root node is stored * @param loc_parent offset in the node struct for the parent pointer * @param loc_children offset in the node struct for the children linked list * @param loc_last_child optional offset in the node struct for the pointer to * the last child in the linked list (negative if there is no such pointer) * @param loc_prev offset in the node struct for the prev pointer * @param loc_next offset in the node struct for the next pointer * @return a pointer to the new node, to an existing node, or \c NULL */ __attribute__((__nonnull__(1, 2, 3, 5))) void *cx_tree_add( void const *src, cx_tree_search_func sfunc, cx_tree_node_create_func cfunc, void *cdata, void **root, ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child, ptrdiff_t loc_prev, ptrdiff_t loc_next ); #ifdef __cplusplus } // extern "C" #endif #endif //UCX_TREE_H