--- a/src/tree.c Sun Jul 07 14:56:44 2024 +0200 +++ b/src/tree.c Tue Aug 20 18:02:39 2024 +0200 @@ -33,13 +33,32 @@ #include <assert.h> #define CX_TREE_PTR(cur, off) (*(void**)(((char*)(cur))+(off))) -#define CX_TREE_PTR(cur, off) (*(void**)(((char*)(cur))+(off))) #define tree_parent(node) CX_TREE_PTR(node, loc_parent) #define tree_children(node) CX_TREE_PTR(node, loc_children) #define tree_last_child(node) CX_TREE_PTR(node, loc_last_child) #define tree_prev(node) CX_TREE_PTR(node, loc_prev) #define tree_next(node) CX_TREE_PTR(node, loc_next) +#define cx_tree_ptr_locations \ + loc_parent, loc_children, loc_last_child, loc_prev, loc_next + +static void cx_tree_zero_pointers( + void *node, + ptrdiff_t loc_parent, + ptrdiff_t loc_children, + ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, + ptrdiff_t loc_next +) { + tree_parent(node) = NULL; + tree_prev(node) = NULL; + tree_next(node) = NULL; + tree_children(node) = NULL; + if (loc_last_child >= 0) { + tree_last_child(node) = NULL; + } +} + void cx_tree_link( void *restrict parent, void *restrict node, @@ -52,8 +71,7 @@ void *current_parent = tree_parent(node); if (current_parent == parent) return; if (current_parent != NULL) { - cx_tree_unlink(node, loc_parent, loc_children, loc_last_child, - loc_prev, loc_next); + cx_tree_unlink(node, cx_tree_ptr_locations); } if (tree_children(parent) == NULL) { @@ -117,7 +135,7 @@ int cx_tree_search( void const *root, - void const *data, + void const *node, cx_tree_search_func sfunc, void **result, ptrdiff_t loc_children, @@ -127,7 +145,7 @@ *result = NULL; // shortcut: compare root before doing anything else - ret = sfunc(root, data); + ret = sfunc(root, node); if (ret < 0) { return ret; } else if (ret == 0 || tree_children(root) == NULL) { @@ -150,33 +168,33 @@ // remember a candidate for adding the data // also remember the exact return code from sfunc - void *candidate = NULL; - int ret_candidate = -1; + void *candidate = (void *) root; + int ret_candidate = ret; // process the working stack while (work_size > 0) { // pop element - void const *node = work[--work_size]; + void const *elem = work[--work_size]; // apply the search function - ret = sfunc(node, data); + ret = sfunc(elem, node); if (ret == 0) { // if found, exit the search - *result = (void*) node; + *result = (void *) elem; work_size = 0; break; } else if (ret > 0) { // if children might contain the data, add them to the stack - void *c = tree_children(node); + void *c = tree_children(elem); while (c != NULL) { cx_array_simple_add(work, c); c = tree_next(c); } // remember this node in case no child is suitable - if (ret_candidate < 0 || ret < ret_candidate) { - candidate = (void *) node; + if (ret < ret_candidate) { + candidate = (void *) elem; ret_candidate = ret; } } @@ -193,6 +211,22 @@ return ret; } +int cx_tree_search_data( + void const *root, + void const *data, + cx_tree_search_data_func sfunc, + void **result, + ptrdiff_t loc_children, + ptrdiff_t loc_next +) { + // it is basically the same implementation + return cx_tree_search( + root, data, + (cx_tree_search_func) sfunc, + result, + loc_children, loc_next); +} + static bool cx_tree_iter_valid(void const *it) { struct cx_tree_iterator_s const *iter = it; return iter->node != NULL; @@ -426,3 +460,208 @@ return iter; } +static void cx_tree_add_link_duplicate( + void *original, void *duplicate, + ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, ptrdiff_t loc_next +) { + void *shared_parent = tree_parent(original); + if (shared_parent == NULL) { + cx_tree_link(original, duplicate, cx_tree_ptr_locations); + } else { + cx_tree_link(shared_parent, duplicate, cx_tree_ptr_locations); + } +} + +static void cx_tree_add_link_new( + void *parent, void *node, cx_tree_search_func sfunc, + ptrdiff_t loc_parent, ptrdiff_t loc_children, ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, ptrdiff_t loc_next +) { + // check the current children one by one, + // if they could be children of the new node + void *child = tree_children(parent); + while (child != NULL) { + void *next = tree_next(child); + + if (sfunc(node, child) > 0) { + // the sibling could be a child -> re-link + cx_tree_link(node, child, cx_tree_ptr_locations); + } + + child = next; + } + + // add new node as new child + cx_tree_link(parent, node, cx_tree_ptr_locations); +} + +int cx_tree_add( + void const *src, + cx_tree_search_func sfunc, + cx_tree_node_create_func cfunc, + void *cdata, + void **cnode, + void *root, + ptrdiff_t loc_parent, + ptrdiff_t loc_children, + ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, + ptrdiff_t loc_next +) { + *cnode = cfunc(src, cdata); + if (*cnode == NULL) return 1; + cx_tree_zero_pointers(*cnode, cx_tree_ptr_locations); + + void *match = NULL; + int result = cx_tree_search( + root, + *cnode, + sfunc, + &match, + loc_children, + loc_next + ); + + if (result < 0) { + // node does not fit into the tree - return non-zero value + return 1; + } else if (result == 0) { + // data already found in the tree, link duplicate + cx_tree_add_link_duplicate(match, *cnode, cx_tree_ptr_locations); + } else { + // closest match found, add new node + cx_tree_add_link_new(match, *cnode, sfunc, cx_tree_ptr_locations); + } + + return 0; +} + +unsigned int cx_tree_add_look_around_depth = 3; + +size_t cx_tree_add_iter( + struct cx_iterator_base_s *iter, + cx_tree_search_func sfunc, + cx_tree_node_create_func cfunc, + void *cdata, + void **failed, + void *root, + ptrdiff_t loc_parent, + ptrdiff_t loc_children, + ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, + ptrdiff_t loc_next +) { + // erase the failed pointer + *failed = NULL; + + // iter not valid? cancel... + if (!iter->valid(iter)) return 0; + + size_t processed = 0; + void *current_node = root; + void const *elem; + + for (void **eptr; + iter->valid(iter) && (eptr = iter->current(iter)) != NULL; + iter->next(iter)) { + elem = *eptr; + + // create the new node + void *new_node = cfunc(elem, cdata); + if (new_node == NULL) return processed; + cx_tree_zero_pointers(new_node, cx_tree_ptr_locations); + + // start searching from current node + void *match; + int result; + unsigned int look_around_retries = cx_tree_add_look_around_depth; + cx_tree_add_look_around_retry: + result = cx_tree_search( + current_node, + new_node, + sfunc, + &match, + loc_children, + loc_next + ); + + if (result < 0) { + // traverse upwards and try to find better parents + void *parent = tree_parent(current_node); + if (parent != NULL) { + if (look_around_retries > 0) { + look_around_retries--; + current_node = parent; + } else { + // look around retries exhausted, start from the root + current_node = root; + } + goto cx_tree_add_look_around_retry; + } else { + // no parents. so we failed + *failed = new_node; + return processed; + } + } else if (result == 0) { + // data already found in the tree, link duplicate + cx_tree_add_link_duplicate(match, new_node, cx_tree_ptr_locations); + // but stick with the original match, in case we needed a new root + current_node = match; + } else { + // closest match found, add new node as child + cx_tree_add_link_new(match, new_node, sfunc, + cx_tree_ptr_locations); + current_node = match; + } + + processed++; + } + return processed; +} + +size_t cx_tree_add_array( + void const *src, + size_t num, + size_t elem_size, + cx_tree_search_func sfunc, + cx_tree_node_create_func cfunc, + void *cdata, + void **failed, + void *root, + ptrdiff_t loc_parent, + ptrdiff_t loc_children, + ptrdiff_t loc_last_child, + ptrdiff_t loc_prev, + ptrdiff_t loc_next +) { + // erase failed pointer + *failed = NULL; + + // super special case: zero elements + if (num == 0) { + return 0; + } + + // special case: one element does not need an iterator + if (num == 1) { + void *node; + if (0 == cx_tree_add( + src, sfunc, cfunc, cdata, &node, root, + loc_parent, loc_children, loc_last_child, + loc_prev, loc_next)) { + return 1; + } else { + *failed = node; + return 0; + } + } + + // otherwise, create iterator and hand over to other function + CxIterator iter = cxIterator(src, elem_size, num); + return cx_tree_add_iter(cxIteratorRef(iter), sfunc, + cfunc, cdata, failed, root, + loc_parent, loc_children, loc_last_child, + loc_prev, loc_next); +} +