From c0507d48f244aab38bf7d591da84e40ab5140893 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Thu, 26 Feb 2026 19:58:15 -0500 Subject: [PATCH 1/5] abstract out tree policy --- docs/AddressSpace.md | 2 +- .../backend_helpers/largebuddyrange.h | 26 +- .../backend_helpers/smallbuddyrange.h | 10 +- src/snmalloc/ds_core/ds_core.h | 2 +- src/snmalloc/ds_core/rankbalancetree.h | 844 ++++++++++++++++++ src/snmalloc/ds_core/redblacktree.h | 795 ----------------- src/test/func/redblack/redblack.cc | 6 +- 7 files changed, 867 insertions(+), 818 deletions(-) create mode 100644 src/snmalloc/ds_core/rankbalancetree.h delete mode 100644 src/snmalloc/ds_core/redblacktree.h diff --git a/docs/AddressSpace.md b/docs/AddressSpace.md index 1e28491ee..ce40a26df 100644 --- a/docs/AddressSpace.md +++ b/docs/AddressSpace.md @@ -115,7 +115,7 @@ Its contents can be decoded as follows: This trick of pointing at the child's chunk rather than at the child `MetaEntry` is particularly useful on CHERI: it allows us to capture the authority to the chunk without needing another pointer and costs just a shift and add.) -3. The `meta` field's `LargeBuddyRep::RED_BIT` is used to carry the red/black color of this node. +3. The `meta` field's `LargeBuddyRep::TREE_TAG_BIT` is used to carry the red/black color of this node. See `src/backend/largebuddyrange.h`. diff --git a/src/snmalloc/backend_helpers/largebuddyrange.h b/src/snmalloc/backend_helpers/largebuddyrange.h index 15324753f..ef052bb86 100644 --- a/src/snmalloc/backend_helpers/largebuddyrange.h +++ b/src/snmalloc/backend_helpers/largebuddyrange.h @@ -18,7 +18,7 @@ namespace snmalloc /* * The values we store in our rbtree are the addresses of (combined spans * of) chunks of the address space; as such, bits in (MIN_CHUNK_SIZE - 1) - * are unused and so the RED_BIT is packed therein. However, in practice, + * are unused and so the TREE_TAG_BIT is packed therein. 
However, in practice, * these are not "just any" uintptr_t-s, but specifically the uintptr_t-s * inside the Pagemap's BackendAllocator::Entry structures. * @@ -37,13 +37,13 @@ namespace snmalloc * a bit that is a valid part of the address of a chunk. * @{ */ - static constexpr address_t RED_BIT = 1 << 8; + static constexpr address_t TREE_TAG_BIT = 1 << 8; - static_assert(RED_BIT < MIN_CHUNK_SIZE); + static_assert(TREE_TAG_BIT < MIN_CHUNK_SIZE); static_assert(MetaEntryBase::is_backend_allowed_value( - MetaEntryBase::Word::One, RED_BIT)); + MetaEntryBase::Word::One, TREE_TAG_BIT)); static_assert(MetaEntryBase::is_backend_allowed_value( - MetaEntryBase::Word::Two, RED_BIT)); + MetaEntryBase::Word::Two, TREE_TAG_BIT)); ///@} /// The value of a null node, as returned by `get` @@ -56,7 +56,7 @@ namespace snmalloc */ static void set(Handle ptr, Contents r) { - ptr = r | (static_cast(ptr.get()) & RED_BIT); + ptr = r | (static_cast(ptr.get()) & TREE_TAG_BIT); } /** @@ -64,7 +64,7 @@ namespace snmalloc */ static Contents get(const Handle ptr) { - return ptr.get() & ~RED_BIT; + return ptr.get() & ~TREE_TAG_BIT; } /** @@ -87,19 +87,19 @@ namespace snmalloc return entry.get_backend_word(Pagemap::Entry::Word::Two); } - static bool is_red(Contents k) + static bool tree_tag(Contents k) { - return (ref(true, k).get() & RED_BIT) == RED_BIT; + return (ref(true, k).get() & TREE_TAG_BIT) == TREE_TAG_BIT; } - static void set_red(Contents k, bool new_is_red) + static void set_tree_tag(Contents k, bool new_tree_tag) { - if (new_is_red != is_red(k)) + if (new_tree_tag != tree_tag(k)) { auto v = ref(true, k); - v = v.get() ^ RED_BIT; + v = v.get() ^ TREE_TAG_BIT; } - SNMALLOC_ASSERT(is_red(k) == new_is_red); + SNMALLOC_ASSERT(tree_tag(k) == new_tree_tag); } static Contents offset(Contents k, size_t size) diff --git a/src/snmalloc/backend_helpers/smallbuddyrange.h b/src/snmalloc/backend_helpers/smallbuddyrange.h index 6f8400e83..7e9d70718 100644 --- 
a/src/snmalloc/backend_helpers/smallbuddyrange.h +++ b/src/snmalloc/backend_helpers/smallbuddyrange.h @@ -57,21 +57,21 @@ namespace snmalloc return &r->right; } - static bool is_red(Contents k) + static bool tree_tag(Contents k) { if (k == nullptr) return false; return (address_cast(*ref(false, k)) & MASK) == MASK; } - static void set_red(Contents k, bool new_is_red) + static void set_tree_tag(Contents k, bool new_tree_tag) { - if (new_is_red != is_red(k)) + if (new_tree_tag != tree_tag(k)) { auto r = ref(false, k); auto old_addr = pointer_align_down<2, FreeChunk>(r->as_void()); - if (new_is_red) + if (new_tree_tag) { if (old_addr == nullptr) *r = CapPtr, bounds>::unsafe_from( @@ -84,7 +84,7 @@ namespace snmalloc { *r = old_addr; } - SNMALLOC_ASSERT(is_red(k) == new_is_red); + SNMALLOC_ASSERT(tree_tag(k) == new_tree_tag); } } diff --git a/src/snmalloc/ds_core/ds_core.h b/src/snmalloc/ds_core/ds_core.h index cc395127b..112d160d9 100644 --- a/src/snmalloc/ds_core/ds_core.h +++ b/src/snmalloc/ds_core/ds_core.h @@ -14,5 +14,5 @@ #include "helpers.h" #include "mitigations.h" #include "ptrwrap.h" -#include "redblacktree.h" +#include "rankbalancetree.h" #include "tid.h" \ No newline at end of file diff --git a/src/snmalloc/ds_core/rankbalancetree.h b/src/snmalloc/ds_core/rankbalancetree.h new file mode 100644 index 000000000..5ae4e3a1e --- /dev/null +++ b/src/snmalloc/ds_core/rankbalancetree.h @@ -0,0 +1,844 @@ +#pragma once + +#include "snmalloc/ds_core/concept.h" +#include "snmalloc/ds_core/defines.h" +#include "snmalloc/stl/array.h" + +#include +#include + +// This file was designed for red-black trees but later migrated to support +// rank-balanced trees. We abuse the "RB" acronym to mean "rank-balanced". + +namespace snmalloc +{ +#ifdef __cpp_concepts + /** + * The representation must define two types. `Contents` defines some + * identifier that can be mapped to a node as a value type. `Handle` defines + * a reference to the storage, which can be used to update it. 
+ * + * Conceptually, `Contents` is a node ID and `Handle` is a pointer to a node + * ID. + */ + template + concept RBRepTypes = requires() { + typename Rep::Handle; + typename Rep::Contents; + }; + + /** + * The representation must define operations on the holder and contents + * types. It must be able to 'dereference' a holder with `get`, assign to it + * with `set`, set and query the red/black colour of a node with + * `set_tree_tag` and `tree_tag`. + * + * The `ref` method provides uniform access to the children of a node, + * returning a holder pointing to either the left or right child, depending on + * the direction parameter. + * + * The backend must also provide two constant values. + * `Rep::null` defines a value that, if returned from `get`, indicates a null + * value. `Rep::root` defines a value that, if constructed directly, indicates + * a null value and can therefore be used as the initial raw bit pattern of + * the root node. + */ + template + concept RBRepMethods = + requires(typename Rep::Handle hp, typename Rep::Contents k, bool b) { + { Rep::get(hp) } -> ConceptSame; + { Rep::set(hp, k) } -> ConceptSame; + { Rep::tree_tag(k) } -> ConceptSame; + { Rep::set_tree_tag(k, b) } -> ConceptSame; + { Rep::ref(b, k) } -> ConceptSame; + { Rep::null } -> ConceptSameModRef; + { + typename Rep::Handle{const_cast< + stl::remove_const_t>*>( + &Rep::root)} + } -> ConceptSame; + }; + + template + concept RBRep = // + RBRepTypes // + && RBRepMethods // + && + ConceptSame>; +#endif + + namespace rankbalancetree + { + // Container that behaves like a C++ Ref type to enable assignment + // to treat left, right and root uniformly. 
+ template + class ChildRef + { + using H = typename Rep::Handle; + using K = typename Rep::Contents; + + H ptr; + + public: + constexpr ChildRef() = default; + + ChildRef(H p) : ptr(p) {} + + ChildRef(const ChildRef& other) = default; + + operator K() + { + return Rep::get(ptr); + } + + ChildRef& operator=(const ChildRef& other) = default; + + ChildRef& operator=(const K t) + { + // Use representations assigment, so we update the correct bits + // color and other things way also be stored in the Handle. + Rep::set(ptr, t); + return *this; + } + + /** + * Comparison operators. Note that these are nominal comparisons: + * they compare the identities of the references rather than the values + * referenced. + * comparison of the values held in these child references. + * @{ + */ + bool operator==(const ChildRef t) const + { + return ptr == t.ptr; + } + + bool operator!=(const ChildRef t) const + { + return ptr != t.ptr; + } + + ///@} + + bool is_null() + { + return Rep::get(ptr) == Rep::null; + } + + /** + * Return the reference in some printable format defined by the + * representation. + */ + auto printable() + { + return Rep::printable(ptr); + } + + static ChildRef get_dir(bool direction, K k) + { + return {Rep::ref(direction, k)}; + } + }; + + template + struct RBStep + { + ChildRef node; + bool dir; + + // Default constructor needed for Array. + constexpr RBStep() = default; + + // Remove copy constructors to avoid accidentally copying and mutating the + // path. + RBStep(const RBStep& other) = delete; + RBStep& operator=(const RBStep& other) = delete; + + /** + * Update the step to point to a new node and direction. + */ + void set(ChildRef r, bool direction) + { + node = r; + dir = direction; + } + + /** + * Update the step to point to a new node and direction. + */ + void set(typename Rep::Handle r, bool direction) + { + set(ChildRef(r), direction); + } + }; + + // Internal representation of a path in the tree. 
+ // Exposed to allow for some composite operations to be defined + // externally. + template + struct RBPath + { + using ChildRef = rankbalancetree::ChildRef; + using RBStep = rankbalancetree::RBStep; + + stl::Array path; + size_t length = 0; + + RBPath(typename Rep::Handle root) + { + path[0].set(root, false); + length = 1; + } + + ChildRef ith(size_t n) + { + SNMALLOC_ASSERT(length >= n); + return path[length - n - 1].node; + } + + bool ith_dir(size_t n) + { + SNMALLOC_ASSERT(length >= n); + return path[length - n - 1].dir; + } + + ChildRef curr() + { + return ith(0); + } + + bool curr_dir() + { + return ith_dir(0); + } + + ChildRef parent() + { + return ith(1); + } + + bool parent_dir() + { + return ith_dir(1); + } + + ChildRef grand_parent() + { + return ith(2); + } + + // Extend path in `direction`. + // If `direction` contains `Rep::null`, do not extend the path. + // Returns false if path is not extended. + bool move(bool direction) + { + auto next = ChildRef::get_dir(direction, curr()); + if (next.is_null()) + return false; + path[length].set(next, direction); + length++; + return true; + } + + // Extend path in `direction`. + // If `direction` contains zero, do not extend the path. + // Returns false if path is extended with null. + bool move_inc_null(bool direction) + { + auto next = ChildRef::get_dir(direction, curr()); + path[length].set(next, direction); + length++; + return !(next.is_null()); + } + + // Remove top element from the path. + void pop() + { + SNMALLOC_ASSERT(length > 0); + length--; + } + + // If a path is changed in place, then some references can be stale. + // This rewalks the updated path, and corrects any internal references. + // `expected` is used to run the update, or if `false` used to check + // that no update is required. 
+ void fixup(bool expected = true) + { + if (!run_checks && !expected) + return; + + // During a splice in remove the path can be invalidated, + // this refreshs the path so that the it refers to the spliced + // nodes fields. + // TODO optimise usage to avoid traversing whole path. + for (size_t i = 1; i < length; i++) + { + auto parent = path[i - 1].node; + auto& curr = path[i].node; + auto dir = path[i].dir; + auto actual = ChildRef::get_dir(dir, parent); + if (actual != curr) + { + if (!expected) + { + snmalloc::error("Performed an unexpected fixup."); + } + curr = actual; + } + } + } + + void print() + { + if constexpr (TRACE) + { + for (size_t i = 0; i < length; i++) + { + message<1024>( + " -> {} @ {} ({})", + Rep::printable(typename Rep::Contents(path[i].node)), + path[i].node.printable(), + path[i].dir); + } + } + } + }; + } // namespace rankbalancetree + + namespace rankbalancetree + { + template + struct RedBlackPolicy + { + using K = typename Rep::Contents; + using H = typename Rep::Handle; + using ChildRef = rankbalancetree::ChildRef; + using RBPath = rankbalancetree::RBPath; + + /* + * Verify structural invariants. Returns the black depth of the `curr`ent + * node. 
+ */ + int invariant(K curr, K lower = Rep::null, K upper = Rep::null) + { + if constexpr (!run_checks) + { + UNUSED(curr, lower, upper); + return 0; + } + else + { + if (curr == Rep::null) + return 1; + + if ( + ((lower != Rep::null) && Rep::compare(lower, curr)) || + ((upper != Rep::null) && Rep::compare(curr, upper))) + { + report_fatal_error( + "Invariant failed: {} is out of bounds {}..{}", + Rep::printable(curr), + Rep::printable(lower), + Rep::printable(upper)); + } + + if ( + Rep::tree_tag(curr) && + (Rep::tree_tag(ChildRef::get_dir(true, curr)) || + Rep::tree_tag(ChildRef::get_dir(false, curr)))) + { + report_fatal_error( + "Invariant failed: {} is red and has red child", + Rep::printable(curr)); + } + + int left_inv = invariant(ChildRef::get_dir(true, curr), lower, curr); + int right_inv = + invariant(ChildRef::get_dir(false, curr), curr, upper); + + if (left_inv != right_inv) + { + report_fatal_error( + "Invariant failed: {} has different black depths", + Rep::printable(curr)); + } + + if (Rep::tree_tag(curr)) + return left_inv; + + return left_inv + 1; + } + } + + // Insert an element at the given path. + template + void insert_path( + RBPath& path, K value, DebugLogger debug_log, RootGetter get_root) + { + SNMALLOC_ASSERT(path.curr().is_null()); + path.curr() = value; + ChildRef::get_dir(true, path.curr()) = Rep::null; + ChildRef::get_dir(false, path.curr()) = Rep::null; + Rep::set_tree_tag(value, true); + + debug_log("Insert ", path); + + // Propogate double red up to rebalance. 
+ // These notes were particularly clear for explaining insert + // https://www.cs.cmu.edu/~fp/courses/15122-f10/lectures/17-rbtrees.pdf + while (path.curr() != get_root()) + { + SNMALLOC_ASSERT(Rep::tree_tag(path.curr())); + if (!Rep::tree_tag(path.parent())) + { + invariant(get_root()); + return; + } + bool curr_dir = path.curr_dir(); + K curr = path.curr(); + K parent = path.parent(); + K grand_parent = path.grand_parent(); + SNMALLOC_ASSERT(!Rep::tree_tag(grand_parent)); + if (path.parent_dir() == curr_dir) + { + debug_log("Insert - double red case 1", path, path.grand_parent()); + /* Same direction case + * G - grand parent + * P - parent + * C - current + * S - sibling + * + * G P + * / \ / \ + * A P --> G C + * / \ / \ + * S C A S + */ + K sibling = ChildRef::get_dir(!curr_dir, parent); + Rep::set_tree_tag(curr, false); + ChildRef::get_dir(curr_dir, grand_parent) = sibling; + ChildRef::get_dir(!curr_dir, parent) = grand_parent; + path.grand_parent() = parent; + debug_log( + "Insert - double red case 1 - done", path, path.grand_parent()); + } + else + { + debug_log("Insert - double red case 2", path, path.grand_parent()); + /* G - grand parent + * P - parent + * C - current + * Cg - Current child for grand parent + * Cp - Current child for parent + * + * G C + * / \ / \ + * A P G P + * / \ --> / \ / \ + * C B A Cg Cp B + * / \ + * Cg Cp + */ + K child_g = ChildRef::get_dir(curr_dir, curr); + K child_p = ChildRef::get_dir(!curr_dir, curr); + + Rep::set_tree_tag(parent, false); + path.grand_parent() = curr; + ChildRef::get_dir(curr_dir, curr) = grand_parent; + ChildRef::get_dir(!curr_dir, curr) = parent; + ChildRef::get_dir(curr_dir, parent) = child_p; + ChildRef::get_dir(!curr_dir, grand_parent) = child_g; + debug_log( + "Insert - double red case 2 - done", path, path.grand_parent()); + } + + // Move to what replaced grand parent. 
+ path.pop(); + path.pop(); + invariant(path.curr()); + } + Rep::set_tree_tag(get_root(), false); + invariant(get_root()); + } + + template + bool remove_path(RBPath& path, DebugLogger debug_log, RootGetter get_root) + { + ChildRef splice = path.curr(); + SNMALLOC_ASSERT(!(splice.is_null())); + + debug_log("Removing", path); + + /* + * Find immediately smaller leaf element (rightmost descendant of left + * child) to serve as the replacement for this node. We may not have a + * left subtree, so this may not move the path at all. + */ + path.move(true); + while (path.move(false)) + { + } + + K curr = path.curr(); + + { + // Locally extract right-child-less replacement, replacing it with its + // left child, if any + K child = ChildRef::get_dir(true, path.curr()); + // Unlink target replacing with possible child. + path.curr() = child; + } + + bool leaf_red = Rep::tree_tag(curr); + + if (path.curr() != splice) + { + // If we had a left child, replace ourselves with the extracted value + // from above + Rep::set_tree_tag(curr, Rep::tree_tag(splice)); + ChildRef::get_dir(true, curr) = K{ChildRef::get_dir(true, splice)}; + ChildRef::get_dir(false, curr) = K{ChildRef::get_dir(false, splice)}; + splice = curr; + path.fixup(); + } + + debug_log("Splice done", path); + + // TODO: Clear node contents? + + // Red leaf removal requires no rebalancing. + if (leaf_red) + return true; + + // Now in the double black case. + // End of path is considered double black, that is, one black element + // shorter than satisfies the invariant. The following algorithm moves + // up the path until it finds a close red element or the root. If we + // convert the tree to one, in which the root is double black, then the + // algorithm is complete, as there is nothing to be out of balance with. + // Otherwise, we are searching for nearby red elements so we can rotate + // the tree to rebalance. 
The following slides nicely cover the case + // analysis below + // https://www.cs.purdue.edu/homes/ayg/CS251/slides/chap13c.pdf + while (path.curr() != get_root()) + { + K parent = path.parent(); + bool cur_dir = path.curr_dir(); + K sibling = ChildRef::get_dir(!cur_dir, parent); + + /* Handle red sibling case. + * This performs a rotation to give a black sibling. + * + * p s(b) + * / \ / \ + * c s(r) --> p(r) m + * / \ / \ + * n m c n + * + * By invariant we know that p, n and m are all initially black. + */ + if (Rep::tree_tag(sibling)) + { + debug_log("Red sibling", path, path.parent()); + K nibling = ChildRef::get_dir(cur_dir, sibling); + ChildRef::get_dir(!cur_dir, parent) = nibling; + ChildRef::get_dir(cur_dir, sibling) = parent; + Rep::set_tree_tag(parent, true); + Rep::set_tree_tag(sibling, false); + path.parent() = sibling; + // Manually fix path. Using path.fixup would alter the complexity + // class. + path.pop(); + path.move(cur_dir); + path.move_inc_null(cur_dir); + path.fixup(false); + debug_log("Red sibling - done", path, path.parent()); + continue; + } + + /* Handle red nibling case 1. + *
<p> <s>
+ * / \ / \ + * c s --> p rn + * / \ / \ + * on rn c on + */ + if (Rep::tree_tag(ChildRef::get_dir(!cur_dir, sibling))) + { + debug_log("Red nibling 1", path, path.parent()); + K r_nibling = ChildRef::get_dir(!cur_dir, sibling); + K o_nibling = ChildRef::get_dir(cur_dir, sibling); + ChildRef::get_dir(cur_dir, sibling) = parent; + ChildRef::get_dir(!cur_dir, parent) = o_nibling; + path.parent() = sibling; + Rep::set_tree_tag(r_nibling, false); + Rep::set_tree_tag(sibling, Rep::tree_tag(parent)); + Rep::set_tree_tag(parent, false); + debug_log("Red nibling 1 - done", path, path.parent()); + break; + } + + /* Handle red nibling case 2. + *
<p> <rn>
+ * / \ / \ + * c s --> p s + * / \ / \ / \ + * rn on c rno rns on + * / \ + * rno rns + */ + if (Rep::tree_tag(ChildRef::get_dir(cur_dir, sibling))) + { + debug_log("Red nibling 2", path, path.parent()); + K r_nibling = ChildRef::get_dir(cur_dir, sibling); + K r_nibling_same = ChildRef::get_dir(cur_dir, r_nibling); + K r_nibling_opp = ChildRef::get_dir(!cur_dir, r_nibling); + ChildRef::get_dir(!cur_dir, parent) = r_nibling_same; + ChildRef::get_dir(cur_dir, sibling) = r_nibling_opp; + ChildRef::get_dir(cur_dir, r_nibling) = parent; + ChildRef::get_dir(!cur_dir, r_nibling) = sibling; + path.parent() = r_nibling; + Rep::set_tree_tag(r_nibling, Rep::tree_tag(parent)); + Rep::set_tree_tag(parent, false); + debug_log("Red nibling 2 - done", path, path.parent()); + break; + } + + // Handle black sibling and niblings, and red parent. + if (Rep::tree_tag(parent)) + { + debug_log("Black sibling and red parent case", path, path.parent()); + Rep::set_tree_tag(parent, false); + Rep::set_tree_tag(sibling, true); + debug_log( + "Black sibling and red parent case - done", path, path.parent()); + break; + } + // Handle black sibling and niblings and black parent. + debug_log( + "Black sibling, niblings and black parent case", + path, + path.parent()); + Rep::set_tree_tag(sibling, true); + path.pop(); + invariant(path.curr()); + debug_log( + "Black sibling, niblings and black parent case - done", + path, + path.curr()); + } + return true; + } + }; + struct WeakAVLPolicy { + + }; + } // namespace rankbalancetree + + /** + * Contains a self balancing binary tree. + * + * The template parameter Rep provides the representation of the nodes as a + * collection of functions and types that are requires. See the associated + * test for an example. + * + * run_checks enables invariant checking on the tree. Enabled in Debug. + * TRACE prints all the sets of the rebalancing operations. Only enabled by + * the test when debugging a specific failure. 
+ */ + template< + SNMALLOC_CONCEPT(RBRep) Rep, + bool run_checks = Debug, + bool TRACE = false, + typename Policy = rankbalancetree::RedBlackPolicy> + class RBTree : public Policy + { + using H = typename Rep::Handle; + using K = typename Rep::Contents; + using ChildRef = rankbalancetree::ChildRef; + using RBStep = rankbalancetree::RBStep; + + // Root field of the tree + typename stl::remove_const_t> + root{Rep::root}; + + ChildRef get_root() + { + return {H{&root}}; + } + + void invariant() + { + Policy::invariant(get_root()); + } + + public: + using RBPath = rankbalancetree::RBPath; + + private: + struct DebugLogger + { + RBTree* context; + + void operator()(const char* msg, RBPath& path) + { + this->operator()(msg, path, context->get_root()); + } + + void operator()(const char* msg, RBPath& path, ChildRef base) + { + if constexpr (TRACE) + { + message<100>("------- {}", Rep::name()); + message<1024>(msg); + path.print(); + context->print(base); + } + else + { + UNUSED(msg, path, base); + } + } + }; + + public: + constexpr RBTree() = default; + + void print() + { + print(get_root()); + } + + void print(ChildRef curr, const char* indent = "", size_t depth = 0) + { + if constexpr (TRACE) + { + if (curr.is_null()) + { + message<1024>("{}\\_null", indent); + return; + } + +#ifdef _MSC_VER + auto colour = Rep::tree_tag(curr) ? "R-" : "B-"; + auto reset = ""; +#else + auto colour = Rep::tree_tag(curr) ? "\e[1;31m" : "\e[1;34m"; + auto reset = "\e[0m"; +#endif + + message<1024>( + "{}\\_{}{}{}@{} ({})", + indent, + colour, + Rep::printable((K(curr))), + reset, + curr.printable(), + depth); + if (!(ChildRef::get_dir(true, curr).is_null() && + ChildRef::get_dir(false, curr).is_null())) + { + // As the tree should be balanced, the depth should not exceed 128 if + // there are 2^64 elements in the tree. This is a debug feature, and + // it would be impossible to debug something of this size, so this is + // considerably larger than required. 
+ // If there is a bug that leads to an unbalanced tree, this might be + // insufficient to accurately display the tree, but it will still be + // memory safe as the search code is bounded by the string size. + static constexpr size_t max_depth = 128; + char s_indent[max_depth]; + size_t end = 0; + for (; end < max_depth - 1; end++) + { + if (indent[end] == 0) + break; + s_indent[end] = indent[end]; + } + s_indent[end] = '|'; + s_indent[end + 1] = 0; + print(ChildRef::get_dir(true, curr), s_indent, depth + 1); + s_indent[end] = ' '; + print(ChildRef::get_dir(false, curr), s_indent, depth + 1); + } + } + } + + bool find(RBPath& path, K value) + { + bool dir; + + if (path.curr().is_null()) + return false; + + do + { + if (Rep::equal(path.curr(), value)) + return true; + dir = Rep::compare(path.curr(), value); + } while (path.move_inc_null(dir)); + + return false; + } + + bool is_empty() + { + return get_root().is_null(); + } + + K remove_min() + { + if (is_empty()) + return Rep::null; + + auto path = get_root_path(); + while (path.move(true)) + { + } + + K result = path.curr(); + + remove_path(path); + return result; + } + + bool remove_elem(K value) + { + if (is_empty()) + return false; + + auto path = get_root_path(); + if (!find(path, value)) + return false; + + remove_path(path); + return true; + } + + bool insert_elem(K value) + { + auto path = get_root_path(); + + if (find(path, value)) + return false; + + Policy::insert_path( + path, value, DebugLogger{this}, [this]() { return get_root(); }); + return true; + } + + RBPath get_root_path() + { + return RBPath(H{&root}); + } + + void insert_path(RBPath& path, K value) + { + Policy::insert_path(path, value, DebugLogger{this}, [this]() { return get_root(); }); + } + + bool remove_path(RBPath& path) + { + return Policy::remove_path(path, DebugLogger{this}, [this]() { return get_root(); }); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/ds_core/redblacktree.h b/src/snmalloc/ds_core/redblacktree.h deleted 
file mode 100644 index e6ce73c24..000000000 --- a/src/snmalloc/ds_core/redblacktree.h +++ /dev/null @@ -1,795 +0,0 @@ -#pragma once - -#include "snmalloc/stl/array.h" - -#include -#include - -namespace snmalloc -{ -#ifdef __cpp_concepts - /** - * The representation must define two types. `Contents` defines some - * identifier that can be mapped to a node as a value type. `Handle` defines - * a reference to the storage, which can be used to update it. - * - * Conceptually, `Contents` is a node ID and `Handle` is a pointer to a node - * ID. - */ - template - concept RBRepTypes = requires() { - typename Rep::Handle; - typename Rep::Contents; - }; - - /** - * The representation must define operations on the holder and contents - * types. It must be able to 'dereference' a holder with `get`, assign to it - * with `set`, set and query the red/black colour of a node with `set_red` and - * `is_red`. - * - * The `ref` method provides uniform access to the children of a node, - * returning a holder pointing to either the left or right child, depending on - * the direction parameter. - * - * The backend must also provide two constant values. - * `Rep::null` defines a value that, if returned from `get`, indicates a null - * value. `Rep::root` defines a value that, if constructed directly, indicates - * a null value and can therefore be used as the initial raw bit pattern of - * the root node. 
- */ - template - concept RBRepMethods = - requires(typename Rep::Handle hp, typename Rep::Contents k, bool b) { - { - Rep::get(hp) - } -> ConceptSame; - { - Rep::set(hp, k) - } -> ConceptSame; - { - Rep::is_red(k) - } -> ConceptSame; - { - Rep::set_red(k, b) - } -> ConceptSame; - { - Rep::ref(b, k) - } -> ConceptSame; - { - Rep::null - } -> ConceptSameModRef; - { - typename Rep::Handle{const_cast< - stl::remove_const_t>*>( - &Rep::root)} - } -> ConceptSame; - }; - - template - concept RBRep = // - RBRepTypes // - && RBRepMethods // - && - ConceptSame>; -#endif - - /** - * Contains a self balancing binary tree. - * - * The template parameter Rep provides the representation of the nodes as a - * collection of functions and types that are requires. See the associated - * test for an example. - * - * run_checks enables invariant checking on the tree. Enabled in Debug. - * TRACE prints all the sets of the rebalancing operations. Only enabled by - * the test when debugging a specific failure. - */ - template< - SNMALLOC_CONCEPT(RBRep) Rep, - bool run_checks = Debug, - bool TRACE = false> - class RBTree - { - using H = typename Rep::Handle; - using K = typename Rep::Contents; - - // Container that behaves like a C++ Ref type to enable assignment - // to treat left, right and root uniformly. - class ChildRef - { - H ptr; - - public: - constexpr ChildRef() = default; - - ChildRef(H p) : ptr(p) {} - - ChildRef(const ChildRef& other) = default; - - operator K() - { - return Rep::get(ptr); - } - - ChildRef& operator=(const ChildRef& other) = default; - - ChildRef& operator=(const K t) - { - // Use representations assigment, so we update the correct bits - // color and other things way also be stored in the Handle. - Rep::set(ptr, t); - return *this; - } - - /** - * Comparison operators. Note that these are nominal comparisons: - * they compare the identities of the references rather than the values - * referenced. - * comparison of the values held in these child references. 
- * @{ - */ - bool operator==(const ChildRef t) const - { - return ptr == t.ptr; - } - - bool operator!=(const ChildRef t) const - { - return ptr != t.ptr; - } - - ///@} - - bool is_null() - { - return Rep::get(ptr) == Rep::null; - } - - /** - * Return the reference in some printable format defined by the - * representation. - */ - auto printable() - { - return Rep::printable(ptr); - } - }; - - // Root field of the tree - typename stl::remove_const_t> - root{Rep::root}; - - static ChildRef get_dir(bool direction, K k) - { - return {Rep::ref(direction, k)}; - } - - ChildRef get_root() - { - return {H{&root}}; - } - - void invariant() - { - invariant(get_root()); - } - - /* - * Verify structural invariants. Returns the black depth of the `curr`ent - * node. - */ - int invariant(K curr, K lower = Rep::null, K upper = Rep::null) - { - if constexpr (!run_checks) - { - UNUSED(curr, lower, upper); - return 0; - } - else - { - if (curr == Rep::null) - return 1; - - if ( - ((lower != Rep::null) && Rep::compare(lower, curr)) || - ((upper != Rep::null) && Rep::compare(curr, upper))) - { - report_fatal_error( - "Invariant failed: {} is out of bounds {}..{}", - Rep::printable(curr), - Rep::printable(lower), - Rep::printable(upper)); - } - - if ( - Rep::is_red(curr) && - (Rep::is_red(get_dir(true, curr)) || - Rep::is_red(get_dir(false, curr)))) - { - report_fatal_error( - "Invariant failed: {} is red and has red child", - Rep::printable(curr)); - } - - int left_inv = invariant(get_dir(true, curr), lower, curr); - int right_inv = invariant(get_dir(false, curr), curr, upper); - - if (left_inv != right_inv) - { - report_fatal_error( - "Invariant failed: {} has different black depths", - Rep::printable(curr)); - } - - if (Rep::is_red(curr)) - return left_inv; - - return left_inv + 1; - } - } - - struct RBStep - { - ChildRef node; - bool dir; - - // Default constructor needed for Array. 
- constexpr RBStep() = default; - - // Remove copy constructors to avoid accidentally copying and mutating the - // path. - RBStep(const RBStep& other) = delete; - RBStep& operator=(const RBStep& other) = delete; - - /** - * Update the step to point to a new node and direction. - */ - void set(ChildRef r, bool direction) - { - node = r; - dir = direction; - } - - /** - * Update the step to point to a new node and direction. - */ - void set(typename Rep::Handle r, bool direction) - { - set(ChildRef(r), direction); - } - }; - - public: - // Internal representation of a path in the tree. - // Exposed to allow for some composite operations to be defined - // externally. - class RBPath - { - friend class RBTree; - - stl::Array path; - size_t length = 0; - - RBPath(typename Rep::Handle root) - { - path[0].set(root, false); - length = 1; - } - - ChildRef ith(size_t n) - { - SNMALLOC_ASSERT(length >= n); - return path[length - n - 1].node; - } - - bool ith_dir(size_t n) - { - SNMALLOC_ASSERT(length >= n); - return path[length - n - 1].dir; - } - - ChildRef curr() - { - return ith(0); - } - - bool curr_dir() - { - return ith_dir(0); - } - - ChildRef parent() - { - return ith(1); - } - - bool parent_dir() - { - return ith_dir(1); - } - - ChildRef grand_parent() - { - return ith(2); - } - - // Extend path in `direction`. - // If `direction` contains `Rep::null`, do not extend the path. - // Returns false if path is not extended. - bool move(bool direction) - { - auto next = get_dir(direction, curr()); - if (next.is_null()) - return false; - path[length].set(next, direction); - length++; - return true; - } - - // Extend path in `direction`. - // If `direction` contains zero, do not extend the path. - // Returns false if path is extended with null. - bool move_inc_null(bool direction) - { - auto next = get_dir(direction, curr()); - path[length].set(next, direction); - length++; - return !(next.is_null()); - } - - // Remove top element from the path. 
- void pop() - { - SNMALLOC_ASSERT(length > 0); - length--; - } - - // If a path is changed in place, then some references can be stale. - // This rewalks the updated path, and corrects any internal references. - // `expected` is used to run the update, or if `false` used to check - // that no update is required. - void fixup(bool expected = true) - { - if (!run_checks && !expected) - return; - - // During a splice in remove the path can be invalidated, - // this refreshs the path so that the it refers to the spliced - // nodes fields. - // TODO optimise usage to avoid traversing whole path. - for (size_t i = 1; i < length; i++) - { - auto parent = path[i - 1].node; - auto& curr = path[i].node; - auto dir = path[i].dir; - auto actual = get_dir(dir, parent); - if (actual != curr) - { - if (!expected) - { - snmalloc::error("Performed an unexpected fixup."); - } - curr = actual; - } - } - } - - void print() - { - if constexpr (TRACE) - { - for (size_t i = 0; i < length; i++) - { - message<1024>( - " -> {} @ {} ({})", - Rep::printable(K(path[i].node)), - path[i].node.printable(), - path[i].dir); - } - } - } - }; - - private: - void debug_log(const char* msg, RBPath& path) - { - debug_log(msg, path, get_root()); - } - - void debug_log(const char* msg, RBPath& path, ChildRef base) - { - if constexpr (TRACE) - { - message<100>("------- {}", Rep::name()); - message<1024>(msg); - path.print(); - print(base); - } - else - { - UNUSED(msg, path, base); - } - } - - public: - constexpr RBTree() = default; - - void print() - { - print(get_root()); - } - - void print(ChildRef curr, const char* indent = "", size_t depth = 0) - { - if constexpr (TRACE) - { - if (curr.is_null()) - { - message<1024>("{}\\_null", indent); - return; - } - -#ifdef _MSC_VER - auto colour = Rep::is_red(curr) ? "R-" : "B-"; - auto reset = ""; -#else - auto colour = Rep::is_red(curr) ? 
"\e[1;31m" : "\e[1;34m"; - auto reset = "\e[0m"; -#endif - - message<1024>( - "{}\\_{}{}{}@{} ({})", - indent, - colour, - Rep::printable((K(curr))), - reset, - curr.printable(), - depth); - if (!(get_dir(true, curr).is_null() && get_dir(false, curr).is_null())) - { - // As the tree should be balanced, the depth should not exceed 128 if - // there are 2^64 elements in the tree. This is a debug feature, and - // it would be impossible to debug something of this size, so this is - // considerably larger than required. - // If there is a bug that leads to an unbalanced tree, this might be - // insufficient to accurately display the tree, but it will still be - // memory safe as the search code is bounded by the string size. - static constexpr size_t max_depth = 128; - char s_indent[max_depth]; - size_t end = 0; - for (; end < max_depth - 1; end++) - { - if (indent[end] == 0) - break; - s_indent[end] = indent[end]; - } - s_indent[end] = '|'; - s_indent[end + 1] = 0; - print(get_dir(true, curr), s_indent, depth + 1); - s_indent[end] = ' '; - print(get_dir(false, curr), s_indent, depth + 1); - } - } - } - - bool find(RBPath& path, K value) - { - bool dir; - - if (path.curr().is_null()) - return false; - - do - { - if (Rep::equal(path.curr(), value)) - return true; - dir = Rep::compare(path.curr(), value); - } while (path.move_inc_null(dir)); - - return false; - } - - bool remove_path(RBPath& path) - { - ChildRef splice = path.curr(); - SNMALLOC_ASSERT(!(splice.is_null())); - - debug_log("Removing", path); - - /* - * Find immediately smaller leaf element (rightmost descendant of left - * child) to serve as the replacement for this node. We may not have a - * left subtree, so this may not move the path at all. 
- */ - path.move(true); - while (path.move(false)) - {} - - K curr = path.curr(); - - { - // Locally extract right-child-less replacement, replacing it with its - // left child, if any - K child = get_dir(true, path.curr()); - // Unlink target replacing with possible child. - path.curr() = child; - } - - bool leaf_red = Rep::is_red(curr); - - if (path.curr() != splice) - { - // If we had a left child, replace ourselves with the extracted value - // from above - Rep::set_red(curr, Rep::is_red(splice)); - get_dir(true, curr) = K{get_dir(true, splice)}; - get_dir(false, curr) = K{get_dir(false, splice)}; - splice = curr; - path.fixup(); - } - - debug_log("Splice done", path); - - // TODO: Clear node contents? - - // Red leaf removal requires no rebalancing. - if (leaf_red) - return true; - - // Now in the double black case. - // End of path is considered double black, that is, one black element - // shorter than satisfies the invariant. The following algorithm moves up - // the path until it finds a close red element or the root. If we convert - // the tree to one, in which the root is double black, then the algorithm - // is complete, as there is nothing to be out of balance with. Otherwise, - // we are searching for nearby red elements so we can rotate the tree to - // rebalance. The following slides nicely cover the case analysis below - // https://www.cs.purdue.edu/homes/ayg/CS251/slides/chap13c.pdf - while (path.curr() != ChildRef(H{&root})) - { - K parent = path.parent(); - bool cur_dir = path.curr_dir(); - K sibling = get_dir(!cur_dir, parent); - - /* Handle red sibling case. - * This performs a rotation to give a black sibling. - * - * p s(b) - * / \ / \ - * c s(r) --> p(r) m - * / \ / \ - * n m c n - * - * By invariant we know that p, n and m are all initially black. 
- */ - if (Rep::is_red(sibling)) - { - debug_log("Red sibling", path, path.parent()); - K nibling = get_dir(cur_dir, sibling); - get_dir(!cur_dir, parent) = nibling; - get_dir(cur_dir, sibling) = parent; - Rep::set_red(parent, true); - Rep::set_red(sibling, false); - path.parent() = sibling; - // Manually fix path. Using path.fixup would alter the complexity - // class. - path.pop(); - path.move(cur_dir); - path.move_inc_null(cur_dir); - path.fixup(false); - debug_log("Red sibling - done", path, path.parent()); - continue; - } - - /* Handle red nibling case 1. - *
<p> <s>
- * / \ / \ - * c s --> p rn - * / \ / \ - * on rn c on - */ - if (Rep::is_red(get_dir(!cur_dir, sibling))) - { - debug_log("Red nibling 1", path, path.parent()); - K r_nibling = get_dir(!cur_dir, sibling); - K o_nibling = get_dir(cur_dir, sibling); - get_dir(cur_dir, sibling) = parent; - get_dir(!cur_dir, parent) = o_nibling; - path.parent() = sibling; - Rep::set_red(r_nibling, false); - Rep::set_red(sibling, Rep::is_red(parent)); - Rep::set_red(parent, false); - debug_log("Red nibling 1 - done", path, path.parent()); - break; - } - - /* Handle red nibling case 2. - *
<p> <rn>
- * / \ / \ - * c s --> p s - * / \ / \ / \ - * rn on c rno rns on - * / \ - * rno rns - */ - if (Rep::is_red(get_dir(cur_dir, sibling))) - { - debug_log("Red nibling 2", path, path.parent()); - K r_nibling = get_dir(cur_dir, sibling); - K r_nibling_same = get_dir(cur_dir, r_nibling); - K r_nibling_opp = get_dir(!cur_dir, r_nibling); - get_dir(!cur_dir, parent) = r_nibling_same; - get_dir(cur_dir, sibling) = r_nibling_opp; - get_dir(cur_dir, r_nibling) = parent; - get_dir(!cur_dir, r_nibling) = sibling; - path.parent() = r_nibling; - Rep::set_red(r_nibling, Rep::is_red(parent)); - Rep::set_red(parent, false); - debug_log("Red nibling 2 - done", path, path.parent()); - break; - } - - // Handle black sibling and niblings, and red parent. - if (Rep::is_red(parent)) - { - debug_log("Black sibling and red parent case", path, path.parent()); - Rep::set_red(parent, false); - Rep::set_red(sibling, true); - debug_log( - "Black sibling and red parent case - done", path, path.parent()); - break; - } - // Handle black sibling and niblings and black parent. - debug_log( - "Black sibling, niblings and black parent case", path, path.parent()); - Rep::set_red(sibling, true); - path.pop(); - invariant(path.curr()); - debug_log( - "Black sibling, niblings and black parent case - done", - path, - path.curr()); - } - return true; - } - - // Insert an element at the given path. - void insert_path(RBPath& path, K value) - { - SNMALLOC_ASSERT(path.curr().is_null()); - path.curr() = value; - get_dir(true, path.curr()) = Rep::null; - get_dir(false, path.curr()) = Rep::null; - Rep::set_red(value, true); - - debug_log("Insert ", path); - - // Propogate double red up to rebalance. 
- // These notes were particularly clear for explaining insert - // https://www.cs.cmu.edu/~fp/courses/15122-f10/lectures/17-rbtrees.pdf - while (path.curr() != get_root()) - { - SNMALLOC_ASSERT(Rep::is_red(path.curr())); - if (!Rep::is_red(path.parent())) - { - invariant(); - return; - } - bool curr_dir = path.curr_dir(); - K curr = path.curr(); - K parent = path.parent(); - K grand_parent = path.grand_parent(); - SNMALLOC_ASSERT(!Rep::is_red(grand_parent)); - if (path.parent_dir() == curr_dir) - { - debug_log("Insert - double red case 1", path, path.grand_parent()); - /* Same direction case - * G - grand parent - * P - parent - * C - current - * S - sibling - * - * G P - * / \ / \ - * A P --> G C - * / \ / \ - * S C A S - */ - K sibling = get_dir(!curr_dir, parent); - Rep::set_red(curr, false); - get_dir(curr_dir, grand_parent) = sibling; - get_dir(!curr_dir, parent) = grand_parent; - path.grand_parent() = parent; - debug_log( - "Insert - double red case 1 - done", path, path.grand_parent()); - } - else - { - debug_log("Insert - double red case 2", path, path.grand_parent()); - /* G - grand parent - * P - parent - * C - current - * Cg - Current child for grand parent - * Cp - Current child for parent - * - * G C - * / \ / \ - * A P G P - * / \ --> / \ / \ - * C B A Cg Cp B - * / \ - * Cg Cp - */ - K child_g = get_dir(curr_dir, curr); - K child_p = get_dir(!curr_dir, curr); - - Rep::set_red(parent, false); - path.grand_parent() = curr; - get_dir(curr_dir, curr) = grand_parent; - get_dir(!curr_dir, curr) = parent; - get_dir(curr_dir, parent) = child_p; - get_dir(!curr_dir, grand_parent) = child_g; - debug_log( - "Insert - double red case 2 - done", path, path.grand_parent()); - } - - // Move to what replaced grand parent. 
- path.pop(); - path.pop(); - invariant(path.curr()); - } - Rep::set_red(get_root(), false); - invariant(); - } - - bool is_empty() - { - return get_root().is_null(); - } - - K remove_min() - { - if (is_empty()) - return Rep::null; - - auto path = get_root_path(); - while (path.move(true)) - {} - - K result = path.curr(); - - remove_path(path); - return result; - } - - bool remove_elem(K value) - { - if (is_empty()) - return false; - - auto path = get_root_path(); - if (!find(path, value)) - return false; - - remove_path(path); - return true; - } - - bool insert_elem(K value) - { - auto path = get_root_path(); - - if (find(path, value)) - return false; - - insert_path(path, value); - return true; - } - - RBPath get_root_path() - { - return RBPath(H{&root}); - } - }; -} // namespace snmalloc diff --git a/src/test/func/redblack/redblack.cc b/src/test/func/redblack/redblack.cc index 164a5978f..30bbe868e 100644 --- a/src/test/func/redblack/redblack.cc +++ b/src/test/func/redblack/redblack.cc @@ -98,14 +98,14 @@ class Rep return {&array[k].right}; } - static bool is_red(key k) + static bool tree_tag(key k) { return (array[k].left & 1) == 1; } - static void set_red(key k, bool new_is_red) + static void set_tree_tag(key k, bool new_tree_tag) { - if (new_is_red != is_red(k)) + if (new_tree_tag != tree_tag(k)) array[k].left ^= 1; } From 3019597a0d725b465b5ed3a27f077f415eb7d252 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Thu, 26 Feb 2026 21:02:03 -0500 Subject: [PATCH 2/5] add weak avl policy --- CMakeLists.txt | 9 + src/snmalloc/ds_core/rankbalancetree copy.h | 854 ++++++++++++++++++++ src/snmalloc/ds_core/rankbalancetree.h | 428 +++++++++- 3 files changed, 1286 insertions(+), 5 deletions(-) create mode 100644 src/snmalloc/ds_core/rankbalancetree copy.h diff --git a/CMakeLists.txt b/CMakeLists.txt index e017d1e7a..005f4ea4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,14 @@ option(SNMALLOC_ENABLE_WAIT_ON_ADDRESS "Use wait on address backoff 
strategy if option(SNMALLOC_PTHREAD_FORK_PROTECTION "Guard against forking while allocator locks are held using pthread_atfork hooks" OFF) option(SNMALLOC_ENABLE_FUZZING "Enable fuzzing instrumentation tests" OFF) option(SNMALLOC_USE_SELF_VENDORED_STL "Avoid using system STL" OFF) +set( + SNMALLOC_DEFAULT_RBTREE_POLICY + "WeakAVLPolicy" + CACHE STRING + "Default RBTree policy class in snmalloc::rankbalancetree (WeakAVLPolicy or RedBlackPolicy)") +set_property( + CACHE SNMALLOC_DEFAULT_RBTREE_POLICY + PROPERTY STRINGS WeakAVLPolicy RedBlackPolicy) # Options that apply only if we're not building the header-only library cmake_dependent_option(SNMALLOC_RUST_SUPPORT "Build static library for rust" OFF "NOT SNMALLOC_HEADER_ONLY_LIBRARY" OFF) cmake_dependent_option(SNMALLOC_RUST_LIBC_API "Include libc API in the rust library" OFF "SNMALLOC_RUST_SUPPORT" OFF) @@ -352,6 +360,7 @@ add_as_define_value(SNMALLOC_MIN_ALLOC_SIZE) add_as_define_value(SNMALLOC_MIN_ALLOC_STEP_SIZE) add_as_define_value(SNMALLOC_DEALLOC_BATCH_RING_ASSOC) add_as_define_value(SNMALLOC_DEALLOC_BATCH_RING_SET_BITS) +add_as_define_value(SNMALLOC_DEFAULT_RBTREE_POLICY) add_as_define_value(SNMALLOC_PAGESIZE) diff --git a/src/snmalloc/ds_core/rankbalancetree copy.h b/src/snmalloc/ds_core/rankbalancetree copy.h new file mode 100644 index 000000000..e6a2a02d1 --- /dev/null +++ b/src/snmalloc/ds_core/rankbalancetree copy.h @@ -0,0 +1,854 @@ +#pragma once + +#include "snmalloc/ds_core/concept.h" +#include "snmalloc/ds_core/defines.h" +#include "snmalloc/stl/array.h" + +#include +#include + +// This file was designed for red-black trees but later migrated to support +// rank-balanced trees. We abuse the "RB" acronym to mean "rank-balanced". + +namespace snmalloc +{ +#ifdef __cpp_concepts + /** + * The representation must define two types. `Contents` defines some + * identifier that can be mapped to a node as a value type. `Handle` defines + * a reference to the storage, which can be used to update it. 
+ * + * Conceptually, `Contents` is a node ID and `Handle` is a pointer to a node + * ID. + */ + template + concept RBRepTypes = requires() { + typename Rep::Handle; + typename Rep::Contents; + }; + + /** + * The representation must define operations on the holder and contents + * types. It must be able to 'dereference' a holder with `get`, assign to it + * with `set`, set and query the red/black colour of a node with + * `set_tree_tag` and `tree_tag`. + * + * The `ref` method provides uniform access to the children of a node, + * returning a holder pointing to either the left or right child, depending on + * the direction parameter. + * + * The backend must also provide two constant values. + * `Rep::null` defines a value that, if returned from `get`, indicates a null + * value. `Rep::root` defines a value that, if constructed directly, indicates + * a null value and can therefore be used as the initial raw bit pattern of + * the root node. + */ + template + concept RBRepMethods = + requires(typename Rep::Handle hp, typename Rep::Contents k, bool b) { + { Rep::get(hp) } -> ConceptSame; + { Rep::set(hp, k) } -> ConceptSame; + { Rep::tree_tag(k) } -> ConceptSame; + { Rep::set_tree_tag(k, b) } -> ConceptSame; + { Rep::ref(b, k) } -> ConceptSame; + { Rep::null } -> ConceptSameModRef; + { + typename Rep::Handle{const_cast< + stl::remove_const_t>*>( + &Rep::root)} + } -> ConceptSame; + }; + + template + concept RBRep = // + RBRepTypes // + && RBRepMethods // + && + ConceptSame>; +#endif + + namespace rankbalancetree + { + // Container that behaves like a C++ Ref type to enable assignment + // to treat left, right and root uniformly. 
+ template + class ChildRef + { + using H = typename Rep::Handle; + using K = typename Rep::Contents; + + H ptr; + + public: + constexpr ChildRef() = default; + + ChildRef(H p) : ptr(p) {} + + ChildRef(const ChildRef& other) = default; + + operator K() + { + return Rep::get(ptr); + } + + ChildRef& operator=(const ChildRef& other) = default; + + ChildRef& operator=(const K t) + { + // Use representations assigment, so we update the correct bits + // color and other things way also be stored in the Handle. + Rep::set(ptr, t); + return *this; + } + + /** + * Comparison operators. Note that these are nominal comparisons: + * they compare the identities of the references rather than the values + * referenced. + * comparison of the values held in these child references. + * @{ + */ + bool operator==(const ChildRef t) const + { + return ptr == t.ptr; + } + + bool operator!=(const ChildRef t) const + { + return ptr != t.ptr; + } + + ///@} + + bool is_null() + { + return Rep::get(ptr) == Rep::null; + } + + /** + * Return the reference in some printable format defined by the + * representation. + */ + auto printable() + { + return Rep::printable(ptr); + } + + static ChildRef get_dir(bool direction, K k) + { + return {Rep::ref(direction, k)}; + } + }; + + template + struct RBStep + { + ChildRef node; + bool dir; + + // Default constructor needed for Array. + constexpr RBStep() = default; + + // Remove copy constructors to avoid accidentally copying and mutating the + // path. + RBStep(const RBStep& other) = delete; + RBStep& operator=(const RBStep& other) = delete; + + /** + * Update the step to point to a new node and direction. + */ + void set(ChildRef r, bool direction) + { + node = r; + dir = direction; + } + + /** + * Update the step to point to a new node and direction. + */ + void set(typename Rep::Handle r, bool direction) + { + set(ChildRef(r), direction); + } + }; + + // Internal representation of a path in the tree. 
+ // Exposed to allow for some composite operations to be defined + // externally. + template + struct RBPath + { + using ChildRef = rankbalancetree::ChildRef; + using RBStep = rankbalancetree::RBStep; + + stl::Array path; + size_t length = 0; + + RBPath(typename Rep::Handle root) + { + path[0].set(root, false); + length = 1; + } + + ChildRef ith(size_t n) + { + SNMALLOC_ASSERT(length >= n); + return path[length - n - 1].node; + } + + bool ith_dir(size_t n) + { + SNMALLOC_ASSERT(length >= n); + return path[length - n - 1].dir; + } + + ChildRef curr() + { + return ith(0); + } + + bool curr_dir() + { + return ith_dir(0); + } + + ChildRef parent() + { + return ith(1); + } + + bool parent_dir() + { + return ith_dir(1); + } + + ChildRef grand_parent() + { + return ith(2); + } + + // Extend path in `direction`. + // If `direction` contains `Rep::null`, do not extend the path. + // Returns false if path is not extended. + bool move(bool direction) + { + auto next = ChildRef::get_dir(direction, curr()); + if (next.is_null()) + return false; + path[length].set(next, direction); + length++; + return true; + } + + // Extend path in `direction`. + // If `direction` contains zero, do not extend the path. + // Returns false if path is extended with null. + bool move_inc_null(bool direction) + { + auto next = ChildRef::get_dir(direction, curr()); + path[length].set(next, direction); + length++; + return !(next.is_null()); + } + + // Remove top element from the path. + void pop() + { + SNMALLOC_ASSERT(length > 0); + length--; + } + + // If a path is changed in place, then some references can be stale. + // This rewalks the updated path, and corrects any internal references. + // `expected` is used to run the update, or if `false` used to check + // that no update is required. 
+ void fixup(bool expected = true) + { + if (!run_checks && !expected) + return; + + // During a splice in remove the path can be invalidated, + // this refreshs the path so that the it refers to the spliced + // nodes fields. + // TODO optimise usage to avoid traversing whole path. + for (size_t i = 1; i < length; i++) + { + auto parent = path[i - 1].node; + auto& curr = path[i].node; + auto dir = path[i].dir; + auto actual = ChildRef::get_dir(dir, parent); + if (actual != curr) + { + if (!expected) + { + snmalloc::error("Performed an unexpected fixup."); + } + curr = actual; + } + } + } + + void print() + { + if constexpr (TRACE) + { + for (size_t i = 0; i < length; i++) + { + message<1024>( + " -> {} @ {} ({})", + Rep::printable(typename Rep::Contents(path[i].node)), + path[i].node.printable(), + path[i].dir); + } + } + } + }; + } // namespace rankbalancetree + + namespace rankbalancetree + { + template + struct RedBlackPolicy + { + using K = typename Rep::Contents; + using H = typename Rep::Handle; + using ChildRef = rankbalancetree::ChildRef; + using RBPath = rankbalancetree::RBPath; + + /* + * Verify structural invariants. Returns the black depth of the `curr`ent + * node. 
+ */ + int invariant(K curr, K lower = Rep::null, K upper = Rep::null) + { + if constexpr (!run_checks) + { + UNUSED(curr, lower, upper); + return 0; + } + else + { + if (curr == Rep::null) + return 1; + + if ( + ((lower != Rep::null) && Rep::compare(lower, curr)) || + ((upper != Rep::null) && Rep::compare(curr, upper))) + { + report_fatal_error( + "Invariant failed: {} is out of bounds {}..{}", + Rep::printable(curr), + Rep::printable(lower), + Rep::printable(upper)); + } + + if ( + Rep::tree_tag(curr) && + (Rep::tree_tag(ChildRef::get_dir(true, curr)) || + Rep::tree_tag(ChildRef::get_dir(false, curr)))) + { + report_fatal_error( + "Invariant failed: {} is red and has red child", + Rep::printable(curr)); + } + + int left_inv = invariant(ChildRef::get_dir(true, curr), lower, curr); + int right_inv = + invariant(ChildRef::get_dir(false, curr), curr, upper); + + if (left_inv != right_inv) + { + report_fatal_error( + "Invariant failed: {} has different black depths", + Rep::printable(curr)); + } + + if (Rep::tree_tag(curr)) + return left_inv; + + return left_inv + 1; + } + } + + // Insert an element at the given path. + template + void insert_path( + RBPath& path, K value, DebugLogger debug_log, RootGetter get_root) + { + SNMALLOC_ASSERT(path.curr().is_null()); + path.curr() = value; + ChildRef::get_dir(true, path.curr()) = Rep::null; + ChildRef::get_dir(false, path.curr()) = Rep::null; + Rep::set_tree_tag(value, true); + + debug_log("Insert ", path); + + // Propogate double red up to rebalance. 
+ // These notes were particularly clear for explaining insert + // https://www.cs.cmu.edu/~fp/courses/15122-f10/lectures/17-rbtrees.pdf + while (path.curr() != get_root()) + { + SNMALLOC_ASSERT(Rep::tree_tag(path.curr())); + if (!Rep::tree_tag(path.parent())) + { + invariant(get_root()); + return; + } + bool curr_dir = path.curr_dir(); + K curr = path.curr(); + K parent = path.parent(); + K grand_parent = path.grand_parent(); + SNMALLOC_ASSERT(!Rep::tree_tag(grand_parent)); + if (path.parent_dir() == curr_dir) + { + debug_log("Insert - double red case 1", path, path.grand_parent()); + /* Same direction case + * G - grand parent + * P - parent + * C - current + * S - sibling + * + * G P + * / \ / \ + * A P --> G C + * / \ / \ + * S C A S + */ + K sibling = ChildRef::get_dir(!curr_dir, parent); + Rep::set_tree_tag(curr, false); + ChildRef::get_dir(curr_dir, grand_parent) = sibling; + ChildRef::get_dir(!curr_dir, parent) = grand_parent; + path.grand_parent() = parent; + debug_log( + "Insert - double red case 1 - done", path, path.grand_parent()); + } + else + { + debug_log("Insert - double red case 2", path, path.grand_parent()); + /* G - grand parent + * P - parent + * C - current + * Cg - Current child for grand parent + * Cp - Current child for parent + * + * G C + * / \ / \ + * A P G P + * / \ --> / \ / \ + * C B A Cg Cp B + * / \ + * Cg Cp + */ + K child_g = ChildRef::get_dir(curr_dir, curr); + K child_p = ChildRef::get_dir(!curr_dir, curr); + + Rep::set_tree_tag(parent, false); + path.grand_parent() = curr; + ChildRef::get_dir(curr_dir, curr) = grand_parent; + ChildRef::get_dir(!curr_dir, curr) = parent; + ChildRef::get_dir(curr_dir, parent) = child_p; + ChildRef::get_dir(!curr_dir, grand_parent) = child_g; + debug_log( + "Insert - double red case 2 - done", path, path.grand_parent()); + } + + // Move to what replaced grand parent. 
+ path.pop(); + path.pop(); + invariant(path.curr()); + } + Rep::set_tree_tag(get_root(), false); + invariant(get_root()); + } + + template + bool remove_path(RBPath& path, DebugLogger debug_log, RootGetter get_root) + { + ChildRef splice = path.curr(); + SNMALLOC_ASSERT(!(splice.is_null())); + + debug_log("Removing", path); + + /* + * Find immediately smaller leaf element (rightmost descendant of left + * child) to serve as the replacement for this node. We may not have a + * left subtree, so this may not move the path at all. + */ + path.move(true); + while (path.move(false)) + { + } + + K curr = path.curr(); + + { + // Locally extract right-child-less replacement, replacing it with its + // left child, if any + K child = ChildRef::get_dir(true, path.curr()); + // Unlink target replacing with possible child. + path.curr() = child; + } + + bool leaf_red = Rep::tree_tag(curr); + + if (path.curr() != splice) + { + // If we had a left child, replace ourselves with the extracted value + // from above + Rep::set_tree_tag(curr, Rep::tree_tag(splice)); + ChildRef::get_dir(true, curr) = K{ChildRef::get_dir(true, splice)}; + ChildRef::get_dir(false, curr) = K{ChildRef::get_dir(false, splice)}; + splice = curr; + path.fixup(); + } + + debug_log("Splice done", path); + + // TODO: Clear node contents? + + // Red leaf removal requires no rebalancing. + if (leaf_red) + return true; + + // Now in the double black case. + // End of path is considered double black, that is, one black element + // shorter than satisfies the invariant. The following algorithm moves + // up the path until it finds a close red element or the root. If we + // convert the tree to one, in which the root is double black, then the + // algorithm is complete, as there is nothing to be out of balance with. + // Otherwise, we are searching for nearby red elements so we can rotate + // the tree to rebalance. 
The following slides nicely cover the case + // analysis below + // https://www.cs.purdue.edu/homes/ayg/CS251/slides/chap13c.pdf + while (path.curr() != get_root()) + { + K parent = path.parent(); + bool cur_dir = path.curr_dir(); + K sibling = ChildRef::get_dir(!cur_dir, parent); + + /* Handle red sibling case. + * This performs a rotation to give a black sibling. + * + * p s(b) + * / \ / \ + * c s(r) --> p(r) m + * / \ / \ + * n m c n + * + * By invariant we know that p, n and m are all initially black. + */ + if (Rep::tree_tag(sibling)) + { + debug_log("Red sibling", path, path.parent()); + K nibling = ChildRef::get_dir(cur_dir, sibling); + ChildRef::get_dir(!cur_dir, parent) = nibling; + ChildRef::get_dir(cur_dir, sibling) = parent; + Rep::set_tree_tag(parent, true); + Rep::set_tree_tag(sibling, false); + path.parent() = sibling; + // Manually fix path. Using path.fixup would alter the complexity + // class. + path.pop(); + path.move(cur_dir); + path.move_inc_null(cur_dir); + path.fixup(false); + debug_log("Red sibling - done", path, path.parent()); + continue; + } + + /* Handle red nibling case 1. + *
<p> <s>
+ * / \ / \ + * c s --> p rn + * / \ / \ + * on rn c on + */ + if (Rep::tree_tag(ChildRef::get_dir(!cur_dir, sibling))) + { + debug_log("Red nibling 1", path, path.parent()); + K r_nibling = ChildRef::get_dir(!cur_dir, sibling); + K o_nibling = ChildRef::get_dir(cur_dir, sibling); + ChildRef::get_dir(cur_dir, sibling) = parent; + ChildRef::get_dir(!cur_dir, parent) = o_nibling; + path.parent() = sibling; + Rep::set_tree_tag(r_nibling, false); + Rep::set_tree_tag(sibling, Rep::tree_tag(parent)); + Rep::set_tree_tag(parent, false); + debug_log("Red nibling 1 - done", path, path.parent()); + break; + } + + /* Handle red nibling case 2. + *
<p> <rn>
+ * / \ / \ + * c s --> p s + * / \ / \ / \ + * rn on c rno rns on + * / \ + * rno rns + */ + if (Rep::tree_tag(ChildRef::get_dir(cur_dir, sibling))) + { + debug_log("Red nibling 2", path, path.parent()); + K r_nibling = ChildRef::get_dir(cur_dir, sibling); + K r_nibling_same = ChildRef::get_dir(cur_dir, r_nibling); + K r_nibling_opp = ChildRef::get_dir(!cur_dir, r_nibling); + ChildRef::get_dir(!cur_dir, parent) = r_nibling_same; + ChildRef::get_dir(cur_dir, sibling) = r_nibling_opp; + ChildRef::get_dir(cur_dir, r_nibling) = parent; + ChildRef::get_dir(!cur_dir, r_nibling) = sibling; + path.parent() = r_nibling; + Rep::set_tree_tag(r_nibling, Rep::tree_tag(parent)); + Rep::set_tree_tag(parent, false); + debug_log("Red nibling 2 - done", path, path.parent()); + break; + } + + // Handle black sibling and niblings, and red parent. + if (Rep::tree_tag(parent)) + { + debug_log("Black sibling and red parent case", path, path.parent()); + Rep::set_tree_tag(parent, false); + Rep::set_tree_tag(sibling, true); + debug_log( + "Black sibling and red parent case - done", path, path.parent()); + break; + } + // Handle black sibling and niblings and black parent. + debug_log( + "Black sibling, niblings and black parent case", + path, + path.parent()); + Rep::set_tree_tag(sibling, true); + path.pop(); + invariant(path.curr()); + debug_log( + "Black sibling, niblings and black parent case - done", + path, + path.curr()); + } + return true; + } + }; + + template + struct WeakAVLPolicy + { + using K = typename Rep::Contents; + using ChildRef = rankbalancetree::ChildRef; + using RBPath = rankbalancetree::RBPath; + + int invariant(K, K, K) {} + + }; + } // namespace rankbalancetree + + /** + * Contains a self balancing binary tree. + * + * The template parameter Rep provides the representation of the nodes as a + * collection of functions and types that are requires. See the associated + * test for an example. + * + * run_checks enables invariant checking on the tree. Enabled in Debug. 
+ * TRACE prints all the sets of the rebalancing operations. Only enabled by + * the test when debugging a specific failure. + */ + template< + SNMALLOC_CONCEPT(RBRep) Rep, + bool run_checks = Debug, + bool TRACE = false, + typename Policy = rankbalancetree::RedBlackPolicy> + class RBTree : public Policy + { + using H = typename Rep::Handle; + using K = typename Rep::Contents; + using ChildRef = rankbalancetree::ChildRef; + using RBStep = rankbalancetree::RBStep; + + // Root field of the tree + typename stl::remove_const_t> + root{Rep::root}; + + ChildRef get_root() + { + return {H{&root}}; + } + + void invariant() + { + Policy::invariant(get_root()); + } + + public: + using RBPath = rankbalancetree::RBPath; + + private: + struct DebugLogger + { + RBTree* context; + + void operator()(const char* msg, RBPath& path) + { + this->operator()(msg, path, context->get_root()); + } + + void operator()(const char* msg, RBPath& path, ChildRef base) + { + if constexpr (TRACE) + { + message<100>("------- {}", Rep::name()); + message<1024>(msg); + path.print(); + context->print(base); + } + else + { + UNUSED(msg, path, base); + } + } + }; + + public: + constexpr RBTree() = default; + + void print() + { + print(get_root()); + } + + void print(ChildRef curr, const char* indent = "", size_t depth = 0) + { + if constexpr (TRACE) + { + if (curr.is_null()) + { + message<1024>("{}\\_null", indent); + return; + } + +#ifdef _MSC_VER + auto colour = Rep::tree_tag(curr) ? "R-" : "B-"; + auto reset = ""; +#else + auto colour = Rep::tree_tag(curr) ? "\e[1;31m" : "\e[1;34m"; + auto reset = "\e[0m"; +#endif + + message<1024>( + "{}\\_{}{}{}@{} ({})", + indent, + colour, + Rep::printable((K(curr))), + reset, + curr.printable(), + depth); + if (!(ChildRef::get_dir(true, curr).is_null() && + ChildRef::get_dir(false, curr).is_null())) + { + // As the tree should be balanced, the depth should not exceed 128 if + // there are 2^64 elements in the tree. 
This is a debug feature, and + // it would be impossible to debug something of this size, so this is + // considerably larger than required. + // If there is a bug that leads to an unbalanced tree, this might be + // insufficient to accurately display the tree, but it will still be + // memory safe as the search code is bounded by the string size. + static constexpr size_t max_depth = 128; + char s_indent[max_depth]; + size_t end = 0; + for (; end < max_depth - 1; end++) + { + if (indent[end] == 0) + break; + s_indent[end] = indent[end]; + } + s_indent[end] = '|'; + s_indent[end + 1] = 0; + print(ChildRef::get_dir(true, curr), s_indent, depth + 1); + s_indent[end] = ' '; + print(ChildRef::get_dir(false, curr), s_indent, depth + 1); + } + } + } + + bool find(RBPath& path, K value) + { + bool dir; + + if (path.curr().is_null()) + return false; + + do + { + if (Rep::equal(path.curr(), value)) + return true; + dir = Rep::compare(path.curr(), value); + } while (path.move_inc_null(dir)); + + return false; + } + + bool is_empty() + { + return get_root().is_null(); + } + + K remove_min() + { + if (is_empty()) + return Rep::null; + + auto path = get_root_path(); + while (path.move(true)) + { + } + + K result = path.curr(); + + remove_path(path); + return result; + } + + bool remove_elem(K value) + { + if (is_empty()) + return false; + + auto path = get_root_path(); + if (!find(path, value)) + return false; + + remove_path(path); + return true; + } + + bool insert_elem(K value) + { + auto path = get_root_path(); + + if (find(path, value)) + return false; + + Policy::insert_path( + path, value, DebugLogger{this}, [this]() { return get_root(); }); + return true; + } + + RBPath get_root_path() + { + return RBPath(H{&root}); + } + + void insert_path(RBPath& path, K value) + { + Policy::insert_path( + path, value, DebugLogger{this}, [this]() { return get_root(); }); + } + + bool remove_path(RBPath& path) + { + return Policy::remove_path( + path, DebugLogger{this}, [this]() { 
return get_root(); }); + } + }; +} // namespace snmalloc diff --git a/src/snmalloc/ds_core/rankbalancetree.h b/src/snmalloc/ds_core/rankbalancetree.h index 5ae4e3a1e..8c739e64c 100644 --- a/src/snmalloc/ds_core/rankbalancetree.h +++ b/src/snmalloc/ds_core/rankbalancetree.h @@ -12,6 +12,10 @@ namespace snmalloc { +#ifndef SNMALLOC_DEFAULT_RBTREE_POLICY +# define SNMALLOC_DEFAULT_RBTREE_POLICY WeakAVLPolicy +#endif + #ifdef __cpp_concepts /** * The representation must define two types. `Contents` defines some @@ -630,8 +634,419 @@ namespace snmalloc return true; } }; - struct WeakAVLPolicy { - + + template + struct WeakAVLPolicy + { + using K = typename Rep::Contents; + using H = typename Rep::Handle; + using ChildRef = rankbalancetree::ChildRef; + using RBPath = rankbalancetree::RBPath; + + // Null nodes have conceptual rank -1 and therefore odd parity. + static constexpr bool null_rank_parity = true; + + static bool rank_parity(K node) + { + if (node == Rep::null) + return null_rank_parity; + return Rep::tree_tag(node); + } + + static void toggle_rank_parity(K node) + { + SNMALLOC_ASSERT(node != Rep::null); + Rep::set_tree_tag(node, !Rep::tree_tag(node)); + } + + // If parent and child have the same parity, parent is 2-level above the + // child + static bool edge_is_even(K parent, bool dir) + { + K child = ChildRef::get_dir(dir, parent); + return rank_parity(parent) == rank_parity(child); + } + + // If parent and child have different parity, parent is 1-level above the + // child + static bool edge_is_odd(K parent, bool dir) + { + return !edge_is_even(parent, dir); + } + + // A node is a leaf if both of its children are null. + // A leaf node always has rank 0. + static bool is_leaf(K node) + { + return ChildRef::get_dir(true, node).is_null() && + ChildRef::get_dir(false, node).is_null(); + } + + // Check if a node has both children 2-levels lower in rank. + // These nodes can only be created during deletion.
+ static bool is_22(K node) + { + return edge_is_even(node, true) && edge_is_even(node, false); + } + + // Do rotation using the child at the given direction as the pivot. + // This is the normal rotation in binary search tree: the pivot + // transfer an opposite child to its parent then becomes the parent + // of the old parent. + static K rotate_subtree(ChildRef subtree, bool direction) + { + K root = subtree; + K pivot = ChildRef::get_dir(direction, root); + SNMALLOC_ASSERT(pivot != Rep::null); + + K transfer = ChildRef::get_dir(!direction, pivot); + ChildRef::get_dir(direction, root) = transfer; + ChildRef::get_dir(!direction, pivot) = root; + subtree = pivot; + return pivot; + } + + /* + * Verify structural invariants. Returns the rank of `curr`, using null + * nodes at rank -1. + */ + int invariant(K curr, K lower = Rep::null, K upper = Rep::null) + { + if constexpr (!run_checks) + { + UNUSED(curr, lower, upper); + return 0; + } + else + { + if (curr == Rep::null) + return -1; + + if ( + ((lower != Rep::null) && Rep::compare(lower, curr)) || + ((upper != Rep::null) && Rep::compare(curr, upper))) + { + report_fatal_error( + "Invariant failed: {} is out of bounds {}..{}", + Rep::printable(curr), + Rep::printable(lower), + Rep::printable(upper)); + } + + K left = ChildRef::get_dir(true, curr); + K right = ChildRef::get_dir(false, curr); + int left_rank = invariant(left, lower, curr); + int right_rank = invariant(right, curr, upper); + + // The rank computed from either side should be the same. + int left_from_edge = left_rank + (edge_is_odd(curr, true) ? 1 : 2); + int right_from_edge = right_rank + (edge_is_odd(curr, false) ? 
1 : 2); + if (left_from_edge != right_from_edge) + { + report_fatal_error( + "Invariant failed: {} computes different ranks from each side", + Rep::printable(curr)); + } + + if (is_leaf(curr) && (left_from_edge != 0)) + { + report_fatal_error( + "Invariant failed: leaf {} has rank {} (expected 0)", + Rep::printable(curr), + left_from_edge); + } + + if ((left_from_edge & 1) != static_cast(Rep::tree_tag(curr))) + { + report_fatal_error( + "Invariant failed: {} parity bit disagrees with computed rank {}", + Rep::printable(curr), + left_from_edge); + } + + return left_from_edge; + } + } + + template + void insert_path( + RBPath& path, K value, DebugLogger debug_log, RootGetter get_root) + { + // Insert an external node (rank 0, even parity). Null children are + // conceptual rank -1. + SNMALLOC_ASSERT(path.curr().is_null()); + path.curr() = value; + // Create a leaf node. + ChildRef::get_dir(true, path.curr()) = Rep::null; + ChildRef::get_dir(false, path.curr()) = Rep::null; + Rep::set_tree_tag(value, false); + + debug_log("Insert", path); + + while (path.curr() != get_root()) + { + K node = path.curr(); + K parent = path.parent(); + bool node_dir = path.curr_dir(); + + // If parent and inserted node have opposite parity, this edge is + // rank-diff 1 and insertion is already valid as parent does not + // need to be promoted. + // (P) (P) + // ╱ ╲ => ╱ ╲ + // * (S) (N) (S) + if (edge_is_odd(parent, node_dir)) + break; + + bool sibling_dir = !node_dir; + if (edge_is_odd(parent, sibling_dir)) + { + /* + * Case 1: parent has two 1-children before insertion and now has + * a 0-child to the inserted node. Promote parent and continue. + * + * (GP) (GP) + * │ x │ x-1 + * │ (P) + * 0 │ 1 ╱ ╲ + * (N) ─── (P) => (N) (S) + * ╲ 1 + * (S) + */ + debug_log("Insert - promote parent", path, path.parent()); + toggle_rank_parity(parent); + path.pop(); + continue; + } + + if (edge_is_even(node, !node_dir)) + { + /* + * Case 2: sibling edge from node is 2-level lower in rank. 
And node has a 1-node + * along the same direction. Rotate parent once. + * + * (GP) (GP) + * 0 │ x x │ + * (N) ─── (P) => (N) + * 1 ╱ ╲ 2 ╲ 2 ╱ ╲ 1 + * (C1) (C2) ╲ (C1) (P) + * (S) 1 ╱ ╲ 1 + * (C2) (S) + */ + debug_log("Insert - single rotation", path, path.parent()); + rotate_subtree(path.parent(), node_dir); + // RBPath caches handles along the search path; rotation invalidates + // those cached references until we refresh. + path.fixup(); + // Parent is demoted by one rank while node is at the same rank. + toggle_rank_parity(parent); + } + else + { + /* + * Case 3: sibling edge from node is 2-level lower in rank. And node has a 1-node + * along the opposite direction. Do zig-zag rotation. + * + * (GP) (GP) + * 0 │ x │ x + * (N) ─── (P) => (C1) + * 2 ╱ ╲ 1 ╲ 1 ╱ ╲ + * ╱ (C1) ╲ 2 (N) (P) + * (C2) ╱ ╲ ╲ 1 ╱ ╲ ╱ ╲ + * (A) (B) (S) (C2)(A)(B)(S) + * + */ + debug_log("Insert - double rotation", path, path.parent()); + K middle = ChildRef::get_dir(!node_dir, node); + + rotate_subtree(path.curr(), !node_dir); + path.fixup(); + + rotate_subtree(path.parent(), node_dir); + path.fixup(); + + // Middle is promoted, parent and node are demoted. + toggle_rank_parity(middle); + toggle_rank_parity(parent); + toggle_rank_parity(node); + } + + invariant(get_root()); + return; + } + + invariant(get_root()); + } + + template + bool remove_path(RBPath& path, DebugLogger debug_log, RootGetter get_root) + { + ChildRef splice = path.curr(); + SNMALLOC_ASSERT(!(splice.is_null())); + + debug_log("Removing", path); + + // Extract predecessor (rightmost descendant in left subtree), if any. + path.move(true); + while (path.move(false)) + { + } + + K curr = path.curr(); + + { + // Extract predecessor node from its current location. + K child = ChildRef::get_dir(true, path.curr()); + path.curr() = child; + } + + if (path.curr() != splice) + { + // Move extracted predecessor into the splice location. 
+ Rep::set_tree_tag(curr, Rep::tree_tag(splice)); + ChildRef::get_dir(true, curr) = K{ChildRef::get_dir(true, splice)}; + ChildRef::get_dir(false, curr) = K{ChildRef::get_dir(false, splice)}; + splice = curr; + path.fixup(); + } + + debug_log("Splice done", path); + + while (path.curr() != get_root()) + { + K cursor = path.parent(); + bool dir = path.curr_dir(); + bool sibling_dir = !dir; + + /* + * Case 0: deleted-side edge changes 1 -> 2. This can stop immediately + * unless we created a 2-2 leaf, which must be demoted and bubbled up. + * + * (C) (C) + * X ╱ ╲ 1 X ╱ ╲ + * (*) (D) => (*) ╲ 2 + * (D) + */ + if (edge_is_even(cursor, dir)) + { + if (!(is_leaf(cursor) && is_22(cursor))) + { + invariant(get_root()); + return true; + } + + // 2-2 leaf must be demoted and we continue upwards. + toggle_rank_parity(cursor); + path.pop(); + continue; + } + + K sibling = ChildRef::get_dir(sibling_dir, cursor); + SNMALLOC_ASSERT(sibling != Rep::null); + + /* + * Case 1: deleted-side edge is now 3-level lower and sibling edge is 2-level lower. + * Demote cursor and continue upward. + * + * (P) (P) + * │ X │ X+1 + * (C) │ + * 2 ╱ ╲ 3 => (C) + * ╱ ╲ 1 ╱ ╲ 2 + * (*) ╲ (*) ╲ + * (D) (D) + */ + if (edge_is_even(cursor, sibling_dir)) + { + toggle_rank_parity(cursor); + path.pop(); + continue; + } + + /* + * Case 2: sibling is 1-level lower and it is a 2-2 node. Demote sibling and cursor, then + * continue upward. + * + * (P) (P) + * │ X │ X+1 + * (C) │ + * 1 ╱ ╲ 3 => (C) + * (S) ╲ 1 ╱ ╲ 2 + * 2╱ ╲2 ╲ (S) ╲ + * ╱ ╲ (D) 1 ╱ ╲ 1 (D) + * (*) (*) (*) (*) + */ + if (is_22(sibling)) + { + toggle_rank_parity(sibling); + toggle_rank_parity(cursor); + path.pop(); + continue; + } + + /* + * Case 3: sibling cannot be demoted since it has a 1-edge. if sibling has a 1-child on the same + * side. Single rotation at cursor. 
+ * + * (P) (P) + * │ X │ X + * (C) (S) + * 1 ╱ ╲ 3 => 2 ╱ ╲ 1 + * (S) ╲ ╱ (C) + * 1 ╱ ╲ Y ╲ (T) Y ╱ ╲ 2 + * (T) ╲ (D) ╱ ╲ + * (*) (*) (D) + */ + if (edge_is_odd(sibling, sibling_dir)) + { + bool inner_is_2 = edge_is_even(sibling, !sibling_dir); + + rotate_subtree(path.parent(), sibling_dir); + // RBPath caches references to node fields, so refresh after rotate. + path.fixup(); + + // sibling is promoted, cursor demoted. + toggle_rank_parity(sibling); + toggle_rank_parity(cursor); + + // Special leaf case requires one extra demotion of cursor. + if (inner_is_2 && is_leaf(cursor)) + toggle_rank_parity(cursor); + + invariant(get_root()); + return true; + } + + /* + * Case 4: the 1-child is on the opposite side of the sibling. + * + * (P) (P) + * │ X │ X + * (C) (T) + * 1 ╱ ╲ 3 => 2 ╱ ╲ 2 + * (S) ╲ ╱ ╲ + * 2 ╱ ╲ 1 ╲ (S) (C) + * ╱ (T) (D) 1 ╱ ╲ ╱ ╲ 1 + * (*) y ╱ ╲ z (*) (A) (B) (D) + * (A) (B) + */ + auto sibling_ref = ChildRef::get_dir(sibling_dir, cursor); + rotate_subtree(sibling_ref, !sibling_dir); + path.fixup(); + rotate_subtree(path.parent(), sibling_dir); + path.fixup(); + + // Sibling is demoted by one. Cursor and sibling's 1-child's rank changes are two, + // so no further toggle is needed. 
+ toggle_rank_parity(sibling); + + invariant(get_root()); + return true; + } + + invariant(get_root()); + return true; + } }; } // namespace rankbalancetree @@ -650,7 +1065,8 @@ namespace snmalloc SNMALLOC_CONCEPT(RBRep) Rep, bool run_checks = Debug, bool TRACE = false, - typename Policy = rankbalancetree::RedBlackPolicy> + typename Policy = + rankbalancetree::SNMALLOC_DEFAULT_RBTREE_POLICY> class RBTree : public Policy { using H = typename Rep::Handle; @@ -833,12 +1249,14 @@ namespace snmalloc void insert_path(RBPath& path, K value) { - Policy::insert_path(path, value, DebugLogger{this}, [this]() { return get_root(); }); + Policy::insert_path( + path, value, DebugLogger{this}, [this]() { return get_root(); }); } bool remove_path(RBPath& path) { - return Policy::remove_path(path, DebugLogger{this}, [this]() { return get_root(); }); + return Policy::remove_path( + path, DebugLogger{this}, [this]() { return get_root(); }); } }; } // namespace snmalloc From ee6114d0dda4f69be4e299b5fe7b4bf7e948ecce Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Thu, 26 Feb 2026 21:02:24 -0500 Subject: [PATCH 3/5] remove extra copy --- src/snmalloc/ds_core/rankbalancetree copy.h | 854 -------------------- 1 file changed, 854 deletions(-) delete mode 100644 src/snmalloc/ds_core/rankbalancetree copy.h diff --git a/src/snmalloc/ds_core/rankbalancetree copy.h b/src/snmalloc/ds_core/rankbalancetree copy.h deleted file mode 100644 index e6a2a02d1..000000000 --- a/src/snmalloc/ds_core/rankbalancetree copy.h +++ /dev/null @@ -1,854 +0,0 @@ -#pragma once - -#include "snmalloc/ds_core/concept.h" -#include "snmalloc/ds_core/defines.h" -#include "snmalloc/stl/array.h" - -#include -#include - -// This file was designed for red-black trees but later migrated to support -// rank-balanced trees. We abuse the "RB" acronym to mean "rank-balanced". - -namespace snmalloc -{ -#ifdef __cpp_concepts - /** - * The representation must define two types. 
`Contents` defines some - * identifier that can be mapped to a node as a value type. `Handle` defines - * a reference to the storage, which can be used to update it. - * - * Conceptually, `Contents` is a node ID and `Handle` is a pointer to a node - * ID. - */ - template - concept RBRepTypes = requires() { - typename Rep::Handle; - typename Rep::Contents; - }; - - /** - * The representation must define operations on the holder and contents - * types. It must be able to 'dereference' a holder with `get`, assign to it - * with `set`, set and query the red/black colour of a node with - * `set_tree_tag` and `tree_tag`. - * - * The `ref` method provides uniform access to the children of a node, - * returning a holder pointing to either the left or right child, depending on - * the direction parameter. - * - * The backend must also provide two constant values. - * `Rep::null` defines a value that, if returned from `get`, indicates a null - * value. `Rep::root` defines a value that, if constructed directly, indicates - * a null value and can therefore be used as the initial raw bit pattern of - * the root node. - */ - template - concept RBRepMethods = - requires(typename Rep::Handle hp, typename Rep::Contents k, bool b) { - { Rep::get(hp) } -> ConceptSame; - { Rep::set(hp, k) } -> ConceptSame; - { Rep::tree_tag(k) } -> ConceptSame; - { Rep::set_tree_tag(k, b) } -> ConceptSame; - { Rep::ref(b, k) } -> ConceptSame; - { Rep::null } -> ConceptSameModRef; - { - typename Rep::Handle{const_cast< - stl::remove_const_t>*>( - &Rep::root)} - } -> ConceptSame; - }; - - template - concept RBRep = // - RBRepTypes // - && RBRepMethods // - && - ConceptSame>; -#endif - - namespace rankbalancetree - { - // Container that behaves like a C++ Ref type to enable assignment - // to treat left, right and root uniformly. 
- template - class ChildRef - { - using H = typename Rep::Handle; - using K = typename Rep::Contents; - - H ptr; - - public: - constexpr ChildRef() = default; - - ChildRef(H p) : ptr(p) {} - - ChildRef(const ChildRef& other) = default; - - operator K() - { - return Rep::get(ptr); - } - - ChildRef& operator=(const ChildRef& other) = default; - - ChildRef& operator=(const K t) - { - // Use representations assigment, so we update the correct bits - // color and other things way also be stored in the Handle. - Rep::set(ptr, t); - return *this; - } - - /** - * Comparison operators. Note that these are nominal comparisons: - * they compare the identities of the references rather than the values - * referenced. - * comparison of the values held in these child references. - * @{ - */ - bool operator==(const ChildRef t) const - { - return ptr == t.ptr; - } - - bool operator!=(const ChildRef t) const - { - return ptr != t.ptr; - } - - ///@} - - bool is_null() - { - return Rep::get(ptr) == Rep::null; - } - - /** - * Return the reference in some printable format defined by the - * representation. - */ - auto printable() - { - return Rep::printable(ptr); - } - - static ChildRef get_dir(bool direction, K k) - { - return {Rep::ref(direction, k)}; - } - }; - - template - struct RBStep - { - ChildRef node; - bool dir; - - // Default constructor needed for Array. - constexpr RBStep() = default; - - // Remove copy constructors to avoid accidentally copying and mutating the - // path. - RBStep(const RBStep& other) = delete; - RBStep& operator=(const RBStep& other) = delete; - - /** - * Update the step to point to a new node and direction. - */ - void set(ChildRef r, bool direction) - { - node = r; - dir = direction; - } - - /** - * Update the step to point to a new node and direction. - */ - void set(typename Rep::Handle r, bool direction) - { - set(ChildRef(r), direction); - } - }; - - // Internal representation of a path in the tree. 
- // Exposed to allow for some composite operations to be defined - // externally. - template - struct RBPath - { - using ChildRef = rankbalancetree::ChildRef; - using RBStep = rankbalancetree::RBStep; - - stl::Array path; - size_t length = 0; - - RBPath(typename Rep::Handle root) - { - path[0].set(root, false); - length = 1; - } - - ChildRef ith(size_t n) - { - SNMALLOC_ASSERT(length >= n); - return path[length - n - 1].node; - } - - bool ith_dir(size_t n) - { - SNMALLOC_ASSERT(length >= n); - return path[length - n - 1].dir; - } - - ChildRef curr() - { - return ith(0); - } - - bool curr_dir() - { - return ith_dir(0); - } - - ChildRef parent() - { - return ith(1); - } - - bool parent_dir() - { - return ith_dir(1); - } - - ChildRef grand_parent() - { - return ith(2); - } - - // Extend path in `direction`. - // If `direction` contains `Rep::null`, do not extend the path. - // Returns false if path is not extended. - bool move(bool direction) - { - auto next = ChildRef::get_dir(direction, curr()); - if (next.is_null()) - return false; - path[length].set(next, direction); - length++; - return true; - } - - // Extend path in `direction`. - // If `direction` contains zero, do not extend the path. - // Returns false if path is extended with null. - bool move_inc_null(bool direction) - { - auto next = ChildRef::get_dir(direction, curr()); - path[length].set(next, direction); - length++; - return !(next.is_null()); - } - - // Remove top element from the path. - void pop() - { - SNMALLOC_ASSERT(length > 0); - length--; - } - - // If a path is changed in place, then some references can be stale. - // This rewalks the updated path, and corrects any internal references. - // `expected` is used to run the update, or if `false` used to check - // that no update is required. 
- void fixup(bool expected = true) - { - if (!run_checks && !expected) - return; - - // During a splice in remove the path can be invalidated, - // this refreshs the path so that the it refers to the spliced - // nodes fields. - // TODO optimise usage to avoid traversing whole path. - for (size_t i = 1; i < length; i++) - { - auto parent = path[i - 1].node; - auto& curr = path[i].node; - auto dir = path[i].dir; - auto actual = ChildRef::get_dir(dir, parent); - if (actual != curr) - { - if (!expected) - { - snmalloc::error("Performed an unexpected fixup."); - } - curr = actual; - } - } - } - - void print() - { - if constexpr (TRACE) - { - for (size_t i = 0; i < length; i++) - { - message<1024>( - " -> {} @ {} ({})", - Rep::printable(typename Rep::Contents(path[i].node)), - path[i].node.printable(), - path[i].dir); - } - } - } - }; - } // namespace rankbalancetree - - namespace rankbalancetree - { - template - struct RedBlackPolicy - { - using K = typename Rep::Contents; - using H = typename Rep::Handle; - using ChildRef = rankbalancetree::ChildRef; - using RBPath = rankbalancetree::RBPath; - - /* - * Verify structural invariants. Returns the black depth of the `curr`ent - * node. 
- */ - int invariant(K curr, K lower = Rep::null, K upper = Rep::null) - { - if constexpr (!run_checks) - { - UNUSED(curr, lower, upper); - return 0; - } - else - { - if (curr == Rep::null) - return 1; - - if ( - ((lower != Rep::null) && Rep::compare(lower, curr)) || - ((upper != Rep::null) && Rep::compare(curr, upper))) - { - report_fatal_error( - "Invariant failed: {} is out of bounds {}..{}", - Rep::printable(curr), - Rep::printable(lower), - Rep::printable(upper)); - } - - if ( - Rep::tree_tag(curr) && - (Rep::tree_tag(ChildRef::get_dir(true, curr)) || - Rep::tree_tag(ChildRef::get_dir(false, curr)))) - { - report_fatal_error( - "Invariant failed: {} is red and has red child", - Rep::printable(curr)); - } - - int left_inv = invariant(ChildRef::get_dir(true, curr), lower, curr); - int right_inv = - invariant(ChildRef::get_dir(false, curr), curr, upper); - - if (left_inv != right_inv) - { - report_fatal_error( - "Invariant failed: {} has different black depths", - Rep::printable(curr)); - } - - if (Rep::tree_tag(curr)) - return left_inv; - - return left_inv + 1; - } - } - - // Insert an element at the given path. - template - void insert_path( - RBPath& path, K value, DebugLogger debug_log, RootGetter get_root) - { - SNMALLOC_ASSERT(path.curr().is_null()); - path.curr() = value; - ChildRef::get_dir(true, path.curr()) = Rep::null; - ChildRef::get_dir(false, path.curr()) = Rep::null; - Rep::set_tree_tag(value, true); - - debug_log("Insert ", path); - - // Propogate double red up to rebalance. 
- // These notes were particularly clear for explaining insert - // https://www.cs.cmu.edu/~fp/courses/15122-f10/lectures/17-rbtrees.pdf - while (path.curr() != get_root()) - { - SNMALLOC_ASSERT(Rep::tree_tag(path.curr())); - if (!Rep::tree_tag(path.parent())) - { - invariant(get_root()); - return; - } - bool curr_dir = path.curr_dir(); - K curr = path.curr(); - K parent = path.parent(); - K grand_parent = path.grand_parent(); - SNMALLOC_ASSERT(!Rep::tree_tag(grand_parent)); - if (path.parent_dir() == curr_dir) - { - debug_log("Insert - double red case 1", path, path.grand_parent()); - /* Same direction case - * G - grand parent - * P - parent - * C - current - * S - sibling - * - * G P - * / \ / \ - * A P --> G C - * / \ / \ - * S C A S - */ - K sibling = ChildRef::get_dir(!curr_dir, parent); - Rep::set_tree_tag(curr, false); - ChildRef::get_dir(curr_dir, grand_parent) = sibling; - ChildRef::get_dir(!curr_dir, parent) = grand_parent; - path.grand_parent() = parent; - debug_log( - "Insert - double red case 1 - done", path, path.grand_parent()); - } - else - { - debug_log("Insert - double red case 2", path, path.grand_parent()); - /* G - grand parent - * P - parent - * C - current - * Cg - Current child for grand parent - * Cp - Current child for parent - * - * G C - * / \ / \ - * A P G P - * / \ --> / \ / \ - * C B A Cg Cp B - * / \ - * Cg Cp - */ - K child_g = ChildRef::get_dir(curr_dir, curr); - K child_p = ChildRef::get_dir(!curr_dir, curr); - - Rep::set_tree_tag(parent, false); - path.grand_parent() = curr; - ChildRef::get_dir(curr_dir, curr) = grand_parent; - ChildRef::get_dir(!curr_dir, curr) = parent; - ChildRef::get_dir(curr_dir, parent) = child_p; - ChildRef::get_dir(!curr_dir, grand_parent) = child_g; - debug_log( - "Insert - double red case 2 - done", path, path.grand_parent()); - } - - // Move to what replaced grand parent. 
- path.pop(); - path.pop(); - invariant(path.curr()); - } - Rep::set_tree_tag(get_root(), false); - invariant(get_root()); - } - - template - bool remove_path(RBPath& path, DebugLogger debug_log, RootGetter get_root) - { - ChildRef splice = path.curr(); - SNMALLOC_ASSERT(!(splice.is_null())); - - debug_log("Removing", path); - - /* - * Find immediately smaller leaf element (rightmost descendant of left - * child) to serve as the replacement for this node. We may not have a - * left subtree, so this may not move the path at all. - */ - path.move(true); - while (path.move(false)) - { - } - - K curr = path.curr(); - - { - // Locally extract right-child-less replacement, replacing it with its - // left child, if any - K child = ChildRef::get_dir(true, path.curr()); - // Unlink target replacing with possible child. - path.curr() = child; - } - - bool leaf_red = Rep::tree_tag(curr); - - if (path.curr() != splice) - { - // If we had a left child, replace ourselves with the extracted value - // from above - Rep::set_tree_tag(curr, Rep::tree_tag(splice)); - ChildRef::get_dir(true, curr) = K{ChildRef::get_dir(true, splice)}; - ChildRef::get_dir(false, curr) = K{ChildRef::get_dir(false, splice)}; - splice = curr; - path.fixup(); - } - - debug_log("Splice done", path); - - // TODO: Clear node contents? - - // Red leaf removal requires no rebalancing. - if (leaf_red) - return true; - - // Now in the double black case. - // End of path is considered double black, that is, one black element - // shorter than satisfies the invariant. The following algorithm moves - // up the path until it finds a close red element or the root. If we - // convert the tree to one, in which the root is double black, then the - // algorithm is complete, as there is nothing to be out of balance with. - // Otherwise, we are searching for nearby red elements so we can rotate - // the tree to rebalance. 
The following slides nicely cover the case - // analysis below - // https://www.cs.purdue.edu/homes/ayg/CS251/slides/chap13c.pdf - while (path.curr() != get_root()) - { - K parent = path.parent(); - bool cur_dir = path.curr_dir(); - K sibling = ChildRef::get_dir(!cur_dir, parent); - - /* Handle red sibling case. - * This performs a rotation to give a black sibling. - * - * p s(b) - * / \ / \ - * c s(r) --> p(r) m - * / \ / \ - * n m c n - * - * By invariant we know that p, n and m are all initially black. - */ - if (Rep::tree_tag(sibling)) - { - debug_log("Red sibling", path, path.parent()); - K nibling = ChildRef::get_dir(cur_dir, sibling); - ChildRef::get_dir(!cur_dir, parent) = nibling; - ChildRef::get_dir(cur_dir, sibling) = parent; - Rep::set_tree_tag(parent, true); - Rep::set_tree_tag(sibling, false); - path.parent() = sibling; - // Manually fix path. Using path.fixup would alter the complexity - // class. - path.pop(); - path.move(cur_dir); - path.move_inc_null(cur_dir); - path.fixup(false); - debug_log("Red sibling - done", path, path.parent()); - continue; - } - - /* Handle red nibling case 1. - *
<p> <s>
- * / \ / \ - * c s --> p rn - * / \ / \ - * on rn c on - */ - if (Rep::tree_tag(ChildRef::get_dir(!cur_dir, sibling))) - { - debug_log("Red nibling 1", path, path.parent()); - K r_nibling = ChildRef::get_dir(!cur_dir, sibling); - K o_nibling = ChildRef::get_dir(cur_dir, sibling); - ChildRef::get_dir(cur_dir, sibling) = parent; - ChildRef::get_dir(!cur_dir, parent) = o_nibling; - path.parent() = sibling; - Rep::set_tree_tag(r_nibling, false); - Rep::set_tree_tag(sibling, Rep::tree_tag(parent)); - Rep::set_tree_tag(parent, false); - debug_log("Red nibling 1 - done", path, path.parent()); - break; - } - - /* Handle red nibling case 2. - *
<p> <rn>
- * / \ / \ - * c s --> p s - * / \ / \ / \ - * rn on c rno rns on - * / \ - * rno rns - */ - if (Rep::tree_tag(ChildRef::get_dir(cur_dir, sibling))) - { - debug_log("Red nibling 2", path, path.parent()); - K r_nibling = ChildRef::get_dir(cur_dir, sibling); - K r_nibling_same = ChildRef::get_dir(cur_dir, r_nibling); - K r_nibling_opp = ChildRef::get_dir(!cur_dir, r_nibling); - ChildRef::get_dir(!cur_dir, parent) = r_nibling_same; - ChildRef::get_dir(cur_dir, sibling) = r_nibling_opp; - ChildRef::get_dir(cur_dir, r_nibling) = parent; - ChildRef::get_dir(!cur_dir, r_nibling) = sibling; - path.parent() = r_nibling; - Rep::set_tree_tag(r_nibling, Rep::tree_tag(parent)); - Rep::set_tree_tag(parent, false); - debug_log("Red nibling 2 - done", path, path.parent()); - break; - } - - // Handle black sibling and niblings, and red parent. - if (Rep::tree_tag(parent)) - { - debug_log("Black sibling and red parent case", path, path.parent()); - Rep::set_tree_tag(parent, false); - Rep::set_tree_tag(sibling, true); - debug_log( - "Black sibling and red parent case - done", path, path.parent()); - break; - } - // Handle black sibling and niblings and black parent. - debug_log( - "Black sibling, niblings and black parent case", - path, - path.parent()); - Rep::set_tree_tag(sibling, true); - path.pop(); - invariant(path.curr()); - debug_log( - "Black sibling, niblings and black parent case - done", - path, - path.curr()); - } - return true; - } - }; - - template - struct WeakAVLPolicy - { - using K = typename Rep::Contents; - using ChildRef = rankbalancetree::ChildRef; - using RBPath = rankbalancetree::RBPath; - - int invariant(K, K, K) {} - - }; - } // namespace rankbalancetree - - /** - * Contains a self balancing binary tree. - * - * The template parameter Rep provides the representation of the nodes as a - * collection of functions and types that are requires. See the associated - * test for an example. - * - * run_checks enables invariant checking on the tree. Enabled in Debug. 
- * TRACE prints all the sets of the rebalancing operations. Only enabled by - * the test when debugging a specific failure. - */ - template< - SNMALLOC_CONCEPT(RBRep) Rep, - bool run_checks = Debug, - bool TRACE = false, - typename Policy = rankbalancetree::RedBlackPolicy> - class RBTree : public Policy - { - using H = typename Rep::Handle; - using K = typename Rep::Contents; - using ChildRef = rankbalancetree::ChildRef; - using RBStep = rankbalancetree::RBStep; - - // Root field of the tree - typename stl::remove_const_t> - root{Rep::root}; - - ChildRef get_root() - { - return {H{&root}}; - } - - void invariant() - { - Policy::invariant(get_root()); - } - - public: - using RBPath = rankbalancetree::RBPath; - - private: - struct DebugLogger - { - RBTree* context; - - void operator()(const char* msg, RBPath& path) - { - this->operator()(msg, path, context->get_root()); - } - - void operator()(const char* msg, RBPath& path, ChildRef base) - { - if constexpr (TRACE) - { - message<100>("------- {}", Rep::name()); - message<1024>(msg); - path.print(); - context->print(base); - } - else - { - UNUSED(msg, path, base); - } - } - }; - - public: - constexpr RBTree() = default; - - void print() - { - print(get_root()); - } - - void print(ChildRef curr, const char* indent = "", size_t depth = 0) - { - if constexpr (TRACE) - { - if (curr.is_null()) - { - message<1024>("{}\\_null", indent); - return; - } - -#ifdef _MSC_VER - auto colour = Rep::tree_tag(curr) ? "R-" : "B-"; - auto reset = ""; -#else - auto colour = Rep::tree_tag(curr) ? "\e[1;31m" : "\e[1;34m"; - auto reset = "\e[0m"; -#endif - - message<1024>( - "{}\\_{}{}{}@{} ({})", - indent, - colour, - Rep::printable((K(curr))), - reset, - curr.printable(), - depth); - if (!(ChildRef::get_dir(true, curr).is_null() && - ChildRef::get_dir(false, curr).is_null())) - { - // As the tree should be balanced, the depth should not exceed 128 if - // there are 2^64 elements in the tree. 
This is a debug feature, and - // it would be impossible to debug something of this size, so this is - // considerably larger than required. - // If there is a bug that leads to an unbalanced tree, this might be - // insufficient to accurately display the tree, but it will still be - // memory safe as the search code is bounded by the string size. - static constexpr size_t max_depth = 128; - char s_indent[max_depth]; - size_t end = 0; - for (; end < max_depth - 1; end++) - { - if (indent[end] == 0) - break; - s_indent[end] = indent[end]; - } - s_indent[end] = '|'; - s_indent[end + 1] = 0; - print(ChildRef::get_dir(true, curr), s_indent, depth + 1); - s_indent[end] = ' '; - print(ChildRef::get_dir(false, curr), s_indent, depth + 1); - } - } - } - - bool find(RBPath& path, K value) - { - bool dir; - - if (path.curr().is_null()) - return false; - - do - { - if (Rep::equal(path.curr(), value)) - return true; - dir = Rep::compare(path.curr(), value); - } while (path.move_inc_null(dir)); - - return false; - } - - bool is_empty() - { - return get_root().is_null(); - } - - K remove_min() - { - if (is_empty()) - return Rep::null; - - auto path = get_root_path(); - while (path.move(true)) - { - } - - K result = path.curr(); - - remove_path(path); - return result; - } - - bool remove_elem(K value) - { - if (is_empty()) - return false; - - auto path = get_root_path(); - if (!find(path, value)) - return false; - - remove_path(path); - return true; - } - - bool insert_elem(K value) - { - auto path = get_root_path(); - - if (find(path, value)) - return false; - - Policy::insert_path( - path, value, DebugLogger{this}, [this]() { return get_root(); }); - return true; - } - - RBPath get_root_path() - { - return RBPath(H{&root}); - } - - void insert_path(RBPath& path, K value) - { - Policy::insert_path( - path, value, DebugLogger{this}, [this]() { return get_root(); }); - } - - bool remove_path(RBPath& path) - { - return Policy::remove_path( - path, DebugLogger{this}, [this]() { 
return get_root(); }); - } - }; -} // namespace snmalloc From 4b3f94a91ccfb0a906056065157700a2d948752f Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Thu, 26 Feb 2026 21:09:19 -0500 Subject: [PATCH 4/5] adjust the way of policy being specified --- src/snmalloc/ds_core/concept.h | 1 + src/snmalloc/ds_core/rankbalancetree.h | 15 +- src/test/func/redblack/redblack.cc | 3 +- src/test/func/weakavl/weakavl.cc | 237 +++++++++++++++++++++++++ 4 files changed, 248 insertions(+), 8 deletions(-) create mode 100644 src/test/func/weakavl/weakavl.cc diff --git a/src/snmalloc/ds_core/concept.h b/src/snmalloc/ds_core/concept.h index d20698d30..aa98887b4 100644 --- a/src/snmalloc/ds_core/concept.h +++ b/src/snmalloc/ds_core/concept.h @@ -1,6 +1,7 @@ #pragma once #include "snmalloc/stl/type_traits.h" +#include /** * C++20 concepts are referenced as if they were types in declarations within diff --git a/src/snmalloc/ds_core/rankbalancetree.h b/src/snmalloc/ds_core/rankbalancetree.h index 8c739e64c..3a9eaa9d1 100644 --- a/src/snmalloc/ds_core/rankbalancetree.h +++ b/src/snmalloc/ds_core/rankbalancetree.h @@ -1065,14 +1065,15 @@ namespace snmalloc SNMALLOC_CONCEPT(RBRep) Rep, bool run_checks = Debug, bool TRACE = false, - typename Policy = - rankbalancetree::SNMALLOC_DEFAULT_RBTREE_POLICY> - class RBTree : public Policy + template class Policy = + rankbalancetree::SNMALLOC_DEFAULT_RBTREE_POLICY> + class RBTree : public Policy { using H = typename Rep::Handle; using K = typename Rep::Contents; using ChildRef = rankbalancetree::ChildRef; using RBStep = rankbalancetree::RBStep; + using Base = Policy; // Root field of the tree typename stl::remove_const_t> @@ -1085,7 +1086,7 @@ namespace snmalloc void invariant() { - Policy::invariant(get_root()); + Base::invariant(get_root()); } public: @@ -1237,7 +1238,7 @@ namespace snmalloc if (find(path, value)) return false; - Policy::insert_path( + Base::insert_path( path, value, DebugLogger{this}, [this]() { return get_root(); }); 
return true; } @@ -1249,13 +1250,13 @@ namespace snmalloc void insert_path(RBPath& path, K value) { - Policy::insert_path( + Base::insert_path( path, value, DebugLogger{this}, [this]() { return get_root(); }); } bool remove_path(RBPath& path) { - return Policy::remove_path( + return Base::remove_path( path, DebugLogger{this}, [this]() { return get_root(); }); } }; diff --git a/src/test/func/redblack/redblack.cc b/src/test/func/redblack/redblack.cc index 30bbe868e..734a42967 100644 --- a/src/test/func/redblack/redblack.cc +++ b/src/test/func/redblack/redblack.cc @@ -1,3 +1,4 @@ +#include "snmalloc/ds_core/rankbalancetree.h" #include "test/opt.h" #include "test/setup.h" #include "test/usage.h" @@ -142,7 +143,7 @@ void test(size_t size, unsigned int seed) /// additions and removals from the tree. xoroshiro::p64r32 rand(seed); - snmalloc::RBTree tree; + snmalloc::RBTree tree; std::vector entries; bool first = true; diff --git a/src/test/func/weakavl/weakavl.cc b/src/test/func/weakavl/weakavl.cc new file mode 100644 index 000000000..01914e410 --- /dev/null +++ b/src/test/func/weakavl/weakavl.cc @@ -0,0 +1,237 @@ +#include "test/opt.h" +#include "test/setup.h" +#include "test/usage.h" +#include "test/xoroshiro.h" + +#include +#include +#include + +#ifndef SNMALLOC_TRACING +# define SNMALLOC_TRACING +#endif +// Redblack tree needs some libraries with trace enabled. +#include "snmalloc/snmalloc.h" + +struct NodeRef +{ + // The redblack tree is going to be used inside the pagemap, + // and the redblack tree cannot use all the bits. Applying an offset + // to the stored value ensures that we have some abstraction over + // the representation. 
+ static constexpr size_t offset = 10000; + + size_t* ptr; + + constexpr NodeRef(size_t* p) : ptr(p) {} + + constexpr NodeRef() : ptr(nullptr) {} + + constexpr NodeRef(const NodeRef& other) : ptr(other.ptr) {} + + constexpr NodeRef(NodeRef&& other) : ptr(other.ptr) {} + + bool operator!=(const NodeRef& other) const + { + return ptr != other.ptr; + } + + NodeRef& operator=(const NodeRef& other) + { + ptr = other.ptr; + return *this; + } + + void set(uint16_t val) + { + *ptr = ((size_t(val) + offset) << 1) + (*ptr & 1); + } + + explicit operator uint16_t() + { + return uint16_t((*ptr >> 1) - offset); + } + + explicit operator size_t*() + { + return ptr; + } +}; + +// Simple representation that is like the pagemap. +// Bottom bit of left is used to store the colour. +// We shift the fields up to make room for the colour. +struct node +{ + size_t left; + size_t right; +}; + +inline static node array[2048]; + +class Rep +{ +public: + using key = uint16_t; + + static constexpr key null = 0; + static constexpr size_t root{NodeRef::offset << 1}; + + using Handle = NodeRef; + using Contents = uint16_t; + + static void set(Handle ptr, Contents r) + { + ptr.set(r); + } + + static Contents get(Handle ptr) + { + return static_cast(ptr); + } + + static Handle ref(bool direction, key k) + { + if (direction) + return {&array[k].left}; + else + return {&array[k].right}; + } + + static bool tree_tag(key k) + { + return (array[k].left & 1) == 1; + } + + static void set_tree_tag(key k, bool new_tree_tag) + { + if (new_tree_tag != tree_tag(k)) + array[k].left ^= 1; + } + + static bool compare(key k1, key k2) + { + return k1 > k2; + } + + static bool equal(key k1, key k2) + { + return k1 == k2; + } + + static size_t printable(key k) + { + return k; + } + + static size_t* printable(NodeRef k) + { + return static_cast(k); + } + + static const char* name() + { + return "TestRep"; + } +}; + +template +void test(size_t size, unsigned int seed) +{ + /// Perform a pseudo-random series of + /// 
additions and removals from the tree. + + xoroshiro::p64r32 rand(seed); + snmalloc::RBTree tree; + std::vector entries; + + bool first = true; + std::cout << "size: " << size << " seed: " << seed << std::endl; + for (size_t i = 0; i < 20 * size; i++) + { + auto batch = 1 + rand.next() % (3 + (size / 2)); + auto op = rand.next() % 4; + if (op < 2 || first) + { + first = false; + for (auto j = batch; j > 0; j--) + { + auto index = 1 + rand.next() % size; + if (tree.insert_elem(Rep::key(index))) + { + entries.push_back(Rep::key(index)); + } + } + } + else if (op == 3) + { + for (auto j = batch; j > 0; j--) + { + if (entries.size() == 0) + continue; + auto index = rand.next() % entries.size(); + auto elem = entries[index]; + if (!tree.remove_elem(elem)) + { + std::cout << "Failed to remove element: " << elem << std::endl; + abort(); + } + entries.erase(entries.begin() + static_cast(index)); + } + } + else + { + for (auto j = batch; j > 0; j--) + { + // print(); + auto min = tree.remove_min(); + auto s = entries.size(); + if (min == 0) + break; + + entries.erase( + std::remove(entries.begin(), entries.end(), min), entries.end()); + if (s != entries.size() + 1) + { + std::cout << "Failed to remove min: " << min << std::endl; + abort(); + } + } + } + if (entries.size() == 0) + { + break; + } + } +} + +int main(int argc, char** argv) +{ + setup(); + + opt::Opt opt(argc, argv); + + auto seed = opt.is("--seed", 0); + auto size = opt.is("--size", 0); + + if (seed == 0 && size == 0) + { + for (size = 1; size <= 300; size = size + 1 + (size >> 3)) + for (seed = 1; seed < 5 + (8 * size); seed++) + { + test(size, seed); + } + + return 0; + } + + if (seed == 0 || size == 0) + { + std::cout << "Set both --seed and --size" << std::endl; + return 1; + } + + // Trace particular example + test(size, seed); + return 0; +} From 7567dfc9fc5fb38ff5156bdd9cc8721d3f1c29bb Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Fri, 27 Feb 2026 09:15:34 -0500 Subject: [PATCH 5/5] remove 
extra fixup --- src/snmalloc/ds_core/rankbalancetree.h | 48 ++++++++++---------------- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/src/snmalloc/ds_core/rankbalancetree.h b/src/snmalloc/ds_core/rankbalancetree.h index 3a9eaa9d1..9f2d09d92 100644 --- a/src/snmalloc/ds_core/rankbalancetree.h +++ b/src/snmalloc/ds_core/rankbalancetree.h @@ -822,8 +822,8 @@ namespace snmalloc if (edge_is_even(node, !node_dir)) { /* - * Case 2: sibling edge from node is 2-level lower in rank. And node has a 1-node - * along the same direction. Rotate parent once. + * Case 2: sibling edge from node is 2-level lower in rank. And node + * has a 1-node along the same direction. Rotate parent once. * * (GP) (GP) * 0 │ x x │ @@ -835,36 +835,29 @@ namespace snmalloc */ debug_log("Insert - single rotation", path, path.parent()); rotate_subtree(path.parent(), node_dir); - // RBPath caches handles along the search path; rotation invalidates - // those cached references until we refresh. - path.fixup(); // Parent is demoted by one rank while node is at the same rank. toggle_rank_parity(parent); } else { /* - * Case 3: sibling edge from node is 2-level lower in rank. And node has a 1-node - * along the opposite direction. Do zig-zag rotation. + * Case 3: sibling edge from node is 2-level lower in rank. And node + * has a 1-node along the opposite direction. Do zig-zag rotation. * * (GP) (GP) * 0 │ x │ x * (N) ─── (P) => (C1) - * 2 ╱ ╲ 1 ╲ 1 ╱ ╲ + * 2 ╱ ╲ 1 ╲ 1 ╱ ╲ * ╱ (C1) ╲ 2 (N) (P) * (C2) ╱ ╲ ╲ 1 ╱ ╲ ╱ ╲ * (A) (B) (S) (C2)(A)(B)(S) - * + * */ debug_log("Insert - double rotation", path, path.parent()); K middle = ChildRef::get_dir(!node_dir, node); rotate_subtree(path.curr(), !node_dir); - path.fixup(); - rotate_subtree(path.parent(), node_dir); - path.fixup(); - // Middle is promoted, parent and node are demoted. 
toggle_rank_parity(middle); toggle_rank_parity(parent); @@ -945,8 +938,8 @@ namespace snmalloc SNMALLOC_ASSERT(sibling != Rep::null); /* - * Case 1: deleted-side edge is now 3-level lower and sibling edge is 2-level lower. - * Demote cursor and continue upward. + * Case 1: deleted-side edge is now 3-level lower and sibling edge is + * 2-level lower. Demote cursor and continue upward. * * (P) (P) * │ X │ X+1 @@ -964,8 +957,8 @@ namespace snmalloc } /* - * Case 2: sibling is 1-level lower and it is a 2-2 node. Demote sibling and cursor, then - * continue upward. + * Case 2: sibling is 1-level lower and it is a 2-2 node. Demote + * sibling and cursor, then continue upward. * * (P) (P) * │ X │ X+1 @@ -974,7 +967,7 @@ namespace snmalloc * (S) ╲ 1 ╱ ╲ 2 * 2╱ ╲2 ╲ (S) ╲ * ╱ ╲ (D) 1 ╱ ╲ 1 (D) - * (*) (*) (*) (*) + * (*) (*) (*) (*) */ if (is_22(sibling)) { @@ -985,8 +978,8 @@ namespace snmalloc } /* - * Case 3: sibling cannot be demoted since it has a 1-edge. if sibling has a 1-child on the same - * side. Single rotation at cursor. + * Case 3: sibling cannot be demoted since it has a 1-edge. if sibling + * has a 1-child on the same side. Single rotation at cursor. * * (P) (P) * │ X │ X @@ -1002,9 +995,6 @@ namespace snmalloc bool inner_is_2 = edge_is_even(sibling, !sibling_dir); rotate_subtree(path.parent(), sibling_dir); - // RBPath caches references to node fields, so refresh after rotate. - path.fixup(); - // sibling is promoted, cursor demoted. toggle_rank_parity(sibling); toggle_rank_parity(cursor); @@ -1024,20 +1014,18 @@ namespace snmalloc * │ X │ X * (C) (T) * 1 ╱ ╲ 3 => 2 ╱ ╲ 2 - * (S) ╲ ╱ ╲ + * (S) ╲ ╱ ╲ * 2 ╱ ╲ 1 ╲ (S) (C) - * ╱ (T) (D) 1 ╱ ╲ ╱ ╲ 1 + * ╱ (T) (D) 1 ╱ ╲ ╱ ╲ 1 * (*) y ╱ ╲ z (*) (A) (B) (D) - * (A) (B) + * (A) (B) */ auto sibling_ref = ChildRef::get_dir(sibling_dir, cursor); rotate_subtree(sibling_ref, !sibling_dir); - path.fixup(); rotate_subtree(path.parent(), sibling_dir); - path.fixup(); - // Sibling is demoted by one. 
Cursor and sibling's 1-child's rank changes are two, - // so no further toggle is needed. + // Sibling is demoted by one. Cursor and sibling's 1-child's rank + // changes are two, so no further toggle is needed. toggle_rank_parity(sibling); invariant(get_root());