Loading...
Searching...
No Matches
object_pool.hpp
1#pragma once
2
3#include <array>
4#include <cstddef>
5#include <cstdint>
6#include <memory_resource>
7#include <memory>
8#include <atomic>
9#include <thread>
10#include <utility>
11#include "os.hpp"
12
17
18namespace tf {
19
20// ----------------------------------------------------------------------------
21// TaggedHead128
22// ----------------------------------------------------------------------------
23
46 using pointer_type = uintptr_t;
47 using tag_type = uintptr_t;
48
51
53 TaggedHead128() = default;
54
60 TaggedHead128(pointer_type p, tag_type t) noexcept : ptr{p}, tag{t} {}
61
63 pointer_type get_ptr() const noexcept { return ptr; }
64
66 tag_type get_tag() const noexcept { return tag; }
67};
68
69// ----------------------------------------------------------------------------
70// TaggedHead64
71// ----------------------------------------------------------------------------
72
115template <int PtrBits = TF_POINTER_BITS>
117 static_assert(64 - PtrBits >= 16,
118 "TaggedHead64 requires at least 16 tag bits for ABA safety "
119 "(PtrBits must be <= 48); use TaggedHead128 instead");
120
121 using pointer_type = uintptr_t;
122 using tag_type = uint16_t;
123
124 static constexpr int PTR_BITS = PtrBits;
125 static constexpr int TAG_BITS = 64 - PtrBits;
126 static constexpr pointer_type PTR_MASK =
127 (pointer_type{1} << PTR_BITS) - 1;
128
129 uintptr_t bits {0};
130
132 TaggedHead64() = default;
133
140 : bits{ (p & PTR_MASK) | (static_cast<uintptr_t>(t) << PTR_BITS) } {}
141
143 pointer_type get_ptr() const noexcept { return bits & PTR_MASK; }
144
146 tag_type get_tag() const noexcept { return static_cast<tag_type>(bits >> PTR_BITS); }
147};
148
149// ----------------------------------------------------------------------------
150// ObjectBlock
151// ----------------------------------------------------------------------------
152
173template <typename T>
174struct ObjectBlock {
175
176 uint16_t pool_id;
177
178 // Intrusive free-list link. Must be atomic to avoid a formal data race
179 // between push_free writing next_free and a concurrent pop_free reading it.
180 //
181 // The race arises from a *stale pointer* held by a thread that loses a CAS.
182 // Consider this interleaving (free list: [b -> null]):
183 //
184 // Thread A (pop_free): loads _free_head = {b, 5} <-- cur.ptr = b
185 // Thread B (pop_free): also loads {b, 5}, wins CAS first,
186 // pops b, returns it to the caller
187 // Thread C (push_free): caller recycles b;
188 // C writes b->next_free <-- non-atomic WRITE
189 // Thread A (pop_free): reads cur.ptr->next_free <-- non-atomic READ
190 // (cur.ptr is the stale b!)
191 //
192 // Thread A's CAS will ultimately fail (the version tag changed), so there
193 // is no algorithmic corruption — but the concurrent non-atomic read + write
194 // on the same memory location is a formal data race and undefined behavior
195 // per the C++ memory model. Making next_free atomic eliminates the race by
196 // definition: two concurrent atomic accesses are never a data race.
197 std::atomic<ObjectBlock*> next_free {nullptr};
198
199 alignas(T) std::byte storage[sizeof(T)];
200
202 T* object() noexcept {
203 return std::launder(reinterpret_cast<T*>(storage));
204 }
205
207 const T* object() const noexcept {
208 return std::launder(reinterpret_cast<const T*>(storage));
209 }
210
218 static ObjectBlock* from_object(T* obj) noexcept {
219 return reinterpret_cast<ObjectBlock*>(
220 reinterpret_cast<char*>(obj) - offsetof(ObjectBlock, storage)
221 );
222 }
223};
224
225// ----------------------------------------------------------------------------
226// ObjectPool
227// ----------------------------------------------------------------------------
228
319template <typename T, typename H = TaggedHead128, size_t LogSize = 5>
321
322 static_assert(LogSize >= 1 && LogSize <= 15,
323 "LogSize must be in [1, 15]");
324
325 using Block = ObjectBlock<T>;
326
327 static constexpr size_t NumPools = 1u << LogSize;
328
329 struct alignas(TF_CACHELINE_SIZE) Shard {
330
331 // Hot path: lock-free Treiber stack of recycled blocks.
332 // _free_head sits on its own cache line (via the Shard alignas) so that
333 // hot-path CAS does not invalidate the line holding _backing's mutex.
334 std::atomic<H> _free_head {H{}};
335
336 // Cold path: backing allocator for fresh block memory.
337 // alignas pushes _backing to the next cache line, separating it from the
338 // hot _free_head above and preventing hot/cold false sharing.
339 alignas(TF_CACHELINE_SIZE) std::pmr::synchronized_pool_resource _backing {
340 std::pmr::pool_options {
341 .max_blocks_per_chunk = 1024,
342 .largest_required_pool_block = sizeof(Block)
343 }
344 };
345
346 void push_free(Block* b) noexcept {
347 H cur = _free_head.load(std::memory_order_relaxed);
348 H next;
349 do {
350 // relaxed: the release CAS below synchronises-with pop_free's acquire,
351 // making this store visible to any thread that subsequently observes b
352 // at the head of the list.
353 b->next_free.store(
354 reinterpret_cast<Block*>(cur.get_ptr()), std::memory_order_relaxed);
355 next = H(
356 reinterpret_cast<typename H::pointer_type>(b),
357 static_cast<typename H::tag_type>(cur.get_tag() + 1)
358 );
359 } while (!_free_head.compare_exchange_weak(
360 cur, next,
361 std::memory_order_release, // publish next_free write to pop_free
362 std::memory_order_relaxed
363 ));
364 }
365
366 Block* pop_free() noexcept {
367 H cur = _free_head.load(std::memory_order_acquire);
368 while (cur.get_ptr()) {
369 auto* p = reinterpret_cast<Block*>(cur.get_ptr());
370 // relaxed on next_free: the acquire on _free_head (either the load
371 // above or the acquire failure ordering below) synchronises-with the
372 // release CAS in push_free, so the next_free store that preceded it
373 // is already visible to this thread.
374 Block* nx = p->next_free.load(std::memory_order_relaxed);
375 H next(
376 reinterpret_cast<typename H::pointer_type>(nx),
377 static_cast<typename H::tag_type>(cur.get_tag() + 1)
378 );
379 if (_free_head.compare_exchange_weak(
380 cur, next,
381 std::memory_order_acquire, // success: synchronise with push_free
382 std::memory_order_acquire // failure: fresh cur must also synchronise
383 )) { // before the next next_free read
384 return p;
385 }
386 }
387 return nullptr;
388 }
389
390 Block* allocate_from_backing() {
391 return static_cast<Block*>(
392 _backing.allocate(sizeof(Block), alignof(Block))
393 );
394 }
395
396 void deallocate_to_backing(Block* b) {
397 _backing.deallocate(b, sizeof(Block), alignof(Block));
398 }
399 };
400
401 std::array<Shard, NumPools> _shards;
402
403 // Returns the next shard index for this thread. The thread_local counter
404 // is seeded once from the calling thread's ID hash, spreading different
405 // threads across different starting shards with zero shared state.
406 // Subsequent calls are a bare local increment — no atomic, no cache-line
407 // traffic.
408 //
409 // If thread_local is broken (e.g. MSVC DLL with improper TLS), the counter
410 // degrades to a single shared value and causes contention, but shard
411 // selection remains correct.
412 static size_t _next_shard() noexcept {
413 thread_local size_t counter =
414 std::hash<std::thread::id>{}(std::this_thread::get_id());
415 return counter++ & (NumPools - 1);
416 }
417
418 public:
419
427 ObjectPool() = default;
428
436 ObjectPool(const ObjectPool&) = delete;
437
441 ObjectPool& operator=(const ObjectPool&) = delete;
442
452 ~ObjectPool() = default;
453
490 template <typename... Args>
491 [[nodiscard]] T* animate(Args&&... args) {
492 auto sid = _next_shard();
493 auto& shard = _shards[sid];
494
495 Block* block = shard.pop_free(); // hot path: lock-free
496 if (!block) block = shard.allocate_from_backing(); // cold path: mutex, amortized
497
498 block->pool_id = static_cast<uint16_t>(sid);
499 return std::construct_at(block->object(), std::forward<Args>(args)...);
500 }
501
533 void recycle(T* obj) {
534 if (!obj) return;
535 auto* block = Block::from_object(obj);
536 std::destroy_at(block->object());
537 _shards[block->pool_id].push_free(block); // hot path: lock-free
538 }
539
582 void release() {
583 for (auto& shard : _shards) {
584 // Release all backing chunks to upstream first — this covers both blocks
585 // on the free stack and any that were never recycled, since the backing
586 // pool owns memory at the chunk level, not per block.
587 shard._backing.release();
588 // Reset the free stack to null in O(1). Pointers it held are now
589 // dangling (their backing chunks were just freed), so they must be
590 // cleared before the allocator is used again.
591 shard._free_head.store(H{}, std::memory_order_relaxed);
592 }
593 }
594};
595
596} // namespace tf
ObjectPool(const ObjectPool &)=delete
disabled copy constructor
ObjectPool()=default
constructs the allocator with 2^LogSize empty shards
T * animate(Args &&... args)
constructs an object of type T in the pool and returns a pointer
Definition object_pool.hpp:491
~ObjectPool()=default
destroys the allocator and releases all backing memory to upstream
void recycle(T *obj)
destructs the object and returns its storage to the pool
Definition object_pool.hpp:533
ObjectPool & operator=(const ObjectPool &)=delete
disabled copy assignment operator
void release()
returns all recycled blocks and backing memory to the system allocator
Definition object_pool.hpp:582
taskflow namespace
Definition small_vector.hpp:20
pointer_type get_ptr() const noexcept
returns the stored block address
Definition object_pool.hpp:63
TaggedHead128()=default
constructs a null, zero-tagged head
pointer_type ptr
block address (reinterpret-cast to/from ObjectBlock*)
Definition object_pool.hpp:49
tag_type tag
ABA version counter; incremented on every push and pop.
Definition object_pool.hpp:50
TaggedHead128(pointer_type p, tag_type t) noexcept
constructs a head with an explicit block address and version counter
Definition object_pool.hpp:60
uintptr_t pointer_type
block address representation
Definition object_pool.hpp:46
uintptr_t tag_type
ABA version counter representation.
Definition object_pool.hpp:47
tag_type get_tag() const noexcept
returns the ABA version counter
Definition object_pool.hpp:66
uint16_t tag_type
ABA version counter.
Definition object_pool.hpp:122
uintptr_t pointer_type
block address representation
Definition object_pool.hpp:121
TaggedHead64()=default
constructs a null, zero-tagged head
tag_type get_tag() const noexcept
returns the 16-bit ABA version counter
Definition object_pool.hpp:146
pointer_type get_ptr() const noexcept
returns the block address
Definition object_pool.hpp:143
static constexpr int PTR_BITS
bits reserved for the block address
Definition object_pool.hpp:124
TaggedHead64(pointer_type p, tag_type t) noexcept
constructs a head with an explicit block address and version counter
Definition object_pool.hpp:139
uintptr_t bits
packed word: high TAG_BITS bits = version tag, low PTR_BITS bits = address
Definition object_pool.hpp:129
static constexpr int TAG_BITS
bits reserved for the version counter
Definition object_pool.hpp:125
static constexpr pointer_type PTR_MASK
mask isolating the address field
Definition object_pool.hpp:126