Loading...
Searching...
No Matches
wsq.hpp
1#pragma once
2
3#include <bit>
4
5#include "../utility/macros.hpp"
6#include "../utility/traits.hpp"
7
12
13#ifndef TF_DEFAULT_BOUNDED_TASK_QUEUE_LOG_SIZE
21 #define TF_DEFAULT_BOUNDED_TASK_QUEUE_LOG_SIZE 8
22#endif
23
24#ifndef TF_DEFAULT_UNBOUNDED_TASK_QUEUE_LOG_SIZE
32 #define TF_DEFAULT_UNBOUNDED_TASK_QUEUE_LOG_SIZE 10
33#endif
34
35namespace tf {
36
37// ----------------------------------------------------------------------------
38// Work-stealing queue steal protocol sentinels
39//
40// These free functions define the two sentinel values used by steal operations
41// across all work-stealing queue types (BoundedWSQ, UnboundedWSQ). They encode
42// the result of a steal attempt into the return value itself, avoiding any
43// out-parameter or separate status type.
44//
45// For pointer types T:
46// wsq_empty_value<T>() = nullptr — queue was genuinely empty
47// wsq_contended_value<T>() = 0x1 — queue had work but CAS was lost to
48// another thief; caller should retry
49//
50// The sentinel 0x1 is safe because any real object pointer is aligned to at
51// least alignof(T) >= 1, and the OS never maps address 0x1. For void* there
52// is no pointee alignment to check, but the same reasoning applies — no
53// allocator ever returns address 0x1.
54//
55// For non-pointer types T, both return std::nullopt since sentinel encoding
56// is not possible without a dedicated out-of-band value.
57//
58// Both queue classes expose these as static member functions (empty_value,
59// contended_value) that delegate here, so callers can use either form.
60// ----------------------------------------------------------------------------
61
// Returns the "queue was empty" sentinel for a steal/pop on a queue of T:
// nullptr for pointer element types, std::nullopt otherwise.
template <typename T>
constexpr auto wsq_empty_value() {
  if constexpr (std::is_pointer_v<T>) {
    return static_cast<T>(nullptr);
  }
  else {
    return std::optional<T>{};
  }
}
77
// Function: wsq_contended_value
// Returns the "queue had work but the steal CAS was lost" sentinel.
// For pointer element types this is address 0x1 — distinct from the
// nullptr empty sentinel and never returned by any allocator — so the
// caller knows to retry the victim. For non-pointer element types no
// out-of-band value exists, so it collapses to std::nullopt (same as
// the empty sentinel). Not constexpr: reinterpret_cast cannot appear
// in a constant expression.
template <typename T>
auto wsq_contended_value() {
  if constexpr (std::is_pointer_v<T>) {
    return reinterpret_cast<T>(uintptr_t{1});
  } else {
    return std::optional<T>{std::nullopt};
  }
}
97
98// ----------------------------------------------------------------------------
99// Unbounded Work-stealing Queue (WSQ)
100// ----------------------------------------------------------------------------
101
102
127template <typename T>
129
130 struct Array {
131
132 size_t C;
133 size_t M;
134 std::atomic<T>* S;
135
136 explicit Array(size_t c) :
137 C {c},
138 M {c-1},
139 S {new std::atomic<T>[C]} {
140 }
141
142 ~Array() {
143 delete [] S;
144 }
145
146 size_t capacity() const noexcept {
147 return C;
148 }
149
150 void push(int64_t i, T o) noexcept {
151 S[i & M].store(o, std::memory_order_relaxed);
152 }
153
154 T pop(int64_t i) noexcept {
155 return S[i & M].load(std::memory_order_relaxed);
156 }
157
158 Array* resize(int64_t b, int64_t t) {
159 Array* ptr = new Array {2*C};
160 for(int64_t i=t; i!=b; ++i) {
161 ptr->push(i, pop(i));
162 }
163 return ptr;
164 }
165
166 Array* resize(int64_t b, int64_t t, size_t N) {
167 // assert(N>0);
168 Array* ptr = new Array {std::bit_ceil(C + N)};
169 for(int64_t i=t; i!=b; ++i) {
170 ptr->push(i, pop(i));
171 }
172 return ptr;
173 }
174
175 };
176
177 alignas(TF_CACHELINE_SIZE) std::atomic<int64_t> _top;
178 alignas(TF_CACHELINE_SIZE) std::atomic<int64_t> _bottom;
179 std::atomic<Array*> _array;
180 std::vector<Array*> _garbage;
181
182 public:
183
200 using value_type = std::conditional_t<std::is_pointer_v<T>, T, std::optional<T>>;
201
212 explicit UnboundedWSQ(int64_t LogSize = TF_DEFAULT_UNBOUNDED_TASK_QUEUE_LOG_SIZE);
213
218
229 bool empty() const noexcept;
230
241 size_t size() const noexcept;
242
256 size_t capacity() const noexcept;
257
278 void push(T item);
279
307 template <typename I>
308 void bulk_push(I& first, size_t N);
309
331
353
401
414 static constexpr auto empty_value() { return wsq_empty_value<T>(); }
415
426 static auto contended_value() { return wsq_contended_value<T>(); }
427
428 private:
429
430 Array* _resize_array(Array* a, int64_t b, int64_t t);
431 Array* _resize_array(Array* a, int64_t b, int64_t t, size_t N);
432};
433
434// Constructor
435template <typename T>
437 _top.store(0, std::memory_order_relaxed);
438 _bottom.store(0, std::memory_order_relaxed);
439 _array.store(new Array{(size_t{1} << LogSize)}, std::memory_order_relaxed);
440 _garbage.reserve(32);
441}
442
443// Destructor
444template <typename T>
446 for(auto a : _garbage) {
447 delete a;
448 }
449 delete _array.load();
450}
451
452// Function: empty
453template <typename T>
454bool UnboundedWSQ<T>::empty() const noexcept {
455 int64_t t = _top.load(std::memory_order_relaxed);
456 int64_t b = _bottom.load(std::memory_order_relaxed);
457 return (b <= t);
458}
459
460// Function: size
461template <typename T>
462size_t UnboundedWSQ<T>::size() const noexcept {
463 int64_t t = _top.load(std::memory_order_relaxed);
464 int64_t b = _bottom.load(std::memory_order_relaxed);
465 return static_cast<size_t>(b >= t ? b - t : 0);
466}
467
468// Function: push
469template <typename T>
471
472 int64_t b = _bottom.load(std::memory_order_relaxed);
473 int64_t t = _top.load(std::memory_order_acquire);
474 Array* a = _array.load(std::memory_order_relaxed);
475
476 // queue is full with one additional item (b-t+1)
477 if (a->capacity() < static_cast<size_t>(b - t + 1)) [[unlikely]] {
478 a = _resize_array(a, b, t);
479 }
480
481 a->push(b, o);
482 std::atomic_thread_fence(std::memory_order_release);
483
484 // original paper uses relaxed here but tsa complains
485 _bottom.store(b + 1, std::memory_order_release);
486}
487
488// Function: bulk_push
489template <typename T>
490template <typename I>
491void UnboundedWSQ<T>::bulk_push(I& first, size_t N) {
492
493 if(N == 0) return;
494
495 int64_t b = _bottom.load(std::memory_order_relaxed);
496 int64_t t = _top.load(std::memory_order_acquire);
497 Array* a = _array.load(std::memory_order_relaxed);
498
499 // queue is full with N additional items
500 if ( (b - t + N) > a->capacity() ) [[unlikely]] {
501 a = _resize_array(a, b, t, N);
502 }
503
504 for(size_t i=0; i<N; ++i) {
505 a->push(b++, *first++);
506 }
507 std::atomic_thread_fence(std::memory_order_release);
508
509 // original paper uses relaxed here but tsa complains
510 _bottom.store(b, std::memory_order_release);
511}
512
513// Function: pop
514template <typename T>
517
518 int64_t b = _bottom.load(std::memory_order_relaxed) - 1;
519 Array* a = _array.load(std::memory_order_relaxed);
520 _bottom.store(b, std::memory_order_relaxed);
521 std::atomic_thread_fence(std::memory_order_seq_cst);
522 int64_t t = _top.load(std::memory_order_relaxed);
523
524 //T item {nullptr};
525 auto item = empty_value();
526
527 if(t <= b) {
528 item = a->pop(b);
529 if(t == b) {
530 // the last item just got stolen
531 if(!_top.compare_exchange_strong(t, t+1, std::memory_order_seq_cst,
532 std::memory_order_relaxed)) {
533 //item = nullptr;
534 item = empty_value();
535 }
536 _bottom.store(b + 1, std::memory_order_relaxed);
537 }
538 }
539 else {
540 _bottom.store(b + 1, std::memory_order_relaxed);
541 }
542
543 return item;
544}
545
546// Function: steal
547template <typename T>
550
551 int64_t t = _top.load(std::memory_order_acquire);
552 std::atomic_thread_fence(std::memory_order_seq_cst);
553 int64_t b = _bottom.load(std::memory_order_acquire);
554
555 //T item {nullptr};
556 auto item = empty_value();
557
558 if(t < b) {
559 Array* a = _array.load(std::memory_order_consume);
560 item = a->pop(t);
561 if(!_top.compare_exchange_strong(t, t+1,
562 std::memory_order_seq_cst,
563 std::memory_order_relaxed)) {
564 //return nullptr;
565 return empty_value();
566 }
567 }
568
569 return item;
570}
571
572// Function: steal_with_feedback
573// Returns a stolen item, contended_value(), or empty_value() — see declaration.
574template <typename T>
577
578 int64_t t = _top.load(std::memory_order_acquire);
579 std::atomic_thread_fence(std::memory_order_seq_cst);
580 int64_t b = _bottom.load(std::memory_order_acquire);
581
582 if(t < b) {
583 // queue is non-empty: load the candidate item and attempt the CAS
584 Array* a = _array.load(std::memory_order_consume);
585 auto item = a->pop(t);
586 if(!_top.compare_exchange_strong(t, t+1,
587 std::memory_order_seq_cst,
588 std::memory_order_relaxed)) {
589 // CAS lost to another thief — queue had work but we didn't get it.
590 // Return contended_value() so the caller knows to retry this victim.
591 return contended_value();
592 }
593 return item;
594 }
595
596 // bottom <= top: queue is genuinely empty
597 return empty_value();
598}
599
600// Function: capacity
601template <typename T>
602size_t UnboundedWSQ<T>::capacity() const noexcept {
603 return _array.load(std::memory_order_relaxed)->capacity();
604}
605
606template <typename T>
607typename UnboundedWSQ<T>::Array*
608UnboundedWSQ<T>::_resize_array(Array* a, int64_t b, int64_t t) {
609 Array* tmp = a->resize(b, t);
610 _garbage.push_back(a);
611 // Note: the original paper using relaxed causes t-san to complain
612 _array.store(tmp, std::memory_order_release);
613 return tmp;
614}
615
616template <typename T>
617typename UnboundedWSQ<T>::Array*
618UnboundedWSQ<T>::_resize_array(Array* a, int64_t b, int64_t t, size_t N) {
619 Array* tmp = a->resize(b, t, N);
620 _garbage.push_back(a);
621 // Note: the original paper using relaxed causes t-san to complain
622 _array.store(tmp, std::memory_order_release);
623 return tmp;
624}
625
626// ----------------------------------------------------------------------------
627// Bounded Work-stealing Queue (WSQ)
628// ----------------------------------------------------------------------------
629
655template <typename T, size_t LogSize = TF_DEFAULT_BOUNDED_TASK_QUEUE_LOG_SIZE>
657
658 constexpr static size_t BufferSize = size_t{1} << LogSize;
659 constexpr static size_t BufferMask = (BufferSize - 1);
660
661 static_assert((BufferSize >= 2) && ((BufferSize & (BufferSize - 1)) == 0));
662
663 alignas(TF_CACHELINE_SIZE) std::atomic<int64_t> _top {0};
664 alignas(TF_CACHELINE_SIZE) std::atomic<int64_t> _bottom {0};
665 alignas(TF_CACHELINE_SIZE) std::atomic<T> _buffer[BufferSize];
666
667 public:
668
685 using value_type = std::conditional_t<std::is_pointer_v<T>, T, std::optional<T>>;
686
695 BoundedWSQ() = default;
696
700 ~BoundedWSQ() = default;
701
712 bool empty() const noexcept;
713
724 size_t size() const noexcept;
725
736 constexpr size_t capacity() const;
737
760 template <typename O>
761 bool try_push(O&& item);
762
789 template <typename I>
790 size_t try_bulk_push(I& first, size_t N);
791
812
834
882
895 static constexpr auto empty_value() { return wsq_empty_value<T>(); }
896
907 static auto contended_value() { return wsq_contended_value<T>(); }
908};
909
910// Function: empty
911template <typename T, size_t LogSize>
912bool BoundedWSQ<T, LogSize>::empty() const noexcept {
913 int64_t t = _top.load(std::memory_order_relaxed);
914 int64_t b = _bottom.load(std::memory_order_relaxed);
915 return b <= t;
916}
917
918// Function: size
919template <typename T, size_t LogSize>
920size_t BoundedWSQ<T, LogSize>::size() const noexcept {
921 int64_t t = _top.load(std::memory_order_relaxed);
922 int64_t b = _bottom.load(std::memory_order_relaxed);
923 return static_cast<size_t>(b >= t ? b - t : 0);
924}
925
926// Function: try_push
927template <typename T, size_t LogSize>
928template <typename O>
930
931 int64_t b = _bottom.load(std::memory_order_relaxed);
932 int64_t t = _top.load(std::memory_order_acquire);
933
934 // queue is full with one additional item (b-t+1)
935 if(static_cast<size_t>(b - t + 1) > BufferSize) [[unlikely]] {
936 return false;
937 }
938
939 _buffer[b & BufferMask].store(std::forward<O>(o), std::memory_order_relaxed);
940
941 std::atomic_thread_fence(std::memory_order_release);
942
943 // original paper uses relaxed here but tsa complains
944 _bottom.store(b + 1, std::memory_order_release);
945
946 return true;
947}
948
949// Function: try_bulk_push
950template <typename T, size_t LogSize>
951template <typename I>
952size_t BoundedWSQ<T, LogSize>::try_bulk_push(I& first, size_t N) {
953
954 if(N == 0) return 0;
955
956 int64_t b = _bottom.load(std::memory_order_relaxed);
957 int64_t t = _top.load(std::memory_order_acquire);
958
959 size_t r = BufferSize - (b - t); // remaining capacity
960 size_t n = std::min(N, r); // number of pushable elements
961
962 if(n > 0) {
963 // push n elements into the queue
964 for(size_t i=0; i<n; ++i) {
965 _buffer[b++ & BufferMask].store(*first++, std::memory_order_relaxed);
966 }
967 std::atomic_thread_fence(std::memory_order_release);
968 // original paper uses relaxed here but tsa complains
969 _bottom.store(b, std::memory_order_release);
970 }
971
972 return n;
973}
974
975// Function: pop
976template <typename T, size_t LogSize>
979
980 int64_t b = _bottom.load(std::memory_order_relaxed) - 1;
981 _bottom.store(b, std::memory_order_relaxed);
982 std::atomic_thread_fence(std::memory_order_seq_cst);
983 int64_t t = _top.load(std::memory_order_relaxed);
984
985 //T item {nullptr};
986 auto item = empty_value();
987
988 if(t <= b) {
989 item = _buffer[b & BufferMask].load(std::memory_order_relaxed);
990 if(t == b) {
991 // the last item just got stolen
992 if(!_top.compare_exchange_strong(t, t+1,
993 std::memory_order_seq_cst,
994 std::memory_order_relaxed)) {
995 //item = nullptr;
996 item = empty_value();
997 }
998 _bottom.store(b + 1, std::memory_order_relaxed);
999 }
1000 }
1001 else {
1002 _bottom.store(b + 1, std::memory_order_relaxed);
1003 }
1004
1005 return item;
1006}
1007
1008// Function: steal
1009template <typename T, size_t LogSize>
1012 int64_t t = _top.load(std::memory_order_acquire);
1013 std::atomic_thread_fence(std::memory_order_seq_cst);
1014 int64_t b = _bottom.load(std::memory_order_acquire);
1015
1016 //T item{nullptr};
1017 auto item = empty_value();
1018
1019 if(t < b) {
1020 item = _buffer[t & BufferMask].load(std::memory_order_relaxed);
1021 if(!_top.compare_exchange_strong(t, t+1,
1022 std::memory_order_seq_cst,
1023 std::memory_order_relaxed)) {
1024 //return nullptr;
1025 return empty_value();
1026 }
1027 }
1028
1029 return item;
1030}
1031
1032// Function: steal_with_feedback
1033// Returns a stolen item, contended_value(), or empty_value() — see declaration.
1034template <typename T, size_t LogSize>
1037
1038 int64_t t = _top.load(std::memory_order_acquire);
1039 std::atomic_thread_fence(std::memory_order_seq_cst);
1040 int64_t b = _bottom.load(std::memory_order_acquire);
1041
1042 if(t < b) {
1043 // queue is non-empty: load the candidate item and attempt the CAS
1044 auto item = _buffer[t & BufferMask].load(std::memory_order_relaxed);
1045 if(!_top.compare_exchange_strong(t, t+1,
1046 std::memory_order_seq_cst,
1047 std::memory_order_relaxed)) {
1048 // CAS lost to another thief — queue had work but we didn't get it.
1049 // Return contended_value() so the caller knows to retry this victim.
1050 return contended_value();
1051 }
1052 return item;
1053 }
1054
1055 // bottom <= top: queue is genuinely empty
1056 return empty_value();
1057}
1058
1059// Function: capacity
1060template <typename T, size_t LogSize>
1061constexpr size_t BoundedWSQ<T, LogSize>::capacity() const {
1062 return BufferSize;
1063}
1064
1065
1066} // end of namespace tf -----------------------------------------------------
size_t try_bulk_push(I &first, size_t N)
~BoundedWSQ()=default
destructs the queue
value_type steal_with_feedback()
constexpr size_t capacity() const
BoundedWSQ()=default
constructs the queue; the capacity is fixed at compile time as 2^LogSize by the template parameter
bool try_push(O &&item)
std::conditional_t< std::is_pointer_v< T >, T, std::optional< T > > value_type
the return type of queue operations
Definition wsq.hpp:685
size_t size() const noexcept
static constexpr auto empty_value()
Definition wsq.hpp:895
static auto contended_value()
returns the contended sentinel value for pointer element types
Definition wsq.hpp:907
bool empty() const noexcept
queries if the queue is empty at the time of this call
Definition wsq.hpp:912
bool empty() const noexcept
queries if the queue is empty at the time of this call
Definition wsq.hpp:454
value_type pop()
pops out an item from the queue
Definition wsq.hpp:516
value_type steal()
Definition wsq.hpp:549
UnboundedWSQ(int64_t LogSize=TF_DEFAULT_UNBOUNDED_TASK_QUEUE_LOG_SIZE)
constructs the queue with capacity 2^LogSize (the size is given as a base-2 logarithm)
Definition wsq.hpp:436
void push(T item)
inserts an item to the queue
Definition wsq.hpp:470
size_t size() const noexcept
Definition wsq.hpp:462
void bulk_push(I &first, size_t N)
Definition wsq.hpp:491
std::conditional_t< std::is_pointer_v< T >, T, std::optional< T > > value_type
the return type of queue operations
Definition wsq.hpp:200
size_t capacity() const noexcept
queries the capacity of the queue
Definition wsq.hpp:602
static auto contended_value()
returns the contended sentinel value for pointer element types
Definition wsq.hpp:426
static constexpr auto empty_value()
returns the empty sentinel value for the queue element type
Definition wsq.hpp:414
~UnboundedWSQ()
destructs the queue
Definition wsq.hpp:445
value_type steal_with_feedback()
Definition wsq.hpp:576
taskflow namespace
Definition small_vector.hpp:20
constexpr auto wsq_empty_value()
returns the empty sentinel for work-stealing steal operations
Definition wsq.hpp:70
auto wsq_contended_value()
returns the contended sentinel for work-stealing steal operations
Definition wsq.hpp:90