// <barrier> -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// This implementation is based on libcxx/include/barrier
//===-- barrier.h --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------===//

/** @file include/barrier
 *  This is a Standard C++ Library header.
 */

#ifndef _GLIBCXX_BARRIER
#define _GLIBCXX_BARRIER 1

#pragma GCC system_header

#if __cplusplus > 201703L
#include <bits/atomic_base.h>
#if __cpp_lib_atomic_wait && __cpp_aligned_new
#include <bits/std_thread.h>
#include <bits/unique_ptr.h>

#include <array>

#define __cpp_lib_barrier 201907L

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  struct __empty_completion
  {
    _GLIBCXX_ALWAYS_INLINE void
    operator()() noexcept
    { }
  };

/*

The default implementation of __tree_barrier is a classic tree barrier.

It looks different from literature pseudocode for two main reasons:
 1. Threads that call into std::barrier functions do not provide indices,
    so a numbering step is added before the actual barrier algorithm,
    appearing as an N+1 round to the N rounds of the tree barrier.
 2. A great deal of attention has been paid to avoid cache line thrashing
    by flattening the tree structure into cache-line sized arrays, that
    are indexed in an efficient way.

*/
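
/*

 Illustrative worked example (not part of the algorithm; the numbers are
 only for exposition): with _M_expected == 8, the constructor below
 allocates (8 + 1) >> 1 == 4 __state_t nodes.  In round 0 the eight
 arrivals pair up on nodes 0..3 via the __half_step/__full_step CAS
 protocol in _M_arrive; the four arrivals that complete a pair advance to
 round 1 on nodes 0..1, two advance to round 2 on node 0, and the single
 arrival that completes the final pair returns true and runs _M_completion.

*/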

  enum class __barrier_phase_t : unsigned char { };

  template<typename _CompletionF>
    class __tree_barrier
    {
      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
      using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
      static constexpr auto __phase_alignment =
	__atomic_phase_ref_t::required_alignment;

      using __tickets_t = std::array<__barrier_phase_t, 64>;
      struct alignas(64) /* naturally-align the heap state */ __state_t
      {
	alignas(__phase_alignment) __tickets_t __tickets;
      };

      ptrdiff_t _M_expected;
      unique_ptr<__state_t[]> _M_state;
      __atomic_base<ptrdiff_t> _M_expected_adjustment;
      _CompletionF _M_completion;

      alignas(__phase_alignment) __barrier_phase_t _M_phase;

      bool
      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
      {
	const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
	const auto __half_step =
	  static_cast<__barrier_phase_t>(__old_phase_val + 1);
	const auto __full_step =
	  static_cast<__barrier_phase_t>(__old_phase_val + 2);

	size_t __current_expected = _M_expected;
	__current %= ((_M_expected + 1) >> 1);

	for (int __round = 0; ; ++__round)
	  {
	    if (__current_expected <= 1)
	      return true;
	    size_t const __end_node = ((__current_expected + 1) >> 1),
			 __last_node = __end_node - 1;
	    for ( ; ; ++__current)
	      {
		if (__current == __end_node)
		  __current = 0;
		auto __expect = __old_phase;
		__atomic_phase_ref_t __phase(_M_state[__current]
						.__tickets[__round]);
		if (__current == __last_node && (__current_expected & 1))
		  {
		    if (__phase.compare_exchange_strong(__expect, __full_step,
							memory_order_acq_rel))
		      break;     // I'm 1 in 1, go to next __round
		  }
		else if (__phase.compare_exchange_strong(__expect, __half_step,
							 memory_order_acq_rel))
		  {
		    return false; // I'm 1 in 2, done with arrival
		  }
		else if (__expect == __half_step)
		  {
		    if (__phase.compare_exchange_strong(__expect, __full_step,
							memory_order_acq_rel))
		      break;     // I'm 2 in 2, go to next __round
		  }
	      }
	    __current_expected = __last_node + 1;
	    __current >>= 1;
	  }
      }

    public:
      using arrival_token = __barrier_phase_t;

      static constexpr ptrdiff_t
      max() noexcept
      { return __PTRDIFF_MAX__; }

      __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
	: _M_expected(__expected), _M_expected_adjustment(0),
	  _M_completion(move(__completion)),
	  _M_phase(static_cast<__barrier_phase_t>(0))
      {
	size_t const __count = (_M_expected + 1) >> 1;

	_M_state = std::make_unique<__state_t[]>(__count);
      }

      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update)
      {
	std::hash<std::thread::id> __hasher;
	size_t __current = __hasher(std::this_thread::get_id());
	__atomic_phase_ref_t __phase(_M_phase);
	const auto __old_phase = __phase.load(memory_order_relaxed);
	const auto __cur = static_cast<unsigned char>(__old_phase);
	for(; __update; --__update)
	  {
	    if(_M_arrive(__old_phase, __current))
	      {
		_M_completion();
		_M_expected += _M_expected_adjustment.load(memory_order_relaxed);
		_M_expected_adjustment.store(0, memory_order_relaxed);
		auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
		__phase.store(__new_phase, memory_order_release);
		__phase.notify_all();
	      }
	  }
	return __old_phase;
      }
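
      // Block until _M_phase has been advanced past the phase captured in
      // __old_phase, i.e. until the final arrival for that phase has stored
      // the new phase value and notified all waiters.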
      void
      wait(arrival_token&& __old_phase) const
      {
	__atomic_phase_const_ref_t __phase(_M_phase);
	auto const __test_fn = [=]
	  {
	    return __phase.load(memory_order_acquire) != __old_phase;
	  };
	std::__atomic_wait_address(&_M_phase, __test_fn);
      }

      void
      arrive_and_drop()
      {
	_M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
	(void)arrive(1);
      }
    };

  template<typename _CompletionF = __empty_completion>
    class barrier
    {
      // Note, we may introduce a "central" barrier algorithm at some point
      // for more space constrained targets
      using __algorithm_t = __tree_barrier<_CompletionF>;
      __algorithm_t _M_b;

    public:
      class arrival_token final
      {
      public:
	arrival_token(arrival_token&&) = default;
	arrival_token& operator=(arrival_token&&) = default;
	~arrival_token() = default;

      private:
	friend class barrier;
	using __token = typename __algorithm_t::arrival_token;
	explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
	__token _M_tok;
      };

      static constexpr ptrdiff_t
      max() noexcept
      { return __algorithm_t::max(); }

      explicit
      barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
      : _M_b(__count, std::move(__completion))
      { }

      barrier(barrier const&) = delete;
      barrier& operator=(barrier const&) = delete;

      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update = 1)
      { return arrival_token{_M_b.arrive(__update)}; }

      void
      wait(arrival_token&& __phase) const
      { _M_b.wait(std::move(__phase._M_tok)); }

      void
      arrive_and_wait()
      { wait(arrive()); }

      void
      arrive_and_drop()
      { _M_b.arrive_and_drop(); }
    };

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
#endif // __cpp_lib_atomic_wait && __cpp_aligned_new
#endif // __cplusplus > 201703L
#endif // _GLIBCXX_BARRIER
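
// Illustrative usage sketch of the std::barrier defined above (not part of
// this header; `work' and `n_iterations' are placeholder names):
//
//   std::barrier sync_point(3, []() noexcept { /* runs once per phase */ });
//
//   void worker()
//   {
//     for (int i = 0; i < n_iterations; ++i)
//       {
//	   work(i);
//	   sync_point.arrive_and_wait(); // block until all 3 threads arrive
//       }
//   }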