// Internal macros for the simd implementation -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_

// Everything below is only provided when compiling as C++17 or later.
#if __cplusplus >= 201703L

#include <cstddef>
#include <cstdint>

/// @cond undocumented

// Open / close the nested namespaces std::experimental::parallelism_v2
// (the innermost one is an inline namespace).  Every brace opened by
// _GLIBCXX_SIMD_BEGIN_NAMESPACE is closed by _GLIBCXX_SIMD_END_NAMESPACE, so
// the two macros must always be used as a pair.
#define _GLIBCXX_SIMD_BEGIN_NAMESPACE \
  namespace std _GLIBCXX_VISIBILITY(default) \
  { \
  _GLIBCXX_BEGIN_NAMESPACE_VERSION \
    namespace experimental { \
    inline namespace parallelism_v2 {
#define _GLIBCXX_SIMD_END_NAMESPACE \
  } \
  } \
  _GLIBCXX_END_NAMESPACE_VERSION \
  }

// ISA extension detection. The following defines all the
// _GLIBCXX_SIMD_HAVE_XXX macros.  Each of them is always defined, to 1 if the
// tested compiler macro(s) are present and to 0 otherwise, so they can be used
// directly in #if expressions and in ordinary C++ expressions.
// ARM{{{
#if defined __ARM_NEON
#define _GLIBCXX_SIMD_HAVE_NEON 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON 0
#endif
// NEON together with __ARM_ARCH >= 8 or __aarch64__.
#if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
#define _GLIBCXX_SIMD_HAVE_NEON_A32 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A32 0
#endif
// NEON together with __aarch64__.
#if defined __ARM_NEON && defined __aarch64__
#define _GLIBCXX_SIMD_HAVE_NEON_A64 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#endif
//}}}
// x86{{{
#ifdef __MMX__
#define _GLIBCXX_SIMD_HAVE_MMX 1
#else
#define _GLIBCXX_SIMD_HAVE_MMX 0
#endif
// SSE and SSE2 are additionally treated as available whenever __x86_64__ is
// defined, even if __SSE__ / __SSE2__ themselves are not.
#if defined __SSE__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE 0
#endif
#if defined __SSE2__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE2 0
#endif
#ifdef __SSE3__
#define _GLIBCXX_SIMD_HAVE_SSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE3 0
#endif
#ifdef __SSSE3__
#define _GLIBCXX_SIMD_HAVE_SSSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSSE3 0
#endif
#ifdef __SSE4_1__
#define _GLIBCXX_SIMD_HAVE_SSE4_1 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_1 0
#endif
#ifdef __SSE4_2__
#define _GLIBCXX_SIMD_HAVE_SSE4_2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_2 0
#endif
#ifdef __XOP__
#define _GLIBCXX_SIMD_HAVE_XOP 1
#else
#define _GLIBCXX_SIMD_HAVE_XOP 0
#endif
#ifdef __AVX__
#define _GLIBCXX_SIMD_HAVE_AVX 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX 0
#endif
#ifdef __AVX2__
#define _GLIBCXX_SIMD_HAVE_AVX2 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX2 0
#endif
// Note the spelling: the compiler macro is __BMI__, the library macro BMI1.
#ifdef __BMI__
#define _GLIBCXX_SIMD_HAVE_BMI1 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI1 0
#endif
#ifdef __BMI2__
#define _GLIBCXX_SIMD_HAVE_BMI2 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI2 0
#endif
#ifdef __LZCNT__
#define _GLIBCXX_SIMD_HAVE_LZCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_LZCNT 0
#endif
#ifdef __SSE4A__
#define _GLIBCXX_SIMD_HAVE_SSE4A 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4A 0
#endif
#ifdef __FMA__
#define _GLIBCXX_SIMD_HAVE_FMA 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA 0
#endif
#ifdef __FMA4__
#define _GLIBCXX_SIMD_HAVE_FMA4 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA4 0
#endif
#ifdef __F16C__
#define _GLIBCXX_SIMD_HAVE_F16C 1
#else
#define _GLIBCXX_SIMD_HAVE_F16C 0
#endif
#ifdef __POPCNT__
#define _GLIBCXX_SIMD_HAVE_POPCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_POPCNT 0
#endif
#ifdef __AVX512F__
#define _GLIBCXX_SIMD_HAVE_AVX512F 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512F 0
#endif
#ifdef __AVX512DQ__
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
#endif
#ifdef __AVX512VL__
#define _GLIBCXX_SIMD_HAVE_AVX512VL 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512VL 0
#endif
#ifdef __AVX512BW__
#define _GLIBCXX_SIMD_HAVE_AVX512BW 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512BW 0
#endif

// ABI availability, derived from the ISA macros above.  For each register
// width there is a basic and a "FULL" variant:
//   SSE_ABI    <- SSE        FULL_SSE_ABI    <- SSE2
//   AVX_ABI    <- AVX        FULL_AVX_ABI    <- AVX2
//   AVX512_ABI <- AVX512F    FULL_AVX512_ABI <- AVX512BW
#if _GLIBCXX_SIMD_HAVE_SSE
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_SSE2
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX2
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX512F
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX512BW
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
#endif

// Sanity check.  With the definitions above this cannot trigger, because
// __x86_64__ alone already sets _GLIBCXX_SIMD_HAVE_SSE2 to 1.
#if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
#error "Use of SSE2 is required on AMD64"
#endif
//}}}

// Function attribute macros.
// _GLIBCXX_SIMD_NORMAL_MATH expands to nothing when __clang__ is defined;
// otherwise it applies the optimize attribute with
// "finite-math-only,no-signed-zeros" to the annotated function.
#ifdef __clang__
#define _GLIBCXX_SIMD_NORMAL_MATH
#else
#define _GLIBCXX_SIMD_NORMAL_MATH \
  [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
#endif
#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
// _GLIBCXX_SIMD_INTRINSIC is _GLIBCXX_SIMD_ALWAYS_INLINE plus the artificial
// attribute.  Both are downgraded to plain `inline` further below when
// _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined.
#define _GLIBCXX_SIMD_INTRINSIC \
  [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
// Branch prediction hints: the value of __x is passed through unchanged, with
// the expected value given as 0 (unlikely) or 1 (likely).
#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)

// When __STRICT_ANSI__ is defined and non-zero, _GLIBCXX_SIMD_CONSTEXPR
// expands to nothing and _GLIBCXX_SIMD_USE_CONSTEXPR_API to `const`; otherwise
// both expand to `constexpr`.
#if defined __STRICT_ANSI__ && __STRICT_ANSI__
#define _GLIBCXX_SIMD_CONSTEXPR
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
#else
#define _GLIBCXX_SIMD_CONSTEXPR constexpr
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
#endif

// `const` when __clang__ is defined, `constexpr` otherwise.
#if defined __clang__
#define _GLIBCXX_SIMD_USE_CONSTEXPR const
#else
#define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
#endif

// X-macro lists: invoke __macro once per operator token of the given family.
#define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
#define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) \
  __macro(+) __macro(-) __macro(*) __macro(/) __macro(%)

// Same lists followed by `static_assert(true)` (without a semicolon) --
// presumably so that the invocation can be terminated with a `;` at the use
// site.
#define _GLIBCXX_SIMD_ALL_BINARY(__macro) \
  _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_SHIFTS(__macro) \
  _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro) \
  _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)

// Opt-out switch: defining _GLIBCXX_SIMD_NO_ALWAYS_INLINE before this header
// is processed replaces the forced-inline attribute macros with plain
// `inline`.
#ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
#undef _GLIBCXX_SIMD_ALWAYS_INLINE
#define _GLIBCXX_SIMD_ALWAYS_INLINE inline
#undef _GLIBCXX_SIMD_INTRINSIC
#define _GLIBCXX_SIMD_INTRINSIC inline
#endif

// 1 if SSE or MMX was detected above, 0 otherwise.
#if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
#define _GLIBCXX_SIMD_X86INTRIN 1
#else
#define _GLIBCXX_SIMD_X86INTRIN 0
#endif

// workaround macros {{{
// Unlike the _GLIBCXX_SIMD_HAVE_XXX macros, the x86-only workaround macros
// below are left undefined (not defined to 0) when _GLIBCXX_SIMD_X86INTRIN is
// 0.

// use aliasing loads to help GCC understand the data accesses better
// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
// fixed_size_simd<float, 16> x.
#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1

// vector conversions on x86 not optimized:
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
#endif

// integer division not optimized
#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1

// very bad codegen for extraction and concatenation of 128/256 "subregisters"
// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
#endif

// bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>
#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1

// bad codegen for zero-extend using simple concat(__x, 0)
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
#endif

// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
// of static_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1

// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
// constraint on (static)_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
// }}}

/// @endcond

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_

// vim: foldmethod=marker