1 #if !defined(__XOP__) && !defined(__AVX512F__)
2 #include "simd.h"
3 ENTRY(fma_test);
4 #endif
5
/*
 * to_bool(cmp): reduce the result of a vector comparison to a scalar truth
 * value.  Nothing is defined here when AVX512VL is enabled for 16-/32-byte
 * vectors, or when to_bool() already exists for the sub-16-byte case.
 *
 * - VEC_SIZE < 16: true iff element 0 of cmp has all bits set.
 * - VEC_SIZE 16 / 32: the VTESTPS / VTESTPD "carry" builtins are applied to
 *   cmp and an all-ones vector ("(vec_t){} == 0" compares a zero vector with
 *   zero); per the instruction definition the result is non-zero iff the
 *   sign bit of every element of cmp is set.
 */
#if VEC_SIZE < 16 && !defined(to_bool)
# define to_bool(cmp) (!~(cmp)[0])
#elif VEC_SIZE == 16 && !defined(__AVX512VL__)
# if FLOAT_SIZE == 4
#  define to_bool(cmp) __builtin_ia32_vtestcps(cmp, (vec_t){} == 0)
# elif FLOAT_SIZE == 8
#  define to_bool(cmp) __builtin_ia32_vtestcpd(cmp, (vec_t){} == 0)
# endif
#elif VEC_SIZE == 32 && !defined(__AVX512VL__)
# if FLOAT_SIZE == 4
#  define to_bool(cmp) __builtin_ia32_vtestcps256(cmp, (vec_t){} == 0)
# elif FLOAT_SIZE == 8
#  define to_bool(cmp) __builtin_ia32_vtestcpd256(cmp, (vec_t){} == 0)
# endif
#endif

/* eq(x, y): to_bool() of the element-wise comparison x == y. */
#ifndef eq
# define eq(x, y) to_bool((x) == (y))
#endif

/*
 * fmaddsub(x, y, z) and addsub(x, y): wrappers selecting the instruction
 * form that matches the configured VEC_SIZE and FLOAT_SIZE.
 *
 * - AVX512F with VEC_SIZE > FLOAT_SIZE: only fmaddsub() is defined here, via
 *   the BR() helper (defined outside this file), passing ~0 as the final
 *   (presumably write-mask) argument.
 * - Otherwise, for 16- and 32-byte vectors: addsub() maps to the compiler's
 *   ADDSUBPS / ADDSUBPD builtins, and fmaddsub() is defined only when FMA or
 *   FMA4 is enabled.
 */
#if defined(__AVX512F__) && VEC_SIZE > FLOAT_SIZE
# if FLOAT_SIZE == 4
#  define fmaddsub(x, y, z) BR(vfmaddsubps, _mask, x, y, z, ~0)
# elif FLOAT_SIZE == 8
#  define fmaddsub(x, y, z) BR(vfmaddsubpd, _mask, x, y, z, ~0)
# elif FLOAT_SIZE == 2
#  define fmaddsub(x, y, z) BR(vfmaddsubph, _mask, x, y, z, ~0)
# endif
#elif VEC_SIZE == 16
# if FLOAT_SIZE == 4
#  define addsub(x, y) __builtin_ia32_addsubps(x, y)
#  if defined(__FMA4__) || defined(__FMA__)
#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps(x, y, z)
#  endif
# elif FLOAT_SIZE == 8
#  define addsub(x, y) __builtin_ia32_addsubpd(x, y)
#  if defined(__FMA4__) || defined(__FMA__)
#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd(x, y, z)
#  endif
# endif
#elif VEC_SIZE == 32
# if FLOAT_SIZE == 4
#  define addsub(x, y) __builtin_ia32_addsubps256(x, y)
#  if defined(__FMA4__) || defined(__FMA__)
#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps256(x, y, z)
#  endif
# elif FLOAT_SIZE == 8
#  define addsub(x, y) __builtin_ia32_addsubpd256(x, y)
#  if defined(__FMA4__) || defined(__FMA__)
#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd256(x, y, z)
#  endif
# endif
#endif

/*
 * Where fmaddsub() exists but no addsub() was provided above, synthesize
 * addsub(x, y) as a fused multiply-add/subtract of x by 1 and y.  With
 * AVX512F this is open-coded: the constant 1 lives in a scalar (one_) that
 * is used as an embedded-broadcast memory operand ({1toN}, N == ELEM_COUNT),
 * and y is tied to the destination register.  Otherwise the constant is
 * supplied through broadcast(1).
 */
#if defined(fmaddsub) && !defined(addsub)
# ifdef __AVX512F__
#  define addsub(x, y) ({ \
    vec_t t_; \
    typeof(t_[0]) one_ = 1; \
    asm ( "vfmaddsub231p" ELEM_SFX " %2%{1to%c4%}, %1, %0" \
          : "=v" (t_) \
          : "v" (x), "m" (one_), "0" (y), "i" (ELEM_COUNT) ); \
    t_; \
})
# else
#  define addsub(x, y) fmaddsub(x, broadcast(1), y)
# endif
#endif

/*
 * Complex half-precision helpers, available only with AVX512-FP16.
 *
 * I            - the imaginary unit as a _Float16 complex constant.
 * CELEM_COUNT  - number of complex elements: ELEM_COUNT / 2 for vectors,
 *                1 in the scalar case.
 * conj(z)      - XORs every 32-bit unit with 0x80000000, i.e. flips the top
 *                bit of each pair of _Float16 values (the sign of the
 *                second, imaginary, half of each complex element).
 * _cmul_vv / _cmul_vs(a, b, c)
 *              - complex multiply of a by a vector / a broadcast scalar b;
 *                the token c is pasted into the mnemonic, giving
 *                vfmulc{ph,sh} when empty and vfcmulc{ph,sh} when "c".
 * cmadd_vv / cmadd_vs(a, b, c)
 *              - vfmaddc{ph,sh} of a and b (vector / broadcast scalar) with
 *                c supplied as the initial destination value.
 * cmul_vv, cmulc_vv, cmul_vs, cmulc_vs
 *              - convenience wrappers selecting the two _cmul_* flavours.
 *
 * The vector forms use vec_t and embedded broadcast ({1toN}) for scalar
 * operands; the scalar forms use the two-element cvec_t defined below.
 */
#ifdef __AVX512FP16__
# define I (1.if16)
# if VEC_SIZE > FLOAT_SIZE
#  define CELEM_COUNT (ELEM_COUNT / 2)
static const unsigned int conj_mask = 0x80000000;
#  define conj(z) ({ \
    vec_t r_; \
    asm ( "vpxord %2%{1to%c3%}, %1, %0" \
          : "=v" (r_) \
          : "v" (z), "m" (conj_mask), "i" (CELEM_COUNT) ); \
    r_; \
})
#  define _cmul_vv(a, b, c) BR2(vf##c##mulcph, , a, b)
#  define _cmul_vs(a, b, c) ({ \
    vec_t r_; \
    _Complex _Float16 b_ = (b); \
    asm ( "vf"#c"mulcph %2%{1to%c3%}, %1, %0" \
          : "=v" (r_) \
          : "v" (a), "m" (b_), "i" (CELEM_COUNT) ); \
    r_; \
})
#  define cmadd_vv(a, b, c) BR2(vfmaddcph, , a, b, c)
#  define cmadd_vs(a, b, c) ({ \
    _Complex _Float16 b_ = (b); \
    vec_t r_; \
    asm ( "vfmaddcph %2%{1to%c3%}, %1, %0" \
          : "=v" (r_) \
          : "v" (a), "m" (b_), "i" (CELEM_COUNT), "0" (c) ); \
    r_; \
})
# else
#  define CELEM_COUNT 1
typedef _Float16 __attribute__((vector_size(4))) cvec_t;
#  define conj(z) ({ \
    cvec_t r_; \
    asm ( "xor $0x80000000, %0" : "=rm" (r_) : "0" (z) ); \
    r_; \
})
#  define _cmul_vv(a, b, c) ({ \
    cvec_t r_; \
    /* "=&x" to force destination to be different from both sources */ \
    asm ( "vf"#c"mulcsh %2, %1, %0" : "=&x" (r_) : "x" (a), "m" (b) ); \
    r_; \
})
#  define _cmul_vs(a, b, c) ({ \
    _Complex _Float16 b_ = (b); \
    cvec_t r_; \
    /* "=&x" to force destination to be different from both sources */ \
    asm ( "vf"#c"mulcsh %2, %1, %0" : "=&x" (r_) : "x" (a), "m" (b_) ); \
    r_; \
})
#  define cmadd_vv(a, b, c) ({ \
    cvec_t r_ = (c); \
    asm ( "vfmaddcsh %2, %1, %0" : "+x" (r_) : "x" (a), "m" (b) ); \
    r_; \
})
#  define cmadd_vs(a, b, c) ({ \
    _Complex _Float16 b_ = (b); \
    cvec_t r_ = (c); \
    asm ( "vfmaddcsh %2, %1, %0" : "+x" (r_) : "x" (a), "m" (b_) ); \
    r_; \
})
# endif
# define cmul_vv(a, b) _cmul_vv(a, b, )
# define cmulc_vv(a, b) _cmul_vv(a, b, c)
# define cmul_vs(a, b) _cmul_vs(a, b, )
# define cmulc_vs(a, b) _cmul_vs(a, b, c)
#endif

fma_test(void)144 int fma_test(void)
145 {
146 unsigned int i;
147 vec_t x, y, z, src, inv, one;
148 #ifdef __AVX512F__
149 typeof(one[0]) one_ = 1;
150 #endif
151
152 for ( i = 0; i < ELEM_COUNT; ++i )
153 {
154 src[i] = i + 1;
155 inv[i] = ELEM_COUNT - i;
156 one[i] = 1;
157 }
158
159 #ifdef __AVX512F__
160 # define one one_
161 #endif
162
163 x = (src + one) * inv;
164 y = (src - one) * inv;
165 touch(src);
166 z = inv * src + inv;
167 if ( !eq(x, z) ) return __LINE__;
168
169 touch(src);
170 z = -inv * src - inv;
171 if ( !eq(-x, z) ) return __LINE__;
172
173 touch(src);
174 z = inv * src - inv;
175 if ( !eq(y, z) ) return __LINE__;
176
177 touch(src);
178 z = -inv * src + inv;
179 if ( !eq(-y, z) ) return __LINE__;
180 touch(src);
181
182 x = src + inv;
183 y = src - inv;
184 touch(inv);
185 touch(one);
186 z = src * one + inv;
187 if ( !eq(x, z) ) return __LINE__;
188
189 touch(inv);
190 touch(one);
191 z = -src * one - inv;
192 if ( !eq(-x, z) ) return __LINE__;
193
194 touch(inv);
195 touch(one);
196 z = src * one - inv;
197 if ( !eq(y, z) ) return __LINE__;
198
199 touch(inv);
200 touch(one);
201 z = -src * one + inv;
202 if ( !eq(-y, z) ) return __LINE__;
203 touch(inv);
204
205 #undef one
206
207 #if defined(addsub) && defined(fmaddsub)
208 x = addsub(src * inv, one);
209 y = addsub(src * inv, -one);
210 touch(one);
211 z = fmaddsub(src, inv, one);
212 if ( !eq(x, z) ) return __LINE__;
213
214 touch(one);
215 z = fmaddsub(src, inv, -one);
216 if ( !eq(y, z) ) return __LINE__;
217 touch(one);
218
219 x = addsub(src * inv, one);
220 touch(inv);
221 z = fmaddsub(src, inv, one);
222 if ( !eq(x, z) ) return __LINE__;
223
224 touch(inv);
225 z = fmaddsub(src, inv, -one);
226 if ( !eq(y, z) ) return __LINE__;
227 touch(inv);
228 #endif
229
230 #ifdef CELEM_COUNT
231
232 # if VEC_SIZE > FLOAT_SIZE
233 # define cvec_t vec_t
234 # define ceq eq
235 # else
236 {
237 /* Cannot re-use the function-scope variables (for being too small). */
238 cvec_t x, y, z, src = { 1, 2 }, inv = { 2, 1 }, one = { 1, 1 };
239 # define ceq(x, y) ({ \
240 unsigned int r_; \
241 asm ( "vcmpph $0, %1, %2, %0" : "=k" (r_) : "x" (x), "x" (y) ); \
242 (r_ & 3) == 3; \
243 })
244 # endif
245
246 /* (a * i)² == -a² */
247 x = cmul_vs(src, I);
248 y = cmul_vv(x, x);
249 x = -src;
250 touch(src);
251 z = cmul_vv(x, src);
252 if ( !ceq(y, z) ) return __LINE__;
253
254 /* conj(a * b) == conj(a) * conj(b) */
255 touch(src);
256 x = conj(src);
257 touch(inv);
258 y = cmulc_vv(x, inv);
259 touch(src);
260 touch(inv);
261 z = conj(cmul_vv(src, inv));
262 if ( !ceq(y, z) ) return __LINE__;
263
264 /* a * conj(a) == |a|² */
265 touch(src);
266 y = src;
267 touch(src);
268 x = cmulc_vv(y, src);
269 y *= y;
270 for ( i = 0; i < ELEM_COUNT; i += 2 )
271 {
272 if ( x[i] != y[i] + y[i + 1] ) return __LINE__;
273 if ( x[i + 1] ) return __LINE__;
274 }
275
276 /* a * b == b * a + 0 */
277 touch(src);
278 touch(inv);
279 x = cmul_vv(src, inv);
280 touch(src);
281 touch(inv);
282 y = cmadd_vv(inv, src, (cvec_t){});
283 if ( !ceq(x, y) ) return __LINE__;
284
285 /* a * 1 + b == b * 1 + a */
286 touch(src);
287 touch(inv);
288 x = cmadd_vs(src, 1, inv);
289 for ( i = 0; i < ELEM_COUNT; i += 2 )
290 {
291 z[i] = 1;
292 z[i + 1] = 0;
293 }
294 touch(z);
295 y = cmadd_vv(inv, z, src);
296 if ( !ceq(x, y) ) return __LINE__;
297
298 /* (a + b) * c == a * c + b * c */
299 touch(one);
300 touch(inv);
301 x = cmul_vv(src + one, inv);
302 touch(inv);
303 y = cmul_vv(one, inv);
304 touch(inv);
305 z = cmadd_vv(src, inv, y);
306 if ( !ceq(x, z) ) return __LINE__;
307
308 /* a * i + conj(a) == (Re(a) - Im(a)) * (1 + i) */
309 x = cmadd_vs(src, I, conj(src));
310 for ( i = 0; i < ELEM_COUNT; i += 2 )
311 {
312 typeof(x[0]) val = src[i] - src[i + 1];
313
314 if ( x[i] != val ) return __LINE__;
315 if ( x[i + 1] != val ) return __LINE__;
316 }
317
318 # if VEC_SIZE == FLOAT_SIZE
319 }
320 # endif
321
322 #endif /* CELEM_COUNT */
323
324 return 0;
325 }
326