1 #if !defined(__XOP__) && !defined(__AVX512F__)
2 #include "simd.h"
3 ENTRY(fma_test);
4 #endif
5 
/*
 * to_bool(cmp): collapse a vector comparison result into a scalar truth
 * value.
 *  - VEC_SIZE < 16 (and no earlier to_bool): true iff every bit of element 0
 *    of cmp is set.
 *  - VEC_SIZE == 16 or 32 without AVX512VL: hand cmp to the VTESTC builtin
 *    matching FLOAT_SIZE, with "(vec_t){} == 0" as the second operand (a
 *    vector comparison of zero against zero).
 * No definition is supplied here for any other configuration.
 */
#if VEC_SIZE < 16 && !defined(to_bool)
# define to_bool(cmp) (!~(cmp)[0])
#elif VEC_SIZE == 16 && !defined(__AVX512VL__)
# if FLOAT_SIZE == 4
#  define to_bool(cmp) __builtin_ia32_vtestcps(cmp, (vec_t){} == 0)
# elif FLOAT_SIZE == 8
#  define to_bool(cmp) __builtin_ia32_vtestcpd(cmp, (vec_t){} == 0)
# endif
#elif VEC_SIZE == 32 && !defined(__AVX512VL__)
# if FLOAT_SIZE == 4
#  define to_bool(cmp) __builtin_ia32_vtestcps256(cmp, (vec_t){} == 0)
# elif FLOAT_SIZE == 8
#  define to_bool(cmp) __builtin_ia32_vtestcpd256(cmp, (vec_t){} == 0)
# endif
#endif

/*
 * eq(x, y): default equality check, used unless eq was already defined.
 * Compares the operands with == and reduces the result through to_bool().
 */
#ifndef eq
# define eq(x, y) to_bool((x) == (y))
#endif

/*
 * Pick the primitives behind the addsub() / fmaddsub() checks in fma_test.
 *  - AVX512F with VEC_SIZE > FLOAT_SIZE: fmaddsub() goes through the BR()
 *    wrapper (defined elsewhere) using the "_mask" variant with a mask
 *    argument of ~0.  addsub() is not defined in this branch.
 *  - Otherwise, for 16- and 32-byte vectors: addsub() maps onto the ADDSUBPS /
 *    ADDSUBPD builtin matching FLOAT_SIZE, and fmaddsub() is defined only
 *    when __FMA4__ or __FMA__ is available.
 * Any other configuration leaves both macros undefined.
 */
#if defined(__AVX512F__) && VEC_SIZE > FLOAT_SIZE
# if FLOAT_SIZE == 4
#  define fmaddsub(x, y, z) BR(vfmaddsubps, _mask, x, y, z, ~0)
# elif FLOAT_SIZE == 8
#  define fmaddsub(x, y, z) BR(vfmaddsubpd, _mask, x, y, z, ~0)
# elif FLOAT_SIZE == 2
#  define fmaddsub(x, y, z) BR(vfmaddsubph, _mask, x, y, z, ~0)
# endif
#elif VEC_SIZE == 16
# if FLOAT_SIZE == 4
#  define addsub(x, y) __builtin_ia32_addsubps(x, y)
#  if defined(__FMA4__) || defined(__FMA__)
#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps(x, y, z)
#  endif
# elif FLOAT_SIZE == 8
#  define addsub(x, y) __builtin_ia32_addsubpd(x, y)
#  if defined(__FMA4__) || defined(__FMA__)
#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd(x, y, z)
#  endif
# endif
#elif VEC_SIZE == 32
# if FLOAT_SIZE == 4
#  define addsub(x, y) __builtin_ia32_addsubps256(x, y)
#  if defined(__FMA4__) || defined(__FMA__)
#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubps256(x, y, z)
#  endif
# elif FLOAT_SIZE == 8
#  define addsub(x, y) __builtin_ia32_addsubpd256(x, y)
#  if defined(__FMA4__) || defined(__FMA__)
#   define fmaddsub(x, y, z) __builtin_ia32_vfmaddsubpd256(x, y, z)
#  endif
# endif
#endif

/*
 * Fallback addsub(x, y) for configurations that have fmaddsub() but no
 * dedicated addsub primitive: express it as a fused multiply by 1.
 *  - AVX512F: inline VFMADDSUB231P<ELEM_SFX>.  y is tied to the destination
 *    register ("0" constraint), x is the register source, and the scalar 1
 *    is a memory operand broadcast to ELEM_COUNT elements ({1toN}).
 *  - Otherwise: fmaddsub(x, broadcast(1), y), with broadcast() defined
 *    elsewhere.
 */
#if defined(fmaddsub) && !defined(addsub)
# ifdef __AVX512F__
#  define addsub(x, y) ({ \
    vec_t t_; \
    typeof(t_[0]) one_ = 1; \
    asm ( "vfmaddsub231p" ELEM_SFX " %2%{1to%c4%}, %1, %0" \
          : "=v" (t_) \
          : "v" (x), "m" (one_), "0" (y), "i" (ELEM_COUNT) ); \
    t_; \
})
# else
#  define addsub(x, y) fmaddsub(x, broadcast(1), y)
# endif
#endif

/*
 * Complex half-precision helpers, available only with __AVX512FP16__.
 *
 * I is the imaginary-unit constant (1.if16).  CELEM_COUNT doubles as the
 * "complex tests are available" flag checked by fma_test.
 *
 * The trailing macro parameter "c" of _cmul_vv() / _cmul_vs() is pasted into
 * the instruction mnemonic: empty selects vfmulc*, "c" selects vfcmulc* (see
 * the cmul_* / cmulc_* wrappers at the end of this block).
 *
 * "_vv" variants take two vector operands; "_vs" variants take a vector and
 * a _Complex _Float16 scalar.
 */
#ifdef __AVX512FP16__
# define I (1.if16)
# if VEC_SIZE > FLOAT_SIZE
/*
 * Packed case: CELEM_COUNT is half of ELEM_COUNT.  The "_vs" helpers and
 * conj() use a memory operand broadcast to CELEM_COUNT 32-bit elements.
 */
#  define CELEM_COUNT (ELEM_COUNT / 2)
/* XOR mask for conj(): bit 31 of each 32-bit element. */
static const unsigned int conj_mask = 0x80000000;
#  define conj(z) ({ \
    vec_t r_; \
    asm ( "vpxord %2%{1to%c3%}, %1, %0" \
          : "=v" (r_) \
          : "v" (z), "m" (conj_mask), "i" (CELEM_COUNT) ); \
    r_; \
})
#  define _cmul_vv(a, b, c)  BR2(vf##c##mulcph, , a, b)
#  define _cmul_vs(a, b, c) ({ \
    vec_t r_; \
    _Complex _Float16 b_ = (b); \
    asm ( "vf"#c"mulcph %2%{1to%c3%}, %1, %0" \
          : "=v" (r_) \
          : "v" (a), "m" (b_), "i" (CELEM_COUNT) ); \
    r_; \
})
#  define cmadd_vv(a, b, c) BR2(vfmaddcph, , a, b, c)
/* The accumulator c is tied to the output register via the "0" constraint. */
#  define cmadd_vs(a, b, c) ({ \
    _Complex _Float16 b_ = (b); \
    vec_t r_; \
    asm ( "vfmaddcph %2%{1to%c3%}, %1, %0" \
          : "=v" (r_) \
          : "v" (a), "m" (b_), "i" (CELEM_COUNT), "0" (c) ); \
    r_; \
})
# else
/*
 * Scalar case: a single complex value held in a dedicated two-element
 * _Float16 vector type (4 bytes), operated on with the *sh instruction forms.
 * conj() uses an integer XOR with 0x80000000 on that 4-byte value.
 */
#  define CELEM_COUNT 1
typedef _Float16 __attribute__((vector_size(4))) cvec_t;
#  define conj(z) ({ \
    cvec_t r_; \
    asm ( "xor $0x80000000, %0" : "=rm" (r_) : "0" (z) ); \
    r_; \
})
#  define _cmul_vv(a, b, c) ({ \
    cvec_t r_; \
    /* "=&x" to force destination to be different from both sources */ \
    asm ( "vf"#c"mulcsh %2, %1, %0" : "=&x" (r_) : "x" (a), "m" (b) ); \
    r_; \
})
#  define _cmul_vs(a, b, c) ({ \
    _Complex _Float16 b_ = (b); \
    cvec_t r_; \
    /* "=&x" to force destination to be different from both sources */ \
    asm ( "vf"#c"mulcsh %2, %1, %0" : "=&x" (r_) : "x" (a), "m" (b_) ); \
    r_; \
})
/* r_ starts out as the accumulator c and is updated in place ("+x"). */
#  define cmadd_vv(a, b, c) ({ \
    cvec_t r_ = (c); \
    asm ( "vfmaddcsh %2, %1, %0" : "+x" (r_) : "x" (a), "m" (b) ); \
    r_; \
})
#  define cmadd_vs(a, b, c) ({ \
    _Complex _Float16 b_ = (b); \
    cvec_t r_ = (c); \
    asm ( "vfmaddcsh %2, %1, %0" : "+x" (r_) : "x" (a), "m" (b_) ); \
    r_; \
})
# endif
/* Public wrappers: plain multiply (vfmulc*) and the "c" form (vfcmulc*). */
# define cmul_vv(a, b) _cmul_vv(a, b, )
# define cmulc_vv(a, b) _cmul_vv(a, b, c)
# define cmul_vs(a, b) _cmul_vs(a, b, )
# define cmulc_vs(a, b) _cmul_vs(a, b, c)
#endif

fma_test(void)144 int fma_test(void)
145 {
146     unsigned int i;
147     vec_t x, y, z, src, inv, one;
148 #ifdef __AVX512F__
149     typeof(one[0]) one_ = 1;
150 #endif
151 
152     for ( i = 0; i < ELEM_COUNT; ++i )
153     {
154         src[i] = i + 1;
155         inv[i] = ELEM_COUNT - i;
156         one[i] = 1;
157     }
158 
159 #ifdef __AVX512F__
160 # define one one_
161 #endif
162 
163     x = (src + one) * inv;
164     y = (src - one) * inv;
165     touch(src);
166     z = inv * src + inv;
167     if ( !eq(x, z) ) return __LINE__;
168 
169     touch(src);
170     z = -inv * src - inv;
171     if ( !eq(-x, z) ) return __LINE__;
172 
173     touch(src);
174     z = inv * src - inv;
175     if ( !eq(y, z) ) return __LINE__;
176 
177     touch(src);
178     z = -inv * src + inv;
179     if ( !eq(-y, z) ) return __LINE__;
180     touch(src);
181 
182     x = src + inv;
183     y = src - inv;
184     touch(inv);
185     touch(one);
186     z = src * one + inv;
187     if ( !eq(x, z) ) return __LINE__;
188 
189     touch(inv);
190     touch(one);
191     z = -src * one - inv;
192     if ( !eq(-x, z) ) return __LINE__;
193 
194     touch(inv);
195     touch(one);
196     z = src * one - inv;
197     if ( !eq(y, z) ) return __LINE__;
198 
199     touch(inv);
200     touch(one);
201     z = -src * one + inv;
202     if ( !eq(-y, z) ) return __LINE__;
203     touch(inv);
204 
205 #undef one
206 
207 #if defined(addsub) && defined(fmaddsub)
208     x = addsub(src * inv, one);
209     y = addsub(src * inv, -one);
210     touch(one);
211     z = fmaddsub(src, inv, one);
212     if ( !eq(x, z) ) return __LINE__;
213 
214     touch(one);
215     z = fmaddsub(src, inv, -one);
216     if ( !eq(y, z) ) return __LINE__;
217     touch(one);
218 
219     x = addsub(src * inv, one);
220     touch(inv);
221     z = fmaddsub(src, inv, one);
222     if ( !eq(x, z) ) return __LINE__;
223 
224     touch(inv);
225     z = fmaddsub(src, inv, -one);
226     if ( !eq(y, z) ) return __LINE__;
227     touch(inv);
228 #endif
229 
230 #ifdef CELEM_COUNT
231 
232 # if VEC_SIZE > FLOAT_SIZE
233 #  define cvec_t vec_t
234 #  define ceq eq
235 # else
236   {
237     /* Cannot re-use the function-scope variables (for being too small). */
238     cvec_t x, y, z, src = { 1, 2 }, inv = { 2, 1 }, one = { 1, 1 };
239 #  define ceq(x, y) ({ \
240     unsigned int r_; \
241     asm ( "vcmpph $0, %1, %2, %0"  : "=k" (r_) : "x" (x), "x" (y) ); \
242     (r_ & 3) == 3; \
243 })
244 # endif
245 
246     /* (a * i)² == -a² */
247     x = cmul_vs(src, I);
248     y = cmul_vv(x, x);
249     x = -src;
250     touch(src);
251     z = cmul_vv(x, src);
252     if ( !ceq(y, z) ) return __LINE__;
253 
254     /* conj(a * b) == conj(a) * conj(b) */
255     touch(src);
256     x = conj(src);
257     touch(inv);
258     y = cmulc_vv(x, inv);
259     touch(src);
260     touch(inv);
261     z = conj(cmul_vv(src, inv));
262     if ( !ceq(y, z) ) return __LINE__;
263 
264     /* a * conj(a) == |a|² */
265     touch(src);
266     y = src;
267     touch(src);
268     x = cmulc_vv(y, src);
269     y *= y;
270     for ( i = 0; i < ELEM_COUNT; i += 2 )
271     {
272         if ( x[i] != y[i] + y[i + 1] ) return __LINE__;
273         if ( x[i + 1] ) return __LINE__;
274     }
275 
276     /* a * b == b * a + 0 */
277     touch(src);
278     touch(inv);
279     x = cmul_vv(src, inv);
280     touch(src);
281     touch(inv);
282     y = cmadd_vv(inv, src, (cvec_t){});
283     if ( !ceq(x, y) ) return __LINE__;
284 
285     /* a * 1 + b == b * 1 + a */
286     touch(src);
287     touch(inv);
288     x = cmadd_vs(src, 1, inv);
289     for ( i = 0; i < ELEM_COUNT; i += 2 )
290     {
291         z[i] = 1;
292         z[i + 1] = 0;
293     }
294     touch(z);
295     y = cmadd_vv(inv, z, src);
296     if ( !ceq(x, y) ) return __LINE__;
297 
298     /* (a + b) * c == a * c + b * c */
299     touch(one);
300     touch(inv);
301     x = cmul_vv(src + one, inv);
302     touch(inv);
303     y = cmul_vv(one, inv);
304     touch(inv);
305     z = cmadd_vv(src, inv, y);
306     if ( !ceq(x, z) ) return __LINE__;
307 
308     /* a * i + conj(a) == (Re(a) - Im(a)) * (1 + i) */
309     x = cmadd_vs(src, I, conj(src));
310     for ( i = 0; i < ELEM_COUNT; i += 2 )
311     {
312         typeof(x[0]) val = src[i] - src[i + 1];
313 
314         if ( x[i] != val ) return __LINE__;
315         if ( x[i + 1] != val ) return __LINE__;
316     }
317 
318 # if VEC_SIZE == FLOAT_SIZE
319   }
320 # endif
321 
322 #endif /* CELEM_COUNT */
323 
324     return 0;
325 }
326