Lines Matching defs:x

113 # define to_int(x) ((vec_t){ (int)(x)[0] })  argument
116 # define to_int(x) __builtin_ia32_cvtdq2ps(__builtin_ia32_cvtps2dq(x)) argument
118 # define to_int(x) __builtin_ia32_cvtdq2pd(__builtin_ia32_cvtpd2dq(x)) argument
122 # define to_int(x) __builtin_ia32_cvtdq2ps256(__builtin_ia32_cvtps2dq256(x)) argument
124 # define to_int(x) __builtin_ia32_cvtdq2pd256(__builtin_ia32_cvtpd2dq256(x)) argument
129 # define scalar_1op(x, op) ({ \ argument
138 # define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss256(&t_); }) argument
139 # define max(x, y) __builtin_ia32_maxps256(x, y) argument
140 # define min(x, y) __builtin_ia32_minps256(x, y) argument
141 # define recip(x) __builtin_ia32_rcpps256(x) argument
142 # define rsqrt(x) __builtin_ia32_rsqrtps256(x) argument
143 # define sqrt(x) __builtin_ia32_sqrtps256(x) argument
144 # define swap(x) ({ \ argument
148 # define swap2(x) ({ \ argument
154 # define broadcast(x) ({ float t_ = (x); __builtin_ia32_vbroadcastss(&t_); }) argument
156 # define interleave_hi(x, y) __builtin_ia32_unpckhps(x, y) argument
157 # define interleave_lo(x, y) __builtin_ia32_unpcklps(x, y) argument
158 # define max(x, y) __builtin_ia32_maxps(x, y) argument
159 # define min(x, y) __builtin_ia32_minps(x, y) argument
160 # define recip(x) __builtin_ia32_rcpps(x) argument
161 # define rsqrt(x) __builtin_ia32_rsqrtps(x) argument
162 # define sqrt(x) __builtin_ia32_sqrtps(x) argument
163 # define swap(x) __builtin_ia32_shufps(x, x, 0b00011011) argument
165 # define swap2(x) __builtin_ia32_vpermilvarps(x, __builtin_ia32_cvtps2dq(inv) - 1) argument
168 # define recip(x) scalar_1op(x, "rcpss %[in], %[out]") argument
169 # define rsqrt(x) scalar_1op(x, "rsqrtss %[in], %[out]") argument
170 # define sqrt(x) scalar_1op(x, "sqrtss %[in], %[out]") argument
174 # define broadcast(x) ({ double t_ = (x); __builtin_ia32_vbroadcastsd256(&t_); }) argument
175 # define max(x, y) __builtin_ia32_maxpd256(x, y) argument
176 # define min(x, y) __builtin_ia32_minpd256(x, y) argument
177 # define recip(x) ({ \ argument
184 # define rsqrt(x) ({ \ argument
191 # define sqrt(x) __builtin_ia32_sqrtpd256(x) argument
192 # define swap(x) ({ \ argument
197 # define interleave_hi(x, y) __builtin_ia32_unpckhpd(x, y) argument
198 # define interleave_lo(x, y) __builtin_ia32_unpcklpd(x, y) argument
199 # define max(x, y) __builtin_ia32_maxpd(x, y) argument
200 # define min(x, y) __builtin_ia32_minpd(x, y) argument
201 # define recip(x) __builtin_ia32_cvtps2pd(__builtin_ia32_rcpps(__builtin_ia32_cvtpd2ps(x))) argument
202 # define rsqrt(x) __builtin_ia32_cvtps2pd(__builtin_ia32_rsqrtps(__builtin_ia32_cvtpd2ps(x))) argument
203 # define sqrt(x) __builtin_ia32_sqrtpd(x) argument
204 # define swap(x) __builtin_ia32_shufpd(x, x, 0b01) argument
206 # define swap2(x) __builtin_ia32_vpermilvarpd(x, __builtin_ia32_pmovsxdq128( \ argument
210 # define recip(x) scalar_1op(x, "cvtsd2ss %[in], %[out]; rcpss %[out], %[out]; cvtss2sd %[out], %[… argument
211 # define rsqrt(x) scalar_1op(x, "cvtsd2ss %[in], %[out]; rsqrtss %[out], %[out]; cvtss2sd %[out], … argument
212 # define sqrt(x) scalar_1op(x, "sqrtsd %[in], %[out]") argument
217 # define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhbw128((vqi_t)(x), (vqi_t)(y))) argument
218 # define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpcklbw128((vqi_t)(x), (vqi_t)(y))) argument
220 # define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhwd128((vhi_t)(x), (vhi_t)(y))) argument
221 # define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpcklwd128((vhi_t)(x), (vhi_t)(y))) argument
222 # define swap(x) ((vec_t)__builtin_ia32_pshufd( \ argument
226 # define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhdq128((vsi_t)(x), (vsi_t)(y))) argument
227 # define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpckldq128((vsi_t)(x), (vsi_t)(y))) argument
228 # define swap(x) ((vec_t)__builtin_ia32_pshufd((vsi_t)(x), 0b00011011)) argument
230 # define interleave_hi(x, y) ((vec_t)__builtin_ia32_punpckhqdq128((vdi_t)(x), (vdi_t)(y))) argument
231 # define interleave_lo(x, y) ((vec_t)__builtin_ia32_punpcklqdq128((vdi_t)(x), (vdi_t)(y))) argument
232 # define swap(x) ((vec_t)__builtin_ia32_pshufd((vsi_t)(x), 0b01001110)) argument
235 # define max(x, y) ((vec_t)__builtin_ia32_pmaxub128((vqi_t)(x), (vqi_t)(y))) argument
236 # define min(x, y) ((vec_t)__builtin_ia32_pminub128((vqi_t)(x), (vqi_t)(y))) argument
238 # define max(x, y) __builtin_ia32_pmaxsw128(x, y) argument
239 # define min(x, y) __builtin_ia32_pminsw128(x, y) argument
240 # define mul_hi(x, y) __builtin_ia32_pmulhw128(x, y) argument
242 # define mul_hi(x, y) ((vec_t)__builtin_ia32_pmulhuw128((vhi_t)(x), (vhi_t)(y))) argument
244 # define mul_full(x, y) ((vec_t)__builtin_ia32_pmuludq128((vsi_t)(x), (vsi_t)(y))) argument
246 # define select(d, x, y, m) ({ \ argument
255 # define addsub(x, y) __builtin_ia32_addsubps(x, y) argument
256 # define dup_hi(x) __builtin_ia32_movshdup(x) argument
257 # define dup_lo(x) __builtin_ia32_movsldup(x) argument
258 # define hadd(x, y) __builtin_ia32_haddps(x, y) argument
259 # define hsub(x, y) __builtin_ia32_hsubps(x, y) argument
261 # define addsub(x, y) __builtin_ia32_addsubpd(x, y) argument
262 # define dup_lo(x) ({ \ argument
267 # define hadd(x, y) __builtin_ia32_haddpd(x, y) argument
268 # define hsub(x, y) __builtin_ia32_hsubpd(x, y) argument
272 # define addsub(x, y) __builtin_ia32_addsubps256(x, y) argument
273 # define dup_hi(x) __builtin_ia32_movshdup256(x) argument
274 # define dup_lo(x) __builtin_ia32_movsldup256(x) argument
275 # define hadd(x, y) ({ \ argument
279 # define hsub(x, y) ({ \ argument
284 # define addsub(x, y) __builtin_ia32_addsubpd256(x, y) argument
285 # define dup_lo(x) __builtin_ia32_movddup256(x) argument
286 # define hadd(x, y) ({ \ argument
290 # define hsub(x, y) ({ \ argument
298 # define abs(x) ((vec_t)__builtin_ia32_pabsb128((vqi_t)(x))) argument
300 # define abs(x) __builtin_ia32_pabsw128(x) argument
302 # define abs(x) __builtin_ia32_pabsd128(x) argument
305 # define copysignz(x, y) ((vec_t)__builtin_ia32_psignb128((vqi_t)(x), (vqi_t)(y))) argument
306 # define swap(x) ((vec_t)__builtin_ia32_pshufb128((vqi_t)(x), (vqi_t)(inv - 1))) argument
307 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 8)) argument
309 # define copysignz(x, y) ((vec_t)__builtin_ia32_psignw128((vhi_t)(x), (vhi_t)(y))) argument
310 # define hadd(x, y) ((vec_t)__builtin_ia32_phaddw128((vhi_t)(x), (vhi_t)(y))) argument
311 # define hsub(x, y) ((vec_t)__builtin_ia32_phsubw128((vhi_t)(x), (vhi_t)(y))) argument
312 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 16)) argument
314 # define copysignz(x, y) ((vec_t)__builtin_ia32_psignd128((vsi_t)(x), (vsi_t)(y))) argument
315 # define hadd(x, y) ((vec_t)__builtin_ia32_phaddd128((vsi_t)(x), (vsi_t)(y))) argument
316 # define hsub(x, y) ((vec_t)__builtin_ia32_phsubd128((vsi_t)(x), (vsi_t)(y))) argument
317 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 32)) argument
319 # define rotr(x, n) ((vec_t)__builtin_ia32_palignr128((vdi_t)(x), (vdi_t)(x), (n) * 64)) argument
324 # define max(x, y) ((vec_t)__builtin_ia32_pmaxsb128((vqi_t)(x), (vqi_t)(y))) argument
325 # define min(x, y) ((vec_t)__builtin_ia32_pminsb128((vqi_t)(x), (vqi_t)(y))) argument
326 # define widen1(x) ((vec_t)__builtin_ia32_pmovsxbw128((vqi_t)(x))) argument
327 # define widen2(x) ((vec_t)__builtin_ia32_pmovsxbd128((vqi_t)(x))) argument
328 # define widen3(x) ((vec_t)__builtin_ia32_pmovsxbq128((vqi_t)(x))) argument
330 # define widen1(x) ((vec_t)__builtin_ia32_pmovsxwd128(x)) argument
331 # define widen2(x) ((vec_t)__builtin_ia32_pmovsxwq128(x)) argument
333 # define max(x, y) __builtin_ia32_pmaxsd128(x, y) argument
334 # define min(x, y) __builtin_ia32_pminsd128(x, y) argument
335 # define mul_full(x, y) ((vec_t)__builtin_ia32_pmuldq128(x, y)) argument
336 # define widen1(x) ((vec_t)__builtin_ia32_pmovsxdq128(x)) argument
338 # define widen1(x) ((vec_t)__builtin_ia32_pmovzxbw128((vqi_t)(x))) argument
339 # define widen2(x) ((vec_t)__builtin_ia32_pmovzxbd128((vqi_t)(x))) argument
340 # define widen3(x) ((vec_t)__builtin_ia32_pmovzxbq128((vqi_t)(x))) argument
342 # define max(x, y) ((vec_t)__builtin_ia32_pmaxuw128((vhi_t)(x), (vhi_t)(y))) argument
343 # define min(x, y) ((vec_t)__builtin_ia32_pminuw128((vhi_t)(x), (vhi_t)(y))) argument
344 # define widen1(x) ((vec_t)__builtin_ia32_pmovzxwd128((vhi_t)(x))) argument
345 # define widen2(x) ((vec_t)__builtin_ia32_pmovzxwq128((vhi_t)(x))) argument
347 # define max(x, y) ((vec_t)__builtin_ia32_pmaxud128((vsi_t)(x), (vsi_t)(y))) argument
348 # define min(x, y) ((vec_t)__builtin_ia32_pminud128((vsi_t)(x), (vsi_t)(y))) argument
349 # define widen1(x) ((vec_t)__builtin_ia32_pmovzxdq128((vsi_t)(x))) argument
353 # define select(d, x, y, m) \ argument
356 # define dot_product(x, y) __builtin_ia32_dpps(x, y, 0b11110001) argument
357 # define select(d, x, y, m) (*(d) = __builtin_ia32_blendvps(y, x, m)) argument
358 # define trunc(x) __builtin_ia32_roundps(x, 0b1011) argument
360 # define dot_product(x, y) __builtin_ia32_dppd(x, y, 0b00110001) argument
361 # define select(d, x, y, m) (*(d) = __builtin_ia32_blendvpd(y, x, m)) argument
362 # define trunc(x) __builtin_ia32_roundpd(x, 0b1011) argument
365 # define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b10101010)) argument
367 # define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b11001100)) argument
369 # define mix(x, y) ((vec_t)__builtin_ia32_pblendw128((vhi_t)(x), (vhi_t)(y), 0b11110000)) argument
371 # define mix(x, y) __builtin_ia32_blendps(x, y, 0b1010) argument
373 # define mix(x, y) __builtin_ia32_blendpd(x, y, 0b10) argument
378 # define dot_product(x, y) ({ \ argument
382 # define mix(x, y) __builtin_ia32_blendps256(x, y, 0b10101010) argument
383 # define select(d, x, y, m) (*(d) = __builtin_ia32_blendvps256(y, x, m)) argument
384 # define select2(d, x, y, m) ({ \ argument
389 # define trunc(x) __builtin_ia32_roundps256(x, 0b1011) argument
391 # define mix(x, y) __builtin_ia32_blendpd256(x, y, 0b1010) argument
392 # define select(d, x, y, m) (*(d) = __builtin_ia32_blendvpd256(y, x, m)) argument
393 # define select2(d, x, y, m) ({ \ argument
398 # define trunc(x) __builtin_ia32_roundpd256(x, 0b1011) argument
402 # define max(x, y) ((vec_t){({ typeof(x[0]) x_ = (x)[0], y_ = (y)[0]; x_ > y_ ? x_ : y_; })}) argument
403 # define min(x, y) ((vec_t){({ typeof(x[0]) x_ = (x)[0], y_ = (y)[0]; x_ < y_ ? x_ : y_; })}) argument
406 # define trunc(x) ({ \ argument
412 # define trunc(x) ({ \ argument
431 vec_t x, y, z, src, inv, alt, sh; in simd_test() local