1 #include "x86-emulate.h"
2 
3 #include <stdarg.h>
4 #include <stdio.h>
5 
/*
 * One table entry describing a single instruction encoding.  Entries are
 * produced by the INSNX() / INSN() family of initializer macros; the
 * bit-fields below add up to 24 bits.
 */
struct test {
    const char *mnemonic;   /* stringified first macro argument */
    unsigned int opc:8;     /* opcode byte (hex digits pasted after "0x") */
    unsigned int spc:3;     /* one of enum spc */
    unsigned int pfx:2;     /* one of enum pfx */
    unsigned int vsz:3;     /* one of enum vsz */
    unsigned int esz:4;     /* one of enum esz */
    unsigned int scale:1;   /* one of enum scale */
    /*
     * Value of INSNX()'s 'e' argument (0 for plain INSN()); presumably the
     * ModRM.reg opcode extension - TODO confirm against the consumer.
     */
    unsigned int ext:3;
};
16 
/* Values for struct test's 3-bit 'spc' field (SPC_<sp> in INSNX()). */
enum spc {
    SPC_invalid = 0,
    SPC_0f,
    SPC_0f38,
    SPC_0f3a,
    SPC_unused4,    /* placeholder, so that SPC_map5 == 5 */
    SPC_map5,
    SPC_map6,
    SPC_unused7,    /* 7: largest value a 3-bit field can hold */
};
27 
/* Values for struct test's 2-bit 'pfx' field (PFX_<p> in INSNX()). */
enum pfx {
    PFX_ = 0,   /* what an empty prefix argument to the macros pastes to */
    PFX_66,
    PFX_f3,
    PFX_f2,
};
34 
/* Vector length selectors, named after the length in bits. */
enum vl {
    VL_128 = 0,
    VL_256,
    VL_512,
};
40 
/* What the memory operand is scaled by (struct test's 1-bit 'scale'). */
enum scale {
    SC_vl = 0,  /* scale by vector length */
    SC_el,      /* scale by element length */
};
45 
/*
 * Vector size: derived either from EVEX.L'L (the VL based enumerators) or
 * from the vector element size (the EL based enumerators), the latter
 * often being controlled by EVEX.W (see enum esz).
 */
enum vsz {
    VSZ_vl = 0, /* VL     */
    VSZ_vl_2,   /* VL / 2 */
    VSZ_vl_4,   /* VL / 4 */
    VSZ_vl_8,   /* VL / 8 */

    /* Everything from here on implies "no broadcast". */
    VSZ_el,     /* EL     */
    VSZ_el_2,   /* EL * 2 */
    VSZ_el_4,   /* EL * 4 */
    VSZ_el_8,   /* EL * 8 */
};
61 
/*
 * Vector element size.  It is either an attribute of the opcode or, quite
 * often, selected by EVEX.W - enumerators naming two sizes cover the
 * latter case.  Instructions accessing GPRs often use EVEX.W to choose
 * between 32- and 64-bit GPR width; outside of 64-bit mode that
 * distinction disappears (EVEX.W is ignored there).
 */
enum esz {
    ESZ_d = 0,
    ESZ_q,
    ESZ_dq,
    ESZ_sd,
    ESZ_d_nb,
    ESZ_q_nb,

    /* Everything from here on implies "no broadcast". */
#ifdef __i386__
    ESZ_d_WIG,  /* exists in 32-bit builds only */
#endif
    ESZ_b,
    ESZ_w,
    ESZ_bw,
    ESZ_fp16,
};
85 
/*
 * ESZ_dq64: alias for ESZ_d_WIG in 32-bit (__i386__) builds and for
 * ESZ_dq in all other builds.
 */
#ifdef __i386__
# define ESZ_dq64 ESZ_d_WIG
#else
# define ESZ_dq64 ESZ_dq
#endif
91 
/*
 * INSNX() - build one struct test initializer.
 *   m  - mnemonic (stringified)
 *   p  - prefix, pasted onto PFX_ (may be left empty)
 *   sp - opcode space, pasted onto SPC_
 *   o  - opcode byte as two hex digits, pasted onto 0x
 *   e  - digit pasted onto a leading 0 to form the 'ext' value
 *   vs - vector size, pasted onto VSZ_
 *   es - element size, pasted onto ESZ_
 *   sc - scaling, pasted onto SC_
 */
#define INSNX(m, p, sp, o, e, vs, es, sc) { \
    .mnemonic = #m,      \
    .opc = 0x##o,        \
    .spc = SPC_##sp,     \
    .pfx = PFX_##p,      \
    .vsz = VSZ_##vs,     \
    .esz = ESZ_##es,     \
    .scale = SC_##sc,    \
    .ext = 0##e          \
}

/* INSN() - INSNX() with 'ext' fixed at 0. */
#define INSN(m, p, sp, o, vs, es, sc) \
    INSNX(m, p, sp, o, 0, vs, es, sc)

/* Packed FP pair: <m>pd (66 prefix, q) and <m>ps (no prefix, d). */
#define INSN_PFP(m, sp, o)             \
    INSN(m##pd, 66, sp, o, vl, q, vl), \
    INSN(m##ps,   , sp, o, vl, d, vl)

/* As INSN_PFP(), but using the "no broadcast" element sizes. */
#define INSN_PFP_NB(m, sp, o)             \
    INSN(m##pd, 66, sp, o, vl, q_nb, vl), \
    INSN(m##ps,   , sp, o, vl, d_nb, vl)

/* Scalar FP pair: <m>sd (f2 prefix, q) and <m>ss (f3 prefix, d). */
#define INSN_SFP(m, sp, o)             \
    INSN(m##sd, f2, sp, o, el, q, el), \
    INSN(m##ss, f3, sp, o, el, d, el)

/* All four FP forms: packed pair first, then scalar pair. */
#define INSN_FP(m, sp, o) \
    INSN_PFP(m, sp, o),   \
    INSN_SFP(m, sp, o)
110 
111 static const struct test avx512f_all[] = {
112     INSN_FP(add,             0f, 58),
113     INSN(align,        66, 0f3a, 03,    vl,     dq, vl),
114     INSN(blendm,       66, 0f38, 65,    vl,     sd, vl),
115     INSN(broadcastss,  66, 0f38, 18,    el,      d, el),
116     INSN_FP(cmp,             0f, c2),
117     INSN(comisd,       66,   0f, 2f,    el,      q, el),
118     INSN(comiss,         ,   0f, 2f,    el,      d, el),
119     INSN(compress,     66, 0f38, 8a,    vl,     sd, el),
120     INSN(cvtdq2pd,     f3,   0f, e6,    vl_2,    d, vl),
121     INSN(cvtdq2ps,       ,   0f, 5b,    vl,      d, vl),
122     INSN(cvtpd2dq,     f2,   0f, e6,    vl,      q, vl),
123     INSN(cvtpd2udq,      ,   0f, 79,    vl,      q, vl),
124     INSN(cvtpd2ps,     66,   0f, 5a,    vl,      q, vl),
125     INSN(cvtph2ps,     66, 0f38, 13,    vl_2, d_nb, vl),
126     INSN(cvtps2dq,     66,   0f, 5b,    vl,      d, vl),
127     INSN(cvtps2pd,       ,   0f, 5a,    vl_2,    d, vl),
128     INSN(cvtps2ph,     66, 0f3a, 1d,    vl_2, d_nb, vl),
129     INSN(cvtps2udq,      ,   0f, 79,    vl,      d, vl),
130     INSN(cvtsd2si,     f2,   0f, 2d,    el,      q, el),
131     INSN(cvtsd2usi,    f2,   0f, 79,    el,      q, el),
132     INSN(cvtsd2ss,     f2,   0f, 5a,    el,      q, el),
133     INSN(cvtsi2sd,     f2,   0f, 2a,    el,   dq64, el),
134     INSN(cvtsi2ss,     f3,   0f, 2a,    el,   dq64, el),
135     INSN(cvtss2sd,     f3,   0f, 5a,    el,      d, el),
136     INSN(cvtss2si,     f3,   0f, 2d,    el,      d, el),
137     INSN(cvtss2usi,    f3,   0f, 79,    el,      d, el),
138     INSN(cvttpd2dq,    66,   0f, e6,    vl,      q, vl),
139     INSN(cvttpd2udq,     ,   0f, 78,    vl,      q, vl),
140     INSN(cvttps2dq,    f3,   0f, 5b,    vl,      d, vl),
141     INSN(cvttps2udq,     ,   0f, 78,    vl,      d, vl),
142     INSN(cvttsd2si,    f2,   0f, 2c,    el,      q, el),
143     INSN(cvttsd2usi,   f2,   0f, 78,    el,      q, el),
144     INSN(cvttss2si,    f3,   0f, 2c,    el,      d, el),
145     INSN(cvttss2usi,   f3,   0f, 78,    el,      d, el),
146     INSN(cvtudq2pd,    f3,   0f, 7a,    vl_2,    d, vl),
147     INSN(cvtudq2ps,    f2,   0f, 7a,    vl,      d, vl),
148     INSN(cvtusi2sd,    f2,   0f, 7b,    el,   dq64, el),
149     INSN(cvtusi2ss,    f3,   0f, 7b,    el,   dq64, el),
150     INSN_FP(div,             0f, 5e),
151     INSN(expand,       66, 0f38, 88,    vl,     sd, el),
152     INSN(fixupimm,     66, 0f3a, 54,    vl,     sd, vl),
153     INSN(fixupimm,     66, 0f3a, 55,    el,     sd, el),
154     INSN(fmadd132,     66, 0f38, 98,    vl,     sd, vl),
155     INSN(fmadd132,     66, 0f38, 99,    el,     sd, el),
156     INSN(fmadd213,     66, 0f38, a8,    vl,     sd, vl),
157     INSN(fmadd213,     66, 0f38, a9,    el,     sd, el),
158     INSN(fmadd231,     66, 0f38, b8,    vl,     sd, vl),
159     INSN(fmadd231,     66, 0f38, b9,    el,     sd, el),
160     INSN(fmaddsub132,  66, 0f38, 96,    vl,     sd, vl),
161     INSN(fmaddsub213,  66, 0f38, a6,    vl,     sd, vl),
162     INSN(fmaddsub231,  66, 0f38, b6,    vl,     sd, vl),
163     INSN(fmsub132,     66, 0f38, 9a,    vl,     sd, vl),
164     INSN(fmsub132,     66, 0f38, 9b,    el,     sd, el),
165     INSN(fmsub213,     66, 0f38, aa,    vl,     sd, vl),
166     INSN(fmsub213,     66, 0f38, ab,    el,     sd, el),
167     INSN(fmsub231,     66, 0f38, ba,    vl,     sd, vl),
168     INSN(fmsub231,     66, 0f38, bb,    el,     sd, el),
169     INSN(fmsubadd132,  66, 0f38, 97,    vl,     sd, vl),
170     INSN(fmsubadd213,  66, 0f38, a7,    vl,     sd, vl),
171     INSN(fmsubadd231,  66, 0f38, b7,    vl,     sd, vl),
172     INSN(fnmadd132,    66, 0f38, 9c,    vl,     sd, vl),
173     INSN(fnmadd132,    66, 0f38, 9d,    el,     sd, el),
174     INSN(fnmadd213,    66, 0f38, ac,    vl,     sd, vl),
175     INSN(fnmadd213,    66, 0f38, ad,    el,     sd, el),
176     INSN(fnmadd231,    66, 0f38, bc,    vl,     sd, vl),
177     INSN(fnmadd231,    66, 0f38, bd,    el,     sd, el),
178     INSN(fnmsub132,    66, 0f38, 9e,    vl,     sd, vl),
179     INSN(fnmsub132,    66, 0f38, 9f,    el,     sd, el),
180     INSN(fnmsub213,    66, 0f38, ae,    vl,     sd, vl),
181     INSN(fnmsub213,    66, 0f38, af,    el,     sd, el),
182     INSN(fnmsub231,    66, 0f38, be,    vl,     sd, vl),
183     INSN(fnmsub231,    66, 0f38, bf,    el,     sd, el),
184     INSN(gatherd,      66, 0f38, 92,    vl,     sd, el),
185     INSN(gatherq,      66, 0f38, 93,    vl,     sd, el),
186     INSN(getexp,       66, 0f38, 42,    vl,     sd, vl),
187     INSN(getexp,       66, 0f38, 43,    el,     sd, el),
188     INSN(getmant,      66, 0f3a, 26,    vl,     sd, vl),
189     INSN(getmant,      66, 0f3a, 27,    el,     sd, el),
190     INSN_FP(max,             0f, 5f),
191     INSN_FP(min,             0f, 5d),
192     INSN_SFP(mov,            0f, 10),
193     INSN_SFP(mov,            0f, 11),
194     INSN_PFP_NB(mova,        0f, 28),
195     INSN_PFP_NB(mova,        0f, 29),
196     INSN(movddup,      f2,   0f, 12,    vl,   q_nb, vl),
197     INSN(movdqa32,     66,   0f, 6f,    vl,   d_nb, vl),
198     INSN(movdqa32,     66,   0f, 7f,    vl,   d_nb, vl),
199     INSN(movdqa64,     66,   0f, 6f,    vl,   q_nb, vl),
200     INSN(movdqa64,     66,   0f, 7f,    vl,   q_nb, vl),
201     INSN(movdqu32,     f3,   0f, 6f,    vl,   d_nb, vl),
202     INSN(movdqu32,     f3,   0f, 7f,    vl,   d_nb, vl),
203     INSN(movdqu64,     f3,   0f, 6f,    vl,   q_nb, vl),
204     INSN(movdqu64,     f3,   0f, 7f,    vl,   q_nb, vl),
205     INSN(movntdq,      66,   0f, e7,    vl,   d_nb, vl),
206     INSN(movntdqa,     66, 0f38, 2a,    vl,   d_nb, vl),
207     INSN_PFP_NB(movnt,       0f, 2b),
208     INSN(movshdup,     f3,   0f, 16,    vl,   d_nb, vl),
209     INSN(movsldup,     f3,   0f, 12,    vl,   d_nb, vl),
210     INSN_PFP_NB(movu,        0f, 10),
211     INSN_PFP_NB(movu,        0f, 11),
212     INSN_FP(mul,             0f, 59),
213     INSN(pabsd,        66, 0f38, 1e,    vl,      d, vl),
214     INSN(pabsq,        66, 0f38, 1f,    vl,      q, vl),
215     INSN(paddd,        66,   0f, fe,    vl,      d, vl),
216     INSN(paddq,        66,   0f, d4,    vl,      q, vl),
217     INSN(pand,         66,   0f, db,    vl,     dq, vl),
218     INSN(pandn,        66,   0f, df,    vl,     dq, vl),
219     INSN(pblendm,      66, 0f38, 64,    vl,     dq, vl),
220 //       pbroadcast,   66, 0f38, 7c,          dq64
221     INSN(pbroadcastd,  66, 0f38, 58,    el,      d, el),
222     INSN(pbroadcastq,  66, 0f38, 59,    el,      q, el),
223     INSN(pcmp,         66, 0f3a, 1f,    vl,     dq, vl),
224     INSN(pcmpeqd,      66,   0f, 76,    vl,      d, vl),
225     INSN(pcmpeqq,      66, 0f38, 29,    vl,      q, vl),
226     INSN(pcmpgtd,      66,   0f, 66,    vl,      d, vl),
227     INSN(pcmpgtq,      66, 0f38, 37,    vl,      q, vl),
228     INSN(pcmpu,        66, 0f3a, 1e,    vl,     dq, vl),
229     INSN(pcompress,    66, 0f38, 8b,    vl,     dq, el),
230     INSN(permi2,       66, 0f38, 76,    vl,     dq, vl),
231     INSN(permi2,       66, 0f38, 77,    vl,     sd, vl),
232     INSN(permilpd,     66, 0f38, 0d,    vl,      q, vl),
233     INSN(permilpd,     66, 0f3a, 05,    vl,      q, vl),
234     INSN(permilps,     66, 0f38, 0c,    vl,      d, vl),
235     INSN(permilps,     66, 0f3a, 04,    vl,      d, vl),
236     INSN(permt2,       66, 0f38, 7e,    vl,     dq, vl),
237     INSN(permt2,       66, 0f38, 7f,    vl,     sd, vl),
238     INSN(pexpand,      66, 0f38, 89,    vl,     dq, el),
239     INSN(pgatherd,     66, 0f38, 90,    vl,     dq, el),
240     INSN(pgatherq,     66, 0f38, 91,    vl,     dq, el),
241     INSN(pmaxs,        66, 0f38, 3d,    vl,     dq, vl),
242     INSN(pmaxu,        66, 0f38, 3f,    vl,     dq, vl),
243     INSN(pmins,        66, 0f38, 39,    vl,     dq, vl),
244     INSN(pminu,        66, 0f38, 3b,    vl,     dq, vl),
245     INSN(pmovdb,       f3, 0f38, 31,    vl_4,    b, vl),
246     INSN(pmovdw,       f3, 0f38, 33,    vl_2,    b, vl),
247     INSN(pmovqb,       f3, 0f38, 32,    vl_8,    b, vl),
248     INSN(pmovqd,       f3, 0f38, 35,    vl_2, d_nb, vl),
249     INSN(pmovqw,       f3, 0f38, 34,    vl_4,    b, vl),
250     INSN(pmovsdb,      f3, 0f38, 21,    vl_4,    b, vl),
251     INSN(pmovsdw,      f3, 0f38, 23,    vl_2,    b, vl),
252     INSN(pmovsqb,      f3, 0f38, 22,    vl_8,    b, vl),
253     INSN(pmovsqd,      f3, 0f38, 25,    vl_2, d_nb, vl),
254     INSN(pmovsqw,      f3, 0f38, 24,    vl_4,    b, vl),
255     INSN(pmovsxbd,     66, 0f38, 21,    vl_4,    b, vl),
256     INSN(pmovsxbq,     66, 0f38, 22,    vl_8,    b, vl),
257     INSN(pmovsxwd,     66, 0f38, 23,    vl_2,    w, vl),
258     INSN(pmovsxwq,     66, 0f38, 24,    vl_4,    w, vl),
259     INSN(pmovsxdq,     66, 0f38, 25,    vl_2, d_nb, vl),
260     INSN(pmovusdb,     f3, 0f38, 11,    vl_4,    b, vl),
261     INSN(pmovusdw,     f3, 0f38, 13,    vl_2,    b, vl),
262     INSN(pmovusqb,     f3, 0f38, 12,    vl_8,    b, vl),
263     INSN(pmovusqd,     f3, 0f38, 15,    vl_2, d_nb, vl),
264     INSN(pmovusqw,     f3, 0f38, 14,    vl_4,    b, vl),
265     INSN(pmovzxbd,     66, 0f38, 31,    vl_4,    b, vl),
266     INSN(pmovzxbq,     66, 0f38, 32,    vl_8,    b, vl),
267     INSN(pmovzxwd,     66, 0f38, 33,    vl_2,    w, vl),
268     INSN(pmovzxwq,     66, 0f38, 34,    vl_4,    w, vl),
269     INSN(pmovzxdq,     66, 0f38, 35,    vl_2, d_nb, vl),
270     INSN(pmuldq,       66, 0f38, 28,    vl,      q, vl),
271     INSN(pmulld,       66, 0f38, 40,    vl,      d, vl),
272     INSN(pmuludq,      66,   0f, f4,    vl,      q, vl),
273     INSN(por,          66,   0f, eb,    vl,     dq, vl),
274     INSNX(prol,        66,   0f, 72, 1, vl,     dq, vl),
275     INSN(prolv,        66, 0f38, 15,    vl,     dq, vl),
276     INSNX(pror,        66,   0f, 72, 0, vl,     dq, vl),
277     INSN(prorv,        66, 0f38, 14,    vl,     dq, vl),
278     INSN(pscatterd,    66, 0f38, a0,    vl,     dq, el),
279     INSN(pscatterq,    66, 0f38, a1,    vl,     dq, el),
280     INSN(pshufd,       66,   0f, 70,    vl,      d, vl),
281     INSN(pslld,        66,   0f, f2,    el_4,    d, vl),
282     INSNX(pslld,       66,   0f, 72, 6, vl,      d, vl),
283     INSN(psllq,        66,   0f, f3,    el_2,    q, vl),
284     INSNX(psllq,       66,   0f, 73, 6, vl,      q, vl),
285     INSN(psllv,        66, 0f38, 47,    vl,     dq, vl),
286     INSNX(psra,        66,   0f, 72, 4, vl,     dq, vl),
287     INSN(psrad,        66,   0f, e2,    el_4,    d, vl),
288     INSN(psraq,        66,   0f, e2,    el_2,    q, vl),
289     INSN(psrav,        66, 0f38, 46,    vl,     dq, vl),
290     INSN(psrld,        66,   0f, d2,    el_4,    d, vl),
291     INSNX(psrld,       66,   0f, 72, 2, vl,      d, vl),
292     INSN(psrlq,        66,   0f, d3,    el_2,    q, vl),
293     INSNX(psrlq,       66,   0f, 73, 2, vl,      q, vl),
294     INSN(psrlv,        66, 0f38, 45,    vl,     dq, vl),
295     INSN(psubd,        66,   0f, fa,    vl,      d, vl),
296     INSN(psubq,        66,   0f, fb,    vl,      q, vl),
297     INSN(pternlog,     66, 0f3a, 25,    vl,     dq, vl),
298     INSN(ptestm,       66, 0f38, 27,    vl,     dq, vl),
299     INSN(ptestnm,      f3, 0f38, 27,    vl,     dq, vl),
300     INSN(punpckhdq,    66,   0f, 6a,    vl,      d, vl),
301     INSN(punpckhqdq,   66,   0f, 6d,    vl,      q, vl),
302     INSN(punpckldq,    66,   0f, 62,    vl,      d, vl),
303     INSN(punpcklqdq,   66,   0f, 6c,    vl,      q, vl),
304     INSN(pxor,         66,   0f, ef,    vl,     dq, vl),
305     INSN(rcp14,        66, 0f38, 4c,    vl,     sd, vl),
306     INSN(rcp14,        66, 0f38, 4d,    el,     sd, el),
307     INSN(rndscalepd,   66, 0f3a, 09,    vl,      q, vl),
308     INSN(rndscaleps,   66, 0f3a, 08,    vl,      d, vl),
309     INSN(rndscalesd,   66, 0f3a, 0b,    el,      q, el),
310     INSN(rndscaless,   66, 0f3a, 0a,    el,      d, el),
311     INSN(rsqrt14,      66, 0f38, 4e,    vl,     sd, vl),
312     INSN(rsqrt14,      66, 0f38, 4f,    el,     sd, el),
313     INSN(scalef,       66, 0f38, 2c,    vl,     sd, vl),
314     INSN(scalef,       66, 0f38, 2d,    el,     sd, el),
315     INSN(scatterd,     66, 0f38, a2,    vl,     sd, el),
316     INSN(scatterq,     66, 0f38, a3,    vl,     sd, el),
317     INSN_PFP(shuf,           0f, c6),
318     INSN_FP(sqrt,            0f, 51),
319     INSN_FP(sub,             0f, 5c),
320     INSN(ucomisd,      66,   0f, 2e,    el,      q, el),
321     INSN(ucomiss,        ,   0f, 2e,    el,      d, el),
322     INSN_PFP(unpckh,         0f, 15),
323     INSN_PFP(unpckl,         0f, 14),
324 };
325 
326 static const struct test avx512f_128[] = {
327     INSN(extractps, 66, 0f3a, 17, el,    d, el),
328     INSN(insertps,  66, 0f3a, 21, el,    d, el),
329     INSN(mov,       66,   0f, 6e, el, dq64, el),
330     INSN(mov,       66,   0f, 7e, el, dq64, el),
331 //       movhlps,     ,   0f, 12,        d
332     INSN(movhpd,    66,   0f, 16, el,    q, vl),
333     INSN(movhpd,    66,   0f, 17, el,    q, vl),
334     INSN(movhps,      ,   0f, 16, el_2,  d, vl),
335     INSN(movhps,      ,   0f, 17, el_2,  d, vl),
336 //       movlhps,     ,   0f, 16,        d
337     INSN(movlpd,    66,   0f, 12, el,    q, vl),
338     INSN(movlpd,    66,   0f, 13, el,    q, vl),
339     INSN(movlps,      ,   0f, 12, el_2,  d, vl),
340     INSN(movlps,      ,   0f, 13, el_2,  d, vl),
341     INSN(movq,      f3,   0f, 7e, el,    q, el),
342     INSN(movq,      66,   0f, d6, el,    q, el),
343 };
344 
345 static const struct test avx512f_no128[] = {
346     INSN(broadcastf32x4, 66, 0f38, 1a, el_4,  d, vl),
347     INSN(broadcasti32x4, 66, 0f38, 5a, el_4,  d, vl),
348     INSN(broadcastsd,    66, 0f38, 19, el,    q, el),
349     INSN(extractf32x4,   66, 0f3a, 19, el_4,  d, vl),
350     INSN(extracti32x4,   66, 0f3a, 39, el_4,  d, vl),
351     INSN(insertf32x4,    66, 0f3a, 18, el_4,  d, vl),
352     INSN(inserti32x4,    66, 0f3a, 38, el_4,  d, vl),
353     INSN(perm,           66, 0f38, 36, vl,   dq, vl),
354     INSN(perm,           66, 0f38, 16, vl,   sd, vl),
355     INSN(permpd,         66, 0f3a, 01, vl,    q, vl),
356     INSN(permq,          66, 0f3a, 00, vl,    q, vl),
357     INSN(shuff32x4,      66, 0f3a, 23, vl,    d, vl),
358     INSN(shuff64x2,      66, 0f3a, 23, vl,    q, vl),
359     INSN(shufi32x4,      66, 0f3a, 43, vl,    d, vl),
360     INSN(shufi64x2,      66, 0f3a, 43, vl,    q, vl),
361 };
362 
363 static const struct test avx512f_512[] = {
364     INSN(broadcastf64x4, 66, 0f38, 1b, el_4, q, vl),
365     INSN(broadcasti64x4, 66, 0f38, 5b, el_4, q, vl),
366     INSN(extractf64x4,   66, 0f3a, 1b, el_4, q, vl),
367     INSN(extracti64x4,   66, 0f3a, 3b, el_4, q, vl),
368     INSN(insertf64x4,    66, 0f3a, 1a, el_4, q, vl),
369     INSN(inserti64x4,    66, 0f3a, 3a, el_4, q, vl),
370 };
371 
372 static const struct test avx512bw_all[] = {
373     INSN(dbpsadbw,    66, 0f3a, 42,    vl,    b, vl),
374     INSN(movdqu8,     f2,   0f, 6f,    vl,    b, vl),
375     INSN(movdqu8,     f2,   0f, 7f,    vl,    b, vl),
376     INSN(movdqu16,    f2,   0f, 6f,    vl,    w, vl),
377     INSN(movdqu16,    f2,   0f, 7f,    vl,    w, vl),
378     INSN(pabsb,       66, 0f38, 1c,    vl,    b, vl),
379     INSN(pabsw,       66, 0f38, 1d,    vl,    w, vl),
380     INSN(packssdw,    66,   0f, 6b,    vl, d_nb, vl),
381     INSN(packsswb,    66,   0f, 63,    vl,    w, vl),
382     INSN(packusdw,    66, 0f38, 2b,    vl, d_nb, vl),
383     INSN(packuswb,    66,   0f, 67,    vl,    w, vl),
384     INSN(paddb,       66,   0f, fc,    vl,    b, vl),
385     INSN(paddsb,      66,   0f, ec,    vl,    b, vl),
386     INSN(paddsw,      66,   0f, ed,    vl,    w, vl),
387     INSN(paddusb,     66,   0f, dc,    vl,    b, vl),
388     INSN(paddusw,     66,   0f, dd,    vl,    w, vl),
389     INSN(paddw,       66,   0f, fd,    vl,    w, vl),
390     INSN(palignr,     66, 0f3a, 0f,    vl,    b, vl),
391     INSN(pavgb,       66,   0f, e0,    vl,    b, vl),
392     INSN(pavgw,       66,   0f, e3,    vl,    w, vl),
393     INSN(pblendm,     66, 0f38, 66,    vl,   bw, vl),
394     INSN(pbroadcastb, 66, 0f38, 78,    el,    b, el),
395 //       pbroadcastb, 66, 0f38, 7a,           b
396     INSN(pbroadcastw, 66, 0f38, 79,    el_2,  b, vl),
397 //       pbroadcastw, 66, 0f38, 7b,           b
398     INSN(pcmp,        66, 0f3a, 3f,    vl,   bw, vl),
399     INSN(pcmpeqb,     66,   0f, 74,    vl,    b, vl),
400     INSN(pcmpeqw,     66,   0f, 75,    vl,    w, vl),
401     INSN(pcmpgtb,     66,   0f, 64,    vl,    b, vl),
402     INSN(pcmpgtw,     66,   0f, 65,    vl,    w, vl),
403     INSN(pcmpu,       66, 0f3a, 3e,    vl,   bw, vl),
404     INSN(permw,       66, 0f38, 8d,    vl,    w, vl),
405     INSN(permi2w,     66, 0f38, 75,    vl,    w, vl),
406     INSN(permt2w,     66, 0f38, 7d,    vl,    w, vl),
407     INSN(pmaddubsw,   66, 0f38, 04,    vl,    b, vl),
408     INSN(pmaddwd,     66,   0f, f5,    vl,    w, vl),
409     INSN(pmaxsb,      66, 0f38, 3c,    vl,    b, vl),
410     INSN(pmaxsw,      66,   0f, ee,    vl,    w, vl),
411     INSN(pmaxub,      66,   0f, de,    vl,    b, vl),
412     INSN(pmaxuw,      66, 0f38, 3e,    vl,    w, vl),
413     INSN(pminsb,      66, 0f38, 38,    vl,    b, vl),
414     INSN(pminsw,      66,   0f, ea,    vl,    w, vl),
415     INSN(pminub,      66,   0f, da,    vl,    b, vl),
416     INSN(pminuw,      66, 0f38, 3a,    vl,    w, vl),
417 //       pmovb2m,     f3, 0f38, 29,           b
418 //       pmovm2,      f3, 0f38, 28,          bw
419     INSN(pmovswb,     f3, 0f38, 20,    vl_2,  b, vl),
420     INSN(pmovsxbw,    66, 0f38, 20,    vl_2,  b, vl),
421     INSN(pmovuswb,    f3, 0f38, 10,    vl_2,  b, vl),
422 //       pmovw2m,     f3, 0f38, 29,           w
423     INSN(pmovwb,      f3, 0f38, 30,    vl_2,  b, vl),
424     INSN(pmovzxbw,    66, 0f38, 30,    vl_2,  b, vl),
425     INSN(pmulhrsw,    66, 0f38, 0b,    vl,    w, vl),
426     INSN(pmulhuw,     66,   0f, e4,    vl,    w, vl),
427     INSN(pmulhw,      66,   0f, e5,    vl,    w, vl),
428     INSN(pmullw,      66,   0f, d5,    vl,    w, vl),
429     INSN(psadbw,      66,   0f, f6,    vl,    b, vl),
430     INSN(pshufb,      66, 0f38, 00,    vl,    b, vl),
431     INSN(pshufhw,     f3,   0f, 70,    vl,    w, vl),
432     INSN(pshuflw,     f2,   0f, 70,    vl,    w, vl),
433     INSNX(pslldq,     66,   0f, 73, 7, vl,    b, vl),
434     INSN(psllvw,      66, 0f38, 12,    vl,    w, vl),
435     INSN(psllw,       66,   0f, f1,    el_8,  w, vl),
436     INSNX(psllw,      66,   0f, 71, 6, vl,    w, vl),
437     INSN(psravw,      66, 0f38, 11,    vl,    w, vl),
438     INSN(psraw,       66,   0f, e1,    el_8,  w, vl),
439     INSNX(psraw,      66,   0f, 71, 4, vl,    w, vl),
440     INSNX(psrldq,     66,   0f, 73, 3, vl,    b, vl),
441     INSN(psrlvw,      66, 0f38, 10,    vl,    w, vl),
442     INSN(psrlw,       66,   0f, d1,    el_8,  w, vl),
443     INSNX(psrlw,      66,   0f, 71, 2, vl,    w, vl),
444     INSN(psubb,       66,   0f, f8,    vl,    b, vl),
445     INSN(psubsb,      66,   0f, e8,    vl,    b, vl),
446     INSN(psubsw,      66,   0f, e9,    vl,    w, vl),
447     INSN(psubusb,     66,   0f, d8,    vl,    b, vl),
448     INSN(psubusw,     66,   0f, d9,    vl,    w, vl),
449     INSN(psubw,       66,   0f, f9,    vl,    w, vl),
450     INSN(ptestm,      66, 0f38, 26,    vl,   bw, vl),
451     INSN(ptestnm,     f3, 0f38, 26,    vl,   bw, vl),
452     INSN(punpckhbw,   66,   0f, 68,    vl,    b, vl),
453     INSN(punpckhwd,   66,   0f, 69,    vl,    w, vl),
454     INSN(punpcklbw,   66,   0f, 60,    vl,    b, vl),
455     INSN(punpcklwd,   66,   0f, 61,    vl,    w, vl),
456 };
457 
458 static const struct test avx512bw_128[] = {
459     INSN(pextrb, 66, 0f3a, 14, el, b, el),
460 //       pextrw, 66,   0f, c5,     w
461     INSN(pextrw, 66, 0f3a, 15, el, w, el),
462     INSN(pinsrb, 66, 0f3a, 20, el, b, el),
463     INSN(pinsrw, 66,   0f, c4, el, w, el),
464 };
465 
466 static const struct test avx512cd_all[] = {
467 //       pbroadcastmb2q, f3, 0f38, 2a,      q
468 //       pbroadcastmw2d, f3, 0f38, 3a,      d
469     INSN(pconflict,      66, 0f38, c4, vl, dq, vl),
470     INSN(plzcnt,         66, 0f38, 44, vl, dq, vl),
471 };
472 
473 static const struct test avx512dq_all[] = {
474     INSN_PFP(and,              0f, 54),
475     INSN_PFP(andn,             0f, 55),
476     INSN(broadcasti32x2, 66, 0f38, 59, el_2,  d, vl),
477     INSN(cvtpd2qq,       66,   0f, 7b,   vl,  q, vl),
478     INSN(cvtpd2uqq,      66,   0f, 79,   vl,  q, vl),
479     INSN(cvtps2qq,       66,   0f, 7b, vl_2,  d, vl),
480     INSN(cvtps2uqq,      66,   0f, 79, vl_2,  d, vl),
481     INSN(cvtqq2pd,       f3,   0f, e6,   vl,  q, vl),
482     INSN(cvtqq2ps,         ,   0f, 5b,   vl,  q, vl),
483     INSN(cvttpd2qq,      66,   0f, 7a,   vl,  q, vl),
484     INSN(cvttpd2uqq,     66,   0f, 78,   vl,  q, vl),
485     INSN(cvttps2qq,      66,   0f, 7a, vl_2,  d, vl),
486     INSN(cvttps2uqq,     66,   0f, 78, vl_2,  d, vl),
487     INSN(cvtuqq2pd,      f3,   0f, 7a,   vl,  q, vl),
488     INSN(cvtuqq2ps,      f2,   0f, 7a,   vl,  q, vl),
489     INSN(fpclass,        66, 0f3a, 66,   vl, sd, vl),
490     INSN(fpclass,        66, 0f3a, 67,   el, sd, el),
491     INSN_PFP(or,               0f, 56),
492 //       pmovd2m,        f3, 0f38, 39,        d
493 //       pmovm2,         f3, 0f38, 38,       dq
494 //       pmovq2m,        f3, 0f38, 39,        q
495     INSN(pmullq,         66, 0f38, 40,   vl,  q, vl),
496     INSN(range,          66, 0f3a, 50,   vl, sd, vl),
497     INSN(range,          66, 0f3a, 51,   el, sd, el),
498     INSN(reduce,         66, 0f3a, 56,   vl, sd, vl),
499     INSN(reduce,         66, 0f3a, 57,   el, sd, el),
500     INSN_PFP(xor,              0f, 57),
501 };
502 
503 static const struct test avx512dq_128[] = {
504     INSN(pextr, 66, 0f3a, 16, el, dq64, el),
505     INSN(pinsr, 66, 0f3a, 22, el, dq64, el),
506 };
507 
508 static const struct test avx512dq_no128[] = {
509     INSN(broadcastf32x2, 66, 0f38, 19, el_2, d, vl),
510     INSN(broadcastf64x2, 66, 0f38, 1a, el_2, q, vl),
511     INSN(broadcasti64x2, 66, 0f38, 5a, el_2, q, vl),
512     INSN(extractf64x2,   66, 0f3a, 19, el_2, q, vl),
513     INSN(extracti64x2,   66, 0f3a, 39, el_2, q, vl),
514     INSN(insertf64x2,    66, 0f3a, 18, el_2, q, vl),
515     INSN(inserti64x2,    66, 0f3a, 38, el_2, q, vl),
516 };
517 
518 static const struct test avx512dq_512[] = {
519     INSN(broadcastf32x8, 66, 0f38, 1b, el_8, d, vl),
520     INSN(broadcasti32x8, 66, 0f38, 5b, el_8, d, vl),
521     INSN(extractf32x8,   66, 0f3a, 1b, el_8, d, vl),
522     INSN(extracti32x8,   66, 0f3a, 3b, el_8, d, vl),
523     INSN(insertf32x8,    66, 0f3a, 1a, el_8, d, vl),
524     INSN(inserti32x8,    66, 0f3a, 3a, el_8, d, vl),
525 };
526 
527 static const struct test avx512er_512[] = {
528     INSN(exp2,    66, 0f38, c8, vl, sd, vl),
529     INSN(rcp28,   66, 0f38, ca, vl, sd, vl),
530     INSN(rcp28,   66, 0f38, cb, el, sd, el),
531     INSN(rsqrt28, 66, 0f38, cc, vl, sd, vl),
532     INSN(rsqrt28, 66, 0f38, cd, el, sd, el),
533 };
534 
535 static const struct test avx512pf_512[] = {
536     INSNX(gatherpf0d,  66, 0f38, c6, 1, vl, sd, el),
537     INSNX(gatherpf0q,  66, 0f38, c7, 1, vl, sd, el),
538     INSNX(gatherpf1d,  66, 0f38, c6, 2, vl, sd, el),
539     INSNX(gatherpf1q,  66, 0f38, c7, 2, vl, sd, el),
540     INSNX(scatterpf0d, 66, 0f38, c6, 5, vl, sd, el),
541     INSNX(scatterpf0q, 66, 0f38, c7, 5, vl, sd, el),
542     INSNX(scatterpf1d, 66, 0f38, c6, 6, vl, sd, el),
543     INSNX(scatterpf1q, 66, 0f38, c7, 6, vl, sd, el),
544 };
545 
546 static const struct test avx512_4fmaps_512[] = {
547     INSN(4fmaddps,  f2, 0f38, 9a, el_4, d, vl),
548     INSN(4fmaddss,  f2, 0f38, 9b, el_4, d, vl),
549     INSN(4fnmaddps, f2, 0f38, aa, el_4, d, vl),
550     INSN(4fnmaddss, f2, 0f38, ab, el_4, d, vl),
551 };
552 
553 static const struct test avx512_4vnniw_512[] = {
554     INSN(p4dpwssd,  f2, 0f38, 52, el_4, d, vl),
555     INSN(p4dpwssds, f2, 0f38, 53, el_4, d, vl),
556 };
557 
558 static const struct test avx512_bf16_all[] = {
559     INSN(cvtne2ps2bf16, f2, 0f38, 72, vl, d, vl),
560     INSN(cvtneps2bf16,  f3, 0f38, 72, vl, d, vl),
561     INSN(dpbf16ps,      f3, 0f38, 52, vl, d, vl),
562 };
563 
564 static const struct test avx512_bitalg_all[] = {
565     INSN(popcnt,      66, 0f38, 54, vl, bw, vl),
566     INSN(pshufbitqmb, 66, 0f38, 8f, vl,  b, vl),
567 };
568 
569 static const struct test avx512_ifma_all[] = {
570     INSN(pmadd52huq, 66, 0f38, b5, vl, q, vl),
571     INSN(pmadd52luq, 66, 0f38, b4, vl, q, vl),
572 };
573 
574 static const struct test avx512_vbmi_all[] = {
575     INSN(permb,         66, 0f38, 8d, vl, b, vl),
576     INSN(permi2b,       66, 0f38, 75, vl, b, vl),
577     INSN(permt2b,       66, 0f38, 7d, vl, b, vl),
578     INSN(pmultishiftqb, 66, 0f38, 83, vl, q, vl),
579 };
580 
581 static const struct test avx512_vbmi2_all[] = {
582     INSN(pcompress, 66, 0f38, 63, vl, bw, el),
583     INSN(pexpand,   66, 0f38, 62, vl, bw, el),
584     INSN(pshld,     66, 0f3a, 71, vl, dq, vl),
585     INSN(pshldv,    66, 0f38, 71, vl, dq, vl),
586     INSN(pshldvw,   66, 0f38, 70, vl,  w, vl),
587     INSN(pshldw,    66, 0f3a, 70, vl,  w, vl),
588     INSN(pshrd,     66, 0f3a, 73, vl, dq, vl),
589     INSN(pshrdv,    66, 0f38, 73, vl, dq, vl),
590     INSN(pshrdvw,   66, 0f38, 72, vl,  w, vl),
591     INSN(pshrdw,    66, 0f3a, 72, vl,  w, vl),
592 };
593 
594 static const struct test avx512_vnni_all[] = {
595     INSN(pdpbusd,  66, 0f38, 50, vl, d, vl),
596     INSN(pdpbusds, 66, 0f38, 51, vl, d, vl),
597     INSN(pdpwssd,  66, 0f38, 52, vl, d, vl),
598     INSN(pdpwssds, 66, 0f38, 53, vl, d, vl),
599 };
600 
601 static const struct test avx512_vp2intersect_all[] = {
602     INSN(p2intersect, f2, 0f38, 68, vl, dq, vl)
603 };
604 
605 static const struct test avx512_vpopcntdq_all[] = {
606     INSN(popcnt, 66, 0f38, 55, vl, dq, vl)
607 };
608 
609 static const struct test avx512_fp16_all[] = {
610     INSN(addph,           , map5, 58,    vl, fp16, vl),
611     INSN(addsh,         f3, map5, 58,    el, fp16, el),
612     INSN(cmpph,           , 0f3a, c2,    vl, fp16, vl),
613     INSN(cmpsh,         f3, 0f3a, c2,    el, fp16, el),
614     INSN(comish,          , map5, 2f,    el, fp16, el),
615     INSN(cvtdq2ph,        , map5, 5b,    vl,    d, vl),
616     INSN(cvtpd2ph,      66, map5, 5a,    vl,    q, vl),
617     INSN(cvtph2dq,      66, map5, 5b,  vl_2, fp16, vl),
618     INSN(cvtph2pd,        , map5, 5a,  vl_4, fp16, vl),
619     INSN(cvtph2psx,     66, map6, 13,  vl_2, fp16, vl),
620     INSN(cvtph2qq,      66, map5, 7b,  vl_4, fp16, vl),
621     INSN(cvtph2udq,       , map5, 79,  vl_2, fp16, vl),
622     INSN(cvtph2uqq,     66, map5, 79,  vl_4, fp16, vl),
623     INSN(cvtph2uw,        , map5, 7d,    vl, fp16, vl),
624     INSN(cvtph2w,       66, map5, 7d,    vl, fp16, vl),
625     INSN(cvtps2phx,     66, map5, 1d,    vl,    d, vl),
626     INSN(cvtqq2ph,        , map5, 5b,    vl,    q, vl),
627     INSN(cvtsd2sh,      f2, map5, 5a,    el,    q, el),
628     INSN(cvtsh2sd,      f3, map5, 5a,    el, fp16, el),
629     INSN(cvtsh2si,      f3, map5, 2d,    el, fp16, el),
630     INSN(cvtsh2ss,        , map6, 13,    el, fp16, el),
631     INSN(cvtsh2usi,     f3, map5, 79,    el, fp16, el),
632     INSN(cvtsi2sh,      f3, map5, 2a,    el, dq64, el),
633     INSN(cvtss2sh,        , map5, 1d,    el,    d, el),
634     INSN(cvttph2dq,     f3, map5, 5b,  vl_2, fp16, vl),
635     INSN(cvttph2qq,     66, map5, 7a,  vl_4, fp16, vl),
636     INSN(cvttph2udq,      , map5, 78,  vl_2, fp16, vl),
637     INSN(cvttph2uqq,    66, map5, 78,  vl_4, fp16, vl),
638     INSN(cvttph2uw,       , map5, 7c,    vl, fp16, vl),
639     INSN(cvttph2w,      66, map5, 7c,    vl, fp16, vl),
640     INSN(cvttsh2si,     f3, map5, 2c,    el, fp16, el),
641     INSN(cvttsh2usi,    f3, map5, 78,    el, fp16, el),
642     INSN(cvtudq2ph,     f2, map5, 7a,    vl,    d, vl),
643     INSN(cvtuqq2ph,     f2, map5, 7a,    vl,    q, vl),
644     INSN(cvtusi2sh,     f3, map5, 7b,    el, dq64, el),
645     INSN(cvtuw2ph,      f2, map5, 7d,    vl, fp16, vl),
646     INSN(cvtw2ph,       f3, map5, 7d,    vl, fp16, vl),
647     INSN(divph,           , map5, 5e,    vl, fp16, vl),
648     INSN(divsh,         f3, map5, 5e,    el, fp16, el),
649     INSNX(fcmaddcph,    f2, map6, 56, 1, vl,    d, vl),
650     INSNX(fcmaddcsh,    f2, map6, 57, 1, el,    d, el),
651     INSNX(fcmulcph,     f2, map6, d6, 1, vl,    d, vl),
652     INSNX(fcmulcsh,     f2, map6, d7, 1, el,    d, el),
653     INSN(fmadd132ph,    66, map6, 98,    vl, fp16, vl),
654     INSN(fmadd132sh,    66, map6, 99,    el, fp16, el),
655     INSN(fmadd213ph,    66, map6, a8,    vl, fp16, vl),
656     INSN(fmadd213sh,    66, map6, a9,    el, fp16, el),
657     INSN(fmadd231ph,    66, map6, b8,    vl, fp16, vl),
658     INSN(fmadd231sh,    66, map6, b9,    el, fp16, el),
659     INSNX(fmaddcph,     f3, map6, 56, 1, vl,    d, vl),
660     INSNX(fmaddcsh,     f3, map6, 57, 1, el,    d, el),
661     INSN(fmaddsub132ph, 66, map6, 96,    vl, fp16, vl),
662     INSN(fmaddsub213ph, 66, map6, a6,    vl, fp16, vl),
663     INSN(fmaddsub231ph, 66, map6, b6,    vl, fp16, vl),
664     INSN(fmsub132ph,    66, map6, 9a,    vl, fp16, vl),
665     INSN(fmsub132sh,    66, map6, 9b,    el, fp16, el),
666     INSN(fmsub213ph,    66, map6, aa,    vl, fp16, vl),
667     INSN(fmsub213sh,    66, map6, ab,    el, fp16, el),
668     INSN(fmsub231ph,    66, map6, ba,    vl, fp16, vl),
669     INSN(fmsub231sh,    66, map6, bb,    el, fp16, el),
670     INSN(fmsubadd132ph, 66, map6, 97,    vl, fp16, vl),
671     INSN(fmsubadd213ph, 66, map6, a7,    vl, fp16, vl),
672     INSN(fmsubadd231ph, 66, map6, b7,    vl, fp16, vl),
673     INSNX(fmulcph,      f3, map6, d6, 1, vl,    d, vl),
674     INSNX(fmulcsh,      f3, map6, d7, 1, el,    d, el),
675     INSN(fnmadd132ph,   66, map6, 9c,    vl, fp16, vl),
676     INSN(fnmadd132sh,   66, map6, 9d,    el, fp16, el),
677     INSN(fnmadd213ph,   66, map6, ac,    vl, fp16, vl),
678     INSN(fnmadd213sh,   66, map6, ad,    el, fp16, el),
679     INSN(fnmadd231ph,   66, map6, bc,    vl, fp16, vl),
680     INSN(fnmadd231sh,   66, map6, bd,    el, fp16, el),
681     INSN(fnmsub132ph,   66, map6, 9e,    vl, fp16, vl),
682     INSN(fnmsub132sh,   66, map6, 9f,    el, fp16, el),
683     INSN(fnmsub213ph,   66, map6, ae,    vl, fp16, vl),
684     INSN(fnmsub213sh,   66, map6, af,    el, fp16, el),
685     INSN(fnmsub231ph,   66, map6, be,    vl, fp16, vl),
686     INSN(fnmsub231sh,   66, map6, bf,    el, fp16, el),
687     INSN(fpclassph,       , 0f3a, 66,    vl, fp16, vl),
688     INSN(fpclasssh,       , 0f3a, 67,    el, fp16, el),
689     INSN(getexpph,      66, map6, 42,    vl, fp16, vl),
690     INSN(getexpsh,      66, map6, 43,    el, fp16, el),
691     INSN(getmantph,       , 0f3a, 26,    vl, fp16, vl),
692     INSN(getmantsh,       , 0f3a, 27,    el, fp16, el),
693     INSN(maxph,           , map5, 5f,    vl, fp16, vl),
694     INSN(maxsh,         f3, map5, 5f,    el, fp16, el),
695     INSN(minph,           , map5, 5d,    vl, fp16, vl),
696     INSN(minsh,         f3, map5, 5d,    el, fp16, el),
697     INSN(movsh,         f3, map5, 10,    el, fp16, el),
698     INSN(movsh,         f3, map5, 11,    el, fp16, el),
699     INSN(mulph,           , map5, 59,    vl, fp16, vl),
700     INSN(mulsh,         f3, map5, 59,    el, fp16, el),
701     INSN(rcpph,         66, map6, 4c,    vl, fp16, vl),
702     INSN(rcpsh,         66, map6, 4d,    el, fp16, el),
703     INSN(reduceph,        , 0f3a, 56,    vl, fp16, vl),
704     INSN(reducesh,        , 0f3a, 57,    el, fp16, el),
705     INSN(rndscaleph,      , 0f3a, 08,    vl, fp16, vl),
706     INSN(rndscalesh,      , 0f3a, 0a,    el, fp16, el),
707     INSN(rsqrtph,       66, map6, 4e,    vl, fp16, vl),
708     INSN(rsqrtsh,       66, map6, 4f,    el, fp16, el),
709     INSN(scalefph,      66, map6, 2c,    vl, fp16, vl),
710     INSN(scalefsh,      66, map6, 2d,    el, fp16, el),
711     INSN(sqrtph,          , map5, 51,    vl, fp16, vl),
712     INSN(sqrtsh,        f3, map5, 51,    el, fp16, el),
713     INSN(subph,           , map5, 5c,    vl, fp16, vl),
714     INSN(subsh,         f3, map5, 5c,    el, fp16, el),
715     INSN(ucomish,         , map5, 2e,    el, fp16, el),
716 };
717 
/*
 * AVX512-FP16 insns which evex_disp8_test() runs with the vl_128[] vector
 * length list only.
 */
static const struct test avx512_fp16_128[] = {
    INSN(movw, 66, map5, 6e, el, fp16, el),
    INSN(movw, 66, map5, 7e, el, fp16, el),
};
722 
/*
 * GFNI insns, run with the vl_all[] vector length list. Note that
 * evex_disp8_test() processes this table only if cpu_has_avx512f holds as
 * well.
 */
static const struct test gfni_all[] = {
    INSN(gf2p8affineinvqb, 66, 0f3a, cf, vl, q, vl),
    INSN(gf2p8affineqb,    66, 0f3a, ce, vl, q, vl),
    INSN(gf2p8mulb,        66, 0f38, cf, vl, b, vl),
};
728 
/*
 * VAES insns, run with the vl_all[] vector length list.
 *
 * The uses of b in this table are simply (one of) the shortest form(s) of
 * saying "no broadcast" without introducing a 128-bit granularity enumerator.
 * Due to all of the insns being WIG, w, d_nb, and q_nb would all also fit.
 */
static const struct test vaes_all[] = {
    INSN(aesdec,     66, 0f38, de, vl, b, vl),
    INSN(aesdeclast, 66, 0f38, df, vl, b, vl),
    INSN(aesenc,     66, 0f38, dc, vl, b, vl),
    INSN(aesenclast, 66, 0f38, dd, vl, b, vl),
};
740 
/*
 * VPCLMULQDQ, run with the vl_all[] vector length list. q_nb means the
 * broadcast variant does not get exercised (see test_one()).
 */
static const struct test vpclmulqdq_all[] = {
    INSN(pclmulqdq, 66, 0f3a, 44, vl, q_nb, vl)
};
744 
/*
 * Vector length lists handed to test_group(). Element order matters: a list
 * whose first entry is VL_512 has its non-512-bit entries skipped by
 * test_group() when AVX512VL is unavailable.
 */
static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 };
static const unsigned char vl_128[] = { VL_128 };
static const unsigned char vl_no128[] = { VL_512, VL_256 };
static const unsigned char vl_512[] = { VL_512 };
749 
/*
 * This table, indicating the presence of an immediate (byte) for an opcode
 * space 0f major opcode, is indexed by high major opcode byte nibble, with
 * each table element then bit-indexed by low major opcode byte nibble.
 * Rows not explicitly initialized are zero, i.e. "no immediate". The table
 * is consulted by test_one() to determine the expected instruction length.
 */
static const uint16_t imm0f[16] = {
    [0x7] = (1 << 0x0) /* vpshuf* */ |
            (1 << 0x1) /* vps{ll,ra,rl}w */ |
            (1 << 0x2) /* vps{l,r}ld, vp{rol,ror,sra}{d,q} */ |
            (1 << 0x3) /* vps{l,r}l{,d}q */,
    [0xc] = (1 << 0x2) /* vcmp{p,s}{d,s} */ |
            (1 << 0x4) /* vpinsrw */ |
            (1 << 0x5) /* vpextrw */ |
            (1 << 0x6) /* vshufp{d,s} */,
};
765 
/*
 * Emulator hooks used by the tests: a copy of the caller supplied set, with
 * the read and write hooks replaced by the access tracking ones below (set
 * up in evex_disp8_test()).
 */
static struct x86_emulate_ops emulops;
767 
/*
 * Access tracking (byte granular) is used on the first 3 * 64 bytes of
 * address space. Instructions get encoded with a raw Disp8 value of 1, which
 * then gets scaled accordingly. Hence accesses below the address <scaling
 * factor> as well as at or above 2 * <scaling factor> are indications of
 * bugs. To aid diagnosis / debugging, track all accesses below
 * 3 * <scaling factor>. With AVX512 the maximum scaling factor is 64.
 *
 * Each array element counts how many times the byte at that address was
 * accessed since the array was last cleared.
 */
static unsigned int accessed[3 * 64];
777 
record_access(enum x86_segment seg,unsigned long offset,unsigned int bytes)778 static bool record_access(enum x86_segment seg, unsigned long offset,
779                           unsigned int bytes)
780 {
781     while ( bytes-- )
782     {
783         if ( offset >= ARRAY_SIZE(accessed) )
784             return false;
785         ++accessed[offset++];
786     }
787 
788     return true;
789 }
790 
read(enum x86_segment seg,unsigned long offset,void * p_data,unsigned int bytes,struct x86_emulate_ctxt * ctxt)791 static int read(enum x86_segment seg, unsigned long offset, void *p_data,
792                 unsigned int bytes, struct x86_emulate_ctxt *ctxt)
793 {
794     if ( !record_access(seg, offset, bytes + !bytes) )
795         return X86EMUL_UNHANDLEABLE;
796     memset(p_data, 0, bytes);
797     return X86EMUL_OKAY;
798 }
799 
write(enum x86_segment seg,unsigned long offset,void * p_data,unsigned int bytes,struct x86_emulate_ctxt * ctxt)800 static int write(enum x86_segment seg, unsigned long offset, void *p_data,
801                  unsigned int bytes, struct x86_emulate_ctxt *ctxt)
802 {
803     if ( !record_access(seg, offset, bytes + !bytes) )
804         return X86EMUL_UNHANDLEABLE;
805     return X86EMUL_OKAY;
806 }
807 
/*
 * Encode the EVEX instruction described by @test for vector length @vl with
 * a memory operand using a raw Disp8 of 1, emulate it, and verify through
 * accessed[] that exactly the expected bytes were touched the expected
 * number of times. Where the table entry permits, the same is then repeated
 * with the EVEX broadcast bit set.
 *
 * @test:  table entry to exercise; its esz must be one of the plain element
 *         sizes handled by the first switch() below (test_group() expands
 *         the combined ones before calling here).
 * @vl:    vector length to encode.
 * @instr: buffer the instruction gets written to; bytes 0...8 are stored.
 * @ctxt:  emulation context; regs->eip and regs->edx get overwritten.
 *
 * Does not return on failure: a diagnostic is printed and exit(1) invoked.
 */
static void test_one(const struct test *test, enum vl vl,
                     unsigned char *instr, struct x86_emulate_ctxt *ctxt)
{
    unsigned int vsz, esz, i, n;
    int rc;
    /* Scatter/gather insns are recognized purely by their mnemonic. */
    bool sg = strstr(test->mnemonic, "gather") ||
              strstr(test->mnemonic, "scatter");
    /*
     * Every opcode space 0f3a entry is treated as carrying an immediate
     * byte; for opcode space 0f imm0f[] is consulted.
     */
    bool imm = test->spc == SPC_0f3a ||
               (test->spc == SPC_0f &&
                (imm0f[test->opc >> 4] & (1 << (test->opc & 0xf))));
    /*
     * The three bytes following the 0x62 prefix byte: raw[] is what gets
     * copied into instr[1...3], the bitfields give access to the individual
     * components.
     */
    union evex {
        uint8_t raw[3];
        struct {
            uint8_t opcx:3;
            uint8_t mbz:1;
            uint8_t R:1;
            uint8_t b:1;
            uint8_t x:1;
            uint8_t r:1;
            uint8_t pfx:2;
            uint8_t mbs:1;
            uint8_t reg:4;
            uint8_t w:1;
            uint8_t opmsk:3;
            uint8_t RX:1;
            uint8_t bcst:1;
            uint8_t lr:2;
            uint8_t z:1;
        };
    } evex = {
        .opcx = test->spc, .pfx = test->pfx, .lr = vl,
        /*
         * NOTE(review): R, b, x, r, reg, and RX are presumably the inverted
         * register (extension) fields, hence all ones here - confirm against
         * the EVEX encoding. S/G insns get an opmask field of 1, other insns
         * one of 0.
         */
        .R = 1, .b = 1, .x = 1, .r = 1, .mbs = 1,
        .reg = 0xf, .RX = 1, .opmsk = sg,
    };

    /* Element size in bytes, setting EVEX.W where the element type wants it. */
    switch ( test->esz )
    {
    case ESZ_b:
        esz = 1;
        break;

    case ESZ_w:
        evex.w = 1;
        /* fall through */
    case ESZ_fp16:
        esz = 2;
        break;

#ifdef __i386__
    case ESZ_d_WIG:
        evex.w = 1;
        /* fall through */
#endif
    case ESZ_d: case ESZ_d_nb:
        esz = 4;
        break;

    case ESZ_q: case ESZ_q_nb:
        esz = 8;
        evex.w = 1;
        break;

    default:
        ASSERT_UNREACHABLE();
    }

    /*
     * Memory operand size in bytes: either a fraction of the full vector
     * (16 << vl bytes) or a multiple of the element size.
     */
    switch ( test->vsz )
    {
    case VSZ_vl:
        vsz = 16 << vl;
        break;

    case VSZ_vl_2:
        vsz = 8 << vl;
        break;

    case VSZ_vl_4:
        vsz = 4 << vl;
        break;

    case VSZ_vl_8:
        vsz = 2 << vl;
        break;

    case VSZ_el:
        vsz = esz;
        break;

    case VSZ_el_2:
        vsz = esz * 2;
        break;

    case VSZ_el_4:
        vsz = esz * 4;
        break;

    case VSZ_el_8:
        vsz = esz * 8;
        break;

    default:
        ASSERT_UNREACHABLE();
    }

    /*
     * Note: SIB addressing is used here, such that S/G insns can be handled
     * without extra conditionals.
     */
    instr[0] = 0x62;
    instr[1] = evex.raw[0];
    instr[2] = evex.raw[1];
    instr[3] = evex.raw[2];
    instr[4] = test->opc;
    instr[5] = 0x44 | (test->ext << 3); /* ModR/M */
    instr[6] = 0x22; /* SIB: base rDX, index none / xMM4 */
    instr[7] = 1; /* Disp8 */
    instr[8] = 0; /* immediate, if any */

    /*
     * Set all bits of %k1 and clear %xmm4 - presumably the mask and index
     * registers the S/G forms encoded above end up using.
     */
    asm volatile ( "kxnorw %k1, %k1, %k1" );
    asm volatile ( "vxorps %xmm4, %xmm4, %xmm4" );

    /* With base register rDX being zero, the address is the scaled Disp8. */
    ctxt->regs->eip = (unsigned long)&instr[0];
    ctxt->regs->edx = 0;
    memset(accessed, 0, sizeof(accessed));

    /* The insn is 8 bytes long, plus the immediate byte (if there is one). */
    rc = x86_emulate(ctxt, &emulops);
    if ( rc != X86EMUL_OKAY ||
         (ctxt->regs->eip != (unsigned long)&instr[8 + imm]) )
        goto fail;

    /* Nothing may have been accessed below 1 * <scaling factor>. */
    for ( i = 0; i < (test->scale == SC_vl ? vsz : esz); ++i )
         if ( accessed[i] )
             goto fail;

    /*
     * Determine the end of the range expected to have been accessed: the
     * full operand for non-S/G insns, a single element for S/G ones, and
     * just one byte for S/G insns with "pf" in their mnemonic.
     */
    n = test->scale == SC_vl ? vsz : esz;
    if ( !sg )
        n += vsz;
    else if ( !strstr(test->mnemonic, "pf") )
        n += esz;
    else
        ++n;

    /*
     * Every byte in that range is to be accessed exactly once, except for
     * S/G insns, where the expected count is vsz / esz, halved when the
     * opcode is odd and EVEX.W is clear.
     */
    for ( ; i < n; ++i )
         if ( accessed[i] != (sg ? (vsz / esz) >> (test->opc & 1 & !evex.w)
                                 : 1) )
             goto fail;

    /* Nothing may have been accessed beyond the expected range. */
    for ( ; i < ARRAY_SIZE(accessed); ++i )
         if ( accessed[i] )
             goto fail;

    /* Also check the broadcast case, if available. */
    if ( test->vsz >= VSZ_el || test->scale != SC_vl )
        return;

    /* Only the d, q, and fp16 element kinds have a broadcast form tested. */
    switch ( test->esz )
    {
    case ESZ_d_nb: case ESZ_q_nb:
    case ESZ_b: case ESZ_w: case ESZ_bw:
        return;

    case ESZ_d: case ESZ_q: case ESZ_fp16:
        break;

    default:
        ASSERT_UNREACHABLE();
    }

    /* Re-emit just the EVEX byte holding the broadcast bit. */
    evex.bcst = 1;
    instr[3] = evex.raw[2];

    ctxt->regs->eip = (unsigned long)&instr[0];
    memset(accessed, 0, sizeof(accessed));

    rc = x86_emulate(ctxt, &emulops);
    if ( rc != X86EMUL_OKAY ||
         (ctxt->regs->eip != (unsigned long)&instr[8 + imm]) )
        goto fail;

    /*
     * With broadcast, exactly one element is expected to be accessed, at
     * offset 1 * <element size>.
     */
    for ( i = 0; i < esz; ++i )
         if ( accessed[i] )
             goto fail;
    for ( ; i < esz * 2; ++i )
         if ( accessed[i] != 1 )
             goto fail;
    for ( ; i < ARRAY_SIZE(accessed); ++i )
         if ( accessed[i] )
             goto fail;

    return;

 fail:
    /* evex.bcst tells which of the two passes the failure occurred in. */
    printf("failed (v%s%s %u-bit)\n", test->mnemonic,
           evex.bcst ? "/bcst" : "", 128 << vl);
    exit(1);
}
1004 
test_pair(const struct test * tmpl,enum vl vl,enum esz esz1,const char * suffix1,enum esz esz2,const char * suffix2,unsigned char * instr,struct x86_emulate_ctxt * ctxt)1005 static void test_pair(const struct test *tmpl, enum vl vl,
1006                       enum esz esz1, const char *suffix1,
1007                       enum esz esz2, const char *suffix2,
1008                       unsigned char *instr, struct x86_emulate_ctxt *ctxt)
1009 {
1010     struct test test = *tmpl;
1011     char mnemonic[24];
1012 
1013     test.esz = esz1;
1014     snprintf(mnemonic, ARRAY_SIZE(mnemonic), "%s%s", tmpl->mnemonic, suffix1);
1015     test.mnemonic = mnemonic;
1016     test_one(&test, vl, instr, ctxt);
1017 
1018     test.esz = esz2;
1019     snprintf(mnemonic, ARRAY_SIZE(mnemonic), "%s%s", tmpl->mnemonic, suffix2);
1020     test.mnemonic = mnemonic;
1021     test_one(&test, vl, instr, ctxt);
1022 }
1023 
test_group(const struct test tests[],unsigned int nr_test,const unsigned char vl[],unsigned int nr_vl,void * instr,struct x86_emulate_ctxt * ctxt)1024 static void test_group(const struct test tests[], unsigned int nr_test,
1025                        const unsigned char vl[], unsigned int nr_vl,
1026                        void *instr, struct x86_emulate_ctxt *ctxt)
1027 {
1028     unsigned int i, j;
1029 
1030     for ( i = 0; i < nr_test; ++i )
1031     {
1032         for ( j = 0; j < nr_vl; ++j )
1033         {
1034             if ( vl[0] == VL_512 && vl[j] != VL_512 && !cpu_has_avx512vl )
1035                 continue;
1036 
1037             switch ( tests[i].esz )
1038             {
1039             case ESZ_q_nb:
1040                 /* The 128-bit form of VMOVDDUP needs special casing. */
1041                 if ( vl[j] == VL_128 && tests[i].spc == SPC_0f &&
1042                      tests[i].opc == 0x12 && tests[i].pfx == PFX_f2 )
1043                 {
1044                     struct test test = tests[i];
1045 
1046                     test.vsz = VSZ_el;
1047                     test.scale = SC_el;
1048                     test_one(&test, vl[j], instr, ctxt);
1049                     continue;
1050                 }
1051                 /* fall through */
1052             default:
1053                 test_one(&tests[i], vl[j], instr, ctxt);
1054                 break;
1055 
1056             case ESZ_bw:
1057                 test_pair(&tests[i], vl[j], ESZ_b, "b", ESZ_w, "w",
1058                           instr, ctxt);
1059                 break;
1060 
1061             case ESZ_dq:
1062                 test_pair(&tests[i], vl[j], ESZ_d,
1063                           strncmp(tests[i].mnemonic, "cvt", 3) ? "d" : "l",
1064                           ESZ_q, "q", instr, ctxt);
1065                 break;
1066 
1067 #ifdef __i386__
1068             case ESZ_d_WIG:
1069                 test_pair(&tests[i], vl[j], ESZ_d, "/W0",
1070                           ESZ_d_WIG, "/W1", instr, ctxt);
1071                 break;
1072 #endif
1073 
1074             case ESZ_sd:
1075                 test_pair(&tests[i], vl[j],
1076                           ESZ_d, tests[i].vsz < VSZ_el ? "ps" : "ss",
1077                           ESZ_q, tests[i].vsz < VSZ_el ? "pd" : "sd",
1078                           instr, ctxt);
1079                 break;
1080             }
1081         }
1082     }
1083 }
1084 
evex_disp8_test(void * instr,struct x86_emulate_ctxt * ctxt,const struct x86_emulate_ops * ops)1085 void evex_disp8_test(void *instr, struct x86_emulate_ctxt *ctxt,
1086                      const struct x86_emulate_ops *ops)
1087 {
1088     emulops = *ops;
1089     emulops.read = read;
1090     emulops.write = write;
1091 
1092 #define RUN(feat, vl) do { \
1093     if ( cpu_has_##feat ) \
1094     { \
1095         printf("%-40s", "Testing " #feat "/" #vl " disp8 handling..."); \
1096         test_group(feat ## _ ## vl, ARRAY_SIZE(feat ## _ ## vl), \
1097                    vl_ ## vl, ARRAY_SIZE(vl_ ## vl), instr, ctxt); \
1098         printf("okay\n"); \
1099     } \
1100 } while ( false )
1101 
1102     RUN(avx512f, all);
1103     RUN(avx512f, 128);
1104     RUN(avx512f, no128);
1105     RUN(avx512f, 512);
1106     RUN(avx512bw, all);
1107     RUN(avx512bw, 128);
1108     RUN(avx512cd, all);
1109     RUN(avx512dq, all);
1110     RUN(avx512dq, 128);
1111     RUN(avx512dq, no128);
1112     RUN(avx512dq, 512);
1113     RUN(avx512er, 512);
1114 #define cpu_has_avx512pf cpu_has_avx512f
1115     RUN(avx512pf, 512);
1116     RUN(avx512_4fmaps, 512);
1117     RUN(avx512_4vnniw, 512);
1118     RUN(avx512_bf16, all);
1119     RUN(avx512_bitalg, all);
1120     RUN(avx512_ifma, all);
1121     RUN(avx512_vbmi, all);
1122     RUN(avx512_vbmi2, all);
1123     RUN(avx512_vnni, all);
1124     RUN(avx512_vp2intersect, all);
1125     RUN(avx512_vpopcntdq, all);
1126     RUN(avx512_fp16, all);
1127     RUN(avx512_fp16, 128);
1128 
1129     if ( cpu_has_avx512f )
1130     {
1131         RUN(gfni, all);
1132         RUN(vaes, all);
1133         RUN(vpclmulqdq, all);
1134     }
1135 }
1136