1 #include "x86-emulate.h"
2
3 #include <stdarg.h>
4 #include <stdio.h>
5
/*
 * Descriptor of one EVEX-encoded instruction form whose Disp8 scaling is to
 * be checked.  Instances are built by the INSN*() macros further down.
 */
struct test {
    /* Name as spelled in the tables (stringified macro argument). */
    const char *mnemonic;
    /* Major opcode byte, emitted right after the 3 EVEX payload bytes. */
    unsigned int opc:8;
    /* Opcode space (enum spc); copied verbatim into the EVEX opcx field. */
    unsigned int spc:3;
    /* SIMD prefix selector (enum pfx); copied verbatim into EVEX.pfx. */
    unsigned int pfx:2;
    /* Memory operand size class (enum vsz). */
    unsigned int vsz:3;
    /* Element size class (enum esz). */
    unsigned int esz:4;
    /* What Disp8 gets scaled by (enum scale). */
    unsigned int scale:1;
    /* Opcode extension, placed into ModR/M.reg; 0 when set up via INSN(). */
    unsigned int ext:3;
};
16
/*
 * Opcode spaces.  The enumerator values are used directly as the EVEX opcx
 * field value (see test_one()), so their order / numbering must not change:
 * 0f = 1, 0f38 = 2, 0f3a = 3, map5 = 5, map6 = 6.  The remaining names are
 * merely placeholders keeping the numbering dense.
 */
enum spc {
    SPC_invalid,
    SPC_0f,
    SPC_0f38,
    SPC_0f3a,
    SPC_unused4,
    SPC_map5,
    SPC_map6,
    SPC_unused7,
};
27
/*
 * SIMD prefix selectors.  Values are used directly as the 2-bit EVEX.pfx
 * field (see test_one()): 0 = none, 1 = 66, 2 = f3, 3 = f2.  PFX_ (empty
 * suffix) is what an empty prefix argument to INSN() expands to.
 */
enum pfx {
    PFX_,
    PFX_66,
    PFX_f3,
    PFX_f2
};
34
/*
 * Vector lengths.  Values are used directly as the EVEX lr field, and the
 * vector size in bytes is derived as 16 << vl (see test_one()).
 */
enum vl {
    VL_128,
    VL_256,
    VL_512,
};
40
/*
 * Disp8 scaling selector; stored in the 1-bit "scale" field of struct test,
 * hence there can only be two enumerators.
 */
enum scale { /* scale by memory operand ... */
    SC_vl, /* ... vector length */
    SC_el, /* ... element length */
};
45
/*
 * Vector size is determined either from EVEX.L'L (VL) or vector
 * element size (EL), often controlled by EVEX.W (see enum esz).
 *
 * With VL being 16 << vl bytes and EL being the element size in bytes,
 * test_one() translates the enumerators to VL, VL / 2, VL / 4, VL / 8,
 * EL, EL * 2, EL * 4, and EL * 8 respectively.  All eight values of the
 * 3-bit "vsz" field of struct test are in use.
 */
enum vsz {
    VSZ_vl,
    VSZ_vl_2, /* VL / 2 */
    VSZ_vl_4, /* VL / 4 */
    VSZ_vl_8, /* VL / 8 */
    /* "no broadcast" implied from here on. */
    VSZ_el,
    VSZ_el_2, /* EL * 2 */
    VSZ_el_4, /* EL * 4 */
    VSZ_el_8, /* EL * 8 */
};
61
/*
 * Vector element size is either an opcode attribute or often determined
 * by EVEX.W (in which case enumerators below name two sizes). Instructions
 * accessing GPRs often use EVEX.W to select between 32- and 64-bit GPR
 * width, but this distinction goes away outside of 64-bit mode (and EVEX.W
 * is ignored there).
 *
 * Only the single-size enumerators are handled by test_one(); the two-size
 * ones (dq, sd, bw) are presumably split up by its caller, which is not
 * part of this view.
 */
enum esz {
    ESZ_d,     /* 4 bytes, EVEX.W clear */
    ESZ_q,     /* 8 bytes, EVEX.W set */
    ESZ_dq,    /* d or q */
    ESZ_sd,    /* presumably single / double precision FP - TODO confirm */
    ESZ_d_nb,  /* like ESZ_d, but no broadcast */
    ESZ_q_nb,  /* like ESZ_q, but no broadcast */
    /* "no broadcast" implied from here on. */
#ifdef __i386__
    ESZ_d_WIG, /* 4 bytes, encoded with EVEX.W set (which is to be ignored) */
#endif
    ESZ_b,     /* 1 byte */
    ESZ_w,     /* 2 bytes, EVEX.W set */
    ESZ_bw,    /* b or w */
    ESZ_fp16,  /* 2 bytes, EVEX.W clear */
};
85
/*
 * Element size class for insns where EVEX.W selects between 32- and 64-bit
 * operation only in 64-bit mode: plain d/q there, whereas for 32-bit builds
 * the access is expected to be 4 bytes wide with EVEX.W ignored.
 */
#ifndef __i386__
# define ESZ_dq64 ESZ_dq
#else
# define ESZ_dq64 ESZ_d_WIG
#endif
91
/*
 * Initializer for one struct test instance:
 *   m  - mnemonic (gets stringified)
 *   p  - SIMD prefix: empty, 66, f3, or f2 (pasted onto PFX_)
 *   sp - opcode space (pasted onto SPC_)
 *   o  - major opcode, two hex digits without 0x prefix
 *   e  - ModR/M.reg opcode extension, a single digit (pasted onto a leading
 *        0, i.e. forming an octal literal)
 *   vs - vector size class (pasted onto VSZ_)
 *   es - element size class (pasted onto ESZ_)
 *   sc - Disp8 scaling class (pasted onto SC_)
 */
#define INSNX(m, p, sp, o, e, vs, es, sc) { \
    .mnemonic = #m, .opc = 0x##o, .spc = SPC_##sp, .pfx = PFX_##p, \
    .vsz = VSZ_##vs, .esz = ESZ_##es, .scale = SC_##sc, .ext = 0##e \
}
/* Same as INSNX(), for insns without opcode extension (ext = 0). */
#define INSN(m, p, sp, o, vs, es, sc) INSNX(m, p, sp, o, 0, vs, es, sc)
/* Packed FP pair: <m>pd (66 prefix, q elements) and <m>ps (no prefix, d). */
#define INSN_PFP(m, sp, o) \
    INSN(m##pd, 66, sp, o, vl, q, vl), \
    INSN(m##ps, , sp, o, vl, d, vl)
/* Like INSN_PFP(), but using the "no broadcast" element size classes. */
#define INSN_PFP_NB(m, sp, o) \
    INSN(m##pd, 66, sp, o, vl, q_nb, vl), \
    INSN(m##ps, , sp, o, vl, d_nb, vl)
/* Scalar FP pair: <m>sd (f2 prefix, q element) and <m>ss (f3 prefix, d). */
#define INSN_SFP(m, sp, o) \
    INSN(m##sd, f2, sp, o, el, q, el), \
    INSN(m##ss, f3, sp, o, el, d, el)

/* All four of the above packed and scalar forms sharing one opcode. */
#define INSN_FP(m, sp, o) \
    INSN_PFP(m, sp, o), \
    INSN_SFP(m, sp, o)
110
/*
 * AVX512F instruction forms.  Going by the name these are to be exercised
 * at every vector length (cf. vl_all[]); the code pairing tables with
 * vector length sets is not part of this view.
 *
 * Column order for INSN(): mnemonic, prefix, opcode space, opcode, vector
 * size class, element size class, Disp8 scaling class.  INSNX() has the
 * ModR/M.reg opcode extension inserted after the opcode.
 */
static const struct test avx512f_all[] = {
    INSN_FP(add, 0f, 58),
    INSN(align, 66, 0f3a, 03, vl, dq, vl),
    INSN(blendm, 66, 0f38, 65, vl, sd, vl),
    INSN(broadcastss, 66, 0f38, 18, el, d, el),
    INSN_FP(cmp, 0f, c2),
    INSN(comisd, 66, 0f, 2f, el, q, el),
    INSN(comiss, , 0f, 2f, el, d, el),
    INSN(compress, 66, 0f38, 8a, vl, sd, el),
    INSN(cvtdq2pd, f3, 0f, e6, vl_2, d, vl),
    INSN(cvtdq2ps, , 0f, 5b, vl, d, vl),
    INSN(cvtpd2dq, f2, 0f, e6, vl, q, vl),
    INSN(cvtpd2udq, , 0f, 79, vl, q, vl),
    INSN(cvtpd2ps, 66, 0f, 5a, vl, q, vl),
    INSN(cvtph2ps, 66, 0f38, 13, vl_2, d_nb, vl),
    INSN(cvtps2dq, 66, 0f, 5b, vl, d, vl),
    INSN(cvtps2pd, , 0f, 5a, vl_2, d, vl),
    INSN(cvtps2ph, 66, 0f3a, 1d, vl_2, d_nb, vl),
    INSN(cvtps2udq, , 0f, 79, vl, d, vl),
    INSN(cvtsd2si, f2, 0f, 2d, el, q, el),
    INSN(cvtsd2usi, f2, 0f, 79, el, q, el),
    INSN(cvtsd2ss, f2, 0f, 5a, el, q, el),
    INSN(cvtsi2sd, f2, 0f, 2a, el, dq64, el),
    INSN(cvtsi2ss, f3, 0f, 2a, el, dq64, el),
    INSN(cvtss2sd, f3, 0f, 5a, el, d, el),
    INSN(cvtss2si, f3, 0f, 2d, el, d, el),
    INSN(cvtss2usi, f3, 0f, 79, el, d, el),
    INSN(cvttpd2dq, 66, 0f, e6, vl, q, vl),
    INSN(cvttpd2udq, , 0f, 78, vl, q, vl),
    INSN(cvttps2dq, f3, 0f, 5b, vl, d, vl),
    INSN(cvttps2udq, , 0f, 78, vl, d, vl),
    INSN(cvttsd2si, f2, 0f, 2c, el, q, el),
    INSN(cvttsd2usi, f2, 0f, 78, el, q, el),
    INSN(cvttss2si, f3, 0f, 2c, el, d, el),
    INSN(cvttss2usi, f3, 0f, 78, el, d, el),
    INSN(cvtudq2pd, f3, 0f, 7a, vl_2, d, vl),
    INSN(cvtudq2ps, f2, 0f, 7a, vl, d, vl),
    INSN(cvtusi2sd, f2, 0f, 7b, el, dq64, el),
    INSN(cvtusi2ss, f3, 0f, 7b, el, dq64, el),
    INSN_FP(div, 0f, 5e),
    INSN(expand, 66, 0f38, 88, vl, sd, el),
    INSN(fixupimm, 66, 0f3a, 54, vl, sd, vl),
    INSN(fixupimm, 66, 0f3a, 55, el, sd, el),
    INSN(fmadd132, 66, 0f38, 98, vl, sd, vl),
    INSN(fmadd132, 66, 0f38, 99, el, sd, el),
    INSN(fmadd213, 66, 0f38, a8, vl, sd, vl),
    INSN(fmadd213, 66, 0f38, a9, el, sd, el),
    INSN(fmadd231, 66, 0f38, b8, vl, sd, vl),
    INSN(fmadd231, 66, 0f38, b9, el, sd, el),
    INSN(fmaddsub132, 66, 0f38, 96, vl, sd, vl),
    INSN(fmaddsub213, 66, 0f38, a6, vl, sd, vl),
    INSN(fmaddsub231, 66, 0f38, b6, vl, sd, vl),
    INSN(fmsub132, 66, 0f38, 9a, vl, sd, vl),
    INSN(fmsub132, 66, 0f38, 9b, el, sd, el),
    INSN(fmsub213, 66, 0f38, aa, vl, sd, vl),
    INSN(fmsub213, 66, 0f38, ab, el, sd, el),
    INSN(fmsub231, 66, 0f38, ba, vl, sd, vl),
    INSN(fmsub231, 66, 0f38, bb, el, sd, el),
    INSN(fmsubadd132, 66, 0f38, 97, vl, sd, vl),
    INSN(fmsubadd213, 66, 0f38, a7, vl, sd, vl),
    INSN(fmsubadd231, 66, 0f38, b7, vl, sd, vl),
    INSN(fnmadd132, 66, 0f38, 9c, vl, sd, vl),
    INSN(fnmadd132, 66, 0f38, 9d, el, sd, el),
    INSN(fnmadd213, 66, 0f38, ac, vl, sd, vl),
    INSN(fnmadd213, 66, 0f38, ad, el, sd, el),
    INSN(fnmadd231, 66, 0f38, bc, vl, sd, vl),
    INSN(fnmadd231, 66, 0f38, bd, el, sd, el),
    INSN(fnmsub132, 66, 0f38, 9e, vl, sd, vl),
    INSN(fnmsub132, 66, 0f38, 9f, el, sd, el),
    INSN(fnmsub213, 66, 0f38, ae, vl, sd, vl),
    INSN(fnmsub213, 66, 0f38, af, el, sd, el),
    INSN(fnmsub231, 66, 0f38, be, vl, sd, vl),
    INSN(fnmsub231, 66, 0f38, bf, el, sd, el),
    INSN(gatherd, 66, 0f38, 92, vl, sd, el),
    INSN(gatherq, 66, 0f38, 93, vl, sd, el),
    INSN(getexp, 66, 0f38, 42, vl, sd, vl),
    INSN(getexp, 66, 0f38, 43, el, sd, el),
    INSN(getmant, 66, 0f3a, 26, vl, sd, vl),
    INSN(getmant, 66, 0f3a, 27, el, sd, el),
    INSN_FP(max, 0f, 5f),
    INSN_FP(min, 0f, 5d),
    INSN_SFP(mov, 0f, 10),
    INSN_SFP(mov, 0f, 11),
    INSN_PFP_NB(mova, 0f, 28),
    INSN_PFP_NB(mova, 0f, 29),
    INSN(movddup, f2, 0f, 12, vl, q_nb, vl),
    INSN(movdqa32, 66, 0f, 6f, vl, d_nb, vl),
    INSN(movdqa32, 66, 0f, 7f, vl, d_nb, vl),
    INSN(movdqa64, 66, 0f, 6f, vl, q_nb, vl),
    INSN(movdqa64, 66, 0f, 7f, vl, q_nb, vl),
    INSN(movdqu32, f3, 0f, 6f, vl, d_nb, vl),
    INSN(movdqu32, f3, 0f, 7f, vl, d_nb, vl),
    INSN(movdqu64, f3, 0f, 6f, vl, q_nb, vl),
    INSN(movdqu64, f3, 0f, 7f, vl, q_nb, vl),
    INSN(movntdq, 66, 0f, e7, vl, d_nb, vl),
    INSN(movntdqa, 66, 0f38, 2a, vl, d_nb, vl),
    INSN_PFP_NB(movnt, 0f, 2b),
    INSN(movshdup, f3, 0f, 16, vl, d_nb, vl),
    INSN(movsldup, f3, 0f, 12, vl, d_nb, vl),
    INSN_PFP_NB(movu, 0f, 10),
    INSN_PFP_NB(movu, 0f, 11),
    INSN_FP(mul, 0f, 59),
    INSN(pabsd, 66, 0f38, 1e, vl, d, vl),
    INSN(pabsq, 66, 0f38, 1f, vl, q, vl),
    INSN(paddd, 66, 0f, fe, vl, d, vl),
    INSN(paddq, 66, 0f, d4, vl, q, vl),
    INSN(pand, 66, 0f, db, vl, dq, vl),
    INSN(pandn, 66, 0f, df, vl, dq, vl),
    INSN(pblendm, 66, 0f38, 64, vl, dq, vl),
    // pbroadcast, 66, 0f38, 7c, dq64
    INSN(pbroadcastd, 66, 0f38, 58, el, d, el),
    INSN(pbroadcastq, 66, 0f38, 59, el, q, el),
    INSN(pcmp, 66, 0f3a, 1f, vl, dq, vl),
    INSN(pcmpeqd, 66, 0f, 76, vl, d, vl),
    INSN(pcmpeqq, 66, 0f38, 29, vl, q, vl),
    INSN(pcmpgtd, 66, 0f, 66, vl, d, vl),
    INSN(pcmpgtq, 66, 0f38, 37, vl, q, vl),
    INSN(pcmpu, 66, 0f3a, 1e, vl, dq, vl),
    INSN(pcompress, 66, 0f38, 8b, vl, dq, el),
    INSN(permi2, 66, 0f38, 76, vl, dq, vl),
    INSN(permi2, 66, 0f38, 77, vl, sd, vl),
    INSN(permilpd, 66, 0f38, 0d, vl, q, vl),
    INSN(permilpd, 66, 0f3a, 05, vl, q, vl),
    INSN(permilps, 66, 0f38, 0c, vl, d, vl),
    INSN(permilps, 66, 0f3a, 04, vl, d, vl),
    INSN(permt2, 66, 0f38, 7e, vl, dq, vl),
    INSN(permt2, 66, 0f38, 7f, vl, sd, vl),
    INSN(pexpand, 66, 0f38, 89, vl, dq, el),
    INSN(pgatherd, 66, 0f38, 90, vl, dq, el),
    INSN(pgatherq, 66, 0f38, 91, vl, dq, el),
    INSN(pmaxs, 66, 0f38, 3d, vl, dq, vl),
    INSN(pmaxu, 66, 0f38, 3f, vl, dq, vl),
    INSN(pmins, 66, 0f38, 39, vl, dq, vl),
    INSN(pminu, 66, 0f38, 3b, vl, dq, vl),
    INSN(pmovdb, f3, 0f38, 31, vl_4, b, vl),
    INSN(pmovdw, f3, 0f38, 33, vl_2, b, vl),
    INSN(pmovqb, f3, 0f38, 32, vl_8, b, vl),
    INSN(pmovqd, f3, 0f38, 35, vl_2, d_nb, vl),
    INSN(pmovqw, f3, 0f38, 34, vl_4, b, vl),
    INSN(pmovsdb, f3, 0f38, 21, vl_4, b, vl),
    INSN(pmovsdw, f3, 0f38, 23, vl_2, b, vl),
    INSN(pmovsqb, f3, 0f38, 22, vl_8, b, vl),
    INSN(pmovsqd, f3, 0f38, 25, vl_2, d_nb, vl),
    INSN(pmovsqw, f3, 0f38, 24, vl_4, b, vl),
    INSN(pmovsxbd, 66, 0f38, 21, vl_4, b, vl),
    INSN(pmovsxbq, 66, 0f38, 22, vl_8, b, vl),
    INSN(pmovsxwd, 66, 0f38, 23, vl_2, w, vl),
    INSN(pmovsxwq, 66, 0f38, 24, vl_4, w, vl),
    INSN(pmovsxdq, 66, 0f38, 25, vl_2, d_nb, vl),
    INSN(pmovusdb, f3, 0f38, 11, vl_4, b, vl),
    INSN(pmovusdw, f3, 0f38, 13, vl_2, b, vl),
    INSN(pmovusqb, f3, 0f38, 12, vl_8, b, vl),
    INSN(pmovusqd, f3, 0f38, 15, vl_2, d_nb, vl),
    INSN(pmovusqw, f3, 0f38, 14, vl_4, b, vl),
    INSN(pmovzxbd, 66, 0f38, 31, vl_4, b, vl),
    INSN(pmovzxbq, 66, 0f38, 32, vl_8, b, vl),
    INSN(pmovzxwd, 66, 0f38, 33, vl_2, w, vl),
    INSN(pmovzxwq, 66, 0f38, 34, vl_4, w, vl),
    INSN(pmovzxdq, 66, 0f38, 35, vl_2, d_nb, vl),
    INSN(pmuldq, 66, 0f38, 28, vl, q, vl),
    INSN(pmulld, 66, 0f38, 40, vl, d, vl),
    INSN(pmuludq, 66, 0f, f4, vl, q, vl),
    INSN(por, 66, 0f, eb, vl, dq, vl),
    INSNX(prol, 66, 0f, 72, 1, vl, dq, vl),
    INSN(prolv, 66, 0f38, 15, vl, dq, vl),
    INSNX(pror, 66, 0f, 72, 0, vl, dq, vl),
    INSN(prorv, 66, 0f38, 14, vl, dq, vl),
    INSN(pscatterd, 66, 0f38, a0, vl, dq, el),
    INSN(pscatterq, 66, 0f38, a1, vl, dq, el),
    INSN(pshufd, 66, 0f, 70, vl, d, vl),
    INSN(pslld, 66, 0f, f2, el_4, d, vl),
    INSNX(pslld, 66, 0f, 72, 6, vl, d, vl),
    INSN(psllq, 66, 0f, f3, el_2, q, vl),
    INSNX(psllq, 66, 0f, 73, 6, vl, q, vl),
    INSN(psllv, 66, 0f38, 47, vl, dq, vl),
    INSNX(psra, 66, 0f, 72, 4, vl, dq, vl),
    INSN(psrad, 66, 0f, e2, el_4, d, vl),
    INSN(psraq, 66, 0f, e2, el_2, q, vl),
    INSN(psrav, 66, 0f38, 46, vl, dq, vl),
    INSN(psrld, 66, 0f, d2, el_4, d, vl),
    INSNX(psrld, 66, 0f, 72, 2, vl, d, vl),
    INSN(psrlq, 66, 0f, d3, el_2, q, vl),
    INSNX(psrlq, 66, 0f, 73, 2, vl, q, vl),
    INSN(psrlv, 66, 0f38, 45, vl, dq, vl),
    INSN(psubd, 66, 0f, fa, vl, d, vl),
    INSN(psubq, 66, 0f, fb, vl, q, vl),
    INSN(pternlog, 66, 0f3a, 25, vl, dq, vl),
    INSN(ptestm, 66, 0f38, 27, vl, dq, vl),
    INSN(ptestnm, f3, 0f38, 27, vl, dq, vl),
    INSN(punpckhdq, 66, 0f, 6a, vl, d, vl),
    INSN(punpckhqdq, 66, 0f, 6d, vl, q, vl),
    INSN(punpckldq, 66, 0f, 62, vl, d, vl),
    INSN(punpcklqdq, 66, 0f, 6c, vl, q, vl),
    INSN(pxor, 66, 0f, ef, vl, dq, vl),
    INSN(rcp14, 66, 0f38, 4c, vl, sd, vl),
    INSN(rcp14, 66, 0f38, 4d, el, sd, el),
    INSN(rndscalepd, 66, 0f3a, 09, vl, q, vl),
    INSN(rndscaleps, 66, 0f3a, 08, vl, d, vl),
    INSN(rndscalesd, 66, 0f3a, 0b, el, q, el),
    INSN(rndscaless, 66, 0f3a, 0a, el, d, el),
    INSN(rsqrt14, 66, 0f38, 4e, vl, sd, vl),
    INSN(rsqrt14, 66, 0f38, 4f, el, sd, el),
    INSN(scalef, 66, 0f38, 2c, vl, sd, vl),
    INSN(scalef, 66, 0f38, 2d, el, sd, el),
    INSN(scatterd, 66, 0f38, a2, vl, sd, el),
    INSN(scatterq, 66, 0f38, a3, vl, sd, el),
    INSN_PFP(shuf, 0f, c6),
    INSN_FP(sqrt, 0f, 51),
    INSN_FP(sub, 0f, 5c),
    INSN(ucomisd, 66, 0f, 2e, el, q, el),
    INSN(ucomiss, , 0f, 2e, el, d, el),
    INSN_PFP(unpckh, 0f, 15),
    INSN_PFP(unpckl, 0f, 14),
};
325
/*
 * AVX512F forms presumably limited to 128-bit vector length (cf. vl_128[];
 * the pairing is done outside of this view).  Commented-out lines name
 * forms which aren't covered by an entry.
 */
static const struct test avx512f_128[] = {
    INSN(extractps, 66, 0f3a, 17, el, d, el),
    INSN(insertps, 66, 0f3a, 21, el, d, el),
    INSN(mov, 66, 0f, 6e, el, dq64, el),
    INSN(mov, 66, 0f, 7e, el, dq64, el),
    // movhlps, , 0f, 12, d
    INSN(movhpd, 66, 0f, 16, el, q, vl),
    INSN(movhpd, 66, 0f, 17, el, q, vl),
    INSN(movhps, , 0f, 16, el_2, d, vl),
    INSN(movhps, , 0f, 17, el_2, d, vl),
    // movlhps, , 0f, 16, d
    INSN(movlpd, 66, 0f, 12, el, q, vl),
    INSN(movlpd, 66, 0f, 13, el, q, vl),
    INSN(movlps, , 0f, 12, el_2, d, vl),
    INSN(movlps, , 0f, 13, el_2, d, vl),
    INSN(movq, f3, 0f, 7e, el, q, el),
    INSN(movq, 66, 0f, d6, el, q, el),
};
344
/*
 * AVX512F forms presumably valid for 256- and 512-bit vector lengths only
 * (cf. vl_no128[]; the pairing is done outside of this view).
 */
static const struct test avx512f_no128[] = {
    INSN(broadcastf32x4, 66, 0f38, 1a, el_4, d, vl),
    INSN(broadcasti32x4, 66, 0f38, 5a, el_4, d, vl),
    INSN(broadcastsd, 66, 0f38, 19, el, q, el),
    INSN(extractf32x4, 66, 0f3a, 19, el_4, d, vl),
    INSN(extracti32x4, 66, 0f3a, 39, el_4, d, vl),
    INSN(insertf32x4, 66, 0f3a, 18, el_4, d, vl),
    INSN(inserti32x4, 66, 0f3a, 38, el_4, d, vl),
    INSN(perm, 66, 0f38, 36, vl, dq, vl),
    INSN(perm, 66, 0f38, 16, vl, sd, vl),
    INSN(permpd, 66, 0f3a, 01, vl, q, vl),
    INSN(permq, 66, 0f3a, 00, vl, q, vl),
    INSN(shuff32x4, 66, 0f3a, 23, vl, d, vl),
    INSN(shuff64x2, 66, 0f3a, 23, vl, q, vl),
    INSN(shufi32x4, 66, 0f3a, 43, vl, d, vl),
    INSN(shufi64x2, 66, 0f3a, 43, vl, q, vl),
};
362
/*
 * AVX512F forms presumably valid for 512-bit vector length only (cf.
 * vl_512[]; the pairing is done outside of this view).
 */
static const struct test avx512f_512[] = {
    INSN(broadcastf64x4, 66, 0f38, 1b, el_4, q, vl),
    INSN(broadcasti64x4, 66, 0f38, 5b, el_4, q, vl),
    INSN(extractf64x4, 66, 0f3a, 1b, el_4, q, vl),
    INSN(extracti64x4, 66, 0f3a, 3b, el_4, q, vl),
    INSN(insertf64x4, 66, 0f3a, 1a, el_4, q, vl),
    INSN(inserti64x4, 66, 0f3a, 3a, el_4, q, vl),
};
371
/*
 * AVX512BW forms, presumably for all vector lengths (cf. vl_all[]; the
 * pairing is done outside of this view).  Commented-out lines name forms
 * which aren't covered by an entry.
 */
static const struct test avx512bw_all[] = {
    INSN(dbpsadbw, 66, 0f3a, 42, vl, b, vl),
    INSN(movdqu8, f2, 0f, 6f, vl, b, vl),
    INSN(movdqu8, f2, 0f, 7f, vl, b, vl),
    INSN(movdqu16, f2, 0f, 6f, vl, w, vl),
    INSN(movdqu16, f2, 0f, 7f, vl, w, vl),
    INSN(pabsb, 66, 0f38, 1c, vl, b, vl),
    INSN(pabsw, 66, 0f38, 1d, vl, w, vl),
    INSN(packssdw, 66, 0f, 6b, vl, d_nb, vl),
    INSN(packsswb, 66, 0f, 63, vl, w, vl),
    INSN(packusdw, 66, 0f38, 2b, vl, d_nb, vl),
    INSN(packuswb, 66, 0f, 67, vl, w, vl),
    INSN(paddb, 66, 0f, fc, vl, b, vl),
    INSN(paddsb, 66, 0f, ec, vl, b, vl),
    INSN(paddsw, 66, 0f, ed, vl, w, vl),
    INSN(paddusb, 66, 0f, dc, vl, b, vl),
    INSN(paddusw, 66, 0f, dd, vl, w, vl),
    INSN(paddw, 66, 0f, fd, vl, w, vl),
    INSN(palignr, 66, 0f3a, 0f, vl, b, vl),
    INSN(pavgb, 66, 0f, e0, vl, b, vl),
    INSN(pavgw, 66, 0f, e3, vl, w, vl),
    INSN(pblendm, 66, 0f38, 66, vl, bw, vl),
    INSN(pbroadcastb, 66, 0f38, 78, el, b, el),
    // pbroadcastb, 66, 0f38, 7a, b
    INSN(pbroadcastw, 66, 0f38, 79, el_2, b, vl),
    // pbroadcastw, 66, 0f38, 7b, b
    INSN(pcmp, 66, 0f3a, 3f, vl, bw, vl),
    INSN(pcmpeqb, 66, 0f, 74, vl, b, vl),
    INSN(pcmpeqw, 66, 0f, 75, vl, w, vl),
    INSN(pcmpgtb, 66, 0f, 64, vl, b, vl),
    INSN(pcmpgtw, 66, 0f, 65, vl, w, vl),
    INSN(pcmpu, 66, 0f3a, 3e, vl, bw, vl),
    INSN(permw, 66, 0f38, 8d, vl, w, vl),
    INSN(permi2w, 66, 0f38, 75, vl, w, vl),
    INSN(permt2w, 66, 0f38, 7d, vl, w, vl),
    INSN(pmaddubsw, 66, 0f38, 04, vl, b, vl),
    INSN(pmaddwd, 66, 0f, f5, vl, w, vl),
    INSN(pmaxsb, 66, 0f38, 3c, vl, b, vl),
    INSN(pmaxsw, 66, 0f, ee, vl, w, vl),
    INSN(pmaxub, 66, 0f, de, vl, b, vl),
    INSN(pmaxuw, 66, 0f38, 3e, vl, w, vl),
    INSN(pminsb, 66, 0f38, 38, vl, b, vl),
    INSN(pminsw, 66, 0f, ea, vl, w, vl),
    INSN(pminub, 66, 0f, da, vl, b, vl),
    INSN(pminuw, 66, 0f38, 3a, vl, w, vl),
    // pmovb2m, f3, 0f38, 29, b
    // pmovm2, f3, 0f38, 28, bw
    INSN(pmovswb, f3, 0f38, 20, vl_2, b, vl),
    INSN(pmovsxbw, 66, 0f38, 20, vl_2, b, vl),
    INSN(pmovuswb, f3, 0f38, 10, vl_2, b, vl),
    // pmovw2m, f3, 0f38, 29, w
    INSN(pmovwb, f3, 0f38, 30, vl_2, b, vl),
    INSN(pmovzxbw, 66, 0f38, 30, vl_2, b, vl),
    INSN(pmulhrsw, 66, 0f38, 0b, vl, w, vl),
    INSN(pmulhuw, 66, 0f, e4, vl, w, vl),
    INSN(pmulhw, 66, 0f, e5, vl, w, vl),
    INSN(pmullw, 66, 0f, d5, vl, w, vl),
    INSN(psadbw, 66, 0f, f6, vl, b, vl),
    INSN(pshufb, 66, 0f38, 00, vl, b, vl),
    INSN(pshufhw, f3, 0f, 70, vl, w, vl),
    INSN(pshuflw, f2, 0f, 70, vl, w, vl),
    INSNX(pslldq, 66, 0f, 73, 7, vl, b, vl),
    INSN(psllvw, 66, 0f38, 12, vl, w, vl),
    INSN(psllw, 66, 0f, f1, el_8, w, vl),
    INSNX(psllw, 66, 0f, 71, 6, vl, w, vl),
    INSN(psravw, 66, 0f38, 11, vl, w, vl),
    INSN(psraw, 66, 0f, e1, el_8, w, vl),
    INSNX(psraw, 66, 0f, 71, 4, vl, w, vl),
    INSNX(psrldq, 66, 0f, 73, 3, vl, b, vl),
    INSN(psrlvw, 66, 0f38, 10, vl, w, vl),
    INSN(psrlw, 66, 0f, d1, el_8, w, vl),
    INSNX(psrlw, 66, 0f, 71, 2, vl, w, vl),
    INSN(psubb, 66, 0f, f8, vl, b, vl),
    INSN(psubsb, 66, 0f, e8, vl, b, vl),
    INSN(psubsw, 66, 0f, e9, vl, w, vl),
    INSN(psubusb, 66, 0f, d8, vl, b, vl),
    INSN(psubusw, 66, 0f, d9, vl, w, vl),
    INSN(psubw, 66, 0f, f9, vl, w, vl),
    INSN(ptestm, 66, 0f38, 26, vl, bw, vl),
    INSN(ptestnm, f3, 0f38, 26, vl, bw, vl),
    INSN(punpckhbw, 66, 0f, 68, vl, b, vl),
    INSN(punpckhwd, 66, 0f, 69, vl, w, vl),
    INSN(punpcklbw, 66, 0f, 60, vl, b, vl),
    INSN(punpcklwd, 66, 0f, 61, vl, w, vl),
};
457
/*
 * AVX512BW forms presumably limited to 128-bit vector length (cf.
 * vl_128[]; the pairing is done outside of this view).  The commented-out
 * line names a form which isn't covered by an entry.
 */
static const struct test avx512bw_128[] = {
    INSN(pextrb, 66, 0f3a, 14, el, b, el),
    // pextrw, 66, 0f, c5, w
    INSN(pextrw, 66, 0f3a, 15, el, w, el),
    INSN(pinsrb, 66, 0f3a, 20, el, b, el),
    INSN(pinsrw, 66, 0f, c4, el, w, el),
};
465
/*
 * AVX512CD forms, presumably for all vector lengths (cf. vl_all[]; the
 * pairing is done outside of this view).  Commented-out lines name forms
 * which aren't covered by an entry.
 */
static const struct test avx512cd_all[] = {
    // pbroadcastmb2q, f3, 0f38, 2a, q
    // pbroadcastmw2d, f3, 0f38, 3a, d
    INSN(pconflict, 66, 0f38, c4, vl, dq, vl),
    INSN(plzcnt, 66, 0f38, 44, vl, dq, vl),
};
472
/*
 * AVX512DQ forms, presumably for all vector lengths (cf. vl_all[]; the
 * pairing is done outside of this view).  Commented-out lines name forms
 * which aren't covered by an entry.
 */
static const struct test avx512dq_all[] = {
    INSN_PFP(and, 0f, 54),
    INSN_PFP(andn, 0f, 55),
    INSN(broadcasti32x2, 66, 0f38, 59, el_2, d, vl),
    INSN(cvtpd2qq, 66, 0f, 7b, vl, q, vl),
    INSN(cvtpd2uqq, 66, 0f, 79, vl, q, vl),
    INSN(cvtps2qq, 66, 0f, 7b, vl_2, d, vl),
    INSN(cvtps2uqq, 66, 0f, 79, vl_2, d, vl),
    INSN(cvtqq2pd, f3, 0f, e6, vl, q, vl),
    INSN(cvtqq2ps, , 0f, 5b, vl, q, vl),
    INSN(cvttpd2qq, 66, 0f, 7a, vl, q, vl),
    INSN(cvttpd2uqq, 66, 0f, 78, vl, q, vl),
    INSN(cvttps2qq, 66, 0f, 7a, vl_2, d, vl),
    INSN(cvttps2uqq, 66, 0f, 78, vl_2, d, vl),
    INSN(cvtuqq2pd, f3, 0f, 7a, vl, q, vl),
    INSN(cvtuqq2ps, f2, 0f, 7a, vl, q, vl),
    INSN(fpclass, 66, 0f3a, 66, vl, sd, vl),
    INSN(fpclass, 66, 0f3a, 67, el, sd, el),
    INSN_PFP(or, 0f, 56),
    // pmovd2m, f3, 0f38, 39, d
    // pmovm2, f3, 0f38, 38, dq
    // pmovq2m, f3, 0f38, 39, q
    INSN(pmullq, 66, 0f38, 40, vl, q, vl),
    INSN(range, 66, 0f3a, 50, vl, sd, vl),
    INSN(range, 66, 0f3a, 51, el, sd, el),
    INSN(reduce, 66, 0f3a, 56, vl, sd, vl),
    INSN(reduce, 66, 0f3a, 57, el, sd, el),
    INSN_PFP(xor, 0f, 57),
};
502
/*
 * AVX512DQ forms presumably limited to 128-bit vector length (cf.
 * vl_128[]; the pairing is done outside of this view).  Both entries use
 * the dq64 element size class (see ESZ_dq64).
 */
static const struct test avx512dq_128[] = {
    INSN(pextr, 66, 0f3a, 16, el, dq64, el),
    INSN(pinsr, 66, 0f3a, 22, el, dq64, el),
};
507
/*
 * AVX512DQ forms presumably valid for 256- and 512-bit vector lengths only
 * (cf. vl_no128[]; the pairing is done outside of this view).
 */
static const struct test avx512dq_no128[] = {
    INSN(broadcastf32x2, 66, 0f38, 19, el_2, d, vl),
    INSN(broadcastf64x2, 66, 0f38, 1a, el_2, q, vl),
    INSN(broadcasti64x2, 66, 0f38, 5a, el_2, q, vl),
    INSN(extractf64x2, 66, 0f3a, 19, el_2, q, vl),
    INSN(extracti64x2, 66, 0f3a, 39, el_2, q, vl),
    INSN(insertf64x2, 66, 0f3a, 18, el_2, q, vl),
    INSN(inserti64x2, 66, 0f3a, 38, el_2, q, vl),
};
517
/*
 * AVX512DQ forms presumably valid for 512-bit vector length only (cf.
 * vl_512[]; the pairing is done outside of this view).
 */
static const struct test avx512dq_512[] = {
    INSN(broadcastf32x8, 66, 0f38, 1b, el_8, d, vl),
    INSN(broadcasti32x8, 66, 0f38, 5b, el_8, d, vl),
    INSN(extractf32x8, 66, 0f3a, 1b, el_8, d, vl),
    INSN(extracti32x8, 66, 0f3a, 3b, el_8, d, vl),
    INSN(insertf32x8, 66, 0f3a, 1a, el_8, d, vl),
    INSN(inserti32x8, 66, 0f3a, 3a, el_8, d, vl),
};
526
/*
 * AVX512ER forms, presumably for 512-bit vector length only (cf. vl_512[];
 * the pairing is done outside of this view).
 */
static const struct test avx512er_512[] = {
    INSN(exp2, 66, 0f38, c8, vl, sd, vl),
    INSN(rcp28, 66, 0f38, ca, vl, sd, vl),
    INSN(rcp28, 66, 0f38, cb, el, sd, el),
    INSN(rsqrt28, 66, 0f38, cc, vl, sd, vl),
    INSN(rsqrt28, 66, 0f38, cd, el, sd, el),
};
534
/*
 * AVX512PF forms, presumably for 512-bit vector length only (cf. vl_512[];
 * the pairing is done outside of this view).  All entries share opcodes c6
 * and c7 and are told apart by their ModR/M.reg opcode extension.  As the
 * mnemonics contain "gather" / "scatter", test_one() enables opmask use for
 * them.
 */
static const struct test avx512pf_512[] = {
    INSNX(gatherpf0d, 66, 0f38, c6, 1, vl, sd, el),
    INSNX(gatherpf0q, 66, 0f38, c7, 1, vl, sd, el),
    INSNX(gatherpf1d, 66, 0f38, c6, 2, vl, sd, el),
    INSNX(gatherpf1q, 66, 0f38, c7, 2, vl, sd, el),
    INSNX(scatterpf0d, 66, 0f38, c6, 5, vl, sd, el),
    INSNX(scatterpf0q, 66, 0f38, c7, 5, vl, sd, el),
    INSNX(scatterpf1d, 66, 0f38, c6, 6, vl, sd, el),
    INSNX(scatterpf1q, 66, 0f38, c7, 6, vl, sd, el),
};
545
/*
 * AVX512_4FMAPS forms, presumably for 512-bit vector length only (cf.
 * vl_512[]; the pairing is done outside of this view).  The memory operand
 * is four d-sized elements (el_4) in all cases.
 */
static const struct test avx512_4fmaps_512[] = {
    INSN(4fmaddps, f2, 0f38, 9a, el_4, d, vl),
    INSN(4fmaddss, f2, 0f38, 9b, el_4, d, vl),
    INSN(4fnmaddps, f2, 0f38, aa, el_4, d, vl),
    INSN(4fnmaddss, f2, 0f38, ab, el_4, d, vl),
};
552
/*
 * AVX512_4VNNIW forms, presumably for 512-bit vector length only (cf.
 * vl_512[]; the pairing is done outside of this view).  The memory operand
 * is four d-sized elements (el_4) in both cases.
 */
static const struct test avx512_4vnniw_512[] = {
    INSN(p4dpwssd, f2, 0f38, 52, el_4, d, vl),
    INSN(p4dpwssds, f2, 0f38, 53, el_4, d, vl),
};
557
/*
 * AVX512_BF16 forms, presumably for all vector lengths (cf. vl_all[]; the
 * pairing is done outside of this view).
 */
static const struct test avx512_bf16_all[] = {
    INSN(cvtne2ps2bf16, f2, 0f38, 72, vl, d, vl),
    INSN(cvtneps2bf16, f3, 0f38, 72, vl, d, vl),
    INSN(dpbf16ps, f3, 0f38, 52, vl, d, vl),
};
563
/*
 * AVX512_BITALG forms, presumably for all vector lengths (cf. vl_all[]; the
 * pairing is done outside of this view).
 */
static const struct test avx512_bitalg_all[] = {
    INSN(popcnt, 66, 0f38, 54, vl, bw, vl),
    INSN(pshufbitqmb, 66, 0f38, 8f, vl, b, vl),
};
568
/*
 * AVX512_IFMA forms, presumably for all vector lengths (cf. vl_all[]; the
 * pairing is done outside of this view).
 */
static const struct test avx512_ifma_all[] = {
    INSN(pmadd52huq, 66, 0f38, b5, vl, q, vl),
    INSN(pmadd52luq, 66, 0f38, b4, vl, q, vl),
};
573
/*
 * AVX512_VBMI forms, presumably for all vector lengths (cf. vl_all[]; the
 * pairing is done outside of this view).
 */
static const struct test avx512_vbmi_all[] = {
    INSN(permb, 66, 0f38, 8d, vl, b, vl),
    INSN(permi2b, 66, 0f38, 75, vl, b, vl),
    INSN(permt2b, 66, 0f38, 7d, vl, b, vl),
    INSN(pmultishiftqb, 66, 0f38, 83, vl, q, vl),
};
580
/*
 * AVX512_VBMI2 forms, presumably for all vector lengths (cf. vl_all[]; the
 * pairing is done outside of this view).
 */
static const struct test avx512_vbmi2_all[] = {
    INSN(pcompress, 66, 0f38, 63, vl, bw, el),
    INSN(pexpand, 66, 0f38, 62, vl, bw, el),
    INSN(pshld, 66, 0f3a, 71, vl, dq, vl),
    INSN(pshldv, 66, 0f38, 71, vl, dq, vl),
    INSN(pshldvw, 66, 0f38, 70, vl, w, vl),
    INSN(pshldw, 66, 0f3a, 70, vl, w, vl),
    INSN(pshrd, 66, 0f3a, 73, vl, dq, vl),
    INSN(pshrdv, 66, 0f38, 73, vl, dq, vl),
    INSN(pshrdvw, 66, 0f38, 72, vl, w, vl),
    INSN(pshrdw, 66, 0f3a, 72, vl, w, vl),
};
593
/*
 * AVX512_VNNI forms, presumably for all vector lengths (cf. vl_all[]; the
 * pairing is done outside of this view).
 */
static const struct test avx512_vnni_all[] = {
    INSN(pdpbusd, 66, 0f38, 50, vl, d, vl),
    INSN(pdpbusds, 66, 0f38, 51, vl, d, vl),
    INSN(pdpwssd, 66, 0f38, 52, vl, d, vl),
    INSN(pdpwssds, 66, 0f38, 53, vl, d, vl),
};
600
/*
 * AVX512_VP2INTERSECT form, presumably for all vector lengths (cf.
 * vl_all[]; the pairing is done outside of this view).
 */
static const struct test avx512_vp2intersect_all[] = {
    INSN(p2intersect, f2, 0f38, 68, vl, dq, vl)
};
604
/*
 * AVX512_VPOPCNTDQ form, presumably for all vector lengths (cf. vl_all[];
 * the pairing is done outside of this view).
 */
static const struct test avx512_vpopcntdq_all[] = {
    INSN(popcnt, 66, 0f38, 55, vl, dq, vl)
};
608
/*
 * AVX512_FP16 forms, presumably for all vector lengths (cf. vl_all[]; the
 * pairing is done outside of this view).  Most entries live in opcode
 * spaces map5 / map6; a few use 0f3a.  Unlike in the other tables,
 * mnemonics here are spelled out in full rather than having a size suffix
 * implied by the element size class.
 */
static const struct test avx512_fp16_all[] = {
    INSN(addph, , map5, 58, vl, fp16, vl),
    INSN(addsh, f3, map5, 58, el, fp16, el),
    INSN(cmpph, , 0f3a, c2, vl, fp16, vl),
    INSN(cmpsh, f3, 0f3a, c2, el, fp16, el),
    INSN(comish, , map5, 2f, el, fp16, el),
    INSN(cvtdq2ph, , map5, 5b, vl, d, vl),
    INSN(cvtpd2ph, 66, map5, 5a, vl, q, vl),
    INSN(cvtph2dq, 66, map5, 5b, vl_2, fp16, vl),
    INSN(cvtph2pd, , map5, 5a, vl_4, fp16, vl),
    INSN(cvtph2psx, 66, map6, 13, vl_2, fp16, vl),
    INSN(cvtph2qq, 66, map5, 7b, vl_4, fp16, vl),
    INSN(cvtph2udq, , map5, 79, vl_2, fp16, vl),
    INSN(cvtph2uqq, 66, map5, 79, vl_4, fp16, vl),
    INSN(cvtph2uw, , map5, 7d, vl, fp16, vl),
    INSN(cvtph2w, 66, map5, 7d, vl, fp16, vl),
    INSN(cvtps2phx, 66, map5, 1d, vl, d, vl),
    INSN(cvtqq2ph, , map5, 5b, vl, q, vl),
    INSN(cvtsd2sh, f2, map5, 5a, el, q, el),
    INSN(cvtsh2sd, f3, map5, 5a, el, fp16, el),
    INSN(cvtsh2si, f3, map5, 2d, el, fp16, el),
    INSN(cvtsh2ss, , map6, 13, el, fp16, el),
    INSN(cvtsh2usi, f3, map5, 79, el, fp16, el),
    INSN(cvtsi2sh, f3, map5, 2a, el, dq64, el),
    INSN(cvtss2sh, , map5, 1d, el, d, el),
    INSN(cvttph2dq, f3, map5, 5b, vl_2, fp16, vl),
    INSN(cvttph2qq, 66, map5, 7a, vl_4, fp16, vl),
    INSN(cvttph2udq, , map5, 78, vl_2, fp16, vl),
    INSN(cvttph2uqq, 66, map5, 78, vl_4, fp16, vl),
    INSN(cvttph2uw, , map5, 7c, vl, fp16, vl),
    INSN(cvttph2w, 66, map5, 7c, vl, fp16, vl),
    INSN(cvttsh2si, f3, map5, 2c, el, fp16, el),
    INSN(cvttsh2usi, f3, map5, 78, el, fp16, el),
    INSN(cvtudq2ph, f2, map5, 7a, vl, d, vl),
    INSN(cvtuqq2ph, f2, map5, 7a, vl, q, vl),
    INSN(cvtusi2sh, f3, map5, 7b, el, dq64, el),
    INSN(cvtuw2ph, f2, map5, 7d, vl, fp16, vl),
    INSN(cvtw2ph, f3, map5, 7d, vl, fp16, vl),
    INSN(divph, , map5, 5e, vl, fp16, vl),
    INSN(divsh, f3, map5, 5e, el, fp16, el),
    INSNX(fcmaddcph, f2, map6, 56, 1, vl, d, vl),
    INSNX(fcmaddcsh, f2, map6, 57, 1, el, d, el),
    INSNX(fcmulcph, f2, map6, d6, 1, vl, d, vl),
    INSNX(fcmulcsh, f2, map6, d7, 1, el, d, el),
    INSN(fmadd132ph, 66, map6, 98, vl, fp16, vl),
    INSN(fmadd132sh, 66, map6, 99, el, fp16, el),
    INSN(fmadd213ph, 66, map6, a8, vl, fp16, vl),
    INSN(fmadd213sh, 66, map6, a9, el, fp16, el),
    INSN(fmadd231ph, 66, map6, b8, vl, fp16, vl),
    INSN(fmadd231sh, 66, map6, b9, el, fp16, el),
    INSNX(fmaddcph, f3, map6, 56, 1, vl, d, vl),
    INSNX(fmaddcsh, f3, map6, 57, 1, el, d, el),
    INSN(fmaddsub132ph, 66, map6, 96, vl, fp16, vl),
    INSN(fmaddsub213ph, 66, map6, a6, vl, fp16, vl),
    INSN(fmaddsub231ph, 66, map6, b6, vl, fp16, vl),
    INSN(fmsub132ph, 66, map6, 9a, vl, fp16, vl),
    INSN(fmsub132sh, 66, map6, 9b, el, fp16, el),
    INSN(fmsub213ph, 66, map6, aa, vl, fp16, vl),
    INSN(fmsub213sh, 66, map6, ab, el, fp16, el),
    INSN(fmsub231ph, 66, map6, ba, vl, fp16, vl),
    INSN(fmsub231sh, 66, map6, bb, el, fp16, el),
    INSN(fmsubadd132ph, 66, map6, 97, vl, fp16, vl),
    INSN(fmsubadd213ph, 66, map6, a7, vl, fp16, vl),
    INSN(fmsubadd231ph, 66, map6, b7, vl, fp16, vl),
    INSNX(fmulcph, f3, map6, d6, 1, vl, d, vl),
    INSNX(fmulcsh, f3, map6, d7, 1, el, d, el),
    INSN(fnmadd132ph, 66, map6, 9c, vl, fp16, vl),
    INSN(fnmadd132sh, 66, map6, 9d, el, fp16, el),
    INSN(fnmadd213ph, 66, map6, ac, vl, fp16, vl),
    INSN(fnmadd213sh, 66, map6, ad, el, fp16, el),
    INSN(fnmadd231ph, 66, map6, bc, vl, fp16, vl),
    INSN(fnmadd231sh, 66, map6, bd, el, fp16, el),
    INSN(fnmsub132ph, 66, map6, 9e, vl, fp16, vl),
    INSN(fnmsub132sh, 66, map6, 9f, el, fp16, el),
    INSN(fnmsub213ph, 66, map6, ae, vl, fp16, vl),
    INSN(fnmsub213sh, 66, map6, af, el, fp16, el),
    INSN(fnmsub231ph, 66, map6, be, vl, fp16, vl),
    INSN(fnmsub231sh, 66, map6, bf, el, fp16, el),
    INSN(fpclassph, , 0f3a, 66, vl, fp16, vl),
    INSN(fpclasssh, , 0f3a, 67, el, fp16, el),
    INSN(getexpph, 66, map6, 42, vl, fp16, vl),
    INSN(getexpsh, 66, map6, 43, el, fp16, el),
    INSN(getmantph, , 0f3a, 26, vl, fp16, vl),
    INSN(getmantsh, , 0f3a, 27, el, fp16, el),
    INSN(maxph, , map5, 5f, vl, fp16, vl),
    INSN(maxsh, f3, map5, 5f, el, fp16, el),
    INSN(minph, , map5, 5d, vl, fp16, vl),
    INSN(minsh, f3, map5, 5d, el, fp16, el),
    INSN(movsh, f3, map5, 10, el, fp16, el),
    INSN(movsh, f3, map5, 11, el, fp16, el),
    INSN(mulph, , map5, 59, vl, fp16, vl),
    INSN(mulsh, f3, map5, 59, el, fp16, el),
    INSN(rcpph, 66, map6, 4c, vl, fp16, vl),
    INSN(rcpsh, 66, map6, 4d, el, fp16, el),
    INSN(reduceph, , 0f3a, 56, vl, fp16, vl),
    INSN(reducesh, , 0f3a, 57, el, fp16, el),
    INSN(rndscaleph, , 0f3a, 08, vl, fp16, vl),
    INSN(rndscalesh, , 0f3a, 0a, el, fp16, el),
    INSN(rsqrtph, 66, map6, 4e, vl, fp16, vl),
    INSN(rsqrtsh, 66, map6, 4f, el, fp16, el),
    INSN(scalefph, 66, map6, 2c, vl, fp16, vl),
    INSN(scalefsh, 66, map6, 2d, el, fp16, el),
    INSN(sqrtph, , map5, 51, vl, fp16, vl),
    INSN(sqrtsh, f3, map5, 51, el, fp16, el),
    INSN(subph, , map5, 5c, vl, fp16, vl),
    INSN(subsh, f3, map5, 5c, el, fp16, el),
    INSN(ucomish, , map5, 2e, el, fp16, el),
};
717
/*
 * AVX512_FP16 forms presumably limited to 128-bit vector length (cf.
 * vl_128[]; the pairing is done outside of this view).
 */
static const struct test avx512_fp16_128[] = {
    INSN(movw, 66, map5, 6e, el, fp16, el),
    INSN(movw, 66, map5, 7e, el, fp16, el),
};
722
/*
 * GFNI forms (EVEX encoded), presumably for all vector lengths (cf.
 * vl_all[]; the pairing is done outside of this view).
 */
static const struct test gfni_all[] = {
    INSN(gf2p8affineinvqb, 66, 0f3a, cf, vl, q, vl),
    INSN(gf2p8affineqb, 66, 0f3a, ce, vl, q, vl),
    INSN(gf2p8mulb, 66, 0f38, cf, vl, b, vl),
};
728
/*
 * The uses of b in this table are simply (one of) the shortest form(s) of
 * saying "no broadcast" without introducing a 128-bit granularity enumerator.
 * Due to all of the insns being WIG, w, d_nb, and q_nb would all also fit.
 *
 * Going by the name the table holds the VAES forms to be exercised at all
 * vector lengths (cf. vl_all[]; the pairing is done outside of this view).
 */
static const struct test vaes_all[] = {
    INSN(aesdec, 66, 0f38, de, vl, b, vl),
    INSN(aesdeclast, 66, 0f38, df, vl, b, vl),
    INSN(aesenc, 66, 0f38, dc, vl, b, vl),
    INSN(aesenclast, 66, 0f38, dd, vl, b, vl),
};
740
/*
 * VPCLMULQDQ form (EVEX encoded), presumably for all vector lengths (cf.
 * vl_all[]; the pairing is done outside of this view).
 */
static const struct test vpclmulqdq_all[] = {
    INSN(pclmulqdq, 66, 0f3a, 44, vl, q_nb, vl)
};
744
/*
 * Sets of vector lengths (enum vl values) matching the _all, _128, _no128,
 * and _512 suffixes of the instruction tables above.  Note that the sets
 * with more than one entry list VL_512 first.
 */
static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 };
static const unsigned char vl_128[] = { VL_128 };
static const unsigned char vl_no128[] = { VL_512, VL_256 };
static const unsigned char vl_512[] = { VL_512 };
749
/*
 * This table, indicating the presence of an immediate (byte) for an opcode
 * space 0f major opcode, is indexed by high major opcode byte nibble, with
 * each table element then bit-indexed by low major opcode byte nibble.
 *
 * Example: opcode 0x72 has an immediate because bit 2 of imm0f[0x7] is set.
 * Rows without an initializer are zero, i.e. denote "no immediate".
 */
static const uint16_t imm0f[16] = {
    [0x7] = (1 << 0x0) /* vpshuf* */ |
            (1 << 0x1) /* vps{ll,ra,rl}w */ |
            (1 << 0x2) /* vps{l,r}ld, vp{rol,ror,sra}{d,q} */ |
            (1 << 0x3) /* vps{l,r}l{,d}q */,
    [0xc] = (1 << 0x2) /* vcmp{p,s}{d,s} */ |
            (1 << 0x4) /* vpinsrw */ |
            (1 << 0x5) /* vpextrw */ |
            (1 << 0x6) /* vshufp{d,s} */,
};
765
/*
 * Callback table handed to x86_emulate() by test_one().  There's no
 * initializer here; the hooks are presumably installed elsewhere in the
 * file (not part of this view).
 */
static struct x86_emulate_ops emulops;
767
/*
 * Access tracking (byte granular) is used on the first 3 * 64 bytes of
 * address space. Instructions get encoded with a raw Disp8 value of 1, which
 * then gets scaled accordingly. Hence accesses below the address <scaling
 * factor> as well as at or above 2 * <scaling factor> are indications of
 * bugs. To aid diagnosis / debugging, track all accesses below 3 * <scaling
 * factor>. With AVX512 the maximum scaling factor is 64.
 *
 * Each array element counts the accesses to the byte at the respective
 * offset; the counters are reset ahead of every emulation run.
 */
static unsigned int accessed[3 * 64];
777
record_access(enum x86_segment seg,unsigned long offset,unsigned int bytes)778 static bool record_access(enum x86_segment seg, unsigned long offset,
779 unsigned int bytes)
780 {
781 while ( bytes-- )
782 {
783 if ( offset >= ARRAY_SIZE(accessed) )
784 return false;
785 ++accessed[offset++];
786 }
787
788 return true;
789 }
790
read(enum x86_segment seg,unsigned long offset,void * p_data,unsigned int bytes,struct x86_emulate_ctxt * ctxt)791 static int read(enum x86_segment seg, unsigned long offset, void *p_data,
792 unsigned int bytes, struct x86_emulate_ctxt *ctxt)
793 {
794 if ( !record_access(seg, offset, bytes + !bytes) )
795 return X86EMUL_UNHANDLEABLE;
796 memset(p_data, 0, bytes);
797 return X86EMUL_OKAY;
798 }
799
write(enum x86_segment seg,unsigned long offset,void * p_data,unsigned int bytes,struct x86_emulate_ctxt * ctxt)800 static int write(enum x86_segment seg, unsigned long offset, void *p_data,
801 unsigned int bytes, struct x86_emulate_ctxt *ctxt)
802 {
803 if ( !record_access(seg, offset, bytes + !bytes) )
804 return X86EMUL_UNHANDLEABLE;
805 return X86EMUL_OKAY;
806 }
807
test_one(const struct test * test,enum vl vl,unsigned char * instr,struct x86_emulate_ctxt * ctxt)808 static void test_one(const struct test *test, enum vl vl,
809 unsigned char *instr, struct x86_emulate_ctxt *ctxt)
810 {
811 unsigned int vsz, esz, i, n;
812 int rc;
813 bool sg = strstr(test->mnemonic, "gather") ||
814 strstr(test->mnemonic, "scatter");
815 bool imm = test->spc == SPC_0f3a ||
816 (test->spc == SPC_0f &&
817 (imm0f[test->opc >> 4] & (1 << (test->opc & 0xf))));
818 union evex {
819 uint8_t raw[3];
820 struct {
821 uint8_t opcx:3;
822 uint8_t mbz:1;
823 uint8_t R:1;
824 uint8_t b:1;
825 uint8_t x:1;
826 uint8_t r:1;
827 uint8_t pfx:2;
828 uint8_t mbs:1;
829 uint8_t reg:4;
830 uint8_t w:1;
831 uint8_t opmsk:3;
832 uint8_t RX:1;
833 uint8_t bcst:1;
834 uint8_t lr:2;
835 uint8_t z:1;
836 };
837 } evex = {
838 .opcx = test->spc, .pfx = test->pfx, .lr = vl,
839 .R = 1, .b = 1, .x = 1, .r = 1, .mbs = 1,
840 .reg = 0xf, .RX = 1, .opmsk = sg,
841 };
842
843 switch ( test->esz )
844 {
845 case ESZ_b:
846 esz = 1;
847 break;
848
849 case ESZ_w:
850 evex.w = 1;
851 /* fall through */
852 case ESZ_fp16:
853 esz = 2;
854 break;
855
856 #ifdef __i386__
857 case ESZ_d_WIG:
858 evex.w = 1;
859 /* fall through */
860 #endif
861 case ESZ_d: case ESZ_d_nb:
862 esz = 4;
863 break;
864
865 case ESZ_q: case ESZ_q_nb:
866 esz = 8;
867 evex.w = 1;
868 break;
869
870 default:
871 ASSERT_UNREACHABLE();
872 }
873
874 switch ( test->vsz )
875 {
876 case VSZ_vl:
877 vsz = 16 << vl;
878 break;
879
880 case VSZ_vl_2:
881 vsz = 8 << vl;
882 break;
883
884 case VSZ_vl_4:
885 vsz = 4 << vl;
886 break;
887
888 case VSZ_vl_8:
889 vsz = 2 << vl;
890 break;
891
892 case VSZ_el:
893 vsz = esz;
894 break;
895
896 case VSZ_el_2:
897 vsz = esz * 2;
898 break;
899
900 case VSZ_el_4:
901 vsz = esz * 4;
902 break;
903
904 case VSZ_el_8:
905 vsz = esz * 8;
906 break;
907
908 default:
909 ASSERT_UNREACHABLE();
910 }
911
912 /*
913 * Note: SIB addressing is used here, such that S/G insns can be handled
914 * without extra conditionals.
915 */
916 instr[0] = 0x62;
917 instr[1] = evex.raw[0];
918 instr[2] = evex.raw[1];
919 instr[3] = evex.raw[2];
920 instr[4] = test->opc;
921 instr[5] = 0x44 | (test->ext << 3); /* ModR/M */
922 instr[6] = 0x22; /* SIB: base rDX, index none / xMM4 */
923 instr[7] = 1; /* Disp8 */
924 instr[8] = 0; /* immediate, if any */
925
926 asm volatile ( "kxnorw %k1, %k1, %k1" );
927 asm volatile ( "vxorps %xmm4, %xmm4, %xmm4" );
928
929 ctxt->regs->eip = (unsigned long)&instr[0];
930 ctxt->regs->edx = 0;
931 memset(accessed, 0, sizeof(accessed));
932
933 rc = x86_emulate(ctxt, &emulops);
934 if ( rc != X86EMUL_OKAY ||
935 (ctxt->regs->eip != (unsigned long)&instr[8 + imm]) )
936 goto fail;
937
938 for ( i = 0; i < (test->scale == SC_vl ? vsz : esz); ++i )
939 if ( accessed[i] )
940 goto fail;
941
942 n = test->scale == SC_vl ? vsz : esz;
943 if ( !sg )
944 n += vsz;
945 else if ( !strstr(test->mnemonic, "pf") )
946 n += esz;
947 else
948 ++n;
949
950 for ( ; i < n; ++i )
951 if ( accessed[i] != (sg ? (vsz / esz) >> (test->opc & 1 & !evex.w)
952 : 1) )
953 goto fail;
954
955 for ( ; i < ARRAY_SIZE(accessed); ++i )
956 if ( accessed[i] )
957 goto fail;
958
959 /* Also check the broadcast case, if available. */
960 if ( test->vsz >= VSZ_el || test->scale != SC_vl )
961 return;
962
963 switch ( test->esz )
964 {
965 case ESZ_d_nb: case ESZ_q_nb:
966 case ESZ_b: case ESZ_w: case ESZ_bw:
967 return;
968
969 case ESZ_d: case ESZ_q: case ESZ_fp16:
970 break;
971
972 default:
973 ASSERT_UNREACHABLE();
974 }
975
976 evex.bcst = 1;
977 instr[3] = evex.raw[2];
978
979 ctxt->regs->eip = (unsigned long)&instr[0];
980 memset(accessed, 0, sizeof(accessed));
981
982 rc = x86_emulate(ctxt, &emulops);
983 if ( rc != X86EMUL_OKAY ||
984 (ctxt->regs->eip != (unsigned long)&instr[8 + imm]) )
985 goto fail;
986
987 for ( i = 0; i < esz; ++i )
988 if ( accessed[i] )
989 goto fail;
990 for ( ; i < esz * 2; ++i )
991 if ( accessed[i] != 1 )
992 goto fail;
993 for ( ; i < ARRAY_SIZE(accessed); ++i )
994 if ( accessed[i] )
995 goto fail;
996
997 return;
998
999 fail:
1000 printf("failed (v%s%s %u-bit)\n", test->mnemonic,
1001 evex.bcst ? "/bcst" : "", 128 << vl);
1002 exit(1);
1003 }
1004
test_pair(const struct test * tmpl,enum vl vl,enum esz esz1,const char * suffix1,enum esz esz2,const char * suffix2,unsigned char * instr,struct x86_emulate_ctxt * ctxt)1005 static void test_pair(const struct test *tmpl, enum vl vl,
1006 enum esz esz1, const char *suffix1,
1007 enum esz esz2, const char *suffix2,
1008 unsigned char *instr, struct x86_emulate_ctxt *ctxt)
1009 {
1010 struct test test = *tmpl;
1011 char mnemonic[24];
1012
1013 test.esz = esz1;
1014 snprintf(mnemonic, ARRAY_SIZE(mnemonic), "%s%s", tmpl->mnemonic, suffix1);
1015 test.mnemonic = mnemonic;
1016 test_one(&test, vl, instr, ctxt);
1017
1018 test.esz = esz2;
1019 snprintf(mnemonic, ARRAY_SIZE(mnemonic), "%s%s", tmpl->mnemonic, suffix2);
1020 test.mnemonic = mnemonic;
1021 test_one(&test, vl, instr, ctxt);
1022 }
1023
test_group(const struct test tests[],unsigned int nr_test,const unsigned char vl[],unsigned int nr_vl,void * instr,struct x86_emulate_ctxt * ctxt)1024 static void test_group(const struct test tests[], unsigned int nr_test,
1025 const unsigned char vl[], unsigned int nr_vl,
1026 void *instr, struct x86_emulate_ctxt *ctxt)
1027 {
1028 unsigned int i, j;
1029
1030 for ( i = 0; i < nr_test; ++i )
1031 {
1032 for ( j = 0; j < nr_vl; ++j )
1033 {
1034 if ( vl[0] == VL_512 && vl[j] != VL_512 && !cpu_has_avx512vl )
1035 continue;
1036
1037 switch ( tests[i].esz )
1038 {
1039 case ESZ_q_nb:
1040 /* The 128-bit form of VMOVDDUP needs special casing. */
1041 if ( vl[j] == VL_128 && tests[i].spc == SPC_0f &&
1042 tests[i].opc == 0x12 && tests[i].pfx == PFX_f2 )
1043 {
1044 struct test test = tests[i];
1045
1046 test.vsz = VSZ_el;
1047 test.scale = SC_el;
1048 test_one(&test, vl[j], instr, ctxt);
1049 continue;
1050 }
1051 /* fall through */
1052 default:
1053 test_one(&tests[i], vl[j], instr, ctxt);
1054 break;
1055
1056 case ESZ_bw:
1057 test_pair(&tests[i], vl[j], ESZ_b, "b", ESZ_w, "w",
1058 instr, ctxt);
1059 break;
1060
1061 case ESZ_dq:
1062 test_pair(&tests[i], vl[j], ESZ_d,
1063 strncmp(tests[i].mnemonic, "cvt", 3) ? "d" : "l",
1064 ESZ_q, "q", instr, ctxt);
1065 break;
1066
1067 #ifdef __i386__
1068 case ESZ_d_WIG:
1069 test_pair(&tests[i], vl[j], ESZ_d, "/W0",
1070 ESZ_d_WIG, "/W1", instr, ctxt);
1071 break;
1072 #endif
1073
1074 case ESZ_sd:
1075 test_pair(&tests[i], vl[j],
1076 ESZ_d, tests[i].vsz < VSZ_el ? "ps" : "ss",
1077 ESZ_q, tests[i].vsz < VSZ_el ? "pd" : "sd",
1078 instr, ctxt);
1079 break;
1080 }
1081 }
1082 }
1083 }
1084
evex_disp8_test(void * instr,struct x86_emulate_ctxt * ctxt,const struct x86_emulate_ops * ops)1085 void evex_disp8_test(void *instr, struct x86_emulate_ctxt *ctxt,
1086 const struct x86_emulate_ops *ops)
1087 {
1088 emulops = *ops;
1089 emulops.read = read;
1090 emulops.write = write;
1091
1092 #define RUN(feat, vl) do { \
1093 if ( cpu_has_##feat ) \
1094 { \
1095 printf("%-40s", "Testing " #feat "/" #vl " disp8 handling..."); \
1096 test_group(feat ## _ ## vl, ARRAY_SIZE(feat ## _ ## vl), \
1097 vl_ ## vl, ARRAY_SIZE(vl_ ## vl), instr, ctxt); \
1098 printf("okay\n"); \
1099 } \
1100 } while ( false )
1101
1102 RUN(avx512f, all);
1103 RUN(avx512f, 128);
1104 RUN(avx512f, no128);
1105 RUN(avx512f, 512);
1106 RUN(avx512bw, all);
1107 RUN(avx512bw, 128);
1108 RUN(avx512cd, all);
1109 RUN(avx512dq, all);
1110 RUN(avx512dq, 128);
1111 RUN(avx512dq, no128);
1112 RUN(avx512dq, 512);
1113 RUN(avx512er, 512);
1114 #define cpu_has_avx512pf cpu_has_avx512f
1115 RUN(avx512pf, 512);
1116 RUN(avx512_4fmaps, 512);
1117 RUN(avx512_4vnniw, 512);
1118 RUN(avx512_bf16, all);
1119 RUN(avx512_bitalg, all);
1120 RUN(avx512_ifma, all);
1121 RUN(avx512_vbmi, all);
1122 RUN(avx512_vbmi2, all);
1123 RUN(avx512_vnni, all);
1124 RUN(avx512_vp2intersect, all);
1125 RUN(avx512_vpopcntdq, all);
1126 RUN(avx512_fp16, all);
1127 RUN(avx512_fp16, 128);
1128
1129 if ( cpu_has_avx512f )
1130 {
1131 RUN(gfni, all);
1132 RUN(vaes, all);
1133 RUN(vpclmulqdq, all);
1134 }
1135 }
1136