/*
 * arch/x86/xstate.c
 *
 * x86 extended state operations
 *
 */

#include <xen/percpu.h>
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/processor.h>
#include <asm/hvm/support.h>
#include <asm/i387.h>
#include <asm/xstate.h>
#include <asm/asm_defns.h>

/*
 * Maximum size (in bytes) of the XSAVE/XRSTOR save area required by all
 * the supported and enabled features on the processor, including the
 * XSAVE.HEADER.  Only the features covered by XCNTXT_MASK (i.e. those Xen
 * knows about) are enabled.
 */
static u32 __read_mostly xsave_cntxt_size;

/* A 64-bit bitmask of the XSAVE/XRSTOR features supported by the processor. */
u64 __read_mostly xfeature_mask;

unsigned int *__read_mostly xstate_offsets;
unsigned int *__read_mostly xstate_sizes;
u64 __read_mostly xstate_align;
static unsigned int __read_mostly xstate_features;

uint32_t __read_mostly mxcsr_mask = 0x0000ffbf;

/* Cached xcr0 for fast read */
static DEFINE_PER_CPU(uint64_t, xcr0);

/*
 * Because XCR0 is cached for each CPU, xsetbv() is not exposed.  Users should
 * use set_xcr0() instead.
 */
static inline bool xsetbv(u32 index, u64 xfeatures)
{
    u32 hi = xfeatures >> 32;
    u32 lo = (u32)xfeatures;

    asm volatile ( "1: .byte 0x0f,0x01,0xd1\n" /* xsetbv */
                   "3:                      \n"
                   ".section .fixup,\"ax\"  \n"
                   "2: xor %0,%0            \n"
                   "   jmp 3b               \n"
                   ".previous               \n"
                   _ASM_EXTABLE(1b, 2b)
                   : "+a" (lo)
                   : "c" (index), "d" (hi) );
    return lo != 0;
}

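/*
 * Update XCR0 and, on success, the per-CPU cache.  Returns false if the
 * XSETBV instruction faulted, i.e. the requested feature combination was
 * rejected by the CPU.
 */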
bool set_xcr0(u64 xfeatures)
{
    if ( !xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures) )
        return false;
    this_cpu(xcr0) = xfeatures;
    return true;
}

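/* Return the cached XCR0 value for this CPU, avoiding an XGETBV. */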
uint64_t get_xcr0(void)
{
    return this_cpu(xcr0);
}

/* Cached xss for fast read */
static DEFINE_PER_CPU(uint64_t, xss);

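/*
 * Update MSR_IA32_XSS, skipping the WRMSR when the cached value already
 * matches.
 */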
void set_msr_xss(u64 xss)
{
    u64 *this_xss = &this_cpu(xss);

    if ( *this_xss != xss )
    {
        wrmsrl(MSR_IA32_XSS, xss);
        *this_xss = xss;
    }
}

uint64_t get_msr_xss(void)
{
    return this_cpu(xss);
}

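/*
 * Record the offset and size of each extended state component, as reported
 * by CPUID leaf 0xD.  The BSP allocates and populates xstate_offsets[],
 * xstate_sizes[] and xstate_align; APs only cross-check that their CPUID
 * data matches what the BSP recorded.
 */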
static int setup_xstate_features(bool bsp)
{
    unsigned int leaf, eax, ebx, ecx, edx;

    if ( bsp )
    {
        xstate_features = flsl(xfeature_mask);
        xstate_offsets = xzalloc_array(unsigned int, xstate_features);
        if ( !xstate_offsets )
            return -ENOMEM;

        xstate_sizes = xzalloc_array(unsigned int, xstate_features);
        if ( !xstate_sizes )
            return -ENOMEM;
    }

    for ( leaf = 2; leaf < xstate_features; leaf++ )
    {
        if ( bsp )
        {
            cpuid_count(XSTATE_CPUID, leaf, &xstate_sizes[leaf],
                        &xstate_offsets[leaf], &ecx, &edx);
            if ( ecx & XSTATE_ALIGN64 )
                __set_bit(leaf, &xstate_align);
        }
        else
        {
            cpuid_count(XSTATE_CPUID, leaf, &eax,
                        &ebx, &ecx, &edx);
            BUG_ON(eax != xstate_sizes[leaf]);
            BUG_ON(ebx != xstate_offsets[leaf]);
            BUG_ON(!(ecx & XSTATE_ALIGN64) != !test_bit(leaf, &xstate_align));
        }
    }

    return 0;
}

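/*
 * Compute, for every component enabled in xcomp_bv, its offset within a
 * compacted (XSAVEC/XSAVES) image: components follow the legacy region and
 * XSAVE header in bit order, each rounded up to a 64-byte boundary when the
 * hardware reports it as requiring alignment.
 */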
static void setup_xstate_comp(uint16_t *comp_offsets,
                              const uint64_t xcomp_bv)
{
    unsigned int i;
    unsigned int offset;

    /*
     * The FP and SSE xstates are legacy states.  They are always at fixed
     * offsets in the xsave area, in either compacted or standard form.
     */
    comp_offsets[0] = 0;
    comp_offsets[1] = XSAVE_SSE_OFFSET;

    comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;

    offset = comp_offsets[2];
    for ( i = 2; i < xstate_features; i++ )
    {
        if ( (1ul << i) & xcomp_bv )
        {
            if ( test_bit(i, &xstate_align) )
                offset = ROUNDUP(offset, 64);
            comp_offsets[i] = offset;
            offset += xstate_sizes[i];
        }
    }
    ASSERT(offset <= xsave_cntxt_size);
}

/*
 * Serialise a vcpu's xsave state into a representation suitable for the
 * toolstack.
 *
 * Internally a vcpu's xsave state may be compressed or uncompressed,
 * depending on the features in use, but the ABI with the toolstack is
 * strictly uncompressed.
 *
 * It is the caller's responsibility to ensure that there is xsave state to
 * serialise, and that the provided buffer is exactly the right size.
 */
void expand_xsave_states(struct vcpu *v, void *dest, unsigned int size)
{
    const struct xsave_struct *xsave = v->arch.xsave_area;
    const void *src;
    uint16_t comp_offsets[sizeof(xfeature_mask)*8];
    u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
    u64 valid;

    /* Check there is state to serialise (i.e. at least an XSAVE_HDR). */
    BUG_ON(!v->arch.xcr0_accum);
    /* Check there is the correct amount of room to decompress into. */
    BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum));

    if ( !(xsave->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED) )
    {
        memcpy(dest, xsave, size);
        return;
    }

    ASSERT(xsave_area_compressed(xsave));
    setup_xstate_comp(comp_offsets, xsave->xsave_hdr.xcomp_bv);

    /* Copy the legacy XSAVE area and the XSAVE header. */
    memcpy(dest, xsave, XSTATE_AREA_MIN_SIZE);
    memset(dest + XSTATE_AREA_MIN_SIZE, 0, size - XSTATE_AREA_MIN_SIZE);

    ((struct xsave_struct *)dest)->xsave_hdr.xcomp_bv = 0;

    /*
     * Copy each region from the possibly compacted offset to the
     * non-compacted offset.
     */
    src = xsave;
    valid = xstate_bv & ~XSTATE_FP_SSE;
    while ( valid )
    {
        u64 feature = valid & -valid;
        unsigned int index = fls(feature) - 1;

        /*
         * We previously verified xstate_bv.  If there isn't valid
         * comp_offsets[] information, something is very broken.
         */
        BUG_ON(!comp_offsets[index]);
        BUG_ON((xstate_offsets[index] + xstate_sizes[index]) > size);

        memcpy(dest + xstate_offsets[index], src + comp_offsets[index],
               xstate_sizes[index]);

        valid &= ~feature;
    }
}

/*
 * Deserialise a toolstack's xsave state representation into a form suitable
 * for a vcpu.
 *
 * Internally a vcpu's xsave state may be compressed or uncompressed,
 * depending on the features in use, but the ABI with the toolstack is
 * strictly uncompressed.
 *
 * It is the caller's responsibility to ensure that the source buffer
 * contains xsave state, is uncompressed, and is exactly the right size.
 */
void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size)
{
    struct xsave_struct *xsave = v->arch.xsave_area;
    void *dest;
    uint16_t comp_offsets[sizeof(xfeature_mask)*8];
    u64 xstate_bv, valid;

    BUG_ON(!v->arch.xcr0_accum);
    BUG_ON(size != xstate_ctxt_size(v->arch.xcr0_accum));
    ASSERT(!xsave_area_compressed(src));

    xstate_bv = ((const struct xsave_struct *)src)->xsave_hdr.xstate_bv;

    if ( !(v->arch.xcr0_accum & XSTATE_XSAVES_ONLY) )
    {
        memcpy(xsave, src, size);
        return;
    }

    /*
     * Copy the legacy XSAVE area, to avoid complications with CPUID
     * leaves 0 and 1 in the loop below.
     */
    memcpy(xsave, src, FXSAVE_SIZE);

    /* Set XSTATE_BV and XCOMP_BV. */
    xsave->xsave_hdr.xstate_bv = xstate_bv;
    xsave->xsave_hdr.xcomp_bv = v->arch.xcr0_accum | XSTATE_COMPACTION_ENABLED;

    setup_xstate_comp(comp_offsets, xsave->xsave_hdr.xcomp_bv);

    /*
     * Copy each region from the non-compacted offset to the
     * possibly compacted offset.
     */
    dest = xsave;
    valid = xstate_bv & ~XSTATE_FP_SSE;
    while ( valid )
    {
        u64 feature = valid & -valid;
        unsigned int index = fls(feature) - 1;

        /*
         * We previously verified xstate_bv.  If we don't have valid
         * comp_offsets[] information, something is very broken.
         */
        BUG_ON(!comp_offsets[index]);
        BUG_ON((xstate_offsets[index] + xstate_sizes[index]) > size);

        memcpy(dest + comp_offsets[index], src + xstate_offsets[index],
               xstate_sizes[index]);

        valid &= ~feature;
    }
}

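/*
 * Save the vcpu's extended state into its xsave area: XSAVES is used when
 * the vcpu's accumulated xstate includes XSAVES-only components, otherwise
 * XSAVE or XSAVEOPT (selected via alternatives).  The effective FIP/FDP
 * width (4 or 8 bytes) is recorded in the image so xrstor() can use the
 * matching restore form.
 */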
void xsave(struct vcpu *v, uint64_t mask)
{
    struct xsave_struct *ptr = v->arch.xsave_area;
    uint32_t hmask = mask >> 32;
    uint32_t lmask = mask;
    unsigned int fip_width = v->domain->arch.x87_fip_width;
#define XSAVE(pfx) \
        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
            asm volatile ( ".byte " pfx "0x0f,0xc7,0x2f\n" /* xsaves */ \
                           : "=m" (*ptr) \
                           : "a" (lmask), "d" (hmask), "D" (ptr) ); \
        else \
            alternative_io(".byte " pfx "0x0f,0xae,0x27\n", /* xsave */ \
                           ".byte " pfx "0x0f,0xae,0x37\n", /* xsaveopt */ \
                           X86_FEATURE_XSAVEOPT, \
                           "=m" (*ptr), \
                           "a" (lmask), "d" (hmask), "D" (ptr))

    if ( fip_width == 8 || !(mask & XSTATE_FP) )
    {
        XSAVE("0x48,");
    }
    else if ( fip_width == 4 )
    {
        XSAVE("");
    }
    else
    {
        /*
         * FIP/FDP may not be written in some cases (e.g., if XSAVEOPT/XSAVES
         * is used, or on AMD CPUs if an exception isn't pending).
         *
         * To tell if the hardware writes these fields, poison the FIP field.
         * The poison is
         *  a) non-canonical,
         *  b) non-zero for the reserved part of a 32-bit FCS:FIP, and
         *  c) random, with a vanishingly small probability (1e-19) of
         *     matching a value the hardware may write even if it did not
         *     canonicalize the 64-bit FIP or zero-extend the 16-bit FCS.
         */
        uint64_t orig_fip = ptr->fpu_sse.fip.addr;
        const uint64_t bad_fip = 0x6a3f5c4b13a533f6;

        ptr->fpu_sse.fip.addr = bad_fip;

        XSAVE("0x48,");

        /* FIP/FDP not updated? Restore the old FIP value. */
        if ( ptr->fpu_sse.fip.addr == bad_fip )
        {
            ptr->fpu_sse.fip.addr = orig_fip;
            return;
        }

        /*
         * If the FIP/FDP[63:32] are both zero, it is safe to use the
         * 32-bit restore to also restore the selectors.
         */
        if ( !((ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr) >> 32) )
        {
            struct ix87_env fpu_env;

            asm volatile ( "fnstenv %0" : "=m" (fpu_env) );
            ptr->fpu_sse.fip.sel = fpu_env.fcs;
            ptr->fpu_sse.fdp.sel = fpu_env.fds;
            fip_width = 4;
        }
        else
            fip_width = 8;
    }
#undef XSAVE
    if ( mask & XSTATE_FP )
        ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] = fip_width;
}

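/*
 * Load the vcpu's extended state from its xsave area, using XRSTORS or
 * XRSTOR as appropriate.  A faulting restore (e.g. from a corrupted image
 * supplied by a toolstack) is retried with progressively more state reset
 * to defaults; the domain is crashed if the restore still cannot succeed.
 */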
void xrstor(struct vcpu *v, uint64_t mask)
{
    uint32_t hmask = mask >> 32;
    uint32_t lmask = mask;
    struct xsave_struct *ptr = v->arch.xsave_area;
    unsigned int faults, prev_faults;

    /*
     * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
     * is pending.  Clear the x87 state here by setting it to fixed
     * values.  The hypervisor data segment can sometimes be 0 and
     * sometimes the new user value; both should be fine.  Use the FPU
     * saved data block as a safe address because it should be in L1.
     */
    if ( (mask & ptr->xsave_hdr.xstate_bv & XSTATE_FP) &&
         !(ptr->fpu_sse.fsw & ~ptr->fpu_sse.fcw & 0x003f) &&
         boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
        asm volatile ( "fnclex\n\t"        /* clear exceptions */
                       "ffree %%st(7)\n\t" /* clear stack tag */
                       "fildl %0"          /* load to clear state */
                       : : "m" (ptr->fpu_sse) );

    /*
     * XRSTOR can fault if passed a corrupted data block.  We handle this
     * possibility, which may occur if the block was passed to us by control
     * tools or through VCPUOP_initialise, by silently adjusting state.
     */
    for ( prev_faults = faults = 0; ; prev_faults = faults )
    {
        switch ( __builtin_expect(ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET], 8) )
        {
            BUILD_BUG_ON(sizeof(faults) != 4); /* Clang doesn't support %z in asm. */
#define _xrstor(insn) \
        asm volatile ( "1: .byte " insn "\n" \
                       "3:\n" \
                       "   .section .fixup,\"ax\"\n" \
                       "2: incl %[faults]\n" \
                       "   jmp 3b\n" \
                       "   .previous\n" \
                       _ASM_EXTABLE(1b, 2b) \
                       : [mem] "+m" (*ptr), [faults] "+g" (faults) \
                       : [lmask] "a" (lmask), [hmask] "d" (hmask), \
                         [ptr] "D" (ptr) )

#define XRSTOR(pfx) \
        if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY ) \
        { \
            if ( unlikely(!(ptr->xsave_hdr.xcomp_bv & \
                            XSTATE_COMPACTION_ENABLED)) ) \
            { \
                ASSERT(!ptr->xsave_hdr.xcomp_bv); \
                ptr->xsave_hdr.xcomp_bv = ptr->xsave_hdr.xstate_bv | \
                                          XSTATE_COMPACTION_ENABLED; \
            } \
            _xrstor(pfx "0x0f,0xc7,0x1f"); /* xrstors */ \
        } \
        else \
            _xrstor(pfx "0x0f,0xae,0x2f") /* xrstor */

        default:
            XRSTOR("0x48,");
            break;
        case 4: case 2:
            XRSTOR("");
            break;
#undef XRSTOR
#undef _xrstor
        }
        if ( likely(faults == prev_faults) )
            break;
#ifndef NDEBUG
        gprintk(XENLOG_WARNING, "fault#%u: mxcsr=%08x\n",
                faults, ptr->fpu_sse.mxcsr);
        gprintk(XENLOG_WARNING, "xs=%016lx xc=%016lx\n",
                ptr->xsave_hdr.xstate_bv, ptr->xsave_hdr.xcomp_bv);
        gprintk(XENLOG_WARNING, "r0=%016lx r1=%016lx\n",
                ptr->xsave_hdr.reserved[0], ptr->xsave_hdr.reserved[1]);
        gprintk(XENLOG_WARNING, "r2=%016lx r3=%016lx\n",
                ptr->xsave_hdr.reserved[2], ptr->xsave_hdr.reserved[3]);
        gprintk(XENLOG_WARNING, "r4=%016lx r5=%016lx\n",
                ptr->xsave_hdr.reserved[4], ptr->xsave_hdr.reserved[5]);
#endif
        switch ( faults )
        {
        case 1: /* Stage 1: Reset state to be loaded. */
            ptr->xsave_hdr.xstate_bv &= ~mask;
            /*
             * Also try to eliminate fault reasons, even if this shouldn't be
             * needed here (other code should ensure the sanity of the data).
             */
            if ( ((mask & XSTATE_SSE) ||
                  ((mask & XSTATE_YMM) &&
                   !(ptr->xsave_hdr.xcomp_bv & XSTATE_COMPACTION_ENABLED))) )
                ptr->fpu_sse.mxcsr &= mxcsr_mask;
            if ( v->arch.xcr0_accum & XSTATE_XSAVES_ONLY )
            {
                ptr->xsave_hdr.xcomp_bv &= this_cpu(xcr0) | this_cpu(xss);
                ptr->xsave_hdr.xstate_bv &= ptr->xsave_hdr.xcomp_bv;
                ptr->xsave_hdr.xcomp_bv |= XSTATE_COMPACTION_ENABLED;
            }
            else
            {
                ptr->xsave_hdr.xstate_bv &= this_cpu(xcr0);
                ptr->xsave_hdr.xcomp_bv = 0;
            }
            memset(ptr->xsave_hdr.reserved, 0, sizeof(ptr->xsave_hdr.reserved));
            continue;

        case 2: /* Stage 2: Reset all state. */
            ptr->fpu_sse.mxcsr = MXCSR_DEFAULT;
            ptr->xsave_hdr.xstate_bv = 0;
            ptr->xsave_hdr.xcomp_bv = v->arch.xcr0_accum & XSTATE_XSAVES_ONLY
                                      ? XSTATE_COMPACTION_ENABLED : 0;
            continue;
        }

        domain_crash(current->domain);
        return;
    }
}

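/*
 * Report whether the vcpu has any extended state enabled, i.e. whether its
 * accumulated XCR0 is non-zero.
 */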
bool xsave_enabled(const struct vcpu *v)
{
    if ( !cpu_has_xsave )
        return false;

    ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE);
    ASSERT(v->arch.xsave_area);

    return !!v->arch.xcr0_accum;
}

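/*
 * Allocate a 64-byte-aligned xsave area for the vcpu.  For idle vcpus on
 * XSAVEC-capable hardware the area is instead sized to hold the legacy
 * region, the header, and the largest individual component, which suffices
 * for the idle-vcpu uses in this file (e.g. read_bndcfgu()).
 */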
int xstate_alloc_save_area(struct vcpu *v)
{
    struct xsave_struct *save_area;
    unsigned int size;

    if ( !cpu_has_xsave )
        return 0;

    if ( !is_idle_vcpu(v) || !cpu_has_xsavec )
    {
        size = xsave_cntxt_size;
        BUG_ON(size < XSTATE_AREA_MIN_SIZE);
    }
    else
    {
        /*
         * For idle vcpus on XSAVEC-capable CPUs allocate an area large
         * enough to save any individual extended state.
         */
        unsigned int i;

        for ( size = 0, i = 2; i < xstate_features; ++i )
            if ( size < xstate_sizes[i] )
                size = xstate_sizes[i];
        size += XSTATE_AREA_MIN_SIZE;
    }

    /* XSAVE/XRSTOR require the save area to be 64-byte aligned. */
    BUILD_BUG_ON(__alignof(*save_area) < 64);
    save_area = _xzalloc(size, __alignof(*save_area));
    if ( save_area == NULL )
        return -ENOMEM;

    /*
     * Set the memory image to default values, but don't force the context
     * to be loaded from memory (i.e. keep save_area->xsave_hdr.xstate_bv
     * clear).
     */
    save_area->fpu_sse.fcw = FCW_DEFAULT;
    save_area->fpu_sse.mxcsr = MXCSR_DEFAULT;

    v->arch.xsave_area = save_area;
    v->arch.xcr0 = 0;
    v->arch.xcr0_accum = 0;

    return 0;
}

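/* Free the vcpu's xsave area, if one was allocated. */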
void xstate_free_save_area(struct vcpu *v)
{
    xfree(v->arch.xsave_area);
    v->arch.xsave_area = NULL;
}

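/*
 * Query the save area size for the feature set in xcr0, by temporarily
 * loading it into XCR0 and reading CPUID leaf 0xD: EBX reports the size
 * required for the currently enabled features.
 */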
static unsigned int _xstate_ctxt_size(u64 xcr0)
{
    u64 act_xcr0 = get_xcr0();
    u32 eax, ebx = 0, ecx, edx;
    bool ok = set_xcr0(xcr0);

    ASSERT(ok);
    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
    ASSERT(ebx <= ecx);
    ok = set_xcr0(act_xcr0);
    ASSERT(ok);

    return ebx;
}

/* Fastpath for common xstate size requests, avoiding reloads of xcr0. */
unsigned int xstate_ctxt_size(u64 xcr0)
{
    if ( xcr0 == xfeature_mask )
        return xsave_cntxt_size;

    if ( xcr0 == 0 )
        return 0;

    return _xstate_ctxt_size(xcr0);
}

/* Collect information about the processor's extended state. */
void xstate_init(struct cpuinfo_x86 *c)
{
    static bool __initdata use_xsave = true;
    boolean_param("xsave", use_xsave);

    bool bsp = c == &boot_cpu_data;
    u32 eax, ebx, ecx, edx;
    u64 feature_mask;

    if ( (bsp && !use_xsave) ||
         boot_cpu_data.cpuid_level < XSTATE_CPUID )
    {
        BUG_ON(!bsp);
        setup_clear_cpu_cap(X86_FEATURE_XSAVE);
        return;
    }

    cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);

    BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE);
    BUG_ON((eax & XSTATE_YMM) && !(eax & XSTATE_SSE));
    feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK;

    /*
     * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size.
     */
    set_in_cr4(X86_CR4_OSXSAVE);
    if ( !set_xcr0(feature_mask) )
        BUG();

    if ( bsp )
    {
        static typeof(current->arch.xsave_area->fpu_sse) __initdata ctxt;

        xfeature_mask = feature_mask;
        /*
         * xsave_cntxt_size is the max size required by enabled features.
         * We know about FP/SSE and YMM from eax, and nothing about edx at
         * present.
         */
        xsave_cntxt_size = _xstate_ctxt_size(feature_mask);
        printk("xstate: size: %#x and states: %#"PRIx64"\n",
               xsave_cntxt_size, xfeature_mask);

        asm ( "fxsave %0" : "=m" (ctxt) );
        if ( ctxt.mxcsr_mask )
            mxcsr_mask = ctxt.mxcsr_mask;
    }
    else
    {
        BUG_ON(xfeature_mask != feature_mask);
        BUG_ON(xsave_cntxt_size != _xstate_ctxt_size(feature_mask));
    }

    /* Check extended XSAVE features. */
    cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);

    /* Mask out features not currently understood by Xen. */
    eax &= (cpufeat_mask(X86_FEATURE_XSAVEOPT) |
            cpufeat_mask(X86_FEATURE_XSAVEC) |
            cpufeat_mask(X86_FEATURE_XGETBV1) |
            cpufeat_mask(X86_FEATURE_XSAVES));

    c->x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)] = eax;

    BUG_ON(eax != boot_cpu_data.x86_capability[cpufeat_word(X86_FEATURE_XSAVEOPT)]);

    if ( setup_xstate_features(bsp) && bsp )
        BUG();
}

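/*
 * Check the architectural dependency rules for an XCR0 value: FP must be
 * set, YMM requires SSE, the AVX-512 components (OPMASK/ZMM/HI_ZMM) require
 * YMM and must be enabled together, and the two MPX components must be
 * enabled together.
 */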
static bool valid_xcr0(u64 xcr0)
{
    /* FP must be unconditionally set. */
    if ( !(xcr0 & XSTATE_FP) )
        return false;

    /* YMM depends on SSE. */
    if ( (xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE) )
        return false;

    if ( xcr0 & (XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) )
    {
        /* OPMASK, ZMM, and HI_ZMM require YMM. */
        if ( !(xcr0 & XSTATE_YMM) )
            return false;

        /* OPMASK, ZMM, and HI_ZMM must be the same. */
        if ( ~xcr0 & (XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM) )
            return false;
    }

    /* BNDREGS and BNDCSR must be the same. */
    return !(xcr0 & XSTATE_BNDREGS) == !(xcr0 & XSTATE_BNDCSR);
}

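/*
 * Validate xstate metadata supplied from outside Xen (e.g. by the
 * toolstack): xcr0 and xcr0_accum must be architecturally valid, within
 * what Xen supports, and consistent with each other; the header must
 * describe an uncompacted image (xcomp_bv zero) with no reserved bits set
 * and no state beyond xcr0_accum.
 */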
int validate_xstate(u64 xcr0, u64 xcr0_accum, const struct xsave_hdr *hdr)
{
    unsigned int i;

    if ( (hdr->xstate_bv & ~xcr0_accum) ||
         (xcr0 & ~xcr0_accum) ||
         !valid_xcr0(xcr0) ||
         !valid_xcr0(xcr0_accum) )
        return -EINVAL;

    if ( (xcr0_accum & ~xfeature_mask) ||
         hdr->xcomp_bv )
        return -EOPNOTSUPP;

    for ( i = 0; i < ARRAY_SIZE(hdr->reserved); ++i )
        if ( hdr->reserved[i] )
            return -EIO;

    return 0;
}

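/*
 * Emulate a guest XSETBV write: validate the requested XCR0 value, update
 * the hardware and cached XCR0, and reload any newly enabled components
 * from the vcpu's xsave image.
 */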
int handle_xsetbv(u32 index, u64 new_bv)
{
    struct vcpu *curr = current;
    u64 mask;

    if ( index != XCR_XFEATURE_ENABLED_MASK )
        return -EOPNOTSUPP;

    if ( (new_bv & ~xfeature_mask) || !valid_xcr0(new_bv) )
        return -EINVAL;

    /* XCR0.PKRU is disabled in PV mode. */
    if ( is_pv_vcpu(curr) && (new_bv & XSTATE_PKRU) )
        return -EOPNOTSUPP;

    if ( !set_xcr0(new_bv) )
        return -EFAULT;

    mask = new_bv & ~curr->arch.xcr0_accum;
    curr->arch.xcr0 = new_bv;
    curr->arch.xcr0_accum |= new_bv;

    /* LWP sets nonlazy_xstate_used independently. */
    if ( new_bv & (XSTATE_NONLAZY & ~XSTATE_LWP) )
        curr->arch.nonlazy_xstate_used = 1;

    mask &= curr->fpu_dirtied ? ~XSTATE_FP_SSE : XSTATE_NONLAZY;
    if ( mask )
    {
        unsigned long cr0 = read_cr0();

        clts();
        if ( curr->fpu_dirtied )
            asm ( "stmxcsr %0" : "=m" (curr->arch.xsave_area->fpu_sse.mxcsr) );
        else if ( xstate_all(curr) )
        {
            /* See the comment in i387.c:vcpu_restore_fpu_eager(). */
            mask |= XSTATE_LAZY;
            curr->fpu_initialised = 1;
            curr->fpu_dirtied = 1;
            cr0 &= ~X86_CR0_TS;
        }
        xrstor(curr, mask);
        if ( cr0 & X86_CR0_TS )
            write_cr0(cr0);
    }

    return 0;
}

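/*
 * Read the current BNDCFGU value by saving just the BNDCSR component into
 * the idle vcpu's xsave area.  Returns 0 if BNDCSR is in its init state
 * (i.e. the hardware did not write the component).
 */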
uint64_t read_bndcfgu(void)
{
    unsigned long cr0 = read_cr0();
    struct xsave_struct *xstate
        = idle_vcpu[smp_processor_id()]->arch.xsave_area;
    const struct xstate_bndcsr *bndcsr;

    ASSERT(cpu_has_mpx);
    clts();

    if ( cpu_has_xsavec )
    {
        asm ( ".byte 0x0f,0xc7,0x27\n" /* xsavec */
              : "=m" (*xstate)
              : "a" (XSTATE_BNDCSR), "d" (0), "D" (xstate) );

        bndcsr = (void *)(xstate + 1);
    }
    else
    {
        asm ( ".byte 0x0f,0xae,0x27\n" /* xsave */
              : "=m" (*xstate)
              : "a" (XSTATE_BNDCSR), "d" (0), "D" (xstate) );

        bndcsr = (void *)xstate + xstate_offsets[_XSTATE_BNDCSR];
    }

    if ( cr0 & X86_CR0_TS )
        write_cr0(cr0);

    return xstate->xsave_hdr.xstate_bv & XSTATE_BNDCSR ? bndcsr->bndcfgu : 0;
}

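/*
 * Force the components in mask back to their init state on the current
 * CPU, by loading them via XRSTOR with a zeroed XSAVE header from the idle
 * vcpu's save area.  XCR0 is widened temporarily if needed.
 */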
void xstate_set_init(uint64_t mask)
{
    unsigned long cr0 = read_cr0();
    unsigned long xcr0 = this_cpu(xcr0);
    struct vcpu *v = idle_vcpu[smp_processor_id()];
    struct xsave_struct *xstate = v->arch.xsave_area;

    if ( ~xfeature_mask & mask )
    {
        ASSERT_UNREACHABLE();
        return;
    }

    if ( (~xcr0 & mask) && !set_xcr0(xcr0 | mask) )
        return;

    clts();

    memset(&xstate->xsave_hdr, 0, sizeof(xstate->xsave_hdr));
    xrstor(v, mask);

    if ( cr0 & X86_CR0_TS )
        write_cr0(cr0);

    if ( (~xcr0 & mask) && !set_xcr0(xcr0) )
        BUG();
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */