/*
 * linux/arch/i386/kernel/i387.c
 *
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/sched.h>
#include <asm/current.h>
#include <asm/processor.h>
#include <asm/hvm/support.h>
#include <asm/i387.h>
#include <asm/xstate.h>
#include <asm/asm_defns.h>

/*******************************/
/*     FPU Restore Functions   */
/*******************************/
/* Restore x87 extended state */
static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
{
    bool ok;

    ASSERT(v->arch.xsave_area);
    /*
     * XCR0 normally represents what the guest OS has set. In the case of Xen
     * itself, we set the accumulated feature mask before doing save/restore.
     */
    ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
    ASSERT(ok);
    xrstor(v, mask);
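    /*
     * Restore the guest's own XCR0. It may still be zero if the guest has
     * never set it; fall back to the always-valid FP|SSE value, since the
     * x87 bit must remain set in XCR0.
     */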
    ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
    ASSERT(ok);
}

/* Restore x87 FPU, MMX, SSE and SSE2 state */
static inline void fpu_fxrstor(struct vcpu *v)
{
    const typeof(v->arch.xsave_area->fpu_sse) *fpu_ctxt = v->arch.fpu_ctxt;

    /*
     * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
     * is pending. Clear the x87 state here by setting it to fixed
     * values. The hypervisor data segment can sometimes be 0 and
     * sometimes the new user value. Both should be OK. Use the FPU saved
     * data block as a safe address because it should be in L1.
     */
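    /*
     * Only needed when the image to be restored has no unmasked exceptions
     * pending: in that case FXRSTOR will not load FDP/FIP/FOP on AMD CPUs,
     * so stale values have to be cleared by hand first.
     */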
    if ( !(fpu_ctxt->fsw & ~fpu_ctxt->fcw & 0x003f) &&
         boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
    {
        asm volatile ( "fnclex\n\t"
                       "ffree %%st(7)\n\t" /* clear stack tag */
                       "fildl %0"          /* load to clear state */
                       : : "m" (*fpu_ctxt) );
    }

    /*
     * FXRSTOR can fault if passed a corrupted data block. We handle this
     * possibility, which may occur if the block was passed to us by control
     * tools or through VCPUOP_initialise, by silently clearing the block.
     */
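    /*
     * The width recorded by fpu_fxsave() (or supplied with a loaded context)
     * selects between the 64-bit (REX prefixed) and 32-bit forms of FXRSTOR.
     */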
    switch ( __builtin_expect(fpu_ctxt->x[FPU_WORD_SIZE_OFFSET], 8) )
    {
    default:
        asm volatile (
            /* See below for why the operands/constraints are this way. */
            "1: " REX64_PREFIX "fxrstor (%2)\n"
            ".section .fixup,\"ax\"   \n"
            "2: push %%"__OP"ax       \n"
            "   push %%"__OP"cx       \n"
            "   push %%"__OP"di       \n"
            "   mov  %2,%%"__OP"di    \n"
            "   mov  %1,%%ecx         \n"
            "   xor  %%eax,%%eax      \n"
            "   rep ; stosl           \n"
            "   pop  %%"__OP"di       \n"
            "   pop  %%"__OP"cx       \n"
            "   pop  %%"__OP"ax       \n"
            "   jmp  1b               \n"
            ".previous                \n"
            _ASM_EXTABLE(1b, 2b)
            :
            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4), "R" (fpu_ctxt) );
        break;
    case 4: case 2:
        asm volatile (
            "1: fxrstor %0         \n"
            ".section .fixup,\"ax\"\n"
            "2: push %%"__OP"ax    \n"
            "   push %%"__OP"cx    \n"
            "   push %%"__OP"di    \n"
            "   lea  %0,%%"__OP"di \n"
            "   mov  %1,%%ecx      \n"
            "   xor  %%eax,%%eax   \n"
            "   rep ; stosl        \n"
            "   pop  %%"__OP"di    \n"
            "   pop  %%"__OP"cx    \n"
            "   pop  %%"__OP"ax    \n"
            "   jmp  1b            \n"
            ".previous             \n"
            _ASM_EXTABLE(1b, 2b)
            :
            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
        break;
    }
}

/*******************************/
/*      FPU Save Functions     */
/*******************************/

static inline uint64_t vcpu_xsave_mask(const struct vcpu *v)
{
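    /*
     * If the lazy state has been dirtied, save everything the guest may have
     * touched; include the non-lazy components only if they are in use.
     */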
    if ( v->fpu_dirtied )
        return v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY;

    ASSERT(v->arch.nonlazy_xstate_used);

    /*
     * The offsets of components which live in the extended region of a
     * compact xsave area are not fixed. The xsave area may be overwritten
     * when an xsave with v->fpu_dirtied set is followed by one with
     * v->fpu_dirtied clear.
     * In such a case, if the hypervisor uses a compact xsave area and the
     * guest has ever used lazy states (checking xcr0_accum excluding
     * XSTATE_FP_SSE), vcpu_xsave_mask will return XSTATE_ALL. Otherwise
     * return XSTATE_NONLAZY.
     */
    return xstate_all(v) ? XSTATE_ALL : XSTATE_NONLAZY;
}

/* Save x87 extended state */
static inline void fpu_xsave(struct vcpu *v)
{
    bool ok;
    uint64_t mask = vcpu_xsave_mask(v);

    ASSERT(mask);
    ASSERT(v->arch.xsave_area);
    /*
     * XCR0 normally represents what the guest OS has set. In the case of Xen
     * itself, we set the accumulated feature mask before doing save/restore.
     */
    ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
    ASSERT(ok);
    xsave(v, mask);
    ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
    ASSERT(ok);
}

/* Save x87 FPU, MMX, SSE and SSE2 state */
static inline void fpu_fxsave(struct vcpu *v)
{
    typeof(v->arch.xsave_area->fpu_sse) *fpu_ctxt = v->arch.fpu_ctxt;
    unsigned int fip_width = v->domain->arch.x87_fip_width;

    if ( fip_width != 4 )
    {
        /*
         * The only way to force fxsaveq on a wide range of gas versions.
         * On older versions the rex64 prefix works only if we force an
         * addressing mode that doesn't require extended registers.
         */
        asm volatile ( REX64_PREFIX "fxsave (%1)"
                       : "=m" (*fpu_ctxt) : "R" (fpu_ctxt) );

        /*
         * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
         * is pending.
         */
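        /* FSW.ES (bit 7) is set while an unmasked x87 exception is pending. */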
        if ( !(fpu_ctxt->fsw & 0x0080) &&
             boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
            return;

        /*
         * If the FIP/FDP[63:32] are both zero, it is safe to use the
         * 32-bit restore to also restore the selectors.
         */
        if ( !fip_width &&
             !((fpu_ctxt->fip.addr | fpu_ctxt->fdp.addr) >> 32) )
        {
            struct ix87_env fpu_env;

            asm volatile ( "fnstenv %0" : "=m" (fpu_env) );
            fpu_ctxt->fip.sel = fpu_env.fcs;
            fpu_ctxt->fdp.sel = fpu_env.fds;
            fip_width = 4;
        }
        else
            fip_width = 8;
    }
    else
    {
        asm volatile ( "fxsave %0" : "=m" (*fpu_ctxt) );
        fip_width = 4;
    }

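    /*
     * Record the FIP/FDP width that was saved so that fpu_fxrstor() can pick
     * the matching form of FXRSTOR.
     */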
    fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = fip_width;
}

/*******************************/
/*      VCPU FPU Functions     */
/*******************************/
/* Restore FPU state whenever the vCPU is scheduled in. */
void vcpu_restore_fpu_eager(struct vcpu *v)
{
    ASSERT(!is_idle_vcpu(v));

    /* Restore nonlazy extended state (i.e. parts not tracked by CR0.TS). */
    if ( !v->arch.nonlazy_xstate_used )
        return;

    /* Avoid recursion */
    clts();

    /*
     * When saving full state even with !v->fpu_dirtied (see vcpu_xsave_mask()
     * above) we also need to restore full state, to prevent subsequently
     * saving state belonging to another vCPU.
     */
    if ( xstate_all(v) )
    {
        fpu_xrstor(v, XSTATE_ALL);
        v->fpu_initialised = 1;
        v->fpu_dirtied = 1;
    }
    else
    {
        fpu_xrstor(v, XSTATE_NONLAZY);
        stts();
    }
}

/*
 * Restore FPU state when #NM is triggered.
 */
void vcpu_restore_fpu_lazy(struct vcpu *v)
{
    ASSERT(!is_idle_vcpu(v));

    /* Avoid recursion. */
    clts();

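    /*
     * If the state was dirtied since it was last loaded, it is still resident
     * in the FPU and there is nothing to restore.
     */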
    if ( v->fpu_dirtied )
        return;

    if ( cpu_has_xsave )
        fpu_xrstor(v, XSTATE_LAZY);
    else
        fpu_fxrstor(v);

    v->fpu_initialised = 1;
    v->fpu_dirtied = 1;
}

/*
 * On each context switch, save the necessary FPU state of the vCPU being
 * switched out. The saving operation is dispatched based on the CPU's
 * capabilities.
 */
static bool _vcpu_save_fpu(struct vcpu *v)
{
    if ( !v->fpu_dirtied && !v->arch.nonlazy_xstate_used )
        return false;

    ASSERT(!is_idle_vcpu(v));

    /* This can happen if a paravirtualised guest OS has set its CR0.TS. */
    clts();

    if ( cpu_has_xsave )
        fpu_xsave(v);
    else
        fpu_fxsave(v);

    v->fpu_dirtied = 0;

    return true;
}

void vcpu_save_fpu(struct vcpu *v)
{
    _vcpu_save_fpu(v);
    stts();
}

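/*
 * Save the current vCPU's FPU state if it needs saving, and leave CR0.TS
 * clear so that the caller can use the FPU itself.
 */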
void save_fpu_enable(void)
{
    if ( !_vcpu_save_fpu(current) )
        clts();
}

/* Initialize FPU's context save area */
int vcpu_init_fpu(struct vcpu *v)
{
    int rc;

    if ( (rc = xstate_alloc_save_area(v)) != 0 )
        return rc;

    if ( v->arch.xsave_area )
        v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse;
    else
    {
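        /* FXSAVE/FXRSTOR require the save area to be 16-byte aligned. */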
        BUILD_BUG_ON(__alignof(v->arch.xsave_area->fpu_sse) < 16);
        v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse),
                                    __alignof(v->arch.xsave_area->fpu_sse));
        if ( v->arch.fpu_ctxt )
        {
            typeof(v->arch.xsave_area->fpu_sse) *fpu_sse = v->arch.fpu_ctxt;

            fpu_sse->fcw = FCW_DEFAULT;
            fpu_sse->mxcsr = MXCSR_DEFAULT;
        }
        else
            rc = -ENOMEM;
    }

    return rc;
}

/* Free FPU's context save area */
void vcpu_destroy_fpu(struct vcpu *v)
{
    if ( v->arch.xsave_area )
        xstate_free_save_area(v);
    else
        xfree(v->arch.fpu_ctxt);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */