/*
 *  linux/arch/i386/kernel/i387.c
 *
 *  Copyright (C) 1994 Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  General FPU state handling cleanups
 *  Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/sched.h>
#include <asm/current.h>
#include <asm/processor.h>
#include <asm/hvm/support.h>
#include <asm/i387.h>
#include <asm/xstate.h>
#include <asm/asm_defns.h>

/*******************************/
/*     FPU Restore Functions   */
/*******************************/
/* Restore x87 extended state */
static inline void fpu_xrstor(struct vcpu *v, uint64_t mask)
{
    bool ok;

    ASSERT(v->arch.xsave_area);
    /*
     * XCR0 normally holds whatever the guest OS has set. For Xen's own
     * save/restore we switch to the accumulated feature mask beforehand
     * and restore the guest's value afterwards.
     */
    ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
    ASSERT(ok);
    xrstor(v, mask);
    ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
    ASSERT(ok);
}

/* Restore x87 FPU, MMX, SSE and SSE2 state */
static inline void fpu_fxrstor(struct vcpu *v)
{
    const typeof(v->arch.xsave_area->fpu_sse) *fpu_ctxt = v->arch.fpu_ctxt;

    /*
     * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception is
     * pending. Clear the x87 state here by setting it to fixed values.
     * The hypervisor's data segment may be 0 or may hold a new user
     * value; either is fine. Use the FPU save area as a safe address,
     * because it should be resident in the L1 cache.
     */
    if ( !(fpu_ctxt->fsw & ~fpu_ctxt->fcw & 0x003f) &&
         boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
    {
        asm volatile ( "fnclex\n\t"
                       "ffree %%st(7)\n\t" /* clear stack tag */
                       "fildl %0"          /* load to clear state */
                       : : "m" (*fpu_ctxt) );
    }

    /*
     * FXRSTOR can fault if passed a corrupted data block. We handle this
     * possibility, which may occur if the block was passed to us by control
     * tools or through VCPUOP_initialise, by silently clearing the block.
     */
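    /*
     * The image records the FIP/FDP width it was saved with (see
     * fpu_fxsave()); restore using a matching operand size.
     */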
    switch ( __builtin_expect(fpu_ctxt->x[FPU_WORD_SIZE_OFFSET], 8) )
    {
    default:
        asm volatile (
            /* See below for why the operands/constraints are this way. */
            "1: " REX64_PREFIX "fxrstor (%2)\n"
            ".section .fixup,\"ax\"   \n"
            "2: push %%"__OP"ax       \n"
            "   push %%"__OP"cx       \n"
            "   push %%"__OP"di       \n"
            "   mov  %2,%%"__OP"di    \n"
            "   mov  %1,%%ecx         \n"
            "   xor  %%eax,%%eax      \n"
            "   rep ; stosl           \n"
            "   pop  %%"__OP"di       \n"
            "   pop  %%"__OP"cx       \n"
            "   pop  %%"__OP"ax       \n"
            "   jmp  1b               \n"
            ".previous                \n"
            _ASM_EXTABLE(1b, 2b)
            :
            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4), "R" (fpu_ctxt) );
        break;
    case 4: case 2:
        asm volatile (
            "1: fxrstor %0         \n"
            ".section .fixup,\"ax\"\n"
            "2: push %%"__OP"ax    \n"
            "   push %%"__OP"cx    \n"
            "   push %%"__OP"di    \n"
            "   lea  %0,%%"__OP"di \n"
            "   mov  %1,%%ecx      \n"
            "   xor  %%eax,%%eax   \n"
            "   rep ; stosl        \n"
            "   pop  %%"__OP"di    \n"
            "   pop  %%"__OP"cx    \n"
            "   pop  %%"__OP"ax    \n"
            "   jmp  1b            \n"
            ".previous             \n"
            _ASM_EXTABLE(1b, 2b)
            :
            : "m" (*fpu_ctxt), "i" (sizeof(*fpu_ctxt) / 4) );
        break;
    }
}

/*******************************/
/*      FPU Save Functions     */
/*******************************/

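/* Determine which xstate components need to be saved for this vCPU. */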
static inline uint64_t vcpu_xsave_mask(const struct vcpu *v)
{
    if ( v->fpu_dirtied )
        return v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY;

    ASSERT(v->arch.nonlazy_xstate_used);

    /*
     * The offsets of components living in the extended region of a
     * compact xsave area are not fixed, so parts of the area may be
     * overwritten when an xsave with v->fpu_dirtied set is followed by
     * one with v->fpu_dirtied clear.
     * In that case, if the hypervisor uses a compact xsave area and the
     * guest has ever used lazy states (checked via xcr0_accum, excluding
     * XSTATE_FP_SSE), vcpu_xsave_mask() returns XSTATE_ALL. Otherwise it
     * returns XSTATE_NONLAZY.
     */
    return xstate_all(v) ? XSTATE_ALL : XSTATE_NONLAZY;
}

/* Save x87 extended state */
static inline void fpu_xsave(struct vcpu *v)
{
    bool ok;
    uint64_t mask = vcpu_xsave_mask(v);

    ASSERT(mask);
    ASSERT(v->arch.xsave_area);
    /*
     * XCR0 normally holds whatever the guest OS has set. For Xen's own
     * save/restore we switch to the accumulated feature mask beforehand
     * and restore the guest's value afterwards.
     */
    ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE);
    ASSERT(ok);
    xsave(v, mask);
    ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE);
    ASSERT(ok);
}

/* Save x87 FPU, MMX, SSE and SSE2 state */
static inline void fpu_fxsave(struct vcpu *v)
{
    typeof(v->arch.xsave_area->fpu_sse) *fpu_ctxt = v->arch.fpu_ctxt;
    unsigned int fip_width = v->domain->arch.x87_fip_width;

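    /*
     * A zero x87_fip_width means the width gets determined at each save.
     * Unless the width is known to be 4, use the 64-bit form of FXSAVE
     * and work out below which width to record in the image.
     */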
    if ( fip_width != 4 )
    {
        /*
         * The only way to force fxsaveq on a wide range of gas versions.
         * On older versions the rex64 prefix works only if we force an
         * addressing mode that doesn't require extended registers.
         */
        asm volatile ( REX64_PREFIX "fxsave (%1)"
                       : "=m" (*fpu_ctxt) : "R" (fpu_ctxt) );

        /*
         * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
         * is pending.
         */
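        /* In that case nothing useful was saved; leave the recorded width alone. */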
        if ( !(fpu_ctxt->fsw & 0x0080) &&
             boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
            return;

        /*
         * If the FIP/FDP[63:32] are both zero, it is safe to use the
         * 32-bit restore to also restore the selectors.
         */
        if ( !fip_width &&
             !((fpu_ctxt->fip.addr | fpu_ctxt->fdp.addr) >> 32) )
        {
            struct ix87_env fpu_env;

            asm volatile ( "fnstenv %0" : "=m" (fpu_env) );
            fpu_ctxt->fip.sel = fpu_env.fcs;
            fpu_ctxt->fdp.sel = fpu_env.fds;
            fip_width = 4;
        }
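        /* Otherwise 64-bit FIP/FDP values were captured; record the wide format. */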
        else
            fip_width = 8;
    }
    else
    {
        asm volatile ( "fxsave %0" : "=m" (*fpu_ctxt) );
        fip_width = 4;
    }

    fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = fip_width;
}

/*******************************/
/*       VCPU FPU Functions    */
/*******************************/
/* Restore FPU state whenever the VCPU is scheduled in. */
void vcpu_restore_fpu_eager(struct vcpu *v)
{
    ASSERT(!is_idle_vcpu(v));

    /* Restore nonlazy extended state (i.e. parts not tracked by CR0.TS). */
    if ( !v->arch.nonlazy_xstate_used )
        return;

    /* Avoid recursion */
    clts();

    /*
     * When saving full state even with !v->fpu_dirtied (see vcpu_xsave_mask()
     * above) we also need to restore full state, to prevent subsequently
     * saving state belonging to another vCPU.
     */
    if ( xstate_all(v) )
    {
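        /*
         * Restore everything and mark the state dirty so it is saved again
         * on the next context switch; CR0.TS is left clear.
         */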
        fpu_xrstor(v, XSTATE_ALL);
        v->fpu_initialised = 1;
        v->fpu_dirtied = 1;
    }
    else
    {
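        /*
         * Only the non-lazy state is restored here; set CR0.TS again so
         * that any use of the lazy state faults (#NM) and is handled by
         * vcpu_restore_fpu_lazy().
         */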
        fpu_xrstor(v, XSTATE_NONLAZY);
        stts();
    }
}

/*
 * Restore FPU state when #NM is triggered.
 */
void vcpu_restore_fpu_lazy(struct vcpu *v)
{
    ASSERT(!is_idle_vcpu(v));

    /* Avoid recursion. */
    clts();

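    /* Nothing to do if this vCPU's state is already loaded in the FPU. */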
    if ( v->fpu_dirtied )
        return;

    if ( cpu_has_xsave )
        fpu_xrstor(v, XSTATE_LAZY);
    else
        fpu_fxrstor(v);

    v->fpu_initialised = 1;
    v->fpu_dirtied = 1;
}

/*
 * On each context switch, save the necessary FPU state of the VCPU being
 * switched out. The save operation is dispatched based on the CPU's
 * capabilities. Returns true if any state was actually saved.
 */
static bool _vcpu_save_fpu(struct vcpu *v)
{
    if ( !v->fpu_dirtied && !v->arch.nonlazy_xstate_used )
        return false;

    ASSERT(!is_idle_vcpu(v));

    /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */
    clts();

    if ( cpu_has_xsave )
        fpu_xsave(v);
    else
        fpu_fxsave(v);

    v->fpu_dirtied = 0;

    return true;
}

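/*
 * Save the vCPU's FPU state if necessary, then set CR0.TS so that the
 * next FPU use traps.
 */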
void vcpu_save_fpu(struct vcpu *v)
{
    _vcpu_save_fpu(v);
    stts();
}

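/*
 * Save the current vCPU's FPU state if necessary, leaving CR0.TS clear so
 * the caller can use the FPU itself.
 */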
void save_fpu_enable(void)
{
    if ( !_vcpu_save_fpu(current) )
        clts();
}

/* Initialize FPU's context save area */
int vcpu_init_fpu(struct vcpu *v)
{
    int rc;

    if ( (rc = xstate_alloc_save_area(v)) != 0 )
        return rc;

    if ( v->arch.xsave_area )
        v->arch.fpu_ctxt = &v->arch.xsave_area->fpu_sse;
    else
    {
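        /* No XSAVE area: allocate a standalone FXSAVE/FXRSTOR image instead. */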
        BUILD_BUG_ON(__alignof(v->arch.xsave_area->fpu_sse) < 16);
        v->arch.fpu_ctxt = _xzalloc(sizeof(v->arch.xsave_area->fpu_sse),
                                    __alignof(v->arch.xsave_area->fpu_sse));
        if ( v->arch.fpu_ctxt )
        {
            typeof(v->arch.xsave_area->fpu_sse) *fpu_sse = v->arch.fpu_ctxt;

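            /* Start from the architectural default control/status values. */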
            fpu_sse->fcw = FCW_DEFAULT;
            fpu_sse->mxcsr = MXCSR_DEFAULT;
        }
        else
            rc = -ENOMEM;
    }

    return rc;
}

/* Free FPU's context save area */
void vcpu_destroy_fpu(struct vcpu *v)
{
    if ( v->arch.xsave_area )
        xstate_free_save_area(v);
    else
        xfree(v->arch.fpu_ctxt);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */