/*
 * Copyright (c) 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <lk/trace.h>
#include <lk/bits.h>
#include <arch/x86.h>
#include <arch/x86/feature.h>
#include <arch/fpu.h>
#include <string.h>
#include <kernel/thread.h>

#define LOCAL_TRACE 0

#if X86_WITH_FPU

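/* when nonzero, mask all x87 and SSE floating point exceptions; otherwise unmask them all */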
#define FPU_MASK_ALL_EXCEPTIONS 1

/* true if this cpu has a usable FPU (x87 + SSE + FXSAVE detected via CPUID) */
static bool fp_supported;

/* FXSAVE area is 512 bytes and must be 16-byte aligned */
static uint8_t __ALIGNED(16) fpu_init_states[512] = {0};

/* saved copy of some feature bits */
typedef struct {
    bool with_fpu;
    bool with_sse;
    bool with_sse2;
    bool with_sse3;
    bool with_ssse3;
    bool with_sse4_1;
    bool with_sse4_2;
    bool with_sse4a;
    bool with_fxsave;
    bool with_xsave;

    bool with_xsaveopt;
    bool with_xsavec;
    bool with_xsaves;
} fpu_features_t;

static fpu_features_t fpu_features;

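/* CR0.TS (task switched): while set, any x87/SSE instruction raises a #NM
 * (device not available) fault; clearing it allows FPU/SSE use again.
 */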
static void disable_fpu(void) {
    x86_set_cr0(x86_get_cr0() | X86_CR0_TS);
}

static void enable_fpu(void) {
    x86_set_cr0(x86_get_cr0() & ~X86_CR0_TS);
}

/* called per cpu as they're brought up */
void x86_fpu_early_init_percpu(void) {
    if (!fp_supported) {
        return;
    }

    /* No x87 emul, monitor co-processor */
    ulong x = x86_get_cr0();
    x &= ~X86_CR0_EM;
    x |= X86_CR0_NE;
    x |= X86_CR0_MP;
    x86_set_cr0(x);

    /* Init x87 */
    uint16_t fcw;
    __asm__ __volatile__ ("finit");
    __asm__ __volatile__("fstcw %0" : "=m" (fcw));
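    /* FCW bits 0-5 are the x87 exception mask bits */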
#if FPU_MASK_ALL_EXCEPTIONS
    /* mask all exceptions */
    fcw |= 0x3f;
#else
    /* unmask all exceptions */
    fcw &= 0xffc0;
#endif
    __asm__ __volatile__("fldcw %0" : : "m" (fcw));

    /* Init SSE */
    x = x86_get_cr4();
    x |= X86_CR4_OSXMMEXPT; // take unmasked SSE exceptions as #XM faults
    x |= X86_CR4_OSFXSR;    // enable FXSAVE/FXRSTOR and SSE instructions
    x &= ~X86_CR4_OSXSAVE;  // leave XSAVE disabled (currently unsupported)
    x86_set_cr4(x);

    uint32_t mxcsr;
    __asm__ __volatile__("stmxcsr %0" : "=m" (mxcsr));
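    /* MXCSR bits 7-12 are the SSE exception mask bits */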
#if FPU_MASK_ALL_EXCEPTIONS
    /* mask all exceptions */
    mxcsr = (0x3f << 7);
#else
    /* unmask all exceptions */
    mxcsr &= 0x0000003f;
#endif
    __asm__ __volatile__("ldmxcsr %0" : : "m" (mxcsr));

    /* save the initial fpu state; it is copied into each newly created thread */
    __asm__ __volatile__("fxsave %0" : "=m" (fpu_init_states));

    enable_fpu();
}

/* called on the first cpu before the kernel is initialized. printfs may not work here */
void x86_fpu_early_init(void) {
    fp_supported = false;

    // test a bunch of fpu features
    fpu_features.with_fpu = x86_feature_test(X86_FEATURE_FPU);
    fpu_features.with_sse = x86_feature_test(X86_FEATURE_SSE);
    fpu_features.with_sse2 = x86_feature_test(X86_FEATURE_SSE2);
    fpu_features.with_sse3 = x86_feature_test(X86_FEATURE_SSE3);
    fpu_features.with_ssse3 = x86_feature_test(X86_FEATURE_SSSE3);
    fpu_features.with_sse4_1 = x86_feature_test(X86_FEATURE_SSE4_1);
    fpu_features.with_sse4_2 = x86_feature_test(X86_FEATURE_SSE4_2);
    fpu_features.with_sse4a = x86_feature_test(X86_FEATURE_SSE4A);
    fpu_features.with_fxsave = x86_feature_test(X86_FEATURE_FXSR);
    fpu_features.with_xsave = x86_feature_test(X86_FEATURE_XSAVE);

    // these are the mandatory ones to continue (for the moment)
    if (!fpu_features.with_fpu || !fpu_features.with_sse || !fpu_features.with_fxsave) {
        return;
    }

    fp_supported = true;

    // detect and save some xsave information
    // NOTE: currently unused
    fpu_features.with_xsaveopt = false;
    fpu_features.with_xsavec = false;
    fpu_features.with_xsaves = false;
    if (fpu_features.with_xsave) {
        LTRACEF("X86: XSAVE detected\n");
        struct x86_cpuid_leaf leaf;
        if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf)) {
            LTRACEF("xsave leaf 0: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
        }
        if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 1, &leaf)) {
            /* CPUID.(EAX=0DH, ECX=1):EAX reports XSAVEOPT/XSAVEC/XSAVES support */
            fpu_features.with_xsaveopt = BIT(leaf.a, 0);
            fpu_features.with_xsavec = BIT(leaf.a, 1);
            fpu_features.with_xsaves = BIT(leaf.a, 3);
            LTRACEF("xsaveopt %u xsavec %u xsaves %u\n", fpu_features.with_xsaveopt, fpu_features.with_xsavec, fpu_features.with_xsaves);
            LTRACEF("xsave leaf 1: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
        }

        for (int i = 2; i < 64; i++) {
            if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, i, &leaf)) {
                if (leaf.a > 0) {
                    LTRACEF("xsave leaf %d: %#x %#x %#x %#x\n", i, leaf.a, leaf.b, leaf.c, leaf.d);
                    LTRACEF("\tstate %d: size required %u offset %u\n", i, leaf.a, leaf.b);
                }
            }
        }
    }
}

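/* called later in init when dprintf is available; dump the detected fpu features */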
void x86_fpu_init(void) {
    dprintf(SPEW, "X86: fpu %u sse %u sse2 %u sse3 %u ssse3 %u sse4.1 %u sse4.2 %u sse4a %u\n",
            fpu_features.with_fpu, fpu_features.with_sse, fpu_features.with_sse2,
            fpu_features.with_sse3, fpu_features.with_ssse3, fpu_features.with_sse4_1,
            fpu_features.with_sse4_2, fpu_features.with_sse4a);
    dprintf(SPEW, "X86: fxsave %u xsave %u\n", fpu_features.with_fxsave, fpu_features.with_xsave);

    if (!fp_supported) {
        dprintf(SPEW, "no usable FPU detected (requires SSE + FXSAVE)\n");
    }

    if (fpu_features.with_fxsave) {
        dprintf(SPEW, "X86: FXSAVE detected\n");
    }

    if (fpu_features.with_xsave) {
        dprintf(SPEW, "X86: XSAVE detected\n");
        dprintf(SPEW, "\txsaveopt %u xsavec %u xsaves %u\n", fpu_features.with_xsaveopt, fpu_features.with_xsavec, fpu_features.with_xsaves);

        struct x86_cpuid_leaf leaf;
        if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf)) {
            dprintf(SPEW, "\txsave leaf 0: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
        }
        if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 1, &leaf)) {
            dprintf(SPEW, "\txsave leaf 1: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
        }

        for (int i = 2; i < 64; i++) {
            if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, i, &leaf)) {
                if (leaf.a > 0) {
                    dprintf(SPEW, "\txsave leaf %d: %#x %#x %#x %#x\n", i, leaf.a, leaf.b, leaf.c, leaf.d);
                    dprintf(SPEW, "\t\tstate %d: size required %u offset %u\n", i, leaf.a, leaf.b);
                }
            }
        }
    }
}

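/* point the thread's fpu_states pointer at a 16-byte aligned slot inside its
 * buffer (FXSAVE/FXRSTOR require 16-byte alignment) and seed it with the
 * initial fpu state captured at boot.
 */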
void fpu_init_thread_states(thread_t *t) {
    t->arch.fpu_states = (vaddr_t *)ROUNDUP(((vaddr_t)t->arch.fpu_buffer), 16);
    memcpy(t->arch.fpu_states, fpu_init_states, sizeof(fpu_init_states));
}

void fpu_context_switch(thread_t *old_thread, thread_t *new_thread) {
    if (!fp_supported)
        return;

    DEBUG_ASSERT(old_thread != new_thread);

    LTRACEF("cpu %u old %p new %p\n", arch_curr_cpu_num(), old_thread, new_thread);
    LTRACEF("old fpu_states %p new fpu_states %p\n",
            old_thread->arch.fpu_states, new_thread->arch.fpu_states);

    // TODO: use the appropriate versions of fpu state save/restore based on the
    // features of the CPU. For the moment, we assume that the CPU supports
    // FXSAVE and that the threads have been initialized with FXSAVE state.

    // save the old thread's fpu state if it has one and restore the new thread's
    // fpu state if it has one. Remember if the old thread had a valid FPU state
    // so that we can enable the FPU if it was disabled.
    bool old_fpu_enabled = false;
    if (likely(old_thread->arch.fpu_states)) {
        __asm__ __volatile__("fxsave %0" : "=m" (*old_thread->arch.fpu_states));
        old_fpu_enabled = true;
    }
    if (likely(new_thread->arch.fpu_states)) {
        if (!old_fpu_enabled) {
            enable_fpu();
        }
        __asm__ __volatile__("fxrstor %0" : : "m" (*new_thread->arch.fpu_states));
    } else {
        // if switching to a thread that does not have FPU state, disable the FPU.
        disable_fpu();
    }
}

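/* #NM (device not available) handler: an FPU/SSE instruction executed while
 * CR0.TS was set. Since FPU state is switched eagerly above, this is
 * unexpected, so panic.
 */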
void fpu_dev_na_handler(void) {
    TRACEF("cpu %u\n", arch_curr_cpu_num());

    panic("FPU not available on this CPU\n");
}
#endif
