/*
 * Copyright (c) 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
23
24 #include <lk/trace.h>
25 #include <lk/bits.h>
26 #include <arch/x86.h>
27 #include <arch/x86/feature.h>
28 #include <arch/fpu.h>
29 #include <string.h>
30 #include <kernel/thread.h>
31
32 #define LOCAL_TRACE 0
33
34 #if X86_WITH_FPU
35
/* When nonzero, all x87 and SSE exception types are masked during per-cpu
 * init; when zero, all are unmasked and will fault. */
#define FPU_MASK_ALL_EXCEPTIONS 1

/* CPUID EAX = 1 return values */
/* Set once on the boot cpu if the mandatory feature set (x87 + SSE + FXSAVE)
 * is present; gates all other routines in this file. */
static bool fp_supported;

/* FXSAVE area comprises 512 bytes starting with 16-byte aligned */
/* Pristine FPU/SSE state captured by fxsave during per-cpu early init;
 * copied into each new thread's save area by fpu_init_thread_states(). */
static uint8_t __ALIGNED(16) fpu_init_states[512]= {0};

/* saved copy of some feature bits */
typedef struct {
    bool with_fpu;      /* x87 FPU present */
    bool with_sse;
    bool with_sse2;
    bool with_sse3;
    bool with_ssse3;
    bool with_sse4_1;
    bool with_sse4_2;
    bool with_sse4a;    /* AMD extension */
    bool with_fxsave;   /* FXSAVE/FXRSTOR supported (mandatory here) */
    bool with_xsave;    /* XSAVE family supported (detected, not yet used) */

    /* XSAVE sub-features from CPUID leaf 0xd, subleaf 1 (EAX bits 0/1/3) */
    bool with_xsaveopt;
    bool with_xsavec;
    bool with_xsaves;
} fpu_features_t;

static fpu_features_t fpu_features;
63
disable_fpu(void)64 static void disable_fpu(void) {
65 x86_set_cr0(x86_get_cr0() | X86_CR0_TS);
66 }
67
enable_fpu(void)68 static void enable_fpu(void) {
69 x86_set_cr0(x86_get_cr0() & ~X86_CR0_TS);
70 }
71
72 /* called per cpu as they're brought up */
/* called per cpu as they're brought up */
/* Configures CR0/CR4, initializes the x87 and SSE units, and captures the
 * pristine register image into fpu_init_states. Leaves the FPU enabled
 * (CR0.TS clear) for the boot thread on this cpu. No-op if the mandatory
 * feature set was not detected by x86_fpu_early_init(). */
void x86_fpu_early_init_percpu(void) {
    if (!fp_supported) {
        return;
    }

    /* No x87 emul, monitor co-processor */
    ulong x = x86_get_cr0();
    x &= ~X86_CR0_EM;
    x |= X86_CR0_NE;
    x |= X86_CR0_MP;
    x86_set_cr0(x);

    /* Init x87 */
    uint16_t fcw;
    __asm__ __volatile__ ("finit");
    __asm__ __volatile__("fstcw %0" : "=m" (fcw));
#if FPU_MASK_ALL_EXCEPTIONS
    /* mask all exceptions (FCW bits 0-5) */
    fcw |= 0x3f;
#else
    /* unmask all exceptions */
    fcw &= 0xffc0;
#endif
    __asm__ __volatile__("fldcw %0" : : "m" (fcw));

    /* Init SSE */
    x = x86_get_cr4();
    x |= X86_CR4_OSXMMEXPT; // supports exceptions
    x |= X86_CR4_OSFXSR; // supports fxsave
    x &= ~X86_CR4_OSXSAVE; // no support for xsave (currently)
    x86_set_cr4(x);

    uint32_t mxcsr;
    __asm__ __volatile__("stmxcsr %0" : "=m" (mxcsr));
#if FPU_MASK_ALL_EXCEPTIONS
    /* mask all exceptions (MXCSR bits 7-12).
     * NOTE(review): this assigns rather than ORs, so it also clears the
     * status flags and resets rounding/DAZ/FTZ to their zero defaults --
     * presumably intentional as a full MXCSR reset; confirm vs the fcw
     * path above which only ORs in the mask bits. */
    mxcsr = (0x3f << 7);
#else
    /* unmask all exceptions */
    mxcsr &= 0x0000003f;
#endif
    __asm__ __volatile__("ldmxcsr %0" : : "m" (mxcsr));

    /* save fpu initial states, and used when new thread creates */
    /* every cpu overwrites the same buffer; the image is identical since
     * each cpu runs the same init sequence just above */
    __asm__ __volatile__("fxsave %0" : "=m" (fpu_init_states));

    enable_fpu();
}
121
122 /* called on the first cpu before the kernel is initialized. printfs may not work here */
x86_fpu_early_init(void)123 void x86_fpu_early_init(void) {
124 fp_supported = false;
125
126 // test a bunch of fpu features
127 fpu_features.with_fpu = x86_feature_test(X86_FEATURE_FPU);
128 fpu_features.with_sse = x86_feature_test(X86_FEATURE_SSE);
129 fpu_features.with_sse2 = x86_feature_test(X86_FEATURE_SSE2);
130 fpu_features.with_sse3 = x86_feature_test(X86_FEATURE_SSE3);
131 fpu_features.with_ssse3 = x86_feature_test(X86_FEATURE_SSSE3);
132 fpu_features.with_sse4_1 = x86_feature_test(X86_FEATURE_SSE4_1);
133 fpu_features.with_sse4_2 = x86_feature_test(X86_FEATURE_SSE4_2);
134 fpu_features.with_sse4a = x86_feature_test(X86_FEATURE_SSE4A);
135 fpu_features.with_fxsave = x86_feature_test(X86_FEATURE_FXSR);
136 fpu_features.with_xsave = x86_feature_test(X86_FEATURE_XSAVE);
137
138 // these are the mandatory ones to continue (for the moment)
139 if (!fpu_features.with_fpu || !fpu_features.with_sse || !fpu_features.with_fxsave) {
140 return;
141 }
142
143 fp_supported = true;
144
145 // detect and save some xsave information
146 // NOTE: currently unused
147 fpu_features.with_xsaveopt = false;
148 fpu_features.with_xsavec = false;
149 fpu_features.with_xsaves = false;
150 if (fpu_features.with_xsave) {
151 LTRACEF("X86: XSAVE detected\n");
152 struct x86_cpuid_leaf leaf;
153 if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf)) {
154 fpu_features.with_xsaveopt = BIT(leaf.a, 0);
155 fpu_features.with_xsavec = BIT(leaf.a, 1);
156 fpu_features.with_xsaves = BIT(leaf.a, 3);
157 LTRACEF("xsaveopt %u xsavec %u xsaves %u\n", fpu_features.with_xsaveopt, fpu_features.with_xsavec, fpu_features.with_xsaves);
158 LTRACEF("xsave leaf 0: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
159 }
160 if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 1, &leaf)) {
161 LTRACEF("xsave leaf 1: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
162 }
163
164 for (int i = 2; i < 64; i++) {
165 if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, i, &leaf)) {
166 if (leaf.a > 0) {
167 LTRACEF("xsave leaf %d: %#x %#x %#x %#x\n", i, leaf.a, leaf.b, leaf.c, leaf.d);
168 LTRACEF("\tstate %d: size required %u offset %u\n", i, leaf.a, leaf.b);
169 }
170 }
171 }
172 }
173 }
174
x86_fpu_init(void)175 void x86_fpu_init(void) {
176 dprintf(SPEW, "X86: fpu %u sse %u sse2 %u sse3 %u ssse3 %u sse4.1 %u sse4.2 %u sse4a %u\n",
177 fpu_features.with_fpu, fpu_features.with_sse, fpu_features.with_sse2,
178 fpu_features.with_sse3, fpu_features.with_ssse3, fpu_features.with_sse4_1,
179 fpu_features.with_sse4_2, fpu_features.with_sse4a);
180 dprintf(SPEW, "X86: fxsave %u xsave %u\n", fpu_features.with_fxsave, fpu_features.with_xsave);
181
182 if (!fp_supported) {
183 dprintf(SPEW, "no usable FPU detected (requires SSE + FXSAVE)\n");
184 }
185
186 if (fpu_features.with_fxsave) {
187 dprintf(SPEW, "X86: FXSAVE detected\n");
188 }
189
190 if (fpu_features.with_xsave) {
191 dprintf(SPEW, "X86: XSAVE detected\n");
192 dprintf(SPEW, "\txsaveopt %u xsavec %u xsaves %u\n", fpu_features.with_xsaveopt, fpu_features.with_xsavec, fpu_features.with_xsaves);
193
194 struct x86_cpuid_leaf leaf;
195 if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 0, &leaf)) {
196 dprintf(SPEW, "\txsave leaf 0: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
197 }
198 if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, 1, &leaf)) {
199 dprintf(SPEW, "\txsave leaf 1: %#x %#x %#x %#x\n", leaf.a, leaf.b, leaf.c, leaf.d);
200 }
201
202 for (int i = 2; i < 64; i++) {
203 if (x86_get_cpuid_subleaf(X86_CPUID_XSAVE, i, &leaf)) {
204 if (leaf.a > 0) {
205 dprintf(SPEW, "\txsave leaf %d: %#x %#x %#x %#x\n", i, leaf.a, leaf.b, leaf.c, leaf.d);
206 dprintf(SPEW, "\t\tstate %d: size required %u offset %u\n", i, leaf.a, leaf.b);
207 }
208 }
209 }
210 }
211
212 }
213
/* Prepare a newly created thread's FPU save area: align the per-thread
 * buffer up to the 16-byte boundary that fxsave/fxrstor require, then seed
 * it with the pristine state captured at boot.
 * NOTE(review): assumes t->arch.fpu_buffer has at least 15 bytes of slack
 * beyond the 512-byte FXSAVE area to absorb the round-up -- confirm
 * against the arch thread struct definition. */
void fpu_init_thread_states(thread_t *t) {
    t->arch.fpu_states = (vaddr_t *)ROUNDUP(((vaddr_t)t->arch.fpu_buffer), 16);
    memcpy(t->arch.fpu_states, fpu_init_states, sizeof(fpu_init_states));
}
218
/* Eagerly swap FPU state on a context switch from old_thread to new_thread.
 * Invariant relied on here: if the outgoing thread has a fpu_states buffer,
 * the FPU is currently enabled (CR0.TS clear), so fxsave is safe; if it has
 * none, the FPU was left disabled and no save is needed. The save areas are
 * 16-byte aligned by fpu_init_thread_states(), as fxsave/fxrstor require. */
void fpu_context_switch(thread_t *old_thread, thread_t *new_thread) {
    if (!fp_supported)
        return;

    DEBUG_ASSERT(old_thread != new_thread);

    LTRACEF("cpu %u old %p new %p\n", arch_curr_cpu_num(), old_thread, new_thread);
    LTRACEF("old fpu_states %p new fpu_states %p\n",
            old_thread->arch.fpu_states, new_thread->arch.fpu_states);

    // TODO: use the appropriate versions of fpu state save/restore based on the
    // features of the CPU. For the moment, we assume that the CPU supports
    // FXSAVE and that the threads have been initialized with FXSAVE state.

    // save the old thread's fpu state if it has one and restore the new thread's
    // fpu state if it has one. Remember if the old thread had a valid FPU state
    // so that we can enable the FPU if it was disabled.
    bool old_fpu_enabled = false;
    if (likely(old_thread->arch.fpu_states)) {
        __asm__ __volatile__("fxsave %0" : "=m" (*old_thread->arch.fpu_states));
        old_fpu_enabled = true;
    }
    if (likely(new_thread->arch.fpu_states)) {
        // clear CR0.TS before fxrstor, which would otherwise raise #NM
        if (!old_fpu_enabled) {
            enable_fpu();
        }
        __asm__ __volatile__("fxrstor %0" : : "m" (*new_thread->arch.fpu_states));
    } else {
        // if switching to a thread that does not have FPU state, disable the FPU.
        disable_fpu();
    }
}
251
/* Device-not-available (#NM) fault handler: fires when an FPU/SSE
 * instruction executes while CR0.TS is set. Since fpu_context_switch()
 * eagerly restores state for threads that have a save area and disables
 * the FPU for those that don't, reaching here means a thread without FPU
 * state used the FPU -- currently unsupported, so panic.
 * NOTE(review): a lazy save-area allocation path would hook in here. */
void fpu_dev_na_handler(void) {
    TRACEF("cpu %u\n", arch_curr_cpu_num());

    panic("FPU not available on this CPU\n");
}
257 #endif
258