1 /*
2 * Copyright (C) 2018-2022 Intel Corporation.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 *
6 * this file contains vmcs operations which is vcpu related
7 */
8
9 #include <types.h>
10 #include <errno.h>
11 #include <asm/lib/bits.h>
12 #include <asm/guest/virq.h>
13 #include <asm/mmu.h>
14 #include <asm/guest/vcpu.h>
15 #include <asm/guest/vm.h>
16 #include <asm/vmx.h>
17 #include <asm/vtd.h>
18 #include <asm/guest/vmexit.h>
19 #include <asm/pgtable.h>
20 #include <asm/cpufeatures.h>
21 #include <trace.h>
22 #include <logmsg.h>
23
24 /*
25 * Physical CR4 bits in VMX operation may be either flexible or fixed.
26 * Guest CR4 bits may be operatable or reserved.
27 *
28 * All the guest reserved bits should be TRAPed and EMULATed by HV
29 * (inject #GP).
30 *
31 * For guest operatable bits, it may be:
32 * CR4_PASSTHRU_BITS:
33 * Bits that may be passed through to guest. The actual passthru bits
34 * should be masked by flexible bits.
35 *
36 * CR4_TRAP_AND_PASSTHRU_BITS:
37 * The bits are trapped by HV and HV emulation will eventually write
38 * the guest value to physical CR4 (GUEST_CR4) too. The actual bits
39 * should be masked by flexible bits.
40 *
41 * CR4_TRAP_AND_EMULATE_BITS:
42 * The bits are trapped by HV and emulated, but HV updates vCR4 only
43 * (no update to physical CR4), i.e. pure software emulation.
44 *
45 * CR4_EMULATED_RESERVE_BITS:
46 * The bits are trapped, but are emulated by injecting a #GP.
47 *
48 * NOTE: Above bits should not overlap.
49 *
50 */
51 #define CR4_PASSTHRU_BITS (CR4_VME | CR4_PVI | CR4_TSD | CR4_DE | \
52 CR4_PGE | CR4_PCE | CR4_OSFXSR | CR4_PCIDE | \
53 CR4_OSXSAVE | CR4_FSGSBASE | CR4_OSXMMEXCPT | \
54 CR4_UMIP | CR4_LA57)
55 static uint64_t cr4_passthru_mask = CR4_PASSTHRU_BITS; /* bound to flexible bits */
56
57 #define CR4_TRAP_AND_PASSTHRU_BITS (CR4_PSE | CR4_PAE | CR4_SMEP | CR4_SMAP | CR4_PKE | CR4_PKS | CR4_KL)
58 static uint64_t cr4_trap_and_passthru_mask = CR4_TRAP_AND_PASSTHRU_BITS; /* bound to flexible bits */
59
60 #ifdef CONFIG_NVMX_ENABLED
61 #define CR4_TRAP_AND_EMULATE_BITS (CR4_VMXE | CR4_MCE) /* software emulated bits even if host is fixed */
62 #else
63 #define CR4_TRAP_AND_EMULATE_BITS CR4_MCE /* software emulated bits even if host is fixed */
64 #endif
65
66 /* Change of these bits should change vcpuid too */
67 #ifdef CONFIG_NVMX_ENABLED
68 #define CR4_EMULATED_RESERVE_BITS (CR4_CET | CR4_SMXE)
69 #else
70 #define CR4_EMULATED_RESERVE_BITS (CR4_VMXE | CR4_CET | CR4_SMXE)
71 #endif
72
73 /* The physical CR4 value for bits of CR4_EMULATED_RESERVE_BITS */
74 #define CR4_EMRSV_BITS_PHYS_VALUE CR4_VMXE
75
76 /* The CR4 value guest expected to see for bits of CR4_EMULATED_RESERVE_BITS */
77 #define CR4_EMRSV_BITS_VIRT_VALUE 0UL
78 static uint64_t cr4_rsv_bits_guest_value;
79
80 /*
81 * Initial value or reset value of GUEST_CR4, i.e. physical value.
82 * They are likely zeros, but some reserved bits may be not.
83 */
84 static uint64_t initial_guest_cr4;
85
86 /*
87 * Bits not in cr4_passthru_mask/cr4_trap_and_passthru_mask/cr4_trap_and_emulate_mask
88 * are reserved bits, includes at least CR4_EMULATED_RESERVE_BITS
89 */
90 static uint64_t cr4_reserved_bits_mask;
91
92 /*
93 * CR0 follows the same rule of CR4, except it won't inject #GP for reserved bits violation
94 * for the low 32 bits. Instead, it ignores the software write to those reserved bits.
95 */
96 #define CR0_PASSTHRU_BITS (CR0_MP | CR0_EM | CR0_TS | CR0_ET | CR0_NE | CR0_AM)
97 static uint64_t cr0_passthru_mask = CR0_PASSTHRU_BITS; /* bound to flexible bits */
98
99 #define CR0_TRAP_AND_PASSTHRU_BITS (CR0_PE | CR0_PG | CR0_WP)
100 static uint64_t cr0_trap_and_passthru_mask = CR0_TRAP_AND_PASSTHRU_BITS;/* bound to flexible bits */
101 /* software emulated bits even if host is fixed */
102 #define CR0_TRAP_AND_EMULATE_BITS (CR0_CD | CR0_NW)
103
104 /* These bits may be part of flexible bits but reserved to guest */
105 #define CR0_EMULATED_RESERVE_BITS 0UL
106 #define CR0_EMRSV_BITS_PHYS_VALUE 0UL
107 #define CR0_EMRSV_BITS_VIRT_VALUE 0UL
108 static uint64_t cr0_rsv_bits_guest_value;
109 static uint64_t initial_guest_cr0; /* Initial value of GUEST_CR0 */
110 static uint64_t cr0_reserved_bits_mask;
111
112 /* PAE PDPTE bits 1 ~ 2, 5 ~ 8 are always reserved */
113 #define PAE_PDPTE_FIXED_RESVD_BITS 0x00000000000001E6UL
114
load_pdptrs(const struct acrn_vcpu * vcpu)115 static int32_t load_pdptrs(const struct acrn_vcpu *vcpu)
116 {
117 uint64_t guest_cr3 = exec_vmread(VMX_GUEST_CR3);
118 struct cpuinfo_x86 *cpu_info = get_pcpu_info();
119 int32_t ret = 0;
120 uint64_t pdpte[4]; /* Total four PDPTE */
121 uint64_t rsvd_bits_mask;
122 uint8_t maxphyaddr;
123 int32_t i;
124
125 /* check whether the address area pointed by the guest cr3
126 * can be accessed or not
127 */
128 if (copy_from_gpa(vcpu->vm, pdpte, get_pae_pdpt_addr(guest_cr3), sizeof(pdpte)) != 0) {
129 ret = -EFAULT;
130 } else {
131 /* Check if any of the PDPTEs sets both the P flag
132 * and any reserved bit
133 */
134 maxphyaddr = cpu_info->phys_bits;
135 /* reserved bits: 1~2, 5~8, maxphyaddr ~ 63 */
136 rsvd_bits_mask = (63U < maxphyaddr) ? 0UL : (((1UL << (63U - maxphyaddr + 1U)) - 1UL) << maxphyaddr);
137 rsvd_bits_mask |= PAE_PDPTE_FIXED_RESVD_BITS;
138 for (i = 0; i < 4; i++) {
139 if (((pdpte[i] & PAGE_PRESENT) != 0UL) && ((pdpte[i] & rsvd_bits_mask) != 0UL)) {
140 ret = -EFAULT;
141 break;
142 }
143 }
144 }
145
146 if (ret == 0) {
147 exec_vmwrite64(VMX_GUEST_PDPTE0_FULL, pdpte[0]);
148 exec_vmwrite64(VMX_GUEST_PDPTE1_FULL, pdpte[1]);
149 exec_vmwrite64(VMX_GUEST_PDPTE2_FULL, pdpte[2]);
150 exec_vmwrite64(VMX_GUEST_PDPTE3_FULL, pdpte[3]);
151 }
152
153 return ret;
154 }
155
156 /*
157 * Whether the value changes the reserved bits.
158 */
is_valid_cr0(uint64_t cr0)159 static inline bool is_valid_cr0(uint64_t cr0)
160 {
161 return (cr0 & cr0_reserved_bits_mask) == cr0_rsv_bits_guest_value;
162 }
163
164 /*
165 * Certain combination of CR0 write may lead to #GP.
166 */
is_cr0_write_valid(struct acrn_vcpu * vcpu,uint64_t cr0)167 static bool is_cr0_write_valid(struct acrn_vcpu *vcpu, uint64_t cr0)
168 {
169 bool ret = true;
170
171 /*
172 * Set 1 in high 32 bits (part of reserved bits) leads to #GP.
173 */
174 if ((cr0 >> 32UL) != 0UL) {
175 ret = false;
176 } else {
177 /* SDM 25.3 "Changes to instruction behavior in VMX non-root"
178 *
179 * We always require "unrestricted guest" control enabled. So
180 *
181 * CR0.PG = 1, CR4.PAE = 0 and IA32_EFER.LME = 1 is invalid.
182 * CR0.PE = 0 and CR0.PG = 1 is invalid.
183 */
184 if (((cr0 & CR0_PG) != 0UL) && (!is_pae(vcpu)) &&
185 ((vcpu_get_efer(vcpu) & MSR_IA32_EFER_LME_BIT) != 0UL)) {
186 ret = false;
187 } else {
188 if (((cr0 & CR0_PE) == 0UL) && ((cr0 & CR0_PG) != 0UL)) {
189 ret = false;
190 } else {
191 /* SDM 6.15 "Exception and Interrupt Refrerence" GP Exception
192 *
193 * Loading CR0 register with a set NW flag and a clear CD flag
194 * is invalid
195 */
196 if (((cr0 & CR0_CD) == 0UL) && ((cr0 & CR0_NW) != 0UL)) {
197 ret = false;
198 }
199 /* SDM 4.10.1 "Process-Context Identifiers"
200 *
201 * MOV to CR0 causes a general-protection exception if it would
202 * clear CR0.PG to 0 while CR4.PCIDE = 1
203 */
204 if (((cr0 & CR0_PG) == 0UL) && ((vcpu_get_cr4(vcpu) & CR4_PCIDE) != 0UL)) {
205 ret = false;
206 }
207 }
208 }
209 }
210
211 return ret;
212 }
213
214 /*
215 * Handling of CR0:
216 * Assume "unrestricted guest" feature is supported by vmx.
217 * For mode switch, hv only needs to take care of enabling/disabling long mode,
218 * thanks to "unrestricted guest" feature.
219 *
220 * - PE (0) Trapped to track cpu mode.
221 * Set the value according to the value from guest.
222 * - MP (1) Flexible to guest
223 * - EM (2) Flexible to guest
224 * - TS (3) Flexible to guest
225 * - ET (4) Flexible to guest
226 * - NE (5) must always be 1
227 * - WP (16) Trapped to get if it inhibits supervisor level procedures to
228 * write into ro-pages.
229 * - AM (18) Flexible to guest
230 * - NW (29) Trapped to emulate cache disable situation
231 * - CD (30) Trapped to emulate cache disable situation
232 * - PG (31) Trapped to track cpu/paging mode.
233 * Set the value according to the value from guest.
234 */
vmx_write_cr0(struct acrn_vcpu * vcpu,uint64_t value)235 static void vmx_write_cr0(struct acrn_vcpu *vcpu, uint64_t value)
236 {
237 bool err_found = false;
238 /*
239 * For reserved bits of CR0, SDM states:
240 * attempts to set them have no impact, while set to high 32 bits lead to #GP.
241 */
242
243 if (!is_cr0_write_valid(vcpu, value)) {
244 pr_err("Invalid cr0 write operation from guest");
245 vcpu_inject_gp(vcpu, 0U);
246 } else {
247 uint64_t effective_cr0 = (value & ~cr0_reserved_bits_mask) | cr0_rsv_bits_guest_value;
248 uint64_t mask, tmp;
249 uint32_t entry_ctrls;
250 uint64_t cr0_changed_bits = vcpu_get_cr0(vcpu) ^ effective_cr0;
251
252 if ((cr0_changed_bits & CR0_PG) != 0UL) {
253 /* PG bit changes */
254 if ((effective_cr0 & CR0_PG) != 0UL) {
255 /* Enable paging */
256 if ((vcpu_get_efer(vcpu) & MSR_IA32_EFER_LME_BIT) != 0UL) {
257 /* Enable long mode */
258 pr_dbg("VMM: Enable long mode");
259 entry_ctrls = exec_vmread32(VMX_ENTRY_CONTROLS);
260 entry_ctrls |= VMX_ENTRY_CTLS_IA32E_MODE;
261 exec_vmwrite32(VMX_ENTRY_CONTROLS, entry_ctrls);
262
263 vcpu_set_efer(vcpu, vcpu_get_efer(vcpu) | MSR_IA32_EFER_LMA_BIT);
264 } else {
265 pr_dbg("VMM: NOT Enable long mode");
266 if (is_pae(vcpu)) {
267 /* enabled PAE from paging disabled */
268 if (load_pdptrs(vcpu) != 0) {
269 err_found = true;
270 vcpu_inject_gp(vcpu, 0U);
271 }
272 }
273 }
274 } else {
275 /* Disable paging */
276 pr_dbg("disable paginge");
277 if ((vcpu_get_efer(vcpu) & MSR_IA32_EFER_LME_BIT) != 0UL) {
278 /* Disable long mode */
279 pr_dbg("VMM: Disable long mode");
280 entry_ctrls = exec_vmread32(VMX_ENTRY_CONTROLS);
281 entry_ctrls &= ~VMX_ENTRY_CTLS_IA32E_MODE;
282 exec_vmwrite32(VMX_ENTRY_CONTROLS, entry_ctrls);
283
284 vcpu_set_efer(vcpu, vcpu_get_efer(vcpu) & ~MSR_IA32_EFER_LMA_BIT);
285 }
286 }
287 }
288
289 if (!err_found) {
290 /* If CR0.CD or CR0.NW get cr0_changed_bits */
291 if ((cr0_changed_bits & CR0_TRAP_AND_EMULATE_BITS) != 0UL) {
292 /* No action if only CR0.NW is changed */
293 if ((cr0_changed_bits & CR0_CD) != 0UL) {
294 if ((effective_cr0 & CR0_CD) != 0UL) {
295 /*
296 * When the guest requests to set CR0.CD, we don't allow
297 * guest's CR0.CD to be actually set, instead, we write guest
298 * IA32_PAT with all-UC entries to emulate the cache
299 * disabled behavior
300 */
301 exec_vmwrite64(VMX_GUEST_IA32_PAT_FULL, PAT_ALL_UC_VALUE);
302 } else {
303 /* Restore IA32_PAT to enable cache again */
304 exec_vmwrite64(VMX_GUEST_IA32_PAT_FULL,
305 vcpu_get_guest_msr(vcpu, MSR_IA32_PAT));
306 }
307 }
308 }
309
310 if ((cr0_changed_bits & (CR0_PG | CR0_WP | CR0_CD)) != 0UL) {
311 vcpu_make_request(vcpu, ACRN_REQUEST_EPT_FLUSH);
312 }
313
314 mask = cr0_trap_and_passthru_mask | cr0_passthru_mask;
315 tmp = (initial_guest_cr0 & ~mask) | (effective_cr0 & mask);
316
317 exec_vmwrite(VMX_GUEST_CR0, tmp);
318 exec_vmwrite(VMX_CR0_READ_SHADOW, effective_cr0);
319
320 /* clear read cache, next time read should from VMCS */
321 bitmap_clear_nolock(CPU_REG_CR0, &vcpu->reg_cached);
322
323 pr_dbg("VMM: Try to write %016lx, allow to write 0x%016lx to CR0", effective_cr0, tmp);
324 }
325 }
326 }
327
is_valid_cr4(uint64_t cr4)328 static inline bool is_valid_cr4(uint64_t cr4)
329 {
330 return (cr4 & cr4_reserved_bits_mask) == cr4_rsv_bits_guest_value;
331 }
332
/*
 * Check that neither a CR0 value nor a CR4 value modifies reserved bits.
 *
 * @param cr0 CR0 value to check
 * @param cr4 CR4 value to check
 * @return true only when both values pass their reserved-bit check
 *
 * TODO: Implement more comprehensive check here.
 */
bool is_valid_cr0_cr4(uint64_t cr0, uint64_t cr4)
{
	/* Both operands are boolean, so combine them logically rather than bitwise. */
	return is_valid_cr4(cr4) && is_valid_cr0(cr0);
}
340
is_cr4_write_valid(struct acrn_vcpu * vcpu,uint64_t cr4)341 static bool is_cr4_write_valid(struct acrn_vcpu *vcpu, uint64_t cr4)
342 {
343 bool ret = true;
344
345 if (!is_valid_cr4(cr4) || (is_long_mode(vcpu) && ((cr4 & CR4_PAE) == 0UL))) {
346 ret = false;
347 }
348
349 return ret;
350 }
351
352 /*
353 * Handling of CR4:
354 * Assume "unrestricted guest" feature is supported by vmx.
355 *
356 * For CR4, if a guest attempts to change the reserved bits, a #GP fault is injected.
357 * This includes hardware reserved bits in VMX operation (not flexible bits),
358 * and CR4_EMULATED_RESERVE_BITS, or check with cr4_reserved_bits_mask.
359 */
vmx_write_cr4(struct acrn_vcpu * vcpu,uint64_t cr4)360 static void vmx_write_cr4(struct acrn_vcpu *vcpu, uint64_t cr4)
361 {
362 bool err_found = false;
363
364 if (!is_cr4_write_valid(vcpu, cr4)) {
365 pr_err("Invalid cr4 write operation from guest");
366 vcpu_inject_gp(vcpu, 0U);
367 } else {
368 uint64_t mask, tmp;
369 uint64_t cr4_changed_bits = vcpu_get_cr4(vcpu) ^ cr4;
370
371 if ((cr4_changed_bits & CR4_TRAP_AND_PASSTHRU_BITS) != 0UL) {
372 if (((cr4 & CR4_PAE) != 0UL) && (is_paging_enabled(vcpu)) && (!is_long_mode(vcpu))) {
373 if (load_pdptrs(vcpu) != 0) {
374 err_found = true;
375 pr_dbg("Err found,cr4:0xlx,cr0:0x%lx ", cr4, vcpu_get_cr0(vcpu));
376 vcpu_inject_gp(vcpu, 0U);
377 }
378 }
379 vcpu_make_request(vcpu, ACRN_REQUEST_EPT_FLUSH);
380 }
381
382 if (!err_found && ((cr4_changed_bits & CR4_PCIDE) != 0UL)) {
383 /* MOV to CR4 causes a general-protection exception (#GP) if it would change
384 * CR4.PCIDE from 0 to 1 and either IA32_EFER.LMA = 0 or CR3[11:0] != 000H
385 */
386 if ((cr4 & CR4_PCIDE) != 0UL) {
387 uint64_t guest_cr3 = exec_vmread(VMX_GUEST_CR3);
388
389 if ((!is_long_mode(vcpu)) || ((guest_cr3 & 0xFFFUL) != 0UL)) {
390 pr_dbg("Failed to enable CR4.PCID, cr4:0x%lx, cr4_changed_bits:0x%lx,vcpu_cr4:0x%lx cr3:0x%lx",
391 cr4, cr4_changed_bits, vcpu_get_cr4(vcpu), guest_cr3);
392
393 err_found = true;
394 vcpu_inject_gp(vcpu, 0U);
395 }
396 }
397 }
398
399 if (!err_found && ((cr4_changed_bits & CR4_KL) != 0UL)) {
400 if ((cr4 & CR4_KL) != 0UL) {
401 vcpu->arch.cr4_kl_enabled = true;
402 load_iwkey(vcpu);
403 } else {
404 vcpu->arch.cr4_kl_enabled = false;
405 }
406 }
407
408 if (!err_found) {
409 /*
410 * Update the passthru bits.
411 */
412 mask = cr4_trap_and_passthru_mask | cr4_passthru_mask;
413 tmp = (initial_guest_cr4 & ~mask) | (cr4 & mask);
414
415 /*
416 * For all reserved bits (including CR4_EMULATED_RESERVE_BITS), we came here because
417 * the guest is not changing them.
418 */
419 exec_vmwrite(VMX_GUEST_CR4, tmp);
420 exec_vmwrite(VMX_CR4_READ_SHADOW, cr4);
421
422 /* clear read cache, next time read should from VMCS */
423 bitmap_clear_nolock(CPU_REG_CR4, &vcpu->reg_cached);
424
425 pr_dbg("VMM: Try to write %016lx, allow to write 0x%016lx to CR4", cr4, tmp);
426 }
427 }
428 }
429
init_cr0_cr4_flexible_bits(void)430 void init_cr0_cr4_flexible_bits(void)
431 {
432 uint64_t cr0_flexible_bits;
433 uint64_t cr4_flexible_bits;
434 uint64_t fixed0, fixed1;
435
436 /* make sure following MACROs don't have any overlapped set bit.
437 */
438 ASSERT(((CR0_PASSTHRU_BITS ^ CR0_TRAP_AND_PASSTHRU_BITS) ^ CR0_TRAP_AND_EMULATE_BITS) ==
439 (CR0_PASSTHRU_BITS | CR0_TRAP_AND_PASSTHRU_BITS | CR0_TRAP_AND_EMULATE_BITS));
440
441 ASSERT(((CR4_PASSTHRU_BITS ^ CR4_TRAP_AND_PASSTHRU_BITS) ^ CR4_TRAP_AND_EMULATE_BITS) ==
442 (CR4_PASSTHRU_BITS | CR4_TRAP_AND_PASSTHRU_BITS | CR4_TRAP_AND_EMULATE_BITS));
443
444 /* Read the CR0 fixed0 / fixed1 MSR registers */
445 fixed0 = msr_read(MSR_IA32_VMX_CR0_FIXED0);
446 fixed1 = msr_read(MSR_IA32_VMX_CR0_FIXED1);
447
448 pr_dbg("%s:cr0 fixed0 = 0x%016lx, fixed1 = 0x%016lx", __func__, fixed0, fixed1);
449 cr0_flexible_bits = (fixed0 ^ fixed1);
450 /*
451 * HW reports fixed bits for CR0_PG & CR0_PE, but do not check the violation.
452 * ACRN needs to set them for (unrestricted) guest, and therefore view them as
453 * flexible bits.
454 */
455 cr0_flexible_bits |= (CR0_PE | CR0_PG);
456 cr0_passthru_mask &= cr0_flexible_bits;
457 cr0_trap_and_passthru_mask &= cr0_flexible_bits;
458 cr0_reserved_bits_mask = ~(cr0_passthru_mask | cr0_trap_and_passthru_mask | CR0_TRAP_AND_EMULATE_BITS);
459
460 /*
461 * cr0_rsv_bits_guest_value should be sync with always ON bits (1 in both FIXED0/FIXED1 MSRs).
462 * Refer SDM Appendix A.7
463 */
464 cr0_rsv_bits_guest_value = (fixed0 & ~cr0_flexible_bits);
465 initial_guest_cr0 = (cr0_rsv_bits_guest_value & ~CR0_EMULATED_RESERVE_BITS) | CR0_EMRSV_BITS_PHYS_VALUE;
466 cr0_rsv_bits_guest_value = (cr0_rsv_bits_guest_value & ~CR0_EMULATED_RESERVE_BITS) | CR0_EMRSV_BITS_VIRT_VALUE;
467
468 pr_dbg("cr0_flexible_bits:0x%lx, cr0_passthru_mask:%lx, cr0_trap_and_passthru_mask:%lx.\n",
469 cr0_flexible_bits, cr0_passthru_mask, cr0_trap_and_passthru_mask);
470 pr_dbg("cr0_reserved_bits_mask:%lx, cr0_rsv_bits_guest_value:%lx, initial_guest_cr0:%lx.\n",
471 cr0_reserved_bits_mask, cr0_rsv_bits_guest_value, initial_guest_cr0);
472
473 /* Read the CR4 fixed0 / fixed1 MSR registers */
474 fixed0 = msr_read(MSR_IA32_VMX_CR4_FIXED0);
475 fixed1 = msr_read(MSR_IA32_VMX_CR4_FIXED1);
476
477 pr_dbg("%s:cr4 fixed0 = 0x%016lx, fixed1 = 0x%016lx", __func__, fixed0, fixed1);
478 cr4_flexible_bits = (fixed0 ^ fixed1);
479 cr4_passthru_mask &= cr4_flexible_bits;
480 cr4_trap_and_passthru_mask &= cr4_flexible_bits;
481
482 /*
483 * vcpuid should always consult cr4_reserved_bits_mask when reporting capability.
484 *
485 * The guest value of reserved bits are likely identical to fixed bits, but certains
486 * exceptions may be applied, i.e. for CR4_EMULATED_RESERVE_BITS.
487 */
488 cr4_reserved_bits_mask = ~(cr4_passthru_mask | cr4_trap_and_passthru_mask | CR4_TRAP_AND_EMULATE_BITS);
489
490 /*
491 * cr4_reserved_bits_value should be sync with always ON bits (1 in both FIXED0/FIXED1 MSRs).
492 * Refer SDM Appendix A.8
493 */
494 cr4_rsv_bits_guest_value = (fixed0 & ~cr4_flexible_bits);
495
496 #ifdef CONFIG_NVMX_ENABLED
497 cr4_rsv_bits_guest_value &= ~CR4_VMXE;
498 #endif
499 initial_guest_cr4 = (cr4_rsv_bits_guest_value & ~CR4_EMULATED_RESERVE_BITS) | CR4_EMRSV_BITS_PHYS_VALUE;
500 cr4_rsv_bits_guest_value = (cr4_rsv_bits_guest_value & ~CR4_EMULATED_RESERVE_BITS) | CR4_EMRSV_BITS_VIRT_VALUE;
501
502 pr_dbg("cr4_flexible_bits:0x%lx, cr4_passthru_mask:0x%lx, cr4_trap_and_passthru_mask:0x%lx.",
503 cr4_flexible_bits, cr4_passthru_mask, cr4_trap_and_passthru_mask);
504 pr_dbg("cr4_reserved_bits_mask:%lx, cr4_rsv_bits_guest_value:%lx, initial_guest_cr4:%lx.\n",
505 cr4_reserved_bits_mask, cr4_rsv_bits_guest_value, initial_guest_cr4);
506 }
507
init_cr0_cr4_host_guest_mask(void)508 void init_cr0_cr4_host_guest_mask(void)
509 {
510 /*
511 * "1" means the bit is trapped by host, and "0" means passthru to guest..
512 */
513 exec_vmwrite(VMX_CR0_GUEST_HOST_MASK, ~cr0_passthru_mask); /* all bits except passthrubits are trapped */
514 pr_dbg("CR0 guest-host mask value: 0x%016lx", ~cr0_passthru_mask);
515
516 exec_vmwrite(VMX_CR4_GUEST_HOST_MASK, ~cr4_passthru_mask); /* all bits except passthru bits are trapped */
517 pr_dbg("CR4 guest-host mask value: 0x%016lx", ~cr4_passthru_mask);
518 }
519
vcpu_get_cr0(struct acrn_vcpu * vcpu)520 uint64_t vcpu_get_cr0(struct acrn_vcpu *vcpu)
521 {
522 struct run_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;
523
524 if (bitmap_test_and_set_nolock(CPU_REG_CR0, &vcpu->reg_cached) == 0) {
525 ctx->cr0 = (exec_vmread(VMX_CR0_READ_SHADOW) & ~cr0_passthru_mask) |
526 (exec_vmread(VMX_GUEST_CR0) & cr0_passthru_mask);
527 }
528 return ctx->cr0;
529 }
530
/*
 * Write a new CR0 value on behalf of the guest.
 *
 * Logs the requested value together with the guest RIP, then hands the write
 * to vmx_write_cr0(), which validates and applies it.
 *
 * @param vcpu vCPU whose CR0 is written
 * @param val  CR0 value requested
 */
void vcpu_set_cr0(struct acrn_vcpu *vcpu, uint64_t val)
{
	pr_dbg("%s, value: 0x%016lx rip: %016lx",
		__func__, val, vcpu_get_rip(vcpu));

	vmx_write_cr0(vcpu, val);
}
536
vcpu_get_cr2(const struct acrn_vcpu * vcpu)537 uint64_t vcpu_get_cr2(const struct acrn_vcpu *vcpu)
538 {
539 return vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx.cr2;
540 }
541
vcpu_set_cr2(struct acrn_vcpu * vcpu,uint64_t val)542 void vcpu_set_cr2(struct acrn_vcpu *vcpu, uint64_t val)
543 {
544 vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx.cr2 = val;
545 }
546
547 /* This API shall be called after vCPU is created. */
vcpu_get_cr4(struct acrn_vcpu * vcpu)548 uint64_t vcpu_get_cr4(struct acrn_vcpu *vcpu)
549 {
550 struct run_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;
551
552 if (bitmap_test_and_set_nolock(CPU_REG_CR4, &vcpu->reg_cached) == 0) {
553 ctx->cr4 = (exec_vmread(VMX_CR4_READ_SHADOW) & ~cr4_passthru_mask) |
554 (exec_vmread(VMX_GUEST_CR4) & cr4_passthru_mask);
555 }
556 return ctx->cr4;
557 }
558
/*
 * Write a new CR4 value on behalf of the guest.
 *
 * Logs the requested value together with the guest RIP, then hands the write
 * to vmx_write_cr4(), which validates and applies it.
 *
 * @param vcpu vCPU whose CR4 is written
 * @param val  CR4 value requested
 */
void vcpu_set_cr4(struct acrn_vcpu *vcpu, uint64_t val)
{
	pr_dbg("%s, value: 0x%016lx rip: %016lx",
		__func__, val, vcpu_get_rip(vcpu));

	vmx_write_cr4(vcpu, val);
}
564
cr_access_vmexit_handler(struct acrn_vcpu * vcpu)565 int32_t cr_access_vmexit_handler(struct acrn_vcpu *vcpu)
566 {
567 uint64_t reg;
568 uint32_t idx;
569 uint64_t exit_qual;
570 int32_t ret = 0;
571
572 exit_qual = vcpu->arch.exit_qualification;
573 idx = (uint32_t)vm_exit_cr_access_reg_idx(exit_qual);
574
575 ASSERT((idx <= 15U), "index out of range");
576 reg = vcpu_get_gpreg(vcpu, idx);
577
578 switch ((vm_exit_cr_access_type(exit_qual) << 4U) | vm_exit_cr_access_cr_num(exit_qual)) {
579 case 0x00UL:
580 /* mov to cr0 */
581 vcpu_set_cr0(vcpu, reg);
582 break;
583
584 case 0x04UL:
585 /* mov to cr4 */
586 vcpu_set_cr4(vcpu, reg);
587 break;
588 default:
589 ASSERT(false, "Unhandled CR access");
590 ret = -EINVAL;
591 break;
592 }
593
594 TRACE_2L(TRACE_VMEXIT_CR_ACCESS, vm_exit_cr_access_type(exit_qual),
595 vm_exit_cr_access_cr_num(exit_qual));
596
597 return ret;
598 }
599
get_cr4_reserved_bits(void)600 uint64_t get_cr4_reserved_bits(void)
601 {
602 return cr4_reserved_bits_mask;
603 }
604