/*
 * Copyright (C) 2018-2022 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * This file contains the vCPU-related VMCS operations.
 */

#include <types.h>
#include <errno.h>
#include <asm/lib/bits.h>
#include <asm/guest/virq.h>
#include <asm/mmu.h>
#include <asm/guest/vcpu.h>
#include <asm/guest/vm.h>
#include <asm/vmx.h>
#include <asm/vtd.h>
#include <asm/guest/vmexit.h>
#include <asm/pgtable.h>
#include <asm/cpufeatures.h>
#include <trace.h>
#include <logmsg.h>

/*
 * Physical CR4 bits in VMX operation may be either flexible or fixed.
 * Guest CR4 bits may be operable or reserved.
 *
 * All the guest reserved bits should be TRAPed and EMULATed by HV
 * (inject #GP).
 *
 * A guest operable bit falls into one of the following classes:
 * CR4_PASSTHRU_BITS:
 *	Bits that may be passed through to the guest. The actual passthru bits
 *	should be masked by the flexible bits.
 *
 * CR4_TRAP_AND_PASSTHRU_BITS:
 *	The bits are trapped by the HV and HV emulation will eventually write
 *	the guest value to the physical CR4 (GUEST_CR4) too. The actual bits
 *	should be masked by the flexible bits.
 *
 * CR4_TRAP_AND_EMULATE_BITS:
 *	The bits are trapped by the HV and emulated, but the HV updates vCR4 only
 *	(no update to the physical CR4), i.e. pure software emulation.
 *
 * CR4_EMULATED_RESERVE_BITS:
 *	The bits are trapped, but are emulated by injecting a #GP.
 *
 * NOTE: The classes above must not overlap.
 *
 */
#define CR4_PASSTHRU_BITS	(CR4_VME | CR4_PVI | CR4_TSD | CR4_DE | \
				CR4_PGE | CR4_PCE | CR4_OSFXSR | CR4_PCIDE | \
				CR4_OSXSAVE | CR4_FSGSBASE | CR4_OSXMMEXCPT | \
				CR4_UMIP | CR4_LA57)
static uint64_t cr4_passthru_mask = CR4_PASSTHRU_BITS;	/* bound to flexible bits */

#define CR4_TRAP_AND_PASSTHRU_BITS	(CR4_PSE | CR4_PAE | CR4_SMEP | CR4_SMAP | CR4_PKE | CR4_PKS | CR4_KL)
static uint64_t	cr4_trap_and_passthru_mask = CR4_TRAP_AND_PASSTHRU_BITS; /* bound to flexible bits */

#ifdef CONFIG_NVMX_ENABLED
#define CR4_TRAP_AND_EMULATE_BITS	(CR4_VMXE | CR4_MCE) /* software emulated bits even if host is fixed */
#else
#define CR4_TRAP_AND_EMULATE_BITS	CR4_MCE /* software emulated bits even if host is fixed */
#endif

/* Change of these bits should change vcpuid too */
#ifdef CONFIG_NVMX_ENABLED
#define CR4_EMULATED_RESERVE_BITS	(CR4_CET | CR4_SMXE)
#else
#define CR4_EMULATED_RESERVE_BITS	(CR4_VMXE | CR4_CET | CR4_SMXE)
#endif

/* The physical CR4 value for bits of CR4_EMULATED_RESERVE_BITS */
#define CR4_EMRSV_BITS_PHYS_VALUE	CR4_VMXE
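/*
 * Note: the physical value keeps CR4.VMXE set because the pCPU remains in VMX
 * operation, while the guest-visible value below reports these bits as 0.
 */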

/* The CR4 value the guest expects to see for bits of CR4_EMULATED_RESERVE_BITS */
#define CR4_EMRSV_BITS_VIRT_VALUE	0UL
static uint64_t cr4_rsv_bits_guest_value;

/*
 * Initial value or reset value of GUEST_CR4, i.e. the physical value.
 * It is likely zero, but some reserved bits may not be.
 */
static uint64_t initial_guest_cr4;

/*
 * Bits not in cr4_passthru_mask/cr4_trap_and_passthru_mask/CR4_TRAP_AND_EMULATE_BITS
 * are reserved bits, which include at least CR4_EMULATED_RESERVE_BITS.
 */
static uint64_t cr4_reserved_bits_mask;

/*
 * CR0 follows the same rules as CR4, except that it does not inject #GP for
 * reserved-bit violations in the low 32 bits. Instead, software writes to those
 * reserved bits are ignored.
 */
#define CR0_PASSTHRU_BITS	(CR0_MP | CR0_EM | CR0_TS | CR0_ET | CR0_NE | CR0_AM)
static uint64_t cr0_passthru_mask = CR0_PASSTHRU_BITS;	/* bound to flexible bits */

#define CR0_TRAP_AND_PASSTHRU_BITS	(CR0_PE | CR0_PG | CR0_WP)
static uint64_t	cr0_trap_and_passthru_mask = CR0_TRAP_AND_PASSTHRU_BITS;/* bound to flexible bits */
/* software emulated bits even if host is fixed */
#define CR0_TRAP_AND_EMULATE_BITS	(CR0_CD | CR0_NW)

/* These bits may be part of flexible bits but reserved to guest */
#define CR0_EMULATED_RESERVE_BITS	0UL
#define	CR0_EMRSV_BITS_PHYS_VALUE	0UL
#define	CR0_EMRSV_BITS_VIRT_VALUE	0UL
static uint64_t cr0_rsv_bits_guest_value;
static uint64_t initial_guest_cr0;		/* Initial value of GUEST_CR0 */
static uint64_t cr0_reserved_bits_mask;

/* PAE PDPTE bits 1 ~ 2, 5 ~ 8 are always reserved */
#define PAE_PDPTE_FIXED_RESVD_BITS	0x00000000000001E6UL
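/* 0x1E6UL sets exactly bits 1, 2 and 5 through 8 */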

static int32_t load_pdptrs(const struct acrn_vcpu *vcpu)
{
	uint64_t guest_cr3 = exec_vmread(VMX_GUEST_CR3);
	struct cpuinfo_x86 *cpu_info = get_pcpu_info();
	int32_t ret = 0;
	uint64_t pdpte[4]; /* Total four PDPTE */
	uint64_t rsvd_bits_mask;
	uint8_t maxphyaddr;
	int32_t	i;

	/* check whether the address area pointed by the guest cr3
	 * can be accessed or not
	 */
	if (copy_from_gpa(vcpu->vm, pdpte, get_pae_pdpt_addr(guest_cr3), sizeof(pdpte)) != 0) {
		ret = -EFAULT;
	} else {
		/* Check if any of the PDPTEs sets both the P flag
		 * and any reserved bit
		 */
		maxphyaddr = cpu_info->phys_bits;
		/* reserved bits: 1~2, 5~8, maxphyaddr ~ 63 */
		rsvd_bits_mask = (63U < maxphyaddr) ? 0UL : (((1UL << (63U - maxphyaddr + 1U)) - 1UL) << maxphyaddr);
		rsvd_bits_mask |= PAE_PDPTE_FIXED_RESVD_BITS;
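		/*
		 * For example, with phys_bits == 36 the mask now covers
		 * bits 63:36 plus the fixed reserved bits 1-2 and 5-8.
		 */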
		for (i = 0; i < 4; i++) {
			if (((pdpte[i] & PAGE_PRESENT) != 0UL) && ((pdpte[i] & rsvd_bits_mask) != 0UL)) {
				ret = -EFAULT;
				break;
			}
		}
	}

	if (ret == 0) {
		exec_vmwrite64(VMX_GUEST_PDPTE0_FULL, pdpte[0]);
		exec_vmwrite64(VMX_GUEST_PDPTE1_FULL, pdpte[1]);
		exec_vmwrite64(VMX_GUEST_PDPTE2_FULL, pdpte[2]);
		exec_vmwrite64(VMX_GUEST_PDPTE3_FULL, pdpte[3]);
	}

	return ret;
}

/*
 * Returns true if the value keeps all reserved bits at their expected guest value.
 */
static inline bool is_valid_cr0(uint64_t cr0)
{
	return (cr0 & cr0_reserved_bits_mask) == cr0_rsv_bits_guest_value;
}

/*
 * Certain combinations of CR0 writes may lead to #GP.
 */
static bool is_cr0_write_valid(struct acrn_vcpu *vcpu, uint64_t cr0)
{
	bool ret = true;

	/*
	 * Setting 1 in the high 32 bits (part of the reserved bits) leads to #GP.
	 */
	if ((cr0 >> 32UL) != 0UL) {
		ret = false;
	} else {
		/* SDM 25.3 "Changes to instruction behavior in VMX non-root"
		 *
		 * We always require "unrestricted guest" control enabled. So
		 *
		 * CR0.PG = 1, CR4.PAE = 0 and IA32_EFER.LME = 1 is invalid.
		 * CR0.PE = 0 and CR0.PG = 1 is invalid.
		 */
		if (((cr0 & CR0_PG) != 0UL) && (!is_pae(vcpu)) &&
			((vcpu_get_efer(vcpu) & MSR_IA32_EFER_LME_BIT) != 0UL)) {
			ret = false;
		} else {
			if (((cr0 & CR0_PE) == 0UL) && ((cr0 & CR0_PG) != 0UL)) {
				ret = false;
			} else {
				/* SDM 6.15 "Exception and Interrupt Reference" GP Exception
				 *
				 * Loading the CR0 register with a set NW flag and a clear CD flag
				 * is invalid.
				 */
				if (((cr0 & CR0_CD) == 0UL) && ((cr0 & CR0_NW) != 0UL)) {
					ret = false;
				}
				/* SDM 4.10.1 "Process-Context Identifiers"
				 *
				 * MOV to CR0 causes a general-protection exception if it would
				 * clear CR0.PG to 0 while CR4.PCIDE = 1
				 */
				if (((cr0 & CR0_PG) == 0UL) && ((vcpu_get_cr4(vcpu) & CR4_PCIDE) != 0UL)) {
					ret = false;
				}
			}
		}
	}

	return ret;
}

/*
 * Handling of CR0:
 * Assume the "unrestricted guest" feature is supported by VMX.
 * For mode switches, the HV only needs to take care of enabling/disabling
 * long mode, thanks to the "unrestricted guest" feature.
 *
 *   - PE (0)  Trapped to track cpu mode.
 *             Set the value according to the value from guest.
 *   - MP (1)  Flexible to guest
 *   - EM (2)  Flexible to guest
 *   - TS (3)  Flexible to guest
 *   - ET (4)  Flexible to guest
 *   - NE (5)  must always be 1
 *   - WP (16) Trapped to learn whether it inhibits supervisor-level procedures
 *             from writing into read-only pages.
 *   - AM (18) Flexible to guest
 *   - NW (29) Trapped to emulate cache disable situation
 *   - CD (30) Trapped to emulate cache disable situation
 *   - PG (31) Trapped to track cpu/paging mode.
 *             Set the value according to the value from guest.
 */
static void vmx_write_cr0(struct acrn_vcpu *vcpu, uint64_t value)
{
	bool err_found = false;
	/*
	 * For the reserved bits of CR0, the SDM states:
	 * attempts to set them have no impact, while setting any of the high
	 * 32 bits leads to #GP.
	 */

	if (!is_cr0_write_valid(vcpu, value)) {
		pr_err("Invalid cr0 write operation from guest");
		vcpu_inject_gp(vcpu, 0U);
	} else {
		uint64_t effective_cr0 = (value & ~cr0_reserved_bits_mask) | cr0_rsv_bits_guest_value;
		uint64_t mask, tmp;
		uint32_t entry_ctrls;
		uint64_t cr0_changed_bits = vcpu_get_cr0(vcpu) ^ effective_cr0;

		if ((cr0_changed_bits & CR0_PG) != 0UL) {
			/* PG bit changes */
			if ((effective_cr0 & CR0_PG) != 0UL) {
				/* Enable paging */
				if ((vcpu_get_efer(vcpu) & MSR_IA32_EFER_LME_BIT) != 0UL) {
					/* Enable long mode */
					pr_dbg("VMM: Enable long mode");
					entry_ctrls = exec_vmread32(VMX_ENTRY_CONTROLS);
					entry_ctrls |= VMX_ENTRY_CTLS_IA32E_MODE;
					exec_vmwrite32(VMX_ENTRY_CONTROLS, entry_ctrls);

					vcpu_set_efer(vcpu, vcpu_get_efer(vcpu) | MSR_IA32_EFER_LMA_BIT);
				} else {
					pr_dbg("VMM: NOT Enable long mode");
					if (is_pae(vcpu)) {
						/* enabled PAE from paging disabled */
						if (load_pdptrs(vcpu) != 0) {
							err_found = true;
							vcpu_inject_gp(vcpu, 0U);
						}
					}
				}
			} else {
				/* Disable paging */
				pr_dbg("disable paging");
				if ((vcpu_get_efer(vcpu) & MSR_IA32_EFER_LME_BIT) != 0UL) {
					/* Disable long mode */
					pr_dbg("VMM: Disable long mode");
					entry_ctrls = exec_vmread32(VMX_ENTRY_CONTROLS);
					entry_ctrls &= ~VMX_ENTRY_CTLS_IA32E_MODE;
					exec_vmwrite32(VMX_ENTRY_CONTROLS, entry_ctrls);

					vcpu_set_efer(vcpu, vcpu_get_efer(vcpu) & ~MSR_IA32_EFER_LMA_BIT);
				}
			}
		}

		if (!err_found) {
			/* If CR0.CD or CR0.NW changed */
			if ((cr0_changed_bits & CR0_TRAP_AND_EMULATE_BITS) != 0UL) {
				/* No action if only CR0.NW is changed */
				if ((cr0_changed_bits & CR0_CD) != 0UL) {
					if ((effective_cr0 & CR0_CD) != 0UL) {
						/*
						 * When the guest requests to set CR0.CD, we don't allow
						 * guest's CR0.CD to be actually set, instead, we write guest
						 * IA32_PAT with all-UC entries to emulate the cache
						 * disabled behavior
						 */
						exec_vmwrite64(VMX_GUEST_IA32_PAT_FULL, PAT_ALL_UC_VALUE);
					} else {
						/* Restore IA32_PAT to enable cache again */
						exec_vmwrite64(VMX_GUEST_IA32_PAT_FULL,
							vcpu_get_guest_msr(vcpu, MSR_IA32_PAT));
					}
				}
			}

			if ((cr0_changed_bits & (CR0_PG | CR0_WP | CR0_CD)) != 0UL) {
				vcpu_make_request(vcpu, ACRN_REQUEST_EPT_FLUSH);
			}

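			/*
			 * Compose the physical GUEST_CR0: flexible bits (passthru and
			 * trap-and-passthru) take the guest's value, all other bits
			 * keep the fixed value from initial_guest_cr0.
			 */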
			mask = cr0_trap_and_passthru_mask | cr0_passthru_mask;
			tmp = (initial_guest_cr0 & ~mask) | (effective_cr0 & mask);

			exec_vmwrite(VMX_GUEST_CR0, tmp);
			exec_vmwrite(VMX_CR0_READ_SHADOW, effective_cr0);

			/* clear the read cache; the next read should come from the VMCS */
			bitmap_clear_nolock(CPU_REG_CR0, &vcpu->reg_cached);

			pr_dbg("VMM: Try to write %016lx, allow to write 0x%016lx to CR0", effective_cr0, tmp);
		}
	}
}

static inline bool is_valid_cr4(uint64_t cr4)
{
	return (cr4 & cr4_reserved_bits_mask) == cr4_rsv_bits_guest_value;
}

/*
 * TODO: Implement a more comprehensive check here.
 */
bool is_valid_cr0_cr4(uint64_t cr0, uint64_t cr4)
{
	return is_valid_cr4(cr4) && is_valid_cr0(cr0);
}

static bool is_cr4_write_valid(struct acrn_vcpu *vcpu, uint64_t cr4)
{
	bool ret = true;

	if (!is_valid_cr4(cr4) || (is_long_mode(vcpu) && ((cr4 & CR4_PAE) == 0UL))) {
		ret = false;
	}

	return ret;
}

/*
 * Handling of CR4:
 * Assume the "unrestricted guest" feature is supported by VMX.
 *
 * For CR4, if a guest attempts to change the reserved bits, a #GP fault is injected.
 * This includes hardware reserved bits in VMX operation (not flexible bits)
 * as well as CR4_EMULATED_RESERVE_BITS, i.e. the bits covered by cr4_reserved_bits_mask.
 */
static void vmx_write_cr4(struct acrn_vcpu *vcpu, uint64_t cr4)
{
	bool err_found = false;

	if (!is_cr4_write_valid(vcpu, cr4)) {
		pr_err("Invalid cr4 write operation from guest");
		vcpu_inject_gp(vcpu, 0U);
	} else {
		uint64_t mask, tmp;
		uint64_t cr4_changed_bits = vcpu_get_cr4(vcpu) ^ cr4;

		if ((cr4_changed_bits & CR4_TRAP_AND_PASSTHRU_BITS) != 0UL) {
			if (((cr4 & CR4_PAE) != 0UL) && (is_paging_enabled(vcpu)) && (!is_long_mode(vcpu))) {
				if (load_pdptrs(vcpu) != 0) {
					err_found = true;
					pr_dbg("Err found, cr4:0x%lx, cr0:0x%lx ", cr4, vcpu_get_cr0(vcpu));
					vcpu_inject_gp(vcpu, 0U);
				}
			}
			vcpu_make_request(vcpu, ACRN_REQUEST_EPT_FLUSH);
		}

		if (!err_found && ((cr4_changed_bits & CR4_PCIDE) != 0UL)) {
			/* MOV to CR4 causes a general-protection exception (#GP) if it would change
			 * CR4.PCIDE from 0 to 1 and either IA32_EFER.LMA = 0 or CR3[11:0] != 000H
			 */
			if ((cr4 & CR4_PCIDE) != 0UL) {
				uint64_t guest_cr3 = exec_vmread(VMX_GUEST_CR3);

				if ((!is_long_mode(vcpu)) || ((guest_cr3 & 0xFFFUL) != 0UL)) {
					pr_dbg("Failed to enable CR4.PCIDE, cr4:0x%lx, cr4_changed_bits:0x%lx,vcpu_cr4:0x%lx cr3:0x%lx",
						cr4, cr4_changed_bits, vcpu_get_cr4(vcpu), guest_cr3);

					err_found = true;
					vcpu_inject_gp(vcpu, 0U);
				}
			}
		}

		if (!err_found && ((cr4_changed_bits & CR4_KL) != 0UL)) {
			if ((cr4 & CR4_KL) != 0UL) {
				vcpu->arch.cr4_kl_enabled = true;
				load_iwkey(vcpu);
			} else {
				vcpu->arch.cr4_kl_enabled = false;
			}
		}

		if (!err_found) {
			/*
			 * Update the passthru bits.
			 */
			mask = cr4_trap_and_passthru_mask | cr4_passthru_mask;
			tmp = (initial_guest_cr4 & ~mask) | (cr4 & mask);

			/*
			 * For all reserved bits (including CR4_EMULATED_RESERVE_BITS), we get here because
			 * the guest is not changing them.
			 */
			exec_vmwrite(VMX_GUEST_CR4, tmp);
			exec_vmwrite(VMX_CR4_READ_SHADOW, cr4);

			/* clear the read cache; the next read should come from the VMCS */
			bitmap_clear_nolock(CPU_REG_CR4, &vcpu->reg_cached);

			pr_dbg("VMM: Try to write %016lx, allow to write 0x%016lx to CR4", cr4, tmp);
		}
	}
}

void init_cr0_cr4_flexible_bits(void)
{
	uint64_t cr0_flexible_bits;
	uint64_t cr4_flexible_bits;
	uint64_t fixed0, fixed1;

	/* Make sure the following macros don't have any overlapping set bits. */
	ASSERT(((CR0_PASSTHRU_BITS ^ CR0_TRAP_AND_PASSTHRU_BITS) ^ CR0_TRAP_AND_EMULATE_BITS) ==
			(CR0_PASSTHRU_BITS | CR0_TRAP_AND_PASSTHRU_BITS | CR0_TRAP_AND_EMULATE_BITS));

	ASSERT(((CR4_PASSTHRU_BITS ^ CR4_TRAP_AND_PASSTHRU_BITS) ^ CR4_TRAP_AND_EMULATE_BITS) ==
			(CR4_PASSTHRU_BITS | CR4_TRAP_AND_PASSTHRU_BITS | CR4_TRAP_AND_EMULATE_BITS));

	/* Read the CR0 fixed0 / fixed1 MSR registers */
	fixed0 = msr_read(MSR_IA32_VMX_CR0_FIXED0);
	fixed1 = msr_read(MSR_IA32_VMX_CR0_FIXED1);

	pr_dbg("%s:cr0 fixed0 = 0x%016lx, fixed1 = 0x%016lx", __func__, fixed0, fixed1);
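	/*
	 * A bit is flexible when it is 0 in FIXED0 (may be cleared) and 1 in
	 * FIXED1 (may be set), so the XOR of the two MSRs yields exactly the
	 * flexible bits (SDM Appendix A.7/A.8).
	 */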
	cr0_flexible_bits = (fixed0 ^ fixed1);
	/*
	 * HW reports fixed bits for CR0_PG & CR0_PE, but does not check for violations.
	 * ACRN needs to set them for the (unrestricted) guest, and therefore views them
	 * as flexible bits.
	 */
	cr0_flexible_bits |= (CR0_PE | CR0_PG);
	cr0_passthru_mask &= cr0_flexible_bits;
	cr0_trap_and_passthru_mask &= cr0_flexible_bits;
	cr0_reserved_bits_mask = ~(cr0_passthru_mask | cr0_trap_and_passthru_mask | CR0_TRAP_AND_EMULATE_BITS);

	/*
	 * cr0_rsv_bits_guest_value should be in sync with the always-ON bits (1 in both FIXED0/FIXED1 MSRs).
	 * Refer to SDM Appendix A.7
	 */
	cr0_rsv_bits_guest_value = (fixed0 & ~cr0_flexible_bits);
	initial_guest_cr0 = (cr0_rsv_bits_guest_value & ~CR0_EMULATED_RESERVE_BITS) | CR0_EMRSV_BITS_PHYS_VALUE;
	cr0_rsv_bits_guest_value = (cr0_rsv_bits_guest_value & ~CR0_EMULATED_RESERVE_BITS) | CR0_EMRSV_BITS_VIRT_VALUE;

	pr_dbg("cr0_flexible_bits:0x%lx, cr0_passthru_mask:%lx, cr0_trap_and_passthru_mask:%lx.\n",
		cr0_flexible_bits, cr0_passthru_mask, cr0_trap_and_passthru_mask);
	pr_dbg("cr0_reserved_bits_mask:%lx, cr0_rsv_bits_guest_value:%lx, initial_guest_cr0:%lx.\n",
		cr0_reserved_bits_mask, cr0_rsv_bits_guest_value, initial_guest_cr0);

	/* Read the CR4 fixed0 / fixed1 MSR registers */
	fixed0 = msr_read(MSR_IA32_VMX_CR4_FIXED0);
	fixed1 = msr_read(MSR_IA32_VMX_CR4_FIXED1);

	pr_dbg("%s:cr4 fixed0 = 0x%016lx, fixed1 = 0x%016lx", __func__, fixed0, fixed1);
	cr4_flexible_bits = (fixed0 ^ fixed1);
	cr4_passthru_mask &= cr4_flexible_bits;
	cr4_trap_and_passthru_mask &= cr4_flexible_bits;

	/*
	 * vcpuid should always consult cr4_reserved_bits_mask when reporting capability.
	 *
	 * The guest value of the reserved bits is likely identical to the fixed bits, but certain
	 * exceptions may apply, i.e. for CR4_EMULATED_RESERVE_BITS.
	 */
	cr4_reserved_bits_mask = ~(cr4_passthru_mask | cr4_trap_and_passthru_mask | CR4_TRAP_AND_EMULATE_BITS);

	/*
	 * cr4_rsv_bits_guest_value should be in sync with the always-ON bits (1 in both FIXED0/FIXED1 MSRs).
	 * Refer to SDM Appendix A.8
	 */
	cr4_rsv_bits_guest_value = (fixed0 & ~cr4_flexible_bits);

#ifdef CONFIG_NVMX_ENABLED
	cr4_rsv_bits_guest_value &= ~CR4_VMXE;
#endif
	initial_guest_cr4 = (cr4_rsv_bits_guest_value & ~CR4_EMULATED_RESERVE_BITS) | CR4_EMRSV_BITS_PHYS_VALUE;
	cr4_rsv_bits_guest_value = (cr4_rsv_bits_guest_value & ~CR4_EMULATED_RESERVE_BITS) | CR4_EMRSV_BITS_VIRT_VALUE;

	pr_dbg("cr4_flexible_bits:0x%lx, cr4_passthru_mask:0x%lx, cr4_trap_and_passthru_mask:0x%lx.",
		cr4_flexible_bits, cr4_passthru_mask, cr4_trap_and_passthru_mask);
	pr_dbg("cr4_reserved_bits_mask:%lx, cr4_rsv_bits_guest_value:%lx, initial_guest_cr4:%lx.\n",
		cr4_reserved_bits_mask, cr4_rsv_bits_guest_value, initial_guest_cr4);
}

void init_cr0_cr4_host_guest_mask(void)
{
	/*
	 * "1" means the bit is trapped by the host, and "0" means passthru to the guest.
	 */
	exec_vmwrite(VMX_CR0_GUEST_HOST_MASK, ~cr0_passthru_mask); /* all bits except passthru bits are trapped */
	pr_dbg("CR0 guest-host mask value: 0x%016lx", ~cr0_passthru_mask);

	exec_vmwrite(VMX_CR4_GUEST_HOST_MASK, ~cr4_passthru_mask); /* all bits except passthru bits are trapped */
	pr_dbg("CR4 guest-host mask value: 0x%016lx", ~cr4_passthru_mask);
}

uint64_t vcpu_get_cr0(struct acrn_vcpu *vcpu)
{
	struct run_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;

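	/*
	 * The cached guest CR0 is assembled lazily: trapped bits are taken from
	 * the read shadow, while passthru bits are read back from GUEST_CR0
	 * since the guest may have changed them without a VM exit.
	 */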
	if (bitmap_test_and_set_nolock(CPU_REG_CR0, &vcpu->reg_cached) == 0) {
		ctx->cr0 = (exec_vmread(VMX_CR0_READ_SHADOW) & ~cr0_passthru_mask) |
			(exec_vmread(VMX_GUEST_CR0) & cr0_passthru_mask);
	}
	return ctx->cr0;
}

void vcpu_set_cr0(struct acrn_vcpu *vcpu, uint64_t val)
{
	pr_dbg("%s, value: 0x%016lx rip: %016lx", __func__, val, vcpu_get_rip(vcpu));
	vmx_write_cr0(vcpu, val);
}

uint64_t vcpu_get_cr2(const struct acrn_vcpu *vcpu)
{
	return vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx.cr2;
}

void vcpu_set_cr2(struct acrn_vcpu *vcpu, uint64_t val)
{
	vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx.cr2 = val;
}

/* This API shall be called after vCPU is created. */
uint64_t vcpu_get_cr4(struct acrn_vcpu *vcpu)
{
	struct run_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;

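	/* Same lazy composition as vcpu_get_cr0(): shadow for trapped bits, GUEST_CR4 for passthru bits. */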
	if (bitmap_test_and_set_nolock(CPU_REG_CR4, &vcpu->reg_cached) == 0) {
		ctx->cr4 = (exec_vmread(VMX_CR4_READ_SHADOW) & ~cr4_passthru_mask) |
			(exec_vmread(VMX_GUEST_CR4) & cr4_passthru_mask);
	}
	return ctx->cr4;
}

void vcpu_set_cr4(struct acrn_vcpu *vcpu, uint64_t val)
{
	pr_dbg("%s, value: 0x%016lx rip: %016lx", __func__, val, vcpu_get_rip(vcpu));
	vmx_write_cr4(vcpu, val);
}

int32_t cr_access_vmexit_handler(struct acrn_vcpu *vcpu)
{
	uint64_t reg;
	uint32_t idx;
	uint64_t exit_qual;
	int32_t ret = 0;

	exit_qual = vcpu->arch.exit_qualification;
	idx = (uint32_t)vm_exit_cr_access_reg_idx(exit_qual);

	ASSERT((idx <= 15U), "index out of range");
	reg = vcpu_get_gpreg(vcpu, idx);

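	/*
	 * The exit qualification encodes the CR number in bits 3:0 and the
	 * access type in bits 5:4, so packing (type << 4) | num yields
	 * 0x00 for "MOV to CR0" and 0x04 for "MOV to CR4".
	 */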
	switch ((vm_exit_cr_access_type(exit_qual) << 4U) | vm_exit_cr_access_cr_num(exit_qual)) {
	case 0x00UL:
		/* mov to cr0 */
		vcpu_set_cr0(vcpu, reg);
		break;

	case 0x04UL:
		/* mov to cr4 */
		vcpu_set_cr4(vcpu, reg);
		break;
	default:
		ASSERT(false, "Unhandled CR access");
		ret = -EINVAL;
		break;
	}

	TRACE_2L(TRACE_VMEXIT_CR_ACCESS, vm_exit_cr_access_type(exit_qual),
			vm_exit_cr_access_cr_num(exit_qual));

	return ret;
}

uint64_t get_cr4_reserved_bits(void)
{
	return cr4_reserved_bits_mask;
}