/*
 * Copyright (C) 2018-2022 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * This file contains the VMCS operations that are vCPU related.
 */

#include <types.h>
#include <asm/guest/vmcs.h>
#include <asm/guest/vcpu.h>
#include <asm/guest/vm.h>
#include <asm/vmx.h>
#include <asm/gdt.h>
#include <asm/pgtable.h>
#include <asm/per_cpu.h>
#include <asm/cpu_caps.h>
#include <asm/cpufeatures.h>
#include <asm/guest/vmexit.h>
#include <logmsg.h>

/* rip, rsp, ia32_efer and rflags are written to VMCS in start_vcpu */
static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
	uint64_t cr4)
{
	struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context];
	struct ext_context *ectx = &ctx->ext_ctx;

	pr_dbg("%s, cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

	vcpu_set_cr4(vcpu, cr4);
	vcpu_set_cr0(vcpu, cr0);
	exec_vmwrite(VMX_GUEST_CR3, cr3);

	exec_vmwrite(VMX_GUEST_GDTR_BASE, ectx->gdtr.base);
	pr_dbg("VMX_GUEST_GDTR_BASE: 0x%016lx", ectx->gdtr.base);
	exec_vmwrite32(VMX_GUEST_GDTR_LIMIT, ectx->gdtr.limit);
	pr_dbg("VMX_GUEST_GDTR_LIMIT: 0x%08x", ectx->gdtr.limit);

	exec_vmwrite(VMX_GUEST_IDTR_BASE, ectx->idtr.base);
	pr_dbg("VMX_GUEST_IDTR_BASE: 0x%016lx", ectx->idtr.base);
	exec_vmwrite32(VMX_GUEST_IDTR_LIMIT, ectx->idtr.limit);
	pr_dbg("VMX_GUEST_IDTR_LIMIT: 0x%08x", ectx->idtr.limit);

	/* init segment selectors: es, cs, ss, ds, fs, gs, ldtr, tr */
	load_segment(ectx->cs, VMX_GUEST_CS);
	load_segment(ectx->ss, VMX_GUEST_SS);
	load_segment(ectx->ds, VMX_GUEST_DS);
	load_segment(ectx->es, VMX_GUEST_ES);
	load_segment(ectx->fs, VMX_GUEST_FS);
	load_segment(ectx->gs, VMX_GUEST_GS);
	load_segment(ectx->tr, VMX_GUEST_TR);
	load_segment(ectx->ldtr, VMX_GUEST_LDTR);

	/* init guest ia32_misc_enable value for guest read */
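	/* Note: clearing the MONITOR-FSM enable bit below is presumably meant to keep
	 * the guest's view of IA32_MISC_ENABLE consistent with the MONITOR/MWAIT
	 * interception configured in init_exec_ctrl(). */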
	vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE,
		(msr_read(MSR_IA32_MISC_ENABLE) & (~MSR_IA32_MISC_ENABLE_MONITOR_ENA)));

	vcpu_set_guest_msr(vcpu, MSR_IA32_PERF_CTL, msr_read(MSR_IA32_PERF_CTL));

	/* fixed values */
	exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U);
	exec_vmwrite(VMX_GUEST_IA32_SYSENTER_ESP, 0UL);
	exec_vmwrite(VMX_GUEST_IA32_SYSENTER_EIP, 0UL);
	exec_vmwrite(VMX_GUEST_PENDING_DEBUG_EXCEPT, 0UL);
	exec_vmwrite(VMX_GUEST_IA32_DEBUGCTL_FULL, 0UL);
	exec_vmwrite32(VMX_GUEST_INTERRUPTIBILITY_INFO, 0U);
	exec_vmwrite32(VMX_GUEST_ACTIVITY_STATE, 0U);
	exec_vmwrite32(VMX_GUEST_SMBASE, 0U);
	vcpu_set_guest_msr(vcpu, MSR_IA32_PAT, PAT_POWER_ON_VALUE);
	exec_vmwrite(VMX_GUEST_IA32_PAT_FULL, PAT_POWER_ON_VALUE);
	exec_vmwrite(VMX_GUEST_DR7, DR7_INIT_VALUE);
}

static void init_guest_state(struct acrn_vcpu *vcpu)
{
	struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context];

	pr_dbg("%s, cr0:0x%lx, cr4:0x%lx.\n", __func__,
		ctx->run_ctx.cr0, ctx->run_ctx.cr4);

	init_guest_vmx(vcpu, ctx->run_ctx.cr0, ctx->ext_ctx.cr3,
			ctx->run_ctx.cr4 & ~(CR4_VMXE | CR4_SMXE | CR4_MCE));
}

void init_host_state(void)
{
	uint16_t value16;
	uint64_t value64;
	uint64_t value;
	uint64_t tss_addr;
	uint64_t gdt_base;
	uint64_t idt_base;

	pr_dbg("Initialize host state");

	/***************************************************
	 * 16-bit fields
	 * Copy the current ES, CS, SS, DS, FS and GS selector values into the
	 * corresponding 16-bit host segment-selector fields, and set up the
	 * host Task Register (TR) selector.
	 ***************************************************/
	CPU_SEG_READ(es, &value16);
	exec_vmwrite16(VMX_HOST_ES_SEL, value16);
	pr_dbg("VMX_HOST_ES_SEL: 0x%hx ", value16);

	CPU_SEG_READ(cs, &value16);
	exec_vmwrite16(VMX_HOST_CS_SEL, value16);
	pr_dbg("VMX_HOST_CS_SEL: 0x%hx ", value16);

	CPU_SEG_READ(ss, &value16);
	exec_vmwrite16(VMX_HOST_SS_SEL, value16);
	pr_dbg("VMX_HOST_SS_SEL: 0x%hx ", value16);

	CPU_SEG_READ(ds, &value16);
	exec_vmwrite16(VMX_HOST_DS_SEL, value16);
	pr_dbg("VMX_HOST_DS_SEL: 0x%hx ", value16);

	CPU_SEG_READ(fs, &value16);
	exec_vmwrite16(VMX_HOST_FS_SEL, value16);
	pr_dbg("VMX_HOST_FS_SEL: 0x%hx ", value16);

	CPU_SEG_READ(gs, &value16);
	exec_vmwrite16(VMX_HOST_GS_SEL, value16);
	pr_dbg("VMX_HOST_GS_SEL: 0x%hx ", value16);

	exec_vmwrite16(VMX_HOST_TR_SEL, HOST_GDT_RING0_CPU_TSS_SEL);
	pr_dbg("VMX_HOST_TR_SEL: 0x%hx ", HOST_GDT_RING0_CPU_TSS_SEL);

	/******************************************************
	 * 32-bit fields
	 * Set up the 32-bit host state fields - pg 3418 B.3.3. Set limits for
	 * ES, CS, SS, DS, FS, GS, LDTR, TR, GDTR and IDTR.
	 ******************************************************/

	/* TODO: Should guest GDTB point to host GDTB ? */
	/* Obtain the current global descriptor table base */
	gdt_base = sgdt();

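	/* SGDT/SIDT return linear base addresses; if bit 47 is set, sign-extend so
	 * that the value written to the host GDTR/IDTR base fields is a canonical
	 * 64-bit address, as required by the VM-entry checks on host state. */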
	if (((gdt_base >> 47U) & 0x1UL) != 0UL) {
		gdt_base |= 0xffff000000000000UL;
	}

	/* Set up the host GDTR base field with the current GDT base */
	exec_vmwrite(VMX_HOST_GDTR_BASE, gdt_base);
	pr_dbg("VMX_HOST_GDTR_BASE: 0x%016lx ", gdt_base);

	tss_addr = hva2hpa((void *)&get_cpu_var(tss));
	/* Set up host TR base fields */
	exec_vmwrite(VMX_HOST_TR_BASE, tss_addr);
	pr_dbg("VMX_HOST_TR_BASE: 0x%016lx ", tss_addr);

	/* Obtain the current interrupt descriptor table base */
	idt_base = sidt();
	/* base */
	if (((idt_base >> 47U) & 0x1UL) != 0UL) {
		idt_base |= 0xffff000000000000UL;
	}

	exec_vmwrite(VMX_HOST_IDTR_BASE, idt_base);
	pr_dbg("VMX_HOST_IDTR_BASE: 0x%016lx ", idt_base);

	/**************************************************/
	/* 64-bit fields */
	pr_dbg("64-bit********");

	value64 = msr_read(MSR_IA32_PAT);
	exec_vmwrite64(VMX_HOST_IA32_PAT_FULL, value64);
	pr_dbg("VMX_HOST_IA32_PAT: 0x%016lx ", value64);

	value64 = msr_read(MSR_IA32_EFER);
	exec_vmwrite64(VMX_HOST_IA32_EFER_FULL, value64);
	pr_dbg("VMX_HOST_IA32_EFER: 0x%016lx ", value64);

	/**************************************************/
	/* Natural width fields */
	pr_dbg("Natural-width********");
	/* Set up host CR0 field */
	CPU_CR_READ(cr0, &value);
	exec_vmwrite(VMX_HOST_CR0, value);
	pr_dbg("VMX_HOST_CR0: 0x%016lx ", value);

	/* Set up host CR3 field */
	CPU_CR_READ(cr3, &value);
	exec_vmwrite(VMX_HOST_CR3, value);
	pr_dbg("VMX_HOST_CR3: 0x%016lx ", value);

	/* Set up host CR4 field */
	CPU_CR_READ(cr4, &value);
	exec_vmwrite(VMX_HOST_CR4, value);
	pr_dbg("VMX_HOST_CR4: 0x%016lx ", value);

	/* Set up host FS and GS base addresses */
	value = msr_read(MSR_IA32_FS_BASE);
	exec_vmwrite(VMX_HOST_FS_BASE, value);
	pr_dbg("VMX_HOST_FS_BASE: 0x%016lx ", value);
	value = msr_read(MSR_IA32_GS_BASE);
	exec_vmwrite(VMX_HOST_GS_BASE, value);
	pr_dbg("VMX_HOST_GS_BASE: 0x%016lx ", value);

	/* Set up host instruction pointer on VM Exit */
	value64 = (uint64_t)&vm_exit;
	pr_dbg("HOST RIP on VMExit %016lx ", value64);
	exec_vmwrite(VMX_HOST_RIP, value64);
	pr_dbg("vm exit return address = %016lx ", value64);

	/* As a type 1 hypervisor, just init sysenter fields to 0 */
	exec_vmwrite(VMX_HOST_IA32_SYSENTER_ESP, 0UL);
	exec_vmwrite(VMX_HOST_IA32_SYSENTER_EIP, 0UL);

	/* We use the IA32_SYSENTER_CS MSR to cache the pCPU ID. */
	exec_vmwrite32(VMX_HOST_IA32_SYSENTER_CS, (uint32_t)msr_read(ACRN_PSEUDO_PCPUID_MSR));
}

static uint32_t check_vmx_ctrl(uint32_t msr, uint32_t ctrl_req)
{
	uint64_t vmx_msr;
	uint32_t vmx_msr_low, vmx_msr_high;
	uint32_t ctrl = ctrl_req;

	vmx_msr = msr_read(msr);
	vmx_msr_low  = (uint32_t)vmx_msr;
	vmx_msr_high = (uint32_t)(vmx_msr >> 32U);
	pr_dbg("VMX ctrl MSR 0x%x: low=0x%x, high=0x%x\n", msr, vmx_msr_low, vmx_msr_high);

	/* High 32 bits: allowed 1-settings (a clear bit means the control must be 0).
	 * Low 32 bits:  allowed 0-settings (a set bit means the control must be 1).
	 */
	ctrl &= vmx_msr_high;
	ctrl |= vmx_msr_low;

	if ((ctrl_req & ~ctrl) != 0U) {
		pr_info("VMX ctrl 0x%x not fully enabled: current capabilities are 0x%x (full capabilities are 0x%x)\n",
						msr, ctrl, ctrl_req);
	}

	return ctrl;
}

static uint64_t check_vmx_ctrl_64(uint32_t msr, uint64_t ctrl_req)
{
	uint64_t vmx_msr;
	uint64_t ctrl = ctrl_req;

	vmx_msr = msr_read(msr);

	/* All 64 bits are allowed 1-settings */
	ctrl &= vmx_msr;

	if ((ctrl_req & ~ctrl) != 0UL) {
		pr_info("VMX ctrl 0x%x not fully enabled: current capabilities are 0x%lx (full capabilities are 0x%lx)\n",
						msr, ctrl, ctrl_req);
	}

	return ctrl;
}

static void init_exec_ctrl(struct acrn_vcpu *vcpu)
{
	uint32_t value32;
	uint64_t value64;
	struct acrn_vm *vm = vcpu->vm;

	/* Log messages to show initializing VMX execution controls */
	pr_dbg("Initialize execution control ");

	/* Set up pin-based VM-execution controls to enable VM exits on
	 * external interrupts - pg 2899 24.6.1
	 */
	/* enable external interrupt VM Exit */
	value32 = check_vmx_ctrl(MSR_IA32_VMX_PINBASED_CTLS, VMX_PINBASED_CTLS_IRQ_EXIT);

	if (is_apicv_advanced_feature_supported()) {
		value32 |= VMX_PINBASED_CTLS_POST_IRQ;
	}

	exec_vmwrite32(VMX_PIN_VM_EXEC_CONTROLS, value32);
	pr_dbg("VMX_PIN_VM_EXEC_CONTROLS: 0x%x ", value32);

	/* Set up primary processor-based VM-execution controls - pg 2900
	 * 24.6.2. Set up for:
	 * Enable TSC offsetting
	 * Enable TSC exiting
	 * guest access to IO bit-mapped ports causes VM exit
	 * guest access to MSR causes VM exit
	 * Activate secondary controls
	 * Activate tertiary controls
	 */
	/* These are bits 1, 4-6, 8, 13-16 and 26; the corresponding bits of
	 * the IA32_VMX_PROCBASED_CTLS MSR are always read as 1 --- A.3.2
	 */
	value32 = check_vmx_ctrl(MSR_IA32_VMX_PROCBASED_CTLS,
			 VMX_PROCBASED_CTLS_TSC_OFF | VMX_PROCBASED_CTLS_TPR_SHADOW |
			 VMX_PROCBASED_CTLS_IO_BITMAP | VMX_PROCBASED_CTLS_MSR_BITMAP |
			 VMX_PROCBASED_CTLS_HLT | VMX_PROCBASED_CTLS_SECONDARY | VMX_PROCBASED_CTLS_TERTIARY);

	/* Disable VM exit on CR3 and CR8 accesses */
	value32 &= ~(VMX_PROCBASED_CTLS_CR3_LOAD | VMX_PROCBASED_CTLS_CR3_STORE);
	value32 &= ~(VMX_PROCBASED_CTLS_CR8_LOAD | VMX_PROCBASED_CTLS_CR8_STORE);

	/*
	 * Disable VM exit on invlpg execution.
	 */
	value32 &= ~VMX_PROCBASED_CTLS_INVLPG;

	/*
	 * Enable VM exit on rdpmc execution, except for VMs with PMU passthrough
	 * configured (core-partitioned VMs such as an RTVM).
	 */
	if (!is_pmu_pt_configured(vcpu->vm)) {
		value32 |= VMX_PROCBASED_CTLS_RDPMC;
	}

	/*
	 * Make MONITOR/MWAIT cause a VM exit.
	 */
	value32 |= VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_MONITOR;

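	/* Cache the primary processor-based controls; switch_apicv_mode_x2apic()
	 * later edits this copy (e.g. dropping "use TPR shadow" and HLT exiting)
	 * and rewrites the VMCS field from it. */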
	vcpu->arch.proc_vm_exec_ctrls = value32;
	exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
	pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32);

	/* Set up secondary processor-based VM-execution controls - pg 2901
	 * 24.6.2. Set up for: Enable EPT, Enable VPID, Enable RDTSCP,
	 * Enable Unrestricted guest (optional)
	 */
	value32 = check_vmx_ctrl(MSR_IA32_VMX_PROCBASED_CTLS2,
			VMX_PROCBASED_CTLS2_VAPIC | VMX_PROCBASED_CTLS2_EPT | VMX_PROCBASED_CTLS2_VPID |
			VMX_PROCBASED_CTLS2_RDTSCP | VMX_PROCBASED_CTLS2_UNRESTRICT | VMX_PROCBASED_CTLS2_XSVE_XRSTR |
			VMX_PROCBASED_CTLS2_PAUSE_LOOP | VMX_PROCBASED_CTLS2_UWAIT_PAUSE);

	/* SDM Vol3, 25.3: setting the "enable INVPCID" VM-execution control to 1
	 * with "INVLPG exiting" disabled passes the INVPCID instruction through
	 * to the guest, if the instruction is supported.
	 */
	if (pcpu_has_cap(X86_FEATURE_INVPCID)) {
		value32 |= VMX_PROCBASED_CTLS2_INVPCID;
	} else {
		value32 &= ~VMX_PROCBASED_CTLS2_INVPCID;
	}

	/* Clear "enable user wait and pause" so that TPAUSE, UMONITOR and UWAIT
	 * cause a #UD in the guest. */
	value32 &= ~VMX_PROCBASED_CTLS2_UWAIT_PAUSE;

	if (is_apicv_advanced_feature_supported()) {
		value32 |= VMX_PROCBASED_CTLS2_VIRQ;
		value32 |= VMX_PROCBASED_CTLS2_VAPIC_REGS;
	} else {
		/*
		 * This field exists only on processors that support
		 * the 1-setting of the "use TPR shadow"
		 * VM-execution control.
		 *
		 * Set up TPR threshold for virtual interrupt delivery
		 * - pg 2904 24.6.8
		 */
		exec_vmwrite32(VMX_TPR_THRESHOLD, 0U);
	}

	if ((value32 & VMX_PROCBASED_CTLS2_XSVE_XRSTR) != 0U) {
		exec_vmwrite64(VMX_XSS_EXITING_BITMAP_FULL, 0UL);
		vcpu->arch.xsave_enabled = true;
	} else {
		value32 &= ~VMX_PROCBASED_CTLS2_XSVE_XRSTR;
	}

	value32 |= VMX_PROCBASED_CTLS2_WBINVD;

	exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS2, value32);
	pr_dbg("VMX_PROC_VM_EXEC_CONTROLS2: 0x%x ", value32);

	/* Set up tertiary processor-based VM-execution controls */
	if ((exec_vmread32(VMX_PROC_VM_EXEC_CONTROLS) & VMX_PROCBASED_CTLS_TERTIARY) != 0U) {
		/* Enable KeyLocker if supported */
		value64 = check_vmx_ctrl_64(MSR_IA32_VMX_PROCBASED_CTLS3, VMX_PROCBASED_CTLS3_LOADIWKEY);

		exec_vmwrite64(VMX_PROC_VM_EXEC_CONTROLS3_FULL, value64);
		pr_dbg("VMX_PROC_VM_EXEC_CONTROLS3: 0x%llx ", value64);
	}

	/* APIC-v, config APIC-access address */
	value64 = vlapic_apicv_get_apic_access_addr();
	exec_vmwrite64(VMX_APIC_ACCESS_ADDR_FULL, value64);

	/* APIC-v, config APIC virtualized page address */
	value64 = vlapic_apicv_get_apic_page_addr(vcpu_vlapic(vcpu));
	exec_vmwrite64(VMX_VIRTUAL_APIC_PAGE_ADDR_FULL, value64);

	if (is_apicv_advanced_feature_supported()) {
		/* Disable all EOI VMEXIT by default and
		 * clear RVI and SVI.
		 */
		exec_vmwrite64(VMX_EOI_EXIT0_FULL, 0UL);
		exec_vmwrite64(VMX_EOI_EXIT1_FULL, 0UL);
		exec_vmwrite64(VMX_EOI_EXIT2_FULL, 0UL);
		exec_vmwrite64(VMX_EOI_EXIT3_FULL, 0UL);

		exec_vmwrite16(VMX_GUEST_INTR_STATUS, 0U);
		exec_vmwrite16(VMX_POSTED_INTR_VECTOR, (uint16_t)vcpu->arch.pid.control.bits.nv);
		exec_vmwrite64(VMX_PIR_DESC_ADDR_FULL, hva2hpa(get_pi_desc(vcpu)));
	}

	/* Load EPTP execution control
	 * TODO: introduce API to make this data driven based
	 * on VMX_EPT_VPID_CAP
	 */
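	/* EPTP bit layout (SDM Vol3 24.6.11): bits 2:0 select the EPT paging-structure
	 * memory type (6 = write-back) and bits 5:3 hold the page-walk length minus 1
	 * (3 means a 4-level EPT), so "(3UL << 3U) | 6UL" selects WB, 4-level EPT. */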
	value64 = hva2hpa(vm->arch_vm.nworld_eptp) | (3UL << 3U) | 6UL;
	exec_vmwrite64(VMX_EPT_POINTER_FULL, value64);
	pr_dbg("VMX_EPT_POINTER: 0x%016lx ", value64);

	/* Set up the guest exception bitmap; setting a bit causes a VM exit
	 * on the corresponding guest exception - pg 2902 24.6.3
	 * enable VM exit on MC always
	 * enable AC for split-lock emulation when split-lock detection is enabled on the physical platform.
	 */
	value32 = (1U << IDT_MC);
	if (is_ac_enabled()) {
		value32 = (value32 | (1U << IDT_AC));
	}
	if (is_gp_enabled()) {
		value32 = (value32 | (1U << IDT_GP));
	}
	exec_vmwrite32(VMX_EXCEPTION_BITMAP, value32);

	/* Set up page fault error code mask - second paragraph, pg 2902
	 * 24.6.3 - whether a guest page fault causes a VM exit is governed by
	 * both VMX_EXCEPTION_BITMAP and VMX_PF_ERROR_CODE_MASK
	 */
	exec_vmwrite32(VMX_PF_ERROR_CODE_MASK, 0U);

	/* Set up page fault error code match - second paragraph, pg 2902
	 * 24.6.3 - whether a guest page fault causes a VM exit is governed by
	 * both VMX_EXCEPTION_BITMAP and VMX_PF_ERROR_CODE_MATCH
	 */
	exec_vmwrite32(VMX_PF_ERROR_CODE_MATCH, 0U);

	/* Set up CR3 target count - on a guest mov to CR3, the HW compares the
	 * operand against the N CR3-target value registers. The CR3 target
	 * count tells the number of target-value registers to evaluate.
	 */
	exec_vmwrite32(VMX_CR3_TARGET_COUNT, 0U);

	/* Set up IO bitmap register A and B - pg 2902 24.6.4 */
	value64 = hva2hpa(vm->arch_vm.io_bitmap);
	exec_vmwrite64(VMX_IO_BITMAP_A_FULL, value64);
	pr_dbg("VMX_IO_BITMAP_A: 0x%016lx ", value64);
	value64 = hva2hpa((void *)&(vm->arch_vm.io_bitmap[PAGE_SIZE]));
	exec_vmwrite64(VMX_IO_BITMAP_B_FULL, value64);
	pr_dbg("VMX_IO_BITMAP_B: 0x%016lx ", value64);

	init_msr_emulation(vcpu);

	/* Set up executive VMCS pointer - pg 2905 24.6.10 */
	exec_vmwrite64(VMX_EXECUTIVE_VMCS_PTR_FULL, 0UL);

	/* Setup Time stamp counter offset - pg 2902 24.6.5
	 * VMCS.OFFSET = vAdjust - pAdjust
	 */
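	/* With TSC offsetting enabled, the guest reads TSC as pTSC + VMCS.OFFSET.
	 * Using (vAdjust - pAdjust) as the offset therefore makes the guest see
	 * the TSC value it would get with its own IA32_TSC_ADJUST applied. */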
	value64 = vcpu_get_guest_msr(vcpu, MSR_IA32_TSC_ADJUST) - cpu_msr_read(MSR_IA32_TSC_ADJUST);
	exec_vmwrite64(VMX_TSC_OFFSET_FULL, value64);

	/* Set up the link pointer */
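	/* The all-ones value is what the SDM requires for the VMCS link pointer
	 * when "VMCS shadowing" is not in use. */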
	exec_vmwrite64(VMX_VMS_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFUL);

	/* Natural-width */
	pr_dbg("Natural-width*********");

	init_cr0_cr4_host_guest_mask();

	/* The CR3 target registers work in concert with the VMX_CR3_TARGET_COUNT
	 * field. Using these registers, guest CR3 accesses can be managed: if the
	 * operand does not match one of these register values, a VM exit occurs.
	 */
	exec_vmwrite(VMX_CR3_TARGET_0, 0UL);
	exec_vmwrite(VMX_CR3_TARGET_1, 0UL);
	exec_vmwrite(VMX_CR3_TARGET_2, 0UL);
	exec_vmwrite(VMX_CR3_TARGET_3, 0UL);

	/* Setup PAUSE-loop exiting - 24.6.13 */
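	/* PLE_GAP and PLE_WINDOW are measured in TSC ticks and take effect only
	 * when "PAUSE-loop exiting" is enabled in the secondary controls above. */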
	exec_vmwrite(VMX_PLE_GAP, 128U);
	exec_vmwrite(VMX_PLE_WINDOW, 4096U);
}

static void init_entry_ctrl(const struct acrn_vcpu *vcpu)
{
	uint32_t value32;

	/* Log messages to show initializing VMX entry controls */
	pr_dbg("Initialize Entry control ");

	/* Set up VMX entry controls - ISDM 24.8.1 */
	value32 = VMX_ENTRY_CTLS_LOAD_PAT;

	if (get_vcpu_mode(vcpu) == CPU_MODE_64BIT) {
		value32 |= (VMX_ENTRY_CTLS_IA32E_MODE);
	}

	value32 = check_vmx_ctrl(MSR_IA32_VMX_ENTRY_CTLS, value32);

	exec_vmwrite32(VMX_ENTRY_CONTROLS, value32);
	pr_dbg("VMX_ENTRY_CONTROLS: 0x%x ", value32);

	/* Set up VM-entry MSR load count - pg 2908 24.8.2. It tells the number of
	 * MSRs loaded from memory on VM entry, from the address given by the
	 * VM-entry MSR-load address field.
	 */
	exec_vmwrite32(VMX_ENTRY_MSR_LOAD_COUNT, vcpu->arch.msr_area.count);
	exec_vmwrite64(VMX_ENTRY_MSR_LOAD_ADDR_FULL, hva2hpa((void *)vcpu->arch.msr_area.guest));

	/* Set up VM entry interrupt information field pg 2909 24.8.3 */
	exec_vmwrite32(VMX_ENTRY_INT_INFO_FIELD, 0U);

	/* Set up VM entry exception error code - pg 2910 24.8.3 */
	exec_vmwrite32(VMX_ENTRY_EXCEPTION_ERROR_CODE, 0U);

	/* Set up VM entry instruction length - pg 2910 24.8.3 */
	exec_vmwrite32(VMX_ENTRY_INSTR_LENGTH, 0U);
}

static void init_exit_ctrl(const struct acrn_vcpu *vcpu)
{
	uint32_t value32;

	/* Log messages to show initializing VMX exit controls */
	pr_dbg("Initialize Exit control ");

	/* Set up VM exit controls - pg 2907 24.7.1 for:
	 * Host address-space size is 64-bit.
	 * Acknowledge interrupt on exit: if 1, the HW acks the interrupt in VMX
	 * non-root mode and saves the interrupt vector to the relevant VM exit
	 * field for further processing by the hypervisor.
	 * Enable saving and loading IA32_PAT on VM exit.
	 */
	value32 = check_vmx_ctrl(MSR_IA32_VMX_EXIT_CTLS,
			 VMX_EXIT_CTLS_ACK_IRQ | VMX_EXIT_CTLS_SAVE_PAT |
			 VMX_EXIT_CTLS_LOAD_PAT | VMX_EXIT_CTLS_HOST_ADDR64);

	exec_vmwrite32(VMX_EXIT_CONTROLS, value32);
	pr_dbg("VMX_EXIT_CONTROL: 0x%x ", value32);

	/* Set up VM-exit MSR store and load counts - pg 2908 24.7.2. They tell
	 * the HW how many MSRs are stored to memory and loaded from memory on
	 * VM exit. The 64-bit VM-exit MSR store and load address fields provide
	 * the corresponding addresses.
	 */
	exec_vmwrite32(VMX_EXIT_MSR_STORE_COUNT, vcpu->arch.msr_area.count);
	exec_vmwrite32(VMX_EXIT_MSR_LOAD_COUNT, vcpu->arch.msr_area.count);
	exec_vmwrite64(VMX_EXIT_MSR_STORE_ADDR_FULL, hva2hpa((void *)vcpu->arch.msr_area.guest));
	exec_vmwrite64(VMX_EXIT_MSR_LOAD_ADDR_FULL, hva2hpa((void *)vcpu->arch.msr_area.host));
}

/**
 * @pre vcpu != NULL
 */
void init_vmcs(struct acrn_vcpu *vcpu)
{
	uint64_t vmx_rev_id;
	void **vmcs_ptr = &get_cpu_var(vmcs_run);

	/* Log message */
	pr_dbg("Initializing VMCS");

	/* Obtain the VM Rev ID from HW and populate VMCS page with it */
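	/* Bits 30:0 of IA32_VMX_BASIC hold the VMCS revision identifier, which
	 * must be written to the first 32-bit word of the VMCS region before the
	 * region can be used with VMPTRLD (SDM Vol3, Appendix A.1). */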
	vmx_rev_id = msr_read(MSR_IA32_VMX_BASIC);
	(void)memcpy_s(vcpu->arch.vmcs, 4U, (void *)&vmx_rev_id, 4U);

	/* Execute VMCLEAR VMCS of this vcpu */
	clear_va_vmcs(vcpu->arch.vmcs);

	/* Load VMCS pointer */
	load_va_vmcs(vcpu->arch.vmcs);
	*vmcs_ptr = (void *)vcpu->arch.vmcs;

	/* Initialize the Virtual Machine Control Structure (VMCS) */
	init_host_state();
	/* init exec_ctrl needs to run before init_guest_state */
	init_exec_ctrl(vcpu);
	init_guest_state(vcpu);
	init_entry_ctrl(vcpu);
	init_exit_ctrl(vcpu);
}

/**
 * @pre vcpu != NULL
 */
void load_vmcs(const struct acrn_vcpu *vcpu)
{
	void **vmcs_ptr = &get_cpu_var(vmcs_run);

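	/* Reload only when this vCPU has already been launched and its VMCS is not
	 * the current VMCS on this pCPU; the initial VMPTRLD is done in init_vmcs(). */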
	if (vcpu->launched && (*vmcs_ptr != (void *)vcpu->arch.vmcs)) {
		load_va_vmcs(vcpu->arch.vmcs);
		*vmcs_ptr = (void *)vcpu->arch.vmcs;
	}
}

void switch_apicv_mode_x2apic(struct acrn_vcpu *vcpu)
{
	uint32_t value32;

	if (is_lapic_pt_configured(vcpu->vm)) {
		dev_dbg(DBG_LEVEL_LAPICPT, "%s: switching to x2apic and passthru", __func__);
		/*
		 * Disable external interrupt exiting and irq ack
		 * Disable posted interrupt processing
		 * update x2apic msr bitmap for pass-thru
		 * enable interception only for ICR
		 * disable interception for the TSC_DEADLINE MSR
		 * Disable Register Virtualization and virtual interrupt delivery
		 * Disable "use TPR shadow"
		 */

		value32 = exec_vmread32(VMX_PIN_VM_EXEC_CONTROLS);
		value32 &= ~VMX_PINBASED_CTLS_IRQ_EXIT;
		if (is_apicv_advanced_feature_supported()) {
			value32 &= ~VMX_PINBASED_CTLS_POST_IRQ;
		}

		exec_vmwrite32(VMX_PIN_VM_EXEC_CONTROLS, value32);

		value32 = exec_vmread32(VMX_EXIT_CONTROLS);
		value32 &= ~VMX_EXIT_CTLS_ACK_IRQ;
		exec_vmwrite32(VMX_EXIT_CONTROLS, value32);

		vcpu->arch.proc_vm_exec_ctrls &= ~VMX_PROCBASED_CTLS_TPR_SHADOW;
		vcpu->arch.proc_vm_exec_ctrls &= ~VMX_PROCBASED_CTLS_HLT;
		exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, vcpu->arch.proc_vm_exec_ctrls);

		exec_vmwrite32(VMX_TPR_THRESHOLD, 0U);

		value32 = exec_vmread32(VMX_PROC_VM_EXEC_CONTROLS2);
		value32 &= ~VMX_PROCBASED_CTLS2_PAUSE_LOOP;
		value32 &= ~VMX_PROCBASED_CTLS2_VAPIC;
		if (is_apicv_advanced_feature_supported()) {
			value32 &= ~VMX_PROCBASED_CTLS2_VIRQ;
			value32 &= ~VMX_PROCBASED_CTLS2_VAPIC_REGS;
		}
		exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS2, value32);

		update_msr_bitmap_x2apic_passthru(vcpu);

	} else {
		value32 = exec_vmread32(VMX_PROC_VM_EXEC_CONTROLS2);
		value32 &= ~VMX_PROCBASED_CTLS2_VAPIC;
		value32 |= VMX_PROCBASED_CTLS2_VX2APIC;
		exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS2, value32);
		update_msr_bitmap_x2apic_apicv(vcpu);
	}
}