/*
 * Copyright (C) 2018-2022 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <types.h>
#include <errno.h>
#include <asm/guest/vcpu.h>
#include <asm/guest/virq.h>
#include <asm/lib/bits.h>
#include <asm/vmx.h>
#include <logmsg.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_caps.h>
#include <asm/per_cpu.h>
#include <asm/init.h>
#include <asm/guest/vm.h>
#include <asm/guest/vmcs.h>
#include <asm/mmu.h>
#include <lib/sprintf.h>
#include <asm/lapic.h>
#include <asm/irq.h>
#include <console.h>

/* stack_frame is linked with the sequence of stack operations in arch_switch_to() */
struct stack_frame {
	uint64_t rdi;
	uint64_t r15;
	uint64_t r14;
	uint64_t r13;
	uint64_t r12;
	uint64_t rbp;
	uint64_t rbx;
	uint64_t rflag;
	uint64_t rip;
	uint64_t magic;
};

uint64_t vcpu_get_gpreg(const struct acrn_vcpu *vcpu, uint32_t reg)
{
	const struct run_context *ctx =
		&vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;

	return ctx->cpu_regs.longs[reg];
}

void vcpu_set_gpreg(struct acrn_vcpu *vcpu, uint32_t reg, uint64_t val)
{
	struct run_context *ctx =
		&vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;

	ctx->cpu_regs.longs[reg] = val;
}

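/*
 * RIP/RSP/EFER/RFLAGS are cached lazily: reg_cached marks fields already
 * read back from the VMCS since the last VM exit, while reg_updated marks
 * fields changed by the hypervisor that must be flushed to the VMCS before
 * the next VM entry (see write_cached_registers()).
 */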
uint64_t vcpu_get_rip(struct acrn_vcpu *vcpu)
{
	struct run_context *ctx =
		&vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;

	if (!bitmap_test(CPU_REG_RIP, &vcpu->reg_updated) &&
		!bitmap_test_and_set_nolock(CPU_REG_RIP, &vcpu->reg_cached)) {
		ctx->rip = exec_vmread(VMX_GUEST_RIP);
	}
	return ctx->rip;
}

void vcpu_set_rip(struct acrn_vcpu *vcpu, uint64_t val)
{
	vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx.rip = val;
	bitmap_set_nolock(CPU_REG_RIP, &vcpu->reg_updated);
}

uint64_t vcpu_get_rsp(const struct acrn_vcpu *vcpu)
{
	const struct run_context *ctx =
		&vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;

	return ctx->cpu_regs.regs.rsp;
}

void vcpu_set_rsp(struct acrn_vcpu *vcpu, uint64_t val)
{
	struct run_context *ctx =
		&vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;

	ctx->cpu_regs.regs.rsp = val;
	bitmap_set_nolock(CPU_REG_RSP, &vcpu->reg_updated);
}

uint64_t vcpu_get_efer(struct acrn_vcpu *vcpu)
{
	struct run_context *ctx =
		&vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;

	return ctx->ia32_efer;
}

void vcpu_set_efer(struct acrn_vcpu *vcpu, uint64_t val)
{
	vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx.ia32_efer
		= val;

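	/* If the guest value matches the host IA32_EFER, the VM-entry/VM-exit
	 * "load IA32_EFER" controls can be cleared so VM transitions leave the
	 * register untouched; otherwise they must be set so the guest/host
	 * values are loaded from the VMCS fields.
	 */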
	if (val == msr_read(MSR_IA32_EFER)) {
		clear_vmcs_bit(VMX_ENTRY_CONTROLS, VMX_ENTRY_CTLS_LOAD_EFER);
		clear_vmcs_bit(VMX_EXIT_CONTROLS, VMX_EXIT_CTLS_LOAD_EFER);
	} else {
		set_vmcs_bit(VMX_ENTRY_CONTROLS, VMX_ENTRY_CTLS_LOAD_EFER);
		set_vmcs_bit(VMX_EXIT_CONTROLS, VMX_EXIT_CTLS_LOAD_EFER);
	}

	/* Write the new value to VMCS in either case */
	bitmap_set_nolock(CPU_REG_EFER, &vcpu->reg_updated);
}

uint64_t vcpu_get_rflags(struct acrn_vcpu *vcpu)
{
	struct run_context *ctx =
		&vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;

	if (!bitmap_test(CPU_REG_RFLAGS, &vcpu->reg_updated) &&
		!bitmap_test_and_set_nolock(CPU_REG_RFLAGS, &vcpu->reg_cached) && vcpu->launched) {
		ctx->rflags = exec_vmread(VMX_GUEST_RFLAGS);
	}
	return ctx->rflags;
}

void vcpu_set_rflags(struct acrn_vcpu *vcpu, uint64_t val)
{
	vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx.rflags =
		val;
	bitmap_set_nolock(CPU_REG_RFLAGS, &vcpu->reg_updated);
}

uint64_t vcpu_get_guest_msr(const struct acrn_vcpu *vcpu, uint32_t msr)
{
	uint32_t index = vmsr_get_guest_msr_index(msr);
	uint64_t val = 0UL;

	if (index < NUM_EMULATED_MSRS) {
		val = vcpu->arch.guest_msrs[index];
	}

	return val;
}

void vcpu_set_guest_msr(struct acrn_vcpu *vcpu, uint32_t msr, uint64_t val)
{
	uint32_t index = vmsr_get_guest_msr_index(msr);

	if (index < NUM_EMULATED_MSRS) {
		vcpu->arch.guest_msrs[index] = val;
	}
}

/*
 * Write the eoi_exit_bitmaps to VMCS fields
 */
void vcpu_set_vmcs_eoi_exit(const struct acrn_vcpu *vcpu)
{
	pr_dbg("%s", __func__);

	if (is_apicv_advanced_feature_supported()) {
		exec_vmwrite64(VMX_EOI_EXIT0_FULL, vcpu->arch.eoi_exit_bitmap[0]);
		exec_vmwrite64(VMX_EOI_EXIT1_FULL, vcpu->arch.eoi_exit_bitmap[1]);
		exec_vmwrite64(VMX_EOI_EXIT2_FULL, vcpu->arch.eoi_exit_bitmap[2]);
		exec_vmwrite64(VMX_EOI_EXIT3_FULL, vcpu->arch.eoi_exit_bitmap[3]);
	}
}

/*
 * Set the eoi_exit_bitmap bit for specific vector
 * @pre vcpu != NULL && vector <= 255U
 */
void vcpu_set_eoi_exit_bitmap(struct acrn_vcpu *vcpu, uint32_t vector)
{
	pr_dbg("%s", __func__);

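	/* eoi_exit_bitmap[] is four 64-bit words covering vectors 0-255:
	 * (vector >> 6) selects the word and (vector & 0x3f) selects the bit
	 * within that word.
	 */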
	if (!bitmap_test_and_set_lock((uint16_t)(vector & 0x3fU),
			&(vcpu->arch.eoi_exit_bitmap[(vector & 0xffU) >> 6U]))) {
		vcpu_make_request(vcpu, ACRN_REQUEST_EOI_EXIT_BITMAP_UPDATE);
	}
}

void vcpu_clear_eoi_exit_bitmap(struct acrn_vcpu *vcpu, uint32_t vector)
{
	pr_dbg("%s", __func__);

	if (bitmap_test_and_clear_lock((uint16_t)(vector & 0x3fU),
			&(vcpu->arch.eoi_exit_bitmap[(vector & 0xffU) >> 6U]))) {
		vcpu_make_request(vcpu, ACRN_REQUEST_EOI_EXIT_BITMAP_UPDATE);
	}
}

/*
 * Reset all eoi_exit_bitmaps
 */
void vcpu_reset_eoi_exit_bitmaps(struct acrn_vcpu *vcpu)
{
	pr_dbg("%s", __func__);

	(void)memset((void *)(vcpu->arch.eoi_exit_bitmap), 0U, sizeof(vcpu->arch.eoi_exit_bitmap));
	vcpu_make_request(vcpu, ACRN_REQUEST_EOI_EXIT_BITMAP_UPDATE);
}

static void init_iwkey(struct acrn_vcpu *vcpu)
{
	/* Initialize a random IWKey */
	if (pcpu_has_cap(X86_FEATURE_KEYLOCKER)) {
		vcpu->arch.IWKey.integrity_key[0] = get_random_value();
		vcpu->arch.IWKey.integrity_key[1] = get_random_value();
		vcpu->arch.IWKey.encryption_key[0] = get_random_value();
		vcpu->arch.IWKey.encryption_key[1] = get_random_value();
		vcpu->arch.IWKey.encryption_key[2] = get_random_value();
		vcpu->arch.IWKey.encryption_key[3] = get_random_value();
		/* It's always safe to clear whose_iwkey */
		per_cpu(whose_iwkey, pcpuid_from_vcpu(vcpu)) = NULL;
	}
}

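/* Load this vCPU's IWKey into the pCPU unless it is already the one loaded
 * (tracked per pCPU in whose_iwkey), avoiding redundant LOADIWKEY executions.
 */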
void load_iwkey(struct acrn_vcpu *vcpu)
{
	uint64_t xmm_save[6];

	/* Only load IWKey with vCPU CR4 keylocker bit enabled */
	if (pcpu_has_cap(X86_FEATURE_KEYLOCKER) && vcpu->arch.cr4_kl_enabled &&
	    (get_cpu_var(whose_iwkey) != vcpu)) {
		/* Save/restore xmm0/xmm1/xmm2 during the process */
		read_xmm_0_2(&xmm_save[0], &xmm_save[2], &xmm_save[4]);
		write_xmm_0_2(&vcpu->arch.IWKey.integrity_key[0], &vcpu->arch.IWKey.encryption_key[0],
						&vcpu->arch.IWKey.encryption_key[2]);
		asm_loadiwkey(0);
		write_xmm_0_2(&xmm_save[0], &xmm_save[2], &xmm_save[4]);
		get_cpu_var(whose_iwkey) = vcpu;
	}
}

/* As a vcpu reset internal API, DO NOT touch any vcpu state transition in this function. */
static void vcpu_reset_internal(struct acrn_vcpu *vcpu, enum reset_mode mode)
{
	int32_t i;
	struct acrn_vlapic *vlapic;

	vcpu->launched = false;
	vcpu->arch.nr_sipi = 0U;

	vcpu->arch.exception_info.exception = VECTOR_INVALID;
	vcpu->arch.cur_context = NORMAL_WORLD;
	vcpu->arch.lapic_pt_enabled = false;
	vcpu->arch.irq_window_enabled = false;
	vcpu->arch.emulating_lock = false;
	(void)memset((void *)vcpu->arch.vmcs, 0U, PAGE_SIZE);

	for (i = 0; i < NR_WORLD; i++) {
		(void)memset((void *)(&vcpu->arch.contexts[i]), 0U,
			sizeof(struct run_context));
	}

	vlapic = vcpu_vlapic(vcpu);
	vlapic_reset(vlapic, apicv_ops, mode);

	reset_vcpu_regs(vcpu, mode);

	for (i = 0; i < VCPU_EVENT_NUM; i++) {
		reset_event(&vcpu->events[i]);
	}

	init_iwkey(vcpu);
	vcpu->arch.iwkey_copy_status = 0UL;
}

struct acrn_vcpu *get_running_vcpu(uint16_t pcpu_id)
{
	struct thread_object *curr = sched_get_current(pcpu_id);
	struct acrn_vcpu *vcpu = NULL;

	if ((curr != NULL) && (!is_idle_thread(curr))) {
		vcpu = container_of(curr, struct acrn_vcpu, thread_obj);
	}

	return vcpu;
}

struct acrn_vcpu *get_ever_run_vcpu(uint16_t pcpu_id)
{
	return per_cpu(ever_run_vcpu, pcpu_id);
}

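/* Derive the vCPU operating mode from EFER.LMA, CS.L (bit 13 of the VMX
 * segment access-rights format) and CR0.PE.
 */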
static void set_vcpu_mode(struct acrn_vcpu *vcpu, uint32_t cs_attr, uint64_t ia32_efer,
		uint64_t cr0)
{
	if ((ia32_efer & MSR_IA32_EFER_LMA_BIT) != 0UL) {
		if ((cs_attr & 0x2000U) != 0U) {
			/* CS.L = 1 */
			vcpu->arch.cpu_mode = CPU_MODE_64BIT;
		} else {
			vcpu->arch.cpu_mode = CPU_MODE_COMPATIBILITY;
		}
	} else if ((cr0 & CR0_PE) != 0UL) {
		vcpu->arch.cpu_mode = CPU_MODE_PROTECTED;
	} else {
		vcpu->arch.cpu_mode = CPU_MODE_REAL;
	}
}

static void init_xsave(struct acrn_vcpu *vcpu)
{
	struct ext_context *ectx = &(vcpu->arch.contexts[vcpu->arch.cur_context].ext_ctx);
	struct xsave_area *area = &ectx->xs_area;

	/* if the HW has this cap, we need to prepare the buffer for potential save/restore.
	 *  Guest may or may not enable XSAVE -- it doesn't matter.
	 */
	if (pcpu_has_cap(X86_FEATURE_XSAVE)) {
		ectx->xcr0 = XSAVE_FPU;
		(void)memset((void *)area, 0U, XSAVE_STATE_AREA_SIZE);

		/* XSAVES only supports the compacted format, so set bit 63
		 * (XSAVE_COMPACTED_FORMAT) in xcomp_bv and keep the rest of the
		 * header area zero.
		 */
		ectx->xs_area.xsave_hdr.hdr.xcomp_bv |= XSAVE_COMPACTED_FORMAT;
	}
}

void set_vcpu_regs(struct acrn_vcpu *vcpu, struct acrn_regs *vcpu_regs)
{
	struct ext_context *ectx;
	struct run_context *ctx;
	uint16_t *sel = &(vcpu_regs->cs_sel);
	struct segment_sel *seg;
	uint32_t limit, attr;

	ectx = &(vcpu->arch.contexts[vcpu->arch.cur_context].ext_ctx);
	ctx = &(vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx);

	/* NOTE:
	 * This sets the segment attr and limit to default values.
	 * If set_vcpu_regs() is used for more than vcpu state
	 * initialization, this part of the code needs to be revised.
	 */
	if ((vcpu_regs->cr0 & CR0_PE) != 0UL) {
		attr = PROTECTED_MODE_DATA_SEG_AR;
		limit = PROTECTED_MODE_SEG_LIMIT;
	} else {
		attr = REAL_MODE_DATA_SEG_AR;
		limit = REAL_MODE_SEG_LIMIT;
	}

	for (seg = &(ectx->cs); seg <= &(ectx->gs); seg++) {
		seg->base     = 0UL;
		seg->limit    = limit;
		seg->attr     = attr;
		seg->selector = *sel;
		sel++;
	}

	/* override cs attr/base/limit */
	ectx->cs.attr = vcpu_regs->cs_ar;
	ectx->cs.base = vcpu_regs->cs_base;
	ectx->cs.limit = vcpu_regs->cs_limit;

	ectx->gdtr.base = vcpu_regs->gdt.base;
	ectx->gdtr.limit = vcpu_regs->gdt.limit;

	ectx->idtr.base = vcpu_regs->idt.base;
	ectx->idtr.limit = vcpu_regs->idt.limit;

	ectx->ldtr.selector = vcpu_regs->ldt_sel;
	ectx->tr.selector = vcpu_regs->tr_sel;

	/* NOTE:
	 * This sets ldtr and tr to default values.
	 * If set_vcpu_regs() is used for more than vcpu state
	 * initialization, this part of the code needs to be revised.
	 */
	ectx->ldtr.base = 0UL;
	ectx->tr.base = 0UL;
	ectx->ldtr.limit = 0xFFFFU;
	ectx->tr.limit = 0xFFFFU;
	ectx->ldtr.attr = LDTR_AR;
	ectx->tr.attr = TR_AR;

	(void)memcpy_s((void *)&(ctx->cpu_regs), sizeof(struct acrn_gp_regs),
			(void *)&(vcpu_regs->gprs), sizeof(struct acrn_gp_regs));

	vcpu_set_rip(vcpu, vcpu_regs->rip);
	vcpu_set_efer(vcpu, vcpu_regs->ia32_efer);
	vcpu_set_rsp(vcpu, vcpu_regs->gprs.rsp);

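	/* RFLAGS: default to the architectural reset value (only bit 1 set);
	 * otherwise clear the arithmetic status flags CF/PF/AF/ZF/SF/OF
	 * (mask 0x8d5) from the supplied value.
	 */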
	if (vcpu_regs->rflags == 0UL) {
		vcpu_set_rflags(vcpu, 0x02UL);
	} else {
		vcpu_set_rflags(vcpu, vcpu_regs->rflags & ~(0x8d5UL));
	}

	/* cr0, cr3 and cr4 need to be set without using the vcpu_set_*() API.
	 * The real cr0/cr3/cr4 writes are delayed until init_vmcs().
	 */
	ctx->cr0 = vcpu_regs->cr0;
	ectx->cr3 = vcpu_regs->cr3;
	ctx->cr4 = vcpu_regs->cr4;

	set_vcpu_mode(vcpu, vcpu_regs->cs_ar, vcpu_regs->ia32_efer,
			vcpu_regs->cr0);
}

static struct acrn_regs realmode_init_vregs = {
	.gdt = {
		.limit = 0xFFFFU,
		.base = 0UL,
	},
	.idt = {
		.limit = 0xFFFFU,
		.base = 0UL,
	},
	.cs_ar = REAL_MODE_CODE_SEG_AR,
	.cs_sel = REAL_MODE_BSP_INIT_CODE_SEL,
	.cs_base = 0xFFFF0000UL,
	.cs_limit = 0xFFFFU,
	.rip = 0xFFF0UL,
	.cr0 = CR0_ET | CR0_NE,
	.cr3 = 0UL,
	.cr4 = 0UL,
};

static uint64_t init_vgdt[] = {
	0x0UL,
	0x0UL,
	0x00CF9B000000FFFFUL,   /* Linear Code */
	0x00CF93000000FFFFUL,   /* Linear Data */
};

static struct acrn_regs protect_mode_init_vregs = {
	.cs_ar = PROTECTED_MODE_CODE_SEG_AR,
	.cs_limit = PROTECTED_MODE_SEG_LIMIT,
	.cs_sel = 0x10U,
	.cr0 = CR0_ET | CR0_NE | CR0_PE,
	.ds_sel = 0x18U,
	.ss_sel = 0x18U,
	.es_sel = 0x18U,
};

bool sanitize_cr0_cr4_pattern(void)
{
	bool ret = false;

	if (is_valid_cr0_cr4(realmode_init_vregs.cr0, realmode_init_vregs.cr4) &&
			is_valid_cr0_cr4(protect_mode_init_vregs.cr0, protect_mode_init_vregs.cr4)) {
		ret = true;
	} else {
		pr_err("Wrong CR0/CR4 pattern: real %lx %lx; protected %lx %lx\n", realmode_init_vregs.cr0,
			realmode_init_vregs.cr4, protect_mode_init_vregs.cr0, protect_mode_init_vregs.cr4);
	}
	return ret;
}

void reset_vcpu_regs(struct acrn_vcpu *vcpu, enum reset_mode mode)
{
	set_vcpu_regs(vcpu, &realmode_init_vregs);

	/*
	 * According to SDM Vol3 "Table 9-1. IA-32 and Intel 64 Processor States Following Power-up, Reset, or INIT",
	 * for some registers, the state following INIT is different from the state following Power-up, Reset.
	 * (For all registers mentioned in Table 9-1, the state following Power-up and following Reset are same.)
	 *
	 * To distinguish this kind of case for vCPU:
	 *  - If the state following INIT is same as the state following Power-up/Reset, handle it in
	 *    set_vcpu_regs above.
	 *  - Otherwise, handle it below.
	 */
	if (mode != INIT_RESET) {
		struct ext_context *ectx = &(vcpu->arch.contexts[vcpu->arch.cur_context].ext_ctx);

		/* IA32_TSC_AUX: 0 following Power-up/Reset, unchanged following INIT */
		ectx->tsc_aux = 0UL;
	}
}

void init_vcpu_protect_mode_regs(struct acrn_vcpu *vcpu, uint64_t vgdt_base_gpa)
{
	struct acrn_regs vcpu_regs;

	(void)memcpy_s((void *)&vcpu_regs, sizeof(struct acrn_regs),
		(void *)&protect_mode_init_vregs, sizeof(struct acrn_regs));

	vcpu_regs.gdt.base = vgdt_base_gpa;
	vcpu_regs.gdt.limit = sizeof(init_vgdt) - 1U;
	(void)copy_to_gpa(vcpu->vm, &init_vgdt, vgdt_base_gpa, sizeof(init_vgdt));

	set_vcpu_regs(vcpu, &vcpu_regs);
}

void set_vcpu_startup_entry(struct acrn_vcpu *vcpu, uint64_t entry)
{
	struct ext_context *ectx;

	ectx = &(vcpu->arch.contexts[vcpu->arch.cur_context].ext_ctx);
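	/* Encode a real-mode entry point: CS.base carries the 16-byte-aligned
	 * entry address (selector = entry >> 4) and RIP starts at 0.
	 */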
	ectx->cs.selector = (uint16_t)((entry >> 4U) & 0xFFFFU);
	ectx->cs.base = ectx->cs.selector << 4U;

	vcpu_set_rip(vcpu, 0UL);
}

/*
 * @pre vm != NULL && rtn_vcpu_handle != NULL
 */
int32_t create_vcpu(uint16_t pcpu_id, struct acrn_vm *vm, struct acrn_vcpu **rtn_vcpu_handle)
{
	struct acrn_vcpu *vcpu;
	uint16_t vcpu_id;
	int32_t ret;

	pr_info("Creating VCPU working on PCPU%hu", pcpu_id);

	/*
	 * vcpu->vcpu_id = vm->hw.created_vcpus;
	 * vm->hw.created_vcpus++;
	 */
	vcpu_id = vm->hw.created_vcpus;
	if (vcpu_id < MAX_VCPUS_PER_VM) {
		/* Allocate memory for VCPU */
		vcpu = &(vm->hw.vcpu_array[vcpu_id]);
		(void)memset((void *)vcpu, 0U, sizeof(struct acrn_vcpu));

		/* Initialize CPU ID for this VCPU */
		vcpu->vcpu_id = vcpu_id;
		per_cpu(ever_run_vcpu, pcpu_id) = vcpu;

		if (is_lapic_pt_configured(vm) || is_using_init_ipi()) {
			/* A lapic_pt pCPU does not enable IRQs in root mode, so it
			 * must use PAUSE idle mode.
			 * At this point the pCPU may still be in HLT idle, and its
			 * kick mode is about to be set to INIT kick, which cannot
			 * wake a root-mode HLT. So kick it now (while it is in HLT
			 * idle its kick mode is certainly IPI kick) to switch it to
			 * PAUSE idle right away.
			 */
			if (per_cpu(mode_to_idle, pcpu_id) == IDLE_MODE_HLT) {
				per_cpu(mode_to_idle, pcpu_id) = IDLE_MODE_PAUSE;
				kick_pcpu(pcpu_id);
			}
			per_cpu(mode_to_kick_pcpu, pcpu_id) = DEL_MODE_INIT;
		} else {
			per_cpu(mode_to_kick_pcpu, pcpu_id) = DEL_MODE_IPI;
			per_cpu(mode_to_idle, pcpu_id) = IDLE_MODE_HLT;
		}
		pr_info("pcpu=%d, kick-mode=%d, use_init_flag=%d", pcpu_id,
			per_cpu(mode_to_kick_pcpu, pcpu_id), is_using_init_ipi());

		/* Initialize the parent VM reference */
		vcpu->vm = vm;

		/* Initialize the virtual ID for this VCPU */
		/* FIXME:
		 * We assume that vcpus are always destroyed in one shot
		 * (e.g. when the vm is destroyed). If destroying a specific
		 * vcpu on the fly needs to be supported, this vcpu_id
		 * assignment needs to be revised.
		 */

		pr_info("Create VM%d-VCPU%d, Role: %s",
				vcpu->vm->vm_id, vcpu->vcpu_id,
				is_vcpu_bsp(vcpu) ? "PRIMARY" : "SECONDARY");

		/*
		 * If the logical processor is in VMX non-root operation and
		 * the "enable VPID" VM-execution control is 1, the current VPID
		 * is the value of the VPID VM-execution control field in the VMCS.
		 *
		 * This assignment guarantees a unique non-zero per vcpu vpid at runtime.
		 */
		vcpu->arch.vpid = ALLOCATED_MIN_L1_VPID + (vm->vm_id * MAX_VCPUS_PER_VM) + vcpu->vcpu_id;

		/*
		 * There are two locally independent writing operations, namely the
		 * assignment of vcpu->vm and vcpu_array[]. Compilers may optimize
		 * and reorder writing operations while users of vcpu_array[] may
		 * assume the presence of vcpu->vm. A compiler barrier is added here
		 * to prevent compiler reordering, ensuring that assignments to
		 * vcpu->vm precede vcpu_array[].
		 */
		cpu_compiler_barrier();

		/*
		 * ACRN uses the following approach to manage VT-d PI notification vectors:
		 * allocate a unique Activation Notification Vector (ANV) for each vCPU that
		 * belongs to the same pCPU; the ANVs need only be unique within each pCPU,
		 * not across all vCPUs. The maximum number of vCPUs that may run on top of
		 * a pCPU is CONFIG_MAX_VM_NUM, since ACRN does not support two vCPUs of the
		 * same VM running on top of the same pCPU. This reduces the number of
		 * pre-allocated ANVs for posted interrupts to CONFIG_MAX_VM_NUM, and enables
		 * ACRN to avoid switching between active and wake-up vector values in the
		 * posted interrupt descriptor on vCPU scheduling state changes.
		 *
		 * We maintain a per-pCPU array of vCPUs, and use vm_id as the index into the
		 * vCPU array.
		 */
		per_cpu(vcpu_array, pcpu_id)[vm->vm_id] = vcpu;

		/*
		 * Use vm_id as the index to indicate the posted interrupt IRQ/vector pair that are
		 * assigned to this vCPU:
		 * 0: first posted interrupt IRQs/vector pair (POSTED_INTR_IRQ/POSTED_INTR_VECTOR)
		 * ...
		 * CONFIG_MAX_VM_NUM-1: last posted interrupt IRQs/vector pair
		 * ((POSTED_INTR_IRQ + CONFIG_MAX_VM_NUM - 1U)/(POSTED_INTR_VECTOR + CONFIG_MAX_VM_NUM - 1U)
		 */
		vcpu->arch.pid.control.bits.nv = POSTED_INTR_VECTOR + vm->vm_id;

		/* ACRN does not support vCPU migration, one vCPU always runs on
		 * the same pCPU, so PI's ndst is never changed after startup.
		 */
		vcpu->arch.pid.control.bits.ndst = per_cpu(lapic_id, pcpu_id);

		/* Create per vcpu vlapic */
		vlapic_create(vcpu, pcpu_id);

		if (!vm_hide_mtrr(vm)) {
			init_vmtrr(vcpu);
		}

		/* Populate the return handle */
		*rtn_vcpu_handle = vcpu;
		vcpu_set_state(vcpu, VCPU_INIT);

		init_xsave(vcpu);
		vcpu_reset_internal(vcpu, POWER_ON_RESET);
		(void)memset((void *)&vcpu->req, 0U, sizeof(struct io_request));
		vm->hw.created_vcpus++;
		ret = 0;
	} else {
		pr_err("%s, vcpu id is invalid!\n", __func__);
		ret = -EINVAL;
	}

	return ret;
}

/**
 * @pre ctx != NULL
 */
static inline int32_t exec_vmentry(struct run_context *ctx, int32_t launch_type, int32_t ibrs_type)
{
#ifdef CONFIG_L1D_FLUSH_VMENTRY_ENABLED
	cpu_l1d_flush();
#endif

	/* Mitigation for MDS vulnerability, overwrite CPU internal buffers */
	cpu_internal_buffers_clear();

	return vmx_vmrun(ctx, launch_type, ibrs_type);
}

/*
 * @pre vcpu != NULL
 */
static void write_cached_registers(struct acrn_vcpu *vcpu)
{
	struct run_context *ctx =
		&vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;

	if (bitmap_test_and_clear_nolock(CPU_REG_RIP, &vcpu->reg_updated)) {
		exec_vmwrite(VMX_GUEST_RIP, ctx->rip);
	}
	if (bitmap_test_and_clear_nolock(CPU_REG_RSP, &vcpu->reg_updated)) {
		exec_vmwrite(VMX_GUEST_RSP, ctx->cpu_regs.regs.rsp);
	}
	if (bitmap_test_and_clear_nolock(CPU_REG_EFER, &vcpu->reg_updated)) {
		exec_vmwrite64(VMX_GUEST_IA32_EFER_FULL, ctx->ia32_efer);
	}
	if (bitmap_test_and_clear_nolock(CPU_REG_RFLAGS, &vcpu->reg_updated)) {
		exec_vmwrite(VMX_GUEST_RFLAGS, ctx->rflags);
	}

	/*
	 * Currently, updating CR0/CR4 here is only designed for world
	 * switching. No other module should request CR0/CR4 updates
	 * here.
	 */
	if (bitmap_test_and_clear_nolock(CPU_REG_CR0, &vcpu->reg_updated)) {
		vcpu_set_cr0(vcpu, ctx->cr0);
	}

	if (bitmap_test_and_clear_nolock(CPU_REG_CR4, &vcpu->reg_updated)) {
		vcpu_set_cr4(vcpu, ctx->cr4);
	}
}

/*
 * @pre vcpu != NULL
 */
int32_t run_vcpu(struct acrn_vcpu *vcpu)
{
	uint32_t cs_attr;
	uint64_t ia32_efer, cr0;
	struct run_context *ctx =
		&vcpu->arch.contexts[vcpu->arch.cur_context].run_ctx;
	int32_t status = 0;
	int32_t ibrs_type = get_ibrs_type();

	if (vcpu->reg_updated != 0UL) {
		write_cached_registers(vcpu);
	}

	if (is_vcpu_in_l2_guest(vcpu)) {
		int32_t launch_type;

		if (vcpu->launched) {
			/* for nested VM-exits that don't need to be reflected to L1 hypervisor */
			launch_type = VM_RESUME;
		} else {
			/* for VMEntry case, VMCS02 was VMCLEARed by ACRN */
			launch_type = VM_LAUNCH;
			vcpu->launched = true;
		}

		status = exec_vmentry(ctx, launch_type, ibrs_type);
	} else {
		/* If this VCPU is not already launched, launch it */
		if (!vcpu->launched) {
			pr_info("VM %d Starting VCPU %hu",
					vcpu->vm->vm_id, vcpu->vcpu_id);

			if (vcpu->arch.vpid != 0U) {
				exec_vmwrite16(VMX_VPID, vcpu->arch.vpid);
			}

			/*
			 * A power-up or a reset invalidates all linear mappings,
			 * guest-physical mappings, and combined mappings
			 */
			flush_vpid_global();

#ifdef CONFIG_HYPERV_ENABLED
			if (is_vcpu_bsp(vcpu)) {
				hyperv_init_time(vcpu->vm);
			}
#endif

			/* Set vcpu launched */
			vcpu->launched = true;

			/* Set IBPB to avoid RSB reuse across a recycled VMCS.
			 * NOTE: this should be done any time the VMCS is switched.
			 * Currently there is no other place that switches the VMCS;
			 * please add an IBPB write for future VMCS switch cases (like trusty).
			 */
			if (ibrs_type == IBRS_RAW) {
				msr_write(MSR_IA32_PRED_CMD, PRED_SET_IBPB);
			}

			/* Launch the VM */
			status = exec_vmentry(ctx, VM_LAUNCH, ibrs_type);

			/* See if VM launched successfully */
			if (status == 0) {
				if (is_vcpu_bsp(vcpu)) {
					pr_info("VM %d VCPU %hu successfully launched",
						vcpu->vm->vm_id, vcpu->vcpu_id);
				}
			}
		} else {
			/* This VCPU was already launched, check if the last guest
			 * instruction needs to be repeated and resume VCPU accordingly
			 */
			if (vcpu->arch.inst_len != 0U) {
				exec_vmwrite(VMX_GUEST_RIP, vcpu_get_rip(vcpu) + vcpu->arch.inst_len);
			}

			/* Resume the VM */
			status = exec_vmentry(ctx, VM_RESUME, ibrs_type);
		}

		cs_attr = exec_vmread32(VMX_GUEST_CS_ATTR);
		ia32_efer = vcpu_get_efer(vcpu);
		cr0 = vcpu_get_cr0(vcpu);
		set_vcpu_mode(vcpu, cs_attr, ia32_efer, cr0);
	}

	vcpu->reg_cached = 0UL;

	/* Obtain current VCPU instruction length */
	vcpu->arch.inst_len = exec_vmread32(VMX_EXIT_INSTR_LEN);

	/* Obtain VM exit reason */
	vcpu->arch.exit_reason = exec_vmread32(VMX_EXIT_REASON);

	if (status != 0) {
		/* refer to 64-ia32 spec section 24.9.1 volume#3 */
		if ((vcpu->arch.exit_reason & VMX_VMENTRY_FAIL) != 0U) {
			pr_fatal("vmentry fail reason=%lx", vcpu->arch.exit_reason);
		} else {
			pr_fatal("vmexit fail err_inst=%x", exec_vmread32(VMX_INSTR_ERROR));
		}

		ASSERT(status == 0, "vm fail");
	}

	return status;
}

/*
 *  @pre vcpu != NULL
 *  @pre vcpu->state == VCPU_ZOMBIE
 */
void offline_vcpu(struct acrn_vcpu *vcpu)
{
	vlapic_free(vcpu);
	per_cpu(ever_run_vcpu, pcpuid_from_vcpu(vcpu)) = NULL;

	/* This operation must be atomic to avoid contention with posted interrupt handler */
	per_cpu(vcpu_array, pcpuid_from_vcpu(vcpu))[vcpu->vm->vm_id] = NULL;

	vcpu_set_state(vcpu, VCPU_OFFLINE);
}

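/* Only send a kick when the target pCPU currently has this vCPU's VMCS
 * loaded (per_cpu vmcs_run) and it is not the calling pCPU itself.
 */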
void kick_vcpu(struct acrn_vcpu *vcpu)
{
	uint16_t pcpu_id = pcpuid_from_vcpu(vcpu);

	if ((get_pcpu_id() != pcpu_id) && (per_cpu(vmcs_run, pcpu_id) == vcpu->arch.vmcs)) {
		kick_pcpu(pcpu_id);
	}
}

/*
 * @pre (&vcpu->stack[CONFIG_STACK_SIZE] & (CPU_STACK_ALIGN - 1UL)) == 0
 */
static uint64_t build_stack_frame(struct acrn_vcpu *vcpu)
{
	uint64_t stacktop = (uint64_t)&vcpu->stack[CONFIG_STACK_SIZE];
	struct stack_frame *frame;
	uint64_t *ret;

	frame = (struct stack_frame *)stacktop;
	frame -= 1;

	frame->magic = SP_BOTTOM_MAGIC;
	frame->rip = (uint64_t)vcpu->thread_obj.thread_entry; /*return address*/
	frame->rflag = 0UL;
	frame->rbx = 0UL;
	frame->rbp = 0UL;
	frame->r12 = 0UL;
	frame->r13 = 0UL;
	frame->r14 = 0UL;
	frame->r15 = 0UL;
	frame->rdi = (uint64_t)&vcpu->thread_obj;

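	/* host_sp starts at &frame->rdi, so arch_switch_to() pops the registers
	 * in the order laid out in struct stack_frame above.
	 */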
	ret = &frame->rdi;

	return (uint64_t) ret;
}

/* NOTE:
 * The vcpu should be paused before calling this function.
 * @pre vcpu != NULL
 * @pre vcpu->state == VCPU_ZOMBIE
 */
void reset_vcpu(struct acrn_vcpu *vcpu, enum reset_mode mode)
{
	pr_dbg("vcpu%hu reset", vcpu->vcpu_id);

	vcpu_reset_internal(vcpu, mode);
	vcpu_set_state(vcpu, VCPU_INIT);
}

void zombie_vcpu(struct acrn_vcpu *vcpu, enum vcpu_state new_state)
{
	enum vcpu_state prev_state;
	uint16_t pcpu_id = pcpuid_from_vcpu(vcpu);

	pr_dbg("vcpu%hu paused, new state: %d", vcpu->vcpu_id, new_state);

	if (((vcpu->state == VCPU_RUNNING) || (vcpu->state == VCPU_INIT)) && (new_state == VCPU_ZOMBIE)) {
		prev_state = vcpu->state;
		vcpu_set_state(vcpu, new_state);

		if (prev_state == VCPU_RUNNING) {
			if (pcpu_id == get_pcpu_id()) {
				sleep_thread(&vcpu->thread_obj);
			} else {
				sleep_thread_sync(&vcpu->thread_obj);
			}
		}
	}
}

void save_xsave_area(__unused struct acrn_vcpu *vcpu, struct ext_context *ectx)
{
	if (pcpu_has_cap(X86_FEATURE_XSAVES)) {
		ectx->xcr0 = read_xcr(0);
		write_xcr(0, ectx->xcr0 | XSAVE_SSE);
		xsaves(&ectx->xs_area, UINT64_MAX);
	}
}

void rstore_xsave_area(const struct acrn_vcpu *vcpu, const struct ext_context *ectx)
{
	if (pcpu_has_cap(X86_FEATURE_XSAVES)) {
		/*
		 * Restore the XSAVE area if any of the following conditions is met:
		 * 1. "vcpu->launched" is false (state initialization for the guest)
		 * 2. "vcpu->arch.xsave_enabled" is true (state restore for the guest)
		 *
		 * Before the vCPU is launched, condition 1 is satisfied.
		 * After the vCPU is launched, condition 2 is satisfied because
		 * "vcpu->arch.xsave_enabled" is consistent with pcpu_has_cap(X86_FEATURE_XSAVES).
		 *
		 * Therefore, the checks against "vcpu->launched" and "vcpu->arch.xsave_enabled" can be eliminated here.
		 */
		write_xcr(0, ectx->xcr0 | XSAVE_SSE);
		msr_write(MSR_IA32_XSS, vcpu_get_guest_msr(vcpu, MSR_IA32_XSS));
		xrstors(&ectx->xs_area, UINT64_MAX);
		write_xcr(0, ectx->xcr0);
	}
}

/* TODO:
 * Now we have switch_out and switch_in callbacks for each thread_object, and the
 * scheduler will call them on every thread switch. We could implement lazy context
 * switch, which only performs a context switch when really needed.
 */
static void context_switch_out(struct thread_object *prev)
{
	struct acrn_vcpu *vcpu = container_of(prev, struct acrn_vcpu, thread_obj);
	struct ext_context *ectx = &(vcpu->arch.contexts[vcpu->arch.cur_context].ext_ctx);

	/* We don't flush TLB as we assume each vcpu has different vpid */
	ectx->ia32_star = msr_read(MSR_IA32_STAR);
	ectx->ia32_cstar = msr_read(MSR_IA32_CSTAR);
	ectx->ia32_lstar = msr_read(MSR_IA32_LSTAR);
	ectx->ia32_fmask = msr_read(MSR_IA32_FMASK);
	ectx->ia32_kernel_gs_base = msr_read(MSR_IA32_KERNEL_GS_BASE);
	ectx->tsc_aux = msr_read(MSR_IA32_TSC_AUX);

	save_xsave_area(vcpu, ectx);
}

static void context_switch_in(struct thread_object *next)
{
	struct acrn_vcpu *vcpu = container_of(next, struct acrn_vcpu, thread_obj);
	struct ext_context *ectx = &(vcpu->arch.contexts[vcpu->arch.cur_context].ext_ctx);
	uint64_t vmsr_val;

	load_vmcs(vcpu);

	msr_write(MSR_IA32_STAR, ectx->ia32_star);
	msr_write(MSR_IA32_CSTAR, ectx->ia32_cstar);
	msr_write(MSR_IA32_LSTAR, ectx->ia32_lstar);
	msr_write(MSR_IA32_FMASK, ectx->ia32_fmask);
	msr_write(MSR_IA32_KERNEL_GS_BASE, ectx->ia32_kernel_gs_base);
	msr_write(MSR_IA32_TSC_AUX, ectx->tsc_aux);

	if (pcpu_has_cap(X86_FEATURE_WAITPKG)) {
		vmsr_val = vcpu_get_guest_msr(vcpu, MSR_IA32_UMWAIT_CONTROL);
		if (vmsr_val != msr_read(MSR_IA32_UMWAIT_CONTROL)) {
			msr_write(MSR_IA32_UMWAIT_CONTROL, vmsr_val);
		}
	}

	load_iwkey(vcpu);

	rstore_xsave_area(vcpu, ectx);
}

/**
 * @pre vcpu != NULL
 * @pre vcpu->state == VCPU_INIT
 */
void launch_vcpu(struct acrn_vcpu *vcpu)
{
	uint16_t pcpu_id = pcpuid_from_vcpu(vcpu);

	pr_dbg("vcpu%hu scheduled on pcpu%hu", vcpu->vcpu_id, pcpu_id);
	vcpu_set_state(vcpu, VCPU_RUNNING);
	wake_thread(&vcpu->thread_obj);
}

/* helper function for vcpu creation */
int32_t prepare_vcpu(struct acrn_vm *vm, uint16_t pcpu_id)
{
	int32_t ret, i;
	struct acrn_vcpu *vcpu = NULL;
	char thread_name[16];

	ret = create_vcpu(pcpu_id, vm, &vcpu);
	if (ret == 0) {
		snprintf(thread_name, 16U, "vm%hu:vcpu%hu", vm->vm_id, vcpu->vcpu_id);
		(void)strncpy_s(vcpu->thread_obj.name, 16U, thread_name, 16U);
		vcpu->thread_obj.sched_ctl = &per_cpu(sched_ctl, pcpu_id);
		vcpu->thread_obj.thread_entry = vcpu_thread;
		vcpu->thread_obj.pcpu_id = pcpu_id;
		vcpu->thread_obj.host_sp = build_stack_frame(vcpu);
		vcpu->thread_obj.switch_out = context_switch_out;
		vcpu->thread_obj.switch_in = context_switch_in;
		init_thread_data(&vcpu->thread_obj, &get_vm_config(vm->vm_id)->sched_params);
		for (i = 0; i < VCPU_EVENT_NUM; i++) {
			init_event(&vcpu->events[i]);
		}
	}

	return ret;
}

/**
 * @pre vcpu != NULL
 */
uint16_t pcpuid_from_vcpu(const struct acrn_vcpu *vcpu)
{
	return sched_get_pcpuid(&vcpu->thread_obj);
}

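/* Translate a bitmap keyed by vCPU ID into a bitmap keyed by the pCPU each
 * of those vCPUs runs on.
 */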
uint64_t vcpumask2pcpumask(struct acrn_vm *vm, uint64_t vdmask)
{
	uint16_t vcpu_id;
	uint64_t dmask = 0UL;
	struct acrn_vcpu *vcpu;

	for (vcpu_id = 0U; vcpu_id < vm->hw.created_vcpus; vcpu_id++) {
		if ((vdmask & (1UL << vcpu_id)) != 0UL) {
			vcpu = vcpu_from_vid(vm, vcpu_id);
			bitmap_set_nolock(pcpuid_from_vcpu(vcpu), &dmask);
		}
	}

	return dmask;
}

/*
 * @brief handle posted interrupts
 *
 * VT-d PI handler, find the corresponding vCPU for this IRQ,
 * if the associated PID's bit ON is set, wake it up.
 *
 * shutdown_vm would unregister the devices before offline_vcpu is called,
 * so spinlock is not needed to protect access to vcpu_array and vcpu.
 *
 * @pre (vcpu_index < CONFIG_MAX_VM_NUM) && (get_pi_desc(get_cpu_var(vcpu_array)[vcpu_index]) != NULL)
 */
void vcpu_handle_pi_notification(uint32_t vcpu_index)
{
	struct acrn_vcpu *vcpu = get_cpu_var(vcpu_array)[vcpu_index];

	ASSERT(vcpu_index < CONFIG_MAX_VM_NUM, "");

	if (vcpu != NULL) {
		struct pi_desc *pid = get_pi_desc(vcpu);

		if (bitmap_test(POSTED_INTR_ON, &(pid->control.value))) {
			/*
			 * Perform same as vlapic_accept_intr():
			 * Wake up the waiting thread, set the NEED_RESCHEDULE flag,
			 * at a point schedule() will be called to make scheduling decisions.
			 *
			 * Record this request as ACRN_REQUEST_EVENT,
			 * so that vlapic_inject_intr() will sync PIR to vIRR
			 */
			vcpu_make_request(vcpu, ACRN_REQUEST_EVENT);
			signal_event(&vcpu->events[VCPU_EVENT_VIRTUAL_INTERRUPT]);
		}
	}
}

/*
 * @brief Update the state of vCPU and state of vlapic
 *
 * The vlapic state of VM shall be updated for some vCPU
 * state update cases, such as from VCPU_INIT to VCPU_RUNNING.
 *
 * @pre (vcpu != NULL)
 */
void vcpu_set_state(struct acrn_vcpu *vcpu, enum vcpu_state new_state)
{
	vcpu->state = new_state;
	update_vm_vlapic_state(vcpu->vm);
}