1 /*
2 * Copyright (C) 2018-2022 Intel Corporation.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7 #include <types.h>
8 #include <asm/lib/bits.h>
9 #include <asm/page.h>
10 #include <asm/e820.h>
11 #include <asm/mmu.h>
12 #include <asm/guest/ept.h>
13 #include <asm/guest/vept.h>
14 #include <asm/vtd.h>
15 #include <asm/lapic.h>
16 #include <asm/irq.h>
17 #include <asm/per_cpu.h>
18 #include <asm/cpufeatures.h>
19 #include <asm/cpu_caps.h>
20 #include <acpi.h>
21 #include <asm/ioapic.h>
22 #include <asm/trampoline.h>
23 #include <asm/cpuid.h>
24 #include <version.h>
25 #include <asm/vmx.h>
26 #include <asm/msr.h>
27 #include <asm/host_pm.h>
28 #include <ptdev.h>
29 #include <logmsg.h>
30 #include <asm/rdt.h>
31 #include <asm/sgx.h>
32 #include <uart16550.h>
33 #include <vpci.h>
34 #include <ivshmem.h>
35 #include <asm/rtcm.h>
36 #include <reloc.h>
37 #include <asm/tsc.h>
38 #include <ticks.h>
39 #include <delay.h>
40 #include <thermal.h>
41
42 #define CPU_UP_TIMEOUT 100U /* millisecond */
43 #define CPU_DOWN_TIMEOUT 100U /* millisecond */
44
45 struct per_cpu_region per_cpu_data[MAX_PCPU_NUM] __aligned(PAGE_SIZE);
46 static uint16_t phys_cpu_num = 0U;
47 static uint64_t pcpu_sync = 0UL;
48 static uint64_t startup_paddr = 0UL;
49
50 /* physical cpu active bitmap, support up to 64 cpus */
51 static volatile uint64_t pcpu_active_bitmap = 0UL;
52
53 static void init_pcpu_xsave(void);
54 static void init_keylocker(void);
55 static void set_current_pcpu_id(uint16_t pcpu_id);
56 static void print_hv_banner(void);
57 static uint16_t get_pcpu_id_from_lapic_id(uint32_t lapic_id);
58 static uint64_t start_tick __attribute__((__section__(".bss_noinit")));
59
60 /**
61 * @pre phys_cpu_num <= MAX_PCPU_NUM
62 */
init_percpu_lapic_id(void)63 static bool init_percpu_lapic_id(void)
64 {
65 uint16_t i;
66 uint32_t lapic_id_array[MAX_PCPU_NUM];
67 bool success = false;
68
69 /* Save all lapic_id detected via parse_mdt in lapic_id_array */
70 phys_cpu_num = parse_madt(lapic_id_array);
71
72 if ((phys_cpu_num != 0U) && (phys_cpu_num <= MAX_PCPU_NUM)) {
73 for (i = 0U; i < phys_cpu_num; i++) {
74 per_cpu(lapic_id, i) = lapic_id_array[i];
75 }
76 success = true;
77 }
78
79 return success;
80 }
81
pcpu_set_current_state(uint16_t pcpu_id,enum pcpu_boot_state state)82 static void pcpu_set_current_state(uint16_t pcpu_id, enum pcpu_boot_state state)
83 {
84 /* Check if state is initializing */
85 if (state == PCPU_STATE_INITIALIZING) {
86
87 /* Save this CPU's logical ID to the TSC AUX MSR */
88 set_current_pcpu_id(pcpu_id);
89 }
90
91 /* Set state for the specified CPU */
92 per_cpu(boot_state, pcpu_id) = state;
93 }
94
95 /*
96 * @post return <= MAX_PCPU_NUM
97 */
get_pcpu_nums(void)98 uint16_t get_pcpu_nums(void)
99 {
100 return phys_cpu_num;
101 }
102
is_pcpu_active(uint16_t pcpu_id)103 bool is_pcpu_active(uint16_t pcpu_id)
104 {
105 return bitmap_test(pcpu_id, &pcpu_active_bitmap);
106 }
107
get_active_pcpu_bitmap(void)108 uint64_t get_active_pcpu_bitmap(void)
109 {
110 return pcpu_active_bitmap;
111 }
112
/**
 * @brief Set MSR_TEST_CTL_AC_SPLITLOCK in MSR_TEST_CTL on the current pCPU.
 *
 * Does nothing unless CONFIG_SPLIT_LOCK_DETECTION_ENABLED is defined and
 * has_core_cap(CORE_CAP_SPLIT_LOCK) reports the capability.
 */
static void enable_ac_for_splitlock(void)
{
#ifdef CONFIG_SPLIT_LOCK_DETECTION_ENABLED
	if (has_core_cap(CORE_CAP_SPLIT_LOCK)) {
		/* read-modify-write so the other MSR_TEST_CTL bits are preserved */
		msr_write(MSR_TEST_CTL, msr_read(MSR_TEST_CTL) | MSR_TEST_CTL_AC_SPLITLOCK);
	}
#endif /* CONFIG_SPLIT_LOCK_DETECTION_ENABLED */
}
125
/**
 * @brief Set MSR_TEST_CTL_GP_UCLOCK in MSR_TEST_CTL on the current pCPU.
 *
 * Does nothing unless CONFIG_UC_LOCK_DETECTION_ENABLED is defined and
 * has_core_cap(CORE_CAP_UC_LOCK) reports the capability.
 */
static void enable_gp_for_uclock(void)
{
#ifdef CONFIG_UC_LOCK_DETECTION_ENABLED
	if (has_core_cap(CORE_CAP_UC_LOCK)) {
		/* read-modify-write so the other MSR_TEST_CTL bits are preserved */
		msr_write(MSR_TEST_CTL, msr_read(MSR_TEST_CTL) | MSR_TEST_CTL_GP_UCLOCK);
	}
#endif /* CONFIG_UC_LOCK_DETECTION_ENABLED */
}
138
init_pcpu_pre(bool is_bsp)139 void init_pcpu_pre(bool is_bsp)
140 {
141 uint16_t pcpu_id;
142 int32_t ret;
143
144 if (is_bsp) {
145 pcpu_id = BSP_CPU_ID;
146 start_tick = cpu_ticks();
147
148 /* Get CPU capabilities thru CPUID, including the physical address bit
149 * limit which is required for initializing paging.
150 */
151 init_pcpu_capabilities();
152
153 if (detect_hardware_support() != 0) {
154 panic("hardware not support!");
155 }
156
157 init_pcpu_model_name();
158
159 load_pcpu_state_data();
160
161 init_frequency_policy();
162
163 init_e820();
164
165 /* reserve ppt buffer from e820 */
166 allocate_ppt_pages();
167
168 /* Initialize the hypervisor paging */
169 init_paging();
170
171 /*
172 * Need update uart_base_address here for vaddr2paddr mapping may changed
173 * WARNNING: DO NOT CALL PRINTF BETWEEN ENABLE PAGING IN init_paging AND HERE!
174 */
175 uart16550_init(false);
176
177 early_init_lapic();
178
179 init_acpi();
180 #ifdef CONFIG_ACPI_PARSE_ENABLED
181 ret = acpi_fixup();
182 if (ret != 0) {
183 panic("failed to parse/fix up ACPI table!");
184 }
185 #endif
186
187 if (!init_percpu_lapic_id()) {
188 panic("failed to init_percpu_lapic_id!");
189 }
190
191 ret = init_ioapic_id_info();
192 if (ret != 0) {
193 panic("System IOAPIC info is incorrect!");
194 }
195
196 #ifdef CONFIG_VCAT_ENABLED
197 init_intercepted_cat_msr_list();
198 #endif
199
200 /* NOTE: this must call after MMCONFIG is parsed in acpi_fixup() and before APs are INIT.
201 * We only support platform with MMIO based CFG space access.
202 * IO port access only support in debug version.
203 */
204 pci_switch_to_mmio_cfg_ops();
205 } else {
206
207 /* Switch this CPU to use the same page tables set-up by the
208 * primary/boot CPU
209 */
210 enable_paging();
211
212 early_init_lapic();
213
214 pcpu_id = get_pcpu_id_from_lapic_id(get_cur_lapic_id());
215 if (pcpu_id >= MAX_PCPU_NUM) {
216 panic("Invalid pCPU ID!");
217 }
218 }
219
220 bitmap_set_lock(pcpu_id, &pcpu_active_bitmap);
221
222 /* Set state for this CPU to initializing */
223 pcpu_set_current_state(pcpu_id, PCPU_STATE_INITIALIZING);
224 }
225
init_pcpu_post(uint16_t pcpu_id)226 void init_pcpu_post(uint16_t pcpu_id)
227 {
228 #ifdef STACK_PROTECTOR
229 set_fs_base();
230 #endif
231 load_gdtr_and_tr();
232
233 enable_ac_for_splitlock();
234 enable_gp_for_uclock();
235
236 init_pcpu_xsave();
237
238 #ifdef CONFIG_RETPOLINE
239 disable_rrsba();
240 #endif
241
242 if (pcpu_id == BSP_CPU_ID) {
243 /* Print Hypervisor Banner */
244 print_hv_banner();
245
246 /* Initialie HPET */
247 hpet_init();
248
249 /* Calibrate TSC Frequency */
250 calibrate_tsc();
251
252 pr_acrnlog("HV: %s-%s-%s %s%s%s%s %s@%s build by %s, start time %luus",
253 HV_BRANCH_VERSION, HV_COMMIT_TIME, HV_COMMIT_DIRTY, HV_BUILD_TYPE,
254 (sizeof(HV_COMMIT_TAGS) > 1) ? "(tag: " : "", HV_COMMIT_TAGS,
255 (sizeof(HV_COMMIT_TAGS) > 1) ? ")" : "", HV_BUILD_SCENARIO,
256 HV_BUILD_BOARD, HV_BUILD_USER, ticks_to_us(start_tick));
257
258 pr_acrnlog("Detect processor: %s", (get_pcpu_info())->model_name);
259
260 pr_dbg("Core %hu is up", BSP_CPU_ID);
261
262 /* Warn for security feature not ready */
263 if (!check_cpu_security_cap()) {
264 pr_fatal("SECURITY WARNING!!!!!!");
265 pr_fatal("Please apply the latest CPU uCode patch!");
266 }
267
268 /* Initialize interrupts */
269 init_interrupt(BSP_CPU_ID);
270
271 timer_init();
272 thermal_init();
273 setup_notification();
274 setup_pi_notification();
275
276 if (init_iommu() != 0) {
277 panic("failed to initialize iommu!");
278 }
279
280 #ifdef CONFIG_IVSHMEM_ENABLED
281 init_ivshmem_shared_memory();
282 #endif
283 init_pci_pdev_list(); /* init_iommu must come before this */
284 ptdev_init();
285
286 if (init_sgx() != 0) {
287 panic("failed to initialize sgx!");
288 }
289
290 /*
291 * Reserve memory from platform E820 for EPT 4K pages for all VMs
292 */
293 reserve_buffer_for_ept_pages();
294
295 init_vept();
296
297 pcpu_sync = ALL_CPUS_MASK;
298 /* Start all secondary cores */
299 startup_paddr = prepare_trampoline();
300 if (!start_pcpus(AP_MASK)) {
301 panic("Failed to start all secondary cores!");
302 }
303
304 ASSERT(get_pcpu_id() == BSP_CPU_ID, "");
305 } else {
306 pr_dbg("Core %hu is up", pcpu_id);
307
308 pr_warn("Skipping VM configuration check which should be done before building HV binary.");
309
310 /* Initialize secondary processor interrupts. */
311 init_interrupt(pcpu_id);
312
313 timer_init();
314 thermal_init();
315 ptdev_init();
316 }
317
318 if (!init_software_sram(pcpu_id == BSP_CPU_ID)) {
319 panic("failed to initialize software SRAM!");
320 }
321
322 apply_frequency_policy();
323
324 init_sched(pcpu_id);
325
326 #ifdef CONFIG_RDT_ENABLED
327 setup_clos(pcpu_id);
328 #endif
329
330 enable_smep();
331
332 enable_smap();
333
334 init_keylocker();
335
336 bitmap_clear_lock(pcpu_id, &pcpu_sync);
337 /* Waiting for each pCPU has done its initialization before to continue */
338 wait_sync_change(&pcpu_sync, 0UL);
339 }
340
get_pcpu_id_from_lapic_id(uint32_t lapic_id)341 static uint16_t get_pcpu_id_from_lapic_id(uint32_t lapic_id)
342 {
343 uint16_t i;
344 uint16_t pcpu_id = INVALID_CPU_ID;
345
346 for (i = 0U; i < phys_cpu_num; i++) {
347 if (per_cpu(lapic_id, i) == lapic_id) {
348 pcpu_id = i;
349 break;
350 }
351 }
352
353 return pcpu_id;
354 }
355
start_pcpu(uint16_t pcpu_id)356 static void start_pcpu(uint16_t pcpu_id)
357 {
358 uint32_t timeout;
359
360 /* Update the stack for pcpu */
361 stac();
362 write_trampoline_stack_sym(pcpu_id);
363 clac();
364
365 /* Using the MFENCE to make sure trampoline code
366 * has been updated (clflush) into memory beforing start APs.
367 */
368 cpu_memory_barrier();
369 send_startup_ipi(pcpu_id, startup_paddr);
370
371 /* Wait until the pcpu with pcpu_id is running and set the active bitmap or
372 * configured time-out has expired
373 */
374 timeout = CPU_UP_TIMEOUT * 1000U;
375 while (!is_pcpu_active(pcpu_id) && (timeout != 0U)) {
376 /* Delay 10us */
377 udelay(10U);
378
379 /* Decrement timeout value */
380 timeout -= 10U;
381 }
382
383 /* Check to see if expected CPU is actually up */
384 if (!is_pcpu_active(pcpu_id)) {
385 pr_fatal("Secondary CPU%hu failed to come up", pcpu_id);
386 pcpu_set_current_state(pcpu_id, PCPU_STATE_DEAD);
387 }
388 }
389
390
391 /**
392 * @brief Start all cpus if the bit is set in mask except itself
393 *
394 * @param[in] mask bits mask of cpus which should be started
395 *
396 * @return true if all cpus set in mask are started
397 * @return false if there are any cpus set in mask aren't started
398 */
start_pcpus(uint64_t mask)399 bool start_pcpus(uint64_t mask)
400 {
401 uint16_t i;
402 uint16_t pcpu_id = get_pcpu_id();
403 uint64_t expected_start_mask = mask;
404
405 i = ffs64(expected_start_mask);
406 while (i != INVALID_BIT_INDEX) {
407 bitmap_clear_nolock(i, &expected_start_mask);
408
409 if (pcpu_id == i) {
410 continue; /* Avoid start itself */
411 }
412
413 start_pcpu(i);
414 i = ffs64(expected_start_mask);
415 }
416
417 return ((pcpu_active_bitmap & mask) == mask);
418 }
419
make_pcpu_offline(uint16_t pcpu_id)420 void make_pcpu_offline(uint16_t pcpu_id)
421 {
422 bitmap_set_lock(NEED_OFFLINE, &per_cpu(pcpu_flag, pcpu_id));
423 if (get_pcpu_id() != pcpu_id) {
424 kick_pcpu(pcpu_id);
425 }
426 }
427
need_offline(uint16_t pcpu_id)428 bool need_offline(uint16_t pcpu_id)
429 {
430 return bitmap_test_and_clear_lock(NEED_OFFLINE, &per_cpu(pcpu_flag, pcpu_id));
431 }
432
wait_pcpus_offline(uint64_t mask)433 void wait_pcpus_offline(uint64_t mask)
434 {
435 uint32_t timeout;
436
437 timeout = CPU_DOWN_TIMEOUT * 1000U;
438 while (((pcpu_active_bitmap & mask) != 0UL) && (timeout != 0U)) {
439 udelay(10U);
440 timeout -= 10U;
441 }
442 }
443
stop_pcpus(void)444 void stop_pcpus(void)
445 {
446 uint16_t pcpu_id;
447 uint64_t mask = 0UL;
448
449 for (pcpu_id = 0U; pcpu_id < phys_cpu_num; pcpu_id++) {
450 if (get_pcpu_id() == pcpu_id) { /* avoid offline itself */
451 continue;
452 }
453
454 bitmap_set_nolock(pcpu_id, &mask);
455 make_pcpu_offline(pcpu_id);
456 }
457
458 /**
459 * Timeout never occurs here:
460 * If target cpu received a NMI and panic, it has called cpu_dead and make_pcpu_offline success.
461 * If target cpu is running, an IPI will be delivered to it and then call cpu_dead.
462 */
463 wait_pcpus_offline(mask);
464 }
465
cpu_do_idle(void)466 void cpu_do_idle(void)
467 {
468 #ifdef CONFIG_KEEP_IRQ_DISABLED
469 asm_pause();
470 #else
471 uint16_t pcpu_id = get_pcpu_id();
472
473 if (per_cpu(mode_to_idle, pcpu_id) == IDLE_MODE_HLT) {
474 asm_safe_hlt();
475 } else {
476 struct acrn_vcpu *vcpu = get_ever_run_vcpu(pcpu_id);
477
478 if ((vcpu != NULL) && !is_lapic_pt_enabled(vcpu)) {
479 CPU_IRQ_ENABLE_ON_CONFIG();
480 }
481 asm_pause();
482 if ((vcpu != NULL) && !is_lapic_pt_enabled(vcpu)) {
483 CPU_IRQ_DISABLE_ON_CONFIG();
484 }
485 }
486 #endif
487 }
488
489 /**
490 * only run on current pcpu
491 */
cpu_dead(void)492 void cpu_dead(void)
493 {
494 /* For debug purposes, using a stack variable in the while loop enables
495 * us to modify the value using a JTAG probe and resume if needed.
496 */
497 int32_t halt = 1;
498 uint16_t pcpu_id = get_pcpu_id();
499
500 deinit_sched(pcpu_id);
501 if (bitmap_test(pcpu_id, &pcpu_active_bitmap)) {
502 /* clean up native stuff */
503 vmx_off();
504
505 stac();
506 flush_cache_range((void *)get_hv_image_base(), get_hv_image_size());
507 clac();
508
509 /* Set state to show CPU is dead */
510 pcpu_set_current_state(pcpu_id, PCPU_STATE_DEAD);
511 bitmap_clear_lock(pcpu_id, &pcpu_active_bitmap);
512
513 /* Halt the CPU */
514 do {
515 asm_hlt();
516 } while (halt != 0);
517 } else {
518 pr_err("pcpu%hu already dead", pcpu_id);
519 }
520 }
521
set_current_pcpu_id(uint16_t pcpu_id)522 static void set_current_pcpu_id(uint16_t pcpu_id)
523 {
524 /* Write TSC AUX register */
525 msr_write(ACRN_PSEUDO_PCPUID_MSR, (uint32_t) pcpu_id);
526 }
527
/**
 * @brief Print the hypervisor boot banner.
 */
static void print_hv_banner(void)
{
	const char *boot_msg = "ACRN Hypervisor\n\r";

	/* Print the message as data, never as the format string itself */
	printf("%s", boot_msg);
}
535
536 static
asm_monitor(volatile const uint64_t * addr,uint64_t ecx,uint64_t edx)537 inline void asm_monitor(volatile const uint64_t *addr, uint64_t ecx, uint64_t edx)
538 {
539 asm volatile("monitor\n" : : "a" (addr), "c" (ecx), "d" (edx));
540 }
541
542 static
asm_mwait(uint64_t eax,uint64_t ecx)543 inline void asm_mwait(uint64_t eax, uint64_t ecx)
544 {
545 asm volatile("mwait\n" : : "a" (eax), "c" (ecx));
546 }
547
/**
 * @brief Block until *sync == wake_sync.
 *
 * Uses monitor/mwait when has_monitor_cap() reports support and a pause-based
 * polling loop otherwise.
 *
 * @param[in] sync      location to watch
 * @param[in] wake_sync value that ends the wait
 */
void wait_sync_change(volatile const uint64_t *sync, uint64_t wake_sync)
{
	if (!has_monitor_cap()) {
		/* No monitor/mwait: poll with pause */
		while ((*sync) != wake_sync) {
			asm_pause();
		}
	} else {
		while ((*sync) != wake_sync) {
			/* Arm the monitor first, then re-check before sleeping so a
			 * write that lands between the two reads is not missed.
			 */
			asm_monitor(sync, 0UL, 0UL);
			if ((*sync) != wake_sync) {
				asm_mwait(0UL, 0UL);
			}
		}
	}
}
565
init_pcpu_xsave(void)566 static void init_pcpu_xsave(void)
567 {
568 uint64_t val64;
569 struct cpuinfo_x86 *cpu_info;
570 uint64_t xcr0, xss;
571 uint32_t eax, ecx, unused, xsave_area_size;
572
573 if (pcpu_has_cap(X86_FEATURE_XSAVE)) {
574 CPU_CR_READ(cr4, &val64);
575 val64 |= CR4_OSXSAVE;
576 CPU_CR_WRITE(cr4, val64);
577
578 if (get_pcpu_id() == BSP_CPU_ID) {
579 cpuid_subleaf(CPUID_FEATURES, 0x0U, &unused, &unused, &ecx, &unused);
580
581 /* if set, update it */
582 if ((ecx & CPUID_ECX_OSXSAVE) != 0U) {
583 cpu_info = get_pcpu_info();
584 cpu_info->cpuid_leaves[FEAT_1_ECX] |= CPUID_ECX_OSXSAVE;
585
586 /* set xcr0 and xss with the componets bitmap get from cpuid */
587 xcr0 = ((uint64_t)cpu_info->cpuid_leaves[FEAT_D_0_EDX] << 32U)
588 + cpu_info->cpuid_leaves[FEAT_D_0_EAX];
589 xss = ((uint64_t)cpu_info->cpuid_leaves[FEAT_D_1_EDX] << 32U)
590 + cpu_info->cpuid_leaves[FEAT_D_1_ECX];
591 write_xcr(0, xcr0);
592 msr_write(MSR_IA32_XSS, xss);
593
594 /* get xsave area size, containing all the state components
595 * corresponding to bits currently set in XCR0 | IA32_XSS */
596 cpuid_subleaf(CPUID_XSAVE_FEATURES, 1U,
597 &eax,
598 &xsave_area_size,
599 &ecx,
600 &unused);
601 if (xsave_area_size > XSAVE_STATE_AREA_SIZE) {
602 panic("XSAVE area (%d bytes) exceeds the pre-allocated 4K region\n",
603 xsave_area_size);
604 }
605 }
606 }
607 }
608 }
609
init_keylocker(void)610 static void init_keylocker(void)
611 {
612 uint64_t val64;
613
614 /* Enable host CR4.KL if keylocker feature is supported */
615 if (pcpu_has_cap(X86_FEATURE_KEYLOCKER)) {
616 CPU_CR_READ(cr4, &val64);
617 val64 |= CR4_KL;
618 CPU_CR_WRITE(cr4, val64);
619 }
620 }
621
smpcall_write_msr_func(void * data)622 static void smpcall_write_msr_func(void *data)
623 {
624 struct msr_data_struct *msr = (struct msr_data_struct *)data;
625
626 msr_write(msr->msr_index, msr->write_val);
627 }
628
msr_write_pcpu(uint32_t msr_index,uint64_t value64,uint16_t pcpu_id)629 void msr_write_pcpu(uint32_t msr_index, uint64_t value64, uint16_t pcpu_id)
630 {
631 struct msr_data_struct msr = {0};
632 uint64_t mask = 0UL;
633
634 if (pcpu_id == get_pcpu_id()) {
635 msr_write(msr_index, value64);
636 } else {
637 msr.msr_index = msr_index;
638 msr.write_val = value64;
639 bitmap_set_nolock(pcpu_id, &mask);
640 smp_call_function(mask, smpcall_write_msr_func, &msr);
641 }
642 }
643
smpcall_read_msr_func(void * data)644 static void smpcall_read_msr_func(void *data)
645 {
646 struct msr_data_struct *msr = (struct msr_data_struct *)data;
647
648 msr->read_val = msr_read(msr->msr_index);
649 }
650
msr_read_pcpu(uint32_t msr_index,uint16_t pcpu_id)651 uint64_t msr_read_pcpu(uint32_t msr_index, uint16_t pcpu_id)
652 {
653 struct msr_data_struct msr = {0};
654 uint64_t mask = 0UL;
655 uint64_t ret = 0;
656
657 if (pcpu_id == get_pcpu_id()) {
658 ret = msr_read(msr_index);
659 } else {
660 msr.msr_index = msr_index;
661 bitmap_set_nolock(pcpu_id, &mask);
662 smp_call_function(mask, smpcall_read_msr_func, &msr);
663 ret = msr.read_val;
664 }
665
666 return ret;
667 }
668