1 /*
2 * Copyright (c) 2025 Travis Geiselbrecht
3 *
4 * Use of this source code is governed by a MIT-style
5 * license that can be found in the LICENSE file or at
6 * https://opensource.org/licenses/MIT
7 */
8 #include "arch/x86/pv.h"
9
10 #include <lk/err.h>
11 #include <lk/trace.h>
12 #include <assert.h>
13 #include <stdint.h>
14 #include <inttypes.h>
15 #include <arch/x86/feature.h>
16 #include <kernel/vm.h>
17
18 #define LOCAL_TRACE 0
19
20 #if !X86_LEGACY
21
22 // Deals with paravirtualized clock sources and event timers on the PC platform,
23 // specifically KVM.
24
25 // From https://www.kernel.org/doc/html/v6.14/virt/kvm/x86/msr.html
// Shared-memory record the hypervisor fills in when the wall-clock MSR is
// written: wall time at the moment of the request. Layout is fixed KVM ABI
// (see the kernel doc referenced above); 'version' is bumped around updates
// seqlock-style so readers can detect a torn snapshot.
struct pvclock_wall_clock {
    uint32_t version;   // odd while the hypervisor is updating the record
    uint32_t sec;       // wall-clock seconds
    uint32_t nsec;      // wall-clock nanoseconds
} __PACKED;
// Guard the ABI layout: the hypervisor writes exactly 12 bytes here.
static_assert(sizeof(struct pvclock_wall_clock) == 12, "pvclock_wall_clock size mismatch");
32
// Per-vcpu time record the hypervisor keeps updated once the system-time MSR
// is armed. Fixed KVM ABI layout. Guest time is derived from the TSC via:
//   ns = ((tsc - tsc_timestamp) * tsc_to_system_mul >> 32) shifted by
//   tsc_shift, plus system_time.
struct pvclock_vcpu_time_info {
    uint32_t version;           // odd while the hypervisor is updating the record
    uint32_t pad0;
    uint64_t tsc_timestamp;     // host TSC value at the time of the last update
    uint64_t system_time;       // guest system time (ns) at tsc_timestamp
    uint32_t tsc_to_system_mul; // 32.32 fixed-point tsc -> ns multiplier
    int8_t tsc_shift;           // binary shift applied before the multiply (signed)
    uint8_t flags;              // VCPU_TIME_INFO_FLAG_* bits
    uint8_t pad[2];
} __PACKED;
// Guard the ABI layout: the hypervisor writes exactly 32 bytes here.
static_assert(sizeof(struct pvclock_vcpu_time_info) == 32, "pvclock_vcpu_time_info size mismatch");
// Set in .flags when the clocksource is stable (monotonic across vcpus).
#define VCPU_TIME_INFO_FLAG_STABLE 0x1
45
// Pointers into the shared page registered with the hypervisor in
// pvclock_init(); NULL until init succeeds. volatile because the hypervisor
// updates these records behind the guest's back.
static volatile struct pvclock_wall_clock *wall_clock;
static volatile struct pvclock_vcpu_time_info *vcpu_time_info;
48
pvclock_init(void)49 status_t pvclock_init(void) {
50 uint32_t clocksource_msr_base = 0;
51 if (x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE)) {
52 clocksource_msr_base = 0x11;
53 }
54 if (x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE2)) {
55 clocksource_msr_base = 0x4b564d00;
56 }
57 if (!clocksource_msr_base) {
58 return ERR_NOT_SUPPORTED;
59 }
60 dprintf(INFO, "pv_clock: clocksource detected, msr base %#x\n", clocksource_msr_base);
61
62 // map a page of memory and point the KVM clocksource msrs at it
63 void *clocksource_page;
64 status_t err = vmm_alloc(vmm_get_kernel_aspace(), "lapic", PAGE_SIZE, &clocksource_page, 0, 0, 0);
65 if (err != NO_ERROR) {
66 printf("pv_clock: failed to allocate page for clocksource msrs\n");
67 return err;
68 }
69
70 paddr_t paddr;
71 arch_mmu_query(&vmm_get_kernel_aspace()->arch_aspace, (vaddr_t)clocksource_page, &paddr, NULL);
72 LTRACEF("clocksource page %p, paddr %#" PRIxPTR "\n", clocksource_page, paddr);
73
74 write_msr(clocksource_msr_base, paddr);
75 write_msr(clocksource_msr_base + 1, paddr + sizeof(struct pvclock_wall_clock) + 1);
76
77 wall_clock = (struct pvclock_wall_clock *)clocksource_page;
78 vcpu_time_info = (struct pvclock_vcpu_time_info *)(wall_clock + 1);
79
80 dprintf(SPEW, "pv_clock: wall clock version %u, sec %u, nsec %u\n",
81 wall_clock->version, wall_clock->sec, wall_clock->nsec);
82
83 dprintf(SPEW, "pv_clock: vcpu time info version %u, tsc timestamp %llu, system time %llu\n",
84 vcpu_time_info->version, vcpu_time_info->tsc_timestamp, vcpu_time_info->system_time);
85 dprintf(SPEW, "pv_clock: tsc to system mul %u, tsc shift %d, flags %u\n",
86 vcpu_time_info->tsc_to_system_mul, vcpu_time_info->tsc_shift, vcpu_time_info->flags);
87
88 return NO_ERROR;
89 }
90
pvclock_get_tsc_freq(void)91 uint64_t pvclock_get_tsc_freq(void) {
92 if (!vcpu_time_info) {
93 return 0;
94 }
95
96 uint32_t tsc_mul = 0;
97 int8_t tsc_shift = 0;
98 uint32_t pre_version = 0, post_version = 0;
99 do {
100 pre_version = vcpu_time_info->version;
101 if (pre_version % 2 != 0) {
102 asm("pause");
103 continue;
104 }
105 tsc_mul = vcpu_time_info->tsc_to_system_mul;
106 tsc_shift = vcpu_time_info->tsc_shift;
107 post_version = vcpu_time_info->version;
108 } while (pre_version != post_version);
109
110 uint64_t tsc_khz = 1000000ULL << 32;
111 tsc_khz = tsc_khz / tsc_mul;
112 if (tsc_shift > 0) {
113 tsc_khz >>= tsc_shift;
114 } else {
115 tsc_khz <<= -tsc_shift;
116 }
117 return tsc_khz * 1000;
118 }
119
pv_clock_is_stable(void)120 bool pv_clock_is_stable(void) {
121 if (!vcpu_time_info) {
122 return false;
123 }
124 bool is_stable = (vcpu_time_info->flags & VCPU_TIME_INFO_FLAG_STABLE) ||
125 x86_feature_test(X86_FEATURE_KVM_CLOCKSOURCE_STABLE);
126 return is_stable;
127 }
128
129 #endif // !X86_LEGACY