/******************************************************************************
 * viridian.c
 *
 * An implementation of some Viridian enlightenments. See Microsoft's
 * Hypervisor Top Level Functional Specification (v5.0a) at:
 *
 * https://github.com/Microsoft/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v5.0.pdf
 *
 * for more information.
 */

#include <xen/sched.h>
#include <xen/version.h>
#include <xen/perfc.h>
#include <xen/hypercall.h>
#include <xen/domain_page.h>
#include <asm/guest_access.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/apic.h>
#include <asm/hvm/support.h>
#include <public/sched.h>
#include <public/hvm/hvm_op.h>

/* Viridian MSR numbers. */
#define HV_X64_MSR_GUEST_OS_ID                   0x40000000
#define HV_X64_MSR_HYPERCALL                     0x40000001
#define HV_X64_MSR_VP_INDEX                      0x40000002
#define HV_X64_MSR_RESET                         0x40000003
#define HV_X64_MSR_VP_RUNTIME                    0x40000010
#define HV_X64_MSR_TIME_REF_COUNT                0x40000020
#define HV_X64_MSR_REFERENCE_TSC                 0x40000021
#define HV_X64_MSR_TSC_FREQUENCY                 0x40000022
#define HV_X64_MSR_APIC_FREQUENCY                0x40000023
#define HV_X64_MSR_EOI                           0x40000070
#define HV_X64_MSR_ICR                           0x40000071
#define HV_X64_MSR_TPR                           0x40000072
#define HV_X64_MSR_VP_ASSIST_PAGE                0x40000073
#define HV_X64_MSR_SCONTROL                      0x40000080
#define HV_X64_MSR_SVERSION                      0x40000081
#define HV_X64_MSR_SIEFP                         0x40000082
#define HV_X64_MSR_SIMP                          0x40000083
#define HV_X64_MSR_EOM                           0x40000084
#define HV_X64_MSR_SINT0                         0x40000090
#define HV_X64_MSR_SINT1                         0x40000091
#define HV_X64_MSR_SINT2                         0x40000092
#define HV_X64_MSR_SINT3                         0x40000093
#define HV_X64_MSR_SINT4                         0x40000094
#define HV_X64_MSR_SINT5                         0x40000095
#define HV_X64_MSR_SINT6                         0x40000096
#define HV_X64_MSR_SINT7                         0x40000097
#define HV_X64_MSR_SINT8                         0x40000098
#define HV_X64_MSR_SINT9                         0x40000099
#define HV_X64_MSR_SINT10                        0x4000009A
#define HV_X64_MSR_SINT11                        0x4000009B
#define HV_X64_MSR_SINT12                        0x4000009C
#define HV_X64_MSR_SINT13                        0x4000009D
#define HV_X64_MSR_SINT14                        0x4000009E
#define HV_X64_MSR_SINT15                        0x4000009F
#define HV_X64_MSR_STIMER0_CONFIG                0x400000B0
#define HV_X64_MSR_STIMER0_COUNT                 0x400000B1
#define HV_X64_MSR_STIMER1_CONFIG                0x400000B2
#define HV_X64_MSR_STIMER1_COUNT                 0x400000B3
#define HV_X64_MSR_STIMER2_CONFIG                0x400000B4
#define HV_X64_MSR_STIMER2_COUNT                 0x400000B5
#define HV_X64_MSR_STIMER3_CONFIG                0x400000B6
#define HV_X64_MSR_STIMER3_COUNT                 0x400000B7
#define HV_X64_MSR_POWER_STATE_TRIGGER_C1        0x400000C1
#define HV_X64_MSR_POWER_STATE_TRIGGER_C2        0x400000C2
#define HV_X64_MSR_POWER_STATE_TRIGGER_C3        0x400000C3
#define HV_X64_MSR_POWER_STATE_CONFIG_C1         0x400000D1
#define HV_X64_MSR_POWER_STATE_CONFIG_C2         0x400000D2
#define HV_X64_MSR_POWER_STATE_CONFIG_C3         0x400000D3
#define HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE   0x400000E0
#define HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE 0x400000E1
#define HV_X64_MSR_STATS_VP_RETAIL_PAGE          0x400000E2
#define HV_X64_MSR_STATS_VP_INTERNAL_PAGE        0x400000E3
#define HV_X64_MSR_GUEST_IDLE                    0x400000F0
#define HV_X64_MSR_SYNTH_DEBUG_CONTROL           0x400000F1
#define HV_X64_MSR_SYNTH_DEBUG_STATUS            0x400000F2
#define HV_X64_MSR_SYNTH_DEBUG_SEND_BUFFER       0x400000F3
#define HV_X64_MSR_SYNTH_DEBUG_RECEIVE_BUFFER    0x400000F4
#define HV_X64_MSR_SYNTH_DEBUG_PENDING_BUFFER    0x400000F5
#define HV_X64_MSR_CRASH_P0                      0x40000100
#define HV_X64_MSR_CRASH_P1                      0x40000101
#define HV_X64_MSR_CRASH_P2                      0x40000102
#define HV_X64_MSR_CRASH_P3                      0x40000103
#define HV_X64_MSR_CRASH_P4                      0x40000104
#define HV_X64_MSR_CRASH_CTL                     0x40000105

#define VIRIDIAN_MSR_MIN HV_X64_MSR_GUEST_OS_ID
#define VIRIDIAN_MSR_MAX HV_X64_MSR_CRASH_CTL

/* Viridian Hypercall Status Codes. */
#define HV_STATUS_SUCCESS                       0x0000
#define HV_STATUS_INVALID_HYPERCALL_CODE        0x0002
#define HV_STATUS_INVALID_PARAMETER             0x0005

/* Viridian Hypercall Codes. */
#define HvFlushVirtualAddressSpace 0x0002
#define HvFlushVirtualAddressList  0x0003
#define HvNotifyLongSpinWait       0x0008
#define HvGetPartitionId           0x0046
#define HvExtCallQueryCapabilities 0x8001

/* Viridian Hypercall Flags. */
#define HV_FLUSH_ALL_PROCESSORS 1

/*
 * Viridian Partition Privilege Flags.
 *
 * This is taken from section 4.2.2 of the specification, and fixed for
 * style and correctness.
 */
typedef struct {
    /* Access to virtual MSRs */
    uint64_t AccessVpRunTimeReg:1;
    uint64_t AccessPartitionReferenceCounter:1;
    uint64_t AccessSynicRegs:1;
    uint64_t AccessSyntheticTimerRegs:1;
    uint64_t AccessIntrCtrlRegs:1;
    uint64_t AccessHypercallMsrs:1;
    uint64_t AccessVpIndex:1;
    uint64_t AccessResetReg:1;
    uint64_t AccessStatsReg:1;
    uint64_t AccessPartitionReferenceTsc:1;
    uint64_t AccessGuestIdleReg:1;
    uint64_t AccessFrequencyRegs:1;
    uint64_t AccessDebugRegs:1;
    uint64_t Reserved1:19;

    /* Access to hypercalls */
    uint64_t CreatePartitions:1;
    uint64_t AccessPartitionId:1;
    uint64_t AccessMemoryPool:1;
    uint64_t AdjustMessageBuffers:1;
    uint64_t PostMessages:1;
    uint64_t SignalEvents:1;
    uint64_t CreatePort:1;
    uint64_t ConnectPort:1;
    uint64_t AccessStats:1;
    uint64_t Reserved2:2;
    uint64_t Debugging:1;
    uint64_t CpuManagement:1;
    uint64_t Reserved3:1;
    uint64_t Reserved4:1;
    uint64_t Reserved5:1;
    uint64_t AccessVSM:1;
    uint64_t AccessVpRegisters:1;
    uint64_t Reserved6:1;
    uint64_t Reserved7:1;
    uint64_t EnableExtendedHypercalls:1;
    uint64_t StartVirtualProcessor:1;
    uint64_t Reserved8:10;
} HV_PARTITION_PRIVILEGE_MASK;

typedef union _HV_CRASH_CTL_REG_CONTENTS
{
    uint64_t AsUINT64;
    struct
    {
        uint64_t Reserved:63;
        uint64_t CrashNotify:1;
    } u;
} HV_CRASH_CTL_REG_CONTENTS;
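
/*
 * Illustrative example (not from the spec. text): CrashNotify occupies
 * the top bit of the register, so, given the bitfield layout used by
 * Xen's x86 compilers, a guest signalling a crash writes a value with
 * bit 63 set:
 *
 *     HV_CRASH_CTL_REG_CONTENTS ctl = { .u.CrashNotify = 1 };
 *     // ctl.AsUINT64 == 0x8000000000000000
 */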

/* Viridian CPUID leaf 3, Hypervisor Feature Indication */
#define CPUID3D_CRASH_MSRS (1 << 10)

/* Viridian CPUID leaf 4: Implementation Recommendations. */
#define CPUID4A_HCALL_REMOTE_TLB_FLUSH (1 << 2)
#define CPUID4A_MSR_BASED_APIC         (1 << 3)
#define CPUID4A_RELAX_TIMER_INT        (1 << 5)

/* Viridian CPUID leaf 6: Implementation HW features detected and in use. */
#define CPUID6A_APIC_OVERLAY    (1 << 0)
#define CPUID6A_MSR_BITMAPS     (1 << 1)
#define CPUID6A_NESTED_PAGING   (1 << 3)

/*
 * Version and build number reported by CPUID leaf 2
 *
 * These numbers are chosen to match the version numbers reported by
 * Windows Server 2008.
 */
static uint16_t __read_mostly viridian_major = 6;
static uint16_t __read_mostly viridian_minor = 0;
static uint32_t __read_mostly viridian_build = 0x1772;
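
/*
 * For example, with these defaults leaf 2 (see below) reports
 * EAX == 0x00001772 (the build number) and EBX == 0x00060000
 * (major 6 in the top 16 bits, minor 0 in the bottom 16).
 */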

/*
 * Maximum number of retries before the guest will notify of failure
 * to acquire a spinlock.
 */
static uint32_t __read_mostly viridian_spinlock_retry_count = 2047;
integer_param("viridian-spinlock-retry-count",
              viridian_spinlock_retry_count);
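
/*
 * This can be tuned from the Xen command line, e.g. (illustrative
 * value, not a recommendation):
 *
 *     viridian-spinlock-retry-count=4096
 */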

void cpuid_viridian_leaves(const struct vcpu *v, uint32_t leaf,
                           uint32_t subleaf, struct cpuid_leaf *res)
{
    const struct domain *d = v->domain;

    ASSERT(is_viridian_domain(d));
    ASSERT(leaf >= 0x40000000 && leaf < 0x40000100);

    leaf -= 0x40000000;

    switch ( leaf )
    {
    case 0:
        /* See section 2.4.1 of the specification */
        res->a = 0x40000006; /* Maximum leaf */
        memcpy(&res->b, "Micr", 4);
        memcpy(&res->c, "osof", 4);
        memcpy(&res->d, "t Hv", 4);
        break;

    case 1:
        /* See section 2.4.2 of the specification */
        memcpy(&res->a, "Hv#1", 4);
        break;

    case 2:
        /*
         * Hypervisor information, but only if the guest has set its
         * own version number.
         */
        if ( d->arch.hvm_domain.viridian.guest_os_id.raw == 0 )
            break;
        res->a = viridian_build;
        res->b = ((uint32_t)viridian_major << 16) | viridian_minor;
        res->c = 0; /* SP */
        res->d = 0; /* Service branch and number */
        break;

    case 3:
    {
        /*
         * Section 2.4.4 details this leaf and states that EAX and EBX
         * are defined to be the low and high parts of the partition
         * privilege mask respectively.
         */
        HV_PARTITION_PRIVILEGE_MASK mask = {
            .AccessIntrCtrlRegs = 1,
            .AccessHypercallMsrs = 1,
            .AccessVpIndex = 1,
        };
        union {
            HV_PARTITION_PRIVILEGE_MASK mask;
            struct {
                uint32_t lo, hi;
            };
        } u;

        if ( !(viridian_feature_mask(d) & HVMPV_no_freq) )
            mask.AccessFrequencyRegs = 1;
        if ( viridian_feature_mask(d) & HVMPV_time_ref_count )
            mask.AccessPartitionReferenceCounter = 1;
        if ( viridian_feature_mask(d) & HVMPV_reference_tsc )
            mask.AccessPartitionReferenceTsc = 1;

        u.mask = mask;

        res->a = u.lo;
        res->b = u.hi;
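
        /*
         * Illustrative example: with only the three defaults set above
         * and no feature-mask additions, AccessIntrCtrlRegs,
         * AccessHypercallMsrs and AccessVpIndex are bits 4-6 of the
         * low word, so the guest would see EAX == 0x70 and EBX == 0.
         */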

        if ( viridian_feature_mask(d) & HVMPV_crash_ctl )
            res->d = CPUID3D_CRASH_MSRS;

        break;
    }

    case 4:
        /* Recommended hypercall usage. */
        if ( (d->arch.hvm_domain.viridian.guest_os_id.raw == 0) ||
             (d->arch.hvm_domain.viridian.guest_os_id.fields.os < 4) )
            break;
        res->a = CPUID4A_RELAX_TIMER_INT;
        if ( viridian_feature_mask(d) & HVMPV_hcall_remote_tlb_flush )
            res->a |= CPUID4A_HCALL_REMOTE_TLB_FLUSH;
        if ( !cpu_has_vmx_apic_reg_virt )
            res->a |= CPUID4A_MSR_BASED_APIC;

        /*
         * This value is the recommended number of attempts to try to
         * acquire a spinlock before notifying the hypervisor via the
         * HvNotifyLongSpinWait hypercall.
         */
        res->b = viridian_spinlock_retry_count;
        break;

    case 6:
        /* Detected and in use hardware features. */
        if ( cpu_has_vmx_virtualize_apic_accesses )
            res->a |= CPUID6A_APIC_OVERLAY;
        if ( cpu_has_vmx_msr_bitmap || (read_efer() & EFER_SVME) )
            res->a |= CPUID6A_MSR_BITMAPS;
        if ( hap_enabled(d) )
            res->a |= CPUID6A_NESTED_PAGING;
        break;
    }
}

static void dump_guest_os_id(const struct domain *d)
{
    const union viridian_guest_os_id *goi;

    goi = &d->arch.hvm_domain.viridian.guest_os_id;

    printk(XENLOG_G_INFO
           "d%d: VIRIDIAN GUEST_OS_ID: vendor: %x os: %x major: %x minor: %x sp: %x build: %x\n",
           d->domain_id,
           goi->fields.vendor, goi->fields.os,
           goi->fields.major, goi->fields.minor,
           goi->fields.service_pack, goi->fields.build_number);
}

static void dump_hypercall(const struct domain *d)
{
    const union viridian_hypercall_gpa *hg;

    hg = &d->arch.hvm_domain.viridian.hypercall_gpa;

    printk(XENLOG_G_INFO "d%d: VIRIDIAN HYPERCALL: enabled: %x pfn: %lx\n",
           d->domain_id,
           hg->fields.enabled, (unsigned long)hg->fields.pfn);
}

static void dump_vp_assist(const struct vcpu *v)
{
    const union viridian_vp_assist *va;

    va = &v->arch.hvm_vcpu.viridian.vp_assist.msr;

    printk(XENLOG_G_INFO "%pv: VIRIDIAN VP_ASSIST_PAGE: enabled: %x pfn: %lx\n",
           v, va->fields.enabled, (unsigned long)va->fields.pfn);
}

static void dump_reference_tsc(const struct domain *d)
{
    const union viridian_reference_tsc *rt;

    rt = &d->arch.hvm_domain.viridian.reference_tsc;

    printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: enabled: %x pfn: %lx\n",
           d->domain_id,
           rt->fields.enabled, (unsigned long)rt->fields.pfn);
}

static void enable_hypercall_page(struct domain *d)
{
    unsigned long gmfn = d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    uint8_t *p;

    if ( !page || !get_page_type(page, PGT_writable_page) )
    {
        if ( page )
            put_page(page);
        gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n",
                 gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
        return;
    }

    p = __map_domain_page(page);

    /*
     * We set bit 31 in %eax (a reserved field in the Viridian hypercall
     * calling convention) to differentiate Xen and Viridian hypercalls.
     */
    *(u8  *)(p + 0) = 0x0d; /* orl $0x80000000, %eax */
    *(u32 *)(p + 1) = 0x80000000;
    *(u8  *)(p + 5) = 0x0f; /* vmcall/vmmcall */
    *(u8  *)(p + 6) = 0x01;
    *(u8  *)(p + 7) = (cpu_has_vmx ? 0xc1 : 0xd9);
    *(u8  *)(p + 8) = 0xc3; /* ret */
    memset(p + 9, 0xcc, PAGE_SIZE - 9); /* int3, int3, ... */
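
    /*
     * A sketch of the stub assembled above (VMX variant; AMD gets
     * vmmcall, 0f 01 d9, instead):
     *
     *     0d 00 00 00 80    or   $0x80000000, %eax
     *     0f 01 c1          vmcall
     *     c3                ret
     */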

    unmap_domain_page(p);

    put_page_and_type(page);
}

static void initialize_vp_assist(struct vcpu *v)
{
    struct domain *d = v->domain;
    unsigned long gmfn = v->arch.hvm_vcpu.viridian.vp_assist.msr.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    void *va;

    ASSERT(!v->arch.hvm_vcpu.viridian.vp_assist.va);

    /*
     * See section 7.8.7 of the specification for details of this
     * enlightenment.
     */

    if ( !page )
        goto fail;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        goto fail;
    }

    va = __map_domain_page_global(page);
    if ( !va )
    {
        put_page_and_type(page);
        goto fail;
    }

    clear_page(va);

    v->arch.hvm_vcpu.viridian.vp_assist.va = va;
    return;

 fail:
    gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n", gmfn,
             page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
}

static void teardown_vp_assist(struct vcpu *v)
{
    void *va = v->arch.hvm_vcpu.viridian.vp_assist.va;
    struct page_info *page;

    if ( !va )
        return;

    v->arch.hvm_vcpu.viridian.vp_assist.va = NULL;

    page = mfn_to_page(domain_page_map_to_mfn(va));

    unmap_domain_page_global(va);
    put_page_and_type(page);
}

void viridian_start_apic_assist(struct vcpu *v, int vector)
{
    uint32_t *va = v->arch.hvm_vcpu.viridian.vp_assist.va;

    if ( !va )
        return;

    if ( vector < 0x10 )
        return;

    /*
     * If there is already an assist pending then something has gone
     * wrong and the VM will most likely hang so force a crash now
     * to make the problem clear.
     */
    if ( v->arch.hvm_vcpu.viridian.vp_assist.vector )
        domain_crash(v->domain);

    v->arch.hvm_vcpu.viridian.vp_assist.vector = vector;
    *va |= 1u;
}

int viridian_complete_apic_assist(struct vcpu *v)
{
    uint32_t *va = v->arch.hvm_vcpu.viridian.vp_assist.va;
    int vector;

    if ( !va )
        return 0;

    if ( *va & 1u )
        return 0; /* Interrupt not yet processed by the guest. */

    vector = v->arch.hvm_vcpu.viridian.vp_assist.vector;
    v->arch.hvm_vcpu.viridian.vp_assist.vector = 0;

    return vector;
}

void viridian_abort_apic_assist(struct vcpu *v)
{
    uint32_t *va = v->arch.hvm_vcpu.viridian.vp_assist.va;

    if ( !va )
        return;

    *va &= ~1u;
    v->arch.hvm_vcpu.viridian.vp_assist.vector = 0;
}

static void update_reference_tsc(struct domain *d, bool_t initialize)
{
    unsigned long gmfn = d->arch.hvm_domain.viridian.reference_tsc.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    HV_REFERENCE_TSC_PAGE *p;

    if ( !page || !get_page_type(page, PGT_writable_page) )
    {
        if ( page )
            put_page(page);
        gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n",
                 gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
        return;
    }

    p = __map_domain_page(page);

    if ( initialize )
        clear_page(p);

    /*
     * This enlightenment must be disabled if the host TSC is not
     * invariant. However it is also disabled if vtsc is true (which means
     * rdtsc is being emulated). This generally happens when guest TSC
     * freq and host TSC freq don't match. The TscScale value could be
     * adjusted to cope with this, allowing vtsc to be turned off, but
     * support for this is not yet present in the hypervisor. Thus it is
     * possible that migrating a Windows VM between hosts of differing TSC
     * frequencies may result in large differences in guest performance.
     */
    if ( !host_tsc_is_safe() || d->arch.vtsc )
    {
        /*
         * The specification states that valid values of TscSequence range
         * from 0 to 0xFFFFFFFE. The value 0xFFFFFFFF is used to indicate
         * this mechanism is no longer a reliable source of time and that
         * the VM should fall back to a different source.
         *
         * Server 2012 (6.2 kernel) and 2012 R2 (6.3 kernel) actually violate
         * the spec. and rely on a value of 0 to indicate that this
         * enlightenment should no longer be used. These two kernel
         * versions are currently the only ones to make use of this
         * enlightenment, so just use 0 here.
         */
        p->TscSequence = 0;

        printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: invalidated\n",
               d->domain_id);
        goto out;
    }

    /*
     * The guest will calculate reference time according to the following
     * formula:
     *
     * ReferenceTime = ((RDTSC() * TscScale) >> 64) + TscOffset
     *
     * Windows uses a 100ns tick, so we need a scale which is the
     * number of 100ns periods per cpu tick, shifted left by 64.
     */
    p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;
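
    /*
     * Worked example (illustrative): for a 2GHz TSC, tsc_khz is
     * 2000000, so TscScale is ~0.005 << 64. Each cpu tick is 0.5ns,
     * i.e. 0.005 of a 100ns period, so (RDTSC() * TscScale) >> 64
     * converts a tick count into 100ns units as required.
     */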

    p->TscSequence++;
    if ( p->TscSequence == 0xFFFFFFFF ||
         p->TscSequence == 0 ) /* Avoid both 'invalid' values */
        p->TscSequence = 1;

 out:
    unmap_domain_page(p);

    put_page_and_type(page);
}

int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;

    if ( !is_viridian_domain(d) )
        return 0;

    switch ( idx )
    {
    case HV_X64_MSR_GUEST_OS_ID:
        perfc_incr(mshv_wrmsr_osid);
        d->arch.hvm_domain.viridian.guest_os_id.raw = val;
        dump_guest_os_id(d);
        break;

    case HV_X64_MSR_HYPERCALL:
        perfc_incr(mshv_wrmsr_hc_page);
        d->arch.hvm_domain.viridian.hypercall_gpa.raw = val;
        dump_hypercall(d);
        if ( d->arch.hvm_domain.viridian.hypercall_gpa.fields.enabled )
            enable_hypercall_page(d);
        break;

    case HV_X64_MSR_VP_INDEX:
        perfc_incr(mshv_wrmsr_vp_index);
        break;

    case HV_X64_MSR_EOI:
        perfc_incr(mshv_wrmsr_eoi);
        vlapic_EOI_set(vcpu_vlapic(v));
        break;

    case HV_X64_MSR_ICR: {
        u32 eax = (u32)val, edx = (u32)(val >> 32);
        struct vlapic *vlapic = vcpu_vlapic(v);
        perfc_incr(mshv_wrmsr_icr);
        eax &= ~(1 << 12);
        edx &= 0xff000000;
        vlapic_set_reg(vlapic, APIC_ICR2, edx);
        vlapic_ipi(vlapic, eax, edx);
        vlapic_set_reg(vlapic, APIC_ICR, eax);
        break;
    }

    case HV_X64_MSR_TPR:
        perfc_incr(mshv_wrmsr_tpr);
        vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI, (uint8_t)val);
        break;

    case HV_X64_MSR_VP_ASSIST_PAGE:
        perfc_incr(mshv_wrmsr_apic_msr);
        teardown_vp_assist(v); /* release any previous mapping */
        v->arch.hvm_vcpu.viridian.vp_assist.msr.raw = val;
        dump_vp_assist(v);
        if ( v->arch.hvm_vcpu.viridian.vp_assist.msr.fields.enabled )
            initialize_vp_assist(v);
        break;

    case HV_X64_MSR_REFERENCE_TSC:
        if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) )
            return 0;

        perfc_incr(mshv_wrmsr_tsc_msr);
        d->arch.hvm_domain.viridian.reference_tsc.raw = val;
        dump_reference_tsc(d);
        if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled )
            update_reference_tsc(d, 1);
        break;

    case HV_X64_MSR_CRASH_P0:
    case HV_X64_MSR_CRASH_P1:
    case HV_X64_MSR_CRASH_P2:
    case HV_X64_MSR_CRASH_P3:
    case HV_X64_MSR_CRASH_P4:
        BUILD_BUG_ON(HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0 >=
                     ARRAY_SIZE(v->arch.hvm_vcpu.viridian.crash_param));

        idx -= HV_X64_MSR_CRASH_P0;
        v->arch.hvm_vcpu.viridian.crash_param[idx] = val;
        break;

    case HV_X64_MSR_CRASH_CTL:
    {
        HV_CRASH_CTL_REG_CONTENTS ctl;

        ctl.AsUINT64 = val;

        if ( !ctl.u.CrashNotify )
            break;

        gprintk(XENLOG_WARNING, "VIRIDIAN CRASH: %lx %lx %lx %lx %lx\n",
                v->arch.hvm_vcpu.viridian.crash_param[0],
                v->arch.hvm_vcpu.viridian.crash_param[1],
                v->arch.hvm_vcpu.viridian.crash_param[2],
                v->arch.hvm_vcpu.viridian.crash_param[3],
                v->arch.hvm_vcpu.viridian.crash_param[4]);
        break;
    }

    default:
        if ( idx >= VIRIDIAN_MSR_MIN && idx <= VIRIDIAN_MSR_MAX )
            gprintk(XENLOG_WARNING, "write to unimplemented MSR %#x\n",
                    idx);

        return 0;
    }

    return 1;
}

static int64_t raw_trc_val(struct domain *d)
{
    uint64_t tsc;
    struct time_scale tsc_to_ns;

    tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));

    /* convert tsc to count of 100ns periods */
    set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
    return scale_delta(tsc, &tsc_to_ns) / 100ul;
}
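
/*
 * Illustrative example: with tsc_khz == 2000000 (a 2GHz TSC), a guest
 * TSC value of 2e9 scales to 1e9 ns, which raw_trc_val() returns as
 * 1e7 100ns periods, i.e. one second of reference time.
 */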

void viridian_time_ref_count_freeze(struct domain *d)
{
    struct viridian_time_ref_count *trc;

    trc = &d->arch.hvm_domain.viridian.time_ref_count;

    if ( test_and_clear_bit(_TRC_running, &trc->flags) )
        trc->val = raw_trc_val(d) + trc->off;
}

void viridian_time_ref_count_thaw(struct domain *d)
{
    struct viridian_time_ref_count *trc;

    trc = &d->arch.hvm_domain.viridian.time_ref_count;

    if ( !d->is_shutting_down &&
         !test_and_set_bit(_TRC_running, &trc->flags) )
        trc->off = (int64_t)trc->val - raw_trc_val(d);
}
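
/*
 * The freeze/thaw pair above preserves the value the guest reads:
 * freeze latches val = raw + off, and thaw recomputes off = val - raw,
 * so (raw_trc_val(d) + trc->off) is continuous across a pause. E.g.
 * (illustrative numbers) freezing at raw == 100 with off == 0 latches
 * val == 100; thawing later at raw == 250 sets off == -150, so the
 * next read is again 100.
 */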

int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;

    if ( !is_viridian_domain(d) )
        return 0;

    switch ( idx )
    {
    case HV_X64_MSR_GUEST_OS_ID:
        perfc_incr(mshv_rdmsr_osid);
        *val = d->arch.hvm_domain.viridian.guest_os_id.raw;
        break;

    case HV_X64_MSR_HYPERCALL:
        perfc_incr(mshv_rdmsr_hc_page);
        *val = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
        break;

    case HV_X64_MSR_VP_INDEX:
        perfc_incr(mshv_rdmsr_vp_index);
        *val = v->vcpu_id;
        break;

    case HV_X64_MSR_TSC_FREQUENCY:
        if ( viridian_feature_mask(d) & HVMPV_no_freq )
            return 0;

        perfc_incr(mshv_rdmsr_tsc_frequency);
        *val = (uint64_t)d->arch.tsc_khz * 1000ull;
        break;

    case HV_X64_MSR_APIC_FREQUENCY:
        if ( viridian_feature_mask(d) & HVMPV_no_freq )
            return 0;

        perfc_incr(mshv_rdmsr_apic_frequency);
        *val = 1000000000ull / APIC_BUS_CYCLE_NS;
        break;

    case HV_X64_MSR_ICR:
        perfc_incr(mshv_rdmsr_icr);
        *val = (((uint64_t)vlapic_get_reg(vcpu_vlapic(v), APIC_ICR2) << 32) |
                vlapic_get_reg(vcpu_vlapic(v), APIC_ICR));
        break;

    case HV_X64_MSR_TPR:
        perfc_incr(mshv_rdmsr_tpr);
        *val = vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI);
        break;

    case HV_X64_MSR_VP_ASSIST_PAGE:
        perfc_incr(mshv_rdmsr_apic_msr);
        *val = v->arch.hvm_vcpu.viridian.vp_assist.msr.raw;
        break;

    case HV_X64_MSR_REFERENCE_TSC:
        if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) )
            return 0;

        perfc_incr(mshv_rdmsr_tsc_msr);
        *val = d->arch.hvm_domain.viridian.reference_tsc.raw;
        break;

    case HV_X64_MSR_TIME_REF_COUNT:
    {
        struct viridian_time_ref_count *trc;

        trc = &d->arch.hvm_domain.viridian.time_ref_count;

        if ( !(viridian_feature_mask(d) & HVMPV_time_ref_count) )
            return 0;

        if ( !test_and_set_bit(_TRC_accessed, &trc->flags) )
            printk(XENLOG_G_INFO "d%d: VIRIDIAN MSR_TIME_REF_COUNT: accessed\n",
                   d->domain_id);

        perfc_incr(mshv_rdmsr_time_ref_count);
        *val = raw_trc_val(d) + trc->off;
        break;
    }

    case HV_X64_MSR_CRASH_P0:
    case HV_X64_MSR_CRASH_P1:
    case HV_X64_MSR_CRASH_P2:
    case HV_X64_MSR_CRASH_P3:
    case HV_X64_MSR_CRASH_P4:
        BUILD_BUG_ON(HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0 >=
                     ARRAY_SIZE(v->arch.hvm_vcpu.viridian.crash_param));

        idx -= HV_X64_MSR_CRASH_P0;
        *val = v->arch.hvm_vcpu.viridian.crash_param[idx];
        break;

    case HV_X64_MSR_CRASH_CTL:
    {
        HV_CRASH_CTL_REG_CONTENTS ctl = {
            .u.CrashNotify = 1,
        };

        *val = ctl.AsUINT64;
        break;
    }

    default:
        if ( idx >= VIRIDIAN_MSR_MIN && idx <= VIRIDIAN_MSR_MAX )
            gprintk(XENLOG_WARNING, "read from unimplemented MSR %#x\n",
                    idx);

        return 0;
    }

    return 1;
}

void viridian_vcpu_deinit(struct vcpu *v)
{
    teardown_vp_assist(v);
}

void viridian_domain_deinit(struct domain *d)
{
    struct vcpu *v;

    for_each_vcpu ( d, v )
        teardown_vp_assist(v);
}

static DEFINE_PER_CPU(cpumask_t, ipi_cpumask);

int viridian_hypercall(struct cpu_user_regs *regs)
{
    struct vcpu *curr = current;
    struct domain *currd = curr->domain;
    int mode = hvm_guest_x86_mode(curr);
    unsigned long input_params_gpa, output_params_gpa;
    uint16_t status = HV_STATUS_SUCCESS;

    union hypercall_input {
        uint64_t raw;
        struct {
            uint16_t call_code;
            uint16_t fast:1;
            uint16_t rsvd1:15;
            uint16_t rep_count:12;
            uint16_t rsvd2:4;
            uint16_t rep_start:12;
            uint16_t rsvd3:4;
        };
    } input;
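
    /*
     * Illustrative encoding (derived from the bitfields above, not
     * from the spec. text): a HvFlushVirtualAddressList call with a
     * rep count of 4 and the fast-call flag clear arrives as
     * input.raw == 0x0000000400000003, i.e. call_code == 0x0003 in
     * bits 0-15 and rep_count == 4 in bits 32-43.
     */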

    union hypercall_output {
        uint64_t raw;
        struct {
            uint16_t result;
            uint16_t rsvd1;
            uint32_t rep_complete:12;
            uint32_t rsvd2:20;
        };
    } output = { 0 };

    ASSERT(is_viridian_domain(currd));

    switch ( mode )
    {
    case 8:
        input.raw = regs->rcx;
        input_params_gpa = regs->rdx;
        output_params_gpa = regs->r8;
        break;
    case 4:
        input.raw = (regs->rdx << 32) | regs->eax;
        input_params_gpa = (regs->rbx << 32) | regs->ecx;
        output_params_gpa = (regs->rdi << 32) | regs->esi;
        break;
    default:
        goto out;
    }

    switch ( input.call_code )
    {
    case HvNotifyLongSpinWait:
        /*
         * See section 14.5.1 of the specification.
         */
        perfc_incr(mshv_call_long_wait);
        do_sched_op(SCHEDOP_yield, guest_handle_from_ptr(NULL, void));
        status = HV_STATUS_SUCCESS;
        break;

    case HvFlushVirtualAddressSpace:
    case HvFlushVirtualAddressList:
    {
        cpumask_t *pcpu_mask;
        struct vcpu *v;
        struct {
            uint64_t address_space;
            uint64_t flags;
            uint64_t vcpu_mask;
        } input_params;

        /*
         * See sections 9.4.2 and 9.4.4 of the specification.
         */
        perfc_incr(mshv_call_flush);

        /* These hypercalls should never use the fast-call convention. */
        status = HV_STATUS_INVALID_PARAMETER;
        if ( input.fast )
            break;

        /* Get input parameters. */
        if ( hvm_copy_from_guest_phys(&input_params, input_params_gpa,
                                      sizeof(input_params)) != HVMTRANS_okay )
            break;

        /*
         * It is not clear from the spec. whether the current virtual
         * CPU is supposed to be included in the set or not in this
         * case, so err on the safe side.
         */
        if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS )
            input_params.vcpu_mask = ~0ul;

        pcpu_mask = &this_cpu(ipi_cpumask);
        cpumask_clear(pcpu_mask);

        /*
         * For each specified virtual CPU flush all ASIDs to invalidate
         * TLB entries the next time it is scheduled and then, if it
         * is currently running, add its physical CPU to a mask of
         * those which need to be interrupted to force a flush.
         */
        for_each_vcpu ( currd, v )
        {
            if ( v->vcpu_id >= (sizeof(input_params.vcpu_mask) * 8) )
                break;

            if ( !(input_params.vcpu_mask & (1ul << v->vcpu_id)) )
                continue;

            hvm_asid_flush_vcpu(v);
            if ( v != curr && v->is_running )
                __cpumask_set_cpu(v->processor, pcpu_mask);
        }

        /*
         * Since ASIDs have now been flushed it just remains to
         * force any CPUs currently running target vCPUs out of non-
         * root mode. It's possible that re-scheduling has taken place
         * so we may unnecessarily IPI some CPUs.
         */
        if ( !cpumask_empty(pcpu_mask) )
            smp_send_event_check_mask(pcpu_mask);

        output.rep_complete = input.rep_count;

        status = HV_STATUS_SUCCESS;
        break;
    }

    default:
        gprintk(XENLOG_WARNING, "unimplemented hypercall %04x\n",
                input.call_code);
        /* Fallthrough. */
    case HvGetPartitionId:
    case HvExtCallQueryCapabilities:
        /*
         * These hypercalls seem to be erroneously issued by Windows
         * despite neither AccessPartitionId nor EnableExtendedHypercalls
         * being set in CPUID leaf 3 (the partition privilege mask).
         * Given that returning a status of 'invalid code' has not so far
         * caused any problems, it's not worth logging.
         */
        status = HV_STATUS_INVALID_HYPERCALL_CODE;
        break;
    }

 out:
    output.result = status;
    switch ( mode )
    {
    case 8:
        regs->rax = output.raw;
        break;
    default:
        regs->rdx = output.raw >> 32;
        regs->rax = (uint32_t)output.raw;
        break;
    }

    return HVM_HCALL_completed;
}

static int viridian_save_domain_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct hvm_viridian_domain_context ctxt = {
        .time_ref_count = d->arch.hvm_domain.viridian.time_ref_count.val,
        .hypercall_gpa  = d->arch.hvm_domain.viridian.hypercall_gpa.raw,
        .guest_os_id    = d->arch.hvm_domain.viridian.guest_os_id.raw,
        .reference_tsc  = d->arch.hvm_domain.viridian.reference_tsc.raw,
    };

    if ( !is_viridian_domain(d) )
        return 0;

    return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0);
}

static int viridian_load_domain_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct hvm_viridian_domain_context ctxt;

    if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
        return -EINVAL;

    d->arch.hvm_domain.viridian.time_ref_count.val = ctxt.time_ref_count;
    d->arch.hvm_domain.viridian.hypercall_gpa.raw  = ctxt.hypercall_gpa;
    d->arch.hvm_domain.viridian.guest_os_id.raw    = ctxt.guest_os_id;
    d->arch.hvm_domain.viridian.reference_tsc.raw  = ctxt.reference_tsc;

    if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled )
        update_reference_tsc(d, 0);

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(VIRIDIAN_DOMAIN, viridian_save_domain_ctxt,
                          viridian_load_domain_ctxt, 1, HVMSR_PER_DOM);

static int viridian_save_vcpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct vcpu *v;

    if ( !is_viridian_domain(d) )
        return 0;

    for_each_vcpu ( d, v )
    {
        struct hvm_viridian_vcpu_context ctxt = {
            .vp_assist_msr = v->arch.hvm_vcpu.viridian.vp_assist.msr.raw,
            .vp_assist_vector = v->arch.hvm_vcpu.viridian.vp_assist.vector,
        };

        if ( hvm_save_entry(VIRIDIAN_VCPU, v->vcpu_id, h, &ctxt) != 0 )
            return 1;
    }

    return 0;
}

static int viridian_load_vcpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    int vcpuid;
    struct vcpu *v;
    struct hvm_viridian_vcpu_context ctxt;

    vcpuid = hvm_load_instance(h);
    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
    {
        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n",
                d->domain_id, vcpuid);
        return -EINVAL;
    }

    if ( hvm_load_entry_zeroextend(VIRIDIAN_VCPU, h, &ctxt) != 0 )
        return -EINVAL;

    if ( memcmp(&ctxt._pad, zero_page, sizeof(ctxt._pad)) )
        return -EINVAL;

    v->arch.hvm_vcpu.viridian.vp_assist.msr.raw = ctxt.vp_assist_msr;
    if ( v->arch.hvm_vcpu.viridian.vp_assist.msr.fields.enabled &&
         !v->arch.hvm_vcpu.viridian.vp_assist.va )
        initialize_vp_assist(v);

    v->arch.hvm_vcpu.viridian.vp_assist.vector = ctxt.vp_assist_vector;

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(VIRIDIAN_VCPU, viridian_save_vcpu_ctxt,
                          viridian_load_vcpu_ctxt, 1, HVMSR_PER_VCPU);

static int __init parse_viridian_version(const char *arg)
{
    const char *t;
    unsigned int n[3];
    unsigned int i = 0;

    n[0] = viridian_major;
    n[1] = viridian_minor;
    n[2] = viridian_build;

    do {
        const char *e;

        t = strchr(arg, ',');
        if ( !t )
            t = strchr(arg, '\0');

        if ( *arg && *arg != ',' && i < 3 )
        {
            n[i] = simple_strtoul(arg, &e, 0);
            if ( e != t )
                break;
        }

        i++;
        arg = t + 1;
    } while ( *t );

    if ( i != 3 )
        return -EINVAL;

    if ( ((typeof(viridian_major))n[0] != n[0]) ||
         ((typeof(viridian_minor))n[1] != n[1]) ||
         ((typeof(viridian_build))n[2] != n[2]) )
        return -EINVAL;

    viridian_major = n[0];
    viridian_minor = n[1];
    viridian_build = n[2];

    printk("viridian-version = %#x,%#x,%#x\n",
           viridian_major, viridian_minor, viridian_build);
    return 0;
}
custom_param("viridian-version", parse_viridian_version);
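
/*
 * For example, booting Xen with (illustrative values, parsed by
 * simple_strtoul() with base 0, so hex is accepted):
 *
 *     viridian-version=6,1,0x1db1
 *
 * would cause CPUID leaf 2 to report version 6.1, build 0x1db1.
 */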

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */