/******************************************************************************
 * viridian.c
 *
 * An implementation of some Viridian enlightenments. See Microsoft's
 * Hypervisor Top Level Functional Specification (v5.0a) at:
 *
 * https://github.com/Microsoft/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v5.0.pdf
 *
 * for more information.
 */

#include <xen/sched.h>
#include <xen/version.h>
#include <xen/perfc.h>
#include <xen/hypercall.h>
#include <xen/domain_page.h>
#include <asm/guest_access.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/apic.h>
#include <asm/hvm/support.h>
#include <public/sched.h>
#include <public/hvm/hvm_op.h>

/* Viridian MSR numbers. */
#define HV_X64_MSR_GUEST_OS_ID 0x40000000
#define HV_X64_MSR_HYPERCALL 0x40000001
#define HV_X64_MSR_VP_INDEX 0x40000002
#define HV_X64_MSR_RESET 0x40000003
#define HV_X64_MSR_VP_RUNTIME 0x40000010
#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
#define HV_X64_MSR_REFERENCE_TSC 0x40000021
#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
#define HV_X64_MSR_EOI 0x40000070
#define HV_X64_MSR_ICR 0x40000071
#define HV_X64_MSR_TPR 0x40000072
#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
#define HV_X64_MSR_SCONTROL 0x40000080
#define HV_X64_MSR_SVERSION 0x40000081
#define HV_X64_MSR_SIEFP 0x40000082
#define HV_X64_MSR_SIMP 0x40000083
#define HV_X64_MSR_EOM 0x40000084
#define HV_X64_MSR_SINT0 0x40000090
#define HV_X64_MSR_SINT1 0x40000091
#define HV_X64_MSR_SINT2 0x40000092
#define HV_X64_MSR_SINT3 0x40000093
#define HV_X64_MSR_SINT4 0x40000094
#define HV_X64_MSR_SINT5 0x40000095
#define HV_X64_MSR_SINT6 0x40000096
#define HV_X64_MSR_SINT7 0x40000097
#define HV_X64_MSR_SINT8 0x40000098
#define HV_X64_MSR_SINT9 0x40000099
#define HV_X64_MSR_SINT10 0x4000009A
#define HV_X64_MSR_SINT11 0x4000009B
#define HV_X64_MSR_SINT12 0x4000009C
#define HV_X64_MSR_SINT13 0x4000009D
#define HV_X64_MSR_SINT14 0x4000009E
#define HV_X64_MSR_SINT15 0x4000009F
#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
#define HV_X64_MSR_POWER_STATE_TRIGGER_C1 0x400000C1
#define HV_X64_MSR_POWER_STATE_TRIGGER_C2 0x400000C2
#define HV_X64_MSR_POWER_STATE_TRIGGER_C3 0x400000C3
#define HV_X64_MSR_POWER_STATE_CONFIG_C1 0x400000D1
#define HV_X64_MSR_POWER_STATE_CONFIG_C2 0x400000D2
#define HV_X64_MSR_POWER_STATE_CONFIG_C3 0x400000D3
#define HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE 0x400000E0
#define HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE 0x400000E1
#define HV_X64_MSR_STATS_VP_RETAIL_PAGE 0x400000E2
#define HV_X64_MSR_STATS_VP_INTERNAL_PAGE 0x400000E3
#define HV_X64_MSR_GUEST_IDLE 0x400000F0
#define HV_X64_MSR_SYNTH_DEBUG_CONTROL 0x400000F1
#define HV_X64_MSR_SYNTH_DEBUG_STATUS 0x400000F2
#define HV_X64_MSR_SYNTH_DEBUG_SEND_BUFFER 0x400000F3
#define HV_X64_MSR_SYNTH_DEBUG_RECEIVE_BUFFER 0x400000F4
#define HV_X64_MSR_SYNTH_DEBUG_PENDING_BUFFER 0x400000F5
#define HV_X64_MSR_CRASH_P0 0x40000100
#define HV_X64_MSR_CRASH_P1 0x40000101
#define HV_X64_MSR_CRASH_P2 0x40000102
#define HV_X64_MSR_CRASH_P3 0x40000103
#define HV_X64_MSR_CRASH_P4 0x40000104
#define HV_X64_MSR_CRASH_CTL 0x40000105

#define VIRIDIAN_MSR_MIN HV_X64_MSR_GUEST_OS_ID
#define VIRIDIAN_MSR_MAX HV_X64_MSR_CRASH_CTL

/* Viridian Hypercall Status Codes. */
#define HV_STATUS_SUCCESS 0x0000
#define HV_STATUS_INVALID_HYPERCALL_CODE 0x0002
#define HV_STATUS_INVALID_PARAMETER 0x0005

/* Viridian Hypercall Codes. */
#define HvFlushVirtualAddressSpace 0x0002
#define HvFlushVirtualAddressList 0x0003
#define HvNotifyLongSpinWait 0x0008
#define HvGetPartitionId 0x0046
#define HvExtCallQueryCapabilities 0x8001

/* Viridian Hypercall Flags. */
#define HV_FLUSH_ALL_PROCESSORS 1

/*
 * Viridian Partition Privilege Flags.
 *
 * This is taken from section 4.2.2 of the specification, and fixed for
 * style and correctness.
 */
typedef struct {
    /* Access to virtual MSRs */
    uint64_t AccessVpRunTimeReg:1;
    uint64_t AccessPartitionReferenceCounter:1;
    uint64_t AccessSynicRegs:1;
    uint64_t AccessSyntheticTimerRegs:1;
    uint64_t AccessIntrCtrlRegs:1;
    uint64_t AccessHypercallMsrs:1;
    uint64_t AccessVpIndex:1;
    uint64_t AccessResetReg:1;
    uint64_t AccessStatsReg:1;
    uint64_t AccessPartitionReferenceTsc:1;
    uint64_t AccessGuestIdleReg:1;
    uint64_t AccessFrequencyRegs:1;
    uint64_t AccessDebugRegs:1;
    uint64_t Reserved1:19;

    /* Access to hypercalls */
    uint64_t CreatePartitions:1;
    uint64_t AccessPartitionId:1;
    uint64_t AccessMemoryPool:1;
    uint64_t AdjustMessageBuffers:1;
    uint64_t PostMessages:1;
    uint64_t SignalEvents:1;
    uint64_t CreatePort:1;
    uint64_t ConnectPort:1;
    uint64_t AccessStats:1;
    uint64_t Reserved2:2;
    uint64_t Debugging:1;
    uint64_t CpuManagement:1;
    uint64_t Reserved3:1;
    uint64_t Reserved4:1;
    uint64_t Reserved5:1;
    uint64_t AccessVSM:1;
    uint64_t AccessVpRegisters:1;
    uint64_t Reserved6:1;
    uint64_t Reserved7:1;
    uint64_t EnableExtendedHypercalls:1;
    uint64_t StartVirtualProcessor:1;
    uint64_t Reserved8:10;
} HV_PARTITION_PRIVILEGE_MASK;
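
/*
 * Illustrative example (added commentary, not from the specification):
 * the default mask built in cpuid_viridian_leaves() below grants
 * AccessIntrCtrlRegs (bit 4), AccessHypercallMsrs (bit 5) and
 * AccessVpIndex (bit 6), so the low 32 bits reported in leaf 3 EAX
 * would be 0x70, or 0x870 once AccessFrequencyRegs (bit 11) is also
 * granted.
 */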

typedef union _HV_CRASH_CTL_REG_CONTENTS
{
    uint64_t AsUINT64;
    struct
    {
        uint64_t Reserved:63;
        uint64_t CrashNotify:1;
    } u;
} HV_CRASH_CTL_REG_CONTENTS;

/* Viridian CPUID leaf 3, Hypervisor Feature Indication */
#define CPUID3D_CRASH_MSRS (1 << 10)

/* Viridian CPUID leaf 4: Implementation Recommendations. */
#define CPUID4A_HCALL_REMOTE_TLB_FLUSH (1 << 2)
#define CPUID4A_MSR_BASED_APIC (1 << 3)
#define CPUID4A_RELAX_TIMER_INT (1 << 5)

/* Viridian CPUID leaf 6: Implementation HW features detected and in use. */
#define CPUID6A_APIC_OVERLAY (1 << 0)
#define CPUID6A_MSR_BITMAPS (1 << 1)
#define CPUID6A_NESTED_PAGING (1 << 3)

/*
 * Version and build number reported by CPUID leaf 2.
 *
 * These numbers are chosen to match the version numbers reported by
 * Windows Server 2008.
 */
static uint16_t __read_mostly viridian_major = 6;
static uint16_t __read_mostly viridian_minor = 0;
static uint32_t __read_mostly viridian_build = 0x1772;

/*
 * Maximum number of retries before the guest will notify the hypervisor
 * of its failure to acquire a spinlock.
 */
static uint32_t __read_mostly viridian_spinlock_retry_count = 2047;
integer_param("viridian-spinlock-retry-count",
              viridian_spinlock_retry_count);
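
/*
 * For example, a larger retry threshold can be set on the Xen command
 * line (the value below is purely illustrative):
 *
 *   viridian-spinlock-retry-count=4096
 */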

void cpuid_viridian_leaves(const struct vcpu *v, uint32_t leaf,
                           uint32_t subleaf, struct cpuid_leaf *res)
{
    const struct domain *d = v->domain;

    ASSERT(is_viridian_domain(d));
    ASSERT(leaf >= 0x40000000 && leaf < 0x40000100);

    leaf -= 0x40000000;

    switch ( leaf )
    {
    case 0:
        /* See section 2.4.1 of the specification. */
        res->a = 0x40000006; /* Maximum leaf */
        memcpy(&res->b, "Micr", 4);
        memcpy(&res->c, "osof", 4);
        memcpy(&res->d, "t Hv", 4);
        break;

    case 1:
        /* See section 2.4.2 of the specification. */
        memcpy(&res->a, "Hv#1", 4);
        break;

    case 2:
        /*
         * Hypervisor information, but only if the guest has set its
         * own version number.
         */
        if ( d->arch.hvm_domain.viridian.guest_os_id.raw == 0 )
            break;
        res->a = viridian_build;
        res->b = ((uint32_t)viridian_major << 16) | viridian_minor;
        res->c = 0; /* SP */
        res->d = 0; /* Service branch and number */
        break;

    case 3:
    {
        /*
         * Section 2.4.4 details this leaf and states that EAX and EBX
         * are defined to be the low and high parts of the partition
         * privilege mask respectively.
         */
        HV_PARTITION_PRIVILEGE_MASK mask = {
            .AccessIntrCtrlRegs = 1,
            .AccessHypercallMsrs = 1,
            .AccessVpIndex = 1,
        };
        union {
            HV_PARTITION_PRIVILEGE_MASK mask;
            struct {
                /*
                 * Wrapped in an anonymous struct: as direct union
                 * members, lo and hi would alias at offset 0 and the
                 * high dword would never be reported.
                 */
                uint32_t lo, hi;
            };
        } u;

        if ( !(viridian_feature_mask(d) & HVMPV_no_freq) )
            mask.AccessFrequencyRegs = 1;
        if ( viridian_feature_mask(d) & HVMPV_time_ref_count )
            mask.AccessPartitionReferenceCounter = 1;
        if ( viridian_feature_mask(d) & HVMPV_reference_tsc )
            mask.AccessPartitionReferenceTsc = 1;

        u.mask = mask;

        res->a = u.lo;
        res->b = u.hi;

        if ( viridian_feature_mask(d) & HVMPV_crash_ctl )
            res->d = CPUID3D_CRASH_MSRS;

        break;
    }

    case 4:
        /* Recommended hypercall usage. */
        if ( (d->arch.hvm_domain.viridian.guest_os_id.raw == 0) ||
             (d->arch.hvm_domain.viridian.guest_os_id.fields.os < 4) )
            break;
        res->a = CPUID4A_RELAX_TIMER_INT;
        if ( viridian_feature_mask(d) & HVMPV_hcall_remote_tlb_flush )
            res->a |= CPUID4A_HCALL_REMOTE_TLB_FLUSH;
        if ( !cpu_has_vmx_apic_reg_virt )
            res->a |= CPUID4A_MSR_BASED_APIC;

        /*
         * This value is the recommended number of attempts to try to
         * acquire a spinlock before notifying the hypervisor via the
         * HvNotifyLongSpinWait hypercall.
         */
        res->b = viridian_spinlock_retry_count;
        break;

    case 6:
        /* Detected and in use hardware features. */
        if ( cpu_has_vmx_virtualize_apic_accesses )
            res->a |= CPUID6A_APIC_OVERLAY;
        if ( cpu_has_vmx_msr_bitmap || (read_efer() & EFER_SVME) )
            res->a |= CPUID6A_MSR_BITMAPS;
        if ( hap_enabled(d) )
            res->a |= CPUID6A_NESTED_PAGING;
        break;
    }
}

static void dump_guest_os_id(const struct domain *d)
{
    const union viridian_guest_os_id *goi;

    goi = &d->arch.hvm_domain.viridian.guest_os_id;

    printk(XENLOG_G_INFO
           "d%d: VIRIDIAN GUEST_OS_ID: vendor: %x os: %x major: %x minor: %x sp: %x build: %x\n",
           d->domain_id,
           goi->fields.vendor, goi->fields.os,
           goi->fields.major, goi->fields.minor,
           goi->fields.service_pack, goi->fields.build_number);
}

static void dump_hypercall(const struct domain *d)
{
    const union viridian_hypercall_gpa *hg;

    hg = &d->arch.hvm_domain.viridian.hypercall_gpa;

    printk(XENLOG_G_INFO "d%d: VIRIDIAN HYPERCALL: enabled: %x pfn: %lx\n",
           d->domain_id,
           hg->fields.enabled, (unsigned long)hg->fields.pfn);
}

static void dump_vp_assist(const struct vcpu *v)
{
    const union viridian_vp_assist *va;

    va = &v->arch.hvm_vcpu.viridian.vp_assist.msr;

    printk(XENLOG_G_INFO "%pv: VIRIDIAN VP_ASSIST_PAGE: enabled: %x pfn: %lx\n",
           v, va->fields.enabled, (unsigned long)va->fields.pfn);
}

static void dump_reference_tsc(const struct domain *d)
{
    const union viridian_reference_tsc *rt;

    rt = &d->arch.hvm_domain.viridian.reference_tsc;

    printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: enabled: %x pfn: %lx\n",
           d->domain_id,
           rt->fields.enabled, (unsigned long)rt->fields.pfn);
}

static void enable_hypercall_page(struct domain *d)
{
    unsigned long gmfn = d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    uint8_t *p;

    if ( !page || !get_page_type(page, PGT_writable_page) )
    {
        if ( page )
            put_page(page);
        gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n",
                 gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
        return;
    }

    p = __map_domain_page(page);

    /*
     * We set bit 31 in %eax (a reserved field in the Viridian hypercall
     * calling convention) to differentiate Xen and Viridian hypercalls.
     */
    *(u8  *)(p + 0) = 0x0d; /* orl $0x80000000, %eax */
    *(u32 *)(p + 1) = 0x80000000;
    *(u8  *)(p + 5) = 0x0f; /* vmcall/vmmcall */
    *(u8  *)(p + 6) = 0x01;
    *(u8  *)(p + 7) = (cpu_has_vmx ? 0xc1 : 0xd9);
    *(u8  *)(p + 8) = 0xc3; /* ret */
    memset(p + 9, 0xcc, PAGE_SIZE - 9); /* int3, int3, ... */
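
    /*
     * For reference (added commentary), the stub assembled above is,
     * on VMX:
     *
     *   0d 00 00 00 80    orl    $0x80000000, %eax
     *   0f 01 c1          vmcall
     *   c3                ret
     *
     * with 0f 01 d9 (vmmcall) in place of vmcall on SVM.
     */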

    unmap_domain_page(p);

    put_page_and_type(page);
}

static void initialize_vp_assist(struct vcpu *v)
{
    struct domain *d = v->domain;
    unsigned long gmfn = v->arch.hvm_vcpu.viridian.vp_assist.msr.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    void *va;

    ASSERT(!v->arch.hvm_vcpu.viridian.vp_assist.va);

    /*
     * See section 7.8.7 of the specification for details of this
     * enlightenment.
     */

    if ( !page )
        goto fail;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        goto fail;
    }

    va = __map_domain_page_global(page);
    if ( !va )
    {
        put_page_and_type(page);
        goto fail;
    }

    clear_page(va);

    v->arch.hvm_vcpu.viridian.vp_assist.va = va;
    return;

 fail:
    gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n", gmfn,
             page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
}

static void teardown_vp_assist(struct vcpu *v)
{
    void *va = v->arch.hvm_vcpu.viridian.vp_assist.va;
    struct page_info *page;

    if ( !va )
        return;

    v->arch.hvm_vcpu.viridian.vp_assist.va = NULL;

    page = mfn_to_page(domain_page_map_to_mfn(va));

    unmap_domain_page_global(va);
    put_page_and_type(page);
}

void viridian_start_apic_assist(struct vcpu *v, int vector)
{
    uint32_t *va = v->arch.hvm_vcpu.viridian.vp_assist.va;

    if ( !va )
        return;

    if ( vector < 0x10 )
        return;

    /*
     * If there is already an assist pending then something has gone
     * wrong and the VM will most likely hang so force a crash now
     * to make the problem clear.
     */
    if ( v->arch.hvm_vcpu.viridian.vp_assist.vector )
        domain_crash(v->domain);

    v->arch.hvm_vcpu.viridian.vp_assist.vector = vector;
    *va |= 1u;
}

int viridian_complete_apic_assist(struct vcpu *v)
{
    uint32_t *va = v->arch.hvm_vcpu.viridian.vp_assist.va;
    int vector;

    if ( !va )
        return 0;

    if ( *va & 1u )
        return 0; /* Interrupt not yet processed by the guest. */

    vector = v->arch.hvm_vcpu.viridian.vp_assist.vector;
    v->arch.hvm_vcpu.viridian.vp_assist.vector = 0;

    return vector;
}

void viridian_abort_apic_assist(struct vcpu *v)
{
    uint32_t *va = v->arch.hvm_vcpu.viridian.vp_assist.va;

    if ( !va )
        return;

    *va &= ~1u;
    v->arch.hvm_vcpu.viridian.vp_assist.vector = 0;
}
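
/*
 * Summary (added commentary): bit 0 of the VP assist page is used as a
 * 'no EOI required' style flag. viridian_start_apic_assist() records
 * the pending vector and sets the bit; the guest clears the bit once it
 * has handled the interrupt, at which point
 * viridian_complete_apic_assist() returns the recorded vector;
 * viridian_abort_apic_assist() backs the whole sequence out.
 */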

static void update_reference_tsc(struct domain *d, bool_t initialize)
{
    unsigned long gmfn = d->arch.hvm_domain.viridian.reference_tsc.fields.pfn;
    struct page_info *page = get_page_from_gfn(d, gmfn, NULL, P2M_ALLOC);
    HV_REFERENCE_TSC_PAGE *p;

    if ( !page || !get_page_type(page, PGT_writable_page) )
    {
        if ( page )
            put_page(page);
        gdprintk(XENLOG_WARNING, "Bad GMFN %#"PRI_gfn" (MFN %#"PRI_mfn")\n",
                 gmfn, page ? page_to_mfn(page) : mfn_x(INVALID_MFN));
        return;
    }

    p = __map_domain_page(page);

    if ( initialize )
        clear_page(p);

    /*
     * This enlightenment must be disabled if the host TSC is not
     * invariant. However it is also disabled if vtsc is true (which means
     * rdtsc is being emulated). This generally happens when guest TSC
     * freq and host TSC freq don't match. The TscScale value could be
     * adjusted to cope with this, allowing vtsc to be turned off, but
     * support for this is not yet present in the hypervisor. Thus it is
     * possible that migrating a Windows VM between hosts of differing
     * TSC frequencies may result in large differences in guest
     * performance.
     */
    if ( !host_tsc_is_safe() || d->arch.vtsc )
    {
        /*
         * The specification states that valid values of TscSequence range
         * from 0 to 0xFFFFFFFE. The value 0xFFFFFFFF is used to indicate
         * this mechanism is no longer a reliable source of time and that
         * the VM should fall back to a different source.
         *
         * Server 2012 (6.2 kernel) and 2012 R2 (6.3 kernel) actually
         * violate the spec. and rely on a value of 0 to indicate that
         * this enlightenment should no longer be used. These two kernel
         * versions are currently the only ones to make use of this
         * enlightenment, so just use 0 here.
         */
        p->TscSequence = 0;

        printk(XENLOG_G_INFO "d%d: VIRIDIAN REFERENCE_TSC: invalidated\n",
               d->domain_id);
        goto out;
    }

    /*
     * The guest will calculate reference time according to the following
     * formula:
     *
     *   ReferenceTime = ((RDTSC() * TscScale) >> 64) + TscOffset
     *
     * Windows uses a 100ns tick, so we need a scale which is the number
     * of 100ns periods per cpu tick, shifted left by 64.
     */
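
    /*
     * Worked example (added commentary): for a hypothetical 2GHz TSC,
     * tsc_khz = 2000000 and so TscScale = ((10000 << 32) / 2000000) << 32,
     * i.e. 0.005 in 64.64 fixed point. One second of TSC ticks (2 * 10^9)
     * then yields 2 * 10^9 * 0.005 = 10^7 100ns periods, as expected.
     */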
    p->TscScale = ((10000ul << 32) / d->arch.tsc_khz) << 32;

    p->TscSequence++;
    if ( p->TscSequence == 0xFFFFFFFF ||
         p->TscSequence == 0 ) /* Avoid both 'invalid' values */
        p->TscSequence = 1;

 out:
    unmap_domain_page(p);

    put_page_and_type(page);
}

int wrmsr_viridian_regs(uint32_t idx, uint64_t val)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;

    if ( !is_viridian_domain(d) )
        return 0;

    switch ( idx )
    {
    case HV_X64_MSR_GUEST_OS_ID:
        perfc_incr(mshv_wrmsr_osid);
        d->arch.hvm_domain.viridian.guest_os_id.raw = val;
        dump_guest_os_id(d);
        break;

    case HV_X64_MSR_HYPERCALL:
        perfc_incr(mshv_wrmsr_hc_page);
        d->arch.hvm_domain.viridian.hypercall_gpa.raw = val;
        dump_hypercall(d);
        if ( d->arch.hvm_domain.viridian.hypercall_gpa.fields.enabled )
            enable_hypercall_page(d);
        break;

    case HV_X64_MSR_VP_INDEX:
        perfc_incr(mshv_wrmsr_vp_index);
        break;

    case HV_X64_MSR_EOI:
        perfc_incr(mshv_wrmsr_eoi);
        vlapic_EOI_set(vcpu_vlapic(v));
        break;

    case HV_X64_MSR_ICR: {
        u32 eax = (u32)val, edx = (u32)(val >> 32);
        struct vlapic *vlapic = vcpu_vlapic(v);

        perfc_incr(mshv_wrmsr_icr);
        eax &= ~(1 << 12);
        edx &= 0xff000000;
        vlapic_set_reg(vlapic, APIC_ICR2, edx);
        vlapic_ipi(vlapic, eax, edx);
        vlapic_set_reg(vlapic, APIC_ICR, eax);
        break;
    }

    case HV_X64_MSR_TPR:
        perfc_incr(mshv_wrmsr_tpr);
        vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI, (uint8_t)val);
        break;

    case HV_X64_MSR_VP_ASSIST_PAGE:
        perfc_incr(mshv_wrmsr_apic_msr);
        teardown_vp_assist(v); /* release any previous mapping */
        v->arch.hvm_vcpu.viridian.vp_assist.msr.raw = val;
        dump_vp_assist(v);
        if ( v->arch.hvm_vcpu.viridian.vp_assist.msr.fields.enabled )
            initialize_vp_assist(v);
        break;

    case HV_X64_MSR_REFERENCE_TSC:
        if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) )
            return 0;

        perfc_incr(mshv_wrmsr_tsc_msr);
        d->arch.hvm_domain.viridian.reference_tsc.raw = val;
        dump_reference_tsc(d);
        if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled )
            update_reference_tsc(d, 1);
        break;

    case HV_X64_MSR_CRASH_P0:
    case HV_X64_MSR_CRASH_P1:
    case HV_X64_MSR_CRASH_P2:
    case HV_X64_MSR_CRASH_P3:
    case HV_X64_MSR_CRASH_P4:
        BUILD_BUG_ON(HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0 >=
                     ARRAY_SIZE(v->arch.hvm_vcpu.viridian.crash_param));

        idx -= HV_X64_MSR_CRASH_P0;
        v->arch.hvm_vcpu.viridian.crash_param[idx] = val;
        break;

    case HV_X64_MSR_CRASH_CTL:
    {
        HV_CRASH_CTL_REG_CONTENTS ctl;

        ctl.AsUINT64 = val;

        if ( !ctl.u.CrashNotify )
            break;

        gprintk(XENLOG_WARNING, "VIRIDIAN CRASH: %lx %lx %lx %lx %lx\n",
                v->arch.hvm_vcpu.viridian.crash_param[0],
                v->arch.hvm_vcpu.viridian.crash_param[1],
                v->arch.hvm_vcpu.viridian.crash_param[2],
                v->arch.hvm_vcpu.viridian.crash_param[3],
                v->arch.hvm_vcpu.viridian.crash_param[4]);
        break;
    }

    default:
        if ( idx >= VIRIDIAN_MSR_MIN && idx <= VIRIDIAN_MSR_MAX )
            gprintk(XENLOG_WARNING, "write to unimplemented MSR %#x\n",
                    idx);

        return 0;
    }

    return 1;
}

static int64_t raw_trc_val(struct domain *d)
{
    uint64_t tsc;
    struct time_scale tsc_to_ns;

    tsc = hvm_get_guest_tsc(pt_global_vcpu_target(d));

    /* Convert the TSC to a count of 100ns periods. */
    set_time_scale(&tsc_to_ns, d->arch.tsc_khz * 1000ul);
    return scale_delta(tsc, &tsc_to_ns) / 100ul;
}

void viridian_time_ref_count_freeze(struct domain *d)
{
    struct viridian_time_ref_count *trc;

    trc = &d->arch.hvm_domain.viridian.time_ref_count;

    if ( test_and_clear_bit(_TRC_running, &trc->flags) )
        trc->val = raw_trc_val(d) + trc->off;
}

void viridian_time_ref_count_thaw(struct domain *d)
{
    struct viridian_time_ref_count *trc;

    trc = &d->arch.hvm_domain.viridian.time_ref_count;

    if ( !d->is_shutting_down &&
         !test_and_set_bit(_TRC_running, &trc->flags) )
        trc->off = (int64_t)trc->val - raw_trc_val(d);
}

int rdmsr_viridian_regs(uint32_t idx, uint64_t *val)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;

    if ( !is_viridian_domain(d) )
        return 0;

    switch ( idx )
    {
    case HV_X64_MSR_GUEST_OS_ID:
        perfc_incr(mshv_rdmsr_osid);
        *val = d->arch.hvm_domain.viridian.guest_os_id.raw;
        break;

    case HV_X64_MSR_HYPERCALL:
        perfc_incr(mshv_rdmsr_hc_page);
        *val = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
        break;

    case HV_X64_MSR_VP_INDEX:
        perfc_incr(mshv_rdmsr_vp_index);
        *val = v->vcpu_id;
        break;

    case HV_X64_MSR_TSC_FREQUENCY:
        if ( viridian_feature_mask(d) & HVMPV_no_freq )
            return 0;

        perfc_incr(mshv_rdmsr_tsc_frequency);
        *val = (uint64_t)d->arch.tsc_khz * 1000ull;
        break;

    case HV_X64_MSR_APIC_FREQUENCY:
        if ( viridian_feature_mask(d) & HVMPV_no_freq )
            return 0;

        perfc_incr(mshv_rdmsr_apic_frequency);
        *val = 1000000000ull / APIC_BUS_CYCLE_NS;
        break;

    case HV_X64_MSR_ICR:
        perfc_incr(mshv_rdmsr_icr);
        *val = (((uint64_t)vlapic_get_reg(vcpu_vlapic(v), APIC_ICR2) << 32) |
                vlapic_get_reg(vcpu_vlapic(v), APIC_ICR));
        break;

    case HV_X64_MSR_TPR:
        perfc_incr(mshv_rdmsr_tpr);
        *val = vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI);
        break;

    case HV_X64_MSR_VP_ASSIST_PAGE:
        perfc_incr(mshv_rdmsr_apic_msr);
        *val = v->arch.hvm_vcpu.viridian.vp_assist.msr.raw;
        break;

    case HV_X64_MSR_REFERENCE_TSC:
        if ( !(viridian_feature_mask(d) & HVMPV_reference_tsc) )
            return 0;

        perfc_incr(mshv_rdmsr_tsc_msr);
        *val = d->arch.hvm_domain.viridian.reference_tsc.raw;
        break;

    case HV_X64_MSR_TIME_REF_COUNT:
    {
        struct viridian_time_ref_count *trc;

        trc = &d->arch.hvm_domain.viridian.time_ref_count;

        if ( !(viridian_feature_mask(d) & HVMPV_time_ref_count) )
            return 0;

        if ( !test_and_set_bit(_TRC_accessed, &trc->flags) )
            printk(XENLOG_G_INFO "d%d: VIRIDIAN MSR_TIME_REF_COUNT: accessed\n",
                   d->domain_id);

        perfc_incr(mshv_rdmsr_time_ref_count);
        *val = raw_trc_val(d) + trc->off;
        break;
    }

    case HV_X64_MSR_CRASH_P0:
    case HV_X64_MSR_CRASH_P1:
    case HV_X64_MSR_CRASH_P2:
    case HV_X64_MSR_CRASH_P3:
    case HV_X64_MSR_CRASH_P4:
        BUILD_BUG_ON(HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0 >=
                     ARRAY_SIZE(v->arch.hvm_vcpu.viridian.crash_param));

        idx -= HV_X64_MSR_CRASH_P0;
        *val = v->arch.hvm_vcpu.viridian.crash_param[idx];
        break;

    case HV_X64_MSR_CRASH_CTL:
    {
        HV_CRASH_CTL_REG_CONTENTS ctl = {
            .u.CrashNotify = 1,
        };

        *val = ctl.AsUINT64;
        break;
    }

    default:
        if ( idx >= VIRIDIAN_MSR_MIN && idx <= VIRIDIAN_MSR_MAX )
            gprintk(XENLOG_WARNING, "read from unimplemented MSR %#x\n",
                    idx);

        return 0;
    }

    return 1;
}

void viridian_vcpu_deinit(struct vcpu *v)
{
    teardown_vp_assist(v);
}

void viridian_domain_deinit(struct domain *d)
{
    struct vcpu *v;

    for_each_vcpu ( d, v )
        teardown_vp_assist(v);
}

static DEFINE_PER_CPU(cpumask_t, ipi_cpumask);

int viridian_hypercall(struct cpu_user_regs *regs)
{
    struct vcpu *curr = current;
    struct domain *currd = curr->domain;
    int mode = hvm_guest_x86_mode(curr);
    unsigned long input_params_gpa, output_params_gpa;
    uint16_t status = HV_STATUS_SUCCESS;

    union hypercall_input {
        uint64_t raw;
        struct {
            uint16_t call_code;
            uint16_t fast:1;
            uint16_t rsvd1:15;
            uint16_t rep_count:12;
            uint16_t rsvd2:4;
            uint16_t rep_start:12;
            uint16_t rsvd3:4;
        };
    } input;

    union hypercall_output {
        uint64_t raw;
        struct {
            uint16_t result;
            uint16_t rsvd1;
            uint32_t rep_complete:12;
            uint32_t rsvd2:20;
        };
    } output = { 0 };
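
    /*
     * Illustrative example (added commentary): a 64-bit input value of
     * 0x0000000100000002 decodes as call_code 0x0002
     * (HvFlushVirtualAddressSpace) with fast = 0 and rep_count = 1,
     * since the rep count occupies bits 32-43.
     */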

    ASSERT(is_viridian_domain(currd));

    switch ( mode )
    {
    case 8:
        input.raw = regs->rcx;
        input_params_gpa = regs->rdx;
        output_params_gpa = regs->r8;
        break;
    case 4:
        input.raw = (regs->rdx << 32) | regs->eax;
        input_params_gpa = (regs->rbx << 32) | regs->ecx;
        output_params_gpa = (regs->rdi << 32) | regs->esi;
        break;
    default:
        goto out;
    }

    switch ( input.call_code )
    {
    case HvNotifyLongSpinWait:
        /*
         * See section 14.5.1 of the specification.
         */
        perfc_incr(mshv_call_long_wait);
        do_sched_op(SCHEDOP_yield, guest_handle_from_ptr(NULL, void));
        status = HV_STATUS_SUCCESS;
        break;

    case HvFlushVirtualAddressSpace:
    case HvFlushVirtualAddressList:
    {
        cpumask_t *pcpu_mask;
        struct vcpu *v;
        struct {
            uint64_t address_space;
            uint64_t flags;
            uint64_t vcpu_mask;
        } input_params;

        /*
         * See sections 9.4.2 and 9.4.4 of the specification.
         */
        perfc_incr(mshv_call_flush);

        /* These hypercalls should never use the fast-call convention. */
        status = HV_STATUS_INVALID_PARAMETER;
        if ( input.fast )
            break;

        /* Get input parameters. */
        if ( hvm_copy_from_guest_phys(&input_params, input_params_gpa,
                                      sizeof(input_params)) != HVMTRANS_okay )
            break;

        /*
         * It is not clear from the spec. whether we are supposed to
         * include the current virtual CPU in the set or not in this
         * case, so err on the safe side.
         */
        if ( input_params.flags & HV_FLUSH_ALL_PROCESSORS )
            input_params.vcpu_mask = ~0ul;

        pcpu_mask = &this_cpu(ipi_cpumask);
        cpumask_clear(pcpu_mask);

        /*
         * For each specified virtual CPU flush all ASIDs to invalidate
         * TLB entries the next time it is scheduled and then, if it
         * is currently running, add its physical CPU to a mask of
         * those which need to be interrupted to force a flush.
         */
        for_each_vcpu ( currd, v )
        {
            if ( v->vcpu_id >= (sizeof(input_params.vcpu_mask) * 8) )
                break;

            if ( !(input_params.vcpu_mask & (1ul << v->vcpu_id)) )
                continue;

            hvm_asid_flush_vcpu(v);
            if ( v != curr && v->is_running )
                __cpumask_set_cpu(v->processor, pcpu_mask);
        }

        /*
         * Since ASIDs have now been flushed it just remains to
         * force any CPUs currently running target vCPUs out of non-
         * root mode. It's possible that re-scheduling has taken place
         * so we may unnecessarily IPI some CPUs.
         */
        if ( !cpumask_empty(pcpu_mask) )
            smp_send_event_check_mask(pcpu_mask);

        output.rep_complete = input.rep_count;

        status = HV_STATUS_SUCCESS;
        break;
    }

    default:
        gprintk(XENLOG_WARNING, "unimplemented hypercall %04x\n",
                input.call_code);
        /* Fallthrough. */
    case HvGetPartitionId:
    case HvExtCallQueryCapabilities:
        /*
         * These hypercalls seem to be erroneously issued by Windows
         * despite neither AccessPartitionId nor EnableExtendedHypercalls
         * being set in CPUID leaf 3.
         * Given that returning a status of 'invalid code' has not so far
         * caused any problems, it's not worth logging.
         */
        status = HV_STATUS_INVALID_HYPERCALL_CODE;
        break;
    }

 out:
    output.result = status;
    switch ( mode )
    {
    case 8:
        regs->rax = output.raw;
        break;
    default:
        regs->rdx = output.raw >> 32;
        regs->rax = (uint32_t)output.raw;
        break;
    }

    return HVM_HCALL_completed;
}

static int viridian_save_domain_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct hvm_viridian_domain_context ctxt = {
        .time_ref_count = d->arch.hvm_domain.viridian.time_ref_count.val,
        .hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw,
        .guest_os_id = d->arch.hvm_domain.viridian.guest_os_id.raw,
        .reference_tsc = d->arch.hvm_domain.viridian.reference_tsc.raw,
    };

    if ( !is_viridian_domain(d) )
        return 0;

    return (hvm_save_entry(VIRIDIAN_DOMAIN, 0, h, &ctxt) != 0);
}

static int viridian_load_domain_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct hvm_viridian_domain_context ctxt;

    if ( hvm_load_entry_zeroextend(VIRIDIAN_DOMAIN, h, &ctxt) != 0 )
        return -EINVAL;

    d->arch.hvm_domain.viridian.time_ref_count.val = ctxt.time_ref_count;
    d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
    d->arch.hvm_domain.viridian.guest_os_id.raw = ctxt.guest_os_id;
    d->arch.hvm_domain.viridian.reference_tsc.raw = ctxt.reference_tsc;

    if ( d->arch.hvm_domain.viridian.reference_tsc.fields.enabled )
        update_reference_tsc(d, 0);

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(VIRIDIAN_DOMAIN, viridian_save_domain_ctxt,
                          viridian_load_domain_ctxt, 1, HVMSR_PER_DOM);

static int viridian_save_vcpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    struct vcpu *v;

    if ( !is_viridian_domain(d) )
        return 0;

    for_each_vcpu ( d, v )
    {
        struct hvm_viridian_vcpu_context ctxt = {
            .vp_assist_msr = v->arch.hvm_vcpu.viridian.vp_assist.msr.raw,
            .vp_assist_vector = v->arch.hvm_vcpu.viridian.vp_assist.vector,
        };

        if ( hvm_save_entry(VIRIDIAN_VCPU, v->vcpu_id, h, &ctxt) != 0 )
            return 1;
    }

    return 0;
}

static int viridian_load_vcpu_ctxt(struct domain *d, hvm_domain_context_t *h)
{
    int vcpuid;
    struct vcpu *v;
    struct hvm_viridian_vcpu_context ctxt;

    vcpuid = hvm_load_instance(h);
    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
    {
        dprintk(XENLOG_G_ERR, "HVM restore: dom%d has no vcpu%u\n",
                d->domain_id, vcpuid);
        return -EINVAL;
    }

    if ( hvm_load_entry_zeroextend(VIRIDIAN_VCPU, h, &ctxt) != 0 )
        return -EINVAL;

    if ( memcmp(&ctxt._pad, zero_page, sizeof(ctxt._pad)) )
        return -EINVAL;

    v->arch.hvm_vcpu.viridian.vp_assist.msr.raw = ctxt.vp_assist_msr;
    if ( v->arch.hvm_vcpu.viridian.vp_assist.msr.fields.enabled &&
         !v->arch.hvm_vcpu.viridian.vp_assist.va )
        initialize_vp_assist(v);

    v->arch.hvm_vcpu.viridian.vp_assist.vector = ctxt.vp_assist_vector;

    return 0;
}

HVM_REGISTER_SAVE_RESTORE(VIRIDIAN_VCPU, viridian_save_vcpu_ctxt,
                          viridian_load_vcpu_ctxt, 1, HVMSR_PER_VCPU);

static int __init parse_viridian_version(const char *arg)
{
    const char *t;
    unsigned int n[3];
    unsigned int i = 0;

    n[0] = viridian_major;
    n[1] = viridian_minor;
    n[2] = viridian_build;

    do {
        const char *e;

        t = strchr(arg, ',');
        if ( !t )
            t = strchr(arg, '\0');

        if ( *arg && *arg != ',' && i < 3 )
        {
            n[i] = simple_strtoul(arg, &e, 0);
            if ( e != t )
                break;
        }

        i++;
        arg = t + 1;
    } while ( *t );

    if ( i != 3 )
        return -EINVAL;

    if ( ((typeof(viridian_major))n[0] != n[0]) ||
         ((typeof(viridian_minor))n[1] != n[1]) ||
         ((typeof(viridian_build))n[2] != n[2]) )
        return -EINVAL;

    viridian_major = n[0];
    viridian_minor = n[1];
    viridian_build = n[2];

    printk("viridian-version = %#x,%#x,%#x\n",
           viridian_major, viridian_minor, viridian_build);
    return 0;
}
custom_param("viridian-version", parse_viridian_version);
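
/*
 * Example usage (added commentary, values purely illustrative):
 * overriding the reported version on the Xen command line with
 *
 *   viridian-version=6,1,0x1db1
 *
 * would make CPUID leaf 2 report major 6, minor 1, build 0x1db1
 * instead of the Windows Server 2008 defaults above.
 */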

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */