1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3 * vmcs.c: VMCS management
4 * Copyright (c) 2004, Intel Corporation.
5 */
6
7 #include <xen/domain_page.h>
8 #include <xen/errno.h>
9 #include <xen/event.h>
10 #include <xen/init.h>
11 #include <xen/kernel.h>
12 #include <xen/keyhandler.h>
13 #include <xen/lib.h>
14 #include <xen/mm.h>
15 #include <xen/param.h>
16 #include <xen/vm_event.h>
17
18 #include <asm/apic.h>
19 #include <asm/cpufeature.h>
20 #include <asm/current.h>
21 #include <asm/flushtlb.h>
22 #include <asm/hvm/hvm.h>
23 #include <asm/hvm/io.h>
24 #include <asm/hvm/nestedhvm.h>
25 #include <asm/hvm/vmx/vmcs.h>
26 #include <asm/hvm/vmx/vmx.h>
27 #include <asm/hvm/vmx/vvmx.h>
28 #include <asm/idt.h>
29 #include <asm/monitor.h>
30 #include <asm/msr.h>
31 #include <asm/processor.h>
32 #include <asm/shadow.h>
33 #include <asm/spec_ctrl.h>
34 #include <asm/tboot.h>
35 #include <asm/xstate.h>
36
37 static bool __read_mostly opt_vpid_enabled = true;
38 boolean_param("vpid", opt_vpid_enabled);
39
40 static bool __read_mostly opt_unrestricted_guest_enabled = true;
41 boolean_param("unrestricted_guest", opt_unrestricted_guest_enabled);
42
43 static bool __read_mostly opt_apicv_enabled = true;
44 boolean_param("apicv", opt_apicv_enabled);
45
46 /*
47  * These two parameters are used to configure the controls for Pause-Loop Exiting:
48 * ple_gap: upper bound on the amount of time between two successive
49 * executions of PAUSE in a loop.
50 * ple_window: upper bound on the amount of time a guest is allowed to execute
51 * in a PAUSE loop.
52 * Time is measured based on a counter that runs at the same rate as the TSC,
53 * refer SDM volume 3b section 21.6.13 & 22.1.3.
54 */
55 static unsigned int __read_mostly ple_gap = 128;
56 integer_param("ple_gap", ple_gap);
57 static unsigned int __read_mostly ple_window = 4096;
58 integer_param("ple_window", ple_window);
59
60 static unsigned int __ro_after_init vm_notify_window;
61 integer_param("vm-notify-window", vm_notify_window);
62
63 static bool __read_mostly opt_ept_pml = true;
64 static int8_t __ro_after_init opt_ept_ad = -1;
65 int8_t __read_mostly opt_ept_exec_sp = -1;
66
67 static int __init cf_check parse_ept_param(const char *s)
68 {
69 const char *ss;
70 int val, rc = 0;
71
72 do {
73 ss = strchr(s, ',');
74 if ( !ss )
75 ss = strchr(s, '\0');
76
77 if ( (val = parse_boolean("ad", s, ss)) >= 0 )
78 opt_ept_ad = val;
79 else if ( (val = parse_boolean("pml", s, ss)) >= 0 )
80 opt_ept_pml = val;
81 else if ( (val = parse_boolean("exec-sp", s, ss)) >= 0 )
82 opt_ept_exec_sp = val;
83 else
84 rc = -EINVAL;
85
86 s = ss + 1;
87 } while ( *ss );
88
89 return rc;
90 }
91 custom_param("ept", parse_ept_param);
92
93 #ifdef CONFIG_HYPFS
94 static char opt_ept_setting[10];
95
96 static void update_ept_param(void)
97 {
98 if ( opt_ept_exec_sp >= 0 )
99 snprintf(opt_ept_setting, sizeof(opt_ept_setting), "exec-sp=%d",
100 opt_ept_exec_sp);
101 }
102
103 static void __init cf_check init_ept_param(struct param_hypfs *par)
104 {
105 update_ept_param();
106 custom_runtime_set_var(par, opt_ept_setting);
107 }
108
109 static int cf_check parse_ept_param_runtime(const char *s);
110 custom_runtime_only_param("ept", parse_ept_param_runtime, init_ept_param);
111
112 static int cf_check parse_ept_param_runtime(const char *s)
113 {
114 struct domain *d;
115 int val;
116
117 if ( !cpu_has_vmx_ept || !hvm_funcs.caps.hap ||
118 !(hvm_funcs.caps.hap_superpage_2mb ||
119 hvm_funcs.caps.hap_superpage_1gb) )
120 {
121 printk("VMX: EPT not available, or not in use - ignoring\n");
122 return 0;
123 }
124
125 if ( (val = parse_boolean("exec-sp", s, NULL)) < 0 )
126 return -EINVAL;
127
128 opt_ept_exec_sp = val;
129
130 update_ept_param();
131 custom_runtime_set_var(param_2_parfs(parse_ept_param_runtime),
132 opt_ept_setting);
133
134 rcu_read_lock(&domlist_read_lock);
135 for_each_domain ( d )
136 {
137 /* PV, or HVM Shadow domain? Not applicable. */
138 if ( !paging_mode_hap(d) )
139 continue;
140
141 /* Hardware domain? Not applicable. */
142 if ( is_hardware_domain(d) )
143 continue;
144
145 /* Nested Virt? Broken and exec_sp forced on to avoid livelocks. */
146 if ( nestedhvm_enabled(d) )
147 continue;
148
149 /* Setting already matches? No need to rebuild the p2m. */
150 if ( d->arch.hvm.vmx.exec_sp == val )
151 continue;
152
153 d->arch.hvm.vmx.exec_sp = val;
154 p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_rw);
155 }
156 rcu_read_unlock(&domlist_read_lock);
157
158 printk("VMX: EPT executable superpages %sabled\n",
159 val ? "en" : "dis");
160
161 return 0;
162 }
163 #endif
164
165 /* Dynamic (run-time adjusted) execution control flags. */
166 struct vmx_caps __ro_after_init vmx_caps;
167
168 static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, vmxon_region);
169 static DEFINE_PER_CPU(paddr_t, current_vmcs);
170 static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
171 DEFINE_PER_CPU(bool, vmxon);
172
173 #define vmcs_revision_id (vmx_caps.basic_msr & VMX_BASIC_REVISION_MASK)
174
175 static void __init vmx_display_features(void)
176 {
177 int printed = 0;
178
179 printk("VMX: Supported advanced features:\n");
180
181 #define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; }
182 P(cpu_has_vmx_virtualize_apic_accesses, "APIC MMIO access virtualisation");
183 P(cpu_has_vmx_tpr_shadow, "APIC TPR shadow");
184 P(cpu_has_vmx_ept, "Extended Page Tables (EPT)");
185 P(cpu_has_vmx_vpid, "Virtual-Processor Identifiers (VPID)");
186 P(cpu_has_vmx_vnmi, "Virtual NMI");
187 P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap");
188 P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
189 P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
190 P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery");
191 P(cpu_has_vmx_posted_intr_processing, "Posted Interrupt Processing");
192 P(cpu_has_vmx_vmcs_shadowing, "VMCS shadowing");
193 P(cpu_has_vmx_vmfunc, "VM Functions");
194 P(cpu_has_vmx_virt_exceptions, "Virtualisation Exceptions");
195 P(cpu_has_vmx_pml, "Page Modification Logging");
196 P(cpu_has_vmx_tsc_scaling, "TSC Scaling");
197 P(cpu_has_vmx_bus_lock_detection, "Bus Lock Detection");
198 P(cpu_has_vmx_notify_vm_exiting, "Notify VM Exit");
199 P(cpu_has_vmx_virt_spec_ctrl, "Virtualize SPEC_CTRL");
200 P(cpu_has_vmx_ept_paging_write, "EPT Paging-Write");
201 #undef P
202
203 if ( !printed )
204 printk(" - none\n");
205 }
206
207 static u32 adjust_vmx_controls(
208 const char *name, u32 ctl_min, u32 ctl_opt, u32 msr, bool *mismatch)
209 {
210 u32 vmx_msr_low, vmx_msr_high, ctl = ctl_min | ctl_opt;
211
212 rdmsr(msr, vmx_msr_low, vmx_msr_high);
213
214 ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
215 ctl |= vmx_msr_low; /* bit == 1 in low word ==> must be one */
216
217 /* Ensure minimum (required) set of control bits are supported. */
218 if ( ctl_min & ~ctl )
219 {
220 *mismatch = 1;
221 printk("VMX: CPU%d has insufficient %s (%08x; requires %08x)\n",
222 smp_processor_id(), name, ctl, ctl_min);
223 }
224
225 return ctl;
226 }
227
228 static uint64_t adjust_vmx_controls2(
229 const char *name, uint64_t ctl_min, uint64_t ctl_opt, unsigned int msr,
230 bool *mismatch)
231 {
232 uint64_t vmx_msr, ctl = ctl_min | ctl_opt;
233
234 rdmsrl(msr, vmx_msr);
235
236 ctl &= vmx_msr; /* bit == 0 ==> must be zero */
237
238 /* Ensure minimum (required) set of control bits are supported. */
239 if ( ctl_min & ~ctl )
240 {
241 *mismatch = true;
242 printk("VMX: CPU%u has insufficient %s (%#lx; requires %#lx)\n",
243 smp_processor_id(), name, ctl, ctl_min);
244 }
245
246 return ctl;
247 }
248
249 static bool cap_check(
250 const char *name, unsigned long expected, unsigned long saw)
251 {
252 if ( saw != expected )
253 printk("VMX %s: saw %#lx expected %#lx\n", name, saw, expected);
254 return saw != expected;
255 }
256
257 static int vmx_init_vmcs_config(bool bsp)
258 {
259 u32 vmx_basic_msr_low, vmx_basic_msr_high, min, opt;
260 struct vmx_caps caps = {};
261 u64 _vmx_misc_cap = 0;
262 bool mismatch = false;
263
264 rdmsr(MSR_IA32_VMX_BASIC, vmx_basic_msr_low, vmx_basic_msr_high);
265
266 min = (PIN_BASED_EXT_INTR_MASK |
267 PIN_BASED_NMI_EXITING);
268 opt = (PIN_BASED_VIRTUAL_NMIS |
269 PIN_BASED_POSTED_INTERRUPT);
270 caps.pin_based_exec_control = adjust_vmx_controls(
271 "Pin-Based Exec Control", min, opt,
272 MSR_IA32_VMX_PINBASED_CTLS, &mismatch);
273
274 min = (CPU_BASED_HLT_EXITING |
275 CPU_BASED_VIRTUAL_INTR_PENDING |
276 CPU_BASED_CR8_LOAD_EXITING |
277 CPU_BASED_CR8_STORE_EXITING |
278 CPU_BASED_INVLPG_EXITING |
279 CPU_BASED_CR3_LOAD_EXITING |
280 CPU_BASED_CR3_STORE_EXITING |
281 CPU_BASED_MONITOR_EXITING |
282 CPU_BASED_MWAIT_EXITING |
283 CPU_BASED_MOV_DR_EXITING |
284 CPU_BASED_ACTIVATE_IO_BITMAP |
285 CPU_BASED_USE_TSC_OFFSETING |
286 CPU_BASED_RDTSC_EXITING);
287 opt = (CPU_BASED_ACTIVATE_MSR_BITMAP |
288 CPU_BASED_TPR_SHADOW |
289 CPU_BASED_MONITOR_TRAP_FLAG |
290 CPU_BASED_ACTIVATE_SECONDARY_CONTROLS |
291 CPU_BASED_ACTIVATE_TERTIARY_CONTROLS);
292 caps.cpu_based_exec_control = adjust_vmx_controls(
293 "CPU-Based Exec Control", min, opt,
294 MSR_IA32_VMX_PROCBASED_CTLS, &mismatch);
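    /*
     * RDTSC exiting appears in 'min' only so that support for it is verified;
     * it is cleared here and re-enabled per-vCPU in construct_vmcs() when the
     * domain uses TSC emulation (vtsc) without TSC scaling.
     */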
295 caps.cpu_based_exec_control &= ~CPU_BASED_RDTSC_EXITING;
296 if ( caps.cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
297 caps.cpu_based_exec_control &=
298 ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
299
300 rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
301
302 /* Check whether IPT is supported in VMX operation. */
303 if ( bsp )
304 vmtrace_available = cpu_has_proc_trace &&
305 (_vmx_misc_cap & VMX_MISC_PROC_TRACE);
306 else if ( vmtrace_available &&
307 !(_vmx_misc_cap & VMX_MISC_PROC_TRACE) )
308 {
309 printk("VMX: IPT capabilities differ between CPU%u and BSP\n",
310 smp_processor_id());
311 return -EINVAL;
312 }
313
314 if ( caps.cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
315 {
316 min = 0;
317 opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
318 SECONDARY_EXEC_WBINVD_EXITING |
319 SECONDARY_EXEC_ENABLE_EPT |
320 SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING |
321 SECONDARY_EXEC_ENABLE_RDTSCP |
322 SECONDARY_EXEC_PAUSE_LOOP_EXITING |
323 SECONDARY_EXEC_ENABLE_INVPCID |
324 SECONDARY_EXEC_ENABLE_VM_FUNCTIONS |
325 SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
326 SECONDARY_EXEC_XSAVES |
327 SECONDARY_EXEC_TSC_SCALING |
328 SECONDARY_EXEC_BUS_LOCK_DETECTION);
329 if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
330 opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
331 if ( opt_vpid_enabled )
332 opt |= SECONDARY_EXEC_ENABLE_VPID;
333 if ( opt_unrestricted_guest_enabled )
334 opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST;
335 if ( opt_ept_pml )
336 opt |= SECONDARY_EXEC_ENABLE_PML;
337 if ( vm_notify_window != ~0u )
338 opt |= SECONDARY_EXEC_NOTIFY_VM_EXITING;
339
340 /*
341 * "APIC Register Virtualization" and "Virtual Interrupt Delivery"
342 * can be set only when "use TPR shadow" is set
343 */
344 if ( (caps.cpu_based_exec_control & CPU_BASED_TPR_SHADOW) &&
345 opt_apicv_enabled )
346 opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT |
347 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
348 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
349
350 caps.secondary_exec_control = adjust_vmx_controls(
351 "Secondary Exec Control", min, opt,
352 MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch);
353 }
354
355 if ( caps.cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS )
356 {
357 uint64_t opt = (TERTIARY_EXEC_VIRT_SPEC_CTRL |
358 TERTIARY_EXEC_EPT_PAGING_WRITE);
359
360 caps.tertiary_exec_control = adjust_vmx_controls2(
361 "Tertiary Exec Control", 0, opt,
362 MSR_IA32_VMX_PROCBASED_CTLS3, &mismatch);
363 }
364
365     /* The IA32_VMX_EPT_VPID_CAP MSR exists only when EPT or VPID is available */
366 if ( caps.secondary_exec_control & (SECONDARY_EXEC_ENABLE_EPT |
367 SECONDARY_EXEC_ENABLE_VPID) )
368 {
369 rdmsr(MSR_IA32_VMX_EPT_VPID_CAP, caps.ept, caps.vpid);
370
371 if ( !opt_ept_ad )
372 caps.ept &= ~VMX_EPT_AD_BIT;
373
374 /*
375 * Additional sanity checking before using EPT:
376 * 1) the CPU we are running on must support EPT WB, as we will set
377 * ept paging structures memory type to WB;
378 * 2) the CPU must support the EPT page-walk length of 4 according to
379 * Intel SDM 25.2.2.
380 * 3) the CPU must support INVEPT all context invalidation, because we
381 * will use it as final resort if other types are not supported.
382 *
383 * Or we just don't use EPT.
384 */
385 if ( !(caps.ept & VMX_EPT_MEMORY_TYPE_WB) ||
386 !(caps.ept & VMX_EPT_WALK_LENGTH_4_SUPPORTED) ||
387 !(caps.ept & VMX_EPT_INVEPT_ALL_CONTEXT) )
388 caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
389
390 /*
391 * the CPU must support INVVPID all context invalidation, because we
392 * will use it as final resort if other types are not supported.
393 *
394 * Or we just don't use VPID.
395 */
396 if ( !(caps.vpid & VMX_VPID_INVVPID_ALL_CONTEXT) )
397 caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
398
399         /* EPT A/D bits are required for PML */
400 if ( !(caps.ept & VMX_EPT_AD_BIT) )
401 caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
402 }
403
404 if ( caps.secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
405 {
406 /*
407 * To use EPT we expect to be able to clear certain intercepts.
408 * We check VMX_BASIC_MSR[55] to correctly handle default controls.
409 */
410 uint32_t must_be_one, must_be_zero, msr = MSR_IA32_VMX_PROCBASED_CTLS;
411 if ( vmx_basic_msr_high & (VMX_BASIC_DEFAULT1_ZERO >> 32) )
412 msr = MSR_IA32_VMX_TRUE_PROCBASED_CTLS;
413 rdmsr(msr, must_be_one, must_be_zero);
414 if ( must_be_one & (CPU_BASED_INVLPG_EXITING |
415 CPU_BASED_CR3_LOAD_EXITING |
416 CPU_BASED_CR3_STORE_EXITING) )
417 caps.secondary_exec_control &=
418 ~(SECONDARY_EXEC_ENABLE_EPT |
419 SECONDARY_EXEC_UNRESTRICTED_GUEST);
420 }
421
422 /* PML cannot be supported if EPT is not used */
423 if ( !(caps.secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) )
424 caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
425
426 /* Turn off opt_ept_pml if PML feature is not present. */
427 if ( !(caps.secondary_exec_control & SECONDARY_EXEC_ENABLE_PML) )
428 opt_ept_pml = false;
429
430 if ( (caps.secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) &&
431 ple_gap == 0 )
432 {
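        /* Only print once: the global caps are still empty on the BSP's first pass. */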
433 if ( !vmx_caps.pin_based_exec_control )
434 printk(XENLOG_INFO "Disable Pause-Loop Exiting.\n");
435 caps.secondary_exec_control &= ~ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
436 }
437
438 min = VM_EXIT_ACK_INTR_ON_EXIT;
439 opt = (VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT |
440 VM_EXIT_LOAD_HOST_EFER | VM_EXIT_CLEAR_BNDCFGS);
441 min |= VM_EXIT_IA32E_MODE;
442 caps.vmexit_control = adjust_vmx_controls(
443 "VMExit Control", min, opt, MSR_IA32_VMX_EXIT_CTLS, &mismatch);
444
445 /*
446 * "Process posted interrupt" can be set only when "virtual-interrupt
447 * delivery" and "acknowledge interrupt on exit" is set. For the latter
448 * is a minimal requirement, only check the former, which is optional.
449 */
450 if ( !(caps.secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) )
451 caps.pin_based_exec_control &= ~PIN_BASED_POSTED_INTERRUPT;
452
453 if ( iommu_intpost &&
454 !(caps.pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) )
455 {
456 printk("Intel VT-d Posted Interrupt is disabled for CPU-side Posted "
457 "Interrupt is not enabled\n");
458 iommu_intpost = 0;
459 }
460
461 /* The IA32_VMX_VMFUNC MSR exists only when VMFUNC is available */
462 if ( caps.secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS )
463 {
464 rdmsrl(MSR_IA32_VMX_VMFUNC, caps.vmfunc);
465
466 /*
467 * VMFUNC leaf 0 (EPTP switching) must be supported.
468 *
469 * Or we just don't use VMFUNC.
470 */
471 if ( !(caps.vmfunc & VMX_VMFUNC_EPTP_SWITCHING) )
472 caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VM_FUNCTIONS;
473 }
474
475 /* Virtualization exceptions are only enabled if VMFUNC is enabled */
476 if ( !(caps.secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS) )
477 caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS;
478
479 min = 0;
480 opt = (VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_GUEST_EFER |
481 VM_ENTRY_LOAD_BNDCFGS);
482 caps.vmentry_control = adjust_vmx_controls(
483 "VMEntry Control", min, opt, MSR_IA32_VMX_ENTRY_CTLS, &mismatch);
484
485 if ( mismatch )
486 return -EINVAL;
487
488 if ( !vmx_caps.pin_based_exec_control )
489 {
490 /* First time through. */
491 vmx_caps = caps;
492 vmx_caps.basic_msr = ((uint64_t)vmx_basic_msr_high << 32) |
493 vmx_basic_msr_low;
494
495 vmx_display_features();
496
497 /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
498 if ( (vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)) >
499 PAGE_SIZE )
500 {
501 printk("VMX: CPU%d VMCS size is too big (%Lu bytes)\n",
502 smp_processor_id(),
503 vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32));
504 return -EINVAL;
505 }
506 }
507 else
508 {
509 /* Globals are already initialised: re-check them. */
510 mismatch |= cap_check(
511 "VMCS revision ID",
512 vmcs_revision_id, vmx_basic_msr_low & VMX_BASIC_REVISION_MASK);
513 mismatch |= cap_check(
514 "Pin-Based Exec Control",
515 vmx_caps.pin_based_exec_control, caps.pin_based_exec_control);
516 mismatch |= cap_check(
517 "CPU-Based Exec Control",
518 vmx_caps.cpu_based_exec_control, caps.cpu_based_exec_control);
519 mismatch |= cap_check(
520 "Secondary Exec Control",
521 vmx_caps.secondary_exec_control, caps.secondary_exec_control);
522 mismatch |= cap_check(
523 "Tertiary Exec Control",
524 vmx_caps.tertiary_exec_control, caps.tertiary_exec_control);
525 mismatch |= cap_check(
526 "VMExit Control",
527 vmx_caps.vmexit_control, caps.vmexit_control);
528 mismatch |= cap_check(
529 "VMEntry Control",
530 vmx_caps.vmentry_control, caps.vmentry_control);
531 mismatch |= cap_check("EPT Capability", vmx_caps.ept, caps.ept);
532 mismatch |= cap_check("VPID Capability", vmx_caps.vpid, caps.vpid);
533 mismatch |= cap_check(
534 "VMFUNC Capability",
535 vmx_caps.vmfunc, caps.vmfunc);
536 if ( cpu_has_vmx_ins_outs_instr_info !=
537 !!(vmx_basic_msr_high & (VMX_BASIC_INS_OUT_INFO >> 32)) )
538 {
539 printk("VMX INS/OUTS Instruction Info: saw %d expected %d\n",
540 !!(vmx_basic_msr_high & (VMX_BASIC_INS_OUT_INFO >> 32)),
541 cpu_has_vmx_ins_outs_instr_info);
542 mismatch = 1;
543 }
544 if ( (vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)) !=
545 ((vmx_caps.basic_msr & VMX_BASIC_VMCS_SIZE_MASK) >> 32) )
546 {
547 printk("VMX: CPU%d unexpected VMCS size %Lu\n",
548 smp_processor_id(),
549 vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32));
550 mismatch = 1;
551 }
552 if ( mismatch )
553 {
554 printk("VMX: Capabilities fatally differ between CPU%d and CPU0\n",
555 smp_processor_id());
556 return -EINVAL;
557 }
558 }
559
560 /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
561 if ( vmx_basic_msr_high & (VMX_BASIC_32BIT_ADDRESSES >> 32) )
562 {
563 printk("VMX: CPU%d limits VMX structure pointers to 32 bits\n",
564 smp_processor_id());
565 return -EINVAL;
566 }
567
568 /* Require Write-Back (WB) memory type for VMCS accesses. */
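    /* (MASK & -MASK) isolates the lowest set bit of the mask, so dividing by it right-justifies the field. */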
569 opt = (vmx_basic_msr_high & (VMX_BASIC_MEMORY_TYPE_MASK >> 32)) /
570 ((VMX_BASIC_MEMORY_TYPE_MASK & -VMX_BASIC_MEMORY_TYPE_MASK) >> 32);
571 if ( opt != X86_MT_WB )
572 {
573 printk("VMX: CPU%d has unexpected VMCS access type %u\n",
574 smp_processor_id(), opt);
575 return -EINVAL;
576 }
577
578 return 0;
579 }
580
581 static paddr_t vmx_alloc_vmcs(void)
582 {
583 struct page_info *pg;
584 struct vmcs_struct *vmcs;
585
586 if ( (pg = alloc_domheap_page(NULL, 0)) == NULL )
587 {
588 gdprintk(XENLOG_WARNING, "Failed to allocate VMCS.\n");
589 return 0;
590 }
591
592 vmcs = __map_domain_page(pg);
593 clear_page(vmcs);
594 vmcs->revision_id = vmcs_revision_id;
595 unmap_domain_page(vmcs);
596
597 return page_to_maddr(pg);
598 }
599
600 static void vmx_free_vmcs(paddr_t pa)
601 {
602 free_domheap_page(maddr_to_page(pa));
603 }
604
605 static void cf_check __vmx_clear_vmcs(void *info)
606 {
607 struct vcpu *v = info;
608 struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
609
610 /* Otherwise we can nest (vmx_cpu_down() vs. vmx_clear_vmcs()). */
611 ASSERT(!local_irq_is_enabled());
612
613 if ( vmx->active_cpu == smp_processor_id() )
614 {
615 __vmpclear(vmx->vmcs_pa);
616 if ( vmx->vmcs_shadow_maddr )
617 __vmpclear(vmx->vmcs_shadow_maddr);
618
619 vmx->active_cpu = -1;
620 vmx->launched = 0;
621
622 list_del(&vmx->active_list);
623
624 if ( vmx->vmcs_pa == this_cpu(current_vmcs) )
625 this_cpu(current_vmcs) = 0;
626 }
627 }
628
629 static void vmx_clear_vmcs(struct vcpu *v)
630 {
631 int cpu = v->arch.hvm.vmx.active_cpu;
632
633 if ( cpu != -1 )
634 on_selected_cpus(cpumask_of(cpu), __vmx_clear_vmcs, v, 1);
635 }
636
637 static void vmx_load_vmcs(struct vcpu *v)
638 {
639 unsigned long flags;
640
641 local_irq_save(flags);
642
643 if ( v->arch.hvm.vmx.active_cpu == -1 )
644 {
645 list_add(&v->arch.hvm.vmx.active_list, &this_cpu(active_vmcs_list));
646 v->arch.hvm.vmx.active_cpu = smp_processor_id();
647 }
648
649 ASSERT(v->arch.hvm.vmx.active_cpu == smp_processor_id());
650
651 __vmptrld(v->arch.hvm.vmx.vmcs_pa);
652 this_cpu(current_vmcs) = v->arch.hvm.vmx.vmcs_pa;
653
654 local_irq_restore(flags);
655 }
656
657 void vmx_vmcs_reload(struct vcpu *v)
658 {
659 /*
660 * As we may be running with interrupts disabled, we can't acquire
661 * v->arch.hvm.vmx.vmcs_lock here. However, with interrupts disabled
662 * the VMCS can't be taken away from us anymore if we still own it.
663 */
664 ASSERT(v->is_running || !local_irq_is_enabled());
665 if ( v->arch.hvm.vmx.vmcs_pa == this_cpu(current_vmcs) )
666 return;
667
668 vmx_load_vmcs(v);
669 }
670
671 int cf_check vmx_cpu_up_prepare(unsigned int cpu)
672 {
673 /*
674      * If nvmx_cpu_up_prepare() fails, do not return failure; just fall back
675      * to legacy mode for vvmcs synchronization.
676 */
677 if ( nvmx_cpu_up_prepare(cpu) != 0 )
678 printk("CPU%d: Could not allocate virtual VMCS buffer.\n", cpu);
679
680 if ( per_cpu(vmxon_region, cpu) )
681 return 0;
682
683 per_cpu(vmxon_region, cpu) = vmx_alloc_vmcs();
684 if ( per_cpu(vmxon_region, cpu) )
685 return 0;
686
687 printk("CPU%d: Could not allocate host VMCS\n", cpu);
688 nvmx_cpu_dead(cpu);
689 return -ENOMEM;
690 }
691
692 void cf_check vmx_cpu_dead(unsigned int cpu)
693 {
694 vmx_free_vmcs(per_cpu(vmxon_region, cpu));
695 per_cpu(vmxon_region, cpu) = 0;
696 nvmx_cpu_dead(cpu);
697 vmx_pi_desc_fixup(cpu);
698 }
699
700 static int _vmx_cpu_up(bool bsp)
701 {
702 u32 eax, edx;
703 int rc, bios_locked, cpu = smp_processor_id();
704 u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1;
705
706 BUG_ON(!(read_cr4() & X86_CR4_VMXE));
707
708 /*
709 * Ensure the current processor operating mode meets
710      * the required CR0 fixed bits in VMX operation.
711 */
712 cr0 = read_cr0();
713 rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0);
714 rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1);
715 if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) )
716 {
717 printk("CPU%d: some settings of host CR0 are "
718 "not allowed in VMX operation.\n", cpu);
719 return -EINVAL;
720 }
721
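    /*
     * IA32_FEATURE_CONTROL gates VMXON.  If the firmware has locked it, check
     * that VMX is permitted in the current (SMX or non-SMX) environment;
     * otherwise enable and lock it ourselves.
     */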
722 rdmsr(MSR_IA32_FEATURE_CONTROL, eax, edx);
723
724 bios_locked = !!(eax & IA32_FEATURE_CONTROL_LOCK);
725 if ( bios_locked )
726 {
727 if ( !(eax & (tboot_in_measured_env()
728 ? IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX
729 : IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX)) )
730 {
731 printk("CPU%d: VMX disabled by BIOS.\n", cpu);
732 return -EINVAL;
733 }
734 }
735 else
736 {
737 eax = IA32_FEATURE_CONTROL_LOCK;
738 eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
739 if ( test_bit(X86_FEATURE_SMX, &boot_cpu_data.x86_capability) )
740 eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX;
741 wrmsr(MSR_IA32_FEATURE_CONTROL, eax, 0);
742 }
743
744 if ( (rc = vmx_init_vmcs_config(bsp)) != 0 )
745 return rc;
746
747 INIT_LIST_HEAD(&this_cpu(active_vmcs_list));
748
749 if ( bsp && (rc = vmx_cpu_up_prepare(cpu)) != 0 )
750 return rc;
751
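    /*
     * Enter VMX operation.  A failed VMXON (CF/ZF set) branches to
     * vmxon_fail; a fault (e.g. #GP) is fixed up to vmxon_fault.
     */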
752 asm_inline goto (
753 "1: vmxon %[addr]\n\t"
754 " jbe %l[vmxon_fail]\n\t"
755 _ASM_EXTABLE(1b, %l[vmxon_fault])
756 :
757 : [addr] "m" (this_cpu(vmxon_region))
758 : "memory"
759 : vmxon_fail, vmxon_fault );
760
761 this_cpu(vmxon) = 1;
762
763 hvm_asid_init(cpu_has_vmx_vpid ? (1u << VMCS_VPID_WIDTH) : 0);
764
765 if ( cpu_has_vmx_ept )
766 ept_sync_all();
767
768 if ( cpu_has_vmx_vpid )
769 vpid_sync_all();
770
771 vmx_pi_per_cpu_init(cpu);
772
773 return 0;
774
775 vmxon_fault:
776 if ( bios_locked &&
777 test_bit(X86_FEATURE_SMX, &boot_cpu_data.x86_capability) &&
778 (!(eax & IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX) ||
779 !(eax & IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX)) )
780 {
781 printk(XENLOG_ERR
782 "CPU%d: VMXON failed: perhaps because of TXT settings in your BIOS configuration?\n",
783 cpu);
784 printk(XENLOG_ERR
785 " --> Disable TXT in your BIOS unless using a secure bootloader.\n");
786 return -EINVAL;
787 }
788
789 vmxon_fail:
790 printk(XENLOG_ERR "CPU%d: unexpected VMXON failure\n", cpu);
791 return -EINVAL;
792 }
793
794 int cf_check vmx_cpu_up(void)
795 {
796 return _vmx_cpu_up(false);
797 }
798
799 void cf_check vmx_cpu_down(void)
800 {
801 struct list_head *active_vmcs_list = &this_cpu(active_vmcs_list);
802 unsigned long flags;
803
804 if ( !this_cpu(vmxon) )
805 return;
806
807 local_irq_save(flags);
808
809 while ( !list_empty(active_vmcs_list) )
810 __vmx_clear_vmcs(list_entry(active_vmcs_list->next,
811 struct vcpu, arch.hvm.vmx.active_list));
812
813 BUG_ON(!(read_cr4() & X86_CR4_VMXE));
814 this_cpu(vmxon) = 0;
815 asm volatile ( "vmxoff" ::: "memory" );
816
817 local_irq_restore(flags);
818 }
819
820 struct foreign_vmcs {
821 struct vcpu *v;
822 unsigned int count;
823 };
824 static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs);
825
826 bool vmx_vmcs_try_enter(struct vcpu *v)
827 {
828 struct foreign_vmcs *fv;
829
830 /*
831 * NB. We must *always* run an HVM VCPU on its own VMCS, except for
832 * vmx_vmcs_enter/exit and scheduling tail critical regions.
833 */
834 if ( likely(v == current) )
835 return v->arch.hvm.vmx.vmcs_pa == this_cpu(current_vmcs);
836
837 fv = &this_cpu(foreign_vmcs);
838
839 if ( fv->v == v )
840 {
841 BUG_ON(fv->count == 0);
842 }
843 else
844 {
845 BUG_ON(fv->v != NULL);
846 BUG_ON(fv->count != 0);
847
848 vcpu_pause(v);
849 spin_lock(&v->arch.hvm.vmx.vmcs_lock);
850
851 vmx_clear_vmcs(v);
852 vmx_load_vmcs(v);
853
854 fv->v = v;
855 }
856
857 fv->count++;
858
859 return 1;
860 }
861
862 void vmx_vmcs_enter(struct vcpu *v)
863 {
864 bool okay = vmx_vmcs_try_enter(v);
865
866 ASSERT(okay);
867 }
868
869 void vmx_vmcs_exit(struct vcpu *v)
870 {
871 struct foreign_vmcs *fv;
872
873 if ( likely(v == current) )
874 return;
875
876 fv = &this_cpu(foreign_vmcs);
877 BUG_ON(fv->v != v);
878 BUG_ON(fv->count == 0);
879
880 if ( --fv->count == 0 )
881 {
882 /* Don't confuse vmx_do_resume (for @v or @current!) */
883 vmx_clear_vmcs(v);
884 if ( is_hvm_vcpu(current) )
885 vmx_load_vmcs(current);
886
887 spin_unlock(&v->arch.hvm.vmx.vmcs_lock);
888 vcpu_unpause(v);
889
890 fv->v = NULL;
891 }
892 }
893
894 static void vmx_set_host_env(struct vcpu *v)
895 {
896 unsigned int cpu = smp_processor_id();
897
898 __vmwrite(HOST_GDTR_BASE,
899 (unsigned long)(this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY));
900 __vmwrite(HOST_IDTR_BASE, (unsigned long)per_cpu(idt, cpu));
901
902 __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(tss_page, cpu).tss);
903
904 __vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
905
906 /*
907      * Skip the end of cpu_user_regs when entering the hypervisor, because the
908      * CPU does not save that context onto the stack. SS, RSP, CS, RIP, RFLAGS, etc.
909 * all get saved into the VMCS instead.
910 */
911 __vmwrite(HOST_RSP,
912 (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
913 }
914
915 void vmx_clear_msr_intercept(struct vcpu *v, unsigned int msr,
916 enum vmx_msr_intercept_type type)
917 {
918 struct vmx_msr_bitmap *msr_bitmap = v->arch.hvm.vmx.msr_bitmap;
919 struct domain *d = v->domain;
920
921 /* VMX MSR bitmap supported? */
922 if ( msr_bitmap == NULL )
923 return;
924
925 if ( unlikely(monitored_msr(d, msr)) )
926 return;
927
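    /*
     * The bitmap covers two MSR ranges: 0x00000000-0x00001fff (low) and
     * 0xc0000000-0xc0001fff (high), each with separate read and write bits.
     */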
928 if ( msr <= 0x1fff )
929 {
930 if ( type & VMX_MSR_R )
931 clear_bit(msr, msr_bitmap->read_low);
932 if ( type & VMX_MSR_W )
933 clear_bit(msr, msr_bitmap->write_low);
934 }
935 else if ( (msr >= 0xc0000000U) && (msr <= 0xc0001fffU) )
936 {
937 msr &= 0x1fff;
938 if ( type & VMX_MSR_R )
939 clear_bit(msr, msr_bitmap->read_high);
940 if ( type & VMX_MSR_W )
941 clear_bit(msr, msr_bitmap->write_high);
942 }
943 else
944 ASSERT(!"MSR out of range for interception\n");
945 }
946
947 void vmx_set_msr_intercept(struct vcpu *v, unsigned int msr,
948 enum vmx_msr_intercept_type type)
949 {
950 struct vmx_msr_bitmap *msr_bitmap = v->arch.hvm.vmx.msr_bitmap;
951
952 /* VMX MSR bitmap supported? */
953 if ( msr_bitmap == NULL )
954 return;
955
956 if ( msr <= 0x1fff )
957 {
958 if ( type & VMX_MSR_R )
959 set_bit(msr, msr_bitmap->read_low);
960 if ( type & VMX_MSR_W )
961 set_bit(msr, msr_bitmap->write_low);
962 }
963 else if ( (msr >= 0xc0000000U) && (msr <= 0xc0001fffU) )
964 {
965 msr &= 0x1fff;
966 if ( type & VMX_MSR_R )
967 set_bit(msr, msr_bitmap->read_high);
968 if ( type & VMX_MSR_W )
969 set_bit(msr, msr_bitmap->write_high);
970 }
971 else
972 ASSERT(!"MSR out of range for interception\n");
973 }
974
975 bool vmx_msr_is_intercepted(struct vmx_msr_bitmap *msr_bitmap,
976 unsigned int msr, bool is_write)
977 {
978 if ( msr <= 0x1fff )
979 return test_bit(msr, is_write ? msr_bitmap->write_low
980 : msr_bitmap->read_low);
981 else if ( (msr >= 0xc0000000U) && (msr <= 0xc0001fffU) )
982 return test_bit(msr & 0x1fff, is_write ? msr_bitmap->write_high
983 : msr_bitmap->read_high);
984 else
985 /* MSRs outside the bitmap ranges are always intercepted. */
986 return true;
987 }
988
989
990 /*
991 * Switch VMCS between layer 1 & 2 guest
992 */
993 void vmx_vmcs_switch(paddr_t from, paddr_t to)
994 {
995     struct vmx_vcpu *vmx = &current->arch.hvm.vmx;
996 spin_lock(&vmx->vmcs_lock);
997
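    /*
     * Clear the outgoing VMCS (and its shadow, if any) so it may safely be
     * loaded elsewhere, then make 'to' the current VMCS on this CPU.
     */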
998 __vmpclear(from);
999 if ( vmx->vmcs_shadow_maddr )
1000 __vmpclear(vmx->vmcs_shadow_maddr);
1001 __vmptrld(to);
1002
1003 vmx->vmcs_pa = to;
1004 vmx->launched = 0;
1005 this_cpu(current_vmcs) = to;
1006
1007 if ( vmx->hostenv_migrated )
1008 {
1009 vmx->hostenv_migrated = 0;
1010 vmx_set_host_env(current);
1011 }
1012
1013 spin_unlock(&vmx->vmcs_lock);
1014 }
1015
1016 void virtual_vmcs_enter(const struct vcpu *v)
1017 {
1018 __vmptrld(v->arch.hvm.vmx.vmcs_shadow_maddr);
1019 }
1020
1021 void virtual_vmcs_exit(const struct vcpu *v)
1022 {
1023 paddr_t cur = this_cpu(current_vmcs);
1024
1025 __vmpclear(v->arch.hvm.vmx.vmcs_shadow_maddr);
1026 if ( cur )
1027 __vmptrld(cur);
1028 }
1029
1030 u64 virtual_vmcs_vmread(const struct vcpu *v, u32 vmcs_encoding)
1031 {
1032 u64 res;
1033
1034 virtual_vmcs_enter(v);
1035 __vmread(vmcs_encoding, &res);
1036 virtual_vmcs_exit(v);
1037
1038 return res;
1039 }
1040
1041 enum vmx_insn_errno virtual_vmcs_vmread_safe(const struct vcpu *v,
1042 u32 vmcs_encoding, u64 *val)
1043 {
1044 enum vmx_insn_errno ret;
1045
1046 virtual_vmcs_enter(v);
1047 ret = vmread_safe(vmcs_encoding, val);
1048 virtual_vmcs_exit(v);
1049
1050 return ret;
1051 }
1052
1053 void virtual_vmcs_vmwrite(const struct vcpu *v, u32 vmcs_encoding, u64 val)
1054 {
1055 virtual_vmcs_enter(v);
1056 __vmwrite(vmcs_encoding, val);
1057 virtual_vmcs_exit(v);
1058 }
1059
1060 enum vmx_insn_errno virtual_vmcs_vmwrite_safe(const struct vcpu *v,
1061 u32 vmcs_encoding, u64 val)
1062 {
1063 enum vmx_insn_errno ret;
1064
1065 virtual_vmcs_enter(v);
1066 ret = vmwrite_safe(vmcs_encoding, val);
1067 virtual_vmcs_exit(v);
1068
1069 return ret;
1070 }
1071
1072 /*
1073 * This function is only called in a vCPU's initialization phase,
1074  * so we can update the posted-interrupt descriptor in a non-atomic way.
1075 */
1076 static void pi_desc_init(struct vcpu *v)
1077 {
1078 v->arch.hvm.vmx.pi_desc.nv = posted_intr_vector;
1079
1080 /*
1081      * Mark NDST as invalid; this invalid value then serves as a marker of
1082      * whether NDST needs updating in vmx_pi_hooks_assign().
1083 */
1084 v->arch.hvm.vmx.pi_desc.ndst = APIC_INVALID_DEST;
1085 }
1086
1087 void nocall vmx_asm_vmexit_handler(void);
1088
1089 static int construct_vmcs(struct vcpu *v)
1090 {
1091 struct domain *d = v->domain;
1092 uint32_t vmexit_ctl = vmx_caps.vmexit_control;
1093 u32 vmentry_ctl = vmx_caps.vmentry_control;
1094 int rc = 0;
1095
1096 vmx_vmcs_enter(v);
1097
1098 /* VMCS controls. */
1099 __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_caps.pin_based_exec_control);
1100
1101 v->arch.hvm.vmx.exec_control = vmx_caps.cpu_based_exec_control;
1102 if ( d->arch.vtsc && !cpu_has_vmx_tsc_scaling )
1103 v->arch.hvm.vmx.exec_control |= CPU_BASED_RDTSC_EXITING;
1104
1105 v->arch.hvm.vmx.secondary_exec_control = vmx_caps.secondary_exec_control;
1106 v->arch.hvm.vmx.tertiary_exec_control = vmx_caps.tertiary_exec_control;
1107
1108 /*
1109 * Disable features which we don't want active by default:
1110 * - Descriptor table exiting only if wanted by introspection
1111 * - x2APIC - default is xAPIC mode
1112 * - VPID settings chosen at VMEntry time
1113 * - VMCS Shadowing only when in nested VMX mode
1114 * - PML only when logdirty is active
1115 * - VMFUNC/#VE only if wanted by altp2m
1116 */
1117 v->arch.hvm.vmx.secondary_exec_control &=
1118 ~(SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING |
1119 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
1120 SECONDARY_EXEC_ENABLE_VPID |
1121 SECONDARY_EXEC_ENABLE_VMCS_SHADOWING |
1122 SECONDARY_EXEC_ENABLE_PML |
1123 SECONDARY_EXEC_ENABLE_VM_FUNCTIONS |
1124 SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS);
1125
1126 if ( paging_mode_hap(d) )
1127 {
1128 v->arch.hvm.vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
1129 CPU_BASED_CR3_LOAD_EXITING |
1130 CPU_BASED_CR3_STORE_EXITING);
1131 }
1132 else
1133 {
1134 v->arch.hvm.vmx.secondary_exec_control &=
1135 ~(SECONDARY_EXEC_ENABLE_EPT |
1136 SECONDARY_EXEC_UNRESTRICTED_GUEST |
1137 SECONDARY_EXEC_ENABLE_INVPCID);
1138 v->arch.hvm.vmx.tertiary_exec_control &=
1139 ~(TERTIARY_EXEC_EPT_PAGING_WRITE);
1140 vmexit_ctl &= ~(VM_EXIT_SAVE_GUEST_PAT |
1141 VM_EXIT_LOAD_HOST_PAT);
1142 vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_PAT;
1143 }
1144
1145     /* Do not enable Monitor Trap Flag unless single-step debugging is started. */
1146 v->arch.hvm.vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
1147
1148 vmx_update_cpu_exec_control(v);
1149
1150 __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
1151 __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
1152
1153 if ( cpu_has_vmx_ple )
1154 {
1155 __vmwrite(PLE_GAP, ple_gap);
1156 __vmwrite(PLE_WINDOW, ple_window);
1157 }
1158
1159 if ( cpu_has_vmx_secondary_exec_control )
1160 __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1161 v->arch.hvm.vmx.secondary_exec_control);
1162
1163 if ( cpu_has_vmx_tertiary_exec_control )
1164 __vmwrite(TERTIARY_VM_EXEC_CONTROL,
1165 v->arch.hvm.vmx.tertiary_exec_control);
1166
1167 /* MSR access bitmap. */
1168 if ( cpu_has_vmx_msr_bitmap )
1169 {
1170 struct vmx_msr_bitmap *msr_bitmap = alloc_xenheap_page();
1171
1172 if ( msr_bitmap == NULL )
1173 {
1174 rc = -ENOMEM;
1175 goto out;
1176 }
1177
1178 memset(msr_bitmap, ~0, PAGE_SIZE);
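        /* Intercept everything by default; direct access is granted selectively below. */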
1179 v->arch.hvm.vmx.msr_bitmap = msr_bitmap;
1180 __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
1181
1182 vmx_clear_msr_intercept(v, MSR_FS_BASE, VMX_MSR_RW);
1183 vmx_clear_msr_intercept(v, MSR_GS_BASE, VMX_MSR_RW);
1184 vmx_clear_msr_intercept(v, MSR_SHADOW_GS_BASE, VMX_MSR_RW);
1185 vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_CS, VMX_MSR_RW);
1186 vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_ESP, VMX_MSR_RW);
1187 vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_EIP, VMX_MSR_RW);
1188 if ( paging_mode_hap(d) && (!is_iommu_enabled(d) || iommu_snoop) )
1189 vmx_clear_msr_intercept(v, MSR_IA32_CR_PAT, VMX_MSR_RW);
1190 if ( (vmexit_ctl & VM_EXIT_CLEAR_BNDCFGS) &&
1191 (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) )
1192 vmx_clear_msr_intercept(v, MSR_IA32_BNDCFGS, VMX_MSR_RW);
1193 }
1194
1195 /* I/O access bitmap. */
1196 __vmwrite(IO_BITMAP_A, __pa(d->arch.hvm.io_bitmap));
1197 __vmwrite(IO_BITMAP_B, __pa(d->arch.hvm.io_bitmap) + PAGE_SIZE);
1198
1199 if ( cpu_has_vmx_virtual_intr_delivery )
1200 {
1201 unsigned int i;
1202
1203 /* EOI-exit bitmap */
1204 bitmap_zero(v->arch.hvm.vmx.eoi_exit_bitmap, X86_IDT_VECTORS);
1205 for ( i = 0; i < ARRAY_SIZE(v->arch.hvm.vmx.eoi_exit_bitmap); ++i )
1206 __vmwrite(EOI_EXIT_BITMAP(i), 0);
1207
1208 /* Initialise Guest Interrupt Status (RVI and SVI) to 0 */
1209 __vmwrite(GUEST_INTR_STATUS, 0);
1210 }
1211
1212 if ( cpu_has_vmx_posted_intr_processing )
1213 {
1214 if ( iommu_intpost )
1215 pi_desc_init(v);
1216
1217 __vmwrite(PI_DESC_ADDR, virt_to_maddr(&v->arch.hvm.vmx.pi_desc));
1218 __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
1219 }
1220
1221 /* Host data selectors. */
1222 __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
1223 __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
1224 __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
1225 __vmwrite(HOST_FS_SELECTOR, 0);
1226 __vmwrite(HOST_GS_SELECTOR, 0);
1227 __vmwrite(HOST_FS_BASE, 0);
1228 __vmwrite(HOST_GS_BASE, 0);
1229 __vmwrite(HOST_TR_SELECTOR, TSS_SELECTOR);
1230
1231 /* Host control registers. */
1232 v->arch.hvm.vmx.host_cr0 = read_cr0() & ~X86_CR0_TS;
1233 if ( !v->arch.fully_eager_fpu )
1234 v->arch.hvm.vmx.host_cr0 |= X86_CR0_TS;
1235 __vmwrite(HOST_CR0, v->arch.hvm.vmx.host_cr0);
1236 __vmwrite(HOST_CR4, mmu_cr4_features);
1237 if ( cpu_has_vmx_efer )
1238 __vmwrite(HOST_EFER, read_efer());
1239
1240 /* Host CS:RIP. */
1241 __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
1242 __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
1243
1244 /* Host SYSENTER CS:RIP. */
1245 __vmwrite(HOST_SYSENTER_CS, IS_ENABLED(CONFIG_PV) ? __HYPERVISOR_CS : 0);
1246 __vmwrite(HOST_SYSENTER_EIP,
1247 IS_ENABLED(CONFIG_PV) ? (unsigned long)sysenter_entry : 0);
1248
1249 /* MSR intercepts. */
1250 __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
1251 __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
1252 __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
1253
1254 __vmwrite(VM_ENTRY_INTR_INFO, 0);
1255
1256 __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
1257 __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
1258 v->arch.hvm.vmx.cr4_host_mask = ~0UL;
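    /*
     * Own all CR0/CR4 bits for now; guest-visible values are tracked via the
     * read shadows, and the CR4 mask is recalculated when guest CR4 changes.
     */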
1259
1260 __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
1261 __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
1262
1263 __vmwrite(CR3_TARGET_COUNT, 0);
1264
1265 __vmwrite(GUEST_ACTIVITY_STATE, 0);
1266
1267 /* Guest segment bases. */
1268 __vmwrite(GUEST_ES_BASE, 0);
1269 __vmwrite(GUEST_SS_BASE, 0);
1270 __vmwrite(GUEST_DS_BASE, 0);
1271 __vmwrite(GUEST_FS_BASE, 0);
1272 __vmwrite(GUEST_GS_BASE, 0);
1273 __vmwrite(GUEST_CS_BASE, 0);
1274
1275 /* Guest segment limits. */
1276 __vmwrite(GUEST_ES_LIMIT, ~0u);
1277 __vmwrite(GUEST_SS_LIMIT, ~0u);
1278 __vmwrite(GUEST_DS_LIMIT, ~0u);
1279 __vmwrite(GUEST_FS_LIMIT, ~0u);
1280 __vmwrite(GUEST_GS_LIMIT, ~0u);
1281 __vmwrite(GUEST_CS_LIMIT, ~0u);
1282
1283 /* Guest segment AR bytes. */
1284 __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
1285 __vmwrite(GUEST_SS_AR_BYTES, 0xc093);
1286 __vmwrite(GUEST_DS_AR_BYTES, 0xc093);
1287 __vmwrite(GUEST_FS_AR_BYTES, 0xc093);
1288 __vmwrite(GUEST_GS_AR_BYTES, 0xc093);
1289 __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */
1290
1291 /* Guest IDT. */
1292 __vmwrite(GUEST_IDTR_BASE, 0);
1293 __vmwrite(GUEST_IDTR_LIMIT, 0);
1294
1295 /* Guest GDT. */
1296 __vmwrite(GUEST_GDTR_BASE, 0);
1297 __vmwrite(GUEST_GDTR_LIMIT, 0);
1298
1299 /* Guest LDT. */
1300 __vmwrite(GUEST_LDTR_AR_BYTES, 0x0082); /* LDT */
1301 __vmwrite(GUEST_LDTR_SELECTOR, 0);
1302 __vmwrite(GUEST_LDTR_BASE, 0);
1303 __vmwrite(GUEST_LDTR_LIMIT, 0);
1304
1305 /* Guest TSS. */
1306 __vmwrite(GUEST_TR_AR_BYTES, 0x008b); /* 32-bit TSS (busy) */
1307 __vmwrite(GUEST_TR_BASE, 0);
1308 __vmwrite(GUEST_TR_LIMIT, 0xff);
1309
1310 __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
1311 __vmwrite(GUEST_DR7, 0);
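    /* An all-ones link pointer means no shadow VMCS is in use. */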
1312 __vmwrite(VMCS_LINK_POINTER, ~0UL);
1313
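    /*
     * Intercept the always-required exceptions, plus #PF when shadow paging
     * is in use and #NM when FPU state is switched lazily.
     */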
1314 v->arch.hvm.vmx.exception_bitmap = HVM_TRAP_MASK
1315 | (paging_mode_hap(d) ? 0 : (1U << X86_EXC_PF))
1316 | (v->arch.fully_eager_fpu ? 0 : (1U << X86_EXC_NM));
1317
1318 if ( cpu_has_vmx_notify_vm_exiting )
1319 __vmwrite(NOTIFY_WINDOW, vm_notify_window);
1320
1321 vmx_update_exception_bitmap(v);
1322
1323 v->arch.hvm.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
1324 hvm_update_guest_cr(v, 0);
1325
1326 v->arch.hvm.guest_cr[4] = 0;
1327 hvm_update_guest_cr(v, 4);
1328
1329 if ( cpu_has_vmx_tpr_shadow )
1330 {
1331 __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
1332 page_to_maddr(vcpu_vlapic(v)->regs_page));
1333 __vmwrite(TPR_THRESHOLD, 0);
1334 }
1335
1336 if ( paging_mode_hap(d) )
1337 {
1338 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1339 struct ept_data *ept = &p2m->ept;
1340
1341 ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
1342 __vmwrite(EPT_POINTER, ept->eptp);
1343
1344 __vmwrite(HOST_PAT, XEN_MSR_PAT);
1345 __vmwrite(GUEST_PAT, MSR_IA32_CR_PAT_RESET);
1346 }
1347 if ( cpu_has_vmx_mpx )
1348 __vmwrite(GUEST_BNDCFGS, 0);
1349 if ( cpu_has_vmx_xsaves )
1350 __vmwrite(XSS_EXIT_BITMAP, 0);
1351
1352 if ( cpu_has_vmx_tsc_scaling )
1353 __vmwrite(TSC_MULTIPLIER, d->arch.hvm.tsc_scaling_ratio);
1354
1355 if ( cpu_has_vmx_virt_spec_ctrl )
1356 {
1357 __vmwrite(SPEC_CTRL_MASK, 0);
1358 __vmwrite(SPEC_CTRL_SHADOW, 0);
1359 }
1360
1361 /* will update HOST & GUEST_CR3 as reqd */
1362 paging_update_paging_modes(v);
1363
1364 vmx_vlapic_msr_changed(v);
1365
1366 if ( opt_l1d_flush && paging_mode_hap(d) )
1367 rc = vmx_add_msr(v, MSR_FLUSH_CMD, FLUSH_CMD_L1D,
1368 VMX_MSR_GUEST_LOADONLY);
1369
1370 if ( !rc && (d->arch.scf & SCF_entry_ibpb) )
1371 rc = vmx_add_msr(v, MSR_PRED_CMD, PRED_CMD_IBPB,
1372 VMX_MSR_HOST);
1373
1374 out:
1375 vmx_vmcs_exit(v);
1376
1377 return rc;
1378 }
1379
1380 /*
1381 * Search an MSR list looking for an MSR entry, or the slot in which it should
1382 * live (to keep the data sorted) if an entry is not found.
1383 *
1384 * The return pointer is guaranteed to be bounded by start and end. However,
1385 * it may point at end, and may be invalid for the caller to dereference.
1386 */
1387 static struct vmx_msr_entry *locate_msr_entry(
1388 struct vmx_msr_entry *start, struct vmx_msr_entry *end, uint32_t msr)
1389 {
1390 while ( start < end )
1391 {
1392 struct vmx_msr_entry *mid = start + (end - start) / 2;
1393
1394 if ( msr < mid->index )
1395 end = mid;
1396 else if ( msr > mid->index )
1397 start = mid + 1;
1398 else
1399 return mid;
1400 }
1401
1402 return start;
1403 }
1404
1405 struct vmx_msr_entry *vmx_find_msr(const struct vcpu *v, uint32_t msr,
1406 enum vmx_msr_list_type type)
1407 {
1408 const struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1409 struct vmx_msr_entry *start = NULL, *ent, *end;
1410 unsigned int substart = 0, subend = vmx->msr_save_count;
1411 unsigned int total = vmx->msr_load_count;
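    /*
     * The guest MSR area is a single sorted page: the first msr_save_count
     * entries are loaded on entry and saved on exit, while the remainder
     * (up to msr_load_count) are load-only.
     */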
1412
1413 ASSERT(v == current || !vcpu_runnable(v));
1414
1415 switch ( type )
1416 {
1417 case VMX_MSR_HOST:
1418 start = vmx->host_msr_area;
1419 subend = vmx->host_msr_count;
1420 total = subend;
1421 break;
1422
1423 case VMX_MSR_GUEST:
1424 start = vmx->msr_area;
1425 break;
1426
1427 case VMX_MSR_GUEST_LOADONLY:
1428 start = vmx->msr_area;
1429 substart = subend;
1430 subend = total;
1431 break;
1432
1433 default:
1434 ASSERT_UNREACHABLE();
1435 break;
1436 }
1437
1438 if ( !start )
1439 return NULL;
1440
1441 end = start + total;
1442 ent = locate_msr_entry(start + substart, start + subend, msr);
1443
1444 return ((ent < end) && (ent->index == msr)) ? ent : NULL;
1445 }
1446
1447 int vmx_add_msr(struct vcpu *v, uint32_t msr, uint64_t val,
1448 enum vmx_msr_list_type type)
1449 {
1450 struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1451 struct vmx_msr_entry **ptr, *start = NULL, *ent, *end;
1452 unsigned int substart, subend, total;
1453 int rc;
1454
1455 ASSERT(v == current || !vcpu_runnable(v));
1456
1457 switch ( type )
1458 {
1459 case VMX_MSR_HOST:
1460 ptr = &vmx->host_msr_area;
1461 substart = 0;
1462 subend = vmx->host_msr_count;
1463 total = subend;
1464 break;
1465
1466 case VMX_MSR_GUEST:
1467 ptr = &vmx->msr_area;
1468 substart = 0;
1469 subend = vmx->msr_save_count;
1470 total = vmx->msr_load_count;
1471 break;
1472
1473 case VMX_MSR_GUEST_LOADONLY:
1474 ptr = &vmx->msr_area;
1475 substart = vmx->msr_save_count;
1476 subend = vmx->msr_load_count;
1477 total = subend;
1478 break;
1479
1480 default:
1481 ASSERT_UNREACHABLE();
1482 return -EINVAL;
1483 }
1484
1485 vmx_vmcs_enter(v);
1486
1487 /* Allocate memory on first use. */
1488 if ( unlikely(!*ptr) )
1489 {
1490 paddr_t addr;
1491
1492 if ( (*ptr = alloc_xenheap_page()) == NULL )
1493 {
1494 rc = -ENOMEM;
1495 goto out;
1496 }
1497
1498 addr = virt_to_maddr(*ptr);
1499
1500 switch ( type )
1501 {
1502 case VMX_MSR_HOST:
1503 __vmwrite(VM_EXIT_MSR_LOAD_ADDR, addr);
1504 break;
1505
1506 case VMX_MSR_GUEST:
1507 case VMX_MSR_GUEST_LOADONLY:
1508 __vmwrite(VM_EXIT_MSR_STORE_ADDR, addr);
1509 __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, addr);
1510 break;
1511 }
1512 }
1513
1514 start = *ptr;
1515 end = start + total;
1516 ent = locate_msr_entry(start + substart, start + subend, msr);
1517
1518 if ( (ent < end) && (ent->index == msr) )
1519 goto found;
1520
1521 /* If there isn't an existing entry for msr, insert room for one. */
1522 if ( total == (PAGE_SIZE / sizeof(*ent)) )
1523 {
1524 rc = -ENOSPC;
1525 goto out;
1526 }
1527
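    /* Shift the tail of the list up one slot so the array stays sorted. */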
1528 memmove(ent + 1, ent, sizeof(*ent) * (end - ent));
1529
1530 ent->index = msr;
1531 ent->mbz = 0;
1532
1533 switch ( type )
1534 {
1535 case VMX_MSR_HOST:
1536 __vmwrite(VM_EXIT_MSR_LOAD_COUNT, ++vmx->host_msr_count);
1537 break;
1538
1539 case VMX_MSR_GUEST:
1540 __vmwrite(VM_EXIT_MSR_STORE_COUNT, ++vmx->msr_save_count);
1541
1542 /* Fallthrough */
1543 case VMX_MSR_GUEST_LOADONLY:
1544 __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, ++vmx->msr_load_count);
1545 break;
1546 }
1547
1548 /* Set the msr's value. */
1549 found:
1550 ent->data = val;
1551 rc = 0;
1552
1553 out:
1554 vmx_vmcs_exit(v);
1555
1556 return rc;
1557 }
1558
1559 int vmx_del_msr(struct vcpu *v, uint32_t msr, enum vmx_msr_list_type type)
1560 {
1561 struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1562 struct vmx_msr_entry *start = NULL, *ent, *end;
1563 unsigned int substart = 0, subend = vmx->msr_save_count;
1564 unsigned int total = vmx->msr_load_count;
1565
1566 ASSERT(v == current || !vcpu_runnable(v));
1567
1568 switch ( type )
1569 {
1570 case VMX_MSR_HOST:
1571 start = vmx->host_msr_area;
1572 subend = vmx->host_msr_count;
1573 total = subend;
1574 break;
1575
1576 case VMX_MSR_GUEST:
1577 start = vmx->msr_area;
1578 break;
1579
1580 case VMX_MSR_GUEST_LOADONLY:
1581 start = vmx->msr_area;
1582 substart = subend;
1583 subend = total;
1584 break;
1585
1586 default:
1587 ASSERT_UNREACHABLE();
1588 return -EINVAL;
1589 }
1590
1591 if ( !start )
1592 return -ESRCH;
1593
1594 end = start + total;
1595 ent = locate_msr_entry(start + substart, start + subend, msr);
1596
1597 if ( (ent == end) || (ent->index != msr) )
1598 return -ESRCH;
1599
1600 memmove(ent, ent + 1, sizeof(*ent) * (end - ent - 1));
1601
1602 vmx_vmcs_enter(v);
1603
1604 switch ( type )
1605 {
1606 case VMX_MSR_HOST:
1607 __vmwrite(VM_EXIT_MSR_LOAD_COUNT, --vmx->host_msr_count);
1608 break;
1609
1610 case VMX_MSR_GUEST:
1611 __vmwrite(VM_EXIT_MSR_STORE_COUNT, --vmx->msr_save_count);
1612
1613 /* Fallthrough */
1614 case VMX_MSR_GUEST_LOADONLY:
1615 __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, --vmx->msr_load_count);
1616 break;
1617 }
1618
1619 vmx_vmcs_exit(v);
1620
1621 return 0;
1622 }
1623
1624 void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector)
1625 {
1626 if ( !test_and_set_bit(vector, v->arch.hvm.vmx.eoi_exit_bitmap) )
1627 set_bit(vector / BITS_PER_LONG,
1628 &v->arch.hvm.vmx.eoi_exitmap_changed);
1629 }
1630
1631 void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector)
1632 {
1633 if ( test_and_clear_bit(vector, v->arch.hvm.vmx.eoi_exit_bitmap) )
1634 set_bit(vector / BITS_PER_LONG,
1635 &v->arch.hvm.vmx.eoi_exitmap_changed);
1636 }
1637
1638 bool vmx_vcpu_pml_enabled(const struct vcpu *v)
1639 {
1640 return v->arch.hvm.vmx.secondary_exec_control & SECONDARY_EXEC_ENABLE_PML;
1641 }
1642
1643 int vmx_vcpu_enable_pml(struct vcpu *v)
1644 {
1645 if ( vmx_vcpu_pml_enabled(v) )
1646 return 0;
1647
1648 v->arch.hvm.vmx.pml_pg = v->domain->arch.paging.alloc_page(v->domain);
1649 if ( !v->arch.hvm.vmx.pml_pg )
1650 return -ENOMEM;
1651
1652 vmx_vmcs_enter(v);
1653
1654 __vmwrite(PML_ADDRESS, page_to_maddr(v->arch.hvm.vmx.pml_pg));
1655 __vmwrite(GUEST_PML_INDEX, NR_PML_ENTRIES - 1);
1656
1657 v->arch.hvm.vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_PML;
1658
1659 __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1660 v->arch.hvm.vmx.secondary_exec_control);
1661
1662 vmx_vmcs_exit(v);
1663
1664 return 0;
1665 }
1666
1667 void vmx_vcpu_disable_pml(struct vcpu *v)
1668 {
1669 if ( !vmx_vcpu_pml_enabled(v) )
1670 return;
1671
1672 /* Make sure we don't lose any logged GPAs. */
1673 ept_vcpu_flush_pml_buffer(v);
1674
1675 vmx_vmcs_enter(v);
1676
1677 v->arch.hvm.vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
1678 __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1679 v->arch.hvm.vmx.secondary_exec_control);
1680
1681 vmx_vmcs_exit(v);
1682
1683 v->domain->arch.paging.free_page(v->domain, v->arch.hvm.vmx.pml_pg);
1684 v->arch.hvm.vmx.pml_pg = NULL;
1685 }
1686
1687 bool vmx_domain_pml_enabled(const struct domain *d)
1688 {
1689 return d->arch.hvm.vmx.status & VMX_DOMAIN_PML_ENABLED;
1690 }
1691
1692 /*
1693  * This function enables PML for a particular domain. It should be called
1694  * while the domain is paused.
1695  *
1696  * PML needs to be enabled globally for all vcpus of the domain, as the PML
1697  * buffer and PML index are per-vcpu, but the EPT table is shared by all
1698  * vcpus, therefore enabling PML on only some vcpus won't work.
1699 */
1700 int vmx_domain_enable_pml(struct domain *d)
1701 {
1702 struct vcpu *v;
1703 int rc;
1704
1705 ASSERT(atomic_read(&d->pause_count));
1706
1707 if ( vmx_domain_pml_enabled(d) )
1708 return 0;
1709
1710 for_each_vcpu ( d, v )
1711 if ( (rc = vmx_vcpu_enable_pml(v)) != 0 )
1712 goto error;
1713
1714 d->arch.hvm.vmx.status |= VMX_DOMAIN_PML_ENABLED;
1715
1716 return 0;
1717
1718 error:
1719 for_each_vcpu ( d, v )
1720 if ( vmx_vcpu_pml_enabled(v) )
1721 vmx_vcpu_disable_pml(v);
1722 return rc;
1723 }
1724
1725 /*
1726  * Disable PML for a particular domain. Called while the domain is paused.
1727  *
1728  * As with enabling PML for a domain, disabling it should be done for all
1729  * vcpus at once.
1730 */
1731 void vmx_domain_disable_pml(struct domain *d)
1732 {
1733 struct vcpu *v;
1734
1735 ASSERT(atomic_read(&d->pause_count));
1736
1737 if ( !vmx_domain_pml_enabled(d) )
1738 return;
1739
1740 for_each_vcpu ( d, v )
1741 vmx_vcpu_disable_pml(v);
1742
1743 d->arch.hvm.vmx.status &= ~VMX_DOMAIN_PML_ENABLED;
1744 }
1745
1746 /*
1747  * Flush the PML buffers of all vcpus, and propagate the logged dirty pages
1748  * into the log-dirty radix tree. Called while the domain is paused.
1749 */
1750 void vmx_domain_flush_pml_buffers(struct domain *d)
1751 {
1752 struct vcpu *v;
1753
1754 ASSERT(atomic_read(&d->pause_count));
1755
1756 if ( !vmx_domain_pml_enabled(d) )
1757 return;
1758
1759 for_each_vcpu ( d, v )
1760 ept_vcpu_flush_pml_buffer(v);
1761 }
1762
1763 static void vmx_vcpu_update_eptp(struct vcpu *v, u64 eptp)
1764 {
1765 vmx_vmcs_enter(v);
1766 __vmwrite(EPT_POINTER, eptp);
1767 vmx_vmcs_exit(v);
1768 }
1769
1770 /*
1771  * Update the EPTP in the VMCS of all vcpus of the domain. Must be called
1772  * while the domain is paused.
1773 */
1774 void vmx_domain_update_eptp(struct domain *d)
1775 {
1776 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1777 struct vcpu *v;
1778
1779 ASSERT(atomic_read(&d->pause_count));
1780
1781 for_each_vcpu ( d, v )
1782 vmx_vcpu_update_eptp(v, p2m->ept.eptp);
1783
1784 ept_sync_domain(p2m);
1785 }
1786
1787 int vmx_create_vmcs(struct vcpu *v)
1788 {
1789 struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1790 int rc;
1791
1792 if ( (vmx->vmcs_pa = vmx_alloc_vmcs()) == 0 )
1793 return -ENOMEM;
1794
1795 INIT_LIST_HEAD(&vmx->active_list);
1796 __vmpclear(vmx->vmcs_pa);
1797 vmx->active_cpu = -1;
1798 vmx->launched = 0;
1799
1800 if ( (rc = construct_vmcs(v)) != 0 )
1801 {
1802 vmx_destroy_vmcs(v);
1803 return rc;
1804 }
1805
1806 return 0;
1807 }
1808
1809 void vmx_destroy_vmcs(struct vcpu *v)
1810 {
1811 struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1812
1813 vmx_clear_vmcs(v);
1814
1815 vmx_free_vmcs(vmx->vmcs_pa);
1816
1817 free_xenheap_page(v->arch.hvm.vmx.host_msr_area);
1818 free_xenheap_page(v->arch.hvm.vmx.msr_area);
1819 free_xenheap_page(v->arch.hvm.vmx.msr_bitmap);
1820 }
1821
1822 void vmx_vmentry_failure(void)
1823 {
1824 struct vcpu *curr = current;
1825 unsigned long error;
1826
1827 __vmread(VM_INSTRUCTION_ERROR, &error);
1828 gprintk(XENLOG_ERR, "VM%s error: %#lx\n",
1829 curr->arch.hvm.vmx.launched ? "RESUME" : "LAUNCH", error);
1830
1831 if ( error == VMX_INSN_INVALID_CONTROL_STATE ||
1832 error == VMX_INSN_INVALID_HOST_STATE )
1833 vmcs_dump_vcpu(curr);
1834
1835 domain_crash(curr->domain);
1836 }
1837
1838 void noreturn vmx_asm_do_vmentry(void);
1839
1840 static void vmx_update_debug_state(struct vcpu *v)
1841 {
1842 if ( v->arch.hvm.debug_state_latch )
1843 v->arch.hvm.vmx.exception_bitmap |= 1U << X86_EXC_BP;
1844 else
1845 v->arch.hvm.vmx.exception_bitmap &= ~(1U << X86_EXC_BP);
1846
1847 vmx_vmcs_enter(v);
1848 vmx_update_exception_bitmap(v);
1849 vmx_vmcs_exit(v);
1850 }
1851
1852 void cf_check vmx_do_resume(void)
1853 {
1854 struct vcpu *v = current;
1855 bool debug_state;
1856 unsigned long host_cr4;
1857
1858 if ( v->arch.hvm.vmx.active_cpu == smp_processor_id() )
1859 vmx_vmcs_reload(v);
1860 else
1861 {
1862 /*
1863 * For pass-through domain, guest PCI-E device driver may leverage the
1864 * "Non-Snoop" I/O, and explicitly WBINVD or CLFLUSH to a RAM space.
1865 * Since migration may occur before WBINVD or CLFLUSH, we need to
1866 * maintain data consistency either by:
1867 * 1: flushing cache (wbinvd) when the guest is scheduled out if
1868 * there is no wbinvd exit, or
1869 * 2: execute wbinvd on all dirty pCPUs when guest wbinvd exits.
1870 * If VT-d engine can force snooping, we don't need to do these.
1871 */
1872 if ( has_arch_pdevs(v->domain) && !iommu_snoop
1873 && !cpu_has_wbinvd_exiting )
1874 {
1875 int cpu = v->arch.hvm.vmx.active_cpu;
1876 if ( cpu != -1 )
1877 flush_mask(cpumask_of(cpu), FLUSH_CACHE_EVICT);
1878 }
1879
1880 vmx_clear_vmcs(v);
1881 vmx_load_vmcs(v);
1882 hvm_migrate_timers(v);
1883 hvm_migrate_pirqs(v);
1884 vmx_set_host_env(v);
1885 /*
1886 * Both n1 VMCS and n2 VMCS need to update the host environment after
1887 * VCPU migration. The environment of current VMCS is updated in place,
1888 * but the action of another VMCS is deferred till it is switched in.
1889 */
1890 v->arch.hvm.vmx.hostenv_migrated = 1;
1891
1892 hvm_asid_flush_vcpu(v);
1893 }
1894
1895 debug_state = v->domain->debugger_attached
1896 || v->domain->arch.monitor.software_breakpoint_enabled
1897 || v->domain->arch.monitor.singlestep_enabled;
1898
1899 if ( unlikely(v->arch.hvm.debug_state_latch != debug_state) )
1900 {
1901 v->arch.hvm.debug_state_latch = debug_state;
1902 vmx_update_debug_state(v);
1903 }
1904
1905 hvm_do_resume(v);
1906
1907 /* Sync host CR4 in case its value has changed. */
1908 __vmread(HOST_CR4, &host_cr4);
1909 if ( host_cr4 != read_cr4() )
1910 __vmwrite(HOST_CR4, read_cr4());
1911
1912 reset_stack_and_jump(vmx_asm_do_vmentry);
1913 }
1914
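/*
 * Best-effort VMCS readers for the dump code: vmr() yields 0 if the field
 * cannot be read, and the vmr16()/vmr32() wrappers assert at build time that
 * the field encoding has the expected width.
 */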
static inline unsigned long vmr(unsigned long field)
{
    unsigned long val;

    return vmread_safe(field, &val) ? 0 : val;
}

#define vmr16(fld) ({             \
    BUILD_BUG_ON((fld) & 0x6001); \
    (uint16_t)vmr(fld);           \
})

#define vmr32(fld) ({                         \
    BUILD_BUG_ON(((fld) & 0x6001) != 0x4000); \
    (uint32_t)vmr(fld);                       \
})

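/*
 * Helpers for vmcs_dump_vcpu(): dump a guest segment register
 * (selector/attributes/limit/base) or a descriptor table register
 * (limit/base).
 */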
static void vmx_dump_sel(const char *name, uint32_t selector)
{
    uint32_t sel, attr, limit;
    uint64_t base;
    sel = vmr(selector);
    attr = vmr(selector + (GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR));
    limit = vmr(selector + (GUEST_ES_LIMIT - GUEST_ES_SELECTOR));
    base = vmr(selector + (GUEST_ES_BASE - GUEST_ES_SELECTOR));
    printk("%s: %04x %05x %08x %016"PRIx64"\n", name, sel, attr, limit, base);
}

static void vmx_dump_sel2(const char *name, uint32_t lim)
{
    uint32_t limit;
    uint64_t base;
    limit = vmr(lim);
    base = vmr(lim + (GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
    printk("%s: %08x %016"PRIx64"\n", name, limit, base);
}

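/* Dump the guest, host and control state of the vCPU's VMCS to the console. */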
void vmcs_dump_vcpu(struct vcpu *v)
{
    struct cpu_user_regs *regs = &v->arch.user_regs;
    uint32_t vmentry_ctl, vmexit_ctl;
    unsigned long cr4;
    uint64_t efer;
    unsigned int i, n;

    if ( v == current )
        regs = guest_cpu_user_regs();

    vmx_vmcs_enter(v);

    vmentry_ctl = vmr32(VM_ENTRY_CONTROLS),
    vmexit_ctl = vmr32(VM_EXIT_CONTROLS);
    cr4 = vmr(GUEST_CR4);

    /*
     * The guest's EFER setting comes from the GUEST_EFER VMCS field whenever
     * available, or from the guest load-only MSR list on Gen1 hardware, the
     * entry for which may be elided for performance reasons if identical to
     * Xen's setting.
     */
    if ( cpu_has_vmx_efer )
        efer = vmr(GUEST_EFER);
    else if ( vmx_read_guest_loadonly_msr(v, MSR_EFER, &efer) )
        efer = read_efer();

    printk("*** Guest State ***\n");
    printk("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
           vmr(GUEST_CR0), vmr(CR0_READ_SHADOW), vmr(CR0_GUEST_HOST_MASK));
    printk("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
           cr4, vmr(CR4_READ_SHADOW), vmr(CR4_GUEST_HOST_MASK));
    printk("CR3 = 0x%016lx\n", vmr(GUEST_CR3));
    if ( (v->arch.hvm.vmx.secondary_exec_control &
          SECONDARY_EXEC_ENABLE_EPT) &&
         (cr4 & X86_CR4_PAE) && !(vmentry_ctl & VM_ENTRY_IA32E_MODE) )
    {
        printk("PDPTE0 = 0x%016lx PDPTE1 = 0x%016lx\n",
               vmr(GUEST_PDPTE(0)), vmr(GUEST_PDPTE(1)));
        printk("PDPTE2 = 0x%016lx PDPTE3 = 0x%016lx\n",
               vmr(GUEST_PDPTE(2)), vmr(GUEST_PDPTE(3)));
    }
    printk("RSP = 0x%016lx (0x%016lx) RIP = 0x%016lx (0x%016lx)\n",
           vmr(GUEST_RSP), regs->rsp,
           vmr(GUEST_RIP), regs->rip);
    printk("RFLAGS=0x%08lx (0x%08lx) DR7 = 0x%016lx\n",
           vmr(GUEST_RFLAGS), regs->rflags,
           vmr(GUEST_DR7));
    printk("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
           vmr(GUEST_SYSENTER_ESP),
           vmr32(GUEST_SYSENTER_CS), vmr(GUEST_SYSENTER_EIP));
    printk(" sel attr limit base\n");
    vmx_dump_sel(" CS", GUEST_CS_SELECTOR);
    vmx_dump_sel(" DS", GUEST_DS_SELECTOR);
    vmx_dump_sel(" SS", GUEST_SS_SELECTOR);
    vmx_dump_sel(" ES", GUEST_ES_SELECTOR);
    vmx_dump_sel(" FS", GUEST_FS_SELECTOR);
    vmx_dump_sel(" GS", GUEST_GS_SELECTOR);
    vmx_dump_sel2("GDTR", GUEST_GDTR_LIMIT);
    vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
    vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
    vmx_dump_sel(" TR", GUEST_TR_SELECTOR);
    printk("EFER(%s) = 0x%016lx PAT = 0x%016lx\n",
           cpu_has_vmx_efer ? "VMCS" : "MSR LL", efer, vmr(GUEST_PAT));
    printk("PreemptionTimer = 0x%08x SM Base = 0x%08x\n",
           vmr32(GUEST_PREEMPTION_TIMER), vmr32(GUEST_SMBASE));
    printk("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n",
           vmr(GUEST_IA32_DEBUGCTL), vmr(GUEST_PENDING_DBG_EXCEPTIONS));
    if ( vmentry_ctl & (VM_ENTRY_LOAD_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_BNDCFGS) )
        printk("PerfGlobCtl = 0x%016lx BndCfgS = 0x%016lx\n",
               vmr(GUEST_PERF_GLOBAL_CTRL), vmr(GUEST_BNDCFGS));
    printk("Interruptibility = %08x ActivityState = %08x\n",
           vmr32(GUEST_INTERRUPTIBILITY_INFO), vmr32(GUEST_ACTIVITY_STATE));
    if ( v->arch.hvm.vmx.secondary_exec_control &
         SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY )
        printk("InterruptStatus = %04x\n", vmr16(GUEST_INTR_STATUS));
    if ( cpu_has_vmx_virt_spec_ctrl )
        printk("SPEC_CTRL mask = 0x%016lx shadow = 0x%016lx\n",
               vmr(SPEC_CTRL_MASK), vmr(SPEC_CTRL_SHADOW));

    printk("*** Host State ***\n");
    printk("RIP = 0x%016lx (%ps) RSP = 0x%016lx\n",
           vmr(HOST_RIP), (void *)vmr(HOST_RIP), vmr(HOST_RSP));
    printk("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
           vmr16(HOST_CS_SELECTOR), vmr16(HOST_SS_SELECTOR),
           vmr16(HOST_DS_SELECTOR), vmr16(HOST_ES_SELECTOR),
           vmr16(HOST_FS_SELECTOR), vmr16(HOST_GS_SELECTOR),
           vmr16(HOST_TR_SELECTOR));
    printk("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
           vmr(HOST_FS_BASE), vmr(HOST_GS_BASE), vmr(HOST_TR_BASE));
    printk("GDTBase=%016lx IDTBase=%016lx\n",
           vmr(HOST_GDTR_BASE), vmr(HOST_IDTR_BASE));
    printk("CR0=%016lx CR3=%016lx CR4=%016lx\n",
           vmr(HOST_CR0), vmr(HOST_CR3), vmr(HOST_CR4));
    printk("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
           vmr(HOST_SYSENTER_ESP),
           vmr32(HOST_SYSENTER_CS), vmr(HOST_SYSENTER_EIP));
    if ( vmexit_ctl & (VM_EXIT_LOAD_HOST_PAT | VM_EXIT_LOAD_HOST_EFER) )
        printk("EFER = 0x%016lx PAT = 0x%016lx\n", vmr(HOST_EFER), vmr(HOST_PAT));
    if ( vmexit_ctl & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
        printk("PerfGlobCtl = 0x%016lx\n",
               vmr(HOST_PERF_GLOBAL_CTRL));

    printk("*** Control State ***\n");
    printk("PinBased=%08x CPUBased=%08x\n",
           vmr32(PIN_BASED_VM_EXEC_CONTROL),
           vmr32(CPU_BASED_VM_EXEC_CONTROL));
    printk("SecondaryExec=%08x TertiaryExec=%016lx\n",
           vmr32(SECONDARY_VM_EXEC_CONTROL),
           vmr(TERTIARY_VM_EXEC_CONTROL));
    printk("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
    printk("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
           vmr32(EXCEPTION_BITMAP),
           vmr32(PAGE_FAULT_ERROR_CODE_MASK),
           vmr32(PAGE_FAULT_ERROR_CODE_MATCH));
    printk("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
           vmr32(VM_ENTRY_INTR_INFO),
           vmr32(VM_ENTRY_EXCEPTION_ERROR_CODE),
           vmr32(VM_ENTRY_INSTRUCTION_LEN));
    printk("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
           vmr32(VM_EXIT_INTR_INFO),
           vmr32(VM_EXIT_INTR_ERROR_CODE),
           vmr32(VM_EXIT_INSTRUCTION_LEN));
    printk(" reason=%08x qualification=%016lx\n",
           vmr32(VM_EXIT_REASON), vmr(EXIT_QUALIFICATION));
    printk("IDTVectoring: info=%08x errcode=%08x\n",
           vmr32(IDT_VECTORING_INFO), vmr32(IDT_VECTORING_ERROR_CODE));
    printk("TSC Offset = 0x%016lx TSC Multiplier = 0x%016lx\n",
           vmr(TSC_OFFSET), vmr(TSC_MULTIPLIER));
    if ( (v->arch.hvm.vmx.exec_control & CPU_BASED_TPR_SHADOW) ||
         (vmx_caps.pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) )
        printk("TPR Threshold = 0x%02x PostedIntrVec = 0x%02x\n",
               vmr32(TPR_THRESHOLD), vmr16(POSTED_INTR_NOTIFICATION_VECTOR));
    if ( (v->arch.hvm.vmx.secondary_exec_control &
          SECONDARY_EXEC_ENABLE_EPT) )
        printk("EPT pointer = 0x%016lx EPTP index = 0x%04x\n",
               vmr(EPT_POINTER), vmr16(EPTP_INDEX));
    n = vmr32(CR3_TARGET_COUNT);
    for ( i = 0; i + 1 < n; i += 2 )
        printk("CR3 target%u=%016lx target%u=%016lx\n",
               i, vmr(CR3_TARGET_VALUE(i)),
               i + 1, vmr(CR3_TARGET_VALUE(i + 1)));
    if ( i < n )
        printk("CR3 target%u=%016lx\n", i, vmr(CR3_TARGET_VALUE(i)));
    if ( v->arch.hvm.vmx.secondary_exec_control &
         SECONDARY_EXEC_PAUSE_LOOP_EXITING )
        printk("PLE Gap=%08x Window=%08x\n",
               vmr32(PLE_GAP), vmr32(PLE_WINDOW));
    if ( v->arch.hvm.vmx.secondary_exec_control &
         (SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_ENABLE_VM_FUNCTIONS) )
        printk("Virtual processor ID = 0x%04x VMfunc controls = %016lx\n",
               vmr16(VIRTUAL_PROCESSOR_ID), vmr(VM_FUNCTION_CONTROL));

    vmx_vmcs_exit(v);
}

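/* 'v' debug keyhandler: dump the VMCS of every vCPU of every HVM domain. */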
static void cf_check vmcs_dump(unsigned char ch)
{
    struct domain *d;
    struct vcpu *v;

    printk("*********** VMCS Areas **************\n");

    rcu_read_lock(&domlist_read_lock);

    for_each_domain ( d )
    {
        if ( !is_hvm_domain(d) )
            continue;
        printk("\n>>> Domain %d <<<\n", d->domain_id);
        for_each_vcpu ( d, v )
        {
            if ( !v->is_initialised )
            {
                printk("\tVCPU %u: not initialized\n", v->vcpu_id);
                continue;
            }
            printk("\tVCPU %d\n", v->vcpu_id);
            vmcs_dump_vcpu(v);

            process_pending_softirqs();
        }
    }

    rcu_read_unlock(&domlist_read_lock);

    printk("**************************************\n");
}

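/*
 * Boot-time VMX setup: bring VMX up on this CPU and register the VMCS dump
 * keyhandler; on failure, clear the VMX feature and capability state.
 */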
int __init vmx_vmcs_init(void)
{
    int ret;

    if ( opt_ept_ad < 0 )
        /* Work around Erratum AVR41 on Avoton processors. */
        opt_ept_ad = !(boot_cpu_data.x86 == 6 &&
                       boot_cpu_data.x86_model == 0x4d);

    ret = _vmx_cpu_up(true);

    if ( !ret )
        register_keyhandler('v', vmcs_dump, "dump VT-x VMCSs", 1);
    else
    {
        setup_clear_cpu_cap(X86_FEATURE_VMX);

        /*
         * _vmx_cpu_up() may have made it past feature identification.
         * Make sure all dependent features are off as well.
         */
        memset(&vmx_caps, 0, sizeof(vmx_caps));
    }

    return ret;
}

static void __init __maybe_unused build_assertions(void)
{
    struct vmx_msr_bitmap bitmap;

    /* Check vmx_msr_bitmap layout against hardware expectations. */
    BUILD_BUG_ON(sizeof(bitmap) != PAGE_SIZE);
    BUILD_BUG_ON(sizeof(bitmap.read_low) != 1024);
    BUILD_BUG_ON(sizeof(bitmap.read_high) != 1024);
    BUILD_BUG_ON(sizeof(bitmap.write_low) != 1024);
    BUILD_BUG_ON(sizeof(bitmap.write_high) != 1024);
    BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, read_low) != 0);
    BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, read_high) != 1024);
    BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, write_low) != 2048);
    BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, write_high) != 3072);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */