1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * vmcs.c: VMCS management
4  * Copyright (c) 2004, Intel Corporation.
5  */
6 
7 #include <xen/domain_page.h>
8 #include <xen/errno.h>
9 #include <xen/event.h>
10 #include <xen/init.h>
11 #include <xen/kernel.h>
12 #include <xen/keyhandler.h>
13 #include <xen/lib.h>
14 #include <xen/mm.h>
15 #include <xen/param.h>
16 #include <xen/vm_event.h>
17 
18 #include <asm/apic.h>
19 #include <asm/cpufeature.h>
20 #include <asm/current.h>
21 #include <asm/flushtlb.h>
22 #include <asm/hvm/hvm.h>
23 #include <asm/hvm/io.h>
24 #include <asm/hvm/nestedhvm.h>
25 #include <asm/hvm/vmx/vmcs.h>
26 #include <asm/hvm/vmx/vmx.h>
27 #include <asm/hvm/vmx/vvmx.h>
28 #include <asm/idt.h>
29 #include <asm/monitor.h>
30 #include <asm/msr.h>
31 #include <asm/processor.h>
32 #include <asm/shadow.h>
33 #include <asm/spec_ctrl.h>
34 #include <asm/tboot.h>
35 #include <asm/xstate.h>
36 
37 static bool __read_mostly opt_vpid_enabled = true;
38 boolean_param("vpid", opt_vpid_enabled);
39 
40 static bool __read_mostly opt_unrestricted_guest_enabled = true;
41 boolean_param("unrestricted_guest", opt_unrestricted_guest_enabled);
42 
43 static bool __read_mostly opt_apicv_enabled = true;
44 boolean_param("apicv", opt_apicv_enabled);
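/*
 * Each of the above is a boolean Xen command line option of the same name;
 * e.g. booting with "vpid=0" or "apicv=no" (illustrative) disables the
 * corresponding feature even if the hardware supports it.
 */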
45 
/*
 * These two parameters are used to configure the controls for Pause-Loop
 * Exiting:
 * ple_gap:    upper bound on the amount of time between two successive
 *             executions of PAUSE in a loop.
 * ple_window: upper bound on the amount of time a guest is allowed to execute
 *             in a PAUSE loop.
 * Time is measured based on a counter that runs at the same rate as the TSC;
 * refer to SDM volume 3B, sections 21.6.13 & 22.1.3.
 */
55 static unsigned int __read_mostly ple_gap = 128;
56 integer_param("ple_gap", ple_gap);
57 static unsigned int __read_mostly ple_window = 4096;
58 integer_param("ple_window", ple_window);
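/*
 * Both of the above are integer command line options ("ple_gap=<n>",
 * "ple_window=<n>"), expressed in ticks of the TSC-rate counter described
 * above.  Note that "ple_gap=0" causes Pause-Loop Exiting to be turned off
 * altogether in vmx_init_vmcs_config() below.
 */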
59 
60 static unsigned int __ro_after_init vm_notify_window;
61 integer_param("vm-notify-window", vm_notify_window);
62 
63 static bool __read_mostly opt_ept_pml = true;
64 static int8_t __ro_after_init opt_ept_ad = -1;
65 int8_t __read_mostly opt_ept_exec_sp = -1;
66 
static int __init cf_check parse_ept_param(const char *s)
68 {
69     const char *ss;
70     int val, rc = 0;
71 
72     do {
73         ss = strchr(s, ',');
74         if ( !ss )
75             ss = strchr(s, '\0');
76 
77         if ( (val = parse_boolean("ad", s, ss)) >= 0 )
78             opt_ept_ad = val;
79         else if ( (val = parse_boolean("pml", s, ss)) >= 0 )
80             opt_ept_pml = val;
81         else if ( (val = parse_boolean("exec-sp", s, ss)) >= 0 )
82             opt_ept_exec_sp = val;
83         else
84             rc = -EINVAL;
85 
86         s = ss + 1;
87     } while ( *ss );
88 
89     return rc;
90 }
91 custom_param("ept", parse_ept_param);
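/*
 * Example accepted syntax (illustrative): "ept=ad=0,exec-sp" or "ept=no-pml".
 * Each comma-separated element is parsed with parse_boolean(), which accepts
 * the "<name>", "no-<name>" and "<name>=<bool>" forms.
 */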
92 
93 #ifdef CONFIG_HYPFS
94 static char opt_ept_setting[10];
95 
static void update_ept_param(void)
97 {
98     if ( opt_ept_exec_sp >= 0 )
99         snprintf(opt_ept_setting, sizeof(opt_ept_setting), "exec-sp=%d",
100                  opt_ept_exec_sp);
101 }
102 
static void __init cf_check init_ept_param(struct param_hypfs *par)
104 {
105     update_ept_param();
106     custom_runtime_set_var(par, opt_ept_setting);
107 }
108 
109 static int cf_check parse_ept_param_runtime(const char *s);
110 custom_runtime_only_param("ept", parse_ept_param_runtime, init_ept_param);
111 
static int cf_check parse_ept_param_runtime(const char *s)
113 {
114     struct domain *d;
115     int val;
116 
117     if ( !cpu_has_vmx_ept || !hvm_funcs.caps.hap ||
118          !(hvm_funcs.caps.hap_superpage_2mb ||
119            hvm_funcs.caps.hap_superpage_1gb) )
120     {
121         printk("VMX: EPT not available, or not in use - ignoring\n");
122         return 0;
123     }
124 
125     if ( (val = parse_boolean("exec-sp", s, NULL)) < 0 )
126         return -EINVAL;
127 
128     opt_ept_exec_sp = val;
129 
130     update_ept_param();
131     custom_runtime_set_var(param_2_parfs(parse_ept_param_runtime),
132                            opt_ept_setting);
133 
134     rcu_read_lock(&domlist_read_lock);
135     for_each_domain ( d )
136     {
137         /* PV, or HVM Shadow domain?  Not applicable. */
138         if ( !paging_mode_hap(d) )
139             continue;
140 
141         /* Hardware domain? Not applicable. */
142         if ( is_hardware_domain(d) )
143             continue;
144 
145         /* Nested Virt?  Broken and exec_sp forced on to avoid livelocks. */
146         if ( nestedhvm_enabled(d) )
147             continue;
148 
149         /* Setting already matches?  No need to rebuild the p2m. */
150         if ( d->arch.hvm.vmx.exec_sp == val )
151             continue;
152 
153         d->arch.hvm.vmx.exec_sp = val;
154         p2m_change_entry_type_global(d, p2m_ram_rw, p2m_ram_rw);
155     }
156     rcu_read_unlock(&domlist_read_lock);
157 
158     printk("VMX: EPT executable superpages %sabled\n",
159            val ? "en" : "dis");
160 
161     return 0;
162 }
163 #endif
164 
165 /* Dynamic (run-time adjusted) execution control flags. */
166 struct vmx_caps __ro_after_init vmx_caps;
167 
168 static DEFINE_PER_CPU_READ_MOSTLY(paddr_t, vmxon_region);
169 static DEFINE_PER_CPU(paddr_t, current_vmcs);
170 static DEFINE_PER_CPU(struct list_head, active_vmcs_list);
171 DEFINE_PER_CPU(bool, vmxon);
172 
173 #define vmcs_revision_id (vmx_caps.basic_msr & VMX_BASIC_REVISION_MASK)
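/*
 * Bits 30:0 of MSR_IA32_VMX_BASIC hold the VMCS revision identifier, which
 * software must write into the first 32 bits of each VMCS/VMXON region
 * before handing it to the CPU (see vmx_alloc_vmcs() below).
 */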
174 
static void __init vmx_display_features(void)
176 {
177     int printed = 0;
178 
179     printk("VMX: Supported advanced features:\n");
180 
181 #define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; }
182     P(cpu_has_vmx_virtualize_apic_accesses, "APIC MMIO access virtualisation");
183     P(cpu_has_vmx_tpr_shadow, "APIC TPR shadow");
184     P(cpu_has_vmx_ept, "Extended Page Tables (EPT)");
185     P(cpu_has_vmx_vpid, "Virtual-Processor Identifiers (VPID)");
186     P(cpu_has_vmx_vnmi, "Virtual NMI");
187     P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap");
188     P(cpu_has_vmx_unrestricted_guest, "Unrestricted Guest");
189     P(cpu_has_vmx_apic_reg_virt, "APIC Register Virtualization");
190     P(cpu_has_vmx_virtual_intr_delivery, "Virtual Interrupt Delivery");
191     P(cpu_has_vmx_posted_intr_processing, "Posted Interrupt Processing");
192     P(cpu_has_vmx_vmcs_shadowing, "VMCS shadowing");
193     P(cpu_has_vmx_vmfunc, "VM Functions");
194     P(cpu_has_vmx_virt_exceptions, "Virtualisation Exceptions");
195     P(cpu_has_vmx_pml, "Page Modification Logging");
196     P(cpu_has_vmx_tsc_scaling, "TSC Scaling");
197     P(cpu_has_vmx_bus_lock_detection, "Bus Lock Detection");
198     P(cpu_has_vmx_notify_vm_exiting, "Notify VM Exit");
199     P(cpu_has_vmx_virt_spec_ctrl, "Virtualize SPEC_CTRL");
200     P(cpu_has_vmx_ept_paging_write, "EPT Paging-Write");
201 #undef P
202 
203     if ( !printed )
204         printk(" - none\n");
205 }
206 
static u32 adjust_vmx_controls(
208     const char *name, u32 ctl_min, u32 ctl_opt, u32 msr, bool *mismatch)
209 {
210     u32 vmx_msr_low, vmx_msr_high, ctl = ctl_min | ctl_opt;
211 
212     rdmsr(msr, vmx_msr_low, vmx_msr_high);
213 
214     ctl &= vmx_msr_high; /* bit == 0 in high word ==> must be zero */
215     ctl |= vmx_msr_low;  /* bit == 1 in low word  ==> must be one  */
216 
217     /* Ensure minimum (required) set of control bits are supported. */
218     if ( ctl_min & ~ctl )
219     {
220         *mismatch = 1;
221         printk("VMX: CPU%d has insufficient %s (%08x; requires %08x)\n",
222                smp_processor_id(), name, ctl, ctl_min);
223     }
224 
225     return ctl;
226 }
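
/*
 * Worked example for the allowed-settings MSR pair above (illustrative
 * values): if the low dword reads 0x00000016 and the high dword reads
 * 0x0401e172, then controls 1, 2 and 4 are forced to 1, any control whose
 * bit is clear in 0x0401e172 is forced to 0, and the remaining controls keep
 * whatever ctl_min/ctl_opt asked for.
 */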
227 
static uint64_t adjust_vmx_controls2(
229     const char *name, uint64_t ctl_min, uint64_t ctl_opt, unsigned int msr,
230     bool *mismatch)
231 {
232     uint64_t vmx_msr, ctl = ctl_min | ctl_opt;
233 
234     rdmsrl(msr, vmx_msr);
235 
236     ctl &= vmx_msr; /* bit == 0 ==> must be zero */
237 
238     /* Ensure minimum (required) set of control bits are supported. */
239     if ( ctl_min & ~ctl )
240     {
241         *mismatch = true;
242         printk("VMX: CPU%u has insufficient %s (%#lx; requires %#lx)\n",
243                smp_processor_id(), name, ctl, ctl_min);
244     }
245 
246     return ctl;
247 }
248 
static bool cap_check(
250     const char *name, unsigned long expected, unsigned long saw)
251 {
252     if ( saw != expected )
253         printk("VMX %s: saw %#lx expected %#lx\n", name, saw, expected);
254     return saw != expected;
255 }
256 
static int vmx_init_vmcs_config(bool bsp)
258 {
259     u32 vmx_basic_msr_low, vmx_basic_msr_high, min, opt;
260     struct vmx_caps caps = {};
261     u64 _vmx_misc_cap = 0;
262     bool mismatch = false;
263 
264     rdmsr(MSR_IA32_VMX_BASIC, vmx_basic_msr_low, vmx_basic_msr_high);
265 
266     min = (PIN_BASED_EXT_INTR_MASK |
267            PIN_BASED_NMI_EXITING);
268     opt = (PIN_BASED_VIRTUAL_NMIS |
269            PIN_BASED_POSTED_INTERRUPT);
270     caps.pin_based_exec_control = adjust_vmx_controls(
271         "Pin-Based Exec Control", min, opt,
272         MSR_IA32_VMX_PINBASED_CTLS, &mismatch);
273 
274     min = (CPU_BASED_HLT_EXITING |
275            CPU_BASED_VIRTUAL_INTR_PENDING |
276            CPU_BASED_CR8_LOAD_EXITING |
277            CPU_BASED_CR8_STORE_EXITING |
278            CPU_BASED_INVLPG_EXITING |
279            CPU_BASED_CR3_LOAD_EXITING |
280            CPU_BASED_CR3_STORE_EXITING |
281            CPU_BASED_MONITOR_EXITING |
282            CPU_BASED_MWAIT_EXITING |
283            CPU_BASED_MOV_DR_EXITING |
284            CPU_BASED_ACTIVATE_IO_BITMAP |
285            CPU_BASED_USE_TSC_OFFSETING |
286            CPU_BASED_RDTSC_EXITING);
287     opt = (CPU_BASED_ACTIVATE_MSR_BITMAP |
288            CPU_BASED_TPR_SHADOW |
289            CPU_BASED_MONITOR_TRAP_FLAG |
290            CPU_BASED_ACTIVATE_SECONDARY_CONTROLS |
291            CPU_BASED_ACTIVATE_TERTIARY_CONTROLS);
292     caps.cpu_based_exec_control = adjust_vmx_controls(
293         "CPU-Based Exec Control", min, opt,
294         MSR_IA32_VMX_PROCBASED_CTLS, &mismatch);
295     caps.cpu_based_exec_control &= ~CPU_BASED_RDTSC_EXITING;
296     if ( caps.cpu_based_exec_control & CPU_BASED_TPR_SHADOW )
297         caps.cpu_based_exec_control &=
298             ~(CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING);
299 
300     rdmsrl(MSR_IA32_VMX_MISC, _vmx_misc_cap);
301 
302     /* Check whether IPT is supported in VMX operation. */
303     if ( bsp )
304         vmtrace_available = cpu_has_proc_trace &&
305                             (_vmx_misc_cap & VMX_MISC_PROC_TRACE);
306     else if ( vmtrace_available &&
307               !(_vmx_misc_cap & VMX_MISC_PROC_TRACE) )
308     {
309         printk("VMX: IPT capabilities differ between CPU%u and BSP\n",
310                smp_processor_id());
311         return -EINVAL;
312     }
313 
314     if ( caps.cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS )
315     {
316         min = 0;
317         opt = (SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
318                SECONDARY_EXEC_WBINVD_EXITING |
319                SECONDARY_EXEC_ENABLE_EPT |
320                SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING |
321                SECONDARY_EXEC_ENABLE_RDTSCP |
322                SECONDARY_EXEC_PAUSE_LOOP_EXITING |
323                SECONDARY_EXEC_ENABLE_INVPCID |
324                SECONDARY_EXEC_ENABLE_VM_FUNCTIONS |
325                SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS |
326                SECONDARY_EXEC_XSAVES |
327                SECONDARY_EXEC_TSC_SCALING |
328                SECONDARY_EXEC_BUS_LOCK_DETECTION);
329         if ( _vmx_misc_cap & VMX_MISC_VMWRITE_ALL )
330             opt |= SECONDARY_EXEC_ENABLE_VMCS_SHADOWING;
331         if ( opt_vpid_enabled )
332             opt |= SECONDARY_EXEC_ENABLE_VPID;
333         if ( opt_unrestricted_guest_enabled )
334             opt |= SECONDARY_EXEC_UNRESTRICTED_GUEST;
335         if ( opt_ept_pml )
336             opt |= SECONDARY_EXEC_ENABLE_PML;
337         if ( vm_notify_window != ~0u )
338             opt |= SECONDARY_EXEC_NOTIFY_VM_EXITING;
339 
340         /*
341          * "APIC Register Virtualization" and "Virtual Interrupt Delivery"
342          * can be set only when "use TPR shadow" is set
343          */
344         if ( (caps.cpu_based_exec_control & CPU_BASED_TPR_SHADOW) &&
345              opt_apicv_enabled )
346             opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT |
347                    SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
348                    SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
349 
350         caps.secondary_exec_control = adjust_vmx_controls(
351             "Secondary Exec Control", min, opt,
352             MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch);
353     }
354 
355     if ( caps.cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS )
356     {
357         uint64_t opt = (TERTIARY_EXEC_VIRT_SPEC_CTRL |
358                         TERTIARY_EXEC_EPT_PAGING_WRITE);
359 
360         caps.tertiary_exec_control = adjust_vmx_controls2(
361             "Tertiary Exec Control", 0, opt,
362             MSR_IA32_VMX_PROCBASED_CTLS3, &mismatch);
363     }
364 
    /* The IA32_VMX_EPT_VPID_CAP MSR exists only when EPT or VPID is available. */
366     if ( caps.secondary_exec_control & (SECONDARY_EXEC_ENABLE_EPT |
367                                         SECONDARY_EXEC_ENABLE_VPID) )
368     {
369         rdmsr(MSR_IA32_VMX_EPT_VPID_CAP, caps.ept, caps.vpid);
370 
371         if ( !opt_ept_ad )
372             caps.ept &= ~VMX_EPT_AD_BIT;
373 
        /*
         * Additional sanity checks before using EPT:
         * 1) the CPU we are running on must support EPT WB, as we will set
         *    the EPT paging-structure memory type to WB;
         * 2) the CPU must support an EPT page-walk length of 4, according to
         *    Intel SDM 25.2.2;
         * 3) the CPU must support INVEPT all-context invalidation, because we
         *    will use it as a last resort if other types are not supported.
         *
         * Otherwise we just don't use EPT.
         */
385         if ( !(caps.ept & VMX_EPT_MEMORY_TYPE_WB) ||
386              !(caps.ept & VMX_EPT_WALK_LENGTH_4_SUPPORTED) ||
387              !(caps.ept & VMX_EPT_INVEPT_ALL_CONTEXT) )
388             caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
389 
        /*
         * The CPU must support INVVPID all-context invalidation, because we
         * will use it as a last resort if other types are not supported.
         *
         * Otherwise we just don't use VPID.
         */
396         if ( !(caps.vpid & VMX_VPID_INVVPID_ALL_CONTEXT) )
397             caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
398 
        /* EPT A/D bits are required for PML. */
400         if ( !(caps.ept & VMX_EPT_AD_BIT) )
401             caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
402     }
403 
404     if ( caps.secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT )
405     {
406         /*
407          * To use EPT we expect to be able to clear certain intercepts.
408          * We check VMX_BASIC_MSR[55] to correctly handle default controls.
409          */
410         uint32_t must_be_one, must_be_zero, msr = MSR_IA32_VMX_PROCBASED_CTLS;
411         if ( vmx_basic_msr_high & (VMX_BASIC_DEFAULT1_ZERO >> 32) )
412             msr = MSR_IA32_VMX_TRUE_PROCBASED_CTLS;
413         rdmsr(msr, must_be_one, must_be_zero);
414         if ( must_be_one & (CPU_BASED_INVLPG_EXITING |
415                             CPU_BASED_CR3_LOAD_EXITING |
416                             CPU_BASED_CR3_STORE_EXITING) )
417             caps.secondary_exec_control &=
418                 ~(SECONDARY_EXEC_ENABLE_EPT |
419                   SECONDARY_EXEC_UNRESTRICTED_GUEST);
420     }
421 
422     /* PML cannot be supported if EPT is not used */
423     if ( !(caps.secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) )
424         caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
425 
426     /* Turn off opt_ept_pml if PML feature is not present. */
427     if ( !(caps.secondary_exec_control & SECONDARY_EXEC_ENABLE_PML) )
428         opt_ept_pml = false;
429 
430     if ( (caps.secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) &&
431           ple_gap == 0 )
432     {
433         if ( !vmx_caps.pin_based_exec_control )
434             printk(XENLOG_INFO "Disable Pause-Loop Exiting.\n");
435         caps.secondary_exec_control &= ~ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
436     }
437 
438     min = VM_EXIT_ACK_INTR_ON_EXIT;
439     opt = (VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT |
440            VM_EXIT_LOAD_HOST_EFER | VM_EXIT_CLEAR_BNDCFGS);
441     min |= VM_EXIT_IA32E_MODE;
442     caps.vmexit_control = adjust_vmx_controls(
443         "VMExit Control", min, opt, MSR_IA32_VMX_EXIT_CTLS, &mismatch);
444 
    /*
     * "Process posted interrupts" can be set only when both "virtual-interrupt
     * delivery" and "acknowledge interrupt on exit" are set.  As the latter is
     * part of the minimum (required) settings, only check the former, which is
     * optional.
     */
450     if ( !(caps.secondary_exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) )
451         caps.pin_based_exec_control &= ~PIN_BASED_POSTED_INTERRUPT;
452 
453     if ( iommu_intpost &&
454          !(caps.pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) )
455     {
        printk("Intel VT-d Posted Interrupt is disabled because CPU-side "
               "Posted Interrupt is not enabled\n");
458         iommu_intpost = 0;
459     }
460 
461     /* The IA32_VMX_VMFUNC MSR exists only when VMFUNC is available */
462     if ( caps.secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS )
463     {
464         rdmsrl(MSR_IA32_VMX_VMFUNC, caps.vmfunc);
465 
466         /*
467          * VMFUNC leaf 0 (EPTP switching) must be supported.
468          *
469          * Or we just don't use VMFUNC.
470          */
471         if ( !(caps.vmfunc & VMX_VMFUNC_EPTP_SWITCHING) )
472             caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VM_FUNCTIONS;
473     }
474 
475     /* Virtualization exceptions are only enabled if VMFUNC is enabled */
476     if ( !(caps.secondary_exec_control & SECONDARY_EXEC_ENABLE_VM_FUNCTIONS) )
477         caps.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS;
478 
479     min = 0;
480     opt = (VM_ENTRY_LOAD_GUEST_PAT | VM_ENTRY_LOAD_GUEST_EFER |
481            VM_ENTRY_LOAD_BNDCFGS);
482     caps.vmentry_control = adjust_vmx_controls(
483         "VMEntry Control", min, opt, MSR_IA32_VMX_ENTRY_CTLS, &mismatch);
484 
485     if ( mismatch )
486         return -EINVAL;
487 
488     if ( !vmx_caps.pin_based_exec_control )
489     {
490         /* First time through. */
491         vmx_caps = caps;
492         vmx_caps.basic_msr = ((uint64_t)vmx_basic_msr_high << 32) |
493                              vmx_basic_msr_low;
494 
495         vmx_display_features();
496 
497         /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
498         if ( (vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)) >
499              PAGE_SIZE )
500         {
501             printk("VMX: CPU%d VMCS size is too big (%Lu bytes)\n",
502                    smp_processor_id(),
503                    vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32));
504             return -EINVAL;
505         }
506     }
507     else
508     {
509         /* Globals are already initialised: re-check them. */
510         mismatch |= cap_check(
511             "VMCS revision ID",
512             vmcs_revision_id, vmx_basic_msr_low & VMX_BASIC_REVISION_MASK);
513         mismatch |= cap_check(
514             "Pin-Based Exec Control",
515             vmx_caps.pin_based_exec_control, caps.pin_based_exec_control);
516         mismatch |= cap_check(
517             "CPU-Based Exec Control",
518             vmx_caps.cpu_based_exec_control, caps.cpu_based_exec_control);
519         mismatch |= cap_check(
520             "Secondary Exec Control",
521             vmx_caps.secondary_exec_control, caps.secondary_exec_control);
522         mismatch |= cap_check(
523             "Tertiary Exec Control",
524             vmx_caps.tertiary_exec_control, caps.tertiary_exec_control);
525         mismatch |= cap_check(
526             "VMExit Control",
527             vmx_caps.vmexit_control, caps.vmexit_control);
528         mismatch |= cap_check(
529             "VMEntry Control",
530             vmx_caps.vmentry_control, caps.vmentry_control);
531         mismatch |= cap_check("EPT Capability", vmx_caps.ept, caps.ept);
532         mismatch |= cap_check("VPID Capability", vmx_caps.vpid, caps.vpid);
533         mismatch |= cap_check(
534             "VMFUNC Capability",
535             vmx_caps.vmfunc, caps.vmfunc);
536         if ( cpu_has_vmx_ins_outs_instr_info !=
537              !!(vmx_basic_msr_high & (VMX_BASIC_INS_OUT_INFO >> 32)) )
538         {
539             printk("VMX INS/OUTS Instruction Info: saw %d expected %d\n",
540                    !!(vmx_basic_msr_high & (VMX_BASIC_INS_OUT_INFO >> 32)),
541                    cpu_has_vmx_ins_outs_instr_info);
542             mismatch = 1;
543         }
544         if ( (vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32)) !=
545              ((vmx_caps.basic_msr & VMX_BASIC_VMCS_SIZE_MASK) >> 32) )
546         {
547             printk("VMX: CPU%d unexpected VMCS size %Lu\n",
548                    smp_processor_id(),
549                    vmx_basic_msr_high & (VMX_BASIC_VMCS_SIZE_MASK >> 32));
550             mismatch = 1;
551         }
552         if ( mismatch )
553         {
554             printk("VMX: Capabilities fatally differ between CPU%d and CPU0\n",
555                    smp_processor_id());
556             return -EINVAL;
557         }
558     }
559 
560     /* IA-32 SDM Vol 3B: 64-bit CPUs always have VMX_BASIC_MSR[48]==0. */
561     if ( vmx_basic_msr_high & (VMX_BASIC_32BIT_ADDRESSES >> 32) )
562     {
563         printk("VMX: CPU%d limits VMX structure pointers to 32 bits\n",
564                smp_processor_id());
565         return -EINVAL;
566     }
567 
568     /* Require Write-Back (WB) memory type for VMCS accesses. */
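    /*
     * (VMX_BASIC_MEMORY_TYPE_MASK & -VMX_BASIC_MEMORY_TYPE_MASK) isolates the
     * lowest set bit of the mask, so the division below simply right-shifts
     * the memory-type field (bits 53:50 of the MSR) down to bit 0.
     */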
569     opt = (vmx_basic_msr_high & (VMX_BASIC_MEMORY_TYPE_MASK >> 32)) /
570           ((VMX_BASIC_MEMORY_TYPE_MASK & -VMX_BASIC_MEMORY_TYPE_MASK) >> 32);
571     if ( opt != X86_MT_WB )
572     {
573         printk("VMX: CPU%d has unexpected VMCS access type %u\n",
574                smp_processor_id(), opt);
575         return -EINVAL;
576     }
577 
578     return 0;
579 }
580 
static paddr_t vmx_alloc_vmcs(void)
582 {
583     struct page_info *pg;
584     struct vmcs_struct *vmcs;
585 
586     if ( (pg = alloc_domheap_page(NULL, 0)) == NULL )
587     {
588         gdprintk(XENLOG_WARNING, "Failed to allocate VMCS.\n");
589         return 0;
590     }
591 
592     vmcs = __map_domain_page(pg);
593     clear_page(vmcs);
594     vmcs->revision_id = vmcs_revision_id;
595     unmap_domain_page(vmcs);
596 
597     return page_to_maddr(pg);
598 }
599 
static void vmx_free_vmcs(paddr_t pa)
601 {
602     free_domheap_page(maddr_to_page(pa));
603 }
604 
static void cf_check __vmx_clear_vmcs(void *info)
606 {
607     struct vcpu *v = info;
608     struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
609 
610     /* Otherwise we can nest (vmx_cpu_down() vs. vmx_clear_vmcs()). */
611     ASSERT(!local_irq_is_enabled());
612 
613     if ( vmx->active_cpu == smp_processor_id() )
614     {
615         __vmpclear(vmx->vmcs_pa);
616         if ( vmx->vmcs_shadow_maddr )
617             __vmpclear(vmx->vmcs_shadow_maddr);
618 
619         vmx->active_cpu = -1;
620         vmx->launched   = 0;
621 
622         list_del(&vmx->active_list);
623 
624         if ( vmx->vmcs_pa == this_cpu(current_vmcs) )
625             this_cpu(current_vmcs) = 0;
626     }
627 }
628 
static void vmx_clear_vmcs(struct vcpu *v)
630 {
631     int cpu = v->arch.hvm.vmx.active_cpu;
632 
633     if ( cpu != -1 )
634         on_selected_cpus(cpumask_of(cpu), __vmx_clear_vmcs, v, 1);
635 }
636 
static void vmx_load_vmcs(struct vcpu *v)
638 {
639     unsigned long flags;
640 
641     local_irq_save(flags);
642 
643     if ( v->arch.hvm.vmx.active_cpu == -1 )
644     {
645         list_add(&v->arch.hvm.vmx.active_list, &this_cpu(active_vmcs_list));
646         v->arch.hvm.vmx.active_cpu = smp_processor_id();
647     }
648 
649     ASSERT(v->arch.hvm.vmx.active_cpu == smp_processor_id());
650 
651     __vmptrld(v->arch.hvm.vmx.vmcs_pa);
652     this_cpu(current_vmcs) = v->arch.hvm.vmx.vmcs_pa;
653 
654     local_irq_restore(flags);
655 }
656 
void vmx_vmcs_reload(struct vcpu *v)
658 {
659     /*
660      * As we may be running with interrupts disabled, we can't acquire
661      * v->arch.hvm.vmx.vmcs_lock here. However, with interrupts disabled
662      * the VMCS can't be taken away from us anymore if we still own it.
663      */
664     ASSERT(v->is_running || !local_irq_is_enabled());
665     if ( v->arch.hvm.vmx.vmcs_pa == this_cpu(current_vmcs) )
666         return;
667 
668     vmx_load_vmcs(v);
669 }
670 
int cf_check vmx_cpu_up_prepare(unsigned int cpu)
672 {
    /*
     * If nvmx_cpu_up_prepare() failed, do not return failure; just fall back
     * to legacy mode for vvmcs synchronization.
     */
677     if ( nvmx_cpu_up_prepare(cpu) != 0 )
678         printk("CPU%d: Could not allocate virtual VMCS buffer.\n", cpu);
679 
680     if ( per_cpu(vmxon_region, cpu) )
681         return 0;
682 
683     per_cpu(vmxon_region, cpu) = vmx_alloc_vmcs();
684     if ( per_cpu(vmxon_region, cpu) )
685         return 0;
686 
687     printk("CPU%d: Could not allocate host VMCS\n", cpu);
688     nvmx_cpu_dead(cpu);
689     return -ENOMEM;
690 }
691 
void cf_check vmx_cpu_dead(unsigned int cpu)
693 {
694     vmx_free_vmcs(per_cpu(vmxon_region, cpu));
695     per_cpu(vmxon_region, cpu) = 0;
696     nvmx_cpu_dead(cpu);
697     vmx_pi_desc_fixup(cpu);
698 }
699 
static int _vmx_cpu_up(bool bsp)
701 {
702     u32 eax, edx;
703     int rc, bios_locked, cpu = smp_processor_id();
704     u64 cr0, vmx_cr0_fixed0, vmx_cr0_fixed1;
705 
706     BUG_ON(!(read_cr4() & X86_CR4_VMXE));
707 
    /*
     * Ensure the current processor operating mode meets the required CR0
     * fixed bits for VMX operation.
     */
712     cr0 = read_cr0();
713     rdmsrl(MSR_IA32_VMX_CR0_FIXED0, vmx_cr0_fixed0);
714     rdmsrl(MSR_IA32_VMX_CR0_FIXED1, vmx_cr0_fixed1);
715     if ( (~cr0 & vmx_cr0_fixed0) || (cr0 & ~vmx_cr0_fixed1) )
716     {
717         printk("CPU%d: some settings of host CR0 are "
718                "not allowed in VMX operation.\n", cpu);
719         return -EINVAL;
720     }
721 
722     rdmsr(MSR_IA32_FEATURE_CONTROL, eax, edx);
723 
724     bios_locked = !!(eax & IA32_FEATURE_CONTROL_LOCK);
725     if ( bios_locked )
726     {
727         if ( !(eax & (tboot_in_measured_env()
728                       ? IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX
729                       : IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX)) )
730         {
731             printk("CPU%d: VMX disabled by BIOS.\n", cpu);
732             return -EINVAL;
733         }
734     }
735     else
736     {
737         eax  = IA32_FEATURE_CONTROL_LOCK;
738         eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX;
739         if ( test_bit(X86_FEATURE_SMX, &boot_cpu_data.x86_capability) )
740             eax |= IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX;
741         wrmsr(MSR_IA32_FEATURE_CONTROL, eax, 0);
742     }
743 
744     if ( (rc = vmx_init_vmcs_config(bsp)) != 0 )
745         return rc;
746 
747     INIT_LIST_HEAD(&this_cpu(active_vmcs_list));
748 
749     if ( bsp && (rc = vmx_cpu_up_prepare(cpu)) != 0 )
750         return rc;
751 
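    /*
     * VMXON indicates failure by setting CF (VMfailInvalid) or ZF
     * (VMfailValid), which the "jbe" below catches; a fault (e.g. #GP or #UD)
     * is routed through the exception table entry to the vmxon_fault label
     * instead.
     */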
752     asm_inline goto (
753         "1: vmxon %[addr]\n\t"
754         "   jbe %l[vmxon_fail]\n\t"
755         _ASM_EXTABLE(1b, %l[vmxon_fault])
756         :
757         : [addr] "m" (this_cpu(vmxon_region))
758         : "memory"
759         : vmxon_fail, vmxon_fault );
760 
761     this_cpu(vmxon) = 1;
762 
763     hvm_asid_init(cpu_has_vmx_vpid ? (1u << VMCS_VPID_WIDTH) : 0);
764 
765     if ( cpu_has_vmx_ept )
766         ept_sync_all();
767 
768     if ( cpu_has_vmx_vpid )
769         vpid_sync_all();
770 
771     vmx_pi_per_cpu_init(cpu);
772 
773     return 0;
774 
775  vmxon_fault:
776     if ( bios_locked &&
777          test_bit(X86_FEATURE_SMX, &boot_cpu_data.x86_capability) &&
778          (!(eax & IA32_FEATURE_CONTROL_ENABLE_VMXON_OUTSIDE_SMX) ||
779           !(eax & IA32_FEATURE_CONTROL_ENABLE_VMXON_INSIDE_SMX)) )
780     {
781         printk(XENLOG_ERR
782                "CPU%d: VMXON failed: perhaps because of TXT settings in your BIOS configuration?\n",
783                cpu);
784         printk(XENLOG_ERR
785                " --> Disable TXT in your BIOS unless using a secure bootloader.\n");
786         return -EINVAL;
787     }
788 
789  vmxon_fail:
790     printk(XENLOG_ERR "CPU%d: unexpected VMXON failure\n", cpu);
791     return -EINVAL;
792 }
793 
int cf_check vmx_cpu_up(void)
795 {
796     return _vmx_cpu_up(false);
797 }
798 
void cf_check vmx_cpu_down(void)
800 {
801     struct list_head *active_vmcs_list = &this_cpu(active_vmcs_list);
802     unsigned long flags;
803 
804     if ( !this_cpu(vmxon) )
805         return;
806 
807     local_irq_save(flags);
808 
809     while ( !list_empty(active_vmcs_list) )
810         __vmx_clear_vmcs(list_entry(active_vmcs_list->next,
811                                     struct vcpu, arch.hvm.vmx.active_list));
812 
813     BUG_ON(!(read_cr4() & X86_CR4_VMXE));
814     this_cpu(vmxon) = 0;
815     asm volatile ( "vmxoff" ::: "memory" );
816 
817     local_irq_restore(flags);
818 }
819 
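/*
 * Per-CPU record of a remote vCPU whose VMCS has temporarily been loaded on
 * this CPU by vmx_vmcs_try_enter(); "count" allows such enters to nest, and
 * vmx_vmcs_exit() releases the VMCS once it drops back to zero.
 */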
820 struct foreign_vmcs {
821     struct vcpu *v;
822     unsigned int count;
823 };
824 static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs);
825 
bool vmx_vmcs_try_enter(struct vcpu *v)
827 {
828     struct foreign_vmcs *fv;
829 
830     /*
831      * NB. We must *always* run an HVM VCPU on its own VMCS, except for
832      * vmx_vmcs_enter/exit and scheduling tail critical regions.
833      */
834     if ( likely(v == current) )
835         return v->arch.hvm.vmx.vmcs_pa == this_cpu(current_vmcs);
836 
837     fv = &this_cpu(foreign_vmcs);
838 
839     if ( fv->v == v )
840     {
841         BUG_ON(fv->count == 0);
842     }
843     else
844     {
845         BUG_ON(fv->v != NULL);
846         BUG_ON(fv->count != 0);
847 
848         vcpu_pause(v);
849         spin_lock(&v->arch.hvm.vmx.vmcs_lock);
850 
851         vmx_clear_vmcs(v);
852         vmx_load_vmcs(v);
853 
854         fv->v = v;
855     }
856 
857     fv->count++;
858 
859     return 1;
860 }
861 
void vmx_vmcs_enter(struct vcpu *v)
863 {
864     bool okay = vmx_vmcs_try_enter(v);
865 
866     ASSERT(okay);
867 }
868 
void vmx_vmcs_exit(struct vcpu *v)
870 {
871     struct foreign_vmcs *fv;
872 
873     if ( likely(v == current) )
874         return;
875 
876     fv = &this_cpu(foreign_vmcs);
877     BUG_ON(fv->v != v);
878     BUG_ON(fv->count == 0);
879 
880     if ( --fv->count == 0 )
881     {
882         /* Don't confuse vmx_do_resume (for @v or @current!) */
883         vmx_clear_vmcs(v);
884         if ( is_hvm_vcpu(current) )
885             vmx_load_vmcs(current);
886 
887         spin_unlock(&v->arch.hvm.vmx.vmcs_lock);
888         vcpu_unpause(v);
889 
890         fv->v = NULL;
891     }
892 }
893 
static void vmx_set_host_env(struct vcpu *v)
895 {
896     unsigned int cpu = smp_processor_id();
897 
898     __vmwrite(HOST_GDTR_BASE,
899               (unsigned long)(this_cpu(gdt) - FIRST_RESERVED_GDT_ENTRY));
900     __vmwrite(HOST_IDTR_BASE, (unsigned long)per_cpu(idt, cpu));
901 
902     __vmwrite(HOST_TR_BASE, (unsigned long)&per_cpu(tss_page, cpu).tss);
903 
904     __vmwrite(HOST_SYSENTER_ESP, get_stack_bottom());
905 
    /*
     * Skip the end of cpu_user_regs when entering the hypervisor because the
     * CPU does not save that context onto the stack.  SS, RSP, CS, RIP,
     * RFLAGS, etc. all get saved into the VMCS instead.
     */
911     __vmwrite(HOST_RSP,
912               (unsigned long)&get_cpu_info()->guest_cpu_user_regs.error_code);
913 }
914 
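/*
 * The VMX MSR bitmap is a single 4K page consisting of four 1K bitmaps
 * (read_low, read_high, write_low, write_high), where the "low" bitmaps
 * cover MSRs 0x00000000-0x00001fff and the "high" bitmaps cover MSRs
 * 0xc0000000-0xc0001fff.  A set bit intercepts the access; a clear bit lets
 * the guest access the MSR directly.
 */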
void vmx_clear_msr_intercept(struct vcpu *v, unsigned int msr,
916                              enum vmx_msr_intercept_type type)
917 {
918     struct vmx_msr_bitmap *msr_bitmap = v->arch.hvm.vmx.msr_bitmap;
919     struct domain *d = v->domain;
920 
921     /* VMX MSR bitmap supported? */
922     if ( msr_bitmap == NULL )
923         return;
924 
925     if ( unlikely(monitored_msr(d, msr)) )
926         return;
927 
928     if ( msr <= 0x1fff )
929     {
930         if ( type & VMX_MSR_R )
931             clear_bit(msr, msr_bitmap->read_low);
932         if ( type & VMX_MSR_W )
933             clear_bit(msr, msr_bitmap->write_low);
934     }
935     else if ( (msr >= 0xc0000000U) && (msr <= 0xc0001fffU) )
936     {
937         msr &= 0x1fff;
938         if ( type & VMX_MSR_R )
939             clear_bit(msr, msr_bitmap->read_high);
940         if ( type & VMX_MSR_W )
941             clear_bit(msr, msr_bitmap->write_high);
942     }
943     else
944         ASSERT(!"MSR out of range for interception\n");
945 }
946 
void vmx_set_msr_intercept(struct vcpu *v, unsigned int msr,
948                            enum vmx_msr_intercept_type type)
949 {
950     struct vmx_msr_bitmap *msr_bitmap = v->arch.hvm.vmx.msr_bitmap;
951 
952     /* VMX MSR bitmap supported? */
953     if ( msr_bitmap == NULL )
954         return;
955 
956     if ( msr <= 0x1fff )
957     {
958         if ( type & VMX_MSR_R )
959             set_bit(msr, msr_bitmap->read_low);
960         if ( type & VMX_MSR_W )
961             set_bit(msr, msr_bitmap->write_low);
962     }
963     else if ( (msr >= 0xc0000000U) && (msr <= 0xc0001fffU) )
964     {
965         msr &= 0x1fff;
966         if ( type & VMX_MSR_R )
967             set_bit(msr, msr_bitmap->read_high);
968         if ( type & VMX_MSR_W )
969             set_bit(msr, msr_bitmap->write_high);
970     }
971     else
972         ASSERT(!"MSR out of range for interception\n");
973 }
974 
bool vmx_msr_is_intercepted(struct vmx_msr_bitmap *msr_bitmap,
976                             unsigned int msr, bool is_write)
977 {
978     if ( msr <= 0x1fff )
979         return test_bit(msr, is_write ? msr_bitmap->write_low
980                                       : msr_bitmap->read_low);
981     else if ( (msr >= 0xc0000000U) && (msr <= 0xc0001fffU) )
982         return test_bit(msr & 0x1fff, is_write ? msr_bitmap->write_high
983                                                : msr_bitmap->read_high);
984     else
985         /* MSRs outside the bitmap ranges are always intercepted. */
986         return true;
987 }
988 
989 
990 /*
991  * Switch VMCS between layer 1 & 2 guest
992  */
void vmx_vmcs_switch(paddr_t from, paddr_t to)
994 {
995     struct vmx_vcpu *vmx = &current->arch.hvm.vmx;
996     spin_lock(&vmx->vmcs_lock);
997 
998     __vmpclear(from);
999     if ( vmx->vmcs_shadow_maddr )
1000         __vmpclear(vmx->vmcs_shadow_maddr);
1001     __vmptrld(to);
1002 
1003     vmx->vmcs_pa = to;
1004     vmx->launched = 0;
1005     this_cpu(current_vmcs) = to;
1006 
1007     if ( vmx->hostenv_migrated )
1008     {
1009         vmx->hostenv_migrated = 0;
1010         vmx_set_host_env(current);
1011     }
1012 
1013     spin_unlock(&vmx->vmcs_lock);
1014 }
1015 
void virtual_vmcs_enter(const struct vcpu *v)
1017 {
1018     __vmptrld(v->arch.hvm.vmx.vmcs_shadow_maddr);
1019 }
1020 
void virtual_vmcs_exit(const struct vcpu *v)
1022 {
1023     paddr_t cur = this_cpu(current_vmcs);
1024 
1025     __vmpclear(v->arch.hvm.vmx.vmcs_shadow_maddr);
1026     if ( cur )
1027         __vmptrld(cur);
1028 }
1029 
u64 virtual_vmcs_vmread(const struct vcpu *v, u32 vmcs_encoding)
1031 {
1032     u64 res;
1033 
1034     virtual_vmcs_enter(v);
1035     __vmread(vmcs_encoding, &res);
1036     virtual_vmcs_exit(v);
1037 
1038     return res;
1039 }
1040 
enum vmx_insn_errno virtual_vmcs_vmread_safe(const struct vcpu *v,
1042                                              u32 vmcs_encoding, u64 *val)
1043 {
1044     enum vmx_insn_errno ret;
1045 
1046     virtual_vmcs_enter(v);
1047     ret = vmread_safe(vmcs_encoding, val);
1048     virtual_vmcs_exit(v);
1049 
1050     return ret;
1051 }
1052 
void virtual_vmcs_vmwrite(const struct vcpu *v, u32 vmcs_encoding, u64 val)
1054 {
1055     virtual_vmcs_enter(v);
1056     __vmwrite(vmcs_encoding, val);
1057     virtual_vmcs_exit(v);
1058 }
1059 
enum vmx_insn_errno virtual_vmcs_vmwrite_safe(const struct vcpu *v,
1061                                               u32 vmcs_encoding, u64 val)
1062 {
1063     enum vmx_insn_errno ret;
1064 
1065     virtual_vmcs_enter(v);
1066     ret = vmwrite_safe(vmcs_encoding, val);
1067     virtual_vmcs_exit(v);
1068 
1069     return ret;
1070 }
1071 
/*
 * This function is only called during a vCPU's initialization phase,
 * so we can update the posted-interrupt descriptor in a non-atomic way.
 */
static void pi_desc_init(struct vcpu *v)
1077 {
1078     v->arch.hvm.vmx.pi_desc.nv = posted_intr_vector;
1079 
    /*
     * Mark NDST as invalid; this invalid value is then used as a marker
     * for whether NDST needs updating in vmx_pi_hooks_assign().
     */
1084     v->arch.hvm.vmx.pi_desc.ndst = APIC_INVALID_DEST;
1085 }
1086 
1087 void nocall vmx_asm_vmexit_handler(void);
1088 
static int construct_vmcs(struct vcpu *v)
1090 {
1091     struct domain *d = v->domain;
1092     uint32_t vmexit_ctl = vmx_caps.vmexit_control;
1093     u32 vmentry_ctl = vmx_caps.vmentry_control;
1094     int rc = 0;
1095 
1096     vmx_vmcs_enter(v);
1097 
1098     /* VMCS controls. */
1099     __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_caps.pin_based_exec_control);
1100 
1101     v->arch.hvm.vmx.exec_control = vmx_caps.cpu_based_exec_control;
1102     if ( d->arch.vtsc && !cpu_has_vmx_tsc_scaling )
1103         v->arch.hvm.vmx.exec_control |= CPU_BASED_RDTSC_EXITING;
1104 
1105     v->arch.hvm.vmx.secondary_exec_control = vmx_caps.secondary_exec_control;
1106     v->arch.hvm.vmx.tertiary_exec_control  = vmx_caps.tertiary_exec_control;
1107 
1108     /*
1109      * Disable features which we don't want active by default:
1110      *  - Descriptor table exiting only if wanted by introspection
1111      *  - x2APIC - default is xAPIC mode
1112      *  - VPID settings chosen at VMEntry time
1113      *  - VMCS Shadowing only when in nested VMX mode
1114      *  - PML only when logdirty is active
1115      *  - VMFUNC/#VE only if wanted by altp2m
1116      */
1117     v->arch.hvm.vmx.secondary_exec_control &=
1118         ~(SECONDARY_EXEC_DESCRIPTOR_TABLE_EXITING |
1119           SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
1120           SECONDARY_EXEC_ENABLE_VPID |
1121           SECONDARY_EXEC_ENABLE_VMCS_SHADOWING |
1122           SECONDARY_EXEC_ENABLE_PML |
1123           SECONDARY_EXEC_ENABLE_VM_FUNCTIONS |
1124           SECONDARY_EXEC_ENABLE_VIRT_EXCEPTIONS);
1125 
1126     if ( paging_mode_hap(d) )
1127     {
1128         v->arch.hvm.vmx.exec_control &= ~(CPU_BASED_INVLPG_EXITING |
1129                                           CPU_BASED_CR3_LOAD_EXITING |
1130                                           CPU_BASED_CR3_STORE_EXITING);
1131     }
1132     else
1133     {
1134         v->arch.hvm.vmx.secondary_exec_control &=
1135             ~(SECONDARY_EXEC_ENABLE_EPT |
1136               SECONDARY_EXEC_UNRESTRICTED_GUEST |
1137               SECONDARY_EXEC_ENABLE_INVPCID);
1138         v->arch.hvm.vmx.tertiary_exec_control &=
1139             ~(TERTIARY_EXEC_EPT_PAGING_WRITE);
1140         vmexit_ctl &= ~(VM_EXIT_SAVE_GUEST_PAT |
1141                         VM_EXIT_LOAD_HOST_PAT);
1142         vmentry_ctl &= ~VM_ENTRY_LOAD_GUEST_PAT;
1143     }
1144 
    /* Do not enable the Monitor Trap Flag until single-step debug starts. */
1146     v->arch.hvm.vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
1147 
1148     vmx_update_cpu_exec_control(v);
1149 
1150     __vmwrite(VM_EXIT_CONTROLS, vmexit_ctl);
1151     __vmwrite(VM_ENTRY_CONTROLS, vmentry_ctl);
1152 
1153     if ( cpu_has_vmx_ple )
1154     {
1155         __vmwrite(PLE_GAP, ple_gap);
1156         __vmwrite(PLE_WINDOW, ple_window);
1157     }
1158 
1159     if ( cpu_has_vmx_secondary_exec_control )
1160         __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1161                   v->arch.hvm.vmx.secondary_exec_control);
1162 
1163     if ( cpu_has_vmx_tertiary_exec_control )
1164         __vmwrite(TERTIARY_VM_EXEC_CONTROL,
1165                   v->arch.hvm.vmx.tertiary_exec_control);
1166 
1167     /* MSR access bitmap. */
1168     if ( cpu_has_vmx_msr_bitmap )
1169     {
1170         struct vmx_msr_bitmap *msr_bitmap = alloc_xenheap_page();
1171 
1172         if ( msr_bitmap == NULL )
1173         {
1174             rc = -ENOMEM;
1175             goto out;
1176         }
1177 
1178         memset(msr_bitmap, ~0, PAGE_SIZE);
1179         v->arch.hvm.vmx.msr_bitmap = msr_bitmap;
1180         __vmwrite(MSR_BITMAP, virt_to_maddr(msr_bitmap));
1181 
1182         vmx_clear_msr_intercept(v, MSR_FS_BASE, VMX_MSR_RW);
1183         vmx_clear_msr_intercept(v, MSR_GS_BASE, VMX_MSR_RW);
1184         vmx_clear_msr_intercept(v, MSR_SHADOW_GS_BASE, VMX_MSR_RW);
1185         vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_CS, VMX_MSR_RW);
1186         vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_ESP, VMX_MSR_RW);
1187         vmx_clear_msr_intercept(v, MSR_IA32_SYSENTER_EIP, VMX_MSR_RW);
1188         if ( paging_mode_hap(d) && (!is_iommu_enabled(d) || iommu_snoop) )
1189             vmx_clear_msr_intercept(v, MSR_IA32_CR_PAT, VMX_MSR_RW);
1190         if ( (vmexit_ctl & VM_EXIT_CLEAR_BNDCFGS) &&
1191              (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) )
1192             vmx_clear_msr_intercept(v, MSR_IA32_BNDCFGS, VMX_MSR_RW);
1193     }
1194 
1195     /* I/O access bitmap. */
1196     __vmwrite(IO_BITMAP_A, __pa(d->arch.hvm.io_bitmap));
1197     __vmwrite(IO_BITMAP_B, __pa(d->arch.hvm.io_bitmap) + PAGE_SIZE);
1198 
1199     if ( cpu_has_vmx_virtual_intr_delivery )
1200     {
1201         unsigned int i;
1202 
1203         /* EOI-exit bitmap */
1204         bitmap_zero(v->arch.hvm.vmx.eoi_exit_bitmap, X86_IDT_VECTORS);
1205         for ( i = 0; i < ARRAY_SIZE(v->arch.hvm.vmx.eoi_exit_bitmap); ++i )
1206             __vmwrite(EOI_EXIT_BITMAP(i), 0);
1207 
1208         /* Initialise Guest Interrupt Status (RVI and SVI) to 0 */
1209         __vmwrite(GUEST_INTR_STATUS, 0);
1210     }
1211 
1212     if ( cpu_has_vmx_posted_intr_processing )
1213     {
1214         if ( iommu_intpost )
1215             pi_desc_init(v);
1216 
1217         __vmwrite(PI_DESC_ADDR, virt_to_maddr(&v->arch.hvm.vmx.pi_desc));
1218         __vmwrite(POSTED_INTR_NOTIFICATION_VECTOR, posted_intr_vector);
1219     }
1220 
1221     /* Host data selectors. */
1222     __vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
1223     __vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
1224     __vmwrite(HOST_ES_SELECTOR, __HYPERVISOR_DS);
1225     __vmwrite(HOST_FS_SELECTOR, 0);
1226     __vmwrite(HOST_GS_SELECTOR, 0);
1227     __vmwrite(HOST_FS_BASE, 0);
1228     __vmwrite(HOST_GS_BASE, 0);
1229     __vmwrite(HOST_TR_SELECTOR, TSS_SELECTOR);
1230 
1231     /* Host control registers. */
1232     v->arch.hvm.vmx.host_cr0 = read_cr0() & ~X86_CR0_TS;
1233     if ( !v->arch.fully_eager_fpu )
1234         v->arch.hvm.vmx.host_cr0 |= X86_CR0_TS;
1235     __vmwrite(HOST_CR0, v->arch.hvm.vmx.host_cr0);
1236     __vmwrite(HOST_CR4, mmu_cr4_features);
1237     if ( cpu_has_vmx_efer )
1238         __vmwrite(HOST_EFER, read_efer());
1239 
1240     /* Host CS:RIP. */
1241     __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
1242     __vmwrite(HOST_RIP, (unsigned long)vmx_asm_vmexit_handler);
1243 
1244     /* Host SYSENTER CS:RIP. */
1245     __vmwrite(HOST_SYSENTER_CS, IS_ENABLED(CONFIG_PV) ? __HYPERVISOR_CS : 0);
1246     __vmwrite(HOST_SYSENTER_EIP,
1247               IS_ENABLED(CONFIG_PV) ? (unsigned long)sysenter_entry : 0);
1248 
1249     /* MSR intercepts. */
1250     __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
1251     __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
1252     __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
1253 
1254     __vmwrite(VM_ENTRY_INTR_INFO, 0);
1255 
1256     __vmwrite(CR0_GUEST_HOST_MASK, ~0UL);
1257     __vmwrite(CR4_GUEST_HOST_MASK, ~0UL);
1258     v->arch.hvm.vmx.cr4_host_mask = ~0UL;
1259 
1260     __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
1261     __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);
1262 
1263     __vmwrite(CR3_TARGET_COUNT, 0);
1264 
1265     __vmwrite(GUEST_ACTIVITY_STATE, 0);
1266 
1267     /* Guest segment bases. */
1268     __vmwrite(GUEST_ES_BASE, 0);
1269     __vmwrite(GUEST_SS_BASE, 0);
1270     __vmwrite(GUEST_DS_BASE, 0);
1271     __vmwrite(GUEST_FS_BASE, 0);
1272     __vmwrite(GUEST_GS_BASE, 0);
1273     __vmwrite(GUEST_CS_BASE, 0);
1274 
1275     /* Guest segment limits. */
1276     __vmwrite(GUEST_ES_LIMIT, ~0u);
1277     __vmwrite(GUEST_SS_LIMIT, ~0u);
1278     __vmwrite(GUEST_DS_LIMIT, ~0u);
1279     __vmwrite(GUEST_FS_LIMIT, ~0u);
1280     __vmwrite(GUEST_GS_LIMIT, ~0u);
1281     __vmwrite(GUEST_CS_LIMIT, ~0u);
1282 
1283     /* Guest segment AR bytes. */
1284     __vmwrite(GUEST_ES_AR_BYTES, 0xc093); /* read/write, accessed */
1285     __vmwrite(GUEST_SS_AR_BYTES, 0xc093);
1286     __vmwrite(GUEST_DS_AR_BYTES, 0xc093);
1287     __vmwrite(GUEST_FS_AR_BYTES, 0xc093);
1288     __vmwrite(GUEST_GS_AR_BYTES, 0xc093);
1289     __vmwrite(GUEST_CS_AR_BYTES, 0xc09b); /* exec/read, accessed */
1290 
1291     /* Guest IDT. */
1292     __vmwrite(GUEST_IDTR_BASE, 0);
1293     __vmwrite(GUEST_IDTR_LIMIT, 0);
1294 
1295     /* Guest GDT. */
1296     __vmwrite(GUEST_GDTR_BASE, 0);
1297     __vmwrite(GUEST_GDTR_LIMIT, 0);
1298 
1299     /* Guest LDT. */
1300     __vmwrite(GUEST_LDTR_AR_BYTES, 0x0082); /* LDT */
1301     __vmwrite(GUEST_LDTR_SELECTOR, 0);
1302     __vmwrite(GUEST_LDTR_BASE, 0);
1303     __vmwrite(GUEST_LDTR_LIMIT, 0);
1304 
1305     /* Guest TSS. */
1306     __vmwrite(GUEST_TR_AR_BYTES, 0x008b); /* 32-bit TSS (busy) */
1307     __vmwrite(GUEST_TR_BASE, 0);
1308     __vmwrite(GUEST_TR_LIMIT, 0xff);
1309 
1310     __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
1311     __vmwrite(GUEST_DR7, 0);
1312     __vmwrite(VMCS_LINK_POINTER, ~0UL);
1313 
1314     v->arch.hvm.vmx.exception_bitmap = HVM_TRAP_MASK
1315               | (paging_mode_hap(d) ? 0 : (1U << X86_EXC_PF))
1316               | (v->arch.fully_eager_fpu ? 0 : (1U << X86_EXC_NM));
1317 
1318     if ( cpu_has_vmx_notify_vm_exiting )
1319         __vmwrite(NOTIFY_WINDOW, vm_notify_window);
1320 
1321     vmx_update_exception_bitmap(v);
1322 
1323     v->arch.hvm.guest_cr[0] = X86_CR0_PE | X86_CR0_ET;
1324     hvm_update_guest_cr(v, 0);
1325 
1326     v->arch.hvm.guest_cr[4] = 0;
1327     hvm_update_guest_cr(v, 4);
1328 
1329     if ( cpu_has_vmx_tpr_shadow )
1330     {
1331         __vmwrite(VIRTUAL_APIC_PAGE_ADDR,
1332                   page_to_maddr(vcpu_vlapic(v)->regs_page));
1333         __vmwrite(TPR_THRESHOLD, 0);
1334     }
1335 
1336     if ( paging_mode_hap(d) )
1337     {
1338         struct p2m_domain *p2m = p2m_get_hostp2m(d);
1339         struct ept_data *ept = &p2m->ept;
1340 
1341         ept->mfn = pagetable_get_pfn(p2m_get_pagetable(p2m));
1342         __vmwrite(EPT_POINTER, ept->eptp);
1343 
1344         __vmwrite(HOST_PAT, XEN_MSR_PAT);
1345         __vmwrite(GUEST_PAT, MSR_IA32_CR_PAT_RESET);
1346     }
1347     if ( cpu_has_vmx_mpx )
1348         __vmwrite(GUEST_BNDCFGS, 0);
1349     if ( cpu_has_vmx_xsaves )
1350         __vmwrite(XSS_EXIT_BITMAP, 0);
1351 
1352     if ( cpu_has_vmx_tsc_scaling )
1353         __vmwrite(TSC_MULTIPLIER, d->arch.hvm.tsc_scaling_ratio);
1354 
1355     if ( cpu_has_vmx_virt_spec_ctrl )
1356     {
1357         __vmwrite(SPEC_CTRL_MASK, 0);
1358         __vmwrite(SPEC_CTRL_SHADOW, 0);
1359     }
1360 
    /* This will update HOST_CR3 and GUEST_CR3 as required. */
1362     paging_update_paging_modes(v);
1363 
1364     vmx_vlapic_msr_changed(v);
1365 
1366     if ( opt_l1d_flush && paging_mode_hap(d) )
1367         rc = vmx_add_msr(v, MSR_FLUSH_CMD, FLUSH_CMD_L1D,
1368                          VMX_MSR_GUEST_LOADONLY);
1369 
1370     if ( !rc && (d->arch.scf & SCF_entry_ibpb) )
1371         rc = vmx_add_msr(v, MSR_PRED_CMD, PRED_CMD_IBPB,
1372                          VMX_MSR_HOST);
1373 
1374  out:
1375     vmx_vmcs_exit(v);
1376 
1377     return rc;
1378 }
1379 
1380 /*
1381  * Search an MSR list looking for an MSR entry, or the slot in which it should
1382  * live (to keep the data sorted) if an entry is not found.
1383  *
1384  * The return pointer is guaranteed to be bounded by start and end.  However,
1385  * it may point at end, and may be invalid for the caller to dereference.
1386  */
static struct vmx_msr_entry *locate_msr_entry(
1388     struct vmx_msr_entry *start, struct vmx_msr_entry *end, uint32_t msr)
1389 {
1390     while ( start < end )
1391     {
1392         struct vmx_msr_entry *mid = start + (end - start) / 2;
1393 
1394         if ( msr < mid->index )
1395             end = mid;
1396         else if ( msr > mid->index )
1397             start = mid + 1;
1398         else
1399             return mid;
1400     }
1401 
1402     return start;
1403 }
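/*
 * The above is a standard lower-bound binary search.  Example (illustrative
 * values): for a list sorted as {0x10, 0x20, 0x30}, looking up MSR 0x25
 * returns a pointer to the 0x30 entry, i.e. the slot where a new 0x25 entry
 * would have to be inserted to keep the list sorted.
 */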
1404 
struct vmx_msr_entry *vmx_find_msr(const struct vcpu *v, uint32_t msr,
1406                                    enum vmx_msr_list_type type)
1407 {
1408     const struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1409     struct vmx_msr_entry *start = NULL, *ent, *end;
1410     unsigned int substart = 0, subend = vmx->msr_save_count;
1411     unsigned int total = vmx->msr_load_count;
1412 
1413     ASSERT(v == current || !vcpu_runnable(v));
1414 
1415     switch ( type )
1416     {
1417     case VMX_MSR_HOST:
1418         start    = vmx->host_msr_area;
1419         subend   = vmx->host_msr_count;
1420         total    = subend;
1421         break;
1422 
1423     case VMX_MSR_GUEST:
1424         start    = vmx->msr_area;
1425         break;
1426 
1427     case VMX_MSR_GUEST_LOADONLY:
1428         start    = vmx->msr_area;
1429         substart = subend;
1430         subend   = total;
1431         break;
1432 
1433     default:
1434         ASSERT_UNREACHABLE();
1435         break;
1436     }
1437 
1438     if ( !start )
1439         return NULL;
1440 
1441     end = start + total;
1442     ent = locate_msr_entry(start + substart, start + subend, msr);
1443 
1444     return ((ent < end) && (ent->index == msr)) ? ent : NULL;
1445 }
1446 
int vmx_add_msr(struct vcpu *v, uint32_t msr, uint64_t val,
1448                 enum vmx_msr_list_type type)
1449 {
1450     struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1451     struct vmx_msr_entry **ptr, *start = NULL, *ent, *end;
1452     unsigned int substart, subend, total;
1453     int rc;
1454 
1455     ASSERT(v == current || !vcpu_runnable(v));
1456 
1457     switch ( type )
1458     {
1459     case VMX_MSR_HOST:
1460         ptr      = &vmx->host_msr_area;
1461         substart = 0;
1462         subend   = vmx->host_msr_count;
1463         total    = subend;
1464         break;
1465 
1466     case VMX_MSR_GUEST:
1467         ptr      = &vmx->msr_area;
1468         substart = 0;
1469         subend   = vmx->msr_save_count;
1470         total    = vmx->msr_load_count;
1471         break;
1472 
1473     case VMX_MSR_GUEST_LOADONLY:
1474         ptr      = &vmx->msr_area;
1475         substart = vmx->msr_save_count;
1476         subend   = vmx->msr_load_count;
1477         total    = subend;
1478         break;
1479 
1480     default:
1481         ASSERT_UNREACHABLE();
1482         return -EINVAL;
1483     }
1484 
1485     vmx_vmcs_enter(v);
1486 
1487     /* Allocate memory on first use. */
1488     if ( unlikely(!*ptr) )
1489     {
1490         paddr_t addr;
1491 
1492         if ( (*ptr = alloc_xenheap_page()) == NULL )
1493         {
1494             rc = -ENOMEM;
1495             goto out;
1496         }
1497 
1498         addr = virt_to_maddr(*ptr);
1499 
1500         switch ( type )
1501         {
1502         case VMX_MSR_HOST:
1503             __vmwrite(VM_EXIT_MSR_LOAD_ADDR, addr);
1504             break;
1505 
1506         case VMX_MSR_GUEST:
1507         case VMX_MSR_GUEST_LOADONLY:
1508             __vmwrite(VM_EXIT_MSR_STORE_ADDR, addr);
1509             __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, addr);
1510             break;
1511         }
1512     }
1513 
1514     start = *ptr;
1515     end   = start + total;
1516     ent   = locate_msr_entry(start + substart, start + subend, msr);
1517 
1518     if ( (ent < end) && (ent->index == msr) )
1519         goto found;
1520 
1521     /* If there isn't an existing entry for msr, insert room for one. */
1522     if ( total == (PAGE_SIZE / sizeof(*ent)) )
1523     {
1524         rc = -ENOSPC;
1525         goto out;
1526     }
1527 
1528     memmove(ent + 1, ent, sizeof(*ent) * (end - ent));
1529 
1530     ent->index = msr;
1531     ent->mbz = 0;
1532 
1533     switch ( type )
1534     {
1535     case VMX_MSR_HOST:
1536         __vmwrite(VM_EXIT_MSR_LOAD_COUNT, ++vmx->host_msr_count);
1537         break;
1538 
1539     case VMX_MSR_GUEST:
1540         __vmwrite(VM_EXIT_MSR_STORE_COUNT, ++vmx->msr_save_count);
1541 
1542         /* Fallthrough */
1543     case VMX_MSR_GUEST_LOADONLY:
1544         __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, ++vmx->msr_load_count);
1545         break;
1546     }
1547 
1548     /* Set the msr's value. */
1549  found:
1550     ent->data = val;
1551     rc = 0;
1552 
1553  out:
1554     vmx_vmcs_exit(v);
1555 
1556     return rc;
1557 }
1558 
int vmx_del_msr(struct vcpu *v, uint32_t msr, enum vmx_msr_list_type type)
1560 {
1561     struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1562     struct vmx_msr_entry *start = NULL, *ent, *end;
1563     unsigned int substart = 0, subend = vmx->msr_save_count;
1564     unsigned int total = vmx->msr_load_count;
1565 
1566     ASSERT(v == current || !vcpu_runnable(v));
1567 
1568     switch ( type )
1569     {
1570     case VMX_MSR_HOST:
1571         start    = vmx->host_msr_area;
1572         subend   = vmx->host_msr_count;
1573         total    = subend;
1574         break;
1575 
1576     case VMX_MSR_GUEST:
1577         start    = vmx->msr_area;
1578         break;
1579 
1580     case VMX_MSR_GUEST_LOADONLY:
1581         start    = vmx->msr_area;
1582         substart = subend;
1583         subend   = total;
1584         break;
1585 
1586     default:
1587         ASSERT_UNREACHABLE();
1588         return -EINVAL;
1589     }
1590 
1591     if ( !start )
1592         return -ESRCH;
1593 
1594     end = start + total;
1595     ent = locate_msr_entry(start + substart, start + subend, msr);
1596 
1597     if ( (ent == end) || (ent->index != msr) )
1598         return -ESRCH;
1599 
1600     memmove(ent, ent + 1, sizeof(*ent) * (end - ent - 1));
1601 
1602     vmx_vmcs_enter(v);
1603 
1604     switch ( type )
1605     {
1606     case VMX_MSR_HOST:
1607         __vmwrite(VM_EXIT_MSR_LOAD_COUNT, --vmx->host_msr_count);
1608         break;
1609 
1610     case VMX_MSR_GUEST:
1611         __vmwrite(VM_EXIT_MSR_STORE_COUNT, --vmx->msr_save_count);
1612 
1613         /* Fallthrough */
1614     case VMX_MSR_GUEST_LOADONLY:
1615         __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, --vmx->msr_load_count);
1616         break;
1617     }
1618 
1619     vmx_vmcs_exit(v);
1620 
1621     return 0;
1622 }
1623 
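/*
 * Set or clear @vector in the vcpu's EOI-exit bitmap.  Only the software copy
 * is updated here; the containing 64-bit word is flagged in
 * eoi_exitmap_changed so the dirty words can be written into the VMCS later.
 */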
1624 void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector)
1625 {
1626     if ( !test_and_set_bit(vector, v->arch.hvm.vmx.eoi_exit_bitmap) )
1627         set_bit(vector / BITS_PER_LONG,
1628                 &v->arch.hvm.vmx.eoi_exitmap_changed);
1629 }
1630 
1631 void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector)
1632 {
1633     if ( test_and_clear_bit(vector, v->arch.hvm.vmx.eoi_exit_bitmap) )
1634         set_bit(vector / BITS_PER_LONG,
1635                 &v->arch.hvm.vmx.eoi_exitmap_changed);
1636 }
1637 
1638 bool vmx_vcpu_pml_enabled(const struct vcpu *v)
1639 {
1640     return v->arch.hvm.vmx.secondary_exec_control & SECONDARY_EXEC_ENABLE_PML;
1641 }
1642 
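/*
 * Enable PML for a single vcpu: allocate the PML buffer from the domain's
 * paging pool, point PML_ADDRESS at it, initialise GUEST_PML_INDEX to the
 * last entry (hardware fills the buffer from the top downwards), and turn on
 * the PML secondary execution control.
 */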
1643 int vmx_vcpu_enable_pml(struct vcpu *v)
1644 {
1645     if ( vmx_vcpu_pml_enabled(v) )
1646         return 0;
1647 
1648     v->arch.hvm.vmx.pml_pg = v->domain->arch.paging.alloc_page(v->domain);
1649     if ( !v->arch.hvm.vmx.pml_pg )
1650         return -ENOMEM;
1651 
1652     vmx_vmcs_enter(v);
1653 
1654     __vmwrite(PML_ADDRESS, page_to_maddr(v->arch.hvm.vmx.pml_pg));
1655     __vmwrite(GUEST_PML_INDEX, NR_PML_ENTRIES - 1);
1656 
1657     v->arch.hvm.vmx.secondary_exec_control |= SECONDARY_EXEC_ENABLE_PML;
1658 
1659     __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1660               v->arch.hvm.vmx.secondary_exec_control);
1661 
1662     vmx_vmcs_exit(v);
1663 
1664     return 0;
1665 }
1666 
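/*
 * Disable PML for a single vcpu: drain the buffer into the dirty log, clear
 * the execution control, and return the buffer page to the paging pool.
 */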
1667 void vmx_vcpu_disable_pml(struct vcpu *v)
1668 {
1669     if ( !vmx_vcpu_pml_enabled(v) )
1670         return;
1671 
1672     /* Make sure we don't lose any logged GPAs. */
1673     ept_vcpu_flush_pml_buffer(v);
1674 
1675     vmx_vmcs_enter(v);
1676 
1677     v->arch.hvm.vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
1678     __vmwrite(SECONDARY_VM_EXEC_CONTROL,
1679               v->arch.hvm.vmx.secondary_exec_control);
1680 
1681     vmx_vmcs_exit(v);
1682 
1683     v->domain->arch.paging.free_page(v->domain, v->arch.hvm.vmx.pml_pg);
1684     v->arch.hvm.vmx.pml_pg = NULL;
1685 }
1686 
1687 bool vmx_domain_pml_enabled(const struct domain *d)
1688 {
1689     return d->arch.hvm.vmx.status & VMX_DOMAIN_PML_ENABLED;
1690 }
1691 
1692 /*
1693  * This function enables PML for a particular domain. It should be called
1694  * when the domain is paused.
1695  *
1696  * PML needs to be enabled globally for all vcpus of the domain, as the PML
1697  * buffer and PML index are per-vcpu, but the EPT tables are shared by all
1698  * vcpus, so enabling PML on only some of them won't work.
1699  */
1700 int vmx_domain_enable_pml(struct domain *d)
1701 {
1702     struct vcpu *v;
1703     int rc;
1704 
1705     ASSERT(atomic_read(&d->pause_count));
1706 
1707     if ( vmx_domain_pml_enabled(d) )
1708         return 0;
1709 
1710     for_each_vcpu ( d, v )
1711         if ( (rc = vmx_vcpu_enable_pml(v)) != 0 )
1712             goto error;
1713 
1714     d->arch.hvm.vmx.status |= VMX_DOMAIN_PML_ENABLED;
1715 
1716     return 0;
1717 
1718  error:
1719     for_each_vcpu ( d, v )
1720         if ( vmx_vcpu_pml_enabled(v) )
1721             vmx_vcpu_disable_pml(v);
1722     return rc;
1723 }
1724 
1725 /*
1726  * Disable PML for a particular domain. Called when the domain is paused.
1727  *
1728  * As with enabling PML for a domain, disabling it must be done for all
1729  * vcpus at once.
1730  */
1731 void vmx_domain_disable_pml(struct domain *d)
1732 {
1733     struct vcpu *v;
1734 
1735     ASSERT(atomic_read(&d->pause_count));
1736 
1737     if ( !vmx_domain_pml_enabled(d) )
1738         return;
1739 
1740     for_each_vcpu ( d, v )
1741         vmx_vcpu_disable_pml(v);
1742 
1743     d->arch.hvm.vmx.status &= ~VMX_DOMAIN_PML_ENABLED;
1744 }
1745 
1746 /*
1747  * Flush the PML buffers of all vcpus, and add the logged dirty pages to the
1748  * log-dirty radix tree. Called when the domain is paused.
1749  */
1750 void vmx_domain_flush_pml_buffers(struct domain *d)
1751 {
1752     struct vcpu *v;
1753 
1754     ASSERT(atomic_read(&d->pause_count));
1755 
1756     if ( !vmx_domain_pml_enabled(d) )
1757         return;
1758 
1759     for_each_vcpu ( d, v )
1760         ept_vcpu_flush_pml_buffer(v);
1761 }
1762 
1763 static void vmx_vcpu_update_eptp(struct vcpu *v, u64 eptp)
1764 {
1765     vmx_vmcs_enter(v);
1766     __vmwrite(EPT_POINTER, eptp);
1767     vmx_vmcs_exit(v);
1768 }
1769 
1770 /*
1771  * Write the updated EPTP into the VMCS of every vcpu of the domain. Must be
1772  * called when the domain is paused.
1773  */
1774 void vmx_domain_update_eptp(struct domain *d)
1775 {
1776     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1777     struct vcpu *v;
1778 
1779     ASSERT(atomic_read(&d->pause_count));
1780 
1781     for_each_vcpu ( d, v )
1782         vmx_vcpu_update_eptp(v, p2m->ept.eptp);
1783 
1784     ept_sync_domain(p2m);
1785 }
1786 
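/*
 * Allocate this vcpu's VMCS, put it into the "clear" state with VMCLEAR,
 * mark it as not resident on any pCPU and not yet launched, and fill in the
 * initial contents via construct_vmcs().
 */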
1787 int vmx_create_vmcs(struct vcpu *v)
1788 {
1789     struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1790     int rc;
1791 
1792     if ( (vmx->vmcs_pa = vmx_alloc_vmcs()) == 0 )
1793         return -ENOMEM;
1794 
1795     INIT_LIST_HEAD(&vmx->active_list);
1796     __vmpclear(vmx->vmcs_pa);
1797     vmx->active_cpu = -1;
1798     vmx->launched   = 0;
1799 
1800     if ( (rc = construct_vmcs(v)) != 0 )
1801     {
1802         vmx_destroy_vmcs(v);
1803         return rc;
1804     }
1805 
1806     return 0;
1807 }
1808 
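/*
 * Free the vcpu's VMCS, along with the MSR load/save areas and MSR bitmap
 * pages which may have been allocated on demand.
 */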
1809 void vmx_destroy_vmcs(struct vcpu *v)
1810 {
1811     struct vmx_vcpu *vmx = &v->arch.hvm.vmx;
1812 
1813     vmx_clear_vmcs(v);
1814 
1815     vmx_free_vmcs(vmx->vmcs_pa);
1816 
1817     free_xenheap_page(v->arch.hvm.vmx.host_msr_area);
1818     free_xenheap_page(v->arch.hvm.vmx.msr_area);
1819     free_xenheap_page(v->arch.hvm.vmx.msr_bitmap);
1820 }
1821 
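/*
 * Called from the entry path when VMLAUNCH/VMRESUME itself fails.  Log the
 * VM-instruction error, dump the VMCS for the invalid control/host state
 * cases, and crash the offending domain.
 */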
1822 void vmx_vmentry_failure(void)
1823 {
1824     struct vcpu *curr = current;
1825     unsigned long error;
1826 
1827     __vmread(VM_INSTRUCTION_ERROR, &error);
1828     gprintk(XENLOG_ERR, "VM%s error: %#lx\n",
1829             curr->arch.hvm.vmx.launched ? "RESUME" : "LAUNCH", error);
1830 
1831     if ( error == VMX_INSN_INVALID_CONTROL_STATE ||
1832          error == VMX_INSN_INVALID_HOST_STATE )
1833         vmcs_dump_vcpu(curr);
1834 
1835     domain_crash(curr->domain);
1836 }
1837 
1838 void noreturn vmx_asm_do_vmentry(void);
1839 
1840 static void vmx_update_debug_state(struct vcpu *v)
1841 {
1842     if ( v->arch.hvm.debug_state_latch )
1843         v->arch.hvm.vmx.exception_bitmap |= 1U << X86_EXC_BP;
1844     else
1845         v->arch.hvm.vmx.exception_bitmap &= ~(1U << X86_EXC_BP);
1846 
1847     vmx_vmcs_enter(v);
1848     vmx_update_exception_bitmap(v);
1849     vmx_vmcs_exit(v);
1850 }
1851 
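/*
 * Tail of the context switch into a VMX vcpu: reload or migrate the VMCS as
 * needed, refresh debug-state-dependent intercepts and host CR4, then jump
 * to the assembly VM entry path (never returns).
 */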
1852 void cf_check vmx_do_resume(void)
1853 {
1854     struct vcpu *v = current;
1855     bool debug_state;
1856     unsigned long host_cr4;
1857 
1858     if ( v->arch.hvm.vmx.active_cpu == smp_processor_id() )
1859         vmx_vmcs_reload(v);
1860     else
1861     {
1862         /*
1863          * For a pass-through domain, the guest's PCIe device driver may use
1864          * non-snooped I/O and rely on explicit WBINVD or CLFLUSH to flush a
1865          * RAM region.  Since the vcpu may migrate before that WBINVD or
1866          * CLFLUSH, we need to maintain data consistency either by:
1867          *  1: flushing the cache (wbinvd) when the guest is scheduled out if
1868          *     there is no wbinvd exit, or
1869          *  2: executing wbinvd on all dirty pCPUs when the guest wbinvd exits.
1870          * If the VT-d engine can force snooping, neither is needed.
1871          */
1872         if ( has_arch_pdevs(v->domain) && !iommu_snoop
1873                 && !cpu_has_wbinvd_exiting )
1874         {
1875             int cpu = v->arch.hvm.vmx.active_cpu;
1876             if ( cpu != -1 )
1877                 flush_mask(cpumask_of(cpu), FLUSH_CACHE_EVICT);
1878         }
1879 
1880         vmx_clear_vmcs(v);
1881         vmx_load_vmcs(v);
1882         hvm_migrate_timers(v);
1883         hvm_migrate_pirqs(v);
1884         vmx_set_host_env(v);
1885         /*
1886          * Both the n1 and n2 VMCSes need their host environment updated after
1887          * vCPU migration.  The currently loaded VMCS is updated in place, but
1888          * the update of the other VMCS is deferred until it is switched in.
1889          */
1890         v->arch.hvm.vmx.hostenv_migrated = 1;
1891 
1892         hvm_asid_flush_vcpu(v);
1893     }
1894 
1895     debug_state = v->domain->debugger_attached
1896                   || v->domain->arch.monitor.software_breakpoint_enabled
1897                   || v->domain->arch.monitor.singlestep_enabled;
1898 
1899     if ( unlikely(v->arch.hvm.debug_state_latch != debug_state) )
1900     {
1901         v->arch.hvm.debug_state_latch = debug_state;
1902         vmx_update_debug_state(v);
1903     }
1904 
1905     hvm_do_resume(v);
1906 
1907     /* Sync host CR4 in case its value has changed. */
1908     __vmread(HOST_CR4, &host_cr4);
1909     if ( host_cr4 != read_cr4() )
1910         __vmwrite(HOST_CR4, read_cr4());
1911 
1912     reset_stack_and_jump(vmx_asm_do_vmentry);
1913 }
1914 
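/*
 * Read a VMCS field for the dump code below, yielding 0 rather than faulting
 * if the field cannot be read on this hardware.
 */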
1915 static inline unsigned long vmr(unsigned long field)
1916 {
1917     unsigned long val;
1918 
1919     return vmread_safe(field, &val) ? 0 : val;
1920 }
1921 
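/*
 * Width-checked variants of vmr().  VMCS field encodings carry the field
 * width in bits 14:13 (0 = 16-bit, 2 = 32-bit) and use bit 0 to select the
 * high half of 64-bit fields, hence the masks in the BUILD_BUG_ON()s below.
 * E.g. vmr16(GUEST_CS_SELECTOR) or vmr32(VM_EXIT_REASON).
 */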
1922 #define vmr16(fld) ({             \
1923     BUILD_BUG_ON((fld) & 0x6001); \
1924     (uint16_t)vmr(fld);           \
1925 })
1926 
1927 #define vmr32(fld) ({                         \
1928     BUILD_BUG_ON(((fld) & 0x6001) != 0x4000); \
1929     (uint32_t)vmr(fld);                       \
1930 })
1931 
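/*
 * Dump helpers for segment registers and descriptor tables.  They rely on
 * the selector, attribute, limit and base fields of each segment sitting at
 * the same fixed offsets from the corresponding GUEST_ES_* fields.
 */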
1932 static void vmx_dump_sel(const char *name, uint32_t selector)
1933 {
1934     uint32_t sel, attr, limit;
1935     uint64_t base;
1936     sel = vmr(selector);
1937     attr = vmr(selector + (GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR));
1938     limit = vmr(selector + (GUEST_ES_LIMIT - GUEST_ES_SELECTOR));
1939     base = vmr(selector + (GUEST_ES_BASE - GUEST_ES_SELECTOR));
1940     printk("%s: %04x %05x %08x %016"PRIx64"\n", name, sel, attr, limit, base);
1941 }
1942 
1943 static void vmx_dump_sel2(const char *name, uint32_t lim)
1944 {
1945     uint32_t limit;
1946     uint64_t base;
1947     limit = vmr(lim);
1948     base = vmr(lim + (GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
1949     printk("%s:            %08x %016"PRIx64"\n", name, limit, base);
1950 }
1951 
1952 void vmcs_dump_vcpu(struct vcpu *v)
1953 {
1954     struct cpu_user_regs *regs = &v->arch.user_regs;
1955     uint32_t vmentry_ctl, vmexit_ctl;
1956     unsigned long cr4;
1957     uint64_t efer;
1958     unsigned int i, n;
1959 
1960     if ( v == current )
1961         regs = guest_cpu_user_regs();
1962 
1963     vmx_vmcs_enter(v);
1964 
1965     vmentry_ctl = vmr32(VM_ENTRY_CONTROLS);
1966     vmexit_ctl = vmr32(VM_EXIT_CONTROLS);
1967     cr4 = vmr(GUEST_CR4);
1968 
1969     /*
1970      * The guest's EFER setting comes from the GUEST_EFER VMCS field whenever
1971      * available, or from the guest load-only MSR list on Gen1 hardware; the
1972      * entry there may be elided for performance reasons if it is identical to
1973      * Xen's setting.
1974      */
1975     if ( cpu_has_vmx_efer )
1976         efer = vmr(GUEST_EFER);
1977     else if ( vmx_read_guest_loadonly_msr(v, MSR_EFER, &efer) )
1978         efer = read_efer();
1979 
1980     printk("*** Guest State ***\n");
1981     printk("CR0: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
1982            vmr(GUEST_CR0), vmr(CR0_READ_SHADOW), vmr(CR0_GUEST_HOST_MASK));
1983     printk("CR4: actual=0x%016lx, shadow=0x%016lx, gh_mask=%016lx\n",
1984            cr4, vmr(CR4_READ_SHADOW), vmr(CR4_GUEST_HOST_MASK));
1985     printk("CR3 = 0x%016lx\n", vmr(GUEST_CR3));
1986     if ( (v->arch.hvm.vmx.secondary_exec_control &
1987           SECONDARY_EXEC_ENABLE_EPT) &&
1988          (cr4 & X86_CR4_PAE) && !(vmentry_ctl & VM_ENTRY_IA32E_MODE) )
1989     {
1990         printk("PDPTE0 = 0x%016lx  PDPTE1 = 0x%016lx\n",
1991                vmr(GUEST_PDPTE(0)), vmr(GUEST_PDPTE(1)));
1992         printk("PDPTE2 = 0x%016lx  PDPTE3 = 0x%016lx\n",
1993                vmr(GUEST_PDPTE(2)), vmr(GUEST_PDPTE(3)));
1994     }
1995     printk("RSP = 0x%016lx (0x%016lx)  RIP = 0x%016lx (0x%016lx)\n",
1996            vmr(GUEST_RSP), regs->rsp,
1997            vmr(GUEST_RIP), regs->rip);
1998     printk("RFLAGS=0x%08lx (0x%08lx)  DR7 = 0x%016lx\n",
1999            vmr(GUEST_RFLAGS), regs->rflags,
2000            vmr(GUEST_DR7));
2001     printk("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
2002            vmr(GUEST_SYSENTER_ESP),
2003            vmr32(GUEST_SYSENTER_CS), vmr(GUEST_SYSENTER_EIP));
2004     printk("       sel  attr  limit   base\n");
2005     vmx_dump_sel("  CS", GUEST_CS_SELECTOR);
2006     vmx_dump_sel("  DS", GUEST_DS_SELECTOR);
2007     vmx_dump_sel("  SS", GUEST_SS_SELECTOR);
2008     vmx_dump_sel("  ES", GUEST_ES_SELECTOR);
2009     vmx_dump_sel("  FS", GUEST_FS_SELECTOR);
2010     vmx_dump_sel("  GS", GUEST_GS_SELECTOR);
2011     vmx_dump_sel2("GDTR", GUEST_GDTR_LIMIT);
2012     vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
2013     vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
2014     vmx_dump_sel("  TR", GUEST_TR_SELECTOR);
2015     printk("EFER(%s) = 0x%016lx  PAT = 0x%016lx\n",
2016            cpu_has_vmx_efer ? "VMCS" : "MSR LL", efer, vmr(GUEST_PAT));
2017     printk("PreemptionTimer = 0x%08x  SM Base = 0x%08x\n",
2018            vmr32(GUEST_PREEMPTION_TIMER), vmr32(GUEST_SMBASE));
2019     printk("DebugCtl = 0x%016lx  DebugExceptions = 0x%016lx\n",
2020            vmr(GUEST_IA32_DEBUGCTL), vmr(GUEST_PENDING_DBG_EXCEPTIONS));
2021     if ( vmentry_ctl & (VM_ENTRY_LOAD_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_BNDCFGS) )
2022         printk("PerfGlobCtl = 0x%016lx  BndCfgS = 0x%016lx\n",
2023                vmr(GUEST_PERF_GLOBAL_CTRL), vmr(GUEST_BNDCFGS));
2024     printk("Interruptibility = %08x  ActivityState = %08x\n",
2025            vmr32(GUEST_INTERRUPTIBILITY_INFO), vmr32(GUEST_ACTIVITY_STATE));
2026     if ( v->arch.hvm.vmx.secondary_exec_control &
2027          SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY )
2028         printk("InterruptStatus = %04x\n", vmr16(GUEST_INTR_STATUS));
2029     if ( cpu_has_vmx_virt_spec_ctrl )
2030         printk("SPEC_CTRL mask = 0x%016lx  shadow = 0x%016lx\n",
2031                vmr(SPEC_CTRL_MASK), vmr(SPEC_CTRL_SHADOW));
2032 
2033     printk("*** Host State ***\n");
2034     printk("RIP = 0x%016lx (%ps)  RSP = 0x%016lx\n",
2035            vmr(HOST_RIP), (void *)vmr(HOST_RIP), vmr(HOST_RSP));
2036     printk("CS=%04x SS=%04x DS=%04x ES=%04x FS=%04x GS=%04x TR=%04x\n",
2037            vmr16(HOST_CS_SELECTOR), vmr16(HOST_SS_SELECTOR),
2038            vmr16(HOST_DS_SELECTOR), vmr16(HOST_ES_SELECTOR),
2039            vmr16(HOST_FS_SELECTOR), vmr16(HOST_GS_SELECTOR),
2040            vmr16(HOST_TR_SELECTOR));
2041     printk("FSBase=%016lx GSBase=%016lx TRBase=%016lx\n",
2042            vmr(HOST_FS_BASE), vmr(HOST_GS_BASE), vmr(HOST_TR_BASE));
2043     printk("GDTBase=%016lx IDTBase=%016lx\n",
2044            vmr(HOST_GDTR_BASE), vmr(HOST_IDTR_BASE));
2045     printk("CR0=%016lx CR3=%016lx CR4=%016lx\n",
2046            vmr(HOST_CR0), vmr(HOST_CR3), vmr(HOST_CR4));
2047     printk("Sysenter RSP=%016lx CS:RIP=%04x:%016lx\n",
2048            vmr(HOST_SYSENTER_ESP),
2049            vmr32(HOST_SYSENTER_CS), vmr(HOST_SYSENTER_EIP));
2050     if ( vmexit_ctl & (VM_EXIT_LOAD_HOST_PAT | VM_EXIT_LOAD_HOST_EFER) )
2051         printk("EFER = 0x%016lx  PAT = 0x%016lx\n", vmr(HOST_EFER), vmr(HOST_PAT));
2052     if ( vmexit_ctl & VM_EXIT_LOAD_PERF_GLOBAL_CTRL )
2053         printk("PerfGlobCtl = 0x%016lx\n",
2054                vmr(HOST_PERF_GLOBAL_CTRL));
2055 
2056     printk("*** Control State ***\n");
2057     printk("PinBased=%08x CPUBased=%08x\n",
2058            vmr32(PIN_BASED_VM_EXEC_CONTROL),
2059            vmr32(CPU_BASED_VM_EXEC_CONTROL));
2060     printk("SecondaryExec=%08x TertiaryExec=%016lx\n",
2061            vmr32(SECONDARY_VM_EXEC_CONTROL),
2062            vmr(TERTIARY_VM_EXEC_CONTROL));
2063     printk("EntryControls=%08x ExitControls=%08x\n", vmentry_ctl, vmexit_ctl);
2064     printk("ExceptionBitmap=%08x PFECmask=%08x PFECmatch=%08x\n",
2065            vmr32(EXCEPTION_BITMAP),
2066            vmr32(PAGE_FAULT_ERROR_CODE_MASK),
2067            vmr32(PAGE_FAULT_ERROR_CODE_MATCH));
2068     printk("VMEntry: intr_info=%08x errcode=%08x ilen=%08x\n",
2069            vmr32(VM_ENTRY_INTR_INFO),
2070            vmr32(VM_ENTRY_EXCEPTION_ERROR_CODE),
2071            vmr32(VM_ENTRY_INSTRUCTION_LEN));
2072     printk("VMExit: intr_info=%08x errcode=%08x ilen=%08x\n",
2073            vmr32(VM_EXIT_INTR_INFO),
2074            vmr32(VM_EXIT_INTR_ERROR_CODE),
2075            vmr32(VM_EXIT_INSTRUCTION_LEN));
2076     printk("        reason=%08x qualification=%016lx\n",
2077            vmr32(VM_EXIT_REASON), vmr(EXIT_QUALIFICATION));
2078     printk("IDTVectoring: info=%08x errcode=%08x\n",
2079            vmr32(IDT_VECTORING_INFO), vmr32(IDT_VECTORING_ERROR_CODE));
2080     printk("TSC Offset = 0x%016lx  TSC Multiplier = 0x%016lx\n",
2081            vmr(TSC_OFFSET), vmr(TSC_MULTIPLIER));
2082     if ( (v->arch.hvm.vmx.exec_control & CPU_BASED_TPR_SHADOW) ||
2083          (vmx_caps.pin_based_exec_control & PIN_BASED_POSTED_INTERRUPT) )
2084         printk("TPR Threshold = 0x%02x  PostedIntrVec = 0x%02x\n",
2085                vmr32(TPR_THRESHOLD), vmr16(POSTED_INTR_NOTIFICATION_VECTOR));
2086     if ( (v->arch.hvm.vmx.secondary_exec_control &
2087           SECONDARY_EXEC_ENABLE_EPT) )
2088         printk("EPT pointer = 0x%016lx  EPTP index = 0x%04x\n",
2089                vmr(EPT_POINTER), vmr16(EPTP_INDEX));
2090     n = vmr32(CR3_TARGET_COUNT);
2091     for ( i = 0; i + 1 < n; i += 2 )
2092         printk("CR3 target%u=%016lx target%u=%016lx\n",
2093                i, vmr(CR3_TARGET_VALUE(i)),
2094                i + 1, vmr(CR3_TARGET_VALUE(i + 1)));
2095     if ( i < n )
2096         printk("CR3 target%u=%016lx\n", i, vmr(CR3_TARGET_VALUE(i)));
2097     if ( v->arch.hvm.vmx.secondary_exec_control &
2098          SECONDARY_EXEC_PAUSE_LOOP_EXITING )
2099         printk("PLE Gap=%08x Window=%08x\n",
2100                vmr32(PLE_GAP), vmr32(PLE_WINDOW));
2101     if ( v->arch.hvm.vmx.secondary_exec_control &
2102          (SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_ENABLE_VM_FUNCTIONS) )
2103         printk("Virtual processor ID = 0x%04x VMfunc controls = %016lx\n",
2104                vmr16(VIRTUAL_PROCESSOR_ID), vmr(VM_FUNCTION_CONTROL));
2105 
2106     vmx_vmcs_exit(v);
2107 }
2108 
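/* Handler for the 'v' debug key: dump the VMCS of every initialised HVM vcpu. */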
2109 static void cf_check vmcs_dump(unsigned char ch)
2110 {
2111     struct domain *d;
2112     struct vcpu *v;
2113 
2114     printk("*********** VMCS Areas **************\n");
2115 
2116     rcu_read_lock(&domlist_read_lock);
2117 
2118     for_each_domain ( d )
2119     {
2120         if ( !is_hvm_domain(d) )
2121             continue;
2122         printk("\n>>> Domain %d <<<\n", d->domain_id);
2123         for_each_vcpu ( d, v )
2124         {
2125             if ( !v->is_initialised )
2126             {
2127                 printk("\tVCPU %u: not initialized\n", v->vcpu_id);
2128                 continue;
2129             }
2130             printk("\tVCPU %d\n", v->vcpu_id);
2131             vmcs_dump_vcpu(v);
2132 
2133             process_pending_softirqs();
2134         }
2135     }
2136 
2137     rcu_read_unlock(&domlist_read_lock);
2138 
2139     printk("**************************************\n");
2140 }
2141 
2142 int __init vmx_vmcs_init(void)
2143 {
2144     int ret;
2145 
2146     if ( opt_ept_ad < 0 )
2147         /* Work around Erratum AVR41 on Avoton processors. */
2148         opt_ept_ad = !(boot_cpu_data.x86 == 6 &&
2149                        boot_cpu_data.x86_model == 0x4d);
2150 
2151     ret = _vmx_cpu_up(true);
2152 
2153     if ( !ret )
2154         register_keyhandler('v', vmcs_dump, "dump VT-x VMCSs", 1);
2155     else
2156     {
2157         setup_clear_cpu_cap(X86_FEATURE_VMX);
2158 
2159         /*
2160          * _vmx_cpu_up() may have made it past feature identification.
2161          * Make sure all dependent features are off as well.
2162          */
2163         memset(&vmx_caps, 0, sizeof(vmx_caps));
2164     }
2165 
2166     return ret;
2167 }
2168 
2169 static void __init __maybe_unused build_assertions(void)
2170 {
2171     struct vmx_msr_bitmap bitmap;
2172 
2173     /* Check the vmx_msr_bitmap layout against hardware expectations. */
2174     BUILD_BUG_ON(sizeof(bitmap)            != PAGE_SIZE);
2175     BUILD_BUG_ON(sizeof(bitmap.read_low)   != 1024);
2176     BUILD_BUG_ON(sizeof(bitmap.read_high)  != 1024);
2177     BUILD_BUG_ON(sizeof(bitmap.write_low)  != 1024);
2178     BUILD_BUG_ON(sizeof(bitmap.write_high) != 1024);
2179     BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, read_low)   != 0);
2180     BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, read_high)  != 1024);
2181     BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, write_low)  != 2048);
2182     BUILD_BUG_ON(offsetof(struct vmx_msr_bitmap, write_high) != 3072);
2183 }
2184 
2185 /*
2186  * Local variables:
2187  * mode: C
2188  * c-file-style: "BSD"
2189  * c-basic-offset: 4
2190  * tab-width: 4
2191  * indent-tabs-mode: nil
2192  * End:
2193  */
2194