/*
 * vpmu.c: PMU virtualization for HVM domain.
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Haitao Shan <haitao.shan@intel.com>
 */
#include <xen/sched.h>
#include <xen/xenoprof.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/cpu.h>
#include <asm/regs.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/p2m.h>
#include <asm/vpmu.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/apic.h>
#include <public/pmu.h>
#include <xsm/xsm.h>

#include <compat/pmu.h>
CHECK_pmu_cntr_pair;
CHECK_pmu_data;
CHECK_pmu_params;
/*
 * "vpmu"      : vpmu generally enabled (all counters)
 * "vpmu=off"  : vpmu generally disabled
 * "vpmu=bts"  : vpmu enabled and Intel BTS feature switched on.
 * "vpmu=ipc"  : vpmu enabled for IPC counters only (most restrictive)
 * "vpmu=arch" : vpmu enabled for predefined arch counters only (restrictive)
 * Flag combinations are allowed, e.g., "vpmu=ipc,bts".
 */
static unsigned int __read_mostly opt_vpmu_enabled;
unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
unsigned int __read_mostly vpmu_features = 0;
static int parse_vpmu_params(const char *s);
custom_param("vpmu", parse_vpmu_params);

static DEFINE_SPINLOCK(vpmu_lock);
static unsigned vpmu_count;

static DEFINE_PER_CPU(struct vcpu *, last_vcpu);

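/*
 * Parse a single comma-separated "vpmu=" flag of the given length, ORing
 * the matching feature bit into vpmu_features. Returns 0 on success
 * (including an empty token), 1 if the flag is not recognized.
 */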
static int parse_vpmu_param(const char *s, unsigned int len)
{
    if ( !*s || !len )
        return 0;
    if ( !strncmp(s, "bts", len) )
        vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
    else if ( !strncmp(s, "ipc", len) )
        vpmu_features |= XENPMU_FEATURE_IPC_ONLY;
    else if ( !strncmp(s, "arch", len) )
        vpmu_features |= XENPMU_FEATURE_ARCH_ONLY;
    else
        return 1;
    return 0;
}

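/*
 * Parse the "vpmu" command line option. A plain boolean enables or disables
 * VPMU; otherwise each comma-separated flag is handed to parse_vpmu_param().
 * Any successfully parsed flag string also selects the default mode
 * (XENPMU_MODE_SELF).
 */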
static int __init parse_vpmu_params(const char *s)
{
    const char *sep, *p = s;

    switch ( parse_bool(s, NULL) )
    {
    case 0:
        break;
    default:
        for ( ; ; )
        {
            sep = strchr(p, ',');
            if ( sep == NULL )
                sep = strchr(p, 0);
            if ( parse_vpmu_param(p, sep - p) )
                goto error;
            if ( !*sep )
                /* reached end of flags */
                break;
            p = sep + 1;
        }
        /* fall through */
    case 1:
        /* Default VPMU mode */
        vpmu_mode = XENPMU_MODE_SELF;
        opt_vpmu_enabled = 1;
        break;
    }
    return 0;

 error:
    printk("VPMU: unknown flags: %s - vpmu disabled!\n", s);
    return -EINVAL;
}

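/*
 * Update the cached LVTPC value for the current vCPU and, unless a PMU
 * interrupt is still pending for a PV(H) guest (VPMU_CACHED), write it
 * through to the local APIC.
 */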
void vpmu_lvtpc_update(uint32_t val)
{
    struct vpmu_struct *vpmu;
    struct vcpu *curr = current;

    if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
        return;

    vpmu = vcpu_vpmu(curr);

    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);

    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
    if ( has_vlapic(curr->domain) || !vpmu->xenpmu_data ||
         !vpmu_is_set(vpmu, VPMU_CACHED) )
        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
}

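/*
 * Dispatch a guest PMU MSR access to the vendor-specific handler. When VPMU
 * is off, or the MSRs must be hidden from this domain, the access becomes a
 * no-op: writes are dropped and reads (other than MSR_IA32_MISC_ENABLE)
 * return 0.
 */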
int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
                uint64_t supported, bool_t is_write)
{
    struct vcpu *curr = current;
    struct vpmu_struct *vpmu;
    const struct arch_vpmu_ops *ops;
    int ret = 0;

    /*
     * Hide the PMU MSRs if vpmu is not configured, or the hardware domain is
     * profiling the whole system.
     */
    if ( likely(vpmu_mode == XENPMU_MODE_OFF) ||
         ((vpmu_mode & XENPMU_MODE_ALL) &&
          !is_hardware_domain(curr->domain)) )
        goto nop;

    vpmu = vcpu_vpmu(curr);
    ops = vpmu->arch_vpmu_ops;
    if ( !ops )
        goto nop;

    if ( is_write && ops->do_wrmsr )
        ret = ops->do_wrmsr(msr, *msr_content, supported);
    else if ( !is_write && ops->do_rdmsr )
        ret = ops->do_rdmsr(msr, msr_content);
    else
        goto nop;

    /*
     * We may have received a PMU interrupt while handling MSR access
     * and since do_wr/rdmsr may load VPMU context we should save
     * (and unload) it again.
     */
    if ( !has_vlapic(curr->domain) && vpmu->xenpmu_data &&
         vpmu_is_set(vpmu, VPMU_CACHED) )
    {
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
        ops->arch_vpmu_save(curr, 0);
        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
    }

    return ret;

 nop:
    if ( !is_write && (msr != MSR_IA32_MISC_ENABLE) )
        *msr_content = 0;

    return 0;
}

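/* Pick a hardware domain vCPU to receive a sample, keyed by current pCPU. */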
static inline struct vcpu *choose_hwdom_vcpu(void)
{
    unsigned idx;

    if ( hardware_domain->max_vcpus == 0 )
        return NULL;

    idx = smp_processor_id() % hardware_domain->max_vcpus;

    return hardware_domain->vcpu[idx];
}

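/*
 * PMU interrupt handler. Samples for PV(H) guests (and, in XENPMU_MODE_ALL,
 * for everyone) are written into the shared xenpmu_data page and signalled
 * via VIRQ_XENPMU; HVM guests have the interrupt injected through their
 * virtual local APIC.
 */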
void vpmu_do_interrupt(struct cpu_user_regs *regs)
{
    struct vcpu *sampled = current, *sampling;
    struct vpmu_struct *vpmu;
    struct vlapic *vlapic;
    u32 vlapic_lvtpc;

    /*
     * dom0 will handle interrupt for special domains (e.g. idle domain) or,
     * in XENPMU_MODE_ALL, for everyone.
     */
    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
    {
        sampling = choose_hwdom_vcpu();
        if ( !sampling )
            return;
    }
    else
        sampling = sampled;

    vpmu = vcpu_vpmu(sampling);
    if ( !vpmu->arch_vpmu_ops )
        return;

    /* PV(H) guest */
    if ( !has_vlapic(sampling->domain) || (vpmu_mode & XENPMU_MODE_ALL) )
    {
        const struct cpu_user_regs *cur_regs;
        uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
        domid_t domid;

        if ( !vpmu->xenpmu_data )
            return;

        if ( vpmu_is_set(vpmu, VPMU_CACHED) )
            return;

        /* PV guest will be reading PMU MSRs from xenpmu_data */
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);

        if ( is_hvm_vcpu(sampled) )
            *flags = 0;
        else
            *flags = PMU_SAMPLE_PV;

        if ( sampled == sampling )
            domid = DOMID_SELF;
        else
            domid = sampled->domain->domain_id;

        /* Store appropriate registers in xenpmu_data */
        /* FIXME: 32-bit PVH should go here as well */
        if ( is_pv_32bit_vcpu(sampling) )
        {
            /*
             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
             * and therefore we treat it the same way as a non-privileged
             * PV 32-bit domain.
             */
            struct compat_pmu_regs *cmp;

            cur_regs = guest_cpu_user_regs();

            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
            cmp->ip = cur_regs->rip;
            cmp->sp = cur_regs->rsp;
            cmp->flags = cur_regs->rflags;
            cmp->ss = cur_regs->ss;
            cmp->cs = cur_regs->cs;
            if ( (cmp->cs & 3) > 1 )
                *flags |= PMU_SAMPLE_USER;
        }
        else
        {
            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;

            if ( (vpmu_mode & XENPMU_MODE_SELF) )
                cur_regs = guest_cpu_user_regs();
            else if ( !guest_mode(regs) &&
                      is_hardware_domain(sampling->domain) )
            {
                cur_regs = regs;
                domid = DOMID_XEN;
            }
            else
                cur_regs = guest_cpu_user_regs();

            r->ip = cur_regs->rip;
            r->sp = cur_regs->rsp;
            r->flags = cur_regs->rflags;

            if ( !is_hvm_vcpu(sampled) )
            {
                r->ss = cur_regs->ss;
                r->cs = cur_regs->cs;
                if ( !(sampled->arch.flags & TF_kernel_mode) )
                    *flags |= PMU_SAMPLE_USER;
            }
            else
            {
                struct segment_register seg;

                hvm_get_segment_register(sampled, x86_seg_cs, &seg);
                r->cs = seg.sel;
                hvm_get_segment_register(sampled, x86_seg_ss, &seg);
                r->ss = seg.sel;
                r->cpl = seg.dpl;
                if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
                    *flags |= PMU_SAMPLE_REAL;
            }
        }

        vpmu->xenpmu_data->domain_id = domid;
        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
        if ( is_hardware_domain(sampling->domain) )
            vpmu->xenpmu_data->pcpu_id = smp_processor_id();
        else
            vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;

        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        *flags |= PMU_CACHED;
        vpmu_set(vpmu, VPMU_CACHED);

        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);

        return;
    }

    /* HVM guests */
    vlapic = vcpu_vlapic(sampling);

    /* We don't support (yet) HVM dom0 */
    ASSERT(sampling == sampled);

    if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
         !is_vlapic_lvtpc_enabled(vlapic) )
        return;

    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);

    switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
    {
    case APIC_MODE_FIXED:
        vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
        break;
    case APIC_MODE_NMI:
        sampling->nmi_pending = 1;
        break;
    }
}

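/*
 * Force-save a vCPU's PMU context on the pCPU it last ran on. Runs with
 * interrupts disabled, either locally or as an on_selected_cpus() callback.
 */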
static void vpmu_save_force(void *arg)
{
    struct vcpu *v = (struct vcpu *)arg;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        return;

    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);

    if ( vpmu->arch_vpmu_ops )
        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);

    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);

    per_cpu(last_vcpu, smp_processor_id()) = NULL;
}

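/*
 * Save v's PMU context when it is descheduled, remembering the pCPU it ran
 * on, and mask the PMU interrupt on this pCPU.
 */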
void vpmu_save(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    int pcpu = smp_processor_id();

    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) )
        return;

    vpmu->last_pcpu = pcpu;
    per_cpu(last_vcpu, pcpu) = v;

    if ( vpmu->arch_vpmu_ops )
        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);

    apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
}

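/*
 * Load v's PMU context on the current pCPU when v is scheduled in, first
 * flushing any context of v still live on a remote pCPU and any other
 * vCPU's context on this one. The context is only reloaded while the VPMU
 * is actively running and, for PV(H), no interrupt is pending (VPMU_CACHED).
 */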
int vpmu_load(struct vcpu *v, bool_t from_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    int pcpu = smp_processor_id();
    struct vcpu *prev = NULL;

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return 0;

    /* First time this VCPU is running here */
    if ( vpmu->last_pcpu != pcpu )
    {
        /*
         * Get the context from the last pcpu that we ran on. Note that if
         * another VCPU is running there it must have saved this VCPU's
         * context before starting to run (see below).
         * There should be no race since the remote pcpu will disable
         * interrupts before saving the context.
         */
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        {
            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                             vpmu_save_force, (void *)v, 1);
            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
        }
    }

    /* Prevent forced context save from remote CPU */
    local_irq_disable();

    prev = per_cpu(last_vcpu, pcpu);

    if ( prev != v && prev )
    {
        vpmu = vcpu_vpmu(prev);

        /* Someone ran here before us */
        vpmu_save_force(prev);
        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);

        vpmu = vcpu_vpmu(v);
    }

    local_irq_enable();

    /* Only load the PMU context immediately when it is actively counting. */
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
         (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
          vpmu_is_set(vpmu, VPMU_CACHED)) )
        return 0;

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
    {
        int ret;

        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        /* Arch code needs to set VPMU_CONTEXT_LOADED */
        ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
        if ( ret )
        {
            apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
            return ret;
        }
    }

    return 0;
}

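/*
 * Vendor-specific part of VPMU initialization: set up the per-vCPU PMU
 * context via the SVM or VMX code.
 */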
static int vpmu_arch_initialise(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    uint8_t vendor = current_cpu_data.x86_vendor;
    int ret;

    BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);

    ASSERT(!(vpmu->flags & ~VPMU_AVAILABLE) && !vpmu->context);

    if ( !vpmu_available(v) )
        return 0;

    switch ( vendor )
    {
    case X86_VENDOR_AMD:
        ret = svm_vpmu_initialise(v);
        break;

    case X86_VENDOR_INTEL:
        ret = vmx_vpmu_initialise(v);
        break;

    default:
        if ( vpmu_mode != XENPMU_MODE_OFF )
        {
            printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
                   "Disabling VPMU\n", vendor);
            opt_vpmu_enabled = 0;
            vpmu_mode = XENPMU_MODE_OFF;
        }
        return -EINVAL;
    }

    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;

    if ( ret )
        printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);

    return ret;
}

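/*
 * Mark v's VPMU as available and, for non-hardware domains, account for it
 * in vpmu_count so that vpmu_mode cannot change under an active user.
 */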
static void get_vpmu(struct vcpu *v)
{
    spin_lock(&vpmu_lock);

    /*
     * Keep count of VPMUs in the system so that we won't try to change
     * vpmu_mode while a guest might be using one.
     * vpmu_mode can be safely updated while dom0's VPMUs are active and
     * so we don't need to include it in the count.
     */
    if ( !is_hardware_domain(v->domain) &&
         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
    {
        vpmu_count++;
        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
    }
    else if ( is_hardware_domain(v->domain) &&
              (vpmu_mode != XENPMU_MODE_OFF) )
        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);

    spin_unlock(&vpmu_lock);
}

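/* Undo get_vpmu(): drop the count and the VPMU_AVAILABLE flag. */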
static void put_vpmu(struct vcpu *v)
{
    spin_lock(&vpmu_lock);

    if ( !vpmu_available(v) )
        goto out;

    if ( !is_hardware_domain(v->domain) &&
         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
    {
        vpmu_count--;
        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
    }
    else if ( is_hardware_domain(v->domain) &&
              (vpmu_mode != XENPMU_MODE_OFF) )
        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);

 out:
    spin_unlock(&vpmu_lock);
}

void vpmu_initialise(struct vcpu *v)
{
    get_vpmu(v);

    /*
     * Guests without LAPIC (i.e. PV) call vpmu_arch_initialise()
     * from pvpmu_init().
     */
    if ( has_vlapic(v->domain) && vpmu_arch_initialise(v) )
        put_vpmu(v);
}

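/* IPI handler: clear this pCPU's last_vcpu pointer if it matches arg. */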
static void vpmu_clear_last(void *arg)
{
    if ( this_cpu(last_vcpu) == arg )
        this_cpu(last_vcpu) = NULL;
}

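/*
 * Tear down the vendor-specific PMU context, first flushing any state still
 * loaded on a remote pCPU and clearing that pCPU's last_vcpu pointer.
 */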
static void vpmu_arch_destroy(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return;

    /*
     * Need to clear last_vcpu in case it points to v.
     * We can check here non-atomically whether it is 'v' since
     * last_vcpu can never become 'v' again at this point.
     * We will test it again in vpmu_clear_last() with interrupts
     * disabled to make sure we don't clear someone else.
     */
    if ( cpu_online(vpmu->last_pcpu) &&
         per_cpu(last_vcpu, vpmu->last_pcpu) == v )
        on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                         vpmu_clear_last, v, 1);

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
    {
        /*
         * Unload the VPMU first if VPMU_CONTEXT_LOADED is set.
         * This will stop the counters.
         */
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
            on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
                             vpmu_save_force, v, 1);

        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
    }
}

void vpmu_destroy(struct vcpu *v)
{
    vpmu_arch_destroy(v);

    put_vpmu(v);
}

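/*
 * XENPMU_init: map the guest page at params->val (a GFN) as the shared
 * xenpmu_data region for the given vCPU and set up its PMU context.
 */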
static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
{
    struct vcpu *v;
    struct vpmu_struct *vpmu;
    struct page_info *page;
    uint64_t gfn = params->val;

    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
        return -EINVAL;

    v = d->vcpu[params->vcpu];
    vpmu = vcpu_vpmu(v);

    if ( !vpmu_available(v) )
        return -ENOENT;

    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
    if ( !page )
        return -EINVAL;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        return -EINVAL;
    }

    spin_lock(&vpmu->vpmu_lock);

    if ( v->arch.vpmu.xenpmu_data )
    {
        spin_unlock(&vpmu->vpmu_lock);
        put_page_and_type(page);
        return -EEXIST;
    }

    v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
    if ( !v->arch.vpmu.xenpmu_data )
    {
        spin_unlock(&vpmu->vpmu_lock);
        put_page_and_type(page);
        return -ENOMEM;
    }

    if ( vpmu_arch_initialise(v) )
        put_vpmu(v);

    spin_unlock(&vpmu->vpmu_lock);

    return 0;
}

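/*
 * XENPMU_finish: tear down the vCPU's PMU context and unmap/release the
 * shared xenpmu_data page.
 */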
static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
{
    struct vcpu *v;
    struct vpmu_struct *vpmu;
    uint64_t mfn;
    void *xenpmu_data;

    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
        return;

    v = d->vcpu[params->vcpu];
    if ( v != current )
        vcpu_pause(v);

    vpmu = vcpu_vpmu(v);
    spin_lock(&vpmu->vpmu_lock);

    vpmu_arch_destroy(v);
    xenpmu_data = vpmu->xenpmu_data;
    vpmu->xenpmu_data = NULL;

    spin_unlock(&vpmu->vpmu_lock);

    if ( xenpmu_data )
    {
        mfn = domain_page_map_to_mfn(xenpmu_data);
        ASSERT(mfn_valid(_mfn(mfn)));
        unmap_domain_page_global(xenpmu_data);
        put_page_and_type(mfn_to_page(mfn));
    }

    if ( v != current )
        vcpu_unpause(v);
}

/* Dump some vpmu information to the console. Used in keyhandler dump_domains(). */
void vpmu_dump(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump )
        vpmu->arch_vpmu_ops->arch_vpmu_dump(v);
}

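/*
 * Hypercall handler for PV(H) guests: mode/feature get and set, per-vCPU
 * init/finish, LVTPC updates, and PMU context flushes.
 */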
long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
{
    int ret;
    struct vcpu *curr;
    struct xen_pmu_params pmu_params = {.val = 0};
    struct xen_pmu_data *xenpmu_data;
    struct vpmu_struct *vpmu;

    if ( !opt_vpmu_enabled || has_vlapic(current->domain) )
        return -EOPNOTSUPP;

    ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
    if ( ret )
        return ret;

    /* Check major version when parameters are specified */
    switch ( op )
    {
    case XENPMU_mode_set:
    case XENPMU_feature_set:
    case XENPMU_init:
    case XENPMU_finish:
        if ( copy_from_guest(&pmu_params, arg, 1) )
            return -EFAULT;

        if ( pmu_params.version.maj != XENPMU_VER_MAJ )
            return -EINVAL;
    }

    switch ( op )
    {
    case XENPMU_mode_set:
    {
        if ( (pmu_params.val &
              ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) ||
             (hweight64(pmu_params.val) > 1) )
            return -EINVAL;

        /* 32-bit dom0 can only sample itself. */
        if ( is_pv_32bit_vcpu(current) &&
             (pmu_params.val & (XENPMU_MODE_HV | XENPMU_MODE_ALL)) )
            return -EINVAL;

        spin_lock(&vpmu_lock);

        /*
         * We can always safely switch between XENPMU_MODE_SELF and
         * XENPMU_MODE_HV while other VPMUs are active.
         */
        if ( (vpmu_count == 0) ||
             ((vpmu_mode ^ pmu_params.val) ==
              (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
            vpmu_mode = pmu_params.val;
        else if ( vpmu_mode != pmu_params.val )
        {
            gprintk(XENLOG_WARNING,
                    "VPMU: Cannot change mode while active VPMUs exist\n");
            ret = -EBUSY;
        }

        spin_unlock(&vpmu_lock);

        break;
    }

    case XENPMU_mode_get:
        memset(&pmu_params, 0, sizeof(pmu_params));
        pmu_params.val = vpmu_mode;

        pmu_params.version.maj = XENPMU_VER_MAJ;
        pmu_params.version.min = XENPMU_VER_MIN;

        if ( copy_to_guest(arg, &pmu_params, 1) )
            ret = -EFAULT;

        break;

    case XENPMU_feature_set:
        if ( pmu_params.val & ~(XENPMU_FEATURE_INTEL_BTS |
                                XENPMU_FEATURE_IPC_ONLY |
                                XENPMU_FEATURE_ARCH_ONLY) )
            return -EINVAL;

        spin_lock(&vpmu_lock);

        if ( (vpmu_count == 0) || (vpmu_features == pmu_params.val) )
            vpmu_features = pmu_params.val;
        else
        {
            gprintk(XENLOG_WARNING,
                    "VPMU: Cannot change features while active VPMUs exist\n");
            ret = -EBUSY;
        }

        spin_unlock(&vpmu_lock);

        break;

    case XENPMU_feature_get:
        pmu_params.val = vpmu_features;
        if ( copy_field_to_guest(arg, &pmu_params, val) )
            ret = -EFAULT;

        break;

    case XENPMU_init:
        ret = pvpmu_init(current->domain, &pmu_params);
        break;

    case XENPMU_finish:
        pvpmu_finish(current->domain, &pmu_params);
        break;

    case XENPMU_lvtpc_set:
        xenpmu_data = current->arch.vpmu.xenpmu_data;
        if ( xenpmu_data != NULL )
            vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
        else
            ret = -EINVAL;
        break;

    case XENPMU_flush:
        curr = current;
        vpmu = vcpu_vpmu(curr);
        xenpmu_data = curr->arch.vpmu.xenpmu_data;
        if ( xenpmu_data == NULL )
            return -EINVAL;
        xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
        vpmu_reset(vpmu, VPMU_CACHED);
        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
        if ( vpmu_load(curr, 1) )
        {
            xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
            vpmu_set(vpmu, VPMU_CACHED);
            ret = -EIO;
        }
        break;

    default:
        ret = -EINVAL;
    }

    return ret;
}

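/*
 * CPU notifier: on CPU_DYING, force-save the context of whichever vCPU last
 * used the PMU on the dying pCPU.
 */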
static int cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    struct vcpu *vcpu = per_cpu(last_vcpu, cpu);
    struct vpmu_struct *vpmu;

    if ( !vcpu )
        return NOTIFY_DONE;

    vpmu = vcpu_vpmu(vcpu);
    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return NOTIFY_DONE;

    if ( action == CPU_DYING )
    {
        vpmu_save_force(vcpu);
        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};

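/*
 * Boot-time initialization: bail out if VPMU is disabled or the NMI watchdog
 * owns the performance counters, otherwise do vendor setup and register the
 * CPU notifier.
 */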
static int __init vpmu_init(void)
{
    int vendor = current_cpu_data.x86_vendor;

    if ( !opt_vpmu_enabled )
    {
        printk(XENLOG_INFO "VPMU: disabled\n");
        return 0;
    }

    /* NMI watchdog uses LVTPC and HW counter */
    if ( opt_watchdog && opt_vpmu_enabled )
    {
        printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n");
        opt_vpmu_enabled = 0;
        vpmu_mode = XENPMU_MODE_OFF;
        return 0;
    }

    switch ( vendor )
    {
    case X86_VENDOR_AMD:
        if ( amd_vpmu_init() )
            vpmu_mode = XENPMU_MODE_OFF;
        break;
    case X86_VENDOR_INTEL:
        if ( core2_vpmu_init() )
            vpmu_mode = XENPMU_MODE_OFF;
        break;
    default:
        printk(XENLOG_WARNING "VPMU: Unknown CPU vendor: %d. "
               "Turning VPMU off.\n", vendor);
        vpmu_mode = XENPMU_MODE_OFF;
        break;
    }

    if ( vpmu_mode != XENPMU_MODE_OFF )
    {
        register_cpu_notifier(&cpu_nfb);
        printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
               __stringify(XENPMU_VER_MIN) "\n");
    }
    else
        opt_vpmu_enabled = 0;

    return 0;
}
__initcall(vpmu_init);