/*
 * vpmu.c: PMU virtualization for HVM domain.
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Haitao Shan <haitao.shan@intel.com>
 */
#include <xen/sched.h>
#include <xen/xenoprof.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/cpu.h>
#include <asm/regs.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/p2m.h>
#include <asm/vpmu.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <asm/hvm/svm/svm.h>
#include <asm/hvm/svm/vmcb.h>
#include <asm/apic.h>
#include <public/pmu.h>
#include <xsm/xsm.h>

#include <compat/pmu.h>
CHECK_pmu_cntr_pair;
CHECK_pmu_data;
CHECK_pmu_params;

/*
 * "vpmu"      : vpmu generally enabled (all counters)
 * "vpmu=off"  : vpmu generally disabled
 * "vpmu=bts"  : vpmu enabled and Intel BTS feature switched on.
 * "vpmu=ipc"  : vpmu enabled for IPC counters only (most restrictive)
 * "vpmu=arch" : vpmu enabled for predefined architectural counters only
 *               (restrictive)
 * Flag combinations are allowed, e.g. "vpmu=ipc,bts".
 */
static unsigned int __read_mostly opt_vpmu_enabled;
unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF;
unsigned int __read_mostly vpmu_features = 0;
static int parse_vpmu_params(const char *s);
custom_param("vpmu", parse_vpmu_params);

static DEFINE_SPINLOCK(vpmu_lock);
static unsigned vpmu_count;

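/*
 * The vCPU whose VPMU state was most recently active on each physical CPU.
 * Used to force a remote save of that context before another vCPU's counters
 * are loaded there.
 */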
static DEFINE_PER_CPU(struct vcpu *, last_vcpu);

static int parse_vpmu_param(const char *s, unsigned int len)
{
    if ( !*s || !len )
        return 0;
    if ( !strncmp(s, "bts", len) )
        vpmu_features |= XENPMU_FEATURE_INTEL_BTS;
    else if ( !strncmp(s, "ipc", len) )
        vpmu_features |= XENPMU_FEATURE_IPC_ONLY;
    else if ( !strncmp(s, "arch", len) )
        vpmu_features |= XENPMU_FEATURE_ARCH_ONLY;
    else
        return 1;
    return 0;
}

static int __init parse_vpmu_params(const char *s)
{
    const char *sep, *p = s;

    switch ( parse_bool(s, NULL) )
    {
    case 0:
        break;
    default:
        for ( ; ; )
        {
            sep = strchr(p, ',');
            if ( sep == NULL )
                sep = strchr(p, 0);
            if ( parse_vpmu_param(p, sep - p) )
                goto error;
            if ( !*sep )
                /* reached end of flags */
                break;
            p = sep + 1;
        }
        /* fall through */
    case 1:
        /* Default VPMU mode */
        vpmu_mode = XENPMU_MODE_SELF;
        opt_vpmu_enabled = 1;
        break;
    }
    return 0;

 error:
    printk("VPMU: unknown flags: %s - vpmu disabled!\n", s);
    return -EINVAL;
}

void vpmu_lvtpc_update(uint32_t val)
{
    struct vpmu_struct *vpmu;
    struct vcpu *curr = current;

    if ( likely(vpmu_mode == XENPMU_MODE_OFF) )
        return;

    vpmu = vcpu_vpmu(curr);

    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | (val & APIC_LVT_MASKED);

    /* Postpone APIC updates for PV(H) guests if PMU interrupt is pending */
    if ( has_vlapic(curr->domain) || !vpmu->xenpmu_data ||
         !vpmu_is_set(vpmu, VPMU_CACHED) )
        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
}

int vpmu_do_msr(unsigned int msr, uint64_t *msr_content,
                uint64_t supported, bool_t is_write)
{
    struct vcpu *curr = current;
    struct vpmu_struct *vpmu;
    const struct arch_vpmu_ops *ops;
    int ret = 0;

    /*
     * Hide the PMU MSRs if vpmu is not configured, or the hardware domain is
     * profiling the whole system.
     */
    if ( likely(vpmu_mode == XENPMU_MODE_OFF) ||
         ((vpmu_mode & XENPMU_MODE_ALL) &&
          !is_hardware_domain(curr->domain)) )
        goto nop;

    vpmu = vcpu_vpmu(curr);
    ops = vpmu->arch_vpmu_ops;
    if ( !ops )
        goto nop;

    if ( is_write && ops->do_wrmsr )
        ret = ops->do_wrmsr(msr, *msr_content, supported);
    else if ( !is_write && ops->do_rdmsr )
        ret = ops->do_rdmsr(msr, msr_content);
    else
        goto nop;

    /*
     * We may have received a PMU interrupt while handling the MSR access,
     * and since do_wrmsr/do_rdmsr may have loaded the VPMU context we should
     * save (and unload) it again.
     */
    if ( !has_vlapic(curr->domain) && vpmu->xenpmu_data &&
         vpmu_is_set(vpmu, VPMU_CACHED) )
    {
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE);
        ops->arch_vpmu_save(curr, 0);
        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
    }

    return ret;

 nop:
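    /*
     * Reads of unhandled PMU MSRs return zero.  MSR_IA32_MISC_ENABLE is left
     * untouched, presumably because the caller passes in a value whose
     * non-PMU bits must be preserved.
     */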
    if ( !is_write && (msr != MSR_IA32_MISC_ENABLE) )
        *msr_content = 0;

    return 0;
}

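/*
 * Pick a hardware domain vCPU to which a sample taken on this physical CPU
 * will be delivered, spreading samples across dom0's vCPUs by pCPU id.
 */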
static inline struct vcpu *choose_hwdom_vcpu(void)
{
    unsigned idx;

    if ( hardware_domain->max_vcpus == 0 )
        return NULL;

    idx = smp_processor_id() % hardware_domain->max_vcpus;

    return hardware_domain->vcpu[idx];
}

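/*
 * Handle a PMC overflow interrupt.  Depending on the mode, the sample is
 * either recorded in the shared xenpmu_data page and signalled via
 * VIRQ_XENPMU (PV(H) guests and system-wide profiling) or injected through
 * the HVM guest's virtual LAPIC.
 */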
void vpmu_do_interrupt(struct cpu_user_regs *regs)
{
    struct vcpu *sampled = current, *sampling;
    struct vpmu_struct *vpmu;
    struct vlapic *vlapic;
    u32 vlapic_lvtpc;

    /*
     * dom0 will handle interrupt for special domains (e.g. idle domain) or,
     * in XENPMU_MODE_ALL, for everyone.
     */
    if ( (vpmu_mode & XENPMU_MODE_ALL) ||
         (sampled->domain->domain_id >= DOMID_FIRST_RESERVED) )
    {
        sampling = choose_hwdom_vcpu();
        if ( !sampling )
            return;
    }
    else
        sampling = sampled;

    vpmu = vcpu_vpmu(sampling);
    if ( !vpmu->arch_vpmu_ops )
        return;

    /* PV(H) guest */
    if ( !has_vlapic(sampling->domain) || (vpmu_mode & XENPMU_MODE_ALL) )
    {
        const struct cpu_user_regs *cur_regs;
        uint64_t *flags = &vpmu->xenpmu_data->pmu.pmu_flags;
        domid_t domid;

        if ( !vpmu->xenpmu_data )
            return;

        if ( vpmu_is_set(vpmu, VPMU_CACHED) )
            return;

        /* PV guest will be reading PMU MSRs from xenpmu_data */
        vpmu_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);
        vpmu->arch_vpmu_ops->arch_vpmu_save(sampling, 1);
        vpmu_reset(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED);

        if ( is_hvm_vcpu(sampled) )
            *flags = 0;
        else
            *flags = PMU_SAMPLE_PV;

        if ( sampled == sampling )
            domid = DOMID_SELF;
        else
            domid = sampled->domain->domain_id;

        /* Store appropriate registers in xenpmu_data */
        /* FIXME: 32-bit PVH should go here as well */
        if ( is_pv_32bit_vcpu(sampling) )
        {
            /*
             * 32-bit dom0 cannot process Xen's addresses (which are 64 bit)
             * and therefore we treat it the same way as a non-privileged
             * PV 32-bit domain.
             */
            struct compat_pmu_regs *cmp;

            cur_regs = guest_cpu_user_regs();

            cmp = (void *)&vpmu->xenpmu_data->pmu.r.regs;
            cmp->ip = cur_regs->rip;
            cmp->sp = cur_regs->rsp;
            cmp->flags = cur_regs->rflags;
            cmp->ss = cur_regs->ss;
            cmp->cs = cur_regs->cs;
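            /*
             * A 32-bit PV kernel runs in ring 1, so only RPL values above 1
             * indicate a user-mode sample.
             */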
            if ( (cmp->cs & 3) > 1 )
                *flags |= PMU_SAMPLE_USER;
        }
        else
        {
            struct xen_pmu_regs *r = &vpmu->xenpmu_data->pmu.r.regs;

            if ( (vpmu_mode & XENPMU_MODE_SELF) )
                cur_regs = guest_cpu_user_regs();
            else if ( !guest_mode(regs) &&
                      is_hardware_domain(sampling->domain) )
            {
                cur_regs = regs;
                domid = DOMID_XEN;
            }
            else
                cur_regs = guest_cpu_user_regs();

            r->ip = cur_regs->rip;
            r->sp = cur_regs->rsp;
            r->flags = cur_regs->rflags;

            if ( !is_hvm_vcpu(sampled) )
            {
                r->ss = cur_regs->ss;
                r->cs = cur_regs->cs;
                if ( !(sampled->arch.flags & TF_kernel_mode) )
                    *flags |= PMU_SAMPLE_USER;
            }
            else
            {
                struct segment_register seg;

                hvm_get_segment_register(sampled, x86_seg_cs, &seg);
                r->cs = seg.sel;
                hvm_get_segment_register(sampled, x86_seg_ss, &seg);
                r->ss = seg.sel;
                r->cpl = seg.dpl;
                if ( !(sampled->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
                    *flags |= PMU_SAMPLE_REAL;
            }
        }

        vpmu->xenpmu_data->domain_id = domid;
        vpmu->xenpmu_data->vcpu_id = sampled->vcpu_id;
        if ( is_hardware_domain(sampling->domain) )
            vpmu->xenpmu_data->pcpu_id = smp_processor_id();
        else
            vpmu->xenpmu_data->pcpu_id = sampled->vcpu_id;

        vpmu->hw_lapic_lvtpc |= APIC_LVT_MASKED;
        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        *flags |= PMU_CACHED;
        vpmu_set(vpmu, VPMU_CACHED);

        send_guest_vcpu_virq(sampling, VIRQ_XENPMU);

        return;
    }

    /* HVM guests */
    vlapic = vcpu_vlapic(sampling);

    /* We don't support (yet) HVM dom0 */
    ASSERT(sampling == sampled);

    if ( !vpmu->arch_vpmu_ops->do_interrupt(regs) ||
         !is_vlapic_lvtpc_enabled(vlapic) )
        return;

    vlapic_lvtpc = vlapic_get_reg(vlapic, APIC_LVTPC);

    switch ( GET_APIC_DELIVERY_MODE(vlapic_lvtpc) )
    {
    case APIC_MODE_FIXED:
        vlapic_set_irq(vlapic, vlapic_lvtpc & APIC_VECTOR_MASK, 0);
        break;
    case APIC_MODE_NMI:
        sampling->nmi_pending = 1;
        break;
    }
}

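/*
 * Save a vCPU's VPMU context in place.  Typically invoked via IPI on the
 * physical CPU where that context is still loaded, so the counters there
 * can be handed to someone else.
 */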
static void vpmu_save_force(void *arg)
{
    struct vcpu *v = (struct vcpu *)arg;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        return;

    vpmu_set(vpmu, VPMU_CONTEXT_SAVE);

    if ( vpmu->arch_vpmu_ops )
        (void)vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0);

    vpmu_reset(vpmu, VPMU_CONTEXT_SAVE);

    per_cpu(last_vcpu, smp_processor_id()) = NULL;
}

void vpmu_save(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    int pcpu = smp_processor_id();

    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_ALLOCATED | VPMU_CONTEXT_LOADED) )
        return;

    vpmu->last_pcpu = pcpu;
    per_cpu(last_vcpu, pcpu) = v;

    if ( vpmu->arch_vpmu_ops )
        if ( vpmu->arch_vpmu_ops->arch_vpmu_save(v, 0) )
            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);

    apic_write(APIC_LVTPC, PMU_APIC_VECTOR | APIC_LVT_MASKED);
}

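/*
 * Load v's VPMU context onto the current physical CPU.  from_guest is
 * non-zero when the load was requested by the guest itself (XENPMU_flush)
 * rather than by a context switch.
 */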
int vpmu_load(struct vcpu *v, bool_t from_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    int pcpu = smp_processor_id();
    struct vcpu *prev = NULL;

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return 0;

    /* First time this VCPU is running here */
    if ( vpmu->last_pcpu != pcpu )
    {
        /*
         * Get the context from the last pcpu that we ran on.  Note that if
         * another VCPU is running there it must have saved this VCPU's
         * context before starting to run (see below).
         * There should be no race since the remote pcpu will disable
         * interrupts before saving the context.
         */
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        {
            on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                             vpmu_save_force, (void *)v, 1);
            vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
        }
    }

    /* Prevent forced context save from remote CPU */
    local_irq_disable();

    prev = per_cpu(last_vcpu, pcpu);

    if ( prev != v && prev )
    {
        vpmu = vcpu_vpmu(prev);

        /* Someone ran here before us */
        vpmu_save_force(prev);
        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);

        vpmu = vcpu_vpmu(v);
    }

    local_irq_enable();

    /* Load the PMU context immediately only if the PMU is counting. */
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) ||
         (!has_vlapic(vpmu_vcpu(vpmu)->domain) &&
          vpmu_is_set(vpmu, VPMU_CACHED)) )
        return 0;

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_load )
    {
        int ret;

        apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc);
        /* Arch code needs to set VPMU_CONTEXT_LOADED */
        ret = vpmu->arch_vpmu_ops->arch_vpmu_load(v, from_guest);
        if ( ret )
        {
            apic_write(APIC_LVTPC, vpmu->hw_lapic_lvtpc | APIC_LVT_MASKED);
            return ret;
        }
    }

    return 0;
}

static int vpmu_arch_initialise(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    uint8_t vendor = current_cpu_data.x86_vendor;
    int ret;

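    /*
     * The vendor-specific context structures from the public interface are
     * embedded in fixed-size padding; make sure they actually fit.
     */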
    BUILD_BUG_ON(sizeof(struct xen_pmu_intel_ctxt) > XENPMU_CTXT_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct xen_pmu_amd_ctxt) > XENPMU_CTXT_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct xen_pmu_regs) > XENPMU_REGS_PAD_SZ);
    BUILD_BUG_ON(sizeof(struct compat_pmu_regs) > XENPMU_REGS_PAD_SZ);

    ASSERT(!(vpmu->flags & ~VPMU_AVAILABLE) && !vpmu->context);

    if ( !vpmu_available(v) )
        return 0;

    switch ( vendor )
    {
    case X86_VENDOR_AMD:
        ret = svm_vpmu_initialise(v);
        break;

    case X86_VENDOR_INTEL:
        ret = vmx_vpmu_initialise(v);
        break;

    default:
        if ( vpmu_mode != XENPMU_MODE_OFF )
        {
            printk(XENLOG_G_WARNING "VPMU: Unknown CPU vendor %d. "
                   "Disabling VPMU\n", vendor);
            opt_vpmu_enabled = 0;
            vpmu_mode = XENPMU_MODE_OFF;
        }
        return -EINVAL;
    }

    vpmu->hw_lapic_lvtpc = PMU_APIC_VECTOR | APIC_LVT_MASKED;

    if ( ret )
        printk(XENLOG_G_WARNING "VPMU: Initialization failed for %pv\n", v);

    return ret;
}

static void get_vpmu(struct vcpu *v)
{
    spin_lock(&vpmu_lock);

    /*
     * Keep count of VPMUs in the system so that we won't try to change
     * vpmu_mode while a guest might be using one.
     * vpmu_mode can be safely updated while dom0's VPMUs are active and
     * so we don't need to include it in the count.
     */
    if ( !is_hardware_domain(v->domain) &&
         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
    {
        vpmu_count++;
        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);
    }
    else if ( is_hardware_domain(v->domain) &&
              (vpmu_mode != XENPMU_MODE_OFF) )
        vpmu_set(vcpu_vpmu(v), VPMU_AVAILABLE);

    spin_unlock(&vpmu_lock);
}

static void put_vpmu(struct vcpu *v)
{
    spin_lock(&vpmu_lock);

    if ( !vpmu_available(v) )
        goto out;

    if ( !is_hardware_domain(v->domain) &&
         (vpmu_mode & (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
    {
        vpmu_count--;
        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);
    }
    else if ( is_hardware_domain(v->domain) &&
              (vpmu_mode != XENPMU_MODE_OFF) )
        vpmu_reset(vcpu_vpmu(v), VPMU_AVAILABLE);

 out:
    spin_unlock(&vpmu_lock);
}

void vpmu_initialise(struct vcpu *v)
{
    get_vpmu(v);

    /*
     * Guests without LAPIC (i.e. PV) call vpmu_arch_initialise()
     * from pvpmu_init().
     */
    if ( has_vlapic(v->domain) && vpmu_arch_initialise(v) )
        put_vpmu(v);
}

static void vpmu_clear_last(void *arg)
{
    if ( this_cpu(last_vcpu) == arg )
        this_cpu(last_vcpu) = NULL;
}

static void vpmu_arch_destroy(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return;

    /*
     * Need to clear last_vcpu in case it points to v.
     * We can check here non-atomically whether it is 'v' since
     * last_vcpu can never become 'v' again at this point.
     * We will test it again in vpmu_clear_last() with interrupts
     * disabled to make sure we don't clear someone else.
     */
    if ( cpu_online(vpmu->last_pcpu) &&
         per_cpu(last_vcpu, vpmu->last_pcpu) == v )
        on_selected_cpus(cpumask_of(vpmu->last_pcpu),
                         vpmu_clear_last, v, 1);

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_destroy )
    {
        /*
         * Unload the VPMU first if VPMU_CONTEXT_LOADED is set.
         * This will stop the counters.
         */
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
            on_selected_cpus(cpumask_of(vcpu_vpmu(v)->last_pcpu),
                             vpmu_save_force, v, 1);

        vpmu->arch_vpmu_ops->arch_vpmu_destroy(v);
    }
}

void vpmu_destroy(struct vcpu *v)
{
    vpmu_arch_destroy(v);

    put_vpmu(v);
}

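/*
 * Set up PMU support for a PV(H) vCPU: map the guest-provided GFN as the
 * shared xen_pmu_data page and perform arch-specific initialisation.
 */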
static int pvpmu_init(struct domain *d, xen_pmu_params_t *params)
{
    struct vcpu *v;
    struct vpmu_struct *vpmu;
    struct page_info *page;
    uint64_t gfn = params->val;

    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
        return -EINVAL;

    v = d->vcpu[params->vcpu];
    vpmu = vcpu_vpmu(v);

    if ( !vpmu_available(v) )
        return -ENOENT;

    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
    if ( !page )
        return -EINVAL;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        return -EINVAL;
    }

    spin_lock(&vpmu->vpmu_lock);

    if ( v->arch.vpmu.xenpmu_data )
    {
        spin_unlock(&vpmu->vpmu_lock);
        put_page_and_type(page);
        return -EEXIST;
    }

    v->arch.vpmu.xenpmu_data = __map_domain_page_global(page);
    if ( !v->arch.vpmu.xenpmu_data )
    {
        spin_unlock(&vpmu->vpmu_lock);
        put_page_and_type(page);
        return -ENOMEM;
    }

    if ( vpmu_arch_initialise(v) )
        put_vpmu(v);

    spin_unlock(&vpmu->vpmu_lock);

    return 0;
}

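/* Tear down the shared PMU page mapping and state set up by pvpmu_init(). */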
static void pvpmu_finish(struct domain *d, xen_pmu_params_t *params)
{
    struct vcpu *v;
    struct vpmu_struct *vpmu;
    uint64_t mfn;
    void *xenpmu_data;

    if ( (params->vcpu >= d->max_vcpus) || (d->vcpu[params->vcpu] == NULL) )
        return;

    v = d->vcpu[params->vcpu];
    if ( v != current )
        vcpu_pause(v);

    vpmu = vcpu_vpmu(v);
    spin_lock(&vpmu->vpmu_lock);

    vpmu_arch_destroy(v);
    xenpmu_data = vpmu->xenpmu_data;
    vpmu->xenpmu_data = NULL;

    spin_unlock(&vpmu->vpmu_lock);

    if ( xenpmu_data )
    {
        mfn = domain_page_map_to_mfn(xenpmu_data);
        ASSERT(mfn_valid(_mfn(mfn)));
        unmap_domain_page_global(xenpmu_data);
        put_page_and_type(mfn_to_page(mfn));
    }

    if ( v != current )
        vcpu_unpause(v);
}

/* Dump some vpmu information to the console. Used in keyhandler dump_domains(). */
void vpmu_dump(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( vpmu->arch_vpmu_ops && vpmu->arch_vpmu_ops->arch_vpmu_dump )
        vpmu->arch_vpmu_ops->arch_vpmu_dump(v);
}

long do_xenpmu_op(unsigned int op, XEN_GUEST_HANDLE_PARAM(xen_pmu_params_t) arg)
{
    int ret;
    struct vcpu *curr;
    struct xen_pmu_params pmu_params = {.val = 0};
    struct xen_pmu_data *xenpmu_data;
    struct vpmu_struct *vpmu;

    if ( !opt_vpmu_enabled || has_vlapic(current->domain) )
        return -EOPNOTSUPP;

    ret = xsm_pmu_op(XSM_OTHER, current->domain, op);
    if ( ret )
        return ret;

    /* Check major version when parameters are specified */
    switch ( op )
    {
    case XENPMU_mode_set:
    case XENPMU_feature_set:
    case XENPMU_init:
    case XENPMU_finish:
        if ( copy_from_guest(&pmu_params, arg, 1) )
            return -EFAULT;

        if ( pmu_params.version.maj != XENPMU_VER_MAJ )
            return -EINVAL;
    }

    switch ( op )
    {
    case XENPMU_mode_set:
    {
        if ( (pmu_params.val &
              ~(XENPMU_MODE_SELF | XENPMU_MODE_HV | XENPMU_MODE_ALL)) ||
             (hweight64(pmu_params.val) > 1) )
            return -EINVAL;

        /* 32-bit dom0 can only sample itself. */
        if ( is_pv_32bit_vcpu(current) &&
             (pmu_params.val & (XENPMU_MODE_HV | XENPMU_MODE_ALL)) )
            return -EINVAL;

        spin_lock(&vpmu_lock);

        /*
         * We can always safely switch between XENPMU_MODE_SELF and
         * XENPMU_MODE_HV while other VPMUs are active.
         */
        if ( (vpmu_count == 0) ||
             ((vpmu_mode ^ pmu_params.val) ==
              (XENPMU_MODE_SELF | XENPMU_MODE_HV)) )
            vpmu_mode = pmu_params.val;
        else if ( vpmu_mode != pmu_params.val )
        {
            gprintk(XENLOG_WARNING,
                    "VPMU: Cannot change mode while active VPMUs exist\n");
            ret = -EBUSY;
        }

        spin_unlock(&vpmu_lock);

        break;
    }

    case XENPMU_mode_get:
        memset(&pmu_params, 0, sizeof(pmu_params));
        pmu_params.val = vpmu_mode;

        pmu_params.version.maj = XENPMU_VER_MAJ;
        pmu_params.version.min = XENPMU_VER_MIN;

        if ( copy_to_guest(arg, &pmu_params, 1) )
            ret = -EFAULT;

        break;

    case XENPMU_feature_set:
        if ( pmu_params.val & ~(XENPMU_FEATURE_INTEL_BTS |
                                XENPMU_FEATURE_IPC_ONLY |
                                XENPMU_FEATURE_ARCH_ONLY) )
            return -EINVAL;

        spin_lock(&vpmu_lock);

        if ( (vpmu_count == 0) || (vpmu_features == pmu_params.val) )
            vpmu_features = pmu_params.val;
        else
        {
            gprintk(XENLOG_WARNING,
                    "VPMU: Cannot change features while active VPMUs exist\n");
            ret = -EBUSY;
        }

        spin_unlock(&vpmu_lock);

        break;

    case XENPMU_feature_get:
        pmu_params.val = vpmu_features;
        if ( copy_field_to_guest(arg, &pmu_params, val) )
            ret = -EFAULT;

        break;

    case XENPMU_init:
        ret = pvpmu_init(current->domain, &pmu_params);
        break;

    case XENPMU_finish:
        pvpmu_finish(current->domain, &pmu_params);
        break;

    case XENPMU_lvtpc_set:
        xenpmu_data = current->arch.vpmu.xenpmu_data;
        if ( xenpmu_data != NULL )
            vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
        else
            ret = -EINVAL;
        break;

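    /*
     * The guest has consumed the cached sample: re-arm the PMU interrupt
     * and reload the VPMU context.
     */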
    case XENPMU_flush:
        curr = current;
        vpmu = vcpu_vpmu(curr);
        xenpmu_data = curr->arch.vpmu.xenpmu_data;
        if ( xenpmu_data == NULL )
            return -EINVAL;
        xenpmu_data->pmu.pmu_flags &= ~PMU_CACHED;
        vpmu_reset(vpmu, VPMU_CACHED);
        vpmu_lvtpc_update(xenpmu_data->pmu.l.lapic_lvtpc);
        if ( vpmu_load(curr, 1) )
        {
            xenpmu_data->pmu.pmu_flags |= PMU_CACHED;
            vpmu_set(vpmu, VPMU_CACHED);
            ret = -EIO;
        }
        break;

    default:
        ret = -EINVAL;
    }

    return ret;
}

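/*
 * CPU hotplug notifier: before a physical CPU goes offline, save the VPMU
 * context of the vCPU that last ran there.
 */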
static int cpu_callback(
    struct notifier_block *nfb, unsigned long action, void *hcpu)
{
    unsigned int cpu = (unsigned long)hcpu;
    struct vcpu *vcpu = per_cpu(last_vcpu, cpu);
    struct vpmu_struct *vpmu;

    if ( !vcpu )
        return NOTIFY_DONE;

    vpmu = vcpu_vpmu(vcpu);
    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return NOTIFY_DONE;

    if ( action == CPU_DYING )
    {
        vpmu_save_force(vcpu);
        vpmu_reset(vpmu, VPMU_CONTEXT_LOADED);
    }

    return NOTIFY_DONE;
}

static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};

static int __init vpmu_init(void)
{
    int vendor = current_cpu_data.x86_vendor;

    if ( !opt_vpmu_enabled )
    {
        printk(XENLOG_INFO "VPMU: disabled\n");
        return 0;
    }

    /* NMI watchdog uses LVTPC and HW counter */
    if ( opt_watchdog && opt_vpmu_enabled )
    {
        printk(XENLOG_WARNING "NMI watchdog is enabled. Turning VPMU off.\n");
        opt_vpmu_enabled = 0;
        vpmu_mode = XENPMU_MODE_OFF;
        return 0;
    }

    switch ( vendor )
    {
    case X86_VENDOR_AMD:
        if ( amd_vpmu_init() )
            vpmu_mode = XENPMU_MODE_OFF;
        break;
    case X86_VENDOR_INTEL:
        if ( core2_vpmu_init() )
            vpmu_mode = XENPMU_MODE_OFF;
        break;
    default:
        printk(XENLOG_WARNING "VPMU: Unknown CPU vendor: %d. "
               "Turning VPMU off.\n", vendor);
        vpmu_mode = XENPMU_MODE_OFF;
        break;
    }

    if ( vpmu_mode != XENPMU_MODE_OFF )
    {
        register_cpu_notifier(&cpu_nfb);
        printk(XENLOG_INFO "VPMU: version " __stringify(XENPMU_VER_MAJ) "."
               __stringify(XENPMU_VER_MIN) "\n");
    }
    else
        opt_vpmu_enabled = 0;

    return 0;
}
__initcall(vpmu_init);
916