/*
 * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain.
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Haitao Shan <haitao.shan@intel.com>
 */

#include <xen/sched.h>
#include <xen/xenoprof.h>
#include <xen/irq.h>
#include <asm/system.h>
#include <asm/regs.h>
#include <asm/types.h>
#include <asm/apic.h>
#include <asm/traps.h>
#include <asm/msr.h>
#include <asm/msr-index.h>
#include <asm/vpmu.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <public/sched.h>
#include <public/hvm/save.h>
#include <public/pmu.h>

/*
 * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID
 * instruction.
 * cpuid 0xa - Architectural Performance Monitoring Leaf
 * Register eax
 */
#define PMU_VERSION_SHIFT        0  /* Version ID */
#define PMU_VERSION_BITS         8  /* 8 bits 0..7 */
#define PMU_VERSION_MASK         (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT)

#define PMU_GENERAL_NR_SHIFT     8  /* Number of general pmu registers */
#define PMU_GENERAL_NR_BITS      8  /* 8 bits 8..15 */
#define PMU_GENERAL_NR_MASK      (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT)

#define PMU_GENERAL_WIDTH_SHIFT 16  /* Width of general pmu registers */
#define PMU_GENERAL_WIDTH_BITS   8  /* 8 bits 16..23 */
#define PMU_GENERAL_WIDTH_MASK  (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT)
/* Register edx */
#define PMU_FIXED_NR_SHIFT       0  /* Number of fixed pmu registers */
#define PMU_FIXED_NR_BITS        5  /* 5 bits 0..4 */
#define PMU_FIXED_NR_MASK        (((1 << PMU_FIXED_NR_BITS) - 1) << PMU_FIXED_NR_SHIFT)

#define PMU_FIXED_WIDTH_SHIFT    5  /* Width of fixed pmu registers */
#define PMU_FIXED_WIDTH_BITS     8  /* 8 bits 5..12 */
#define PMU_FIXED_WIDTH_MASK     (((1 << PMU_FIXED_WIDTH_BITS) - 1) << PMU_FIXED_WIDTH_SHIFT)

/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK       (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0))
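/*
 * MSR_IA32_PERFCTR0 (0xc1) and MSR_IA32_A_PERFCTR0 (0x4c1) differ only in
 * bit 10, so masking that bit maps either MSR range onto the same counter
 * index.
 */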
static bool_t __read_mostly full_width_write;

/*
 * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed
 * counters. 4 bits for every counter.
 */
#define FIXED_CTR_CTRL_BITS 4
#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
#define FIXED_CTR_CTRL_ANYTHREAD_MASK 0x4
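/*
 * Within each 4-bit field: bits 0-1 enable counting in ring 0 and ring 3
 * respectively, bit 2 is AnyThread and bit 3 enables the PMI on overflow.
 */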

#define ARCH_CNTR_ENABLED   (1ULL << 22)
#define ARCH_CNTR_PIN_CONTROL (1ULL << 19)

/* Number of general-purpose and fixed performance counters */
static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;

/* Masks used for testing whether an MSR is valid */
#define ARCH_CTRL_MASK  (~((1ull << 32) - 1) | (1ull << 21) | ARCH_CNTR_PIN_CONTROL)
static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
static uint64_t __read_mostly global_ovf_ctrl_mask, global_ctrl_mask;
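/*
 * ARCH_CTRL_MASK flags the reserved upper 32 bits of IA32_PERFEVTSELx as
 * well as the AnyThread (21) and pin control (19) bits as invalid for
 * guests.  The runtime masks above are filled in by core2_vpmu_init() from
 * the detected counter counts and widths; a set bit marks a position a
 * guest may not write.
 */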

/* Total size of PMU registers block (copied to/from PV(H) guest) */
static unsigned int __read_mostly regs_sz;
/* Offset into context of the beginning of PMU register block */
static const unsigned int regs_off =
        sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
        sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);

/*
 * QUIRK to work around an issue on various family 6 CPUs.
 * The issue leads to endless PMC interrupt loops on the processor.
 * If a PMC reaches the value 0 while the interrupt handler is running, it
 * stays at 0 and immediately triggers a new interrupt once the handler
 * finishes.
 * The workaround is to read all flagged counters and, if a counter's value
 * is 0, write 1 (or any other non-zero value) into it.
 * No erratum has been published and the real cause of this behaviour is
 * unknown.
 */
bool_t __read_mostly is_pmc_quirk;

static void check_pmc_quirk(void)
{
    if ( current_cpu_data.x86 == 6 )
        is_pmc_quirk = 1;
    else
        is_pmc_quirk = 0;
}

static void handle_pmc_quirk(u64 msr_content)
{
    int i;
    u64 val;

    if ( !is_pmc_quirk )
        return;

    val = msr_content;
    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        if ( val & 0x1 )
        {
            u64 cnt;
            rdmsrl(MSR_P6_PERFCTR(i), cnt);
            if ( cnt == 0 )
                wrmsrl(MSR_P6_PERFCTR(i), 1);
        }
        val >>= 1;
    }
    val = msr_content >> 32;
    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        if ( val & 0x1 )
        {
            u64 cnt;
            rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt);
            if ( cnt == 0 )
                wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1);
        }
        val >>= 1;
    }
}

/*
 * Read the number of general counters from CPUID leaf 0xa, EAX[15:8].
 */
static int core2_get_arch_pmc_count(void)
{
    u32 eax;

    eax = cpuid_eax(0xa);
    return MASK_EXTR(eax, PMU_GENERAL_NR_MASK);
}

/*
 * Read the number of fixed counters from CPUID leaf 0xa, EDX[4:0].
 */
static int core2_get_fixed_pmc_count(void)
{
    u32 edx = cpuid_edx(0xa);

    return MASK_EXTR(edx, PMU_FIXED_NR_MASK);
}

/* edx bits 5-12: Bit width of fixed-function performance counters */
static int core2_get_bitwidth_fix_count(void)
{
    u32 edx;

    edx = cpuid_edx(0xa);
    return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK);
}

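/*
 * Classify an MSR index: return 1 and fill in *type (and, for counter and
 * event select MSRs, *index) if the MSR belongs to the Core2 vPMU set,
 * 0 otherwise.
 */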
static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
{
    u32 msr_index_pmc;

    switch ( msr_index )
    {
    case MSR_CORE_PERF_FIXED_CTR_CTRL:
    case MSR_IA32_DS_AREA:
    case MSR_IA32_PEBS_ENABLE:
        *type = MSR_TYPE_CTRL;
        return 1;

    case MSR_CORE_PERF_GLOBAL_CTRL:
    case MSR_CORE_PERF_GLOBAL_STATUS:
    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        *type = MSR_TYPE_GLOBAL;
        return 1;

    default:

        if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
             (msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) )
        {
            *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
            *type = MSR_TYPE_COUNTER;
            return 1;
        }

        if ( (msr_index >= MSR_P6_EVNTSEL(0)) &&
             (msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) )
        {
            *index = msr_index - MSR_P6_EVNTSEL(0);
            *type = MSR_TYPE_ARCH_CTRL;
            return 1;
        }

        msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
        if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) &&
             (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) )
        {
            *type = MSR_TYPE_ARCH_COUNTER;
            *index = msr_index_pmc - MSR_IA32_PERFCTR0;
            return 1;
        }
        return 0;
    }
}

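/*
 * Open the VMX MSR bitmap so the guest can access counter MSRs directly and
 * read (but not write) the non-global control MSRs without a VM exit.
 */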
static void core2_vpmu_set_msr_bitmap(struct vcpu *v)
{
    unsigned int i;

    /* Allow Read/Write PMU Counters MSR Directly. */
    for ( i = 0; i < fixed_pmc_cnt; i++ )
        vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        vmx_clear_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);

        if ( full_width_write )
            vmx_clear_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
    }

    /* Allow Read PMU Non-global Controls Directly. */
    for ( i = 0; i < arch_pmc_cnt; i++ )
        vmx_clear_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);

    vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
    vmx_clear_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
}

static void core2_vpmu_unset_msr_bitmap(struct vcpu *v)
{
    unsigned int i;

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        vmx_set_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);

        if ( full_width_write )
            vmx_set_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
    }

    for ( i = 0; i < arch_pmc_cnt; i++ )
        vmx_set_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);

    vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
    vmx_set_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
}

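/*
 * Snapshot the hardware counter values into the per-vCPU context; for PV
 * guests also capture MSR_CORE_PERF_GLOBAL_STATUS.
 */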
static inline void __core2_vpmu_save(struct vcpu *v)
{
    int i;
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
    for ( i = 0; i < arch_pmc_cnt; i++ )
        rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);

    if ( !is_hvm_vcpu(v) )
        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
}

static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !is_hvm_vcpu(v) )
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
        return 0;

    __core2_vpmu_save(v);

    /* Unset PMU MSR bitmap to trap lazy load. */
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && is_hvm_vcpu(v) &&
         cpu_has_vmx_msr_bitmap )
        core2_vpmu_unset_msr_bitmap(v);

    if ( to_guest )
    {
        ASSERT(!has_vlapic(v->domain));
        memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
               vpmu->context + regs_off, regs_sz);
    }

    return 1;
}

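/*
 * Write the saved context back to hardware: fixed counters, general
 * counters and their event selects, the fixed control and (if present) the
 * DS area.  PV guests additionally have their global overflow and global
 * enable controls restored here.
 */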
static inline void __core2_vpmu_load(struct vcpu *v)
{
    unsigned int i, pmc_start;
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);

    if ( full_width_write )
        pmc_start = MSR_IA32_A_PERFCTR0;
    else
        pmc_start = MSR_IA32_PERFCTR0;
    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter);
        wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
    }

    wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
    if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) )
        wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area);

    if ( !is_hvm_vcpu(v) )
    {
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl);
        core2_vpmu_cxt->global_ovf_ctrl = 0;
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
    }
}

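/*
 * Sanity-check a guest-supplied register block against the reserved-bit
 * masks computed at initialisation.  Also work out which counters are
 * enabled (stored in the private context) and update VPMU_RUNNING
 * accordingly.
 */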
static int core2_vpmu_verify(struct vcpu *v)
{
    unsigned int i;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
    uint64_t fixed_ctrl;
    uint64_t *priv_context = vpmu->priv_context;
    uint64_t enabled_cntrs = 0;

    if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
        return -EINVAL;
    if ( core2_vpmu_cxt->global_ctrl & global_ctrl_mask )
        return -EINVAL;
    if ( core2_vpmu_cxt->pebs_enable )
        return -EINVAL;

    fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
    if ( fixed_ctrl & fixed_ctrl_mask )
        return -EINVAL;

    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        if ( fixed_counters[i] & fixed_counters_mask )
            return -EINVAL;
        if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
            enabled_cntrs |= (1ULL << i);
    }
    enabled_cntrs <<= 32;

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        uint64_t control = xen_pmu_cntr_pair[i].control;

        if ( control & ARCH_CTRL_MASK )
            return -EINVAL;
        if ( control & ARCH_CNTR_ENABLED )
            enabled_cntrs |= (1ULL << i);
    }

    if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) &&
         !(is_hvm_vcpu(v)
           ? is_canonical_address(core2_vpmu_cxt->ds_area)
           : __addr_ok(core2_vpmu_cxt->ds_area)) )
        return -EINVAL;

    if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
         (core2_vpmu_cxt->ds_area != 0) )
        vpmu_set(vpmu, VPMU_RUNNING);
    else
        vpmu_reset(vpmu, VPMU_RUNNING);

    *priv_context = enabled_cntrs;

    return 0;
}

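/*
 * Load the vPMU context, optionally taking a fresh register block from the
 * guest's shared page (PV(H) only) and verifying it first.
 */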
static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        return 0;

    if ( from_guest )
    {
        int ret;

        ASSERT(!has_vlapic(v->domain));

        memcpy(vpmu->context + regs_off,
               (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
               regs_sz);

        ret = core2_vpmu_verify(v);
        if ( ret )
        {
            /*
             * Not necessary since we should never load the context until
             * guest provides valid values. But just to be safe.
             */
            memset(vpmu->context + regs_off, 0, regs_sz);
            return ret;
        }
    }

    vpmu_set(vpmu, VPMU_CONTEXT_LOADED);

    __core2_vpmu_load(v);

    return 0;
}

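/*
 * Allocate the register context and private state, take PMU ownership and,
 * for HVM guests, set up VMCS load/save entries so that
 * MSR_CORE_PERF_GLOBAL_CTRL is switched on VM entry/exit.  Returns 1 on
 * success, 0 on failure.
 */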
static int core2_vpmu_alloc_resource(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL;
    uint64_t *p = NULL;

    if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
        return 0;

    if ( is_hvm_vcpu(v) )
    {
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
        if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
            goto out_err;

        if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
            goto out_err;
        vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
    }

    core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
                                   sizeof(uint64_t) * fixed_pmc_cnt +
                                   sizeof(struct xen_pmu_cntr_pair) *
                                   arch_pmc_cnt);
    p = xzalloc(uint64_t);
    if ( !core2_vpmu_cxt || !p )
        goto out_err;

    core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt);
    core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters +
                                    sizeof(uint64_t) * fixed_pmc_cnt;

    vpmu->context = core2_vpmu_cxt;
    vpmu->priv_context = p;

    if ( !has_vlapic(v->domain) )
    {
        /* Copy fixed/arch register offsets to shared area */
        ASSERT(vpmu->xenpmu_data);
        memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
    }

    vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);

    return 1;

out_err:
    release_pmu_ownership(PMU_OWNER_HVM);

    xfree(core2_vpmu_cxt);
    xfree(p);

    printk("Failed to allocate VPMU resources for domain %u vcpu %u\n",
           v->domain->domain_id, v->vcpu_id);

    return 0;
}

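/*
 * Confirm the MSR belongs to the vPMU and make sure a context exists and is
 * loaded on this pCPU; the first touch of a PMU MSR triggers the lazy
 * allocation and load.
 */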
static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(current);

    if ( !is_core2_vpmu_msr(msr_index, type, index) )
        return 0;

    if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) &&
         !core2_vpmu_alloc_resource(current) )
        return 0;

    /* Do the lazy load stuff. */
    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
    {
        __core2_vpmu_load(current);
        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);

        if ( is_hvm_vcpu(current) && cpu_has_vmx_msr_bitmap )
            core2_vpmu_set_msr_bitmap(current);
    }
    return 1;
}

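/*
 * Emulate a guest WRMSR to a PMU MSR: validate the value against the
 * reserved-bit masks and the active XENPMU_FEATURE_* filters, mirror it
 * into the software context, then forward it to hardware (via the VMCS MSR
 * area for MSR_CORE_PERF_GLOBAL_CTRL on HVM guests).
 */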
static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
                               uint64_t supported)
{
    int i, tmp;
    int type = -1, index = -1;
    struct vcpu *v = current;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
    uint64_t *enabled_cntrs;

    if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
    {
        /* Special handling for BTS */
        if ( msr == MSR_IA32_DEBUGCTLMSR )
        {
            supported |= IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS |
                         IA32_DEBUGCTLMSR_BTINT;

            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
                supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS |
                             IA32_DEBUGCTLMSR_BTS_OFF_USR;
            if ( !(msr_content & ~supported) &&
                 vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
                return 0;
            if ( (msr_content & supported) &&
                 !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
                printk(XENLOG_G_WARNING
                       "%pv: Debug Store unsupported on this CPU\n",
                       current);
        }
        return -EINVAL;
    }

    ASSERT(!supported);

    if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
        /* Writing unsupported bits to a fixed counter */
        return -EINVAL;

    core2_vpmu_cxt = vpmu->context;
    enabled_cntrs = vpmu->priv_context;
    switch ( msr )
    {
    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        if ( msr_content & global_ovf_ctrl_mask )
            return -EINVAL;
        core2_vpmu_cxt->global_status &= ~msr_content;
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
        return 0;
    case MSR_CORE_PERF_GLOBAL_STATUS:
        gdprintk(XENLOG_INFO, "Cannot write read-only MSR: "
                 "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
        return -EINVAL;
    case MSR_IA32_PEBS_ENABLE:
        if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
                              XENPMU_FEATURE_ARCH_ONLY) )
            return -EINVAL;
        if ( msr_content )
            /* PEBS is reported as unavailable in MSR_IA32_MISC_ENABLE */
            return -EINVAL;
        return 0;
    case MSR_IA32_DS_AREA:
        if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
            return -EINVAL;
        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
        {
            if ( !(is_hvm_vcpu(v) ? is_canonical_address(msr_content)
                                  : __addr_ok(msr_content)) )
            {
                gdprintk(XENLOG_WARNING,
                         "Illegal address for IA32_DS_AREA: %#" PRIx64 "\n",
                         msr_content);
                return -EINVAL;
            }
            core2_vpmu_cxt->ds_area = msr_content;
            break;
        }
        gdprintk(XENLOG_WARNING, "Guest setting of DS area is ignored.\n");
        return 0;
    case MSR_CORE_PERF_GLOBAL_CTRL:
        if ( msr_content & global_ctrl_mask )
            return -EINVAL;
        core2_vpmu_cxt->global_ctrl = msr_content;
        break;
    case MSR_CORE_PERF_FIXED_CTR_CTRL:
        if ( msr_content & fixed_ctrl_mask )
            return -EINVAL;

        if ( is_hvm_vcpu(v) )
            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
                               &core2_vpmu_cxt->global_ctrl);
        else
            rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
        *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
        if ( msr_content != 0 )
        {
            u64 val = msr_content;
            for ( i = 0; i < fixed_pmc_cnt; i++ )
            {
                if ( val & 3 )
                    *enabled_cntrs |= (1ULL << 32) << i;
                val >>= FIXED_CTR_CTRL_BITS;
            }
        }

        core2_vpmu_cxt->fixed_ctrl = msr_content;
        break;
    default:
        tmp = msr - MSR_P6_EVNTSEL(0);
        if ( tmp >= 0 && tmp < arch_pmc_cnt )
        {
            bool_t blocked = 0;
            uint64_t umaskevent = msr_content & MSR_IA32_CMT_EVTSEL_UE_MASK;
            struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
                vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

            if ( msr_content & ARCH_CTRL_MASK )
                return -EINVAL;

            /* PMC filters */
            if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
                                  XENPMU_FEATURE_ARCH_ONLY) )
            {
                blocked = 1;
                switch ( umaskevent )
                {
                /*
                 * See the Pre-Defined Architectural Performance Events table
                 * from the Intel 64 and IA-32 Architectures Software
                 * Developer's Manual, Volume 3B, System Programming Guide,
                 * Part 2.
                 */
                case 0x003c:	/* UnHalted Core Cycles */
                case 0x013c:	/* UnHalted Reference Cycles */
                case 0x00c0:	/* Instructions Retired */
                    blocked = 0;
                    break;
                }
            }

            if ( vpmu_features & XENPMU_FEATURE_ARCH_ONLY )
            {
                /* Additional counters beyond IPC only; blocked already set. */
                switch ( umaskevent )
                {
                case 0x4f2e:	/* Last Level Cache References */
                case 0x412e:	/* Last Level Cache Misses */
                case 0x00c4:	/* Branch Instructions Retired */
                case 0x00c5:	/* All Branch Mispredict Retired */
                    blocked = 0;
                    break;
                }
            }

            if ( blocked )
                return -EINVAL;

            if ( is_hvm_vcpu(v) )
                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
                                   &core2_vpmu_cxt->global_ctrl);
            else
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);

            if ( msr_content & ARCH_CNTR_ENABLED )
                *enabled_cntrs |= 1ULL << tmp;
            else
                *enabled_cntrs &= ~(1ULL << tmp);

            xen_pmu_cntr_pair[tmp].control = msr_content;
        }
    }

    if ( type != MSR_TYPE_GLOBAL )
        wrmsrl(msr, msr_content);
    else
    {
        if ( is_hvm_vcpu(v) )
            vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
        else
            wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
    }

    if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
         (core2_vpmu_cxt->ds_area != 0) )
        vpmu_set(vpmu, VPMU_RUNNING);
    else
        vpmu_reset(vpmu, VPMU_RUNNING);

    return 0;
}

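/*
 * Emulate a guest RDMSR of a PMU MSR.  Most values are read straight from
 * hardware since the context is loaded; MSR_IA32_MISC_ENABLE is adjusted to
 * advertise BTS availability and to hide PEBS.
 */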
static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
{
    int type = -1, index = -1;
    struct vcpu *v = current;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;

    if ( core2_vpmu_msr_common_check(msr, &type, &index) )
    {
        core2_vpmu_cxt = vpmu->context;
        switch ( msr )
        {
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
            *msr_content = 0;
            break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
            *msr_content = core2_vpmu_cxt->global_status;
            break;
        case MSR_CORE_PERF_GLOBAL_CTRL:
            if ( is_hvm_vcpu(v) )
                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
            else
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
            break;
        default:
            rdmsrl(msr, *msr_content);
        }
    }
    else if ( msr == MSR_IA32_MISC_ENABLE )
    {
        /* Extension for BTS */
        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
            *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
        *msr_content |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
    }

    return 0;
}

/* Dump vpmu info on console, called in the context of keyhandler 'q'. */
static void core2_vpmu_dump(const struct vcpu *v)
{
    const struct vpmu_struct *vpmu = vcpu_vpmu(v);
    unsigned int i;
    const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
    u64 val;
    uint64_t *fixed_counters;
    struct xen_pmu_cntr_pair *cntr_pair;

    if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return;

    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
    {
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
            printk("    vPMU loaded\n");
        else
            printk("    vPMU allocated\n");
        return;
    }

    printk("    vPMU running\n");

    cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
    fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);

    /* Print the contents of the counter and its configuration MSR. */
    for ( i = 0; i < arch_pmc_cnt; i++ )
        printk("      general_%d: 0x%016lx ctrl: 0x%016lx\n",
            i, cntr_pair[i].counter, cntr_pair[i].control);

    /*
     * Each fixed counter is configured by a 4-bit field in
     * MSR_CORE_PERF_FIXED_CTR_CTRL.
     */
    val = core2_vpmu_cxt->fixed_ctrl;
    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        printk("      fixed_%d:   0x%016lx ctrl: %#lx\n",
               i, fixed_counters[i],
               val & FIXED_CTR_CTRL_MASK);
        val >>= FIXED_CTR_CTRL_BITS;
    }
}

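/*
 * PMU interrupt (PMI) handler: latch any overflow bits into the guest's
 * global status (applying the counter quirk if needed) and acknowledge them
 * in hardware.  If nothing overflowed, report success only if this looks
 * like a Trace Message interrupt (DEBUGCTL.TR set).
 */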
static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    u64 msr_content;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;

    rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
    if ( msr_content )
    {
        if ( is_pmc_quirk )
            handle_pmc_quirk(msr_content);
        core2_vpmu_cxt->global_status |= msr_content;
        msr_content &= ~global_ovf_ctrl_mask;
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
    }
    else
    {
        /* No PMC overflow but perhaps a Trace Message interrupt. */
        __vmread(GUEST_IA32_DEBUGCTL, &msr_content);
        if ( !(msr_content & IA32_DEBUGCTLMSR_TR) )
            return 0;
    }

    return 1;
}

static void core2_vpmu_destroy(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    xfree(vpmu->context);
    vpmu->context = NULL;
    xfree(vpmu->priv_context);
    vpmu->priv_context = NULL;
    if ( is_hvm_vcpu(v) && cpu_has_vmx_msr_bitmap )
        core2_vpmu_unset_msr_bitmap(v);
    release_pmu_ownership(PMU_OWNER_HVM);
    vpmu_clear(vpmu);
}

static const struct arch_vpmu_ops core2_vpmu_ops = {
    .do_wrmsr = core2_vpmu_do_wrmsr,
    .do_rdmsr = core2_vpmu_do_rdmsr,
    .do_interrupt = core2_vpmu_do_interrupt,
    .arch_vpmu_destroy = core2_vpmu_destroy,
    .arch_vpmu_save = core2_vpmu_save,
    .arch_vpmu_load = core2_vpmu_load,
    .arch_vpmu_dump = core2_vpmu_dump
};

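/*
 * Per-vCPU initialisation: check that the domain's PMU version is one we
 * emulate, probe Debug Store/BTS support (warning once if it has to be
 * disabled) and hook up the Core2 vPMU ops.  PV vCPUs allocate their
 * context right away; HVM vCPUs do so lazily on first MSR access.
 */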
int vmx_vpmu_initialise(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    u64 msr_content;
    static bool_t ds_warned;

    if ( vpmu_mode == XENPMU_MODE_OFF )
        return 0;

    if ( v->domain->arch.cpuid->basic.pmu_version <= 1 ||
         v->domain->arch.cpuid->basic.pmu_version >= 5 )
        return -EINVAL;

    if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
        return -EINVAL;

    if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
        goto func_out;
    /* Check the 'Debug Store' feature in CPUID leaf 1, EDX bit 21. */
    while ( boot_cpu_has(X86_FEATURE_DS) )
    {
        if ( !boot_cpu_has(X86_FEATURE_DTES64) )
        {
            if ( !ds_warned )
                printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
                       " - Debug Store disabled for guests\n");
            break;
        }
        vpmu_set(vpmu, VPMU_CPU_HAS_DS);
        rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
        if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
        {
            /* If BTS_UNAVAIL is set reset the DS feature. */
            vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
            if ( !ds_warned )
                printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
                       " - Debug Store disabled for guests\n");
            break;
        }

        vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
        if ( !ds_warned )
        {
            if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
                printk(XENLOG_G_INFO
                       "vpmu: CPU doesn't support CPL-Qualified BTS\n");
            printk("******************************************************\n");
            printk("** WARNING: Emulation of BTS Feature is switched on **\n");
            printk("** Using this processor feature in a virtualized    **\n");
            printk("** environment is not 100%% safe.                    **\n");
            printk("** Setting the DS buffer address with wrong values  **\n");
            printk("** may lead to hypervisor hangs or crashes.         **\n");
            printk("** It is NOT recommended for production use!        **\n");
            printk("******************************************************\n");
        }
        break;
    }
    ds_warned = 1;
 func_out:

    /* PV domains can allocate resources immediately */
    if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
        return -EIO;

    vpmu->arch_vpmu_ops = &core2_vpmu_ops;

    return 0;
}

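/*
 * Boot-time initialisation: probe the PMU version and counter counts via
 * CPUID leaf 0xa, build the reserved-bit masks used to validate guest
 * writes, and make sure the register bank fits into the shared VPMU page.
 */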
int __init core2_vpmu_init(void)
{
    u64 caps;
    unsigned int version = 0;
    unsigned int i;

    if ( current_cpu_data.cpuid_level >= 0xa )
        version = MASK_EXTR(cpuid_eax(0xa), PMU_VERSION_MASK);

    switch ( version )
    {
    case 4:
        printk(XENLOG_INFO "VPMU: PMU version 4 is not fully supported. "
               "Emulating version 3\n");
        /* FALLTHROUGH */

    case 2:
    case 3:
        break;

    default:
        printk(XENLOG_WARNING "VPMU: PMU version %u is not supported\n",
               version);
        return -EINVAL;
    }

    if ( current_cpu_data.x86 != 6 )
    {
        printk(XENLOG_WARNING "VPMU: only family 6 is supported\n");
        return -EINVAL;
    }

    arch_pmc_cnt = core2_get_arch_pmc_count();
    fixed_pmc_cnt = core2_get_fixed_pmc_count();
    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
    full_width_write = (caps >> 13) & 1;

    fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
    /* Mask the AnyThread bits for all fixed counters. */
    for ( i = 0; i < fixed_pmc_cnt; i++ )
        fixed_ctrl_mask |=
            (FIXED_CTR_CTRL_ANYTHREAD_MASK << (FIXED_CTR_CTRL_BITS * i));

    fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
    global_ctrl_mask = ~((((1ULL << fixed_pmc_cnt) - 1) << 32) |
                         ((1ULL << arch_pmc_cnt) - 1));
    global_ovf_ctrl_mask = ~(0xC000000000000000 |
                             (((1ULL << fixed_pmc_cnt) - 1) << 32) |
                             ((1ULL << arch_pmc_cnt) - 1));
    if ( version > 2 )
        /*
         * Even though we don't support Uncore counters, guests should be
         * able to clear all available overflows.
         */
        global_ovf_ctrl_mask &= ~(1ULL << 61);

    regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
              sizeof(uint64_t) * fixed_pmc_cnt +
              sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;

    check_pmc_quirk();

    if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
         sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
    {
        printk(XENLOG_WARNING
               "VPMU: Register bank does not fit into VPMU share page\n");
        arch_pmc_cnt = fixed_pmc_cnt = 0;
        return -ENOSPC;
    }

    return 0;
}