/*
 * vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain.
 *
 * Copyright (c) 2007, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * Author: Haitao Shan <haitao.shan@intel.com>
 */

#include <xen/sched.h>
#include <xen/xenoprof.h>
#include <xen/irq.h>
#include <asm/system.h>
#include <asm/regs.h>
#include <asm/types.h>
#include <asm/apic.h>
#include <asm/traps.h>
#include <asm/msr.h>
#include <asm/msr-index.h>
#include <asm/vpmu.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <public/sched.h>
#include <public/hvm/save.h>
#include <public/pmu.h>

/*
 * See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID
 * instruction.
 * cpuid 0xa - Architectural Performance Monitoring Leaf
 * Register eax
 */
#define PMU_VERSION_SHIFT 0 /* Version ID */
#define PMU_VERSION_BITS 8 /* 8 bits 0..7 */
#define PMU_VERSION_MASK (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT)

#define PMU_GENERAL_NR_SHIFT 8 /* Number of general pmu registers */
#define PMU_GENERAL_NR_BITS 8 /* 8 bits 8..15 */
#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT)

#define PMU_GENERAL_WIDTH_SHIFT 16 /* Width of general pmu registers */
#define PMU_GENERAL_WIDTH_BITS 8 /* 8 bits 16..23 */
#define PMU_GENERAL_WIDTH_MASK (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT)
/* Register edx */
#define PMU_FIXED_NR_SHIFT 0 /* Number of fixed pmu registers */
#define PMU_FIXED_NR_BITS 5 /* 5 bits 0..4 */
#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) - 1) << PMU_FIXED_NR_SHIFT)

#define PMU_FIXED_WIDTH_SHIFT 5 /* Width of fixed pmu registers */
#define PMU_FIXED_WIDTH_BITS 8 /* 8 bits 5..12 */
#define PMU_FIXED_WIDTH_MASK (((1 << PMU_FIXED_WIDTH_BITS) - 1) << PMU_FIXED_WIDTH_SHIFT)

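/*
 * Illustrative sketch only (not used directly below): a field described by
 * the SHIFT/BITS/MASK triplets above is pulled out of the CPUID output with
 * MASK_EXTR, e.g.
 *
 *     unsigned int nr_gp = MASK_EXTR(cpuid_eax(0xa), PMU_GENERAL_NR_MASK);
 *
 * which is exactly what the core2_get_*_count() helpers further down do.
 */
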
/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0))
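/*
 * MSR_IA32_PERFCTR0 (0xc1) and its full-width alias MSR_IA32_A_PERFCTR0
 * (0x4c1) differ in a single address bit, so masking an incoming MSR index
 * with MSR_PMC_ALIAS_MASK folds the alias range back onto the legacy
 * PERFCTR range (see is_core2_vpmu_msr() below).
 */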
static bool_t __read_mostly full_width_write;

/*
 * MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed
 * counters. 4 bits for every counter.
 */
#define FIXED_CTR_CTRL_BITS 4
#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
#define FIXED_CTR_CTRL_ANYTHREAD_MASK 0x4
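/*
 * Illustrative example only: the control field of fixed counter i is read
 * out of a MSR_CORE_PERF_FIXED_CTR_CTRL value 'val' as
 *
 *     (val >> (i * FIXED_CTR_CTRL_BITS)) & FIXED_CTR_CTRL_MASK
 *
 * as done in core2_vpmu_verify() and core2_vpmu_dump().
 */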

#define ARCH_CNTR_ENABLED   (1ULL << 22)
#define ARCH_CNTR_PIN_CONTROL (1ULL << 19)

/* Number of general-purpose and fixed performance counters */
static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;

/* Masks used for testing whether an MSR is valid */
#define ARCH_CTRL_MASK (~((1ull << 32) - 1) | (1ull << 21) | ARCH_CNTR_PIN_CONTROL)
static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
static uint64_t __read_mostly global_ovf_ctrl_mask, global_ctrl_mask;

/* Total size of PMU registers block (copied to/from PV(H) guest) */
static unsigned int __read_mostly regs_sz;
/* Offset into context of the beginning of PMU register block */
static const unsigned int regs_off =
    sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
    sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
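/*
 * Note: regs_off equals the combined size of the two counter-offset fields
 * at the start of struct xen_pmu_intel_ctxt, i.e. those placement fields
 * themselves are not part of the block copied to/from the guest.
 */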

/*
 * QUIRK to work around an issue on various family 6 cpus.
 * The issue leads to endless PMC interrupt loops on the processor.
 * If a PMC reaches the value 0 while the interrupt handler is running, the
 * counter stays at 0 and a new interrupt is triggered immediately after the
 * handler finishes.
 * A workaround is to read all flagged counters and, if the value is 0, to
 * write 1 (or any other value != 0) into it.
 * No erratum has been published and the real cause of this behaviour is
 * unknown.
 */
bool_t __read_mostly is_pmc_quirk;

static void check_pmc_quirk(void)
{
    if ( current_cpu_data.x86 == 6 )
        is_pmc_quirk = 1;
    else
        is_pmc_quirk = 0;
}

static void handle_pmc_quirk(u64 msr_content)
{
    int i;
    u64 val;

    if ( !is_pmc_quirk )
        return;

    val = msr_content;
    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        if ( val & 0x1 )
        {
            u64 cnt;
            rdmsrl(MSR_P6_PERFCTR(i), cnt);
            if ( cnt == 0 )
                wrmsrl(MSR_P6_PERFCTR(i), 1);
        }
        val >>= 1;
    }
    val = msr_content >> 32;
    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        if ( val & 0x1 )
        {
            u64 cnt;
            rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt);
            if ( cnt == 0 )
                wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1);
        }
        val >>= 1;
    }
}

/*
 * Read the number of general counters via CPUID.0xa:EAX[15..8].
 */
static int core2_get_arch_pmc_count(void)
{
    u32 eax;

    eax = cpuid_eax(0xa);
    return MASK_EXTR(eax, PMU_GENERAL_NR_MASK);
}

/*
 * Read the number of fixed counters via CPUID.0xa:EDX[4..0].
 */
static int core2_get_fixed_pmc_count(void)
{
    u32 edx = cpuid_edx(0xa);

    return MASK_EXTR(edx, PMU_FIXED_NR_MASK);
}

/* edx bits 5-12: Bit width of fixed-function performance counters */
static int core2_get_bitwidth_fix_count(void)
{
    u32 edx;

    edx = cpuid_edx(0xa);
    return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK);
}

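/*
 * Classify an MSR index: return 1 and set *type (and *index for counter and
 * event-select MSRs) if the MSR belongs to the virtualised PMU register set,
 * 0 otherwise.
 */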
static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
{
    u32 msr_index_pmc;

    switch ( msr_index )
    {
    case MSR_CORE_PERF_FIXED_CTR_CTRL:
    case MSR_IA32_DS_AREA:
    case MSR_IA32_PEBS_ENABLE:
        *type = MSR_TYPE_CTRL;
        return 1;

    case MSR_CORE_PERF_GLOBAL_CTRL:
    case MSR_CORE_PERF_GLOBAL_STATUS:
    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        *type = MSR_TYPE_GLOBAL;
        return 1;

    default:

        if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
             (msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) )
        {
            *index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
            *type = MSR_TYPE_COUNTER;
            return 1;
        }

        if ( (msr_index >= MSR_P6_EVNTSEL(0)) &&
             (msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) )
        {
            *index = msr_index - MSR_P6_EVNTSEL(0);
            *type = MSR_TYPE_ARCH_CTRL;
            return 1;
        }

        msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
        if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) &&
             (msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) )
        {
            *type = MSR_TYPE_ARCH_COUNTER;
            *index = msr_index_pmc - MSR_IA32_PERFCTR0;
            return 1;
        }
        return 0;
    }
}

static void core2_vpmu_set_msr_bitmap(struct vcpu *v)
{
    unsigned int i;

    /* Allow the guest to read/write PMU counter MSRs directly. */
    for ( i = 0; i < fixed_pmc_cnt; i++ )
        vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        vmx_clear_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);

        if ( full_width_write )
            vmx_clear_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
    }

    /* Allow the guest to read non-global PMU control MSRs directly. */
    for ( i = 0; i < arch_pmc_cnt; i++ )
        vmx_clear_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);

    vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
    vmx_clear_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
}

static void core2_vpmu_unset_msr_bitmap(struct vcpu *v)
{
    unsigned int i;

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        vmx_set_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);

        if ( full_width_write )
            vmx_set_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
    }

    for ( i = 0; i < arch_pmc_cnt; i++ )
        vmx_set_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);

    vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
    vmx_set_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
}

static inline void __core2_vpmu_save(struct vcpu *v)
{
    int i;
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
    for ( i = 0; i < arch_pmc_cnt; i++ )
        rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);

    if ( !is_hvm_vcpu(v) )
        rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
}

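/*
 * Context-switch save path: stop the counters for PV(H) vcpus, save the
 * counter values if the context is loaded, re-arm MSR interception for an
 * idle HVM vPMU so that the next guest access triggers a lazy reload, and
 * copy the register block to the guest's shared page when requested.
 */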
static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( !is_hvm_vcpu(v) )
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);

    if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
        return 0;

    __core2_vpmu_save(v);

    /* Unset PMU MSR bitmap to trap lazy load. */
    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && is_hvm_vcpu(v) &&
         cpu_has_vmx_msr_bitmap )
        core2_vpmu_unset_msr_bitmap(v);

    if ( to_guest )
    {
        ASSERT(!has_vlapic(v->domain));
        memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
               vpmu->context + regs_off, regs_sz);
    }

    return 1;
}

static inline void __core2_vpmu_load(struct vcpu *v)
{
    unsigned int i, pmc_start;
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

    for ( i = 0; i < fixed_pmc_cnt; i++ )
        wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);

    if ( full_width_write )
        pmc_start = MSR_IA32_A_PERFCTR0;
    else
        pmc_start = MSR_IA32_PERFCTR0;
    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter);
        wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
    }

    wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
    if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) )
        wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area);

    if ( !is_hvm_vcpu(v) )
    {
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl);
        core2_vpmu_cxt->global_ovf_ctrl = 0;
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
    }
}

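/*
 * Sanity-check guest-supplied register values against the host-derived
 * masks before they are loaded into hardware.  Also rebuild the bitmap of
 * enabled counters (fixed counters in bits 32 and up, arch counters in the
 * low bits), which is cached in the per-vcpu priv_context.
 */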
static int core2_vpmu_verify(struct vcpu *v)
{
    unsigned int i;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
    uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
    struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
        vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
    uint64_t fixed_ctrl;
    uint64_t *priv_context = vpmu->priv_context;
    uint64_t enabled_cntrs = 0;

    if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
        return -EINVAL;
    if ( core2_vpmu_cxt->global_ctrl & global_ctrl_mask )
        return -EINVAL;
    if ( core2_vpmu_cxt->pebs_enable )
        return -EINVAL;

    fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
    if ( fixed_ctrl & fixed_ctrl_mask )
        return -EINVAL;

    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        if ( fixed_counters[i] & fixed_counters_mask )
            return -EINVAL;
        if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
            enabled_cntrs |= (1ULL << i);
    }
    enabled_cntrs <<= 32;

    for ( i = 0; i < arch_pmc_cnt; i++ )
    {
        uint64_t control = xen_pmu_cntr_pair[i].control;

        if ( control & ARCH_CTRL_MASK )
            return -EINVAL;
        if ( control & ARCH_CNTR_ENABLED )
            enabled_cntrs |= (1ULL << i);
    }

    if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) &&
         !(is_hvm_vcpu(v)
           ? is_canonical_address(core2_vpmu_cxt->ds_area)
           : __addr_ok(core2_vpmu_cxt->ds_area)) )
        return -EINVAL;

    if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
         (core2_vpmu_cxt->ds_area != 0) )
        vpmu_set(vpmu, VPMU_RUNNING);
    else
        vpmu_reset(vpmu, VPMU_RUNNING);

    *priv_context = enabled_cntrs;

    return 0;
}

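/*
 * Context-switch load path.  For requests coming from a PV(H) guest the
 * register block is first copied in from the shared page and verified;
 * invalid input is wiped and leaves the hardware state untouched.
 */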
static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
        return 0;

    if ( from_guest )
    {
        int ret;

        ASSERT(!has_vlapic(v->domain));

        memcpy(vpmu->context + regs_off,
               (void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
               regs_sz);

        ret = core2_vpmu_verify(v);
        if ( ret )
        {
            /*
             * Not necessary since we should never load the context until
             * guest provides valid values. But just to be safe.
             */
            memset(vpmu->context + regs_off, 0, regs_sz);
            return ret;
        }
    }

    vpmu_set(vpmu, VPMU_CONTEXT_LOADED);

    __core2_vpmu_load(v);

    return 0;
}

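/*
 * The VPMU context is laid out as struct xen_pmu_intel_ctxt immediately
 * followed by the fixed counter values and then the arch counter/control
 * pairs.  The two offset fields at the start of the structure record where
 * each block begins and are what vpmu_reg_pointer() uses to locate them.
 */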
static int core2_vpmu_alloc_resource(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL;
    uint64_t *p = NULL;

    if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
        return 0;

    if ( is_hvm_vcpu(v) )
    {
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
        if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
            goto out_err;

        if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
            goto out_err;
        vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
    }

    core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
                                   sizeof(uint64_t) * fixed_pmc_cnt +
                                   sizeof(struct xen_pmu_cntr_pair) *
                                   arch_pmc_cnt);
    p = xzalloc(uint64_t);
    if ( !core2_vpmu_cxt || !p )
        goto out_err;

    core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt);
    core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters +
                                    sizeof(uint64_t) * fixed_pmc_cnt;

    vpmu->context = core2_vpmu_cxt;
    vpmu->priv_context = p;

    if ( !has_vlapic(v->domain) )
    {
        /* Copy fixed/arch register offsets to shared area */
        ASSERT(vpmu->xenpmu_data);
        memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
    }

    vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);

    return 1;

 out_err:
    release_pmu_ownership(PMU_OWNER_HVM);

    xfree(core2_vpmu_cxt);
    xfree(p);

    printk("Failed to allocate VPMU resources for domain %u vcpu %u\n",
           v->domain->domain_id, v->vcpu_id);

    return 0;
}

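/*
 * Common gate for guest PMU MSR accesses: check that the MSR belongs to the
 * vPMU, allocate the context on first use, and lazily load it into hardware
 * (dropping MSR interception for HVM guests) if it isn't loaded yet.
 */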
static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(current);

    if ( !is_core2_vpmu_msr(msr_index, type, index) )
        return 0;

    if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) &&
         !core2_vpmu_alloc_resource(current) )
        return 0;

    /* Do the lazy load stuff. */
    if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
    {
        __core2_vpmu_load(current);
        vpmu_set(vpmu, VPMU_CONTEXT_LOADED);

        if ( is_hvm_vcpu(current) && cpu_has_vmx_msr_bitmap )
            core2_vpmu_set_msr_bitmap(current);
    }
    return 1;
}

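/*
 * Emulate a guest WRMSR to a PMU register: validate the value, update the
 * software context, filter event selects when the IPC/ARCH-only feature
 * restrictions are active, and propagate the write to hardware (via the
 * VMX guest MSR area for MSR_CORE_PERF_GLOBAL_CTRL on HVM).
 */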
static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
                               uint64_t supported)
{
    int i, tmp;
    int type = -1, index = -1;
    struct vcpu *v = current;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
    uint64_t *enabled_cntrs;

    if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
    {
        /* Special handling for BTS */
        if ( msr == MSR_IA32_DEBUGCTLMSR )
        {
            supported |= IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS |
                         IA32_DEBUGCTLMSR_BTINT;

            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
                supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS |
                             IA32_DEBUGCTLMSR_BTS_OFF_USR;
            if ( !(msr_content & ~supported) &&
                 vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
                return 0;
            if ( (msr_content & supported) &&
                 !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
                printk(XENLOG_G_WARNING
                       "%pv: Debug Store unsupported on this CPU\n",
                       current);
        }
        return -EINVAL;
    }

    ASSERT(!supported);

    if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
        /* Writing unsupported bits to a fixed counter */
        return -EINVAL;

    core2_vpmu_cxt = vpmu->context;
    enabled_cntrs = vpmu->priv_context;
    switch ( msr )
    {
    case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
        if ( msr_content & global_ovf_ctrl_mask )
            return -EINVAL;
        core2_vpmu_cxt->global_status &= ~msr_content;
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
        return 0;
    case MSR_CORE_PERF_GLOBAL_STATUS:
        gdprintk(XENLOG_INFO, "Cannot write read-only MSR: "
                 "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
        return -EINVAL;
    case MSR_IA32_PEBS_ENABLE:
        if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
                              XENPMU_FEATURE_ARCH_ONLY) )
            return -EINVAL;
        if ( msr_content )
            /* PEBS is reported as unavailable in MSR_IA32_MISC_ENABLE */
            return -EINVAL;
        return 0;
    case MSR_IA32_DS_AREA:
        if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
            return -EINVAL;
        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
        {
            if ( !(is_hvm_vcpu(v) ? is_canonical_address(msr_content)
                                  : __addr_ok(msr_content)) )
            {
                gdprintk(XENLOG_WARNING,
                         "Illegal address for IA32_DS_AREA: %#" PRIx64 "\n",
                         msr_content);
                return -EINVAL;
            }
            core2_vpmu_cxt->ds_area = msr_content;
            break;
        }
        gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
        return 0;
    case MSR_CORE_PERF_GLOBAL_CTRL:
        if ( msr_content & global_ctrl_mask )
            return -EINVAL;
        core2_vpmu_cxt->global_ctrl = msr_content;
        break;
    case MSR_CORE_PERF_FIXED_CTR_CTRL:
        if ( msr_content & fixed_ctrl_mask )
            return -EINVAL;

        if ( is_hvm_vcpu(v) )
            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
                               &core2_vpmu_cxt->global_ctrl);
        else
            rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
        *enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
        if ( msr_content != 0 )
        {
            u64 val = msr_content;
            for ( i = 0; i < fixed_pmc_cnt; i++ )
            {
                if ( val & 3 )
                    *enabled_cntrs |= (1ULL << 32) << i;
                val >>= FIXED_CTR_CTRL_BITS;
            }
        }

        core2_vpmu_cxt->fixed_ctrl = msr_content;
        break;
    default:
        tmp = msr - MSR_P6_EVNTSEL(0);
        if ( tmp >= 0 && tmp < arch_pmc_cnt )
        {
            bool_t blocked = 0;
            uint64_t umaskevent = msr_content & MSR_IA32_CMT_EVTSEL_UE_MASK;
            struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
                vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);

            if ( msr_content & ARCH_CTRL_MASK )
                return -EINVAL;

            /* PMC filters */
            if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
                                  XENPMU_FEATURE_ARCH_ONLY) )
            {
                blocked = 1;
                switch ( umaskevent )
                {
                /*
                 * See the Pre-Defined Architectural Performance Events table
                 * from the Intel 64 and IA-32 Architectures Software
                 * Developer's Manual, Volume 3B, System Programming Guide,
                 * Part 2.
                 */
                case 0x003c: /* UnHalted Core Cycles */
                case 0x013c: /* UnHalted Reference Cycles */
                case 0x00c0: /* Instructions Retired */
                    blocked = 0;
                    break;
                }
            }

            if ( vpmu_features & XENPMU_FEATURE_ARCH_ONLY )
            {
                /* Additional counters beyond IPC only; blocked already set. */
                switch ( umaskevent )
                {
                case 0x4f2e: /* Last Level Cache References */
                case 0x412e: /* Last Level Cache Misses */
                case 0x00c4: /* Branch Instructions Retired */
                case 0x00c5: /* All Branch Mispredict Retired */
                    blocked = 0;
                    break;
                }
            }

            if ( blocked )
                return -EINVAL;

            if ( is_hvm_vcpu(v) )
                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
                                   &core2_vpmu_cxt->global_ctrl);
            else
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);

            if ( msr_content & ARCH_CNTR_ENABLED )
                *enabled_cntrs |= 1ULL << tmp;
            else
                *enabled_cntrs &= ~(1ULL << tmp);

            xen_pmu_cntr_pair[tmp].control = msr_content;
        }
    }

    if ( type != MSR_TYPE_GLOBAL )
        wrmsrl(msr, msr_content);
    else
    {
        if ( is_hvm_vcpu(v) )
            vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
        else
            wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
    }

    if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
         (core2_vpmu_cxt->ds_area != 0) )
        vpmu_set(vpmu, VPMU_RUNNING);
    else
        vpmu_reset(vpmu, VPMU_RUNNING);

    return 0;
}

static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
{
    int type = -1, index = -1;
    struct vcpu *v = current;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt;

    if ( core2_vpmu_msr_common_check(msr, &type, &index) )
    {
        core2_vpmu_cxt = vpmu->context;
        switch ( msr )
        {
        case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
            *msr_content = 0;
            break;
        case MSR_CORE_PERF_GLOBAL_STATUS:
            *msr_content = core2_vpmu_cxt->global_status;
            break;
        case MSR_CORE_PERF_GLOBAL_CTRL:
            if ( is_hvm_vcpu(v) )
                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
            else
                rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
            break;
        default:
            rdmsrl(msr, *msr_content);
        }
    }
    else if ( msr == MSR_IA32_MISC_ENABLE )
    {
        /* Extension for BTS */
        if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
            *msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
        *msr_content |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
    }

    return 0;
}

/* Dump vpmu info on console, called in the context of keyhandler 'q'. */
static void core2_vpmu_dump(const struct vcpu *v)
{
    const struct vpmu_struct *vpmu = vcpu_vpmu(v);
    unsigned int i;
    const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
    u64 val;
    uint64_t *fixed_counters;
    struct xen_pmu_cntr_pair *cntr_pair;

    if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
        return;

    if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
    {
        if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
            printk(" vPMU loaded\n");
        else
            printk(" vPMU allocated\n");
        return;
    }

    printk(" vPMU running\n");

    cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
    fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);

    /* Print the contents of the counter and its configuration msr. */
    for ( i = 0; i < arch_pmc_cnt; i++ )
        printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n",
               i, cntr_pair[i].counter, cntr_pair[i].control);

    /*
     * The configuration of each fixed counter is 4 bits wide, packed into
     * MSR_CORE_PERF_FIXED_CTR_CTRL.
     */
    val = core2_vpmu_cxt->fixed_ctrl;
    for ( i = 0; i < fixed_pmc_cnt; i++ )
    {
        printk(" fixed_%d: 0x%016lx ctrl: %#lx\n",
               i, fixed_counters[i],
               val & FIXED_CTR_CTRL_MASK);
        val >>= FIXED_CTR_CTRL_BITS;
    }
}

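/*
 * PMU interrupt handler hook: latch any overflow bits into the virtual
 * GLOBAL_STATUS and acknowledge them in hardware.  Returns 0 when neither a
 * counter overflow nor a BTS/trace interrupt is pending, so the caller can
 * treat the interrupt as not ours.
 */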
static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    u64 msr_content;
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;

    rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
    if ( msr_content )
    {
        if ( is_pmc_quirk )
            handle_pmc_quirk(msr_content);
        core2_vpmu_cxt->global_status |= msr_content;
        msr_content &= ~global_ovf_ctrl_mask;
        wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
    }
    else
    {
        /* No PMC overflow but perhaps a Trace Message interrupt. */
        __vmread(GUEST_IA32_DEBUGCTL, &msr_content);
        if ( !(msr_content & IA32_DEBUGCTLMSR_TR) )
            return 0;
    }

    return 1;
}

static void core2_vpmu_destroy(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);

    xfree(vpmu->context);
    vpmu->context = NULL;
    xfree(vpmu->priv_context);
    vpmu->priv_context = NULL;
    if ( is_hvm_vcpu(v) && cpu_has_vmx_msr_bitmap )
        core2_vpmu_unset_msr_bitmap(v);
    release_pmu_ownership(PMU_OWNER_HVM);
    vpmu_clear(vpmu);
}

static const struct arch_vpmu_ops core2_vpmu_ops = {
    .do_wrmsr = core2_vpmu_do_wrmsr,
    .do_rdmsr = core2_vpmu_do_rdmsr,
    .do_interrupt = core2_vpmu_do_interrupt,
    .arch_vpmu_destroy = core2_vpmu_destroy,
    .arch_vpmu_save = core2_vpmu_save,
    .arch_vpmu_load = core2_vpmu_load,
    .arch_vpmu_dump = core2_vpmu_dump
};

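/*
 * Per-vcpu initialisation: check the PMU version exposed through the
 * domain's CPUID policy, probe Debug Store/BTS support when the BTS feature
 * is requested, and install core2_vpmu_ops.  PV vcpus allocate their
 * context right away, HVM vcpus do so lazily on the first PMU MSR access.
 */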
int vmx_vpmu_initialise(struct vcpu *v)
{
    struct vpmu_struct *vpmu = vcpu_vpmu(v);
    u64 msr_content;
    static bool_t ds_warned;

    if ( vpmu_mode == XENPMU_MODE_OFF )
        return 0;

    if ( v->domain->arch.cpuid->basic.pmu_version <= 1 ||
         v->domain->arch.cpuid->basic.pmu_version >= 5 )
        return -EINVAL;

    if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
        return -EINVAL;

    if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
        goto func_out;
    /* Check the 'Debug Store' feature in CPUID.1:EDX[21] */
    while ( boot_cpu_has(X86_FEATURE_DS) )
    {
        if ( !boot_cpu_has(X86_FEATURE_DTES64) )
        {
            if ( !ds_warned )
                printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
                       " - Debug Store disabled for guests\n");
            break;
        }
        vpmu_set(vpmu, VPMU_CPU_HAS_DS);
        rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
        if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
        {
            /* If BTS_UNAVAIL is set reset the DS feature. */
            vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
            if ( !ds_warned )
                printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
                       " - Debug Store disabled for guests\n");
            break;
        }

        vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
        if ( !ds_warned )
        {
            if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
                printk(XENLOG_G_INFO
                       "vpmu: CPU doesn't support CPL-Qualified BTS\n");
            printk("******************************************************\n");
            printk("** WARNING: Emulation of BTS Feature is switched on **\n");
            printk("** Using this processor feature in a virtualized **\n");
            printk("** environment is not 100%% safe. **\n");
            printk("** Setting the DS buffer address with wrong values **\n");
            printk("** may lead to hypervisor hangs or crashes. **\n");
            printk("** It is NOT recommended for production use! **\n");
            printk("******************************************************\n");
        }
        break;
    }
    ds_warned = 1;
 func_out:

    /* PV domains can allocate resources immediately */
    if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
        return -EIO;

    vpmu->arch_vpmu_ops = &core2_vpmu_ops;

    return 0;
}

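/*
 * Host-wide initialisation: determine counter counts and widths from CPUID
 * leaf 0xa, derive the validity masks used when checking guest-written
 * values, and make sure the register bank fits in the shared VPMU page.
 */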
int __init core2_vpmu_init(void)
{
    u64 caps;
    unsigned int version = 0;
    unsigned int i;

    if ( current_cpu_data.cpuid_level >= 0xa )
        version = MASK_EXTR(cpuid_eax(0xa), PMU_VERSION_MASK);

    switch ( version )
    {
    case 4:
        printk(XENLOG_INFO "VPMU: PMU version 4 is not fully supported. "
               "Emulating version 3\n");
        /* FALLTHROUGH */

    case 2:
    case 3:
        break;

    default:
        printk(XENLOG_WARNING "VPMU: PMU version %u is not supported\n",
               version);
        return -EINVAL;
    }

    if ( current_cpu_data.x86 != 6 )
    {
        printk(XENLOG_WARNING "VPMU: only family 6 is supported\n");
        return -EINVAL;
    }

    arch_pmc_cnt = core2_get_arch_pmc_count();
    fixed_pmc_cnt = core2_get_fixed_pmc_count();
    rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
    full_width_write = (caps >> 13) & 1;

    fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
    /* Mask the AnyThread bits for all fixed counters */
    for ( i = 0; i < fixed_pmc_cnt; i++ )
        fixed_ctrl_mask |=
            (FIXED_CTR_CTRL_ANYTHREAD_MASK << (FIXED_CTR_CTRL_BITS * i));

    fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
    global_ctrl_mask = ~((((1ULL << fixed_pmc_cnt) - 1) << 32) |
                         ((1ULL << arch_pmc_cnt) - 1));
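    /*
     * Bit 63 (CondChgd) and bit 62 (DS buffer overflow) of
     * IA32_PERF_GLOBAL_OVF_CTRL may always be cleared by the guest, hence
     * the 0xC000000000000000 term below.
     */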
    global_ovf_ctrl_mask = ~(0xC000000000000000 |
                             (((1ULL << fixed_pmc_cnt) - 1) << 32) |
                             ((1ULL << arch_pmc_cnt) - 1));
    if ( version > 2 )
        /*
         * Even though we don't support Uncore counters guests should be
         * able to clear all available overflows.
         */
        global_ovf_ctrl_mask &= ~(1ULL << 61);

    regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
              sizeof(uint64_t) * fixed_pmc_cnt +
              sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;

    check_pmc_quirk();

    if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
         sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
    {
        printk(XENLOG_WARNING
               "VPMU: Register bank does not fit into VPMU share page\n");
        arch_pmc_cnt = fixed_pmc_cnt = 0;
        return -ENOSPC;
    }

    return 0;
}