/*
* vpmu_core2.c: CORE 2 specific PMU virtualization for HVM domain.
*
* Copyright (c) 2007, Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
*
* Author: Haitao Shan
*/
#include <xen/err.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/xenoprof.h>
#include <xen/irq.h>
#include <asm/system.h>
#include <asm/regs.h>
#include <asm/types.h>
#include <asm/apic.h>
#include <asm/traps.h>
#include <asm/msr.h>
#include <asm/msr-index.h>
#include <asm/vpmu.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
#include <public/pmu.h>
/*
* See Intel SDM Vol 2a Instruction Set Reference chapter 3 for CPUID
* instruction.
* cpuid 0xa - Architectural Performance Monitoring Leaf
* Register eax
*/
#define PMU_VERSION_SHIFT 0 /* Version ID */
#define PMU_VERSION_BITS 8 /* 8 bits 0..7 */
#define PMU_VERSION_MASK (((1 << PMU_VERSION_BITS) - 1) << PMU_VERSION_SHIFT)
#define PMU_GENERAL_NR_SHIFT 8 /* Number of general pmu registers */
#define PMU_GENERAL_NR_BITS 8 /* 8 bits 8..15 */
#define PMU_GENERAL_NR_MASK (((1 << PMU_GENERAL_NR_BITS) - 1) << PMU_GENERAL_NR_SHIFT)
#define PMU_GENERAL_WIDTH_SHIFT 16 /* Width of general pmu registers */
#define PMU_GENERAL_WIDTH_BITS 8 /* 8 bits 16..23 */
#define PMU_GENERAL_WIDTH_MASK (((1 << PMU_GENERAL_WIDTH_BITS) - 1) << PMU_GENERAL_WIDTH_SHIFT)
/* Register edx */
#define PMU_FIXED_NR_SHIFT 0 /* Number of fixed pmu registers */
#define PMU_FIXED_NR_BITS 5 /* 5 bits 0..4 */
#define PMU_FIXED_NR_MASK (((1 << PMU_FIXED_NR_BITS) -1) << PMU_FIXED_NR_SHIFT)
#define PMU_FIXED_WIDTH_SHIFT 5 /* Width of fixed pmu registers */
#define PMU_FIXED_WIDTH_BITS 8 /* 8 bits 5..12 */
#define PMU_FIXED_WIDTH_MASK (((1 << PMU_FIXED_WIDTH_BITS) -1) << PMU_FIXED_WIDTH_SHIFT)
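/*
 * Worked example (illustrative value): with CPUID.0xa:EAX = 0x07300403 the
 * masks above decode to version 3 (PMU_VERSION_MASK), 4 general-purpose
 * counters (PMU_GENERAL_NR_MASK), each 0x30 = 48 bits wide
 * (PMU_GENERAL_WIDTH_MASK).
 */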
/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_A_PERFCTR0))
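/*
 * MSR_IA32_PERFCTR0 is 0xc1 and MSR_IA32_A_PERFCTR0 is 0x4c1, so the XOR
 * above leaves only bit 10 set and the mask clears it: ANDing an MSR index
 * with MSR_PMC_ALIAS_MASK folds the full-width aliases back onto the
 * legacy counter range.
 */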
static bool_t __read_mostly full_width_write;
/*
* MSR_CORE_PERF_FIXED_CTR_CTRL contains the configuration of all fixed
* counters. 4 bits for every counter.
*/
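/*
 * Layout of each 4-bit field, for fixed counter i (at bit position 4*i):
 *   bits [1:0]: enable (0 = disabled, 1 = OS, 2 = user, 3 = all rings)
 *   bit  2:     AnyThread
 *   bit  3:     PMI on overflow
 */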
#define FIXED_CTR_CTRL_BITS 4
#define FIXED_CTR_CTRL_MASK ((1 << FIXED_CTR_CTRL_BITS) - 1)
#define FIXED_CTR_CTRL_ANYTHREAD_MASK 0x4
#define ARCH_CNTR_ENABLED (1ULL << 22)
#define ARCH_CNTR_PIN_CONTROL (1ULL << 19)
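/*
 * In IA32_PERFEVTSELx bit 22 is EN and bit 19 is PC (pin control); bit 21
 * (AnyThread) and the upper 32 bits are disallowed via ARCH_CTRL_MASK
 * below.
 */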
/* Number of general-purpose and fixed performance counters */
static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
/* Masks used for testing whether an MSR is valid */
#define ARCH_CTRL_MASK (~((1ull << 32) - 1) | (1ull << 21) | ARCH_CNTR_PIN_CONTROL)
static uint64_t __read_mostly fixed_ctrl_mask, fixed_counters_mask;
static uint64_t __read_mostly global_ovf_ctrl_mask, global_ctrl_mask;
/* Total size of PMU registers block (copied to/from PV(H) guest) */
static unsigned int __read_mostly regs_sz;
/* Offset into context of the beginning of PMU register block */
static const unsigned int regs_off =
sizeof(((struct xen_pmu_intel_ctxt *)0)->fixed_counters) +
sizeof(((struct xen_pmu_intel_ctxt *)0)->arch_counters);
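/*
 * The two members skipped above hold the guest-visible offsets of the
 * fixed and architectural counter arrays (filled in by
 * core2_vpmu_alloc_resource()); everything past them is raw register
 * state, copied to/from the guest as one block of regs_sz bytes.
 */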
/*
 * QUIRK to work around an issue on various family 6 CPUs.
 * The issue leads to endless PMC interrupt loops on the processor.
 * If the interrupt handler is running and a PMC reaches the value 0, this
 * value remains there forever and immediately triggers a new interrupt once
 * the handler finishes.
 * The workaround is to read all flagged counters and, if the value is 0,
 * write 1 (or any other value != 0) into it.
 * No erratum exists for this; the real cause of the behaviour is unknown.
 */
bool_t __read_mostly is_pmc_quirk;
static void check_pmc_quirk(void)
{
if ( current_cpu_data.x86 == 6 )
is_pmc_quirk = 1;
else
is_pmc_quirk = 0;
}
static void handle_pmc_quirk(u64 msr_content)
{
int i;
u64 val;
if ( !is_pmc_quirk )
return;
val = msr_content;
for ( i = 0; i < arch_pmc_cnt; i++ )
{
if ( val & 0x1 )
{
u64 cnt;
rdmsrl(MSR_P6_PERFCTR(i), cnt);
if ( cnt == 0 )
wrmsrl(MSR_P6_PERFCTR(i), 1);
}
val >>= 1;
}
val = msr_content >> 32;
for ( i = 0; i < fixed_pmc_cnt; i++ )
{
if ( val & 0x1 )
{
u64 cnt;
rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, cnt);
if ( cnt == 0 )
wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, 1);
}
val >>= 1;
}
}
/*
 * Read the number of general-purpose counters via CPUID.0xa:EAX[8..15]
 */
static int core2_get_arch_pmc_count(void)
{
u32 eax;
eax = cpuid_eax(0xa);
return MASK_EXTR(eax, PMU_GENERAL_NR_MASK);
}
/*
 * Read the number of fixed counters via CPUID.0xa:EDX[0..4]
 */
static int core2_get_fixed_pmc_count(void)
{
u32 edx = cpuid_edx(0xa);
return MASK_EXTR(edx, PMU_FIXED_NR_MASK);
}
/* edx bits 5-12: Bit width of fixed-function performance counters */
static int core2_get_bitwidth_fix_count(void)
{
u32 edx;
edx = cpuid_edx(0xa);
return MASK_EXTR(edx, PMU_FIXED_WIDTH_MASK);
}
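/*
 * Classify an MSR index: return 1 and fill in @type (plus @index for
 * counter registers) if the MSR belongs to the virtualised PMU, and 0
 * otherwise. Full-width aliases are first folded onto MSR_IA32_PERFCTR0
 * via MSR_PMC_ALIAS_MASK.
 */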
static int is_core2_vpmu_msr(u32 msr_index, int *type, int *index)
{
u32 msr_index_pmc;
switch ( msr_index )
{
case MSR_CORE_PERF_FIXED_CTR_CTRL:
case MSR_IA32_DS_AREA:
case MSR_IA32_PEBS_ENABLE:
*type = MSR_TYPE_CTRL;
return 1;
case MSR_CORE_PERF_GLOBAL_CTRL:
case MSR_CORE_PERF_GLOBAL_STATUS:
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
*type = MSR_TYPE_GLOBAL;
return 1;
default:
if ( (msr_index >= MSR_CORE_PERF_FIXED_CTR0) &&
(msr_index < MSR_CORE_PERF_FIXED_CTR0 + fixed_pmc_cnt) )
{
*index = msr_index - MSR_CORE_PERF_FIXED_CTR0;
*type = MSR_TYPE_COUNTER;
return 1;
}
if ( (msr_index >= MSR_P6_EVNTSEL(0)) &&
(msr_index < MSR_P6_EVNTSEL(arch_pmc_cnt)) )
{
*index = msr_index - MSR_P6_EVNTSEL(0);
*type = MSR_TYPE_ARCH_CTRL;
return 1;
}
msr_index_pmc = msr_index & MSR_PMC_ALIAS_MASK;
if ( (msr_index_pmc >= MSR_IA32_PERFCTR0) &&
(msr_index_pmc < (MSR_IA32_PERFCTR0 + arch_pmc_cnt)) )
{
*type = MSR_TYPE_ARCH_COUNTER;
*index = msr_index_pmc - MSR_IA32_PERFCTR0;
return 1;
}
return 0;
}
}
static void core2_vpmu_set_msr_bitmap(struct vcpu *v)
{
unsigned int i;
/* Allow Read/Write PMU Counters MSR Directly. */
for ( i = 0; i < fixed_pmc_cnt; i++ )
vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);
for ( i = 0; i < arch_pmc_cnt; i++ )
{
vmx_clear_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);
if ( full_width_write )
vmx_clear_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
}
/* Allow Read PMU Non-global Controls Directly. */
for ( i = 0; i < arch_pmc_cnt; i++ )
vmx_clear_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);
vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
vmx_clear_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
}
static void core2_vpmu_unset_msr_bitmap(struct vcpu *v)
{
unsigned int i;
for ( i = 0; i < fixed_pmc_cnt; i++ )
vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR0 + i, VMX_MSR_RW);
for ( i = 0; i < arch_pmc_cnt; i++ )
{
vmx_set_msr_intercept(v, MSR_IA32_PERFCTR0 + i, VMX_MSR_RW);
if ( full_width_write )
vmx_set_msr_intercept(v, MSR_IA32_A_PERFCTR0 + i, VMX_MSR_RW);
}
for ( i = 0; i < arch_pmc_cnt; i++ )
vmx_set_msr_intercept(v, MSR_P6_EVNTSEL(i), VMX_MSR_R);
vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
vmx_set_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
}
static inline void __core2_vpmu_save(struct vcpu *v)
{
int i;
struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
for ( i = 0; i < fixed_pmc_cnt; i++ )
rdmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
for ( i = 0; i < arch_pmc_cnt; i++ )
rdmsrl(MSR_IA32_PERFCTR0 + i, xen_pmu_cntr_pair[i].counter);
if ( !is_hvm_vcpu(v) )
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
}
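/*
 * Save the vPMU state into the software context; with @to_guest set
 * (PV(H) guests only), also publish the register block to the shared
 * xenpmu_data page.
 */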
static int core2_vpmu_save(struct vcpu *v, bool_t to_guest)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
if ( !is_hvm_vcpu(v) )
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
if ( !vpmu_are_all_set(vpmu, VPMU_CONTEXT_SAVE | VPMU_CONTEXT_LOADED) )
return 0;
__core2_vpmu_save(v);
    /* Re-intercept the PMU MSRs so the next guest access triggers a lazy load. */
if ( !vpmu_is_set(vpmu, VPMU_RUNNING) && is_hvm_vcpu(v) &&
cpu_has_vmx_msr_bitmap )
core2_vpmu_unset_msr_bitmap(v);
if ( to_guest )
{
ASSERT(!has_vlapic(v->domain));
memcpy((void *)(&vpmu->xenpmu_data->pmu.c.intel) + regs_off,
vpmu->context + regs_off, regs_sz);
}
return 1;
}
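/* Reload every PMU MSR from the software context. */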
static inline void __core2_vpmu_load(struct vcpu *v)
{
unsigned int i, pmc_start;
struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
for ( i = 0; i < fixed_pmc_cnt; i++ )
wrmsrl(MSR_CORE_PERF_FIXED_CTR0 + i, fixed_counters[i]);
if ( full_width_write )
pmc_start = MSR_IA32_A_PERFCTR0;
else
pmc_start = MSR_IA32_PERFCTR0;
for ( i = 0; i < arch_pmc_cnt; i++ )
{
wrmsrl(pmc_start + i, xen_pmu_cntr_pair[i].counter);
wrmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
}
wrmsrl(MSR_CORE_PERF_FIXED_CTR_CTRL, core2_vpmu_cxt->fixed_ctrl);
if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_DS) )
wrmsrl(MSR_IA32_DS_AREA, core2_vpmu_cxt->ds_area);
if ( !is_hvm_vcpu(v) )
{
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, core2_vpmu_cxt->global_ovf_ctrl);
core2_vpmu_cxt->global_ovf_ctrl = 0;
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
}
}
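/*
 * Sanity-check a context provided by the guest: reject reserved control
 * bits and non-canonical addresses, and recompute VPMU_RUNNING together
 * with the enabled-counter bitmap (general-purpose counters in bits 0+,
 * fixed ones in bits 32+, mirroring MSR_CORE_PERF_GLOBAL_CTRL).
 */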
static int core2_vpmu_verify(struct vcpu *v)
{
unsigned int i;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vcpu_vpmu(v)->context;
uint64_t *fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
uint64_t fixed_ctrl;
uint64_t *priv_context = vpmu->priv_context;
uint64_t enabled_cntrs = 0;
if ( core2_vpmu_cxt->global_ovf_ctrl & global_ovf_ctrl_mask )
return -EINVAL;
if ( core2_vpmu_cxt->global_ctrl & global_ctrl_mask )
return -EINVAL;
if ( core2_vpmu_cxt->pebs_enable )
return -EINVAL;
fixed_ctrl = core2_vpmu_cxt->fixed_ctrl;
if ( fixed_ctrl & fixed_ctrl_mask )
return -EINVAL;
for ( i = 0; i < fixed_pmc_cnt; i++ )
{
if ( fixed_counters[i] & fixed_counters_mask )
return -EINVAL;
if ( (fixed_ctrl >> (i * FIXED_CTR_CTRL_BITS)) & 3 )
enabled_cntrs |= (1ULL << i);
}
enabled_cntrs <<= 32;
for ( i = 0; i < arch_pmc_cnt; i++ )
{
uint64_t control = xen_pmu_cntr_pair[i].control;
if ( control & ARCH_CTRL_MASK )
return -EINVAL;
if ( control & ARCH_CNTR_ENABLED )
enabled_cntrs |= (1ULL << i);
}
if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) &&
!(is_hvm_vcpu(v)
? is_canonical_address(core2_vpmu_cxt->ds_area)
: __addr_ok(core2_vpmu_cxt->ds_area)) )
return -EINVAL;
if ( (core2_vpmu_cxt->global_ctrl & enabled_cntrs) ||
(core2_vpmu_cxt->ds_area != 0) )
vpmu_set(vpmu, VPMU_RUNNING);
else
vpmu_reset(vpmu, VPMU_RUNNING);
*priv_context = enabled_cntrs;
return 0;
}
static int core2_vpmu_load(struct vcpu *v, bool_t from_guest)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
return 0;
if ( from_guest )
{
int ret;
ASSERT(!has_vlapic(v->domain));
memcpy(vpmu->context + regs_off,
(void *)&v->arch.vpmu.xenpmu_data->pmu.c.intel + regs_off,
regs_sz);
ret = core2_vpmu_verify(v);
if ( ret )
{
/*
* Not necessary since we should never load the context until
* guest provides valid values. But just to be safe.
*/
memset(vpmu->context + regs_off, 0, regs_sz);
return ret;
}
}
vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
__core2_vpmu_load(v);
return 0;
}
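/*
 * Lazily allocate the context block (header, then fixed counters, then
 * counter/control pairs) plus the private enabled-counter bitmap.
 * Returns 1 on success, 0 on failure.
 */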
static int core2_vpmu_alloc_resource(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct xen_pmu_intel_ctxt *core2_vpmu_cxt = NULL;
uint64_t *p = NULL;
if ( !acquire_pmu_ownership(PMU_OWNER_HVM) )
return 0;
if ( is_hvm_vcpu(v) )
{
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
goto out_err;
if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
goto out_err;
vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
sizeof(uint64_t) * fixed_pmc_cnt +
sizeof(struct xen_pmu_cntr_pair) *
arch_pmc_cnt);
p = xzalloc(uint64_t);
if ( !core2_vpmu_cxt || !p )
goto out_err;
core2_vpmu_cxt->fixed_counters = sizeof(*core2_vpmu_cxt);
core2_vpmu_cxt->arch_counters = core2_vpmu_cxt->fixed_counters +
sizeof(uint64_t) * fixed_pmc_cnt;
vpmu->context = core2_vpmu_cxt;
vpmu->priv_context = p;
if ( !has_vlapic(v->domain) )
{
/* Copy fixed/arch register offsets to shared area */
ASSERT(vpmu->xenpmu_data);
memcpy(&vpmu->xenpmu_data->pmu.c.intel, core2_vpmu_cxt, regs_off);
}
vpmu_set(vpmu, VPMU_CONTEXT_ALLOCATED);
return 1;
out_err:
release_pmu_ownership(PMU_OWNER_HVM);
xfree(core2_vpmu_cxt);
xfree(p);
printk("Failed to allocate VPMU resources for domain %u vcpu %u\n",
v->vcpu_id, v->domain->domain_id);
return 0;
}
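/*
 * Common gate for MSR emulation: check that the MSR belongs to the vPMU
 * and make sure the context is allocated and loaded before it is touched.
 */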
static int core2_vpmu_msr_common_check(u32 msr_index, int *type, int *index)
{
struct vpmu_struct *vpmu = vcpu_vpmu(current);
if ( !is_core2_vpmu_msr(msr_index, type, index) )
return 0;
if ( unlikely(!vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED)) &&
!core2_vpmu_alloc_resource(current) )
return 0;
    /* Do the lazy load stuff. */
if ( !vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
{
__core2_vpmu_load(current);
vpmu_set(vpmu, VPMU_CONTEXT_LOADED);
if ( is_hvm_vcpu(current) && cpu_has_vmx_msr_bitmap )
core2_vpmu_set_msr_bitmap(current);
}
return 1;
}
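/* Emulate a guest WRMSR to a PMU register; returns 0 on success or -EINVAL. */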
static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
uint64_t supported)
{
int i, tmp;
int type = -1, index = -1;
struct vcpu *v = current;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
uint64_t *enabled_cntrs;
if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
{
/* Special handling for BTS */
if ( msr == MSR_IA32_DEBUGCTLMSR )
{
supported |= IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS |
IA32_DEBUGCTLMSR_BTINT;
            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS |
IA32_DEBUGCTLMSR_BTS_OFF_USR;
if ( !(msr_content & ~supported) &&
vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
return 0;
if ( (msr_content & supported) &&
!vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
printk(XENLOG_G_WARNING
"%pv: Debug Store unsupported on this CPU\n",
current);
}
return -EINVAL;
}
ASSERT(!supported);
if ( (type == MSR_TYPE_COUNTER) && (msr_content & fixed_counters_mask) )
/* Writing unsupported bits to a fixed counter */
return -EINVAL;
core2_vpmu_cxt = vpmu->context;
enabled_cntrs = vpmu->priv_context;
switch ( msr )
{
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
if ( msr_content & global_ovf_ctrl_mask )
return -EINVAL;
core2_vpmu_cxt->global_status &= ~msr_content;
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
return 0;
case MSR_CORE_PERF_GLOBAL_STATUS:
        gdprintk(XENLOG_INFO, "Cannot write read-only MSR: "
                 "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
return -EINVAL;
case MSR_IA32_PEBS_ENABLE:
if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
XENPMU_FEATURE_ARCH_ONLY) )
return -EINVAL;
if ( msr_content )
/* PEBS is reported as unavailable in MSR_IA32_MISC_ENABLE */
return -EINVAL;
return 0;
case MSR_IA32_DS_AREA:
if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
return -EINVAL;
if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_DS) )
{
if ( !(is_hvm_vcpu(v) ? is_canonical_address(msr_content)
: __addr_ok(msr_content)) )
{
gdprintk(XENLOG_WARNING,
"Illegal address for IA32_DS_AREA: %#" PRIx64 "x\n",
msr_content);
return -EINVAL;
}
core2_vpmu_cxt->ds_area = msr_content;
break;
}
gdprintk(XENLOG_WARNING, "Guest setting of DTS is ignored.\n");
return 0;
case MSR_CORE_PERF_GLOBAL_CTRL:
if ( msr_content & global_ctrl_mask )
return -EINVAL;
core2_vpmu_cxt->global_ctrl = msr_content;
break;
case MSR_CORE_PERF_FIXED_CTR_CTRL:
if ( msr_content & fixed_ctrl_mask )
return -EINVAL;
if ( is_hvm_vcpu(v) )
vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
&core2_vpmu_cxt->global_ctrl);
else
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
*enabled_cntrs &= ~(((1ULL << fixed_pmc_cnt) - 1) << 32);
if ( msr_content != 0 )
{
u64 val = msr_content;
for ( i = 0; i < fixed_pmc_cnt; i++ )
{
if ( val & 3 )
*enabled_cntrs |= (1ULL << 32) << i;
val >>= FIXED_CTR_CTRL_BITS;
}
}
core2_vpmu_cxt->fixed_ctrl = msr_content;
break;
default:
tmp = msr - MSR_P6_EVNTSEL(0);
if ( tmp >= 0 && tmp < arch_pmc_cnt )
{
bool_t blocked = 0;
uint64_t umaskevent = msr_content & MSR_IA32_CMT_EVTSEL_UE_MASK;
struct xen_pmu_cntr_pair *xen_pmu_cntr_pair =
vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
if ( msr_content & ARCH_CTRL_MASK )
return -EINVAL;
/* PMC filters */
if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
XENPMU_FEATURE_ARCH_ONLY) )
{
blocked = 1;
switch ( umaskevent )
{
/*
* See the Pre-Defined Architectural Performance Events table
* from the Intel 64 and IA-32 Architectures Software
* Developer's Manual, Volume 3B, System Programming Guide,
* Part 2.
*/
case 0x003c: /* UnHalted Core Cycles */
case 0x013c: /* UnHalted Reference Cycles */
case 0x00c0: /* Instructions Retired */
blocked = 0;
break;
}
}
if ( vpmu_features & XENPMU_FEATURE_ARCH_ONLY )
{
/* Additional counters beyond IPC only; blocked already set. */
switch ( umaskevent )
{
case 0x4f2e: /* Last Level Cache References */
case 0x412e: /* Last Level Cache Misses */
case 0x00c4: /* Branch Instructions Retired */
case 0x00c5: /* All Branch Mispredict Retired */
blocked = 0;
break;
}
}
if ( blocked )
return -EINVAL;
if ( is_hvm_vcpu(v) )
vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
&core2_vpmu_cxt->global_ctrl);
else
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
if ( msr_content & ARCH_CNTR_ENABLED )
*enabled_cntrs |= 1ULL << tmp;
else
*enabled_cntrs &= ~(1ULL << tmp);
xen_pmu_cntr_pair[tmp].control = msr_content;
}
}
if ( type != MSR_TYPE_GLOBAL )
wrmsrl(msr, msr_content);
else
{
if ( is_hvm_vcpu(v) )
vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
else
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
}
if ( (core2_vpmu_cxt->global_ctrl & *enabled_cntrs) ||
(core2_vpmu_cxt->ds_area != 0) )
vpmu_set(vpmu, VPMU_RUNNING);
else
vpmu_reset(vpmu, VPMU_RUNNING);
return 0;
}
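/*
 * Emulate a guest RDMSR from a PMU register; also adjusts the BTS/PEBS
 * availability bits when the guest reads MSR_IA32_MISC_ENABLE.
 */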
static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
{
int type = -1, index = -1;
struct vcpu *v = current;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct xen_pmu_intel_ctxt *core2_vpmu_cxt;
if ( core2_vpmu_msr_common_check(msr, &type, &index) )
{
core2_vpmu_cxt = vpmu->context;
switch ( msr )
{
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
*msr_content = 0;
break;
case MSR_CORE_PERF_GLOBAL_STATUS:
*msr_content = core2_vpmu_cxt->global_status;
break;
case MSR_CORE_PERF_GLOBAL_CTRL:
if ( is_hvm_vcpu(v) )
vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
else
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
break;
default:
rdmsrl(msr, *msr_content);
}
}
else if ( msr == MSR_IA32_MISC_ENABLE )
{
/* Extension for BTS */
if ( vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
*msr_content &= ~MSR_IA32_MISC_ENABLE_BTS_UNAVAIL;
*msr_content |= MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
}
return 0;
}
/* Dump vpmu info on console, called in the context of keyhandler 'q'. */
static void core2_vpmu_dump(const struct vcpu *v)
{
const struct vpmu_struct *vpmu = vcpu_vpmu(v);
unsigned int i;
const struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
u64 val;
uint64_t *fixed_counters;
struct xen_pmu_cntr_pair *cntr_pair;
if ( !core2_vpmu_cxt || !vpmu_is_set(vpmu, VPMU_CONTEXT_ALLOCATED) )
return;
if ( !vpmu_is_set(vpmu, VPMU_RUNNING) )
{
if ( vpmu_is_set(vpmu, VPMU_CONTEXT_LOADED) )
printk(" vPMU loaded\n");
else
printk(" vPMU allocated\n");
return;
}
printk(" vPMU running\n");
cntr_pair = vpmu_reg_pointer(core2_vpmu_cxt, arch_counters);
fixed_counters = vpmu_reg_pointer(core2_vpmu_cxt, fixed_counters);
/* Print the contents of the counter and its configuration msr. */
for ( i = 0; i < arch_pmc_cnt; i++ )
printk(" general_%d: 0x%016lx ctrl: 0x%016lx\n",
i, cntr_pair[i].counter, cntr_pair[i].control);
/*
* The configuration of the fixed counter is 4 bits each in the
* MSR_CORE_PERF_FIXED_CTR_CTRL.
*/
val = core2_vpmu_cxt->fixed_ctrl;
for ( i = 0; i < fixed_pmc_cnt; i++ )
{
printk(" fixed_%d: 0x%016lx ctrl: %#lx\n",
i, fixed_counters[i],
val & FIXED_CTR_CTRL_MASK);
val >>= FIXED_CTR_CTRL_BITS;
}
}
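/*
 * PMU interrupt handler: latch overflown counters into the guest-visible
 * global_status and acknowledge them in hardware. Returns 0 only if
 * neither a counter overflow nor a trace-message interrupt is pending.
 */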
static int core2_vpmu_do_interrupt(struct cpu_user_regs *regs)
{
struct vcpu *v = current;
u64 msr_content;
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct xen_pmu_intel_ctxt *core2_vpmu_cxt = vpmu->context;
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
if ( msr_content )
{
if ( is_pmc_quirk )
handle_pmc_quirk(msr_content);
core2_vpmu_cxt->global_status |= msr_content;
msr_content &= ~global_ovf_ctrl_mask;
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
}
else
{
/* No PMC overflow but perhaps a Trace Message interrupt. */
__vmread(GUEST_IA32_DEBUGCTL, &msr_content);
if ( !(msr_content & IA32_DEBUGCTLMSR_TR) )
return 0;
}
return 1;
}
static void core2_vpmu_destroy(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
xfree(vpmu->context);
vpmu->context = NULL;
xfree(vpmu->priv_context);
vpmu->priv_context = NULL;
if ( is_hvm_vcpu(v) && cpu_has_vmx_msr_bitmap )
core2_vpmu_unset_msr_bitmap(v);
release_pmu_ownership(PMU_OWNER_HVM);
vpmu_clear(vpmu);
}
static const struct arch_vpmu_ops core2_vpmu_ops = {
.do_wrmsr = core2_vpmu_do_wrmsr,
.do_rdmsr = core2_vpmu_do_rdmsr,
.do_interrupt = core2_vpmu_do_interrupt,
.arch_vpmu_destroy = core2_vpmu_destroy,
.arch_vpmu_save = core2_vpmu_save,
.arch_vpmu_load = core2_vpmu_load,
.arch_vpmu_dump = core2_vpmu_dump
};
int vmx_vpmu_initialise(struct vcpu *v)
{
struct vpmu_struct *vpmu = vcpu_vpmu(v);
u64 msr_content;
static bool_t ds_warned;
if ( vpmu_mode == XENPMU_MODE_OFF )
return 0;
if ( v->domain->arch.cpuid->basic.pmu_version <= 1 ||
v->domain->arch.cpuid->basic.pmu_version >= 5 )
return -EINVAL;
if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
return -EINVAL;
if ( !(vpmu_features & XENPMU_FEATURE_INTEL_BTS) )
goto func_out;
    /* Check the 'Debug Store' feature in CPUID.1:EDX[21]. */
while ( boot_cpu_has(X86_FEATURE_DS) )
{
if ( !boot_cpu_has(X86_FEATURE_DTES64) )
{
if ( !ds_warned )
printk(XENLOG_G_WARNING "CPU doesn't support 64-bit DS Area"
" - Debug Store disabled for guests\n");
break;
}
vpmu_set(vpmu, VPMU_CPU_HAS_DS);
rdmsrl(MSR_IA32_MISC_ENABLE, msr_content);
if ( msr_content & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL )
{
/* If BTS_UNAVAIL is set reset the DS feature. */
vpmu_reset(vpmu, VPMU_CPU_HAS_DS);
if ( !ds_warned )
printk(XENLOG_G_WARNING "CPU has set BTS_UNAVAIL"
" - Debug Store disabled for guests\n");
break;
}
vpmu_set(vpmu, VPMU_CPU_HAS_BTS);
if ( !ds_warned )
{
if ( !boot_cpu_has(X86_FEATURE_DSCPL) )
printk(XENLOG_G_INFO
"vpmu: CPU doesn't support CPL-Qualified BTS\n");
printk("******************************************************\n");
printk("** WARNING: Emulation of BTS Feature is switched on **\n");
printk("** Using this processor feature in a virtualized **\n");
printk("** environment is not 100%% safe. **\n");
printk("** Setting the DS buffer address with wrong values **\n");
printk("** may lead to hypervisor hangs or crashes. **\n");
printk("** It is NOT recommended for production use! **\n");
printk("******************************************************\n");
}
break;
}
ds_warned = 1;
func_out:
/* PV domains can allocate resources immediately */
if ( is_pv_vcpu(v) && !core2_vpmu_alloc_resource(v) )
return -EIO;
vpmu->arch_vpmu_ops = &core2_vpmu_ops;
return 0;
}
int __init core2_vpmu_init(void)
{
u64 caps;
unsigned int version = 0;
unsigned int i;
if ( current_cpu_data.cpuid_level >= 0xa )
version = MASK_EXTR(cpuid_eax(0xa), PMU_VERSION_MASK);
switch ( version )
{
case 4:
printk(XENLOG_INFO "VPMU: PMU version 4 is not fully supported. "
"Emulating version 3\n");
/* FALLTHROUGH */
case 2:
case 3:
break;
default:
printk(XENLOG_WARNING "VPMU: PMU version %u is not supported\n",
version);
return -EINVAL;
}
if ( current_cpu_data.x86 != 6 )
{
printk(XENLOG_WARNING "VPMU: only family 6 is supported\n");
return -EINVAL;
}
arch_pmc_cnt = core2_get_arch_pmc_count();
fixed_pmc_cnt = core2_get_fixed_pmc_count();
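    /* Bit 13 of IA32_PERF_CAPABILITIES is FW_WRITE (full-width counter writes). */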
rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
full_width_write = (caps >> 13) & 1;
fixed_ctrl_mask = ~((1ull << (fixed_pmc_cnt * FIXED_CTR_CTRL_BITS)) - 1);
    /* Mask the AnyThread bits for all fixed counters */
    for ( i = 0; i < fixed_pmc_cnt; i++ )
fixed_ctrl_mask |=
(FIXED_CTR_CTRL_ANYTHREAD_MASK << (FIXED_CTR_CTRL_BITS * i));
fixed_counters_mask = ~((1ull << core2_get_bitwidth_fix_count()) - 1);
global_ctrl_mask = ~((((1ULL << fixed_pmc_cnt) - 1) << 32) |
((1ULL << arch_pmc_cnt) - 1));
global_ovf_ctrl_mask = ~(0xC000000000000000 |
(((1ULL << fixed_pmc_cnt) - 1) << 32) |
((1ULL << arch_pmc_cnt) - 1));
if ( version > 2 )
/*
* Even though we don't support Uncore counters guests should be
* able to clear all available overflows.
*/
global_ovf_ctrl_mask &= ~(1ULL << 61);
regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
sizeof(uint64_t) * fixed_pmc_cnt +
sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
check_pmc_quirk();
if ( sizeof(struct xen_pmu_data) + sizeof(uint64_t) * fixed_pmc_cnt +
sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt > PAGE_SIZE )
{
printk(XENLOG_WARNING
"VPMU: Register bank does not fit into VPMU share page\n");
arch_pmc_cnt = fixed_pmc_cnt = 0;
return -ENOSPC;
}
return 0;
}