x86, perf: Add raw events support for the P4 PMU
Adding raw event support led to a complete code refactoring. I hope it became more readable than it was. The list of changes: 1) The 64bit config field is enough to hold all the information we need to track event details. To achieve this we use our *own* enum for event selection in the ESCR register and map this key into the proper value at the moment of event enabling. For the same reason we use the 12 LSB bits in the CCCR register -- to track exactly which cache trace event was requested. And we clear these bits at the real 'write' moment. 2) There is no per-cpu area reserved for the P4 PMU anymore. We don't need it. Everything is held by config. 3) Now we may use any available counter, i.e. we try to grab any possible counter. v2: - Lin Ming reported the lack of ESCR selector in CCCR for cache events v3: - Don't lose cache event codes in the config unpacking procedure, we may need them one day so no obscure hack behind our back, better to clear reserved bits explicitly when needed (thanks Ming for pointing out) - Lin Ming fixed misplaced opcodes in cache events Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Tested-by: Lin Ming <ming.m.lin@intel.com> Signed-off-by: Lin Ming <ming.m.lin@intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Stephane Eranian <eranian@google.com> Cc: Robert Richter <robert.richter@amd.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Cyrill Gorcunov <gorcunov@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> LKML-Reference: <1269403766.3409.6.camel@minggr.sh.intel.com> [ v4: did a few whitespace fixlets ] Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
parent
88978e5623
commit
d814f30105
File diff suppressed because it is too large
Load Diff
|
@ -11,35 +11,281 @@
|
||||||
|
|
||||||
#include <asm/perf_event_p4.h>
|
#include <asm/perf_event_p4.h>
|
||||||
|
|
||||||
|
#define P4_CNTR_LIMIT 3
|
||||||
/*
|
/*
|
||||||
* array indices: 0,1 - HT threads, used with HT enabled cpu
|
* array indices: 0,1 - HT threads, used with HT enabled cpu
|
||||||
*/
|
*/
|
||||||
struct p4_event_template {
|
struct p4_event_bind {
|
||||||
u32 opcode; /* ESCR event + CCCR selector */
|
unsigned int opcode; /* Event code and ESCR selector */
|
||||||
u64 config; /* packed predefined bits */
|
unsigned int escr_msr[2]; /* ESCR MSR for this event */
|
||||||
int dep; /* upstream dependency event index */
|
unsigned char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
|
||||||
int key; /* index into p4_templates */
|
|
||||||
u64 msr; /*
|
|
||||||
* the high 32 bits set into MSR_IA32_PEBS_ENABLE and
|
|
||||||
* the low 32 bits set into MSR_P4_PEBS_MATRIX_VERT
|
|
||||||
* for cache events
|
|
||||||
*/
|
|
||||||
unsigned int emask; /* ESCR EventMask */
|
|
||||||
unsigned int escr_msr[2]; /* ESCR MSR for this event */
|
|
||||||
unsigned int cntr[2]; /* counter index (offset) */
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct p4_pmu_res {
|
struct p4_cache_event_bind {
|
||||||
/* maps hw_conf::idx into template for ESCR sake */
|
unsigned int metric_pebs;
|
||||||
struct p4_event_template *tpl[ARCH_P4_MAX_CCCR];
|
unsigned int metric_vert;
|
||||||
};
|
};
|
||||||
|
|
||||||
static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config);
|
#define P4_GEN_CACHE_EVENT_BIND(name) \
|
||||||
|
[P4_CACHE__##name] = { \
|
||||||
|
.metric_pebs = P4_PEBS__##name, \
|
||||||
|
.metric_vert = P4_VERT__##name, \
|
||||||
|
}
|
||||||
|
|
||||||
#define P4_CACHE_EVENT_CONFIG(event, bit) \
|
static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
|
||||||
p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(event) << P4_EVNTSEL_EVENT_SHIFT) | \
|
P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
|
||||||
p4_config_pack_escr((event##_##bit) << P4_EVNTSEL_EVENTMASK_SHIFT) | \
|
P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
|
||||||
p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(event) << P4_CCCR_ESCR_SELECT_SHIFT)
|
P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
|
||||||
|
P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note that we don't use CCCR1 here, there is an
|
||||||
|
* exception for P4_BSQ_ALLOCATION but we just have
|
||||||
|
* no workaround
|
||||||
|
*
|
||||||
|
* consider this binding as resources which particular
|
||||||
|
* event may borrow, it doesn't contain EventMask,
|
||||||
|
* Tags and friends -- they are left to a caller
|
||||||
|
*/
|
||||||
|
static struct p4_event_bind p4_event_bind_map[] = {
|
||||||
|
[P4_EVENT_TC_DELIVER_MODE] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE),
|
||||||
|
.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
|
||||||
|
.cntr = { {4, 5, -1}, {6, 7, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_BPU_FETCH_REQUEST] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST),
|
||||||
|
.escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_ITLB_REFERENCE] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE),
|
||||||
|
.escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_MEMORY_CANCEL] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL),
|
||||||
|
.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_MEMORY_COMPLETE] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE),
|
||||||
|
.escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_LOAD_PORT_REPLAY] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY),
|
||||||
|
.escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_STORE_PORT_REPLAY] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY),
|
||||||
|
.escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_MOB_LOAD_REPLAY] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY),
|
||||||
|
.escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_PAGE_WALK_TYPE] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE),
|
||||||
|
.escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_BSQ_CACHE_REFERENCE] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE),
|
||||||
|
.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_IOQ_ALLOCATION] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION),
|
||||||
|
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES),
|
||||||
|
.escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 },
|
||||||
|
.cntr = { {2, -1, -1}, {3, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_FSB_DATA_ACTIVITY] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY),
|
||||||
|
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION),
|
||||||
|
.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 },
|
||||||
|
.cntr = { {0, -1, -1}, {1, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES),
|
||||||
|
.escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 },
|
||||||
|
.cntr = { {2, -1, -1}, {3, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_SSE_INPUT_ASSIST] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST),
|
||||||
|
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_PACKED_SP_UOP] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP),
|
||||||
|
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_PACKED_DP_UOP] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP),
|
||||||
|
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_SCALAR_SP_UOP] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP),
|
||||||
|
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_SCALAR_DP_UOP] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP),
|
||||||
|
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_64BIT_MMX_UOP] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP),
|
||||||
|
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_128BIT_MMX_UOP] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP),
|
||||||
|
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_X87_FP_UOP] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP),
|
||||||
|
.escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_TC_MISC] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_TC_MISC),
|
||||||
|
.escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 },
|
||||||
|
.cntr = { {4, 5, -1}, {6, 7, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_GLOBAL_POWER_EVENTS] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS),
|
||||||
|
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_TC_MS_XFER] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER),
|
||||||
|
.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
|
||||||
|
.cntr = { {4, 5, -1}, {6, 7, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_UOP_QUEUE_WRITES] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES),
|
||||||
|
.escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 },
|
||||||
|
.cntr = { {4, 5, -1}, {6, 7, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE),
|
||||||
|
.escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 },
|
||||||
|
.cntr = { {4, 5, -1}, {6, 7, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_RETIRED_BRANCH_TYPE] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE),
|
||||||
|
.escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 },
|
||||||
|
.cntr = { {4, 5, -1}, {6, 7, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_RESOURCE_STALL] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL),
|
||||||
|
.escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_WC_BUFFER] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_WC_BUFFER),
|
||||||
|
.escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 },
|
||||||
|
.cntr = { {8, 9, -1}, {10, 11, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_B2B_CYCLES] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES),
|
||||||
|
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_BNR] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_BNR),
|
||||||
|
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_SNOOP] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_SNOOP),
|
||||||
|
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_RESPONSE] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_RESPONSE),
|
||||||
|
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
|
||||||
|
.cntr = { {0, -1, -1}, {2, -1, -1} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_FRONT_END_EVENT] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT),
|
||||||
|
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_EXECUTION_EVENT] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT),
|
||||||
|
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_REPLAY_EVENT] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT),
|
||||||
|
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_INSTR_RETIRED] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED),
|
||||||
|
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_UOPS_RETIRED] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED),
|
||||||
|
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_UOP_TYPE] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_UOP_TYPE),
|
||||||
|
.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_BRANCH_RETIRED] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED),
|
||||||
|
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_MISPRED_BRANCH_RETIRED] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED),
|
||||||
|
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_X87_ASSIST] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_X87_ASSIST),
|
||||||
|
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_MACHINE_CLEAR] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR),
|
||||||
|
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
[P4_EVENT_INSTR_COMPLETED] = {
|
||||||
|
.opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED),
|
||||||
|
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
|
||||||
|
.cntr = { {12, 13, 16}, {14, 15, 17} },
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Build the packed config for a cache event:
 *  - ESCR part: the event key plus the single EventMask bit @bit
 *  - CCCR part: the @cache_event key together with the ESCR selector
 *    taken from the event's opcode
 *
 * @cache_event and the whole expansion are parenthesized so the macro
 * stays correct when it is passed a compound expression or is combined
 * with other operators by the caller.
 */
#define P4_GEN_CACHE_EVENT(event, bit, cache_event)			  \
	(p4_config_pack_escr(P4_ESCR_EVENT(event)			| \
			     P4_ESCR_EMASK_BIT(event, bit))		| \
	 p4_config_pack_cccr((cache_event)				| \
			     P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))))
|
||||||
|
|
||||||
static __initconst u64 p4_hw_cache_event_ids
|
static __initconst u64 p4_hw_cache_event_ids
|
||||||
[PERF_COUNT_HW_CACHE_MAX]
|
[PERF_COUNT_HW_CACHE_MAX]
|
||||||
|
@ -49,42 +295,35 @@ static __initconst u64 p4_hw_cache_event_ids
|
||||||
[ C(L1D ) ] = {
|
[ C(L1D ) ] = {
|
||||||
[ C(OP_READ) ] = {
|
[ C(OP_READ) ] = {
|
||||||
[ C(RESULT_ACCESS) ] = 0x0,
|
[ C(RESULT_ACCESS) ] = 0x0,
|
||||||
/* 1stL_cache_load_miss_retired */
|
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
|
||||||
[ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
|
P4_CACHE__1stl_cache_load_miss_retired),
|
||||||
| KEY_P4_L1D_OP_READ_RESULT_MISS,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
[ C(LL ) ] = {
|
[ C(LL ) ] = {
|
||||||
[ C(OP_READ) ] = {
|
[ C(OP_READ) ] = {
|
||||||
[ C(RESULT_ACCESS) ] = 0x0,
|
[ C(RESULT_ACCESS) ] = 0x0,
|
||||||
/* 2ndL_cache_load_miss_retired */
|
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
|
||||||
[ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
|
P4_CACHE__2ndl_cache_load_miss_retired),
|
||||||
| KEY_P4_LL_OP_READ_RESULT_MISS,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
[ C(DTLB) ] = {
|
[ C(DTLB) ] = {
|
||||||
[ C(OP_READ) ] = {
|
[ C(OP_READ) ] = {
|
||||||
[ C(RESULT_ACCESS) ] = 0x0,
|
[ C(RESULT_ACCESS) ] = 0x0,
|
||||||
/* DTLB_load_miss_retired */
|
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
|
||||||
[ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
|
P4_CACHE__dtlb_load_miss_retired),
|
||||||
| KEY_P4_DTLB_OP_READ_RESULT_MISS,
|
|
||||||
},
|
},
|
||||||
[ C(OP_WRITE) ] = {
|
[ C(OP_WRITE) ] = {
|
||||||
[ C(RESULT_ACCESS) ] = 0x0,
|
[ C(RESULT_ACCESS) ] = 0x0,
|
||||||
/* DTLB_store_miss_retired */
|
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
|
||||||
[ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
|
P4_CACHE__dtlb_store_miss_retired),
|
||||||
| KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
[ C(ITLB) ] = {
|
[ C(ITLB) ] = {
|
||||||
[ C(OP_READ) ] = {
|
[ C(OP_READ) ] = {
|
||||||
/* ITLB_reference.HIT */
|
[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
|
||||||
[ C(RESULT_ACCESS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, HIT)
|
P4_CACHE__itlb_reference_hit),
|
||||||
| KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
|
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
|
||||||
|
P4_CACHE__itlb_reference_miss),
|
||||||
/* ITLB_reference.MISS */
|
|
||||||
[ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, MISS)
|
|
||||||
| KEY_P4_ITLB_OP_READ_RESULT_MISS,
|
|
||||||
},
|
},
|
||||||
[ C(OP_WRITE) ] = {
|
[ C(OP_WRITE) ] = {
|
||||||
[ C(RESULT_ACCESS) ] = -1,
|
[ C(RESULT_ACCESS) ] = -1,
|
||||||
|
@ -97,219 +336,89 @@ static __initconst u64 p4_hw_cache_event_ids
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
|
||||||
* WARN: CCCR1 doesn't have a working enable bit so try to not
|
/* non-halted CPU clocks */
|
||||||
* use it if possible
|
[PERF_COUNT_HW_CPU_CYCLES] =
|
||||||
*
|
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
|
||||||
* Also as only we start to support raw events we will need to
|
P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
|
||||||
* append _all_ P4_EVENT_PACK'ed events here
|
|
||||||
*/
|
/*
|
||||||
struct p4_event_template p4_templates[] = {
|
* retired instructions
|
||||||
[0] = {
|
* in a sake of simplicity we don't use the FSB tagging
|
||||||
.opcode = P4_GLOBAL_POWER_EVENTS,
|
*/
|
||||||
.config = 0,
|
[PERF_COUNT_HW_INSTRUCTIONS] =
|
||||||
.dep = -1,
|
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) |
|
||||||
.key = 0,
|
P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) |
|
||||||
.emask =
|
P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)),
|
||||||
P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING),
|
|
||||||
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
|
/* cache hits */
|
||||||
.cntr = { 0, 2 },
|
[PERF_COUNT_HW_CACHE_REFERENCES] =
|
||||||
},
|
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
|
||||||
[1] = {
|
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
|
||||||
.opcode = P4_INSTR_RETIRED,
|
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
|
||||||
.config = 0,
|
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
|
||||||
.dep = -1, /* needs front-end tagging */
|
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
|
||||||
.key = 1,
|
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
|
||||||
.emask =
|
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)),
|
||||||
P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) |
|
|
||||||
P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG),
|
/* cache misses */
|
||||||
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
|
[PERF_COUNT_HW_CACHE_MISSES] =
|
||||||
.cntr = { 12, 14 },
|
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) |
|
||||||
},
|
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
|
||||||
[2] = {
|
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
|
||||||
.opcode = P4_BSQ_CACHE_REFERENCE,
|
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)),
|
||||||
.config = 0,
|
|
||||||
.dep = -1,
|
/* branch instructions retired */
|
||||||
.key = 2,
|
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =
|
||||||
.emask =
|
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) |
|
||||||
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) |
|
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) |
|
||||||
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) |
|
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) |
|
||||||
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) |
|
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) |
|
||||||
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) |
|
P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)),
|
||||||
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) |
|
|
||||||
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM),
|
/* mispredicted branches retired */
|
||||||
.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
|
[PERF_COUNT_HW_BRANCH_MISSES] =
|
||||||
.cntr = { 0, 2 },
|
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) |
|
||||||
},
|
P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)),
|
||||||
[3] = {
|
|
||||||
.opcode = P4_BSQ_CACHE_REFERENCE,
|
/* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */
|
||||||
.config = 0,
|
[PERF_COUNT_HW_BUS_CYCLES] =
|
||||||
.dep = -1,
|
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) |
|
||||||
.key = 3,
|
P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) |
|
||||||
.emask =
|
P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) |
|
||||||
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) |
|
p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
|
||||||
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) |
|
|
||||||
P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS),
|
|
||||||
.escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 },
|
|
||||||
.cntr = { 0, 3 },
|
|
||||||
},
|
|
||||||
[4] = {
|
|
||||||
.opcode = P4_RETIRED_BRANCH_TYPE,
|
|
||||||
.config = 0,
|
|
||||||
.dep = -1,
|
|
||||||
.key = 4,
|
|
||||||
.emask =
|
|
||||||
P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) |
|
|
||||||
P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) |
|
|
||||||
P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN) |
|
|
||||||
P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT),
|
|
||||||
.escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 },
|
|
||||||
.cntr = { 4, 6 },
|
|
||||||
},
|
|
||||||
[5] = {
|
|
||||||
.opcode = P4_MISPRED_BRANCH_RETIRED,
|
|
||||||
.config = 0,
|
|
||||||
.dep = -1,
|
|
||||||
.key = 5,
|
|
||||||
.emask =
|
|
||||||
P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS),
|
|
||||||
.escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 },
|
|
||||||
.cntr = { 12, 14 },
|
|
||||||
},
|
|
||||||
[6] = {
|
|
||||||
.opcode = P4_FSB_DATA_ACTIVITY,
|
|
||||||
.config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE),
|
|
||||||
.dep = -1,
|
|
||||||
.key = 6,
|
|
||||||
.emask =
|
|
||||||
P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) |
|
|
||||||
P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN),
|
|
||||||
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
|
|
||||||
.cntr = { 0, 2 },
|
|
||||||
},
|
|
||||||
[KEY_P4_L1D_OP_READ_RESULT_MISS] = {
|
|
||||||
.opcode = P4_REPLAY_EVENT,
|
|
||||||
.config = 0,
|
|
||||||
.dep = -1,
|
|
||||||
.msr = (u64)(1 << 0 | 1 << 24) << 32 | (1 << 0),
|
|
||||||
.key = KEY_P4_L1D_OP_READ_RESULT_MISS,
|
|
||||||
.emask =
|
|
||||||
P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
|
|
||||||
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
|
|
||||||
.cntr = { 16, 17 },
|
|
||||||
},
|
|
||||||
[KEY_P4_LL_OP_READ_RESULT_MISS] = {
|
|
||||||
.opcode = P4_REPLAY_EVENT,
|
|
||||||
.config = 0,
|
|
||||||
.dep = -1,
|
|
||||||
.msr = (u64)(1 << 1 | 1 << 24) << 32 | (1 << 0),
|
|
||||||
.key = KEY_P4_LL_OP_READ_RESULT_MISS,
|
|
||||||
.emask =
|
|
||||||
P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
|
|
||||||
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
|
|
||||||
.cntr = { 16, 17 },
|
|
||||||
},
|
|
||||||
[KEY_P4_DTLB_OP_READ_RESULT_MISS] = {
|
|
||||||
.opcode = P4_REPLAY_EVENT,
|
|
||||||
.config = 0,
|
|
||||||
.dep = -1,
|
|
||||||
.msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 0),
|
|
||||||
.key = KEY_P4_DTLB_OP_READ_RESULT_MISS,
|
|
||||||
.emask =
|
|
||||||
P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
|
|
||||||
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
|
|
||||||
.cntr = { 16, 17 },
|
|
||||||
},
|
|
||||||
[KEY_P4_DTLB_OP_WRITE_RESULT_MISS] = {
|
|
||||||
.opcode = P4_REPLAY_EVENT,
|
|
||||||
.config = 0,
|
|
||||||
.dep = -1,
|
|
||||||
.msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 1),
|
|
||||||
.key = KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
|
|
||||||
.emask =
|
|
||||||
P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
|
|
||||||
.escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
|
|
||||||
.cntr = { 16, 17 },
|
|
||||||
},
|
|
||||||
[KEY_P4_ITLB_OP_READ_RESULT_ACCESS] = {
|
|
||||||
.opcode = P4_ITLB_REFERENCE,
|
|
||||||
.config = 0,
|
|
||||||
.dep = -1,
|
|
||||||
.msr = 0,
|
|
||||||
.key = KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
|
|
||||||
.emask =
|
|
||||||
P4_EVENT_ATTR(P4_ITLB_REFERENCE, HIT),
|
|
||||||
.escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
|
|
||||||
.cntr = { 0, 2 },
|
|
||||||
},
|
|
||||||
[KEY_P4_ITLB_OP_READ_RESULT_MISS] = {
|
|
||||||
.opcode = P4_ITLB_REFERENCE,
|
|
||||||
.config = 0,
|
|
||||||
.dep = -1,
|
|
||||||
.msr = 0,
|
|
||||||
.key = KEY_P4_ITLB_OP_READ_RESULT_MISS,
|
|
||||||
.emask =
|
|
||||||
P4_EVENT_ATTR(P4_ITLB_REFERENCE, MISS),
|
|
||||||
.escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
|
|
||||||
.cntr = { 0, 2 },
|
|
||||||
},
|
|
||||||
[KEY_P4_UOP_TYPE] = {
|
|
||||||
.opcode = P4_UOP_TYPE,
|
|
||||||
.config = 0,
|
|
||||||
.dep = -1,
|
|
||||||
.key = KEY_P4_UOP_TYPE,
|
|
||||||
.emask =
|
|
||||||
P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) |
|
|
||||||
P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
|
|
||||||
.escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 },
|
|
||||||
.cntr = { 16, 17 },
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Find the resource binding for the event key packed into @config.
 *
 * Returns a pointer into p4_event_bind_map, or NULL when the unpacked
 * key lies outside the table.
 */
static struct p4_event_bind *p4_config_get_bind(u64 config)
{
	unsigned int key = p4_config_unpack_event(config);

	if (key >= ARRAY_SIZE(p4_event_bind_map))
		return NULL;

	return &p4_event_bind_map[key];
}
|
||||||
|
|
||||||
static u64 p4_pmu_event_map(int hw_event)
|
static u64 p4_pmu_event_map(int hw_event)
|
||||||
{
|
{
|
||||||
struct p4_event_template *tpl;
|
struct p4_event_bind *bind;
|
||||||
|
unsigned int esel;
|
||||||
u64 config;
|
u64 config;
|
||||||
|
|
||||||
if (hw_event > ARRAY_SIZE(p4_templates)) {
|
if (hw_event > ARRAY_SIZE(p4_general_events)) {
|
||||||
printk_once(KERN_ERR "PMU: Incorrect event index\n");
|
printk_once(KERN_ERR "P4 PMU: Bad index: %i\n", hw_event);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
tpl = &p4_templates[hw_event];
|
|
||||||
|
|
||||||
/*
|
config = p4_general_events[hw_event];
|
||||||
* fill config up according to
|
bind = p4_config_get_bind(config);
|
||||||
* a predefined event template
|
esel = P4_OPCODE_ESEL(bind->opcode);
|
||||||
*/
|
config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel));
|
||||||
config = tpl->config;
|
|
||||||
config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT);
|
|
||||||
config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT);
|
|
||||||
config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT);
|
|
||||||
config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED);
|
|
||||||
|
|
||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Note that we still have 5 events (from global events SDM list)
|
|
||||||
* intersected in opcode+emask bits so we will need another
|
|
||||||
* scheme there do distinguish templates.
|
|
||||||
*/
|
|
||||||
static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src)
|
|
||||||
{
|
|
||||||
return dst & src;
|
|
||||||
}
|
|
||||||
|
|
||||||
static struct p4_event_template *p4_pmu_template_lookup(u64 config)
|
|
||||||
{
|
|
||||||
int key = p4_config_unpack_key(config);
|
|
||||||
|
|
||||||
if (key < ARRAY_SIZE(p4_templates))
|
|
||||||
return &p4_templates[key];
|
|
||||||
else
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We don't control raw events so it's up to the caller
|
* We don't control raw events so it's up to the caller
|
||||||
* to pass sane values (and we don't count the thread number
|
* to pass sane values (and we don't count the thread number
|
||||||
|
@ -319,13 +428,14 @@ static struct p4_event_template *p4_pmu_template_lookup(u64 config)
|
||||||
/*
 * Filter a raw event: only the bits covered by P4_ESCR_MASK_HT (ESCR
 * part) and P4_CCCR_MASK_HT (CCCR part) survive, everything else in
 * @hw_event is dropped.
 */
static u64 p4_pmu_raw_event(u64 hw_event)
{
	const u64 allowed = p4_config_pack_escr(P4_ESCR_MASK_HT) |
			    p4_config_pack_cccr(P4_CCCR_MASK_HT);

	return hw_event & allowed;
}
|
||||||
|
|
||||||
static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
|
static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
|
||||||
{
|
{
|
||||||
int cpu = raw_smp_processor_id();
|
int cpu = raw_smp_processor_id();
|
||||||
|
u32 escr, cccr;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* the reason we use cpu that early is that: if we get scheduled
|
* the reason we use cpu that early is that: if we get scheduled
|
||||||
|
@ -333,13 +443,10 @@ static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
|
||||||
* specific flags in config (and will save some cpu cycles)
|
* specific flags in config (and will save some cpu cycles)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* CCCR by default */
|
cccr = p4_default_cccr_conf(cpu);
|
||||||
hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu));
|
escr = p4_default_escr_conf(cpu, attr->exclude_kernel, attr->exclude_user);
|
||||||
|
hwc->config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
|
||||||
|
|
||||||
/* Count user and OS events unless not requested to */
|
|
||||||
hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel,
|
|
||||||
attr->exclude_user));
|
|
||||||
/* on HT machine we need a special bit */
|
|
||||||
if (p4_ht_active() && p4_ht_thread(cpu))
|
if (p4_ht_active() && p4_ht_thread(cpu))
|
||||||
hwc->config = p4_set_ht_bit(hwc->config);
|
hwc->config = p4_set_ht_bit(hwc->config);
|
||||||
|
|
||||||
|
@ -368,7 +475,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event)
|
||||||
*/
|
*/
|
||||||
(void)checking_wrmsrl(hwc->config_base + hwc->idx,
|
(void)checking_wrmsrl(hwc->config_base + hwc->idx,
|
||||||
(u64)(p4_config_unpack_cccr(hwc->config)) &
|
(u64)(p4_config_unpack_cccr(hwc->config)) &
|
||||||
~P4_CCCR_ENABLE & ~P4_CCCR_OVF);
|
~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void p4_pmu_disable_all(void)
|
static void p4_pmu_disable_all(void)
|
||||||
|
@ -389,27 +496,14 @@ static void p4_pmu_enable_event(struct perf_event *event)
|
||||||
struct hw_perf_event *hwc = &event->hw;
|
struct hw_perf_event *hwc = &event->hw;
|
||||||
int thread = p4_ht_config_thread(hwc->config);
|
int thread = p4_ht_config_thread(hwc->config);
|
||||||
u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
|
u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
|
||||||
u64 escr_base;
|
unsigned int idx = p4_config_unpack_event(hwc->config);
|
||||||
struct p4_event_template *tpl;
|
unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
|
||||||
struct p4_pmu_res *c;
|
struct p4_event_bind *bind;
|
||||||
|
struct p4_cache_event_bind *bind_cache;
|
||||||
|
u64 escr_addr, cccr;
|
||||||
|
|
||||||
/*
|
bind = &p4_event_bind_map[idx];
|
||||||
* some preparation work from per-cpu private fields
|
escr_addr = (u64)bind->escr_msr[thread];
|
||||||
* since we need to find out which ESCR to use
|
|
||||||
*/
|
|
||||||
c = &__get_cpu_var(p4_pmu_config);
|
|
||||||
tpl = c->tpl[hwc->idx];
|
|
||||||
if (!tpl) {
|
|
||||||
pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tpl->msr) {
|
|
||||||
(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, tpl->msr >> 32);
|
|
||||||
(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, tpl->msr & 0xffffffff);
|
|
||||||
}
|
|
||||||
|
|
||||||
escr_base = (u64)tpl->escr_msr[thread];
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* - we dont support cascaded counters yet
|
* - we dont support cascaded counters yet
|
||||||
|
@ -418,9 +512,27 @@ static void p4_pmu_enable_event(struct perf_event *event)
|
||||||
WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
|
WARN_ON_ONCE(p4_is_event_cascaded(hwc->config));
|
||||||
WARN_ON_ONCE(hwc->idx == 1);
|
WARN_ON_ONCE(hwc->idx == 1);
|
||||||
|
|
||||||
(void)checking_wrmsrl(escr_base, escr_conf);
|
/* we need a real Event value */
|
||||||
|
escr_conf &= ~P4_ESCR_EVENT_MASK;
|
||||||
|
escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode));
|
||||||
|
|
||||||
|
cccr = p4_config_unpack_cccr(hwc->config);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* it could be Cache event so that we need to
|
||||||
|
* set metrics into additional MSRs
|
||||||
|
*/
|
||||||
|
BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
|
||||||
|
if (idx_cache > P4_CACHE__NONE &&
|
||||||
|
idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
|
||||||
|
bind_cache = &p4_cache_event_bind_map[idx_cache];
|
||||||
|
(void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
|
||||||
|
(void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
|
||||||
|
}
|
||||||
|
|
||||||
|
(void)checking_wrmsrl(escr_addr, escr_conf);
|
||||||
(void)checking_wrmsrl(hwc->config_base + hwc->idx,
|
(void)checking_wrmsrl(hwc->config_base + hwc->idx,
|
||||||
(u64)(p4_config_unpack_cccr(hwc->config)) | P4_CCCR_ENABLE);
|
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void p4_pmu_enable_all(void)
|
static void p4_pmu_enable_all(void)
|
||||||
|
@ -516,13 +628,13 @@ static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
|
||||||
if (p4_ht_thread(cpu)) {
|
if (p4_ht_thread(cpu)) {
|
||||||
cccr &= ~P4_CCCR_OVF_PMI_T0;
|
cccr &= ~P4_CCCR_OVF_PMI_T0;
|
||||||
cccr |= P4_CCCR_OVF_PMI_T1;
|
cccr |= P4_CCCR_OVF_PMI_T1;
|
||||||
if (escr & P4_EVNTSEL_T0_OS) {
|
if (escr & P4_ESCR_T0_OS) {
|
||||||
escr &= ~P4_EVNTSEL_T0_OS;
|
escr &= ~P4_ESCR_T0_OS;
|
||||||
escr |= P4_EVNTSEL_T1_OS;
|
escr |= P4_ESCR_T1_OS;
|
||||||
}
|
}
|
||||||
if (escr & P4_EVNTSEL_T0_USR) {
|
if (escr & P4_ESCR_T0_USR) {
|
||||||
escr &= ~P4_EVNTSEL_T0_USR;
|
escr &= ~P4_ESCR_T0_USR;
|
||||||
escr |= P4_EVNTSEL_T1_USR;
|
escr |= P4_ESCR_T1_USR;
|
||||||
}
|
}
|
||||||
hwc->config = p4_config_pack_escr(escr);
|
hwc->config = p4_config_pack_escr(escr);
|
||||||
hwc->config |= p4_config_pack_cccr(cccr);
|
hwc->config |= p4_config_pack_cccr(cccr);
|
||||||
|
@ -530,13 +642,13 @@ static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
|
||||||
} else {
|
} else {
|
||||||
cccr &= ~P4_CCCR_OVF_PMI_T1;
|
cccr &= ~P4_CCCR_OVF_PMI_T1;
|
||||||
cccr |= P4_CCCR_OVF_PMI_T0;
|
cccr |= P4_CCCR_OVF_PMI_T0;
|
||||||
if (escr & P4_EVNTSEL_T1_OS) {
|
if (escr & P4_ESCR_T1_OS) {
|
||||||
escr &= ~P4_EVNTSEL_T1_OS;
|
escr &= ~P4_ESCR_T1_OS;
|
||||||
escr |= P4_EVNTSEL_T0_OS;
|
escr |= P4_ESCR_T0_OS;
|
||||||
}
|
}
|
||||||
if (escr & P4_EVNTSEL_T1_USR) {
|
if (escr & P4_ESCR_T1_USR) {
|
||||||
escr &= ~P4_EVNTSEL_T1_USR;
|
escr &= ~P4_ESCR_T1_USR;
|
||||||
escr |= P4_EVNTSEL_T0_USR;
|
escr |= P4_ESCR_T0_USR;
|
||||||
}
|
}
|
||||||
hwc->config = p4_config_pack_escr(escr);
|
hwc->config = p4_config_pack_escr(escr);
|
||||||
hwc->config |= p4_config_pack_cccr(cccr);
|
hwc->config |= p4_config_pack_cccr(cccr);
|
||||||
|
@ -606,66 +718,56 @@ static int p4_get_escr_idx(unsigned int addr)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int p4_next_cntr(int thread, unsigned long *used_mask,
|
||||||
|
struct p4_event_bind *bind)
|
||||||
|
{
|
||||||
|
int i = 0, j;
|
||||||
|
|
||||||
|
for (i = 0; i < P4_CNTR_LIMIT; i++) {
|
||||||
|
j = bind->cntr[thread][i++];
|
||||||
|
if (j == -1 || !test_bit(j, used_mask))
|
||||||
|
return j;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
|
static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
|
||||||
{
|
{
|
||||||
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
||||||
unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)];
|
unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)];
|
||||||
|
|
||||||
struct hw_perf_event *hwc;
|
|
||||||
struct p4_event_template *tpl;
|
|
||||||
struct p4_pmu_res *c;
|
|
||||||
int cpu = raw_smp_processor_id();
|
int cpu = raw_smp_processor_id();
|
||||||
int escr_idx, thread, i, num;
|
struct hw_perf_event *hwc;
|
||||||
|
struct p4_event_bind *bind;
|
||||||
|
unsigned int i, thread, num;
|
||||||
|
int cntr_idx, escr_idx;
|
||||||
|
|
||||||
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
|
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
|
||||||
bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR);
|
bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR);
|
||||||
|
|
||||||
c = &__get_cpu_var(p4_pmu_config);
|
|
||||||
/*
|
|
||||||
* Firstly find out which resource events are going
|
|
||||||
* to use, if ESCR+CCCR tuple is already borrowed
|
|
||||||
* then get out of here
|
|
||||||
*/
|
|
||||||
for (i = 0, num = n; i < n; i++, num--) {
|
for (i = 0, num = n; i < n; i++, num--) {
|
||||||
hwc = &cpuc->event_list[i]->hw;
|
|
||||||
tpl = p4_pmu_template_lookup(hwc->config);
|
|
||||||
if (!tpl)
|
|
||||||
goto done;
|
|
||||||
thread = p4_ht_thread(cpu);
|
|
||||||
escr_idx = p4_get_escr_idx(tpl->escr_msr[thread]);
|
|
||||||
if (escr_idx == -1)
|
|
||||||
goto done;
|
|
||||||
|
|
||||||
/* already allocated and remains on the same cpu */
|
hwc = &cpuc->event_list[i]->hw;
|
||||||
|
thread = p4_ht_thread(cpu);
|
||||||
|
bind = p4_config_get_bind(hwc->config);
|
||||||
|
escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
|
||||||
|
|
||||||
if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
|
if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) {
|
||||||
|
cntr_idx = hwc->idx;
|
||||||
if (assign)
|
if (assign)
|
||||||
assign[i] = hwc->idx;
|
assign[i] = hwc->idx;
|
||||||
/* upstream dependent event */
|
|
||||||
if (unlikely(tpl->dep != -1))
|
|
||||||
printk_once(KERN_WARNING "PMU: Dep events are "
|
|
||||||
"not implemented yet\n");
|
|
||||||
goto reserve;
|
goto reserve;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* it may be already borrowed */
|
cntr_idx = p4_next_cntr(thread, used_mask, bind);
|
||||||
if (test_bit(tpl->cntr[thread], used_mask) ||
|
if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
|
||||||
test_bit(escr_idx, escr_mask))
|
|
||||||
goto done;
|
goto done;
|
||||||
|
|
||||||
/*
|
|
||||||
* ESCR+CCCR+COUNTERs are available to use lets swap
|
|
||||||
* thread specific bits, push assigned bits
|
|
||||||
* back and save template into per-cpu
|
|
||||||
* area (which will allow us to find out the ESCR
|
|
||||||
* to be used at moment of "enable event via real MSR")
|
|
||||||
*/
|
|
||||||
p4_pmu_swap_config_ts(hwc, cpu);
|
p4_pmu_swap_config_ts(hwc, cpu);
|
||||||
if (assign) {
|
if (assign)
|
||||||
assign[i] = tpl->cntr[thread];
|
assign[i] = cntr_idx;
|
||||||
c->tpl[assign[i]] = tpl;
|
|
||||||
}
|
|
||||||
reserve:
|
reserve:
|
||||||
set_bit(tpl->cntr[thread], used_mask);
|
set_bit(cntr_idx, used_mask);
|
||||||
set_bit(escr_idx, escr_mask);
|
set_bit(escr_idx, escr_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -684,7 +786,7 @@ static __initconst struct x86_pmu p4_pmu = {
|
||||||
.perfctr = MSR_P4_BPU_PERFCTR0,
|
.perfctr = MSR_P4_BPU_PERFCTR0,
|
||||||
.event_map = p4_pmu_event_map,
|
.event_map = p4_pmu_event_map,
|
||||||
.raw_event = p4_pmu_raw_event,
|
.raw_event = p4_pmu_raw_event,
|
||||||
.max_events = ARRAY_SIZE(p4_templates),
|
.max_events = ARRAY_SIZE(p4_general_events),
|
||||||
.get_event_constraints = x86_get_event_constraints,
|
.get_event_constraints = x86_get_event_constraints,
|
||||||
/*
|
/*
|
||||||
* IF HT disabled we may need to use all
|
* IF HT disabled we may need to use all
|
||||||
|
@ -716,7 +818,7 @@ static __init int p4_pmu_init(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
|
memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
|
||||||
sizeof(hw_cache_event_ids));
|
sizeof(hw_cache_event_ids));
|
||||||
|
|
||||||
pr_cont("Netburst events, ");
|
pr_cont("Netburst events, ");
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue