perf events, x86: Fix Intel Nehalem and Westmere last level cache event definitions

The Intel Nehalem offcore bits implemented in:

  e994d7d23a0b: perf: Fix LLC-* events on Intel Nehalem/Westmere

... are wrong: they implemented _ACCESS as _HIT and counted OTHER_CORE_HIT* as
MISS even though its clearly documented as an L3 hit ...

Fix them and the Westmere definitions as well.

Cc: Andi Kleen <ak@linux.intel.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1299119690-13991-3-git-send-email-ming.m.lin@intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Peter Zijlstra 2011-04-23 00:57:42 +02:00 committed by Ingo Molnar
parent 925f83c085
commit 63b6a6758e
1 changed files with 52 additions and 35 deletions

View File

@ -184,26 +184,23 @@ static __initconst const u64 snb_hw_cache_event_ids
}, },
}, },
[ C(LL ) ] = { [ C(LL ) ] = {
/*
* TBD: Need Off-core Response Performance Monitoring support
*/
[ C(OP_READ) ] = { [ C(OP_READ) ] = {
/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7, [ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01bb, [ C(RESULT_MISS) ] = 0x01b7,
}, },
[ C(OP_WRITE) ] = { [ C(OP_WRITE) ] = {
/* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7, [ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01bb, [ C(RESULT_MISS) ] = 0x01b7,
}, },
[ C(OP_PREFETCH) ] = { [ C(OP_PREFETCH) ] = {
/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7, [ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01bb, [ C(RESULT_MISS) ] = 0x01b7,
}, },
}, },
[ C(DTLB) ] = { [ C(DTLB) ] = {
@ -285,26 +282,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
}, },
[ C(LL ) ] = { [ C(LL ) ] = {
[ C(OP_READ) ] = { [ C(OP_READ) ] = {
/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7, [ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01bb, [ C(RESULT_MISS) ] = 0x01b7,
}, },
/* /*
* Use RFO, not WRITEBACK, because a write miss would typically occur * Use RFO, not WRITEBACK, because a write miss would typically occur
* on RFO. * on RFO.
*/ */
[ C(OP_WRITE) ] = { [ C(OP_WRITE) ] = {
/* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01bb, [ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01b7, [ C(RESULT_MISS) ] = 0x01b7,
}, },
[ C(OP_PREFETCH) ] = { [ C(OP_PREFETCH) ] = {
/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
[ C(RESULT_ACCESS) ] = 0x01b7, [ C(RESULT_ACCESS) ] = 0x01b7,
/* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
[ C(RESULT_MISS) ] = 0x01bb, [ C(RESULT_MISS) ] = 0x01b7,
}, },
}, },
[ C(DTLB) ] = { [ C(DTLB) ] = {
@ -352,16 +349,36 @@ static __initconst const u64 westmere_hw_cache_event_ids
}; };
/* /*
* OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
* See IA32 SDM Vol 3B 30.6.1.3
*/ */
#define DMND_DATA_RD (1 << 0) #define NHM_DMND_DATA_RD (1 << 0)
#define DMND_RFO (1 << 1) #define NHM_DMND_RFO (1 << 1)
#define DMND_WB (1 << 3) #define NHM_DMND_IFETCH (1 << 2)
#define PF_DATA_RD (1 << 4) #define NHM_DMND_WB (1 << 3)
#define PF_DATA_RFO (1 << 5) #define NHM_PF_DATA_RD (1 << 4)
#define RESP_UNCORE_HIT (1 << 8) #define NHM_PF_DATA_RFO (1 << 5)
#define RESP_MISS (0xf600) /* non uncore hit */ #define NHM_PF_IFETCH (1 << 6)
#define NHM_OFFCORE_OTHER (1 << 7)
#define NHM_UNCORE_HIT (1 << 8)
#define NHM_OTHER_CORE_HIT_SNP (1 << 9)
#define NHM_OTHER_CORE_HITM (1 << 10)
/* reserved */
#define NHM_REMOTE_CACHE_FWD (1 << 12)
#define NHM_REMOTE_DRAM (1 << 13)
#define NHM_LOCAL_DRAM (1 << 14)
#define NHM_NON_DRAM (1 << 15)
#define NHM_ALL_DRAM (NHM_REMOTE_DRAM|NHM_LOCAL_DRAM)
#define NHM_DMND_READ (NHM_DMND_DATA_RD)
#define NHM_DMND_WRITE (NHM_DMND_RFO|NHM_DMND_WB)
#define NHM_DMND_PREFETCH (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
#define NHM_L3_HIT (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
#define NHM_L3_MISS (NHM_NON_DRAM|NHM_ALL_DRAM|NHM_REMOTE_CACHE_FWD)
#define NHM_L3_ACCESS (NHM_L3_HIT|NHM_L3_MISS)
static __initconst const u64 nehalem_hw_cache_extra_regs static __initconst const u64 nehalem_hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_MAX]
@ -370,16 +387,16 @@ static __initconst const u64 nehalem_hw_cache_extra_regs
{ {
[ C(LL ) ] = { [ C(LL ) ] = {
[ C(OP_READ) ] = { [ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
[ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, [ C(RESULT_MISS) ] = NHM_DMND_READ|NHM_L3_MISS,
}, },
[ C(OP_WRITE) ] = { [ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
[ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, [ C(RESULT_MISS) ] = NHM_DMND_WRITE|NHM_L3_MISS,
}, },
[ C(OP_PREFETCH) ] = { [ C(OP_PREFETCH) ] = {
[ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
[ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, [ C(RESULT_MISS) ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
}, },
} }
}; };