perf vendor events intel: Update metrics to detect pmem at runtime
By detecting whether nvdimms are installed at runtime the number of events can be reduced if it isn't. These changes come from this PR: https://github.com/intel/perfmon/pull/63 Reviewed-by: Kan Liang <kan.liang@linux.intel.com> Signed-off-by: Ian Rogers <irogers@google.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Caleb Biggers <caleb.biggers@intel.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Edward Baker <edward.baker@intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Perry Taylor <perry.taylor@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Samantha Alt <samantha.alt@intel.com> Cc: Weilin Wang <weilin.wang@intel.com> Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com> Link: https://lore.kernel.org/r/20230324072218.181880-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
c3bf86f11d
commit
31c5ba6c25
|
@ -201,7 +201,7 @@
|
|||
{
|
||||
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
|
||||
"MetricConstraint": "NO_GROUP_EVENTS",
|
||||
"MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound - tma_pmm_bound",
|
||||
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound - tma_pmm_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound)",
|
||||
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
|
||||
"MetricName": "tma_dram_bound",
|
||||
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
|
||||
|
@ -933,7 +933,7 @@
|
|||
},
|
||||
{
|
||||
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
|
||||
"MetricExpr": "1e9 * (UNC_M_PMM_RPQ_OCCUPANCY.ALL / UNC_M_PMM_RPQ_INSERTS) / imc_0@event\\=0x0@",
|
||||
"MetricExpr": "(1e9 * (UNC_M_PMM_RPQ_OCCUPANCY.ALL / UNC_M_PMM_RPQ_INSERTS) / imc_0@event\\=0x0@ if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "Mem;MemoryLat;Server;SoC",
|
||||
"MetricName": "tma_info_mem_pmm_read_latency",
|
||||
"PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
|
||||
|
@ -998,13 +998,13 @@
|
|||
},
|
||||
{
|
||||
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
|
||||
"MetricExpr": "64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time",
|
||||
"MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "Mem;MemoryBW;Server;SoC",
|
||||
"MetricName": "tma_info_pmm_read_bw"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
|
||||
"MetricExpr": "64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time",
|
||||
"MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "Mem;MemoryBW;Server;SoC",
|
||||
"MetricName": "tma_info_pmm_write_bw"
|
||||
},
|
||||
|
@ -1310,7 +1310,7 @@
|
|||
{
|
||||
"BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
|
||||
"MetricConstraint": "NO_GROUP_EVENTS",
|
||||
"MetricExpr": "((1 - (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))))) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0)",
|
||||
"MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
|
||||
"MetricName": "tma_pmm_bound",
|
||||
"MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
|
||||
|
|
|
@ -186,7 +186,7 @@
|
|||
{
|
||||
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
|
||||
"MetricConstraint": "NO_GROUP_EVENTS",
|
||||
"MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound - tma_pmm_bound",
|
||||
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound - tma_pmm_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound)",
|
||||
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
|
||||
"MetricName": "tma_dram_bound",
|
||||
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
|
||||
|
@ -918,7 +918,7 @@
|
|||
},
|
||||
{
|
||||
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
|
||||
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / cha_0@event\\=0x0@",
|
||||
"MetricExpr": "(1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / cha_0@event\\=0x0@ if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "Mem;MemoryLat;Server;SoC",
|
||||
"MetricName": "tma_info_mem_pmm_read_latency",
|
||||
"PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
|
||||
|
@ -984,13 +984,13 @@
|
|||
},
|
||||
{
|
||||
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
|
||||
"MetricExpr": "64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time",
|
||||
"MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "Mem;MemoryBW;Server;SoC",
|
||||
"MetricName": "tma_info_pmm_read_bw"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
|
||||
"MetricExpr": "64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time",
|
||||
"MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "Mem;MemoryBW;Server;SoC",
|
||||
"MetricName": "tma_info_pmm_write_bw"
|
||||
},
|
||||
|
@ -1298,7 +1298,7 @@
|
|||
},
|
||||
{
|
||||
"BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
|
||||
"MetricExpr": "((1 - (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))))) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0)",
|
||||
"MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
|
||||
"MetricName": "tma_pmm_bound",
|
||||
"MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
|
||||
|
|
|
@ -185,7 +185,7 @@
|
|||
{
|
||||
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
|
||||
"MetricConstraint": "NO_GROUP_EVENTS",
|
||||
"MetricExpr": "MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks - tma_pmm_bound",
|
||||
"MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks)",
|
||||
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
|
||||
"MetricName": "tma_dram_bound",
|
||||
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
|
||||
|
@ -968,7 +968,7 @@
|
|||
},
|
||||
{
|
||||
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
|
||||
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / uncore_cha_0@event\\=0x1@",
|
||||
"MetricExpr": "(1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / uncore_cha_0@event\\=0x1@ if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "Mem;MemoryLat;Server;SoC",
|
||||
"MetricName": "tma_info_mem_pmm_read_latency",
|
||||
"PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
|
||||
|
@ -1034,13 +1034,13 @@
|
|||
},
|
||||
{
|
||||
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
|
||||
"MetricExpr": "64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time",
|
||||
"MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "Mem;MemoryBW;Server;SoC",
|
||||
"MetricName": "tma_info_pmm_read_bw"
|
||||
},
|
||||
{
|
||||
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
|
||||
"MetricExpr": "64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time",
|
||||
"MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / duration_time if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "Mem;MemoryBW;Server;SoC",
|
||||
"MetricName": "tma_info_pmm_write_bw"
|
||||
},
|
||||
|
@ -1406,7 +1406,7 @@
|
|||
},
|
||||
{
|
||||
"BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external 3D-Xpoint (Crystal Ridge, a.k.a",
|
||||
"MetricExpr": "((1 - (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))))) * (MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0)",
|
||||
"MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 0)) * (MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_clks) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
|
||||
"MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
|
||||
"MetricName": "tma_pmm_bound",
|
||||
"MetricThreshold": "tma_pmm_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
|
||||
|
|
Loading…
Reference in New Issue