diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index 3b623ea6ee7e..238305868644 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -18,6 +18,7 @@ #include "pmu-events/pmu-events.h" static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; +static bool zen4_ibs_extensions; static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) { @@ -39,6 +40,7 @@ static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) }; const char *ic_miss_str = NULL; const char *l1tlb_pgsz_str = NULL; + char l3_miss_str[sizeof(" L3MissOnly _ FetchOcMiss _ FetchL3Miss _")] = ""; if (cpu_family == 0x19 && cpu_model < 0x10) { /* @@ -53,12 +55,19 @@ static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) ic_miss_str = ic_miss_strs[reg.ic_miss]; } + if (zen4_ibs_extensions) { + snprintf(l3_miss_str, sizeof(l3_miss_str), + " L3MissOnly %d FetchOcMiss %d FetchL3Miss %d", + reg.l3_miss_only, reg.fetch_oc_miss, reg.fetch_l3_miss); + } + printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s " - "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n", + "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s%s\n", reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat, reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "", reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss, - reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : ""); + reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : "", + l3_miss_str); } static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) @@ -68,9 +77,15 @@ static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) static void pr_ibs_op_ctl(union ibs_op_ctl reg) { - printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n", - reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, reg.op_en, reg.op_val, - reg.cnt_ctl, reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); + char l3_miss_only[sizeof(" L3MissOnly _")] = ""; + + if (zen4_ibs_extensions) + snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only); + + printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n", + reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only, + reg.op_en, reg.op_val, reg.cnt_ctl, + reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); } static void pr_ibs_op_data(union ibs_op_data reg) @@ -84,7 +99,34 @@ static void pr_ibs_op_data(union ibs_op_data reg) reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode); } -static void pr_ibs_op_data2(union ibs_op_data2 reg) +static void pr_ibs_op_data2_extended(union ibs_op_data2 reg) +{ + static const char * const data_src_str[] = { + "", + " DataSrc 1=Local L3 or other L1/L2 in CCX", + " DataSrc 2=A peer cache in a near CCX", + " DataSrc 3=Data returned from DRAM", + " DataSrc 4=(reserved)", + " DataSrc 5=A peer cache in a far CCX", + " DataSrc 6=DRAM address map with \"long latency\" bit set", + " DataSrc 7=Data returned from MMIO/Config/PCI/APIC", + " DataSrc 8=Extension Memory (S-Link, GenZ, etc)", + " DataSrc 9=(reserved)", + " DataSrc 10=(reserved)", + " DataSrc 11=(reserved)", + " DataSrc 12=Peer Agent Memory", + /* 13 to 31 are reserved. Avoid printing them. */ + }; + int data_src = (reg.data_src_hi << 3) | reg.data_src_lo; + + printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val, + (data_src == 1 || data_src == 2 || data_src == 5) ? + (reg.cache_hit_st ? "CacheHitSt 1=O-State " : "CacheHitSt 0=M-state ") : "", + reg.rmt_node, + data_src < (int)ARRAY_SIZE(data_src_str) ? data_src_str[data_src] : ""); +} + +static void pr_ibs_op_data2_default(union ibs_op_data2 reg) { static const char * const data_src_str[] = { "", @@ -103,6 +145,13 @@ static void pr_ibs_op_data2(union ibs_op_data2 reg) reg.rmt_node, data_src_str[reg.data_src_lo]); } +static void pr_ibs_op_data2(union ibs_op_data2 reg) +{ + if (zen4_ibs_extensions) + return pr_ibs_op_data2_extended(reg); + pr_ibs_op_data2_default(reg); +} + static void pr_ibs_op_data3(union ibs_op_data3 reg) { char l2_miss_str[sizeof(" L2Miss _")] = ""; @@ -279,6 +328,9 @@ bool evlist__has_amd_ibs(struct evlist *evlist) pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */; } + if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions")) + zen4_ibs_extensions = 1; + if (ibs_fetch_type || ibs_op_type) { if (!cpu_family) parse_cpuid(env);