perf tsc: Add arch TSC frequency information
The TSC frequency information is required for event metrics that use the literal system_tsc_freq. On newer Intel platforms, the TSC frequency can be retrieved from CPUID leaf 0x15. If the TSC frequency information isn't present, the /proc/cpuinfo approach is used. Refactor cpuid() for this use. Note: the previous stack pushing/popping approach was broken on x86-64, whose stack red zone would be clobbered. Committer testing: Before: $ perf record sleep 0.0001 [ perf record: Woken up 1 times to write data ] $ perf report --header-only |& grep cpuid # cpuid : AuthenticAMD,25,33,0 $ After the patch: $ perf record sleep 0.0001 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.002 MB perf.data (8 samples) ] $ perf report --header-only |& grep cpuid # cpuid : AuthenticAMD,25,33,0 $ Signed-off-by: Kan Liang <kan.liang@linux.intel.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Alexandre Torgue <alexandre.torgue@foss.st.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Caleb Biggers <caleb.biggers@intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: James Clark <james.clark@arm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: John Garry <john.garry@huawei.com> Cc: Kshipra Bopardikar <kshipra.bopardikar@intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Perry Taylor <perry.taylor@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com> Link: https://lore.kernel.org/r/20220718164312.3994191-2-irogers@google.com Signed-off-by: Ian Rogers <irogers@google.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
9fe9b252c7
commit
bc2373a58a
|
@ -0,0 +1,34 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
#ifndef PERF_CPUID_H
|
||||||
|
#define PERF_CPUID_H 1
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * cpuid() - execute the CPUID instruction for one leaf/sub-leaf pair.
 * @op:  value loaded into %eax before the instruction (the leaf).
 * @op2: value loaded into %ecx before the instruction (the sub-leaf).
 * @a:   receives %eax after the instruction.
 * @b:   receives the %ebx value produced by the instruction.
 * @c:   receives %ecx after the instruction.
 * @d:   receives %edx after the instruction.
 *
 * The GCC alternate keyword __asm__ is used rather than plain asm so that
 * this header also builds in strict ISO modes (e.g. -std=c11), where asm
 * is not a keyword.
 */
static inline void
cpuid(unsigned int op, unsigned int op2, unsigned int *a, unsigned int *b,
      unsigned int *c, unsigned int *d)
{
	/*
	 * Preserve the %ebx/%rbx register by either parking it in %rdi or
	 * saving it on the stack - x86-64 must not push, to avoid the stack
	 * red zone. In PIC compilations %ebx contains the address of the
	 * global offset table. %rbx is occasionally used to address stack
	 * variables in presence of dynamic allocas.
	 *
	 * Either way the CPUID result for %ebx ends up in %edi/%rdi, which
	 * is why *b is bound to the "D" constraint below.
	 */
	__asm__(
#if defined(__x86_64__)
		"mov %%rbx, %%rdi\n"	/* stash caller's %rbx */
		"cpuid\n"
		"xchg %%rdi, %%rbx\n"	/* result -> %rdi, restore %rbx */
#else
		"pushl %%ebx\n"		/* stash caller's %ebx */
		"cpuid\n"
		"movl %%ebx, %%edi\n"	/* result -> %edi */
		"popl %%ebx\n"		/* restore %ebx */
#endif
		: "=a"(*a), "=D"(*b), "=c"(*c), "=d"(*d)
		: "a"(op), "2"(op2));	/* "2" ties op2 to the %ecx output */
}
|
||||||
|
|
||||||
|
void get_cpuid_0(char *vendor, unsigned int *lvl);
|
||||||
|
|
||||||
|
#endif
|
|
@ -9,18 +9,17 @@
|
||||||
|
|
||||||
#include "../../../util/debug.h"
|
#include "../../../util/debug.h"
|
||||||
#include "../../../util/header.h"
|
#include "../../../util/header.h"
|
||||||
|
#include "cpuid.h"
|
||||||
|
|
||||||
static inline void
|
void get_cpuid_0(char *vendor, unsigned int *lvl)
|
||||||
cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
|
|
||||||
unsigned int *d)
|
|
||||||
{
|
{
|
||||||
__asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t"
|
unsigned int b, c, d;
|
||||||
"movl %%ebx, %%esi\n\t.byte 0x5b"
|
|
||||||
: "=a" (*a),
|
cpuid(0, 0, lvl, &b, &c, &d);
|
||||||
"=S" (*b),
|
strncpy(&vendor[0], (char *)(&b), 4);
|
||||||
"=c" (*c),
|
strncpy(&vendor[4], (char *)(&d), 4);
|
||||||
"=d" (*d)
|
strncpy(&vendor[8], (char *)(&c), 4);
|
||||||
: "a" (op));
|
vendor[12] = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -31,14 +30,10 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt)
|
||||||
int nb;
|
int nb;
|
||||||
char vendor[16];
|
char vendor[16];
|
||||||
|
|
||||||
cpuid(0, &lvl, &b, &c, &d);
|
get_cpuid_0(vendor, &lvl);
|
||||||
strncpy(&vendor[0], (char *)(&b), 4);
|
|
||||||
strncpy(&vendor[4], (char *)(&d), 4);
|
|
||||||
strncpy(&vendor[8], (char *)(&c), 4);
|
|
||||||
vendor[12] = '\0';
|
|
||||||
|
|
||||||
if (lvl >= 1) {
|
if (lvl >= 1) {
|
||||||
cpuid(1, &a, &b, &c, &d);
|
cpuid(1, 0, &a, &b, &c, &d);
|
||||||
|
|
||||||
family = (a >> 8) & 0xf; /* bits 11 - 8 */
|
family = (a >> 8) & 0xf; /* bits 11 - 8 */
|
||||||
model = (a >> 4) & 0xf; /* Bits 7 - 4 */
|
model = (a >> 4) & 0xf; /* Bits 7 - 4 */
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
#include <linux/types.h>
|
#include <linux/types.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#include "../../../util/tsc.h"
|
#include "../../../util/tsc.h"
|
||||||
|
#include "cpuid.h"
|
||||||
|
|
||||||
u64 rdtsc(void)
|
u64 rdtsc(void)
|
||||||
{
|
{
|
||||||
|
@ -11,3 +13,34 @@ u64 rdtsc(void)
|
||||||
|
|
||||||
return low | ((u64)high) << 32;
|
return low | ((u64)high) << 32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * arch_get_tsc_freq() - TSC frequency derived from CPUID leaf 0x15.
 *
 * The probe runs at most once; its outcome is kept in a function-local
 * static and handed back on every later call.
 *
 * Return: the value computed from leaf 0x15, or 0 when the vendor string
 * does not contain "Intel", the maximum basic leaf is below 0x15, or any
 * of the three leaf 0x15 values is zero.
 */
double arch_get_tsc_freq(void)
{
	static bool probed;
	static double tsc_freq;	/* stays 0 unless the probe succeeds */
	unsigned int denominator, numerator, crystal, unused, max_leaf;
	char vendor[16];

	if (!probed) {
		probed = true;

		get_cpuid_0(vendor, &max_leaf);

		/*
		 * Only query the Time Stamp Counter and Nominal Core Crystal
		 * Clock Information Leaf (0x15) on Intel parts that report it.
		 */
		if (strstr(vendor, "Intel") != NULL && max_leaf >= 0x15) {
			cpuid(0x15, 0, &denominator, &numerator, &crystal,
			      &unused);

			/* A zero field means the TSC frequency is not enumerated. */
			if (denominator && numerator && crystal)
				tsc_freq = (double)crystal * (double)numerator /
					   (double)denominator;
		}
	}

	return tsc_freq;
}
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include "expr-bison.h"
|
#include "expr-bison.h"
|
||||||
#include "expr-flex.h"
|
#include "expr-flex.h"
|
||||||
#include "smt.h"
|
#include "smt.h"
|
||||||
|
#include "tsc.h"
|
||||||
#include <linux/err.h>
|
#include <linux/err.h>
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/zalloc.h>
|
#include <linux/zalloc.h>
|
||||||
|
@ -402,6 +403,13 @@ double expr_id_data__source_count(const struct expr_id_data *data)
|
||||||
return data->val.source_count;
|
return data->val.source_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !(defined(__i386__) || defined(__x86_64__))
/*
 * Fallback used when not building for i386/x86-64: no architecture specific
 * TSC frequency is provided here, so always report 0.
 */
double arch_get_tsc_freq(void)
{
	return 0;
}
#endif
|
||||||
|
|
||||||
double expr__get_literal(const char *literal)
|
double expr__get_literal(const char *literal)
|
||||||
{
|
{
|
||||||
static struct cpu_topology *topology;
|
static struct cpu_topology *topology;
|
||||||
|
@ -417,6 +425,11 @@ double expr__get_literal(const char *literal)
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!strcasecmp("#system_tsc_freq", literal)) {
|
||||||
|
result = arch_get_tsc_freq();
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Assume that topology strings are consistent, such as CPUs "0-1"
|
* Assume that topology strings are consistent, such as CPUs "0-1"
|
||||||
* wouldn't be listed as "0,1", and so after deduplication the number of
|
* wouldn't be listed as "0,1", and so after deduplication the number of
|
||||||
|
|
|
@ -25,6 +25,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
|
||||||
u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
|
u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
|
||||||
u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
|
u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
|
||||||
u64 rdtsc(void);
|
u64 rdtsc(void);
|
||||||
|
double arch_get_tsc_freq(void);
|
||||||
|
|
||||||
size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp);
|
size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue