Merge tag 'perf-core-for-mingo-5.5-20191203' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf report/top:

  - Fix a segfault due to missing initialization of the recently
    introduced struct map_symbol 'maps' field in append_inlines(), when
    running with DWARF callchains.
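
    A minimal sketch of the corrected initializer (mirroring the
    append_inlines() hunk near the end of the diff below); the 'maps'
    member was previously left uninitialized, which is what blew up when
    walking DWARF callchains:

        struct map_symbol ilist_ms = {
                .maps = ms->maps,      /* the missing initialization */
                .map  = map,
                .sym  = ilist->symbol,
        };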

perf stat:

  Andi Kleen:

  - Affinity-based optimizations for sessions with many events on
    machines with large core counts, avoiding an excessive number of IPIs.
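
    A rough sketch of the new iteration pattern, condensed from the
    builtin-stat.c read_counters() hunk further down: bind the tool
    thread to each CPU once via the affinity helpers, then service every
    event that has an instance on that CPU, so the per-event reads and
    ioctls stay local and don't trigger cross-CPU IPIs:

        struct affinity affinity;

        if (affinity__setup(&affinity) < 0)
                return;
        evlist__for_each_cpu(evsel_list, i, cpu) {
                affinity__set(&affinity, cpu);     /* migrate once per CPU */
                evlist__for_each_entry(evsel_list, counter) {
                        if (evsel__cpu_iter_skip(counter, cpu))
                                continue;          /* no instance on this CPU */
                        read_counter_cpu(counter, rs, counter->cpu_iter - 1);
                }
        }
        affinity__cleanup(&affinity);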

libtraceevent:

  Sudip Mukherjee:

  - Fix installation with O=.

  - Copy pkg-config file to output folder when using O=.

perf bench:

  Arnaldo Carvalho de Melo:

  - Update the copies of x86's mem{cpy,set}_64.S and, because those now
    use new constructs from linux/linkage.h, update that header copy as
    well. This raises the minimum clang version needed to build perf to
    3.5, as clang 3.4, found in some of the container images used to
    test building perf, can't grok STT_FUNC as a token in .type lines.
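
    For reference, how the new annotations in the tools copy of
    linux/linkage.h expand (abridged from the linkage.h hunk below); the
    ".type name STT_FUNC" directive that SYM_END() emits is the token
    clang 3.4 chokes on:

        #define SYM_FUNC_START(name)                    \
                SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)

        #define SYM_END(name, sym_type)                 \
                .type name sym_type ASM_NL              \
                .size name, .-name

        /* e.g. SYM_FUNC_END(memcpy) expands to:
         *   .type memcpy STT_FUNC ; .size memcpy, .-memcpy
         */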

ABI headers:

  Arnaldo Carvalho de Melo:

  - Sync x86's msr-index.h copy with the kernel sources, making new
    MSRs, such as IA32_TSX_CTRL, usable in filter expressions in
    'perf trace'.

  - Sync linux/fscrypt.h, linux/stat.h, sched.h and the kvm headers.

perf trace:

  Arnaldo Carvalho de Melo:

  - Add CLEAR_SIGHAND support for clone's 'flags' argument.
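
    For illustration, a minimal (hypothetical) clone3() call that would
    exercise the new flag decoding, using CLONE_CLEAR_SIGHAND and struct
    clone_args as declared in the linux/sched.h UAPI copy synced in this
    batch; it assumes kernel/libc headers new enough to provide
    __NR_clone3 and the flag itself:

        #define _GNU_SOURCE
        #include <linux/sched.h>   /* struct clone_args, CLONE_CLEAR_SIGHAND */
        #include <sys/syscall.h>
        #include <signal.h>
        #include <string.h>
        #include <unistd.h>

        static pid_t fork_clear_sighand(void)
        {
                struct clone_args args;

                memset(&args, 0, sizeof(args));
                /* child starts with all signal handlers reset to SIG_DFL */
                args.flags       = CLONE_CLEAR_SIGHAND;
                args.exit_signal = SIGCHLD;

                return syscall(__NR_clone3, &args, sizeof(args));
        }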

perf kvm:

  Arnaldo Carvalho de Melo:

  - Clarify the 'perf kvm' -i and -o command line options.

perf test:

  Ian Rogers:

  - Move the genelf test functionality into a 'perf test' entry
    ("Test jit_write_elf").

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Ingo Molnar 2019-12-04 08:49:52 +01:00
commit 9f58c93efd
42 changed files with 794 additions and 234 deletions


@ -131,8 +131,9 @@ struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
__u8 ext_dabt_pending;
/* Align it to 8 bytes */
__u8 pad[6];
__u8 pad[5];
__u64 serror_esr;
} exception;
__u32 reserved[12];


@ -164,8 +164,9 @@ struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
__u8 ext_dabt_pending;
/* Align it to 8 bytes */
__u8 pad[6];
__u8 pad[5];
__u64 serror_esr;
} exception;
__u32 reserved[12];
@ -323,6 +324,8 @@ struct kvm_vcpu_events {
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
#define KVM_ARM_VCPU_PVTIME_CTRL 2
#define KVM_ARM_VCPU_PVTIME_IPA 0
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_VCPU2_SHIFT 28


@ -667,6 +667,8 @@ struct kvm_ppc_cpu_char {
/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
#define KVM_DEV_XICS_GRP_CTRL 2
#define KVM_DEV_XICS_NR_SERVERS 1
/* Layout of 64-bit source attribute values */
#define KVM_XICS_DESTINATION_SHIFT 0
@ -683,6 +685,7 @@ struct kvm_ppc_cpu_char {
#define KVM_DEV_XIVE_GRP_CTRL 1
#define KVM_DEV_XIVE_RESET 1
#define KVM_DEV_XIVE_EQ_SYNC 2
#define KVM_DEV_XIVE_NR_SERVERS 3
#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */


@ -292,6 +292,7 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
@ -399,5 +400,7 @@
#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#endif /* _ASM_X86_CPUFEATURES_H */


@ -93,6 +93,18 @@
* Microarchitectural Data
* Sampling (MDS) vulnerabilities.
*/
#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /*
* The processor is not susceptible to a
* machine check error due to modifying the
* code page size along with either the
* physical address or cache type
* without TLB invalidation.
*/
#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */
#define ARCH_CAP_TAA_NO BIT(8) /*
* Not susceptible to
* TSX Async Abort (TAA) vulnerabilities.
*/
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@ -103,6 +115,10 @@
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
#define MSR_IA32_TSX_CTRL 0x00000122
#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
@ -393,6 +409,8 @@
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a


@ -28,8 +28,8 @@
* Output:
* rax original destination
*/
ENTRY(__memcpy)
ENTRY(memcpy)
SYM_FUNC_START_ALIAS(__memcpy)
SYM_FUNC_START_LOCAL(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
@ -41,8 +41,8 @@ ENTRY(memcpy)
movl %edx, %ecx
rep movsb
ret
ENDPROC(memcpy)
ENDPROC(__memcpy)
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)
@ -50,14 +50,14 @@ EXPORT_SYMBOL(__memcpy)
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
ENTRY(memcpy_erms)
SYM_FUNC_START(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
ENDPROC(memcpy_erms)
SYM_FUNC_END(memcpy_erms)
ENTRY(memcpy_orig)
SYM_FUNC_START(memcpy_orig)
movq %rdi, %rax
cmpq $0x20, %rdx
@ -182,7 +182,7 @@ ENTRY(memcpy_orig)
.Lend:
retq
ENDPROC(memcpy_orig)
SYM_FUNC_END(memcpy_orig)
#ifndef CONFIG_UML
@ -193,7 +193,7 @@ MCSAFE_TEST_CTL
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
ENTRY(__memcpy_mcsafe)
SYM_FUNC_START(__memcpy_mcsafe)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
@ -260,7 +260,7 @@ ENTRY(__memcpy_mcsafe)
xorl %eax, %eax
.L_done:
ret
ENDPROC(__memcpy_mcsafe)
SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
.section .fixup, "ax"


@ -18,8 +18,8 @@
*
* rax original destination
*/
ENTRY(memset)
ENTRY(__memset)
SYM_FUNC_START_ALIAS(memset)
SYM_FUNC_START(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
* to use it when possible. If not available, use fast string instructions.
@ -42,8 +42,8 @@ ENTRY(__memset)
rep stosb
movq %r9,%rax
ret
ENDPROC(memset)
ENDPROC(__memset)
SYM_FUNC_END(__memset)
SYM_FUNC_END_ALIAS(memset)
/*
* ISO C memset - set a memory block to a byte value. This function uses
@ -56,16 +56,16 @@ ENDPROC(__memset)
*
* rax original destination
*/
ENTRY(memset_erms)
SYM_FUNC_START(memset_erms)
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
ENDPROC(memset_erms)
SYM_FUNC_END(memset_erms)
ENTRY(memset_orig)
SYM_FUNC_START(memset_orig)
movq %rdi,%r10
/* expand byte value */
@ -136,4 +136,4 @@ ENTRY(memset_orig)
subq %r8,%rdx
jmp .Lafter_bad_alignment
.Lfinal:
ENDPROC(memset_orig)
SYM_FUNC_END(memset_orig)


@ -17,7 +17,8 @@
#define FSCRYPT_POLICY_FLAGS_PAD_32 0x03
#define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03
#define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04
#define FSCRYPT_POLICY_FLAGS_VALID 0x07
#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08
#define FSCRYPT_POLICY_FLAGS_VALID 0x0F
/* Encryption algorithms */
#define FSCRYPT_MODE_AES_256_XTS 1


@ -235,6 +235,7 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_S390_STSI 25
#define KVM_EXIT_IOAPIC_EOI 26
#define KVM_EXIT_HYPERV 27
#define KVM_EXIT_ARM_NISV 28
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@ -394,6 +395,11 @@ struct kvm_run {
} eoi;
/* KVM_EXIT_HYPERV */
struct kvm_hyperv_exit hyperv;
/* KVM_EXIT_ARM_NISV */
struct {
__u64 esr_iss;
__u64 fault_ipa;
} arm_nisv;
/* Fix the size of the union. */
char padding[256];
};
@ -1000,6 +1006,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PMU_EVENT_FILTER 173
#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176
#define KVM_CAP_ARM_NISV_TO_USER 177
#define KVM_CAP_ARM_INJECT_EXT_DABT 178
#ifdef KVM_CAP_IRQ_ROUTING
@ -1227,6 +1236,8 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS
KVM_DEV_TYPE_XIVE,
#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE
KVM_DEV_TYPE_ARM_PV_TIME,
#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
KVM_DEV_TYPE_MAX,
};


@ -33,27 +33,48 @@
#define CLONE_NEWNET 0x40000000 /* New network namespace */
#define CLONE_IO 0x80000000 /* Clone io context */
/* Flags for the clone3() syscall. */
#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
#ifndef __ASSEMBLY__
/**
* struct clone_args - arguments for the clone3 syscall
* @flags: Flags for the new process as listed above.
* All flags are valid except for CSIGNAL and
* CLONE_DETACHED.
* @pidfd: If CLONE_PIDFD is set, a pidfd will be
* returned in this argument.
* @child_tid: If CLONE_CHILD_SETTID is set, the TID of the
* child process will be returned in the child's
* memory.
* @parent_tid: If CLONE_PARENT_SETTID is set, the TID of
* the child process will be returned in the
* parent's memory.
* @exit_signal: The exit_signal the parent process will be
* sent when the child exits.
* @stack: Specify the location of the stack for the
* child process.
* @stack_size: The size of the stack for the child process.
* @tls: If CLONE_SETTLS is set, the tls descriptor
* is set to tls.
* @flags: Flags for the new process as listed above.
* All flags are valid except for CSIGNAL and
* CLONE_DETACHED.
* @pidfd: If CLONE_PIDFD is set, a pidfd will be
* returned in this argument.
* @child_tid: If CLONE_CHILD_SETTID is set, the TID of the
* child process will be returned in the child's
* memory.
* @parent_tid: If CLONE_PARENT_SETTID is set, the TID of
* the child process will be returned in the
* parent's memory.
* @exit_signal: The exit_signal the parent process will be
* sent when the child exits.
* @stack: Specify the location of the stack for the
* child process.
* Note, @stack is expected to point to the
* lowest address. The stack direction will be
* determined by the kernel and set up
* appropriately based on @stack_size.
* @stack_size: The size of the stack for the child process.
* @tls: If CLONE_SETTLS is set, the tls descriptor
* is set to tls.
* @set_tid: Pointer to an array of type *pid_t. The size
* of the array is defined using @set_tid_size.
* This array is used to select PIDs/TIDs for
* newly created processes. The first element in
* this defines the PID in the most nested PID
* namespace. Each additional element in the array
* defines the PID in the parent PID namespace of
* the original PID namespace. If the array has
* less entries than the number of currently
* nested PID namespaces only the PIDs in the
* corresponding namespaces are set.
* @set_tid_size: This defines the size of the array referenced
* in @set_tid. This cannot be larger than the
* kernel's limit of nested PID namespaces.
*
* The structure is versioned by size and thus extensible.
* New struct members must go at the end of the struct and
@ -68,10 +89,13 @@ struct clone_args {
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
__aligned_u64 set_tid;
__aligned_u64 set_tid_size;
};
#endif
#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
/*
* Scheduling policies


@ -167,8 +167,8 @@ struct statx {
#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#endif /* _UAPI_LINUX_STAT_H */


@ -97,6 +97,7 @@ EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)
LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION)
LIB_INSTALL = libtraceevent.a libtraceevent.so*
LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL))
INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
@ -207,10 +208,11 @@ define do_install
$(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
endef
PKG_CONFIG_FILE = libtraceevent.pc
PKG_CONFIG_SOURCE_FILE = libtraceevent.pc
PKG_CONFIG_FILE := $(addprefix $(OUTPUT),$(PKG_CONFIG_SOURCE_FILE))
define do_install_pkgconfig_file
if [ -n "${pkgconfig_dir}" ]; then \
cp -f ${PKG_CONFIG_FILE}.template ${PKG_CONFIG_FILE}; \
cp -f ${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE}; \
sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \
sed -i "s|LIB_VERSION|${EVENT_PARSE_VERSION}|g" ${PKG_CONFIG_FILE}; \
sed -i "s|LIB_DIR|${libdir}|g" ${PKG_CONFIG_FILE}; \


@ -68,10 +68,11 @@ OPTIONS
-------
-i::
--input=<path>::
Input file name.
Input file name, for the 'report', 'diff' and 'buildid-list' subcommands.
-o::
--output=<path>::
Output file name.
Output file name, for the 'record' subcommand. Doesn't work with 'report',
just redirect the output to a file when using 'report'.
--host::
Collect host side performance profile.
--guest::


@ -37,7 +37,7 @@
.text
.type perf_regs_load,%function
ENTRY(perf_regs_load)
SYM_FUNC_START(perf_regs_load)
str r0, [r0, #R0]
str r1, [r0, #R1]
str r2, [r0, #R2]
@ -56,4 +56,4 @@ ENTRY(perf_regs_load)
str lr, [r0, #PC] // store pc as lr in order to skip the call
// to this function
mov pc, lr
ENDPROC(perf_regs_load)
SYM_FUNC_END(perf_regs_load)


@ -7,7 +7,7 @@
#define LDR_REG(r) ldr x##r, [x0, 8 * r]
#define SP (8 * 31)
#define PC (8 * 32)
ENTRY(perf_regs_load)
SYM_FUNC_START(perf_regs_load)
STR_REG(0)
STR_REG(1)
STR_REG(2)
@ -44,4 +44,4 @@ ENTRY(perf_regs_load)
str x30, [x0, #PC]
LDR_REG(1)
ret
ENDPROC(perf_regs_load)
SYM_FUNC_END(perf_regs_load)


@ -28,7 +28,7 @@
.text
#ifdef HAVE_ARCH_X86_64_SUPPORT
ENTRY(perf_regs_load)
SYM_FUNC_START(perf_regs_load)
movq %rax, AX(%rdi)
movq %rbx, BX(%rdi)
movq %rcx, CX(%rdi)
@ -60,9 +60,9 @@ ENTRY(perf_regs_load)
movq %r14, R14(%rdi)
movq %r15, R15(%rdi)
ret
ENDPROC(perf_regs_load)
SYM_FUNC_END(perf_regs_load)
#else
ENTRY(perf_regs_load)
SYM_FUNC_START(perf_regs_load)
push %edi
movl 8(%esp), %edi
movl %eax, AX(%edi)
@ -88,7 +88,7 @@ ENTRY(perf_regs_load)
movl $0, FS(%edi)
movl $0, GS(%edi)
ret
ENDPROC(perf_regs_load)
SYM_FUNC_END(perf_regs_load)
#endif
/*


@ -832,7 +832,7 @@ try_again:
if ((errno == EINVAL || errno == EBADF) &&
pos->leader != pos &&
pos->weak_group) {
pos = perf_evlist__reset_weak_group(evlist, pos);
pos = perf_evlist__reset_weak_group(evlist, pos, true);
goto try_again;
}
rc = -errno;


@ -65,6 +65,7 @@
#include "util/target.h"
#include "util/time-utils.h"
#include "util/top.h"
#include "util/affinity.h"
#include "asm/bug.h"
#include <linux/time64.h>
@ -265,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
static int read_counter(struct evsel *counter, struct timespec *rs)
static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
{
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
int ncpus, cpu, thread;
if (target__has_cpu(&target) && !target__has_per_thread(&target))
ncpus = perf_evsel__nr_cpus(counter);
else
ncpus = 1;
int thread;
if (!counter->supported)
return -ENOENT;
@ -282,39 +278,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
for (cpu = 0; cpu < ncpus; cpu++) {
struct perf_counts_values *count;
struct perf_counts_values *count;
count = perf_counts(counter->counts, cpu, thread);
count = perf_counts(counter->counts, cpu, thread);
/*
* The leader's group read loads data into its group members
* (via perf_evsel__read_counter) and sets threir count->loaded.
*/
if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
read_single_counter(counter, cpu, thread, rs)) {
counter->counts->scaled = -1;
perf_counts(counter->counts, cpu, thread)->ena = 0;
perf_counts(counter->counts, cpu, thread)->run = 0;
/*
* The leader's group read loads data into its group members
* (via perf_evsel__read_counter()) and sets their count->loaded.
*/
if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
read_single_counter(counter, cpu, thread, rs)) {
counter->counts->scaled = -1;
perf_counts(counter->counts, cpu, thread)->ena = 0;
perf_counts(counter->counts, cpu, thread)->run = 0;
return -1;
}
perf_counts__set_loaded(counter->counts, cpu, thread, false);
if (STAT_RECORD) {
if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
pr_err("failed to write stat event\n");
return -1;
}
}
perf_counts__set_loaded(counter->counts, cpu, thread, false);
if (STAT_RECORD) {
if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
pr_err("failed to write stat event\n");
return -1;
}
}
if (verbose > 1) {
fprintf(stat_config.output,
"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
perf_evsel__name(counter),
cpu,
count->val, count->ena, count->run);
}
if (verbose > 1) {
fprintf(stat_config.output,
"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
perf_evsel__name(counter),
cpu,
count->val, count->ena, count->run);
}
}
@ -324,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
static void read_counters(struct timespec *rs)
{
struct evsel *counter;
int ret;
struct affinity affinity;
int i, ncpus, cpu;
if (affinity__setup(&affinity) < 0)
return;
ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
if (!target__has_cpu(&target) || target__has_per_thread(&target))
ncpus = 1;
evlist__for_each_cpu(evsel_list, i, cpu) {
if (i >= ncpus)
break;
affinity__set(&affinity, cpu);
evlist__for_each_entry(evsel_list, counter) {
if (evsel__cpu_iter_skip(counter, cpu))
continue;
if (!counter->err) {
counter->err = read_counter_cpu(counter, rs,
counter->cpu_iter - 1);
}
}
}
affinity__cleanup(&affinity);
evlist__for_each_entry(evsel_list, counter) {
ret = read_counter(counter, rs);
if (ret)
if (counter->err)
pr_debug("failed to read counter %s\n", counter->name);
if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
pr_warning("failed to process counter %s\n", counter->name);
counter->err = 0;
}
}
@ -420,6 +436,62 @@ static bool is_target_alive(struct target *_target,
return false;
}
enum counter_recovery {
COUNTER_SKIP,
COUNTER_RETRY,
COUNTER_FATAL,
};
static enum counter_recovery stat_handle_error(struct evsel *counter)
{
char msg[BUFSIZ];
/*
* PPC returns ENXIO for HW counters until 2.6.37
* (behavior changed with commit b0a873e).
*/
if (errno == EINVAL || errno == ENOSYS ||
errno == ENOENT || errno == EOPNOTSUPP ||
errno == ENXIO) {
if (verbose > 0)
ui__warning("%s event is not supported by the kernel.\n",
perf_evsel__name(counter));
counter->supported = false;
/*
* errored is a sticky flag that means one of the counter's
* cpu event had a problem and needs to be reexamined.
*/
counter->errored = true;
if ((counter->leader != counter) ||
!(counter->leader->core.nr_members > 1))
return COUNTER_SKIP;
} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
return COUNTER_RETRY;
} else if (target__has_per_thread(&target) &&
evsel_list->core.threads &&
evsel_list->core.threads->err_thread != -1) {
/*
* For global --per-thread case, skip current
* error thread.
*/
if (!thread_map__remove(evsel_list->core.threads,
evsel_list->core.threads->err_thread)) {
evsel_list->core.threads->err_thread = -1;
return COUNTER_RETRY;
}
}
perf_evsel__open_strerror(counter, &target,
errno, msg, sizeof(msg));
ui__error("%s\n", msg);
if (child_pid != -1)
kill(child_pid, SIGTERM);
return COUNTER_FATAL;
}
static int __run_perf_stat(int argc, const char **argv, int run_idx)
{
int interval = stat_config.interval;
@ -433,6 +505,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
int status = 0;
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
struct affinity affinity;
int i, cpu;
bool second_pass = false;
if (interval) {
ts.tv_sec = interval / USEC_PER_MSEC;
@ -457,61 +532,104 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (group)
perf_evlist__set_leader(evsel_list);
evlist__for_each_entry(evsel_list, counter) {
if (affinity__setup(&affinity) < 0)
return -1;
evlist__for_each_cpu (evsel_list, i, cpu) {
affinity__set(&affinity, cpu);
evlist__for_each_entry(evsel_list, counter) {
if (evsel__cpu_iter_skip(counter, cpu))
continue;
if (counter->reset_group || counter->errored)
continue;
try_again:
if (create_perf_stat_counter(counter, &stat_config, &target) < 0) {
if (create_perf_stat_counter(counter, &stat_config, &target,
counter->cpu_iter - 1) < 0) {
/* Weak group failed. Reset the group. */
if ((errno == EINVAL || errno == EBADF) &&
counter->leader != counter &&
counter->weak_group) {
counter = perf_evlist__reset_weak_group(evsel_list, counter);
goto try_again;
}
/*
* PPC returns ENXIO for HW counters until 2.6.37
* (behavior changed with commit b0a873e).
*/
if (errno == EINVAL || errno == ENOSYS ||
errno == ENOENT || errno == EOPNOTSUPP ||
errno == ENXIO) {
if (verbose > 0)
ui__warning("%s event is not supported by the kernel.\n",
perf_evsel__name(counter));
counter->supported = false;
if ((counter->leader != counter) ||
!(counter->leader->core.nr_members > 1))
continue;
} else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
goto try_again;
} else if (target__has_per_thread(&target) &&
evsel_list->core.threads &&
evsel_list->core.threads->err_thread != -1) {
/*
* For global --per-thread case, skip current
* error thread.
* Weak group failed. We cannot just undo this here
* because earlier CPUs might be in group mode, and the kernel
* doesn't support mixing group and non group reads. Defer
* it to later.
* Don't close here because we're in the wrong affinity.
*/
if (!thread_map__remove(evsel_list->core.threads,
evsel_list->core.threads->err_thread)) {
evsel_list->core.threads->err_thread = -1;
goto try_again;
if ((errno == EINVAL || errno == EBADF) &&
counter->leader != counter &&
counter->weak_group) {
perf_evlist__reset_weak_group(evsel_list, counter, false);
assert(counter->reset_group);
second_pass = true;
continue;
}
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again;
case COUNTER_SKIP:
continue;
default:
break;
}
}
perf_evsel__open_strerror(counter, &target,
errno, msg, sizeof(msg));
ui__error("%s\n", msg);
if (child_pid != -1)
kill(child_pid, SIGTERM);
return -1;
counter->supported = true;
}
}
if (second_pass) {
/*
* Now redo all the weak group after closing them,
* and also close errored counters.
*/
evlist__for_each_cpu(evsel_list, i, cpu) {
affinity__set(&affinity, cpu);
/* First close errored or weak retry */
evlist__for_each_entry(evsel_list, counter) {
if (!counter->reset_group && !counter->errored)
continue;
if (evsel__cpu_iter_skip_no_inc(counter, cpu))
continue;
perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
}
/* Now reopen weak */
evlist__for_each_entry(evsel_list, counter) {
if (!counter->reset_group && !counter->errored)
continue;
if (evsel__cpu_iter_skip(counter, cpu))
continue;
if (!counter->reset_group)
continue;
try_again_reset:
pr_debug2("reopening weak %s\n", perf_evsel__name(counter));
if (create_perf_stat_counter(counter, &stat_config, &target,
counter->cpu_iter - 1) < 0) {
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again_reset;
case COUNTER_SKIP:
continue;
default:
break;
}
}
counter->supported = true;
}
}
}
affinity__cleanup(&affinity);
evlist__for_each_entry(evsel_list, counter) {
if (!counter->supported) {
perf_evsel__free_fd(&counter->core);
continue;
}
counter->supported = true;
l = strlen(counter->unit);
if (l > stat_config.unit_width)


@ -110,8 +110,8 @@ for i in $FILES; do
done
# diff with extra ignore lines
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
check include/linux/ctype.h '-I "isdigit("'


@ -68,14 +68,28 @@ static struct perf_cpu_map *cpu_map__default_new(void)
return cpus;
}
static int cmp_int(const void *a, const void *b)
{
return *(const int *)a - *(const int*)b;
}
static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
{
size_t payload_size = nr_cpus * sizeof(int);
struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
int i, j;
if (cpus != NULL) {
cpus->nr = nr_cpus;
memcpy(cpus->map, tmp_cpus, payload_size);
qsort(cpus->map, nr_cpus, sizeof(int), cmp_int);
/* Remove dups */
j = 0;
for (i = 0; i < nr_cpus; i++) {
if (i == 0 || cpus->map[i] != cpus->map[i - 1])
cpus->map[j++] = cpus->map[i];
}
cpus->nr = j;
assert(j <= nr_cpus);
refcount_set(&cpus->refcnt, 1);
}
@ -272,3 +286,60 @@ int perf_cpu_map__max(struct perf_cpu_map *map)
return max;
}
/*
* Merge two cpumaps
*
* orig either gets freed and replaced with a new map, or reused
* with no reference count change (similar to "realloc")
* other has its reference count increased.
*/
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other)
{
int *tmp_cpus;
int tmp_len;
int i, j, k;
struct perf_cpu_map *merged;
if (!orig && !other)
return NULL;
if (!orig) {
perf_cpu_map__get(other);
return other;
}
if (!other)
return orig;
if (orig->nr == other->nr &&
!memcmp(orig->map, other->map, orig->nr * sizeof(int)))
return orig;
tmp_len = orig->nr + other->nr;
tmp_cpus = malloc(tmp_len * sizeof(int));
if (!tmp_cpus)
return NULL;
/* Standard merge algorithm from wikipedia */
i = j = k = 0;
while (i < orig->nr && j < other->nr) {
if (orig->map[i] <= other->map[j]) {
if (orig->map[i] == other->map[j])
j++;
tmp_cpus[k++] = orig->map[i++];
} else
tmp_cpus[k++] = other->map[j++];
}
while (i < orig->nr)
tmp_cpus[k++] = orig->map[i++];
while (j < other->nr)
tmp_cpus[k++] = other->map[j++];
assert(k <= tmp_len);
merged = cpu_map__trim_new(k, tmp_cpus);
free(tmp_cpus);
perf_cpu_map__put(orig);
return merged;
}


@ -54,6 +54,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
perf_thread_map__put(evsel->threads);
evsel->threads = perf_thread_map__get(evlist->threads);
evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
}
static void perf_evlist__propagate_maps(struct perf_evlist *evlist)


@ -114,16 +114,23 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
return err;
}
static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
{
int thread;
for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
if (FD(evsel, cpu, thread) >= 0)
close(FD(evsel, cpu, thread));
FD(evsel, cpu, thread) = -1;
}
}
void perf_evsel__close_fd(struct perf_evsel *evsel)
{
int cpu, thread;
int cpu;
for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
if (FD(evsel, cpu, thread) >= 0)
close(FD(evsel, cpu, thread));
FD(evsel, cpu, thread) = -1;
}
perf_evsel__close_fd_cpu(evsel, cpu);
}
void perf_evsel__free_fd(struct perf_evsel *evsel)
@ -141,6 +148,14 @@ void perf_evsel__close(struct perf_evsel *evsel)
perf_evsel__free_fd(evsel);
}
void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
{
if (evsel->fd == NULL)
return;
perf_evsel__close_fd_cpu(evsel, cpu);
}
int perf_evsel__read_size(struct perf_evsel *evsel)
{
u64 read_format = evsel->attr.read_format;
@ -183,38 +198,61 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
}
static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
int ioc, void *arg)
int ioc, void *arg,
int cpu)
{
int cpu, thread;
int thread;
for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
int fd = FD(evsel, cpu, thread),
err = ioctl(fd, ioc, arg);
for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
int fd = FD(evsel, cpu, thread),
err = ioctl(fd, ioc, arg);
if (err)
return err;
}
if (err)
return err;
}
return 0;
}
int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
{
return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu);
}
int perf_evsel__enable(struct perf_evsel *evsel)
{
return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
int i;
int err = 0;
for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++)
err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, i);
return err;
}
int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
{
return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu);
}
int perf_evsel__disable(struct perf_evsel *evsel)
{
return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
int i;
int err = 0;
for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++)
err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, i);
return err;
}
int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
{
return perf_evsel__run_ioctl(evsel,
int err = 0, i;
for (i = 0; i < evsel->cpus->nr && !err; i++)
err = perf_evsel__run_ioctl(evsel,
PERF_EVENT_IOC_SET_FILTER,
(void *)filter);
(void *)filter, i);
return err;
}
struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)


@ -18,6 +18,7 @@ struct perf_evlist {
int nr_entries;
bool has_user_cpus;
struct perf_cpu_map *cpus;
struct perf_cpu_map *all_cpus;
struct perf_thread_map *threads;
int nr_mmaps;
size_t mmap_len;


@ -12,6 +12,8 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);


@ -26,10 +26,13 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);


@ -54,6 +54,7 @@ perf-y += unit_number__scnprintf.o
perf-y += mem2node.o
perf-y += maps.o
perf-y += time-utils-test.o
perf-y += genelf.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)


@ -259,6 +259,11 @@ static struct test generic_tests[] = {
.desc = "Print cpu map",
.func = test__cpu_map_print,
},
{
.desc = "Merge cpu map",
.func = test__cpu_map_merge,
},
{
.desc = "Probe SDT events",
.func = test__sdt_event,
@ -296,6 +301,10 @@ static struct test generic_tests[] = {
.desc = "time utils",
.func = test__time_utils,
},
{
.desc = "Test jit_write_elf",
.func = test__jit_write_elf,
},
{
.desc = "maps__merge_in",
.func = test__maps__merge_in,


@ -120,3 +120,19 @@ int test__cpu_map_print(struct test *test __maybe_unused, int subtest __maybe_un
TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1-10,12-20,22-30,32-40"));
return 0;
}
int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_unused)
{
struct perf_cpu_map *a = perf_cpu_map__new("4,2,1");
struct perf_cpu_map *b = perf_cpu_map__new("4,5,7");
struct perf_cpu_map *c = perf_cpu_map__merge(a, b);
char buf[100];
TEST_ASSERT_VAL("failed to merge map: bad nr", c->nr == 5);
cpu_map__snprint(c, buf, sizeof(buf));
TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, "1-2,4-5,7"));
perf_cpu_map__put(a);
perf_cpu_map__put(b);
perf_cpu_map__put(c);
return 0;
}


@ -125,7 +125,7 @@ static int attach__cpu_disabled(struct evlist *evlist)
evsel->core.attr.disabled = 1;
err = perf_evsel__open_per_cpu(evsel, cpus);
err = perf_evsel__open_per_cpu(evsel, cpus, -1);
if (err) {
if (err == -EACCES)
return TEST_SKIP;
@ -152,7 +152,7 @@ static int attach__cpu_enabled(struct evlist *evlist)
return -1;
}
err = perf_evsel__open_per_cpu(evsel, cpus);
err = perf_evsel__open_per_cpu(evsel, cpus, -1);
if (err == -EACCES)
return TEST_SKIP;

tools/perf/tests/genelf.c (new file, 51 lines)

@ -0,0 +1,51 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <linux/compiler.h>
#include "debug.h"
#include "tests.h"
#ifdef HAVE_JITDUMP
#include <libelf.h>
#include "../util/genelf.h"
#endif
#define TEMPL "/tmp/perf-test-XXXXXX"
int test__jit_write_elf(struct test *test __maybe_unused,
int subtest __maybe_unused)
{
#ifdef HAVE_JITDUMP
static unsigned char x86_code[] = {
0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */
0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */
0xCD, 0x80 /* int $0x80 */
};
char path[PATH_MAX];
int fd, ret;
strcpy(path, TEMPL);
fd = mkstemp(path);
if (fd < 0) {
perror("mkstemp failed");
return TEST_FAIL;
}
pr_info("Writing jit code to: %s\n", path);
ret = jit_write_elf(fd, 0, "main", x86_code, sizeof(x86_code),
NULL, 0, NULL, 0, 0);
close(fd);
unlink(path);
return ret ? TEST_FAIL : 0;
#else
return TEST_SKIP;
#endif
}


@ -98,6 +98,7 @@ int test__event_update(struct test *test, int subtest);
int test__event_times(struct test *test, int subtest);
int test__backward_ring_buffer(struct test *test, int subtest);
int test__cpu_map_print(struct test *test, int subtest);
int test__cpu_map_merge(struct test *test, int subtest);
int test__sdt_event(struct test *test, int subtest);
int test__is_printable_array(struct test *test, int subtest);
int test__bitmap_print(struct test *test, int subtest);
@ -109,6 +110,7 @@ int test__unit_number__scnprint(struct test *test, int subtest);
int test__mem2node(struct test *t, int subtest);
int test__maps__merge_in(struct test *t, int subtest);
int test__time_utils(struct test *t, int subtest);
int test__jit_write_elf(struct test *test, int subtest);
bool test__bp_signal_is_supported(void);
bool test__bp_account_is_supported(void);


@ -45,6 +45,7 @@ static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size,
P_FLAG(NEWPID);
P_FLAG(NEWNET);
P_FLAG(IO);
P_FLAG(CLEAR_SIGHAND);
#undef P_FLAG
if (flags)


@ -63,4 +63,5 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
int cpu_map__cpu(struct perf_cpu_map *cpus, int idx);
bool cpu_map__has(struct perf_cpu_map *cpus, int cpu);
#endif /* __PERF_CPUMAP_H */


@ -18,6 +18,7 @@
#include "debug.h"
#include "units.h"
#include <internal/lib.h> // page_size
#include "affinity.h"
#include "../perf.h"
#include "asm/bug.h"
#include "bpf-event.h"
@ -342,14 +343,63 @@ static int perf_evlist__nr_threads(struct evlist *evlist,
return perf_thread_map__nr(evlist->core.threads);
}
void evlist__disable(struct evlist *evlist)
void evlist__cpu_iter_start(struct evlist *evlist)
{
struct evsel *pos;
/*
* Reset the per evsel cpu_iter. This is needed because
* each evsel's cpumap may have a different index space,
* and some operations need the index to modify
* the FD xyarray (e.g. open, close)
*/
evlist__for_each_entry(evlist, pos)
pos->cpu_iter = 0;
}
bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu)
{
if (ev->cpu_iter >= ev->core.cpus->nr)
return true;
if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu)
return true;
return false;
}
bool evsel__cpu_iter_skip(struct evsel *ev, int cpu)
{
if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) {
ev->cpu_iter++;
return false;
}
return true;
}
void evlist__disable(struct evlist *evlist)
{
struct evsel *pos;
struct affinity affinity;
int cpu, i;
if (affinity__setup(&affinity) < 0)
return;
evlist__for_each_cpu(evlist, i, cpu) {
affinity__set(&affinity, cpu);
evlist__for_each_entry(evlist, pos) {
if (evsel__cpu_iter_skip(pos, cpu))
continue;
if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
continue;
evsel__disable_cpu(pos, pos->cpu_iter - 1);
}
}
affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
continue;
evsel__disable(pos);
pos->disabled = true;
}
evlist->enabled = false;
@ -358,11 +408,28 @@ void evlist__disable(struct evlist *evlist)
void evlist__enable(struct evlist *evlist)
{
struct evsel *pos;
struct affinity affinity;
int cpu, i;
if (affinity__setup(&affinity) < 0)
return;
evlist__for_each_cpu(evlist, i, cpu) {
affinity__set(&affinity, cpu);
evlist__for_each_entry(evlist, pos) {
if (evsel__cpu_iter_skip(pos, cpu))
continue;
if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
continue;
evsel__enable_cpu(pos, pos->cpu_iter - 1);
}
}
affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
continue;
evsel__enable(pos);
pos->disabled = false;
}
evlist->enabled = true;
@ -1137,9 +1204,35 @@ void perf_evlist__set_selected(struct evlist *evlist,
void evlist__close(struct evlist *evlist)
{
struct evsel *evsel;
struct affinity affinity;
int cpu, i;
evlist__for_each_entry_reverse(evlist, evsel)
evsel__close(evsel);
/*
* With perf record core.cpus is usually NULL.
* Use the old method to handle this for now.
*/
if (!evlist->core.cpus) {
evlist__for_each_entry_reverse(evlist, evsel)
evsel__close(evsel);
return;
}
if (affinity__setup(&affinity) < 0)
return;
evlist__for_each_cpu(evlist, i, cpu) {
affinity__set(&affinity, cpu);
evlist__for_each_entry_reverse(evlist, evsel) {
if (evsel__cpu_iter_skip(evsel, cpu))
continue;
perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1);
}
}
affinity__cleanup(&affinity);
evlist__for_each_entry_reverse(evlist, evsel) {
perf_evsel__free_fd(&evsel->core);
perf_evsel__free_id(&evsel->core);
}
}
static int perf_evlist__create_syswide_maps(struct evlist *evlist)
@ -1577,7 +1670,8 @@ void perf_evlist__force_leader(struct evlist *evlist)
}
struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
struct evsel *evsel)
struct evsel *evsel,
bool close)
{
struct evsel *c2, *leader;
bool is_open = true;
@ -1594,10 +1688,15 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
if (c2 == evsel)
is_open = false;
if (c2->leader == leader) {
if (is_open)
if (is_open && close)
perf_evsel__close(&c2->core);
c2->leader = c2;
c2->core.nr_members = 0;
/*
* Set this for all former members of the group
* to indicate they get reopened.
*/
c2->reset_group = true;
}
}
return leader;


@ -334,9 +334,17 @@ void perf_evlist__to_front(struct evlist *evlist,
#define evlist__for_each_entry_safe(evlist, tmp, evsel) \
__evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel)
#define evlist__for_each_cpu(evlist, index, cpu) \
evlist__cpu_iter_start(evlist); \
perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus)
void perf_evlist__set_tracking_event(struct evlist *evlist,
struct evsel *tracking_evsel);
void evlist__cpu_iter_start(struct evlist *evlist);
bool evsel__cpu_iter_skip(struct evsel *ev, int cpu);
bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu);
struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
@ -348,5 +356,6 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist);
void perf_evlist__force_leader(struct evlist *evlist);
struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
struct evsel *evsel);
struct evsel *evsel,
bool close);
#endif /* __PERF_EVLIST_H */


@ -1223,16 +1223,27 @@ int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter)
return perf_evsel__append_filter(evsel, "%s,%s", filter);
}
/* Caller has to clear disabled after going through all CPUs. */
int evsel__enable_cpu(struct evsel *evsel, int cpu)
{
return perf_evsel__enable_cpu(&evsel->core, cpu);
}
int evsel__enable(struct evsel *evsel)
{
int err = perf_evsel__enable(&evsel->core);
if (!err)
evsel->disabled = false;
return err;
}
/* Caller has to set disabled after going through all CPUs. */
int evsel__disable_cpu(struct evsel *evsel, int cpu)
{
return perf_evsel__disable_cpu(&evsel->core, cpu);
}
int evsel__disable(struct evsel *evsel)
{
int err = perf_evsel__disable(&evsel->core);
@ -1587,8 +1598,9 @@ static int perf_event_open(struct evsel *evsel,
return fd;
}
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads,
int start_cpu, int end_cpu)
{
int cpu, thread, nthreads;
unsigned long flags = PERF_FLAG_FD_CLOEXEC;
@ -1665,7 +1677,7 @@ retry_sample_id:
display_attr(&evsel->core.attr);
for (cpu = 0; cpu < cpus->nr; cpu++) {
for (cpu = start_cpu; cpu < end_cpu; cpu++) {
for (thread = 0; thread < nthreads; thread++) {
int fd, group_fd;
@ -1843,6 +1855,12 @@ out_close:
return err;
}
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads)
{
return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1);
}
void evsel__close(struct evsel *evsel)
{
perf_evsel__close(&evsel->core);
@ -1850,9 +1868,14 @@ void evsel__close(struct evsel *evsel)
}
int perf_evsel__open_per_cpu(struct evsel *evsel,
struct perf_cpu_map *cpus)
struct perf_cpu_map *cpus,
int cpu)
{
return evsel__open(evsel, cpus, NULL);
if (cpu == -1)
return evsel__open_cpu(evsel, cpus, NULL, 0,
cpus ? cpus->nr : 1);
return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
}
int perf_evsel__open_per_thread(struct evsel *evsel,


@ -86,6 +86,7 @@ struct evsel {
struct list_head config_terms;
struct bpf_object *bpf_obj;
int bpf_fd;
int err;
bool auto_merge_stats;
bool merged_stat;
const char * metric_expr;
@ -94,7 +95,10 @@ struct evsel {
struct evsel *metric_leader;
bool collect_stat;
bool weak_group;
bool reset_group;
bool errored;
bool percore;
int cpu_iter;
const char *pmu_name;
struct {
perf_evsel__sb_cb_t *cb;
@ -218,11 +222,14 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter);
int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter);
int perf_evsel__append_addr_filter(struct evsel *evsel,
const char *filter);
int evsel__enable_cpu(struct evsel *evsel, int cpu);
int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
int evsel__disable_cpu(struct evsel *evsel, int cpu);
int perf_evsel__open_per_cpu(struct evsel *evsel,
struct perf_cpu_map *cpus);
struct perf_cpu_map *cpus,
int cpu);
int perf_evsel__open_per_thread(struct evsel *evsel,
struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,


@ -8,15 +8,12 @@
*/
#include <sys/types.h>
#include <stdio.h>
#include <getopt.h>
#include <stddef.h>
#include <libelf.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <inttypes.h>
#include <limits.h>
#include <fcntl.h>
#include <err.h>
#ifdef HAVE_DWARF_SUPPORT
@ -31,8 +28,6 @@
#define NT_GNU_BUILD_ID 3
#endif
#define JVMTI
#define BUILD_ID_URANDOM /* different uuid for each run */
#ifdef HAVE_LIBCRYPTO
@ -511,44 +506,3 @@ error:
return retval;
}
#ifndef JVMTI
static unsigned char x86_code[] = {
0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */
0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */
0xCD, 0x80 /* int $0x80 */
};
static struct options options;
int main(int argc, char **argv)
{
int c, fd, ret;
while ((c = getopt(argc, argv, "o:h")) != -1) {
switch (c) {
case 'o':
options.output = optarg;
break;
case 'h':
printf("Usage: genelf -o output_file [-h]\n");
return 0;
default:
errx(1, "unknown option");
}
}
fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666);
if (fd == -1)
err(1, "cannot create file %s", options.output);
ret = jit_write_elf(fd, "main", x86_code, sizeof(x86_code));
close(fd);
if (ret != 0)
unlink(options.output);
return ret;
}
#endif


@ -5,10 +5,93 @@
/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
#define ENTRY(name) \
.globl name; \
name:
/* Some toolchains use other characters (e.g. '`') to mark new line in macro */
#ifndef ASM_NL
#define ASM_NL ;
#endif
#define ENDPROC(name)
#ifndef __ALIGN
#define __ALIGN .align 4,0x90
#define __ALIGN_STR ".align 4,0x90"
#endif
/* SYM_T_FUNC -- type used by assembler to mark functions */
#ifndef SYM_T_FUNC
#define SYM_T_FUNC STT_FUNC
#endif
/* SYM_A_* -- align the symbol? */
#define SYM_A_ALIGN ALIGN
/* SYM_L_* -- linkage of symbols */
#define SYM_L_GLOBAL(name) .globl name
#define SYM_L_LOCAL(name) /* nothing */
#define ALIGN __ALIGN
/* === generic annotations === */
/* SYM_ENTRY -- use only if you have to for non-paired symbols */
#ifndef SYM_ENTRY
#define SYM_ENTRY(name, linkage, align...) \
linkage(name) ASM_NL \
align ASM_NL \
name:
#endif
/* SYM_START -- use only if you have to */
#ifndef SYM_START
#define SYM_START(name, linkage, align...) \
SYM_ENTRY(name, linkage, align)
#endif
/* SYM_END -- use only if you have to */
#ifndef SYM_END
#define SYM_END(name, sym_type) \
.type name sym_type ASM_NL \
.size name, .-name
#endif
/*
* SYM_FUNC_START_ALIAS -- use where there are two global names for one
* function
*/
#ifndef SYM_FUNC_START_ALIAS
#define SYM_FUNC_START_ALIAS(name) \
SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
/* SYM_FUNC_START -- use for global functions */
#ifndef SYM_FUNC_START
/*
* The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two
* later.
*/
#define SYM_FUNC_START(name) \
SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
/* SYM_FUNC_START_LOCAL -- use for local functions */
#ifndef SYM_FUNC_START_LOCAL
/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */
#define SYM_FUNC_START_LOCAL(name) \
SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
#endif
/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */
#ifndef SYM_FUNC_END_ALIAS
#define SYM_FUNC_END_ALIAS(name) \
SYM_END(name, SYM_T_FUNC)
#endif
/*
* SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START,
* SYM_FUNC_START_WEAK, ...
*/
#ifndef SYM_FUNC_END
/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */
#define SYM_FUNC_END(name) \
SYM_END(name, SYM_T_FUNC)
#endif
#endif /* PERF_LINUX_LINKAGE_H_ */


@ -2446,6 +2446,7 @@ static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms
list_for_each_entry(ilist, &inline_node->val, list) {
struct map_symbol ilist_ms = {
.maps = ms->maps,
.map = map,
.sym = ilist->symbol,
};


@ -464,7 +464,8 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
struct target *target)
struct target *target,
int cpu)
{
struct perf_event_attr *attr = &evsel->core.attr;
struct evsel *leader = evsel->leader;
@ -518,7 +519,7 @@ int create_perf_stat_counter(struct evsel *evsel,
}
if (target__has_cpu(target) && !target__has_per_thread(target))
return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel));
return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
return perf_evsel__open_per_thread(evsel, evsel->core.threads);
}


@ -214,7 +214,8 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
struct target *target);
struct target *target,
int cpu);
void
perf_evlist__print_counters(struct evlist *evlist,
struct perf_stat_config *config,