nds32: Perf porting
This commit ports perf to nds32.

1. Raw events:
   Raw events start with 'r'.
   Usage: perf stat -e rXYZ ./app
     X:  the index of the performance counter.
     YZ: the index of the event (in hexadecimal).
   Example: 'perf stat -e r101 ./app' means counter 1 will count the
   instruction event.
   The counter and event indexes can be found in the "Andes System
   Privilege Architecture Version 3 Manual", or run 'perf list' to see
   the symbolic names of the raw events.

2. Perf mmap2:
   Fix an unexpected page fault in perf mmap2(). When the perf tool calls
   mmap2(), it fails with "failed to write" and returns -EFAULT. The page
   fault is raised while *reading* the buffer from the legally mapped
   address region in order to write it to the descriptor, yet the page
   fault handler returns VM_FAULT_SIGBUS, which should not happen for a
   read request. See kernel/events/core.c:perf_mmap_fault(): if
   "(vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))" evaluates to true,
   it returns VM_FAULT_SIGBUS. However, this is not a write request; the
   flags describing why the page fault happened are wrong. Furthermore,
   NDS32 SPAv3 cannot tell whether an access is a read or a write; it only
   knows whether it is an instruction fetch or a data access. Removing the
   wrong flag assignment (the hardware cannot report the reason) fixes
   this bug.

3. Perf with multiple events mapped to the same counter:
   When multiple events map to the same counter, the counts are
   inaccurate, because each counter can only count one event at a time,
   so the events have to take turns in each context. There are two
   possible solutions:
   1. Print an error message when multiple events map to the same
      counter. However, the error message makes the program hang in a
      loop, and the ltp (Linux Test Project) tests then fail.
   2. Do not print the error message. ltp then passes, but users need to
      know not to count events that map to the same counter, or they will
      get inaccurate results.
   We choose solution 2.

Signed-off-by: Nickhu <nickhu@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
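Aside for readers of the raw-event usage above: the following minimal C sketch (not part of the patch; the helper name and the exact bit split are assumptions based solely on the rXYZ description in the commit message) shows how a raw config such as r101 decomposes into a counter index and an event number.

#include <stdio.h>

/* Hypothetical illustration: "rXYZ" is read as X = counter index,
 * YZ = event index in hexadecimal, as described above. */
static void decode_raw_event(unsigned int config)
{
	unsigned int counter = (config >> 8) & 0xf;	/* X */
	unsigned int event = config & 0xff;		/* YZ */

	printf("counter %u counts event 0x%02x\n", counter, event);
}

int main(void)
{
	decode_raw_event(0x101);	/* perf stat -e r101 ./app */
	return 0;
}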
parent 9aaafac8cf
commit ebd09753b5
@@ -30,6 +30,7 @@ config NDS32
	select HAVE_ARCH_TRACEHOOK
	select HAVE_DEBUG_KMEMLEAK
	select HAVE_REGS_AND_STACK_ACCESS_API
	select HAVE_PERF_EVENTS
	select IRQ_DOMAIN
	select LOCKDEP_SUPPORT
	select MODULES_USE_ELF_RELA

@@ -82,4 +82,9 @@
			interrupts = <18>;
		};
	};

	pmu {
		compatible = "andestech,nds32v3-pmu";
		interrupts= <13>;
	};
};

@@ -36,6 +36,7 @@ generic-y += kprobes.h
generic-y += kvm_para.h
generic-y += limits.h
generic-y += local.h
generic-y += local64.h
generic-y += mm-arch-hooks.h
generic-y += mman.h
generic-y += parport.h

@@ -0,0 +1,16 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2008-2018 Andes Technology Corporation */

#ifndef __ASM_PERF_EVENT_H
#define __ASM_PERF_EVENT_H

/*
 * This file is request by Perf,
 * please refer to tools/perf/design.txt for more details
 */
struct pt_regs;
unsigned long perf_instruction_pointer(struct pt_regs *regs);
unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)

#endif

@@ -0,0 +1,386 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2008-2018 Andes Technology Corporation */

#ifndef __ASM_PMU_H
#define __ASM_PMU_H

#include <linux/interrupt.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>
#include <asm/bitfield.h>

/* Has special meaning for perf core implementation */
#define HW_OP_UNSUPPORTED 0x0
#define C(_x) PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED 0x0

/* Enough for both software and hardware defined events */
#define SOFTWARE_EVENT_MASK 0xFF

#define PFM_OFFSET_MAGIC_0 2 /* DO NOT START FROM 0 */
#define PFM_OFFSET_MAGIC_1 (PFM_OFFSET_MAGIC_0 + 36)
#define PFM_OFFSET_MAGIC_2 (PFM_OFFSET_MAGIC_1 + 36)

enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };

u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1,
		       PFM_CTL_mskOVF2 };
u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1,
		      PFM_CTL_mskEN2 };
u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1,
			  PFM_CTL_offSEL2 };
u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 };
u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 };
u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 };
u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 };
/*
 * Perf Events' indices
 */
#define NDS32_IDX_CYCLE_COUNTER 0
#define NDS32_IDX_COUNTER0 1
#define NDS32_IDX_COUNTER1 2

/* The events for a given PMU register set. */
struct pmu_hw_events {
	/*
	 * The events that are active on the PMU for the given index.
	 */
	struct perf_event *events[MAX_COUNTERS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)];

	/*
	 * Hardware lock to serialize accesses to PMU registers. Needed for the
	 * read/modify/write sequences.
	 */
	raw_spinlock_t pmu_lock;
};

struct nds32_pmu {
	struct pmu pmu;
	cpumask_t active_irqs;
	char *name;
	irqreturn_t (*handle_irq)(int irq_num, void *dev);
	void (*enable)(struct perf_event *event);
	void (*disable)(struct perf_event *event);
	int (*get_event_idx)(struct pmu_hw_events *hw_events,
			     struct perf_event *event);
	int (*set_event_filter)(struct hw_perf_event *evt,
				struct perf_event_attr *attr);
	u32 (*read_counter)(struct perf_event *event);
	void (*write_counter)(struct perf_event *event, u32 val);
	void (*start)(struct nds32_pmu *nds32_pmu);
	void (*stop)(struct nds32_pmu *nds32_pmu);
	void (*reset)(void *data);
	int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler);
	void (*free_irq)(struct nds32_pmu *nds32_pmu);
	int (*map_event)(struct perf_event *event);
	int num_events;
	atomic_t active_events;
	u64 max_period;
	struct platform_device *plat_device;
	struct pmu_hw_events *(*get_hw_events)(void);
};

#define to_nds32_pmu(p) (container_of(p, struct nds32_pmu, pmu))

int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type);

u64 nds32_pmu_event_update(struct perf_event *event);

int nds32_pmu_event_set_period(struct perf_event *event);

/*
 * Common NDS32 SPAv3 event types
 *
 * Note: An implementation may not be able to count all of these events
 * but the encodings are considered to be `reserved' in the case that
 * they are not available.
 *
 * SEL_TOTAL_CYCLES will add an offset is due to ZERO is defined as
 * NOT_SUPPORTED EVENT mapping in generic perf code.
 * You will need to deal it in the event writing implementation.
 */
enum spav3_counter_0_perf_types {
	SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0, /* counting symbol */
	SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0,
	SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0,
	SPAV3_0_SEL_LAST /* counting symbol */
};

enum spav3_counter_1_perf_types {
	SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1, /* counting symbol */
	SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LAST /* counting symbol */
};

enum spav3_counter_2_perf_types {
	SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2, /* counting symbol */
	SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT =
		3 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_LAST /* counting symbol */
};

/* Get converted event counter index */
static inline int get_converted_event_idx(unsigned long event)
{
	int idx;

	if ((event) > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) {
		idx = 0;
	} else if ((event) > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) {
		idx = 1;
	} else if ((event) > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) {
		idx = 2;
	} else {
		pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n");
		return -EPERM;
	}

	return idx;
}

/* Get converted hardware event number */
static inline u32 get_converted_evet_hw_num(u32 event)
{
	if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_0;
	else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_1;
	else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST)
		event -= PFM_OFFSET_MAGIC_2;
	else if (event != 0)
		pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n");

	return event;
}

/*
 * NDS32 HW events mapping
 *
 * The hardware events that we support. We do support cache operations but
 * we have harvard caches and no way to combine instruction and data
 * accesses/misses in hardware.
 */
static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES,
	[PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION,
	[PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS,
	[PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED
};

static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
					[PERF_COUNT_HW_CACHE_OP_MAX]
					[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] =
				SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS,
			[C(RESULT_MISS)] =
				SPAV3_2_SEL_LOAD_DATA_CACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] =
				SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS,
			[C(RESULT_MISS)] =
				SPAV3_2_SEL_STORE_DATA_CACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] =
				CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] =
				CACHE_OP_UNSUPPORTED,
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] =
				SPAV3_1_SEL_CODE_CACHE_ACCESS,
			[C(RESULT_MISS)] =
				SPAV3_2_SEL_CODE_CACHE_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] =
				SPAV3_1_SEL_CODE_CACHE_ACCESS,
			[C(RESULT_MISS)] =
				SPAV3_2_SEL_CODE_CACHE_MISS,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] =
				CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	/* TODO: L2CC */
	[C(LL)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] =
				CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
	},
	/* NDS32 PMU does not support TLB read/write hit/miss,
	 * However, it can count access/miss, which mixed with read and write.
	 * Therefore, only READ counter will use it.
	 * We do as possible as we can.
	 */
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] =
				SPAV3_1_SEL_UDTLB_ACCESS,
			[C(RESULT_MISS)] =
				SPAV3_2_SEL_UDTLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] =
				CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] =
				CACHE_OP_UNSUPPORTED,
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] =
				SPAV3_1_SEL_UITLB_ACCESS,
			[C(RESULT_MISS)] =
				SPAV3_2_SEL_UITLB_MISS,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] =
				CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] =
				CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] =
				CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] =
				CACHE_OP_UNSUPPORTED,
		},
	},
	[C(BPU)] = { /* What is BPU? */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] =
				CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] =
				CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] =
				CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] =
				CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] =
				CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] =
				CACHE_OP_UNSUPPORTED,
		},
	},
	[C(NODE)] = { /* What is NODE? */
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
		},
		[C(OP_PREFETCH)] = {
			[C(RESULT_ACCESS)] =
				CACHE_OP_UNSUPPORTED,
			[C(RESULT_MISS)] =
				CACHE_OP_UNSUPPORTED,
		},
	},
};

int nds32_pmu_map_event(struct perf_event *event,
			const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
			const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
			[PERF_COUNT_HW_CACHE_OP_MAX]
			[PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask);

#endif /* __ASM_PMU_H */

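Aside (not part of the patch): a small self-contained sketch of how the PFM_OFFSET_MAGIC_* encoding in the header above ties an encoded event to its hardware counter. The 36-entry window size is taken from the header, but the real kernel code checks against the SPAV3_*_SEL_LAST symbols instead, so treat this only as an illustration of the idea behind get_converted_event_idx().

#include <stdio.h>

#define PFM_OFFSET_MAGIC_0 2	/* DO NOT START FROM 0 */
#define PFM_OFFSET_MAGIC_1 (PFM_OFFSET_MAGIC_0 + 36)
#define PFM_OFFSET_MAGIC_2 (PFM_OFFSET_MAGIC_1 + 36)

/* Illustration: each 36-entry window of the encoded event space belongs
 * to one hardware counter; subtracting the window base gives back the
 * raw SPAv3 event selector. */
static int event_to_counter(unsigned int event)
{
	if (event >= PFM_OFFSET_MAGIC_0 && event < PFM_OFFSET_MAGIC_1)
		return 0;
	if (event >= PFM_OFFSET_MAGIC_1 && event < PFM_OFFSET_MAGIC_2)
		return 1;
	if (event >= PFM_OFFSET_MAGIC_2 && event < PFM_OFFSET_MAGIC_2 + 36)
		return 2;
	return -1;
}

int main(void)
{
	/* SPAV3_1_SEL_COMPLETED_INSTRUCTION == 1 + PFM_OFFSET_MAGIC_1 */
	unsigned int ev = 1 + PFM_OFFSET_MAGIC_1;

	printf("counter %d, hw event %u\n",
	       event_to_counter(ev), ev - PFM_OFFSET_MAGIC_1);
	return 0;
}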
@@ -0,0 +1,39 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2008-2018 Andes Technology Corporation */

#ifndef __ASM_STACKTRACE_H
#define __ASM_STACKTRACE_H

/* Kernel callchain */
struct stackframe {
	unsigned long fp;
	unsigned long sp;
	unsigned long lp;
};

/*
 * struct frame_tail: User callchain
 * IMPORTANT:
 * This struct is used for call-stack walking,
 * the order and types matters.
 * Do not use array, it only stores sizeof(pointer)
 *
 * The details can refer to arch/arm/kernel/perf_event.c
 */
struct frame_tail {
	unsigned long stack_fp;
	unsigned long stack_lp;
};

/* For User callchain with optimize for size */
struct frame_tail_opt_size {
	unsigned long stack_r6;
	unsigned long stack_fp;
	unsigned long stack_gp;
	unsigned long stack_lp;
};

extern void
get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph);

#endif /* __ASM_STACKTRACE_H */

@@ -4,7 +4,6 @@

CPPFLAGS_vmlinux.lds := -DTEXTADDR=$(TEXTADDR)
AFLAGS_head.o := -DTEXTADDR=$(TEXTADDR)

# Object file lists.

obj-y := ex-entry.o ex-exit.o ex-scall.o irq.o \

@@ -16,10 +15,10 @@ obj-$(CONFIG_MODULES) += nds32_ksyms.o module.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_OF) += devtree.o
obj-$(CONFIG_CACHE_L2) += atl2c.o
obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o

extra-y := head.o vmlinux.lds


obj-y += vdso/

obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o

File diff suppressed because it is too large

@@ -9,6 +9,7 @@
#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/perf_event.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>

@@ -169,8 +170,6 @@ good_area:
			mask = VM_EXEC;
		else {
			mask = VM_READ | VM_WRITE;
			if (vma->vm_flags & VM_WRITE)
				flags |= FAULT_FLAG_WRITE;
		}
	} else if (entry == ENTRY_TLB_MISC) {
		switch (error_code & ITYPE_mskETYPE) {

@@ -231,11 +230,17 @@ good_area:
	 * attempt. If we go through a retry, it is extremely likely that the
	 * page will be found in page cache at that point.
	 */
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR)
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
		else
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
				      1, regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
				      1, regs, addr);
		}
		if (fault & VM_FAULT_RETRY) {
			flags &= ~FAULT_FLAG_ALLOW_RETRY;
			flags |= FAULT_FLAG_TRIED;

@@ -24,6 +24,8 @@
#include "../../arch/ia64/include/asm/barrier.h"
#elif defined(__xtensa__)
#include "../../arch/xtensa/include/asm/barrier.h"
#elif defined(__nds32__)
#include "../../arch/nds32/include/asm/barrier.h"
#else
#include <asm-generic/barrier.h>
#endif

@@ -0,0 +1 @@
libperf-y += util/

@@ -0,0 +1 @@
libperf-y += header.o

@@ -0,0 +1,29 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <stdio.h>
#include <stdlib.h>
#include <api/fs/fs.h>
#include "header.h"

#define STR_LEN 1024

char *get_cpuid_str(struct perf_pmu *pmu)
{
	/* In nds32, we only have one cpu */
	char *buf = NULL;
	struct cpu_map *cpus;
	const char *sysfs = sysfs__mountpoint();

	if (!sysfs || !pmu || !pmu->cpus)
		return NULL;

	buf = malloc(STR_LEN);
	if (!buf)
		return NULL;

	cpus = cpu_map__get(pmu->cpus);
	sprintf(buf, "0x%x", cpus->nr - 1);
	cpu_map__put(cpus);
	return buf;
}

@@ -0,0 +1,15 @@
# Format:
# MIDR,Version,JSON/file/pathname,Type
#
# where
#	MIDR		Processor version
#			Variant[23:20] and Revision [3:0] should be zero.
#	Version could be used to track version of of JSON file
#		but currently unused.
#	JSON/file/pathname is the path to JSON file, relative
#		to tools/perf/pmu-events/arch/arm64/.
#	Type is core, uncore etc
#
#
#Family-model,Version,Filename,EventType
0x0,v3,n13,core

@@ -0,0 +1,290 @@
[
  {
    "PublicDescription": "Conditional branch",
    "EventCode": "0x102",
    "EventName": "cond_br",
    "BriefDescription": "V3 Conditional branch"
  },
  {
    "PublicDescription": "Taken conditional branches",
    "EventCode": "0x103",
    "EventName": "taken_cond_br",
    "BriefDescription": "V3 Taken Conditional branch"
  },
  {
    "PublicDescription": "Prefetch Instruction",
    "EventCode": "0x104",
    "EventName": "prefetch_inst",
    "BriefDescription": "V3 Prefetch Instruction"
  },
  {
    "PublicDescription": "RET Inst",
    "EventCode": "0x105",
    "EventName": "ret_inst",
    "BriefDescription": "V3 RET Inst"
  },
  {
    "PublicDescription": "JR(non-RET) instructions",
    "EventCode": "0x106",
    "EventName": "jr_inst",
    "BriefDescription": "V3 JR(non-RET) instructions"
  },
  {
    "PublicDescription": "JAL/JRAL instructions",
    "EventCode": "0x107",
    "EventName": "jal_jral_inst",
    "BriefDescription": "V3 JAL/JRAL instructions"
  },
  {
    "PublicDescription": "NOP instructions",
    "EventCode": "0x108",
    "EventName": "nop_inst",
    "BriefDescription": "V3 NOP instructions"
  },
  {
    "PublicDescription": "SCW instructions",
    "EventCode": "0x109",
    "EventName": "scw_inst",
    "BriefDescription": "V3 SCW instructions"
  },
  {
    "PublicDescription": "ISB/DSB instructions",
    "EventCode": "0x10a",
    "EventName": "isb_dsb_inst",
    "BriefDescription": "V3 ISB/DSB instructions"
  },
  {
    "PublicDescription": "CCTL instructions",
    "EventCode": "0x10b",
    "EventName": "cctl_inst",
    "BriefDescription": "V3 CCTL instructions"
  },
  {
    "PublicDescription": "Taken Interrupts",
    "EventCode": "0x10c",
    "EventName": "taken_interrupts",
    "BriefDescription": "V3 Taken Interrupts"
  },
  {
    "PublicDescription": "Loads Completed",
    "EventCode": "0x10d",
    "EventName": "load_completed",
    "BriefDescription": "V3 Loads Completed"
  },
  {
    "PublicDescription": "uITLB accesses",
    "EventCode": "0x10e",
    "EventName": "uitlb_access",
    "BriefDescription": "V3 uITLB accesses"
  },
  {
    "PublicDescription": "uDTLB accesses",
    "EventCode": "0x10f",
    "EventName": "udtlb_access",
    "BriefDescription": "V3 uDTLB accesses"
  },
  {
    "PublicDescription": "MTLB accesses",
    "EventCode": "0x110",
    "EventName": "mtlb_access",
    "BriefDescription": "V3 MTLB accesses"
  },
  {
    "PublicDescription": "DATA_DEPENDENCY_STALL_CYCLES",
    "EventCode": "0x112",
    "EventName": "data_dependency_stall",
    "BriefDescription": "V3 DATA_DEPENDENCY_STALL_CYCLES"
  },
  {
    "PublicDescription": "DATA_CACHE_MISS_STALL_CYCLES",
    "EventCode": "0x113",
    "EventName": "dcache_miss_stall",
    "BriefDescription": "V3 DATA_CACHE_MISS_STALL_CYCLES"
  },
  {
    "PublicDescription": "ILM access",
    "EventCode": "0x118",
    "EventName": "ilm_access",
    "BriefDescription": "V3 ILM accesses"
  },
  {
    "PublicDescription": "LSU BIU CYCLES",
    "EventCode": "0x119",
    "EventName": "lsu_biu_cycles",
    "BriefDescription": "V3 LSU BIU CYCLES"
  },
  {
    "PublicDescription": "HPTWK BIU CYCLES",
    "EventCode": "0x11a",
    "EventName": "hptwk_biu_cycles",
    "BriefDescription": "V3 HPTWK BIU CYCLES"
  },
  {
    "PublicDescription": "DMA BIU CYCLES",
    "EventCode": "0x11b",
    "EventName": "dma_biu_cycles",
    "BriefDescription": "V3 DMA BIU CYCLES"
  },
  {
    "PublicDescription": "CODE CACHE FILL BIU CYCLES",
    "EventCode": "0x11c",
    "EventName": "icache_fill_biu_cycles",
    "BriefDescription": "V3 CODE CACHE FILL BIU CYCLES"
  },
  {
    "PublicDescription": "LEAGAL UNALIGN DCACHE ACCESS",
    "EventCode": "0x11d",
    "EventName": "legal_unalined_dcache_access",
    "BriefDescription": "V3 LEAGAL UNALIGN DCACHE ACCESS"
  },
  {
    "PublicDescription": "PUSH25 instructions",
    "EventCode": "0x11e",
    "EventName": "push25_inst",
    "BriefDescription": "V3 PUSH25 instructions"
  },
  {
    "PublicDescription": "SYSCALL instructions",
    "EventCode": "0x11f",
    "EventName": "syscall_inst",
    "BriefDescription": "V3 SYSCALL instructions"
  },
  {
    "PublicDescription": "conditional branch miss",
    "EventCode": "0x202",
    "EventName": "cond_br_miss",
    "BriefDescription": "V3 conditional branch miss"
  },
  {
    "PublicDescription": "taken conditional branch miss",
    "EventCode": "0x203",
    "EventName": "taken_cond_br_miss",
    "BriefDescription": "V3 taken conditional branch miss"
  },
  {
    "PublicDescription": "Prefetch Instructions with cache hit",
    "EventCode": "0x204",
    "EventName": "prefetch_icache_hit",
    "BriefDescription": "V3 Prefetch Instructions with cache hit"
  },
  {
    "PublicDescription": "RET mispredict",
    "EventCode": "0x205",
    "EventName": "ret_mispredict",
    "BriefDescription": "V3 RET mispredict"
  },
  {
    "PublicDescription": "Immediate J instructions",
    "EventCode": "0x206",
    "EventName": "imm_j_inst",
    "BriefDescription": "V3 Immediate J instructions"
  },
  {
    "PublicDescription": "Multiply instructions",
    "EventCode": "0x207",
    "EventName": "mul_inst",
    "BriefDescription": "V3 Multiply instructions"
  },
  {
    "PublicDescription": "16 bits instructions",
    "EventCode": "0x208",
    "EventName": "sixteen_bits_inst",
    "BriefDescription": "V3 16 bits instructions"
  },
  {
    "PublicDescription": "Failed SCW instructions",
    "EventCode": "0x209",
    "EventName": "fail_scw_inst",
    "BriefDescription": "V3 Failed SCW instructions"
  },
  {
    "PublicDescription": "ld-after-st conflict replays",
    "EventCode": "0x20a",
    "EventName": "ld_af_st_conflict",
    "BriefDescription": "V3 ld-after-st conflict replays"
  },
  {
    "PublicDescription": "Exception taken",
    "EventCode": "0x20c",
    "EventName": "exception_taken",
    "BriefDescription": "V3 Exception taken"
  },
  {
    "PublicDescription": "Stores completed",
    "EventCode": "0x20d",
    "EventName": "store_completed",
    "BriefDescription": "V3 Stores completed"
  },
  {
    "PublicDescription": "uITLB miss",
    "EventCode": "0x20e",
    "EventName": "uitlb_miss",
    "BriefDescription": "V3 uITLB miss"
  },
  {
    "PublicDescription": "uDTLB miss",
    "EventCode": "0x20f",
    "EventName": "udtlb_miss",
    "BriefDescription": "V3 uDTLB miss"
  },
  {
    "PublicDescription": "MTLB miss",
    "EventCode": "0x210",
    "EventName": "mtlb_miss",
    "BriefDescription": "V3 MTLB miss"
  },
  {
    "PublicDescription": "Empty instructions queue stall cycles",
    "EventCode": "0x212",
    "EventName": "empty_inst_q_stall",
    "BriefDescription": "V3 Empty instructions queue stall cycles"
  },
  {
    "PublicDescription": "Data write back",
    "EventCode": "0x213",
    "EventName": "data_wb",
    "BriefDescription": "V3 Data write back"
  },
  {
    "PublicDescription": "DLM access",
    "EventCode": "0x218",
    "EventName": "dlm_access",
    "BriefDescription": "V3 DLM access"
  },
  {
    "PublicDescription": "LSU BIU request",
    "EventCode": "0x219",
    "EventName": "lsu_biu_req",
    "BriefDescription": "V3 LSU BIU request"
  },
  {
    "PublicDescription": "HPTWK BIU request",
    "EventCode": "0x21a",
    "EventName": "hptwk_biu_req",
    "BriefDescription": "V3 HPTWK BIU request"
  },
  {
    "PublicDescription": "DMA BIU request",
    "EventCode": "0x21b",
    "EventName": "dma_biu_req",
    "BriefDescription": "V3 DMA BIU request"
  },
  {
    "PublicDescription": "Icache fill BIU request",
    "EventCode": "0x21c",
    "EventName": "icache_fill_biu_req",
    "BriefDescription": "V3 Icache fill BIU request"
  },
  {
    "PublicDescription": "External events",
    "EventCode": "0x21d",
    "EventName": "external_events",
    "BriefDescription": "V3 External events"
  },
  {
    "PublicDescription": "POP25 instructions",
    "EventCode": "0x21e",
    "EventName": "pop25_inst",
    "BriefDescription": "V3 POP25 instructions"
  },
]