ARC updates for 4.3

- perf support for ARCv2 based cores (sampling interrupt, SMP)
- leftovers for ARCv2 support
- futex fixes

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQIcBAABAgAGBQJV5XSqAAoJEGnX8d3iisJe36kQAKMFXyx9hEXGEmQtgj5s4luT
0yBRnLBGNVYuaC+yAKjDKUMSKMQC5C7yjQ6oYgPu3cVNcWfVDsEDeEEL2BOcu0tB
45qFzgmgXN73xvj88CmLsBaXhG/qPNzJ8aARMyKMnmW/U04oM0CoOS/QhzrZZors
R8gSYVkXh/qzYpcczU6G3Pl/s88bPSxh1vW7J4y8JscvnIsNmHqsBEx03UCchdMz
3wem0/xtM3TWuYPZoHMsx7o1isSr/hGyPBZZgiqWU/WC4gi+i9PBhmj3xQtK06FB
uc2tIAc7YMs9zJR03o66eKJuYzqX3aWIRyz+OuEYwgrdduu7oXG7UtYd6tDMcko3
1olv9pSl7Ya7z2rvRV7p8//KkZM4K7rvvA1UeVz1OZM6XX0PhG9b9Jepbk1kTzqr
rI8p75AXmBTr+K4D2Kt6B0tJAqnG88xDMNiGPo9A/UIuVsoHXz3JHFECBxQNFrH/
2VURR+RZbfGLsGXkzOgLF6EkxawKg/zbqr8841aGW8gXt8NPra8CYpkODIsSeBpB
WyTdQ/Wq3EVLjmX5d+oscAxUCvDWc/IP3c55p1W2DUQlAweeFXCIG+b+HpMF2SZn
yjB6sjsdPZw29EzLTKKHYBOb8fMS6WeQuZtiM9g9fysvtZK7fwjSwYpq64mOMRaf
hShcvP4QgOn7pPo6twAx
=wem3
-----END PGP SIGNATURE-----

Merge tag 'arc-4.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc

Pull ARC architecture updates from Vineet Gupta:
 "ARC updates for 4.3:

   - perf support for ARCv2 based cores (sampling interrupt, SMP)
   - leftovers for ARCv2 support
   - futex fixes"

* tag 'arc-4.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc:
  ARCv2: entry: Fix reserved handler
  ARCv2: perf: Finally introduce HS perf unit
  ARCv2: perf: SMP support
  ARCv2: perf: implement exclusion of event counting in user or kernel mode
  ARCv2: perf: Support sampling events using overflow interrupts
  ARCv2: perf: implement "event_set_period"
  ARC: perf: cap the number of counters to hardware max of 32
  ARC: Eliminate some ARCv2 specific code for ARCompact build
  ARC: add/fix some comments in code - no functional change
  ARC: change some branchs to jumps to resolve linkage errors
  ARC: ensure futex ops are atomic in !LLSC config
  ARC: Enable HAVE_FUTEX_CMPXCHG
  ARC: make futex_atomic_cmpxchg_inatomic() return bimodal
  ARC: futex cosmetics
  ARC: add barriers to futex code
  ARCv2: IOC: Allow boot time disable
  ARCv2: SLC: Allow boot time disable
  ARCv2: Support IO Coherency and permutations involving L1 and L2 caches
  ARC: Enable optimistic spinning for LLSC config
  MAINTAINERS: add git tree for the arc architecture
commit 28dce7c770
@@ -0,0 +1,17 @@
+* ARC HS Performance Counters
+
+The ARC HS can be configured with a pipeline performance monitor for counting
+CPU and cache events like cache misses and hits. Like conventional PCT there
+are 100+ hardware conditions dynamically mapped to up to 32 counters.
+It also supports overflow interrupts.
+
+Required properties:
+
+- compatible : should contain
+	"snps,archs-pct"
+
+Example:
+
+pmu {
+	compatible = "snps,archs-pct";
+};
@@ -9911,8 +9911,9 @@ SYNOPSYS ARC ARCHITECTURE
 M:	Vineet Gupta <vgupta@synopsys.com>
 S:	Supported
 F:	arch/arc/
-F:	Documentation/devicetree/bindings/arc/
+F:	Documentation/devicetree/bindings/arc/*
 F:	drivers/tty/serial/arc_uart.c
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git

 SYNOPSYS ARC SDP platform support
 M:	Alexey Brodkin <abrodkin@synopsys.com>
@@ -8,6 +8,7 @@

 config ARC
 	def_bool y
+	select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC
 	select BUILDTIME_EXTABLE_SORT
 	select COMMON_CLK
 	select CLONE_BACKWARDS

@@ -22,6 +23,7 @@ config ARC
 	select GENERIC_SMP_IDLE_THREAD
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_FUTEX_CMPXCHG
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
@@ -72,12 +72,13 @@
 	};

 	/*
-	 * This INTC is actually connected to DW APB GPIO
-	 * which acts as a wire between MB INTC and CPU INTC.
-	 * GPIO INTC is configured in platform init code
-	 * and here we mimic direct connection from MB INTC to
-	 * CPU INTC, thus we set "interrupts = <7>" instead of
-	 * "interrupts = <12>"
+	 * The DW APB ICTL intc on MB is connected to CPU intc via a
+	 * DT "invisible" DW APB GPIO block, configured to simply pass thru
+	 * interrupts - set up accordingly in platform init (plat-axs10x/axs10x.c)
+	 *
+	 * So here we mimic a direct connection between them, ignoring the
+	 * APB GPIO. Thus set "interrupts = <24>" (DW APB GPIO to core)
+	 * instead of "interrupts = <12>" (DW APB ICTL to DW APB GPIO)
+	 *
 	 * This intc actually resides on MB, but we move it here to
 	 * avoid duplicating the MB dtsi file given that IRQ from
@@ -35,6 +35,7 @@
 #define ARC_REG_RTT_BCR		0xF2
 #define ARC_REG_IRQ_BCR		0xF3
 #define ARC_REG_SMART_BCR	0xFF
+#define ARC_REG_CLUSTER_BCR	0xcf

 /* status32 Bits Positions */
 #define STATUS_AE_BIT		5	/* Exception active */
@@ -53,6 +53,8 @@ extern void arc_cache_init(void);
 extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
 extern void read_decode_cache_bcr(void);

+extern int ioc_exists;
+
 #endif	/* !__ASSEMBLY__ */

 /* Instruction cache related Auxiliary registers */

@@ -94,4 +96,10 @@ extern void read_decode_cache_bcr(void);
 #define SLC_CTRL_BUSY		0x100
 #define SLC_CTRL_RGN_OP_INV	0x200

+/* IO coherency related Auxiliary registers */
+#define ARC_REG_IO_COH_ENABLE	0x500
+#define ARC_REG_IO_COH_PARTIAL	0x501
+#define ARC_REG_IO_COH_AP0_BASE	0x508
+#define ARC_REG_IO_COH_AP0_SIZE	0x509
+
 #endif /* _ASM_CACHE_H */
@@ -110,18 +110,18 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
 				 sizeof(*(ptr))))

 /*
- * On ARC700, EX insn is inherently atomic, so by default "vanilla" xchg() need
- * not require any locking. However there's a quirk.
- * ARC lacks native CMPXCHG, thus emulated (see above), using external locking -
- * incidentally it "reuses" the same atomic_ops_lock used by atomic APIs.
- * Now, llist code uses cmpxchg() and xchg() on same data, so xchg() needs to
- * abide by same serializing rules, thus ends up using atomic_ops_lock as well.
+ * xchg() maps directly to the ARC EX instruction which guarantees atomicity.
+ * However in the !LLSC config, it also needs to use the @atomic_ops_lock
+ * spinlock, due to a subtle reason:
+ *  - For !LLSC, cmpxchg() needs to use that lock (see above) and there is a
+ *    lot of kernel code which calls xchg()/cmpxchg() on the same data (see
+ *    llist.h). Hence xchg() needs to follow the same locking rules.
  *
- * This however is only relevant if SMP and/or ARC lacks LLSC
- *   if (UP or LLSC)
- *      xchg doesn't need serialization
- *   else <==> !(UP or LLSC) <==> (!UP and !LLSC) <==> (SMP and !LLSC)
- *      xchg needs serialization
+ * Technically the lock is also needed for UP (boils down to irq save/restore)
+ * but we can cheat a bit: since cmpxchg()'s atomic_ops_lock would cause irqs
+ * to be disabled, it can't possibly be interrupted/preempted/clobbered by
+ * xchg(). The other way around, xchg is one instruction anyway, so it can't
+ * be interrupted as such.
  */

 #if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP)
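For concreteness, the SMP && !LLSC case described above boils down to wrapping
the EX-based exchange in the shared lock. A minimal sketch (helper names
follow the comment - atomic_ops_lock/atomic_ops_unlock - and the exact macro
body in the tree may differ):

	#define xchg(ptr, with)						\
	({								\
		unsigned long flags;					\
		typeof(*(ptr)) old_val;					\
									\
		atomic_ops_lock(flags);	/* same lock cmpxchg() takes */	\
		old_val = __xchg((unsigned long)(with), (ptr),		\
				 sizeof(*(ptr)));			\
		atomic_ops_unlock(flags);				\
		old_val;						\
	})

This is what keeps mixed xchg()/cmpxchg() users such as llist.h safe on that
configuration.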
@@ -20,6 +20,7 @@

 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\
 							\
+	smp_mb();					\
 	__asm__ __volatile__(				\
 	"1:	llock	%1, [%2]		\n"	\
 		insn				"\n"	\

@@ -30,7 +31,7 @@
 	"	.section .fixup,\"ax\"		\n"	\
 	"	.align  4			\n"	\
 	"4:	mov %0, %4			\n"	\
-	"	b   3b				\n"	\
+	"	j   3b				\n"	\
 	"	.previous			\n"	\
 	"	.section __ex_table,\"a\"	\n"	\
 	"	.align  4			\n"	\

@@ -40,12 +41,14 @@
 							\
 	: "=&r" (ret), "=&r" (oldval)			\
 	: "r" (uaddr), "r" (oparg), "ir" (-EFAULT)	\
-	: "cc", "memory")
+	: "cc", "memory");				\
+	smp_mb()					\

 #else	/* !CONFIG_ARC_HAS_LLSC */

 #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)\
 							\
+	smp_mb();					\
 	__asm__ __volatile__(				\
 	"1:	ld	%1, [%2]		\n"	\
 		insn				"\n"	\

@@ -55,7 +58,7 @@
 	"	.section .fixup,\"ax\"		\n"	\
 	"	.align  4			\n"	\
 	"4:	mov %0, %4			\n"	\
-	"	b   3b				\n"	\
+	"	j   3b				\n"	\
 	"	.previous			\n"	\
 	"	.section __ex_table,\"a\"	\n"	\
 	"	.align  4			\n"	\

@@ -65,7 +68,8 @@
 							\
 	: "=&r" (ret), "=&r" (oldval)			\
 	: "r" (uaddr), "r" (oparg), "ir" (-EFAULT)	\
-	: "cc", "memory")
+	: "cc", "memory");				\
+	smp_mb()					\

 #endif
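To make the macro's shape concrete, a hedged sketch of what the LLSC flavour
expands to for FUTEX_OP_ADD (operand numbering as in the constraints above;
the "3:" success label, "4:" -EFAULT fixup and __ex_table plumbing shown in
the hunks are abbreviated):

	smp_mb();				/* barrier added by this series */
	__asm__ __volatile__(
	"1:	llock	%1, [%2]	\n"	/* oldval = *uaddr, link load   */
	"	add	%0, %1, %3	\n"	/* insn: ret = oldval + oparg   */
	"2:	scond	%0, [%2]	\n"	/* conditional store of result  */
	"	bnz	1b		\n"	/* link lost: retry             */
	/* ... labels 3/4 and __ex_table entries as above ... */
	: "=&r" (ret), "=&r" (oldval)
	: "r" (uaddr), "r" (oparg), "ir" (-EFAULT)
	: "cc", "memory");
	smp_mb();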
@@ -83,6 +87,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;

+#ifndef CONFIG_ARC_HAS_LLSC
+	preempt_disable();	/* to guarantee atomic r-m-w of futex op */
+#endif
 	pagefault_disable();

 	switch (op) {

@@ -90,6 +97,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 		__futex_atomic_op("mov %0, %3", ret, oldval, uaddr, oparg);
 		break;
 	case FUTEX_OP_ADD:
+		/* oldval = *uaddr; *uaddr += oparg; ret = *uaddr */
 		__futex_atomic_op("add %0, %1, %3", ret, oldval, uaddr, oparg);
 		break;
 	case FUTEX_OP_OR:

@@ -106,6 +114,9 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	}

 	pagefault_enable();
+#ifndef CONFIG_ARC_HAS_LLSC
+	preempt_enable();
+#endif

 	if (!ret) {
 		switch (cmp) {
@@ -134,54 +145,57 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
 	return ret;
 }

-/* Compare-xchg with pagefaults disabled.
- *  Notes:
- *      -Best-Effort: Exchg happens only if compare succeeds.
- *          If compare fails, returns; leaving retry/looping to upper layers
- *      -successful cmp-xchg: return orig value in @addr (same as cmp val)
- *      -Compare fails: return orig value in @addr
- *      -user access r/w fails: return -EFAULT
+/*
+ * cmpxchg of futex (pagefaults disabled by caller)
+ * Return 0 for success, -EFAULT otherwise
  */
 static inline int
-futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
-			      u32 newval)
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 expval,
+			      u32 newval)
 {
-	u32 val;
+	int ret = 0;
+	u32 existval;

-	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
+	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
 		return -EFAULT;

-	pagefault_disable();
+#ifndef CONFIG_ARC_HAS_LLSC
+	preempt_disable();	/* to guarantee atomic r-m-w of futex op */
+#endif
+	smp_mb();

 	__asm__ __volatile__(
 #ifdef CONFIG_ARC_HAS_LLSC
-	"1:	llock	%0, [%3]	\n"
-	"	brne	%0, %1, 3f	\n"
-	"2:	scond	%2, [%3]	\n"
+	"1:	llock	%1, [%4]	\n"
+	"	brne	%1, %2, 3f	\n"
+	"2:	scond	%3, [%4]	\n"
 	"	bnz	1b		\n"
 #else
-	"1:	ld	%0, [%3]	\n"
-	"	brne	%0, %1, 3f	\n"
-	"2:	st	%2, [%3]	\n"
+	"1:	ld	%1, [%4]	\n"
+	"	brne	%1, %2, 3f	\n"
+	"2:	st	%3, [%4]	\n"
 #endif
 	"3:	\n"
 	"	.section .fixup,\"ax\"	\n"
-	"4:	mov %0, %4	\n"
-	"	b   3b	\n"
+	"4:	mov %0, %5	\n"
+	"	j   3b	\n"
 	"	.previous	\n"
 	"	.section __ex_table,\"a\"	\n"
 	"	.align  4	\n"
 	"	.word	1b, 4b	\n"
 	"	.word	2b, 4b	\n"
 	"	.previous\n"
-	: "=&r"(val)
-	: "r"(oldval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
+	: "+&r"(ret), "=&r"(existval)
+	: "r"(expval), "r"(newval), "r"(uaddr), "ir"(-EFAULT)
 	: "cc", "memory");

-	pagefault_enable();
+	smp_mb();

-	*uval = val;
-	return val;
+#ifndef CONFIG_ARC_HAS_LLSC
+	preempt_enable();
+#endif
+	*uval = existval;
+	return ret;
 }

 #endif
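The "bimodal" conversion separates "did the user access fault" (the return
value) from "did the compare succeed" (reported through *uval). A sketch of
the resulting calling convention, with illustrative variable names:

	u32 cur;
	int err = futex_atomic_cmpxchg_inatomic(&cur, uaddr, expval, newval);

	if (err)		/* -EFAULT: user access failed           */
		return err;
	if (cur != expval)	/* compare failed: nothing was stored;   */
		retry();	/* illustrative - upper layers loop      */
	/* else *uaddr was atomically replaced with newval */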
@@ -1,6 +1,7 @@
 /*
  * Linux performance counter support for ARC
  *
+ * Copyright (C) 2014-2015 Synopsys, Inc. (www.synopsys.com)
  * Copyright (C) 2011-2013 Synopsys, Inc. (www.synopsys.com)
  *
  * This program is free software; you can redistribute it and/or modify

@@ -12,8 +13,8 @@
 #ifndef __ASM_PERF_EVENT_H
 #define __ASM_PERF_EVENT_H

-/* real maximum varies per CPU, this is the maximum supported by the driver */
-#define ARC_PMU_MAX_HWEVENTS	64
+/* Max number of counters that the PCT block may ever have */
+#define ARC_PERF_MAX_COUNTERS	32

 #define ARC_REG_CC_BUILD	0xF6
 #define ARC_REG_CC_INDEX	0x240

@@ -28,15 +29,22 @@
 #define ARC_REG_PCT_CONFIG	0x254
 #define ARC_REG_PCT_CONTROL	0x255
 #define ARC_REG_PCT_INDEX	0x256
+#define ARC_REG_PCT_INT_CNTL	0x25C
+#define ARC_REG_PCT_INT_CNTH	0x25D
+#define ARC_REG_PCT_INT_CTRL	0x25E
+#define ARC_REG_PCT_INT_ACT	0x25F
+
+#define ARC_REG_PCT_CONFIG_USER	(1 << 18)	/* count in user mode */
+#define ARC_REG_PCT_CONFIG_KERN	(1 << 19)	/* count in kernel mode */

 #define ARC_REG_PCT_CONTROL_CC	(1 << 16)	/* clear counts */
 #define ARC_REG_PCT_CONTROL_SN	(1 << 17)	/* snapshot */

 struct arc_reg_pct_build {
 #ifdef CONFIG_CPU_BIG_ENDIAN
-	unsigned int m:8, c:8, r:6, s:2, v:8;
+	unsigned int m:8, c:8, r:5, i:1, s:2, v:8;
 #else
-	unsigned int v:8, s:2, r:6, c:8, m:8;
+	unsigned int v:8, s:2, i:1, r:5, c:8, m:8;
 #endif
 };

@@ -95,10 +103,13 @@ static const char * const arc_pmu_ev_hw_map[] = {

 	/* counts condition */
 	[PERF_COUNT_HW_INSTRUCTIONS] = "iall",
-	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp", /* Excludes ZOL jumps */
 	[PERF_COUNT_ARC_BPOK]         = "bpok",	  /* NP-NT, PT-T, PNT-NT */
+#ifdef CONFIG_ISA_ARCV2
+	[PERF_COUNT_HW_BRANCH_MISSES] = "bpmp",
+#else
 	[PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
+#endif
 	[PERF_COUNT_ARC_LDC] = "imemrdc",	/* Instr: mem read cached */
 	[PERF_COUNT_ARC_STC] = "imemwrc",	/* Instr: mem write cached */
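For reference, a sketch of how this build register gets decoded (READ_BCR and
the PCT build BCR are the existing ARC helpers used by the probe code further
down; treat the fragment as illustrative):

	struct arc_reg_pct_build pct_bcr;

	READ_BCR(ARC_REG_PCT_BUILD, pct_bcr);
	/*
	 * pct_bcr.c : number of counters (<= ARC_PERF_MAX_COUNTERS)
	 * pct_bcr.s : size code - counter width is 32 + (s << 4) bits
	 * pct_bcr.i : new 1-bit field - overflow interrupt capability
	 */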
@@ -57,13 +57,8 @@ VECTOR	handle_interrupt	; (23) End of fixed IRQs

 	.section .text, "ax",@progbits

-res_service:		; processor restart
-	flag	0x1	; not implemented
-	nop
-	nop
-
-reserved:		; processor restart
-	rtie		; jump to processor initializations
+reserved:
+	flag 1		; Unexpected event, halt

 ;##################### Interrupt Handling ##############################
@@ -42,7 +42,7 @@ ENTRY(ret_from_fork)
 	; when the forked child comes here from the __switch_to function
 	; r0 has the last task pointer.
 	; put last task in scheduler queue
-	bl   @schedule_tail
+	jl   @schedule_tail

 	ld   r9, [sp, PT_status32]
 	brne r9, 0, 1f

@@ -320,7 +320,7 @@ resume_user_mode_begin:
 	; --- (Slow Path #1) task preemption ---
 	bbit0  r9, TIF_NEED_RESCHED, .Lchk_pend_signals
 	mov    blink, resume_user_mode_begin	; tail-call to U mode ret chks
-	b      @schedule	; BTST+Bnz causes relo error in link
+	j      @schedule	; BTST+Bnz causes relo error in link

 .Lchk_pend_signals:
 	IRQ_ENABLE	r10

@@ -381,7 +381,7 @@ resume_kernel_mode:
 	bbit0	r9, TIF_NEED_RESCHED, .Lrestore_regs

 	; Invoke PREEMPTION
-	bl	preempt_schedule_irq
+	jl	preempt_schedule_irq

 	; preempt_schedule_irq() always returns with IRQ disabled
 #endif
@@ -1,7 +1,7 @@
 /*
  * Linux performance counter support for ARC700 series
  *
- * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2013-2015 Synopsys, Inc. (www.synopsys.com)
  *
  * This code is inspired by the perf support of various other architectures.
  *

@@ -11,6 +11,7 @@
  *
  */
 #include <linux/errno.h>
+#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/perf_event.h>
@@ -20,12 +21,25 @@

 struct arc_pmu {
 	struct pmu	pmu;
-	int		counter_size;	/* in bits */
+	unsigned int	irq;
 	int		n_counters;
-	unsigned long	used_mask[BITS_TO_LONGS(ARC_PMU_MAX_HWEVENTS)];
 	u64		max_period;
 	int		ev_hw_idx[PERF_COUNT_ARC_HW_MAX];
 };

+struct arc_pmu_cpu {
+	/*
+	 * A 1 bit for an index indicates that the counter is being used for
+	 * an event. A 0 means that the counter can be used.
+	 */
+	unsigned long	used_mask[BITS_TO_LONGS(ARC_PERF_MAX_COUNTERS)];
+
+	/*
+	 * The events that are active on the PMU for the given index.
+	 */
+	struct perf_event *act_counter[ARC_PERF_MAX_COUNTERS];
+};
+
 struct arc_callchain_trace {
 	int depth;
 	void *perf_stuff;
@@ -65,6 +79,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 }

 static struct arc_pmu *arc_pmu;
+static DEFINE_PER_CPU(struct arc_pmu_cpu, arc_pmu_cpu);

 /* read counter #idx; note that counter# != event# on ARC! */
 static uint64_t arc_pmu_read_counter(int idx)
@@ -88,18 +103,15 @@ static uint64_t arc_pmu_read_counter(int idx)
 static void arc_perf_event_update(struct perf_event *event,
 				  struct hw_perf_event *hwc, int idx)
 {
-	uint64_t prev_raw_count, new_raw_count;
-	int64_t delta;
-
-	do {
-		prev_raw_count = local64_read(&hwc->prev_count);
-		new_raw_count = arc_pmu_read_counter(idx);
-	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-				 new_raw_count) != prev_raw_count);
-
-	delta = (new_raw_count - prev_raw_count) &
-		((1ULL << arc_pmu->counter_size) - 1ULL);
+	uint64_t prev_raw_count = local64_read(&hwc->prev_count);
+	uint64_t new_raw_count = arc_pmu_read_counter(idx);
+	int64_t delta = new_raw_count - prev_raw_count;

+	/*
+	 * We aren't afraid of hwc->prev_count changing beneath our feet
+	 * because there's no way for us to re-enter this function anytime.
+	 */
+	local64_set(&hwc->prev_count, new_raw_count);
 	local64_add(delta, &event->count);
 	local64_sub(delta, &hwc->period_left);
 }
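A quick numeric check of the simplified delta math (illustrative values):
since set_period below never seeds prev_count more than max_period - half the
counter range - below the overflow point, consecutive reads cannot drift far
enough apart for the plain subtraction to wrap:

	u64 max_period = (1ULL << 47) - 1;	/* e.g. 48-bit counters */
	u64 prev = max_period - 10000;		/* seeded by set_period */
	u64 now  = prev + 10000;		/* read in the IRQ      */
	s64 delta = now - prev;			/* 10000 - no masking   */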
@@ -142,22 +154,41 @@ static int arc_pmu_event_init(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	int ret;

+	if (!is_sampling_event(event)) {
+		hwc->sample_period  = arc_pmu->max_period;
+		hwc->last_period = hwc->sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+	}
+
+	hwc->config = 0;
+
+	if (is_isa_arcv2()) {
+		/* "exclude user" means "count only kernel" */
+		if (event->attr.exclude_user)
+			hwc->config |= ARC_REG_PCT_CONFIG_KERN;
+
+		/* "exclude kernel" means "count only user" */
+		if (event->attr.exclude_kernel)
+			hwc->config |= ARC_REG_PCT_CONFIG_USER;
+	}
+
 	switch (event->attr.type) {
 	case PERF_TYPE_HARDWARE:
 		if (event->attr.config >= PERF_COUNT_HW_MAX)
 			return -ENOENT;
 		if (arc_pmu->ev_hw_idx[event->attr.config] < 0)
 			return -ENOENT;
-		hwc->config = arc_pmu->ev_hw_idx[event->attr.config];
+		hwc->config |= arc_pmu->ev_hw_idx[event->attr.config];
 		pr_debug("init event %d with h/w %d \'%s\'\n",
 			 (int) event->attr.config, (int) hwc->config,
 			 arc_pmu_ev_hw_map[event->attr.config]);
 		return 0;
+
 	case PERF_TYPE_HW_CACHE:
 		ret = arc_pmu_cache_event(event->attr.config);
 		if (ret < 0)
 			return ret;
-		hwc->config = arc_pmu->ev_hw_idx[ret];
+		hwc->config |= arc_pmu->ev_hw_idx[ret];
 		return 0;
 	default:
 		return -ENOENT;
@@ -180,6 +211,47 @@ static void arc_pmu_disable(struct pmu *pmu)
 	write_aux_reg(ARC_REG_PCT_CONTROL, (tmp & 0xffff0000) | 0x0);
 }

+static int arc_pmu_event_set_period(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	s64 left = local64_read(&hwc->period_left);
+	s64 period = hwc->sample_period;
+	int idx = hwc->idx;
+	int overflow = 0;
+	u64 value;
+
+	if (unlikely(left <= -period)) {
+		/* left underflowed by more than period. */
+		left = period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	} else if (unlikely(left <= 0)) {
+		/* left underflowed by less than period. */
+		left += period;
+		local64_set(&hwc->period_left, left);
+		hwc->last_period = period;
+		overflow = 1;
+	}
+
+	if (left > arc_pmu->max_period)
+		left = arc_pmu->max_period;
+
+	value = arc_pmu->max_period - left;
+	local64_set(&hwc->prev_count, value);
+
+	/* Select counter */
+	write_aux_reg(ARC_REG_PCT_INDEX, idx);
+
+	/* Write value */
+	write_aux_reg(ARC_REG_PCT_COUNTL, (u32)value);
+	write_aux_reg(ARC_REG_PCT_COUNTH, (value >> 32));
+
+	perf_event_update_userpage(event);
+
+	return overflow;
+}
+
 /*
  * Assigns hardware counter to hardware condition.
  * Note that there is no separate start/stop mechanism;
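The seeding line is the crux: arc_pmu_add() below programs the interrupt
threshold registers to max_period, so writing max_period - left into the
counter makes the overflow interrupt arrive after exactly `left` more events.
Worked through with illustrative numbers:

	s64 left = 10000;			/* remaining sample period */
	u64 value = arc_pmu->max_period - left;	/* -> COUNTL/COUNTH        */
	/* the counter counts up from value; after 10000 events it crosses
	   max_period, the PCT raises its IRQ, and the handler re-arms the
	   counter via arc_pmu_event_set_period() */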
@@ -194,13 +266,20 @@ static void arc_pmu_start(struct perf_event *event, int flags)
 		return;

 	if (flags & PERF_EF_RELOAD)
-		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
+		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

-	event->hw.state = 0;
+	hwc->state = 0;
+
+	arc_pmu_event_set_period(event);
+
+	/* Enable interrupt for this counter */
+	if (is_sampling_event(event))
+		write_aux_reg(ARC_REG_PCT_INT_CTRL,
+			      read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));

 	/* enable ARC pmu here */
-	write_aux_reg(ARC_REG_PCT_INDEX, idx);
-	write_aux_reg(ARC_REG_PCT_CONFIG, hwc->config);
+	write_aux_reg(ARC_REG_PCT_INDEX, idx);		/* counter # */
+	write_aux_reg(ARC_REG_PCT_CONFIG, hwc->config);	/* condition */
 }

@@ -208,6 +287,17 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;

+	/* Disable interrupt for this counter */
+	if (is_sampling_event(event)) {
+		/*
+		 * Reset the interrupt flag by writing 1. This is required
+		 * to make sure a pending interrupt is not left behind.
+		 */
+		write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
+		write_aux_reg(ARC_REG_PCT_INT_CTRL,
+			      read_aux_reg(ARC_REG_PCT_INT_CTRL) & ~(1 << idx));
+	}
+
 	if (!(event->hw.state & PERF_HES_STOPPED)) {
 		/* stop ARC pmu here */
 		write_aux_reg(ARC_REG_PCT_INDEX, idx);

@@ -227,8 +317,12 @@ static void arc_pmu_stop(struct perf_event *event, int flags)

 static void arc_pmu_del(struct perf_event *event, int flags)
 {
+	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
+
 	arc_pmu_stop(event, PERF_EF_UPDATE);
-	__clear_bit(event->hw.idx, arc_pmu->used_mask);
+	__clear_bit(event->hw.idx, pmu_cpu->used_mask);
+
+	pmu_cpu->act_counter[event->hw.idx] = 0;

 	perf_event_update_userpage(event);
 }
@@ -236,20 +330,31 @@ static void arc_pmu_del(struct perf_event *event, int flags)
 /* allocate hardware counter and optionally start counting */
 static int arc_pmu_add(struct perf_event *event, int flags)
 {
+	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;

-	if (__test_and_set_bit(idx, arc_pmu->used_mask)) {
-		idx = find_first_zero_bit(arc_pmu->used_mask,
+	if (__test_and_set_bit(idx, pmu_cpu->used_mask)) {
+		idx = find_first_zero_bit(pmu_cpu->used_mask,
 					  arc_pmu->n_counters);
 		if (idx == arc_pmu->n_counters)
 			return -EAGAIN;

-		__set_bit(idx, arc_pmu->used_mask);
+		__set_bit(idx, pmu_cpu->used_mask);
 		hwc->idx = idx;
 	}

 	write_aux_reg(ARC_REG_PCT_INDEX, idx);
+
+	pmu_cpu->act_counter[idx] = event;
+
+	if (is_sampling_event(event)) {
+		/* Mimic full counter overflow as other arches do */
+		write_aux_reg(ARC_REG_PCT_INT_CNTL, (u32)arc_pmu->max_period);
+		write_aux_reg(ARC_REG_PCT_INT_CNTH,
+			      (arc_pmu->max_period >> 32));
+	}
+
 	write_aux_reg(ARC_REG_PCT_CONFIG, 0);
 	write_aux_reg(ARC_REG_PCT_COUNTL, 0);
 	write_aux_reg(ARC_REG_PCT_COUNTH, 0);
@@ -264,11 +369,82 @@ static int arc_pmu_add(struct perf_event *event, int flags)
 	return 0;
 }

+#ifdef CONFIG_ISA_ARCV2
+static irqreturn_t arc_pmu_intr(int irq, void *dev)
+{
+	struct perf_sample_data data;
+	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
+	struct pt_regs *regs;
+	int active_ints;
+	int idx;
+
+	arc_pmu_disable(&arc_pmu->pmu);
+
+	active_ints = read_aux_reg(ARC_REG_PCT_INT_ACT);
+
+	regs = get_irq_regs();
+
+	for (idx = 0; idx < arc_pmu->n_counters; idx++) {
+		struct perf_event *event = pmu_cpu->act_counter[idx];
+		struct hw_perf_event *hwc;
+
+		if (!(active_ints & (1 << idx)))
+			continue;
+
+		/* Reset the interrupt flag by writing 1 */
+		write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
+
+		/*
+		 * On reset of the "interrupt active" bit the corresponding
+		 * "interrupt enable" bit gets automatically reset as well.
+		 * Now we need to re-enable the interrupt for the counter.
+		 */
+		write_aux_reg(ARC_REG_PCT_INT_CTRL,
+			      read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
+
+		hwc = &event->hw;
+
+		WARN_ON_ONCE(hwc->idx != idx);
+
+		arc_perf_event_update(event, &event->hw, event->hw.idx);
+		perf_sample_data_init(&data, 0, hwc->last_period);
+		if (!arc_pmu_event_set_period(event))
+			continue;
+
+		if (perf_event_overflow(event, &data, regs))
+			arc_pmu_stop(event, 0);
+	}
+
+	arc_pmu_enable(&arc_pmu->pmu);
+
+	return IRQ_HANDLED;
+}
+#else
+
+static irqreturn_t arc_pmu_intr(int irq, void *dev)
+{
+	return IRQ_NONE;
+}
+
+#endif /* CONFIG_ISA_ARCV2 */
+
+void arc_cpu_pmu_irq_init(void)
+{
+	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
+
+	arc_request_percpu_irq(arc_pmu->irq, smp_processor_id(), arc_pmu_intr,
+			       "ARC perf counters", pmu_cpu);
+
+	/* Clear all pending interrupt flags */
+	write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
+}
+
 static int arc_pmu_device_probe(struct platform_device *pdev)
 {
 	struct arc_reg_pct_build pct_bcr;
 	struct arc_reg_cc_build cc_bcr;
-	int i, j;
+	int i, j, has_interrupts;
+	int counter_size;	/* in bits */

 	union cc_name {
 		struct {
@@ -284,7 +460,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 		pr_err("This core does not have performance counters!\n");
 		return -ENODEV;
 	}
-	BUG_ON(pct_bcr.c > ARC_PMU_MAX_HWEVENTS);
+	BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS);

 	READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
 	BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */

@@ -293,11 +469,16 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 	if (!arc_pmu)
 		return -ENOMEM;

-	arc_pmu->n_counters = pct_bcr.c;
-	arc_pmu->counter_size = 32 + (pct_bcr.s << 4);
+	has_interrupts = is_isa_arcv2() ? pct_bcr.i : 0;

-	pr_info("ARC perf\t: %d counters (%d bits), %d countable conditions\n",
-		arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c);
+	arc_pmu->n_counters = pct_bcr.c;
+	counter_size = 32 + (pct_bcr.s << 4);
+
+	arc_pmu->max_period = (1ULL << counter_size) / 2 - 1ULL;
+
+	pr_info("ARC perf\t: %d counters (%d bits), %d conditions%s\n",
+		arc_pmu->n_counters, counter_size, cc_bcr.c,
+		has_interrupts ? ", [overflow IRQ support]" : "");

 	cc_name.str[8] = 0;
 	for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++)
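Plugging in an illustrative configuration: a PCT built with pct_bcr.s == 1
reports 32 + (1 << 4) = 48-bit counters, so

	int counter_size = 32 + (1 << 4);	/* 48 bits        */
	u64 max_period = (1ULL << 48) / 2 - 1;	/* 0x7fffffffffff */

Halving the range keeps every programmed period well below the counter's wrap
point, which is what lets arc_perf_event_update() drop the old masking.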
@@ -332,8 +513,37 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 		.read		= arc_pmu_read,
 	};

-	/* ARC 700 PMU does not support sampling events */
-	arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+	if (has_interrupts) {
+		int irq = platform_get_irq(pdev, 0);
+		unsigned long flags;
+
+		if (irq < 0) {
+			pr_err("Cannot get IRQ number for the platform\n");
+			return -ENODEV;
+		}
+
+		arc_pmu->irq = irq;
+
+		/*
+		 * arc_cpu_pmu_irq_init() needs to be called on all cores for
+		 * their respective local PMU.
+		 * However we use an open-coded on_each_cpu() to ensure it is
+		 * called on core0 first, so that arc_request_percpu_irq()
+		 * sets up AUTOEN etc. Otherwise enable_percpu_irq() fails to
+		 * enable the perf IRQ on non-master cores.
+		 * see arc_request_percpu_irq()
+		 */
+		preempt_disable();
+		local_irq_save(flags);
+		arc_cpu_pmu_irq_init();
+		local_irq_restore(flags);
+		smp_call_function((smp_call_func_t)arc_cpu_pmu_irq_init, 0, 1);
+		preempt_enable();
+
+		/* Clean all pending interrupt flags */
+		write_aux_reg(ARC_REG_PCT_INT_ACT, 0xffffffff);
+	} else
+		arc_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;

 	return perf_pmu_register(&arc_pmu->pmu, pdev->name, PERF_TYPE_RAW);
 }
@@ -341,6 +551,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 #ifdef CONFIG_OF
 static const struct of_device_id arc_pmu_match[] = {
 	{ .compatible = "snps,arc700-pct" },
+	{ .compatible = "snps,archs-pct" },
 	{},
 };
 MODULE_DEVICE_TABLE(of, arc_pmu_match);

@@ -348,7 +559,7 @@ MODULE_DEVICE_TABLE(of, arc_pmu_match);

 static struct platform_driver arc_pmu_driver = {
 	.driver = {
-		.name		= "arc700-pct",
+		.name		= "arc-pct",
 		.of_match_table = of_match_ptr(arc_pmu_match),
 	},
 	.probe		= arc_pmu_device_probe,
@@ -65,7 +65,7 @@ asmlinkage void ret_from_fork(void);
 * ------------------
 * |      r25       |   <==== top of Stack (thread.ksp)
 * ~                ~
- * |    --to--      |   (CALLEE Regs of user mode)
+ * |    --to--      |   (CALLEE Regs of kernel mode)
 * |      r13       |
 * ------------------
 * |       fp       |
@@ -34,7 +34,7 @@
 	"	.section .fixup, \"ax\"\n"	\
 	"	.align 4\n"			\
 	"3:	mov %0, 1\n"			\
-	"	b   2b\n"			\
+	"	j   2b\n"			\
 	"	.previous\n"			\
 	"	.section __ex_table, \"a\"\n"	\
 	"	.align 4\n"			\

@@ -82,7 +82,7 @@
 	"	.section .fixup, \"ax\"\n"	\
 	"	.align 4\n"			\
 	"4:	mov %0, 1\n"			\
-	"	b   3b\n"			\
+	"	j   3b\n"			\
 	"	.previous\n"			\
 	"	.section __ex_table, \"a\"\n"	\
 	"	.align 4\n"			\

@@ -113,7 +113,7 @@
 	"	.section .fixup, \"ax\"\n"	\
 	"	.align 4\n"			\
 	"6:	mov %0, 1\n"			\
-	"	b   5b\n"			\
+	"	j   5b\n"			\
 	"	.previous\n"			\
 	"	.section __ex_table, \"a\"\n"	\
 	"	.align 4\n"			\
@@ -22,15 +22,22 @@
 #include <asm/setup.h>

 static int l2_line_sz;
+int ioc_exists;
+volatile int slc_enable = 1, ioc_enable = 1;

 void (*_cache_line_loop_ic_fn)(unsigned long paddr, unsigned long vaddr,
 			       unsigned long sz, const int cacheop);

+void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_wback)(unsigned long start, unsigned long sz);
+
 char *arc_cache_mumbojumbo(int c, char *buf, int len)
 {
 	int n = 0;
 	struct cpuinfo_arc_cache *p;

+#define IS_USED_RUN(v)		((v) ? "" : "(disabled) ")
 #define PR_CACHE(p, cfg, str)						\
 	if (!(p)->ver)							\
 		n += scnprintf(buf + n, len - n, str"\t\t: N/A\n");	\

@@ -45,10 +52,18 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
 	PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
 	PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");

+	if (!is_isa_arcv2())
+		return buf;
+
 	p = &cpuinfo_arc700[c].slc;
 	if (p->ver)
 		n += scnprintf(buf + n, len - n,
-			       "SLC\t\t: %uK, %uB Line\n", p->sz_k, p->line_len);
+			       "SLC\t\t: %uK, %uB Line%s\n",
+			       p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
+
+	if (ioc_exists)
+		n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n",
+				IS_USED_RUN(ioc_enable));

 	return buf;
 }
@@ -58,18 +73,9 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
  * the cpuinfo structure for later use.
  * No Validation done here, simply read/convert the BCRs
  */
-void read_decode_cache_bcr(void)
+static void read_decode_cache_bcr_arcv2(int cpu)
 {
-	struct cpuinfo_arc_cache *p_ic, *p_dc, *p_slc;
-	unsigned int cpu = smp_processor_id();
-	struct bcr_cache {
-#ifdef CONFIG_CPU_BIG_ENDIAN
-		unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
-		unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
-	} ibcr, dbcr;
-
+	struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
 	struct bcr_generic sbcr;

 	struct bcr_slc_cfg {

@@ -80,6 +86,39 @@ void read_decode_cache_bcr(void)
 #endif
 	} slc_cfg;

+	struct bcr_clust_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
+#else
+		unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
+#endif
+	} cbcr;
+
+	READ_BCR(ARC_REG_SLC_BCR, sbcr);
+	if (sbcr.ver) {
+		READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
+		p_slc->ver = sbcr.ver;
+		p_slc->sz_k = 128 << slc_cfg.sz;
+		l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+	}
+
+	READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
+	if (cbcr.c && ioc_enable)
+		ioc_exists = 1;
+}
+
+void read_decode_cache_bcr(void)
+{
+	struct cpuinfo_arc_cache *p_ic, *p_dc;
+	unsigned int cpu = smp_processor_id();
+	struct bcr_cache {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+		unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
+#else
+		unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
+#endif
+	} ibcr, dbcr;
+
 	p_ic = &cpuinfo_arc700[cpu].icache;
 	READ_BCR(ARC_REG_IC_BCR, ibcr);
@@ -122,17 +161,8 @@ dc_chk:
 	p_dc->ver = dbcr.ver;

 slc_chk:
-	if (!is_isa_arcv2())
-		return;
-
-	p_slc = &cpuinfo_arc700[cpu].slc;
-	READ_BCR(ARC_REG_SLC_BCR, sbcr);
-	if (sbcr.ver) {
-		READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
-		p_slc->ver = sbcr.ver;
-		p_slc->sz_k = 128 << slc_cfg.sz;
-		l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
-	}
+	if (is_isa_arcv2())
+		read_decode_cache_bcr_arcv2(cpu);
 }

 /*

@@ -516,11 +546,6 @@ noinline void slc_op(unsigned long paddr, unsigned long sz, const int op)
 #endif
 }

-static inline int need_slc_flush(void)
-{
-	return is_isa_arcv2() && l2_line_sz;
-}
-
 /***********************************************************
  * Exported APIs
  */
@@ -569,30 +594,74 @@ void flush_dcache_page(struct page *page)
 }
 EXPORT_SYMBOL(flush_dcache_page);

-void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+/*
+ * DMA ops for systems with L1 cache only
+ * Make memory coherent with L1 cache by flushing/invalidating L1 lines
+ */
+static void __dma_cache_wback_inv_l1(unsigned long start, unsigned long sz)
 {
 	__dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+}

-	if (need_slc_flush())
-		slc_op(start, sz, OP_FLUSH_N_INV);
+static void __dma_cache_inv_l1(unsigned long start, unsigned long sz)
+{
+	__dc_line_op_k(start, sz, OP_INV);
+}
+
+static void __dma_cache_wback_l1(unsigned long start, unsigned long sz)
+{
+	__dc_line_op_k(start, sz, OP_FLUSH);
+}
+
+/*
+ * DMA ops for systems with both L1 and L2 caches, but without IOC
+ * Both L1 and L2 lines need to be explicitly flushed/invalidated
+ */
+static void __dma_cache_wback_inv_slc(unsigned long start, unsigned long sz)
+{
+	__dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+	slc_op(start, sz, OP_FLUSH_N_INV);
+}
+
+static void __dma_cache_inv_slc(unsigned long start, unsigned long sz)
+{
+	__dc_line_op_k(start, sz, OP_INV);
+	slc_op(start, sz, OP_INV);
+}
+
+static void __dma_cache_wback_slc(unsigned long start, unsigned long sz)
+{
+	__dc_line_op_k(start, sz, OP_FLUSH);
+	slc_op(start, sz, OP_FLUSH);
+}
+
+/*
+ * DMA ops for systems with IOC
+ * IOC hardware snoops all DMA traffic keeping the caches consistent with
+ * memory - eliding the need for any explicit cache maintenance of DMA buffers
+ */
+static void __dma_cache_wback_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_wback_ioc(unsigned long start, unsigned long sz) {}
+
+/*
+ * Exported DMA API
+ */
+void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+{
+	__dma_cache_wback_inv(start, sz);
 }
 EXPORT_SYMBOL(dma_cache_wback_inv);

 void dma_cache_inv(unsigned long start, unsigned long sz)
 {
-	__dc_line_op_k(start, sz, OP_INV);
-
-	if (need_slc_flush())
-		slc_op(start, sz, OP_INV);
+	__dma_cache_inv(start, sz);
 }
 EXPORT_SYMBOL(dma_cache_inv);

 void dma_cache_wback(unsigned long start, unsigned long sz)
 {
-	__dc_line_op_k(start, sz, OP_FLUSH);
-
-	if (need_slc_flush())
-		slc_op(start, sz, OP_FLUSH);
+	__dma_cache_wback(start, sz);
 }
 EXPORT_SYMBOL(dma_cache_wback);
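A hedged usage sketch of the exported API (illustrative driver fragment, not
from this patch): write back before a device reads a buffer, invalidate
before the CPU reads device-written data; on an IOC system all of these calls
resolve to the empty _ioc handlers through the function pointers above:

	/* illustrative names: buf, len and the device kicks are made up */
	dma_cache_wback(buf, len);	/* CPU writes -> memory, then DMA out */
	start_device_read(buf, len);

	wait_device_write_done(buf, len);
	dma_cache_inv(buf, len);	/* drop stale lines before CPU reads  */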
|
@ -848,4 +917,41 @@ void arc_cache_init(void)
|
|||
panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (is_isa_arcv2() && l2_line_sz && !slc_enable) {
|
||||
|
||||
/* IM set : flush before invalidate */
|
||||
write_aux_reg(ARC_REG_SLC_CTRL,
|
||||
read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_IM);
|
||||
|
||||
write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);
|
||||
|
||||
/* Important to wait for flush to complete */
|
||||
while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
|
||||
write_aux_reg(ARC_REG_SLC_CTRL,
|
||||
read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE);
|
||||
}
|
||||
|
||||
if (is_isa_arcv2() && ioc_exists) {
|
||||
/* IO coherency base - 0x8z */
|
||||
write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
|
||||
/* IO coherency aperture size - 512Mb: 0x8z-0xAz */
|
||||
write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, 0x11);
|
||||
/* Enable partial writes */
|
||||
write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1);
|
||||
/* Enable IO coherency */
|
||||
write_aux_reg(ARC_REG_IO_COH_ENABLE, 1);
|
||||
|
||||
__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
|
||||
__dma_cache_inv = __dma_cache_inv_ioc;
|
||||
__dma_cache_wback = __dma_cache_wback_ioc;
|
||||
} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
|
||||
__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
|
||||
__dma_cache_inv = __dma_cache_inv_slc;
|
||||
__dma_cache_wback = __dma_cache_wback_slc;
|
||||
} else {
|
||||
__dma_cache_wback_inv = __dma_cache_wback_inv_l1;
|
||||
__dma_cache_inv = __dma_cache_inv_l1;
|
||||
__dma_cache_wback = __dma_cache_wback_l1;
|
||||
}
|
||||
}
|
||||
|
|
|
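A note on the aperture values, stated as an assumption since this patch does
not spell out the register encoding: AP0_BASE = 0x80000 appears to be the
0x8000_0000 linear RAM base with the low 12 bits dropped (0x8000_0000 >> 12
== 0x80000), and the comments equate AP0_SIZE = 0x11 with a 512 MB window,
i.e. coverage of 0x8000_0000..0x9FFF_FFFF - the region ARC Linux maps as RAM.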
@@ -19,6 +19,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/dma-debug.h>
 #include <linux/export.h>
+#include <asm/cache.h>
 #include <asm/cacheflush.h>

 /*

@@ -53,6 +54,20 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
 {
 	void *paddr, *kvaddr;

+	/*
+	 * IOC relies on all data (even coherent DMA data) being in cache
+	 * Thus allocate normal cached memory
+	 *
+	 * The gains with IOC are two pronged:
+	 *   -For streaming data, it elides the need for cache maintenance,
+	 *    saving cycles in flush code and bus bandwidth, as otherwise all
+	 *    the lines of a buffer need to be flushed out to memory
+	 *   -For coherent data, Read/Write to buffers terminate early in the
+	 *    cache (vs. always going to memory - thus are faster)
+	 */
+	if (is_isa_arcv2() && ioc_exists)
+		return dma_alloc_noncoherent(dev, size, dma_handle, gfp);
+
 	/* This is linear addr (0x8000_0000 based) */
 	paddr = alloc_pages_exact(size, gfp);
 	if (!paddr)
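A hedged illustration of the effect (a standard DMA API call as a driver
would issue it; names illustrative): the same allocation now returns cached
memory when IOC is present:

	dma_addr_t handle;
	void *ring = dma_alloc_coherent(dev, 4096, &handle, GFP_KERNEL);
	/*
	 * without IOC: 'ring' is an uncached ioremap of the pages
	 * with IOC:    'ring' is normal cached memory (noncoherent path
	 *              above); the IOC snoops device traffic, so CPU and
	 *              device stay coherent while accesses hit the cache
	 */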
@@ -85,6 +100,9 @@ EXPORT_SYMBOL(dma_alloc_coherent);
 void dma_free_coherent(struct device *dev, size_t size, void *kvaddr,
 		       dma_addr_t dma_handle)
 {
+	if (is_isa_arcv2() && ioc_exists)
+		return dma_free_noncoherent(dev, size, kvaddr, dma_handle);
+
 	iounmap((void __force __iomem *)kvaddr);

 	free_pages_exact((void *)dma_handle, size);
@@ -46,7 +46,7 @@ static void __init axs10x_enable_gpio_intc_wire(void)
 	 * -------------------          -------------------
 	 * | snps,dw-apb-gpio |          | snps,dw-apb-gpio |
 	 * -------------------          -------------------
-	 *        |                               |
+	 *        |                              #12
 	 *        |                     [ Debug UART on cpu card ]
 	 *        |
 	 * ------------------------