ARM: 8805/2: remove unneeded naked function usage

The naked attribute is known to confuse some old gcc versions when
function arguments aren't explicitly listed as inline assembly operands
despite the gcc documentation. That resulted in commit 9a40ac8615
("ARM: 6164/1: Add kto and kfrom to input operands list.").

Yet that commit has problems of its own by having assembly operand
constraints completely wrong. If the generated code has been OK since
then, it is due to luck rather than correctness. So this patch also
provides proper assembly operand constraints, and removes two instances
of redundant register usages in the implementation while at it.

Inspection of the generated code with this patch doesn't show any
obvious quality degradation either, so not relying on __naked at all
will make the code less fragile, and avoid some issues with clang.

The only remaining __naked instances (excluding the kprobes test cases)
are exynos_pm_power_up_setup(), tc2_pm_power_up_setup() and
cci_enable_port_for_self(). But in the first two cases, only the function
address is used by the compiler with no chance of inlining it by
mistake, and the third case is called from assembly code only. And the
fact that no stack is available when the corresponding code is executed
does warrant the __naked usage in those cases.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Tested-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
This commit is contained in:
Nicolas Pitre 2018-11-07 17:49:00 +01:00 committed by Russell King
parent 31d0b9f998
commit b99afae139
7 changed files with 178 additions and 194 deletions

View File

@ -17,26 +17,25 @@
/* /*
* Faraday optimised copy_user_page * Faraday optimised copy_user_page
*/ */
static void __naked static void fa_copy_user_page(void *kto, const void *kfrom)
fa_copy_user_page(void *kto, const void *kfrom)
{ {
asm("\ int tmp;
stmfd sp!, {r4, lr} @ 2\n\
mov r2, %0 @ 1\n\ asm volatile ("\
1: ldmia r1!, {r3, r4, ip, lr} @ 4\n\ 1: ldmia %1!, {r3, r4, ip, lr} @ 4\n\
stmia r0, {r3, r4, ip, lr} @ 4\n\ stmia %0, {r3, r4, ip, lr} @ 4\n\
mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
add r0, r0, #16 @ 1\n\ add %0, %0, #16 @ 1\n\
ldmia r1!, {r3, r4, ip, lr} @ 4\n\ ldmia %1!, {r3, r4, ip, lr} @ 4\n\
stmia r0, {r3, r4, ip, lr} @ 4\n\ stmia %0, {r3, r4, ip, lr} @ 4\n\
mcr p15, 0, r0, c7, c14, 1 @ 1 clean and invalidate D line\n\ mcr p15, 0, %0, c7, c14, 1 @ 1 clean and invalidate D line\n\
add r0, r0, #16 @ 1\n\ add %0, %0, #16 @ 1\n\
subs r2, r2, #1 @ 1\n\ subs %2, %2, #1 @ 1\n\
bne 1b @ 1\n\ bne 1b @ 1\n\
mcr p15, 0, r2, c7, c10, 4 @ 1 drain WB\n\ mcr p15, 0, %2, c7, c10, 4 @ 1 drain WB"
ldmfd sp!, {r4, pc} @ 3" : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp)
: : "2" (PAGE_SIZE / 32)
: "I" (PAGE_SIZE / 32)); : "r3", "r4", "ip", "lr");
} }
void fa_copy_user_highpage(struct page *to, struct page *from, void fa_copy_user_highpage(struct page *to, struct page *from,

View File

@ -13,58 +13,56 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/highmem.h> #include <linux/highmem.h>
static void __naked static void feroceon_copy_user_page(void *kto, const void *kfrom)
feroceon_copy_user_page(void *kto, const void *kfrom)
{ {
asm("\ int tmp;
stmfd sp!, {r4-r9, lr} \n\
mov ip, %2 \n\ asm volatile ("\
1: mov lr, r1 \n\ 1: ldmia %1!, {r2 - r7, ip, lr} \n\
ldmia r1!, {r2 - r9} \n\ pld [%1, #0] \n\
pld [lr, #32] \n\ pld [%1, #32] \n\
pld [lr, #64] \n\ pld [%1, #64] \n\
pld [lr, #96] \n\ pld [%1, #96] \n\
pld [lr, #128] \n\ pld [%1, #128] \n\
pld [lr, #160] \n\ pld [%1, #160] \n\
pld [lr, #192] \n\ pld [%1, #192] \n\
pld [lr, #224] \n\ stmia %0, {r2 - r7, ip, lr} \n\
stmia r0, {r2 - r9} \n\ ldmia %1!, {r2 - r7, ip, lr} \n\
ldmia r1!, {r2 - r9} \n\ mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ add %0, %0, #32 \n\
add r0, r0, #32 \n\ stmia %0, {r2 - r7, ip, lr} \n\
stmia r0, {r2 - r9} \n\ ldmia %1!, {r2 - r7, ip, lr} \n\
ldmia r1!, {r2 - r9} \n\ mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ add %0, %0, #32 \n\
add r0, r0, #32 \n\ stmia %0, {r2 - r7, ip, lr} \n\
stmia r0, {r2 - r9} \n\ ldmia %1!, {r2 - r7, ip, lr} \n\
ldmia r1!, {r2 - r9} \n\ mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ add %0, %0, #32 \n\
add r0, r0, #32 \n\ stmia %0, {r2 - r7, ip, lr} \n\
stmia r0, {r2 - r9} \n\ ldmia %1!, {r2 - r7, ip, lr} \n\
ldmia r1!, {r2 - r9} \n\ mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ add %0, %0, #32 \n\
add r0, r0, #32 \n\ stmia %0, {r2 - r7, ip, lr} \n\
stmia r0, {r2 - r9} \n\ ldmia %1!, {r2 - r7, ip, lr} \n\
ldmia r1!, {r2 - r9} \n\ mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ add %0, %0, #32 \n\
add r0, r0, #32 \n\ stmia %0, {r2 - r7, ip, lr} \n\
stmia r0, {r2 - r9} \n\ ldmia %1!, {r2 - r7, ip, lr} \n\
ldmia r1!, {r2 - r9} \n\ mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ add %0, %0, #32 \n\
add r0, r0, #32 \n\ stmia %0, {r2 - r7, ip, lr} \n\
stmia r0, {r2 - r9} \n\ ldmia %1!, {r2 - r7, ip, lr} \n\
ldmia r1!, {r2 - r9} \n\ mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ add %0, %0, #32 \n\
add r0, r0, #32 \n\ stmia %0, {r2 - r7, ip, lr} \n\
stmia r0, {r2 - r9} \n\ subs %2, %2, #(32 * 8) \n\
subs ip, ip, #(32 * 8) \n\ mcr p15, 0, %0, c7, c14, 1 @ clean and invalidate D line\n\
mcr p15, 0, r0, c7, c14, 1 @ clean and invalidate D line\n\ add %0, %0, #32 \n\
add r0, r0, #32 \n\
bne 1b \n\ bne 1b \n\
mcr p15, 0, ip, c7, c10, 4 @ drain WB\n\ mcr p15, 0, %2, c7, c10, 4 @ drain WB"
ldmfd sp!, {r4-r9, pc}" : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp)
: : "2" (PAGE_SIZE)
: "r" (kto), "r" (kfrom), "I" (PAGE_SIZE)); : "r2", "r3", "r4", "r5", "r6", "r7", "ip", "lr");
} }
void feroceon_copy_user_highpage(struct page *to, struct page *from, void feroceon_copy_user_highpage(struct page *to, struct page *from,

View File

@ -40,12 +40,11 @@ static DEFINE_RAW_SPINLOCK(minicache_lock);
* instruction. If your processor does not supply this, you have to write your * instruction. If your processor does not supply this, you have to write your
* own copy_user_highpage that does the right thing. * own copy_user_highpage that does the right thing.
*/ */
static void __naked static void mc_copy_user_page(void *from, void *to)
mc_copy_user_page(void *from, void *to)
{ {
asm volatile( int tmp;
"stmfd sp!, {r4, lr} @ 2\n\
mov r4, %2 @ 1\n\ asm volatile ("\
ldmia %0!, {r2, r3, ip, lr} @ 4\n\ ldmia %0!, {r2, r3, ip, lr} @ 4\n\
1: mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\ 1: mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\
stmia %1!, {r2, r3, ip, lr} @ 4\n\ stmia %1!, {r2, r3, ip, lr} @ 4\n\
@ -55,13 +54,13 @@ mc_copy_user_page(void *from, void *to)
mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\ mcr p15, 0, %1, c7, c6, 1 @ 1 invalidate D line\n\
stmia %1!, {r2, r3, ip, lr} @ 4\n\ stmia %1!, {r2, r3, ip, lr} @ 4\n\
ldmia %0!, {r2, r3, ip, lr} @ 4\n\ ldmia %0!, {r2, r3, ip, lr} @ 4\n\
subs r4, r4, #1 @ 1\n\ subs %2, %2, #1 @ 1\n\
stmia %1!, {r2, r3, ip, lr} @ 4\n\ stmia %1!, {r2, r3, ip, lr} @ 4\n\
ldmneia %0!, {r2, r3, ip, lr} @ 4\n\ ldmneia %0!, {r2, r3, ip, lr} @ 4\n\
bne 1b @ 1\n\ bne 1b @ "
ldmfd sp!, {r4, pc} @ 3" : "+&r" (from), "+&r" (to), "=&r" (tmp)
: : "2" (PAGE_SIZE / 64)
: "r" (from), "r" (to), "I" (PAGE_SIZE / 64)); : "r2", "r3", "ip", "lr");
} }
void v4_mc_copy_user_highpage(struct page *to, struct page *from, void v4_mc_copy_user_highpage(struct page *to, struct page *from,

View File

@ -22,29 +22,28 @@
* instruction. If your processor does not supply this, you have to write your * instruction. If your processor does not supply this, you have to write your
* own copy_user_highpage that does the right thing. * own copy_user_highpage that does the right thing.
*/ */
static void __naked static void v4wb_copy_user_page(void *kto, const void *kfrom)
v4wb_copy_user_page(void *kto, const void *kfrom)
{ {
asm("\ int tmp;
stmfd sp!, {r4, lr} @ 2\n\
mov r2, %2 @ 1\n\ asm volatile ("\
ldmia r1!, {r3, r4, ip, lr} @ 4\n\ ldmia %1!, {r3, r4, ip, lr} @ 4\n\
1: mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\ 1: mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\
stmia r0!, {r3, r4, ip, lr} @ 4\n\ stmia %0!, {r3, r4, ip, lr} @ 4\n\
ldmia r1!, {r3, r4, ip, lr} @ 4+1\n\ ldmia %1!, {r3, r4, ip, lr} @ 4+1\n\
stmia r0!, {r3, r4, ip, lr} @ 4\n\ stmia %0!, {r3, r4, ip, lr} @ 4\n\
ldmia r1!, {r3, r4, ip, lr} @ 4\n\ ldmia %1!, {r3, r4, ip, lr} @ 4\n\
mcr p15, 0, r0, c7, c6, 1 @ 1 invalidate D line\n\ mcr p15, 0, %0, c7, c6, 1 @ 1 invalidate D line\n\
stmia r0!, {r3, r4, ip, lr} @ 4\n\ stmia %0!, {r3, r4, ip, lr} @ 4\n\
ldmia r1!, {r3, r4, ip, lr} @ 4\n\ ldmia %1!, {r3, r4, ip, lr} @ 4\n\
subs r2, r2, #1 @ 1\n\ subs %2, %2, #1 @ 1\n\
stmia r0!, {r3, r4, ip, lr} @ 4\n\ stmia %0!, {r3, r4, ip, lr} @ 4\n\
ldmneia r1!, {r3, r4, ip, lr} @ 4\n\ ldmneia %1!, {r3, r4, ip, lr} @ 4\n\
bne 1b @ 1\n\ bne 1b @ 1\n\
mcr p15, 0, r1, c7, c10, 4 @ 1 drain WB\n\ mcr p15, 0, %1, c7, c10, 4 @ 1 drain WB"
ldmfd sp!, {r4, pc} @ 3" : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp)
: : "2" (PAGE_SIZE / 64)
: "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64)); : "r3", "r4", "ip", "lr");
} }
void v4wb_copy_user_highpage(struct page *to, struct page *from, void v4wb_copy_user_highpage(struct page *to, struct page *from,

View File

@ -20,27 +20,26 @@
* dirty data in the cache. However, we do have to ensure that * dirty data in the cache. However, we do have to ensure that
* subsequent reads are up to date. * subsequent reads are up to date.
*/ */
static void __naked static void v4wt_copy_user_page(void *kto, const void *kfrom)
v4wt_copy_user_page(void *kto, const void *kfrom)
{ {
asm("\ int tmp;
stmfd sp!, {r4, lr} @ 2\n\
mov r2, %2 @ 1\n\ asm volatile ("\
ldmia r1!, {r3, r4, ip, lr} @ 4\n\ ldmia %1!, {r3, r4, ip, lr} @ 4\n\
1: stmia r0!, {r3, r4, ip, lr} @ 4\n\ 1: stmia %0!, {r3, r4, ip, lr} @ 4\n\
ldmia r1!, {r3, r4, ip, lr} @ 4+1\n\ ldmia %1!, {r3, r4, ip, lr} @ 4+1\n\
stmia r0!, {r3, r4, ip, lr} @ 4\n\ stmia %0!, {r3, r4, ip, lr} @ 4\n\
ldmia r1!, {r3, r4, ip, lr} @ 4\n\ ldmia %1!, {r3, r4, ip, lr} @ 4\n\
stmia r0!, {r3, r4, ip, lr} @ 4\n\ stmia %0!, {r3, r4, ip, lr} @ 4\n\
ldmia r1!, {r3, r4, ip, lr} @ 4\n\ ldmia %1!, {r3, r4, ip, lr} @ 4\n\
subs r2, r2, #1 @ 1\n\ subs %2, %2, #1 @ 1\n\
stmia r0!, {r3, r4, ip, lr} @ 4\n\ stmia %0!, {r3, r4, ip, lr} @ 4\n\
ldmneia r1!, {r3, r4, ip, lr} @ 4\n\ ldmneia %1!, {r3, r4, ip, lr} @ 4\n\
bne 1b @ 1\n\ bne 1b @ 1\n\
mcr p15, 0, r2, c7, c7, 0 @ flush ID cache\n\ mcr p15, 0, %2, c7, c7, 0 @ flush ID cache"
ldmfd sp!, {r4, pc} @ 3" : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp)
: : "2" (PAGE_SIZE / 64)
: "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64)); : "r3", "r4", "ip", "lr");
} }
void v4wt_copy_user_highpage(struct page *to, struct page *from, void v4wt_copy_user_highpage(struct page *to, struct page *from,

View File

@ -21,53 +21,46 @@
/* /*
* XSC3 optimised copy_user_highpage * XSC3 optimised copy_user_highpage
* r0 = destination
* r1 = source
* *
* The source page may have some clean entries in the cache already, but we * The source page may have some clean entries in the cache already, but we
* can safely ignore them - break_cow() will flush them out of the cache * can safely ignore them - break_cow() will flush them out of the cache
* if we eventually end up using our copied page. * if we eventually end up using our copied page.
* *
*/ */
static void __naked static void xsc3_mc_copy_user_page(void *kto, const void *kfrom)
xsc3_mc_copy_user_page(void *kto, const void *kfrom)
{ {
asm("\ int tmp;
stmfd sp!, {r4, r5, lr} \n\
mov lr, %2 \n\ asm volatile ("\
pld [%1, #0] \n\
pld [%1, #32] \n\
1: pld [%1, #64] \n\
pld [%1, #96] \n\
\n\ \n\
pld [r1, #0] \n\ 2: ldrd r2, [%1], #8 \n\
pld [r1, #32] \n\ ldrd r4, [%1], #8 \n\
1: pld [r1, #64] \n\ mcr p15, 0, %0, c7, c6, 1 @ invalidate\n\
pld [r1, #96] \n\ strd r2, [%0], #8 \n\
\n\ ldrd r2, [%1], #8 \n\
2: ldrd r2, [r1], #8 \n\ strd r4, [%0], #8 \n\
mov ip, r0 \n\ ldrd r4, [%1], #8 \n\
ldrd r4, [r1], #8 \n\ strd r2, [%0], #8 \n\
mcr p15, 0, ip, c7, c6, 1 @ invalidate\n\ strd r4, [%0], #8 \n\
strd r2, [r0], #8 \n\ ldrd r2, [%1], #8 \n\
ldrd r2, [r1], #8 \n\ ldrd r4, [%1], #8 \n\
strd r4, [r0], #8 \n\ mcr p15, 0, %0, c7, c6, 1 @ invalidate\n\
ldrd r4, [r1], #8 \n\ strd r2, [%0], #8 \n\
strd r2, [r0], #8 \n\ ldrd r2, [%1], #8 \n\
strd r4, [r0], #8 \n\ subs %2, %2, #1 \n\
ldrd r2, [r1], #8 \n\ strd r4, [%0], #8 \n\
mov ip, r0 \n\ ldrd r4, [%1], #8 \n\
ldrd r4, [r1], #8 \n\ strd r2, [%0], #8 \n\
mcr p15, 0, ip, c7, c6, 1 @ invalidate\n\ strd r4, [%0], #8 \n\
strd r2, [r0], #8 \n\
ldrd r2, [r1], #8 \n\
subs lr, lr, #1 \n\
strd r4, [r0], #8 \n\
ldrd r4, [r1], #8 \n\
strd r2, [r0], #8 \n\
strd r4, [r0], #8 \n\
bgt 1b \n\ bgt 1b \n\
beq 2b \n\ beq 2b "
\n\ : "+&r" (kto), "+&r" (kfrom), "=&r" (tmp)
ldmfd sp!, {r4, r5, pc}" : "2" (PAGE_SIZE / 64 - 1)
: : "r2", "r3", "r4", "r5");
: "r" (kto), "r" (kfrom), "I" (PAGE_SIZE / 64 - 1));
} }
void xsc3_mc_copy_user_highpage(struct page *to, struct page *from, void xsc3_mc_copy_user_highpage(struct page *to, struct page *from,
@ -85,8 +78,6 @@ void xsc3_mc_copy_user_highpage(struct page *to, struct page *from,
/* /*
* XScale optimised clear_user_page * XScale optimised clear_user_page
* r0 = destination
* r1 = virtual user address of ultimate destination page
*/ */
void xsc3_mc_clear_user_highpage(struct page *page, unsigned long vaddr) void xsc3_mc_clear_user_highpage(struct page *page, unsigned long vaddr)
{ {

View File

@ -36,52 +36,51 @@ static DEFINE_RAW_SPINLOCK(minicache_lock);
* Dcache aliasing issue. The writes will be forwarded to the write buffer, * Dcache aliasing issue. The writes will be forwarded to the write buffer,
* and merged as appropriate. * and merged as appropriate.
*/ */
static void __naked static void mc_copy_user_page(void *from, void *to)
mc_copy_user_page(void *from, void *to)
{ {
int tmp;
/* /*
* Strangely enough, best performance is achieved * Strangely enough, best performance is achieved
* when prefetching destination as well. (NP) * when prefetching destination as well. (NP)
*/ */
asm volatile( asm volatile ("\
"stmfd sp!, {r4, r5, lr} \n\ pld [%0, #0] \n\
mov lr, %2 \n\ pld [%0, #32] \n\
pld [r0, #0] \n\ pld [%1, #0] \n\
pld [r0, #32] \n\ pld [%1, #32] \n\
pld [r1, #0] \n\ 1: pld [%0, #64] \n\
pld [r1, #32] \n\ pld [%0, #96] \n\
1: pld [r0, #64] \n\ pld [%1, #64] \n\
pld [r0, #96] \n\ pld [%1, #96] \n\
pld [r1, #64] \n\ 2: ldrd r2, [%0], #8 \n\
pld [r1, #96] \n\ ldrd r4, [%0], #8 \n\
2: ldrd r2, [r0], #8 \n\ mov ip, %1 \n\
ldrd r4, [r0], #8 \n\ strd r2, [%1], #8 \n\
mov ip, r1 \n\ ldrd r2, [%0], #8 \n\
strd r2, [r1], #8 \n\ strd r4, [%1], #8 \n\
ldrd r2, [r0], #8 \n\ ldrd r4, [%0], #8 \n\
strd r4, [r1], #8 \n\ strd r2, [%1], #8 \n\
ldrd r4, [r0], #8 \n\ strd r4, [%1], #8 \n\
strd r2, [r1], #8 \n\
strd r4, [r1], #8 \n\
mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\
ldrd r2, [r0], #8 \n\ ldrd r2, [%0], #8 \n\
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\
ldrd r4, [r0], #8 \n\ ldrd r4, [%0], #8 \n\
mov ip, r1 \n\ mov ip, %1 \n\
strd r2, [r1], #8 \n\ strd r2, [%1], #8 \n\
ldrd r2, [r0], #8 \n\ ldrd r2, [%0], #8 \n\
strd r4, [r1], #8 \n\ strd r4, [%1], #8 \n\
ldrd r4, [r0], #8 \n\ ldrd r4, [%0], #8 \n\
strd r2, [r1], #8 \n\ strd r2, [%1], #8 \n\
strd r4, [r1], #8 \n\ strd r4, [%1], #8 \n\
mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\ mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\
subs lr, lr, #1 \n\ subs %2, %2, #1 \n\
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\ mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\
bgt 1b \n\ bgt 1b \n\
beq 2b \n\ beq 2b "
ldmfd sp!, {r4, r5, pc} " : "+&r" (from), "+&r" (to), "=&r" (tmp)
: : "2" (PAGE_SIZE / 64 - 1)
: "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1)); : "r2", "r3", "r4", "r5", "ip");
} }
void xscale_mc_copy_user_highpage(struct page *to, struct page *from, void xscale_mc_copy_user_highpage(struct page *to, struct page *from,