/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_INST_H
#define _ASM_POWERPC_INST_H

#include <asm/ppc-opcode.h>
#include <asm/reg.h>
#include <asm/disassemble.h>
#include <asm/uaccess.h>

#define ___get_user_instr(gu_op, dest, ptr)				\
({									\
	long __gui_ret;							\
	u32 __user *__gui_ptr = (u32 __user *)ptr;			\
	ppc_inst_t __gui_inst;						\
	unsigned int __prefix, __suffix;				\
									\
	__chk_user_ptr(ptr);						\
	__gui_ret = gu_op(__prefix, __gui_ptr);				\
	if (__gui_ret == 0) {						\
		if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) { \
			__gui_ret = gu_op(__suffix, __gui_ptr + 1);	\
			__gui_inst = ppc_inst_prefix(__prefix, __suffix); \
		} else {						\
			__gui_inst = ppc_inst(__prefix);		\
		}							\
		if (__gui_ret == 0)					\
			(dest) = __gui_inst;				\
	}								\
	__gui_ret;							\
})

#define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr)

#define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr)
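
/*
 * Illustrative usage (not part of the original header; usr_ptr is a
 * placeholder): fetch one possibly prefixed instruction from user memory.
 * Like get_user(), the accessor returns 0 on success or a negative error on
 * a faulting access, and @dest is only written once the whole instruction
 * (prefix and, if present, suffix) has been read.
 *
 *	ppc_inst_t insn;
 *
 *	if (get_user_instr(insn, (u32 __user *)usr_ptr))
 *		return -EFAULT;
 *	if (ppc_inst_prefixed(insn))
 *		...;
 */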

/*
 * Instruction data type for POWER
 */
#if defined(CONFIG_PPC64) || defined(__CHECKER__)
static inline u32 ppc_inst_val(ppc_inst_t x)
{
	return x.val;
}

#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })

#else
static inline u32 ppc_inst_val(ppc_inst_t x)
{
	return x;
}

#define ppc_inst(x) (x)
#endif
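
/*
 * Note added for clarity (ppc_inst_t itself is defined elsewhere): on PPC64,
 * and for sparse (__CHECKER__), ppc_inst_t is a struct carrying the first
 * instruction word in .val plus a .suffix word for prefixed instructions, so
 * ppc_inst()/ppc_inst_val() wrap and unwrap the struct.  On PPC32 it is a
 * bare u32 and both are effectively no-ops, e.g.:
 *
 *	ppc_inst_t nop = ppc_inst(PPC_RAW_NOP());
 *	u32 word = ppc_inst_val(nop);	// 0x60000000 (ori 0,0,0)
 */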

static inline int ppc_inst_primary_opcode(ppc_inst_t x)
{
	return ppc_inst_val(x) >> 26;
}
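
/*
 * The primary (major) opcode is the top 6 bits of the first instruction
 * word; OP_PREFIX is the primary opcode that marks an ISA 3.1 prefixed
 * instruction, which carries a second (suffix) word.  For example,
 * ppc_inst_primary_opcode(ppc_inst(0x60000000)) is 24 (ori).
 */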

#ifdef CONFIG_PPC64
#define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) })

static inline u32 ppc_inst_suffix(ppc_inst_t x)
{
	return x.suffix;
}

#else
#define ppc_inst_prefix(x, y) ((void)y, ppc_inst(x))

static inline u32 ppc_inst_suffix(ppc_inst_t x)
{
	return 0;
}

#endif /* CONFIG_PPC64 */

static inline ppc_inst_t ppc_inst_read(const u32 *ptr)
{
	if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX)
		return ppc_inst_prefix(*ptr, *(ptr + 1));
	else
		return ppc_inst(*ptr);
}
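
/*
 * Illustrative use (not from the original source; p is a placeholder):
 * decode an instruction that is already mapped and safely readable.  The
 * suffix word is only dereferenced when the first word carries the prefix
 * opcode.
 *
 *	ppc_inst_t insn = ppc_inst_read(p);
 *	pr_debug("insn at %px is %d bytes\n", p, ppc_inst_len(insn));
 */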

static inline bool ppc_inst_prefixed(ppc_inst_t x)
{
	return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX;
}

static inline ppc_inst_t ppc_inst_swab(ppc_inst_t x)
{
	return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x)));
}

static inline bool ppc_inst_equal(ppc_inst_t x, ppc_inst_t y)
{
	if (ppc_inst_val(x) != ppc_inst_val(y))
		return false;
	if (!ppc_inst_prefixed(x))
		return true;
	return ppc_inst_suffix(x) == ppc_inst_suffix(y);
}

static inline int ppc_inst_len(ppc_inst_t x)
{
	return ppc_inst_prefixed(x) ? 8 : 4;
}

/*
 * Return the address of the next instruction, if the instruction @value was
 * located at @location.
 */
static inline u32 *ppc_inst_next(u32 *location, u32 *value)
{
	ppc_inst_t tmp;

	tmp = ppc_inst_read(value);

	return (void *)location + ppc_inst_len(tmp);
}
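
/*
 * @value is taken as a pointer rather than by value so that the helper can
 * use ppc_inst_read() and only touch the suffix word when the instruction is
 * actually prefixed; reading a full ppc_inst_t up front could run 4 bytes
 * past the end of a page.  Typical (illustrative) iteration, with handle()
 * standing in for the caller's work:
 *
 *	for (p = start; p < end; p = ppc_inst_next(p, p))
 *		handle(ppc_inst_read(p));
 */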

static inline unsigned long ppc_inst_as_ulong(ppc_inst_t x)
{
	if (IS_ENABLED(CONFIG_PPC32))
		return ppc_inst_val(x);
	else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
		return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x);
	else
		return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x);
}
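
/*
 * Layout note: the returned value matches the in-memory order of the two
 * words.  On 64-bit little-endian the prefix sits in the low half and the
 * suffix in the high half; on big-endian it is the reverse; on PPC32 the
 * result is just the 32-bit instruction.  Storing this value as a u64 is
 * therefore equivalent to storing the prefix then the suffix at increasing
 * addresses, which is what ppc_inst_write() below relies on.
 */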

static inline void ppc_inst_write(u32 *ptr, ppc_inst_t x)
{
	if (!ppc_inst_prefixed(x))
		*ptr = ppc_inst_val(x);
	else
		*(u64 *)ptr = ppc_inst_as_ulong(x);
}
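
/*
 * The open-coded __get_kernel_nofault() accesses below avoid calling the
 * out-of-line copy_from_kernel_nofault() and its generic copy loop.
 * pagefault_disable() is not needed here because powerpc's do_page_fault()
 * bails out early for a kernel-mode fault on a kernel address, and
 * copy_inst_from_kernel_nofault() only accepts kernel addresses.
 */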
static inline int __copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
{
	unsigned int val, suffix;

	/* See https://github.com/ClangBuiltLinux/linux/issues/1521 */
#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 140000
	val = suffix = 0;
#endif
	__get_kernel_nofault(&val, src, u32, Efault);
	if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
		__get_kernel_nofault(&suffix, src + 1, u32, Efault);
		*inst = ppc_inst_prefix(val, suffix);
	} else {
		*inst = ppc_inst(val);
	}
	return 0;
Efault:
	return -EFAULT;
}

static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
{
	if (unlikely(!is_kernel_addr((unsigned long)src)))
		return -ERANGE;

	return __copy_inst_from_kernel_nofault(inst, src);
}
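
/*
 * Illustrative caller (not part of the original header; ip is a placeholder),
 * e.g. a code-patching or tracing path peeking at kernel text that may be
 * unmapped:
 *
 *	ppc_inst_t old;
 *
 *	if (copy_inst_from_kernel_nofault(&old, (u32 *)ip))
 *		return -EFAULT;
 *
 * -ERANGE is returned for a non-kernel address, -EFAULT for a faulting read.
 */
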
#endif /* _ASM_POWERPC_INST_H */