2019-06-01 16:08:55 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0-only */
|
2016-06-23 00:25:06 +08:00
|
|
|
/*
|
|
|
|
* bpf_jit.h: BPF JIT compiler for PPC
|
2011-07-20 23:51:00 +08:00
|
|
|
*
|
|
|
|
* Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
|
2016-06-23 00:25:07 +08:00
|
|
|
* 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
|
2011-07-20 23:51:00 +08:00
|
|
|
*/
|
|
|
|
#ifndef _BPF_JIT_H
|
|
|
|
#define _BPF_JIT_H
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
2016-06-23 00:25:07 +08:00
|
|
|
#include <asm/types.h>
|
2020-06-24 19:30:35 +08:00
|
|
|
#include <asm/ppc-opcode.h>
|
2016-06-23 00:25:07 +08:00
|
|
|
|
|
|
|
/*
 * Size set aside for a function descriptor. It is non-zero only when
 * PPC64_ELF_ABI_v1 is defined.
 */
#ifndef PPC64_ELF_ABI_v1
#define FUNCTION_DESCR_SIZE	0
#else
#define FUNCTION_DESCR_SIZE	24
#endif
|
2011-07-20 23:51:00 +08:00
|
|
|
|
|
|
|
/*
 * Store instruction word @instr at slot @idx of buffer @d, then advance @idx.
 *
 * When @d is NULL nothing is stored but @idx is still incremented, so a
 * sequence can be run without an output buffer and still be counted.
 *
 * @idx must be a modifiable lvalue. Both @idx and @instr are parenthesised
 * in the expansion so that an argument such as *p advances the counter it
 * points to rather than the pointer itself.
 */
#define PLANT_INSTR(d, idx, instr)					      \
	do { if (d) { (d)[(idx)] = (instr); } (idx)++; } while (0)

/*
 * Emit @instr using the caller's local 'image' buffer and 'ctx' context;
 * both names must be in scope at the point of use.
 */
#define EMIT(instr)		PLANT_INSTR(image, ctx->idx, instr)
|
|
|
|
|
|
|
|
/*
 * Long jump: emit an unconditional branch to 'dest'.
 *
 * The displacement is 'dest' minus the current position (ctx->idx * 4).
 * When it is outside the unconditional branch range, an error is logged
 * and the *enclosing function* returns -ERANGE, so this macro can only be
 * used in functions whose return type can carry that value.
 */
#define PPC_JMP(dest)							      \
	do {								      \
		long offset = (long)(dest) - (ctx->idx * 4);		      \
									      \
		if (is_offset_in_branch_range(offset)) {		      \
			EMIT(PPC_RAW_BRANCH(offset));			      \
		} else {						      \
			pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \
			return -ERANGE;					      \
		}							      \
	} while (0)
|
|
|
|
|
2021-04-12 19:44:18 +08:00
|
|
|
/*
 * bl (unconditional branch with link) to an absolute address.
 *
 * 'dest' is the absolute target; the instruction encodes its distance from
 * the slot being emitted (image + ctx->idx), masked to the 0x03fffffc
 * displacement field. No range check is done here.
 */
#define PPC_BL_ABS(dest)						      \
	EMIT((((dest) - (unsigned long)(image + ctx->idx)) & 0x03fffffc) |   \
	     PPC_INST_BL)
|
2011-07-20 23:51:00 +08:00
|
|
|
/*
 * Short conditional branch to 'dest'.
 *
 * "cond" covers the BO:BI fields (10 bits, placed at bit 16 of the
 * instruction). The displacement is 'dest' minus the current position
 * (ctx->idx * 4); if it is outside the conditional branch range, an error
 * is logged and the *enclosing function* returns -ERANGE.
 */
#define PPC_BCC_SHORT(cond, dest)					      \
	do {								      \
		long offset = (long)(dest) - (ctx->idx * 4);		      \
									      \
		if (is_offset_in_cond_branch_range(offset)) {		      \
			EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \
		} else {						      \
			pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \
			return -ERANGE;					      \
		}							      \
	} while (0)
|
|
|
|
|
2016-06-23 00:25:02 +08:00
|
|
|
/*
 * Sign-extended 32-bit immediate load of 'i' into register 'd'.
 *
 * A value that fits in a signed 16-bit field takes a single li. Anything
 * else is built with lis for the high half, followed by ori for the low
 * half unless the low half is zero.
 */
#define PPC_LI32(d, i)							      \
	do {								      \
		if ((int)(uintptr_t)(i) < -32768 ||			      \
		    (int)(uintptr_t)(i) >= 32768) {			      \
			EMIT(PPC_RAW_LIS(d, IMM_H(i)));			      \
			if (IMM_L(i)) {					      \
				EMIT(PPC_RAW_ORI(d, d, IMM_L(i)));	      \
			}						      \
		} else {						      \
			EMIT(PPC_RAW_LI(d, i));				      \
		}							      \
	} while (0)
|
2016-06-23 00:25:02 +08:00
|
|
|
|
2021-03-23 00:37:52 +08:00
|
|
|
#if defined(CONFIG_PPC32)
/*
 * Load into register 'r' the sign extension of immediate 'i':
 * -1 when 'i' is negative, 0 otherwise.
 */
#define PPC_EX32(r, i)		EMIT(PPC_RAW_LI((r), (i) >= 0 ? 0 : -1))
#endif
|
|
|
|
|
2011-07-20 23:51:00 +08:00
|
|
|
/*
 * Load the 64-bit immediate 'i' into register 'd'.
 *
 * Values that fit in a sign-extended 32 bits are handed to PPC_LI32().
 * Anything else is assembled in halves: the upper 32 bits are built first
 * (a single li when the top 17 bits are clear, otherwise lis plus an
 * optional ori), then shifted left by 32, and finally the lower 32 bits
 * are or-ed in 16 bits at a time, skipping any 16-bit chunk that is zero.
 */
#define PPC_LI64(d, i)							      \
	do {								      \
		if ((long)(i) < -2147483648 ||				      \
		    (long)(i) >= 2147483648) {				      \
			if ((uintptr_t)(i) & 0xffff800000000000ULL) {	      \
				EMIT(PPC_RAW_LIS(d, ((uintptr_t)(i) >> 48))); \
				if ((uintptr_t)(i) & 0x0000ffff00000000ULL) { \
					EMIT(PPC_RAW_ORI(d, d,		      \
					     ((uintptr_t)(i) >> 32) & 0xffff)); \
				}					      \
			} else {					      \
				EMIT(PPC_RAW_LI(d, ((uintptr_t)(i) >> 32) &   \
						   0xffff));		      \
			}						      \
			EMIT(PPC_RAW_SLDI(d, d, 32));			      \
			if ((uintptr_t)(i) & 0x00000000ffff0000ULL) {	      \
				EMIT(PPC_RAW_ORIS(d, d,			      \
				     ((uintptr_t)(i) >> 16) & 0xffff));	      \
			}						      \
			if ((uintptr_t)(i) & 0x000000000000ffffULL) {	      \
				EMIT(PPC_RAW_ORI(d, d,			      \
				     (uintptr_t)(i) & 0xffff));		      \
			}						      \
		} else {						      \
			PPC_LI32(d, i);					      \
		}							      \
	} while (0)
|
2011-07-20 23:51:00 +08:00
|
|
|
|
2015-02-17 15:04:40 +08:00
|
|
|
/*
 * Load function address 'i' into register 'd': a 64-bit immediate load
 * when CONFIG_PPC64 is set, a 32-bit one otherwise.
 */
#if defined(CONFIG_PPC64)
#define PPC_FUNC_ADDR(d, i)	PPC_LI64(d, i)
#else
#define PPC_FUNC_ADDR(d, i)	PPC_LI32(d, i)
#endif
|
|
|
|
|
2011-07-20 23:51:00 +08:00
|
|
|
/*
 * Conditional branch to 'dest' that always occupies two instruction slots.
 *
 * If the emitted size depended on how far the branch reaches, code would
 * move from one pass to the next and several passes would be needed to
 * converge on a stable state. That is avoided by padding the short branch
 * case with a NOP so both cases have the same length:
 *
 *  - target within conditional branch range: short branch, then NOP;
 *  - otherwise: inverted short branch over the next instruction, then a
 *    long jump to 'dest'.
 *
 * 'cond' is parenthesised before the true/false bit is flipped so that a
 * compound argument (e.g. COND_CMP_TRUE | CR0_EQ) is inverted as a whole.
 * Like PPC_BCC_SHORT() and PPC_JMP(), this may make the enclosing function
 * return -ERANGE.
 */
#define PPC_BCC(cond, dest)	do {					      \
		if (is_offset_in_cond_branch_range((long)(dest) - (ctx->idx * 4))) {	\
			PPC_BCC_SHORT(cond, dest);			      \
			EMIT(PPC_RAW_NOP());				      \
		} else {						      \
			/* Flip the 'T or F' bit to invert comparison */      \
			PPC_BCC_SHORT((cond) ^ COND_CMP_TRUE, (ctx->idx+2)*4);  \
			PPC_JMP(dest);					      \
		} } while(0)
|
|
|
|
|
|
|
|
/*
 * A branch condition is made of two parts.
 *
 * First, the bit of cr0 to look at...
 */
#define CR0_LT		0
#define CR0_GT		1
#define CR0_EQ		2
/* ...second, the BO[3] setting that says which state of that bit counts */
#define COND_CMP_TRUE	0x100
#define COND_CMP_FALSE	0x000
/* Each comparison and its opposite share a cr0 bit and differ in BO[3]: */
#define COND_EQ		(COND_CMP_TRUE  | CR0_EQ)
#define COND_NE		(COND_CMP_FALSE | CR0_EQ)
#define COND_LT		(COND_CMP_TRUE  | CR0_LT)
#define COND_GE		(COND_CMP_FALSE | CR0_LT)
#define COND_GT		(COND_CMP_TRUE  | CR0_GT)
#define COND_LE		(COND_CMP_FALSE | CR0_GT)
|
2011-07-20 23:51:00 +08:00
|
|
|
|
2021-03-23 00:37:50 +08:00
|
|
|
/* Flag bits stored in the 'seen' word next to the per-register bits */
#define SEEN_FUNC		0x20000000	/* external helpers might be called */
#define SEEN_TAILCALL		0x40000000	/* tail calls are used */
|
2021-03-23 00:37:48 +08:00
|
|
|
|
powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC32
When the BPF routine doesn't call any function, the non volatile
registers can be reallocated to volatile registers in order to
avoid having to save them/restore on the stack.
Before this patch, the test #359 ADD default X is:
0: 7c 64 1b 78 mr r4,r3
4: 38 60 00 00 li r3,0
8: 94 21 ff b0 stwu r1,-80(r1)
c: 60 00 00 00 nop
10: 92 e1 00 2c stw r23,44(r1)
14: 93 01 00 30 stw r24,48(r1)
18: 93 21 00 34 stw r25,52(r1)
1c: 93 41 00 38 stw r26,56(r1)
20: 39 80 00 00 li r12,0
24: 39 60 00 00 li r11,0
28: 3b 40 00 00 li r26,0
2c: 3b 20 00 00 li r25,0
30: 7c 98 23 78 mr r24,r4
34: 7c 77 1b 78 mr r23,r3
38: 39 80 00 42 li r12,66
3c: 39 60 00 00 li r11,0
40: 7d 8c d2 14 add r12,r12,r26
44: 39 60 00 00 li r11,0
48: 7d 83 63 78 mr r3,r12
4c: 82 e1 00 2c lwz r23,44(r1)
50: 83 01 00 30 lwz r24,48(r1)
54: 83 21 00 34 lwz r25,52(r1)
58: 83 41 00 38 lwz r26,56(r1)
5c: 38 21 00 50 addi r1,r1,80
60: 4e 80 00 20 blr
After this patch, the same test has become:
0: 7c 64 1b 78 mr r4,r3
4: 38 60 00 00 li r3,0
8: 94 21 ff b0 stwu r1,-80(r1)
c: 60 00 00 00 nop
10: 39 80 00 00 li r12,0
14: 39 60 00 00 li r11,0
18: 39 00 00 00 li r8,0
1c: 38 e0 00 00 li r7,0
20: 7c 86 23 78 mr r6,r4
24: 7c 65 1b 78 mr r5,r3
28: 39 80 00 42 li r12,66
2c: 39 60 00 00 li r11,0
30: 7d 8c 42 14 add r12,r12,r8
34: 39 60 00 00 li r11,0
38: 7d 83 63 78 mr r3,r12
3c: 38 21 00 50 addi r1,r1,80
40: 4e 80 00 20 blr
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/b94562d7d2bb21aec89de0c40bb3cd91054b65a2.1616430991.git.christophe.leroy@csgroup.eu
2021-03-23 00:37:53 +08:00
|
|
|
/* Masks over the per-register bits of the 'seen' word */
#define SEEN_VREG_MASK		0x1ff80000	/* r3-r12: volatile registers */
#define SEEN_NVREG_MASK		0x0003ffff	/* r14-r31: non volatile registers */
|
|
|
|
|
|
|
|
#ifdef CONFIG_PPC64
|
|
|
|
extern const int b2p[MAX_BPF_JIT_REG + 2];
|
|
|
|
#else
|
|
|
|
extern const int b2p[MAX_BPF_JIT_REG + 1];
|
|
|
|
#endif
|
|
|
|
|
2021-03-23 00:37:48 +08:00
|
|
|
struct codegen_context {
|
|
|
|
/*
|
|
|
|
* This is used to track register usage as well
|
|
|
|
* as calls to external helpers.
|
|
|
|
* - register usage is tracked with corresponding
|
2021-03-23 00:37:50 +08:00
|
|
|
* bits (r3-r31)
|
2021-03-23 00:37:48 +08:00
|
|
|
* - rest of the bits can be used to track other
|
2021-03-23 00:37:50 +08:00
|
|
|
* things -- for now, we use bits 0 to 2
|
2021-03-23 00:37:48 +08:00
|
|
|
* encoded in SEEN_* macros above
|
|
|
|
*/
|
|
|
|
unsigned int seen;
|
|
|
|
unsigned int idx;
|
|
|
|
unsigned int stack_size;
|
powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC32
When the BPF routine doesn't call any function, the non volatile
registers can be reallocated to volatile registers in order to
avoid having to save them/restore on the stack.
Before this patch, the test #359 ADD default X is:
0: 7c 64 1b 78 mr r4,r3
4: 38 60 00 00 li r3,0
8: 94 21 ff b0 stwu r1,-80(r1)
c: 60 00 00 00 nop
10: 92 e1 00 2c stw r23,44(r1)
14: 93 01 00 30 stw r24,48(r1)
18: 93 21 00 34 stw r25,52(r1)
1c: 93 41 00 38 stw r26,56(r1)
20: 39 80 00 00 li r12,0
24: 39 60 00 00 li r11,0
28: 3b 40 00 00 li r26,0
2c: 3b 20 00 00 li r25,0
30: 7c 98 23 78 mr r24,r4
34: 7c 77 1b 78 mr r23,r3
38: 39 80 00 42 li r12,66
3c: 39 60 00 00 li r11,0
40: 7d 8c d2 14 add r12,r12,r26
44: 39 60 00 00 li r11,0
48: 7d 83 63 78 mr r3,r12
4c: 82 e1 00 2c lwz r23,44(r1)
50: 83 01 00 30 lwz r24,48(r1)
54: 83 21 00 34 lwz r25,52(r1)
58: 83 41 00 38 lwz r26,56(r1)
5c: 38 21 00 50 addi r1,r1,80
60: 4e 80 00 20 blr
After this patch, the same test has become:
0: 7c 64 1b 78 mr r4,r3
4: 38 60 00 00 li r3,0
8: 94 21 ff b0 stwu r1,-80(r1)
c: 60 00 00 00 nop
10: 39 80 00 00 li r12,0
14: 39 60 00 00 li r11,0
18: 39 00 00 00 li r8,0
1c: 38 e0 00 00 li r7,0
20: 7c 86 23 78 mr r6,r4
24: 7c 65 1b 78 mr r5,r3
28: 39 80 00 42 li r12,66
2c: 39 60 00 00 li r11,0
30: 7d 8c 42 14 add r12,r12,r8
34: 39 60 00 00 li r11,0
38: 7d 83 63 78 mr r3,r12
3c: 38 21 00 50 addi r1,r1,80
40: 4e 80 00 20 blr
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/b94562d7d2bb21aec89de0c40bb3cd91054b65a2.1616430991.git.christophe.leroy@csgroup.eu
2021-03-23 00:37:53 +08:00
|
|
|
int b2p[ARRAY_SIZE(b2p)];
|
2021-03-23 00:37:48 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Issue a write barrier and then flush the instruction cache for the
 * address range given by @start and @end.
 */
static inline void bpf_flush_icache(void *start, void *end)
{
	unsigned long from = (unsigned long)start;
	unsigned long to = (unsigned long)end;

	/* smp write barrier */
	smp_wmb();
	flush_icache_range(from, to);
}
|
|
|
|
|
|
|
|
static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
|
|
|
|
{
|
|
|
|
return ctx->seen & (1 << (31 - i));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
|
|
|
|
{
|
|
|
|
ctx->seen |= 1 << (31 - i);
|
|
|
|
}
|
|
|
|
|
powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC32
When the BPF routine doesn't call any function, the non volatile
registers can be reallocated to volatile registers in order to
avoid having to save them/restore on the stack.
Before this patch, the test #359 ADD default X is:
0: 7c 64 1b 78 mr r4,r3
4: 38 60 00 00 li r3,0
8: 94 21 ff b0 stwu r1,-80(r1)
c: 60 00 00 00 nop
10: 92 e1 00 2c stw r23,44(r1)
14: 93 01 00 30 stw r24,48(r1)
18: 93 21 00 34 stw r25,52(r1)
1c: 93 41 00 38 stw r26,56(r1)
20: 39 80 00 00 li r12,0
24: 39 60 00 00 li r11,0
28: 3b 40 00 00 li r26,0
2c: 3b 20 00 00 li r25,0
30: 7c 98 23 78 mr r24,r4
34: 7c 77 1b 78 mr r23,r3
38: 39 80 00 42 li r12,66
3c: 39 60 00 00 li r11,0
40: 7d 8c d2 14 add r12,r12,r26
44: 39 60 00 00 li r11,0
48: 7d 83 63 78 mr r3,r12
4c: 82 e1 00 2c lwz r23,44(r1)
50: 83 01 00 30 lwz r24,48(r1)
54: 83 21 00 34 lwz r25,52(r1)
58: 83 41 00 38 lwz r26,56(r1)
5c: 38 21 00 50 addi r1,r1,80
60: 4e 80 00 20 blr
After this patch, the same test has become:
0: 7c 64 1b 78 mr r4,r3
4: 38 60 00 00 li r3,0
8: 94 21 ff b0 stwu r1,-80(r1)
c: 60 00 00 00 nop
10: 39 80 00 00 li r12,0
14: 39 60 00 00 li r11,0
18: 39 00 00 00 li r8,0
1c: 38 e0 00 00 li r7,0
20: 7c 86 23 78 mr r6,r4
24: 7c 65 1b 78 mr r5,r3
28: 39 80 00 42 li r12,66
2c: 39 60 00 00 li r11,0
30: 7d 8c 42 14 add r12,r12,r8
34: 39 60 00 00 li r11,0
38: 7d 83 63 78 mr r3,r12
3c: 38 21 00 50 addi r1,r1,80
40: 4e 80 00 20 blr
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/b94562d7d2bb21aec89de0c40bb3cd91054b65a2.1616430991.git.christophe.leroy@csgroup.eu
2021-03-23 00:37:53 +08:00
|
|
|
static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
|
|
|
|
{
|
|
|
|
ctx->seen &= ~(1 << (31 - i));
|
|
|
|
}
|
|
|
|
|
2021-03-23 00:37:49 +08:00
|
|
|
/*
 * Code-generation entry points; the implementations live outside this
 * header. NOTE(review): 'image' looks like the instruction buffer written
 * through EMIT() and 'ctx' the matching codegen_context -- confirm against
 * the implementations.
 */
void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
				u64 func);
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
		       struct codegen_context *ctx, u32 *addrs);
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
|
powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC32
When the BPF routine doesn't call any function, the non volatile
registers can be reallocated to volatile registers in order to
avoid having to save them/restore on the stack.
Before this patch, the test #359 ADD default X is:
0: 7c 64 1b 78 mr r4,r3
4: 38 60 00 00 li r3,0
8: 94 21 ff b0 stwu r1,-80(r1)
c: 60 00 00 00 nop
10: 92 e1 00 2c stw r23,44(r1)
14: 93 01 00 30 stw r24,48(r1)
18: 93 21 00 34 stw r25,52(r1)
1c: 93 41 00 38 stw r26,56(r1)
20: 39 80 00 00 li r12,0
24: 39 60 00 00 li r11,0
28: 3b 40 00 00 li r26,0
2c: 3b 20 00 00 li r25,0
30: 7c 98 23 78 mr r24,r4
34: 7c 77 1b 78 mr r23,r3
38: 39 80 00 42 li r12,66
3c: 39 60 00 00 li r11,0
40: 7d 8c d2 14 add r12,r12,r26
44: 39 60 00 00 li r11,0
48: 7d 83 63 78 mr r3,r12
4c: 82 e1 00 2c lwz r23,44(r1)
50: 83 01 00 30 lwz r24,48(r1)
54: 83 21 00 34 lwz r25,52(r1)
58: 83 41 00 38 lwz r26,56(r1)
5c: 38 21 00 50 addi r1,r1,80
60: 4e 80 00 20 blr
After this patch, the same test has become:
0: 7c 64 1b 78 mr r4,r3
4: 38 60 00 00 li r3,0
8: 94 21 ff b0 stwu r1,-80(r1)
c: 60 00 00 00 nop
10: 39 80 00 00 li r12,0
14: 39 60 00 00 li r11,0
18: 39 00 00 00 li r8,0
1c: 38 e0 00 00 li r7,0
20: 7c 86 23 78 mr r6,r4
24: 7c 65 1b 78 mr r5,r3
28: 39 80 00 42 li r12,66
2c: 39 60 00 00 li r11,0
30: 7d 8c 42 14 add r12,r12,r8
34: 39 60 00 00 li r11,0
38: 7d 83 63 78 mr r3,r12
3c: 38 21 00 50 addi r1,r1,80
40: 4e 80 00 20 blr
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/b94562d7d2bb21aec89de0c40bb3cd91054b65a2.1616430991.git.christophe.leroy@csgroup.eu
2021-03-23 00:37:53 +08:00
|
|
|
/*
 * Reallocate BPF registers from non volatile to volatile registers when
 * possible, so they need not be saved/restored on the stack.
 * NOTE(review): wording taken from the change that introduced this
 * declaration (PPC32, routines that call no function); the implementation
 * is outside this header -- confirm there.
 */
void bpf_jit_realloc_regs(struct codegen_context *ctx);
|
2021-03-23 00:37:49 +08:00
|
|
|
|
2011-07-20 23:51:00 +08:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#endif
|