OpenCloudOS-Kernel/arch/powerpc/net/bpf_jit.h

/* SPDX-License-Identifier: GPL-2.0-only */
/*
* bpf_jit.h: BPF JIT compiler for PPC
*
* Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
* 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
*/
#ifndef _BPF_JIT_H
#define _BPF_JIT_H
#ifndef __ASSEMBLY__
#include <asm/types.h>
#include <asm/ppc-opcode.h>
#ifdef CONFIG_PPC64_ELF_ABI_V1
#define FUNCTION_DESCR_SIZE 24
#else
#define FUNCTION_DESCR_SIZE 0
#endif
#define CTX_NIA(ctx) ((unsigned long)ctx->idx * 4)
#define PLANT_INSTR(d, idx, instr) \
do { if (d) { (d)[idx] = instr; } idx++; } while (0)
#define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr)
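/*
 * Note (added for clarity): on a sizing pass the JIT passes a NULL image, so
 * PLANT_INSTR() only advances ctx->idx without writing anything; a later pass
 * with a real buffer emits the instructions at the now-known offsets.
 */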
/* Long jump; (unconditional 'branch') */
#define PPC_JMP(dest) \
do { \
long offset = (long)(dest) - CTX_NIA(ctx); \
if ((dest) != 0 && !is_offset_in_branch_range(offset)) { \
pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \
return -ERANGE; \
} \
EMIT(PPC_RAW_BRANCH(offset)); \
} while (0)
/* bl (unconditional 'branch' with link) */
#define PPC_BL(dest) EMIT(PPC_RAW_BL((dest) - (unsigned long)(image + ctx->idx)))
/* "cond" here covers BO:BI fields. */
#define PPC_BCC_SHORT(cond, dest) \
do { \
long offset = (long)(dest) - CTX_NIA(ctx); \
if ((dest) != 0 && !is_offset_in_cond_branch_range(offset)) { \
pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \
return -ERANGE; \
} \
EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \
} while (0)
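/*
 * Reach note (illustrative): an unconditional branch (b) encodes a 26-bit
 * signed offset, roughly +/-32 MB, while a conditional branch (bc) only
 * encodes 16 bits, roughly +/-32 KB. Hence PPC_JMP() checks
 * is_offset_in_branch_range() but PPC_BCC_SHORT() must use the tighter
 * is_offset_in_cond_branch_range().
 */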
/* Sign-extended 32-bit immediate load */
#define PPC_LI32(d, i) do { \
if ((int)(uintptr_t)(i) >= -32768 && \
(int)(uintptr_t)(i) < 32768) \
EMIT(PPC_RAW_LI(d, i)); \
else { \
EMIT(PPC_RAW_LIS(d, IMM_H(i))); \
if (IMM_L(i)) \
EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \
} } while(0)
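/*
 * Illustrative expansion (sketch, register r9 chosen arbitrarily):
 * PPC_LI32(_R9, 0x12345678) emits
 *	lis	r9, 0x1234
 *	ori	r9, r9, 0x5678
 * while an immediate that already fits a signed 16-bit field, e.g.
 * PPC_LI32(_R9, -5), collapses to a single
 *	li	r9, -5
 */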
#ifdef CONFIG_PPC64
#define PPC_LI64(d, i) do { \
if ((long)(i) >= -2147483648 && \
(long)(i) < 2147483648) \
PPC_LI32(d, i); \
else { \
if (!((uintptr_t)(i) & 0xffff800000000000ULL)) \
EMIT(PPC_RAW_LI(d, ((uintptr_t)(i) >> 32) & \
0xffff)); \
else { \
EMIT(PPC_RAW_LIS(d, ((uintptr_t)(i) >> 48))); \
if ((uintptr_t)(i) & 0x0000ffff00000000ULL) \
EMIT(PPC_RAW_ORI(d, d, \
((uintptr_t)(i) >> 32) & 0xffff)); \
} \
EMIT(PPC_RAW_SLDI(d, d, 32)); \
if ((uintptr_t)(i) & 0x00000000ffff0000ULL) \
EMIT(PPC_RAW_ORIS(d, d, \
((uintptr_t)(i) >> 16) & 0xffff)); \
if ((uintptr_t)(i) & 0x000000000000ffffULL) \
EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \
0xffff)); \
} } while (0)
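/*
 * Illustrative expansion (sketch): PPC_LI64(_R9, 0x1234567890abcdefULL)
 * builds the constant 16 bits at a time:
 *	lis	r9, 0x1234
 *	ori	r9, r9, 0x5678
 *	sldi	r9, r9, 32
 *	oris	r9, r9, 0x90ab
 *	ori	r9, r9, 0xcdef
 * Constants whose upper 33 bits are all zeros or all ones take the shorter
 * PPC_LI32() path instead.
 */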
#endif
/*
* The fly in the ointment of code size changing from pass to pass is
* avoided by padding the short branch case with a NOP. If code size differs
* with different branch reaches we will have the issue of code moving from
* one pass to the next and will need a few passes to converge on a stable
* state.
*/
#define PPC_BCC(cond, dest) do { \
if (is_offset_in_cond_branch_range((long)(dest) - CTX_NIA(ctx))) { \
PPC_BCC_SHORT(cond, dest); \
EMIT(PPC_RAW_NOP()); \
} else { \
/* Flip the 'T or F' bit to invert comparison */ \
PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, CTX_NIA(ctx) + 2*4); \
PPC_JMP(dest); \
} } while(0)
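/*
 * Illustrative result (sketch): with an in-range target, PPC_BCC(COND_GT, dest)
 * becomes
 *	bgt	dest
 *	nop
 * and with an out-of-range target it becomes an inverted short branch over an
 * unconditional long branch:
 *	ble	. + 8
 *	b	dest
 * Either way two instructions are emitted, so code size stays stable across
 * passes.
 */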
/* To create a branch condition, select a bit of cr0... */
#define CR0_LT 0
#define CR0_GT 1
#define CR0_EQ 2
/* ...and modify BO[3] */
#define COND_CMP_TRUE 0x100
#define COND_CMP_FALSE 0x000
/* Together, they make all required comparisons: */
#define COND_GT (CR0_GT | COND_CMP_TRUE)
#define COND_GE (CR0_LT | COND_CMP_FALSE)
#define COND_EQ (CR0_EQ | COND_CMP_TRUE)
#define COND_NE (CR0_EQ | COND_CMP_FALSE)
#define COND_LT (CR0_LT | COND_CMP_TRUE)
#define COND_LE (CR0_GT | COND_CMP_FALSE)
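/*
 * Example use (illustrative, not lifted verbatim from the emitters): after a
 * compare such as EMIT(PPC_RAW_CMPLW(dst_reg, src_reg)), a BPF JGT could be
 * translated as PPC_BCC(COND_GT, addrs[i + 1 + off]), i.e. "branch to the
 * translated target when cr0 says greater-than".
 */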
#define SEEN_FUNC 0x20000000 /* might call external helpers */
#define SEEN_TAILCALL 0x40000000 /* uses tail calls */
struct codegen_context {
/*
* This is used to track register usage as well
* as calls to external helpers.
* - register usage is tracked with corresponding
* bits (r3-r31)
* - rest of the bits can be used to track other
* things -- for now, we use bits 0 to 2
* encoded in SEEN_* macros above
*/
unsigned int seen;
unsigned int idx;
unsigned int stack_size;
int b2p[MAX_BPF_JIT_REG + 2];
unsigned int exentry_idx;
unsigned int alt_exit_addr;
};
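/*
 * Example of the "seen" layout (illustrative): bpf_set_seen_register(ctx, 27)
 * below sets bit 1 << (31 - 27) == 0x10, while SEEN_FUNC and SEEN_TAILCALL
 * occupy high bits that no tracked GPR (r3-r31) maps to.
 */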
#define bpf_to_ppc(r) (ctx->b2p[r])
#ifdef CONFIG_PPC32
#define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */
#else
#define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */
#endif
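/*
 * Rough summary (not authoritative, see the bpf_add_extable_entry() users):
 * the fixup is expected to zero the destination register -- plus its high
 * half on PPC32, hence the extra instruction -- and branch back past the
 * faulting load.
 */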
static inline void bpf_flush_icache(void *start, void *end)
{
smp_wmb(); /* smp write barrier */
flush_icache_range((unsigned long)start, (unsigned long)end);
}
static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
{
return ctx->seen & (1 << (31 - i));
}
static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
{
ctx->seen |= 1 << (31 - i);
}
static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
{
ctx->seen &= ~(1 << (31 - i));
}
void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
u32 *addrs, int pass, bool extra_pass);
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
void bpf_jit_realloc_regs(struct codegen_context *ctx);
int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, int pass, struct codegen_context *ctx,
int insn_idx, int jmp_off, int dst_reg);
#endif
#endif