powerpc/bpf: Perform complete extra passes to update addresses
BPF core calls the jit compiler again for an extra pass in order to properly set subprog addresses. Unlike other architectures, powerpc only updates the addresses during that extra pass. This means that holes must be left in the code to accommodate the maximum possible instruction size. In order to avoid wasting space, and wasting CPU time on powerpc processors on which the NOP instruction is not 0-cycle, perform two real additional passes. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/d484a4ac95949ff55fc4344b674e7c0d3ddbfcd5.1675245773.git.christophe.leroy@csgroup.eu
This commit is contained in:
parent
7dd0e28487
commit
85e031154c
|
@ -169,7 +169,7 @@ static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
|
||||||
void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
|
void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
|
||||||
int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
|
int bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
|
||||||
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
|
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
|
||||||
u32 *addrs, int pass);
|
u32 *addrs, int pass, bool extra_pass);
|
||||||
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
|
void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
|
||||||
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
|
void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
|
||||||
void bpf_jit_realloc_regs(struct codegen_context *ctx);
|
void bpf_jit_realloc_regs(struct codegen_context *ctx);
|
||||||
|
|
|
@ -23,74 +23,6 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
|
||||||
memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
|
memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */
|
|
||||||
static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image,
|
|
||||||
struct codegen_context *ctx, u32 *addrs)
|
|
||||||
{
|
|
||||||
const struct bpf_insn *insn = fp->insnsi;
|
|
||||||
bool func_addr_fixed;
|
|
||||||
u64 func_addr;
|
|
||||||
u32 tmp_idx;
|
|
||||||
int i, j, ret;
|
|
||||||
|
|
||||||
for (i = 0; i < fp->len; i++) {
|
|
||||||
/*
|
|
||||||
* During the extra pass, only the branch target addresses for
|
|
||||||
* the subprog calls need to be fixed. All other instructions
|
|
||||||
* can be left untouched.
|
|
||||||
*
|
|
||||||
* The JITed image length does not change because we already
|
|
||||||
* ensure that the JITed instruction sequence for these calls
|
|
||||||
* is of fixed length by padding it with NOPs.
|
|
||||||
*/
|
|
||||||
if (insn[i].code == (BPF_JMP | BPF_CALL) &&
|
|
||||||
insn[i].src_reg == BPF_PSEUDO_CALL) {
|
|
||||||
ret = bpf_jit_get_func_addr(fp, &insn[i], true,
|
|
||||||
&func_addr,
|
|
||||||
&func_addr_fixed);
|
|
||||||
if (ret < 0)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Save ctx->idx as this would currently point to the
|
|
||||||
* end of the JITed image and set it to the offset of
|
|
||||||
* the instruction sequence corresponding to the
|
|
||||||
* subprog call temporarily.
|
|
||||||
*/
|
|
||||||
tmp_idx = ctx->idx;
|
|
||||||
ctx->idx = addrs[i] / 4;
|
|
||||||
ret = bpf_jit_emit_func_call_rel(image, ctx, func_addr);
|
|
||||||
if (ret)
|
|
||||||
return ret;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Restore ctx->idx here. This is safe as the length
|
|
||||||
* of the JITed sequence remains unchanged.
|
|
||||||
*/
|
|
||||||
ctx->idx = tmp_idx;
|
|
||||||
} else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) {
|
|
||||||
tmp_idx = ctx->idx;
|
|
||||||
ctx->idx = addrs[i] / 4;
|
|
||||||
#ifdef CONFIG_PPC32
|
|
||||||
PPC_LI32(bpf_to_ppc(insn[i].dst_reg) - 1, (u32)insn[i + 1].imm);
|
|
||||||
PPC_LI32(bpf_to_ppc(insn[i].dst_reg), (u32)insn[i].imm);
|
|
||||||
for (j = ctx->idx - addrs[i] / 4; j < 4; j++)
|
|
||||||
EMIT(PPC_RAW_NOP());
|
|
||||||
#else
|
|
||||||
func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32);
|
|
||||||
PPC_LI64(bpf_to_ppc(insn[i].dst_reg), func_addr);
|
|
||||||
/* overwrite rest with nops */
|
|
||||||
for (j = ctx->idx - addrs[i] / 4; j < 5; j++)
|
|
||||||
EMIT(PPC_RAW_NOP());
|
|
||||||
#endif
|
|
||||||
ctx->idx = tmp_idx;
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr)
|
int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr)
|
||||||
{
|
{
|
||||||
if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) {
|
if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) {
|
||||||
|
@ -185,7 +117,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
|
||||||
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
|
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
|
||||||
|
|
||||||
/* Scouting faux-generate pass 0 */
|
/* Scouting faux-generate pass 0 */
|
||||||
if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
|
if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
|
||||||
/* We hit something illegal or unsupported. */
|
/* We hit something illegal or unsupported. */
|
||||||
fp = org_fp;
|
fp = org_fp;
|
||||||
goto out_addrs;
|
goto out_addrs;
|
||||||
|
@ -200,7 +132,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
|
||||||
*/
|
*/
|
||||||
if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
|
if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
|
||||||
cgctx.idx = 0;
|
cgctx.idx = 0;
|
||||||
if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0)) {
|
if (bpf_jit_build_body(fp, 0, &cgctx, addrs, 0, false)) {
|
||||||
fp = org_fp;
|
fp = org_fp;
|
||||||
goto out_addrs;
|
goto out_addrs;
|
||||||
}
|
}
|
||||||
|
@ -234,29 +166,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
|
||||||
skip_init_ctx:
|
skip_init_ctx:
|
||||||
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
|
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
|
||||||
|
|
||||||
if (extra_pass) {
|
|
||||||
/*
|
|
||||||
* Do not touch the prologue and epilogue as they will remain
|
|
||||||
* unchanged. Only fix the branch target address for subprog
|
|
||||||
* calls in the body, and ldimm64 instructions.
|
|
||||||
*
|
|
||||||
* This does not change the offsets and lengths of the subprog
|
|
||||||
* call instruction sequences and hence, the size of the JITed
|
|
||||||
* image as well.
|
|
||||||
*/
|
|
||||||
bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs);
|
|
||||||
|
|
||||||
/* There is no need to perform the usual passes. */
|
|
||||||
goto skip_codegen_passes;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Code generation passes 1-2 */
|
/* Code generation passes 1-2 */
|
||||||
for (pass = 1; pass < 3; pass++) {
|
for (pass = 1; pass < 3; pass++) {
|
||||||
/* Now build the prologue, body code & epilogue for real. */
|
/* Now build the prologue, body code & epilogue for real. */
|
||||||
cgctx.idx = 0;
|
cgctx.idx = 0;
|
||||||
cgctx.alt_exit_addr = 0;
|
cgctx.alt_exit_addr = 0;
|
||||||
bpf_jit_build_prologue(code_base, &cgctx);
|
bpf_jit_build_prologue(code_base, &cgctx);
|
||||||
if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass)) {
|
if (bpf_jit_build_body(fp, code_base, &cgctx, addrs, pass, extra_pass)) {
|
||||||
bpf_jit_binary_free(bpf_hdr);
|
bpf_jit_binary_free(bpf_hdr);
|
||||||
fp = org_fp;
|
fp = org_fp;
|
||||||
goto out_addrs;
|
goto out_addrs;
|
||||||
|
@ -268,7 +184,6 @@ skip_init_ctx:
|
||||||
proglen - (cgctx.idx * 4), cgctx.seen);
|
proglen - (cgctx.idx * 4), cgctx.seen);
|
||||||
}
|
}
|
||||||
|
|
||||||
skip_codegen_passes:
|
|
||||||
if (bpf_jit_enable > 1)
|
if (bpf_jit_enable > 1)
|
||||||
/*
|
/*
|
||||||
* Note that we output the base address of the code_base
|
* Note that we output the base address of the code_base
|
||||||
|
|
|
@ -282,7 +282,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o
|
||||||
|
|
||||||
/* Assemble the body code between the prologue & epilogue */
|
/* Assemble the body code between the prologue & epilogue */
|
||||||
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
|
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
|
||||||
u32 *addrs, int pass)
|
u32 *addrs, int pass, bool extra_pass)
|
||||||
{
|
{
|
||||||
const struct bpf_insn *insn = fp->insnsi;
|
const struct bpf_insn *insn = fp->insnsi;
|
||||||
int flen = fp->len;
|
int flen = fp->len;
|
||||||
|
@ -1002,7 +1002,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *
|
||||||
case BPF_JMP | BPF_CALL:
|
case BPF_JMP | BPF_CALL:
|
||||||
ctx->seen |= SEEN_FUNC;
|
ctx->seen |= SEEN_FUNC;
|
||||||
|
|
||||||
ret = bpf_jit_get_func_addr(fp, &insn[i], false,
|
ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
|
||||||
&func_addr, &func_addr_fixed);
|
&func_addr, &func_addr_fixed);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
|
@ -343,7 +343,7 @@ asm (
|
||||||
|
|
||||||
/* Assemble the body code between the prologue & epilogue */
|
/* Assemble the body code between the prologue & epilogue */
|
||||||
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
|
int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
|
||||||
u32 *addrs, int pass)
|
u32 *addrs, int pass, bool extra_pass)
|
||||||
{
|
{
|
||||||
enum stf_barrier_type stf_barrier = stf_barrier_type_get();
|
enum stf_barrier_type stf_barrier = stf_barrier_type_get();
|
||||||
const struct bpf_insn *insn = fp->insnsi;
|
const struct bpf_insn *insn = fp->insnsi;
|
||||||
|
@ -967,7 +967,7 @@ emit_clear:
|
||||||
case BPF_JMP | BPF_CALL:
|
case BPF_JMP | BPF_CALL:
|
||||||
ctx->seen |= SEEN_FUNC;
|
ctx->seen |= SEEN_FUNC;
|
||||||
|
|
||||||
ret = bpf_jit_get_func_addr(fp, &insn[i], false,
|
ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
|
||||||
&func_addr, &func_addr_fixed);
|
&func_addr, &func_addr_fixed);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
Loading…
Reference in New Issue