bpf: arm64: add JIT support for multi-function programs

similar to x64 add support for bpf-to-bpf calls.
When program has calls to in-kernel helpers the target call offset
is known at JIT time and arm64 architecture needs 2 passes.
With bpf-to-bpf calls the dynamically allocated function start
is unknown until all functions of the program are JITed.
Therefore (just like x64) arm64 JIT needs one extra pass over
the program to emit correct call offsets.

Implementation detail:
Avoid being too clever in 64-bit immediate moves and
always use 4 instructions (instead of 3-4 depending on the address)
to make sure only one extra pass is needed.
If some future optimization would make it worth while to optimize
'call 64-bit imm' further, the JIT would need to do 4 passes
over the program instead of 3 as in this patch.
For typical bpf program address the mov needs 3 or 4 insns,
so unconditional 4 insns to save extra pass is a worthy trade off
at this state of JIT.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
Alexei Starovoitov 2017-12-14 17:55:16 -08:00 committed by Daniel Borkmann
parent 1c2a088a66
commit db496944fd
1 changed files with 64 additions and 4 deletions

View File

@ -99,6 +99,20 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val,
} }
} }
static inline void emit_addr_mov_i64(const int reg, const u64 val,
struct jit_ctx *ctx)
{
u64 tmp = val;
int shift = 0;
emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
for (;shift < 48;) {
tmp >>= 16;
shift += 16;
emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
}
}
static inline void emit_a64_mov_i(const int is64, const int reg, static inline void emit_a64_mov_i(const int is64, const int reg,
const s32 val, struct jit_ctx *ctx) const s32 val, struct jit_ctx *ctx)
{ {
@ -603,7 +617,10 @@ emit_cond_jmp:
const u8 r0 = bpf2a64[BPF_REG_0]; const u8 r0 = bpf2a64[BPF_REG_0];
const u64 func = (u64)__bpf_call_base + imm; const u64 func = (u64)__bpf_call_base + imm;
emit_a64_mov_i64(tmp, func, ctx); if (ctx->prog->is_func)
emit_addr_mov_i64(tmp, func, ctx);
else
emit_a64_mov_i64(tmp, func, ctx);
emit(A64_BLR(tmp), ctx); emit(A64_BLR(tmp), ctx);
emit(A64_MOV(1, r0, A64_R(0)), ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx);
break; break;
@ -835,11 +852,19 @@ static inline void bpf_flush_icache(void *start, void *end)
flush_icache_range((unsigned long)start, (unsigned long)end); flush_icache_range((unsigned long)start, (unsigned long)end);
} }
struct arm64_jit_data {
struct bpf_binary_header *header;
u8 *image;
struct jit_ctx ctx;
};
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{ {
struct bpf_prog *tmp, *orig_prog = prog; struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header; struct bpf_binary_header *header;
struct arm64_jit_data *jit_data;
bool tmp_blinded = false; bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx; struct jit_ctx ctx;
int image_size; int image_size;
u8 *image_ptr; u8 *image_ptr;
@ -858,13 +883,29 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = tmp; prog = tmp;
} }
jit_data = prog->aux->jit_data;
if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
if (!jit_data) {
prog = orig_prog;
goto out;
}
prog->aux->jit_data = jit_data;
}
if (jit_data->ctx.offset) {
ctx = jit_data->ctx;
image_ptr = jit_data->image;
header = jit_data->header;
extra_pass = true;
goto skip_init_ctx;
}
memset(&ctx, 0, sizeof(ctx)); memset(&ctx, 0, sizeof(ctx));
ctx.prog = prog; ctx.prog = prog;
ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); ctx.offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
if (ctx.offset == NULL) { if (ctx.offset == NULL) {
prog = orig_prog; prog = orig_prog;
goto out; goto out_off;
} }
/* 1. Initial fake pass to compute ctx->idx. */ /* 1. Initial fake pass to compute ctx->idx. */
@ -895,6 +936,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* 2. Now, the actual pass. */ /* 2. Now, the actual pass. */
ctx.image = (__le32 *)image_ptr; ctx.image = (__le32 *)image_ptr;
skip_init_ctx:
ctx.idx = 0; ctx.idx = 0;
build_prologue(&ctx); build_prologue(&ctx);
@ -920,13 +962,31 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
bpf_flush_icache(header, ctx.image + ctx.idx); bpf_flush_icache(header, ctx.image + ctx.idx);
bpf_jit_binary_lock_ro(header); if (!prog->is_func || extra_pass) {
if (extra_pass && ctx.idx != jit_data->ctx.idx) {
pr_err_once("multi-func JIT bug %d != %d\n",
ctx.idx, jit_data->ctx.idx);
bpf_jit_binary_free(header);
prog->bpf_func = NULL;
prog->jited = 0;
goto out_off;
}
bpf_jit_binary_lock_ro(header);
} else {
jit_data->ctx = ctx;
jit_data->image = image_ptr;
jit_data->header = header;
}
prog->bpf_func = (void *)ctx.image; prog->bpf_func = (void *)ctx.image;
prog->jited = 1; prog->jited = 1;
prog->jited_len = image_size; prog->jited_len = image_size;
if (!prog->is_func || extra_pass) {
out_off: out_off:
kfree(ctx.offset); kfree(ctx.offset);
kfree(jit_data);
prog->aux->jit_data = NULL;
}
out: out:
if (tmp_blinded) if (tmp_blinded)
bpf_jit_prog_release_other(prog, prog == orig_prog ? bpf_jit_prog_release_other(prog, prog == orig_prog ?