Merge branch 'bpf,x64: implement jump padding in jit'

Gary Lin says:

====================
This patch series implements jump padding to x64 jit to cover some
corner cases that used to consume more than 20 jit passes and caused
failure.

v4:
  - Add the detailed comments about the possible padding bytes
  - Add the second test case which triggers jmp_cond padding and imm32 nop
    jmp padding.
  - Add the new test case as another subprog

v3:
  - Copy the instructions of prologue separately or the size calculation
    of the first BPF instruction would include the prologue.
  - Replace WARN_ONCE() with pr_err() and EFAULT
  - Use MAX_PASSES in the for loop condition check
  - Remove the "padded" flag from x64_jit_data. For the extra pass of
    subprogs, padding is always enabled since it won't hurt the images
    that converge without padding.
v2:
  - Simplify the sample code in the commit description and provide the
    jit code
  - Check the expected padding bytes with WARN_ONCE
  - Move the 'padded' flag to 'struct x64_jit_data'
  - Remove the EXPECTED_FAIL flag from bpf_fill_maxinsns11() in test_bpf
  - Add 2 verifier tests
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Alexei Starovoitov 2021-01-19 12:42:13 -08:00
commit 86e6b4e993
4 changed files with 209 additions and 34 deletions

View File

@ -869,8 +869,31 @@ static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt,
}
}
static int emit_nops(u8 **pprog, int len)
{
u8 *prog = *pprog;
int i, noplen, cnt = 0;
while (len > 0) {
noplen = len;
if (noplen > ASM_NOP_MAX)
noplen = ASM_NOP_MAX;
for (i = 0; i < noplen; i++)
EMIT1(ideal_nops[noplen][i]);
len -= noplen;
}
*pprog = prog;
return cnt;
}
#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
int oldproglen, struct jit_context *ctx, bool jmp_padding)
{
bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
struct bpf_insn *insn = bpf_prog->insnsi;
@ -880,7 +903,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
int i, cnt = 0, excnt = 0;
int proglen = 0;
int ilen, proglen = 0;
u8 *prog = temp;
int err;
@ -894,7 +917,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
bpf_prog->aux->func_idx != 0);
push_callee_regs(&prog, callee_regs_used);
addrs[0] = prog - temp;
ilen = prog - temp;
if (image)
memcpy(image + proglen, temp, ilen);
proglen += ilen;
addrs[0] = proglen;
prog = temp;
for (i = 1; i <= insn_cnt; i++, insn++) {
const s32 imm32 = insn->imm;
@ -903,8 +932,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
u8 b2 = 0, b3 = 0;
s64 jmp_offset;
u8 jmp_cond;
int ilen;
u8 *func;
int nops;
switch (insn->code) {
/* ALU */
@ -1502,6 +1531,30 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */
}
jmp_offset = addrs[i + insn->off] - addrs[i];
if (is_imm8(jmp_offset)) {
if (jmp_padding) {
/* To keep the jmp_offset valid, the extra bytes are
* padded before the jump insn, so we substract the
* 2 bytes of jmp_cond insn from INSN_SZ_DIFF.
*
* If the previous pass already emits an imm8
* jmp_cond, then this BPF insn won't shrink, so
* "nops" is 0.
*
* On the other hand, if the previous pass emits an
* imm32 jmp_cond, the extra 4 bytes(*) is padded to
* keep the image from shrinking further.
*
* (*) imm32 jmp_cond is 6 bytes, and imm8 jmp_cond
* is 2 bytes, so the size difference is 4 bytes.
*/
nops = INSN_SZ_DIFF - 2;
if (nops != 0 && nops != 4) {
pr_err("unexpected jmp_cond padding: %d bytes\n",
nops);
return -EFAULT;
}
cnt += emit_nops(&prog, nops);
}
EMIT2(jmp_cond, jmp_offset);
} else if (is_simm32(jmp_offset)) {
EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
@ -1524,11 +1577,55 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */
else
jmp_offset = addrs[i + insn->off] - addrs[i];
if (!jmp_offset)
/* Optimize out nop jumps */
if (!jmp_offset) {
/*
* If jmp_padding is enabled, the extra nops will
* be inserted. Otherwise, optimize out nop jumps.
*/
if (jmp_padding) {
/* There are 3 possible conditions.
* (1) This BPF_JA is already optimized out in
* the previous run, so there is no need
* to pad any extra byte (0 byte).
* (2) The previous pass emits an imm8 jmp,
* so we pad 2 bytes to match the previous
* insn size.
* (3) Similarly, the previous pass emits an
* imm32 jmp, and 5 bytes is padded.
*/
nops = INSN_SZ_DIFF;
if (nops != 0 && nops != 2 && nops != 5) {
pr_err("unexpected nop jump padding: %d bytes\n",
nops);
return -EFAULT;
}
cnt += emit_nops(&prog, nops);
}
break;
}
emit_jmp:
if (is_imm8(jmp_offset)) {
if (jmp_padding) {
/* To avoid breaking jmp_offset, the extra bytes
* are padded before the actual jmp insn, so
* 2 bytes is substracted from INSN_SZ_DIFF.
*
* If the previous pass already emits an imm8
* jmp, there is nothing to pad (0 byte).
*
* If it emits an imm32 jmp (5 bytes) previously
* and now an imm8 jmp (2 bytes), then we pad
* (5 - 2 = 3) bytes to stop the image from
* shrinking further.
*/
nops = INSN_SZ_DIFF - 2;
if (nops != 0 && nops != 3) {
pr_err("unexpected jump padding: %d bytes\n",
nops);
return -EFAULT;
}
cnt += emit_nops(&prog, INSN_SZ_DIFF - 2);
}
EMIT2(0xEB, jmp_offset);
} else if (is_simm32(jmp_offset)) {
EMIT1_off32(0xE9, jmp_offset);
@ -1671,26 +1768,6 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
return 0;
}
static void emit_nops(u8 **pprog, unsigned int len)
{
unsigned int i, noplen;
u8 *prog = *pprog;
int cnt = 0;
while (len > 0) {
noplen = len;
if (noplen > ASM_NOP_MAX)
noplen = ASM_NOP_MAX;
for (i = 0; i < noplen; i++)
EMIT1(ideal_nops[noplen][i]);
len -= noplen;
}
*pprog = prog;
}
static void emit_align(u8 **pprog, u32 align)
{
u8 *target, *prog = *pprog;
@ -2065,6 +2142,9 @@ struct x64_jit_data {
struct jit_context ctx;
};
#define MAX_PASSES 20
#define PADDING_PASSES (MAX_PASSES - 5)
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
struct bpf_binary_header *header = NULL;
@ -2074,6 +2154,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
struct jit_context ctx = {};
bool tmp_blinded = false;
bool extra_pass = false;
bool padding = false;
u8 *image = NULL;
int *addrs;
int pass;
@ -2110,6 +2191,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
image = jit_data->image;
header = jit_data->header;
extra_pass = true;
padding = true;
goto skip_init_addrs;
}
addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL);
@ -2135,8 +2217,10 @@ skip_init_addrs:
* may converge on the last pass. In such case do one more
* pass to emit the final image.
*/
for (pass = 0; pass < 20 || image; pass++) {
proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
for (pass = 0; pass < MAX_PASSES || image; pass++) {
if (!padding && pass >= PADDING_PASSES)
padding = true;
proglen = do_jit(prog, addrs, image, oldproglen, &ctx, padding);
if (proglen <= 0) {
out_image:
image = NULL;

View File

@ -345,7 +345,7 @@ static int __bpf_fill_ja(struct bpf_test *self, unsigned int len,
static int bpf_fill_maxinsns11(struct bpf_test *self)
{
/* Hits 70 passes on x86_64, so cannot get JITed there. */
/* Hits 70 passes on x86_64 and triggers NOPs padding. */
return __bpf_fill_ja(self, BPF_MAXINSNS, 68);
}
@ -5318,15 +5318,10 @@ static struct bpf_test tests[] = {
{
"BPF_MAXINSNS: Jump, gap, jump, ...",
{ },
#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86)
CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
#else
CLASSIC | FLAG_NO_DATA,
#endif
{ },
{ { 0, 0xababcbac } },
.fill_helper = bpf_fill_maxinsns11,
.expected_errcode = -ENOTSUPP,
},
{
"BPF_MAXINSNS: jump over MSH",

View File

@ -297,6 +297,78 @@ static void bpf_fill_scale(struct bpf_test *self)
}
}
static int bpf_fill_torturous_jumps_insn_1(struct bpf_insn *insn)
{
unsigned int len = 259, hlen = 128;
int i;
insn[0] = BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32);
for (i = 1; i <= hlen; i++) {
insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, i, hlen);
insn[i + hlen] = BPF_JMP_A(hlen - i);
}
insn[len - 2] = BPF_MOV64_IMM(BPF_REG_0, 1);
insn[len - 1] = BPF_EXIT_INSN();
return len;
}
static int bpf_fill_torturous_jumps_insn_2(struct bpf_insn *insn)
{
unsigned int len = 4100, jmp_off = 2048;
int i, j;
insn[0] = BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32);
for (i = 1; i <= jmp_off; i++) {
insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, i, jmp_off);
}
insn[i++] = BPF_JMP_A(jmp_off);
for (; i <= jmp_off * 2 + 1; i+=16) {
for (j = 0; j < 16; j++) {
insn[i + j] = BPF_JMP_A(16 - j - 1);
}
}
insn[len - 2] = BPF_MOV64_IMM(BPF_REG_0, 2);
insn[len - 1] = BPF_EXIT_INSN();
return len;
}
static void bpf_fill_torturous_jumps(struct bpf_test *self)
{
struct bpf_insn *insn = self->fill_insns;
int i = 0;
switch (self->retval) {
case 1:
self->prog_len = bpf_fill_torturous_jumps_insn_1(insn);
return;
case 2:
self->prog_len = bpf_fill_torturous_jumps_insn_2(insn);
return;
case 3:
/* main */
insn[i++] = BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4);
insn[i++] = BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 262);
insn[i++] = BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0);
insn[i++] = BPF_MOV64_IMM(BPF_REG_0, 3);
insn[i++] = BPF_EXIT_INSN();
/* subprog 1 */
i += bpf_fill_torturous_jumps_insn_1(insn + i);
/* subprog 2 */
i += bpf_fill_torturous_jumps_insn_2(insn + i);
self->prog_len = i;
return;
default:
self->prog_len = 0;
break;
}
}
/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
#define BPF_SK_LOOKUP(func) \
/* struct bpf_sock_tuple tuple = {} */ \

View File

@ -105,3 +105,27 @@
.result = ACCEPT,
.retval = 2,
},
{
"jit: torturous jumps, imm8 nop jmp and pure jump padding",
.insns = { },
.fill_helper = bpf_fill_torturous_jumps,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
.retval = 1,
},
{
"jit: torturous jumps, imm32 nop jmp and jmp_cond padding",
.insns = { },
.fill_helper = bpf_fill_torturous_jumps,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
.retval = 2,
},
{
"jit: torturous jumps in subprog",
.insns = { },
.fill_helper = bpf_fill_torturous_jumps,
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
.retval = 3,
},