Merge branch 'filter-next'
Alexei Starovoitov says:

====================
internal BPF jit for x64 and JITed seccomp

Internal BPF JIT compiler for x86_64 replaces classic BPF JIT.
Use it in seccomp and in tracing filters (sent as separate patch)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit 1f499d6a3b
arch/x86/net/bpf_jit.S
@@ -12,13 +12,16 @@
 /*
  * Calling convention :
- * rdi : skb pointer
+ * rbx : skb pointer (callee saved)
  * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r8 : copy of skb->data
+ * r10 : copy of skb->data
  * r9d : hlen = skb->len - skb->data_len
  */
-#define SKBDATA	%r8
+#define SKBDATA	%r10
 #define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
+#define MAX_BPF_STACK (512 /* from filter.h */ + \
+	32 /* space for rbx,r13,r14,r15 */ + \
+	8 /* space for skb_copy_bits */)
 
 sk_load_word:
 	.globl	sk_load_word
@@ -68,53 +71,31 @@ sk_load_byte_positive_offset:
 	movzbl	(SKBDATA,%rsi),%eax
 	ret
 
-/**
- * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
- *
- * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
- * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value
- */
-sk_load_byte_msh:
-	.globl	sk_load_byte_msh
-	test	%esi,%esi
-	js	bpf_slow_path_byte_msh_neg
-
-sk_load_byte_msh_positive_offset:
-	.globl	sk_load_byte_msh_positive_offset
-	cmp	%esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
-	jle	bpf_slow_path_byte_msh
-	movzbl	(SKBDATA,%rsi),%ebx
-	and	$15,%bl
-	shl	$2,%bl
-	ret
-
 /* rsi contains offset and can be scratched */
 #define bpf_slow_path_common(LEN)		\
-	push	%rdi;    /* save skb */		\
+	mov	%rbx, %rdi; /* arg1 == skb */	\
 	push	%r9;				\
 	push	SKBDATA;			\
 /* rsi already has offset */			\
 	mov	$LEN,%ecx;	/* len */	\
-	lea	-12(%rbp),%rdx;			\
+	lea	- MAX_BPF_STACK + 32(%rbp),%rdx;\
 	call	skb_copy_bits;			\
 	test	%eax,%eax;			\
 	pop	SKBDATA;			\
-	pop	%r9;				\
-	pop	%rdi
+	pop	%r9;
 
 
 bpf_slow_path_word:
 	bpf_slow_path_common(4)
 	js	bpf_error
-	mov	-12(%rbp),%eax
+	mov	- MAX_BPF_STACK + 32(%rbp),%eax
 	bswap	%eax
 	ret
 
 bpf_slow_path_half:
 	bpf_slow_path_common(2)
 	js	bpf_error
-	mov	-12(%rbp),%ax
+	mov	- MAX_BPF_STACK + 32(%rbp),%ax
 	rol	$8,%ax
 	movzwl	%ax,%eax
 	ret
@@ -122,21 +103,11 @@ bpf_slow_path_half:
 bpf_slow_path_byte:
 	bpf_slow_path_common(1)
 	js	bpf_error
-	movzbl	-12(%rbp),%eax
-	ret
-
-bpf_slow_path_byte_msh:
-	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
-	bpf_slow_path_common(1)
-	js	bpf_error
-	movzbl	-12(%rbp),%eax
-	and	$15,%al
-	shl	$2,%al
-	xchg	%eax,%ebx
+	movzbl	- MAX_BPF_STACK + 32(%rbp),%eax
 	ret
 
 #define sk_negative_common(SIZE)		\
-	push	%rdi;	/* save skb */		\
+	mov	%rbx, %rdi; /* arg1 == skb */	\
 	push	%r9;				\
 	push	SKBDATA;			\
 /* rsi already has offset */			\
@@ -145,10 +116,8 @@ bpf_slow_path_byte_msh:
 	test	%rax,%rax;			\
 	pop	SKBDATA;			\
 	pop	%r9;				\
-	pop	%rdi;				\
 	jz	bpf_error
 
-
 bpf_slow_path_word_neg:
 	cmp	SKF_MAX_NEG_OFF, %esi	/* test range */
 	jl	bpf_error	/* offset lower -> error */
@@ -179,22 +148,12 @@ sk_load_byte_negative_offset:
 	movzbl	(%rax), %eax
 	ret
 
-bpf_slow_path_byte_msh_neg:
-	cmp	SKF_MAX_NEG_OFF, %esi
-	jl	bpf_error
-sk_load_byte_msh_negative_offset:
-	.globl	sk_load_byte_msh_negative_offset
-	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
-	sk_negative_common(1)
-	movzbl	(%rax),%eax
-	and	$15,%al
-	shl	$2,%al
-	xchg	%eax,%ebx
-	ret
-
 bpf_error:
 # force a return 0 from jit handler
-	xor	%eax,%eax
-	mov	-8(%rbp),%rbx
+	xor	%eax,%eax
+	mov	- MAX_BPF_STACK(%rbp),%rbx
+	mov	- MAX_BPF_STACK + 8(%rbp),%r13
+	mov	- MAX_BPF_STACK + 16(%rbp),%r14
+	mov	- MAX_BPF_STACK + 24(%rbp),%r15
 	leaveq
 	ret
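For orientation, a small userspace sketch (not part of the patch; macro and label names are illustrative) of the frame layout implied by MAX_BPF_STACK and the "- MAX_BPF_STACK + ..." offsets used in the slow paths and in bpf_error above: the lowest 32 bytes hold the callee-saved registers, the next 8 bytes are the skb_copy_bits() destination, and the 512 bytes just below %rbp belong to the BPF program's own stack.

/* frame_layout.c -- illustrative only, mirrors the defines in bpf_jit.S */
#include <stdio.h>

#define BPF_PROG_STACK    512	/* from filter.h */
#define CALLEE_SAVE_AREA   32	/* rbx, r13, r14, r15 */
#define COPY_BITS_SCRATCH   8	/* skb_copy_bits() destination */
#define MAX_BPF_STACK (BPF_PROG_STACK + CALLEE_SAVE_AREA + COPY_BITS_SCRATCH)

int main(void)
{
	printf("rbx saved at       rbp%+d\n", -MAX_BPF_STACK);		/* -552 */
	printf("r13 saved at       rbp%+d\n", -MAX_BPF_STACK + 8);	/* -544 */
	printf("r14 saved at       rbp%+d\n", -MAX_BPF_STACK + 16);	/* -536 */
	printf("r15 saved at       rbp%+d\n", -MAX_BPF_STACK + 24);	/* -528 */
	printf("skb_copy_bits dst  rbp%+d\n", -MAX_BPF_STACK + 32);	/* -520 */
	printf("BPF program stack  rbp%+d .. rbp\n", -BPF_PROG_STACK);
	return 0;
}

This is why the slow paths switch from the old -12(%rbp) scratch slot to - MAX_BPF_STACK + 32(%rbp), and why bpf_error restores rbx/r13/r14/r15 from the bottom of the same area.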
(File diff suppressed because it is too large)
include/linux/filter.h
@@ -207,6 +207,9 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
 void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 
+u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+void bpf_int_jit_compile(struct sk_filter *fp);
+
 #ifdef CONFIG_BPF_JIT
 #include <stdarg.h>
 #include <linux/linkage.h>
kernel/seccomp.c
@@ -54,8 +54,7 @@
 struct seccomp_filter {
 	atomic_t usage;
 	struct seccomp_filter *prev;
-	unsigned short len;  /* Instruction count */
-	struct sock_filter_int insnsi[];
+	struct sk_filter *prog;
 };
 
 /* Limit any path through the tree to 256KB worth of instructions. */
@@ -189,7 +188,8 @@ static u32 seccomp_run_filters(int syscall)
 	 * value always takes priority (ignoring the DATA).
 	 */
 	for (f = current->seccomp.filter; f; f = f->prev) {
-		u32 cur_ret = sk_run_filter_int_seccomp(&sd, f->insnsi);
+		u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
+
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 			ret = cur_ret;
 	}
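A schematic of the dispatch SK_RUN_FILTER relies on, as a plain C sketch (simplified; the fake_* names are illustrative and this is not the kernel macro): each filter carries a bpf_func pointer that points either at the internal-BPF interpreter entry or, after bpf_int_jit_compile(), at a JITed image, so the caller does not care which one it gets.

/* dispatch_sketch.c -- illustrative only */
#include <stdio.h>

struct fake_ctx { int nr; };			/* stand-in for seccomp_data */

struct fake_prog {
	unsigned int (*bpf_func)(const void *ctx, const void *insns);
	const void *insnsi;			/* internal BPF instructions */
};

/* Interpreter entry used until (or unless) a JIT provides native code. */
static unsigned int interpret(const void *ctx, const void *insns)
{
	(void)insns;
	return ((const struct fake_ctx *)ctx)->nr;	/* dummy "filter" */
}

static unsigned int run_filter(const struct fake_prog *p, const void *ctx)
{
	/* JITing simply swaps p->bpf_func for a pointer to native code. */
	return p->bpf_func(ctx, p->insnsi);
}

int main(void)
{
	struct fake_ctx sd = { .nr = 42 };
	struct fake_prog prog = { .bpf_func = interpret, .insnsi = NULL };

	printf("ret = %u\n", run_filter(&prog, &sd));
	return 0;
}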
@@ -215,7 +215,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 		return -EINVAL;
 
 	for (filter = current->seccomp.filter; filter; filter = filter->prev)
-		total_insns += filter->len + 4; /* include a 4 instr penalty */
+		total_insns += filter->prog->len + 4; /* include a 4 instr penalty */
 	if (total_insns > MAX_INSNS_PER_PATH)
 		return -ENOMEM;
 
@@ -256,19 +256,27 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 
 	/* Allocate a new seccomp_filter */
 	ret = -ENOMEM;
-	filter = kzalloc(sizeof(struct seccomp_filter) +
-			 sizeof(struct sock_filter_int) * new_len,
+	filter = kzalloc(sizeof(struct seccomp_filter),
 			 GFP_KERNEL|__GFP_NOWARN);
 	if (!filter)
 		goto free_prog;
 
-	ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
-	if (ret)
+	filter->prog = kzalloc(sk_filter_size(new_len),
+			       GFP_KERNEL|__GFP_NOWARN);
+	if (!filter->prog)
 		goto free_filter;
+
+	ret = sk_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
+	if (ret)
+		goto free_filter_prog;
 	kfree(fp);
 
 	atomic_set(&filter->usage, 1);
-	filter->len = new_len;
+	filter->prog->len = new_len;
+	filter->prog->bpf_func = (void *)sk_run_filter_int_seccomp;
+
+	/* JIT internal BPF into native HW instructions */
+	bpf_int_jit_compile(filter->prog);
 
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
@@ -278,6 +286,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	current->seccomp.filter = filter;
 	return 0;
 
+free_filter_prog:
+	kfree(filter->prog);
 free_filter:
 	kfree(filter);
 free_prog:
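The user-visible ABI that feeds seccomp_attach_filter() is unchanged by this series: userspace still hands the kernel a classic BPF sock_fprog via prctl(); the conversion to internal BPF and the JIT step above happen entirely inside the kernel. A minimal userspace sketch of that unchanged ABI (illustrative, not part of the patch; a real filter would match syscalls and return SECCOMP_RET_KILL or SECCOMP_RET_ERRNO rather than allowing everything):

/* seccomp_user_sketch.c -- illustrative only */
#include <stddef.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

int main(void)
{
	struct sock_filter insns[] = {
		/* A = syscall number */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		/* allow everything (placeholder policy) */
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};

	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		perror("no_new_privs");
	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
		perror("seccomp");

	puts("filter installed");	/* kernel converts and JITs it internally */
	return 0;
}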
@@ -330,6 +340,7 @@ void put_seccomp_filter(struct task_struct *tsk)
 	while (orig && atomic_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
 		orig = orig->prev;
+		bpf_jit_free(freeme->prog);
 		kfree(freeme);
 	}
 }
net/core/filter.c
@@ -1524,6 +1524,10 @@ out_err:
 	return ERR_PTR(err);
 }
 
+void __weak bpf_int_jit_compile(struct sk_filter *prog)
+{
+}
+
 static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 					     struct sock *sk)
 {
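The __weak definition is the portable no-op fallback: an architecture that implements an internal-BPF JIT provides a strong bpf_int_jit_compile() that overrides it at link time, and everything else falls through to the interpreter. A self-contained, non-kernel sketch of this weak/strong override pattern (GCC/Clang behaviour; jit_compile is an illustrative name):

/* weak_default.c -- illustrative userspace example of the __weak pattern */
#include <stdio.h>

/* Weak no-op default, analogous to the __weak bpf_int_jit_compile() above. */
void __attribute__((weak)) jit_compile(const char *prog)
{
	printf("no JIT in this build, interpreting %s\n", prog);
}

int main(void)
{
	jit_compile("filter");	/* a strong jit_compile() in another object
				 * file, if linked in, replaces the weak one */
	return 0;
}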
@@ -1544,9 +1548,12 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 	/* JIT compiler couldn't process this filter, so do the
 	 * internal BPF translation for the optimized interpreter.
 	 */
-	if (!fp->jited)
+	if (!fp->jited) {
 		fp = __sk_migrate_filter(fp, sk);
 
+		/* Probe if internal BPF can be jit-ed */
+		bpf_int_jit_compile(fp);
+	}
 	return fp;
 }