Merge branch 'filter-next'

Alexei Starovoitov says:

====================
internal BPF jit for x64 and JITed seccomp

Internal BPF JIT compiler for x86_64 replaces classic BPF JIT.
Use it in seccomp and in tracing filters (sent as separate patch)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller on 2014-05-15 16:32:20 -04:00
commit 1f499d6a3b
5 changed files with 833 additions and 702 deletions
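
For orientation before the diffs: the series converts a classic BPF program into internal BPF with sk_convert_filter(), points it at the internal interpreter, and then offers it to the architecture JIT via bpf_int_jit_compile(), which on x86_64 replaces bpf_func with native code (the __weak stub elsewhere is a no-op). Below is a condensed, kernel-side sketch of that flow for the seccomp case, using only helpers that appear in the diffs; the function name seccomp_prepare_sketch is hypothetical and error handling is elided.

static struct sk_filter *seccomp_prepare_sketch(struct sock_filter *fp,
                                                unsigned short len)
{
        struct sk_filter *prog;
        int new_len;

        /* first pass only computes the length of the internal BPF program */
        sk_convert_filter(fp, len, NULL, &new_len);

        prog = kzalloc(sk_filter_size(new_len), GFP_KERNEL);

        /* second pass emits the internal BPF instructions */
        sk_convert_filter(fp, len, prog->insnsi, &new_len);

        prog->len = new_len;
        /* default: run through the internal BPF interpreter */
        prog->bpf_func = (void *)sk_run_filter_int_seccomp;

        /* probe the arch JIT; it may replace bpf_func with native code */
        bpf_int_jit_compile(prog);

        return prog;    /* executed later via SK_RUN_FILTER(prog, &sd) */
}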

arch/x86/net/bpf_jit.S

@@ -12,13 +12,16 @@
 /*
  * Calling convention :
- * rdi : skb pointer
+ * rbx : skb pointer (callee saved)
  * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r8 : copy of skb->data
+ * r10 : copy of skb->data
  * r9d : hlen = skb->len - skb->data_len
  */
-#define SKBDATA %r8
+#define SKBDATA %r10
 #define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */
+#define MAX_BPF_STACK (512 /* from filter.h */ + \
+        32 /* space for rbx,r13,r14,r15 */ + \
+        8 /* space for skb_copy_bits */)

 sk_load_word:
         .globl sk_load_word
@@ -68,53 +71,31 @@ sk_load_byte_positive_offset:
         movzbl (SKBDATA,%rsi),%eax
         ret

-/**
- * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
- *
- * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf)
- * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value
- */
-sk_load_byte_msh:
-        .globl sk_load_byte_msh
-        test %esi,%esi
-        js bpf_slow_path_byte_msh_neg
-
-sk_load_byte_msh_positive_offset:
-        .globl sk_load_byte_msh_positive_offset
-        cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
-        jle bpf_slow_path_byte_msh
-        movzbl (SKBDATA,%rsi),%ebx
-        and $15,%bl
-        shl $2,%bl
-        ret
-
 /* rsi contains offset and can be scratched */
 #define bpf_slow_path_common(LEN) \
-        push %rdi; /* save skb */ \
+        mov %rbx, %rdi; /* arg1 == skb */ \
         push %r9; \
         push SKBDATA; \
 /* rsi already has offset */ \
         mov $LEN,%ecx; /* len */ \
-        lea -12(%rbp),%rdx; \
+        lea - MAX_BPF_STACK + 32(%rbp),%rdx; \
         call skb_copy_bits; \
         test %eax,%eax; \
         pop SKBDATA; \
-        pop %r9; \
-        pop %rdi
+        pop %r9;

 bpf_slow_path_word:
         bpf_slow_path_common(4)
         js bpf_error
-        mov -12(%rbp),%eax
+        mov - MAX_BPF_STACK + 32(%rbp),%eax
         bswap %eax
         ret

 bpf_slow_path_half:
         bpf_slow_path_common(2)
         js bpf_error
-        mov -12(%rbp),%ax
+        mov - MAX_BPF_STACK + 32(%rbp),%ax
         rol $8,%ax
         movzwl %ax,%eax
         ret
@@ -122,21 +103,11 @@ bpf_slow_path_half:
 bpf_slow_path_byte:
         bpf_slow_path_common(1)
         js bpf_error
-        movzbl -12(%rbp),%eax
-        ret
-
-bpf_slow_path_byte_msh:
-        xchg %eax,%ebx /* dont lose A , X is about to be scratched */
-        bpf_slow_path_common(1)
-        js bpf_error
-        movzbl -12(%rbp),%eax
-        and $15,%al
-        shl $2,%al
-        xchg %eax,%ebx
+        movzbl - MAX_BPF_STACK + 32(%rbp),%eax
         ret

 #define sk_negative_common(SIZE) \
-        push %rdi; /* save skb */ \
+        mov %rbx, %rdi; /* arg1 == skb */ \
         push %r9; \
         push SKBDATA; \
 /* rsi already has offset */ \
@@ -145,10 +116,8 @@ bpf_slow_path_byte_msh:
         test %rax,%rax; \
         pop SKBDATA; \
         pop %r9; \
-        pop %rdi; \
         jz bpf_error

 bpf_slow_path_word_neg:
         cmp SKF_MAX_NEG_OFF, %esi /* test range */
         jl bpf_error /* offset lower -> error */
@@ -179,22 +148,12 @@ sk_load_byte_negative_offset:
         movzbl (%rax), %eax
         ret

-bpf_slow_path_byte_msh_neg:
-        cmp SKF_MAX_NEG_OFF, %esi
-        jl bpf_error
-
-sk_load_byte_msh_negative_offset:
-        .globl sk_load_byte_msh_negative_offset
-        xchg %eax,%ebx /* dont lose A , X is about to be scratched */
-        sk_negative_common(1)
-        movzbl (%rax),%eax
-        and $15,%al
-        shl $2,%al
-        xchg %eax,%ebx
-        ret
-
 bpf_error:
 # force a return 0 from jit handler
-        xor     %eax,%eax
-        mov     -8(%rbp),%rbx
+        xor %eax,%eax
+        mov - MAX_BPF_STACK(%rbp),%rbx
+        mov - MAX_BPF_STACK + 8(%rbp),%r13
+        mov - MAX_BPF_STACK + 16(%rbp),%r14
+        mov - MAX_BPF_STACK + 24(%rbp),%r15
         leaveq
         ret

arch/x86/net/bpf_jit_comp.c (file diff suppressed because it is too large)

include/linux/filter.h

@@ -207,6 +207,9 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
 void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);

+u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+void bpf_int_jit_compile(struct sk_filter *fp);
+
 #ifdef CONFIG_BPF_JIT
 #include <stdarg.h>
 #include <linux/linkage.h>

kernel/seccomp.c

@@ -54,8 +54,7 @@
 struct seccomp_filter {
         atomic_t usage;
         struct seccomp_filter *prev;
-        unsigned short len; /* Instruction count */
-        struct sock_filter_int insnsi[];
+        struct sk_filter *prog;
 };

 /* Limit any path through the tree to 256KB worth of instructions. */
@@ -189,7 +188,8 @@ static u32 seccomp_run_filters(int syscall)
          * value always takes priority (ignoring the DATA).
          */
         for (f = current->seccomp.filter; f; f = f->prev) {
-                u32 cur_ret = sk_run_filter_int_seccomp(&sd, f->insnsi);
+                u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
+
                 if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
                         ret = cur_ret;
         }
@@ -215,7 +215,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
                 return -EINVAL;

         for (filter = current->seccomp.filter; filter; filter = filter->prev)
-                total_insns += filter->len + 4; /* include a 4 instr penalty */
+                total_insns += filter->prog->len + 4; /* include a 4 instr penalty */
         if (total_insns > MAX_INSNS_PER_PATH)
                 return -ENOMEM;
@@ -256,19 +256,27 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
         /* Allocate a new seccomp_filter */
         ret = -ENOMEM;
-        filter = kzalloc(sizeof(struct seccomp_filter) +
-                         sizeof(struct sock_filter_int) * new_len,
+        filter = kzalloc(sizeof(struct seccomp_filter),
                          GFP_KERNEL|__GFP_NOWARN);
         if (!filter)
                 goto free_prog;

-        ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
-        if (ret)
+        filter->prog = kzalloc(sk_filter_size(new_len),
+                               GFP_KERNEL|__GFP_NOWARN);
+        if (!filter->prog)
                 goto free_filter;

+        ret = sk_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
+        if (ret)
+                goto free_filter_prog;
+
         kfree(fp);

         atomic_set(&filter->usage, 1);
-        filter->len = new_len;
+        filter->prog->len = new_len;
+        filter->prog->bpf_func = (void *)sk_run_filter_int_seccomp;
+
+        /* JIT internal BPF into native HW instructions */
+        bpf_int_jit_compile(filter->prog);

         /*
          * If there is an existing filter, make it the prev and don't drop its
@@ -278,6 +286,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
         current->seccomp.filter = filter;
         return 0;

+free_filter_prog:
+        kfree(filter->prog);
 free_filter:
         kfree(filter);
 free_prog:
@@ -330,6 +340,7 @@ void put_seccomp_filter(struct task_struct *tsk)
         while (orig && atomic_dec_and_test(&orig->usage)) {
                 struct seccomp_filter *freeme = orig;
                 orig = orig->prev;
+                bpf_jit_free(freeme->prog);
                 kfree(freeme);
         }
 }
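
With the seccomp changes above, any classic BPF filter installed through the existing user-space API is converted to internal BPF and, on x86_64, JIT-compiled; nothing changes for user space. For reference, here is a minimal, self-contained example of installing such a filter with the standard prctl() interface (not part of this patch set, shown only to illustrate what ends up in seccomp_attach_filter()); it makes getpid() fail with EPERM and allows everything else.

#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

int main(void)
{
        struct sock_filter insns[] = {
                /* load the syscall number from struct seccomp_data
                 * (a real filter should also check data->arch)
                 */
                BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                         offsetof(struct seccomp_data, nr)),
                /* getpid() -> return EPERM, everything else -> allow */
                BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpid, 0, 1),
                BPF_STMT(BPF_RET | BPF_K,
                         SECCOMP_RET_ERRNO | (EPERM & SECCOMP_RET_DATA)),
                BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
        };
        struct sock_fprog prog = {
                .len = (unsigned short)(sizeof(insns) / sizeof(insns[0])),
                .filter = insns,
        };
        long pid;

        /* required to attach a filter without CAP_SYS_ADMIN */
        if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
                return 1;
        /* kernel path: seccomp_attach_filter() -> sk_convert_filter()
         * -> bpf_int_jit_compile(), as in the diff above
         */
        if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
                return 1;

        pid = syscall(__NR_getpid);
        printf("getpid() returned %ld (errno %d)\n", pid, errno);
        return 0;
}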

net/core/filter.c

@@ -1524,6 +1524,10 @@ out_err:
         return ERR_PTR(err);
 }

+void __weak bpf_int_jit_compile(struct sk_filter *prog)
+{
+}
+
 static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
                                              struct sock *sk)
 {
@@ -1544,9 +1548,12 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
         /* JIT compiler couldn't process this filter, so do the
          * internal BPF translation for the optimized interpreter.
          */
-        if (!fp->jited)
+        if (!fp->jited) {
                 fp = __sk_migrate_filter(fp, sk);

+                /* Probe if internal BPF can be jit-ed */
+                bpf_int_jit_compile(fp);
+        }
         return fp;
 }