Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2018-05-24

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Björn Töpel cleans up AF_XDP (removes rebind, explicit cache alignment from uapi, etc).

2) David Ahern adds mtu checks to bpf_ipv{4,6}_fib_lookup() helpers.

3) Jesper Dangaard Brouer adds bulking support to ndo_xdp_xmit.

4) Jiong Wang adds support for indirect and arithmetic shifts to NFP.

5) Martin KaFai Lau cleans up BTF uapi and makes the btf_header extensible.

6) Mathieu Xhonneux adds an End.BPF action to seg6local with BPF helpers allowing to edit/grow/shrink a SRH and apply generic SRv6 actions on a packet.

7) Sandipan Das adds support for bpf2bpf function calls in the ppc64 JIT.

8) Yonghong Song adds the BPF_TASK_FD_QUERY command for introspection of tracing events.

9) Other misc fixes from Gustavo A. R. Silva, Sirio Balmelli, John Fastabend, and Magnus Karlsson.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in: commit 90fed9c946
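Item 3's driver changes below (i40e, ixgbe, tun, virtio_net) all follow one pattern. A minimal sketch of that bulking contract, assuming a hypothetical example_tx_one() helper — not code taken from the patches themselves:

/* Sketch of the new ndo_xdp_xmit contract described in the
 * include/linux/netdevice.h hunk further down: take an array of @n frames,
 * return how many were accepted, and free rejected frames via the XDP
 * return API. example_tx_one() is a made-up per-frame transmit helper. */
static int example_xdp_xmit(struct net_device *dev, int n,
			    struct xdp_frame **frames)
{
	int drops = 0;
	int i;

	for (i = 0; i < n; i++) {
		struct xdp_frame *xdpf = frames[i];

		if (example_tx_one(dev, xdpf)) {	/* hypothetical */
			xdp_return_frame_rx_napi(xdpf);
			drops++;
		}
	}

	return n - drops;	/* negative only if nothing was sent at all */
}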
@@ -167,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
 {
+	unsigned int i, ctx_idx = ctx->idx;
+
+	/* Load function address into r12 */
+	PPC_LI64(12, func);
+
+	/* For bpf-to-bpf function calls, the callee's address is unknown
+	 * until the last extra pass. As seen above, we use PPC_LI64() to
+	 * load the callee's address, but this may optimize the number of
+	 * instructions required based on the nature of the address.
+	 *
+	 * Since we don't want the number of instructions emitted to change,
+	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
+	 * we always have a five-instruction sequence, which is the maximum
+	 * that PPC_LI64() can emit.
+	 */
+	for (i = ctx->idx - ctx_idx; i < 5; i++)
+		PPC_NOP();
+
 #ifdef PPC64_ELF_ABI_v1
-	/* func points to the function descriptor */
-	PPC_LI64(b2p[TMP_REG_2], func);
-	/* Load actual entry point from function descriptor */
-	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
-	/* ... and move it to LR */
-	PPC_MTLR(b2p[TMP_REG_1]);
 	/*
 	 * Load TOC from function descriptor at offset 8.
 	 * We can clobber r2 since we get called through a
 	 * function pointer (so caller will save/restore r2)
 	 * and since we don't use a TOC ourself.
 	 */
-	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
-	/* We can clobber r12 */
-	PPC_FUNC_ADDR(12, func);
-	PPC_MTLR(12);
+	PPC_BPF_LL(2, 12, 8);
+	/* Load actual entry point from function descriptor */
+	PPC_BPF_LL(12, 12, 0);
 #endif
 
+	PPC_MTLR(12);
 	PPC_BLRL();
 }

@@ -256,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 /* Assemble the body code between the prologue & epilogue */
 static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			      struct codegen_context *ctx,
-			      u32 *addrs)
+			      u32 *addrs, bool extra_pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;

@@ -712,11 +724,25 @@ emit_clear:
 			break;
 
 		/*
-		 * Call kernel helper
+		 * Call kernel helper or bpf function
 		 */
 		case BPF_JMP | BPF_CALL:
 			ctx->seen |= SEEN_FUNC;
-			func = (u8 *) __bpf_call_base + imm;
 
+			/* bpf function call */
+			if (insn[i].src_reg == BPF_PSEUDO_CALL)
+				if (!extra_pass)
+					func = NULL;
+				else if (fp->aux->func && off < fp->aux->func_cnt)
+					/* use the subprog id from the off
+					 * field to lookup the callee address
+					 */
+					func = (u8 *) fp->aux->func[off]->bpf_func;
+				else
+					return -EINVAL;
+			/* kernel helper call */
+			else
+				func = (u8 *) __bpf_call_base + imm;
+
 			bpf_jit_emit_func_call(image, ctx, (u64)func);

@@ -864,6 +890,14 @@ cond_branch:
 	return 0;
 }
 
+struct powerpc64_jit_data {
+	struct bpf_binary_header *header;
+	u32 *addrs;
+	u8 *image;
+	u32 proglen;
+	struct codegen_context ctx;
+};
+
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
 	u32 proglen;

@@ -871,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	u8 *image = NULL;
 	u32 *code_base;
 	u32 *addrs;
+	struct powerpc64_jit_data *jit_data;
 	struct codegen_context cgctx;
 	int pass;
 	int flen;

@@ -878,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	struct bpf_prog *org_fp = fp;
 	struct bpf_prog *tmp_fp;
 	bool bpf_blinded = false;
+	bool extra_pass = false;
 
 	if (!fp->jit_requested)
 		return org_fp;

@@ -891,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		fp = tmp_fp;
 	}
 
+	jit_data = fp->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			fp = org_fp;
+			goto out;
+		}
+		fp->aux->jit_data = jit_data;
+	}
+
 	flen = fp->len;
+	addrs = jit_data->addrs;
+	if (addrs) {
+		cgctx = jit_data->ctx;
+		image = jit_data->image;
+		bpf_hdr = jit_data->header;
+		proglen = jit_data->proglen;
+		alloclen = proglen + FUNCTION_DESCR_SIZE;
+		extra_pass = true;
+		goto skip_init_ctx;
+	}
+
 	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
 	if (addrs == NULL) {
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
 	memset(&cgctx, 0, sizeof(struct codegen_context));

@@ -904,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
 		/* We hit something illegal or unsupported. */
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
 	/*

@@ -925,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 			bpf_jit_fill_ill_insns);
 	if (!bpf_hdr) {
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
+skip_init_ctx:
 	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
 
 	/* Code generation passes 1-2 */

@@ -935,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		/* Now build the prologue, body code & epilogue for real. */
 		cgctx.idx = 0;
 		bpf_jit_build_prologue(code_base, &cgctx);
-		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
 		bpf_jit_build_epilogue(code_base, &cgctx);
 
 		if (bpf_jit_enable > 1)

@@ -961,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	fp->jited_len = alloclen;
 
 	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+	if (!fp->is_func || extra_pass) {
+out_addrs:
+		kfree(addrs);
+		kfree(jit_data);
+		fp->aux->jit_data = NULL;
+	} else {
+		jit_data->addrs = addrs;
+		jit_data->ctx = cgctx;
+		jit_data->proglen = proglen;
+		jit_data->image = image;
+		jit_data->header = bpf_hdr;
+	}
 
 out:
-	kfree(addrs);
-
 	if (bpf_blinded)
 		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
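The NOP padding above is what makes two-pass JITing of bpf-to-bpf calls work: on the scouting pass func is still NULL, yet the call site must occupy exactly five instructions so the real address patched in during the extra pass cannot shift any already-recorded offsets. A rough model, with instruction counts assumed for illustration only:

/* Model (an assumption, not the kernel's emitter): a naive li64 needs one
 * instruction per populated 16-bit chunk of the immediate. */
static unsigned int li64_len(unsigned long long imm)
{
	unsigned int n = 1;		/* initial li/lis */

	while (imm >>= 16)
		n++;			/* ori/oris/shift steps */
	return n;			/* real PPC_LI64() maxes out at 5 */
}

/* First pass: func == NULL, li64_len(0) == 1, so four NOPs are appended.
 * Extra pass: the real address may need up to five instructions; either
 * way the call site is exactly five, so addrs[] stays valid. */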
@@ -3664,14 +3664,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
  * @dev: netdev
  * @xdp: XDP buffer
  *
- * Returns Zero if sent, else an error code
+ * Returns number of frames successfully sent. Frames that fail are
+ * free'ed via XDP return API.
+ *
+ * For error cases, a negative errno code is returned and no-frames
+ * are transmitted (caller must handle freeing frames).
 **/
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
 {
 	struct i40e_netdev_priv *np = netdev_priv(dev);
 	unsigned int queue_index = smp_processor_id();
 	struct i40e_vsi *vsi = np->vsi;
-	int err;
+	int drops = 0;
+	int i;
 
 	if (test_bit(__I40E_VSI_DOWN, vsi->state))
 		return -ENETDOWN;

@@ -3679,11 +3684,18 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 	if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
 		return -ENXIO;
 
-	err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
-	if (err != I40E_XDP_TX)
-		return -ENOSPC;
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
 
-	return 0;
+		err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
+		if (err != I40E_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	return n - drops;
 }
 
 /**
@@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
 void i40e_detect_recover_hung(struct i40e_vsi *vsi);
 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
-int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
+int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
 void i40e_xdp_flush(struct net_device *dev);
 
 /**
@@ -10022,11 +10022,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	}
 }
 
-static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+static int ixgbe_xdp_xmit(struct net_device *dev, int n,
+			  struct xdp_frame **frames)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
 	struct ixgbe_ring *ring;
-	int err;
+	int drops = 0;
+	int i;
 
 	if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
 		return -ENETDOWN;

@@ -10038,11 +10040,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 	if (unlikely(!ring))
 		return -ENXIO;
 
-	err = ixgbe_xmit_xdp_ring(adapter, xdpf);
-	if (err != IXGBE_XDP_TX)
-		return -ENOSPC;
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+		int err;
 
-	return 0;
+		err = ixgbe_xmit_xdp_ring(adapter, xdpf);
+		if (err != IXGBE_XDP_TX) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+
+	return n - drops;
 }
 
 static void ixgbe_xdp_flush(struct net_device *dev)
@@ -211,6 +211,60 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
 	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
 }
 
+static void
+__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
+	      bool set, bool src_lmextn)
+{
+	u16 addr_lo, addr_hi;
+	u64 insn;
+
+	addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
+	addr_hi = addr != addr_lo;
+
+	insn = OP_BR_BIT_BASE |
+		FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
+		FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
+		FIELD_PREP(OP_BR_BIT_BV, set) |
+		FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
+		FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
+		FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
+		FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);
+
+	nfp_prog_push(nfp_prog, insn);
+}
+
+static void
+emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
+		 u8 defer, bool set, enum nfp_relo_type relo)
+{
+	struct nfp_insn_re_regs reg;
+	int err;
+
+	/* NOTE: The bit to test is specified as an rotation amount, such that
+	 * the bit to test will be placed on the MSB of the result when
+	 * doing a rotate right. For bit X, we need right rotate X + 1.
+	 */
+	bit += 1;
+
+	err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
+	if (err) {
+		nfp_prog->error = err;
+		return;
+	}
+
+	__emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
+		      reg.src_lmextn);
+
+	nfp_prog->prog[nfp_prog->prog_len - 1] |=
+		FIELD_PREP(OP_RELO_TYPE, relo);
+}
+
+static void
+emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
+{
+	emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
+}
+
 static void
 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
 	     enum immed_width width, bool invert,

@@ -309,6 +363,19 @@ emit_shf(struct nfp_prog *nfp_prog, swreg dst,
 		   reg.dst_lmextn, reg.src_lmextn);
 }
 
+static void
+emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
+	       swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
+{
+	if (sc == SHF_SC_R_ROT) {
+		pr_err("indirect shift is not allowed on rotation\n");
+		nfp_prog->error = -EFAULT;
+		return;
+	}
+
+	emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
+}
+
 static void
 __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
 	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,

@@ -1629,26 +1696,142 @@ static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return 0;
 }
 
+/* Pseudo code:
+ *   if shift_amt >= 32
+ *     dst_high = dst_low << shift_amt[4:0]
+ *     dst_low = 0;
+ *   else
+ *     dst_high = (dst_high, dst_low) >> (32 - shift_amt)
+ *     dst_low = dst_low << shift_amt
+ *
+ * The indirect shift will use the same logic at runtime.
+ */
+static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
+{
+	if (shift_amt < 32) {
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
+			 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
+			 32 - shift_amt);
+		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+			 reg_b(dst), SHF_SC_L_SHF, shift_amt);
+	} else if (shift_amt == 32) {
+		wrp_reg_mov(nfp_prog, dst + 1, dst);
+		wrp_immed(nfp_prog, reg_both(dst), 0);
+	} else if (shift_amt > 32) {
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+			 reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
+		wrp_immed(nfp_prog, reg_both(dst), 0);
+	}
+
+	return 0;
+}
+
 static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	const struct bpf_insn *insn = &meta->insn;
 	u8 dst = insn->dst_reg * 2;
 
-	if (insn->imm < 32) {
-		emit_shf(nfp_prog, reg_both(dst + 1),
-			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
-			 SHF_SC_R_DSHF, 32 - insn->imm);
-		emit_shf(nfp_prog, reg_both(dst),
-			 reg_none(), SHF_OP_NONE, reg_b(dst),
-			 SHF_SC_L_SHF, insn->imm);
-	} else if (insn->imm == 32) {
-		wrp_reg_mov(nfp_prog, dst + 1, dst);
-		wrp_immed(nfp_prog, reg_both(dst), 0);
-	} else if (insn->imm > 32) {
-		emit_shf(nfp_prog, reg_both(dst + 1),
-			 reg_none(), SHF_OP_NONE, reg_b(dst),
-			 SHF_SC_L_SHF, insn->imm - 32);
-		wrp_immed(nfp_prog, reg_both(dst), 0);
+	return __shl_imm64(nfp_prog, dst, insn->imm);
 }
 
+static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
+		 reg_b(src));
+	emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
+		       reg_b(dst), SHF_SC_R_DSHF);
+}
+
+/* NOTE: for indirect left shift, HIGH part should be calculated first. */
+static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+		       reg_b(dst), SHF_SC_L_SHF);
+}
+
+static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	shl_reg64_lt32_high(nfp_prog, dst, src);
+	shl_reg64_lt32_low(nfp_prog, dst, src);
+}
+
+static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+		       reg_b(dst), SHF_SC_L_SHF);
+	wrp_immed(nfp_prog, reg_both(dst), 0);
+}
+
+static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 umin, umax;
+	u8 dst, src;
+
+	dst = insn->dst_reg * 2;
+	umin = meta->umin;
+	umax = meta->umax;
+	if (umin == umax)
+		return __shl_imm64(nfp_prog, dst, umin);
+
+	src = insn->src_reg * 2;
+	if (umax < 32) {
+		shl_reg64_lt32(nfp_prog, dst, src);
+	} else if (umin >= 32) {
+		shl_reg64_ge32(nfp_prog, dst, src);
+	} else {
+		/* Generate different instruction sequences depending on runtime
+		 * value of shift amount.
+		 */
+		u16 label_ge32, label_end;
+
+		label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
+		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
+
+		shl_reg64_lt32_high(nfp_prog, dst, src);
+		label_end = nfp_prog_current_offset(nfp_prog) + 6;
+		emit_br(nfp_prog, BR_UNC, label_end, 2);
+		/* shl_reg64_lt32_low packed in delay slot. */
+		shl_reg64_lt32_low(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
+			return -EINVAL;
+		shl_reg64_ge32(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Pseudo code:
+ *   if shift_amt >= 32
+ *     dst_high = 0;
+ *     dst_low = dst_high >> shift_amt[4:0]
+ *   else
+ *     dst_high = dst_high >> shift_amt
+ *     dst_low = (dst_high, dst_low) >> shift_amt
+ *
+ * The indirect shift will use the same logic at runtime.
+ */
+static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
+{
+	if (shift_amt < 32) {
+		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
+			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
+	} else if (shift_amt == 32) {
+		wrp_reg_mov(nfp_prog, dst, dst + 1);
+		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+	} else if (shift_amt > 32) {
+		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
+		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+	}
+
+	return 0;

@@ -1659,21 +1842,186 @@ static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	const struct bpf_insn *insn = &meta->insn;
 	u8 dst = insn->dst_reg * 2;
 
-	if (insn->imm < 32) {
-		emit_shf(nfp_prog, reg_both(dst),
-			 reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
-			 SHF_SC_R_DSHF, insn->imm);
-		emit_shf(nfp_prog, reg_both(dst + 1),
-			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
-			 SHF_SC_R_SHF, insn->imm);
-	} else if (insn->imm == 32) {
+	return __shr_imm64(nfp_prog, dst, insn->imm);
 }
 
+/* NOTE: for indirect right shift, LOW part should be calculated first. */
+static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
+		       reg_b(dst + 1), SHF_SC_R_SHF);
+}
+
+static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
+		       reg_b(dst), SHF_SC_R_DSHF);
+}
+
+static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	shr_reg64_lt32_low(nfp_prog, dst, src);
+	shr_reg64_lt32_high(nfp_prog, dst, src);
+}
+
+static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
+	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
+		       reg_b(dst + 1), SHF_SC_R_SHF);
+	wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+}
+
+static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 umin, umax;
+	u8 dst, src;
+
+	dst = insn->dst_reg * 2;
+	umin = meta->umin;
+	umax = meta->umax;
+	if (umin == umax)
+		return __shr_imm64(nfp_prog, dst, umin);
+
+	src = insn->src_reg * 2;
+	if (umax < 32) {
+		shr_reg64_lt32(nfp_prog, dst, src);
+	} else if (umin >= 32) {
+		shr_reg64_ge32(nfp_prog, dst, src);
+	} else {
+		/* Generate different instruction sequences depending on runtime
+		 * value of shift amount.
+		 */
+		u16 label_ge32, label_end;
+
+		label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
+		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
+		shr_reg64_lt32_low(nfp_prog, dst, src);
+		label_end = nfp_prog_current_offset(nfp_prog) + 6;
+		emit_br(nfp_prog, BR_UNC, label_end, 2);
+		/* shr_reg64_lt32_high packed in delay slot. */
+		shr_reg64_lt32_high(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
+			return -EINVAL;
+		shr_reg64_ge32(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Code logic is the same as __shr_imm64 except ashr requires signedness bit
+ * told through PREV_ALU result.
+ */
+static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
+{
+	if (shift_amt < 32) {
+		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
+			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
+		/* Set signedness bit. */
+		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
+			 reg_imm(0));
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
+	} else if (shift_amt == 32) {
+		/* NOTE: this also helps setting signedness bit. */
+		wrp_reg_mov(nfp_prog, dst, dst + 1);
-		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
-	} else if (insn->imm > 32) {
-		emit_shf(nfp_prog, reg_both(dst),
-			 reg_none(), SHF_OP_NONE, reg_b(dst + 1),
-			 SHF_SC_R_SHF, insn->imm - 32);
-		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
+	} else if (shift_amt > 32) {
+		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
+			 reg_imm(0));
+		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
+			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
+		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
+	}
+
+	return 0;
+}
+
+static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u8 dst = insn->dst_reg * 2;
+
+	return __ashr_imm64(nfp_prog, dst, insn->imm);
+}
+
+static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	/* NOTE: the first insn will set both indirect shift amount (source A)
+	 * and signedness bit (MSB of result).
+	 */
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
+	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+		       reg_b(dst + 1), SHF_SC_R_SHF);
+}
+
+static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	/* NOTE: it is the same as logic shift because we don't need to shift in
+	 * signedness bit when the shift amount is less than 32.
+	 */
+	return shr_reg64_lt32_low(nfp_prog, dst, src);
+}
+
+static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	ashr_reg64_lt32_low(nfp_prog, dst, src);
+	ashr_reg64_lt32_high(nfp_prog, dst, src);
+}
+
+static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
+{
+	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
+	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
+		       reg_b(dst + 1), SHF_SC_R_SHF);
+	emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
+		 reg_b(dst + 1), SHF_SC_R_SHF, 31);
+}
+
+/* Like ashr_imm64, but need to use indirect shift. */
+static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	const struct bpf_insn *insn = &meta->insn;
+	u64 umin, umax;
+	u8 dst, src;
+
+	dst = insn->dst_reg * 2;
+	umin = meta->umin;
+	umax = meta->umax;
+	if (umin == umax)
+		return __ashr_imm64(nfp_prog, dst, umin);
+
+	src = insn->src_reg * 2;
+	if (umax < 32) {
+		ashr_reg64_lt32(nfp_prog, dst, src);
+	} else if (umin >= 32) {
+		ashr_reg64_ge32(nfp_prog, dst, src);
+	} else {
+		u16 label_ge32, label_end;
+
+		label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
+		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
+		ashr_reg64_lt32_low(nfp_prog, dst, src);
+		label_end = nfp_prog_current_offset(nfp_prog) + 6;
+		emit_br(nfp_prog, BR_UNC, label_end, 2);
+		/* ashr_reg64_lt32_high packed in delay slot. */
+		ashr_reg64_lt32_high(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
+			return -EINVAL;
+		ashr_reg64_ge32(nfp_prog, dst, src);
+
+		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
+			return -EINVAL;
+	}
+
+	return 0;
+}

@@ -2501,8 +2849,12 @@ static const instr_cb_t instr_cb[256] = {
 	[BPF_ALU64 | BPF_SUB | BPF_X] =	sub_reg64,
 	[BPF_ALU64 | BPF_SUB | BPF_K] =	sub_imm64,
 	[BPF_ALU64 | BPF_NEG] =		neg_reg64,
+	[BPF_ALU64 | BPF_LSH | BPF_X] =	shl_reg64,
 	[BPF_ALU64 | BPF_LSH | BPF_K] =	shl_imm64,
+	[BPF_ALU64 | BPF_RSH | BPF_X] =	shr_reg64,
 	[BPF_ALU64 | BPF_RSH | BPF_K] =	shr_imm64,
+	[BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
+	[BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
 	[BPF_ALU | BPF_MOV | BPF_X] =	mov_reg,
 	[BPF_ALU | BPF_MOV | BPF_K] =	mov_imm,
 	[BPF_ALU | BPF_XOR | BPF_X] =	xor_reg,
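The pseudo-code comments above compress a lot; here is a small reference model in plain C of the __shl_imm64() lowering (this is a checkable restatement of the comment, not NFP code):

#include <assert.h>
#include <stdint.h>

/* The 64-bit value lives in a 32-bit register pair (hi, lo), as on the NFP. */
static void shl64_model(uint32_t *hi, uint32_t *lo, unsigned int shift_amt)
{
	if (shift_amt == 0) {
		return;
	} else if (shift_amt < 32) {
		/* dst_high = (dst_high, dst_low) >> (32 - shift_amt) */
		*hi = (*hi << shift_amt) | (*lo >> (32 - shift_amt));
		/* dst_low = dst_low << shift_amt */
		*lo <<= shift_amt;
	} else {
		/* dst_high = dst_low << shift_amt[4:0]; dst_low = 0 */
		*hi = *lo << (shift_amt & 31);
		*lo = 0;
	}
}

int main(void)
{
	uint64_t v = 0x123456789abcdef0ULL;
	uint32_t hi = v >> 32, lo = (uint32_t)v;

	shl64_model(&hi, &lo, 12);
	assert((((uint64_t)hi << 32) | lo) == (v << 12));
	return 0;
}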
@@ -263,6 +263,8 @@ struct nfp_bpf_reg_state {
  * @func_id: function id for call instructions
  * @arg1: arg1 for call instructions
  * @arg2: arg2 for call instructions
+ * @umin: copy of core verifier umin_value.
+ * @umax: copy of core verifier umax_value.
  * @off: index of first generated machine instruction (in nfp_prog.prog)
  * @n: eBPF instruction number
  * @flags: eBPF instruction extra optimization flags

@@ -298,6 +300,13 @@ struct nfp_insn_meta {
 			struct bpf_reg_state arg1;
 			struct nfp_bpf_reg_state arg2;
 		};
+		/* We are interested in range info for some operands,
+		 * for example, the shift amount.
+		 */
+		struct {
+			u64 umin;
+			u64 umax;
+		};
 	};
 	unsigned int off;
 	unsigned short n;

@@ -375,6 +384,25 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
 	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
 }
 
+static inline bool is_mbpf_indir_shift(const struct nfp_insn_meta *meta)
+{
+	u8 code = meta->insn.code;
+	bool is_alu, is_shift;
+	u8 opclass, opcode;
+
+	opclass = BPF_CLASS(code);
+	is_alu = opclass == BPF_ALU64 || opclass == BPF_ALU;
+	if (!is_alu)
+		return false;
+
+	opcode = BPF_OP(code);
+	is_shift = opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH;
+	if (!is_shift)
+		return false;
+
+	return BPF_SRC(code) == BPF_X;
+}
+
 /**
  * struct nfp_prog - nfp BPF program
  * @bpf: backpointer to the bpf app priv structure
@@ -190,6 +190,8 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
 
 		meta->insn = prog[i];
 		meta->n = i;
+		if (is_mbpf_indir_shift(meta))
+			meta->umin = U64_MAX;
 
 		list_add_tail(&meta->l, &nfp_prog->insns);
 	}
@@ -551,6 +551,14 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
 	if (is_mbpf_xadd(meta))
 		return nfp_bpf_check_xadd(nfp_prog, meta, env);
 
+	if (is_mbpf_indir_shift(meta)) {
+		const struct bpf_reg_state *sreg =
+			cur_regs(env) + meta->insn.src_reg;
+
+		meta->umin = min(meta->umin, sreg->umin_value);
+		meta->umax = max(meta->umax, sreg->umax_value);
+	}
+
 	return 0;
 }
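Taken together: nfp_prog_prepare() seeds umin/umax, the verifier hook above narrows them per instruction, and the JIT then dispatches on the resulting range. A compact sketch of that dispatch, mirroring the structure of shl_reg64()/shr_reg64()/ashr_reg64() above:

#include <stdint.h>

enum shift_seq { SEQ_IMM, SEQ_LT32, SEQ_GE32, SEQ_RUNTIME_BRANCH };

/* How the JIT chooses a lowering from verifier range info (sketch). */
static enum shift_seq pick_shift_seq(uint64_t umin, uint64_t umax)
{
	if (umin == umax)
		return SEQ_IMM;		/* constant amount: immediate shifts */
	if (umax < 32)
		return SEQ_LT32;	/* never crosses the 32-bit boundary */
	if (umin >= 32)
		return SEQ_GE32;	/* always crosses it */
	return SEQ_RUNTIME_BRANCH;	/* branch on bit 5 via emit_br_bset() */
}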
@@ -72,8 +72,21 @@
 #define OP_BR_ADDR_LO		0x007ffc00000ULL
 #define OP_BR_ADDR_HI		0x10000000000ULL
 
-#define nfp_is_br(_insn) \
-	(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
+#define OP_BR_BIT_BASE		0x0d000000000ULL
+#define OP_BR_BIT_BASE_MASK	0x0f800080300ULL
+#define OP_BR_BIT_A_SRC		0x000000000ffULL
+#define OP_BR_BIT_B_SRC		0x0000003fc00ULL
+#define OP_BR_BIT_BV		0x00000040000ULL
+#define OP_BR_BIT_SRC_LMEXTN	0x40000000000ULL
+#define OP_BR_BIT_DEFBR		OP_BR_DEFBR
+#define OP_BR_BIT_ADDR_LO	OP_BR_ADDR_LO
+#define OP_BR_BIT_ADDR_HI	OP_BR_ADDR_HI
+
+static inline bool nfp_is_br(u64 insn)
+{
+	return (insn & OP_BR_BASE_MASK) == OP_BR_BASE ||
+	       (insn & OP_BR_BIT_BASE_MASK) == OP_BR_BIT_BASE;
+}
 
 enum br_mask {
 	BR_BEQ = 0x00,

@@ -161,6 +174,7 @@ enum shf_op {
 	SHF_OP_NONE = 0,
 	SHF_OP_AND = 2,
 	SHF_OP_OR = 5,
+	SHF_OP_ASHR = 6,
 };
 
 enum shf_sc {
@@ -70,6 +70,7 @@
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
 #include <net/sock.h>
+#include <net/xdp.h>
 #include <linux/seq_file.h>
 #include <linux/uio.h>
 #include <linux/skb_array.h>

@@ -1284,34 +1285,44 @@ static const struct net_device_ops tun_netdev_ops = {
 	.ndo_get_stats64	= tun_net_get_stats64,
 };
 
-static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame)
+static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 	struct tun_file *tfile;
 	u32 numqueues;
-	int ret = 0;
+	int drops = 0;
+	int cnt = n;
+	int i;
 
 	rcu_read_lock();
 
 	numqueues = READ_ONCE(tun->numqueues);
 	if (!numqueues) {
-		ret = -ENOSPC;
-		goto out;
+		rcu_read_unlock();
+		return -ENXIO; /* Caller will free/return all frames */
 	}
 
 	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
 					    numqueues]);
-	/* Encode the XDP flag into lowest bit for consumer to differ
-	 * XDP buffer from sk_buff.
-	 */
-	if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) {
-		this_cpu_inc(tun->pcpu_stats->tx_dropped);
-		ret = -ENOSPC;
-	}
 
-out:
+	spin_lock(&tfile->tx_ring.producer_lock);
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdp = frames[i];
+		/* Encode the XDP flag into lowest bit for consumer to differ
+		 * XDP buffer from sk_buff.
+		 */
+		void *frame = tun_xdp_to_ptr(xdp);
+
+		if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
+			this_cpu_inc(tun->pcpu_stats->tx_dropped);
+			xdp_return_frame_rx_napi(xdp);
+			drops++;
+		}
+	}
+	spin_unlock(&tfile->tx_ring.producer_lock);
+
 	rcu_read_unlock();
-	return ret;
+	return cnt - drops;
 }
 
 static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)

@@ -1321,7 +1332,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
 	if (unlikely(!frame))
 		return -EOVERFLOW;
 
-	return tun_xdp_xmit(dev, frame);
+	return tun_xdp_xmit(dev, 1, &frame);
 }
 
 static void tun_xdp_flush(struct net_device *dev)
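Note that the tun change also switches from ptr_ring_produce() to the unlocked __ptr_ring_produce() under a single producer_lock acquisition, amortizing the lock over the whole bulk. The pattern in isolation (a sketch, not the driver code):

#include <linux/ptr_ring.h>
#include <linux/spinlock.h>

/* Take the producer lock once for the bulk; use the unlocked variant per
 * entry instead of paying one lock round-trip per frame. */
static int bulk_produce(struct ptr_ring *ring, void **items, int n)
{
	int queued = 0;
	int i;

	spin_lock(&ring->producer_lock);
	for (i = 0; i < n; i++)
		if (!__ptr_ring_produce(ring, items[i]))
			queued++;
	spin_unlock(&ring->producer_lock);

	return queued;
}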
@@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
 	virtqueue_kick(sq->vq);
 }
 
-static int __virtnet_xdp_xmit(struct virtnet_info *vi,
-			      struct xdp_frame *xdpf)
+static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
+				  struct send_queue *sq,
+				  struct xdp_frame *xdpf)
 {
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
-	struct xdp_frame *xdpf_sent;
-	struct send_queue *sq;
 	unsigned int len;
-	unsigned int qp;
 	int err;
 
-	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
-	sq = &vi->sq[qp];
-
-	/* Free up any pending old buffers before queueing new ones. */
-	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
-		xdp_return_frame(xdpf_sent);
-
 	/* virtqueue want to use data area in-front of packet */
 	if (unlikely(xdpf->metasize > 0))
 		return -EOPNOTSUPP;

@@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
 	return 0;
 }
 
-static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
+static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
+				 struct xdp_frame *xdpf)
+{
+	struct xdp_frame *xdpf_sent;
+	struct send_queue *sq;
+	unsigned int len;
+	unsigned int qp;
+
+	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+	sq = &vi->sq[qp];
+
+	/* Free up any pending old buffers before queueing new ones. */
+	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+		xdp_return_frame(xdpf_sent);
+
+	return __virtnet_xdp_xmit_one(vi, sq, xdpf);
+}
+
+static int virtnet_xdp_xmit(struct net_device *dev,
+			    int n, struct xdp_frame **frames)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct receive_queue *rq = vi->rq;
+	struct xdp_frame *xdpf_sent;
 	struct bpf_prog *xdp_prog;
+	struct send_queue *sq;
+	unsigned int len;
+	unsigned int qp;
+	int drops = 0;
+	int err;
+	int i;
+
+	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+	sq = &vi->sq[qp];
 
 	/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
 	 * indicate XDP resources have been successfully allocated.

@@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
 	if (!xdp_prog)
 		return -ENXIO;
 
-	return __virtnet_xdp_xmit(vi, xdpf);
+	/* Free up any pending old buffers before queueing new ones. */
+	while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
+		xdp_return_frame(xdpf_sent);
+
+	for (i = 0; i < n; i++) {
+		struct xdp_frame *xdpf = frames[i];
+
+		err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
+		if (err) {
+			xdp_return_frame_rx_napi(xdpf);
+			drops++;
+		}
+	}
+	return n - drops;
 }
 
 static unsigned int virtnet_get_headroom(struct virtnet_info *vi)

@@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 			xdpf = convert_to_xdp_frame(&xdp);
 			if (unlikely(!xdpf))
 				goto err_xdp;
-			err = __virtnet_xdp_xmit(vi, xdpf);
+			err = __virtnet_xdp_tx_xmit(vi, xdpf);
 			if (unlikely(err)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				goto err_xdp;

@@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			xdpf = convert_to_xdp_frame(&xdp);
 			if (unlikely(!xdpf))
 				goto err_xdp;
-			err = __virtnet_xdp_xmit(vi, xdpf);
+			err = __virtnet_xdp_tx_xmit(vi, xdpf);
 			if (unlikely(err)) {
 				trace_xdp_exception(vi->dev, xdp_prog, act);
 				if (unlikely(xdp_page != page))
@@ -69,8 +69,8 @@ struct bpf_map {
 	u32 pages;
 	u32 id;
 	int numa_node;
-	u32 btf_key_id;
-	u32 btf_value_id;
+	u32 btf_key_type_id;
+	u32 btf_value_type_id;
 	struct btf *btf;
 	bool unpriv_array;
 	/* 55 bytes hole */

@@ -463,6 +463,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
 int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
 
 int bpf_get_file_flag(int flags);
+int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
+			     size_t actual_size);
 
 /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
  * forced to use 'long' read/writes to try to atomically copy long counters.

@@ -485,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
 void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
 
 /* Map specifics */
-struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
+struct xdp_buff;
+
+struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
 void __dev_map_flush(struct bpf_map *map);
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+		    struct net_device *dev_rx);
 
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
 void __cpu_map_flush(struct bpf_map *map);
-struct xdp_buff;
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);

@@ -571,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map)
 {
 }
 
+struct xdp_buff;
+struct bpf_dtab_netdev;
+
+static inline
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+		    struct net_device *dev_rx)
+{
+	return 0;
+}
+
 static inline
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 {

@@ -585,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map)
 {
 }
 
-struct xdp_buff;
 static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
 				  struct xdp_buff *xdp,
 				  struct net_device *dev_rx)
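A hedged sketch of how the devmap side (not shown in this excerpt) is expected to drive dev_map_enqueue() and the bulking ndo: frames are staged in a small per-CPU queue and flushed in one driver call. Names are modeled on kernel/bpf/devmap.c from this series; the layout below is illustrative, not verbatim:

#define DEV_MAP_BULK_SIZE 16	/* per-CPU staging queue used by the series */

struct xdp_bulk_queue {
	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
	unsigned int count;
};

/* Called from the flush path: one ndo_xdp_xmit() per device per flush. */
static void bq_xmit_all(struct net_device *dev, struct xdp_bulk_queue *bq)
{
	int sent;

	if (!bq->count)
		return;

	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
	if (sent < 0) {
		/* General ndo error: nothing was sent, caller frees all. */
		int i;

		for (i = 0; i < bq->count; i++)
			xdp_return_frame_rx_napi(bq->q[i]);
		sent = 0;
	}
	/* Drivers free frames they dropped themselves, so sent vs. count
	 * mainly feeds the xdp_devmap_xmit tracepoint added below. */
	bq->count = 0;
}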
@@ -9,9 +9,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
 BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
 BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
@@ -517,6 +517,7 @@ struct sk_msg_buff {
 	bool sg_copy[MAX_SKB_FRAGS];
 	__u32 flags;
 	struct sock *sk_redir;
+	struct sock *sk;
 	struct sk_buff *skb;
 	struct list_head list;
 };
@@ -1185,9 +1185,13 @@ struct dev_ifalias {
  *	This function is used to set or query state related to XDP on the
  *	netdevice and manage BPF offload. See definition of
  *	enum bpf_netdev_command for details.
- * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
- *	This function is used to submit a XDP packet for transmit on a
- *	netdevice.
+ * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
+ *	This function is used to submit @n XDP packets for transmit on a
+ *	netdevice. Returns number of frames successfully transmitted, frames
+ *	that got dropped are freed/returned via xdp_return_frame().
+ *	Returns negative number, means general error invoking ndo, meaning
+ *	no frames were xmit'ed and core-caller will free all frames.
+ *	TODO: Consider add flag to allow sending flush operation.
  * void (*ndo_xdp_flush)(struct net_device *dev);
  *	This function is used to inform the driver to flush a particular
  *	xdp tx queue. Must be called on same CPU as xdp_xmit.

@@ -1375,8 +1379,8 @@ struct net_device_ops {
 						       int needed_headroom);
 	int			(*ndo_bpf)(struct net_device *dev,
 					   struct netdev_bpf *bpf);
-	int			(*ndo_xdp_xmit)(struct net_device *dev,
-						struct xdp_frame *xdp);
+	int			(*ndo_xdp_xmit)(struct net_device *dev, int n,
+						struct xdp_frame **xdp);
 	void			(*ndo_xdp_flush)(struct net_device *dev);
 };
@@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
 extern struct file *perf_event_get(unsigned int fd);
+extern const struct perf_event *perf_get_event(struct file *file);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);

@@ -1289,6 +1290,10 @@ static inline void perf_event_exit_task(struct task_struct *child) { }
 static inline void perf_event_free_task(struct task_struct *task) { }
 static inline void perf_event_delayed_put(struct task_struct *task) { }
 static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); }
+static inline const struct perf_event *perf_get_event(struct file *file)
+{
+	return ERR_PTR(-EINVAL);
+}
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
 {
 	return ERR_PTR(-EINVAL);
@@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info);
 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
 struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
+int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
+			    u32 *fd_type, const char **buf,
+			    u64 *probe_offset, u64 *probe_addr);
 #else
 static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 {

@@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name
 {
 	return NULL;
 }
+static inline int bpf_get_perf_event_info(const struct perf_event *event,
+					  u32 *prog_id, u32 *fd_type,
+					  const char **buf, u64 *probe_offset,
+					  u64 *probe_addr)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 enum {

@@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags);
 #ifdef CONFIG_KPROBE_EVENTS
 extern int  perf_kprobe_init(struct perf_event *event, bool is_retprobe);
 extern void perf_kprobe_destroy(struct perf_event *event);
+extern int bpf_get_kprobe_info(const struct perf_event *event,
+			       u32 *fd_type, const char **symbol,
+			       u64 *probe_offset, u64 *probe_addr,
+			       bool perf_type_tracepoint);
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
 extern int  perf_uprobe_init(struct perf_event *event, bool is_retprobe);
 extern void perf_uprobe_destroy(struct perf_event *event);
+extern int bpf_get_uprobe_info(const struct perf_event *event,
+			       u32 *fd_type, const char **filename,
+			       u64 *probe_offset, bool perf_type_tracepoint);
 #endif
 extern int  ftrace_profile_set_filter(struct perf_event *event, int event_id,
 				      char *filter_str);
@@ -236,6 +236,8 @@ struct ipv6_stub {
 				 struct flowi6 *fl6, int oif,
 				 const struct sk_buff *skb,
 				 int strict);
+	u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
+				 struct in6_addr *saddr);
 
 	void (*udpv6_encap_enable)(void);
 	void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
@@ -412,6 +412,12 @@ static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
 	return f6i->fib6_nh.nh_dev;
 }
 
+static inline
+struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i)
+{
+	return f6i->fib6_nh.nh_lwtstate;
+}
+
 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
 		     unsigned int flags);
@@ -294,6 +294,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
 	return mtu;
 }
 
+u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
+		      struct in6_addr *saddr);
+
 struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
 				   struct net_device *dev, struct sk_buff *skb,
 				   const void *daddr);
@@ -449,4 +449,6 @@ static inline void fib_proc_exit(struct net *net)
 }
 #endif
 
+u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
+
 #endif /* _NET_FIB_H */
@@ -115,13 +115,14 @@ void page_pool_destroy(struct page_pool *pool);
 void __page_pool_put_page(struct page_pool *pool,
 			  struct page *page, bool allow_direct);
 
-static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
+static inline void page_pool_put_page(struct page_pool *pool,
+				      struct page *page, bool allow_direct)
 {
 	/* When page_pool isn't compiled-in, net/core/xdp.c doesn't
 	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
 	 */
 #ifdef CONFIG_PAGE_POOL
-	__page_pool_put_page(pool, page, false);
+	__page_pool_put_page(pool, page, allow_direct);
 #endif
 }
 /* Very limited use-cases allow recycle direct */
@@ -49,7 +49,11 @@ struct seg6_pernet_data {
 
 static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
 {
+#if IS_ENABLED(CONFIG_IPV6)
 	return net->ipv6.seg6_data;
+#else
+	return NULL;
+#endif
 }
 
 extern int seg6_init(void);

@@ -63,5 +67,6 @@ extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
 extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
 			     int proto);
 extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
-
+extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+			       u32 tbl_id);
 #endif
@@ -0,0 +1,32 @@
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Authors:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *  eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _NET_SEG6_LOCAL_H
+#define _NET_SEG6_LOCAL_H
+
+#include <linux/percpu.h>
+#include <linux/net.h>
+#include <linux/ipv6.h>
+
+extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+			       u32 tbl_id);
+
+struct seg6_bpf_srh_state {
+	bool valid;
+	u16 hdrlen;
+};
+
+DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
+
+#endif
@@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 }
 
 void xdp_return_frame(struct xdp_frame *xdpf);
+void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
 void xdp_return_buff(struct xdp_buff *xdp);
 
 int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
@@ -1,15 +1,6 @@
-/* SPDX-License-Identifier: GPL-2.0
- * AF_XDP internal functions
+/* SPDX-License-Identifier: GPL-2.0 */
+/* AF_XDP internal functions
  * Copyright(c) 2018 Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
  */
 
 #ifndef _LINUX_XDP_SOCK_H
@@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
 		  __entry->map_id, __entry->map_index)
 );
 
+#ifndef __DEVMAP_OBJ_TYPE
+#define __DEVMAP_OBJ_TYPE
+struct _bpf_dtab_netdev {
+	struct net_device *dev;
+};
+#endif /* __DEVMAP_OBJ_TYPE */
+
 #define devmap_ifindex(fwd, map)				\
 	(!fwd ? 0 :						\
 	 (!map ? 0 :						\
 	  ((map->map_type == BPF_MAP_TYPE_DEVMAP) ?		\
-	   ((struct net_device *)fwd)->ifindex : 0)))
+	   ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
 
 #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx)	\
 	 trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map),	\

@@ -222,6 +229,47 @@ TRACE_EVENT(xdp_cpumap_enqueue,
 		  __entry->to_cpu)
 );
 
+TRACE_EVENT(xdp_devmap_xmit,
+
+	TP_PROTO(const struct bpf_map *map, u32 map_index,
+		 int sent, int drops,
+		 const struct net_device *from_dev,
+		 const struct net_device *to_dev, int err),
+
+	TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err),
+
+	TP_STRUCT__entry(
+		__field(int, map_id)
+		__field(u32, act)
+		__field(u32, map_index)
+		__field(int, drops)
+		__field(int, sent)
+		__field(int, from_ifindex)
+		__field(int, to_ifindex)
+		__field(int, err)
+	),
+
+	TP_fast_assign(
+		__entry->map_id		= map->id;
+		__entry->act		= XDP_REDIRECT;
+		__entry->map_index	= map_index;
+		__entry->drops		= drops;
+		__entry->sent		= sent;
+		__entry->from_ifindex	= from_dev->ifindex;
+		__entry->to_ifindex	= to_dev->ifindex;
+		__entry->err		= err;
+	),
+
+	TP_printk("ndo_xdp_xmit"
+		  " map_id=%d map_index=%d action=%s"
+		  " sent=%d drops=%d"
+		  " from_ifindex=%d to_ifindex=%d err=%d",
+		  __entry->map_id, __entry->map_index,
+		  __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+		  __entry->sent, __entry->drops,
+		  __entry->from_ifindex, __entry->to_ifindex, __entry->err)
+);
+
 #endif /* _TRACE_XDP_H */
 
 #include <trace/define_trace.h>
@@ -97,6 +97,7 @@ enum bpf_cmd {
 	BPF_RAW_TRACEPOINT_OPEN,
 	BPF_BTF_LOAD,
 	BPF_BTF_GET_FD_BY_ID,
+	BPF_TASK_FD_QUERY,
 };
 
 enum bpf_map_type {

@@ -141,6 +142,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SK_MSG,
 	BPF_PROG_TYPE_RAW_TRACEPOINT,
 	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 };
 
 enum bpf_attach_type {

@@ -284,8 +286,8 @@ union bpf_attr {
 		char	map_name[BPF_OBJ_NAME_LEN];
 		__u32	map_ifindex;	/* ifindex of netdev to create on */
 		__u32	btf_fd;		/* fd pointing to a BTF type data */
-		__u32	btf_key_id;	/* BTF type_id of the key */
-		__u32	btf_value_id;	/* BTF type_id of the value */
+		__u32	btf_key_type_id;	/* BTF type_id of the key */
+		__u32	btf_value_type_id;	/* BTF type_id of the value */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */

@@ -379,6 +381,22 @@ union bpf_attr {
 		__u32		btf_log_size;
 		__u32		btf_log_level;
 	};
+
+	struct {
+		__u32		pid;		/* input: pid */
+		__u32		fd;		/* input: fd */
+		__u32		flags;		/* input: flags */
+		__u32		buf_len;	/* input/output: buf len */
+		__aligned_u64	buf;		/* input/output:
+						 *   tp_name for tracepoint
+						 *   symbol for kprobe
+						 *   filename for uprobe
+						 */
+		__u32		prog_id;	/* output: prod_id */
+		__u32		fd_type;	/* output: BPF_FD_TYPE_* */
+		__u64		probe_offset;	/* output: probe_offset */
+		__u64		probe_addr;	/* output: probe_addr */
+	} task_fd_query;
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
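A hypothetical userspace sketch of the new command (the wrapper and its name are made up; the attr fields are exactly the ones defined above):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Ask the kernel what a perf_event/tracing fd in task @pid is attached to;
 * on success the queried name lands in @buf and prog_id/fd_type etc. can be
 * read back out of @attr. */
static int task_fd_query(int pid, int fd, char *buf, __u32 buf_len)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid = pid;
	attr.task_fd_query.fd = fd;
	attr.task_fd_query.buf = (__u64)(unsigned long)buf;
	attr.task_fd_query.buf_len = buf_len;

	return syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr));
}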
@ -1902,6 +1920,90 @@ union bpf_attr {
 *		egress otherwise). This is the only flag supported for now.
 *	Return
 *		**SK_PASS** on success, or **SK_DROP** on error.
 *
 * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
 *	Description
 *		Encapsulate the packet associated to *skb* within a Layer 3
 *		protocol header. This header is provided in the buffer at
 *		address *hdr*, with *len* its size in bytes. *type* indicates
 *		the protocol of the header and can be one of:
 *
 *		**BPF_LWT_ENCAP_SEG6**
 *			IPv6 encapsulation with Segment Routing Header
 *			(**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
 *			the IPv6 header is computed by the kernel.
 *		**BPF_LWT_ENCAP_SEG6_INLINE**
 *			Only works if *skb* contains an IPv6 packet. Insert a
 *			Segment Routing Header (**struct ipv6_sr_hdr**) inside
 *			the IPv6 header.
 *
 *		A call to this helper may change the underlying packet
 *		buffer. Therefore, at load time, all checks on pointers
 *		previously done by the verifier are invalidated and must be
 *		performed again, if the helper is used in combination with
 *		direct packet access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
 *	Description
 *		Store *len* bytes from address *from* into the packet
 *		associated to *skb*, at *offset*. Only the flags, tag and TLVs
 *		inside the outermost IPv6 Segment Routing Header can be
 *		modified through this helper.
 *
 *		A call to this helper may change the underlying packet
 *		buffer. Therefore, at load time, all checks on pointers
 *		previously done by the verifier are invalidated and must be
 *		performed again, if the helper is used in combination with
 *		direct packet access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
 *	Description
 *		Adjust the size allocated to TLVs in the outermost IPv6
 *		Segment Routing Header contained in the packet associated to
 *		*skb*, at position *offset* by *delta* bytes. Only offsets
 *		after the segments are accepted. *delta* can be positive
 *		(growing) as well as negative (shrinking).
 *
 *		A call to this helper may change the underlying packet
 *		buffer. Therefore, at load time, all checks on pointers
 *		previously done by the verifier are invalidated and must be
 *		performed again, if the helper is used in combination with
 *		direct packet access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 *
 * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
 *	Description
 *		Apply an IPv6 Segment Routing action of type *action* to the
 *		packet associated to *skb*. Each action takes a parameter
 *		contained at address *param*, and of length *param_len* bytes.
 *		*action* can be one of:
 *
 *		**SEG6_LOCAL_ACTION_END_X**
 *			End.X action: Endpoint with Layer-3 cross-connect.
 *			Type of *param*: **struct in6_addr**.
 *		**SEG6_LOCAL_ACTION_END_T**
 *			End.T action: Endpoint with specific IPv6 table lookup.
 *			Type of *param*: **int**.
 *		**SEG6_LOCAL_ACTION_END_B6**
 *			End.B6 action: Endpoint bound to an SRv6 policy.
 *			Type of *param*: **struct ipv6_sr_hdr**.
 *		**SEG6_LOCAL_ACTION_END_B6_ENCAP**
 *			End.B6.Encap action: Endpoint bound to an SRv6
 *			encapsulation policy.
 *			Type of *param*: **struct ipv6_sr_hdr**.
 *
 *		A call to this helper may change the underlying packet
 *		buffer. Therefore, at load time, all checks on pointers
 *		previously done by the verifier are invalidated and must be
 *		performed again, if the helper is used in combination with
 *		direct packet access.
 *	Return
 *		0 on success, or a negative error in case of failure.
 */
#define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
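To make the helper flow concrete, here is a minimal End.BPF program sketch. It assumes the usual bpf_helpers.h shim for SEC() and the helper declarations; the section name, TLV offset and TLV contents are illustrative, not taken from a real deployment.

#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("lwt_seg6local")
int srh_add_tlv(struct __sk_buff *skb)
{
	__u8 tlv[8] = { 0x01, 0x06, 0, 0, 0, 0, 0, 0 };	/* made-up TLV */
	__u32 tlv_off = 56;	/* assumed: just past the segment list */

	if (bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(tlv)) < 0)
		return BPF_DROP;
	/* adjust_srh may move the packet buffer, so no pointer from
	 * before the call may be reused here */
	if (bpf_lwt_seg6_store_bytes(skb, tlv_off, tlv, sizeof(tlv)) < 0)
		return BPF_DROP;
	return BPF_OK;
}

char _license[] SEC("license") = "GPL";
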
@ -1976,7 +2078,11 @@ union bpf_attr {
	FN(fib_lookup),			\
	FN(sock_hash_update),		\
	FN(msg_redirect_hash),		\
	FN(sk_redirect_hash),
	FN(sk_redirect_hash),		\
	FN(lwt_push_encap),		\
	FN(lwt_seg6_store_bytes),	\
	FN(lwt_seg6_adjust_srh),	\
	FN(lwt_seg6_action),

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
@ -2043,6 +2149,12 @@ enum bpf_hdr_start_off {
	BPF_HDR_START_NET,
};

/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
enum bpf_lwt_encap_mode {
	BPF_LWT_ENCAP_SEG6,
	BPF_LWT_ENCAP_SEG6_INLINE
};

/* user accessible mirror of in-kernel sk_buff.
 * new fields can only be added to the end of this structure
 */
@ -2176,6 +2288,14 @@ enum sk_action {
struct sk_msg_md {
	void *data;
	void *data_end;

	__u32 family;
	__u32 remote_ip4;	/* Stored in network byte order */
	__u32 local_ip4;	/* Stored in network byte order */
	__u32 remote_ip6[4];	/* Stored in network byte order */
	__u32 local_ip6[4];	/* Stored in network byte order */
	__u32 remote_port;	/* Stored in network byte order */
	__u32 local_port;	/* stored in host byte order */
};

#define BPF_TAG_SIZE	8
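A short sketch of how an SK_MSG program might consume the new socket metadata; the policy shown (pass only local port 8000) is purely illustrative, and bpf_helpers.h is assumed for SEC().

#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("sk_msg")
int msg_filter(struct sk_msg_md *msg)
{
	/* local_port is host byte order, remote_port network byte order,
	 * per the field comments above */
	if (msg->family == 2 /* AF_INET */ && msg->local_port == 8000)
		return SK_PASS;
	return SK_DROP;	/* illustrative policy only */
}

char _license[] SEC("license") = "GPL";
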
@ -2197,6 +2317,10 @@ struct bpf_prog_info {
	__u32 gpl_compatible:1;
	__u64 netns_dev;
	__u64 netns_ino;
	__u32 nr_jited_ksyms;
	__u32 nr_jited_func_lens;
	__aligned_u64 jited_ksyms;
	__aligned_u64 jited_func_lens;
} __attribute__((aligned(8)));

struct bpf_map_info {
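These fields are filled by BPF_OBJ_GET_INFO_BY_FD; a hedged user-space sketch follows, with error handling trimmed and a fixed array size of 64 chosen only for illustration.

#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static void show_jited_funcs(int prog_fd)
{
	__u64 ksyms[64];
	__u32 lens[64];
	struct bpf_prog_info info;
	union bpf_attr attr;
	__u32 i;

	memset(&info, 0, sizeof(info));
	info.nr_jited_ksyms = 64;
	info.jited_ksyms = (__u64)(unsigned long)ksyms;
	info.nr_jited_func_lens = 64;
	info.jited_func_lens = (__u64)(unsigned long)lens;

	memset(&attr, 0, sizeof(attr));
	attr.info.bpf_fd = prog_fd;
	attr.info.info_len = sizeof(info);
	attr.info.info = (__u64)(unsigned long)&info;

	if (syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)))
		return;
	/* one kernel symbol address and JITed length per bpf2bpf function */
	for (i = 0; i < info.nr_jited_ksyms && i < 64; i++)
		printf("func %u at 0x%llx, %u bytes\n", i, ksyms[i], lens[i]);
}
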
@ -2211,8 +2335,8 @@ struct bpf_map_info {
	__u64 netns_dev;
	__u64 netns_ino;
	__u32 btf_id;
	__u32 btf_key_id;
	__u32 btf_value_id;
	__u32 btf_key_type_id;
	__u32 btf_value_type_id;
} __attribute__((aligned(8)));

struct bpf_btf_info {
@ -2450,4 +2574,13 @@ struct bpf_fib_lookup {
	__u8	dmac[6];	/* ETH_ALEN */
};

enum bpf_task_fd_type {
	BPF_FD_TYPE_RAW_TRACEPOINT,	/* tp name */
	BPF_FD_TYPE_TRACEPOINT,		/* tp name */
	BPF_FD_TYPE_KPROBE,		/* (symbol + offset) or addr */
	BPF_FD_TYPE_KRETPROBE,		/* (symbol + offset) or addr */
	BPF_FD_TYPE_UPROBE,		/* filename + offset */
	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
};

#endif /* _UAPI__LINUX_BPF_H__ */

@ -12,42 +12,29 @@ struct btf_header {
	__u16	magic;
	__u8	version;
	__u8	flags;

	__u32	parent_label;
	__u32	parent_name;
	__u32	hdr_len;

	/* All offsets are in bytes relative to the end of this header */
	__u32	label_off;	/* offset of label section	*/
	__u32	object_off;	/* offset of data object section */
	__u32	func_off;	/* offset of function section	*/
	__u32	type_off;	/* offset of type section	*/
	__u32	type_len;	/* length of type section	*/
	__u32	str_off;	/* offset of string section	*/
	__u32	str_len;	/* length of string section	*/
};

/* Max # of type identifier */
#define BTF_MAX_TYPE	0x7fffffff
#define BTF_MAX_TYPE	0x0000ffff
/* Max offset into the string section */
#define BTF_MAX_NAME_OFFSET	0x7fffffff
#define BTF_MAX_NAME_OFFSET	0x0000ffff
/* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN	0xffff

/* The type id is referring to a parent BTF */
#define BTF_TYPE_PARENT(id)	(((id) >> 31) & 0x1)
#define BTF_TYPE_ID(id)	((id) & BTF_MAX_TYPE)

/* String is in the ELF string section */
#define BTF_STR_TBL_ELF_ID(ref)	(((ref) >> 31) & 0x1)
#define BTF_STR_OFFSET(ref)	((ref) & BTF_MAX_NAME_OFFSET)

struct btf_type {
	__u32 name_off;
	/* "info" bits arrangement
	 * bits  0-15: vlen (e.g. # of struct's members)
	 * bits 16-23: unused
	 * bits 24-28: kind (e.g. int, ptr, array...etc)
	 * bits 29-30: unused
	 * bits 31:    root
	 * bits 24-27: kind (e.g. int, ptr, array...etc)
	 * bits 28-31: unused
	 */
	__u32 info;
	/* "size" is used by INT, ENUM, STRUCT and UNION.

@ -62,8 +49,7 @@ struct btf_type {
	};
};

#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x1f)
#define BTF_INFO_ISROOT(info)	(!!(((info) >> 24) & 0x80))
#define BTF_INFO_KIND(info)	(((info) >> 24) & 0x0f)
#define BTF_INFO_VLEN(info)	((info) & 0xffff)

#define BTF_KIND_UNKN	0	/* Unknown	*/

@ -88,15 +74,14 @@ struct btf_type {
/* BTF_KIND_INT is followed by a u32 and the following
 * is the 32 bits arrangement:
 */
#define BTF_INT_ENCODING(VAL)	(((VAL) & 0xff000000) >> 24)
#define BTF_INT_ENCODING(VAL)	(((VAL) & 0x0f000000) >> 24)
#define BTF_INT_OFFSET(VAL)	(((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_BITS(VAL)	((VAL) & 0x0000ffff)

/* Attributes stored in the BTF_INT_ENCODING */
#define BTF_INT_SIGNED	0x1
#define BTF_INT_CHAR	0x2
#define BTF_INT_BOOL	0x4
#define BTF_INT_VARARGS	0x8
#define BTF_INT_SIGNED	(1 << 0)
#define BTF_INT_CHAR	(1 << 1)
#define BTF_INT_BOOL	(1 << 2)

/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
 * The exact number of btf_enum is stored in the vlen (of the

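The header is now extensible through hdr_len. A hedged sketch of the user-space side of that contract, assuming a loader that lays the type section directly after the header and the string section after the types:

#include <linux/btf.h>
#include <string.h>

static void btf_fill_header(struct btf_header *hdr, __u32 type_len,
			    __u32 str_len)
{
	memset(hdr, 0, sizeof(*hdr));
	hdr->magic = BTF_MAGIC;
	hdr->version = BTF_VERSION;
	/* a newer kernel accepts a shorter hdr_len; a longer one must
	 * be zero past the fields the kernel knows about */
	hdr->hdr_len = sizeof(*hdr);
	hdr->type_off = 0;		/* offsets relative to end of header */
	hdr->type_len = type_len;
	hdr->str_off = type_len;	/* string section right after types */
	hdr->str_len = str_len;
}
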
@ -1,17 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 *
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 * if_xdp: XDP socket user-space interface
 * Copyright(c) 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * Author(s): Björn Töpel <bjorn.topel@intel.com>
 *	      Magnus Karlsson <magnus.karlsson@intel.com>
 */

@ -26,19 +17,33 @@

struct sockaddr_xdp {
	__u16 sxdp_family;
	__u16 sxdp_flags;
	__u32 sxdp_ifindex;
	__u32 sxdp_queue_id;
	__u32 sxdp_shared_umem_fd;
	__u16 sxdp_flags;
};

struct xdp_ring_offset {
	__u64 producer;
	__u64 consumer;
	__u64 desc;
};

struct xdp_mmap_offsets {
	struct xdp_ring_offset rx;
	struct xdp_ring_offset tx;
	struct xdp_ring_offset fr; /* Fill */
	struct xdp_ring_offset cr; /* Completion */
};

/* XDP socket options */
#define XDP_RX_RING			1
#define XDP_TX_RING			2
#define XDP_UMEM_REG			3
#define XDP_UMEM_FILL_RING		4
#define XDP_UMEM_COMPLETION_RING	5
#define XDP_STATISTICS			6
#define XDP_MMAP_OFFSETS		1
#define XDP_RX_RING			2
#define XDP_TX_RING			3
#define XDP_UMEM_REG			4
#define XDP_UMEM_FILL_RING		5
#define XDP_UMEM_COMPLETION_RING	6
#define XDP_STATISTICS			7

struct xdp_umem_reg {
	__u64 addr; /* Start of packet data area */

@ -59,6 +64,7 @@ struct xdp_statistics {
#define XDP_UMEM_PGOFF_FILL_RING	0x100000000
#define XDP_UMEM_PGOFF_COMPLETION_RING	0x180000000

/* Rx/Tx descriptor */
struct xdp_desc {
	__u32 idx;
	__u32 len;

@ -67,21 +73,6 @@ struct xdp_desc {
	__u8 padding[5];
};

struct xdp_ring {
	__u32 producer __attribute__((aligned(64)));
	__u32 consumer __attribute__((aligned(64)));
};

/* Used for the RX and TX queues for packets */
struct xdp_rxtx_ring {
	struct xdp_ring ptrs;
	struct xdp_desc desc[0] __attribute__((aligned(64)));
};

/* Used for the fill and completion queues for buffers */
struct xdp_umem_ring {
	struct xdp_ring ptrs;
	__u32 desc[0] __attribute__((aligned(64)));
};
/* UMEM descriptor is __u32 */

#endif /* _LINUX_IF_XDP_H */

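With the ring structs and their cache-line alignment gone from the uapi, user space discovers the layout at runtime. A hedged sketch of the new flow (SOL_XDP value and error handling are the caller's problem; `xsk` is an AF_XDP socket fd):

#include <linux/if_xdp.h>
#include <sys/mman.h>
#include <sys/socket.h>

#ifndef SOL_XDP
#define SOL_XDP 283	/* assumed: not yet in older libc headers */
#endif

static void *map_rx_ring(int xsk, __u32 ndescs, struct xdp_mmap_offsets *off)
{
	socklen_t optlen = sizeof(*off);

	if (getsockopt(xsk, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen))
		return MAP_FAILED;

	/* producer, consumer and descriptor array all live inside this
	 * one mapping, at the offsets the kernel just reported */
	return mmap(NULL, off->rx.desc + ndescs * sizeof(struct xdp_desc),
		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		    xsk, XDP_PGOFF_RX_RING);
}
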
@ -25,6 +25,7 @@ enum {
	SEG6_LOCAL_NH6,
	SEG6_LOCAL_IIF,
	SEG6_LOCAL_OIF,
	SEG6_LOCAL_BPF,
	__SEG6_LOCAL_MAX,
};
#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)

@ -59,10 +60,21 @@ enum {
	SEG6_LOCAL_ACTION_END_AS	= 13,
	/* forward to SR-unaware VNF with masquerading */
	SEG6_LOCAL_ACTION_END_AM	= 14,
	/* custom BPF action */
	SEG6_LOCAL_ACTION_END_BPF	= 15,

	__SEG6_LOCAL_ACTION_MAX,
};

#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)

enum {
	SEG6_LOCAL_BPF_PROG_UNSPEC,
	SEG6_LOCAL_BPF_PROG,
	SEG6_LOCAL_BPF_PROG_NAME,
	__SEG6_LOCAL_BPF_PROG_MAX,
};

#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)

#endif

@ -352,7 +352,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
	}

	seq_printf(m, "%u: ", *(u32 *)key);
	btf_type_seq_show(map->btf, map->btf_value_id, value, m);
	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
	seq_puts(m, "\n");

	rcu_read_unlock();

kernel/bpf/btf.c

@ -12,6 +12,7 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>

@ -162,13 +163,16 @@
#define BITS_ROUNDUP_BYTES(bits) \
	(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))

#define BTF_INFO_MASK 0x0f00ffff
#define BTF_INT_MASK 0x0fffffff
#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)

/* 16MB for 64k structs and each has 16 members and
 * a few MB spaces for the string section.
 * The hard limit is S32_MAX.
 */
#define BTF_MAX_SIZE (16 * 1024 * 1024)
/* 64k. We can raise it later. The hard limit is S32_MAX. */
#define BTF_MAX_NR_TYPES 65535

#define for_each_member(i, struct_type, member)			\
	for (i = 0, member = btf_type_member(struct_type);	\
@ -184,15 +188,13 @@ static DEFINE_IDR(btf_idr);
static DEFINE_SPINLOCK(btf_idr_lock);

struct btf {
	union {
		struct btf_header *hdr;
		void *data;
	};
	void *data;
	struct btf_type **types;
	u32 *resolved_ids;
	u32 *resolved_sizes;
	const char *strings;
	void *nohdr_data;
	struct btf_header hdr;
	u32 nr_types;
	u32 types_size;
	u32 data_size;
@ -228,6 +230,11 @@ enum resolve_mode {

#define MAX_RESOLVE_DEPTH 32

struct btf_sec_info {
	u32 off;
	u32 len;
};

struct btf_verifier_env {
	struct btf *btf;
	u8 *visit_states;
@ -379,8 +386,6 @@ static const char *btf_int_encoding_str(u8 encoding)
		return "CHAR";
	else if (encoding == BTF_INT_BOOL)
		return "BOOL";
	else if (encoding == BTF_INT_VARARGS)
		return "VARARGS";
	else
		return "UNKN";
}
@ -417,16 +422,16 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)

static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{
	return !BTF_STR_TBL_ELF_ID(offset) &&
		BTF_STR_OFFSET(offset) < btf->hdr->str_len;
	return BTF_STR_OFFSET_VALID(offset) &&
		offset < btf->hdr.str_len;
}

static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
{
	if (!BTF_STR_OFFSET(offset))
	if (!offset)
		return "(anon)";
	else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
		return &btf->strings[BTF_STR_OFFSET(offset)];
	else if (offset < btf->hdr.str_len)
		return &btf->strings[offset];
	else
		return "(invalid-name-offset)";
}
@ -439,6 +444,28 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
	return btf->types[type_id];
}

/*
 * Regular int is not a bit field and it must be either
 * u8/u16/u32/u64.
 */
static bool btf_type_int_is_regular(const struct btf_type *t)
{
	u16 nr_bits, nr_bytes;
	u32 int_data;

	int_data = btf_type_int(t);
	nr_bits = BTF_INT_BITS(int_data);
	nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
	if (BITS_PER_BYTE_MASKED(nr_bits) ||
	    BTF_INT_OFFSET(int_data) ||
	    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
	     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
		return false;
	}

	return true;
}

__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
					      const char *fmt, ...)
{
@ -536,7 +563,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
	__btf_verifier_log(log, "\n");
}

static void btf_verifier_log_hdr(struct btf_verifier_env *env)
static void btf_verifier_log_hdr(struct btf_verifier_env *env,
				 u32 btf_data_size)
{
	struct bpf_verifier_log *log = &env->log;
	const struct btf *btf = env->btf;
@ -545,19 +573,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env)
	if (!bpf_verifier_log_needed(log))
		return;

	hdr = btf->hdr;
	hdr = &btf->hdr;
	__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
	__btf_verifier_log(log, "version: %u\n", hdr->version);
	__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
	__btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
	__btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
	__btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
	__btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
	__btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
	__btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
	__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
	__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
	__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
	__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
	__btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
	__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
}

static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
@ -574,13 +599,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
		struct btf_type **new_types;
		u32 expand_by, new_size;

		if (btf->types_size == BTF_MAX_NR_TYPES) {
		if (btf->types_size == BTF_MAX_TYPE) {
			btf_verifier_log(env, "Exceeded max num of types");
			return -E2BIG;
		}

		expand_by = max_t(u32, btf->types_size >> 2, 16);
		new_size = min_t(u32, BTF_MAX_NR_TYPES,
		new_size = min_t(u32, BTF_MAX_TYPE,
				 btf->types_size + expand_by);

		new_types = kvzalloc(new_size * sizeof(*new_types),
@ -910,6 +935,12 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
	}

	int_data = btf_type_int(t);
	if (int_data & ~BTF_INT_MASK) {
		btf_verifier_log_basic(env, t, "Invalid int_data:%x",
				       int_data);
		return -EINVAL;
	}

	nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);

	if (nr_bits > BITS_PER_U64) {
@ -923,12 +954,17 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
		return -EINVAL;
	}

	/*
	 * Only one of the encoding bits is allowed and it
	 * should be sufficient for the pretty print purpose (i.e. decoding).
	 * Multiple bits can be allowed later if it is found
	 * to be insufficient.
	 */
	encoding = BTF_INT_ENCODING(int_data);
	if (encoding &&
	    encoding != BTF_INT_SIGNED &&
	    encoding != BTF_INT_CHAR &&
	    encoding != BTF_INT_BOOL &&
	    encoding != BTF_INT_VARARGS) {
	    encoding != BTF_INT_BOOL) {
		btf_verifier_log_type(env, t, "Unsupported encoding");
		return -ENOTSUPP;
	}
@ -1102,7 +1138,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
		return -EINVAL;
	}

	if (BTF_TYPE_PARENT(t->type)) {
	if (!BTF_TYPE_ID_VALID(t->type)) {
		btf_verifier_log_type(env, t, "Invalid type_id");
		return -EINVAL;
	}
@ -1306,14 +1342,16 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env,
		return -EINVAL;
	}

	/* We are a little forgiving on array->index_type since
	 * the kernel is not using it.
	/* Array elem type and index type cannot be in type void,
	 * so !array->type and !array->index_type are not allowed.
	 */
	/* Array elem cannot be in type void,
	 * so !array->type is not allowed.
	 */
	if (!array->type || BTF_TYPE_PARENT(array->type)) {
		btf_verifier_log_type(env, t, "Invalid type_id");
	if (!array->type || !BTF_TYPE_ID_VALID(array->type)) {
		btf_verifier_log_type(env, t, "Invalid elem");
		return -EINVAL;
	}

	if (!array->index_type || !BTF_TYPE_ID_VALID(array->index_type)) {
		btf_verifier_log_type(env, t, "Invalid index");
		return -EINVAL;
	}
@ -1326,11 +1364,32 @@ static int btf_array_resolve(struct btf_verifier_env *env,
			     const struct resolve_vertex *v)
{
	const struct btf_array *array = btf_type_array(v->t);
	const struct btf_type *elem_type;
	u32 elem_type_id = array->type;
	const struct btf_type *elem_type, *index_type;
	u32 elem_type_id, index_type_id;
	struct btf *btf = env->btf;
	u32 elem_size;

	/* Check array->index_type */
	index_type_id = array->index_type;
	index_type = btf_type_by_id(btf, index_type_id);
	if (btf_type_is_void_or_null(index_type)) {
		btf_verifier_log_type(env, v->t, "Invalid index");
		return -EINVAL;
	}

	if (!env_type_is_resolve_sink(env, index_type) &&
	    !env_type_is_resolved(env, index_type_id))
		return env_stack_push(env, index_type, index_type_id);

	index_type = btf_type_id_size(btf, &index_type_id, NULL);
	if (!index_type || !btf_type_is_int(index_type) ||
	    !btf_type_int_is_regular(index_type)) {
		btf_verifier_log_type(env, v->t, "Invalid index");
		return -EINVAL;
	}

	/* Check array->type */
	elem_type_id = array->type;
	elem_type = btf_type_by_id(btf, elem_type_id);
	if (btf_type_is_void_or_null(elem_type)) {
		btf_verifier_log_type(env, v->t,
@ -1348,22 +1407,9 @@ static int btf_array_resolve(struct btf_verifier_env *env,
		return -EINVAL;
	}

	if (btf_type_is_int(elem_type)) {
		int int_type_data = btf_type_int(elem_type);
		u16 nr_bits = BTF_INT_BITS(int_type_data);
		u16 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);

		/* Put more restriction on array of int.  The int cannot
		 * be a bit field and it must be either u8/u16/u32/u64.
		 */
		if (BITS_PER_BYTE_MASKED(nr_bits) ||
		    BTF_INT_OFFSET(int_type_data) ||
		    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
		     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
			btf_verifier_log_type(env, v->t,
					      "Invalid array of int");
			return -EINVAL;
		}
	if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) {
		btf_verifier_log_type(env, v->t, "Invalid array of int");
		return -EINVAL;
	}

	if (array->nelems && elem_size > U32_MAX / array->nelems) {
@ -1473,7 +1519,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
		}

		/* A member cannot be in type void */
		if (!member->type || BTF_TYPE_PARENT(member->type)) {
		if (!member->type || !BTF_TYPE_ID_VALID(member->type)) {
			btf_verifier_log_member(env, t, member,
						"Invalid type_id");
			return -EINVAL;
@ -1726,6 +1772,12 @@ static s32 btf_check_meta(struct btf_verifier_env *env,
	}
	meta_left -= sizeof(*t);

	if (t->info & ~BTF_INFO_MASK) {
		btf_verifier_log(env, "[%u] Invalid btf_info:%x",
				 env->log_type_id, t->info);
		return -EINVAL;
	}

	if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX ||
	    BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) {
		btf_verifier_log(env, "[%u] Invalid kind:%u",
@ -1754,9 +1806,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
	struct btf_header *hdr;
	void *cur, *end;

	hdr = btf->hdr;
	hdr = &btf->hdr;
	cur = btf->nohdr_data + hdr->type_off;
	end = btf->nohdr_data + hdr->str_off;
	end = btf->nohdr_data + hdr->type_len;

	env->log_type_id = 1;
	while (cur < end) {
@ -1866,8 +1918,20 @@ static int btf_check_all_types(struct btf_verifier_env *env)

static int btf_parse_type_sec(struct btf_verifier_env *env)
{
	const struct btf_header *hdr = &env->btf->hdr;
	int err;

	/* Type section must align to 4 bytes */
	if (hdr->type_off & (sizeof(u32) - 1)) {
		btf_verifier_log(env, "Unaligned type_off");
		return -EINVAL;
	}

	if (!hdr->type_len) {
		btf_verifier_log(env, "No type found");
		return -EINVAL;
	}

	err = btf_check_all_metas(env);
	if (err)
		return err;
@ -1881,10 +1945,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
	struct btf *btf = env->btf;
	const char *start, *end;

	hdr = btf->hdr;
	hdr = &btf->hdr;
	start = btf->nohdr_data + hdr->str_off;
	end = start + hdr->str_len;

	if (end != btf->data + btf->data_size) {
		btf_verifier_log(env, "String section is not at the end");
		return -EINVAL;
	}

	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
	    start[0] || end[-1]) {
		btf_verifier_log(env, "Invalid string section");
@ -1896,20 +1965,121 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
	return 0;
}

static int btf_parse_hdr(struct btf_verifier_env *env)
static const size_t btf_sec_info_offset[] = {
	offsetof(struct btf_header, type_off),
	offsetof(struct btf_header, str_off),
};

static int btf_sec_info_cmp(const void *a, const void *b)
{
	const struct btf_sec_info *x = a;
	const struct btf_sec_info *y = b;

	return (int)(x->off - y->off) ? : (int)(x->len - y->len);
}

static int btf_check_sec_info(struct btf_verifier_env *env,
			      u32 btf_data_size)
{
	struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)];
	u32 total, expected_total, i;
	const struct btf_header *hdr;
	const struct btf *btf;

	btf = env->btf;
	hdr = &btf->hdr;

	/* Populate the secs from hdr */
	for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++)
		secs[i] = *(struct btf_sec_info *)((void *)hdr +
						   btf_sec_info_offset[i]);

	sort(secs, ARRAY_SIZE(btf_sec_info_offset),
	     sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL);

	/* Check for gaps and overlap among sections */
	total = 0;
	expected_total = btf_data_size - hdr->hdr_len;
	for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) {
		if (expected_total < secs[i].off) {
			btf_verifier_log(env, "Invalid section offset");
			return -EINVAL;
		}
		if (total < secs[i].off) {
			/* gap */
			btf_verifier_log(env, "Unsupported section found");
			return -EINVAL;
		}
		if (total > secs[i].off) {
			btf_verifier_log(env, "Section overlap found");
			return -EINVAL;
		}
		if (expected_total - total < secs[i].len) {
			btf_verifier_log(env,
					 "Total section length too long");
			return -EINVAL;
		}
		total += secs[i].len;
	}

	/* There is data other than hdr and known sections */
	if (expected_total != total) {
		btf_verifier_log(env, "Unsupported section found");
		return -EINVAL;
	}

	return 0;
}

static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
			 u32 btf_data_size)
{
	const struct btf_header *hdr;
	struct btf *btf = env->btf;
	u32 meta_left;
	u32 hdr_len, hdr_copy;
	/*
	 * Minimal part of the "struct btf_header" that
	 * contains the hdr_len.
	 */
	struct btf_min_header {
		u16	magic;
		u8	version;
		u8	flags;
		u32	hdr_len;
	} __user *min_hdr;
	struct btf *btf;
	int err;

	if (btf->data_size < sizeof(*hdr)) {
	btf = env->btf;
	min_hdr = btf_data;

	if (btf_data_size < sizeof(*min_hdr)) {
		btf_verifier_log(env, "hdr_len not found");
		return -EINVAL;
	}

	if (get_user(hdr_len, &min_hdr->hdr_len))
		return -EFAULT;

	if (btf_data_size < hdr_len) {
		btf_verifier_log(env, "btf_header not found");
		return -EINVAL;
	}

	btf_verifier_log_hdr(env);
	err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len);
	if (err) {
		if (err == -E2BIG)
			btf_verifier_log(env, "Unsupported btf_header");
		return err;
	}

	hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
	if (copy_from_user(&btf->hdr, btf_data, hdr_copy))
		return -EFAULT;

	hdr = &btf->hdr;

	btf_verifier_log_hdr(env, btf_data_size);

	hdr = btf->hdr;
	if (hdr->magic != BTF_MAGIC) {
		btf_verifier_log(env, "Invalid magic");
		return -EINVAL;
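A plain-C illustration of the forward-compatibility rule enforced above, a user header longer than the kernel's copy must be all-zero past the fields the kernel knows, otherwise the load is rejected (this is a standalone restatement, not kernel code):

static bool tail_is_zero(const unsigned char *buf, unsigned long kern_len,
			 unsigned long user_len)
{
	unsigned long i;

	for (i = kern_len; i < user_len; i++)
		if (buf[i])
			return false;	/* unknown non-zero extension */
	return true;
}
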
@ -1925,26 +2095,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
		return -ENOTSUPP;
	}

	meta_left = btf->data_size - sizeof(*hdr);
	if (!meta_left) {
	if (btf_data_size == hdr->hdr_len) {
		btf_verifier_log(env, "No data");
		return -EINVAL;
	}

	if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
	    /* Type section must align to 4 bytes */
	    hdr->type_off & (sizeof(u32) - 1)) {
		btf_verifier_log(env, "Invalid type_off");
		return -EINVAL;
	}

	if (meta_left < hdr->str_off ||
	    meta_left - hdr->str_off < hdr->str_len) {
		btf_verifier_log(env, "Invalid str_off or str_len");
		return -EINVAL;
	}

	btf->nohdr_data = btf->hdr + 1;
	err = btf_check_sec_info(env, btf_data_size);
	if (err)
		return err;

	return 0;
}
@ -1987,6 +2145,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
		err = -ENOMEM;
		goto errout;
	}
	env->btf = btf;

	err = btf_parse_hdr(env, btf_data, btf_data_size);
	if (err)
		goto errout;

	data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
	if (!data) {
@ -1996,18 +2159,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,

	btf->data = data;
	btf->data_size = btf_data_size;
	btf->nohdr_data = btf->data + btf->hdr.hdr_len;

	if (copy_from_user(data, btf_data, btf_data_size)) {
		err = -EFAULT;
		goto errout;
	}

	env->btf = btf;

	err = btf_parse_hdr(env);
	if (err)
		goto errout;

	err = btf_parse_str_sec(env);
	if (err)
		goto errout;

@ -2016,16 +2174,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
	if (err)
		goto errout;

	if (!err && log->level && bpf_verifier_log_full(log)) {
	if (log->level && bpf_verifier_log_full(log)) {
		err = -ENOSPC;
		goto errout;
	}

	if (!err) {
		btf_verifier_env_free(env);
		refcount_set(&btf->refcnt, 1);
		return btf;
	}
	btf_verifier_env_free(env);
	refcount_set(&btf->refcnt, 1);
	return btf;

errout:
	btf_verifier_env_free(env);

@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
		err = __ptr_ring_produce(q, xdpf);
		if (err) {
			drops++;
			xdp_return_frame(xdpf);
			xdp_return_frame_rx_napi(xdpf);
		}
		processed++;
	}

@ -48,15 +48,25 @@
 * calls will fail at this point.
 */
#include <linux/bpf.h>
#include <net/xdp.h>
#include <linux/filter.h>
#include <trace/events/xdp.h>

#define DEV_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

#define DEV_MAP_BULK_SIZE 16
struct xdp_bulk_queue {
	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
	struct net_device *dev_rx;
	unsigned int count;
};

struct bpf_dtab_netdev {
	struct net_device *dev;
	struct net_device *dev; /* must be first member, due to tracepoint */
	struct bpf_dtab *dtab;
	unsigned int bit;
	struct xdp_bulk_queue __percpu *bulkq;
	struct rcu_head rcu;
};

@ -206,6 +216,50 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
	__set_bit(bit, bitmap);
}

static int bq_xmit_all(struct bpf_dtab_netdev *obj,
		       struct xdp_bulk_queue *bq)
{
	struct net_device *dev = obj->dev;
	int sent = 0, drops = 0, err = 0;
	int i;

	if (unlikely(!bq->count))
		return 0;

	for (i = 0; i < bq->count; i++) {
		struct xdp_frame *xdpf = bq->q[i];

		prefetch(xdpf);
	}

	sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
	if (sent < 0) {
		err = sent;
		sent = 0;
		goto error;
	}
	drops = bq->count - sent;
out:
	bq->count = 0;

	trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
			      sent, drops, bq->dev_rx, dev, err);
	bq->dev_rx = NULL;
	return 0;
error:
	/* If ndo_xdp_xmit fails with an errno, no frames have been
	 * xmit'ed and it's our responsibility to free them all.
	 */
	for (i = 0; i < bq->count; i++) {
		struct xdp_frame *xdpf = bq->q[i];

		/* RX path under NAPI protection, can return frames faster */
		xdp_return_frame_rx_napi(xdpf);
		drops++;
	}
	goto out;
}

/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
 * from the driver before returning from its napi->poll() routine. The poll()
 * routine is called either from busy_poll context or net_rx_action signaled
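For context, a hedged sketch of the driver side of this bulk contract (not a real driver; dummy_tx_one is an assumed helper): the driver transmits what it can, frees what it drops, and returns the count actually sent so bq_xmit_all can account drops.

static int dummy_ndo_xdp_xmit(struct net_device *dev, int n,
			      struct xdp_frame **frames)
{
	int i, sent = 0;

	for (i = 0; i < n; i++) {
		if (dummy_tx_one(dev, frames[i]) == 0) {	/* assumed helper */
			sent++;
		} else {
			/* the driver owns and frees frames it could not
			 * transmit; only a negative return means none
			 * were consumed */
			xdp_return_frame_rx_napi(frames[i]);
		}
	}
	return sent;	/* caller computes drops = n - sent */
}
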
@ -221,6 +275,7 @@ void __dev_map_flush(struct bpf_map *map)

	for_each_set_bit(bit, bitmap, map->max_entries) {
		struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
		struct xdp_bulk_queue *bq;
		struct net_device *netdev;

		/* This is possible if the dev entry is removed by user space
@ -230,6 +285,9 @@ void __dev_map_flush(struct bpf_map *map)
			continue;

		__clear_bit(bit, bitmap);

		bq = this_cpu_ptr(dev->bulkq);
		bq_xmit_all(dev, bq);
		netdev = dev->dev;
		if (likely(netdev->netdev_ops->ndo_xdp_flush))
			netdev->netdev_ops->ndo_xdp_flush(netdev);
@ -240,21 +298,61 @@ void __dev_map_flush(struct bpf_map *map)
 * update happens in parallel here a dev_put wont happen until after reading the
 * ifindex.
 */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *dev;
	struct bpf_dtab_netdev *obj;

	if (key >= map->max_entries)
		return NULL;

	dev = READ_ONCE(dtab->netdev_map[key]);
	return dev ? dev->dev : NULL;
	obj = READ_ONCE(dtab->netdev_map[key]);
	return obj;
}

/* Runs under RCU-read-side, plus in softirq under NAPI protection.
 * Thus, safe percpu variable access.
 */
static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
		      struct net_device *dev_rx)

{
	struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);

	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
		bq_xmit_all(obj, bq);

	/* Ingress dev_rx will be the same for all xdp_frame's in
	 * bulk_queue, because bq stored per-CPU and must be flushed
	 * from net_device drivers NAPI func end.
	 */
	if (!bq->dev_rx)
		bq->dev_rx = dev_rx;

	bq->q[bq->count++] = xdpf;
	return 0;
}

int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
		    struct net_device *dev_rx)
{
	struct net_device *dev = dst->dev;
	struct xdp_frame *xdpf;

	if (!dev->netdev_ops->ndo_xdp_xmit)
		return -EOPNOTSUPP;

	xdpf = convert_to_xdp_frame(xdp);
	if (unlikely(!xdpf))
		return -EOVERFLOW;

	return bq_enqueue(dst, xdpf, dev_rx);
}

static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key);
	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
	struct net_device *dev = obj ? obj->dev : NULL;

	return dev ? &dev->ifindex : NULL;
}
@ -263,13 +361,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{
	if (dev->dev->netdev_ops->ndo_xdp_flush) {
		struct net_device *fl = dev->dev;
		struct xdp_bulk_queue *bq;
		unsigned long *bitmap;

		int cpu;

		for_each_online_cpu(cpu) {
			bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
			__clear_bit(dev->bit, bitmap);

			bq = per_cpu_ptr(dev->bulkq, cpu);
			bq_xmit_all(dev, bq);

			fl->netdev_ops->ndo_xdp_flush(dev->dev);
		}
	}
@ -281,6 +384,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)

	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
	dev_map_flush_old(dev);
	free_percpu(dev->bulkq);
	dev_put(dev->dev);
	kfree(dev);
}
@ -313,6 +417,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct net *net = current->nsproxy->net_ns;
	gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
	struct bpf_dtab_netdev *dev, *old_dev;
	u32 i = *(u32 *)key;
	u32 ifindex = *(u32 *)value;
@ -327,13 +432,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
	if (!ifindex) {
		dev = NULL;
	} else {
		dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
				   map->numa_node);
		dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
		if (!dev)
			return -ENOMEM;

		dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
						sizeof(void *), gfp);
		if (!dev->bulkq) {
			kfree(dev);
			return -ENOMEM;
		}

		dev->dev = dev_get_by_index(net, ifindex);
		if (!dev->dev) {
			free_percpu(dev->bulkq);
			kfree(dev);
			return -EINVAL;
		}
@ -405,6 +517,9 @@ static struct notifier_block dev_map_notifier = {

static int __init dev_map_init(void)
{
	/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
	BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
		     offsetof(struct _bpf_dtab_netdev, dev));
	register_netdevice_notifier(&dev_map_notifier);
	return 0;
}

@ -523,6 +523,7 @@ static unsigned int smap_do_tx_msg(struct sock *sk,
	}

	bpf_compute_data_pointers_sg(md);
	md->sk = sk;
	rc = (*prog->bpf_func)(md, prog->insnsi);
	psock->apply_bytes = md->apply_bytes;

@ -1713,7 +1714,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
	struct smap_psock_map_entry *e = NULL;
	struct smap_psock *psock;
	bool new = false;
	int err;
	int err = 0;

	/* 1. If sock map has BPF programs those will be inherited by the
	 * sock being added. If the sock is already attached to BPF programs

@ -1823,7 +1824,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
	write_unlock_bh(&sock->sk_callback_lock);
	return err;
out_free:
	kfree(e);
	smap_release_sock(psock, sock);
out_progs:
	if (parse && verdict) {

@ -18,7 +18,9 @@
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
@ -65,9 +67,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
 * copy_from_user() call. However, this is not a concern since this function is
 * meant to be a future-proofing of bits.
 */
static int check_uarg_tail_zero(void __user *uaddr,
				size_t expected_size,
				size_t actual_size)
int bpf_check_uarg_tail_zero(void __user *uaddr,
			     size_t expected_size,
			     size_t actual_size)
{
	unsigned char __user *addr;
	unsigned char __user *end;
@ -422,7 +424,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
	return 0;
}

#define BPF_MAP_CREATE_LAST_FIELD btf_value_id
#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@ -457,10 +459,10 @@ static int map_create(union bpf_attr *attr)
	atomic_set(&map->usercnt, 1);

	if (bpf_map_support_seq_show(map) &&
	    (attr->btf_key_id || attr->btf_value_id)) {
	    (attr->btf_key_type_id || attr->btf_value_type_id)) {
		struct btf *btf;

		if (!attr->btf_key_id || !attr->btf_value_id) {
		if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
			err = -EINVAL;
			goto free_map_nouncharge;
		}
@ -471,16 +473,16 @@ static int map_create(union bpf_attr *attr)
			goto free_map_nouncharge;
		}

		err = map->ops->map_check_btf(map, btf, attr->btf_key_id,
					      attr->btf_value_id);
		err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
					      attr->btf_value_type_id);
		if (err) {
			btf_put(btf);
			goto free_map_nouncharge;
		}

		map->btf = btf;
		map->btf_key_id = attr->btf_key_id;
		map->btf_value_id = attr->btf_value_id;
		map->btf_key_type_id = attr->btf_key_type_id;
		map->btf_value_type_id = attr->btf_value_type_id;
	}

	err = security_bpf_map_alloc(map);
@ -1899,7 +1901,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
	u32 ulen;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);
@ -1933,6 +1935,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
	if (!capable(CAP_SYS_ADMIN)) {
		info.jited_prog_len = 0;
		info.xlated_prog_len = 0;
		info.nr_jited_ksyms = 0;
		goto done;
	}

@ -1969,18 +1972,93 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
	 * for offload.
	 */
	ulen = info.jited_prog_len;
	info.jited_prog_len = prog->jited_len;
	if (prog->aux->func_cnt) {
		u32 i;

		info.jited_prog_len = 0;
		for (i = 0; i < prog->aux->func_cnt; i++)
			info.jited_prog_len += prog->aux->func[i]->jited_len;
	} else {
		info.jited_prog_len = prog->jited_len;
	}

	if (info.jited_prog_len && ulen) {
		if (bpf_dump_raw_ok()) {
			uinsns = u64_to_user_ptr(info.jited_prog_insns);
			ulen = min_t(u32, info.jited_prog_len, ulen);
			if (copy_to_user(uinsns, prog->bpf_func, ulen))
				return -EFAULT;

			/* for multi-function programs, copy the JITed
			 * instructions for all the functions
			 */
			if (prog->aux->func_cnt) {
				u32 len, free, i;
				u8 *img;

				free = ulen;
				for (i = 0; i < prog->aux->func_cnt; i++) {
					len = prog->aux->func[i]->jited_len;
					len = min_t(u32, len, free);
					img = (u8 *) prog->aux->func[i]->bpf_func;
					if (copy_to_user(uinsns, img, len))
						return -EFAULT;
					uinsns += len;
					free -= len;
					if (!free)
						break;
				}
			} else {
				if (copy_to_user(uinsns, prog->bpf_func, ulen))
					return -EFAULT;
			}
		} else {
			info.jited_prog_insns = 0;
		}
	}

	ulen = info.nr_jited_ksyms;
	info.nr_jited_ksyms = prog->aux->func_cnt;
	if (info.nr_jited_ksyms && ulen) {
		if (bpf_dump_raw_ok()) {
			u64 __user *user_ksyms;
			ulong ksym_addr;
			u32 i;

			/* copy the address of the kernel symbol
			 * corresponding to each function
			 */
			ulen = min_t(u32, info.nr_jited_ksyms, ulen);
			user_ksyms = u64_to_user_ptr(info.jited_ksyms);
			for (i = 0; i < ulen; i++) {
				ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
				ksym_addr &= PAGE_MASK;
				if (put_user((u64) ksym_addr, &user_ksyms[i]))
					return -EFAULT;
			}
		} else {
			info.jited_ksyms = 0;
		}
	}

	ulen = info.nr_jited_func_lens;
	info.nr_jited_func_lens = prog->aux->func_cnt;
	if (info.nr_jited_func_lens && ulen) {
		if (bpf_dump_raw_ok()) {
			u32 __user *user_lens;
			u32 func_len, i;

			/* copy the JITed image lengths for each function */
			ulen = min_t(u32, info.nr_jited_func_lens, ulen);
			user_lens = u64_to_user_ptr(info.jited_func_lens);
			for (i = 0; i < ulen; i++) {
				func_len = prog->aux->func[i]->jited_len;
				if (put_user(func_len, &user_lens[i]))
					return -EFAULT;
			}
		} else {
			info.jited_func_lens = 0;
		}
	}

done:
	if (copy_to_user(uinfo, &info, info_len) ||
	    put_user(info_len, &uattr->info.info_len))
@ -1998,7 +2076,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
	u32 info_len = attr->info.info_len;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
	if (err)
		return err;
	info_len = min_t(u32, sizeof(info), info_len);
@ -2013,8 +2091,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,

	if (map->btf) {
		info.btf_id = btf_id(map->btf);
		info.btf_key_id = map->btf_key_id;
		info.btf_value_id = map->btf_value_id;
		info.btf_key_type_id = map->btf_key_type_id;
		info.btf_value_type_id = map->btf_value_type_id;
	}

	if (bpf_map_is_dev_bound(map)) {
@ -2038,7 +2116,7 @@ static int bpf_btf_get_info_by_fd(struct btf *btf,
	u32 info_len = attr->info.info_len;
	int err;

	err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
	err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
	if (err)
		return err;

@ -2102,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
	return btf_get_fd_by_id(attr->btf_id);
}

static int bpf_task_fd_query_copy(const union bpf_attr *attr,
				  union bpf_attr __user *uattr,
				  u32 prog_id, u32 fd_type,
				  const char *buf, u64 probe_offset,
				  u64 probe_addr)
{
	char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
	u32 len = buf ? strlen(buf) : 0, input_len;
	int err = 0;

	if (put_user(len, &uattr->task_fd_query.buf_len))
		return -EFAULT;
	input_len = attr->task_fd_query.buf_len;
	if (input_len && ubuf) {
		if (!len) {
			/* nothing to copy, just make ubuf NULL terminated */
			char zero = '\0';

			if (put_user(zero, ubuf))
				return -EFAULT;
		} else if (input_len >= len + 1) {
			/* ubuf can hold the string with NULL terminator */
			if (copy_to_user(ubuf, buf, len + 1))
				return -EFAULT;
		} else {
			/* ubuf cannot hold the string with NULL terminator,
			 * do a partial copy with NULL terminator.
			 */
			char zero = '\0';

			err = -ENOSPC;
			if (copy_to_user(ubuf, buf, input_len - 1))
				return -EFAULT;
			if (put_user(zero, ubuf + input_len - 1))
				return -EFAULT;
		}
	}

	if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
	    put_user(fd_type, &uattr->task_fd_query.fd_type) ||
	    put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
	    put_user(probe_addr, &uattr->task_fd_query.probe_addr))
		return -EFAULT;

	return err;
}

#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr

static int bpf_task_fd_query(const union bpf_attr *attr,
			     union bpf_attr __user *uattr)
{
	pid_t pid = attr->task_fd_query.pid;
	u32 fd = attr->task_fd_query.fd;
	const struct perf_event *event;
	struct files_struct *files;
	struct task_struct *task;
	struct file *file;
	int err;

	if (CHECK_ATTR(BPF_TASK_FD_QUERY))
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (attr->task_fd_query.flags != 0)
		return -EINVAL;

	task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
	if (!task)
		return -ENOENT;

	files = get_files_struct(task);
	put_task_struct(task);
	if (!files)
		return -ENOENT;

	err = 0;
	spin_lock(&files->file_lock);
	file = fcheck_files(files, fd);
	if (!file)
		err = -EBADF;
	else
		get_file(file);
	spin_unlock(&files->file_lock);
	put_files_struct(files);

	if (err)
		goto out;

	if (file->f_op == &bpf_raw_tp_fops) {
		struct bpf_raw_tracepoint *raw_tp = file->private_data;
		struct bpf_raw_event_map *btp = raw_tp->btp;

		err = bpf_task_fd_query_copy(attr, uattr,
					     raw_tp->prog->aux->id,
					     BPF_FD_TYPE_RAW_TRACEPOINT,
					     btp->tp->name, 0, 0);
		goto put_file;
	}

	event = perf_get_event(file);
	if (!IS_ERR(event)) {
		u64 probe_offset, probe_addr;
		u32 prog_id, fd_type;
		const char *buf;

		err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
					      &buf, &probe_offset,
					      &probe_addr);
		if (!err)
			err = bpf_task_fd_query_copy(attr, uattr, prog_id,
						     fd_type, buf,
						     probe_offset,
						     probe_addr);
		goto put_file;
	}

	err = -ENOTSUPP;
put_file:
	fput(file);
out:
	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
@ -2110,7 +2314,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

@ -2188,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
	case BPF_BTF_GET_FD_BY_ID:
		err = bpf_btf_get_fd_by_id(&attr);
		break;
	case BPF_TASK_FD_QUERY:
		err = bpf_task_fd_query(&attr, uattr);
		break;
	default:
		err = -EINVAL;
		break;

@ -1262,6 +1262,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
	switch (env->prog->type) {
	case BPF_PROG_TYPE_LWT_IN:
	case BPF_PROG_TYPE_LWT_OUT:
	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
		/* dst_input() and dst_output() can't write for now */
		if (t == BPF_WRITE)
			return false;
@ -5383,11 +5384,24 @@ static int jit_subprogs(struct bpf_verifier_env *env)
			    insn->src_reg != BPF_PSEUDO_CALL)
				continue;
			subprog = insn->off;
			insn->off = 0;
			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
				func[subprog]->bpf_func -
				__bpf_call_base;
		}

		/* we use the aux data to keep a list of the start addresses
		 * of the JITed images for each function in the program
		 *
		 * for some architectures, such as powerpc64, the imm field
		 * might not be large enough to hold the offset of the start
		 * address of the callee's JITed image from __bpf_call_base
		 *
		 * in such cases, we can lookup the start address of a callee
		 * by using its subprog id, available from the off field of
		 * the call instruction, as an index for this list
		 */
		func[i]->aux->func = func;
		func[i]->aux->func_cnt = env->subprog_cnt;
	}
	for (i = 0; i < env->subprog_cnt; i++) {
		old_bpf_func = func[i]->bpf_func;
@ -5413,17 +5427,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
	 * later look the same as if they were interpreted only.
	 */
	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
		unsigned long addr;

		if (insn->code != (BPF_JMP | BPF_CALL) ||
		    insn->src_reg != BPF_PSEUDO_CALL)
			continue;
		insn->off = env->insn_aux_data[i].call_imm;
		subprog = find_subprog(env, i + insn->off + 1);
		addr  = (unsigned long)func[subprog]->bpf_func;
		addr &= PAGE_MASK;
		insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
			addr - __bpf_call_base;
		insn->imm = subprog;
	}

	prog->jited = 1;

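To spell out the consequence of the change above: insn->imm now carries the subprog index instead of an offset from __bpf_call_base, so an architecture JIT can resolve the callee at the extra pass. A hedged sketch, using the names from the verifier code rather than any in-tree JIT:

static u64 callee_addr(const struct bpf_prog *prog,
		       const struct bpf_insn *insn)
{
	/* valid only once jit_subprogs() has populated aux->func;
	 * illustrative helper, not lifted from an in-tree JIT */
	return (u64)prog->aux->func[insn->imm]->bpf_func;
}
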
@ -1,15 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
 * Copyright(c) 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/bpf.h>

@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd)
	return file;
}

const struct perf_event *perf_get_event(struct file *file)
{
	if (file->f_op != &perf_fops)
		return ERR_PTR(-EINVAL);

	return file->private_data;
}

const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{
	if (!event)

@ -14,6 +14,7 @@
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/kprobes.h>
#include <linux/syscalls.h>
#include <linux/error-injection.h>

#include "trace_probe.h"
@@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
	mutex_unlock(&bpf_event_mutex);
	return err;
}

int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			    u32 *fd_type, const char **buf,
			    u64 *probe_offset, u64 *probe_addr)
{
	bool is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog;
	int flags, err = 0;

	prog = event->prog;
	if (!prog)
		return -ENOENT;

	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
		return -EOPNOTSUPP;

	*prog_id = prog->aux->id;
	flags = event->tp_event->flags;
	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
	is_syscall_tp = is_syscall_trace_event(event->tp_event);

	if (is_tracepoint || is_syscall_tp) {
		*buf = is_tracepoint ? event->tp_event->tp->name
				     : event->tp_event->name;
		*fd_type = BPF_FD_TYPE_TRACEPOINT;
		*probe_offset = 0x0;
		*probe_addr = 0x0;
	} else {
		/* kprobe/uprobe */
		err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_KPROBE)
			err = bpf_get_kprobe_info(event, fd_type, buf,
						  probe_offset, probe_addr,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_UPROBE)
			err = bpf_get_uprobe_info(event, fd_type, buf,
						  probe_offset,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
	}

	return err;
}
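This is the kernel-side plumbing behind the new BPF_TASK_FD_QUERY introspection command. A hedged userspace sketch, assuming the task_fd_query field names follow this series' uapi (error handling trimmed):

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int task_fd_query_example(int pid, int target_fd, char *buf, __u32 buf_len)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid = pid;		/* task owning the fd */
	attr.task_fd_query.fd = target_fd;	/* perf event fd to inspect */
	attr.task_fd_query.buf = (__u64)(unsigned long)buf;
	attr.task_fd_query.buf_len = buf_len;	/* receives probe/tp name */

	if (syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr)))
		return -1;

	/* On success, prog_id/fd_type/probe_offset/probe_addr are filled in. */
	return (int)attr.task_fd_query.prog_id;
}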
@@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
		      head, NULL);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);

int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
			const char **symbol, u64 *probe_offset,
			u64 *probe_addr, bool perf_type_tracepoint)
{
	const char *pevent = trace_event_name(event->tp_event);
	const char *group = event->tp_event->class->system;
	struct trace_kprobe *tk;

	if (perf_type_tracepoint)
		tk = find_trace_kprobe(pevent, group);
	else
		tk = event->tp_event->data;
	if (!tk)
		return -EINVAL;

	*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
					      : BPF_FD_TYPE_KPROBE;
	if (tk->symbol) {
		*symbol = tk->symbol;
		*probe_offset = tk->rp.kp.offset;
		*probe_addr = 0;
	} else {
		*symbol = NULL;
		*probe_offset = 0;
		*probe_addr = (unsigned long)tk->rp.kp.addr;
	}
	return 0;
}
#endif	/* CONFIG_PERF_EVENTS */

/*
@@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
{
	__uprobe_perf_func(tu, func, regs, ucb, dsize);
}

int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
			const char **filename, u64 *probe_offset,
			bool perf_type_tracepoint)
{
	const char *pevent = trace_event_name(event->tp_event);
	const char *group = event->tp_event->class->system;
	struct trace_uprobe *tu;

	if (perf_type_tracepoint)
		tu = find_probe_event(pevent, group);
	else
		tu = event->tp_event->data;
	if (!tu)
		return -EINVAL;

	*fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
				    : BPF_FD_TYPE_UPROBE;
	*filename = tu->filename;
	*probe_offset = tu->offset;
	return 0;
}
#endif	/* CONFIG_PERF_EVENTS */

static int
@@ -64,6 +64,10 @@
#include <net/ip_fib.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>

/**
 *	sk_filter_trim_cap - run a packet through a socket filter
@@ -3042,7 +3046,7 @@ static int __bpf_tx_xdp(struct net_device *dev,
			u32 index)
{
	struct xdp_frame *xdpf;
	int err;
	int sent;

	if (!dev->netdev_ops->ndo_xdp_xmit) {
		return -EOPNOTSUPP;

@@ -3052,9 +3056,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
	if (unlikely(!xdpf))
		return -EOVERFLOW;

	err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
	if (err)
		return err;
	sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
	if (sent <= 0)
		return sent;
	dev->netdev_ops->ndo_xdp_flush(dev);
	return 0;
}

@@ -3068,20 +3072,9 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,

	switch (map->map_type) {
	case BPF_MAP_TYPE_DEVMAP: {
		struct net_device *dev = fwd;
		struct xdp_frame *xdpf;
		struct bpf_dtab_netdev *dst = fwd;

		if (!dev->netdev_ops->ndo_xdp_xmit)
			return -EOPNOTSUPP;

		xdpf = convert_to_xdp_frame(xdp);
		if (unlikely(!xdpf))
			return -EOVERFLOW;

		/* TODO: move to inside map code instead, for bulk support
		 * err = dev_map_enqueue(dev, xdp);
		 */
		err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
		err = dev_map_enqueue(dst, xdp, dev_rx);
		if (err)
			return err;
		__dev_map_insert_ctx(map, index);
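ndo_xdp_xmit now takes an array of frames plus a count and returns the number of frames accepted, so drivers can ring the doorbell once per bulk. A minimal hedged driver skeleton; the mydrv_* names and per-frame TX helper are hypothetical, and the handling of unsent frames is an assumption of this sketch:

static int mydrv_xdp_xmit(struct net_device *dev, int n,
			  struct xdp_frame **frames)
{
	int i, sent = 0;

	for (i = 0; i < n; i++) {
		/* Assumed helper: queue one frame on the TX ring. */
		if (mydrv_tx_one(dev, frames[i]) < 0)
			break;	/* ring full: report partial progress */
		sent++;
	}
	/* The caller (or ndo_xdp_flush) kicks the hardware once per
	 * bulk instead of once per frame. */
	return sent;
}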
@@ -3370,28 +3363,6 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
	.arg3_type	= ARG_ANYTHING,
};

bool bpf_helper_changes_pkt_data(void *func)
{
	if (func == bpf_skb_vlan_push ||
	    func == bpf_skb_vlan_pop ||
	    func == bpf_skb_store_bytes ||
	    func == bpf_skb_change_proto ||
	    func == bpf_skb_change_head ||
	    func == bpf_skb_change_tail ||
	    func == bpf_skb_adjust_room ||
	    func == bpf_skb_pull_data ||
	    func == bpf_clone_redirect ||
	    func == bpf_l3_csum_replace ||
	    func == bpf_l4_csum_replace ||
	    func == bpf_xdp_adjust_head ||
	    func == bpf_xdp_adjust_meta ||
	    func == bpf_msg_pull_data ||
	    func == bpf_xdp_adjust_tail)
		return true;

	return false;
}

static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
				  unsigned long off, unsigned long len)
{
@@ -4096,7 +4067,7 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,

#if IS_ENABLED(CONFIG_INET)
static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
			       u32 flags)
			       u32 flags, bool check_mtu)
{
	struct in_device *in_dev;
	struct neighbour *neigh;

@@ -4105,6 +4076,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
	struct fib_nh *nh;
	struct flowi4 fl4;
	int err;
	u32 mtu;

	dev = dev_get_by_index_rcu(net, params->ifindex);
	if (unlikely(!dev))

@@ -4156,6 +4128,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
	if (res.fi->fib_nhs > 1)
		fib_select_path(net, &res, &fl4, NULL);

	if (check_mtu) {
		mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
		if (params->tot_len > mtu)
			return 0;
	}

	nh = &res.fi->fib_nh[res.nh_sel];

	/* do not handle lwt encaps right now */

@@ -4184,7 +4162,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,

#if IS_ENABLED(CONFIG_IPV6)
static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
			       u32 flags)
			       u32 flags, bool check_mtu)
{
	struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
	struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;

@@ -4195,6 +4173,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
	struct flowi6 fl6;
	int strict = 0;
	int oif;
	u32 mtu;

	/* link local addresses are never forwarded */
	if (rt6_need_strict(dst) || rt6_need_strict(src))

@@ -4257,6 +4236,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
					       fl6.flowi6_oif, NULL,
					       strict);

	if (check_mtu) {
		mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
		if (params->tot_len > mtu)
			return 0;
	}

	if (f6i->fib6_nh.nh_lwtstate)
		return 0;

@@ -4289,12 +4274,12 @@ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
#if IS_ENABLED(CONFIG_INET)
	case AF_INET:
		return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
					   flags);
					   flags, true);
#endif
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
					   flags);
					   flags, true);
#endif
	}
	return 0;

@@ -4313,20 +4298,34 @@ static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
	   struct bpf_fib_lookup *, params, int, plen, u32, flags)
{
	struct net *net = dev_net(skb->dev);
	int index = 0;

	if (plen < sizeof(*params))
		return -EINVAL;

	switch (params->family) {
#if IS_ENABLED(CONFIG_INET)
	case AF_INET:
		return bpf_ipv4_fib_lookup(dev_net(skb->dev), params, flags);
		index = bpf_ipv4_fib_lookup(net, params, flags, false);
		break;
#endif
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		return bpf_ipv6_fib_lookup(dev_net(skb->dev), params, flags);
		index = bpf_ipv6_fib_lookup(net, params, flags, false);
		break;
#endif
	}
	return -ENOTSUPP;

	if (index > 0) {
		struct net_device *dev;

		dev = dev_get_by_index_rcu(net, index);
		if (!is_skb_forwardable(dev, skb))
			index = 0;
	}

	return index;
}

static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
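A hedged sketch of an XDP program exercising this helper. The section name, the header-parsing step, and the return convention (positive egress ifindex on success, 0 when the packet should go up the stack, e.g. after the new MTU check) are assumptions of this sketch:

#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("xdp_fwd")
int xdp_fwd_example(struct xdp_md *ctx)
{
	struct bpf_fib_lookup params = {};
	int rc;

	params.family  = 2; /* AF_INET */
	params.ifindex = ctx->ingress_ifindex;
	/* ... fill ipv4_src/ipv4_dst/tot_len from the parsed headers ... */

	/* With check_mtu enabled for XDP, a result whose tot_len exceeds
	 * the egress MTU comes back as 0, i.e. "not forwarded". */
	rc = bpf_fib_lookup(ctx, &params, sizeof(params), 0);
	if (rc <= 0)
		return XDP_PASS;

	return bpf_redirect(params.ifindex, 0);
}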
@@ -4339,6 +4338,264 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
	.arg4_type	= ARG_ANYTHING,
};

#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
{
	int err;
	struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;

	if (!seg6_validate_srh(srh, len))
		return -EINVAL;

	switch (type) {
	case BPF_LWT_ENCAP_SEG6_INLINE:
		if (skb->protocol != htons(ETH_P_IPV6))
			return -EBADMSG;

		err = seg6_do_srh_inline(skb, srh);
		break;
	case BPF_LWT_ENCAP_SEG6:
		skb_reset_inner_headers(skb);
		skb->encapsulation = 1;
		err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
		break;
	default:
		return -EINVAL;
	}

	bpf_compute_data_pointers(skb);
	if (err)
		return err;

	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	return seg6_lookup_nexthop(skb, NULL, 0);
}
#endif /* CONFIG_IPV6_SEG6_BPF */

BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
	   u32, len)
{
	switch (type) {
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
	case BPF_LWT_ENCAP_SEG6:
	case BPF_LWT_ENCAP_SEG6_INLINE:
		return bpf_push_seg6_encap(skb, type, hdr, len);
#endif
	default:
		return -EINVAL;
	}
}

static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
	.func		= bpf_lwt_push_encap,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_MEM,
	.arg4_type	= ARG_CONST_SIZE
};

BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
	   const void *, from, u32, len)
{
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
	struct seg6_bpf_srh_state *srh_state =
		this_cpu_ptr(&seg6_bpf_srh_states);
	void *srh_tlvs, *srh_end, *ptr;
	struct ipv6_sr_hdr *srh;
	int srhoff = 0;

	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
		return -EINVAL;

	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
	srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
	srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);

	ptr = skb->data + offset;
	if (ptr >= srh_tlvs && ptr + len <= srh_end)
		srh_state->valid = 0;
	else if (ptr < (void *)&srh->flags ||
		 ptr + len > (void *)&srh->segments)
		return -EFAULT;

	if (unlikely(bpf_try_make_writable(skb, offset + len)))
		return -EFAULT;

	memcpy(skb->data + offset, from, len);
	return 0;
#else /* CONFIG_IPV6_SEG6_BPF */
	return -EOPNOTSUPP;
#endif
}

static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
	.func		= bpf_lwt_seg6_store_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_MEM,
	.arg4_type	= ARG_CONST_SIZE
};

BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
	   u32, action, void *, param, u32, param_len)
{
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
	struct seg6_bpf_srh_state *srh_state =
		this_cpu_ptr(&seg6_bpf_srh_states);
	struct ipv6_sr_hdr *srh;
	int srhoff = 0;
	int err;

	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
		return -EINVAL;
	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);

	if (!srh_state->valid) {
		if (unlikely((srh_state->hdrlen & 7) != 0))
			return -EBADMSG;

		srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
		if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
			return -EBADMSG;

		srh_state->valid = 1;
	}

	switch (action) {
	case SEG6_LOCAL_ACTION_END_X:
		if (param_len != sizeof(struct in6_addr))
			return -EINVAL;
		return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
	case SEG6_LOCAL_ACTION_END_T:
		if (param_len != sizeof(int))
			return -EINVAL;
		return seg6_lookup_nexthop(skb, NULL, *(int *)param);
	case SEG6_LOCAL_ACTION_END_B6:
		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
					  param, param_len);
		if (!err)
			srh_state->hdrlen =
				((struct ipv6_sr_hdr *)param)->hdrlen << 3;
		return err;
	case SEG6_LOCAL_ACTION_END_B6_ENCAP:
		err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
					  param, param_len);
		if (!err)
			srh_state->hdrlen =
				((struct ipv6_sr_hdr *)param)->hdrlen << 3;
		return err;
	default:
		return -EINVAL;
	}
#else /* CONFIG_IPV6_SEG6_BPF */
	return -EOPNOTSUPP;
#endif
}

static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
	.func		= bpf_lwt_seg6_action,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_MEM,
	.arg4_type	= ARG_CONST_SIZE
};

BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
	   s32, len)
{
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
	struct seg6_bpf_srh_state *srh_state =
		this_cpu_ptr(&seg6_bpf_srh_states);
	void *srh_end, *srh_tlvs, *ptr;
	struct ipv6_sr_hdr *srh;
	struct ipv6hdr *hdr;
	int srhoff = 0;
	int ret;

	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
		return -EINVAL;
	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);

	srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
			((srh->first_segment + 1) << 4));
	srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
			srh_state->hdrlen);
	ptr = skb->data + offset;

	if (unlikely(ptr < srh_tlvs || ptr > srh_end))
		return -EFAULT;
	if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
		return -EFAULT;

	if (len > 0) {
		ret = skb_cow_head(skb, len);
		if (unlikely(ret < 0))
			return ret;

		ret = bpf_skb_net_hdr_push(skb, offset, len);
	} else {
		ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
	}

	bpf_compute_data_pointers(skb);
	if (unlikely(ret < 0))
		return ret;

	hdr = (struct ipv6hdr *)skb->data;
	hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

	srh_state->hdrlen += len;
	srh_state->valid = 0;
	return 0;
#else /* CONFIG_IPV6_SEG6_BPF */
	return -EOPNOTSUPP;
#endif
}

static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
	.func		= bpf_lwt_seg6_adjust_srh,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};

bool bpf_helper_changes_pkt_data(void *func)
{
	if (func == bpf_skb_vlan_push ||
	    func == bpf_skb_vlan_pop ||
	    func == bpf_skb_store_bytes ||
	    func == bpf_skb_change_proto ||
	    func == bpf_skb_change_head ||
	    func == bpf_skb_change_tail ||
	    func == bpf_skb_adjust_room ||
	    func == bpf_skb_pull_data ||
	    func == bpf_clone_redirect ||
	    func == bpf_l3_csum_replace ||
	    func == bpf_l4_csum_replace ||
	    func == bpf_xdp_adjust_head ||
	    func == bpf_xdp_adjust_meta ||
	    func == bpf_msg_pull_data ||
	    func == bpf_xdp_adjust_tail ||
	    func == bpf_lwt_push_encap ||
	    func == bpf_lwt_seg6_store_bytes ||
	    func == bpf_lwt_seg6_adjust_srh ||
	    func == bpf_lwt_seg6_action
	    )
		return true;

	return false;
}

static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
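A hedged sketch of a seg6local End.BPF program driving the three helpers above. The TLV offset (48), TLV contents, table id, and section name are illustrative assumptions; a real program would compute the offset from the parsed SRH:

#include <linux/bpf.h>
#include <linux/seg6_local.h>
#include "bpf_helpers.h"

SEC("lwt_seg6local")
int end_bpf_example(struct __sk_buff *skb)
{
	__u8 tlv[8] = { 0x08 /* type */, 6 /* len */, 0, 0, 0, 0, 0, 0 };
	int table = 100;

	/* Grow the SRH by 8 bytes in its TLV area ... */
	if (bpf_lwt_seg6_adjust_srh(skb, 48, 8) < 0)
		return BPF_DROP;

	/* ... write a TLV into the new space ... */
	if (bpf_lwt_seg6_store_bytes(skb, 48, tlv, sizeof(tlv)) < 0)
		return BPF_DROP;

	/* ... then behave like End.T: route via table 100. */
	if (bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
				&table, sizeof(table)) < 0)
		return BPF_DROP;

	return BPF_REDIRECT;	/* lookup already done by the action */
}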
@@ -4522,33 +4779,6 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
	}
}

static const struct bpf_func_proto *
lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_skb_load_bytes:
		return &bpf_skb_load_bytes_proto;
	case BPF_FUNC_skb_pull_data:
		return &bpf_skb_pull_data_proto;
	case BPF_FUNC_csum_diff:
		return &bpf_csum_diff_proto;
	case BPF_FUNC_get_cgroup_classid:
		return &bpf_get_cgroup_classid_proto;
	case BPF_FUNC_get_route_realm:
		return &bpf_get_route_realm_proto;
	case BPF_FUNC_get_hash_recalc:
		return &bpf_get_hash_recalc_proto;
	case BPF_FUNC_perf_event_output:
		return &bpf_skb_event_output_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_skb_under_cgroup:
		return &bpf_skb_under_cgroup_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

static const struct bpf_func_proto *
sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -4614,6 +4844,44 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
	}
}

static const struct bpf_func_proto *
lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_skb_load_bytes:
		return &bpf_skb_load_bytes_proto;
	case BPF_FUNC_skb_pull_data:
		return &bpf_skb_pull_data_proto;
	case BPF_FUNC_csum_diff:
		return &bpf_csum_diff_proto;
	case BPF_FUNC_get_cgroup_classid:
		return &bpf_get_cgroup_classid_proto;
	case BPF_FUNC_get_route_realm:
		return &bpf_get_route_realm_proto;
	case BPF_FUNC_get_hash_recalc:
		return &bpf_get_hash_recalc_proto;
	case BPF_FUNC_perf_event_output:
		return &bpf_skb_event_output_proto;
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_skb_under_cgroup:
		return &bpf_skb_under_cgroup_proto;
	default:
		return bpf_base_func_proto(func_id);
	}
}

static const struct bpf_func_proto *
lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_lwt_push_encap:
		return &bpf_lwt_push_encap_proto;
	default:
		return lwt_out_func_proto(func_id, prog);
	}
}

static const struct bpf_func_proto *
lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@@ -4645,7 +4913,22 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
	case BPF_FUNC_set_hash_invalid:
		return &bpf_set_hash_invalid_proto;
	default:
		return lwt_inout_func_proto(func_id, prog);
		return lwt_out_func_proto(func_id, prog);
	}
}

static const struct bpf_func_proto *
lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_lwt_seg6_store_bytes:
		return &bpf_lwt_seg6_store_bytes_proto;
	case BPF_FUNC_lwt_seg6_action:
		return &bpf_lwt_seg6_action_proto;
	case BPF_FUNC_lwt_seg6_adjust_srh:
		return &bpf_lwt_seg6_adjust_srh_proto;
	default:
		return lwt_out_func_proto(func_id, prog);
	}
}
@@ -4753,7 +5036,6 @@ static bool lwt_is_valid_access(int off, int size,
	return bpf_skb_is_valid_access(off, size, type, prog, info);
}


/* Attach type specific accesses */
static bool __sock_filter_check_attach_type(int off,
					    enum bpf_access_type access_type,

@@ -5155,18 +5437,23 @@ static bool sk_msg_is_valid_access(int off, int size,
	switch (off) {
	case offsetof(struct sk_msg_md, data):
		info->reg_type = PTR_TO_PACKET;
		if (size != sizeof(__u64))
			return false;
		break;
	case offsetof(struct sk_msg_md, data_end):
		info->reg_type = PTR_TO_PACKET_END;
		if (size != sizeof(__u64))
			return false;
		break;
	default:
		if (size != sizeof(__u32))
			return false;
	}

	if (off < 0 || off >= sizeof(struct sk_msg_md))
		return false;
	if (off % size != 0)
		return false;
	if (size != sizeof(__u64))
		return false;

	return true;
}
@@ -5842,7 +6129,8 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
		break;

	case offsetof(struct bpf_sock_ops, local_ip4):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
					  skc_rcv_saddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct bpf_sock_ops_kern, sk),
@@ -6159,6 +6447,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
				     struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
	int off;

	switch (si->off) {
	case offsetof(struct sk_msg_md, data):

@@ -6171,6 +6460,107 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg_buff, data_end));
		break;
	case offsetof(struct sk_msg_md, family):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct sk_msg_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_family));
		break;

	case offsetof(struct sk_msg_md, remote_ip4):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct sk_msg_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_daddr));
		break;

	case offsetof(struct sk_msg_md, local_ip4):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
					  skc_rcv_saddr) != 4);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct sk_msg_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_rcv_saddr));
		break;

	case offsetof(struct sk_msg_md, remote_ip6[0]) ...
	     offsetof(struct sk_msg_md, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
					  skc_v6_daddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct sk_msg_md, remote_ip6[0]);
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct sk_msg_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_daddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct sk_msg_md, local_ip6[0]) ...
	     offsetof(struct sk_msg_md, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
					  skc_v6_rcv_saddr.s6_addr32[0]) != 4);

		off = si->off;
		off -= offsetof(struct sk_msg_md, local_ip6[0]);
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct sk_msg_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common,
					       skc_v6_rcv_saddr.s6_addr32[0]) +
				      off);
#else
		*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
		break;

	case offsetof(struct sk_msg_md, remote_port):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct sk_msg_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_dport));
#ifndef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
#endif
		break;

	case offsetof(struct sk_msg_md, local_port):
		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
					      struct sk_msg_buff, sk),
				      si->dst_reg, si->src_reg,
				      offsetof(struct sk_msg_buff, sk));
		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
				      offsetof(struct sock_common, skc_num));
		break;
	}

	return insn - insn_buf;
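A hedged sketch of an SK_MSG program reading the new socket fields; the section name and the specific filter condition are illustrative. Per the conversions above, remote_port is network byte order (shifted on little-endian hosts) and local_port is host byte order:

#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("sk_msg")
int msg_filter_example(struct sk_msg_md *msg)
{
	/* 4-byte reads of these fields are what sk_msg_is_valid_access()
	 * now permits. */
	if (msg->family != 2 /* AF_INET */)
		return SK_PASS;

	/* 0x0100007f is 127.0.0.1 when read on a little-endian host. */
	if (msg->remote_ip4 == 0x0100007f && msg->local_port == 22)
		return SK_DROP;

	return SK_PASS;
}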
@@ -6219,13 +6609,23 @@ const struct bpf_prog_ops cg_skb_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops lwt_inout_verifier_ops = {
	.get_func_proto		= lwt_inout_func_proto,
const struct bpf_verifier_ops lwt_in_verifier_ops = {
	.get_func_proto		= lwt_in_func_proto,
	.is_valid_access	= lwt_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

const struct bpf_prog_ops lwt_inout_prog_ops = {
const struct bpf_prog_ops lwt_in_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops lwt_out_verifier_ops = {
	.get_func_proto		= lwt_out_func_proto,
	.is_valid_access	= lwt_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

const struct bpf_prog_ops lwt_out_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

@@ -6240,6 +6640,16 @@ const struct bpf_prog_ops lwt_xmit_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
	.get_func_proto		= lwt_seg6local_func_proto,
	.is_valid_access	= lwt_is_valid_access,
	.convert_ctx_access	= bpf_convert_ctx_access,
};

const struct bpf_prog_ops lwt_seg6local_prog_ops = {
	.test_run		= bpf_prog_test_run_skb,
};

const struct bpf_verifier_ops cg_sock_verifier_ops = {
	.get_func_proto		= sock_filter_func_proto,
	.is_valid_access	= sock_filter_is_valid_access,
@@ -308,7 +308,13 @@ err:
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);

static void xdp_return(void *data, struct xdp_mem_info *mem)
/* XDP RX runs under NAPI protection, and in different delivery error
 * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection. The @napi_direct boolean
 * is used for those call sites, allowing for faster recycling
 * of xdp_frames/pages in those cases.
 */
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
{
	struct xdp_mem_allocator *xa;
	struct page *page;

@@ -320,7 +326,7 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
	xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
	page = virt_to_head_page(data);
	if (xa)
		page_pool_put_page(xa->page_pool, page);
		page_pool_put_page(xa->page_pool, page, napi_direct);
	else
		put_page(page);
	rcu_read_unlock();

@@ -340,12 +346,18 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)

void xdp_return_frame(struct xdp_frame *xdpf)
{
	xdp_return(xdpf->data, &xdpf->mem);
	__xdp_return(xdpf->data, &xdpf->mem, false);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);

void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, true);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);

void xdp_return_buff(struct xdp_buff *xdp)
{
	xdp_return(xdp->data, &xdp->rxq->mem);
	__xdp_return(xdp->data, &xdp->rxq->mem, true);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
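A short sketch of picking the right return variant in a driver; mydrv_return_frame_example() and its context are hypothetical, the two exported functions are the ones added above:

#include <net/xdp.h>

static void mydrv_return_frame_example(struct xdp_frame *xdpf, bool in_napi)
{
	if (in_napi)
		xdp_return_frame_rx_napi(xdpf);	/* NAPI-protected: direct recycle */
	else
		xdp_return_frame(xdpf);		/* any other context */
}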
@@ -1352,6 +1352,37 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
	return NULL;
}

/* MTU selection:
 * 1. mtu on route is locked - use it
 * 2. mtu from nexthop exception
 * 3. mtu from egress device
 */

u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
{
	struct fib_info *fi = res->fi;
	struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
	struct net_device *dev = nh->nh_dev;
	u32 mtu = 0;

	if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
	    fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
		mtu = fi->fib_mtu;

	if (likely(!mtu)) {
		struct fib_nh_exception *fnhe;

		fnhe = find_exception(nh, daddr);
		if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
			mtu = fnhe->fnhe_pmtu;
	}

	if (likely(!mtu))
		mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);

	return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu);
}

static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr, const bool do_cache)
{
@@ -329,4 +329,9 @@ config IPV6_SEG6_HMAC

	  If unsure, say N.

config IPV6_SEG6_BPF
	def_bool y
	depends on IPV6_SEG6_LWTUNNEL
	depends on IPV6 = y

endif # IPV6
@@ -161,12 +161,20 @@ eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
	return f6i;
}

static u32
eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
			       struct in6_addr *saddr)
{
	return 0;
}

const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
	.ipv6_dst_lookup   = eafnosupport_ipv6_dst_lookup,
	.fib6_get_table    = eafnosupport_fib6_get_table,
	.fib6_table_lookup = eafnosupport_fib6_table_lookup,
	.fib6_lookup       = eafnosupport_fib6_lookup,
	.fib6_multipath_select = eafnosupport_fib6_multipath_select,
	.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
@@ -894,6 +894,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
	.fib6_table_lookup = fib6_table_lookup,
	.fib6_lookup	   = fib6_lookup,
	.fib6_multipath_select = fib6_multipath_select,
	.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
	.udpv6_encap_enable = udpv6_encap_enable,
	.ndisc_send_na = ndisc_send_na,
	.nd_tbl	= &nd_tbl,
@@ -2604,6 +2604,54 @@ out:
	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}

/* MTU selection:
 * 1. mtu on route is locked - use it
 * 2. mtu from nexthop exception
 * 3. mtu from egress device
 *
 * based on ip6_dst_mtu_forward and exception logic of
 * rt6_find_cached_rt; called with rcu_read_lock
 */
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
		      struct in6_addr *saddr)
{
	struct rt6_exception_bucket *bucket;
	struct rt6_exception *rt6_ex;
	struct in6_addr *src_key;
	struct inet6_dev *idev;
	u32 mtu = 0;

	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
		mtu = f6i->fib6_pmtu;
		if (mtu)
			goto out;
	}

	src_key = NULL;
#ifdef CONFIG_IPV6_SUBTREES
	if (f6i->fib6_src.plen)
		src_key = saddr;
#endif

	bucket = rcu_dereference(f6i->rt6i_exception_bucket);
	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
		mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);

	if (likely(!mtu)) {
		struct net_device *dev = fib6_info_nh_dev(f6i);

		mtu = IPV6_MIN_MTU;
		idev = __in6_dev_get(dev);
		if (idev && idev->cnf.mtu6 > mtu)
			mtu = idev->cnf.mtu6;
	}

	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
out:
	return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
}

struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
@@ -1,8 +1,9 @@
/*
 *  SR-IPv6 implementation
 *
 *  Author:
 *  Authors:
 *  David Lebrun <david.lebrun@uclouvain.be>
 *  eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
 *
 *  This program is free software; you can redistribute it and/or

@@ -30,7 +31,9 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
#include <net/seg6_local.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>

struct seg6_local_lwt;

@@ -41,6 +44,11 @@ struct seg6_action_desc {
	int static_headroom;
};

struct bpf_lwt_prog {
	struct bpf_prog *prog;
	char *name;
};

struct seg6_local_lwt {
	int action;
	struct ipv6_sr_hdr *srh;

@@ -49,6 +57,7 @@ struct seg6_local_lwt {
	struct in6_addr nh6;
	int iif;
	int oif;
	struct bpf_lwt_prog bpf;

	int headroom;
	struct seg6_action_desc *desc;

@@ -140,8 +149,8 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
	*daddr = *addr;
}

static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
			   u32 tbl_id)
int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
			u32 tbl_id)
{
	struct net *net = dev_net(skb->dev);
	struct ipv6hdr *hdr = ipv6_hdr(skb);

@@ -187,6 +196,7 @@ out:

	skb_dst_drop(skb);
	skb_dst_set(skb, dst);
	return dst->error;
}

/* regular endpoint function */

@@ -200,7 +210,7 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	lookup_nexthop(skb, NULL, 0);
	seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

@@ -220,7 +230,7 @@ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	lookup_nexthop(skb, &slwt->nh6, 0);
	seg6_lookup_nexthop(skb, &slwt->nh6, 0);

	return dst_input(skb);

@@ -239,7 +249,7 @@ static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)

	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	lookup_nexthop(skb, NULL, slwt->table);
	seg6_lookup_nexthop(skb, NULL, slwt->table);

	return dst_input(skb);

@@ -331,7 +341,7 @@ static int input_action_end_dx6(struct sk_buff *skb,
	if (!ipv6_addr_any(&slwt->nh6))
		nhaddr = &slwt->nh6;

	lookup_nexthop(skb, nhaddr, 0);
	seg6_lookup_nexthop(skb, nhaddr, 0);

	return dst_input(skb);
drop:

@@ -380,7 +390,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto drop;

	lookup_nexthop(skb, NULL, slwt->table);
	seg6_lookup_nexthop(skb, NULL, slwt->table);

	return dst_input(skb);

@@ -406,7 +416,7 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	lookup_nexthop(skb, NULL, 0);
	seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

@@ -438,7 +448,7 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
	ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
	skb_set_transport_header(skb, sizeof(struct ipv6hdr));

	lookup_nexthop(skb, NULL, 0);
	seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

@@ -447,6 +457,71 @@ drop:
	return err;
}

DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);

static int input_action_end_bpf(struct sk_buff *skb,
				struct seg6_local_lwt *slwt)
{
	struct seg6_bpf_srh_state *srh_state =
		this_cpu_ptr(&seg6_bpf_srh_states);
	struct seg6_bpf_srh_state local_srh_state;
	struct ipv6_sr_hdr *srh;
	int srhoff = 0;
	int ret;

	srh = get_and_validate_srh(skb);
	if (!srh)
		goto drop;
	advance_nextseg(srh, &ipv6_hdr(skb)->daddr);

	/* preempt_disable is needed to protect the per-CPU buffer srh_state,
	 * which is also accessed by the bpf_lwt_seg6_* helpers
	 */
	preempt_disable();
	srh_state->hdrlen = srh->hdrlen << 3;
	srh_state->valid = 1;

	rcu_read_lock();
	bpf_compute_data_pointers(skb);
	ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
	rcu_read_unlock();

	local_srh_state = *srh_state;
	preempt_enable();

	switch (ret) {
	case BPF_OK:
	case BPF_REDIRECT:
		break;
	case BPF_DROP:
		goto drop;
	default:
		pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
		goto drop;
	}

	if (unlikely((local_srh_state.hdrlen & 7) != 0))
		goto drop;

	if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
		goto drop;
	srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
	srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3);

	if (!local_srh_state.valid &&
	    unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
		goto drop;

	if (ret != BPF_REDIRECT)
		seg6_lookup_nexthop(skb, NULL, 0);

	return dst_input(skb);

drop:
	kfree_skb(skb);
	return -EINVAL;
}
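input_action_end_bpf shares srh_state with the bpf_lwt_seg6_* helpers through a per-CPU variable, which only works if both sides stay on the same CPU without preemption in between. A minimal standalone illustration of that kernel pattern (example_* names are hypothetical, not from this patch):

#include <linux/percpu.h>
#include <linux/preempt.h>

struct example_state { int hdrlen; int valid; };
static DEFINE_PER_CPU(struct example_state, example_states);

static void writer_side_example(void)
{
	struct example_state *st;

	preempt_disable();		/* pin to this CPU's copy */
	st = this_cpu_ptr(&example_states);
	st->hdrlen = 0;
	st->valid = 1;
	/* ... run code that may consult the same per-CPU state ... */
	preempt_enable();
}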
static struct seg6_action_desc seg6_action_table[] = {
	{
		.action		= SEG6_LOCAL_ACTION_END,

@@ -493,7 +568,13 @@ static struct seg6_action_desc seg6_action_table[] = {
		.attrs		= (1 << SEG6_LOCAL_SRH),
		.input		= input_action_end_b6_encap,
		.static_headroom	= sizeof(struct ipv6hdr),
	}
	},
	{
		.action		= SEG6_LOCAL_ACTION_END_BPF,
		.attrs		= (1 << SEG6_LOCAL_BPF),
		.input		= input_action_end_bpf,
	},
};

static struct seg6_action_desc *__get_action_desc(int action)

@@ -538,6 +619,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
				    .len = sizeof(struct in6_addr) },
	[SEG6_LOCAL_IIF]	= { .type = NLA_U32 },
	[SEG6_LOCAL_OIF]	= { .type = NLA_U32 },
	[SEG6_LOCAL_BPF]	= { .type = NLA_NESTED },
};

static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)

@@ -715,6 +797,75 @@ static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
	return 0;
}

#define MAX_PROG_NAME 256
static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
	[SEG6_LOCAL_BPF_PROG]	   = { .type = NLA_U32, },
	[SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
				       .len = MAX_PROG_NAME },
};

static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
	struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
	struct bpf_prog *p;
	int ret;
	u32 fd;

	ret = nla_parse_nested(tb, SEG6_LOCAL_BPF_PROG_MAX,
			       attrs[SEG6_LOCAL_BPF], bpf_prog_policy, NULL);
	if (ret < 0)
		return ret;

	if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
		return -EINVAL;

	slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
	if (!slwt->bpf.name)
		return -ENOMEM;

	fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
	p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
	if (IS_ERR(p)) {
		kfree(slwt->bpf.name);
		return PTR_ERR(p);
	}

	slwt->bpf.prog = p;
	return 0;
}

static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
	struct nlattr *nest;

	if (!slwt->bpf.prog)
		return 0;

	nest = nla_nest_start(skb, SEG6_LOCAL_BPF);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
		return -EMSGSIZE;

	if (slwt->bpf.name &&
	    nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
		return -EMSGSIZE;

	return nla_nest_end(skb, nest);
}

static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
	if (!a->bpf.name && !b->bpf.name)
		return 0;

	if (!a->bpf.name || !b->bpf.name)
		return 1;

	return strcmp(a->bpf.name, b->bpf.name);
}

struct seg6_action_param {
	int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
	int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);

@@ -745,6 +896,11 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
	[SEG6_LOCAL_OIF]	= { .parse = parse_nla_oif,
				    .put = put_nla_oif,
				    .cmp = cmp_nla_oif },

	[SEG6_LOCAL_BPF]	= { .parse = parse_nla_bpf,
				    .put = put_nla_bpf,
				    .cmp = cmp_nla_bpf },
};

static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)

@@ -830,6 +986,13 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
	struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);

	kfree(slwt->srh);

	if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) {
		kfree(slwt->bpf.name);
		bpf_prog_put(slwt->bpf.prog);
	}

	return;
}

static int seg6_local_fill_encap(struct sk_buff *skb,

@@ -882,6 +1045,11 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
	if (attrs & (1 << SEG6_LOCAL_OIF))
		nlsize += nla_total_size(4);

	if (attrs & (1 << SEG6_LOCAL_BPF))
		nlsize += nla_total_size(sizeof(struct nlattr)) +
		       nla_total_size(MAX_PROG_NAME) +
		       nla_total_size(4);

	return nlsize;
}
|
@ -1,2 +1 @@
|
|||
obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
|
||||
|
||||
|
|
|
@@ -1,15 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/init.h>

@@ -25,39 +16,25 @@

#define XDP_UMEM_MIN_FRAME_SIZE 2048

int xdp_umem_create(struct xdp_umem **umem)
{
	*umem = kzalloc(sizeof(**umem), GFP_KERNEL);

	if (!(*umem))
		return -ENOMEM;

	return 0;
}

static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unsigned int i;

	if (umem->pgs) {
		for (i = 0; i < umem->npgs; i++) {
			struct page *page = umem->pgs[i];
	for (i = 0; i < umem->npgs; i++) {
		struct page *page = umem->pgs[i];

			set_page_dirty_lock(page);
			put_page(page);
		}

		kfree(umem->pgs);
		umem->pgs = NULL;
		set_page_dirty_lock(page);
		put_page(page);
	}

	kfree(umem->pgs);
	umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
	atomic_long_sub(umem->npgs, &umem->user->locked_vm);
	free_uid(umem->user);
}

static void xdp_umem_release(struct xdp_umem *umem)

@@ -75,22 +52,18 @@ static void xdp_umem_release(struct xdp_umem *umem)
		umem->cq = NULL;
	}

	if (umem->pgs) {
		xdp_umem_unpin_pages(umem);
	xdp_umem_unpin_pages(umem);

		task = get_pid_task(umem->pid, PIDTYPE_PID);
		put_pid(umem->pid);
		if (!task)
			goto out;
		mm = get_task_mm(task);
		put_task_struct(task);
		if (!mm)
			goto out;

		mmput(mm);
		umem->pgs = NULL;
	}
	task = get_pid_task(umem->pid, PIDTYPE_PID);
	put_pid(umem->pid);
	if (!task)
		goto out;
	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		goto out;

	mmput(mm);
	xdp_umem_unaccount_pages(umem);
out:
	kfree(umem);

@@ -105,7 +78,7 @@ static void xdp_umem_release_deferred(struct work_struct *work)

void xdp_get_umem(struct xdp_umem *umem)
{
	atomic_inc(&umem->users);
	refcount_inc(&umem->users);
}

void xdp_put_umem(struct xdp_umem *umem)

@@ -113,7 +86,7 @@ void xdp_put_umem(struct xdp_umem *umem)
	if (!umem)
		return;

	if (atomic_dec_and_test(&umem->users)) {
	if (refcount_dec_and_test(&umem->users)) {
		INIT_WORK(&umem->work, xdp_umem_release_deferred);
		schedule_work(&umem->work);
	}

@@ -176,16 +149,13 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
	return 0;
}

int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
	u64 addr = mr->addr, size = mr->len;
	unsigned int nframes, nfpp;
	int size_chk, err;

	if (!umem)
		return -EINVAL;

	if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or*

@@ -236,7 +206,7 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
	umem->frame_size_log2 = ilog2(frame_size);
	umem->nfpp_mask = nfpp - 1;
	umem->nfpplog2 = ilog2(nfpp);
	atomic_set(&umem->users, 1);
	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)

@@ -254,7 +224,25 @@ out:
	return err;
}

struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = xdp_umem_reg(umem, mr);
	if (err) {
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}

bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return (umem->fq && umem->cq);
	return umem->fq && umem->cq;
}
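The umem reference count moves from atomic_t to refcount_t; refcount_t traps increment-from-zero and overflow, which a plain atomic_t silently permits. A minimal standalone illustration (obj_example is hypothetical):

#include <linux/refcount.h>

struct obj_example {
	refcount_t users;
};

static void obj_init(struct obj_example *o) { refcount_set(&o->users, 1); }
static void obj_get(struct obj_example *o)  { refcount_inc(&o->users); }
/* Returns true when the last reference was dropped and o can be freed. */
static bool obj_put(struct obj_example *o)  { return refcount_dec_and_test(&o->users); }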
@@ -1,15 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
 * XDP user-space packet buffer
/* SPDX-License-Identifier: GPL-2.0 */
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#ifndef XDP_UMEM_H_

@@ -36,7 +27,7 @@ struct xdp_umem {
	struct pid *pid;
	unsigned long address;
	size_t size;
	atomic_t users;
	refcount_t users;
	struct work_struct work;
};

@@ -59,9 +50,8 @@ static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
}

bool xdp_umem_validate_queues(struct xdp_umem *umem);
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
int xdp_umem_create(struct xdp_umem **umem);
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);

#endif /* XDP_UMEM_H_ */
@@ -1,15 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
 * XDP user-space packet buffer
/* SPDX-License-Identifier: GPL-2.0 */
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#ifndef XDP_UMEM_PROPS_H_

net/xdp/xsk.c (152 changed lines):
@@ -5,15 +5,6 @@
 * applications.
 * Copyright(c) 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * Author(s): Björn Töpel <bjorn.topel@intel.com>
 *	      Magnus Karlsson <magnus.karlsson@intel.com>
 */

@@ -151,6 +142,11 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
		goto out;
	}

	if (xs->queue_id >= xs->dev->real_num_tx_queues) {
		err = -ENXIO;
		goto out;
	}

	skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
	if (unlikely(!skb)) {
		err = -EAGAIN;

@@ -232,18 +228,12 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
	if (!q)
		return -ENOMEM;

	/* Make sure queue is ready before it can be seen by others */
	smp_wmb();
	*queue = q;
	return 0;
}

static void __xsk_release(struct xdp_sock *xs)
{
	/* Wait for driver to stop using the xdp socket. */
	synchronize_net();

	dev_put(xs->dev);
}

static int xsk_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

@@ -260,7 +250,9 @@ static int xsk_release(struct socket *sock)
	local_bh_enable();

	if (xs->dev) {
		__xsk_release(xs);
		/* Wait for driver to stop using the xdp socket. */
		synchronize_net();
		dev_put(xs->dev);
		xs->dev = NULL;
	}

@@ -294,9 +286,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
	struct sock *sk = sock->sk;
	struct net_device *dev, *dev_curr;
	struct xdp_sock *xs = xdp_sk(sk);
	struct xdp_umem *old_umem = NULL;
	struct net_device *dev;
	int err = 0;

	if (addr_len < sizeof(struct sockaddr_xdp))

@@ -305,7 +296,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
		return -EINVAL;

	mutex_lock(&xs->mutex);
	dev_curr = xs->dev;
	if (xs->dev) {
		err = -EBUSY;
		goto out_release;
	}

	dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
	if (!dev) {
		err = -ENODEV;

@@ -317,7 +312,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
		goto out_unlock;
	}

	if (sxdp->sxdp_queue_id >= dev->num_rx_queues) {
	if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
	    (xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
		err = -EINVAL;
		goto out_unlock;
	}

@@ -352,7 +348,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
		}

		xdp_get_umem(umem_xs->umem);
		old_umem = xs->umem;
		xs->umem = umem_xs->umem;
		sockfd_put(sock);
	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {

@@ -364,14 +359,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
		xskq_set_umem(xs->umem->cq, &xs->umem->props);
	}

	/* Rebind? */
	if (dev_curr && (dev_curr != dev ||
			 xs->queue_id != sxdp->sxdp_queue_id)) {
		__xsk_release(xs);
		if (old_umem)
			xdp_put_umem(old_umem);
	}

	xs->dev = dev;
	xs->queue_id = sxdp->sxdp_queue_id;

@@ -419,25 +406,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
		struct xdp_umem_reg mr;
		struct xdp_umem *umem;

		if (xs->umem)
			return -EBUSY;

		if (copy_from_user(&mr, optval, sizeof(mr)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		err = xdp_umem_create(&umem);

		err = xdp_umem_reg(umem, &mr);
		if (err) {
			kfree(umem);
		if (xs->umem) {
			mutex_unlock(&xs->mutex);
			return err;
			return -EBUSY;
		}

		umem = xdp_umem_create(&mr);
		if (IS_ERR(umem)) {
			mutex_unlock(&xs->mutex);
			return PTR_ERR(umem);
		}

		/* Make sure umem is ready before it can be seen by others */
		smp_wmb();

		xs->umem = umem;
		mutex_unlock(&xs->mutex);
		return 0;

@@ -448,13 +433,15 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
		struct xsk_queue **q;
		int entries;

		if (!xs->umem)
			return -EINVAL;

		if (copy_from_user(&entries, optval, sizeof(entries)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		if (!xs->umem) {
			mutex_unlock(&xs->mutex);
			return -EINVAL;
		}

		q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
			&xs->umem->cq;
		err = xsk_init_queue(entries, q, true);

@@ -504,6 +491,35 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,

		return 0;
	}
	case XDP_MMAP_OFFSETS:
	{
		struct xdp_mmap_offsets off;

		if (len < sizeof(off))
			return -EINVAL;

		off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
		off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
		off.rx.desc	= offsetof(struct xdp_rxtx_ring, desc);
		off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
		off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
		off.tx.desc	= offsetof(struct xdp_rxtx_ring, desc);

		off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
		off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
		off.fr.desc	= offsetof(struct xdp_umem_ring, desc);
		off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
		off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
		off.cr.desc	= offsetof(struct xdp_umem_ring, desc);

		len = sizeof(off);
		if (copy_to_user(optval, &off, len))
			return -EFAULT;
		if (put_user(len, optlen))
			return -EFAULT;

		return 0;
	}
	default:
		break;
	}
@ -518,21 +534,23 @@ static int xsk_mmap(struct file *file, struct socket *sock,
|
|||
unsigned long size = vma->vm_end - vma->vm_start;
|
||||
struct xdp_sock *xs = xdp_sk(sock->sk);
|
||||
struct xsk_queue *q = NULL;
|
||||
struct xdp_umem *umem;
|
||||
unsigned long pfn;
|
||||
struct page *qpg;
|
||||
|
||||
if (offset == XDP_PGOFF_RX_RING) {
|
||||
q = xs->rx;
|
||||
q = READ_ONCE(xs->rx);
|
||||
} else if (offset == XDP_PGOFF_TX_RING) {
|
||||
q = xs->tx;
|
||||
q = READ_ONCE(xs->tx);
|
||||
} else {
|
||||
if (!xs->umem)
|
||||
umem = READ_ONCE(xs->umem);
|
||||
if (!umem)
|
||||
return -EINVAL;
|
||||
|
||||
if (offset == XDP_UMEM_PGOFF_FILL_RING)
|
||||
q = xs->umem->fq;
|
||||
q = READ_ONCE(umem->fq);
|
||||
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
|
||||
q = xs->umem->cq;
|
||||
q = READ_ONCE(umem->cq);
|
||||
}
|
||||
|
||||
if (!q)
|
||||
|
@ -554,24 +572,24 @@ static struct proto xsk_proto = {
|
|||
};
|
||||
|
||||
static const struct proto_ops xsk_proto_ops = {
|
||||
.family = PF_XDP,
|
||||
.owner = THIS_MODULE,
|
||||
.release = xsk_release,
|
||||
.bind = xsk_bind,
|
||||
.connect = sock_no_connect,
|
||||
.socketpair = sock_no_socketpair,
|
||||
.accept = sock_no_accept,
|
||||
.getname = sock_no_getname,
|
||||
.poll = xsk_poll,
|
||||
.ioctl = sock_no_ioctl,
|
||||
.listen = sock_no_listen,
|
||||
.shutdown = sock_no_shutdown,
|
||||
.setsockopt = xsk_setsockopt,
|
||||
.getsockopt = xsk_getsockopt,
|
||||
.sendmsg = xsk_sendmsg,
|
||||
.recvmsg = sock_no_recvmsg,
|
||||
.mmap = xsk_mmap,
|
||||
.sendpage = sock_no_sendpage,
|
||||
.family = PF_XDP,
|
||||
.owner = THIS_MODULE,
|
||||
.release = xsk_release,
|
||||
.bind = xsk_bind,
|
||||
.connect = sock_no_connect,
|
||||
.socketpair = sock_no_socketpair,
|
||||
.accept = sock_no_accept,
|
||||
.getname = sock_no_getname,
|
||||
.poll = xsk_poll,
|
||||
.ioctl = sock_no_ioctl,
|
||||
.listen = sock_no_listen,
|
||||
.shutdown = sock_no_shutdown,
|
||||
.setsockopt = xsk_setsockopt,
|
||||
.getsockopt = xsk_getsockopt,
|
||||
.sendmsg = xsk_sendmsg,
|
||||
.recvmsg = sock_no_recvmsg,
|
||||
.mmap = xsk_mmap,
|
||||
.sendpage = sock_no_sendpage,
|
||||
};
|
||||
|
||||
static void xsk_destruct(struct sock *sk)
|
||||
|
|
|
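Taken together, the XDP_MMAP_OFFSETS getsockopt above replaces the fixed ring layout that user space previously hard-coded. A condensed sketch of the resulting user-space flow, mirroring what the xdpsock sample further down does (assuming an already-created AF_XDP socket sfd and the sample's FQ_NUM_DESCS constant; error handling elided):

	struct xdp_mmap_offsets off;
	socklen_t optlen = sizeof(off);
	void *map;
	__u32 *producer, *consumer, *fill_ring;

	/* Ask the kernel where the producer/consumer pointers and the
	 * descriptor array live inside each mmap'ed ring region. */
	getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);

	/* Map the fill ring, then derive the pointers from the reported
	 * offsets instead of casting to a fixed struct xdp_umem_ring. */
	map = mmap(NULL, off.fr.desc + FQ_NUM_DESCS * sizeof(__u32),
		   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		   sfd, XDP_UMEM_PGOFF_FILL_RING);
	producer  = map + off.fr.producer;
	consumer  = map + off.fr.consumer;
	fill_ring = map + off.fr.desc;

This decouples the uapi from the kernel's internal ring structs, which is what allows the explicit cache alignment to be dropped from the uapi headers.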
@@ -1,15 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* XDP user-space ring structure
 * Copyright(c) 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/slab.h>
@@ -31,8 +22,7 @@ static u32 xskq_umem_get_ring_size(struct xsk_queue *q)

static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
{
	return (sizeof(struct xdp_ring) +
		q->nentries * sizeof(struct xdp_desc));
	return sizeof(struct xdp_ring) + q->nentries * sizeof(struct xdp_desc);
}

struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
@@ -1,15 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
 * XDP user-space ring structure
/* SPDX-License-Identifier: GPL-2.0 */
/* XDP user-space ring structure
 * Copyright(c) 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#ifndef _LINUX_XSK_QUEUE_H
@@ -22,6 +13,23 @@

#define RX_BATCH_SIZE 16

struct xdp_ring {
	u32 producer ____cacheline_aligned_in_smp;
	u32 consumer ____cacheline_aligned_in_smp;
};

/* Used for the RX and TX queues for packets */
struct xdp_rxtx_ring {
	struct xdp_ring ptrs;
	struct xdp_desc desc[0] ____cacheline_aligned_in_smp;
};

/* Used for the fill and completion queues for buffers */
struct xdp_umem_ring {
	struct xdp_ring ptrs;
	u32 desc[0] ____cacheline_aligned_in_smp;
};

struct xsk_queue {
	struct xdp_umem_props umem_props;
	u32 ring_mask;
@@ -232,12 +240,12 @@ static inline void xskq_produce_flush_desc(struct xsk_queue *q)

static inline bool xskq_full_desc(struct xsk_queue *q)
{
	return (xskq_nb_avail(q, q->nentries) == q->nentries);
	return xskq_nb_avail(q, q->nentries) == q->nentries;
}

static inline bool xskq_empty_desc(struct xsk_queue *q)
{
	return (xskq_nb_free(q, q->prod_tail, 1) == q->nentries);
	return xskq_nb_free(q, q->prod_tail, 1) == q->nentries;
}

void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
@@ -51,6 +51,7 @@ hostprogs-y += cpustat
hostprogs-y += xdp_adjust_tail
hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query

# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := bpf_load.o xdpsock_user.o
xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -160,6 +162,7 @@ always += cpustat_kern.o
always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/

HOST_LOADLIBES += $(LIBBPF) -lelf
HOSTLOADLIBES_tracex4 += -lrt
@@ -0,0 +1,19 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

SEC("kprobe/blk_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
	return 0;
}

SEC("kretprobe/blk_account_io_completion")
int bpf_prog2(struct pt_regs *ctx)
{
	return 0;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
@@ -0,0 +1,382 @@
// SPDX-License-Identifier: GPL-2.0

#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <linux/bpf.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "libbpf.h"
#include "bpf_load.h"
#include "bpf_util.h"
#include "perf-sys.h"
#include "trace_helpers.h"

#define CHECK_PERROR_RET(condition) ({			\
	int __ret = !!(condition);			\
	if (__ret) {					\
		printf("FAIL: %s:\n", __func__);	\
		perror(" ");				\
		return -1;				\
	}						\
})

#define CHECK_AND_RET(condition) ({			\
	int __ret = !!(condition);			\
	if (__ret)					\
		return -1;				\
})

static __u64 ptr_to_u64(void *ptr)
{
	return (__u64) (unsigned long) ptr;
}

#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
static int bpf_find_probe_type(const char *event_type)
{
	char buf[256];
	int fd, ret;

	ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
	CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));

	fd = open(buf, O_RDONLY);
	CHECK_PERROR_RET(fd < 0);

	ret = read(fd, buf, sizeof(buf));
	close(fd);
	CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));

	errno = 0;
	ret = (int)strtol(buf, NULL, 10);
	CHECK_PERROR_RET(errno);
	return ret;
}

#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
static int bpf_get_retprobe_bit(const char *event_type)
{
	char buf[256];
	int fd, ret;

	ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
	CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));

	fd = open(buf, O_RDONLY);
	CHECK_PERROR_RET(fd < 0);

	ret = read(fd, buf, sizeof(buf));
	close(fd);
	CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
	CHECK_PERROR_RET(strlen(buf) < strlen("config:"));

	errno = 0;
	ret = (int)strtol(buf + strlen("config:"), NULL, 10);
	CHECK_PERROR_RET(errno);
	return ret;
}

static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name,
				__u32 expected_fd_type)
{
	__u64 probe_offset, probe_addr;
	__u32 len, prog_id, fd_type;
	char buf[256];
	int err;

	len = sizeof(buf);
	err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len,
				&prog_id, &fd_type, &probe_offset,
				&probe_addr);
	if (err < 0) {
		printf("FAIL: %s, for event_fd idx %d, fn_name %s\n",
		       __func__, prog_fd_idx, fn_name);
		perror(" :");
		return -1;
	}
	if (strcmp(buf, fn_name) != 0 ||
	    fd_type != expected_fd_type ||
	    probe_offset != 0x0 || probe_addr != 0x0) {
		printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n",
		       prog_fd_idx);
		printf("buf: %s, fd_type: %u, probe_offset: 0x%llx,"
		       " probe_addr: 0x%llx\n",
		       buf, fd_type, probe_offset, probe_addr);
		return -1;
	}
	return 0;
}

static int test_nondebug_fs_kuprobe_common(const char *event_type,
	const char *name, __u64 offset, __u64 addr, bool is_return,
	char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
	__u64 *probe_offset, __u64 *probe_addr)
{
	int is_return_bit = bpf_get_retprobe_bit(event_type);
	int type = bpf_find_probe_type(event_type);
	struct perf_event_attr attr = {};
	int fd;

	if (type < 0 || is_return_bit < 0) {
		printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n",
		       __func__, type, is_return_bit);
		return -1;
	}

	attr.sample_period = 1;
	attr.wakeup_events = 1;
	if (is_return)
		attr.config |= 1 << is_return_bit;

	if (name) {
		attr.config1 = ptr_to_u64((void *)name);
		attr.config2 = offset;
	} else {
		attr.config1 = 0;
		attr.config2 = addr;
	}
	attr.size = sizeof(attr);
	attr.type = type;

	fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
	CHECK_PERROR_RET(fd < 0);

	CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0);
	CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
	CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len,
		prog_id, fd_type, probe_offset, probe_addr) < 0);

	return 0;
}

static int test_nondebug_fs_probe(const char *event_type, const char *name,
				  __u64 offset, __u64 addr, bool is_return,
				  __u32 expected_fd_type,
				  __u32 expected_ret_fd_type,
				  char *buf, __u32 buf_len)
{
	__u64 probe_offset, probe_addr;
	__u32 prog_id, fd_type;
	int err;

	err = test_nondebug_fs_kuprobe_common(event_type, name,
					      offset, addr, is_return,
					      buf, &buf_len, &prog_id,
					      &fd_type, &probe_offset,
					      &probe_addr);
	if (err < 0) {
		printf("FAIL: %s, "
		       "for name %s, offset 0x%llx, addr 0x%llx, is_return %d\n",
		       __func__, name ? name : "", offset, addr, is_return);
		perror(" :");
		return -1;
	}
	if ((is_return && fd_type != expected_ret_fd_type) ||
	    (!is_return && fd_type != expected_fd_type)) {
		printf("FAIL: %s, incorrect fd_type %u\n",
		       __func__, fd_type);
		return -1;
	}
	if (name) {
		if (strcmp(name, buf) != 0) {
			printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
			return -1;
		}
		if (probe_offset != offset) {
			printf("FAIL: %s, incorrect probe_offset 0x%llx\n",
			       __func__, probe_offset);
			return -1;
		}
	} else {
		if (buf_len != 0) {
			printf("FAIL: %s, incorrect buf %p\n",
			       __func__, buf);
			return -1;
		}

		if (probe_addr != addr) {
			printf("FAIL: %s, incorrect probe_addr 0x%llx\n",
			       __func__, probe_addr);
			return -1;
		}
	}
	return 0;
}

static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
{
	const char *event_type = "uprobe";
	struct perf_event_attr attr = {};
	char buf[256], event_alias[256];
	__u64 probe_offset, probe_addr;
	__u32 len, prog_id, fd_type;
	int err, res, kfd, efd;
	ssize_t bytes;

	snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events",
		 event_type);
	kfd = open(buf, O_WRONLY | O_APPEND, 0);
	CHECK_PERROR_RET(kfd < 0);

	res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid());
	CHECK_PERROR_RET(res < 0 || res >= sizeof(event_alias));

	res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx",
		       is_return ? 'r' : 'p', event_type, event_alias,
		       binary_path, offset);
	CHECK_PERROR_RET(res < 0 || res >= sizeof(buf));
	CHECK_PERROR_RET(write(kfd, buf, strlen(buf)) < 0);

	close(kfd);
	kfd = -1;

	snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id",
		 event_type, event_alias);
	efd = open(buf, O_RDONLY, 0);
	CHECK_PERROR_RET(efd < 0);

	bytes = read(efd, buf, sizeof(buf));
	CHECK_PERROR_RET(bytes <= 0 || bytes >= sizeof(buf));
	close(efd);
	buf[bytes] = '\0';

	attr.config = strtol(buf, NULL, 0);
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.sample_period = 1;
	attr.wakeup_events = 1;
	kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
	CHECK_PERROR_RET(kfd < 0);
	CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
	CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0);

	len = sizeof(buf);
	err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len,
				&prog_id, &fd_type, &probe_offset,
				&probe_addr);
	if (err < 0) {
		printf("FAIL: %s, binary_path %s\n", __func__, binary_path);
		perror(" :");
		return -1;
	}
	if ((is_return && fd_type != BPF_FD_TYPE_URETPROBE) ||
	    (!is_return && fd_type != BPF_FD_TYPE_UPROBE)) {
		printf("FAIL: %s, incorrect fd_type %u\n", __func__,
		       fd_type);
		return -1;
	}
	if (strcmp(binary_path, buf) != 0) {
		printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
		return -1;
	}
	if (probe_offset != offset) {
		printf("FAIL: %s, incorrect probe_offset 0x%llx\n", __func__,
		       probe_offset);
		return -1;
	}

	close(kfd);
	return 0;
}

int main(int argc, char **argv)
{
	struct rlimit r = {1024*1024, RLIM_INFINITY};
	extern char __executable_start;
	char filename[256], buf[256];
	__u64 uprobe_file_offset;

	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		perror("setrlimit(RLIMIT_MEMLOCK)");
		return 1;
	}

	if (load_kallsyms()) {
		printf("failed to process /proc/kallsyms\n");
		return 1;
	}

	if (load_bpf_file(filename)) {
		printf("%s", bpf_log_buf);
		return 1;
	}

	/* test two functions in the corresponding *_kern.c file */
	CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request",
					   BPF_FD_TYPE_KPROBE));
	CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
					   BPF_FD_TYPE_KRETPROBE));

	/* test nondebug fs kprobe */
	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
					     false, BPF_FD_TYPE_KPROBE,
					     BPF_FD_TYPE_KRETPROBE,
					     buf, sizeof(buf)));
#ifdef __x86_64__
	/* set a kprobe on "bpf_check + 0x5", which is x64 specific */
	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x5, 0x0,
					     false, BPF_FD_TYPE_KPROBE,
					     BPF_FD_TYPE_KRETPROBE,
					     buf, sizeof(buf)));
#endif
	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
					     true, BPF_FD_TYPE_KPROBE,
					     BPF_FD_TYPE_KRETPROBE,
					     buf, sizeof(buf)));
	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
					     ksym_get_addr("bpf_check"), false,
					     BPF_FD_TYPE_KPROBE,
					     BPF_FD_TYPE_KRETPROBE,
					     buf, sizeof(buf)));
	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
					     ksym_get_addr("bpf_check"), false,
					     BPF_FD_TYPE_KPROBE,
					     BPF_FD_TYPE_KRETPROBE,
					     NULL, 0));
	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
					     ksym_get_addr("bpf_check"), true,
					     BPF_FD_TYPE_KPROBE,
					     BPF_FD_TYPE_KRETPROBE,
					     buf, sizeof(buf)));
	CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
					     ksym_get_addr("bpf_check"), true,
					     BPF_FD_TYPE_KPROBE,
					     BPF_FD_TYPE_KRETPROBE,
					     0, 0));

	/* test nondebug fs uprobe */
	/* the calculation of uprobe file offset is based on gcc 7.3.1 on x64
	 * and the default linker script, which defines __executable_start as
	 * the start of the .text section. The calculation could be different
	 * on different systems with different compilers. The right way is
	 * to parse the ELF file. We took a shortcut here.
	 */
	uprobe_file_offset = (__u64)main - (__u64)&__executable_start;
	CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
					     uprobe_file_offset, 0x0, false,
					     BPF_FD_TYPE_UPROBE,
					     BPF_FD_TYPE_URETPROBE,
					     buf, sizeof(buf)));
	CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
					     uprobe_file_offset, 0x0, true,
					     BPF_FD_TYPE_UPROBE,
					     BPF_FD_TYPE_URETPROBE,
					     buf, sizeof(buf)));

	/* test debug fs uprobe */
	CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
					   false));
	CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
					   true));

	return 0;
}
@@ -125,6 +125,7 @@ struct datarec {
	u64 processed;
	u64 dropped;
	u64 info;
	u64 err;
};
#define MAX_CPUS 64

@@ -208,3 +209,51 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)

	return 0;
}

struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(struct datarec),
	.max_entries = 1,
};

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct devmap_xmit_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	u32 map_index;		// offset:16; size:4; signed:0;
	int drops;		// offset:20; size:4; signed:1;
	int sent;		// offset:24; size:4; signed:1;
	int from_ifindex;	// offset:28; size:4; signed:1;
	int to_ifindex;		// offset:32; size:4; signed:1;
	int err;		// offset:36; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_devmap_xmit")
int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key);
	if (!rec)
		return 0;
	rec->processed += ctx->sent;
	rec->dropped   += ctx->drops;

	/* Record bulk events, then userspace can calc average bulk size */
	rec->info += 1;

	/* Record error cases, where no frame were sent */
	if (ctx->err)
		rec->err++;

	/* Catch API error of drv ndo_xdp_xmit sent more than count */
	if (ctx->drops < 0)
		rec->err++;

	return 1;
}
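Note that info counts one event per ndo_xdp_xmit bulk while processed/dropped count individual frames, so the user-space reader below can recover the average bulk size by dividing frames by events. A toy illustration of that arithmetic, with invented numbers:

	#include <stdio.h>

	int main(void)
	{
		/* Invented interval: 1000 frames sent plus 24 dropped,
		 * accumulated over 64 ndo_xdp_xmit() bulk events. */
		double sent = 1000, dropped = 24, bulk_events = 64;

		/* Same formula the monitor uses: info = (pps + drop) / info */
		printf("bulk-average: %.1f frames/xmit\n",
		       (sent + dropped) / bulk_events); /* prints 16.0 */
		return 0;
	}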
@@ -117,6 +117,7 @@ struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 info;
	__u64 err;
};
#define MAX_CPUS 64

@@ -141,6 +142,7 @@ struct stats_record {
	struct record_u64 xdp_exception[XDP_ACTION_MAX];
	struct record xdp_cpumap_kthread;
	struct record xdp_cpumap_enqueue[MAX_CPUS];
	struct record xdp_devmap_xmit;
};

static bool map_collect_record(int fd, __u32 key, struct record *rec)
@@ -151,6 +153,7 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
	__u64 sum_processed = 0;
	__u64 sum_dropped = 0;
	__u64 sum_info = 0;
	__u64 sum_err = 0;
	int i;

	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
@@ -169,10 +172,13 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
		sum_dropped += values[i].dropped;
		rec->cpu[i].info = values[i].info;
		sum_info += values[i].info;
		rec->cpu[i].err = values[i].err;
		sum_err += values[i].err;
	}
	rec->total.processed = sum_processed;
	rec->total.dropped = sum_dropped;
	rec->total.info = sum_info;
	rec->total.err = sum_err;
	return true;
}

@@ -273,6 +279,18 @@ static double calc_info(struct datarec *r, struct datarec *p, double period)
	return pps;
}

static double calc_err(struct datarec *r, struct datarec *p, double period)
{
	__u64 packets = 0;
	double pps = 0;

	if (period > 0) {
		packets = r->err - p->err;
		pps = packets / period;
	}
	return pps;
}

static void stats_print(struct stats_record *stats_rec,
			struct stats_record *stats_prev,
			bool err_only)
@@ -397,7 +415,7 @@ static void stats_print(struct stats_record *stats_rec,
			info = calc_info(r, p, t);
			if (info > 0)
				i_str = "sched";
			if (pps > 0)
			if (pps > 0 || drop > 0)
				printf(fmt1, "cpumap-kthread",
				       i, pps, drop, info, i_str);
		}
@@ -409,6 +427,50 @@ static void stats_print(struct stats_record *stats_rec,
		printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
	}

	/* devmap ndo_xdp_xmit stats */
	{
		char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
		char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
		struct record *rec, *prev;
		double drop, info, err;
		char *i_str = "";
		char *err_str = "";

		rec = &stats_rec->xdp_devmap_xmit;
		prev = &stats_prev->xdp_devmap_xmit;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps = calc_pps(r, p, t);
			drop = calc_drop(r, p, t);
			info = calc_info(r, p, t);
			err = calc_err(r, p, t);
			if (info > 0) {
				i_str = "bulk-average";
				info = (pps+drop) / info; /* calc avg bulk */
			}
			if (err > 0)
				err_str = "drv-err";
			if (pps > 0 || drop > 0)
				printf(fmt1, "devmap-xmit",
				       i, pps, drop, info, i_str, err_str);
		}
		pps = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop(&rec->total, &prev->total, t);
		info = calc_info(&rec->total, &prev->total, t);
		err = calc_err(&rec->total, &prev->total, t);
		if (info > 0) {
			i_str = "bulk-average";
			info = (pps+drop) / info; /* calc avg bulk */
		}
		if (err > 0)
			err_str = "drv-err";
		printf(fmt2, "devmap-xmit", "total", pps, drop,
		       info, i_str, err_str);
	}

	printf("\n");
}

@@ -437,6 +499,9 @@ static bool stats_collect(struct stats_record *rec)
	fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
	map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);

	fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
	map_collect_record(fd, 0, &rec->xdp_devmap_xmit);

	return true;
}

@@ -480,6 +545,7 @@ static struct stats_record *alloc_stats_record(void)

	rec_sz = sizeof(struct datarec);
	rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
	rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz);

	for (i = 0; i < MAX_CPUS; i++)
		rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
@@ -498,6 +564,7 @@ static void free_stats_record(struct stats_record *r)
		free(r->xdp_exception[i].cpu);

	free(r->xdp_cpumap_kthread.cpu);
	free(r->xdp_devmap_xmit.cpu);

	for (i = 0; i < MAX_CPUS; i++)
		free(r->xdp_cpumap_enqueue[i].cpu);
@@ -1,15 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017 - 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
/* Copyright(c) 2017 - 2018 Intel Corporation. */

#include <assert.h>
#include <errno.h>
@@ -89,7 +79,10 @@ struct xdp_umem_uqueue {
	u32 cached_cons;
	u32 mask;
	u32 size;
	struct xdp_umem_ring *ring;
	u32 *producer;
	u32 *consumer;
	u32 *ring;
	void *map;
};

struct xdp_umem {
@@ -104,7 +97,10 @@ struct xdp_uqueue {
	u32 cached_cons;
	u32 mask;
	u32 size;
	struct xdp_rxtx_ring *ring;
	u32 *producer;
	u32 *consumer;
	struct xdp_desc *ring;
	void *map;
};

struct xdpsock {
@@ -165,7 +161,7 @@ static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
		return free_entries;

	/* Refresh the local tail pointer */
	q->cached_cons = q->ring->ptrs.consumer;
	q->cached_cons = *q->consumer;

	return q->size - (q->cached_prod - q->cached_cons);
}
@@ -178,7 +174,7 @@ static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
		return free_entries;

	/* Refresh the local tail pointer */
	q->cached_cons = q->ring->ptrs.consumer + q->size;
	q->cached_cons = *q->consumer + q->size;
	return q->cached_cons - q->cached_prod;
}

@@ -187,7 +183,7 @@ static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
	u32 entries = q->cached_prod - q->cached_cons;

	if (entries == 0) {
		q->cached_prod = q->ring->ptrs.producer;
		q->cached_prod = *q->producer;
		entries = q->cached_prod - q->cached_cons;
	}

@@ -199,7 +195,7 @@ static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
	u32 entries = q->cached_prod - q->cached_cons;

	if (entries == 0) {
		q->cached_prod = q->ring->ptrs.producer;
		q->cached_prod = *q->producer;
		entries = q->cached_prod - q->cached_cons;
	}

@@ -218,12 +214,12 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
	for (i = 0; i < nb; i++) {
		u32 idx = fq->cached_prod++ & fq->mask;

		fq->ring->desc[idx] = d[i].idx;
		fq->ring[idx] = d[i].idx;
	}

	u_smp_wmb();

	fq->ring->ptrs.producer = fq->cached_prod;
	*fq->producer = fq->cached_prod;

	return 0;
}
@@ -239,12 +235,12 @@ static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
	for (i = 0; i < nb; i++) {
		u32 idx = fq->cached_prod++ & fq->mask;

		fq->ring->desc[idx] = d[i];
		fq->ring[idx] = d[i];
	}

	u_smp_wmb();

	fq->ring->ptrs.producer = fq->cached_prod;
	*fq->producer = fq->cached_prod;

	return 0;
}
@@ -258,13 +254,13 @@ static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,

	for (i = 0; i < entries; i++) {
		idx = cq->cached_cons++ & cq->mask;
		d[i] = cq->ring->desc[idx];
		d[i] = cq->ring[idx];
	}

	if (entries > 0) {
		u_smp_wmb();

		cq->ring->ptrs.consumer = cq->cached_cons;
		*cq->consumer = cq->cached_cons;
	}

	return entries;
@@ -280,7 +276,7 @@ static inline int xq_enq(struct xdp_uqueue *uq,
			 const struct xdp_desc *descs,
			 unsigned int ndescs)
{
	struct xdp_rxtx_ring *r = uq->ring;
	struct xdp_desc *r = uq->ring;
	unsigned int i;

	if (xq_nb_free(uq, ndescs) < ndescs)
@@ -289,21 +285,21 @@ static inline int xq_enq(struct xdp_uqueue *uq,
	for (i = 0; i < ndescs; i++) {
		u32 idx = uq->cached_prod++ & uq->mask;

		r->desc[idx].idx = descs[i].idx;
		r->desc[idx].len = descs[i].len;
		r->desc[idx].offset = descs[i].offset;
		r[idx].idx = descs[i].idx;
		r[idx].len = descs[i].len;
		r[idx].offset = descs[i].offset;
	}

	u_smp_wmb();

	r->ptrs.producer = uq->cached_prod;
	*uq->producer = uq->cached_prod;
	return 0;
}

static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
				 __u32 idx, unsigned int ndescs)
{
	struct xdp_rxtx_ring *q = uq->ring;
	struct xdp_desc *r = uq->ring;
	unsigned int i;

	if (xq_nb_free(uq, ndescs) < ndescs)
@@ -312,14 +308,14 @@ static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
	for (i = 0; i < ndescs; i++) {
		u32 idx = uq->cached_prod++ & uq->mask;

		q->desc[idx].idx = idx + i;
		q->desc[idx].len = sizeof(pkt_data) - 1;
		q->desc[idx].offset = 0;
		r[idx].idx = idx + i;
		r[idx].len = sizeof(pkt_data) - 1;
		r[idx].offset = 0;
	}

	u_smp_wmb();

	q->ptrs.producer = uq->cached_prod;
	*uq->producer = uq->cached_prod;
	return 0;
}

@@ -327,7 +323,7 @@ static inline int xq_deq(struct xdp_uqueue *uq,
			 struct xdp_desc *descs,
			 int ndescs)
{
	struct xdp_rxtx_ring *r = uq->ring;
	struct xdp_desc *r = uq->ring;
	unsigned int idx;
	int i, entries;

@@ -337,13 +333,13 @@ static inline int xq_deq(struct xdp_uqueue *uq,

	for (i = 0; i < entries; i++) {
		idx = uq->cached_cons++ & uq->mask;
		descs[i] = r->desc[idx];
		descs[i] = r[idx];
	}

	if (entries > 0) {
		u_smp_wmb();

		r->ptrs.consumer = uq->cached_cons;
		*uq->consumer = uq->cached_cons;
	}

	return entries;
@@ -402,8 +398,10 @@ static size_t gen_eth_frame(char *frame)
static struct xdp_umem *xdp_umem_configure(int sfd)
{
	int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
	struct xdp_mmap_offsets off;
	struct xdp_umem_reg mr;
	struct xdp_umem *umem;
	socklen_t optlen;
	void *bufs;

	umem = calloc(1, sizeof(*umem));
@@ -423,25 +421,35 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
	lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
			   sizeof(int)) == 0);

	umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
			     FQ_NUM_DESCS * sizeof(u32),
			     PROT_READ | PROT_WRITE,
			     MAP_SHARED | MAP_POPULATE, sfd,
			     XDP_UMEM_PGOFF_FILL_RING);
	lassert(umem->fq.ring != MAP_FAILED);
	optlen = sizeof(off);
	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
			   &optlen) == 0);

	umem->fq.map = mmap(0, off.fr.desc +
			    FQ_NUM_DESCS * sizeof(u32),
			    PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_POPULATE, sfd,
			    XDP_UMEM_PGOFF_FILL_RING);
	lassert(umem->fq.map != MAP_FAILED);

	umem->fq.mask = FQ_NUM_DESCS - 1;
	umem->fq.size = FQ_NUM_DESCS;
	umem->fq.producer = umem->fq.map + off.fr.producer;
	umem->fq.consumer = umem->fq.map + off.fr.consumer;
	umem->fq.ring = umem->fq.map + off.fr.desc;

	umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
	umem->cq.map = mmap(0, off.cr.desc +
			    CQ_NUM_DESCS * sizeof(u32),
			    PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_POPULATE, sfd,
			    XDP_UMEM_PGOFF_COMPLETION_RING);
	lassert(umem->cq.ring != MAP_FAILED);
	lassert(umem->cq.map != MAP_FAILED);

	umem->cq.mask = CQ_NUM_DESCS - 1;
	umem->cq.size = CQ_NUM_DESCS;
	umem->cq.producer = umem->cq.map + off.cr.producer;
	umem->cq.consumer = umem->cq.map + off.cr.consumer;
	umem->cq.ring = umem->cq.map + off.cr.desc;

	umem->frames = (char (*)[FRAME_SIZE])bufs;
	umem->fd = sfd;
@@ -459,9 +467,11 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
{
	struct sockaddr_xdp sxdp = {};
	struct xdp_mmap_offsets off;
	int sfd, ndescs = NUM_DESCS;
	struct xdpsock *xsk;
	bool shared = true;
	socklen_t optlen;
	u32 i;

	sfd = socket(PF_XDP, SOCK_RAW, 0);
@@ -484,15 +494,18 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
			   &ndescs, sizeof(int)) == 0);
	lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
			   &ndescs, sizeof(int)) == 0);
	optlen = sizeof(off);
	lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
			   &optlen) == 0);

	/* Rx */
	xsk->rx.ring = mmap(NULL,
			    sizeof(struct xdp_ring) +
			    NUM_DESCS * sizeof(struct xdp_desc),
			    PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_POPULATE, sfd,
			    XDP_PGOFF_RX_RING);
	lassert(xsk->rx.ring != MAP_FAILED);
	xsk->rx.map = mmap(NULL,
			   off.rx.desc +
			   NUM_DESCS * sizeof(struct xdp_desc),
			   PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_POPULATE, sfd,
			   XDP_PGOFF_RX_RING);
	lassert(xsk->rx.map != MAP_FAILED);

	if (!shared) {
		for (i = 0; i < NUM_DESCS / 2; i++)
@@ -501,19 +514,25 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
	}

	/* Tx */
	xsk->tx.ring = mmap(NULL,
			    sizeof(struct xdp_ring) +
			    NUM_DESCS * sizeof(struct xdp_desc),
			    PROT_READ | PROT_WRITE,
			    MAP_SHARED | MAP_POPULATE, sfd,
			    XDP_PGOFF_TX_RING);
	lassert(xsk->tx.ring != MAP_FAILED);
	xsk->tx.map = mmap(NULL,
			   off.tx.desc +
			   NUM_DESCS * sizeof(struct xdp_desc),
			   PROT_READ | PROT_WRITE,
			   MAP_SHARED | MAP_POPULATE, sfd,
			   XDP_PGOFF_TX_RING);
	lassert(xsk->tx.map != MAP_FAILED);

	xsk->rx.mask = NUM_DESCS - 1;
	xsk->rx.size = NUM_DESCS;
	xsk->rx.producer = xsk->rx.map + off.rx.producer;
	xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
	xsk->rx.ring = xsk->rx.map + off.rx.desc;

	xsk->tx.mask = NUM_DESCS - 1;
	xsk->tx.size = NUM_DESCS;
	xsk->tx.producer = xsk->tx.map + off.tx.producer;
	xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
	xsk->tx.ring = xsk->tx.map + off.tx.desc;

	sxdp.sxdp_family = PF_XDP;
	sxdp.sxdp_ifindex = opt_ifindex;
@@ -95,7 +95,7 @@ class HeaderParser(object):
        return capture.group(1)

    def parse_desc(self):
        p = re.compile(' \* ?(?:\t| {6,8})Description$')
        p = re.compile(' \* ?(?:\t| {5,8})Description$')
        capture = p.match(self.line)
        if not capture:
            # Helper can have empty description and we might be parsing another
@@ -109,7 +109,7 @@ class HeaderParser(object):
            if self.line == ' *\n':
                desc += '\n'
            else:
                p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)')
                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                capture = p.match(self.line)
                if capture:
                    desc += capture.group(1) + '\n'
@@ -118,7 +118,7 @@ class HeaderParser(object):
        return desc

    def parse_ret(self):
        p = re.compile(' \* ?(?:\t| {6,8})Return$')
        p = re.compile(' \* ?(?:\t| {5,8})Return$')
        capture = p.match(self.line)
        if not capture:
            # Helper can have empty retval and we might be parsing another
@@ -132,7 +132,7 @@ class HeaderParser(object):
            if self.line == ' *\n':
                ret += '\n'
            else:
                p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)')
                p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
                capture = p.match(self.line)
                if capture:
                    ret += capture.group(1) + '\n'
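The quantifier change from {6,8} to {5,8} lets the parser also accept helper descriptions indented by five spaces after the leading " * ". A hypothetical doc comment (layout invented purely for illustration, not taken from the real uapi header) that only the relaxed pattern matches:

	/* With five spaces before the labels and thirteen before the body
	 * text (5 + 8), the lines below match the new ' \* ?(?:\t| {5,8})'
	 * patterns but were rejected by the old {6,8} variant:
	 *
	 * int bpf_example(void *ctx)
	 *     Description
	 *             A made-up helper, used only to illustrate indentation.
	 *     Return
	 *             0 on success.
	 */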
@@ -0,0 +1,81 @@
================
bpftool-perf
================
-------------------------------------------------------------------------------
tool for inspection of perf related bpf prog attachments
-------------------------------------------------------------------------------

:Manual section: 8

SYNOPSIS
========

	**bpftool** [*OPTIONS*] **perf** *COMMAND*

	*OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }

	*COMMANDS* :=
	{ **show** | **list** | **help** }

PERF COMMANDS
=============

|	**bpftool** **perf { show | list }**
|	**bpftool** **perf help**

DESCRIPTION
===========
	**bpftool perf { show | list }**
		  List all raw_tracepoint, tracepoint, kprobe attachment in the system.

		  Output will start with process id and file descriptor in that process,
		  followed by bpf program id, attachment information, and attachment point.
		  The attachment point for raw_tracepoint/tracepoint is the trace probe name.
		  The attachment point for k[ret]probe is either symbol name and offset,
		  or a kernel virtual address.
		  The attachment point for u[ret]probe is the file name and the file offset.

	**bpftool perf help**
		  Print short help message.

OPTIONS
=======
	-h, --help
		  Print short generic help message (similar to **bpftool help**).

	-v, --version
		  Print version number (similar to **bpftool version**).

	-j, --json
		  Generate JSON output. For commands that cannot produce JSON, this
		  option has no effect.

	-p, --pretty
		  Generate human-readable JSON output. Implies **-j**.

EXAMPLES
========

| **# bpftool perf**

::

    pid 21711 fd 5: prog_id 5 kprobe func __x64_sys_write offset 0
    pid 21765 fd 5: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0
    pid 21767 fd 5: prog_id 8 tracepoint sys_enter_nanosleep
    pid 21800 fd 5: prog_id 9 uprobe filename /home/yhs/a.out offset 1159

|
| **# bpftool -j perf**

::

    [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
     {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
     {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
     {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]


SEE ALSO
========
	**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
@@ -16,7 +16,7 @@ SYNOPSIS

	**bpftool** **version**

	*OBJECT* := { **map** | **program** | **cgroup** }
	*OBJECT* := { **map** | **program** | **cgroup** | **perf** }

	*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
	| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@@ -30,6 +30,8 @@ SYNOPSIS

	*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }

	*PERF-COMMANDS* := { **show** | **list** | **help** }

DESCRIPTION
===========
	*bpftool* allows for inspection and simple modification of BPF objects
@@ -56,3 +58,4 @@ OPTIONS
SEE ALSO
========
	**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
	**bpftool-perf**\ (8)
@@ -448,6 +448,15 @@ _bpftool()
			;;
		esac
		;;
	perf)
		case $command in
			*)
				[[ $prev == $object ]] && \
					COMPREPLY=( $( compgen -W 'help \
						show list' -- "$cur" ) )
				;;
		esac
		;;
	esac
} &&
complete -F _bpftool bpftool
@@ -87,7 +87,7 @@ static int do_help(int argc, char **argv)
		"       %s batch file FILE\n"
		"       %s version\n"
		"\n"
		"       OBJECT := { prog | map | cgroup }\n"
		"       OBJECT := { prog | map | cgroup | perf }\n"
		"       " HELP_SPEC_OPTIONS "\n"
		"",
		bin_name, bin_name, bin_name);
@@ -216,6 +216,7 @@ static const struct cmd cmds[] = {
	{ "prog",	do_prog },
	{ "map",	do_map },
	{ "cgroup",	do_cgroup },
	{ "perf",	do_perf },
	{ "version",	do_version },
	{ 0 }
};
@@ -119,6 +119,7 @@ int do_prog(int argc, char **arg);
int do_map(int argc, char **arg);
int do_event_pipe(int argc, char **argv);
int do_cgroup(int argc, char **arg);
int do_perf(int argc, char **arg);

int prog_parse_fd(int *argc, char ***argv);
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
@@ -0,0 +1,246 @@
// SPDX-License-Identifier: GPL-2.0+
// Copyright (C) 2018 Facebook
// Author: Yonghong Song <yhs@fb.com>

#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <ftw.h>

#include <bpf.h>

#include "main.h"

/* 0: undecided, 1: supported, 2: not supported */
static int perf_query_supported;
static bool has_perf_query_support(void)
{
	__u64 probe_offset, probe_addr;
	__u32 len, prog_id, fd_type;
	char buf[256];
	int fd;

	if (perf_query_supported)
		goto out;

	fd = open(bin_name, O_RDONLY);
	if (fd < 0) {
		p_err("perf_query_support: %s", strerror(errno));
		goto out;
	}

	/* the following query will fail as no bpf attachment,
	 * the expected errno is ENOTSUPP
	 */
	errno = 0;
	len = sizeof(buf);
	bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id,
			  &fd_type, &probe_offset, &probe_addr);

	if (errno == 524 /* ENOTSUPP */) {
		perf_query_supported = 1;
		goto close_fd;
	}

	perf_query_supported = 2;
	p_err("perf_query_support: %s", strerror(errno));
	fprintf(stderr,
		"HINT: non root or kernel doesn't support TASK_FD_QUERY\n");

close_fd:
	close(fd);
out:
	return perf_query_supported == 1;
}

static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
			    char *buf, __u64 probe_offset, __u64 probe_addr)
{
	jsonw_start_object(json_wtr);
	jsonw_int_field(json_wtr, "pid", pid);
	jsonw_int_field(json_wtr, "fd", fd);
	jsonw_uint_field(json_wtr, "prog_id", prog_id);
	switch (fd_type) {
	case BPF_FD_TYPE_RAW_TRACEPOINT:
		jsonw_string_field(json_wtr, "fd_type", "raw_tracepoint");
		jsonw_string_field(json_wtr, "tracepoint", buf);
		break;
	case BPF_FD_TYPE_TRACEPOINT:
		jsonw_string_field(json_wtr, "fd_type", "tracepoint");
		jsonw_string_field(json_wtr, "tracepoint", buf);
		break;
	case BPF_FD_TYPE_KPROBE:
		jsonw_string_field(json_wtr, "fd_type", "kprobe");
		if (buf[0] != '\0') {
			jsonw_string_field(json_wtr, "func", buf);
			jsonw_lluint_field(json_wtr, "offset", probe_offset);
		} else {
			jsonw_lluint_field(json_wtr, "addr", probe_addr);
		}
		break;
	case BPF_FD_TYPE_KRETPROBE:
		jsonw_string_field(json_wtr, "fd_type", "kretprobe");
		if (buf[0] != '\0') {
			jsonw_string_field(json_wtr, "func", buf);
			jsonw_lluint_field(json_wtr, "offset", probe_offset);
		} else {
			jsonw_lluint_field(json_wtr, "addr", probe_addr);
		}
		break;
	case BPF_FD_TYPE_UPROBE:
		jsonw_string_field(json_wtr, "fd_type", "uprobe");
		jsonw_string_field(json_wtr, "filename", buf);
		jsonw_lluint_field(json_wtr, "offset", probe_offset);
		break;
	case BPF_FD_TYPE_URETPROBE:
		jsonw_string_field(json_wtr, "fd_type", "uretprobe");
		jsonw_string_field(json_wtr, "filename", buf);
		jsonw_lluint_field(json_wtr, "offset", probe_offset);
		break;
	}
	jsonw_end_object(json_wtr);
}

static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
			     char *buf, __u64 probe_offset, __u64 probe_addr)
{
	printf("pid %d fd %d: prog_id %u ", pid, fd, prog_id);
	switch (fd_type) {
	case BPF_FD_TYPE_RAW_TRACEPOINT:
		printf("raw_tracepoint %s\n", buf);
		break;
	case BPF_FD_TYPE_TRACEPOINT:
		printf("tracepoint %s\n", buf);
		break;
	case BPF_FD_TYPE_KPROBE:
		if (buf[0] != '\0')
			printf("kprobe func %s offset %llu\n", buf,
			       probe_offset);
		else
			printf("kprobe addr %llu\n", probe_addr);
		break;
	case BPF_FD_TYPE_KRETPROBE:
		if (buf[0] != '\0')
			printf("kretprobe func %s offset %llu\n", buf,
			       probe_offset);
		else
			printf("kretprobe addr %llu\n", probe_addr);
		break;
	case BPF_FD_TYPE_UPROBE:
		printf("uprobe filename %s offset %llu\n", buf, probe_offset);
		break;
	case BPF_FD_TYPE_URETPROBE:
		printf("uretprobe filename %s offset %llu\n", buf,
		       probe_offset);
		break;
	}
}

static int show_proc(const char *fpath, const struct stat *sb,
		     int tflag, struct FTW *ftwbuf)
{
	__u64 probe_offset, probe_addr;
	__u32 len, prog_id, fd_type;
	int err, pid = 0, fd = 0;
	const char *pch;
	char buf[4096];

	/* prefix always /proc */
	pch = fpath + 5;
	if (*pch == '\0')
		return 0;

	/* pid should be all numbers */
	pch++;
	while (isdigit(*pch)) {
		pid = pid * 10 + *pch - '0';
		pch++;
	}
	if (*pch == '\0')
		return 0;
	if (*pch != '/')
		return FTW_SKIP_SUBTREE;

	/* check /proc/<pid>/fd directory */
	pch++;
	if (strncmp(pch, "fd", 2))
		return FTW_SKIP_SUBTREE;
	pch += 2;
	if (*pch == '\0')
		return 0;
	if (*pch != '/')
		return FTW_SKIP_SUBTREE;

	/* check /proc/<pid>/fd/<fd_num> */
	pch++;
	while (isdigit(*pch)) {
		fd = fd * 10 + *pch - '0';
		pch++;
	}
	if (*pch != '\0')
		return FTW_SKIP_SUBTREE;

	/* query (pid, fd) for potential perf events */
	len = sizeof(buf);
	err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
				&probe_offset, &probe_addr);
	if (err < 0)
		return 0;

	if (json_output)
		print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
				probe_addr);
	else
		print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
				 probe_addr);

	return 0;
}

static int do_show(int argc, char **argv)
{
	int flags = FTW_ACTIONRETVAL | FTW_PHYS;
	int err = 0, nopenfd = 16;

	if (!has_perf_query_support())
		return -1;

	if (json_output)
		jsonw_start_array(json_wtr);
	if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
		p_err("%s", strerror(errno));
		err = -1;
	}
	if (json_output)
		jsonw_end_array(json_wtr);

	return err;
}

static int do_help(int argc, char **argv)
{
	fprintf(stderr,
		"Usage: %s %s { show | list | help }\n"
		"",
		bin_name, argv[-2]);

	return 0;
}

static const struct cmd cmds[] = {
	{ "show",	do_show },
	{ "list",	do_show },
	{ "help",	do_help },
	{ 0 }
};

int do_perf(int argc, char **argv)
{
	return cmd_select(cmds, argc, argv, do_help);
}
@@ -420,7 +420,11 @@ static int do_show(int argc, char **argv)

static int do_dump(int argc, char **argv)
{
	unsigned long *func_ksyms = NULL;
	struct bpf_prog_info info = {};
	unsigned int *func_lens = NULL;
	unsigned int nr_func_ksyms;
	unsigned int nr_func_lens;
	struct dump_data dd = {};
	__u32 len = sizeof(info);
	unsigned int buf_size;
@@ -496,10 +500,34 @@ static int do_dump(int argc, char **argv)
		return -1;
	}

	nr_func_ksyms = info.nr_jited_ksyms;
	if (nr_func_ksyms) {
		func_ksyms = malloc(nr_func_ksyms * sizeof(__u64));
		if (!func_ksyms) {
			p_err("mem alloc failed");
			close(fd);
			goto err_free;
		}
	}

	nr_func_lens = info.nr_jited_func_lens;
	if (nr_func_lens) {
		func_lens = malloc(nr_func_lens * sizeof(__u32));
		if (!func_lens) {
			p_err("mem alloc failed");
			close(fd);
			goto err_free;
		}
	}

	memset(&info, 0, sizeof(info));

	*member_ptr = ptr_to_u64(buf);
	*member_len = buf_size;
	info.jited_ksyms = ptr_to_u64(func_ksyms);
	info.nr_jited_ksyms = nr_func_ksyms;
	info.jited_func_lens = ptr_to_u64(func_lens);
	info.nr_jited_func_lens = nr_func_lens;

	err = bpf_obj_get_info_by_fd(fd, &info, &len);
	close(fd);
@@ -513,6 +541,16 @@ static int do_dump(int argc, char **argv)
		goto err_free;
	}

	if (info.nr_jited_ksyms > nr_func_ksyms) {
		p_err("too many addresses returned");
		goto err_free;
	}

	if (info.nr_jited_func_lens > nr_func_lens) {
		p_err("too many values returned");
		goto err_free;
	}

	if ((member_len == &info.jited_prog_len &&
	     info.jited_prog_insns == 0) ||
	    (member_len == &info.xlated_prog_len &&
@@ -550,7 +588,57 @@ static int do_dump(int argc, char **argv)
			goto err_free;
		}

		disasm_print_insn(buf, *member_len, opcodes, name);
		if (info.nr_jited_func_lens && info.jited_func_lens) {
			struct kernel_sym *sym = NULL;
			char sym_name[SYM_MAX_NAME];
			unsigned char *img = buf;
			__u64 *ksyms = NULL;
			__u32 *lens;
			__u32 i;

			if (info.nr_jited_ksyms) {
				kernel_syms_load(&dd);
				ksyms = (__u64 *) info.jited_ksyms;
			}

			if (json_output)
				jsonw_start_array(json_wtr);

			lens = (__u32 *) info.jited_func_lens;
			for (i = 0; i < info.nr_jited_func_lens; i++) {
				if (ksyms) {
					sym = kernel_syms_search(&dd, ksyms[i]);
					if (sym)
						sprintf(sym_name, "%s", sym->name);
					else
						sprintf(sym_name, "0x%016llx", ksyms[i]);
				} else {
					strcpy(sym_name, "unknown");
				}

				if (json_output) {
					jsonw_start_object(json_wtr);
					jsonw_name(json_wtr, "name");
					jsonw_string(json_wtr, sym_name);
					jsonw_name(json_wtr, "insns");
				} else {
					printf("%s:\n", sym_name);
				}

				disasm_print_insn(img, lens[i], opcodes, name);
				img += lens[i];

				if (json_output)
					jsonw_end_object(json_wtr);
				else
					printf("\n");
			}

			if (json_output)
				jsonw_end_array(json_wtr);
		} else {
			disasm_print_insn(buf, *member_len, opcodes, name);
		}
	} else if (visual) {
		if (json_output)
			jsonw_null(json_wtr);
@@ -558,6 +646,9 @@ static int do_dump(int argc, char **argv)
		dump_xlated_cfg(buf, *member_len);
	} else {
		kernel_syms_load(&dd);
		dd.nr_jited_ksyms = info.nr_jited_ksyms;
		dd.jited_ksyms = (__u64 *) info.jited_ksyms;

		if (json_output)
			dump_xlated_json(&dd, buf, *member_len, opcodes);
		else
@@ -566,10 +657,14 @@ static int do_dump(int argc, char **argv)
	}

	free(buf);
	free(func_ksyms);
	free(func_lens);
	return 0;

err_free:
	free(buf);
	free(func_ksyms);
	free(func_lens);
	return -1;
}
@ -102,8 +102,8 @@ void kernel_syms_destroy(struct dump_data *dd)
|
|||
free(dd->sym_mapping);
|
||||
}
|
||||
|
||||
static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
|
||||
unsigned long key)
|
||||
struct kernel_sym *kernel_syms_search(struct dump_data *dd,
|
||||
unsigned long key)
|
||||
{
|
||||
struct kernel_sym sym = {
|
||||
.address = key,
|
||||
|
@ -174,7 +174,11 @@ static const char *print_call_pcrel(struct dump_data *dd,
|
|||
unsigned long address,
|
||||
const struct bpf_insn *insn)
|
||||
{
|
||||
if (sym)
|
||||
if (!dd->nr_jited_ksyms)
|
||||
/* Do not show address for interpreted programs */
|
||||
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
|
||||
"%+d", insn->off);
|
||||
else if (sym)
|
||||
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
|
||||
"%+d#%s", insn->off, sym->name);
|
||||
else
|
||||
|
@ -203,6 +207,10 @@ static const char *print_call(void *private_data,
|
|||
unsigned long address = dd->address_call_base + insn->imm;
|
||||
struct kernel_sym *sym;
|
||||
|
||||
if (insn->src_reg == BPF_PSEUDO_CALL &&
|
||||
(__u32) insn->imm < dd->nr_jited_ksyms)
|
||||
address = dd->jited_ksyms[insn->imm];
|
||||
|
||||
sym = kernel_syms_search(dd, address);
|
||||
if (insn->src_reg == BPF_PSEUDO_CALL)
|
||||
return print_call_pcrel(dd, sym, address, insn);
|
||||
|
|
|
@ -49,11 +49,14 @@ struct dump_data {
|
|||
unsigned long address_call_base;
|
||||
struct kernel_sym *sym_mapping;
|
||||
__u32 sym_count;
|
||||
__u64 *jited_ksyms;
|
||||
__u32 nr_jited_ksyms;
|
||||
char scratch_buff[SYM_MAX_NAME + 8];
|
||||
};
|
||||
|
||||
void kernel_syms_load(struct dump_data *dd);
|
||||
void kernel_syms_destroy(struct dump_data *dd);
|
||||
struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key);
|
||||
void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
|
||||
bool opcodes);
|
||||
void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
|
||||
|
|
|
@ -97,6 +97,7 @@ enum bpf_cmd {
|
|||
BPF_RAW_TRACEPOINT_OPEN,
|
||||
BPF_BTF_LOAD,
|
||||
BPF_BTF_GET_FD_BY_ID,
|
||||
BPF_TASK_FD_QUERY,
|
||||
};
|
||||
|
||||
enum bpf_map_type {
|
||||
|
@ -141,6 +142,7 @@ enum bpf_prog_type {
|
|||
BPF_PROG_TYPE_SK_MSG,
|
||||
BPF_PROG_TYPE_RAW_TRACEPOINT,
|
||||
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
|
||||
BPF_PROG_TYPE_LWT_SEG6LOCAL,
|
||||
};
|
||||
|
||||
enum bpf_attach_type {
|
||||
|
@ -284,8 +286,8 @@ union bpf_attr {
|
|||
char map_name[BPF_OBJ_NAME_LEN];
|
||||
__u32 map_ifindex; /* ifindex of netdev to create on */
|
||||
__u32 btf_fd; /* fd pointing to a BTF type data */
|
||||
__u32 btf_key_id; /* BTF type_id of the key */
|
||||
__u32 btf_value_id; /* BTF type_id of the value */
|
||||
__u32 btf_key_type_id; /* BTF type_id of the key */
|
||||
__u32 btf_value_type_id; /* BTF type_id of the value */
|
||||
};
|
||||
|
||||
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
|
||||
|
@ -379,6 +381,22 @@ union bpf_attr {
|
|||
__u32 btf_log_size;
|
||||
__u32 btf_log_level;
|
||||
};
|
||||
|
||||
struct {
|
||||
__u32 pid; /* input: pid */
|
||||
__u32 fd; /* input: fd */
|
||||
__u32 flags; /* input: flags */
|
||||
__u32 buf_len; /* input/output: buf len */
|
||||
__aligned_u64 buf; /* input/output:
|
||||
* tp_name for tracepoint
|
||||
* symbol for kprobe
|
||||
* filename for uprobe
|
||||
*/
|
||||
__u32 prog_id; /* output: prod_id */
|
||||
__u32 fd_type; /* output: BPF_FD_TYPE_* */
|
||||
__u64 probe_offset; /* output: probe_offset */
|
||||
__u64 probe_addr; /* output: probe_addr */
|
||||
} task_fd_query;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
/* The description below is an attempt at providing documentation to eBPF
|
||||
|
@ -1902,6 +1920,90 @@ union bpf_attr {
|
|||
* egress otherwise). This is the only flag supported for now.
|
||||
* Return
|
||||
* **SK_PASS** on success, or **SK_DROP** on error.
|
||||
*
|
||||
* int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
|
||||
* Description
|
||||
* Encapsulate the packet associated to *skb* within a Layer 3
|
||||
* protocol header. This header is provided in the buffer at
|
||||
* address *hdr*, with *len* its size in bytes. *type* indicates
|
||||
* the protocol of the header and can be one of:
|
||||
*
|
||||
* **BPF_LWT_ENCAP_SEG6**
|
||||
* IPv6 encapsulation with Segment Routing Header
|
||||
* (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
|
||||
* the IPv6 header is computed by the kernel.
|
||||
* **BPF_LWT_ENCAP_SEG6_INLINE**
|
||||
* Only works if *skb* contains an IPv6 packet. Insert a
|
||||
* Segment Routing Header (**struct ipv6_sr_hdr**) inside
|
||||
* the IPv6 header.
|
||||
*
|
||||
* A call to this helper is susceptible to change the underlaying
|
||||
* packet buffer. Therefore, at load time, all checks on pointers
|
||||
* previously done by the verifier are invalidated and must be
|
||||
* performed again, if the helper is used in combination with
|
||||
* direct packet access.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
|
||||
* Description
|
||||
* Store *len* bytes from address *from* into the packet
|
||||
* associated to *skb*, at *offset*. Only the flags, tag and TLVs
|
||||
* inside the outermost IPv6 Segment Routing Header can be
|
||||
* modified through this helper.
|
||||
*
|
||||
* A call to this helper is susceptible to change the underlaying
|
||||
* packet buffer. Therefore, at load time, all checks on pointers
|
||||
* previously done by the verifier are invalidated and must be
|
||||
* performed again, if the helper is used in combination with
|
||||
* direct packet access.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
|
||||
* Description
|
||||
* Adjust the size allocated to TLVs in the outermost IPv6
|
||||
* Segment Routing Header contained in the packet associated to
|
||||
* *skb*, at position *offset* by *delta* bytes. Only offsets
|
||||
* after the segments are accepted. *delta* can be as well
|
||||
* positive (growing) as negative (shrinking).
|
||||
*
|
||||
* A call to this helper is susceptible to change the underlaying
|
||||
* packet buffer. Therefore, at load time, all checks on pointers
|
||||
* previously done by the verifier are invalidated and must be
|
||||
* performed again, if the helper is used in combination with
|
||||
* direct packet access.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*
|
||||
* int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
|
||||
* Description
|
||||
* Apply an IPv6 Segment Routing action of type *action* to the
|
||||
* packet associated to *skb*. Each action takes a parameter
|
||||
* contained at address *param*, and of length *param_len* bytes.
|
||||
* *action* can be one of:
|
||||
*
|
||||
* **SEG6_LOCAL_ACTION_END_X**
|
||||
* End.X action: Endpoint with Layer-3 cross-connect.
|
||||
* Type of *param*: **struct in6_addr**.
|
||||
* **SEG6_LOCAL_ACTION_END_T**
|
||||
* End.T action: Endpoint with specific IPv6 table lookup.
|
||||
* Type of *param*: **int**.
|
||||
* **SEG6_LOCAL_ACTION_END_B6**
|
||||
* End.B6 action: Endpoint bound to an SRv6 policy.
|
||||
* Type of param: **struct ipv6_sr_hdr**.
|
||||
* **SEG6_LOCAL_ACTION_END_B6_ENCAP**
|
||||
* End.B6.Encap action: Endpoint bound to an SRv6
|
||||
* encapsulation policy.
|
||||
* Type of param: **struct ipv6_sr_hdr**.
|
||||
*
|
||||
* A call to this helper is susceptible to change the underlaying
|
||||
* packet buffer. Therefore, at load time, all checks on pointers
|
||||
* previously done by the verifier are invalidated and must be
|
||||
* performed again, if the helper is used in combination with
|
||||
* direct packet access.
|
||||
* Return
|
||||
* 0 on success, or a negative error in case of failure.
|
||||
*/
|
||||
#define __BPF_FUNC_MAPPER(FN) \
|
||||
FN(unspec), \
|
||||
|
@ -1976,7 +2078,11 @@ union bpf_attr {
|
|||
FN(fib_lookup), \
|
||||
FN(sock_hash_update), \
|
||||
FN(msg_redirect_hash), \
|
||||
FN(sk_redirect_hash),
|
||||
FN(sk_redirect_hash), \
|
||||
FN(lwt_push_encap), \
|
||||
FN(lwt_seg6_store_bytes), \
|
||||
FN(lwt_seg6_adjust_srh), \
|
||||
FN(lwt_seg6_action),
|
||||
|
||||
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
|
||||
* function eBPF program intends to call
|
||||
|
@ -2043,6 +2149,12 @@ enum bpf_hdr_start_off {
|
|||
BPF_HDR_START_NET,
|
||||
};
|
||||
|
||||
/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
|
||||
enum bpf_lwt_encap_mode {
|
||||
BPF_LWT_ENCAP_SEG6,
|
||||
BPF_LWT_ENCAP_SEG6_INLINE
|
||||
};
|
||||
|
||||
/* user accessible mirror of in-kernel sk_buff.
|
||||
* new fields can only be added to the end of this structure
|
||||
*/
|
||||
|
@ -2176,6 +2288,14 @@ enum sk_action {
|
|||
struct sk_msg_md {
|
||||
void *data;
|
||||
void *data_end;
|
||||
|
||||
__u32 family;
|
||||
__u32 remote_ip4; /* Stored in network byte order */
|
||||
__u32 local_ip4; /* Stored in network byte order */
|
||||
__u32 remote_ip6[4]; /* Stored in network byte order */
|
||||
__u32 local_ip6[4]; /* Stored in network byte order */
|
||||
__u32 remote_port; /* Stored in network byte order */
|
||||
__u32 local_port; /* stored in host byte order */
|
||||
};
|
||||
|
||||
#define BPF_TAG_SIZE 8
|
||||
|
@ -2197,6 +2317,10 @@ struct bpf_prog_info {
|
|||
__u32 gpl_compatible:1;
|
||||
__u64 netns_dev;
|
||||
__u64 netns_ino;
|
||||
__u32 nr_jited_ksyms;
|
||||
__u32 nr_jited_func_lens;
|
||||
__aligned_u64 jited_ksyms;
|
||||
__aligned_u64 jited_func_lens;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_map_info {
|
||||
|
@ -2211,8 +2335,8 @@ struct bpf_map_info {
|
|||
__u64 netns_dev;
|
||||
__u64 netns_ino;
|
||||
__u32 btf_id;
|
||||
__u32 btf_key_id;
|
||||
__u32 btf_value_id;
|
||||
__u32 btf_key_type_id;
|
||||
__u32 btf_value_type_id;
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
struct bpf_btf_info {
|
||||
|
@ -2450,4 +2574,13 @@ struct bpf_fib_lookup {
|
|||
__u8 dmac[6]; /* ETH_ALEN */
|
||||
};
|
||||
|
||||
enum bpf_task_fd_type {
|
||||
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
|
||||
BPF_FD_TYPE_TRACEPOINT, /* tp name */
|
||||
BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
|
||||
BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
|
||||
BPF_FD_TYPE_UPROBE, /* filename + offset */
|
||||
BPF_FD_TYPE_URETPROBE, /* filename + offset */
|
||||
};
|
||||
|
||||
#endif /* _UAPI__LINUX_BPF_H__ */
|
||||
|
|
|
@ -12,42 +12,29 @@ struct btf_header {
|
|||
__u16 magic;
|
||||
__u8 version;
|
||||
__u8 flags;
|
||||
|
||||
__u32 parent_label;
|
||||
__u32 parent_name;
|
||||
__u32 hdr_len;
|
||||
|
||||
/* All offsets are in bytes relative to the end of this header */
|
||||
__u32 label_off; /* offset of label section */
|
||||
__u32 object_off; /* offset of data object section*/
|
||||
__u32 func_off; /* offset of function section */
|
||||
__u32 type_off; /* offset of type section */
|
||||
__u32 type_len; /* length of type section */
|
||||
__u32 str_off; /* offset of string section */
|
||||
__u32 str_len; /* length of string section */
|
||||
};
|
||||
|
||||
/* Max # of type identifier */
|
||||
#define BTF_MAX_TYPE 0x7fffffff
|
||||
#define BTF_MAX_TYPE 0x0000ffff
|
||||
/* Max offset into the string section */
|
||||
#define BTF_MAX_NAME_OFFSET 0x7fffffff
|
||||
#define BTF_MAX_NAME_OFFSET 0x0000ffff
|
||||
/* Max # of struct/union/enum members or func args */
|
||||
#define BTF_MAX_VLEN 0xffff
|
||||
|
||||
/* The type id is referring to a parent BTF */
|
||||
#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
|
||||
#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
|
||||
|
||||
/* String is in the ELF string section */
|
||||
#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
|
||||
#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
|
||||
|
||||
struct btf_type {
|
||||
__u32 name_off;
|
||||
/* "info" bits arrangement
|
||||
* bits 0-15: vlen (e.g. # of struct's members)
|
||||
* bits 16-23: unused
|
||||
* bits 24-28: kind (e.g. int, ptr, array...etc)
|
||||
* bits 29-30: unused
|
||||
* bits 31: root
|
||||
* bits 24-27: kind (e.g. int, ptr, array...etc)
|
||||
* bits 28-31: unused
|
||||
*/
|
||||
__u32 info;
|
||||
/* "size" is used by INT, ENUM, STRUCT and UNION.
|
||||
|
@ -62,8 +49,7 @@ struct btf_type {
|
|||
};
|
||||
};
|
||||
|
||||
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
|
||||
#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80))
|
||||
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
|
||||
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
|
||||
|
||||
#define BTF_KIND_UNKN 0 /* Unknown */
|
||||
|
@ -88,15 +74,14 @@ struct btf_type {
|
|||
/* BTF_KIND_INT is followed by a u32 and the following
|
||||
* is the 32 bits arrangement:
|
||||
*/
|
||||
#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24)
|
||||
#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
|
||||
#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
|
||||
#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
|
||||
|
||||
/* Attributes stored in the BTF_INT_ENCODING */
|
||||
#define BTF_INT_SIGNED 0x1
|
||||
#define BTF_INT_CHAR 0x2
|
||||
#define BTF_INT_BOOL 0x4
|
||||
#define BTF_INT_VARARGS 0x8
|
||||
#define BTF_INT_SIGNED (1 << 0)
|
||||
#define BTF_INT_CHAR (1 << 1)
|
||||
#define BTF_INT_BOOL (1 << 2)
|
||||
|
||||
/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
|
||||
* The exact number of btf_enum is stored in the vlen (of the
|
||||
|
|
|
@ -89,8 +89,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
|
|||
min(name_len, BPF_OBJ_NAME_LEN - 1));
|
||||
attr.numa_node = create_attr->numa_node;
|
||||
attr.btf_fd = create_attr->btf_fd;
|
||||
attr.btf_key_id = create_attr->btf_key_id;
|
||||
attr.btf_value_id = create_attr->btf_value_id;
|
||||
attr.btf_key_type_id = create_attr->btf_key_type_id;
|
||||
attr.btf_value_type_id = create_attr->btf_value_type_id;
|
||||
attr.map_ifindex = create_attr->map_ifindex;
|
||||
|
||||
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
|
||||
|
@ -643,3 +643,26 @@ retry:
|
|||
|
||||
return fd;
|
||||
}
|
||||
|
||||
int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
|
||||
__u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
|
||||
__u64 *probe_addr)
|
||||
{
|
||||
union bpf_attr attr = {};
|
||||
int err;
|
||||
|
||||
attr.task_fd_query.pid = pid;
|
||||
attr.task_fd_query.fd = fd;
|
||||
attr.task_fd_query.flags = flags;
|
||||
attr.task_fd_query.buf = ptr_to_u64(buf);
|
||||
attr.task_fd_query.buf_len = *buf_len;
|
||||
|
||||
err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
|
||||
*buf_len = attr.task_fd_query.buf_len;
|
||||
*prog_id = attr.task_fd_query.prog_id;
|
||||
*fd_type = attr.task_fd_query.fd_type;
|
||||
*probe_offset = attr.task_fd_query.probe_offset;
|
||||
*probe_addr = attr.task_fd_query.probe_addr;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
|
|
@ -36,8 +36,8 @@ struct bpf_create_map_attr {
|
|||
__u32 max_entries;
|
||||
__u32 numa_node;
|
||||
__u32 btf_fd;
|
||||
__u32 btf_key_id;
|
||||
__u32 btf_value_id;
|
||||
__u32 btf_key_type_id;
|
||||
__u32 btf_value_type_id;
|
||||
__u32 map_ifindex;
|
||||
};
|
||||
|
||||
|
@ -107,4 +107,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
|
|||
int bpf_raw_tracepoint_open(const char *name, int prog_fd);
|
||||
int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
|
||||
bool do_log);
|
||||
int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
|
||||
__u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
|
||||
__u64 *probe_addr);
|
||||
#endif
|
||||
|
|
|
@ -35,9 +35,8 @@ struct btf {
|
|||
|
||||
static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset)
|
||||
{
|
||||
if (!BTF_STR_TBL_ELF_ID(offset) &&
|
||||
BTF_STR_OFFSET(offset) < btf->hdr->str_len)
|
||||
return &btf->strings[BTF_STR_OFFSET(offset)];
|
||||
if (offset < btf->hdr->str_len)
|
||||
return &btf->strings[offset];
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -216,8 +216,8 @@ struct bpf_map {
|
|||
size_t offset;
|
||||
int map_ifindex;
|
||||
struct bpf_map_def def;
|
||||
uint32_t btf_key_id;
|
||||
uint32_t btf_value_id;
|
||||
uint32_t btf_key_type_id;
|
||||
uint32_t btf_value_type_id;
|
||||
void *priv;
|
||||
bpf_map_clear_priv_t clear_priv;
|
||||
};
|
||||
|
@ -1042,8 +1042,8 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
|
|||
}
|
||||
|
||||
if (def->key_size != key_size) {
|
||||
pr_warning("map:%s key_type:%s has BTF type_size:%ld != key_size:%u\n",
|
||||
map->name, name, key_size, def->key_size);
|
||||
pr_warning("map:%s key_type:%s has BTF type_size:%u != key_size:%u\n",
|
||||
map->name, name, (unsigned int)key_size, def->key_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -1069,13 +1069,13 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
|
|||
}
|
||||
|
||||
if (def->value_size != value_size) {
|
||||
pr_warning("map:%s value_type:%s has BTF type_size:%ld != value_size:%u\n",
|
||||
map->name, name, value_size, def->value_size);
|
||||
pr_warning("map:%s value_type:%s has BTF type_size:%u != value_size:%u\n",
|
||||
map->name, name, (unsigned int)value_size, def->value_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
map->btf_key_id = key_id;
|
||||
map->btf_value_id = value_id;
|
||||
map->btf_key_type_id = key_id;
|
||||
map->btf_value_type_id = value_id;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1100,24 +1100,24 @@ bpf_object__create_maps(struct bpf_object *obj)
|
|||
create_attr.value_size = def->value_size;
|
||||
create_attr.max_entries = def->max_entries;
|
||||
create_attr.btf_fd = 0;
|
||||
create_attr.btf_key_id = 0;
|
||||
create_attr.btf_value_id = 0;
|
||||
create_attr.btf_key_type_id = 0;
|
||||
create_attr.btf_value_type_id = 0;
|
||||
|
||||
if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
|
||||
create_attr.btf_fd = btf__fd(obj->btf);
|
||||
create_attr.btf_key_id = map->btf_key_id;
|
||||
create_attr.btf_value_id = map->btf_value_id;
|
||||
create_attr.btf_key_type_id = map->btf_key_type_id;
|
||||
create_attr.btf_value_type_id = map->btf_value_type_id;
|
||||
}
|
||||
|
||||
*pfd = bpf_create_map_xattr(&create_attr);
|
||||
if (*pfd < 0 && create_attr.btf_key_id) {
|
||||
if (*pfd < 0 && create_attr.btf_key_type_id) {
|
||||
pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
|
||||
map->name, strerror(errno), errno);
|
||||
create_attr.btf_fd = 0;
|
||||
create_attr.btf_key_id = 0;
|
||||
create_attr.btf_value_id = 0;
|
||||
map->btf_key_id = 0;
|
||||
map->btf_value_id = 0;
|
||||
create_attr.btf_key_type_id = 0;
|
||||
create_attr.btf_value_type_id = 0;
|
||||
map->btf_key_type_id = 0;
|
||||
map->btf_value_type_id = 0;
|
||||
*pfd = bpf_create_map_xattr(&create_attr);
|
||||
}
|
||||
|
||||
|
@ -1456,6 +1456,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
|
|||
case BPF_PROG_TYPE_LWT_IN:
|
||||
case BPF_PROG_TYPE_LWT_OUT:
|
||||
case BPF_PROG_TYPE_LWT_XMIT:
|
||||
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
|
||||
case BPF_PROG_TYPE_SOCK_OPS:
|
||||
case BPF_PROG_TYPE_SK_SKB:
|
||||
case BPF_PROG_TYPE_CGROUP_DEVICE:
|
||||
|
@ -2085,14 +2086,14 @@ const char *bpf_map__name(struct bpf_map *map)
|
|||
return map ? map->name : NULL;
|
||||
}
|
||||
|
||||
uint32_t bpf_map__btf_key_id(const struct bpf_map *map)
|
||||
uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map)
|
||||
{
|
||||
return map ? map->btf_key_id : 0;
|
||||
return map ? map->btf_key_type_id : 0;
|
||||
}
|
||||
|
||||
uint32_t bpf_map__btf_value_id(const struct bpf_map *map)
|
||||
uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map)
|
||||
{
|
||||
return map ? map->btf_value_id : 0;
|
||||
return map ? map->btf_value_type_id : 0;
|
||||
}
|
||||
|
||||
int bpf_map__set_priv(struct bpf_map *map, void *priv,
|
||||
|
|
|
@ -244,8 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
|
|||
int bpf_map__fd(struct bpf_map *map);
|
||||
const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
|
||||
const char *bpf_map__name(struct bpf_map *map);
|
||||
uint32_t bpf_map__btf_key_id(const struct bpf_map *map);
|
||||
uint32_t bpf_map__btf_value_id(const struct bpf_map *map);
|
||||
uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map);
|
||||
uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map);
|
||||
|
||||
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
|
||||
int bpf_map__set_priv(struct bpf_map *map, void *priv,
|
||||
|
|
|
@ -33,7 +33,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
|
|||
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
|
||||
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
|
||||
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
|
||||
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o
|
||||
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
|
||||
test_lwt_seg6local.o
|
||||
|
||||
# Order correspond to 'make run_tests' order
|
||||
TEST_PROGS := test_kmod.sh \
|
||||
|
@ -42,7 +43,8 @@ TEST_PROGS := test_kmod.sh \
|
|||
test_xdp_meta.sh \
|
||||
test_offload.py \
|
||||
test_sock_addr.sh \
|
||||
test_tunnel.sh
|
||||
test_tunnel.sh \
|
||||
test_lwt_seg6local.sh
|
||||
|
||||
# Compile but not part of 'make run_tests'
|
||||
TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
|
||||
|
@ -84,7 +86,17 @@ else
|
|||
CPU ?= generic
|
||||
endif
|
||||
|
||||
# Get Clang's default includes on this system, as opposed to those seen by
|
||||
# '-target bpf'. This fixes "missing" files on some architectures/distros,
|
||||
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
|
||||
#
|
||||
# Use '-idirafter': Don't interfere with include mechanics except where the
|
||||
# build would have failed anyways.
|
||||
CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
|
||||
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
|
||||
|
||||
CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
|
||||
$(CLANG_SYS_INCLUDES) \
|
||||
-Wno-compare-distinct-pointer-types
|
||||
|
||||
$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
|
||||
|
|
|
@ -114,6 +114,18 @@ static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
|
|||
static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
|
||||
int plen, __u32 flags) =
|
||||
(void *) BPF_FUNC_fib_lookup;
|
||||
static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
|
||||
unsigned int len) =
|
||||
(void *) BPF_FUNC_lwt_push_encap;
|
||||
static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
|
||||
void *from, unsigned int len) =
|
||||
(void *) BPF_FUNC_lwt_seg6_store_bytes;
|
||||
static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
|
||||
unsigned int param_len) =
|
||||
(void *) BPF_FUNC_lwt_seg6_action;
|
||||
static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
|
||||
unsigned int len) =
|
||||
(void *) BPF_FUNC_lwt_seg6_adjust_srh;
|
||||
|
||||
/* llvm builtin functions that eBPF C program may use to
|
||||
* emit BPF_LD_ABS and BPF_LD_IND instructions
|
||||
|
|
|
@ -113,22 +113,25 @@ static char btf_log_buf[BTF_LOG_BUF_SIZE];
|
|||
static struct btf_header hdr_tmpl = {
|
||||
.magic = BTF_MAGIC,
|
||||
.version = BTF_VERSION,
|
||||
.hdr_len = sizeof(struct btf_header),
|
||||
};
|
||||
|
||||
struct btf_raw_test {
|
||||
const char *descr;
|
||||
const char *str_sec;
|
||||
const char *map_name;
|
||||
const char *err_str;
|
||||
__u32 raw_types[MAX_NR_RAW_TYPES];
|
||||
__u32 str_sec_size;
|
||||
enum bpf_map_type map_type;
|
||||
__u32 key_size;
|
||||
__u32 value_size;
|
||||
__u32 key_id;
|
||||
__u32 value_id;
|
||||
__u32 key_type_id;
|
||||
__u32 value_type_id;
|
||||
__u32 max_entries;
|
||||
bool btf_load_err;
|
||||
bool map_create_err;
|
||||
int hdr_len_delta;
|
||||
int type_off_delta;
|
||||
int str_off_delta;
|
||||
int str_len_delta;
|
||||
|
@ -141,8 +144,8 @@ static struct btf_raw_test raw_tests[] = {
|
|||
* };
|
||||
*
|
||||
* struct A {
|
||||
* int m;
|
||||
* unsigned long long n;
|
||||
* unsigned long long m;
|
||||
* int n;
|
||||
* char o;
|
||||
* [3 bytes hole]
|
||||
* int p[8];
|
||||
|
@ -163,8 +166,8 @@ static struct btf_raw_test raw_tests[] = {
|
|||
BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
|
||||
/* struct A { */ /* [5] */
|
||||
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
|
||||
BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
|
||||
BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* unsigned long long n;*/
|
||||
BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
|
||||
BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
|
||||
BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
|
||||
BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
|
||||
BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */
|
||||
|
@ -172,6 +175,7 @@ static struct btf_raw_test raw_tests[] = {
|
|||
/* } */
|
||||
/* int[4][8] */
|
||||
BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */
|
||||
/* enum E */ /* [7] */
|
||||
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
|
||||
BTF_ENUM_ENC(NAME_TBD, 0),
|
||||
BTF_ENUM_ENC(NAME_TBD, 1),
|
||||
|
@ -183,8 +187,8 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "struct_test1_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 180,
|
||||
.key_id = 1,
|
||||
.value_id = 5,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 5,
|
||||
.max_entries = 4,
|
||||
},
|
||||
|
||||
|
@ -238,8 +242,8 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "struct_test2_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 68,
|
||||
.key_id = 1,
|
||||
.value_id = 3,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 3,
|
||||
.max_entries = 4,
|
||||
},
|
||||
|
||||
|
@ -258,7 +262,7 @@ static struct btf_raw_test raw_tests[] = {
|
|||
/* struct A { */ /* [2] */
|
||||
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
|
||||
BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
|
||||
BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n; */
|
||||
BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
|
||||
/* } */
|
||||
BTF_END_RAW,
|
||||
},
|
||||
|
@ -268,10 +272,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "size_check1_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 1,
|
||||
.key_id = 1,
|
||||
.value_id = 2,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Member exceeds struct_size",
|
||||
},
|
||||
|
||||
/* Test member exeeds the size of struct
|
||||
|
@ -301,11 +306,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "size_check2_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 1,
|
||||
.key_id = 1,
|
||||
.value_id = 3,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 3,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
|
||||
.err_str = "Member exceeds struct_size",
|
||||
},
|
||||
|
||||
/* Test member exeeds the size of struct
|
||||
|
@ -335,10 +340,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "size_check3_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 1,
|
||||
.key_id = 1,
|
||||
.value_id = 3,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 3,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Member exceeds struct_size",
|
||||
},
|
||||
|
||||
/* Test member exceeds the size of struct
|
||||
|
@ -376,10 +382,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "size_check4_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 1,
|
||||
.key_id = 1,
|
||||
.value_id = 3,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 3,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Member exceeds struct_size",
|
||||
},
|
||||
|
||||
/* typedef const void * const_void_ptr;
|
||||
|
@ -411,8 +418,8 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "void_test1_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(void *),
|
||||
.key_id = 1,
|
||||
.value_id = 4,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 4,
|
||||
.max_entries = 4,
|
||||
},
|
||||
|
||||
|
@ -440,10 +447,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "void_test2_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(void *),
|
||||
.key_id = 1,
|
||||
.value_id = 3,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 3,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid member",
|
||||
},
|
||||
|
||||
/* typedef const void * const_void_ptr;
|
||||
|
@ -458,9 +466,9 @@ static struct btf_raw_test raw_tests[] = {
|
|||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
|
||||
/* const void* */ /* [3] */
|
||||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
|
||||
/* typedef const void * const_void_ptr */
|
||||
/* typedef const void * const_void_ptr */ /* [4] */
|
||||
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
|
||||
/* const_void_ptr[4] */ /* [4] */
|
||||
/* const_void_ptr[4] */ /* [5] */
|
||||
BTF_TYPE_ARRAY_ENC(3, 1, 4),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
|
@ -470,8 +478,8 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "void_test3_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(void *) * 4,
|
||||
.key_id = 1,
|
||||
.value_id = 4,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 4,
|
||||
.max_entries = 4,
|
||||
},
|
||||
|
||||
|
@ -493,10 +501,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "void_test4_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(void *) * 4,
|
||||
.key_id = 1,
|
||||
.value_id = 3,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 3,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid elem",
|
||||
},
|
||||
|
||||
/* Array_A <------------------+
|
||||
|
@ -523,10 +532,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "loop_test1_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(sizeof(int) * 8),
|
||||
.key_id = 1,
|
||||
.value_id = 2,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Loop detected",
|
||||
},
|
||||
|
||||
/* typedef is _before_ the BTF type of Array_A and Array_B
|
||||
|
@ -551,7 +561,6 @@ static struct btf_raw_test raw_tests[] = {
|
|||
BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */
|
||||
/* Array_B */
|
||||
BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */
|
||||
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "\0int_array\0",
|
||||
|
@ -560,10 +569,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "loop_test2_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(sizeof(int) * 8),
|
||||
.key_id = 1,
|
||||
.value_id = 2,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Loop detected",
|
||||
},
|
||||
|
||||
/* Array_A <------------------+
|
||||
|
@ -582,7 +592,6 @@ static struct btf_raw_test raw_tests[] = {
|
|||
BTF_TYPE_ARRAY_ENC(3, 1, 8),
|
||||
/* Array_B */ /* [3] */
|
||||
BTF_TYPE_ARRAY_ENC(2, 1, 8),
|
||||
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
|
@ -591,10 +600,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "loop_test3_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(sizeof(int) * 8),
|
||||
.key_id = 1,
|
||||
.value_id = 2,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Loop detected",
|
||||
},
|
||||
|
||||
/* typedef is _between_ the BTF type of Array_A and Array_B
|
||||
|
@ -627,10 +637,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "loop_test4_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(sizeof(int) * 8),
|
||||
.key_id = 1,
|
||||
.value_id = 2,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Loop detected",
|
||||
},
|
||||
|
||||
/* typedef struct B Struct_B
|
||||
|
@ -668,10 +679,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "loop_test5_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 8,
|
||||
.key_id = 1,
|
||||
.value_id = 2,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Loop detected",
|
||||
},
|
||||
|
||||
/* struct A {
|
||||
|
@ -697,10 +709,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "loop_test6_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = 8,
|
||||
.key_id = 1,
|
||||
.value_id = 2,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Loop detected",
|
||||
},
|
||||
|
||||
{
|
||||
|
@ -724,10 +737,11 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "loop_test7_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(void *),
|
||||
.key_id = 1,
|
||||
.value_id = 2,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Loop detected",
|
||||
},
|
||||
|
||||
{
|
||||
|
@ -759,14 +773,73 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "loop_test8_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(void *),
|
||||
.key_id = 1,
|
||||
.value_id = 2,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 2,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Loop detected",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "type_off == str_off",
|
||||
.descr = "string section does not end with null",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "\0int",
|
||||
.str_sec_size = sizeof("\0int") - 1,
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "hdr_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid string section",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "empty string section",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = 0,
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "hdr_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid string section",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "empty type section",
|
||||
.raw_types = {
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "\0int",
|
||||
.str_sec_size = sizeof("\0int"),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "hdr_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "No type found",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "btf_header test. Longer hdr_len",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
|
@ -778,15 +851,16 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "hdr_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_id = 1,
|
||||
.value_id = 1,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.type_off_delta = sizeof(struct btf_type) + sizeof(int) + sizeof("\0int"),
|
||||
.hdr_len_delta = 4,
|
||||
.err_str = "Unsupported btf_header",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "Unaligned type_off",
|
||||
.descr = "btf_header test. Gap between hdr and type",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
|
@ -798,15 +872,16 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "hdr_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_id = 1,
|
||||
.value_id = 1,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.type_off_delta = 1,
|
||||
.type_off_delta = 4,
|
||||
.err_str = "Unsupported section found",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "str_off beyonds btf size",
|
||||
.descr = "btf_header test. Gap between type and str",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
|
@ -818,15 +893,16 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "hdr_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_id = 1,
|
||||
.value_id = 1,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.str_off_delta = sizeof("\0int") + 1,
|
||||
.str_off_delta = 4,
|
||||
.err_str = "Unsupported section found",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "str_len beyonds btf size",
|
||||
.descr = "btf_header test. Overlap between type and str",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
|
@ -838,15 +914,16 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "hdr_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_id = 1,
|
||||
.value_id = 1,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.str_len_delta = 1,
|
||||
.str_off_delta = -4,
|
||||
.err_str = "Section overlap found",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "String section does not end with null",
|
||||
.descr = "btf_header test. Larger BTF size",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
|
@ -858,15 +935,16 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "hdr_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_id = 1,
|
||||
.value_id = 1,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.str_len_delta = -1,
|
||||
.str_len_delta = -4,
|
||||
.err_str = "Unsupported section found",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "Empty string section",
|
||||
.descr = "btf_header test. Smaller BTF size",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
|
||||
|
@ -878,11 +956,267 @@ static struct btf_raw_test raw_tests[] = {
|
|||
.map_name = "hdr_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_id = 1,
|
||||
.value_id = 1,
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.str_len_delta = 0 - (int)sizeof("\0int"),
|
||||
.str_len_delta = 4,
|
||||
.err_str = "Total section length too long",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "array test. index_type/elem_type \"int\"",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* int[16] */ /* [2] */
|
||||
BTF_TYPE_ARRAY_ENC(1, 1, 16),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = sizeof(""),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "array_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "array test. index_type/elem_type \"const int\"",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* int[16] */ /* [2] */
|
||||
BTF_TYPE_ARRAY_ENC(3, 3, 16),
|
||||
/* CONST type_id=1 */ /* [3] */
|
||||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = sizeof(""),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "array_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "array test. index_type \"const int:31\"",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* int:31 */ /* [2] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
|
||||
/* int[16] */ /* [3] */
|
||||
BTF_TYPE_ARRAY_ENC(1, 4, 16),
|
||||
/* CONST type_id=2 */ /* [4] */
|
||||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = sizeof(""),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "array_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid index",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "array test. elem_type \"const int:31\"",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* int:31 */ /* [2] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
|
||||
/* int[16] */ /* [3] */
|
||||
BTF_TYPE_ARRAY_ENC(4, 1, 16),
|
||||
/* CONST type_id=2 */ /* [4] */
|
||||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = sizeof(""),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "array_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid array of int",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "array test. index_type \"void\"",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* int[16] */ /* [2] */
|
||||
BTF_TYPE_ARRAY_ENC(1, 0, 16),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = sizeof(""),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "array_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid index",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "array test. index_type \"const void\"",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* int[16] */ /* [2] */
|
||||
BTF_TYPE_ARRAY_ENC(1, 3, 16),
|
||||
/* CONST type_id=0 (void) */ /* [3] */
|
||||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = sizeof(""),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "array_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid index",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "array test. elem_type \"const void\"",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* int[16] */ /* [2] */
|
||||
BTF_TYPE_ARRAY_ENC(3, 1, 16),
|
||||
/* CONST type_id=0 (void) */ /* [3] */
|
||||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = sizeof(""),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "array_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid elem",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "array test. elem_type \"const void *\"",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* const void *[16] */ /* [2] */
|
||||
BTF_TYPE_ARRAY_ENC(3, 1, 16),
|
||||
/* CONST type_id=4 */ /* [3] */
|
||||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
|
||||
/* void* */ /* [4] */
|
||||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = sizeof(""),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "array_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "array test. index_type \"const void *\"",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
/* const void *[16] */ /* [2] */
|
||||
BTF_TYPE_ARRAY_ENC(3, 3, 16),
|
||||
/* CONST type_id=4 */ /* [3] */
|
||||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
|
||||
/* void* */ /* [4] */
|
||||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = sizeof(""),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "array_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid index",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "int test. invalid int_data",
|
||||
.raw_types = {
|
||||
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4),
|
||||
0x10000000,
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = sizeof(""),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "array_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid int_data",
|
||||
},
|
||||
|
||||
{
|
||||
.descr = "invalid BTF_INFO",
|
||||
.raw_types = {
|
||||
/* int */ /* [1] */
|
||||
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
|
||||
BTF_TYPE_ENC(0, 0x10000000, 4),
|
||||
BTF_END_RAW,
|
||||
},
|
||||
.str_sec = "",
|
||||
.str_sec_size = sizeof(""),
|
||||
.map_type = BPF_MAP_TYPE_ARRAY,
|
||||
.map_name = "array_test_map",
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(int),
|
||||
.key_type_id = 1,
|
||||
.value_type_id = 1,
|
||||
.max_entries = 4,
|
||||
.btf_load_err = true,
|
||||
.err_str = "Invalid btf_info",
|
||||
},
|
||||
|
||||
}; /* struct btf_raw_test raw_tests[] */
|
||||
|
@ -951,6 +1285,7 @@ static void *btf_raw_create(const struct btf_header *hdr,
|
|||
memcpy(raw_btf + offset, str, str_sec_size);
|
||||
|
||||
ret_hdr = (struct btf_header *)raw_btf;
|
||||
ret_hdr->type_len = type_sec_size;
|
||||
ret_hdr->str_off = type_sec_size;
|
||||
ret_hdr->str_len = str_sec_size;
|
||||
|
||||
|
@ -981,6 +1316,7 @@ static int do_test_raw(unsigned int test_num)
|
|||
|
||||
hdr = raw_btf;
|
||||
|
||||
hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta;
|
||||
hdr->type_off = (int)hdr->type_off + test->type_off_delta;
|
||||
hdr->str_off = (int)hdr->str_off + test->str_off_delta;
|
||||
hdr->str_len = (int)hdr->str_len + test->str_len_delta;
|
||||
|
@ -992,8 +1328,13 @@ static int do_test_raw(unsigned int test_num)
|
|||
free(raw_btf);
|
||||
|
||||
err = ((btf_fd == -1) != test->btf_load_err);
|
||||
CHECK(err, "btf_fd:%d test->btf_load_err:%u",
|
||||
btf_fd, test->btf_load_err);
|
||||
if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
|
||||
btf_fd, test->btf_load_err) ||
|
||||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
|
||||
"expected err_str:%s", test->err_str)) {
|
||||
err = -1;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (err || btf_fd == -1)
|
||||
goto done;
|
||||
|
@ -1004,8 +1345,8 @@ static int do_test_raw(unsigned int test_num)
|
|||
create_attr.value_size = test->value_size;
|
||||
create_attr.max_entries = test->max_entries;
|
||||
create_attr.btf_fd = btf_fd;
|
||||
create_attr.btf_key_id = test->key_id;
|
||||
create_attr.btf_value_id = test->value_id;
|
||||
create_attr.btf_key_type_id = test->key_type_id;
|
||||
create_attr.btf_value_type_id = test->value_type_id;
|
||||
|
||||
map_fd = bpf_create_map_xattr(&create_attr);
|
||||
|
||||
|
@ -1267,8 +1608,8 @@ static int test_btf_id(unsigned int test_num)
|
|||
create_attr.value_size = sizeof(unsigned int);
|
||||
create_attr.max_entries = 4;
|
||||
create_attr.btf_fd = btf_fd[0];
|
||||
create_attr.btf_key_id = 1;
|
||||
create_attr.btf_value_id = 2;
|
||||
create_attr.btf_key_type_id = 1;
|
||||
create_attr.btf_value_type_id = 2;
|
||||
|
||||
map_fd = bpf_create_map_xattr(&create_attr);
|
||||
if (CHECK(map_fd == -1, "errno:%d", errno)) {
|
||||
|
@ -1279,10 +1620,10 @@ static int test_btf_id(unsigned int test_num)
|
|||
info_len = sizeof(map_info);
|
||||
err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
|
||||
if (CHECK(err || map_info.btf_id != info[0].id ||
|
||||
map_info.btf_key_id != 1 || map_info.btf_value_id != 2,
|
||||
"err:%d errno:%d info.id:%u btf_id:%u btf_key_id:%u btf_value_id:%u",
|
||||
err, errno, info[0].id, map_info.btf_id, map_info.btf_key_id,
|
||||
map_info.btf_value_id)) {
|
||||
map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
|
||||
"err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
|
||||
err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id,
|
||||
map_info.btf_value_type_id)) {
|
||||
err = -1;
|
||||
goto done;
|
||||
}
|
||||
|
@ -1542,10 +1883,10 @@ static int do_test_file(unsigned int test_num)
|
|||
goto done;
|
||||
}
|
||||
|
||||
err = (bpf_map__btf_key_id(map) == 0 || bpf_map__btf_value_id(map) == 0)
|
||||
err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0)
|
||||
!= test->btf_kv_notfound;
|
||||
if (CHECK(err, "btf_key_id:%u btf_value_id:%u test->btf_kv_notfound:%u",
|
||||
bpf_map__btf_key_id(map), bpf_map__btf_value_id(map),
|
||||
if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u",
|
||||
bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map),
|
||||
test->btf_kv_notfound))
|
||||
goto done;
|
||||
|
||||
|
@ -1615,7 +1956,7 @@ static struct btf_raw_test pprint_test = {
|
|||
/* 28 bits */ /* [7] */
|
||||
BTF_TYPE_INT_ENC(0, 0, 0, 28, 4),
|
||||
/* uint8_t[8] */ /* [8] */
|
||||
BTF_TYPE_ARRAY_ENC(9, 3, 8),
|
||||
BTF_TYPE_ARRAY_ENC(9, 1, 8),
|
||||
/* typedef unsigned char uint8_t */ /* [9] */
|
||||
BTF_TYPEDEF_ENC(NAME_TBD, 1),
|
||||
/* typedef unsigned short uint16_t */ /* [10] */
|
||||
|
@ -1654,8 +1995,8 @@ static struct btf_raw_test pprint_test = {
|
|||
.map_name = "pprint_test",
|
||||
.key_size = sizeof(unsigned int),
|
||||
.value_size = sizeof(struct pprint_mapv),
|
||||
.key_id = 3, /* unsigned int */
|
||||
.value_id = 16, /* struct pprint_mapv */
|
||||
.key_type_id = 3, /* unsigned int */
|
||||
.value_type_id = 16, /* struct pprint_mapv */
|
||||
.max_entries = 128 * 1024,
|
||||
};
|
||||
|
||||
|
@ -1712,8 +2053,8 @@ static int test_pprint(void)
|
|||
create_attr.value_size = test->value_size;
|
||||
create_attr.max_entries = test->max_entries;
|
||||
create_attr.btf_fd = btf_fd;
|
||||
create_attr.btf_key_id = test->key_id;
|
||||
create_attr.btf_value_id = test->value_id;
|
||||
create_attr.btf_key_type_id = test->key_type_id;
|
||||
create_attr.btf_value_type_id = test->value_type_id;
|
||||
|
||||
map_fd = bpf_create_map_xattr(&create_attr);
|
||||
if (CHECK(map_fd == -1, "errno:%d", errno)) {
|
||||
|
|
|
@ -0,0 +1,437 @@
|
|||
#include <stddef.h>
|
||||
#include <inttypes.h>
|
||||
#include <errno.h>
|
||||
#include <linux/seg6_local.h>
|
||||
#include <linux/bpf.h>
|
||||
#include "bpf_helpers.h"
|
||||
#include "bpf_endian.h"
|
||||
|
||||
#define bpf_printk(fmt, ...) \
|
||||
({ \
|
||||
char ____fmt[] = fmt; \
|
||||
bpf_trace_printk(____fmt, sizeof(____fmt), \
|
||||
##__VA_ARGS__); \
|
||||
})
|
||||
|
||||
/* Packet parsing state machine helpers. */
|
||||
#define cursor_advance(_cursor, _len) \
|
||||
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
|
||||
|
||||
#define SR6_FLAG_ALERT (1 << 4)
|
||||
|
||||
#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
|
||||
0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
|
||||
#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
|
||||
0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
|
||||
#define BPF_PACKET_HEADER __attribute__((packed))
|
||||
|
||||
struct ip6_t {
|
||||
unsigned int ver:4;
|
||||
unsigned int priority:8;
|
||||
unsigned int flow_label:20;
|
||||
unsigned short payload_len;
|
||||
unsigned char next_header;
|
||||
unsigned char hop_limit;
|
||||
unsigned long long src_hi;
|
||||
unsigned long long src_lo;
|
||||
unsigned long long dst_hi;
|
||||
unsigned long long dst_lo;
|
||||
} BPF_PACKET_HEADER;
|
||||
|
||||
struct ip6_addr_t {
|
||||
unsigned long long hi;
|
||||
unsigned long long lo;
|
||||
} BPF_PACKET_HEADER;
|
||||
|
||||
struct ip6_srh_t {
|
||||
unsigned char nexthdr;
|
||||
unsigned char hdrlen;
|
||||
unsigned char type;
|
||||
unsigned char segments_left;
|
||||
unsigned char first_segment;
|
||||
unsigned char flags;
|
||||
unsigned short tag;
|
||||
|
||||
struct ip6_addr_t segments[0];
|
||||
} BPF_PACKET_HEADER;
|
||||
|
||||
struct sr6_tlv_t {
|
||||
unsigned char type;
|
||||
unsigned char len;
|
||||
unsigned char value[0];
|
||||
} BPF_PACKET_HEADER;
|
||||
|
||||
__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
|
||||
{
|
||||
void *cursor, *data_end;
|
||||
struct ip6_srh_t *srh;
|
||||
struct ip6_t *ip;
|
||||
uint8_t *ipver;
|
||||
|
||||
data_end = (void *)(long)skb->data_end;
|
||||
cursor = (void *)(long)skb->data;
|
||||
ipver = (uint8_t *)cursor;
|
||||
|
||||
if ((void *)ipver + sizeof(*ipver) > data_end)
|
||||
return NULL;
|
||||
|
||||
if ((*ipver >> 4) != 6)
|
||||
return NULL;
|
||||
|
||||
ip = cursor_advance(cursor, sizeof(*ip));
|
||||
if ((void *)ip + sizeof(*ip) > data_end)
|
||||
return NULL;
|
||||
|
||||
if (ip->next_header != 43)
|
||||
return NULL;
|
||||
|
||||
srh = cursor_advance(cursor, sizeof(*srh));
|
||||
if ((void *)srh + sizeof(*srh) > data_end)
|
||||
return NULL;
|
||||
|
||||
if (srh->type != 4)
|
||||
return NULL;
|
||||
|
||||
return srh;
|
||||
}
|
||||
|
||||
__attribute__((always_inline))
|
||||
int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
|
||||
uint32_t old_pad, uint32_t pad_off)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (new_pad != old_pad) {
|
||||
err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
|
||||
(int) new_pad - (int) old_pad);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
if (new_pad > 0) {
|
||||
char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0};
|
||||
struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
|
||||
|
||||
pad_tlv->type = SR6_TLV_PADDING;
|
||||
pad_tlv->len = new_pad - 2;
|
||||
|
||||
err = bpf_lwt_seg6_store_bytes(skb, pad_off,
|
||||
(void *)pad_tlv_buf, new_pad);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
__attribute__((always_inline))
int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
			  uint32_t *tlv_off, uint32_t *pad_size,
			  uint32_t *pad_off)
{
	uint32_t srh_off, cur_off;
	int offset_valid = 0;
	int err;

	srh_off = (char *)srh - (char *)(long)skb->data;
	// cur_off = end of segments, start of possible TLVs
	cur_off = srh_off + sizeof(*srh) +
		sizeof(struct ip6_addr_t) * (srh->first_segment + 1);

	*pad_off = 0;

	// we can only go as far as ~10 TLVs due to the BPF max stack size
	#pragma clang loop unroll(full)
	for (int i = 0; i < 10; i++) {
		struct sr6_tlv_t tlv;

		if (cur_off == *tlv_off)
			offset_valid = 1;

		if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
			break;

		err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
		if (err)
			return err;

		if (tlv.type == SR6_TLV_PADDING) {
			*pad_size = tlv.len + sizeof(tlv);
			*pad_off = cur_off;

			if (*tlv_off == srh_off) {
				*tlv_off = cur_off;
				offset_valid = 1;
			}
			break;

		} else if (tlv.type == SR6_TLV_HMAC) {
			break;
		}

		cur_off += sizeof(tlv) + tlv.len;
	} // we reached the padding or HMAC TLVs, or the end of the SRH

	if (*pad_off == 0)
		*pad_off = cur_off;

	if (*tlv_off == -1)
		*tlv_off = cur_off;
	else if (!offset_valid)
		return -EINVAL;

	return 0;
}

__attribute__((always_inline))
int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
	    struct sr6_tlv_t *itlv, uint8_t tlv_size)
{
	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
	uint8_t len_remaining, new_pad;
	uint32_t pad_off = 0;
	uint32_t pad_size = 0;
	uint32_t partial_srh_len;
	int err;

	if (tlv_off != -1)
		tlv_off += srh_off;

	if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
		return -EINVAL;

	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
	if (err)
		return err;

	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
	if (err)
		return err;

	err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
	if (err)
		return err;

	// the following can't be moved inside update_tlv_pad because the
	// bpf verifier has some issues with it
	pad_off += sizeof(*itlv) + itlv->len;
	partial_srh_len = pad_off - srh_off;
	len_remaining = partial_srh_len % 8;
	new_pad = 8 - len_remaining;

	if (new_pad == 1) // cannot pad for 1 byte only
		new_pad = 9;
	else if (new_pad == 8)
		new_pad = 0;

	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
}

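The padding arithmetic at the end of add_tlv() (mirrored in delete_tlv() below) keeps the SRH a multiple of 8 bytes: a Pad TLV needs at least 2 bytes for its type and length fields, so 1 byte of slack rounds up to 9, and an already-aligned header needs none. Factored out as a standalone sketch (hypothetical helper; the patch keeps this logic inline only because of verifier limitations noted above):

/* Hypothetical helper mirroring the padding rule used above. */
static unsigned int srh_pad_needed(unsigned int partial_srh_len)
{
	unsigned int new_pad = 8 - partial_srh_len % 8;

	if (new_pad == 1)	/* a Pad TLV needs at least 2 bytes */
		return 9;
	if (new_pad == 8)	/* already 8-byte aligned */
		return 0;
	return new_pad;
}
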
__attribute__((always_inline))
int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
	       uint32_t tlv_off)
{
	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
	uint8_t len_remaining, new_pad;
	uint32_t partial_srh_len;
	uint32_t pad_off = 0;
	uint32_t pad_size = 0;
	struct sr6_tlv_t tlv;
	int err;

	tlv_off += srh_off;

	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
	if (err)
		return err;

	err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
	if (err)
		return err;

	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
	if (err)
		return err;

	pad_off -= sizeof(tlv) + tlv.len;
	partial_srh_len = pad_off - srh_off;
	len_remaining = partial_srh_len % 8;
	new_pad = 8 - len_remaining;
	if (new_pad == 1) // cannot pad for 1 byte only
		new_pad = 9;
	else if (new_pad == 8)
		new_pad = 0;

	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
}

__attribute__((always_inline))
int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
{
	int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
		((srh->first_segment + 1) << 4);
	struct sr6_tlv_t tlv;

	if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
		return 0;

	if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
		struct ip6_addr_t egr_addr;

		if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
			return 0;

		// check if egress TLV value is correct
		if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
		    ntohll(egr_addr.lo) == 0x4)
			return 1;
	}

	return 0;
}

// This function pushes an SRH with segments fd00::1, fd00::2, fd00::3,
// fd00::4
SEC("encap_srh")
int __encap_srh(struct __sk_buff *skb)
{
	unsigned long long hi = 0xfd00000000000000;
	struct ip6_addr_t *seg;
	struct ip6_srh_t *srh;
	char srh_buf[72]; // room for 4 segments
	int err;

	srh = (struct ip6_srh_t *)srh_buf;
	srh->nexthdr = 0;
	srh->hdrlen = 8;
	srh->type = 4;
	srh->segments_left = 3;
	srh->first_segment = 3;
	srh->flags = 0;
	srh->tag = 0;

	seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));

	#pragma clang loop unroll(full)
	for (unsigned long long lo = 0; lo < 4; lo++) {
		seg->lo = htonll(4 - lo);
		seg->hi = htonll(hi);
		seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
	}

	err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
	if (err)
		return BPF_DROP;

	return BPF_REDIRECT;
}

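The buffer sizing in __encap_srh() is internally consistent: the fixed part of the SRH is 8 bytes and each segment 16, so 8 + 4 * 16 = 72 bytes of srh_buf, and hdrlen = 72 / 8 - 1 = 8. Illustrative compile-time checks one could add (a sketch, not part of the patch):

/* Illustrative consistency checks for the SRH sizing used above. */
_Static_assert(sizeof(struct ip6_srh_t) + 4 * sizeof(struct ip6_addr_t) == 72,
	       "SRH base (8B) plus 4 segments (16B each) fills srh_buf");
_Static_assert((72 >> 3) - 1 == 8,
	       "hdrlen counts 8-byte units past the first 8 bytes");
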
// Add an Egress TLV fd00::4, add the flag A,
// and apply End.X action to fc42::1
SEC("add_egr_x")
int __add_egr_x(struct __sk_buff *skb)
{
	unsigned long long hi = 0xfc42000000000000;
	unsigned long long lo = 0x1;
	struct ip6_srh_t *srh = get_srh(skb);
	uint8_t new_flags = SR6_FLAG_ALERT;
	struct ip6_addr_t addr;
	int err, offset;

	if (srh == NULL)
		return BPF_DROP;

	uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
			   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};

	err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
		      (struct sr6_tlv_t *)&tlv, 20);
	if (err)
		return BPF_DROP;

	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
	err = bpf_lwt_seg6_store_bytes(skb, offset,
				       (void *)&new_flags, sizeof(new_flags));
	if (err)
		return BPF_DROP;

	addr.lo = htonll(lo);
	addr.hi = htonll(hi);
	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
				  (void *)&addr, sizeof(addr));
	if (err)
		return BPF_DROP;
	return BPF_REDIRECT;
}

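The hand-built 20-byte tlv array in __add_egr_x() is an Egress TLV: type 2 (SR6_TLV_EGRESS), length 18 (2 reserved bytes plus a 16-byte IPv6 address), and the address fd00::4 in network byte order. The same bytes, written as a hypothetical expanded struct for readability:

/* Hypothetical expanded view of the Egress TLV built above. */
struct sr6_egress_tlv {
	unsigned char type;	/* 2: SR6_TLV_EGRESS */
	unsigned char len;	/* 18: 2 reserved + 16 address bytes */
	unsigned char reserved[2];
	unsigned char addr[16];	/* fd00::4, network byte order */
} BPF_PACKET_HEADER;

has_egr_tlv() above checks exactly these fields: type SR6_TLV_EGRESS, len 18, and an address equal to fd00::4.
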
// Pop the Egress TLV, reset the flags, change the tag to 2442 and finally do a
// simple End action
SEC("pop_egr")
int __pop_egr(struct __sk_buff *skb)
{
	struct ip6_srh_t *srh = get_srh(skb);
	uint16_t new_tag = bpf_htons(2442);
	uint8_t new_flags = 0;
	int err, offset;

	if (srh == NULL)
		return BPF_DROP;

	if (srh->flags != SR6_FLAG_ALERT)
		return BPF_DROP;

	if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
		return BPF_DROP;

	if (!has_egr_tlv(skb, srh))
		return BPF_DROP;

	err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
	if (err)
		return BPF_DROP;

	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
				     sizeof(new_flags)))
		return BPF_DROP;

	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
				     sizeof(new_tag)))
		return BPF_DROP;

	return BPF_OK;
}

// Check that the Egress TLV and flag have been removed and that the tag is
// correct, then apply an End.T action to reach the last segment
SEC("inspect_t")
int __inspect_t(struct __sk_buff *skb)
{
	struct ip6_srh_t *srh = get_srh(skb);
	int table = 117;
	int err;

	if (srh == NULL)
		return BPF_DROP;

	if (srh->flags != 0)
		return BPF_DROP;

	if (srh->tag != bpf_htons(2442))
		return BPF_DROP;

	if (srh->hdrlen != 8) // 4 segments
		return BPF_DROP;

	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
				  (void *)&table, sizeof(table));

	if (err)
		return BPF_DROP;

	return BPF_REDIRECT;
}

char __license[] SEC("license") = "GPL";
@ -0,0 +1,140 @@
#!/bin/bash
# Connects 6 network namespaces through veths.
# Each NS may have different IPv6 global scope addresses:
# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6
# fc42::1 fd00::4
#
# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in an
# IPv6 header with a Segment Routing Header, with segments:
# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
#
# 3 fd00::/16 IPv6 addresses are bound to seg6local End.BPF actions:
# - fd00::1 : add a TLV, change the flags and apply an End.X action to fc42::1
# - fd00::2 : remove the TLV, change the flags, add a tag
# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
#
# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
# Each End.BPF action will validate the operations applied on the SRH by the
# previous BPF program in the chain, otherwise the packet is dropped.
#
# A UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
# datagram can be read on NS6 when binding to fb00::6.

TMP_FILE="/tmp/selftest_lwt_seg6local.txt"

cleanup()
{
	if [ "$?" = "0" ]; then
		echo "selftests: test_lwt_seg6local [PASS]";
	else
		echo "selftests: test_lwt_seg6local [FAILED]";
	fi

	set +e
	ip netns del ns1 2> /dev/null
	ip netns del ns2 2> /dev/null
	ip netns del ns3 2> /dev/null
	ip netns del ns4 2> /dev/null
	ip netns del ns5 2> /dev/null
	ip netns del ns6 2> /dev/null
	rm -f $TMP_FILE
}

set -e

ip netns add ns1
ip netns add ns2
ip netns add ns3
ip netns add ns4
ip netns add ns5
ip netns add ns6

trap cleanup 0 2 3 6 9

ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
ip link add veth7 type veth peer name veth8
ip link add veth9 type veth peer name veth10

ip link set veth1 netns ns1
ip link set veth2 netns ns2
ip link set veth3 netns ns2
ip link set veth4 netns ns3
ip link set veth5 netns ns3
ip link set veth6 netns ns4
ip link set veth7 netns ns4
ip link set veth8 netns ns5
ip link set veth9 netns ns5
ip link set veth10 netns ns6

ip netns exec ns1 ip link set dev veth1 up
ip netns exec ns2 ip link set dev veth2 up
ip netns exec ns2 ip link set dev veth3 up
ip netns exec ns3 ip link set dev veth4 up
ip netns exec ns3 ip link set dev veth5 up
ip netns exec ns4 ip link set dev veth6 up
ip netns exec ns4 ip link set dev veth7 up
ip netns exec ns5 ip link set dev veth8 up
ip netns exec ns5 ip link set dev veth9 up
ip netns exec ns6 ip link set dev veth10 up
ip netns exec ns6 ip link set dev lo up

# All link scope addresses and routes required between veths
ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link

ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21

ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link

ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF obj test_lwt_seg6local.o sec add_egr_x dev veth4

ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF obj test_lwt_seg6local.o sec pop_egr dev veth6
ip netns exec ns4 ip -6 addr add fc42::1 dev lo
ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87

ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF obj test_lwt_seg6local.o sec inspect_t dev veth8

ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo

ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null

ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null

ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
sleep 5 # wait long enough for the UDP datagram to reach the last segment
kill -INT $!

if [[ $(< $TMP_FILE) != "foobar" ]]; then
	exit 1
fi

exit 0
@ -1542,6 +1542,162 @@ close_prog_noerr:
	bpf_object__close(obj);
}

static void test_task_fd_query_rawtp(void)
{
	const char *file = "./test_get_stack_rawtp.o";
	__u64 probe_offset, probe_addr;
	__u32 len, prog_id, fd_type;
	struct bpf_object *obj;
	int efd, err, prog_fd;
	__u32 duration = 0;
	char buf[256];

	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
		return;

	efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
	if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
		goto close_prog;

	/* query (getpid(), efd) */
	len = sizeof(buf);
	err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
				&fd_type, &probe_offset, &probe_addr);
	if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
		  errno))
		goto close_prog;

	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
	      strcmp(buf, "sys_enter") == 0;
	if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
		  fd_type, buf))
		goto close_prog;

	/* test zero len */
	len = 0;
	err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
				&fd_type, &probe_offset, &probe_addr);
	if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n",
		  err, errno))
		goto close_prog;
	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
	      len == strlen("sys_enter");
	if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
		goto close_prog;

	/* test empty buffer */
	len = sizeof(buf);
	err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id,
				&fd_type, &probe_offset, &probe_addr);
	if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n",
		  err, errno))
		goto close_prog;
	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
	      len == strlen("sys_enter");
	if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
		goto close_prog;

	/* test smaller buffer */
	len = 3;
	err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
				&fd_type, &probe_offset, &probe_addr);
	if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)",
		  "err %d errno %d\n", err, errno))
		goto close_prog;
	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
	      len == strlen("sys_enter") &&
	      strcmp(buf, "sy") == 0;
	if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
		goto close_prog;

	goto close_prog_noerr;
close_prog:
	error_cnt++;
close_prog_noerr:
	bpf_object__close(obj);
}

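The zero-length, NULL-buffer, and short-buffer cases above pin down the query contract: even when the name does not fit (ENOSPC), the kernel still reports the full name length in len and null-terminates whatever was copied. A caller can lean on that to size a buffer exactly. A hedged sketch (helper name hypothetical; assumes a NULL-buffer probe behaves like the empty-buffer case above):

#include <errno.h>
#include <stdlib.h>

/* Sketch: fetch a tracing event name of unknown length by probing with a
 * NULL buffer first, then allocating exactly what the kernel reported.
 */
static char *query_event_name(int pid, int fd)
{
	__u64 probe_offset, probe_addr;
	__u32 len = 0, prog_id, fd_type;
	char *buf;

	/* With buf == NULL the call still reports the name length in len. */
	if (bpf_task_fd_query(pid, fd, 0, NULL, &len, &prog_id,
			      &fd_type, &probe_offset, &probe_addr) < 0 &&
	    errno != ENOSPC)
		return NULL;

	buf = malloc(len + 1);
	if (!buf)
		return NULL;
	len++;
	if (bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id,
			      &fd_type, &probe_offset, &probe_addr) < 0) {
		free(buf);
		return NULL;
	}
	return buf;
}
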
static void test_task_fd_query_tp_core(const char *probe_name,
				       const char *tp_name)
{
	const char *file = "./test_tracepoint.o";
	int err, bytes, efd, prog_fd, pmu_fd;
	struct perf_event_attr attr = {};
	__u64 probe_offset, probe_addr;
	__u32 len, prog_id, fd_type;
	struct bpf_object *obj;
	__u32 duration = 0;
	char buf[256];

	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
	if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
		goto close_prog;

	snprintf(buf, sizeof(buf),
		 "/sys/kernel/debug/tracing/events/%s/id", probe_name);
	efd = open(buf, O_RDONLY, 0);
	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
		goto close_prog;
	bytes = read(efd, buf, sizeof(buf));
	close(efd);
	if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
		  "bytes %d errno %d\n", bytes, errno))
		goto close_prog;

	attr.config = strtol(buf, NULL, 0);
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.sample_type = PERF_SAMPLE_RAW;
	attr.sample_period = 1;
	attr.wakeup_events = 1;
	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
			 0 /* cpu 0 */, -1 /* group id */,
			 0 /* flags */);
	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
		  errno))
		goto close_pmu;

	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
		  errno))
		goto close_pmu;

	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
		  errno))
		goto close_pmu;

	/* query (getpid(), pmu_fd) */
	len = sizeof(buf);
	err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id,
				&fd_type, &probe_offset, &probe_addr);
	if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
		  errno))
		goto close_pmu;

	err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name);
	if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
		  fd_type, buf))
		goto close_pmu;

	close(pmu_fd);
	goto close_prog_noerr;

close_pmu:
	close(pmu_fd);
close_prog:
	error_cnt++;
close_prog_noerr:
	bpf_object__close(obj);
}

static void test_task_fd_query_tp(void)
{
	test_task_fd_query_tp_core("sched/sched_switch",
				   "sched_switch");
	test_task_fd_query_tp_core("syscalls/sys_enter_read",
				   "sys_enter_read");
}

int main(void)
{
	jit_enabled = is_jit_enabled();

@ -1561,6 +1717,8 @@ int main(void)
	test_stacktrace_build_id_nmi();
	test_stacktrace_map_raw_tp();
	test_get_stack_raw_tp();
	test_task_fd_query_rawtp();
	test_task_fd_query_tp();

	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
@ -1685,6 +1685,121 @@ static struct bpf_test tests[] = {
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_SK_SKB,
	},
	{
		"valid access family in SK_MSG",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, family)),
			BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_SK_MSG,
	},
	{
		"valid access remote_ip4 in SK_MSG",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, remote_ip4)),
			BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_SK_MSG,
	},
	{
		"valid access local_ip4 in SK_MSG",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, local_ip4)),
			BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_SK_MSG,
	},
	{
		"valid access remote_port in SK_MSG",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, remote_port)),
			BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_SK_MSG,
	},
	{
		"valid access local_port in SK_MSG",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, local_port)),
			BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_SK_MSG,
	},
	{
		"valid access remote_ip6 in SK_MSG",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, remote_ip6[0])),
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, remote_ip6[1])),
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, remote_ip6[2])),
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, remote_ip6[3])),
			BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_SK_MSG,
	},
	{
		"valid access local_ip6 in SK_MSG",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, local_ip6[0])),
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, local_ip6[1])),
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, local_ip6[2])),
			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
				    offsetof(struct sk_msg_md, local_ip6[3])),
			BPF_EXIT_INSN(),
		},
		.result = ACCEPT,
		.prog_type = BPF_PROG_TYPE_SK_MSG,
	},
	{
		"invalid 64B read of family in SK_MSG",
		.insns = {
			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
				    offsetof(struct sk_msg_md, family)),
			BPF_EXIT_INSN(),
		},
		.errstr = "invalid bpf_context access",
		.result = REJECT,
		.prog_type = BPF_PROG_TYPE_SK_MSG,
	},
	{
		"invalid read past end of SK_MSG",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
				    offsetof(struct sk_msg_md, local_port) + 4),
			BPF_EXIT_INSN(),
		},
		.errstr = "R0 !read_ok",
		.result = REJECT,
		.prog_type = BPF_PROG_TYPE_SK_MSG,
	},
	{
		"invalid read offset in SK_MSG",
		.insns = {
			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
				    offsetof(struct sk_msg_md, family) + 1),
			BPF_EXIT_INSN(),
		},
		.errstr = "invalid bpf_context access",
		.result = REJECT,
		.prog_type = BPF_PROG_TYPE_SK_MSG,
	},
	{
		"direct packet read for SK_MSG",
		.insns = {
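These verifier cases encode the rule an SK_MSG program hits in practice: the new sock fields of struct sk_msg_md are 32-bit context slots, so only aligned 4-byte loads are allowed (no 8-byte reads, no offsets into a field, nothing past local_port). A minimal C-level counterpart of the "valid access" cases (a sketch; verdict values from the uapi sk_action enum):

SEC("sk_msg")
int msg_read_sock_fields(struct sk_msg_md *msg)
{
	/* Aligned 4-byte context reads: all accepted by the verifier.
	 * Note the port fields' byte order differs (remote_port is
	 * network order); this only demonstrates verifier-legal loads.
	 */
	if (msg->family != 2 /* AF_INET */)
		return SK_PASS;
	if (msg->remote_ip4 == msg->local_ip4 &&
	    msg->remote_port == msg->local_port)
		return SK_DROP;
	return SK_PASS;
}

A 64-bit read of family, or a read at family + 1, compiles to exactly the BPF_DW and misaligned loads rejected above.
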
@ -72,6 +72,18 @@ struct ksym *ksym_search(long key)
	return &syms[0];
}

long ksym_get_addr(const char *name)
{
	int i;

	for (i = 0; i < sym_cnt; i++) {
		if (strcmp(syms[i].name, name) == 0)
			return syms[i].addr;
	}

	return 0;
}

static int page_size;
static int page_cnt = 8;
static struct perf_event_mmap_page *header;

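ksym_get_addr() is the inverse of ksym_search(): name to address instead of address to name, returning 0 when the symbol is absent. A typical use is resolving a kernel symbol before attaching a probe; a hedged sketch (helper and symbol name illustrative only):

#include <stdio.h>

/* Sketch: resolve a kernel symbol after load_kallsyms() has populated
 * the table from /proc/kallsyms; 0 means the symbol was not found.
 */
static long resolve_ksym(const char *sym)
{
	long addr = ksym_get_addr(sym);

	if (!addr)
		fprintf(stderr, "symbol %s not found in kallsyms\n", sym);
	return addr;
}
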
@ -11,6 +11,7 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);

typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);