Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Alexei Starovoitov says:

====================
pull-request: bpf-next 2018-05-24

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) Björn Töpel cleans up AF_XDP (removes rebind, explicit cache alignment from the uapi, etc.).

2) David Ahern adds MTU checks to the bpf_ipv{4,6}_fib_lookup() helpers.

3) Jesper Dangaard Brouer adds bulking support to ndo_xdp_xmit.

4) Jiong Wang adds support for indirect and arithmetic shifts to the NFP JIT.

5) Martin KaFai Lau cleans up BTF uapi and makes the btf_header extensible.

6) Mathieu Xhonneux adds an End.BPF action to seg6local with BPF helpers that allow
   editing/growing/shrinking an SRH and applying generic SRv6 actions to a packet.

7) Sandipan Das adds support for bpf2bpf function calls in ppc64 JIT.

8) Yonghong Song adds BPF_TASK_FD_QUERY command for introspection of tracing events.

9) Other miscellaneous fixes from Gustavo A. R. Silva, Sirio Balmelli, John Fastabend, and Magnus Karlsson.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Committed by David S. Miller on 2018-05-24 22:20:51 -04:00
commit 90fed9c946
90 changed files with 5215 additions and 814 deletions

View File

@ -167,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
{
unsigned int i, ctx_idx = ctx->idx;
/* Load function address into r12 */
PPC_LI64(12, func);
/* For bpf-to-bpf function calls, the callee's address is unknown
* until the last extra pass. As seen above, we use PPC_LI64() to
* load the callee's address, but PPC_LI64() may optimize the number
* of instructions emitted based on the nature of the address.
*
* Since we don't want the number of instructions emitted to change,
* we pad the optimized PPC_LI64() call with NOPs to guarantee that
* we always have a five-instruction sequence, which is the maximum
* that PPC_LI64() can emit.
*/
for (i = ctx->idx - ctx_idx; i < 5; i++)
PPC_NOP();
#ifdef PPC64_ELF_ABI_v1
/* func points to the function descriptor */
PPC_LI64(b2p[TMP_REG_2], func);
/* Load actual entry point from function descriptor */
PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
/* ... and move it to LR */
PPC_MTLR(b2p[TMP_REG_1]);
/*
* Load TOC from function descriptor at offset 8.
* We can clobber r2 since we get called through a
* function pointer (so caller will save/restore r2)
* and since we don't use a TOC ourself.
*/
PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
#else
/* We can clobber r12 */
PPC_FUNC_ADDR(12, func);
PPC_MTLR(12);
PPC_BPF_LL(2, 12, 8);
/* Load actual entry point from function descriptor */
PPC_BPF_LL(12, 12, 0);
#endif
PPC_MTLR(12);
PPC_BLRL();
}
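
A note on the five-NOP padding above: PPC_LI64() is a variable-length emitter, producing anywhere from one instruction (small signed immediate) to five (arbitrary 64-bit address), which is why the loop tops the sequence up to a fixed five slots that a later pass can overwrite in place. A rough, illustrative C model of the instruction count follows; the function name is made up, the real macro lives in arch/powerpc/net/bpf_jit.h, and a couple of its single-instruction shortcuts are counted coarsely here.

static int li64_insn_count(unsigned long long imm)
{
        int n;

        /* Fits in a sign-extended 32-bit immediate? */
        if (imm + 0x80000000ULL < 0x100000000ULL)
                return (imm + 0x8000ULL < 0x10000ULL) ? 1 : 2; /* li : lis+ori */

        /* Build the high 32 bits, shift them up, then OR in the low half. */
        n = (imm & 0xffff000000000000ULL) && (imm & 0x0000ffff00000000ULL)
                ? 3 : 2;                        /* lis [+ ori] + sldi */
        if (imm & 0x00000000ffff0000ULL)
                n++;                            /* oris */
        if (imm & 0x000000000000ffffULL)
                n++;                            /* ori */
        return n;                               /* never more than 5 */
}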
@ -256,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
/* Assemble the body code between the prologue & epilogue */
static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
struct codegen_context *ctx,
u32 *addrs)
u32 *addrs, bool extra_pass)
{
const struct bpf_insn *insn = fp->insnsi;
int flen = fp->len;
@ -712,11 +724,25 @@ emit_clear:
break;
/*
* Call kernel helper
* Call kernel helper or bpf function
*/
case BPF_JMP | BPF_CALL:
ctx->seen |= SEEN_FUNC;
func = (u8 *) __bpf_call_base + imm;
/* bpf function call */
if (insn[i].src_reg == BPF_PSEUDO_CALL)
if (!extra_pass)
func = NULL;
else if (fp->aux->func && off < fp->aux->func_cnt)
/* use the subprog id from the off
* field to lookup the callee address
*/
func = (u8 *) fp->aux->func[off]->bpf_func;
else
return -EINVAL;
/* kernel helper call */
else
func = (u8 *) __bpf_call_base + imm;
bpf_jit_emit_func_call(image, ctx, (u64)func);
@ -864,6 +890,14 @@ cond_branch:
return 0;
}
struct powerpc64_jit_data {
struct bpf_binary_header *header;
u32 *addrs;
u8 *image;
u32 proglen;
struct codegen_context ctx;
};
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
u32 proglen;
@ -871,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
u8 *image = NULL;
u32 *code_base;
u32 *addrs;
struct powerpc64_jit_data *jit_data;
struct codegen_context cgctx;
int pass;
int flen;
@ -878,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
struct bpf_prog *org_fp = fp;
struct bpf_prog *tmp_fp;
bool bpf_blinded = false;
bool extra_pass = false;
if (!fp->jit_requested)
return org_fp;
@ -891,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp = tmp_fp;
}
jit_data = fp->aux->jit_data;
if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
if (!jit_data) {
fp = org_fp;
goto out;
}
fp->aux->jit_data = jit_data;
}
flen = fp->len;
addrs = jit_data->addrs;
if (addrs) {
cgctx = jit_data->ctx;
image = jit_data->image;
bpf_hdr = jit_data->header;
proglen = jit_data->proglen;
alloclen = proglen + FUNCTION_DESCR_SIZE;
extra_pass = true;
goto skip_init_ctx;
}
addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
if (addrs == NULL) {
fp = org_fp;
goto out;
goto out_addrs;
}
memset(&cgctx, 0, sizeof(struct codegen_context));
@ -904,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
/* Scouting faux-generate pass 0 */
if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
/* We hit something illegal or unsupported. */
fp = org_fp;
goto out;
goto out_addrs;
}
/*
@ -925,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
bpf_jit_fill_ill_insns);
if (!bpf_hdr) {
fp = org_fp;
goto out;
goto out_addrs;
}
skip_init_ctx:
code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
/* Code generation passes 1-2 */
@ -935,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/* Now build the prologue, body code & epilogue for real. */
cgctx.idx = 0;
bpf_jit_build_prologue(code_base, &cgctx);
bpf_jit_build_body(fp, code_base, &cgctx, addrs);
bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
bpf_jit_build_epilogue(code_base, &cgctx);
if (bpf_jit_enable > 1)
@ -961,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
fp->jited_len = alloclen;
bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
if (!fp->is_func || extra_pass) {
out_addrs:
kfree(addrs);
kfree(jit_data);
fp->aux->jit_data = NULL;
} else {
jit_data->addrs = addrs;
jit_data->ctx = cgctx;
jit_data->proglen = proglen;
jit_data->image = image;
jit_data->header = bpf_hdr;
}
out:
kfree(addrs);
if (bpf_blinded)
bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);

View File

@ -3664,14 +3664,19 @@ netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
* @dev: netdev
* @xdp: XDP buffer
*
* Returns Zero if sent, else an error code
* Returns the number of frames successfully sent. Frames that fail are
* freed via the XDP return API.
*
* For error cases, a negative errno code is returned and no frames
* are transmitted (the caller must handle freeing the frames).
**/
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
unsigned int queue_index = smp_processor_id();
struct i40e_vsi *vsi = np->vsi;
int err;
int drops = 0;
int i;
if (test_bit(__I40E_VSI_DOWN, vsi->state))
return -ENETDOWN;
@ -3679,11 +3684,18 @@ int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (!i40e_enabled_xdp_vsi(vsi) || queue_index >= vsi->num_queue_pairs)
return -ENXIO;
err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
if (err != I40E_XDP_TX)
return -ENOSPC;
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
int err;
return 0;
err = i40e_xmit_xdp_ring(xdpf, vsi->xdp_rings[queue_index]);
if (err != I40E_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
return n - drops;
}
/**

View File

@ -487,7 +487,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw);
void i40e_detect_recover_hung(struct i40e_vsi *vsi);
int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
bool __i40e_chk_linearize(struct sk_buff *skb);
int i40e_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf);
int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames);
void i40e_xdp_flush(struct net_device *dev);
/**

View File

@ -10022,11 +10022,13 @@ static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
static int ixgbe_xdp_xmit(struct net_device *dev, int n,
struct xdp_frame **frames)
{
struct ixgbe_adapter *adapter = netdev_priv(dev);
struct ixgbe_ring *ring;
int err;
int drops = 0;
int i;
if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state)))
return -ENETDOWN;
@ -10038,11 +10040,18 @@ static int ixgbe_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (unlikely(!ring))
return -ENXIO;
err = ixgbe_xmit_xdp_ring(adapter, xdpf);
if (err != IXGBE_XDP_TX)
return -ENOSPC;
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
int err;
return 0;
err = ixgbe_xmit_xdp_ring(adapter, xdpf);
if (err != IXGBE_XDP_TX) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
return n - drops;
}
static void ixgbe_xdp_flush(struct net_device *dev)

View File

@ -211,6 +211,60 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
}
static void
__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
bool set, bool src_lmextn)
{
u16 addr_lo, addr_hi;
u64 insn;
addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
addr_hi = addr != addr_lo;
insn = OP_BR_BIT_BASE |
FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
FIELD_PREP(OP_BR_BIT_BV, set) |
FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);
nfp_prog_push(nfp_prog, insn);
}
static void
emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
u8 defer, bool set, enum nfp_relo_type relo)
{
struct nfp_insn_re_regs reg;
int err;
/* NOTE: The bit to test is specified as a rotation amount, such that
* the bit to test is placed in the MSB of the result when
* doing a rotate right. For bit X, we need a right rotate of X + 1.
*/
bit += 1;
err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
if (err) {
nfp_prog->error = err;
return;
}
__emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
reg.src_lmextn);
nfp_prog->prog[nfp_prog->prog_len - 1] |=
FIELD_PREP(OP_RELO_TYPE, relo);
}
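
The rotate trick in the comment above is easy to sanity-check in plain C: rotating right by (bit + 1) lands bit X in the MSB. A minimal, self-contained sketch; the names are made up.

#include <stdint.h>

static uint32_t rotr32(uint32_t v, unsigned int r)
{
        r &= 31;                /* rotate by 32 == rotate by 0 */
        return (v >> r) | (v << ((32 - r) & 31));
}

static int bit_via_msb(uint32_t v, unsigned int bit)
{
        /* Equivalent to (v >> bit) & 1 */
        return rotr32(v, bit + 1) >> 31;
}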
static void
emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
{
emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
}
static void
__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
enum immed_width width, bool invert,
@ -309,6 +363,19 @@ emit_shf(struct nfp_prog *nfp_prog, swreg dst,
reg.dst_lmextn, reg.src_lmextn);
}
static void
emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
{
if (sc == SHF_SC_R_ROT) {
pr_err("indirect shift is not allowed on rotation\n");
nfp_prog->error = -EFAULT;
return;
}
emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
}
static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
@ -1629,26 +1696,142 @@ static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
/* Pseudo code:
* if shift_amt >= 32
* dst_high = dst_low << shift_amt[4:0]
* dst_low = 0;
* else
* dst_high = (dst_high, dst_low) >> (32 - shift_amt)
* dst_low = dst_low << shift_amt
*
* The indirect shift will use the same logic at runtime.
*/
static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
if (shift_amt < 32) {
emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
32 - shift_amt);
emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF, shift_amt);
} else if (shift_amt == 32) {
wrp_reg_mov(nfp_prog, dst + 1, dst);
wrp_immed(nfp_prog, reg_both(dst), 0);
} else if (shift_amt > 32) {
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
wrp_immed(nfp_prog, reg_both(dst), 0);
}
return 0;
}
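
The pseudo code above translates into an easily testable reference model in plain C, treating the 64-bit eBPF register as {hi, lo} 32-bit NFP halves. This is purely illustrative, not driver code; it assumes amt < 64 and treats a zero shift as a no-op.

#include <stdint.h>

static void shl64_model(uint32_t *hi, uint32_t *lo, unsigned int amt)
{
        if (amt == 0)
                return;
        if (amt < 32) {
                /* dst_high = (dst_high, dst_low) >> (32 - amt): the
                 * double-register shift funnels lo's top bits into hi.
                 */
                *hi = (*hi << amt) | (*lo >> (32 - amt));
                *lo <<= amt;
        } else {
                /* The low half is shifted out entirely. */
                *hi = *lo << (amt - 32);
                *lo = 0;
        }
}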
static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2;
if (insn->imm < 32) {
emit_shf(nfp_prog, reg_both(dst + 1),
reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
SHF_SC_R_DSHF, 32 - insn->imm);
emit_shf(nfp_prog, reg_both(dst),
reg_none(), SHF_OP_NONE, reg_b(dst),
SHF_SC_L_SHF, insn->imm);
} else if (insn->imm == 32) {
wrp_reg_mov(nfp_prog, dst + 1, dst);
wrp_immed(nfp_prog, reg_both(dst), 0);
} else if (insn->imm > 32) {
emit_shf(nfp_prog, reg_both(dst + 1),
reg_none(), SHF_OP_NONE, reg_b(dst),
SHF_SC_L_SHF, insn->imm - 32);
wrp_immed(nfp_prog, reg_both(dst), 0);
return __shl_imm64(nfp_prog, dst, insn->imm);
}
static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
reg_b(src));
emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_DSHF);
}
/* NOTE: for indirect left shift, HIGH part should be calculated first. */
static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF);
}
static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
shl_reg64_lt32_high(nfp_prog, dst, src);
shl_reg64_lt32_low(nfp_prog, dst, src);
}
static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst), SHF_SC_L_SHF);
wrp_immed(nfp_prog, reg_both(dst), 0);
}
static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 umin, umax;
u8 dst, src;
dst = insn->dst_reg * 2;
umin = meta->umin;
umax = meta->umax;
if (umin == umax)
return __shl_imm64(nfp_prog, dst, umin);
src = insn->src_reg * 2;
if (umax < 32) {
shl_reg64_lt32(nfp_prog, dst, src);
} else if (umin >= 32) {
shl_reg64_ge32(nfp_prog, dst, src);
} else {
/* Generate different instruction sequences depending on runtime
* value of shift amount.
*/
u16 label_ge32, label_end;
label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
shl_reg64_lt32_high(nfp_prog, dst, src);
label_end = nfp_prog_current_offset(nfp_prog) + 6;
emit_br(nfp_prog, BR_UNC, label_end, 2);
/* shl_reg64_lt32_low packed in delay slot. */
shl_reg64_lt32_low(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
return -EINVAL;
shl_reg64_ge32(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
return -EINVAL;
}
return 0;
}
/* Pseudo code:
* if shift_amt >= 32
* dst_high = 0;
* dst_low = dst_high >> shift_amt[4:0]
* else
* dst_high = dst_high >> shift_amt
* dst_low = (dst_high, dst_low) >> shift_amt
*
* The indirect shift will use the same logic at runtime.
*/
static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
if (shift_amt < 32) {
emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_DSHF, shift_amt);
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
} else if (shift_amt == 32) {
wrp_reg_mov(nfp_prog, dst, dst + 1);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
} else if (shift_amt > 32) {
emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
}
return 0;
@ -1659,21 +1842,186 @@ static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2;
if (insn->imm < 32) {
emit_shf(nfp_prog, reg_both(dst),
reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
SHF_SC_R_DSHF, insn->imm);
emit_shf(nfp_prog, reg_both(dst + 1),
reg_none(), SHF_OP_NONE, reg_b(dst + 1),
SHF_SC_R_SHF, insn->imm);
} else if (insn->imm == 32) {
return __shr_imm64(nfp_prog, dst, insn->imm);
}
/* NOTE: for indirect right shift, LOW part should be calculated first. */
static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF);
}
static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_DSHF);
}
static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
shr_reg64_lt32_low(nfp_prog, dst, src);
shr_reg64_lt32_high(nfp_prog, dst, src);
}
static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
reg_b(dst + 1), SHF_SC_R_SHF);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
}
static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 umin, umax;
u8 dst, src;
dst = insn->dst_reg * 2;
umin = meta->umin;
umax = meta->umax;
if (umin == umax)
return __shr_imm64(nfp_prog, dst, umin);
src = insn->src_reg * 2;
if (umax < 32) {
shr_reg64_lt32(nfp_prog, dst, src);
} else if (umin >= 32) {
shr_reg64_ge32(nfp_prog, dst, src);
} else {
/* Generate different instruction sequences depending on runtime
* value of shift amount.
*/
u16 label_ge32, label_end;
label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
shr_reg64_lt32_low(nfp_prog, dst, src);
label_end = nfp_prog_current_offset(nfp_prog) + 6;
emit_br(nfp_prog, BR_UNC, label_end, 2);
/* shr_reg64_lt32_high packed in delay slot. */
shr_reg64_lt32_high(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
return -EINVAL;
shr_reg64_ge32(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
return -EINVAL;
}
return 0;
}
/* Code logic is the same as __shr_imm64 except that ashr requires the
* signedness bit, conveyed through the PREV_ALU result.
*/
static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
if (shift_amt < 32) {
emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
reg_b(dst), SHF_SC_R_DSHF, shift_amt);
/* Set signedness bit. */
emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
reg_imm(0));
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
} else if (shift_amt == 32) {
/* NOTE: this also helps setting signedness bit. */
wrp_reg_mov(nfp_prog, dst, dst + 1);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
} else if (insn->imm > 32) {
emit_shf(nfp_prog, reg_both(dst),
reg_none(), SHF_OP_NONE, reg_b(dst + 1),
SHF_SC_R_SHF, insn->imm - 32);
wrp_immed(nfp_prog, reg_both(dst + 1), 0);
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, 31);
} else if (shift_amt > 32) {
emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
reg_imm(0));
emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, 31);
}
return 0;
}
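
For the arithmetic variant the only difference is what fills the vacated high bits, which is why the sequences above first latch the sign through an ALU op before shifting with SHF_OP_ASHR. A hedged C reference model follows; it assumes arithmetic >> on signed ints, as on every compiler the kernel supports.

#include <stdint.h>

static void ashr64_model(uint32_t *hi, uint32_t *lo, unsigned int amt)
{
        int32_t shi = (int32_t)*hi;     /* the "signedness bit" carrier */

        if (amt == 0)
                return;
        if (amt < 32) {
                *lo = (*lo >> amt) | (*hi << (32 - amt));
                *hi = (uint32_t)(shi >> amt);   /* sign-filled */
        } else {
                *lo = (uint32_t)(shi >> (amt - 32));
                *hi = (uint32_t)(shi >> 31);    /* all copies of the sign */
        }
}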
static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u8 dst = insn->dst_reg * 2;
return __ashr_imm64(nfp_prog, dst, insn->imm);
}
static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
/* NOTE: the first insn will set both indirect shift amount (source A)
* and signedness bit (MSB of result).
*/
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF);
}
static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
/* NOTE: this is the same as a logical shift because we don't need to
* shift in the signedness bit when the shift amount is less than 32.
*/
return shr_reg64_lt32_low(nfp_prog, dst, src);
}
static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
ashr_reg64_lt32_low(nfp_prog, dst, src);
ashr_reg64_lt32_high(nfp_prog, dst, src);
}
static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF);
emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
reg_b(dst + 1), SHF_SC_R_SHF, 31);
}
/* Like ashr_imm64, but need to use indirect shift. */
static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 umin, umax;
u8 dst, src;
dst = insn->dst_reg * 2;
umin = meta->umin;
umax = meta->umax;
if (umin == umax)
return __ashr_imm64(nfp_prog, dst, umin);
src = insn->src_reg * 2;
if (umax < 32) {
ashr_reg64_lt32(nfp_prog, dst, src);
} else if (umin >= 32) {
ashr_reg64_ge32(nfp_prog, dst, src);
} else {
u16 label_ge32, label_end;
label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
ashr_reg64_lt32_low(nfp_prog, dst, src);
label_end = nfp_prog_current_offset(nfp_prog) + 6;
emit_br(nfp_prog, BR_UNC, label_end, 2);
/* ashr_reg64_lt32_high packed in delay slot. */
ashr_reg64_lt32_high(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
return -EINVAL;
ashr_reg64_ge32(nfp_prog, dst, src);
if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
return -EINVAL;
}
return 0;
@ -2501,8 +2849,12 @@ static const instr_cb_t instr_cb[256] = {
[BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
[BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
[BPF_ALU64 | BPF_NEG] = neg_reg64,
[BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64,
[BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
[BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64,
[BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64,
[BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
[BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
[BPF_ALU | BPF_MOV | BPF_X] = mov_reg,
[BPF_ALU | BPF_MOV | BPF_K] = mov_imm,
[BPF_ALU | BPF_XOR | BPF_X] = xor_reg,

View File

@ -263,6 +263,8 @@ struct nfp_bpf_reg_state {
* @func_id: function id for call instructions
* @arg1: arg1 for call instructions
* @arg2: arg2 for call instructions
* @umin: copy of core verifier umin_value.
* @umax: copy of core verifier umax_value.
* @off: index of first generated machine instruction (in nfp_prog.prog)
* @n: eBPF instruction number
* @flags: eBPF instruction extra optimization flags
@ -298,6 +300,13 @@ struct nfp_insn_meta {
struct bpf_reg_state arg1;
struct nfp_bpf_reg_state arg2;
};
/* We are interested in range info for some operands,
* for example, the shift amount.
*/
struct {
u64 umin;
u64 umax;
};
};
unsigned int off;
unsigned short n;
@ -375,6 +384,25 @@ static inline bool is_mbpf_xadd(const struct nfp_insn_meta *meta)
return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_XADD);
}
static inline bool is_mbpf_indir_shift(const struct nfp_insn_meta *meta)
{
u8 code = meta->insn.code;
bool is_alu, is_shift;
u8 opclass, opcode;
opclass = BPF_CLASS(code);
is_alu = opclass == BPF_ALU64 || opclass == BPF_ALU;
if (!is_alu)
return false;
opcode = BPF_OP(code);
is_shift = opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH;
if (!is_shift)
return false;
return BPF_SRC(code) == BPF_X;
}
/**
* struct nfp_prog - nfp BPF program
* @bpf: backpointer to the bpf app priv structure

View File

@ -190,6 +190,8 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog,
meta->insn = prog[i];
meta->n = i;
if (is_mbpf_indir_shift(meta))
meta->umin = U64_MAX;
list_add_tail(&meta->l, &nfp_prog->insns);
}

View File

@ -551,6 +551,14 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
if (is_mbpf_xadd(meta))
return nfp_bpf_check_xadd(nfp_prog, meta, env);
if (is_mbpf_indir_shift(meta)) {
const struct bpf_reg_state *sreg =
cur_regs(env) + meta->insn.src_reg;
meta->umin = min(meta->umin, sreg->umin_value);
meta->umax = max(meta->umax, sreg->umax_value);
}
return 0;
}

View File

@ -72,8 +72,21 @@
#define OP_BR_ADDR_LO 0x007ffc00000ULL
#define OP_BR_ADDR_HI 0x10000000000ULL
#define nfp_is_br(_insn) \
(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
#define OP_BR_BIT_BASE 0x0d000000000ULL
#define OP_BR_BIT_BASE_MASK 0x0f800080300ULL
#define OP_BR_BIT_A_SRC 0x000000000ffULL
#define OP_BR_BIT_B_SRC 0x0000003fc00ULL
#define OP_BR_BIT_BV 0x00000040000ULL
#define OP_BR_BIT_SRC_LMEXTN 0x40000000000ULL
#define OP_BR_BIT_DEFBR OP_BR_DEFBR
#define OP_BR_BIT_ADDR_LO OP_BR_ADDR_LO
#define OP_BR_BIT_ADDR_HI OP_BR_ADDR_HI
static inline bool nfp_is_br(u64 insn)
{
return (insn & OP_BR_BASE_MASK) == OP_BR_BASE ||
(insn & OP_BR_BIT_BASE_MASK) == OP_BR_BIT_BASE;
}
enum br_mask {
BR_BEQ = 0x00,
@ -161,6 +174,7 @@ enum shf_op {
SHF_OP_NONE = 0,
SHF_OP_AND = 2,
SHF_OP_OR = 5,
SHF_OP_ASHR = 6,
};
enum shf_sc {

View File

@ -70,6 +70,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/sock.h>
#include <net/xdp.h>
#include <linux/seq_file.h>
#include <linux/uio.h>
#include <linux/skb_array.h>
@ -1284,34 +1285,44 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64,
};
static int tun_xdp_xmit(struct net_device *dev, struct xdp_frame *frame)
static int tun_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames)
{
struct tun_struct *tun = netdev_priv(dev);
struct tun_file *tfile;
u32 numqueues;
int ret = 0;
int drops = 0;
int cnt = n;
int i;
rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues);
if (!numqueues) {
ret = -ENOSPC;
goto out;
rcu_read_unlock();
return -ENXIO; /* Caller will free/return all frames */
}
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]);
/* Encode the XDP flag into the lowest bit for the consumer to
* differentiate an XDP buffer from an sk_buff.
*/
if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(frame))) {
this_cpu_inc(tun->pcpu_stats->tx_dropped);
ret = -ENOSPC;
}
out:
spin_lock(&tfile->tx_ring.producer_lock);
for (i = 0; i < n; i++) {
struct xdp_frame *xdp = frames[i];
/* Encode the XDP flag into the lowest bit for the consumer to
* differentiate an XDP buffer from an sk_buff.
*/
void *frame = tun_xdp_to_ptr(xdp);
if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
this_cpu_inc(tun->pcpu_stats->tx_dropped);
xdp_return_frame_rx_napi(xdp);
drops++;
}
}
spin_unlock(&tfile->tx_ring.producer_lock);
rcu_read_unlock();
return ret;
return cnt - drops;
}
static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
@ -1321,7 +1332,7 @@ static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
if (unlikely(!frame))
return -EOVERFLOW;
return tun_xdp_xmit(dev, frame);
return tun_xdp_xmit(dev, 1, &frame);
}
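
The "encode the XDP flag into the lowest bit" comment refers to tun's pointer-tagging scheme: a tx_ring entry may be either an sk_buff or an xdp_frame pointer, and since both allocations are at least 2-byte aligned, bit 0 is free to act as a type tag. A sketch in the spirit of tun's helpers; the exact in-tree definitions may differ slightly.

#define TUN_XDP_FLAG 0x1UL

static void *tun_xdp_to_ptr(void *ptr)
{
        return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
}

static void *tun_ptr_to_xdp(void *ptr)
{
        return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
}

static bool tun_is_xdp_frame(void *ptr)
{
        return (unsigned long)ptr & TUN_XDP_FLAG;
}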
static void tun_xdp_flush(struct net_device *dev)

View File

@ -419,23 +419,13 @@ static void virtnet_xdp_flush(struct net_device *dev)
virtqueue_kick(sq->vq);
}
static int __virtnet_xdp_xmit(struct virtnet_info *vi,
struct xdp_frame *xdpf)
static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
struct send_queue *sq,
struct xdp_frame *xdpf)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
struct xdp_frame *xdpf_sent;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
int err;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
/* virtqueue want to use data area in-front of packet */
if (unlikely(xdpf->metasize > 0))
return -EOPNOTSUPP;
@ -459,11 +449,40 @@ static int __virtnet_xdp_xmit(struct virtnet_info *vi,
return 0;
}
static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
static int __virtnet_xdp_tx_xmit(struct virtnet_info *vi,
struct xdp_frame *xdpf)
{
struct xdp_frame *xdpf_sent;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
return __virtnet_xdp_xmit_one(vi, sq, xdpf);
}
static int virtnet_xdp_xmit(struct net_device *dev,
int n, struct xdp_frame **frames)
{
struct virtnet_info *vi = netdev_priv(dev);
struct receive_queue *rq = vi->rq;
struct xdp_frame *xdpf_sent;
struct bpf_prog *xdp_prog;
struct send_queue *sq;
unsigned int len;
unsigned int qp;
int drops = 0;
int err;
int i;
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
sq = &vi->sq[qp];
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
* indicate XDP resources have been successfully allocated.
@ -472,7 +491,20 @@ static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_frame *xdpf)
if (!xdp_prog)
return -ENXIO;
return __virtnet_xdp_xmit(vi, xdpf);
/* Free up any pending old buffers before queueing new ones. */
while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL)
xdp_return_frame(xdpf_sent);
for (i = 0; i < n; i++) {
struct xdp_frame *xdpf = frames[i];
err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
if (err) {
xdp_return_frame_rx_napi(xdpf);
drops++;
}
}
return n - drops;
}
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
@ -616,7 +648,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf))
goto err_xdp;
err = __virtnet_xdp_xmit(vi, xdpf);
err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp;
@ -779,7 +811,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
xdpf = convert_to_xdp_frame(&xdp);
if (unlikely(!xdpf))
goto err_xdp;
err = __virtnet_xdp_xmit(vi, xdpf);
err = __virtnet_xdp_tx_xmit(vi, xdpf);
if (unlikely(err)) {
trace_xdp_exception(vi->dev, xdp_prog, act);
if (unlikely(xdp_page != page))

View File

@ -69,8 +69,8 @@ struct bpf_map {
u32 pages;
u32 id;
int numa_node;
u32 btf_key_id;
u32 btf_value_id;
u32 btf_key_type_id;
u32 btf_value_type_id;
struct btf *btf;
bool unpriv_array;
/* 55 bytes hole */
@ -463,6 +463,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
int bpf_get_file_flag(int flags);
int bpf_check_uarg_tail_zero(void __user *uaddr, size_t expected_size,
size_t actual_size);
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters.
@ -485,14 +487,17 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
struct xdp_buff;
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
void __cpu_map_flush(struct bpf_map *map);
struct xdp_buff;
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
@ -571,6 +576,16 @@ static inline void __dev_map_flush(struct bpf_map *map)
{
}
struct xdp_buff;
struct bpf_dtab_netdev;
static inline
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
return 0;
}
static inline
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
{
@ -585,7 +600,6 @@ static inline void __cpu_map_flush(struct bpf_map *map)
{
}
struct xdp_buff;
static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
struct xdp_buff *xdp,
struct net_device *dev_rx)

View File

@ -9,9 +9,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)

View File

@ -517,6 +517,7 @@ struct sk_msg_buff {
bool sg_copy[MAX_SKB_FRAGS];
__u32 flags;
struct sock *sk_redir;
struct sock *sk;
struct sk_buff *skb;
struct list_head list;
};

View File

@ -1185,9 +1185,13 @@ struct dev_ifalias {
* This function is used to set or query state related to XDP on the
* netdevice and manage BPF offload. See definition of
* enum bpf_netdev_command for details.
* int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_frame *xdp);
* This function is used to submit a XDP packet for transmit on a
* netdevice.
* int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp);
* This function is used to submit @n XDP packets for transmit on a
* netdevice. It returns the number of frames successfully transmitted;
* frames that got dropped are freed/returned via xdp_return_frame().
* A negative return value means a general error occurred invoking the
* ndo: no frames were transmitted and the core caller must free all frames.
* TODO: Consider adding a flag to allow sending a flush operation.
* void (*ndo_xdp_flush)(struct net_device *dev);
* This function is used to inform the driver to flush a particular
* xdp tx queue. Must be called on same CPU as xdp_xmit.
@ -1375,8 +1379,8 @@ struct net_device_ops {
int needed_headroom);
int (*ndo_bpf)(struct net_device *dev,
struct netdev_bpf *bpf);
int (*ndo_xdp_xmit)(struct net_device *dev,
struct xdp_frame *xdp);
int (*ndo_xdp_xmit)(struct net_device *dev, int n,
struct xdp_frame **xdp);
void (*ndo_xdp_flush)(struct net_device *dev);
};
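
From the caller's side, the contract documented above splits frame ownership cleanly: on a negative return the caller still owns (and must free) every frame, while on a non-negative return the driver has already freed the dropped ones. A hedged sketch of a core-side caller, loosely modeled on the devmap bulking path; xmit_bulk is a hypothetical name, not an in-tree function.

static int xmit_bulk(struct net_device *dev, struct xdp_frame **frames, int n)
{
        int sent, i;

        sent = dev->netdev_ops->ndo_xdp_xmit(dev, n, frames);
        if (sent < 0) {
                /* General error: nothing was sent and the driver freed
                 * nothing, so every frame is still owned here.
                 */
                for (i = 0; i < n; i++)
                        xdp_return_frame_rx_napi(frames[i]);
                return sent;
        }

        /* The (n - sent) dropped frames were already freed by the
         * driver through the XDP return API.
         */
        return sent;
}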

View File

@ -868,6 +868,7 @@ extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void perf_event_delayed_put(struct task_struct *task);
extern struct file *perf_event_get(unsigned int fd);
extern const struct perf_event *perf_get_event(struct file *file);
extern const struct perf_event_attr *perf_event_attrs(struct perf_event *event);
extern void perf_event_print_debug(void);
extern void perf_pmu_disable(struct pmu *pmu);
@ -1289,6 +1290,10 @@ static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
static inline void perf_event_delayed_put(struct task_struct *task) { }
static inline struct file *perf_event_get(unsigned int fd) { return ERR_PTR(-EINVAL); }
static inline const struct perf_event *perf_get_event(struct file *file)
{
return ERR_PTR(-EINVAL);
}
static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{
return ERR_PTR(-EINVAL);

View File

@ -473,6 +473,9 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info);
int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name);
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
u64 *probe_offset, u64 *probe_addr);
#else
static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
@ -504,6 +507,13 @@ static inline struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name
{
return NULL;
}
static inline int bpf_get_perf_event_info(const struct perf_event *event,
u32 *prog_id, u32 *fd_type,
const char **buf, u64 *probe_offset,
u64 *probe_addr)
{
return -EOPNOTSUPP;
}
#endif
enum {
@ -560,10 +570,17 @@ extern void perf_trace_del(struct perf_event *event, int flags);
#ifdef CONFIG_KPROBE_EVENTS
extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_kprobe_destroy(struct perf_event *event);
extern int bpf_get_kprobe_info(const struct perf_event *event,
u32 *fd_type, const char **symbol,
u64 *probe_offset, u64 *probe_addr,
bool perf_type_tracepoint);
#endif
#ifdef CONFIG_UPROBE_EVENTS
extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_uprobe_destroy(struct perf_event *event);
extern int bpf_get_uprobe_info(const struct perf_event *event,
u32 *fd_type, const char **filename,
u64 *probe_offset, bool perf_type_tracepoint);
#endif
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);

View File

@ -236,6 +236,8 @@ struct ipv6_stub {
struct flowi6 *fl6, int oif,
const struct sk_buff *skb,
int strict);
u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr);
void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,

View File

@ -412,6 +412,12 @@ static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
return f6i->fib6_nh.nh_dev;
}
static inline
struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i)
{
return f6i->fib6_nh.nh_lwtstate;
}
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags);

View File

@ -294,6 +294,9 @@ static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu;
}
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr);
struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
struct net_device *dev, struct sk_buff *skb,
const void *daddr);

View File

@ -449,4 +449,6 @@ static inline void fib_proc_exit(struct net *net)
}
#endif
u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr);
#endif /* _NET_FIB_H */

View File

@ -115,13 +115,14 @@ void page_pool_destroy(struct page_pool *pool);
void __page_pool_put_page(struct page_pool *pool,
struct page *page, bool allow_direct);
static inline void page_pool_put_page(struct page_pool *pool, struct page *page)
static inline void page_pool_put_page(struct page_pool *pool,
struct page *page, bool allow_direct)
{
/* When page_pool isn't compiled in, net/core/xdp.c doesn't
* allow registering MEM_TYPE_PAGE_POOL, but we still shield the
* linker from the undefined symbol.
*/
#ifdef CONFIG_PAGE_POOL
__page_pool_put_page(pool, page, false);
__page_pool_put_page(pool, page, allow_direct);
#endif
}
/* Very limited use-cases allow recycle direct */

View File

@ -49,7 +49,11 @@ struct seg6_pernet_data {
static inline struct seg6_pernet_data *seg6_pernet(struct net *net)
{
#if IS_ENABLED(CONFIG_IPV6)
return net->ipv6.seg6_data;
#else
return NULL;
#endif
}
extern int seg6_init(void);
@ -63,5 +67,6 @@ extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
int proto);
extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id);
#endif

include/net/seg6_local.h (new file, 32 lines)
View File

@ -0,0 +1,32 @@
/*
* SR-IPv6 implementation
*
* Authors:
* David Lebrun <david.lebrun@uclouvain.be>
* eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _NET_SEG6_LOCAL_H
#define _NET_SEG6_LOCAL_H
#include <linux/percpu.h>
#include <linux/net.h>
#include <linux/ipv6.h>
extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id);
struct seg6_bpf_srh_state {
bool valid;
u16 hdrlen;
};
DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
#endif

View File

@ -104,6 +104,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
}
void xdp_return_frame(struct xdp_frame *xdpf);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf);
void xdp_return_buff(struct xdp_buff *xdp);
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
* AF_XDP internal functions
/* SPDX-License-Identifier: GPL-2.0 */
/* AF_XDP internal functions
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _LINUX_XDP_SOCK_H

View File

@ -138,11 +138,18 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
__entry->map_id, __entry->map_index)
);
#ifndef __DEVMAP_OBJ_TYPE
#define __DEVMAP_OBJ_TYPE
struct _bpf_dtab_netdev {
struct net_device *dev;
};
#endif /* __DEVMAP_OBJ_TYPE */
#define devmap_ifindex(fwd, map) \
(!fwd ? 0 : \
(!map ? 0 : \
((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
((struct net_device *)fwd)->ifindex : 0)))
((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
@ -222,6 +229,47 @@ TRACE_EVENT(xdp_cpumap_enqueue,
__entry->to_cpu)
);
TRACE_EVENT(xdp_devmap_xmit,
TP_PROTO(const struct bpf_map *map, u32 map_index,
int sent, int drops,
const struct net_device *from_dev,
const struct net_device *to_dev, int err),
TP_ARGS(map, map_index, sent, drops, from_dev, to_dev, err),
TP_STRUCT__entry(
__field(int, map_id)
__field(u32, act)
__field(u32, map_index)
__field(int, drops)
__field(int, sent)
__field(int, from_ifindex)
__field(int, to_ifindex)
__field(int, err)
),
TP_fast_assign(
__entry->map_id = map->id;
__entry->act = XDP_REDIRECT;
__entry->map_index = map_index;
__entry->drops = drops;
__entry->sent = sent;
__entry->from_ifindex = from_dev->ifindex;
__entry->to_ifindex = to_dev->ifindex;
__entry->err = err;
),
TP_printk("ndo_xdp_xmit"
" map_id=%d map_index=%d action=%s"
" sent=%d drops=%d"
" from_ifindex=%d to_ifindex=%d err=%d",
__entry->map_id, __entry->map_index,
__print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
__entry->sent, __entry->drops,
__entry->from_ifindex, __entry->to_ifindex, __entry->err)
);
#endif /* _TRACE_XDP_H */
#include <trace/define_trace.h>

View File

@ -97,6 +97,7 @@ enum bpf_cmd {
BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
};
enum bpf_map_type {
@ -141,6 +142,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_PROG_TYPE_LWT_SEG6LOCAL,
};
enum bpf_attach_type {
@ -284,8 +286,8 @@ union bpf_attr {
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
__u32 btf_fd; /* fd pointing to a BTF type data */
__u32 btf_key_id; /* BTF type_id of the key */
__u32 btf_value_id; /* BTF type_id of the value */
__u32 btf_key_type_id; /* BTF type_id of the key */
__u32 btf_value_type_id; /* BTF type_id of the value */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@ -379,6 +381,22 @@ union bpf_attr {
__u32 btf_log_size;
__u32 btf_log_level;
};
struct {
__u32 pid; /* input: pid */
__u32 fd; /* input: fd */
__u32 flags; /* input: flags */
__u32 buf_len; /* input/output: buf len */
__aligned_u64 buf; /* input/output:
* tp_name for tracepoint
* symbol for kprobe
* filename for uprobe
*/
__u32 prog_id; /* output: prog_id */
__u32 fd_type; /* output: BPF_FD_TYPE_* */
__u64 probe_offset; /* output: probe_offset */
__u64 probe_addr; /* output: probe_addr */
} task_fd_query;
} __attribute__((aligned(8)));
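
A hedged userspace sketch of driving the new BPF_TASK_FD_QUERY command through the bpf(2) syscall, using only the task_fd_query fields above; the wrapper name and error handling are illustrative.

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int task_fd_query(int pid, int fd, char *buf, __u32 buf_len,
                         __u32 *prog_id, __u32 *fd_type,
                         __u64 *probe_offset, __u64 *probe_addr)
{
        union bpf_attr attr;
        int err;

        memset(&attr, 0, sizeof(attr));
        attr.task_fd_query.pid = pid;
        attr.task_fd_query.fd = fd;
        attr.task_fd_query.buf = (__u64)(unsigned long)buf;
        attr.task_fd_query.buf_len = buf_len;

        err = syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr));
        if (!err) {
                *prog_id = attr.task_fd_query.prog_id;
                *fd_type = attr.task_fd_query.fd_type;
                *probe_offset = attr.task_fd_query.probe_offset;
                *probe_addr = attr.task_fd_query.probe_addr;
        }
        return err;
}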
/* The description below is an attempt at providing documentation to eBPF
@ -1902,6 +1920,90 @@ union bpf_attr {
* egress otherwise). This is the only flag supported for now.
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
* int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
* Description
* Encapsulate the packet associated to *skb* within a Layer 3
* protocol header. This header is provided in the buffer at
* address *hdr*, with *len* its size in bytes. *type* indicates
* the protocol of the header and can be one of:
*
* **BPF_LWT_ENCAP_SEG6**
* IPv6 encapsulation with Segment Routing Header
* (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
* the IPv6 header is computed by the kernel.
* **BPF_LWT_ENCAP_SEG6_INLINE**
* Only works if *skb* contains an IPv6 packet. Insert a
* Segment Routing Header (**struct ipv6_sr_hdr**) inside
* the IPv6 header.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
* Description
* Store *len* bytes from address *from* into the packet
* associated to *skb*, at *offset*. Only the flags, tag and TLVs
* inside the outermost IPv6 Segment Routing Header can be
* modified through this helper.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
* Description
* Adjust the size allocated to TLVs in the outermost IPv6
* Segment Routing Header contained in the packet associated to
* *skb*, at position *offset* by *delta* bytes. Only offsets
* after the segments are accepted. *delta* can be either
* positive (growing) or negative (shrinking).
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
* Description
* Apply an IPv6 Segment Routing action of type *action* to the
* packet associated to *skb*. Each action takes a parameter
* contained at address *param*, and of length *param_len* bytes.
* *action* can be one of:
*
* **SEG6_LOCAL_ACTION_END_X**
* End.X action: Endpoint with Layer-3 cross-connect.
* Type of *param*: **struct in6_addr**.
* **SEG6_LOCAL_ACTION_END_T**
* End.T action: Endpoint with specific IPv6 table lookup.
* Type of *param*: **int**.
* **SEG6_LOCAL_ACTION_END_B6**
* End.B6 action: Endpoint bound to an SRv6 policy.
* Type of param: **struct ipv6_sr_hdr**.
* **SEG6_LOCAL_ACTION_END_B6_ENCAP**
* End.B6.Encap action: Endpoint bound to an SRv6
* encapsulation policy.
* Type of param: **struct ipv6_sr_hdr**.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -1976,7 +2078,11 @@ union bpf_attr {
FN(fib_lookup), \
FN(sock_hash_update), \
FN(msg_redirect_hash), \
FN(sk_redirect_hash),
FN(sk_redirect_hash), \
FN(lwt_push_encap), \
FN(lwt_seg6_store_bytes), \
FN(lwt_seg6_adjust_srh), \
FN(lwt_seg6_action),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -2043,6 +2149,12 @@ enum bpf_hdr_start_off {
BPF_HDR_START_NET,
};
/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
enum bpf_lwt_encap_mode {
BPF_LWT_ENCAP_SEG6,
BPF_LWT_ENCAP_SEG6_INLINE
};
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
@ -2176,6 +2288,14 @@ enum sk_action {
struct sk_msg_md {
void *data;
void *data_end;
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
__u32 local_ip4; /* Stored in network byte order */
__u32 remote_ip6[4]; /* Stored in network byte order */
__u32 local_ip6[4]; /* Stored in network byte order */
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
};
#define BPF_TAG_SIZE 8
@ -2197,6 +2317,10 @@ struct bpf_prog_info {
__u32 gpl_compatible:1;
__u64 netns_dev;
__u64 netns_ino;
__u32 nr_jited_ksyms;
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
} __attribute__((aligned(8)));
struct bpf_map_info {
@ -2211,8 +2335,8 @@ struct bpf_map_info {
__u64 netns_dev;
__u64 netns_ino;
__u32 btf_id;
__u32 btf_key_id;
__u32 btf_value_id;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
} __attribute__((aligned(8)));
struct bpf_btf_info {
@ -2450,4 +2574,13 @@ struct bpf_fib_lookup {
__u8 dmac[6]; /* ETH_ALEN */
};
enum bpf_task_fd_type {
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
BPF_FD_TYPE_TRACEPOINT, /* tp name */
BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
BPF_FD_TYPE_UPROBE, /* filename + offset */
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
#endif /* _UAPI__LINUX_BPF_H__ */
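
To make the seg6local additions concrete, here is a hedged sketch of an End.BPF program using the new helpers. The section name and helper declarations follow the selftests that accompany this series (bpf_helpers.h), and SEG6_LOCAL_ACTION_END_X as the chosen action is just an example.

#include <linux/bpf.h>
#include <linux/in6.h>
#include <linux/seg6_local.h>
#include "bpf_helpers.h"

SEC("lwt_seg6local")
int do_end_bpf(struct __sk_buff *skb)
{
        /* Next segment endpoint; a real deployment would fill this
         * in from the control plane (left zeroed here as a stub).
         */
        struct in6_addr nh = {};

        /* Apply an End.X (Layer-3 cross-connect) action to the packet */
        if (bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
                                &nh, sizeof(nh)) < 0)
                return BPF_DROP;

        return BPF_OK;
}

char _license[] SEC("license") = "GPL";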

View File

@ -12,42 +12,29 @@ struct btf_header {
__u16 magic;
__u8 version;
__u8 flags;
__u32 parent_label;
__u32 parent_name;
__u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
__u32 label_off; /* offset of label section */
__u32 object_off; /* offset of data object section*/
__u32 func_off; /* offset of function section */
__u32 type_off; /* offset of type section */
__u32 type_len; /* length of type section */
__u32 str_off; /* offset of string section */
__u32 str_len; /* length of string section */
};
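
The new hdr_len field is what makes btf_header extensible: consumers locate the sections relative to the header length the producer wrote, not sizeof(struct btf_header), so a newer header with extra trailing fields still parses. A minimal sketch of that pattern; the function name is made up.

#include <linux/btf.h>  /* struct btf_header */

static int btf_locate_sections(const void *data, __u32 data_size,
                               const void **types, const char **strs)
{
        const struct btf_header *hdr = data;

        if (data_size < sizeof(*hdr) || hdr->hdr_len < sizeof(*hdr) ||
            hdr->hdr_len > data_size)
                return -1;

        /* All section offsets are relative to the END of the header,
         * whose size a newer producer may have grown.
         */
        *types = (const char *)data + hdr->hdr_len + hdr->type_off;
        *strs = (const char *)data + hdr->hdr_len + hdr->str_off;
        return 0;
}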
/* Max # of type identifier */
#define BTF_MAX_TYPE 0x7fffffff
#define BTF_MAX_TYPE 0x0000ffff
/* Max offset into the string section */
#define BTF_MAX_NAME_OFFSET 0x7fffffff
#define BTF_MAX_NAME_OFFSET 0x0000ffff
/* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN 0xffff
/* The type id is referring to a parent BTF */
#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
/* String is in the ELF string section */
#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
struct btf_type {
__u32 name_off;
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bits 31: root
* bits 24-27: kind (e.g. int, ptr, array...etc)
* bits 28-31: unused
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT and UNION.
@ -62,8 +49,7 @@ struct btf_type {
};
};
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80))
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_KIND_UNKN 0 /* Unknown */
@ -88,15 +74,14 @@ struct btf_type {
/* BTF_KIND_INT is followed by a u32 and the following
* is the 32 bits arrangement:
*/
#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24)
#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
/* Attributes stored in the BTF_INT_ENCODING */
#define BTF_INT_SIGNED 0x1
#define BTF_INT_CHAR 0x2
#define BTF_INT_BOOL 0x4
#define BTF_INT_VARARGS 0x8
#define BTF_INT_SIGNED (1 << 0)
#define BTF_INT_CHAR (1 << 1)
#define BTF_INT_BOOL (1 << 2)
/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
* The exact number of btf_enum is stored in the vlen (of the

View File

@ -1,17 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
*
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* if_xdp: XDP socket user-space interface
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* Author(s): Björn Töpel <bjorn.topel@intel.com>
* Magnus Karlsson <magnus.karlsson@intel.com>
*/
@ -26,19 +17,33 @@
struct sockaddr_xdp {
__u16 sxdp_family;
__u16 sxdp_flags;
__u32 sxdp_ifindex;
__u32 sxdp_queue_id;
__u32 sxdp_shared_umem_fd;
__u16 sxdp_flags;
};
struct xdp_ring_offset {
__u64 producer;
__u64 consumer;
__u64 desc;
};
struct xdp_mmap_offsets {
struct xdp_ring_offset rx;
struct xdp_ring_offset tx;
struct xdp_ring_offset fr; /* Fill */
struct xdp_ring_offset cr; /* Completion */
};
/* XDP socket options */
#define XDP_RX_RING 1
#define XDP_TX_RING 2
#define XDP_UMEM_REG 3
#define XDP_UMEM_FILL_RING 4
#define XDP_UMEM_COMPLETION_RING 5
#define XDP_STATISTICS 6
#define XDP_MMAP_OFFSETS 1
#define XDP_RX_RING 2
#define XDP_TX_RING 3
#define XDP_UMEM_REG 4
#define XDP_UMEM_FILL_RING 5
#define XDP_UMEM_COMPLETION_RING 6
#define XDP_STATISTICS 7
struct xdp_umem_reg {
__u64 addr; /* Start of packet data area */
@ -59,6 +64,7 @@ struct xdp_statistics {
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000
/* Rx/Tx descriptor */
struct xdp_desc {
__u32 idx;
__u32 len;
@ -67,21 +73,6 @@ struct xdp_desc {
__u8 padding[5];
};
struct xdp_ring {
__u32 producer __attribute__((aligned(64)));
__u32 consumer __attribute__((aligned(64)));
};
/* Used for the RX and TX queues for packets */
struct xdp_rxtx_ring {
struct xdp_ring ptrs;
struct xdp_desc desc[0] __attribute__((aligned(64)));
};
/* Used for the fill and completion queues for buffers */
struct xdp_umem_ring {
struct xdp_ring ptrs;
__u32 desc[0] __attribute__((aligned(64)));
};
/* UMEM descriptor is __u32 */
#endif /* _LINUX_IF_XDP_H */
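
With the fixed ring structs gone from the uapi (above), userspace is expected to query the ring layout at runtime via the new XDP_MMAP_OFFSETS socket option and mmap the rings using the returned offsets. A hedged sketch for the RX ring; error handling is minimal, XDP_RX_RING is assumed to have been set already, and the XDP_PGOFF_RX_RING mmap offset is taken from if_xdp.h.

#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/if_xdp.h>

#ifndef SOL_XDP
#define SOL_XDP 283             /* not yet in older libc headers */
#endif

static int map_rx_ring(int xsk_fd, unsigned int ndescs,
                       __u32 **prod, __u32 **cons, struct xdp_desc **descs)
{
        struct xdp_mmap_offsets off;
        socklen_t optlen = sizeof(off);
        void *map;

        /* Ask the kernel where producer/consumer/descs live ... */
        if (getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen))
                return -1;

        /* ... then mmap the RX ring and apply the offsets. */
        map = mmap(NULL, off.rx.desc + ndescs * sizeof(struct xdp_desc),
                   PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
                   xsk_fd, XDP_PGOFF_RX_RING);
        if (map == MAP_FAILED)
                return -1;

        *prod = (__u32 *)((char *)map + off.rx.producer);
        *cons = (__u32 *)((char *)map + off.rx.consumer);
        *descs = (struct xdp_desc *)((char *)map + off.rx.desc);
        return 0;
}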

View File

@ -25,6 +25,7 @@ enum {
SEG6_LOCAL_NH6,
SEG6_LOCAL_IIF,
SEG6_LOCAL_OIF,
SEG6_LOCAL_BPF,
__SEG6_LOCAL_MAX,
};
#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
@ -59,10 +60,21 @@ enum {
SEG6_LOCAL_ACTION_END_AS = 13,
/* forward to SR-unaware VNF with masquerading */
SEG6_LOCAL_ACTION_END_AM = 14,
/* custom BPF action */
SEG6_LOCAL_ACTION_END_BPF = 15,
__SEG6_LOCAL_ACTION_MAX,
};
#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
enum {
SEG6_LOCAL_BPF_PROG_UNSPEC,
SEG6_LOCAL_BPF_PROG,
SEG6_LOCAL_BPF_PROG_NAME,
__SEG6_LOCAL_BPF_PROG_MAX,
};
#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
#endif

View File

@ -352,7 +352,7 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key,
}
seq_printf(m, "%u: ", *(u32 *)key);
btf_type_seq_show(map->btf, map->btf_value_id, value, m);
btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
seq_puts(m, "\n");
rcu_read_unlock();

View File

@ -12,6 +12,7 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
@ -162,13 +163,16 @@
#define BITS_ROUNDUP_BYTES(bits) \
(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
#define BTF_INFO_MASK 0x0f00ffff
#define BTF_INT_MASK 0x0fffffff
#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)
/* 16MB is enough for about 64k structs, each with 16 members,
 * plus a few MB for the string section.
* The hard limit is S32_MAX.
*/
#define BTF_MAX_SIZE (16 * 1024 * 1024)
/* 64k. We can raise it later. The hard limit is S32_MAX. */
#define BTF_MAX_NR_TYPES 65535
#define for_each_member(i, struct_type, member) \
for (i = 0, member = btf_type_member(struct_type); \
@ -184,15 +188,13 @@ static DEFINE_IDR(btf_idr);
static DEFINE_SPINLOCK(btf_idr_lock);
struct btf {
union {
struct btf_header *hdr;
void *data;
};
void *data;
struct btf_type **types;
u32 *resolved_ids;
u32 *resolved_sizes;
const char *strings;
void *nohdr_data;
struct btf_header hdr;
u32 nr_types;
u32 types_size;
u32 data_size;
@ -228,6 +230,11 @@ enum resolve_mode {
#define MAX_RESOLVE_DEPTH 32
struct btf_sec_info {
u32 off;
u32 len;
};
struct btf_verifier_env {
struct btf *btf;
u8 *visit_states;
@ -379,8 +386,6 @@ static const char *btf_int_encoding_str(u8 encoding)
return "CHAR";
else if (encoding == BTF_INT_BOOL)
return "BOOL";
else if (encoding == BTF_INT_VARARGS)
return "VARARGS";
else
return "UNKN";
}
@ -417,16 +422,16 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{
return !BTF_STR_TBL_ELF_ID(offset) &&
BTF_STR_OFFSET(offset) < btf->hdr->str_len;
return BTF_STR_OFFSET_VALID(offset) &&
offset < btf->hdr.str_len;
}
static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
{
if (!BTF_STR_OFFSET(offset))
if (!offset)
return "(anon)";
else if (BTF_STR_OFFSET(offset) < btf->hdr->str_len)
return &btf->strings[BTF_STR_OFFSET(offset)];
else if (offset < btf->hdr.str_len)
return &btf->strings[offset];
else
return "(invalid-name-offset)";
}
@ -439,6 +444,28 @@ static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
return btf->types[type_id];
}
/*
 * A regular int is not a bit field and must be one of
 * u8/u16/u32/u64.
*/
static bool btf_type_int_is_regular(const struct btf_type *t)
{
u16 nr_bits, nr_bytes;
u32 int_data;
int_data = btf_type_int(t);
nr_bits = BTF_INT_BITS(int_data);
nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
if (BITS_PER_BYTE_MASKED(nr_bits) ||
BTF_INT_OFFSET(int_data) ||
(nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
return false;
}
return true;
}
__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
const char *fmt, ...)
{
@ -536,7 +563,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
__btf_verifier_log(log, "\n");
}
static void btf_verifier_log_hdr(struct btf_verifier_env *env)
static void btf_verifier_log_hdr(struct btf_verifier_env *env,
u32 btf_data_size)
{
struct bpf_verifier_log *log = &env->log;
const struct btf *btf = env->btf;
@ -545,19 +573,16 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env)
if (!bpf_verifier_log_needed(log))
return;
hdr = btf->hdr;
hdr = &btf->hdr;
__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
__btf_verifier_log(log, "version: %u\n", hdr->version);
__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
__btf_verifier_log(log, "parent_label: %u\n", hdr->parent_label);
__btf_verifier_log(log, "parent_name: %u\n", hdr->parent_name);
__btf_verifier_log(log, "label_off: %u\n", hdr->label_off);
__btf_verifier_log(log, "object_off: %u\n", hdr->object_off);
__btf_verifier_log(log, "func_off: %u\n", hdr->func_off);
__btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
__btf_verifier_log(log, "btf_total_size: %u\n", btf->data_size);
__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
}
static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
@ -574,13 +599,13 @@ static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
struct btf_type **new_types;
u32 expand_by, new_size;
if (btf->types_size == BTF_MAX_NR_TYPES) {
if (btf->types_size == BTF_MAX_TYPE) {
btf_verifier_log(env, "Exceeded max num of types");
return -E2BIG;
}
expand_by = max_t(u32, btf->types_size >> 2, 16);
new_size = min_t(u32, BTF_MAX_NR_TYPES,
new_size = min_t(u32, BTF_MAX_TYPE,
btf->types_size + expand_by);
new_types = kvzalloc(new_size * sizeof(*new_types),
@ -910,6 +935,12 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
}
int_data = btf_type_int(t);
if (int_data & ~BTF_INT_MASK) {
btf_verifier_log_basic(env, t, "Invalid int_data:%x",
int_data);
return -EINVAL;
}
nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
if (nr_bits > BITS_PER_U64) {
@ -923,12 +954,17 @@ static s32 btf_int_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
/*
* Only one of the encoding bits is allowed and it
 * should be sufficient for pretty-printing purposes (i.e. decoding).
* Multiple bits can be allowed later if it is found
* to be insufficient.
*/
encoding = BTF_INT_ENCODING(int_data);
if (encoding &&
encoding != BTF_INT_SIGNED &&
encoding != BTF_INT_CHAR &&
encoding != BTF_INT_BOOL &&
encoding != BTF_INT_VARARGS) {
encoding != BTF_INT_BOOL) {
btf_verifier_log_type(env, t, "Unsupported encoding");
return -ENOTSUPP;
}
@ -1102,7 +1138,7 @@ static int btf_ref_type_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
if (BTF_TYPE_PARENT(t->type)) {
if (!BTF_TYPE_ID_VALID(t->type)) {
btf_verifier_log_type(env, t, "Invalid type_id");
return -EINVAL;
}
@ -1306,14 +1342,16 @@ static s32 btf_array_check_meta(struct btf_verifier_env *env,
return -EINVAL;
}
/* We are a little forgiving on array->index_type since
* the kernel is not using it.
/* Array elem type and index type cannot be in type void,
* so !array->type and !array->index_type are not allowed.
*/
/* Array elem cannot be in type void,
* so !array->type is not allowed.
*/
if (!array->type || BTF_TYPE_PARENT(array->type)) {
btf_verifier_log_type(env, t, "Invalid type_id");
if (!array->type || !BTF_TYPE_ID_VALID(array->type)) {
btf_verifier_log_type(env, t, "Invalid elem");
return -EINVAL;
}
if (!array->index_type || !BTF_TYPE_ID_VALID(array->index_type)) {
btf_verifier_log_type(env, t, "Invalid index");
return -EINVAL;
}
@ -1326,11 +1364,32 @@ static int btf_array_resolve(struct btf_verifier_env *env,
const struct resolve_vertex *v)
{
const struct btf_array *array = btf_type_array(v->t);
const struct btf_type *elem_type;
u32 elem_type_id = array->type;
const struct btf_type *elem_type, *index_type;
u32 elem_type_id, index_type_id;
struct btf *btf = env->btf;
u32 elem_size;
/* Check array->index_type */
index_type_id = array->index_type;
index_type = btf_type_by_id(btf, index_type_id);
if (btf_type_is_void_or_null(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
if (!env_type_is_resolve_sink(env, index_type) &&
!env_type_is_resolved(env, index_type_id))
return env_stack_push(env, index_type, index_type_id);
index_type = btf_type_id_size(btf, &index_type_id, NULL);
if (!index_type || !btf_type_is_int(index_type) ||
!btf_type_int_is_regular(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
/* Check array->type */
elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id);
if (btf_type_is_void_or_null(elem_type)) {
btf_verifier_log_type(env, v->t,
@ -1348,22 +1407,9 @@ static int btf_array_resolve(struct btf_verifier_env *env,
return -EINVAL;
}
if (btf_type_is_int(elem_type)) {
int int_type_data = btf_type_int(elem_type);
u16 nr_bits = BTF_INT_BITS(int_type_data);
u16 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
/* Put more restriction on array of int. The int cannot
* be a bit field and it must be either u8/u16/u32/u64.
*/
if (BITS_PER_BYTE_MASKED(nr_bits) ||
BTF_INT_OFFSET(int_type_data) ||
(nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) {
btf_verifier_log_type(env, v->t,
"Invalid array of int");
return -EINVAL;
}
if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) {
btf_verifier_log_type(env, v->t, "Invalid array of int");
return -EINVAL;
}
if (array->nelems && elem_size > U32_MAX / array->nelems) {
@ -1473,7 +1519,7 @@ static s32 btf_struct_check_meta(struct btf_verifier_env *env,
}
/* A member cannot be in type void */
if (!member->type || BTF_TYPE_PARENT(member->type)) {
if (!member->type || !BTF_TYPE_ID_VALID(member->type)) {
btf_verifier_log_member(env, t, member,
"Invalid type_id");
return -EINVAL;
@ -1726,6 +1772,12 @@ static s32 btf_check_meta(struct btf_verifier_env *env,
}
meta_left -= sizeof(*t);
if (t->info & ~BTF_INFO_MASK) {
btf_verifier_log(env, "[%u] Invalid btf_info:%x",
env->log_type_id, t->info);
return -EINVAL;
}
if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX ||
BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) {
btf_verifier_log(env, "[%u] Invalid kind:%u",
@ -1754,9 +1806,9 @@ static int btf_check_all_metas(struct btf_verifier_env *env)
struct btf_header *hdr;
void *cur, *end;
hdr = btf->hdr;
hdr = &btf->hdr;
cur = btf->nohdr_data + hdr->type_off;
end = btf->nohdr_data + hdr->str_off;
end = btf->nohdr_data + hdr->type_len;
env->log_type_id = 1;
while (cur < end) {
@ -1866,8 +1918,20 @@ static int btf_check_all_types(struct btf_verifier_env *env)
static int btf_parse_type_sec(struct btf_verifier_env *env)
{
const struct btf_header *hdr = &env->btf->hdr;
int err;
/* Type section must align to 4 bytes */
if (hdr->type_off & (sizeof(u32) - 1)) {
btf_verifier_log(env, "Unaligned type_off");
return -EINVAL;
}
if (!hdr->type_len) {
btf_verifier_log(env, "No type found");
return -EINVAL;
}
err = btf_check_all_metas(env);
if (err)
return err;
@ -1881,10 +1945,15 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
struct btf *btf = env->btf;
const char *start, *end;
hdr = btf->hdr;
hdr = &btf->hdr;
start = btf->nohdr_data + hdr->str_off;
end = start + hdr->str_len;
if (end != btf->data + btf->data_size) {
btf_verifier_log(env, "String section is not at the end");
return -EINVAL;
}
if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
start[0] || end[-1]) {
btf_verifier_log(env, "Invalid string section");
@ -1896,20 +1965,121 @@ static int btf_parse_str_sec(struct btf_verifier_env *env)
return 0;
}
static int btf_parse_hdr(struct btf_verifier_env *env)
static const size_t btf_sec_info_offset[] = {
offsetof(struct btf_header, type_off),
offsetof(struct btf_header, str_off),
};
static int btf_sec_info_cmp(const void *a, const void *b)
{
const struct btf_sec_info *x = a;
const struct btf_sec_info *y = b;
return (int)(x->off - y->off) ? : (int)(x->len - y->len);
}
static int btf_check_sec_info(struct btf_verifier_env *env,
u32 btf_data_size)
{
struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)];
u32 total, expected_total, i;
const struct btf_header *hdr;
const struct btf *btf;
btf = env->btf;
hdr = &btf->hdr;
/* Populate the secs from hdr */
for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++)
secs[i] = *(struct btf_sec_info *)((void *)hdr +
btf_sec_info_offset[i]);
sort(secs, ARRAY_SIZE(btf_sec_info_offset),
sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL);
/* Check for gaps and overlap among sections */
total = 0;
expected_total = btf_data_size - hdr->hdr_len;
for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) {
if (expected_total < secs[i].off) {
btf_verifier_log(env, "Invalid section offset");
return -EINVAL;
}
if (total < secs[i].off) {
/* gap */
btf_verifier_log(env, "Unsupported section found");
return -EINVAL;
}
if (total > secs[i].off) {
btf_verifier_log(env, "Section overlap found");
return -EINVAL;
}
if (expected_total - total < secs[i].len) {
btf_verifier_log(env,
"Total section length too long");
return -EINVAL;
}
total += secs[i].len;
}
/* There is data other than hdr and known sections */
if (expected_total != total) {
btf_verifier_log(env, "Unsupported section found");
return -EINVAL;
}
return 0;
}
static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
u32 btf_data_size)
{
const struct btf_header *hdr;
struct btf *btf = env->btf;
u32 meta_left;
u32 hdr_len, hdr_copy;
/*
* Minimal part of the "struct btf_header" that
* contains the hdr_len.
*/
struct btf_min_header {
u16 magic;
u8 version;
u8 flags;
u32 hdr_len;
} __user *min_hdr;
struct btf *btf;
int err;
if (btf->data_size < sizeof(*hdr)) {
btf = env->btf;
min_hdr = btf_data;
if (btf_data_size < sizeof(*min_hdr)) {
btf_verifier_log(env, "hdr_len not found");
return -EINVAL;
}
if (get_user(hdr_len, &min_hdr->hdr_len))
return -EFAULT;
if (btf_data_size < hdr_len) {
btf_verifier_log(env, "btf_header not found");
return -EINVAL;
}
btf_verifier_log_hdr(env);
err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len);
if (err) {
if (err == -E2BIG)
btf_verifier_log(env, "Unsupported btf_header");
return err;
}
hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
if (copy_from_user(&btf->hdr, btf_data, hdr_copy))
return -EFAULT;
hdr = &btf->hdr;
btf_verifier_log_hdr(env, btf_data_size);
hdr = btf->hdr;
if (hdr->magic != BTF_MAGIC) {
btf_verifier_log(env, "Invalid magic");
return -EINVAL;
@ -1925,26 +2095,14 @@ static int btf_parse_hdr(struct btf_verifier_env *env)
return -ENOTSUPP;
}
meta_left = btf->data_size - sizeof(*hdr);
if (!meta_left) {
if (btf_data_size == hdr->hdr_len) {
btf_verifier_log(env, "No data");
return -EINVAL;
}
if (meta_left < hdr->type_off || hdr->str_off <= hdr->type_off ||
/* Type section must align to 4 bytes */
hdr->type_off & (sizeof(u32) - 1)) {
btf_verifier_log(env, "Invalid type_off");
return -EINVAL;
}
if (meta_left < hdr->str_off ||
meta_left - hdr->str_off < hdr->str_len) {
btf_verifier_log(env, "Invalid str_off or str_len");
return -EINVAL;
}
btf->nohdr_data = btf->hdr + 1;
err = btf_check_sec_info(env, btf_data_size);
if (err)
return err;
return 0;
}
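To make the new layout concrete, here is a hedged user-space sketch of the smallest blob these checks accept: an extensible header whose hdr_len lets future kernels skip unknown tail fields, a 4-byte-aligned type section holding one BTF_KIND_INT, and the string section last, starting and ending with '\0'. The ENC macros mirror the uapi accessors and are local to this sketch.
#include <linux/btf.h>
#include <string.h>

#define BTF_INFO_ENC(kind, vlen)	(((kind) << 24) | ((vlen) & 0xffff))
#define BTF_INT_ENC(enc, off, bits)	(((enc) << 24) | ((off) << 16) | (bits))

struct one_int_btf {
	struct btf_header hdr;
	struct btf_type t;
	__u32 int_data;
	char strings[5];	/* "\0int\0" */
};

static void build_min_btf(struct one_int_btf *b)
{
	memset(b, 0, sizeof(*b));
	b->hdr.magic = BTF_MAGIC;
	b->hdr.version = BTF_VERSION;
	b->hdr.hdr_len = sizeof(b->hdr);	/* extensibility knob */
	b->hdr.type_off = 0;			/* sections contiguous, no gap */
	b->hdr.type_len = sizeof(b->t) + sizeof(b->int_data);
	b->hdr.str_off = b->hdr.type_len;	/* string section comes last */
	b->hdr.str_len = sizeof(b->strings);

	b->t.name_off = 1;			/* "int" */
	b->t.info = BTF_INFO_ENC(BTF_KIND_INT, 0);
	b->t.size = 4;
	b->int_data = BTF_INT_ENC(0, 0, 32);	/* plain 32-bit int */
	memcpy(b->strings, "\0int", 5);
}
When loading, the size handed to the kernel must be hdr_len + type_len + str_len, so that the string section lands exactly at the end of the data as btf_parse_str_sec() now demands.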
@ -1987,6 +2145,11 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
err = -ENOMEM;
goto errout;
}
env->btf = btf;
err = btf_parse_hdr(env, btf_data, btf_data_size);
if (err)
goto errout;
data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
if (!data) {
@ -1996,18 +2159,13 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
btf->data = data;
btf->data_size = btf_data_size;
btf->nohdr_data = btf->data + btf->hdr.hdr_len;
if (copy_from_user(data, btf_data, btf_data_size)) {
err = -EFAULT;
goto errout;
}
env->btf = btf;
err = btf_parse_hdr(env);
if (err)
goto errout;
err = btf_parse_str_sec(env);
if (err)
goto errout;
@ -2016,16 +2174,14 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size,
if (err)
goto errout;
if (!err && log->level && bpf_verifier_log_full(log)) {
if (log->level && bpf_verifier_log_full(log)) {
err = -ENOSPC;
goto errout;
}
if (!err) {
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
}
btf_verifier_env_free(env);
refcount_set(&btf->refcnt, 1);
return btf;
errout:
btf_verifier_env_free(env);

View File

@ -578,7 +578,7 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
err = __ptr_ring_produce(q, xdpf);
if (err) {
drops++;
xdp_return_frame(xdpf);
xdp_return_frame_rx_napi(xdpf);
}
processed++;
}

View File

@ -48,15 +48,25 @@
* calls will fail at this point.
*/
#include <linux/bpf.h>
#include <net/xdp.h>
#include <linux/filter.h>
#include <trace/events/xdp.h>
#define DEV_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
#define DEV_MAP_BULK_SIZE 16
struct xdp_bulk_queue {
struct xdp_frame *q[DEV_MAP_BULK_SIZE];
struct net_device *dev_rx;
unsigned int count;
};
struct bpf_dtab_netdev {
struct net_device *dev;
struct net_device *dev; /* must be first member, due to tracepoint */
struct bpf_dtab *dtab;
unsigned int bit;
struct xdp_bulk_queue __percpu *bulkq;
struct rcu_head rcu;
};
@ -206,6 +216,50 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
__set_bit(bit, bitmap);
}
static int bq_xmit_all(struct bpf_dtab_netdev *obj,
struct xdp_bulk_queue *bq)
{
struct net_device *dev = obj->dev;
int sent = 0, drops = 0, err = 0;
int i;
if (unlikely(!bq->count))
return 0;
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
prefetch(xdpf);
}
sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q);
if (sent < 0) {
err = sent;
sent = 0;
goto error;
}
drops = bq->count - sent;
out:
bq->count = 0;
trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit,
sent, drops, bq->dev_rx, dev, err);
bq->dev_rx = NULL;
return 0;
error:
/* If ndo_xdp_xmit fails with an errno, no frames have been
* xmit'ed and it's our responsibility to free them all.
*/
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
/* RX path under NAPI protection, can return frames faster */
xdp_return_frame_rx_napi(xdpf);
drops++;
}
goto out;
}
/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
* from the driver before returning from its napi->poll() routine. The poll()
* routine is called either from busy_poll context or net_rx_action signaled
@ -221,6 +275,7 @@ void __dev_map_flush(struct bpf_map *map)
for_each_set_bit(bit, bitmap, map->max_entries) {
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
struct xdp_bulk_queue *bq;
struct net_device *netdev;
/* This is possible if the dev entry is removed by user space
@ -230,6 +285,9 @@ void __dev_map_flush(struct bpf_map *map)
continue;
__clear_bit(bit, bitmap);
bq = this_cpu_ptr(dev->bulkq);
bq_xmit_all(dev, bq);
netdev = dev->dev;
if (likely(netdev->netdev_ops->ndo_xdp_flush))
netdev->netdev_ops->ndo_xdp_flush(netdev);
@ -240,21 +298,61 @@ void __dev_map_flush(struct bpf_map *map)
* update happens in parallel here, a dev_put won't happen until after reading the
* ifindex.
*/
struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *dev;
struct bpf_dtab_netdev *obj;
if (key >= map->max_entries)
return NULL;
dev = READ_ONCE(dtab->netdev_map[key]);
return dev ? dev->dev : NULL;
obj = READ_ONCE(dtab->netdev_map[key]);
return obj;
}
/* Runs under RCU-read-side, plus in softirq under NAPI protection.
* Thus, safe percpu variable access.
*/
static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
bq_xmit_all(obj, bq);
/* Ingress dev_rx will be the same for all xdp_frames in
* bulk_queue, because bq is stored per-CPU and must be flushed
* from the net_device driver's NAPI handler before it returns.
*/
if (!bq->dev_rx)
bq->dev_rx = dev_rx;
bq->q[bq->count++] = xdpf;
return 0;
}
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
struct net_device *dev = dst->dev;
struct xdp_frame *xdpf;
if (!dev->netdev_ops->ndo_xdp_xmit)
return -EOPNOTSUPP;
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
return bq_enqueue(dst, xdpf, dev_rx);
}
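Stripped of the per-CPU and RCU machinery, the enqueue/flush pair above is a classic bulking pattern; a self-contained sketch with hypothetical names:
#include <stddef.h>

#define BULK_SIZE 16

struct bulk_queue {
	void *q[BULK_SIZE];
	size_t count;
};

/* stand-in for ndo_xdp_xmit(): consume n items in one call */
static int backend_xmit(void **items, size_t n)
{
	(void)items;
	return (int)n;	/* pretend everything was sent */
}

static void bq_flush(struct bulk_queue *bq)
{
	if (bq->count) {
		backend_xmit(bq->q, bq->count);
		bq->count = 0;
	}
}

static void bq_push(struct bulk_queue *bq, void *item)
{
	if (bq->count == BULK_SIZE)	/* full: drain before enqueueing */
		bq_flush(bq);
	bq->q[bq->count++] = item;
}
The win is amortization: the backend (here the driver) pays its fixed per-call costs, such as taking a TX ring lock or ringing a doorbell, once per up-to-16 frames instead of once per frame.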
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
struct net_device *dev = __dev_map_lookup_elem(map, *(u32 *)key);
struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
struct net_device *dev = obj ? obj->dev : NULL;
return dev ? &dev->ifindex : NULL;
}
@ -263,13 +361,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{
if (dev->dev->netdev_ops->ndo_xdp_flush) {
struct net_device *fl = dev->dev;
struct xdp_bulk_queue *bq;
unsigned long *bitmap;
int cpu;
for_each_online_cpu(cpu) {
bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
__clear_bit(dev->bit, bitmap);
bq = per_cpu_ptr(dev->bulkq, cpu);
bq_xmit_all(dev, bq);
fl->netdev_ops->ndo_xdp_flush(dev->dev);
}
}
@ -281,6 +384,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
dev_map_flush_old(dev);
free_percpu(dev->bulkq);
dev_put(dev->dev);
kfree(dev);
}
@ -313,6 +417,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct net *net = current->nsproxy->net_ns;
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
struct bpf_dtab_netdev *dev, *old_dev;
u32 i = *(u32 *)key;
u32 ifindex = *(u32 *)value;
@ -327,13 +432,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
if (!ifindex) {
dev = NULL;
} else {
dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
map->numa_node);
dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
if (!dev)
return -ENOMEM;
dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
sizeof(void *), gfp);
if (!dev->bulkq) {
kfree(dev);
return -ENOMEM;
}
dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) {
free_percpu(dev->bulkq);
kfree(dev);
return -EINVAL;
}
@ -405,6 +517,9 @@ static struct notifier_block dev_map_notifier = {
static int __init dev_map_init(void)
{
/* Ensure the tracepoint's shadow struct _bpf_dtab_netdev stays in sync */
BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
offsetof(struct _bpf_dtab_netdev, dev));
register_netdevice_notifier(&dev_map_notifier);
return 0;
}

View File

@ -523,6 +523,7 @@ static unsigned int smap_do_tx_msg(struct sock *sk,
}
bpf_compute_data_pointers_sg(md);
md->sk = sk;
rc = (*prog->bpf_func)(md, prog->insnsi);
psock->apply_bytes = md->apply_bytes;
@ -1713,7 +1714,7 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
struct smap_psock_map_entry *e = NULL;
struct smap_psock *psock;
bool new = false;
int err;
int err = 0;
/* 1. If sock map has BPF programs those will be inherited by the
* sock being added. If the sock is already attached to BPF programs
@ -1823,7 +1824,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
write_unlock_bh(&sock->sk_callback_lock);
return err;
out_free:
kfree(e);
smap_release_sock(psock, sock);
out_progs:
if (parse && verdict) {

View File

@ -18,7 +18,9 @@
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
@ -65,9 +67,9 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
* copy_from_user() call. However, this is not a concern since this function is
* meant to be a future-proofing of bits.
*/
static int check_uarg_tail_zero(void __user *uaddr,
size_t expected_size,
size_t actual_size)
int bpf_check_uarg_tail_zero(void __user *uaddr,
size_t expected_size,
size_t actual_size)
{
unsigned char __user *addr;
unsigned char __user *end;
@ -422,7 +424,7 @@ static int bpf_obj_name_cpy(char *dst, const char *src)
return 0;
}
#define BPF_MAP_CREATE_LAST_FIELD btf_value_id
#define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
@ -457,10 +459,10 @@ static int map_create(union bpf_attr *attr)
atomic_set(&map->usercnt, 1);
if (bpf_map_support_seq_show(map) &&
(attr->btf_key_id || attr->btf_value_id)) {
(attr->btf_key_type_id || attr->btf_value_type_id)) {
struct btf *btf;
if (!attr->btf_key_id || !attr->btf_value_id) {
if (!attr->btf_key_type_id || !attr->btf_value_type_id) {
err = -EINVAL;
goto free_map_nouncharge;
}
@ -471,16 +473,16 @@ static int map_create(union bpf_attr *attr)
goto free_map_nouncharge;
}
err = map->ops->map_check_btf(map, btf, attr->btf_key_id,
attr->btf_value_id);
err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id,
attr->btf_value_type_id);
if (err) {
btf_put(btf);
goto free_map_nouncharge;
}
map->btf = btf;
map->btf_key_id = attr->btf_key_id;
map->btf_value_id = attr->btf_value_id;
map->btf_key_type_id = attr->btf_key_type_id;
map->btf_value_type_id = attr->btf_value_type_id;
}
err = security_bpf_map_alloc(map);
@ -1899,7 +1901,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
u32 ulen;
int err;
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err)
return err;
info_len = min_t(u32, sizeof(info), info_len);
@ -1933,6 +1935,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
if (!capable(CAP_SYS_ADMIN)) {
info.jited_prog_len = 0;
info.xlated_prog_len = 0;
info.nr_jited_ksyms = 0;
goto done;
}
@ -1969,18 +1972,93 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
* for offload.
*/
ulen = info.jited_prog_len;
info.jited_prog_len = prog->jited_len;
if (prog->aux->func_cnt) {
u32 i;
info.jited_prog_len = 0;
for (i = 0; i < prog->aux->func_cnt; i++)
info.jited_prog_len += prog->aux->func[i]->jited_len;
} else {
info.jited_prog_len = prog->jited_len;
}
if (info.jited_prog_len && ulen) {
if (bpf_dump_raw_ok()) {
uinsns = u64_to_user_ptr(info.jited_prog_insns);
ulen = min_t(u32, info.jited_prog_len, ulen);
if (copy_to_user(uinsns, prog->bpf_func, ulen))
return -EFAULT;
/* for multi-function programs, copy the JITed
* instructions for all the functions
*/
if (prog->aux->func_cnt) {
u32 len, free, i;
u8 *img;
free = ulen;
for (i = 0; i < prog->aux->func_cnt; i++) {
len = prog->aux->func[i]->jited_len;
len = min_t(u32, len, free);
img = (u8 *) prog->aux->func[i]->bpf_func;
if (copy_to_user(uinsns, img, len))
return -EFAULT;
uinsns += len;
free -= len;
if (!free)
break;
}
} else {
if (copy_to_user(uinsns, prog->bpf_func, ulen))
return -EFAULT;
}
} else {
info.jited_prog_insns = 0;
}
}
ulen = info.nr_jited_ksyms;
info.nr_jited_ksyms = prog->aux->func_cnt;
if (info.nr_jited_ksyms && ulen) {
if (bpf_dump_raw_ok()) {
u64 __user *user_ksyms;
ulong ksym_addr;
u32 i;
/* copy the address of the kernel symbol
* corresponding to each function
*/
ulen = min_t(u32, info.nr_jited_ksyms, ulen);
user_ksyms = u64_to_user_ptr(info.jited_ksyms);
for (i = 0; i < ulen; i++) {
ksym_addr = (ulong) prog->aux->func[i]->bpf_func;
ksym_addr &= PAGE_MASK;
if (put_user((u64) ksym_addr, &user_ksyms[i]))
return -EFAULT;
}
} else {
info.jited_ksyms = 0;
}
}
ulen = info.nr_jited_func_lens;
info.nr_jited_func_lens = prog->aux->func_cnt;
if (info.nr_jited_func_lens && ulen) {
if (bpf_dump_raw_ok()) {
u32 __user *user_lens;
u32 func_len, i;
/* copy the JITed image lengths for each function */
ulen = min_t(u32, info.nr_jited_func_lens, ulen);
user_lens = u64_to_user_ptr(info.jited_func_lens);
for (i = 0; i < ulen; i++) {
func_len = prog->aux->func[i]->jited_len;
if (put_user(func_len, &user_lens[i]))
return -EFAULT;
}
} else {
info.jited_func_lens = 0;
}
}
done:
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
@ -1998,7 +2076,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
u32 info_len = attr->info.info_len;
int err;
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
if (err)
return err;
info_len = min_t(u32, sizeof(info), info_len);
@ -2013,8 +2091,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
if (map->btf) {
info.btf_id = btf_id(map->btf);
info.btf_key_id = map->btf_key_id;
info.btf_value_id = map->btf_value_id;
info.btf_key_type_id = map->btf_key_type_id;
info.btf_value_type_id = map->btf_value_type_id;
}
if (bpf_map_is_dev_bound(map)) {
@ -2038,7 +2116,7 @@ static int bpf_btf_get_info_by_fd(struct btf *btf,
u32 info_len = attr->info.info_len;
int err;
err = check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
err = bpf_check_uarg_tail_zero(uinfo, sizeof(*uinfo), info_len);
if (err)
return err;
@ -2102,6 +2180,132 @@ static int bpf_btf_get_fd_by_id(const union bpf_attr *attr)
return btf_get_fd_by_id(attr->btf_id);
}
static int bpf_task_fd_query_copy(const union bpf_attr *attr,
union bpf_attr __user *uattr,
u32 prog_id, u32 fd_type,
const char *buf, u64 probe_offset,
u64 probe_addr)
{
char __user *ubuf = u64_to_user_ptr(attr->task_fd_query.buf);
u32 len = buf ? strlen(buf) : 0, input_len;
int err = 0;
if (put_user(len, &uattr->task_fd_query.buf_len))
return -EFAULT;
input_len = attr->task_fd_query.buf_len;
if (input_len && ubuf) {
if (!len) {
/* nothing to copy, just make ubuf NULL terminated */
char zero = '\0';
if (put_user(zero, ubuf))
return -EFAULT;
} else if (input_len >= len + 1) {
/* ubuf can hold the string with NULL terminator */
if (copy_to_user(ubuf, buf, len + 1))
return -EFAULT;
} else {
/* ubuf cannot hold the string with NULL terminator,
* do a partial copy with NULL terminator.
*/
char zero = '\0';
err = -ENOSPC;
if (copy_to_user(ubuf, buf, input_len - 1))
return -EFAULT;
if (put_user(zero, ubuf + input_len - 1))
return -EFAULT;
}
}
if (put_user(prog_id, &uattr->task_fd_query.prog_id) ||
put_user(fd_type, &uattr->task_fd_query.fd_type) ||
put_user(probe_offset, &uattr->task_fd_query.probe_offset) ||
put_user(probe_addr, &uattr->task_fd_query.probe_addr))
return -EFAULT;
return err;
}
#define BPF_TASK_FD_QUERY_LAST_FIELD task_fd_query.probe_addr
static int bpf_task_fd_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
pid_t pid = attr->task_fd_query.pid;
u32 fd = attr->task_fd_query.fd;
const struct perf_event *event;
struct files_struct *files;
struct task_struct *task;
struct file *file;
int err;
if (CHECK_ATTR(BPF_TASK_FD_QUERY))
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (attr->task_fd_query.flags != 0)
return -EINVAL;
task = get_pid_task(find_vpid(pid), PIDTYPE_PID);
if (!task)
return -ENOENT;
files = get_files_struct(task);
put_task_struct(task);
if (!files)
return -ENOENT;
err = 0;
spin_lock(&files->file_lock);
file = fcheck_files(files, fd);
if (!file)
err = -EBADF;
else
get_file(file);
spin_unlock(&files->file_lock);
put_files_struct(files);
if (err)
goto out;
if (file->f_op == &bpf_raw_tp_fops) {
struct bpf_raw_tracepoint *raw_tp = file->private_data;
struct bpf_raw_event_map *btp = raw_tp->btp;
err = bpf_task_fd_query_copy(attr, uattr,
raw_tp->prog->aux->id,
BPF_FD_TYPE_RAW_TRACEPOINT,
btp->tp->name, 0, 0);
goto put_file;
}
event = perf_get_event(file);
if (!IS_ERR(event)) {
u64 probe_offset, probe_addr;
u32 prog_id, fd_type;
const char *buf;
err = bpf_get_perf_event_info(event, &prog_id, &fd_type,
&buf, &probe_offset,
&probe_addr);
if (!err)
err = bpf_task_fd_query_copy(attr, uattr, prog_id,
fd_type, buf,
probe_offset,
probe_addr);
goto put_file;
}
err = -ENOTSUPP;
put_file:
fput(file);
out:
return err;
}
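From user space, the new command is a single bpf(2) call; a hedged sketch (error handling trimmed, field names as in the task_fd_query layout used above):
#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static int task_fd_query(pid_t pid, int fd)
{
	union bpf_attr attr;
	char buf[256];

	memset(&attr, 0, sizeof(attr));
	attr.task_fd_query.pid = pid;
	attr.task_fd_query.fd = fd;
	attr.task_fd_query.buf = (__u64)(unsigned long)buf;
	attr.task_fd_query.buf_len = sizeof(buf);

	if (syscall(__NR_bpf, BPF_TASK_FD_QUERY, &attr, sizeof(attr)))
		return -1;

	printf("prog_id=%u fd_type=%u name=%s offset=0x%llx addr=0x%llx\n",
	       attr.task_fd_query.prog_id, attr.task_fd_query.fd_type, buf,
	       (unsigned long long)attr.task_fd_query.probe_offset,
	       (unsigned long long)attr.task_fd_query.probe_addr);
	return 0;
}
A truncated name shows up as -1/ENOSPC with buf still NULL-terminated, matching the partial-copy branch in bpf_task_fd_query_copy().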
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@ -2110,7 +2314,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
return -EPERM;
err = check_uarg_tail_zero(uattr, sizeof(attr), size);
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
if (err)
return err;
size = min_t(u32, size, sizeof(attr));
@ -2188,6 +2392,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_BTF_GET_FD_BY_ID:
err = bpf_btf_get_fd_by_id(&attr);
break;
case BPF_TASK_FD_QUERY:
err = bpf_task_fd_query(&attr, uattr);
break;
default:
err = -EINVAL;
break;

View File

@ -1262,6 +1262,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
switch (env->prog->type) {
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
/* dst_input() and dst_output() can't write for now */
if (t == BPF_WRITE)
return false;
@ -5383,11 +5384,24 @@ static int jit_subprogs(struct bpf_verifier_env *env)
insn->src_reg != BPF_PSEUDO_CALL)
continue;
subprog = insn->off;
insn->off = 0;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
func[subprog]->bpf_func -
__bpf_call_base;
}
/* we use the aux data to keep a list of the start addresses
* of the JITed images for each function in the program
*
* for some architectures, such as powerpc64, the imm field
* might not be large enough to hold the offset of the start
* address of the callee's JITed image from __bpf_call_base
*
* in such cases, we can lookup the start address of a callee
* by using its subprog id, available from the off field of
* the call instruction, as an index for this list
*/
func[i]->aux->func = func;
func[i]->aux->func_cnt = env->subprog_cnt;
}
for (i = 0; i < env->subprog_cnt; i++) {
old_bpf_func = func[i]->bpf_func;
@ -5413,17 +5427,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
* later look the same as if they were interpreted only.
*/
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
unsigned long addr;
if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL)
continue;
insn->off = env->insn_aux_data[i].call_imm;
subprog = find_subprog(env, i + insn->off + 1);
addr = (unsigned long)func[subprog]->bpf_func;
addr &= PAGE_MASK;
insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
addr - __bpf_call_base;
insn->imm = subprog;
}
prog->jited = 1;

View File

@ -1,15 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* XSKMAP used for AF_XDP sockets
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/bpf.h>

View File

@ -11212,6 +11212,14 @@ struct file *perf_event_get(unsigned int fd)
return file;
}
const struct perf_event *perf_get_event(struct file *file)
{
if (file->f_op != &perf_fops)
return ERR_PTR(-EINVAL);
return file->private_data;
}
const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
{
if (!event)

View File

@ -14,6 +14,7 @@
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/kprobes.h>
#include <linux/syscalls.h>
#include <linux/error-injection.h>
#include "trace_probe.h"
@ -1163,3 +1164,50 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
mutex_unlock(&bpf_event_mutex);
return err;
}
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
u32 *fd_type, const char **buf,
u64 *probe_offset, u64 *probe_addr)
{
bool is_tracepoint, is_syscall_tp;
struct bpf_prog *prog;
int flags, err = 0;
prog = event->prog;
if (!prog)
return -ENOENT;
/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
return -EOPNOTSUPP;
*prog_id = prog->aux->id;
flags = event->tp_event->flags;
is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
is_syscall_tp = is_syscall_trace_event(event->tp_event);
if (is_tracepoint || is_syscall_tp) {
*buf = is_tracepoint ? event->tp_event->tp->name
: event->tp_event->name;
*fd_type = BPF_FD_TYPE_TRACEPOINT;
*probe_offset = 0x0;
*probe_addr = 0x0;
} else {
/* kprobe/uprobe */
err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
if (flags & TRACE_EVENT_FL_KPROBE)
err = bpf_get_kprobe_info(event, fd_type, buf,
probe_offset, probe_addr,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
if (flags & TRACE_EVENT_FL_UPROBE)
err = bpf_get_uprobe_info(event, fd_type, buf,
probe_offset,
event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
}
return err;
}

View File

@ -1287,6 +1287,35 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
head, NULL);
}
NOKPROBE_SYMBOL(kretprobe_perf_func);
int bpf_get_kprobe_info(const struct perf_event *event, u32 *fd_type,
const char **symbol, u64 *probe_offset,
u64 *probe_addr, bool perf_type_tracepoint)
{
const char *pevent = trace_event_name(event->tp_event);
const char *group = event->tp_event->class->system;
struct trace_kprobe *tk;
if (perf_type_tracepoint)
tk = find_trace_kprobe(pevent, group);
else
tk = event->tp_event->data;
if (!tk)
return -EINVAL;
*fd_type = trace_kprobe_is_return(tk) ? BPF_FD_TYPE_KRETPROBE
: BPF_FD_TYPE_KPROBE;
if (tk->symbol) {
*symbol = tk->symbol;
*probe_offset = tk->rp.kp.offset;
*probe_addr = 0;
} else {
*symbol = NULL;
*probe_offset = 0;
*probe_addr = (unsigned long)tk->rp.kp.addr;
}
return 0;
}
#endif /* CONFIG_PERF_EVENTS */
/*

View File

@ -1161,6 +1161,28 @@ static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
{
__uprobe_perf_func(tu, func, regs, ucb, dsize);
}
int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type,
const char **filename, u64 *probe_offset,
bool perf_type_tracepoint)
{
const char *pevent = trace_event_name(event->tp_event);
const char *group = event->tp_event->class->system;
struct trace_uprobe *tu;
if (perf_type_tracepoint)
tu = find_probe_event(pevent, group);
else
tu = event->tp_event->data;
if (!tu)
return -EINVAL;
*fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE
: BPF_FD_TYPE_UPROBE;
*filename = tu->filename;
*probe_offset = tu->offset;
return 0;
}
#endif /* CONFIG_PERF_EVENTS */
static int

View File

@ -64,6 +64,10 @@
#include <net/ip_fib.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>
/**
* sk_filter_trim_cap - run a packet through a socket filter
@ -3042,7 +3046,7 @@ static int __bpf_tx_xdp(struct net_device *dev,
u32 index)
{
struct xdp_frame *xdpf;
int err;
int sent;
if (!dev->netdev_ops->ndo_xdp_xmit) {
return -EOPNOTSUPP;
@ -3052,9 +3056,9 @@ static int __bpf_tx_xdp(struct net_device *dev,
if (unlikely(!xdpf))
return -EOVERFLOW;
err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
if (err)
return err;
sent = dev->netdev_ops->ndo_xdp_xmit(dev, 1, &xdpf);
if (sent <= 0)
return sent;
dev->netdev_ops->ndo_xdp_flush(dev);
return 0;
}
@ -3068,20 +3072,9 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP: {
struct net_device *dev = fwd;
struct xdp_frame *xdpf;
struct bpf_dtab_netdev *dst = fwd;
if (!dev->netdev_ops->ndo_xdp_xmit)
return -EOPNOTSUPP;
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
/* TODO: move to inside map code instead, for bulk support
* err = dev_map_enqueue(dev, xdp);
*/
err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
err = dev_map_enqueue(dst, xdp, dev_rx);
if (err)
return err;
__dev_map_insert_ctx(map, index);
@ -3370,28 +3363,6 @@ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
.arg3_type = ARG_ANYTHING,
};
bool bpf_helper_changes_pkt_data(void *func)
{
if (func == bpf_skb_vlan_push ||
func == bpf_skb_vlan_pop ||
func == bpf_skb_store_bytes ||
func == bpf_skb_change_proto ||
func == bpf_skb_change_head ||
func == bpf_skb_change_tail ||
func == bpf_skb_adjust_room ||
func == bpf_skb_pull_data ||
func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head ||
func == bpf_xdp_adjust_meta ||
func == bpf_msg_pull_data ||
func == bpf_xdp_adjust_tail)
return true;
return false;
}
static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
unsigned long off, unsigned long len)
{
@ -4096,7 +4067,7 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
#if IS_ENABLED(CONFIG_INET)
static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
u32 flags)
u32 flags, bool check_mtu)
{
struct in_device *in_dev;
struct neighbour *neigh;
@ -4105,6 +4076,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
struct fib_nh *nh;
struct flowi4 fl4;
int err;
u32 mtu;
dev = dev_get_by_index_rcu(net, params->ifindex);
if (unlikely(!dev))
@ -4156,6 +4128,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (res.fi->fib_nhs > 1)
fib_select_path(net, &res, &fl4, NULL);
if (check_mtu) {
mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
if (params->tot_len > mtu)
return 0;
}
nh = &res.fi->fib_nh[res.nh_sel];
/* do not handle lwt encaps right now */
@ -4184,7 +4162,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
#if IS_ENABLED(CONFIG_IPV6)
static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
u32 flags)
u32 flags, bool check_mtu)
{
struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
@ -4195,6 +4173,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
struct flowi6 fl6;
int strict = 0;
int oif;
u32 mtu;
/* link local addresses are never forwarded */
if (rt6_need_strict(dst) || rt6_need_strict(src))
@ -4257,6 +4236,12 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
fl6.flowi6_oif, NULL,
strict);
if (check_mtu) {
mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
if (params->tot_len > mtu)
return 0;
}
if (f6i->fib6_nh.nh_lwtstate)
return 0;
@ -4289,12 +4274,12 @@ BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
#if IS_ENABLED(CONFIG_INET)
case AF_INET:
return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
flags);
flags, true);
#endif
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
flags);
flags, true);
#endif
}
return 0;
@ -4313,20 +4298,34 @@ static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
{
struct net *net = dev_net(skb->dev);
int index = 0;
if (plen < sizeof(*params))
return -EINVAL;
switch (params->family) {
#if IS_ENABLED(CONFIG_INET)
case AF_INET:
return bpf_ipv4_fib_lookup(dev_net(skb->dev), params, flags);
index = bpf_ipv4_fib_lookup(net, params, flags, false);
break;
#endif
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
return bpf_ipv6_fib_lookup(dev_net(skb->dev), params, flags);
index = bpf_ipv6_fib_lookup(net, params, flags, false);
break;
#endif
}
return -ENOTSUPP;
if (index > 0) {
struct net_device *dev;
dev = dev_get_by_index_rcu(net, index);
if (!dev || !is_skb_forwardable(dev, skb))
index = 0;
}
return index;
}
static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
@ -4339,6 +4338,264 @@ static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
.arg4_type = ARG_ANYTHING,
};
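Usage-wise, the helper's contract is: a positive return is the egress ifindex (route found and, with check_mtu, within the route MTU), 0 means "punt to the stack", negative means bad input. A hedged XDP forwarding sketch; the bpf_helpers.h/bpf_endian.h includes follow the kernel selftests, the MAC rewrite is elided:
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"

#ifndef AF_INET
#define AF_INET 2
#endif

SEC("xdp")
int xdp_fib_fwd(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct iphdr *iph = data + sizeof(*eth);
	struct bpf_fib_lookup params = {};
	int rc;

	if ((void *)(iph + 1) > data_end ||
	    eth->h_proto != bpf_htons(ETH_P_IP))
		return XDP_PASS;

	params.family = AF_INET;
	params.ipv4_src = iph->saddr;
	params.ipv4_dst = iph->daddr;
	params.tot_len = bpf_ntohs(iph->tot_len);	/* feeds the MTU check */
	params.ifindex = ctx->ingress_ifindex;

	rc = bpf_fib_lookup(ctx, &params, sizeof(params), 0);
	if (rc <= 0)		/* no route, over MTU, or error: punt */
		return XDP_PASS;

	/* a real forwarder rewrites MACs from params.smac/params.dmac
	 * and decrements the TTL before redirecting
	 */
	return bpf_redirect(rc, 0);
}

char _license[] SEC("license") = "GPL";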
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
{
int err;
struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
if (!seg6_validate_srh(srh, len))
return -EINVAL;
switch (type) {
case BPF_LWT_ENCAP_SEG6_INLINE:
if (skb->protocol != htons(ETH_P_IPV6))
return -EBADMSG;
err = seg6_do_srh_inline(skb, srh);
break;
case BPF_LWT_ENCAP_SEG6:
skb_reset_inner_headers(skb);
skb->encapsulation = 1;
err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
break;
default:
return -EINVAL;
}
bpf_compute_data_pointers(skb);
if (err)
return err;
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
return seg6_lookup_nexthop(skb, NULL, 0);
}
#endif /* CONFIG_IPV6_SEG6_BPF */
BPF_CALL_4(bpf_lwt_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
u32, len)
{
switch (type) {
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
case BPF_LWT_ENCAP_SEG6:
case BPF_LWT_ENCAP_SEG6_INLINE:
return bpf_push_seg6_encap(skb, type, hdr, len);
#endif
default:
return -EINVAL;
}
}
static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
.func = bpf_lwt_push_encap,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM,
.arg4_type = ARG_CONST_SIZE
};
BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
const void *, from, u32, len)
{
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
void *srh_tlvs, *srh_end, *ptr;
struct ipv6_sr_hdr *srh;
int srhoff = 0;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
return -EINVAL;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
ptr = skb->data + offset;
if (ptr >= srh_tlvs && ptr + len <= srh_end)
srh_state->valid = 0;
else if (ptr < (void *)&srh->flags ||
ptr + len > (void *)&srh->segments)
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
memcpy(skb->data + offset, from, len);
return 0;
#else /* CONFIG_IPV6_SEG6_BPF */
return -EOPNOTSUPP;
#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
.func = bpf_lwt_seg6_store_bytes,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM,
.arg4_type = ARG_CONST_SIZE
};
BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
u32, action, void *, param, u32, param_len)
{
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh;
int srhoff = 0;
int err;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
return -EINVAL;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
if (!srh_state->valid) {
if (unlikely((srh_state->hdrlen & 7) != 0))
return -EBADMSG;
srh->hdrlen = (u8)(srh_state->hdrlen >> 3);
if (unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
return -EBADMSG;
srh_state->valid = 1;
}
switch (action) {
case SEG6_LOCAL_ACTION_END_X:
if (param_len != sizeof(struct in6_addr))
return -EINVAL;
return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
case SEG6_LOCAL_ACTION_END_T:
if (param_len != sizeof(int))
return -EINVAL;
return seg6_lookup_nexthop(skb, NULL, *(int *)param);
case SEG6_LOCAL_ACTION_END_B6:
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
param, param_len);
if (!err)
srh_state->hdrlen =
((struct ipv6_sr_hdr *)param)->hdrlen << 3;
return err;
case SEG6_LOCAL_ACTION_END_B6_ENCAP:
err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
param, param_len);
if (!err)
srh_state->hdrlen =
((struct ipv6_sr_hdr *)param)->hdrlen << 3;
return err;
default:
return -EINVAL;
}
#else /* CONFIG_IPV6_SEG6_BPF */
return -EOPNOTSUPP;
#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
.func = bpf_lwt_seg6_action,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM,
.arg4_type = ARG_CONST_SIZE
};
BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
s32, len)
{
#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
void *srh_end, *srh_tlvs, *ptr;
struct ipv6_sr_hdr *srh;
struct ipv6hdr *hdr;
int srhoff = 0;
int ret;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
return -EINVAL;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
((srh->first_segment + 1) << 4));
srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
srh_state->hdrlen);
ptr = skb->data + offset;
if (unlikely(ptr < srh_tlvs || ptr > srh_end))
return -EFAULT;
if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
return -EFAULT;
if (len > 0) {
ret = skb_cow_head(skb, len);
if (unlikely(ret < 0))
return ret;
ret = bpf_skb_net_hdr_push(skb, offset, len);
} else {
ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
}
bpf_compute_data_pointers(skb);
if (unlikely(ret < 0))
return ret;
hdr = (struct ipv6hdr *)skb->data;
hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
srh_state->hdrlen += len;
srh_state->valid = 0;
return 0;
#else /* CONFIG_IPV6_SEG6_BPF */
return -EOPNOTSUPP;
#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
.func = bpf_lwt_seg6_adjust_srh,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
};
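Tying the helpers together, a hedged sketch of an End.BPF program for the new lwt_seg6local attach point: it performs a standard End.T action (nexthop lookup in a routing table) from BPF and signals the reroute with BPF_REDIRECT. The RT_TABLE_MAIN value and the selftests-style include are assumptions of this sketch; a real program would typically also edit the SRH with bpf_lwt_seg6_store_bytes()/bpf_lwt_seg6_adjust_srh() first.
#include <linux/bpf.h>
#include <linux/seg6_local.h>
#include "bpf_helpers.h"

#define RT_TABLE_MAIN 254	/* assumption: main routing table id */

SEC("lwt_seg6local")
int do_end_t(struct __sk_buff *skb)
{
	int table = RT_TABLE_MAIN;	/* End.T takes exactly sizeof(int) */

	if (bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
				&table, sizeof(table)) < 0)
		return BPF_DROP;
	return BPF_REDIRECT;	/* dst was rewritten by the lookup */
}

char _license[] SEC("license") = "GPL";
Such a program is attached through a seg6local route using the SEG6_LOCAL_ACTION_END_BPF action and the SEG6_LOCAL_BPF_PROG* attributes defined earlier.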
bool bpf_helper_changes_pkt_data(void *func)
{
if (func == bpf_skb_vlan_push ||
func == bpf_skb_vlan_pop ||
func == bpf_skb_store_bytes ||
func == bpf_skb_change_proto ||
func == bpf_skb_change_head ||
func == bpf_skb_change_tail ||
func == bpf_skb_adjust_room ||
func == bpf_skb_pull_data ||
func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
func == bpf_xdp_adjust_head ||
func == bpf_xdp_adjust_meta ||
func == bpf_msg_pull_data ||
func == bpf_xdp_adjust_tail ||
func == bpf_lwt_push_encap ||
func == bpf_lwt_seg6_store_bytes ||
func == bpf_lwt_seg6_adjust_srh ||
func == bpf_lwt_seg6_action
)
return true;
return false;
}
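The practical effect of being listed in bpf_helper_changes_pkt_data() falls on program authors: after calling any of these helpers, previously derived packet pointers are invalidated by the verifier and must be re-derived. A minimal sketch with bpf_xdp_adjust_head() (offsets illustrative, include as in the selftests):
#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("xdp")
int reload_after_adjust(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	if (data + 14 > data_end)	/* bounds check before the helper */
		return XDP_DROP;

	if (bpf_xdp_adjust_head(ctx, 14))	/* pop 14 bytes of header */
		return XDP_DROP;

	/* old data/data_end are stale now: reload and re-check, or the
	 * verifier rejects any further packet access
	 */
	data = (void *)(long)ctx->data;
	data_end = (void *)(long)ctx->data_end;
	if (data + 20 > data_end)
		return XDP_DROP;

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";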
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@ -4522,33 +4779,6 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}
static const struct bpf_func_proto *
lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
case BPF_FUNC_skb_pull_data:
return &bpf_skb_pull_data_proto;
case BPF_FUNC_csum_diff:
return &bpf_csum_diff_proto;
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_proto;
case BPF_FUNC_get_route_realm:
return &bpf_get_route_realm_proto;
case BPF_FUNC_get_hash_recalc:
return &bpf_get_hash_recalc_proto;
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
return bpf_base_func_proto(func_id);
}
}
static const struct bpf_func_proto *
sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@ -4614,6 +4844,44 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
}
}
static const struct bpf_func_proto *
lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
case BPF_FUNC_skb_pull_data:
return &bpf_skb_pull_data_proto;
case BPF_FUNC_csum_diff:
return &bpf_csum_diff_proto;
case BPF_FUNC_get_cgroup_classid:
return &bpf_get_cgroup_classid_proto;
case BPF_FUNC_get_route_realm:
return &bpf_get_route_realm_proto;
case BPF_FUNC_get_hash_recalc:
return &bpf_get_hash_recalc_proto;
case BPF_FUNC_perf_event_output:
return &bpf_skb_event_output_proto;
case BPF_FUNC_get_smp_processor_id:
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_skb_under_cgroup:
return &bpf_skb_under_cgroup_proto;
default:
return bpf_base_func_proto(func_id);
}
}
static const struct bpf_func_proto *
lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_lwt_push_encap:
return &bpf_lwt_push_encap_proto;
default:
return lwt_out_func_proto(func_id, prog);
}
}
static const struct bpf_func_proto *
lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@ -4645,7 +4913,22 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_set_hash_invalid:
return &bpf_set_hash_invalid_proto;
default:
return lwt_inout_func_proto(func_id, prog);
return lwt_out_func_proto(func_id, prog);
}
}
static const struct bpf_func_proto *
lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
case BPF_FUNC_lwt_seg6_store_bytes:
return &bpf_lwt_seg6_store_bytes_proto;
case BPF_FUNC_lwt_seg6_action:
return &bpf_lwt_seg6_action_proto;
case BPF_FUNC_lwt_seg6_adjust_srh:
return &bpf_lwt_seg6_adjust_srh_proto;
default:
return lwt_out_func_proto(func_id, prog);
}
}
@ -4753,7 +5036,6 @@ static bool lwt_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
}
/* Attach type specific accesses */
static bool __sock_filter_check_attach_type(int off,
enum bpf_access_type access_type,
@ -5155,18 +5437,23 @@ static bool sk_msg_is_valid_access(int off, int size,
switch (off) {
case offsetof(struct sk_msg_md, data):
info->reg_type = PTR_TO_PACKET;
if (size != sizeof(__u64))
return false;
break;
case offsetof(struct sk_msg_md, data_end):
info->reg_type = PTR_TO_PACKET_END;
if (size != sizeof(__u64))
return false;
break;
default:
if (size != sizeof(__u32))
return false;
}
if (off < 0 || off >= sizeof(struct sk_msg_md))
return false;
if (off % size != 0)
return false;
if (size != sizeof(__u64))
return false;
return true;
}
@ -5842,7 +6129,8 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, local_ip4):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@ -6159,6 +6447,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog, u32 *target_size)
{
struct bpf_insn *insn = insn_buf;
int off;
switch (si->off) {
case offsetof(struct sk_msg_md, data):
@ -6171,6 +6460,107 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct sk_msg_buff, data_end));
break;
case offsetof(struct sk_msg_md, family):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg_buff, sk),
si->dst_reg, si->src_reg,
offsetof(struct sk_msg_buff, sk));
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
offsetof(struct sock_common, skc_family));
break;
case offsetof(struct sk_msg_md, remote_ip4):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg_buff, sk),
si->dst_reg, si->src_reg,
offsetof(struct sk_msg_buff, sk));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct sock_common, skc_daddr));
break;
case offsetof(struct sk_msg_md, local_ip4):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg_buff, sk),
si->dst_reg, si->src_reg,
offsetof(struct sk_msg_buff, sk));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct sock_common,
skc_rcv_saddr));
break;
case offsetof(struct sk_msg_md, remote_ip6[0]) ...
offsetof(struct sk_msg_md, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off;
off -= offsetof(struct sk_msg_md, remote_ip6[0]);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg_buff, sk),
si->dst_reg, si->src_reg,
offsetof(struct sk_msg_buff, sk));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct sock_common,
skc_v6_daddr.s6_addr32[0]) +
off);
#else
*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
break;
case offsetof(struct sk_msg_md, local_ip6[0]) ...
offsetof(struct sk_msg_md, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off;
off -= offsetof(struct sk_msg_md, local_ip6[0]);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg_buff, sk),
si->dst_reg, si->src_reg,
offsetof(struct sk_msg_buff, sk));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]) +
off);
#else
*insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
#endif
break;
case offsetof(struct sk_msg_md, remote_port):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg_buff, sk),
si->dst_reg, si->src_reg,
offsetof(struct sk_msg_buff, sk));
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
offsetof(struct sock_common, skc_dport));
#ifndef __BIG_ENDIAN_BITFIELD
*insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
#endif
break;
case offsetof(struct sk_msg_md, local_port):
BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg_buff, sk),
si->dst_reg, si->src_reg,
offsetof(struct sk_msg_buff, sk));
*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
offsetof(struct sock_common, skc_num));
break;
}
return insn - insn_buf;
@ -6219,13 +6609,23 @@ const struct bpf_prog_ops cg_skb_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
const struct bpf_verifier_ops lwt_inout_verifier_ops = {
.get_func_proto = lwt_inout_func_proto,
const struct bpf_verifier_ops lwt_in_verifier_ops = {
.get_func_proto = lwt_in_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
const struct bpf_prog_ops lwt_inout_prog_ops = {
const struct bpf_prog_ops lwt_in_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
const struct bpf_verifier_ops lwt_out_verifier_ops = {
.get_func_proto = lwt_out_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
const struct bpf_prog_ops lwt_out_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
@ -6240,6 +6640,16 @@ const struct bpf_prog_ops lwt_xmit_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
.get_func_proto = lwt_seg6local_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
const struct bpf_prog_ops lwt_seg6local_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
const struct bpf_verifier_ops cg_sock_verifier_ops = {
.get_func_proto = sock_filter_func_proto,
.is_valid_access = sock_filter_is_valid_access,

View File

@ -308,7 +308,13 @@ err:
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
static void xdp_return(void *data, struct xdp_mem_info *mem)
/* XDP RX runs under NAPI protection, and in different delivery error
 * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection. The @napi_direct boolean
 * is used for those call sites, thus allowing for faster recycling
 * of xdp_frames/pages in those cases.
 */
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
{
struct xdp_mem_allocator *xa;
struct page *page;
@ -320,7 +326,7 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
page = virt_to_head_page(data);
if (xa)
page_pool_put_page(xa->page_pool, page);
page_pool_put_page(xa->page_pool, page, napi_direct);
else
put_page(page);
rcu_read_unlock();
@ -340,12 +346,18 @@ static void xdp_return(void *data, struct xdp_mem_info *mem)
void xdp_return_frame(struct xdp_frame *xdpf)
{
xdp_return(xdpf->data, &xdpf->mem);
__xdp_return(xdpf->data, &xdpf->mem, false);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
__xdp_return(xdpf->data, &xdpf->mem, true);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
void xdp_return_buff(struct xdp_buff *xdp)
{
xdp_return(xdp->data, &xdp->rxq->mem);
__xdp_return(xdp->data, &xdp->rxq->mem, true);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);
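The napi_direct split above is what lets drivers with bulk
ndo_xdp_xmit support recycle pages cheaply on their completion paths.
A minimal sketch of how a driver might choose between the two exported
variants (the function and its in_napi flag below are illustrative,
not part of this series):
/* Completions running inside the driver's own NAPI poll routine can
 * take the direct-recycling path; any other context must use the
 * plain xdp_return_frame().
 */
static void example_xdp_tx_complete(struct xdp_frame *xdpf, bool in_napi)
{
	if (in_napi)
		xdp_return_frame_rx_napi(xdpf);
	else
		xdp_return_frame(xdpf);
}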

View File

@ -1352,6 +1352,37 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
return NULL;
}
/* MTU selection:
* 1. mtu on route is locked - use it
* 2. mtu from nexthop exception
* 3. mtu from egress device
*/
u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
{
struct fib_info *fi = res->fi;
struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
struct net_device *dev = nh->nh_dev;
u32 mtu = 0;
if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
mtu = fi->fib_mtu;
if (likely(!mtu)) {
struct fib_nh_exception *fnhe;
fnhe = find_exception(nh, daddr);
if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires))
mtu = fnhe->fnhe_pmtu;
}
if (likely(!mtu))
mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU);
return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu);
}
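A hedged sketch of how a caller might consume this helper for the new
MTU checks (the wrapper below is illustrative; the actual
bpf_ipv4_fib_lookup() integration is not part of this hunk, and the
IPv6 variant further down is used the same way):
/* Reject forwarding decisions where the packet would exceed the
 * path MTU computed by ip_mtu_from_fib_result().
 */
static bool example_fits_fib_mtu(struct fib_result *res, __be32 daddr,
				 u32 pkt_len)
{
	return pkt_len <= ip_mtu_from_fib_result(res, daddr);
}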
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr, const bool do_cache)
{

View File

@ -329,4 +329,9 @@ config IPV6_SEG6_HMAC
If unsure, say N.
config IPV6_SEG6_BPF
def_bool y
depends on IPV6_SEG6_LWTUNNEL
depends on IPV6 = y
endif # IPV6

View File

@ -161,12 +161,20 @@ eafnosupport_fib6_multipath_select(const struct net *net, struct fib6_info *f6i,
return f6i;
}
static u32
eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr)
{
return 0;
}
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup,
.fib6_lookup = eafnosupport_fib6_lookup,
.fib6_multipath_select = eafnosupport_fib6_multipath_select,
.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
};
EXPORT_SYMBOL_GPL(ipv6_stub);

View File

@ -894,6 +894,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
.fib6_table_lookup = fib6_table_lookup,
.fib6_lookup = fib6_lookup,
.fib6_multipath_select = fib6_multipath_select,
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,

View File

@ -2604,6 +2604,54 @@ out:
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
/* MTU selection:
* 1. mtu on route is locked - use it
* 2. mtu from nexthop exception
* 3. mtu from egress device
*
* based on ip6_dst_mtu_forward and exception logic of
* rt6_find_cached_rt; called with rcu_read_lock
*/
u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
struct in6_addr *saddr)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct in6_addr *src_key;
struct inet6_dev *idev;
u32 mtu = 0;
if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
mtu = f6i->fib6_pmtu;
if (mtu)
goto out;
}
src_key = NULL;
#ifdef CONFIG_IPV6_SUBTREES
if (f6i->fib6_src.plen)
src_key = saddr;
#endif
bucket = rcu_dereference(f6i->rt6i_exception_bucket);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
if (likely(!mtu)) {
struct net_device *dev = fib6_info_nh_dev(f6i);
mtu = IPV6_MIN_MTU;
idev = __in6_dev_get(dev);
if (idev && idev->cnf.mtu6 > mtu)
mtu = idev->cnf.mtu6;
}
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
out:
return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
}
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
struct flowi6 *fl6)
{

View File

@ -1,8 +1,9 @@
/*
* SR-IPv6 implementation
*
* Author:
* Authors:
* David Lebrun <david.lebrun@uclouvain.be>
* eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com>
*
*
* This program is free software; you can redistribute it and/or
@ -30,7 +31,9 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
#include <net/seg6_local.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
struct seg6_local_lwt;
@ -41,6 +44,11 @@ struct seg6_action_desc {
int static_headroom;
};
struct bpf_lwt_prog {
struct bpf_prog *prog;
char *name;
};
struct seg6_local_lwt {
int action;
struct ipv6_sr_hdr *srh;
@ -49,6 +57,7 @@ struct seg6_local_lwt {
struct in6_addr nh6;
int iif;
int oif;
struct bpf_lwt_prog bpf;
int headroom;
struct seg6_action_desc *desc;
@ -140,8 +149,8 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
*daddr = *addr;
}
static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id)
int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
u32 tbl_id)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *hdr = ipv6_hdr(skb);
@ -187,6 +196,7 @@ out:
skb_dst_drop(skb);
skb_dst_set(skb, dst);
return dst->error;
}
/* regular endpoint function */
@ -200,7 +210,7 @@ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
lookup_nexthop(skb, NULL, 0);
seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
@ -220,7 +230,7 @@ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
lookup_nexthop(skb, &slwt->nh6, 0);
seg6_lookup_nexthop(skb, &slwt->nh6, 0);
return dst_input(skb);
@ -239,7 +249,7 @@ static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
lookup_nexthop(skb, NULL, slwt->table);
seg6_lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb);
@ -331,7 +341,7 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
lookup_nexthop(skb, nhaddr, 0);
seg6_lookup_nexthop(skb, nhaddr, 0);
return dst_input(skb);
drop:
@ -380,7 +390,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
lookup_nexthop(skb, NULL, slwt->table);
seg6_lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb);
@ -406,7 +416,7 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
lookup_nexthop(skb, NULL, 0);
seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
@ -438,7 +448,7 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
lookup_nexthop(skb, NULL, 0);
seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
@ -447,6 +457,71 @@ drop:
return err;
}
DEFINE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states);
static int input_action_end_bpf(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct seg6_bpf_srh_state local_srh_state;
struct ipv6_sr_hdr *srh;
int srhoff = 0;
int ret;
srh = get_and_validate_srh(skb);
if (!srh)
goto drop;
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
/* preempt_disable is needed to protect the per-CPU buffer srh_state,
* which is also accessed by the bpf_lwt_seg6_* helpers
*/
preempt_disable();
srh_state->hdrlen = srh->hdrlen << 3;
srh_state->valid = 1;
rcu_read_lock();
bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(slwt->bpf.prog, skb);
rcu_read_unlock();
local_srh_state = *srh_state;
preempt_enable();
switch (ret) {
case BPF_OK:
case BPF_REDIRECT:
break;
case BPF_DROP:
goto drop;
default:
pr_warn_once("bpf-seg6local: Illegal return value %u\n", ret);
goto drop;
}
if (unlikely((local_srh_state.hdrlen & 7) != 0))
goto drop;
if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
goto drop;
srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
srh->hdrlen = (u8)(local_srh_state.hdrlen >> 3);
if (!local_srh_state.valid &&
unlikely(!seg6_validate_srh(srh, (srh->hdrlen + 1) << 3)))
goto drop;
if (ret != BPF_REDIRECT)
seg6_lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
drop:
kfree_skb(skb);
return -EINVAL;
}
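A minimal End.BPF program sketch to pair with the action registered in
the table below (conventions borrowed from the samples/bpf programs;
this program is illustrative and not part of the series). Returning
BPF_OK hands the packet to seg6_lookup_nexthop() exactly as in the
switch statement above; BPF_DROP and BPF_REDIRECT are the other
accepted return codes.
#include <linux/bpf.h>
#include "bpf_helpers.h"

SEC("lwt_seg6local")
int noop_end_bpf(struct __sk_buff *skb)
{
	/* Inspect or rewrite the SRH via the bpf_lwt_seg6_* helpers here */
	return BPF_OK;
}

char _license[] SEC("license") = "GPL";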
static struct seg6_action_desc seg6_action_table[] = {
{
.action = SEG6_LOCAL_ACTION_END,
@ -493,7 +568,13 @@ static struct seg6_action_desc seg6_action_table[] = {
.attrs = (1 << SEG6_LOCAL_SRH),
.input = input_action_end_b6_encap,
.static_headroom = sizeof(struct ipv6hdr),
}
},
{
.action = SEG6_LOCAL_ACTION_END_BPF,
.attrs = (1 << SEG6_LOCAL_BPF),
.input = input_action_end_bpf,
},
};
static struct seg6_action_desc *__get_action_desc(int action)
@ -538,6 +619,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
.len = sizeof(struct in6_addr) },
[SEG6_LOCAL_IIF] = { .type = NLA_U32 },
[SEG6_LOCAL_OIF] = { .type = NLA_U32 },
[SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
};
static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
@ -715,6 +797,75 @@ static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return 0;
}
#define MAX_PROG_NAME 256
static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = {
[SEG6_LOCAL_BPF_PROG] = { .type = NLA_U32, },
[SEG6_LOCAL_BPF_PROG_NAME] = { .type = NLA_NUL_STRING,
.len = MAX_PROG_NAME },
};
static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1];
struct bpf_prog *p;
int ret;
u32 fd;
ret = nla_parse_nested(tb, SEG6_LOCAL_BPF_PROG_MAX,
attrs[SEG6_LOCAL_BPF], bpf_prog_policy, NULL);
if (ret < 0)
return ret;
if (!tb[SEG6_LOCAL_BPF_PROG] || !tb[SEG6_LOCAL_BPF_PROG_NAME])
return -EINVAL;
slwt->bpf.name = nla_memdup(tb[SEG6_LOCAL_BPF_PROG_NAME], GFP_KERNEL);
if (!slwt->bpf.name)
return -ENOMEM;
fd = nla_get_u32(tb[SEG6_LOCAL_BPF_PROG]);
p = bpf_prog_get_type(fd, BPF_PROG_TYPE_LWT_SEG6LOCAL);
if (IS_ERR(p)) {
kfree(slwt->bpf.name);
return PTR_ERR(p);
}
slwt->bpf.prog = p;
return 0;
}
static int put_nla_bpf(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
struct nlattr *nest;
if (!slwt->bpf.prog)
return 0;
nest = nla_nest_start(skb, SEG6_LOCAL_BPF);
if (!nest)
return -EMSGSIZE;
if (nla_put_u32(skb, SEG6_LOCAL_BPF_PROG, slwt->bpf.prog->aux->id))
return -EMSGSIZE;
if (slwt->bpf.name &&
nla_put_string(skb, SEG6_LOCAL_BPF_PROG_NAME, slwt->bpf.name))
return -EMSGSIZE;
return nla_nest_end(skb, nest);
}
static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
{
if (!a->bpf.name && !b->bpf.name)
return 0;
if (!a->bpf.name || !b->bpf.name)
return 1;
return strcmp(a->bpf.name, b->bpf.name);
}
struct seg6_action_param {
int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
@ -745,6 +896,11 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_OIF] = { .parse = parse_nla_oif,
.put = put_nla_oif,
.cmp = cmp_nla_oif },
[SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf,
.put = put_nla_bpf,
.cmp = cmp_nla_bpf },
};
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
@ -830,6 +986,13 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
kfree(slwt->srh);
if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) {
kfree(slwt->bpf.name);
bpf_prog_put(slwt->bpf.prog);
}
return;
}
static int seg6_local_fill_encap(struct sk_buff *skb,
@ -882,6 +1045,11 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
if (attrs & (1 << SEG6_LOCAL_OIF))
nlsize += nla_total_size(4);
if (attrs & (1 << SEG6_LOCAL_BPF))
nlsize += nla_total_size(sizeof(struct nlattr)) +
nla_total_size(MAX_PROG_NAME) +
nla_total_size(4);
return nlsize;
}

View File

@ -1,2 +1 @@
obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o

View File

@ -1,15 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/init.h>
@ -25,39 +16,25 @@
#define XDP_UMEM_MIN_FRAME_SIZE 2048
int xdp_umem_create(struct xdp_umem **umem)
{
*umem = kzalloc(sizeof(**umem), GFP_KERNEL);
if (!(*umem))
return -ENOMEM;
return 0;
}
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
unsigned int i;
if (umem->pgs) {
for (i = 0; i < umem->npgs; i++) {
struct page *page = umem->pgs[i];
for (i = 0; i < umem->npgs; i++) {
struct page *page = umem->pgs[i];
set_page_dirty_lock(page);
put_page(page);
}
kfree(umem->pgs);
umem->pgs = NULL;
set_page_dirty_lock(page);
put_page(page);
}
kfree(umem->pgs);
umem->pgs = NULL;
}
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
if (umem->user) {
atomic_long_sub(umem->npgs, &umem->user->locked_vm);
free_uid(umem->user);
}
atomic_long_sub(umem->npgs, &umem->user->locked_vm);
free_uid(umem->user);
}
static void xdp_umem_release(struct xdp_umem *umem)
@ -75,22 +52,18 @@ static void xdp_umem_release(struct xdp_umem *umem)
umem->cq = NULL;
}
if (umem->pgs) {
xdp_umem_unpin_pages(umem);
xdp_umem_unpin_pages(umem);
task = get_pid_task(umem->pid, PIDTYPE_PID);
put_pid(umem->pid);
if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
mmput(mm);
umem->pgs = NULL;
}
task = get_pid_task(umem->pid, PIDTYPE_PID);
put_pid(umem->pid);
if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
mmput(mm);
xdp_umem_unaccount_pages(umem);
out:
kfree(umem);
@ -105,7 +78,7 @@ static void xdp_umem_release_deferred(struct work_struct *work)
void xdp_get_umem(struct xdp_umem *umem)
{
atomic_inc(&umem->users);
refcount_inc(&umem->users);
}
void xdp_put_umem(struct xdp_umem *umem)
@ -113,7 +86,7 @@ void xdp_put_umem(struct xdp_umem *umem)
if (!umem)
return;
if (atomic_dec_and_test(&umem->users)) {
if (refcount_dec_and_test(&umem->users)) {
INIT_WORK(&umem->work, xdp_umem_release_deferred);
schedule_work(&umem->work);
}
@ -176,16 +149,13 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
return 0;
}
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
u64 addr = mr->addr, size = mr->len;
unsigned int nframes, nfpp;
int size_chk, err;
if (!umem)
return -EINVAL;
if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
/* Strictly speaking we could support this, if:
* - huge pages, or
@ -236,7 +206,7 @@ int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->frame_size_log2 = ilog2(frame_size);
umem->nfpp_mask = nfpp - 1;
umem->nfpplog2 = ilog2(nfpp);
atomic_set(&umem->users, 1);
refcount_set(&umem->users, 1);
err = xdp_umem_account_pages(umem);
if (err)
@ -254,7 +224,25 @@ out:
return err;
}
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
struct xdp_umem *umem;
int err;
umem = kzalloc(sizeof(*umem), GFP_KERNEL);
if (!umem)
return ERR_PTR(-ENOMEM);
err = xdp_umem_reg(umem, mr);
if (err) {
kfree(umem);
return ERR_PTR(err);
}
return umem;
}
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
return (umem->fq && umem->cq);
return umem->fq && umem->cq;
}

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
* XDP user-space packet buffer
/* SPDX-License-Identifier: GPL-2.0 */
/* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef XDP_UMEM_H_
@ -36,7 +27,7 @@ struct xdp_umem {
struct pid *pid;
unsigned long address;
size_t size;
atomic_t users;
refcount_t users;
struct work_struct work;
};
@ -59,9 +50,8 @@ static inline char *xdp_umem_get_data_with_headroom(struct xdp_umem *umem,
}
bool xdp_umem_validate_queues(struct xdp_umem *umem);
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr);
void xdp_get_umem(struct xdp_umem *umem);
void xdp_put_umem(struct xdp_umem *umem);
int xdp_umem_create(struct xdp_umem **umem);
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr);
#endif /* XDP_UMEM_H_ */

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
* XDP user-space packet buffer
/* SPDX-License-Identifier: GPL-2.0 */
/* XDP user-space packet buffer
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef XDP_UMEM_PROPS_H_

View File

@ -5,15 +5,6 @@
* applications.
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* Author(s): Björn Töpel <bjorn.topel@intel.com>
* Magnus Karlsson <magnus.karlsson@intel.com>
*/
@ -151,6 +142,11 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out;
}
if (xs->queue_id >= xs->dev->real_num_tx_queues) {
err = -ENXIO;
goto out;
}
skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
if (unlikely(!skb)) {
err = -EAGAIN;
@ -232,18 +228,12 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
if (!q)
return -ENOMEM;
/* Make sure queue is ready before it can be seen by others */
smp_wmb();
*queue = q;
return 0;
}
static void __xsk_release(struct xdp_sock *xs)
{
/* Wait for driver to stop using the xdp socket. */
synchronize_net();
dev_put(xs->dev);
}
static int xsk_release(struct socket *sock)
{
struct sock *sk = sock->sk;
@ -260,7 +250,9 @@ static int xsk_release(struct socket *sock)
local_bh_enable();
if (xs->dev) {
__xsk_release(xs);
/* Wait for driver to stop using the xdp socket. */
synchronize_net();
dev_put(xs->dev);
xs->dev = NULL;
}
@ -294,9 +286,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
struct sock *sk = sock->sk;
struct net_device *dev, *dev_curr;
struct xdp_sock *xs = xdp_sk(sk);
struct xdp_umem *old_umem = NULL;
struct net_device *dev;
int err = 0;
if (addr_len < sizeof(struct sockaddr_xdp))
@ -305,7 +296,11 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
return -EINVAL;
mutex_lock(&xs->mutex);
dev_curr = xs->dev;
if (xs->dev) {
err = -EBUSY;
goto out_release;
}
dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
if (!dev) {
err = -ENODEV;
@ -317,7 +312,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
goto out_unlock;
}
if (sxdp->sxdp_queue_id >= dev->num_rx_queues) {
if ((xs->rx && sxdp->sxdp_queue_id >= dev->real_num_rx_queues) ||
(xs->tx && sxdp->sxdp_queue_id >= dev->real_num_tx_queues)) {
err = -EINVAL;
goto out_unlock;
}
@ -352,7 +348,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
}
xdp_get_umem(umem_xs->umem);
old_umem = xs->umem;
xs->umem = umem_xs->umem;
sockfd_put(sock);
} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
@ -364,14 +359,6 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xskq_set_umem(xs->umem->cq, &xs->umem->props);
}
/* Rebind? */
if (dev_curr && (dev_curr != dev ||
xs->queue_id != sxdp->sxdp_queue_id)) {
__xsk_release(xs);
if (old_umem)
xdp_put_umem(old_umem);
}
xs->dev = dev;
xs->queue_id = sxdp->sxdp_queue_id;
@ -419,25 +406,23 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xdp_umem_reg mr;
struct xdp_umem *umem;
if (xs->umem)
return -EBUSY;
if (copy_from_user(&mr, optval, sizeof(mr)))
return -EFAULT;
mutex_lock(&xs->mutex);
err = xdp_umem_create(&umem);
err = xdp_umem_reg(umem, &mr);
if (err) {
kfree(umem);
if (xs->umem) {
mutex_unlock(&xs->mutex);
return err;
return -EBUSY;
}
umem = xdp_umem_create(&mr);
if (IS_ERR(umem)) {
mutex_unlock(&xs->mutex);
return PTR_ERR(umem);
}
/* Make sure umem is ready before it can be seen by others */
smp_wmb();
xs->umem = umem;
mutex_unlock(&xs->mutex);
return 0;
@ -448,13 +433,15 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;
if (!xs->umem)
return -EINVAL;
if (copy_from_user(&entries, optval, sizeof(entries)))
return -EFAULT;
mutex_lock(&xs->mutex);
if (!xs->umem) {
mutex_unlock(&xs->mutex);
return -EINVAL;
}
q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
&xs->umem->cq;
err = xsk_init_queue(entries, q, true);
@ -504,6 +491,35 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
case XDP_MMAP_OFFSETS:
{
struct xdp_mmap_offsets off;
if (len < sizeof(off))
return -EINVAL;
off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
off.rx.desc = offsetof(struct xdp_rxtx_ring, desc);
off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
off.tx.desc = offsetof(struct xdp_rxtx_ring, desc);
off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
off.fr.desc = offsetof(struct xdp_umem_ring, desc);
off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
off.cr.desc = offsetof(struct xdp_umem_ring, desc);
len = sizeof(off);
if (copy_to_user(optval, &off, len))
return -EFAULT;
if (put_user(len, optlen))
return -EFAULT;
return 0;
}
default:
break;
}
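From userspace, the new XDP_MMAP_OFFSETS getsockopt replaces the old
hard-coded ring layout; the xdpsock sample further down performs the
same sequence. A hedged sketch (assumes <sys/socket.h>, <sys/mman.h>
and <linux/if_xdp.h>; xsk_fd and ndescs are caller-provided):
/* Fetch the ring offsets once, then mmap the RX ring; the producer,
 * consumer and descriptor array are located via the returned offsets.
 */
static void *example_map_rx_ring(int xsk_fd, unsigned int ndescs)
{
	struct xdp_mmap_offsets off;
	socklen_t optlen = sizeof(off);

	if (getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen))
		return MAP_FAILED;
	return mmap(NULL, off.rx.desc + ndescs * sizeof(struct xdp_desc),
		    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
		    xsk_fd, XDP_PGOFF_RX_RING);
}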
@ -518,21 +534,23 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long size = vma->vm_end - vma->vm_start;
struct xdp_sock *xs = xdp_sk(sock->sk);
struct xsk_queue *q = NULL;
struct xdp_umem *umem;
unsigned long pfn;
struct page *qpg;
if (offset == XDP_PGOFF_RX_RING) {
q = xs->rx;
q = READ_ONCE(xs->rx);
} else if (offset == XDP_PGOFF_TX_RING) {
q = xs->tx;
q = READ_ONCE(xs->tx);
} else {
if (!xs->umem)
umem = READ_ONCE(xs->umem);
if (!umem)
return -EINVAL;
if (offset == XDP_UMEM_PGOFF_FILL_RING)
q = xs->umem->fq;
q = READ_ONCE(umem->fq);
else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
q = xs->umem->cq;
q = READ_ONCE(umem->cq);
}
if (!q)
@ -554,24 +572,24 @@ static struct proto xsk_proto = {
};
static const struct proto_ops xsk_proto_ops = {
.family = PF_XDP,
.owner = THIS_MODULE,
.release = xsk_release,
.bind = xsk_bind,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
.poll = xsk_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = xsk_setsockopt,
.getsockopt = xsk_getsockopt,
.sendmsg = xsk_sendmsg,
.recvmsg = sock_no_recvmsg,
.mmap = xsk_mmap,
.sendpage = sock_no_sendpage,
.family = PF_XDP,
.owner = THIS_MODULE,
.release = xsk_release,
.bind = xsk_bind,
.connect = sock_no_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
.poll = xsk_poll,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = xsk_setsockopt,
.getsockopt = xsk_getsockopt,
.sendmsg = xsk_sendmsg,
.recvmsg = sock_no_recvmsg,
.mmap = xsk_mmap,
.sendpage = sock_no_sendpage,
};
static void xsk_destruct(struct sock *sk)

View File

@ -1,15 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* XDP user-space ring structure
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/slab.h>
@ -31,8 +22,7 @@ static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
{
return (sizeof(struct xdp_ring) +
q->nentries * sizeof(struct xdp_desc));
return sizeof(struct xdp_ring) + q->nentries * sizeof(struct xdp_desc);
}
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)

View File

@ -1,15 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
* XDP user-space ring structure
/* SPDX-License-Identifier: GPL-2.0 */
/* XDP user-space ring structure
* Copyright(c) 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _LINUX_XSK_QUEUE_H
@ -22,6 +13,23 @@
#define RX_BATCH_SIZE 16
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
u32 consumer ____cacheline_aligned_in_smp;
};
/* Used for the RX and TX queues for packets */
struct xdp_rxtx_ring {
struct xdp_ring ptrs;
struct xdp_desc desc[0] ____cacheline_aligned_in_smp;
};
/* Used for the fill and completion queues for buffers */
struct xdp_umem_ring {
struct xdp_ring ptrs;
u32 desc[0] ____cacheline_aligned_in_smp;
};
struct xsk_queue {
struct xdp_umem_props umem_props;
u32 ring_mask;
@ -232,12 +240,12 @@ static inline void xskq_produce_flush_desc(struct xsk_queue *q)
static inline bool xskq_full_desc(struct xsk_queue *q)
{
return (xskq_nb_avail(q, q->nentries) == q->nentries);
return xskq_nb_avail(q, q->nentries) == q->nentries;
}
static inline bool xskq_empty_desc(struct xsk_queue *q)
{
return (xskq_nb_free(q, q->prod_tail, 1) == q->nentries);
return xskq_nb_free(q, q->prod_tail, 1) == q->nentries;
}
void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);

View File

@ -51,6 +51,7 @@ hostprogs-y += cpustat
hostprogs-y += xdp_adjust_tail
hostprogs-y += xdpsock
hostprogs-y += xdp_fwd
hostprogs-y += task_fd_query
# Libbpf dependencies
LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a
@ -105,6 +106,7 @@ cpustat-objs := bpf_load.o cpustat_user.o
xdp_adjust_tail-objs := xdp_adjust_tail_user.o
xdpsock-objs := bpf_load.o xdpsock_user.o
xdp_fwd-objs := bpf_load.o xdp_fwd_user.o
task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS)
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@ -160,6 +162,7 @@ always += cpustat_kern.o
always += xdp_adjust_tail_kern.o
always += xdpsock_kern.o
always += xdp_fwd_kern.o
always += task_fd_query_kern.o
HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@ -175,6 +178,7 @@ HOSTCFLAGS_offwaketime_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_spintest_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_trace_event_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_sampleip_user.o += -I$(srctree)/tools/lib/bpf/
HOSTCFLAGS_task_fd_query_user.o += -I$(srctree)/tools/lib/bpf/
HOST_LOADLIBES += $(LIBBPF) -lelf
HOSTLOADLIBES_tracex4 += -lrt

View File

@ -0,0 +1,19 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/version.h>
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
SEC("kprobe/blk_start_request")
int bpf_prog1(struct pt_regs *ctx)
{
return 0;
}
SEC("kretprobe/blk_account_io_completion")
int bpf_prog2(struct pt_regs *ctx)
{
return 0;
}
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;

View File

@ -0,0 +1,382 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <linux/bpf.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "libbpf.h"
#include "bpf_load.h"
#include "bpf_util.h"
#include "perf-sys.h"
#include "trace_helpers.h"
#define CHECK_PERROR_RET(condition) ({ \
int __ret = !!(condition); \
if (__ret) { \
printf("FAIL: %s:\n", __func__); \
perror(" "); \
return -1; \
} \
})
#define CHECK_AND_RET(condition) ({ \
int __ret = !!(condition); \
if (__ret) \
return -1; \
})
static __u64 ptr_to_u64(void *ptr)
{
return (__u64) (unsigned long) ptr;
}
#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
static int bpf_find_probe_type(const char *event_type)
{
char buf[256];
int fd, ret;
ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
fd = open(buf, O_RDONLY);
CHECK_PERROR_RET(fd < 0);
ret = read(fd, buf, sizeof(buf));
close(fd);
CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
errno = 0;
ret = (int)strtol(buf, NULL, 10);
CHECK_PERROR_RET(errno);
return ret;
}
#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
static int bpf_get_retprobe_bit(const char *event_type)
{
char buf[256];
int fd, ret;
ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
fd = open(buf, O_RDONLY);
CHECK_PERROR_RET(fd < 0);
ret = read(fd, buf, sizeof(buf));
close(fd);
CHECK_PERROR_RET(ret < 0 || ret >= sizeof(buf));
CHECK_PERROR_RET(strlen(buf) < strlen("config:"));
errno = 0;
ret = (int)strtol(buf + strlen("config:"), NULL, 10);
CHECK_PERROR_RET(errno);
return ret;
}
static int test_debug_fs_kprobe(int prog_fd_idx, const char *fn_name,
__u32 expected_fd_type)
{
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
char buf[256];
int err;
len = sizeof(buf);
err = bpf_task_fd_query(getpid(), event_fd[prog_fd_idx], 0, buf, &len,
&prog_id, &fd_type, &probe_offset,
&probe_addr);
if (err < 0) {
printf("FAIL: %s, for event_fd idx %d, fn_name %s\n",
__func__, prog_fd_idx, fn_name);
perror(" :");
return -1;
}
if (strcmp(buf, fn_name) != 0 ||
fd_type != expected_fd_type ||
probe_offset != 0x0 || probe_addr != 0x0) {
printf("FAIL: bpf_trace_event_query(event_fd[%d]):\n",
prog_fd_idx);
printf("buf: %s, fd_type: %u, probe_offset: 0x%llx,"
" probe_addr: 0x%llx\n",
buf, fd_type, probe_offset, probe_addr);
return -1;
}
return 0;
}
static int test_nondebug_fs_kuprobe_common(const char *event_type,
const char *name, __u64 offset, __u64 addr, bool is_return,
char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
__u64 *probe_offset, __u64 *probe_addr)
{
int is_return_bit = bpf_get_retprobe_bit(event_type);
int type = bpf_find_probe_type(event_type);
struct perf_event_attr attr = {};
int fd;
if (type < 0 || is_return_bit < 0) {
printf("FAIL: %s incorrect type (%d) or is_return_bit (%d)\n",
__func__, type, is_return_bit);
return -1;
}
attr.sample_period = 1;
attr.wakeup_events = 1;
if (is_return)
attr.config |= 1 << is_return_bit;
if (name) {
attr.config1 = ptr_to_u64((void *)name);
attr.config2 = offset;
} else {
attr.config1 = 0;
attr.config2 = addr;
}
attr.size = sizeof(attr);
attr.type = type;
fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
CHECK_PERROR_RET(fd < 0);
CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0);
CHECK_PERROR_RET(ioctl(fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
CHECK_PERROR_RET(bpf_task_fd_query(getpid(), fd, 0, buf, buf_len,
prog_id, fd_type, probe_offset, probe_addr) < 0);
return 0;
}
static int test_nondebug_fs_probe(const char *event_type, const char *name,
__u64 offset, __u64 addr, bool is_return,
__u32 expected_fd_type,
__u32 expected_ret_fd_type,
char *buf, __u32 buf_len)
{
__u64 probe_offset, probe_addr;
__u32 prog_id, fd_type;
int err;
err = test_nondebug_fs_kuprobe_common(event_type, name,
offset, addr, is_return,
buf, &buf_len, &prog_id,
&fd_type, &probe_offset,
&probe_addr);
if (err < 0) {
printf("FAIL: %s, "
"for name %s, offset 0x%llx, addr 0x%llx, is_return %d\n",
__func__, name ? name : "", offset, addr, is_return);
perror(" :");
return -1;
}
if ((is_return && fd_type != expected_ret_fd_type) ||
(!is_return && fd_type != expected_fd_type)) {
printf("FAIL: %s, incorrect fd_type %u\n",
__func__, fd_type);
return -1;
}
if (name) {
if (strcmp(name, buf) != 0) {
printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
return -1;
}
if (probe_offset != offset) {
printf("FAIL: %s, incorrect probe_offset 0x%llx\n",
__func__, probe_offset);
return -1;
}
} else {
if (buf_len != 0) {
printf("FAIL: %s, incorrect buf %p\n",
__func__, buf);
return -1;
}
if (probe_addr != addr) {
printf("FAIL: %s, incorrect probe_addr 0x%llx\n",
__func__, probe_addr);
return -1;
}
}
return 0;
}
static int test_debug_fs_uprobe(char *binary_path, long offset, bool is_return)
{
const char *event_type = "uprobe";
struct perf_event_attr attr = {};
char buf[256], event_alias[256];
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
int err, res, kfd, efd;
ssize_t bytes;
snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events",
event_type);
kfd = open(buf, O_WRONLY | O_APPEND, 0);
CHECK_PERROR_RET(kfd < 0);
res = snprintf(event_alias, sizeof(event_alias), "test_%d", getpid());
CHECK_PERROR_RET(res < 0 || res >= sizeof(event_alias));
res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx",
is_return ? 'r' : 'p', event_type, event_alias,
binary_path, offset);
CHECK_PERROR_RET(res < 0 || res >= sizeof(buf));
CHECK_PERROR_RET(write(kfd, buf, strlen(buf)) < 0);
close(kfd);
kfd = -1;
snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s/id",
event_type, event_alias);
efd = open(buf, O_RDONLY, 0);
CHECK_PERROR_RET(efd < 0);
bytes = read(efd, buf, sizeof(buf));
CHECK_PERROR_RET(bytes <= 0 || bytes >= sizeof(buf));
close(efd);
buf[bytes] = '\0';
attr.config = strtol(buf, NULL, 0);
attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_period = 1;
attr.wakeup_events = 1;
kfd = sys_perf_event_open(&attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
CHECK_PERROR_RET(kfd < 0);
CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) < 0);
CHECK_PERROR_RET(ioctl(kfd, PERF_EVENT_IOC_ENABLE, 0) < 0);
len = sizeof(buf);
err = bpf_task_fd_query(getpid(), kfd, 0, buf, &len,
&prog_id, &fd_type, &probe_offset,
&probe_addr);
if (err < 0) {
printf("FAIL: %s, binary_path %s\n", __func__, binary_path);
perror(" :");
return -1;
}
if ((is_return && fd_type != BPF_FD_TYPE_URETPROBE) ||
(!is_return && fd_type != BPF_FD_TYPE_UPROBE)) {
printf("FAIL: %s, incorrect fd_type %u\n", __func__,
fd_type);
return -1;
}
if (strcmp(binary_path, buf) != 0) {
printf("FAIL: %s, incorrect buf %s\n", __func__, buf);
return -1;
}
if (probe_offset != offset) {
printf("FAIL: %s, incorrect probe_offset 0x%llx\n", __func__,
probe_offset);
return -1;
}
close(kfd);
return 0;
}
int main(int argc, char **argv)
{
struct rlimit r = {1024*1024, RLIM_INFINITY};
extern char __executable_start;
char filename[256], buf[256];
__u64 uprobe_file_offset;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return 1;
}
if (load_kallsyms()) {
printf("failed to process /proc/kallsyms\n");
return 1;
}
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
}
/* test two functions in the corresponding *_kern.c file */
CHECK_AND_RET(test_debug_fs_kprobe(0, "blk_start_request",
BPF_FD_TYPE_KPROBE));
CHECK_AND_RET(test_debug_fs_kprobe(1, "blk_account_io_completion",
BPF_FD_TYPE_KRETPROBE));
/* test nondebug fs kprobe */
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
false, BPF_FD_TYPE_KPROBE,
BPF_FD_TYPE_KRETPROBE,
buf, sizeof(buf)));
#ifdef __x86_64__
/* set a kprobe on "bpf_check + 0x5", which is x64 specific */
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x5, 0x0,
false, BPF_FD_TYPE_KPROBE,
BPF_FD_TYPE_KRETPROBE,
buf, sizeof(buf)));
#endif
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", "bpf_check", 0x0, 0x0,
true, BPF_FD_TYPE_KPROBE,
BPF_FD_TYPE_KRETPROBE,
buf, sizeof(buf)));
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
ksym_get_addr("bpf_check"), false,
BPF_FD_TYPE_KPROBE,
BPF_FD_TYPE_KRETPROBE,
buf, sizeof(buf)));
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
ksym_get_addr("bpf_check"), false,
BPF_FD_TYPE_KPROBE,
BPF_FD_TYPE_KRETPROBE,
NULL, 0));
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
ksym_get_addr("bpf_check"), true,
BPF_FD_TYPE_KPROBE,
BPF_FD_TYPE_KRETPROBE,
buf, sizeof(buf)));
CHECK_AND_RET(test_nondebug_fs_probe("kprobe", NULL, 0x0,
ksym_get_addr("bpf_check"), true,
BPF_FD_TYPE_KPROBE,
BPF_FD_TYPE_KRETPROBE,
0, 0));
/* test nondebug fs uprobe */
/* the calculation of uprobe file offset is based on gcc 7.3.1 on x64
* and the default linker script, which defines __executable_start as
* the start of the .text section. The calculation could be different
* on different systems with different compilers. The right way is
* to parse the ELF file. We took a shortcut here.
*/
uprobe_file_offset = (__u64)main - (__u64)&__executable_start;
CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
uprobe_file_offset, 0x0, false,
BPF_FD_TYPE_UPROBE,
BPF_FD_TYPE_URETPROBE,
buf, sizeof(buf)));
CHECK_AND_RET(test_nondebug_fs_probe("uprobe", (char *)argv[0],
uprobe_file_offset, 0x0, true,
BPF_FD_TYPE_UPROBE,
BPF_FD_TYPE_URETPROBE,
buf, sizeof(buf)));
/* test debug fs uprobe */
CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
false));
CHECK_AND_RET(test_debug_fs_uprobe((char *)argv[0], uprobe_file_offset,
true));
return 0;
}

View File

@ -125,6 +125,7 @@ struct datarec {
u64 processed;
u64 dropped;
u64 info;
u64 err;
};
#define MAX_CPUS 64
@ -208,3 +209,51 @@ int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
return 0;
}
struct bpf_map_def SEC("maps") devmap_xmit_cnt = {
.type = BPF_MAP_TYPE_PERCPU_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(struct datarec),
.max_entries = 1,
};
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_devmap_xmit/format
* Code in: kernel/include/trace/events/xdp.h
*/
struct devmap_xmit_ctx {
u64 __pad; // First 8 bytes are not accessible by bpf code
int map_id; // offset:8; size:4; signed:1;
u32 act; // offset:12; size:4; signed:0;
u32 map_index; // offset:16; size:4; signed:0;
int drops; // offset:20; size:4; signed:1;
int sent; // offset:24; size:4; signed:1;
int from_ifindex; // offset:28; size:4; signed:1;
int to_ifindex; // offset:32; size:4; signed:1;
int err; // offset:36; size:4; signed:1;
};
SEC("tracepoint/xdp/xdp_devmap_xmit")
int trace_xdp_devmap_xmit(struct devmap_xmit_ctx *ctx)
{
struct datarec *rec;
u32 key = 0;
rec = bpf_map_lookup_elem(&devmap_xmit_cnt, &key);
if (!rec)
return 0;
rec->processed += ctx->sent;
rec->dropped += ctx->drops;
/* Record bulk events, then userspace can calc average bulk size */
rec->info += 1;
/* Record error cases, where no frames were sent */
if (ctx->err)
rec->err++;
/* Catch API misuse where the driver's ndo_xdp_xmit sent more than count */
if (ctx->drops < 0)
rec->err++;
return 1;
}

View File

@ -117,6 +117,7 @@ struct datarec {
__u64 processed;
__u64 dropped;
__u64 info;
__u64 err;
};
#define MAX_CPUS 64
@ -141,6 +142,7 @@ struct stats_record {
struct record_u64 xdp_exception[XDP_ACTION_MAX];
struct record xdp_cpumap_kthread;
struct record xdp_cpumap_enqueue[MAX_CPUS];
struct record xdp_devmap_xmit;
};
static bool map_collect_record(int fd, __u32 key, struct record *rec)
@ -151,6 +153,7 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
__u64 sum_processed = 0;
__u64 sum_dropped = 0;
__u64 sum_info = 0;
__u64 sum_err = 0;
int i;
if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
@ -169,10 +172,13 @@ static bool map_collect_record(int fd, __u32 key, struct record *rec)
sum_dropped += values[i].dropped;
rec->cpu[i].info = values[i].info;
sum_info += values[i].info;
rec->cpu[i].err = values[i].err;
sum_err += values[i].err;
}
rec->total.processed = sum_processed;
rec->total.dropped = sum_dropped;
rec->total.info = sum_info;
rec->total.err = sum_err;
return true;
}
@ -273,6 +279,18 @@ static double calc_info(struct datarec *r, struct datarec *p, double period)
return pps;
}
static double calc_err(struct datarec *r, struct datarec *p, double period)
{
__u64 packets = 0;
double pps = 0;
if (period > 0) {
packets = r->err - p->err;
pps = packets / period;
}
return pps;
}
static void stats_print(struct stats_record *stats_rec,
struct stats_record *stats_prev,
bool err_only)
@ -397,7 +415,7 @@ static void stats_print(struct stats_record *stats_rec,
info = calc_info(r, p, t);
if (info > 0)
i_str = "sched";
if (pps > 0)
if (pps > 0 || drop > 0)
printf(fmt1, "cpumap-kthread",
i, pps, drop, info, i_str);
}
@ -409,6 +427,50 @@ static void stats_print(struct stats_record *stats_rec,
printf(fmt2, "cpumap-kthread", "total", pps, drop, info, i_str);
}
/* devmap ndo_xdp_xmit stats */
{
char *fmt1 = "%-15s %-7d %'-12.0f %'-12.0f %'-10.2f %s %s\n";
char *fmt2 = "%-15s %-7s %'-12.0f %'-12.0f %'-10.2f %s %s\n";
struct record *rec, *prev;
double drop, info, err;
char *i_str = "";
char *err_str = "";
rec = &stats_rec->xdp_devmap_xmit;
prev = &stats_prev->xdp_devmap_xmit;
t = calc_period(rec, prev);
for (i = 0; i < nr_cpus; i++) {
struct datarec *r = &rec->cpu[i];
struct datarec *p = &prev->cpu[i];
pps = calc_pps(r, p, t);
drop = calc_drop(r, p, t);
info = calc_info(r, p, t);
err = calc_err(r, p, t);
if (info > 0) {
i_str = "bulk-average";
info = (pps+drop) / info; /* calc avg bulk */
}
if (err > 0)
err_str = "drv-err";
if (pps > 0 || drop > 0)
printf(fmt1, "devmap-xmit",
i, pps, drop, info, i_str, err_str);
}
pps = calc_pps(&rec->total, &prev->total, t);
drop = calc_drop(&rec->total, &prev->total, t);
info = calc_info(&rec->total, &prev->total, t);
err = calc_err(&rec->total, &prev->total, t);
if (info > 0) {
i_str = "bulk-average";
info = (pps+drop) / info; /* calc avg bulk */
}
if (err > 0)
err_str = "drv-err";
printf(fmt2, "devmap-xmit", "total", pps, drop,
info, i_str, err_str);
}
printf("\n");
}
@ -437,6 +499,9 @@ static bool stats_collect(struct stats_record *rec)
fd = map_data[3].fd; /* map3: cpumap_kthread_cnt */
map_collect_record(fd, 0, &rec->xdp_cpumap_kthread);
fd = map_data[4].fd; /* map4: devmap_xmit_cnt */
map_collect_record(fd, 0, &rec->xdp_devmap_xmit);
return true;
}
@ -480,6 +545,7 @@ static struct stats_record *alloc_stats_record(void)
rec_sz = sizeof(struct datarec);
rec->xdp_cpumap_kthread.cpu = alloc_rec_per_cpu(rec_sz);
rec->xdp_devmap_xmit.cpu = alloc_rec_per_cpu(rec_sz);
for (i = 0; i < MAX_CPUS; i++)
rec->xdp_cpumap_enqueue[i].cpu = alloc_rec_per_cpu(rec_sz);
@ -498,6 +564,7 @@ static void free_stats_record(struct stats_record *r)
free(r->xdp_exception[i].cpu);
free(r->xdp_cpumap_kthread.cpu);
free(r->xdp_devmap_xmit.cpu);
for (i = 0; i < MAX_CPUS; i++)
free(r->xdp_cpumap_enqueue[i].cpu);

View File

@ -1,15 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
/* Copyright(c) 2017 - 2018 Intel Corporation. */
#include <assert.h>
#include <errno.h>
@ -89,7 +79,10 @@ struct xdp_umem_uqueue {
u32 cached_cons;
u32 mask;
u32 size;
struct xdp_umem_ring *ring;
u32 *producer;
u32 *consumer;
u32 *ring;
void *map;
};
struct xdp_umem {
@ -104,7 +97,10 @@ struct xdp_uqueue {
u32 cached_cons;
u32 mask;
u32 size;
struct xdp_rxtx_ring *ring;
u32 *producer;
u32 *consumer;
struct xdp_desc *ring;
void *map;
};
struct xdpsock {
@ -165,7 +161,7 @@ static inline u32 umem_nb_free(struct xdp_umem_uqueue *q, u32 nb)
return free_entries;
/* Refresh the local tail pointer */
q->cached_cons = q->ring->ptrs.consumer;
q->cached_cons = *q->consumer;
return q->size - (q->cached_prod - q->cached_cons);
}
@ -178,7 +174,7 @@ static inline u32 xq_nb_free(struct xdp_uqueue *q, u32 ndescs)
return free_entries;
/* Refresh the local tail pointer */
q->cached_cons = q->ring->ptrs.consumer + q->size;
q->cached_cons = *q->consumer + q->size;
return q->cached_cons - q->cached_prod;
}
@ -187,7 +183,7 @@ static inline u32 umem_nb_avail(struct xdp_umem_uqueue *q, u32 nb)
u32 entries = q->cached_prod - q->cached_cons;
if (entries == 0) {
q->cached_prod = q->ring->ptrs.producer;
q->cached_prod = *q->producer;
entries = q->cached_prod - q->cached_cons;
}
@ -199,7 +195,7 @@ static inline u32 xq_nb_avail(struct xdp_uqueue *q, u32 ndescs)
u32 entries = q->cached_prod - q->cached_cons;
if (entries == 0) {
q->cached_prod = q->ring->ptrs.producer;
q->cached_prod = *q->producer;
entries = q->cached_prod - q->cached_cons;
}
@ -218,12 +214,12 @@ static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
fq->ring->desc[idx] = d[i].idx;
fq->ring[idx] = d[i].idx;
}
u_smp_wmb();
fq->ring->ptrs.producer = fq->cached_prod;
*fq->producer = fq->cached_prod;
return 0;
}
@ -239,12 +235,12 @@ static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq, u32 *d,
for (i = 0; i < nb; i++) {
u32 idx = fq->cached_prod++ & fq->mask;
fq->ring->desc[idx] = d[i];
fq->ring[idx] = d[i];
}
u_smp_wmb();
fq->ring->ptrs.producer = fq->cached_prod;
*fq->producer = fq->cached_prod;
return 0;
}
@ -258,13 +254,13 @@ static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
for (i = 0; i < entries; i++) {
idx = cq->cached_cons++ & cq->mask;
d[i] = cq->ring->desc[idx];
d[i] = cq->ring[idx];
}
if (entries > 0) {
u_smp_wmb();
cq->ring->ptrs.consumer = cq->cached_cons;
*cq->consumer = cq->cached_cons;
}
return entries;
@ -280,7 +276,7 @@ static inline int xq_enq(struct xdp_uqueue *uq,
const struct xdp_desc *descs,
unsigned int ndescs)
{
struct xdp_rxtx_ring *r = uq->ring;
struct xdp_desc *r = uq->ring;
unsigned int i;
if (xq_nb_free(uq, ndescs) < ndescs)
@ -289,21 +285,21 @@ static inline int xq_enq(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
r->desc[idx].idx = descs[i].idx;
r->desc[idx].len = descs[i].len;
r->desc[idx].offset = descs[i].offset;
r[idx].idx = descs[i].idx;
r[idx].len = descs[i].len;
r[idx].offset = descs[i].offset;
}
u_smp_wmb();
r->ptrs.producer = uq->cached_prod;
*uq->producer = uq->cached_prod;
return 0;
}
static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
__u32 idx, unsigned int ndescs)
{
struct xdp_rxtx_ring *q = uq->ring;
struct xdp_desc *r = uq->ring;
unsigned int i;
if (xq_nb_free(uq, ndescs) < ndescs)
@ -312,14 +308,14 @@ static inline int xq_enq_tx_only(struct xdp_uqueue *uq,
for (i = 0; i < ndescs; i++) {
u32 idx = uq->cached_prod++ & uq->mask;
q->desc[idx].idx = idx + i;
q->desc[idx].len = sizeof(pkt_data) - 1;
q->desc[idx].offset = 0;
r[idx].idx = idx + i;
r[idx].len = sizeof(pkt_data) - 1;
r[idx].offset = 0;
}
u_smp_wmb();
q->ptrs.producer = uq->cached_prod;
*uq->producer = uq->cached_prod;
return 0;
}
@ -327,7 +323,7 @@ static inline int xq_deq(struct xdp_uqueue *uq,
struct xdp_desc *descs,
int ndescs)
{
struct xdp_rxtx_ring *r = uq->ring;
struct xdp_desc *r = uq->ring;
unsigned int idx;
int i, entries;
@ -337,13 +333,13 @@ static inline int xq_deq(struct xdp_uqueue *uq,
for (i = 0; i < entries; i++) {
idx = uq->cached_cons++ & uq->mask;
descs[i] = r->desc[idx];
descs[i] = r[idx];
}
if (entries > 0) {
u_smp_wmb();
r->ptrs.consumer = uq->cached_cons;
*uq->consumer = uq->cached_cons;
}
return entries;
@ -402,8 +398,10 @@ static size_t gen_eth_frame(char *frame)
static struct xdp_umem *xdp_umem_configure(int sfd)
{
int fq_size = FQ_NUM_DESCS, cq_size = CQ_NUM_DESCS;
struct xdp_mmap_offsets off;
struct xdp_umem_reg mr;
struct xdp_umem *umem;
socklen_t optlen;
void *bufs;
umem = calloc(1, sizeof(*umem));
@ -423,25 +421,35 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
lassert(setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
sizeof(int)) == 0);
umem->fq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
FQ_NUM_DESCS * sizeof(u32),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_FILL_RING);
lassert(umem->fq.ring != MAP_FAILED);
optlen = sizeof(off);
lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
&optlen) == 0);
umem->fq.map = mmap(0, off.fr.desc +
FQ_NUM_DESCS * sizeof(u32),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_FILL_RING);
lassert(umem->fq.map != MAP_FAILED);
umem->fq.mask = FQ_NUM_DESCS - 1;
umem->fq.size = FQ_NUM_DESCS;
umem->fq.producer = umem->fq.map + off.fr.producer;
umem->fq.consumer = umem->fq.map + off.fr.consumer;
umem->fq.ring = umem->fq.map + off.fr.desc;
umem->cq.ring = mmap(0, sizeof(struct xdp_umem_ring) +
umem->cq.map = mmap(0, off.cr.desc +
CQ_NUM_DESCS * sizeof(u32),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_UMEM_PGOFF_COMPLETION_RING);
lassert(umem->cq.ring != MAP_FAILED);
lassert(umem->cq.map != MAP_FAILED);
umem->cq.mask = CQ_NUM_DESCS - 1;
umem->cq.size = CQ_NUM_DESCS;
umem->cq.producer = umem->cq.map + off.cr.producer;
umem->cq.consumer = umem->cq.map + off.cr.consumer;
umem->cq.ring = umem->cq.map + off.cr.desc;
umem->frames = (char (*)[FRAME_SIZE])bufs;
umem->fd = sfd;
@ -459,9 +467,11 @@ static struct xdp_umem *xdp_umem_configure(int sfd)
static struct xdpsock *xsk_configure(struct xdp_umem *umem)
{
struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off;
int sfd, ndescs = NUM_DESCS;
struct xdpsock *xsk;
bool shared = true;
socklen_t optlen;
u32 i;
sfd = socket(PF_XDP, SOCK_RAW, 0);
@ -484,15 +494,18 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
&ndescs, sizeof(int)) == 0);
lassert(setsockopt(sfd, SOL_XDP, XDP_TX_RING,
&ndescs, sizeof(int)) == 0);
optlen = sizeof(off);
lassert(getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off,
&optlen) == 0);
/* Rx */
xsk->rx.ring = mmap(NULL,
sizeof(struct xdp_ring) +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_RX_RING);
lassert(xsk->rx.ring != MAP_FAILED);
xsk->rx.map = mmap(NULL,
off.rx.desc +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_RX_RING);
lassert(xsk->rx.map != MAP_FAILED);
if (!shared) {
for (i = 0; i < NUM_DESCS / 2; i++)
@ -501,19 +514,25 @@ static struct xdpsock *xsk_configure(struct xdp_umem *umem)
}
/* Tx */
xsk->tx.ring = mmap(NULL,
sizeof(struct xdp_ring) +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_TX_RING);
lassert(xsk->tx.ring != MAP_FAILED);
xsk->tx.map = mmap(NULL,
off.tx.desc +
NUM_DESCS * sizeof(struct xdp_desc),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, sfd,
XDP_PGOFF_TX_RING);
lassert(xsk->tx.map != MAP_FAILED);
xsk->rx.mask = NUM_DESCS - 1;
xsk->rx.size = NUM_DESCS;
xsk->rx.producer = xsk->rx.map + off.rx.producer;
xsk->rx.consumer = xsk->rx.map + off.rx.consumer;
xsk->rx.ring = xsk->rx.map + off.rx.desc;
xsk->tx.mask = NUM_DESCS - 1;
xsk->tx.size = NUM_DESCS;
xsk->tx.producer = xsk->tx.map + off.tx.producer;
xsk->tx.consumer = xsk->tx.map + off.tx.consumer;
xsk->tx.ring = xsk->tx.map + off.tx.desc;
sxdp.sxdp_family = PF_XDP;
sxdp.sxdp_ifindex = opt_ifindex;

View File

@ -95,7 +95,7 @@ class HeaderParser(object):
return capture.group(1)
def parse_desc(self):
p = re.compile(' \* ?(?:\t| {6,8})Description$')
p = re.compile(' \* ?(?:\t| {5,8})Description$')
capture = p.match(self.line)
if not capture:
# Helper can have empty description and we might be parsing another
@ -109,7 +109,7 @@ class HeaderParser(object):
if self.line == ' *\n':
desc += '\n'
else:
p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)')
p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line)
if capture:
desc += capture.group(1) + '\n'
@ -118,7 +118,7 @@ class HeaderParser(object):
return desc
def parse_ret(self):
p = re.compile(' \* ?(?:\t| {6,8})Return$')
p = re.compile(' \* ?(?:\t| {5,8})Return$')
capture = p.match(self.line)
if not capture:
# Helper can have empty retval and we might be parsing another
@ -132,7 +132,7 @@ class HeaderParser(object):
if self.line == ' *\n':
ret += '\n'
else:
p = re.compile(' \* ?(?:\t| {6,8})(?:\t| {8})(.*)')
p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line)
if capture:
ret += capture.group(1) + '\n'

View File

@ -0,0 +1,81 @@
================
bpftool-perf
================
-------------------------------------------------------------------------------
tool for inspection of perf-related bpf prog attachments
-------------------------------------------------------------------------------
:Manual section: 8
SYNOPSIS
========
**bpftool** [*OPTIONS*] **perf** *COMMAND*
*OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
*COMMANDS* :=
{ **show** | **list** | **help** }
PERF COMMANDS
=============
| **bpftool** **perf { show | list }**
| **bpftool** **perf help**
DESCRIPTION
===========
**bpftool perf { show | list }**
List all raw_tracepoint, tracepoint, kprobe, and uprobe attachments in the system.
Output will start with process id and file descriptor in that process,
followed by bpf program id, attachment information, and attachment point.
The attachment point for raw_tracepoint/tracepoint is the trace probe name.
The attachment point for k[ret]probe is either a symbol name and
offset or a kernel virtual address.
The attachment point for u[ret]probe is the file name and the file offset.
**bpftool perf help**
Print short help message.
OPTIONS
=======
-h, --help
Print short generic help message (similar to **bpftool help**).
-v, --version
Print version number (similar to **bpftool version**).
-j, --json
Generate JSON output. For commands that cannot produce JSON, this
option has no effect.
-p, --pretty
Generate human-readable JSON output. Implies **-j**.
EXAMPLES
========
| **# bpftool perf**
::
pid 21711 fd 5: prog_id 5 kprobe func __x64_sys_write offset 0
pid 21765 fd 5: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0
pid 21767 fd 5: prog_id 8 tracepoint sys_enter_nanosleep
pid 21800 fd 5: prog_id 9 uprobe filename /home/yhs/a.out offset 1159
|
| **# bpftool -j perf**
::
[{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
{"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
{"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
{"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
SEE ALSO
========
**bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)

View File

@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** **version**
*OBJECT* := { **map** | **program** | **cgroup** }
*OBJECT* := { **map** | **program** | **cgroup** | **perf** }
*OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
| { **-j** | **--json** } [{ **-p** | **--pretty** }] }
@ -30,6 +30,8 @@ SYNOPSIS
*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
*PERF-COMMANDS* := { **show** | **list** | **help** }
DESCRIPTION
===========
*bpftool* allows for inspection and simple modification of BPF objects
@ -56,3 +58,4 @@ OPTIONS
SEE ALSO
========
**bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
**bpftool-perf**\ (8)

View File

@ -448,6 +448,15 @@ _bpftool()
;;
esac
;;
perf)
case $command in
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'help \
show list' -- "$cur" ) )
;;
esac
;;
esac
} &&
complete -F _bpftool bpftool

View File

@ -87,7 +87,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
" OBJECT := { prog | map | cgroup }\n"
" OBJECT := { prog | map | cgroup | perf }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@ -216,6 +216,7 @@ static const struct cmd cmds[] = {
{ "prog", do_prog },
{ "map", do_map },
{ "cgroup", do_cgroup },
{ "perf", do_perf },
{ "version", do_version },
{ 0 }
};

View File

@ -119,6 +119,7 @@ int do_prog(int argc, char **arg);
int do_map(int argc, char **arg);
int do_event_pipe(int argc, char **argv);
int do_cgroup(int argc, char **arg);
int do_perf(int argc, char **arg);
int prog_parse_fd(int *argc, char ***argv);
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);

tools/bpf/bpftool/perf.c (new file, 246 lines)
View File

@ -0,0 +1,246 @@
// SPDX-License-Identifier: GPL-2.0+
// Copyright (C) 2018 Facebook
// Author: Yonghong Song <yhs@fb.com>
#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <ftw.h>
#include <bpf.h>
#include "main.h"
/* 0: undecided, 1: supported, 2: not supported */
static int perf_query_supported;
static bool has_perf_query_support(void)
{
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
char buf[256];
int fd;
if (perf_query_supported)
goto out;
fd = open(bin_name, O_RDONLY);
if (fd < 0) {
p_err("perf_query_support: %s", strerror(errno));
goto out;
}
/* The following query will fail because there is no bpf attachment;
 * the expected errno is ENOTSUPP.
 */
errno = 0;
len = sizeof(buf);
bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id,
&fd_type, &probe_offset, &probe_addr);
if (errno == 524 /* ENOTSUPP */) {
perf_query_supported = 1;
goto close_fd;
}
perf_query_supported = 2;
p_err("perf_query_support: %s", strerror(errno));
fprintf(stderr,
"HINT: non root or kernel doesn't support TASK_FD_QUERY\n");
close_fd:
close(fd);
out:
return perf_query_supported == 1;
}
static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
char *buf, __u64 probe_offset, __u64 probe_addr)
{
jsonw_start_object(json_wtr);
jsonw_int_field(json_wtr, "pid", pid);
jsonw_int_field(json_wtr, "fd", fd);
jsonw_uint_field(json_wtr, "prog_id", prog_id);
switch (fd_type) {
case BPF_FD_TYPE_RAW_TRACEPOINT:
jsonw_string_field(json_wtr, "fd_type", "raw_tracepoint");
jsonw_string_field(json_wtr, "tracepoint", buf);
break;
case BPF_FD_TYPE_TRACEPOINT:
jsonw_string_field(json_wtr, "fd_type", "tracepoint");
jsonw_string_field(json_wtr, "tracepoint", buf);
break;
case BPF_FD_TYPE_KPROBE:
jsonw_string_field(json_wtr, "fd_type", "kprobe");
if (buf[0] != '\0') {
jsonw_string_field(json_wtr, "func", buf);
jsonw_lluint_field(json_wtr, "offset", probe_offset);
} else {
jsonw_lluint_field(json_wtr, "addr", probe_addr);
}
break;
case BPF_FD_TYPE_KRETPROBE:
jsonw_string_field(json_wtr, "fd_type", "kretprobe");
if (buf[0] != '\0') {
jsonw_string_field(json_wtr, "func", buf);
jsonw_lluint_field(json_wtr, "offset", probe_offset);
} else {
jsonw_lluint_field(json_wtr, "addr", probe_addr);
}
break;
case BPF_FD_TYPE_UPROBE:
jsonw_string_field(json_wtr, "fd_type", "uprobe");
jsonw_string_field(json_wtr, "filename", buf);
jsonw_lluint_field(json_wtr, "offset", probe_offset);
break;
case BPF_FD_TYPE_URETPROBE:
jsonw_string_field(json_wtr, "fd_type", "uretprobe");
jsonw_string_field(json_wtr, "filename", buf);
jsonw_lluint_field(json_wtr, "offset", probe_offset);
break;
}
jsonw_end_object(json_wtr);
}
static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
char *buf, __u64 probe_offset, __u64 probe_addr)
{
printf("pid %d fd %d: prog_id %u ", pid, fd, prog_id);
switch (fd_type) {
case BPF_FD_TYPE_RAW_TRACEPOINT:
printf("raw_tracepoint %s\n", buf);
break;
case BPF_FD_TYPE_TRACEPOINT:
printf("tracepoint %s\n", buf);
break;
case BPF_FD_TYPE_KPROBE:
if (buf[0] != '\0')
printf("kprobe func %s offset %llu\n", buf,
probe_offset);
else
printf("kprobe addr %llu\n", probe_addr);
break;
case BPF_FD_TYPE_KRETPROBE:
if (buf[0] != '\0')
printf("kretprobe func %s offset %llu\n", buf,
probe_offset);
else
printf("kretprobe addr %llu\n", probe_addr);
break;
case BPF_FD_TYPE_UPROBE:
printf("uprobe filename %s offset %llu\n", buf, probe_offset);
break;
case BPF_FD_TYPE_URETPROBE:
printf("uretprobe filename %s offset %llu\n", buf,
probe_offset);
break;
}
}
static int show_proc(const char *fpath, const struct stat *sb,
int tflag, struct FTW *ftwbuf)
{
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
int err, pid = 0, fd = 0;
const char *pch;
char buf[4096];
/* prefix always /proc */
pch = fpath + 5;
if (*pch == '\0')
return 0;
/* pid should be all numbers */
pch++;
while (isdigit(*pch)) {
pid = pid * 10 + *pch - '0';
pch++;
}
if (*pch == '\0')
return 0;
if (*pch != '/')
return FTW_SKIP_SUBTREE;
/* check /proc/<pid>/fd directory */
pch++;
if (strncmp(pch, "fd", 2))
return FTW_SKIP_SUBTREE;
pch += 2;
if (*pch == '\0')
return 0;
if (*pch != '/')
return FTW_SKIP_SUBTREE;
/* check /proc/<pid>/fd/<fd_num> */
pch++;
while (isdigit(*pch)) {
fd = fd * 10 + *pch - '0';
pch++;
}
if (*pch != '\0')
return FTW_SKIP_SUBTREE;
/* query (pid, fd) for potential perf events */
len = sizeof(buf);
err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
&probe_offset, &probe_addr);
if (err < 0)
return 0;
if (json_output)
print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
probe_addr);
else
print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
probe_addr);
return 0;
}
static int do_show(int argc, char **argv)
{
int flags = FTW_ACTIONRETVAL | FTW_PHYS;
int err = 0, nopenfd = 16;
if (!has_perf_query_support())
return -1;
if (json_output)
jsonw_start_array(json_wtr);
if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
p_err("%s", strerror(errno));
err = -1;
}
if (json_output)
jsonw_end_array(json_wtr);
return err;
}
static int do_help(int argc, char **argv)
{
fprintf(stderr,
"Usage: %s %s { show | list | help }\n"
"",
bin_name, argv[-2]);
return 0;
}
static const struct cmd cmds[] = {
{ "show", do_show },
{ "list", do_show },
{ "help", do_help },
{ 0 }
};
int do_perf(int argc, char **argv)
{
return cmd_select(cmds, argc, argv, do_help);
}
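
For orientation, here is a minimal sketch (not part of the patch) of what show_proc() does for one (pid, fd) pair, built on the bpf_task_fd_query() wrapper this series adds to libbpf further below; the query_one() name and the include paths are placeholders:

#include <stdio.h>
#include <linux/types.h>
#include <bpf.h>	/* libbpf, as included by bpftool above */

/* Hypothetical helper: query a single (pid, fd) pair and print it. */
static int query_one(int pid, int fd)
{
	__u64 probe_offset, probe_addr;
	__u32 prog_id, fd_type;
	char buf[256];
	__u32 buf_len = sizeof(buf);

	if (bpf_task_fd_query(pid, fd, 0, buf, &buf_len, &prog_id,
			      &fd_type, &probe_offset, &probe_addr))
		return -1;	/* not a perf event fd with a bpf prog */

	printf("pid %d fd %d: prog_id %u fd_type %u name %s\n",
	       pid, fd, prog_id, fd_type, buf);
	return 0;
}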

View File

@ -420,7 +420,11 @@ static int do_show(int argc, char **argv)
static int do_dump(int argc, char **argv)
{
unsigned long *func_ksyms = NULL;
struct bpf_prog_info info = {};
unsigned int *func_lens = NULL;
unsigned int nr_func_ksyms;
unsigned int nr_func_lens;
struct dump_data dd = {};
__u32 len = sizeof(info);
unsigned int buf_size;
@ -496,10 +500,34 @@ static int do_dump(int argc, char **argv)
return -1;
}
nr_func_ksyms = info.nr_jited_ksyms;
if (nr_func_ksyms) {
func_ksyms = malloc(nr_func_ksyms * sizeof(__u64));
if (!func_ksyms) {
p_err("mem alloc failed");
close(fd);
goto err_free;
}
}
nr_func_lens = info.nr_jited_func_lens;
if (nr_func_lens) {
func_lens = malloc(nr_func_lens * sizeof(__u32));
if (!func_lens) {
p_err("mem alloc failed");
close(fd);
goto err_free;
}
}
memset(&info, 0, sizeof(info));
*member_ptr = ptr_to_u64(buf);
*member_len = buf_size;
info.jited_ksyms = ptr_to_u64(func_ksyms);
info.nr_jited_ksyms = nr_func_ksyms;
info.jited_func_lens = ptr_to_u64(func_lens);
info.nr_jited_func_lens = nr_func_lens;
err = bpf_obj_get_info_by_fd(fd, &info, &len);
close(fd);
@ -513,6 +541,16 @@ static int do_dump(int argc, char **argv)
goto err_free;
}
if (info.nr_jited_ksyms > nr_func_ksyms) {
p_err("too many addresses returned");
goto err_free;
}
if (info.nr_jited_func_lens > nr_func_lens) {
p_err("too many values returned");
goto err_free;
}
if ((member_len == &info.jited_prog_len &&
info.jited_prog_insns == 0) ||
(member_len == &info.xlated_prog_len &&
@ -550,7 +588,57 @@ static int do_dump(int argc, char **argv)
goto err_free;
}
disasm_print_insn(buf, *member_len, opcodes, name);
if (info.nr_jited_func_lens && info.jited_func_lens) {
struct kernel_sym *sym = NULL;
char sym_name[SYM_MAX_NAME];
unsigned char *img = buf;
__u64 *ksyms = NULL;
__u32 *lens;
__u32 i;
if (info.nr_jited_ksyms) {
kernel_syms_load(&dd);
ksyms = (__u64 *) info.jited_ksyms;
}
if (json_output)
jsonw_start_array(json_wtr);
lens = (__u32 *) info.jited_func_lens;
for (i = 0; i < info.nr_jited_func_lens; i++) {
if (ksyms) {
sym = kernel_syms_search(&dd, ksyms[i]);
if (sym)
sprintf(sym_name, "%s", sym->name);
else
sprintf(sym_name, "0x%016llx", ksyms[i]);
} else {
strcpy(sym_name, "unknown");
}
if (json_output) {
jsonw_start_object(json_wtr);
jsonw_name(json_wtr, "name");
jsonw_string(json_wtr, sym_name);
jsonw_name(json_wtr, "insns");
} else {
printf("%s:\n", sym_name);
}
disasm_print_insn(img, lens[i], opcodes, name);
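/* the JITed function images are laid out back to back; step to the next one */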
img += lens[i];
if (json_output)
jsonw_end_object(json_wtr);
else
printf("\n");
}
if (json_output)
jsonw_end_array(json_wtr);
} else {
disasm_print_insn(buf, *member_len, opcodes, name);
}
} else if (visual) {
if (json_output)
jsonw_null(json_wtr);
@ -558,6 +646,9 @@ static int do_dump(int argc, char **argv)
dump_xlated_cfg(buf, *member_len);
} else {
kernel_syms_load(&dd);
dd.nr_jited_ksyms = info.nr_jited_ksyms;
dd.jited_ksyms = (__u64 *) info.jited_ksyms;
if (json_output)
dump_xlated_json(&dd, buf, *member_len, opcodes);
else
@ -566,10 +657,14 @@ static int do_dump(int argc, char **argv)
}
free(buf);
free(func_ksyms);
free(func_lens);
return 0;
err_free:
free(buf);
free(func_ksyms);
free(func_lens);
return -1;
}
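
The allocation dance above follows the usual two-call pattern for bpf_obj_get_info_by_fd(): the first call only reports counts, the second fills caller-provided buffers. A condensed sketch of that pattern (an illustration, with error handling trimmed; ptr_to_u64() as used in bpftool, fd assumed valid):

static __u64 *get_jited_ksyms(int fd, __u32 *nr)
{
	struct bpf_prog_info info = {};
	__u32 len = sizeof(info);
	__u64 *ksyms;

	/* first call: the kernel fills in nr_jited_ksyms */
	if (bpf_obj_get_info_by_fd(fd, &info, &len))
		return NULL;
	*nr = info.nr_jited_ksyms;
	ksyms = calloc(*nr, sizeof(*ksyms));
	if (!ksyms)
		return NULL;

	/* second call: pass a buffer sized from the first call */
	memset(&info, 0, sizeof(info));
	info.nr_jited_ksyms = *nr;
	info.jited_ksyms = ptr_to_u64(ksyms);
	if (bpf_obj_get_info_by_fd(fd, &info, &len)) {
		free(ksyms);
		return NULL;
	}
	return ksyms;
}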

View File

@ -102,8 +102,8 @@ void kernel_syms_destroy(struct dump_data *dd)
free(dd->sym_mapping);
}
static struct kernel_sym *kernel_syms_search(struct dump_data *dd,
unsigned long key)
struct kernel_sym *kernel_syms_search(struct dump_data *dd,
unsigned long key)
{
struct kernel_sym sym = {
.address = key,
@ -174,7 +174,11 @@ static const char *print_call_pcrel(struct dump_data *dd,
unsigned long address,
const struct bpf_insn *insn)
{
if (sym)
if (!dd->nr_jited_ksyms)
/* Do not show address for interpreted programs */
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"%+d", insn->off);
else if (sym)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"%+d#%s", insn->off, sym->name);
else
@ -203,6 +207,10 @@ static const char *print_call(void *private_data,
unsigned long address = dd->address_call_base + insn->imm;
struct kernel_sym *sym;
if (insn->src_reg == BPF_PSEUDO_CALL &&
(__u32) insn->imm < dd->nr_jited_ksyms)
address = dd->jited_ksyms[insn->imm];
sym = kernel_syms_search(dd, address);
if (insn->src_reg == BPF_PSEUDO_CALL)
return print_call_pcrel(dd, sym, address, insn);

View File

@ -49,11 +49,14 @@ struct dump_data {
unsigned long address_call_base;
struct kernel_sym *sym_mapping;
__u32 sym_count;
__u64 *jited_ksyms;
__u32 nr_jited_ksyms;
char scratch_buff[SYM_MAX_NAME + 8];
};
void kernel_syms_load(struct dump_data *dd);
void kernel_syms_destroy(struct dump_data *dd);
struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key);
void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
bool opcodes);
void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,

View File

@ -97,6 +97,7 @@ enum bpf_cmd {
BPF_RAW_TRACEPOINT_OPEN,
BPF_BTF_LOAD,
BPF_BTF_GET_FD_BY_ID,
BPF_TASK_FD_QUERY,
};
enum bpf_map_type {
@ -141,6 +142,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_RAW_TRACEPOINT,
BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_PROG_TYPE_LWT_SEG6LOCAL,
};
enum bpf_attach_type {
@ -284,8 +286,8 @@ union bpf_attr {
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
__u32 btf_fd; /* fd pointing to a BTF type data */
__u32 btf_key_id; /* BTF type_id of the key */
__u32 btf_value_id; /* BTF type_id of the value */
__u32 btf_key_type_id; /* BTF type_id of the key */
__u32 btf_value_type_id; /* BTF type_id of the value */
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@ -379,6 +381,22 @@ union bpf_attr {
__u32 btf_log_size;
__u32 btf_log_level;
};
struct {
__u32 pid; /* input: pid */
__u32 fd; /* input: fd */
__u32 flags; /* input: flags */
__u32 buf_len; /* input/output: buf len */
__aligned_u64 buf; /* input/output:
* tp_name for tracepoint
* symbol for kprobe
* filename for uprobe
*/
__u32 prog_id; /* output: prog_id */
__u32 fd_type; /* output: BPF_FD_TYPE_* */
__u64 probe_offset; /* output: probe_offset */
__u64 probe_addr; /* output: probe_addr */
} task_fd_query;
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@ -1902,6 +1920,90 @@ union bpf_attr {
* egress otherwise). This is the only flag supported for now.
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
* int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
* Description
* Encapsulate the packet associated to *skb* within a Layer 3
* protocol header. This header is provided in the buffer at
* address *hdr*, with *len* its size in bytes. *type* indicates
* the protocol of the header and can be one of:
*
* **BPF_LWT_ENCAP_SEG6**
* IPv6 encapsulation with Segment Routing Header
* (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
* the IPv6 header is computed by the kernel.
* **BPF_LWT_ENCAP_SEG6_INLINE**
* Only works if *skb* contains an IPv6 packet. Insert a
* Segment Routing Header (**struct ipv6_sr_hdr**) inside
* the IPv6 header.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
* Description
* Store *len* bytes from address *from* into the packet
* associated to *skb*, at *offset*. Only the flags, tag and TLVs
* inside the outermost IPv6 Segment Routing Header can be
* modified through this helper.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
* Description
* Adjust the size allocated to TLVs in the outermost IPv6
* Segment Routing Header contained in the packet associated to
* *skb*, at position *offset* by *delta* bytes. Only offsets
* after the segments are accepted. *delta* can be positive
* (growing) as well as negative (shrinking).
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
* Description
* Apply an IPv6 Segment Routing action of type *action* to the
* packet associated to *skb*. Each action takes a parameter
* contained at address *param*, and of length *param_len* bytes.
* *action* can be one of:
*
* **SEG6_LOCAL_ACTION_END_X**
* End.X action: Endpoint with Layer-3 cross-connect.
* Type of *param*: **struct in6_addr**.
* **SEG6_LOCAL_ACTION_END_T**
* End.T action: Endpoint with specific IPv6 table lookup.
* Type of *param*: **int**.
* **SEG6_LOCAL_ACTION_END_B6**
* End.B6 action: Endpoint bound to an SRv6 policy.
* Type of param: **struct ipv6_sr_hdr**.
* **SEG6_LOCAL_ACTION_END_B6_ENCAP**
* End.B6.Encap action: Endpoint bound to an SRv6
* encapsulation policy.
* Type of param: **struct ipv6_sr_hdr**.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
* performed again, if the helper is used in combination with
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
*/
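
As a companion to the contracts above, a hypothetical seg6local program applying an End.X action could look like the sketch below; it mirrors the selftest added later in this series, assumes the helper stub is declared as in that selftest, and uses an all-zero next hop purely as a placeholder:

#include <linux/bpf.h>
#include <linux/seg6_local.h>
#include <linux/in6.h>
#include "bpf_helpers.h"	/* SEC(); bpf_lwt_seg6_action stub as in the selftest */

SEC("end_x")
int __end_x(struct __sk_buff *skb)
{
	struct in6_addr nexthop = {};	/* placeholder Layer-3 cross-connect */

	if (bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
				(void *)&nexthop, sizeof(nexthop)))
		return BPF_DROP;
	return BPF_REDIRECT;
}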
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -1976,7 +2078,11 @@ union bpf_attr {
FN(fib_lookup), \
FN(sock_hash_update), \
FN(msg_redirect_hash), \
FN(sk_redirect_hash),
FN(sk_redirect_hash), \
FN(lwt_push_encap), \
FN(lwt_seg6_store_bytes), \
FN(lwt_seg6_adjust_srh), \
FN(lwt_seg6_action),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -2043,6 +2149,12 @@ enum bpf_hdr_start_off {
BPF_HDR_START_NET,
};
/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
enum bpf_lwt_encap_mode {
BPF_LWT_ENCAP_SEG6,
BPF_LWT_ENCAP_SEG6_INLINE
};
/* user accessible mirror of in-kernel sk_buff.
* new fields can only be added to the end of this structure
*/
@ -2176,6 +2288,14 @@ enum sk_action {
struct sk_msg_md {
void *data;
void *data_end;
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
__u32 local_ip4; /* Stored in network byte order */
__u32 remote_ip6[4]; /* Stored in network byte order */
__u32 local_ip6[4]; /* Stored in network byte order */
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
};
#define BPF_TAG_SIZE 8
@ -2197,6 +2317,10 @@ struct bpf_prog_info {
__u32 gpl_compatible:1;
__u64 netns_dev;
__u64 netns_ino;
__u32 nr_jited_ksyms;
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
} __attribute__((aligned(8)));
struct bpf_map_info {
@ -2211,8 +2335,8 @@ struct bpf_map_info {
__u64 netns_dev;
__u64 netns_ino;
__u32 btf_id;
__u32 btf_key_id;
__u32 btf_value_id;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
} __attribute__((aligned(8)));
struct bpf_btf_info {
@ -2450,4 +2574,13 @@ struct bpf_fib_lookup {
__u8 dmac[6]; /* ETH_ALEN */
};
enum bpf_task_fd_type {
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
BPF_FD_TYPE_TRACEPOINT, /* tp name */
BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
BPF_FD_TYPE_UPROBE, /* filename + offset */
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
#endif /* _UAPI__LINUX_BPF_H__ */

View File

@ -12,42 +12,29 @@ struct btf_header {
__u16 magic;
__u8 version;
__u8 flags;
__u32 parent_label;
__u32 parent_name;
__u32 hdr_len;
/* All offsets are in bytes relative to the end of this header */
__u32 label_off; /* offset of label section */
__u32 object_off; /* offset of data object section*/
__u32 func_off; /* offset of function section */
__u32 type_off; /* offset of type section */
__u32 type_len; /* length of type section */
__u32 str_off; /* offset of string section */
__u32 str_len; /* length of string section */
};
/* Max # of type identifiers */
#define BTF_MAX_TYPE 0x7fffffff
#define BTF_MAX_TYPE 0x0000ffff
/* Max offset into the string section */
#define BTF_MAX_NAME_OFFSET 0x7fffffff
#define BTF_MAX_NAME_OFFSET 0x0000ffff
/* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN 0xffff
/* The type id is referring to a parent BTF */
#define BTF_TYPE_PARENT(id) (((id) >> 31) & 0x1)
#define BTF_TYPE_ID(id) ((id) & BTF_MAX_TYPE)
/* String is in the ELF string section */
#define BTF_STR_TBL_ELF_ID(ref) (((ref) >> 31) & 0x1)
#define BTF_STR_OFFSET(ref) ((ref) & BTF_MAX_NAME_OFFSET)
struct btf_type {
__u32 name_off;
/* "info" bits arrangement
* bits 0-15: vlen (e.g. # of struct's members)
* bits 16-23: unused
* bits 24-28: kind (e.g. int, ptr, array...etc)
* bits 29-30: unused
* bits 31: root
* bits 24-27: kind (e.g. int, ptr, array...etc)
* bits 28-31: unused
*/
__u32 info;
/* "size" is used by INT, ENUM, STRUCT and UNION.
@ -62,8 +49,7 @@ struct btf_type {
};
};
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
#define BTF_INFO_ISROOT(info) (!!(((info) >> 24) & 0x80))
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_KIND_UNKN 0 /* Unknown */
@ -88,15 +74,14 @@ struct btf_type {
/* BTF_KIND_INT is followed by a u32 and the following
* is the 32 bits arrangement:
*/
#define BTF_INT_ENCODING(VAL) (((VAL) & 0xff000000) >> 24)
#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
#define BTF_INT_OFFSET(VAL) (((VAL & 0x00ff0000)) >> 16)
#define BTF_INT_BITS(VAL) ((VAL) & 0x0000ffff)
/* Attributes stored in the BTF_INT_ENCODING */
#define BTF_INT_SIGNED 0x1
#define BTF_INT_CHAR 0x2
#define BTF_INT_BOOL 0x4
#define BTF_INT_VARARGS 0x8
#define BTF_INT_SIGNED (1 << 0)
#define BTF_INT_CHAR (1 << 1)
#define BTF_INT_BOOL (1 << 2)
/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
* The exact number of btf_enum is stored in the vlen (of the
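A minimal sketch (an assumption, not part of the patch) of how a consumer can honor the now-extensible header: trust hdr_len rather than sizeof(struct btf_header), and resolve all section offsets relative to the end of the header, as the layout above requires. The btf_locate_sections() name and struct btf_sections are made up for illustration:

#include <stdint.h>
#include <linux/btf.h>	/* struct btf_header as shown above */

struct btf_sections {
	const void *types;
	uint32_t type_len;
	const char *strings;
	uint32_t str_len;
};

static int btf_locate_sections(const void *raw, uint32_t raw_size,
			       struct btf_sections *out)
{
	const struct btf_header *hdr = raw;

	if (raw_size < sizeof(*hdr) || hdr->hdr_len < sizeof(*hdr))
		return -1;			/* header too short */
	if (hdr->magic != 0xeB9F)		/* BTF_MAGIC */
		return -1;
	/* the string section is last; bound-check against the blob size */
	if ((uint64_t)hdr->hdr_len + hdr->str_off + hdr->str_len > raw_size)
		return -1;

	out->types = (const char *)raw + hdr->hdr_len + hdr->type_off;
	out->type_len = hdr->type_len;
	out->strings = (const char *)raw + hdr->hdr_len + hdr->str_off;
	out->str_len = hdr->str_len;
	return 0;
}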

View File

@ -89,8 +89,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
min(name_len, BPF_OBJ_NAME_LEN - 1));
attr.numa_node = create_attr->numa_node;
attr.btf_fd = create_attr->btf_fd;
attr.btf_key_id = create_attr->btf_key_id;
attr.btf_value_id = create_attr->btf_value_id;
attr.btf_key_type_id = create_attr->btf_key_type_id;
attr.btf_value_type_id = create_attr->btf_value_type_id;
attr.map_ifindex = create_attr->map_ifindex;
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
@ -643,3 +643,26 @@ retry:
return fd;
}
int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
__u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
__u64 *probe_addr)
{
union bpf_attr attr = {};
int err;
attr.task_fd_query.pid = pid;
attr.task_fd_query.fd = fd;
attr.task_fd_query.flags = flags;
attr.task_fd_query.buf = ptr_to_u64(buf);
attr.task_fd_query.buf_len = *buf_len;
err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
*buf_len = attr.task_fd_query.buf_len;
*prog_id = attr.task_fd_query.prog_id;
*fd_type = attr.task_fd_query.fd_type;
*probe_offset = attr.task_fd_query.probe_offset;
*probe_addr = attr.task_fd_query.probe_addr;
return err;
}

View File

@ -36,8 +36,8 @@ struct bpf_create_map_attr {
__u32 max_entries;
__u32 numa_node;
__u32 btf_fd;
__u32 btf_key_id;
__u32 btf_value_id;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
__u32 map_ifindex;
};
@ -107,4 +107,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
int bpf_raw_tracepoint_open(const char *name, int prog_fd);
int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
bool do_log);
int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
__u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
__u64 *probe_addr);
#endif

View File

@ -35,9 +35,8 @@ struct btf {
static const char *btf_name_by_offset(const struct btf *btf, uint32_t offset)
{
if (!BTF_STR_TBL_ELF_ID(offset) &&
BTF_STR_OFFSET(offset) < btf->hdr->str_len)
return &btf->strings[BTF_STR_OFFSET(offset)];
if (offset < btf->hdr->str_len)
return &btf->strings[offset];
else
return NULL;
}

View File

@ -216,8 +216,8 @@ struct bpf_map {
size_t offset;
int map_ifindex;
struct bpf_map_def def;
uint32_t btf_key_id;
uint32_t btf_value_id;
uint32_t btf_key_type_id;
uint32_t btf_value_type_id;
void *priv;
bpf_map_clear_priv_t clear_priv;
};
@ -1042,8 +1042,8 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
}
if (def->key_size != key_size) {
pr_warning("map:%s key_type:%s has BTF type_size:%ld != key_size:%u\n",
map->name, name, key_size, def->key_size);
pr_warning("map:%s key_type:%s has BTF type_size:%u != key_size:%u\n",
map->name, name, (unsigned int)key_size, def->key_size);
return -EINVAL;
}
@ -1069,13 +1069,13 @@ static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
}
if (def->value_size != value_size) {
pr_warning("map:%s value_type:%s has BTF type_size:%ld != value_size:%u\n",
map->name, name, value_size, def->value_size);
pr_warning("map:%s value_type:%s has BTF type_size:%u != value_size:%u\n",
map->name, name, (unsigned int)value_size, def->value_size);
return -EINVAL;
}
map->btf_key_id = key_id;
map->btf_value_id = value_id;
map->btf_key_type_id = key_id;
map->btf_value_type_id = value_id;
return 0;
}
@ -1100,24 +1100,24 @@ bpf_object__create_maps(struct bpf_object *obj)
create_attr.value_size = def->value_size;
create_attr.max_entries = def->max_entries;
create_attr.btf_fd = 0;
create_attr.btf_key_id = 0;
create_attr.btf_value_id = 0;
create_attr.btf_key_type_id = 0;
create_attr.btf_value_type_id = 0;
if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
create_attr.btf_fd = btf__fd(obj->btf);
create_attr.btf_key_id = map->btf_key_id;
create_attr.btf_value_id = map->btf_value_id;
create_attr.btf_key_type_id = map->btf_key_type_id;
create_attr.btf_value_type_id = map->btf_value_type_id;
}
*pfd = bpf_create_map_xattr(&create_attr);
if (*pfd < 0 && create_attr.btf_key_id) {
if (*pfd < 0 && create_attr.btf_key_type_id) {
pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, strerror(errno), errno);
create_attr.btf_fd = 0;
create_attr.btf_key_id = 0;
create_attr.btf_value_id = 0;
map->btf_key_id = 0;
map->btf_value_id = 0;
create_attr.btf_key_type_id = 0;
create_attr.btf_value_type_id = 0;
map->btf_key_type_id = 0;
map->btf_value_type_id = 0;
*pfd = bpf_create_map_xattr(&create_attr);
}
@ -1456,6 +1456,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_SK_SKB:
case BPF_PROG_TYPE_CGROUP_DEVICE:
@ -2085,14 +2086,14 @@ const char *bpf_map__name(struct bpf_map *map)
return map ? map->name : NULL;
}
uint32_t bpf_map__btf_key_id(const struct bpf_map *map)
uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map)
{
return map ? map->btf_key_id : 0;
return map ? map->btf_key_type_id : 0;
}
uint32_t bpf_map__btf_value_id(const struct bpf_map *map)
uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map)
{
return map ? map->btf_value_id : 0;
return map ? map->btf_value_type_id : 0;
}
int bpf_map__set_priv(struct bpf_map *map, void *priv,

View File

@ -244,8 +244,8 @@ bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
int bpf_map__fd(struct bpf_map *map);
const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
const char *bpf_map__name(struct bpf_map *map);
uint32_t bpf_map__btf_key_id(const struct bpf_map *map);
uint32_t bpf_map__btf_value_id(const struct bpf_map *map);
uint32_t bpf_map__btf_key_type_id(const struct bpf_map *map);
uint32_t bpf_map__btf_value_type_id(const struct bpf_map *map);
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
int bpf_map__set_priv(struct bpf_map *map, void *priv,

View File

@ -33,7 +33,8 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o
test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
test_lwt_seg6local.o
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
@ -42,7 +43,8 @@ TEST_PROGS := test_kmod.sh \
test_xdp_meta.sh \
test_offload.py \
test_sock_addr.sh \
test_tunnel.sh
test_tunnel.sh \
test_lwt_seg6local.sh
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr
@ -84,7 +86,17 @@ else
CPU ?= generic
endif
# Get Clang's default includes on this system, as opposed to those seen by
# '-target bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyway.
CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
$(CLANG_SYS_INCLUDES) \
-Wno-compare-distinct-pointer-types
$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline

View File

@ -114,6 +114,18 @@ static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
int plen, __u32 flags) =
(void *) BPF_FUNC_fib_lookup;
static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
unsigned int len) =
(void *) BPF_FUNC_lwt_push_encap;
static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
void *from, unsigned int len) =
(void *) BPF_FUNC_lwt_seg6_store_bytes;
static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
unsigned int param_len) =
(void *) BPF_FUNC_lwt_seg6_action;
static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
unsigned int len) =
(void *) BPF_FUNC_lwt_seg6_adjust_srh;
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions

View File

@ -113,22 +113,25 @@ static char btf_log_buf[BTF_LOG_BUF_SIZE];
static struct btf_header hdr_tmpl = {
.magic = BTF_MAGIC,
.version = BTF_VERSION,
.hdr_len = sizeof(struct btf_header),
};
struct btf_raw_test {
const char *descr;
const char *str_sec;
const char *map_name;
const char *err_str;
__u32 raw_types[MAX_NR_RAW_TYPES];
__u32 str_sec_size;
enum bpf_map_type map_type;
__u32 key_size;
__u32 value_size;
__u32 key_id;
__u32 value_id;
__u32 key_type_id;
__u32 value_type_id;
__u32 max_entries;
bool btf_load_err;
bool map_create_err;
int hdr_len_delta;
int type_off_delta;
int str_off_delta;
int str_len_delta;
@ -141,8 +144,8 @@ static struct btf_raw_test raw_tests[] = {
* };
*
* struct A {
* int m;
* unsigned long long n;
* unsigned long long m;
* int n;
* char o;
* [3 bytes hole]
* int p[8];
@ -163,8 +166,8 @@ static struct btf_raw_test raw_tests[] = {
BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
/* struct A { */ /* [5] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* unsigned long long n;*/
BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */
@ -172,6 +175,7 @@ static struct btf_raw_test raw_tests[] = {
/* } */
/* int[4][8] */
BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */
/* enum E */ /* [7] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
BTF_ENUM_ENC(NAME_TBD, 0),
BTF_ENUM_ENC(NAME_TBD, 1),
@ -183,8 +187,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "struct_test1_map",
.key_size = sizeof(int),
.value_size = 180,
.key_id = 1,
.value_id = 5,
.key_type_id = 1,
.value_type_id = 5,
.max_entries = 4,
},
@ -238,8 +242,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "struct_test2_map",
.key_size = sizeof(int),
.value_size = 68,
.key_id = 1,
.value_id = 3,
.key_type_id = 1,
.value_type_id = 3,
.max_entries = 4,
},
@ -258,7 +262,7 @@ static struct btf_raw_test raw_tests[] = {
/* struct A { */ /* [2] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n; */
BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
/* } */
BTF_END_RAW,
},
@ -268,10 +272,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check1_map",
.key_size = sizeof(int),
.value_size = 1,
.key_id = 1,
.value_id = 2,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Member exceeds struct_size",
},
/* Test member exceeds the size of struct
@ -301,11 +306,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check2_map",
.key_size = sizeof(int),
.value_size = 1,
.key_id = 1,
.value_id = 3,
.key_type_id = 1,
.value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Member exceeds struct_size",
},
/* Test member exceeds the size of struct
@ -335,10 +340,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check3_map",
.key_size = sizeof(int),
.value_size = 1,
.key_id = 1,
.value_id = 3,
.key_type_id = 1,
.value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Member exceeds struct_size",
},
/* Test member exceeds the size of struct
@ -376,10 +382,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "size_check4_map",
.key_size = sizeof(int),
.value_size = 1,
.key_id = 1,
.value_id = 3,
.key_type_id = 1,
.value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Member exceeds struct_size",
},
/* typedef const void * const_void_ptr;
@ -411,8 +418,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test1_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
.key_id = 1,
.value_id = 4,
.key_type_id = 1,
.value_type_id = 4,
.max_entries = 4,
},
@ -440,10 +447,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test2_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
.key_id = 1,
.value_id = 3,
.key_type_id = 1,
.value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid member",
},
/* typedef const void * const_void_ptr;
@ -458,9 +466,9 @@ static struct btf_raw_test raw_tests[] = {
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
/* const void* */ /* [3] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
/* typedef const void * const_void_ptr */
/* typedef const void * const_void_ptr */ /* [4] */
BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
/* const_void_ptr[4] */ /* [4] */
/* const_void_ptr[4] */ /* [5] */
BTF_TYPE_ARRAY_ENC(3, 1, 4),
BTF_END_RAW,
},
@ -470,8 +478,8 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test3_map",
.key_size = sizeof(int),
.value_size = sizeof(void *) * 4,
.key_id = 1,
.value_id = 4,
.key_type_id = 1,
.value_type_id = 4,
.max_entries = 4,
},
@ -493,10 +501,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "void_test4_map",
.key_size = sizeof(int),
.value_size = sizeof(void *) * 4,
.key_id = 1,
.value_id = 3,
.key_type_id = 1,
.value_type_id = 3,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid elem",
},
/* Array_A <------------------+
@ -523,10 +532,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test1_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
.key_id = 1,
.value_id = 2,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Loop detected",
},
/* typedef is _before_ the BTF type of Array_A and Array_B
@ -551,7 +561,6 @@ static struct btf_raw_test raw_tests[] = {
BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */
/* Array_B */
BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */
BTF_END_RAW,
},
.str_sec = "\0int_array\0",
@ -560,10 +569,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test2_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
.key_id = 1,
.value_id = 2,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Loop detected",
},
/* Array_A <------------------+
@ -582,7 +592,6 @@ static struct btf_raw_test raw_tests[] = {
BTF_TYPE_ARRAY_ENC(3, 1, 8),
/* Array_B */ /* [3] */
BTF_TYPE_ARRAY_ENC(2, 1, 8),
BTF_END_RAW,
},
.str_sec = "",
@ -591,10 +600,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test3_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
.key_id = 1,
.value_id = 2,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Loop detected",
},
/* typedef is _between_ the BTF type of Array_A and Array_B
@ -627,10 +637,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test4_map",
.key_size = sizeof(int),
.value_size = sizeof(sizeof(int) * 8),
.key_id = 1,
.value_id = 2,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Loop detected",
},
/* typedef struct B Struct_B
@ -668,10 +679,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test5_map",
.key_size = sizeof(int),
.value_size = 8,
.key_id = 1,
.value_id = 2,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Loop detected",
},
/* struct A {
@ -697,10 +709,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test6_map",
.key_size = sizeof(int),
.value_size = 8,
.key_id = 1,
.value_id = 2,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Loop detected",
},
{
@ -724,10 +737,11 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test7_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
.key_id = 1,
.value_id = 2,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Loop detected",
},
{
@ -759,14 +773,73 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "loop_test8_map",
.key_size = sizeof(int),
.value_size = sizeof(void *),
.key_id = 1,
.value_id = 2,
.key_type_id = 1,
.value_type_id = 2,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Loop detected",
},
{
.descr = "type_off == str_off",
.descr = "string section does not end with null",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "\0int",
.str_sec_size = sizeof("\0int") - 1,
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid string section",
},
{
.descr = "empty string section",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = 0,
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid string section",
},
{
.descr = "empty type section",
.raw_types = {
BTF_END_RAW,
},
.str_sec = "\0int",
.str_sec_size = sizeof("\0int"),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.err_str = "No type found",
},
{
.descr = "btf_header test. Longer hdr_len",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@ -778,15 +851,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_id = 1,
.value_id = 1,
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.type_off_delta = sizeof(struct btf_type) + sizeof(int) + sizeof("\0int"),
.hdr_len_delta = 4,
.err_str = "Unsupported btf_header",
},
{
.descr = "Unaligned type_off",
.descr = "btf_header test. Gap between hdr and type",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@ -798,15 +872,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_id = 1,
.value_id = 1,
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.type_off_delta = 1,
.type_off_delta = 4,
.err_str = "Unsupported section found",
},
{
.descr = "str_off beyonds btf size",
.descr = "btf_header test. Gap between type and str",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@ -818,15 +893,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_id = 1,
.value_id = 1,
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.str_off_delta = sizeof("\0int") + 1,
.str_off_delta = 4,
.err_str = "Unsupported section found",
},
{
.descr = "str_len beyonds btf size",
.descr = "btf_header test. Overlap between type and str",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@ -838,15 +914,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_id = 1,
.value_id = 1,
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.str_len_delta = 1,
.str_off_delta = -4,
.err_str = "Section overlap found",
},
{
.descr = "String section does not end with null",
.descr = "btf_header test. Larger BTF size",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@ -858,15 +935,16 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_id = 1,
.value_id = 1,
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.str_len_delta = -1,
.str_len_delta = -4,
.err_str = "Unsupported section found",
},
{
.descr = "Empty string section",
.descr = "btf_header test. Smaller BTF size",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
@ -878,11 +956,267 @@ static struct btf_raw_test raw_tests[] = {
.map_name = "hdr_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_id = 1,
.value_id = 1,
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.str_len_delta = 0 - (int)sizeof("\0int"),
.str_len_delta = 4,
.err_str = "Total section length too long",
},
{
.descr = "array test. index_type/elem_type \"int\"",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* int[16] */ /* [2] */
BTF_TYPE_ARRAY_ENC(1, 1, 16),
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = sizeof(""),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "array_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
},
{
.descr = "array test. index_type/elem_type \"const int\"",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* int[16] */ /* [2] */
BTF_TYPE_ARRAY_ENC(3, 3, 16),
/* CONST type_id=1 */ /* [3] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = sizeof(""),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "array_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
},
{
.descr = "array test. index_type \"const int:31\"",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* int:31 */ /* [2] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
/* int[16] */ /* [3] */
BTF_TYPE_ARRAY_ENC(1, 4, 16),
/* CONST type_id=2 */ /* [4] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = sizeof(""),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "array_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid index",
},
{
.descr = "array test. elem_type \"const int:31\"",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* int:31 */ /* [2] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
/* int[16] */ /* [3] */
BTF_TYPE_ARRAY_ENC(4, 1, 16),
/* CONST type_id=2 */ /* [4] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = sizeof(""),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "array_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid array of int",
},
{
.descr = "array test. index_type \"void\"",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* int[16] */ /* [2] */
BTF_TYPE_ARRAY_ENC(1, 0, 16),
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = sizeof(""),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "array_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid index",
},
{
.descr = "array test. index_type \"const void\"",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* int[16] */ /* [2] */
BTF_TYPE_ARRAY_ENC(1, 3, 16),
/* CONST type_id=0 (void) */ /* [3] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = sizeof(""),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "array_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid index",
},
{
.descr = "array test. elem_type \"const void\"",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* int[16] */ /* [2] */
BTF_TYPE_ARRAY_ENC(3, 1, 16),
/* CONST type_id=0 (void) */ /* [3] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = sizeof(""),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "array_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid elem",
},
{
.descr = "array test. elem_type \"const void *\"",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* const void *[16] */ /* [2] */
BTF_TYPE_ARRAY_ENC(3, 1, 16),
/* CONST type_id=4 */ /* [3] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
/* void* */ /* [4] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = sizeof(""),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "array_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
},
{
.descr = "array test. index_type \"const void *\"",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
/* const void *[16] */ /* [2] */
BTF_TYPE_ARRAY_ENC(3, 3, 16),
/* CONST type_id=4 */ /* [3] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
/* void* */ /* [4] */
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = sizeof(""),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "array_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid index",
},
{
.descr = "int test. invalid int_data",
.raw_types = {
BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4),
0x10000000,
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = sizeof(""),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "array_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid int_data",
},
{
.descr = "invalid BTF_INFO",
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
BTF_TYPE_ENC(0, 0x10000000, 4),
BTF_END_RAW,
},
.str_sec = "",
.str_sec_size = sizeof(""),
.map_type = BPF_MAP_TYPE_ARRAY,
.map_name = "array_test_map",
.key_size = sizeof(int),
.value_size = sizeof(int),
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
.err_str = "Invalid btf_info",
},
}; /* struct btf_raw_test raw_tests[] */
@ -951,6 +1285,7 @@ static void *btf_raw_create(const struct btf_header *hdr,
memcpy(raw_btf + offset, str, str_sec_size);
ret_hdr = (struct btf_header *)raw_btf;
ret_hdr->type_len = type_sec_size;
ret_hdr->str_off = type_sec_size;
ret_hdr->str_len = str_sec_size;
@ -981,6 +1316,7 @@ static int do_test_raw(unsigned int test_num)
hdr = raw_btf;
hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta;
hdr->type_off = (int)hdr->type_off + test->type_off_delta;
hdr->str_off = (int)hdr->str_off + test->str_off_delta;
hdr->str_len = (int)hdr->str_len + test->str_len_delta;
@ -992,8 +1328,13 @@ static int do_test_raw(unsigned int test_num)
free(raw_btf);
err = ((btf_fd == -1) != test->btf_load_err);
CHECK(err, "btf_fd:%d test->btf_load_err:%u",
btf_fd, test->btf_load_err);
if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
btf_fd, test->btf_load_err) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
"expected err_str:%s", test->err_str)) {
err = -1;
goto done;
}
if (err || btf_fd == -1)
goto done;
@ -1004,8 +1345,8 @@ static int do_test_raw(unsigned int test_num)
create_attr.value_size = test->value_size;
create_attr.max_entries = test->max_entries;
create_attr.btf_fd = btf_fd;
create_attr.btf_key_id = test->key_id;
create_attr.btf_value_id = test->value_id;
create_attr.btf_key_type_id = test->key_type_id;
create_attr.btf_value_type_id = test->value_type_id;
map_fd = bpf_create_map_xattr(&create_attr);
@ -1267,8 +1608,8 @@ static int test_btf_id(unsigned int test_num)
create_attr.value_size = sizeof(unsigned int);
create_attr.max_entries = 4;
create_attr.btf_fd = btf_fd[0];
create_attr.btf_key_id = 1;
create_attr.btf_value_id = 2;
create_attr.btf_key_type_id = 1;
create_attr.btf_value_type_id = 2;
map_fd = bpf_create_map_xattr(&create_attr);
if (CHECK(map_fd == -1, "errno:%d", errno)) {
@ -1279,10 +1620,10 @@ static int test_btf_id(unsigned int test_num)
info_len = sizeof(map_info);
err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
if (CHECK(err || map_info.btf_id != info[0].id ||
map_info.btf_key_id != 1 || map_info.btf_value_id != 2,
"err:%d errno:%d info.id:%u btf_id:%u btf_key_id:%u btf_value_id:%u",
err, errno, info[0].id, map_info.btf_id, map_info.btf_key_id,
map_info.btf_value_id)) {
map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
"err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id,
map_info.btf_value_type_id)) {
err = -1;
goto done;
}
@ -1542,10 +1883,10 @@ static int do_test_file(unsigned int test_num)
goto done;
}
err = (bpf_map__btf_key_id(map) == 0 || bpf_map__btf_value_id(map) == 0)
err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0)
!= test->btf_kv_notfound;
if (CHECK(err, "btf_key_id:%u btf_value_id:%u test->btf_kv_notfound:%u",
bpf_map__btf_key_id(map), bpf_map__btf_value_id(map),
if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u",
bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map),
test->btf_kv_notfound))
goto done;
@ -1615,7 +1956,7 @@ static struct btf_raw_test pprint_test = {
/* 28 bits */ /* [7] */
BTF_TYPE_INT_ENC(0, 0, 0, 28, 4),
/* uint8_t[8] */ /* [8] */
BTF_TYPE_ARRAY_ENC(9, 3, 8),
BTF_TYPE_ARRAY_ENC(9, 1, 8),
/* typedef unsigned char uint8_t */ /* [9] */
BTF_TYPEDEF_ENC(NAME_TBD, 1),
/* typedef unsigned short uint16_t */ /* [10] */
@ -1654,8 +1995,8 @@ static struct btf_raw_test pprint_test = {
.map_name = "pprint_test",
.key_size = sizeof(unsigned int),
.value_size = sizeof(struct pprint_mapv),
.key_id = 3, /* unsigned int */
.value_id = 16, /* struct pprint_mapv */
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
.max_entries = 128 * 1024,
};
@ -1712,8 +2053,8 @@ static int test_pprint(void)
create_attr.value_size = test->value_size;
create_attr.max_entries = test->max_entries;
create_attr.btf_fd = btf_fd;
create_attr.btf_key_id = test->key_id;
create_attr.btf_value_id = test->value_id;
create_attr.btf_key_type_id = test->key_type_id;
create_attr.btf_value_type_id = test->value_type_id;
map_fd = bpf_create_map_xattr(&create_attr);
if (CHECK(map_fd == -1, "errno:%d", errno)) {

View File

@ -0,0 +1,437 @@
#include <stddef.h>
#include <inttypes.h>
#include <errno.h>
#include <linux/seg6_local.h>
#include <linux/bpf.h>
#include "bpf_helpers.h"
#include "bpf_endian.h"
#define bpf_printk(fmt, ...) \
({ \
char ____fmt[] = fmt; \
bpf_trace_printk(____fmt, sizeof(____fmt), \
##__VA_ARGS__); \
})
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
#define SR6_FLAG_ALERT (1 << 4)
#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
#define BPF_PACKET_HEADER __attribute__((packed))
struct ip6_t {
unsigned int ver:4;
unsigned int priority:8;
unsigned int flow_label:20;
unsigned short payload_len;
unsigned char next_header;
unsigned char hop_limit;
unsigned long long src_hi;
unsigned long long src_lo;
unsigned long long dst_hi;
unsigned long long dst_lo;
} BPF_PACKET_HEADER;
struct ip6_addr_t {
unsigned long long hi;
unsigned long long lo;
} BPF_PACKET_HEADER;
struct ip6_srh_t {
unsigned char nexthdr;
unsigned char hdrlen;
unsigned char type;
unsigned char segments_left;
unsigned char first_segment;
unsigned char flags;
unsigned short tag;
struct ip6_addr_t segments[0];
} BPF_PACKET_HEADER;
struct sr6_tlv_t {
unsigned char type;
unsigned char len;
unsigned char value[0];
} BPF_PACKET_HEADER;
__attribute__((always_inline)) struct ip6_srh_t *get_srh(struct __sk_buff *skb)
{
void *cursor, *data_end;
struct ip6_srh_t *srh;
struct ip6_t *ip;
uint8_t *ipver;
data_end = (void *)(long)skb->data_end;
cursor = (void *)(long)skb->data;
ipver = (uint8_t *)cursor;
if ((void *)ipver + sizeof(*ipver) > data_end)
return NULL;
if ((*ipver >> 4) != 6)
return NULL;
ip = cursor_advance(cursor, sizeof(*ip));
if ((void *)ip + sizeof(*ip) > data_end)
return NULL;
if (ip->next_header != 43)
return NULL;
srh = cursor_advance(cursor, sizeof(*srh));
if ((void *)srh + sizeof(*srh) > data_end)
return NULL;
if (srh->type != 4)
return NULL;
return srh;
}
__attribute__((always_inline))
int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
uint32_t old_pad, uint32_t pad_off)
{
int err;
if (new_pad != old_pad) {
err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
(int) new_pad - (int) old_pad);
if (err)
return err;
}
if (new_pad > 0) {
char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0};
struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
pad_tlv->type = SR6_TLV_PADDING;
pad_tlv->len = new_pad - 2;
err = bpf_lwt_seg6_store_bytes(skb, pad_off,
(void *)pad_tlv_buf, new_pad);
if (err)
return err;
}
return 0;
}
__attribute__((always_inline))
int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
uint32_t *tlv_off, uint32_t *pad_size,
uint32_t *pad_off)
{
uint32_t srh_off, cur_off;
int offset_valid = 0;
int err;
srh_off = (char *)srh - (char *)(long)skb->data;
// cur_off = end of segments, start of possible TLVs
cur_off = srh_off + sizeof(*srh) +
sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
*pad_off = 0;
// we can only go as far as ~10 TLVs due to the BPF max stack size
#pragma clang loop unroll(full)
for (int i = 0; i < 10; i++) {
struct sr6_tlv_t tlv;
if (cur_off == *tlv_off)
offset_valid = 1;
if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
break;
err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
if (err)
return err;
if (tlv.type == SR6_TLV_PADDING) {
*pad_size = tlv.len + sizeof(tlv);
*pad_off = cur_off;
if (*tlv_off == srh_off) {
*tlv_off = cur_off;
offset_valid = 1;
}
break;
} else if (tlv.type == SR6_TLV_HMAC) {
break;
}
cur_off += sizeof(tlv) + tlv.len;
} // we reached the padding or HMAC TLVs, or the end of the SRH
if (*pad_off == 0)
*pad_off = cur_off;
if (*tlv_off == -1)
*tlv_off = cur_off;
else if (!offset_valid)
return -EINVAL;
return 0;
}
__attribute__((always_inline))
int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
struct sr6_tlv_t *itlv, uint8_t tlv_size)
{
uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
uint8_t len_remaining, new_pad;
uint32_t pad_off = 0;
uint32_t pad_size = 0;
uint32_t partial_srh_len;
int err;
if (tlv_off != -1)
tlv_off += srh_off;
if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
return -EINVAL;
err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
if (err)
return err;
err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
if (err)
return err;
err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
if (err)
return err;
// the following can't be moved inside update_tlv_pad because the
// bpf verifier has some issues with it
pad_off += sizeof(*itlv) + itlv->len;
partial_srh_len = pad_off - srh_off;
len_remaining = partial_srh_len % 8;
new_pad = 8 - len_remaining;
if (new_pad == 1) /* a Padding TLV needs at least 2 bytes (type + len) */
new_pad = 9;
else if (new_pad == 8) /* already 8-byte aligned, no padding needed */
new_pad = 0;
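/* Worked example: if the SRH spans 36 bytes up to pad_off,
 * len_remaining = 36 % 8 = 4 and new_pad = 4, so update_tlv_pad()
 * emits a Padding TLV with len = 2 followed by two zero bytes to
 * restore 8-byte alignment.
 */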
return update_tlv_pad(skb, new_pad, pad_size, pad_off);
}
__attribute__((always_inline))
int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
uint32_t tlv_off)
{
uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
uint8_t len_remaining, new_pad;
uint32_t partial_srh_len;
uint32_t pad_off = 0;
uint32_t pad_size = 0;
struct sr6_tlv_t tlv;
int err;
tlv_off += srh_off;
err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
if (err)
return err;
err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
if (err)
return err;
err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
if (err)
return err;
pad_off -= sizeof(tlv) + tlv.len;
partial_srh_len = pad_off - srh_off;
len_remaining = partial_srh_len % 8;
new_pad = 8 - len_remaining;
if (new_pad == 1) /* a Padding TLV needs at least 2 bytes (type + len) */
new_pad = 9;
else if (new_pad == 8) /* already 8-byte aligned, no padding needed */
new_pad = 0;
return update_tlv_pad(skb, new_pad, pad_size, pad_off);
}
__attribute__((always_inline))
int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
{
int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
((srh->first_segment + 1) << 4); /* 16 bytes per segment */
struct sr6_tlv_t tlv;
if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
return 0;
if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
struct ip6_addr_t egr_addr;
if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
return 0;
// check if egress TLV value is correct
if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
ntohll(egr_addr.lo) == 0x4)
return 1;
}
return 0;
}
// This function pushes an SRH with segments fd00::1, fd00::2, fd00::3,
// fd00::4
SEC("encap_srh")
int __encap_srh(struct __sk_buff *skb)
{
unsigned long long hi = 0xfd00000000000000;
struct ip6_addr_t *seg;
struct ip6_srh_t *srh;
char srh_buf[72]; // room for 4 segments
int err;
srh = (struct ip6_srh_t *)srh_buf;
srh->nexthdr = 0;
srh->hdrlen = 8; /* (sizeof(srh_buf) - 8) / 8, in 8-byte units */
srh->type = 4;
srh->segments_left = 3;
srh->first_segment = 3;
srh->flags = 0;
srh->tag = 0;
seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
#pragma clang loop unroll(full)
for (unsigned long long lo = 0; lo < 4; lo++) {
seg->lo = htonll(4 - lo);
seg->hi = htonll(hi);
seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
}
err = bpf_lwt_push_encap(skb, 0 /* BPF_LWT_ENCAP_SEG6 */, (void *)srh,
sizeof(srh_buf));
if (err)
return BPF_DROP;
return BPF_REDIRECT;
}
// Add an Egress TLV containing fd00::4, set the Alert flag,
// and apply an End.X action towards fc42::1
SEC("add_egr_x")
int __add_egr_x(struct __sk_buff *skb)
{
unsigned long long hi = 0xfc42000000000000;
unsigned long long lo = 0x1;
struct ip6_srh_t *srh = get_srh(skb);
uint8_t new_flags = SR6_FLAG_ALERT;
struct ip6_addr_t addr;
int err, offset;
if (srh == NULL)
return BPF_DROP;
// Egress TLV: type 2 (SR6_TLV_EGRESS), len 18, value fd00::4
uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
(struct sr6_tlv_t *)&tlv, 20);
if (err)
return BPF_DROP;
offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
err = bpf_lwt_seg6_store_bytes(skb, offset,
(void *)&new_flags, sizeof(new_flags));
if (err)
return BPF_DROP;
addr.lo = htonll(lo);
addr.hi = htonll(hi);
err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
(void *)&addr, sizeof(addr));
if (err)
return BPF_DROP;
return BPF_REDIRECT;
}
// Pop the Egress TLV, reset the flags, change the tag to 2442 and finally
// apply a simple End action
SEC("pop_egr")
int __pop_egr(struct __sk_buff *skb)
{
struct ip6_srh_t *srh = get_srh(skb);
uint16_t new_tag = bpf_htons(2442);
uint8_t new_flags = 0;
int err, offset;
if (srh == NULL)
return BPF_DROP;
if (srh->flags != SR6_FLAG_ALERT)
return BPF_DROP;
if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
return BPF_DROP;
if (!has_egr_tlv(skb, srh))
return BPF_DROP;
err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
if (err)
return BPF_DROP;
offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
sizeof(new_flags)))
return BPF_DROP;
offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
sizeof(new_tag)))
return BPF_DROP;
return BPF_OK;
}
// Check that the Egress TLV and flag have been removed and that the tag is
// correct, then apply an End.T action to reach the last segment
SEC("inspect_t")
int __inspect_t(struct __sk_buff *skb)
{
struct ip6_srh_t *srh = get_srh(skb);
int table = 117;
int err;
if (srh == NULL)
return BPF_DROP;
if (srh->flags != 0)
return BPF_DROP;
if (srh->tag != bpf_htons(2442))
return BPF_DROP;
if (srh->hdrlen != 8) // 4 segments
return BPF_DROP;
err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
(void *)&table, sizeof(table));
if (err)
return BPF_DROP;
return BPF_REDIRECT;
}
char __license[] SEC("license") = "GPL";

View File

@ -0,0 +1,140 @@
#!/bin/bash
# Connects 6 network namespaces through veths.
# Each NS may have different IPv6 global-scope addresses:
# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6
# fc42::1 fd00::4
#
# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in an
# outer IPv6 header with a Segment Routing Header, with segments:
# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
#
# 3 fd00::/16 IPv6 addresses are bound to seg6local End.BPF actions:
# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
# - fd00::2 : remove the TLV, change the flags, add a tag
# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
#
# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
# Each End.BPF action will validate the operations applied on the SRH by the
# previous BPF program in the chain, otherwise the packet is dropped.
#
# A UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
# datagram can be read in NS6 when binding to fb00::6.
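# On success the cleanup handler prints "selftests: test_lwt_seg6local [PASS]".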
TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
cleanup()
{
if [ "$?" = "0" ]; then
echo "selftests: test_lwt_seg6local [PASS]";
else
echo "selftests: test_lwt_seg6local [FAILED]";
fi
set +e
ip netns del ns1 2> /dev/null
ip netns del ns2 2> /dev/null
ip netns del ns3 2> /dev/null
ip netns del ns4 2> /dev/null
ip netns del ns5 2> /dev/null
ip netns del ns6 2> /dev/null
rm -f $TMP_FILE
}
set -e
ip netns add ns1
ip netns add ns2
ip netns add ns3
ip netns add ns4
ip netns add ns5
ip netns add ns6
trap cleanup 0 2 3 6
ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
ip link add veth7 type veth peer name veth8
ip link add veth9 type veth peer name veth10
ip link set veth1 netns ns1
ip link set veth2 netns ns2
ip link set veth3 netns ns2
ip link set veth4 netns ns3
ip link set veth5 netns ns3
ip link set veth6 netns ns4
ip link set veth7 netns ns4
ip link set veth8 netns ns5
ip link set veth9 netns ns5
ip link set veth10 netns ns6
ip netns exec ns1 ip link set dev veth1 up
ip netns exec ns2 ip link set dev veth2 up
ip netns exec ns2 ip link set dev veth3 up
ip netns exec ns3 ip link set dev veth4 up
ip netns exec ns3 ip link set dev veth5 up
ip netns exec ns4 ip link set dev veth6 up
ip netns exec ns4 ip link set dev veth7 up
ip netns exec ns5 ip link set dev veth8 up
ip netns exec ns5 ip link set dev veth9 up
ip netns exec ns6 ip link set dev veth10 up
ip netns exec ns6 ip link set dev lo up
# All link scope addresses and routes required between veths
ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF obj test_lwt_seg6local.o sec add_egr_x dev veth4
ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF obj test_lwt_seg6local.o sec pop_egr dev veth6
ip netns exec ns4 ip -6 addr add fc42::1 dev lo
ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF obj test_lwt_seg6local.o sec inspect_t dev veth8
ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
sleep 5 # wait long enough to ensure the UDP datagram arrived at the last segment
kill -INT $!
if [[ $(< $TMP_FILE) != "foobar" ]]; then
exit 1
fi
exit 0

View File

@ -1542,6 +1542,162 @@ close_prog_noerr:
bpf_object__close(obj);
}
static void test_task_fd_query_rawtp(void)
{
const char *file = "./test_get_stack_rawtp.o";
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
struct bpf_object *obj;
int efd, err, prog_fd;
__u32 duration = 0;
char buf[256];
err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
goto close_prog;
/* query (getpid(), efd) */
len = sizeof(buf);
err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
&fd_type, &probe_offset, &probe_addr);
if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
errno))
goto close_prog;
err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
strcmp(buf, "sys_enter") == 0;
if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
fd_type, buf))
goto close_prog;
/* test zero len */
len = 0;
err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
&fd_type, &probe_offset, &probe_addr);
if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n",
err, errno))
goto close_prog;
err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
len == strlen("sys_enter");
if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
goto close_prog;
/* test empty buffer */
len = sizeof(buf);
err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id,
&fd_type, &probe_offset, &probe_addr);
if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n",
err, errno))
goto close_prog;
err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
len == strlen("sys_enter");
if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
goto close_prog;
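/* A buffer shorter than the tp name is expected to fail with ENOSPC
 * while still reporting the full name length and a truncated,
 * NUL-terminated copy ("sy" for len = 3).
 */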
/* test smaller buffer */
len = 3;
err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
&fd_type, &probe_offset, &probe_addr);
if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)",
"err %d errno %d\n", err, errno))
goto close_prog;
err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
len == strlen("sys_enter") &&
strcmp(buf, "sy") == 0;
if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
goto close_prog;
goto close_prog_noerr;
close_prog:
error_cnt++;
close_prog_noerr:
bpf_object__close(obj);
}
static void test_task_fd_query_tp_core(const char *probe_name,
const char *tp_name)
{
const char *file = "./test_tracepoint.o";
int err, bytes, efd, prog_fd, pmu_fd;
struct perf_event_attr attr = {};
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
struct bpf_object *obj;
__u32 duration = 0;
char buf[256];
err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
goto close_prog;
snprintf(buf, sizeof(buf),
"/sys/kernel/debug/tracing/events/%s/id", probe_name);
efd = open(buf, O_RDONLY, 0);
if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
goto close_prog;
bytes = read(efd, buf, sizeof(buf));
close(efd);
if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
"bytes %d errno %d\n", bytes, errno))
goto close_prog;
attr.config = strtol(buf, NULL, 0);
attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_type = PERF_SAMPLE_RAW;
attr.sample_period = 1;
attr.wakeup_events = 1;
pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
0 /* cpu 0 */, -1 /* group id */,
0 /* flags */);
if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno))
goto close_pmu;
err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
errno))
goto close_pmu;
err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
errno))
goto close_pmu;
/* query (getpid(), pmu_fd) */
len = sizeof(buf);
err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id,
&fd_type, &probe_offset, &probe_addr);
if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
errno))
goto close_pmu;
err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name);
if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
fd_type, buf))
goto close_pmu;
close(pmu_fd);
goto close_prog_noerr;
close_pmu:
close(pmu_fd);
close_prog:
error_cnt++;
close_prog_noerr:
bpf_object__close(obj);
}
static void test_task_fd_query_tp(void)
{
test_task_fd_query_tp_core("sched/sched_switch",
"sched_switch");
test_task_fd_query_tp_core("syscalls/sys_enter_read",
"sys_enter_read");
}
int main(void)
{
jit_enabled = is_jit_enabled();
@ -1561,6 +1717,8 @@ int main(void)
test_stacktrace_build_id_nmi();
test_stacktrace_map_raw_tp();
test_get_stack_raw_tp();
test_task_fd_query_rawtp();
test_task_fd_query_tp();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;

View File

@ -1685,6 +1685,121 @@ static struct bpf_test tests[] = {
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_SKB,
},
{
"valid access family in SK_MSG",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, family)),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_MSG,
},
{
"valid access remote_ip4 in SK_MSG",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, remote_ip4)),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_MSG,
},
{
"valid access local_ip4 in SK_MSG",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, local_ip4)),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_MSG,
},
{
"valid access remote_port in SK_MSG",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, remote_port)),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_MSG,
},
{
"valid access local_port in SK_MSG",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, local_port)),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_MSG,
},
{
"valid access remote_ip6 in SK_MSG",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, remote_ip6[0])),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, remote_ip6[1])),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, remote_ip6[2])),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, remote_ip6[3])),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_MSG,
},
{
"valid access local_ip6 in SK_MSG",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, local_ip6[0])),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, local_ip6[1])),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, local_ip6[2])),
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct sk_msg_md, local_ip6[3])),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_MSG,
},
{
"invalid 64B read of family in SK_MSG",
.insns = {
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
offsetof(struct sk_msg_md, family)),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_MSG,
},
{
"invalid read past end of SK_MSG",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct sk_msg_md, local_port) + 4),
BPF_EXIT_INSN(),
},
.errstr = "R0 !read_ok",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_MSG,
},
{
"invalid read offset in SK_MSG",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
offsetof(struct sk_msg_md, family) + 1),
BPF_EXIT_INSN(),
},
.errstr = "invalid bpf_context access",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_MSG,
},
{
"direct packet read for SK_MSG",
.insns = {

View File

@ -72,6 +72,18 @@ struct ksym *ksym_search(long key)
return &syms[0];
}
long ksym_get_addr(const char *name)
{
int i;
for (i = 0; i < sym_cnt; i++) {
if (strcmp(syms[i].name, name) == 0)
return syms[i].addr;
}
return 0; /* symbol not found */
}
static int page_size;
static int page_cnt = 8;
static struct perf_event_mmap_page *header;

View File

@ -11,6 +11,7 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);