Merge branch 'net-permit-skb_segment-on-head_frag-frag_list-skb'
Yonghong Song says: ==================== net: permit skb_segment on head_frag frag_list skb One of our in-house projects, bpf-based NAT, hits a kernel BUG_ON at function skb_segment(), line 3667. The bpf program attaches to clsact ingress, calls bpf_skb_change_proto to change protocol from ipv4 to ipv6 or from ipv6 to ipv4, and then calls bpf_redirect to send the changed packet out. ... 3665 while (pos < offset + len) { 3666 if (i >= nfrags) { 3667 BUG_ON(skb_headlen(list_skb)); ... The triggering input skb has the following properties: list_skb = skb->frag_list; skb->nfrags != NULL && skb_headlen(list_skb) != 0 and skb_segment() is not able to handle a frag_list skb if its headlen (list_skb->len - list_skb->data_len) is not 0. Patch #1 provides a simple solution to avoid BUG_ON. If list_skb->head_frag is true, its page-backed frag will be processed before the list_skb->frags. Patch #2 provides a test case in test_bpf module which constructs a skb and calls skb_segment() directly. The test case is able to trigger the BUG_ON without Patch #1. The patch has been tested in the following setup: ipv6_host <-> nat_server <-> ipv4_host where nat_server has a bpf program doing ipv4<->ipv6 translation and forwarding through clsact hook bpf_skb_change_proto. Changelog: v5 -> v6: . Added back missed BUG_ON(!nfrags) for zero skb_headlen(skb) case, plus a couple of cosmetic changes, from Alexander. v4 -> v5: . Replace local variable head_frag with a static inline function skb_head_frag_to_page_desc which gets the head_frag on-demand. This makes code more readable and also does not increase the stack size, from Alexander. . Remove the "if(nfrags)" guard for skb_orphan_frags and skb_zerocopy_clone as I found that they can handle zero-frag skb (with non-zero skb_headlen(skb)) properly. . Properly release segment list from skb_segment() in the test, from Eric. v3 -> v4: . 
Remove dynamic memory allocation and use rewinding for both index and frag to remove one branch in fast path, from Alexander. . Fix a bunch of issues in test_bpf skb_segment() test, including proper way to allocate skb, proper function argument for skb_add_rx_frag and not freeing skb, etc., from Eric. v2 -> v3: . Use starting frag index -1 (instead of 0) to special process head_frag before other frags in the skb, from Alexander Duyck. v1 -> v2: . Removed never-hit BUG_ON, spotted by Linyu Yuan. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
74b4bed972
|
@ -6574,6 +6574,93 @@ static bool exclude_test(int test_id)
|
|||
return test_id < test_range[0] || test_id > test_range[1];
|
||||
}
|
||||
|
||||
static __init struct sk_buff *build_test_skb(void)
|
||||
{
|
||||
u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN;
|
||||
struct sk_buff *skb[2];
|
||||
struct page *page[2];
|
||||
int i, data_size = 8;
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
page[i] = alloc_page(GFP_KERNEL);
|
||||
if (!page[i]) {
|
||||
if (i == 0)
|
||||
goto err_page0;
|
||||
else
|
||||
goto err_page1;
|
||||
}
|
||||
|
||||
/* this will set skb[i]->head_frag */
|
||||
skb[i] = dev_alloc_skb(headroom + data_size);
|
||||
if (!skb[i]) {
|
||||
if (i == 0)
|
||||
goto err_skb0;
|
||||
else
|
||||
goto err_skb1;
|
||||
}
|
||||
|
||||
skb_reserve(skb[i], headroom);
|
||||
skb_put(skb[i], data_size);
|
||||
skb[i]->protocol = htons(ETH_P_IP);
|
||||
skb_reset_network_header(skb[i]);
|
||||
skb_set_mac_header(skb[i], -ETH_HLEN);
|
||||
|
||||
skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64);
|
||||
// skb_headlen(skb[i]): 8, skb[i]->head_frag = 1
|
||||
}
|
||||
|
||||
/* setup shinfo */
|
||||
skb_shinfo(skb[0])->gso_size = 1448;
|
||||
skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4;
|
||||
skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
|
||||
skb_shinfo(skb[0])->gso_segs = 0;
|
||||
skb_shinfo(skb[0])->frag_list = skb[1];
|
||||
|
||||
/* adjust skb[0]'s len */
|
||||
skb[0]->len += skb[1]->len;
|
||||
skb[0]->data_len += skb[1]->data_len;
|
||||
skb[0]->truesize += skb[1]->truesize;
|
||||
|
||||
return skb[0];
|
||||
|
||||
err_skb1:
|
||||
__free_page(page[1]);
|
||||
err_page1:
|
||||
kfree_skb(skb[0]);
|
||||
err_skb0:
|
||||
__free_page(page[0]);
|
||||
err_page0:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static __init int test_skb_segment(void)
|
||||
{
|
||||
netdev_features_t features;
|
||||
struct sk_buff *skb, *segs;
|
||||
int ret = -1;
|
||||
|
||||
features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
|
||||
NETIF_F_IPV6_CSUM;
|
||||
features |= NETIF_F_RXCSUM;
|
||||
skb = build_test_skb();
|
||||
if (!skb) {
|
||||
pr_info("%s: failed to build_test_skb", __func__);
|
||||
goto done;
|
||||
}
|
||||
|
||||
segs = skb_segment(skb, features);
|
||||
if (segs) {
|
||||
kfree_skb_list(segs);
|
||||
ret = 0;
|
||||
pr_info("%s: success in skb_segment!", __func__);
|
||||
} else {
|
||||
pr_info("%s: failed in skb_segment!", __func__);
|
||||
}
|
||||
kfree_skb(skb);
|
||||
done:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __init int test_bpf(void)
|
||||
{
|
||||
int i, err_cnt = 0, pass_cnt = 0;
|
||||
|
@ -6632,9 +6719,11 @@ static int __init test_bpf_init(void)
|
|||
return ret;
|
||||
|
||||
ret = test_bpf();
|
||||
|
||||
destroy_bpf_tests();
|
||||
return ret;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return test_skb_segment();
|
||||
}
|
||||
|
||||
static void __exit test_bpf_exit(void)
|
||||
|
|
|
@ -3460,6 +3460,19 @@ void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(skb_pull_rcsum);
|
||||
|
||||
static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
|
||||
{
|
||||
skb_frag_t head_frag;
|
||||
struct page *page;
|
||||
|
||||
page = virt_to_head_page(frag_skb->head);
|
||||
head_frag.page.p = page;
|
||||
head_frag.page_offset = frag_skb->data -
|
||||
(unsigned char *)page_address(page);
|
||||
head_frag.size = skb_headlen(frag_skb);
|
||||
return head_frag;
|
||||
}
|
||||
|
||||
/**
|
||||
* skb_segment - Perform protocol segmentation on skb.
|
||||
* @head_skb: buffer to segment
|
||||
|
@ -3664,15 +3677,19 @@ normal:
|
|||
|
||||
while (pos < offset + len) {
|
||||
if (i >= nfrags) {
|
||||
BUG_ON(skb_headlen(list_skb));
|
||||
|
||||
i = 0;
|
||||
nfrags = skb_shinfo(list_skb)->nr_frags;
|
||||
frag = skb_shinfo(list_skb)->frags;
|
||||
frag_skb = list_skb;
|
||||
if (!skb_headlen(list_skb)) {
|
||||
BUG_ON(!nfrags);
|
||||
} else {
|
||||
BUG_ON(!list_skb->head_frag);
|
||||
|
||||
BUG_ON(!nfrags);
|
||||
|
||||
/* to make room for head_frag. */
|
||||
i--;
|
||||
frag--;
|
||||
}
|
||||
if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
|
||||
skb_zerocopy_clone(nskb, frag_skb,
|
||||
GFP_ATOMIC))
|
||||
|
@ -3689,7 +3706,7 @@ normal:
|
|||
goto err;
|
||||
}
|
||||
|
||||
*nskb_frag = *frag;
|
||||
*nskb_frag = (i < 0) ? skb_head_frag_to_page_desc(frag_skb) : *frag;
|
||||
__skb_frag_ref(nskb_frag);
|
||||
size = skb_frag_size(nskb_frag);
|
||||
|
||||
|
|
Loading…
Reference in New Issue