Merge branch 'net-fix-CRC32c-in-the-forwarding-path'
Davide Caratti says: ==================== net: fix CRC32c in the forwarding path Current kernel allows offloading CRC32c computation when SCTP packets are generated, setting skb->ip_summed to CHECKSUM_PARTIAL, if the underlying device features have NETIF_F_SCTP_CRC set. However, after these packets are forwarded, they may land on a device where CRC32c offloading is not available: as a consequence, transmission is done with wrong CRC32c. It's not possible to use sctp_compute_cksum() in the forwarding path and in most drivers, because it needs symbols exported by libcrc32c module. Patch 1 and 2 of this series try to solve this problem, introducing a new helper function, namely skb_crc32c_csum_help(), that can be used to resolve CHECKSUM_PARTIAL when crc32c is needed instead of Internet Checksum. Currently, we need to parse the packet headers to understand what algorithm is needed to resolve CHECKSUM_PARTIAL. We can speed things up by storing this information in the skb metadata, and use it to call an appropriate helper (skb_checksum_help or skb_crc32c_csum_help), or leave the packet unmodified when the NIC is able to offload the checksum computation. Patch 3 deprecates skb->csum_bad to free one bit in skb metadata; patch 4 introduces skb->csum_not_inet, providing skb with an indication on the algorithm needed to resolve CHECKSUM_PARTIAL. Patch 5 and 6 fix the kernel forwarding path and openvswitch datapath, where skb_checksum_help was unconditionally called to resolve CHECKSUM_PARTIAL, thus generating wrong CRC32c in forwarded SCTP packets. Finally, patch 7 updates documentation to provide a better description of possible values of skb->ip_summed. Some further work is still possible: * drivers that parse the packet header to correctly resolve CHECKSUM_PARTIAL (e.g. ixgbe_tx_csum()) can benefit from testing skb->csum_not_inet to avoid calling ip_hdr(skb)->protocol or ixgbe_ipv6_csum_is_sctp(skb). 
* drivers that call skb_checksum_help() to resolve CHECKSUM_PARTIAL can call skb_csum_hwoffload_help to avoid corrupting SCTP packets. Changes v2->v3: - patch 1/7: more standard declaration of stub variables Changes v1->v2: - none Changes RFCv4->v1: - patch 2/7: use WARN_ON_ONCE() instead of BUG_ON(), and avoid computing CRC32c on the error path. - patch 3/7: don't invert tests on the values of same_flow and NAPI_GRO_CB(skb)->flush in dev_gro_receive(), it's useless and it breaks GRO functionality as reported by kernel test robot. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
5d65a16a68
|
@ -35,6 +35,9 @@ This interface only allows a single checksum to be offloaded. Where
|
|||
encapsulation is used, the packet may have multiple checksum fields in
|
||||
different header layers, and the rest will have to be handled by another
|
||||
mechanism such as LCO or RCO.
|
||||
CRC32c can also be offloaded using this interface, by means of filling
|
||||
skb->csum_start and skb->csum_offset as described above, and setting
|
||||
skb->csum_not_inet: see skbuff.h comment (section 'D') for more details.
|
||||
No offloading of the IP header checksum is performed; it is always done in
|
||||
software. This is OK because when we build the IP header, we obviously
|
||||
have it in cache, so summing it isn't expensive. It's also rather short.
|
||||
|
@ -49,9 +52,9 @@ A driver declares its offload capabilities in netdev->hw_features; see
|
|||
and csum_offset given in the SKB; if it tries to deduce these itself in
|
||||
hardware (as some NICs do) the driver should check that the values in the
|
||||
SKB match those which the hardware will deduce, and if not, fall back to
|
||||
checksumming in software instead (with skb_checksum_help or one of the
|
||||
skb_csum_off_chk* functions as mentioned in include/linux/skbuff.h). This
|
||||
is a pain, but that's what you get when hardware tries to be clever.
|
||||
checksumming in software instead (with skb_csum_hwoffload_help() or one of
|
||||
the skb_checksum_help() / skb_crc32c_csum_help() functions, as mentioned in
|
||||
include/linux/skbuff.h).
|
||||
|
||||
The stack should, for the most part, assume that checksum offload is
|
||||
supported by the underlying device. The only place that should check is
|
||||
|
@ -60,7 +63,7 @@ The stack should, for the most part, assume that checksum offload is
|
|||
may include other offloads besides TX Checksum Offload) and, if they are
|
||||
not supported or enabled on the device (determined by netdev->features),
|
||||
performs the corresponding offload in software. In the case of TX
|
||||
Checksum Offload, that means calling skb_checksum_help(skb).
|
||||
Checksum Offload, that means calling skb_csum_hwoffload_help(skb, features).
|
||||
|
||||
|
||||
LCO: Local Checksum Offload
|
||||
|
|
|
@ -223,7 +223,7 @@ int aq_ring_rx_clean(struct aq_ring_s *self, int *work_done, int budget)
|
|||
skb->protocol = eth_type_trans(skb, ndev);
|
||||
if (unlikely(buff->is_cso_err)) {
|
||||
++self->stats.rx.errors;
|
||||
__skb_mark_checksum_bad(skb);
|
||||
skb->ip_summed = CHECKSUM_NONE;
|
||||
} else {
|
||||
if (buff->is_ip_cso) {
|
||||
__skb_incr_checksum_unnecessary(skb);
|
||||
|
|
|
@ -2573,9 +2573,7 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
|
|||
if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \
|
||||
__ret = __skb_gro_checksum_validate_complete(skb, \
|
||||
compute_pseudo(skb, proto)); \
|
||||
if (__ret) \
|
||||
__skb_mark_checksum_bad(skb); \
|
||||
else \
|
||||
if (!__ret) \
|
||||
skb_gro_incr_csum_unnecessary(skb); \
|
||||
__ret; \
|
||||
})
|
||||
|
@ -3931,6 +3929,10 @@ void netdev_rss_key_fill(void *buffer, size_t len);
|
|||
|
||||
int dev_get_nest_level(struct net_device *dev);
|
||||
int skb_checksum_help(struct sk_buff *skb);
|
||||
int skb_crc32c_csum_help(struct sk_buff *skb);
|
||||
int skb_csum_hwoffload_help(struct sk_buff *skb,
|
||||
const netdev_features_t features);
|
||||
|
||||
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
|
||||
netdev_features_t features, bool tx_path);
|
||||
struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
|
||||
|
|
|
@ -109,6 +109,7 @@
|
|||
* may perform further validation in this case.
|
||||
* GRE: only if the checksum is present in the header.
|
||||
* SCTP: indicates the CRC in SCTP header has been validated.
|
||||
* FCOE: indicates the CRC in FC frame has been validated.
|
||||
*
|
||||
* skb->csum_level indicates the number of consecutive checksums found in
|
||||
* the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
|
||||
|
@ -126,8 +127,10 @@
|
|||
* packet as seen by netif_rx() and fills out in skb->csum. Meaning, the
|
||||
* hardware doesn't need to parse L3/L4 headers to implement this.
|
||||
*
|
||||
* Note: Even if device supports only some protocols, but is able to produce
|
||||
* skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY.
|
||||
* Notes:
|
||||
* - Even if device supports only some protocols, but is able to produce
|
||||
* skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY.
|
||||
* - CHECKSUM_COMPLETE is not applicable to SCTP and FCoE protocols.
|
||||
*
|
||||
* CHECKSUM_PARTIAL:
|
||||
*
|
||||
|
@ -162,14 +165,11 @@
|
|||
*
|
||||
* NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM are being deprecated in favor of
|
||||
* NETIF_F_HW_CSUM. New devices should use NETIF_F_HW_CSUM to indicate
|
||||
* checksum offload capability. If a device has limited checksum capabilities
|
||||
* (for instance can only perform NETIF_F_IP_CSUM or NETIF_F_IPV6_CSUM as
|
||||
* described above) a helper function can be called to resolve
|
||||
* CHECKSUM_PARTIAL. The helper functions are skb_csum_off_chk*. The helper
|
||||
* function takes a spec argument that describes the protocol layer that is
|
||||
* supported for checksum offload and can be called for each packet. If a
|
||||
* packet does not match the specification for offload, skb_checksum_help
|
||||
* is called to resolve the checksum.
|
||||
* checksum offload capability.
|
||||
* skb_csum_hwoffload_help() can be called to resolve CHECKSUM_PARTIAL based
|
||||
* on network device checksumming capabilities: if a packet does not match
|
||||
* them, skb_checksum_help or skb_crc32c_csum_help (depending on the value of
|
||||
* csum_not_inet, see item D.) is called to resolve the checksum.
|
||||
*
|
||||
* CHECKSUM_NONE:
|
||||
*
|
||||
|
@ -189,11 +189,13 @@
|
|||
*
|
||||
* NETIF_F_SCTP_CRC - This feature indicates that a device is capable of
|
||||
* offloading the SCTP CRC in a packet. To perform this offload the stack
|
||||
* will set ip_summed to CHECKSUM_PARTIAL and set csum_start and csum_offset
|
||||
* accordingly. Note the there is no indication in the skbuff that the
|
||||
* CHECKSUM_PARTIAL refers to an SCTP checksum, a driver that supports
|
||||
* both IP checksum offload and SCTP CRC offload must verify which offload
|
||||
* is configured for a packet presumably by inspecting packet headers.
|
||||
* will set csum_start and csum_offset accordingly, set ip_summed to
|
||||
* CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication in
|
||||
* the skbuff that the CHECKSUM_PARTIAL refers to CRC32c.
|
||||
* A driver that supports both IP checksum offload and SCTP CRC32c offload
|
||||
* must verify which offload is configured for a packet by testing the
|
||||
* value of skb->csum_not_inet; skb_crc32c_csum_help is provided to resolve
|
||||
* CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1.
|
||||
*
|
||||
* NETIF_F_FCOE_CRC - This feature indicates that a device is capable of
|
||||
* offloading the FCOE CRC in a packet. To perform this offload the stack
|
||||
|
@ -556,6 +558,7 @@ typedef unsigned char *sk_buff_data_t;
|
|||
* @wifi_acked_valid: wifi_acked was set
|
||||
* @wifi_acked: whether frame was acked on wifi or not
|
||||
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
|
||||
* @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL
|
||||
* @dst_pending_confirm: need to confirm neighbour
|
||||
* @napi_id: id of the NAPI struct this skb came from
|
||||
* @secmark: security marking
|
||||
|
@ -684,7 +687,7 @@ struct sk_buff {
|
|||
__u8 csum_valid:1;
|
||||
__u8 csum_complete_sw:1;
|
||||
__u8 csum_level:2;
|
||||
__u8 csum_bad:1;
|
||||
__u8 csum_not_inet:1;
|
||||
|
||||
__u8 dst_pending_confirm:1;
|
||||
#ifdef CONFIG_IPV6_NDISC_NODETYPE
|
||||
|
@ -3076,6 +3079,8 @@ struct skb_checksum_ops {
|
|||
__wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len);
|
||||
};
|
||||
|
||||
extern const struct skb_checksum_ops *crc32c_csum_stub __read_mostly;
|
||||
|
||||
__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
|
||||
__wsum csum, const struct skb_checksum_ops *ops);
|
||||
__wsum skb_checksum(const struct sk_buff *skb, int offset, int len,
|
||||
|
@ -3333,21 +3338,6 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
|
|||
}
|
||||
}
|
||||
|
||||
static inline void __skb_mark_checksum_bad(struct sk_buff *skb)
|
||||
{
|
||||
/* Mark current checksum as bad (typically called from GRO
|
||||
* path). In the case that ip_summed is CHECKSUM_NONE
|
||||
* this must be the first checksum encountered in the packet.
|
||||
* When ip_summed is CHECKSUM_UNNECESSARY, this is the first
|
||||
* checksum after the last one validated. For UDP, a zero
|
||||
* checksum can not be marked as bad.
|
||||
*/
|
||||
|
||||
if (skb->ip_summed == CHECKSUM_NONE ||
|
||||
skb->ip_summed == CHECKSUM_UNNECESSARY)
|
||||
skb->csum_bad = 1;
|
||||
}
|
||||
|
||||
/* Check if we need to perform checksum complete validation.
|
||||
*
|
||||
* Returns true if checksum complete is needed, false otherwise
|
||||
|
@ -3401,9 +3391,6 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
|
|||
skb->csum_valid = 1;
|
||||
return 0;
|
||||
}
|
||||
} else if (skb->csum_bad) {
|
||||
/* ip_summed == CHECKSUM_NONE in this case */
|
||||
return (__force __sum16)1;
|
||||
}
|
||||
|
||||
skb->csum = psum;
|
||||
|
@ -3463,8 +3450,7 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
|
|||
|
||||
static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
|
||||
{
|
||||
return (skb->ip_summed == CHECKSUM_NONE &&
|
||||
skb->csum_valid && !skb->csum_bad);
|
||||
return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid);
|
||||
}
|
||||
|
||||
static inline void __skb_checksum_convert(struct sk_buff *skb,
|
||||
|
|
|
@ -111,7 +111,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net,
|
|||
__wsum csum;
|
||||
u8 proto;
|
||||
|
||||
if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb))
|
||||
if (!nft_bridge_iphdr_validate(oldskb))
|
||||
return;
|
||||
|
||||
/* IP header checks: fragment. */
|
||||
|
@ -226,9 +226,6 @@ static bool reject6_br_csum_ok(struct sk_buff *skb, int hook)
|
|||
__be16 fo;
|
||||
u8 proto = ip6h->nexthdr;
|
||||
|
||||
if (skb->csum_bad)
|
||||
return false;
|
||||
|
||||
if (skb_csum_unnecessary(skb))
|
||||
return true;
|
||||
|
||||
|
|
|
@ -143,6 +143,7 @@
|
|||
#include <linux/hrtimer.h>
|
||||
#include <linux/netfilter_ingress.h>
|
||||
#include <linux/crash_dump.h>
|
||||
#include <linux/sctp.h>
|
||||
|
||||
#include "net-sysfs.h"
|
||||
|
||||
|
@ -2612,6 +2613,47 @@ out:
|
|||
}
|
||||
EXPORT_SYMBOL(skb_checksum_help);
|
||||
|
||||
int skb_crc32c_csum_help(struct sk_buff *skb)
|
||||
{
|
||||
__le32 crc32c_csum;
|
||||
int ret = 0, offset, start;
|
||||
|
||||
if (skb->ip_summed != CHECKSUM_PARTIAL)
|
||||
goto out;
|
||||
|
||||
if (unlikely(skb_is_gso(skb)))
|
||||
goto out;
|
||||
|
||||
/* Before computing a checksum, we should make sure no frag could
|
||||
* be modified by an external entity : checksum could be wrong.
|
||||
*/
|
||||
if (unlikely(skb_has_shared_frag(skb))) {
|
||||
ret = __skb_linearize(skb);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
start = skb_checksum_start_offset(skb);
|
||||
offset = start + offsetof(struct sctphdr, checksum);
|
||||
if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (skb_cloned(skb) &&
|
||||
!skb_clone_writable(skb, offset + sizeof(__le32))) {
|
||||
ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
|
||||
skb->len - start, ~(__u32)0,
|
||||
crc32c_csum_stub));
|
||||
*(__le32 *)(skb->data + offset) = crc32c_csum;
|
||||
skb->ip_summed = CHECKSUM_NONE;
|
||||
skb->csum_not_inet = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
|
||||
{
|
||||
__be16 type = skb->protocol;
|
||||
|
@ -2954,6 +2996,17 @@ static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
|
|||
return skb;
|
||||
}
|
||||
|
||||
int skb_csum_hwoffload_help(struct sk_buff *skb,
|
||||
const netdev_features_t features)
|
||||
{
|
||||
if (unlikely(skb->csum_not_inet))
|
||||
return !!(features & NETIF_F_SCTP_CRC) ? 0 :
|
||||
skb_crc32c_csum_help(skb);
|
||||
|
||||
return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
|
||||
}
|
||||
EXPORT_SYMBOL(skb_csum_hwoffload_help);
|
||||
|
||||
static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
|
||||
{
|
||||
netdev_features_t features;
|
||||
|
@ -2992,8 +3045,7 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
|
|||
else
|
||||
skb_set_transport_header(skb,
|
||||
skb_checksum_start_offset(skb));
|
||||
if (!(features & NETIF_F_CSUM_MASK) &&
|
||||
skb_checksum_help(skb))
|
||||
if (skb_csum_hwoffload_help(skb, features))
|
||||
goto out_kfree_skb;
|
||||
}
|
||||
}
|
||||
|
@ -4637,9 +4689,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
|
|||
if (netif_elide_gro(skb->dev))
|
||||
goto normal;
|
||||
|
||||
if (skb->csum_bad)
|
||||
goto normal;
|
||||
|
||||
gro_list_prepare(napi, skb);
|
||||
|
||||
rcu_read_lock();
|
||||
|
|
|
@ -2243,6 +2243,32 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
|
|||
}
|
||||
EXPORT_SYMBOL(skb_copy_and_csum_bits);
|
||||
|
||||
/* Fallback ->update() callback used by default_crc32c_ops: it computes
 * nothing, emits a rate-limited warning and returns 0.
 */
static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum)
{
	net_warn_ratelimited("%s: attempt to compute crc32c without libcrc32c.ko\n",
			     __func__);

	return 0;
}
|
||||
|
||||
/* Fallback ->combine() callback used by default_crc32c_ops: it computes
 * nothing, emits a rate-limited warning and returns 0.
 */
static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2,
				       int offset, int len)
{
	net_warn_ratelimited("%s: attempt to compute crc32c without libcrc32c.ko\n",
			     __func__);

	return 0;
}
|
||||
|
||||
static const struct skb_checksum_ops default_crc32c_ops = {
|
||||
.update = warn_crc32c_csum_update,
|
||||
.combine = warn_crc32c_csum_combine,
|
||||
};
|
||||
|
||||
const struct skb_checksum_ops *crc32c_csum_stub __read_mostly =
|
||||
&default_crc32c_ops;
|
||||
EXPORT_SYMBOL(crc32c_csum_stub);
|
||||
|
||||
/**
|
||||
* skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy()
|
||||
* @from: source buffer
|
||||
|
|
|
@ -172,7 +172,7 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
|
|||
struct iphdr *iph = ip_hdr(skb_in);
|
||||
u8 proto;
|
||||
|
||||
if (skb_in->csum_bad || iph->frag_off & htons(IP_OFFSET))
|
||||
if (iph->frag_off & htons(IP_OFFSET))
|
||||
return;
|
||||
|
||||
if (skb_csum_unnecessary(skb_in)) {
|
||||
|
|
|
@ -220,9 +220,6 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
|
|||
__be16 fo;
|
||||
u8 proto;
|
||||
|
||||
if (skb->csum_bad)
|
||||
return false;
|
||||
|
||||
if (skb_csum_unnecessary(skb))
|
||||
return true;
|
||||
|
||||
|
|
|
@ -453,7 +453,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
|
|||
|
||||
/* Complete checksum if needed */
|
||||
if (skb->ip_summed == CHECKSUM_PARTIAL &&
|
||||
(err = skb_checksum_help(skb)))
|
||||
(err = skb_csum_hwoffload_help(skb, 0)))
|
||||
goto out;
|
||||
|
||||
/* Older versions of OVS user space enforce alignment of the last
|
||||
|
|
|
@ -350,6 +350,7 @@ static int tcf_csum_sctp(struct sk_buff *skb, unsigned int ihl,
|
|||
sctph->checksum = sctp_compute_cksum(skb,
|
||||
skb_network_offset(skb) + ihl);
|
||||
skb->ip_summed = CHECKSUM_NONE;
|
||||
skb->csum_not_inet = 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
|
||||
{
|
||||
skb->ip_summed = CHECKSUM_NONE;
|
||||
skb->csum_not_inet = 0;
|
||||
return sctp_compute_cksum(skb, skb_transport_offset(skb));
|
||||
}
|
||||
|
||||
|
@ -98,6 +99,11 @@ static const struct net_offload sctp6_offload = {
|
|||
},
|
||||
};
|
||||
|
||||
static const struct skb_checksum_ops crc32c_csum_ops = {
|
||||
.update = sctp_csum_update,
|
||||
.combine = sctp_csum_combine,
|
||||
};
|
||||
|
||||
int __init sctp_offload_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
@ -110,6 +116,7 @@ int __init sctp_offload_init(void)
|
|||
if (ret)
|
||||
goto ipv4;
|
||||
|
||||
crc32c_csum_stub = &crc32c_csum_ops;
|
||||
return ret;
|
||||
|
||||
ipv4:
|
||||
|
|
|
@ -538,6 +538,7 @@ merge:
|
|||
} else {
|
||||
chksum:
|
||||
head->ip_summed = CHECKSUM_PARTIAL;
|
||||
head->csum_not_inet = 1;
|
||||
head->csum_start = skb_transport_header(head) - head->head;
|
||||
head->csum_offset = offsetof(struct sctphdr, checksum);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue