Merge branch 'csums-next'
Tom Herbert says: ==================== net: Checksum offload changes - Part VI I am working on overhauling RX checksum offload. Goals of this effort are: - Specify what exactly it means when driver returns CHECKSUM_UNNECESSARY - Preserve CHECKSUM_COMPLETE through encapsulation layers - Don't do skb_checksum more than once per packet - Unify GRO and non-GRO csum verification as much as possible - Unify the checksum functions (checksum_init) - Simplify code What is in this sixth patch set: - Clarify the specific requirements of devices returning CHECKSUM_UNNECESSARY (comments in skbuff.h). - Add csum_level field to skbuff. This is used to express how many checksums are covered by CHECKSUM_UNNECESSARY (stores n - 1). - Change __skb_checksum_validate_needed to "consume" each checksum as indicated by csum_level as layers of the packet are parsed. - Remove skb_pop_rcv_encapsulation, no longer needed in the new csum_level model. - Allow GRO path to "consume" checksums provided in CHECKSUM_UNNECESSARY and to report new verified checksums for use in normal path fallback. - Add proper support to SCTP to accept CHECKSUM_UNNECESSARY to validate header CRC. - Modify drivers to set skb->csum_level instead of setting skb->encapsulation to indicate validation of an encapsulated checksum on receive. v2: Allocate a new 16 bits for flags in skbuff. Please review carefully and test if possible, mucking with basic checksum functions is always a little precarious :-) ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
030824e039
|
@ -1683,7 +1683,7 @@ static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
|
|||
if (netdev->features & NETIF_F_RXHASH)
|
||||
skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
|
||||
|
||||
skb->encapsulation = rxcp->tunneled;
|
||||
skb->csum_level = rxcp->tunneled;
|
||||
skb_mark_napi_id(skb, napi);
|
||||
|
||||
if (rxcp->vlanf)
|
||||
|
@ -1741,7 +1741,7 @@ static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
|
|||
if (adapter->netdev->features & NETIF_F_RXHASH)
|
||||
skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
|
||||
|
||||
skb->encapsulation = rxcp->tunneled;
|
||||
skb->csum_level = rxcp->tunneled;
|
||||
skb_mark_napi_id(skb, napi);
|
||||
|
||||
if (rxcp->vlanf)
|
||||
|
|
|
@ -1241,7 +1241,6 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
|
|||
ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
|
||||
(rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
|
||||
|
||||
skb->encapsulation = ipv4_tunnel || ipv6_tunnel;
|
||||
skb->ip_summed = CHECKSUM_NONE;
|
||||
|
||||
/* Rx csum enabled and ip headers found? */
|
||||
|
@ -1315,6 +1314,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
|
|||
}
|
||||
|
||||
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
skb->csum_level = ipv4_tunnel || ipv6_tunnel;
|
||||
|
||||
return;
|
||||
|
||||
|
|
|
@ -746,7 +746,6 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
|
|||
ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
|
||||
(rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
|
||||
|
||||
skb->encapsulation = ipv4_tunnel || ipv6_tunnel;
|
||||
skb->ip_summed = CHECKSUM_NONE;
|
||||
|
||||
/* Rx csum enabled and ip headers found? */
|
||||
|
@ -820,6 +819,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
|
|||
}
|
||||
|
||||
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
skb->csum_level = ipv4_tunnel || ipv6_tunnel;
|
||||
|
||||
return;
|
||||
|
||||
|
|
|
@ -769,7 +769,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
|
|||
gro_skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
|
||||
if (l2_tunnel)
|
||||
gro_skb->encapsulation = 1;
|
||||
gro_skb->csum_level = 1;
|
||||
if ((cqe->vlan_my_qpn &
|
||||
cpu_to_be32(MLX4_CQE_VLAN_PRESENT_MASK)) &&
|
||||
(dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
|
||||
|
@ -823,8 +823,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
|
|||
skb->protocol = eth_type_trans(skb, dev);
|
||||
skb_record_rx_queue(skb, cq->ring);
|
||||
|
||||
if (l2_tunnel)
|
||||
skb->encapsulation = 1;
|
||||
if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY)
|
||||
skb->csum_level = 1;
|
||||
|
||||
if (dev->features & NETIF_F_RXHASH)
|
||||
skb_set_hash(skb,
|
||||
|
|
|
@ -1753,7 +1753,7 @@ qlcnic_83xx_process_rcv(struct qlcnic_adapter *adapter,
|
|||
|
||||
if (qlcnic_encap_length(sts_data[1]) &&
|
||||
skb->ip_summed == CHECKSUM_UNNECESSARY) {
|
||||
skb->encapsulation = 1;
|
||||
skb->csum_level = 1;
|
||||
adapter->stats.encap_rx_csummed++;
|
||||
}
|
||||
|
||||
|
|
|
@ -1158,8 +1158,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
|
|||
if (!vs)
|
||||
goto drop;
|
||||
|
||||
skb_pop_rcv_encapsulation(skb);
|
||||
|
||||
vs->rcv(vs, skb, vxh->vx_vni);
|
||||
return 0;
|
||||
|
||||
|
|
|
@ -1883,8 +1883,8 @@ struct napi_gro_cb {
|
|||
/* GRO checksum is valid */
|
||||
u8 csum_valid:1;
|
||||
|
||||
/* Number encapsulation layers crossed */
|
||||
u8 encapsulation;
|
||||
/* Number of checksums via CHECKSUM_UNNECESSARY */
|
||||
u8 csum_cnt:3;
|
||||
|
||||
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
|
||||
__wsum csum;
|
||||
|
@ -2179,8 +2179,7 @@ static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb,
|
|||
__sum16 check)
|
||||
{
|
||||
return (skb->ip_summed != CHECKSUM_PARTIAL &&
|
||||
(skb->ip_summed != CHECKSUM_UNNECESSARY ||
|
||||
(NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) &&
|
||||
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
|
||||
(!zero_okay || check));
|
||||
}
|
||||
|
||||
|
@ -2196,18 +2195,17 @@ static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb,
|
|||
return __skb_gro_checksum_complete(skb);
|
||||
}
|
||||
|
||||
/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level
|
||||
* checksum or an encapsulated one during GRO. This saves work
|
||||
* if we fallback to normal path with the packet.
|
||||
*/
|
||||
static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb)
|
||||
{
|
||||
if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
|
||||
if (NAPI_GRO_CB(skb)->encapsulation)
|
||||
skb->encapsulation = 1;
|
||||
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
|
||||
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
skb->encapsulation = 0;
|
||||
if (NAPI_GRO_CB(skb)->csum_cnt > 0) {
|
||||
/* Consume a checksum from CHECKSUM_UNNECESSARY */
|
||||
NAPI_GRO_CB(skb)->csum_cnt--;
|
||||
} else {
|
||||
/* Update skb for CHECKSUM_UNNECESSARY and csum_level when we
|
||||
* verified a new top level checksum or an encapsulated one
|
||||
* during GRO. This saves work if we fallback to normal path.
|
||||
*/
|
||||
__skb_incr_checksum_unnecessary(skb);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -47,11 +47,29 @@
|
|||
*
|
||||
* The hardware you're dealing with doesn't calculate the full checksum
|
||||
* (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums
|
||||
* for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will
|
||||
* set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still
|
||||
* undefined in this case though. It is a bad option, but, unfortunately,
|
||||
* nowadays most vendors do this. Apparently with the secret goal to sell
|
||||
* you new devices, when you will add new protocol to your host, f.e. IPv6 8)
|
||||
* for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY
|
||||
* if their checksums are okay. skb->csum is still undefined in this case
|
||||
* though. It is a bad option, but, unfortunately, nowadays most vendors do
|
||||
* this. Apparently with the secret goal to sell you new devices, when you
|
||||
* will add new protocol to your host, f.e. IPv6 8)
|
||||
*
|
||||
* CHECKSUM_UNNECESSARY is applicable to following protocols:
|
||||
* TCP: IPv6 and IPv4.
|
||||
* UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a
|
||||
* zero UDP checksum for either IPv4 or IPv6, the networking stack
|
||||
* may perform further validation in this case.
|
||||
* GRE: only if the checksum is present in the header.
|
||||
* SCTP: indicates the CRC in SCTP header has been validated.
|
||||
*
|
||||
* skb->csum_level indicates the number of consecutive checksums found in
|
||||
* the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
|
||||
* For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet
|
||||
* and a device is able to verify the checksums for UDP (possibly zero),
|
||||
* GRE (checksum flag is set), and TCP-- skb->csum_level would be set to
|
||||
* two. If the device were only able to verify the UDP checksum and not
|
||||
* GRE, either because it doesn't support GRE checksum or because GRE
|
||||
* checksum is bad, skb->csum_level would be set to zero (TCP checksum is
|
||||
* not considered in this case).
|
||||
*
|
||||
* CHECKSUM_COMPLETE:
|
||||
*
|
||||
|
@ -112,6 +130,9 @@
|
|||
#define CHECKSUM_COMPLETE 2
|
||||
#define CHECKSUM_PARTIAL 3
|
||||
|
||||
/* Maximum value in skb->csum_level */
|
||||
#define SKB_MAX_CSUM_LEVEL 3
|
||||
|
||||
#define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES)
|
||||
#define SKB_WITH_OVERHEAD(X) \
|
||||
((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
|
||||
|
@ -571,11 +592,7 @@ struct sk_buff {
|
|||
__u8 wifi_acked:1;
|
||||
__u8 no_fcs:1;
|
||||
__u8 head_frag:1;
|
||||
/* Encapsulation protocol and NIC drivers should use
|
||||
* this flag to indicate to each other if the skb contains
|
||||
* encapsulated packet or not and maybe use the inner packet
|
||||
* headers if needed
|
||||
*/
|
||||
/* Indicates the inner headers are valid in the skbuff. */
|
||||
__u8 encapsulation:1;
|
||||
__u8 encap_hdr_csum:1;
|
||||
__u8 csum_valid:1;
|
||||
|
@ -598,6 +615,11 @@ struct sk_buff {
|
|||
__u32 reserved_tailroom;
|
||||
};
|
||||
|
||||
kmemcheck_bitfield_begin(flags3);
|
||||
__u8 csum_level:2;
|
||||
/* 14 bit hole */
|
||||
kmemcheck_bitfield_end(flags3);
|
||||
|
||||
__be16 inner_protocol;
|
||||
__u16 inner_transport_header;
|
||||
__u16 inner_network_header;
|
||||
|
@ -1862,18 +1884,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
|
|||
return pskb_may_pull(skb, skb_network_offset(skb) + len);
|
||||
}
|
||||
|
||||
static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb)
|
||||
{
|
||||
/* Only continue with checksum unnecessary if device indicated
|
||||
* it is valid across encapsulation (skb->encapsulation was set).
|
||||
*/
|
||||
if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation)
|
||||
skb->ip_summed = CHECKSUM_NONE;
|
||||
|
||||
skb->encapsulation = 0;
|
||||
skb->csum_valid = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* CPUs often take a performance hit when accessing unaligned memory
|
||||
* locations. The actual performance hit varies, it can be small if the
|
||||
|
@ -2794,6 +2804,27 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
|
|||
0 : __skb_checksum_complete(skb);
|
||||
}
|
||||
|
||||
static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb)
|
||||
{
|
||||
if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
|
||||
if (skb->csum_level == 0)
|
||||
skb->ip_summed = CHECKSUM_NONE;
|
||||
else
|
||||
skb->csum_level--;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
|
||||
{
|
||||
if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
|
||||
if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
|
||||
skb->csum_level++;
|
||||
} else if (skb->ip_summed == CHECKSUM_NONE) {
|
||||
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
skb->csum_level = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check if we need to perform checksum complete validation.
|
||||
*
|
||||
* Returns true if checksum complete is needed, false otherwise
|
||||
|
@ -2805,6 +2836,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
|
|||
{
|
||||
if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
|
||||
skb->csum_valid = 1;
|
||||
__skb_decr_checksum_unnecessary(skb);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -3962,13 +3962,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
|
|||
|
||||
gro_list_prepare(napi, skb);
|
||||
|
||||
if (skb->ip_summed == CHECKSUM_COMPLETE) {
|
||||
NAPI_GRO_CB(skb)->csum = skb->csum;
|
||||
NAPI_GRO_CB(skb)->csum_valid = 1;
|
||||
} else {
|
||||
NAPI_GRO_CB(skb)->csum_valid = 0;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(ptype, head, list) {
|
||||
if (ptype->type != type || !ptype->callbacks.gro_receive)
|
||||
|
@ -3980,7 +3973,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
|
|||
NAPI_GRO_CB(skb)->flush = 0;
|
||||
NAPI_GRO_CB(skb)->free = 0;
|
||||
NAPI_GRO_CB(skb)->udp_mark = 0;
|
||||
NAPI_GRO_CB(skb)->encapsulation = 0;
|
||||
|
||||
/* Setup for GRO checksum validation */
|
||||
switch (skb->ip_summed) {
|
||||
case CHECKSUM_COMPLETE:
|
||||
NAPI_GRO_CB(skb)->csum = skb->csum;
|
||||
NAPI_GRO_CB(skb)->csum_valid = 1;
|
||||
NAPI_GRO_CB(skb)->csum_cnt = 0;
|
||||
break;
|
||||
case CHECKSUM_UNNECESSARY:
|
||||
NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
|
||||
NAPI_GRO_CB(skb)->csum_valid = 0;
|
||||
break;
|
||||
default:
|
||||
NAPI_GRO_CB(skb)->csum_cnt = 0;
|
||||
NAPI_GRO_CB(skb)->csum_valid = 0;
|
||||
}
|
||||
|
||||
pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
|
||||
break;
|
||||
|
|
|
@ -125,7 +125,6 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
|
|||
*csum_err = true;
|
||||
return -EINVAL;
|
||||
}
|
||||
skb_pop_rcv_encapsulation(skb);
|
||||
options++;
|
||||
}
|
||||
|
||||
|
|
|
@ -172,12 +172,9 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
|
|||
}
|
||||
|
||||
/* Don't bother verifying checksum if we're going to flush anyway. */
|
||||
if (greh->flags & GRE_CSUM) {
|
||||
if (!NAPI_GRO_CB(skb)->flush &&
|
||||
skb_gro_checksum_simple_validate(skb))
|
||||
if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush &&
|
||||
skb_gro_checksum_simple_validate(skb))
|
||||
goto out_unlock;
|
||||
NAPI_GRO_CB(skb)->encapsulation++;
|
||||
}
|
||||
|
||||
flush = 0;
|
||||
|
||||
|
|
|
@ -238,12 +238,13 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
|
|||
int flush = 1;
|
||||
|
||||
if (NAPI_GRO_CB(skb)->udp_mark ||
|
||||
(!skb->encapsulation && !NAPI_GRO_CB(skb)->csum_valid))
|
||||
(skb->ip_summed != CHECKSUM_PARTIAL &&
|
||||
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
|
||||
!NAPI_GRO_CB(skb)->csum_valid))
|
||||
goto out;
|
||||
|
||||
/* mark that this skb passed once through the udp gro layer */
|
||||
NAPI_GRO_CB(skb)->udp_mark = 1;
|
||||
NAPI_GRO_CB(skb)->encapsulation++;
|
||||
|
||||
rcu_read_lock();
|
||||
uo_priv = rcu_dereference(udp_offload_base);
|
||||
|
|
|
@ -133,9 +133,13 @@ int sctp_rcv(struct sk_buff *skb)
|
|||
__skb_pull(skb, skb_transport_offset(skb));
|
||||
if (skb->len < sizeof(struct sctphdr))
|
||||
goto discard_it;
|
||||
if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) &&
|
||||
sctp_rcv_checksum(net, skb) < 0)
|
||||
|
||||
skb->csum_valid = 0; /* Previous value not applicable */
|
||||
if (skb_csum_unnecessary(skb))
|
||||
__skb_decr_checksum_unnecessary(skb);
|
||||
else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0)
|
||||
goto discard_it;
|
||||
skb->csum_valid = 1;
|
||||
|
||||
skb_pull(skb, sizeof(struct sctphdr));
|
||||
|
||||
|
|
Loading…
Reference in New Issue