Merge branch 'sfc-more-encap-offloads'

Edward Cree says:

====================
sfc: more encap offloads

This patch series adds support for RX checksum offload of encapsulated packets.
It also adds support for configuring the hardware's lists of UDP ports used for
VXLAN and GENEVE encapsulation offloads.  Since changing these lists causes the
MC to reboot, the driver has been hardened against reboots, which used to be
considered an exceptional occurrence but are now normal.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2017-02-09 16:47:54 -05:00
commit caa2858cd5
11 changed files with 639 additions and 57 deletions

View File

@ -433,6 +433,9 @@ typedef union efx_oword {
(oword).u64[1] = (from).u64[1] & (mask).u64[1]; \
} while (0)
#define EFX_AND_QWORD(qword, from, mask) \
(qword).u64[0] = (from).u64[0] & (mask).u64[0]
#define EFX_OR_OWORD(oword, from, mask) \
do { \
(oword).u64[0] = (from).u64[0] | (mask).u64[0]; \

View File

@ -132,6 +132,7 @@ static int efx_ef10_filter_add_vlan(struct efx_nic *efx, u16 vid);
static void efx_ef10_filter_del_vlan_internal(struct efx_nic *efx,
struct efx_ef10_filter_vlan *vlan);
static void efx_ef10_filter_del_vlan(struct efx_nic *efx, u16 vid);
static int efx_ef10_set_udp_tnl_ports(struct efx_nic *efx, bool unloading);
static int efx_ef10_get_warm_boot_count(struct efx_nic *efx)
{
@ -624,6 +625,8 @@ static int efx_ef10_probe(struct efx_nic *efx)
if (rc)
goto fail2;
mutex_init(&nic_data->udp_tunnels_lock);
/* Reset (most) configuration for this function */
rc = efx_mcdi_reset(efx, RESET_TYPE_ALL);
if (rc)
@ -712,6 +715,14 @@ fail5:
fail4:
device_remove_file(&efx->pci_dev->dev, &dev_attr_link_control_flag);
fail3:
efx_mcdi_detach(efx);
mutex_lock(&nic_data->udp_tunnels_lock);
memset(nic_data->udp_tunnels, 0, sizeof(nic_data->udp_tunnels));
(void)efx_ef10_set_udp_tnl_ports(efx, true);
mutex_unlock(&nic_data->udp_tunnels_lock);
mutex_destroy(&nic_data->udp_tunnels_lock);
efx_mcdi_fini(efx);
fail2:
efx_nic_free_buffer(efx, &nic_data->mcdi_buf);
@ -981,6 +992,15 @@ static void efx_ef10_remove(struct efx_nic *efx)
device_remove_file(&efx->pci_dev->dev, &dev_attr_primary_flag);
device_remove_file(&efx->pci_dev->dev, &dev_attr_link_control_flag);
efx_mcdi_detach(efx);
/* Clear the tunnel table only while holding udp_tunnels_lock, which
 * serialises writes to udp_tunnels (same order as the probe failure path).
 */
mutex_lock(&nic_data->udp_tunnels_lock);
memset(nic_data->udp_tunnels, 0, sizeof(nic_data->udp_tunnels));
(void)efx_ef10_set_udp_tnl_ports(efx, true);
mutex_unlock(&nic_data->udp_tunnels_lock);
mutex_destroy(&nic_data->udp_tunnels_lock);
efx_mcdi_fini(efx);
efx_nic_free_buffer(efx, &nic_data->mcdi_buf);
kfree(nic_data);
@ -3154,13 +3174,103 @@ static void efx_ef10_handle_rx_abort(struct efx_rx_queue *rx_queue)
++efx_rx_queue_channel(rx_queue)->n_rx_nodesc_trunc;
}
/* Account for the error bits carried by an RX event.
 *
 * The error bits are examined in a fixed order (Ethernet CRC, outer IP
 * checksum, outer TCP/UDP checksum, inner IP checksum, inner TCP/UDP
 * checksum) and only the first one found is handled; the function returns
 * as soon as one matches.
 *
 * @channel: channel whose error counters are updated
 * @n_packets: number of packets to add to the matching counter
 * @rx_encap_hdr: encapsulation type from the event (non-zero means the
 *	"outer" counters are used for the IP and TCP/UDP checksum errors)
 * @rx_l3_class: L3 class from the event, used only for sanity warnings
 * @rx_l4_class: L4 class from the event, used only for sanity warnings
 * @event: the RX event being processed
 *
 * Counters are left untouched while efx->loopback_selftest is set.
 *
 * Returns EFX_RX_PKT_DISCARD for an Ethernet CRC error, otherwise 0.
 */
static u16 efx_ef10_handle_rx_event_errors(struct efx_channel *channel,
unsigned int n_packets,
unsigned int rx_encap_hdr,
unsigned int rx_l3_class,
unsigned int rx_l4_class,
const efx_qword_t *event)
{
struct efx_nic *efx = channel->efx;
/* Ethernet CRC error: the only case that asks for the packet to be
 * discarded.
 */
if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR)) {
if (!efx->loopback_selftest)
channel->n_rx_eth_crc_err += n_packets;
return EFX_RX_PKT_DISCARD;
}
/* (Outer) IP header checksum error */
if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR)) {
/* Warn if the event is neither VXLAN-encapsulated nor classified
 * as any IPv4/IPv6 (fragment) class.
 */
if (unlikely(rx_encap_hdr != ESE_EZ_ENCAP_HDR_VXLAN &&
rx_l3_class != ESE_DZ_L3_CLASS_IP4 &&
rx_l3_class != ESE_DZ_L3_CLASS_IP4_FRAG &&
rx_l3_class != ESE_DZ_L3_CLASS_IP6 &&
rx_l3_class != ESE_DZ_L3_CLASS_IP6_FRAG))
netdev_WARN(efx->net_dev,
"invalid class for RX_IPCKSUM_ERR: event="
EFX_QWORD_FMT "\n",
EFX_QWORD_VAL(*event));
/* Encapsulated packets count against the "outer" counter */
if (!efx->loopback_selftest)
*(rx_encap_hdr ?
&channel->n_rx_outer_ip_hdr_chksum_err :
&channel->n_rx_ip_hdr_chksum_err) += n_packets;
return 0;
}
/* (Outer) TCP/UDP checksum error */
if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_TCPUDP_CKSUM_ERR)) {
/* Warn if not VXLAN-encapsulated and the classes are not
 * IPv4/IPv6 carrying TCP/UDP.
 */
if (unlikely(rx_encap_hdr != ESE_EZ_ENCAP_HDR_VXLAN &&
((rx_l3_class != ESE_DZ_L3_CLASS_IP4 &&
rx_l3_class != ESE_DZ_L3_CLASS_IP6) ||
(rx_l4_class != ESE_DZ_L4_CLASS_TCP &&
rx_l4_class != ESE_DZ_L4_CLASS_UDP))))
netdev_WARN(efx->net_dev,
"invalid class for RX_TCPUDP_CKSUM_ERR: event="
EFX_QWORD_FMT "\n",
EFX_QWORD_VAL(*event));
/* Encapsulated packets count against the "outer" counter */
if (!efx->loopback_selftest)
*(rx_encap_hdr ?
&channel->n_rx_outer_tcp_udp_chksum_err :
&channel->n_rx_tcp_udp_chksum_err) += n_packets;
return 0;
}
/* Inner IP header checksum error: only expected on encapsulated
 * packets, so warn when no encapsulation was reported.
 */
if (EFX_QWORD_FIELD(*event, ESF_EZ_RX_IP_INNER_CHKSUM_ERR)) {
if (unlikely(!rx_encap_hdr))
netdev_WARN(efx->net_dev,
"invalid encapsulation type for RX_IP_INNER_CHKSUM_ERR: event="
EFX_QWORD_FMT "\n",
EFX_QWORD_VAL(*event));
else if (unlikely(rx_l3_class != ESE_DZ_L3_CLASS_IP4 &&
rx_l3_class != ESE_DZ_L3_CLASS_IP4_FRAG &&
rx_l3_class != ESE_DZ_L3_CLASS_IP6 &&
rx_l3_class != ESE_DZ_L3_CLASS_IP6_FRAG))
netdev_WARN(efx->net_dev,
"invalid class for RX_IP_INNER_CHKSUM_ERR: event="
EFX_QWORD_FMT "\n",
EFX_QWORD_VAL(*event));
if (!efx->loopback_selftest)
channel->n_rx_inner_ip_hdr_chksum_err += n_packets;
return 0;
}
/* Inner TCP/UDP checksum error: likewise only expected on
 * encapsulated packets.
 */
if (EFX_QWORD_FIELD(*event, ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR)) {
if (unlikely(!rx_encap_hdr))
netdev_WARN(efx->net_dev,
"invalid encapsulation type for RX_TCP_UDP_INNER_CHKSUM_ERR: event="
EFX_QWORD_FMT "\n",
EFX_QWORD_VAL(*event));
else if (unlikely((rx_l3_class != ESE_DZ_L3_CLASS_IP4 &&
rx_l3_class != ESE_DZ_L3_CLASS_IP6) ||
(rx_l4_class != ESE_DZ_L4_CLASS_TCP &&
rx_l4_class != ESE_DZ_L4_CLASS_UDP)))
netdev_WARN(efx->net_dev,
"invalid class for RX_TCP_UDP_INNER_CHKSUM_ERR: event="
EFX_QWORD_FMT "\n",
EFX_QWORD_VAL(*event));
if (!efx->loopback_selftest)
channel->n_rx_inner_tcp_udp_chksum_err += n_packets;
return 0;
}
WARN_ON(1); /* No error bits were recognised */
return 0;
}
static int efx_ef10_handle_rx_event(struct efx_channel *channel,
const efx_qword_t *event)
{
unsigned int rx_bytes, next_ptr_lbits, rx_queue_label, rx_l4_class;
unsigned int rx_bytes, next_ptr_lbits, rx_queue_label;
unsigned int rx_l3_class, rx_l4_class, rx_encap_hdr;
unsigned int n_descs, n_packets, i;
struct efx_nic *efx = channel->efx;
struct efx_ef10_nic_data *nic_data = efx->nic_data;
struct efx_rx_queue *rx_queue;
efx_qword_t errors;
bool rx_cont;
u16 flags = 0;
@ -3171,8 +3281,14 @@ static int efx_ef10_handle_rx_event(struct efx_channel *channel,
rx_bytes = EFX_QWORD_FIELD(*event, ESF_DZ_RX_BYTES);
next_ptr_lbits = EFX_QWORD_FIELD(*event, ESF_DZ_RX_DSC_PTR_LBITS);
rx_queue_label = EFX_QWORD_FIELD(*event, ESF_DZ_RX_QLABEL);
rx_l3_class = EFX_QWORD_FIELD(*event, ESF_DZ_RX_L3_CLASS);
rx_l4_class = EFX_QWORD_FIELD(*event, ESF_DZ_RX_L4_CLASS);
rx_cont = EFX_QWORD_FIELD(*event, ESF_DZ_RX_CONT);
rx_encap_hdr =
nic_data->datapath_caps &
(1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN) ?
EFX_QWORD_FIELD(*event, ESF_EZ_RX_ENCAP_HDR) :
ESE_EZ_ENCAP_HDR_NONE;
if (EFX_QWORD_FIELD(*event, ESF_DZ_RX_DROP_EVENT))
netdev_WARN(efx->net_dev, "saw RX_DROP_EVENT: event="
@ -3232,17 +3348,37 @@ static int efx_ef10_handle_rx_event(struct efx_channel *channel,
n_packets = 1;
}
if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_RX_ECRC_ERR)))
flags |= EFX_RX_PKT_DISCARD;
EFX_POPULATE_QWORD_5(errors, ESF_DZ_RX_ECRC_ERR, 1,
ESF_DZ_RX_IPCKSUM_ERR, 1,
ESF_DZ_RX_TCPUDP_CKSUM_ERR, 1,
ESF_EZ_RX_IP_INNER_CHKSUM_ERR, 1,
ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR, 1);
EFX_AND_QWORD(errors, *event, errors);
if (unlikely(!EFX_QWORD_IS_ZERO(errors))) {
/* Argument order must follow the callee's prototype: the encapsulation
 * type comes first, then the L3 and L4 classes.
 */
flags |= efx_ef10_handle_rx_event_errors(channel, n_packets,
					 rx_encap_hdr,
					 rx_l3_class, rx_l4_class,
					 event);
} else {
bool tcpudp = rx_l4_class == ESE_DZ_L4_CLASS_TCP ||
rx_l4_class == ESE_DZ_L4_CLASS_UDP;
if (unlikely(EFX_QWORD_FIELD(*event, ESF_DZ_RX_IPCKSUM_ERR))) {
channel->n_rx_ip_hdr_chksum_err += n_packets;
} else if (unlikely(EFX_QWORD_FIELD(*event,
ESF_DZ_RX_TCPUDP_CKSUM_ERR))) {
channel->n_rx_tcp_udp_chksum_err += n_packets;
} else if (rx_l4_class == ESE_DZ_L4_CLASS_TCP ||
rx_l4_class == ESE_DZ_L4_CLASS_UDP) {
flags |= EFX_RX_PKT_CSUMMED;
switch (rx_encap_hdr) {
case ESE_EZ_ENCAP_HDR_VXLAN: /* VxLAN or GENEVE */
flags |= EFX_RX_PKT_CSUMMED; /* outer UDP csum */
if (tcpudp)
flags |= EFX_RX_PKT_CSUM_LEVEL; /* inner L4 */
break;
case ESE_EZ_ENCAP_HDR_GRE:
case ESE_EZ_ENCAP_HDR_NONE:
if (tcpudp)
flags |= EFX_RX_PKT_CSUMMED;
break;
default:
netdev_WARN(efx->net_dev,
"unknown encapsulation type: event="
EFX_QWORD_FMT "\n",
EFX_QWORD_VAL(*event));
}
}
if (rx_l4_class == ESE_DZ_L4_CLASS_TCP)
@ -5950,6 +6086,271 @@ static int efx_ef10_vlan_rx_kill_vid(struct efx_nic *efx, __be16 proto, u16 vid)
return efx_ef10_del_vlan(efx, vid);
}
/* Push the software UDP tunnel port table to the MC.
 *
 * We rely on the MCDI wiping out our TX rings if it made any changes to the
 * ports table, ensuring that any TSO descriptors that were made on a now-
 * removed tunnel port will be blown away and won't break things when we try
 * to transmit them using the new ports table.
 *
 * Must be called with nic_data->udp_tunnels_lock held (this is checked with
 * a WARN_ON).  @unloading is passed to the MC in the request's UNLOADING
 * flag and also suppresses the re-attach of the net device at the end.
 *
 * Returns 0 when the NIC lacks the VXLAN/NVGRE capability or when the MCDI
 * call fails with -EIO (the table is then marked dirty for a later push);
 * otherwise returns the result of the MCDI call.
 */
static int efx_ef10_set_udp_tnl_ports(struct efx_nic *efx, bool unloading)
{
struct efx_ef10_nic_data *nic_data = efx->nic_data;
MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_LENMAX);
MCDI_DECLARE_BUF(outbuf, MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_OUT_LEN);
bool will_reset = false;
size_t num_entries = 0;
size_t inlen, outlen;
size_t i;
int rc;
efx_dword_t flags_and_num_entries;
WARN_ON(!mutex_is_locked(&nic_data->udp_tunnels_lock));
/* Assume the push will succeed; set again below on -EIO */
nic_data->udp_tunnels_dirty = false;
/* Nothing to push without the capability; just undo the caller's
 * detach.
 */
if (!(nic_data->datapath_caps &
(1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))) {
netif_device_attach(efx->net_dev);
return 0;
}
/* The whole software table must fit in a single request */
BUILD_BUG_ON(ARRAY_SIZE(nic_data->udp_tunnels) >
MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_ENTRIES_MAXNUM);
/* Pack every slot that is in use (non-zero count and port) into
 * consecutive request entries.
 */
for (i = 0; i < ARRAY_SIZE(nic_data->udp_tunnels); ++i) {
if (nic_data->udp_tunnels[i].count &&
nic_data->udp_tunnels[i].port) {
efx_dword_t entry;
EFX_POPULATE_DWORD_2(entry,
TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT,
ntohs(nic_data->udp_tunnels[i].port),
TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL,
nic_data->udp_tunnels[i].type);
*_MCDI_ARRAY_DWORD(inbuf,
SET_TUNNEL_ENCAP_UDP_PORTS_IN_ENTRIES,
num_entries++) = entry;
}
}
/* FLAGS and NUM_ENTRIES are written together as one dword; check at
 * build time that NUM_ENTRIES sits exactly in the upper word.
 */
BUILD_BUG_ON((MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_NUM_ENTRIES_OFST -
MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_FLAGS_OFST) * 8 !=
EFX_WORD_1_LBN);
BUILD_BUG_ON(MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_NUM_ENTRIES_LEN * 8 !=
EFX_WORD_1_WIDTH);
EFX_POPULATE_DWORD_2(flags_and_num_entries,
MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_UNLOADING,
!!unloading,
EFX_WORD_1, num_entries);
*_MCDI_DWORD(inbuf, SET_TUNNEL_ENCAP_UDP_PORTS_IN_FLAGS) =
flags_and_num_entries;
inlen = MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_LEN(num_entries);
rc = efx_mcdi_rpc_quiet(efx, MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS,
inbuf, inlen, outbuf, sizeof(outbuf), &outlen);
if (rc == -EIO) {
/* Most likely the MC rebooted due to another function also
 * setting its tunnel port list. Mark the tunnel port list as
 * dirty, so it will be pushed upon coming up from the reboot.
 */
nic_data->udp_tunnels_dirty = true;
return 0;
}
if (rc) {
/* expected not available on unprivileged functions */
if (rc != -EPERM)
netif_warn(efx, drv, efx->net_dev,
"Unable to set UDP tunnel ports; rc=%d.\n", rc);
} else if (MCDI_DWORD(outbuf, SET_TUNNEL_ENCAP_UDP_PORTS_OUT_FLAGS) &
(1 << MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_OUT_RESETTING_LBN)) {
/* The MC reported that it is resetting to apply the new list */
netif_info(efx, drv, efx->net_dev,
"Rebooting MC due to UDP tunnel port list change\n");
will_reset = true;
if (unloading)
/* Delay for the MC reset to complete. This will make
 * unloading other functions a bit smoother. This is a
 * race, but the other unload will work whichever way
 * it goes, this just avoids an unnecessary error
 * message.
 */
msleep(100);
}
if (!will_reset && !unloading) {
/* The caller will have detached, relying on the MC reset to
 * trigger a re-attach. Since there won't be an MC reset, we
 * have to do the attach ourselves.
 */
netif_device_attach(efx->net_dev);
}
return rc;
}
/* Re-push the UDP tunnel port table to the MC, but only if an earlier push
 * left it marked dirty.  Returns 0 when there was nothing to do, otherwise
 * the result of efx_ef10_set_udp_tnl_ports().
 */
static int efx_ef10_udp_tnl_push_ports(struct efx_nic *efx)
{
	struct efx_ef10_nic_data *nic_data = efx->nic_data;
	int rc;

	mutex_lock(&nic_data->udp_tunnels_lock);
	if (!nic_data->udp_tunnels_dirty) {
		rc = 0;
	} else {
		/* Stop all TX before touching the table so that we cannot
		 * race against an efx_features_check().
		 */
		efx_device_detach_sync(efx);
		rc = efx_ef10_set_udp_tnl_ports(efx, false);
	}
	mutex_unlock(&nic_data->udp_tunnels_lock);

	return rc;
}
/* Find the in-use table entry (non-zero count) for @port.
 * Returns a pointer into nic_data->udp_tunnels, or NULL if there is none.
 */
static struct efx_udp_tunnel *__efx_ef10_udp_tnl_lookup_port(struct efx_nic *efx,
							     __be16 port)
{
	struct efx_ef10_nic_data *nic_data = efx->nic_data;
	struct efx_udp_tunnel *entry = nic_data->udp_tunnels;
	struct efx_udp_tunnel *end = entry + ARRAY_SIZE(nic_data->udp_tunnels);

	for (; entry != end; ++entry)
		if (entry->count && entry->port == port)
			return entry;

	return NULL;
}
/* Add a UDP tunnel port to the software table and push it to the MC.
 *
 * @efx: the NIC
 * @tnl: tunnel type and port to add (its count field is ignored)
 *
 * If the port is already present with the same type, only its reference
 * count is bumped.  Returns 0 on success or when the NIC lacks the
 * VXLAN/NVGRE capability, -EEXIST if the port is in use by a different
 * tunnel type, -ENOMEM if the table is full, or the result of
 * efx_ef10_set_udp_tnl_ports().
 */
static int efx_ef10_udp_tnl_add_port(struct efx_nic *efx,
				     struct efx_udp_tunnel tnl)
{
	struct efx_ef10_nic_data *nic_data = efx->nic_data;
	struct efx_udp_tunnel *match;
	char typebuf[8];
	size_t i;
	int rc;

	if (!(nic_data->datapath_caps &
	      (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)))
		return 0;

	efx_get_udp_tunnel_type_name(tnl.type, typebuf, sizeof(typebuf));
	netif_dbg(efx, drv, efx->net_dev, "Adding UDP tunnel (%s) port %d\n",
		  typebuf, ntohs(tnl.port));

	mutex_lock(&nic_data->udp_tunnels_lock);
	/* Make sure all TX are stopped while we add to the table, else we
	 * might race against an efx_features_check().
	 */
	efx_device_detach_sync(efx);

	match = __efx_ef10_udp_tnl_lookup_port(efx, tnl.port);
	if (match != NULL) {
		if (match->type == tnl.type) {
			netif_dbg(efx, drv, efx->net_dev,
				  "Referencing existing tunnel entry\n");
			match->count++;
			/* No need to cause an MCDI update */
			rc = 0;
			goto reattach_out;
		}
		efx_get_udp_tunnel_type_name(match->type,
					     typebuf, sizeof(typebuf));
		netif_dbg(efx, drv, efx->net_dev,
			  "UDP port %d is already in use by %s\n",
			  ntohs(tnl.port), typebuf);
		rc = -EEXIST;
		goto reattach_out;
	}

	for (i = 0; i < ARRAY_SIZE(nic_data->udp_tunnels); ++i)
		if (!nic_data->udp_tunnels[i].count) {
			nic_data->udp_tunnels[i] = tnl;
			nic_data->udp_tunnels[i].count = 1;
			/* set_udp_tnl_ports() handles the re-attach itself
			 * (directly, or via the MC reset it triggers).
			 */
			rc = efx_ef10_set_udp_tnl_ports(efx, false);
			goto unlock_out;
		}

	netif_dbg(efx, drv, efx->net_dev,
		  "Unable to add UDP tunnel (%s) port %d; insufficient resources.\n",
		  typebuf, ntohs(tnl.port));

	rc = -ENOMEM;

reattach_out:
	/* No MCDI update was issued on this path, so nothing else will undo
	 * the detach done above; re-attach here or TX stays stopped.
	 */
	netif_device_attach(efx->net_dev);
unlock_out:
	mutex_unlock(&nic_data->udp_tunnels_lock);
	return rc;
}
/* Report whether @port is currently usable as an offloaded UDP tunnel port.
 *
 * Called under the TX lock with the TX queue running, hence no-one can be
 * in the middle of updating the UDP tunnels table. However, they could
 * have tried and failed the MCDI, in which case they'll have set the dirty
 * flag before dropping their locks.
 */
static bool efx_ef10_udp_tnl_has_port(struct efx_nic *efx, __be16 port)
{
	struct efx_ef10_nic_data *nic_data = efx->nic_data;
	bool encap_capable = nic_data->datapath_caps &
		(1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN);

	/* Without the capability there are no tunnel offloads.  With a
	 * dirty table the SW copy may not match HW state, so just assume
	 * we can't use any UDP tunnel offloads.
	 */
	if (!encap_capable || nic_data->udp_tunnels_dirty)
		return false;

	return __efx_ef10_udp_tnl_lookup_port(efx, port) != NULL;
}
/* Drop one reference to a UDP tunnel port; when the last reference goes,
 * push the updated table to the MC.
 *
 * @efx: the NIC
 * @tnl: tunnel type and port to remove (its count field is ignored)
 *
 * Returns 0 when the NIC lacks the VXLAN/NVGRE capability or the port is
 * still referenced, -ENOENT when no entry with this port and type exists,
 * or the result of efx_ef10_set_udp_tnl_ports().
 */
static int efx_ef10_udp_tnl_del_port(struct efx_nic *efx,
				     struct efx_udp_tunnel tnl)
{
	struct efx_ef10_nic_data *nic_data = efx->nic_data;
	struct efx_udp_tunnel *match;
	char typebuf[8];
	int rc;

	if (!(nic_data->datapath_caps &
	      (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN)))
		return 0;

	efx_get_udp_tunnel_type_name(tnl.type, typebuf, sizeof(typebuf));
	netif_dbg(efx, drv, efx->net_dev, "Removing UDP tunnel (%s) port %d\n",
		  typebuf, ntohs(tnl.port));

	mutex_lock(&nic_data->udp_tunnels_lock);
	/* Make sure all TX are stopped while we remove from the table, else we
	 * might race against an efx_features_check().
	 */
	efx_device_detach_sync(efx);

	match = __efx_ef10_udp_tnl_lookup_port(efx, tnl.port);
	if (match != NULL) {
		if (match->type == tnl.type) {
			if (--match->count) {
				/* Port is still in use, so nothing to do */
				netif_dbg(efx, drv, efx->net_dev,
					  "UDP tunnel port %d remains active\n",
					  ntohs(tnl.port));
				rc = 0;
				goto out_reattach;
			}
			/* set_udp_tnl_ports() handles the re-attach itself
			 * (directly, or via the MC reset it triggers).
			 */
			rc = efx_ef10_set_udp_tnl_ports(efx, false);
			goto out_unlock;
		}
		efx_get_udp_tunnel_type_name(match->type,
					     typebuf, sizeof(typebuf));
		netif_warn(efx, drv, efx->net_dev,
			   "UDP port %d is actually in use by %s, not removing\n",
			   ntohs(tnl.port), typebuf);
	}
	rc = -ENOENT;

out_reattach:
	/* No MCDI update was issued on this path, so nothing else will undo
	 * the detach done above; re-attach here or TX stays stopped.
	 */
	netif_device_attach(efx->net_dev);
out_unlock:
	mutex_unlock(&nic_data->udp_tunnels_lock);
	return rc;
}
#define EF10_OFFLOAD_FEATURES \
(NETIF_F_IP_CSUM | \
NETIF_F_HW_VLAN_CTAG_FILTER | \
@ -6153,6 +6554,10 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.ptp_set_ts_config = efx_ef10_ptp_set_ts_config,
.vlan_rx_add_vid = efx_ef10_vlan_rx_add_vid,
.vlan_rx_kill_vid = efx_ef10_vlan_rx_kill_vid,
.udp_tnl_push_ports = efx_ef10_udp_tnl_push_ports,
.udp_tnl_add_port = efx_ef10_udp_tnl_add_port,
.udp_tnl_has_port = efx_ef10_udp_tnl_has_port,
.udp_tnl_del_port = efx_ef10_udp_tnl_del_port,
#ifdef CONFIG_SFC_SRIOV
.sriov_configure = efx_ef10_sriov_configure,
.sriov_init = efx_ef10_sriov_init,

View File

@ -23,12 +23,15 @@
#include <linux/aer.h>
#include <linux/interrupt.h>
#include "net_driver.h"
#include <net/gre.h>
#include <net/udp_tunnel.h>
#include "efx.h"
#include "nic.h"
#include "selftest.h"
#include "sriov.h"
#include "mcdi.h"
#include "mcdi_pcol.h"
#include "workarounds.h"
/**************************************************************************
@ -88,6 +91,21 @@ const char *const efx_reset_type_names[] = {
[RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)",
};
/* UDP tunnel type names, indexed by the TUNNEL_ENCAP_UDP_PORT_ENTRY_*
 * protocol value.  Any index without an initialiser is NULL.
 */
static const char *const efx_udp_tunnel_type_names[] = {
[TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan",
[TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE] = "geneve",
};
/* Write a printable name for UDP tunnel @type into @buf (at most @buflen
 * bytes, as bounded by snprintf).  Types with no entry in
 * efx_udp_tunnel_type_names are rendered as "type <number>".
 */
void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen)
{
	const char *known = NULL;

	if (type < ARRAY_SIZE(efx_udp_tunnel_type_names))
		known = efx_udp_tunnel_type_names[type];

	if (known == NULL) {
		snprintf(buf, buflen, "type %d", type);
		return;
	}

	snprintf(buf, buflen, "%s", known);
}
/* Reset workqueue. If any NIC has a hardware failure then a reset will be
* queued onto this work queue. This is not a per-nic work queue, because
* efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
@ -2336,6 +2354,52 @@ static int efx_vlan_rx_kill_vid(struct net_device *net_dev, __be16 proto, u16 vi
return -EOPNOTSUPP;
}
/* Translate a core UDP tunnel type into the matching
 * TUNNEL_ENCAP_UDP_PORT_ENTRY_* protocol value.
 * Returns -1 for any type other than VXLAN and GENEVE.
 */
static int efx_udp_tunnel_type_map(enum udp_parsable_tunnel_type in)
{
	if (in == UDP_TUNNEL_TYPE_VXLAN)
		return TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN;

	if (in == UDP_TUNNEL_TYPE_GENEVE)
		return TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE;

	return -1;
}
static void efx_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti)
{
struct efx_nic *efx = netdev_priv(dev);
struct efx_udp_tunnel tnl;
int efx_tunnel_type;
efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
if (efx_tunnel_type < 0)
return;
tnl.type = (u16)efx_tunnel_type;
tnl.port = ti->port;
if (efx->type->udp_tnl_add_port)
(void)efx->type->udp_tnl_add_port(efx, tnl);
}
/* ndo_udp_tunnel_del handler: hand a closed UDP tunnel port to the NIC
 * type's udp_tnl_del_port hook, if it has one.  Tunnel types that
 * efx_udp_tunnel_type_map() does not recognise are silently ignored, as is
 * the hook's return value.
 */
static void efx_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti)
{
	struct efx_nic *efx = netdev_priv(dev);
	struct efx_udp_tunnel tnl;
	int efx_tunnel_type;

	efx_tunnel_type = efx_udp_tunnel_type_map(ti->type);
	if (efx_tunnel_type < 0)
		return;

	tnl.type = (u16)efx_tunnel_type;
	tnl.port = ti->port;

	/* Check the hook we are about to call, not its add counterpart */
	if (efx->type->udp_tnl_del_port)
		(void)efx->type->udp_tnl_del_port(efx, tnl);
}
static const struct net_device_ops efx_netdev_ops = {
.ndo_open = efx_net_open,
.ndo_stop = efx_net_stop,
@ -2366,6 +2430,8 @@ static const struct net_device_ops efx_netdev_ops = {
#ifdef CONFIG_RFS_ACCEL
.ndo_rx_flow_steer = efx_filter_rfs,
#endif
.ndo_udp_tunnel_add = efx_udp_tunnel_add,
.ndo_udp_tunnel_del = efx_udp_tunnel_del,
};
static void efx_update_name(struct efx_nic *efx)
@ -2605,6 +2671,9 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
efx_start_all(efx);
if (efx->type->udp_tnl_push_ports)
efx->type->udp_tnl_push_ports(efx);
return 0;
fail:
@ -3136,6 +3205,51 @@ static int efx_pci_probe_main(struct efx_nic *efx)
return rc;
}
/* Second stage of PCI probe: run the main probe, try to enable SR-IOV, work
 * out the net device feature flags and register the net device.
 *
 * An SR-IOV init failure is only logged.  If registering the net device
 * fails, the main probe is undone before returning.
 *
 * Returns 0 on success or a negative error code from efx_pci_probe_main()
 * or efx_register_netdev().
 */
static int efx_pci_probe_post_io(struct efx_nic *efx)
{
	struct net_device *net_dev = efx->net_dev;
	int rc;

	rc = efx_pci_probe_main(efx);
	if (rc)
		return rc;

	if (efx->type->sriov_init) {
		rc = efx->type->sriov_init(efx);
		if (rc)
			netif_err(efx, probe, efx->net_dev,
				  "SR-IOV can't be enabled rc %d\n", rc);
	}

	/* Determine netdevice features */
	net_dev->features |= efx->type->offload_features;
	net_dev->features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_RXCSUM;
	if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		net_dev->features |= NETIF_F_TSO6;

	/* Drop every TSO flag again if the device reports no TSO version */
	if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
		net_dev->features &= ~NETIF_F_ALL_TSO;

	/* Mask for features that also apply to VLAN devices */
	net_dev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_SG |
				  NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
				  NETIF_F_RXCSUM;

	/* Everything that is not fixed can be toggled by the user */
	net_dev->hw_features = net_dev->features & ~efx->fixed_features;

	/* Disable VLAN filtering by default.  It may be enforced if
	 * the feature is fixed (i.e. VLAN filters are required to
	 * receive VLAN tagged packets due to vPort restrictions).
	 */
	net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
	net_dev->features |= efx->fixed_features;

	rc = efx_register_netdev(efx);
	if (rc)
		efx_pci_remove_main(efx);

	return rc;
}
/* NIC initialisation
*
* This is called at module load (or hotplug insertion,
@ -3178,42 +3292,28 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
if (rc)
goto fail2;
rc = efx_pci_probe_main(efx);
rc = efx_pci_probe_post_io(efx);
if (rc) {
/* On failure, retry once immediately.
* If we aborted probe due to a scheduled reset, dismiss it.
*/
efx->reset_pending = 0;
rc = efx_pci_probe_post_io(efx);
if (rc) {
/* On another failure, retry once more
* after a 50-305ms delay.
*/
unsigned char r;
get_random_bytes(&r, 1);
msleep((unsigned int)r + 50);
efx->reset_pending = 0;
rc = efx_pci_probe_post_io(efx);
}
}
if (rc)
goto fail3;
net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
NETIF_F_TSO | NETIF_F_RXCSUM);
if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
net_dev->features |= NETIF_F_TSO6;
/* Check whether device supports TSO */
if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
net_dev->features &= ~NETIF_F_ALL_TSO;
/* Mask for features that also apply to VLAN devices */
net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
NETIF_F_RXCSUM);
net_dev->hw_features = net_dev->features & ~efx->fixed_features;
/* Disable VLAN filtering by default. It may be enforced if
* the feature is fixed (i.e. VLAN filters are required to
* receive VLAN tagged packets due to vPort restrictions).
*/
net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
net_dev->features |= efx->fixed_features;
rc = efx_register_netdev(efx);
if (rc)
goto fail4;
if (efx->type->sriov_init) {
rc = efx->type->sriov_init(efx);
if (rc)
netif_err(efx, probe, efx->net_dev,
"SR-IOV can't be enabled rc %d\n", rc);
}
netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
/* Try to create MTDs, but allow this to fail */
@ -3230,10 +3330,11 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
"PCIE error reporting unavailable (%d).\n",
rc);
if (efx->type->udp_tnl_push_ports)
efx->type->udp_tnl_push_ports(efx);
return 0;
fail4:
efx_pci_remove_main(efx);
fail3:
efx_fini_io(efx);
fail2:

View File

@ -77,6 +77,11 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events),

View File

@ -128,7 +128,7 @@ fail:
return rc;
}
void efx_mcdi_fini(struct efx_nic *efx)
void efx_mcdi_detach(struct efx_nic *efx)
{
if (!efx->mcdi)
return;
@ -137,6 +137,12 @@ void efx_mcdi_fini(struct efx_nic *efx)
/* Relinquish the device (back to the BMC, if this is a LOM) */
efx_mcdi_drv_attach(efx, false, NULL);
}
void efx_mcdi_fini(struct efx_nic *efx)
{
if (!efx->mcdi)
return;
#ifdef CONFIG_SFC_MCDI_LOGGING
free_page((unsigned long)efx->mcdi->iface.logging_buffer);
@ -716,8 +722,11 @@ static int _efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned int cmd,
if (cmd == MC_CMD_REBOOT && rc == -EIO) {
/* Don't reset if MC_CMD_REBOOT returns EIO */
} else if (rc == -EIO || rc == -EINTR) {
netif_err(efx, hw, efx->net_dev, "MC fatal error %d\n",
-rc);
netif_err(efx, hw, efx->net_dev, "MC reboot detected\n");
netif_dbg(efx, hw, efx->net_dev, "MC rebooted during command %d rc %d\n",
cmd, -rc);
if (efx->type->mcdi_reboot_detected)
efx->type->mcdi_reboot_detected(efx);
efx_schedule_reset(efx, RESET_TYPE_MC_FAILURE);
} else if (proxy_handle && (rc == -EPROTO) &&
efx_mcdi_get_proxy_handle(efx, hdr_len, data_len,

View File

@ -142,6 +142,7 @@ static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx)
#endif
int efx_mcdi_init(struct efx_nic *efx);
void efx_mcdi_detach(struct efx_nic *efx);
void efx_mcdi_fini(struct efx_nic *efx);
int efx_mcdi_rpc(struct efx_nic *efx, unsigned cmd, const efx_dword_t *inbuf,

View File

@ -11913,6 +11913,27 @@
#define MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_OUT_RESETTING_LBN 0
#define MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_OUT_RESETTING_WIDTH 1
/* TUNNEL_ENCAP_UDP_PORT_ENTRY structuredef */
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_LEN 4
/* UDP port (the standard ports are named below but any port may be used) */
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_OFST 0
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LEN 2
/* enum: the IANA allocated UDP port for VXLAN */
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_VXLAN_UDP_PORT 0x12b5
/* enum: the IANA allocated UDP port for Geneve */
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_IANA_GENEVE_UDP_PORT 0x17c1
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_LBN 0
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_UDP_PORT_WIDTH 16
/* tunnel encapsulation protocol (only those named below are supported) */
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_OFST 2
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LEN 2
/* enum: VXLAN */
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN 0x0
/* enum: Geneve */
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_GENEVE 0x1
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_LBN 16
#define TUNNEL_ENCAP_UDP_PORT_ENTRY_PROTOCOL_WIDTH 16
/***********************************/
/* MC_CMD_RX_BALANCING

View File

@ -307,6 +307,7 @@ struct efx_rx_buffer {
#define EFX_RX_PKT_DISCARD 0x0004
#define EFX_RX_PKT_TCP 0x0040
#define EFX_RX_PKT_PREFIX_LEN 0x0080 /* length is in prefix only */
#define EFX_RX_PKT_CSUM_LEVEL 0x0200
/**
* struct efx_rx_page_state - Page-based rx buffer state
@ -469,13 +470,18 @@ struct efx_channel {
u32 *rps_flow_id;
#endif
unsigned n_rx_tobe_disc;
unsigned n_rx_ip_hdr_chksum_err;
unsigned n_rx_tcp_udp_chksum_err;
unsigned n_rx_mcast_mismatch;
unsigned n_rx_frm_trunc;
unsigned n_rx_overlength;
unsigned n_skbuff_leaks;
unsigned int n_rx_tobe_disc;
unsigned int n_rx_ip_hdr_chksum_err;
unsigned int n_rx_tcp_udp_chksum_err;
unsigned int n_rx_outer_ip_hdr_chksum_err;
unsigned int n_rx_outer_tcp_udp_chksum_err;
unsigned int n_rx_inner_ip_hdr_chksum_err;
unsigned int n_rx_inner_tcp_udp_chksum_err;
unsigned int n_rx_eth_crc_err;
unsigned int n_rx_mcast_mismatch;
unsigned int n_rx_frm_trunc;
unsigned int n_rx_overlength;
unsigned int n_skbuff_leaks;
unsigned int n_rx_nodesc_trunc;
unsigned int n_rx_merge_events;
unsigned int n_rx_merge_packets;
@ -548,6 +554,8 @@ extern const unsigned int efx_reset_type_max;
#define RESET_TYPE(type) \
STRING_TABLE_LOOKUP(type, efx_reset_type)
void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen);
enum efx_int_mode {
/* Be careful if altering to correct macro below */
EFX_INT_MODE_MSIX = 0,
@ -987,6 +995,15 @@ struct efx_mtd_partition {
char name[IFNAMSIZ + 20];
};
/* One UDP tunnel port: used both as an entry in a NIC's tunnel port table
 * and as the request argument to the udp_tnl_add_port/udp_tnl_del_port
 * hooks.
 */
struct efx_udp_tunnel {
u16 type; /* TUNNEL_ENCAP_UDP_PORT_ENTRY_foo, see mcdi_pcol.h */
/* UDP port number, stored big-endian (__be16) */
__be16 port;
/* Count of repeated adds of the same port. Used only inside the list,
 * not in request arguments.
 */
u16 count;
};
/**
* struct efx_nic_type - Efx device type definition
* @mem_bar: Get the memory BAR
@ -1107,6 +1124,10 @@ struct efx_mtd_partition {
* @set_mac_address: Set the MAC address of the device
* @tso_versions: Returns mask of firmware-assisted TSO versions supported.
* If %NULL, then device does not support any TSO version.
* @udp_tnl_push_ports: Push the list of UDP tunnel ports to the NIC if required.
* @udp_tnl_add_port: Add a UDP tunnel port
* @udp_tnl_has_port: Check if a port has been added as UDP tunnel
* @udp_tnl_del_port: Remove a UDP tunnel port
* @revision: Hardware architecture revision
* @txd_ptr_tbl_base: TX descriptor ring base address
* @rxd_ptr_tbl_base: RX descriptor ring base address
@ -1266,6 +1287,10 @@ struct efx_nic_type {
int (*get_mac_address)(struct efx_nic *efx, unsigned char *perm_addr);
int (*set_mac_address)(struct efx_nic *efx);
u32 (*tso_versions)(struct efx_nic *efx);
int (*udp_tnl_push_ports)(struct efx_nic *efx);
int (*udp_tnl_add_port)(struct efx_nic *efx, struct efx_udp_tunnel tnl);
bool (*udp_tnl_has_port)(struct efx_nic *efx, __be16 port);
int (*udp_tnl_del_port)(struct efx_nic *efx, struct efx_udp_tunnel tnl);
int revision;
unsigned int txd_ptr_tbl_base;

View File

@ -369,6 +369,10 @@ enum {
* @vport_mac: The MAC address on the vport, only for PFs; VFs will be zero
* @vlan_list: List of VLANs added over the interface. Serialised by vlan_lock.
* @vlan_lock: Lock to serialize access to vlan_list.
* @udp_tunnels: UDP tunnel port numbers and types.
* @udp_tunnels_dirty: flag indicating a reboot occurred while pushing
* @udp_tunnels to hardware and thus the push must be re-done.
* @udp_tunnels_lock: Serialises writes to @udp_tunnels and @udp_tunnels_dirty.
*/
struct efx_ef10_nic_data {
struct efx_buffer mcdi_buf;
@ -405,6 +409,9 @@ struct efx_ef10_nic_data {
u8 vport_mac[ETH_ALEN];
struct list_head vlan_list;
struct mutex vlan_lock;
struct efx_udp_tunnel udp_tunnels[16];
bool udp_tunnels_dirty;
struct mutex udp_tunnels_lock;
};
int efx_init_sriov(void);

View File

@ -434,6 +434,7 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
PKT_HASH_TYPE_L3);
skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
for (;;) {
skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
@ -621,8 +622,10 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
/* Set the SKB flags */
skb_checksum_none_assert(skb);
if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED))
if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
}
efx_rx_skb_attach_timestamp(channel, skb);

View File

@ -326,6 +326,7 @@ fail5:
efx_nic_free_buffer(efx, &efx->irq_status);
fail4:
fail3:
efx_mcdi_detach(efx);
efx_mcdi_fini(efx);
fail1:
kfree(efx->nic_data);
@ -450,6 +451,7 @@ static void siena_remove_nic(struct efx_nic *efx)
efx_mcdi_reset(efx, RESET_TYPE_ALL);
efx_mcdi_detach(efx);
efx_mcdi_fini(efx);
/* Tear down the private nic state */