net: ixgbe: add support for tc_u32 offload

This adds initial support for offloading the u32 tc classifier. The
initial implementation covers only a few base matches and actions,
to illustrate the use of the infrastructure patches.

However, it is an interesting subset because it handles the u32 next-header
logic needed to correctly map TCP packets from IP headers using the ihl
and protocol fields. After this is accepted, the match and action fields
can easily be extended by updating the model header file.

Also only the drop action is supported initially.

Here is a short test script,

 #tc qdisc add dev eth4 ingress
 #tc filter add dev eth4 parent ffff: protocol ip \
	u32 ht 800: order 1 \
	match ip dst 15.0.0.1/32 match ip src 15.0.0.2/32 action drop

<-- hardware has dst/src ip match rule installed -->

 #tc filter del dev eth4 parent ffff: prio 49152
 #tc filter add dev eth4 parent ffff: protocol ip prio 99 \
	handle 1: u32 divisor 1
 #tc filter add dev eth4 protocol ip parent ffff: prio 99 \
	u32 ht 800: order 1 link 1: \
	offset at 0 mask 0f00 shift 6 plus 0 eat match ip protocol 6 ff
 #tc filter add dev eth4 parent ffff: protocol ip \
	u32 ht 1: order 3 match tcp src 23 ffff action drop

<-- hardware has tcp src port rule installed -->

 #tc qdisc del dev eth4 parent ffff:

<-- hardware cleaned up -->

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
John Fastabend 2016-02-16 21:18:53 -08:00 committed by David S. Miller
parent 9d35cf062e
commit b82b17d929
3 changed files with 213 additions and 30 deletions

View File

@ -796,6 +796,9 @@ struct ixgbe_adapter {
u8 default_up; u8 default_up;
unsigned long fwd_bitmask; /* Bitmask indicating in use pools */ unsigned long fwd_bitmask; /* Bitmask indicating in use pools */
#define IXGBE_MAX_LINK_HANDLE 10
struct ixgbe_mat_field *jump_tables[IXGBE_MAX_LINK_HANDLE];
/* maximum number of RETA entries among all devices supported by ixgbe /* maximum number of RETA entries among all devices supported by ixgbe
* driver: currently it's x550 device in non-SRIOV mode * driver: currently it's x550 device in non-SRIOV mode
*/ */
@ -925,6 +928,9 @@ s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw,
u16 soft_id); u16 soft_id);
void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input, void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input,
union ixgbe_atr_input *mask); union ixgbe_atr_input *mask);
int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
struct ixgbe_fdir_filter *input,
u16 sw_idx);
void ixgbe_set_rx_mode(struct net_device *netdev); void ixgbe_set_rx_mode(struct net_device *netdev);
#ifdef CONFIG_IXGBE_DCB #ifdef CONFIG_IXGBE_DCB
void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter); void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter);

View File

@ -2520,9 +2520,9 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
return ret; return ret;
} }
static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter,
struct ixgbe_fdir_filter *input, struct ixgbe_fdir_filter *input,
u16 sw_idx) u16 sw_idx)
{ {
struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_hw *hw = &adapter->hw;
struct hlist_node *node2; struct hlist_node *node2;

View File

@ -51,6 +51,8 @@
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <scsi/fc/fc_fcoe.h> #include <scsi/fc/fc_fcoe.h>
#include <net/vxlan.h> #include <net/vxlan.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#ifdef CONFIG_OF #ifdef CONFIG_OF
#include <linux/of_net.h> #include <linux/of_net.h>
@ -65,6 +67,7 @@
#include "ixgbe_common.h" #include "ixgbe_common.h"
#include "ixgbe_dcb_82599.h" #include "ixgbe_dcb_82599.h"
#include "ixgbe_sriov.h" #include "ixgbe_sriov.h"
#include "ixgbe_model.h"
char ixgbe_driver_name[] = "ixgbe"; char ixgbe_driver_name[] = "ixgbe";
static const char ixgbe_driver_string[] = static const char ixgbe_driver_string[] =
@ -5545,6 +5548,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
#endif /* CONFIG_IXGBE_DCB */ #endif /* CONFIG_IXGBE_DCB */
#endif /* IXGBE_FCOE */ #endif /* IXGBE_FCOE */
/* initialize static ixgbe jump table entries */
adapter->jump_tables[0] = ixgbe_ipv4_fields;
adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) * adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) *
hw->mac.num_rar_entries, hw->mac.num_rar_entries,
GFP_ATOMIC); GFP_ATOMIC);
@ -8200,10 +8206,191 @@ int ixgbe_setup_tc(struct net_device *dev, u8 tc)
return 0; return 0;
} }
/* Remove the hardware filter that was installed for a u32 knode.
 *
 * A NULL @input asks ixgbe_update_ethtool_fdir_entry() to erase the
 * entry matching the given software index.  The knode handle was used
 * as the software index when the filter was added.
 * NOTE(review): sw_idx is u16 while knode.handle is u32 — presumably
 * handles stay small enough; verify against the add path.
 */
static int ixgbe_delete_clsu32(struct ixgbe_adapter *adapter,
			       struct tc_cls_u32_offload *cls)
{
	int ret;

	spin_lock(&adapter->fdir_perfect_lock);
	ret = ixgbe_update_ethtool_fdir_entry(adapter, NULL,
					      cls->knode.handle);
	spin_unlock(&adapter->fdir_perfect_lock);

	return ret;
}
/* Translate a cls_u32 knode into a flow director perfect filter.
 *
 * Link (jump) nodes are recorded in adapter->jump_tables so that child
 * hash tables resolve against the right parse-graph node from
 * ixgbe_model.h.  Leaf nodes are converted key-by-key through the
 * per-table field_ptr match functions and written to hardware.
 *
 * Only IPv4 (ETH_P_IP) and the gact drop action are supported.
 * Returns 0 on success or a negative errno.
 */
static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
				  __be16 protocol,
				  struct tc_cls_u32_offload *cls)
{
	u32 loc = cls->knode.handle & 0xfffff;
	struct ixgbe_hw *hw = &adapter->hw;
	struct ixgbe_mat_field *field_ptr;
	struct ixgbe_fdir_filter *input;
	union ixgbe_atr_input mask;
#ifdef CONFIG_NET_CLS_ACT
	const struct tc_action *a;
#endif
	int i, err = 0;
	u8 queue;
	u32 handle;

	memset(&mask, 0, sizeof(union ixgbe_atr_input));
	handle = cls->knode.handle;

	/* At the moment cls_u32 jumps to transport layer and skips past
	 * L2 headers. The canonical method to match L2 frames is to use
	 * negative values. However this is error prone at best but really
	 * just broken because there is no way to "know" what sort of hdr
	 * is in front of the transport layer. Fix cls_u32 to support L2
	 * headers when needed.
	 */
	if (protocol != htons(ETH_P_IP))
		return -EINVAL;

	if (cls->knode.link_handle) {
		struct ixgbe_nexthdr *nexthdr = ixgbe_ipv4_jumps;
		u32 uhtid = TC_U32_USERHTID(cls->knode.link_handle);
		bool found_jump = false;

		/* The target user hash table id must fit in the driver's
		 * jump table; check before it is used as an index.
		 */
		if (uhtid >= IXGBE_MAX_LINK_HANDLE)
			return -EINVAL;

		/* do not support multiple key jumps its just mad */
		if (cls->knode.sel->nkeys > 1)
			return -EINVAL;

		/* Search the known next-header transitions for one whose
		 * offset/shift/mask and single key match this jump exactly.
		 */
		for (i = 0; nexthdr[i].jump; i++) {
			if (nexthdr[i].o != cls->knode.sel->offoff ||
			    nexthdr[i].s != cls->knode.sel->offshift ||
			    nexthdr[i].m != cls->knode.sel->offmask)
				continue;

			if (nexthdr[i].off != cls->knode.sel->keys[0].off ||
			    nexthdr[i].val != cls->knode.sel->keys[0].val ||
			    nexthdr[i].mask != cls->knode.sel->keys[0].mask)
				continue;

			adapter->jump_tables[uhtid] = nexthdr[i].jump;
			found_jump = true;
			break;
		}

		return found_jump ? 0 : -EINVAL;
	}

	if (loc >= ((1024 << adapter->fdir_pballoc) - 2)) {
		e_err(drv, "Location out of range\n");
		return -EINVAL;
	}

	/* cls u32 is a graph starting at root node 0x800. The driver tracks
	 * links and also the fields used to advance the parser across each
	 * link (e.g. nexthdr/eat parameters from 'tc'). This way we can map
	 * the u32 graph onto the hardware parse graph denoted in ixgbe_model.h
	 * To add support for new nodes update ixgbe_model.h parse structures
	 * this function _should_ be generic try not to hardcode values here.
	 */
	if (TC_U32_USERHTID(handle) == 0x800) {
		field_ptr = adapter->jump_tables[0];
	} else {
		if (TC_U32_USERHTID(handle) >= ARRAY_SIZE(adapter->jump_tables))
			return -EINVAL;

		field_ptr = adapter->jump_tables[TC_U32_USERHTID(handle)];
	}

	/* A NULL entry means no link was ever installed for this table */
	if (!field_ptr)
		return -EINVAL;

	input = kzalloc(sizeof(*input), GFP_KERNEL);
	if (!input)
		return -ENOMEM;

	/* Convert each u32 key through the table's match functions,
	 * accumulating the filter value and mask.
	 */
	for (i = 0; i < cls->knode.sel->nkeys; i++) {
		int off = cls->knode.sel->keys[i].off;
		__be32 val = cls->knode.sel->keys[i].val;
		__be32 m = cls->knode.sel->keys[i].mask;
		bool found_entry = false;
		int j;

		for (j = 0; field_ptr[j].val; j++) {
			if (field_ptr[j].off == off &&
			    field_ptr[j].mask == m) {
				field_ptr[j].val(input, &mask, val, m);
				input->filter.formatted.flow_type |=
					field_ptr[j].type;
				found_entry = true;
				break;
			}
		}

		if (!found_entry) {
			err = -EINVAL;
			goto err_out;
		}
	}

	mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK |
				   IXGBE_ATR_L4TYPE_MASK;

	if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4)
		mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK;

#ifdef CONFIG_NET_CLS_ACT
	/* Only the gact drop action is supported for now */
	if (list_empty(&cls->knode.exts->actions)) {
		err = -EINVAL;
		goto err_out;
	}

	list_for_each_entry(a, &cls->knode.exts->actions, list) {
		if (!is_tcf_gact_shot(a)) {
			err = -EINVAL;
			goto err_out;
		}
	}
#endif

	input->action = IXGBE_FDIR_DROP_QUEUE;
	queue = IXGBE_FDIR_DROP_QUEUE;
	input->sw_idx = loc;

	spin_lock(&adapter->fdir_perfect_lock);

	if (hlist_empty(&adapter->fdir_filter_list)) {
		/* First filter: program the global input mask */
		memcpy(&adapter->fdir_mask, &mask, sizeof(mask));
		err = ixgbe_fdir_set_input_mask_82599(hw, &mask);
		if (err)
			goto err_out_w_lock;
	} else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) {
		/* Hardware has a single mask; all filters must share it */
		err = -EINVAL;
		goto err_out_w_lock;
	}

	ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask);
	err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter,
						    input->sw_idx, queue);
	if (err)
		goto err_out_w_lock;

	/* Track the filter so ethtool/delete can find it later */
	ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
	spin_unlock(&adapter->fdir_perfect_lock);

	return 0;
err_out_w_lock:
	spin_unlock(&adapter->fdir_perfect_lock);
err_out:
	kfree(input);
	return err;
}
int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto, int __ixgbe_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
struct tc_to_netdev *tc) struct tc_to_netdev *tc)
{ {
/* Only support egress tc setup for now */ struct ixgbe_adapter *adapter = netdev_priv(dev);
if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
tc->type == TC_SETUP_CLSU32) {
if (!(dev->features & NETIF_F_HW_TC))
return -EINVAL;
switch (tc->cls_u32->command) {
case TC_CLSU32_NEW_KNODE:
case TC_CLSU32_REPLACE_KNODE:
return ixgbe_configure_clsu32(adapter,
proto, tc->cls_u32);
case TC_CLSU32_DELETE_KNODE:
return ixgbe_delete_clsu32(adapter, tc->cls_u32);
default:
return -EINVAL;
}
}
if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO)
return -EINVAL; return -EINVAL;
@ -8272,19 +8459,17 @@ static int ixgbe_set_features(struct net_device *netdev,
} }
/* /*
* Check if Flow Director n-tuple support was enabled or disabled. If * Check if Flow Director n-tuple support or hw_tc support was
* the state changed, we need to reset. * enabled or disabled. If the state changed, we need to reset.
*/ */
switch (features & NETIF_F_NTUPLE) { if ((features & NETIF_F_NTUPLE) || (features & NETIF_F_HW_TC)) {
case NETIF_F_NTUPLE:
/* turn off ATR, enable perfect filters and reset */ /* turn off ATR, enable perfect filters and reset */
if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
need_reset = true; need_reset = true;
adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE; adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
break; } else {
default:
/* turn off perfect filters, enable ATR and reset */ /* turn off perfect filters, enable ATR and reset */
if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
need_reset = true; need_reset = true;
@ -8292,23 +8477,16 @@ static int ixgbe_set_features(struct net_device *netdev,
adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
/* We cannot enable ATR if SR-IOV is enabled */ /* We cannot enable ATR if SR-IOV is enabled */
if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED ||
break; /* We cannot enable ATR if we have 2 or more tcs */
(netdev_get_num_tc(netdev) > 1) ||
/* We cannot enable ATR if we have 2 or more traffic classes */ /* We cannot enable ATR if RSS is disabled */
if (netdev_get_num_tc(netdev) > 1) (adapter->ring_feature[RING_F_RSS].limit <= 1) ||
break; /* A sample rate of 0 indicates ATR disabled */
(!adapter->atr_sample_rate))
/* We cannot enable ATR if RSS is disabled */ ; /* do nothing not supported */
if (adapter->ring_feature[RING_F_RSS].limit <= 1) else /* otherwise supported and set the flag */
break; adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
/* A sample rate of 0 indicates ATR disabled */
if (!adapter->atr_sample_rate)
break;
adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
break;
} }
if (features & NETIF_F_HW_VLAN_CTAG_RX) if (features & NETIF_F_HW_VLAN_CTAG_RX)
@ -8667,9 +8845,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
.ndo_set_vf_trust = ixgbe_ndo_set_vf_trust, .ndo_set_vf_trust = ixgbe_ndo_set_vf_trust,
.ndo_get_vf_config = ixgbe_ndo_get_vf_config, .ndo_get_vf_config = ixgbe_ndo_get_vf_config,
.ndo_get_stats64 = ixgbe_get_stats64, .ndo_get_stats64 = ixgbe_get_stats64,
#ifdef CONFIG_IXGBE_DCB
.ndo_setup_tc = __ixgbe_setup_tc, .ndo_setup_tc = __ixgbe_setup_tc,
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER #ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = ixgbe_netpoll, .ndo_poll_controller = ixgbe_netpoll,
#endif #endif
@ -9040,7 +9216,8 @@ skip_sriov:
case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_x:
netdev->features |= NETIF_F_SCTP_CRC; netdev->features |= NETIF_F_SCTP_CRC;
netdev->hw_features |= NETIF_F_SCTP_CRC | netdev->hw_features |= NETIF_F_SCTP_CRC |
NETIF_F_NTUPLE; NETIF_F_NTUPLE |
NETIF_F_HW_TC;
break; break;
default: default:
break; break;