/* OpenCloudOS-Kernel/drivers/net/ntb_netdev.c */

/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2012 Intel Corporation. All rights reserved.
* Copyright (C) 2015 EMC Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* BSD LICENSE
*
* Copyright(c) 2012 Intel Corporation. All rights reserved.
* Copyright (C) 2015 EMC Corporation. All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copy
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* PCIe NTB Network Linux driver
*
* Contact Information:
* Jon Mason <jon.mason@intel.com>
*/
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/ntb.h>
#include <linux/ntb_transport.h>
/* Driver version string; used for MODULE_VERSION() and the ethtool drvinfo
 * version field.
 */
#define NTB_NETDEV_VER "0.7"
/* Module metadata */
MODULE_DESCRIPTION(KBUILD_MODNAME);
MODULE_VERSION(NTB_NETDEV_VER);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
/* Time in usecs for tx resource reaper: the delay used whenever tx_timer is
 * (re)armed while waiting for free tx entries.
 */
static unsigned int tx_time = 1;
/* Number of descriptors to free before resuming tx: the tx completion handler
 * wakes a stopped queue once at least this many tx entries are free.
 */
static unsigned int tx_start = 10;
/* Number of descriptors still available before stop upper layer tx: passed as
 * the threshold to ntb_netdev_maybe_stop_tx() and checked by the tx timer.
 */
static unsigned int tx_stop = 5;
/* Per-interface private data, stored in the net_device private area
 * (obtained with netdev_priv()).
 */
struct ntb_netdev {
/* PCI device taken from the parent NTB device at probe time */
struct pci_dev *pdev;
/* Back-pointer to the owning net_device */
struct net_device *ndev;
/* NTB transport queue used for both tx and rx */
struct ntb_transport_qp *qp;
/* Timer that polls for free tx entries while the tx queue is stopped */
struct timer_list tx_timer;
};
/* Value, in milliseconds, assigned to the netdev watchdog_timeo at probe */
#define NTB_TX_TIMEOUT_MS 1000
/* Number of empty rx buffers posted to the transport queue on open */
#define NTB_RXQ_SIZE 100
/*
 * Transport link event callback.
 *
 * @data:       the net_device registered with the transport queue
 * @link_is_up: non-zero when the transport reports the link as up
 *
 * Carrier is dropped on any link-down event.  On link-up, carrier is only
 * raised if the queue itself also reports its link as up.
 */
static void ntb_netdev_event_handler(void *data, int link_is_up)
{
	struct net_device *netdev = data;
	struct ntb_netdev *priv = netdev_priv(netdev);

	netdev_dbg(netdev, "Event %x, Link %x\n", link_is_up,
		   ntb_transport_link_query(priv->qp));

	if (!link_is_up) {
		netif_carrier_off(netdev);
		return;
	}

	if (ntb_transport_link_query(priv->qp))
		netif_carrier_on(netdev);
}
static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len)
{
struct net_device *ndev = qp_data;
struct sk_buff *skb;
int rc;
skb = data;
if (!skb)
return;
netdev_dbg(ndev, "%s: %d byte payload received\n", __func__, len);
if (len < 0) {
ndev->stats.rx_errors++;
ndev->stats.rx_length_errors++;
goto enqueue_again;
}
skb_put(skb, len);
skb->protocol = eth_type_trans(skb, ndev);
skb->ip_summed = CHECKSUM_NONE;
net: dev: Makes sure netif_rx() can be invoked in any context. Dave suggested a while ago (eleven years by now) "Let's make netif_rx() work in all contexts and get rid of netif_rx_ni()". Eric agreed and pointed out that modern devices should use netif_receive_skb() to avoid the overhead. In the meantime someone added another variant, netif_rx_any_context(), which behaves as suggested. netif_rx() must be invoked with disabled bottom halves to ensure that pending softirqs, which were raised within the function, are handled. netif_rx_ni() can be invoked only from process context (bottom halves must be enabled) because the function handles pending softirqs without checking if bottom halves were disabled or not. netif_rx_any_context() invokes on the former functions by checking in_interrupts(). netif_rx() could be taught to handle both cases (disabled and enabled bottom halves) by simply disabling bottom halves while invoking netif_rx_internal(). The local_bh_enable() invocation will then invoke pending softirqs only if the BH-disable counter drops to zero. Eric is concerned about the overhead of BH-disable+enable especially in regard to the loopback driver. As critical as this driver is, it will receive a shortcut to avoid the additional overhead which is not needed. Add a local_bh_disable() section in netif_rx() to ensure softirqs are handled if needed. Provide __netif_rx() which does not disable BH and has a lockdep assert to ensure that interrupts are disabled. Use this shortcut in the loopback driver and in drivers/net/*.c. Make netif_rx_ni() and netif_rx_any_context() invoke netif_rx() so they can be removed once they are no more users left. Link: https://lkml.kernel.org/r/20100415.020246.218622820.davem@davemloft.net Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Reviewed-by: Eric Dumazet <edumazet@google.com> Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2022-02-12 07:38:38 +08:00
if (__netif_rx(skb) == NET_RX_DROP) {
ndev->stats.rx_errors++;
ndev->stats.rx_dropped++;
} else {
ndev->stats.rx_packets++;
ndev->stats.rx_bytes += len;
}
skb = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
if (!skb) {
ndev->stats.rx_errors++;
ndev->stats.rx_frame_errors++;
return;
}
enqueue_again:
rc = ntb_transport_rx_enqueue(qp, skb, skb->data, ndev->mtu + ETH_HLEN);
if (rc) {
ntb_netdev: Use dev_kfree_skb_any() in interrupt context TX/RX callback handlers (ntb_netdev_tx_handler(), ntb_netdev_rx_handler()) can be called in interrupt context via the DMA framework when the respective DMA operations have completed. As such, any calls by these routines to free skb's, should use the interrupt context safe dev_kfree_skb_any() function. Previously, these callback handlers would call the interrupt unsafe version of dev_kfree_skb(). This has not presented an issue on Intel IOAT DMA engines as that driver utilizes tasklets rather than a hard interrupt handler, like the AMD PTDMA DMA driver. On AMD systems, a kernel WARNING message is encountered, which is being issued from skb_release_head_state() due to in_hardirq() being true. Besides the user visible WARNING from the kernel, the other symptom of this bug was that TCP/IP performance across the ntb_netdev interface was very poor, i.e. approximately an order of magnitude below what was expected. With the repair to use dev_kfree_skb_any(), kernel WARNINGs from skb_release_head_state() ceased and TCP/IP performance, as measured by iperf, was on par with expected results, approximately 20 Gb/s on AMD Milan based server. Note that this performance is comparable with Intel based servers. Fixes: 765ccc7bc3d91 ("ntb_netdev: correct skb leak") Fixes: 548c237c0a997 ("net: Add support for NTB virtual ethernet device") Signed-off-by: Eric Pilmore <epilmore@gigaio.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Link: https://lore.kernel.org/r/20221209000659.8318-1-epilmore@gigaio.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-12-09 08:06:59 +08:00
dev_kfree_skb_any(skb);
ndev->stats.rx_errors++;
ndev->stats.rx_fifo_errors++;
}
}
/*
 * Slow path of ntb_netdev_maybe_stop_tx(): stop the tx queue, then re-check
 * the number of free tx entries.
 *
 * Returns 0 if enough entries turned out to be free (the queue is restarted),
 * or -EBUSY if the queue stays stopped; in that case tx_timer is armed to
 * poll for free entries.
 */
static int __ntb_netdev_maybe_stop_tx(struct net_device *netdev,
				      struct ntb_transport_qp *qp, int size)
{
	struct ntb_netdev *priv = netdev_priv(netdev);

	netif_stop_queue(netdev);

	/* Make sure to see the latest value of ntb_transport_tx_free_entry()
	 * since the queue was last started.
	 */
	smp_mb();

	if (unlikely(ntb_transport_tx_free_entry(qp) >= size)) {
		/* Entries were freed in the meantime; carry on transmitting */
		netif_start_queue(netdev);
		return 0;
	}

	mod_timer(&priv->tx_timer, jiffies + usecs_to_jiffies(tx_time));
	return -EBUSY;
}
/*
 * Stop the tx queue if fewer than @size tx entries are free.
 *
 * Nothing is done (and 0 is returned) when the queue is already stopped or
 * when enough entries are available; otherwise the decision is delegated to
 * __ntb_netdev_maybe_stop_tx() and its result is returned.
 */
static int ntb_netdev_maybe_stop_tx(struct net_device *ndev,
				    struct ntb_transport_qp *qp, int size)
{
	if (netif_queue_stopped(ndev))
		return 0;

	if (ntb_transport_tx_free_entry(qp) >= size)
		return 0;

	return __ntb_netdev_maybe_stop_tx(ndev, qp, size);
}
static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
void *data, int len)
{
struct net_device *ndev = qp_data;
struct sk_buff *skb;
struct ntb_netdev *dev = netdev_priv(ndev);
skb = data;
if (!skb || !ndev)
return;
if (len > 0) {
ndev->stats.tx_packets++;
ndev->stats.tx_bytes += skb->len;
} else {
ndev->stats.tx_errors++;
ndev->stats.tx_aborted_errors++;
}
ntb_netdev: Use dev_kfree_skb_any() in interrupt context TX/RX callback handlers (ntb_netdev_tx_handler(), ntb_netdev_rx_handler()) can be called in interrupt context via the DMA framework when the respective DMA operations have completed. As such, any calls by these routines to free skb's, should use the interrupt context safe dev_kfree_skb_any() function. Previously, these callback handlers would call the interrupt unsafe version of dev_kfree_skb(). This has not presented an issue on Intel IOAT DMA engines as that driver utilizes tasklets rather than a hard interrupt handler, like the AMD PTDMA DMA driver. On AMD systems, a kernel WARNING message is encountered, which is being issued from skb_release_head_state() due to in_hardirq() being true. Besides the user visible WARNING from the kernel, the other symptom of this bug was that TCP/IP performance across the ntb_netdev interface was very poor, i.e. approximately an order of magnitude below what was expected. With the repair to use dev_kfree_skb_any(), kernel WARNINGs from skb_release_head_state() ceased and TCP/IP performance, as measured by iperf, was on par with expected results, approximately 20 Gb/s on AMD Milan based server. Note that this performance is comparable with Intel based servers. Fixes: 765ccc7bc3d91 ("ntb_netdev: correct skb leak") Fixes: 548c237c0a997 ("net: Add support for NTB virtual ethernet device") Signed-off-by: Eric Pilmore <epilmore@gigaio.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Link: https://lore.kernel.org/r/20221209000659.8318-1-epilmore@gigaio.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-12-09 08:06:59 +08:00
dev_kfree_skb_any(skb);
if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) {
/* Make sure anybody stopping the queue after this sees the new
* value of ntb_transport_tx_free_entry()
*/
smp_mb();
if (netif_queue_stopped(ndev))
netif_wake_queue(ndev);
}
}
/*
 * ndo_start_xmit: hand one skb to the NTB transport queue.
 *
 * The tx queue is stopped before and after the enqueue if fewer than tx_stop
 * transport entries are free.  Returns NETDEV_TX_OK once the skb has been
 * queued, or NETDEV_TX_BUSY (after bumping tx_dropped and tx_errors) when the
 * transport refuses it.
 */
static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
					 struct net_device *ndev)
{
	struct ntb_netdev *priv = netdev_priv(ndev);

	ntb_netdev_maybe_stop_tx(ndev, priv->qp, tx_stop);

	if (ntb_transport_tx_enqueue(priv->qp, skb, skb->data, skb->len)) {
		ndev->stats.tx_dropped++;
		ndev->stats.tx_errors++;
		return NETDEV_TX_BUSY;
	}

	/* check for next submit */
	ntb_netdev_maybe_stop_tx(ndev, priv->qp, tx_stop);

	return NETDEV_TX_OK;
}
/*
 * tx_timer callback: poll the transport for free tx entries.
 *
 * While fewer than tx_stop entries are free the timer re-arms itself;
 * otherwise a stopped tx queue is woken.
 */
static void ntb_netdev_tx_timer(struct timer_list *t)
{
	struct ntb_netdev *priv = from_timer(priv, t, tx_timer);
	struct net_device *netdev = priv->ndev;

	if (ntb_transport_tx_free_entry(priv->qp) < tx_stop) {
		/* Still short on entries; look again after tx_time usecs */
		mod_timer(&priv->tx_timer, jiffies + usecs_to_jiffies(tx_time));
		return;
	}

	/* Make sure anybody stopping the queue after this sees the new
	 * value of ntb_transport_tx_free_entry()
	 */
	smp_mb();
	if (netif_queue_stopped(netdev))
		netif_wake_queue(netdev);
}
/*
 * ndo_open: post NTB_RXQ_SIZE empty rx buffers, set up the tx timer, request
 * the transport link and start the tx queue.
 *
 * Carrier is left off here; it is switched on by the link event handler.
 * Returns 0 on success.  On failure every rx buffer already posted is
 * removed and freed, and -ENOMEM or the enqueue error is returned.
 */
static int ntb_netdev_open(struct net_device *ndev)
{
	struct ntb_netdev *priv = netdev_priv(ndev);
	struct sk_buff *buf;
	int ret, unused_len;
	int posted = 0;

	/* Add some empty rx bufs */
	while (posted < NTB_RXQ_SIZE) {
		buf = netdev_alloc_skb(ndev, ndev->mtu + ETH_HLEN);
		if (!buf) {
			ret = -ENOMEM;
			goto drain_rx;
		}

		ret = ntb_transport_rx_enqueue(priv->qp, buf, buf->data,
					       ndev->mtu + ETH_HLEN);
		if (ret) {
			dev_kfree_skb(buf);
			goto drain_rx;
		}

		posted++;
	}

	timer_setup(&priv->tx_timer, ntb_netdev_tx_timer, 0);

	netif_carrier_off(ndev);
	ntb_transport_link_up(priv->qp);
	netif_start_queue(ndev);

	return 0;

drain_rx:
	while ((buf = ntb_transport_rx_remove(priv->qp, &unused_len)))
		dev_kfree_skb(buf);
	return ret;
}
/*
 * ndo_stop: take the transport link down, free every rx buffer still posted
 * on the queue and stop the tx timer.  Always returns 0.
 */
static int ntb_netdev_close(struct net_device *ndev)
{
	struct ntb_netdev *priv = netdev_priv(ndev);
	struct sk_buff *buf;
	int unused_len;

	ntb_transport_link_down(priv->qp);

	for (buf = ntb_transport_rx_remove(priv->qp, &unused_len); buf;
	     buf = ntb_transport_rx_remove(priv->qp, &unused_len))
		dev_kfree_skb(buf);

	del_timer_sync(&priv->tx_timer);

	return 0;
}
/*
 * ndo_change_mtu: set a new MTU.
 *
 * The MTU plus Ethernet header must fit in the transport's maximum frame
 * size, otherwise -EINVAL is returned.  If the interface is not running only
 * the stored MTU changes.  If it is running, the link is cycled and, when the
 * MTU grows, every posted rx buffer is replaced by one sized for the new MTU.
 *
 * On an allocation or enqueue failure the link is left down, all rx buffers
 * are freed and the error is returned.
 */
static int ntb_netdev_change_mtu(struct net_device *ndev, int new_mtu)
{
	struct ntb_netdev *priv = netdev_priv(ndev);
	struct sk_buff *buf;
	int unused_len, ret;
	int nr_bufs;

	if (new_mtu > ntb_transport_max_size(priv->qp) - ETH_HLEN)
		return -EINVAL;

	if (!netif_running(ndev)) {
		ndev->mtu = new_mtu;
		return 0;
	}

	/* Bring down the link and dispose of posted rx entries */
	ntb_transport_link_down(priv->qp);

	if (ndev->mtu < new_mtu) {
		/* Free the old buffers, remembering how many there were */
		nr_bufs = 0;
		while ((buf = ntb_transport_rx_remove(priv->qp, &unused_len))) {
			dev_kfree_skb(buf);
			nr_bufs++;
		}

		/* Post the same number of buffers at the new size */
		while (nr_bufs) {
			buf = netdev_alloc_skb(ndev, new_mtu + ETH_HLEN);
			if (!buf) {
				ret = -ENOMEM;
				goto fail;
			}

			ret = ntb_transport_rx_enqueue(priv->qp, buf, buf->data,
						       new_mtu + ETH_HLEN);
			if (ret) {
				dev_kfree_skb(buf);
				goto fail;
			}

			nr_bufs--;
		}
	}

	ndev->mtu = new_mtu;

	ntb_transport_link_up(priv->qp);

	return 0;

fail:
	ntb_transport_link_down(priv->qp);

	while ((buf = ntb_transport_rx_remove(priv->qp, &unused_len)))
		dev_kfree_skb(buf);

	netdev_err(ndev, "Error changing MTU, device inoperable\n");
	return ret;
}
/* net_device operations for the NTB virtual ethernet device; installed on
 * the net_device in ntb_netdev_probe().
 */
static const struct net_device_ops ntb_netdev_ops = {
.ndo_open = ntb_netdev_open,
.ndo_stop = ntb_netdev_close,
.ndo_start_xmit = ntb_netdev_start_xmit,
.ndo_change_mtu = ntb_netdev_change_mtu,
.ndo_set_mac_address = eth_mac_addr,
};
/*
 * ethtool get_drvinfo: report the module name, the driver version and the
 * PCI name of the underlying NTB device.
 */
static void ntb_get_drvinfo(struct net_device *ndev,
			    struct ethtool_drvinfo *info)
{
	struct ntb_netdev *priv = netdev_priv(ndev);
	const char *bus_name = pci_name(priv->pdev);

	strscpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
	strscpy(info->version, NTB_NETDEV_VER, sizeof(info->version));
	strscpy(info->bus_info, bus_name, sizeof(info->bus_info));
}
/*
 * ethtool get_link_ksettings: report fixed link settings.
 *
 * Only the Backplane link mode is marked as supported and advertised; speed
 * is reported as unknown, duplex as full, port as PORT_OTHER and
 * autonegotiation as enabled.  Always returns 0.
 */
static int ntb_get_link_ksettings(struct net_device *dev,
				  struct ethtool_link_ksettings *cmd)
{
	/* Fixed base parameters */
	cmd->base.speed = SPEED_UNKNOWN;
	cmd->base.duplex = DUPLEX_FULL;
	cmd->base.port = PORT_OTHER;
	cmd->base.phy_address = 0;
	cmd->base.autoneg = AUTONEG_ENABLE;

	/* Supported link modes */
	ethtool_link_ksettings_zero_link_mode(cmd, supported);
	ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane);

	/* Advertised link modes */
	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
	ethtool_link_ksettings_add_link_mode(cmd, advertising, Backplane);

	return 0;
}
/* ethtool operations; installed on the net_device in ntb_netdev_probe() */
static const struct ethtool_ops ntb_ethtool_ops = {
.get_drvinfo = ntb_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_link_ksettings = ntb_get_link_ksettings,
};
/* Callbacks handed to ntb_transport_create_queue(): tx completion, rx
 * completion and link event notification.
 */
static const struct ntb_queue_handlers ntb_netdev_handlers = {
.tx_handler = ntb_netdev_tx_handler,
.rx_handler = ntb_netdev_rx_handler,
.event_handler = ntb_netdev_event_handler,
};
static int ntb_netdev_probe(struct device *client_dev)
{
struct ntb_dev *ntb;
struct net_device *ndev;
struct pci_dev *pdev;
struct ntb_netdev *dev;
int rc;
ntb = dev_ntb(client_dev->parent);
pdev = ntb->pdev;
if (!pdev)
return -ENODEV;
ndev = alloc_etherdev(sizeof(*dev));
if (!ndev)
return -ENOMEM;
SET_NETDEV_DEV(ndev, client_dev);
dev = netdev_priv(ndev);
dev->ndev = ndev;
dev->pdev = pdev;
ndev->features = NETIF_F_HIGHDMA;
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ndev->hw_features = ndev->features;
ndev->watchdog_timeo = msecs_to_jiffies(NTB_TX_TIMEOUT_MS);
eth_random_addr(ndev->perm_addr);
dev_addr_set(ndev, ndev->perm_addr);
ndev->netdev_ops = &ntb_netdev_ops;
ndev->ethtool_ops = &ntb_ethtool_ops;
net: use core MTU range checking in core net infra geneve: - Merge __geneve_change_mtu back into geneve_change_mtu, set max_mtu - This one isn't quite as straight-forward as others, could use some closer inspection and testing macvlan: - set min/max_mtu tun: - set min/max_mtu, remove tun_net_change_mtu vxlan: - Merge __vxlan_change_mtu back into vxlan_change_mtu - Set max_mtu to IP_MAX_MTU and retain dynamic MTU range checks in change_mtu function - This one is also not as straight-forward and could use closer inspection and testing from vxlan folks bridge: - set max_mtu of IP_MAX_MTU and retain dynamic MTU range checks in change_mtu function openvswitch: - set min/max_mtu, remove internal_dev_change_mtu - note: max_mtu wasn't checked previously, it's been set to 65535, which is the largest possible size supported sch_teql: - set min/max_mtu (note: max_mtu previously unchecked, used max of 65535) macsec: - min_mtu = 0, max_mtu = 65535 macvlan: - min_mtu = 0, max_mtu = 65535 ntb_netdev: - min_mtu = 0, max_mtu = 65535 veth: - min_mtu = 68, max_mtu = 65535 8021q: - min_mtu = 0, max_mtu = 65535 CC: netdev@vger.kernel.org CC: Nicolas Dichtel <nicolas.dichtel@6wind.com> CC: Hannes Frederic Sowa <hannes@stressinduktion.org> CC: Tom Herbert <tom@herbertland.com> CC: Daniel Borkmann <daniel@iogearbox.net> CC: Alexander Duyck <alexander.h.duyck@intel.com> CC: Paolo Abeni <pabeni@redhat.com> CC: Jiri Benc <jbenc@redhat.com> CC: WANG Cong <xiyou.wangcong@gmail.com> CC: Roopa Prabhu <roopa@cumulusnetworks.com> CC: Pravin B Shelar <pshelar@ovn.org> CC: Sabrina Dubroca <sd@queasysnail.net> CC: Patrick McHardy <kaber@trash.net> CC: Stephen Hemminger <stephen@networkplumber.org> CC: Pravin Shelar <pshelar@nicira.com> CC: Maxim Krasnyansky <maxk@qti.qualcomm.com> Signed-off-by: Jarod Wilson <jarod@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-10-21 01:55:20 +08:00
ndev->min_mtu = 0;
ndev->max_mtu = ETH_MAX_MTU;
dev->qp = ntb_transport_create_queue(ndev, client_dev,
&ntb_netdev_handlers);
if (!dev->qp) {
rc = -EIO;
goto err;
}
ndev->mtu = ntb_transport_max_size(dev->qp) - ETH_HLEN;
rc = register_netdev(ndev);
if (rc)
goto err1;
dev_set_drvdata(client_dev, ndev);
dev_info(&pdev->dev, "%s created\n", ndev->name);
return 0;
err1:
ntb_transport_free_queue(dev->qp);
err:
free_netdev(ndev);
return rc;
}
/*
 * Remove callback: undo ntb_netdev_probe() by unregistering the interface,
 * releasing the transport queue and freeing the net_device.
 */
static void ntb_netdev_remove(struct device *client_dev)
{
	struct net_device *netdev = dev_get_drvdata(client_dev);
	struct ntb_netdev *priv = netdev_priv(netdev);

	unregister_netdev(netdev);
	ntb_transport_free_queue(priv->qp);
	/* priv lives inside netdev, so netdev must be freed last */
	free_netdev(netdev);
}
/* NTB transport client descriptor; registered at module init and
 * unregistered at module exit.
 */
static struct ntb_transport_client ntb_netdev_client = {
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.probe = ntb_netdev_probe,
.remove = ntb_netdev_remove,
};
/*
 * Module init: register the client device name with the NTB transport, then
 * register the client itself.  If the second step fails the first is undone.
 * Returns 0 on success or the error from the failing registration.
 */
static int __init ntb_netdev_init_module(void)
{
	int ret = ntb_transport_register_client_dev(KBUILD_MODNAME);

	if (ret)
		return ret;

	ret = ntb_transport_register_client(&ntb_netdev_client);
	if (ret)
		ntb_transport_unregister_client_dev(KBUILD_MODNAME);

	return ret;
}
late_initcall(ntb_netdev_init_module);
/* Module exit: unregister the transport client and then the client device
 * name, i.e. the reverse of the order used in ntb_netdev_init_module().
 */
static void __exit ntb_netdev_exit_module(void)
{
ntb_transport_unregister_client(&ntb_netdev_client);
ntb_transport_unregister_client_dev(KBUILD_MODNAME);
}
module_exit(ntb_netdev_exit_module);