Merge branch 'net-napi-addition-of-napi_defer_hard_irqs'
Eric Dumazet says: ==================== net: napi: addition of napi_defer_hard_irqs This patch series augments the gro_flush_timeout feature with napi_defer_hard_irqs. As extensively described in the first patch's changelog, this can suppress the chit-chat traffic between NIC and host to signal interrupts and re-arm them, since this can be an issue on high-speed NICs with many queues. The last patch in this series converts mlx4 TX completion to napi_complete_done(), to enable this new mechanism. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
4c532b144f
|
@ -946,7 +946,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
|
||||||
xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
|
xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
|
||||||
if (xdp_tx_cq->xdp_busy) {
|
if (xdp_tx_cq->xdp_busy) {
|
||||||
clean_complete = mlx4_en_process_tx_cq(dev, xdp_tx_cq,
|
clean_complete = mlx4_en_process_tx_cq(dev, xdp_tx_cq,
|
||||||
budget);
|
budget) < budget;
|
||||||
xdp_tx_cq->xdp_busy = !clean_complete;
|
xdp_tx_cq->xdp_busy = !clean_complete;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -382,8 +382,8 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
|
||||||
return cnt;
|
return cnt;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool mlx4_en_process_tx_cq(struct net_device *dev,
|
int mlx4_en_process_tx_cq(struct net_device *dev,
|
||||||
struct mlx4_en_cq *cq, int napi_budget)
|
struct mlx4_en_cq *cq, int napi_budget)
|
||||||
{
|
{
|
||||||
struct mlx4_en_priv *priv = netdev_priv(dev);
|
struct mlx4_en_priv *priv = netdev_priv(dev);
|
||||||
struct mlx4_cq *mcq = &cq->mcq;
|
struct mlx4_cq *mcq = &cq->mcq;
|
||||||
|
@ -405,7 +405,7 @@ bool mlx4_en_process_tx_cq(struct net_device *dev,
|
||||||
u32 ring_cons;
|
u32 ring_cons;
|
||||||
|
|
||||||
if (unlikely(!priv->port_up))
|
if (unlikely(!priv->port_up))
|
||||||
return true;
|
return 0;
|
||||||
|
|
||||||
netdev_txq_bql_complete_prefetchw(ring->tx_queue);
|
netdev_txq_bql_complete_prefetchw(ring->tx_queue);
|
||||||
|
|
||||||
|
@ -480,7 +480,7 @@ bool mlx4_en_process_tx_cq(struct net_device *dev,
|
||||||
WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);
|
WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);
|
||||||
|
|
||||||
if (cq->type == TX_XDP)
|
if (cq->type == TX_XDP)
|
||||||
return done < budget;
|
return done;
|
||||||
|
|
||||||
netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
|
netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
|
||||||
|
|
||||||
|
@ -492,7 +492,7 @@ bool mlx4_en_process_tx_cq(struct net_device *dev,
|
||||||
ring->wake_queue++;
|
ring->wake_queue++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return done < budget;
|
return done;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mlx4_en_tx_irq(struct mlx4_cq *mcq)
|
void mlx4_en_tx_irq(struct mlx4_cq *mcq)
|
||||||
|
@ -512,14 +512,14 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
|
||||||
struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
|
struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
|
||||||
struct net_device *dev = cq->dev;
|
struct net_device *dev = cq->dev;
|
||||||
struct mlx4_en_priv *priv = netdev_priv(dev);
|
struct mlx4_en_priv *priv = netdev_priv(dev);
|
||||||
bool clean_complete;
|
int work_done;
|
||||||
|
|
||||||
clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
|
work_done = mlx4_en_process_tx_cq(dev, cq, budget);
|
||||||
if (!clean_complete)
|
if (work_done >= budget)
|
||||||
return budget;
|
return budget;
|
||||||
|
|
||||||
napi_complete(napi);
|
if (napi_complete_done(napi, work_done))
|
||||||
mlx4_en_arm_cq(priv, cq);
|
mlx4_en_arm_cq(priv, cq);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -737,8 +737,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
|
||||||
int budget);
|
int budget);
|
||||||
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
|
int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
|
||||||
int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget);
|
int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget);
|
||||||
bool mlx4_en_process_tx_cq(struct net_device *dev,
|
int mlx4_en_process_tx_cq(struct net_device *dev,
|
||||||
struct mlx4_en_cq *cq, int napi_budget);
|
struct mlx4_en_cq *cq, int napi_budget);
|
||||||
u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
|
u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
|
||||||
struct mlx4_en_tx_ring *ring,
|
struct mlx4_en_tx_ring *ring,
|
||||||
int index, u64 timestamp,
|
int index, u64 timestamp,
|
||||||
|
|
|
@ -329,6 +329,7 @@ struct napi_struct {
|
||||||
|
|
||||||
unsigned long state;
|
unsigned long state;
|
||||||
int weight;
|
int weight;
|
||||||
|
int defer_hard_irqs_count;
|
||||||
unsigned long gro_bitmask;
|
unsigned long gro_bitmask;
|
||||||
int (*poll)(struct napi_struct *, int);
|
int (*poll)(struct napi_struct *, int);
|
||||||
#ifdef CONFIG_NETPOLL
|
#ifdef CONFIG_NETPOLL
|
||||||
|
@ -1995,6 +1996,7 @@ struct net_device {
|
||||||
|
|
||||||
struct bpf_prog __rcu *xdp_prog;
|
struct bpf_prog __rcu *xdp_prog;
|
||||||
unsigned long gro_flush_timeout;
|
unsigned long gro_flush_timeout;
|
||||||
|
int napi_defer_hard_irqs;
|
||||||
rx_handler_func_t __rcu *rx_handler;
|
rx_handler_func_t __rcu *rx_handler;
|
||||||
void __rcu *rx_handler_data;
|
void __rcu *rx_handler_data;
|
||||||
|
|
||||||
|
|
|
@ -6227,7 +6227,8 @@ EXPORT_SYMBOL(__napi_schedule_irqoff);
|
||||||
|
|
||||||
bool napi_complete_done(struct napi_struct *n, int work_done)
|
bool napi_complete_done(struct napi_struct *n, int work_done)
|
||||||
{
|
{
|
||||||
unsigned long flags, val, new;
|
unsigned long flags, val, new, timeout = 0;
|
||||||
|
bool ret = true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* 1) Don't let napi dequeue from the cpu poll list
|
* 1) Don't let napi dequeue from the cpu poll list
|
||||||
|
@ -6239,20 +6240,23 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
|
||||||
NAPIF_STATE_IN_BUSY_POLL)))
|
NAPIF_STATE_IN_BUSY_POLL)))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if (work_done) {
|
||||||
|
if (n->gro_bitmask)
|
||||||
|
timeout = READ_ONCE(n->dev->gro_flush_timeout);
|
||||||
|
n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
|
||||||
|
}
|
||||||
|
if (n->defer_hard_irqs_count > 0) {
|
||||||
|
n->defer_hard_irqs_count--;
|
||||||
|
timeout = READ_ONCE(n->dev->gro_flush_timeout);
|
||||||
|
if (timeout)
|
||||||
|
ret = false;
|
||||||
|
}
|
||||||
if (n->gro_bitmask) {
|
if (n->gro_bitmask) {
|
||||||
unsigned long timeout = 0;
|
|
||||||
|
|
||||||
if (work_done)
|
|
||||||
timeout = n->dev->gro_flush_timeout;
|
|
||||||
|
|
||||||
/* When the NAPI instance uses a timeout and keeps postponing
|
/* When the NAPI instance uses a timeout and keeps postponing
|
||||||
* it, we need to bound somehow the time packets are kept in
|
* it, we need to bound somehow the time packets are kept in
|
||||||
* the GRO layer
|
* the GRO layer
|
||||||
*/
|
*/
|
||||||
napi_gro_flush(n, !!timeout);
|
napi_gro_flush(n, !!timeout);
|
||||||
if (timeout)
|
|
||||||
hrtimer_start(&n->timer, ns_to_ktime(timeout),
|
|
||||||
HRTIMER_MODE_REL_PINNED);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
gro_normal_list(n);
|
gro_normal_list(n);
|
||||||
|
@ -6284,7 +6288,10 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
if (timeout)
|
||||||
|
hrtimer_start(&n->timer, ns_to_ktime(timeout),
|
||||||
|
HRTIMER_MODE_REL_PINNED);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(napi_complete_done);
|
EXPORT_SYMBOL(napi_complete_done);
|
||||||
|
|
||||||
|
@ -6464,7 +6471,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
|
||||||
/* Note : we use a relaxed variant of napi_schedule_prep() not setting
|
/* Note : we use a relaxed variant of napi_schedule_prep() not setting
|
||||||
* NAPI_STATE_MISSED, since we do not react to a device IRQ.
|
* NAPI_STATE_MISSED, since we do not react to a device IRQ.
|
||||||
*/
|
*/
|
||||||
if (napi->gro_bitmask && !napi_disable_pending(napi) &&
|
if (!napi_disable_pending(napi) &&
|
||||||
!test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
|
!test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
|
||||||
__napi_schedule_irqoff(napi);
|
__napi_schedule_irqoff(napi);
|
||||||
|
|
||||||
|
|
|
@ -367,7 +367,7 @@ NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
|
||||||
|
|
||||||
static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
|
static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
|
||||||
{
|
{
|
||||||
dev->gro_flush_timeout = val;
|
WRITE_ONCE(dev->gro_flush_timeout, val);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -382,6 +382,23 @@ static ssize_t gro_flush_timeout_store(struct device *dev,
|
||||||
}
|
}
|
||||||
NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
|
NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
|
||||||
|
|
||||||
|
static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
|
||||||
|
{
|
||||||
|
WRITE_ONCE(dev->napi_defer_hard_irqs, val);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t napi_defer_hard_irqs_store(struct device *dev,
|
||||||
|
struct device_attribute *attr,
|
||||||
|
const char *buf, size_t len)
|
||||||
|
{
|
||||||
|
if (!capable(CAP_NET_ADMIN))
|
||||||
|
return -EPERM;
|
||||||
|
|
||||||
|
return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs);
|
||||||
|
}
|
||||||
|
NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_dec);
|
||||||
|
|
||||||
static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
|
static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
|
||||||
const char *buf, size_t len)
|
const char *buf, size_t len)
|
||||||
{
|
{
|
||||||
|
@ -545,6 +562,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
|
||||||
&dev_attr_flags.attr,
|
&dev_attr_flags.attr,
|
||||||
&dev_attr_tx_queue_len.attr,
|
&dev_attr_tx_queue_len.attr,
|
||||||
&dev_attr_gro_flush_timeout.attr,
|
&dev_attr_gro_flush_timeout.attr,
|
||||||
|
&dev_attr_napi_defer_hard_irqs.attr,
|
||||||
&dev_attr_phys_port_id.attr,
|
&dev_attr_phys_port_id.attr,
|
||||||
&dev_attr_phys_port_name.attr,
|
&dev_attr_phys_port_name.attr,
|
||||||
&dev_attr_phys_switch_id.attr,
|
&dev_attr_phys_switch_id.attr,
|
||||||
|
|
Loading…
Reference in New Issue