net: stmmac: Fix NAPI poll in TX path when in multi-queue
Commit 8fce333170 introduced the concept of per-channel NAPI and
independent cleaning of the TX path.
This currently degrades performance in some cases. The problem occurs
when all packets are received on Queue 0 but TX is performed on a
queue other than 0.
Fix this by using a separate NAPI instance for each TX and RX queue, as
suggested by Florian.
Changes from v2:
- Only force restart transmission if there are pending packets
Changes from v1:
- Pass entire ring size to TX clean path (Florian)
Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
d0e698d57a
commit
4ccb45857c
|
@ -79,11 +79,10 @@ struct stmmac_rx_queue {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct stmmac_channel {
|
struct stmmac_channel {
|
||||||
struct napi_struct napi ____cacheline_aligned_in_smp;
|
struct napi_struct rx_napi ____cacheline_aligned_in_smp;
|
||||||
|
struct napi_struct tx_napi ____cacheline_aligned_in_smp;
|
||||||
struct stmmac_priv *priv_data;
|
struct stmmac_priv *priv_data;
|
||||||
u32 index;
|
u32 index;
|
||||||
int has_rx;
|
|
||||||
int has_tx;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct stmmac_tc_entry {
|
struct stmmac_tc_entry {
|
||||||
|
|
|
@ -155,7 +155,10 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
|
||||||
for (queue = 0; queue < maxq; queue++) {
|
for (queue = 0; queue < maxq; queue++) {
|
||||||
struct stmmac_channel *ch = &priv->channel[queue];
|
struct stmmac_channel *ch = &priv->channel[queue];
|
||||||
|
|
||||||
napi_disable(&ch->napi);
|
if (queue < rx_queues_cnt)
|
||||||
|
napi_disable(&ch->rx_napi);
|
||||||
|
if (queue < tx_queues_cnt)
|
||||||
|
napi_disable(&ch->tx_napi);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -173,7 +176,10 @@ static void stmmac_enable_all_queues(struct stmmac_priv *priv)
|
||||||
for (queue = 0; queue < maxq; queue++) {
|
for (queue = 0; queue < maxq; queue++) {
|
||||||
struct stmmac_channel *ch = &priv->channel[queue];
|
struct stmmac_channel *ch = &priv->channel[queue];
|
||||||
|
|
||||||
napi_enable(&ch->napi);
|
if (queue < rx_queues_cnt)
|
||||||
|
napi_enable(&ch->rx_napi);
|
||||||
|
if (queue < tx_queues_cnt)
|
||||||
|
napi_enable(&ch->tx_napi);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1955,6 +1961,10 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
|
||||||
mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
|
mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* We still have pending packets, let's call for a new scheduling */
|
||||||
|
if (tx_q->dirty_tx != tx_q->cur_tx)
|
||||||
|
mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10));
|
||||||
|
|
||||||
__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
|
__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
|
||||||
|
|
||||||
return count;
|
return count;
|
||||||
|
@ -2045,23 +2055,15 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
|
||||||
int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
|
int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
|
||||||
&priv->xstats, chan);
|
&priv->xstats, chan);
|
||||||
struct stmmac_channel *ch = &priv->channel[chan];
|
struct stmmac_channel *ch = &priv->channel[chan];
|
||||||
bool needs_work = false;
|
|
||||||
|
|
||||||
if ((status & handle_rx) && ch->has_rx) {
|
if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
|
||||||
needs_work = true;
|
|
||||||
} else {
|
|
||||||
status &= ~handle_rx;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((status & handle_tx) && ch->has_tx) {
|
|
||||||
needs_work = true;
|
|
||||||
} else {
|
|
||||||
status &= ~handle_tx;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (needs_work && napi_schedule_prep(&ch->napi)) {
|
|
||||||
stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
|
stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
|
||||||
__napi_schedule(&ch->napi);
|
napi_schedule_irqoff(&ch->rx_napi);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
|
||||||
|
stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
|
||||||
|
napi_schedule_irqoff(&ch->tx_napi);
|
||||||
}
|
}
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
|
@ -2257,8 +2259,14 @@ static void stmmac_tx_timer(struct timer_list *t)
|
||||||
|
|
||||||
ch = &priv->channel[tx_q->queue_index];
|
ch = &priv->channel[tx_q->queue_index];
|
||||||
|
|
||||||
if (likely(napi_schedule_prep(&ch->napi)))
|
/*
|
||||||
__napi_schedule(&ch->napi);
|
* If NAPI is already running we can miss some events. Let's rearm
|
||||||
|
* the timer and try again.
|
||||||
|
*/
|
||||||
|
if (likely(napi_schedule_prep(&ch->tx_napi)))
|
||||||
|
__napi_schedule(&ch->tx_napi);
|
||||||
|
else
|
||||||
|
mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -3514,7 +3522,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
|
||||||
else
|
else
|
||||||
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||||
|
|
||||||
napi_gro_receive(&ch->napi, skb);
|
napi_gro_receive(&ch->rx_napi, skb);
|
||||||
|
|
||||||
priv->dev->stats.rx_packets++;
|
priv->dev->stats.rx_packets++;
|
||||||
priv->dev->stats.rx_bytes += frame_len;
|
priv->dev->stats.rx_bytes += frame_len;
|
||||||
|
@ -3529,40 +3537,45 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
|
||||||
* stmmac_poll - stmmac poll method (NAPI)
|
|
||||||
* @napi : pointer to the napi structure.
|
|
||||||
* @budget : maximum number of packets that the current CPU can receive from
|
|
||||||
* all interfaces.
|
|
||||||
* Description :
|
|
||||||
* To look at the incoming frames and clear the tx resources.
|
|
||||||
*/
|
|
||||||
static int stmmac_napi_poll(struct napi_struct *napi, int budget)
|
|
||||||
{
|
{
|
||||||
struct stmmac_channel *ch =
|
struct stmmac_channel *ch =
|
||||||
container_of(napi, struct stmmac_channel, napi);
|
container_of(napi, struct stmmac_channel, rx_napi);
|
||||||
struct stmmac_priv *priv = ch->priv_data;
|
struct stmmac_priv *priv = ch->priv_data;
|
||||||
int work_done, rx_done = 0, tx_done = 0;
|
|
||||||
u32 chan = ch->index;
|
u32 chan = ch->index;
|
||||||
|
int work_done;
|
||||||
|
|
||||||
priv->xstats.napi_poll++;
|
priv->xstats.napi_poll++;
|
||||||
|
|
||||||
if (ch->has_tx)
|
work_done = stmmac_rx(priv, budget, chan);
|
||||||
tx_done = stmmac_tx_clean(priv, budget, chan);
|
if (work_done < budget && napi_complete_done(napi, work_done))
|
||||||
if (ch->has_rx)
|
stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
|
||||||
rx_done = stmmac_rx(priv, budget, chan);
|
return work_done;
|
||||||
|
}
|
||||||
|
|
||||||
work_done = max(rx_done, tx_done);
|
static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
|
||||||
|
{
|
||||||
|
struct stmmac_channel *ch =
|
||||||
|
container_of(napi, struct stmmac_channel, tx_napi);
|
||||||
|
struct stmmac_priv *priv = ch->priv_data;
|
||||||
|
struct stmmac_tx_queue *tx_q;
|
||||||
|
u32 chan = ch->index;
|
||||||
|
int work_done;
|
||||||
|
|
||||||
|
priv->xstats.napi_poll++;
|
||||||
|
|
||||||
|
work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan);
|
||||||
work_done = min(work_done, budget);
|
work_done = min(work_done, budget);
|
||||||
|
|
||||||
if (work_done < budget && napi_complete_done(napi, work_done)) {
|
if (work_done < budget && napi_complete_done(napi, work_done))
|
||||||
int stat;
|
|
||||||
|
|
||||||
stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
|
stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
|
||||||
stat = stmmac_dma_interrupt_status(priv, priv->ioaddr,
|
|
||||||
&priv->xstats, chan);
|
/* Force transmission restart */
|
||||||
if (stat && napi_reschedule(napi))
|
tx_q = &priv->tx_queue[chan];
|
||||||
stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
|
if (tx_q->cur_tx != tx_q->dirty_tx) {
|
||||||
|
stmmac_enable_dma_transmission(priv, priv->ioaddr);
|
||||||
|
stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr,
|
||||||
|
chan);
|
||||||
}
|
}
|
||||||
|
|
||||||
return work_done;
|
return work_done;
|
||||||
|
@ -4342,13 +4355,14 @@ int stmmac_dvr_probe(struct device *device,
|
||||||
ch->priv_data = priv;
|
ch->priv_data = priv;
|
||||||
ch->index = queue;
|
ch->index = queue;
|
||||||
|
|
||||||
if (queue < priv->plat->rx_queues_to_use)
|
if (queue < priv->plat->rx_queues_to_use) {
|
||||||
ch->has_rx = true;
|
netif_napi_add(ndev, &ch->rx_napi, stmmac_napi_poll_rx,
|
||||||
if (queue < priv->plat->tx_queues_to_use)
|
NAPI_POLL_WEIGHT);
|
||||||
ch->has_tx = true;
|
}
|
||||||
|
if (queue < priv->plat->tx_queues_to_use) {
|
||||||
netif_napi_add(ndev, &ch->napi, stmmac_napi_poll,
|
netif_napi_add(ndev, &ch->tx_napi, stmmac_napi_poll_tx,
|
||||||
NAPI_POLL_WEIGHT);
|
NAPI_POLL_WEIGHT);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_init(&priv->lock);
|
mutex_init(&priv->lock);
|
||||||
|
@ -4404,7 +4418,10 @@ error_mdio_register:
|
||||||
for (queue = 0; queue < maxq; queue++) {
|
for (queue = 0; queue < maxq; queue++) {
|
||||||
struct stmmac_channel *ch = &priv->channel[queue];
|
struct stmmac_channel *ch = &priv->channel[queue];
|
||||||
|
|
||||||
netif_napi_del(&ch->napi);
|
if (queue < priv->plat->rx_queues_to_use)
|
||||||
|
netif_napi_del(&ch->rx_napi);
|
||||||
|
if (queue < priv->plat->tx_queues_to_use)
|
||||||
|
netif_napi_del(&ch->tx_napi);
|
||||||
}
|
}
|
||||||
error_hw_init:
|
error_hw_init:
|
||||||
destroy_workqueue(priv->wq);
|
destroy_workqueue(priv->wq);
|
||||||
|
|
Loading…
Reference in New Issue