net: mana: Fix perf regression: remove rx_cqes, tx_cqes counters
The apc->eth_stats.rx_cqes counter is one per NIC (vport), and it is on the
frequent and parallel code path of all queues. So, r/w of this single
shared variable by many threads on different CPUs creates a lot of
caching and memory overhead, hence the perf regression. It is also not
accurate, due to the high volume of concurrent r/w.
For example, with a workload of iperf with 128 threads and RPS enabled,
we saw a perf regression of 25% with the previous patch that added the
counters. This patch eliminates that regression.
Since the error path of mana_poll_rx_cq() already has warnings, keeping
the counter and converting it to a per-queue variable is not necessary.
So, just remove this counter from the high-frequency code path.
Also, remove the tx_cqes counter for the same reason. We have warnings
and other counters for errors on that path, and don't need to count
every normal CQE processed.
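The mechanism is classic cache-line contention. As an illustration only (not
part of the patch; the thread and iteration counts below are arbitrary
assumptions), the following self-contained user-space C program compares many
threads bumping one shared counter against each thread bumping its own
cache-line-padded counter. Built with gcc -O2 -pthread, the shared pass is
typically several times slower on a multi-core machine, for the same reason a
single per-vport counter hurt the per-queue hot path here:

#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define NTHREADS  8
#define ITERS     10000000UL
#define CACHELINE 64

static unsigned long shared_counter;        /* one counter shared by all threads */

struct padded {                             /* one counter per thread, padded so */
        unsigned long val;                  /* each lives on its own cache line */
        char pad[CACHELINE - sizeof(unsigned long)];
};
static struct padded per_thread[NTHREADS];

static void *bump_shared(void *arg)
{
        (void)arg;
        for (unsigned long i = 0; i < ITERS; i++)
                __atomic_fetch_add(&shared_counter, 1, __ATOMIC_RELAXED);
        return NULL;
}

static void *bump_private(void *arg)
{
        unsigned long *slot = arg;

        for (unsigned long i = 0; i < ITERS; i++)
                __atomic_fetch_add(slot, 1, __ATOMIC_RELAXED);
        return NULL;
}

static double now_sec(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec + ts.tv_nsec / 1e9;
}

int main(void)
{
        pthread_t t[NTHREADS];
        double start;

        /* Pass 1: all threads hammer the same cache line. */
        start = now_sec();
        for (int i = 0; i < NTHREADS; i++)
                pthread_create(&t[i], NULL, bump_shared, NULL);
        for (int i = 0; i < NTHREADS; i++)
                pthread_join(t[i], NULL);
        printf("shared counter : %.2fs (total %lu)\n",
               now_sec() - start, shared_counter);

        /* Pass 2: each thread owns its cache line; no bouncing. */
        start = now_sec();
        for (int i = 0; i < NTHREADS; i++)
                pthread_create(&t[i], NULL, bump_private, &per_thread[i].val);
        for (int i = 0; i < NTHREADS; i++)
                pthread_join(t[i], NULL);
        printf("per-thread     : %.2fs\n", now_sec() - start);

        return 0;
}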
Cc: stable@vger.kernel.org
Fixes: bd7fc6e195 ("net: mana: Add new MANA VF performance counters for easier troubleshooting")
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/1685115537-31675-1-git-send-email-haiyangz@microsoft.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
commit 1919b39fc6
parent 111d467485
drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1279,8 +1279,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
         if (comp_read < 1)
                 return;
 
-        apc->eth_stats.tx_cqes = comp_read;
-
         for (i = 0; i < comp_read; i++) {
                 struct mana_tx_comp_oob *cqe_oob;
 
@@ -1363,8 +1361,6 @@ static void mana_poll_tx_cq(struct mana_cq *cq)
                 WARN_ON_ONCE(1);
 
         cq->work_done = pkt_transmitted;
-
-        apc->eth_stats.tx_cqes -= pkt_transmitted;
 }
 
 static void mana_post_pkt_rxq(struct mana_rxq *rxq)
@@ -1626,15 +1622,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
 {
         struct gdma_comp *comp = cq->gdma_comp_buf;
         struct mana_rxq *rxq = cq->rxq;
-        struct mana_port_context *apc;
         int comp_read, i;
 
-        apc = netdev_priv(rxq->ndev);
-
         comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
         WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);
 
-        apc->eth_stats.rx_cqes = comp_read;
         rxq->xdp_flush = false;
 
         for (i = 0; i < comp_read; i++) {
@@ -1646,8 +1638,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq)
                         return;
 
                 mana_process_rx_cqe(rxq, cq, &comp[i]);
-
-                apc->eth_stats.rx_cqes--;
         }
 
         if (rxq->xdp_flush)

drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -13,11 +13,9 @@ static const struct {
 } mana_eth_stats[] = {
         {"stop_queue", offsetof(struct mana_ethtool_stats, stop_queue)},
         {"wake_queue", offsetof(struct mana_ethtool_stats, wake_queue)},
-        {"tx_cqes", offsetof(struct mana_ethtool_stats, tx_cqes)},
         {"tx_cq_err", offsetof(struct mana_ethtool_stats, tx_cqe_err)},
         {"tx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,
                                          tx_cqe_unknown_type)},
-        {"rx_cqes", offsetof(struct mana_ethtool_stats, rx_cqes)},
         {"rx_coalesced_err", offsetof(struct mana_ethtool_stats,
                                       rx_coalesced_err)},
         {"rx_cqe_unknown_type", offsetof(struct mana_ethtool_stats,

include/net/mana/mana.h
@@ -347,10 +347,8 @@ struct mana_tx_qp {
 struct mana_ethtool_stats {
         u64 stop_queue;
         u64 wake_queue;
-        u64 tx_cqes;
         u64 tx_cqe_err;
         u64 tx_cqe_unknown_type;
-        u64 rx_cqes;
         u64 rx_coalesced_err;
         u64 rx_cqe_unknown_type;
 };
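For reference, the per-queue pattern that the commit message deems unnecessary
here would look roughly like the following kernel-style sketch using the
standard u64_stats_sync API. The struct and function names are hypothetical,
not taken from the mana driver; each queue owns its counter, so hot-path
writers on different CPUs never contend on one cache line:

#include <linux/u64_stats_sync.h>

/* Hypothetical per-queue counter block; syncp must be set up once
 * with u64_stats_init() when the queue is created. */
struct my_rxq_stats {
        u64 cqes;
        struct u64_stats_sync syncp;
};

/* Writer side: runs in the queue's own NAPI context, no sharing. */
static void my_rxq_count_cqes(struct my_rxq_stats *st, int comp_read)
{
        u64_stats_update_begin(&st->syncp);
        st->cqes += comp_read;
        u64_stats_update_end(&st->syncp);
}

/* Reader side (e.g. an ethtool handler): snapshots one queue's value;
 * a total would be the sum of this over all queues. */
static u64 my_rxq_read_cqes(struct my_rxq_stats *st)
{
        unsigned int start;
        u64 val;

        do {
                start = u64_stats_fetch_begin(&st->syncp);
                val = st->cqes;
        } while (u64_stats_fetch_retry(&st->syncp, start));

        return val;
}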