Merge branch 'ibmvnic-affinity-hints'
Nick Child says: ==================== ibmvnic: Introduce affinity hint support This is a patchset to do 3 things to improve ibmvnic performance: 1. Assign affinity hints to ibmvnic queue IRQs 2. Update affinity hints on CPU hotplug events 3. Introduce transmit packet steering (XPS) NOTE: If irqbalance is running, you need to stop it from overriding our affinity hints. To do this you can do one of the following: - systemctl stop irqbalance - ban the ibmvnic module IRQs - you must have the latest irqbalance v9.2; the banmod argument was broken before this - in /etc/sysconfig/irqbalance -> IRQBALANCE_ARGS="--banmod=ibmvnic" - systemctl restart irqbalance ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
8a30b30b26
|
@ -68,6 +68,7 @@
|
|||
#include <linux/workqueue.h>
|
||||
#include <linux/if_vlan.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/cpu.h>
|
||||
|
||||
#include "ibmvnic.h"
|
||||
|
||||
|
@ -171,6 +172,193 @@ static int send_version_xchg(struct ibmvnic_adapter *adapter)
|
|||
return ibmvnic_send_crq(adapter, &crq);
|
||||
}
|
||||
|
||||
static void ibmvnic_clean_queue_affinity(struct ibmvnic_adapter *adapter,
|
||||
struct ibmvnic_sub_crq_queue *queue)
|
||||
{
|
||||
if (!(queue && queue->irq))
|
||||
return;
|
||||
|
||||
cpumask_clear(queue->affinity_mask);
|
||||
|
||||
if (irq_set_affinity_and_hint(queue->irq, NULL))
|
||||
netdev_warn(adapter->netdev,
|
||||
"%s: Clear affinity failed, queue addr = %p, IRQ = %d\n",
|
||||
__func__, queue, queue->irq);
|
||||
}
|
||||
|
||||
static void ibmvnic_clean_affinity(struct ibmvnic_adapter *adapter)
|
||||
{
|
||||
struct ibmvnic_sub_crq_queue **rxqs;
|
||||
struct ibmvnic_sub_crq_queue **txqs;
|
||||
int num_rxqs, num_txqs;
|
||||
int rc, i;
|
||||
|
||||
rc = 0;
|
||||
rxqs = adapter->rx_scrq;
|
||||
txqs = adapter->tx_scrq;
|
||||
num_txqs = adapter->num_active_tx_scrqs;
|
||||
num_rxqs = adapter->num_active_rx_scrqs;
|
||||
|
||||
netdev_dbg(adapter->netdev, "%s: Cleaning irq affinity hints", __func__);
|
||||
if (txqs) {
|
||||
for (i = 0; i < num_txqs; i++)
|
||||
ibmvnic_clean_queue_affinity(adapter, txqs[i]);
|
||||
}
|
||||
if (rxqs) {
|
||||
for (i = 0; i < num_rxqs; i++)
|
||||
ibmvnic_clean_queue_affinity(adapter, rxqs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue,
|
||||
unsigned int *cpu, int *stragglers,
|
||||
int stride)
|
||||
{
|
||||
cpumask_var_t mask;
|
||||
int i;
|
||||
int rc = 0;
|
||||
|
||||
if (!(queue && queue->irq))
|
||||
return rc;
|
||||
|
||||
/* cpumask_var_t is either a pointer or array, allocation works here */
|
||||
if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
/* while we have extra cpu give one extra to this irq */
|
||||
if (*stragglers) {
|
||||
stride++;
|
||||
(*stragglers)--;
|
||||
}
|
||||
/* atomic write is safer than writing bit by bit directly */
|
||||
for (i = 0; i < stride; i++) {
|
||||
cpumask_set_cpu(*cpu, mask);
|
||||
*cpu = cpumask_next_wrap(*cpu, cpu_online_mask,
|
||||
nr_cpu_ids, false);
|
||||
}
|
||||
/* set queue affinity mask */
|
||||
cpumask_copy(queue->affinity_mask, mask);
|
||||
rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask);
|
||||
free_cpumask_var(mask);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* assumes cpu read lock is held */
|
||||
static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
|
||||
{
|
||||
struct ibmvnic_sub_crq_queue **rxqs = adapter->rx_scrq;
|
||||
struct ibmvnic_sub_crq_queue **txqs = adapter->tx_scrq;
|
||||
struct ibmvnic_sub_crq_queue *queue;
|
||||
int num_rxqs = adapter->num_active_rx_scrqs;
|
||||
int num_txqs = adapter->num_active_tx_scrqs;
|
||||
int total_queues, stride, stragglers, i;
|
||||
unsigned int num_cpu, cpu;
|
||||
int rc = 0;
|
||||
|
||||
netdev_dbg(adapter->netdev, "%s: Setting irq affinity hints", __func__);
|
||||
if (!(adapter->rx_scrq && adapter->tx_scrq)) {
|
||||
netdev_warn(adapter->netdev,
|
||||
"%s: Set affinity failed, queues not allocated\n",
|
||||
__func__);
|
||||
return;
|
||||
}
|
||||
|
||||
total_queues = num_rxqs + num_txqs;
|
||||
num_cpu = num_online_cpus();
|
||||
/* number of cpu's assigned per irq */
|
||||
stride = max_t(int, num_cpu / total_queues, 1);
|
||||
/* number of leftover cpu's */
|
||||
stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0;
|
||||
/* next available cpu to assign irq to */
|
||||
cpu = cpumask_next(-1, cpu_online_mask);
|
||||
|
||||
for (i = 0; i < num_txqs; i++) {
|
||||
queue = txqs[i];
|
||||
rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers,
|
||||
stride);
|
||||
if (rc)
|
||||
goto out;
|
||||
|
||||
if (!queue)
|
||||
continue;
|
||||
|
||||
rc = __netif_set_xps_queue(adapter->netdev,
|
||||
cpumask_bits(queue->affinity_mask),
|
||||
i, XPS_CPUS);
|
||||
if (rc)
|
||||
netdev_warn(adapter->netdev, "%s: Set XPS on queue %d failed, rc = %d.\n",
|
||||
__func__, i, rc);
|
||||
}
|
||||
|
||||
for (i = 0; i < num_rxqs; i++) {
|
||||
queue = rxqs[i];
|
||||
rc = ibmvnic_set_queue_affinity(queue, &cpu, &stragglers,
|
||||
stride);
|
||||
if (rc)
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
if (rc) {
|
||||
netdev_warn(adapter->netdev,
|
||||
"%s: Set affinity failed, queue addr = %p, IRQ = %d, rc = %d.\n",
|
||||
__func__, queue, queue->irq, rc);
|
||||
ibmvnic_clean_affinity(adapter);
|
||||
}
|
||||
}
|
||||
|
||||
static int ibmvnic_cpu_online(unsigned int cpu, struct hlist_node *node)
|
||||
{
|
||||
struct ibmvnic_adapter *adapter;
|
||||
|
||||
adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node);
|
||||
ibmvnic_set_affinity(adapter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ibmvnic_cpu_dead(unsigned int cpu, struct hlist_node *node)
|
||||
{
|
||||
struct ibmvnic_adapter *adapter;
|
||||
|
||||
adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node_dead);
|
||||
ibmvnic_set_affinity(adapter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ibmvnic_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
|
||||
{
|
||||
struct ibmvnic_adapter *adapter;
|
||||
|
||||
adapter = hlist_entry_safe(node, struct ibmvnic_adapter, node);
|
||||
ibmvnic_clean_affinity(adapter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static enum cpuhp_state ibmvnic_online;
|
||||
|
||||
static int ibmvnic_cpu_notif_add(struct ibmvnic_adapter *adapter)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = cpuhp_state_add_instance_nocalls(ibmvnic_online, &adapter->node);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = cpuhp_state_add_instance_nocalls(CPUHP_IBMVNIC_DEAD,
|
||||
&adapter->node_dead);
|
||||
if (!ret)
|
||||
return ret;
|
||||
cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ibmvnic_cpu_notif_remove(struct ibmvnic_adapter *adapter)
|
||||
{
|
||||
cpuhp_state_remove_instance_nocalls(ibmvnic_online, &adapter->node);
|
||||
cpuhp_state_remove_instance_nocalls(CPUHP_IBMVNIC_DEAD,
|
||||
&adapter->node_dead);
|
||||
}
|
||||
|
||||
static long h_reg_sub_crq(unsigned long unit_address, unsigned long token,
|
||||
unsigned long length, unsigned long *number,
|
||||
unsigned long *irq)
|
||||
|
@ -3626,6 +3814,8 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter)
|
|||
if (!adapter->tx_scrq || !adapter->rx_scrq)
|
||||
return -EINVAL;
|
||||
|
||||
ibmvnic_clean_affinity(adapter);
|
||||
|
||||
for (i = 0; i < adapter->req_tx_queues; i++) {
|
||||
netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i);
|
||||
rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]);
|
||||
|
@ -3675,6 +3865,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter,
|
|||
dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
|
||||
DMA_BIDIRECTIONAL);
|
||||
free_pages((unsigned long)scrq->msgs, 2);
|
||||
free_cpumask_var(scrq->affinity_mask);
|
||||
kfree(scrq);
|
||||
}
|
||||
|
||||
|
@ -3695,6 +3886,8 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter
|
|||
dev_warn(dev, "Couldn't allocate crq queue messages page\n");
|
||||
goto zero_page_failed;
|
||||
}
|
||||
if (!zalloc_cpumask_var(&scrq->affinity_mask, GFP_KERNEL))
|
||||
goto cpumask_alloc_failed;
|
||||
|
||||
scrq->msg_token = dma_map_single(dev, scrq->msgs, 4 * PAGE_SIZE,
|
||||
DMA_BIDIRECTIONAL);
|
||||
|
@ -3747,6 +3940,8 @@ reg_failed:
|
|||
dma_unmap_single(dev, scrq->msg_token, 4 * PAGE_SIZE,
|
||||
DMA_BIDIRECTIONAL);
|
||||
map_failed:
|
||||
free_cpumask_var(scrq->affinity_mask);
|
||||
cpumask_alloc_failed:
|
||||
free_pages((unsigned long)scrq->msgs, 2);
|
||||
zero_page_failed:
|
||||
kfree(scrq);
|
||||
|
@ -3758,6 +3953,7 @@ static void release_sub_crqs(struct ibmvnic_adapter *adapter, bool do_h_free)
|
|||
{
|
||||
int i;
|
||||
|
||||
ibmvnic_clean_affinity(adapter);
|
||||
if (adapter->tx_scrq) {
|
||||
for (i = 0; i < adapter->num_active_tx_scrqs; i++) {
|
||||
if (!adapter->tx_scrq[i])
|
||||
|
@ -4035,6 +4231,11 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter)
|
|||
goto req_rx_irq_failed;
|
||||
}
|
||||
}
|
||||
|
||||
cpus_read_lock();
|
||||
ibmvnic_set_affinity(adapter);
|
||||
cpus_read_unlock();
|
||||
|
||||
return rc;
|
||||
|
||||
req_rx_irq_failed:
|
||||
|
@ -6152,10 +6353,19 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
|
|||
}
|
||||
dev_info(&dev->dev, "ibmvnic registered\n");
|
||||
|
||||
rc = ibmvnic_cpu_notif_add(adapter);
|
||||
if (rc) {
|
||||
netdev_err(netdev, "Registering cpu notifier failed\n");
|
||||
goto cpu_notif_add_failed;
|
||||
}
|
||||
|
||||
complete(&adapter->probe_done);
|
||||
|
||||
return 0;
|
||||
|
||||
cpu_notif_add_failed:
|
||||
unregister_netdev(netdev);
|
||||
|
||||
ibmvnic_register_fail:
|
||||
device_remove_file(&dev->dev, &dev_attr_failover);
|
||||
|
||||
|
@ -6206,6 +6416,8 @@ static void ibmvnic_remove(struct vio_dev *dev)
|
|||
|
||||
spin_unlock_irqrestore(&adapter->state_lock, flags);
|
||||
|
||||
ibmvnic_cpu_notif_remove(adapter);
|
||||
|
||||
flush_work(&adapter->ibmvnic_reset);
|
||||
flush_delayed_work(&adapter->ibmvnic_delayed_reset);
|
||||
|
||||
|
@ -6336,15 +6548,40 @@ static struct vio_driver ibmvnic_driver = {
|
|||
/* module functions */
|
||||
/* Module init: set up the two CPU hotplug multi-states (dynamic online
 * state whose id is stored in ibmvnic_online, and CPUHP_IBMVNIC_DEAD),
 * then register the vio driver.  Each failure unwinds everything set
 * up before it via the goto ladder.
 *
 * Fix: the original contained a stray duplicate
 * `return vio_register_driver(&ibmvnic_driver);` before `return 0;`,
 * which registered the driver a second time and made the success
 * message and the error labels unreachable — removed.
 */
static int __init ibmvnic_module_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "net/ibmvnic:online",
				      ibmvnic_cpu_online,
				      ibmvnic_cpu_down_prep);
	if (ret < 0)
		goto out;
	/* for dynamic states the returned value is the allocated state id */
	ibmvnic_online = ret;
	ret = cpuhp_setup_state_multi(CPUHP_IBMVNIC_DEAD, "net/ibmvnic:dead",
				      NULL, ibmvnic_cpu_dead);
	if (ret)
		goto err_dead;

	ret = vio_register_driver(&ibmvnic_driver);
	if (ret)
		goto err_vio_register;

	pr_info("%s: %s %s\n", ibmvnic_driver_name, ibmvnic_driver_string,
		IBMVNIC_DRIVER_VERSION);

	return 0;
err_vio_register:
	cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD);
err_dead:
	cpuhp_remove_multi_state(ibmvnic_online);
out:
	return ret;
}
|
||||
|
||||
/* Module exit: reverse of ibmvnic_module_init() — unregister the vio
 * driver first, then tear down both CPU hotplug states.
 */
static void __exit ibmvnic_module_exit(void)
{
	vio_unregister_driver(&ibmvnic_driver);
	cpuhp_remove_multi_state(CPUHP_IBMVNIC_DEAD);
	cpuhp_remove_multi_state(ibmvnic_online);
}
|
||||
|
||||
module_init(ibmvnic_module_init);
|
||||
|
|
|
@ -825,6 +825,7 @@ struct ibmvnic_sub_crq_queue {
|
|||
atomic_t used;
|
||||
char name[32];
|
||||
u64 handle;
|
||||
cpumask_var_t affinity_mask;
|
||||
} ____cacheline_aligned;
|
||||
|
||||
struct ibmvnic_long_term_buff {
|
||||
|
@ -983,6 +984,10 @@ struct ibmvnic_adapter {
|
|||
int reset_done_rc;
|
||||
bool wait_for_reset;
|
||||
|
||||
/* CPU hotplug instances for online & dead */
|
||||
struct hlist_node node;
|
||||
struct hlist_node node_dead;
|
||||
|
||||
/* partner capabilities */
|
||||
u64 min_tx_queues;
|
||||
u64 min_rx_queues;
|
||||
|
|
|
@ -69,6 +69,7 @@ enum cpuhp_state {
|
|||
CPUHP_X86_APB_DEAD,
|
||||
CPUHP_X86_MCE_DEAD,
|
||||
CPUHP_VIRT_NET_DEAD,
|
||||
CPUHP_IBMVNIC_DEAD,
|
||||
CPUHP_SLUB_DEAD,
|
||||
CPUHP_DEBUG_OBJ_DEAD,
|
||||
CPUHP_MM_WRITEBACK_DEAD,
|
||||
|
|
Loading…
Reference in New Issue