cxgb4: Try and provide an RDMA CIQ per cpu
To allow for better scalability on systems with large core counts, we will try to allocate as many RDMA Concentrator IQs and MSI/X vectors as we have cores. If we cannot get enough MSI/X vectors, fall back to the minimum required: 1 per adapter rx channel. Also clean up cxgb_enable_msix() to make it readable, and correct a bug where the vectors are not correctly assigned if the driver doesn't get the full amount requested. Signed-off-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
1c6a5b0e34
commit
f36e58e566
|
@ -369,7 +369,7 @@ enum {
|
||||||
MAX_OFLD_QSETS = 16, /* # of offload Tx/Rx queue sets */
|
MAX_OFLD_QSETS = 16, /* # of offload Tx/Rx queue sets */
|
||||||
MAX_CTRL_QUEUES = NCHAN, /* # of control Tx queues */
|
MAX_CTRL_QUEUES = NCHAN, /* # of control Tx queues */
|
||||||
MAX_RDMA_QUEUES = NCHAN, /* # of streaming RDMA Rx queues */
|
MAX_RDMA_QUEUES = NCHAN, /* # of streaming RDMA Rx queues */
|
||||||
MAX_RDMA_CIQS = NCHAN, /* # of RDMA concentrator IQs */
|
MAX_RDMA_CIQS = 32, /* # of RDMA concentrator IQs */
|
||||||
MAX_ISCSI_QUEUES = NCHAN, /* # of streaming iSCSI Rx queues */
|
MAX_ISCSI_QUEUES = NCHAN, /* # of streaming iSCSI Rx queues */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -599,8 +599,8 @@ struct sge {
|
||||||
u16 rdmaqs; /* # of available RDMA Rx queues */
|
u16 rdmaqs; /* # of available RDMA Rx queues */
|
||||||
u16 rdmaciqs; /* # of available RDMA concentrator IQs */
|
u16 rdmaciqs; /* # of available RDMA concentrator IQs */
|
||||||
u16 ofld_rxq[MAX_OFLD_QSETS];
|
u16 ofld_rxq[MAX_OFLD_QSETS];
|
||||||
u16 rdma_rxq[NCHAN];
|
u16 rdma_rxq[MAX_RDMA_QUEUES];
|
||||||
u16 rdma_ciq[NCHAN];
|
u16 rdma_ciq[MAX_RDMA_CIQS];
|
||||||
u16 timer_val[SGE_NTIMERS];
|
u16 timer_val[SGE_NTIMERS];
|
||||||
u8 counter_val[SGE_NCOUNTERS];
|
u8 counter_val[SGE_NCOUNTERS];
|
||||||
u32 fl_pg_order; /* large page allocation size */
|
u32 fl_pg_order; /* large page allocation size */
|
||||||
|
|
|
@ -1769,6 +1769,8 @@ do { \
|
||||||
int n = min(4, adap->sge.rdmaqs - 4 * rdma_idx);
|
int n = min(4, adap->sge.rdmaqs - 4 * rdma_idx);
|
||||||
|
|
||||||
S("QType:", "RDMA-CPL");
|
S("QType:", "RDMA-CPL");
|
||||||
|
S("Interface:",
|
||||||
|
rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
|
||||||
R("RspQ ID:", rspq.abs_id);
|
R("RspQ ID:", rspq.abs_id);
|
||||||
R("RspQ size:", rspq.size);
|
R("RspQ size:", rspq.size);
|
||||||
R("RspQE size:", rspq.iqe_len);
|
R("RspQE size:", rspq.iqe_len);
|
||||||
|
@ -1788,6 +1790,8 @@ do { \
|
||||||
int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx);
|
int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx);
|
||||||
|
|
||||||
S("QType:", "RDMA-CIQ");
|
S("QType:", "RDMA-CIQ");
|
||||||
|
S("Interface:",
|
||||||
|
rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A");
|
||||||
R("RspQ ID:", rspq.abs_id);
|
R("RspQ ID:", rspq.abs_id);
|
||||||
R("RspQ size:", rspq.size);
|
R("RspQ size:", rspq.size);
|
||||||
R("RspQE size:", rspq.iqe_len);
|
R("RspQE size:", rspq.iqe_len);
|
||||||
|
|
|
@ -1057,7 +1057,8 @@ freeout: t4_free_sge_resources(adap);
|
||||||
|
|
||||||
ALLOC_OFLD_RXQS(s->ofldrxq, s->ofldqsets, j, s->ofld_rxq);
|
ALLOC_OFLD_RXQS(s->ofldrxq, s->ofldqsets, j, s->ofld_rxq);
|
||||||
ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq);
|
ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq);
|
||||||
ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, 1, s->rdma_ciq);
|
j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */
|
||||||
|
ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq);
|
||||||
|
|
||||||
#undef ALLOC_OFLD_RXQS
|
#undef ALLOC_OFLD_RXQS
|
||||||
|
|
||||||
|
@ -5702,7 +5703,16 @@ static void cfg_queues(struct adapter *adap)
|
||||||
s->ofldqsets = adap->params.nports;
|
s->ofldqsets = adap->params.nports;
|
||||||
/* For RDMA one Rx queue per channel suffices */
|
/* For RDMA one Rx queue per channel suffices */
|
||||||
s->rdmaqs = adap->params.nports;
|
s->rdmaqs = adap->params.nports;
|
||||||
s->rdmaciqs = adap->params.nports;
|
/* Try and allow at least 1 CIQ per cpu rounding down
|
||||||
|
* to the number of ports, with a minimum of 1 per port.
|
||||||
|
* A 2 port card in a 6 cpu system: 6 CIQs, 3 / port.
|
||||||
|
* A 4 port card in a 6 cpu system: 4 CIQs, 1 / port.
|
||||||
|
* A 4 port card in a 2 cpu system: 4 CIQs, 1 / port.
|
||||||
|
*/
|
||||||
|
s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus());
|
||||||
|
s->rdmaciqs = (s->rdmaciqs / adap->params.nports) *
|
||||||
|
adap->params.nports;
|
||||||
|
s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
|
for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
|
||||||
|
@ -5788,12 +5798,17 @@ static void reduce_ethqs(struct adapter *adap, int n)
|
||||||
static int enable_msix(struct adapter *adap)
|
static int enable_msix(struct adapter *adap)
|
||||||
{
|
{
|
||||||
int ofld_need = 0;
|
int ofld_need = 0;
|
||||||
int i, want, need;
|
int i, want, need, allocated;
|
||||||
struct sge *s = &adap->sge;
|
struct sge *s = &adap->sge;
|
||||||
unsigned int nchan = adap->params.nports;
|
unsigned int nchan = adap->params.nports;
|
||||||
struct msix_entry entries[MAX_INGQ + 1];
|
struct msix_entry *entries;
|
||||||
|
|
||||||
for (i = 0; i < ARRAY_SIZE(entries); ++i)
|
entries = kmalloc(sizeof(*entries) * (MAX_INGQ + 1),
|
||||||
|
GFP_KERNEL);
|
||||||
|
if (!entries)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
for (i = 0; i < MAX_INGQ + 1; ++i)
|
||||||
entries[i].entry = i;
|
entries[i].entry = i;
|
||||||
|
|
||||||
want = s->max_ethqsets + EXTRA_VECS;
|
want = s->max_ethqsets + EXTRA_VECS;
|
||||||
|
@ -5810,29 +5825,39 @@ static int enable_msix(struct adapter *adap)
|
||||||
#else
|
#else
|
||||||
need = adap->params.nports + EXTRA_VECS + ofld_need;
|
need = adap->params.nports + EXTRA_VECS + ofld_need;
|
||||||
#endif
|
#endif
|
||||||
want = pci_enable_msix_range(adap->pdev, entries, need, want);
|
allocated = pci_enable_msix_range(adap->pdev, entries, need, want);
|
||||||
if (want < 0)
|
if (allocated < 0) {
|
||||||
return want;
|
dev_info(adap->pdev_dev, "not enough MSI-X vectors left,"
|
||||||
|
" not using MSI-X\n");
|
||||||
|
kfree(entries);
|
||||||
|
return allocated;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/* Distribute available vectors to the various queue groups.
|
||||||
* Distribute available vectors to the various queue groups.
|
|
||||||
* Every group gets its minimum requirement and NIC gets top
|
* Every group gets its minimum requirement and NIC gets top
|
||||||
* priority for leftovers.
|
* priority for leftovers.
|
||||||
*/
|
*/
|
||||||
i = want - EXTRA_VECS - ofld_need;
|
i = allocated - EXTRA_VECS - ofld_need;
|
||||||
if (i < s->max_ethqsets) {
|
if (i < s->max_ethqsets) {
|
||||||
s->max_ethqsets = i;
|
s->max_ethqsets = i;
|
||||||
if (i < s->ethqsets)
|
if (i < s->ethqsets)
|
||||||
reduce_ethqs(adap, i);
|
reduce_ethqs(adap, i);
|
||||||
}
|
}
|
||||||
if (is_offload(adap)) {
|
if (is_offload(adap)) {
|
||||||
i = want - EXTRA_VECS - s->max_ethqsets;
|
if (allocated < want) {
|
||||||
i -= ofld_need - nchan;
|
s->rdmaqs = nchan;
|
||||||
|
s->rdmaciqs = nchan;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* leftovers go to OFLD */
|
||||||
|
i = allocated - EXTRA_VECS - s->max_ethqsets -
|
||||||
|
s->rdmaqs - s->rdmaciqs;
|
||||||
s->ofldqsets = (i / nchan) * nchan; /* round down */
|
s->ofldqsets = (i / nchan) * nchan; /* round down */
|
||||||
}
|
}
|
||||||
for (i = 0; i < want; ++i)
|
for (i = 0; i < allocated; ++i)
|
||||||
adap->msix_info[i].vec = entries[i].vector;
|
adap->msix_info[i].vec = entries[i].vector;
|
||||||
|
|
||||||
|
kfree(entries);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue