Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull infiniband updates from Roland Dreier:
 "First batch of InfiniBand/RDMA changes for the 3.7 merge window:
   - mlx4 IB support for SR-IOV
   - A couple of SRP initiator fixes
   - Batch of nes hardware driver fixes
   - Fix for long-standing use-after-free crash in IPoIB
   - Other miscellaneous fixes"

This merge also removes a new use of __cancel_delayed_work(), and
replaces it with the regular cancel_delayed_work() that is now irq-safe
thanks to the workqueue updates.

That said, I suspect the sequence in question should probably use
"mod_delayed_work()".  I just did the minimal "don't use deprecated
functions" fixup, though.

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (45 commits)
  IB/qib: Fix local access validation for user MRs
  mlx4_core: Disable SENSE_PORT for multifunction devices
  mlx4_core: Clean up enabling of SENSE_PORT for older (ConnectX-1/-2) HCAs
  mlx4_core: Stash PCI ID driver_data in mlx4_priv structure
  IB/srp: Avoid having aborted requests hang
  IB/srp: Fix use-after-free in srp_reset_req()
  IB/qib: Add a qib driver version
  RDMA/nes: Fix compilation error when nes_debug is enabled
  RDMA/nes: Print hardware resource type
  RDMA/nes: Fix for crash when TX checksum offload is off
  RDMA/nes: Cosmetic changes
  RDMA/nes: Fix for incorrect MSS when TSO is on
  RDMA/nes: Fix incorrect resolving of the loopback MAC address
  mlx4_core: Fix crash on uninitialized priv->cmd.slave_sem
  mlx4_core: Trivial cleanups to driver log messages
  mlx4_core: Trivial readability fix: "0X30" -> "0x30"
  IB/mlx4: Create paravirt contexts for VFs when master IB driver initializes
  mlx4: Modify proxy/tunnel QP mechanism so that guests do no calculations
  mlx4: Paravirtualize Node Guids for slaves
  mlx4: Activate SR-IOV mode for IB
  ...
Linus Torvalds 2012-10-02 17:20:40 -07:00
commit 7a9a2970b5
43 changed files with 7304 additions and 463 deletions

View File

@ -167,6 +167,7 @@ int ib_find_cached_pkey(struct ib_device *device,
unsigned long flags;
int i;
int ret = -ENOENT;
int partial_ix = -1;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
@ -179,6 +180,46 @@ int ib_find_cached_pkey(struct ib_device *device,
for (i = 0; i < cache->table_len; ++i)
if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
if (cache->table[i] & 0x8000) {
*index = i;
ret = 0;
break;
} else
partial_ix = i;
}
if (ret && partial_ix >= 0) {
*index = partial_ix;
ret = 0;
}
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
int ib_find_exact_cached_pkey(struct ib_device *device,
u8 port_num,
u16 pkey,
u16 *index)
{
struct ib_pkey_cache *cache;
unsigned long flags;
int i;
int ret = -ENOENT;
if (port_num < start_port(device) || port_num > end_port(device))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
cache = device->cache.pkey_cache[port_num - start_port(device)];
*index = -1;
for (i = 0; i < cache->table_len; ++i)
if (cache->table[i] == pkey) {
*index = i;
ret = 0;
break;
@ -188,7 +229,7 @@ int ib_find_cached_pkey(struct ib_device *device,
return ret;
}
EXPORT_SYMBOL(ib_find_cached_pkey);
EXPORT_SYMBOL(ib_find_exact_cached_pkey);
int ib_get_cached_lmc(struct ib_device *device,
u8 port_num,

View File

@ -3058,7 +3058,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
if (id_priv->id.ps == RDMA_PS_IPOIB)
comp_mask |= IB_SA_MCMEMBER_REC_RATE |
IB_SA_MCMEMBER_REC_RATE_SELECTOR;
IB_SA_MCMEMBER_REC_RATE_SELECTOR |
IB_SA_MCMEMBER_REC_MTU_SELECTOR |
IB_SA_MCMEMBER_REC_MTU |
IB_SA_MCMEMBER_REC_HOP_LIMIT;
mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
id_priv->id.port_num, &rec,

View File

@ -707,18 +707,28 @@ int ib_find_pkey(struct ib_device *device,
{
int ret, i;
u16 tmp_pkey;
int partial_ix = -1;
for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
if (ret)
return ret;
if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
*index = i;
return 0;
/* if there is full-member pkey take it.*/
if (tmp_pkey & 0x8000) {
*index = i;
return 0;
}
if (partial_ix < 0)
partial_ix = i;
}
}
/*no full-member, if exists take the limited*/
if (partial_ix >= 0) {
*index = partial_ix;
return 0;
}
return -ENOENT;
}
EXPORT_SYMBOL(ib_find_pkey);

View File

@ -397,7 +397,6 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
struct ib_ucm_event_get cmd;
struct ib_ucm_event *uevent;
int result = 0;
DEFINE_WAIT(wait);
if (out_len < sizeof(struct ib_ucm_event_resp))
return -ENOSPC;

View File

@ -310,7 +310,6 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
struct rdma_ucm_get_event cmd;
struct ucma_event *uevent;
int ret = 0;
DEFINE_WAIT(wait);
if (out_len < sizeof uevent->resp)
return -ENOSPC;

View File

@ -137,19 +137,25 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
return -ENOMEM;
wq->rq.qid = c4iw_get_qpid(rdev, uctx);
if (!wq->rq.qid)
goto err1;
if (!wq->rq.qid) {
ret = -ENOMEM;
goto free_sq_qid;
}
if (!user) {
wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq,
GFP_KERNEL);
if (!wq->sq.sw_sq)
goto err2;
if (!wq->sq.sw_sq) {
ret = -ENOMEM;
goto free_rq_qid;
}
wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq,
GFP_KERNEL);
if (!wq->rq.sw_rq)
goto err3;
if (!wq->rq.sw_rq) {
ret = -ENOMEM;
goto free_sw_sq;
}
}
/*
@ -157,15 +163,23 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
*/
wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size);
wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
if (!wq->rq.rqt_hwaddr)
goto err4;
if (!wq->rq.rqt_hwaddr) {
ret = -ENOMEM;
goto free_sw_rq;
}
if (user) {
if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq))
goto err5;
ret = alloc_oc_sq(rdev, &wq->sq);
if (ret)
goto free_hwaddr;
ret = alloc_host_sq(rdev, &wq->sq);
if (ret)
goto free_sq;
} else
if (alloc_host_sq(rdev, &wq->sq))
goto err5;
ret = alloc_host_sq(rdev, &wq->sq);
if (ret)
goto free_hwaddr;
memset(wq->sq.queue, 0, wq->sq.memsize);
dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
@ -173,7 +187,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
wq->rq.memsize, &(wq->rq.dma_addr),
GFP_KERNEL);
if (!wq->rq.queue)
goto err6;
goto free_sq;
PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
__func__, wq->sq.queue,
(unsigned long long)virt_to_phys(wq->sq.queue),
@ -201,7 +215,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
skb = alloc_skb(wr_len, GFP_KERNEL);
if (!skb) {
ret = -ENOMEM;
goto err7;
goto free_dma;
}
set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
@ -266,33 +280,33 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
ret = c4iw_ofld_send(rdev, skb);
if (ret)
goto err7;
goto free_dma;
ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__);
if (ret)
goto err7;
goto free_dma;
PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n",
__func__, wq->sq.qid, wq->rq.qid, wq->db,
(unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
return 0;
err7:
free_dma:
dma_free_coherent(&(rdev->lldi.pdev->dev),
wq->rq.memsize, wq->rq.queue,
dma_unmap_addr(&wq->rq, mapping));
err6:
free_sq:
dealloc_sq(rdev, &wq->sq);
err5:
free_hwaddr:
c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
err4:
free_sw_rq:
kfree(wq->rq.sw_rq);
err3:
free_sw_sq:
kfree(wq->sq.sw_sq);
err2:
free_rq_qid:
c4iw_put_qpid(rdev, wq->rq.qid, uctx);
err1:
free_sq_qid:
c4iw_put_qpid(rdev, wq->sq.qid, uctx);
return -ENOMEM;
return ret;
}
static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,

View File

@ -1,3 +1,3 @@
obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o alias_GUID.o sysfs.o

View File

@ -0,0 +1,688 @@
/*
* Copyright (c) 2012 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/***********************************************************/
/*This file support the handling of the Alias GUID feature. */
/***********************************************************/
#include <rdma/ib_mad.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_sa.h>
#include <rdma/ib_pack.h>
#include <linux/mlx4/cmd.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <rdma/ib_user_verbs.h>
#include <linux/delay.h>
#include "mlx4_ib.h"
/*
The driver keeps the current state of all guids, as they are in the HW.
Whenever we receive an smp mad GUIDInfo record, the data will be cached.
*/
struct mlx4_alias_guid_work_context {
u8 port;
struct mlx4_ib_dev *dev ;
struct ib_sa_query *sa_query;
struct completion done;
int query_id;
struct list_head list;
int block_num;
};
struct mlx4_next_alias_guid_work {
u8 port;
u8 block_num;
struct mlx4_sriov_alias_guid_info_rec_det rec_det;
};
void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
u8 port_num, u8 *p_data)
{
int i;
u64 guid_indexes;
int slave_id;
int port_index = port_num - 1;
if (!mlx4_is_master(dev->dev))
return;
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
/* The location of the specific index starts from bit number 4
* until bit num 11 */
if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
if (slave_id >= dev->dev->num_slaves) {
pr_debug("The last slave: %d\n", slave_id);
return;
}
/* cache the guid: */
memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
&p_data[i * GUID_REC_SIZE],
GUID_REC_SIZE);
} else
pr_debug("Guid number: %d in block: %d"
" was not updated\n", i, block_num);
}
}
static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
{
if (index >= NUM_ALIAS_GUID_PER_PORT) {
pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
return (__force __be64) ((u64) 0xFFFFFFFFFFFFFFFFUL);
}
return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
}
ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
{
return IB_SA_COMP_MASK(4 + index);
}
/*
* Whenever new GUID is set/unset (guid table change) create event and
* notify the relevant slave (master also should be notified).
* If the GUID value is not as we have in the cache the slave will not be
* updated; in this case it waits for the smp_snoop or the port management
* event to call the function and to update the slave.
* block_number - the index of the block (16 blocks available)
* port_number - 1 or 2
*/
void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
int block_num, u8 port_num,
u8 *p_data)
{
int i;
u64 guid_indexes;
int slave_id;
enum slave_port_state new_state;
enum slave_port_state prev_state;
__be64 tmp_cur_ag, form_cache_ag;
enum slave_port_gen_event gen_event;
if (!mlx4_is_master(dev->dev))
return;
guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
ports_guid[port_num - 1].
all_rec_per_port[block_num].guid_indexes);
pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
/*calculate the slaves and notify them*/
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
/* the location of the specific index runs from bits 4..11 */
if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
continue;
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
if (slave_id >= dev->dev->num_slaves)
return;
tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
form_cache_ag = get_cached_alias_guid(dev, port_num,
(NUM_ALIAS_GUID_IN_REC * block_num) + i);
/*
* Check if guid is not the same as in the cache,
* If it is different, wait for the snoop_smp or the port mgmt
* change event to update the slave on its port state change
*/
if (tmp_cur_ag != form_cache_ag)
continue;
mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
/*2 cases: Valid GUID, and Invalid Guid*/
if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
&gen_event);
pr_debug("slave: %d, port: %d prev_port_state: %d,"
" new_port_state: %d, gen_event: %d\n",
slave_id, port_num, prev_state, new_state, gen_event);
if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
slave_id, port_num);
mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
}
} else { /* request to invalidate GUID */
set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
&gen_event);
pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
slave_id, port_num);
mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
MLX4_PORT_CHANGE_SUBTYPE_DOWN);
}
}
}
static void aliasguid_query_handler(int status,
struct ib_sa_guidinfo_rec *guid_rec,
void *context)
{
struct mlx4_ib_dev *dev;
struct mlx4_alias_guid_work_context *cb_ctx = context;
u8 port_index ;
int i;
struct mlx4_sriov_alias_guid_info_rec_det *rec;
unsigned long flags, flags1;
if (!context)
return;
dev = cb_ctx->dev;
port_index = cb_ctx->port - 1;
rec = &dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[cb_ctx->block_num];
if (status) {
rec->status = MLX4_GUID_INFO_STATUS_IDLE;
pr_debug("(port: %d) failed: status = %d\n",
cb_ctx->port, status);
goto out;
}
if (guid_rec->block_num != cb_ctx->block_num) {
pr_err("block num mismatch: %d != %d\n",
cb_ctx->block_num, guid_rec->block_num);
goto out;
}
pr_debug("lid/port: %d/%d, block_num: %d\n",
be16_to_cpu(guid_rec->lid), cb_ctx->port,
guid_rec->block_num);
rec = &dev->sriov.alias_guid.ports_guid[port_index].
all_rec_per_port[guid_rec->block_num];
rec->status = MLX4_GUID_INFO_STATUS_SET;
rec->method = MLX4_GUID_INFO_RECORD_SET;
for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
__be64 tmp_cur_ag;
tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
/* check if the SM didn't assign one of the records.
* if it didn't, if it was not sysadmin request:
* ask the SM to give a new GUID, (instead of the driver request).
*/
if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
"block_num: %d was declined by SM, "
"ownership by %d (0 = driver, 1=sysAdmin,"
" 2=None)\n", __func__, i,
guid_rec->block_num, rec->ownership);
if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
/* if it is driver assign, asks for new GUID from SM*/
*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
MLX4_NOT_SET_GUID;
/* Mark the record as not assigned, and let it
* be sent again in the next work sched.*/
rec->status = MLX4_GUID_INFO_STATUS_IDLE;
rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
}
} else {
/* properly assigned record. */
/* We save the GUID we just got from the SM in the
* admin_guid in order to be persistent, and in the
* request from the sm the process will ask for the same GUID */
if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
/* the sysadmin assignment failed.*/
mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
" admin guid after SysAdmin "
"configuration. "
"Record num %d in block_num:%d "
"was declined by SM, "
"new val(0x%llx) was kept\n",
__func__, i,
guid_rec->block_num,
be64_to_cpu(*(__be64 *) &
rec->all_recs[i * GUID_REC_SIZE]));
} else {
memcpy(&rec->all_recs[i * GUID_REC_SIZE],
&guid_rec->guid_info_list[i * GUID_REC_SIZE],
GUID_REC_SIZE);
}
}
}
/*
The func is call here to close the cases when the
sm doesn't send smp, so in the sa response the driver
notifies the slave.
*/
mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
cb_ctx->port,
guid_rec->guid_info_list);
out:
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
if (!dev->sriov.is_going_down)
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
&dev->sriov.alias_guid.ports_guid[port_index].
alias_guid_work, 0);
if (cb_ctx->sa_query) {
list_del(&cb_ctx->list);
kfree(cb_ctx);
} else
complete(&cb_ctx->done);
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
{
int i;
u64 cur_admin_val;
ib_sa_comp_mask comp_mask = 0;
dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
= MLX4_GUID_INFO_STATUS_IDLE;
dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
= MLX4_GUID_INFO_RECORD_SET;
/* calculate the comp_mask for that record.*/
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
cur_admin_val =
*(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
/*
check the admin value: if it's for delete (~00LL) or
it is the first guid of the first record (hw guid) or
the records is not in ownership of the sysadmin and the sm doesn't
need to assign GUIDs, then don't put it up for assignment.
*/
if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
(!index && !i) ||
MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
ports_guid[port - 1].all_rec_per_port[index].ownership)
continue;
comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
}
dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[index].guid_indexes = comp_mask;
}
static int set_guid_rec(struct ib_device *ibdev,
u8 port, int index,
struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
{
int err;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct ib_sa_guidinfo_rec guid_info_rec;
ib_sa_comp_mask comp_mask;
struct ib_port_attr attr;
struct mlx4_alias_guid_work_context *callback_context;
unsigned long resched_delay, flags, flags1;
struct list_head *head =
&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
if (err) {
pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
err, port);
return err;
}
/*check the port was configured by the sm, otherwise no need to send */
if (attr.state != IB_PORT_ACTIVE) {
pr_debug("port %d not active...rescheduling\n", port);
resched_delay = 5 * HZ;
err = -EAGAIN;
goto new_schedule;
}
callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
if (!callback_context) {
err = -ENOMEM;
resched_delay = HZ * 5;
goto new_schedule;
}
callback_context->port = port;
callback_context->dev = dev;
callback_context->block_num = index;
memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
guid_info_rec.lid = cpu_to_be16(attr.lid);
guid_info_rec.block_num = index;
memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
rec_det->guid_indexes;
init_completion(&callback_context->done);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
list_add_tail(&callback_context->list, head);
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
callback_context->query_id =
ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
ibdev, port, &guid_info_rec,
comp_mask, rec_det->method, 1000,
GFP_KERNEL, aliasguid_query_handler,
callback_context,
&callback_context->sa_query);
if (callback_context->query_id < 0) {
pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
"%d. will reschedule to the next 1 sec.\n",
callback_context->query_id);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
list_del(&callback_context->list);
kfree(callback_context);
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
resched_delay = 1 * HZ;
err = -EAGAIN;
goto new_schedule;
}
err = 0;
goto out;
new_schedule:
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
invalidate_guid_record(dev, port, index);
if (!dev->sriov.is_going_down) {
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
&dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
resched_delay);
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
out:
return err;
}
void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
{
int i;
unsigned long flags, flags1;
pr_debug("port %d\n", port);
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
invalidate_guid_record(dev, port, i);
if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
/*
make sure no work waits in the queue, if the work is already
queued(not on the timer) the cancel will fail. That is not a problem
because we just want the work started.
*/
cancel_delayed_work(&dev->sriov.alias_guid.
ports_guid[port - 1].alias_guid_work);
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
&dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
0);
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
/* The function returns the next record that was
* not configured (or failed to be configured) */
static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
struct mlx4_next_alias_guid_work *rec)
{
int j;
unsigned long flags;
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
MLX4_GUID_INFO_STATUS_IDLE) {
memcpy(&rec->rec_det,
&dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
rec->port = port;
rec->block_num = j;
dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
MLX4_GUID_INFO_STATUS_PENDING;
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
return 0;
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
}
return -ENOENT;
}
static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
int rec_index,
struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
{
dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
rec_det->guid_indexes;
memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
rec_det->status;
}
static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
{
int j;
struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
IB_SA_GUIDINFO_REC_GID7;
rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
set_administratively_guid_record(dev, port, j, &rec_det);
}
}
static void alias_guid_work(struct work_struct *work)
{
struct delayed_work *delay = to_delayed_work(work);
int ret = 0;
struct mlx4_next_alias_guid_work *rec;
struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
alias_guid_work);
struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
struct mlx4_ib_sriov,
alias_guid);
struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
rec = kzalloc(sizeof *rec, GFP_KERNEL);
if (!rec) {
pr_err("alias_guid_work: No Memory\n");
return;
}
pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
if (ret) {
pr_debug("No more records to update.\n");
goto out;
}
set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
&rec->rec_det);
out:
kfree(rec);
}
void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
{
unsigned long flags, flags1;
if (!mlx4_is_master(dev->dev))
return;
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
if (!dev->sriov.is_going_down) {
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
&dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
{
int i;
struct mlx4_ib_sriov *sriov = &dev->sriov;
struct mlx4_alias_guid_work_context *cb_ctx;
struct mlx4_sriov_alias_guid_port_rec_det *det;
struct ib_sa_query *sa_query;
unsigned long flags;
for (i = 0 ; i < dev->num_ports; i++) {
cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work);
det = &sriov->alias_guid.ports_guid[i];
spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
while (!list_empty(&det->cb_list)) {
cb_ctx = list_entry(det->cb_list.next,
struct mlx4_alias_guid_work_context,
list);
sa_query = cb_ctx->sa_query;
cb_ctx->sa_query = NULL;
list_del(&cb_ctx->list);
spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
ib_sa_cancel_query(cb_ctx->query_id, sa_query);
wait_for_completion(&cb_ctx->done);
kfree(cb_ctx);
spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
}
spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
}
for (i = 0 ; i < dev->num_ports; i++) {
flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
}
ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
kfree(dev->sriov.alias_guid.sa_client);
}
int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
{
char alias_wq_name[15];
int ret = 0;
int i, j, k;
union ib_gid gid;
if (!mlx4_is_master(dev->dev))
return 0;
dev->sriov.alias_guid.sa_client =
kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
if (!dev->sriov.alias_guid.sa_client)
return -ENOMEM;
ib_sa_register_client(dev->sriov.alias_guid.sa_client);
spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
for (i = 1; i <= dev->num_ports; ++i) {
if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) {
ret = -EFAULT;
goto err_unregister;
}
}
for (i = 0 ; i < dev->num_ports; i++) {
memset(&dev->sriov.alias_guid.ports_guid[i], 0,
sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
/*Check if the SM doesn't need to assign the GUIDs*/
for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
if (mlx4_ib_sm_guid_assign) {
dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j].
ownership = MLX4_GUID_DRIVER_ASSIGN;
continue;
}
dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
ownership = MLX4_GUID_NONE_ASSIGN;
/*mark each val as it was deleted,
till the sysAdmin will give it valid val*/
for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
*(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
}
}
INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
/*prepare the records, set them to be allocated by sm*/
for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
invalidate_guid_record(dev, i + 1, j);
dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
dev->sriov.alias_guid.ports_guid[i].port = i;
if (mlx4_ib_sm_guid_assign)
set_all_slaves_guids(dev, i);
snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
dev->sriov.alias_guid.ports_guid[i].wq =
create_singlethread_workqueue(alias_wq_name);
if (!dev->sriov.alias_guid.ports_guid[i].wq) {
ret = -ENOMEM;
goto err_thread;
}
INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
alias_guid_work);
}
return 0;
err_thread:
for (--i; i >= 0; i--) {
destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
dev->sriov.alias_guid.ports_guid[i].wq = NULL;
}
err_unregister:
ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
kfree(dev->sriov.alias_guid.sa_client);
dev->sriov.alias_guid.sa_client = NULL;
pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
return ret;
}

View File

@ -0,0 +1,437 @@
/*
* Copyright (c) 2012 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <rdma/ib_mad.h>
#include <linux/mlx4/cmd.h>
#include <linux/rbtree.h>
#include <linux/idr.h>
#include <rdma/ib_cm.h>
#include "mlx4_ib.h"
#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
struct id_map_entry {
struct rb_node node;
u32 sl_cm_id;
u32 pv_cm_id;
int slave_id;
int scheduled_delete;
struct mlx4_ib_dev *dev;
struct list_head list;
struct delayed_work timeout;
};
struct cm_generic_msg {
struct ib_mad_hdr hdr;
__be32 local_comm_id;
__be32 remote_comm_id;
};
struct cm_req_msg {
unsigned char unused[0x60];
union ib_gid primary_path_sgid;
};
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
msg->local_comm_id = cpu_to_be32(cm_id);
}
static u32 get_local_comm_id(struct ib_mad *mad)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
return be32_to_cpu(msg->local_comm_id);
}
static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
msg->remote_comm_id = cpu_to_be32(cm_id);
}
static u32 get_remote_comm_id(struct ib_mad *mad)
{
struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
return be32_to_cpu(msg->remote_comm_id);
}
static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
{
struct cm_req_msg *msg = (struct cm_req_msg *)mad;
return msg->primary_path_sgid;
}
/* Lock should be taken before called */
static struct id_map_entry *
id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id)
{
struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
struct rb_node *node = sl_id_map->rb_node;
while (node) {
struct id_map_entry *id_map_entry =
rb_entry(node, struct id_map_entry, node);
if (id_map_entry->sl_cm_id > sl_cm_id)
node = node->rb_left;
else if (id_map_entry->sl_cm_id < sl_cm_id)
node = node->rb_right;
else if (id_map_entry->slave_id > slave_id)
node = node->rb_left;
else if (id_map_entry->slave_id < slave_id)
node = node->rb_right;
else
return id_map_entry;
}
return NULL;
}
static void id_map_ent_timeout(struct work_struct *work)
{
struct delayed_work *delay = to_delayed_work(work);
struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
struct id_map_entry *db_ent, *found_ent;
struct mlx4_ib_dev *dev = ent->dev;
struct mlx4_ib_sriov *sriov = &dev->sriov;
struct rb_root *sl_id_map = &sriov->sl_id_map;
int pv_id = (int) ent->pv_cm_id;
spin_lock(&sriov->id_map_lock);
db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id);
if (!db_ent)
goto out;
found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
if (found_ent && found_ent == ent)
rb_erase(&found_ent->node, sl_id_map);
idr_remove(&sriov->pv_id_table, pv_id);
out:
list_del(&ent->list);
spin_unlock(&sriov->id_map_lock);
kfree(ent);
}
static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
{
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
struct rb_root *sl_id_map = &sriov->sl_id_map;
struct id_map_entry *ent, *found_ent;
spin_lock(&sriov->id_map_lock);
ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id);
if (!ent)
goto out;
found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
if (found_ent && found_ent == ent)
rb_erase(&found_ent->node, sl_id_map);
idr_remove(&sriov->pv_id_table, pv_cm_id);
out:
spin_unlock(&sriov->id_map_lock);
}
static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
{
struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
struct rb_node **link = &sl_id_map->rb_node, *parent = NULL;
struct id_map_entry *ent;
int slave_id = new->slave_id;
int sl_cm_id = new->sl_cm_id;
ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
if (ent) {
pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n",
sl_cm_id);
rb_replace_node(&ent->node, &new->node, sl_id_map);
return;
}
/* Go to the bottom of the tree */
while (*link) {
parent = *link;
ent = rb_entry(parent, struct id_map_entry, node);
if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id))
link = &(*link)->rb_left;
else
link = &(*link)->rb_right;
}
rb_link_node(&new->node, parent, link);
rb_insert_color(&new->node, sl_id_map);
}
static struct id_map_entry *
id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
{
int ret, id;
static int next_id;
struct id_map_entry *ent;
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
if (!ent) {
mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
return ERR_PTR(-ENOMEM);
}
ent->sl_cm_id = sl_cm_id;
ent->slave_id = slave_id;
ent->scheduled_delete = 0;
ent->dev = to_mdev(ibdev);
INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
do {
spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
ret = idr_get_new_above(&sriov->pv_id_table, ent,
next_id, &id);
if (!ret) {
next_id = ((unsigned) id + 1) & MAX_ID_MASK;
ent->pv_cm_id = (u32)id;
sl_id_map_add(ibdev, ent);
}
spin_unlock(&sriov->id_map_lock);
} while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL));
/*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/
if (!ret) {
spin_lock(&sriov->id_map_lock);
list_add_tail(&ent->list, &sriov->cm_list);
spin_unlock(&sriov->id_map_lock);
return ent;
}
/*error flow*/
kfree(ent);
mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
return ERR_PTR(-ENOMEM);
}
static struct id_map_entry *
id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
{
struct id_map_entry *ent;
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
spin_lock(&sriov->id_map_lock);
if (*pv_cm_id == -1) {
ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
if (ent)
*pv_cm_id = (int) ent->pv_cm_id;
} else
ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id);
spin_unlock(&sriov->id_map_lock);
return ent;
}
static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
{
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
unsigned long flags;
spin_lock_irqsave(&sriov->going_down_lock, flags);
spin_lock(&sriov->id_map_lock);
/*make sure that there is no schedule inside the scheduled work.*/
if (!sriov->is_going_down) {
id->scheduled_delete = 1;
schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
spin_unlock(&sriov->id_map_lock);
spin_unlock_irqrestore(&sriov->going_down_lock, flags);
}
int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
struct ib_mad *mad)
{
struct id_map_entry *id;
u32 sl_cm_id;
int pv_cm_id = -1;
sl_cm_id = get_local_comm_id(mad);
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
id = id_map_alloc(ibdev, slave_id, sl_cm_id);
if (IS_ERR(id)) {
mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
__func__, slave_id, sl_cm_id);
return PTR_ERR(id);
}
} else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
return 0;
} else {
id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
}
if (!id) {
pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
slave_id, sl_cm_id);
return -EINVAL;
}
set_local_comm_id(mad, id->pv_cm_id);
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
schedule_delayed(ibdev, id);
else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
id_map_find_del(ibdev, pv_cm_id);
return 0;
}
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
struct ib_mad *mad)
{
u32 pv_cm_id;
struct id_map_entry *id;
if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
union ib_gid gid;
gid = gid_from_req_msg(ibdev, mad);
*slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
if (*slave < 0) {
mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n",
gid.global.interface_id);
return -ENOENT;
}
return 0;
}
pv_cm_id = get_remote_comm_id(mad);
id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
if (!id) {
pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
return -ENOENT;
}
*slave = id->slave_id;
set_remote_comm_id(mad, id->sl_cm_id);
if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
schedule_delayed(ibdev, id);
else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
id_map_find_del(ibdev, (int) pv_cm_id);
}
return 0;
}
void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
{
spin_lock_init(&dev->sriov.id_map_lock);
INIT_LIST_HEAD(&dev->sriov.cm_list);
dev->sriov.sl_id_map = RB_ROOT;
idr_init(&dev->sriov.pv_id_table);
idr_pre_get(&dev->sriov.pv_id_table, GFP_KERNEL);
}
/* slave = -1 ==> all slaves */
/* TBD -- call paravirt clean for single slave. Need for slave RESET event */
void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
{
struct mlx4_ib_sriov *sriov = &dev->sriov;
struct rb_root *sl_id_map = &sriov->sl_id_map;
struct list_head lh;
struct rb_node *nd;
int need_flush = 1;
struct id_map_entry *map, *tmp_map;
/* cancel all delayed work queue entries */
INIT_LIST_HEAD(&lh);
spin_lock(&sriov->id_map_lock);
list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
if (slave < 0 || slave == map->slave_id) {
if (map->scheduled_delete)
need_flush &= !!cancel_delayed_work(&map->timeout);
}
}
spin_unlock(&sriov->id_map_lock);
if (!need_flush)
flush_scheduled_work(); /* make sure all timers were flushed */
/* now, remove all leftover entries from databases*/
spin_lock(&sriov->id_map_lock);
if (slave < 0) {
while (rb_first(sl_id_map)) {
struct id_map_entry *ent =
rb_entry(rb_first(sl_id_map),
struct id_map_entry, node);
rb_erase(&ent->node, sl_id_map);
idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id);
}
list_splice_init(&dev->sriov.cm_list, &lh);
} else {
/* first, move nodes belonging to slave to db remove list */
nd = rb_first(sl_id_map);
while (nd) {
struct id_map_entry *ent =
rb_entry(nd, struct id_map_entry, node);
nd = rb_next(nd);
if (ent->slave_id == slave)
list_move_tail(&ent->list, &lh);
}
/* remove those nodes from databases */
list_for_each_entry_safe(map, tmp_map, &lh, list) {
rb_erase(&map->node, sl_id_map);
idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id);
}
/* add remaining nodes from cm_list */
list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
if (slave == map->slave_id)
list_move_tail(&map->list, &lh);
}
}
spin_unlock(&sriov->id_map_lock);
/* free any map entries left behind due to cancel_delayed_work above */
list_for_each_entry_safe(map, tmp_map, &lh, list) {
list_del(&map->list);
kfree(map);
}
}

View File

@ -547,6 +547,26 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
checksum == cpu_to_be16(0xffff);
}
static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
unsigned tail, struct mlx4_cqe *cqe)
{
struct mlx4_ib_proxy_sqp_hdr *hdr;
ib_dma_sync_single_for_cpu(qp->ibqp.device,
qp->sqp_proxy_rcv[tail].map,
sizeof (struct mlx4_ib_proxy_sqp_hdr),
DMA_FROM_DEVICE);
hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
wc->dlid_path_bits = 0;
return 0;
}
static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
struct mlx4_ib_qp **cur_qp,
struct ib_wc *wc)
@ -559,6 +579,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
int is_error;
u32 g_mlpath_rqpn;
u16 wqe_ctr;
unsigned tail = 0;
repoll:
cqe = next_cqe_sw(cq);
@ -634,7 +655,8 @@ repoll:
mlx4_ib_free_srq_wqe(srq, wqe_ctr);
} else {
wq = &(*cur_qp)->rq;
wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
tail = wq->tail & (wq->wqe_cnt - 1);
wc->wr_id = wq->wrid[tail];
++wq->tail;
}
@ -717,6 +739,13 @@ repoll:
break;
}
if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
if ((*cur_qp)->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
}
wc->slid = be16_to_cpu(cqe->rlid);
g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
wc->src_qp = g_mlpath_rqpn & 0xffffff;

File diff suppressed because it is too large

View File

@ -59,6 +59,10 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
int mlx4_ib_sm_guid_assign = 1;
module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
DRV_VERSION " (" DRV_RELDATE ")\n";
@ -70,6 +74,8 @@ struct update_gid_work {
int port;
};
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
static struct workqueue_struct *wq;
static void init_query_mad(struct ib_smp *mad)
@ -98,7 +104,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@ -133,7 +140,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
props->vendor_part_id = dev->dev->pdev->device;
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
@ -182,11 +189,12 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
}
static int ib_link_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
struct ib_port_attr *props, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int ext_active_speed;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@ -198,7 +206,10 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL,
if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
in_mad, out_mad);
if (err)
goto out;
@ -211,7 +222,10 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
props->state = out_mad->data[32] & 0xf;
props->phys_state = out_mad->data[33] >> 4;
props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
if (netw_view)
props->gid_tbl_len = out_mad->data[50];
else
props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
@ -244,7 +258,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port,
err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@ -270,7 +284,7 @@ static u8 state_to_phys_state(enum ib_port_state state)
}
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
struct ib_port_attr *props, int netw_view)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@ -320,26 +334,36 @@ out:
return err;
}
static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props, int netw_view)
{
int err;
memset(props, 0, sizeof *props);
err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
ib_link_query_port(ibdev, port, props) :
eth_link_query_port(ibdev, port, props);
ib_link_query_port(ibdev, port, props, netw_view) :
eth_link_query_port(ibdev, port, props, netw_view);
return err;
}
static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props)
{
/* returns host view */
return __mlx4_ib_query_port(ibdev, port, props, 0);
}
int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
int clear = 0;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@ -350,23 +374,38 @@ static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
in_mad->attr_mod = cpu_to_be32(port);
err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
if (mlx4_is_mfunc(dev->dev) && netw_view)
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(gid->raw, out_mad->data + 8, 8);
if (mlx4_is_mfunc(dev->dev) && !netw_view) {
if (index) {
/* For any index > 0, return the null guid */
err = 0;
clear = 1;
goto out;
}
}
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
in_mad->attr_mod = cpu_to_be32(index / 8);
err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
NULL, NULL, in_mad, out_mad);
if (err)
goto out;
memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
out:
if (clear)
memset(gid->raw + 8, 0, 8);
kfree(in_mad);
kfree(out_mad);
return err;
@ -386,16 +425,17 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid)
{
if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
return __mlx4_ib_query_gid(ibdev, port, index, gid);
return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
else
return iboe_query_gid(ibdev, port, index, gid);
}
static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey)
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey, int netw_view)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@ -407,7 +447,11 @@ static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
in_mad->attr_mod = cpu_to_be32(index / 32);
err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
in_mad, out_mad);
if (err)
goto out;
@ -419,6 +463,11 @@ out:
return err;
}
static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
}
static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
struct ib_device_modify *props)
{
@ -431,6 +480,9 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
return 0;
if (mlx4_is_slave(to_mdev(ibdev)->dev))
return -EOPNOTSUPP;
spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
memcpy(ibdev->node_desc, props->node_desc, 64);
spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
@ -446,7 +498,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
memset(mailbox->buf, 0, 256);
memcpy(mailbox->buf, props->node_desc, 64);
mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
@ -849,6 +901,7 @@ static int init_node_data(struct mlx4_ib_dev *dev)
{
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@ -858,8 +911,10 @@ static int init_node_data(struct mlx4_ib_dev *dev)
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
if (mlx4_is_master(dev->dev))
mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
@ -867,10 +922,11 @@ static int init_node_data(struct mlx4_ib_dev *dev)
in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
if (err)
goto out;
dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
out:
@ -959,7 +1015,7 @@ static void update_gids_task(struct work_struct *work)
err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_NATIVE);
MLX4_CMD_WRAPPED);
if (err)
pr_warn("set port command failed\n");
else {
@ -1121,6 +1177,38 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event
return NOTIFY_DONE;
}
static void init_pkeys(struct mlx4_ib_dev *ibdev)
{
int port;
int slave;
int i;
if (mlx4_is_master(ibdev->dev)) {
for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
for (i = 0;
i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
++i) {
ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
/* master has the identity virt2phys pkey mapping */
(slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
}
}
}
/* initialize pkey cache */
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
for (i = 0;
i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
++i)
ibdev->pkeys.phys_pkey_cache[port-1][i] =
(i) ? 0 : 0xFFFF;
}
}
}
static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
{
char name[32];
@ -1207,11 +1295,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
pr_info_once("%s", mlx4_ib_version);
if (mlx4_is_mfunc(dev)) {
pr_warn("IB not yet supported in SRIOV\n");
mlx4_foreach_non_ib_transport_port(i, dev)
num_ports++;
if (mlx4_is_mfunc(dev) && num_ports) {
dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n");
return NULL;
}
num_ports = 0;
mlx4_foreach_ib_transport_port(i, dev)
num_ports++;
@ -1318,10 +1410,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
if (!mlx4_is_slave(ibdev->dev)) {
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
}
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
@ -1357,11 +1451,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
if (mlx4_ib_mad_init(ibdev))
goto err_reg;
if (mlx4_ib_init_sriov(ibdev))
goto err_mad;
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
iboe->nb.notifier_call = mlx4_ib_netdev_event;
err = register_netdevice_notifier(&iboe->nb);
if (err)
goto err_reg;
goto err_sriov;
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@ -1372,6 +1469,18 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_active = true;
if (mlx4_is_mfunc(ibdev->dev))
init_pkeys(ibdev);
/* create paravirt contexts for any VFs which are active */
if (mlx4_is_master(ibdev->dev)) {
for (j = 0; j < MLX4_MFUNC_MAX; j++) {
if (j == mlx4_master_func_num(ibdev->dev))
continue;
if (mlx4_is_slave_active(ibdev->dev, j))
do_slave_init(ibdev, j, 1);
}
}
return ibdev;
err_notif:
@ -1379,6 +1488,12 @@ err_notif:
pr_warn("failure unregistering notifier\n");
flush_workqueue(wq);
err_sriov:
mlx4_ib_close_sriov(ibdev);
err_mad:
mlx4_ib_mad_cleanup(ibdev);
err_reg:
ib_unregister_device(&ibdev->ib_dev);
@ -1407,6 +1522,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
struct mlx4_ib_dev *ibdev = ibdev_ptr;
int p;
mlx4_ib_close_sriov(ibdev);
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
if (ibdev->iboe.nb.notifier_call) {
@ -1428,6 +1544,51 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
ib_dealloc_device(&ibdev->ib_dev);
}
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
{
struct mlx4_ib_demux_work **dm = NULL;
struct mlx4_dev *dev = ibdev->dev;
int i;
unsigned long flags;
if (!mlx4_is_master(dev))
return;
dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
if (!dm) {
pr_err("failed to allocate memory for tunneling qp update\n");
goto out;
}
for (i = 0; i < dev->caps.num_ports; i++) {
dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
if (!dm[i]) {
pr_err("failed to allocate memory for tunneling qp update work struct\n");
for (i = 0; i < dev->caps.num_ports; i++) {
if (dm[i])
kfree(dm[i]);
}
goto out;
}
}
/* initialize or tear down tunnel QPs for the slave */
for (i = 0; i < dev->caps.num_ports; i++) {
INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
dm[i]->port = i + 1;
dm[i]->slave = slave;
dm[i]->do_init = do_init;
dm[i]->dev = ibdev;
spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
if (!ibdev->sriov.is_going_down)
queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
}
out:
if (dm)
kfree(dm);
return;
}
static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
enum mlx4_dev_event event, unsigned long param)
{
@ -1435,22 +1596,28 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
struct mlx4_eqe *eqe = NULL;
struct ib_event_work *ew;
int port = 0;
int p = 0;
if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
eqe = (struct mlx4_eqe *)param;
else
port = (u8)param;
if (port > ibdev->num_ports)
return;
p = (int) param;
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
if (p > ibdev->num_ports)
return;
if (mlx4_is_master(dev) &&
rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
IB_LINK_LAYER_INFINIBAND) {
mlx4_ib_invalidate_all_guid_record(ibdev, p);
}
ibev.event = IB_EVENT_PORT_ACTIVE;
break;
case MLX4_DEV_EVENT_PORT_DOWN:
if (p > ibdev->num_ports)
return;
ibev.event = IB_EVENT_PORT_ERR;
break;
@ -1469,7 +1636,21 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
ew->ib_dev = ibdev;
handle_port_mgmt_change_event(&ew->work);
/* need to queue only for port owner, which uses GEN_EQE */
if (mlx4_is_master(dev))
queue_work(wq, &ew->work);
else
handle_port_mgmt_change_event(&ew->work);
return;
case MLX4_DEV_EVENT_SLAVE_INIT:
/* here, p is the slave id */
do_slave_init(ibdev, p, 1);
return;
case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
/* here, p is the slave id */
do_slave_init(ibdev, p, 0);
return;
default:
@ -1477,7 +1658,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
}
ibev.device = ibdev_ptr;
ibev.element.port_num = port;
ibev.element.port_num = (u8) p;
ib_dispatch_event(&ibev);
}
@ -1497,18 +1678,28 @@ static int __init mlx4_ib_init(void)
if (!wq)
return -ENOMEM;
err = mlx4_ib_mcg_init();
if (err)
goto clean_wq;
err = mlx4_register_interface(&mlx4_ib_interface);
if (err) {
destroy_workqueue(wq);
return err;
}
if (err)
goto clean_mcg;
return 0;
clean_mcg:
mlx4_ib_mcg_destroy();
clean_wq:
destroy_workqueue(wq);
return err;
}
static void __exit mlx4_ib_cleanup(void)
{
mlx4_unregister_interface(&mlx4_ib_interface);
mlx4_ib_mcg_destroy();
destroy_workqueue(wq);
}

File diff suppressed because it is too large


@ -37,9 +37,12 @@
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_sa.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
@ -62,6 +65,9 @@ enum {
#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1)
#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
/*module param to indicate if SM assigns the alias_GUID*/
extern int mlx4_ib_sm_guid_assign;
struct mlx4_ib_ucontext {
struct ib_ucontext ibucontext;
struct mlx4_uar uar;
@ -133,8 +139,10 @@ struct mlx4_ib_wq {
};
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = 1 << 0,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
MLX4_IB_SRIOV_SQP = 1 << 31,
};
struct mlx4_ib_gid_entry {
@ -144,6 +152,80 @@ struct mlx4_ib_gid_entry {
u8 port;
};
enum mlx4_ib_qp_type {
/*
* IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
* here (and in that order) since the MAD layer uses them as
* indices into a 2-entry table.
*/
MLX4_IB_QPT_SMI = IB_QPT_SMI,
MLX4_IB_QPT_GSI = IB_QPT_GSI,
MLX4_IB_QPT_RC = IB_QPT_RC,
MLX4_IB_QPT_UC = IB_QPT_UC,
MLX4_IB_QPT_UD = IB_QPT_UD,
MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6,
MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE,
MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET,
MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI,
MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT,
MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16,
MLX4_IB_QPT_PROXY_SMI = 1 << 17,
MLX4_IB_QPT_PROXY_GSI = 1 << 18,
MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19,
MLX4_IB_QPT_TUN_SMI = 1 << 20,
MLX4_IB_QPT_TUN_GSI = 1 << 21,
};
#define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
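The SR-IOV-only types above are single bits at positions 16-21, above every core ib_qp_type value, so the driver can test for them with one bitwise AND (as qp.c does below), while the core values still need equality tests. A tiny standalone sketch of that idiom, with illustrative names:

	#include <stdbool.h>

	/* values mirror MLX4_IB_QPT_PROXY_SMI_OWNER / MLX4_IB_QPT_TUN_SMI_OWNER */
	enum { QPT_PROXY_SMI_OWNER = 1 << 16, QPT_TUN_SMI_OWNER = 1 << 19 };

	static bool is_smi_owner(unsigned int type)
	{
		/* the high-bit types never alias, so a mask test is safe here */
		return (type & (QPT_PROXY_SMI_OWNER | QPT_TUN_SMI_OWNER)) != 0;
	}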
enum mlx4_ib_mad_ifc_flags {
MLX4_MAD_IFC_IGNORE_MKEY = 1,
MLX4_MAD_IFC_IGNORE_BKEY = 2,
MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY |
MLX4_MAD_IFC_IGNORE_BKEY),
MLX4_MAD_IFC_NET_VIEW = 4,
};
enum {
MLX4_NUM_TUNNEL_BUFS = 256,
};
struct mlx4_ib_tunnel_header {
struct mlx4_av av;
__be32 remote_qpn;
__be32 qkey;
__be16 vlan;
u8 mac[6];
__be16 pkey_index;
u8 reserved[6];
};
struct mlx4_ib_buf {
void *addr;
dma_addr_t map;
};
struct mlx4_rcv_tunnel_hdr {
__be32 flags_src_qp; /* flags[6:5] is defined for VLANs:
* 0x0 - no vlan was in the packet
* 0x01 - C-VLAN was in the packet */
u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */
u8 reserved;
__be16 pkey_index;
__be16 sl_vid;
__be16 slid_mac_47_32;
__be32 mac_31_0;
};
struct mlx4_ib_proxy_sqp_hdr {
struct ib_grh grh;
struct mlx4_rcv_tunnel_hdr tun;
} __packed;
struct mlx4_ib_qp {
struct ib_qp ibqp;
struct mlx4_qp mqp;
@ -159,6 +241,7 @@ struct mlx4_ib_qp {
int sq_spare_wqes;
struct mlx4_ib_wq sq;
enum mlx4_ib_qp_type mlx4_ib_qp_type;
struct ib_umem *umem;
struct mlx4_mtt mtt;
int buf_size;
@ -174,6 +257,8 @@ struct mlx4_ib_qp {
int mlx_type;
struct list_head gid_list;
struct list_head steering_rules;
struct mlx4_ib_buf *sqp_proxy_rcv;
};
struct mlx4_ib_srq {
@ -196,6 +281,138 @@ struct mlx4_ib_ah {
union mlx4_ext_av av;
};
/****************************************/
/* alias guid support */
/****************************************/
#define NUM_PORT_ALIAS_GUID 2
#define NUM_ALIAS_GUID_IN_REC 8
#define NUM_ALIAS_GUID_REC_IN_PORT 16
#define GUID_REC_SIZE 8
#define NUM_ALIAS_GUID_PER_PORT 128
#define MLX4_NOT_SET_GUID (0x00LL)
#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
enum mlx4_guid_alias_rec_status {
MLX4_GUID_INFO_STATUS_IDLE,
MLX4_GUID_INFO_STATUS_SET,
MLX4_GUID_INFO_STATUS_PENDING,
};
enum mlx4_guid_alias_rec_ownership {
MLX4_GUID_DRIVER_ASSIGN,
MLX4_GUID_SYSADMIN_ASSIGN,
MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
};
enum mlx4_guid_alias_rec_method {
MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE,
};
struct mlx4_sriov_alias_guid_info_rec_det {
u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
ib_sa_comp_mask guid_indexes; /*indicates which of the 8 records are valid*/
enum mlx4_guid_alias_rec_status status; /*indicates the administrative status of the record.*/
u8 method; /*set or delete*/
enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assigned this alias_guid record*/
};
struct mlx4_sriov_alias_guid_port_rec_det {
struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT];
struct workqueue_struct *wq;
struct delayed_work alias_guid_work;
u8 port;
struct mlx4_sriov_alias_guid *parent;
struct list_head cb_list;
};
struct mlx4_sriov_alias_guid {
struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS];
spinlock_t ag_work_lock;
struct ib_sa_client *sa_client;
};
struct mlx4_ib_demux_work {
struct work_struct work;
struct mlx4_ib_dev *dev;
int slave;
int do_init;
u8 port;
};
struct mlx4_ib_tun_tx_buf {
struct mlx4_ib_buf buf;
struct ib_ah *ah;
};
struct mlx4_ib_demux_pv_qp {
struct ib_qp *qp;
enum ib_qp_type proxy_qpt;
struct mlx4_ib_buf *ring;
struct mlx4_ib_tun_tx_buf *tx_ring;
spinlock_t tx_lock;
unsigned tx_ix_head;
unsigned tx_ix_tail;
};
enum mlx4_ib_demux_pv_state {
DEMUX_PV_STATE_DOWN,
DEMUX_PV_STATE_STARTING,
DEMUX_PV_STATE_ACTIVE,
DEMUX_PV_STATE_DOWNING,
};
struct mlx4_ib_demux_pv_ctx {
int port;
int slave;
enum mlx4_ib_demux_pv_state state;
int has_smi;
struct ib_device *ib_dev;
struct ib_cq *cq;
struct ib_pd *pd;
struct ib_mr *mr;
struct work_struct work;
struct workqueue_struct *wq;
struct mlx4_ib_demux_pv_qp qp[2];
};
struct mlx4_ib_demux_ctx {
struct ib_device *ib_dev;
int port;
struct workqueue_struct *wq;
struct workqueue_struct *ud_wq;
spinlock_t ud_lock;
__be64 subnet_prefix;
__be64 guid_cache[128];
struct mlx4_ib_dev *dev;
/* the following lock protects both mcg_table and mcg_mgid0_list */
struct mutex mcg_table_lock;
struct rb_root mcg_table;
struct list_head mcg_mgid0_list;
struct workqueue_struct *mcg_wq;
struct mlx4_ib_demux_pv_ctx **tun;
atomic_t tid;
int flushing; /* flushing the work queue */
};
struct mlx4_ib_sriov {
struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS];
struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS];
/* when using this spinlock you should use "irq" because
* it may be called from interrupt context.*/
spinlock_t going_down_lock;
int is_going_down;
struct mlx4_sriov_alias_guid alias_guid;
/* CM paravirtualization fields */
struct list_head cm_list;
spinlock_t id_map_lock;
struct rb_root sl_id_map;
struct idr pv_id_table;
};
struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
@ -203,6 +420,42 @@ struct mlx4_ib_iboe {
union ib_gid gid_table[MLX4_MAX_PORTS][128];
};
struct pkey_mgt {
u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
struct list_head pkey_port_list[MLX4_MFUNC_MAX];
struct kobject *device_parent[MLX4_MFUNC_MAX];
};
struct mlx4_ib_iov_sysfs_attr {
void *ctx;
struct kobject *kobj;
unsigned long data;
u32 entry_num;
char name[15];
struct device_attribute dentry;
struct device *dev;
};
struct mlx4_ib_iov_sysfs_attr_ar {
struct mlx4_ib_iov_sysfs_attr dentries[3 * NUM_ALIAS_GUID_PER_PORT + 1];
};
struct mlx4_ib_iov_port {
char name[100];
u8 num;
struct mlx4_ib_dev *dev;
struct list_head list;
struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar;
struct ib_port_attr attr;
struct kobject *cur_port;
struct kobject *admin_alias_parent;
struct kobject *gids_parent;
struct kobject *pkeys_parent;
struct kobject *mcgs_parent;
struct mlx4_ib_iov_sysfs_attr mcg_dentry;
};
struct mlx4_ib_dev {
struct ib_device ib_dev;
struct mlx4_dev *dev;
@ -216,6 +469,7 @@ struct mlx4_ib_dev {
struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
struct ib_ah *sm_ah[MLX4_MAX_PORTS];
spinlock_t sm_lock;
struct mlx4_ib_sriov sriov;
struct mutex cap_mask_mutex;
bool ib_active;
@ -223,6 +477,11 @@ struct mlx4_ib_dev {
int counters[MLX4_MAX_PORTS];
int *eq_table;
int eq_added;
struct kobject *iov_parent;
struct kobject *ports_parent;
struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS];
struct pkey_mgt pkeys;
};
struct ib_event_work {
@ -231,6 +490,13 @@ struct ib_event_work {
struct mlx4_eqe ib_eqe;
};
struct mlx4_ib_qp_tunnel_init_attr {
struct ib_qp_init_attr init_attr;
int slave;
enum ib_qp_type proxy_qp_type;
u8 port;
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
@ -300,6 +566,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
return container_of(ibah, struct mlx4_ib_ah, ibah);
}
int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
struct mlx4_db *db);
void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
@ -356,7 +625,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
struct ib_recv_wr **bad_wr);
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
void *in_mad, void *response_mad);
int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@ -371,6 +640,13 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
u64 iova);
int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props, int netw_view);
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
u16 *pkey, int netw_view);
int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *gid, int netw_view);
int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
u8 *mac, int *is_mcast, u8 port);
@ -385,10 +661,69 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
return !!(ah->av.ib.g_slid & 0x80);
}
int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
int mlx4_ib_mcg_init(void);
void mlx4_ib_mcg_destroy(void);
int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid);
int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave,
struct ib_sa_mad *sa_mad);
int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
struct ib_sa_mad *mad);
int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
union ib_gid *gid);
void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
enum ib_event_type type);
void mlx4_ib_tunnels_update_work(struct work_struct *work);
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad);
int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
struct ib_mad *mad);
int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
struct ib_mad *mad);
void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev);
void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id);
/* alias guid support */
void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port);
int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev);
void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev);
void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port);
void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
int block_num,
u8 port_num, u8 *p_data);
void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev,
int block_num, u8 port_num,
u8 *p_data);
int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr);
void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr);
ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
__be64 mlx4_ib_gen_node_guid(void);
#endif /* MLX4_IB_H */


@ -38,6 +38,7 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
#include <linux/mlx4/qp.h>
@ -110,16 +111,62 @@ static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
return container_of(mqp, struct mlx4_ib_sqp, qp);
}
static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
if (!mlx4_is_master(dev->dev))
return 0;
return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
8 * MLX4_MFUNC_MAX;
}
static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
int proxy_sqp = 0;
int real_sqp = 0;
int i;
/* PPF or Native -- real SQP */
real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
if (real_sqp)
return 1;
/* VF or PF -- proxy SQP */
if (mlx4_is_mfunc(dev->dev)) {
for (i = 0; i < dev->dev->caps.num_ports; i++) {
if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
proxy_sqp = 1;
break;
}
}
}
return proxy_sqp;
}
/* used for INIT/CLOSE port logic */
static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
{
return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
int proxy_qp0 = 0;
int real_qp0 = 0;
int i;
/* PPF or Native -- real QP0 */
real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
if (real_qp0)
return 1;
/* VF or PF -- proxy QP0 */
if (mlx4_is_mfunc(dev->dev)) {
for (i = 0; i < dev->dev->caps.num_ports; i++) {
if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
proxy_qp0 = 1;
break;
}
}
}
return proxy_qp0;
}
static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
@ -270,7 +317,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
}
}
static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
{
/*
* UD WQEs must have a datagram segment.
@ -279,19 +326,29 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
* header and space for the ICRC).
*/
switch (type) {
case IB_QPT_UD:
case MLX4_IB_QPT_UD:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg) +
((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
case IB_QPT_UC:
case MLX4_IB_QPT_PROXY_SMI_OWNER:
case MLX4_IB_QPT_PROXY_SMI:
case MLX4_IB_QPT_PROXY_GSI:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg) + 64;
case MLX4_IB_QPT_TUN_SMI_OWNER:
case MLX4_IB_QPT_TUN_GSI:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_datagram_seg);
case MLX4_IB_QPT_UC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
case IB_QPT_RC:
case MLX4_IB_QPT_RC:
return sizeof (struct mlx4_wqe_ctrl_seg) +
sizeof (struct mlx4_wqe_atomic_seg) +
sizeof (struct mlx4_wqe_raddr_seg);
case IB_QPT_SMI:
case IB_QPT_GSI:
case MLX4_IB_QPT_SMI:
case MLX4_IB_QPT_GSI:
return sizeof (struct mlx4_wqe_ctrl_seg) +
ALIGN(MLX4_IB_UD_HEADER_SIZE +
DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
@ -345,7 +402,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
}
static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
enum ib_qp_type type, struct mlx4_ib_qp *qp)
enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
{
int s;
@ -360,7 +417,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
* For MLX transport we need 2 extra S/G entries:
* one for the header and one for the checksum at the end
*/
if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI ||
type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
return -EINVAL;
@ -404,7 +462,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
*/
if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
qp->sq_signal_bits && BITS_PER_LONG == 64 &&
type != IB_QPT_SMI && type != IB_QPT_GSI)
type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
!(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
qp->sq.wqe_shift = ilog2(64);
else
qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
@ -476,6 +536,54 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
return 0;
}
static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
{
int i;
qp->sqp_proxy_rcv =
kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
GFP_KERNEL);
if (!qp->sqp_proxy_rcv)
return -ENOMEM;
for (i = 0; i < qp->rq.wqe_cnt; i++) {
qp->sqp_proxy_rcv[i].addr =
kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
GFP_KERNEL);
if (!qp->sqp_proxy_rcv[i].addr)
goto err;
qp->sqp_proxy_rcv[i].map =
ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
sizeof (struct mlx4_ib_proxy_sqp_hdr),
DMA_FROM_DEVICE);
}
return 0;
err:
while (i > 0) {
--i;
ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
sizeof (struct mlx4_ib_proxy_sqp_hdr),
DMA_FROM_DEVICE);
kfree(qp->sqp_proxy_rcv[i].addr);
}
kfree(qp->sqp_proxy_rcv);
qp->sqp_proxy_rcv = NULL;
return -ENOMEM;
}
static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
{
int i;
for (i = 0; i < qp->rq.wqe_cnt; i++) {
ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
sizeof (struct mlx4_ib_proxy_sqp_hdr),
DMA_FROM_DEVICE);
kfree(qp->sqp_proxy_rcv[i].addr);
}
kfree(qp->sqp_proxy_rcv);
}
static int qp_has_rq(struct ib_qp_init_attr *attr)
{
if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
@ -486,10 +594,67 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
{
int qpn;
int err;
struct mlx4_ib_sqp *sqp;
struct mlx4_ib_qp *qp;
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
/* When tunneling special qps, we use a plain UD qp */
if (sqpn) {
if (mlx4_is_mfunc(dev->dev) &&
(!mlx4_is_master(dev->dev) ||
!(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
if (init_attr->qp_type == IB_QPT_GSI)
qp_type = MLX4_IB_QPT_PROXY_GSI;
else if (mlx4_is_master(dev->dev))
qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
else
qp_type = MLX4_IB_QPT_PROXY_SMI;
}
qpn = sqpn;
/* add extra sg entry for tunneling */
init_attr->cap.max_recv_sge++;
} else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
container_of(init_attr,
struct mlx4_ib_qp_tunnel_init_attr, init_attr);
if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
tnl_init->proxy_qp_type != IB_QPT_GSI) ||
!mlx4_is_master(dev->dev))
return -EINVAL;
if (tnl_init->proxy_qp_type == IB_QPT_GSI)
qp_type = MLX4_IB_QPT_TUN_GSI;
else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
else
qp_type = MLX4_IB_QPT_TUN_SMI;
/* we are definitely in the PPF here, since we are creating
* tunnel QPs. base_tunnel_sqpn is therefore valid. */
qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
+ tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
sqpn = qpn;
}
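/* Worked example of the qpn formula above (illustrative numbers only):
 * each slave owns 8 consecutive QPNs starting at base_tunnel_sqpn + 8 * slave;
 * IB_QPT_SMI (0) / IB_QPT_GSI (1) contribute 0 or 2, and (port - 1) adds 0 or 1,
 * so slave 2, GSI, port 1 -> base_tunnel_sqpn + 16 + 2 + 0. */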
if (!*caller_qp) {
if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
(qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
if (!sqp)
return -ENOMEM;
qp = &sqp->qp;
} else {
qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
if (!qp)
return -ENOMEM;
}
} else
qp = *caller_qp;
qp->mlx4_ib_qp_type = qp_type;
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
@ -550,7 +715,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
qp->flags |= MLX4_IB_QP_LSO;
err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
if (err)
goto err;
@ -586,7 +751,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
if (sqpn) {
qpn = sqpn;
if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
if (alloc_proxy_bufs(pd->device, qp)) {
err = -ENOMEM;
goto err_wrid;
}
}
} else {
/* Raw packet QPNs must be aligned to 8 bits. If not, the WQE
* BlueFlame setup flow wrongly causes VLAN insertion. */
@ -595,7 +766,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
else
err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
if (err)
goto err_wrid;
goto err_proxy;
}
err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
@ -613,13 +784,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
qp->mqp.event = mlx4_ib_qp_event;
if (!*caller_qp)
*caller_qp = qp;
return 0;
err_qpn:
if (!sqpn)
mlx4_qp_release_range(dev->dev, qpn, 1);
err_proxy:
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
free_proxy_bufs(pd->device, qp);
err_wrid:
if (pd->uobject) {
if (qp_has_rq(init_attr))
@ -643,6 +817,8 @@ err_db:
mlx4_db_free(dev->dev, &qp->db);
err:
if (!*caller_qp)
kfree(qp);
return err;
}
@ -755,7 +931,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_qp_free(dev->dev, &qp->mqp);
if (!is_sqp(dev, qp))
if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
@ -768,6 +944,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
} else {
kfree(qp->sq.wrid);
kfree(qp->rq.wrid);
if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
free_proxy_bufs(&dev->ib_dev, qp);
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
if (qp->rq.wqe_cnt)
mlx4_db_free(dev->dev, &qp->db);
@ -776,25 +955,46 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
del_gid_entries(qp);
}
static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
{
/* Native or PPF */
if (!mlx4_is_mfunc(dev->dev) ||
(mlx4_is_master(dev->dev) &&
attr->create_flags & MLX4_IB_SRIOV_SQP)) {
return dev->dev->phys_caps.base_sqpn +
(attr->qp_type == IB_QPT_SMI ? 0 : 2) +
attr->port_num - 1;
}
/* PF or VF -- creating proxies */
if (attr->qp_type == IB_QPT_SMI)
return dev->dev->caps.qp0_proxy[attr->port_num - 1];
else
return dev->dev->caps.qp1_proxy[attr->port_num - 1];
}
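For orientation, a minimal sketch (illustrative, not driver code) of the native/PPF branch of get_sqp_num() above: SMI and GSI occupy consecutive pairs per port, so, for example, GSI on port 2 resolves to base_sqpn + 2 + 1 = base_sqpn + 3.

	/* standalone illustration of the native/PPF special-QPN layout */
	static unsigned int sqp_num(unsigned int base_sqpn, int is_gsi, int port_num)
	{
		return base_sqpn + (is_gsi ? 2 : 0) + port_num - 1;
	}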
struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata)
{
struct mlx4_ib_sqp *sqp;
struct mlx4_ib_qp *qp;
struct mlx4_ib_qp *qp = NULL;
int err;
u16 xrcdn = 0;
/*
* We only support LSO and multicast loopback blocking, and
* only for kernel UD QPs.
* We only support LSO, vendor flag1, and multicast loopback blocking,
* and only for kernel UD QPs.
*/
if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP))
return ERR_PTR(-EINVAL);
if (init_attr->create_flags &&
(udata || init_attr->qp_type != IB_QPT_UD))
(udata ||
((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
init_attr->qp_type != IB_QPT_UD) ||
((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
init_attr->qp_type > IB_QPT_GSI)))
return ERR_PTR(-EINVAL);
switch (init_attr->qp_type) {
@ -810,18 +1010,17 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
/* fall through */
case IB_QPT_RC:
case IB_QPT_UC:
case IB_QPT_UD:
case IB_QPT_RAW_PACKET:
{
qp = kzalloc(sizeof *qp, GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, qp);
if (err) {
kfree(qp);
/* fall through */
case IB_QPT_UD:
{
err = create_qp_common(to_mdev(pd->device), pd, init_attr,
udata, 0, &qp);
if (err)
return ERR_PTR(err);
}
qp->ibqp.qp_num = qp->mqp.qpn;
qp->xrcdn = xrcdn;
@ -835,21 +1034,11 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
if (udata)
return ERR_PTR(-EINVAL);
sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
if (!sqp)
return ERR_PTR(-ENOMEM);
qp = &sqp->qp;
err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
to_mdev(pd->device)->dev->caps.sqp_start +
(init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
init_attr->port_num - 1,
qp);
if (err) {
kfree(sqp);
get_sqp_num(to_mdev(pd->device), init_attr),
&qp);
if (err)
return ERR_PTR(err);
}
qp->port = init_attr->port_num;
qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
@ -884,18 +1073,27 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
return 0;
}
static int to_mlx4_st(enum ib_qp_type type)
static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type)
{
switch (type) {
case IB_QPT_RC: return MLX4_QP_ST_RC;
case IB_QPT_UC: return MLX4_QP_ST_UC;
case IB_QPT_UD: return MLX4_QP_ST_UD;
case IB_QPT_XRC_INI:
case IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC;
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX;
default: return -1;
case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC;
case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC;
case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD;
case MLX4_IB_QPT_XRC_INI:
case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC;
case MLX4_IB_QPT_SMI:
case MLX4_IB_QPT_GSI:
case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX;
case MLX4_IB_QPT_PROXY_SMI_OWNER:
case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ?
MLX4_QP_ST_MLX : -1);
case MLX4_IB_QPT_PROXY_SMI:
case MLX4_IB_QPT_TUN_SMI:
case MLX4_IB_QPT_PROXY_GSI:
case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ?
MLX4_QP_ST_UD : -1);
default: return -1;
}
}
@ -1043,7 +1241,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
return -ENOMEM;
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
(to_mlx4_st(ibqp->qp_type) << 16));
(to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@ -1121,13 +1319,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_PKEY_INDEX) {
if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
context->pri_path.disable_pkey_check = 0x40;
context->pri_path.pkey_index = attr->pkey_index;
optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
}
if (attr_mask & IB_QP_AV) {
if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
attr_mask & IB_QP_PORT ?
attr->port_num : qp->port))
goto out;
optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
@ -1210,8 +1411,24 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (attr_mask & IB_QP_RQ_PSN)
context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
/* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */
if (attr_mask & IB_QP_QKEY) {
context->qkey = cpu_to_be32(attr->qkey);
if (qp->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
else {
if (mlx4_is_mfunc(dev->dev) &&
!(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
(attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
MLX4_RESERVED_QKEY_BASE) {
pr_err("Cannot use reserved QKEY"
" 0x%x (range 0xffff0000..0xffffffff"
" is reserved)\n", attr->qkey);
err = -EINVAL;
goto out;
}
context->qkey = cpu_to_be32(attr->qkey);
}
optpar |= MLX4_QP_OPTPAR_Q_KEY;
}
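/* Illustration of the reservation test above (assuming, per the error text,
 * that MLX4_RESERVED_QKEY_BASE/MASK both cover the top 16 bits): a qkey such
 * as 0xffff1234 gives (0xffff1234 & 0xffff0000) == 0xffff0000, so a
 * paravirtualized non-SRIOV QP gets -EINVAL instead of a reserved qkey. */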
@ -1227,10 +1444,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
ibqp->qp_type == IB_QPT_UD ||
ibqp->qp_type == IB_QPT_RAW_PACKET)) {
context->pri_path.sched_queue = (qp->port - 1) << 6;
if (is_qp0(dev, qp))
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
qp->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
else
if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI)
context->pri_path.fl = 0x80;
} else {
if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
context->pri_path.fl = 0x80;
context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
}
}
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
@ -1346,7 +1570,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
if ((attr_mask & IB_QP_PORT) &&
(attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
(attr->port_num == 0 || attr->port_num > dev->num_ports)) {
pr_debug("qpn 0x%x: invalid port number (%d) specified "
"for transition %d to %d. qp_type %d\n",
ibqp->qp_num, attr->port_num, cur_state,
@ -1400,6 +1624,114 @@ out:
return err;
}
static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
struct ib_send_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
struct ib_device *ib_dev = &mdev->ib_dev;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
u16 pkey;
u32 qkey;
int send_size;
int header_size;
int spc;
int i;
if (wr->opcode != IB_WR_SEND)
return -EINVAL;
send_size = 0;
for (i = 0; i < wr->num_sge; ++i)
send_size += wr->sg_list[i].length;
/* for proxy-qp0 sends, need to add in size of tunnel header */
/* for tunnel-qp0 sends, tunnel header is already in s/g list */
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
send_size += sizeof (struct mlx4_ib_tunnel_header);
ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
sqp->ud_header.lrh.service_level =
be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
sqp->ud_header.lrh.destination_lid =
cpu_to_be16(ah->av.ib.g_slid & 0x7f);
sqp->ud_header.lrh.source_lid =
cpu_to_be16(ah->av.ib.g_slid & 0x7f);
}
mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
/* force loopback */
mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
mlx->rlid = sqp->ud_header.lrh.destination_lid;
sqp->ud_header.lrh.virtual_lane = 0;
sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
else
sqp->ud_header.bth.destination_qpn =
cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
return -EINVAL;
sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
sqp->ud_header.immediate_present = 0;
header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
/*
* Inline data segments may not cross a 64 byte boundary. If
* our UD header is bigger than the space available up to the
* next 64 byte boundary in the WQE, use two inline data
* segments to hold the UD header.
*/
spc = MLX4_INLINE_ALIGN -
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
if (header_size <= spc) {
inl->byte_count = cpu_to_be32(1 << 31 | header_size);
memcpy(inl + 1, sqp->header_buf, header_size);
i = 1;
} else {
inl->byte_count = cpu_to_be32(1 << 31 | spc);
memcpy(inl + 1, sqp->header_buf, spc);
inl = (void *) (inl + 1) + spc;
memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
/*
* Need a barrier here to make sure all the data is
* visible before the byte_count field is set.
* Otherwise the HCA prefetcher could grab the 64-byte
* chunk with this inline segment and get a valid (!=
* 0xffffffff) byte count but stale data, and end up
* generating a packet with bad headers.
*
* The first inline segment's byte_count field doesn't
* need a barrier, because it comes after a
* control/MLX segment and therefore is at an offset
* of 16 mod 64.
*/
wmb();
inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
i = 2;
}
*mlx_seg_len =
ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
return 0;
}
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
@ -1418,6 +1750,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
int is_vlan = 0;
int is_grh;
u16 vlan;
int err = 0;
send_size = 0;
for (i = 0; i < wr->num_sge; ++i)
@ -1426,8 +1759,24 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, &sgid);
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
* we must use our own cache */
sgid.global.subnet_prefix =
to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
subnet_prefix;
sgid.global.interface_id =
to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
guid_cache[ah->av.ib.gid_index];
} else {
err = ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, &sgid);
if (err)
return err;
}
vlan = rdma_get_vlan_id(&sgid);
is_vlan = vlan < 0x1000;
}
@ -1446,8 +1795,21 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
sqp->ud_header.grh.flow_label =
ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid);
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
* we must use our own cache */
sqp->ud_header.grh.source_gid.global.subnet_prefix =
to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
subnet_prefix;
sqp->ud_header.grh.source_gid.global.interface_id =
to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
guid_cache[ah->av.ib.gid_index];
} else
ib_get_cached_gid(ib_dev,
be32_to_cpu(ah->av.ib.port_pd) >> 24,
ah->av.ib.gid_index,
&sqp->ud_header.grh.source_gid);
memcpy(sqp->ud_header.grh.destination_gid.raw,
ah->av.ib.dgid, 16);
}
@ -1459,6 +1821,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
(sqp->ud_header.lrh.destination_lid ==
IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
(sqp->ud_header.lrh.service_level << 8));
if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
mlx->flags |= cpu_to_be32(0x1); /* force loopback */
mlx->rlid = sqp->ud_header.lrh.destination_lid;
}
@ -1667,6 +2031,63 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
}
static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
struct mlx4_wqe_datagram_seg *dseg,
struct ib_send_wr *wr, enum ib_qp_type qpt)
{
union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
struct mlx4_av sqp_av = {0};
int port = *((u8 *) &av->ib.port_pd) & 0x3;
/* force loopback */
sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */
sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
cpu_to_be32(0xf0000000);
memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
/* This function used only for sending on QP1 proxies */
dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
/* Use QKEY from the QP context, which is set by master */
dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
}
static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
{
struct mlx4_wqe_inline_seg *inl = wqe;
struct mlx4_ib_tunnel_header hdr;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
int spc;
int i;
memcpy(&hdr.av, &ah->av, sizeof hdr.av);
hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
spc = MLX4_INLINE_ALIGN -
((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
if (sizeof (hdr) <= spc) {
memcpy(inl + 1, &hdr, sizeof (hdr));
wmb();
inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
i = 1;
} else {
memcpy(inl + 1, &hdr, spc);
wmb();
inl->byte_count = cpu_to_be32(1 << 31 | spc);
inl = (void *) (inl + 1) + spc;
memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
wmb();
inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
i = 2;
}
*mlx_seg_len =
ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
}
static void set_mlx_icrc_seg(void *dseg)
{
u32 *t = dseg;
@ -1748,6 +2169,13 @@ static __be32 send_ieth(struct ib_send_wr *wr)
}
}
static void add_zero_len_inline(void *wqe)
{
struct mlx4_wqe_inline_seg *inl = wqe;
memset(wqe, 0, 16);
inl->byte_count = cpu_to_be32(1 << 31);
}
int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr)
{
@ -1806,9 +2234,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
wqe += sizeof *ctrl;
size = sizeof *ctrl / 16;
switch (ibqp->qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
switch (qp->mlx4_ib_qp_type) {
case MLX4_IB_QPT_RC:
case MLX4_IB_QPT_UC:
switch (wr->opcode) {
case IB_WR_ATOMIC_CMP_AND_SWP:
case IB_WR_ATOMIC_FETCH_AND_ADD:
@ -1869,7 +2297,25 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
break;
case IB_QPT_UD:
case MLX4_IB_QPT_TUN_SMI_OWNER:
err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
}
wqe += seglen;
size += seglen / 16;
break;
case MLX4_IB_QPT_TUN_SMI:
case MLX4_IB_QPT_TUN_GSI:
/* this is a UD qp used in MAD responses to slaves. */
set_datagram_seg(wqe, wr);
/* set the forced-loopback bit in the data seg av */
*(__be32 *) wqe |= cpu_to_be32(0x80000000);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
break;
case MLX4_IB_QPT_UD:
set_datagram_seg(wqe, wr);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
@ -1886,8 +2332,47 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
break;
case IB_QPT_SMI:
case IB_QPT_GSI:
case MLX4_IB_QPT_PROXY_SMI_OWNER:
if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
err = -ENOSYS;
*bad_wr = wr;
goto out;
}
err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
}
wqe += seglen;
size += seglen / 16;
/* to start tunnel header on a cache-line boundary */
add_zero_len_inline(wqe);
wqe += 16;
size++;
build_tunnel_header(wr, wqe, &seglen);
wqe += seglen;
size += seglen / 16;
break;
case MLX4_IB_QPT_PROXY_SMI:
/* don't allow QP0 sends on guests */
err = -ENOSYS;
*bad_wr = wr;
goto out;
case MLX4_IB_QPT_PROXY_GSI:
/* If we are tunneling special qps, this is a UD qp.
* In this case we first add a UD segment targeting
* the tunnel qp, and then add a header with address
* information */
set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
wqe += sizeof (struct mlx4_wqe_datagram_seg);
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
build_tunnel_header(wr, wqe, &seglen);
wqe += seglen;
size += seglen / 16;
break;
case MLX4_IB_QPT_SMI:
case MLX4_IB_QPT_GSI:
err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
if (unlikely(err)) {
*bad_wr = wr;
@ -1913,8 +2398,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
/* Add one more inline data segment for ICRC for MLX sends */
if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)) {
if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI ||
qp->mlx4_ib_qp_type &
(MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
set_mlx_icrc_seg(dseg + 1);
size += sizeof (struct mlx4_wqe_data_seg) / 16;
}
@ -2006,8 +2493,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int err = 0;
int nreq;
int ind;
int max_gs;
int i;
max_gs = qp->rq.max_gs;
spin_lock_irqsave(&qp->rq.lock, flags);
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
@ -2027,10 +2516,25 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
scat = get_recv_wqe(qp, ind);
if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
ib_dma_sync_single_for_device(ibqp->device,
qp->sqp_proxy_rcv[ind].map,
sizeof (struct mlx4_ib_proxy_sqp_hdr),
DMA_FROM_DEVICE);
scat->byte_count =
cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
/* use dma lkey from upper layer entry */
scat->lkey = cpu_to_be32(wr->sg_list->lkey);
scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
scat++;
max_gs--;
}
for (i = 0; i < wr->num_sge; ++i)
__set_data_seg(scat + i, wr->sg_list + i);
if (i < qp->rq.max_gs) {
if (i < max_gs) {
scat[i].byte_count = 0;
scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
scat[i].addr = 0;
@ -2225,6 +2729,10 @@ done:
if (qp->flags & MLX4_IB_QP_LSO)
qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
qp_init_attr->sq_sig_type =
qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
out:
mutex_unlock(&qp->mutex);
return err;


@ -0,0 +1,794 @@
/*
* Copyright (c) 2012 Mellanox Technologies. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/*#include "core_priv.h"*/
#include "mlx4_ib.h"
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <rdma/ib_mad.h>
/*show_admin_alias_guid returns the administratively assigned value of that GUID.
* Values returned in buf parameter string:
* 0 - requests opensm to assign a value.
* ffffffffffffffff - delete this entry.
* other - value assigned by administrator.
*/
static ssize_t show_admin_alias_guid(struct device *dev,
struct device_attribute *attr, char *buf)
{
int record_num;/*0-15*/
int guid_index_in_rec; /*0 - 7*/
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
return sprintf(buf, "%llx\n",
be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
ports_guid[port->num - 1].
all_rec_per_port[record_num].
all_recs[8 * guid_index_in_rec]));
}
/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
* Values in buf parameter string:
* 0 - requests opensm to assign a value.
* 0xffffffffffffffff - delete this entry.
* other - guid value assigned by the administrator.
*/
static ssize_t store_admin_alias_guid(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
int record_num;/*0-15*/
int guid_index_in_rec; /*0 - 7*/
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
u64 sysadmin_ag_val;
record_num = mlx4_ib_iov_dentry->entry_num / 8;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
if (0 == record_num && 0 == guid_index_in_rec) {
pr_err("GUID 0 block 0 is RO\n");
return count;
}
sscanf(buf, "%llx", &sysadmin_ag_val);
*(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
all_rec_per_port[record_num].
all_recs[GUID_REC_SIZE * guid_index_in_rec] =
cpu_to_be64(sysadmin_ag_val);
/* Change the state to be pending for update */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
= MLX4_GUID_INFO_STATUS_IDLE ;
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
= MLX4_GUID_INFO_RECORD_SET;
switch (sysadmin_ag_val) {
case MLX4_GUID_FOR_DELETE_VAL:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
= MLX4_GUID_INFO_RECORD_DELETE;
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_SYSADMIN_ASSIGN;
break;
/* The sysadmin requests the SM to re-assign */
case MLX4_NOT_SET_GUID:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_DRIVER_ASSIGN;
break;
/* The sysadmin requests a specific value.*/
default:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_SYSADMIN_ASSIGN;
break;
}
/* set the record index */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
return count;
}
static ssize_t show_port_gid(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
union ib_gid gid;
ssize_t ret;
ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num,
mlx4_ib_iov_dentry->entry_num, &gid, 1);
if (ret)
return ret;
ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
be16_to_cpu(((__be16 *) gid.raw)[0]),
be16_to_cpu(((__be16 *) gid.raw)[1]),
be16_to_cpu(((__be16 *) gid.raw)[2]),
be16_to_cpu(((__be16 *) gid.raw)[3]),
be16_to_cpu(((__be16 *) gid.raw)[4]),
be16_to_cpu(((__be16 *) gid.raw)[5]),
be16_to_cpu(((__be16 *) gid.raw)[6]),
be16_to_cpu(((__be16 *) gid.raw)[7]));
return ret;
}
static ssize_t show_phys_port_pkey(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
u16 pkey;
ssize_t ret;
ret = __mlx4_ib_query_pkey(&mdev->ib_dev, port->num,
mlx4_ib_iov_dentry->entry_num, &pkey, 1);
if (ret)
return ret;
return sprintf(buf, "0x%04x\n", pkey);
}
#define DENTRY_REMOVE(_dentry) \
do { \
sysfs_remove_file((_dentry)->kobj, &(_dentry)->dentry.attr); \
} while (0);
static int create_sysfs_entry(void *_ctx, struct mlx4_ib_iov_sysfs_attr *_dentry,
char *_name, struct kobject *_kobj,
ssize_t (*show)(struct device *dev,
struct device_attribute *attr,
char *buf),
ssize_t (*store)(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
)
{
int ret = 0;
struct mlx4_ib_iov_sysfs_attr *vdentry = _dentry;
vdentry->ctx = _ctx;
vdentry->dentry.show = show;
vdentry->dentry.store = store;
sysfs_attr_init(&vdentry->dentry.attr);
vdentry->dentry.attr.name = vdentry->name;
vdentry->dentry.attr.mode = 0;
vdentry->kobj = _kobj;
snprintf(vdentry->name, 15, "%s", _name);
if (vdentry->dentry.store)
vdentry->dentry.attr.mode |= S_IWUSR;
if (vdentry->dentry.show)
vdentry->dentry.attr.mode |= S_IRUGO;
ret = sysfs_create_file(vdentry->kobj, &vdentry->dentry.attr);
if (ret) {
pr_err("failed to create %s\n", vdentry->dentry.attr.name);
vdentry->ctx = NULL;
return ret;
}
return ret;
}
int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr)
{
struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
int ret;
ret = sysfs_create_file(port->mcgs_parent, attr);
if (ret)
pr_err("failed to create %s\n", attr->name);
return ret;
}
void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr)
{
struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
sysfs_remove_file(port->mcgs_parent, attr);
}
static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
{
int i;
char buff[10];
struct mlx4_ib_iov_port *port = NULL;
int ret = 0 ;
struct ib_port_attr attr;
/* get the physical gid and pkey table sizes.*/
ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
if (ret)
goto err;
port = &device->iov_ports[port_num - 1];
port->dev = device;
port->num = port_num;
/* Directory structure:
* iov -
* port num -
* admin_guids
* gids (operational)
* mcg_table
*/
port->dentr_ar = kzalloc(sizeof (struct mlx4_ib_iov_sysfs_attr_ar),
GFP_KERNEL);
if (!port->dentr_ar) {
ret = -ENOMEM;
goto err;
}
sprintf(buff, "%d", port_num);
port->cur_port = kobject_create_and_add(buff,
kobject_get(device->ports_parent));
if (!port->cur_port) {
ret = -ENOMEM;
goto kobj_create_err;
}
/* admin GUIDs */
port->admin_alias_parent = kobject_create_and_add("admin_guids",
kobject_get(port->cur_port));
if (!port->admin_alias_parent) {
ret = -ENOMEM;
goto err_admin_guids;
}
for (i = 0 ; i < attr.gid_tbl_len; i++) {
sprintf(buff, "%d", i);
port->dentr_ar->dentries[i].entry_num = i;
ret = create_sysfs_entry(port, &port->dentr_ar->dentries[i],
buff, port->admin_alias_parent,
show_admin_alias_guid, store_admin_alias_guid);
if (ret)
goto err_admin_alias_parent;
}
/* gids subdirectory (operational gids) */
port->gids_parent = kobject_create_and_add("gids",
kobject_get(port->cur_port));
if (!port->gids_parent) {
ret = -ENOMEM;
goto err_gids;
}
for (i = 0 ; i < attr.gid_tbl_len; i++) {
sprintf(buff, "%d", i);
port->dentr_ar->dentries[attr.gid_tbl_len + i].entry_num = i;
ret = create_sysfs_entry(port,
&port->dentr_ar->dentries[attr.gid_tbl_len + i],
buff,
port->gids_parent, show_port_gid, NULL);
if (ret)
goto err_gids_parent;
}
/* physical port pkey table */
port->pkeys_parent =
kobject_create_and_add("pkeys", kobject_get(port->cur_port));
if (!port->pkeys_parent) {
ret = -ENOMEM;
goto err_pkeys;
}
for (i = 0 ; i < attr.pkey_tbl_len; i++) {
sprintf(buff, "%d", i);
port->dentr_ar->dentries[2 * attr.gid_tbl_len + i].entry_num = i;
ret = create_sysfs_entry(port,
&port->dentr_ar->dentries[2 * attr.gid_tbl_len + i],
buff, port->pkeys_parent,
show_phys_port_pkey, NULL);
if (ret)
goto err_pkeys_parent;
}
/* MCGs table */
port->mcgs_parent =
kobject_create_and_add("mcgs", kobject_get(port->cur_port));
if (!port->mcgs_parent) {
ret = -ENOMEM;
goto err_mcgs;
}
return 0;
err_mcgs:
kobject_put(port->cur_port);
err_pkeys_parent:
kobject_put(port->pkeys_parent);
err_pkeys:
kobject_put(port->cur_port);
err_gids_parent:
kobject_put(port->gids_parent);
err_gids:
kobject_put(port->cur_port);
err_admin_alias_parent:
kobject_put(port->admin_alias_parent);
err_admin_guids:
kobject_put(port->cur_port);
kobject_put(port->cur_port); /* once more for create_and_add buff */
kobj_create_err:
kobject_put(device->ports_parent);
kfree(port->dentr_ar);
err:
pr_err("add_port_entries FAILED: for port:%d, error: %d\n",
port_num, ret);
return ret;
}
static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
{
char base_name[9];
/* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
strlcpy(name, pci_name(dev->dev->pdev), max);
strncpy(base_name, name, 8); /*till xxxx:yy:*/
base_name[8] = '\0';
/* without ARI only the last 3 bits of the function number are usable, so a
 * function number of 8 or above spills into the device number; the function
 * part of the name is therefore taken modulo 8 */
sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
}
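A standalone sketch of the naming scheme above (the PF name "0000:06:00.0" is a made-up value): the pci_name() of the PF is truncated to its "xxxx:yy:" prefix and the slave index is split into device/function, so slave 10 becomes "0000:06:01.2".

	#include <stdio.h>
	#include <string.h>

	/* illustrative re-implementation of get_name(); not driver code */
	int main(void)
	{
		char name[32], base[9];
		const char *pf = "0000:06:00.0";	/* hypothetical pci_name() */
		int slave = 10;

		strncpy(base, pf, 8);			/* keep "xxxx:yy:" */
		base[8] = '\0';
		snprintf(name, sizeof(name), "%s%.2d.%d", base, slave / 8, slave % 8);
		printf("%s\n", name);			/* -> 0000:06:01.2 */
		return 0;
	}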
struct mlx4_port {
struct kobject kobj;
struct mlx4_ib_dev *dev;
struct attribute_group pkey_group;
struct attribute_group gid_group;
u8 port_num;
int slave;
};
static void mlx4_port_release(struct kobject *kobj)
{
struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
struct attribute *a;
int i;
for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
kfree(a);
kfree(p->pkey_group.attrs);
for (i = 0; (a = p->gid_group.attrs[i]); ++i)
kfree(a);
kfree(p->gid_group.attrs);
kfree(p);
}
struct port_attribute {
struct attribute attr;
ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf);
ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
const char *buf, size_t count);
};
static ssize_t port_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct port_attribute *port_attr =
container_of(attr, struct port_attribute, attr);
struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
if (!port_attr->show)
return -EIO;
return port_attr->show(p, port_attr, buf);
}
static ssize_t port_attr_store(struct kobject *kobj,
struct attribute *attr,
const char *buf, size_t size)
{
struct port_attribute *port_attr =
container_of(attr, struct port_attribute, attr);
struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
if (!port_attr->store)
return -EIO;
return port_attr->store(p, port_attr, buf, size);
}
static const struct sysfs_ops port_sysfs_ops = {
.show = port_attr_show,
.store = port_attr_store,
};
static struct kobj_type port_type = {
.release = mlx4_port_release,
.sysfs_ops = &port_sysfs_ops,
};
struct port_table_attribute {
struct port_attribute attr;
char name[8];
int index;
};
static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
char *buf)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
ssize_t ret = -ENODEV;
if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >=
(p->dev->dev->caps.pkey_table_len[p->port_num]))
ret = sprintf(buf, "none\n");
else
ret = sprintf(buf, "%d\n",
p->dev->pkeys.virt2phys_pkey[p->slave]
[p->port_num - 1][tab_attr->index]);
return ret;
}
static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
const char *buf, size_t count)
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
int idx;
int err;
/* do not allow remapping Dom0 virtual pkey table */
if (p->slave == mlx4_master_func_num(p->dev->dev))
return -EINVAL;
if (!strncasecmp(buf, "no", 2))
idx = p->dev->dev->phys_caps.pkey_phys_table_len[p->port_num] - 1;
else if (sscanf(buf, "%i", &idx) != 1 ||
idx >= p->dev->dev->caps.pkey_table_len[p->port_num] ||
idx < 0)
return -EINVAL;
p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1]
[tab_attr->index] = idx;
mlx4_sync_pkey_table(p->dev->dev, p->slave, p->port_num,
tab_attr->index, idx);
err = mlx4_gen_pkey_eqe(p->dev->dev, p->slave, p->port_num);
if (err) {
pr_err("mlx4_gen_pkey_eqe failed for slave %d,"
" port %d, index %d\n", p->slave, p->port_num, idx);
return err;
}
return count;
}
static ssize_t show_port_gid_idx(struct mlx4_port *p,
struct port_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", p->slave);
}
static struct attribute **
alloc_group_attrs(ssize_t (*show)(struct mlx4_port *,
struct port_attribute *, char *buf),
ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
const char *buf, size_t count),
int len)
{
struct attribute **tab_attr;
struct port_table_attribute *element;
int i;
tab_attr = kcalloc(1 + len, sizeof (struct attribute *), GFP_KERNEL);
if (!tab_attr)
return NULL;
for (i = 0; i < len; i++) {
element = kzalloc(sizeof (struct port_table_attribute),
GFP_KERNEL);
if (!element)
goto err;
if (snprintf(element->name, sizeof (element->name),
"%d", i) >= sizeof (element->name)) {
kfree(element);
goto err;
}
sysfs_attr_init(&element->attr.attr);
element->attr.attr.name = element->name;
if (store) {
element->attr.attr.mode = S_IWUSR | S_IRUGO;
element->attr.store = store;
} else
element->attr.attr.mode = S_IRUGO;
element->attr.show = show;
element->index = i;
tab_attr[i] = &element->attr.attr;
}
return tab_attr;
err:
while (--i >= 0)
kfree(tab_attr[i]);
kfree(tab_attr);
return NULL;
}
static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
{
struct mlx4_port *p;
int i;
int ret;
p = kzalloc(sizeof *p, GFP_KERNEL);
if (!p)
return -ENOMEM;
p->dev = dev;
p->port_num = port_num;
p->slave = slave;
ret = kobject_init_and_add(&p->kobj, &port_type,
kobject_get(dev->dev_ports_parent[slave]),
"%d", port_num);
if (ret)
goto err_alloc;
p->pkey_group.name = "pkey_idx";
p->pkey_group.attrs =
alloc_group_attrs(show_port_pkey, store_port_pkey,
dev->dev->caps.pkey_table_len[port_num]);
if (!p->pkey_group.attrs)
goto err_alloc;
ret = sysfs_create_group(&p->kobj, &p->pkey_group);
if (ret)
goto err_free_pkey;
p->gid_group.name = "gid_idx";
p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
if (!p->gid_group.attrs)
goto err_free_pkey;
ret = sysfs_create_group(&p->kobj, &p->gid_group);
if (ret)
goto err_free_gid;
list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
return 0;
err_free_gid:
kfree(p->gid_group.attrs[0]);
kfree(p->gid_group.attrs);
err_free_pkey:
for (i = 0; i < dev->dev->caps.pkey_table_len[port_num]; ++i)
kfree(p->pkey_group.attrs[i]);
kfree(p->pkey_group.attrs);
err_alloc:
kobject_put(dev->dev_ports_parent[slave]);
kfree(p);
return ret;
}
static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
{
char name[32];
int err;
int port;
struct kobject *p, *t;
struct mlx4_port *mport;
get_name(dev, name, slave, sizeof name);
dev->pkeys.device_parent[slave] =
kobject_create_and_add(name, kobject_get(dev->iov_parent));
if (!dev->pkeys.device_parent[slave]) {
err = -ENOMEM;
goto fail_dev;
}
INIT_LIST_HEAD(&dev->pkeys.pkey_port_list[slave]);
dev->dev_ports_parent[slave] =
kobject_create_and_add("ports",
kobject_get(dev->pkeys.device_parent[slave]));
if (!dev->dev_ports_parent[slave]) {
err = -ENOMEM;
goto err_ports;
}
for (port = 1; port <= dev->dev->caps.num_ports; ++port) {
err = add_port(dev, port, slave);
if (err)
goto err_add;
}
return 0;
err_add:
list_for_each_entry_safe(p, t,
&dev->pkeys.pkey_port_list[slave],
entry) {
list_del(&p->entry);
mport = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &mport->pkey_group);
sysfs_remove_group(p, &mport->gid_group);
kobject_put(p);
}
kobject_put(dev->dev_ports_parent[slave]);
err_ports:
kobject_put(dev->pkeys.device_parent[slave]);
/* extra put for the device_parent create_and_add */
kobject_put(dev->pkeys.device_parent[slave]);
fail_dev:
kobject_put(dev->iov_parent);
return err;
}
static int register_pkey_tree(struct mlx4_ib_dev *device)
{
int i;
if (!mlx4_is_master(device->dev))
return 0;
for (i = 0; i <= device->dev->num_vfs; ++i)
register_one_pkey_tree(device, i);
return 0;
}
static void unregister_pkey_tree(struct mlx4_ib_dev *device)
{
int slave;
struct kobject *p, *t;
struct mlx4_port *port;
if (!mlx4_is_master(device->dev))
return;
for (slave = device->dev->num_vfs; slave >= 0; --slave) {
list_for_each_entry_safe(p, t,
&device->pkeys.pkey_port_list[slave],
entry) {
list_del(&p->entry);
port = container_of(p, struct mlx4_port, kobj);
sysfs_remove_group(p, &port->pkey_group);
sysfs_remove_group(p, &port->gid_group);
kobject_put(p);
kobject_put(device->dev_ports_parent[slave]);
}
kobject_put(device->dev_ports_parent[slave]);
kobject_put(device->pkeys.device_parent[slave]);
kobject_put(device->pkeys.device_parent[slave]);
kobject_put(device->iov_parent);
}
}
int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
{
int i;
int ret = 0;
if (!mlx4_is_master(dev->dev))
return 0;
dev->iov_parent =
kobject_create_and_add("iov",
kobject_get(dev->ib_dev.ports_parent->parent));
if (!dev->iov_parent) {
ret = -ENOMEM;
goto err;
}
dev->ports_parent =
kobject_create_and_add("ports",
kobject_get(dev->iov_parent));
if (!dev->ports_parent) {
ret = -ENOMEM;
goto err_ports;
}
for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) {
ret = add_port_entries(dev, i);
if (ret)
goto err_add_entries;
}
ret = register_pkey_tree(dev);
if (ret)
goto err_add_entries;
return 0;
err_add_entries:
kobject_put(dev->ports_parent);
err_ports:
kobject_put(dev->iov_parent);
err:
kobject_put(dev->ib_dev.ports_parent->parent);
pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
return ret;
}
static void unregister_alias_guid_tree(struct mlx4_ib_dev *device)
{
struct mlx4_ib_iov_port *p;
int i;
if (!mlx4_is_master(device->dev))
return;
for (i = 0; i < device->dev->caps.num_ports; i++) {
p = &device->iov_ports[i];
kobject_put(p->admin_alias_parent);
kobject_put(p->gids_parent);
kobject_put(p->pkeys_parent);
kobject_put(p->mcgs_parent);
kobject_put(p->cur_port);
kobject_put(p->cur_port);
kobject_put(p->cur_port);
kobject_put(p->cur_port);
kobject_put(p->cur_port);
kobject_put(p->dev->ports_parent);
kfree(p->dentr_ar);
}
}
void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
{
unregister_alias_guid_tree(device);
unregister_pkey_tree(device);
kobject_put(device->ports_parent);
kobject_put(device->iov_parent);
kobject_put(device->iov_parent);
kobject_put(device->ib_dev.ports_parent->parent);
}

View File

@ -399,11 +399,20 @@ static inline void nes_write8(void __iomem *addr, u8 val)
writeb(val, addr);
}
enum nes_resource {
NES_RESOURCE_MW = 1,
NES_RESOURCE_FAST_MR,
NES_RESOURCE_PHYS_MR,
NES_RESOURCE_USER_MR,
NES_RESOURCE_PD,
NES_RESOURCE_QP,
NES_RESOURCE_CQ,
NES_RESOURCE_ARP
};
static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
unsigned long *resource_array, u32 max_resources,
u32 *req_resource_num, u32 *next)
u32 *req_resource_num, u32 *next, enum nes_resource resource_type)
{
unsigned long flags;
u32 resource_num;
@ -414,7 +423,7 @@ static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
if (resource_num >= max_resources) {
resource_num = find_first_zero_bit(resource_array, max_resources);
if (resource_num >= max_resources) {
printk(KERN_ERR PFX "%s: No available resourcess.\n", __func__);
printk(KERN_ERR PFX "%s: No available resources [type=%u].\n", __func__, resource_type);
spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
return -EMFILE;
}

View File

@ -430,6 +430,8 @@ static void form_cm_frame(struct sk_buff *skb,
buf += sizeof(*tcph);
skb->ip_summed = CHECKSUM_PARTIAL;
if (!(cm_node->netdev->features & NETIF_F_IP_CSUM))
skb->ip_summed = CHECKSUM_NONE;
skb->protocol = htons(0x800);
skb->data_len = 0;
skb->mac_len = ETH_HLEN;
@ -1356,7 +1358,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
else
netdev = nesvnic->netdev;
neigh = dst_neigh_lookup(&rt->dst, &dst_ip);
neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev);
rcu_read_lock();
if (neigh) {
@ -1465,12 +1467,8 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
cm_node->loopbackpartner = NULL;
/* get the mac addr for the remote node */
if (ipv4_is_loopback(htonl(cm_node->rem_addr))) {
arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
} else {
oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
}
oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
if (arpindex < 0) {
kfree(cm_node);
return NULL;
@ -3153,11 +3151,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
nesqp->nesqp_context->tcpPorts[1] =
cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
nesqp->nesqp_context->ip0 =
cpu_to_le32(ntohl(nesvnic->local_ipaddr));
else
nesqp->nesqp_context->ip0 =
nesqp->nesqp_context->ip0 =
cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
nesqp->nesqp_context->misc2 |= cpu_to_le32(
@ -3182,10 +3176,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
memset(&nes_quad, 0, sizeof(nes_quad));
nes_quad.DstIpAdrIndex =
cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
nes_quad.SrcIpadr = nesvnic->local_ipaddr;
else
nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
@ -3538,11 +3529,7 @@ static void cm_event_connected(struct nes_cm_event *event)
cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
nesqp->nesqp_context->tcpPorts[1] =
cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
nesqp->nesqp_context->ip0 =
cpu_to_le32(ntohl(nesvnic->local_ipaddr));
else
nesqp->nesqp_context->ip0 =
nesqp->nesqp_context->ip0 =
cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
nesqp->nesqp_context->misc2 |= cpu_to_le32(
@ -3571,10 +3558,7 @@ static void cm_event_connected(struct nes_cm_event *event)
nes_quad.DstIpAdrIndex =
cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
nes_quad.SrcIpadr = nesvnic->local_ipaddr;
else
nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;

View File

@ -3575,10 +3575,10 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p,"
" Tcp state = %s, iWARP state = %s\n",
" Tcp state = %d, iWARP state = %d\n",
async_event_id,
le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]);
tcp_state, iwarp_state);
aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
if (aeq_info & NES_AEQE_QP) {

View File

@ -384,24 +384,20 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
/* bump past the vlan tag */
wqe_fragment_length++;
/* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */
wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
if (skb->ip_summed == CHECKSUM_PARTIAL) {
tcph = tcp_hdr(skb);
if (1) {
if (skb_is_gso(skb)) {
/* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... seg size = %u\n",
netdev->name, skb_is_gso(skb)); */
wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE |
NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb);
set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
((u32)tcph->doff) |
(((u32)(((unsigned char *)tcph) - skb->data)) << 4));
} else {
wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
}
if (skb_is_gso(skb)) {
tcph = tcp_hdr(skb);
/* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... is_gso = %u seg size = %u\n",
netdev->name, skb_is_gso(skb), skb_shinfo(skb)->gso_size); */
wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | (u16)skb_shinfo(skb)->gso_size;
set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
((u32)tcph->doff) |
(((u32)(((unsigned char *)tcph) - skb->data)) << 4));
}
} else { /* CHECKSUM_HW */
wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM | NES_NIC_SQ_WQE_COMPLETION;
wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM;
}
set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
@ -596,10 +592,10 @@ tso_sq_no_longer_full:
nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n",
original_first_length, NES_FIRST_FRAG_SIZE);
nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
" (%u frags), tso_size=%u\n",
" (%u frags), is_gso = %u tso_size=%u\n",
netdev->name,
skb->len, skb_headlen(skb),
skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
skb_shinfo(skb)->nr_frags, skb_is_gso(skb), skb_shinfo(skb)->gso_size);
}
memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE),
@ -651,8 +647,8 @@ tso_sq_no_longer_full:
} else {
nesnic->tx_skb[nesnic->sq_head] = NULL;
}
wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb);
if ((tso_wqe_length + original_first_length) > skb_is_gso(skb)) {
wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_shinfo(skb)->gso_size;
if ((tso_wqe_length + original_first_length) > skb_shinfo(skb)->gso_size) {
wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE;
} else {
iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset);
@ -1678,12 +1674,10 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
netdev->hard_header_len = ETH_HLEN;
netdev->addr_len = ETH_ALEN;
netdev->type = ARPHRD_ETHER;
netdev->features = NETIF_F_HIGHDMA;
netdev->netdev_ops = &nes_netdev_ops;
netdev->ethtool_ops = &nes_ethtool_ops;
netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128);
nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n");
netdev->features |= NETIF_F_HW_VLAN_TX;
/* Fill in the port structure */
nesvnic->netdev = netdev;
@ -1710,11 +1704,11 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
netdev->dev_addr[5] = (u8)u64temp;
memcpy(netdev->perm_addr, netdev->dev_addr, 6);
netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM |
NETIF_F_HW_VLAN_RX;
netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX;
if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV))
netdev->hw_features |= NETIF_F_TSO;
netdev->features |= netdev->hw_features;
netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_TX;
netdev->hw_features |= NETIF_F_LRO;
nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d,"

View File

@ -699,7 +699,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
arp_index = 0;
err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps,
nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index);
nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index, NES_RESOURCE_ARP);
if (err) {
nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err);
return err;

View File

@ -80,7 +80,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
next_stag_index %= nesadapter->max_mr;
ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
nesadapter->max_mr, &stag_index, &next_stag_index);
nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_MW);
if (ret) {
return ERR_PTR(ret);
}
@ -404,7 +404,7 @@ static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list
err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
nesadapter->max_mr, &stag_index,
&next_stag_index);
&next_stag_index, NES_RESOURCE_FAST_MR);
if (err)
return ERR_PTR(err);
@ -780,7 +780,7 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
netdev_refcnt_read(nesvnic->netdev));
err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
nesadapter->max_pd, &pd_num, &nesadapter->next_pd);
nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD);
if (err) {
return ERR_PTR(err);
}
@ -1157,7 +1157,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size);
ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps,
nesadapter->max_qp, &qp_num, &nesadapter->next_qp);
nesadapter->max_qp, &qp_num, &nesadapter->next_qp, NES_RESOURCE_QP);
if (ret) {
return ERR_PTR(ret);
}
@ -1546,7 +1546,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
return ERR_PTR(-EINVAL);
err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
nesadapter->max_cq, &cq_num, &nesadapter->next_cq);
nesadapter->max_cq, &cq_num, &nesadapter->next_cq, NES_RESOURCE_CQ);
if (err) {
return ERR_PTR(err);
}
@ -2129,7 +2129,7 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
return ERR_PTR(-EINVAL);
err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
&stag_index, &next_stag_index);
&stag_index, &next_stag_index, NES_RESOURCE_PHYS_MR);
if (err) {
return ERR_PTR(err);
}
@ -2360,7 +2360,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
next_stag_index %= nesadapter->max_mr;
err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
nesadapter->max_mr, &stag_index, &next_stag_index);
nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_USER_MR);
if (err) {
ib_umem_release(region);
return ERR_PTR(err);

View File

@ -285,7 +285,6 @@ struct qib_base_info {
#ifndef QIB_KERN_TYPE
#define QIB_KERN_TYPE 0
#define QIB_IDSTR "QLogic kernel.org driver"
#endif
/*
@ -301,6 +300,19 @@ struct qib_base_info {
*/
#define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION)
/*
* Define the driver version number. This is something that refers only
* to the driver itself, not the software interfaces it supports.
*/
#define QIB_DRIVER_VERSION_BASE "1.11"
/* create the final driver version string */
#ifdef QIB_IDSTR
#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE " " QIB_IDSTR
#else
#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE
#endif
/*
* If the unit is specified via open, HCA choice is fixed. If port is
* specified, it's also fixed. Otherwise we try to spread contexts

View File

@ -46,7 +46,7 @@
* The size has to be longer than this string, so we can append
* board/chip information to it in the init code.
*/
const char ib_qib_version[] = QIB_IDSTR "\n";
const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
DEFINE_SPINLOCK(qib_devs_lock);
LIST_HEAD(qib_dev_list);
@ -65,6 +65,7 @@ MODULE_PARM_DESC(compat_ddr_negotiate,
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("QLogic <support@qlogic.com>");
MODULE_DESCRIPTION("QLogic IB driver");
MODULE_VERSION(QIB_DRIVER_VERSION);
/*
* QIB_PIO_MAXIBHDR is the max IB header size allowed for in our

View File

@ -186,8 +186,9 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
goto bail;
off = sge->addr - mr->user_base;
if (unlikely(sge->addr < mr->iova || off + sge->length > mr->length ||
(mr->access_flags & acc) == 0))
if (unlikely(sge->addr < mr->user_base ||
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
goto bail;
if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
goto bail;

View File

@ -2224,7 +2224,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->dma_ops = &qib_dma_mapping_ops;
snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
QIB_IDSTR " %s", init_utsname()->nodename);
"QLogic Infiniband HCA %s", init_utsname()->nodename);
ret = ib_register_device(ibdev, qib_create_port_files);
if (ret)

View File

@ -150,7 +150,7 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
ipoib_ib_dev_down(dev, 0);
ipoib_ib_dev_down(dev, 1);
ipoib_ib_dev_stop(dev, 0);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {

View File

@ -175,7 +175,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
mcast->mcmember = *mcmember;
/* Set the cached Q_Key before we attach if it's the broadcast group */
/* Set the multicast MTU and cached Q_Key before we attach if it's
* the broadcast group.
*/
if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
sizeof (union ib_gid))) {
spin_lock_irq(&priv->lock);
@ -183,10 +185,17 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
spin_unlock_irq(&priv->lock);
return -EAGAIN;
}
priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
spin_unlock_irq(&priv->lock);
priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
set_qkey = 1;
if (!ipoib_cm_admin_enabled(dev)) {
rtnl_lock();
dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
rtnl_unlock();
}
}
if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@ -574,14 +583,6 @@ void ipoib_mcast_join_task(struct work_struct *work)
return;
}
priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
if (!ipoib_cm_admin_enabled(dev)) {
rtnl_lock();
dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
rtnl_unlock();
}
ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
clear_bit(IPOIB_MCAST_RUN, &priv->flags);

View File

@ -638,9 +638,9 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re
struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL);
if (scmnd) {
srp_free_req(target, req, scmnd, 0);
scmnd->result = DID_RESET << 16;
scmnd->scsi_done(scmnd);
srp_free_req(target, req, scmnd, 0);
}
}
@ -1687,6 +1687,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
SRP_TSK_ABORT_TASK);
srp_free_req(target, req, scmnd, 0);
scmnd->result = DID_ABORT << 16;
scmnd->scsi_done(scmnd);
return SUCCESS;
}

View File

@ -40,6 +40,7 @@
#include <linux/mlx4/cmd.h>
#include <linux/semaphore.h>
#include <rdma/ib_smi.h>
#include <asm/io.h>
@ -394,7 +395,8 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
struct mlx4_vhcr_cmd *vhcr = priv->mfunc.vhcr;
int ret;
down(&priv->cmd.slave_sem);
mutex_lock(&priv->cmd.slave_cmd_mutex);
vhcr->in_param = cpu_to_be64(in_param);
vhcr->out_param = out_param ? cpu_to_be64(*out_param) : 0;
vhcr->in_modifier = cpu_to_be32(in_modifier);
@ -402,6 +404,7 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
vhcr->token = cpu_to_be16(CMD_POLL_TOKEN);
vhcr->status = 0;
vhcr->flags = !!(priv->cmd.use_events) << 6;
if (mlx4_is_master(dev)) {
ret = mlx4_master_process_vhcr(dev, dev->caps.function, vhcr);
if (!ret) {
@ -438,7 +441,8 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
mlx4_err(dev, "failed execution of VHCR_POST command"
"opcode 0x%x\n", op);
}
up(&priv->cmd.slave_sem);
mutex_unlock(&priv->cmd.slave_cmd_mutex);
return ret;
}
@ -627,6 +631,162 @@ static int mlx4_ACCESS_MEM(struct mlx4_dev *dev, u64 master_addr,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
static int query_pkey_block(struct mlx4_dev *dev, u8 port, u16 index, u16 *pkey,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox)
{
struct ib_smp *in_mad = (struct ib_smp *)(inbox->buf);
struct ib_smp *out_mad = (struct ib_smp *)(outbox->buf);
int err;
int i;
if (index & 0x1f)
return -EINVAL;
in_mad->attr_mod = cpu_to_be32(index / 32);
err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3,
MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
MLX4_CMD_NATIVE);
if (err)
return err;
for (i = 0; i < 32; ++i)
pkey[i] = be16_to_cpu(((__be16 *) out_mad->data)[i]);
return err;
}
static int get_full_pkey_table(struct mlx4_dev *dev, u8 port, u16 *table,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox)
{
int i;
int err;
for (i = 0; i < dev->caps.pkey_table_len[port]; i += 32) {
err = query_pkey_block(dev, port, i, table + i, inbox, outbox);
if (err)
return err;
}
return 0;
}
#define PORT_CAPABILITY_LOCATION_IN_SMP 20
#define PORT_STATE_OFFSET 32
static enum ib_port_state vf_port_state(struct mlx4_dev *dev, int port, int vf)
{
if (mlx4_get_slave_port_state(dev, vf, port) == SLAVE_PORT_UP)
return IB_PORT_ACTIVE;
else
return IB_PORT_DOWN;
}
static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
struct ib_smp *smp = inbox->buf;
u32 index;
u8 port;
u16 *table;
int err;
int vidx, pidx;
struct mlx4_priv *priv = mlx4_priv(dev);
struct ib_smp *outsmp = outbox->buf;
__be16 *outtab = (__be16 *)(outsmp->data);
__be32 slave_cap_mask;
__be64 slave_node_guid;
port = vhcr->in_modifier;
if (smp->base_version == 1 &&
smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
smp->class_version == 1) {
if (smp->method == IB_MGMT_METHOD_GET) {
if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
index = be32_to_cpu(smp->attr_mod);
if (port < 1 || port > dev->caps.num_ports)
return -EINVAL;
table = kcalloc(dev->caps.pkey_table_len[port], sizeof *table, GFP_KERNEL);
if (!table)
return -ENOMEM;
/* need to get the full pkey table because the paravirtualized
* pkeys may be scattered among several pkey blocks.
*/
err = get_full_pkey_table(dev, port, table, inbox, outbox);
if (!err) {
for (vidx = index * 32; vidx < (index + 1) * 32; ++vidx) {
pidx = priv->virt2phys_pkey[slave][port - 1][vidx];
outtab[vidx % 32] = cpu_to_be16(table[pidx]);
}
}
kfree(table);
return err;
}
if (smp->attr_id == IB_SMP_ATTR_PORT_INFO) {
/*get the slave specific caps:*/
/*do the command */
err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
vhcr->in_modifier, vhcr->op_modifier,
vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
/* modify the response for slaves */
if (!err && slave != mlx4_master_func_num(dev)) {
u8 *state = outsmp->data + PORT_STATE_OFFSET;
*state = (*state & 0xf0) | vf_port_state(dev, port, slave);
slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
memcpy(outsmp->data + PORT_CAPABILITY_LOCATION_IN_SMP, &slave_cap_mask, 4);
}
return err;
}
if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
/* compute slave's gid block */
smp->attr_mod = cpu_to_be32(slave / 8);
/* execute cmd */
err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
vhcr->in_modifier, vhcr->op_modifier,
vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
if (!err) {
/* if needed, move slave gid to index 0 */
if (slave % 8)
memcpy(outsmp->data,
outsmp->data + (slave % 8) * 8, 8);
/* delete all other gids */
memset(outsmp->data + 8, 0, 56);
}
return err;
}
if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
vhcr->in_modifier, vhcr->op_modifier,
vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
if (!err) {
slave_node_guid = mlx4_get_slave_node_guid(dev, slave);
memcpy(outsmp->data + 12, &slave_node_guid, 8);
}
return err;
}
}
}
if (slave != mlx4_master_func_num(dev) &&
((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) ||
(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
smp->method == IB_MGMT_METHOD_SET))) {
mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, "
"class 0x%x, method 0x%x for attr 0x%x. Rejecting\n",
slave, smp->mgmt_class, smp->method,
be16_to_cpu(smp->attr_id));
return -EPERM;
}
/*default:*/
return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
vhcr->in_modifier, vhcr->op_modifier,
vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
}
int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
@ -950,7 +1110,7 @@ static struct mlx4_cmd_info cmd_info[] = {
.out_is_imm = false,
.encode_slave_id = false,
.verify = NULL,
.wrapper = mlx4_GEN_QP_wrapper
.wrapper = mlx4_INIT2INIT_QP_wrapper
},
{
.opcode = MLX4_CMD_INIT2RTR_QP,
@ -968,7 +1128,7 @@ static struct mlx4_cmd_info cmd_info[] = {
.out_is_imm = false,
.encode_slave_id = false,
.verify = NULL,
.wrapper = mlx4_GEN_QP_wrapper
.wrapper = mlx4_RTR2RTS_QP_wrapper
},
{
.opcode = MLX4_CMD_RTS2RTS_QP,
@ -977,7 +1137,7 @@ static struct mlx4_cmd_info cmd_info[] = {
.out_is_imm = false,
.encode_slave_id = false,
.verify = NULL,
.wrapper = mlx4_GEN_QP_wrapper
.wrapper = mlx4_RTS2RTS_QP_wrapper
},
{
.opcode = MLX4_CMD_SQERR2RTS_QP,
@ -986,7 +1146,7 @@ static struct mlx4_cmd_info cmd_info[] = {
.out_is_imm = false,
.encode_slave_id = false,
.verify = NULL,
.wrapper = mlx4_GEN_QP_wrapper
.wrapper = mlx4_SQERR2RTS_QP_wrapper
},
{
.opcode = MLX4_CMD_2ERR_QP,
@ -1013,7 +1173,7 @@ static struct mlx4_cmd_info cmd_info[] = {
.out_is_imm = false,
.encode_slave_id = false,
.verify = NULL,
.wrapper = mlx4_GEN_QP_wrapper
.wrapper = mlx4_SQD2SQD_QP_wrapper
},
{
.opcode = MLX4_CMD_SQD2RTS_QP,
@ -1022,7 +1182,7 @@ static struct mlx4_cmd_info cmd_info[] = {
.out_is_imm = false,
.encode_slave_id = false,
.verify = NULL,
.wrapper = mlx4_GEN_QP_wrapper
.wrapper = mlx4_SQD2RTS_QP_wrapper
},
{
.opcode = MLX4_CMD_2RST_QP,
@ -1060,6 +1220,24 @@ static struct mlx4_cmd_info cmd_info[] = {
.verify = NULL,
.wrapper = mlx4_GEN_QP_wrapper
},
{
.opcode = MLX4_CMD_CONF_SPECIAL_QP,
.has_inbox = false,
.has_outbox = false,
.out_is_imm = false,
.encode_slave_id = false,
.verify = NULL, /* XXX verify: only demux can do this */
.wrapper = NULL
},
{
.opcode = MLX4_CMD_MAD_IFC,
.has_inbox = true,
.has_outbox = true,
.out_is_imm = false,
.encode_slave_id = false,
.verify = NULL,
.wrapper = mlx4_MAD_IFC_wrapper
},
{
.opcode = MLX4_CMD_QUERY_IF_STAT,
.has_inbox = false,
@ -1340,6 +1518,8 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd)
goto inform_slave_state;
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave);
/* write the version in the event field */
reply |= mlx4_comm_get_version();
@ -1376,19 +1556,21 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
goto reset_slave;
slave_state[slave].vhcr_dma |= param;
slave_state[slave].active = true;
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave);
break;
case MLX4_COMM_CMD_VHCR_POST:
if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
(slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST))
goto reset_slave;
down(&priv->cmd.slave_sem);
mutex_lock(&priv->cmd.slave_cmd_mutex);
if (mlx4_master_process_vhcr(dev, slave, NULL)) {
mlx4_err(dev, "Failed processing vhcr for slave:%d,"
" resetting slave.\n", slave);
up(&priv->cmd.slave_sem);
mutex_unlock(&priv->cmd.slave_cmd_mutex);
goto reset_slave;
}
up(&priv->cmd.slave_sem);
mutex_unlock(&priv->cmd.slave_cmd_mutex);
break;
default:
mlx4_warn(dev, "Bad comm cmd:%d from slave:%d\n", cmd, slave);
@ -1529,14 +1711,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
struct mlx4_slave_state *s_state;
int i, j, err, port;
priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE,
&priv->mfunc.vhcr_dma,
GFP_KERNEL);
if (!priv->mfunc.vhcr) {
mlx4_err(dev, "Couldn't allocate vhcr.\n");
return -ENOMEM;
}
if (mlx4_is_master(dev))
priv->mfunc.comm =
ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) +
@ -1590,6 +1764,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
INIT_WORK(&priv->mfunc.master.slave_flr_event_work,
mlx4_master_handle_slave_flr);
spin_lock_init(&priv->mfunc.master.slave_state_lock);
spin_lock_init(&priv->mfunc.master.slave_eq.event_lock);
priv->mfunc.master.comm_wq =
create_singlethread_workqueue("mlx4_comm");
if (!priv->mfunc.master.comm_wq)
@ -1598,7 +1773,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
if (mlx4_init_resource_tracker(dev))
goto err_thread;
sema_init(&priv->cmd.slave_sem, 1);
err = mlx4_ARM_COMM_CHANNEL(dev);
if (err) {
mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
@ -1612,8 +1786,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
mlx4_err(dev, "Couldn't sync toggles\n");
goto err_comm;
}
sema_init(&priv->cmd.slave_sem, 1);
}
return 0;
@ -1643,6 +1815,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
mutex_init(&priv->cmd.hcr_mutex);
mutex_init(&priv->cmd.slave_cmd_mutex);
sema_init(&priv->cmd.poll_sem, 1);
priv->cmd.use_events = 0;
priv->cmd.toggle = 1;
@ -1659,14 +1832,30 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
}
}
if (mlx4_is_mfunc(dev)) {
priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE,
&priv->mfunc.vhcr_dma,
GFP_KERNEL);
if (!priv->mfunc.vhcr) {
mlx4_err(dev, "Couldn't allocate VHCR.\n");
goto err_hcr;
}
}
priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev,
MLX4_MAILBOX_SIZE,
MLX4_MAILBOX_SIZE, 0);
if (!priv->cmd.pool)
goto err_hcr;
goto err_vhcr;
return 0;
err_vhcr:
if (mlx4_is_mfunc(dev))
dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
priv->mfunc.vhcr = NULL;
err_hcr:
if (!mlx4_is_slave(dev))
iounmap(priv->cmd.hcr);
@ -1689,9 +1878,6 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
}
iounmap(priv->mfunc.comm);
dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
priv->mfunc.vhcr = NULL;
}
void mlx4_cmd_cleanup(struct mlx4_dev *dev)
@ -1702,6 +1888,10 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev)
if (!mlx4_is_slave(dev))
iounmap(priv->cmd.hcr);
if (mlx4_is_mfunc(dev))
dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
priv->mfunc.vhcr, priv->mfunc.vhcr_dma);
priv->mfunc.vhcr = NULL;
}
/*

View File

@ -164,13 +164,16 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq;
struct mlx4_eqe *s_eqe =
&slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)];
struct mlx4_eqe *s_eqe;
unsigned long flags;
spin_lock_irqsave(&slave_eq->event_lock, flags);
s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)];
if ((!!(s_eqe->owner & 0x80)) ^
(!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) {
mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. "
"No free EQE on slave events queue\n", slave);
spin_unlock_irqrestore(&slave_eq->event_lock, flags);
return;
}
@ -183,6 +186,7 @@ static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
queue_work(priv->mfunc.master.comm_wq,
&priv->mfunc.master.slave_event_work);
spin_unlock_irqrestore(&slave_eq->event_lock, flags);
}
static void mlx4_slave_event(struct mlx4_dev *dev, int slave,
@ -200,6 +204,196 @@ static void mlx4_slave_event(struct mlx4_dev *dev, int slave,
slave_event(dev, slave, eqe);
}
int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port)
{
struct mlx4_eqe eqe;
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave];
if (!s_slave->active)
return 0;
memset(&eqe, 0, sizeof eqe);
eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE;
eqe.event.port_mgmt_change.port = port;
return mlx4_GEN_EQE(dev, slave, &eqe);
}
EXPORT_SYMBOL(mlx4_gen_pkey_eqe);
int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port)
{
struct mlx4_eqe eqe;
/* don't send if we don't have that slave */
if (dev->num_vfs < slave)
return 0;
memset(&eqe, 0, sizeof eqe);
eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO;
eqe.event.port_mgmt_change.port = port;
return mlx4_GEN_EQE(dev, slave, &eqe);
}
EXPORT_SYMBOL(mlx4_gen_guid_change_eqe);
int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port,
u8 port_subtype_change)
{
struct mlx4_eqe eqe;
/* don't send if we don't have that slave */
if (dev->num_vfs < slave)
return 0;
memset(&eqe, 0, sizeof eqe);
eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE;
eqe.subtype = port_subtype_change;
eqe.event.port_change.port = cpu_to_be32(port << 28);
mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__,
port_subtype_change, slave, port);
return mlx4_GEN_EQE(dev, slave, &eqe);
}
EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe);
enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) {
pr_err("%s: Error: asking for slave:%d, port:%d\n",
__func__, slave, port);
return SLAVE_PORT_DOWN;
}
return s_state[slave].port_state[port];
}
EXPORT_SYMBOL(mlx4_get_slave_port_state);
static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port,
enum slave_port_state state)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
pr_err("%s: Error: asking for slave:%d, port:%d\n",
__func__, slave, port);
return -1;
}
s_state[slave].port_state[port] = state;
return 0;
}
static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event)
{
int i;
enum slave_port_gen_event gen_event;
for (i = 0; i < dev->num_slaves; i++)
set_and_calc_slave_port_state(dev, i, port, event, &gen_event);
}
/**************************************************************************
This function takes the new event for the given port as input and, based on
the slave's previous port state, moves the slave to its new port state.
The possible events are:
MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN
MLX4_PORT_STATE_DEV_EVENT_PORT_UP
MLX4_PORT_STATE_IB_EVENT_GID_VALID
MLX4_PORT_STATE_IB_EVENT_GID_INVALID
(A standalone transition-table sketch follows this function below.)
***************************************************************************/
int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave,
u8 port, int event,
enum slave_port_gen_event *gen_event)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_slave_state *ctx = NULL;
unsigned long flags;
int ret = -1;
enum slave_port_state cur_state =
mlx4_get_slave_port_state(dev, slave, port);
*gen_event = SLAVE_PORT_GEN_EVENT_NONE;
if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
pr_err("%s: Error: asking for slave:%d, port:%d\n",
__func__, slave, port);
return ret;
}
ctx = &priv->mfunc.master.slave_state[slave];
spin_lock_irqsave(&ctx->lock, flags);
mlx4_dbg(dev, "%s: slave: %d, current state: %d new event :%d\n",
__func__, slave, cur_state, event);
switch (cur_state) {
case SLAVE_PORT_DOWN:
if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event)
mlx4_set_slave_port_state(dev, slave, port,
SLAVE_PENDING_UP);
break;
case SLAVE_PENDING_UP:
if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event)
mlx4_set_slave_port_state(dev, slave, port,
SLAVE_PORT_DOWN);
else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) {
mlx4_set_slave_port_state(dev, slave, port,
SLAVE_PORT_UP);
*gen_event = SLAVE_PORT_GEN_EVENT_UP;
}
break;
case SLAVE_PORT_UP:
if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) {
mlx4_set_slave_port_state(dev, slave, port,
SLAVE_PORT_DOWN);
*gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
} else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID ==
event) {
mlx4_set_slave_port_state(dev, slave, port,
SLAVE_PENDING_UP);
*gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
}
break;
default:
pr_err("%s: BUG!!! UNKNOWN state: "
"slave:%d, port:%d\n", __func__, slave, port);
goto out;
}
ret = mlx4_get_slave_port_state(dev, slave, port);
mlx4_dbg(dev, "%s: slave: %d, current state: %d new event"
" :%d gen_event: %d\n",
__func__, slave, cur_state, event, *gen_event);
out:
spin_unlock_irqrestore(&ctx->lock, flags);
return ret;
}
EXPORT_SYMBOL(set_and_calc_slave_port_state);
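#include <stdio.h>

/*
 * Standalone sketch (not driver code) of the slave port state machine that
 * set_and_calc_slave_port_state() above implements.  The enum names below
 * are simplified stand-ins for the driver's constants; the transitions are
 * the same ones handled in that function's switch statement.
 */
enum sketch_state { S_DOWN, S_PENDING_UP, S_UP };
enum sketch_event { EV_DEV_PORT_DOWN, EV_DEV_PORT_UP, EV_GID_VALID, EV_GID_INVALID, EV_NUM };

/* next_state[current state][event] */
static const enum sketch_state next_state[3][EV_NUM] = {
	/*                 dev_down  dev_up        gid_valid  gid_invalid   */
	[S_DOWN]       = { S_DOWN,   S_PENDING_UP, S_DOWN,    S_DOWN },
	[S_PENDING_UP] = { S_DOWN,   S_PENDING_UP, S_UP,      S_PENDING_UP },
	[S_UP]         = { S_DOWN,   S_UP,         S_UP,      S_PENDING_UP },
};

int main(void)
{
	enum sketch_state s = S_DOWN;

	s = next_state[s][EV_DEV_PORT_UP];	/* DOWN -> PENDING_UP              */
	s = next_state[s][EV_GID_VALID];	/* PENDING_UP -> UP (gen_event UP) */
	s = next_state[s][EV_GID_INVALID];	/* UP -> PENDING_UP (gen_event DOWN) */
	printf("final state: %d\n", s);		/* prints 1 (S_PENDING_UP)         */
	return 0;
}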
int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr)
{
struct mlx4_eqe eqe;
memset(&eqe, 0, sizeof eqe);
eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO;
eqe.event.port_mgmt_change.port = port;
eqe.event.port_mgmt_change.params.port_info.changed_attr =
cpu_to_be32((u32) attr);
slave_event(dev, ALL_SLAVES, &eqe);
return 0;
}
EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev);
void mlx4_master_handle_slave_flr(struct work_struct *work)
{
struct mlx4_mfunc_master_ctx *master =
@ -251,6 +445,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
u32 flr_slave;
u8 update_slave_state;
int i;
enum slave_port_gen_event gen_event;
while ((eqe = next_eqe_sw(eq))) {
/*
@ -347,35 +542,49 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
case MLX4_EVENT_TYPE_PORT_CHANGE:
port = be32_to_cpu(eqe->event.port_change.port) >> 28;
if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) {
mlx4_dispatch_event(dev,
MLX4_DEV_EVENT_PORT_DOWN,
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN,
port);
mlx4_priv(dev)->sense.do_sense_port[port] = 1;
if (mlx4_is_master(dev))
/*change the state of all slave's port
* to down:*/
for (i = 0; i < dev->num_slaves; i++) {
mlx4_dbg(dev, "%s: Sending "
"MLX4_PORT_CHANGE_SUBTYPE_DOWN"
if (!mlx4_is_master(dev))
break;
for (i = 0; i < dev->num_slaves; i++) {
if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
if (i == mlx4_master_func_num(dev))
continue;
mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN"
" to slave: %d, port:%d\n",
__func__, i, port);
if (i == dev->caps.function)
continue;
mlx4_slave_event(dev, i, eqe);
}
} else {
mlx4_dispatch_event(dev,
MLX4_DEV_EVENT_PORT_UP,
port);
mlx4_priv(dev)->sense.do_sense_port[port] = 0;
if (mlx4_is_master(dev)) {
for (i = 0; i < dev->num_slaves; i++) {
if (i == dev->caps.function)
continue;
mlx4_slave_event(dev, i, eqe);
} else { /* IB port */
set_and_calc_slave_port_state(dev, i, port,
MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
&gen_event);
/* we may still be in the pending state, in which case do not send a port_down event */
if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) {
if (i == mlx4_master_func_num(dev))
continue;
mlx4_slave_event(dev, i, eqe);
}
}
}
} else {
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port);
mlx4_priv(dev)->sense.do_sense_port[port] = 0;
if (!mlx4_is_master(dev))
break;
if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
for (i = 0; i < dev->num_slaves; i++) {
if (i == mlx4_master_func_num(dev))
continue;
mlx4_slave_event(dev, i, eqe);
}
else /* IB port */
/* port-up event will be sent to a slave when the
* slave's alias-guid is set. This is done in alias_GUID.c
*/
set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP);
}
break;

View File

@ -183,7 +183,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
#define QUERY_FUNC_CAP_MTT_QUOTA_OFFSET 0x24
#define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28
#define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c
#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0X30
#define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0x30
#define QUERY_FUNC_CAP_FMR_FLAG 0x80
#define QUERY_FUNC_CAP_FLAG_RDMA 0x40
@ -194,21 +194,39 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
#define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8
#define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc
#define QUERY_FUNC_CAP_QP0_TUNNEL 0x10
#define QUERY_FUNC_CAP_QP0_PROXY 0x14
#define QUERY_FUNC_CAP_QP1_TUNNEL 0x18
#define QUERY_FUNC_CAP_QP1_PROXY 0x1c
#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40
#define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80
#define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80
if (vhcr->op_modifier == 1) {
field = vhcr->in_modifier;
MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
field = 0;
/* ensure force vlan and force mac bits are not set */
MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
/* ensure that phy_wqe_gid bit is not set */
MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
field = vhcr->in_modifier; /* phys-port = logical-port */
MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
/* size is now the QP number */
size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1;
MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL);
size += 2;
MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL);
size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1;
MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY);
size += 2;
MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY);
} else if (vhcr->op_modifier == 0) {
/* enable rdma and ethernet interfaces */
field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA);
@ -253,99 +271,118 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
return err;
}
int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap)
int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
struct mlx4_func_cap *func_cap)
{
struct mlx4_cmd_mailbox *mailbox;
u32 *outbox;
u8 field;
u8 field, op_modifier;
u32 size;
int i;
int err = 0;
op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FUNC_CAP,
err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier,
MLX4_CMD_QUERY_FUNC_CAP,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (err)
goto out;
outbox = mailbox->buf;
MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET);
if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) {
mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n");
err = -EPROTONOSUPPORT;
if (!op_modifier) {
MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET);
if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) {
mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n");
err = -EPROTONOSUPPORT;
goto out;
}
func_cap->flags = field;
MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
func_cap->num_ports = field;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
func_cap->pf_context_behaviour = size;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
func_cap->qp_quota = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
func_cap->srq_quota = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
func_cap->cq_quota = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
func_cap->max_eq = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
func_cap->reserved_eq = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
func_cap->mpt_quota = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
func_cap->mtt_quota = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
func_cap->mcg_quota = size & 0xFFFFFF;
goto out;
}
func_cap->flags = field;
MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET);
func_cap->num_ports = field;
/* logical port query */
if (gen_or_port > dev->caps.num_ports) {
err = -EINVAL;
goto out;
}
MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET);
func_cap->pf_context_behaviour = size;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET);
func_cap->qp_quota = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET);
func_cap->srq_quota = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET);
func_cap->cq_quota = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET);
func_cap->max_eq = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET);
func_cap->reserved_eq = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
func_cap->mpt_quota = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
func_cap->mtt_quota = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET);
func_cap->mcg_quota = size & 0xFFFFFF;
for (i = 1; i <= func_cap->num_ports; ++i) {
err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 1,
MLX4_CMD_QUERY_FUNC_CAP,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (err)
if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) {
MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) {
mlx4_err(dev, "VLAN is enforced on this port\n");
err = -EPROTONOSUPPORT;
goto out;
if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) {
MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET);
if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) {
mlx4_err(dev, "VLAN is enforced on this port\n");
err = -EPROTONOSUPPORT;
goto out;
}
if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) {
mlx4_err(dev, "Force mac is enabled on this port\n");
err = -EPROTONOSUPPORT;
goto out;
}
} else if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB) {
MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) {
mlx4_err(dev, "phy_wqe_gid is "
"enforced on this ib port\n");
err = -EPROTONOSUPPORT;
goto out;
}
}
MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
func_cap->physical_port[i] = field;
if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) {
mlx4_err(dev, "Force mac is enabled on this port\n");
err = -EPROTONOSUPPORT;
goto out;
}
} else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) {
MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET);
if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) {
mlx4_err(dev, "phy_wqe_gid is "
"enforced on this ib port\n");
err = -EPROTONOSUPPORT;
goto out;
}
}
MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
func_cap->physical_port = field;
if (func_cap->physical_port != gen_or_port) {
err = -ENOSYS;
goto out;
}
MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL);
func_cap->qp0_tunnel_qpn = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY);
func_cap->qp0_proxy_qpn = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL);
func_cap->qp1_tunnel_qpn = size & 0xFFFFFF;
MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY);
func_cap->qp1_proxy_qpn = size & 0xFFFFFF;
/* All other resources are allocated by the master, but we still report
* 'num' and 'reserved' capabilities as follows:
* - num remains the maximum resource index
@ -559,7 +596,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->max_pds = 1 << (field & 0x3f);
MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_XRC_OFFSET);
dev_cap->reserved_xrcds = field >> 4;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_PD_OFFSET);
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_XRC_OFFSET);
dev_cap->max_xrcds = 1 << (field & 0x1f);
MLX4_GET(size, outbox, QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET);
@ -715,6 +752,7 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
u64 flags;
int err = 0;
u8 field;
@ -723,6 +761,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
if (err)
return err;
/* add port mng change event capability unconditionally to slaves */
MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV;
MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
/* For guests, report Blueflame disabled */
MLX4_GET(field, outbox->buf, QUERY_DEV_CAP_BF_OFFSET);
field &= 0x7f;
@ -1345,6 +1388,19 @@ out:
return err;
}
/* for IB-type ports only in SRIOV mode. Checks that both proxy QP0
* and real QP0 are active, so that the paravirtualized QP0 is ready
* to operate */
static int check_qp0_state(struct mlx4_dev *dev, int function, int port)
{
struct mlx4_priv *priv = mlx4_priv(dev);
/* irrelevant if not infiniband */
if (priv->mfunc.master.qp0_state[port].proxy_qp0_active &&
priv->mfunc.master.qp0_state[port].qp0_active)
return 1;
return 0;
}
int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
@ -1358,17 +1414,29 @@ int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port))
return 0;
if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB)
return -ENODEV;
/* Enable port only if it was previously disabled */
if (!priv->mfunc.master.init_port_ref[port]) {
err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
if (err)
return err;
if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) {
/* Enable port only if it was previously disabled */
if (!priv->mfunc.master.init_port_ref[port]) {
err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
if (err)
return err;
}
priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
} else {
if (slave == mlx4_master_func_num(dev)) {
if (check_qp0_state(dev, slave, port) &&
!priv->mfunc.master.qp0_state[port].port_active) {
err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
if (err)
return err;
priv->mfunc.master.qp0_state[port].port_active = 1;
priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
}
} else
priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
}
priv->mfunc.master.slave_state[slave].init_port_mask |= (1 << port);
++priv->mfunc.master.init_port_ref[port];
return 0;
}
@ -1441,15 +1509,29 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave,
(1 << port)))
return 0;
if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB)
return -ENODEV;
if (priv->mfunc.master.init_port_ref[port] == 1) {
err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000,
MLX4_CMD_NATIVE);
if (err)
return err;
if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) {
if (priv->mfunc.master.init_port_ref[port] == 1) {
err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT,
1000, MLX4_CMD_NATIVE);
if (err)
return err;
}
priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
} else {
/* infiniband port */
if (slave == mlx4_master_func_num(dev)) {
if (!priv->mfunc.master.qp0_state[port].qp0_active &&
priv->mfunc.master.qp0_state[port].port_active) {
err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT,
1000, MLX4_CMD_NATIVE);
if (err)
return err;
priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
priv->mfunc.master.qp0_state[port].port_active = 0;
}
} else
priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
}
priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port);
--priv->mfunc.master.init_port_ref[port];
return 0;
}

View File

@ -134,8 +134,12 @@ struct mlx4_func_cap {
int max_eq;
int reserved_eq;
int mcg_quota;
u8 physical_port[MLX4_MAX_PORTS + 1];
u8 port_flags[MLX4_MAX_PORTS + 1];
u32 qp0_tunnel_qpn;
u32 qp0_proxy_qpn;
u32 qp1_tunnel_qpn;
u32 qp1_proxy_qpn;
u8 physical_port;
u8 port_flags;
};
struct mlx4_adapter {
@ -192,7 +196,8 @@ struct mlx4_set_ib_param {
};
int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap);
int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
struct mlx4_func_cap *func_cap);
int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,


@ -95,8 +95,6 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
" Not in use with device managed"
" flow steering");
#define MLX4_VF (1 << 0)
#define HCA_GLOBAL_CAP_MASK 0
#define PF_CONTEXT_BEHAVIOUR_MASK 0
@ -299,9 +297,12 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
mlx4_dbg(dev, "Steering mode is: %s\n",
mlx4_steering_mode_str(dev->caps.steering_mode));
/* Sense port always allowed on supported devices for ConnectX1 and 2 */
if (dev->pdev->device != 0x1003)
/* Sense port always allowed on supported devices for ConnectX-1 and -2 */
if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
/* Don't do sense port on multifunction devices (for now at least) */
if (mlx4_is_mfunc(dev))
dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
dev->caps.log_num_macs = log_num_mac;
dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
@ -384,6 +385,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];
dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
return 0;
}
/*The function checks if there are live vf, return the num of them*/
@ -409,20 +411,54 @@ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
u32 qk = MLX4_RESERVED_QKEY_BASE;
if (qpn >= dev->caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
qpn < dev->caps.sqp_start)
if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
qpn < dev->phys_caps.base_proxy_sqpn)
return -EINVAL;
if (qpn >= dev->caps.base_tunnel_sqpn)
if (qpn >= dev->phys_caps.base_tunnel_sqpn)
/* tunnel qp */
qk += qpn - dev->caps.base_tunnel_sqpn;
qk += qpn - dev->phys_caps.base_tunnel_sqpn;
else
qk += qpn - dev->caps.sqp_start;
qk += qpn - dev->phys_caps.base_proxy_sqpn;
*qkey = qk;
return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);
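A minimal usage sketch of the mapping above (the helper name is made up; it assumes the per-port qp0 proxy/tunnel arrays in dev->caps have already been populated, which happens in mlx4_slave_cap() for VFs and in mlx4_init_qp_table() for the PF):
	static void example_show_parav_qkeys(struct mlx4_dev *dev, int port)
	{
		u32 qkey;
		/* proxy SQP: qkey = MLX4_RESERVED_QKEY_BASE + (qpn - base_proxy_sqpn) */
		if (!mlx4_get_parav_qkey(dev, dev->caps.qp0_proxy[port - 1], &qkey))
			mlx4_dbg(dev, "port %d proxy qp0 qkey: 0x%x\n", port, qkey);
		/* tunnel QP: qkey = MLX4_RESERVED_QKEY_BASE + (qpn - base_tunnel_sqpn) */
		if (!mlx4_get_parav_qkey(dev, dev->caps.qp0_tunnel[port - 1], &qkey))
			mlx4_dbg(dev, "port %d tunnel qp0 qkey: 0x%x\n", port, qkey);
	}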
void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
if (!mlx4_is_master(dev))
return;
priv->virt2phys_pkey[slave][port - 1][i] = val;
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);
void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
if (!mlx4_is_master(dev))
return;
priv->slave_node_guids[slave] = guid;
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);
__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);
if (!mlx4_is_master(dev))
return 0;
return priv->slave_node_guids[slave];
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);
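A small illustration of how the node-GUID helpers pair up on the master (the function name and GUID value are invented for the example):
	static void example_node_guid_roundtrip(struct mlx4_dev *dev, int slave)
	{
		__be64 guid = cpu_to_be64(0x0002c9030000abcdULL);	/* illustrative GUID */
		mlx4_put_slave_node_guid(dev, slave, guid);	/* silently ignored unless master */
		if (mlx4_is_master(dev))
			mlx4_dbg(dev, "slave %d node guid: 0x%016llx\n", slave,
				 (unsigned long long) be64_to_cpu(mlx4_get_slave_node_guid(dev, slave)));
	}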
int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@ -493,9 +529,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
}
memset(&func_cap, 0, sizeof(func_cap));
err = mlx4_QUERY_FUNC_CAP(dev, &func_cap);
err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap);
if (err) {
mlx4_err(dev, "QUERY_FUNC_CAP command failed, aborting.\n");
mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n",
err);
return err;
}
@ -523,12 +560,33 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
return -ENODEV;
}
dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
!dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
err = -ENOMEM;
goto err_mem;
}
for (i = 1; i <= dev->caps.num_ports; ++i) {
err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap);
if (err) {
mlx4_err(dev, "QUERY_FUNC_CAP port command failed for"
" port %d, aborting (%d).\n", i, err);
goto err_mem;
}
dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn;
dev->caps.port_mask[i] = dev->caps.port_type[i];
if (mlx4_get_slave_pkey_gid_tbl_len(dev, i,
&dev->caps.gid_table_len[i],
&dev->caps.pkey_table_len[i]))
return -ENODEV;
goto err_mem;
}
if (dev->caps.uar_page_size * (dev->caps.num_uars -
@ -538,10 +596,20 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
"PCI resource 2 size of 0x%llx, aborting.\n",
dev->caps.uar_page_size * dev->caps.num_uars,
(unsigned long long) pci_resource_len(dev->pdev, 2));
return -ENODEV;
goto err_mem;
}
return 0;
err_mem:
kfree(dev->caps.qp0_tunnel);
kfree(dev->caps.qp0_proxy);
kfree(dev->caps.qp1_tunnel);
kfree(dev->caps.qp1_proxy);
dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
return err;
}
/*
@ -1092,10 +1160,10 @@ static void mlx4_slave_exit(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
down(&priv->cmd.slave_sem);
mutex_lock(&priv->cmd.slave_cmd_mutex);
if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
mlx4_warn(dev, "Failed to close slave function.\n");
up(&priv->cmd.slave_sem);
mutex_unlock(&priv->cmd.slave_cmd_mutex);
}
static int map_bf_area(struct mlx4_dev *dev)
@ -1147,7 +1215,7 @@ static int mlx4_init_slave(struct mlx4_dev *dev)
u32 slave_read;
u32 cmd_channel_ver;
down(&priv->cmd.slave_sem);
mutex_lock(&priv->cmd.slave_cmd_mutex);
priv->cmd.max_cmds = 1;
mlx4_warn(dev, "Sending reset\n");
ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
@ -1196,12 +1264,13 @@ static int mlx4_init_slave(struct mlx4_dev *dev)
goto err;
if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
goto err;
up(&priv->cmd.slave_sem);
mutex_unlock(&priv->cmd.slave_cmd_mutex);
return 0;
err:
mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
up(&priv->cmd.slave_sem);
mutex_unlock(&priv->cmd.slave_cmd_mutex);
return -EIO;
}
@ -1848,7 +1917,7 @@ static void mlx4_free_ownership(struct mlx4_dev *dev)
iounmap(owner);
}
static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data)
{
struct mlx4_priv *priv;
struct mlx4_dev *dev;
@ -1871,12 +1940,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
/*
* Check for BARs.
*/
if (((id == NULL) || !(id->driver_data & MLX4_VF)) &&
if (!(pci_dev_data & MLX4_PCI_DEV_IS_VF) &&
!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
dev_err(&pdev->dev, "Missing DCS, aborting."
"(id == 0X%p, id->driver_data: 0x%lx,"
" pci_resource_flags(pdev, 0):0x%lx)\n", id,
id ? id->driver_data : 0, pci_resource_flags(pdev, 0));
"(driver_data: 0x%x, pci_resource_flags(pdev, 0):0x%lx)\n",
pci_dev_data, pci_resource_flags(pdev, 0));
err = -ENODEV;
goto err_disable_pdev;
}
@ -1941,7 +2009,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
dev->rev_id = pdev->revision;
/* Detect if this device is a virtual function */
if (id && id->driver_data & MLX4_VF) {
if (pci_dev_data & MLX4_PCI_DEV_IS_VF) {
/* When acting as pf, we normally skip vfs unless explicitly
* requested to probe them. */
if (num_vfs && extended_func_num(pdev) > probe_vf) {
@ -1969,12 +2037,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
}
if (num_vfs) {
mlx4_warn(dev, "Enabling sriov with:%d vfs\n", num_vfs);
mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", num_vfs);
err = pci_enable_sriov(pdev, num_vfs);
if (err) {
mlx4_err(dev, "Failed to enable sriov,"
"continuing without sriov enabled"
" (err = %d).\n", err);
mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d).\n",
err);
err = 0;
} else {
mlx4_warn(dev, "Running in master mode\n");
@ -2089,6 +2156,7 @@ slave_start:
mlx4_sense_init(dev);
mlx4_start_sense(dev);
priv->pci_dev_data = pci_dev_data;
pci_set_drvdata(pdev, dev);
return 0;
@ -2158,7 +2226,7 @@ static int __devinit mlx4_init_one(struct pci_dev *pdev,
{
printk_once(KERN_INFO "%s", mlx4_version);
return __mlx4_init_one(pdev, id);
return __mlx4_init_one(pdev, id->driver_data);
}
static void mlx4_remove_one(struct pci_dev *pdev)
@ -2217,12 +2285,18 @@ static void mlx4_remove_one(struct pci_dev *pdev)
if (dev->flags & MLX4_FLAG_MSI_X)
pci_disable_msix(pdev);
if (dev->flags & MLX4_FLAG_SRIOV) {
mlx4_warn(dev, "Disabling sriov\n");
mlx4_warn(dev, "Disabling SR-IOV\n");
pci_disable_sriov(pdev);
}
if (!mlx4_is_slave(dev))
mlx4_free_ownership(dev);
kfree(dev->caps.qp0_tunnel);
kfree(dev->caps.qp0_proxy);
kfree(dev->caps.qp1_tunnel);
kfree(dev->caps.qp1_proxy);
kfree(priv);
pci_release_regions(pdev);
pci_disable_device(pdev);
@ -2232,41 +2306,46 @@ static void mlx4_remove_one(struct pci_dev *pdev)
int mlx4_restart_one(struct pci_dev *pdev)
{
struct mlx4_dev *dev = pci_get_drvdata(pdev);
struct mlx4_priv *priv = mlx4_priv(dev);
int pci_dev_data;
pci_dev_data = priv->pci_dev_data;
mlx4_remove_one(pdev);
return __mlx4_init_one(pdev, NULL);
return __mlx4_init_one(pdev, pci_dev_data);
}
static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
/* MT25408 "Hermon" SDR */
{ PCI_VDEVICE(MELLANOX, 0x6340), 0 },
{ PCI_VDEVICE(MELLANOX, 0x6340), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT25408 "Hermon" DDR */
{ PCI_VDEVICE(MELLANOX, 0x634a), 0 },
{ PCI_VDEVICE(MELLANOX, 0x634a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT25408 "Hermon" QDR */
{ PCI_VDEVICE(MELLANOX, 0x6354), 0 },
{ PCI_VDEVICE(MELLANOX, 0x6354), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT25408 "Hermon" DDR PCIe gen2 */
{ PCI_VDEVICE(MELLANOX, 0x6732), 0 },
{ PCI_VDEVICE(MELLANOX, 0x6732), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT25408 "Hermon" QDR PCIe gen2 */
{ PCI_VDEVICE(MELLANOX, 0x673c), 0 },
{ PCI_VDEVICE(MELLANOX, 0x673c), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT25408 "Hermon" EN 10GigE */
{ PCI_VDEVICE(MELLANOX, 0x6368), 0 },
{ PCI_VDEVICE(MELLANOX, 0x6368), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
{ PCI_VDEVICE(MELLANOX, 0x6750), 0 },
{ PCI_VDEVICE(MELLANOX, 0x6750), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT25458 ConnectX EN 10GBASE-T 10GigE */
{ PCI_VDEVICE(MELLANOX, 0x6372), 0 },
{ PCI_VDEVICE(MELLANOX, 0x6372), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
{ PCI_VDEVICE(MELLANOX, 0x675a), 0 },
{ PCI_VDEVICE(MELLANOX, 0x675a), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT26468 ConnectX EN 10GigE PCIe gen2*/
{ PCI_VDEVICE(MELLANOX, 0x6764), 0 },
{ PCI_VDEVICE(MELLANOX, 0x6764), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
{ PCI_VDEVICE(MELLANOX, 0x6746), 0 },
{ PCI_VDEVICE(MELLANOX, 0x6746), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT26478 ConnectX2 40GigE PCIe gen2 */
{ PCI_VDEVICE(MELLANOX, 0x676e), 0 },
{ PCI_VDEVICE(MELLANOX, 0x676e), MLX4_PCI_DEV_FORCE_SENSE_PORT },
/* MT25400 Family [ConnectX-2 Virtual Function] */
{ PCI_VDEVICE(MELLANOX, 0x1002), MLX4_VF },
{ PCI_VDEVICE(MELLANOX, 0x1002), MLX4_PCI_DEV_IS_VF },
/* MT27500 Family [ConnectX-3] */
{ PCI_VDEVICE(MELLANOX, 0x1003), 0 },
/* MT27500 Family [ConnectX-3 Virtual Function] */
{ PCI_VDEVICE(MELLANOX, 0x1004), MLX4_VF },
{ PCI_VDEVICE(MELLANOX, 0x1004), MLX4_PCI_DEV_IS_VF },
{ PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
{ PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
{ PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
@ -2295,7 +2374,7 @@ static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev,
static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev)
{
int ret = __mlx4_init_one(pdev, NULL);
int ret = __mlx4_init_one(pdev, 0);
return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}


@ -452,6 +452,7 @@ struct mlx4_slave_state {
/*initialized via the kzalloc*/
u8 is_slave_going_down;
u32 cookie;
enum slave_port_state port_state[MLX4_MAX_PORTS + 1];
};
struct slave_list {
@ -472,6 +473,7 @@ struct mlx4_slave_event_eq {
u32 eqn;
u32 cons;
u32 prod;
spinlock_t event_lock;
struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE];
};
@ -511,9 +513,9 @@ struct mlx4_cmd {
struct pci_pool *pool;
void __iomem *hcr;
struct mutex hcr_mutex;
struct mutex slave_cmd_mutex;
struct semaphore poll_sem;
struct semaphore event_sem;
struct semaphore slave_sem;
int max_cmds;
spinlock_t context_lock;
int free_head;
@ -766,6 +768,11 @@ struct _rule_hw {
};
};
enum {
MLX4_PCI_DEV_IS_VF = 1 << 0,
MLX4_PCI_DEV_FORCE_SENSE_PORT = 1 << 1,
};
struct mlx4_priv {
struct mlx4_dev dev;
@ -773,6 +780,8 @@ struct mlx4_priv {
struct list_head ctx_list;
spinlock_t ctx_lock;
int pci_dev_data;
struct list_head pgdir_list;
struct mutex pgdir_mutex;
@ -807,6 +816,9 @@ struct mlx4_priv {
struct io_mapping *bf_mapping;
int reserved_mtts;
int fs_hash_mode;
u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
__be64 slave_node_guids[MLX4_MFUNC_MAX];
};
static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
@ -1011,16 +1023,61 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);


@ -732,6 +732,16 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
new_cap_mask = ((__be32 *) inbox->buf)[1];
}
/* slave may not set the IS_SM capability for the port */
if (slave != mlx4_master_func_num(dev) &&
(be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_IS_SM))
return -EINVAL;
/* No DEV_MGMT in multifunc mode */
if (mlx4_is_mfunc(dev) &&
(be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_DEV_MGMT_SUP))
return -EINVAL;
agg_cap_mask = 0;
slave_cap_mask =
priv->mfunc.master.slave_state[slave].ib_cap_mask[port];


@ -67,10 +67,18 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
complete(&qp->free);
}
static int is_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp)
/* used for INIT/CLOSE port logic */
static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0)
{
return qp->qpn >= dev->caps.sqp_start &&
qp->qpn <= dev->caps.sqp_start + 1;
/* this procedure is called after we already know we are on the master */
/* qp0 is either the proxy qp0, or the real qp0 */
u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev);
*proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1;
*real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn &&
qp->qpn <= dev->phys_caps.base_sqpn + 1;
return *real_qp0 || *proxy_qp0;
}
static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
@ -122,6 +130,8 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_cmd_mailbox *mailbox;
int ret = 0;
int real_qp0 = 0;
int proxy_qp0 = 0;
u8 port;
if (cur_state >= MLX4_QP_NUM_STATE || new_state >= MLX4_QP_NUM_STATE ||
@ -133,9 +143,12 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native);
if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR &&
cur_state != MLX4_QP_STATE_RST &&
is_qp0(dev, qp)) {
is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
port = (qp->qpn & 1) + 1;
priv->mfunc.master.qp0_state[port].qp0_active = 0;
if (proxy_qp0)
priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
else
priv->mfunc.master.qp0_state[port].qp0_active = 0;
}
return ret;
}
@ -162,6 +175,23 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
new_state == MLX4_QP_STATE_RST ? 2 : 0,
op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native);
if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) {
port = (qp->qpn & 1) + 1;
if (cur_state != MLX4_QP_STATE_ERR &&
cur_state != MLX4_QP_STATE_RST &&
new_state == MLX4_QP_STATE_ERR) {
if (proxy_qp0)
priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0;
else
priv->mfunc.master.qp0_state[port].qp0_active = 0;
} else if (new_state == MLX4_QP_STATE_RTR) {
if (proxy_qp0)
priv->mfunc.master.qp0_state[port].proxy_qp0_active = 1;
else
priv->mfunc.master.qp0_state[port].qp0_active = 1;
}
}
mlx4_free_cmd_mailbox(dev, mailbox);
return ret;
}
@ -392,6 +422,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
int err;
int reserved_from_top = 0;
int k;
spin_lock_init(&qp_table->lock);
INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC);
@ -406,7 +437,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
* We also reserve the MSB of the 24-bit QP number to indicate
* that a QP is an XRC QP.
*/
dev->caps.sqp_start =
dev->phys_caps.base_sqpn =
ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8);
{
@ -437,13 +468,66 @@ int mlx4_init_qp_table(struct mlx4_dev *dev)
}
/* Reserve 8 real SQPs in both native and SRIOV modes.
* In addition, in SRIOV mode, reserve 8 proxy SQPs per function
* (for all PFs and VFs), and 8 corresponding tunnel QPs.
* Each proxy SQP works opposite its own tunnel QP.
*
* The QPs are arranged as follows:
* a. 8 real SQPs
* b. All the proxy SQPs (8 per function)
* c. All the tunnel QPs (8 per function)
*/
err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps,
(1 << 23) - 1, dev->caps.sqp_start + 8,
(1 << 23) - 1, dev->phys_caps.base_sqpn + 8 +
16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev),
reserved_from_top);
if (err)
return err;
return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start);
if (mlx4_is_mfunc(dev)) {
/* for PPF use */
dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8;
dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX;
/* In mfunc, calculate proxy and tunnel qp offsets for the PF here,
* since the PF does not call mlx4_slave_caps */
dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
!dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
err = -ENOMEM;
goto err_mem;
}
for (k = 0; k < dev->caps.num_ports; k++) {
dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn +
8 * mlx4_master_func_num(dev) + k;
dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX;
dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn +
8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k;
dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX;
}
}
err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn);
if (err)
goto err_mem;
return 0;
err_mem:
kfree(dev->caps.qp0_tunnel);
kfree(dev->caps.qp0_proxy);
kfree(dev->caps.qp1_tunnel);
kfree(dev->caps.qp1_proxy);
dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
return err;
}
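Spelling out the layout from the comment above (a worked sketch that mirrors the PF loop just above; the helper names are made up): with base_proxy_sqpn = base_sqpn + 8, function F's eight proxy SQPs start at base_proxy_sqpn + 8*F, and the matching tunnel QPs sit exactly 8*MLX4_MFUNC_MAX higher.
	static u32 example_qp0_proxy_qpn(struct mlx4_dev *dev, int func, int port_idx)
	{
		return dev->phys_caps.base_proxy_sqpn + 8 * func + port_idx;
	}
	static u32 example_qp0_tunnel_qpn(struct mlx4_dev *dev, int func, int port_idx)
	{
		return example_qp0_proxy_qpn(dev, func, port_idx) + 8 * MLX4_MFUNC_MAX;
	}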
void mlx4_cleanup_qp_table(struct mlx4_dev *dev)


@ -242,6 +242,15 @@ static int res_tracker_insert(struct rb_root *root, struct res_common *res)
return 0;
}
enum qp_transition {
QP_TRANS_INIT2RTR,
QP_TRANS_RTR2RTS,
QP_TRANS_RTS2RTS,
QP_TRANS_SQERR2RTS,
QP_TRANS_SQD2SQD,
QP_TRANS_SQD2RTS
};
/* For Debug uses */
static const char *ResourceType(enum mlx4_resource rt)
{
@ -308,14 +317,41 @@ void mlx4_free_resource_tracker(struct mlx4_dev *dev,
}
}
static void update_ud_gid(struct mlx4_dev *dev,
struct mlx4_qp_context *qp_ctx, u8 slave)
static void update_pkey_index(struct mlx4_dev *dev, int slave,
struct mlx4_cmd_mailbox *inbox)
{
u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
u8 sched = *(u8 *)(inbox->buf + 64);
u8 orig_index = *(u8 *)(inbox->buf + 35);
u8 new_index;
struct mlx4_priv *priv = mlx4_priv(dev);
int port;
port = (sched >> 6 & 1) + 1;
new_index = priv->virt2phys_pkey[slave][port - 1][orig_index];
*(u8 *)(inbox->buf + 35) = new_index;
mlx4_dbg(dev, "port = %d, orig pkey index = %d, "
"new pkey index = %d\n", port, orig_index, new_index);
}
static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox,
u8 slave)
{
struct mlx4_qp_context *qp_ctx = inbox->buf + 8;
enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf);
u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
if (MLX4_QP_ST_UD == ts)
qp_ctx->pri_path.mgid_index = 0x80 | slave;
if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) {
if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH)
qp_ctx->pri_path.mgid_index = slave & 0x7F;
if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH)
qp_ctx->alt_path.mgid_index = slave & 0x7F;
}
mlx4_dbg(dev, "slave %d, new gid index: 0x%x ",
slave, qp_ctx->pri_path.mgid_index);
}
@ -360,8 +396,6 @@ static int get_res(struct mlx4_dev *dev, int slave, u64 res_id,
r->from_state = r->state;
r->state = RES_ANY_BUSY;
mlx4_dbg(dev, "res %s id 0x%llx to busy\n",
ResourceType(type), r->res_id);
if (res)
*((struct res_common **)res) = r;
@ -1105,7 +1139,13 @@ static void res_end_move(struct mlx4_dev *dev, int slave,
static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn)
{
return mlx4_is_qp_reserved(dev, qpn);
return mlx4_is_qp_reserved(dev, qpn) &&
(mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn));
}
static int fw_reserved(struct mlx4_dev *dev, int qpn)
{
return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
}
static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
@ -1145,7 +1185,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
if (err)
return err;
if (!valid_reserved(dev, slave, qpn)) {
if (!fw_reserved(dev, qpn)) {
err = __mlx4_qp_alloc_icm(dev, qpn);
if (err) {
res_abort_move(dev, slave, RES_QP, qpn);
@ -1498,7 +1538,7 @@ static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
if (err)
return err;
if (!valid_reserved(dev, slave, qpn))
if (!fw_reserved(dev, qpn))
__mlx4_qp_free_icm(dev, qpn);
res_end_move(dev, slave, RES_QP, qpn);
@ -1938,6 +1978,19 @@ static u32 qp_get_srqn(struct mlx4_qp_context *qpc)
return be32_to_cpu(qpc->srqn) & 0x1ffffff;
}
static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr,
struct mlx4_qp_context *context)
{
u32 qpn = vhcr->in_modifier & 0xffffff;
u32 qkey = 0;
if (mlx4_get_parav_qkey(dev, qpn, &qkey))
return;
/* adjust qkey in qp context */
context->qkey = cpu_to_be32(qkey);
}
int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
@ -1990,6 +2043,8 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
goto ex_put_scq;
}
adjust_proxy_tun_qkey(dev, vhcr, qpc);
update_pkey_index(dev, slave, inbox);
err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
if (err)
goto ex_put_srq;
@ -2135,6 +2190,48 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start,
return err;
}
static int verify_qp_parameters(struct mlx4_dev *dev,
struct mlx4_cmd_mailbox *inbox,
enum qp_transition transition, u8 slave)
{
u32 qp_type;
struct mlx4_qp_context *qp_ctx;
enum mlx4_qp_optpar optpar;
qp_ctx = inbox->buf + 8;
qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff;
optpar = be32_to_cpu(*(__be32 *) inbox->buf);
switch (qp_type) {
case MLX4_QP_ST_RC:
case MLX4_QP_ST_UC:
switch (transition) {
case QP_TRANS_INIT2RTR:
case QP_TRANS_RTR2RTS:
case QP_TRANS_RTS2RTS:
case QP_TRANS_SQD2SQD:
case QP_TRANS_SQD2RTS:
if (slave != mlx4_master_func_num(dev))
/* slaves have only gid index 0 */
if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH)
if (qp_ctx->pri_path.mgid_index)
return -EINVAL;
if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH)
if (qp_ctx->alt_path.mgid_index)
return -EINVAL;
break;
default:
break;
}
break;
default:
break;
}
return 0;
}
int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
@ -2622,16 +2719,123 @@ out:
return err;
}
int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
struct mlx4_qp_context *context = inbox->buf + 8;
adjust_proxy_tun_qkey(dev, vhcr, context);
update_pkey_index(dev, slave, inbox);
return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
}
int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
int err;
struct mlx4_qp_context *qpc = inbox->buf + 8;
update_ud_gid(dev, qpc, (u8)slave);
err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave);
if (err)
return err;
update_pkey_index(dev, slave, inbox);
update_gid(dev, inbox, (u8)slave);
adjust_proxy_tun_qkey(dev, vhcr, qpc);
return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
}
int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
int err;
struct mlx4_qp_context *context = inbox->buf + 8;
err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave);
if (err)
return err;
update_pkey_index(dev, slave, inbox);
update_gid(dev, inbox, (u8)slave);
adjust_proxy_tun_qkey(dev, vhcr, context);
return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
}
int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
int err;
struct mlx4_qp_context *context = inbox->buf + 8;
err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave);
if (err)
return err;
update_pkey_index(dev, slave, inbox);
update_gid(dev, inbox, (u8)slave);
adjust_proxy_tun_qkey(dev, vhcr, context);
return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
}
int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
struct mlx4_qp_context *context = inbox->buf + 8;
adjust_proxy_tun_qkey(dev, vhcr, context);
return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
}
int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
int err;
struct mlx4_qp_context *context = inbox->buf + 8;
err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave);
if (err)
return err;
adjust_proxy_tun_qkey(dev, vhcr, context);
update_gid(dev, inbox, (u8)slave);
update_pkey_index(dev, slave, inbox);
return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
}
int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_vhcr *vhcr,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd)
{
int err;
struct mlx4_qp_context *context = inbox->buf + 8;
err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave);
if (err)
return err;
adjust_proxy_tun_qkey(dev, vhcr, context);
update_gid(dev, inbox, (u8)slave);
update_pkey_index(dev, slave, inbox);
return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
}


@ -54,7 +54,13 @@ enum {
};
enum {
MLX4_MAX_PORTS = 2
MLX4_PORT_CAP_IS_SM = 1 << 1,
MLX4_PORT_CAP_DEV_MGMT_SUP = 1 << 19,
};
enum {
MLX4_MAX_PORTS = 2,
MLX4_MAX_PORT_PKEYS = 128
};
/* base qkey for use in sriov tunnel-qp/proxy-qp communication.
@ -191,6 +197,25 @@ enum {
MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0,
};
enum slave_port_state {
SLAVE_PORT_DOWN = 0,
SLAVE_PENDING_UP,
SLAVE_PORT_UP,
};
enum slave_port_gen_event {
SLAVE_PORT_GEN_EVENT_DOWN = 0,
SLAVE_PORT_GEN_EVENT_UP,
SLAVE_PORT_GEN_EVENT_NONE,
};
enum slave_port_state_event {
MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
MLX4_PORT_STATE_DEV_EVENT_PORT_UP,
MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
};
enum {
MLX4_PERM_LOCAL_READ = 1 << 10,
MLX4_PERM_LOCAL_WRITE = 1 << 11,
@ -303,6 +328,9 @@ struct mlx4_phys_caps {
u32 gid_phys_table_len[MLX4_MAX_PORTS + 1];
u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1];
u32 num_phys_eqs;
u32 base_sqpn;
u32 base_proxy_sqpn;
u32 base_tunnel_sqpn;
};
struct mlx4_caps {
@ -333,9 +361,10 @@ struct mlx4_caps {
int max_rq_desc_sz;
int max_qp_init_rdma;
int max_qp_dest_rdma;
int sqp_start;
u32 base_sqpn;
u32 base_tunnel_sqpn;
u32 *qp0_proxy;
u32 *qp1_proxy;
u32 *qp0_tunnel;
u32 *qp1_tunnel;
int num_srqs;
int max_srq_wqes;
int max_srq_sge;
@ -389,6 +418,7 @@ struct mlx4_caps {
enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1];
u32 max_counters;
u8 port_ib_mtu[MLX4_MAX_PORTS + 1];
u16 sqp_demux;
};
struct mlx4_buf_list {
@ -671,6 +701,10 @@ struct mlx4_init_port_param {
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if ((type) == (dev)->caps.port_mask[(port)])
#define mlx4_foreach_non_ib_transport_port(port, dev) \
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB))
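A usage sketch for the new iterator, assuming a valid struct mlx4_dev *dev in driver context (the message is illustrative):
	int port;
	mlx4_foreach_non_ib_transport_port(port, dev)
		pr_debug("mlx4: port %d uses a non-IB transport\n", port);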
#define mlx4_foreach_ib_transport_port(port, dev) \
for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \
if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
@ -692,7 +726,18 @@ static inline int mlx4_is_master(struct mlx4_dev *dev)
static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
{
return (qpn < dev->caps.sqp_start + 8);
return (qpn < dev->phys_caps.base_sqpn + 8 +
16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev));
}
static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn)
{
int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8;
if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8)
return 1;
return 0;
}
static inline int mlx4_is_mfunc(struct mlx4_dev *dev)
@ -927,6 +972,20 @@ int mlx4_flow_attach(struct mlx4_dev *dev,
struct mlx4_net_trans_rule *rule, u64 *reg_id);
int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port,
int i, int val);
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey);
int mlx4_is_slave_active(struct mlx4_dev *dev, int slave);
int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port);
int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port);
int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr);
int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change);
enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port);
int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event);
void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid);
__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave);
#endif /* MLX4_DEVICE_H */


@ -45,6 +45,8 @@ enum mlx4_dev_event {
MLX4_DEV_EVENT_PORT_DOWN,
MLX4_DEV_EVENT_PORT_REINIT,
MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
MLX4_DEV_EVENT_SLAVE_INIT,
MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
};
struct mlx4_interface {


@ -126,7 +126,8 @@ struct mlx4_rss_context {
struct mlx4_qp_path {
u8 fl;
u8 reserved1[2];
u8 reserved1[1];
u8 disable_pkey_check;
u8 pkey_index;
u8 counter_index;
u8 grh_mylmc;


@ -100,6 +100,22 @@ int ib_find_cached_pkey(struct ib_device *device,
u16 pkey,
u16 *index);
/**
* ib_find_exact_cached_pkey - Returns the PKey table index where a specified
* PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit)
* @device: The device to query.
* @port_num: The port number of the device to search for the PKey.
* @pkey: The PKey value to search for.
* @index: The index into the cached PKey table where the PKey was found.
*
* ib_find_exact_cached_pkey() searches the specified PKey table in
* the local software cache.
*/
int ib_find_exact_cached_pkey(struct ib_device *device,
u8 port_num,
u16 pkey,
u16 *index);
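Callers use it exactly like ib_find_cached_pkey(), except that the membership bit must match as well. A minimal sketch, assuming a valid device and port_num from the surrounding context (0xffff is the full-member default PKey):
	u16 index;
	if (!ib_find_exact_cached_pkey(device, port_num, 0xffff, &index))
		pr_debug("full-member default PKey found at index %u\n", index);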
/**
* ib_get_cached_lmc - Returns a cached lmc table entry
* @device: The device to query.


@ -614,6 +614,9 @@ enum ib_qp_type {
enum ib_qp_create_flags {
IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0,
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
};
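With bits 26-31 now reserved for low-level drivers, a driver can define private QP-creation flags there without clashing with core verbs flags. A hypothetical example (the flag name is invented, not taken from any real driver):
	/* hypothetical driver-private flag inside the reserved 26-31 window */
	enum {
		MY_DRIVER_QP_CREATE_TUNNEL = IB_QP_CREATE_RESERVED_START,	/* 1 << 26 */
	};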
struct ib_qp_init_attr {