v5.18 second rc pull request

A few recent regressions in rxe's multicast code, and some old driver
 bugs:
 
 - Error case unwind bug in rxe for rkeys
 
 - Do not call netdev functions under a spinlock in rxe multicast code
 
 - Use the proper BH lock type in rxe multicast code
 
 - Fix irdma deadlock and crash
 
 - Add a missing flush to drain irdma QPs when in error
 
 - Fix high userspace latency in irdma during destroy due to
   synchronize_rcu()
 
 - Rare race in siw MPA processing
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYIAB0WIQRRRCHOFoQz/8F5bUaFwuHvBreFYQUCYnUqeQAKCRCFwuHvBreF
 Yf58AQCNUQZlmEiuBid6WxggXPW/MM5sxJdOqZeX+Ddbmm7swAEAidtoVBILozLC
 ltd8+P8qNdccqOZDatgqYYSpXUfHIA4=
 =Idcg
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "A few recent regressions in rxe's multicast code, and some old driver
  bugs:

   - Error case unwind bug in rxe for rkeys

   - Do not call netdev functions under a spinlock in rxe multicast
     code

   - Use the proper BH lock type in rxe multicast code

   - Fix irdma deadlock and crash

   - Add a missing flush to drain irdma QPs when in error

   - Fix high userspace latency in irdma during destroy due to
     synchronize_rcu()

   - Rare race in siw MPA processing"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/rxe: Change mcg_lock to a _bh lock
  RDMA/rxe: Do not call  dev_mc_add/del() under a spinlock
  RDMA/siw: Fix a condition race issue in MPA request processing
  RDMA/irdma: Fix possible crash due to NULL netdev in notifier
  RDMA/irdma: Reduce iWARP QP destroy time
  RDMA/irdma: Flush iWARP QP if modified to ERR from RTR state
  RDMA/rxe: Recheck the MR in when generating a READ reply
  RDMA/irdma: Fix deadlock in irdma_cleanup_cm_core()
  RDMA/rxe: Fix "Replace mr by rkey in responder resources"
This commit is contained in:
Linus Torvalds 2022-05-06 09:50:25 -07:00
commit 4df22ca85d
6 changed files with 92 additions and 103 deletions

View File

@ -2308,10 +2308,8 @@ err:
return NULL;
}
static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
static void irdma_destroy_connection(struct irdma_cm_node *cm_node)
{
struct irdma_cm_node *cm_node =
container_of(rcu_head, struct irdma_cm_node, rcu_head);
struct irdma_cm_core *cm_core = cm_node->cm_core;
struct irdma_qp *iwqp;
struct irdma_cm_info nfo;
@ -2359,7 +2357,6 @@ static void irdma_cm_node_free_cb(struct rcu_head *rcu_head)
}
cm_core->cm_free_ah(cm_node);
kfree(cm_node);
}
/**
@ -2387,8 +2384,9 @@ void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node)
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
/* wait for all list walkers to exit their grace period */
call_rcu(&cm_node->rcu_head, irdma_cm_node_free_cb);
irdma_destroy_connection(cm_node);
kfree_rcu(cm_node, rcu_head);
}
/**
@ -3246,15 +3244,10 @@ int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver)
*/
void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
{
unsigned long flags;
if (!cm_core)
return;
spin_lock_irqsave(&cm_core->ht_lock, flags);
if (timer_pending(&cm_core->tcp_timer))
del_timer_sync(&cm_core->tcp_timer);
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
del_timer_sync(&cm_core->tcp_timer);
destroy_workqueue(cm_core->event_wq);
cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
@ -3467,12 +3460,6 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
}
cm_id = iwqp->cm_id;
/* make sure we havent already closed this connection */
if (!cm_id) {
spin_unlock_irqrestore(&iwqp->lock, flags);
return;
}
original_hw_tcp_state = iwqp->hw_tcp_state;
original_ibqp_state = iwqp->ibqp_state;
last_ae = iwqp->last_aeq;
@ -3494,11 +3481,11 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp)
disconn_status = -ECONNRESET;
}
if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
last_ae == IRDMA_AE_BAD_CLOSE ||
last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) {
if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
last_ae == IRDMA_AE_BAD_CLOSE ||
last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) {
issue_close = 1;
iwqp->cm_id = NULL;
qp->term_flags = 0;

View File

@ -258,18 +258,16 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
u32 local_ipaddr[4] = {};
bool ipv4 = true;
real_dev = rdma_vlan_dev_real_dev(netdev);
if (!real_dev)
real_dev = netdev;
ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
if (!ibdev)
return NOTIFY_DONE;
iwdev = to_iwdev(ibdev);
switch (event) {
case NETEVENT_NEIGH_UPDATE:
real_dev = rdma_vlan_dev_real_dev(netdev);
if (!real_dev)
real_dev = netdev;
ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA);
if (!ibdev)
return NOTIFY_DONE;
iwdev = to_iwdev(ibdev);
p = (__be32 *)neigh->primary_key;
if (neigh->tbl->family == AF_INET6) {
ipv4 = false;
@ -290,13 +288,12 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event,
irdma_manage_arp_cache(iwdev->rf, neigh->ha,
local_ipaddr, ipv4,
IRDMA_ARP_DELETE);
ib_device_put(ibdev);
break;
default:
break;
}
ib_device_put(ibdev);
return NOTIFY_DONE;
}

View File

@ -1618,13 +1618,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
if (issue_modify_qp && iwqp->ibqp_state > IB_QPS_RTS) {
if (dont_wait) {
if (iwqp->cm_id && iwqp->hw_tcp_state) {
if (iwqp->hw_tcp_state) {
spin_lock_irqsave(&iwqp->lock, flags);
iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
iwqp->last_aeq = IRDMA_AE_RESET_SENT;
spin_unlock_irqrestore(&iwqp->lock, flags);
irdma_cm_disconn(iwqp);
}
irdma_cm_disconn(iwqp);
} else {
int close_timer_started;

View File

@ -38,13 +38,13 @@ static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
}
/**
* rxe_mcast_delete - delete multicast address from rxe device
* rxe_mcast_del - delete multicast address from rxe device
* @rxe: rxe device object
* @mgid: multicast address as a gid
*
* Returns 0 on success else an error
*/
static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
static int rxe_mcast_del(struct rxe_dev *rxe, union ib_gid *mgid)
{
unsigned char ll_addr[ETH_ALEN];
@ -143,11 +143,10 @@ static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe,
struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
struct rxe_mcg *mcg;
unsigned long flags;
spin_lock_irqsave(&rxe->mcg_lock, flags);
spin_lock_bh(&rxe->mcg_lock);
mcg = __rxe_lookup_mcg(rxe, mgid);
spin_unlock_irqrestore(&rxe->mcg_lock, flags);
spin_unlock_bh(&rxe->mcg_lock);
return mcg;
}
@ -159,17 +158,10 @@ struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
* @mcg: new mcg object
*
* Context: caller should hold rxe->mcg lock
* Returns: 0 on success else an error
*/
static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
struct rxe_mcg *mcg)
static void __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
struct rxe_mcg *mcg)
{
int err;
err = rxe_mcast_add(rxe, mgid);
if (unlikely(err))
return err;
kref_init(&mcg->ref_cnt);
memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid));
INIT_LIST_HEAD(&mcg->qp_list);
@ -184,8 +176,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
*/
kref_get(&mcg->ref_cnt);
__rxe_insert_mcg(mcg);
return 0;
}
/**
@ -198,7 +188,6 @@ static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid,
static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
{
struct rxe_mcg *mcg, *tmp;
unsigned long flags;
int err;
if (rxe->attr.max_mcast_grp == 0)
@ -209,36 +198,38 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
if (mcg)
return mcg;
/* speculative alloc of new mcg */
mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
if (!mcg)
return ERR_PTR(-ENOMEM);
spin_lock_irqsave(&rxe->mcg_lock, flags);
/* re-check to see if someone else just added it */
tmp = __rxe_lookup_mcg(rxe, mgid);
if (tmp) {
kfree(mcg);
mcg = tmp;
goto out;
}
/* check to see if we have reached limit */
if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) {
err = -ENOMEM;
goto err_dec;
}
err = __rxe_init_mcg(rxe, mgid, mcg);
if (err)
goto err_dec;
out:
spin_unlock_irqrestore(&rxe->mcg_lock, flags);
return mcg;
/* speculative alloc of new mcg */
mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
if (!mcg)
return ERR_PTR(-ENOMEM);
spin_lock_bh(&rxe->mcg_lock);
/* re-check to see if someone else just added it */
tmp = __rxe_lookup_mcg(rxe, mgid);
if (tmp) {
spin_unlock_bh(&rxe->mcg_lock);
atomic_dec(&rxe->mcg_num);
kfree(mcg);
return tmp;
}
__rxe_init_mcg(rxe, mgid, mcg);
spin_unlock_bh(&rxe->mcg_lock);
/* add mcast address outside of lock */
err = rxe_mcast_add(rxe, mgid);
if (!err)
return mcg;
kfree(mcg);
err_dec:
atomic_dec(&rxe->mcg_num);
spin_unlock_irqrestore(&rxe->mcg_lock, flags);
kfree(mcg);
return ERR_PTR(err);
}
@ -268,7 +259,6 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
__rxe_remove_mcg(mcg);
kref_put(&mcg->ref_cnt, rxe_cleanup_mcg);
rxe_mcast_delete(mcg->rxe, &mcg->mgid);
atomic_dec(&rxe->mcg_num);
}
@ -280,11 +270,12 @@ static void __rxe_destroy_mcg(struct rxe_mcg *mcg)
*/
static void rxe_destroy_mcg(struct rxe_mcg *mcg)
{
unsigned long flags;
/* delete mcast address outside of lock */
rxe_mcast_del(mcg->rxe, &mcg->mgid);
spin_lock_irqsave(&mcg->rxe->mcg_lock, flags);
spin_lock_bh(&mcg->rxe->mcg_lock);
__rxe_destroy_mcg(mcg);
spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags);
spin_unlock_bh(&mcg->rxe->mcg_lock);
}
/**
@ -339,25 +330,24 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
struct rxe_dev *rxe = mcg->rxe;
struct rxe_mca *mca, *tmp;
unsigned long flags;
int err;
/* check to see if the qp is already a member of the group */
spin_lock_irqsave(&rxe->mcg_lock, flags);
spin_lock_bh(&rxe->mcg_lock);
list_for_each_entry(mca, &mcg->qp_list, qp_list) {
if (mca->qp == qp) {
spin_unlock_irqrestore(&rxe->mcg_lock, flags);
spin_unlock_bh(&rxe->mcg_lock);
return 0;
}
}
spin_unlock_irqrestore(&rxe->mcg_lock, flags);
spin_unlock_bh(&rxe->mcg_lock);
/* speculative alloc new mca without using GFP_ATOMIC */
mca = kzalloc(sizeof(*mca), GFP_KERNEL);
if (!mca)
return -ENOMEM;
spin_lock_irqsave(&rxe->mcg_lock, flags);
spin_lock_bh(&rxe->mcg_lock);
/* re-check to see if someone else just attached qp */
list_for_each_entry(tmp, &mcg->qp_list, qp_list) {
if (tmp->qp == qp) {
@ -371,7 +361,7 @@ static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
if (err)
kfree(mca);
out:
spin_unlock_irqrestore(&rxe->mcg_lock, flags);
spin_unlock_bh(&rxe->mcg_lock);
return err;
}
@ -405,9 +395,8 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
{
struct rxe_dev *rxe = mcg->rxe;
struct rxe_mca *mca, *tmp;
unsigned long flags;
spin_lock_irqsave(&rxe->mcg_lock, flags);
spin_lock_bh(&rxe->mcg_lock);
list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) {
if (mca->qp == qp) {
__rxe_cleanup_mca(mca, mcg);
@ -421,13 +410,13 @@ static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp)
if (atomic_read(&mcg->qp_num) <= 0)
__rxe_destroy_mcg(mcg);
spin_unlock_irqrestore(&rxe->mcg_lock, flags);
spin_unlock_bh(&rxe->mcg_lock);
return 0;
}
}
/* we didn't find the qp on the list */
spin_unlock_irqrestore(&rxe->mcg_lock, flags);
spin_unlock_bh(&rxe->mcg_lock);
return -EINVAL;
}

View File

@ -680,6 +680,11 @@ static struct resp_res *rxe_prepare_read_res(struct rxe_qp *qp,
* It is assumed that the access permissions if originally good
* are OK and the mappings to be unchanged.
*
* TODO: If someone reregisters an MR to change its size or
* access permissions during the processing of an RDMA read
* we should kill the responder resource and complete the
* operation with an error.
*
* Return: mr on success else NULL
*/
static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
@ -690,23 +695,27 @@ static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
if (rkey_is_mw(rkey)) {
mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
if (!mw || mw->rkey != rkey)
if (!mw)
return NULL;
if (mw->state != RXE_MW_STATE_VALID) {
mr = mw->mr;
if (mw->rkey != rkey || mw->state != RXE_MW_STATE_VALID ||
!mr || mr->state != RXE_MR_STATE_VALID) {
rxe_put(mw);
return NULL;
}
mr = mw->mr;
rxe_get(mr);
rxe_put(mw);
} else {
mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
if (!mr || mr->rkey != rkey)
return NULL;
return mr;
}
if (mr->state != RXE_MR_STATE_VALID) {
mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
if (!mr)
return NULL;
if (mr->rkey != rkey || mr->state != RXE_MR_STATE_VALID) {
rxe_put(mr);
return NULL;
}
@ -736,8 +745,14 @@ static enum resp_states read_reply(struct rxe_qp *qp,
}
if (res->state == rdatm_res_state_new) {
mr = qp->resp.mr;
qp->resp.mr = NULL;
if (!res->replay) {
mr = qp->resp.mr;
qp->resp.mr = NULL;
} else {
mr = rxe_recheck_mr(qp, res->read.rkey);
if (!mr)
return RESPST_ERR_RKEY_VIOLATION;
}
if (res->read.resid <= mtu)
opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;

View File

@ -968,14 +968,15 @@ static void siw_accept_newconn(struct siw_cep *cep)
siw_cep_set_inuse(new_cep);
rv = siw_proc_mpareq(new_cep);
siw_cep_set_free(new_cep);
if (rv != -EAGAIN) {
siw_cep_put(cep);
new_cep->listen_cep = NULL;
if (rv)
if (rv) {
siw_cep_set_free(new_cep);
goto error;
}
}
siw_cep_set_free(new_cep);
}
return;