From c5483388bb4d771007ef36478db038e07922a020 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Mon, 24 Sep 2007 13:19:09 -0700 Subject: [PATCH] RDMA/cma: Add locking around QP accesses If a user allocates a QP on an rdma_cm_id, the rdma_cm will automatically transition the QP through its states (RTR, RTS, error, etc.) While the QP state transitions are occurring, the QP itself must remain valid. Provide locking around the QP pointer to prevent its destruction while accessing the pointer. This fixes an issue reported by Olaf Kirch from Oracle that resulted in a system crash: "An incoming connection arrives and we decide to tear down the nascent connection. The remote ends decides to do the same. We start to shut down the connection, and call rdma_destroy_qp on our cm_id. ... Now apparently a 'connect reject' message comes in from the other host, and cma_ib_handler() is called with an event of IB_CM_REJ_RECEIVED. It calls cma_modify_qp_err, which for some odd reason tries to modify the exact same QP we just destroyed." Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 90 +++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 93644f82592c..01ae052ac3f4 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -121,6 +121,8 @@ struct rdma_id_private { enum cma_state state; spinlock_t lock; + struct mutex qp_mutex; + struct completion comp; atomic_t refcount; wait_queue_head_t wait_remove; @@ -389,6 +391,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, id_priv->id.event_handler = event_handler; id_priv->id.ps = ps; spin_lock_init(&id_priv->lock); + mutex_init(&id_priv->qp_mutex); init_completion(&id_priv->comp); atomic_set(&id_priv->refcount, 1); init_waitqueue_head(&id_priv->wait_remove); @@ -474,61 +477,86 @@ EXPORT_SYMBOL(rdma_create_qp); void rdma_destroy_qp(struct rdma_cm_id *id) { - ib_destroy_qp(id->qp); + struct rdma_id_private *id_priv; + + id_priv = container_of(id, struct rdma_id_private, id); + mutex_lock(&id_priv->qp_mutex); + ib_destroy_qp(id_priv->id.qp); + id_priv->id.qp = NULL; + mutex_unlock(&id_priv->qp_mutex); } EXPORT_SYMBOL(rdma_destroy_qp); -static int cma_modify_qp_rtr(struct rdma_cm_id *id) +static int cma_modify_qp_rtr(struct rdma_id_private *id_priv) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; - if (!id->qp) - return 0; + mutex_lock(&id_priv->qp_mutex); + if (!id_priv->id.qp) { + ret = 0; + goto out; + } /* Need to update QP attributes from default values. */ qp_attr.qp_state = IB_QPS_INIT; - ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) - return ret; + goto out; - ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); if (ret) - return ret; + goto out; qp_attr.qp_state = IB_QPS_RTR; - ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) - return ret; + goto out; - return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); +out: + mutex_unlock(&id_priv->qp_mutex); + return ret; } -static int cma_modify_qp_rts(struct rdma_cm_id *id) +static int cma_modify_qp_rts(struct rdma_id_private *id_priv) { struct ib_qp_attr qp_attr; int qp_attr_mask, ret; - if (!id->qp) - return 0; + mutex_lock(&id_priv->qp_mutex); + if (!id_priv->id.qp) { + ret = 0; + goto out; + } qp_attr.qp_state = IB_QPS_RTS; - ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); if (ret) - return ret; + goto out; - return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask); +out: + mutex_unlock(&id_priv->qp_mutex); + return ret; } -static int cma_modify_qp_err(struct rdma_cm_id *id) +static int cma_modify_qp_err(struct rdma_id_private *id_priv) { struct ib_qp_attr qp_attr; + int ret; - if (!id->qp) - return 0; + mutex_lock(&id_priv->qp_mutex); + if (!id_priv->id.qp) { + ret = 0; + goto out; + } qp_attr.qp_state = IB_QPS_ERR; - return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE); + ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE); +out: + mutex_unlock(&id_priv->qp_mutex); + return ret; } static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, @@ -857,11 +885,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) { int ret; - ret = cma_modify_qp_rtr(&id_priv->id); + ret = cma_modify_qp_rtr(id_priv); if (ret) goto reject; - ret = cma_modify_qp_rts(&id_priv->id); + ret = cma_modify_qp_rts(id_priv); if (ret) goto reject; @@ -871,7 +899,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv) return 0; reject: - cma_modify_qp_err(&id_priv->id); + cma_modify_qp_err(id_priv); ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, NULL, 0); return ret; @@ -947,7 +975,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) /* ignore event */ goto out; case IB_CM_REJ_RECEIVED: - cma_modify_qp_err(&id_priv->id); + cma_modify_qp_err(id_priv); event.status = ib_event->param.rej_rcvd.reason; event.event = RDMA_CM_EVENT_REJECTED; event.param.conn.private_data = ib_event->private_data; @@ -2264,7 +2292,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv, sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; cm_id->remote_addr = *sin; - ret = cma_modify_qp_rtr(&id_priv->id); + ret = cma_modify_qp_rtr(id_priv); if (ret) goto out; @@ -2331,7 +2359,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv, int qp_attr_mask, ret; if (id_priv->id.qp) { - ret = cma_modify_qp_rtr(&id_priv->id); + ret = cma_modify_qp_rtr(id_priv); if (ret) goto out; @@ -2370,7 +2398,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv, struct iw_cm_conn_param iw_param; int ret; - ret = cma_modify_qp_rtr(&id_priv->id); + ret = cma_modify_qp_rtr(id_priv); if (ret) return ret; @@ -2442,7 +2470,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) return 0; reject: - cma_modify_qp_err(id); + cma_modify_qp_err(id_priv); rdma_reject(id, NULL, 0); return ret; } @@ -2512,7 +2540,7 @@ int rdma_disconnect(struct rdma_cm_id *id) switch (rdma_node_get_transport(id->device->node_type)) { case RDMA_TRANSPORT_IB: - ret = cma_modify_qp_err(id); + ret = cma_modify_qp_err(id_priv); if (ret) goto out; /* Initiate or respond to a disconnect. */ @@ -2543,9 +2571,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) cma_disable_remove(id_priv, CMA_ADDR_RESOLVED)) return 0; + mutex_lock(&id_priv->qp_mutex); if (!status && id_priv->id.qp) status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, multicast->rec.mlid); + mutex_unlock(&id_priv->qp_mutex); memset(&event, 0, sizeof event); event.status = status;