diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c index 6c8f09ee489d..f76c57074529 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c @@ -939,8 +939,6 @@ kiblnd_check_sends(kib_conn_t *conn) kiblnd_queue_tx_locked(tx, conn); } - kiblnd_conn_addref(conn); /* 1 ref for me.... (see b21911) */ - for (;;) { int credit; @@ -966,8 +964,6 @@ kiblnd_check_sends(kib_conn_t *conn) } spin_unlock(&conn->ibc_lock); - - kiblnd_conn_decref(conn); /* ...until here */ } static void @@ -2131,6 +2127,16 @@ kiblnd_connreq_done(kib_conn_t *conn, int status) return; } + /** + * refcount taken by cmid is not reliable after I released the glock + * because this connection is visible to other threads now, another + * thread can find and close this connection right after I released + * the glock, if kiblnd_cm_callback for RDMA_CM_EVENT_DISCONNECTED is + * called, it can release the connection refcount taken by cmid. + * It means the connection could be destroyed before I finish my + * operations on it. + */ + kiblnd_conn_addref(conn); write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); /* Schedule blocked txs */ @@ -2146,6 +2152,8 @@ kiblnd_connreq_done(kib_conn_t *conn, int status) /* schedule blocked rxs */ kiblnd_handle_early_rxs(conn); + + kiblnd_conn_decref(conn); } static void