staging/rdma/hfi: fix CQ completion order issue
The current implementation of the sdma_wait variable has a timing hole that can cause a completion Q entry to be returned from a pio send prior to an older sdma packets completion queue entry. The sdma_wait variable used to be decremented prior to calling the packet complete routine. The hole is between decrement and the verbs completion where send engine using pio could return a out of order completion in that window. This patch closes the hole by allowing an API option to specify an sdma_drained callback. The atomic dec is positioned after the complete callback to avoid the window as long as the pio path doesn't execute when there is a non-zero sdma count. Reviewed-by: Jubin John <jubin.john@intel.com> Signed-off-by: Dean Luick <dean.luick@intel.com> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com>
This commit is contained in:
parent
91702b4a39
commit
a545f5308b
|
@ -69,7 +69,8 @@ struct sdma_engine;
|
|||
* @list: used to add/insert into QP/PQ wait lists
|
||||
* @tx_head: overflow list of sdma_txreq's
|
||||
* @sleep: no space callback
|
||||
* @wakeup: space callback
|
||||
* @wakeup: space callback wakeup
|
||||
* @sdma_drained: sdma count drained
|
||||
* @iowork: workqueue overhead
|
||||
* @wait_dma: wait for sdma_busy == 0
|
||||
* @wait_pio: wait for pio_busy == 0
|
||||
|
@ -104,6 +105,7 @@ struct iowait {
|
|||
struct sdma_txreq *tx,
|
||||
unsigned seq);
|
||||
void (*wakeup)(struct iowait *wait, int reason);
|
||||
void (*sdma_drained)(struct iowait *wait);
|
||||
struct work_struct iowork;
|
||||
wait_queue_head_t wait_dma;
|
||||
wait_queue_head_t wait_pio;
|
||||
|
@ -122,7 +124,7 @@ struct iowait {
|
|||
* @tx_limit: limit for overflow queuing
|
||||
* @func: restart function for workqueue
|
||||
* @sleep: sleep function for no space
|
||||
* @wakeup: wakeup function for no space
|
||||
* @resume: wakeup function for no space
|
||||
*
|
||||
* This function initializes the iowait
|
||||
* structure embedded in the QP or PQ.
|
||||
|
@ -138,7 +140,8 @@ static inline void iowait_init(
|
|||
struct iowait *wait,
|
||||
struct sdma_txreq *tx,
|
||||
unsigned seq),
|
||||
void (*wakeup)(struct iowait *wait, int reason))
|
||||
void (*wakeup)(struct iowait *wait, int reason),
|
||||
void (*sdma_drained)(struct iowait *wait))
|
||||
{
|
||||
wait->count = 0;
|
||||
INIT_LIST_HEAD(&wait->list);
|
||||
|
@ -151,6 +154,7 @@ static inline void iowait_init(
|
|||
wait->tx_limit = tx_limit;
|
||||
wait->sleep = sleep;
|
||||
wait->wakeup = wakeup;
|
||||
wait->sdma_drained = sdma_drained;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -273,6 +277,8 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
|
|||
{
|
||||
wake_up(&wait->wait_dma);
|
||||
wake_up(&wait->wait_pio);
|
||||
if (wait->sdma_drained)
|
||||
wait->sdma_drained(wait);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -73,6 +73,7 @@ static int iowait_sleep(
|
|||
struct sdma_txreq *stx,
|
||||
unsigned seq);
|
||||
static void iowait_wakeup(struct iowait *wait, int reason);
|
||||
static void iowait_sdma_drained(struct iowait *wait);
|
||||
static void qp_pio_drain(struct rvt_qp *qp);
|
||||
|
||||
static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
|
||||
|
@ -509,6 +510,22 @@ static void iowait_wakeup(struct iowait *wait, int reason)
|
|||
hfi1_qp_wakeup(qp, RVT_S_WAIT_DMA_DESC);
|
||||
}
|
||||
|
||||
static void iowait_sdma_drained(struct iowait *wait)
|
||||
{
|
||||
struct rvt_qp *qp = iowait_to_qp(wait);
|
||||
|
||||
/*
|
||||
* This happens when the send engine notes
|
||||
* a QP in the error state and cannot
|
||||
* do the flush work until that QP's
|
||||
* sdma work has finished.
|
||||
*/
|
||||
if (qp->s_flags & RVT_S_WAIT_DMA) {
|
||||
qp->s_flags &= ~RVT_S_WAIT_DMA;
|
||||
hfi1_schedule_send(qp);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* qp_to_sdma_engine - map a qp to a send engine
|
||||
|
@ -773,7 +790,8 @@ void notify_qp_reset(struct rvt_qp *qp)
|
|||
1,
|
||||
_hfi1_do_send,
|
||||
iowait_sleep,
|
||||
iowait_wakeup);
|
||||
iowait_wakeup,
|
||||
iowait_sdma_drained);
|
||||
priv->r_adefered = 0;
|
||||
clear_ahg(qp);
|
||||
}
|
||||
|
|
|
@ -361,6 +361,28 @@ static inline void sdma_set_desc_cnt(struct sdma_engine *sde, unsigned cnt)
|
|||
write_sde_csr(sde, SD(DESC_CNT), reg);
|
||||
}
|
||||
|
||||
static inline void complete_tx(struct sdma_engine *sde,
|
||||
struct sdma_txreq *tx,
|
||||
int res)
|
||||
{
|
||||
/* protect against complete modifying */
|
||||
struct iowait *wait = tx->wait;
|
||||
callback_t complete = tx->complete;
|
||||
|
||||
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
|
||||
trace_hfi1_sdma_out_sn(sde, txp->sn);
|
||||
if (WARN_ON_ONCE(sde->head_sn != txp->sn))
|
||||
dd_dev_err(sde->dd, "expected %llu got %llu\n",
|
||||
sde->head_sn, txp->sn);
|
||||
sde->head_sn++;
|
||||
#endif
|
||||
sdma_txclean(sde->dd, tx);
|
||||
if (complete)
|
||||
(*complete)(tx, res);
|
||||
if (iowait_sdma_dec(wait) && wait)
|
||||
iowait_drain_wakeup(wait);
|
||||
}
|
||||
|
||||
/*
|
||||
* Complete all the sdma requests with a SDMA_TXREQ_S_ABORTED status
|
||||
*
|
||||
|
@ -395,27 +417,8 @@ static void sdma_flush(struct sdma_engine *sde)
|
|||
}
|
||||
spin_unlock_irqrestore(&sde->flushlist_lock, flags);
|
||||
/* flush from flush list */
|
||||
list_for_each_entry_safe(txp, txp_next, &flushlist, list) {
|
||||
int drained = 0;
|
||||
/* protect against complete modifying */
|
||||
struct iowait *wait = txp->wait;
|
||||
|
||||
list_del_init(&txp->list);
|
||||
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
|
||||
trace_hfi1_sdma_out_sn(sde, txp->sn);
|
||||
if (WARN_ON_ONCE(sde->head_sn != txp->sn))
|
||||
dd_dev_err(sde->dd, "expected %llu got %llu\n",
|
||||
sde->head_sn, txp->sn);
|
||||
sde->head_sn++;
|
||||
#endif
|
||||
sdma_txclean(sde->dd, txp);
|
||||
if (wait)
|
||||
drained = iowait_sdma_dec(wait);
|
||||
if (txp->complete)
|
||||
(*txp->complete)(txp, SDMA_TXREQ_S_ABORTED, drained);
|
||||
if (wait && drained)
|
||||
iowait_drain_wakeup(wait);
|
||||
}
|
||||
list_for_each_entry_safe(txp, txp_next, &flushlist, list)
|
||||
complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -577,31 +580,10 @@ static void sdma_flush_descq(struct sdma_engine *sde)
|
|||
head = ++sde->descq_head & sde->sdma_mask;
|
||||
/* if now past this txp's descs, do the callback */
|
||||
if (txp && txp->next_descq_idx == head) {
|
||||
int drained = 0;
|
||||
/* protect against complete modifying */
|
||||
struct iowait *wait = txp->wait;
|
||||
|
||||
/* remove from list */
|
||||
sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
|
||||
if (wait)
|
||||
drained = iowait_sdma_dec(wait);
|
||||
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
|
||||
trace_hfi1_sdma_out_sn(sde, txp->sn);
|
||||
if (WARN_ON_ONCE(sde->head_sn != txp->sn))
|
||||
dd_dev_err(sde->dd, "expected %llu got %llu\n",
|
||||
sde->head_sn, txp->sn);
|
||||
sde->head_sn++;
|
||||
#endif
|
||||
sdma_txclean(sde->dd, txp);
|
||||
complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
|
||||
trace_hfi1_sdma_progress(sde, head, tail, txp);
|
||||
if (txp->complete)
|
||||
(*txp->complete)(
|
||||
txp,
|
||||
SDMA_TXREQ_S_ABORTED,
|
||||
drained);
|
||||
if (wait && drained)
|
||||
iowait_drain_wakeup(wait);
|
||||
/* see if there is another txp */
|
||||
txp = get_txhead(sde);
|
||||
}
|
||||
progress++;
|
||||
|
@ -1470,7 +1452,7 @@ static void sdma_make_progress(struct sdma_engine *sde, u64 status)
|
|||
{
|
||||
struct sdma_txreq *txp = NULL;
|
||||
int progress = 0;
|
||||
u16 hwhead, swhead, swtail;
|
||||
u16 hwhead, swhead;
|
||||
int idle_check_done = 0;
|
||||
|
||||
hwhead = sdma_gethead(sde);
|
||||
|
@ -1491,29 +1473,9 @@ retry:
|
|||
|
||||
/* if now past this txp's descs, do the callback */
|
||||
if (txp && txp->next_descq_idx == swhead) {
|
||||
int drained = 0;
|
||||
/* protect against complete modifying */
|
||||
struct iowait *wait = txp->wait;
|
||||
|
||||
/* remove from list */
|
||||
sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
|
||||
if (wait)
|
||||
drained = iowait_sdma_dec(wait);
|
||||
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
|
||||
trace_hfi1_sdma_out_sn(sde, txp->sn);
|
||||
if (WARN_ON_ONCE(sde->head_sn != txp->sn))
|
||||
dd_dev_err(sde->dd, "expected %llu got %llu\n",
|
||||
sde->head_sn, txp->sn);
|
||||
sde->head_sn++;
|
||||
#endif
|
||||
sdma_txclean(sde->dd, txp);
|
||||
if (txp->complete)
|
||||
(*txp->complete)(
|
||||
txp,
|
||||
SDMA_TXREQ_S_OK,
|
||||
drained);
|
||||
if (wait && drained)
|
||||
iowait_drain_wakeup(wait);
|
||||
complete_tx(sde, txp, SDMA_TXREQ_S_OK);
|
||||
/* see if there is another txp */
|
||||
txp = get_txhead(sde);
|
||||
}
|
||||
|
@ -1531,6 +1493,8 @@ retry:
|
|||
* of sdma_make_progress(..) which is ensured by idle_check_done flag
|
||||
*/
|
||||
if ((status & sde->idle_mask) && !idle_check_done) {
|
||||
u16 swtail;
|
||||
|
||||
swtail = ACCESS_ONCE(sde->descq_tail) & sde->sdma_mask;
|
||||
if (swtail != hwhead) {
|
||||
hwhead = (u16)read_sde_csr(sde, SD(HEAD));
|
||||
|
|
|
@ -555,7 +555,7 @@ static inline int sdma_txinit_ahg(
|
|||
u8 num_ahg,
|
||||
u32 *ahg,
|
||||
u8 ahg_hlen,
|
||||
void (*cb)(struct sdma_txreq *, int, int))
|
||||
void (*cb)(struct sdma_txreq *, int))
|
||||
{
|
||||
if (tlen == 0)
|
||||
return -ENODATA;
|
||||
|
@ -618,7 +618,7 @@ static inline int sdma_txinit(
|
|||
struct sdma_txreq *tx,
|
||||
u16 flags,
|
||||
u16 tlen,
|
||||
void (*cb)(struct sdma_txreq *, int, int))
|
||||
void (*cb)(struct sdma_txreq *, int))
|
||||
{
|
||||
return sdma_txinit_ahg(tx, flags, tlen, 0, 0, NULL, 0, cb);
|
||||
}
|
||||
|
|
|
@ -93,7 +93,7 @@ struct sdma_desc {
|
|||
#define SDMA_TXREQ_F_USE_AHG 0x0004
|
||||
|
||||
struct sdma_txreq;
|
||||
typedef void (*callback_t)(struct sdma_txreq *, int, int);
|
||||
typedef void (*callback_t)(struct sdma_txreq *, int);
|
||||
|
||||
struct iowait;
|
||||
struct sdma_txreq {
|
||||
|
|
|
@ -273,7 +273,7 @@ struct user_sdma_txreq {
|
|||
|
||||
static int user_sdma_send_pkts(struct user_sdma_request *, unsigned);
|
||||
static int num_user_pages(const struct iovec *);
|
||||
static void user_sdma_txreq_cb(struct sdma_txreq *, int, int);
|
||||
static void user_sdma_txreq_cb(struct sdma_txreq *, int);
|
||||
static inline void pq_update(struct hfi1_user_sdma_pkt_q *);
|
||||
static void user_sdma_free_request(struct user_sdma_request *, bool);
|
||||
static int pin_vector_pages(struct user_sdma_request *,
|
||||
|
@ -388,7 +388,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, struct file *fp)
|
|||
init_waitqueue_head(&pq->wait);
|
||||
|
||||
iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
|
||||
activate_packet_queue);
|
||||
activate_packet_queue, NULL);
|
||||
pq->reqidx = 0;
|
||||
snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
|
||||
fd->subctxt);
|
||||
|
@ -1341,8 +1341,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
|
|||
* tx request have been processed by the DMA engine. Called in
|
||||
* interrupt context.
|
||||
*/
|
||||
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status,
|
||||
int drain)
|
||||
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
|
||||
{
|
||||
struct user_sdma_txreq *tx =
|
||||
container_of(txreq, struct user_sdma_txreq, txreq);
|
||||
|
|
|
@ -130,8 +130,7 @@ MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
|
|||
|
||||
static void verbs_sdma_complete(
|
||||
struct sdma_txreq *cookie,
|
||||
int status,
|
||||
int drained);
|
||||
int status);
|
||||
|
||||
static int pio_wait(struct rvt_qp *qp,
|
||||
struct send_context *sc,
|
||||
|
@ -523,8 +522,7 @@ void update_sge(struct rvt_sge_state *ss, u32 length)
|
|||
/* New API */
|
||||
static void verbs_sdma_complete(
|
||||
struct sdma_txreq *cookie,
|
||||
int status,
|
||||
int drained)
|
||||
int status)
|
||||
{
|
||||
struct verbs_txreq *tx =
|
||||
container_of(cookie, struct verbs_txreq, txreq);
|
||||
|
@ -539,18 +537,6 @@ static void verbs_sdma_complete(
|
|||
hdr = &tx->phdr.hdr;
|
||||
hfi1_rc_send_complete(qp, hdr);
|
||||
}
|
||||
if (drained) {
|
||||
/*
|
||||
* This happens when the send engine notes
|
||||
* a QP in the error state and cannot
|
||||
* do the flush work until that QP's
|
||||
* sdma work has finished.
|
||||
*/
|
||||
if (qp->s_flags & RVT_S_WAIT_DMA) {
|
||||
qp->s_flags &= ~RVT_S_WAIT_DMA;
|
||||
hfi1_schedule_send(qp);
|
||||
}
|
||||
}
|
||||
spin_unlock(&qp->s_lock);
|
||||
|
||||
hfi1_put_txreq(tx);
|
||||
|
|
Loading…
Reference in New Issue